From 9fa45070a2e59a871e1cd3370173369f3a4f61e2 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Tue, 4 Sep 2018 11:48:26 +0100
Subject: locking/atomics: Switch to generated fallbacks

As a step to ensuring the atomic* APIs are consistent, switch to fallbacks
generated by gen-atomic-fallback.sh.

These are checked in rather than generated with Kbuild, since:

* This allows inspection of the atomics with git grep and ctags on a
  pristine tree, which Linus strongly prefers being able to do.

* The fallbacks are not affected by machine details or configuration
  options, so it is not necessary to regenerate them to take these into
  account.

* These are included by files required *very* early in the build process
  (e.g. for generating bounds.h), and we'd rather not complicate the
  top-level Kbuild file with dependencies.

The new fallback header should be equivalent to the old fallbacks in
<linux/atomic.h>, but:

* It is formatted a little differently due to scripting ensuring things
  are more regular than they used to be.

* Fallbacks are now expanded in-place as static inline functions rather
  than macros.

* The prototypes for fallbacks are arragned consistently with the return
  type on a separate line to try to keep to a sensible line length.

There should be no functional change as a result of this patch.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Will Deacon <will.deacon@arm.com>
Cc: linux-arm-kernel@lists.infradead.org
Cc: catalin.marinas@arm.com
Cc: linuxdrivers@attotech.com
Cc: dvyukov@google.com
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: arnd@arndb.de
Cc: aryabinin@virtuozzo.com
Cc: glider@google.com
Link: http://lkml.kernel.org/r/20180904104830.2975-3-mark.rutland@arm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/atomic-fallback.h | 2294 +++++++++++++++++++++++++++++++++++++++
 include/linux/atomic.h          | 1241 +--------------------
 2 files changed, 2295 insertions(+), 1240 deletions(-)
 create mode 100644 include/linux/atomic-fallback.h

(limited to 'include')

diff --git a/include/linux/atomic-fallback.h b/include/linux/atomic-fallback.h
new file mode 100644
index 000000000000..1c02c0112fbb
--- /dev/null
+++ b/include/linux/atomic-fallback.h
@@ -0,0 +1,2294 @@
+// SPDX-License-Identifier: GPL-2.0
+
+// Generated by scripts/atomic/gen-atomic-fallback.sh
+// DO NOT MODIFY THIS FILE DIRECTLY
+
+#ifndef _LINUX_ATOMIC_FALLBACK_H
+#define _LINUX_ATOMIC_FALLBACK_H
+
+#ifndef xchg_relaxed
+#define xchg_relaxed		xchg
+#define xchg_acquire		xchg
+#define xchg_release		xchg
+#else /* xchg_relaxed */
+
+#ifndef xchg_acquire
+#define xchg_acquire(...) \
+	__atomic_op_acquire(xchg, __VA_ARGS__)
+#endif
+
+#ifndef xchg_release
+#define xchg_release(...) \
+	__atomic_op_release(xchg, __VA_ARGS__)
+#endif
+
+#ifndef xchg
+#define xchg(...) \
+	__atomic_op_fence(xchg, __VA_ARGS__)
+#endif
+
+#endif /* xchg_relaxed */
+
+#ifndef cmpxchg_relaxed
+#define cmpxchg_relaxed		cmpxchg
+#define cmpxchg_acquire		cmpxchg
+#define cmpxchg_release		cmpxchg
+#else /* cmpxchg_relaxed */
+
+#ifndef cmpxchg_acquire
+#define cmpxchg_acquire(...) \
+	__atomic_op_acquire(cmpxchg, __VA_ARGS__)
+#endif
+
+#ifndef cmpxchg_release
+#define cmpxchg_release(...) \
+	__atomic_op_release(cmpxchg, __VA_ARGS__)
+#endif
+
+#ifndef cmpxchg
+#define cmpxchg(...) \
+	__atomic_op_fence(cmpxchg, __VA_ARGS__)
+#endif
+
+#endif /* cmpxchg_relaxed */
+
+#ifndef cmpxchg64_relaxed
+#define cmpxchg64_relaxed		cmpxchg64
+#define cmpxchg64_acquire		cmpxchg64
+#define cmpxchg64_release		cmpxchg64
+#else /* cmpxchg64_relaxed */
+
+#ifndef cmpxchg64_acquire
+#define cmpxchg64_acquire(...) \
+	__atomic_op_acquire(cmpxchg64, __VA_ARGS__)
+#endif
+
+#ifndef cmpxchg64_release
+#define cmpxchg64_release(...) \
+	__atomic_op_release(cmpxchg64, __VA_ARGS__)
+#endif
+
+#ifndef cmpxchg64
+#define cmpxchg64(...) \
+	__atomic_op_fence(cmpxchg64, __VA_ARGS__)
+#endif
+
+#endif /* cmpxchg64_relaxed */
+
+#ifndef atomic_read_acquire
+static inline int
+atomic_read_acquire(const atomic_t *v)
+{
+	return smp_load_acquire(&(v)->counter);
+}
+#define atomic_read_acquire atomic_read_acquire
+#endif
+
+#ifndef atomic_set_release
+static inline void
+atomic_set_release(atomic_t *v, int i)
+{
+	smp_store_release(&(v)->counter, i);
+}
+#define atomic_set_release atomic_set_release
+#endif
+
+#ifndef atomic_add_return_relaxed
+#define atomic_add_return_acquire atomic_add_return
+#define atomic_add_return_release atomic_add_return
+#define atomic_add_return_relaxed atomic_add_return
+#else /* atomic_add_return_relaxed */
+
+#ifndef atomic_add_return_acquire
+static inline int
+atomic_add_return_acquire(int i, atomic_t *v)
+{
+	int ret = atomic_add_return_relaxed(i, v);
+	__atomic_acquire_fence();
+	return ret;
+}
+#define atomic_add_return_acquire atomic_add_return_acquire
+#endif
+
+#ifndef atomic_add_return_release
+static inline int
+atomic_add_return_release(int i, atomic_t *v)
+{
+	__atomic_release_fence();
+	return atomic_add_return_relaxed(i, v);
+}
+#define atomic_add_return_release atomic_add_return_release
+#endif
+
+#ifndef atomic_add_return
+static inline int
+atomic_add_return(int i, atomic_t *v)
+{
+	int ret;
+	__atomic_pre_full_fence();
+	ret = atomic_add_return_relaxed(i, v);
+	__atomic_post_full_fence();
+	return ret;
+}
+#define atomic_add_return atomic_add_return
+#endif
+
+#endif /* atomic_add_return_relaxed */
+
+#ifndef atomic_fetch_add_relaxed
+#define atomic_fetch_add_acquire atomic_fetch_add
+#define atomic_fetch_add_release atomic_fetch_add
+#define atomic_fetch_add_relaxed atomic_fetch_add
+#else /* atomic_fetch_add_relaxed */
+
+#ifndef atomic_fetch_add_acquire
+static inline int
+atomic_fetch_add_acquire(int i, atomic_t *v)
+{
+	int ret = atomic_fetch_add_relaxed(i, v);
+	__atomic_acquire_fence();
+	return ret;
+}
+#define atomic_fetch_add_acquire atomic_fetch_add_acquire
+#endif
+
+#ifndef atomic_fetch_add_release
+static inline int
+atomic_fetch_add_release(int i, atomic_t *v)
+{
+	__atomic_release_fence();
+	return atomic_fetch_add_relaxed(i, v);
+}
+#define atomic_fetch_add_release atomic_fetch_add_release
+#endif
+
+#ifndef atomic_fetch_add
+static inline int
+atomic_fetch_add(int i, atomic_t *v)
+{
+	int ret;
+	__atomic_pre_full_fence();
+	ret = atomic_fetch_add_relaxed(i, v);
+	__atomic_post_full_fence();
+	return ret;
+}
+#define atomic_fetch_add atomic_fetch_add
+#endif
+
+#endif /* atomic_fetch_add_relaxed */
+
+#ifndef atomic_sub_return_relaxed
+#define atomic_sub_return_acquire atomic_sub_return
+#define atomic_sub_return_release atomic_sub_return
+#define atomic_sub_return_relaxed atomic_sub_return
+#else /* atomic_sub_return_relaxed */
+
+#ifndef atomic_sub_return_acquire
+static inline int
+atomic_sub_return_acquire(int i, atomic_t *v)
+{
+	int ret = atomic_sub_return_relaxed(i, v);
+	__atomic_acquire_fence();
+	return ret;
+}
+#define atomic_sub_return_acquire atomic_sub_return_acquire
+#endif
+
+#ifndef atomic_sub_return_release
+static inline int
+atomic_sub_return_release(int i, atomic_t *v)
+{
+	__atomic_release_fence();
+	return atomic_sub_return_relaxed(i, v);
+}
+#define atomic_sub_return_release atomic_sub_return_release
+#endif
+
+#ifndef atomic_sub_return
+static inline int
+atomic_sub_return(int i, atomic_t *v)
+{
+	int ret;
+	__atomic_pre_full_fence();
+	ret = atomic_sub_return_relaxed(i, v);
+	__atomic_post_full_fence();
+	return ret;
+}
+#define atomic_sub_return atomic_sub_return
+#endif
+
+#endif /* atomic_sub_return_relaxed */
+
+#ifndef atomic_fetch_sub_relaxed
+#define atomic_fetch_sub_acquire atomic_fetch_sub
+#define atomic_fetch_sub_release atomic_fetch_sub
+#define atomic_fetch_sub_relaxed atomic_fetch_sub
+#else /* atomic_fetch_sub_relaxed */
+
+#ifndef atomic_fetch_sub_acquire
+static inline int
+atomic_fetch_sub_acquire(int i, atomic_t *v)
+{
+	int ret = atomic_fetch_sub_relaxed(i, v);
+	__atomic_acquire_fence();
+	return ret;
+}
+#define atomic_fetch_sub_acquire atomic_fetch_sub_acquire
+#endif
+
+#ifndef atomic_fetch_sub_release
+static inline int
+atomic_fetch_sub_release(int i, atomic_t *v)
+{
+	__atomic_release_fence();
+	return atomic_fetch_sub_relaxed(i, v);
+}
+#define atomic_fetch_sub_release atomic_fetch_sub_release
+#endif
+
+#ifndef atomic_fetch_sub
+static inline int
+atomic_fetch_sub(int i, atomic_t *v)
+{
+	int ret;
+	__atomic_pre_full_fence();
+	ret = atomic_fetch_sub_relaxed(i, v);
+	__atomic_post_full_fence();
+	return ret;
+}
+#define atomic_fetch_sub atomic_fetch_sub
+#endif
+
+#endif /* atomic_fetch_sub_relaxed */
+
+#ifndef atomic_inc
+static inline void
+atomic_inc(atomic_t *v)
+{
+	atomic_add(1, v);
+}
+#define atomic_inc atomic_inc
+#endif
+
+#ifndef atomic_inc_return_relaxed
+#ifdef atomic_inc_return
+#define atomic_inc_return_acquire atomic_inc_return
+#define atomic_inc_return_release atomic_inc_return
+#define atomic_inc_return_relaxed atomic_inc_return
+#endif /* atomic_inc_return */
+
+#ifndef atomic_inc_return
+static inline int
+atomic_inc_return(atomic_t *v)
+{
+	return atomic_add_return(1, v);
+}
+#define atomic_inc_return atomic_inc_return
+#endif
+
+#ifndef atomic_inc_return_acquire
+static inline int
+atomic_inc_return_acquire(atomic_t *v)
+{
+	return atomic_add_return_acquire(1, v);
+}
+#define atomic_inc_return_acquire atomic_inc_return_acquire
+#endif
+
+#ifndef atomic_inc_return_release
+static inline int
+atomic_inc_return_release(atomic_t *v)
+{
+	return atomic_add_return_release(1, v);
+}
+#define atomic_inc_return_release atomic_inc_return_release
+#endif
+
+#ifndef atomic_inc_return_relaxed
+static inline int
+atomic_inc_return_relaxed(atomic_t *v)
+{
+	return atomic_add_return_relaxed(1, v);
+}
+#define atomic_inc_return_relaxed atomic_inc_return_relaxed
+#endif
+
+#else /* atomic_inc_return_relaxed */
+
+#ifndef atomic_inc_return_acquire
+static inline int
+atomic_inc_return_acquire(atomic_t *v)
+{
+	int ret = atomic_inc_return_relaxed(v);
+	__atomic_acquire_fence();
+	return ret;
+}
+#define atomic_inc_return_acquire atomic_inc_return_acquire
+#endif
+
+#ifndef atomic_inc_return_release
+static inline int
+atomic_inc_return_release(atomic_t *v)
+{
+	__atomic_release_fence();
+	return atomic_inc_return_relaxed(v);
+}
+#define atomic_inc_return_release atomic_inc_return_release
+#endif
+
+#ifndef atomic_inc_return
+static inline int
+atomic_inc_return(atomic_t *v)
+{
+	int ret;
+	__atomic_pre_full_fence();
+	ret = atomic_inc_return_relaxed(v);
+	__atomic_post_full_fence();
+	return ret;
+}
+#define atomic_inc_return atomic_inc_return
+#endif
+
+#endif /* atomic_inc_return_relaxed */
+
+#ifndef atomic_fetch_inc_relaxed
+#ifdef atomic_fetch_inc
+#define atomic_fetch_inc_acquire atomic_fetch_inc
+#define atomic_fetch_inc_release atomic_fetch_inc
+#define atomic_fetch_inc_relaxed atomic_fetch_inc
+#endif /* atomic_fetch_inc */
+
+#ifndef atomic_fetch_inc
+static inline int
+atomic_fetch_inc(atomic_t *v)
+{
+	return atomic_fetch_add(1, v);
+}
+#define atomic_fetch_inc atomic_fetch_inc
+#endif
+
+#ifndef atomic_fetch_inc_acquire
+static inline int
+atomic_fetch_inc_acquire(atomic_t *v)
+{
+	return atomic_fetch_add_acquire(1, v);
+}
+#define atomic_fetch_inc_acquire atomic_fetch_inc_acquire
+#endif
+
+#ifndef atomic_fetch_inc_release
+static inline int
+atomic_fetch_inc_release(atomic_t *v)
+{
+	return atomic_fetch_add_release(1, v);
+}
+#define atomic_fetch_inc_release atomic_fetch_inc_release
+#endif
+
+#ifndef atomic_fetch_inc_relaxed
+static inline int
+atomic_fetch_inc_relaxed(atomic_t *v)
+{
+	return atomic_fetch_add_relaxed(1, v);
+}
+#define atomic_fetch_inc_relaxed atomic_fetch_inc_relaxed
+#endif
+
+#else /* atomic_fetch_inc_relaxed */
+
+#ifndef atomic_fetch_inc_acquire
+static inline int
+atomic_fetch_inc_acquire(atomic_t *v)
+{
+	int ret = atomic_fetch_inc_relaxed(v);
+	__atomic_acquire_fence();
+	return ret;
+}
+#define atomic_fetch_inc_acquire atomic_fetch_inc_acquire
+#endif
+
+#ifndef atomic_fetch_inc_release
+static inline int
+atomic_fetch_inc_release(atomic_t *v)
+{
+	__atomic_release_fence();
+	return atomic_fetch_inc_relaxed(v);
+}
+#define atomic_fetch_inc_release atomic_fetch_inc_release
+#endif
+
+#ifndef atomic_fetch_inc
+static inline int
+atomic_fetch_inc(atomic_t *v)
+{
+	int ret;
+	__atomic_pre_full_fence();
+	ret = atomic_fetch_inc_relaxed(v);
+	__atomic_post_full_fence();
+	return ret;
+}
+#define atomic_fetch_inc atomic_fetch_inc
+#endif
+
+#endif /* atomic_fetch_inc_relaxed */
+
+#ifndef atomic_dec
+static inline void
+atomic_dec(atomic_t *v)
+{
+	atomic_sub(1, v);
+}
+#define atomic_dec atomic_dec
+#endif
+
+#ifndef atomic_dec_return_relaxed
+#ifdef atomic_dec_return
+#define atomic_dec_return_acquire atomic_dec_return
+#define atomic_dec_return_release atomic_dec_return
+#define atomic_dec_return_relaxed atomic_dec_return
+#endif /* atomic_dec_return */
+
+#ifndef atomic_dec_return
+static inline int
+atomic_dec_return(atomic_t *v)
+{
+	return atomic_sub_return(1, v);
+}
+#define atomic_dec_return atomic_dec_return
+#endif
+
+#ifndef atomic_dec_return_acquire
+static inline int
+atomic_dec_return_acquire(atomic_t *v)
+{
+	return atomic_sub_return_acquire(1, v);
+}
+#define atomic_dec_return_acquire atomic_dec_return_acquire
+#endif
+
+#ifndef atomic_dec_return_release
+static inline int
+atomic_dec_return_release(atomic_t *v)
+{
+	return atomic_sub_return_release(1, v);
+}
+#define atomic_dec_return_release atomic_dec_return_release
+#endif
+
+#ifndef atomic_dec_return_relaxed
+static inline int
+atomic_dec_return_relaxed(atomic_t *v)
+{
+	return atomic_sub_return_relaxed(1, v);
+}
+#define atomic_dec_return_relaxed atomic_dec_return_relaxed
+#endif
+
+#else /* atomic_dec_return_relaxed */
+
+#ifndef atomic_dec_return_acquire
+static inline int
+atomic_dec_return_acquire(atomic_t *v)
+{
+	int ret = atomic_dec_return_relaxed(v);
+	__atomic_acquire_fence();
+	return ret;
+}
+#define atomic_dec_return_acquire atomic_dec_return_acquire
+#endif
+
+#ifndef atomic_dec_return_release
+static inline int
+atomic_dec_return_release(atomic_t *v)
+{
+	__atomic_release_fence();
+	return atomic_dec_return_relaxed(v);
+}
+#define atomic_dec_return_release atomic_dec_return_release
+#endif
+
+#ifndef atomic_dec_return
+static inline int
+atomic_dec_return(atomic_t *v)
+{
+	int ret;
+	__atomic_pre_full_fence();
+	ret = atomic_dec_return_relaxed(v);
+	__atomic_post_full_fence();
+	return ret;
+}
+#define atomic_dec_return atomic_dec_return
+#endif
+
+#endif /* atomic_dec_return_relaxed */
+
+#ifndef atomic_fetch_dec_relaxed
+#ifdef atomic_fetch_dec
+#define atomic_fetch_dec_acquire atomic_fetch_dec
+#define atomic_fetch_dec_release atomic_fetch_dec
+#define atomic_fetch_dec_relaxed atomic_fetch_dec
+#endif /* atomic_fetch_dec */
+
+#ifndef atomic_fetch_dec
+static inline int
+atomic_fetch_dec(atomic_t *v)
+{
+	return atomic_fetch_sub(1, v);
+}
+#define atomic_fetch_dec atomic_fetch_dec
+#endif
+
+#ifndef atomic_fetch_dec_acquire
+static inline int
+atomic_fetch_dec_acquire(atomic_t *v)
+{
+	return atomic_fetch_sub_acquire(1, v);
+}
+#define atomic_fetch_dec_acquire atomic_fetch_dec_acquire
+#endif
+
+#ifndef atomic_fetch_dec_release
+static inline int
+atomic_fetch_dec_release(atomic_t *v)
+{
+	return atomic_fetch_sub_release(1, v);
+}
+#define atomic_fetch_dec_release atomic_fetch_dec_release
+#endif
+
+#ifndef atomic_fetch_dec_relaxed
+static inline int
+atomic_fetch_dec_relaxed(atomic_t *v)
+{
+	return atomic_fetch_sub_relaxed(1, v);
+}
+#define atomic_fetch_dec_relaxed atomic_fetch_dec_relaxed
+#endif
+
+#else /* atomic_fetch_dec_relaxed */
+
+#ifndef atomic_fetch_dec_acquire
+static inline int
+atomic_fetch_dec_acquire(atomic_t *v)
+{
+	int ret = atomic_fetch_dec_relaxed(v);
+	__atomic_acquire_fence();
+	return ret;
+}
+#define atomic_fetch_dec_acquire atomic_fetch_dec_acquire
+#endif
+
+#ifndef atomic_fetch_dec_release
+static inline int
+atomic_fetch_dec_release(atomic_t *v)
+{
+	__atomic_release_fence();
+	return atomic_fetch_dec_relaxed(v);
+}
+#define atomic_fetch_dec_release atomic_fetch_dec_release
+#endif
+
+#ifndef atomic_fetch_dec
+static inline int
+atomic_fetch_dec(atomic_t *v)
+{
+	int ret;
+	__atomic_pre_full_fence();
+	ret = atomic_fetch_dec_relaxed(v);
+	__atomic_post_full_fence();
+	return ret;
+}
+#define atomic_fetch_dec atomic_fetch_dec
+#endif
+
+#endif /* atomic_fetch_dec_relaxed */
+
+#ifndef atomic_fetch_and_relaxed
+#define atomic_fetch_and_acquire atomic_fetch_and
+#define atomic_fetch_and_release atomic_fetch_and
+#define atomic_fetch_and_relaxed atomic_fetch_and
+#else /* atomic_fetch_and_relaxed */
+
+#ifndef atomic_fetch_and_acquire
+static inline int
+atomic_fetch_and_acquire(int i, atomic_t *v)
+{
+	int ret = atomic_fetch_and_relaxed(i, v);
+	__atomic_acquire_fence();
+	return ret;
+}
+#define atomic_fetch_and_acquire atomic_fetch_and_acquire
+#endif
+
+#ifndef atomic_fetch_and_release
+static inline int
+atomic_fetch_and_release(int i, atomic_t *v)
+{
+	__atomic_release_fence();
+	return atomic_fetch_and_relaxed(i, v);
+}
+#define atomic_fetch_and_release atomic_fetch_and_release
+#endif
+
+#ifndef atomic_fetch_and
+static inline int
+atomic_fetch_and(int i, atomic_t *v)
+{
+	int ret;
+	__atomic_pre_full_fence();
+	ret = atomic_fetch_and_relaxed(i, v);
+	__atomic_post_full_fence();
+	return ret;
+}
+#define atomic_fetch_and atomic_fetch_and
+#endif
+
+#endif /* atomic_fetch_and_relaxed */
+
+#ifndef atomic_andnot
+static inline void
+atomic_andnot(int i, atomic_t *v)
+{
+	atomic_and(~i, v);
+}
+#define atomic_andnot atomic_andnot
+#endif
+
+#ifndef atomic_fetch_andnot_relaxed
+#ifdef atomic_fetch_andnot
+#define atomic_fetch_andnot_acquire atomic_fetch_andnot
+#define atomic_fetch_andnot_release atomic_fetch_andnot
+#define atomic_fetch_andnot_relaxed atomic_fetch_andnot
+#endif /* atomic_fetch_andnot */
+
+#ifndef atomic_fetch_andnot
+static inline int
+atomic_fetch_andnot(int i, atomic_t *v)
+{
+	return atomic_fetch_and(~i, v);
+}
+#define atomic_fetch_andnot atomic_fetch_andnot
+#endif
+
+#ifndef atomic_fetch_andnot_acquire
+static inline int
+atomic_fetch_andnot_acquire(int i, atomic_t *v)
+{
+	return atomic_fetch_and_acquire(~i, v);
+}
+#define atomic_fetch_andnot_acquire atomic_fetch_andnot_acquire
+#endif
+
+#ifndef atomic_fetch_andnot_release
+static inline int
+atomic_fetch_andnot_release(int i, atomic_t *v)
+{
+	return atomic_fetch_and_release(~i, v);
+}
+#define atomic_fetch_andnot_release atomic_fetch_andnot_release
+#endif
+
+#ifndef atomic_fetch_andnot_relaxed
+static inline int
+atomic_fetch_andnot_relaxed(int i, atomic_t *v)
+{
+	return atomic_fetch_and_relaxed(~i, v);
+}
+#define atomic_fetch_andnot_relaxed atomic_fetch_andnot_relaxed
+#endif
+
+#else /* atomic_fetch_andnot_relaxed */
+
+#ifndef atomic_fetch_andnot_acquire
+static inline int
+atomic_fetch_andnot_acquire(int i, atomic_t *v)
+{
+	int ret = atomic_fetch_andnot_relaxed(i, v);
+	__atomic_acquire_fence();
+	return ret;
+}
+#define atomic_fetch_andnot_acquire atomic_fetch_andnot_acquire
+#endif
+
+#ifndef atomic_fetch_andnot_release
+static inline int
+atomic_fetch_andnot_release(int i, atomic_t *v)
+{
+	__atomic_release_fence();
+	return atomic_fetch_andnot_relaxed(i, v);
+}
+#define atomic_fetch_andnot_release atomic_fetch_andnot_release
+#endif
+
+#ifndef atomic_fetch_andnot
+static inline int
+atomic_fetch_andnot(int i, atomic_t *v)
+{
+	int ret;
+	__atomic_pre_full_fence();
+	ret = atomic_fetch_andnot_relaxed(i, v);
+	__atomic_post_full_fence();
+	return ret;
+}
+#define atomic_fetch_andnot atomic_fetch_andnot
+#endif
+
+#endif /* atomic_fetch_andnot_relaxed */
+
+#ifndef atomic_fetch_or_relaxed
+#define atomic_fetch_or_acquire atomic_fetch_or
+#define atomic_fetch_or_release atomic_fetch_or
+#define atomic_fetch_or_relaxed atomic_fetch_or
+#else /* atomic_fetch_or_relaxed */
+
+#ifndef atomic_fetch_or_acquire
+static inline int
+atomic_fetch_or_acquire(int i, atomic_t *v)
+{
+	int ret = atomic_fetch_or_relaxed(i, v);
+	__atomic_acquire_fence();
+	return ret;
+}
+#define atomic_fetch_or_acquire atomic_fetch_or_acquire
+#endif
+
+#ifndef atomic_fetch_or_release
+static inline int
+atomic_fetch_or_release(int i, atomic_t *v)
+{
+	__atomic_release_fence();
+	return atomic_fetch_or_relaxed(i, v);
+}
+#define atomic_fetch_or_release atomic_fetch_or_release
+#endif
+
+#ifndef atomic_fetch_or
+static inline int
+atomic_fetch_or(int i, atomic_t *v)
+{
+	int ret;
+	__atomic_pre_full_fence();
+	ret = atomic_fetch_or_relaxed(i, v);
+	__atomic_post_full_fence();
+	return ret;
+}
+#define atomic_fetch_or atomic_fetch_or
+#endif
+
+#endif /* atomic_fetch_or_relaxed */
+
+#ifndef atomic_fetch_xor_relaxed
+#define atomic_fetch_xor_acquire atomic_fetch_xor
+#define atomic_fetch_xor_release atomic_fetch_xor
+#define atomic_fetch_xor_relaxed atomic_fetch_xor
+#else /* atomic_fetch_xor_relaxed */
+
+#ifndef atomic_fetch_xor_acquire
+static inline int
+atomic_fetch_xor_acquire(int i, atomic_t *v)
+{
+	int ret = atomic_fetch_xor_relaxed(i, v);
+	__atomic_acquire_fence();
+	return ret;
+}
+#define atomic_fetch_xor_acquire atomic_fetch_xor_acquire
+#endif
+
+#ifndef atomic_fetch_xor_release
+static inline int
+atomic_fetch_xor_release(int i, atomic_t *v)
+{
+	__atomic_release_fence();
+	return atomic_fetch_xor_relaxed(i, v);
+}
+#define atomic_fetch_xor_release atomic_fetch_xor_release
+#endif
+
+#ifndef atomic_fetch_xor
+static inline int
+atomic_fetch_xor(int i, atomic_t *v)
+{
+	int ret;
+	__atomic_pre_full_fence();
+	ret = atomic_fetch_xor_relaxed(i, v);
+	__atomic_post_full_fence();
+	return ret;
+}
+#define atomic_fetch_xor atomic_fetch_xor
+#endif
+
+#endif /* atomic_fetch_xor_relaxed */
+
+#ifndef atomic_xchg_relaxed
+#define atomic_xchg_acquire atomic_xchg
+#define atomic_xchg_release atomic_xchg
+#define atomic_xchg_relaxed atomic_xchg
+#else /* atomic_xchg_relaxed */
+
+#ifndef atomic_xchg_acquire
+static inline int
+atomic_xchg_acquire(atomic_t *v, int i)
+{
+	int ret = atomic_xchg_relaxed(v, i);
+	__atomic_acquire_fence();
+	return ret;
+}
+#define atomic_xchg_acquire atomic_xchg_acquire
+#endif
+
+#ifndef atomic_xchg_release
+static inline int
+atomic_xchg_release(atomic_t *v, int i)
+{
+	__atomic_release_fence();
+	return atomic_xchg_relaxed(v, i);
+}
+#define atomic_xchg_release atomic_xchg_release
+#endif
+
+#ifndef atomic_xchg
+static inline int
+atomic_xchg(atomic_t *v, int i)
+{
+	int ret;
+	__atomic_pre_full_fence();
+	ret = atomic_xchg_relaxed(v, i);
+	__atomic_post_full_fence();
+	return ret;
+}
+#define atomic_xchg atomic_xchg
+#endif
+
+#endif /* atomic_xchg_relaxed */
+
+#ifndef atomic_cmpxchg_relaxed
+#define atomic_cmpxchg_acquire atomic_cmpxchg
+#define atomic_cmpxchg_release atomic_cmpxchg
+#define atomic_cmpxchg_relaxed atomic_cmpxchg
+#else /* atomic_cmpxchg_relaxed */
+
+#ifndef atomic_cmpxchg_acquire
+static inline int
+atomic_cmpxchg_acquire(atomic_t *v, int old, int new)
+{
+	int ret = atomic_cmpxchg_relaxed(v, old, new);
+	__atomic_acquire_fence();
+	return ret;
+}
+#define atomic_cmpxchg_acquire atomic_cmpxchg_acquire
+#endif
+
+#ifndef atomic_cmpxchg_release
+static inline int
+atomic_cmpxchg_release(atomic_t *v, int old, int new)
+{
+	__atomic_release_fence();
+	return atomic_cmpxchg_relaxed(v, old, new);
+}
+#define atomic_cmpxchg_release atomic_cmpxchg_release
+#endif
+
+#ifndef atomic_cmpxchg
+static inline int
+atomic_cmpxchg(atomic_t *v, int old, int new)
+{
+	int ret;
+	__atomic_pre_full_fence();
+	ret = atomic_cmpxchg_relaxed(v, old, new);
+	__atomic_post_full_fence();
+	return ret;
+}
+#define atomic_cmpxchg atomic_cmpxchg
+#endif
+
+#endif /* atomic_cmpxchg_relaxed */
+
+#ifndef atomic_try_cmpxchg_relaxed
+#ifdef atomic_try_cmpxchg
+#define atomic_try_cmpxchg_acquire atomic_try_cmpxchg
+#define atomic_try_cmpxchg_release atomic_try_cmpxchg
+#define atomic_try_cmpxchg_relaxed atomic_try_cmpxchg
+#endif /* atomic_try_cmpxchg */
+
+#ifndef atomic_try_cmpxchg
+static inline bool
+atomic_try_cmpxchg(atomic_t *v, int *old, int new)
+{
+	int r, o = *old;
+	r = atomic_cmpxchg(v, o, new);
+	if (unlikely(r != o))
+		*old = r;
+	return likely(r == o);
+}
+#define atomic_try_cmpxchg atomic_try_cmpxchg
+#endif
+
+#ifndef atomic_try_cmpxchg_acquire
+static inline bool
+atomic_try_cmpxchg_acquire(atomic_t *v, int *old, int new)
+{
+	int r, o = *old;
+	r = atomic_cmpxchg_acquire(v, o, new);
+	if (unlikely(r != o))
+		*old = r;
+	return likely(r == o);
+}
+#define atomic_try_cmpxchg_acquire atomic_try_cmpxchg_acquire
+#endif
+
+#ifndef atomic_try_cmpxchg_release
+static inline bool
+atomic_try_cmpxchg_release(atomic_t *v, int *old, int new)
+{
+	int r, o = *old;
+	r = atomic_cmpxchg_release(v, o, new);
+	if (unlikely(r != o))
+		*old = r;
+	return likely(r == o);
+}
+#define atomic_try_cmpxchg_release atomic_try_cmpxchg_release
+#endif
+
+#ifndef atomic_try_cmpxchg_relaxed
+static inline bool
+atomic_try_cmpxchg_relaxed(atomic_t *v, int *old, int new)
+{
+	int r, o = *old;
+	r = atomic_cmpxchg_relaxed(v, o, new);
+	if (unlikely(r != o))
+		*old = r;
+	return likely(r == o);
+}
+#define atomic_try_cmpxchg_relaxed atomic_try_cmpxchg_relaxed
+#endif
+
+#else /* atomic_try_cmpxchg_relaxed */
+
+#ifndef atomic_try_cmpxchg_acquire
+static inline bool
+atomic_try_cmpxchg_acquire(atomic_t *v, int *old, int new)
+{
+	bool ret = atomic_try_cmpxchg_relaxed(v, old, new);
+	__atomic_acquire_fence();
+	return ret;
+}
+#define atomic_try_cmpxchg_acquire atomic_try_cmpxchg_acquire
+#endif
+
+#ifndef atomic_try_cmpxchg_release
+static inline bool
+atomic_try_cmpxchg_release(atomic_t *v, int *old, int new)
+{
+	__atomic_release_fence();
+	return atomic_try_cmpxchg_relaxed(v, old, new);
+}
+#define atomic_try_cmpxchg_release atomic_try_cmpxchg_release
+#endif
+
+#ifndef atomic_try_cmpxchg
+static inline bool
+atomic_try_cmpxchg(atomic_t *v, int *old, int new)
+{
+	bool ret;
+	__atomic_pre_full_fence();
+	ret = atomic_try_cmpxchg_relaxed(v, old, new);
+	__atomic_post_full_fence();
+	return ret;
+}
+#define atomic_try_cmpxchg atomic_try_cmpxchg
+#endif
+
+#endif /* atomic_try_cmpxchg_relaxed */
+
+#ifndef atomic_sub_and_test
+/**
+ * atomic_sub_and_test - subtract value from variable and test result
+ * @i: integer value to subtract
+ * @v: pointer of type atomic_t
+ *
+ * Atomically subtracts @i from @v and returns
+ * true if the result is zero, or false for all
+ * other cases.
+ */
+static inline bool
+atomic_sub_and_test(int i, atomic_t *v)
+{
+	return atomic_sub_return(i, v) == 0;
+}
+#define atomic_sub_and_test atomic_sub_and_test
+#endif
+
+#ifndef atomic_dec_and_test
+/**
+ * atomic_dec_and_test - decrement and test
+ * @v: pointer of type atomic_t
+ *
+ * Atomically decrements @v by 1 and
+ * returns true if the result is 0, or false for all other
+ * cases.
+ */
+static inline bool
+atomic_dec_and_test(atomic_t *v)
+{
+	return atomic_dec_return(v) == 0;
+}
+#define atomic_dec_and_test atomic_dec_and_test
+#endif
+
+#ifndef atomic_inc_and_test
+/**
+ * atomic_inc_and_test - increment and test
+ * @v: pointer of type atomic_t
+ *
+ * Atomically increments @v by 1
+ * and returns true if the result is zero, or false for all
+ * other cases.
+ */
+static inline bool
+atomic_inc_and_test(atomic_t *v)
+{
+	return atomic_inc_return(v) == 0;
+}
+#define atomic_inc_and_test atomic_inc_and_test
+#endif
+
+#ifndef atomic_add_negative
+/**
+ * atomic_add_negative - add and test if negative
+ * @i: integer value to add
+ * @v: pointer of type atomic_t
+ *
+ * Atomically adds @i to @v and returns true
+ * if the result is negative, or false when
+ * result is greater than or equal to zero.
+ */
+static inline bool
+atomic_add_negative(int i, atomic_t *v)
+{
+	return atomic_add_return(i, v) < 0;
+}
+#define atomic_add_negative atomic_add_negative
+#endif
+
+#ifndef atomic_fetch_add_unless
+/**
+ * atomic_fetch_add_unless - add unless the number is already a given value
+ * @v: pointer of type atomic_t
+ * @a: the amount to add to v...
+ * @u: ...unless v is equal to u.
+ *
+ * Atomically adds @a to @v, so long as @v was not already @u.
+ * Returns original value of @v
+ */
+static inline int
+atomic_fetch_add_unless(atomic_t *v, int a, int u)
+{
+	int c = atomic_read(v);
+
+	do {
+		if (unlikely(c == u))
+			break;
+	} while (!atomic_try_cmpxchg(v, &c, c + a));
+
+	return c;
+}
+#define atomic_fetch_add_unless atomic_fetch_add_unless
+#endif
+
+#ifndef atomic_add_unless
+/**
+ * atomic_add_unless - add unless the number is already a given value
+ * @v: pointer of type atomic_t
+ * @a: the amount to add to v...
+ * @u: ...unless v is equal to u.
+ *
+ * Atomically adds @a to @v, if @v was not already @u.
+ * Returns true if the addition was done.
+ */
+static inline bool
+atomic_add_unless(atomic_t *v, int a, int u)
+{
+	return atomic_fetch_add_unless(v, a, u) != u;
+}
+#define atomic_add_unless atomic_add_unless
+#endif
+
+#ifndef atomic_inc_not_zero
+/**
+ * atomic_inc_not_zero - increment unless the number is zero
+ * @v: pointer of type atomic_t
+ *
+ * Atomically increments @v by 1, if @v is non-zero.
+ * Returns true if the increment was done.
+ */
+static inline bool
+atomic_inc_not_zero(atomic_t *v)
+{
+	return atomic_add_unless(v, 1, 0);
+}
+#define atomic_inc_not_zero atomic_inc_not_zero
+#endif
+
+#ifndef atomic_inc_unless_negative
+static inline bool
+atomic_inc_unless_negative(atomic_t *v)
+{
+	int c = atomic_read(v);
+
+	do {
+		if (unlikely(c < 0))
+			return false;
+	} while (!atomic_try_cmpxchg(v, &c, c + 1));
+
+	return true;
+}
+#define atomic_inc_unless_negative atomic_inc_unless_negative
+#endif
+
+#ifndef atomic_dec_unless_positive
+static inline bool
+atomic_dec_unless_positive(atomic_t *v)
+{
+	int c = atomic_read(v);
+
+	do {
+		if (unlikely(c > 0))
+			return false;
+	} while (!atomic_try_cmpxchg(v, &c, c - 1));
+
+	return true;
+}
+#define atomic_dec_unless_positive atomic_dec_unless_positive
+#endif
+
+#ifndef atomic_dec_if_positive
+static inline int
+atomic_dec_if_positive(atomic_t *v)
+{
+	int dec, c = atomic_read(v);
+
+	do {
+		dec = c - 1;
+		if (unlikely(dec < 0))
+			break;
+	} while (!atomic_try_cmpxchg(v, &c, dec));
+
+	return dec;
+}
+#define atomic_dec_if_positive atomic_dec_if_positive
+#endif
+
+#define atomic_cond_read_acquire(v, c) smp_cond_load_acquire(&(v)->counter, (c))
+#define atomic_cond_read_relaxed(v, c) smp_cond_load_relaxed(&(v)->counter, (c))
+
+#ifdef CONFIG_GENERIC_ATOMIC64
+#include <asm-generic/atomic64.h>
+#endif
+
+#ifndef atomic64_read_acquire
+static inline s64
+atomic64_read_acquire(const atomic64_t *v)
+{
+	return smp_load_acquire(&(v)->counter);
+}
+#define atomic64_read_acquire atomic64_read_acquire
+#endif
+
+#ifndef atomic64_set_release
+static inline void
+atomic64_set_release(atomic64_t *v, s64 i)
+{
+	smp_store_release(&(v)->counter, i);
+}
+#define atomic64_set_release atomic64_set_release
+#endif
+
+#ifndef atomic64_add_return_relaxed
+#define atomic64_add_return_acquire atomic64_add_return
+#define atomic64_add_return_release atomic64_add_return
+#define atomic64_add_return_relaxed atomic64_add_return
+#else /* atomic64_add_return_relaxed */
+
+#ifndef atomic64_add_return_acquire
+static inline s64
+atomic64_add_return_acquire(s64 i, atomic64_t *v)
+{
+	s64 ret = atomic64_add_return_relaxed(i, v);
+	__atomic_acquire_fence();
+	return ret;
+}
+#define atomic64_add_return_acquire atomic64_add_return_acquire
+#endif
+
+#ifndef atomic64_add_return_release
+static inline s64
+atomic64_add_return_release(s64 i, atomic64_t *v)
+{
+	__atomic_release_fence();
+	return atomic64_add_return_relaxed(i, v);
+}
+#define atomic64_add_return_release atomic64_add_return_release
+#endif
+
+#ifndef atomic64_add_return
+static inline s64
+atomic64_add_return(s64 i, atomic64_t *v)
+{
+	s64 ret;
+	__atomic_pre_full_fence();
+	ret = atomic64_add_return_relaxed(i, v);
+	__atomic_post_full_fence();
+	return ret;
+}
+#define atomic64_add_return atomic64_add_return
+#endif
+
+#endif /* atomic64_add_return_relaxed */
+
+#ifndef atomic64_fetch_add_relaxed
+#define atomic64_fetch_add_acquire atomic64_fetch_add
+#define atomic64_fetch_add_release atomic64_fetch_add
+#define atomic64_fetch_add_relaxed atomic64_fetch_add
+#else /* atomic64_fetch_add_relaxed */
+
+#ifndef atomic64_fetch_add_acquire
+static inline s64
+atomic64_fetch_add_acquire(s64 i, atomic64_t *v)
+{
+	s64 ret = atomic64_fetch_add_relaxed(i, v);
+	__atomic_acquire_fence();
+	return ret;
+}
+#define atomic64_fetch_add_acquire atomic64_fetch_add_acquire
+#endif
+
+#ifndef atomic64_fetch_add_release
+static inline s64
+atomic64_fetch_add_release(s64 i, atomic64_t *v)
+{
+	__atomic_release_fence();
+	return atomic64_fetch_add_relaxed(i, v);
+}
+#define atomic64_fetch_add_release atomic64_fetch_add_release
+#endif
+
+#ifndef atomic64_fetch_add
+static inline s64
+atomic64_fetch_add(s64 i, atomic64_t *v)
+{
+	s64 ret;
+	__atomic_pre_full_fence();
+	ret = atomic64_fetch_add_relaxed(i, v);
+	__atomic_post_full_fence();
+	return ret;
+}
+#define atomic64_fetch_add atomic64_fetch_add
+#endif
+
+#endif /* atomic64_fetch_add_relaxed */
+
+#ifndef atomic64_sub_return_relaxed
+#define atomic64_sub_return_acquire atomic64_sub_return
+#define atomic64_sub_return_release atomic64_sub_return
+#define atomic64_sub_return_relaxed atomic64_sub_return
+#else /* atomic64_sub_return_relaxed */
+
+#ifndef atomic64_sub_return_acquire
+static inline s64
+atomic64_sub_return_acquire(s64 i, atomic64_t *v)
+{
+	s64 ret = atomic64_sub_return_relaxed(i, v);
+	__atomic_acquire_fence();
+	return ret;
+}
+#define atomic64_sub_return_acquire atomic64_sub_return_acquire
+#endif
+
+#ifndef atomic64_sub_return_release
+static inline s64
+atomic64_sub_return_release(s64 i, atomic64_t *v)
+{
+	__atomic_release_fence();
+	return atomic64_sub_return_relaxed(i, v);
+}
+#define atomic64_sub_return_release atomic64_sub_return_release
+#endif
+
+#ifndef atomic64_sub_return
+static inline s64
+atomic64_sub_return(s64 i, atomic64_t *v)
+{
+	s64 ret;
+	__atomic_pre_full_fence();
+	ret = atomic64_sub_return_relaxed(i, v);
+	__atomic_post_full_fence();
+	return ret;
+}
+#define atomic64_sub_return atomic64_sub_return
+#endif
+
+#endif /* atomic64_sub_return_relaxed */
+
+#ifndef atomic64_fetch_sub_relaxed
+#define atomic64_fetch_sub_acquire atomic64_fetch_sub
+#define atomic64_fetch_sub_release atomic64_fetch_sub
+#define atomic64_fetch_sub_relaxed atomic64_fetch_sub
+#else /* atomic64_fetch_sub_relaxed */
+
+#ifndef atomic64_fetch_sub_acquire
+static inline s64
+atomic64_fetch_sub_acquire(s64 i, atomic64_t *v)
+{
+	s64 ret = atomic64_fetch_sub_relaxed(i, v);
+	__atomic_acquire_fence();
+	return ret;
+}
+#define atomic64_fetch_sub_acquire atomic64_fetch_sub_acquire
+#endif
+
+#ifndef atomic64_fetch_sub_release
+static inline s64
+atomic64_fetch_sub_release(s64 i, atomic64_t *v)
+{
+	__atomic_release_fence();
+	return atomic64_fetch_sub_relaxed(i, v);
+}
+#define atomic64_fetch_sub_release atomic64_fetch_sub_release
+#endif
+
+#ifndef atomic64_fetch_sub
+static inline s64
+atomic64_fetch_sub(s64 i, atomic64_t *v)
+{
+	s64 ret;
+	__atomic_pre_full_fence();
+	ret = atomic64_fetch_sub_relaxed(i, v);
+	__atomic_post_full_fence();
+	return ret;
+}
+#define atomic64_fetch_sub atomic64_fetch_sub
+#endif
+
+#endif /* atomic64_fetch_sub_relaxed */
+
+#ifndef atomic64_inc
+static inline void
+atomic64_inc(atomic64_t *v)
+{
+	atomic64_add(1, v);
+}
+#define atomic64_inc atomic64_inc
+#endif
+
+#ifndef atomic64_inc_return_relaxed
+#ifdef atomic64_inc_return
+#define atomic64_inc_return_acquire atomic64_inc_return
+#define atomic64_inc_return_release atomic64_inc_return
+#define atomic64_inc_return_relaxed atomic64_inc_return
+#endif /* atomic64_inc_return */
+
+#ifndef atomic64_inc_return
+static inline s64
+atomic64_inc_return(atomic64_t *v)
+{
+	return atomic64_add_return(1, v);
+}
+#define atomic64_inc_return atomic64_inc_return
+#endif
+
+#ifndef atomic64_inc_return_acquire
+static inline s64
+atomic64_inc_return_acquire(atomic64_t *v)
+{
+	return atomic64_add_return_acquire(1, v);
+}
+#define atomic64_inc_return_acquire atomic64_inc_return_acquire
+#endif
+
+#ifndef atomic64_inc_return_release
+static inline s64
+atomic64_inc_return_release(atomic64_t *v)
+{
+	return atomic64_add_return_release(1, v);
+}
+#define atomic64_inc_return_release atomic64_inc_return_release
+#endif
+
+#ifndef atomic64_inc_return_relaxed
+static inline s64
+atomic64_inc_return_relaxed(atomic64_t *v)
+{
+	return atomic64_add_return_relaxed(1, v);
+}
+#define atomic64_inc_return_relaxed atomic64_inc_return_relaxed
+#endif
+
+#else /* atomic64_inc_return_relaxed */
+
+#ifndef atomic64_inc_return_acquire
+static inline s64
+atomic64_inc_return_acquire(atomic64_t *v)
+{
+	s64 ret = atomic64_inc_return_relaxed(v);
+	__atomic_acquire_fence();
+	return ret;
+}
+#define atomic64_inc_return_acquire atomic64_inc_return_acquire
+#endif
+
+#ifndef atomic64_inc_return_release
+static inline s64
+atomic64_inc_return_release(atomic64_t *v)
+{
+	__atomic_release_fence();
+	return atomic64_inc_return_relaxed(v);
+}
+#define atomic64_inc_return_release atomic64_inc_return_release
+#endif
+
+#ifndef atomic64_inc_return
+static inline s64
+atomic64_inc_return(atomic64_t *v)
+{
+	s64 ret;
+	__atomic_pre_full_fence();
+	ret = atomic64_inc_return_relaxed(v);
+	__atomic_post_full_fence();
+	return ret;
+}
+#define atomic64_inc_return atomic64_inc_return
+#endif
+
+#endif /* atomic64_inc_return_relaxed */
+
+#ifndef atomic64_fetch_inc_relaxed
+#ifdef atomic64_fetch_inc
+#define atomic64_fetch_inc_acquire atomic64_fetch_inc
+#define atomic64_fetch_inc_release atomic64_fetch_inc
+#define atomic64_fetch_inc_relaxed atomic64_fetch_inc
+#endif /* atomic64_fetch_inc */
+
+#ifndef atomic64_fetch_inc
+static inline s64
+atomic64_fetch_inc(atomic64_t *v)
+{
+	return atomic64_fetch_add(1, v);
+}
+#define atomic64_fetch_inc atomic64_fetch_inc
+#endif
+
+#ifndef atomic64_fetch_inc_acquire
+static inline s64
+atomic64_fetch_inc_acquire(atomic64_t *v)
+{
+	return atomic64_fetch_add_acquire(1, v);
+}
+#define atomic64_fetch_inc_acquire atomic64_fetch_inc_acquire
+#endif
+
+#ifndef atomic64_fetch_inc_release
+static inline s64
+atomic64_fetch_inc_release(atomic64_t *v)
+{
+	return atomic64_fetch_add_release(1, v);
+}
+#define atomic64_fetch_inc_release atomic64_fetch_inc_release
+#endif
+
+#ifndef atomic64_fetch_inc_relaxed
+static inline s64
+atomic64_fetch_inc_relaxed(atomic64_t *v)
+{
+	return atomic64_fetch_add_relaxed(1, v);
+}
+#define atomic64_fetch_inc_relaxed atomic64_fetch_inc_relaxed
+#endif
+
+#else /* atomic64_fetch_inc_relaxed */
+
+#ifndef atomic64_fetch_inc_acquire
+static inline s64
+atomic64_fetch_inc_acquire(atomic64_t *v)
+{
+	s64 ret = atomic64_fetch_inc_relaxed(v);
+	__atomic_acquire_fence();
+	return ret;
+}
+#define atomic64_fetch_inc_acquire atomic64_fetch_inc_acquire
+#endif
+
+#ifndef atomic64_fetch_inc_release
+static inline s64
+atomic64_fetch_inc_release(atomic64_t *v)
+{
+	__atomic_release_fence();
+	return atomic64_fetch_inc_relaxed(v);
+}
+#define atomic64_fetch_inc_release atomic64_fetch_inc_release
+#endif
+
+#ifndef atomic64_fetch_inc
+static inline s64
+atomic64_fetch_inc(atomic64_t *v)
+{
+	s64 ret;
+	__atomic_pre_full_fence();
+	ret = atomic64_fetch_inc_relaxed(v);
+	__atomic_post_full_fence();
+	return ret;
+}
+#define atomic64_fetch_inc atomic64_fetch_inc
+#endif
+
+#endif /* atomic64_fetch_inc_relaxed */
+
+#ifndef atomic64_dec
+static inline void
+atomic64_dec(atomic64_t *v)
+{
+	atomic64_sub(1, v);
+}
+#define atomic64_dec atomic64_dec
+#endif
+
+#ifndef atomic64_dec_return_relaxed
+#ifdef atomic64_dec_return
+#define atomic64_dec_return_acquire atomic64_dec_return
+#define atomic64_dec_return_release atomic64_dec_return
+#define atomic64_dec_return_relaxed atomic64_dec_return
+#endif /* atomic64_dec_return */
+
+#ifndef atomic64_dec_return
+static inline s64
+atomic64_dec_return(atomic64_t *v)
+{
+	return atomic64_sub_return(1, v);
+}
+#define atomic64_dec_return atomic64_dec_return
+#endif
+
+#ifndef atomic64_dec_return_acquire
+static inline s64
+atomic64_dec_return_acquire(atomic64_t *v)
+{
+	return atomic64_sub_return_acquire(1, v);
+}
+#define atomic64_dec_return_acquire atomic64_dec_return_acquire
+#endif
+
+#ifndef atomic64_dec_return_release
+static inline s64
+atomic64_dec_return_release(atomic64_t *v)
+{
+	return atomic64_sub_return_release(1, v);
+}
+#define atomic64_dec_return_release atomic64_dec_return_release
+#endif
+
+#ifndef atomic64_dec_return_relaxed
+static inline s64
+atomic64_dec_return_relaxed(atomic64_t *v)
+{
+	return atomic64_sub_return_relaxed(1, v);
+}
+#define atomic64_dec_return_relaxed atomic64_dec_return_relaxed
+#endif
+
+#else /* atomic64_dec_return_relaxed */
+
+#ifndef atomic64_dec_return_acquire
+static inline s64
+atomic64_dec_return_acquire(atomic64_t *v)
+{
+	s64 ret = atomic64_dec_return_relaxed(v);
+	__atomic_acquire_fence();
+	return ret;
+}
+#define atomic64_dec_return_acquire atomic64_dec_return_acquire
+#endif
+
+#ifndef atomic64_dec_return_release
+static inline s64
+atomic64_dec_return_release(atomic64_t *v)
+{
+	__atomic_release_fence();
+	return atomic64_dec_return_relaxed(v);
+}
+#define atomic64_dec_return_release atomic64_dec_return_release
+#endif
+
+#ifndef atomic64_dec_return
+static inline s64
+atomic64_dec_return(atomic64_t *v)
+{
+	s64 ret;
+	__atomic_pre_full_fence();
+	ret = atomic64_dec_return_relaxed(v);
+	__atomic_post_full_fence();
+	return ret;
+}
+#define atomic64_dec_return atomic64_dec_return
+#endif
+
+#endif /* atomic64_dec_return_relaxed */
+
+#ifndef atomic64_fetch_dec_relaxed
+#ifdef atomic64_fetch_dec
+#define atomic64_fetch_dec_acquire atomic64_fetch_dec
+#define atomic64_fetch_dec_release atomic64_fetch_dec
+#define atomic64_fetch_dec_relaxed atomic64_fetch_dec
+#endif /* atomic64_fetch_dec */
+
+#ifndef atomic64_fetch_dec
+static inline s64
+atomic64_fetch_dec(atomic64_t *v)
+{
+	return atomic64_fetch_sub(1, v);
+}
+#define atomic64_fetch_dec atomic64_fetch_dec
+#endif
+
+#ifndef atomic64_fetch_dec_acquire
+static inline s64
+atomic64_fetch_dec_acquire(atomic64_t *v)
+{
+	return atomic64_fetch_sub_acquire(1, v);
+}
+#define atomic64_fetch_dec_acquire atomic64_fetch_dec_acquire
+#endif
+
+#ifndef atomic64_fetch_dec_release
+static inline s64
+atomic64_fetch_dec_release(atomic64_t *v)
+{
+	return atomic64_fetch_sub_release(1, v);
+}
+#define atomic64_fetch_dec_release atomic64_fetch_dec_release
+#endif
+
+#ifndef atomic64_fetch_dec_relaxed
+static inline s64
+atomic64_fetch_dec_relaxed(atomic64_t *v)
+{
+	return atomic64_fetch_sub_relaxed(1, v);
+}
+#define atomic64_fetch_dec_relaxed atomic64_fetch_dec_relaxed
+#endif
+
+#else /* atomic64_fetch_dec_relaxed */
+
+#ifndef atomic64_fetch_dec_acquire
+static inline s64
+atomic64_fetch_dec_acquire(atomic64_t *v)
+{
+	s64 ret = atomic64_fetch_dec_relaxed(v);
+	__atomic_acquire_fence();
+	return ret;
+}
+#define atomic64_fetch_dec_acquire atomic64_fetch_dec_acquire
+#endif
+
+#ifndef atomic64_fetch_dec_release
+static inline s64
+atomic64_fetch_dec_release(atomic64_t *v)
+{
+	__atomic_release_fence();
+	return atomic64_fetch_dec_relaxed(v);
+}
+#define atomic64_fetch_dec_release atomic64_fetch_dec_release
+#endif
+
+#ifndef atomic64_fetch_dec
+static inline s64
+atomic64_fetch_dec(atomic64_t *v)
+{
+	s64 ret;
+	__atomic_pre_full_fence();
+	ret = atomic64_fetch_dec_relaxed(v);
+	__atomic_post_full_fence();
+	return ret;
+}
+#define atomic64_fetch_dec atomic64_fetch_dec
+#endif
+
+#endif /* atomic64_fetch_dec_relaxed */
+
+#ifndef atomic64_fetch_and_relaxed
+#define atomic64_fetch_and_acquire atomic64_fetch_and
+#define atomic64_fetch_and_release atomic64_fetch_and
+#define atomic64_fetch_and_relaxed atomic64_fetch_and
+#else /* atomic64_fetch_and_relaxed */
+
+#ifndef atomic64_fetch_and_acquire
+static inline s64
+atomic64_fetch_and_acquire(s64 i, atomic64_t *v)
+{
+	s64 ret = atomic64_fetch_and_relaxed(i, v);
+	__atomic_acquire_fence();
+	return ret;
+}
+#define atomic64_fetch_and_acquire atomic64_fetch_and_acquire
+#endif
+
+#ifndef atomic64_fetch_and_release
+static inline s64
+atomic64_fetch_and_release(s64 i, atomic64_t *v)
+{
+	__atomic_release_fence();
+	return atomic64_fetch_and_relaxed(i, v);
+}
+#define atomic64_fetch_and_release atomic64_fetch_and_release
+#endif
+
+#ifndef atomic64_fetch_and
+static inline s64
+atomic64_fetch_and(s64 i, atomic64_t *v)
+{
+	s64 ret;
+	__atomic_pre_full_fence();
+	ret = atomic64_fetch_and_relaxed(i, v);
+	__atomic_post_full_fence();
+	return ret;
+}
+#define atomic64_fetch_and atomic64_fetch_and
+#endif
+
+#endif /* atomic64_fetch_and_relaxed */
+
+#ifndef atomic64_andnot
+static inline void
+atomic64_andnot(s64 i, atomic64_t *v)
+{
+	atomic64_and(~i, v);
+}
+#define atomic64_andnot atomic64_andnot
+#endif
+
+#ifndef atomic64_fetch_andnot_relaxed
+#ifdef atomic64_fetch_andnot
+#define atomic64_fetch_andnot_acquire atomic64_fetch_andnot
+#define atomic64_fetch_andnot_release atomic64_fetch_andnot
+#define atomic64_fetch_andnot_relaxed atomic64_fetch_andnot
+#endif /* atomic64_fetch_andnot */
+
+#ifndef atomic64_fetch_andnot
+static inline s64
+atomic64_fetch_andnot(s64 i, atomic64_t *v)
+{
+	return atomic64_fetch_and(~i, v);
+}
+#define atomic64_fetch_andnot atomic64_fetch_andnot
+#endif
+
+#ifndef atomic64_fetch_andnot_acquire
+static inline s64
+atomic64_fetch_andnot_acquire(s64 i, atomic64_t *v)
+{
+	return atomic64_fetch_and_acquire(~i, v);
+}
+#define atomic64_fetch_andnot_acquire atomic64_fetch_andnot_acquire
+#endif
+
+#ifndef atomic64_fetch_andnot_release
+static inline s64
+atomic64_fetch_andnot_release(s64 i, atomic64_t *v)
+{
+	return atomic64_fetch_and_release(~i, v);
+}
+#define atomic64_fetch_andnot_release atomic64_fetch_andnot_release
+#endif
+
+#ifndef atomic64_fetch_andnot_relaxed
+static inline s64
+atomic64_fetch_andnot_relaxed(s64 i, atomic64_t *v)
+{
+	return atomic64_fetch_and_relaxed(~i, v);
+}
+#define atomic64_fetch_andnot_relaxed atomic64_fetch_andnot_relaxed
+#endif
+
+#else /* atomic64_fetch_andnot_relaxed */
+
+#ifndef atomic64_fetch_andnot_acquire
+static inline s64
+atomic64_fetch_andnot_acquire(s64 i, atomic64_t *v)
+{
+	s64 ret = atomic64_fetch_andnot_relaxed(i, v);
+	__atomic_acquire_fence();
+	return ret;
+}
+#define atomic64_fetch_andnot_acquire atomic64_fetch_andnot_acquire
+#endif
+
+#ifndef atomic64_fetch_andnot_release
+static inline s64
+atomic64_fetch_andnot_release(s64 i, atomic64_t *v)
+{
+	__atomic_release_fence();
+	return atomic64_fetch_andnot_relaxed(i, v);
+}
+#define atomic64_fetch_andnot_release atomic64_fetch_andnot_release
+#endif
+
+#ifndef atomic64_fetch_andnot
+static inline s64
+atomic64_fetch_andnot(s64 i, atomic64_t *v)
+{
+	s64 ret;
+	__atomic_pre_full_fence();
+	ret = atomic64_fetch_andnot_relaxed(i, v);
+	__atomic_post_full_fence();
+	return ret;
+}
+#define atomic64_fetch_andnot atomic64_fetch_andnot
+#endif
+
+#endif /* atomic64_fetch_andnot_relaxed */
+
+#ifndef atomic64_fetch_or_relaxed
+#define atomic64_fetch_or_acquire atomic64_fetch_or
+#define atomic64_fetch_or_release atomic64_fetch_or
+#define atomic64_fetch_or_relaxed atomic64_fetch_or
+#else /* atomic64_fetch_or_relaxed */
+
+#ifndef atomic64_fetch_or_acquire
+static inline s64
+atomic64_fetch_or_acquire(s64 i, atomic64_t *v)
+{
+	s64 ret = atomic64_fetch_or_relaxed(i, v);
+	__atomic_acquire_fence();
+	return ret;
+}
+#define atomic64_fetch_or_acquire atomic64_fetch_or_acquire
+#endif
+
+#ifndef atomic64_fetch_or_release
+static inline s64
+atomic64_fetch_or_release(s64 i, atomic64_t *v)
+{
+	__atomic_release_fence();
+	return atomic64_fetch_or_relaxed(i, v);
+}
+#define atomic64_fetch_or_release atomic64_fetch_or_release
+#endif
+
+#ifndef atomic64_fetch_or
+static inline s64
+atomic64_fetch_or(s64 i, atomic64_t *v)
+{
+	s64 ret;
+	__atomic_pre_full_fence();
+	ret = atomic64_fetch_or_relaxed(i, v);
+	__atomic_post_full_fence();
+	return ret;
+}
+#define atomic64_fetch_or atomic64_fetch_or
+#endif
+
+#endif /* atomic64_fetch_or_relaxed */
+
+#ifndef atomic64_fetch_xor_relaxed
+#define atomic64_fetch_xor_acquire atomic64_fetch_xor
+#define atomic64_fetch_xor_release atomic64_fetch_xor
+#define atomic64_fetch_xor_relaxed atomic64_fetch_xor
+#else /* atomic64_fetch_xor_relaxed */
+
+#ifndef atomic64_fetch_xor_acquire
+static inline s64
+atomic64_fetch_xor_acquire(s64 i, atomic64_t *v)
+{
+	s64 ret = atomic64_fetch_xor_relaxed(i, v);
+	__atomic_acquire_fence();
+	return ret;
+}
+#define atomic64_fetch_xor_acquire atomic64_fetch_xor_acquire
+#endif
+
+#ifndef atomic64_fetch_xor_release
+static inline s64
+atomic64_fetch_xor_release(s64 i, atomic64_t *v)
+{
+	__atomic_release_fence();
+	return atomic64_fetch_xor_relaxed(i, v);
+}
+#define atomic64_fetch_xor_release atomic64_fetch_xor_release
+#endif
+
+#ifndef atomic64_fetch_xor
+static inline s64
+atomic64_fetch_xor(s64 i, atomic64_t *v)
+{
+	s64 ret;
+	__atomic_pre_full_fence();
+	ret = atomic64_fetch_xor_relaxed(i, v);
+	__atomic_post_full_fence();
+	return ret;
+}
+#define atomic64_fetch_xor atomic64_fetch_xor
+#endif
+
+#endif /* atomic64_fetch_xor_relaxed */
+
+#ifndef atomic64_xchg_relaxed
+#define atomic64_xchg_acquire atomic64_xchg
+#define atomic64_xchg_release atomic64_xchg
+#define atomic64_xchg_relaxed atomic64_xchg
+#else /* atomic64_xchg_relaxed */
+
+#ifndef atomic64_xchg_acquire
+static inline s64
+atomic64_xchg_acquire(atomic64_t *v, s64 i)
+{
+	s64 ret = atomic64_xchg_relaxed(v, i);
+	__atomic_acquire_fence();
+	return ret;
+}
+#define atomic64_xchg_acquire atomic64_xchg_acquire
+#endif
+
+#ifndef atomic64_xchg_release
+static inline s64
+atomic64_xchg_release(atomic64_t *v, s64 i)
+{
+	__atomic_release_fence();
+	return atomic64_xchg_relaxed(v, i);
+}
+#define atomic64_xchg_release atomic64_xchg_release
+#endif
+
+#ifndef atomic64_xchg
+static inline s64
+atomic64_xchg(atomic64_t *v, s64 i)
+{
+	s64 ret;
+	__atomic_pre_full_fence();
+	ret = atomic64_xchg_relaxed(v, i);
+	__atomic_post_full_fence();
+	return ret;
+}
+#define atomic64_xchg atomic64_xchg
+#endif
+
+#endif /* atomic64_xchg_relaxed */
+
+#ifndef atomic64_cmpxchg_relaxed
+#define atomic64_cmpxchg_acquire atomic64_cmpxchg
+#define atomic64_cmpxchg_release atomic64_cmpxchg
+#define atomic64_cmpxchg_relaxed atomic64_cmpxchg
+#else /* atomic64_cmpxchg_relaxed */
+
+#ifndef atomic64_cmpxchg_acquire
+static inline s64
+atomic64_cmpxchg_acquire(atomic64_t *v, s64 old, s64 new)
+{
+	s64 ret = atomic64_cmpxchg_relaxed(v, old, new);
+	__atomic_acquire_fence();
+	return ret;
+}
+#define atomic64_cmpxchg_acquire atomic64_cmpxchg_acquire
+#endif
+
+#ifndef atomic64_cmpxchg_release
+static inline s64
+atomic64_cmpxchg_release(atomic64_t *v, s64 old, s64 new)
+{
+	__atomic_release_fence();
+	return atomic64_cmpxchg_relaxed(v, old, new);
+}
+#define atomic64_cmpxchg_release atomic64_cmpxchg_release
+#endif
+
+#ifndef atomic64_cmpxchg
+static inline s64
+atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new)
+{
+	s64 ret;
+	__atomic_pre_full_fence();
+	ret = atomic64_cmpxchg_relaxed(v, old, new);
+	__atomic_post_full_fence();
+	return ret;
+}
+#define atomic64_cmpxchg atomic64_cmpxchg
+#endif
+
+#endif /* atomic64_cmpxchg_relaxed */
+
+#ifndef atomic64_try_cmpxchg_relaxed
+#ifdef atomic64_try_cmpxchg
+#define atomic64_try_cmpxchg_acquire atomic64_try_cmpxchg
+#define atomic64_try_cmpxchg_release atomic64_try_cmpxchg
+#define atomic64_try_cmpxchg_relaxed atomic64_try_cmpxchg
+#endif /* atomic64_try_cmpxchg */
+
+#ifndef atomic64_try_cmpxchg
+static inline bool
+atomic64_try_cmpxchg(atomic64_t *v, s64 *old, s64 new)
+{
+	s64 r, o = *old;
+	r = atomic64_cmpxchg(v, o, new);
+	if (unlikely(r != o))
+		*old = r;
+	return likely(r == o);
+}
+#define atomic64_try_cmpxchg atomic64_try_cmpxchg
+#endif
+
+#ifndef atomic64_try_cmpxchg_acquire
+static inline bool
+atomic64_try_cmpxchg_acquire(atomic64_t *v, s64 *old, s64 new)
+{
+	s64 r, o = *old;
+	r = atomic64_cmpxchg_acquire(v, o, new);
+	if (unlikely(r != o))
+		*old = r;
+	return likely(r == o);
+}
+#define atomic64_try_cmpxchg_acquire atomic64_try_cmpxchg_acquire
+#endif
+
+#ifndef atomic64_try_cmpxchg_release
+static inline bool
+atomic64_try_cmpxchg_release(atomic64_t *v, s64 *old, s64 new)
+{
+	s64 r, o = *old;
+	r = atomic64_cmpxchg_release(v, o, new);
+	if (unlikely(r != o))
+		*old = r;
+	return likely(r == o);
+}
+#define atomic64_try_cmpxchg_release atomic64_try_cmpxchg_release
+#endif
+
+#ifndef atomic64_try_cmpxchg_relaxed
+static inline bool
+atomic64_try_cmpxchg_relaxed(atomic64_t *v, s64 *old, s64 new)
+{
+	s64 r, o = *old;
+	r = atomic64_cmpxchg_relaxed(v, o, new);
+	if (unlikely(r != o))
+		*old = r;
+	return likely(r == o);
+}
+#define atomic64_try_cmpxchg_relaxed atomic64_try_cmpxchg_relaxed
+#endif
+
+#else /* atomic64_try_cmpxchg_relaxed */
+
+#ifndef atomic64_try_cmpxchg_acquire
+static inline bool
+atomic64_try_cmpxchg_acquire(atomic64_t *v, s64 *old, s64 new)
+{
+	bool ret = atomic64_try_cmpxchg_relaxed(v, old, new);
+	__atomic_acquire_fence();
+	return ret;
+}
+#define atomic64_try_cmpxchg_acquire atomic64_try_cmpxchg_acquire
+#endif
+
+#ifndef atomic64_try_cmpxchg_release
+static inline bool
+atomic64_try_cmpxchg_release(atomic64_t *v, s64 *old, s64 new)
+{
+	__atomic_release_fence();
+	return atomic64_try_cmpxchg_relaxed(v, old, new);
+}
+#define atomic64_try_cmpxchg_release atomic64_try_cmpxchg_release
+#endif
+
+#ifndef atomic64_try_cmpxchg
+static inline bool
+atomic64_try_cmpxchg(atomic64_t *v, s64 *old, s64 new)
+{
+	bool ret;
+	__atomic_pre_full_fence();
+	ret = atomic64_try_cmpxchg_relaxed(v, old, new);
+	__atomic_post_full_fence();
+	return ret;
+}
+#define atomic64_try_cmpxchg atomic64_try_cmpxchg
+#endif
+
+#endif /* atomic64_try_cmpxchg_relaxed */
+
+#ifndef atomic64_sub_and_test
+/**
+ * atomic64_sub_and_test - subtract value from variable and test result
+ * @i: integer value to subtract
+ * @v: pointer of type atomic64_t
+ *
+ * Atomically subtracts @i from @v and returns
+ * true if the result is zero, or false for all
+ * other cases.
+ */
+static inline bool
+atomic64_sub_and_test(s64 i, atomic64_t *v)
+{
+	return atomic64_sub_return(i, v) == 0;
+}
+#define atomic64_sub_and_test atomic64_sub_and_test
+#endif
+
+#ifndef atomic64_dec_and_test
+/**
+ * atomic64_dec_and_test - decrement and test
+ * @v: pointer of type atomic64_t
+ *
+ * Atomically decrements @v by 1 and
+ * returns true if the result is 0, or false for all other
+ * cases.
+ */
+static inline bool
+atomic64_dec_and_test(atomic64_t *v)
+{
+	return atomic64_dec_return(v) == 0;
+}
+#define atomic64_dec_and_test atomic64_dec_and_test
+#endif
+
+#ifndef atomic64_inc_and_test
+/**
+ * atomic64_inc_and_test - increment and test
+ * @v: pointer of type atomic64_t
+ *
+ * Atomically increments @v by 1
+ * and returns true if the result is zero, or false for all
+ * other cases.
+ */
+static inline bool
+atomic64_inc_and_test(atomic64_t *v)
+{
+	return atomic64_inc_return(v) == 0;
+}
+#define atomic64_inc_and_test atomic64_inc_and_test
+#endif
+
+#ifndef atomic64_add_negative
+/**
+ * atomic64_add_negative - add and test if negative
+ * @i: integer value to add
+ * @v: pointer of type atomic64_t
+ *
+ * Atomically adds @i to @v and returns true
+ * if the result is negative, or false when
+ * result is greater than or equal to zero.
+ */
+static inline bool
+atomic64_add_negative(s64 i, atomic64_t *v)
+{
+	return atomic64_add_return(i, v) < 0;
+}
+#define atomic64_add_negative atomic64_add_negative
+#endif
+
+#ifndef atomic64_fetch_add_unless
+/**
+ * atomic64_fetch_add_unless - add unless the number is already a given value
+ * @v: pointer of type atomic64_t
+ * @a: the amount to add to v...
+ * @u: ...unless v is equal to u.
+ *
+ * Atomically adds @a to @v, so long as @v was not already @u.
+ * Returns original value of @v
+ */
+static inline s64
+atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
+{
+	s64 c = atomic64_read(v);
+
+	do {
+		if (unlikely(c == u))
+			break;
+	} while (!atomic64_try_cmpxchg(v, &c, c + a));
+
+	return c;
+}
+#define atomic64_fetch_add_unless atomic64_fetch_add_unless
+#endif
+
+#ifndef atomic64_add_unless
+/**
+ * atomic64_add_unless - add unless the number is already a given value
+ * @v: pointer of type atomic64_t
+ * @a: the amount to add to v...
+ * @u: ...unless v is equal to u.
+ *
+ * Atomically adds @a to @v, if @v was not already @u.
+ * Returns true if the addition was done.
+ */
+static inline bool
+atomic64_add_unless(atomic64_t *v, s64 a, s64 u)
+{
+	return atomic64_fetch_add_unless(v, a, u) != u;
+}
+#define atomic64_add_unless atomic64_add_unless
+#endif
+
+#ifndef atomic64_inc_not_zero
+/**
+ * atomic64_inc_not_zero - increment unless the number is zero
+ * @v: pointer of type atomic64_t
+ *
+ * Atomically increments @v by 1, if @v is non-zero.
+ * Returns true if the increment was done.
+ */
+static inline bool
+atomic64_inc_not_zero(atomic64_t *v)
+{
+	return atomic64_add_unless(v, 1, 0);
+}
+#define atomic64_inc_not_zero atomic64_inc_not_zero
+#endif
+
+#ifndef atomic64_inc_unless_negative
+static inline bool
+atomic64_inc_unless_negative(atomic64_t *v)
+{
+	s64 c = atomic64_read(v);
+
+	do {
+		if (unlikely(c < 0))
+			return false;
+	} while (!atomic64_try_cmpxchg(v, &c, c + 1));
+
+	return true;
+}
+#define atomic64_inc_unless_negative atomic64_inc_unless_negative
+#endif
+
+#ifndef atomic64_dec_unless_positive
+static inline bool
+atomic64_dec_unless_positive(atomic64_t *v)
+{
+	s64 c = atomic64_read(v);
+
+	do {
+		if (unlikely(c > 0))
+			return false;
+	} while (!atomic64_try_cmpxchg(v, &c, c - 1));
+
+	return true;
+}
+#define atomic64_dec_unless_positive atomic64_dec_unless_positive
+#endif
+
+#ifndef atomic64_dec_if_positive
+static inline s64
+atomic64_dec_if_positive(atomic64_t *v)
+{
+	s64 dec, c = atomic64_read(v);
+
+	do {
+		dec = c - 1;
+		if (unlikely(dec < 0))
+			break;
+	} while (!atomic64_try_cmpxchg(v, &c, dec));
+
+	return dec;
+}
+#define atomic64_dec_if_positive atomic64_dec_if_positive
+#endif
+
+#define atomic64_cond_read_acquire(v, c) smp_cond_load_acquire(&(v)->counter, (c))
+#define atomic64_cond_read_relaxed(v, c) smp_cond_load_relaxed(&(v)->counter, (c))
+
+#endif /* _LINUX_ATOMIC_FALLBACK_H */
diff --git a/include/linux/atomic.h b/include/linux/atomic.h
index 1e8e88bdaf09..4c0d009a46f0 100644
--- a/include/linux/atomic.h
+++ b/include/linux/atomic.h
@@ -25,14 +25,6 @@
  * See Documentation/memory-barriers.txt for ACQUIRE/RELEASE definitions.
  */
 
-#ifndef atomic_read_acquire
-#define  atomic_read_acquire(v)		smp_load_acquire(&(v)->counter)
-#endif
-
-#ifndef atomic_set_release
-#define  atomic_set_release(v, i)	smp_store_release(&(v)->counter, (i))
-#endif
-
 /*
  * The idea here is to build acquire/release variants by adding explicit
  * barriers on top of the relaxed variant. In the case where the relaxed
@@ -79,1238 +71,7 @@
 	__ret;								\
 })
 
-/* atomic_add_return_relaxed */
-#ifndef atomic_add_return_relaxed
-#define  atomic_add_return_relaxed	atomic_add_return
-#define  atomic_add_return_acquire	atomic_add_return
-#define  atomic_add_return_release	atomic_add_return
-
-#else /* atomic_add_return_relaxed */
-
-#ifndef atomic_add_return_acquire
-#define  atomic_add_return_acquire(...)					\
-	__atomic_op_acquire(atomic_add_return, __VA_ARGS__)
-#endif
-
-#ifndef atomic_add_return_release
-#define  atomic_add_return_release(...)					\
-	__atomic_op_release(atomic_add_return, __VA_ARGS__)
-#endif
-
-#ifndef atomic_add_return
-#define  atomic_add_return(...)						\
-	__atomic_op_fence(atomic_add_return, __VA_ARGS__)
-#endif
-#endif /* atomic_add_return_relaxed */
-
-#ifndef atomic_inc
-#define atomic_inc(v)			atomic_add(1, (v))
-#endif
-
-/* atomic_inc_return_relaxed */
-#ifndef atomic_inc_return_relaxed
-
-#ifndef atomic_inc_return
-#define atomic_inc_return(v)		atomic_add_return(1, (v))
-#define atomic_inc_return_relaxed(v)	atomic_add_return_relaxed(1, (v))
-#define atomic_inc_return_acquire(v)	atomic_add_return_acquire(1, (v))
-#define atomic_inc_return_release(v)	atomic_add_return_release(1, (v))
-#else /* atomic_inc_return */
-#define  atomic_inc_return_relaxed	atomic_inc_return
-#define  atomic_inc_return_acquire	atomic_inc_return
-#define  atomic_inc_return_release	atomic_inc_return
-#endif /* atomic_inc_return */
-
-#else /* atomic_inc_return_relaxed */
-
-#ifndef atomic_inc_return_acquire
-#define  atomic_inc_return_acquire(...)					\
-	__atomic_op_acquire(atomic_inc_return, __VA_ARGS__)
-#endif
-
-#ifndef atomic_inc_return_release
-#define  atomic_inc_return_release(...)					\
-	__atomic_op_release(atomic_inc_return, __VA_ARGS__)
-#endif
-
-#ifndef atomic_inc_return
-#define  atomic_inc_return(...)						\
-	__atomic_op_fence(atomic_inc_return, __VA_ARGS__)
-#endif
-#endif /* atomic_inc_return_relaxed */
-
-/* atomic_sub_return_relaxed */
-#ifndef atomic_sub_return_relaxed
-#define  atomic_sub_return_relaxed	atomic_sub_return
-#define  atomic_sub_return_acquire	atomic_sub_return
-#define  atomic_sub_return_release	atomic_sub_return
-
-#else /* atomic_sub_return_relaxed */
-
-#ifndef atomic_sub_return_acquire
-#define  atomic_sub_return_acquire(...)					\
-	__atomic_op_acquire(atomic_sub_return, __VA_ARGS__)
-#endif
-
-#ifndef atomic_sub_return_release
-#define  atomic_sub_return_release(...)					\
-	__atomic_op_release(atomic_sub_return, __VA_ARGS__)
-#endif
-
-#ifndef atomic_sub_return
-#define  atomic_sub_return(...)						\
-	__atomic_op_fence(atomic_sub_return, __VA_ARGS__)
-#endif
-#endif /* atomic_sub_return_relaxed */
-
-#ifndef atomic_dec
-#define atomic_dec(v)			atomic_sub(1, (v))
-#endif
-
-/* atomic_dec_return_relaxed */
-#ifndef atomic_dec_return_relaxed
-
-#ifndef atomic_dec_return
-#define atomic_dec_return(v)		atomic_sub_return(1, (v))
-#define atomic_dec_return_relaxed(v)	atomic_sub_return_relaxed(1, (v))
-#define atomic_dec_return_acquire(v)	atomic_sub_return_acquire(1, (v))
-#define atomic_dec_return_release(v)	atomic_sub_return_release(1, (v))
-#else /* atomic_dec_return */
-#define  atomic_dec_return_relaxed	atomic_dec_return
-#define  atomic_dec_return_acquire	atomic_dec_return
-#define  atomic_dec_return_release	atomic_dec_return
-#endif /* atomic_dec_return */
-
-#else /* atomic_dec_return_relaxed */
-
-#ifndef atomic_dec_return_acquire
-#define  atomic_dec_return_acquire(...)					\
-	__atomic_op_acquire(atomic_dec_return, __VA_ARGS__)
-#endif
-
-#ifndef atomic_dec_return_release
-#define  atomic_dec_return_release(...)					\
-	__atomic_op_release(atomic_dec_return, __VA_ARGS__)
-#endif
-
-#ifndef atomic_dec_return
-#define  atomic_dec_return(...)						\
-	__atomic_op_fence(atomic_dec_return, __VA_ARGS__)
-#endif
-#endif /* atomic_dec_return_relaxed */
-
-
-/* atomic_fetch_add_relaxed */
-#ifndef atomic_fetch_add_relaxed
-#define atomic_fetch_add_relaxed	atomic_fetch_add
-#define atomic_fetch_add_acquire	atomic_fetch_add
-#define atomic_fetch_add_release	atomic_fetch_add
-
-#else /* atomic_fetch_add_relaxed */
-
-#ifndef atomic_fetch_add_acquire
-#define atomic_fetch_add_acquire(...)					\
-	__atomic_op_acquire(atomic_fetch_add, __VA_ARGS__)
-#endif
-
-#ifndef atomic_fetch_add_release
-#define atomic_fetch_add_release(...)					\
-	__atomic_op_release(atomic_fetch_add, __VA_ARGS__)
-#endif
-
-#ifndef atomic_fetch_add
-#define atomic_fetch_add(...)						\
-	__atomic_op_fence(atomic_fetch_add, __VA_ARGS__)
-#endif
-#endif /* atomic_fetch_add_relaxed */
-
-/* atomic_fetch_inc_relaxed */
-#ifndef atomic_fetch_inc_relaxed
-
-#ifndef atomic_fetch_inc
-#define atomic_fetch_inc(v)	        atomic_fetch_add(1, (v))
-#define atomic_fetch_inc_relaxed(v)	atomic_fetch_add_relaxed(1, (v))
-#define atomic_fetch_inc_acquire(v)	atomic_fetch_add_acquire(1, (v))
-#define atomic_fetch_inc_release(v)	atomic_fetch_add_release(1, (v))
-#else /* atomic_fetch_inc */
-#define atomic_fetch_inc_relaxed	atomic_fetch_inc
-#define atomic_fetch_inc_acquire	atomic_fetch_inc
-#define atomic_fetch_inc_release	atomic_fetch_inc
-#endif /* atomic_fetch_inc */
-
-#else /* atomic_fetch_inc_relaxed */
-
-#ifndef atomic_fetch_inc_acquire
-#define atomic_fetch_inc_acquire(...)					\
-	__atomic_op_acquire(atomic_fetch_inc, __VA_ARGS__)
-#endif
-
-#ifndef atomic_fetch_inc_release
-#define atomic_fetch_inc_release(...)					\
-	__atomic_op_release(atomic_fetch_inc, __VA_ARGS__)
-#endif
-
-#ifndef atomic_fetch_inc
-#define atomic_fetch_inc(...)						\
-	__atomic_op_fence(atomic_fetch_inc, __VA_ARGS__)
-#endif
-#endif /* atomic_fetch_inc_relaxed */
-
-/* atomic_fetch_sub_relaxed */
-#ifndef atomic_fetch_sub_relaxed
-#define atomic_fetch_sub_relaxed	atomic_fetch_sub
-#define atomic_fetch_sub_acquire	atomic_fetch_sub
-#define atomic_fetch_sub_release	atomic_fetch_sub
-
-#else /* atomic_fetch_sub_relaxed */
-
-#ifndef atomic_fetch_sub_acquire
-#define atomic_fetch_sub_acquire(...)					\
-	__atomic_op_acquire(atomic_fetch_sub, __VA_ARGS__)
-#endif
-
-#ifndef atomic_fetch_sub_release
-#define atomic_fetch_sub_release(...)					\
-	__atomic_op_release(atomic_fetch_sub, __VA_ARGS__)
-#endif
-
-#ifndef atomic_fetch_sub
-#define atomic_fetch_sub(...)						\
-	__atomic_op_fence(atomic_fetch_sub, __VA_ARGS__)
-#endif
-#endif /* atomic_fetch_sub_relaxed */
-
-/* atomic_fetch_dec_relaxed */
-#ifndef atomic_fetch_dec_relaxed
-
-#ifndef atomic_fetch_dec
-#define atomic_fetch_dec(v)	        atomic_fetch_sub(1, (v))
-#define atomic_fetch_dec_relaxed(v)	atomic_fetch_sub_relaxed(1, (v))
-#define atomic_fetch_dec_acquire(v)	atomic_fetch_sub_acquire(1, (v))
-#define atomic_fetch_dec_release(v)	atomic_fetch_sub_release(1, (v))
-#else /* atomic_fetch_dec */
-#define atomic_fetch_dec_relaxed	atomic_fetch_dec
-#define atomic_fetch_dec_acquire	atomic_fetch_dec
-#define atomic_fetch_dec_release	atomic_fetch_dec
-#endif /* atomic_fetch_dec */
-
-#else /* atomic_fetch_dec_relaxed */
-
-#ifndef atomic_fetch_dec_acquire
-#define atomic_fetch_dec_acquire(...)					\
-	__atomic_op_acquire(atomic_fetch_dec, __VA_ARGS__)
-#endif
-
-#ifndef atomic_fetch_dec_release
-#define atomic_fetch_dec_release(...)					\
-	__atomic_op_release(atomic_fetch_dec, __VA_ARGS__)
-#endif
-
-#ifndef atomic_fetch_dec
-#define atomic_fetch_dec(...)						\
-	__atomic_op_fence(atomic_fetch_dec, __VA_ARGS__)
-#endif
-#endif /* atomic_fetch_dec_relaxed */
-
-/* atomic_fetch_or_relaxed */
-#ifndef atomic_fetch_or_relaxed
-#define atomic_fetch_or_relaxed	atomic_fetch_or
-#define atomic_fetch_or_acquire	atomic_fetch_or
-#define atomic_fetch_or_release	atomic_fetch_or
-
-#else /* atomic_fetch_or_relaxed */
-
-#ifndef atomic_fetch_or_acquire
-#define atomic_fetch_or_acquire(...)					\
-	__atomic_op_acquire(atomic_fetch_or, __VA_ARGS__)
-#endif
-
-#ifndef atomic_fetch_or_release
-#define atomic_fetch_or_release(...)					\
-	__atomic_op_release(atomic_fetch_or, __VA_ARGS__)
-#endif
-
-#ifndef atomic_fetch_or
-#define atomic_fetch_or(...)						\
-	__atomic_op_fence(atomic_fetch_or, __VA_ARGS__)
-#endif
-#endif /* atomic_fetch_or_relaxed */
-
-/* atomic_fetch_and_relaxed */
-#ifndef atomic_fetch_and_relaxed
-#define atomic_fetch_and_relaxed	atomic_fetch_and
-#define atomic_fetch_and_acquire	atomic_fetch_and
-#define atomic_fetch_and_release	atomic_fetch_and
-
-#else /* atomic_fetch_and_relaxed */
-
-#ifndef atomic_fetch_and_acquire
-#define atomic_fetch_and_acquire(...)					\
-	__atomic_op_acquire(atomic_fetch_and, __VA_ARGS__)
-#endif
-
-#ifndef atomic_fetch_and_release
-#define atomic_fetch_and_release(...)					\
-	__atomic_op_release(atomic_fetch_and, __VA_ARGS__)
-#endif
-
-#ifndef atomic_fetch_and
-#define atomic_fetch_and(...)						\
-	__atomic_op_fence(atomic_fetch_and, __VA_ARGS__)
-#endif
-#endif /* atomic_fetch_and_relaxed */
-
-#ifndef atomic_andnot
-#define atomic_andnot(i, v)		atomic_and(~(int)(i), (v))
-#endif
-
-#ifndef atomic_fetch_andnot_relaxed
-
-#ifndef atomic_fetch_andnot
-#define atomic_fetch_andnot(i, v)		atomic_fetch_and(~(int)(i), (v))
-#define atomic_fetch_andnot_relaxed(i, v)	atomic_fetch_and_relaxed(~(int)(i), (v))
-#define atomic_fetch_andnot_acquire(i, v)	atomic_fetch_and_acquire(~(int)(i), (v))
-#define atomic_fetch_andnot_release(i, v)	atomic_fetch_and_release(~(int)(i), (v))
-#else /* atomic_fetch_andnot */
-#define atomic_fetch_andnot_relaxed		atomic_fetch_andnot
-#define atomic_fetch_andnot_acquire		atomic_fetch_andnot
-#define atomic_fetch_andnot_release		atomic_fetch_andnot
-#endif /* atomic_fetch_andnot */
-
-#else /* atomic_fetch_andnot_relaxed */
-
-#ifndef atomic_fetch_andnot_acquire
-#define atomic_fetch_andnot_acquire(...)					\
-	__atomic_op_acquire(atomic_fetch_andnot, __VA_ARGS__)
-#endif
-
-#ifndef atomic_fetch_andnot_release
-#define atomic_fetch_andnot_release(...)					\
-	__atomic_op_release(atomic_fetch_andnot, __VA_ARGS__)
-#endif
-
-#ifndef atomic_fetch_andnot
-#define atomic_fetch_andnot(...)						\
-	__atomic_op_fence(atomic_fetch_andnot, __VA_ARGS__)
-#endif
-#endif /* atomic_fetch_andnot_relaxed */
-
-/* atomic_fetch_xor_relaxed */
-#ifndef atomic_fetch_xor_relaxed
-#define atomic_fetch_xor_relaxed	atomic_fetch_xor
-#define atomic_fetch_xor_acquire	atomic_fetch_xor
-#define atomic_fetch_xor_release	atomic_fetch_xor
-
-#else /* atomic_fetch_xor_relaxed */
-
-#ifndef atomic_fetch_xor_acquire
-#define atomic_fetch_xor_acquire(...)					\
-	__atomic_op_acquire(atomic_fetch_xor, __VA_ARGS__)
-#endif
-
-#ifndef atomic_fetch_xor_release
-#define atomic_fetch_xor_release(...)					\
-	__atomic_op_release(atomic_fetch_xor, __VA_ARGS__)
-#endif
-
-#ifndef atomic_fetch_xor
-#define atomic_fetch_xor(...)						\
-	__atomic_op_fence(atomic_fetch_xor, __VA_ARGS__)
-#endif
-#endif /* atomic_fetch_xor_relaxed */
-
-
-/* atomic_xchg_relaxed */
-#ifndef atomic_xchg_relaxed
-#define  atomic_xchg_relaxed		atomic_xchg
-#define  atomic_xchg_acquire		atomic_xchg
-#define  atomic_xchg_release		atomic_xchg
-
-#else /* atomic_xchg_relaxed */
-
-#ifndef atomic_xchg_acquire
-#define  atomic_xchg_acquire(...)					\
-	__atomic_op_acquire(atomic_xchg, __VA_ARGS__)
-#endif
-
-#ifndef atomic_xchg_release
-#define  atomic_xchg_release(...)					\
-	__atomic_op_release(atomic_xchg, __VA_ARGS__)
-#endif
-
-#ifndef atomic_xchg
-#define  atomic_xchg(...)						\
-	__atomic_op_fence(atomic_xchg, __VA_ARGS__)
-#endif
-#endif /* atomic_xchg_relaxed */
-
-/* atomic_cmpxchg_relaxed */
-#ifndef atomic_cmpxchg_relaxed
-#define  atomic_cmpxchg_relaxed		atomic_cmpxchg
-#define  atomic_cmpxchg_acquire		atomic_cmpxchg
-#define  atomic_cmpxchg_release		atomic_cmpxchg
-
-#else /* atomic_cmpxchg_relaxed */
-
-#ifndef atomic_cmpxchg_acquire
-#define  atomic_cmpxchg_acquire(...)					\
-	__atomic_op_acquire(atomic_cmpxchg, __VA_ARGS__)
-#endif
-
-#ifndef atomic_cmpxchg_release
-#define  atomic_cmpxchg_release(...)					\
-	__atomic_op_release(atomic_cmpxchg, __VA_ARGS__)
-#endif
-
-#ifndef atomic_cmpxchg
-#define  atomic_cmpxchg(...)						\
-	__atomic_op_fence(atomic_cmpxchg, __VA_ARGS__)
-#endif
-#endif /* atomic_cmpxchg_relaxed */
-
-#ifndef atomic_try_cmpxchg
-
-#define __atomic_try_cmpxchg(type, _p, _po, _n)				\
-({									\
-	typeof(_po) __po = (_po);					\
-	typeof(*(_po)) __r, __o = *__po;				\
-	__r = atomic_cmpxchg##type((_p), __o, (_n));			\
-	if (unlikely(__r != __o))					\
-		*__po = __r;						\
-	likely(__r == __o);						\
-})
-
-#define atomic_try_cmpxchg(_p, _po, _n)		__atomic_try_cmpxchg(, _p, _po, _n)
-#define atomic_try_cmpxchg_relaxed(_p, _po, _n)	__atomic_try_cmpxchg(_relaxed, _p, _po, _n)
-#define atomic_try_cmpxchg_acquire(_p, _po, _n)	__atomic_try_cmpxchg(_acquire, _p, _po, _n)
-#define atomic_try_cmpxchg_release(_p, _po, _n)	__atomic_try_cmpxchg(_release, _p, _po, _n)
-
-#else /* atomic_try_cmpxchg */
-#define atomic_try_cmpxchg_relaxed	atomic_try_cmpxchg
-#define atomic_try_cmpxchg_acquire	atomic_try_cmpxchg
-#define atomic_try_cmpxchg_release	atomic_try_cmpxchg
-#endif /* atomic_try_cmpxchg */
-
-/* cmpxchg_relaxed */
-#ifndef cmpxchg_relaxed
-#define  cmpxchg_relaxed		cmpxchg
-#define  cmpxchg_acquire		cmpxchg
-#define  cmpxchg_release		cmpxchg
-
-#else /* cmpxchg_relaxed */
-
-#ifndef cmpxchg_acquire
-#define  cmpxchg_acquire(...)						\
-	__atomic_op_acquire(cmpxchg, __VA_ARGS__)
-#endif
-
-#ifndef cmpxchg_release
-#define  cmpxchg_release(...)						\
-	__atomic_op_release(cmpxchg, __VA_ARGS__)
-#endif
-
-#ifndef cmpxchg
-#define  cmpxchg(...)							\
-	__atomic_op_fence(cmpxchg, __VA_ARGS__)
-#endif
-#endif /* cmpxchg_relaxed */
-
-/* cmpxchg64_relaxed */
-#ifndef cmpxchg64_relaxed
-#define  cmpxchg64_relaxed		cmpxchg64
-#define  cmpxchg64_acquire		cmpxchg64
-#define  cmpxchg64_release		cmpxchg64
-
-#else /* cmpxchg64_relaxed */
-
-#ifndef cmpxchg64_acquire
-#define  cmpxchg64_acquire(...)						\
-	__atomic_op_acquire(cmpxchg64, __VA_ARGS__)
-#endif
-
-#ifndef cmpxchg64_release
-#define  cmpxchg64_release(...)						\
-	__atomic_op_release(cmpxchg64, __VA_ARGS__)
-#endif
-
-#ifndef cmpxchg64
-#define  cmpxchg64(...)							\
-	__atomic_op_fence(cmpxchg64, __VA_ARGS__)
-#endif
-#endif /* cmpxchg64_relaxed */
-
-/* xchg_relaxed */
-#ifndef xchg_relaxed
-#define  xchg_relaxed			xchg
-#define  xchg_acquire			xchg
-#define  xchg_release			xchg
-
-#else /* xchg_relaxed */
-
-#ifndef xchg_acquire
-#define  xchg_acquire(...)		__atomic_op_acquire(xchg, __VA_ARGS__)
-#endif
-
-#ifndef xchg_release
-#define  xchg_release(...)		__atomic_op_release(xchg, __VA_ARGS__)
-#endif
-
-#ifndef xchg
-#define  xchg(...)			__atomic_op_fence(xchg, __VA_ARGS__)
-#endif
-#endif /* xchg_relaxed */
-
-/**
- * atomic_fetch_add_unless - add unless the number is already a given value
- * @v: pointer of type atomic_t
- * @a: the amount to add to v...
- * @u: ...unless v is equal to u.
- *
- * Atomically adds @a to @v, if @v was not already @u.
- * Returns the original value of @v.
- */
-#ifndef atomic_fetch_add_unless
-static inline int atomic_fetch_add_unless(atomic_t *v, int a, int u)
-{
-	int c = atomic_read(v);
-
-	do {
-		if (unlikely(c == u))
-			break;
-	} while (!atomic_try_cmpxchg(v, &c, c + a));
-
-	return c;
-}
-#endif
-
-/**
- * atomic_add_unless - add unless the number is already a given value
- * @v: pointer of type atomic_t
- * @a: the amount to add to v...
- * @u: ...unless v is equal to u.
- *
- * Atomically adds @a to @v, if @v was not already @u.
- * Returns true if the addition was done.
- */
-static inline bool atomic_add_unless(atomic_t *v, int a, int u)
-{
-	return atomic_fetch_add_unless(v, a, u) != u;
-}
-
-/**
- * atomic_inc_not_zero - increment unless the number is zero
- * @v: pointer of type atomic_t
- *
- * Atomically increments @v by 1, if @v is non-zero.
- * Returns true if the increment was done.
- */
-#ifndef atomic_inc_not_zero
-#define atomic_inc_not_zero(v)		atomic_add_unless((v), 1, 0)
-#endif
-
-/**
- * atomic_inc_and_test - increment and test
- * @v: pointer of type atomic_t
- *
- * Atomically increments @v by 1
- * and returns true if the result is zero, or false for all
- * other cases.
- */
-#ifndef atomic_inc_and_test
-static inline bool atomic_inc_and_test(atomic_t *v)
-{
-	return atomic_inc_return(v) == 0;
-}
-#endif
-
-/**
- * atomic_dec_and_test - decrement and test
- * @v: pointer of type atomic_t
- *
- * Atomically decrements @v by 1 and
- * returns true if the result is 0, or false for all other
- * cases.
- */
-#ifndef atomic_dec_and_test
-static inline bool atomic_dec_and_test(atomic_t *v)
-{
-	return atomic_dec_return(v) == 0;
-}
-#endif
-
-/**
- * atomic_sub_and_test - subtract value from variable and test result
- * @i: integer value to subtract
- * @v: pointer of type atomic_t
- *
- * Atomically subtracts @i from @v and returns
- * true if the result is zero, or false for all
- * other cases.
- */
-#ifndef atomic_sub_and_test
-static inline bool atomic_sub_and_test(int i, atomic_t *v)
-{
-	return atomic_sub_return(i, v) == 0;
-}
-#endif
-
-/**
- * atomic_add_negative - add and test if negative
- * @i: integer value to add
- * @v: pointer of type atomic_t
- *
- * Atomically adds @i to @v and returns true
- * if the result is negative, or false when
- * result is greater than or equal to zero.
- */
-#ifndef atomic_add_negative
-static inline bool atomic_add_negative(int i, atomic_t *v)
-{
-	return atomic_add_return(i, v) < 0;
-}
-#endif
-
-#ifndef atomic_inc_unless_negative
-static inline bool atomic_inc_unless_negative(atomic_t *v)
-{
-	int c = atomic_read(v);
-
-	do {
-		if (unlikely(c < 0))
-			return false;
-	} while (!atomic_try_cmpxchg(v, &c, c + 1));
-
-	return true;
-}
-#endif
-
-#ifndef atomic_dec_unless_positive
-static inline bool atomic_dec_unless_positive(atomic_t *v)
-{
-	int c = atomic_read(v);
-
-	do {
-		if (unlikely(c > 0))
-			return false;
-	} while (!atomic_try_cmpxchg(v, &c, c - 1));
-
-	return true;
-}
-#endif
-
-/*
- * atomic_dec_if_positive - decrement by 1 if old value positive
- * @v: pointer of type atomic_t
- *
- * The function returns the old value of *v minus 1, even if
- * the atomic variable, v, was not decremented.
- */
-#ifndef atomic_dec_if_positive
-static inline int atomic_dec_if_positive(atomic_t *v)
-{
-	int dec, c = atomic_read(v);
-
-	do {
-		dec = c - 1;
-		if (unlikely(dec < 0))
-			break;
-	} while (!atomic_try_cmpxchg(v, &c, dec));
-
-	return dec;
-}
-#endif
-
-#define atomic_cond_read_relaxed(v, c)	smp_cond_load_relaxed(&(v)->counter, (c))
-#define atomic_cond_read_acquire(v, c)	smp_cond_load_acquire(&(v)->counter, (c))
-
-#ifdef CONFIG_GENERIC_ATOMIC64
-#include <asm-generic/atomic64.h>
-#endif
-
-#ifndef atomic64_read_acquire
-#define  atomic64_read_acquire(v)	smp_load_acquire(&(v)->counter)
-#endif
-
-#ifndef atomic64_set_release
-#define  atomic64_set_release(v, i)	smp_store_release(&(v)->counter, (i))
-#endif
-
-/* atomic64_add_return_relaxed */
-#ifndef atomic64_add_return_relaxed
-#define  atomic64_add_return_relaxed	atomic64_add_return
-#define  atomic64_add_return_acquire	atomic64_add_return
-#define  atomic64_add_return_release	atomic64_add_return
-
-#else /* atomic64_add_return_relaxed */
-
-#ifndef atomic64_add_return_acquire
-#define  atomic64_add_return_acquire(...)				\
-	__atomic_op_acquire(atomic64_add_return, __VA_ARGS__)
-#endif
-
-#ifndef atomic64_add_return_release
-#define  atomic64_add_return_release(...)				\
-	__atomic_op_release(atomic64_add_return, __VA_ARGS__)
-#endif
-
-#ifndef atomic64_add_return
-#define  atomic64_add_return(...)					\
-	__atomic_op_fence(atomic64_add_return, __VA_ARGS__)
-#endif
-#endif /* atomic64_add_return_relaxed */
-
-#ifndef atomic64_inc
-#define atomic64_inc(v)			atomic64_add(1, (v))
-#endif
-
-/* atomic64_inc_return_relaxed */
-#ifndef atomic64_inc_return_relaxed
-
-#ifndef atomic64_inc_return
-#define atomic64_inc_return(v)		atomic64_add_return(1, (v))
-#define atomic64_inc_return_relaxed(v)	atomic64_add_return_relaxed(1, (v))
-#define atomic64_inc_return_acquire(v)	atomic64_add_return_acquire(1, (v))
-#define atomic64_inc_return_release(v)	atomic64_add_return_release(1, (v))
-#else /* atomic64_inc_return */
-#define  atomic64_inc_return_relaxed	atomic64_inc_return
-#define  atomic64_inc_return_acquire	atomic64_inc_return
-#define  atomic64_inc_return_release	atomic64_inc_return
-#endif /* atomic64_inc_return */
-
-#else /* atomic64_inc_return_relaxed */
-
-#ifndef atomic64_inc_return_acquire
-#define  atomic64_inc_return_acquire(...)				\
-	__atomic_op_acquire(atomic64_inc_return, __VA_ARGS__)
-#endif
-
-#ifndef atomic64_inc_return_release
-#define  atomic64_inc_return_release(...)				\
-	__atomic_op_release(atomic64_inc_return, __VA_ARGS__)
-#endif
-
-#ifndef atomic64_inc_return
-#define  atomic64_inc_return(...)					\
-	__atomic_op_fence(atomic64_inc_return, __VA_ARGS__)
-#endif
-#endif /* atomic64_inc_return_relaxed */
-
-
-/* atomic64_sub_return_relaxed */
-#ifndef atomic64_sub_return_relaxed
-#define  atomic64_sub_return_relaxed	atomic64_sub_return
-#define  atomic64_sub_return_acquire	atomic64_sub_return
-#define  atomic64_sub_return_release	atomic64_sub_return
-
-#else /* atomic64_sub_return_relaxed */
-
-#ifndef atomic64_sub_return_acquire
-#define  atomic64_sub_return_acquire(...)				\
-	__atomic_op_acquire(atomic64_sub_return, __VA_ARGS__)
-#endif
-
-#ifndef atomic64_sub_return_release
-#define  atomic64_sub_return_release(...)				\
-	__atomic_op_release(atomic64_sub_return, __VA_ARGS__)
-#endif
-
-#ifndef atomic64_sub_return
-#define  atomic64_sub_return(...)					\
-	__atomic_op_fence(atomic64_sub_return, __VA_ARGS__)
-#endif
-#endif /* atomic64_sub_return_relaxed */
-
-#ifndef atomic64_dec
-#define atomic64_dec(v)			atomic64_sub(1, (v))
-#endif
-
-/* atomic64_dec_return_relaxed */
-#ifndef atomic64_dec_return_relaxed
-
-#ifndef atomic64_dec_return
-#define atomic64_dec_return(v)		atomic64_sub_return(1, (v))
-#define atomic64_dec_return_relaxed(v)	atomic64_sub_return_relaxed(1, (v))
-#define atomic64_dec_return_acquire(v)	atomic64_sub_return_acquire(1, (v))
-#define atomic64_dec_return_release(v)	atomic64_sub_return_release(1, (v))
-#else /* atomic64_dec_return */
-#define  atomic64_dec_return_relaxed	atomic64_dec_return
-#define  atomic64_dec_return_acquire	atomic64_dec_return
-#define  atomic64_dec_return_release	atomic64_dec_return
-#endif /* atomic64_dec_return */
-
-#else /* atomic64_dec_return_relaxed */
-
-#ifndef atomic64_dec_return_acquire
-#define  atomic64_dec_return_acquire(...)				\
-	__atomic_op_acquire(atomic64_dec_return, __VA_ARGS__)
-#endif
-
-#ifndef atomic64_dec_return_release
-#define  atomic64_dec_return_release(...)				\
-	__atomic_op_release(atomic64_dec_return, __VA_ARGS__)
-#endif
-
-#ifndef atomic64_dec_return
-#define  atomic64_dec_return(...)					\
-	__atomic_op_fence(atomic64_dec_return, __VA_ARGS__)
-#endif
-#endif /* atomic64_dec_return_relaxed */
-
-
-/* atomic64_fetch_add_relaxed */
-#ifndef atomic64_fetch_add_relaxed
-#define atomic64_fetch_add_relaxed	atomic64_fetch_add
-#define atomic64_fetch_add_acquire	atomic64_fetch_add
-#define atomic64_fetch_add_release	atomic64_fetch_add
-
-#else /* atomic64_fetch_add_relaxed */
-
-#ifndef atomic64_fetch_add_acquire
-#define atomic64_fetch_add_acquire(...)					\
-	__atomic_op_acquire(atomic64_fetch_add, __VA_ARGS__)
-#endif
-
-#ifndef atomic64_fetch_add_release
-#define atomic64_fetch_add_release(...)					\
-	__atomic_op_release(atomic64_fetch_add, __VA_ARGS__)
-#endif
-
-#ifndef atomic64_fetch_add
-#define atomic64_fetch_add(...)						\
-	__atomic_op_fence(atomic64_fetch_add, __VA_ARGS__)
-#endif
-#endif /* atomic64_fetch_add_relaxed */
-
-/* atomic64_fetch_inc_relaxed */
-#ifndef atomic64_fetch_inc_relaxed
-
-#ifndef atomic64_fetch_inc
-#define atomic64_fetch_inc(v)		atomic64_fetch_add(1, (v))
-#define atomic64_fetch_inc_relaxed(v)	atomic64_fetch_add_relaxed(1, (v))
-#define atomic64_fetch_inc_acquire(v)	atomic64_fetch_add_acquire(1, (v))
-#define atomic64_fetch_inc_release(v)	atomic64_fetch_add_release(1, (v))
-#else /* atomic64_fetch_inc */
-#define atomic64_fetch_inc_relaxed	atomic64_fetch_inc
-#define atomic64_fetch_inc_acquire	atomic64_fetch_inc
-#define atomic64_fetch_inc_release	atomic64_fetch_inc
-#endif /* atomic64_fetch_inc */
-
-#else /* atomic64_fetch_inc_relaxed */
-
-#ifndef atomic64_fetch_inc_acquire
-#define atomic64_fetch_inc_acquire(...)					\
-	__atomic_op_acquire(atomic64_fetch_inc, __VA_ARGS__)
-#endif
-
-#ifndef atomic64_fetch_inc_release
-#define atomic64_fetch_inc_release(...)					\
-	__atomic_op_release(atomic64_fetch_inc, __VA_ARGS__)
-#endif
-
-#ifndef atomic64_fetch_inc
-#define atomic64_fetch_inc(...)						\
-	__atomic_op_fence(atomic64_fetch_inc, __VA_ARGS__)
-#endif
-#endif /* atomic64_fetch_inc_relaxed */
-
-/* atomic64_fetch_sub_relaxed */
-#ifndef atomic64_fetch_sub_relaxed
-#define atomic64_fetch_sub_relaxed	atomic64_fetch_sub
-#define atomic64_fetch_sub_acquire	atomic64_fetch_sub
-#define atomic64_fetch_sub_release	atomic64_fetch_sub
-
-#else /* atomic64_fetch_sub_relaxed */
-
-#ifndef atomic64_fetch_sub_acquire
-#define atomic64_fetch_sub_acquire(...)					\
-	__atomic_op_acquire(atomic64_fetch_sub, __VA_ARGS__)
-#endif
-
-#ifndef atomic64_fetch_sub_release
-#define atomic64_fetch_sub_release(...)					\
-	__atomic_op_release(atomic64_fetch_sub, __VA_ARGS__)
-#endif
-
-#ifndef atomic64_fetch_sub
-#define atomic64_fetch_sub(...)						\
-	__atomic_op_fence(atomic64_fetch_sub, __VA_ARGS__)
-#endif
-#endif /* atomic64_fetch_sub_relaxed */
-
-/* atomic64_fetch_dec_relaxed */
-#ifndef atomic64_fetch_dec_relaxed
-
-#ifndef atomic64_fetch_dec
-#define atomic64_fetch_dec(v)		atomic64_fetch_sub(1, (v))
-#define atomic64_fetch_dec_relaxed(v)	atomic64_fetch_sub_relaxed(1, (v))
-#define atomic64_fetch_dec_acquire(v)	atomic64_fetch_sub_acquire(1, (v))
-#define atomic64_fetch_dec_release(v)	atomic64_fetch_sub_release(1, (v))
-#else /* atomic64_fetch_dec */
-#define atomic64_fetch_dec_relaxed	atomic64_fetch_dec
-#define atomic64_fetch_dec_acquire	atomic64_fetch_dec
-#define atomic64_fetch_dec_release	atomic64_fetch_dec
-#endif /* atomic64_fetch_dec */
-
-#else /* atomic64_fetch_dec_relaxed */
-
-#ifndef atomic64_fetch_dec_acquire
-#define atomic64_fetch_dec_acquire(...)					\
-	__atomic_op_acquire(atomic64_fetch_dec, __VA_ARGS__)
-#endif
-
-#ifndef atomic64_fetch_dec_release
-#define atomic64_fetch_dec_release(...)					\
-	__atomic_op_release(atomic64_fetch_dec, __VA_ARGS__)
-#endif
-
-#ifndef atomic64_fetch_dec
-#define atomic64_fetch_dec(...)						\
-	__atomic_op_fence(atomic64_fetch_dec, __VA_ARGS__)
-#endif
-#endif /* atomic64_fetch_dec_relaxed */
-
-/* atomic64_fetch_or_relaxed */
-#ifndef atomic64_fetch_or_relaxed
-#define atomic64_fetch_or_relaxed	atomic64_fetch_or
-#define atomic64_fetch_or_acquire	atomic64_fetch_or
-#define atomic64_fetch_or_release	atomic64_fetch_or
-
-#else /* atomic64_fetch_or_relaxed */
-
-#ifndef atomic64_fetch_or_acquire
-#define atomic64_fetch_or_acquire(...)					\
-	__atomic_op_acquire(atomic64_fetch_or, __VA_ARGS__)
-#endif
-
-#ifndef atomic64_fetch_or_release
-#define atomic64_fetch_or_release(...)					\
-	__atomic_op_release(atomic64_fetch_or, __VA_ARGS__)
-#endif
-
-#ifndef atomic64_fetch_or
-#define atomic64_fetch_or(...)						\
-	__atomic_op_fence(atomic64_fetch_or, __VA_ARGS__)
-#endif
-#endif /* atomic64_fetch_or_relaxed */
-
-/* atomic64_fetch_and_relaxed */
-#ifndef atomic64_fetch_and_relaxed
-#define atomic64_fetch_and_relaxed	atomic64_fetch_and
-#define atomic64_fetch_and_acquire	atomic64_fetch_and
-#define atomic64_fetch_and_release	atomic64_fetch_and
-
-#else /* atomic64_fetch_and_relaxed */
-
-#ifndef atomic64_fetch_and_acquire
-#define atomic64_fetch_and_acquire(...)					\
-	__atomic_op_acquire(atomic64_fetch_and, __VA_ARGS__)
-#endif
-
-#ifndef atomic64_fetch_and_release
-#define atomic64_fetch_and_release(...)					\
-	__atomic_op_release(atomic64_fetch_and, __VA_ARGS__)
-#endif
-
-#ifndef atomic64_fetch_and
-#define atomic64_fetch_and(...)						\
-	__atomic_op_fence(atomic64_fetch_and, __VA_ARGS__)
-#endif
-#endif /* atomic64_fetch_and_relaxed */
-
-#ifndef atomic64_andnot
-#define atomic64_andnot(i, v)		atomic64_and(~(long long)(i), (v))
-#endif
-
-#ifndef atomic64_fetch_andnot_relaxed
-
-#ifndef atomic64_fetch_andnot
-#define atomic64_fetch_andnot(i, v)		atomic64_fetch_and(~(long long)(i), (v))
-#define atomic64_fetch_andnot_relaxed(i, v)	atomic64_fetch_and_relaxed(~(long long)(i), (v))
-#define atomic64_fetch_andnot_acquire(i, v)	atomic64_fetch_and_acquire(~(long long)(i), (v))
-#define atomic64_fetch_andnot_release(i, v)	atomic64_fetch_and_release(~(long long)(i), (v))
-#else /* atomic64_fetch_andnot */
-#define atomic64_fetch_andnot_relaxed		atomic64_fetch_andnot
-#define atomic64_fetch_andnot_acquire		atomic64_fetch_andnot
-#define atomic64_fetch_andnot_release		atomic64_fetch_andnot
-#endif /* atomic64_fetch_andnot */
-
-#else /* atomic64_fetch_andnot_relaxed */
-
-#ifndef atomic64_fetch_andnot_acquire
-#define atomic64_fetch_andnot_acquire(...)					\
-	__atomic_op_acquire(atomic64_fetch_andnot, __VA_ARGS__)
-#endif
-
-#ifndef atomic64_fetch_andnot_release
-#define atomic64_fetch_andnot_release(...)					\
-	__atomic_op_release(atomic64_fetch_andnot, __VA_ARGS__)
-#endif
-
-#ifndef atomic64_fetch_andnot
-#define atomic64_fetch_andnot(...)						\
-	__atomic_op_fence(atomic64_fetch_andnot, __VA_ARGS__)
-#endif
-#endif /* atomic64_fetch_andnot_relaxed */
-
-/* atomic64_fetch_xor_relaxed */
-#ifndef atomic64_fetch_xor_relaxed
-#define atomic64_fetch_xor_relaxed	atomic64_fetch_xor
-#define atomic64_fetch_xor_acquire	atomic64_fetch_xor
-#define atomic64_fetch_xor_release	atomic64_fetch_xor
-
-#else /* atomic64_fetch_xor_relaxed */
-
-#ifndef atomic64_fetch_xor_acquire
-#define atomic64_fetch_xor_acquire(...)					\
-	__atomic_op_acquire(atomic64_fetch_xor, __VA_ARGS__)
-#endif
-
-#ifndef atomic64_fetch_xor_release
-#define atomic64_fetch_xor_release(...)					\
-	__atomic_op_release(atomic64_fetch_xor, __VA_ARGS__)
-#endif
-
-#ifndef atomic64_fetch_xor
-#define atomic64_fetch_xor(...)						\
-	__atomic_op_fence(atomic64_fetch_xor, __VA_ARGS__)
-#endif
-#endif /* atomic64_fetch_xor_relaxed */
-
-
-/* atomic64_xchg_relaxed */
-#ifndef atomic64_xchg_relaxed
-#define  atomic64_xchg_relaxed		atomic64_xchg
-#define  atomic64_xchg_acquire		atomic64_xchg
-#define  atomic64_xchg_release		atomic64_xchg
-
-#else /* atomic64_xchg_relaxed */
-
-#ifndef atomic64_xchg_acquire
-#define  atomic64_xchg_acquire(...)					\
-	__atomic_op_acquire(atomic64_xchg, __VA_ARGS__)
-#endif
-
-#ifndef atomic64_xchg_release
-#define  atomic64_xchg_release(...)					\
-	__atomic_op_release(atomic64_xchg, __VA_ARGS__)
-#endif
-
-#ifndef atomic64_xchg
-#define  atomic64_xchg(...)						\
-	__atomic_op_fence(atomic64_xchg, __VA_ARGS__)
-#endif
-#endif /* atomic64_xchg_relaxed */
-
-/* atomic64_cmpxchg_relaxed */
-#ifndef atomic64_cmpxchg_relaxed
-#define  atomic64_cmpxchg_relaxed	atomic64_cmpxchg
-#define  atomic64_cmpxchg_acquire	atomic64_cmpxchg
-#define  atomic64_cmpxchg_release	atomic64_cmpxchg
-
-#else /* atomic64_cmpxchg_relaxed */
-
-#ifndef atomic64_cmpxchg_acquire
-#define  atomic64_cmpxchg_acquire(...)					\
-	__atomic_op_acquire(atomic64_cmpxchg, __VA_ARGS__)
-#endif
-
-#ifndef atomic64_cmpxchg_release
-#define  atomic64_cmpxchg_release(...)					\
-	__atomic_op_release(atomic64_cmpxchg, __VA_ARGS__)
-#endif
-
-#ifndef atomic64_cmpxchg
-#define  atomic64_cmpxchg(...)						\
-	__atomic_op_fence(atomic64_cmpxchg, __VA_ARGS__)
-#endif
-#endif /* atomic64_cmpxchg_relaxed */
-
-#ifndef atomic64_try_cmpxchg
-
-#define __atomic64_try_cmpxchg(type, _p, _po, _n)			\
-({									\
-	typeof(_po) __po = (_po);					\
-	typeof(*(_po)) __r, __o = *__po;				\
-	__r = atomic64_cmpxchg##type((_p), __o, (_n));			\
-	if (unlikely(__r != __o))					\
-		*__po = __r;						\
-	likely(__r == __o);						\
-})
-
-#define atomic64_try_cmpxchg(_p, _po, _n)		__atomic64_try_cmpxchg(, _p, _po, _n)
-#define atomic64_try_cmpxchg_relaxed(_p, _po, _n)	__atomic64_try_cmpxchg(_relaxed, _p, _po, _n)
-#define atomic64_try_cmpxchg_acquire(_p, _po, _n)	__atomic64_try_cmpxchg(_acquire, _p, _po, _n)
-#define atomic64_try_cmpxchg_release(_p, _po, _n)	__atomic64_try_cmpxchg(_release, _p, _po, _n)
-
-#else /* atomic64_try_cmpxchg */
-#define atomic64_try_cmpxchg_relaxed	atomic64_try_cmpxchg
-#define atomic64_try_cmpxchg_acquire	atomic64_try_cmpxchg
-#define atomic64_try_cmpxchg_release	atomic64_try_cmpxchg
-#endif /* atomic64_try_cmpxchg */
-
-/**
- * atomic64_fetch_add_unless - add unless the number is already a given value
- * @v: pointer of type atomic64_t
- * @a: the amount to add to v...
- * @u: ...unless v is equal to u.
- *
- * Atomically adds @a to @v, if @v was not already @u.
- * Returns the original value of @v.
- */
-#ifndef atomic64_fetch_add_unless
-static inline long long atomic64_fetch_add_unless(atomic64_t *v, long long a,
-						  long long u)
-{
-	long long c = atomic64_read(v);
-
-	do {
-		if (unlikely(c == u))
-			break;
-	} while (!atomic64_try_cmpxchg(v, &c, c + a));
-
-	return c;
-}
-#endif
-
-/**
- * atomic64_add_unless - add unless the number is already a given value
- * @v: pointer of type atomic_t
- * @a: the amount to add to v...
- * @u: ...unless v is equal to u.
- *
- * Atomically adds @a to @v, if @v was not already @u.
- * Returns true if the addition was done.
- */
-static inline bool atomic64_add_unless(atomic64_t *v, long long a, long long u)
-{
-	return atomic64_fetch_add_unless(v, a, u) != u;
-}
-
-/**
- * atomic64_inc_not_zero - increment unless the number is zero
- * @v: pointer of type atomic64_t
- *
- * Atomically increments @v by 1, if @v is non-zero.
- * Returns true if the increment was done.
- */
-#ifndef atomic64_inc_not_zero
-#define atomic64_inc_not_zero(v)	atomic64_add_unless((v), 1, 0)
-#endif
-
-/**
- * atomic64_inc_and_test - increment and test
- * @v: pointer of type atomic64_t
- *
- * Atomically increments @v by 1
- * and returns true if the result is zero, or false for all
- * other cases.
- */
-#ifndef atomic64_inc_and_test
-static inline bool atomic64_inc_and_test(atomic64_t *v)
-{
-	return atomic64_inc_return(v) == 0;
-}
-#endif
-
-/**
- * atomic64_dec_and_test - decrement and test
- * @v: pointer of type atomic64_t
- *
- * Atomically decrements @v by 1 and
- * returns true if the result is 0, or false for all other
- * cases.
- */
-#ifndef atomic64_dec_and_test
-static inline bool atomic64_dec_and_test(atomic64_t *v)
-{
-	return atomic64_dec_return(v) == 0;
-}
-#endif
-
-/**
- * atomic64_sub_and_test - subtract value from variable and test result
- * @i: integer value to subtract
- * @v: pointer of type atomic64_t
- *
- * Atomically subtracts @i from @v and returns
- * true if the result is zero, or false for all
- * other cases.
- */
-#ifndef atomic64_sub_and_test
-static inline bool atomic64_sub_and_test(long long i, atomic64_t *v)
-{
-	return atomic64_sub_return(i, v) == 0;
-}
-#endif
-
-/**
- * atomic64_add_negative - add and test if negative
- * @i: integer value to add
- * @v: pointer of type atomic64_t
- *
- * Atomically adds @i to @v and returns true
- * if the result is negative, or false when
- * result is greater than or equal to zero.
- */
-#ifndef atomic64_add_negative
-static inline bool atomic64_add_negative(long long i, atomic64_t *v)
-{
-	return atomic64_add_return(i, v) < 0;
-}
-#endif
-
-#ifndef atomic64_inc_unless_negative
-static inline bool atomic64_inc_unless_negative(atomic64_t *v)
-{
-	long long c = atomic64_read(v);
-
-	do {
-		if (unlikely(c < 0))
-			return false;
-	} while (!atomic64_try_cmpxchg(v, &c, c + 1));
-
-	return true;
-}
-#endif
-
-#ifndef atomic64_dec_unless_positive
-static inline bool atomic64_dec_unless_positive(atomic64_t *v)
-{
-	long long c = atomic64_read(v);
-
-	do {
-		if (unlikely(c > 0))
-			return false;
-	} while (!atomic64_try_cmpxchg(v, &c, c - 1));
-
-	return true;
-}
-#endif
-
-/*
- * atomic64_dec_if_positive - decrement by 1 if old value positive
- * @v: pointer of type atomic64_t
- *
- * The function returns the old value of *v minus 1, even if
- * the atomic64 variable, v, was not decremented.
- */
-#ifndef atomic64_dec_if_positive
-static inline long long atomic64_dec_if_positive(atomic64_t *v)
-{
-	long long dec, c = atomic64_read(v);
-
-	do {
-		dec = c - 1;
-		if (unlikely(dec < 0))
-			break;
-	} while (!atomic64_try_cmpxchg(v, &c, dec));
-
-	return dec;
-}
-#endif
-
-#define atomic64_cond_read_relaxed(v, c)	smp_cond_load_relaxed(&(v)->counter, (c))
-#define atomic64_cond_read_acquire(v, c)	smp_cond_load_acquire(&(v)->counter, (c))
+#include <linux/atomic-fallback.h>
 
 #include <asm-generic/atomic-long.h>
 
-- 
cgit v1.2.3


From b5d47ef9ea5c5fe31d7eabeb79f697629bd9e2cb Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Tue, 4 Sep 2018 11:48:27 +0100
Subject: locking/atomics: Switch to generated atomic-long

As a step towards ensuring the atomic* APIs are consistent, let's switch
to wrappers generated by gen-atomic-long.h, using the same table that
gen-atomic-fallbacks.h uses to fill in gaps in the atomic_* and
atomic64_* APIs.

These are checked in rather than generated with Kbuild, since:

* This allows inspection of the atomics with git grep and ctags on a
  pristine tree, which Linus strongly prefers being able to do.

* The fallbacks are not affected by machine details or configuration
  options, so it is not necessary to regenerate them to take these into
  account.

* These are included by files required *very* early in the build process
  (e.g. for generating bounds.h), and we'd rather not complicate the
  top-level Kbuild file with dependencies.

Other than *_INIT() and *_cond_read_acquire(), all API functions are
implemented as static inline C functions, ensuring consistent type
promotion and/or truncation without requiring explicit casts to be
applied to parameters or return values.

Since we typedef atomic_long_t to either atomic_t or atomic64_t, we know
these types are equivalent, and don't require explicit casts between
them. However, as the try_cmpxchg*() functions take a pointer for the
'old' parameter, which may be an int or s64, an explicit cast is
generated for this.

There should be no functional change as a result of this patch (i.e.
existing code should not be affected). However, this introduces a number
of functions into the atomic_long_* API, bringing it into line with the
atomic_* and atomic64_* APIs.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Will Deacon <will.deacon@arm.com>
Cc: linux-arm-kernel@lists.infradead.org
Cc: catalin.marinas@arm.com
Cc: linuxdrivers@attotech.com
Cc: dvyukov@google.com
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: aryabinin@virtuozzo.com
Cc: glider@google.com
Link: http://lkml.kernel.org/r/20180904104830.2975-4-mark.rutland@arm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/asm-generic/atomic-long.h | 1173 ++++++++++++++++++++++++++++++-------
 1 file changed, 958 insertions(+), 215 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/atomic-long.h b/include/asm-generic/atomic-long.h
index 87d14476edc2..a833d385a70b 100644
--- a/include/asm-generic/atomic-long.h
+++ b/include/asm-generic/atomic-long.h
@@ -1,269 +1,1012 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+// SPDX-License-Identifier: GPL-2.0
+
+// Generated by scripts/atomic/gen-atomic-long.sh
+// DO NOT MODIFY THIS FILE DIRECTLY
+
 #ifndef _ASM_GENERIC_ATOMIC_LONG_H
 #define _ASM_GENERIC_ATOMIC_LONG_H
-/*
- * Copyright (C) 2005 Silicon Graphics, Inc.
- *	Christoph Lameter
- *
- * Allows to provide arch independent atomic definitions without the need to
- * edit all arch specific atomic.h files.
- */
 
 #include <asm/types.h>
 
-/*
- * Suppport for atomic_long_t
- *
- * Casts for parameters are avoided for existing atomic functions in order to
- * avoid issues with cast-as-lval under gcc 4.x and other limitations that the
- * macros of a platform may have.
- */
+#ifdef CONFIG_64BIT
+typedef atomic64_t atomic_long_t;
+#define ATOMIC_LONG_INIT(i)		ATOMIC64_INIT(i)
+#define atomic_long_cond_read_acquire	atomic64_cond_read_acquire
+#define atomic_long_cond_read_relaxed	atomic64_cond_read_relaxed
+#else
+typedef atomic_t atomic_long_t;
+#define ATOMIC_LONG_INIT(i)		ATOMIC_INIT(i)
+#define atomic_long_cond_read_acquire	atomic_cond_read_acquire
+#define atomic_long_cond_read_relaxed	atomic_cond_read_relaxed
+#endif
 
-#if BITS_PER_LONG == 64
+#ifdef CONFIG_64BIT
 
-typedef atomic64_t atomic_long_t;
+static inline long
+atomic_long_read(const atomic_long_t *v)
+{
+	return atomic64_read(v);
+}
 
-#define ATOMIC_LONG_INIT(i)	ATOMIC64_INIT(i)
-#define ATOMIC_LONG_PFX(x)	atomic64 ## x
-#define ATOMIC_LONG_TYPE	s64
+static inline long
+atomic_long_read_acquire(const atomic_long_t *v)
+{
+	return atomic64_read_acquire(v);
+}
 
-#else
+static inline void
+atomic_long_set(atomic_long_t *v, long i)
+{
+	atomic64_set(v, i);
+}
 
-typedef atomic_t atomic_long_t;
+static inline void
+atomic_long_set_release(atomic_long_t *v, long i)
+{
+	atomic64_set_release(v, i);
+}
 
-#define ATOMIC_LONG_INIT(i)	ATOMIC_INIT(i)
-#define ATOMIC_LONG_PFX(x)	atomic ## x
-#define ATOMIC_LONG_TYPE	int
+static inline void
+atomic_long_add(long i, atomic_long_t *v)
+{
+	atomic64_add(i, v);
+}
 
-#endif
+static inline long
+atomic_long_add_return(long i, atomic_long_t *v)
+{
+	return atomic64_add_return(i, v);
+}
+
+static inline long
+atomic_long_add_return_acquire(long i, atomic_long_t *v)
+{
+	return atomic64_add_return_acquire(i, v);
+}
+
+static inline long
+atomic_long_add_return_release(long i, atomic_long_t *v)
+{
+	return atomic64_add_return_release(i, v);
+}
+
+static inline long
+atomic_long_add_return_relaxed(long i, atomic_long_t *v)
+{
+	return atomic64_add_return_relaxed(i, v);
+}
+
+static inline long
+atomic_long_fetch_add(long i, atomic_long_t *v)
+{
+	return atomic64_fetch_add(i, v);
+}
+
+static inline long
+atomic_long_fetch_add_acquire(long i, atomic_long_t *v)
+{
+	return atomic64_fetch_add_acquire(i, v);
+}
+
+static inline long
+atomic_long_fetch_add_release(long i, atomic_long_t *v)
+{
+	return atomic64_fetch_add_release(i, v);
+}
+
+static inline long
+atomic_long_fetch_add_relaxed(long i, atomic_long_t *v)
+{
+	return atomic64_fetch_add_relaxed(i, v);
+}
+
+static inline void
+atomic_long_sub(long i, atomic_long_t *v)
+{
+	atomic64_sub(i, v);
+}
+
+static inline long
+atomic_long_sub_return(long i, atomic_long_t *v)
+{
+	return atomic64_sub_return(i, v);
+}
+
+static inline long
+atomic_long_sub_return_acquire(long i, atomic_long_t *v)
+{
+	return atomic64_sub_return_acquire(i, v);
+}
+
+static inline long
+atomic_long_sub_return_release(long i, atomic_long_t *v)
+{
+	return atomic64_sub_return_release(i, v);
+}
+
+static inline long
+atomic_long_sub_return_relaxed(long i, atomic_long_t *v)
+{
+	return atomic64_sub_return_relaxed(i, v);
+}
+
+static inline long
+atomic_long_fetch_sub(long i, atomic_long_t *v)
+{
+	return atomic64_fetch_sub(i, v);
+}
+
+static inline long
+atomic_long_fetch_sub_acquire(long i, atomic_long_t *v)
+{
+	return atomic64_fetch_sub_acquire(i, v);
+}
+
+static inline long
+atomic_long_fetch_sub_release(long i, atomic_long_t *v)
+{
+	return atomic64_fetch_sub_release(i, v);
+}
+
+static inline long
+atomic_long_fetch_sub_relaxed(long i, atomic_long_t *v)
+{
+	return atomic64_fetch_sub_relaxed(i, v);
+}
+
+static inline void
+atomic_long_inc(atomic_long_t *v)
+{
+	atomic64_inc(v);
+}
+
+static inline long
+atomic_long_inc_return(atomic_long_t *v)
+{
+	return atomic64_inc_return(v);
+}
+
+static inline long
+atomic_long_inc_return_acquire(atomic_long_t *v)
+{
+	return atomic64_inc_return_acquire(v);
+}
+
+static inline long
+atomic_long_inc_return_release(atomic_long_t *v)
+{
+	return atomic64_inc_return_release(v);
+}
+
+static inline long
+atomic_long_inc_return_relaxed(atomic_long_t *v)
+{
+	return atomic64_inc_return_relaxed(v);
+}
+
+static inline long
+atomic_long_fetch_inc(atomic_long_t *v)
+{
+	return atomic64_fetch_inc(v);
+}
+
+static inline long
+atomic_long_fetch_inc_acquire(atomic_long_t *v)
+{
+	return atomic64_fetch_inc_acquire(v);
+}
+
+static inline long
+atomic_long_fetch_inc_release(atomic_long_t *v)
+{
+	return atomic64_fetch_inc_release(v);
+}
+
+static inline long
+atomic_long_fetch_inc_relaxed(atomic_long_t *v)
+{
+	return atomic64_fetch_inc_relaxed(v);
+}
+
+static inline void
+atomic_long_dec(atomic_long_t *v)
+{
+	atomic64_dec(v);
+}
+
+static inline long
+atomic_long_dec_return(atomic_long_t *v)
+{
+	return atomic64_dec_return(v);
+}
+
+static inline long
+atomic_long_dec_return_acquire(atomic_long_t *v)
+{
+	return atomic64_dec_return_acquire(v);
+}
+
+static inline long
+atomic_long_dec_return_release(atomic_long_t *v)
+{
+	return atomic64_dec_return_release(v);
+}
+
+static inline long
+atomic_long_dec_return_relaxed(atomic_long_t *v)
+{
+	return atomic64_dec_return_relaxed(v);
+}
+
+static inline long
+atomic_long_fetch_dec(atomic_long_t *v)
+{
+	return atomic64_fetch_dec(v);
+}
+
+static inline long
+atomic_long_fetch_dec_acquire(atomic_long_t *v)
+{
+	return atomic64_fetch_dec_acquire(v);
+}
+
+static inline long
+atomic_long_fetch_dec_release(atomic_long_t *v)
+{
+	return atomic64_fetch_dec_release(v);
+}
+
+static inline long
+atomic_long_fetch_dec_relaxed(atomic_long_t *v)
+{
+	return atomic64_fetch_dec_relaxed(v);
+}
+
+static inline void
+atomic_long_and(long i, atomic_long_t *v)
+{
+	atomic64_and(i, v);
+}
+
+static inline long
+atomic_long_fetch_and(long i, atomic_long_t *v)
+{
+	return atomic64_fetch_and(i, v);
+}
+
+static inline long
+atomic_long_fetch_and_acquire(long i, atomic_long_t *v)
+{
+	return atomic64_fetch_and_acquire(i, v);
+}
+
+static inline long
+atomic_long_fetch_and_release(long i, atomic_long_t *v)
+{
+	return atomic64_fetch_and_release(i, v);
+}
+
+static inline long
+atomic_long_fetch_and_relaxed(long i, atomic_long_t *v)
+{
+	return atomic64_fetch_and_relaxed(i, v);
+}
+
+static inline void
+atomic_long_andnot(long i, atomic_long_t *v)
+{
+	atomic64_andnot(i, v);
+}
+
+static inline long
+atomic_long_fetch_andnot(long i, atomic_long_t *v)
+{
+	return atomic64_fetch_andnot(i, v);
+}
+
+static inline long
+atomic_long_fetch_andnot_acquire(long i, atomic_long_t *v)
+{
+	return atomic64_fetch_andnot_acquire(i, v);
+}
+
+static inline long
+atomic_long_fetch_andnot_release(long i, atomic_long_t *v)
+{
+	return atomic64_fetch_andnot_release(i, v);
+}
+
+static inline long
+atomic_long_fetch_andnot_relaxed(long i, atomic_long_t *v)
+{
+	return atomic64_fetch_andnot_relaxed(i, v);
+}
+
+static inline void
+atomic_long_or(long i, atomic_long_t *v)
+{
+	atomic64_or(i, v);
+}
+
+static inline long
+atomic_long_fetch_or(long i, atomic_long_t *v)
+{
+	return atomic64_fetch_or(i, v);
+}
+
+static inline long
+atomic_long_fetch_or_acquire(long i, atomic_long_t *v)
+{
+	return atomic64_fetch_or_acquire(i, v);
+}
+
+static inline long
+atomic_long_fetch_or_release(long i, atomic_long_t *v)
+{
+	return atomic64_fetch_or_release(i, v);
+}
+
+static inline long
+atomic_long_fetch_or_relaxed(long i, atomic_long_t *v)
+{
+	return atomic64_fetch_or_relaxed(i, v);
+}
+
+static inline void
+atomic_long_xor(long i, atomic_long_t *v)
+{
+	atomic64_xor(i, v);
+}
+
+static inline long
+atomic_long_fetch_xor(long i, atomic_long_t *v)
+{
+	return atomic64_fetch_xor(i, v);
+}
+
+static inline long
+atomic_long_fetch_xor_acquire(long i, atomic_long_t *v)
+{
+	return atomic64_fetch_xor_acquire(i, v);
+}
+
+static inline long
+atomic_long_fetch_xor_release(long i, atomic_long_t *v)
+{
+	return atomic64_fetch_xor_release(i, v);
+}
+
+static inline long
+atomic_long_fetch_xor_relaxed(long i, atomic_long_t *v)
+{
+	return atomic64_fetch_xor_relaxed(i, v);
+}
+
+static inline long
+atomic_long_xchg(atomic_long_t *v, long i)
+{
+	return atomic64_xchg(v, i);
+}
+
+static inline long
+atomic_long_xchg_acquire(atomic_long_t *v, long i)
+{
+	return atomic64_xchg_acquire(v, i);
+}
+
+static inline long
+atomic_long_xchg_release(atomic_long_t *v, long i)
+{
+	return atomic64_xchg_release(v, i);
+}
+
+static inline long
+atomic_long_xchg_relaxed(atomic_long_t *v, long i)
+{
+	return atomic64_xchg_relaxed(v, i);
+}
+
+static inline long
+atomic_long_cmpxchg(atomic_long_t *v, long old, long new)
+{
+	return atomic64_cmpxchg(v, old, new);
+}
+
+static inline long
+atomic_long_cmpxchg_acquire(atomic_long_t *v, long old, long new)
+{
+	return atomic64_cmpxchg_acquire(v, old, new);
+}
+
+static inline long
+atomic_long_cmpxchg_release(atomic_long_t *v, long old, long new)
+{
+	return atomic64_cmpxchg_release(v, old, new);
+}
+
+static inline long
+atomic_long_cmpxchg_relaxed(atomic_long_t *v, long old, long new)
+{
+	return atomic64_cmpxchg_relaxed(v, old, new);
+}
+
+static inline bool
+atomic_long_try_cmpxchg(atomic_long_t *v, long *old, long new)
+{
+	return atomic64_try_cmpxchg(v, (s64 *)old, new);
+}
+
+static inline bool
+atomic_long_try_cmpxchg_acquire(atomic_long_t *v, long *old, long new)
+{
+	return atomic64_try_cmpxchg_acquire(v, (s64 *)old, new);
+}
+
+static inline bool
+atomic_long_try_cmpxchg_release(atomic_long_t *v, long *old, long new)
+{
+	return atomic64_try_cmpxchg_release(v, (s64 *)old, new);
+}
+
+static inline bool
+atomic_long_try_cmpxchg_relaxed(atomic_long_t *v, long *old, long new)
+{
+	return atomic64_try_cmpxchg_relaxed(v, (s64 *)old, new);
+}
+
+static inline bool
+atomic_long_sub_and_test(long i, atomic_long_t *v)
+{
+	return atomic64_sub_and_test(i, v);
+}
+
+static inline bool
+atomic_long_dec_and_test(atomic_long_t *v)
+{
+	return atomic64_dec_and_test(v);
+}
+
+static inline bool
+atomic_long_inc_and_test(atomic_long_t *v)
+{
+	return atomic64_inc_and_test(v);
+}
+
+static inline bool
+atomic_long_add_negative(long i, atomic_long_t *v)
+{
+	return atomic64_add_negative(i, v);
+}
+
+static inline long
+atomic_long_fetch_add_unless(atomic_long_t *v, long a, long u)
+{
+	return atomic64_fetch_add_unless(v, a, u);
+}
+
+static inline bool
+atomic_long_add_unless(atomic_long_t *v, long a, long u)
+{
+	return atomic64_add_unless(v, a, u);
+}
+
+static inline bool
+atomic_long_inc_not_zero(atomic_long_t *v)
+{
+	return atomic64_inc_not_zero(v);
+}
+
+static inline bool
+atomic_long_inc_unless_negative(atomic_long_t *v)
+{
+	return atomic64_inc_unless_negative(v);
+}
+
+static inline bool
+atomic_long_dec_unless_positive(atomic_long_t *v)
+{
+	return atomic64_dec_unless_positive(v);
+}
+
+static inline long
+atomic_long_dec_if_positive(atomic_long_t *v)
+{
+	return atomic64_dec_if_positive(v);
+}
+
+#else /* CONFIG_64BIT */
+
+static inline long
+atomic_long_read(const atomic_long_t *v)
+{
+	return atomic_read(v);
+}
+
+static inline long
+atomic_long_read_acquire(const atomic_long_t *v)
+{
+	return atomic_read_acquire(v);
+}
+
+static inline void
+atomic_long_set(atomic_long_t *v, long i)
+{
+	atomic_set(v, i);
+}
+
+static inline void
+atomic_long_set_release(atomic_long_t *v, long i)
+{
+	atomic_set_release(v, i);
+}
+
+static inline void
+atomic_long_add(long i, atomic_long_t *v)
+{
+	atomic_add(i, v);
+}
+
+static inline long
+atomic_long_add_return(long i, atomic_long_t *v)
+{
+	return atomic_add_return(i, v);
+}
+
+static inline long
+atomic_long_add_return_acquire(long i, atomic_long_t *v)
+{
+	return atomic_add_return_acquire(i, v);
+}
+
+static inline long
+atomic_long_add_return_release(long i, atomic_long_t *v)
+{
+	return atomic_add_return_release(i, v);
+}
+
+static inline long
+atomic_long_add_return_relaxed(long i, atomic_long_t *v)
+{
+	return atomic_add_return_relaxed(i, v);
+}
+
+static inline long
+atomic_long_fetch_add(long i, atomic_long_t *v)
+{
+	return atomic_fetch_add(i, v);
+}
+
+static inline long
+atomic_long_fetch_add_acquire(long i, atomic_long_t *v)
+{
+	return atomic_fetch_add_acquire(i, v);
+}
+
+static inline long
+atomic_long_fetch_add_release(long i, atomic_long_t *v)
+{
+	return atomic_fetch_add_release(i, v);
+}
+
+static inline long
+atomic_long_fetch_add_relaxed(long i, atomic_long_t *v)
+{
+	return atomic_fetch_add_relaxed(i, v);
+}
+
+static inline void
+atomic_long_sub(long i, atomic_long_t *v)
+{
+	atomic_sub(i, v);
+}
+
+static inline long
+atomic_long_sub_return(long i, atomic_long_t *v)
+{
+	return atomic_sub_return(i, v);
+}
+
+static inline long
+atomic_long_sub_return_acquire(long i, atomic_long_t *v)
+{
+	return atomic_sub_return_acquire(i, v);
+}
+
+static inline long
+atomic_long_sub_return_release(long i, atomic_long_t *v)
+{
+	return atomic_sub_return_release(i, v);
+}
+
+static inline long
+atomic_long_sub_return_relaxed(long i, atomic_long_t *v)
+{
+	return atomic_sub_return_relaxed(i, v);
+}
+
+static inline long
+atomic_long_fetch_sub(long i, atomic_long_t *v)
+{
+	return atomic_fetch_sub(i, v);
+}
+
+static inline long
+atomic_long_fetch_sub_acquire(long i, atomic_long_t *v)
+{
+	return atomic_fetch_sub_acquire(i, v);
+}
+
+static inline long
+atomic_long_fetch_sub_release(long i, atomic_long_t *v)
+{
+	return atomic_fetch_sub_release(i, v);
+}
+
+static inline long
+atomic_long_fetch_sub_relaxed(long i, atomic_long_t *v)
+{
+	return atomic_fetch_sub_relaxed(i, v);
+}
+
+static inline void
+atomic_long_inc(atomic_long_t *v)
+{
+	atomic_inc(v);
+}
+
+static inline long
+atomic_long_inc_return(atomic_long_t *v)
+{
+	return atomic_inc_return(v);
+}
+
+static inline long
+atomic_long_inc_return_acquire(atomic_long_t *v)
+{
+	return atomic_inc_return_acquire(v);
+}
+
+static inline long
+atomic_long_inc_return_release(atomic_long_t *v)
+{
+	return atomic_inc_return_release(v);
+}
+
+static inline long
+atomic_long_inc_return_relaxed(atomic_long_t *v)
+{
+	return atomic_inc_return_relaxed(v);
+}
+
+static inline long
+atomic_long_fetch_inc(atomic_long_t *v)
+{
+	return atomic_fetch_inc(v);
+}
+
+static inline long
+atomic_long_fetch_inc_acquire(atomic_long_t *v)
+{
+	return atomic_fetch_inc_acquire(v);
+}
+
+static inline long
+atomic_long_fetch_inc_release(atomic_long_t *v)
+{
+	return atomic_fetch_inc_release(v);
+}
+
+static inline long
+atomic_long_fetch_inc_relaxed(atomic_long_t *v)
+{
+	return atomic_fetch_inc_relaxed(v);
+}
+
+static inline void
+atomic_long_dec(atomic_long_t *v)
+{
+	atomic_dec(v);
+}
+
+static inline long
+atomic_long_dec_return(atomic_long_t *v)
+{
+	return atomic_dec_return(v);
+}
+
+static inline long
+atomic_long_dec_return_acquire(atomic_long_t *v)
+{
+	return atomic_dec_return_acquire(v);
+}
+
+static inline long
+atomic_long_dec_return_release(atomic_long_t *v)
+{
+	return atomic_dec_return_release(v);
+}
+
+static inline long
+atomic_long_dec_return_relaxed(atomic_long_t *v)
+{
+	return atomic_dec_return_relaxed(v);
+}
+
+static inline long
+atomic_long_fetch_dec(atomic_long_t *v)
+{
+	return atomic_fetch_dec(v);
+}
+
+static inline long
+atomic_long_fetch_dec_acquire(atomic_long_t *v)
+{
+	return atomic_fetch_dec_acquire(v);
+}
+
+static inline long
+atomic_long_fetch_dec_release(atomic_long_t *v)
+{
+	return atomic_fetch_dec_release(v);
+}
+
+static inline long
+atomic_long_fetch_dec_relaxed(atomic_long_t *v)
+{
+	return atomic_fetch_dec_relaxed(v);
+}
+
+static inline void
+atomic_long_and(long i, atomic_long_t *v)
+{
+	atomic_and(i, v);
+}
+
+static inline long
+atomic_long_fetch_and(long i, atomic_long_t *v)
+{
+	return atomic_fetch_and(i, v);
+}
+
+static inline long
+atomic_long_fetch_and_acquire(long i, atomic_long_t *v)
+{
+	return atomic_fetch_and_acquire(i, v);
+}
+
+static inline long
+atomic_long_fetch_and_release(long i, atomic_long_t *v)
+{
+	return atomic_fetch_and_release(i, v);
+}
+
+static inline long
+atomic_long_fetch_and_relaxed(long i, atomic_long_t *v)
+{
+	return atomic_fetch_and_relaxed(i, v);
+}
+
+static inline void
+atomic_long_andnot(long i, atomic_long_t *v)
+{
+	atomic_andnot(i, v);
+}
+
+static inline long
+atomic_long_fetch_andnot(long i, atomic_long_t *v)
+{
+	return atomic_fetch_andnot(i, v);
+}
+
+static inline long
+atomic_long_fetch_andnot_acquire(long i, atomic_long_t *v)
+{
+	return atomic_fetch_andnot_acquire(i, v);
+}
+
+static inline long
+atomic_long_fetch_andnot_release(long i, atomic_long_t *v)
+{
+	return atomic_fetch_andnot_release(i, v);
+}
+
+static inline long
+atomic_long_fetch_andnot_relaxed(long i, atomic_long_t *v)
+{
+	return atomic_fetch_andnot_relaxed(i, v);
+}
 
-#define ATOMIC_LONG_READ_OP(mo)						\
-static inline long atomic_long_read##mo(const atomic_long_t *l)		\
-{									\
-	ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l;		\
-									\
-	return (long)ATOMIC_LONG_PFX(_read##mo)(v);			\
-}
-ATOMIC_LONG_READ_OP()
-ATOMIC_LONG_READ_OP(_acquire)
-
-#undef ATOMIC_LONG_READ_OP
-
-#define ATOMIC_LONG_SET_OP(mo)						\
-static inline void atomic_long_set##mo(atomic_long_t *l, long i)	\
-{									\
-	ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l;		\
-									\
-	ATOMIC_LONG_PFX(_set##mo)(v, i);				\
-}
-ATOMIC_LONG_SET_OP()
-ATOMIC_LONG_SET_OP(_release)
-
-#undef ATOMIC_LONG_SET_OP
-
-#define ATOMIC_LONG_ADD_SUB_OP(op, mo)					\
-static inline long							\
-atomic_long_##op##_return##mo(long i, atomic_long_t *l)			\
-{									\
-	ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l;		\
-									\
-	return (long)ATOMIC_LONG_PFX(_##op##_return##mo)(i, v);		\
-}
-ATOMIC_LONG_ADD_SUB_OP(add,)
-ATOMIC_LONG_ADD_SUB_OP(add, _relaxed)
-ATOMIC_LONG_ADD_SUB_OP(add, _acquire)
-ATOMIC_LONG_ADD_SUB_OP(add, _release)
-ATOMIC_LONG_ADD_SUB_OP(sub,)
-ATOMIC_LONG_ADD_SUB_OP(sub, _relaxed)
-ATOMIC_LONG_ADD_SUB_OP(sub, _acquire)
-ATOMIC_LONG_ADD_SUB_OP(sub, _release)
-
-#undef ATOMIC_LONG_ADD_SUB_OP
-
-#define atomic_long_cmpxchg_relaxed(l, old, new) \
-	(ATOMIC_LONG_PFX(_cmpxchg_relaxed)((ATOMIC_LONG_PFX(_t) *)(l), \
-					   (old), (new)))
-#define atomic_long_cmpxchg_acquire(l, old, new) \
-	(ATOMIC_LONG_PFX(_cmpxchg_acquire)((ATOMIC_LONG_PFX(_t) *)(l), \
-					   (old), (new)))
-#define atomic_long_cmpxchg_release(l, old, new) \
-	(ATOMIC_LONG_PFX(_cmpxchg_release)((ATOMIC_LONG_PFX(_t) *)(l), \
-					   (old), (new)))
-#define atomic_long_cmpxchg(l, old, new) \
-	(ATOMIC_LONG_PFX(_cmpxchg)((ATOMIC_LONG_PFX(_t) *)(l), (old), (new)))
-
-
-#define atomic_long_try_cmpxchg_relaxed(l, old, new) \
-	(ATOMIC_LONG_PFX(_try_cmpxchg_relaxed)((ATOMIC_LONG_PFX(_t) *)(l), \
-					   (ATOMIC_LONG_TYPE *)(old), (ATOMIC_LONG_TYPE)(new)))
-#define atomic_long_try_cmpxchg_acquire(l, old, new) \
-	(ATOMIC_LONG_PFX(_try_cmpxchg_acquire)((ATOMIC_LONG_PFX(_t) *)(l), \
-					   (ATOMIC_LONG_TYPE *)(old), (ATOMIC_LONG_TYPE)(new)))
-#define atomic_long_try_cmpxchg_release(l, old, new) \
-	(ATOMIC_LONG_PFX(_try_cmpxchg_release)((ATOMIC_LONG_PFX(_t) *)(l), \
-					   (ATOMIC_LONG_TYPE *)(old), (ATOMIC_LONG_TYPE)(new)))
-#define atomic_long_try_cmpxchg(l, old, new) \
-	(ATOMIC_LONG_PFX(_try_cmpxchg)((ATOMIC_LONG_PFX(_t) *)(l), \
-				       (ATOMIC_LONG_TYPE *)(old), (ATOMIC_LONG_TYPE)(new)))
-
-
-#define atomic_long_xchg_relaxed(v, new) \
-	(ATOMIC_LONG_PFX(_xchg_relaxed)((ATOMIC_LONG_PFX(_t) *)(v), (new)))
-#define atomic_long_xchg_acquire(v, new) \
-	(ATOMIC_LONG_PFX(_xchg_acquire)((ATOMIC_LONG_PFX(_t) *)(v), (new)))
-#define atomic_long_xchg_release(v, new) \
-	(ATOMIC_LONG_PFX(_xchg_release)((ATOMIC_LONG_PFX(_t) *)(v), (new)))
-#define atomic_long_xchg(v, new) \
-	(ATOMIC_LONG_PFX(_xchg)((ATOMIC_LONG_PFX(_t) *)(v), (new)))
-
-static __always_inline void atomic_long_inc(atomic_long_t *l)
-{
-	ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l;
-
-	ATOMIC_LONG_PFX(_inc)(v);
-}
-
-static __always_inline void atomic_long_dec(atomic_long_t *l)
+static inline void
+atomic_long_or(long i, atomic_long_t *v)
 {
-	ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l;
+	atomic_or(i, v);
+}
 
-	ATOMIC_LONG_PFX(_dec)(v);
+static inline long
+atomic_long_fetch_or(long i, atomic_long_t *v)
+{
+	return atomic_fetch_or(i, v);
 }
 
-#define ATOMIC_LONG_FETCH_OP(op, mo)					\
-static inline long							\
-atomic_long_fetch_##op##mo(long i, atomic_long_t *l)			\
-{									\
-	ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l;		\
-									\
-	return (long)ATOMIC_LONG_PFX(_fetch_##op##mo)(i, v);		\
+static inline long
+atomic_long_fetch_or_acquire(long i, atomic_long_t *v)
+{
+	return atomic_fetch_or_acquire(i, v);
 }
 
-ATOMIC_LONG_FETCH_OP(add, )
-ATOMIC_LONG_FETCH_OP(add, _relaxed)
-ATOMIC_LONG_FETCH_OP(add, _acquire)
-ATOMIC_LONG_FETCH_OP(add, _release)
-ATOMIC_LONG_FETCH_OP(sub, )
-ATOMIC_LONG_FETCH_OP(sub, _relaxed)
-ATOMIC_LONG_FETCH_OP(sub, _acquire)
-ATOMIC_LONG_FETCH_OP(sub, _release)
-ATOMIC_LONG_FETCH_OP(and, )
-ATOMIC_LONG_FETCH_OP(and, _relaxed)
-ATOMIC_LONG_FETCH_OP(and, _acquire)
-ATOMIC_LONG_FETCH_OP(and, _release)
-ATOMIC_LONG_FETCH_OP(andnot, )
-ATOMIC_LONG_FETCH_OP(andnot, _relaxed)
-ATOMIC_LONG_FETCH_OP(andnot, _acquire)
-ATOMIC_LONG_FETCH_OP(andnot, _release)
-ATOMIC_LONG_FETCH_OP(or, )
-ATOMIC_LONG_FETCH_OP(or, _relaxed)
-ATOMIC_LONG_FETCH_OP(or, _acquire)
-ATOMIC_LONG_FETCH_OP(or, _release)
-ATOMIC_LONG_FETCH_OP(xor, )
-ATOMIC_LONG_FETCH_OP(xor, _relaxed)
-ATOMIC_LONG_FETCH_OP(xor, _acquire)
-ATOMIC_LONG_FETCH_OP(xor, _release)
+static inline long
+atomic_long_fetch_or_release(long i, atomic_long_t *v)
+{
+	return atomic_fetch_or_release(i, v);
+}
 
-#undef ATOMIC_LONG_FETCH_OP
+static inline long
+atomic_long_fetch_or_relaxed(long i, atomic_long_t *v)
+{
+	return atomic_fetch_or_relaxed(i, v);
+}
 
-#define ATOMIC_LONG_FETCH_INC_DEC_OP(op, mo)					\
-static inline long							\
-atomic_long_fetch_##op##mo(atomic_long_t *l)				\
-{									\
-	ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l;		\
-									\
-	return (long)ATOMIC_LONG_PFX(_fetch_##op##mo)(v);		\
+static inline void
+atomic_long_xor(long i, atomic_long_t *v)
+{
+	atomic_xor(i, v);
 }
 
-ATOMIC_LONG_FETCH_INC_DEC_OP(inc,)
-ATOMIC_LONG_FETCH_INC_DEC_OP(inc, _relaxed)
-ATOMIC_LONG_FETCH_INC_DEC_OP(inc, _acquire)
-ATOMIC_LONG_FETCH_INC_DEC_OP(inc, _release)
-ATOMIC_LONG_FETCH_INC_DEC_OP(dec,)
-ATOMIC_LONG_FETCH_INC_DEC_OP(dec, _relaxed)
-ATOMIC_LONG_FETCH_INC_DEC_OP(dec, _acquire)
-ATOMIC_LONG_FETCH_INC_DEC_OP(dec, _release)
+static inline long
+atomic_long_fetch_xor(long i, atomic_long_t *v)
+{
+	return atomic_fetch_xor(i, v);
+}
 
-#undef ATOMIC_LONG_FETCH_INC_DEC_OP
+static inline long
+atomic_long_fetch_xor_acquire(long i, atomic_long_t *v)
+{
+	return atomic_fetch_xor_acquire(i, v);
+}
 
-#define ATOMIC_LONG_OP(op)						\
-static __always_inline void						\
-atomic_long_##op(long i, atomic_long_t *l)				\
-{									\
-	ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l;		\
-									\
-	ATOMIC_LONG_PFX(_##op)(i, v);					\
+static inline long
+atomic_long_fetch_xor_release(long i, atomic_long_t *v)
+{
+	return atomic_fetch_xor_release(i, v);
 }
 
-ATOMIC_LONG_OP(add)
-ATOMIC_LONG_OP(sub)
-ATOMIC_LONG_OP(and)
-ATOMIC_LONG_OP(andnot)
-ATOMIC_LONG_OP(or)
-ATOMIC_LONG_OP(xor)
+static inline long
+atomic_long_fetch_xor_relaxed(long i, atomic_long_t *v)
+{
+	return atomic_fetch_xor_relaxed(i, v);
+}
 
-#undef ATOMIC_LONG_OP
+static inline long
+atomic_long_xchg(atomic_long_t *v, long i)
+{
+	return atomic_xchg(v, i);
+}
 
-static inline int atomic_long_sub_and_test(long i, atomic_long_t *l)
+static inline long
+atomic_long_xchg_acquire(atomic_long_t *v, long i)
 {
-	ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l;
+	return atomic_xchg_acquire(v, i);
+}
 
-	return ATOMIC_LONG_PFX(_sub_and_test)(i, v);
+static inline long
+atomic_long_xchg_release(atomic_long_t *v, long i)
+{
+	return atomic_xchg_release(v, i);
 }
 
-static inline int atomic_long_dec_and_test(atomic_long_t *l)
+static inline long
+atomic_long_xchg_relaxed(atomic_long_t *v, long i)
 {
-	ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l;
+	return atomic_xchg_relaxed(v, i);
+}
 
-	return ATOMIC_LONG_PFX(_dec_and_test)(v);
+static inline long
+atomic_long_cmpxchg(atomic_long_t *v, long old, long new)
+{
+	return atomic_cmpxchg(v, old, new);
 }
 
-static inline int atomic_long_inc_and_test(atomic_long_t *l)
+static inline long
+atomic_long_cmpxchg_acquire(atomic_long_t *v, long old, long new)
 {
-	ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l;
+	return atomic_cmpxchg_acquire(v, old, new);
+}
 
-	return ATOMIC_LONG_PFX(_inc_and_test)(v);
+static inline long
+atomic_long_cmpxchg_release(atomic_long_t *v, long old, long new)
+{
+	return atomic_cmpxchg_release(v, old, new);
 }
 
-static inline int atomic_long_add_negative(long i, atomic_long_t *l)
+static inline long
+atomic_long_cmpxchg_relaxed(atomic_long_t *v, long old, long new)
 {
-	ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l;
+	return atomic_cmpxchg_relaxed(v, old, new);
+}
 
-	return ATOMIC_LONG_PFX(_add_negative)(i, v);
+static inline bool
+atomic_long_try_cmpxchg(atomic_long_t *v, long *old, long new)
+{
+	return atomic_try_cmpxchg(v, (int *)old, new);
 }
 
-#define ATOMIC_LONG_INC_DEC_OP(op, mo)					\
-static inline long							\
-atomic_long_##op##_return##mo(atomic_long_t *l)				\
-{									\
-	ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l;		\
-									\
-	return (long)ATOMIC_LONG_PFX(_##op##_return##mo)(v);		\
+static inline bool
+atomic_long_try_cmpxchg_acquire(atomic_long_t *v, long *old, long new)
+{
+	return atomic_try_cmpxchg_acquire(v, (int *)old, new);
 }
-ATOMIC_LONG_INC_DEC_OP(inc,)
-ATOMIC_LONG_INC_DEC_OP(inc, _relaxed)
-ATOMIC_LONG_INC_DEC_OP(inc, _acquire)
-ATOMIC_LONG_INC_DEC_OP(inc, _release)
-ATOMIC_LONG_INC_DEC_OP(dec,)
-ATOMIC_LONG_INC_DEC_OP(dec, _relaxed)
-ATOMIC_LONG_INC_DEC_OP(dec, _acquire)
-ATOMIC_LONG_INC_DEC_OP(dec, _release)
 
-#undef ATOMIC_LONG_INC_DEC_OP
+static inline bool
+atomic_long_try_cmpxchg_release(atomic_long_t *v, long *old, long new)
+{
+	return atomic_try_cmpxchg_release(v, (int *)old, new);
+}
+
+static inline bool
+atomic_long_try_cmpxchg_relaxed(atomic_long_t *v, long *old, long new)
+{
+	return atomic_try_cmpxchg_relaxed(v, (int *)old, new);
+}
+
+static inline bool
+atomic_long_sub_and_test(long i, atomic_long_t *v)
+{
+	return atomic_sub_and_test(i, v);
+}
+
+static inline bool
+atomic_long_dec_and_test(atomic_long_t *v)
+{
+	return atomic_dec_and_test(v);
+}
+
+static inline bool
+atomic_long_inc_and_test(atomic_long_t *v)
+{
+	return atomic_inc_and_test(v);
+}
+
+static inline bool
+atomic_long_add_negative(long i, atomic_long_t *v)
+{
+	return atomic_add_negative(i, v);
+}
+
+static inline long
+atomic_long_fetch_add_unless(atomic_long_t *v, long a, long u)
+{
+	return atomic_fetch_add_unless(v, a, u);
+}
+
+static inline bool
+atomic_long_add_unless(atomic_long_t *v, long a, long u)
+{
+	return atomic_add_unless(v, a, u);
+}
 
-static inline long atomic_long_add_unless(atomic_long_t *l, long a, long u)
+static inline bool
+atomic_long_inc_not_zero(atomic_long_t *v)
 {
-	ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l;
+	return atomic_inc_not_zero(v);
+}
 
-	return (long)ATOMIC_LONG_PFX(_add_unless)(v, a, u);
+static inline bool
+atomic_long_inc_unless_negative(atomic_long_t *v)
+{
+	return atomic_inc_unless_negative(v);
 }
 
-#define atomic_long_inc_not_zero(l) \
-	ATOMIC_LONG_PFX(_inc_not_zero)((ATOMIC_LONG_PFX(_t) *)(l))
+static inline bool
+atomic_long_dec_unless_positive(atomic_long_t *v)
+{
+	return atomic_dec_unless_positive(v);
+}
 
-#define atomic_long_cond_read_relaxed(v, c) \
-	ATOMIC_LONG_PFX(_cond_read_relaxed)((ATOMIC_LONG_PFX(_t) *)(v), (c))
-#define atomic_long_cond_read_acquire(v, c) \
-	ATOMIC_LONG_PFX(_cond_read_acquire)((ATOMIC_LONG_PFX(_t) *)(v), (c))
+static inline long
+atomic_long_dec_if_positive(atomic_long_t *v)
+{
+	return atomic_dec_if_positive(v);
+}
 
-#endif  /*  _ASM_GENERIC_ATOMIC_LONG_H  */
+#endif /* CONFIG_64BIT */
+#endif /* _ASM_GENERIC_ATOMIC_LONG_H */
-- 
cgit v1.2.3


From aa525d063851a98e020b827fdd1d7776ae652301 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Tue, 4 Sep 2018 11:48:28 +0100
Subject: locking/atomics: Switch to generated instrumentation

As a step towards ensuring the atomic* APIs are consistent, let's switch
to wrappers generated by gen-atomic-instrumented.h, using the same table
used to generate the fallbacks and atomic-long wrappers.

These are checked in rather than generated with Kbuild, since:

* This allows inspection of the atomics with git grep and ctags on a
  pristine tree, which Linus strongly prefers being able to do.

* The fallbacks are not affected by machine details or configuration
  options, so it is not necessary to regenerate them to take these into
  account.

* These are included by files required *very* early in the build process
  (e.g. for generating bounds.h), and we'd rather not complicate the
  top-level Kbuild file with dependencies.

Generating the atomic headers means that the instrumented wrappers will
remain in sync with the rest of the atomic APIs, and we gain all the
ordering variants of each atomic without having to manually expanded
them all.

The KASAN checks are automatically generated based on the function
parameters defined in atomics.tbl. Note that try_cmpxchg() now correctly
treats 'old' as a parameter that may be written to, and not only read as
the hand-written instrumentation assumed.

Other than the change to try_cmpxchg(), existing code should not be
affected by this patch. The patch introduces instrumentation for all
optional atomics (and ordering variants), along with the ifdeffery this
requires, enabling other architectures to make use of the instrumented
atomics.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Will Deacon <will.deacon@arm.com>
Cc: linux-arm-kernel@lists.infradead.org
Cc: catalin.marinas@arm.com
Cc: linuxdrivers@attotech.com
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Andrey Ryabinin <aryabinin@virtuozzo.com>
Cc: Alexander Potapenko <glider@google.com>
Link: http://lkml.kernel.org/r/20180904104830.2975-5-mark.rutland@arm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/asm-generic/atomic-instrumented.h | 1688 +++++++++++++++++++++++++----
 1 file changed, 1504 insertions(+), 184 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/atomic-instrumented.h b/include/asm-generic/atomic-instrumented.h
index 0d4b1d3dbc1e..b8f5b35216e1 100644
--- a/include/asm-generic/atomic-instrumented.h
+++ b/include/asm-generic/atomic-instrumented.h
@@ -1,3 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
+
+// Generated by scripts/atomic/gen-atomic-instrumented.sh
+// DO NOT MODIFY THIS FILE DIRECTLY
+
 /*
  * This file provides wrappers with KASAN instrumentation for atomic operations.
  * To use this functionality an arch's atomic.h file needs to define all
@@ -9,459 +14,1774 @@
  * arch_ variants (i.e. arch_atomic_read()/arch_atomic_cmpxchg()) to avoid
  * double instrumentation.
  */
+#ifndef _ASM_GENERIC_ATOMIC_INSTRUMENTED_H
+#define _ASM_GENERIC_ATOMIC_INSTRUMENTED_H
+
+#include <linux/build_bug.h>
+#include <linux/kasan-checks.h>
+
+static inline int
+atomic_read(const atomic_t *v)
+{
+	kasan_check_read(v, sizeof(*v));
+	return arch_atomic_read(v);
+}
+#define atomic_read atomic_read
+
+#if defined(arch_atomic_read_acquire)
+static inline int
+atomic_read_acquire(const atomic_t *v)
+{
+	kasan_check_read(v, sizeof(*v));
+	return arch_atomic_read_acquire(v);
+}
+#define atomic_read_acquire atomic_read_acquire
+#endif
+
+static inline void
+atomic_set(atomic_t *v, int i)
+{
+	kasan_check_write(v, sizeof(*v));
+	arch_atomic_set(v, i);
+}
+#define atomic_set atomic_set
+
+#if defined(arch_atomic_set_release)
+static inline void
+atomic_set_release(atomic_t *v, int i)
+{
+	kasan_check_write(v, sizeof(*v));
+	arch_atomic_set_release(v, i);
+}
+#define atomic_set_release atomic_set_release
+#endif
+
+static inline void
+atomic_add(int i, atomic_t *v)
+{
+	kasan_check_write(v, sizeof(*v));
+	arch_atomic_add(i, v);
+}
+#define atomic_add atomic_add
+
+#if !defined(arch_atomic_add_return_relaxed) || defined(arch_atomic_add_return)
+static inline int
+atomic_add_return(int i, atomic_t *v)
+{
+	kasan_check_write(v, sizeof(*v));
+	return arch_atomic_add_return(i, v);
+}
+#define atomic_add_return atomic_add_return
+#endif
+
+#if defined(arch_atomic_add_return_acquire)
+static inline int
+atomic_add_return_acquire(int i, atomic_t *v)
+{
+	kasan_check_write(v, sizeof(*v));
+	return arch_atomic_add_return_acquire(i, v);
+}
+#define atomic_add_return_acquire atomic_add_return_acquire
+#endif
+
+#if defined(arch_atomic_add_return_release)
+static inline int
+atomic_add_return_release(int i, atomic_t *v)
+{
+	kasan_check_write(v, sizeof(*v));
+	return arch_atomic_add_return_release(i, v);
+}
+#define atomic_add_return_release atomic_add_return_release
+#endif
+
+#if defined(arch_atomic_add_return_relaxed)
+static inline int
+atomic_add_return_relaxed(int i, atomic_t *v)
+{
+	kasan_check_write(v, sizeof(*v));
+	return arch_atomic_add_return_relaxed(i, v);
+}
+#define atomic_add_return_relaxed atomic_add_return_relaxed
+#endif
+
+#if !defined(arch_atomic_fetch_add_relaxed) || defined(arch_atomic_fetch_add)
+static inline int
+atomic_fetch_add(int i, atomic_t *v)
+{
+	kasan_check_write(v, sizeof(*v));
+	return arch_atomic_fetch_add(i, v);
+}
+#define atomic_fetch_add atomic_fetch_add
+#endif
+
+#if defined(arch_atomic_fetch_add_acquire)
+static inline int
+atomic_fetch_add_acquire(int i, atomic_t *v)
+{
+	kasan_check_write(v, sizeof(*v));
+	return arch_atomic_fetch_add_acquire(i, v);
+}
+#define atomic_fetch_add_acquire atomic_fetch_add_acquire
+#endif
+
+#if defined(arch_atomic_fetch_add_release)
+static inline int
+atomic_fetch_add_release(int i, atomic_t *v)
+{
+	kasan_check_write(v, sizeof(*v));
+	return arch_atomic_fetch_add_release(i, v);
+}
+#define atomic_fetch_add_release atomic_fetch_add_release
+#endif
+
+#if defined(arch_atomic_fetch_add_relaxed)
+static inline int
+atomic_fetch_add_relaxed(int i, atomic_t *v)
+{
+	kasan_check_write(v, sizeof(*v));
+	return arch_atomic_fetch_add_relaxed(i, v);
+}
+#define atomic_fetch_add_relaxed atomic_fetch_add_relaxed
+#endif
+
+static inline void
+atomic_sub(int i, atomic_t *v)
+{
+	kasan_check_write(v, sizeof(*v));
+	arch_atomic_sub(i, v);
+}
+#define atomic_sub atomic_sub
+
+#if !defined(arch_atomic_sub_return_relaxed) || defined(arch_atomic_sub_return)
+static inline int
+atomic_sub_return(int i, atomic_t *v)
+{
+	kasan_check_write(v, sizeof(*v));
+	return arch_atomic_sub_return(i, v);
+}
+#define atomic_sub_return atomic_sub_return
+#endif
+
+#if defined(arch_atomic_sub_return_acquire)
+static inline int
+atomic_sub_return_acquire(int i, atomic_t *v)
+{
+	kasan_check_write(v, sizeof(*v));
+	return arch_atomic_sub_return_acquire(i, v);
+}
+#define atomic_sub_return_acquire atomic_sub_return_acquire
+#endif
+
+#if defined(arch_atomic_sub_return_release)
+static inline int
+atomic_sub_return_release(int i, atomic_t *v)
+{
+	kasan_check_write(v, sizeof(*v));
+	return arch_atomic_sub_return_release(i, v);
+}
+#define atomic_sub_return_release atomic_sub_return_release
+#endif
+
+#if defined(arch_atomic_sub_return_relaxed)
+static inline int
+atomic_sub_return_relaxed(int i, atomic_t *v)
+{
+	kasan_check_write(v, sizeof(*v));
+	return arch_atomic_sub_return_relaxed(i, v);
+}
+#define atomic_sub_return_relaxed atomic_sub_return_relaxed
+#endif
+
+#if !defined(arch_atomic_fetch_sub_relaxed) || defined(arch_atomic_fetch_sub)
+static inline int
+atomic_fetch_sub(int i, atomic_t *v)
+{
+	kasan_check_write(v, sizeof(*v));
+	return arch_atomic_fetch_sub(i, v);
+}
+#define atomic_fetch_sub atomic_fetch_sub
+#endif
+
+#if defined(arch_atomic_fetch_sub_acquire)
+static inline int
+atomic_fetch_sub_acquire(int i, atomic_t *v)
+{
+	kasan_check_write(v, sizeof(*v));
+	return arch_atomic_fetch_sub_acquire(i, v);
+}
+#define atomic_fetch_sub_acquire atomic_fetch_sub_acquire
+#endif
+
+#if defined(arch_atomic_fetch_sub_release)
+static inline int
+atomic_fetch_sub_release(int i, atomic_t *v)
+{
+	kasan_check_write(v, sizeof(*v));
+	return arch_atomic_fetch_sub_release(i, v);
+}
+#define atomic_fetch_sub_release atomic_fetch_sub_release
+#endif
+
+#if defined(arch_atomic_fetch_sub_relaxed)
+static inline int
+atomic_fetch_sub_relaxed(int i, atomic_t *v)
+{
+	kasan_check_write(v, sizeof(*v));
+	return arch_atomic_fetch_sub_relaxed(i, v);
+}
+#define atomic_fetch_sub_relaxed atomic_fetch_sub_relaxed
+#endif
+
+#if defined(arch_atomic_inc)
+static inline void
+atomic_inc(atomic_t *v)
+{
+	kasan_check_write(v, sizeof(*v));
+	arch_atomic_inc(v);
+}
+#define atomic_inc atomic_inc
+#endif
+
+#if defined(arch_atomic_inc_return)
+static inline int
+atomic_inc_return(atomic_t *v)
+{
+	kasan_check_write(v, sizeof(*v));
+	return arch_atomic_inc_return(v);
+}
+#define atomic_inc_return atomic_inc_return
+#endif
+
+#if defined(arch_atomic_inc_return_acquire)
+static inline int
+atomic_inc_return_acquire(atomic_t *v)
+{
+	kasan_check_write(v, sizeof(*v));
+	return arch_atomic_inc_return_acquire(v);
+}
+#define atomic_inc_return_acquire atomic_inc_return_acquire
+#endif
+
+#if defined(arch_atomic_inc_return_release)
+static inline int
+atomic_inc_return_release(atomic_t *v)
+{
+	kasan_check_write(v, sizeof(*v));
+	return arch_atomic_inc_return_release(v);
+}
+#define atomic_inc_return_release atomic_inc_return_release
+#endif
+
+#if defined(arch_atomic_inc_return_relaxed)
+static inline int
+atomic_inc_return_relaxed(atomic_t *v)
+{
+	kasan_check_write(v, sizeof(*v));
+	return arch_atomic_inc_return_relaxed(v);
+}
+#define atomic_inc_return_relaxed atomic_inc_return_relaxed
+#endif
+
+#if defined(arch_atomic_fetch_inc)
+static inline int
+atomic_fetch_inc(atomic_t *v)
+{
+	kasan_check_write(v, sizeof(*v));
+	return arch_atomic_fetch_inc(v);
+}
+#define atomic_fetch_inc atomic_fetch_inc
+#endif
+
+#if defined(arch_atomic_fetch_inc_acquire)
+static inline int
+atomic_fetch_inc_acquire(atomic_t *v)
+{
+	kasan_check_write(v, sizeof(*v));
+	return arch_atomic_fetch_inc_acquire(v);
+}
+#define atomic_fetch_inc_acquire atomic_fetch_inc_acquire
+#endif
+
+#if defined(arch_atomic_fetch_inc_release)
+static inline int
+atomic_fetch_inc_release(atomic_t *v)
+{
+	kasan_check_write(v, sizeof(*v));
+	return arch_atomic_fetch_inc_release(v);
+}
+#define atomic_fetch_inc_release atomic_fetch_inc_release
+#endif
+
+#if defined(arch_atomic_fetch_inc_relaxed)
+static inline int
+atomic_fetch_inc_relaxed(atomic_t *v)
+{
+	kasan_check_write(v, sizeof(*v));
+	return arch_atomic_fetch_inc_relaxed(v);
+}
+#define atomic_fetch_inc_relaxed atomic_fetch_inc_relaxed
+#endif
+
+#if defined(arch_atomic_dec)
+static inline void
+atomic_dec(atomic_t *v)
+{
+	kasan_check_write(v, sizeof(*v));
+	arch_atomic_dec(v);
+}
+#define atomic_dec atomic_dec
+#endif
+
+#if defined(arch_atomic_dec_return)
+static inline int
+atomic_dec_return(atomic_t *v)
+{
+	kasan_check_write(v, sizeof(*v));
+	return arch_atomic_dec_return(v);
+}
+#define atomic_dec_return atomic_dec_return
+#endif
+
+#if defined(arch_atomic_dec_return_acquire)
+static inline int
+atomic_dec_return_acquire(atomic_t *v)
+{
+	kasan_check_write(v, sizeof(*v));
+	return arch_atomic_dec_return_acquire(v);
+}
+#define atomic_dec_return_acquire atomic_dec_return_acquire
+#endif
+
+#if defined(arch_atomic_dec_return_release)
+static inline int
+atomic_dec_return_release(atomic_t *v)
+{
+	kasan_check_write(v, sizeof(*v));
+	return arch_atomic_dec_return_release(v);
+}
+#define atomic_dec_return_release atomic_dec_return_release
+#endif
+
+#if defined(arch_atomic_dec_return_relaxed)
+static inline int
+atomic_dec_return_relaxed(atomic_t *v)
+{
+	kasan_check_write(v, sizeof(*v));
+	return arch_atomic_dec_return_relaxed(v);
+}
+#define atomic_dec_return_relaxed atomic_dec_return_relaxed
+#endif
+
+#if defined(arch_atomic_fetch_dec)
+static inline int
+atomic_fetch_dec(atomic_t *v)
+{
+	kasan_check_write(v, sizeof(*v));
+	return arch_atomic_fetch_dec(v);
+}
+#define atomic_fetch_dec atomic_fetch_dec
+#endif
+
+#if defined(arch_atomic_fetch_dec_acquire)
+static inline int
+atomic_fetch_dec_acquire(atomic_t *v)
+{
+	kasan_check_write(v, sizeof(*v));
+	return arch_atomic_fetch_dec_acquire(v);
+}
+#define atomic_fetch_dec_acquire atomic_fetch_dec_acquire
+#endif
+
+#if defined(arch_atomic_fetch_dec_release)
+static inline int
+atomic_fetch_dec_release(atomic_t *v)
+{
+	kasan_check_write(v, sizeof(*v));
+	return arch_atomic_fetch_dec_release(v);
+}
+#define atomic_fetch_dec_release atomic_fetch_dec_release
+#endif
+
+#if defined(arch_atomic_fetch_dec_relaxed)
+static inline int
+atomic_fetch_dec_relaxed(atomic_t *v)
+{
+	kasan_check_write(v, sizeof(*v));
+	return arch_atomic_fetch_dec_relaxed(v);
+}
+#define atomic_fetch_dec_relaxed atomic_fetch_dec_relaxed
+#endif
+
+static inline void
+atomic_and(int i, atomic_t *v)
+{
+	kasan_check_write(v, sizeof(*v));
+	arch_atomic_and(i, v);
+}
+#define atomic_and atomic_and
+
+#if !defined(arch_atomic_fetch_and_relaxed) || defined(arch_atomic_fetch_and)
+static inline int
+atomic_fetch_and(int i, atomic_t *v)
+{
+	kasan_check_write(v, sizeof(*v));
+	return arch_atomic_fetch_and(i, v);
+}
+#define atomic_fetch_and atomic_fetch_and
+#endif
+
+#if defined(arch_atomic_fetch_and_acquire)
+static inline int
+atomic_fetch_and_acquire(int i, atomic_t *v)
+{
+	kasan_check_write(v, sizeof(*v));
+	return arch_atomic_fetch_and_acquire(i, v);
+}
+#define atomic_fetch_and_acquire atomic_fetch_and_acquire
+#endif
+
+#if defined(arch_atomic_fetch_and_release)
+static inline int
+atomic_fetch_and_release(int i, atomic_t *v)
+{
+	kasan_check_write(v, sizeof(*v));
+	return arch_atomic_fetch_and_release(i, v);
+}
+#define atomic_fetch_and_release atomic_fetch_and_release
+#endif
+
+#if defined(arch_atomic_fetch_and_relaxed)
+static inline int
+atomic_fetch_and_relaxed(int i, atomic_t *v)
+{
+	kasan_check_write(v, sizeof(*v));
+	return arch_atomic_fetch_and_relaxed(i, v);
+}
+#define atomic_fetch_and_relaxed atomic_fetch_and_relaxed
+#endif
+
+#if defined(arch_atomic_andnot)
+static inline void
+atomic_andnot(int i, atomic_t *v)
+{
+	kasan_check_write(v, sizeof(*v));
+	arch_atomic_andnot(i, v);
+}
+#define atomic_andnot atomic_andnot
+#endif
+
+#if defined(arch_atomic_fetch_andnot)
+static inline int
+atomic_fetch_andnot(int i, atomic_t *v)
+{
+	kasan_check_write(v, sizeof(*v));
+	return arch_atomic_fetch_andnot(i, v);
+}
+#define atomic_fetch_andnot atomic_fetch_andnot
+#endif
+
+#if defined(arch_atomic_fetch_andnot_acquire)
+static inline int
+atomic_fetch_andnot_acquire(int i, atomic_t *v)
+{
+	kasan_check_write(v, sizeof(*v));
+	return arch_atomic_fetch_andnot_acquire(i, v);
+}
+#define atomic_fetch_andnot_acquire atomic_fetch_andnot_acquire
+#endif
+
+#if defined(arch_atomic_fetch_andnot_release)
+static inline int
+atomic_fetch_andnot_release(int i, atomic_t *v)
+{
+	kasan_check_write(v, sizeof(*v));
+	return arch_atomic_fetch_andnot_release(i, v);
+}
+#define atomic_fetch_andnot_release atomic_fetch_andnot_release
+#endif
+
+#if defined(arch_atomic_fetch_andnot_relaxed)
+static inline int
+atomic_fetch_andnot_relaxed(int i, atomic_t *v)
+{
+	kasan_check_write(v, sizeof(*v));
+	return arch_atomic_fetch_andnot_relaxed(i, v);
+}
+#define atomic_fetch_andnot_relaxed atomic_fetch_andnot_relaxed
+#endif
+
+static inline void
+atomic_or(int i, atomic_t *v)
+{
+	kasan_check_write(v, sizeof(*v));
+	arch_atomic_or(i, v);
+}
+#define atomic_or atomic_or
+
+#if !defined(arch_atomic_fetch_or_relaxed) || defined(arch_atomic_fetch_or)
+static inline int
+atomic_fetch_or(int i, atomic_t *v)
+{
+	kasan_check_write(v, sizeof(*v));
+	return arch_atomic_fetch_or(i, v);
+}
+#define atomic_fetch_or atomic_fetch_or
+#endif
+
+#if defined(arch_atomic_fetch_or_acquire)
+static inline int
+atomic_fetch_or_acquire(int i, atomic_t *v)
+{
+	kasan_check_write(v, sizeof(*v));
+	return arch_atomic_fetch_or_acquire(i, v);
+}
+#define atomic_fetch_or_acquire atomic_fetch_or_acquire
+#endif
+
+#if defined(arch_atomic_fetch_or_release)
+static inline int
+atomic_fetch_or_release(int i, atomic_t *v)
+{
+	kasan_check_write(v, sizeof(*v));
+	return arch_atomic_fetch_or_release(i, v);
+}
+#define atomic_fetch_or_release atomic_fetch_or_release
+#endif
+
+#if defined(arch_atomic_fetch_or_relaxed)
+static inline int
+atomic_fetch_or_relaxed(int i, atomic_t *v)
+{
+	kasan_check_write(v, sizeof(*v));
+	return arch_atomic_fetch_or_relaxed(i, v);
+}
+#define atomic_fetch_or_relaxed atomic_fetch_or_relaxed
+#endif
+
+static inline void
+atomic_xor(int i, atomic_t *v)
+{
+	kasan_check_write(v, sizeof(*v));
+	arch_atomic_xor(i, v);
+}
+#define atomic_xor atomic_xor
+
+#if !defined(arch_atomic_fetch_xor_relaxed) || defined(arch_atomic_fetch_xor)
+static inline int
+atomic_fetch_xor(int i, atomic_t *v)
+{
+	kasan_check_write(v, sizeof(*v));
+	return arch_atomic_fetch_xor(i, v);
+}
+#define atomic_fetch_xor atomic_fetch_xor
+#endif
+
+#if defined(arch_atomic_fetch_xor_acquire)
+static inline int
+atomic_fetch_xor_acquire(int i, atomic_t *v)
+{
+	kasan_check_write(v, sizeof(*v));
+	return arch_atomic_fetch_xor_acquire(i, v);
+}
+#define atomic_fetch_xor_acquire atomic_fetch_xor_acquire
+#endif
+
+#if defined(arch_atomic_fetch_xor_release)
+static inline int
+atomic_fetch_xor_release(int i, atomic_t *v)
+{
+	kasan_check_write(v, sizeof(*v));
+	return arch_atomic_fetch_xor_release(i, v);
+}
+#define atomic_fetch_xor_release atomic_fetch_xor_release
+#endif
+
+#if defined(arch_atomic_fetch_xor_relaxed)
+static inline int
+atomic_fetch_xor_relaxed(int i, atomic_t *v)
+{
+	kasan_check_write(v, sizeof(*v));
+	return arch_atomic_fetch_xor_relaxed(i, v);
+}
+#define atomic_fetch_xor_relaxed atomic_fetch_xor_relaxed
+#endif
+
+#if !defined(arch_atomic_xchg_relaxed) || defined(arch_atomic_xchg)
+static inline int
+atomic_xchg(atomic_t *v, int i)
+{
+	kasan_check_write(v, sizeof(*v));
+	return arch_atomic_xchg(v, i);
+}
+#define atomic_xchg atomic_xchg
+#endif
+
+#if defined(arch_atomic_xchg_acquire)
+static inline int
+atomic_xchg_acquire(atomic_t *v, int i)
+{
+	kasan_check_write(v, sizeof(*v));
+	return arch_atomic_xchg_acquire(v, i);
+}
+#define atomic_xchg_acquire atomic_xchg_acquire
+#endif
+
+#if defined(arch_atomic_xchg_release)
+static inline int
+atomic_xchg_release(atomic_t *v, int i)
+{
+	kasan_check_write(v, sizeof(*v));
+	return arch_atomic_xchg_release(v, i);
+}
+#define atomic_xchg_release atomic_xchg_release
+#endif
+
+#if defined(arch_atomic_xchg_relaxed)
+static inline int
+atomic_xchg_relaxed(atomic_t *v, int i)
+{
+	kasan_check_write(v, sizeof(*v));
+	return arch_atomic_xchg_relaxed(v, i);
+}
+#define atomic_xchg_relaxed atomic_xchg_relaxed
+#endif
+
+#if !defined(arch_atomic_cmpxchg_relaxed) || defined(arch_atomic_cmpxchg)
+static inline int
+atomic_cmpxchg(atomic_t *v, int old, int new)
+{
+	kasan_check_write(v, sizeof(*v));
+	return arch_atomic_cmpxchg(v, old, new);
+}
+#define atomic_cmpxchg atomic_cmpxchg
+#endif
+
+#if defined(arch_atomic_cmpxchg_acquire)
+static inline int
+atomic_cmpxchg_acquire(atomic_t *v, int old, int new)
+{
+	kasan_check_write(v, sizeof(*v));
+	return arch_atomic_cmpxchg_acquire(v, old, new);
+}
+#define atomic_cmpxchg_acquire atomic_cmpxchg_acquire
+#endif
+
+#if defined(arch_atomic_cmpxchg_release)
+static inline int
+atomic_cmpxchg_release(atomic_t *v, int old, int new)
+{
+	kasan_check_write(v, sizeof(*v));
+	return arch_atomic_cmpxchg_release(v, old, new);
+}
+#define atomic_cmpxchg_release atomic_cmpxchg_release
+#endif
+
+#if defined(arch_atomic_cmpxchg_relaxed)
+static inline int
+atomic_cmpxchg_relaxed(atomic_t *v, int old, int new)
+{
+	kasan_check_write(v, sizeof(*v));
+	return arch_atomic_cmpxchg_relaxed(v, old, new);
+}
+#define atomic_cmpxchg_relaxed atomic_cmpxchg_relaxed
+#endif
+
+#if defined(arch_atomic_try_cmpxchg)
+static inline bool
+atomic_try_cmpxchg(atomic_t *v, int *old, int new)
+{
+	kasan_check_write(v, sizeof(*v));
+	kasan_check_write(old, sizeof(*old));
+	return arch_atomic_try_cmpxchg(v, old, new);
+}
+#define atomic_try_cmpxchg atomic_try_cmpxchg
+#endif
+
+#if defined(arch_atomic_try_cmpxchg_acquire)
+static inline bool
+atomic_try_cmpxchg_acquire(atomic_t *v, int *old, int new)
+{
+	kasan_check_write(v, sizeof(*v));
+	kasan_check_write(old, sizeof(*old));
+	return arch_atomic_try_cmpxchg_acquire(v, old, new);
+}
+#define atomic_try_cmpxchg_acquire atomic_try_cmpxchg_acquire
+#endif
+
+#if defined(arch_atomic_try_cmpxchg_release)
+static inline bool
+atomic_try_cmpxchg_release(atomic_t *v, int *old, int new)
+{
+	kasan_check_write(v, sizeof(*v));
+	kasan_check_write(old, sizeof(*old));
+	return arch_atomic_try_cmpxchg_release(v, old, new);
+}
+#define atomic_try_cmpxchg_release atomic_try_cmpxchg_release
+#endif
+
+#if defined(arch_atomic_try_cmpxchg_relaxed)
+static inline bool
+atomic_try_cmpxchg_relaxed(atomic_t *v, int *old, int new)
+{
+	kasan_check_write(v, sizeof(*v));
+	kasan_check_write(old, sizeof(*old));
+	return arch_atomic_try_cmpxchg_relaxed(v, old, new);
+}
+#define atomic_try_cmpxchg_relaxed atomic_try_cmpxchg_relaxed
+#endif
+
+#if defined(arch_atomic_sub_and_test)
+static inline bool
+atomic_sub_and_test(int i, atomic_t *v)
+{
+	kasan_check_write(v, sizeof(*v));
+	return arch_atomic_sub_and_test(i, v);
+}
+#define atomic_sub_and_test atomic_sub_and_test
+#endif
+
+#if defined(arch_atomic_dec_and_test)
+static inline bool
+atomic_dec_and_test(atomic_t *v)
+{
+	kasan_check_write(v, sizeof(*v));
+	return arch_atomic_dec_and_test(v);
+}
+#define atomic_dec_and_test atomic_dec_and_test
+#endif
+
+#if defined(arch_atomic_inc_and_test)
+static inline bool
+atomic_inc_and_test(atomic_t *v)
+{
+	kasan_check_write(v, sizeof(*v));
+	return arch_atomic_inc_and_test(v);
+}
+#define atomic_inc_and_test atomic_inc_and_test
+#endif
+
+#if defined(arch_atomic_add_negative)
+static inline bool
+atomic_add_negative(int i, atomic_t *v)
+{
+	kasan_check_write(v, sizeof(*v));
+	return arch_atomic_add_negative(i, v);
+}
+#define atomic_add_negative atomic_add_negative
+#endif
+
+#if defined(arch_atomic_fetch_add_unless)
+static inline int
+atomic_fetch_add_unless(atomic_t *v, int a, int u)
+{
+	kasan_check_write(v, sizeof(*v));
+	return arch_atomic_fetch_add_unless(v, a, u);
+}
+#define atomic_fetch_add_unless atomic_fetch_add_unless
+#endif
+
+#if defined(arch_atomic_add_unless)
+static inline bool
+atomic_add_unless(atomic_t *v, int a, int u)
+{
+	kasan_check_write(v, sizeof(*v));
+	return arch_atomic_add_unless(v, a, u);
+}
+#define atomic_add_unless atomic_add_unless
+#endif
+
+#if defined(arch_atomic_inc_not_zero)
+static inline bool
+atomic_inc_not_zero(atomic_t *v)
+{
+	kasan_check_write(v, sizeof(*v));
+	return arch_atomic_inc_not_zero(v);
+}
+#define atomic_inc_not_zero atomic_inc_not_zero
+#endif
+
+#if defined(arch_atomic_inc_unless_negative)
+static inline bool
+atomic_inc_unless_negative(atomic_t *v)
+{
+	kasan_check_write(v, sizeof(*v));
+	return arch_atomic_inc_unless_negative(v);
+}
+#define atomic_inc_unless_negative atomic_inc_unless_negative
+#endif
+
+#if defined(arch_atomic_dec_unless_positive)
+static inline bool
+atomic_dec_unless_positive(atomic_t *v)
+{
+	kasan_check_write(v, sizeof(*v));
+	return arch_atomic_dec_unless_positive(v);
+}
+#define atomic_dec_unless_positive atomic_dec_unless_positive
+#endif
+
+#if defined(arch_atomic_dec_if_positive)
+static inline int
+atomic_dec_if_positive(atomic_t *v)
+{
+	kasan_check_write(v, sizeof(*v));
+	return arch_atomic_dec_if_positive(v);
+}
+#define atomic_dec_if_positive atomic_dec_if_positive
+#endif
+
+static inline s64
+atomic64_read(const atomic64_t *v)
+{
+	kasan_check_read(v, sizeof(*v));
+	return arch_atomic64_read(v);
+}
+#define atomic64_read atomic64_read
+
+#if defined(arch_atomic64_read_acquire)
+static inline s64
+atomic64_read_acquire(const atomic64_t *v)
+{
+	kasan_check_read(v, sizeof(*v));
+	return arch_atomic64_read_acquire(v);
+}
+#define atomic64_read_acquire atomic64_read_acquire
+#endif
+
+static inline void
+atomic64_set(atomic64_t *v, s64 i)
+{
+	kasan_check_write(v, sizeof(*v));
+	arch_atomic64_set(v, i);
+}
+#define atomic64_set atomic64_set
+
+#if defined(arch_atomic64_set_release)
+static inline void
+atomic64_set_release(atomic64_t *v, s64 i)
+{
+	kasan_check_write(v, sizeof(*v));
+	arch_atomic64_set_release(v, i);
+}
+#define atomic64_set_release atomic64_set_release
+#endif
+
+static inline void
+atomic64_add(s64 i, atomic64_t *v)
+{
+	kasan_check_write(v, sizeof(*v));
+	arch_atomic64_add(i, v);
+}
+#define atomic64_add atomic64_add
+
+#if !defined(arch_atomic64_add_return_relaxed) || defined(arch_atomic64_add_return)
+static inline s64
+atomic64_add_return(s64 i, atomic64_t *v)
+{
+	kasan_check_write(v, sizeof(*v));
+	return arch_atomic64_add_return(i, v);
+}
+#define atomic64_add_return atomic64_add_return
+#endif
+
+#if defined(arch_atomic64_add_return_acquire)
+static inline s64
+atomic64_add_return_acquire(s64 i, atomic64_t *v)
+{
+	kasan_check_write(v, sizeof(*v));
+	return arch_atomic64_add_return_acquire(i, v);
+}
+#define atomic64_add_return_acquire atomic64_add_return_acquire
+#endif
+
+#if defined(arch_atomic64_add_return_release)
+static inline s64
+atomic64_add_return_release(s64 i, atomic64_t *v)
+{
+	kasan_check_write(v, sizeof(*v));
+	return arch_atomic64_add_return_release(i, v);
+}
+#define atomic64_add_return_release atomic64_add_return_release
+#endif
+
+#if defined(arch_atomic64_add_return_relaxed)
+static inline s64
+atomic64_add_return_relaxed(s64 i, atomic64_t *v)
+{
+	kasan_check_write(v, sizeof(*v));
+	return arch_atomic64_add_return_relaxed(i, v);
+}
+#define atomic64_add_return_relaxed atomic64_add_return_relaxed
+#endif
+
+#if !defined(arch_atomic64_fetch_add_relaxed) || defined(arch_atomic64_fetch_add)
+static inline s64
+atomic64_fetch_add(s64 i, atomic64_t *v)
+{
+	kasan_check_write(v, sizeof(*v));
+	return arch_atomic64_fetch_add(i, v);
+}
+#define atomic64_fetch_add atomic64_fetch_add
+#endif
+
+#if defined(arch_atomic64_fetch_add_acquire)
+static inline s64
+atomic64_fetch_add_acquire(s64 i, atomic64_t *v)
+{
+	kasan_check_write(v, sizeof(*v));
+	return arch_atomic64_fetch_add_acquire(i, v);
+}
+#define atomic64_fetch_add_acquire atomic64_fetch_add_acquire
+#endif
+
+#if defined(arch_atomic64_fetch_add_release)
+static inline s64
+atomic64_fetch_add_release(s64 i, atomic64_t *v)
+{
+	kasan_check_write(v, sizeof(*v));
+	return arch_atomic64_fetch_add_release(i, v);
+}
+#define atomic64_fetch_add_release atomic64_fetch_add_release
+#endif
+
+#if defined(arch_atomic64_fetch_add_relaxed)
+static inline s64
+atomic64_fetch_add_relaxed(s64 i, atomic64_t *v)
+{
+	kasan_check_write(v, sizeof(*v));
+	return arch_atomic64_fetch_add_relaxed(i, v);
+}
+#define atomic64_fetch_add_relaxed atomic64_fetch_add_relaxed
+#endif
+
+static inline void
+atomic64_sub(s64 i, atomic64_t *v)
+{
+	kasan_check_write(v, sizeof(*v));
+	arch_atomic64_sub(i, v);
+}
+#define atomic64_sub atomic64_sub
 
-#ifndef _LINUX_ATOMIC_INSTRUMENTED_H
-#define _LINUX_ATOMIC_INSTRUMENTED_H
+#if !defined(arch_atomic64_sub_return_relaxed) || defined(arch_atomic64_sub_return)
+static inline s64
+atomic64_sub_return(s64 i, atomic64_t *v)
+{
+	kasan_check_write(v, sizeof(*v));
+	return arch_atomic64_sub_return(i, v);
+}
+#define atomic64_sub_return atomic64_sub_return
+#endif
 
-#include <linux/build_bug.h>
-#include <linux/kasan-checks.h>
+#if defined(arch_atomic64_sub_return_acquire)
+static inline s64
+atomic64_sub_return_acquire(s64 i, atomic64_t *v)
+{
+	kasan_check_write(v, sizeof(*v));
+	return arch_atomic64_sub_return_acquire(i, v);
+}
+#define atomic64_sub_return_acquire atomic64_sub_return_acquire
+#endif
 
-static __always_inline int atomic_read(const atomic_t *v)
+#if defined(arch_atomic64_sub_return_release)
+static inline s64
+atomic64_sub_return_release(s64 i, atomic64_t *v)
 {
-	kasan_check_read(v, sizeof(*v));
-	return arch_atomic_read(v);
+	kasan_check_write(v, sizeof(*v));
+	return arch_atomic64_sub_return_release(i, v);
 }
+#define atomic64_sub_return_release atomic64_sub_return_release
+#endif
 
-static __always_inline s64 atomic64_read(const atomic64_t *v)
+#if defined(arch_atomic64_sub_return_relaxed)
+static inline s64
+atomic64_sub_return_relaxed(s64 i, atomic64_t *v)
 {
-	kasan_check_read(v, sizeof(*v));
-	return arch_atomic64_read(v);
+	kasan_check_write(v, sizeof(*v));
+	return arch_atomic64_sub_return_relaxed(i, v);
 }
+#define atomic64_sub_return_relaxed atomic64_sub_return_relaxed
+#endif
 
-static __always_inline void atomic_set(atomic_t *v, int i)
+#if !defined(arch_atomic64_fetch_sub_relaxed) || defined(arch_atomic64_fetch_sub)
+static inline s64
+atomic64_fetch_sub(s64 i, atomic64_t *v)
 {
 	kasan_check_write(v, sizeof(*v));
-	arch_atomic_set(v, i);
+	return arch_atomic64_fetch_sub(i, v);
 }
+#define atomic64_fetch_sub atomic64_fetch_sub
+#endif
 
-static __always_inline void atomic64_set(atomic64_t *v, s64 i)
+#if defined(arch_atomic64_fetch_sub_acquire)
+static inline s64
+atomic64_fetch_sub_acquire(s64 i, atomic64_t *v)
 {
 	kasan_check_write(v, sizeof(*v));
-	arch_atomic64_set(v, i);
+	return arch_atomic64_fetch_sub_acquire(i, v);
 }
+#define atomic64_fetch_sub_acquire atomic64_fetch_sub_acquire
+#endif
 
-static __always_inline int atomic_xchg(atomic_t *v, int i)
+#if defined(arch_atomic64_fetch_sub_release)
+static inline s64
+atomic64_fetch_sub_release(s64 i, atomic64_t *v)
 {
 	kasan_check_write(v, sizeof(*v));
-	return arch_atomic_xchg(v, i);
+	return arch_atomic64_fetch_sub_release(i, v);
 }
+#define atomic64_fetch_sub_release atomic64_fetch_sub_release
+#endif
 
-static __always_inline s64 atomic64_xchg(atomic64_t *v, s64 i)
+#if defined(arch_atomic64_fetch_sub_relaxed)
+static inline s64
+atomic64_fetch_sub_relaxed(s64 i, atomic64_t *v)
 {
 	kasan_check_write(v, sizeof(*v));
-	return arch_atomic64_xchg(v, i);
+	return arch_atomic64_fetch_sub_relaxed(i, v);
 }
+#define atomic64_fetch_sub_relaxed atomic64_fetch_sub_relaxed
+#endif
 
-static __always_inline int atomic_cmpxchg(atomic_t *v, int old, int new)
+#if defined(arch_atomic64_inc)
+static inline void
+atomic64_inc(atomic64_t *v)
 {
 	kasan_check_write(v, sizeof(*v));
-	return arch_atomic_cmpxchg(v, old, new);
+	arch_atomic64_inc(v);
 }
+#define atomic64_inc atomic64_inc
+#endif
 
-static __always_inline s64 atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new)
+#if defined(arch_atomic64_inc_return)
+static inline s64
+atomic64_inc_return(atomic64_t *v)
 {
 	kasan_check_write(v, sizeof(*v));
-	return arch_atomic64_cmpxchg(v, old, new);
+	return arch_atomic64_inc_return(v);
 }
+#define atomic64_inc_return atomic64_inc_return
+#endif
 
-#ifdef arch_atomic_try_cmpxchg
-#define atomic_try_cmpxchg atomic_try_cmpxchg
-static __always_inline bool atomic_try_cmpxchg(atomic_t *v, int *old, int new)
+#if defined(arch_atomic64_inc_return_acquire)
+static inline s64
+atomic64_inc_return_acquire(atomic64_t *v)
 {
 	kasan_check_write(v, sizeof(*v));
-	kasan_check_read(old, sizeof(*old));
-	return arch_atomic_try_cmpxchg(v, old, new);
+	return arch_atomic64_inc_return_acquire(v);
 }
+#define atomic64_inc_return_acquire atomic64_inc_return_acquire
 #endif
 
-#ifdef arch_atomic64_try_cmpxchg
-#define atomic64_try_cmpxchg atomic64_try_cmpxchg
-static __always_inline bool atomic64_try_cmpxchg(atomic64_t *v, s64 *old, s64 new)
+#if defined(arch_atomic64_inc_return_release)
+static inline s64
+atomic64_inc_return_release(atomic64_t *v)
 {
 	kasan_check_write(v, sizeof(*v));
-	kasan_check_read(old, sizeof(*old));
-	return arch_atomic64_try_cmpxchg(v, old, new);
+	return arch_atomic64_inc_return_release(v);
 }
+#define atomic64_inc_return_release atomic64_inc_return_release
 #endif
 
-#ifdef arch_atomic_fetch_add_unless
-#define atomic_fetch_add_unless atomic_fetch_add_unless
-static __always_inline int atomic_fetch_add_unless(atomic_t *v, int a, int u)
+#if defined(arch_atomic64_inc_return_relaxed)
+static inline s64
+atomic64_inc_return_relaxed(atomic64_t *v)
 {
 	kasan_check_write(v, sizeof(*v));
-	return arch_atomic_fetch_add_unless(v, a, u);
+	return arch_atomic64_inc_return_relaxed(v);
 }
+#define atomic64_inc_return_relaxed atomic64_inc_return_relaxed
 #endif
 
-#ifdef arch_atomic64_fetch_add_unless
-#define atomic64_fetch_add_unless atomic64_fetch_add_unless
-static __always_inline s64 atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
+#if defined(arch_atomic64_fetch_inc)
+static inline s64
+atomic64_fetch_inc(atomic64_t *v)
 {
 	kasan_check_write(v, sizeof(*v));
-	return arch_atomic64_fetch_add_unless(v, a, u);
+	return arch_atomic64_fetch_inc(v);
 }
+#define atomic64_fetch_inc atomic64_fetch_inc
 #endif
 
-#ifdef arch_atomic_inc
-#define atomic_inc atomic_inc
-static __always_inline void atomic_inc(atomic_t *v)
+#if defined(arch_atomic64_fetch_inc_acquire)
+static inline s64
+atomic64_fetch_inc_acquire(atomic64_t *v)
 {
 	kasan_check_write(v, sizeof(*v));
-	arch_atomic_inc(v);
+	return arch_atomic64_fetch_inc_acquire(v);
 }
+#define atomic64_fetch_inc_acquire atomic64_fetch_inc_acquire
 #endif
 
-#ifdef arch_atomic64_inc
-#define atomic64_inc atomic64_inc
-static __always_inline void atomic64_inc(atomic64_t *v)
+#if defined(arch_atomic64_fetch_inc_release)
+static inline s64
+atomic64_fetch_inc_release(atomic64_t *v)
 {
 	kasan_check_write(v, sizeof(*v));
-	arch_atomic64_inc(v);
+	return arch_atomic64_fetch_inc_release(v);
 }
+#define atomic64_fetch_inc_release atomic64_fetch_inc_release
 #endif
 
-#ifdef arch_atomic_dec
-#define atomic_dec atomic_dec
-static __always_inline void atomic_dec(atomic_t *v)
+#if defined(arch_atomic64_fetch_inc_relaxed)
+static inline s64
+atomic64_fetch_inc_relaxed(atomic64_t *v)
 {
 	kasan_check_write(v, sizeof(*v));
-	arch_atomic_dec(v);
+	return arch_atomic64_fetch_inc_relaxed(v);
 }
+#define atomic64_fetch_inc_relaxed atomic64_fetch_inc_relaxed
 #endif
 
-#ifdef atch_atomic64_dec
-#define atomic64_dec
-static __always_inline void atomic64_dec(atomic64_t *v)
+#if defined(arch_atomic64_dec)
+static inline void
+atomic64_dec(atomic64_t *v)
 {
 	kasan_check_write(v, sizeof(*v));
 	arch_atomic64_dec(v);
 }
+#define atomic64_dec atomic64_dec
 #endif
 
-static __always_inline void atomic_add(int i, atomic_t *v)
+#if defined(arch_atomic64_dec_return)
+static inline s64
+atomic64_dec_return(atomic64_t *v)
 {
 	kasan_check_write(v, sizeof(*v));
-	arch_atomic_add(i, v);
+	return arch_atomic64_dec_return(v);
 }
+#define atomic64_dec_return atomic64_dec_return
+#endif
 
-static __always_inline void atomic64_add(s64 i, atomic64_t *v)
+#if defined(arch_atomic64_dec_return_acquire)
+static inline s64
+atomic64_dec_return_acquire(atomic64_t *v)
 {
 	kasan_check_write(v, sizeof(*v));
-	arch_atomic64_add(i, v);
+	return arch_atomic64_dec_return_acquire(v);
 }
+#define atomic64_dec_return_acquire atomic64_dec_return_acquire
+#endif
 
-static __always_inline void atomic_sub(int i, atomic_t *v)
+#if defined(arch_atomic64_dec_return_release)
+static inline s64
+atomic64_dec_return_release(atomic64_t *v)
 {
 	kasan_check_write(v, sizeof(*v));
-	arch_atomic_sub(i, v);
+	return arch_atomic64_dec_return_release(v);
 }
+#define atomic64_dec_return_release atomic64_dec_return_release
+#endif
 
-static __always_inline void atomic64_sub(s64 i, atomic64_t *v)
+#if defined(arch_atomic64_dec_return_relaxed)
+static inline s64
+atomic64_dec_return_relaxed(atomic64_t *v)
 {
 	kasan_check_write(v, sizeof(*v));
-	arch_atomic64_sub(i, v);
+	return arch_atomic64_dec_return_relaxed(v);
 }
+#define atomic64_dec_return_relaxed atomic64_dec_return_relaxed
+#endif
 
-static __always_inline void atomic_and(int i, atomic_t *v)
+#if defined(arch_atomic64_fetch_dec)
+static inline s64
+atomic64_fetch_dec(atomic64_t *v)
 {
 	kasan_check_write(v, sizeof(*v));
-	arch_atomic_and(i, v);
+	return arch_atomic64_fetch_dec(v);
+}
+#define atomic64_fetch_dec atomic64_fetch_dec
+#endif
+
+#if defined(arch_atomic64_fetch_dec_acquire)
+static inline s64
+atomic64_fetch_dec_acquire(atomic64_t *v)
+{
+	kasan_check_write(v, sizeof(*v));
+	return arch_atomic64_fetch_dec_acquire(v);
+}
+#define atomic64_fetch_dec_acquire atomic64_fetch_dec_acquire
+#endif
+
+#if defined(arch_atomic64_fetch_dec_release)
+static inline s64
+atomic64_fetch_dec_release(atomic64_t *v)
+{
+	kasan_check_write(v, sizeof(*v));
+	return arch_atomic64_fetch_dec_release(v);
 }
+#define atomic64_fetch_dec_release atomic64_fetch_dec_release
+#endif
+
+#if defined(arch_atomic64_fetch_dec_relaxed)
+static inline s64
+atomic64_fetch_dec_relaxed(atomic64_t *v)
+{
+	kasan_check_write(v, sizeof(*v));
+	return arch_atomic64_fetch_dec_relaxed(v);
+}
+#define atomic64_fetch_dec_relaxed atomic64_fetch_dec_relaxed
+#endif
 
-static __always_inline void atomic64_and(s64 i, atomic64_t *v)
+static inline void
+atomic64_and(s64 i, atomic64_t *v)
 {
 	kasan_check_write(v, sizeof(*v));
 	arch_atomic64_and(i, v);
 }
+#define atomic64_and atomic64_and
 
-static __always_inline void atomic_or(int i, atomic_t *v)
+#if !defined(arch_atomic64_fetch_and_relaxed) || defined(arch_atomic64_fetch_and)
+static inline s64
+atomic64_fetch_and(s64 i, atomic64_t *v)
 {
 	kasan_check_write(v, sizeof(*v));
-	arch_atomic_or(i, v);
+	return arch_atomic64_fetch_and(i, v);
 }
+#define atomic64_fetch_and atomic64_fetch_and
+#endif
 
-static __always_inline void atomic64_or(s64 i, atomic64_t *v)
+#if defined(arch_atomic64_fetch_and_acquire)
+static inline s64
+atomic64_fetch_and_acquire(s64 i, atomic64_t *v)
 {
 	kasan_check_write(v, sizeof(*v));
-	arch_atomic64_or(i, v);
+	return arch_atomic64_fetch_and_acquire(i, v);
 }
+#define atomic64_fetch_and_acquire atomic64_fetch_and_acquire
+#endif
 
-static __always_inline void atomic_xor(int i, atomic_t *v)
+#if defined(arch_atomic64_fetch_and_release)
+static inline s64
+atomic64_fetch_and_release(s64 i, atomic64_t *v)
 {
 	kasan_check_write(v, sizeof(*v));
-	arch_atomic_xor(i, v);
+	return arch_atomic64_fetch_and_release(i, v);
 }
+#define atomic64_fetch_and_release atomic64_fetch_and_release
+#endif
 
-static __always_inline void atomic64_xor(s64 i, atomic64_t *v)
+#if defined(arch_atomic64_fetch_and_relaxed)
+static inline s64
+atomic64_fetch_and_relaxed(s64 i, atomic64_t *v)
 {
 	kasan_check_write(v, sizeof(*v));
-	arch_atomic64_xor(i, v);
+	return arch_atomic64_fetch_and_relaxed(i, v);
 }
+#define atomic64_fetch_and_relaxed atomic64_fetch_and_relaxed
+#endif
 
-#ifdef arch_atomic_inc_return
-#define atomic_inc_return atomic_inc_return
-static __always_inline int atomic_inc_return(atomic_t *v)
+#if defined(arch_atomic64_andnot)
+static inline void
+atomic64_andnot(s64 i, atomic64_t *v)
 {
 	kasan_check_write(v, sizeof(*v));
-	return arch_atomic_inc_return(v);
+	arch_atomic64_andnot(i, v);
 }
+#define atomic64_andnot atomic64_andnot
 #endif
 
-#ifdef arch_atomic64_in_return
-#define atomic64_inc_return atomic64_inc_return
-static __always_inline s64 atomic64_inc_return(atomic64_t *v)
+#if defined(arch_atomic64_fetch_andnot)
+static inline s64
+atomic64_fetch_andnot(s64 i, atomic64_t *v)
 {
 	kasan_check_write(v, sizeof(*v));
-	return arch_atomic64_inc_return(v);
+	return arch_atomic64_fetch_andnot(i, v);
 }
+#define atomic64_fetch_andnot atomic64_fetch_andnot
 #endif
 
-#ifdef arch_atomic_dec_return
-#define atomic_dec_return atomic_dec_return
-static __always_inline int atomic_dec_return(atomic_t *v)
+#if defined(arch_atomic64_fetch_andnot_acquire)
+static inline s64
+atomic64_fetch_andnot_acquire(s64 i, atomic64_t *v)
 {
 	kasan_check_write(v, sizeof(*v));
-	return arch_atomic_dec_return(v);
+	return arch_atomic64_fetch_andnot_acquire(i, v);
 }
+#define atomic64_fetch_andnot_acquire atomic64_fetch_andnot_acquire
 #endif
 
-#ifdef arch_atomic64_dec_return
-#define atomic64_dec_return atomic64_dec_return
-static __always_inline s64 atomic64_dec_return(atomic64_t *v)
+#if defined(arch_atomic64_fetch_andnot_release)
+static inline s64
+atomic64_fetch_andnot_release(s64 i, atomic64_t *v)
 {
 	kasan_check_write(v, sizeof(*v));
-	return arch_atomic64_dec_return(v);
+	return arch_atomic64_fetch_andnot_release(i, v);
 }
+#define atomic64_fetch_andnot_release atomic64_fetch_andnot_release
 #endif
 
-#ifdef arch_atomic64_inc_not_zero
-#define atomic64_inc_not_zero atomic64_inc_not_zero
-static __always_inline bool atomic64_inc_not_zero(atomic64_t *v)
+#if defined(arch_atomic64_fetch_andnot_relaxed)
+static inline s64
+atomic64_fetch_andnot_relaxed(s64 i, atomic64_t *v)
 {
 	kasan_check_write(v, sizeof(*v));
-	return arch_atomic64_inc_not_zero(v);
+	return arch_atomic64_fetch_andnot_relaxed(i, v);
 }
+#define atomic64_fetch_andnot_relaxed atomic64_fetch_andnot_relaxed
 #endif
 
-#ifdef arch_atomic64_dec_if_positive
-#define atomic64_dec_if_positive atomic64_dec_if_positive
-static __always_inline s64 atomic64_dec_if_positive(atomic64_t *v)
+static inline void
+atomic64_or(s64 i, atomic64_t *v)
 {
 	kasan_check_write(v, sizeof(*v));
-	return arch_atomic64_dec_if_positive(v);
+	arch_atomic64_or(i, v);
+}
+#define atomic64_or atomic64_or
+
+#if !defined(arch_atomic64_fetch_or_relaxed) || defined(arch_atomic64_fetch_or)
+static inline s64
+atomic64_fetch_or(s64 i, atomic64_t *v)
+{
+	kasan_check_write(v, sizeof(*v));
+	return arch_atomic64_fetch_or(i, v);
 }
+#define atomic64_fetch_or atomic64_fetch_or
 #endif
 
-#ifdef arch_atomic_dec_and_test
-#define atomic_dec_and_test atomic_dec_and_test
-static __always_inline bool atomic_dec_and_test(atomic_t *v)
+#if defined(arch_atomic64_fetch_or_acquire)
+static inline s64
+atomic64_fetch_or_acquire(s64 i, atomic64_t *v)
 {
 	kasan_check_write(v, sizeof(*v));
-	return arch_atomic_dec_and_test(v);
+	return arch_atomic64_fetch_or_acquire(i, v);
 }
+#define atomic64_fetch_or_acquire atomic64_fetch_or_acquire
 #endif
 
-#ifdef arch_atomic64_dec_and_test
-#define atomic64_dec_and_test atomic64_dec_and_test
-static __always_inline bool atomic64_dec_and_test(atomic64_t *v)
+#if defined(arch_atomic64_fetch_or_release)
+static inline s64
+atomic64_fetch_or_release(s64 i, atomic64_t *v)
 {
 	kasan_check_write(v, sizeof(*v));
-	return arch_atomic64_dec_and_test(v);
+	return arch_atomic64_fetch_or_release(i, v);
 }
+#define atomic64_fetch_or_release atomic64_fetch_or_release
 #endif
 
-#ifdef arch_atomic_inc_and_test
-#define atomic_inc_and_test atomic_inc_and_test
-static __always_inline bool atomic_inc_and_test(atomic_t *v)
+#if defined(arch_atomic64_fetch_or_relaxed)
+static inline s64
+atomic64_fetch_or_relaxed(s64 i, atomic64_t *v)
 {
 	kasan_check_write(v, sizeof(*v));
-	return arch_atomic_inc_and_test(v);
+	return arch_atomic64_fetch_or_relaxed(i, v);
 }
+#define atomic64_fetch_or_relaxed atomic64_fetch_or_relaxed
 #endif
 
-#ifdef arch_atomic64_inc_and_test
-#define atomic64_inc_and_test atomic64_inc_and_test
-static __always_inline bool atomic64_inc_and_test(atomic64_t *v)
+static inline void
+atomic64_xor(s64 i, atomic64_t *v)
 {
 	kasan_check_write(v, sizeof(*v));
-	return arch_atomic64_inc_and_test(v);
+	arch_atomic64_xor(i, v);
+}
+#define atomic64_xor atomic64_xor
+
+#if !defined(arch_atomic64_fetch_xor_relaxed) || defined(arch_atomic64_fetch_xor)
+static inline s64
+atomic64_fetch_xor(s64 i, atomic64_t *v)
+{
+	kasan_check_write(v, sizeof(*v));
+	return arch_atomic64_fetch_xor(i, v);
 }
+#define atomic64_fetch_xor atomic64_fetch_xor
 #endif
 
-static __always_inline int atomic_add_return(int i, atomic_t *v)
+#if defined(arch_atomic64_fetch_xor_acquire)
+static inline s64
+atomic64_fetch_xor_acquire(s64 i, atomic64_t *v)
 {
 	kasan_check_write(v, sizeof(*v));
-	return arch_atomic_add_return(i, v);
+	return arch_atomic64_fetch_xor_acquire(i, v);
 }
+#define atomic64_fetch_xor_acquire atomic64_fetch_xor_acquire
+#endif
 
-static __always_inline s64 atomic64_add_return(s64 i, atomic64_t *v)
+#if defined(arch_atomic64_fetch_xor_release)
+static inline s64
+atomic64_fetch_xor_release(s64 i, atomic64_t *v)
 {
 	kasan_check_write(v, sizeof(*v));
-	return arch_atomic64_add_return(i, v);
+	return arch_atomic64_fetch_xor_release(i, v);
 }
+#define atomic64_fetch_xor_release atomic64_fetch_xor_release
+#endif
 
-static __always_inline int atomic_sub_return(int i, atomic_t *v)
+#if defined(arch_atomic64_fetch_xor_relaxed)
+static inline s64
+atomic64_fetch_xor_relaxed(s64 i, atomic64_t *v)
 {
 	kasan_check_write(v, sizeof(*v));
-	return arch_atomic_sub_return(i, v);
+	return arch_atomic64_fetch_xor_relaxed(i, v);
 }
+#define atomic64_fetch_xor_relaxed atomic64_fetch_xor_relaxed
+#endif
 
-static __always_inline s64 atomic64_sub_return(s64 i, atomic64_t *v)
+#if !defined(arch_atomic64_xchg_relaxed) || defined(arch_atomic64_xchg)
+static inline s64
+atomic64_xchg(atomic64_t *v, s64 i)
 {
 	kasan_check_write(v, sizeof(*v));
-	return arch_atomic64_sub_return(i, v);
+	return arch_atomic64_xchg(v, i);
 }
+#define atomic64_xchg atomic64_xchg
+#endif
 
-static __always_inline int atomic_fetch_add(int i, atomic_t *v)
+#if defined(arch_atomic64_xchg_acquire)
+static inline s64
+atomic64_xchg_acquire(atomic64_t *v, s64 i)
 {
 	kasan_check_write(v, sizeof(*v));
-	return arch_atomic_fetch_add(i, v);
+	return arch_atomic64_xchg_acquire(v, i);
 }
+#define atomic64_xchg_acquire atomic64_xchg_acquire
+#endif
 
-static __always_inline s64 atomic64_fetch_add(s64 i, atomic64_t *v)
+#if defined(arch_atomic64_xchg_release)
+static inline s64
+atomic64_xchg_release(atomic64_t *v, s64 i)
 {
 	kasan_check_write(v, sizeof(*v));
-	return arch_atomic64_fetch_add(i, v);
+	return arch_atomic64_xchg_release(v, i);
 }
+#define atomic64_xchg_release atomic64_xchg_release
+#endif
 
-static __always_inline int atomic_fetch_sub(int i, atomic_t *v)
+#if defined(arch_atomic64_xchg_relaxed)
+static inline s64
+atomic64_xchg_relaxed(atomic64_t *v, s64 i)
 {
 	kasan_check_write(v, sizeof(*v));
-	return arch_atomic_fetch_sub(i, v);
+	return arch_atomic64_xchg_relaxed(v, i);
 }
+#define atomic64_xchg_relaxed atomic64_xchg_relaxed
+#endif
 
-static __always_inline s64 atomic64_fetch_sub(s64 i, atomic64_t *v)
+#if !defined(arch_atomic64_cmpxchg_relaxed) || defined(arch_atomic64_cmpxchg)
+static inline s64
+atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new)
 {
 	kasan_check_write(v, sizeof(*v));
-	return arch_atomic64_fetch_sub(i, v);
+	return arch_atomic64_cmpxchg(v, old, new);
 }
+#define atomic64_cmpxchg atomic64_cmpxchg
+#endif
 
-static __always_inline int atomic_fetch_and(int i, atomic_t *v)
+#if defined(arch_atomic64_cmpxchg_acquire)
+static inline s64
+atomic64_cmpxchg_acquire(atomic64_t *v, s64 old, s64 new)
 {
 	kasan_check_write(v, sizeof(*v));
-	return arch_atomic_fetch_and(i, v);
+	return arch_atomic64_cmpxchg_acquire(v, old, new);
 }
+#define atomic64_cmpxchg_acquire atomic64_cmpxchg_acquire
+#endif
 
-static __always_inline s64 atomic64_fetch_and(s64 i, atomic64_t *v)
+#if defined(arch_atomic64_cmpxchg_release)
+static inline s64
+atomic64_cmpxchg_release(atomic64_t *v, s64 old, s64 new)
 {
 	kasan_check_write(v, sizeof(*v));
-	return arch_atomic64_fetch_and(i, v);
+	return arch_atomic64_cmpxchg_release(v, old, new);
 }
+#define atomic64_cmpxchg_release atomic64_cmpxchg_release
+#endif
 
-static __always_inline int atomic_fetch_or(int i, atomic_t *v)
+#if defined(arch_atomic64_cmpxchg_relaxed)
+static inline s64
+atomic64_cmpxchg_relaxed(atomic64_t *v, s64 old, s64 new)
 {
 	kasan_check_write(v, sizeof(*v));
-	return arch_atomic_fetch_or(i, v);
+	return arch_atomic64_cmpxchg_relaxed(v, old, new);
 }
+#define atomic64_cmpxchg_relaxed atomic64_cmpxchg_relaxed
+#endif
 
-static __always_inline s64 atomic64_fetch_or(s64 i, atomic64_t *v)
+#if defined(arch_atomic64_try_cmpxchg)
+static inline bool
+atomic64_try_cmpxchg(atomic64_t *v, s64 *old, s64 new)
 {
 	kasan_check_write(v, sizeof(*v));
-	return arch_atomic64_fetch_or(i, v);
+	kasan_check_write(old, sizeof(*old));
+	return arch_atomic64_try_cmpxchg(v, old, new);
 }
+#define atomic64_try_cmpxchg atomic64_try_cmpxchg
+#endif
 
-static __always_inline int atomic_fetch_xor(int i, atomic_t *v)
+#if defined(arch_atomic64_try_cmpxchg_acquire)
+static inline bool
+atomic64_try_cmpxchg_acquire(atomic64_t *v, s64 *old, s64 new)
 {
 	kasan_check_write(v, sizeof(*v));
-	return arch_atomic_fetch_xor(i, v);
+	kasan_check_write(old, sizeof(*old));
+	return arch_atomic64_try_cmpxchg_acquire(v, old, new);
 }
+#define atomic64_try_cmpxchg_acquire atomic64_try_cmpxchg_acquire
+#endif
 
-static __always_inline s64 atomic64_fetch_xor(s64 i, atomic64_t *v)
+#if defined(arch_atomic64_try_cmpxchg_release)
+static inline bool
+atomic64_try_cmpxchg_release(atomic64_t *v, s64 *old, s64 new)
 {
 	kasan_check_write(v, sizeof(*v));
-	return arch_atomic64_fetch_xor(i, v);
+	kasan_check_write(old, sizeof(*old));
+	return arch_atomic64_try_cmpxchg_release(v, old, new);
 }
+#define atomic64_try_cmpxchg_release atomic64_try_cmpxchg_release
+#endif
 
-#ifdef arch_atomic_sub_and_test
-#define atomic_sub_and_test atomic_sub_and_test
-static __always_inline bool atomic_sub_and_test(int i, atomic_t *v)
+#if defined(arch_atomic64_try_cmpxchg_relaxed)
+static inline bool
+atomic64_try_cmpxchg_relaxed(atomic64_t *v, s64 *old, s64 new)
 {
 	kasan_check_write(v, sizeof(*v));
-	return arch_atomic_sub_and_test(i, v);
+	kasan_check_write(old, sizeof(*old));
+	return arch_atomic64_try_cmpxchg_relaxed(v, old, new);
 }
+#define atomic64_try_cmpxchg_relaxed atomic64_try_cmpxchg_relaxed
 #endif
 
-#ifdef arch_atomic64_sub_and_test
-#define atomic64_sub_and_test atomic64_sub_and_test
-static __always_inline bool atomic64_sub_and_test(s64 i, atomic64_t *v)
+#if defined(arch_atomic64_sub_and_test)
+static inline bool
+atomic64_sub_and_test(s64 i, atomic64_t *v)
 {
 	kasan_check_write(v, sizeof(*v));
 	return arch_atomic64_sub_and_test(i, v);
 }
+#define atomic64_sub_and_test atomic64_sub_and_test
 #endif
 
-#ifdef arch_atomic_add_negative
-#define atomic_add_negative atomic_add_negative
-static __always_inline bool atomic_add_negative(int i, atomic_t *v)
+#if defined(arch_atomic64_dec_and_test)
+static inline bool
+atomic64_dec_and_test(atomic64_t *v)
 {
 	kasan_check_write(v, sizeof(*v));
-	return arch_atomic_add_negative(i, v);
+	return arch_atomic64_dec_and_test(v);
 }
+#define atomic64_dec_and_test atomic64_dec_and_test
 #endif
 
-#ifdef arch_atomic64_add_negative
-#define atomic64_add_negative atomic64_add_negative
-static __always_inline bool atomic64_add_negative(s64 i, atomic64_t *v)
+#if defined(arch_atomic64_inc_and_test)
+static inline bool
+atomic64_inc_and_test(atomic64_t *v)
+{
+	kasan_check_write(v, sizeof(*v));
+	return arch_atomic64_inc_and_test(v);
+}
+#define atomic64_inc_and_test atomic64_inc_and_test
+#endif
+
+#if defined(arch_atomic64_add_negative)
+static inline bool
+atomic64_add_negative(s64 i, atomic64_t *v)
 {
 	kasan_check_write(v, sizeof(*v));
 	return arch_atomic64_add_negative(i, v);
 }
+#define atomic64_add_negative atomic64_add_negative
+#endif
+
+#if defined(arch_atomic64_fetch_add_unless)
+static inline s64
+atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
+{
+	kasan_check_write(v, sizeof(*v));
+	return arch_atomic64_fetch_add_unless(v, a, u);
+}
+#define atomic64_fetch_add_unless atomic64_fetch_add_unless
+#endif
+
+#if defined(arch_atomic64_add_unless)
+static inline bool
+atomic64_add_unless(atomic64_t *v, s64 a, s64 u)
+{
+	kasan_check_write(v, sizeof(*v));
+	return arch_atomic64_add_unless(v, a, u);
+}
+#define atomic64_add_unless atomic64_add_unless
+#endif
+
+#if defined(arch_atomic64_inc_not_zero)
+static inline bool
+atomic64_inc_not_zero(atomic64_t *v)
+{
+	kasan_check_write(v, sizeof(*v));
+	return arch_atomic64_inc_not_zero(v);
+}
+#define atomic64_inc_not_zero atomic64_inc_not_zero
+#endif
+
+#if defined(arch_atomic64_inc_unless_negative)
+static inline bool
+atomic64_inc_unless_negative(atomic64_t *v)
+{
+	kasan_check_write(v, sizeof(*v));
+	return arch_atomic64_inc_unless_negative(v);
+}
+#define atomic64_inc_unless_negative atomic64_inc_unless_negative
 #endif
 
-#define xchg(ptr, new)							\
+#if defined(arch_atomic64_dec_unless_positive)
+static inline bool
+atomic64_dec_unless_positive(atomic64_t *v)
+{
+	kasan_check_write(v, sizeof(*v));
+	return arch_atomic64_dec_unless_positive(v);
+}
+#define atomic64_dec_unless_positive atomic64_dec_unless_positive
+#endif
+
+#if defined(arch_atomic64_dec_if_positive)
+static inline s64
+atomic64_dec_if_positive(atomic64_t *v)
+{
+	kasan_check_write(v, sizeof(*v));
+	return arch_atomic64_dec_if_positive(v);
+}
+#define atomic64_dec_if_positive atomic64_dec_if_positive
+#endif
+
+#if !defined(arch_xchg_relaxed) || defined(arch_xchg)
+#define xchg(ptr, ...)						\
+({									\
+	typeof(ptr) __ai_ptr = (ptr);					\
+	kasan_check_write(__ai_ptr, sizeof(*__ai_ptr));		\
+	arch_xchg(__ai_ptr, __VA_ARGS__);				\
+})
+#endif
+
+#if defined(arch_xchg_acquire)
+#define xchg_acquire(ptr, ...)						\
+({									\
+	typeof(ptr) __ai_ptr = (ptr);					\
+	kasan_check_write(__ai_ptr, sizeof(*__ai_ptr));		\
+	arch_xchg_acquire(__ai_ptr, __VA_ARGS__);				\
+})
+#endif
+
+#if defined(arch_xchg_release)
+#define xchg_release(ptr, ...)						\
+({									\
+	typeof(ptr) __ai_ptr = (ptr);					\
+	kasan_check_write(__ai_ptr, sizeof(*__ai_ptr));		\
+	arch_xchg_release(__ai_ptr, __VA_ARGS__);				\
+})
+#endif
+
+#if defined(arch_xchg_relaxed)
+#define xchg_relaxed(ptr, ...)						\
+({									\
+	typeof(ptr) __ai_ptr = (ptr);					\
+	kasan_check_write(__ai_ptr, sizeof(*__ai_ptr));		\
+	arch_xchg_relaxed(__ai_ptr, __VA_ARGS__);				\
+})
+#endif
+
+#if !defined(arch_cmpxchg_relaxed) || defined(arch_cmpxchg)
+#define cmpxchg(ptr, ...)						\
 ({									\
 	typeof(ptr) __ai_ptr = (ptr);					\
-	kasan_check_write(__ai_ptr, sizeof(*__ai_ptr));			\
-	arch_xchg(__ai_ptr, (new));					\
+	kasan_check_write(__ai_ptr, sizeof(*__ai_ptr));		\
+	arch_cmpxchg(__ai_ptr, __VA_ARGS__);				\
 })
+#endif
+
+#if defined(arch_cmpxchg_acquire)
+#define cmpxchg_acquire(ptr, ...)						\
+({									\
+	typeof(ptr) __ai_ptr = (ptr);					\
+	kasan_check_write(__ai_ptr, sizeof(*__ai_ptr));		\
+	arch_cmpxchg_acquire(__ai_ptr, __VA_ARGS__);				\
+})
+#endif
+
+#if defined(arch_cmpxchg_release)
+#define cmpxchg_release(ptr, ...)						\
+({									\
+	typeof(ptr) __ai_ptr = (ptr);					\
+	kasan_check_write(__ai_ptr, sizeof(*__ai_ptr));		\
+	arch_cmpxchg_release(__ai_ptr, __VA_ARGS__);				\
+})
+#endif
+
+#if defined(arch_cmpxchg_relaxed)
+#define cmpxchg_relaxed(ptr, ...)						\
+({									\
+	typeof(ptr) __ai_ptr = (ptr);					\
+	kasan_check_write(__ai_ptr, sizeof(*__ai_ptr));		\
+	arch_cmpxchg_relaxed(__ai_ptr, __VA_ARGS__);				\
+})
+#endif
 
-#define cmpxchg(ptr, old, new)						\
+#if !defined(arch_cmpxchg64_relaxed) || defined(arch_cmpxchg64)
+#define cmpxchg64(ptr, ...)						\
 ({									\
 	typeof(ptr) __ai_ptr = (ptr);					\
-	kasan_check_write(__ai_ptr, sizeof(*__ai_ptr));			\
-	arch_cmpxchg(__ai_ptr, (old), (new));				\
+	kasan_check_write(__ai_ptr, sizeof(*__ai_ptr));		\
+	arch_cmpxchg64(__ai_ptr, __VA_ARGS__);				\
 })
+#endif
+
+#if defined(arch_cmpxchg64_acquire)
+#define cmpxchg64_acquire(ptr, ...)						\
+({									\
+	typeof(ptr) __ai_ptr = (ptr);					\
+	kasan_check_write(__ai_ptr, sizeof(*__ai_ptr));		\
+	arch_cmpxchg64_acquire(__ai_ptr, __VA_ARGS__);				\
+})
+#endif
+
+#if defined(arch_cmpxchg64_release)
+#define cmpxchg64_release(ptr, ...)						\
+({									\
+	typeof(ptr) __ai_ptr = (ptr);					\
+	kasan_check_write(__ai_ptr, sizeof(*__ai_ptr));		\
+	arch_cmpxchg64_release(__ai_ptr, __VA_ARGS__);				\
+})
+#endif
 
-#define sync_cmpxchg(ptr, old, new)					\
+#if defined(arch_cmpxchg64_relaxed)
+#define cmpxchg64_relaxed(ptr, ...)						\
 ({									\
 	typeof(ptr) __ai_ptr = (ptr);					\
-	kasan_check_write(__ai_ptr, sizeof(*__ai_ptr));			\
-	arch_sync_cmpxchg(__ai_ptr, (old), (new));			\
+	kasan_check_write(__ai_ptr, sizeof(*__ai_ptr));		\
+	arch_cmpxchg64_relaxed(__ai_ptr, __VA_ARGS__);				\
 })
+#endif
 
-#define cmpxchg_local(ptr, old, new)					\
+#define cmpxchg_local(ptr, ...)						\
 ({									\
 	typeof(ptr) __ai_ptr = (ptr);					\
-	kasan_check_write(__ai_ptr, sizeof(*__ai_ptr));			\
-	arch_cmpxchg_local(__ai_ptr, (old), (new));			\
+	kasan_check_write(__ai_ptr, sizeof(*__ai_ptr));		\
+	arch_cmpxchg_local(__ai_ptr, __VA_ARGS__);				\
 })
 
-#define cmpxchg64(ptr, old, new)					\
+#define cmpxchg64_local(ptr, ...)						\
 ({									\
 	typeof(ptr) __ai_ptr = (ptr);					\
-	kasan_check_write(__ai_ptr, sizeof(*__ai_ptr));			\
-	arch_cmpxchg64(__ai_ptr, (old), (new));				\
+	kasan_check_write(__ai_ptr, sizeof(*__ai_ptr));		\
+	arch_cmpxchg64_local(__ai_ptr, __VA_ARGS__);				\
 })
 
-#define cmpxchg64_local(ptr, old, new)					\
+#define sync_cmpxchg(ptr, ...)						\
 ({									\
 	typeof(ptr) __ai_ptr = (ptr);					\
-	kasan_check_write(__ai_ptr, sizeof(*__ai_ptr));			\
-	arch_cmpxchg64_local(__ai_ptr, (old), (new));			\
+	kasan_check_write(__ai_ptr, sizeof(*__ai_ptr));		\
+	arch_sync_cmpxchg(__ai_ptr, __VA_ARGS__);				\
 })
 
-#define cmpxchg_double(p1, p2, o1, o2, n1, n2)				\
+#define cmpxchg_double(ptr, ...)						\
 ({									\
-	typeof(p1) __ai_p1 = (p1);					\
-	kasan_check_write(__ai_p1, 2 * sizeof(*__ai_p1));		\
-	arch_cmpxchg_double(__ai_p1, (p2), (o1), (o2), (n1), (n2));	\
+	typeof(ptr) __ai_ptr = (ptr);					\
+	kasan_check_write(__ai_ptr, 2 * sizeof(*__ai_ptr));		\
+	arch_cmpxchg_double(__ai_ptr, __VA_ARGS__);				\
 })
 
-#define cmpxchg_double_local(p1, p2, o1, o2, n1, n2)				\
-({										\
-	typeof(p1) __ai_p1 = (p1);						\
-	kasan_check_write(__ai_p1, 2 * sizeof(*__ai_p1));			\
-	arch_cmpxchg_double_local(__ai_p1, (p2), (o1), (o2), (n1), (n2));	\
+
+#define cmpxchg_double_local(ptr, ...)						\
+({									\
+	typeof(ptr) __ai_ptr = (ptr);					\
+	kasan_check_write(__ai_ptr, 2 * sizeof(*__ai_ptr));		\
+	arch_cmpxchg_double_local(__ai_ptr, __VA_ARGS__);				\
 })
 
-#endif /* _LINUX_ATOMIC_INSTRUMENTED_H */
+#endif /* _ASM_GENERIC_ATOMIC_INSTRUMENTED_H */
-- 
cgit v1.2.3


From 17abc9ec68b73ddeb262a507a62421016b9c54d5 Mon Sep 17 00:00:00 2001
From: Tomasz Duszynski <tduszyns@gmail.com>
Date: Fri, 14 Dec 2018 19:28:01 +0100
Subject: iio: add IIO_MASSCONCENTRATION channel type

Measuring particulate matter in ug / m3 (micro-grams per cubic meter)
is de facto standard. Existing air quality sensors usually follow
this convention and are capable of returning measurements using
this unit.

IIO currently does not offer suitable channel type for this
type of measurements hence this patch adds this.

In addition, extra modifiers are introduced used for distinguishing
between fine pm1, pm2p5 and coarse pm4, pm10 particle measurements, i.e
IIO_MOD_PM1, IIO_MOD_PM25 and IIO_MOD_PM4, IIO_MOD_PM10.

pmX consists of particles with aerodynamic diameter less or equal to
X micrometers.

Signed-off-by: Tomasz Duszynski <tduszyns@gmail.com>
Acked-by: Matt Ranostay <matt.ranostay@konsulko.com>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 Documentation/ABI/testing/sysfs-bus-iio | 17 ++++++++++++++++-
 drivers/iio/industrialio-core.c         |  5 +++++
 include/uapi/linux/iio/types.h          |  5 +++++
 tools/iio/iio_event_monitor.c           | 10 ++++++++++
 4 files changed, 36 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/Documentation/ABI/testing/sysfs-bus-iio b/Documentation/ABI/testing/sysfs-bus-iio
index 8127a08e366d..67fd88bf7910 100644
--- a/Documentation/ABI/testing/sysfs-bus-iio
+++ b/Documentation/ABI/testing/sysfs-bus-iio
@@ -1684,4 +1684,19 @@ KernelVersion:	4.18
 Contact:	linux-iio@vger.kernel.org
 Description:
 		Raw (unscaled) phase difference reading from channel Y
-		that can be processed to radians.
\ No newline at end of file
+		that can be processed to radians.
+
+What:		/sys/bus/iio/devices/iio:deviceX/in_massconcentration_pm1_input
+What:		/sys/bus/iio/devices/iio:deviceX/in_massconcentrationY_pm1_input
+What:		/sys/bus/iio/devices/iio:deviceX/in_massconcentration_pm2p5_input
+What:		/sys/bus/iio/devices/iio:deviceX/in_massconcentrationY_pm2p5_input
+What:		/sys/bus/iio/devices/iio:deviceX/in_massconcentration_pm4_input
+What:		/sys/bus/iio/devices/iio:deviceX/in_massconcentrationY_pm4_input
+What:		/sys/bus/iio/devices/iio:deviceX/in_massconcentration_pm10_input
+What:		/sys/bus/iio/devices/iio:deviceX/in_massconcentrationY_pm10_input
+KernelVersion:	4.22
+Contact:	linux-iio@vger.kernel.org
+Description:
+		Mass concentration reading of particulate matter in ug / m3.
+		pmX consists of particles with aerodynamic diameter less or
+		equal to X micrometers.
diff --git a/drivers/iio/industrialio-core.c b/drivers/iio/industrialio-core.c
index 4f5cd9f60870..4700fd5d8c90 100644
--- a/drivers/iio/industrialio-core.c
+++ b/drivers/iio/industrialio-core.c
@@ -87,6 +87,7 @@ static const char * const iio_chan_type_name_spec[] = {
 	[IIO_GRAVITY]  = "gravity",
 	[IIO_POSITIONRELATIVE]  = "positionrelative",
 	[IIO_PHASE] = "phase",
+	[IIO_MASSCONCENTRATION] = "massconcentration",
 };
 
 static const char * const iio_modifier_names[] = {
@@ -127,6 +128,10 @@ static const char * const iio_modifier_names[] = {
 	[IIO_MOD_Q] = "q",
 	[IIO_MOD_CO2] = "co2",
 	[IIO_MOD_VOC] = "voc",
+	[IIO_MOD_PM1] = "pm1",
+	[IIO_MOD_PM2P5] = "pm2p5",
+	[IIO_MOD_PM4] = "pm4",
+	[IIO_MOD_PM10] = "pm10",
 };
 
 /* relies on pairs of these shared then separate */
diff --git a/include/uapi/linux/iio/types.h b/include/uapi/linux/iio/types.h
index 92baabc103ac..c59adac24b1c 100644
--- a/include/uapi/linux/iio/types.h
+++ b/include/uapi/linux/iio/types.h
@@ -46,6 +46,7 @@ enum iio_chan_type {
 	IIO_GRAVITY,
 	IIO_POSITIONRELATIVE,
 	IIO_PHASE,
+	IIO_MASSCONCENTRATION,
 };
 
 enum iio_modifier {
@@ -87,6 +88,10 @@ enum iio_modifier {
 	IIO_MOD_VOC,
 	IIO_MOD_LIGHT_UV,
 	IIO_MOD_LIGHT_DUV,
+	IIO_MOD_PM1,
+	IIO_MOD_PM2P5,
+	IIO_MOD_PM4,
+	IIO_MOD_PM10,
 };
 
 enum iio_event_type {
diff --git a/tools/iio/iio_event_monitor.c b/tools/iio/iio_event_monitor.c
index ac2de6b7e89f..f6b8003fbe3c 100644
--- a/tools/iio/iio_event_monitor.c
+++ b/tools/iio/iio_event_monitor.c
@@ -60,6 +60,7 @@ static const char * const iio_chan_type_name_spec[] = {
 	[IIO_GRAVITY] = "gravity",
 	[IIO_POSITIONRELATIVE] = "positionrelative",
 	[IIO_PHASE] = "phase",
+	[IIO_MASSCONCENTRATION] = "massconcentration",
 };
 
 static const char * const iio_ev_type_text[] = {
@@ -115,6 +116,10 @@ static const char * const iio_modifier_names[] = {
 	[IIO_MOD_Q] = "q",
 	[IIO_MOD_CO2] = "co2",
 	[IIO_MOD_VOC] = "voc",
+	[IIO_MOD_PM1] = "pm1",
+	[IIO_MOD_PM2P5] = "pm2p5",
+	[IIO_MOD_PM4] = "pm4",
+	[IIO_MOD_PM10] = "pm10",
 };
 
 static bool event_is_known(struct iio_event_data *event)
@@ -156,6 +161,7 @@ static bool event_is_known(struct iio_event_data *event)
 	case IIO_GRAVITY:
 	case IIO_POSITIONRELATIVE:
 	case IIO_PHASE:
+	case IIO_MASSCONCENTRATION:
 		break;
 	default:
 		return false;
@@ -200,6 +206,10 @@ static bool event_is_known(struct iio_event_data *event)
 	case IIO_MOD_Q:
 	case IIO_MOD_CO2:
 	case IIO_MOD_VOC:
+	case IIO_MOD_PM1:
+	case IIO_MOD_PM2P5:
+	case IIO_MOD_PM4:
+	case IIO_MOD_PM10:
 		break;
 	default:
 		return false;
-- 
cgit v1.2.3


From b170f7d48443d1ea3e4ffbf409025b5e5b1146fe Mon Sep 17 00:00:00 2001
From: Andreas Brauchli <a.brauchli@elementarea.net>
Date: Thu, 13 Dec 2018 15:43:22 +0100
Subject: iio: Add modifiers for ethanol and H2 gases

Add ethanol and H2 gas modifiers:
* IIO_MOD_ETHANOL
* IIO_MOD_H2

Signed-off-by: Andreas Brauchli <andreas.brauchli@sensirion.com>
Acked-by: Matt Ranostay <matt.ranostay@konsulko.com>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 Documentation/ABI/testing/sysfs-bus-iio | 4 ++++
 include/uapi/linux/iio/types.h          | 2 ++
 tools/iio/iio_event_monitor.c           | 4 ++++
 3 files changed, 10 insertions(+)

(limited to 'include')

diff --git a/Documentation/ABI/testing/sysfs-bus-iio b/Documentation/ABI/testing/sysfs-bus-iio
index 67fd88bf7910..864f8efd12e5 100644
--- a/Documentation/ABI/testing/sysfs-bus-iio
+++ b/Documentation/ABI/testing/sysfs-bus-iio
@@ -1554,6 +1554,10 @@ What:		/sys/bus/iio/devices/iio:deviceX/in_concentration_raw
 What:		/sys/bus/iio/devices/iio:deviceX/in_concentrationX_raw
 What:		/sys/bus/iio/devices/iio:deviceX/in_concentration_co2_raw
 What:		/sys/bus/iio/devices/iio:deviceX/in_concentrationX_co2_raw
+What:		/sys/bus/iio/devices/iio:deviceX/in_concentration_ethanol_raw
+What:		/sys/bus/iio/devices/iio:deviceX/in_concentrationX_ethanol_raw
+What:		/sys/bus/iio/devices/iio:deviceX/in_concentration_h2_raw
+What:		/sys/bus/iio/devices/iio:deviceX/in_concentrationX_h2_raw
 What:		/sys/bus/iio/devices/iio:deviceX/in_concentration_voc_raw
 What:		/sys/bus/iio/devices/iio:deviceX/in_concentrationX_voc_raw
 KernelVersion:	4.3
diff --git a/include/uapi/linux/iio/types.h b/include/uapi/linux/iio/types.h
index c59adac24b1c..fdd81affca4b 100644
--- a/include/uapi/linux/iio/types.h
+++ b/include/uapi/linux/iio/types.h
@@ -92,6 +92,8 @@ enum iio_modifier {
 	IIO_MOD_PM2P5,
 	IIO_MOD_PM4,
 	IIO_MOD_PM10,
+	IIO_MOD_ETHANOL,
+	IIO_MOD_H2,
 };
 
 enum iio_event_type {
diff --git a/tools/iio/iio_event_monitor.c b/tools/iio/iio_event_monitor.c
index f6b8003fbe3c..7bf9bde28bcc 100644
--- a/tools/iio/iio_event_monitor.c
+++ b/tools/iio/iio_event_monitor.c
@@ -115,6 +115,8 @@ static const char * const iio_modifier_names[] = {
 	[IIO_MOD_I] = "i",
 	[IIO_MOD_Q] = "q",
 	[IIO_MOD_CO2] = "co2",
+	[IIO_MOD_ETHANOL] = "ethanol",
+	[IIO_MOD_H2] = "h2",
 	[IIO_MOD_VOC] = "voc",
 	[IIO_MOD_PM1] = "pm1",
 	[IIO_MOD_PM2P5] = "pm2p5",
@@ -205,6 +207,8 @@ static bool event_is_known(struct iio_event_data *event)
 	case IIO_MOD_I:
 	case IIO_MOD_Q:
 	case IIO_MOD_CO2:
+	case IIO_MOD_ETHANOL:
+	case IIO_MOD_H2:
 	case IIO_MOD_VOC:
 	case IIO_MOD_PM1:
 	case IIO_MOD_PM2P5:
-- 
cgit v1.2.3


From 7a79d717e0817610932ce3b7b6033ea06ee1d577 Mon Sep 17 00:00:00 2001
From: Sven Eckelmann <sven@narfation.org>
Date: Mon, 31 Dec 2018 23:59:59 +0100
Subject: batman-adv: Update copyright years for 2019

Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
---
 include/uapi/linux/batadv_packet.h     | 2 +-
 include/uapi/linux/batman_adv.h        | 2 +-
 net/batman-adv/Kconfig                 | 2 +-
 net/batman-adv/Makefile                | 2 +-
 net/batman-adv/bat_algo.c              | 2 +-
 net/batman-adv/bat_algo.h              | 2 +-
 net/batman-adv/bat_iv_ogm.c            | 2 +-
 net/batman-adv/bat_iv_ogm.h            | 2 +-
 net/batman-adv/bat_v.c                 | 2 +-
 net/batman-adv/bat_v.h                 | 2 +-
 net/batman-adv/bat_v_elp.c             | 2 +-
 net/batman-adv/bat_v_elp.h             | 2 +-
 net/batman-adv/bat_v_ogm.c             | 2 +-
 net/batman-adv/bat_v_ogm.h             | 2 +-
 net/batman-adv/bitarray.c              | 2 +-
 net/batman-adv/bitarray.h              | 2 +-
 net/batman-adv/bridge_loop_avoidance.c | 2 +-
 net/batman-adv/bridge_loop_avoidance.h | 2 +-
 net/batman-adv/debugfs.c               | 2 +-
 net/batman-adv/debugfs.h               | 2 +-
 net/batman-adv/distributed-arp-table.c | 2 +-
 net/batman-adv/distributed-arp-table.h | 2 +-
 net/batman-adv/fragmentation.c         | 2 +-
 net/batman-adv/fragmentation.h         | 2 +-
 net/batman-adv/gateway_client.c        | 2 +-
 net/batman-adv/gateway_client.h        | 2 +-
 net/batman-adv/gateway_common.c        | 2 +-
 net/batman-adv/gateway_common.h        | 2 +-
 net/batman-adv/hard-interface.c        | 2 +-
 net/batman-adv/hard-interface.h        | 2 +-
 net/batman-adv/hash.c                  | 2 +-
 net/batman-adv/hash.h                  | 2 +-
 net/batman-adv/icmp_socket.c           | 2 +-
 net/batman-adv/icmp_socket.h           | 2 +-
 net/batman-adv/log.c                   | 2 +-
 net/batman-adv/log.h                   | 2 +-
 net/batman-adv/main.c                  | 2 +-
 net/batman-adv/main.h                  | 2 +-
 net/batman-adv/multicast.c             | 2 +-
 net/batman-adv/multicast.h             | 2 +-
 net/batman-adv/netlink.c               | 2 +-
 net/batman-adv/netlink.h               | 2 +-
 net/batman-adv/network-coding.c        | 2 +-
 net/batman-adv/network-coding.h        | 2 +-
 net/batman-adv/originator.c            | 2 +-
 net/batman-adv/originator.h            | 2 +-
 net/batman-adv/routing.c               | 2 +-
 net/batman-adv/routing.h               | 2 +-
 net/batman-adv/send.c                  | 2 +-
 net/batman-adv/send.h                  | 2 +-
 net/batman-adv/soft-interface.c        | 2 +-
 net/batman-adv/soft-interface.h        | 2 +-
 net/batman-adv/sysfs.c                 | 2 +-
 net/batman-adv/sysfs.h                 | 2 +-
 net/batman-adv/tp_meter.c              | 2 +-
 net/batman-adv/tp_meter.h              | 2 +-
 net/batman-adv/trace.c                 | 2 +-
 net/batman-adv/trace.h                 | 2 +-
 net/batman-adv/translation-table.c     | 2 +-
 net/batman-adv/translation-table.h     | 2 +-
 net/batman-adv/tvlv.c                  | 2 +-
 net/batman-adv/tvlv.h                  | 2 +-
 net/batman-adv/types.h                 | 2 +-
 63 files changed, 63 insertions(+), 63 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/batadv_packet.h b/include/uapi/linux/batadv_packet.h
index 894d8d2f713d..7eb2936a8e22 100644
--- a/include/uapi/linux/batadv_packet.h
+++ b/include/uapi/linux/batadv_packet.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) */
-/* Copyright (C) 2007-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2019  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
  *
diff --git a/include/uapi/linux/batman_adv.h b/include/uapi/linux/batman_adv.h
index 324a0e1143e7..a28e76a7e0a2 100644
--- a/include/uapi/linux/batman_adv.h
+++ b/include/uapi/linux/batman_adv.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: MIT */
-/* Copyright (C) 2016-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2016-2019  B.A.T.M.A.N. contributors:
  *
  * Matthias Schiffer
  *
diff --git a/net/batman-adv/Kconfig b/net/batman-adv/Kconfig
index c386e6981416..a31db5e9ac8e 100644
--- a/net/batman-adv/Kconfig
+++ b/net/batman-adv/Kconfig
@@ -1,5 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0
-# Copyright (C) 2007-2018  B.A.T.M.A.N. contributors:
+# Copyright (C) 2007-2019  B.A.T.M.A.N. contributors:
 #
 # Marek Lindner, Simon Wunderlich
 #
diff --git a/net/batman-adv/Makefile b/net/batman-adv/Makefile
index 9b58160fe485..a887ecc3efa1 100644
--- a/net/batman-adv/Makefile
+++ b/net/batman-adv/Makefile
@@ -1,5 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0
-# Copyright (C) 2007-2018  B.A.T.M.A.N. contributors:
+# Copyright (C) 2007-2019  B.A.T.M.A.N. contributors:
 #
 # Marek Lindner, Simon Wunderlich
 #
diff --git a/net/batman-adv/bat_algo.c b/net/batman-adv/bat_algo.c
index ea309ad06175..7b7e15641fef 100644
--- a/net/batman-adv/bat_algo.c
+++ b/net/batman-adv/bat_algo.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2019  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
  *
diff --git a/net/batman-adv/bat_algo.h b/net/batman-adv/bat_algo.h
index 534b790c3753..25e7bb51928c 100644
--- a/net/batman-adv/bat_algo.h
+++ b/net/batman-adv/bat_algo.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2011-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2011-2019  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Linus Lüssing
  *
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index f97e566f0402..de61091af666 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2019  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
  *
diff --git a/net/batman-adv/bat_iv_ogm.h b/net/batman-adv/bat_iv_ogm.h
index 3dc6a7a43eb7..785f6666273c 100644
--- a/net/batman-adv/bat_iv_ogm.h
+++ b/net/batman-adv/bat_iv_ogm.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2019  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
  *
diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c
index 90e33f84d37a..445594ed58af 100644
--- a/net/batman-adv/bat_v.c
+++ b/net/batman-adv/bat_v.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2013-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2013-2019  B.A.T.M.A.N. contributors:
  *
  * Linus Lüssing, Marek Lindner
  *
diff --git a/net/batman-adv/bat_v.h b/net/batman-adv/bat_v.h
index ec4a2a569750..465a4fc23354 100644
--- a/net/batman-adv/bat_v.h
+++ b/net/batman-adv/bat_v.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2011-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2011-2019  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Linus Lüssing
  *
diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c
index e8090f099eb8..7b80f6f8d4dc 100644
--- a/net/batman-adv/bat_v_elp.c
+++ b/net/batman-adv/bat_v_elp.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2011-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2011-2019  B.A.T.M.A.N. contributors:
  *
  * Linus Lüssing, Marek Lindner
  *
diff --git a/net/batman-adv/bat_v_elp.h b/net/batman-adv/bat_v_elp.h
index e8c7b7fd290d..75f189ee4a1c 100644
--- a/net/batman-adv/bat_v_elp.h
+++ b/net/batman-adv/bat_v_elp.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2013-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2013-2019  B.A.T.M.A.N. contributors:
  *
  * Linus Lüssing, Marek Lindner
  *
diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c
index 2948b41b06d4..c9698ad41854 100644
--- a/net/batman-adv/bat_v_ogm.c
+++ b/net/batman-adv/bat_v_ogm.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2013-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2013-2019  B.A.T.M.A.N. contributors:
  *
  * Antonio Quartulli
  *
diff --git a/net/batman-adv/bat_v_ogm.h b/net/batman-adv/bat_v_ogm.h
index e5be14c908c6..f67cf7ee06b2 100644
--- a/net/batman-adv/bat_v_ogm.h
+++ b/net/batman-adv/bat_v_ogm.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2013-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2013-2019  B.A.T.M.A.N. contributors:
  *
  * Antonio Quartulli
  *
diff --git a/net/batman-adv/bitarray.c b/net/batman-adv/bitarray.c
index a296a4d851f5..63e134e763e3 100644
--- a/net/batman-adv/bitarray.c
+++ b/net/batman-adv/bitarray.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2006-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2006-2019  B.A.T.M.A.N. contributors:
  *
  * Simon Wunderlich, Marek Lindner
  *
diff --git a/net/batman-adv/bitarray.h b/net/batman-adv/bitarray.h
index 48f683289531..f3a05ad9afad 100644
--- a/net/batman-adv/bitarray.h
+++ b/net/batman-adv/bitarray.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2006-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2006-2019  B.A.T.M.A.N. contributors:
  *
  * Simon Wunderlich, Marek Lindner
  *
diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c
index 5fdde2947802..ef39aabdb694 100644
--- a/net/batman-adv/bridge_loop_avoidance.c
+++ b/net/batman-adv/bridge_loop_avoidance.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2011-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2011-2019  B.A.T.M.A.N. contributors:
  *
  * Simon Wunderlich
  *
diff --git a/net/batman-adv/bridge_loop_avoidance.h b/net/batman-adv/bridge_loop_avoidance.h
index 71f95a3e4d3f..31771c751efb 100644
--- a/net/batman-adv/bridge_loop_avoidance.h
+++ b/net/batman-adv/bridge_loop_avoidance.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2011-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2011-2019  B.A.T.M.A.N. contributors:
  *
  * Simon Wunderlich
  *
diff --git a/net/batman-adv/debugfs.c b/net/batman-adv/debugfs.c
index d4a7702e48d8..3b9d1ad2f467 100644
--- a/net/batman-adv/debugfs.c
+++ b/net/batman-adv/debugfs.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2010-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2010-2019  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner
  *
diff --git a/net/batman-adv/debugfs.h b/net/batman-adv/debugfs.h
index 8de018e5c577..c0b8694041ec 100644
--- a/net/batman-adv/debugfs.h
+++ b/net/batman-adv/debugfs.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2010-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2010-2019  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner
  *
diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
index c7f3e2231233..899ab051ffce 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2011-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2011-2019  B.A.T.M.A.N. contributors:
  *
  * Antonio Quartulli
  *
diff --git a/net/batman-adv/distributed-arp-table.h b/net/batman-adv/distributed-arp-table.h
index e58d7a9bfc8d..68c0ff321acd 100644
--- a/net/batman-adv/distributed-arp-table.h
+++ b/net/batman-adv/distributed-arp-table.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2011-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2011-2019  B.A.T.M.A.N. contributors:
  *
  * Antonio Quartulli
  *
diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c
index 5b71a289d04f..b506d15b8230 100644
--- a/net/batman-adv/fragmentation.c
+++ b/net/batman-adv/fragmentation.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2013-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2013-2019  B.A.T.M.A.N. contributors:
  *
  * Martin Hundebøll <martin@hundeboll.net>
  *
diff --git a/net/batman-adv/fragmentation.h b/net/batman-adv/fragmentation.h
index 944512e07782..abdac26579bf 100644
--- a/net/batman-adv/fragmentation.h
+++ b/net/batman-adv/fragmentation.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2013-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2013-2019  B.A.T.M.A.N. contributors:
  *
  * Martin Hundebøll <martin@hundeboll.net>
  *
diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c
index 9d8e5eda2314..0a6b9b30eabd 100644
--- a/net/batman-adv/gateway_client.c
+++ b/net/batman-adv/gateway_client.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2009-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2009-2019  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner
  *
diff --git a/net/batman-adv/gateway_client.h b/net/batman-adv/gateway_client.h
index f0b86fcb2493..b5732c8be81a 100644
--- a/net/batman-adv/gateway_client.h
+++ b/net/batman-adv/gateway_client.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2009-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2009-2019  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner
  *
diff --git a/net/batman-adv/gateway_common.c b/net/batman-adv/gateway_common.c
index 936c107f3199..53d03adc9bfc 100644
--- a/net/batman-adv/gateway_common.c
+++ b/net/batman-adv/gateway_common.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2009-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2009-2019  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner
  *
diff --git a/net/batman-adv/gateway_common.h b/net/batman-adv/gateway_common.h
index 80afb2793687..89bae100a0b0 100644
--- a/net/batman-adv/gateway_common.h
+++ b/net/batman-adv/gateway_common.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2009-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2009-2019  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner
  *
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index 508f4416dfc9..28c1fb8d1af0 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2019  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
  *
diff --git a/net/batman-adv/hard-interface.h b/net/batman-adv/hard-interface.h
index d1c0f6189301..48de28c83401 100644
--- a/net/batman-adv/hard-interface.h
+++ b/net/batman-adv/hard-interface.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2019  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
  *
diff --git a/net/batman-adv/hash.c b/net/batman-adv/hash.c
index 9194f4d891b1..56a08ce193d5 100644
--- a/net/batman-adv/hash.c
+++ b/net/batman-adv/hash.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2006-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2006-2019  B.A.T.M.A.N. contributors:
  *
  * Simon Wunderlich, Marek Lindner
  *
diff --git a/net/batman-adv/hash.h b/net/batman-adv/hash.h
index 0e36fa1c7c3e..37507b6d4006 100644
--- a/net/batman-adv/hash.h
+++ b/net/batman-adv/hash.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2006-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2006-2019  B.A.T.M.A.N. contributors:
  *
  * Simon Wunderlich, Marek Lindner
  *
diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c
index d70f363c52ae..d34bb79e0405 100644
--- a/net/batman-adv/icmp_socket.c
+++ b/net/batman-adv/icmp_socket.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2019  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner
  *
diff --git a/net/batman-adv/icmp_socket.h b/net/batman-adv/icmp_socket.h
index 958be22beda9..5f8926522ff0 100644
--- a/net/batman-adv/icmp_socket.h
+++ b/net/batman-adv/icmp_socket.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2019  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner
  *
diff --git a/net/batman-adv/log.c b/net/batman-adv/log.c
index 02e55b78132f..f97849a8380b 100644
--- a/net/batman-adv/log.c
+++ b/net/batman-adv/log.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2010-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2010-2019  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner
  *
diff --git a/net/batman-adv/log.h b/net/batman-adv/log.h
index 35f4f397ed57..660e9bcc85a2 100644
--- a/net/batman-adv/log.h
+++ b/net/batman-adv/log.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2019  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
  *
diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c
index d1ed839fd32b..75750870cf04 100644
--- a/net/batman-adv/main.c
+++ b/net/batman-adv/main.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2019  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
  *
diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index f3ef2c725a41..3ed669d7dc6b 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2019  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
  *
diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c
index 69244e4598f5..5de6a37525f5 100644
--- a/net/batman-adv/multicast.c
+++ b/net/batman-adv/multicast.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2014-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2014-2019  B.A.T.M.A.N. contributors:
  *
  * Linus Lüssing
  *
diff --git a/net/batman-adv/multicast.h b/net/batman-adv/multicast.h
index 3b04ab13f0eb..466013fe88af 100644
--- a/net/batman-adv/multicast.h
+++ b/net/batman-adv/multicast.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2014-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2014-2019  B.A.T.M.A.N. contributors:
  *
  * Linus Lüssing
  *
diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c
index 2dc3304cee54..5fe833cc9507 100644
--- a/net/batman-adv/netlink.c
+++ b/net/batman-adv/netlink.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2016-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2016-2019  B.A.T.M.A.N. contributors:
  *
  * Matthias Schiffer
  *
diff --git a/net/batman-adv/netlink.h b/net/batman-adv/netlink.h
index 571d9a5ae7aa..216484b8b82d 100644
--- a/net/batman-adv/netlink.h
+++ b/net/batman-adv/netlink.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2016-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2016-2019  B.A.T.M.A.N. contributors:
  *
  * Matthias Schiffer
  *
diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c
index 34caf129a9bf..278762bd94c6 100644
--- a/net/batman-adv/network-coding.c
+++ b/net/batman-adv/network-coding.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2012-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2012-2019  B.A.T.M.A.N. contributors:
  *
  * Martin Hundebøll, Jeppe Ledet-Pedersen
  *
diff --git a/net/batman-adv/network-coding.h b/net/batman-adv/network-coding.h
index 65c346812bc1..96ef0a511fc7 100644
--- a/net/batman-adv/network-coding.h
+++ b/net/batman-adv/network-coding.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2012-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2012-2019  B.A.T.M.A.N. contributors:
  *
  * Martin Hundebøll, Jeppe Ledet-Pedersen
  *
diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c
index 56a981af5c92..e5cdf89ef63c 100644
--- a/net/batman-adv/originator.c
+++ b/net/batman-adv/originator.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2009-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2009-2019  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
  *
diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h
index a8b4c7b667ec..dca1e4a34ec6 100644
--- a/net/batman-adv/originator.h
+++ b/net/batman-adv/originator.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2019  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
  *
diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index 469fa97d1add..cae0e5dd0768 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2019  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
  *
diff --git a/net/batman-adv/routing.h b/net/batman-adv/routing.h
index db54c2d9b8bf..0102d69d345c 100644
--- a/net/batman-adv/routing.h
+++ b/net/batman-adv/routing.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2019  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
  *
diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c
index 4a35f5c2f52b..66a8b3e44501 100644
--- a/net/batman-adv/send.c
+++ b/net/batman-adv/send.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2019  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
  *
diff --git a/net/batman-adv/send.h b/net/batman-adv/send.h
index 64cce07b8fe6..1f6132922e60 100644
--- a/net/batman-adv/send.h
+++ b/net/batman-adv/send.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2019  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
  *
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index 943071008670..b14fb3462af7 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2019  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
  *
diff --git a/net/batman-adv/soft-interface.h b/net/batman-adv/soft-interface.h
index daf87f07fadd..538bb661878c 100644
--- a/net/batman-adv/soft-interface.h
+++ b/net/batman-adv/soft-interface.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2019  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner
  *
diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c
index 09427fc6494a..e1b816262c53 100644
--- a/net/batman-adv/sysfs.c
+++ b/net/batman-adv/sysfs.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2010-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2010-2019  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner
  *
diff --git a/net/batman-adv/sysfs.h b/net/batman-adv/sysfs.h
index c1e3fb69952d..705ffbe763f4 100644
--- a/net/batman-adv/sysfs.h
+++ b/net/batman-adv/sysfs.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2010-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2010-2019  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner
  *
diff --git a/net/batman-adv/tp_meter.c b/net/batman-adv/tp_meter.c
index 11520de96ccb..500109bbd551 100644
--- a/net/batman-adv/tp_meter.c
+++ b/net/batman-adv/tp_meter.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2012-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2012-2019  B.A.T.M.A.N. contributors:
  *
  * Edo Monticelli, Antonio Quartulli
  *
diff --git a/net/batman-adv/tp_meter.h b/net/batman-adv/tp_meter.h
index 68e600974759..6b4d0f733896 100644
--- a/net/batman-adv/tp_meter.h
+++ b/net/batman-adv/tp_meter.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2012-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2012-2019  B.A.T.M.A.N. contributors:
  *
  * Edo Monticelli, Antonio Quartulli
  *
diff --git a/net/batman-adv/trace.c b/net/batman-adv/trace.c
index 8e1024217cff..f77c917ed20d 100644
--- a/net/batman-adv/trace.c
+++ b/net/batman-adv/trace.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2010-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2010-2019  B.A.T.M.A.N. contributors:
  *
  * Sven Eckelmann
  *
diff --git a/net/batman-adv/trace.h b/net/batman-adv/trace.h
index 104784be94d7..5e5579051400 100644
--- a/net/batman-adv/trace.h
+++ b/net/batman-adv/trace.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2010-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2010-2019  B.A.T.M.A.N. contributors:
  *
  * Sven Eckelmann
  *
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index 8dcd4968cde7..f73d79139ae7 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2019  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich, Antonio Quartulli
  *
diff --git a/net/batman-adv/translation-table.h b/net/batman-adv/translation-table.h
index 01b6c8eafaf9..61bca75e5911 100644
--- a/net/batman-adv/translation-table.h
+++ b/net/batman-adv/translation-table.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2019  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich, Antonio Quartulli
  *
diff --git a/net/batman-adv/tvlv.c b/net/batman-adv/tvlv.c
index 40e69c9346d2..7e947b01919d 100644
--- a/net/batman-adv/tvlv.c
+++ b/net/batman-adv/tvlv.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2007-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2019  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
  *
diff --git a/net/batman-adv/tvlv.h b/net/batman-adv/tvlv.h
index ef5867f49824..c0f033b1acb8 100644
--- a/net/batman-adv/tvlv.h
+++ b/net/batman-adv/tvlv.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2019  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
  *
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index cbe17da36fcb..a21b34ed6548 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2007-2018  B.A.T.M.A.N. contributors:
+/* Copyright (C) 2007-2019  B.A.T.M.A.N. contributors:
  *
  * Marek Lindner, Simon Wunderlich
  *
-- 
cgit v1.2.3


From 95f295f9fe08166307d94b641fba0b0e924d5d31 Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben.dooks@codethink.co.uk>
Date: Wed, 21 Nov 2018 16:13:23 +0000
Subject: dmaengine: tegra: add tracepoints to driver

Add some trace-points to the driver to allow for debuging via the
trace pipe.

Signed-off-by: Ben Dooks <ben.dooks@codethink.co.uk>
Reviewed-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/tegra20-apb-dma.c        |  7 +++++
 include/trace/events/tegra_apb_dma.h | 61 ++++++++++++++++++++++++++++++++++++
 2 files changed, 68 insertions(+)
 create mode 100644 include/trace/events/tegra_apb_dma.h

(limited to 'include')

diff --git a/drivers/dma/tegra20-apb-dma.c b/drivers/dma/tegra20-apb-dma.c
index c60c3f7cad5f..659b49bf6fdb 100644
--- a/drivers/dma/tegra20-apb-dma.c
+++ b/drivers/dma/tegra20-apb-dma.c
@@ -38,6 +38,9 @@
 
 #include "dmaengine.h"
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/tegra_apb_dma.h>
+
 #define TEGRA_APBDMA_GENERAL			0x0
 #define TEGRA_APBDMA_GENERAL_ENABLE		BIT(31)
 
@@ -672,6 +675,8 @@ static void tegra_dma_tasklet(unsigned long data)
 		dmaengine_desc_get_callback(&dma_desc->txd, &cb);
 		cb_count = dma_desc->cb_count;
 		dma_desc->cb_count = 0;
+		trace_tegra_dma_complete_cb(&tdc->dma_chan, cb_count,
+					    cb.callback);
 		spin_unlock_irqrestore(&tdc->lock, flags);
 		while (cb_count--)
 			dmaengine_desc_callback_invoke(&cb, NULL);
@@ -688,6 +693,7 @@ static irqreturn_t tegra_dma_isr(int irq, void *dev_id)
 
 	spin_lock_irqsave(&tdc->lock, flags);
 
+	trace_tegra_dma_isr(&tdc->dma_chan, irq);
 	status = tdc_read(tdc, TEGRA_APBDMA_CHAN_STATUS);
 	if (status & TEGRA_APBDMA_STATUS_ISE_EOC) {
 		tdc_write(tdc, TEGRA_APBDMA_CHAN_STATUS, status);
@@ -846,6 +852,7 @@ found:
 		dma_set_residue(txstate, residual);
 	}
 
+	trace_tegra_dma_tx_status(&tdc->dma_chan, cookie, txstate);
 	spin_unlock_irqrestore(&tdc->lock, flags);
 	return ret;
 }
diff --git a/include/trace/events/tegra_apb_dma.h b/include/trace/events/tegra_apb_dma.h
new file mode 100644
index 000000000000..0818f6286110
--- /dev/null
+++ b/include/trace/events/tegra_apb_dma.h
@@ -0,0 +1,61 @@
+#if !defined(_TRACE_TEGRA_APB_DMA_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_TEGRA_APM_DMA_H
+
+#include <linux/tracepoint.h>
+#include <linux/dmaengine.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM tegra_apb_dma
+
+TRACE_EVENT(tegra_dma_tx_status,
+	TP_PROTO(struct dma_chan *dc, dma_cookie_t cookie, struct dma_tx_state *state),
+	TP_ARGS(dc, cookie, state),
+	TP_STRUCT__entry(
+		__string(chan,	dev_name(&dc->dev->device))
+		__field(dma_cookie_t, cookie)
+		__field(__u32,	residue)
+	),
+	TP_fast_assign(
+		__assign_str(chan, dev_name(&dc->dev->device));
+		__entry->cookie = cookie;
+		__entry->residue = state ? state->residue : (u32)-1;
+	),
+	TP_printk("channel %s: dma cookie %d, residue %u",
+		  __get_str(chan), __entry->cookie, __entry->residue)
+);
+
+TRACE_EVENT(tegra_dma_complete_cb,
+	TP_PROTO(struct dma_chan *dc, int count, void *ptr),
+	TP_ARGS(dc, count, ptr),
+	TP_STRUCT__entry(
+		__string(chan,	dev_name(&dc->dev->device))
+		__field(int,	count)
+		__field(void *,	ptr)
+		),
+	TP_fast_assign(
+		__assign_str(chan, dev_name(&dc->dev->device));
+		__entry->count = count;
+		__entry->ptr = ptr;
+		),
+	TP_printk("channel %s: done %d, ptr %p",
+		  __get_str(chan), __entry->count, __entry->ptr)
+);
+
+TRACE_EVENT(tegra_dma_isr,
+	TP_PROTO(struct dma_chan *dc, int irq),
+	TP_ARGS(dc, irq),
+	TP_STRUCT__entry(
+		__string(chan,	dev_name(&dc->dev->device))
+		__field(int,	irq)
+	),
+	TP_fast_assign(
+		__assign_str(chan, dev_name(&dc->dev->device));
+		__entry->irq = irq;
+	),
+	TP_printk("%s: irq %d\n",  __get_str(chan), __entry->irq)
+);
+
+#endif /*  _TRACE_TEGRADMA_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
-- 
cgit v1.2.3


From 8fc5c73554db0ac18c0c6ac5b2099ab917f83bdf Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Fri, 9 Nov 2018 12:43:07 -0800
Subject: acpi/nfit, device-dax: Identify differentiated memory with a unique
 numa-node
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Persistent memory, as described by the ACPI NFIT (NVDIMM Firmware
Interface Table), is the first known instance of a memory range
described by a unique "target" proximity domain. Where "initiator" and
"target" proximity domains is an approach that the ACPI HMAT
(Heterogeneous Memory Attributes Table) uses to described the unique
performance properties of a memory range relative to a given initiator
(e.g. CPU or DMA device).

Currently the numa-node for a /dev/pmemX block-device or /dev/daxX.Y
char-device follows the traditional notion of 'numa-node' where the
attribute conveys the closest online numa-node. That numa-node attribute
is useful for cpu-binding and memory-binding processes *near* the
device. However, when the memory range backing a 'pmem', or 'dax' device
is onlined (memory hot-add) the memory-only-numa-node representing that
address needs to be differentiated from the set of online nodes. In
other words, the numa-node association of the device depends on whether
you can bind processes *near* the cpu-numa-node in the offline
device-case, or bind process *on* the memory-range directly after the
backing address range is onlined.

Allow for the case that platform firmware describes persistent memory
with a unique proximity domain, i.e. when it is distinct from the
proximity of DRAM and CPUs that are on the same socket. Plumb the Linux
numa-node translation of that proximity through the libnvdimm region
device to namespaces that are in device-dax mode. With this in place the
proposed kmem driver [1] can optionally discover a unique numa-node
number for the address range as it transitions the memory from an
offline state managed by a device-driver to an online memory range
managed by the core-mm.

[1]: https://lore.kernel.org/lkml/20181022201317.8558C1D8@viggo.jf.intel.com

Reported-by: Fan Du <fan.du@intel.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: "Oliver O'Halloran" <oohall@gmail.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Jérôme Glisse <jglisse@redhat.com>
Reviewed-by: Yang Shi <yang.shi@linux.alibaba.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 arch/powerpc/platforms/pseries/papr_scm.c | 1 +
 drivers/acpi/nfit/core.c                  | 8 ++++++--
 drivers/acpi/numa.c                       | 1 +
 drivers/dax/bus.c                         | 4 +++-
 drivers/dax/bus.h                         | 3 ++-
 drivers/dax/dax-private.h                 | 4 ++++
 drivers/dax/pmem/core.c                   | 4 +++-
 drivers/nvdimm/e820.c                     | 1 +
 drivers/nvdimm/nd.h                       | 2 +-
 drivers/nvdimm/of_pmem.c                  | 1 +
 drivers/nvdimm/region_devs.c              | 1 +
 include/linux/acpi.h                      | 5 +++++
 include/linux/libnvdimm.h                 | 1 +
 13 files changed, 30 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c
index 7d6457ab5d34..8806ac822627 100644
--- a/arch/powerpc/platforms/pseries/papr_scm.c
+++ b/arch/powerpc/platforms/pseries/papr_scm.c
@@ -236,6 +236,7 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
 	memset(&ndr_desc, 0, sizeof(ndr_desc));
 	ndr_desc.attr_groups = region_attr_groups;
 	ndr_desc.numa_node = dev_to_node(&p->pdev->dev);
+	ndr_desc.target_node = ndr_desc.numa_node;
 	ndr_desc.res = &p->res;
 	ndr_desc.of_node = p->dn;
 	ndr_desc.provider_data = p;
diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c
index 011d3db19c80..475899974c70 100644
--- a/drivers/acpi/nfit/core.c
+++ b/drivers/acpi/nfit/core.c
@@ -2869,11 +2869,15 @@ static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc,
 	ndr_desc->res = &res;
 	ndr_desc->provider_data = nfit_spa;
 	ndr_desc->attr_groups = acpi_nfit_region_attribute_groups;
-	if (spa->flags & ACPI_NFIT_PROXIMITY_VALID)
+	if (spa->flags & ACPI_NFIT_PROXIMITY_VALID) {
 		ndr_desc->numa_node = acpi_map_pxm_to_online_node(
 						spa->proximity_domain);
-	else
+		ndr_desc->target_node = acpi_map_pxm_to_node(
+				spa->proximity_domain);
+	} else {
 		ndr_desc->numa_node = NUMA_NO_NODE;
+		ndr_desc->target_node = NUMA_NO_NODE;
+	}
 
 	/*
 	 * Persistence domain bits are hierarchical, if
diff --git a/drivers/acpi/numa.c b/drivers/acpi/numa.c
index 274699463b4f..b9d86babb13a 100644
--- a/drivers/acpi/numa.c
+++ b/drivers/acpi/numa.c
@@ -84,6 +84,7 @@ int acpi_map_pxm_to_node(int pxm)
 
 	return node;
 }
+EXPORT_SYMBOL(acpi_map_pxm_to_node);
 
 /**
  * acpi_map_pxm_to_online_node - Map proximity ID to online node
diff --git a/drivers/dax/bus.c b/drivers/dax/bus.c
index 568168500217..c620ad52d7e5 100644
--- a/drivers/dax/bus.c
+++ b/drivers/dax/bus.c
@@ -214,7 +214,7 @@ static void dax_region_unregister(void *region)
 }
 
 struct dax_region *alloc_dax_region(struct device *parent, int region_id,
-		struct resource *res, unsigned int align,
+		struct resource *res, int target_node, unsigned int align,
 		unsigned long pfn_flags)
 {
 	struct dax_region *dax_region;
@@ -244,6 +244,7 @@ struct dax_region *alloc_dax_region(struct device *parent, int region_id,
 	dax_region->id = region_id;
 	dax_region->align = align;
 	dax_region->dev = parent;
+	dax_region->target_node = target_node;
 	if (sysfs_create_groups(&parent->kobj, dax_region_attribute_groups)) {
 		kfree(dax_region);
 		return NULL;
@@ -348,6 +349,7 @@ struct dev_dax *__devm_create_dev_dax(struct dax_region *dax_region, int id,
 
 	dev_dax->dax_dev = dax_dev;
 	dev_dax->region = dax_region;
+	dev_dax->target_node = dax_region->target_node;
 	kref_get(&dax_region->kref);
 
 	inode = dax_inode(dax_dev);
diff --git a/drivers/dax/bus.h b/drivers/dax/bus.h
index ce977552ffb5..8619e3299943 100644
--- a/drivers/dax/bus.h
+++ b/drivers/dax/bus.h
@@ -10,7 +10,8 @@ struct dax_device;
 struct dax_region;
 void dax_region_put(struct dax_region *dax_region);
 struct dax_region *alloc_dax_region(struct device *parent, int region_id,
-		struct resource *res, unsigned int align, unsigned long flags);
+		struct resource *res, int target_node, unsigned int align,
+		unsigned long flags);
 
 enum dev_dax_subsys {
 	DEV_DAX_BUS,
diff --git a/drivers/dax/dax-private.h b/drivers/dax/dax-private.h
index a82ce48f5884..a45612148ca0 100644
--- a/drivers/dax/dax-private.h
+++ b/drivers/dax/dax-private.h
@@ -26,6 +26,7 @@ void dax_bus_exit(void);
 /**
  * struct dax_region - mapping infrastructure for dax devices
  * @id: kernel-wide unique region for a memory range
+ * @target_node: effective numa node if this memory range is onlined
  * @kref: to pin while other agents have a need to do lookups
  * @dev: parent device backing this region
  * @align: allocation and mapping alignment for child dax devices
@@ -34,6 +35,7 @@ void dax_bus_exit(void);
  */
 struct dax_region {
 	int id;
+	int target_node;
 	struct kref kref;
 	struct device *dev;
 	unsigned int align;
@@ -46,6 +48,7 @@ struct dax_region {
  * data while the device is activated in the driver.
  * @region - parent region
  * @dax_dev - core dax functionality
+ * @target_node: effective numa node if dev_dax memory range is onlined
  * @dev - device core
  * @pgmap - pgmap for memmap setup / lifetime (driver owned)
  * @ref: pgmap reference count (driver owned)
@@ -54,6 +57,7 @@ struct dax_region {
 struct dev_dax {
 	struct dax_region *region;
 	struct dax_device *dax_dev;
+	int target_node;
 	struct device dev;
 	struct dev_pagemap pgmap;
 	struct percpu_ref ref;
diff --git a/drivers/dax/pmem/core.c b/drivers/dax/pmem/core.c
index bdcff1b14e95..f71019ce0647 100644
--- a/drivers/dax/pmem/core.c
+++ b/drivers/dax/pmem/core.c
@@ -20,6 +20,7 @@ struct dev_dax *__dax_pmem_probe(struct device *dev, enum dev_dax_subsys subsys)
 	struct nd_namespace_common *ndns;
 	struct nd_dax *nd_dax = to_nd_dax(dev);
 	struct nd_pfn *nd_pfn = &nd_dax->nd_pfn;
+	struct nd_region *nd_region = to_nd_region(dev->parent);
 
 	ndns = nvdimm_namespace_common_probe(dev);
 	if (IS_ERR(ndns))
@@ -52,7 +53,8 @@ struct dev_dax *__dax_pmem_probe(struct device *dev, enum dev_dax_subsys subsys)
 	memcpy(&res, &pgmap.res, sizeof(res));
 	res.start += offset;
 	dax_region = alloc_dax_region(dev, region_id, &res,
-			le32_to_cpu(pfn_sb->align), PFN_DEV|PFN_MAP);
+			nd_region->target_node, le32_to_cpu(pfn_sb->align),
+			PFN_DEV|PFN_MAP);
 	if (!dax_region)
 		return ERR_PTR(-ENOMEM);
 
diff --git a/drivers/nvdimm/e820.c b/drivers/nvdimm/e820.c
index 521eaf53a52a..36be9b619187 100644
--- a/drivers/nvdimm/e820.c
+++ b/drivers/nvdimm/e820.c
@@ -47,6 +47,7 @@ static int e820_register_one(struct resource *res, void *data)
 	ndr_desc.res = res;
 	ndr_desc.attr_groups = e820_pmem_region_attribute_groups;
 	ndr_desc.numa_node = e820_range_to_nid(res->start);
+	ndr_desc.target_node = ndr_desc.numa_node;
 	set_bit(ND_REGION_PAGEMAP, &ndr_desc.flags);
 	if (!nvdimm_pmem_region_create(nvdimm_bus, &ndr_desc))
 		return -ENXIO;
diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h
index cfde992684e7..0b3d7595b3cb 100644
--- a/drivers/nvdimm/nd.h
+++ b/drivers/nvdimm/nd.h
@@ -153,7 +153,7 @@ struct nd_region {
 	u16 ndr_mappings;
 	u64 ndr_size;
 	u64 ndr_start;
-	int id, num_lanes, ro, numa_node;
+	int id, num_lanes, ro, numa_node, target_node;
 	void *provider_data;
 	struct kernfs_node *bb_state;
 	struct badblocks bb;
diff --git a/drivers/nvdimm/of_pmem.c b/drivers/nvdimm/of_pmem.c
index 0a701837dfc0..ecaaa27438e2 100644
--- a/drivers/nvdimm/of_pmem.c
+++ b/drivers/nvdimm/of_pmem.c
@@ -68,6 +68,7 @@ static int of_pmem_region_probe(struct platform_device *pdev)
 		memset(&ndr_desc, 0, sizeof(ndr_desc));
 		ndr_desc.attr_groups = region_attr_groups;
 		ndr_desc.numa_node = dev_to_node(&pdev->dev);
+		ndr_desc.target_node = ndr_desc.numa_node;
 		ndr_desc.res = &pdev->resource[i];
 		ndr_desc.of_node = np;
 		set_bit(ND_REGION_PAGEMAP, &ndr_desc.flags);
diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c
index e2818f94f292..caf2f3129ccd 100644
--- a/drivers/nvdimm/region_devs.c
+++ b/drivers/nvdimm/region_devs.c
@@ -1065,6 +1065,7 @@ static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus,
 	nd_region->flags = ndr_desc->flags;
 	nd_region->ro = ro;
 	nd_region->numa_node = ndr_desc->numa_node;
+	nd_region->target_node = ndr_desc->target_node;
 	ida_init(&nd_region->ns_ida);
 	ida_init(&nd_region->btt_ida);
 	ida_init(&nd_region->pfn_ida);
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 87715f20b69a..eddf2736e5a6 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -400,12 +400,17 @@ extern bool acpi_osi_is_win8(void);
 
 #ifdef CONFIG_ACPI_NUMA
 int acpi_map_pxm_to_online_node(int pxm);
+int acpi_map_pxm_to_node(int pxm);
 int acpi_get_node(acpi_handle handle);
 #else
 static inline int acpi_map_pxm_to_online_node(int pxm)
 {
 	return 0;
 }
+static inline int acpi_map_pxm_to_node(int pxm)
+{
+	return 0;
+}
 static inline int acpi_get_node(acpi_handle handle)
 {
 	return 0;
diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h
index 5440f11b0907..56bc545ad3b2 100644
--- a/include/linux/libnvdimm.h
+++ b/include/linux/libnvdimm.h
@@ -128,6 +128,7 @@ struct nd_region_desc {
 	void *provider_data;
 	int num_lanes;
 	int numa_node;
+	int target_node;
 	unsigned long flags;
 	struct device_node *of_node;
 };
-- 
cgit v1.2.3


From ebc40be2b8eec093abbbd87658a6726ff84a61f5 Mon Sep 17 00:00:00 2001
From: Fabien Dessenne <fabien.dessenne@st.com>
Date: Wed, 7 Nov 2018 11:18:34 +0100
Subject: remoteproc: fix kernel-doc comment for parse_fw

Fix the kernel-doc comment for "parse_fw" and fix a typo.

Signed-off-by: Fabien Dessenne <fabien.dessenne@st.com>
Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
---
 include/linux/remoteproc.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h
index 507a2b524208..68e72f33c705 100644
--- a/include/linux/remoteproc.h
+++ b/include/linux/remoteproc.h
@@ -345,9 +345,9 @@ struct firmware;
  * @stop:	power off the device
  * @kick:	kick a virtqueue (virtqueue id given as a parameter)
  * @da_to_va:	optional platform hook to perform address translations
- * @load_rsc_table:	load resource table from firmware image
+ * @parse_fw:	parse firmware to extract information (e.g. resource table)
  * @find_loaded_rsc_table: find the loaded resouce table
- * @load:		load firmeware to memory, where the remote processor
+ * @load:		load firmware to memory, where the remote processor
  *			expects to find it
  * @sanity_check:	sanity check the fw image
  * @get_boot_addr:	get boot address to entry point specified in firmware
-- 
cgit v1.2.3


From d7dba6be0f31ae61f5f3296aa130f45d57d30f74 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 7 Jan 2019 13:07:36 +0200
Subject: dmaengine: dw: Remove misleading is_private property

The commit a9ddb575d6d6

   ("dmaengine: dw_dmac: Enhance device tree support")

introduces is_private property in uncertain understanding what does it mean.

First of all, documentation defines DMA_PRIVATE capability as

Documentation/crypto/async-tx-api.txt:
  The DMA_PRIVATE capability flag is used to tag dma devices that should not be
  used by the general-purpose allocator. It can be set at initialization time
  if it is known that a channel will always be private. Alternatively,
  it is set when dma_request_channel() finds an unused "public" channel.

  A couple caveats to note when implementing a driver and consumer:
  1/ Once a channel has been privately allocated it will no longer be
     considered by the general-purpose allocator even after a call to
     dma_release_channel().
  2/ Since capabilities are specified at the device level a dma_device with
     multiple channels will either have all channels public, or all channels
     private.

Documentation/driver-api/dmaengine/provider.rst:
  - DMA_PRIVATE
    The devices only supports slave transfers, and as such isn't available
    for async transfers.

The capability had been introduced by the commit 59b5ec21446b

  ("dmaengine: introduce dma_request_channel and private channels")

and some code didn't changed from that times ever.

Taking into consideration above and the fact that on all known platforms
Synopsys DesignWare DMA engine is attached to serve slave transfers,
the DMA_PRIVATE capability must be enabled for this device unconditionally.
Otherwise, as rightfully noticed in drivers/dma/at_xdmac.c:
  /*
   * Without DMA_PRIVATE the driver is not able to allocate more than
   * one channel, second allocation fails in private_candidate.
   */
because of of a caveats mentioned in above documentation excerpts.

So, remove conditional around DMA_PRIVATE followed by removal leftovers.

If someone wonders, DMA_PRIVATE can be not used if and only if the all channels
of the DMA controller are supposed to serve memory-to-memory like operations.
For example, EP93xx has two controllers, one of which can only perform
memory-to-memory transfers

Note, this change doesn't affect dmatest to be able to test such controllers.

Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> (maintainer:SERIAL DRIVERS)
Cc: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 Documentation/devicetree/bindings/dma/snps-dma.txt | 2 --
 drivers/dma/dw/core.c                              | 4 +---
 drivers/dma/dw/pci.c                               | 1 -
 drivers/dma/dw/platform.c                          | 3 ---
 drivers/tty/serial/8250/8250_lpss.c                | 1 -
 include/linux/platform_data/dma-dw.h               | 3 ---
 6 files changed, 1 insertion(+), 13 deletions(-)

(limited to 'include')

diff --git a/Documentation/devicetree/bindings/dma/snps-dma.txt b/Documentation/devicetree/bindings/dma/snps-dma.txt
index db757df7057d..0bedceed1963 100644
--- a/Documentation/devicetree/bindings/dma/snps-dma.txt
+++ b/Documentation/devicetree/bindings/dma/snps-dma.txt
@@ -23,8 +23,6 @@ Deprecated properties:
 
 
 Optional properties:
-- is_private: The device channels should be marked as private and not for by the
-  general purpose DMA channel allocator. False if not passed.
 - multi-block: Multi block transfers supported by hardware. Array property with
   one cell per channel. 0: not supported, 1 (default): supported.
 - snps,dma-protection-control: AHB HPROT[3:1] protection setting.
diff --git a/drivers/dma/dw/core.c b/drivers/dma/dw/core.c
index dc053e62f894..e25503986680 100644
--- a/drivers/dma/dw/core.c
+++ b/drivers/dma/dw/core.c
@@ -1227,7 +1227,6 @@ int dw_dma_probe(struct dw_dma_chip *chip)
 		pdata->block_size = dma_readl(dw, MAX_BLK_SIZE);
 
 		/* Fill platform data with the default values */
-		pdata->is_private = true;
 		pdata->is_memcpy = true;
 		pdata->chan_allocation_order = CHAN_ALLOCATION_ASCENDING;
 		pdata->chan_priority = CHAN_PRIORITY_ASCENDING;
@@ -1340,8 +1339,7 @@ int dw_dma_probe(struct dw_dma_chip *chip)
 
 	/* Set capabilities */
 	dma_cap_set(DMA_SLAVE, dw->dma.cap_mask);
-	if (pdata->is_private)
-		dma_cap_set(DMA_PRIVATE, dw->dma.cap_mask);
+	dma_cap_set(DMA_PRIVATE, dw->dma.cap_mask);
 	if (pdata->is_memcpy)
 		dma_cap_set(DMA_MEMCPY, dw->dma.cap_mask);
 
diff --git a/drivers/dma/dw/pci.c b/drivers/dma/dw/pci.c
index 313ba10c6224..570498faadc3 100644
--- a/drivers/dma/dw/pci.c
+++ b/drivers/dma/dw/pci.c
@@ -17,7 +17,6 @@
 
 static struct dw_dma_platform_data mrfld_pdata = {
 	.nr_channels = 8,
-	.is_private = true,
 	.is_memcpy = true,
 	.is_idma32 = true,
 	.chan_allocation_order = CHAN_ALLOCATION_ASCENDING,
diff --git a/drivers/dma/dw/platform.c b/drivers/dma/dw/platform.c
index 31ff8113c3de..6dd8cd1820c1 100644
--- a/drivers/dma/dw/platform.c
+++ b/drivers/dma/dw/platform.c
@@ -128,9 +128,6 @@ dw_dma_parse_dt(struct platform_device *pdev)
 	pdata->nr_masters = nr_masters;
 	pdata->nr_channels = nr_channels;
 
-	if (of_property_read_bool(np, "is_private"))
-		pdata->is_private = true;
-
 	/*
 	 * All known devices, which use DT for configuration, support
 	 * memory-to-memory transfers. So enable it by default.
diff --git a/drivers/tty/serial/8250/8250_lpss.c b/drivers/tty/serial/8250/8250_lpss.c
index 98dbc796353f..53ca9ba6ab4b 100644
--- a/drivers/tty/serial/8250/8250_lpss.c
+++ b/drivers/tty/serial/8250/8250_lpss.c
@@ -153,7 +153,6 @@ static int byt_serial_setup(struct lpss8250 *lpss, struct uart_port *port)
 #ifdef CONFIG_SERIAL_8250_DMA
 static const struct dw_dma_platform_data qrk_serial_dma_pdata = {
 	.nr_channels = 2,
-	.is_private = true,
 	.chan_allocation_order = CHAN_ALLOCATION_ASCENDING,
 	.chan_priority = CHAN_PRIORITY_ASCENDING,
 	.block_size = 4095,
diff --git a/include/linux/platform_data/dma-dw.h b/include/linux/platform_data/dma-dw.h
index 1a1d58ebffbf..d443025c5c72 100644
--- a/include/linux/platform_data/dma-dw.h
+++ b/include/linux/platform_data/dma-dw.h
@@ -38,8 +38,6 @@ struct dw_dma_slave {
 /**
  * struct dw_dma_platform_data - Controller configuration parameters
  * @nr_channels: Number of channels supported by hardware (max 8)
- * @is_private: The device channels should be marked as private and not for
- *	by the general purpose DMA channel allocator.
  * @is_memcpy: The device channels do support memory-to-memory transfers.
  * @is_idma32: The type of the DMA controller is iDMA32
  * @chan_allocation_order: Allocate channels starting from 0 or 7
@@ -53,7 +51,6 @@ struct dw_dma_slave {
  */
 struct dw_dma_platform_data {
 	unsigned int	nr_channels;
-	bool		is_private;
 	bool		is_memcpy;
 	bool		is_idma32;
 #define CHAN_ALLOCATION_ASCENDING	0	/* zero to seven */
-- 
cgit v1.2.3


From 078165779608873e7b6eae1316a39c73af9f3edc Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 7 Jan 2019 13:07:37 +0200
Subject: dmaengine: dw: Remove unused internal property

All known devices, which use DT for configuration, support
memory-to-memory transfers. So enable it by default.

The rest two cases, i.e. Intel Quark and PPC460ex, instantiate DMA driver and
use its channels exclusively for hardware, which means there is no available
channel for any other purposes anyway.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/dw/core.c                | 4 +---
 drivers/dma/dw/pci.c                 | 1 -
 drivers/dma/dw/platform.c            | 6 ------
 include/linux/platform_data/dma-dw.h | 2 --
 4 files changed, 1 insertion(+), 12 deletions(-)

(limited to 'include')

diff --git a/drivers/dma/dw/core.c b/drivers/dma/dw/core.c
index e25503986680..4982e443869c 100644
--- a/drivers/dma/dw/core.c
+++ b/drivers/dma/dw/core.c
@@ -1227,7 +1227,6 @@ int dw_dma_probe(struct dw_dma_chip *chip)
 		pdata->block_size = dma_readl(dw, MAX_BLK_SIZE);
 
 		/* Fill platform data with the default values */
-		pdata->is_memcpy = true;
 		pdata->chan_allocation_order = CHAN_ALLOCATION_ASCENDING;
 		pdata->chan_priority = CHAN_PRIORITY_ASCENDING;
 	} else if (chip->pdata->nr_channels > DW_DMA_MAX_NR_CHANNELS) {
@@ -1340,8 +1339,7 @@ int dw_dma_probe(struct dw_dma_chip *chip)
 	/* Set capabilities */
 	dma_cap_set(DMA_SLAVE, dw->dma.cap_mask);
 	dma_cap_set(DMA_PRIVATE, dw->dma.cap_mask);
-	if (pdata->is_memcpy)
-		dma_cap_set(DMA_MEMCPY, dw->dma.cap_mask);
+	dma_cap_set(DMA_MEMCPY, dw->dma.cap_mask);
 
 	dw->dma.dev = chip->dev;
 	dw->dma.device_alloc_chan_resources = dwc_alloc_chan_resources;
diff --git a/drivers/dma/dw/pci.c b/drivers/dma/dw/pci.c
index 570498faadc3..66d98d7ccad0 100644
--- a/drivers/dma/dw/pci.c
+++ b/drivers/dma/dw/pci.c
@@ -17,7 +17,6 @@
 
 static struct dw_dma_platform_data mrfld_pdata = {
 	.nr_channels = 8,
-	.is_memcpy = true,
 	.is_idma32 = true,
 	.chan_allocation_order = CHAN_ALLOCATION_ASCENDING,
 	.chan_priority = CHAN_PRIORITY_ASCENDING,
diff --git a/drivers/dma/dw/platform.c b/drivers/dma/dw/platform.c
index 6dd8cd1820c1..58fc1ba02a1e 100644
--- a/drivers/dma/dw/platform.c
+++ b/drivers/dma/dw/platform.c
@@ -128,12 +128,6 @@ dw_dma_parse_dt(struct platform_device *pdev)
 	pdata->nr_masters = nr_masters;
 	pdata->nr_channels = nr_channels;
 
-	/*
-	 * All known devices, which use DT for configuration, support
-	 * memory-to-memory transfers. So enable it by default.
-	 */
-	pdata->is_memcpy = true;
-
 	if (!of_property_read_u32(np, "chan_allocation_order", &tmp))
 		pdata->chan_allocation_order = (unsigned char)tmp;
 
diff --git a/include/linux/platform_data/dma-dw.h b/include/linux/platform_data/dma-dw.h
index d443025c5c72..1c85eeee4171 100644
--- a/include/linux/platform_data/dma-dw.h
+++ b/include/linux/platform_data/dma-dw.h
@@ -38,7 +38,6 @@ struct dw_dma_slave {
 /**
  * struct dw_dma_platform_data - Controller configuration parameters
  * @nr_channels: Number of channels supported by hardware (max 8)
- * @is_memcpy: The device channels do support memory-to-memory transfers.
  * @is_idma32: The type of the DMA controller is iDMA32
  * @chan_allocation_order: Allocate channels starting from 0 or 7
  * @chan_priority: Set channel priority increasing from 0 to 7 or 7 to 0.
@@ -51,7 +50,6 @@ struct dw_dma_slave {
  */
 struct dw_dma_platform_data {
 	unsigned int	nr_channels;
-	bool		is_memcpy;
 	bool		is_idma32;
 #define CHAN_ALLOCATION_ASCENDING	0	/* zero to seven */
 #define CHAN_ALLOCATION_DESCENDING	1	/* seven to zero */
-- 
cgit v1.2.3


From 69da8be90d5e85e60b5377c47384154b9dabf592 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 7 Jan 2019 13:07:38 +0200
Subject: dmaengine: dw: Split DW and iDMA 32-bit operations

Here is a kinda big refactoring that should have been done
in the first place, when Intel iDMA 32-bit support appeared.

It splits operations which are different to Synopsys DesignWare and
Intel iDMA 32-bit controllers.

No functional change intended.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/dw/Makefile              |   2 +-
 drivers/dma/dw/core.c                | 190 +++++++----------------------------
 drivers/dma/dw/dw.c                  | 112 +++++++++++++++++++++
 drivers/dma/dw/idma32.c              | 138 +++++++++++++++++++++++++
 drivers/dma/dw/internal.h            |  10 +-
 drivers/dma/dw/pci.c                 |  45 ++++++---
 drivers/dma/dw/platform.c            |   8 +-
 drivers/dma/dw/regs.h                |  13 +++
 include/linux/dma/dw.h               |   4 +
 include/linux/platform_data/dma-dw.h |   2 -
 10 files changed, 343 insertions(+), 181 deletions(-)
 create mode 100644 drivers/dma/dw/dw.c
 create mode 100644 drivers/dma/dw/idma32.c

(limited to 'include')

diff --git a/drivers/dma/dw/Makefile b/drivers/dma/dw/Makefile
index 2b949c2e4504..63ed895c09aa 100644
--- a/drivers/dma/dw/Makefile
+++ b/drivers/dma/dw/Makefile
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0
 obj-$(CONFIG_DW_DMAC_CORE)	+= dw_dmac_core.o
-dw_dmac_core-objs	:= core.o
+dw_dmac_core-objs	:= core.o dw.o idma32.o
 
 obj-$(CONFIG_DW_DMAC)		+= dw_dmac.o
 dw_dmac-objs		:= platform.o
diff --git a/drivers/dma/dw/core.c b/drivers/dma/dw/core.c
index 4982e443869c..8a581d86ea8d 100644
--- a/drivers/dma/dw/core.c
+++ b/drivers/dma/dw/core.c
@@ -138,44 +138,6 @@ static void dwc_desc_put(struct dw_dma_chan *dwc, struct dw_desc *desc)
 	dwc->descs_allocated--;
 }
 
-static void dwc_initialize_chan_idma32(struct dw_dma_chan *dwc)
-{
-	u32 cfghi = 0;
-	u32 cfglo = 0;
-
-	/* Set default burst alignment */
-	cfglo |= IDMA32C_CFGL_DST_BURST_ALIGN | IDMA32C_CFGL_SRC_BURST_ALIGN;
-
-	/* Low 4 bits of the request lines */
-	cfghi |= IDMA32C_CFGH_DST_PER(dwc->dws.dst_id & 0xf);
-	cfghi |= IDMA32C_CFGH_SRC_PER(dwc->dws.src_id & 0xf);
-
-	/* Request line extension (2 bits) */
-	cfghi |= IDMA32C_CFGH_DST_PER_EXT(dwc->dws.dst_id >> 4 & 0x3);
-	cfghi |= IDMA32C_CFGH_SRC_PER_EXT(dwc->dws.src_id >> 4 & 0x3);
-
-	channel_writel(dwc, CFG_LO, cfglo);
-	channel_writel(dwc, CFG_HI, cfghi);
-}
-
-static void dwc_initialize_chan_dw(struct dw_dma_chan *dwc)
-{
-	struct dw_dma *dw = to_dw_dma(dwc->chan.device);
-	u32 cfghi = DWC_CFGH_FIFO_MODE;
-	u32 cfglo = DWC_CFGL_CH_PRIOR(dwc->priority);
-	bool hs_polarity = dwc->dws.hs_polarity;
-
-	cfghi |= DWC_CFGH_DST_PER(dwc->dws.dst_id);
-	cfghi |= DWC_CFGH_SRC_PER(dwc->dws.src_id);
-	cfghi |= DWC_CFGH_PROTCTL(dw->pdata->protctl);
-
-	/* Set polarity of handshake interface */
-	cfglo |= hs_polarity ? DWC_CFGL_HS_DST_POL | DWC_CFGL_HS_SRC_POL : 0;
-
-	channel_writel(dwc, CFG_LO, cfglo);
-	channel_writel(dwc, CFG_HI, cfghi);
-}
-
 static void dwc_initialize(struct dw_dma_chan *dwc)
 {
 	struct dw_dma *dw = to_dw_dma(dwc->chan.device);
@@ -183,10 +145,7 @@ static void dwc_initialize(struct dw_dma_chan *dwc)
 	if (test_bit(DW_DMA_IS_INITIALIZED, &dwc->flags))
 		return;
 
-	if (dw->pdata->is_idma32)
-		dwc_initialize_chan_idma32(dwc);
-	else
-		dwc_initialize_chan_dw(dwc);
+	dw->initialize_chan(dwc);
 
 	/* Enable interrupts */
 	channel_set_bit(dw, MASK.XFER, dwc->mask);
@@ -215,37 +174,6 @@ static inline void dwc_chan_disable(struct dw_dma *dw, struct dw_dma_chan *dwc)
 		cpu_relax();
 }
 
-static u32 bytes2block(struct dw_dma_chan *dwc, size_t bytes,
-			  unsigned int width, size_t *len)
-{
-	struct dw_dma *dw = to_dw_dma(dwc->chan.device);
-	u32 block;
-
-	/* Always in bytes for iDMA 32-bit */
-	if (dw->pdata->is_idma32)
-		width = 0;
-
-	if ((bytes >> width) > dwc->block_size) {
-		block = dwc->block_size;
-		*len = block << width;
-	} else {
-		block = bytes >> width;
-		*len = bytes;
-	}
-
-	return block;
-}
-
-static size_t block2bytes(struct dw_dma_chan *dwc, u32 block, u32 width)
-{
-	struct dw_dma *dw = to_dw_dma(dwc->chan.device);
-
-	if (dw->pdata->is_idma32)
-		return IDMA32C_CTLH_BLOCK_TS(block);
-
-	return DWC_CTLH_BLOCK_TS(block) << width;
-}
-
 /*----------------------------------------------------------------------*/
 
 /* Perform single block transfer */
@@ -391,10 +319,11 @@ static void dwc_complete_all(struct dw_dma *dw, struct dw_dma_chan *dwc)
 /* Returns how many bytes were already received from source */
 static inline u32 dwc_get_sent(struct dw_dma_chan *dwc)
 {
+	struct dw_dma *dw = to_dw_dma(dwc->chan.device);
 	u32 ctlhi = channel_readl(dwc, CTL_HI);
 	u32 ctllo = channel_readl(dwc, CTL_LO);
 
-	return block2bytes(dwc, ctlhi, ctllo >> 4 & 7);
+	return dw->block2bytes(dwc, ctlhi, ctllo >> 4 & 7);
 }
 
 static void dwc_scan_descriptors(struct dw_dma *dw, struct dw_dma_chan *dwc)
@@ -651,7 +580,7 @@ dwc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
 	unsigned int		src_width;
 	unsigned int		dst_width;
 	unsigned int		data_width = dw->pdata->data_width[m_master];
-	u32			ctllo;
+	u32			ctllo, ctlhi;
 	u8			lms = DWC_LLP_LMS(m_master);
 
 	dev_vdbg(chan2dev(chan),
@@ -680,10 +609,12 @@ dwc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
 		if (!desc)
 			goto err_desc_get;
 
+		ctlhi = dw->bytes2block(dwc, len - offset, src_width, &xfer_count);
+
 		lli_write(desc, sar, src + offset);
 		lli_write(desc, dar, dest + offset);
 		lli_write(desc, ctllo, ctllo);
-		lli_write(desc, ctlhi, bytes2block(dwc, len - offset, src_width, &xfer_count));
+		lli_write(desc, ctlhi, ctlhi);
 		desc->len = xfer_count;
 
 		if (!first) {
@@ -721,7 +652,7 @@ dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 	struct dma_slave_config	*sconfig = &dwc->dma_sconfig;
 	struct dw_desc		*prev;
 	struct dw_desc		*first;
-	u32			ctllo;
+	u32			ctllo, ctlhi;
 	u8			m_master = dwc->dws.m_master;
 	u8			lms = DWC_LLP_LMS(m_master);
 	dma_addr_t		reg;
@@ -768,9 +699,11 @@ slave_sg_todev_fill_desc:
 			if (!desc)
 				goto err_desc_get;
 
+			ctlhi = dw->bytes2block(dwc, len, mem_width, &dlen);
+
 			lli_write(desc, sar, mem);
 			lli_write(desc, dar, reg);
-			lli_write(desc, ctlhi, bytes2block(dwc, len, mem_width, &dlen));
+			lli_write(desc, ctlhi, ctlhi);
 			lli_write(desc, ctllo, ctllo | DWC_CTLL_SRC_WIDTH(mem_width));
 			desc->len = dlen;
 
@@ -814,9 +747,11 @@ slave_sg_fromdev_fill_desc:
 			if (!desc)
 				goto err_desc_get;
 
+			ctlhi = dw->bytes2block(dwc, len, reg_width, &dlen);
+
 			lli_write(desc, sar, reg);
 			lli_write(desc, dar, mem);
-			lli_write(desc, ctlhi, bytes2block(dwc, len, reg_width, &dlen));
+			lli_write(desc, ctlhi, ctlhi);
 			mem_width = __ffs(data_width | mem | dlen);
 			lli_write(desc, ctllo, ctllo | DWC_CTLL_DST_WIDTH(mem_width));
 			desc->len = dlen;
@@ -876,22 +811,12 @@ EXPORT_SYMBOL_GPL(dw_dma_filter);
 static int dwc_config(struct dma_chan *chan, struct dma_slave_config *sconfig)
 {
 	struct dw_dma_chan *dwc = to_dw_dma_chan(chan);
-	struct dma_slave_config *sc = &dwc->dma_sconfig;
 	struct dw_dma *dw = to_dw_dma(chan->device);
-	/*
-	 * Fix sconfig's burst size according to dw_dmac. We need to convert
-	 * them as:
-	 * 1 -> 0, 4 -> 1, 8 -> 2, 16 -> 3.
-	 *
-	 * NOTE: burst size 2 is not supported by DesignWare controller.
-	 *       iDMA 32-bit supports it.
-	 */
-	u32 s = dw->pdata->is_idma32 ? 1 : 2;
 
 	memcpy(&dwc->dma_sconfig, sconfig, sizeof(*sconfig));
 
-	sc->src_maxburst = sc->src_maxburst > 1 ? fls(sc->src_maxburst) - s : 0;
-	sc->dst_maxburst = sc->dst_maxburst > 1 ? fls(sc->dst_maxburst) - s : 0;
+	dw->encode_maxburst(dwc, &dwc->dma_sconfig.src_maxburst);
+	dw->encode_maxburst(dwc, &dwc->dma_sconfig.dst_maxburst);
 
 	return 0;
 }
@@ -900,16 +825,9 @@ static void dwc_chan_pause(struct dw_dma_chan *dwc, bool drain)
 {
 	struct dw_dma *dw = to_dw_dma(dwc->chan.device);
 	unsigned int		count = 20;	/* timeout iterations */
-	u32			cfglo;
 
-	cfglo = channel_readl(dwc, CFG_LO);
-	if (dw->pdata->is_idma32) {
-		if (drain)
-			cfglo |= IDMA32C_CFGL_CH_DRAIN;
-		else
-			cfglo &= ~IDMA32C_CFGL_CH_DRAIN;
-	}
-	channel_writel(dwc, CFG_LO, cfglo | DWC_CFGL_CH_SUSP);
+	dw->suspend_chan(dwc, drain);
+
 	while (!(channel_readl(dwc, CFG_LO) & DWC_CFGL_FIFO_EMPTY) && count--)
 		udelay(2);
 
@@ -1058,33 +976,7 @@ static void dwc_issue_pending(struct dma_chan *chan)
 
 /*----------------------------------------------------------------------*/
 
-/*
- * Program FIFO size of channels.
- *
- * By default full FIFO (512 bytes) is assigned to channel 0. Here we
- * slice FIFO on equal parts between channels.
- */
-static void idma32_fifo_partition(struct dw_dma *dw)
-{
-	u64 value = IDMA32C_FP_PSIZE_CH0(64) | IDMA32C_FP_PSIZE_CH1(64) |
-		    IDMA32C_FP_UPDATE;
-	u64 fifo_partition = 0;
-
-	if (!dw->pdata->is_idma32)
-		return;
-
-	/* Fill FIFO_PARTITION low bits (Channels 0..1, 4..5) */
-	fifo_partition |= value << 0;
-
-	/* Fill FIFO_PARTITION high bits (Channels 2..3, 6..7) */
-	fifo_partition |= value << 32;
-
-	/* Program FIFO Partition registers - 64 bytes per channel */
-	idma32_writeq(dw, FIFO_PARTITION1, fifo_partition);
-	idma32_writeq(dw, FIFO_PARTITION0, fifo_partition);
-}
-
-static void dw_dma_off(struct dw_dma *dw)
+void do_dw_dma_off(struct dw_dma *dw)
 {
 	unsigned int i;
 
@@ -1103,7 +995,7 @@ static void dw_dma_off(struct dw_dma *dw)
 		clear_bit(DW_DMA_IS_INITIALIZED, &dw->chan[i].flags);
 }
 
-static void dw_dma_on(struct dw_dma *dw)
+void do_dw_dma_on(struct dw_dma *dw)
 {
 	dma_writel(dw, CFG, DW_CFG_DMA_EN);
 }
@@ -1139,7 +1031,7 @@ static int dwc_alloc_chan_resources(struct dma_chan *chan)
 
 	/* Enable controller here if needed */
 	if (!dw->in_use)
-		dw_dma_on(dw);
+		do_dw_dma_on(dw);
 	dw->in_use |= dwc->mask;
 
 	return 0;
@@ -1177,30 +1069,25 @@ static void dwc_free_chan_resources(struct dma_chan *chan)
 	/* Disable controller in case it was a last user */
 	dw->in_use &= ~dwc->mask;
 	if (!dw->in_use)
-		dw_dma_off(dw);
+		do_dw_dma_off(dw);
 
 	dev_vdbg(chan2dev(chan), "%s: done\n", __func__);
 }
 
-int dw_dma_probe(struct dw_dma_chip *chip)
+int do_dma_probe(struct dw_dma_chip *chip)
 {
+	struct dw_dma *dw = chip->dw;
 	struct dw_dma_platform_data *pdata;
-	struct dw_dma		*dw;
 	bool			autocfg = false;
 	unsigned int		dw_params;
 	unsigned int		i;
 	int			err;
 
-	dw = devm_kzalloc(chip->dev, sizeof(*dw), GFP_KERNEL);
-	if (!dw)
-		return -ENOMEM;
-
 	dw->pdata = devm_kzalloc(chip->dev, sizeof(*dw->pdata), GFP_KERNEL);
 	if (!dw->pdata)
 		return -ENOMEM;
 
 	dw->regs = chip->regs;
-	chip->dw = dw;
 
 	pm_runtime_get_sync(chip->dev);
 
@@ -1250,15 +1137,10 @@ int dw_dma_probe(struct dw_dma_chip *chip)
 	dw->all_chan_mask = (1 << pdata->nr_channels) - 1;
 
 	/* Force dma off, just in case */
-	dw_dma_off(dw);
-
-	idma32_fifo_partition(dw);
+	dw->disable(dw);
 
 	/* Device and instance ID for IRQ and DMA pool */
-	if (pdata->is_idma32)
-		snprintf(dw->name, sizeof(dw->name), "idma32:dmac%d", chip->id);
-	else
-		snprintf(dw->name, sizeof(dw->name), "dw:dmac%d", chip->id);
+	dw->set_device_name(dw, chip->id);
 
 	/* Create a pool of consistent memory blocks for hardware descriptors */
 	dw->desc_pool = dmam_pool_create(dw->name, chip->dev,
@@ -1380,16 +1262,15 @@ err_pdata:
 	pm_runtime_put_sync_suspend(chip->dev);
 	return err;
 }
-EXPORT_SYMBOL_GPL(dw_dma_probe);
 
-int dw_dma_remove(struct dw_dma_chip *chip)
+int do_dma_remove(struct dw_dma_chip *chip)
 {
 	struct dw_dma		*dw = chip->dw;
 	struct dw_dma_chan	*dwc, *_dwc;
 
 	pm_runtime_get_sync(chip->dev);
 
-	dw_dma_off(dw);
+	do_dw_dma_off(dw);
 	dma_async_device_unregister(&dw->dma);
 
 	free_irq(chip->irq, dw);
@@ -1404,27 +1285,24 @@ int dw_dma_remove(struct dw_dma_chip *chip)
 	pm_runtime_put_sync_suspend(chip->dev);
 	return 0;
 }
-EXPORT_SYMBOL_GPL(dw_dma_remove);
 
-int dw_dma_disable(struct dw_dma_chip *chip)
+int do_dw_dma_disable(struct dw_dma_chip *chip)
 {
 	struct dw_dma *dw = chip->dw;
 
-	dw_dma_off(dw);
+	dw->disable(dw);
 	return 0;
 }
-EXPORT_SYMBOL_GPL(dw_dma_disable);
+EXPORT_SYMBOL_GPL(do_dw_dma_disable);
 
-int dw_dma_enable(struct dw_dma_chip *chip)
+int do_dw_dma_enable(struct dw_dma_chip *chip)
 {
 	struct dw_dma *dw = chip->dw;
 
-	idma32_fifo_partition(dw);
-
-	dw_dma_on(dw);
+	dw->enable(dw);
 	return 0;
 }
-EXPORT_SYMBOL_GPL(dw_dma_enable);
+EXPORT_SYMBOL_GPL(do_dw_dma_enable);
 
 MODULE_LICENSE("GPL v2");
 MODULE_DESCRIPTION("Synopsys DesignWare DMA Controller core driver");
diff --git a/drivers/dma/dw/dw.c b/drivers/dma/dw/dw.c
new file mode 100644
index 000000000000..977aa28bf81d
--- /dev/null
+++ b/drivers/dma/dw/dw.c
@@ -0,0 +1,112 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2007-2008 Atmel Corporation
+// Copyright (C) 2010-2011 ST Microelectronics
+// Copyright (C) 2013,2018 Intel Corporation
+
+#include <linux/bitops.h>
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+
+#include "internal.h"
+
+static void dw_dma_initialize_chan(struct dw_dma_chan *dwc)
+{
+	struct dw_dma *dw = to_dw_dma(dwc->chan.device);
+	u32 cfghi = DWC_CFGH_FIFO_MODE;
+	u32 cfglo = DWC_CFGL_CH_PRIOR(dwc->priority);
+	bool hs_polarity = dwc->dws.hs_polarity;
+
+	cfghi |= DWC_CFGH_DST_PER(dwc->dws.dst_id);
+	cfghi |= DWC_CFGH_SRC_PER(dwc->dws.src_id);
+	cfghi |= DWC_CFGH_PROTCTL(dw->pdata->protctl);
+
+	/* Set polarity of handshake interface */
+	cfglo |= hs_polarity ? DWC_CFGL_HS_DST_POL | DWC_CFGL_HS_SRC_POL : 0;
+
+	channel_writel(dwc, CFG_LO, cfglo);
+	channel_writel(dwc, CFG_HI, cfghi);
+}
+
+static void dw_dma_suspend_chan(struct dw_dma_chan *dwc, bool drain)
+{
+	u32 cfglo = channel_readl(dwc, CFG_LO);
+
+	channel_writel(dwc, CFG_LO, cfglo | DWC_CFGL_CH_SUSP);
+}
+
+static u32 dw_dma_bytes2block(struct dw_dma_chan *dwc,
+			      size_t bytes, unsigned int width, size_t *len)
+{
+	u32 block;
+
+	if ((bytes >> width) > dwc->block_size) {
+		block = dwc->block_size;
+		*len = dwc->block_size << width;
+	} else {
+		block = bytes >> width;
+		*len = bytes;
+	}
+
+	return block;
+}
+
+static size_t dw_dma_block2bytes(struct dw_dma_chan *dwc, u32 block, u32 width)
+{
+	return DWC_CTLH_BLOCK_TS(block) << width;
+}
+
+static void dw_dma_encode_maxburst(struct dw_dma_chan *dwc, u32 *maxburst)
+{
+	/*
+	 * Fix burst size according to dw_dmac. We need to convert them as:
+	 * 1 -> 0, 4 -> 1, 8 -> 2, 16 -> 3.
+	 */
+	*maxburst = *maxburst > 1 ? fls(*maxburst) - 2 : 0;
+}
+
+static void dw_dma_set_device_name(struct dw_dma *dw, int id)
+{
+	snprintf(dw->name, sizeof(dw->name), "dw:dmac%d", id);
+}
+
+static void dw_dma_disable(struct dw_dma *dw)
+{
+	do_dw_dma_off(dw);
+}
+
+static void dw_dma_enable(struct dw_dma *dw)
+{
+	do_dw_dma_on(dw);
+}
+
+int dw_dma_probe(struct dw_dma_chip *chip)
+{
+	struct dw_dma *dw;
+
+	dw = devm_kzalloc(chip->dev, sizeof(*dw), GFP_KERNEL);
+	if (!dw)
+		return -ENOMEM;
+
+	/* Channel operations */
+	dw->initialize_chan = dw_dma_initialize_chan;
+	dw->suspend_chan = dw_dma_suspend_chan;
+	dw->encode_maxburst = dw_dma_encode_maxburst;
+	dw->bytes2block = dw_dma_bytes2block;
+	dw->block2bytes = dw_dma_block2bytes;
+
+	/* Device operations */
+	dw->set_device_name = dw_dma_set_device_name;
+	dw->disable = dw_dma_disable;
+	dw->enable = dw_dma_enable;
+
+	chip->dw = dw;
+	return do_dma_probe(chip);
+}
+EXPORT_SYMBOL_GPL(dw_dma_probe);
+
+int dw_dma_remove(struct dw_dma_chip *chip)
+{
+	return do_dma_remove(chip);
+}
+EXPORT_SYMBOL_GPL(dw_dma_remove);
diff --git a/drivers/dma/dw/idma32.c b/drivers/dma/dw/idma32.c
new file mode 100644
index 000000000000..8707830f39ad
--- /dev/null
+++ b/drivers/dma/dw/idma32.c
@@ -0,0 +1,138 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2013,2018 Intel Corporation
+
+#include <linux/bitops.h>
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+
+#include "internal.h"
+
+static void idma32_initialize_chan(struct dw_dma_chan *dwc)
+{
+	u32 cfghi = 0;
+	u32 cfglo = 0;
+
+	/* Set default burst alignment */
+	cfglo |= IDMA32C_CFGL_DST_BURST_ALIGN | IDMA32C_CFGL_SRC_BURST_ALIGN;
+
+	/* Low 4 bits of the request lines */
+	cfghi |= IDMA32C_CFGH_DST_PER(dwc->dws.dst_id & 0xf);
+	cfghi |= IDMA32C_CFGH_SRC_PER(dwc->dws.src_id & 0xf);
+
+	/* Request line extension (2 bits) */
+	cfghi |= IDMA32C_CFGH_DST_PER_EXT(dwc->dws.dst_id >> 4 & 0x3);
+	cfghi |= IDMA32C_CFGH_SRC_PER_EXT(dwc->dws.src_id >> 4 & 0x3);
+
+	channel_writel(dwc, CFG_LO, cfglo);
+	channel_writel(dwc, CFG_HI, cfghi);
+}
+
+static void idma32_suspend_chan(struct dw_dma_chan *dwc, bool drain)
+{
+	u32 cfglo = channel_readl(dwc, CFG_LO);
+
+	if (drain)
+		cfglo |= IDMA32C_CFGL_CH_DRAIN;
+	else
+		cfglo &= ~IDMA32C_CFGL_CH_DRAIN;
+
+	channel_writel(dwc, CFG_LO, cfglo | DWC_CFGL_CH_SUSP);
+}
+
+static u32 idma32_bytes2block(struct dw_dma_chan *dwc,
+			      size_t bytes, unsigned int width, size_t *len)
+{
+	u32 block;
+
+	if (bytes > dwc->block_size) {
+		block = dwc->block_size;
+		*len = dwc->block_size;
+	} else {
+		block = bytes;
+		*len = bytes;
+	}
+
+	return block;
+}
+
+static size_t idma32_block2bytes(struct dw_dma_chan *dwc, u32 block, u32 width)
+{
+	return IDMA32C_CTLH_BLOCK_TS(block);
+}
+
+static void idma32_encode_maxburst(struct dw_dma_chan *dwc, u32 *maxburst)
+{
+	*maxburst = *maxburst > 1 ? fls(*maxburst) - 1 : 0;
+}
+
+static void idma32_set_device_name(struct dw_dma *dw, int id)
+{
+	snprintf(dw->name, sizeof(dw->name), "idma32:dmac%d", id);
+}
+
+/*
+ * Program FIFO size of channels.
+ *
+ * By default full FIFO (512 bytes) is assigned to channel 0. Here we
+ * slice FIFO on equal parts between channels.
+ */
+static void idma32_fifo_partition(struct dw_dma *dw)
+{
+	u64 value = IDMA32C_FP_PSIZE_CH0(64) | IDMA32C_FP_PSIZE_CH1(64) |
+		    IDMA32C_FP_UPDATE;
+	u64 fifo_partition = 0;
+
+	/* Fill FIFO_PARTITION low bits (Channels 0..1, 4..5) */
+	fifo_partition |= value << 0;
+
+	/* Fill FIFO_PARTITION high bits (Channels 2..3, 6..7) */
+	fifo_partition |= value << 32;
+
+	/* Program FIFO Partition registers - 64 bytes per channel */
+	idma32_writeq(dw, FIFO_PARTITION1, fifo_partition);
+	idma32_writeq(dw, FIFO_PARTITION0, fifo_partition);
+}
+
+static void idma32_disable(struct dw_dma *dw)
+{
+	do_dw_dma_off(dw);
+	idma32_fifo_partition(dw);
+}
+
+static void idma32_enable(struct dw_dma *dw)
+{
+	idma32_fifo_partition(dw);
+	do_dw_dma_on(dw);
+}
+
+int idma32_dma_probe(struct dw_dma_chip *chip)
+{
+	struct dw_dma *dw;
+
+	dw = devm_kzalloc(chip->dev, sizeof(*dw), GFP_KERNEL);
+	if (!dw)
+		return -ENOMEM;
+
+	/* Channel operations */
+	dw->initialize_chan = idma32_initialize_chan;
+	dw->suspend_chan = idma32_suspend_chan;
+	dw->encode_maxburst = idma32_encode_maxburst;
+	dw->bytes2block = idma32_bytes2block;
+	dw->block2bytes = idma32_block2bytes;
+
+	/* Device operations */
+	dw->set_device_name = idma32_set_device_name;
+	dw->disable = idma32_disable;
+	dw->enable = idma32_enable;
+
+	chip->dw = dw;
+	return do_dma_probe(chip);
+}
+EXPORT_SYMBOL_GPL(idma32_dma_probe);
+
+int idma32_dma_remove(struct dw_dma_chip *chip)
+{
+	return do_dma_remove(chip);
+}
+EXPORT_SYMBOL_GPL(idma32_dma_remove);
diff --git a/drivers/dma/dw/internal.h b/drivers/dma/dw/internal.h
index 41439732ff6b..fdcac21ea665 100644
--- a/drivers/dma/dw/internal.h
+++ b/drivers/dma/dw/internal.h
@@ -15,8 +15,14 @@
 
 #include "regs.h"
 
-int dw_dma_disable(struct dw_dma_chip *chip);
-int dw_dma_enable(struct dw_dma_chip *chip);
+int do_dma_probe(struct dw_dma_chip *chip);
+int do_dma_remove(struct dw_dma_chip *chip);
+
+void do_dw_dma_on(struct dw_dma *dw);
+void do_dw_dma_off(struct dw_dma *dw);
+
+int do_dw_dma_disable(struct dw_dma_chip *chip);
+int do_dw_dma_enable(struct dw_dma_chip *chip);
 
 extern bool dw_dma_filter(struct dma_chan *chan, void *param);
 
diff --git a/drivers/dma/dw/pci.c b/drivers/dma/dw/pci.c
index 66d98d7ccad0..e9ba25b4f950 100644
--- a/drivers/dma/dw/pci.c
+++ b/drivers/dma/dw/pci.c
@@ -15,9 +15,17 @@
 
 #include "internal.h"
 
-static struct dw_dma_platform_data mrfld_pdata = {
+struct dw_dma_pci_data {
+	const struct dw_dma_platform_data *pdata;
+	int (*probe)(struct dw_dma_chip *chip);
+};
+
+static const struct dw_dma_pci_data dw_pci_data = {
+	.probe = dw_dma_probe,
+};
+
+static const struct dw_dma_platform_data idma32_pdata = {
 	.nr_channels = 8,
-	.is_idma32 = true,
 	.chan_allocation_order = CHAN_ALLOCATION_ASCENDING,
 	.chan_priority = CHAN_PRIORITY_ASCENDING,
 	.block_size = 131071,
@@ -26,9 +34,14 @@ static struct dw_dma_platform_data mrfld_pdata = {
 	.multi_block = {1, 1, 1, 1, 1, 1, 1, 1},
 };
 
+static const struct dw_dma_pci_data idma32_pci_data = {
+	.pdata = &idma32_pdata,
+	.probe = idma32_dma_probe,
+};
+
 static int dw_pci_probe(struct pci_dev *pdev, const struct pci_device_id *pid)
 {
-	const struct dw_dma_platform_data *pdata = (void *)pid->driver_data;
+	const struct dw_dma_pci_data *data = (void *)pid->driver_data;
 	struct dw_dma_chip *chip;
 	int ret;
 
@@ -61,9 +74,9 @@ static int dw_pci_probe(struct pci_dev *pdev, const struct pci_device_id *pid)
 	chip->id = pdev->devfn;
 	chip->regs = pcim_iomap_table(pdev)[0];
 	chip->irq = pdev->irq;
-	chip->pdata = pdata;
+	chip->pdata = data->pdata;
 
-	ret = dw_dma_probe(chip);
+	ret = data->probe(chip);
 	if (ret)
 		return ret;
 
@@ -89,7 +102,7 @@ static int dw_pci_suspend_late(struct device *dev)
 	struct pci_dev *pci = to_pci_dev(dev);
 	struct dw_dma_chip *chip = pci_get_drvdata(pci);
 
-	return dw_dma_disable(chip);
+	return do_dw_dma_disable(chip);
 };
 
 static int dw_pci_resume_early(struct device *dev)
@@ -97,7 +110,7 @@ static int dw_pci_resume_early(struct device *dev)
 	struct pci_dev *pci = to_pci_dev(dev);
 	struct dw_dma_chip *chip = pci_get_drvdata(pci);
 
-	return dw_dma_enable(chip);
+	return do_dw_dma_enable(chip);
 };
 
 #endif /* CONFIG_PM_SLEEP */
@@ -108,24 +121,24 @@ static const struct dev_pm_ops dw_pci_dev_pm_ops = {
 
 static const struct pci_device_id dw_pci_id_table[] = {
 	/* Medfield (GPDMA) */
-	{ PCI_VDEVICE(INTEL, 0x0827) },
+	{ PCI_VDEVICE(INTEL, 0x0827), (kernel_ulong_t)&dw_pci_data },
 
 	/* BayTrail */
-	{ PCI_VDEVICE(INTEL, 0x0f06) },
-	{ PCI_VDEVICE(INTEL, 0x0f40) },
+	{ PCI_VDEVICE(INTEL, 0x0f06), (kernel_ulong_t)&dw_pci_data },
+	{ PCI_VDEVICE(INTEL, 0x0f40), (kernel_ulong_t)&dw_pci_data },
 
-	/* Merrifield iDMA 32-bit (GPDMA) */
-	{ PCI_VDEVICE(INTEL, 0x11a2), (kernel_ulong_t)&mrfld_pdata },
+	/* Merrifield */
+	{ PCI_VDEVICE(INTEL, 0x11a2), (kernel_ulong_t)&idma32_pci_data },
 
 	/* Braswell */
-	{ PCI_VDEVICE(INTEL, 0x2286) },
-	{ PCI_VDEVICE(INTEL, 0x22c0) },
+	{ PCI_VDEVICE(INTEL, 0x2286), (kernel_ulong_t)&dw_pci_data },
+	{ PCI_VDEVICE(INTEL, 0x22c0), (kernel_ulong_t)&dw_pci_data },
 
 	/* Haswell */
-	{ PCI_VDEVICE(INTEL, 0x9c60) },
+	{ PCI_VDEVICE(INTEL, 0x9c60), (kernel_ulong_t)&dw_pci_data },
 
 	/* Broadwell */
-	{ PCI_VDEVICE(INTEL, 0x9ce0) },
+	{ PCI_VDEVICE(INTEL, 0x9ce0), (kernel_ulong_t)&dw_pci_data },
 
 	{ }
 };
diff --git a/drivers/dma/dw/platform.c b/drivers/dma/dw/platform.c
index 58fc1ba02a1e..d5196c97e4f4 100644
--- a/drivers/dma/dw/platform.c
+++ b/drivers/dma/dw/platform.c
@@ -255,7 +255,7 @@ static void dw_shutdown(struct platform_device *pdev)
 	struct dw_dma_chip *chip = platform_get_drvdata(pdev);
 
 	/*
-	 * We have to call dw_dma_disable() to stop any ongoing transfer. On
+	 * We have to call do_dw_dma_disable() to stop any ongoing transfer. On
 	 * some platforms we can't do that since DMA device is powered off.
 	 * Moreover we have no possibility to check if the platform is affected
 	 * or not. That's why we call pm_runtime_get_sync() / pm_runtime_put()
@@ -264,7 +264,7 @@ static void dw_shutdown(struct platform_device *pdev)
 	 * used by the driver.
 	 */
 	pm_runtime_get_sync(chip->dev);
-	dw_dma_disable(chip);
+	do_dw_dma_disable(chip);
 	pm_runtime_put_sync_suspend(chip->dev);
 
 	clk_disable_unprepare(chip->clk);
@@ -294,7 +294,7 @@ static int dw_suspend_late(struct device *dev)
 {
 	struct dw_dma_chip *chip = dev_get_drvdata(dev);
 
-	dw_dma_disable(chip);
+	do_dw_dma_disable(chip);
 	clk_disable_unprepare(chip->clk);
 
 	return 0;
@@ -309,7 +309,7 @@ static int dw_resume_early(struct device *dev)
 	if (ret)
 		return ret;
 
-	return dw_dma_enable(chip);
+	return do_dw_dma_enable(chip);
 }
 
 #endif /* CONFIG_PM_SLEEP */
diff --git a/drivers/dma/dw/regs.h b/drivers/dma/dw/regs.h
index 646c9c960c07..66aa8b227248 100644
--- a/drivers/dma/dw/regs.h
+++ b/drivers/dma/dw/regs.h
@@ -312,6 +312,19 @@ struct dw_dma {
 	u8			all_chan_mask;
 	u8			in_use;
 
+	/* Channel operations */
+	void	(*initialize_chan)(struct dw_dma_chan *dwc);
+	void	(*suspend_chan)(struct dw_dma_chan *dwc, bool drain);
+	void	(*encode_maxburst)(struct dw_dma_chan *dwc, u32 *maxburst);
+	u32	(*bytes2block)(struct dw_dma_chan *dwc, size_t bytes,
+			       unsigned int width, size_t *len);
+	size_t	(*block2bytes)(struct dw_dma_chan *dwc, u32 block, u32 width);
+
+	/* Device operations */
+	void (*set_device_name)(struct dw_dma *dw, int id);
+	void (*disable)(struct dw_dma *dw);
+	void (*enable)(struct dw_dma *dw);
+
 	/* platform data */
 	struct dw_dma_platform_data	*pdata;
 };
diff --git a/include/linux/dma/dw.h b/include/linux/dma/dw.h
index e166cac8e870..d643d331c20e 100644
--- a/include/linux/dma/dw.h
+++ b/include/linux/dma/dw.h
@@ -45,9 +45,13 @@ struct dw_dma_chip {
 #if IS_ENABLED(CONFIG_DW_DMAC_CORE)
 int dw_dma_probe(struct dw_dma_chip *chip);
 int dw_dma_remove(struct dw_dma_chip *chip);
+int idma32_dma_probe(struct dw_dma_chip *chip);
+int idma32_dma_remove(struct dw_dma_chip *chip);
 #else
 static inline int dw_dma_probe(struct dw_dma_chip *chip) { return -ENODEV; }
 static inline int dw_dma_remove(struct dw_dma_chip *chip) { return 0; }
+static inline int idma32_dma_probe(struct dw_dma_chip *chip) { return -ENODEV; }
+static inline int idma32_dma_remove(struct dw_dma_chip *chip) { return 0; }
 #endif /* CONFIG_DW_DMAC_CORE */
 
 #endif /* _DMA_DW_H */
diff --git a/include/linux/platform_data/dma-dw.h b/include/linux/platform_data/dma-dw.h
index 1c85eeee4171..576048433809 100644
--- a/include/linux/platform_data/dma-dw.h
+++ b/include/linux/platform_data/dma-dw.h
@@ -38,7 +38,6 @@ struct dw_dma_slave {
 /**
  * struct dw_dma_platform_data - Controller configuration parameters
  * @nr_channels: Number of channels supported by hardware (max 8)
- * @is_idma32: The type of the DMA controller is iDMA32
  * @chan_allocation_order: Allocate channels starting from 0 or 7
  * @chan_priority: Set channel priority increasing from 0 to 7 or 7 to 0.
  * @block_size: Maximum block size supported by the controller
@@ -50,7 +49,6 @@ struct dw_dma_slave {
  */
 struct dw_dma_platform_data {
 	unsigned int	nr_channels;
-	bool		is_idma32;
 #define CHAN_ALLOCATION_ASCENDING	0	/* zero to seven */
 #define CHAN_ALLOCATION_DESCENDING	1	/* seven to zero */
 	unsigned char	chan_allocation_order;
-- 
cgit v1.2.3


From b466a37fbcc99ef79ea59e40ef6aa8391430b0d8 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 7 Jan 2019 13:07:41 +0200
Subject: dmaengine: dw: convert to SPDX identifiers

This patch updates license to use SPDX-License-Identifier
instead of verbose license text.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/dw/Kconfig               | 2 ++
 drivers/dma/dw/core.c                | 5 +----
 drivers/dma/dw/internal.h            | 5 +----
 drivers/dma/dw/pci.c                 | 5 +----
 drivers/dma/dw/platform.c            | 5 +----
 drivers/dma/dw/regs.h                | 5 +----
 include/linux/dma/dw.h               | 5 +----
 include/linux/platform_data/dma-dw.h | 5 +----
 8 files changed, 9 insertions(+), 28 deletions(-)

(limited to 'include')

diff --git a/drivers/dma/dw/Kconfig b/drivers/dma/dw/Kconfig
index 04b9728c1d26..e5162690de8f 100644
--- a/drivers/dma/dw/Kconfig
+++ b/drivers/dma/dw/Kconfig
@@ -1,3 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0
+
 #
 # DMA engine configuration for dw
 #
diff --git a/drivers/dma/dw/core.c b/drivers/dma/dw/core.c
index b7e4dab28f8a..3a1ab52ffae0 100644
--- a/drivers/dma/dw/core.c
+++ b/drivers/dma/dw/core.c
@@ -1,13 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Core driver for the Synopsys DesignWare DMA Controller
  *
  * Copyright (C) 2007-2008 Atmel Corporation
  * Copyright (C) 2010-2011 ST Microelectronics
  * Copyright (C) 2013 Intel Corporation
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/bitops.h>
diff --git a/drivers/dma/dw/internal.h b/drivers/dma/dw/internal.h
index fdcac21ea665..1dd7a4e6dd23 100644
--- a/drivers/dma/dw/internal.h
+++ b/drivers/dma/dw/internal.h
@@ -1,11 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Driver for the Synopsys DesignWare DMA Controller
  *
  * Copyright (C) 2013 Intel Corporation
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #ifndef _DMA_DW_INTERNAL_H
diff --git a/drivers/dma/dw/pci.c b/drivers/dma/dw/pci.c
index e9ba25b4f950..e79a75db0852 100644
--- a/drivers/dma/dw/pci.c
+++ b/drivers/dma/dw/pci.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * PCI driver for the Synopsys DesignWare DMA Controller
  *
  * Copyright (C) 2013 Intel Corporation
  * Author: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/module.h>
diff --git a/drivers/dma/dw/platform.c b/drivers/dma/dw/platform.c
index d5196c97e4f4..382dfd9e9600 100644
--- a/drivers/dma/dw/platform.c
+++ b/drivers/dma/dw/platform.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Platform driver for the Synopsys DesignWare DMA Controller
  *
@@ -6,10 +7,6 @@
  * Copyright (C) 2013 Intel Corporation
  *
  * Some parts of this driver are derived from the original dw_dmac.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/module.h>
diff --git a/drivers/dma/dw/regs.h b/drivers/dma/dw/regs.h
index 07f91325e559..3fce66ecee7a 100644
--- a/drivers/dma/dw/regs.h
+++ b/drivers/dma/dw/regs.h
@@ -1,13 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Driver for the Synopsys DesignWare AHB DMA Controller
  *
  * Copyright (C) 2005-2007 Atmel Corporation
  * Copyright (C) 2010-2011 ST Microelectronics
  * Copyright (C) 2016 Intel Corporation
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/bitops.h>
diff --git a/include/linux/dma/dw.h b/include/linux/dma/dw.h
index d643d331c20e..9752f3745f76 100644
--- a/include/linux/dma/dw.h
+++ b/include/linux/dma/dw.h
@@ -1,13 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Driver for the Synopsys DesignWare DMA Controller
  *
  * Copyright (C) 2007 Atmel Corporation
  * Copyright (C) 2010-2011 ST Microelectronics
  * Copyright (C) 2014 Intel Corporation
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #ifndef _DMA_DW_H
 #define _DMA_DW_H
diff --git a/include/linux/platform_data/dma-dw.h b/include/linux/platform_data/dma-dw.h
index 576048433809..f3eaf9ec00a1 100644
--- a/include/linux/platform_data/dma-dw.h
+++ b/include/linux/platform_data/dma-dw.h
@@ -1,12 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Driver for the Synopsys DesignWare DMA Controller
  *
  * Copyright (C) 2007 Atmel Corporation
  * Copyright (C) 2010-2011 ST Microelectronics
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #ifndef _PLATFORM_DATA_DMA_DW_H
 #define _PLATFORM_DATA_DMA_DW_H
-- 
cgit v1.2.3


From 83d0ea237bac5fb6a7509c69a8939e999693d311 Mon Sep 17 00:00:00 2001
From: Jerome Brunet <jbrunet@baylibre.com>
Date: Fri, 21 Dec 2018 17:02:35 +0100
Subject: dt-bindings: clk: meson: add ao slow clock path ids

Add the CLKIDs for the slow clock generation path

Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Jerome Brunet <jbrunet@baylibre.com>
Acked-by: Neil Armstrong <narmstrong@baylibre.com>
Signed-off-by: Neil Armstrong <narmstrong@baylibre.com>
Link: https://lkml.kernel.org/r/20181221160239.26265-2-jbrunet@baylibre.com
---
 include/dt-bindings/clock/axg-aoclkc.h  | 7 ++++++-
 include/dt-bindings/clock/gxbb-aoclkc.h | 7 +++++++
 2 files changed, 13 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/dt-bindings/clock/axg-aoclkc.h b/include/dt-bindings/clock/axg-aoclkc.h
index 61955016a55b..8ec4a269c7a6 100644
--- a/include/dt-bindings/clock/axg-aoclkc.h
+++ b/include/dt-bindings/clock/axg-aoclkc.h
@@ -21,6 +21,11 @@
 #define CLKID_AO_SAR_ADC_SEL	8
 #define CLKID_AO_SAR_ADC_DIV	9
 #define CLKID_AO_SAR_ADC_CLK	10
-#define CLKID_AO_ALT_XTAL	11
+#define CLKID_AO_CTS_OSCIN	11
+#define CLKID_AO_32K_PRE	12
+#define CLKID_AO_32K_DIV	13
+#define CLKID_AO_32K_SEL	14
+#define CLKID_AO_32K		15
+#define CLKID_AO_CTS_RTC_OSCIN	16
 
 #endif
diff --git a/include/dt-bindings/clock/gxbb-aoclkc.h b/include/dt-bindings/clock/gxbb-aoclkc.h
index 9d15e2221fdb..ec3b26319fc4 100644
--- a/include/dt-bindings/clock/gxbb-aoclkc.h
+++ b/include/dt-bindings/clock/gxbb-aoclkc.h
@@ -63,5 +63,12 @@
 #define CLKID_AO_UART2		4
 #define CLKID_AO_IR_BLASTER	5
 #define CLKID_AO_CEC_32K	6
+#define CLKID_AO_CTS_OSCIN	7
+#define CLKID_AO_32K_PRE	8
+#define CLKID_AO_32K_DIV	9
+#define CLKID_AO_32K_SEL	10
+#define CLKID_AO_32K		11
+#define CLKID_AO_CTS_RTC_OSCIN	12
+#define CLKID_AO_CLK81		13
 
 #endif
-- 
cgit v1.2.3


From e2d8ffe2e76028457759988ba6216fd13eeea01b Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Date: Tue, 13 Nov 2018 03:52:18 -0500
Subject: media: v4l2-mem2mem: add v4l2_m2m_buf_copy_data helper function

Memory-to-memory devices should copy various parts of
struct v4l2_buffer from the output buffer to the capture buffer.

Add a helper function that does that to simplify the driver code.

Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Reviewed-by: Paul Kocialkowski <paul.kocialkowski@bootlin.com>
Reviewed-by: Alexandre Courbot <acourbot@chromium.org>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 drivers/media/v4l2-core/v4l2-mem2mem.c | 20 ++++++++++++++++++++
 include/media/v4l2-mem2mem.h           | 20 ++++++++++++++++++++
 2 files changed, 40 insertions(+)

(limited to 'include')

diff --git a/drivers/media/v4l2-core/v4l2-mem2mem.c b/drivers/media/v4l2-core/v4l2-mem2mem.c
index 5bbdec55b7d7..631f4e2aa942 100644
--- a/drivers/media/v4l2-core/v4l2-mem2mem.c
+++ b/drivers/media/v4l2-core/v4l2-mem2mem.c
@@ -975,6 +975,26 @@ void v4l2_m2m_buf_queue(struct v4l2_m2m_ctx *m2m_ctx,
 }
 EXPORT_SYMBOL_GPL(v4l2_m2m_buf_queue);
 
+void v4l2_m2m_buf_copy_data(const struct vb2_v4l2_buffer *out_vb,
+			    struct vb2_v4l2_buffer *cap_vb,
+			    bool copy_frame_flags)
+{
+	u32 mask = V4L2_BUF_FLAG_TIMECODE | V4L2_BUF_FLAG_TSTAMP_SRC_MASK;
+
+	if (copy_frame_flags)
+		mask |= V4L2_BUF_FLAG_KEYFRAME | V4L2_BUF_FLAG_PFRAME |
+			V4L2_BUF_FLAG_BFRAME;
+
+	cap_vb->vb2_buf.timestamp = out_vb->vb2_buf.timestamp;
+
+	if (out_vb->flags & V4L2_BUF_FLAG_TIMECODE)
+		cap_vb->timecode = out_vb->timecode;
+	cap_vb->field = out_vb->field;
+	cap_vb->flags &= ~mask;
+	cap_vb->flags |= out_vb->flags & mask;
+}
+EXPORT_SYMBOL_GPL(v4l2_m2m_buf_copy_data);
+
 void v4l2_m2m_request_queue(struct media_request *req)
 {
 	struct media_request_object *obj, *obj_safe;
diff --git a/include/media/v4l2-mem2mem.h b/include/media/v4l2-mem2mem.h
index 5467264771ec..43e447dcf69d 100644
--- a/include/media/v4l2-mem2mem.h
+++ b/include/media/v4l2-mem2mem.h
@@ -622,6 +622,26 @@ v4l2_m2m_dst_buf_remove_by_idx(struct v4l2_m2m_ctx *m2m_ctx, unsigned int idx)
 	return v4l2_m2m_buf_remove_by_idx(&m2m_ctx->cap_q_ctx, idx);
 }
 
+/**
+ * v4l2_m2m_buf_copy_data() - copy buffer data from the output buffer to the
+ * capture buffer
+ *
+ * @out_vb: the output buffer that is the source of the data.
+ * @cap_vb: the capture buffer that will receive the data.
+ * @copy_frame_flags: copy the KEY/B/PFRAME flags as well.
+ *
+ * This helper function copies the timestamp, timecode (if the TIMECODE
+ * buffer flag was set), field and the TIMECODE, KEYFRAME, BFRAME, PFRAME
+ * and TSTAMP_SRC_MASK flags from @out_vb to @cap_vb.
+ *
+ * If @copy_frame_flags is false, then the KEYFRAME, BFRAME and PFRAME
+ * flags are not copied. This is typically needed for encoders that
+ * set this bits explicitly.
+ */
+void v4l2_m2m_buf_copy_data(const struct vb2_v4l2_buffer *out_vb,
+			    struct vb2_v4l2_buffer *cap_vb,
+			    bool copy_frame_flags);
+
 /* v4l2 request helper */
 
 void v4l2_m2m_request_queue(struct media_request *req);
-- 
cgit v1.2.3


From c2eb8effb265ac5cdd960d8e61ecb931e9c767cd Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Date: Wed, 24 Oct 2018 06:50:34 -0400
Subject: media: videodev2.h: add v4l2_timeval_to_ns inline function

We want to be able to uniquely identify buffers for stateless
codecs. The internal timestamp (a u64) as stored internally in the
kernel is a suitable candidate for that, but in struct v4l2_buffer
it is represented as a struct timeval.

Add a v4l2_timeval_to_ns() function that converts the struct timeval
into a u64 in the same way that the kernel does. This makes it possible
to use this u64 elsewhere as a unique identifier of the buffer.

Since timestamps are also copied from the output buffer to the
corresponding capture buffer(s) by M2M devices, the u64 can be
used to refer to both output and capture buffers.

The plan is that in the future we redesign struct v4l2_buffer and use
u64 for the timestamp instead of a struct timeval (which has lots of
problems with 32 vs 64 bit and y2038 layout changes), and then there
is no more need to use this function.

Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 include/uapi/linux/videodev2.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
index b5671ce2724f..d6eed479c3a6 100644
--- a/include/uapi/linux/videodev2.h
+++ b/include/uapi/linux/videodev2.h
@@ -973,6 +973,18 @@ struct v4l2_buffer {
 	};
 };
 
+/**
+ * v4l2_timeval_to_ns - Convert timeval to nanoseconds
+ * @ts:		pointer to the timeval variable to be converted
+ *
+ * Returns the scalar nanosecond representation of the timeval
+ * parameter.
+ */
+static inline __u64 v4l2_timeval_to_ns(const struct timeval *tv)
+{
+	return (__u64)tv->tv_sec * 1000000000ULL + tv->tv_usec * 1000;
+}
+
 /*  Flags for 'flags' field */
 /* Buffer is mapped (flag) */
 #define V4L2_BUF_FLAG_MAPPED			0x00000001
-- 
cgit v1.2.3


From 245ede423b43a6e081e94e0e5d4e895bd1f31228 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Date: Wed, 24 Oct 2018 06:51:01 -0400
Subject: media: vb2: add vb2_find_timestamp()

Use v4l2_timeval_to_ns instead of timeval_to_ns to ensure that
both kernelspace and userspace will use the same conversion
function.

Next add a new vb2_find_timestamp() function to find buffers
with a specific timestamp.

This function will only look at DEQUEUED and DONE buffers, i.e.
buffers that are already processed.

Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 drivers/media/common/videobuf2/videobuf2-v4l2.c | 19 ++++++++++++++++++-
 include/media/videobuf2-v4l2.h                  | 17 +++++++++++++++++
 2 files changed, 35 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/media/common/videobuf2/videobuf2-v4l2.c b/drivers/media/common/videobuf2/videobuf2-v4l2.c
index 3a0ca2f9854f..75ea90e795d8 100644
--- a/drivers/media/common/videobuf2/videobuf2-v4l2.c
+++ b/drivers/media/common/videobuf2/videobuf2-v4l2.c
@@ -143,7 +143,7 @@ static void __copy_timestamp(struct vb2_buffer *vb, const void *pb)
 		 * and the timecode field and flag if needed.
 		 */
 		if (q->copy_timestamp)
-			vb->timestamp = timeval_to_ns(&b->timestamp);
+			vb->timestamp = v4l2_timeval_to_ns(&b->timestamp);
 		vbuf->flags |= b->flags & V4L2_BUF_FLAG_TIMECODE;
 		if (b->flags & V4L2_BUF_FLAG_TIMECODE)
 			vbuf->timecode = b->timecode;
@@ -589,6 +589,23 @@ static const struct vb2_buf_ops v4l2_buf_ops = {
 	.copy_timestamp		= __copy_timestamp,
 };
 
+int vb2_find_timestamp(const struct vb2_queue *q, u64 timestamp,
+		       unsigned int start_idx)
+{
+	unsigned int i;
+
+	for (i = start_idx; i < q->num_buffers; i++) {
+		struct vb2_buffer *vb = q->bufs[i];
+
+		if ((vb->state == VB2_BUF_STATE_DEQUEUED ||
+		     vb->state == VB2_BUF_STATE_DONE) &&
+		    vb->timestamp == timestamp)
+			return i;
+	}
+	return -1;
+}
+EXPORT_SYMBOL_GPL(vb2_find_timestamp);
+
 /*
  * vb2_querybuf() - query video buffer information
  * @q:		videobuf queue
diff --git a/include/media/videobuf2-v4l2.h b/include/media/videobuf2-v4l2.h
index 727855463838..a9961bc776dc 100644
--- a/include/media/videobuf2-v4l2.h
+++ b/include/media/videobuf2-v4l2.h
@@ -55,6 +55,23 @@ struct vb2_v4l2_buffer {
 #define to_vb2_v4l2_buffer(vb) \
 	container_of(vb, struct vb2_v4l2_buffer, vb2_buf)
 
+/**
+ * vb2_find_timestamp() - Find buffer with given timestamp in the queue
+ *
+ * @q:		pointer to &struct vb2_queue with videobuf2 queue.
+ * @timestamp:	the timestamp to find. Only buffers in state DEQUEUED or DONE
+ *		are considered.
+ * @start_idx:	the start index (usually 0) in the buffer array to start
+ *		searching from. Note that there may be multiple buffers
+ *		with the same timestamp value, so you can restart the search
+ *		by setting @start_idx to the previously found index + 1.
+ *
+ * Returns the buffer index of the buffer with the given @timestamp, or
+ * -1 if no buffer with @timestamp was found.
+ */
+int vb2_find_timestamp(const struct vb2_queue *q, u64 timestamp,
+		       unsigned int start_idx);
+
 int vb2_querybuf(struct vb2_queue *q, struct v4l2_buffer *b);
 
 /**
-- 
cgit v1.2.3


From d998e03e322fc497454f584adfc35743713a44c5 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Date: Fri, 9 Nov 2018 04:16:21 -0500
Subject: media: cedrus: identify buffers by timestamp

Use the new v4l2_m2m_buf_copy_data helper function and use
timestamps to refer to reference frames instead of using
buffer indices.

Also remove the padding fields in the structs, that's a bad
idea. Just use the right types to keep everything aligned.

Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Tested-by: Paul Kocialkowski <paul.kocialkowski@bootlin.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 drivers/media/v4l2-core/v4l2-ctrls.c              |  9 ---------
 drivers/staging/media/sunxi/cedrus/cedrus.h       |  9 ++++++---
 drivers/staging/media/sunxi/cedrus/cedrus_dec.c   |  2 ++
 drivers/staging/media/sunxi/cedrus/cedrus_mpeg2.c | 23 +++++++++++------------
 include/media/mpeg2-ctrls.h                       | 14 +++++---------
 5 files changed, 24 insertions(+), 33 deletions(-)

(limited to 'include')

diff --git a/drivers/media/v4l2-core/v4l2-ctrls.c b/drivers/media/v4l2-core/v4l2-ctrls.c
index 5e3806feb5d7..e3bd441fa29a 100644
--- a/drivers/media/v4l2-core/v4l2-ctrls.c
+++ b/drivers/media/v4l2-core/v4l2-ctrls.c
@@ -1661,15 +1661,6 @@ static int std_validate(const struct v4l2_ctrl *ctrl, u32 idx,
 			return -EINVAL;
 		}
 
-		if (p_mpeg2_slice_params->backward_ref_index >= VIDEO_MAX_FRAME ||
-		    p_mpeg2_slice_params->forward_ref_index >= VIDEO_MAX_FRAME)
-			return -EINVAL;
-
-		if (p_mpeg2_slice_params->pad ||
-		    p_mpeg2_slice_params->picture.pad ||
-		    p_mpeg2_slice_params->sequence.pad)
-			return -EINVAL;
-
 		return 0;
 
 	case V4L2_CTRL_TYPE_MPEG2_QUANTIZATION:
diff --git a/drivers/staging/media/sunxi/cedrus/cedrus.h b/drivers/staging/media/sunxi/cedrus/cedrus.h
index 3acfdcf83691..4aedd24a9848 100644
--- a/drivers/staging/media/sunxi/cedrus/cedrus.h
+++ b/drivers/staging/media/sunxi/cedrus/cedrus.h
@@ -140,11 +140,14 @@ static inline dma_addr_t cedrus_buf_addr(struct vb2_buffer *buf,
 }
 
 static inline dma_addr_t cedrus_dst_buf_addr(struct cedrus_ctx *ctx,
-					     unsigned int index,
-					     unsigned int plane)
+					     int index, unsigned int plane)
 {
-	struct vb2_buffer *buf = ctx->dst_bufs[index];
+	struct vb2_buffer *buf;
 
+	if (index < 0)
+		return 0;
+
+	buf = ctx->dst_bufs[index];
 	return buf ? cedrus_buf_addr(buf, &ctx->dst_fmt, plane) : 0;
 }
 
diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_dec.c b/drivers/staging/media/sunxi/cedrus/cedrus_dec.c
index 591d191d4286..443fb037e1cf 100644
--- a/drivers/staging/media/sunxi/cedrus/cedrus_dec.c
+++ b/drivers/staging/media/sunxi/cedrus/cedrus_dec.c
@@ -50,6 +50,8 @@ void cedrus_device_run(void *priv)
 		break;
 	}
 
+	v4l2_m2m_buf_copy_data(run.src, run.dst, true);
+
 	dev->dec_ops[ctx->current_codec]->setup(ctx, &run);
 
 	/* Complete request(s) controls if needed. */
diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_mpeg2.c b/drivers/staging/media/sunxi/cedrus/cedrus_mpeg2.c
index 9abd39cae38c..cb45fda9aaeb 100644
--- a/drivers/staging/media/sunxi/cedrus/cedrus_mpeg2.c
+++ b/drivers/staging/media/sunxi/cedrus/cedrus_mpeg2.c
@@ -82,7 +82,10 @@ static void cedrus_mpeg2_setup(struct cedrus_ctx *ctx, struct cedrus_run *run)
 	dma_addr_t fwd_luma_addr, fwd_chroma_addr;
 	dma_addr_t bwd_luma_addr, bwd_chroma_addr;
 	struct cedrus_dev *dev = ctx->dev;
+	struct vb2_queue *cap_q = &ctx->fh.m2m_ctx->cap_q_ctx.q;
 	const u8 *matrix;
+	int forward_idx;
+	int backward_idx;
 	unsigned int i;
 	u32 reg;
 
@@ -156,23 +159,19 @@ static void cedrus_mpeg2_setup(struct cedrus_ctx *ctx, struct cedrus_run *run)
 	cedrus_write(dev, VE_DEC_MPEG_PICBOUNDSIZE, reg);
 
 	/* Forward and backward prediction reference buffers. */
+	forward_idx = vb2_find_timestamp(cap_q,
+					 slice_params->forward_ref_ts, 0);
 
-	fwd_luma_addr = cedrus_dst_buf_addr(ctx,
-					    slice_params->forward_ref_index,
-					    0);
-	fwd_chroma_addr = cedrus_dst_buf_addr(ctx,
-					      slice_params->forward_ref_index,
-					      1);
+	fwd_luma_addr = cedrus_dst_buf_addr(ctx, forward_idx, 0);
+	fwd_chroma_addr = cedrus_dst_buf_addr(ctx, forward_idx, 1);
 
 	cedrus_write(dev, VE_DEC_MPEG_FWD_REF_LUMA_ADDR, fwd_luma_addr);
 	cedrus_write(dev, VE_DEC_MPEG_FWD_REF_CHROMA_ADDR, fwd_chroma_addr);
 
-	bwd_luma_addr = cedrus_dst_buf_addr(ctx,
-					    slice_params->backward_ref_index,
-					    0);
-	bwd_chroma_addr = cedrus_dst_buf_addr(ctx,
-					      slice_params->backward_ref_index,
-					      1);
+	backward_idx = vb2_find_timestamp(cap_q,
+					  slice_params->backward_ref_ts, 0);
+	bwd_luma_addr = cedrus_dst_buf_addr(ctx, backward_idx, 0);
+	bwd_chroma_addr = cedrus_dst_buf_addr(ctx, backward_idx, 1);
 
 	cedrus_write(dev, VE_DEC_MPEG_BWD_REF_LUMA_ADDR, bwd_luma_addr);
 	cedrus_write(dev, VE_DEC_MPEG_BWD_REF_CHROMA_ADDR, bwd_chroma_addr);
diff --git a/include/media/mpeg2-ctrls.h b/include/media/mpeg2-ctrls.h
index d21f40edc09e..6601455b3d5e 100644
--- a/include/media/mpeg2-ctrls.h
+++ b/include/media/mpeg2-ctrls.h
@@ -30,10 +30,9 @@ struct v4l2_mpeg2_sequence {
 	__u32	vbv_buffer_size;
 
 	/* ISO/IEC 13818-2, ITU-T Rec. H.262: Sequence extension */
-	__u8	profile_and_level_indication;
+	__u16	profile_and_level_indication;
 	__u8	progressive_sequence;
 	__u8	chroma_format;
-	__u8	pad;
 };
 
 struct v4l2_mpeg2_picture {
@@ -51,23 +50,20 @@ struct v4l2_mpeg2_picture {
 	__u8	intra_vlc_format;
 	__u8	alternate_scan;
 	__u8	repeat_first_field;
-	__u8	progressive_frame;
-	__u8	pad;
+	__u16	progressive_frame;
 };
 
 struct v4l2_ctrl_mpeg2_slice_params {
 	__u32	bit_size;
 	__u32	data_bit_offset;
+	__u64	backward_ref_ts;
+	__u64	forward_ref_ts;
 
 	struct v4l2_mpeg2_sequence sequence;
 	struct v4l2_mpeg2_picture picture;
 
 	/* ISO/IEC 13818-2, ITU-T Rec. H.262: Slice */
-	__u8	quantiser_scale_code;
-
-	__u8	backward_ref_index;
-	__u8	forward_ref_index;
-	__u8	pad;
+	__u32	quantiser_scale_code;
 };
 
 struct v4l2_ctrl_mpeg2_quantization {
-- 
cgit v1.2.3


From 983f6ae944c273c3a64e30f451b60d40398afc8a Mon Sep 17 00:00:00 2001
From: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Date: Fri, 21 Dec 2018 11:35:35 +0100
Subject: spi/trace: drop useless and wrong (but harmless) casts
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

bus_num, chip_select and len are already ints, so there is no gain in
casting them to int. xfer is a pointer to a struct spi_transfer. Casting
that to struct spi_message * is wrong but as only the pointer value is
used for the %p format specifier no harm is done.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/trace/events/spi.h | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/spi.h b/include/trace/events/spi.h
index 277bb9d25779..6eb014f59efb 100644
--- a/include/trace/events/spi.h
+++ b/include/trace/events/spi.h
@@ -129,10 +129,9 @@ DECLARE_EVENT_CLASS(spi_transfer,
 		__entry->len = xfer->len;
 	),
 
-        TP_printk("spi%d.%d %p len=%d", (int)__entry->bus_num,
-		  (int)__entry->chip_select,
-		  (struct spi_message *)__entry->xfer,
-		  (int)__entry->len)
+        TP_printk("spi%d.%d %p len=%d",
+		  __entry->bus_num, __entry->chip_select,
+		  __entry->xfer, __entry->len)
 );
 
 DEFINE_EVENT(spi_transfer, spi_transfer_start,
-- 
cgit v1.2.3


From 8d245475c3f6e82d4685d5d9fd7b95ea118a7e25 Mon Sep 17 00:00:00 2001
From: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Date: Fri, 21 Dec 2018 11:35:36 +0100
Subject: spi/trace: include buffer contents in traces
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

It highly improves usability when the buffer contents are inspecable via
tracing.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/trace/events/spi.h | 28 ++++++++++++++++++++++++++--
 1 file changed, 26 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/spi.h b/include/trace/events/spi.h
index 6eb014f59efb..aef6869f563d 100644
--- a/include/trace/events/spi.h
+++ b/include/trace/events/spi.h
@@ -109,6 +109,16 @@ TRACE_EVENT(spi_message_done,
                   (unsigned)__entry->actual, (unsigned)__entry->frame)
 );
 
+/*
+ * consider a buffer valid if non-NULL and if it doesn't match the dummy buffer
+ * that only exist to work with controllers that have SPI_CONTROLLER_MUST_TX or
+ * SPI_CONTROLLER_MUST_RX.
+ */
+#define spi_valid_txbuf(msg, xfer) \
+	(xfer->tx_buf && xfer->tx_buf != msg->spi->controller->dummy_tx)
+#define spi_valid_rxbuf(msg, xfer) \
+	(xfer->rx_buf && xfer->rx_buf != msg->spi->controller->dummy_rx)
+
 DECLARE_EVENT_CLASS(spi_transfer,
 
 	TP_PROTO(struct spi_message *msg, struct spi_transfer *xfer),
@@ -120,6 +130,10 @@ DECLARE_EVENT_CLASS(spi_transfer,
 		__field(        int,            chip_select     )
 		__field(        struct spi_transfer *,   xfer   )
 		__field(        int,            len             )
+		__dynamic_array(u8, rx_buf,
+				spi_valid_rxbuf(msg, xfer) ? xfer->len : 0)
+		__dynamic_array(u8, tx_buf,
+				spi_valid_txbuf(msg, xfer) ? xfer->len : 0)
 	),
 
 	TP_fast_assign(
@@ -127,11 +141,21 @@ DECLARE_EVENT_CLASS(spi_transfer,
 		__entry->chip_select = msg->spi->chip_select;
 		__entry->xfer = xfer;
 		__entry->len = xfer->len;
+
+		if (spi_valid_txbuf(msg, xfer))
+			memcpy(__get_dynamic_array(tx_buf),
+			       xfer->tx_buf, xfer->len);
+
+		if (spi_valid_rxbuf(msg, xfer))
+			memcpy(__get_dynamic_array(rx_buf),
+			       xfer->rx_buf, xfer->len);
 	),
 
-        TP_printk("spi%d.%d %p len=%d",
+	TP_printk("spi%d.%d %p len=%d tx=[%*phD] rx=[%*phD]",
 		  __entry->bus_num, __entry->chip_select,
-		  __entry->xfer, __entry->len)
+		  __entry->xfer, __entry->len,
+		  __get_dynamic_array_len(tx_buf), __get_dynamic_array(tx_buf),
+		  __get_dynamic_array_len(rx_buf), __get_dynamic_array(rx_buf))
 );
 
 DEFINE_EVENT(spi_transfer, spi_transfer_start,
-- 
cgit v1.2.3


From d0dcde6426ce071ad447fb9d91c85ab649026114 Mon Sep 17 00:00:00 2001
From: Otto Sabart <ottosabart@seberm.com>
Date: Sun, 6 Jan 2019 00:29:15 +0100
Subject: doc: networking: convert offload files into RST and update references

This patch renames offload files. This is necessary for Sphinx.

Also update reference to checksum-offloads.rst file.

Whole kernel code was grepped for references using:
$ grep -r "\(segmentation\|checksum\)-offloads.txt" .

There should be no other references
to {segmentation,checksum}-offloads.txt files.

Signed-off-by: Otto Sabart <ottosabart@seberm.com>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/networking/checksum-offloads.rst     | 143 ++++++++++++++++
 Documentation/networking/checksum-offloads.txt     | 143 ----------------
 Documentation/networking/segmentation-offloads.rst | 184 +++++++++++++++++++++
 Documentation/networking/segmentation-offloads.txt | 184 ---------------------
 include/linux/skbuff.h                             |   2 +-
 5 files changed, 328 insertions(+), 328 deletions(-)
 create mode 100644 Documentation/networking/checksum-offloads.rst
 delete mode 100644 Documentation/networking/checksum-offloads.txt
 create mode 100644 Documentation/networking/segmentation-offloads.rst
 delete mode 100644 Documentation/networking/segmentation-offloads.txt

(limited to 'include')

diff --git a/Documentation/networking/checksum-offloads.rst b/Documentation/networking/checksum-offloads.rst
new file mode 100644
index 000000000000..1a1cd94a3f6d
--- /dev/null
+++ b/Documentation/networking/checksum-offloads.rst
@@ -0,0 +1,143 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===============================================
+Checksum Offloads in the Linux Networking Stack
+===============================================
+
+
+Introduction
+============
+
+This document describes a set of techniques in the Linux networking stack to
+take advantage of checksum offload capabilities of various NICs.
+
+The following technologies are described:
+
+* TX Checksum Offload
+* LCO: Local Checksum Offload
+* RCO: Remote Checksum Offload
+
+Things that should be documented here but aren't yet:
+
+* RX Checksum Offload
+* CHECKSUM_UNNECESSARY conversion
+
+
+TX Checksum Offload
+===================
+
+The interface for offloading a transmit checksum to a device is explained in
+detail in comments near the top of include/linux/skbuff.h.
+
+In brief, it allows to request the device fill in a single ones-complement
+checksum defined by the sk_buff fields skb->csum_start and skb->csum_offset.
+The device should compute the 16-bit ones-complement checksum (i.e. the
+'IP-style' checksum) from csum_start to the end of the packet, and fill in the
+result at (csum_start + csum_offset).
+
+Because csum_offset cannot be negative, this ensures that the previous value of
+the checksum field is included in the checksum computation, thus it can be used
+to supply any needed corrections to the checksum (such as the sum of the
+pseudo-header for UDP or TCP).
+
+This interface only allows a single checksum to be offloaded.  Where
+encapsulation is used, the packet may have multiple checksum fields in
+different header layers, and the rest will have to be handled by another
+mechanism such as LCO or RCO.
+
+CRC32c can also be offloaded using this interface, by means of filling
+skb->csum_start and skb->csum_offset as described above, and setting
+skb->csum_not_inet: see skbuff.h comment (section 'D') for more details.
+
+No offloading of the IP header checksum is performed; it is always done in
+software.  This is OK because when we build the IP header, we obviously have it
+in cache, so summing it isn't expensive.  It's also rather short.
+
+The requirements for GSO are more complicated, because when segmenting an
+encapsulated packet both the inner and outer checksums may need to be edited or
+recomputed for each resulting segment.  See the skbuff.h comment (section 'E')
+for more details.
+
+A driver declares its offload capabilities in netdev->hw_features; see
+Documentation/networking/netdev-features.txt for more.  Note that a device
+which only advertises NETIF_F_IP[V6]_CSUM must still obey the csum_start and
+csum_offset given in the SKB; if it tries to deduce these itself in hardware
+(as some NICs do) the driver should check that the values in the SKB match
+those which the hardware will deduce, and if not, fall back to checksumming in
+software instead (with skb_csum_hwoffload_help() or one of the
+skb_checksum_help() / skb_crc32c_csum_help functions, as mentioned in
+include/linux/skbuff.h).
+
+The stack should, for the most part, assume that checksum offload is supported
+by the underlying device.  The only place that should check is
+validate_xmit_skb(), and the functions it calls directly or indirectly.  That
+function compares the offload features requested by the SKB (which may include
+other offloads besides TX Checksum Offload) and, if they are not supported or
+enabled on the device (determined by netdev->features), performs the
+corresponding offload in software.  In the case of TX Checksum Offload, that
+means calling skb_csum_hwoffload_help(skb, features).
+
+
+LCO: Local Checksum Offload
+===========================
+
+LCO is a technique for efficiently computing the outer checksum of an
+encapsulated datagram when the inner checksum is due to be offloaded.
+
+The ones-complement sum of a correctly checksummed TCP or UDP packet is equal
+to the complement of the sum of the pseudo header, because everything else gets
+'cancelled out' by the checksum field.  This is because the sum was
+complemented before being written to the checksum field.
+
+More generally, this holds in any case where the 'IP-style' ones complement
+checksum is used, and thus any checksum that TX Checksum Offload supports.
+
+That is, if we have set up TX Checksum Offload with a start/offset pair, we
+know that after the device has filled in that checksum, the ones complement sum
+from csum_start to the end of the packet will be equal to the complement of
+whatever value we put in the checksum field beforehand.  This allows us to
+compute the outer checksum without looking at the payload: we simply stop
+summing when we get to csum_start, then add the complement of the 16-bit word
+at (csum_start + csum_offset).
+
+Then, when the true inner checksum is filled in (either by hardware or by
+skb_checksum_help()), the outer checksum will become correct by virtue of the
+arithmetic.
+
+LCO is performed by the stack when constructing an outer UDP header for an
+encapsulation such as VXLAN or GENEVE, in udp_set_csum().  Similarly for the
+IPv6 equivalents, in udp6_set_csum().
+
+It is also performed when constructing an IPv4 GRE header, in
+net/ipv4/ip_gre.c:build_header().  It is *not* currently performed when
+constructing an IPv6 GRE header; the GRE checksum is computed over the whole
+packet in net/ipv6/ip6_gre.c:ip6gre_xmit2(), but it should be possible to use
+LCO here as IPv6 GRE still uses an IP-style checksum.
+
+All of the LCO implementations use a helper function lco_csum(), in
+include/linux/skbuff.h.
+
+LCO can safely be used for nested encapsulations; in this case, the outer
+encapsulation layer will sum over both its own header and the 'middle' header.
+This does mean that the 'middle' header will get summed multiple times, but
+there doesn't seem to be a way to avoid that without incurring bigger costs
+(e.g. in SKB bloat).
+
+
+RCO: Remote Checksum Offload
+============================
+
+RCO is a technique for eliding the inner checksum of an encapsulated datagram,
+allowing the outer checksum to be offloaded.  It does, however, involve a
+change to the encapsulation protocols, which the receiver must also support.
+For this reason, it is disabled by default.
+
+RCO is detailed in the following Internet-Drafts:
+
+* https://tools.ietf.org/html/draft-herbert-remotecsumoffload-00
+* https://tools.ietf.org/html/draft-herbert-vxlan-rco-00
+
+In Linux, RCO is implemented individually in each encapsulation protocol, and
+most tunnel types have flags controlling its use.  For instance, VXLAN has the
+flag VXLAN_F_REMCSUM_TX (per struct vxlan_rdst) to indicate that RCO should be
+used when transmitting to a given remote destination.
diff --git a/Documentation/networking/checksum-offloads.txt b/Documentation/networking/checksum-offloads.txt
deleted file mode 100644
index 1a1cd94a3f6d..000000000000
--- a/Documentation/networking/checksum-offloads.txt
+++ /dev/null
@@ -1,143 +0,0 @@
-.. SPDX-License-Identifier: GPL-2.0
-
-===============================================
-Checksum Offloads in the Linux Networking Stack
-===============================================
-
-
-Introduction
-============
-
-This document describes a set of techniques in the Linux networking stack to
-take advantage of checksum offload capabilities of various NICs.
-
-The following technologies are described:
-
-* TX Checksum Offload
-* LCO: Local Checksum Offload
-* RCO: Remote Checksum Offload
-
-Things that should be documented here but aren't yet:
-
-* RX Checksum Offload
-* CHECKSUM_UNNECESSARY conversion
-
-
-TX Checksum Offload
-===================
-
-The interface for offloading a transmit checksum to a device is explained in
-detail in comments near the top of include/linux/skbuff.h.
-
-In brief, it allows to request the device fill in a single ones-complement
-checksum defined by the sk_buff fields skb->csum_start and skb->csum_offset.
-The device should compute the 16-bit ones-complement checksum (i.e. the
-'IP-style' checksum) from csum_start to the end of the packet, and fill in the
-result at (csum_start + csum_offset).
-
-Because csum_offset cannot be negative, this ensures that the previous value of
-the checksum field is included in the checksum computation, thus it can be used
-to supply any needed corrections to the checksum (such as the sum of the
-pseudo-header for UDP or TCP).
-
-This interface only allows a single checksum to be offloaded.  Where
-encapsulation is used, the packet may have multiple checksum fields in
-different header layers, and the rest will have to be handled by another
-mechanism such as LCO or RCO.
-
-CRC32c can also be offloaded using this interface, by means of filling
-skb->csum_start and skb->csum_offset as described above, and setting
-skb->csum_not_inet: see skbuff.h comment (section 'D') for more details.
-
-No offloading of the IP header checksum is performed; it is always done in
-software.  This is OK because when we build the IP header, we obviously have it
-in cache, so summing it isn't expensive.  It's also rather short.
-
-The requirements for GSO are more complicated, because when segmenting an
-encapsulated packet both the inner and outer checksums may need to be edited or
-recomputed for each resulting segment.  See the skbuff.h comment (section 'E')
-for more details.
-
-A driver declares its offload capabilities in netdev->hw_features; see
-Documentation/networking/netdev-features.txt for more.  Note that a device
-which only advertises NETIF_F_IP[V6]_CSUM must still obey the csum_start and
-csum_offset given in the SKB; if it tries to deduce these itself in hardware
-(as some NICs do) the driver should check that the values in the SKB match
-those which the hardware will deduce, and if not, fall back to checksumming in
-software instead (with skb_csum_hwoffload_help() or one of the
-skb_checksum_help() / skb_crc32c_csum_help functions, as mentioned in
-include/linux/skbuff.h).
-
-The stack should, for the most part, assume that checksum offload is supported
-by the underlying device.  The only place that should check is
-validate_xmit_skb(), and the functions it calls directly or indirectly.  That
-function compares the offload features requested by the SKB (which may include
-other offloads besides TX Checksum Offload) and, if they are not supported or
-enabled on the device (determined by netdev->features), performs the
-corresponding offload in software.  In the case of TX Checksum Offload, that
-means calling skb_csum_hwoffload_help(skb, features).
-
-
-LCO: Local Checksum Offload
-===========================
-
-LCO is a technique for efficiently computing the outer checksum of an
-encapsulated datagram when the inner checksum is due to be offloaded.
-
-The ones-complement sum of a correctly checksummed TCP or UDP packet is equal
-to the complement of the sum of the pseudo header, because everything else gets
-'cancelled out' by the checksum field.  This is because the sum was
-complemented before being written to the checksum field.
-
-More generally, this holds in any case where the 'IP-style' ones complement
-checksum is used, and thus any checksum that TX Checksum Offload supports.
-
-That is, if we have set up TX Checksum Offload with a start/offset pair, we
-know that after the device has filled in that checksum, the ones complement sum
-from csum_start to the end of the packet will be equal to the complement of
-whatever value we put in the checksum field beforehand.  This allows us to
-compute the outer checksum without looking at the payload: we simply stop
-summing when we get to csum_start, then add the complement of the 16-bit word
-at (csum_start + csum_offset).
-
-Then, when the true inner checksum is filled in (either by hardware or by
-skb_checksum_help()), the outer checksum will become correct by virtue of the
-arithmetic.
-
-LCO is performed by the stack when constructing an outer UDP header for an
-encapsulation such as VXLAN or GENEVE, in udp_set_csum().  Similarly for the
-IPv6 equivalents, in udp6_set_csum().
-
-It is also performed when constructing an IPv4 GRE header, in
-net/ipv4/ip_gre.c:build_header().  It is *not* currently performed when
-constructing an IPv6 GRE header; the GRE checksum is computed over the whole
-packet in net/ipv6/ip6_gre.c:ip6gre_xmit2(), but it should be possible to use
-LCO here as IPv6 GRE still uses an IP-style checksum.
-
-All of the LCO implementations use a helper function lco_csum(), in
-include/linux/skbuff.h.
-
-LCO can safely be used for nested encapsulations; in this case, the outer
-encapsulation layer will sum over both its own header and the 'middle' header.
-This does mean that the 'middle' header will get summed multiple times, but
-there doesn't seem to be a way to avoid that without incurring bigger costs
-(e.g. in SKB bloat).
-
-
-RCO: Remote Checksum Offload
-============================
-
-RCO is a technique for eliding the inner checksum of an encapsulated datagram,
-allowing the outer checksum to be offloaded.  It does, however, involve a
-change to the encapsulation protocols, which the receiver must also support.
-For this reason, it is disabled by default.
-
-RCO is detailed in the following Internet-Drafts:
-
-* https://tools.ietf.org/html/draft-herbert-remotecsumoffload-00
-* https://tools.ietf.org/html/draft-herbert-vxlan-rco-00
-
-In Linux, RCO is implemented individually in each encapsulation protocol, and
-most tunnel types have flags controlling its use.  For instance, VXLAN has the
-flag VXLAN_F_REMCSUM_TX (per struct vxlan_rdst) to indicate that RCO should be
-used when transmitting to a given remote destination.
diff --git a/Documentation/networking/segmentation-offloads.rst b/Documentation/networking/segmentation-offloads.rst
new file mode 100644
index 000000000000..1794bfe98196
--- /dev/null
+++ b/Documentation/networking/segmentation-offloads.rst
@@ -0,0 +1,184 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===================================================
+Segmentation Offloads in the Linux Networking Stack
+===================================================
+
+
+Introduction
+============
+
+This document describes a set of techniques in the Linux networking stack
+to take advantage of segmentation offload capabilities of various NICs.
+
+The following technologies are described:
+ * TCP Segmentation Offload - TSO
+ * UDP Fragmentation Offload - UFO
+ * IPIP, SIT, GRE, and UDP Tunnel Offloads
+ * Generic Segmentation Offload - GSO
+ * Generic Receive Offload - GRO
+ * Partial Generic Segmentation Offload - GSO_PARTIAL
+ * SCTP accelleration with GSO - GSO_BY_FRAGS
+
+
+TCP Segmentation Offload
+========================
+
+TCP segmentation allows a device to segment a single frame into multiple
+frames with a data payload size specified in skb_shinfo()->gso_size.
+When TCP segmentation requested the bit for either SKB_GSO_TCPV4 or
+SKB_GSO_TCPV6 should be set in skb_shinfo()->gso_type and
+skb_shinfo()->gso_size should be set to a non-zero value.
+
+TCP segmentation is dependent on support for the use of partial checksum
+offload.  For this reason TSO is normally disabled if the Tx checksum
+offload for a given device is disabled.
+
+In order to support TCP segmentation offload it is necessary to populate
+the network and transport header offsets of the skbuff so that the device
+drivers will be able determine the offsets of the IP or IPv6 header and the
+TCP header.  In addition as CHECKSUM_PARTIAL is required csum_start should
+also point to the TCP header of the packet.
+
+For IPv4 segmentation we support one of two types in terms of the IP ID.
+The default behavior is to increment the IP ID with every segment.  If the
+GSO type SKB_GSO_TCP_FIXEDID is specified then we will not increment the IP
+ID and all segments will use the same IP ID.  If a device has
+NETIF_F_TSO_MANGLEID set then the IP ID can be ignored when performing TSO
+and we will either increment the IP ID for all frames, or leave it at a
+static value based on driver preference.
+
+
+UDP Fragmentation Offload
+=========================
+
+UDP fragmentation offload allows a device to fragment an oversized UDP
+datagram into multiple IPv4 fragments.  Many of the requirements for UDP
+fragmentation offload are the same as TSO.  However the IPv4 ID for
+fragments should not increment as a single IPv4 datagram is fragmented.
+
+UFO is deprecated: modern kernels will no longer generate UFO skbs, but can
+still receive them from tuntap and similar devices. Offload of UDP-based
+tunnel protocols is still supported.
+
+
+IPIP, SIT, GRE, UDP Tunnel, and Remote Checksum Offloads
+========================================================
+
+In addition to the offloads described above it is possible for a frame to
+contain additional headers such as an outer tunnel.  In order to account
+for such instances an additional set of segmentation offload types were
+introduced including SKB_GSO_IPXIP4, SKB_GSO_IPXIP6, SKB_GSO_GRE, and
+SKB_GSO_UDP_TUNNEL.  These extra segmentation types are used to identify
+cases where there are more than just 1 set of headers.  For example in the
+case of IPIP and SIT we should have the network and transport headers moved
+from the standard list of headers to "inner" header offsets.
+
+Currently only two levels of headers are supported.  The convention is to
+refer to the tunnel headers as the outer headers, while the encapsulated
+data is normally referred to as the inner headers.  Below is the list of
+calls to access the given headers:
+
+IPIP/SIT Tunnel::
+
+             Outer                  Inner
+  MAC        skb_mac_header
+  Network    skb_network_header     skb_inner_network_header
+  Transport  skb_transport_header
+
+UDP/GRE Tunnel::
+
+             Outer                  Inner
+  MAC        skb_mac_header         skb_inner_mac_header
+  Network    skb_network_header     skb_inner_network_header
+  Transport  skb_transport_header   skb_inner_transport_header
+
+In addition to the above tunnel types there are also SKB_GSO_GRE_CSUM and
+SKB_GSO_UDP_TUNNEL_CSUM.  These two additional tunnel types reflect the
+fact that the outer header also requests to have a non-zero checksum
+included in the outer header.
+
+Finally there is SKB_GSO_TUNNEL_REMCSUM which indicates that a given tunnel
+header has requested a remote checksum offload.  In this case the inner
+headers will be left with a partial checksum and only the outer header
+checksum will be computed.
+
+
+Generic Segmentation Offload
+============================
+
+Generic segmentation offload is a pure software offload that is meant to
+deal with cases where device drivers cannot perform the offloads described
+above.  What occurs in GSO is that a given skbuff will have its data broken
+out over multiple skbuffs that have been resized to match the MSS provided
+via skb_shinfo()->gso_size.
+
+Before enabling any hardware segmentation offload a corresponding software
+offload is required in GSO.  Otherwise it becomes possible for a frame to
+be re-routed between devices and end up being unable to be transmitted.
+
+
+Generic Receive Offload
+=======================
+
+Generic receive offload is the complement to GSO.  Ideally any frame
+assembled by GRO should be segmented to create an identical sequence of
+frames using GSO, and any sequence of frames segmented by GSO should be
+able to be reassembled back to the original by GRO.  The only exception to
+this is IPv4 ID in the case that the DF bit is set for a given IP header.
+If the value of the IPv4 ID is not sequentially incrementing it will be
+altered so that it is when a frame assembled via GRO is segmented via GSO.
+
+
+Partial Generic Segmentation Offload
+====================================
+
+Partial generic segmentation offload is a hybrid between TSO and GSO.  What
+it effectively does is take advantage of certain traits of TCP and tunnels
+so that instead of having to rewrite the packet headers for each segment
+only the inner-most transport header and possibly the outer-most network
+header need to be updated.  This allows devices that do not support tunnel
+offloads or tunnel offloads with checksum to still make use of segmentation.
+
+With the partial offload what occurs is that all headers excluding the
+inner transport header are updated such that they will contain the correct
+values for if the header was simply duplicated.  The one exception to this
+is the outer IPv4 ID field.  It is up to the device drivers to guarantee
+that the IPv4 ID field is incremented in the case that a given header does
+not have the DF bit set.
+
+
+SCTP accelleration with GSO
+===========================
+
+SCTP - despite the lack of hardware support - can still take advantage of
+GSO to pass one large packet through the network stack, rather than
+multiple small packets.
+
+This requires a different approach to other offloads, as SCTP packets
+cannot be just segmented to (P)MTU. Rather, the chunks must be contained in
+IP segments, padding respected. So unlike regular GSO, SCTP can't just
+generate a big skb, set gso_size to the fragmentation point and deliver it
+to IP layer.
+
+Instead, the SCTP protocol layer builds an skb with the segments correctly
+padded and stored as chained skbs, and skb_segment() splits based on those.
+To signal this, gso_size is set to the special value GSO_BY_FRAGS.
+
+Therefore, any code in the core networking stack must be aware of the
+possibility that gso_size will be GSO_BY_FRAGS and handle that case
+appropriately.
+
+There are some helpers to make this easier:
+
+- skb_is_gso(skb) && skb_is_gso_sctp(skb) is the best way to see if
+  an skb is an SCTP GSO skb.
+
+- For size checks, the skb_gso_validate_*_len family of helpers correctly
+  considers GSO_BY_FRAGS.
+
+- For manipulating packets, skb_increase_gso_size and skb_decrease_gso_size
+  will check for GSO_BY_FRAGS and WARN if asked to manipulate these skbs.
+
+This also affects drivers with the NETIF_F_FRAGLIST & NETIF_F_GSO_SCTP bits
+set. Note also that NETIF_F_GSO_SCTP is included in NETIF_F_GSO_SOFTWARE.
diff --git a/Documentation/networking/segmentation-offloads.txt b/Documentation/networking/segmentation-offloads.txt
deleted file mode 100644
index 1794bfe98196..000000000000
--- a/Documentation/networking/segmentation-offloads.txt
+++ /dev/null
@@ -1,184 +0,0 @@
-.. SPDX-License-Identifier: GPL-2.0
-
-===================================================
-Segmentation Offloads in the Linux Networking Stack
-===================================================
-
-
-Introduction
-============
-
-This document describes a set of techniques in the Linux networking stack
-to take advantage of segmentation offload capabilities of various NICs.
-
-The following technologies are described:
- * TCP Segmentation Offload - TSO
- * UDP Fragmentation Offload - UFO
- * IPIP, SIT, GRE, and UDP Tunnel Offloads
- * Generic Segmentation Offload - GSO
- * Generic Receive Offload - GRO
- * Partial Generic Segmentation Offload - GSO_PARTIAL
- * SCTP accelleration with GSO - GSO_BY_FRAGS
-
-
-TCP Segmentation Offload
-========================
-
-TCP segmentation allows a device to segment a single frame into multiple
-frames with a data payload size specified in skb_shinfo()->gso_size.
-When TCP segmentation requested the bit for either SKB_GSO_TCPV4 or
-SKB_GSO_TCPV6 should be set in skb_shinfo()->gso_type and
-skb_shinfo()->gso_size should be set to a non-zero value.
-
-TCP segmentation is dependent on support for the use of partial checksum
-offload.  For this reason TSO is normally disabled if the Tx checksum
-offload for a given device is disabled.
-
-In order to support TCP segmentation offload it is necessary to populate
-the network and transport header offsets of the skbuff so that the device
-drivers will be able determine the offsets of the IP or IPv6 header and the
-TCP header.  In addition as CHECKSUM_PARTIAL is required csum_start should
-also point to the TCP header of the packet.
-
-For IPv4 segmentation we support one of two types in terms of the IP ID.
-The default behavior is to increment the IP ID with every segment.  If the
-GSO type SKB_GSO_TCP_FIXEDID is specified then we will not increment the IP
-ID and all segments will use the same IP ID.  If a device has
-NETIF_F_TSO_MANGLEID set then the IP ID can be ignored when performing TSO
-and we will either increment the IP ID for all frames, or leave it at a
-static value based on driver preference.
-
-
-UDP Fragmentation Offload
-=========================
-
-UDP fragmentation offload allows a device to fragment an oversized UDP
-datagram into multiple IPv4 fragments.  Many of the requirements for UDP
-fragmentation offload are the same as TSO.  However the IPv4 ID for
-fragments should not increment as a single IPv4 datagram is fragmented.
-
-UFO is deprecated: modern kernels will no longer generate UFO skbs, but can
-still receive them from tuntap and similar devices. Offload of UDP-based
-tunnel protocols is still supported.
-
-
-IPIP, SIT, GRE, UDP Tunnel, and Remote Checksum Offloads
-========================================================
-
-In addition to the offloads described above it is possible for a frame to
-contain additional headers such as an outer tunnel.  In order to account
-for such instances an additional set of segmentation offload types were
-introduced including SKB_GSO_IPXIP4, SKB_GSO_IPXIP6, SKB_GSO_GRE, and
-SKB_GSO_UDP_TUNNEL.  These extra segmentation types are used to identify
-cases where there are more than just 1 set of headers.  For example in the
-case of IPIP and SIT we should have the network and transport headers moved
-from the standard list of headers to "inner" header offsets.
-
-Currently only two levels of headers are supported.  The convention is to
-refer to the tunnel headers as the outer headers, while the encapsulated
-data is normally referred to as the inner headers.  Below is the list of
-calls to access the given headers:
-
-IPIP/SIT Tunnel::
-
-             Outer                  Inner
-  MAC        skb_mac_header
-  Network    skb_network_header     skb_inner_network_header
-  Transport  skb_transport_header
-
-UDP/GRE Tunnel::
-
-             Outer                  Inner
-  MAC        skb_mac_header         skb_inner_mac_header
-  Network    skb_network_header     skb_inner_network_header
-  Transport  skb_transport_header   skb_inner_transport_header
-
-In addition to the above tunnel types there are also SKB_GSO_GRE_CSUM and
-SKB_GSO_UDP_TUNNEL_CSUM.  These two additional tunnel types reflect the
-fact that the outer header also requests to have a non-zero checksum
-included in the outer header.
-
-Finally there is SKB_GSO_TUNNEL_REMCSUM which indicates that a given tunnel
-header has requested a remote checksum offload.  In this case the inner
-headers will be left with a partial checksum and only the outer header
-checksum will be computed.
-
-
-Generic Segmentation Offload
-============================
-
-Generic segmentation offload is a pure software offload that is meant to
-deal with cases where device drivers cannot perform the offloads described
-above.  What occurs in GSO is that a given skbuff will have its data broken
-out over multiple skbuffs that have been resized to match the MSS provided
-via skb_shinfo()->gso_size.
-
-Before enabling any hardware segmentation offload a corresponding software
-offload is required in GSO.  Otherwise it becomes possible for a frame to
-be re-routed between devices and end up being unable to be transmitted.
-
-
-Generic Receive Offload
-=======================
-
-Generic receive offload is the complement to GSO.  Ideally any frame
-assembled by GRO should be segmented to create an identical sequence of
-frames using GSO, and any sequence of frames segmented by GSO should be
-able to be reassembled back to the original by GRO.  The only exception to
-this is IPv4 ID in the case that the DF bit is set for a given IP header.
-If the value of the IPv4 ID is not sequentially incrementing it will be
-altered so that it is when a frame assembled via GRO is segmented via GSO.
-
-
-Partial Generic Segmentation Offload
-====================================
-
-Partial generic segmentation offload is a hybrid between TSO and GSO.  What
-it effectively does is take advantage of certain traits of TCP and tunnels
-so that instead of having to rewrite the packet headers for each segment
-only the inner-most transport header and possibly the outer-most network
-header need to be updated.  This allows devices that do not support tunnel
-offloads or tunnel offloads with checksum to still make use of segmentation.
-
-With the partial offload what occurs is that all headers excluding the
-inner transport header are updated such that they will contain the correct
-values for if the header was simply duplicated.  The one exception to this
-is the outer IPv4 ID field.  It is up to the device drivers to guarantee
-that the IPv4 ID field is incremented in the case that a given header does
-not have the DF bit set.
-
-
-SCTP accelleration with GSO
-===========================
-
-SCTP - despite the lack of hardware support - can still take advantage of
-GSO to pass one large packet through the network stack, rather than
-multiple small packets.
-
-This requires a different approach to other offloads, as SCTP packets
-cannot be just segmented to (P)MTU. Rather, the chunks must be contained in
-IP segments, padding respected. So unlike regular GSO, SCTP can't just
-generate a big skb, set gso_size to the fragmentation point and deliver it
-to IP layer.
-
-Instead, the SCTP protocol layer builds an skb with the segments correctly
-padded and stored as chained skbs, and skb_segment() splits based on those.
-To signal this, gso_size is set to the special value GSO_BY_FRAGS.
-
-Therefore, any code in the core networking stack must be aware of the
-possibility that gso_size will be GSO_BY_FRAGS and handle that case
-appropriately.
-
-There are some helpers to make this easier:
-
-- skb_is_gso(skb) && skb_is_gso_sctp(skb) is the best way to see if
-  an skb is an SCTP GSO skb.
-
-- For size checks, the skb_gso_validate_*_len family of helpers correctly
-  considers GSO_BY_FRAGS.
-
-- For manipulating packets, skb_increase_gso_size and skb_decrease_gso_size
-  will check for GSO_BY_FRAGS and WARN if asked to manipulate these skbs.
-
-This also affects drivers with the NETIF_F_FRAGLIST & NETIF_F_GSO_SCTP bits
-set. Note also that NETIF_F_GSO_SCTP is included in NETIF_F_GSO_SOFTWARE.
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 93f56fddd92a..4e671b46e767 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -4296,7 +4296,7 @@ static inline bool skb_head_is_locked(const struct sk_buff *skb)
 /* Local Checksum Offload.
  * Compute outer checksum based on the assumption that the
  * inner checksum will be offloaded later.
- * See Documentation/networking/checksum-offloads.txt for
+ * See Documentation/networking/checksum-offloads.rst for
  * explanation of how this works.
  * Fill in outer checksum adjustment (e.g. with sum of outer
  * pseudo-header) before calling.
-- 
cgit v1.2.3


From 539cd352239f6acae4a7997bf31edca5c9a69c81 Mon Sep 17 00:00:00 2001
From: Zhikang Zhang <zhangzhikang1@huawei.com>
Date: Thu, 3 Jan 2019 20:06:38 +0800
Subject: f2fs: fix compile warnings: 'struct *' declared inside parameter list

We meet these compile warnings below, which caused by missing declare structs:
struct f2fs_io_info, struct extent, struct f2fs_sb_info.

warning: 'struct f2fs_io_info' declared inside parameter list
warning: 'struct extent_info' declared inside parameter list
warning: 'struct f2fs_sb_info' declared inside parameter list

Signed-off-by: Zhikang Zhang <zhangzhikang1@huawei.com>
Reviewed-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 include/trace/events/f2fs.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h
index 3ec73f17ee2a..8a28a2b1be74 100644
--- a/include/trace/events/f2fs.h
+++ b/include/trace/events/f2fs.h
@@ -149,6 +149,9 @@ TRACE_DEFINE_ENUM(CP_TRIMMED);
 		{ CP_SPEC_LOG_NUM,	"log type is 2" },		\
 		{ CP_RECOVER_DIR,	"dir needs recovery" })
 
+struct f2fs_sb_info;
+struct f2fs_io_info;
+struct extent_info;
 struct victim_sel_policy;
 struct f2fs_map_blocks;
 
-- 
cgit v1.2.3


From 9ac6cb5fbb1781d120ca0ad29d014d35c9c3f0c4 Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa+renesas@sang-engineering.com>
Date: Wed, 19 Dec 2018 17:48:17 +0100
Subject: i2c: add suspended flag and accessors for i2c adapters

A few drivers open code the handling of suspended adapters. It could be
handled by the core, though, to ensure generic handling. This patch adds
the flag and accessor functions. The usage of these helpers is optional,
though. See the kerneldoc in this patch. Using the new flag, we now
reject further transfers if the adapter is already marked suspended.

Signed-off-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 Documentation/i2c/fault-codes |  4 ++++
 drivers/i2c/i2c-core-base.c   |  3 +++
 include/linux/i2c.h           | 34 ++++++++++++++++++++++++++++++++++
 3 files changed, 41 insertions(+)

(limited to 'include')

diff --git a/Documentation/i2c/fault-codes b/Documentation/i2c/fault-codes
index 47c25abb7d52..0cee0fc545b4 100644
--- a/Documentation/i2c/fault-codes
+++ b/Documentation/i2c/fault-codes
@@ -112,6 +112,10 @@ EPROTO
 	case is when the length of an SMBus block data response
 	(from the SMBus slave) is outside the range 1-32 bytes.
 
+ESHUTDOWN
+	Returned when a transfer was requested using an adapter
+	which is already suspended.
+
 ETIMEDOUT
 	This is returned by drivers when an operation took too much
 	time, and was aborted before it completed.
diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c
index 28460f6a60cc..926ca0a7477f 100644
--- a/drivers/i2c/i2c-core-base.c
+++ b/drivers/i2c/i2c-core-base.c
@@ -1232,6 +1232,7 @@ static int i2c_register_adapter(struct i2c_adapter *adap)
 	if (!adap->lock_ops)
 		adap->lock_ops = &i2c_adapter_lock_ops;
 
+	adap->locked_flags = 0;
 	rt_mutex_init(&adap->bus_lock);
 	rt_mutex_init(&adap->mux_lock);
 	mutex_init(&adap->userspace_clients_lock);
@@ -1865,6 +1866,8 @@ int __i2c_transfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num)
 
 	if (WARN_ON(!msgs || num < 1))
 		return -EINVAL;
+	if (WARN_ON(test_bit(I2C_ALF_IS_SUSPENDED, &adap->locked_flags)))
+		return -ESHUTDOWN;
 
 	if (adap->quirks && i2c_check_for_quirks(adap, msgs, num))
 		return -EOPNOTSUPP;
diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index 65b4eaed1d96..cba59d66c00d 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -680,6 +680,8 @@ struct i2c_adapter {
 	int timeout;			/* in jiffies */
 	int retries;
 	struct device dev;		/* the adapter device */
+	unsigned long locked_flags;	/* owned by the I2C core */
+#define I2C_ALF_IS_SUSPENDED	0
 
 	int nr;
 	char name[48];
@@ -762,6 +764,38 @@ i2c_unlock_bus(struct i2c_adapter *adapter, unsigned int flags)
 	adapter->lock_ops->unlock_bus(adapter, flags);
 }
 
+/**
+ * i2c_mark_adapter_suspended - Report suspended state of the adapter to the core
+ * @adap: Adapter to mark as suspended
+ *
+ * When using this helper to mark an adapter as suspended, the core will reject
+ * further transfers to this adapter. The usage of this helper is optional but
+ * recommended for devices having distinct handlers for system suspend and
+ * runtime suspend. More complex devices are free to implement custom solutions
+ * to reject transfers when suspended.
+ */
+static inline void i2c_mark_adapter_suspended(struct i2c_adapter *adap)
+{
+	i2c_lock_bus(adap, I2C_LOCK_ROOT_ADAPTER);
+	set_bit(I2C_ALF_IS_SUSPENDED, &adap->locked_flags);
+	i2c_unlock_bus(adap, I2C_LOCK_ROOT_ADAPTER);
+}
+
+/**
+ * i2c_mark_adapter_resumed - Report resumed state of the adapter to the core
+ * @adap: Adapter to mark as resumed
+ *
+ * When using this helper to mark an adapter as resumed, the core will allow
+ * further transfers to this adapter. See also further notes to
+ * @i2c_mark_adapter_suspended().
+ */
+static inline void i2c_mark_adapter_resumed(struct i2c_adapter *adap)
+{
+	i2c_lock_bus(adap, I2C_LOCK_ROOT_ADAPTER);
+	clear_bit(I2C_ALF_IS_SUSPENDED, &adap->locked_flags);
+	i2c_unlock_bus(adap, I2C_LOCK_ROOT_ADAPTER);
+}
+
 /*flags for the client struct: */
 #define I2C_CLIENT_PEC		0x04	/* Use Packet Error Checking */
 #define I2C_CLIENT_TEN		0x10	/* we have a ten bit chip address */
-- 
cgit v1.2.3


From 991e873223e9bcfaf10a0074eec1507c1ddfb6ab Mon Sep 17 00:00:00 2001
From: Ioana Ciornei <ioana.ciornei@nxp.com>
Date: Tue, 4 Dec 2018 11:20:29 +0000
Subject: soc: fsl: dpio: use a cpumask to identify which cpus are unused

The current implementation of the dpio driver uses a static next_cpu
variable to keep track of the index of the next cpu available. This
approach does not handle well unbinding and binding dpio devices in a
random order. For example, unbinding a dpio and then binding it again
with the driver, will generate the below error:

$ echo dpio.5 > /sys/bus/fsl-mc/drivers/fsl_mc_dpio/unbind
$ echo dpio.5 > /sys/bus/fsl-mc/drivers/fsl_mc_dpio/bind
[  103.946380] fsl_mc_dpio dpio.5: probe failed. Number of DPIOs exceeds
NR_CPUS.
[  103.955157] fsl_mc_dpio dpio.5: fsl_mc_driver_probe failed: -34
-bash: echo: write error: No such device

Fix this error by keeping a global cpumask of unused cpus that will be
updated at every dpaa2_dpio_[probe,remove].

Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
Signed-off-by: Li Yang <leoyang.li@nxp.com>
---
 drivers/soc/fsl/dpio/dpio-driver.c  | 25 ++++++++++++++++---------
 drivers/soc/fsl/dpio/dpio-service.c | 13 +++++++++++++
 include/soc/fsl/dpaa2-io.h          |  2 ++
 3 files changed, 31 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/drivers/soc/fsl/dpio/dpio-driver.c b/drivers/soc/fsl/dpio/dpio-driver.c
index e58fcc9096e8..832175cac739 100644
--- a/drivers/soc/fsl/dpio/dpio-driver.c
+++ b/drivers/soc/fsl/dpio/dpio-driver.c
@@ -30,6 +30,8 @@ struct dpio_priv {
 	struct dpaa2_io *io;
 };
 
+static cpumask_var_t cpus_unused_mask;
+
 static irqreturn_t dpio_irq_handler(int irq_num, void *arg)
 {
 	struct device *dev = (struct device *)arg;
@@ -86,7 +88,7 @@ static int dpaa2_dpio_probe(struct fsl_mc_device *dpio_dev)
 	struct dpio_priv *priv;
 	int err = -ENOMEM;
 	struct device *dev = &dpio_dev->dev;
-	static int next_cpu = -1;
+	int possible_next_cpu;
 
 	priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
 	if (!priv)
@@ -128,17 +130,14 @@ static int dpaa2_dpio_probe(struct fsl_mc_device *dpio_dev)
 	desc.dpio_id = dpio_dev->obj_desc.id;
 
 	/* get the cpu to use for the affinity hint */
-	if (next_cpu == -1)
-		next_cpu = cpumask_first(cpu_online_mask);
-	else
-		next_cpu = cpumask_next(next_cpu, cpu_online_mask);
-
-	if (!cpu_possible(next_cpu)) {
+	possible_next_cpu = cpumask_first(cpus_unused_mask);
+	if (possible_next_cpu >= nr_cpu_ids) {
 		dev_err(dev, "probe failed. Number of DPIOs exceeds NR_CPUS.\n");
 		err = -ERANGE;
 		goto err_allocate_irqs;
 	}
-	desc.cpu = next_cpu;
+	desc.cpu = possible_next_cpu;
+	cpumask_clear_cpu(possible_next_cpu, cpus_unused_mask);
 
 	/*
 	 * Set the CENA regs to be the cache inhibited area of the portal to
@@ -211,7 +210,7 @@ static int dpaa2_dpio_remove(struct fsl_mc_device *dpio_dev)
 {
 	struct device *dev;
 	struct dpio_priv *priv;
-	int err;
+	int err = 0, cpu;
 
 	dev = &dpio_dev->dev;
 	priv = dev_get_drvdata(dev);
@@ -220,6 +219,9 @@ static int dpaa2_dpio_remove(struct fsl_mc_device *dpio_dev)
 
 	dpio_teardown_irqs(dpio_dev);
 
+	cpu = dpaa2_io_get_cpu(priv->io);
+	cpumask_set_cpu(cpu, cpus_unused_mask);
+
 	err = fsl_mc_portal_allocate(dpio_dev, 0, &dpio_dev->mc_io);
 	if (err) {
 		dev_err(dev, "MC portal allocation failed\n");
@@ -267,11 +269,16 @@ static struct fsl_mc_driver dpaa2_dpio_driver = {
 
 static int dpio_driver_init(void)
 {
+	if (!zalloc_cpumask_var(&cpus_unused_mask, GFP_KERNEL))
+		return -ENOMEM;
+	cpumask_copy(cpus_unused_mask, cpu_online_mask);
+
 	return fsl_mc_driver_register(&dpaa2_dpio_driver);
 }
 
 static void dpio_driver_exit(void)
 {
+	free_cpumask_var(cpus_unused_mask);
 	fsl_mc_driver_unregister(&dpaa2_dpio_driver);
 }
 module_init(dpio_driver_init);
diff --git a/drivers/soc/fsl/dpio/dpio-service.c b/drivers/soc/fsl/dpio/dpio-service.c
index ab046f241d32..c02bfb18a1f0 100644
--- a/drivers/soc/fsl/dpio/dpio-service.c
+++ b/drivers/soc/fsl/dpio/dpio-service.c
@@ -214,6 +214,19 @@ done:
 	return IRQ_HANDLED;
 }
 
+/**
+ * dpaa2_io_get_cpu() - get the cpu associated with a given DPIO object
+ *
+ * @d: the given DPIO object.
+ *
+ * Return the cpu associated with the DPIO object
+ */
+int dpaa2_io_get_cpu(struct dpaa2_io *d)
+{
+	return d->dpio_desc.cpu;
+}
+EXPORT_SYMBOL(dpaa2_io_get_cpu);
+
 /**
  * dpaa2_io_service_register() - Prepare for servicing of FQDAN or CDAN
  *                               notifications on the given DPIO service.
diff --git a/include/soc/fsl/dpaa2-io.h b/include/soc/fsl/dpaa2-io.h
index 3fbd71c27ba3..243119c53cdf 100644
--- a/include/soc/fsl/dpaa2-io.h
+++ b/include/soc/fsl/dpaa2-io.h
@@ -90,6 +90,8 @@ struct dpaa2_io_notification_ctx {
 	void *dpio_private;
 };
 
+int dpaa2_io_get_cpu(struct dpaa2_io *d);
+
 int dpaa2_io_service_register(struct dpaa2_io *service,
 			      struct dpaa2_io_notification_ctx *ctx);
 void dpaa2_io_service_deregister(struct dpaa2_io *service,
-- 
cgit v1.2.3


From 47008e5161fa097ce9b848dee194b43262b743a5 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 19 Sep 2018 16:13:25 -0700
Subject: LSM: Introduce LSM_FLAG_LEGACY_MAJOR

This adds a flag for the current "major" LSMs to distinguish them when
we have a universal method for ordering all LSMs. It's called "legacy"
since the distinction of "major" will go away in the blob-sharing world.

Signed-off-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Casey Schaufler <casey@schaufler-ca.com>
Reviewed-by: John Johansen <john.johansen@canonical.com>
---
 include/linux/lsm_hooks.h  | 3 +++
 security/apparmor/lsm.c    | 1 +
 security/selinux/hooks.c   | 1 +
 security/smack/smack_lsm.c | 1 +
 security/tomoyo/tomoyo.c   | 1 +
 5 files changed, 7 insertions(+)

(limited to 'include')

diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h
index 9a0bdf91e646..318d93f918c3 100644
--- a/include/linux/lsm_hooks.h
+++ b/include/linux/lsm_hooks.h
@@ -2042,8 +2042,11 @@ extern char *lsm_names;
 extern void security_add_hooks(struct security_hook_list *hooks, int count,
 				char *lsm);
 
+#define LSM_FLAG_LEGACY_MAJOR	BIT(0)
+
 struct lsm_info {
 	const char *name;	/* Required. */
+	unsigned long flags;	/* Optional: flags describing LSM */
 	int (*init)(void);	/* Required. */
 };
 
diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c
index 2c010874329f..e49c50e0d5ab 100644
--- a/security/apparmor/lsm.c
+++ b/security/apparmor/lsm.c
@@ -1729,5 +1729,6 @@ alloc_out:
 
 DEFINE_LSM(apparmor) = {
 	.name = "apparmor",
+	.flags = LSM_FLAG_LEGACY_MAJOR,
 	.init = apparmor_init,
 };
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index f0e36c3492ba..41908d2d6149 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -6999,6 +6999,7 @@ void selinux_complete_init(void)
    all processes and objects when they are created. */
 DEFINE_LSM(selinux) = {
 	.name = "selinux",
+	.flags = LSM_FLAG_LEGACY_MAJOR,
 	.init = selinux_init,
 };
 
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
index 430d4f35e55c..d72d215d7fde 100644
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -4812,5 +4812,6 @@ static __init int smack_init(void)
  */
 DEFINE_LSM(smack) = {
 	.name = "smack",
+	.flags = LSM_FLAG_LEGACY_MAJOR,
 	.init = smack_init,
 };
diff --git a/security/tomoyo/tomoyo.c b/security/tomoyo/tomoyo.c
index 1b5b5097efd7..09f7af130d3a 100644
--- a/security/tomoyo/tomoyo.c
+++ b/security/tomoyo/tomoyo.c
@@ -552,5 +552,6 @@ static int __init tomoyo_init(void)
 
 DEFINE_LSM(tomoyo) = {
 	.name = "tomoyo",
+	.flags = LSM_FLAG_LEGACY_MAJOR,
 	.init = tomoyo_init,
 };
-- 
cgit v1.2.3


From c5459b829b716dafd226ad270f25c9a3050f7586 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Thu, 13 Sep 2018 22:28:48 -0700
Subject: LSM: Plumb visibility into optional "enabled" state

In preparation for lifting the "is this LSM enabled?" logic out of the
individual LSMs, pass in any special enabled state tracking (as needed
for SELinux, AppArmor, and LoadPin). This should be an "int" to include
handling any future cases where "enabled" is exposed via sysctl which
has no "bool" type.

Signed-off-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Casey Schaufler <casey@schaufler-ca.com>
Reviewed-by: John Johansen <john.johansen@canonical.com>
---
 include/linux/lsm_hooks.h | 1 +
 security/apparmor/lsm.c   | 5 +++--
 security/selinux/hooks.c  | 1 +
 3 files changed, 5 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h
index 318d93f918c3..7bbe5e287161 100644
--- a/include/linux/lsm_hooks.h
+++ b/include/linux/lsm_hooks.h
@@ -2047,6 +2047,7 @@ extern void security_add_hooks(struct security_hook_list *hooks, int count,
 struct lsm_info {
 	const char *name;	/* Required. */
 	unsigned long flags;	/* Optional: flags describing LSM */
+	int *enabled;		/* Optional: NULL means enabled. */
 	int (*init)(void);	/* Required. */
 };
 
diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c
index e49c50e0d5ab..a4652ff622cf 100644
--- a/security/apparmor/lsm.c
+++ b/security/apparmor/lsm.c
@@ -1333,8 +1333,8 @@ bool aa_g_paranoid_load = true;
 module_param_named(paranoid_load, aa_g_paranoid_load, aabool, S_IRUGO);
 
 /* Boot time disable flag */
-static bool apparmor_enabled = CONFIG_SECURITY_APPARMOR_BOOTPARAM_VALUE;
-module_param_named(enabled, apparmor_enabled, bool, S_IRUGO);
+static int apparmor_enabled = CONFIG_SECURITY_APPARMOR_BOOTPARAM_VALUE;
+module_param_named(enabled, apparmor_enabled, int, 0444);
 
 static int __init apparmor_enabled_setup(char *str)
 {
@@ -1730,5 +1730,6 @@ alloc_out:
 DEFINE_LSM(apparmor) = {
 	.name = "apparmor",
 	.flags = LSM_FLAG_LEGACY_MAJOR,
+	.enabled = &apparmor_enabled,
 	.init = apparmor_init,
 };
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 41908d2d6149..f847514d6f03 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -7000,6 +7000,7 @@ void selinux_complete_init(void)
 DEFINE_LSM(selinux) = {
 	.name = "selinux",
 	.flags = LSM_FLAG_LEGACY_MAJOR,
+	.enabled = &selinux_enabled,
 	.init = selinux_init,
 };
 
-- 
cgit v1.2.3


From f4941d75b9cba5e1fae1aebe0139dcca0703a294 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Thu, 13 Sep 2018 23:17:50 -0700
Subject: LSM: Lift LSM selection out of individual LSMs

As a prerequisite to adjusting LSM selection logic in the future, this
moves the selection logic up out of the individual major LSMs, making
their init functions only run when actually enabled. This considers all
LSMs enabled by default unless they specified an external "enable"
variable.

Signed-off-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Casey Schaufler <casey@schaufler-ca.com>
Reviewed-by: John Johansen <john.johansen@canonical.com>
---
 include/linux/lsm_hooks.h  |   1 -
 security/apparmor/lsm.c    |   6 ---
 security/security.c        | 102 +++++++++++++++++++++++++++++++--------------
 security/selinux/hooks.c   |  10 -----
 security/smack/smack_lsm.c |   3 --
 security/tomoyo/tomoyo.c   |   2 -
 6 files changed, 71 insertions(+), 53 deletions(-)

(limited to 'include')

diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h
index 7bbe5e287161..be1581d18e3e 100644
--- a/include/linux/lsm_hooks.h
+++ b/include/linux/lsm_hooks.h
@@ -2088,7 +2088,6 @@ static inline void security_delete_hooks(struct security_hook_list *hooks,
 #define __lsm_ro_after_init	__ro_after_init
 #endif /* CONFIG_SECURITY_WRITABLE_HOOKS */
 
-extern int __init security_module_enable(const char *module);
 extern void __init capability_add_hooks(void);
 #ifdef CONFIG_SECURITY_YAMA
 extern void __init yama_add_hooks(void);
diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c
index a4652ff622cf..dfc5fbf8ba82 100644
--- a/security/apparmor/lsm.c
+++ b/security/apparmor/lsm.c
@@ -1663,12 +1663,6 @@ static int __init apparmor_init(void)
 {
 	int error;
 
-	if (!apparmor_enabled || !security_module_enable("apparmor")) {
-		aa_info_message("AppArmor disabled by boot time parameter");
-		apparmor_enabled = false;
-		return 0;
-	}
-
 	aa_secids_init();
 
 	error = aa_setup_dfa_engine();
diff --git a/security/security.c b/security/security.c
index 6bc591f77b1a..c900d7a1441a 100644
--- a/security/security.c
+++ b/security/security.c
@@ -52,33 +52,96 @@ static __initdata bool debug;
 			pr_info(__VA_ARGS__);			\
 	} while (0)
 
+static bool __init is_enabled(struct lsm_info *lsm)
+{
+	if (!lsm->enabled || *lsm->enabled)
+		return true;
+
+	return false;
+}
+
+/* Mark an LSM's enabled flag. */
+static int lsm_enabled_true __initdata = 1;
+static int lsm_enabled_false __initdata = 0;
+static void __init set_enabled(struct lsm_info *lsm, bool enabled)
+{
+	/*
+	 * When an LSM hasn't configured an enable variable, we can use
+	 * a hard-coded location for storing the default enabled state.
+	 */
+	if (!lsm->enabled) {
+		if (enabled)
+			lsm->enabled = &lsm_enabled_true;
+		else
+			lsm->enabled = &lsm_enabled_false;
+	} else if (lsm->enabled == &lsm_enabled_true) {
+		if (!enabled)
+			lsm->enabled = &lsm_enabled_false;
+	} else if (lsm->enabled == &lsm_enabled_false) {
+		if (enabled)
+			lsm->enabled = &lsm_enabled_true;
+	} else {
+		*lsm->enabled = enabled;
+	}
+}
+
+/* Is an LSM allowed to be initialized? */
+static bool __init lsm_allowed(struct lsm_info *lsm)
+{
+	/* Skip if the LSM is disabled. */
+	if (!is_enabled(lsm))
+		return false;
+
+	/* Skip major-specific checks if not a major LSM. */
+	if ((lsm->flags & LSM_FLAG_LEGACY_MAJOR) == 0)
+		return true;
+
+	/* Disabled if this LSM isn't the chosen one. */
+	if (strcmp(lsm->name, chosen_lsm) != 0)
+		return false;
+
+	return true;
+}
+
+/* Check if LSM should be initialized. */
+static void __init maybe_initialize_lsm(struct lsm_info *lsm)
+{
+	int enabled = lsm_allowed(lsm);
+
+	/* Record enablement (to handle any following exclusive LSMs). */
+	set_enabled(lsm, enabled);
+
+	/* If selected, initialize the LSM. */
+	if (enabled) {
+		int ret;
+
+		init_debug("initializing %s\n", lsm->name);
+		ret = lsm->init();
+		WARN(ret, "%s failed to initialize: %d\n", lsm->name, ret);
+	}
+}
+
 static void __init ordered_lsm_init(void)
 {
 	struct lsm_info *lsm;
-	int ret;
 
 	for (lsm = __start_lsm_info; lsm < __end_lsm_info; lsm++) {
 		if ((lsm->flags & LSM_FLAG_LEGACY_MAJOR) != 0)
 			continue;
 
-		init_debug("initializing %s\n", lsm->name);
-		ret = lsm->init();
-		WARN(ret, "%s failed to initialize: %d\n", lsm->name, ret);
+		maybe_initialize_lsm(lsm);
 	}
 }
 
 static void __init major_lsm_init(void)
 {
 	struct lsm_info *lsm;
-	int ret;
 
 	for (lsm = __start_lsm_info; lsm < __end_lsm_info; lsm++) {
 		if ((lsm->flags & LSM_FLAG_LEGACY_MAJOR) == 0)
 			continue;
 
-		init_debug("initializing %s\n", lsm->name);
-		ret = lsm->init();
-		WARN(ret, "%s failed to initialize: %d\n", lsm->name, ret);
+		maybe_initialize_lsm(lsm);
 	}
 }
 
@@ -168,29 +231,6 @@ static int lsm_append(char *new, char **result)
 	return 0;
 }
 
-/**
- * security_module_enable - Load given security module on boot ?
- * @module: the name of the module
- *
- * Each LSM must pass this method before registering its own operations
- * to avoid security registration races. This method may also be used
- * to check if your LSM is currently loaded during kernel initialization.
- *
- * Returns:
- *
- * true if:
- *
- * - The passed LSM is the one chosen by user at boot time,
- * - or the passed LSM is configured as the default and the user did not
- *   choose an alternate LSM at boot time.
- *
- * Otherwise, return false.
- */
-int __init security_module_enable(const char *module)
-{
-	return !strcmp(module, chosen_lsm);
-}
-
 /**
  * security_add_hooks - Add a modules hooks to the hook lists.
  * @hooks: the hooks to add
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index f847514d6f03..0f8ae2fbd14a 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -6928,16 +6928,6 @@ static struct security_hook_list selinux_hooks[] __lsm_ro_after_init = {
 
 static __init int selinux_init(void)
 {
-	if (!security_module_enable("selinux")) {
-		selinux_enabled = 0;
-		return 0;
-	}
-
-	if (!selinux_enabled) {
-		pr_info("SELinux:  Disabled at boot.\n");
-		return 0;
-	}
-
 	pr_info("SELinux:  Initializing.\n");
 
 	memset(&selinux_state, 0, sizeof(selinux_state));
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
index d72d215d7fde..580e9d6e5680 100644
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -4762,9 +4762,6 @@ static __init int smack_init(void)
 	struct cred *cred;
 	struct task_smack *tsp;
 
-	if (!security_module_enable("smack"))
-		return 0;
-
 	smack_inode_cache = KMEM_CACHE(inode_smack, 0);
 	if (!smack_inode_cache)
 		return -ENOMEM;
diff --git a/security/tomoyo/tomoyo.c b/security/tomoyo/tomoyo.c
index 09f7af130d3a..a46f6bc1e97c 100644
--- a/security/tomoyo/tomoyo.c
+++ b/security/tomoyo/tomoyo.c
@@ -540,8 +540,6 @@ static int __init tomoyo_init(void)
 {
 	struct cred *cred = (struct cred *) current_cred();
 
-	if (!security_module_enable("tomoyo"))
-		return 0;
 	/* register ourselves with the security framework */
 	security_add_hooks(tomoyo_hooks, ARRAY_SIZE(tomoyo_hooks), "tomoyo");
 	printk(KERN_INFO "TOMOYO Linux initialized\n");
-- 
cgit v1.2.3


From a8027fb0d188599ccdb2096f49f708bae04d86c4 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Tue, 9 Oct 2018 14:42:57 -0700
Subject: LSM: Tie enabling logic to presence in ordered list

Until now, any LSM without an enable storage variable was considered
enabled. This inverts the logic and sets defaults to true only if the
LSM gets added to the ordered initialization list. (And an exception
continues for the major LSMs until they are integrated into the ordered
initialization in a later patch.)

Signed-off-by: Kees Cook <keescook@chromium.org>
---
 include/linux/lsm_hooks.h |  2 +-
 security/security.c       | 14 +++++++++++---
 2 files changed, 12 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h
index be1581d18e3e..e28a3aa639e8 100644
--- a/include/linux/lsm_hooks.h
+++ b/include/linux/lsm_hooks.h
@@ -2047,7 +2047,7 @@ extern void security_add_hooks(struct security_hook_list *hooks, int count,
 struct lsm_info {
 	const char *name;	/* Required. */
 	unsigned long flags;	/* Optional: flags describing LSM */
-	int *enabled;		/* Optional: NULL means enabled. */
+	int *enabled;		/* Optional: controlled by CONFIG_LSM */
 	int (*init)(void);	/* Required. */
 };
 
diff --git a/security/security.c b/security/security.c
index 2e1f48e8a6f2..b6d3456978a4 100644
--- a/security/security.c
+++ b/security/security.c
@@ -63,10 +63,10 @@ static __initdata bool debug;
 
 static bool __init is_enabled(struct lsm_info *lsm)
 {
-	if (!lsm->enabled || *lsm->enabled)
-		return true;
+	if (!lsm->enabled)
+		return false;
 
-	return false;
+	return *lsm->enabled;
 }
 
 /* Mark an LSM's enabled flag. */
@@ -117,7 +117,11 @@ static void __init append_ordered_lsm(struct lsm_info *lsm, const char *from)
 	if (WARN(last_lsm == LSM_COUNT, "%s: out of LSM slots!?\n", from))
 		return;
 
+	/* Enable this LSM, if it is not already set. */
+	if (!lsm->enabled)
+		lsm->enabled = &lsm_enabled_true;
 	ordered_lsms[last_lsm++] = lsm;
+
 	init_debug("%s ordering: %s (%sabled)\n", from, lsm->name,
 		   is_enabled(lsm) ? "en" : "dis");
 }
@@ -210,6 +214,10 @@ static void __init major_lsm_init(void)
 		if ((lsm->flags & LSM_FLAG_LEGACY_MAJOR) == 0)
 			continue;
 
+		/* Enable this LSM, if it is not already set. */
+		if (!lsm->enabled)
+			lsm->enabled = &lsm_enabled_true;
+
 		maybe_initialize_lsm(lsm);
 	}
 }
-- 
cgit v1.2.3


From 14bd99c821f7ace0e8110a1bfdfaa27e1788e20f Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 19 Sep 2018 19:57:06 -0700
Subject: LSM: Separate idea of "major" LSM from "exclusive" LSM

In order to both support old "security=" Legacy Major LSM selection, and
handling real exclusivity, this creates LSM_FLAG_EXCLUSIVE and updates
the selection logic to handle them.

Signed-off-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Casey Schaufler <casey@schaufler-ca.com>
---
 include/linux/lsm_hooks.h  |  1 +
 security/apparmor/lsm.c    |  2 +-
 security/security.c        | 12 ++++++++++++
 security/selinux/hooks.c   |  2 +-
 security/smack/smack_lsm.c |  2 +-
 security/tomoyo/tomoyo.c   |  2 +-
 6 files changed, 17 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h
index e28a3aa639e8..c3843b33da9e 100644
--- a/include/linux/lsm_hooks.h
+++ b/include/linux/lsm_hooks.h
@@ -2043,6 +2043,7 @@ extern void security_add_hooks(struct security_hook_list *hooks, int count,
 				char *lsm);
 
 #define LSM_FLAG_LEGACY_MAJOR	BIT(0)
+#define LSM_FLAG_EXCLUSIVE	BIT(1)
 
 struct lsm_info {
 	const char *name;	/* Required. */
diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c
index dfc5fbf8ba82..149a3e16b5da 100644
--- a/security/apparmor/lsm.c
+++ b/security/apparmor/lsm.c
@@ -1723,7 +1723,7 @@ alloc_out:
 
 DEFINE_LSM(apparmor) = {
 	.name = "apparmor",
-	.flags = LSM_FLAG_LEGACY_MAJOR,
+	.flags = LSM_FLAG_LEGACY_MAJOR | LSM_FLAG_EXCLUSIVE,
 	.enabled = &apparmor_enabled,
 	.init = apparmor_init,
 };
diff --git a/security/security.c b/security/security.c
index 88de6b073246..a8dd7defe30a 100644
--- a/security/security.c
+++ b/security/security.c
@@ -49,6 +49,7 @@ static __initconst const char * const builtin_lsm_order = CONFIG_LSM;
 
 /* Ordered list of LSMs to initialize. */
 static __initdata struct lsm_info **ordered_lsms;
+static __initdata struct lsm_info *exclusive;
 
 static __initdata bool debug;
 #define init_debug(...)						\
@@ -129,6 +130,12 @@ static bool __init lsm_allowed(struct lsm_info *lsm)
 	if (!is_enabled(lsm))
 		return false;
 
+	/* Not allowed if another exclusive LSM already initialized. */
+	if ((lsm->flags & LSM_FLAG_EXCLUSIVE) && exclusive) {
+		init_debug("exclusive disabled: %s\n", lsm->name);
+		return false;
+	}
+
 	return true;
 }
 
@@ -144,6 +151,11 @@ static void __init maybe_initialize_lsm(struct lsm_info *lsm)
 	if (enabled) {
 		int ret;
 
+		if ((lsm->flags & LSM_FLAG_EXCLUSIVE) && !exclusive) {
+			exclusive = lsm;
+			init_debug("exclusive chosen: %s\n", lsm->name);
+		}
+
 		init_debug("initializing %s\n", lsm->name);
 		ret = lsm->init();
 		WARN(ret, "%s failed to initialize: %d\n", lsm->name, ret);
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 0f8ae2fbd14a..49865f119b16 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -6989,7 +6989,7 @@ void selinux_complete_init(void)
    all processes and objects when they are created. */
 DEFINE_LSM(selinux) = {
 	.name = "selinux",
-	.flags = LSM_FLAG_LEGACY_MAJOR,
+	.flags = LSM_FLAG_LEGACY_MAJOR | LSM_FLAG_EXCLUSIVE,
 	.enabled = &selinux_enabled,
 	.init = selinux_init,
 };
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
index 580e9d6e5680..780733341d02 100644
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -4809,6 +4809,6 @@ static __init int smack_init(void)
  */
 DEFINE_LSM(smack) = {
 	.name = "smack",
-	.flags = LSM_FLAG_LEGACY_MAJOR,
+	.flags = LSM_FLAG_LEGACY_MAJOR | LSM_FLAG_EXCLUSIVE,
 	.init = smack_init,
 };
diff --git a/security/tomoyo/tomoyo.c b/security/tomoyo/tomoyo.c
index a46f6bc1e97c..daff7d7897ad 100644
--- a/security/tomoyo/tomoyo.c
+++ b/security/tomoyo/tomoyo.c
@@ -550,6 +550,6 @@ static int __init tomoyo_init(void)
 
 DEFINE_LSM(tomoyo) = {
 	.name = "tomoyo",
-	.flags = LSM_FLAG_LEGACY_MAJOR,
+	.flags = LSM_FLAG_LEGACY_MAJOR | LSM_FLAG_EXCLUSIVE,
 	.init = tomoyo_init,
 };
-- 
cgit v1.2.3


From 70b62c25665f636c9f6c700b26af7df296b0887e Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Fri, 14 Sep 2018 15:26:37 -0700
Subject: LoadPin: Initialize as ordered LSM

This converts LoadPin from being a direct "minor" LSM into an ordered LSM.

Signed-off-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Casey Schaufler <casey@schaufler-ca.com>
---
 include/linux/lsm_hooks.h  |  5 -----
 security/Kconfig           | 39 +--------------------------------------
 security/loadpin/loadpin.c |  8 +++++++-
 security/security.c        |  1 -
 4 files changed, 8 insertions(+), 45 deletions(-)

(limited to 'include')

diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h
index c3843b33da9e..fb1a653ccfcb 100644
--- a/include/linux/lsm_hooks.h
+++ b/include/linux/lsm_hooks.h
@@ -2095,10 +2095,5 @@ extern void __init yama_add_hooks(void);
 #else
 static inline void __init yama_add_hooks(void) { }
 #endif
-#ifdef CONFIG_SECURITY_LOADPIN
-void __init loadpin_add_hooks(void);
-#else
-static inline void loadpin_add_hooks(void) { };
-#endif
 
 #endif /* ! __LINUX_LSM_HOOKS_H */
diff --git a/security/Kconfig b/security/Kconfig
index cedf69e8a22c..2cd737ba7660 100644
--- a/security/Kconfig
+++ b/security/Kconfig
@@ -239,46 +239,9 @@ source "security/yama/Kconfig"
 
 source "security/integrity/Kconfig"
 
-choice
-	prompt "Default security module"
-	default DEFAULT_SECURITY_SELINUX if SECURITY_SELINUX
-	default DEFAULT_SECURITY_SMACK if SECURITY_SMACK
-	default DEFAULT_SECURITY_TOMOYO if SECURITY_TOMOYO
-	default DEFAULT_SECURITY_APPARMOR if SECURITY_APPARMOR
-	default DEFAULT_SECURITY_DAC
-
-	help
-	  Select the security module that will be used by default if the
-	  kernel parameter security= is not specified.
-
-	config DEFAULT_SECURITY_SELINUX
-		bool "SELinux" if SECURITY_SELINUX=y
-
-	config DEFAULT_SECURITY_SMACK
-		bool "Simplified Mandatory Access Control" if SECURITY_SMACK=y
-
-	config DEFAULT_SECURITY_TOMOYO
-		bool "TOMOYO" if SECURITY_TOMOYO=y
-
-	config DEFAULT_SECURITY_APPARMOR
-		bool "AppArmor" if SECURITY_APPARMOR=y
-
-	config DEFAULT_SECURITY_DAC
-		bool "Unix Discretionary Access Controls"
-
-endchoice
-
-config DEFAULT_SECURITY
-	string
-	default "selinux" if DEFAULT_SECURITY_SELINUX
-	default "smack" if DEFAULT_SECURITY_SMACK
-	default "tomoyo" if DEFAULT_SECURITY_TOMOYO
-	default "apparmor" if DEFAULT_SECURITY_APPARMOR
-	default "" if DEFAULT_SECURITY_DAC
-
 config LSM
 	string "Ordered list of enabled LSMs"
-	default "integrity"
+	default "loadpin,integrity,selinux,smack,tomoyo,apparmor"
 	help
 	  A comma-separated list of LSMs, in initialization order.
 	  Any LSMs left off this list will be ignored. This can be
diff --git a/security/loadpin/loadpin.c b/security/loadpin/loadpin.c
index 48f39631b370..055fb0a64169 100644
--- a/security/loadpin/loadpin.c
+++ b/security/loadpin/loadpin.c
@@ -187,13 +187,19 @@ static struct security_hook_list loadpin_hooks[] __lsm_ro_after_init = {
 	LSM_HOOK_INIT(kernel_load_data, loadpin_load_data),
 };
 
-void __init loadpin_add_hooks(void)
+static int __init loadpin_init(void)
 {
 	pr_info("ready to pin (currently %senforcing)\n",
 		enforce ? "" : "not ");
 	security_add_hooks(loadpin_hooks, ARRAY_SIZE(loadpin_hooks), "loadpin");
+	return 0;
 }
 
+DEFINE_LSM(loadpin) = {
+	.name = "loadpin",
+	.init = loadpin_init,
+};
+
 /* Should not be mutable after boot, so not listed in sysfs (perm == 0). */
 module_param(enforce, int, 0);
 MODULE_PARM_DESC(enforce, "Enforce module/firmware pinning");
diff --git a/security/security.c b/security/security.c
index 46c5b0fa515e..b8d75f5a948d 100644
--- a/security/security.c
+++ b/security/security.c
@@ -275,7 +275,6 @@ int __init security_init(void)
 	 */
 	capability_add_hooks();
 	yama_add_hooks();
-	loadpin_add_hooks();
 
 	/* Load LSMs in specified order. */
 	ordered_lsm_init();
-- 
cgit v1.2.3


From d6aed64b74b73b64278c059eacd59d87167aa968 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Fri, 14 Sep 2018 15:37:20 -0700
Subject: Yama: Initialize as ordered LSM

This converts Yama from being a direct "minor" LSM into an ordered LSM.

Signed-off-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Casey Schaufler <casey@schaufler-ca.com>
---
 include/linux/lsm_hooks.h | 5 -----
 security/Kconfig          | 2 +-
 security/security.c       | 1 -
 security/yama/yama_lsm.c  | 8 +++++++-
 4 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h
index fb1a653ccfcb..2849e9b2c01d 100644
--- a/include/linux/lsm_hooks.h
+++ b/include/linux/lsm_hooks.h
@@ -2090,10 +2090,5 @@ static inline void security_delete_hooks(struct security_hook_list *hooks,
 #endif /* CONFIG_SECURITY_WRITABLE_HOOKS */
 
 extern void __init capability_add_hooks(void);
-#ifdef CONFIG_SECURITY_YAMA
-extern void __init yama_add_hooks(void);
-#else
-static inline void __init yama_add_hooks(void) { }
-#endif
 
 #endif /* ! __LINUX_LSM_HOOKS_H */
diff --git a/security/Kconfig b/security/Kconfig
index 2cd737ba7660..78dc12b7eeb3 100644
--- a/security/Kconfig
+++ b/security/Kconfig
@@ -241,7 +241,7 @@ source "security/integrity/Kconfig"
 
 config LSM
 	string "Ordered list of enabled LSMs"
-	default "loadpin,integrity,selinux,smack,tomoyo,apparmor"
+	default "yama,loadpin,integrity,selinux,smack,tomoyo,apparmor"
 	help
 	  A comma-separated list of LSMs, in initialization order.
 	  Any LSMs left off this list will be ignored. This can be
diff --git a/security/security.c b/security/security.c
index b8d75f5a948d..35f93b7c585b 100644
--- a/security/security.c
+++ b/security/security.c
@@ -274,7 +274,6 @@ int __init security_init(void)
 	 * Load minor LSMs, with the capability module always first.
 	 */
 	capability_add_hooks();
-	yama_add_hooks();
 
 	/* Load LSMs in specified order. */
 	ordered_lsm_init();
diff --git a/security/yama/yama_lsm.c b/security/yama/yama_lsm.c
index ffda91a4a1aa..eb1da1303d2e 100644
--- a/security/yama/yama_lsm.c
+++ b/security/yama/yama_lsm.c
@@ -477,9 +477,15 @@ static void __init yama_init_sysctl(void)
 static inline void yama_init_sysctl(void) { }
 #endif /* CONFIG_SYSCTL */
 
-void __init yama_add_hooks(void)
+static int __init yama_init(void)
 {
 	pr_info("Yama: becoming mindful.\n");
 	security_add_hooks(yama_hooks, ARRAY_SIZE(yama_hooks), "yama");
 	yama_init_sysctl();
+	return 0;
 }
+
+DEFINE_LSM(yama) = {
+	.name = "yama",
+	.init = yama_init,
+};
-- 
cgit v1.2.3


From e2bc445b66cad25b0627391df8138a83d0e48f97 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 19 Sep 2018 17:48:21 -0700
Subject: LSM: Introduce enum lsm_order

In preparation for distinguishing the "capability" LSM from other LSMs, it
must be ordered first. This introduces LSM_ORDER_MUTABLE for the general
LSMs and LSM_ORDER_FIRST for capability. In the future LSM_ORDER_LAST
for could be added for anything that must run last (e.g. Landlock may
use this).

Signed-off-by: Kees Cook <keescook@chromium.org>
---
 include/linux/lsm_hooks.h | 6 ++++++
 security/security.c       | 9 ++++++++-
 2 files changed, 14 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h
index 2849e9b2c01d..27d4db9588bb 100644
--- a/include/linux/lsm_hooks.h
+++ b/include/linux/lsm_hooks.h
@@ -2045,8 +2045,14 @@ extern void security_add_hooks(struct security_hook_list *hooks, int count,
 #define LSM_FLAG_LEGACY_MAJOR	BIT(0)
 #define LSM_FLAG_EXCLUSIVE	BIT(1)
 
+enum lsm_order {
+	LSM_ORDER_FIRST = -1,	/* This is only for capabilities. */
+	LSM_ORDER_MUTABLE = 0,
+};
+
 struct lsm_info {
 	const char *name;	/* Required. */
+	enum lsm_order order;	/* Optional: default is LSM_ORDER_MUTABLE */
 	unsigned long flags;	/* Optional: flags describing LSM */
 	int *enabled;		/* Optional: controlled by CONFIG_LSM */
 	int (*init)(void);	/* Required. */
diff --git a/security/security.c b/security/security.c
index 35f93b7c585b..8b673bb2a0dd 100644
--- a/security/security.c
+++ b/security/security.c
@@ -174,6 +174,12 @@ static void __init ordered_lsm_parse(const char *order, const char *origin)
 	struct lsm_info *lsm;
 	char *sep, *name, *next;
 
+	/* LSM_ORDER_FIRST is always first. */
+	for (lsm = __start_lsm_info; lsm < __end_lsm_info; lsm++) {
+		if (lsm->order == LSM_ORDER_FIRST)
+			append_ordered_lsm(lsm, "first");
+	}
+
 	/* Process "security=", if given. */
 	if (chosen_major_lsm) {
 		struct lsm_info *major;
@@ -202,7 +208,8 @@ static void __init ordered_lsm_parse(const char *order, const char *origin)
 		bool found = false;
 
 		for (lsm = __start_lsm_info; lsm < __end_lsm_info; lsm++) {
-			if (strcmp(lsm->name, name) == 0) {
+			if (lsm->order == LSM_ORDER_MUTABLE &&
+			    strcmp(lsm->name, name) == 0) {
 				append_ordered_lsm(lsm, origin);
 				found = true;
 			}
-- 
cgit v1.2.3


From d117a154e6128abac5409d3f173584e7b25981a2 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Fri, 14 Sep 2018 15:40:45 -0700
Subject: capability: Initialize as LSM_ORDER_FIRST

This converts capabilities to use the new LSM_ORDER_FIRST position.

Signed-off-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Casey Schaufler <casey@schaufler-ca.com>
---
 include/linux/lsm_hooks.h | 2 --
 security/commoncap.c      | 9 ++++++++-
 security/security.c       | 5 -----
 3 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h
index 27d4db9588bb..0c908c091a03 100644
--- a/include/linux/lsm_hooks.h
+++ b/include/linux/lsm_hooks.h
@@ -2095,6 +2095,4 @@ static inline void security_delete_hooks(struct security_hook_list *hooks,
 #define __lsm_ro_after_init	__ro_after_init
 #endif /* CONFIG_SECURITY_WRITABLE_HOOKS */
 
-extern void __init capability_add_hooks(void);
-
 #endif /* ! __LINUX_LSM_HOOKS_H */
diff --git a/security/commoncap.c b/security/commoncap.c
index 232db019f051..52e04136bfa8 100644
--- a/security/commoncap.c
+++ b/security/commoncap.c
@@ -1362,10 +1362,17 @@ struct security_hook_list capability_hooks[] __lsm_ro_after_init = {
 	LSM_HOOK_INIT(vm_enough_memory, cap_vm_enough_memory),
 };
 
-void __init capability_add_hooks(void)
+static int __init capability_init(void)
 {
 	security_add_hooks(capability_hooks, ARRAY_SIZE(capability_hooks),
 				"capability");
+	return 0;
 }
 
+DEFINE_LSM(capability) = {
+	.name = "capability",
+	.order = LSM_ORDER_FIRST,
+	.init = capability_init,
+};
+
 #endif /* CONFIG_SECURITY */
diff --git a/security/security.c b/security/security.c
index 8b673bb2a0dd..9411f659454b 100644
--- a/security/security.c
+++ b/security/security.c
@@ -277,11 +277,6 @@ int __init security_init(void)
 	     i++)
 		INIT_HLIST_HEAD(&list[i]);
 
-	/*
-	 * Load minor LSMs, with the capability module always first.
-	 */
-	capability_add_hooks();
-
 	/* Load LSMs in specified order. */
 	ordered_lsm_init();
 
-- 
cgit v1.2.3


From 6d9c939dbe4d0bcea09cd4b410f624cde1acb678 Mon Sep 17 00:00:00 2001
From: Casey Schaufler <casey@schaufler-ca.com>
Date: Fri, 21 Sep 2018 17:16:59 -0700
Subject: procfs: add smack subdir to attrs

Back in 2007 I made what turned out to be a rather serious
mistake in the implementation of the Smack security module.
The SELinux module used an interface in /proc to manipulate
the security context on processes. Rather than use a similar
interface, I used the same interface. The AppArmor team did
likewise. Now /proc/.../attr/current will tell you the
security "context" of the process, but it will be different
depending on the security module you're using.

This patch provides a subdirectory in /proc/.../attr for
Smack. Smack user space can use the "current" file in
this subdirectory and never have to worry about getting
SELinux attributes by mistake. Programs that use the
old interface will continue to work (or fail, as the case
may be) as before.

The proposed S.A.R.A security module is dependent on
the mechanism to create its own attr subdirectory.

The original implementation is by Kees Cook.

Signed-off-by: Casey Schaufler <casey@schaufler-ca.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 Documentation/admin-guide/LSM/index.rst | 13 +++++--
 fs/proc/base.c                          | 64 ++++++++++++++++++++++++++++-----
 fs/proc/internal.h                      |  1 +
 include/linux/security.h                | 15 +++++---
 security/security.c                     | 24 ++++++++++---
 5 files changed, 96 insertions(+), 21 deletions(-)

(limited to 'include')

diff --git a/Documentation/admin-guide/LSM/index.rst b/Documentation/admin-guide/LSM/index.rst
index c980dfe9abf1..9842e21afd4a 100644
--- a/Documentation/admin-guide/LSM/index.rst
+++ b/Documentation/admin-guide/LSM/index.rst
@@ -17,9 +17,8 @@ MAC extensions, other extensions can be built using the LSM to provide
 specific changes to system operation when these tweaks are not available
 in the core functionality of Linux itself.
 
-Without a specific LSM built into the kernel, the default LSM will be the
-Linux capabilities system. Most LSMs choose to extend the capabilities
-system, building their checks on top of the defined capability hooks.
+The Linux capabilities modules will always be included. This may be
+followed by any number of "minor" modules and at most one "major" module.
 For more details on capabilities, see ``capabilities(7)`` in the Linux
 man-pages project.
 
@@ -30,6 +29,14 @@ order in which checks are made. The capability module will always
 be first, followed by any "minor" modules (e.g. Yama) and then
 the one "major" module (e.g. SELinux) if there is one configured.
 
+Process attributes associated with "major" security modules should
+be accessed and maintained using the special files in ``/proc/.../attr``.
+A security module may maintain a module specific subdirectory there,
+named after the module. ``/proc/.../attr/smack`` is provided by the Smack
+security module and contains all its special files. The files directly
+in ``/proc/.../attr`` remain as legacy interfaces for modules that provide
+subdirectories.
+
 .. toctree::
    :maxdepth: 1
 
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 633a63462573..c9d775fd24ef 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -140,9 +140,13 @@ struct pid_entry {
 #define REG(NAME, MODE, fops)				\
 	NOD(NAME, (S_IFREG|(MODE)), NULL, &fops, {})
 #define ONE(NAME, MODE, show)				\
-	NOD(NAME, (S_IFREG|(MODE)), 			\
+	NOD(NAME, (S_IFREG|(MODE)),			\
 		NULL, &proc_single_file_operations,	\
 		{ .proc_show = show } )
+#define ATTR(LSM, NAME, MODE)				\
+	NOD(NAME, (S_IFREG|(MODE)),			\
+		NULL, &proc_pid_attr_operations,	\
+		{ .lsm = LSM })
 
 /*
  * Count the number of hardlinks for the pid_entry table, excluding the .
@@ -2525,7 +2529,7 @@ static ssize_t proc_pid_attr_read(struct file * file, char __user * buf,
 	if (!task)
 		return -ESRCH;
 
-	length = security_getprocattr(task,
+	length = security_getprocattr(task, PROC_I(inode)->op.lsm,
 				      (char*)file->f_path.dentry->d_name.name,
 				      &p);
 	put_task_struct(task);
@@ -2574,7 +2578,9 @@ static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf,
 	if (rv < 0)
 		goto out_free;
 
-	rv = security_setprocattr(file->f_path.dentry->d_name.name, page, count);
+	rv = security_setprocattr(PROC_I(inode)->op.lsm,
+				  file->f_path.dentry->d_name.name, page,
+				  count);
 	mutex_unlock(&current->signal->cred_guard_mutex);
 out_free:
 	kfree(page);
@@ -2588,13 +2594,53 @@ static const struct file_operations proc_pid_attr_operations = {
 	.llseek		= generic_file_llseek,
 };
 
+#define LSM_DIR_OPS(LSM) \
+static int proc_##LSM##_attr_dir_iterate(struct file *filp, \
+			     struct dir_context *ctx) \
+{ \
+	return proc_pident_readdir(filp, ctx, \
+				   LSM##_attr_dir_stuff, \
+				   ARRAY_SIZE(LSM##_attr_dir_stuff)); \
+} \
+\
+static const struct file_operations proc_##LSM##_attr_dir_ops = { \
+	.read		= generic_read_dir, \
+	.iterate	= proc_##LSM##_attr_dir_iterate, \
+	.llseek		= default_llseek, \
+}; \
+\
+static struct dentry *proc_##LSM##_attr_dir_lookup(struct inode *dir, \
+				struct dentry *dentry, unsigned int flags) \
+{ \
+	return proc_pident_lookup(dir, dentry, \
+				  LSM##_attr_dir_stuff, \
+				  ARRAY_SIZE(LSM##_attr_dir_stuff)); \
+} \
+\
+static const struct inode_operations proc_##LSM##_attr_dir_inode_ops = { \
+	.lookup		= proc_##LSM##_attr_dir_lookup, \
+	.getattr	= pid_getattr, \
+	.setattr	= proc_setattr, \
+}
+
+#ifdef CONFIG_SECURITY_SMACK
+static const struct pid_entry smack_attr_dir_stuff[] = {
+	ATTR("smack", "current",	0666),
+};
+LSM_DIR_OPS(smack);
+#endif
+
 static const struct pid_entry attr_dir_stuff[] = {
-	REG("current",    S_IRUGO|S_IWUGO, proc_pid_attr_operations),
-	REG("prev",       S_IRUGO,	   proc_pid_attr_operations),
-	REG("exec",       S_IRUGO|S_IWUGO, proc_pid_attr_operations),
-	REG("fscreate",   S_IRUGO|S_IWUGO, proc_pid_attr_operations),
-	REG("keycreate",  S_IRUGO|S_IWUGO, proc_pid_attr_operations),
-	REG("sockcreate", S_IRUGO|S_IWUGO, proc_pid_attr_operations),
+	ATTR(NULL, "current",		0666),
+	ATTR(NULL, "prev",		0444),
+	ATTR(NULL, "exec",		0666),
+	ATTR(NULL, "fscreate",		0666),
+	ATTR(NULL, "keycreate",		0666),
+	ATTR(NULL, "sockcreate",	0666),
+#ifdef CONFIG_SECURITY_SMACK
+	DIR("smack",			0555,
+	    proc_smack_attr_dir_inode_ops, proc_smack_attr_dir_ops),
+#endif
 };
 
 static int proc_attr_dir_readdir(struct file *file, struct dir_context *ctx)
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 5185d7f6a51e..d4f9989063d0 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -81,6 +81,7 @@ union proc_op {
 	int (*proc_show)(struct seq_file *m,
 		struct pid_namespace *ns, struct pid *pid,
 		struct task_struct *task);
+	const char *lsm;
 };
 
 struct proc_inode {
diff --git a/include/linux/security.h b/include/linux/security.h
index dbfb5a66babb..b2c5333ed4b5 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -366,8 +366,10 @@ int security_sem_semctl(struct kern_ipc_perm *sma, int cmd);
 int security_sem_semop(struct kern_ipc_perm *sma, struct sembuf *sops,
 			unsigned nsops, int alter);
 void security_d_instantiate(struct dentry *dentry, struct inode *inode);
-int security_getprocattr(struct task_struct *p, char *name, char **value);
-int security_setprocattr(const char *name, void *value, size_t size);
+int security_getprocattr(struct task_struct *p, const char *lsm, char *name,
+			 char **value);
+int security_setprocattr(const char *lsm, const char *name, void *value,
+			 size_t size);
 int security_netlink_send(struct sock *sk, struct sk_buff *skb);
 int security_ismaclabel(const char *name);
 int security_secid_to_secctx(u32 secid, char **secdata, u32 *seclen);
@@ -1112,15 +1114,18 @@ static inline int security_sem_semop(struct kern_ipc_perm *sma,
 	return 0;
 }
 
-static inline void security_d_instantiate(struct dentry *dentry, struct inode *inode)
+static inline void security_d_instantiate(struct dentry *dentry,
+					  struct inode *inode)
 { }
 
-static inline int security_getprocattr(struct task_struct *p, char *name, char **value)
+static inline int security_getprocattr(struct task_struct *p, const char *lsm,
+				       char *name, char **value)
 {
 	return -EINVAL;
 }
 
-static inline int security_setprocattr(char *name, void *value, size_t size)
+static inline int security_setprocattr(const char *lsm, char *name,
+				       void *value, size_t size)
 {
 	return -EINVAL;
 }
diff --git a/security/security.c b/security/security.c
index 9411f659454b..60b39db95c2f 100644
--- a/security/security.c
+++ b/security/security.c
@@ -1485,14 +1485,30 @@ void security_d_instantiate(struct dentry *dentry, struct inode *inode)
 }
 EXPORT_SYMBOL(security_d_instantiate);
 
-int security_getprocattr(struct task_struct *p, char *name, char **value)
+int security_getprocattr(struct task_struct *p, const char *lsm, char *name,
+				char **value)
 {
-	return call_int_hook(getprocattr, -EINVAL, p, name, value);
+	struct security_hook_list *hp;
+
+	hlist_for_each_entry(hp, &security_hook_heads.getprocattr, list) {
+		if (lsm != NULL && strcmp(lsm, hp->lsm))
+			continue;
+		return hp->hook.getprocattr(p, name, value);
+	}
+	return -EINVAL;
 }
 
-int security_setprocattr(const char *name, void *value, size_t size)
+int security_setprocattr(const char *lsm, const char *name, void *value,
+			 size_t size)
 {
-	return call_int_hook(setprocattr, -EINVAL, name, value, size);
+	struct security_hook_list *hp;
+
+	hlist_for_each_entry(hp, &security_hook_heads.setprocattr, list) {
+		if (lsm != NULL && strcmp(lsm, hp->lsm))
+			continue;
+		return hp->hook.setprocattr(name, value, size);
+	}
+	return -EINVAL;
 }
 
 int security_netlink_send(struct sock *sk, struct sk_buff *skb)
-- 
cgit v1.2.3


From 3d252529480c68bfd6a6774652df7c8968b28e41 Mon Sep 17 00:00:00 2001
From: Casey Schaufler <casey@schaufler-ca.com>
Date: Fri, 21 Sep 2018 17:17:34 -0700
Subject: SELinux: Remove unused selinux_is_enabled

There are no longer users of selinux_is_enabled().
Remove it. As selinux_is_enabled() is the only reason
for include/linux/selinux.h remove that as well.

Signed-off-by: Casey Schaufler <casey@schaufler-ca.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 include/linux/cred.h             |  1 -
 include/linux/selinux.h          | 35 -----------------------------------
 security/selinux/Makefile        |  2 +-
 security/selinux/exports.c       | 23 -----------------------
 security/selinux/hooks.c         |  1 -
 security/selinux/include/audit.h |  3 ---
 security/selinux/ss/services.c   |  1 -
 7 files changed, 1 insertion(+), 65 deletions(-)
 delete mode 100644 include/linux/selinux.h
 delete mode 100644 security/selinux/exports.c

(limited to 'include')

diff --git a/include/linux/cred.h b/include/linux/cred.h
index 4907c9df86b3..ddd45bb74887 100644
--- a/include/linux/cred.h
+++ b/include/linux/cred.h
@@ -15,7 +15,6 @@
 #include <linux/capability.h>
 #include <linux/init.h>
 #include <linux/key.h>
-#include <linux/selinux.h>
 #include <linux/atomic.h>
 #include <linux/uidgid.h>
 #include <linux/sched.h>
diff --git a/include/linux/selinux.h b/include/linux/selinux.h
deleted file mode 100644
index 44f459612690..000000000000
--- a/include/linux/selinux.h
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- * SELinux services exported to the rest of the kernel.
- *
- * Author: James Morris <jmorris@redhat.com>
- *
- * Copyright (C) 2005 Red Hat, Inc., James Morris <jmorris@redhat.com>
- * Copyright (C) 2006 Trusted Computer Solutions, Inc. <dgoeddel@trustedcs.com>
- * Copyright (C) 2006 IBM Corporation, Timothy R. Chavez <tinytim@us.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2,
- * as published by the Free Software Foundation.
- */
-#ifndef _LINUX_SELINUX_H
-#define _LINUX_SELINUX_H
-
-struct selinux_audit_rule;
-struct audit_context;
-struct kern_ipc_perm;
-
-#ifdef CONFIG_SECURITY_SELINUX
-
-/**
- * selinux_is_enabled - is SELinux enabled?
- */
-bool selinux_is_enabled(void);
-#else
-
-static inline bool selinux_is_enabled(void)
-{
-	return false;
-}
-#endif	/* CONFIG_SECURITY_SELINUX */
-
-#endif /* _LINUX_SELINUX_H */
diff --git a/security/selinux/Makefile b/security/selinux/Makefile
index c7161f8792b2..ccf950409384 100644
--- a/security/selinux/Makefile
+++ b/security/selinux/Makefile
@@ -6,7 +6,7 @@
 obj-$(CONFIG_SECURITY_SELINUX) := selinux.o
 
 selinux-y := avc.o hooks.o selinuxfs.o netlink.o nlmsgtab.o netif.o \
-	     netnode.o netport.o ibpkey.o exports.o \
+	     netnode.o netport.o ibpkey.o \
 	     ss/ebitmap.o ss/hashtab.o ss/symtab.o ss/sidtab.o ss/avtab.o \
 	     ss/policydb.o ss/services.o ss/conditional.o ss/mls.o ss/status.o
 
diff --git a/security/selinux/exports.c b/security/selinux/exports.c
deleted file mode 100644
index e75dd94e2d2b..000000000000
--- a/security/selinux/exports.c
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * SELinux services exported to the rest of the kernel.
- *
- * Author: James Morris <jmorris@redhat.com>
- *
- * Copyright (C) 2005 Red Hat, Inc., James Morris <jmorris@redhat.com>
- * Copyright (C) 2006 Trusted Computer Solutions, Inc. <dgoeddel@trustedcs.com>
- * Copyright (C) 2006 IBM Corporation, Timothy R. Chavez <tinytim@us.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2,
- * as published by the Free Software Foundation.
- */
-#include <linux/module.h>
-#include <linux/selinux.h>
-
-#include "security.h"
-
-bool selinux_is_enabled(void)
-{
-	return selinux_enabled;
-}
-EXPORT_SYMBOL_GPL(selinux_is_enabled);
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index ad227177550b..169cf5b3334b 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -79,7 +79,6 @@
 #include <linux/personality.h>
 #include <linux/audit.h>
 #include <linux/string.h>
-#include <linux/selinux.h>
 #include <linux/mutex.h>
 #include <linux/posix-timers.h>
 #include <linux/syslog.h>
diff --git a/security/selinux/include/audit.h b/security/selinux/include/audit.h
index 1bdf973433cc..36e1d44c0209 100644
--- a/security/selinux/include/audit.h
+++ b/security/selinux/include/audit.h
@@ -1,9 +1,6 @@
 /*
  * SELinux support for the Audit LSM hooks
  *
- * Most of below header was moved from include/linux/selinux.h which
- * is released under below copyrights:
- *
  * Author: James Morris <jmorris@redhat.com>
  *
  * Copyright (C) 2005 Red Hat, Inc., James Morris <jmorris@redhat.com>
diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c
index dd44126c8d14..d6e7b4856d93 100644
--- a/security/selinux/ss/services.c
+++ b/security/selinux/ss/services.c
@@ -49,7 +49,6 @@
 #include <linux/sched.h>
 #include <linux/audit.h>
 #include <linux/mutex.h>
-#include <linux/selinux.h>
 #include <linux/flex_array.h>
 #include <linux/vmalloc.h>
 #include <net/netlabel.h>
-- 
cgit v1.2.3


From bbd3662a834813730912a58efb44dd6df6d952e6 Mon Sep 17 00:00:00 2001
From: Casey Schaufler <casey@schaufler-ca.com>
Date: Mon, 12 Nov 2018 09:30:56 -0800
Subject: Infrastructure management of the cred security blob

Move management of the cred security blob out of the
security modules and into the security infrastructre.
Instead of allocating and freeing space the security
modules tell the infrastructure how much space they
require.

Signed-off-by: Casey Schaufler <casey@schaufler-ca.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
[kees: adjusted for ordered init series]
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 include/linux/lsm_hooks.h         | 12 ++++++
 security/apparmor/include/cred.h  |  4 +-
 security/apparmor/include/lib.h   |  4 ++
 security/apparmor/lsm.c           |  9 ++++
 security/security.c               | 89 ++++++++++++++++++++++++++++++++++++++-
 security/selinux/hooks.c          | 51 +++++-----------------
 security/selinux/include/objsec.h |  4 +-
 security/smack/smack.h            |  3 +-
 security/smack/smack_lsm.c        | 79 +++++++++++-----------------------
 security/tomoyo/common.h          |  3 +-
 security/tomoyo/tomoyo.c          |  6 +++
 11 files changed, 162 insertions(+), 102 deletions(-)

(limited to 'include')

diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h
index 0c908c091a03..dd33666567bc 100644
--- a/include/linux/lsm_hooks.h
+++ b/include/linux/lsm_hooks.h
@@ -2027,6 +2027,13 @@ struct security_hook_list {
 	char				*lsm;
 } __randomize_layout;
 
+/*
+ * Security blob size or offset data.
+ */
+struct lsm_blob_sizes {
+	int	lbs_cred;
+};
+
 /*
  * Initializing a security_hook_list structure takes
  * up a lot of space in a source file. This macro takes
@@ -2056,6 +2063,7 @@ struct lsm_info {
 	unsigned long flags;	/* Optional: flags describing LSM */
 	int *enabled;		/* Optional: controlled by CONFIG_LSM */
 	int (*init)(void);	/* Required. */
+	struct lsm_blob_sizes *blobs; /* Optional: for blob sharing. */
 };
 
 extern struct lsm_info __start_lsm_info[], __end_lsm_info[];
@@ -2095,4 +2103,8 @@ static inline void security_delete_hooks(struct security_hook_list *hooks,
 #define __lsm_ro_after_init	__ro_after_init
 #endif /* CONFIG_SECURITY_WRITABLE_HOOKS */
 
+#ifdef CONFIG_SECURITY
+void __init lsm_early_cred(struct cred *cred);
+#endif
+
 #endif /* ! __LINUX_LSM_HOOKS_H */
diff --git a/security/apparmor/include/cred.h b/security/apparmor/include/cred.h
index a757370f2a0c..b9504a05fddc 100644
--- a/security/apparmor/include/cred.h
+++ b/security/apparmor/include/cred.h
@@ -25,7 +25,7 @@
 
 static inline struct aa_label *cred_label(const struct cred *cred)
 {
-	struct aa_label **blob = cred->security;
+	struct aa_label **blob = cred->security + apparmor_blob_sizes.lbs_cred;
 
 	AA_BUG(!blob);
 	return *blob;
@@ -34,7 +34,7 @@ static inline struct aa_label *cred_label(const struct cred *cred)
 static inline void set_cred_label(const struct cred *cred,
 				  struct aa_label *label)
 {
-	struct aa_label **blob = cred->security;
+	struct aa_label **blob = cred->security + apparmor_blob_sizes.lbs_cred;
 
 	AA_BUG(!blob);
 	*blob = label;
diff --git a/security/apparmor/include/lib.h b/security/apparmor/include/lib.h
index 6505e1ad9e23..bbe9b384d71d 100644
--- a/security/apparmor/include/lib.h
+++ b/security/apparmor/include/lib.h
@@ -16,6 +16,7 @@
 
 #include <linux/slab.h>
 #include <linux/fs.h>
+#include <linux/lsm_hooks.h>
 
 #include "match.h"
 
@@ -55,6 +56,9 @@ const char *aa_splitn_fqname(const char *fqname, size_t n, const char **ns_name,
 			     size_t *ns_len);
 void aa_info_message(const char *str);
 
+/* Security blob offsets */
+extern struct lsm_blob_sizes apparmor_blob_sizes;
+
 /**
  * aa_strneq - compare null terminated @str to a non null terminated substring
  * @str: a null terminated string
diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c
index 8c2cb4b1a6c3..d5e4a384f205 100644
--- a/security/apparmor/lsm.c
+++ b/security/apparmor/lsm.c
@@ -1151,6 +1151,13 @@ static int apparmor_inet_conn_request(struct sock *sk, struct sk_buff *skb,
 }
 #endif
 
+/*
+ * The cred blob is a pointer to, not an instance of, an aa_task_ctx.
+ */
+struct lsm_blob_sizes apparmor_blob_sizes __lsm_ro_after_init = {
+	.lbs_cred = sizeof(struct aa_task_ctx *),
+};
+
 static struct security_hook_list apparmor_hooks[] __lsm_ro_after_init = {
 	LSM_HOOK_INIT(ptrace_access_check, apparmor_ptrace_access_check),
 	LSM_HOOK_INIT(ptrace_traceme, apparmor_ptrace_traceme),
@@ -1485,6 +1492,7 @@ static int __init set_init_ctx(void)
 	if (!ctx)
 		return -ENOMEM;
 
+	lsm_early_cred(cred);
 	set_cred_label(cred, aa_get_label(ns_unconfined(root_ns)));
 	task_ctx(current) = ctx;
 
@@ -1725,5 +1733,6 @@ DEFINE_LSM(apparmor) = {
 	.name = "apparmor",
 	.flags = LSM_FLAG_LEGACY_MAJOR | LSM_FLAG_EXCLUSIVE,
 	.enabled = &apparmor_enabled,
+	.blobs = &apparmor_blob_sizes,
 	.init = apparmor_init,
 };
diff --git a/security/security.c b/security/security.c
index 60b39db95c2f..09be8ce007a2 100644
--- a/security/security.c
+++ b/security/security.c
@@ -41,6 +41,8 @@ struct security_hook_heads security_hook_heads __lsm_ro_after_init;
 static ATOMIC_NOTIFIER_HEAD(lsm_notifier_chain);
 
 char *lsm_names;
+static struct lsm_blob_sizes blob_sizes __lsm_ro_after_init;
+
 /* Boot-time LSM user choice */
 static __initdata const char *chosen_lsm_order;
 static __initdata const char *chosen_major_lsm;
@@ -139,6 +141,25 @@ static bool __init lsm_allowed(struct lsm_info *lsm)
 	return true;
 }
 
+static void __init lsm_set_blob_size(int *need, int *lbs)
+{
+	int offset;
+
+	if (*need > 0) {
+		offset = *lbs;
+		*lbs += *need;
+		*need = offset;
+	}
+}
+
+static void __init lsm_set_blob_sizes(struct lsm_blob_sizes *needed)
+{
+	if (!needed)
+		return;
+
+	lsm_set_blob_size(&needed->lbs_cred, &blob_sizes.lbs_cred);
+}
+
 /* Prepare LSM for initialization. */
 static void __init prepare_lsm(struct lsm_info *lsm)
 {
@@ -153,6 +174,8 @@ static void __init prepare_lsm(struct lsm_info *lsm)
 			exclusive = lsm;
 			init_debug("exclusive chosen: %s\n", lsm->name);
 		}
+
+		lsm_set_blob_sizes(lsm->blobs);
 	}
 }
 
@@ -255,6 +278,8 @@ static void __init ordered_lsm_init(void)
 	for (lsm = ordered_lsms; *lsm; lsm++)
 		prepare_lsm(*lsm);
 
+	init_debug("cred blob size     = %d\n", blob_sizes.lbs_cred);
+
 	for (lsm = ordered_lsms; *lsm; lsm++)
 		initialize_lsm(*lsm);
 
@@ -382,6 +407,47 @@ int unregister_lsm_notifier(struct notifier_block *nb)
 }
 EXPORT_SYMBOL(unregister_lsm_notifier);
 
+/**
+ * lsm_cred_alloc - allocate a composite cred blob
+ * @cred: the cred that needs a blob
+ * @gfp: allocation type
+ *
+ * Allocate the cred blob for all the modules
+ *
+ * Returns 0, or -ENOMEM if memory can't be allocated.
+ */
+static int lsm_cred_alloc(struct cred *cred, gfp_t gfp)
+{
+	if (blob_sizes.lbs_cred == 0) {
+		cred->security = NULL;
+		return 0;
+	}
+
+	cred->security = kzalloc(blob_sizes.lbs_cred, gfp);
+	if (cred->security == NULL)
+		return -ENOMEM;
+	return 0;
+}
+
+/**
+ * lsm_early_cred - during initialization allocate a composite cred blob
+ * @cred: the cred that needs a blob
+ *
+ * Allocate the cred blob for all the modules if it's not already there
+ */
+void __init lsm_early_cred(struct cred *cred)
+{
+	int rc;
+
+	if (cred == NULL)
+		panic("%s: NULL cred.\n", __func__);
+	if (cred->security != NULL)
+		return;
+	rc = lsm_cred_alloc(cred, GFP_KERNEL);
+	if (rc)
+		panic("%s: Early cred alloc failed.\n", __func__);
+}
+
 /*
  * Hook list operation macros.
  *
@@ -1195,17 +1261,36 @@ void security_task_free(struct task_struct *task)
 
 int security_cred_alloc_blank(struct cred *cred, gfp_t gfp)
 {
-	return call_int_hook(cred_alloc_blank, 0, cred, gfp);
+	int rc = lsm_cred_alloc(cred, gfp);
+
+	if (rc)
+		return rc;
+
+	rc = call_int_hook(cred_alloc_blank, 0, cred, gfp);
+	if (rc)
+		security_cred_free(cred);
+	return rc;
 }
 
 void security_cred_free(struct cred *cred)
 {
 	call_void_hook(cred_free, cred);
+
+	kfree(cred->security);
+	cred->security = NULL;
 }
 
 int security_prepare_creds(struct cred *new, const struct cred *old, gfp_t gfp)
 {
-	return call_int_hook(cred_prepare, 0, new, old, gfp);
+	int rc = lsm_cred_alloc(new, gfp);
+
+	if (rc)
+		return rc;
+
+	rc = call_int_hook(cred_prepare, 0, new, old, gfp);
+	if (rc)
+		security_cred_free(new);
+	return rc;
 }
 
 void security_transfer_creds(struct cred *new, const struct cred *old)
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 169cf5b3334b..239b13b442e7 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -210,12 +210,9 @@ static void cred_init_security(void)
 	struct cred *cred = (struct cred *) current->real_cred;
 	struct task_security_struct *tsec;
 
-	tsec = kzalloc(sizeof(struct task_security_struct), GFP_KERNEL);
-	if (!tsec)
-		panic("SELinux:  Failed to initialize initial task.\n");
-
+	lsm_early_cred(cred);
+	tsec = selinux_cred(cred);
 	tsec->osid = tsec->sid = SECINITSID_KERNEL;
-	cred->security = tsec;
 }
 
 /*
@@ -3685,47 +3682,16 @@ static int selinux_task_alloc(struct task_struct *task,
 			    sid, sid, SECCLASS_PROCESS, PROCESS__FORK, NULL);
 }
 
-/*
- * allocate the SELinux part of blank credentials
- */
-static int selinux_cred_alloc_blank(struct cred *cred, gfp_t gfp)
-{
-	struct task_security_struct *tsec;
-
-	tsec = kzalloc(sizeof(struct task_security_struct), gfp);
-	if (!tsec)
-		return -ENOMEM;
-
-	cred->security = tsec;
-	return 0;
-}
-
-/*
- * detach and free the LSM part of a set of credentials
- */
-static void selinux_cred_free(struct cred *cred)
-{
-	struct task_security_struct *tsec = selinux_cred(cred);
-
-	kfree(tsec);
-}
-
 /*
  * prepare a new set of credentials for modification
  */
 static int selinux_cred_prepare(struct cred *new, const struct cred *old,
 				gfp_t gfp)
 {
-	const struct task_security_struct *old_tsec;
-	struct task_security_struct *tsec;
-
-	old_tsec = selinux_cred(old);
-
-	tsec = kmemdup(old_tsec, sizeof(struct task_security_struct), gfp);
-	if (!tsec)
-		return -ENOMEM;
+	const struct task_security_struct *old_tsec = selinux_cred(old);
+	struct task_security_struct *tsec = selinux_cred(new);
 
-	new->security = tsec;
+	*tsec = *old_tsec;
 	return 0;
 }
 
@@ -6678,6 +6644,10 @@ static void selinux_bpf_prog_free(struct bpf_prog_aux *aux)
 }
 #endif
 
+struct lsm_blob_sizes selinux_blob_sizes __lsm_ro_after_init = {
+	.lbs_cred = sizeof(struct task_security_struct),
+};
+
 static struct security_hook_list selinux_hooks[] __lsm_ro_after_init = {
 	LSM_HOOK_INIT(binder_set_context_mgr, selinux_binder_set_context_mgr),
 	LSM_HOOK_INIT(binder_transaction, selinux_binder_transaction),
@@ -6761,8 +6731,6 @@ static struct security_hook_list selinux_hooks[] __lsm_ro_after_init = {
 	LSM_HOOK_INIT(file_open, selinux_file_open),
 
 	LSM_HOOK_INIT(task_alloc, selinux_task_alloc),
-	LSM_HOOK_INIT(cred_alloc_blank, selinux_cred_alloc_blank),
-	LSM_HOOK_INIT(cred_free, selinux_cred_free),
 	LSM_HOOK_INIT(cred_prepare, selinux_cred_prepare),
 	LSM_HOOK_INIT(cred_transfer, selinux_cred_transfer),
 	LSM_HOOK_INIT(cred_getsecid, selinux_cred_getsecid),
@@ -6981,6 +6949,7 @@ DEFINE_LSM(selinux) = {
 	.name = "selinux",
 	.flags = LSM_FLAG_LEGACY_MAJOR | LSM_FLAG_EXCLUSIVE,
 	.enabled = &selinux_enabled,
+	.blobs = &selinux_blob_sizes,
 	.init = selinux_init,
 };
 
diff --git a/security/selinux/include/objsec.h b/security/selinux/include/objsec.h
index 734b6833bdff..c2974b031d05 100644
--- a/security/selinux/include/objsec.h
+++ b/security/selinux/include/objsec.h
@@ -25,6 +25,7 @@
 #include <linux/binfmts.h>
 #include <linux/in.h>
 #include <linux/spinlock.h>
+#include <linux/lsm_hooks.h>
 #include <net/net_namespace.h>
 #include "flask.h"
 #include "avc.h"
@@ -158,9 +159,10 @@ struct bpf_security_struct {
 	u32 sid;  /*SID of bpf obj creater*/
 };
 
+extern struct lsm_blob_sizes selinux_blob_sizes;
 static inline struct task_security_struct *selinux_cred(const struct cred *cred)
 {
-	return cred->security;
+	return cred->security + selinux_blob_sizes.lbs_cred;
 }
 
 #endif /* _SELINUX_OBJSEC_H_ */
diff --git a/security/smack/smack.h b/security/smack/smack.h
index 01a922856eba..b27eb252e953 100644
--- a/security/smack/smack.h
+++ b/security/smack/smack.h
@@ -336,6 +336,7 @@ extern struct smack_known *smack_syslog_label;
 extern struct smack_known *smack_unconfined;
 #endif
 extern int smack_ptrace_rule;
+extern struct lsm_blob_sizes smack_blob_sizes;
 
 extern struct smack_known smack_known_floor;
 extern struct smack_known smack_known_hat;
@@ -358,7 +359,7 @@ extern struct hlist_head smack_known_hash[SMACK_HASH_SLOTS];
 
 static inline struct task_smack *smack_cred(const struct cred *cred)
 {
-	return cred->security;
+	return cred->security + smack_blob_sizes.lbs_cred;
 }
 
 /*
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
index 9a050ca17296..bad27a8e1631 100644
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -326,29 +326,20 @@ static struct inode_smack *new_inode_smack(struct smack_known *skp)
 }
 
 /**
- * new_task_smack - allocate a task security blob
+ * init_task_smack - initialize a task security blob
+ * @tsp: blob to initialize
  * @task: a pointer to the Smack label for the running task
  * @forked: a pointer to the Smack label for the forked task
- * @gfp: type of the memory for the allocation
  *
- * Returns the new blob or NULL if there's no memory available
  */
-static struct task_smack *new_task_smack(struct smack_known *task,
-					struct smack_known *forked, gfp_t gfp)
+static void init_task_smack(struct task_smack *tsp, struct smack_known *task,
+					struct smack_known *forked)
 {
-	struct task_smack *tsp;
-
-	tsp = kzalloc(sizeof(struct task_smack), gfp);
-	if (tsp == NULL)
-		return NULL;
-
 	tsp->smk_task = task;
 	tsp->smk_forked = forked;
 	INIT_LIST_HEAD(&tsp->smk_rules);
 	INIT_LIST_HEAD(&tsp->smk_relabel);
 	mutex_init(&tsp->smk_rules_lock);
-
-	return tsp;
 }
 
 /**
@@ -1881,14 +1872,7 @@ static int smack_file_open(struct file *file)
  */
 static int smack_cred_alloc_blank(struct cred *cred, gfp_t gfp)
 {
-	struct task_smack *tsp;
-
-	tsp = new_task_smack(NULL, NULL, gfp);
-	if (tsp == NULL)
-		return -ENOMEM;
-
-	cred->security = tsp;
-
+	init_task_smack(smack_cred(cred), NULL, NULL);
 	return 0;
 }
 
@@ -1905,10 +1889,6 @@ static void smack_cred_free(struct cred *cred)
 	struct list_head *l;
 	struct list_head *n;
 
-	if (tsp == NULL)
-		return;
-	cred->security = NULL;
-
 	smk_destroy_label_list(&tsp->smk_relabel);
 
 	list_for_each_safe(l, n, &tsp->smk_rules) {
@@ -1916,7 +1896,6 @@ static void smack_cred_free(struct cred *cred)
 		list_del(&rp->list);
 		kfree(rp);
 	}
-	kfree(tsp);
 }
 
 /**
@@ -1931,14 +1910,10 @@ static int smack_cred_prepare(struct cred *new, const struct cred *old,
 			      gfp_t gfp)
 {
 	struct task_smack *old_tsp = smack_cred(old);
-	struct task_smack *new_tsp;
+	struct task_smack *new_tsp = smack_cred(new);
 	int rc;
 
-	new_tsp = new_task_smack(old_tsp->smk_task, old_tsp->smk_task, gfp);
-	if (new_tsp == NULL)
-		return -ENOMEM;
-
-	new->security = new_tsp;
+	init_task_smack(new_tsp, old_tsp->smk_task, old_tsp->smk_task);
 
 	rc = smk_copy_rules(&new_tsp->smk_rules, &old_tsp->smk_rules, gfp);
 	if (rc != 0)
@@ -1946,10 +1921,7 @@ static int smack_cred_prepare(struct cred *new, const struct cred *old,
 
 	rc = smk_copy_relabel(&new_tsp->smk_relabel, &old_tsp->smk_relabel,
 				gfp);
-	if (rc != 0)
-		return rc;
-
-	return 0;
+	return rc;
 }
 
 /**
@@ -4581,6 +4553,10 @@ static int smack_dentry_create_files_as(struct dentry *dentry, int mode,
 	return 0;
 }
 
+struct lsm_blob_sizes smack_blob_sizes __lsm_ro_after_init = {
+	.lbs_cred = sizeof(struct task_smack),
+};
+
 static struct security_hook_list smack_hooks[] __lsm_ro_after_init = {
 	LSM_HOOK_INIT(ptrace_access_check, smack_ptrace_access_check),
 	LSM_HOOK_INIT(ptrace_traceme, smack_ptrace_traceme),
@@ -4758,20 +4734,25 @@ static __init void init_smack_known_list(void)
  */
 static __init int smack_init(void)
 {
-	struct cred *cred;
+	struct cred *cred = (struct cred *) current->cred;
 	struct task_smack *tsp;
 
 	smack_inode_cache = KMEM_CACHE(inode_smack, 0);
 	if (!smack_inode_cache)
 		return -ENOMEM;
 
-	tsp = new_task_smack(&smack_known_floor, &smack_known_floor,
-				GFP_KERNEL);
-	if (tsp == NULL) {
-		kmem_cache_destroy(smack_inode_cache);
-		return -ENOMEM;
-	}
+	lsm_early_cred(cred);
 
+	/*
+	 * Set the security state for the initial task.
+	 */
+	tsp = smack_cred(cred);
+	init_task_smack(tsp, &smack_known_floor, &smack_known_floor);
+
+	/*
+	 * Register with LSM
+	 */
+	security_add_hooks(smack_hooks, ARRAY_SIZE(smack_hooks), "smack");
 	smack_enabled = 1;
 
 	pr_info("Smack:  Initializing.\n");
@@ -4785,20 +4766,9 @@ static __init int smack_init(void)
 	pr_info("Smack:  IPv6 Netfilter enabled.\n");
 #endif
 
-	/*
-	 * Set the security state for the initial task.
-	 */
-	cred = (struct cred *) current->cred;
-	cred->security = tsp;
-
 	/* initialize the smack_known_list */
 	init_smack_known_list();
 
-	/*
-	 * Register with LSM
-	 */
-	security_add_hooks(smack_hooks, ARRAY_SIZE(smack_hooks), "smack");
-
 	return 0;
 }
 
@@ -4809,5 +4779,6 @@ static __init int smack_init(void)
 DEFINE_LSM(smack) = {
 	.name = "smack",
 	.flags = LSM_FLAG_LEGACY_MAJOR | LSM_FLAG_EXCLUSIVE,
+	.blobs = &smack_blob_sizes,
 	.init = smack_init,
 };
diff --git a/security/tomoyo/common.h b/security/tomoyo/common.h
index 41898613d93b..4fc17294a12d 100644
--- a/security/tomoyo/common.h
+++ b/security/tomoyo/common.h
@@ -1087,6 +1087,7 @@ extern struct tomoyo_domain_info tomoyo_kernel_domain;
 extern struct tomoyo_policy_namespace tomoyo_kernel_namespace;
 extern unsigned int tomoyo_memory_quota[TOMOYO_MAX_MEMORY_STAT];
 extern unsigned int tomoyo_memory_used[TOMOYO_MAX_MEMORY_STAT];
+extern struct lsm_blob_sizes tomoyo_blob_sizes;
 
 /********** Inlined functions. **********/
 
@@ -1206,7 +1207,7 @@ static inline void tomoyo_put_group(struct tomoyo_group *group)
  */
 static inline struct tomoyo_domain_info **tomoyo_cred(const struct cred *cred)
 {
-	return (struct tomoyo_domain_info **)&cred->security;
+	return cred->security + tomoyo_blob_sizes.lbs_cred;
 }
 
 /**
diff --git a/security/tomoyo/tomoyo.c b/security/tomoyo/tomoyo.c
index 15864307925d..9094cf41a247 100644
--- a/security/tomoyo/tomoyo.c
+++ b/security/tomoyo/tomoyo.c
@@ -509,6 +509,10 @@ static int tomoyo_socket_sendmsg(struct socket *sock, struct msghdr *msg,
 	return tomoyo_socket_sendmsg_permission(sock, msg, size);
 }
 
+struct lsm_blob_sizes tomoyo_blob_sizes __lsm_ro_after_init = {
+	.lbs_cred = sizeof(struct tomoyo_domain_info *),
+};
+
 /*
  * tomoyo_security_ops is a "struct security_operations" which is used for
  * registering TOMOYO.
@@ -562,6 +566,7 @@ static int __init tomoyo_init(void)
 	/* register ourselves with the security framework */
 	security_add_hooks(tomoyo_hooks, ARRAY_SIZE(tomoyo_hooks), "tomoyo");
 	printk(KERN_INFO "TOMOYO Linux initialized\n");
+	lsm_early_cred(cred);
 	blob = tomoyo_cred(cred);
 	*blob = &tomoyo_kernel_domain;
 	tomoyo_mm_init();
@@ -573,5 +578,6 @@ DEFINE_LSM(tomoyo) = {
 	.name = "tomoyo",
 	.enabled = &tomoyo_enabled,
 	.flags = LSM_FLAG_LEGACY_MAJOR | LSM_FLAG_EXCLUSIVE,
+	.blobs = &tomoyo_blob_sizes,
 	.init = tomoyo_init,
 };
-- 
cgit v1.2.3


From 33bf60cabcc7687b194a689b068b65e9ecd556be Mon Sep 17 00:00:00 2001
From: Casey Schaufler <casey@schaufler-ca.com>
Date: Mon, 12 Nov 2018 12:02:49 -0800
Subject: LSM: Infrastructure management of the file security

Move management of the file->f_security blob out of the
individual security modules and into the infrastructure.
The modules no longer allocate or free the data, instead
they tell the infrastructure how much space they require.

Signed-off-by: Casey Schaufler <casey@schaufler-ca.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
[kees: adjusted for ordered init series]
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 include/linux/lsm_hooks.h         |  1 +
 security/apparmor/include/file.h  |  5 +++-
 security/apparmor/lsm.c           | 19 +++++++-------
 security/security.c               | 54 ++++++++++++++++++++++++++++++++++++---
 security/selinux/hooks.c          | 25 ++----------------
 security/selinux/include/objsec.h |  2 +-
 security/smack/smack.h            |  3 ++-
 security/smack/smack_lsm.c        | 14 +---------
 8 files changed, 72 insertions(+), 51 deletions(-)

(limited to 'include')

diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h
index dd33666567bc..e8cef019b645 100644
--- a/include/linux/lsm_hooks.h
+++ b/include/linux/lsm_hooks.h
@@ -2032,6 +2032,7 @@ struct security_hook_list {
  */
 struct lsm_blob_sizes {
 	int	lbs_cred;
+	int	lbs_file;
 };
 
 /*
diff --git a/security/apparmor/include/file.h b/security/apparmor/include/file.h
index 4c2c8ac8842f..8be09208cf7c 100644
--- a/security/apparmor/include/file.h
+++ b/security/apparmor/include/file.h
@@ -32,7 +32,10 @@ struct path;
 				 AA_MAY_CHMOD | AA_MAY_CHOWN | AA_MAY_LOCK | \
 				 AA_EXEC_MMAP | AA_MAY_LINK)
 
-#define file_ctx(X) ((struct aa_file_ctx *)(X)->f_security)
+static inline struct aa_file_ctx *file_ctx(struct file *file)
+{
+	return file->f_security + apparmor_blob_sizes.lbs_file;
+}
 
 /* struct aa_file_ctx - the AppArmor context the file was opened in
  * @lock: lock to update the ctx
diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c
index d5e4a384f205..6821187b06ad 100644
--- a/security/apparmor/lsm.c
+++ b/security/apparmor/lsm.c
@@ -434,21 +434,21 @@ static int apparmor_file_open(struct file *file)
 
 static int apparmor_file_alloc_security(struct file *file)
 {
-	int error = 0;
-
-	/* freed by apparmor_file_free_security */
+	struct aa_file_ctx *ctx = file_ctx(file);
 	struct aa_label *label = begin_current_label_crit_section();
-	file->f_security = aa_alloc_file_ctx(label, GFP_KERNEL);
-	if (!file_ctx(file))
-		error = -ENOMEM;
-	end_current_label_crit_section(label);
 
-	return error;
+	spin_lock_init(&ctx->lock);
+	rcu_assign_pointer(ctx->label, aa_get_label(label));
+	end_current_label_crit_section(label);
+	return 0;
 }
 
 static void apparmor_file_free_security(struct file *file)
 {
-	aa_free_file_ctx(file_ctx(file));
+	struct aa_file_ctx *ctx = file_ctx(file);
+
+	if (ctx)
+		aa_put_label(rcu_access_pointer(ctx->label));
 }
 
 static int common_file_perm(const char *op, struct file *file, u32 mask)
@@ -1156,6 +1156,7 @@ static int apparmor_inet_conn_request(struct sock *sk, struct sk_buff *skb,
  */
 struct lsm_blob_sizes apparmor_blob_sizes __lsm_ro_after_init = {
 	.lbs_cred = sizeof(struct aa_task_ctx *),
+	.lbs_file = sizeof(struct aa_file_ctx),
 };
 
 static struct security_hook_list apparmor_hooks[] __lsm_ro_after_init = {
diff --git a/security/security.c b/security/security.c
index 09be8ce007a2..f32d7d2075c6 100644
--- a/security/security.c
+++ b/security/security.c
@@ -40,6 +40,8 @@
 struct security_hook_heads security_hook_heads __lsm_ro_after_init;
 static ATOMIC_NOTIFIER_HEAD(lsm_notifier_chain);
 
+static struct kmem_cache *lsm_file_cache;
+
 char *lsm_names;
 static struct lsm_blob_sizes blob_sizes __lsm_ro_after_init;
 
@@ -158,6 +160,7 @@ static void __init lsm_set_blob_sizes(struct lsm_blob_sizes *needed)
 		return;
 
 	lsm_set_blob_size(&needed->lbs_cred, &blob_sizes.lbs_cred);
+	lsm_set_blob_size(&needed->lbs_file, &blob_sizes.lbs_file);
 }
 
 /* Prepare LSM for initialization. */
@@ -279,6 +282,15 @@ static void __init ordered_lsm_init(void)
 		prepare_lsm(*lsm);
 
 	init_debug("cred blob size     = %d\n", blob_sizes.lbs_cred);
+	init_debug("file blob size     = %d\n", blob_sizes.lbs_file);
+
+	/*
+	 * Create any kmem_caches needed for blobs
+	 */
+	if (blob_sizes.lbs_file)
+		lsm_file_cache = kmem_cache_create("lsm_file_cache",
+						   blob_sizes.lbs_file, 0,
+						   SLAB_PANIC, NULL);
 
 	for (lsm = ordered_lsms; *lsm; lsm++)
 		initialize_lsm(*lsm);
@@ -448,6 +460,27 @@ void __init lsm_early_cred(struct cred *cred)
 		panic("%s: Early cred alloc failed.\n", __func__);
 }
 
+/**
+ * lsm_file_alloc - allocate a composite file blob
+ * @file: the file that needs a blob
+ *
+ * Allocate the file blob for all the modules
+ *
+ * Returns 0, or -ENOMEM if memory can't be allocated.
+ */
+static int lsm_file_alloc(struct file *file)
+{
+	if (!lsm_file_cache) {
+		file->f_security = NULL;
+		return 0;
+	}
+
+	file->f_security = kmem_cache_zalloc(lsm_file_cache, GFP_KERNEL);
+	if (file->f_security == NULL)
+		return -ENOMEM;
+	return 0;
+}
+
 /*
  * Hook list operation macros.
  *
@@ -1144,12 +1177,27 @@ int security_file_permission(struct file *file, int mask)
 
 int security_file_alloc(struct file *file)
 {
-	return call_int_hook(file_alloc_security, 0, file);
+	int rc = lsm_file_alloc(file);
+
+	if (rc)
+		return rc;
+	rc = call_int_hook(file_alloc_security, 0, file);
+	if (unlikely(rc))
+		security_file_free(file);
+	return rc;
 }
 
 void security_file_free(struct file *file)
 {
+	void *blob;
+
 	call_void_hook(file_free_security, file);
+
+	blob = file->f_security;
+	if (blob) {
+		file->f_security = NULL;
+		kmem_cache_free(lsm_file_cache, blob);
+	}
 }
 
 int security_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
@@ -1267,7 +1315,7 @@ int security_cred_alloc_blank(struct cred *cred, gfp_t gfp)
 		return rc;
 
 	rc = call_int_hook(cred_alloc_blank, 0, cred, gfp);
-	if (rc)
+	if (unlikely(rc))
 		security_cred_free(cred);
 	return rc;
 }
@@ -1288,7 +1336,7 @@ int security_prepare_creds(struct cred *new, const struct cred *old, gfp_t gfp)
 		return rc;
 
 	rc = call_int_hook(cred_prepare, 0, new, old, gfp);
-	if (rc)
+	if (unlikely(rc))
 		security_cred_free(new);
 	return rc;
 }
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 620be0367c0b..632813821da6 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -146,7 +146,6 @@ static int __init checkreqprot_setup(char *str)
 __setup("checkreqprot=", checkreqprot_setup);
 
 static struct kmem_cache *sel_inode_cache;
-static struct kmem_cache *file_security_cache;
 
 /**
  * selinux_secmark_enabled - Check to see if SECMARK is currently enabled
@@ -378,27 +377,15 @@ static void inode_free_security(struct inode *inode)
 
 static int file_alloc_security(struct file *file)
 {
-	struct file_security_struct *fsec;
+	struct file_security_struct *fsec = selinux_file(file);
 	u32 sid = current_sid();
 
-	fsec = kmem_cache_zalloc(file_security_cache, GFP_KERNEL);
-	if (!fsec)
-		return -ENOMEM;
-
 	fsec->sid = sid;
 	fsec->fown_sid = sid;
-	file->f_security = fsec;
 
 	return 0;
 }
 
-static void file_free_security(struct file *file)
-{
-	struct file_security_struct *fsec = selinux_file(file);
-	file->f_security = NULL;
-	kmem_cache_free(file_security_cache, fsec);
-}
-
 static int superblock_alloc_security(struct super_block *sb)
 {
 	struct superblock_security_struct *sbsec;
@@ -3345,11 +3332,6 @@ static int selinux_file_alloc_security(struct file *file)
 	return file_alloc_security(file);
 }
 
-static void selinux_file_free_security(struct file *file)
-{
-	file_free_security(file);
-}
-
 /*
  * Check whether a task has the ioctl permission and cmd
  * operation to an inode.
@@ -6646,6 +6628,7 @@ static void selinux_bpf_prog_free(struct bpf_prog_aux *aux)
 
 struct lsm_blob_sizes selinux_blob_sizes __lsm_ro_after_init = {
 	.lbs_cred = sizeof(struct task_security_struct),
+	.lbs_file = sizeof(struct file_security_struct),
 };
 
 static struct security_hook_list selinux_hooks[] __lsm_ro_after_init = {
@@ -6717,7 +6700,6 @@ static struct security_hook_list selinux_hooks[] __lsm_ro_after_init = {
 
 	LSM_HOOK_INIT(file_permission, selinux_file_permission),
 	LSM_HOOK_INIT(file_alloc_security, selinux_file_alloc_security),
-	LSM_HOOK_INIT(file_free_security, selinux_file_free_security),
 	LSM_HOOK_INIT(file_ioctl, selinux_file_ioctl),
 	LSM_HOOK_INIT(mmap_file, selinux_mmap_file),
 	LSM_HOOK_INIT(mmap_addr, selinux_mmap_addr),
@@ -6902,9 +6884,6 @@ static __init int selinux_init(void)
 	sel_inode_cache = kmem_cache_create("selinux_inode_security",
 					    sizeof(struct inode_security_struct),
 					    0, SLAB_PANIC, NULL);
-	file_security_cache = kmem_cache_create("selinux_file_security",
-					    sizeof(struct file_security_struct),
-					    0, SLAB_PANIC, NULL);
 	avc_init();
 
 	avtab_cache_init();
diff --git a/security/selinux/include/objsec.h b/security/selinux/include/objsec.h
index e0ac2992e059..96374dbf4ace 100644
--- a/security/selinux/include/objsec.h
+++ b/security/selinux/include/objsec.h
@@ -167,7 +167,7 @@ static inline struct task_security_struct *selinux_cred(const struct cred *cred)
 
 static inline struct file_security_struct *selinux_file(const struct file *file)
 {
-	return file->f_security;
+	return file->f_security + selinux_blob_sizes.lbs_file;
 }
 
 #endif /* _SELINUX_OBJSEC_H_ */
diff --git a/security/smack/smack.h b/security/smack/smack.h
index 50854969a391..2007d38d0e46 100644
--- a/security/smack/smack.h
+++ b/security/smack/smack.h
@@ -364,7 +364,8 @@ static inline struct task_smack *smack_cred(const struct cred *cred)
 
 static inline struct smack_known **smack_file(const struct file *file)
 {
-	return (struct smack_known **)&file->f_security;
+	return (struct smack_known **)(file->f_security +
+				       smack_blob_sizes.lbs_file);
 }
 
 /*
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
index 8f72641f94ab..7c76668ea3a6 100644
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -1495,18 +1495,6 @@ static int smack_file_alloc_security(struct file *file)
 	return 0;
 }
 
-/**
- * smack_file_free_security - clear a file security blob
- * @file: the object
- *
- * The security blob for a file is a pointer to the master
- * label list, so no memory is freed.
- */
-static void smack_file_free_security(struct file *file)
-{
-	file->f_security = NULL;
-}
-
 /**
  * smack_file_ioctl - Smack check on ioctls
  * @file: the object
@@ -4559,6 +4547,7 @@ static int smack_dentry_create_files_as(struct dentry *dentry, int mode,
 
 struct lsm_blob_sizes smack_blob_sizes __lsm_ro_after_init = {
 	.lbs_cred = sizeof(struct task_smack),
+	.lbs_file = sizeof(struct smack_known *),
 };
 
 static struct security_hook_list smack_hooks[] __lsm_ro_after_init = {
@@ -4595,7 +4584,6 @@ static struct security_hook_list smack_hooks[] __lsm_ro_after_init = {
 	LSM_HOOK_INIT(inode_getsecid, smack_inode_getsecid),
 
 	LSM_HOOK_INIT(file_alloc_security, smack_file_alloc_security),
-	LSM_HOOK_INIT(file_free_security, smack_file_free_security),
 	LSM_HOOK_INIT(file_ioctl, smack_file_ioctl),
 	LSM_HOOK_INIT(file_lock, smack_file_lock),
 	LSM_HOOK_INIT(file_fcntl, smack_file_fcntl),
-- 
cgit v1.2.3


From afb1cbe37440c7f38b9cf46fc331cc9dfd5cce21 Mon Sep 17 00:00:00 2001
From: Casey Schaufler <casey@schaufler-ca.com>
Date: Fri, 21 Sep 2018 17:19:29 -0700
Subject: LSM: Infrastructure management of the inode security

Move management of the inode->i_security blob out
of the individual security modules and into the security
infrastructure. Instead of allocating the blobs from within
the modules the modules tell the infrastructure how much
space is required, and the space is allocated there.

Signed-off-by: Casey Schaufler <casey@schaufler-ca.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
[kees: adjusted for ordered init series]
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 include/linux/lsm_hooks.h         |  3 ++
 security/security.c               | 64 +++++++++++++++++++++++++++++++--
 security/selinux/hooks.c          | 37 ++++---------------
 security/selinux/include/objsec.h |  9 +++--
 security/smack/smack.h            |  2 +-
 security/smack/smack_lsm.c        | 76 +++++++++------------------------------
 6 files changed, 93 insertions(+), 98 deletions(-)

(limited to 'include')

diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h
index e8cef019b645..1c798e842de2 100644
--- a/include/linux/lsm_hooks.h
+++ b/include/linux/lsm_hooks.h
@@ -2033,6 +2033,7 @@ struct security_hook_list {
 struct lsm_blob_sizes {
 	int	lbs_cred;
 	int	lbs_file;
+	int	lbs_inode;
 };
 
 /*
@@ -2104,6 +2105,8 @@ static inline void security_delete_hooks(struct security_hook_list *hooks,
 #define __lsm_ro_after_init	__ro_after_init
 #endif /* CONFIG_SECURITY_WRITABLE_HOOKS */
 
+extern int lsm_inode_alloc(struct inode *inode);
+
 #ifdef CONFIG_SECURITY
 void __init lsm_early_cred(struct cred *cred);
 #endif
diff --git a/security/security.c b/security/security.c
index f32d7d2075c6..4989fb65e662 100644
--- a/security/security.c
+++ b/security/security.c
@@ -41,6 +41,7 @@ struct security_hook_heads security_hook_heads __lsm_ro_after_init;
 static ATOMIC_NOTIFIER_HEAD(lsm_notifier_chain);
 
 static struct kmem_cache *lsm_file_cache;
+static struct kmem_cache *lsm_inode_cache;
 
 char *lsm_names;
 static struct lsm_blob_sizes blob_sizes __lsm_ro_after_init;
@@ -161,6 +162,13 @@ static void __init lsm_set_blob_sizes(struct lsm_blob_sizes *needed)
 
 	lsm_set_blob_size(&needed->lbs_cred, &blob_sizes.lbs_cred);
 	lsm_set_blob_size(&needed->lbs_file, &blob_sizes.lbs_file);
+	/*
+	 * The inode blob gets an rcu_head in addition to
+	 * what the modules might need.
+	 */
+	if (needed->lbs_inode && blob_sizes.lbs_inode == 0)
+		blob_sizes.lbs_inode = sizeof(struct rcu_head);
+	lsm_set_blob_size(&needed->lbs_inode, &blob_sizes.lbs_inode);
 }
 
 /* Prepare LSM for initialization. */
@@ -283,6 +291,7 @@ static void __init ordered_lsm_init(void)
 
 	init_debug("cred blob size     = %d\n", blob_sizes.lbs_cred);
 	init_debug("file blob size     = %d\n", blob_sizes.lbs_file);
+	init_debug("inode blob size    = %d\n", blob_sizes.lbs_inode);
 
 	/*
 	 * Create any kmem_caches needed for blobs
@@ -291,6 +300,10 @@ static void __init ordered_lsm_init(void)
 		lsm_file_cache = kmem_cache_create("lsm_file_cache",
 						   blob_sizes.lbs_file, 0,
 						   SLAB_PANIC, NULL);
+	if (blob_sizes.lbs_inode)
+		lsm_inode_cache = kmem_cache_create("lsm_inode_cache",
+						    blob_sizes.lbs_inode, 0,
+						    SLAB_PANIC, NULL);
 
 	for (lsm = ordered_lsms; *lsm; lsm++)
 		initialize_lsm(*lsm);
@@ -481,6 +494,27 @@ static int lsm_file_alloc(struct file *file)
 	return 0;
 }
 
+/**
+ * lsm_inode_alloc - allocate a composite inode blob
+ * @inode: the inode that needs a blob
+ *
+ * Allocate the inode blob for all the modules
+ *
+ * Returns 0, or -ENOMEM if memory can't be allocated.
+ */
+int lsm_inode_alloc(struct inode *inode)
+{
+	if (!lsm_inode_cache) {
+		inode->i_security = NULL;
+		return 0;
+	}
+
+	inode->i_security = kmem_cache_zalloc(lsm_inode_cache, GFP_NOFS);
+	if (inode->i_security == NULL)
+		return -ENOMEM;
+	return 0;
+}
+
 /*
  * Hook list operation macros.
  *
@@ -740,14 +774,40 @@ EXPORT_SYMBOL(security_add_mnt_opt);
 
 int security_inode_alloc(struct inode *inode)
 {
-	inode->i_security = NULL;
-	return call_int_hook(inode_alloc_security, 0, inode);
+	int rc = lsm_inode_alloc(inode);
+
+	if (unlikely(rc))
+		return rc;
+	rc = call_int_hook(inode_alloc_security, 0, inode);
+	if (unlikely(rc))
+		security_inode_free(inode);
+	return rc;
+}
+
+static void inode_free_by_rcu(struct rcu_head *head)
+{
+	/*
+	 * The rcu head is at the start of the inode blob
+	 */
+	kmem_cache_free(lsm_inode_cache, head);
 }
 
 void security_inode_free(struct inode *inode)
 {
 	integrity_inode_free(inode);
 	call_void_hook(inode_free_security, inode);
+	/*
+	 * The inode may still be referenced in a path walk and
+	 * a call to security_inode_permission() can be made
+	 * after inode_free_security() is called. Ideally, the VFS
+	 * wouldn't do this, but fixing that is a much harder
+	 * job. For now, simply free the i_security via RCU, and
+	 * leave the current inode->i_security pointer intact.
+	 * The inode will be freed after the RCU grace period too.
+	 */
+	if (inode->i_security)
+		call_rcu((struct rcu_head *)inode->i_security,
+				inode_free_by_rcu);
 }
 
 int security_dentry_init_security(struct dentry *dentry, int mode,
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 2d691e8dfbbf..23da46cd6e37 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -145,8 +145,6 @@ static int __init checkreqprot_setup(char *str)
 }
 __setup("checkreqprot=", checkreqprot_setup);
 
-static struct kmem_cache *sel_inode_cache;
-
 /**
  * selinux_secmark_enabled - Check to see if SECMARK is currently enabled
  *
@@ -242,13 +240,9 @@ static inline u32 task_sid(const struct task_struct *task)
 
 static int inode_alloc_security(struct inode *inode)
 {
-	struct inode_security_struct *isec;
+	struct inode_security_struct *isec = selinux_inode(inode);
 	u32 sid = current_sid();
 
-	isec = kmem_cache_zalloc(sel_inode_cache, GFP_NOFS);
-	if (!isec)
-		return -ENOMEM;
-
 	spin_lock_init(&isec->lock);
 	INIT_LIST_HEAD(&isec->list);
 	isec->inode = inode;
@@ -256,7 +250,6 @@ static int inode_alloc_security(struct inode *inode)
 	isec->sclass = SECCLASS_FILE;
 	isec->task_sid = sid;
 	isec->initialized = LABEL_INVALID;
-	inode->i_security = isec;
 
 	return 0;
 }
@@ -334,19 +327,14 @@ static struct inode_security_struct *backing_inode_security(struct dentry *dentr
 	return selinux_inode(inode);
 }
 
-static void inode_free_rcu(struct rcu_head *head)
-{
-	struct inode_security_struct *isec;
-
-	isec = container_of(head, struct inode_security_struct, rcu);
-	kmem_cache_free(sel_inode_cache, isec);
-}
-
 static void inode_free_security(struct inode *inode)
 {
 	struct inode_security_struct *isec = selinux_inode(inode);
-	struct superblock_security_struct *sbsec = inode->i_sb->s_security;
+	struct superblock_security_struct *sbsec;
 
+	if (!isec)
+		return;
+	sbsec = inode->i_sb->s_security;
 	/*
 	 * As not all inode security structures are in a list, we check for
 	 * empty list outside of the lock to make sure that we won't waste
@@ -362,17 +350,6 @@ static void inode_free_security(struct inode *inode)
 		list_del_init(&isec->list);
 		spin_unlock(&sbsec->isec_lock);
 	}
-
-	/*
-	 * The inode may still be referenced in a path walk and
-	 * a call to selinux_inode_permission() can be made
-	 * after inode_free_security() is called. Ideally, the VFS
-	 * wouldn't do this, but fixing that is a much harder
-	 * job. For now, simply free the i_security via RCU, and
-	 * leave the current inode->i_security pointer intact.
-	 * The inode will be freed after the RCU grace period too.
-	 */
-	call_rcu(&isec->rcu, inode_free_rcu);
 }
 
 static int file_alloc_security(struct file *file)
@@ -6629,6 +6606,7 @@ static void selinux_bpf_prog_free(struct bpf_prog_aux *aux)
 struct lsm_blob_sizes selinux_blob_sizes __lsm_ro_after_init = {
 	.lbs_cred = sizeof(struct task_security_struct),
 	.lbs_file = sizeof(struct file_security_struct),
+	.lbs_inode = sizeof(struct inode_security_struct),
 };
 
 static struct security_hook_list selinux_hooks[] __lsm_ro_after_init = {
@@ -6881,9 +6859,6 @@ static __init int selinux_init(void)
 
 	default_noexec = !(VM_DATA_DEFAULT_FLAGS & VM_EXEC);
 
-	sel_inode_cache = kmem_cache_create("selinux_inode_security",
-					    sizeof(struct inode_security_struct),
-					    0, SLAB_PANIC, NULL);
 	avc_init();
 
 	avtab_cache_init();
diff --git a/security/selinux/include/objsec.h b/security/selinux/include/objsec.h
index 26b4ff6b4d81..562fad58c56b 100644
--- a/security/selinux/include/objsec.h
+++ b/security/selinux/include/objsec.h
@@ -57,10 +57,7 @@ enum label_initialized {
 
 struct inode_security_struct {
 	struct inode *inode;	/* back pointer to inode object */
-	union {
-		struct list_head list;	/* list of inode_security_struct */
-		struct rcu_head rcu;	/* for freeing the inode_security_struct */
-	};
+	struct list_head list;	/* list of inode_security_struct */
 	u32 task_sid;		/* SID of creating task */
 	u32 sid;		/* SID of this object */
 	u16 sclass;		/* security class of this object */
@@ -173,7 +170,9 @@ static inline struct file_security_struct *selinux_file(const struct file *file)
 static inline struct inode_security_struct *selinux_inode(
 						const struct inode *inode)
 {
-	return inode->i_security;
+	if (unlikely(!inode->i_security))
+		return NULL;
+	return inode->i_security + selinux_blob_sizes.lbs_inode;
 }
 
 #endif /* _SELINUX_OBJSEC_H_ */
diff --git a/security/smack/smack.h b/security/smack/smack.h
index 436231dfae33..bf0abc35ca1c 100644
--- a/security/smack/smack.h
+++ b/security/smack/smack.h
@@ -370,7 +370,7 @@ static inline struct smack_known **smack_file(const struct file *file)
 
 static inline struct inode_smack *smack_inode(const struct inode *inode)
 {
-	return inode->i_security;
+	return inode->i_security + smack_blob_sizes.lbs_inode;
 }
 
 /*
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
index ddffda39d107..804897c82810 100644
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -305,24 +305,18 @@ static struct smack_known *smk_fetch(const char *name, struct inode *ip,
 }
 
 /**
- * new_inode_smack - allocate an inode security blob
+ * init_inode_smack - initialize an inode security blob
+ * @isp: the blob to initialize
  * @skp: a pointer to the Smack label entry to use in the blob
  *
- * Returns the new blob or NULL if there's no memory available
  */
-static struct inode_smack *new_inode_smack(struct smack_known *skp)
+static void init_inode_smack(struct inode *inode, struct smack_known *skp)
 {
-	struct inode_smack *isp;
-
-	isp = kmem_cache_zalloc(smack_inode_cache, GFP_NOFS);
-	if (isp == NULL)
-		return NULL;
+	struct inode_smack *isp = smack_inode(inode);
 
 	isp->smk_inode = skp;
 	isp->smk_flags = 0;
 	mutex_init(&isp->smk_lock);
-
-	return isp;
 }
 
 /**
@@ -709,6 +703,13 @@ static int smack_set_mnt_opts(struct super_block *sb,
 	if (sp->smk_flags & SMK_SB_INITIALIZED)
 		return 0;
 
+	if (inode->i_security == NULL) {
+		int rc = lsm_inode_alloc(inode);
+
+		if (rc)
+			return rc;
+	}
+
 	if (!smack_privileged(CAP_MAC_ADMIN)) {
 		/*
 		 * Unprivileged mounts don't get to specify Smack values.
@@ -773,17 +774,12 @@ static int smack_set_mnt_opts(struct super_block *sb,
 	/*
 	 * Initialize the root inode.
 	 */
-	isp = smack_inode(inode);
-	if (isp == NULL) {
-		isp = new_inode_smack(sp->smk_root);
-		if (isp == NULL)
-			return -ENOMEM;
-		inode->i_security = isp;
-	} else
-		isp->smk_inode = sp->smk_root;
+	init_inode_smack(inode, sp->smk_root);
 
-	if (transmute)
+	if (transmute) {
+		isp = smack_inode(inode);
 		isp->smk_flags |= SMK_INODE_TRANSMUTE;
+	}
 
 	return 0;
 }
@@ -881,48 +877,10 @@ static int smack_inode_alloc_security(struct inode *inode)
 {
 	struct smack_known *skp = smk_of_current();
 
-	inode->i_security = new_inode_smack(skp);
-	if (inode->i_security == NULL)
-		return -ENOMEM;
+	init_inode_smack(inode, skp);
 	return 0;
 }
 
-/**
- * smack_inode_free_rcu - Free inode_smack blob from cache
- * @head: the rcu_head for getting inode_smack pointer
- *
- *  Call back function called from call_rcu() to free
- *  the i_security blob pointer in inode
- */
-static void smack_inode_free_rcu(struct rcu_head *head)
-{
-	struct inode_smack *issp;
-
-	issp = container_of(head, struct inode_smack, smk_rcu);
-	kmem_cache_free(smack_inode_cache, issp);
-}
-
-/**
- * smack_inode_free_security - free an inode blob using call_rcu()
- * @inode: the inode with a blob
- *
- * Clears the blob pointer in inode using RCU
- */
-static void smack_inode_free_security(struct inode *inode)
-{
-	struct inode_smack *issp = smack_inode(inode);
-
-	/*
-	 * The inode may still be referenced in a path walk and
-	 * a call to smack_inode_permission() can be made
-	 * after smack_inode_free_security() is called.
-	 * To avoid race condition free the i_security via RCU
-	 * and leave the current inode->i_security pointer intact.
-	 * The inode will be freed after the RCU grace period too.
-	 */
-	call_rcu(&issp->smk_rcu, smack_inode_free_rcu);
-}
-
 /**
  * smack_inode_init_security - copy out the smack from an inode
  * @inode: the newly created inode
@@ -4548,6 +4506,7 @@ static int smack_dentry_create_files_as(struct dentry *dentry, int mode,
 struct lsm_blob_sizes smack_blob_sizes __lsm_ro_after_init = {
 	.lbs_cred = sizeof(struct task_smack),
 	.lbs_file = sizeof(struct smack_known *),
+	.lbs_inode = sizeof(struct inode_smack),
 };
 
 static struct security_hook_list smack_hooks[] __lsm_ro_after_init = {
@@ -4565,7 +4524,6 @@ static struct security_hook_list smack_hooks[] __lsm_ro_after_init = {
 	LSM_HOOK_INIT(bprm_set_creds, smack_bprm_set_creds),
 
 	LSM_HOOK_INIT(inode_alloc_security, smack_inode_alloc_security),
-	LSM_HOOK_INIT(inode_free_security, smack_inode_free_security),
 	LSM_HOOK_INIT(inode_init_security, smack_inode_init_security),
 	LSM_HOOK_INIT(inode_link, smack_inode_link),
 	LSM_HOOK_INIT(inode_unlink, smack_inode_unlink),
-- 
cgit v1.2.3


From f4ad8f2c40769b3cc9497ba0883bbaf823f7752f Mon Sep 17 00:00:00 2001
From: Casey Schaufler <casey@schaufler-ca.com>
Date: Fri, 21 Sep 2018 17:19:37 -0700
Subject: LSM: Infrastructure management of the task security

Move management of the task_struct->security blob out
of the individual security modules and into the security
infrastructure. Instead of allocating the blobs from within
the modules the modules tell the infrastructure how much
space is required, and the space is allocated there.
The only user of this blob is AppArmor. The AppArmor use
is abstracted to avoid future conflict.

Signed-off-by: Casey Schaufler <casey@schaufler-ca.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
[kees: adjusted for ordered init series]
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 include/linux/lsm_hooks.h        |  2 ++
 security/apparmor/include/task.h | 18 +++-----------
 security/apparmor/lsm.c          | 15 +++--------
 security/security.c              | 54 +++++++++++++++++++++++++++++++++++++++-
 4 files changed, 62 insertions(+), 27 deletions(-)

(limited to 'include')

diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h
index 1c798e842de2..9b39fefa88c4 100644
--- a/include/linux/lsm_hooks.h
+++ b/include/linux/lsm_hooks.h
@@ -2034,6 +2034,7 @@ struct lsm_blob_sizes {
 	int	lbs_cred;
 	int	lbs_file;
 	int	lbs_inode;
+	int	lbs_task;
 };
 
 /*
@@ -2109,6 +2110,7 @@ extern int lsm_inode_alloc(struct inode *inode);
 
 #ifdef CONFIG_SECURITY
 void __init lsm_early_cred(struct cred *cred);
+void __init lsm_early_task(struct task_struct *task);
 #endif
 
 #endif /* ! __LINUX_LSM_HOOKS_H */
diff --git a/security/apparmor/include/task.h b/security/apparmor/include/task.h
index 55edaa1d83f8..039c1e60887a 100644
--- a/security/apparmor/include/task.h
+++ b/security/apparmor/include/task.h
@@ -14,7 +14,10 @@
 #ifndef __AA_TASK_H
 #define __AA_TASK_H
 
-#define task_ctx(X) ((X)->security)
+static inline struct aa_task_ctx *task_ctx(struct task_struct *task)
+{
+	return task->security;
+}
 
 /*
  * struct aa_task_ctx - information for current task label change
@@ -36,17 +39,6 @@ int aa_set_current_hat(struct aa_label *label, u64 token);
 int aa_restore_previous_label(u64 cookie);
 struct aa_label *aa_get_task_label(struct task_struct *task);
 
-/**
- * aa_alloc_task_ctx - allocate a new task_ctx
- * @flags: gfp flags for allocation
- *
- * Returns: allocated buffer or NULL on failure
- */
-static inline struct aa_task_ctx *aa_alloc_task_ctx(gfp_t flags)
-{
-	return kzalloc(sizeof(struct aa_task_ctx), flags);
-}
-
 /**
  * aa_free_task_ctx - free a task_ctx
  * @ctx: task_ctx to free (MAYBE NULL)
@@ -57,8 +49,6 @@ static inline void aa_free_task_ctx(struct aa_task_ctx *ctx)
 		aa_put_label(ctx->nnp);
 		aa_put_label(ctx->previous);
 		aa_put_label(ctx->onexec);
-
-		kzfree(ctx);
 	}
 }
 
diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c
index 6821187b06ad..60ef71268ccf 100644
--- a/security/apparmor/lsm.c
+++ b/security/apparmor/lsm.c
@@ -94,19 +94,14 @@ static void apparmor_task_free(struct task_struct *task)
 {
 
 	aa_free_task_ctx(task_ctx(task));
-	task_ctx(task) = NULL;
 }
 
 static int apparmor_task_alloc(struct task_struct *task,
 			       unsigned long clone_flags)
 {
-	struct aa_task_ctx *new = aa_alloc_task_ctx(GFP_KERNEL);
-
-	if (!new)
-		return -ENOMEM;
+	struct aa_task_ctx *new = task_ctx(task);
 
 	aa_dup_task_ctx(new, task_ctx(current));
-	task_ctx(task) = new;
 
 	return 0;
 }
@@ -1157,6 +1152,7 @@ static int apparmor_inet_conn_request(struct sock *sk, struct sk_buff *skb,
 struct lsm_blob_sizes apparmor_blob_sizes __lsm_ro_after_init = {
 	.lbs_cred = sizeof(struct aa_task_ctx *),
 	.lbs_file = sizeof(struct aa_file_ctx),
+	.lbs_task = sizeof(struct aa_task_ctx),
 };
 
 static struct security_hook_list apparmor_hooks[] __lsm_ro_after_init = {
@@ -1487,15 +1483,10 @@ static int param_set_mode(const char *val, const struct kernel_param *kp)
 static int __init set_init_ctx(void)
 {
 	struct cred *cred = (struct cred *)current->real_cred;
-	struct aa_task_ctx *ctx;
-
-	ctx = aa_alloc_task_ctx(GFP_KERNEL);
-	if (!ctx)
-		return -ENOMEM;
 
 	lsm_early_cred(cred);
+	lsm_early_task(current);
 	set_cred_label(cred, aa_get_label(ns_unconfined(root_ns)));
-	task_ctx(current) = ctx;
 
 	return 0;
 }
diff --git a/security/security.c b/security/security.c
index 4989fb65e662..e59a1e1514ee 100644
--- a/security/security.c
+++ b/security/security.c
@@ -169,6 +169,7 @@ static void __init lsm_set_blob_sizes(struct lsm_blob_sizes *needed)
 	if (needed->lbs_inode && blob_sizes.lbs_inode == 0)
 		blob_sizes.lbs_inode = sizeof(struct rcu_head);
 	lsm_set_blob_size(&needed->lbs_inode, &blob_sizes.lbs_inode);
+	lsm_set_blob_size(&needed->lbs_task, &blob_sizes.lbs_task);
 }
 
 /* Prepare LSM for initialization. */
@@ -292,6 +293,7 @@ static void __init ordered_lsm_init(void)
 	init_debug("cred blob size     = %d\n", blob_sizes.lbs_cred);
 	init_debug("file blob size     = %d\n", blob_sizes.lbs_file);
 	init_debug("inode blob size    = %d\n", blob_sizes.lbs_inode);
+	init_debug("task blob size     = %d\n", blob_sizes.lbs_task);
 
 	/*
 	 * Create any kmem_caches needed for blobs
@@ -515,6 +517,46 @@ int lsm_inode_alloc(struct inode *inode)
 	return 0;
 }
 
+/**
+ * lsm_task_alloc - allocate a composite task blob
+ * @task: the task that needs a blob
+ *
+ * Allocate the task blob for all the modules
+ *
+ * Returns 0, or -ENOMEM if memory can't be allocated.
+ */
+int lsm_task_alloc(struct task_struct *task)
+{
+	if (blob_sizes.lbs_task == 0) {
+		task->security = NULL;
+		return 0;
+	}
+
+	task->security = kzalloc(blob_sizes.lbs_task, GFP_KERNEL);
+	if (task->security == NULL)
+		return -ENOMEM;
+	return 0;
+}
+
+/**
+ * lsm_early_task - during initialization allocate a composite task blob
+ * @task: the task that needs a blob
+ *
+ * Allocate the task blob for all the modules if it's not already there
+ */
+void __init lsm_early_task(struct task_struct *task)
+{
+	int rc;
+
+	if (task == NULL)
+		panic("%s: task cred.\n", __func__);
+	if (task->security != NULL)
+		return;
+	rc = lsm_task_alloc(task);
+	if (rc)
+		panic("%s: Early task alloc failed.\n", __func__);
+}
+
 /*
  * Hook list operation macros.
  *
@@ -1359,12 +1401,22 @@ int security_file_open(struct file *file)
 
 int security_task_alloc(struct task_struct *task, unsigned long clone_flags)
 {
-	return call_int_hook(task_alloc, 0, task, clone_flags);
+	int rc = lsm_task_alloc(task);
+
+	if (rc)
+		return rc;
+	rc = call_int_hook(task_alloc, 0, task, clone_flags);
+	if (unlikely(rc))
+		security_task_free(task);
+	return rc;
 }
 
 void security_task_free(struct task_struct *task)
 {
 	call_void_hook(task_free, task);
+
+	kfree(task->security);
+	task->security = NULL;
 }
 
 int security_cred_alloc_blank(struct cred *cred, gfp_t gfp)
-- 
cgit v1.2.3


From ecd5f82e05ddd9b06c258167ec7467ac79741d77 Mon Sep 17 00:00:00 2001
From: Casey Schaufler <casey@schaufler-ca.com>
Date: Tue, 20 Nov 2018 11:55:02 -0800
Subject: LSM: Infrastructure management of the ipc security blob

Move management of the kern_ipc_perm->security and
msg_msg->security blobs out of the individual security
modules and into the security infrastructure. Instead
of allocating the blobs from within the modules the modules
tell the infrastructure how much space is required, and
the space is allocated there.

Signed-off-by: Casey Schaufler <casey@schaufler-ca.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
[kees: adjusted for ordered init series]
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 include/linux/lsm_hooks.h         |  2 +
 security/security.c               | 91 ++++++++++++++++++++++++++++++++++--
 security/selinux/hooks.c          | 98 ++++++---------------------------------
 security/selinux/include/objsec.h |  4 +-
 security/smack/smack.h            |  4 +-
 security/smack/smack_lsm.c        | 32 ++-----------
 6 files changed, 110 insertions(+), 121 deletions(-)

(limited to 'include')

diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h
index 9b39fefa88c4..40511a8a5ae6 100644
--- a/include/linux/lsm_hooks.h
+++ b/include/linux/lsm_hooks.h
@@ -2034,6 +2034,8 @@ struct lsm_blob_sizes {
 	int	lbs_cred;
 	int	lbs_file;
 	int	lbs_inode;
+	int	lbs_ipc;
+	int	lbs_msg_msg;
 	int	lbs_task;
 };
 
diff --git a/security/security.c b/security/security.c
index e59a1e1514ee..953fc3ea18a9 100644
--- a/security/security.c
+++ b/security/security.c
@@ -30,6 +30,7 @@
 #include <linux/personality.h>
 #include <linux/backing-dev.h>
 #include <linux/string.h>
+#include <linux/msg.h>
 #include <net/flow.h>
 
 #define MAX_LSM_EVM_XATTR	2
@@ -169,6 +170,8 @@ static void __init lsm_set_blob_sizes(struct lsm_blob_sizes *needed)
 	if (needed->lbs_inode && blob_sizes.lbs_inode == 0)
 		blob_sizes.lbs_inode = sizeof(struct rcu_head);
 	lsm_set_blob_size(&needed->lbs_inode, &blob_sizes.lbs_inode);
+	lsm_set_blob_size(&needed->lbs_ipc, &blob_sizes.lbs_ipc);
+	lsm_set_blob_size(&needed->lbs_msg_msg, &blob_sizes.lbs_msg_msg);
 	lsm_set_blob_size(&needed->lbs_task, &blob_sizes.lbs_task);
 }
 
@@ -293,6 +296,8 @@ static void __init ordered_lsm_init(void)
 	init_debug("cred blob size     = %d\n", blob_sizes.lbs_cred);
 	init_debug("file blob size     = %d\n", blob_sizes.lbs_file);
 	init_debug("inode blob size    = %d\n", blob_sizes.lbs_inode);
+	init_debug("ipc blob size      = %d\n", blob_sizes.lbs_ipc);
+	init_debug("msg_msg blob size  = %d\n", blob_sizes.lbs_msg_msg);
 	init_debug("task blob size     = %d\n", blob_sizes.lbs_task);
 
 	/*
@@ -538,6 +543,48 @@ int lsm_task_alloc(struct task_struct *task)
 	return 0;
 }
 
+/**
+ * lsm_ipc_alloc - allocate a composite ipc blob
+ * @kip: the ipc that needs a blob
+ *
+ * Allocate the ipc blob for all the modules
+ *
+ * Returns 0, or -ENOMEM if memory can't be allocated.
+ */
+int lsm_ipc_alloc(struct kern_ipc_perm *kip)
+{
+	if (blob_sizes.lbs_ipc == 0) {
+		kip->security = NULL;
+		return 0;
+	}
+
+	kip->security = kzalloc(blob_sizes.lbs_ipc, GFP_KERNEL);
+	if (kip->security == NULL)
+		return -ENOMEM;
+	return 0;
+}
+
+/**
+ * lsm_msg_msg_alloc - allocate a composite msg_msg blob
+ * @mp: the msg_msg that needs a blob
+ *
+ * Allocate the ipc blob for all the modules
+ *
+ * Returns 0, or -ENOMEM if memory can't be allocated.
+ */
+int lsm_msg_msg_alloc(struct msg_msg *mp)
+{
+	if (blob_sizes.lbs_msg_msg == 0) {
+		mp->security = NULL;
+		return 0;
+	}
+
+	mp->security = kzalloc(blob_sizes.lbs_msg_msg, GFP_KERNEL);
+	if (mp->security == NULL)
+		return -ENOMEM;
+	return 0;
+}
+
 /**
  * lsm_early_task - during initialization allocate a composite task blob
  * @task: the task that needs a blob
@@ -1631,22 +1678,40 @@ void security_ipc_getsecid(struct kern_ipc_perm *ipcp, u32 *secid)
 
 int security_msg_msg_alloc(struct msg_msg *msg)
 {
-	return call_int_hook(msg_msg_alloc_security, 0, msg);
+	int rc = lsm_msg_msg_alloc(msg);
+
+	if (unlikely(rc))
+		return rc;
+	rc = call_int_hook(msg_msg_alloc_security, 0, msg);
+	if (unlikely(rc))
+		security_msg_msg_free(msg);
+	return rc;
 }
 
 void security_msg_msg_free(struct msg_msg *msg)
 {
 	call_void_hook(msg_msg_free_security, msg);
+	kfree(msg->security);
+	msg->security = NULL;
 }
 
 int security_msg_queue_alloc(struct kern_ipc_perm *msq)
 {
-	return call_int_hook(msg_queue_alloc_security, 0, msq);
+	int rc = lsm_ipc_alloc(msq);
+
+	if (unlikely(rc))
+		return rc;
+	rc = call_int_hook(msg_queue_alloc_security, 0, msq);
+	if (unlikely(rc))
+		security_msg_queue_free(msq);
+	return rc;
 }
 
 void security_msg_queue_free(struct kern_ipc_perm *msq)
 {
 	call_void_hook(msg_queue_free_security, msq);
+	kfree(msq->security);
+	msq->security = NULL;
 }
 
 int security_msg_queue_associate(struct kern_ipc_perm *msq, int msqflg)
@@ -1673,12 +1738,21 @@ int security_msg_queue_msgrcv(struct kern_ipc_perm *msq, struct msg_msg *msg,
 
 int security_shm_alloc(struct kern_ipc_perm *shp)
 {
-	return call_int_hook(shm_alloc_security, 0, shp);
+	int rc = lsm_ipc_alloc(shp);
+
+	if (unlikely(rc))
+		return rc;
+	rc = call_int_hook(shm_alloc_security, 0, shp);
+	if (unlikely(rc))
+		security_shm_free(shp);
+	return rc;
 }
 
 void security_shm_free(struct kern_ipc_perm *shp)
 {
 	call_void_hook(shm_free_security, shp);
+	kfree(shp->security);
+	shp->security = NULL;
 }
 
 int security_shm_associate(struct kern_ipc_perm *shp, int shmflg)
@@ -1698,12 +1772,21 @@ int security_shm_shmat(struct kern_ipc_perm *shp, char __user *shmaddr, int shmf
 
 int security_sem_alloc(struct kern_ipc_perm *sma)
 {
-	return call_int_hook(sem_alloc_security, 0, sma);
+	int rc = lsm_ipc_alloc(sma);
+
+	if (unlikely(rc))
+		return rc;
+	rc = call_int_hook(sem_alloc_security, 0, sma);
+	if (unlikely(rc))
+		security_sem_free(sma);
+	return rc;
 }
 
 void security_sem_free(struct kern_ipc_perm *sma)
 {
 	call_void_hook(sem_free_security, sma);
+	kfree(sma->security);
+	sma->security = NULL;
 }
 
 int security_sem_associate(struct kern_ipc_perm *sma, int semflg)
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 4b64ad31326f..d98e1d8d18f6 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -5626,51 +5626,22 @@ static int selinux_netlink_send(struct sock *sk, struct sk_buff *skb)
 	return selinux_nlmsg_perm(sk, skb);
 }
 
-static int ipc_alloc_security(struct kern_ipc_perm *perm,
-			      u16 sclass)
+static void ipc_init_security(struct ipc_security_struct *isec, u16 sclass)
 {
-	struct ipc_security_struct *isec;
-
-	isec = kzalloc(sizeof(struct ipc_security_struct), GFP_KERNEL);
-	if (!isec)
-		return -ENOMEM;
-
 	isec->sclass = sclass;
 	isec->sid = current_sid();
-	perm->security = isec;
-
-	return 0;
-}
-
-static void ipc_free_security(struct kern_ipc_perm *perm)
-{
-	struct ipc_security_struct *isec = perm->security;
-	perm->security = NULL;
-	kfree(isec);
 }
 
 static int msg_msg_alloc_security(struct msg_msg *msg)
 {
 	struct msg_security_struct *msec;
 
-	msec = kzalloc(sizeof(struct msg_security_struct), GFP_KERNEL);
-	if (!msec)
-		return -ENOMEM;
-
+	msec = selinux_msg_msg(msg);
 	msec->sid = SECINITSID_UNLABELED;
-	msg->security = msec;
 
 	return 0;
 }
 
-static void msg_msg_free_security(struct msg_msg *msg)
-{
-	struct msg_security_struct *msec = msg->security;
-
-	msg->security = NULL;
-	kfree(msec);
-}
-
 static int ipc_has_perm(struct kern_ipc_perm *ipc_perms,
 			u32 perms)
 {
@@ -5692,11 +5663,6 @@ static int selinux_msg_msg_alloc_security(struct msg_msg *msg)
 	return msg_msg_alloc_security(msg);
 }
 
-static void selinux_msg_msg_free_security(struct msg_msg *msg)
-{
-	msg_msg_free_security(msg);
-}
-
 /* message queue security operations */
 static int selinux_msg_queue_alloc_security(struct kern_ipc_perm *msq)
 {
@@ -5705,11 +5671,8 @@ static int selinux_msg_queue_alloc_security(struct kern_ipc_perm *msq)
 	u32 sid = current_sid();
 	int rc;
 
-	rc = ipc_alloc_security(msq, SECCLASS_MSGQ);
-	if (rc)
-		return rc;
-
-	isec = msq->security;
+	isec = selinux_ipc(msq);
+	ipc_init_security(isec, SECCLASS_MSGQ);
 
 	ad.type = LSM_AUDIT_DATA_IPC;
 	ad.u.ipc_id = msq->key;
@@ -5717,16 +5680,7 @@ static int selinux_msg_queue_alloc_security(struct kern_ipc_perm *msq)
 	rc = avc_has_perm(&selinux_state,
 			  sid, isec->sid, SECCLASS_MSGQ,
 			  MSGQ__CREATE, &ad);
-	if (rc) {
-		ipc_free_security(msq);
-		return rc;
-	}
-	return 0;
-}
-
-static void selinux_msg_queue_free_security(struct kern_ipc_perm *msq)
-{
-	ipc_free_security(msq);
+	return rc;
 }
 
 static int selinux_msg_queue_associate(struct kern_ipc_perm *msq, int msqflg)
@@ -5856,11 +5810,8 @@ static int selinux_shm_alloc_security(struct kern_ipc_perm *shp)
 	u32 sid = current_sid();
 	int rc;
 
-	rc = ipc_alloc_security(shp, SECCLASS_SHM);
-	if (rc)
-		return rc;
-
-	isec = shp->security;
+	isec = selinux_ipc(shp);
+	ipc_init_security(isec, SECCLASS_SHM);
 
 	ad.type = LSM_AUDIT_DATA_IPC;
 	ad.u.ipc_id = shp->key;
@@ -5868,16 +5819,7 @@ static int selinux_shm_alloc_security(struct kern_ipc_perm *shp)
 	rc = avc_has_perm(&selinux_state,
 			  sid, isec->sid, SECCLASS_SHM,
 			  SHM__CREATE, &ad);
-	if (rc) {
-		ipc_free_security(shp);
-		return rc;
-	}
-	return 0;
-}
-
-static void selinux_shm_free_security(struct kern_ipc_perm *shp)
-{
-	ipc_free_security(shp);
+	return rc;
 }
 
 static int selinux_shm_associate(struct kern_ipc_perm *shp, int shmflg)
@@ -5953,11 +5895,8 @@ static int selinux_sem_alloc_security(struct kern_ipc_perm *sma)
 	u32 sid = current_sid();
 	int rc;
 
-	rc = ipc_alloc_security(sma, SECCLASS_SEM);
-	if (rc)
-		return rc;
-
-	isec = sma->security;
+	isec = selinux_ipc(sma);
+	ipc_init_security(isec, SECCLASS_SEM);
 
 	ad.type = LSM_AUDIT_DATA_IPC;
 	ad.u.ipc_id = sma->key;
@@ -5965,16 +5904,7 @@ static int selinux_sem_alloc_security(struct kern_ipc_perm *sma)
 	rc = avc_has_perm(&selinux_state,
 			  sid, isec->sid, SECCLASS_SEM,
 			  SEM__CREATE, &ad);
-	if (rc) {
-		ipc_free_security(sma);
-		return rc;
-	}
-	return 0;
-}
-
-static void selinux_sem_free_security(struct kern_ipc_perm *sma)
-{
-	ipc_free_security(sma);
+	return rc;
 }
 
 static int selinux_sem_associate(struct kern_ipc_perm *sma, int semflg)
@@ -6607,6 +6537,8 @@ struct lsm_blob_sizes selinux_blob_sizes __lsm_ro_after_init = {
 	.lbs_cred = sizeof(struct task_security_struct),
 	.lbs_file = sizeof(struct file_security_struct),
 	.lbs_inode = sizeof(struct inode_security_struct),
+	.lbs_ipc = sizeof(struct ipc_security_struct),
+	.lbs_msg_msg = sizeof(struct msg_security_struct),
 };
 
 static struct security_hook_list selinux_hooks[] __lsm_ro_after_init = {
@@ -6718,24 +6650,20 @@ static struct security_hook_list selinux_hooks[] __lsm_ro_after_init = {
 	LSM_HOOK_INIT(ipc_getsecid, selinux_ipc_getsecid),
 
 	LSM_HOOK_INIT(msg_msg_alloc_security, selinux_msg_msg_alloc_security),
-	LSM_HOOK_INIT(msg_msg_free_security, selinux_msg_msg_free_security),
 
 	LSM_HOOK_INIT(msg_queue_alloc_security,
 			selinux_msg_queue_alloc_security),
-	LSM_HOOK_INIT(msg_queue_free_security, selinux_msg_queue_free_security),
 	LSM_HOOK_INIT(msg_queue_associate, selinux_msg_queue_associate),
 	LSM_HOOK_INIT(msg_queue_msgctl, selinux_msg_queue_msgctl),
 	LSM_HOOK_INIT(msg_queue_msgsnd, selinux_msg_queue_msgsnd),
 	LSM_HOOK_INIT(msg_queue_msgrcv, selinux_msg_queue_msgrcv),
 
 	LSM_HOOK_INIT(shm_alloc_security, selinux_shm_alloc_security),
-	LSM_HOOK_INIT(shm_free_security, selinux_shm_free_security),
 	LSM_HOOK_INIT(shm_associate, selinux_shm_associate),
 	LSM_HOOK_INIT(shm_shmctl, selinux_shm_shmctl),
 	LSM_HOOK_INIT(shm_shmat, selinux_shm_shmat),
 
 	LSM_HOOK_INIT(sem_alloc_security, selinux_sem_alloc_security),
-	LSM_HOOK_INIT(sem_free_security, selinux_sem_free_security),
 	LSM_HOOK_INIT(sem_associate, selinux_sem_associate),
 	LSM_HOOK_INIT(sem_semctl, selinux_sem_semctl),
 	LSM_HOOK_INIT(sem_semop, selinux_sem_semop),
diff --git a/security/selinux/include/objsec.h b/security/selinux/include/objsec.h
index 539cacf4a572..231262d8eac9 100644
--- a/security/selinux/include/objsec.h
+++ b/security/selinux/include/objsec.h
@@ -179,13 +179,13 @@ static inline struct inode_security_struct *selinux_inode(
 static inline struct msg_security_struct *selinux_msg_msg(
 						const struct msg_msg *msg_msg)
 {
-	return msg_msg->security;
+	return msg_msg->security + selinux_blob_sizes.lbs_msg_msg;
 }
 
 static inline struct ipc_security_struct *selinux_ipc(
 						const struct kern_ipc_perm *ipc)
 {
-	return ipc->security;
+	return ipc->security + selinux_blob_sizes.lbs_ipc;
 }
 
 #endif /* _SELINUX_OBJSEC_H_ */
diff --git a/security/smack/smack.h b/security/smack/smack.h
index 0adddbeecc62..9c7c95a5c497 100644
--- a/security/smack/smack.h
+++ b/security/smack/smack.h
@@ -376,12 +376,12 @@ static inline struct inode_smack *smack_inode(const struct inode *inode)
 
 static inline struct smack_known **smack_msg_msg(const struct msg_msg *msg)
 {
-	return (struct smack_known **)&msg->security;
+	return msg->security + smack_blob_sizes.lbs_msg_msg;
 }
 
 static inline struct smack_known **smack_ipc(const struct kern_ipc_perm *ipc)
 {
-	return (struct smack_known **)&ipc->security;
+	return ipc->security + smack_blob_sizes.lbs_ipc;
 }
 
 /*
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
index 154521b6843b..0b848b1f6366 100644
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -2809,23 +2809,12 @@ static int smack_flags_to_may(int flags)
  */
 static int smack_msg_msg_alloc_security(struct msg_msg *msg)
 {
-	struct smack_known *skp = smk_of_current();
+	struct smack_known **blob = smack_msg_msg(msg);
 
-	msg->security = skp;
+	*blob = smk_of_current();
 	return 0;
 }
 
-/**
- * smack_msg_msg_free_security - Clear the security blob for msg_msg
- * @msg: the object
- *
- * Clears the blob pointer
- */
-static void smack_msg_msg_free_security(struct msg_msg *msg)
-{
-	msg->security = NULL;
-}
-
 /**
  * smack_of_ipc - the smack pointer for the ipc
  * @isp: the object
@@ -2853,17 +2842,6 @@ static int smack_ipc_alloc_security(struct kern_ipc_perm *isp)
 	return 0;
 }
 
-/**
- * smack_ipc_free_security - Clear the security blob for ipc
- * @isp: the object
- *
- * Clears the blob pointer
- */
-static void smack_ipc_free_security(struct kern_ipc_perm *isp)
-{
-	isp->security = NULL;
-}
-
 /**
  * smk_curacc_shm : check if current has access on shm
  * @isp : the object
@@ -4511,6 +4489,8 @@ struct lsm_blob_sizes smack_blob_sizes __lsm_ro_after_init = {
 	.lbs_cred = sizeof(struct task_smack),
 	.lbs_file = sizeof(struct smack_known *),
 	.lbs_inode = sizeof(struct inode_smack),
+	.lbs_ipc = sizeof(struct smack_known *),
+	.lbs_msg_msg = sizeof(struct smack_known *),
 };
 
 static struct security_hook_list smack_hooks[] __lsm_ro_after_init = {
@@ -4581,23 +4561,19 @@ static struct security_hook_list smack_hooks[] __lsm_ro_after_init = {
 	LSM_HOOK_INIT(ipc_getsecid, smack_ipc_getsecid),
 
 	LSM_HOOK_INIT(msg_msg_alloc_security, smack_msg_msg_alloc_security),
-	LSM_HOOK_INIT(msg_msg_free_security, smack_msg_msg_free_security),
 
 	LSM_HOOK_INIT(msg_queue_alloc_security, smack_ipc_alloc_security),
-	LSM_HOOK_INIT(msg_queue_free_security, smack_ipc_free_security),
 	LSM_HOOK_INIT(msg_queue_associate, smack_msg_queue_associate),
 	LSM_HOOK_INIT(msg_queue_msgctl, smack_msg_queue_msgctl),
 	LSM_HOOK_INIT(msg_queue_msgsnd, smack_msg_queue_msgsnd),
 	LSM_HOOK_INIT(msg_queue_msgrcv, smack_msg_queue_msgrcv),
 
 	LSM_HOOK_INIT(shm_alloc_security, smack_ipc_alloc_security),
-	LSM_HOOK_INIT(shm_free_security, smack_ipc_free_security),
 	LSM_HOOK_INIT(shm_associate, smack_shm_associate),
 	LSM_HOOK_INIT(shm_shmctl, smack_shm_shmctl),
 	LSM_HOOK_INIT(shm_shmat, smack_shm_shmat),
 
 	LSM_HOOK_INIT(sem_alloc_security, smack_ipc_alloc_security),
-	LSM_HOOK_INIT(sem_free_security, smack_ipc_free_security),
 	LSM_HOOK_INIT(sem_associate, smack_sem_associate),
 	LSM_HOOK_INIT(sem_semctl, smack_sem_semctl),
 	LSM_HOOK_INIT(sem_semop, smack_sem_semop),
-- 
cgit v1.2.3


From 96f87ee1811306d0c8cf94b8c37b0e4f725b01d1 Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Tue, 8 Jan 2019 16:07:23 +0200
Subject: RDMA: Clean structures from CONFIG_INFINIBAND_ON_DEMAND_PAGING

CONFIG_INFINIBAND_ON_DEMAND_PAGING is used in general structures to
micro-optimize the memory footprint. Remove it, so it will allow us to
simplify various ODP device flows.

Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/hw/mlx5/mlx5_ib.h | 2 --
 include/rdma/ib_verbs.h              | 2 --
 2 files changed, 4 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index b06d3b1efea8..93c288a4de2b 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -911,7 +911,6 @@ struct mlx5_ib_dev {
 	/* Prevents soft lock on massive reg MRs */
 	struct mutex			slow_path_mutex;
 	int				fill_delay;
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
 	struct ib_odp_caps	odp_caps;
 	u64			odp_max_size;
 	struct mlx5_ib_pf_eq	odp_pf_eq;
@@ -923,7 +922,6 @@ struct mlx5_ib_dev {
 	struct srcu_struct      mr_srcu;
 	u32			null_mkey;
 	struct workqueue_struct *advise_mr_wq;
-#endif
 	struct mlx5_ib_flow_db	*flow_db;
 	/* protect resources needed as part of reset flow */
 	spinlock_t		reset_flow_resource_lock;
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index a3ceed3a040a..3ddd199ba602 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -1504,12 +1504,10 @@ struct ib_ucontext {
 
 	bool cleanup_retryable;
 
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
 	void (*invalidate_range)(struct ib_umem_odp *umem_odp,
 				 unsigned long start, unsigned long end);
 	struct mutex per_mm_list_lock;
 	struct list_head per_mm_list;
-#endif
 
 	struct ib_rdmacg_object	cg_obj;
 	/*
-- 
cgit v1.2.3


From 13859d5df418ea535926e2b57c29d5161c522b9d Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Tue, 8 Jan 2019 16:07:26 +0200
Subject: RDMA/mlx5: Embed into the code flow the ODP config option

Convert various places to more readable code, which embeds
CONFIG_INFINIBAND_ON_DEMAND_PAGING into the code flow.

Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/core/uverbs_cmd.c |  3 ---
 drivers/infiniband/hw/mlx5/main.c    | 41 ++++++++++++++++++------------------
 drivers/infiniband/hw/mlx5/mem.c     |  5 +----
 drivers/infiniband/hw/mlx5/mr.c      | 28 +++++++++++-------------
 include/rdma/ib_umem_odp.h           | 26 +++++++++++------------
 5 files changed, 46 insertions(+), 57 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 549d9eedf62e..d4f1a2ef5015 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -234,14 +234,11 @@ static int ib_uverbs_get_context(struct uverbs_attr_bundle *attrs)
 	ucontext->closing = false;
 	ucontext->cleanup_retryable = false;
 
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
 	mutex_init(&ucontext->per_mm_list_lock);
 	INIT_LIST_HEAD(&ucontext->per_mm_list);
 	if (!(ib_dev->attrs.device_cap_flags & IB_DEVICE_ON_DEMAND_PAGING))
 		ucontext->invalidate_range = NULL;
 
-#endif
-
 	resp.num_comp_vectors = file->device->num_comp_vectors;
 
 	ret = get_unused_fd_flags(O_CLOEXEC);
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 06ee1f0cb22d..11e9783cefcc 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -1763,9 +1763,9 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
 	if (err)
 		goto out_sys_pages;
 
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-	context->ibucontext.invalidate_range = &mlx5_ib_invalidate_range;
-#endif
+	if (ibdev->attrs.device_cap_flags & IB_DEVICE_ON_DEMAND_PAGING)
+		context->ibucontext.invalidate_range =
+			&mlx5_ib_invalidate_range;
 
 	if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX) {
 		err = mlx5_ib_devx_create(dev, true);
@@ -1897,12 +1897,10 @@ static int mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
 	struct mlx5_ib_dev *dev = to_mdev(ibcontext->device);
 	struct mlx5_bfreg_info *bfregi;
 
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
 	/* All umem's must be destroyed before destroying the ucontext. */
 	mutex_lock(&ibcontext->per_mm_list_lock);
 	WARN_ON(!list_empty(&ibcontext->per_mm_list));
 	mutex_unlock(&ibcontext->per_mm_list_lock);
-#endif
 
 	bfregi = &context->bfregi;
 	mlx5_ib_dealloc_transport_domain(dev, context->tdn, context->devx_uid);
@@ -5722,11 +5720,11 @@ static struct ib_counters *mlx5_ib_create_counters(struct ib_device *device,
 void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev)
 {
 	mlx5_ib_cleanup_multiport_master(dev);
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-	cleanup_srcu_struct(&dev->mr_srcu);
-	drain_workqueue(dev->advise_mr_wq);
-	destroy_workqueue(dev->advise_mr_wq);
-#endif
+	if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) {
+		cleanup_srcu_struct(&dev->mr_srcu);
+		drain_workqueue(dev->advise_mr_wq);
+		destroy_workqueue(dev->advise_mr_wq);
+	}
 	kfree(dev->port);
 }
 
@@ -5779,19 +5777,20 @@ int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
 	spin_lock_init(&dev->memic.memic_lock);
 	dev->memic.dev = mdev;
 
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-	dev->advise_mr_wq = alloc_ordered_workqueue("mlx5_ib_advise_mr_wq", 0);
-	if (!dev->advise_mr_wq) {
-		err = -ENOMEM;
-		goto err_mp;
-	}
+	if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) {
+		dev->advise_mr_wq =
+			alloc_ordered_workqueue("mlx5_ib_advise_mr_wq", 0);
+		if (!dev->advise_mr_wq) {
+			err = -ENOMEM;
+			goto err_mp;
+		}
 
-	err = init_srcu_struct(&dev->mr_srcu);
-	if (err) {
-		destroy_workqueue(dev->advise_mr_wq);
-		goto err_mp;
+		err = init_srcu_struct(&dev->mr_srcu);
+		if (err) {
+			destroy_workqueue(dev->advise_mr_wq);
+			goto err_mp;
+		}
 	}
-#endif
 
 	return 0;
 err_mp:
diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c
index 549234988bb4..9f90be296ee0 100644
--- a/drivers/infiniband/hw/mlx5/mem.c
+++ b/drivers/infiniband/hw/mlx5/mem.c
@@ -111,7 +111,6 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr,
 	*count = i;
 }
 
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
 static u64 umem_dma_to_mtt(dma_addr_t umem_dma)
 {
 	u64 mtt_entry = umem_dma & ODP_DMA_ADDR_MASK;
@@ -123,7 +122,6 @@ static u64 umem_dma_to_mtt(dma_addr_t umem_dma)
 
 	return mtt_entry;
 }
-#endif
 
 /*
  * Populate the given array with bus addresses from the umem.
@@ -151,7 +149,7 @@ void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
 	int len;
 	struct scatterlist *sg;
 	int entry;
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+
 	if (umem->is_odp) {
 		WARN_ON(shift != 0);
 		WARN_ON(access_flags != (MLX5_IB_MTT_READ | MLX5_IB_MTT_WRITE));
@@ -164,7 +162,6 @@ void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
 		}
 		return;
 	}
-#endif
 
 	i = 0;
 	for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index c389750f771e..494a90f4348c 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -71,10 +71,9 @@ static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
 {
 	int err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey);
 
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-	/* Wait until all page fault handlers using the mr complete. */
-	synchronize_srcu(&dev->mr_srcu);
-#endif
+	if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING))
+		/* Wait until all page fault handlers using the mr complete. */
+		synchronize_srcu(&dev->mr_srcu);
 
 	return err;
 }
@@ -254,9 +253,8 @@ static void remove_keys(struct mlx5_ib_dev *dev, int c, int num)
 		mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey);
 	}
 
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-	synchronize_srcu(&dev->mr_srcu);
-#endif
+	if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING))
+		synchronize_srcu(&dev->mr_srcu);
 
 	list_for_each_entry_safe(mr, tmp_mr, &del_list, list) {
 		list_del(&mr->list);
@@ -1329,8 +1327,8 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 	mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n",
 		    start, virt_addr, length, access_flags);
 
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-	if (!start && length == U64_MAX) {
+	if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) && !start &&
+	    length == U64_MAX) {
 		if (!(access_flags & IB_ACCESS_ON_DEMAND) ||
 		    !(dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT))
 			return ERR_PTR(-EINVAL);
@@ -1340,7 +1338,6 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 			return ERR_CAST(mr);
 		return &mr->ibmr;
 	}
-#endif
 
 	err = mr_umem_get(pd, start, length, access_flags, &umem, &npages,
 			   &page_shift, &ncont, &order);
@@ -1401,9 +1398,9 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 		}
 	}
 
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-	mr->live = 1;
-#endif
+	if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING))
+		mr->live = 1;
+
 	return &mr->ibmr;
 error:
 	ib_umem_release(umem);
@@ -1518,9 +1515,8 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
 		}
 
 		mr->allocated_from_cache = 0;
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-		mr->live = 1;
-#endif
+		if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING))
+			mr->live = 1;
 	} else {
 		/*
 		 * Send a UMR WQE
diff --git a/include/rdma/ib_umem_odp.h b/include/rdma/ib_umem_odp.h
index 0b1446fe2fab..d3725cf13ecd 100644
--- a/include/rdma/ib_umem_odp.h
+++ b/include/rdma/ib_umem_odp.h
@@ -83,6 +83,19 @@ static inline struct ib_umem_odp *to_ib_umem_odp(struct ib_umem *umem)
 	return container_of(umem, struct ib_umem_odp, umem);
 }
 
+/*
+ * The lower 2 bits of the DMA address signal the R/W permissions for
+ * the entry. To upgrade the permissions, provide the appropriate
+ * bitmask to the map_dma_pages function.
+ *
+ * Be aware that upgrading a mapped address might result in change of
+ * the DMA address for the page.
+ */
+#define ODP_READ_ALLOWED_BIT  (1<<0ULL)
+#define ODP_WRITE_ALLOWED_BIT (1<<1ULL)
+
+#define ODP_DMA_ADDR_MASK (~(ODP_READ_ALLOWED_BIT | ODP_WRITE_ALLOWED_BIT))
+
 #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
 
 struct ib_ucontext_per_mm {
@@ -107,19 +120,6 @@ struct ib_umem_odp *ib_alloc_odp_umem(struct ib_ucontext_per_mm *per_mm,
 				      unsigned long addr, size_t size);
 void ib_umem_odp_release(struct ib_umem_odp *umem_odp);
 
-/*
- * The lower 2 bits of the DMA address signal the R/W permissions for
- * the entry. To upgrade the permissions, provide the appropriate
- * bitmask to the map_dma_pages function.
- *
- * Be aware that upgrading a mapped address might result in change of
- * the DMA address for the page.
- */
-#define ODP_READ_ALLOWED_BIT  (1<<0ULL)
-#define ODP_WRITE_ALLOWED_BIT (1<<1ULL)
-
-#define ODP_DMA_ADDR_MASK (~(ODP_READ_ALLOWED_BIT | ODP_WRITE_ALLOWED_BIT))
-
 int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 start_offset,
 			      u64 bcnt, u64 access_mask,
 			      unsigned long current_seq);
-- 
cgit v1.2.3


From 0ada768517dafa1504ef5986ba04f118b7436960 Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Tue, 8 Jan 2019 16:07:27 +0200
Subject: RDMA/mlx5: Delete declaration of already removed function

The implementation of mlx5_core_page_fault_resume() was removed in commit
d5d284b829a6 ("{net,IB}/mlx5: Move Page fault EQ and ODP logic to
RDMA"). This patch removes declaration too.

Fixes: d5d284b829a6 ("{net,IB}/mlx5: Move Page fault EQ and ODP logic to RDMA")
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 include/linux/mlx5/driver.h | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 54299251d40d..b6f5839f129a 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -939,10 +939,6 @@ int mlx5_query_odp_caps(struct mlx5_core_dev *dev,
 			struct mlx5_odp_caps *odp_caps);
 int mlx5_core_query_ib_ppcnt(struct mlx5_core_dev *dev,
 			     u8 port_num, void *out, size_t sz);
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-int mlx5_core_page_fault_resume(struct mlx5_core_dev *dev, u32 token,
-				u32 wq_num, u8 type, int error);
-#endif
 
 int mlx5_init_rl_table(struct mlx5_core_dev *dev);
 void mlx5_cleanup_rl_table(struct mlx5_core_dev *dev);
-- 
cgit v1.2.3


From f3186dd876697e696d07136623d5cf0a6fb0bc0f Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Mon, 7 Jan 2019 16:51:50 +0100
Subject: spi: Optionally use GPIO descriptors for CS GPIOs

This augments the SPI core to optionally use GPIO descriptors
for chip select on a per-master-driver opt-in basis.

Drivers using this will rely on the SPI core to look up
GPIO descriptors associated with the device, such as
when using device tree or board files with GPIO descriptor
tables.

When getting descriptors from the device tree, this will in
turn activate the code in gpiolib that was
added in commit 6953c57ab172
("gpio: of: Handle SPI chipselect legacy bindings")
which means that these descriptors are aware of the active
low semantics that is the default for SPI CS GPIO lines
and we can assume that all of these are "active high" and
thus assign SPI_CS_HIGH to all CS lines on the DT path.

The previously used gpio_set_value() would call down into
gpiod_set_raw_value() and ignore the polarity inversion
semantics.

It seems like many drivers go to great lengths to set up the
CS GPIO line as non-asserted, respecting SPI_CS_HIGH. We pull
this out of the SPI drivers and into the core, and by simply
requesting the line as GPIOD_OUT_LOW when retrieveing it from
the device and relying on the gpiolib to handle any inversion
semantics. This way a lot of code can be simplified and
removed in each converted driver.

The end goal after dealing with each driver in turn, is to
delete the non-descriptor path (of_spi_register_master() for
example) and let the core deal with only descriptors.

The different SPI drivers have complex interactions with the
core so we cannot simply change them all over, we need to use
a stepwise, bisectable approach so that each driver can be
converted and fixed in isolation.

This patch has the intended side effect of adding support for
ACPI GPIOs as it starts relying on gpiod_get_*() to get
the GPIO handle associated with the device.

Cc: Linuxarm <linuxarm@huawei.com>
Acked-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Tested-by: Fangjian (Turing) <f.fangjian@huawei.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi.c       | 104 +++++++++++++++++++++++++++++++++++++++++++-----
 include/linux/spi/spi.h |  23 +++++++++--
 2 files changed, 113 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index 9a7def7c3237..13f447a67d67 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -19,6 +19,7 @@
 #include <linux/spi/spi.h>
 #include <linux/spi/spi-mem.h>
 #include <linux/of_gpio.h>
+#include <linux/gpio/consumer.h>
 #include <linux/pm_runtime.h>
 #include <linux/pm_domain.h>
 #include <linux/property.h>
@@ -578,7 +579,10 @@ int spi_add_device(struct spi_device *spi)
 		goto done;
 	}
 
-	if (ctlr->cs_gpios)
+	/* Descriptors take precedence */
+	if (ctlr->cs_gpiods)
+		spi->cs_gpiod = ctlr->cs_gpiods[spi->chip_select];
+	else if (ctlr->cs_gpios)
 		spi->cs_gpio = ctlr->cs_gpios[spi->chip_select];
 
 	/* Drivers may modify this initial i/o setup, but will
@@ -772,10 +776,20 @@ static void spi_set_cs(struct spi_device *spi, bool enable)
 	if (spi->mode & SPI_CS_HIGH)
 		enable = !enable;
 
-	if (gpio_is_valid(spi->cs_gpio)) {
-		/* Honour the SPI_NO_CS flag */
-		if (!(spi->mode & SPI_NO_CS))
-			gpio_set_value(spi->cs_gpio, !enable);
+	if (spi->cs_gpiod || gpio_is_valid(spi->cs_gpio)) {
+		/*
+		 * Honour the SPI_NO_CS flag and invert the enable line, as
+		 * active low is default for SPI. Execution paths that handle
+		 * polarity inversion in gpiolib (such as device tree) will
+		 * enforce active high using the SPI_CS_HIGH resulting in a
+		 * double inversion through the code above.
+		 */
+		if (!(spi->mode & SPI_NO_CS)) {
+			if (spi->cs_gpiod)
+				gpiod_set_value(spi->cs_gpiod, !enable);
+			else
+				gpio_set_value(spi->cs_gpio, !enable);
+		}
 		/* Some SPI masters need both GPIO CS & slave_select */
 		if ((spi->controller->flags & SPI_MASTER_GPIO_SS) &&
 		    spi->controller->set_cs)
@@ -1615,13 +1629,21 @@ static int of_spi_parse_dt(struct spi_controller *ctlr, struct spi_device *spi,
 		spi->mode |= SPI_CPHA;
 	if (of_property_read_bool(nc, "spi-cpol"))
 		spi->mode |= SPI_CPOL;
-	if (of_property_read_bool(nc, "spi-cs-high"))
-		spi->mode |= SPI_CS_HIGH;
 	if (of_property_read_bool(nc, "spi-3wire"))
 		spi->mode |= SPI_3WIRE;
 	if (of_property_read_bool(nc, "spi-lsb-first"))
 		spi->mode |= SPI_LSB_FIRST;
 
+	/*
+	 * For descriptors associated with the device, polarity inversion is
+	 * handled in the gpiolib, so all chip selects are "active high" in
+	 * the logical sense, the gpiolib will invert the line if need be.
+	 */
+	if (ctlr->use_gpio_descriptors)
+		spi->mode |= SPI_CS_HIGH;
+	else if (of_property_read_bool(nc, "spi-cs-high"))
+		spi->mode |= SPI_CS_HIGH;
+
 	/* Device DUAL/QUAD mode */
 	if (!of_property_read_u32(nc, "spi-tx-bus-width", &value)) {
 		switch (value) {
@@ -2137,6 +2159,60 @@ static int of_spi_register_master(struct spi_controller *ctlr)
 }
 #endif
 
+/**
+ * spi_get_gpio_descs() - grab chip select GPIOs for the master
+ * @ctlr: The SPI master to grab GPIO descriptors for
+ */
+static int spi_get_gpio_descs(struct spi_controller *ctlr)
+{
+	int nb, i;
+	struct gpio_desc **cs;
+	struct device *dev = &ctlr->dev;
+
+	nb = gpiod_count(dev, "cs");
+	ctlr->num_chipselect = max_t(int, nb, ctlr->num_chipselect);
+
+	/* No GPIOs at all is fine, else return the error */
+	if (nb == 0 || nb == -ENOENT)
+		return 0;
+	else if (nb < 0)
+		return nb;
+
+	cs = devm_kcalloc(dev, ctlr->num_chipselect, sizeof(*cs),
+			  GFP_KERNEL);
+	if (!cs)
+		return -ENOMEM;
+	ctlr->cs_gpiods = cs;
+
+	for (i = 0; i < nb; i++) {
+		/*
+		 * Most chipselects are active low, the inverted
+		 * semantics are handled by special quirks in gpiolib,
+		 * so initializing them GPIOD_OUT_LOW here means
+		 * "unasserted", in most cases this will drive the physical
+		 * line high.
+		 */
+		cs[i] = devm_gpiod_get_index_optional(dev, "cs", i,
+						      GPIOD_OUT_LOW);
+
+		if (cs[i]) {
+			/*
+			 * If we find a CS GPIO, name it after the device and
+			 * chip select line.
+			 */
+			char *gpioname;
+
+			gpioname = devm_kasprintf(dev, GFP_KERNEL, "%s CS%d",
+						  dev_name(dev), i);
+			if (!gpioname)
+				return -ENOMEM;
+			gpiod_set_consumer_name(cs[i], gpioname);
+		}
+	}
+
+	return 0;
+}
+
 static int spi_controller_check_ops(struct spi_controller *ctlr)
 {
 	/*
@@ -2199,9 +2275,16 @@ int spi_register_controller(struct spi_controller *ctlr)
 		return status;
 
 	if (!spi_controller_is_slave(ctlr)) {
-		status = of_spi_register_master(ctlr);
-		if (status)
-			return status;
+		if (ctlr->use_gpio_descriptors) {
+			status = spi_get_gpio_descs(ctlr);
+			if (status)
+				return status;
+		} else {
+			/* Legacy code path for GPIOs from DT */
+			status = of_spi_register_master(ctlr);
+			if (status)
+				return status;
+		}
 	}
 
 	/* even if it's just one always-selected device, there must
@@ -2915,6 +2998,7 @@ static int __spi_validate(struct spi_device *spi, struct spi_message *message)
 	 * cs_change is set for each transfer.
 	 */
 	if ((spi->mode & SPI_CS_WORD) && (!(ctlr->mode_bits & SPI_CS_WORD) ||
+					  spi->cs_gpiod ||
 					  gpio_is_valid(spi->cs_gpio))) {
 		size_t maxsize;
 		int ret;
diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index 314d922ca607..916bba47d156 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -12,6 +12,7 @@
 #include <linux/kthread.h>
 #include <linux/completion.h>
 #include <linux/scatterlist.h>
+#include <linux/gpio/consumer.h>
 
 struct dma_chan;
 struct property_entry;
@@ -116,7 +117,10 @@ void spi_statistics_add_transfer_stats(struct spi_statistics *stats,
  * @modalias: Name of the driver to use with this device, or an alias
  *	for that name.  This appears in the sysfs "modalias" attribute
  *	for driver coldplugging, and in uevents used for hotplugging
- * @cs_gpio: gpio number of the chipselect line (optional, -ENOENT when
+ * @cs_gpio: LEGACY: gpio number of the chipselect line (optional, -ENOENT when
+ *	not using a GPIO line) use cs_gpiod in new drivers by opting in on
+ *	the spi_master.
+ * @cs_gpiod: gpio descriptor of the chipselect line (optional, NULL when
  *	not using a GPIO line)
  *
  * @statistics: statistics for the spi_device
@@ -163,7 +167,8 @@ struct spi_device {
 	void			*controller_data;
 	char			modalias[SPI_NAME_SIZE];
 	const char		*driver_override;
-	int			cs_gpio;	/* chip select gpio */
+	int			cs_gpio;	/* LEGACY: chip select gpio */
+	struct gpio_desc	*cs_gpiod;	/* chip select gpio desc */
 
 	/* the statistics */
 	struct spi_statistics	statistics;
@@ -376,9 +381,17 @@ static inline void spi_unregister_driver(struct spi_driver *sdrv)
  *	     controller has native support for memory like operations.
  * @unprepare_message: undo any work done by prepare_message().
  * @slave_abort: abort the ongoing transfer request on an SPI slave controller
- * @cs_gpios: Array of GPIOs to use as chip select lines; one per CS
- *	number. Any individual value may be -ENOENT for CS lines that
+ * @cs_gpios: LEGACY: array of GPIO descs to use as chip select lines; one per
+ *	CS number. Any individual value may be -ENOENT for CS lines that
+ *	are not GPIOs (driven by the SPI controller itself). Use the cs_gpiods
+ *	in new drivers.
+ * @cs_gpiods: Array of GPIO descs to use as chip select lines; one per CS
+ *	number. Any individual value may be NULL for CS lines that
  *	are not GPIOs (driven by the SPI controller itself).
+ * @use_gpio_descriptors: Turns on the code in the SPI core to parse and grab
+ *	GPIO descriptors rather than using global GPIO numbers grabbed by the
+ *	driver. This will fill in @cs_gpiods and @cs_gpios should not be used,
+ *	and SPI devices will have the cs_gpiod assigned rather than cs_gpio.
  * @statistics: statistics for the spi_controller
  * @dma_tx: DMA transmit channel
  * @dma_rx: DMA receive channel
@@ -557,6 +570,8 @@ struct spi_controller {
 
 	/* gpio chip select */
 	int			*cs_gpios;
+	struct gpio_desc	**cs_gpiods;
+	bool			use_gpio_descriptors;
 
 	/* statistics */
 	struct spi_statistics	statistics;
-- 
cgit v1.2.3


From c6275e4b12bfb53097bca852dd49c15b1af50cb4 Mon Sep 17 00:00:00 2001
From: Stefan Wahren <stefan.wahren@i2se.com>
Date: Fri, 7 Dec 2018 19:21:10 +0100
Subject: soc: bcm2835: sync firmware properties with downstream

Add latest firmware property tags from the latest Raspberry Pi downstream
kernel. This is needed to use the reboot notify in the following
commit.

Signed-off-by: Stefan Wahren <stefan.wahren@i2se.com>
Reviewed-by: Eric Anholt <eric@anholt.net>
---
 include/soc/bcm2835/raspberrypi-firmware.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/soc/bcm2835/raspberrypi-firmware.h b/include/soc/bcm2835/raspberrypi-firmware.h
index 4be1aa4435ae..7800e12ee042 100644
--- a/include/soc/bcm2835/raspberrypi-firmware.h
+++ b/include/soc/bcm2835/raspberrypi-firmware.h
@@ -73,6 +73,8 @@ enum rpi_firmware_property_tag {
 	RPI_FIRMWARE_GET_CUSTOMER_OTP =                       0x00030021,
 	RPI_FIRMWARE_GET_DOMAIN_STATE =                       0x00030030,
 	RPI_FIRMWARE_GET_THROTTLED =                          0x00030046,
+	RPI_FIRMWARE_GET_CLOCK_MEASURED =                     0x00030047,
+	RPI_FIRMWARE_NOTIFY_REBOOT =                          0x00030048,
 	RPI_FIRMWARE_SET_CLOCK_STATE =                        0x00038001,
 	RPI_FIRMWARE_SET_CLOCK_RATE =                         0x00038002,
 	RPI_FIRMWARE_SET_VOLTAGE =                            0x00038003,
@@ -86,6 +88,8 @@ enum rpi_firmware_property_tag {
 	RPI_FIRMWARE_SET_GPIO_CONFIG =                        0x00038043,
 	RPI_FIRMWARE_GET_PERIPH_REG =                         0x00030045,
 	RPI_FIRMWARE_SET_PERIPH_REG =                         0x00038045,
+	RPI_FIRMWARE_GET_POE_HAT_VAL =                        0x00030049,
+	RPI_FIRMWARE_SET_POE_HAT_VAL =                        0x00030050,
 
 
 	/* Dispmanx TAGS */
-- 
cgit v1.2.3


From 5e6acc3e678ed3db746ab4fb53a980861cd711b6 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Wed, 12 Dec 2018 15:51:47 -0800
Subject: bcm2835-pm: Move bcm2835-watchdog's DT probe to an MFD.

The PM block that the wdt driver was binding to actually has multiple
features we want to expose (power domains, reset, watchdog).  Move the
DT attachment to a MFD driver and make WDT probe against MFD.

Signed-off-by: Eric Anholt <eric@anholt.net>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Acked-by: Stefan Wahren <stefan.wahren@i2se.com>
Signed-off-by: Stefan Wahren <stefan.wahren@i2se.com>
---
 arch/arm/mach-bcm/Kconfig      |  1 +
 drivers/mfd/Makefile           |  1 +
 drivers/mfd/bcm2835-pm.c       | 64 ++++++++++++++++++++++++++++++++++++++++++
 drivers/watchdog/bcm2835_wdt.c | 26 ++++++-----------
 include/linux/mfd/bcm2835-pm.h | 13 +++++++++
 5 files changed, 88 insertions(+), 17 deletions(-)
 create mode 100644 drivers/mfd/bcm2835-pm.c
 create mode 100644 include/linux/mfd/bcm2835-pm.h

(limited to 'include')

diff --git a/arch/arm/mach-bcm/Kconfig b/arch/arm/mach-bcm/Kconfig
index a067adf9f1ee..4ef1e55f4a0b 100644
--- a/arch/arm/mach-bcm/Kconfig
+++ b/arch/arm/mach-bcm/Kconfig
@@ -167,6 +167,7 @@ config ARCH_BCM2835
 	select BCM2835_TIMER
 	select PINCTRL
 	select PINCTRL_BCM2835
+	select MFD_CORE
 	help
 	  This enables support for the Broadcom BCM2835 and BCM2836 SoCs.
 	  This SoC is used in the Raspberry Pi and Roku 2 devices.
diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
index 12980a4ad460..ee6fb6af655e 100644
--- a/drivers/mfd/Makefile
+++ b/drivers/mfd/Makefile
@@ -10,6 +10,7 @@ obj-$(CONFIG_MFD_88PM805)	+= 88pm805.o 88pm80x.o
 obj-$(CONFIG_MFD_ACT8945A)	+= act8945a.o
 obj-$(CONFIG_MFD_SM501)		+= sm501.o
 obj-$(CONFIG_MFD_ASIC3)		+= asic3.o tmio_core.o
+obj-$(CONFIG_ARCH_BCM2835)	+= bcm2835-pm.o
 obj-$(CONFIG_MFD_BCM590XX)	+= bcm590xx.o
 obj-$(CONFIG_MFD_BD9571MWV)	+= bd9571mwv.o
 cros_ec_core-objs		:= cros_ec.o
diff --git a/drivers/mfd/bcm2835-pm.c b/drivers/mfd/bcm2835-pm.c
new file mode 100644
index 000000000000..53839e6a81e7
--- /dev/null
+++ b/drivers/mfd/bcm2835-pm.c
@@ -0,0 +1,64 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * PM MFD driver for Broadcom BCM2835
+ *
+ * This driver binds to the PM block and creates the MFD device for
+ * the WDT driver.
+ */
+
+#include <linux/delay.h>
+#include <linux/io.h>
+#include <linux/mfd/bcm2835-pm.h>
+#include <linux/mfd/core.h>
+#include <linux/module.h>
+#include <linux/of_address.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/types.h>
+#include <linux/watchdog.h>
+
+static const struct mfd_cell bcm2835_pm_devs[] = {
+	{ .name = "bcm2835-wdt" },
+};
+
+static int bcm2835_pm_probe(struct platform_device *pdev)
+{
+	struct resource *res;
+	struct device *dev = &pdev->dev;
+	struct bcm2835_pm *pm;
+
+	pm = devm_kzalloc(dev, sizeof(*pm), GFP_KERNEL);
+	if (!pm)
+		return -ENOMEM;
+	platform_set_drvdata(pdev, pm);
+
+	pm->dev = dev;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	pm->base = devm_ioremap_resource(dev, res);
+	if (IS_ERR(pm->base))
+		return PTR_ERR(pm->base);
+
+	return devm_mfd_add_devices(dev, -1,
+				    bcm2835_pm_devs, ARRAY_SIZE(bcm2835_pm_devs),
+				    NULL, 0, NULL);
+}
+
+static const struct of_device_id bcm2835_pm_of_match[] = {
+	{ .compatible = "brcm,bcm2835-pm-wdt", },
+	{},
+};
+MODULE_DEVICE_TABLE(of, bcm2835_pm_of_match);
+
+static struct platform_driver bcm2835_pm_driver = {
+	.probe		= bcm2835_pm_probe,
+	.driver = {
+		.name =	"bcm2835-pm",
+		.of_match_table = bcm2835_pm_of_match,
+	},
+};
+module_platform_driver(bcm2835_pm_driver);
+
+MODULE_AUTHOR("Eric Anholt <eric@anholt.net>");
+MODULE_DESCRIPTION("Driver for Broadcom BCM2835 PM MFD");
+MODULE_LICENSE("GPL");
diff --git a/drivers/watchdog/bcm2835_wdt.c b/drivers/watchdog/bcm2835_wdt.c
index ed05514cc2dc..1834524ae373 100644
--- a/drivers/watchdog/bcm2835_wdt.c
+++ b/drivers/watchdog/bcm2835_wdt.c
@@ -12,6 +12,7 @@
 
 #include <linux/delay.h>
 #include <linux/types.h>
+#include <linux/mfd/bcm2835-pm.h>
 #include <linux/module.h>
 #include <linux/io.h>
 #include <linux/watchdog.h>
@@ -47,6 +48,8 @@ struct bcm2835_wdt {
 	spinlock_t		lock;
 };
 
+static struct bcm2835_wdt *bcm2835_power_off_wdt;
+
 static unsigned int heartbeat;
 static bool nowayout = WATCHDOG_NOWAYOUT;
 
@@ -148,10 +151,7 @@ static struct watchdog_device bcm2835_wdt_wdd = {
  */
 static void bcm2835_power_off(void)
 {
-	struct device_node *np =
-		of_find_compatible_node(NULL, NULL, "brcm,bcm2835-pm-wdt");
-	struct platform_device *pdev = of_find_device_by_node(np);
-	struct bcm2835_wdt *wdt = platform_get_drvdata(pdev);
+	struct bcm2835_wdt *wdt = bcm2835_power_off_wdt;
 	u32 val;
 
 	/*
@@ -169,7 +169,7 @@ static void bcm2835_power_off(void)
 
 static int bcm2835_wdt_probe(struct platform_device *pdev)
 {
-	struct resource *res;
+	struct bcm2835_pm *pm = dev_get_drvdata(pdev->dev.parent);
 	struct device *dev = &pdev->dev;
 	struct bcm2835_wdt *wdt;
 	int err;
@@ -181,10 +181,7 @@ static int bcm2835_wdt_probe(struct platform_device *pdev)
 
 	spin_lock_init(&wdt->lock);
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	wdt->base = devm_ioremap_resource(dev, res);
-	if (IS_ERR(wdt->base))
-		return PTR_ERR(wdt->base);
+	wdt->base = pm->base;
 
 	watchdog_set_drvdata(&bcm2835_wdt_wdd, wdt);
 	watchdog_init_timeout(&bcm2835_wdt_wdd, heartbeat, dev);
@@ -211,8 +208,10 @@ static int bcm2835_wdt_probe(struct platform_device *pdev)
 		return err;
 	}
 
-	if (pm_power_off == NULL)
+	if (pm_power_off == NULL) {
 		pm_power_off = bcm2835_power_off;
+		bcm2835_power_off_wdt = wdt;
+	}
 
 	dev_info(dev, "Broadcom BCM2835 watchdog timer");
 	return 0;
@@ -226,18 +225,11 @@ static int bcm2835_wdt_remove(struct platform_device *pdev)
 	return 0;
 }
 
-static const struct of_device_id bcm2835_wdt_of_match[] = {
-	{ .compatible = "brcm,bcm2835-pm-wdt", },
-	{},
-};
-MODULE_DEVICE_TABLE(of, bcm2835_wdt_of_match);
-
 static struct platform_driver bcm2835_wdt_driver = {
 	.probe		= bcm2835_wdt_probe,
 	.remove		= bcm2835_wdt_remove,
 	.driver = {
 		.name =		"bcm2835-wdt",
-		.of_match_table = bcm2835_wdt_of_match,
 	},
 };
 module_platform_driver(bcm2835_wdt_driver);
diff --git a/include/linux/mfd/bcm2835-pm.h b/include/linux/mfd/bcm2835-pm.h
new file mode 100644
index 000000000000..b7d0ee1feffa
--- /dev/null
+++ b/include/linux/mfd/bcm2835-pm.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+
+#ifndef BCM2835_MFD_PM_H
+#define BCM2835_MFD_PM_H
+
+#include <linux/regmap.h>
+
+struct bcm2835_pm {
+	struct device *dev;
+	void __iomem *base;
+};
+
+#endif /* BCM2835_MFD_PM_H */
-- 
cgit v1.2.3


From 670c672608a1ffcbc7ac0f872734843593bb8b15 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Wed, 12 Dec 2018 15:51:48 -0800
Subject: soc: bcm: bcm2835-pm: Add support for power domains under a new
 binding.

This provides a free software alternative to raspberrypi-power.c's
firmware calls to manage power domains.  It also exposes a reset line,
where previously the vc4 driver had to try to force power off the
domain in order to trigger a reset.

Signed-off-by: Eric Anholt <eric@anholt.net>
Acked-by: Rob Herring <robh@kernel.org>
Acked-by: Stefan Wahren <stefan.wahren@i2se.com>
Signed-off-by: Stefan Wahren <stefan.wahren@i2se.com>
---
 drivers/mfd/bcm2835-pm.c             |  36 +-
 drivers/soc/bcm/Kconfig              |  11 +
 drivers/soc/bcm/Makefile             |   1 +
 drivers/soc/bcm/bcm2835-power.c      | 661 +++++++++++++++++++++++++++++++++++
 include/dt-bindings/soc/bcm2835-pm.h |  28 ++
 include/linux/mfd/bcm2835-pm.h       |   1 +
 6 files changed, 734 insertions(+), 4 deletions(-)
 create mode 100644 drivers/soc/bcm/bcm2835-power.c
 create mode 100644 include/dt-bindings/soc/bcm2835-pm.h

(limited to 'include')

diff --git a/drivers/mfd/bcm2835-pm.c b/drivers/mfd/bcm2835-pm.c
index 53839e6a81e7..42fe67f1538e 100644
--- a/drivers/mfd/bcm2835-pm.c
+++ b/drivers/mfd/bcm2835-pm.c
@@ -3,7 +3,7 @@
  * PM MFD driver for Broadcom BCM2835
  *
  * This driver binds to the PM block and creates the MFD device for
- * the WDT driver.
+ * the WDT and power drivers.
  */
 
 #include <linux/delay.h>
@@ -21,11 +21,16 @@ static const struct mfd_cell bcm2835_pm_devs[] = {
 	{ .name = "bcm2835-wdt" },
 };
 
+static const struct mfd_cell bcm2835_power_devs[] = {
+	{ .name = "bcm2835-power" },
+};
+
 static int bcm2835_pm_probe(struct platform_device *pdev)
 {
 	struct resource *res;
 	struct device *dev = &pdev->dev;
 	struct bcm2835_pm *pm;
+	int ret;
 
 	pm = devm_kzalloc(dev, sizeof(*pm), GFP_KERNEL);
 	if (!pm)
@@ -39,13 +44,36 @@ static int bcm2835_pm_probe(struct platform_device *pdev)
 	if (IS_ERR(pm->base))
 		return PTR_ERR(pm->base);
 
-	return devm_mfd_add_devices(dev, -1,
-				    bcm2835_pm_devs, ARRAY_SIZE(bcm2835_pm_devs),
-				    NULL, 0, NULL);
+	ret = devm_mfd_add_devices(dev, -1,
+				   bcm2835_pm_devs, ARRAY_SIZE(bcm2835_pm_devs),
+				   NULL, 0, NULL);
+	if (ret)
+		return ret;
+
+	/* We'll use the presence of the AXI ASB regs in the
+	 * bcm2835-pm binding as the key for whether we can reference
+	 * the full PM register range and support power domains.
+	 */
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	if (res) {
+		pm->asb = devm_ioremap_resource(dev, res);
+		if (IS_ERR(pm->asb))
+			return PTR_ERR(pm->asb);
+
+		ret = devm_mfd_add_devices(dev, -1,
+					   bcm2835_power_devs,
+					   ARRAY_SIZE(bcm2835_power_devs),
+					   NULL, 0, NULL);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
 }
 
 static const struct of_device_id bcm2835_pm_of_match[] = {
 	{ .compatible = "brcm,bcm2835-pm-wdt", },
+	{ .compatible = "brcm,bcm2835-pm", },
 	{},
 };
 MODULE_DEVICE_TABLE(of, bcm2835_pm_of_match);
diff --git a/drivers/soc/bcm/Kconfig b/drivers/soc/bcm/Kconfig
index 055a845ed979..fe1af29560e9 100644
--- a/drivers/soc/bcm/Kconfig
+++ b/drivers/soc/bcm/Kconfig
@@ -1,5 +1,16 @@
 menu "Broadcom SoC drivers"
 
+config BCM2835_POWER
+	bool "BCM2835 power domain driver"
+	depends on ARCH_BCM2835 || (COMPILE_TEST && OF)
+	select PM_GENERIC_DOMAINS if PM
+	select RESET_CONTROLLER
+	help
+	  This enables support for the BCM2835 power domains and reset
+	  controller.  Any usage of power domains by the Raspberry Pi
+	  firmware means that Linux usage of the same power domain
+	  must be accessed using the RASPBERRYPI_POWER driver
+
 config RASPBERRYPI_POWER
 	bool "Raspberry Pi power domain driver"
 	depends on ARCH_BCM2835 || (COMPILE_TEST && OF)
diff --git a/drivers/soc/bcm/Makefile b/drivers/soc/bcm/Makefile
index dc4fced72d21..c81df4b2403c 100644
--- a/drivers/soc/bcm/Makefile
+++ b/drivers/soc/bcm/Makefile
@@ -1,2 +1,3 @@
+obj-$(CONFIG_BCM2835_POWER)	+= bcm2835-power.o
 obj-$(CONFIG_RASPBERRYPI_POWER)	+= raspberrypi-power.o
 obj-$(CONFIG_SOC_BRCMSTB)	+= brcmstb/
diff --git a/drivers/soc/bcm/bcm2835-power.c b/drivers/soc/bcm/bcm2835-power.c
new file mode 100644
index 000000000000..48412957ec7a
--- /dev/null
+++ b/drivers/soc/bcm/bcm2835-power.c
@@ -0,0 +1,661 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Power domain driver for Broadcom BCM2835
+ *
+ * Copyright (C) 2018 Broadcom
+ */
+
+#include <dt-bindings/soc/bcm2835-pm.h>
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/io.h>
+#include <linux/mfd/bcm2835-pm.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/pm_domain.h>
+#include <linux/reset-controller.h>
+#include <linux/types.h>
+
+#define PM_GNRIC                        0x00
+#define PM_AUDIO                        0x04
+#define PM_STATUS                       0x18
+#define PM_RSTC				0x1c
+#define PM_RSTS				0x20
+#define PM_WDOG				0x24
+#define PM_PADS0			0x28
+#define PM_PADS2			0x2c
+#define PM_PADS3			0x30
+#define PM_PADS4			0x34
+#define PM_PADS5			0x38
+#define PM_PADS6			0x3c
+#define PM_CAM0				0x44
+#define PM_CAM0_LDOHPEN			BIT(2)
+#define PM_CAM0_LDOLPEN			BIT(1)
+#define PM_CAM0_CTRLEN			BIT(0)
+
+#define PM_CAM1				0x48
+#define PM_CAM1_LDOHPEN			BIT(2)
+#define PM_CAM1_LDOLPEN			BIT(1)
+#define PM_CAM1_CTRLEN			BIT(0)
+
+#define PM_CCP2TX			0x4c
+#define PM_CCP2TX_LDOEN			BIT(1)
+#define PM_CCP2TX_CTRLEN		BIT(0)
+
+#define PM_DSI0				0x50
+#define PM_DSI0_LDOHPEN			BIT(2)
+#define PM_DSI0_LDOLPEN			BIT(1)
+#define PM_DSI0_CTRLEN			BIT(0)
+
+#define PM_DSI1				0x54
+#define PM_DSI1_LDOHPEN			BIT(2)
+#define PM_DSI1_LDOLPEN			BIT(1)
+#define PM_DSI1_CTRLEN			BIT(0)
+
+#define PM_HDMI				0x58
+#define PM_HDMI_RSTDR			BIT(19)
+#define PM_HDMI_LDOPD			BIT(1)
+#define PM_HDMI_CTRLEN			BIT(0)
+
+#define PM_USB				0x5c
+/* The power gates must be enabled with this bit before enabling the LDO in the
+ * USB block.
+ */
+#define PM_USB_CTRLEN			BIT(0)
+
+#define PM_PXLDO			0x60
+#define PM_PXBG				0x64
+#define PM_DFT				0x68
+#define PM_SMPS				0x6c
+#define PM_XOSC				0x70
+#define PM_SPAREW			0x74
+#define PM_SPARER			0x78
+#define PM_AVS_RSTDR			0x7c
+#define PM_AVS_STAT			0x80
+#define PM_AVS_EVENT			0x84
+#define PM_AVS_INTEN			0x88
+#define PM_DUMMY			0xfc
+
+#define PM_IMAGE			0x108
+#define PM_GRAFX			0x10c
+#define PM_PROC				0x110
+#define PM_ENAB				BIT(12)
+#define PM_ISPRSTN			BIT(8)
+#define PM_H264RSTN			BIT(7)
+#define PM_PERIRSTN			BIT(6)
+#define PM_V3DRSTN			BIT(6)
+#define PM_ISFUNC			BIT(5)
+#define PM_MRDONE			BIT(4)
+#define PM_MEMREP			BIT(3)
+#define PM_ISPOW			BIT(2)
+#define PM_POWOK			BIT(1)
+#define PM_POWUP			BIT(0)
+#define PM_INRUSH_SHIFT			13
+#define PM_INRUSH_3_5_MA		0
+#define PM_INRUSH_5_MA			1
+#define PM_INRUSH_10_MA			2
+#define PM_INRUSH_20_MA			3
+#define PM_INRUSH_MASK			(3 << PM_INRUSH_SHIFT)
+
+#define PM_PASSWORD			0x5a000000
+
+#define PM_WDOG_TIME_SET		0x000fffff
+#define PM_RSTC_WRCFG_CLR		0xffffffcf
+#define PM_RSTS_HADWRH_SET		0x00000040
+#define PM_RSTC_WRCFG_SET		0x00000030
+#define PM_RSTC_WRCFG_FULL_RESET	0x00000020
+#define PM_RSTC_RESET			0x00000102
+
+#define PM_READ(reg) readl(power->base + (reg))
+#define PM_WRITE(reg, val) writel(PM_PASSWORD | (val), power->base + (reg))
+
+#define ASB_BRDG_VERSION                0x00
+#define ASB_CPR_CTRL                    0x04
+
+#define ASB_V3D_S_CTRL			0x08
+#define ASB_V3D_M_CTRL			0x0c
+#define ASB_ISP_S_CTRL			0x10
+#define ASB_ISP_M_CTRL			0x14
+#define ASB_H264_S_CTRL			0x18
+#define ASB_H264_M_CTRL			0x1c
+
+#define ASB_REQ_STOP                    BIT(0)
+#define ASB_ACK                         BIT(1)
+#define ASB_EMPTY                       BIT(2)
+#define ASB_FULL                        BIT(3)
+
+#define ASB_AXI_BRDG_ID			0x20
+
+#define ASB_READ(reg) readl(power->asb + (reg))
+#define ASB_WRITE(reg, val) writel(PM_PASSWORD | (val), power->asb + (reg))
+
+struct bcm2835_power_domain {
+	struct generic_pm_domain base;
+	struct bcm2835_power *power;
+	u32 domain;
+	struct clk *clk;
+};
+
+struct bcm2835_power {
+	struct device		*dev;
+	/* PM registers. */
+	void __iomem		*base;
+	/* AXI Async bridge registers. */
+	void __iomem		*asb;
+
+	struct genpd_onecell_data pd_xlate;
+	struct bcm2835_power_domain domains[BCM2835_POWER_DOMAIN_COUNT];
+	struct reset_controller_dev reset;
+};
+
+static int bcm2835_asb_enable(struct bcm2835_power *power, u32 reg)
+{
+	u64 start = ktime_get_ns();
+
+	/* Enable the module's async AXI bridges. */
+	ASB_WRITE(reg, ASB_READ(reg) & ~ASB_REQ_STOP);
+	while (ASB_READ(reg) & ASB_ACK) {
+		cpu_relax();
+		if (ktime_get_ns() - start >= 1000)
+			return -ETIMEDOUT;
+	}
+
+	return 0;
+}
+
+static int bcm2835_asb_disable(struct bcm2835_power *power, u32 reg)
+{
+	u64 start = ktime_get_ns();
+
+	/* Enable the module's async AXI bridges. */
+	ASB_WRITE(reg, ASB_READ(reg) | ASB_REQ_STOP);
+	while (!(ASB_READ(reg) & ASB_ACK)) {
+		cpu_relax();
+		if (ktime_get_ns() - start >= 1000)
+			return -ETIMEDOUT;
+	}
+
+	return 0;
+}
+
+static int bcm2835_power_power_off(struct bcm2835_power_domain *pd, u32 pm_reg)
+{
+	struct bcm2835_power *power = pd->power;
+
+	/* Enable functional isolation */
+	PM_WRITE(pm_reg, PM_READ(pm_reg) & ~PM_ISFUNC);
+
+	/* Enable electrical isolation */
+	PM_WRITE(pm_reg, PM_READ(pm_reg) & ~PM_ISPOW);
+
+	/* Open the power switches. */
+	PM_WRITE(pm_reg, PM_READ(pm_reg) & ~PM_POWUP);
+
+	return 0;
+}
+
+static int bcm2835_power_power_on(struct bcm2835_power_domain *pd, u32 pm_reg)
+{
+	struct bcm2835_power *power = pd->power;
+	struct device *dev = power->dev;
+	u64 start;
+	int ret;
+	int inrush;
+	bool powok;
+
+	/* If it was already powered on by the fw, leave it that way. */
+	if (PM_READ(pm_reg) & PM_POWUP)
+		return 0;
+
+	/* Enable power.  Allowing too much current at once may result
+	 * in POWOK never getting set, so start low and ramp it up as
+	 * necessary to succeed.
+	 */
+	powok = false;
+	for (inrush = PM_INRUSH_3_5_MA; inrush <= PM_INRUSH_20_MA; inrush++) {
+		PM_WRITE(pm_reg,
+			 (PM_READ(pm_reg) & ~PM_INRUSH_MASK) |
+			 (inrush << PM_INRUSH_SHIFT) |
+			 PM_POWUP);
+
+		start = ktime_get_ns();
+		while (!(powok = !!(PM_READ(pm_reg) & PM_POWOK))) {
+			cpu_relax();
+			if (ktime_get_ns() - start >= 3000)
+				break;
+		}
+	}
+	if (!powok) {
+		dev_err(dev, "Timeout waiting for %s power OK\n",
+			pd->base.name);
+		ret = -ETIMEDOUT;
+		goto err_disable_powup;
+	}
+
+	/* Disable electrical isolation */
+	PM_WRITE(pm_reg, PM_READ(pm_reg) | PM_ISPOW);
+
+	/* Repair memory */
+	PM_WRITE(pm_reg, PM_READ(pm_reg) | PM_MEMREP);
+	start = ktime_get_ns();
+	while (!(PM_READ(pm_reg) & PM_MRDONE)) {
+		cpu_relax();
+		if (ktime_get_ns() - start >= 1000) {
+			dev_err(dev, "Timeout waiting for %s memory repair\n",
+				pd->base.name);
+			ret = -ETIMEDOUT;
+			goto err_disable_ispow;
+		}
+	}
+
+	/* Disable functional isolation */
+	PM_WRITE(pm_reg, PM_READ(pm_reg) | PM_ISFUNC);
+
+	return 0;
+
+err_disable_ispow:
+	PM_WRITE(pm_reg, PM_READ(pm_reg) & ~PM_ISPOW);
+err_disable_powup:
+	PM_WRITE(pm_reg, PM_READ(pm_reg) & ~(PM_POWUP | PM_INRUSH_MASK));
+	return ret;
+}
+
+static int bcm2835_asb_power_on(struct bcm2835_power_domain *pd,
+				u32 pm_reg,
+				u32 asb_m_reg,
+				u32 asb_s_reg,
+				u32 reset_flags)
+{
+	struct bcm2835_power *power = pd->power;
+	int ret;
+
+	ret = clk_prepare_enable(pd->clk);
+	if (ret) {
+		dev_err(power->dev, "Failed to enable clock for %s\n",
+			pd->base.name);
+		return ret;
+	}
+
+	/* Wait 32 clocks for reset to propagate, 1 us will be enough */
+	udelay(1);
+
+	clk_disable_unprepare(pd->clk);
+
+	/* Deassert the resets. */
+	PM_WRITE(pm_reg, PM_READ(pm_reg) | reset_flags);
+
+	ret = clk_prepare_enable(pd->clk);
+	if (ret) {
+		dev_err(power->dev, "Failed to enable clock for %s\n",
+			pd->base.name);
+		goto err_enable_resets;
+	}
+
+	ret = bcm2835_asb_enable(power, asb_m_reg);
+	if (ret) {
+		dev_err(power->dev, "Failed to enable ASB master for %s\n",
+			pd->base.name);
+		goto err_disable_clk;
+	}
+	ret = bcm2835_asb_enable(power, asb_s_reg);
+	if (ret) {
+		dev_err(power->dev, "Failed to enable ASB slave for %s\n",
+			pd->base.name);
+		goto err_disable_asb_master;
+	}
+
+	return 0;
+
+err_disable_asb_master:
+	bcm2835_asb_disable(power, asb_m_reg);
+err_disable_clk:
+	clk_disable_unprepare(pd->clk);
+err_enable_resets:
+	PM_WRITE(pm_reg, PM_READ(pm_reg) & ~reset_flags);
+	return ret;
+}
+
+static int bcm2835_asb_power_off(struct bcm2835_power_domain *pd,
+				 u32 pm_reg,
+				 u32 asb_m_reg,
+				 u32 asb_s_reg,
+				 u32 reset_flags)
+{
+	struct bcm2835_power *power = pd->power;
+	int ret;
+
+	ret = bcm2835_asb_disable(power, asb_s_reg);
+	if (ret) {
+		dev_warn(power->dev, "Failed to disable ASB slave for %s\n",
+			 pd->base.name);
+		return ret;
+	}
+	ret = bcm2835_asb_disable(power, asb_m_reg);
+	if (ret) {
+		dev_warn(power->dev, "Failed to disable ASB master for %s\n",
+			 pd->base.name);
+		bcm2835_asb_enable(power, asb_s_reg);
+		return ret;
+	}
+
+	clk_disable_unprepare(pd->clk);
+
+	/* Assert the resets. */
+	PM_WRITE(pm_reg, PM_READ(pm_reg) & ~reset_flags);
+
+	return 0;
+}
+
+static int bcm2835_power_pd_power_on(struct generic_pm_domain *domain)
+{
+	struct bcm2835_power_domain *pd =
+		container_of(domain, struct bcm2835_power_domain, base);
+	struct bcm2835_power *power = pd->power;
+
+	switch (pd->domain) {
+	case BCM2835_POWER_DOMAIN_GRAFX:
+		return bcm2835_power_power_on(pd, PM_GRAFX);
+
+	case BCM2835_POWER_DOMAIN_GRAFX_V3D:
+		return bcm2835_asb_power_on(pd, PM_GRAFX,
+					    ASB_V3D_M_CTRL, ASB_V3D_S_CTRL,
+					    PM_V3DRSTN);
+
+	case BCM2835_POWER_DOMAIN_IMAGE:
+		return bcm2835_power_power_on(pd, PM_IMAGE);
+
+	case BCM2835_POWER_DOMAIN_IMAGE_PERI:
+		return bcm2835_asb_power_on(pd, PM_IMAGE,
+					    0, 0,
+					    PM_PERIRSTN);
+
+	case BCM2835_POWER_DOMAIN_IMAGE_ISP:
+		return bcm2835_asb_power_on(pd, PM_IMAGE,
+					    ASB_ISP_M_CTRL, ASB_ISP_S_CTRL,
+					    PM_ISPRSTN);
+
+	case BCM2835_POWER_DOMAIN_IMAGE_H264:
+		return bcm2835_asb_power_on(pd, PM_IMAGE,
+					    ASB_H264_M_CTRL, ASB_H264_S_CTRL,
+					    PM_H264RSTN);
+
+	case BCM2835_POWER_DOMAIN_USB:
+		PM_WRITE(PM_USB, PM_USB_CTRLEN);
+		return 0;
+
+	case BCM2835_POWER_DOMAIN_DSI0:
+		PM_WRITE(PM_DSI0, PM_DSI0_CTRLEN);
+		PM_WRITE(PM_DSI0, PM_DSI0_CTRLEN | PM_DSI0_LDOHPEN);
+		return 0;
+
+	case BCM2835_POWER_DOMAIN_DSI1:
+		PM_WRITE(PM_DSI1, PM_DSI1_CTRLEN);
+		PM_WRITE(PM_DSI1, PM_DSI1_CTRLEN | PM_DSI1_LDOHPEN);
+		return 0;
+
+	case BCM2835_POWER_DOMAIN_CCP2TX:
+		PM_WRITE(PM_CCP2TX, PM_CCP2TX_CTRLEN);
+		PM_WRITE(PM_CCP2TX, PM_CCP2TX_CTRLEN | PM_CCP2TX_LDOEN);
+		return 0;
+
+	case BCM2835_POWER_DOMAIN_HDMI:
+		PM_WRITE(PM_HDMI, PM_READ(PM_HDMI) | PM_HDMI_RSTDR);
+		PM_WRITE(PM_HDMI, PM_READ(PM_HDMI) | PM_HDMI_CTRLEN);
+		PM_WRITE(PM_HDMI, PM_READ(PM_HDMI) & ~PM_HDMI_LDOPD);
+		usleep_range(100, 200);
+		PM_WRITE(PM_HDMI, PM_READ(PM_HDMI) & ~PM_HDMI_RSTDR);
+		return 0;
+
+	default:
+		dev_err(power->dev, "Invalid domain %d\n", pd->domain);
+		return -EINVAL;
+	}
+}
+
+static int bcm2835_power_pd_power_off(struct generic_pm_domain *domain)
+{
+	struct bcm2835_power_domain *pd =
+		container_of(domain, struct bcm2835_power_domain, base);
+	struct bcm2835_power *power = pd->power;
+
+	switch (pd->domain) {
+	case BCM2835_POWER_DOMAIN_GRAFX:
+		return bcm2835_power_power_off(pd, PM_GRAFX);
+
+	case BCM2835_POWER_DOMAIN_GRAFX_V3D:
+		return bcm2835_asb_power_off(pd, PM_GRAFX,
+					     ASB_V3D_M_CTRL, ASB_V3D_S_CTRL,
+					     PM_V3DRSTN);
+
+	case BCM2835_POWER_DOMAIN_IMAGE:
+		return bcm2835_power_power_off(pd, PM_IMAGE);
+
+	case BCM2835_POWER_DOMAIN_IMAGE_PERI:
+		return bcm2835_asb_power_off(pd, PM_IMAGE,
+					     0, 0,
+					     PM_PERIRSTN);
+
+	case BCM2835_POWER_DOMAIN_IMAGE_ISP:
+		return bcm2835_asb_power_off(pd, PM_IMAGE,
+					     ASB_ISP_M_CTRL, ASB_ISP_S_CTRL,
+					     PM_ISPRSTN);
+
+	case BCM2835_POWER_DOMAIN_IMAGE_H264:
+		return bcm2835_asb_power_off(pd, PM_IMAGE,
+					     ASB_H264_M_CTRL, ASB_H264_S_CTRL,
+					     PM_H264RSTN);
+
+	case BCM2835_POWER_DOMAIN_USB:
+		PM_WRITE(PM_USB, 0);
+		return 0;
+
+	case BCM2835_POWER_DOMAIN_DSI0:
+		PM_WRITE(PM_DSI0, PM_DSI0_CTRLEN);
+		PM_WRITE(PM_DSI0, 0);
+		return 0;
+
+	case BCM2835_POWER_DOMAIN_DSI1:
+		PM_WRITE(PM_DSI1, PM_DSI1_CTRLEN);
+		PM_WRITE(PM_DSI1, 0);
+		return 0;
+
+	case BCM2835_POWER_DOMAIN_CCP2TX:
+		PM_WRITE(PM_CCP2TX, PM_CCP2TX_CTRLEN);
+		PM_WRITE(PM_CCP2TX, 0);
+		return 0;
+
+	case BCM2835_POWER_DOMAIN_HDMI:
+		PM_WRITE(PM_HDMI, PM_READ(PM_HDMI) | PM_HDMI_LDOPD);
+		PM_WRITE(PM_HDMI, PM_READ(PM_HDMI) & ~PM_HDMI_CTRLEN);
+		return 0;
+
+	default:
+		dev_err(power->dev, "Invalid domain %d\n", pd->domain);
+		return -EINVAL;
+	}
+}
+
+static void
+bcm2835_init_power_domain(struct bcm2835_power *power,
+			  int pd_xlate_index, const char *name)
+{
+	struct device *dev = power->dev;
+	struct bcm2835_power_domain *dom = &power->domains[pd_xlate_index];
+
+	dom->clk = devm_clk_get(dev->parent, name);
+
+	dom->base.name = name;
+	dom->base.power_on = bcm2835_power_pd_power_on;
+	dom->base.power_off = bcm2835_power_pd_power_off;
+
+	dom->domain = pd_xlate_index;
+	dom->power = power;
+
+	/* XXX: on/off at boot? */
+	pm_genpd_init(&dom->base, NULL, true);
+
+	power->pd_xlate.domains[pd_xlate_index] = &dom->base;
+}
+
+/** bcm2835_reset_reset - Resets a block that has a reset line in the
+ * PM block.
+ *
+ * The consumer of the reset controller must have the power domain up
+ * -- there's no reset ability with the power domain down.  To reset
+ * the sub-block, we just disable its access to memory through the
+ * ASB, reset, and re-enable.
+ */
+static int bcm2835_reset_reset(struct reset_controller_dev *rcdev,
+			       unsigned long id)
+{
+	struct bcm2835_power *power = container_of(rcdev, struct bcm2835_power,
+						   reset);
+	struct bcm2835_power_domain *pd;
+	int ret;
+
+	switch (id) {
+	case BCM2835_RESET_V3D:
+		pd = &power->domains[BCM2835_POWER_DOMAIN_GRAFX_V3D];
+		break;
+	case BCM2835_RESET_H264:
+		pd = &power->domains[BCM2835_POWER_DOMAIN_IMAGE_H264];
+		break;
+	case BCM2835_RESET_ISP:
+		pd = &power->domains[BCM2835_POWER_DOMAIN_IMAGE_ISP];
+		break;
+	default:
+		dev_err(power->dev, "Bad reset id %ld\n", id);
+		return -EINVAL;
+	}
+
+	ret = bcm2835_power_pd_power_off(&pd->base);
+	if (ret)
+		return ret;
+
+	return bcm2835_power_pd_power_on(&pd->base);
+}
+
+static int bcm2835_reset_status(struct reset_controller_dev *rcdev,
+				unsigned long id)
+{
+	struct bcm2835_power *power = container_of(rcdev, struct bcm2835_power,
+						   reset);
+
+	switch (id) {
+	case BCM2835_RESET_V3D:
+		return !PM_READ(PM_GRAFX & PM_V3DRSTN);
+	case BCM2835_RESET_H264:
+		return !PM_READ(PM_IMAGE & PM_H264RSTN);
+	case BCM2835_RESET_ISP:
+		return !PM_READ(PM_IMAGE & PM_ISPRSTN);
+	default:
+		return -EINVAL;
+	}
+}
+
+const struct reset_control_ops bcm2835_reset_ops = {
+	.reset = bcm2835_reset_reset,
+	.status = bcm2835_reset_status,
+};
+
+static const char *const power_domain_names[] = {
+	[BCM2835_POWER_DOMAIN_GRAFX] = "grafx",
+	[BCM2835_POWER_DOMAIN_GRAFX_V3D] = "v3d",
+
+	[BCM2835_POWER_DOMAIN_IMAGE] = "image",
+	[BCM2835_POWER_DOMAIN_IMAGE_PERI] = "peri_image",
+	[BCM2835_POWER_DOMAIN_IMAGE_H264] = "h264",
+	[BCM2835_POWER_DOMAIN_IMAGE_ISP] = "isp",
+
+	[BCM2835_POWER_DOMAIN_USB] = "usb",
+	[BCM2835_POWER_DOMAIN_DSI0] = "dsi0",
+	[BCM2835_POWER_DOMAIN_DSI1] = "dsi1",
+	[BCM2835_POWER_DOMAIN_CAM0] = "cam0",
+	[BCM2835_POWER_DOMAIN_CAM1] = "cam1",
+	[BCM2835_POWER_DOMAIN_CCP2TX] = "ccp2tx",
+	[BCM2835_POWER_DOMAIN_HDMI] = "hdmi",
+};
+
+static int bcm2835_power_probe(struct platform_device *pdev)
+{
+	struct bcm2835_pm *pm = dev_get_drvdata(pdev->dev.parent);
+	struct device *dev = &pdev->dev;
+	struct bcm2835_power *power;
+	static const struct {
+		int parent, child;
+	} domain_deps[] = {
+		{ BCM2835_POWER_DOMAIN_GRAFX, BCM2835_POWER_DOMAIN_GRAFX_V3D },
+		{ BCM2835_POWER_DOMAIN_IMAGE, BCM2835_POWER_DOMAIN_IMAGE_PERI },
+		{ BCM2835_POWER_DOMAIN_IMAGE, BCM2835_POWER_DOMAIN_IMAGE_H264 },
+		{ BCM2835_POWER_DOMAIN_IMAGE, BCM2835_POWER_DOMAIN_IMAGE_ISP },
+		{ BCM2835_POWER_DOMAIN_IMAGE_PERI, BCM2835_POWER_DOMAIN_USB },
+		{ BCM2835_POWER_DOMAIN_IMAGE_PERI, BCM2835_POWER_DOMAIN_CAM0 },
+		{ BCM2835_POWER_DOMAIN_IMAGE_PERI, BCM2835_POWER_DOMAIN_CAM1 },
+	};
+	int ret, i;
+	u32 id;
+
+	power = devm_kzalloc(dev, sizeof(*power), GFP_KERNEL);
+	if (!power)
+		return -ENOMEM;
+	platform_set_drvdata(pdev, power);
+
+	power->dev = dev;
+	power->base = pm->base;
+	power->asb = pm->asb;
+
+	id = ASB_READ(ASB_AXI_BRDG_ID);
+	if (id != 0x62726467 /* "BRDG" */) {
+		dev_err(dev, "ASB register ID returned 0x%08x\n", id);
+		return -ENODEV;
+	}
+
+	power->pd_xlate.domains = devm_kcalloc(dev,
+					       ARRAY_SIZE(power_domain_names),
+					       sizeof(*power->pd_xlate.domains),
+					       GFP_KERNEL);
+	if (!power->pd_xlate.domains)
+		return -ENOMEM;
+
+	power->pd_xlate.num_domains = ARRAY_SIZE(power_domain_names);
+
+	for (i = 0; i < ARRAY_SIZE(power_domain_names); i++)
+		bcm2835_init_power_domain(power, i, power_domain_names[i]);
+
+	for (i = 0; i < ARRAY_SIZE(domain_deps); i++) {
+		pm_genpd_add_subdomain(&power->domains[domain_deps[i].parent].base,
+				       &power->domains[domain_deps[i].child].base);
+	}
+
+	power->reset.owner = THIS_MODULE;
+	power->reset.nr_resets = BCM2835_RESET_COUNT;
+	power->reset.ops = &bcm2835_reset_ops;
+	power->reset.of_node = dev->parent->of_node;
+
+	ret = devm_reset_controller_register(dev, &power->reset);
+	if (ret)
+		return ret;
+
+	of_genpd_add_provider_onecell(dev->parent->of_node, &power->pd_xlate);
+
+	dev_info(dev, "Broadcom BCM2835 power domains driver");
+	return 0;
+}
+
+static int bcm2835_power_remove(struct platform_device *pdev)
+{
+	return 0;
+}
+
+static struct platform_driver bcm2835_power_driver = {
+	.probe		= bcm2835_power_probe,
+	.remove		= bcm2835_power_remove,
+	.driver = {
+		.name =	"bcm2835-power",
+	},
+};
+module_platform_driver(bcm2835_power_driver);
+
+MODULE_AUTHOR("Eric Anholt <eric@anholt.net>");
+MODULE_DESCRIPTION("Driver for Broadcom BCM2835 PM power domains and reset");
+MODULE_LICENSE("GPL");
diff --git a/include/dt-bindings/soc/bcm2835-pm.h b/include/dt-bindings/soc/bcm2835-pm.h
new file mode 100644
index 000000000000..153d75b8d99f
--- /dev/null
+++ b/include/dt-bindings/soc/bcm2835-pm.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: (GPL-2.0+ OR MIT) */
+
+#ifndef _DT_BINDINGS_ARM_BCM2835_PM_H
+#define _DT_BINDINGS_ARM_BCM2835_PM_H
+
+#define BCM2835_POWER_DOMAIN_GRAFX		0
+#define BCM2835_POWER_DOMAIN_GRAFX_V3D		1
+#define BCM2835_POWER_DOMAIN_IMAGE		2
+#define BCM2835_POWER_DOMAIN_IMAGE_PERI		3
+#define BCM2835_POWER_DOMAIN_IMAGE_ISP		4
+#define BCM2835_POWER_DOMAIN_IMAGE_H264		5
+#define BCM2835_POWER_DOMAIN_USB		6
+#define BCM2835_POWER_DOMAIN_DSI0		7
+#define BCM2835_POWER_DOMAIN_DSI1		8
+#define BCM2835_POWER_DOMAIN_CAM0		9
+#define BCM2835_POWER_DOMAIN_CAM1		10
+#define BCM2835_POWER_DOMAIN_CCP2TX		11
+#define BCM2835_POWER_DOMAIN_HDMI		12
+
+#define BCM2835_POWER_DOMAIN_COUNT		13
+
+#define BCM2835_RESET_V3D			0
+#define BCM2835_RESET_ISP			1
+#define BCM2835_RESET_H264			2
+
+#define BCM2835_RESET_COUNT			3
+
+#endif /* _DT_BINDINGS_ARM_BCM2835_PM_H */
diff --git a/include/linux/mfd/bcm2835-pm.h b/include/linux/mfd/bcm2835-pm.h
index b7d0ee1feffa..ed37dc40e82a 100644
--- a/include/linux/mfd/bcm2835-pm.h
+++ b/include/linux/mfd/bcm2835-pm.h
@@ -8,6 +8,7 @@
 struct bcm2835_pm {
 	struct device *dev;
 	void __iomem *base;
+	void __iomem *asb;
 };
 
 #endif /* BCM2835_MFD_PM_H */
-- 
cgit v1.2.3


From 03c87b95ac04c2a34045641b25dded6e3e889556 Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Date: Wed, 9 Jan 2019 18:44:00 +0100
Subject: regulator: provide rdev_get_regmap()

Provide a helper allowing to access regulator's regmap.

Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/core.c         | 6 ++++++
 include/linux/regulator/driver.h | 1 +
 2 files changed, 7 insertions(+)

(limited to 'include')

diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c
index 3f9d81b6e763..430a73dea487 100644
--- a/drivers/regulator/core.c
+++ b/drivers/regulator/core.c
@@ -5251,6 +5251,12 @@ struct device *rdev_get_dev(struct regulator_dev *rdev)
 }
 EXPORT_SYMBOL_GPL(rdev_get_dev);
 
+struct regmap *rdev_get_regmap(struct regulator_dev *rdev)
+{
+	return rdev->regmap;
+}
+EXPORT_SYMBOL_GPL(rdev_get_regmap);
+
 void *regulator_get_init_drvdata(struct regulator_init_data *reg_init_data)
 {
 	return reg_init_data->driver_data;
diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h
index 389bcaf7900f..795b38a06b6c 100644
--- a/include/linux/regulator/driver.h
+++ b/include/linux/regulator/driver.h
@@ -503,6 +503,7 @@ int regulator_notifier_call_chain(struct regulator_dev *rdev,
 
 void *rdev_get_drvdata(struct regulator_dev *rdev);
 struct device *rdev_get_dev(struct regulator_dev *rdev);
+struct regmap *rdev_get_regmap(struct regulator_dev *rdev);
 int rdev_get_id(struct regulator_dev *rdev);
 
 int regulator_mode_to_status(unsigned int);
-- 
cgit v1.2.3


From 412e60373245fd1dfae8d4d44c5d1406b3d90971 Mon Sep 17 00:00:00 2001
From: Martin Sperl <kernel@martin.sperl.org>
Date: Tue, 8 Jan 2019 12:13:45 +0000
Subject: spi: core: avoid waking pump thread from spi_sync instead run
 teardown delayed

When spi_sync is running alone with no other spi devices connected
to the bus the worker thread is woken during spi_finalize_current_message
to run the teardown code every time.

This is totally unnecessary in the case that there is no message queued.

On a multi-core system this results in one wakeup of the thread for each
spi_message processed via spi_sync where in most cases the teardown does
not happen as the hw is already in use.

This patch now delays the teardown by 1 second by using a separate
kthread_delayed_work for the teardown.

This avoids waking the kthread too often.

For spi_sync transfers in a tight loop (say 40k messages/s) this
avoids the penalty of waking the worker thread 40k times/s.
On a rasperry pi 3 with 4 cores the results in 32% of a single core
only to find out that there is nothing in the queue and it can go back
to sleep.

With this patch applied the spi-worker is woken exactly once: after
the load finishes and the spi bus is idle for 1 second.

I believe I have also seen situations where during a spi_sync loop
the worker thread (triggered by the last message finished) is slightly
faster and _wins_ the race to process the message, so we are actually
running the kthread and letting it do some work...

This is also no longer observed with this patch applied as.

Tested with a new CAN controller driver for the mcp2517fd which
uses spi_sync for interrupt handling and spi_async for scheduling
of can frames for transmission (in a different thread)

Some statistics when receiving 100000 CAN frames with the mcp25xxfd driver
on a Raspberry pi 3:

without the patch:
------------------
root@raspcm3:~# for x in $(pgrep spi0) $(pgrep irq/94-mcp25xxf) ; do awk '{printf "%-20s %6i\n", $2,$15}' /proc/$x/stat; done
(spi0)                    5
(irq/94-mcp25xxf)         0
root@raspcm3:~# vmstat 1
procs -----------memory---------- ---swap-- -----io---- -system-- ------cpu-----
 r  b   swpd   free   buff  cache   si   so    bi    bo   in   cs us sy id wa st
 1  0      0 821960  13592  50848    0    0    80     2 1986  105  1  2 97  0  0
 0  0      0 821968  13592  50876    0    0     0     0 8046   30  0  0 100  0  0
 0  0      0 821936  13592  50876    0    0     0     0 8032   24  0  0 100  0  0
 0  0      0 821936  13592  50876    0    0     0     0 8035   30  0  0 100  0  0
 0  0      0 821936  13592  50876    0    0     0     0 8033   22  0  0 100  0  0
 2  0      0 821936  13592  50876    0    0     0     0 11598 7129  0  3 97  0  0
 1  0      0 821872  13592  50876    0    0     0     0 37741 59003  0 31 69  0  0
 2  0      0 821840  13592  50876    0    0     0     0 37762 59078  0 29 71  0  0
 2  0      0 821776  13592  50876    0    0     0     0 37593 58792  0 28 72  0  0
 1  0      0 821744  13592  50876    0    0     0     0 37642 58881  0 30 70  0  0
 2  0      0 821680  13592  50876    0    0     0     0 37490 58602  0 27 73  0  0
 1  0      0 821648  13592  50876    0    0     0     0 37412 58418  0 29 71  0  0
 1  0      0 821584  13592  50876    0    0     0     0 37337 58288  0 27 73  0  0
 1  0      0 821552  13592  50876    0    0     0     0 37584 58774  0 27 73  0  0
 0  0      0 821520  13592  50876    0    0     0     0 18363 20566  0  9 91  0  0
 0  0      0 821520  13592  50876    0    0     0     0 8037   32  0  0 100  0  0
 0  0      0 821520  13592  50876    0    0     0     0 8031   23  0  0 100  0  0
 0  0      0 821520  13592  50876    0    0     0     0 8034   26  0  0 100  0  0
 0  0      0 821520  13592  50876    0    0     0     0 8033   24  0  0 100  0  0
^C
root@raspcm3:~# for x in $(pgrep spi0) $(pgrep irq/94-mcp25xxf) ; do awk '{printf "%-20s %6i\n", $2,$15}' /proc/$x/stat; done
(spi0)                  228
(irq/94-mcp25xxf)       794
root@raspcm3:~# cat /proc/interrupts
           CPU0       CPU1       CPU2       CPU3
 17:         34          0          0          0  ARMCTRL-level   1 Edge      3f00b880.mailbox
 27:          1          0          0          0  ARMCTRL-level  35 Edge      timer
 33:    1416870          0          0          0  ARMCTRL-level  41 Edge      3f980000.usb, dwc2_hsotg:usb1
 34:          1          0          0          0  ARMCTRL-level  42 Edge      vc4
 35:          0          0          0          0  ARMCTRL-level  43 Edge      3f004000.txp
 40:       1753          0          0          0  ARMCTRL-level  48 Edge      DMA IRQ
 42:         11          0          0          0  ARMCTRL-level  50 Edge      DMA IRQ
 44:         11          0          0          0  ARMCTRL-level  52 Edge      DMA IRQ
 45:          0          0          0          0  ARMCTRL-level  53 Edge      DMA IRQ
 66:          0          0          0          0  ARMCTRL-level  74 Edge      vc4 crtc
 69:          0          0          0          0  ARMCTRL-level  77 Edge      vc4 crtc
 70:          0          0          0          0  ARMCTRL-level  78 Edge      vc4 crtc
 77:         20          0          0          0  ARMCTRL-level  85 Edge      3f205000.i2c, 3f804000.i2c, 3f805000.i2c
 78:       6346          0          0          0  ARMCTRL-level  86 Edge      3f204000.spi
 80:        205          0          0          0  ARMCTRL-level  88 Edge      mmc0
 81:        493          0          0          0  ARMCTRL-level  89 Edge      uart-pl011
 89:          0          0          0          0  bcm2836-timer   0 Edge      arch_timer
 90:       4291       3821       2180       1649  bcm2836-timer   1 Edge      arch_timer
 94:      14289          0          0          0  pinctrl-bcm2835  16 Level     mcp25xxfd
IPI0:          0          0          0          0  CPU wakeup interrupts
IPI1:          0          0          0          0  Timer broadcast interrupts
IPI2:       3645     242371       7919       1328  Rescheduling interrupts
IPI3:        112        543        273        194  Function call interrupts
IPI4:          0          0          0          0  CPU stop interrupts
IPI5:          1          0          0          0  IRQ work interrupts
IPI6:          0          0          0          0  completion interrupts
Err:          0

top shows 93% for the mcp25xxfd interrupt handler, 31% for spi0.

with the patch:
---------------
root@raspcm3:~# for x in $(pgrep spi0) $(pgrep irq/94-mcp25xxf) ; do awk '{printf "%-20s %6i\n", $2,$15}' /proc/$x/stat; done
(spi0)                    0
(irq/94-mcp25xxf)         0
root@raspcm3:~# vmstat 1
procs -----------memory---------- ---swap-- -----io---- -system-- ------cpu-----
 0  0      0 804768  13584  62628    0    0     0     0 8038   24  0  0 100  0  0
 0  0      0 804768  13584  62628    0    0     0     0 8042   25  0  0 100  0  0
 1  0      0 804704  13584  62628    0    0     0     0 9603 2967  0 20 80  0  0
 1  0      0 804672  13584  62628    0    0     0     0 9828 3380  0 24 76  0  0
 1  0      0 804608  13584  62628    0    0     0     0 9823 3375  0 23 77  0  0
 1  0      0 804608  13584  62628    0    0     0    12 9829 3394  0 23 77  0  0
 1  0      0 804544  13584  62628    0    0     0     0 9816 3362  0 22 78  0  0
 1  0      0 804512  13584  62628    0    0     0     0 9817 3367  0 23 77  0  0
 1  0      0 804448  13584  62628    0    0     0     0 9822 3370  0 22 78  0  0
 1  0      0 804416  13584  62628    0    0     0     0 9815 3367  0 23 77  0  0
 0  0      0 804352  13584  62628    0    0     0    84 9222 2250  0 14 86  0  0
 0  0      0 804352  13592  62620    0    0     0    24 8131  209  0  0 93  7  0
 0  0      0 804320  13592  62628    0    0     0     0 8041   27  0  0 100  0  0
 0  0      0 804352  13592  62628    0    0     0     0 8040   26  0  0 100  0  0
root@raspcm3:~# for x in $(pgrep spi0) $(pgrep irq/94-mcp25xxf) ; do awk '{printf "%-20s %6i\n", $2,$15}' /proc/$x/stat; done
(spi0)                    0
(irq/94-mcp25xxf)       767
root@raspcm3:~# cat /proc/interrupts
           CPU0       CPU1       CPU2       CPU3
 17:         29          0          0          0  ARMCTRL-level   1 Edge      3f00b880.mailbox
 27:          1          0          0          0  ARMCTRL-level  35 Edge      timer
 33:    1024412          0          0          0  ARMCTRL-level  41 Edge      3f980000.usb, dwc2_hsotg:usb1
 34:          1          0          0          0  ARMCTRL-level  42 Edge      vc4
 35:          0          0          0          0  ARMCTRL-level  43 Edge      3f004000.txp
 40:       1773          0          0          0  ARMCTRL-level  48 Edge      DMA IRQ
 42:         11          0          0          0  ARMCTRL-level  50 Edge      DMA IRQ
 44:         11          0          0          0  ARMCTRL-level  52 Edge      DMA IRQ
 45:          0          0          0          0  ARMCTRL-level  53 Edge      DMA IRQ
 66:          0          0          0          0  ARMCTRL-level  74 Edge      vc4 crtc
 69:          0          0          0          0  ARMCTRL-level  77 Edge      vc4 crtc
 70:          0          0          0          0  ARMCTRL-level  78 Edge      vc4 crtc
 77:         20          0          0          0  ARMCTRL-level  85 Edge      3f205000.i2c, 3f804000.i2c, 3f805000.i2c
 78:       6417          0          0          0  ARMCTRL-level  86 Edge      3f204000.spi
 80:        237          0          0          0  ARMCTRL-level  88 Edge      mmc0
 81:        489          0          0          0  ARMCTRL-level  89 Edge      uart-pl011
 89:          0          0          0          0  bcm2836-timer   0 Edge      arch_timer
 90:       4048       3704       2383       1892  bcm2836-timer   1 Edge      arch_timer
 94:      14287          0          0          0  pinctrl-bcm2835  16 Level     mcp25xxfd
IPI0:          0          0          0          0  CPU wakeup interrupts
IPI1:          0          0          0          0  Timer broadcast interrupts
IPI2:       2361       2948       7890       1616  Rescheduling interrupts
IPI3:         65        617        301        166  Function call interrupts
IPI4:          0          0          0          0  CPU stop interrupts
IPI5:          1          0          0          0  IRQ work interrupts
IPI6:          0          0          0          0  completion interrupts
Err:          0
top shows 91% for the mcp25xxfd interrupt handler, 0% for spi0

So we see that spi0 is no longer getting scheduled wasting CPU cycles
There are a lot less context switches and corresponding Rescheduling interrupts
All of these show that this improves efficiency of the system and reduces
CPU utilization.

Signed-off-by: Martin Sperl <kernel@martin.sperl.org>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi.c       | 122 +++++++++++++++++++++++++++++++++++-------------
 include/linux/spi/spi.h |   2 +
 2 files changed, 91 insertions(+), 33 deletions(-)

(limited to 'include')

diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index 13f447a67d67..06b9139664a3 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -1225,7 +1225,7 @@ static void __spi_pump_messages(struct spi_controller *ctlr, bool in_kthread)
 		return;
 	}
 
-	/* If another context is idling the device then defer */
+	/* If another context is idling the device then defer to kthread */
 	if (ctlr->idling) {
 		kthread_queue_work(&ctlr->kworker, &ctlr->pump_messages);
 		spin_unlock_irqrestore(&ctlr->queue_lock, flags);
@@ -1239,34 +1239,10 @@ static void __spi_pump_messages(struct spi_controller *ctlr, bool in_kthread)
 			return;
 		}
 
-		/* Only do teardown in the thread */
-		if (!in_kthread) {
-			kthread_queue_work(&ctlr->kworker,
-					   &ctlr->pump_messages);
-			spin_unlock_irqrestore(&ctlr->queue_lock, flags);
-			return;
-		}
-
-		ctlr->busy = false;
-		ctlr->idling = true;
-		spin_unlock_irqrestore(&ctlr->queue_lock, flags);
-
-		kfree(ctlr->dummy_rx);
-		ctlr->dummy_rx = NULL;
-		kfree(ctlr->dummy_tx);
-		ctlr->dummy_tx = NULL;
-		if (ctlr->unprepare_transfer_hardware &&
-		    ctlr->unprepare_transfer_hardware(ctlr))
-			dev_err(&ctlr->dev,
-				"failed to unprepare transfer hardware\n");
-		if (ctlr->auto_runtime_pm) {
-			pm_runtime_mark_last_busy(ctlr->dev.parent);
-			pm_runtime_put_autosuspend(ctlr->dev.parent);
-		}
-		trace_spi_controller_idle(ctlr);
-
-		spin_lock_irqsave(&ctlr->queue_lock, flags);
-		ctlr->idling = false;
+		/* schedule idle teardown with a delay of 1 second */
+		kthread_mod_delayed_work(&ctlr->kworker,
+					 &ctlr->pump_idle_teardown,
+					 HZ);
 		spin_unlock_irqrestore(&ctlr->queue_lock, flags);
 		return;
 	}
@@ -1359,6 +1335,77 @@ static void spi_pump_messages(struct kthread_work *work)
 	__spi_pump_messages(ctlr, true);
 }
 
+/**
+ * spi_pump_idle_teardown - kthread delayed work function which tears down
+ *                          the controller settings after some delay
+ * @work: pointer to kthread work struct contained in the controller struct
+ */
+static void spi_pump_idle_teardown(struct kthread_work *work)
+{
+	struct spi_controller *ctlr =
+		container_of(work, struct spi_controller,
+			     pump_idle_teardown.work);
+	unsigned long flags;
+
+	/* Lock queue */
+	spin_lock_irqsave(&ctlr->queue_lock, flags);
+
+	/* Make sure we are not already running a message */
+	if (ctlr->cur_msg)
+		goto out;
+
+	/* if there is anything in the list then exit */
+	if (!list_empty(&ctlr->queue))
+		goto out;
+
+	/* if the controller is running then exit */
+	if (ctlr->running)
+		goto out;
+
+	/* if the controller is busy then exit */
+	if (ctlr->busy)
+		goto out;
+
+	/* if the controller is idling then exit
+	 * this is actually a bit strange and would indicate that
+	 * this function is scheduled twice, which should not happen
+	 */
+	if (ctlr->idling)
+		goto out;
+
+	/* set up the initial states */
+	ctlr->busy = false;
+	ctlr->idling = true;
+	spin_unlock_irqrestore(&ctlr->queue_lock, flags);
+
+	/* free dummy receive buffers */
+	kfree(ctlr->dummy_rx);
+	ctlr->dummy_rx = NULL;
+	kfree(ctlr->dummy_tx);
+	ctlr->dummy_tx = NULL;
+
+	/* unprepare hardware */
+	if (ctlr->unprepare_transfer_hardware &&
+	    ctlr->unprepare_transfer_hardware(ctlr))
+		dev_err(&ctlr->dev,
+			"failed to unprepare transfer hardware\n");
+	/* handle pm */
+	if (ctlr->auto_runtime_pm) {
+		pm_runtime_mark_last_busy(ctlr->dev.parent);
+		pm_runtime_put_autosuspend(ctlr->dev.parent);
+	}
+
+	/* mark controller as idle */
+	trace_spi_controller_idle(ctlr);
+
+	/* finally put us from idling into stopped */
+	spin_lock_irqsave(&ctlr->queue_lock, flags);
+	ctlr->idling = false;
+
+out:
+	spin_unlock_irqrestore(&ctlr->queue_lock, flags);
+}
+
 static int spi_init_queue(struct spi_controller *ctlr)
 {
 	struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 };
@@ -1374,7 +1421,8 @@ static int spi_init_queue(struct spi_controller *ctlr)
 		return PTR_ERR(ctlr->kworker_task);
 	}
 	kthread_init_work(&ctlr->pump_messages, spi_pump_messages);
-
+	kthread_init_delayed_work(&ctlr->pump_idle_teardown,
+				  spi_pump_idle_teardown);
 	/*
 	 * Controller config will indicate if this controller should run the
 	 * message pump with high (realtime) priority to reduce the transfer
@@ -1446,7 +1494,16 @@ void spi_finalize_current_message(struct spi_controller *ctlr)
 	spin_lock_irqsave(&ctlr->queue_lock, flags);
 	ctlr->cur_msg = NULL;
 	ctlr->cur_msg_prepared = false;
-	kthread_queue_work(&ctlr->kworker, &ctlr->pump_messages);
+
+	/* if there is something queued, then wake the queue */
+	if (!list_empty(&ctlr->queue))
+		kthread_queue_work(&ctlr->kworker, &ctlr->pump_messages);
+	else
+		/* otherwise schedule delayed teardown */
+		kthread_mod_delayed_work(&ctlr->kworker,
+					 &ctlr->pump_idle_teardown,
+					 HZ);
+
 	spin_unlock_irqrestore(&ctlr->queue_lock, flags);
 
 	trace_spi_message_done(mesg);
@@ -1551,7 +1608,7 @@ static int __spi_queued_transfer(struct spi_device *spi,
 	msg->status = -EINPROGRESS;
 
 	list_add_tail(&msg->queue, &ctlr->queue);
-	if (!ctlr->busy && need_pump)
+	if (need_pump)
 		kthread_queue_work(&ctlr->kworker, &ctlr->pump_messages);
 
 	spin_unlock_irqrestore(&ctlr->queue_lock, flags);
@@ -3726,4 +3783,3 @@ err0:
  * include needing to have boardinfo data structures be much more public.
  */
 postcore_initcall(spi_init);
-
diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index 916bba47d156..79ad62e2487c 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -334,6 +334,7 @@ static inline void spi_unregister_driver(struct spi_driver *sdrv)
  * @kworker: thread struct for message pump
  * @kworker_task: pointer to task for message pump kworker thread
  * @pump_messages: work struct for scheduling work to the message pump
+ * @pump_idle_teardown: work structure for scheduling a teardown delayed
  * @queue_lock: spinlock to syncronise access to message queue
  * @queue: message queue
  * @idling: the device is entering idle state
@@ -532,6 +533,7 @@ struct spi_controller {
 	struct kthread_worker		kworker;
 	struct task_struct		*kworker_task;
 	struct kthread_work		pump_messages;
+	struct kthread_delayed_work     pump_idle_teardown;
 	spinlock_t			queue_lock;
 	struct list_head		queue;
 	struct spi_message		*cur_msg;
-- 
cgit v1.2.3


From 6131dc81211cfcd8b7bd210665fbe0b6ba2e5fc5 Mon Sep 17 00:00:00 2001
From: Jeffrey Hugo <jhugo@codeaurora.org>
Date: Mon, 17 Dec 2018 19:15:36 -0700
Subject: clk: qcom: smd: Add support for MSM8998 rpm clocks

Add rpm smd clocks, PMIC and bus clocks which are required on MSM8998
for clients to vote on.

Signed-off-by: Jeffrey Hugo <jhugo@codeaurora.org>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Stephen Boyd <sboyd@kernel.org>
---
 .../devicetree/bindings/clock/qcom,rpmcc.txt       |  1 +
 drivers/clk/qcom/clk-smd-rpm.c                     | 63 ++++++++++++++++++++++
 include/dt-bindings/clock/qcom,rpmcc.h             | 10 ++++
 3 files changed, 74 insertions(+)

(limited to 'include')

diff --git a/Documentation/devicetree/bindings/clock/qcom,rpmcc.txt b/Documentation/devicetree/bindings/clock/qcom,rpmcc.txt
index 87b4949e9bc8..944719bd586f 100644
--- a/Documentation/devicetree/bindings/clock/qcom,rpmcc.txt
+++ b/Documentation/devicetree/bindings/clock/qcom,rpmcc.txt
@@ -16,6 +16,7 @@ Required properties :
 			"qcom,rpmcc-msm8974", "qcom,rpmcc"
 			"qcom,rpmcc-apq8064", "qcom,rpmcc"
 			"qcom,rpmcc-msm8996", "qcom,rpmcc"
+			"qcom,rpmcc-msm8998", "qcom,rpmcc"
 			"qcom,rpmcc-qcs404", "qcom,rpmcc"
 
 - #clock-cells : shall contain 1
diff --git a/drivers/clk/qcom/clk-smd-rpm.c b/drivers/clk/qcom/clk-smd-rpm.c
index d3aadaeb2903..22dd42ad9223 100644
--- a/drivers/clk/qcom/clk-smd-rpm.c
+++ b/drivers/clk/qcom/clk-smd-rpm.c
@@ -655,10 +655,73 @@ static const struct rpm_smd_clk_desc rpm_clk_qcs404 = {
 	.num_clks = ARRAY_SIZE(qcs404_clks),
 };
 
+/* msm8998 */
+DEFINE_CLK_SMD_RPM(msm8998, snoc_clk, snoc_a_clk, QCOM_SMD_RPM_BUS_CLK, 1);
+DEFINE_CLK_SMD_RPM(msm8998, cnoc_clk, cnoc_a_clk, QCOM_SMD_RPM_BUS_CLK, 2);
+DEFINE_CLK_SMD_RPM(msm8998, ce1_clk, ce1_a_clk, QCOM_SMD_RPM_CE_CLK, 0);
+DEFINE_CLK_SMD_RPM_XO_BUFFER(msm8998, div_clk1, div_clk1_a, 0xb);
+DEFINE_CLK_SMD_RPM(msm8998, ipa_clk, ipa_a_clk, QCOM_SMD_RPM_IPA_CLK, 0);
+DEFINE_CLK_SMD_RPM_XO_BUFFER(msm8998, ln_bb_clk1, ln_bb_clk1_a, 1);
+DEFINE_CLK_SMD_RPM_XO_BUFFER(msm8998, ln_bb_clk2, ln_bb_clk2_a, 2);
+DEFINE_CLK_SMD_RPM_XO_BUFFER_PINCTRL(msm8998, ln_bb_clk3_pin, ln_bb_clk3_a_pin,
+				     3);
+DEFINE_CLK_SMD_RPM(msm8998, mmssnoc_axi_rpm_clk, mmssnoc_axi_rpm_a_clk,
+		   QCOM_SMD_RPM_MMAXI_CLK, 0);
+DEFINE_CLK_SMD_RPM(msm8998, aggre1_noc_clk, aggre1_noc_a_clk,
+		   QCOM_SMD_RPM_AGGR_CLK, 1);
+DEFINE_CLK_SMD_RPM(msm8998, aggre2_noc_clk, aggre2_noc_a_clk,
+		   QCOM_SMD_RPM_AGGR_CLK, 2);
+DEFINE_CLK_SMD_RPM_QDSS(msm8998, qdss_clk, qdss_a_clk,
+			QCOM_SMD_RPM_MISC_CLK, 1);
+DEFINE_CLK_SMD_RPM_XO_BUFFER(msm8998, rf_clk1, rf_clk1_a, 4);
+DEFINE_CLK_SMD_RPM_XO_BUFFER_PINCTRL(msm8998, rf_clk2_pin, rf_clk2_a_pin, 5);
+DEFINE_CLK_SMD_RPM_XO_BUFFER(msm8998, rf_clk3, rf_clk3_a, 6);
+DEFINE_CLK_SMD_RPM_XO_BUFFER_PINCTRL(msm8998, rf_clk3_pin, rf_clk3_a_pin, 6);
+static struct clk_smd_rpm *msm8998_clks[] = {
+	[RPM_SMD_SNOC_CLK] = &msm8998_snoc_clk,
+	[RPM_SMD_SNOC_A_CLK] = &msm8998_snoc_a_clk,
+	[RPM_SMD_CNOC_CLK] = &msm8998_cnoc_clk,
+	[RPM_SMD_CNOC_A_CLK] = &msm8998_cnoc_a_clk,
+	[RPM_SMD_CE1_CLK] = &msm8998_ce1_clk,
+	[RPM_SMD_CE1_A_CLK] = &msm8998_ce1_a_clk,
+	[RPM_SMD_DIV_CLK1] = &msm8998_div_clk1,
+	[RPM_SMD_DIV_A_CLK1] = &msm8998_div_clk1_a,
+	[RPM_SMD_IPA_CLK] = &msm8998_ipa_clk,
+	[RPM_SMD_IPA_A_CLK] = &msm8998_ipa_a_clk,
+	[RPM_SMD_LN_BB_CLK1] = &msm8998_ln_bb_clk1,
+	[RPM_SMD_LN_BB_CLK1_A] = &msm8998_ln_bb_clk1_a,
+	[RPM_SMD_LN_BB_CLK2] = &msm8998_ln_bb_clk2,
+	[RPM_SMD_LN_BB_CLK2_A] = &msm8998_ln_bb_clk2_a,
+	[RPM_SMD_LN_BB_CLK3_PIN] = &msm8998_ln_bb_clk3_pin,
+	[RPM_SMD_LN_BB_CLK3_A_PIN] = &msm8998_ln_bb_clk3_a_pin,
+	[RPM_SMD_MMAXI_CLK] = &msm8998_mmssnoc_axi_rpm_clk,
+	[RPM_SMD_MMAXI_A_CLK] = &msm8998_mmssnoc_axi_rpm_a_clk,
+	[RPM_SMD_AGGR1_NOC_CLK] = &msm8998_aggre1_noc_clk,
+	[RPM_SMD_AGGR1_NOC_A_CLK] = &msm8998_aggre1_noc_a_clk,
+	[RPM_SMD_AGGR2_NOC_CLK] = &msm8998_aggre2_noc_clk,
+	[RPM_SMD_AGGR2_NOC_A_CLK] = &msm8998_aggre2_noc_a_clk,
+	[RPM_SMD_QDSS_CLK] = &msm8998_qdss_clk,
+	[RPM_SMD_QDSS_A_CLK] = &msm8998_qdss_a_clk,
+	[RPM_SMD_RF_CLK1] = &msm8998_rf_clk1,
+	[RPM_SMD_RF_CLK1_A] = &msm8998_rf_clk1_a,
+	[RPM_SMD_RF_CLK2_PIN] = &msm8998_rf_clk2_pin,
+	[RPM_SMD_RF_CLK2_A_PIN] = &msm8998_rf_clk2_a_pin,
+	[RPM_SMD_RF_CLK3] = &msm8998_rf_clk3,
+	[RPM_SMD_RF_CLK3_A] = &msm8998_rf_clk3_a,
+	[RPM_SMD_RF_CLK3_PIN] = &msm8998_rf_clk3_pin,
+	[RPM_SMD_RF_CLK3_A_PIN] = &msm8998_rf_clk3_a_pin,
+};
+
+static const struct rpm_smd_clk_desc rpm_clk_msm8998 = {
+	.clks = msm8998_clks,
+	.num_clks = ARRAY_SIZE(msm8998_clks),
+};
+
 static const struct of_device_id rpm_smd_clk_match_table[] = {
 	{ .compatible = "qcom,rpmcc-msm8916", .data = &rpm_clk_msm8916 },
 	{ .compatible = "qcom,rpmcc-msm8974", .data = &rpm_clk_msm8974 },
 	{ .compatible = "qcom,rpmcc-msm8996", .data = &rpm_clk_msm8996 },
+	{ .compatible = "qcom,rpmcc-msm8998", .data = &rpm_clk_msm8998 },
 	{ .compatible = "qcom,rpmcc-qcs404",  .data = &rpm_clk_qcs404  },
 	{ }
 };
diff --git a/include/dt-bindings/clock/qcom,rpmcc.h b/include/dt-bindings/clock/qcom,rpmcc.h
index 3658b0c14966..ede93a0ca156 100644
--- a/include/dt-bindings/clock/qcom,rpmcc.h
+++ b/include/dt-bindings/clock/qcom,rpmcc.h
@@ -127,5 +127,15 @@
 #define RPM_SMD_BIMC_GPU_A_CLK			77
 #define RPM_SMD_QPIC_CLK			78
 #define RPM_SMD_QPIC_CLK_A			79
+#define RPM_SMD_LN_BB_CLK1			80
+#define RPM_SMD_LN_BB_CLK1_A			81
+#define RPM_SMD_LN_BB_CLK2			82
+#define RPM_SMD_LN_BB_CLK2_A			83
+#define RPM_SMD_LN_BB_CLK3_PIN			84
+#define RPM_SMD_LN_BB_CLK3_A_PIN		85
+#define RPM_SMD_RF_CLK3				86
+#define RPM_SMD_RF_CLK3_A			87
+#define RPM_SMD_RF_CLK3_PIN			88
+#define RPM_SMD_RF_CLK3_A_PIN			89
 
 #endif
-- 
cgit v1.2.3


From 19e99de9a53f9ece6baf8e9a15428aedd4b20c86 Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Date: Tue, 8 Jan 2019 10:15:36 +0100
Subject: ARM: davinci: remove dead code related to MAC address reading

There are no more users of davinci_get_mac_addr(). Remove it.

Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Signed-off-by: Sekhar Nori <nsekhar@ti.com>
---
 arch/arm/mach-davinci/common.c | 15 ---------------
 include/linux/davinci_emac.h   |  1 -
 2 files changed, 16 deletions(-)

(limited to 'include')

diff --git a/arch/arm/mach-davinci/common.c b/arch/arm/mach-davinci/common.c
index e1d0f0d841ff..0c638fe15dcb 100644
--- a/arch/arm/mach-davinci/common.c
+++ b/arch/arm/mach-davinci/common.c
@@ -26,21 +26,6 @@ EXPORT_SYMBOL(davinci_soc_info);
 void __iomem *davinci_intc_base;
 int davinci_intc_type;
 
-void davinci_get_mac_addr(struct nvmem_device *nvmem, void *context)
-{
-	char *mac_addr = davinci_soc_info.emac_pdata->mac_addr;
-	off_t offset = (off_t)context;
-
-	if (!IS_BUILTIN(CONFIG_NVMEM)) {
-		pr_warn("Cannot read MAC addr from EEPROM without CONFIG_NVMEM\n");
-		return;
-	}
-
-	/* Read MAC addr from EEPROM */
-	if (nvmem_device_read(nvmem, offset, ETH_ALEN, mac_addr) == ETH_ALEN)
-		pr_info("Read MAC addr from EEPROM: %pM\n", mac_addr);
-}
-
 static int __init davinci_init_id(struct davinci_soc_info *soc_info)
 {
 	int			i;
diff --git a/include/linux/davinci_emac.h b/include/linux/davinci_emac.h
index 05b97144d342..28e6cf1356da 100644
--- a/include/linux/davinci_emac.h
+++ b/include/linux/davinci_emac.h
@@ -46,5 +46,4 @@ enum {
 	EMAC_VERSION_2,	/* DM646x */
 };
 
-void davinci_get_mac_addr(struct nvmem_device *nvmem, void *context);
 #endif
-- 
cgit v1.2.3


From cc2d22477779f189595db5c515bd5ef9c75a1f35 Mon Sep 17 00:00:00 2001
From: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Date: Mon, 7 Jan 2019 20:49:39 +0100
Subject: pwm: Drop per-chip dbg_show callback
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This callback was introduced in commit 62099abf67a2 ("pwm: Add debugfs
interface") in 2012 and up to now there is not a single user. So drop
this unused code.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
[thierry.reding@gmail.com: remove kerneldoc for ->dbg_show()]
Signed-off-by: Thierry Reding <thierry.reding@gmail.com>
---
 drivers/pwm/core.c  | 5 +----
 include/linux/pwm.h | 4 ----
 2 files changed, 1 insertion(+), 8 deletions(-)

(limited to 'include')

diff --git a/drivers/pwm/core.c b/drivers/pwm/core.c
index 253a459fe0d8..3149204567f3 100644
--- a/drivers/pwm/core.c
+++ b/drivers/pwm/core.c
@@ -1036,10 +1036,7 @@ static int pwm_seq_show(struct seq_file *s, void *v)
 		   dev_name(chip->dev), chip->npwm,
 		   (chip->npwm != 1) ? "s" : "");
 
-	if (chip->ops->dbg_show)
-		chip->ops->dbg_show(chip, s);
-	else
-		pwm_dbg_show(chip, s);
+	pwm_dbg_show(chip, s);
 
 	return 0;
 }
diff --git a/include/linux/pwm.h b/include/linux/pwm.h
index d5199b507d79..6a544cb89de4 100644
--- a/include/linux/pwm.h
+++ b/include/linux/pwm.h
@@ -254,7 +254,6 @@ pwm_set_relative_duty_cycle(struct pwm_state *state, unsigned int duty_cycle,
  * @get_state: get the current PWM state. This function is only
  *	       called once per PWM device when the PWM chip is
  *	       registered.
- * @dbg_show: optional routine to show contents in debugfs
  * @owner: helps prevent removal of modules exporting active PWMs
  */
 struct pwm_ops {
@@ -272,9 +271,6 @@ struct pwm_ops {
 		     struct pwm_state *state);
 	void (*get_state)(struct pwm_chip *chip, struct pwm_device *pwm,
 			  struct pwm_state *state);
-#ifdef CONFIG_DEBUG_FS
-	void (*dbg_show)(struct pwm_chip *chip, struct seq_file *s);
-#endif
 	struct module *owner;
 };
 
-- 
cgit v1.2.3


From 5d0a4c11896e8b83f816f135c24b184d4ba57741 Mon Sep 17 00:00:00 2001
From: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Date: Mon, 7 Jan 2019 20:49:41 +0100
Subject: pwm: Rearrange structures to group members by purpose
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In pwm_ops there are a few callbacks that are not supposed to be used by
new drivers. Group them at the end of the structure and add a comment.

Similarily for struct pwm_chip group the members that drivers shouldn't
care about at the end and mark them as internal with another comment.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Thierry Reding <thierry.reding@gmail.com>
---
 include/linux/pwm.h | 33 ++++++++++++++++++---------------
 1 file changed, 18 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/include/linux/pwm.h b/include/linux/pwm.h
index 6a544cb89de4..b628abfffacc 100644
--- a/include/linux/pwm.h
+++ b/include/linux/pwm.h
@@ -242,11 +242,7 @@ pwm_set_relative_duty_cycle(struct pwm_state *state, unsigned int duty_cycle,
  * struct pwm_ops - PWM controller operations
  * @request: optional hook for requesting a PWM
  * @free: optional hook for freeing a PWM
- * @config: configure duty cycles and period length for this PWM
- * @set_polarity: configure the polarity of this PWM
  * @capture: capture and report PWM signal
- * @enable: enable PWM output toggling
- * @disable: disable PWM output toggling
  * @apply: atomically apply a new PWM config. The state argument
  *	   should be adjusted with the real hardware config (if the
  *	   approximate the period or duty_cycle value, state should
@@ -255,48 +251,55 @@ pwm_set_relative_duty_cycle(struct pwm_state *state, unsigned int duty_cycle,
  *	       called once per PWM device when the PWM chip is
  *	       registered.
  * @owner: helps prevent removal of modules exporting active PWMs
+ * @config: configure duty cycles and period length for this PWM
+ * @set_polarity: configure the polarity of this PWM
+ * @enable: enable PWM output toggling
+ * @disable: disable PWM output toggling
  */
 struct pwm_ops {
 	int (*request)(struct pwm_chip *chip, struct pwm_device *pwm);
 	void (*free)(struct pwm_chip *chip, struct pwm_device *pwm);
-	int (*config)(struct pwm_chip *chip, struct pwm_device *pwm,
-		      int duty_ns, int period_ns);
-	int (*set_polarity)(struct pwm_chip *chip, struct pwm_device *pwm,
-			    enum pwm_polarity polarity);
 	int (*capture)(struct pwm_chip *chip, struct pwm_device *pwm,
 		       struct pwm_capture *result, unsigned long timeout);
-	int (*enable)(struct pwm_chip *chip, struct pwm_device *pwm);
-	void (*disable)(struct pwm_chip *chip, struct pwm_device *pwm);
 	int (*apply)(struct pwm_chip *chip, struct pwm_device *pwm,
 		     struct pwm_state *state);
 	void (*get_state)(struct pwm_chip *chip, struct pwm_device *pwm,
 			  struct pwm_state *state);
 	struct module *owner;
+
+	/* Only used by legacy drivers */
+	int (*config)(struct pwm_chip *chip, struct pwm_device *pwm,
+		      int duty_ns, int period_ns);
+	int (*set_polarity)(struct pwm_chip *chip, struct pwm_device *pwm,
+			    enum pwm_polarity polarity);
+	int (*enable)(struct pwm_chip *chip, struct pwm_device *pwm);
+	void (*disable)(struct pwm_chip *chip, struct pwm_device *pwm);
 };
 
 /**
  * struct pwm_chip - abstract a PWM controller
  * @dev: device providing the PWMs
- * @list: list node for internal use
  * @ops: callbacks for this PWM controller
  * @base: number of first PWM controlled by this chip
  * @npwm: number of PWMs controlled by this chip
- * @pwms: array of PWM devices allocated by the framework
  * @of_xlate: request a PWM device given a device tree PWM specifier
  * @of_pwm_n_cells: number of cells expected in the device tree PWM specifier
+ * @list: list node for internal use
+ * @pwms: array of PWM devices allocated by the framework
  */
 struct pwm_chip {
 	struct device *dev;
-	struct list_head list;
 	const struct pwm_ops *ops;
 	int base;
 	unsigned int npwm;
 
-	struct pwm_device *pwms;
-
 	struct pwm_device * (*of_xlate)(struct pwm_chip *pc,
 					const struct of_phandle_args *args);
 	unsigned int of_pwm_n_cells;
+
+	/* only used internally by the PWM framework */
+	struct list_head list;
+	struct pwm_device *pwms;
 };
 
 /**
-- 
cgit v1.2.3


From 5a1c18b761ddb299a06746948b9ec2814b04fa92 Mon Sep 17 00:00:00 2001
From: Rafał Miłecki <rafal@milecki.pl>
Date: Wed, 2 Jan 2019 00:00:01 +0100
Subject: bcma: keep a direct pointer to the struct device
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Accessing struct device is pretty useful/common so having a direct
pointer:
1) Simplifies some code
2) Makes bcma_bus_get_host_dev() unneeded
3) Allows further improvements like using dev_* printing helpers

Signed-off-by: Rafał Miłecki <rafal@milecki.pl>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 drivers/bcma/bcma_private.h |  1 -
 drivers/bcma/driver_gpio.c  |  2 +-
 drivers/bcma/host_pci.c     |  2 ++
 drivers/bcma/host_soc.c     |  4 ++--
 drivers/bcma/main.c         | 45 ++++++++++-----------------------------------
 include/linux/bcma/bcma.h   | 11 +++--------
 6 files changed, 18 insertions(+), 47 deletions(-)

(limited to 'include')

diff --git a/drivers/bcma/bcma_private.h b/drivers/bcma/bcma_private.h
index a4aac370f21f..1f0e66310b23 100644
--- a/drivers/bcma/bcma_private.h
+++ b/drivers/bcma/bcma_private.h
@@ -33,7 +33,6 @@ int __init bcma_bus_early_register(struct bcma_bus *bus);
 int bcma_bus_suspend(struct bcma_bus *bus);
 int bcma_bus_resume(struct bcma_bus *bus);
 #endif
-struct device *bcma_bus_get_host_dev(struct bcma_bus *bus);
 
 /* scan.c */
 void bcma_detect_chip(struct bcma_bus *bus);
diff --git a/drivers/bcma/driver_gpio.c b/drivers/bcma/driver_gpio.c
index 2c0ffb77d738..a5df3d111334 100644
--- a/drivers/bcma/driver_gpio.c
+++ b/drivers/bcma/driver_gpio.c
@@ -183,7 +183,7 @@ int bcma_gpio_init(struct bcma_drv_cc *cc)
 	chip->direction_input	= bcma_gpio_direction_input;
 	chip->direction_output	= bcma_gpio_direction_output;
 	chip->owner		= THIS_MODULE;
-	chip->parent		= bcma_bus_get_host_dev(bus);
+	chip->parent		= bus->dev;
 #if IS_BUILTIN(CONFIG_OF)
 	chip->of_node		= cc->core->dev.of_node;
 #endif
diff --git a/drivers/bcma/host_pci.c b/drivers/bcma/host_pci.c
index 63410ecfe640..f52239feb4cb 100644
--- a/drivers/bcma/host_pci.c
+++ b/drivers/bcma/host_pci.c
@@ -196,6 +196,8 @@ static int bcma_host_pci_probe(struct pci_dev *dev,
 		goto err_pci_release_regions;
 	}
 
+	bus->dev = &dev->dev;
+
 	/* Map MMIO */
 	err = -ENOMEM;
 	bus->mmio = pci_iomap(dev, 0, ~0UL);
diff --git a/drivers/bcma/host_soc.c b/drivers/bcma/host_soc.c
index 2dce34789329..c8073b509a2b 100644
--- a/drivers/bcma/host_soc.c
+++ b/drivers/bcma/host_soc.c
@@ -179,7 +179,6 @@ int __init bcma_host_soc_register(struct bcma_soc *soc)
 	/* Host specific */
 	bus->hosttype = BCMA_HOSTTYPE_SOC;
 	bus->ops = &bcma_host_soc_ops;
-	bus->host_pdev = NULL;
 
 	/* Initialize struct, detect chip */
 	bcma_init_bus(bus);
@@ -213,6 +212,8 @@ static int bcma_host_soc_probe(struct platform_device *pdev)
 	if (!bus)
 		return -ENOMEM;
 
+	bus->dev = dev;
+
 	/* Map MMIO */
 	bus->mmio = of_iomap(np, 0);
 	if (!bus->mmio)
@@ -221,7 +222,6 @@ static int bcma_host_soc_probe(struct platform_device *pdev)
 	/* Host specific */
 	bus->hosttype = BCMA_HOSTTYPE_SOC;
 	bus->ops = &bcma_host_soc_ops;
-	bus->host_pdev = pdev;
 
 	/* Initialize struct, detect chip */
 	bcma_init_bus(bus);
diff --git a/drivers/bcma/main.c b/drivers/bcma/main.c
index fc1f4acdd189..6535614a7dc1 100644
--- a/drivers/bcma/main.c
+++ b/drivers/bcma/main.c
@@ -223,8 +223,8 @@ unsigned int bcma_core_irq(struct bcma_device *core, int num)
 			mips_irq = bcma_core_mips_irq(core);
 			return mips_irq <= 4 ? mips_irq + 2 : 0;
 		}
-		if (bus->host_pdev)
-			return bcma_of_get_irq(&bus->host_pdev->dev, core, num);
+		if (bus->dev)
+			return bcma_of_get_irq(bus->dev, core, num);
 		return 0;
 	case BCMA_HOSTTYPE_SDIO:
 		return 0;
@@ -239,18 +239,18 @@ void bcma_prepare_core(struct bcma_bus *bus, struct bcma_device *core)
 	core->dev.release = bcma_release_core_dev;
 	core->dev.bus = &bcma_bus_type;
 	dev_set_name(&core->dev, "bcma%d:%d", bus->num, core->core_index);
-	core->dev.parent = bcma_bus_get_host_dev(bus);
-	if (core->dev.parent)
-		bcma_of_fill_device(core->dev.parent, core);
+	core->dev.parent = bus->dev;
+	if (bus->dev)
+		bcma_of_fill_device(bus->dev, core);
 
 	switch (bus->hosttype) {
 	case BCMA_HOSTTYPE_PCI:
-		core->dma_dev = &bus->host_pci->dev;
+		core->dma_dev = bus->dev;
 		core->irq = bus->host_pci->irq;
 		break;
 	case BCMA_HOSTTYPE_SOC:
-		if (IS_ENABLED(CONFIG_OF) && bus->host_pdev) {
-			core->dma_dev = &bus->host_pdev->dev;
+		if (IS_ENABLED(CONFIG_OF) && bus->dev) {
+			core->dma_dev = bus->dev;
 		} else {
 			core->dev.dma_mask = &core->dev.coherent_dma_mask;
 			core->dma_dev = &core->dev;
@@ -261,28 +261,6 @@ void bcma_prepare_core(struct bcma_bus *bus, struct bcma_device *core)
 	}
 }
 
-struct device *bcma_bus_get_host_dev(struct bcma_bus *bus)
-{
-	switch (bus->hosttype) {
-	case BCMA_HOSTTYPE_PCI:
-		if (bus->host_pci)
-			return &bus->host_pci->dev;
-		else
-			return NULL;
-	case BCMA_HOSTTYPE_SOC:
-		if (bus->host_pdev)
-			return &bus->host_pdev->dev;
-		else
-			return NULL;
-	case BCMA_HOSTTYPE_SDIO:
-		if (bus->host_sdio)
-			return &bus->host_sdio->dev;
-		else
-			return NULL;
-	}
-	return NULL;
-}
-
 void bcma_init_bus(struct bcma_bus *bus)
 {
 	mutex_lock(&bcma_buses_mutex);
@@ -402,7 +380,6 @@ int bcma_bus_register(struct bcma_bus *bus)
 {
 	int err;
 	struct bcma_device *core;
-	struct device *dev;
 
 	/* Scan for devices (cores) */
 	err = bcma_bus_scan(bus);
@@ -425,10 +402,8 @@ int bcma_bus_register(struct bcma_bus *bus)
 		bcma_core_pci_early_init(&bus->drv_pci[0]);
 	}
 
-	dev = bcma_bus_get_host_dev(bus);
-	if (dev) {
-		of_platform_default_populate(dev->of_node, NULL, dev);
-	}
+	if (bus->dev)
+		of_platform_default_populate(bus->dev->of_node, NULL, bus->dev);
 
 	/* Cores providing flash access go before SPROM init */
 	list_for_each_entry(core, &bus->cores, list) {
diff --git a/include/linux/bcma/bcma.h b/include/linux/bcma/bcma.h
index ef61f3607e99..60b94b944e9f 100644
--- a/include/linux/bcma/bcma.h
+++ b/include/linux/bcma/bcma.h
@@ -332,6 +332,8 @@ extern int bcma_arch_register_fallback_sprom(
 		struct ssb_sprom *out));
 
 struct bcma_bus {
+	struct device *dev;
+
 	/* The MMIO area. */
 	void __iomem *mmio;
 
@@ -339,14 +341,7 @@ struct bcma_bus {
 
 	enum bcma_hosttype hosttype;
 	bool host_is_pcie2; /* Used for BCMA_HOSTTYPE_PCI only */
-	union {
-		/* Pointer to the PCI bus (only for BCMA_HOSTTYPE_PCI) */
-		struct pci_dev *host_pci;
-		/* Pointer to the SDIO device (only for BCMA_HOSTTYPE_SDIO) */
-		struct sdio_func *host_sdio;
-		/* Pointer to platform device (only for BCMA_HOSTTYPE_SOC) */
-		struct platform_device *host_pdev;
-	};
+	struct pci_dev *host_pci; /* PCI bus pointer (BCMA_HOSTTYPE_PCI only) */
 
 	struct bcma_chipinfo chipinfo;
 
-- 
cgit v1.2.3


From c1a85a00ea66cb6f0bd0f14e47c28c2b0999799f Mon Sep 17 00:00:00 2001
From: Micah Morton <mortonm@chromium.org>
Date: Mon, 7 Jan 2019 16:10:53 -0800
Subject: LSM: generalize flag passing to security_capable

This patch provides a general mechanism for passing flags to the
security_capable LSM hook. It replaces the specific 'audit' flag that is
used to tell security_capable whether it should log an audit message for
the given capability check. The reason for generalizing this flag
passing is so we can add an additional flag that signifies whether
security_capable is being called by a setid syscall (which is needed by
the proposed SafeSetID LSM).

Signed-off-by: Micah Morton <mortonm@chromium.org>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: James Morris <james.morris@microsoft.com>
---
 include/linux/lsm_hooks.h              |  8 +++++---
 include/linux/security.h               | 28 ++++++++++++++--------------
 kernel/capability.c                    | 22 +++++++++++++---------
 kernel/seccomp.c                       |  4 ++--
 security/apparmor/capability.c         | 14 +++++++-------
 security/apparmor/include/capability.h |  2 +-
 security/apparmor/ipc.c                |  3 ++-
 security/apparmor/lsm.c                |  4 ++--
 security/apparmor/resource.c           |  2 +-
 security/commoncap.c                   | 17 +++++++++--------
 security/security.c                    | 14 +++++---------
 security/selinux/hooks.c               | 18 +++++++++---------
 security/smack/smack_access.c          |  2 +-
 13 files changed, 71 insertions(+), 67 deletions(-)

(limited to 'include')

diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h
index 40511a8a5ae6..195707210975 100644
--- a/include/linux/lsm_hooks.h
+++ b/include/linux/lsm_hooks.h
@@ -1270,7 +1270,7 @@
  *	@cred contains the credentials to use.
  *	@ns contains the user namespace we want the capability in
  *	@cap contains the capability <include/linux/capability.h>.
- *	@audit contains whether to write an audit message or not
+ *	@opts contains options for the capable check <include/linux/security.h>
  *	Return 0 if the capability is granted for @tsk.
  * @syslog:
  *	Check permission before accessing the kernel message ring or changing
@@ -1446,8 +1446,10 @@ union security_list_options {
 			const kernel_cap_t *effective,
 			const kernel_cap_t *inheritable,
 			const kernel_cap_t *permitted);
-	int (*capable)(const struct cred *cred, struct user_namespace *ns,
-			int cap, int audit);
+	int (*capable)(const struct cred *cred,
+			struct user_namespace *ns,
+			int cap,
+			unsigned int opts);
 	int (*quotactl)(int cmds, int type, int id, struct super_block *sb);
 	int (*quota_on)(struct dentry *dentry);
 	int (*syslog)(int type);
diff --git a/include/linux/security.h b/include/linux/security.h
index b2c5333ed4b5..13537a49ae97 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -54,9 +54,12 @@ struct xattr;
 struct xfrm_sec_ctx;
 struct mm_struct;
 
+/* Default (no) options for the capable function */
+#define CAP_OPT_NONE 0x0
 /* If capable should audit the security request */
-#define SECURITY_CAP_NOAUDIT 0
-#define SECURITY_CAP_AUDIT 1
+#define CAP_OPT_NOAUDIT BIT(1)
+/* If capable is being called by a setid function */
+#define CAP_OPT_INSETID BIT(2)
 
 /* LSM Agnostic defines for sb_set_mnt_opts */
 #define SECURITY_LSM_NATIVE_LABELS	1
@@ -72,7 +75,7 @@ enum lsm_event {
 
 /* These functions are in security/commoncap.c */
 extern int cap_capable(const struct cred *cred, struct user_namespace *ns,
-		       int cap, int audit);
+		       int cap, unsigned int opts);
 extern int cap_settime(const struct timespec64 *ts, const struct timezone *tz);
 extern int cap_ptrace_access_check(struct task_struct *child, unsigned int mode);
 extern int cap_ptrace_traceme(struct task_struct *parent);
@@ -207,10 +210,10 @@ int security_capset(struct cred *new, const struct cred *old,
 		    const kernel_cap_t *effective,
 		    const kernel_cap_t *inheritable,
 		    const kernel_cap_t *permitted);
-int security_capable(const struct cred *cred, struct user_namespace *ns,
-			int cap);
-int security_capable_noaudit(const struct cred *cred, struct user_namespace *ns,
-			     int cap);
+int security_capable(const struct cred *cred,
+		       struct user_namespace *ns,
+		       int cap,
+		       unsigned int opts);
 int security_quotactl(int cmds, int type, int id, struct super_block *sb);
 int security_quota_on(struct dentry *dentry);
 int security_syslog(int type);
@@ -464,14 +467,11 @@ static inline int security_capset(struct cred *new,
 }
 
 static inline int security_capable(const struct cred *cred,
-				   struct user_namespace *ns, int cap)
+				   struct user_namespace *ns,
+				   int cap,
+				   unsigned int opts)
 {
-	return cap_capable(cred, ns, cap, SECURITY_CAP_AUDIT);
-}
-
-static inline int security_capable_noaudit(const struct cred *cred,
-					   struct user_namespace *ns, int cap) {
-	return cap_capable(cred, ns, cap, SECURITY_CAP_NOAUDIT);
+	return cap_capable(cred, ns, cap, opts);
 }
 
 static inline int security_quotactl(int cmds, int type, int id,
diff --git a/kernel/capability.c b/kernel/capability.c
index 1e1c0236f55b..7718d7dcadc7 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -299,7 +299,7 @@ bool has_ns_capability(struct task_struct *t,
 	int ret;
 
 	rcu_read_lock();
-	ret = security_capable(__task_cred(t), ns, cap);
+	ret = security_capable(__task_cred(t), ns, cap, CAP_OPT_NONE);
 	rcu_read_unlock();
 
 	return (ret == 0);
@@ -340,7 +340,7 @@ bool has_ns_capability_noaudit(struct task_struct *t,
 	int ret;
 
 	rcu_read_lock();
-	ret = security_capable_noaudit(__task_cred(t), ns, cap);
+	ret = security_capable(__task_cred(t), ns, cap, CAP_OPT_NOAUDIT);
 	rcu_read_unlock();
 
 	return (ret == 0);
@@ -363,7 +363,9 @@ bool has_capability_noaudit(struct task_struct *t, int cap)
 	return has_ns_capability_noaudit(t, &init_user_ns, cap);
 }
 
-static bool ns_capable_common(struct user_namespace *ns, int cap, bool audit)
+static bool ns_capable_common(struct user_namespace *ns,
+			      int cap,
+			      unsigned int opts)
 {
 	int capable;
 
@@ -372,8 +374,7 @@ static bool ns_capable_common(struct user_namespace *ns, int cap, bool audit)
 		BUG();
 	}
 
-	capable = audit ? security_capable(current_cred(), ns, cap) :
-			  security_capable_noaudit(current_cred(), ns, cap);
+	capable = security_capable(current_cred(), ns, cap, opts);
 	if (capable == 0) {
 		current->flags |= PF_SUPERPRIV;
 		return true;
@@ -394,7 +395,7 @@ static bool ns_capable_common(struct user_namespace *ns, int cap, bool audit)
  */
 bool ns_capable(struct user_namespace *ns, int cap)
 {
-	return ns_capable_common(ns, cap, true);
+	return ns_capable_common(ns, cap, CAP_OPT_NONE);
 }
 EXPORT_SYMBOL(ns_capable);
 
@@ -412,7 +413,7 @@ EXPORT_SYMBOL(ns_capable);
  */
 bool ns_capable_noaudit(struct user_namespace *ns, int cap)
 {
-	return ns_capable_common(ns, cap, false);
+	return ns_capable_common(ns, cap, CAP_OPT_NOAUDIT);
 }
 EXPORT_SYMBOL(ns_capable_noaudit);
 
@@ -448,10 +449,11 @@ EXPORT_SYMBOL(capable);
 bool file_ns_capable(const struct file *file, struct user_namespace *ns,
 		     int cap)
 {
+
 	if (WARN_ON_ONCE(!cap_valid(cap)))
 		return false;
 
-	if (security_capable(file->f_cred, ns, cap) == 0)
+	if (security_capable(file->f_cred, ns, cap, CAP_OPT_NONE) == 0)
 		return true;
 
 	return false;
@@ -500,10 +502,12 @@ bool ptracer_capable(struct task_struct *tsk, struct user_namespace *ns)
 {
 	int ret = 0;  /* An absent tracer adds no restrictions */
 	const struct cred *cred;
+
 	rcu_read_lock();
 	cred = rcu_dereference(tsk->ptracer_cred);
 	if (cred)
-		ret = security_capable_noaudit(cred, ns, CAP_SYS_PTRACE);
+		ret = security_capable(cred, ns, CAP_SYS_PTRACE,
+				       CAP_OPT_NOAUDIT);
 	rcu_read_unlock();
 	return (ret == 0);
 }
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index d7f538847b84..38a77800def6 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -443,8 +443,8 @@ static struct seccomp_filter *seccomp_prepare_filter(struct sock_fprog *fprog)
 	 * behavior of privileged children.
 	 */
 	if (!task_no_new_privs(current) &&
-	    security_capable_noaudit(current_cred(), current_user_ns(),
-				     CAP_SYS_ADMIN) != 0)
+	    security_capable(current_cred(), current_user_ns(),
+				     CAP_SYS_ADMIN, CAP_OPT_NOAUDIT) != 0)
 		return ERR_PTR(-EACCES);
 
 	/* Allocate a new seccomp_filter */
diff --git a/security/apparmor/capability.c b/security/apparmor/capability.c
index 253ef6e9d445..752f73980e30 100644
--- a/security/apparmor/capability.c
+++ b/security/apparmor/capability.c
@@ -110,13 +110,13 @@ static int audit_caps(struct common_audit_data *sa, struct aa_profile *profile,
  * profile_capable - test if profile allows use of capability @cap
  * @profile: profile being enforced    (NOT NULL, NOT unconfined)
  * @cap: capability to test if allowed
- * @audit: whether an audit record should be generated
+ * @opts: CAP_OPT_NOAUDIT bit determines whether audit record is generated
  * @sa: audit data (MAY BE NULL indicating no auditing)
  *
  * Returns: 0 if allowed else -EPERM
  */
-static int profile_capable(struct aa_profile *profile, int cap, int audit,
-			   struct common_audit_data *sa)
+static int profile_capable(struct aa_profile *profile, int cap,
+			   unsigned int opts, struct common_audit_data *sa)
 {
 	int error;
 
@@ -126,7 +126,7 @@ static int profile_capable(struct aa_profile *profile, int cap, int audit,
 	else
 		error = -EPERM;
 
-	if (audit == SECURITY_CAP_NOAUDIT) {
+	if (opts & CAP_OPT_NOAUDIT) {
 		if (!COMPLAIN_MODE(profile))
 			return error;
 		/* audit the cap request in complain mode but note that it
@@ -142,13 +142,13 @@ static int profile_capable(struct aa_profile *profile, int cap, int audit,
  * aa_capable - test permission to use capability
  * @label: label being tested for capability (NOT NULL)
  * @cap: capability to be tested
- * @audit: whether an audit record should be generated
+ * @opts: CAP_OPT_NOAUDIT bit determines whether audit record is generated
  *
  * Look up capability in profile capability set.
  *
  * Returns: 0 on success, or else an error code.
  */
-int aa_capable(struct aa_label *label, int cap, int audit)
+int aa_capable(struct aa_label *label, int cap, unsigned int opts)
 {
 	struct aa_profile *profile;
 	int error = 0;
@@ -156,7 +156,7 @@ int aa_capable(struct aa_label *label, int cap, int audit)
 
 	sa.u.cap = cap;
 	error = fn_for_each_confined(label, profile,
-			profile_capable(profile, cap, audit, &sa));
+			profile_capable(profile, cap, opts, &sa));
 
 	return error;
 }
diff --git a/security/apparmor/include/capability.h b/security/apparmor/include/capability.h
index e0304e2aeb7f..1b3663b6ab12 100644
--- a/security/apparmor/include/capability.h
+++ b/security/apparmor/include/capability.h
@@ -40,7 +40,7 @@ struct aa_caps {
 
 extern struct aa_sfs_entry aa_sfs_entry_caps[];
 
-int aa_capable(struct aa_label *label, int cap, int audit);
+int aa_capable(struct aa_label *label, int cap, unsigned int opts);
 
 static inline void aa_free_cap_rules(struct aa_caps *caps)
 {
diff --git a/security/apparmor/ipc.c b/security/apparmor/ipc.c
index 527ea1557120..aacd1e95cb59 100644
--- a/security/apparmor/ipc.c
+++ b/security/apparmor/ipc.c
@@ -107,7 +107,8 @@ static int profile_tracer_perm(struct aa_profile *tracer,
 	aad(sa)->label = &tracer->label;
 	aad(sa)->peer = tracee;
 	aad(sa)->request = 0;
-	aad(sa)->error = aa_capable(&tracer->label, CAP_SYS_PTRACE, 1);
+	aad(sa)->error = aa_capable(&tracer->label, CAP_SYS_PTRACE,
+				    CAP_OPT_NONE);
 
 	return aa_audit(AUDIT_APPARMOR_AUTO, tracer, sa, audit_ptrace_cb);
 }
diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c
index 60ef71268ccf..b6c395e2acd0 100644
--- a/security/apparmor/lsm.c
+++ b/security/apparmor/lsm.c
@@ -172,14 +172,14 @@ static int apparmor_capget(struct task_struct *target, kernel_cap_t *effective,
 }
 
 static int apparmor_capable(const struct cred *cred, struct user_namespace *ns,
-			    int cap, int audit)
+			    int cap, unsigned int opts)
 {
 	struct aa_label *label;
 	int error = 0;
 
 	label = aa_get_newest_cred_label(cred);
 	if (!unconfined(label))
-		error = aa_capable(label, cap, audit);
+		error = aa_capable(label, cap, opts);
 	aa_put_label(label);
 
 	return error;
diff --git a/security/apparmor/resource.c b/security/apparmor/resource.c
index 95fd26d09757..552ed09cb47e 100644
--- a/security/apparmor/resource.c
+++ b/security/apparmor/resource.c
@@ -124,7 +124,7 @@ int aa_task_setrlimit(struct aa_label *label, struct task_struct *task,
 	 */
 
 	if (label != peer &&
-	    aa_capable(label, CAP_SYS_RESOURCE, SECURITY_CAP_NOAUDIT) != 0)
+	    aa_capable(label, CAP_SYS_RESOURCE, CAP_OPT_NOAUDIT) != 0)
 		error = fn_for_each(label, profile,
 				audit_resource(profile, resource,
 					       new_rlim->rlim_max, peer,
diff --git a/security/commoncap.c b/security/commoncap.c
index 52e04136bfa8..188eaf59f82f 100644
--- a/security/commoncap.c
+++ b/security/commoncap.c
@@ -68,7 +68,7 @@ static void warn_setuid_and_fcaps_mixed(const char *fname)
  * kernel's capable() and has_capability() returns 1 for this case.
  */
 int cap_capable(const struct cred *cred, struct user_namespace *targ_ns,
-		int cap, int audit)
+		int cap, unsigned int opts)
 {
 	struct user_namespace *ns = targ_ns;
 
@@ -222,12 +222,11 @@ int cap_capget(struct task_struct *target, kernel_cap_t *effective,
  */
 static inline int cap_inh_is_capped(void)
 {
-
 	/* they are so limited unless the current task has the CAP_SETPCAP
 	 * capability
 	 */
 	if (cap_capable(current_cred(), current_cred()->user_ns,
-			CAP_SETPCAP, SECURITY_CAP_AUDIT) == 0)
+			CAP_SETPCAP, CAP_OPT_NONE) == 0)
 		return 0;
 	return 1;
 }
@@ -1208,8 +1207,9 @@ int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3,
 		    || ((old->securebits & SECURE_ALL_LOCKS & ~arg2))	/*[2]*/
 		    || (arg2 & ~(SECURE_ALL_LOCKS | SECURE_ALL_BITS))	/*[3]*/
 		    || (cap_capable(current_cred(),
-				    current_cred()->user_ns, CAP_SETPCAP,
-				    SECURITY_CAP_AUDIT) != 0)		/*[4]*/
+				    current_cred()->user_ns,
+				    CAP_SETPCAP,
+				    CAP_OPT_NONE) != 0)			/*[4]*/
 			/*
 			 * [1] no changing of bits that are locked
 			 * [2] no unlocking of locks
@@ -1304,9 +1304,10 @@ int cap_vm_enough_memory(struct mm_struct *mm, long pages)
 {
 	int cap_sys_admin = 0;
 
-	if (cap_capable(current_cred(), &init_user_ns, CAP_SYS_ADMIN,
-			SECURITY_CAP_NOAUDIT) == 0)
+	if (cap_capable(current_cred(), &init_user_ns,
+				CAP_SYS_ADMIN, CAP_OPT_NOAUDIT) == 0)
 		cap_sys_admin = 1;
+
 	return cap_sys_admin;
 }
 
@@ -1325,7 +1326,7 @@ int cap_mmap_addr(unsigned long addr)
 
 	if (addr < dac_mmap_min_addr) {
 		ret = cap_capable(current_cred(), &init_user_ns, CAP_SYS_RAWIO,
-				  SECURITY_CAP_AUDIT);
+				  CAP_OPT_NONE);
 		/* set PF_SUPERPRIV if it turns out we allow the low mmap */
 		if (ret == 0)
 			current->flags |= PF_SUPERPRIV;
diff --git a/security/security.c b/security/security.c
index 953fc3ea18a9..a618e22df5c6 100644
--- a/security/security.c
+++ b/security/security.c
@@ -689,16 +689,12 @@ int security_capset(struct cred *new, const struct cred *old,
 				effective, inheritable, permitted);
 }
 
-int security_capable(const struct cred *cred, struct user_namespace *ns,
-		     int cap)
+int security_capable(const struct cred *cred,
+		     struct user_namespace *ns,
+		     int cap,
+		     unsigned int opts)
 {
-	return call_int_hook(capable, 0, cred, ns, cap, SECURITY_CAP_AUDIT);
-}
-
-int security_capable_noaudit(const struct cred *cred, struct user_namespace *ns,
-			     int cap)
-{
-	return call_int_hook(capable, 0, cred, ns, cap, SECURITY_CAP_NOAUDIT);
+	return call_int_hook(capable, 0, cred, ns, cap, opts);
 }
 
 int security_quotactl(int cmds, int type, int id, struct super_block *sb)
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index d98e1d8d18f6..b2ee49f938f1 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -1578,7 +1578,7 @@ static inline u32 signal_to_av(int sig)
 
 /* Check whether a task is allowed to use a capability. */
 static int cred_has_capability(const struct cred *cred,
-			       int cap, int audit, bool initns)
+			       int cap, unsigned int opts, bool initns)
 {
 	struct common_audit_data ad;
 	struct av_decision avd;
@@ -1605,7 +1605,7 @@ static int cred_has_capability(const struct cred *cred,
 
 	rc = avc_has_perm_noaudit(&selinux_state,
 				  sid, sid, sclass, av, 0, &avd);
-	if (audit == SECURITY_CAP_AUDIT) {
+	if (!(opts & CAP_OPT_NOAUDIT)) {
 		int rc2 = avc_audit(&selinux_state,
 				    sid, sid, sclass, av, &avd, rc, &ad, 0);
 		if (rc2)
@@ -2125,9 +2125,9 @@ static int selinux_capset(struct cred *new, const struct cred *old,
  */
 
 static int selinux_capable(const struct cred *cred, struct user_namespace *ns,
-			   int cap, int audit)
+			   int cap, unsigned int opts)
 {
-	return cred_has_capability(cred, cap, audit, ns == &init_user_ns);
+	return cred_has_capability(cred, cap, opts, ns == &init_user_ns);
 }
 
 static int selinux_quotactl(int cmds, int type, int id, struct super_block *sb)
@@ -2201,7 +2201,7 @@ static int selinux_vm_enough_memory(struct mm_struct *mm, long pages)
 	int rc, cap_sys_admin = 0;
 
 	rc = cred_has_capability(current_cred(), CAP_SYS_ADMIN,
-				 SECURITY_CAP_NOAUDIT, true);
+				 CAP_OPT_NOAUDIT, true);
 	if (rc == 0)
 		cap_sys_admin = 1;
 
@@ -2988,11 +2988,11 @@ static int selinux_inode_getattr(const struct path *path)
 static bool has_cap_mac_admin(bool audit)
 {
 	const struct cred *cred = current_cred();
-	int cap_audit = audit ? SECURITY_CAP_AUDIT : SECURITY_CAP_NOAUDIT;
+	unsigned int opts = audit ? CAP_OPT_NONE : CAP_OPT_NOAUDIT;
 
-	if (cap_capable(cred, &init_user_ns, CAP_MAC_ADMIN, cap_audit))
+	if (cap_capable(cred, &init_user_ns, CAP_MAC_ADMIN, opts))
 		return false;
-	if (cred_has_capability(cred, CAP_MAC_ADMIN, cap_audit, true))
+	if (cred_has_capability(cred, CAP_MAC_ADMIN, opts, true))
 		return false;
 	return true;
 }
@@ -3387,7 +3387,7 @@ static int selinux_file_ioctl(struct file *file, unsigned int cmd,
 	case KDSKBENT:
 	case KDSKBSENT:
 		error = cred_has_capability(cred, CAP_SYS_TTY_CONFIG,
-					    SECURITY_CAP_AUDIT, true);
+					    CAP_OPT_NONE, true);
 		break;
 
 	/* default case assumes that the command will go
diff --git a/security/smack/smack_access.c b/security/smack/smack_access.c
index 489d49a20b47..fe2ce3a65822 100644
--- a/security/smack/smack_access.c
+++ b/security/smack/smack_access.c
@@ -640,7 +640,7 @@ bool smack_privileged_cred(int cap, const struct cred *cred)
 	struct smack_known_list_elem *sklep;
 	int rc;
 
-	rc = cap_capable(cred, &init_user_ns, cap, SECURITY_CAP_AUDIT);
+	rc = cap_capable(cred, &init_user_ns, cap, CAP_OPT_NONE);
 	if (rc)
 		return false;
 
-- 
cgit v1.2.3


From b0ea0fa5435f9df7213a9af098558f7dd584d8e8 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@mellanox.com>
Date: Wed, 9 Jan 2019 11:15:16 +0200
Subject: IB/{core,hw}: Have ib_umem_get extract the ib_ucontext from ib_udata

ib_umem_get() can only be called in a method callback, which always has a
udata parameter. This allows ib_umem_get() to derive the ucontext pointer
directly from the udata without requiring the drivers to find it in some
way or another.

Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Shamir Rabinovitch <shamir.rabinovitch@oracle.com>
---
 drivers/infiniband/core/umem.c                |  9 ++++--
 drivers/infiniband/core/uverbs_main.c         | 24 ++++++++++++++++
 drivers/infiniband/hw/bnxt_re/ib_verbs.c      | 13 ++++-----
 drivers/infiniband/hw/cxgb3/iwch_provider.c   |  2 +-
 drivers/infiniband/hw/cxgb4/mem.c             |  2 +-
 drivers/infiniband/hw/hns/hns_roce_cq.c       |  9 +++---
 drivers/infiniband/hw/hns/hns_roce_db.c       |  6 ++--
 drivers/infiniband/hw/hns/hns_roce_device.h   |  3 +-
 drivers/infiniband/hw/hns/hns_roce_mr.c       |  7 ++---
 drivers/infiniband/hw/hns/hns_roce_qp.c       | 13 ++++-----
 drivers/infiniband/hw/hns/hns_roce_srq.c      |  7 ++---
 drivers/infiniband/hw/i40iw/i40iw_verbs.c     |  2 +-
 drivers/infiniband/hw/mlx4/cq.c               | 17 ++++++------
 drivers/infiniband/hw/mlx4/doorbell.c         |  6 ++--
 drivers/infiniband/hw/mlx4/mlx4_ib.h          |  3 +-
 drivers/infiniband/hw/mlx4/mr.c               | 11 ++++----
 drivers/infiniband/hw/mlx4/qp.c               | 14 ++++++----
 drivers/infiniband/hw/mlx4/srq.c              |  5 ++--
 drivers/infiniband/hw/mlx5/cq.c               | 11 ++++----
 drivers/infiniband/hw/mlx5/devx.c             |  2 +-
 drivers/infiniband/hw/mlx5/doorbell.c         |  6 ++--
 drivers/infiniband/hw/mlx5/mlx5_ib.h          |  4 ++-
 drivers/infiniband/hw/mlx5/mr.c               | 22 +++++++--------
 drivers/infiniband/hw/mlx5/odp.c              |  4 +--
 drivers/infiniband/hw/mlx5/qp.c               | 40 ++++++++++++---------------
 drivers/infiniband/hw/mlx5/srq.c              |  5 ++--
 drivers/infiniband/hw/mthca/mthca_provider.c  |  2 +-
 drivers/infiniband/hw/nes/nes_verbs.c         |  2 +-
 drivers/infiniband/hw/ocrdma/ocrdma_verbs.c   |  2 +-
 drivers/infiniband/hw/qedr/verbs.c            | 33 +++++++++++-----------
 drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c  |  2 +-
 drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c  |  3 +-
 drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c  |  6 ++--
 drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c |  4 +--
 drivers/infiniband/sw/rdmavt/mr.c             |  3 +-
 drivers/infiniband/sw/rxe/rxe_mr.c            |  2 +-
 include/rdma/ib_umem.h                        |  8 ++++--
 include/rdma/ib_verbs.h                       |  1 +
 38 files changed, 168 insertions(+), 147 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index c6144df47ea4..1efe0a74e06b 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -72,15 +72,16 @@ static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int d
  * If access flags indicate ODP memory, avoid pinning. Instead, stores
  * the mm for future page fault handling in conjunction with MMU notifiers.
  *
- * @context: userspace context to pin memory for
+ * @udata: userspace context to pin memory for
  * @addr: userspace virtual address to start at
  * @size: length of region to pin
  * @access: IB_ACCESS_xxx flags for memory being pinned
  * @dmasync: flush in-flight DMA when the memory region is written
  */
-struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
+struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr,
 			    size_t size, int access, int dmasync)
 {
+	struct ib_ucontext *context;
 	struct ib_umem *umem;
 	struct page **page_list;
 	struct vm_area_struct **vma_list;
@@ -95,6 +96,10 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
 	struct scatterlist *sg, *sg_list_start;
 	unsigned int gup_flags = FOLL_WRITE;
 
+	context = rdma_get_ucontext(udata);
+	if (IS_ERR(context))
+		return ERR_CAST(context);
+
 	if (dmasync)
 		dma_attrs |= DMA_ATTR_WRITE_BARRIER;
 
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index fb0007aa0c27..996f167d1436 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -101,6 +101,30 @@ struct ib_ucontext *ib_uverbs_get_ucontext_file(struct ib_uverbs_file *ufile)
 }
 EXPORT_SYMBOL(ib_uverbs_get_ucontext_file);
 
+/* rdma_get_ucontext - Return the ucontext from a udata
+ * @udata: The udata to get the context from
+ *
+ * This can only be called from within a uapi method that was passed ib_udata
+ * as a parameter. It returns the ucontext associated with the udata, or ERR_PTR
+ * if the udata is NULL or the ucontext has been disassociated.
+ */
+struct ib_ucontext *rdma_get_ucontext(struct ib_udata *udata)
+{
+	if (!udata)
+		return ERR_PTR(-EIO);
+
+	/*
+	 * FIXME: Really all cases that get here with a udata will have
+	 * already called ib_uverbs_get_ucontext_file, or located a uobject
+	 * that points to a ucontext. We could store that result in the udata
+	 * so this function can't fail.
+	 */
+	return ib_uverbs_get_ucontext_file(
+		container_of(udata, struct uverbs_attr_bundle, driver_udata)
+			->ufile);
+}
+EXPORT_SYMBOL(rdma_get_ucontext);
+
 int uverbs_dealloc_mw(struct ib_mw *mw)
 {
 	struct ib_pd *pd = mw->pd;
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
index 1e2515e2eb62..9bc637e49faa 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -895,8 +895,7 @@ static int bnxt_re_init_user_qp(struct bnxt_re_dev *rdev, struct bnxt_re_pd *pd,
 	if (qplib_qp->type == CMDQ_CREATE_QP_TYPE_RC)
 		bytes += (qplib_qp->sq.max_wqe * sizeof(struct sq_psn_search));
 	bytes = PAGE_ALIGN(bytes);
-	umem = ib_umem_get(context, ureq.qpsva, bytes,
-			   IB_ACCESS_LOCAL_WRITE, 1);
+	umem = ib_umem_get(udata, ureq.qpsva, bytes, IB_ACCESS_LOCAL_WRITE, 1);
 	if (IS_ERR(umem))
 		return PTR_ERR(umem);
 
@@ -908,7 +907,7 @@ static int bnxt_re_init_user_qp(struct bnxt_re_dev *rdev, struct bnxt_re_pd *pd,
 	if (!qp->qplib_qp.srq) {
 		bytes = (qplib_qp->rq.max_wqe * BNXT_QPLIB_MAX_RQE_ENTRY_SIZE);
 		bytes = PAGE_ALIGN(bytes);
-		umem = ib_umem_get(context, ureq.qprva, bytes,
+		umem = ib_umem_get(udata, ureq.qprva, bytes,
 				   IB_ACCESS_LOCAL_WRITE, 1);
 		if (IS_ERR(umem))
 			goto rqfail;
@@ -1370,8 +1369,7 @@ static int bnxt_re_init_user_srq(struct bnxt_re_dev *rdev,
 
 	bytes = (qplib_srq->max_wqe * BNXT_QPLIB_MAX_RQE_ENTRY_SIZE);
 	bytes = PAGE_ALIGN(bytes);
-	umem = ib_umem_get(context, ureq.srqva, bytes,
-			   IB_ACCESS_LOCAL_WRITE, 1);
+	umem = ib_umem_get(udata, ureq.srqva, bytes, IB_ACCESS_LOCAL_WRITE, 1);
 	if (IS_ERR(umem))
 		return PTR_ERR(umem);
 
@@ -2622,7 +2620,7 @@ struct ib_cq *bnxt_re_create_cq(struct ib_device *ibdev,
 			goto fail;
 		}
 
-		cq->umem = ib_umem_get(context, req.cq_va,
+		cq->umem = ib_umem_get(udata, req.cq_va,
 				       entries * sizeof(struct cq_base),
 				       IB_ACCESS_LOCAL_WRITE, 1);
 		if (IS_ERR(cq->umem)) {
@@ -3589,8 +3587,7 @@ struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length,
 	/* The fixed portion of the rkey is the same as the lkey */
 	mr->ib_mr.rkey = mr->qplib_mr.rkey;
 
-	umem = ib_umem_get(ib_pd->uobject->context, start, length,
-			   mr_access_flags, 0);
+	umem = ib_umem_get(udata, start, length, mr_access_flags, 0);
 	if (IS_ERR(umem)) {
 		dev_err(rdev_to_dev(rdev), "Failed to get umem");
 		rc = -EFAULT;
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c
index b34b1a1bd94b..92ee6761a3bd 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c
@@ -540,7 +540,7 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 
 	mhp->rhp = rhp;
 
-	mhp->umem = ib_umem_get(pd->uobject->context, start, length, acc, 0);
+	mhp->umem = ib_umem_get(udata, start, length, acc, 0);
 	if (IS_ERR(mhp->umem)) {
 		err = PTR_ERR(mhp->umem);
 		kfree(mhp);
diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c
index 7b76e6f81aeb..96760a36b9fc 100644
--- a/drivers/infiniband/hw/cxgb4/mem.c
+++ b/drivers/infiniband/hw/cxgb4/mem.c
@@ -537,7 +537,7 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 
 	mhp->rhp = rhp;
 
-	mhp->umem = ib_umem_get(pd->uobject->context, start, length, acc, 0);
+	mhp->umem = ib_umem_get(udata, start, length, acc, 0);
 	if (IS_ERR(mhp->umem))
 		goto err_free_skb;
 
diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c
index 3a485f50fede..1dfe5627006c 100644
--- a/drivers/infiniband/hw/hns/hns_roce_cq.c
+++ b/drivers/infiniband/hw/hns/hns_roce_cq.c
@@ -215,7 +215,7 @@ void hns_roce_free_cq(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq)
 EXPORT_SYMBOL_GPL(hns_roce_free_cq);
 
 static int hns_roce_ib_get_cq_umem(struct hns_roce_dev *hr_dev,
-				   struct ib_ucontext *context,
+				   struct ib_udata *udata,
 				   struct hns_roce_cq_buf *buf,
 				   struct ib_umem **umem, u64 buf_addr, int cqe)
 {
@@ -223,7 +223,7 @@ static int hns_roce_ib_get_cq_umem(struct hns_roce_dev *hr_dev,
 	u32 page_shift;
 	u32 npages;
 
-	*umem = ib_umem_get(context, buf_addr, cqe * hr_dev->caps.cq_entry_sz,
+	*umem = ib_umem_get(udata, buf_addr, cqe * hr_dev->caps.cq_entry_sz,
 			    IB_ACCESS_LOCAL_WRITE, 1);
 	if (IS_ERR(*umem))
 		return PTR_ERR(*umem);
@@ -347,7 +347,7 @@ struct ib_cq *hns_roce_ib_create_cq(struct ib_device *ib_dev,
 		}
 
 		/* Get user space address, write it into mtt table */
-		ret = hns_roce_ib_get_cq_umem(hr_dev, context, &hr_cq->hr_buf,
+		ret = hns_roce_ib_get_cq_umem(hr_dev, udata, &hr_cq->hr_buf,
 					      &hr_cq->umem, ucmd.buf_addr,
 					      cq_entries);
 		if (ret) {
@@ -358,7 +358,8 @@ struct ib_cq *hns_roce_ib_create_cq(struct ib_device *ib_dev,
 		if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) &&
 		    (udata->outlen >= sizeof(resp))) {
 			ret = hns_roce_db_map_user(to_hr_ucontext(context),
-						   ucmd.db_addr, &hr_cq->db);
+						   udata, ucmd.db_addr,
+						   &hr_cq->db);
 			if (ret) {
 				dev_err(dev, "cq record doorbell map failed!\n");
 				goto err_mtt;
diff --git a/drivers/infiniband/hw/hns/hns_roce_db.c b/drivers/infiniband/hw/hns/hns_roce_db.c
index e2f93c1ce86a..0c6c1fe87705 100644
--- a/drivers/infiniband/hw/hns/hns_roce_db.c
+++ b/drivers/infiniband/hw/hns/hns_roce_db.c
@@ -8,7 +8,8 @@
 #include <rdma/ib_umem.h>
 #include "hns_roce_device.h"
 
-int hns_roce_db_map_user(struct hns_roce_ucontext *context, unsigned long virt,
+int hns_roce_db_map_user(struct hns_roce_ucontext *context,
+			 struct ib_udata *udata, unsigned long virt,
 			 struct hns_roce_db *db)
 {
 	struct hns_roce_user_db_page *page;
@@ -28,8 +29,7 @@ int hns_roce_db_map_user(struct hns_roce_ucontext *context, unsigned long virt,
 
 	refcount_set(&page->refcount, 1);
 	page->user_virt = (virt & PAGE_MASK);
-	page->umem = ib_umem_get(&context->ibucontext, virt & PAGE_MASK,
-				 PAGE_SIZE, 0, 0);
+	page->umem = ib_umem_get(udata, virt & PAGE_MASK, PAGE_SIZE, 0, 0);
 	if (IS_ERR(page->umem)) {
 		ret = PTR_ERR(page->umem);
 		kfree(page);
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index 509e467843f6..9a4be70936e0 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -1133,7 +1133,8 @@ struct ib_cq *hns_roce_ib_create_cq(struct ib_device *ib_dev,
 int hns_roce_ib_destroy_cq(struct ib_cq *ib_cq);
 void hns_roce_free_cq(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq);
 
-int hns_roce_db_map_user(struct hns_roce_ucontext *context, unsigned long virt,
+int hns_roce_db_map_user(struct hns_roce_ucontext *context,
+			 struct ib_udata *udata, unsigned long virt,
 			 struct hns_roce_db *db);
 void hns_roce_db_unmap_user(struct hns_roce_ucontext *context,
 			    struct hns_roce_db *db);
diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c
index ee5991bd4171..da4fffedb879 100644
--- a/drivers/infiniband/hw/hns/hns_roce_mr.c
+++ b/drivers/infiniband/hw/hns/hns_roce_mr.c
@@ -1110,8 +1110,7 @@ struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 	if (!mr)
 		return ERR_PTR(-ENOMEM);
 
-	mr->umem = ib_umem_get(pd->uobject->context, start, length,
-			       access_flags, 0);
+	mr->umem = ib_umem_get(udata, start, length, access_flags, 0);
 	if (IS_ERR(mr->umem)) {
 		ret = PTR_ERR(mr->umem);
 		goto err_free;
@@ -1220,8 +1219,8 @@ int hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, u64 length,
 		}
 		ib_umem_release(mr->umem);
 
-		mr->umem = ib_umem_get(ibmr->uobject->context, start, length,
-				       mr_access_flags, 0);
+		mr->umem =
+			ib_umem_get(udata, start, length, mr_access_flags, 0);
 		if (IS_ERR(mr->umem)) {
 			ret = PTR_ERR(mr->umem);
 			mr->umem = NULL;
diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c
index b9ba140b50d4..accf9ce1507d 100644
--- a/drivers/infiniband/hw/hns/hns_roce_qp.c
+++ b/drivers/infiniband/hw/hns/hns_roce_qp.c
@@ -613,9 +613,8 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
 			goto err_rq_sge_list;
 		}
 
-		hr_qp->umem = ib_umem_get(ib_pd->uobject->context,
-					  ucmd.buf_addr, hr_qp->buff_size, 0,
-					  0);
+		hr_qp->umem = ib_umem_get(udata, ucmd.buf_addr,
+					  hr_qp->buff_size, 0, 0);
 		if (IS_ERR(hr_qp->umem)) {
 			dev_err(dev, "ib_umem_get error for create qp\n");
 			ret = PTR_ERR(hr_qp->umem);
@@ -654,8 +653,8 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
 		    (udata->outlen >= sizeof(resp)) &&
 		    hns_roce_qp_has_sq(init_attr)) {
 			ret = hns_roce_db_map_user(
-					to_hr_ucontext(ib_pd->uobject->context),
-					ucmd.sdb_addr, &hr_qp->sdb);
+				to_hr_ucontext(ib_pd->uobject->context), udata,
+				ucmd.sdb_addr, &hr_qp->sdb);
 			if (ret) {
 				dev_err(dev, "sq record doorbell map failed!\n");
 				goto err_mtt;
@@ -670,8 +669,8 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
 		    (udata->outlen >= sizeof(resp)) &&
 		    hns_roce_qp_has_rq(init_attr)) {
 			ret = hns_roce_db_map_user(
-					to_hr_ucontext(ib_pd->uobject->context),
-					ucmd.db_addr, &hr_qp->rdb);
+				to_hr_ucontext(ib_pd->uobject->context), udata,
+				ucmd.db_addr, &hr_qp->rdb);
 			if (ret) {
 				dev_err(dev, "rq record doorbell map failed!\n");
 				goto err_sq_dbmap;
diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c
index 960b1946c365..8975f858b36f 100644
--- a/drivers/infiniband/hw/hns/hns_roce_srq.c
+++ b/drivers/infiniband/hw/hns/hns_roce_srq.c
@@ -252,8 +252,8 @@ struct ib_srq *hns_roce_create_srq(struct ib_pd *pd,
 			goto err_srq;
 		}
 
-		srq->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr,
-					srq_buf_size, 0, 0);
+		srq->umem =
+			ib_umem_get(udata, ucmd.buf_addr, srq_buf_size, 0, 0);
 		if (IS_ERR(srq->umem)) {
 			ret = PTR_ERR(srq->umem);
 			goto err_srq;
@@ -280,8 +280,7 @@ struct ib_srq *hns_roce_create_srq(struct ib_pd *pd,
 			goto err_srq_mtt;
 
 		/* config index queue BA */
-		srq->idx_que.umem = ib_umem_get(pd->uobject->context,
-						ucmd.que_addr,
+		srq->idx_que.umem = ib_umem_get(udata, ucmd.que_addr,
 						srq->idx_que.buf_size, 0, 0);
 		if (IS_ERR(srq->idx_que.umem)) {
 			dev_err(hr_dev->dev,
diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
index 0b675b0742c2..80b66df95362 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
@@ -1852,7 +1852,7 @@ static struct ib_mr *i40iw_reg_user_mr(struct ib_pd *pd,
 
 	if (length > I40IW_MAX_MR_SIZE)
 		return ERR_PTR(-EINVAL);
-	region = ib_umem_get(pd->uobject->context, start, length, acc, 0);
+	region = ib_umem_get(udata, start, length, acc, 0);
 	if (IS_ERR(region))
 		return (struct ib_mr *)region;
 
diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c
index 43512347b4f0..db936c12b5bd 100644
--- a/drivers/infiniband/hw/mlx4/cq.c
+++ b/drivers/infiniband/hw/mlx4/cq.c
@@ -134,16 +134,16 @@ static void mlx4_ib_free_cq_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq_buf *
 	mlx4_buf_free(dev->dev, (cqe + 1) * buf->entry_size, &buf->buf);
 }
 
-static int mlx4_ib_get_cq_umem(struct mlx4_ib_dev *dev, struct ib_ucontext *context,
-			       struct mlx4_ib_cq_buf *buf, struct ib_umem **umem,
-			       u64 buf_addr, int cqe)
+static int mlx4_ib_get_cq_umem(struct mlx4_ib_dev *dev, struct ib_udata *udata,
+			       struct mlx4_ib_cq_buf *buf,
+			       struct ib_umem **umem, u64 buf_addr, int cqe)
 {
 	int err;
 	int cqe_size = dev->dev->caps.cqe_size;
 	int shift;
 	int n;
 
-	*umem = ib_umem_get(context, buf_addr, cqe * cqe_size,
+	*umem = ib_umem_get(udata, buf_addr, cqe * cqe_size,
 			    IB_ACCESS_LOCAL_WRITE, 1);
 	if (IS_ERR(*umem))
 		return PTR_ERR(*umem);
@@ -213,14 +213,13 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev,
 		}
 
 		buf_addr = (void *)(unsigned long)ucmd.buf_addr;
-
-		err = mlx4_ib_get_cq_umem(dev, context, &cq->buf, &cq->umem,
+		err = mlx4_ib_get_cq_umem(dev, udata, &cq->buf, &cq->umem,
 					  ucmd.buf_addr, entries);
 		if (err)
 			goto err_cq;
 
-		err = mlx4_ib_db_map_user(to_mucontext(context), ucmd.db_addr,
-					  &cq->db);
+		err = mlx4_ib_db_map_user(to_mucontext(context), udata,
+					  ucmd.db_addr, &cq->db);
 		if (err)
 			goto err_mtt;
 
@@ -336,7 +335,7 @@ static int mlx4_alloc_resize_umem(struct mlx4_ib_dev *dev, struct mlx4_ib_cq *cq
 	if (!cq->resize_buf)
 		return -ENOMEM;
 
-	err = mlx4_ib_get_cq_umem(dev, cq->umem->context, &cq->resize_buf->buf,
+	err = mlx4_ib_get_cq_umem(dev, udata, &cq->resize_buf->buf,
 				  &cq->resize_umem, ucmd.buf_addr, entries);
 	if (err) {
 		kfree(cq->resize_buf);
diff --git a/drivers/infiniband/hw/mlx4/doorbell.c b/drivers/infiniband/hw/mlx4/doorbell.c
index c51740986367..3aab71b29ce8 100644
--- a/drivers/infiniband/hw/mlx4/doorbell.c
+++ b/drivers/infiniband/hw/mlx4/doorbell.c
@@ -41,7 +41,8 @@ struct mlx4_ib_user_db_page {
 	int			refcnt;
 };
 
-int mlx4_ib_db_map_user(struct mlx4_ib_ucontext *context, unsigned long virt,
+int mlx4_ib_db_map_user(struct mlx4_ib_ucontext *context,
+			struct ib_udata *udata, unsigned long virt,
 			struct mlx4_db *db)
 {
 	struct mlx4_ib_user_db_page *page;
@@ -61,8 +62,7 @@ int mlx4_ib_db_map_user(struct mlx4_ib_ucontext *context, unsigned long virt,
 
 	page->user_virt = (virt & PAGE_MASK);
 	page->refcnt    = 0;
-	page->umem      = ib_umem_get(&context->ibucontext, virt & PAGE_MASK,
-				      PAGE_SIZE, 0, 0);
+	page->umem = ib_umem_get(udata, virt & PAGE_MASK, PAGE_SIZE, 0, 0);
 	if (IS_ERR(page->umem)) {
 		err = PTR_ERR(page->umem);
 		kfree(page);
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index e491f3eda6e7..60dc1347c5ab 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -722,7 +722,8 @@ static inline u8 mlx4_ib_bond_next_port(struct mlx4_ib_dev *dev)
 int mlx4_ib_init_sriov(struct mlx4_ib_dev *dev);
 void mlx4_ib_close_sriov(struct mlx4_ib_dev *dev);
 
-int mlx4_ib_db_map_user(struct mlx4_ib_ucontext *context, unsigned long virt,
+int mlx4_ib_db_map_user(struct mlx4_ib_ucontext *context,
+			struct ib_udata *udata, unsigned long virt,
 			struct mlx4_db *db);
 void mlx4_ib_db_unmap_user(struct mlx4_ib_ucontext *context, struct mlx4_db *db);
 
diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c
index c7c85c22e4e3..56639ecd53ad 100644
--- a/drivers/infiniband/hw/mlx4/mr.c
+++ b/drivers/infiniband/hw/mlx4/mr.c
@@ -367,7 +367,8 @@ end:
 	return block_shift;
 }
 
-static struct ib_umem *mlx4_get_umem_mr(struct ib_ucontext *context, u64 start,
+static struct ib_umem *mlx4_get_umem_mr(struct ib_ucontext *context,
+					struct ib_udata *udata, u64 start,
 					u64 length, u64 virt_addr,
 					int access_flags)
 {
@@ -398,7 +399,7 @@ static struct ib_umem *mlx4_get_umem_mr(struct ib_ucontext *context, u64 start,
 		up_read(&current->mm->mmap_sem);
 	}
 
-	return ib_umem_get(context, start, length, access_flags, 0);
+	return ib_umem_get(udata, start, length, access_flags, 0);
 }
 
 struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
@@ -415,7 +416,7 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 	if (!mr)
 		return ERR_PTR(-ENOMEM);
 
-	mr->umem = mlx4_get_umem_mr(pd->uobject->context, start, length,
+	mr->umem = mlx4_get_umem_mr(pd->uobject->context, udata, start, length,
 				    virt_addr, access_flags);
 	if (IS_ERR(mr->umem)) {
 		err = PTR_ERR(mr->umem);
@@ -506,8 +507,8 @@ int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags,
 		mlx4_mr_rereg_mem_cleanup(dev->dev, &mmr->mmr);
 		ib_umem_release(mmr->umem);
 		mmr->umem =
-			mlx4_get_umem_mr(mr->uobject->context, start, length,
-					 virt_addr, mr_access_flags);
+			mlx4_get_umem_mr(mr->uobject->context, udata, start,
+					 length, virt_addr, mr_access_flags);
 		if (IS_ERR(mmr->umem)) {
 			err = PTR_ERR(mmr->umem);
 			/* Prevent mlx4_ib_dereg_mr from free'ing invalid pointer */
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 971e9a9ebdaf..e38bab50cecf 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -1015,9 +1015,11 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
 				(qp->sq.wqe_cnt << qp->sq.wqe_shift);
 		}
 
-		qp->umem = ib_umem_get(pd->uobject->context,
-				(src == MLX4_IB_QP_SRC) ? ucmd.qp.buf_addr :
-				ucmd.wq.buf_addr, qp->buf_size, 0, 0);
+		qp->umem =
+			ib_umem_get(udata,
+				    (src == MLX4_IB_QP_SRC) ? ucmd.qp.buf_addr :
+							      ucmd.wq.buf_addr,
+				    qp->buf_size, 0, 0);
 		if (IS_ERR(qp->umem)) {
 			err = PTR_ERR(qp->umem);
 			goto err;
@@ -1035,9 +1037,11 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
 			goto err_mtt;
 
 		if (qp_has_rq(init_attr)) {
-			err = mlx4_ib_db_map_user(to_mucontext(pd->uobject->context),
+			err = mlx4_ib_db_map_user(
+				to_mucontext(pd->uobject->context), udata,
 				(src == MLX4_IB_QP_SRC) ? ucmd.qp.db_addr :
-				ucmd.wq.db_addr, &qp->db);
+							  ucmd.wq.db_addr,
+				&qp->db);
 			if (err)
 				goto err_mtt;
 		}
diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c
index 4456f1b8921d..498588eac051 100644
--- a/drivers/infiniband/hw/mlx4/srq.c
+++ b/drivers/infiniband/hw/mlx4/srq.c
@@ -113,8 +113,7 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
 			goto err_srq;
 		}
 
-		srq->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr,
-					buf_size, 0, 0);
+		srq->umem = ib_umem_get(udata, ucmd.buf_addr, buf_size, 0, 0);
 		if (IS_ERR(srq->umem)) {
 			err = PTR_ERR(srq->umem);
 			goto err_srq;
@@ -130,7 +129,7 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
 			goto err_mtt;
 
 		err = mlx4_ib_db_map_user(to_mucontext(pd->uobject->context),
-					  ucmd.db_addr, &srq->db);
+					  udata, ucmd.db_addr, &srq->db);
 		if (err)
 			goto err_mtt;
 	} else {
diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index 90f1b0bae5b5..c283c32f30fe 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -707,15 +707,15 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata,
 
 	*cqe_size = ucmd.cqe_size;
 
-	cq->buf.umem = ib_umem_get(context, ucmd.buf_addr,
-				   entries * ucmd.cqe_size,
-				   IB_ACCESS_LOCAL_WRITE, 1);
+	cq->buf.umem =
+		ib_umem_get(udata, ucmd.buf_addr, entries * ucmd.cqe_size,
+			    IB_ACCESS_LOCAL_WRITE, 1);
 	if (IS_ERR(cq->buf.umem)) {
 		err = PTR_ERR(cq->buf.umem);
 		return err;
 	}
 
-	err = mlx5_ib_db_map_user(to_mucontext(context), ucmd.db_addr,
+	err = mlx5_ib_db_map_user(to_mucontext(context), udata, ucmd.db_addr,
 				  &cq->db);
 	if (err)
 		goto err_umem;
@@ -1111,7 +1111,6 @@ static int resize_user(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
 	struct ib_umem *umem;
 	int err;
 	int npages;
-	struct ib_ucontext *context = cq->buf.umem->context;
 
 	err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd));
 	if (err)
@@ -1124,7 +1123,7 @@ static int resize_user(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
 	if (ucmd.cqe_size && SIZE_MAX / ucmd.cqe_size <= entries - 1)
 		return -EINVAL;
 
-	umem = ib_umem_get(context, ucmd.buf_addr,
+	umem = ib_umem_get(udata, ucmd.buf_addr,
 			   (size_t)ucmd.cqe_size * entries,
 			   IB_ACCESS_LOCAL_WRITE, 1);
 	if (IS_ERR(umem)) {
diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c
index 5a588f3cfb1b..8bb711da7ee1 100644
--- a/drivers/infiniband/hw/mlx5/devx.c
+++ b/drivers/infiniband/hw/mlx5/devx.c
@@ -1195,7 +1195,7 @@ static int devx_umem_get(struct mlx5_ib_dev *dev, struct ib_ucontext *ucontext,
 	if (err)
 		return err;
 
-	obj->umem = ib_umem_get(ucontext, addr, size, access, 0);
+	obj->umem = ib_umem_get(&attrs->driver_udata, addr, size, access, 0);
 	if (IS_ERR(obj->umem))
 		return PTR_ERR(obj->umem);
 
diff --git a/drivers/infiniband/hw/mlx5/doorbell.c b/drivers/infiniband/hw/mlx5/doorbell.c
index a0e4e6ddb71a..8f4e5f22b84c 100644
--- a/drivers/infiniband/hw/mlx5/doorbell.c
+++ b/drivers/infiniband/hw/mlx5/doorbell.c
@@ -43,7 +43,8 @@ struct mlx5_ib_user_db_page {
 	int			refcnt;
 };
 
-int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context, unsigned long virt,
+int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context,
+			struct ib_udata *udata, unsigned long virt,
 			struct mlx5_db *db)
 {
 	struct mlx5_ib_user_db_page *page;
@@ -63,8 +64,7 @@ int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context, unsigned long virt,
 
 	page->user_virt = (virt & PAGE_MASK);
 	page->refcnt    = 0;
-	page->umem      = ib_umem_get(&context->ibucontext, virt & PAGE_MASK,
-				      PAGE_SIZE, 0, 0);
+	page->umem = ib_umem_get(udata, virt & PAGE_MASK, PAGE_SIZE, 0, 0);
 	if (IS_ERR(page->umem)) {
 		err = PTR_ERR(page->umem);
 		kfree(page);
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 81efa5def8ad..b0a37ca2a714 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -1037,7 +1037,8 @@ to_mflow_act(struct ib_flow_action *ibact)
 	return container_of(ibact, struct mlx5_ib_flow_action, ib_action);
 }
 
-int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context, unsigned long virt,
+int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context,
+			struct ib_udata *udata, unsigned long virt,
 			struct mlx5_db *db);
 void mlx5_ib_db_unmap_user(struct mlx5_ib_ucontext *context, struct mlx5_db *db);
 void __mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq);
@@ -1103,6 +1104,7 @@ int mlx5_ib_dealloc_mw(struct ib_mw *mw);
 int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
 		       int page_shift, int flags);
 struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd,
+					     struct ib_udata *udata,
 					     int access_flags);
 void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *mr);
 int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 494a90f4348c..6d763d6a189f 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -843,18 +843,17 @@ static int mr_cache_max_order(struct mlx5_ib_dev *dev)
 	return MLX5_MAX_UMR_SHIFT;
 }
 
-static int mr_umem_get(struct ib_pd *pd, u64 start, u64 length,
-		       int access_flags, struct ib_umem **umem,
-		       int *npages, int *page_shift, int *ncont,
-		       int *order)
+static int mr_umem_get(struct mlx5_ib_dev *dev, struct ib_udata *udata,
+		       u64 start, u64 length, int access_flags,
+		       struct ib_umem **umem, int *npages, int *page_shift,
+		       int *ncont, int *order)
 {
-	struct mlx5_ib_dev *dev = to_mdev(pd->device);
 	struct ib_umem *u;
 	int err;
 
 	*umem = NULL;
 
-	u = ib_umem_get(pd->uobject->context, start, length, access_flags, 0);
+	u = ib_umem_get(udata, start, length, access_flags, 0);
 	err = PTR_ERR_OR_ZERO(u);
 	if (err) {
 		mlx5_ib_dbg(dev, "umem get failed (%d)\n", err);
@@ -1333,14 +1332,14 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 		    !(dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT))
 			return ERR_PTR(-EINVAL);
 
-		mr = mlx5_ib_alloc_implicit_mr(to_mpd(pd), access_flags);
+		mr = mlx5_ib_alloc_implicit_mr(to_mpd(pd), udata, access_flags);
 		if (IS_ERR(mr))
 			return ERR_CAST(mr);
 		return &mr->ibmr;
 	}
 
-	err = mr_umem_get(pd, start, length, access_flags, &umem, &npages,
-			   &page_shift, &ncont, &order);
+	err = mr_umem_get(dev, udata, start, length, access_flags, &umem,
+			  &npages, &page_shift, &ncont, &order);
 
 	if (err < 0)
 		return ERR_PTR(err);
@@ -1488,8 +1487,9 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
 		flags |= IB_MR_REREG_TRANS;
 		ib_umem_release(mr->umem);
 		mr->umem = NULL;
-		err = mr_umem_get(pd, addr, len, access_flags, &mr->umem,
-				  &npages, &page_shift, &ncont, &order);
+		err = mr_umem_get(dev, udata, addr, len, access_flags,
+				  &mr->umem, &npages, &page_shift, &ncont,
+				  &order);
 		if (err)
 			goto err;
 	}
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index 37d6653f9636..8d46b1dc5658 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -492,13 +492,13 @@ next_mr:
 }
 
 struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd,
+					     struct ib_udata *udata,
 					     int access_flags)
 {
-	struct ib_ucontext *ctx = pd->ibpd.uobject->context;
 	struct mlx5_ib_mr *imr;
 	struct ib_umem *umem;
 
-	umem = ib_umem_get(ctx, 0, 0, IB_ACCESS_ON_DEMAND, 0);
+	umem = ib_umem_get(udata, 0, 0, IB_ACCESS_ON_DEMAND, 0);
 	if (IS_ERR(umem))
 		return ERR_CAST(umem);
 
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index dd2ae640bc84..529e76f67cb6 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -645,16 +645,14 @@ int bfregn_to_uar_index(struct mlx5_ib_dev *dev,
 	return bfregi->sys_pages[index_of_sys_page] + offset;
 }
 
-static int mlx5_ib_umem_get(struct mlx5_ib_dev *dev,
-			    struct ib_pd *pd,
+static int mlx5_ib_umem_get(struct mlx5_ib_dev *dev, struct ib_udata *udata,
 			    unsigned long addr, size_t size,
-			    struct ib_umem **umem,
-			    int *npages, int *page_shift, int *ncont,
-			    u32 *offset)
+			    struct ib_umem **umem, int *npages, int *page_shift,
+			    int *ncont, u32 *offset)
 {
 	int err;
 
-	*umem = ib_umem_get(pd->uobject->context, addr, size, 0, 0);
+	*umem = ib_umem_get(udata, addr, size, 0, 0);
 	if (IS_ERR(*umem)) {
 		mlx5_ib_dbg(dev, "umem_get failed\n");
 		return PTR_ERR(*umem);
@@ -695,10 +693,9 @@ static void destroy_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd,
 }
 
 static int create_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd,
-			  struct mlx5_ib_rwq *rwq,
+			  struct ib_udata *udata, struct mlx5_ib_rwq *rwq,
 			  struct mlx5_ib_create_wq *ucmd)
 {
-	struct mlx5_ib_ucontext *context;
 	int page_shift = 0;
 	int npages;
 	u32 offset = 0;
@@ -708,9 +705,7 @@ static int create_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd,
 	if (!ucmd->buf_addr)
 		return -EINVAL;
 
-	context = to_mucontext(pd->uobject->context);
-	rwq->umem = ib_umem_get(pd->uobject->context, ucmd->buf_addr,
-			       rwq->buf_size, 0, 0);
+	rwq->umem = ib_umem_get(udata, ucmd->buf_addr, rwq->buf_size, 0, 0);
 	if (IS_ERR(rwq->umem)) {
 		mlx5_ib_dbg(dev, "umem_get failed\n");
 		err = PTR_ERR(rwq->umem);
@@ -735,7 +730,8 @@ static int create_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd,
 		    (unsigned long long)ucmd->buf_addr, rwq->buf_size,
 		    npages, page_shift, ncont, offset);
 
-	err = mlx5_ib_db_map_user(context, ucmd->db_addr, &rwq->db);
+	err = mlx5_ib_db_map_user(to_mucontext(pd->uobject->context), udata,
+				  ucmd->db_addr, &rwq->db);
 	if (err) {
 		mlx5_ib_dbg(dev, "map failed\n");
 		goto err_umem;
@@ -819,10 +815,9 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
 
 	if (ucmd.buf_addr && ubuffer->buf_size) {
 		ubuffer->buf_addr = ucmd.buf_addr;
-		err = mlx5_ib_umem_get(dev, pd, ubuffer->buf_addr,
-				       ubuffer->buf_size,
-				       &ubuffer->umem, &npages, &page_shift,
-				       &ncont, &offset);
+		err = mlx5_ib_umem_get(dev, udata, ubuffer->buf_addr,
+				       ubuffer->buf_size, &ubuffer->umem,
+				       &npages, &page_shift, &ncont, &offset);
 		if (err)
 			goto err_bfreg;
 	} else {
@@ -856,7 +851,7 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
 		resp->bfreg_index = MLX5_IB_INVALID_BFREG;
 	qp->bfregn = bfregn;
 
-	err = mlx5_ib_db_map_user(context, ucmd.db_addr, &qp->db);
+	err = mlx5_ib_db_map_user(context, udata, ucmd.db_addr, &qp->db);
 	if (err) {
 		mlx5_ib_dbg(dev, "map failed\n");
 		goto err_free;
@@ -1119,6 +1114,7 @@ static void destroy_flow_rule_vport_sq(struct mlx5_ib_dev *dev,
 }
 
 static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev,
+				   struct ib_udata *udata,
 				   struct mlx5_ib_sq *sq, void *qpin,
 				   struct ib_pd *pd)
 {
@@ -1135,9 +1131,9 @@ static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev,
 	int ncont = 0;
 	u32 offset = 0;
 
-	err = mlx5_ib_umem_get(dev, pd, ubuffer->buf_addr, ubuffer->buf_size,
-			       &sq->ubuffer.umem, &npages, &page_shift,
-			       &ncont, &offset);
+	err = mlx5_ib_umem_get(dev, udata, ubuffer->buf_addr, ubuffer->buf_size,
+			       &sq->ubuffer.umem, &npages, &page_shift, &ncont,
+			       &offset);
 	if (err)
 		return err;
 
@@ -1374,7 +1370,7 @@ static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
 		if (err)
 			return err;
 
-		err = create_raw_packet_qp_sq(dev, sq, in, pd);
+		err = create_raw_packet_qp_sq(dev, udata, sq, in, pd);
 		if (err)
 			goto err_destroy_tis;
 
@@ -5793,7 +5789,7 @@ static int prepare_user_rq(struct ib_pd *pd,
 		return err;
 	}
 
-	err = create_user_rq(dev, pd, rwq, &ucmd);
+	err = create_user_rq(dev, pd, udata, rwq, &ucmd);
 	if (err) {
 		mlx5_ib_dbg(dev, "err %d\n", err);
 		return err;
diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c
index 4e8d18009f58..22bd774e0b4e 100644
--- a/drivers/infiniband/hw/mlx5/srq.c
+++ b/drivers/infiniband/hw/mlx5/srq.c
@@ -79,8 +79,7 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
 
 	srq->wq_sig = !!(ucmd.flags & MLX5_SRQ_FLAG_SIGNATURE);
 
-	srq->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr, buf_size,
-				0, 0);
+	srq->umem = ib_umem_get(udata, ucmd.buf_addr, buf_size, 0, 0);
 	if (IS_ERR(srq->umem)) {
 		mlx5_ib_dbg(dev, "failed umem get, size %d\n", buf_size);
 		err = PTR_ERR(srq->umem);
@@ -104,7 +103,7 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
 
 	mlx5_ib_populate_pas(dev, srq->umem, page_shift, in->pas, 0);
 
-	err = mlx5_ib_db_map_user(to_mucontext(pd->uobject->context),
+	err = mlx5_ib_db_map_user(to_mucontext(pd->uobject->context), udata,
 				  ucmd.db_addr, &srq->db);
 	if (err) {
 		mlx5_ib_dbg(dev, "map doorbell failed\n");
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index 82cb6b71ac7c..53fff6aed896 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -931,7 +931,7 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 	if (!mr)
 		return ERR_PTR(-ENOMEM);
 
-	mr->umem = ib_umem_get(pd->uobject->context, start, length, acc,
+	mr->umem = ib_umem_get(udata, start, length, acc,
 			       ucmd.mr_attrs & MTHCA_MR_DMASYNC);
 
 	if (IS_ERR(mr->umem)) {
diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c
index 4e7f08ee1907..feb4d259aab9 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.c
+++ b/drivers/infiniband/hw/nes/nes_verbs.c
@@ -2134,7 +2134,7 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 	u8 stag_key;
 	int first_page = 1;
 
-	region = ib_umem_get(pd->uobject->context, start, length, acc, 0);
+	region = ib_umem_get(udata, start, length, acc, 0);
 	if (IS_ERR(region)) {
 		return (struct ib_mr *)region;
 	}
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
index c46bed0c5513..a8b40cb7679e 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
@@ -916,7 +916,7 @@ struct ib_mr *ocrdma_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
 	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
 	if (!mr)
 		return ERR_PTR(status);
-	mr->umem = ib_umem_get(ibpd->uobject->context, start, len, acc, 0);
+	mr->umem = ib_umem_get(udata, start, len, acc, 0);
 	if (IS_ERR(mr->umem)) {
 		status = -EFAULT;
 		goto umem_err;
diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c
index b342a70e2814..ba8cb6559122 100644
--- a/drivers/infiniband/hw/qedr/verbs.c
+++ b/drivers/infiniband/hw/qedr/verbs.c
@@ -736,11 +736,10 @@ static inline int qedr_align_cq_entries(int entries)
 	return aligned_size / QEDR_CQE_SIZE;
 }
 
-static inline int qedr_init_user_queue(struct ib_ucontext *ib_ctx,
+static inline int qedr_init_user_queue(struct ib_udata *udata,
 				       struct qedr_dev *dev,
-				       struct qedr_userq *q,
-				       u64 buf_addr, size_t buf_len,
-				       int access, int dmasync,
+				       struct qedr_userq *q, u64 buf_addr,
+				       size_t buf_len, int access, int dmasync,
 				       int alloc_and_init)
 {
 	u32 fw_pages;
@@ -748,7 +747,7 @@ static inline int qedr_init_user_queue(struct ib_ucontext *ib_ctx,
 
 	q->buf_addr = buf_addr;
 	q->buf_len = buf_len;
-	q->umem = ib_umem_get(ib_ctx, q->buf_addr, q->buf_len, access, dmasync);
+	q->umem = ib_umem_get(udata, q->buf_addr, q->buf_len, access, dmasync);
 	if (IS_ERR(q->umem)) {
 		DP_ERR(dev, "create user queue: failed ib_umem_get, got %ld\n",
 		       PTR_ERR(q->umem));
@@ -905,9 +904,9 @@ struct ib_cq *qedr_create_cq(struct ib_device *ibdev,
 
 		cq->cq_type = QEDR_CQ_TYPE_USER;
 
-		rc = qedr_init_user_queue(ib_ctx, dev, &cq->q, ureq.addr,
-					  ureq.len, IB_ACCESS_LOCAL_WRITE,
-					  1, 1);
+		rc = qedr_init_user_queue(udata, dev, &cq->q, ureq.addr,
+					  ureq.len, IB_ACCESS_LOCAL_WRITE, 1,
+					  1);
 		if (rc)
 			goto err0;
 
@@ -1344,7 +1343,7 @@ static void qedr_free_srq_kernel_params(struct qedr_srq *srq)
 			  hw_srq->phy_prod_pair_addr);
 }
 
-static int qedr_init_srq_user_params(struct ib_ucontext *ib_ctx,
+static int qedr_init_srq_user_params(struct ib_udata *udata,
 				     struct qedr_srq *srq,
 				     struct qedr_create_srq_ureq *ureq,
 				     int access, int dmasync)
@@ -1352,14 +1351,14 @@ static int qedr_init_srq_user_params(struct ib_ucontext *ib_ctx,
 	struct scatterlist *sg;
 	int rc;
 
-	rc = qedr_init_user_queue(ib_ctx, srq->dev, &srq->usrq, ureq->srq_addr,
+	rc = qedr_init_user_queue(udata, srq->dev, &srq->usrq, ureq->srq_addr,
 				  ureq->srq_len, access, dmasync, 1);
 	if (rc)
 		return rc;
 
-	srq->prod_umem = ib_umem_get(ib_ctx, ureq->prod_pair_addr,
-				     sizeof(struct rdma_srq_producers),
-				     access, dmasync);
+	srq->prod_umem =
+		ib_umem_get(udata, ureq->prod_pair_addr,
+			    sizeof(struct rdma_srq_producers), access, dmasync);
 	if (IS_ERR(srq->prod_umem)) {
 		qedr_free_pbl(srq->dev, &srq->usrq.pbl_info, srq->usrq.pbl_tbl);
 		ib_umem_release(srq->usrq.umem);
@@ -1468,7 +1467,7 @@ struct ib_srq *qedr_create_srq(struct ib_pd *ibpd,
 			goto err0;
 		}
 
-		rc = qedr_init_srq_user_params(ib_ctx, srq, &ureq, 0, 0);
+		rc = qedr_init_srq_user_params(udata, srq, &ureq, 0, 0);
 		if (rc)
 			goto err0;
 
@@ -1714,14 +1713,14 @@ static int qedr_create_user_qp(struct qedr_dev *dev,
 	}
 
 	/* SQ - read access only (0), dma sync not required (0) */
-	rc = qedr_init_user_queue(ib_ctx, dev, &qp->usq, ureq.sq_addr,
+	rc = qedr_init_user_queue(udata, dev, &qp->usq, ureq.sq_addr,
 				  ureq.sq_len, 0, 0, alloc_and_init);
 	if (rc)
 		return rc;
 
 	if (!qp->srq) {
 		/* RQ - read access only (0), dma sync not required (0) */
-		rc = qedr_init_user_queue(ib_ctx, dev, &qp->urq, ureq.rq_addr,
+		rc = qedr_init_user_queue(udata, dev, &qp->urq, ureq.rq_addr,
 					  ureq.rq_len, 0, 0, alloc_and_init);
 		if (rc)
 			return rc;
@@ -2719,7 +2718,7 @@ struct ib_mr *qedr_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
 
 	mr->type = QEDR_MR_USER;
 
-	mr->umem = ib_umem_get(ibpd->uobject->context, start, len, acc, 0);
+	mr->umem = ib_umem_get(udata, start, len, acc, 0);
 	if (IS_ERR(mr->umem)) {
 		rc = -EFAULT;
 		goto err0;
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c
index 0f004c737620..104c7db4704f 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c
@@ -141,7 +141,7 @@ struct ib_cq *pvrdma_create_cq(struct ib_device *ibdev,
 			goto err_cq;
 		}
 
-		cq->umem = ib_umem_get(context, ucmd.buf_addr, ucmd.buf_size,
+		cq->umem = ib_umem_get(udata, ucmd.buf_addr, ucmd.buf_size,
 				       IB_ACCESS_LOCAL_WRITE, 1);
 		if (IS_ERR(cq->umem)) {
 			ret = PTR_ERR(cq->umem);
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c
index fa96fa4fb829..a85884e90e84 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c
@@ -126,8 +126,7 @@ struct ib_mr *pvrdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 		return ERR_PTR(-EINVAL);
 	}
 
-	umem = ib_umem_get(pd->uobject->context, start,
-			   length, access_flags, 0);
+	umem = ib_umem_get(udata, start, length, access_flags, 0);
 	if (IS_ERR(umem)) {
 		dev_warn(&dev->pdev->dev,
 			 "could not get umem for mem region\n");
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
index 3acf74cbe266..5fc444cef011 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
@@ -262,8 +262,7 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd,
 
 			if (!is_srq) {
 				/* set qp->sq.wqe_cnt, shift, buf_size.. */
-				qp->rumem = ib_umem_get(pd->uobject->context,
-							ucmd.rbuf_addr,
+				qp->rumem = ib_umem_get(udata, ucmd.rbuf_addr,
 							ucmd.rbuf_size, 0, 0);
 				if (IS_ERR(qp->rumem)) {
 					ret = PTR_ERR(qp->rumem);
@@ -275,8 +274,7 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd,
 				qp->srq = to_vsrq(init_attr->srq);
 			}
 
-			qp->sumem = ib_umem_get(pd->uobject->context,
-						ucmd.sbuf_addr,
+			qp->sumem = ib_umem_get(udata, ucmd.sbuf_addr,
 						ucmd.sbuf_size, 0, 0);
 			if (IS_ERR(qp->sumem)) {
 				if (!is_srq)
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c
index 06ba7c7a2235..951d9d68107a 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c
@@ -153,9 +153,7 @@ struct ib_srq *pvrdma_create_srq(struct ib_pd *pd,
 		goto err_srq;
 	}
 
-	srq->umem = ib_umem_get(pd->uobject->context,
-				ucmd.buf_addr,
-				ucmd.buf_size, 0, 0);
+	srq->umem = ib_umem_get(udata, ucmd.buf_addr, ucmd.buf_size, 0, 0);
 	if (IS_ERR(srq->umem)) {
 		ret = PTR_ERR(srq->umem);
 		goto err_srq;
diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c
index 49c9541050d4..8b1c1e8dd7ef 100644
--- a/drivers/infiniband/sw/rdmavt/mr.c
+++ b/drivers/infiniband/sw/rdmavt/mr.c
@@ -388,8 +388,7 @@ struct ib_mr *rvt_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 	if (length == 0)
 		return ERR_PTR(-EINVAL);
 
-	umem = ib_umem_get(pd->uobject->context, start, length,
-			   mr_access_flags, 0);
+	umem = ib_umem_get(udata, start, length, mr_access_flags, 0);
 	if (IS_ERR(umem))
 		return (void *)umem;
 
diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c
index 9d3916b93f23..2438093776a0 100644
--- a/drivers/infiniband/sw/rxe/rxe_mr.c
+++ b/drivers/infiniband/sw/rxe/rxe_mr.c
@@ -171,7 +171,7 @@ int rxe_mem_init_user(struct rxe_pd *pd, u64 start,
 	void			*vaddr;
 	int err;
 
-	umem = ib_umem_get(pd->ibpd.uobject->context, start, length, access, 0);
+	umem = ib_umem_get(udata, start, length, access, 0);
 	if (IS_ERR(umem)) {
 		pr_warn("err %d from rxe_umem_get\n",
 			(int)PTR_ERR(umem));
diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h
index 5d3755ec5afa..73af05db04c7 100644
--- a/include/rdma/ib_umem.h
+++ b/include/rdma/ib_umem.h
@@ -36,6 +36,7 @@
 #include <linux/list.h>
 #include <linux/scatterlist.h>
 #include <linux/workqueue.h>
+#include <rdma/ib_verbs.h>
 
 struct ib_ucontext;
 struct ib_umem_odp;
@@ -80,7 +81,7 @@ static inline size_t ib_umem_num_pages(struct ib_umem *umem)
 
 #ifdef CONFIG_INFINIBAND_USER_MEM
 
-struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
+struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr,
 			    size_t size, int access, int dmasync);
 void ib_umem_release(struct ib_umem *umem);
 int ib_umem_page_count(struct ib_umem *umem);
@@ -91,9 +92,10 @@ int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset,
 
 #include <linux/err.h>
 
-static inline struct ib_umem *ib_umem_get(struct ib_ucontext *context,
+static inline struct ib_umem *ib_umem_get(struct ib_udata *udata,
 					  unsigned long addr, size_t size,
-					  int access, int dmasync) {
+					  int access, int dmasync)
+{
 	return ERR_PTR(-EINVAL);
 }
 static inline void ib_umem_release(struct ib_umem *umem) { }
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 3ddd199ba602..aa1f126d3383 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -4200,6 +4200,7 @@ void rdma_roce_rescan_device(struct ib_device *ibdev);
 
 struct ib_ucontext *ib_uverbs_get_ucontext_file(struct ib_uverbs_file *ufile);
 
+struct ib_ucontext *rdma_get_ucontext(struct ib_udata *udata);
 
 int uverbs_destroy_def_handler(struct uverbs_attr_bundle *attrs);
 
-- 
cgit v1.2.3


From bec9ba7f37631e794cbfaa4c2274074d631217a9 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Sun, 16 Dec 2018 19:12:18 -0800
Subject: crypto: cipher - remove struct cipher_desc

'struct cipher_desc' is unused.  Remove it.

Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/linux/crypto.h | 8 --------
 1 file changed, 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/crypto.h b/include/linux/crypto.h
index 902ec171fc6d..c3c98a62e503 100644
--- a/include/linux/crypto.h
+++ b/include/linux/crypto.h
@@ -188,14 +188,6 @@ struct blkcipher_desc {
 	u32 flags;
 };
 
-struct cipher_desc {
-	struct crypto_tfm *tfm;
-	void (*crfn)(struct crypto_tfm *tfm, u8 *dst, const u8 *src);
-	unsigned int (*prfn)(const struct cipher_desc *desc, u8 *dst,
-			     const u8 *src, unsigned int nbytes);
-	void *info;
-};
-
 /**
  * DOC: Block Cipher Algorithm Definitions
  *
-- 
cgit v1.2.3


From 0872da16dd632e5d1d3f80388f7ae6fbeb17ad53 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Thu, 3 Jan 2019 20:16:14 -0800
Subject: crypto: skcipher - add helper for simple block cipher modes

The majority of skcipher templates (including both the existing ones and
the ones remaining to be converted from the "blkcipher" API) just wrap a
single block cipher algorithm.  This includes cbc, cfb, ctr, ecb, kw,
ofb, and pcbc.  Add a helper function skcipher_alloc_instance_simple()
that handles allocating an skcipher instance for this common case.

Signed-off-by: Eric Biggers <ebiggers@google.com>
Reviewed-by: Stephan Mueller <smueller@chronox.de>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/skcipher.c                  | 131 +++++++++++++++++++++++++++++++++++++
 include/crypto/internal/skcipher.h |  15 +++++
 2 files changed, 146 insertions(+)

(limited to 'include')

diff --git a/crypto/skcipher.c b/crypto/skcipher.c
index 2a969296bc24..040ae6377b32 100644
--- a/crypto/skcipher.c
+++ b/crypto/skcipher.c
@@ -1058,5 +1058,136 @@ int skcipher_register_instance(struct crypto_template *tmpl,
 }
 EXPORT_SYMBOL_GPL(skcipher_register_instance);
 
+static int skcipher_setkey_simple(struct crypto_skcipher *tfm, const u8 *key,
+				  unsigned int keylen)
+{
+	struct crypto_cipher *cipher = skcipher_cipher_simple(tfm);
+	int err;
+
+	crypto_cipher_clear_flags(cipher, CRYPTO_TFM_REQ_MASK);
+	crypto_cipher_set_flags(cipher, crypto_skcipher_get_flags(tfm) &
+				CRYPTO_TFM_REQ_MASK);
+	err = crypto_cipher_setkey(cipher, key, keylen);
+	crypto_skcipher_set_flags(tfm, crypto_cipher_get_flags(cipher) &
+				  CRYPTO_TFM_RES_MASK);
+	return err;
+}
+
+static int skcipher_init_tfm_simple(struct crypto_skcipher *tfm)
+{
+	struct skcipher_instance *inst = skcipher_alg_instance(tfm);
+	struct crypto_spawn *spawn = skcipher_instance_ctx(inst);
+	struct skcipher_ctx_simple *ctx = crypto_skcipher_ctx(tfm);
+	struct crypto_cipher *cipher;
+
+	cipher = crypto_spawn_cipher(spawn);
+	if (IS_ERR(cipher))
+		return PTR_ERR(cipher);
+
+	ctx->cipher = cipher;
+	return 0;
+}
+
+static void skcipher_exit_tfm_simple(struct crypto_skcipher *tfm)
+{
+	struct skcipher_ctx_simple *ctx = crypto_skcipher_ctx(tfm);
+
+	crypto_free_cipher(ctx->cipher);
+}
+
+static void skcipher_free_instance_simple(struct skcipher_instance *inst)
+{
+	crypto_drop_spawn(skcipher_instance_ctx(inst));
+	kfree(inst);
+}
+
+/**
+ * skcipher_alloc_instance_simple - allocate instance of simple block cipher mode
+ *
+ * Allocate an skcipher_instance for a simple block cipher mode of operation,
+ * e.g. cbc or ecb.  The instance context will have just a single crypto_spawn,
+ * that for the underlying cipher.  The {min,max}_keysize, ivsize, blocksize,
+ * alignmask, and priority are set from the underlying cipher but can be
+ * overridden if needed.  The tfm context defaults to skcipher_ctx_simple, and
+ * default ->setkey(), ->init(), and ->exit() methods are installed.
+ *
+ * @tmpl: the template being instantiated
+ * @tb: the template parameters
+ * @cipher_alg_ret: on success, a pointer to the underlying cipher algorithm is
+ *		    returned here.  It must be dropped with crypto_mod_put().
+ *
+ * Return: a pointer to the new instance, or an ERR_PTR().  The caller still
+ *	   needs to register the instance.
+ */
+struct skcipher_instance *
+skcipher_alloc_instance_simple(struct crypto_template *tmpl, struct rtattr **tb,
+			       struct crypto_alg **cipher_alg_ret)
+{
+	struct crypto_attr_type *algt;
+	struct crypto_alg *cipher_alg;
+	struct skcipher_instance *inst;
+	struct crypto_spawn *spawn;
+	u32 mask;
+	int err;
+
+	algt = crypto_get_attr_type(tb);
+	if (IS_ERR(algt))
+		return ERR_CAST(algt);
+
+	if ((algt->type ^ CRYPTO_ALG_TYPE_SKCIPHER) & algt->mask)
+		return ERR_PTR(-EINVAL);
+
+	mask = CRYPTO_ALG_TYPE_MASK |
+		crypto_requires_off(algt->type, algt->mask,
+				    CRYPTO_ALG_NEED_FALLBACK);
+
+	cipher_alg = crypto_get_attr_alg(tb, CRYPTO_ALG_TYPE_CIPHER, mask);
+	if (IS_ERR(cipher_alg))
+		return ERR_CAST(cipher_alg);
+
+	inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL);
+	if (!inst) {
+		err = -ENOMEM;
+		goto err_put_cipher_alg;
+	}
+	spawn = skcipher_instance_ctx(inst);
+
+	err = crypto_inst_setname(skcipher_crypto_instance(inst), tmpl->name,
+				  cipher_alg);
+	if (err)
+		goto err_free_inst;
+
+	err = crypto_init_spawn(spawn, cipher_alg,
+				skcipher_crypto_instance(inst),
+				CRYPTO_ALG_TYPE_MASK);
+	if (err)
+		goto err_free_inst;
+	inst->free = skcipher_free_instance_simple;
+
+	/* Default algorithm properties, can be overridden */
+	inst->alg.base.cra_blocksize = cipher_alg->cra_blocksize;
+	inst->alg.base.cra_alignmask = cipher_alg->cra_alignmask;
+	inst->alg.base.cra_priority = cipher_alg->cra_priority;
+	inst->alg.min_keysize = cipher_alg->cra_cipher.cia_min_keysize;
+	inst->alg.max_keysize = cipher_alg->cra_cipher.cia_max_keysize;
+	inst->alg.ivsize = cipher_alg->cra_blocksize;
+
+	/* Use skcipher_ctx_simple by default, can be overridden */
+	inst->alg.base.cra_ctxsize = sizeof(struct skcipher_ctx_simple);
+	inst->alg.setkey = skcipher_setkey_simple;
+	inst->alg.init = skcipher_init_tfm_simple;
+	inst->alg.exit = skcipher_exit_tfm_simple;
+
+	*cipher_alg_ret = cipher_alg;
+	return inst;
+
+err_free_inst:
+	kfree(inst);
+err_put_cipher_alg:
+	crypto_mod_put(cipher_alg);
+	return ERR_PTR(err);
+}
+EXPORT_SYMBOL_GPL(skcipher_alloc_instance_simple);
+
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("Symmetric key cipher type");
diff --git a/include/crypto/internal/skcipher.h b/include/crypto/internal/skcipher.h
index 453e867b4bd9..9de6032209cb 100644
--- a/include/crypto/internal/skcipher.h
+++ b/include/crypto/internal/skcipher.h
@@ -205,5 +205,20 @@ static inline unsigned int crypto_skcipher_alg_max_keysize(
 	return alg->max_keysize;
 }
 
+/* Helpers for simple block cipher modes of operation */
+struct skcipher_ctx_simple {
+	struct crypto_cipher *cipher;	/* underlying block cipher */
+};
+static inline struct crypto_cipher *
+skcipher_cipher_simple(struct crypto_skcipher *tfm)
+{
+	struct skcipher_ctx_simple *ctx = crypto_skcipher_ctx(tfm);
+
+	return ctx->cipher;
+}
+struct skcipher_instance *
+skcipher_alloc_instance_simple(struct crypto_template *tmpl, struct rtattr **tb,
+			       struct crypto_alg **cipher_alg_ret);
+
 #endif	/* _CRYPTO_INTERNAL_SKCIPHER_H */
 
-- 
cgit v1.2.3


From 14aa1a839a33b4ade7479ffcad8a5fabe5cc4000 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Thu, 3 Jan 2019 20:16:25 -0800
Subject: crypto: algapi - remove crypto_alloc_instance()

Now that all "blkcipher" templates have been converted to "skcipher",
crypto_alloc_instance() is no longer used.  And it's not useful any
longer as it creates an old-style weakly typed instance rather than a
new-style strongly typed instance.  So remove it, and now that the name
is freed up rename crypto_alloc_instance2() to crypto_alloc_instance().

Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/algapi.c                | 33 ++-------------------------------
 include/crypto/algapi.h        |  6 ++----
 include/crypto/internal/hash.h |  6 +++---
 3 files changed, 7 insertions(+), 38 deletions(-)

(limited to 'include')

diff --git a/crypto/algapi.c b/crypto/algapi.c
index 8b65ada33e5d..f3d766312bd9 100644
--- a/crypto/algapi.c
+++ b/crypto/algapi.c
@@ -845,8 +845,8 @@ int crypto_inst_setname(struct crypto_instance *inst, const char *name,
 }
 EXPORT_SYMBOL_GPL(crypto_inst_setname);
 
-void *crypto_alloc_instance2(const char *name, struct crypto_alg *alg,
-			     unsigned int head)
+void *crypto_alloc_instance(const char *name, struct crypto_alg *alg,
+			    unsigned int head)
 {
 	struct crypto_instance *inst;
 	char *p;
@@ -869,35 +869,6 @@ err_free_inst:
 	kfree(p);
 	return ERR_PTR(err);
 }
-EXPORT_SYMBOL_GPL(crypto_alloc_instance2);
-
-struct crypto_instance *crypto_alloc_instance(const char *name,
-					      struct crypto_alg *alg)
-{
-	struct crypto_instance *inst;
-	struct crypto_spawn *spawn;
-	int err;
-
-	inst = crypto_alloc_instance2(name, alg, 0);
-	if (IS_ERR(inst))
-		goto out;
-
-	spawn = crypto_instance_ctx(inst);
-	err = crypto_init_spawn(spawn, alg, inst,
-				CRYPTO_ALG_TYPE_MASK | CRYPTO_ALG_ASYNC);
-
-	if (err)
-		goto err_free_inst;
-
-	return inst;
-
-err_free_inst:
-	kfree(inst);
-	inst = ERR_PTR(err);
-
-out:
-	return inst;
-}
 EXPORT_SYMBOL_GPL(crypto_alloc_instance);
 
 void crypto_init_queue(struct crypto_queue *queue, unsigned int max_qlen)
diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h
index 4a5ad10e75f0..093869f175d6 100644
--- a/include/crypto/algapi.h
+++ b/include/crypto/algapi.h
@@ -185,10 +185,8 @@ static inline struct crypto_alg *crypto_attr_alg(struct rtattr *rta,
 int crypto_attr_u32(struct rtattr *rta, u32 *num);
 int crypto_inst_setname(struct crypto_instance *inst, const char *name,
 			struct crypto_alg *alg);
-void *crypto_alloc_instance2(const char *name, struct crypto_alg *alg,
-			     unsigned int head);
-struct crypto_instance *crypto_alloc_instance(const char *name,
-					      struct crypto_alg *alg);
+void *crypto_alloc_instance(const char *name, struct crypto_alg *alg,
+			    unsigned int head);
 
 void crypto_init_queue(struct crypto_queue *queue, unsigned int max_qlen);
 int crypto_enqueue_request(struct crypto_queue *queue,
diff --git a/include/crypto/internal/hash.h b/include/crypto/internal/hash.h
index a0b0ad9d585e..e355fdb642a9 100644
--- a/include/crypto/internal/hash.h
+++ b/include/crypto/internal/hash.h
@@ -170,7 +170,7 @@ static inline unsigned int ahash_instance_headroom(void)
 static inline struct ahash_instance *ahash_alloc_instance(
 	const char *name, struct crypto_alg *alg)
 {
-	return crypto_alloc_instance2(name, alg, ahash_instance_headroom());
+	return crypto_alloc_instance(name, alg, ahash_instance_headroom());
 }
 
 static inline void ahash_request_complete(struct ahash_request *req, int err)
@@ -233,8 +233,8 @@ static inline void *shash_instance_ctx(struct shash_instance *inst)
 static inline struct shash_instance *shash_alloc_instance(
 	const char *name, struct crypto_alg *alg)
 {
-	return crypto_alloc_instance2(name, alg,
-				      sizeof(struct shash_alg) - sizeof(*alg));
+	return crypto_alloc_instance(name, alg,
+				     sizeof(struct shash_alg) - sizeof(*alg));
 }
 
 static inline struct crypto_shash *crypto_spawn_shash(
-- 
cgit v1.2.3


From 5b438f4ba315db4f8c1489d175656798d58c014f Mon Sep 17 00:00:00 2001
From: Jacob Pan <jacob.jun.pan@linux.intel.com>
Date: Fri, 11 Jan 2019 13:04:57 +0800
Subject: iommu/vt-d: Support page request in scalable mode

VT-d Rev3.0 has made a few changes to the page request interface,

1. widened PRQ descriptor from 128 bits to 256 bits;
2. removed streaming response type;
3. introduced private data that requires page response even the
   request is not last request in group (LPIG).

This is a supplement to commit 1c4f88b7f1f92 ("iommu/vt-d: Shared
virtual address in scalable mode") and makes the svm code compliant
with VT-d Rev3.0.

Cc: Ashok Raj <ashok.raj@intel.com>
Cc: Liu Yi L <yi.l.liu@intel.com>
Cc: Kevin Tian <kevin.tian@intel.com>
Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
Fixes: 1c4f88b7f1f92 ("iommu/vt-d: Shared virtual address in scalable mode")
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/intel-svm.c   | 77 ++++++++++++++++++++++++++-------------------
 include/linux/intel-iommu.h | 21 ++++++-------
 include/linux/intel-svm.h   |  2 +-
 3 files changed, 55 insertions(+), 45 deletions(-)

(limited to 'include')

diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c
index a2a2aa4439aa..79add5716552 100644
--- a/drivers/iommu/intel-svm.c
+++ b/drivers/iommu/intel-svm.c
@@ -470,20 +470,31 @@ EXPORT_SYMBOL_GPL(intel_svm_is_pasid_valid);
 
 /* Page request queue descriptor */
 struct page_req_dsc {
-	u64 srr:1;
-	u64 bof:1;
-	u64 pasid_present:1;
-	u64 lpig:1;
-	u64 pasid:20;
-	u64 bus:8;
-	u64 private:23;
-	u64 prg_index:9;
-	u64 rd_req:1;
-	u64 wr_req:1;
-	u64 exe_req:1;
-	u64 priv_req:1;
-	u64 devfn:8;
-	u64 addr:52;
+	union {
+		struct {
+			u64 type:8;
+			u64 pasid_present:1;
+			u64 priv_data_present:1;
+			u64 rsvd:6;
+			u64 rid:16;
+			u64 pasid:20;
+			u64 exe_req:1;
+			u64 pm_req:1;
+			u64 rsvd2:10;
+		};
+		u64 qw_0;
+	};
+	union {
+		struct {
+			u64 rd_req:1;
+			u64 wr_req:1;
+			u64 lpig:1;
+			u64 prg_index:9;
+			u64 addr:52;
+		};
+		u64 qw_1;
+	};
+	u64 priv_data[2];
 };
 
 #define PRQ_RING_MASK ((0x1000 << PRQ_ORDER) - 0x10)
@@ -596,7 +607,7 @@ static irqreturn_t prq_event_thread(int irq, void *d)
 		/* Accounting for major/minor faults? */
 		rcu_read_lock();
 		list_for_each_entry_rcu(sdev, &svm->devs, list) {
-			if (sdev->sid == PCI_DEVID(req->bus, req->devfn))
+			if (sdev->sid == req->rid)
 				break;
 		}
 		/* Other devices can go away, but the drivers are not permitted
@@ -609,33 +620,35 @@ static irqreturn_t prq_event_thread(int irq, void *d)
 
 		if (sdev && sdev->ops && sdev->ops->fault_cb) {
 			int rwxp = (req->rd_req << 3) | (req->wr_req << 2) |
-				(req->exe_req << 1) | (req->priv_req);
-			sdev->ops->fault_cb(sdev->dev, req->pasid, req->addr, req->private, rwxp, result);
+				(req->exe_req << 1) | (req->pm_req);
+			sdev->ops->fault_cb(sdev->dev, req->pasid, req->addr,
+					    req->priv_data, rwxp, result);
 		}
 		/* We get here in the error case where the PASID lookup failed,
 		   and these can be NULL. Do not use them below this point! */
 		sdev = NULL;
 		svm = NULL;
 	no_pasid:
-		if (req->lpig) {
-			/* Page Group Response */
+		if (req->lpig || req->priv_data_present) {
+			/*
+			 * Per VT-d spec. v3.0 ch7.7, system software must
+			 * respond with page group response if private data
+			 * is present (PDP) or last page in group (LPIG) bit
+			 * is set. This is an additional VT-d feature beyond
+			 * PCI ATS spec.
+			 */
 			resp.qw0 = QI_PGRP_PASID(req->pasid) |
-				QI_PGRP_DID((req->bus << 8) | req->devfn) |
+				QI_PGRP_DID(req->rid) |
 				QI_PGRP_PASID_P(req->pasid_present) |
+				QI_PGRP_PDP(req->pasid_present) |
+				QI_PGRP_RESP_CODE(result) |
 				QI_PGRP_RESP_TYPE;
 			resp.qw1 = QI_PGRP_IDX(req->prg_index) |
-				QI_PGRP_PRIV(req->private) |
-				QI_PGRP_RESP_CODE(result);
-		} else if (req->srr) {
-			/* Page Stream Response */
-			resp.qw0 = QI_PSTRM_IDX(req->prg_index) |
-				QI_PSTRM_PRIV(req->private) |
-				QI_PSTRM_BUS(req->bus) |
-				QI_PSTRM_PASID(req->pasid) |
-				QI_PSTRM_RESP_TYPE;
-			resp.qw1 = QI_PSTRM_ADDR(address) |
-				QI_PSTRM_DEVFN(req->devfn) |
-				QI_PSTRM_RESP_CODE(result);
+				QI_PGRP_LPIG(req->lpig);
+
+			if (req->priv_data_present)
+				memcpy(&resp.qw2, req->priv_data,
+				       sizeof(req->priv_data));
 		}
 		resp.qw2 = 0;
 		resp.qw3 = 0;
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 0605f3bf6e79..fa364de9db18 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -374,20 +374,17 @@ enum {
 #define QI_DEV_EIOTLB_PFSID(pfsid) (((u64)(pfsid & 0xf) << 12) | ((u64)(pfsid & 0xfff) << 52))
 #define QI_DEV_EIOTLB_MAX_INVS	32
 
-#define QI_PGRP_IDX(idx)	(((u64)(idx)) << 55)
-#define QI_PGRP_PRIV(priv)	(((u64)(priv)) << 32)
-#define QI_PGRP_RESP_CODE(res)	((u64)(res))
-#define QI_PGRP_PASID(pasid)	(((u64)(pasid)) << 32)
-#define QI_PGRP_DID(did)	(((u64)(did)) << 16)
+/* Page group response descriptor QW0 */
 #define QI_PGRP_PASID_P(p)	(((u64)(p)) << 4)
+#define QI_PGRP_PDP(p)		(((u64)(p)) << 5)
+#define QI_PGRP_RESP_CODE(res)	(((u64)(res)) << 12)
+#define QI_PGRP_DID(rid)	(((u64)(rid)) << 16)
+#define QI_PGRP_PASID(pasid)	(((u64)(pasid)) << 32)
+
+/* Page group response descriptor QW1 */
+#define QI_PGRP_LPIG(x)		(((u64)(x)) << 2)
+#define QI_PGRP_IDX(idx)	(((u64)(idx)) << 3)
 
-#define QI_PSTRM_ADDR(addr)	(((u64)(addr)) & VTD_PAGE_MASK)
-#define QI_PSTRM_DEVFN(devfn)	(((u64)(devfn)) << 4)
-#define QI_PSTRM_RESP_CODE(res)	((u64)(res))
-#define QI_PSTRM_IDX(idx)	(((u64)(idx)) << 55)
-#define QI_PSTRM_PRIV(priv)	(((u64)(priv)) << 32)
-#define QI_PSTRM_BUS(bus)	(((u64)(bus)) << 24)
-#define QI_PSTRM_PASID(pasid)	(((u64)(pasid)) << 4)
 
 #define QI_RESP_SUCCESS		0x0
 #define QI_RESP_INVALID		0x1
diff --git a/include/linux/intel-svm.h b/include/linux/intel-svm.h
index 99bc5b3ae26e..e3f76315ca4d 100644
--- a/include/linux/intel-svm.h
+++ b/include/linux/intel-svm.h
@@ -20,7 +20,7 @@ struct device;
 
 struct svm_dev_ops {
 	void (*fault_cb)(struct device *dev, int pasid, u64 address,
-			 u32 private, int rwxp, int response);
+			 void *private, int rwxp, int response);
 };
 
 /* Values for rxwp in fault_cb callback */
-- 
cgit v1.2.3


From 19514910d021c93c7823ec32067e6b7dea224f0f Mon Sep 17 00:00:00 2001
From: Petr Mladek <pmladek@suse.com>
Date: Wed, 9 Jan 2019 13:43:19 +0100
Subject: livepatch: Change unsigned long old_addr -> void *old_func in struct
 klp_func

The address of the to be patched function and new function is stored
in struct klp_func as:

	void *new_func;
	unsigned long old_addr;

The different naming scheme and type are derived from the way
the addresses are set. @old_addr is assigned at runtime using
kallsyms-based search. @new_func is statically initialized,
for example:

  static struct klp_func funcs[] = {
	{
		.old_name = "cmdline_proc_show",
		.new_func = livepatch_cmdline_proc_show,
	}, { }
  };

This patch changes unsigned long old_addr -> void *old_func. It removes
some confusion when these address are later used in the code. It is
motivated by a followup patch that adds special NOP struct klp_func
where we want to assign func->new_func = func->old_addr respectively
func->new_func = func->old_func.

This patch does not modify the existing behavior.

Suggested-by: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Petr Mladek <pmladek@suse.com>
Acked-by: Miroslav Benes <mbenes@suse.cz>
Acked-by: Joe Lawrence <joe.lawrence@redhat.com>
Acked-by: Alice Ferrazzi <alice.ferrazzi@gmail.com>
Acked-by: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 include/linux/livepatch.h     |  4 ++--
 kernel/livepatch/core.c       |  6 +++---
 kernel/livepatch/patch.c      | 18 ++++++++++--------
 kernel/livepatch/patch.h      |  4 ++--
 kernel/livepatch/transition.c |  4 ++--
 5 files changed, 19 insertions(+), 17 deletions(-)

(limited to 'include')

diff --git a/include/linux/livepatch.h b/include/linux/livepatch.h
index aec44b1d9582..634e13876380 100644
--- a/include/linux/livepatch.h
+++ b/include/linux/livepatch.h
@@ -40,7 +40,7 @@
  * @new_func:	pointer to the patched function code
  * @old_sympos: a hint indicating which symbol position the old function
  *		can be found (optional)
- * @old_addr:	the address of the function being patched
+ * @old_func:	pointer to the function being patched
  * @kobj:	kobject for sysfs resources
  * @stack_node:	list node for klp_ops func_stack list
  * @old_size:	size of the old function
@@ -77,7 +77,7 @@ struct klp_func {
 	unsigned long old_sympos;
 
 	/* internal */
-	unsigned long old_addr;
+	void *old_func;
 	struct kobject kobj;
 	struct list_head stack_node;
 	unsigned long old_size, new_size;
diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c
index 5b77a7314e01..cb59c7fb94cb 100644
--- a/kernel/livepatch/core.c
+++ b/kernel/livepatch/core.c
@@ -648,7 +648,7 @@ static void klp_free_object_loaded(struct klp_object *obj)
 	obj->mod = NULL;
 
 	klp_for_each_func(obj, func)
-		func->old_addr = 0;
+		func->old_func = NULL;
 }
 
 /*
@@ -721,11 +721,11 @@ static int klp_init_object_loaded(struct klp_patch *patch,
 	klp_for_each_func(obj, func) {
 		ret = klp_find_object_symbol(obj->name, func->old_name,
 					     func->old_sympos,
-					     &func->old_addr);
+					     (unsigned long *)&func->old_func);
 		if (ret)
 			return ret;
 
-		ret = kallsyms_lookup_size_offset(func->old_addr,
+		ret = kallsyms_lookup_size_offset((unsigned long)func->old_func,
 						  &func->old_size, NULL);
 		if (!ret) {
 			pr_err("kallsyms size lookup failed for '%s'\n",
diff --git a/kernel/livepatch/patch.c b/kernel/livepatch/patch.c
index 7702cb4064fc..825022d70912 100644
--- a/kernel/livepatch/patch.c
+++ b/kernel/livepatch/patch.c
@@ -34,7 +34,7 @@
 
 static LIST_HEAD(klp_ops);
 
-struct klp_ops *klp_find_ops(unsigned long old_addr)
+struct klp_ops *klp_find_ops(void *old_func)
 {
 	struct klp_ops *ops;
 	struct klp_func *func;
@@ -42,7 +42,7 @@ struct klp_ops *klp_find_ops(unsigned long old_addr)
 	list_for_each_entry(ops, &klp_ops, node) {
 		func = list_first_entry(&ops->func_stack, struct klp_func,
 					stack_node);
-		if (func->old_addr == old_addr)
+		if (func->old_func == old_func)
 			return ops;
 	}
 
@@ -142,17 +142,18 @@ static void klp_unpatch_func(struct klp_func *func)
 
 	if (WARN_ON(!func->patched))
 		return;
-	if (WARN_ON(!func->old_addr))
+	if (WARN_ON(!func->old_func))
 		return;
 
-	ops = klp_find_ops(func->old_addr);
+	ops = klp_find_ops(func->old_func);
 	if (WARN_ON(!ops))
 		return;
 
 	if (list_is_singular(&ops->func_stack)) {
 		unsigned long ftrace_loc;
 
-		ftrace_loc = klp_get_ftrace_location(func->old_addr);
+		ftrace_loc =
+			klp_get_ftrace_location((unsigned long)func->old_func);
 		if (WARN_ON(!ftrace_loc))
 			return;
 
@@ -174,17 +175,18 @@ static int klp_patch_func(struct klp_func *func)
 	struct klp_ops *ops;
 	int ret;
 
-	if (WARN_ON(!func->old_addr))
+	if (WARN_ON(!func->old_func))
 		return -EINVAL;
 
 	if (WARN_ON(func->patched))
 		return -EINVAL;
 
-	ops = klp_find_ops(func->old_addr);
+	ops = klp_find_ops(func->old_func);
 	if (!ops) {
 		unsigned long ftrace_loc;
 
-		ftrace_loc = klp_get_ftrace_location(func->old_addr);
+		ftrace_loc =
+			klp_get_ftrace_location((unsigned long)func->old_func);
 		if (!ftrace_loc) {
 			pr_err("failed to find location for function '%s'\n",
 				func->old_name);
diff --git a/kernel/livepatch/patch.h b/kernel/livepatch/patch.h
index e72d8250d04b..a9b16e513656 100644
--- a/kernel/livepatch/patch.h
+++ b/kernel/livepatch/patch.h
@@ -10,7 +10,7 @@
  * struct klp_ops - structure for tracking registered ftrace ops structs
  *
  * A single ftrace_ops is shared between all enabled replacement functions
- * (klp_func structs) which have the same old_addr.  This allows the switch
+ * (klp_func structs) which have the same old_func.  This allows the switch
  * between function versions to happen instantaneously by updating the klp_ops
  * struct's func_stack list.  The winner is the klp_func at the top of the
  * func_stack (front of the list).
@@ -25,7 +25,7 @@ struct klp_ops {
 	struct ftrace_ops fops;
 };
 
-struct klp_ops *klp_find_ops(unsigned long old_addr);
+struct klp_ops *klp_find_ops(void *old_func);
 
 int klp_patch_object(struct klp_object *obj);
 void klp_unpatch_object(struct klp_object *obj);
diff --git a/kernel/livepatch/transition.c b/kernel/livepatch/transition.c
index 304d5eb8a98c..f27a378ad5e1 100644
--- a/kernel/livepatch/transition.c
+++ b/kernel/livepatch/transition.c
@@ -224,11 +224,11 @@ static int klp_check_stack_func(struct klp_func *func,
 			 * Check for the to-be-patched function
 			 * (the previous func).
 			 */
-			ops = klp_find_ops(func->old_addr);
+			ops = klp_find_ops(func->old_func);
 
 			if (list_is_singular(&ops->func_stack)) {
 				/* original function */
-				func_addr = func->old_addr;
+				func_addr = (unsigned long)func->old_func;
 				func_size = func->old_size;
 			} else {
 				/* previously patched function */
-- 
cgit v1.2.3


From 0430f78bf38f9972f0cf0522709cc63d49fa164c Mon Sep 17 00:00:00 2001
From: Petr Mladek <pmladek@suse.com>
Date: Wed, 9 Jan 2019 13:43:21 +0100
Subject: livepatch: Consolidate klp_free functions

The code for freeing livepatch structures is a bit scattered and tricky:

  + direct calls to klp_free_*_limited() and kobject_put() are
    used to release partially initialized objects

  + klp_free_patch() removes the patch from the public list
    and releases all objects except for patch->kobj

  + object_put(&patch->kobj) and the related wait_for_completion()
    are called directly outside klp_mutex; this code is duplicated;

Now, we are going to remove the registration stage to simplify the API
and the code. This would require handling more situations in
klp_enable_patch() error paths.

More importantly, we are going to add a feature called atomic replace.
It will need to dynamically create func and object structures. We will
want to reuse the existing init() and free() functions. This would
create even more error path scenarios.

This patch implements more straightforward free functions:

  + checks kobj_added flag instead of @limit[*]

  + initializes patch->list early so that the check for empty list
    always works

  + The action(s) that has to be done outside klp_mutex are done
    in separate klp_free_patch_finish() function. It waits only
    when patch->kobj was really released via the _start() part.

The patch does not change the existing behavior.

[*] We need our own flag to track that the kobject was successfully
    added to the hierarchy.  Note that kobj.state_initialized only
    indicates that kobject has been initialized, not whether is has
    been added (and needs to be removed on cleanup).

Signed-off-by: Petr Mladek <pmladek@suse.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Miroslav Benes <mbenes@suse.cz>
Cc: Jessica Yu <jeyu@kernel.org>
Cc: Jiri Kosina <jikos@kernel.org>
Cc: Jason Baron <jbaron@akamai.com>
Acked-by: Miroslav Benes <mbenes@suse.cz>
Acked-by: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 include/linux/livepatch.h |   6 ++
 kernel/livepatch/core.c   | 137 +++++++++++++++++++++++++++++++---------------
 2 files changed, 98 insertions(+), 45 deletions(-)

(limited to 'include')

diff --git a/include/linux/livepatch.h b/include/linux/livepatch.h
index 634e13876380..6978785bc059 100644
--- a/include/linux/livepatch.h
+++ b/include/linux/livepatch.h
@@ -45,6 +45,7 @@
  * @stack_node:	list node for klp_ops func_stack list
  * @old_size:	size of the old function
  * @new_size:	size of the new function
+ * @kobj_added: @kobj has been added and needs freeing
  * @patched:	the func has been added to the klp_ops list
  * @transition:	the func is currently being applied or reverted
  *
@@ -81,6 +82,7 @@ struct klp_func {
 	struct kobject kobj;
 	struct list_head stack_node;
 	unsigned long old_size, new_size;
+	bool kobj_added;
 	bool patched;
 	bool transition;
 };
@@ -117,6 +119,7 @@ struct klp_callbacks {
  * @kobj:	kobject for sysfs resources
  * @mod:	kernel module associated with the patched object
  *		(NULL for vmlinux)
+ * @kobj_added: @kobj has been added and needs freeing
  * @patched:	the object's funcs have been added to the klp_ops list
  */
 struct klp_object {
@@ -128,6 +131,7 @@ struct klp_object {
 	/* internal */
 	struct kobject kobj;
 	struct module *mod;
+	bool kobj_added;
 	bool patched;
 };
 
@@ -137,6 +141,7 @@ struct klp_object {
  * @objs:	object entries for kernel objects to be patched
  * @list:	list node for global list of registered patches
  * @kobj:	kobject for sysfs resources
+ * @kobj_added: @kobj has been added and needs freeing
  * @enabled:	the patch is enabled (but operation may be incomplete)
  * @finish:	for waiting till it is safe to remove the patch module
  */
@@ -148,6 +153,7 @@ struct klp_patch {
 	/* internal */
 	struct list_head list;
 	struct kobject kobj;
+	bool kobj_added;
 	bool enabled;
 	struct completion finish;
 };
diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c
index 20589da35194..6f0d9095f662 100644
--- a/kernel/livepatch/core.c
+++ b/kernel/livepatch/core.c
@@ -465,17 +465,15 @@ static struct kobj_type klp_ktype_func = {
 	.sysfs_ops = &kobj_sysfs_ops,
 };
 
-/*
- * Free all functions' kobjects in the array up to some limit. When limit is
- * NULL, all kobjects are freed.
- */
-static void klp_free_funcs_limited(struct klp_object *obj,
-				   struct klp_func *limit)
+static void klp_free_funcs(struct klp_object *obj)
 {
 	struct klp_func *func;
 
-	for (func = obj->funcs; func->old_name && func != limit; func++)
-		kobject_put(&func->kobj);
+	klp_for_each_func(obj, func) {
+		/* Might be called from klp_init_patch() error path. */
+		if (func->kobj_added)
+			kobject_put(&func->kobj);
+	}
 }
 
 /* Clean up when a patched object is unloaded */
@@ -489,30 +487,60 @@ static void klp_free_object_loaded(struct klp_object *obj)
 		func->old_func = NULL;
 }
 
-/*
- * Free all objects' kobjects in the array up to some limit. When limit is
- * NULL, all kobjects are freed.
- */
-static void klp_free_objects_limited(struct klp_patch *patch,
-				     struct klp_object *limit)
+static void klp_free_objects(struct klp_patch *patch)
 {
 	struct klp_object *obj;
 
-	for (obj = patch->objs; obj->funcs && obj != limit; obj++) {
-		klp_free_funcs_limited(obj, NULL);
-		kobject_put(&obj->kobj);
+	klp_for_each_object(patch, obj) {
+		klp_free_funcs(obj);
+
+		/* Might be called from klp_init_patch() error path. */
+		if (obj->kobj_added)
+			kobject_put(&obj->kobj);
 	}
 }
 
-static void klp_free_patch(struct klp_patch *patch)
+/*
+ * This function implements the free operations that can be called safely
+ * under klp_mutex.
+ *
+ * The operation must be completed by calling klp_free_patch_finish()
+ * outside klp_mutex.
+ */
+static void klp_free_patch_start(struct klp_patch *patch)
 {
-	klp_free_objects_limited(patch, NULL);
 	if (!list_empty(&patch->list))
 		list_del(&patch->list);
+
+	klp_free_objects(patch);
+}
+
+/*
+ * This function implements the free part that must be called outside
+ * klp_mutex.
+ *
+ * It must be called after klp_free_patch_start(). And it has to be
+ * the last function accessing the livepatch structures when the patch
+ * gets disabled.
+ */
+static void klp_free_patch_finish(struct klp_patch *patch)
+{
+	/*
+	 * Avoid deadlock with enabled_store() sysfs callback by
+	 * calling this outside klp_mutex. It is safe because
+	 * this is called when the patch gets disabled and it
+	 * cannot get enabled again.
+	 */
+	if (patch->kobj_added) {
+		kobject_put(&patch->kobj);
+		wait_for_completion(&patch->finish);
+	}
 }
 
 static int klp_init_func(struct klp_object *obj, struct klp_func *func)
 {
+	int ret;
+
 	if (!func->old_name || !func->new_func)
 		return -EINVAL;
 
@@ -528,9 +556,13 @@ static int klp_init_func(struct klp_object *obj, struct klp_func *func)
 	 * object. If the user selects 0 for old_sympos, then 1 will be used
 	 * since a unique symbol will be the first occurrence.
 	 */
-	return kobject_init_and_add(&func->kobj, &klp_ktype_func,
-				    &obj->kobj, "%s,%lu", func->old_name,
-				    func->old_sympos ? func->old_sympos : 1);
+	ret = kobject_init_and_add(&func->kobj, &klp_ktype_func,
+				   &obj->kobj, "%s,%lu", func->old_name,
+				   func->old_sympos ? func->old_sympos : 1);
+	if (!ret)
+		func->kobj_added = true;
+
+	return ret;
 }
 
 /* Arches may override this to finish any remaining arch-specific tasks */
@@ -589,9 +621,6 @@ static int klp_init_object(struct klp_patch *patch, struct klp_object *obj)
 	int ret;
 	const char *name;
 
-	if (!obj->funcs)
-		return -EINVAL;
-
 	if (klp_is_module(obj) && strlen(obj->name) >= MODULE_NAME_LEN)
 		return -EINVAL;
 
@@ -605,46 +634,66 @@ static int klp_init_object(struct klp_patch *patch, struct klp_object *obj)
 				   &patch->kobj, "%s", name);
 	if (ret)
 		return ret;
+	obj->kobj_added = true;
 
 	klp_for_each_func(obj, func) {
 		ret = klp_init_func(obj, func);
 		if (ret)
-			goto free;
+			return ret;
 	}
 
-	if (klp_is_object_loaded(obj)) {
+	if (klp_is_object_loaded(obj))
 		ret = klp_init_object_loaded(patch, obj);
-		if (ret)
-			goto free;
-	}
-
-	return 0;
 
-free:
-	klp_free_funcs_limited(obj, func);
-	kobject_put(&obj->kobj);
 	return ret;
 }
 
-static int klp_init_patch(struct klp_patch *patch)
+static int klp_init_patch_early(struct klp_patch *patch)
 {
 	struct klp_object *obj;
-	int ret;
+	struct klp_func *func;
 
 	if (!patch->objs)
 		return -EINVAL;
 
-	mutex_lock(&klp_mutex);
-
+	INIT_LIST_HEAD(&patch->list);
+	patch->kobj_added = false;
 	patch->enabled = false;
 	init_completion(&patch->finish);
 
+	klp_for_each_object(patch, obj) {
+		if (!obj->funcs)
+			return -EINVAL;
+
+		obj->kobj_added = false;
+
+		klp_for_each_func(obj, func)
+			func->kobj_added = false;
+	}
+
+	return 0;
+}
+
+static int klp_init_patch(struct klp_patch *patch)
+{
+	struct klp_object *obj;
+	int ret;
+
+	mutex_lock(&klp_mutex);
+
+	ret = klp_init_patch_early(patch);
+	if (ret) {
+		mutex_unlock(&klp_mutex);
+		return ret;
+	}
+
 	ret = kobject_init_and_add(&patch->kobj, &klp_ktype_patch,
 				   klp_root_kobj, "%s", patch->mod->name);
 	if (ret) {
 		mutex_unlock(&klp_mutex);
 		return ret;
 	}
+	patch->kobj_added = true;
 
 	klp_for_each_object(patch, obj) {
 		ret = klp_init_object(patch, obj);
@@ -659,12 +708,11 @@ static int klp_init_patch(struct klp_patch *patch)
 	return 0;
 
 free:
-	klp_free_objects_limited(patch, obj);
+	klp_free_patch_start(patch);
 
 	mutex_unlock(&klp_mutex);
 
-	kobject_put(&patch->kobj);
-	wait_for_completion(&patch->finish);
+	klp_free_patch_finish(patch);
 
 	return ret;
 }
@@ -693,12 +741,11 @@ int klp_unregister_patch(struct klp_patch *patch)
 		goto err;
 	}
 
-	klp_free_patch(patch);
+	klp_free_patch_start(patch);
 
 	mutex_unlock(&klp_mutex);
 
-	kobject_put(&patch->kobj);
-	wait_for_completion(&patch->finish);
+	klp_free_patch_finish(patch);
 
 	return 0;
 err:
-- 
cgit v1.2.3


From 68007289bf3cd937a5b8fc4987d2787167bd06ca Mon Sep 17 00:00:00 2001
From: Petr Mladek <pmladek@suse.com>
Date: Wed, 9 Jan 2019 13:43:22 +0100
Subject: livepatch: Don't block the removal of patches loaded after a forced
 transition

module_put() is currently never called in klp_complete_transition() when
klp_force is set. As a result, we might keep the reference count even when
klp_enable_patch() fails and klp_cancel_transition() is called.

This might give the impression that a module might get blocked in some
strange init state. Fortunately, it is not the case. The reference count
is ignored when mod->init fails and erroneous modules are always removed.

Anyway, this might be confusing. Instead, this patch moves
the global klp_forced flag into struct klp_patch. As a result,
we block only modules that might still be in use after a forced
transition. Newly loaded livepatches might be eventually completely
removed later.

It is not a big deal. But the code is at least consistent with
the reality.

Signed-off-by: Petr Mladek <pmladek@suse.com>
Acked-by: Joe Lawrence <joe.lawrence@redhat.com>
Acked-by: Miroslav Benes <mbenes@suse.cz>
Acked-by: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 include/linux/livepatch.h     |  2 ++
 kernel/livepatch/core.c       |  4 +++-
 kernel/livepatch/core.h       |  1 +
 kernel/livepatch/transition.c | 10 +++++-----
 4 files changed, 11 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/livepatch.h b/include/linux/livepatch.h
index 6978785bc059..6a9165d9b090 100644
--- a/include/linux/livepatch.h
+++ b/include/linux/livepatch.h
@@ -143,6 +143,7 @@ struct klp_object {
  * @kobj:	kobject for sysfs resources
  * @kobj_added: @kobj has been added and needs freeing
  * @enabled:	the patch is enabled (but operation may be incomplete)
+ * @forced:	was involved in a forced transition
  * @finish:	for waiting till it is safe to remove the patch module
  */
 struct klp_patch {
@@ -155,6 +156,7 @@ struct klp_patch {
 	struct kobject kobj;
 	bool kobj_added;
 	bool enabled;
+	bool forced;
 	struct completion finish;
 };
 
diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c
index 6f0d9095f662..e77c5017ae0c 100644
--- a/kernel/livepatch/core.c
+++ b/kernel/livepatch/core.c
@@ -45,7 +45,8 @@
  */
 DEFINE_MUTEX(klp_mutex);
 
-static LIST_HEAD(klp_patches);
+/* Registered patches */
+LIST_HEAD(klp_patches);
 
 static struct kobject *klp_root_kobj;
 
@@ -659,6 +660,7 @@ static int klp_init_patch_early(struct klp_patch *patch)
 	INIT_LIST_HEAD(&patch->list);
 	patch->kobj_added = false;
 	patch->enabled = false;
+	patch->forced = false;
 	init_completion(&patch->finish);
 
 	klp_for_each_object(patch, obj) {
diff --git a/kernel/livepatch/core.h b/kernel/livepatch/core.h
index 48a83d4364cf..d0cb5390e247 100644
--- a/kernel/livepatch/core.h
+++ b/kernel/livepatch/core.h
@@ -5,6 +5,7 @@
 #include <linux/livepatch.h>
 
 extern struct mutex klp_mutex;
+extern struct list_head klp_patches;
 
 static inline bool klp_is_object_loaded(struct klp_object *obj)
 {
diff --git a/kernel/livepatch/transition.c b/kernel/livepatch/transition.c
index f27a378ad5e1..a4c921364003 100644
--- a/kernel/livepatch/transition.c
+++ b/kernel/livepatch/transition.c
@@ -33,8 +33,6 @@ struct klp_patch *klp_transition_patch;
 
 static int klp_target_state = KLP_UNDEFINED;
 
-static bool klp_forced = false;
-
 /*
  * This work can be performed periodically to finish patching or unpatching any
  * "straggler" tasks which failed to transition in the first attempt.
@@ -137,10 +135,10 @@ static void klp_complete_transition(void)
 		  klp_target_state == KLP_PATCHED ? "patching" : "unpatching");
 
 	/*
-	 * klp_forced set implies unbounded increase of module's ref count if
+	 * patch->forced set implies unbounded increase of module's ref count if
 	 * the module is disabled/enabled in a loop.
 	 */
-	if (!klp_forced && klp_target_state == KLP_UNPATCHED)
+	if (!klp_transition_patch->forced && klp_target_state == KLP_UNPATCHED)
 		module_put(klp_transition_patch->mod);
 
 	klp_target_state = KLP_UNDEFINED;
@@ -620,6 +618,7 @@ void klp_send_signals(void)
  */
 void klp_force_transition(void)
 {
+	struct klp_patch *patch;
 	struct task_struct *g, *task;
 	unsigned int cpu;
 
@@ -633,5 +632,6 @@ void klp_force_transition(void)
 	for_each_possible_cpu(cpu)
 		klp_update_patch_state(idle_task(cpu));
 
-	klp_forced = true;
+	list_for_each_entry(patch, &klp_patches, list)
+		patch->forced = true;
 }
-- 
cgit v1.2.3


From 958ef1e39d24d6cb8bf2a7406130a98c9564230f Mon Sep 17 00:00:00 2001
From: Petr Mladek <pmladek@suse.com>
Date: Wed, 9 Jan 2019 13:43:23 +0100
Subject: livepatch: Simplify API by removing registration step

The possibility to re-enable a registered patch was useful for immediate
patches where the livepatch module had to stay until the system reboot.
The improved consistency model allows to achieve the same result by
unloading and loading the livepatch module again.

Also we are going to add a feature called atomic replace. It will allow
to create a patch that would replace all already registered patches.
The aim is to handle dependent patches more securely. It will obsolete
the stack of patches that helped to handle the dependencies so far.
Then it might be unclear when a cumulative patch re-enabling is safe.

It would be complicated to support the many modes. Instead we could
actually make the API and code easier to understand.

Therefore, remove the two step public API. All the checks and init calls
are moved from klp_register_patch() to klp_enabled_patch(). Also the patch
is automatically freed, including the sysfs interface when the transition
to the disabled state is completed.

As a result, there is never a disabled patch on the top of the stack.
Therefore we do not need to check the stack in __klp_enable_patch().
And we could simplify the check in __klp_disable_patch().

Also the API and logic is much easier. It is enough to call
klp_enable_patch() in module_init() call. The patch can be disabled
by writing '0' into /sys/kernel/livepatch/<patch>/enabled. Then the module
can be removed once the transition finishes and sysfs interface is freed.

The only problem is how to free the structures and kobjects safely.
The operation is triggered from the sysfs interface. We could not put
the related kobject from there because it would cause lock inversion
between klp_mutex and kernfs locks, see kn->count lockdep map.

Therefore, offload the free task to a workqueue. It is perfectly fine:

  + The patch can no longer be used in the livepatch operations.

  + The module could not be removed until the free operation finishes
    and module_put() is called.

  + The operation is asynchronous already when the first
    klp_try_complete_transition() fails and another call
    is queued with a delay.

Suggested-by: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Petr Mladek <pmladek@suse.com>
Acked-by: Miroslav Benes <mbenes@suse.cz>
Acked-by: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 Documentation/livepatch/livepatch.txt        | 135 +++++--------
 include/linux/livepatch.h                    |   7 +-
 kernel/livepatch/core.c                      | 280 +++++++++------------------
 kernel/livepatch/core.h                      |   2 +
 kernel/livepatch/transition.c                |  19 +-
 samples/livepatch/livepatch-callbacks-demo.c |  13 +-
 samples/livepatch/livepatch-sample.c         |  13 +-
 samples/livepatch/livepatch-shadow-fix1.c    |  14 +-
 samples/livepatch/livepatch-shadow-fix2.c    |  14 +-
 9 files changed, 168 insertions(+), 329 deletions(-)

(limited to 'include')

diff --git a/Documentation/livepatch/livepatch.txt b/Documentation/livepatch/livepatch.txt
index 2d7ed09dbd59..8f56490a4bb6 100644
--- a/Documentation/livepatch/livepatch.txt
+++ b/Documentation/livepatch/livepatch.txt
@@ -12,12 +12,11 @@ Table of Contents:
 4. Livepatch module
    4.1. New functions
    4.2. Metadata
-   4.3. Livepatch module handling
 5. Livepatch life-cycle
-   5.1. Registration
+   5.1. Loading
    5.2. Enabling
    5.3. Disabling
-   5.4. Unregistration
+   5.4. Removing
 6. Sysfs
 7. Limitations
 
@@ -298,117 +297,89 @@ into three levels:
     see the "Consistency model" section.
 
 
-4.3. Livepatch module handling
-------------------------------
-
-The usual behavior is that the new functions will get used when
-the livepatch module is loaded. For this, the module init() function
-has to register the patch (struct klp_patch) and enable it. See the
-section "Livepatch life-cycle" below for more details about these
-two operations.
-
-Module removal is only safe when there are no users of the underlying
-functions. This is the reason why the force feature permanently disables
-the removal. The forced tasks entered the functions but we cannot say
-that they returned back.  Therefore it cannot be decided when the
-livepatch module can be safely removed. When the system is successfully
-transitioned to a new patch state (patched/unpatched) without being
-forced it is guaranteed that no task sleeps or runs in the old code.
-
-
 5. Livepatch life-cycle
 =======================
 
-Livepatching defines four basic operations that define the life cycle of each
-live patch: registration, enabling, disabling and unregistration.  There are
-several reasons why it is done this way.
-
-First, the patch is applied only when all patched symbols for already
-loaded objects are found. The error handling is much easier if this
-check is done before particular functions get redirected.
+Livepatching can be described by four basic operations:
+loading, enabling, disabling, removing.
 
-Second, it might take some time until the entire system is migrated with
-the hybrid consistency model being used. The patch revert might block
-the livepatch module removal for too long. Therefore it is useful to
-revert the patch using a separate operation that might be called
-explicitly. But it does not make sense to remove all information until
-the livepatch module is really removed.
 
+5.1. Loading
+------------
 
-5.1. Registration
------------------
+The only reasonable way is to enable the patch when the livepatch kernel
+module is being loaded. For this, klp_enable_patch() has to be called
+in the module_init() callback. There are two main reasons:
 
-Each patch first has to be registered using klp_register_patch(). This makes
-the patch known to the livepatch framework. Also it does some preliminary
-computing and checks.
+First, only the module has an easy access to the related struct klp_patch.
 
-In particular, the patch is added into the list of known patches. The
-addresses of the patched functions are found according to their names.
-The special relocations, mentioned in the section "New functions", are
-applied. The relevant entries are created under
-/sys/kernel/livepatch/<name>. The patch is rejected when any operation
-fails.
+Second, the error code might be used to refuse loading the module when
+the patch cannot get enabled.
 
 
 5.2. Enabling
 -------------
 
-Registered patches might be enabled either by calling klp_enable_patch() or
-by writing '1' to /sys/kernel/livepatch/<name>/enabled. The system will
-start using the new implementation of the patched functions at this stage.
+The livepatch gets enabled by calling klp_enable_patch() from
+the module_init() callback. The system will start using the new
+implementation of the patched functions at this stage.
 
-When a patch is enabled, livepatch enters into a transition state where
-tasks are converging to the patched state.  This is indicated by a value
-of '1' in /sys/kernel/livepatch/<name>/transition.  Once all tasks have
-been patched, the 'transition' value changes to '0'.  For more
-information about this process, see the "Consistency model" section.
+First, the addresses of the patched functions are found according to their
+names. The special relocations, mentioned in the section "New functions",
+are applied. The relevant entries are created under
+/sys/kernel/livepatch/<name>. The patch is rejected when any above
+operation fails.
 
-If an original function is patched for the first time, a function
-specific struct klp_ops is created and an universal ftrace handler is
-registered.
+Second, livepatch enters into a transition state where tasks are converging
+to the patched state. If an original function is patched for the first
+time, a function specific struct klp_ops is created and an universal
+ftrace handler is registered[*]. This stage is indicated by a value of '1'
+in /sys/kernel/livepatch/<name>/transition. For more information about
+this process, see the "Consistency model" section.
 
-Functions might be patched multiple times. The ftrace handler is registered
-only once for the given function. Further patches just add an entry to the
-list (see field `func_stack`) of the struct klp_ops. The last added
-entry is chosen by the ftrace handler and becomes the active function
-replacement.
+Finally, once all tasks have been patched, the 'transition' value changes
+to '0'.
 
-Note that the patches might be enabled in a different order than they were
-registered.
+[*] Note that functions might be patched multiple times. The ftrace handler
+    is registered only once for a given function. Further patches just add
+    an entry to the list (see field `func_stack`) of the struct klp_ops.
+    The right implementation is selected by the ftrace handler, see
+    the "Consistency model" section.
 
 
 5.3. Disabling
 --------------
 
-Enabled patches might get disabled either by calling klp_disable_patch() or
-by writing '0' to /sys/kernel/livepatch/<name>/enabled. At this stage
-either the code from the previously enabled patch or even the original
-code gets used.
+Enabled patches might get disabled by writing '0' to
+/sys/kernel/livepatch/<name>/enabled.
 
-When a patch is disabled, livepatch enters into a transition state where
-tasks are converging to the unpatched state.  This is indicated by a
-value of '1' in /sys/kernel/livepatch/<name>/transition.  Once all tasks
-have been unpatched, the 'transition' value changes to '0'.  For more
-information about this process, see the "Consistency model" section.
+First, livepatch enters into a transition state where tasks are converging
+to the unpatched state. The system starts using either the code from
+the previously enabled patch or even the original one. This stage is
+indicated by a value of '1' in /sys/kernel/livepatch/<name>/transition.
+For more information about this process, see the "Consistency model"
+section.
 
-Here all the functions (struct klp_func) associated with the to-be-disabled
+Second, once all tasks have been unpatched, the 'transition' value changes
+to '0'. All the functions (struct klp_func) associated with the to-be-disabled
 patch are removed from the corresponding struct klp_ops. The ftrace handler
 is unregistered and the struct klp_ops is freed when the func_stack list
 becomes empty.
 
-Patches must be disabled in exactly the reverse order in which they were
-enabled. It makes the problem and the implementation much easier.
+Third, the sysfs interface is destroyed.
 
+Note that patches must be disabled in exactly the reverse order in which
+they were enabled. It makes the problem and the implementation much easier.
 
-5.4. Unregistration
--------------------
 
-Disabled patches might be unregistered by calling klp_unregister_patch().
-This can be done only when the patch is disabled and the code is no longer
-used. It must be called before the livepatch module gets unloaded.
+5.4. Removing
+-------------
 
-At this stage, all the relevant sys-fs entries are removed and the patch
-is removed from the list of known patches.
+Module removal is only safe when there are no users of functions provided
+by the module. This is the reason why the force feature permanently
+disables the removal. Only when the system is successfully transitioned
+to a new patch state (patched/unpatched) without being forced it is
+guaranteed that no task sleeps or runs in the old code.
 
 
 6. Sysfs
diff --git a/include/linux/livepatch.h b/include/linux/livepatch.h
index 6a9165d9b090..8f9c19c69744 100644
--- a/include/linux/livepatch.h
+++ b/include/linux/livepatch.h
@@ -139,11 +139,12 @@ struct klp_object {
  * struct klp_patch - patch structure for live patching
  * @mod:	reference to the live patch module
  * @objs:	object entries for kernel objects to be patched
- * @list:	list node for global list of registered patches
+ * @list:	list node for global list of actively used patches
  * @kobj:	kobject for sysfs resources
  * @kobj_added: @kobj has been added and needs freeing
  * @enabled:	the patch is enabled (but operation may be incomplete)
  * @forced:	was involved in a forced transition
+ * @free_work:	patch cleanup from workqueue-context
  * @finish:	for waiting till it is safe to remove the patch module
  */
 struct klp_patch {
@@ -157,6 +158,7 @@ struct klp_patch {
 	bool kobj_added;
 	bool enabled;
 	bool forced;
+	struct work_struct free_work;
 	struct completion finish;
 };
 
@@ -168,10 +170,7 @@ struct klp_patch {
 	     func->old_name || func->new_func || func->old_sympos; \
 	     func++)
 
-int klp_register_patch(struct klp_patch *);
-int klp_unregister_patch(struct klp_patch *);
 int klp_enable_patch(struct klp_patch *);
-int klp_disable_patch(struct klp_patch *);
 
 void arch_klp_init_object_loaded(struct klp_patch *patch,
 				 struct klp_object *obj);
diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c
index e77c5017ae0c..bd41b03a72d5 100644
--- a/kernel/livepatch/core.c
+++ b/kernel/livepatch/core.c
@@ -45,7 +45,11 @@
  */
 DEFINE_MUTEX(klp_mutex);
 
-/* Registered patches */
+/*
+ * Actively used patches: enabled or in transition. Note that replaced
+ * or disabled patches are not listed even though the related kernel
+ * module still can be loaded.
+ */
 LIST_HEAD(klp_patches);
 
 static struct kobject *klp_root_kobj;
@@ -83,17 +87,6 @@ static void klp_find_object_module(struct klp_object *obj)
 	mutex_unlock(&module_mutex);
 }
 
-static bool klp_is_patch_registered(struct klp_patch *patch)
-{
-	struct klp_patch *mypatch;
-
-	list_for_each_entry(mypatch, &klp_patches, list)
-		if (mypatch == patch)
-			return true;
-
-	return false;
-}
-
 static bool klp_initialized(void)
 {
 	return !!klp_root_kobj;
@@ -292,7 +285,6 @@ static int klp_write_object_relocations(struct module *pmod,
  * /sys/kernel/livepatch/<patch>/<object>/<function,sympos>
  */
 static int __klp_disable_patch(struct klp_patch *patch);
-static int __klp_enable_patch(struct klp_patch *patch);
 
 static ssize_t enabled_store(struct kobject *kobj, struct kobj_attribute *attr,
 			     const char *buf, size_t count)
@@ -309,40 +301,32 @@ static ssize_t enabled_store(struct kobject *kobj, struct kobj_attribute *attr,
 
 	mutex_lock(&klp_mutex);
 
-	if (!klp_is_patch_registered(patch)) {
-		/*
-		 * Module with the patch could either disappear meanwhile or is
-		 * not properly initialized yet.
-		 */
-		ret = -EINVAL;
-		goto err;
-	}
-
 	if (patch->enabled == enabled) {
 		/* already in requested state */
 		ret = -EINVAL;
-		goto err;
+		goto out;
 	}
 
-	if (patch == klp_transition_patch) {
+	/*
+	 * Allow to reverse a pending transition in both ways. It might be
+	 * necessary to complete the transition without forcing and breaking
+	 * the system integrity.
+	 *
+	 * Do not allow to re-enable a disabled patch.
+	 */
+	if (patch == klp_transition_patch)
 		klp_reverse_transition();
-	} else if (enabled) {
-		ret = __klp_enable_patch(patch);
-		if (ret)
-			goto err;
-	} else {
+	else if (!enabled)
 		ret = __klp_disable_patch(patch);
-		if (ret)
-			goto err;
-	}
+	else
+		ret = -EINVAL;
 
+out:
 	mutex_unlock(&klp_mutex);
 
+	if (ret)
+		return ret;
 	return count;
-
-err:
-	mutex_unlock(&klp_mutex);
-	return ret;
 }
 
 static ssize_t enabled_show(struct kobject *kobj,
@@ -508,7 +492,7 @@ static void klp_free_objects(struct klp_patch *patch)
  * The operation must be completed by calling klp_free_patch_finish()
  * outside klp_mutex.
  */
-static void klp_free_patch_start(struct klp_patch *patch)
+void klp_free_patch_start(struct klp_patch *patch)
 {
 	if (!list_empty(&patch->list))
 		list_del(&patch->list);
@@ -536,6 +520,23 @@ static void klp_free_patch_finish(struct klp_patch *patch)
 		kobject_put(&patch->kobj);
 		wait_for_completion(&patch->finish);
 	}
+
+	/* Put the module after the last access to struct klp_patch. */
+	if (!patch->forced)
+		module_put(patch->mod);
+}
+
+/*
+ * The livepatch might be freed from sysfs interface created by the patch.
+ * This work allows to wait until the interface is destroyed in a separate
+ * context.
+ */
+static void klp_free_patch_work_fn(struct work_struct *work)
+{
+	struct klp_patch *patch =
+		container_of(work, struct klp_patch, free_work);
+
+	klp_free_patch_finish(patch);
 }
 
 static int klp_init_func(struct klp_object *obj, struct klp_func *func)
@@ -661,6 +662,7 @@ static int klp_init_patch_early(struct klp_patch *patch)
 	patch->kobj_added = false;
 	patch->enabled = false;
 	patch->forced = false;
+	INIT_WORK(&patch->free_work, klp_free_patch_work_fn);
 	init_completion(&patch->finish);
 
 	klp_for_each_object(patch, obj) {
@@ -673,6 +675,9 @@ static int klp_init_patch_early(struct klp_patch *patch)
 			func->kobj_added = false;
 	}
 
+	if (!try_module_get(patch->mod))
+		return -ENODEV;
+
 	return 0;
 }
 
@@ -681,115 +686,22 @@ static int klp_init_patch(struct klp_patch *patch)
 	struct klp_object *obj;
 	int ret;
 
-	mutex_lock(&klp_mutex);
-
-	ret = klp_init_patch_early(patch);
-	if (ret) {
-		mutex_unlock(&klp_mutex);
-		return ret;
-	}
-
 	ret = kobject_init_and_add(&patch->kobj, &klp_ktype_patch,
 				   klp_root_kobj, "%s", patch->mod->name);
-	if (ret) {
-		mutex_unlock(&klp_mutex);
+	if (ret)
 		return ret;
-	}
 	patch->kobj_added = true;
 
 	klp_for_each_object(patch, obj) {
 		ret = klp_init_object(patch, obj);
 		if (ret)
-			goto free;
+			return ret;
 	}
 
 	list_add_tail(&patch->list, &klp_patches);
 
-	mutex_unlock(&klp_mutex);
-
-	return 0;
-
-free:
-	klp_free_patch_start(patch);
-
-	mutex_unlock(&klp_mutex);
-
-	klp_free_patch_finish(patch);
-
-	return ret;
-}
-
-/**
- * klp_unregister_patch() - unregisters a patch
- * @patch:	Disabled patch to be unregistered
- *
- * Frees the data structures and removes the sysfs interface.
- *
- * Return: 0 on success, otherwise error
- */
-int klp_unregister_patch(struct klp_patch *patch)
-{
-	int ret;
-
-	mutex_lock(&klp_mutex);
-
-	if (!klp_is_patch_registered(patch)) {
-		ret = -EINVAL;
-		goto err;
-	}
-
-	if (patch->enabled) {
-		ret = -EBUSY;
-		goto err;
-	}
-
-	klp_free_patch_start(patch);
-
-	mutex_unlock(&klp_mutex);
-
-	klp_free_patch_finish(patch);
-
 	return 0;
-err:
-	mutex_unlock(&klp_mutex);
-	return ret;
-}
-EXPORT_SYMBOL_GPL(klp_unregister_patch);
-
-/**
- * klp_register_patch() - registers a patch
- * @patch:	Patch to be registered
- *
- * Initializes the data structure associated with the patch and
- * creates the sysfs interface.
- *
- * There is no need to take the reference on the patch module here. It is done
- * later when the patch is enabled.
- *
- * Return: 0 on success, otherwise error
- */
-int klp_register_patch(struct klp_patch *patch)
-{
-	if (!patch || !patch->mod)
-		return -EINVAL;
-
-	if (!is_livepatch_module(patch->mod)) {
-		pr_err("module %s is not marked as a livepatch module\n",
-		       patch->mod->name);
-		return -EINVAL;
-	}
-
-	if (!klp_initialized())
-		return -ENODEV;
-
-	if (!klp_have_reliable_stack()) {
-		pr_err("This architecture doesn't have support for the livepatch consistency model.\n");
-		return -ENOSYS;
-	}
-
-	return klp_init_patch(patch);
 }
-EXPORT_SYMBOL_GPL(klp_register_patch);
 
 static int __klp_disable_patch(struct klp_patch *patch)
 {
@@ -802,8 +714,7 @@ static int __klp_disable_patch(struct klp_patch *patch)
 		return -EBUSY;
 
 	/* enforce stacking: only the last enabled patch can be disabled */
-	if (!list_is_last(&patch->list, &klp_patches) &&
-	    list_next_entry(patch, list)->enabled)
+	if (!list_is_last(&patch->list, &klp_patches))
 		return -EBUSY;
 
 	klp_init_transition(patch, KLP_UNPATCHED);
@@ -822,44 +733,12 @@ static int __klp_disable_patch(struct klp_patch *patch)
 	smp_wmb();
 
 	klp_start_transition();
-	klp_try_complete_transition();
 	patch->enabled = false;
+	klp_try_complete_transition();
 
 	return 0;
 }
 
-/**
- * klp_disable_patch() - disables a registered patch
- * @patch:	The registered, enabled patch to be disabled
- *
- * Unregisters the patched functions from ftrace.
- *
- * Return: 0 on success, otherwise error
- */
-int klp_disable_patch(struct klp_patch *patch)
-{
-	int ret;
-
-	mutex_lock(&klp_mutex);
-
-	if (!klp_is_patch_registered(patch)) {
-		ret = -EINVAL;
-		goto err;
-	}
-
-	if (!patch->enabled) {
-		ret = -EINVAL;
-		goto err;
-	}
-
-	ret = __klp_disable_patch(patch);
-
-err:
-	mutex_unlock(&klp_mutex);
-	return ret;
-}
-EXPORT_SYMBOL_GPL(klp_disable_patch);
-
 static int __klp_enable_patch(struct klp_patch *patch)
 {
 	struct klp_object *obj;
@@ -871,17 +750,8 @@ static int __klp_enable_patch(struct klp_patch *patch)
 	if (WARN_ON(patch->enabled))
 		return -EINVAL;
 
-	/* enforce stacking: only the first disabled patch can be enabled */
-	if (patch->list.prev != &klp_patches &&
-	    !list_prev_entry(patch, list)->enabled)
-		return -EBUSY;
-
-	/*
-	 * A reference is taken on the patch module to prevent it from being
-	 * unloaded.
-	 */
-	if (!try_module_get(patch->mod))
-		return -ENODEV;
+	if (!patch->kobj_added)
+		return -EINVAL;
 
 	pr_notice("enabling patch '%s'\n", patch->mod->name);
 
@@ -916,8 +786,8 @@ static int __klp_enable_patch(struct klp_patch *patch)
 	}
 
 	klp_start_transition();
-	klp_try_complete_transition();
 	patch->enabled = true;
+	klp_try_complete_transition();
 
 	return 0;
 err:
@@ -928,11 +798,15 @@ err:
 }
 
 /**
- * klp_enable_patch() - enables a registered patch
- * @patch:	The registered, disabled patch to be enabled
+ * klp_enable_patch() - enable the livepatch
+ * @patch:	patch to be enabled
  *
- * Performs the needed symbol lookups and code relocations,
- * then registers the patched functions with ftrace.
+ * Initializes the data structure associated with the patch, creates the sysfs
+ * interface, performs the needed symbol lookups and code relocations,
+ * registers the patched functions with ftrace.
+ *
+ * This function is supposed to be called from the livepatch module_init()
+ * callback.
  *
  * Return: 0 on success, otherwise error
  */
@@ -940,17 +814,51 @@ int klp_enable_patch(struct klp_patch *patch)
 {
 	int ret;
 
+	if (!patch || !patch->mod)
+		return -EINVAL;
+
+	if (!is_livepatch_module(patch->mod)) {
+		pr_err("module %s is not marked as a livepatch module\n",
+		       patch->mod->name);
+		return -EINVAL;
+	}
+
+	if (!klp_initialized())
+		return -ENODEV;
+
+	if (!klp_have_reliable_stack()) {
+		pr_err("This architecture doesn't have support for the livepatch consistency model.\n");
+		return -ENOSYS;
+	}
+
+
 	mutex_lock(&klp_mutex);
 
-	if (!klp_is_patch_registered(patch)) {
-		ret = -EINVAL;
-		goto err;
+	ret = klp_init_patch_early(patch);
+	if (ret) {
+		mutex_unlock(&klp_mutex);
+		return ret;
 	}
 
+	ret = klp_init_patch(patch);
+	if (ret)
+		goto err;
+
 	ret = __klp_enable_patch(patch);
+	if (ret)
+		goto err;
+
+	mutex_unlock(&klp_mutex);
+
+	return 0;
 
 err:
+	klp_free_patch_start(patch);
+
 	mutex_unlock(&klp_mutex);
+
+	klp_free_patch_finish(patch);
+
 	return ret;
 }
 EXPORT_SYMBOL_GPL(klp_enable_patch);
diff --git a/kernel/livepatch/core.h b/kernel/livepatch/core.h
index d0cb5390e247..d4eefc520c08 100644
--- a/kernel/livepatch/core.h
+++ b/kernel/livepatch/core.h
@@ -7,6 +7,8 @@
 extern struct mutex klp_mutex;
 extern struct list_head klp_patches;
 
+void klp_free_patch_start(struct klp_patch *patch);
+
 static inline bool klp_is_object_loaded(struct klp_object *obj)
 {
 	return !obj->name || obj->mod;
diff --git a/kernel/livepatch/transition.c b/kernel/livepatch/transition.c
index a4c921364003..c9917a24b3a4 100644
--- a/kernel/livepatch/transition.c
+++ b/kernel/livepatch/transition.c
@@ -134,13 +134,6 @@ static void klp_complete_transition(void)
 	pr_notice("'%s': %s complete\n", klp_transition_patch->mod->name,
 		  klp_target_state == KLP_PATCHED ? "patching" : "unpatching");
 
-	/*
-	 * patch->forced set implies unbounded increase of module's ref count if
-	 * the module is disabled/enabled in a loop.
-	 */
-	if (!klp_transition_patch->forced && klp_target_state == KLP_UNPATCHED)
-		module_put(klp_transition_patch->mod);
-
 	klp_target_state = KLP_UNDEFINED;
 	klp_transition_patch = NULL;
 }
@@ -357,6 +350,7 @@ void klp_try_complete_transition(void)
 {
 	unsigned int cpu;
 	struct task_struct *g, *task;
+	struct klp_patch *patch;
 	bool complete = true;
 
 	WARN_ON_ONCE(klp_target_state == KLP_UNDEFINED);
@@ -405,7 +399,18 @@ void klp_try_complete_transition(void)
 	}
 
 	/* we're done, now cleanup the data structures */
+	patch = klp_transition_patch;
 	klp_complete_transition();
+
+	/*
+	 * It would make more sense to free the patch in
+	 * klp_complete_transition() but it is called also
+	 * from klp_cancel_transition().
+	 */
+	if (!patch->enabled) {
+		klp_free_patch_start(patch);
+		schedule_work(&patch->free_work);
+	}
 }
 
 /*
diff --git a/samples/livepatch/livepatch-callbacks-demo.c b/samples/livepatch/livepatch-callbacks-demo.c
index 72f9e6d1387b..62d97953ad02 100644
--- a/samples/livepatch/livepatch-callbacks-demo.c
+++ b/samples/livepatch/livepatch-callbacks-demo.c
@@ -195,22 +195,11 @@ static struct klp_patch patch = {
 
 static int livepatch_callbacks_demo_init(void)
 {
-	int ret;
-
-	ret = klp_register_patch(&patch);
-	if (ret)
-		return ret;
-	ret = klp_enable_patch(&patch);
-	if (ret) {
-		WARN_ON(klp_unregister_patch(&patch));
-		return ret;
-	}
-	return 0;
+	return klp_enable_patch(&patch);
 }
 
 static void livepatch_callbacks_demo_exit(void)
 {
-	WARN_ON(klp_unregister_patch(&patch));
 }
 
 module_init(livepatch_callbacks_demo_init);
diff --git a/samples/livepatch/livepatch-sample.c b/samples/livepatch/livepatch-sample.c
index 2d554dd930e2..01c9cf003ca2 100644
--- a/samples/livepatch/livepatch-sample.c
+++ b/samples/livepatch/livepatch-sample.c
@@ -69,22 +69,11 @@ static struct klp_patch patch = {
 
 static int livepatch_init(void)
 {
-	int ret;
-
-	ret = klp_register_patch(&patch);
-	if (ret)
-		return ret;
-	ret = klp_enable_patch(&patch);
-	if (ret) {
-		WARN_ON(klp_unregister_patch(&patch));
-		return ret;
-	}
-	return 0;
+	return klp_enable_patch(&patch);
 }
 
 static void livepatch_exit(void)
 {
-	WARN_ON(klp_unregister_patch(&patch));
 }
 
 module_init(livepatch_init);
diff --git a/samples/livepatch/livepatch-shadow-fix1.c b/samples/livepatch/livepatch-shadow-fix1.c
index e8f1bd6b29b1..a5a5cac21d4d 100644
--- a/samples/livepatch/livepatch-shadow-fix1.c
+++ b/samples/livepatch/livepatch-shadow-fix1.c
@@ -157,25 +157,13 @@ static struct klp_patch patch = {
 
 static int livepatch_shadow_fix1_init(void)
 {
-	int ret;
-
-	ret = klp_register_patch(&patch);
-	if (ret)
-		return ret;
-	ret = klp_enable_patch(&patch);
-	if (ret) {
-		WARN_ON(klp_unregister_patch(&patch));
-		return ret;
-	}
-	return 0;
+	return klp_enable_patch(&patch);
 }
 
 static void livepatch_shadow_fix1_exit(void)
 {
 	/* Cleanup any existing SV_LEAK shadow variables */
 	klp_shadow_free_all(SV_LEAK, livepatch_fix1_dummy_leak_dtor);
-
-	WARN_ON(klp_unregister_patch(&patch));
 }
 
 module_init(livepatch_shadow_fix1_init);
diff --git a/samples/livepatch/livepatch-shadow-fix2.c b/samples/livepatch/livepatch-shadow-fix2.c
index b34c7bf83356..52de947b5526 100644
--- a/samples/livepatch/livepatch-shadow-fix2.c
+++ b/samples/livepatch/livepatch-shadow-fix2.c
@@ -129,25 +129,13 @@ static struct klp_patch patch = {
 
 static int livepatch_shadow_fix2_init(void)
 {
-	int ret;
-
-	ret = klp_register_patch(&patch);
-	if (ret)
-		return ret;
-	ret = klp_enable_patch(&patch);
-	if (ret) {
-		WARN_ON(klp_unregister_patch(&patch));
-		return ret;
-	}
-	return 0;
+	return klp_enable_patch(&patch);
 }
 
 static void livepatch_shadow_fix2_exit(void)
 {
 	/* Cleanup any existing SV_COUNTER shadow variables */
 	klp_shadow_free_all(SV_COUNTER, NULL);
-
-	WARN_ON(klp_unregister_patch(&patch));
 }
 
 module_init(livepatch_shadow_fix2_init);
-- 
cgit v1.2.3


From 20e55025958e18e671d92c7adea00c301ac93c43 Mon Sep 17 00:00:00 2001
From: Jason Baron <jbaron@akamai.com>
Date: Wed, 9 Jan 2019 13:43:24 +0100
Subject: livepatch: Use lists to manage patches, objects and functions

Currently klp_patch contains a pointer to a statically allocated array of
struct klp_object and struct klp_objects contains a pointer to a statically
allocated array of klp_func. In order to allow for the dynamic allocation
of objects and functions, link klp_patch, klp_object, and klp_func together
via linked lists. This allows us to more easily allocate new objects and
functions, while having the iterator be a simple linked list walk.

The static structures are added to the lists early. It allows to add
the dynamically allocated objects before klp_init_object() and
klp_init_func() calls. Therefore it reduces the further changes
to the code.

This patch does not change the existing behavior.

Signed-off-by: Jason Baron <jbaron@akamai.com>
[pmladek@suse.com: Initialize lists before init calls]
Signed-off-by: Petr Mladek <pmladek@suse.com>
Acked-by: Miroslav Benes <mbenes@suse.cz>
Acked-by: Joe Lawrence <joe.lawrence@redhat.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Jiri Kosina <jikos@kernel.org>
Acked-by: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 include/linux/livepatch.h | 19 +++++++++++++++++--
 kernel/livepatch/core.c   |  9 +++++++--
 2 files changed, 24 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/livepatch.h b/include/linux/livepatch.h
index 8f9c19c69744..e117e20ff771 100644
--- a/include/linux/livepatch.h
+++ b/include/linux/livepatch.h
@@ -24,6 +24,7 @@
 #include <linux/module.h>
 #include <linux/ftrace.h>
 #include <linux/completion.h>
+#include <linux/list.h>
 
 #if IS_ENABLED(CONFIG_LIVEPATCH)
 
@@ -42,6 +43,7 @@
  *		can be found (optional)
  * @old_func:	pointer to the function being patched
  * @kobj:	kobject for sysfs resources
+ * @node:	list node for klp_object func_list
  * @stack_node:	list node for klp_ops func_stack list
  * @old_size:	size of the old function
  * @new_size:	size of the new function
@@ -80,6 +82,7 @@ struct klp_func {
 	/* internal */
 	void *old_func;
 	struct kobject kobj;
+	struct list_head node;
 	struct list_head stack_node;
 	unsigned long old_size, new_size;
 	bool kobj_added;
@@ -117,6 +120,8 @@ struct klp_callbacks {
  * @funcs:	function entries for functions to be patched in the object
  * @callbacks:	functions to be executed pre/post (un)patching
  * @kobj:	kobject for sysfs resources
+ * @func_list:	dynamic list of the function entries
+ * @node:	list node for klp_patch obj_list
  * @mod:	kernel module associated with the patched object
  *		(NULL for vmlinux)
  * @kobj_added: @kobj has been added and needs freeing
@@ -130,6 +135,8 @@ struct klp_object {
 
 	/* internal */
 	struct kobject kobj;
+	struct list_head func_list;
+	struct list_head node;
 	struct module *mod;
 	bool kobj_added;
 	bool patched;
@@ -141,6 +148,7 @@ struct klp_object {
  * @objs:	object entries for kernel objects to be patched
  * @list:	list node for global list of actively used patches
  * @kobj:	kobject for sysfs resources
+ * @obj_list:	dynamic list of the object entries
  * @kobj_added: @kobj has been added and needs freeing
  * @enabled:	the patch is enabled (but operation may be incomplete)
  * @forced:	was involved in a forced transition
@@ -155,6 +163,7 @@ struct klp_patch {
 	/* internal */
 	struct list_head list;
 	struct kobject kobj;
+	struct list_head obj_list;
 	bool kobj_added;
 	bool enabled;
 	bool forced;
@@ -162,14 +171,20 @@ struct klp_patch {
 	struct completion finish;
 };
 
-#define klp_for_each_object(patch, obj) \
+#define klp_for_each_object_static(patch, obj) \
 	for (obj = patch->objs; obj->funcs || obj->name; obj++)
 
-#define klp_for_each_func(obj, func) \
+#define klp_for_each_object(patch, obj)	\
+	list_for_each_entry(obj, &patch->obj_list, node)
+
+#define klp_for_each_func_static(obj, func) \
 	for (func = obj->funcs; \
 	     func->old_name || func->new_func || func->old_sympos; \
 	     func++)
 
+#define klp_for_each_func(obj, func)	\
+	list_for_each_entry(func, &obj->func_list, node)
+
 int klp_enable_patch(struct klp_patch *);
 
 void arch_klp_init_object_loaded(struct klp_patch *patch,
diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c
index bd41b03a72d5..37d0d3645fa6 100644
--- a/kernel/livepatch/core.c
+++ b/kernel/livepatch/core.c
@@ -659,20 +659,25 @@ static int klp_init_patch_early(struct klp_patch *patch)
 		return -EINVAL;
 
 	INIT_LIST_HEAD(&patch->list);
+	INIT_LIST_HEAD(&patch->obj_list);
 	patch->kobj_added = false;
 	patch->enabled = false;
 	patch->forced = false;
 	INIT_WORK(&patch->free_work, klp_free_patch_work_fn);
 	init_completion(&patch->finish);
 
-	klp_for_each_object(patch, obj) {
+	klp_for_each_object_static(patch, obj) {
 		if (!obj->funcs)
 			return -EINVAL;
 
+		INIT_LIST_HEAD(&obj->func_list);
 		obj->kobj_added = false;
+		list_add_tail(&obj->node, &patch->obj_list);
 
-		klp_for_each_func(obj, func)
+		klp_for_each_func_static(obj, func) {
 			func->kobj_added = false;
+			list_add_tail(&func->node, &obj->func_list);
+		}
 	}
 
 	if (!try_module_get(patch->mod))
-- 
cgit v1.2.3


From e1452b607c48c642caf57299f4da83aa002f8533 Mon Sep 17 00:00:00 2001
From: Jason Baron <jbaron@akamai.com>
Date: Wed, 9 Jan 2019 13:43:25 +0100
Subject: livepatch: Add atomic replace

Sometimes we would like to revert a particular fix. Currently, this
is not easy because we want to keep all other fixes active and we
could revert only the last applied patch.

One solution would be to apply new patch that implemented all
the reverted functions like in the original code. It would work
as expected but there will be unnecessary redirections. In addition,
it would also require knowing which functions need to be reverted at
build time.

Another problem is when there are many patches that touch the same
functions. There might be dependencies between patches that are
not enforced on the kernel side. Also it might be pretty hard to
actually prepare the patch and ensure compatibility with the other
patches.

Atomic replace && cumulative patches:

A better solution would be to create cumulative patch and say that
it replaces all older ones.

This patch adds a new "replace" flag to struct klp_patch. When it is
enabled, a set of 'nop' klp_func will be dynamically created for all
functions that are already being patched but that will no longer be
modified by the new patch. They are used as a new target during
the patch transition.

The idea is to handle Nops' structures like the static ones. When
the dynamic structures are allocated, we initialize all values that
are normally statically defined.

The only exception is "new_func" in struct klp_func. It has to point
to the original function and the address is known only when the object
(module) is loaded. Note that we really need to set it. The address is
used, for example, in klp_check_stack_func().

Nevertheless we still need to distinguish the dynamically allocated
structures in some operations. For this, we add "nop" flag into
struct klp_func and "dynamic" flag into struct klp_object. They
need special handling in the following situations:

  + The structures are added into the lists of objects and functions
    immediately. In fact, the lists were created for this purpose.

  + The address of the original function is known only when the patched
    object (module) is loaded. Therefore it is copied later in
    klp_init_object_loaded().

  + The ftrace handler must not set PC to func->new_func. It would cause
    infinite loop because the address points back to the beginning of
    the original function.

  + The various free() functions must free the structure itself.

Note that other ways to detect the dynamic structures are not considered
safe. For example, even the statically defined struct klp_object might
include empty funcs array. It might be there just to run some callbacks.

Also note that the safe iterator must be used in the free() functions.
Otherwise already freed structures might get accessed.

Special callbacks handling:

The callbacks from the replaced patches are _not_ called by intention.
It would be pretty hard to define a reasonable semantic and implement it.

It might even be counter-productive. The new patch is cumulative. It is
supposed to include most of the changes from older patches. In most cases,
it will not want to call pre_unpatch() post_unpatch() callbacks from
the replaced patches. It would disable/break things for no good reasons.
Also it should be easier to handle various scenarios in a single script
in the new patch than think about interactions caused by running many
scripts from older patches. Not to say that the old scripts even would
not expect to be called in this situation.

Removing replaced patches:

One nice effect of the cumulative patches is that the code from the
older patches is no longer used. Therefore the replaced patches can
be removed. It has several advantages:

  + Nops' structs will no longer be necessary and might be removed.
    This would save memory, restore performance (no ftrace handler),
    allow clear view on what is really patched.

  + Disabling the patch will cause using the original code everywhere.
    Therefore the livepatch callbacks could handle only one scenario.
    Note that the complication is already complex enough when the patch
    gets enabled. It is currently solved by calling callbacks only from
    the new cumulative patch.

  + The state is clean in both the sysfs interface and lsmod. The modules
    with the replaced livepatches might even get removed from the system.

Some people actually expected this behavior from the beginning. After all
a cumulative patch is supposed to "completely" replace an existing one.
It is like when a new version of an application replaces an older one.

This patch does the first step. It removes the replaced patches from
the list of patches. It is safe. The consistency model ensures that
they are no longer used. By other words, each process works only with
the structures from klp_transition_patch.

The removal is done by a special function. It combines actions done by
__disable_patch() and klp_complete_transition(). But it is a fast
track without all the transaction-related stuff.

Signed-off-by: Jason Baron <jbaron@akamai.com>
[pmladek@suse.com: Split, reuse existing code, simplified]
Signed-off-by: Petr Mladek <pmladek@suse.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Jessica Yu <jeyu@kernel.org>
Cc: Jiri Kosina <jikos@kernel.org>
Cc: Miroslav Benes <mbenes@suse.cz>
Acked-by: Miroslav Benes <mbenes@suse.cz>
Acked-by: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 Documentation/livepatch/livepatch.txt |  31 ++++-
 include/linux/livepatch.h             |  12 ++
 kernel/livepatch/core.c               | 232 ++++++++++++++++++++++++++++++++--
 kernel/livepatch/core.h               |   1 +
 kernel/livepatch/patch.c              |   8 ++
 kernel/livepatch/transition.c         |   3 +
 6 files changed, 273 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/Documentation/livepatch/livepatch.txt b/Documentation/livepatch/livepatch.txt
index 8f56490a4bb6..2a70f43166f6 100644
--- a/Documentation/livepatch/livepatch.txt
+++ b/Documentation/livepatch/livepatch.txt
@@ -15,8 +15,9 @@ Table of Contents:
 5. Livepatch life-cycle
    5.1. Loading
    5.2. Enabling
-   5.3. Disabling
-   5.4. Removing
+   5.3. Replacing
+   5.4. Disabling
+   5.5. Removing
 6. Sysfs
 7. Limitations
 
@@ -300,8 +301,12 @@ into three levels:
 5. Livepatch life-cycle
 =======================
 
-Livepatching can be described by four basic operations:
-loading, enabling, disabling, removing.
+Livepatching can be described by five basic operations:
+loading, enabling, replacing, disabling, removing.
+
+Where the replacing and the disabling operations are mutually
+exclusive. They have the same result for the given patch but
+not for the system.
 
 
 5.1. Loading
@@ -347,7 +352,21 @@ to '0'.
     the "Consistency model" section.
 
 
-5.3. Disabling
+5.3. Replacing
+--------------
+
+All enabled patches might get replaced by a cumulative patch that
+has the .replace flag set.
+
+Once the new patch is enabled and the 'transition' finishes then
+all the functions (struct klp_func) associated with the replaced
+patches are removed from the corresponding struct klp_ops. Also
+the ftrace handler is unregistered and the struct klp_ops is
+freed when the related function is not modified by the new patch
+and func_stack list becomes empty.
+
+
+5.4. Disabling
 --------------
 
 Enabled patches might get disabled by writing '0' to
@@ -372,7 +391,7 @@ Note that patches must be disabled in exactly the reverse order in which
 they were enabled. It makes the problem and the implementation much easier.
 
 
-5.4. Removing
+5.5. Removing
 -------------
 
 Module removal is only safe when there are no users of functions provided
diff --git a/include/linux/livepatch.h b/include/linux/livepatch.h
index e117e20ff771..53551f470722 100644
--- a/include/linux/livepatch.h
+++ b/include/linux/livepatch.h
@@ -48,6 +48,7 @@
  * @old_size:	size of the old function
  * @new_size:	size of the new function
  * @kobj_added: @kobj has been added and needs freeing
+ * @nop:        temporary patch to use the original code again; dyn. allocated
  * @patched:	the func has been added to the klp_ops list
  * @transition:	the func is currently being applied or reverted
  *
@@ -86,6 +87,7 @@ struct klp_func {
 	struct list_head stack_node;
 	unsigned long old_size, new_size;
 	bool kobj_added;
+	bool nop;
 	bool patched;
 	bool transition;
 };
@@ -125,6 +127,7 @@ struct klp_callbacks {
  * @mod:	kernel module associated with the patched object
  *		(NULL for vmlinux)
  * @kobj_added: @kobj has been added and needs freeing
+ * @dynamic:    temporary object for nop functions; dynamically allocated
  * @patched:	the object's funcs have been added to the klp_ops list
  */
 struct klp_object {
@@ -139,6 +142,7 @@ struct klp_object {
 	struct list_head node;
 	struct module *mod;
 	bool kobj_added;
+	bool dynamic;
 	bool patched;
 };
 
@@ -146,6 +150,7 @@ struct klp_object {
  * struct klp_patch - patch structure for live patching
  * @mod:	reference to the live patch module
  * @objs:	object entries for kernel objects to be patched
+ * @replace:	replace all actively used patches
  * @list:	list node for global list of actively used patches
  * @kobj:	kobject for sysfs resources
  * @obj_list:	dynamic list of the object entries
@@ -159,6 +164,7 @@ struct klp_patch {
 	/* external */
 	struct module *mod;
 	struct klp_object *objs;
+	bool replace;
 
 	/* internal */
 	struct list_head list;
@@ -174,6 +180,9 @@ struct klp_patch {
 #define klp_for_each_object_static(patch, obj) \
 	for (obj = patch->objs; obj->funcs || obj->name; obj++)
 
+#define klp_for_each_object_safe(patch, obj, tmp_obj)		\
+	list_for_each_entry_safe(obj, tmp_obj, &patch->obj_list, node)
+
 #define klp_for_each_object(patch, obj)	\
 	list_for_each_entry(obj, &patch->obj_list, node)
 
@@ -182,6 +191,9 @@ struct klp_patch {
 	     func->old_name || func->new_func || func->old_sympos; \
 	     func++)
 
+#define klp_for_each_func_safe(obj, func, tmp_func)			\
+	list_for_each_entry_safe(func, tmp_func, &obj->func_list, node)
+
 #define klp_for_each_func(obj, func)	\
 	list_for_each_entry(func, &obj->func_list, node)
 
diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c
index 37d0d3645fa6..ecb7660f1d8b 100644
--- a/kernel/livepatch/core.c
+++ b/kernel/livepatch/core.c
@@ -92,6 +92,40 @@ static bool klp_initialized(void)
 	return !!klp_root_kobj;
 }
 
+static struct klp_func *klp_find_func(struct klp_object *obj,
+				      struct klp_func *old_func)
+{
+	struct klp_func *func;
+
+	klp_for_each_func(obj, func) {
+		if ((strcmp(old_func->old_name, func->old_name) == 0) &&
+		    (old_func->old_sympos == func->old_sympos)) {
+			return func;
+		}
+	}
+
+	return NULL;
+}
+
+static struct klp_object *klp_find_object(struct klp_patch *patch,
+					  struct klp_object *old_obj)
+{
+	struct klp_object *obj;
+
+	klp_for_each_object(patch, obj) {
+		if (klp_is_module(old_obj)) {
+			if (klp_is_module(obj) &&
+			    strcmp(old_obj->name, obj->name) == 0) {
+				return obj;
+			}
+		} else if (!klp_is_module(obj)) {
+			return obj;
+		}
+	}
+
+	return NULL;
+}
+
 struct klp_find_arg {
 	const char *objname;
 	const char *name;
@@ -418,6 +452,121 @@ static struct attribute *klp_patch_attrs[] = {
 	NULL
 };
 
+static void klp_free_object_dynamic(struct klp_object *obj)
+{
+	kfree(obj->name);
+	kfree(obj);
+}
+
+static struct klp_object *klp_alloc_object_dynamic(const char *name)
+{
+	struct klp_object *obj;
+
+	obj = kzalloc(sizeof(*obj), GFP_KERNEL);
+	if (!obj)
+		return NULL;
+
+	if (name) {
+		obj->name = kstrdup(name, GFP_KERNEL);
+		if (!obj->name) {
+			kfree(obj);
+			return NULL;
+		}
+	}
+
+	INIT_LIST_HEAD(&obj->func_list);
+	obj->dynamic = true;
+
+	return obj;
+}
+
+static void klp_free_func_nop(struct klp_func *func)
+{
+	kfree(func->old_name);
+	kfree(func);
+}
+
+static struct klp_func *klp_alloc_func_nop(struct klp_func *old_func,
+					   struct klp_object *obj)
+{
+	struct klp_func *func;
+
+	func = kzalloc(sizeof(*func), GFP_KERNEL);
+	if (!func)
+		return NULL;
+
+	if (old_func->old_name) {
+		func->old_name = kstrdup(old_func->old_name, GFP_KERNEL);
+		if (!func->old_name) {
+			kfree(func);
+			return NULL;
+		}
+	}
+
+	/*
+	 * func->new_func is same as func->old_func. These addresses are
+	 * set when the object is loaded, see klp_init_object_loaded().
+	 */
+	func->old_sympos = old_func->old_sympos;
+	func->nop = true;
+
+	return func;
+}
+
+static int klp_add_object_nops(struct klp_patch *patch,
+			       struct klp_object *old_obj)
+{
+	struct klp_object *obj;
+	struct klp_func *func, *old_func;
+
+	obj = klp_find_object(patch, old_obj);
+
+	if (!obj) {
+		obj = klp_alloc_object_dynamic(old_obj->name);
+		if (!obj)
+			return -ENOMEM;
+
+		list_add_tail(&obj->node, &patch->obj_list);
+	}
+
+	klp_for_each_func(old_obj, old_func) {
+		func = klp_find_func(obj, old_func);
+		if (func)
+			continue;
+
+		func = klp_alloc_func_nop(old_func, obj);
+		if (!func)
+			return -ENOMEM;
+
+		list_add_tail(&func->node, &obj->func_list);
+	}
+
+	return 0;
+}
+
+/*
+ * Add 'nop' functions which simply return to the caller to run
+ * the original function. The 'nop' functions are added to a
+ * patch to facilitate a 'replace' mode.
+ */
+static int klp_add_nops(struct klp_patch *patch)
+{
+	struct klp_patch *old_patch;
+	struct klp_object *old_obj;
+
+	list_for_each_entry(old_patch, &klp_patches, list) {
+		klp_for_each_object(old_patch, old_obj) {
+			int err;
+
+			err = klp_add_object_nops(patch, old_obj);
+			if (err)
+				return err;
+		}
+	}
+
+	return 0;
+}
+
 static void klp_kobj_release_patch(struct kobject *kobj)
 {
 	struct klp_patch *patch;
@@ -434,6 +583,12 @@ static struct kobj_type klp_ktype_patch = {
 
 static void klp_kobj_release_object(struct kobject *kobj)
 {
+	struct klp_object *obj;
+
+	obj = container_of(kobj, struct klp_object, kobj);
+
+	if (obj->dynamic)
+		klp_free_object_dynamic(obj);
 }
 
 static struct kobj_type klp_ktype_object = {
@@ -443,6 +598,12 @@ static struct kobj_type klp_ktype_object = {
 
 static void klp_kobj_release_func(struct kobject *kobj)
 {
+	struct klp_func *func;
+
+	func = container_of(kobj, struct klp_func, kobj);
+
+	if (func->nop)
+		klp_free_func_nop(func);
 }
 
 static struct kobj_type klp_ktype_func = {
@@ -452,12 +613,15 @@ static struct kobj_type klp_ktype_func = {
 
 static void klp_free_funcs(struct klp_object *obj)
 {
-	struct klp_func *func;
+	struct klp_func *func, *tmp_func;
 
-	klp_for_each_func(obj, func) {
+	klp_for_each_func_safe(obj, func, tmp_func) {
 		/* Might be called from klp_init_patch() error path. */
-		if (func->kobj_added)
+		if (func->kobj_added) {
 			kobject_put(&func->kobj);
+		} else if (func->nop) {
+			klp_free_func_nop(func);
+		}
 	}
 }
 
@@ -468,20 +632,27 @@ static void klp_free_object_loaded(struct klp_object *obj)
 
 	obj->mod = NULL;
 
-	klp_for_each_func(obj, func)
+	klp_for_each_func(obj, func) {
 		func->old_func = NULL;
+
+		if (func->nop)
+			func->new_func = NULL;
+	}
 }
 
 static void klp_free_objects(struct klp_patch *patch)
 {
-	struct klp_object *obj;
+	struct klp_object *obj, *tmp_obj;
 
-	klp_for_each_object(patch, obj) {
+	klp_for_each_object_safe(patch, obj, tmp_obj) {
 		klp_free_funcs(obj);
 
 		/* Might be called from klp_init_patch() error path. */
-		if (obj->kobj_added)
+		if (obj->kobj_added) {
 			kobject_put(&obj->kobj);
+		} else if (obj->dynamic) {
+			klp_free_object_dynamic(obj);
+		}
 	}
 }
 
@@ -543,7 +714,14 @@ static int klp_init_func(struct klp_object *obj, struct klp_func *func)
 {
 	int ret;
 
-	if (!func->old_name || !func->new_func)
+	if (!func->old_name)
+		return -EINVAL;
+
+	/*
+	 * NOPs get the address later. The patched module must be loaded,
+	 * see klp_init_object_loaded().
+	 */
+	if (!func->new_func && !func->nop)
 		return -EINVAL;
 
 	if (strlen(func->old_name) >= KSYM_NAME_LEN)
@@ -605,6 +783,9 @@ static int klp_init_object_loaded(struct klp_patch *patch,
 			return -ENOENT;
 		}
 
+		if (func->nop)
+			func->new_func = func->old_func;
+
 		ret = kallsyms_lookup_size_offset((unsigned long)func->new_func,
 						  &func->new_size, NULL);
 		if (!ret) {
@@ -697,6 +878,12 @@ static int klp_init_patch(struct klp_patch *patch)
 		return ret;
 	patch->kobj_added = true;
 
+	if (patch->replace) {
+		ret = klp_add_nops(patch);
+		if (ret)
+			return ret;
+	}
+
 	klp_for_each_object(patch, obj) {
 		ret = klp_init_object(patch, obj);
 		if (ret)
@@ -868,6 +1055,35 @@ err:
 }
 EXPORT_SYMBOL_GPL(klp_enable_patch);
 
+/*
+ * This function removes replaced patches.
+ *
+ * We could be pretty aggressive here. It is called in the situation where
+ * these structures are no longer accessible. All functions are redirected
+ * by the klp_transition_patch. They use either a new code or they are in
+ * the original code because of the special nop function patches.
+ *
+ * The only exception is when the transition was forced. In this case,
+ * klp_ftrace_handler() might still see the replaced patch on the stack.
+ * Fortunately, it is carefully designed to work with removed functions
+ * thanks to RCU. We only have to keep the patches on the system. Also
+ * this is handled transparently by patch->module_put.
+ */
+void klp_discard_replaced_patches(struct klp_patch *new_patch)
+{
+	struct klp_patch *old_patch, *tmp_patch;
+
+	list_for_each_entry_safe(old_patch, tmp_patch, &klp_patches, list) {
+		if (old_patch == new_patch)
+			return;
+
+		old_patch->enabled = false;
+		klp_unpatch_objects(old_patch);
+		klp_free_patch_start(old_patch);
+		schedule_work(&old_patch->free_work);
+	}
+}
+
 /*
  * Remove parts of patches that touch a given kernel module. The list of
  * patches processed might be limited. When limit is NULL, all patches
diff --git a/kernel/livepatch/core.h b/kernel/livepatch/core.h
index d4eefc520c08..f6a853adcc00 100644
--- a/kernel/livepatch/core.h
+++ b/kernel/livepatch/core.h
@@ -8,6 +8,7 @@ extern struct mutex klp_mutex;
 extern struct list_head klp_patches;
 
 void klp_free_patch_start(struct klp_patch *patch);
+void klp_discard_replaced_patches(struct klp_patch *new_patch);
 
 static inline bool klp_is_object_loaded(struct klp_object *obj)
 {
diff --git a/kernel/livepatch/patch.c b/kernel/livepatch/patch.c
index 825022d70912..0ff466ab4b5a 100644
--- a/kernel/livepatch/patch.c
+++ b/kernel/livepatch/patch.c
@@ -118,7 +118,15 @@ static void notrace klp_ftrace_handler(unsigned long ip,
 		}
 	}
 
+	/*
+	 * NOPs are used to replace existing patches with original code.
+	 * Do nothing! Setting pc would cause an infinite loop.
+	 */
+	if (func->nop)
+		goto unlock;
+
 	klp_arch_set_pc(regs, (unsigned long)func->new_func);
+
 unlock:
 	preempt_enable_notrace();
 }
diff --git a/kernel/livepatch/transition.c b/kernel/livepatch/transition.c
index c9917a24b3a4..f4c5908a9731 100644
--- a/kernel/livepatch/transition.c
+++ b/kernel/livepatch/transition.c
@@ -85,6 +85,9 @@ static void klp_complete_transition(void)
 		 klp_transition_patch->mod->name,
 		 klp_target_state == KLP_PATCHED ? "patching" : "unpatching");
 
+	if (klp_transition_patch->replace && klp_target_state == KLP_PATCHED)
+		klp_discard_replaced_patches(klp_transition_patch);
+
 	if (klp_target_state == KLP_UNPATCHED) {
 		/*
 		 * All tasks have transitioned to KLP_UNPATCHED so we can now
-- 
cgit v1.2.3


From cf9ff75d15a9bbd625519997c2ca864d1fa80227 Mon Sep 17 00:00:00 2001
From: Ioana Ciornei <ioana.ciornei@nxp.com>
Date: Mon, 10 Dec 2018 16:50:18 +0000
Subject: soc: fsl: dpio: store a backpointer to the device backing the
 dpaa2_io

Add a new field in the dpaa2_io structure to hold a backpointer to the
actual DPIO device.

Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
Signed-off-by: Li Yang <leoyang.li@nxp.com>
---
 drivers/soc/fsl/dpio/dpio-driver.c  | 2 +-
 drivers/soc/fsl/dpio/dpio-service.c | 7 ++++++-
 include/soc/fsl/dpaa2-io.h          | 3 ++-
 3 files changed, 9 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/drivers/soc/fsl/dpio/dpio-driver.c b/drivers/soc/fsl/dpio/dpio-driver.c
index 5286723d4a14..2d4af32a0dec 100644
--- a/drivers/soc/fsl/dpio/dpio-driver.c
+++ b/drivers/soc/fsl/dpio/dpio-driver.c
@@ -176,7 +176,7 @@ static int dpaa2_dpio_probe(struct fsl_mc_device *dpio_dev)
 	if (err)
 		goto err_register_dpio_irq;
 
-	priv->io = dpaa2_io_create(&desc);
+	priv->io = dpaa2_io_create(&desc, dev);
 	if (!priv->io) {
 		dev_err(dev, "dpaa2_io_create failed\n");
 		err = -ENOMEM;
diff --git a/drivers/soc/fsl/dpio/dpio-service.c b/drivers/soc/fsl/dpio/dpio-service.c
index c02bfb18a1f0..52d800aa4774 100644
--- a/drivers/soc/fsl/dpio/dpio-service.c
+++ b/drivers/soc/fsl/dpio/dpio-service.c
@@ -27,6 +27,7 @@ struct dpaa2_io {
 	/* protect notifications list */
 	spinlock_t lock_notifications;
 	struct list_head notifications;
+	struct device *dev;
 };
 
 struct dpaa2_io_store {
@@ -98,13 +99,15 @@ EXPORT_SYMBOL_GPL(dpaa2_io_service_select);
 /**
  * dpaa2_io_create() - create a dpaa2_io object.
  * @desc: the dpaa2_io descriptor
+ * @dev: the actual DPIO device
  *
  * Activates a "struct dpaa2_io" corresponding to the given config of an actual
  * DPIO object.
  *
  * Return a valid dpaa2_io object for success, or NULL for failure.
  */
-struct dpaa2_io *dpaa2_io_create(const struct dpaa2_io_desc *desc)
+struct dpaa2_io *dpaa2_io_create(const struct dpaa2_io_desc *desc,
+				 struct device *dev)
 {
 	struct dpaa2_io *obj = kmalloc(sizeof(*obj), GFP_KERNEL);
 
@@ -146,6 +149,8 @@ struct dpaa2_io *dpaa2_io_create(const struct dpaa2_io_desc *desc)
 		dpio_by_cpu[desc->cpu] = obj;
 	spin_unlock(&dpio_list_lock);
 
+	obj->dev = dev;
+
 	return obj;
 }
 
diff --git a/include/soc/fsl/dpaa2-io.h b/include/soc/fsl/dpaa2-io.h
index 243119c53cdf..e252066d1f43 100644
--- a/include/soc/fsl/dpaa2-io.h
+++ b/include/soc/fsl/dpaa2-io.h
@@ -57,7 +57,8 @@ struct dpaa2_io_desc {
 	u32 qman_version;
 };
 
-struct dpaa2_io *dpaa2_io_create(const struct dpaa2_io_desc *desc);
+struct dpaa2_io *dpaa2_io_create(const struct dpaa2_io_desc *desc,
+				 struct device *dev);
 
 void dpaa2_io_down(struct dpaa2_io *d);
 
-- 
cgit v1.2.3


From 47441f7f73b7cf0c2d6e7d6372f026ea81193fd6 Mon Sep 17 00:00:00 2001
From: Ioana Ciornei <ioana.ciornei@nxp.com>
Date: Mon, 10 Dec 2018 16:50:19 +0000
Subject: soc: fsl: dpio: add a device_link at dpaa2_io_service_register

Automatically add a device link between the actual device requesting the
dpaa2_io_service_register and the underlying dpaa2_io used. This link
will ensure that when a DPIO device, which is indirectly used by other
devices, is unbound any consumer devices will be also unbound from their
drivers.

For example, any DPNI, bound to the dpaa2-eth driver, which is using
DPIO devices will be unbound before its supplier device.

Also, add a new parameter to the dpaa2_io_service_[de]register functions
to specify the requesting device (ie the consumer).

Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
Reviewed-by: Horia Geanta <horia.geanta@nxp.com>
Reviewed-by: Ioana Radulescu <ruxandra.radulescu@nxp.com>
Signed-off-by: Li Yang <leoyang.li@nxp.com>
---
 drivers/crypto/caam/caamalg_qi2.c                |  6 +++---
 drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c |  9 +++++----
 drivers/soc/fsl/dpio/dpio-service.c              | 16 ++++++++++++++--
 include/soc/fsl/dpaa2-io.h                       |  6 ++++--
 4 files changed, 26 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/drivers/crypto/caam/caamalg_qi2.c b/drivers/crypto/caam/caamalg_qi2.c
index 425d5d974613..77f4c0045de2 100644
--- a/drivers/crypto/caam/caamalg_qi2.c
+++ b/drivers/crypto/caam/caamalg_qi2.c
@@ -4503,7 +4503,7 @@ static int __cold dpaa2_dpseci_dpio_setup(struct dpaa2_caam_priv *priv)
 		nctx->cb = dpaa2_caam_fqdan_cb;
 
 		/* Register notification callbacks */
-		err = dpaa2_io_service_register(NULL, nctx);
+		err = dpaa2_io_service_register(NULL, nctx, dev);
 		if (unlikely(err)) {
 			dev_dbg(dev, "No affine DPIO for cpu %d\n", cpu);
 			nctx->cb = NULL;
@@ -4536,7 +4536,7 @@ err:
 		ppriv = per_cpu_ptr(priv->ppriv, cpu);
 		if (!ppriv->nctx.cb)
 			break;
-		dpaa2_io_service_deregister(NULL, &ppriv->nctx);
+		dpaa2_io_service_deregister(NULL, &ppriv->nctx, dev);
 	}
 
 	for_each_online_cpu(cpu) {
@@ -4556,7 +4556,7 @@ static void __cold dpaa2_dpseci_dpio_free(struct dpaa2_caam_priv *priv)
 
 	for_each_online_cpu(cpu) {
 		ppriv = per_cpu_ptr(priv->ppriv, cpu);
-		dpaa2_io_service_deregister(NULL, &ppriv->nctx);
+		dpaa2_io_service_deregister(NULL, &ppriv->nctx, priv->dev);
 		dpaa2_io_store_destroy(ppriv->store);
 
 		if (++i == priv->num_pairs)
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
index 1ca9a18139ec..c500ea77aaa0 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
@@ -1902,7 +1902,7 @@ static int setup_dpio(struct dpaa2_eth_priv *priv)
 
 		/* Register the new context */
 		channel->dpio = dpaa2_io_service_select(i);
-		err = dpaa2_io_service_register(channel->dpio, nctx);
+		err = dpaa2_io_service_register(channel->dpio, nctx, dev);
 		if (err) {
 			dev_dbg(dev, "No affine DPIO for cpu %d\n", i);
 			/* If no affine DPIO for this core, there's probably
@@ -1942,7 +1942,7 @@ static int setup_dpio(struct dpaa2_eth_priv *priv)
 	return 0;
 
 err_set_cdan:
-	dpaa2_io_service_deregister(channel->dpio, nctx);
+	dpaa2_io_service_deregister(channel->dpio, nctx, dev);
 err_service_reg:
 	free_channel(priv, channel);
 err_alloc_ch:
@@ -1962,13 +1962,14 @@ err_alloc_ch:
 
 static void free_dpio(struct dpaa2_eth_priv *priv)
 {
-	int i;
+	struct device *dev = priv->net_dev->dev.parent;
 	struct dpaa2_eth_channel *ch;
+	int i;
 
 	/* deregister CDAN notifications and free channels */
 	for (i = 0; i < priv->num_channels; i++) {
 		ch = priv->channel[i];
-		dpaa2_io_service_deregister(ch->dpio, &ch->nctx);
+		dpaa2_io_service_deregister(ch->dpio, &ch->nctx, dev);
 		free_channel(priv, ch);
 	}
 }
diff --git a/drivers/soc/fsl/dpio/dpio-service.c b/drivers/soc/fsl/dpio/dpio-service.c
index 52d800aa4774..bc801934602a 100644
--- a/drivers/soc/fsl/dpio/dpio-service.c
+++ b/drivers/soc/fsl/dpio/dpio-service.c
@@ -237,6 +237,7 @@ EXPORT_SYMBOL(dpaa2_io_get_cpu);
  *                               notifications on the given DPIO service.
  * @d:   the given DPIO service.
  * @ctx: the notification context.
+ * @dev: the device that requests the register
  *
  * The caller should make the MC command to attach a DPAA2 object to
  * a DPIO after this function completes successfully.  In that way:
@@ -251,14 +252,20 @@ EXPORT_SYMBOL(dpaa2_io_get_cpu);
  * Return 0 for success, or -ENODEV for failure.
  */
 int dpaa2_io_service_register(struct dpaa2_io *d,
-			      struct dpaa2_io_notification_ctx *ctx)
+			      struct dpaa2_io_notification_ctx *ctx,
+			      struct device *dev)
 {
+	struct device_link *link;
 	unsigned long irqflags;
 
 	d = service_select_by_cpu(d, ctx->desired_cpu);
 	if (!d)
 		return -ENODEV;
 
+	link = device_link_add(dev, d->dev, DL_FLAG_AUTOREMOVE_CONSUMER);
+	if (!link)
+		return -EINVAL;
+
 	ctx->dpio_id = d->dpio_desc.dpio_id;
 	ctx->qman64 = (u64)(uintptr_t)ctx;
 	ctx->dpio_private = d;
@@ -279,12 +286,14 @@ EXPORT_SYMBOL_GPL(dpaa2_io_service_register);
  * dpaa2_io_service_deregister - The opposite of 'register'.
  * @service: the given DPIO service.
  * @ctx: the notification context.
+ * @dev: the device that requests to be deregistered
  *
  * This function should be called only after sending the MC command to
  * to detach the notification-producing device from the DPIO.
  */
 void dpaa2_io_service_deregister(struct dpaa2_io *service,
-				 struct dpaa2_io_notification_ctx *ctx)
+				 struct dpaa2_io_notification_ctx *ctx,
+				 struct device *dev)
 {
 	struct dpaa2_io *d = ctx->dpio_private;
 	unsigned long irqflags;
@@ -295,6 +304,9 @@ void dpaa2_io_service_deregister(struct dpaa2_io *service,
 	spin_lock_irqsave(&d->lock_notifications, irqflags);
 	list_del(&ctx->node);
 	spin_unlock_irqrestore(&d->lock_notifications, irqflags);
+
+	if (dev)
+		device_link_remove(dev, d->dev);
 }
 EXPORT_SYMBOL_GPL(dpaa2_io_service_deregister);
 
diff --git a/include/soc/fsl/dpaa2-io.h b/include/soc/fsl/dpaa2-io.h
index e252066d1f43..3447fd10a3e6 100644
--- a/include/soc/fsl/dpaa2-io.h
+++ b/include/soc/fsl/dpaa2-io.h
@@ -94,9 +94,11 @@ struct dpaa2_io_notification_ctx {
 int dpaa2_io_get_cpu(struct dpaa2_io *d);
 
 int dpaa2_io_service_register(struct dpaa2_io *service,
-			      struct dpaa2_io_notification_ctx *ctx);
+			      struct dpaa2_io_notification_ctx *ctx,
+			      struct device *dev);
 void dpaa2_io_service_deregister(struct dpaa2_io *service,
-				 struct dpaa2_io_notification_ctx *ctx);
+				 struct dpaa2_io_notification_ctx *ctx,
+				 struct device *dev);
 int dpaa2_io_service_rearm(struct dpaa2_io *service,
 			   struct dpaa2_io_notification_ctx *ctx);
 
-- 
cgit v1.2.3


From 8a533a7de21aa69cf9a3e55223c819858fbc7da2 Mon Sep 17 00:00:00 2001
From: Ioana Ciocoi Radulescu <ruxandra.radulescu@nxp.com>
Date: Fri, 14 Dec 2018 16:04:06 +0000
Subject: soc: fsl: dpio: Change bpid type to u16

In all QBMan registers, the buffer pool id field is two bytes long.
The low level qbman APIs reflect this, but the high level DPIO ones
use u32. Modify them in order to avoid implicit downcasts.

Signed-off-by: Ioana Radulescu <ruxandra.radulescu@nxp.com>
Signed-off-by: Li Yang <leoyang.li@nxp.com>
---
 drivers/soc/fsl/dpio/dpio-service.c | 4 ++--
 include/soc/fsl/dpaa2-io.h          | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/drivers/soc/fsl/dpio/dpio-service.c b/drivers/soc/fsl/dpio/dpio-service.c
index 5389336463c0..b9539ef2c3cd 100644
--- a/drivers/soc/fsl/dpio/dpio-service.c
+++ b/drivers/soc/fsl/dpio/dpio-service.c
@@ -473,7 +473,7 @@ EXPORT_SYMBOL_GPL(dpaa2_io_service_enqueue_qd);
  * Return 0 for success, and negative error code for failure.
  */
 int dpaa2_io_service_release(struct dpaa2_io *d,
-			     u32 bpid,
+			     u16 bpid,
 			     const u64 *buffers,
 			     unsigned int num_buffers)
 {
@@ -502,7 +502,7 @@ EXPORT_SYMBOL_GPL(dpaa2_io_service_release);
  * Eg. if the buffer pool is empty, this will return zero.
  */
 int dpaa2_io_service_acquire(struct dpaa2_io *d,
-			     u32 bpid,
+			     u16 bpid,
 			     u64 *buffers,
 			     unsigned int num_buffers)
 {
diff --git a/include/soc/fsl/dpaa2-io.h b/include/soc/fsl/dpaa2-io.h
index 3447fd10a3e6..672cfb58046f 100644
--- a/include/soc/fsl/dpaa2-io.h
+++ b/include/soc/fsl/dpaa2-io.h
@@ -111,9 +111,9 @@ int dpaa2_io_service_enqueue_fq(struct dpaa2_io *d, u32 fqid,
 				const struct dpaa2_fd *fd);
 int dpaa2_io_service_enqueue_qd(struct dpaa2_io *d, u32 qdid, u8 prio,
 				u16 qdbin, const struct dpaa2_fd *fd);
-int dpaa2_io_service_release(struct dpaa2_io *d, u32 bpid,
+int dpaa2_io_service_release(struct dpaa2_io *d, u16 bpid,
 			     const u64 *buffers, unsigned int num_buffers);
-int dpaa2_io_service_acquire(struct dpaa2_io *d, u32 bpid,
+int dpaa2_io_service_acquire(struct dpaa2_io *d, u16 bpid,
 			     u64 *buffers, unsigned int num_buffers);
 
 struct dpaa2_io_store *dpaa2_io_store_create(unsigned int max_frames,
-- 
cgit v1.2.3


From afb77422819ff60612e9b7d36461b9b2bc8e038e Mon Sep 17 00:00:00 2001
From: Ioana Ciornei <ioana.ciornei@nxp.com>
Date: Mon, 10 Dec 2018 16:50:19 +0000
Subject: bus: fsl-mc: automatically add a device_link on
 fsl_mc_[portal,object]_allocate

Allocatable devices can be acquired by drivers on the fsl-mc bus using
the fsl_mc_portal_allocate or fsl_mc_object_allocate functions. Add a
device link between the consumer device and the supplier device so that
proper resource management is achieved.
Also, adding a link between these devices ensures that a proper unbind
order is respected (ie before the supplier device is unbound from its
respective driver all consumer devices will be notified and unbound
first).

Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
Reviewed-by: Laurentiu Tudor <laurentiu.tudor@nxp.com>
Signed-off-by: Li Yang <leoyang.li@nxp.com>
---
 drivers/bus/fsl-mc/fsl-mc-allocator.c | 11 +++++++++++
 drivers/bus/fsl-mc/mc-io.c            | 13 +++++++++++++
 include/linux/fsl/mc.h                |  1 +
 3 files changed, 25 insertions(+)

(limited to 'include')

diff --git a/drivers/bus/fsl-mc/fsl-mc-allocator.c b/drivers/bus/fsl-mc/fsl-mc-allocator.c
index e906ecfe23dd..8ad77246f322 100644
--- a/drivers/bus/fsl-mc/fsl-mc-allocator.c
+++ b/drivers/bus/fsl-mc/fsl-mc-allocator.c
@@ -295,6 +295,14 @@ int __must_check fsl_mc_object_allocate(struct fsl_mc_device *mc_dev,
 	if (!mc_adev)
 		goto error;
 
+	mc_adev->consumer_link = device_link_add(&mc_dev->dev,
+						 &mc_adev->dev,
+						 DL_FLAG_AUTOREMOVE_CONSUMER);
+	if (!mc_adev->consumer_link) {
+		error = -EINVAL;
+		goto error;
+	}
+
 	*new_mc_adev = mc_adev;
 	return 0;
 error:
@@ -321,6 +329,9 @@ void fsl_mc_object_free(struct fsl_mc_device *mc_adev)
 		return;
 
 	fsl_mc_resource_free(resource);
+
+	device_link_del(mc_adev->consumer_link);
+	mc_adev->consumer_link = NULL;
 }
 EXPORT_SYMBOL_GPL(fsl_mc_object_free);
 
diff --git a/drivers/bus/fsl-mc/mc-io.c b/drivers/bus/fsl-mc/mc-io.c
index 7226cfc49b6f..3ae574a58cce 100644
--- a/drivers/bus/fsl-mc/mc-io.c
+++ b/drivers/bus/fsl-mc/mc-io.c
@@ -209,9 +209,19 @@ int __must_check fsl_mc_portal_allocate(struct fsl_mc_device *mc_dev,
 	if (error < 0)
 		goto error_cleanup_resource;
 
+	dpmcp_dev->consumer_link = device_link_add(&mc_dev->dev,
+						   &dpmcp_dev->dev,
+						   DL_FLAG_AUTOREMOVE_CONSUMER);
+	if (!dpmcp_dev->consumer_link) {
+		error = -EINVAL;
+		goto error_cleanup_mc_io;
+	}
+
 	*new_mc_io = mc_io;
 	return 0;
 
+error_cleanup_mc_io:
+	fsl_destroy_mc_io(mc_io);
 error_cleanup_resource:
 	fsl_mc_resource_free(resource);
 	return error;
@@ -244,6 +254,9 @@ void fsl_mc_portal_free(struct fsl_mc_io *mc_io)
 
 	fsl_destroy_mc_io(mc_io);
 	fsl_mc_resource_free(resource);
+
+	device_link_del(dpmcp_dev->consumer_link);
+	dpmcp_dev->consumer_link = NULL;
 }
 EXPORT_SYMBOL_GPL(fsl_mc_portal_free);
 
diff --git a/include/linux/fsl/mc.h b/include/linux/fsl/mc.h
index 741f567253ef..975553a9f75d 100644
--- a/include/linux/fsl/mc.h
+++ b/include/linux/fsl/mc.h
@@ -193,6 +193,7 @@ struct fsl_mc_device {
 	struct resource *regions;
 	struct fsl_mc_device_irq **irqs;
 	struct fsl_mc_resource *resource;
+	struct device_link *consumer_link;
 };
 
 #define to_fsl_mc_device(_dev) \
-- 
cgit v1.2.3


From ff525b6e2d3011d5a1ef08d8b3f4f745c7245412 Mon Sep 17 00:00:00 2001
From: John Garry <john.garry@huawei.com>
Date: Sat, 5 Jan 2019 00:01:26 +0800
Subject: scsi: libsas: Fix some indentation in libsas.h

Currently much indentation in this file is done with whitespaces instead of
tabs, which can make reading difficult, so fix this up.

Some other little minor tidy-up is done, but this file still has many other
checkpatch warnings (generally linelength > 80 or function arguments have
no identifier names).

All libsas code can be audited for checkpatch issues later.

Signed-off-by: John Garry <john.garry@huawei.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 include/scsi/libsas.h | 56 +++++++++++++++++++++++++--------------------------
 1 file changed, 27 insertions(+), 29 deletions(-)

(limited to 'include')

diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h
index 3de3b10da19a..857086cf7ebf 100644
--- a/include/scsi/libsas.h
+++ b/include/scsi/libsas.h
@@ -52,8 +52,8 @@ enum sas_phy_role {
 };
 
 enum sas_phy_type {
-        PHY_TYPE_PHYSICAL,
-        PHY_TYPE_VIRTUAL
+	PHY_TYPE_PHYSICAL,
+	PHY_TYPE_VIRTUAL
 };
 
 /* The events are mnemonically described in sas_dump.c
@@ -91,7 +91,7 @@ enum discover_event {
 
 #define to_dom_device(_obj) container_of(_obj, struct domain_device, dev_obj)
 #define to_dev_attr(_attr)  container_of(_attr, struct domain_dev_attribute,\
-                                         attr)
+					 attr)
 
 enum routing_attribute {
 	DIRECT_ROUTING,
@@ -184,37 +184,37 @@ struct domain_device {
 	spinlock_t done_lock;
 	enum sas_device_type dev_type;
 
-        enum sas_linkrate linkrate;
-        enum sas_linkrate min_linkrate;
-        enum sas_linkrate max_linkrate;
+	enum sas_linkrate linkrate;
+	enum sas_linkrate min_linkrate;
+	enum sas_linkrate max_linkrate;
 
-        int  pathways;
+	int  pathways;
 
-        struct domain_device *parent;
-        struct list_head siblings; /* devices on the same level */
-        struct asd_sas_port *port;        /* shortcut to root of the tree */
+	struct domain_device *parent;
+	struct list_head siblings; /* devices on the same level */
+	struct asd_sas_port *port;        /* shortcut to root of the tree */
 	struct sas_phy *phy;
 
-        struct list_head dev_list_node;
+	struct list_head dev_list_node;
 	struct list_head disco_list_node; /* awaiting probe or destruct */
 
-        enum sas_protocol    iproto;
-        enum sas_protocol    tproto;
+	enum sas_protocol    iproto;
+	enum sas_protocol    tproto;
 
-        struct sas_rphy *rphy;
+	struct sas_rphy *rphy;
 
-        u8  sas_addr[SAS_ADDR_SIZE];
-        u8  hashed_sas_addr[HASHED_SAS_ADDR_SIZE];
+	u8  sas_addr[SAS_ADDR_SIZE];
+	u8  hashed_sas_addr[HASHED_SAS_ADDR_SIZE];
 
-        u8  frame_rcvd[32];
+	u8  frame_rcvd[32];
 
-        union {
-                struct expander_device ex_dev;
-                struct sata_device     sata_dev; /* STP & directly attached */
+	union {
+		struct expander_device ex_dev;
+		struct sata_device     sata_dev; /* STP & directly attached */
 		struct ssp_device      ssp_dev;
-        };
+	};
 
-        void *lldd_dev;
+	void *lldd_dev;
 	unsigned long state;
 	struct kref kref;
 };
@@ -512,10 +512,10 @@ enum exec_status {
 
 /* When a task finishes with a response, the LLDD examines the
  * response:
- * 	- For an ATA task task_status_struct::stat is set to
+ *	- For an ATA task task_status_struct::stat is set to
  * SAS_PROTO_RESPONSE, and the task_status_struct::buf is set to the
  * contents of struct ata_task_resp.
- * 	- For SSP tasks, if no data is present or status/TMF response
+ *	- For SSP tasks, if no data is present or status/TMF response
  * is valid, task_status_struct::stat is set.  If data is present
  * (SENSE data), the LLDD copies up to SAS_STATUS_BUF_SIZE, sets
  * task_status_struct::buf_valid_size, and task_status_struct::stat is
@@ -671,15 +671,13 @@ extern void sas_prep_resume_ha(struct sas_ha_struct *sas_ha);
 extern void sas_resume_ha(struct sas_ha_struct *sas_ha);
 extern void sas_suspend_ha(struct sas_ha_struct *sas_ha);
 
-int sas_set_phy_speed(struct sas_phy *phy,
-		      struct sas_phy_linkrates *rates);
+int sas_set_phy_speed(struct sas_phy *phy, struct sas_phy_linkrates *rates);
 int sas_phy_reset(struct sas_phy *phy, int hard_reset);
-extern int sas_queuecommand(struct Scsi_Host * ,struct scsi_cmnd *);
+extern int sas_queuecommand(struct Scsi_Host *, struct scsi_cmnd *);
 extern int sas_target_alloc(struct scsi_target *);
 extern int sas_slave_configure(struct scsi_device *);
 extern int sas_change_queue_depth(struct scsi_device *, int new_depth);
-extern int sas_bios_param(struct scsi_device *,
-			  struct block_device *,
+extern int sas_bios_param(struct scsi_device *, struct block_device *,
 			  sector_t capacity, int *hsc);
 extern struct scsi_transport_template *
 sas_domain_attach_transport(struct sas_domain_function_template *);
-- 
cgit v1.2.3


From 98a455d91e7116ca417bc37da6aa2dd633206a6f Mon Sep 17 00:00:00 2001
From: Shunyong Yang <shunyong.yang@hxt-semitech.com>
Date: Tue, 18 Dec 2018 14:02:45 +0800
Subject: ACPI / tables: table override from built-in initrd

In some scenario, we need to build initrd with kernel in a single image.
This can simplify system deployment process by downloading the whole system
once, such as in IC verification.

This patch adds support to override ACPI tables from built-in initrd.

Signed-off-by: Shunyong Yang <shunyong.yang@hxt-semitech.com>
[ rjw: Minor cleanups ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 Documentation/acpi/initrd_table_override.txt |  4 ++++
 drivers/acpi/Kconfig                         | 10 ++++++++++
 drivers/acpi/tables.c                        | 12 ++++++++++--
 include/linux/initrd.h                       |  3 +++
 4 files changed, 27 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/Documentation/acpi/initrd_table_override.txt b/Documentation/acpi/initrd_table_override.txt
index eb651a6aa285..30437a6db373 100644
--- a/Documentation/acpi/initrd_table_override.txt
+++ b/Documentation/acpi/initrd_table_override.txt
@@ -14,6 +14,10 @@ upgrade the ACPI execution environment that is defined by the ACPI tables
 via upgrading the ACPI tables provided by the BIOS with an instrumented,
 modified, more recent version one, or installing brand new ACPI tables.
 
+When building initrd with kernel in a single image, option
+ACPI_TABLE_OVERRIDE_VIA_BUILTIN_INITRD should also be true for this
+feature to work.
+
 For a full list of ACPI tables that can be upgraded/installed, take a look
 at the char *table_sigs[MAX_ACPI_SIGNATURE]; definition in
 drivers/acpi/tables.c.
diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig
index 90ff0a47c12e..4e015c77e48e 100644
--- a/drivers/acpi/Kconfig
+++ b/drivers/acpi/Kconfig
@@ -357,6 +357,16 @@ config ACPI_TABLE_UPGRADE
 	  initrd, therefore it's safe to say Y.
 	  See Documentation/acpi/initrd_table_override.txt for details
 
+config ACPI_TABLE_OVERRIDE_VIA_BUILTIN_INITRD
+	bool "Override ACPI tables from built-in initrd"
+	depends on ACPI_TABLE_UPGRADE
+	depends on INITRAMFS_SOURCE!="" && INITRAMFS_COMPRESSION=""
+	help
+	  This option provides functionality to override arbitrary ACPI tables
+	  from built-in uncompressed initrd.
+
+	  See Documentation/acpi/initrd_table_override.txt for details
+
 config ACPI_DEBUG
 	bool "Debug Statements"
 	help
diff --git a/drivers/acpi/tables.c b/drivers/acpi/tables.c
index 48eabb6c2d4f..8fccbe49612a 100644
--- a/drivers/acpi/tables.c
+++ b/drivers/acpi/tables.c
@@ -473,14 +473,22 @@ static DECLARE_BITMAP(acpi_initrd_installed, NR_ACPI_INITRD_TABLES);
 
 void __init acpi_table_upgrade(void)
 {
-	void *data = (void *)initrd_start;
-	size_t size = initrd_end - initrd_start;
+	void *data;
+	size_t size;
 	int sig, no, table_nr = 0, total_offset = 0;
 	long offset = 0;
 	struct acpi_table_header *table;
 	char cpio_path[32] = "kernel/firmware/acpi/";
 	struct cpio_data file;
 
+	if (IS_ENABLED(CONFIG_ACPI_TABLE_OVERRIDE_VIA_BUILTIN_INITRD)) {
+		data = __initramfs_start;
+		size = __initramfs_size;
+	} else {
+		data = (void *)initrd_start;
+		size = initrd_end - initrd_start;
+	}
+
 	if (data == NULL || size == 0)
 		return;
 
diff --git a/include/linux/initrd.h b/include/linux/initrd.h
index 14beaff9b445..d77fe34fb00a 100644
--- a/include/linux/initrd.h
+++ b/include/linux/initrd.h
@@ -25,3 +25,6 @@ extern phys_addr_t phys_initrd_start;
 extern unsigned long phys_initrd_size;
 
 extern unsigned int real_root_dev;
+
+extern char __initramfs_start[];
+extern unsigned long __initramfs_size;
-- 
cgit v1.2.3


From e6ce7943231fcba95a3c8842ab65f257cb5ab124 Mon Sep 17 00:00:00 2001
From: Sameer Pujar <spujar@nvidia.com>
Date: Mon, 14 Jan 2019 23:51:08 +0530
Subject: ALSA: hda: add verbs for stripe control

Controllers can support multiple Serial Data Out(SDO) lines, for
extended outbound bandwidth, to pump data to all codecs on the link.
Codecs can sample data present on SDO.

Add verbs AC_VERB_GET_STRIPE_CONTROL and AC_VERB_SET_STRIPE_CONTROL
These can be used to program usage of SDO lines for codec.

Signed-off-by: Sameer Pujar <spujar@nvidia.com>
Reviewed-by: Mohan Kumar D <mkumard@nvidia.com>
Reviewed-by: Ravindra Lokhande <rlokhande@nvidia.com>
Reviewed-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/sound/hda_verbs.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/sound/hda_verbs.h b/include/sound/hda_verbs.h
index 2a8573a00ea6..e36b77531c5c 100644
--- a/include/sound/hda_verbs.h
+++ b/include/sound/hda_verbs.h
@@ -66,6 +66,7 @@ enum {
 #define AC_VERB_GET_CONFIG_DEFAULT		0x0f1c
 /* f20: AFG/MFG */
 #define AC_VERB_GET_SUBSYSTEM_ID		0x0f20
+#define AC_VERB_GET_STRIPE_CONTROL		0x0f24
 #define AC_VERB_GET_CVT_CHAN_COUNT		0x0f2d
 #define AC_VERB_GET_HDMI_DIP_SIZE		0x0f2e
 #define AC_VERB_GET_HDMI_ELDD			0x0f2f
@@ -110,6 +111,7 @@ enum {
 #define AC_VERB_SET_CONFIG_DEFAULT_BYTES_3	0x71f
 #define AC_VERB_SET_EAPD				0x788
 #define AC_VERB_SET_CODEC_RESET			0x7ff
+#define AC_VERB_SET_STRIPE_CONTROL		0x724
 #define AC_VERB_SET_CVT_CHAN_COUNT		0x72d
 #define AC_VERB_SET_HDMI_DIP_INDEX		0x730
 #define AC_VERB_SET_HDMI_DIP_DATA		0x731
-- 
cgit v1.2.3


From 5dd3d271320d888bb708ca6252b8a9e416a7fe64 Mon Sep 17 00:00:00 2001
From: Sameer Pujar <spujar@nvidia.com>
Date: Mon, 14 Jan 2019 23:51:09 +0530
Subject: ALSA: hda: Add api to program stripe control bits

Controllers and codecs can support striping of audio out across
multiple SDO lines. The number of supported SDO lines can be
specific to chip. GCAP register can be read to know the maximum
supported SDO lines.

snd_hdac_get_stream_stripe_ctl() is exposed to program stripe bits
on controller and codec side.
stripe value: 0 for 1SDO, 1 for 2SDO, 2 for 4SDO lines, etc.,

Signed-off-by: Sameer Pujar <spujar@nvidia.com>
Reviewed-by: Mohan Kumar D <mkumard@nvidia.com>
Reviewed-by: Ravindra Lokhande <rlokhande@nvidia.com>
Reviewed-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/sound/hdaudio.h |  3 +++
 sound/hda/hdac_stream.c | 34 ++++++++++++++++++++++++++++++++++
 2 files changed, 37 insertions(+)

(limited to 'include')

diff --git a/include/sound/hdaudio.h b/include/sound/hdaudio.h
index b4fa1c775251..45f944d57982 100644
--- a/include/sound/hdaudio.h
+++ b/include/sound/hdaudio.h
@@ -539,6 +539,9 @@ void snd_hdac_stream_sync(struct hdac_stream *azx_dev, bool start,
 			  unsigned int streams);
 void snd_hdac_stream_timecounter_init(struct hdac_stream *azx_dev,
 				      unsigned int streams);
+int snd_hdac_get_stream_stripe_ctl(struct hdac_bus *bus,
+				struct snd_pcm_substream *substream);
+
 /*
  * macros for easy use
  */
diff --git a/sound/hda/hdac_stream.c b/sound/hda/hdac_stream.c
index ba73a33480b6..820694a2061e 100644
--- a/sound/hda/hdac_stream.c
+++ b/sound/hda/hdac_stream.c
@@ -12,6 +12,40 @@
 #include <sound/hda_register.h>
 #include "trace.h"
 
+/**
+ * snd_hdac_get_stream_stripe_ctl - get stripe control value
+ * @bus: HD-audio core bus
+ * @substream: PCM substream
+ */
+int snd_hdac_get_stream_stripe_ctl(struct hdac_bus *bus,
+				   struct snd_pcm_substream *substream)
+{
+	struct snd_pcm_runtime *runtime = substream->runtime;
+	unsigned int channels = runtime->channels,
+		     rate = runtime->rate,
+		     bits_per_sample = runtime->sample_bits,
+		     max_sdo_lines, value, sdo_line;
+
+	/* T_AZA_GCAP_NSDO is 1:2 bitfields in GCAP */
+	max_sdo_lines = snd_hdac_chip_readl(bus, GCAP) & AZX_GCAP_NSDO;
+
+	/* following is from HD audio spec */
+	for (sdo_line = max_sdo_lines; sdo_line > 0; sdo_line >>= 1) {
+		if (rate > 48000)
+			value = (channels * bits_per_sample *
+					(rate / 48000)) / sdo_line;
+		else
+			value = (channels * bits_per_sample) / sdo_line;
+
+		if (value >= 8)
+			break;
+	}
+
+	/* stripe value: 0 for 1SDO, 1 for 2SDO, 2 for 4SDO lines */
+	return sdo_line >> 1;
+}
+EXPORT_SYMBOL_GPL(snd_hdac_get_stream_stripe_ctl);
+
 /**
  * snd_hdac_stream_init - initialize each stream (aka device)
  * @bus: HD-audio core bus
-- 
cgit v1.2.3


From b59c8e7a73160b11f99b9008a5f215dd54b9d581 Mon Sep 17 00:00:00 2001
From: Sameer Pujar <spujar@nvidia.com>
Date: Mon, 14 Jan 2019 23:51:10 +0530
Subject: ALSA: hda: add register offset for stripe control

bits 16:17 in SD_CTL register refer to stripe control. Added an
offset register(AZX_REG_SD_CTL_3B) to have exclusive read/write
of corresponding register byte. This helps to avoid unnecessary
32-bit read/write of SD_CTL whenever only stripe or other bits of
corresponding byte need to be updated. Also HD audio spec defines
SD_CTL as 3 byte register.

SD_CTL_STRIPE_MASK(0x3) can be used for stripe control programming
and when updating AZX_REG_SD_CTL_3B.

Signed-off-by: Sameer Pujar <spujar@nvidia.com>
Reviewed-by: Mohan Kumar D <mkumard@nvidia.com>
Reviewed-by: Ravindra Lokhande <rlokhande@nvidia.com>
Reviewed-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/sound/hda_register.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/sound/hda_register.h b/include/sound/hda_register.h
index 2ab39fb52d7a..0fd39295b426 100644
--- a/include/sound/hda_register.h
+++ b/include/sound/hda_register.h
@@ -79,6 +79,7 @@ enum { SDI0, SDI1, SDI2, SDI3, SDO0, SDO1, SDO2, SDO3 };
 
 /* stream register offsets from stream base */
 #define AZX_REG_SD_CTL			0x00
+#define AZX_REG_SD_CTL_3B		0x02 /* 3rd byte of SD_CTL register */
 #define AZX_REG_SD_STS			0x03
 #define AZX_REG_SD_LPIB			0x04
 #define AZX_REG_SD_CBL			0x08
@@ -165,6 +166,7 @@ enum { SDI0, SDI1, SDI2, SDI3, SDO0, SDO1, SDO2, SDO3 };
 #define SD_INT_COMPLETE		0x04	/* completion interrupt */
 #define SD_INT_MASK		(SD_INT_DESC_ERR|SD_INT_FIFO_ERR|\
 				 SD_INT_COMPLETE)
+#define SD_CTL_STRIPE_MASK	0x3	/* stripe control mask */
 
 /* SD_STS */
 #define SD_STS_FIFO_READY	0x20	/* FIFO ready */
-- 
cgit v1.2.3


From ea4baf7f116a18382df331db2123d98bc1c3cd83 Mon Sep 17 00:00:00 2001
From: Parav Pandit <parav@mellanox.com>
Date: Tue, 18 Dec 2018 14:28:30 +0200
Subject: RDMA: Rename port_callback to init_port

Most provider routines are callback routines which ib core invokes.
_callback suffix doesn't convey information about when such callback is
invoked. Therefore, rename port_callback to init_port.

Additionally, store the init_port function pointer in ib_device_ops, so
that it can be accessed in subsequent patches when binding rdma device to
net namespace.

Signed-off-by: Parav Pandit <parav@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/core/core_priv.h            |  4 +---
 drivers/infiniband/core/device.c               |  7 +++----
 drivers/infiniband/core/sysfs.c                | 16 ++++++----------
 drivers/infiniband/hw/bnxt_re/main.c           |  2 +-
 drivers/infiniband/hw/cxgb3/iwch_provider.c    |  2 +-
 drivers/infiniband/hw/cxgb4/provider.c         |  2 +-
 drivers/infiniband/hw/hfi1/verbs.c             |  2 +-
 drivers/infiniband/hw/hns/hns_roce_main.c      |  2 +-
 drivers/infiniband/hw/i40iw/i40iw_verbs.c      |  2 +-
 drivers/infiniband/hw/mlx4/main.c              |  2 +-
 drivers/infiniband/hw/mlx5/main.c              |  2 +-
 drivers/infiniband/hw/mthca/mthca_provider.c   |  2 +-
 drivers/infiniband/hw/nes/nes_verbs.c          |  2 +-
 drivers/infiniband/hw/ocrdma/ocrdma_main.c     |  2 +-
 drivers/infiniband/hw/qedr/main.c              |  2 +-
 drivers/infiniband/hw/qib/qib_verbs.c          |  2 +-
 drivers/infiniband/hw/usnic/usnic_ib_main.c    |  2 +-
 drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c |  2 +-
 drivers/infiniband/sw/rdmavt/vt.c              |  5 ++---
 drivers/infiniband/sw/rxe/rxe_verbs.c          |  2 +-
 include/rdma/ib_verbs.h                        | 10 +++++++---
 include/rdma/rdma_vt.h                         |  3 ---
 22 files changed, 35 insertions(+), 42 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h
index 3cd830d52967..aca75c74e451 100644
--- a/drivers/infiniband/core/core_priv.h
+++ b/drivers/infiniband/core/core_priv.h
@@ -54,9 +54,7 @@ struct pkey_index_qp_list {
 	struct list_head    qp_list;
 };
 
-int  ib_device_register_sysfs(struct ib_device *device,
-			      int (*port_callback)(struct ib_device *,
-						   u8, struct kobject *));
+int ib_device_register_sysfs(struct ib_device *device);
 void ib_device_unregister_sysfs(struct ib_device *device);
 int ib_device_rename(struct ib_device *ibdev, const char *name);
 
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 8872453e26c0..66867e92ddea 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -574,9 +574,7 @@ port_cleanup:
  * callback for each device that is added. @device must be allocated
  * with ib_alloc_device().
  */
-int ib_register_device(struct ib_device *device, const char *name,
-		       int (*port_callback)(struct ib_device *, u8,
-					    struct kobject *))
+int ib_register_device(struct ib_device *device, const char *name)
 {
 	int ret;
 	struct ib_client *client;
@@ -613,7 +611,7 @@ int ib_register_device(struct ib_device *device, const char *name,
 		goto dev_cleanup;
 	}
 
-	ret = ib_device_register_sysfs(device, port_callback);
+	ret = ib_device_register_sysfs(device);
 	if (ret) {
 		dev_warn(&device->dev,
 			 "Couldn't register device with driver model\n");
@@ -1283,6 +1281,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
 	SET_DEVICE_OP(dev_ops, get_vector_affinity);
 	SET_DEVICE_OP(dev_ops, get_vf_config);
 	SET_DEVICE_OP(dev_ops, get_vf_stats);
+	SET_DEVICE_OP(dev_ops, init_port);
 	SET_DEVICE_OP(dev_ops, map_mr_sg);
 	SET_DEVICE_OP(dev_ops, map_phys_fmr);
 	SET_DEVICE_OP(dev_ops, mmap);
diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
index 80f68eb0ba5c..7a5679933df6 100644
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c
@@ -1015,9 +1015,7 @@ err_free_stats:
 	return;
 }
 
-static int add_port(struct ib_device *device, int port_num,
-		    int (*port_callback)(struct ib_device *,
-					 u8, struct kobject *))
+static int add_port(struct ib_device *device, int port_num)
 {
 	struct ib_port *p;
 	struct ib_port_attr attr;
@@ -1113,8 +1111,8 @@ static int add_port(struct ib_device *device, int port_num,
 	if (ret)
 		goto err_free_pkey;
 
-	if (port_callback) {
-		ret = port_callback(device, port_num, &p->kobj);
+	if (device->ops.init_port) {
+		ret = device->ops.init_port(device, port_num, &p->kobj);
 		if (ret)
 			goto err_remove_pkey;
 	}
@@ -1308,9 +1306,7 @@ static void free_port_list_attributes(struct ib_device *device)
 	kobject_put(device->ports_kobj);
 }
 
-int ib_device_register_sysfs(struct ib_device *device,
-			     int (*port_callback)(struct ib_device *,
-						  u8, struct kobject *))
+int ib_device_register_sysfs(struct ib_device *device)
 {
 	struct device *class_dev = &device->dev;
 	int ret;
@@ -1330,12 +1326,12 @@ int ib_device_register_sysfs(struct ib_device *device,
 	}
 
 	if (rdma_cap_ib_switch(device)) {
-		ret = add_port(device, 0, port_callback);
+		ret = add_port(device, 0);
 		if (ret)
 			goto err_put;
 	} else {
 		for (i = 1; i <= device->phys_port_cnt; ++i) {
-			ret = add_port(device, i, port_callback);
+			ret = add_port(device, i);
 			if (ret)
 				goto err_put;
 		}
diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c
index e7a997f2a537..797a3e943366 100644
--- a/drivers/infiniband/hw/bnxt_re/main.c
+++ b/drivers/infiniband/hw/bnxt_re/main.c
@@ -662,7 +662,7 @@ static int bnxt_re_register_ib(struct bnxt_re_dev *rdev)
 	rdma_set_device_sysfs_group(ibdev, &bnxt_re_dev_attr_group);
 	ibdev->driver_id = RDMA_DRIVER_BNXT_RE;
 	ib_set_device_ops(ibdev, &bnxt_re_dev_ops);
-	return ib_register_device(ibdev, "bnxt_re%d", NULL);
+	return ib_register_device(ibdev, "bnxt_re%d");
 }
 
 static void bnxt_re_dev_remove(struct bnxt_re_dev *rdev)
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c
index 92ee6761a3bd..ffdde3cca268 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c
@@ -1409,7 +1409,7 @@ int iwch_register_device(struct iwch_dev *dev)
 	dev->ibdev.driver_id = RDMA_DRIVER_CXGB3;
 	rdma_set_device_sysfs_group(&dev->ibdev, &iwch_attr_group);
 	ib_set_device_ops(&dev->ibdev, &iwch_dev_ops);
-	ret = ib_register_device(&dev->ibdev, "cxgb3_%d", NULL);
+	ret = ib_register_device(&dev->ibdev, "cxgb3_%d");
 	if (ret)
 		kfree(dev->ibdev.iwcm);
 	return ret;
diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c
index 586b0c37481f..0a99894b0160 100644
--- a/drivers/infiniband/hw/cxgb4/provider.c
+++ b/drivers/infiniband/hw/cxgb4/provider.c
@@ -634,7 +634,7 @@ void c4iw_register_device(struct work_struct *work)
 	rdma_set_device_sysfs_group(&dev->ibdev, &c4iw_attr_group);
 	dev->ibdev.driver_id = RDMA_DRIVER_CXGB4;
 	ib_set_device_ops(&dev->ibdev, &c4iw_dev_ops);
-	ret = ib_register_device(&dev->ibdev, "cxgb4_%d", NULL);
+	ret = ib_register_device(&dev->ibdev, "cxgb4_%d");
 	if (ret)
 		goto err_kfree_iwcm;
 	return;
diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c
index ec582d86025f..8957adf58af7 100644
--- a/drivers/infiniband/hw/hfi1/verbs.c
+++ b/drivers/infiniband/hw/hfi1/verbs.c
@@ -1622,6 +1622,7 @@ static const struct ib_device_ops hfi1_dev_ops = {
 	.alloc_rdma_netdev = hfi1_vnic_alloc_rn,
 	.get_dev_fw_str = hfi1_get_dev_fw_str,
 	.get_hw_stats = get_hw_stats,
+	.init_port = hfi1_create_port_files,
 	.modify_device = modify_device,
 	/* keep process mad in the driver */
 	.process_mad = hfi1_process_mad,
@@ -1679,7 +1680,6 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
 	/*
 	 * Fill in rvt info object.
 	 */
-	dd->verbs_dev.rdi.driver_f.port_callback = hfi1_create_port_files;
 	dd->verbs_dev.rdi.driver_f.get_pci_dev = get_pci_dev;
 	dd->verbs_dev.rdi.driver_f.check_ah = hfi1_check_ah;
 	dd->verbs_dev.rdi.driver_f.notify_new_ah = hfi1_notify_new_ah;
diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
index 44a07fce0617..46ede58ef3b8 100644
--- a/drivers/infiniband/hw/hns/hns_roce_main.c
+++ b/drivers/infiniband/hw/hns/hns_roce_main.c
@@ -564,7 +564,7 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev)
 	ib_dev->driver_id = RDMA_DRIVER_HNS;
 	ib_set_device_ops(ib_dev, hr_dev->hw->hns_roce_dev_ops);
 	ib_set_device_ops(ib_dev, &hns_roce_dev_ops);
-	ret = ib_register_device(ib_dev, "hns_%d", NULL);
+	ret = ib_register_device(ib_dev, "hns_%d");
 	if (ret) {
 		dev_err(dev, "ib_register_device failed!\n");
 		return ret;
diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
index 80b66df95362..af66ab9d150b 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
@@ -2868,7 +2868,7 @@ int i40iw_register_rdma_device(struct i40iw_device *iwdev)
 	iwibdev = iwdev->iwibdev;
 	rdma_set_device_sysfs_group(&iwibdev->ibdev, &i40iw_attr_group);
 	iwibdev->ibdev.driver_id = RDMA_DRIVER_I40IW;
-	ret = ib_register_device(&iwibdev->ibdev, "i40iw%d", NULL);
+	ret = ib_register_device(&iwibdev->ibdev, "i40iw%d");
 	if (ret)
 		goto error;
 
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 1f15ec3e2b83..c3f950d82ed0 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -2856,7 +2856,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
 
 	rdma_set_device_sysfs_group(&ibdev->ib_dev, &mlx4_attr_group);
 	ibdev->ib_dev.driver_id = RDMA_DRIVER_MLX4;
-	if (ib_register_device(&ibdev->ib_dev, "mlx4_%d", NULL))
+	if (ib_register_device(&ibdev->ib_dev, "mlx4_%d"))
 		goto err_diag_counters;
 
 	if (mlx4_ib_mad_init(ibdev))
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 717e8535b41a..948617a60d44 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -6233,7 +6233,7 @@ int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev)
 		name = "mlx5_%d";
 	else
 		name = "mlx5_bond_%d";
-	return ib_register_device(&dev->ib_dev, name, NULL);
+	return ib_register_device(&dev->ib_dev, name);
 }
 
 void mlx5_ib_stage_pre_ib_reg_umr_cleanup(struct mlx5_ib_dev *dev)
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index 0a939ea5497b..3473c6c51b92 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -1338,7 +1338,7 @@ int mthca_register_device(struct mthca_dev *dev)
 
 	rdma_set_device_sysfs_group(&dev->ib_dev, &mthca_attr_group);
 	dev->ib_dev.driver_id = RDMA_DRIVER_MTHCA;
-	ret = ib_register_device(&dev->ib_dev, "mthca%d", NULL);
+	ret = ib_register_device(&dev->ib_dev, "mthca%d");
 	if (ret)
 		return ret;
 
diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c
index feb4d259aab9..34601f0cbd74 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.c
+++ b/drivers/infiniband/hw/nes/nes_verbs.c
@@ -3801,7 +3801,7 @@ int nes_register_ofa_device(struct nes_ib_device *nesibdev)
 
 	rdma_set_device_sysfs_group(&nesvnic->nesibdev->ibdev, &nes_attr_group);
 	nesvnic->nesibdev->ibdev.driver_id = RDMA_DRIVER_NES;
-	ret = ib_register_device(&nesvnic->nesibdev->ibdev, "nes%d", NULL);
+	ret = ib_register_device(&nesvnic->nesibdev->ibdev, "nes%d");
 	if (ret) {
 		return ret;
 	}
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
index 1f393842453a..f45b996f617f 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
@@ -243,7 +243,7 @@ static int ocrdma_register_device(struct ocrdma_dev *dev)
 	}
 	rdma_set_device_sysfs_group(&dev->ibdev, &ocrdma_attr_group);
 	dev->ibdev.driver_id = RDMA_DRIVER_OCRDMA;
-	return ib_register_device(&dev->ibdev, "ocrdma%d", NULL);
+	return ib_register_device(&dev->ibdev, "ocrdma%d");
 }
 
 static int ocrdma_alloc_resources(struct ocrdma_dev *dev)
diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c
index 75940e2a8791..8e5c76d06855 100644
--- a/drivers/infiniband/hw/qedr/main.c
+++ b/drivers/infiniband/hw/qedr/main.c
@@ -290,7 +290,7 @@ static int qedr_register_device(struct qedr_dev *dev)
 	ib_set_device_ops(&dev->ibdev, &qedr_dev_ops);
 
 	dev->ibdev.driver_id = RDMA_DRIVER_QEDR;
-	return ib_register_device(&dev->ibdev, "qedr%d", NULL);
+	return ib_register_device(&dev->ibdev, "qedr%d");
 }
 
 /* This function allocates fast-path status block memory */
diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c
index 276304f611ab..ff8dab8e2344 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.c
+++ b/drivers/infiniband/hw/qib/qib_verbs.c
@@ -1494,6 +1494,7 @@ static void qib_fill_device_attr(struct qib_devdata *dd)
 }
 
 static const struct ib_device_ops qib_dev_ops = {
+	.init_port = qib_create_port_files,
 	.modify_device = qib_modify_device,
 	.process_mad = qib_process_mad,
 };
@@ -1567,7 +1568,6 @@ int qib_register_ib_device(struct qib_devdata *dd)
 	/*
 	 * Fill in rvt info object.
 	 */
-	dd->verbs_dev.rdi.driver_f.port_callback = qib_create_port_files;
 	dd->verbs_dev.rdi.driver_f.get_pci_dev = qib_get_pci_dev;
 	dd->verbs_dev.rdi.driver_f.check_ah = qib_check_ah;
 	dd->verbs_dev.rdi.driver_f.setup_wqe = qib_check_send_wqe;
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c b/drivers/infiniband/hw/usnic/usnic_ib_main.c
index b2323a52a0dd..3201dd1899c7 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_main.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c
@@ -422,7 +422,7 @@ static void *usnic_ib_device_add(struct pci_dev *dev)
 	us_ibdev->ib_dev.driver_id = RDMA_DRIVER_USNIC;
 	rdma_set_device_sysfs_group(&us_ibdev->ib_dev, &usnic_attr_group);
 
-	if (ib_register_device(&us_ibdev->ib_dev, "usnic_%d", NULL))
+	if (ib_register_device(&us_ibdev->ib_dev, "usnic_%d"))
 		goto err_fwd_dealloc;
 
 	usnic_fwd_set_mtu(us_ibdev->ufdev, us_ibdev->netdev->mtu);
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
index eaa109dbc96a..1bc415483d9b 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
@@ -278,7 +278,7 @@ static int pvrdma_register_device(struct pvrdma_dev *dev)
 	spin_lock_init(&dev->srq_tbl_lock);
 	rdma_set_device_sysfs_group(&dev->ib_dev, &pvrdma_attr_group);
 
-	ret = ib_register_device(&dev->ib_dev, "vmw_pvrdma%d", NULL);
+	ret = ib_register_device(&dev->ib_dev, "vmw_pvrdma%d");
 	if (ret)
 		goto err_srq_free;
 
diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c
index 8724a1817158..7de7389d0235 100644
--- a/drivers/infiniband/sw/rdmavt/vt.c
+++ b/drivers/infiniband/sw/rdmavt/vt.c
@@ -446,7 +446,7 @@ static noinline int check_support(struct rvt_dev_info *rdi, int verb)
 		 * These functions are not part of verbs specifically but are
 		 * required for rdmavt to function.
 		 */
-		if ((!rdi->driver_f.port_callback) ||
+		if ((!rdi->ibdev.ops.init_port) ||
 		    (!rdi->driver_f.get_pci_dev))
 			return -EINVAL;
 		break;
@@ -644,8 +644,7 @@ int rvt_register_device(struct rvt_dev_info *rdi, u32 driver_id)
 
 	rdi->ibdev.driver_id = driver_id;
 	/* We are now good to announce we exist */
-	ret = ib_register_device(&rdi->ibdev, dev_name(&rdi->ibdev.dev),
-				 rdi->driver_f.port_callback);
+	ret = ib_register_device(&rdi->ibdev, dev_name(&rdi->ibdev.dev));
 	if (ret) {
 		rvt_pr_err(rdi, "Failed to register driver with ib core.\n");
 		goto bail_wss;
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index b20e6e0415f5..43171148e9c5 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -1258,7 +1258,7 @@ int rxe_register_device(struct rxe_dev *rxe)
 
 	rdma_set_device_sysfs_group(dev, &rxe_attr_group);
 	dev->driver_id = RDMA_DRIVER_RXE;
-	err = ib_register_device(dev, "rxe%d", NULL);
+	err = ib_register_device(dev, "rxe%d");
 	if (err) {
 		pr_warn("%s failed with error %d\n", __func__, err);
 		goto err1;
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index aa1f126d3383..1d1902fd9f87 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -2504,6 +2504,12 @@ struct ib_device_ops {
 	 */
 	int (*get_hw_stats)(struct ib_device *device,
 			    struct rdma_hw_stats *stats, u8 port, int index);
+	/*
+	 * This function is called once for each port when a ib device is
+	 * registered.
+	 */
+	int (*init_port)(struct ib_device *device, u8 port_num,
+			 struct kobject *port_sysfs);
 };
 
 struct ib_device {
@@ -2620,9 +2626,7 @@ void ib_dealloc_device(struct ib_device *device);
 
 void ib_get_device_fw_str(struct ib_device *device, char *str);
 
-int ib_register_device(struct ib_device *device, const char *name,
-		       int (*port_callback)(struct ib_device *, u8,
-					    struct kobject *));
+int ib_register_device(struct ib_device *device, const char *name);
 void ib_unregister_device(struct ib_device *device);
 
 int ib_register_client   (struct ib_client *client);
diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h
index dd0ed8048bb4..acb3bc96dfa7 100644
--- a/include/rdma/rdma_vt.h
+++ b/include/rdma/rdma_vt.h
@@ -250,9 +250,6 @@ struct rvt_driver_provided {
 	 */
 	void (*do_send)(struct rvt_qp *qp);
 
-	/* Passed to ib core registration. Callback to create syfs files */
-	int (*port_callback)(struct ib_device *, u8, struct kobject *);
-
 	/*
 	 * Returns a pointer to the undelying hardware's PCI device. This is
 	 * used to display information as to what hardware is being referenced
-- 
cgit v1.2.3


From 54747231150f0dddf68f2ee29ec2970fcc433909 Mon Sep 17 00:00:00 2001
From: Parav Pandit <parav@mellanox.com>
Date: Tue, 18 Dec 2018 14:15:56 +0200
Subject: RDMA: Introduce and use rdma_device_to_ibdev()

Introduce and use rdma_device_to_ibdev() API for those drivers which are
registering one sysfs group and also use in ib_core.

In subsequent patch, device->provider_ibdev one-to-one mapping is no
longer holds true during accessing sysfs entries.
Therefore, introduce an API rdma_device_to_ibdev() that provides such
information.

Signed-off-by: Parav Pandit <parav@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/core/device.c             |  2 --
 drivers/infiniband/core/sysfs.c              | 12 ++++++------
 drivers/infiniband/hw/bnxt_re/main.c         |  6 ++++--
 drivers/infiniband/hw/cxgb3/iwch_provider.c  | 14 ++++++++------
 drivers/infiniband/hw/cxgb4/provider.c       | 14 ++++++++------
 drivers/infiniband/hw/hfi1/sysfs.c           | 16 ++++++++--------
 drivers/infiniband/hw/i40iw/i40iw_verbs.c    |  5 ++---
 drivers/infiniband/hw/mlx4/main.c            |  7 ++++---
 drivers/infiniband/hw/mlx5/main.c            | 13 ++++++++-----
 drivers/infiniband/hw/mthca/mthca_provider.c |  9 ++++++---
 drivers/infiniband/hw/nes/nes_verbs.c        |  2 +-
 drivers/infiniband/hw/ocrdma/ocrdma_main.c   |  6 ++++--
 drivers/infiniband/hw/qedr/main.c            |  3 ++-
 drivers/infiniband/hw/qib/qib_sysfs.c        | 18 +++++++++---------
 drivers/infiniband/hw/usnic/usnic_ib_sysfs.c | 26 +++++++++++---------------
 drivers/infiniband/sw/rxe/rxe_verbs.c        |  4 ++--
 include/rdma/ib_verbs.h                      | 23 +++++++++++++++++++++++
 17 files changed, 106 insertions(+), 74 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 66867e92ddea..f8180cf1a004 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -296,8 +296,6 @@ struct ib_device *ib_alloc_device(size_t size)
 	device->dev.class = &ib_class;
 	device_initialize(&device->dev);
 
-	dev_set_drvdata(&device->dev, device);
-
 	INIT_LIST_HEAD(&device->event_handler_list);
 	spin_lock_init(&device->event_handler_lock);
 	rwlock_init(&device->client_data_lock);
diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
index 7a5679933df6..c75692802da8 100644
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c
@@ -1187,7 +1187,7 @@ err_put:
 static ssize_t node_type_show(struct device *device,
 			      struct device_attribute *attr, char *buf)
 {
-	struct ib_device *dev = container_of(device, struct ib_device, dev);
+	struct ib_device *dev = rdma_device_to_ibdev(device);
 
 	switch (dev->node_type) {
 	case RDMA_NODE_IB_CA:	  return sprintf(buf, "%d: CA\n", dev->node_type);
@@ -1204,7 +1204,7 @@ static DEVICE_ATTR_RO(node_type);
 static ssize_t sys_image_guid_show(struct device *device,
 				   struct device_attribute *dev_attr, char *buf)
 {
-	struct ib_device *dev = container_of(device, struct ib_device, dev);
+	struct ib_device *dev = rdma_device_to_ibdev(device);
 
 	return sprintf(buf, "%04x:%04x:%04x:%04x\n",
 		       be16_to_cpu(((__be16 *) &dev->attrs.sys_image_guid)[0]),
@@ -1217,7 +1217,7 @@ static DEVICE_ATTR_RO(sys_image_guid);
 static ssize_t node_guid_show(struct device *device,
 			      struct device_attribute *attr, char *buf)
 {
-	struct ib_device *dev = container_of(device, struct ib_device, dev);
+	struct ib_device *dev = rdma_device_to_ibdev(device);
 
 	return sprintf(buf, "%04x:%04x:%04x:%04x\n",
 		       be16_to_cpu(((__be16 *) &dev->node_guid)[0]),
@@ -1230,7 +1230,7 @@ static DEVICE_ATTR_RO(node_guid);
 static ssize_t node_desc_show(struct device *device,
 			      struct device_attribute *attr, char *buf)
 {
-	struct ib_device *dev = container_of(device, struct ib_device, dev);
+	struct ib_device *dev = rdma_device_to_ibdev(device);
 
 	return sprintf(buf, "%.64s\n", dev->node_desc);
 }
@@ -1239,7 +1239,7 @@ static ssize_t node_desc_store(struct device *device,
 			       struct device_attribute *attr,
 			       const char *buf, size_t count)
 {
-	struct ib_device *dev = container_of(device, struct ib_device, dev);
+	struct ib_device *dev = rdma_device_to_ibdev(device);
 	struct ib_device_modify desc = {};
 	int ret;
 
@@ -1258,7 +1258,7 @@ static DEVICE_ATTR_RW(node_desc);
 static ssize_t fw_ver_show(struct device *device, struct device_attribute *attr,
 			   char *buf)
 {
-	struct ib_device *dev = container_of(device, struct ib_device, dev);
+	struct ib_device *dev = rdma_device_to_ibdev(device);
 
 	ib_get_device_fw_str(dev, buf);
 	strlcat(buf, "\n", IB_FW_VERSION_NAME_MAX);
diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c
index 797a3e943366..16eecfa5882c 100644
--- a/drivers/infiniband/hw/bnxt_re/main.c
+++ b/drivers/infiniband/hw/bnxt_re/main.c
@@ -538,7 +538,8 @@ static struct bnxt_en_dev *bnxt_re_dev_probe(struct net_device *netdev)
 static ssize_t hw_rev_show(struct device *device, struct device_attribute *attr,
 			   char *buf)
 {
-	struct bnxt_re_dev *rdev = to_bnxt_re_dev(device, ibdev.dev);
+	struct bnxt_re_dev *rdev =
+		rdma_device_to_drv_device(device, struct bnxt_re_dev, ibdev);
 
 	return scnprintf(buf, PAGE_SIZE, "0x%x\n", rdev->en_dev->pdev->vendor);
 }
@@ -547,7 +548,8 @@ static DEVICE_ATTR_RO(hw_rev);
 static ssize_t hca_type_show(struct device *device,
 			     struct device_attribute *attr, char *buf)
 {
-	struct bnxt_re_dev *rdev = to_bnxt_re_dev(device, ibdev.dev);
+	struct bnxt_re_dev *rdev =
+		rdma_device_to_drv_device(device, struct bnxt_re_dev, ibdev);
 
 	return scnprintf(buf, PAGE_SIZE, "%s\n", rdev->ibdev.node_desc);
 }
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c
index ffdde3cca268..07c20cd07f33 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c
@@ -1130,8 +1130,9 @@ static int iwch_query_port(struct ib_device *ibdev,
 static ssize_t hw_rev_show(struct device *dev,
 			   struct device_attribute *attr, char *buf)
 {
-	struct iwch_dev *iwch_dev = container_of(dev, struct iwch_dev,
-						 ibdev.dev);
+	struct iwch_dev *iwch_dev =
+			rdma_device_to_drv_device(dev, struct iwch_dev, ibdev);
+
 	pr_debug("%s dev 0x%p\n", __func__, dev);
 	return sprintf(buf, "%d\n", iwch_dev->rdev.t3cdev_p->type);
 }
@@ -1140,8 +1141,8 @@ static DEVICE_ATTR_RO(hw_rev);
 static ssize_t hca_type_show(struct device *dev,
 			     struct device_attribute *attr, char *buf)
 {
-	struct iwch_dev *iwch_dev = container_of(dev, struct iwch_dev,
-						 ibdev.dev);
+	struct iwch_dev *iwch_dev =
+			rdma_device_to_drv_device(dev, struct iwch_dev, ibdev);
 	struct ethtool_drvinfo info;
 	struct net_device *lldev = iwch_dev->rdev.t3cdev_p->lldev;
 
@@ -1154,8 +1155,9 @@ static DEVICE_ATTR_RO(hca_type);
 static ssize_t board_id_show(struct device *dev,
 			     struct device_attribute *attr, char *buf)
 {
-	struct iwch_dev *iwch_dev = container_of(dev, struct iwch_dev,
-						 ibdev.dev);
+	struct iwch_dev *iwch_dev =
+			rdma_device_to_drv_device(dev, struct iwch_dev, ibdev);
+
 	pr_debug("%s dev 0x%p\n", __func__, dev);
 	return sprintf(buf, "%x.%x\n", iwch_dev->rdev.rnic_info.pdev->vendor,
 		       iwch_dev->rdev.rnic_info.pdev->device);
diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c
index 0a99894b0160..f977f8e7e162 100644
--- a/drivers/infiniband/hw/cxgb4/provider.c
+++ b/drivers/infiniband/hw/cxgb4/provider.c
@@ -376,8 +376,9 @@ static int c4iw_query_port(struct ib_device *ibdev, u8 port,
 static ssize_t hw_rev_show(struct device *dev,
 			   struct device_attribute *attr, char *buf)
 {
-	struct c4iw_dev *c4iw_dev = container_of(dev, struct c4iw_dev,
-						 ibdev.dev);
+	struct c4iw_dev *c4iw_dev =
+			rdma_device_to_drv_device(dev, struct c4iw_dev, ibdev);
+
 	pr_debug("dev 0x%p\n", dev);
 	return sprintf(buf, "%d\n",
 		       CHELSIO_CHIP_RELEASE(c4iw_dev->rdev.lldi.adapter_type));
@@ -387,8 +388,8 @@ static DEVICE_ATTR_RO(hw_rev);
 static ssize_t hca_type_show(struct device *dev,
 			     struct device_attribute *attr, char *buf)
 {
-	struct c4iw_dev *c4iw_dev = container_of(dev, struct c4iw_dev,
-						 ibdev.dev);
+	struct c4iw_dev *c4iw_dev =
+			rdma_device_to_drv_device(dev, struct c4iw_dev, ibdev);
 	struct ethtool_drvinfo info;
 	struct net_device *lldev = c4iw_dev->rdev.lldi.ports[0];
 
@@ -401,8 +402,9 @@ static DEVICE_ATTR_RO(hca_type);
 static ssize_t board_id_show(struct device *dev, struct device_attribute *attr,
 			     char *buf)
 {
-	struct c4iw_dev *c4iw_dev = container_of(dev, struct c4iw_dev,
-						 ibdev.dev);
+	struct c4iw_dev *c4iw_dev =
+			rdma_device_to_drv_device(dev, struct c4iw_dev, ibdev);
+
 	pr_debug("dev 0x%p\n", dev);
 	return sprintf(buf, "%x.%x\n", c4iw_dev->rdev.lldi.pdev->vendor,
 		       c4iw_dev->rdev.lldi.pdev->device);
diff --git a/drivers/infiniband/hw/hfi1/sysfs.c b/drivers/infiniband/hw/hfi1/sysfs.c
index 2be513d4c9da..90f62c4bddba 100644
--- a/drivers/infiniband/hw/hfi1/sysfs.c
+++ b/drivers/infiniband/hw/hfi1/sysfs.c
@@ -498,7 +498,7 @@ static ssize_t hw_rev_show(struct device *device, struct device_attribute *attr,
 			   char *buf)
 {
 	struct hfi1_ibdev *dev =
-		container_of(device, struct hfi1_ibdev, rdi.ibdev.dev);
+		rdma_device_to_drv_device(device, struct hfi1_ibdev, rdi.ibdev);
 
 	return sprintf(buf, "%x\n", dd_from_dev(dev)->minrev);
 }
@@ -508,7 +508,7 @@ static ssize_t board_id_show(struct device *device,
 			     struct device_attribute *attr, char *buf)
 {
 	struct hfi1_ibdev *dev =
-		container_of(device, struct hfi1_ibdev, rdi.ibdev.dev);
+		rdma_device_to_drv_device(device, struct hfi1_ibdev, rdi.ibdev);
 	struct hfi1_devdata *dd = dd_from_dev(dev);
 	int ret;
 
@@ -524,7 +524,7 @@ static ssize_t boardversion_show(struct device *device,
 				 struct device_attribute *attr, char *buf)
 {
 	struct hfi1_ibdev *dev =
-		container_of(device, struct hfi1_ibdev, rdi.ibdev.dev);
+		rdma_device_to_drv_device(device, struct hfi1_ibdev, rdi.ibdev);
 	struct hfi1_devdata *dd = dd_from_dev(dev);
 
 	/* The string printed here is already newline-terminated. */
@@ -536,7 +536,7 @@ static ssize_t nctxts_show(struct device *device,
 			   struct device_attribute *attr, char *buf)
 {
 	struct hfi1_ibdev *dev =
-		container_of(device, struct hfi1_ibdev, rdi.ibdev.dev);
+		rdma_device_to_drv_device(device, struct hfi1_ibdev, rdi.ibdev);
 	struct hfi1_devdata *dd = dd_from_dev(dev);
 
 	/*
@@ -555,7 +555,7 @@ static ssize_t nfreectxts_show(struct device *device,
 			       struct device_attribute *attr, char *buf)
 {
 	struct hfi1_ibdev *dev =
-		container_of(device, struct hfi1_ibdev, rdi.ibdev.dev);
+		rdma_device_to_drv_device(device, struct hfi1_ibdev, rdi.ibdev);
 	struct hfi1_devdata *dd = dd_from_dev(dev);
 
 	/* Return the number of free user ports (contexts) available. */
@@ -567,7 +567,7 @@ static ssize_t serial_show(struct device *device,
 			   struct device_attribute *attr, char *buf)
 {
 	struct hfi1_ibdev *dev =
-		container_of(device, struct hfi1_ibdev, rdi.ibdev.dev);
+		rdma_device_to_drv_device(device, struct hfi1_ibdev, rdi.ibdev);
 	struct hfi1_devdata *dd = dd_from_dev(dev);
 
 	return scnprintf(buf, PAGE_SIZE, "%s", dd->serial);
@@ -579,7 +579,7 @@ static ssize_t chip_reset_store(struct device *device,
 				size_t count)
 {
 	struct hfi1_ibdev *dev =
-		container_of(device, struct hfi1_ibdev, rdi.ibdev.dev);
+		rdma_device_to_drv_device(device, struct hfi1_ibdev, rdi.ibdev);
 	struct hfi1_devdata *dd = dd_from_dev(dev);
 	int ret;
 
@@ -609,7 +609,7 @@ static ssize_t tempsense_show(struct device *device,
 			      struct device_attribute *attr, char *buf)
 {
 	struct hfi1_ibdev *dev =
-		container_of(device, struct hfi1_ibdev, rdi.ibdev.dev);
+		rdma_device_to_drv_device(device, struct hfi1_ibdev, rdi.ibdev);
 	struct hfi1_devdata *dd = dd_from_dev(dev);
 	struct hfi1_temp temp;
 	int ret;
diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
index af66ab9d150b..12b31a8440be 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
@@ -2139,9 +2139,8 @@ static int i40iw_dereg_mr(struct ib_mr *ib_mr)
 static ssize_t hw_rev_show(struct device *dev,
 			   struct device_attribute *attr, char *buf)
 {
-	struct i40iw_ib_device *iwibdev = container_of(dev,
-						       struct i40iw_ib_device,
-						       ibdev.dev);
+	struct i40iw_ib_device *iwibdev =
+		rdma_device_to_drv_device(dev, struct i40iw_ib_device, ibdev);
 	u32 hw_rev = iwibdev->iwdev->sc_dev.hw_rev;
 
 	return sprintf(buf, "%x\n", hw_rev);
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index c3f950d82ed0..dc2ffd293a11 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -2043,7 +2043,7 @@ static ssize_t hca_type_show(struct device *device,
 			     struct device_attribute *attr, char *buf)
 {
 	struct mlx4_ib_dev *dev =
-		container_of(device, struct mlx4_ib_dev, ib_dev.dev);
+		rdma_device_to_drv_device(device, struct mlx4_ib_dev, ib_dev);
 	return sprintf(buf, "MT%d\n", dev->dev->persist->pdev->device);
 }
 static DEVICE_ATTR_RO(hca_type);
@@ -2052,7 +2052,7 @@ static ssize_t hw_rev_show(struct device *device,
 			   struct device_attribute *attr, char *buf)
 {
 	struct mlx4_ib_dev *dev =
-		container_of(device, struct mlx4_ib_dev, ib_dev.dev);
+		rdma_device_to_drv_device(device, struct mlx4_ib_dev, ib_dev);
 	return sprintf(buf, "%x\n", dev->dev->rev_id);
 }
 static DEVICE_ATTR_RO(hw_rev);
@@ -2061,7 +2061,8 @@ static ssize_t board_id_show(struct device *device,
 			     struct device_attribute *attr, char *buf)
 {
 	struct mlx4_ib_dev *dev =
-		container_of(device, struct mlx4_ib_dev, ib_dev.dev);
+		rdma_device_to_drv_device(device, struct mlx4_ib_dev, ib_dev);
+
 	return sprintf(buf, "%.*s\n", MLX4_BOARD_ID_LEN,
 		       dev->dev->board_id);
 }
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 948617a60d44..4b1b56d54301 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -4104,7 +4104,7 @@ static ssize_t fw_pages_show(struct device *device,
 			     struct device_attribute *attr, char *buf)
 {
 	struct mlx5_ib_dev *dev =
-		container_of(device, struct mlx5_ib_dev, ib_dev.dev);
+		rdma_device_to_drv_device(device, struct mlx5_ib_dev, ib_dev);
 
 	return sprintf(buf, "%d\n", dev->mdev->priv.fw_pages);
 }
@@ -4114,7 +4114,7 @@ static ssize_t reg_pages_show(struct device *device,
 			      struct device_attribute *attr, char *buf)
 {
 	struct mlx5_ib_dev *dev =
-		container_of(device, struct mlx5_ib_dev, ib_dev.dev);
+		rdma_device_to_drv_device(device, struct mlx5_ib_dev, ib_dev);
 
 	return sprintf(buf, "%d\n", atomic_read(&dev->mdev->priv.reg_pages));
 }
@@ -4124,7 +4124,8 @@ static ssize_t hca_type_show(struct device *device,
 			     struct device_attribute *attr, char *buf)
 {
 	struct mlx5_ib_dev *dev =
-		container_of(device, struct mlx5_ib_dev, ib_dev.dev);
+		rdma_device_to_drv_device(device, struct mlx5_ib_dev, ib_dev);
+
 	return sprintf(buf, "MT%d\n", dev->mdev->pdev->device);
 }
 static DEVICE_ATTR_RO(hca_type);
@@ -4133,7 +4134,8 @@ static ssize_t hw_rev_show(struct device *device,
 			   struct device_attribute *attr, char *buf)
 {
 	struct mlx5_ib_dev *dev =
-		container_of(device, struct mlx5_ib_dev, ib_dev.dev);
+		rdma_device_to_drv_device(device, struct mlx5_ib_dev, ib_dev);
+
 	return sprintf(buf, "%x\n", dev->mdev->rev_id);
 }
 static DEVICE_ATTR_RO(hw_rev);
@@ -4142,7 +4144,8 @@ static ssize_t board_id_show(struct device *device,
 			     struct device_attribute *attr, char *buf)
 {
 	struct mlx5_ib_dev *dev =
-		container_of(device, struct mlx5_ib_dev, ib_dev.dev);
+		rdma_device_to_drv_device(device, struct mlx5_ib_dev, ib_dev);
+
 	return sprintf(buf, "%.*s\n", MLX5_BOARD_ID_LEN,
 		       dev->mdev->board_id);
 }
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index 3473c6c51b92..63003b4d2485 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -1081,7 +1081,8 @@ static ssize_t hw_rev_show(struct device *device,
 			   struct device_attribute *attr, char *buf)
 {
 	struct mthca_dev *dev =
-		container_of(device, struct mthca_dev, ib_dev.dev);
+		rdma_device_to_drv_device(device, struct mthca_dev, ib_dev);
+
 	return sprintf(buf, "%x\n", dev->rev_id);
 }
 static DEVICE_ATTR_RO(hw_rev);
@@ -1090,7 +1091,8 @@ static ssize_t hca_type_show(struct device *device,
 			     struct device_attribute *attr, char *buf)
 {
 	struct mthca_dev *dev =
-		container_of(device, struct mthca_dev, ib_dev.dev);
+		rdma_device_to_drv_device(device, struct mthca_dev, ib_dev);
+
 	switch (dev->pdev->device) {
 	case PCI_DEVICE_ID_MELLANOX_TAVOR:
 		return sprintf(buf, "MT23108\n");
@@ -1111,7 +1113,8 @@ static ssize_t board_id_show(struct device *device,
 			     struct device_attribute *attr, char *buf)
 {
 	struct mthca_dev *dev =
-		container_of(device, struct mthca_dev, ib_dev.dev);
+		rdma_device_to_drv_device(device, struct mthca_dev, ib_dev);
+
 	return sprintf(buf, "%.*s\n", MTHCA_BOARD_ID_LEN, dev->board_id);
 }
 static DEVICE_ATTR_RO(board_id);
diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c
index 34601f0cbd74..034156f7e9ed 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.c
+++ b/drivers/infiniband/hw/nes/nes_verbs.c
@@ -2560,7 +2560,7 @@ static ssize_t hw_rev_show(struct device *dev,
 			   struct device_attribute *attr, char *buf)
 {
 	struct nes_ib_device *nesibdev =
-			container_of(dev, struct nes_ib_device, ibdev.dev);
+		rdma_device_to_drv_device(dev, struct nes_ib_device, ibdev);
 	struct nes_vnic *nesvnic = nesibdev->nesvnic;
 
 	nes_debug(NES_DBG_INIT, "\n");
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
index f45b996f617f..b0491b9ecfe4 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
@@ -118,7 +118,8 @@ static void get_dev_fw_str(struct ib_device *device, char *str)
 static ssize_t hw_rev_show(struct device *device,
 			   struct device_attribute *attr, char *buf)
 {
-	struct ocrdma_dev *dev = dev_get_drvdata(device);
+	struct ocrdma_dev *dev =
+		rdma_device_to_drv_device(device, struct ocrdma_dev, ibdev);
 
 	return scnprintf(buf, PAGE_SIZE, "0x%x\n", dev->nic_info.pdev->vendor);
 }
@@ -127,7 +128,8 @@ static DEVICE_ATTR_RO(hw_rev);
 static ssize_t hca_type_show(struct device *device,
 			     struct device_attribute *attr, char *buf)
 {
-	struct ocrdma_dev *dev = dev_get_drvdata(device);
+	struct ocrdma_dev *dev =
+		rdma_device_to_drv_device(device, struct ocrdma_dev, ibdev);
 
 	return scnprintf(buf, PAGE_SIZE, "%s\n", &dev->model_number[0]);
 }
diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c
index 8e5c76d06855..f85e72b65a10 100644
--- a/drivers/infiniband/hw/qedr/main.c
+++ b/drivers/infiniband/hw/qedr/main.c
@@ -137,7 +137,8 @@ static int qedr_iw_port_immutable(struct ib_device *ibdev, u8 port_num,
 static ssize_t hw_rev_show(struct device *device, struct device_attribute *attr,
 			   char *buf)
 {
-	struct qedr_dev *dev = dev_get_drvdata(device);
+	struct qedr_dev *dev =
+		rdma_device_to_drv_device(device, struct qedr_dev, ibdev);
 
 	return scnprintf(buf, PAGE_SIZE, "0x%x\n", dev->pdev->vendor);
 }
diff --git a/drivers/infiniband/hw/qib/qib_sysfs.c b/drivers/infiniband/hw/qib/qib_sysfs.c
index 1cf4ca3f23e3..905206a0c2d5 100644
--- a/drivers/infiniband/hw/qib/qib_sysfs.c
+++ b/drivers/infiniband/hw/qib/qib_sysfs.c
@@ -555,7 +555,7 @@ static ssize_t hw_rev_show(struct device *device, struct device_attribute *attr,
 			   char *buf)
 {
 	struct qib_ibdev *dev =
-		container_of(device, struct qib_ibdev, rdi.ibdev.dev);
+		rdma_device_to_drv_device(device, struct qib_ibdev, rdi.ibdev);
 
 	return sprintf(buf, "%x\n", dd_from_dev(dev)->minrev);
 }
@@ -565,7 +565,7 @@ static ssize_t hca_type_show(struct device *device,
 			     struct device_attribute *attr, char *buf)
 {
 	struct qib_ibdev *dev =
-		container_of(device, struct qib_ibdev, rdi.ibdev.dev);
+		rdma_device_to_drv_device(device, struct qib_ibdev, rdi.ibdev);
 	struct qib_devdata *dd = dd_from_dev(dev);
 	int ret;
 
@@ -590,7 +590,7 @@ static ssize_t boardversion_show(struct device *device,
 				 struct device_attribute *attr, char *buf)
 {
 	struct qib_ibdev *dev =
-		container_of(device, struct qib_ibdev, rdi.ibdev.dev);
+		rdma_device_to_drv_device(device, struct qib_ibdev, rdi.ibdev);
 	struct qib_devdata *dd = dd_from_dev(dev);
 
 	/* The string printed here is already newline-terminated. */
@@ -602,7 +602,7 @@ static ssize_t localbus_info_show(struct device *device,
 				  struct device_attribute *attr, char *buf)
 {
 	struct qib_ibdev *dev =
-		container_of(device, struct qib_ibdev, rdi.ibdev.dev);
+		rdma_device_to_drv_device(device, struct qib_ibdev, rdi.ibdev);
 	struct qib_devdata *dd = dd_from_dev(dev);
 
 	/* The string printed here is already newline-terminated. */
@@ -614,7 +614,7 @@ static ssize_t nctxts_show(struct device *device,
 			   struct device_attribute *attr, char *buf)
 {
 	struct qib_ibdev *dev =
-		container_of(device, struct qib_ibdev, rdi.ibdev.dev);
+		rdma_device_to_drv_device(device, struct qib_ibdev, rdi.ibdev);
 	struct qib_devdata *dd = dd_from_dev(dev);
 
 	/* Return the number of user ports (contexts) available. */
@@ -630,7 +630,7 @@ static ssize_t nfreectxts_show(struct device *device,
 			       struct device_attribute *attr, char *buf)
 {
 	struct qib_ibdev *dev =
-		container_of(device, struct qib_ibdev, rdi.ibdev.dev);
+		rdma_device_to_drv_device(device, struct qib_ibdev, rdi.ibdev);
 	struct qib_devdata *dd = dd_from_dev(dev);
 
 	/* Return the number of free user ports (contexts) available. */
@@ -642,7 +642,7 @@ static ssize_t serial_show(struct device *device,
 			   struct device_attribute *attr, char *buf)
 {
 	struct qib_ibdev *dev =
-		container_of(device, struct qib_ibdev, rdi.ibdev.dev);
+		rdma_device_to_drv_device(device, struct qib_ibdev, rdi.ibdev);
 	struct qib_devdata *dd = dd_from_dev(dev);
 
 	buf[sizeof(dd->serial)] = '\0';
@@ -657,7 +657,7 @@ static ssize_t chip_reset_store(struct device *device,
 				size_t count)
 {
 	struct qib_ibdev *dev =
-		container_of(device, struct qib_ibdev, rdi.ibdev.dev);
+		rdma_device_to_drv_device(device, struct qib_ibdev, rdi.ibdev);
 	struct qib_devdata *dd = dd_from_dev(dev);
 	int ret;
 
@@ -679,7 +679,7 @@ static ssize_t tempsense_show(struct device *device,
 			      struct device_attribute *attr, char *buf)
 {
 	struct qib_ibdev *dev =
-		container_of(device, struct qib_ibdev, rdi.ibdev.dev);
+		rdma_device_to_drv_device(device, struct qib_ibdev, rdi.ibdev);
 	struct qib_devdata *dd = dd_from_dev(dev);
 	int ret;
 	int idx;
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c b/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c
index a7e4b2ccfaf8..c85d48ae7442 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c
@@ -50,7 +50,7 @@ static ssize_t board_id_show(struct device *device,
 			     struct device_attribute *attr, char *buf)
 {
 	struct usnic_ib_dev *us_ibdev =
-		container_of(device, struct usnic_ib_dev, ib_dev.dev);
+		rdma_device_to_drv_device(device, struct usnic_ib_dev, ib_dev);
 	unsigned short subsystem_device_id;
 
 	mutex_lock(&us_ibdev->usdev_lock);
@@ -67,14 +67,13 @@ static DEVICE_ATTR_RO(board_id);
 static ssize_t
 config_show(struct device *device, struct device_attribute *attr, char *buf)
 {
-	struct usnic_ib_dev *us_ibdev;
+	struct usnic_ib_dev *us_ibdev =
+		rdma_device_to_drv_device(device, struct usnic_ib_dev, ib_dev);
 	char *ptr;
 	unsigned left;
 	unsigned n;
 	enum usnic_vnic_res_type res_type;
 
-	us_ibdev = container_of(device, struct usnic_ib_dev, ib_dev.dev);
-
 	/* Buffer space limit is 1 page */
 	ptr = buf;
 	left = PAGE_SIZE;
@@ -130,9 +129,8 @@ static DEVICE_ATTR_RO(config);
 static ssize_t
 iface_show(struct device *device, struct device_attribute *attr, char *buf)
 {
-	struct usnic_ib_dev *us_ibdev;
-
-	us_ibdev = container_of(device, struct usnic_ib_dev, ib_dev.dev);
+	struct usnic_ib_dev *us_ibdev =
+		rdma_device_to_drv_device(device, struct usnic_ib_dev, ib_dev);
 
 	return scnprintf(buf, PAGE_SIZE, "%s\n",
 			netdev_name(us_ibdev->netdev));
@@ -142,9 +140,8 @@ static DEVICE_ATTR_RO(iface);
 static ssize_t
 max_vf_show(struct device *device, struct device_attribute *attr, char *buf)
 {
-	struct usnic_ib_dev *us_ibdev;
-
-	us_ibdev = container_of(device, struct usnic_ib_dev, ib_dev.dev);
+	struct usnic_ib_dev *us_ibdev =
+		rdma_device_to_drv_device(device, struct usnic_ib_dev, ib_dev);
 
 	return scnprintf(buf, PAGE_SIZE, "%u\n",
 			kref_read(&us_ibdev->vf_cnt));
@@ -154,10 +151,10 @@ static DEVICE_ATTR_RO(max_vf);
 static ssize_t
 qp_per_vf_show(struct device *device, struct device_attribute *attr, char *buf)
 {
-	struct usnic_ib_dev *us_ibdev;
+	struct usnic_ib_dev *us_ibdev =
+		rdma_device_to_drv_device(device, struct usnic_ib_dev, ib_dev);
 	int qp_per_vf;
 
-	us_ibdev = container_of(device, struct usnic_ib_dev, ib_dev.dev);
 	qp_per_vf = max(us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_WQ],
 			us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_RQ]);
 
@@ -169,9 +166,8 @@ static DEVICE_ATTR_RO(qp_per_vf);
 static ssize_t
 cq_per_vf_show(struct device *device, struct device_attribute *attr, char *buf)
 {
-	struct usnic_ib_dev *us_ibdev;
-
-	us_ibdev = container_of(device, struct usnic_ib_dev, ib_dev.dev);
+	struct usnic_ib_dev *us_ibdev =
+		rdma_device_to_drv_device(device, struct usnic_ib_dev, ib_dev);
 
 	return scnprintf(buf, PAGE_SIZE, "%d\n",
 			us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_CQ]);
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index 43171148e9c5..3d01247a28db 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -1129,8 +1129,8 @@ static int rxe_detach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid)
 static ssize_t parent_show(struct device *device,
 			   struct device_attribute *attr, char *buf)
 {
-	struct rxe_dev *rxe = container_of(device, struct rxe_dev,
-					   ib_dev.dev);
+	struct rxe_dev *rxe =
+		rdma_device_to_drv_device(device, struct rxe_dev, ib_dev);
 
 	return snprintf(buf, 16, "%s\n", rxe_parent_name(rxe, 1));
 }
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 1d1902fd9f87..94b6e1dd4dab 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -4241,4 +4241,27 @@ rdma_set_device_sysfs_group(struct ib_device *dev,
 	dev->groups[1] = group;
 }
 
+/**
+ * rdma_device_to_ibdev - Get ib_device pointer from device pointer
+ *
+ * @device:	device pointer for which ib_device pointer to retrieve
+ *
+ * rdma_device_to_ibdev() retrieves ib_device pointer from device.
+ *
+ */
+static inline struct ib_device *rdma_device_to_ibdev(struct device *device)
+{
+	return container_of(device, struct ib_device, dev);
+}
+
+/**
+ * rdma_device_to_drv_device - Helper macro to reach back to driver's
+ *			       ib_device holder structure from device pointer.
+ *
+ * NOTE: New drivers should not make use of this API; This API is only for
+ * existing drivers who have exposed sysfs entries using
+ * rdma_set_device_sysfs_group().
+ */
+#define rdma_device_to_drv_device(dev, drv_dev_struct, ibdev_member)           \
+	container_of(rdma_device_to_ibdev(dev), drv_dev_struct, ibdev_member)
 #endif /* IB_VERBS_H */
-- 
cgit v1.2.3


From 73f5a82bb3c9fce550da4a74a32b8cb064b50663 Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Sun, 13 Jan 2019 15:57:04 +0200
Subject: RDMA/mad: Reduce MAD scope to mlx5_ib only

Management Datagram Interface (MAD) is applicable
only when physical port is Infiniband. It makes MAD
command logic to be completely unrelated to eth/core
parts of mlx5.

Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Acked-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/hw/mlx5/cmd.c                 | 37 ++++++++++++
 drivers/infiniband/hw/mlx5/cmd.h                 |  2 +
 drivers/infiniband/hw/mlx5/mad.c                 | 11 ++--
 drivers/infiniband/hw/mlx5/mlx5_ib.h             |  3 -
 drivers/net/ethernet/mellanox/mlx5/core/Makefile |  2 +-
 drivers/net/ethernet/mellanox/mlx5/core/mad.c    | 75 ------------------------
 include/linux/mlx5/driver.h                      |  2 -
 7 files changed, 47 insertions(+), 85 deletions(-)
 delete mode 100644 drivers/net/ethernet/mellanox/mlx5/core/mad.c

(limited to 'include')

diff --git a/drivers/infiniband/hw/mlx5/cmd.c b/drivers/infiniband/hw/mlx5/cmd.c
index 356bccc715ee..6bcc63aaa50b 100644
--- a/drivers/infiniband/hw/mlx5/cmd.c
+++ b/drivers/infiniband/hw/mlx5/cmd.c
@@ -345,3 +345,40 @@ int mlx5_cmd_alloc_q_counter(struct mlx5_core_dev *dev, u16 *counter_id,
 				       counter_set_id);
 	return err;
 }
+
+int mlx5_cmd_mad_ifc(struct mlx5_core_dev *dev, const void *inb, void *outb,
+		     u16 opmod, u8 port)
+{
+	int outlen = MLX5_ST_SZ_BYTES(mad_ifc_out);
+	int inlen = MLX5_ST_SZ_BYTES(mad_ifc_in);
+	int err = -ENOMEM;
+	void *data;
+	void *resp;
+	u32 *out;
+	u32 *in;
+
+	in = kzalloc(inlen, GFP_KERNEL);
+	out = kzalloc(outlen, GFP_KERNEL);
+	if (!in || !out)
+		goto out;
+
+	MLX5_SET(mad_ifc_in, in, opcode, MLX5_CMD_OP_MAD_IFC);
+	MLX5_SET(mad_ifc_in, in, op_mod, opmod);
+	MLX5_SET(mad_ifc_in, in, port, port);
+
+	data = MLX5_ADDR_OF(mad_ifc_in, in, mad);
+	memcpy(data, inb, MLX5_FLD_SZ_BYTES(mad_ifc_in, mad));
+
+	err = mlx5_cmd_exec(dev, in, inlen, out, outlen);
+	if (err)
+		goto out;
+
+	resp = MLX5_ADDR_OF(mad_ifc_out, out, response_mad_packet);
+	memcpy(outb, resp,
+	       MLX5_FLD_SZ_BYTES(mad_ifc_out, response_mad_packet));
+
+out:
+	kfree(out);
+	kfree(in);
+	return err;
+}
diff --git a/drivers/infiniband/hw/mlx5/cmd.h b/drivers/infiniband/hw/mlx5/cmd.h
index 1e76dc67a369..923a7b93f507 100644
--- a/drivers/infiniband/hw/mlx5/cmd.h
+++ b/drivers/infiniband/hw/mlx5/cmd.h
@@ -63,4 +63,6 @@ int mlx5_cmd_xrcd_alloc(struct mlx5_core_dev *dev, u32 *xrcdn, u16 uid);
 int mlx5_cmd_xrcd_dealloc(struct mlx5_core_dev *dev, u32 xrcdn, u16 uid);
 int mlx5_cmd_alloc_q_counter(struct mlx5_core_dev *dev, u16 *counter_id,
 			     u16 uid);
+int mlx5_cmd_mad_ifc(struct mlx5_core_dev *dev, const void *inb, void *outb,
+		     u16 opmod, u8 port);
 #endif /* MLX5_IB_CMD_H */
diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c
index 558638468edb..6c529e6f3a01 100644
--- a/drivers/infiniband/hw/mlx5/mad.c
+++ b/drivers/infiniband/hw/mlx5/mad.c
@@ -36,6 +36,7 @@
 #include <rdma/ib_smi.h>
 #include <rdma/ib_pma.h>
 #include "mlx5_ib.h"
+#include "cmd.h"
 
 enum {
 	MLX5_IB_VENDOR_CLASS1 = 0x9,
@@ -51,9 +52,10 @@ static bool can_do_mad_ifc(struct mlx5_ib_dev *dev, u8 port_num,
 	return dev->mdev->port_caps[port_num - 1].has_smi;
 }
 
-int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey,
-		 u8 port, const struct ib_wc *in_wc, const struct ib_grh *in_grh,
-		 const void *in_mad, void *response_mad)
+static int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey,
+			int ignore_bkey, u8 port, const struct ib_wc *in_wc,
+			const struct ib_grh *in_grh, const void *in_mad,
+			void *response_mad)
 {
 	u8 op_modifier = 0;
 
@@ -68,7 +70,8 @@ int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey,
 	if (ignore_bkey || !in_wc)
 		op_modifier |= 0x2;
 
-	return mlx5_core_mad_ifc(dev->mdev, in_mad, response_mad, op_modifier, port);
+	return mlx5_cmd_mad_ifc(dev->mdev, in_mad, response_mad, op_modifier,
+				port);
 }
 
 static int process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index b06d3b1efea8..efe383c0ac86 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -1038,9 +1038,6 @@ void mlx5_ib_db_unmap_user(struct mlx5_ib_ucontext *context, struct mlx5_db *db)
 void __mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq);
 void mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq);
 void mlx5_ib_free_srq_wqe(struct mlx5_ib_srq *srq, int wqe_index);
-int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey,
-		 u8 port, const struct ib_wc *in_wc, const struct ib_grh *in_grh,
-		 const void *in_mad, void *response_mad);
 struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr,
 				u32 flags, struct ib_udata *udata);
 int mlx5_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index 9de9abacf7f6..0257731e6d42 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -13,7 +13,7 @@ obj-$(CONFIG_MLX5_CORE) += mlx5_core.o
 #
 mlx5_core-y :=	main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
 		health.o mcg.o cq.o alloc.o qp.o port.o mr.o pd.o \
-		mad.o transobj.o vport.o sriov.o fs_cmd.o fs_core.o \
+		transobj.o vport.o sriov.o fs_cmd.o fs_core.o \
 		fs_counters.o rl.o lag.o dev.o events.o wq.o lib/gid.o \
 		lib/devcom.o diag/fs_tracepoint.o diag/fw_tracer.o
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mad.c b/drivers/net/ethernet/mellanox/mlx5/core/mad.c
deleted file mode 100644
index 3a3b0005fd2b..000000000000
--- a/drivers/net/ethernet/mellanox/mlx5/core/mad.c
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/mlx5/driver.h>
-#include <linux/mlx5/cmd.h>
-#include "mlx5_core.h"
-
-int mlx5_core_mad_ifc(struct mlx5_core_dev *dev, const void *inb, void *outb,
-		      u16 opmod, u8 port)
-{
-	int outlen = MLX5_ST_SZ_BYTES(mad_ifc_out);
-	int inlen = MLX5_ST_SZ_BYTES(mad_ifc_in);
-	int err = -ENOMEM;
-	void *data;
-	void *resp;
-	u32 *out;
-	u32 *in;
-
-	in = kzalloc(inlen, GFP_KERNEL);
-	out = kzalloc(outlen, GFP_KERNEL);
-	if (!in || !out)
-		goto out;
-
-	MLX5_SET(mad_ifc_in, in, opcode, MLX5_CMD_OP_MAD_IFC);
-	MLX5_SET(mad_ifc_in, in, op_mod, opmod);
-	MLX5_SET(mad_ifc_in, in, port, port);
-
-	data = MLX5_ADDR_OF(mad_ifc_in, in, mad);
-	memcpy(data, inb, MLX5_FLD_SZ_BYTES(mad_ifc_in, mad));
-
-	err = mlx5_cmd_exec(dev, in, inlen, out, outlen);
-	if (err)
-		goto out;
-
-	resp = MLX5_ADDR_OF(mad_ifc_out, out, response_mad_packet);
-	memcpy(outb, resp,
-	       MLX5_FLD_SZ_BYTES(mad_ifc_out, response_mad_packet));
-
-out:
-	kfree(out);
-	kfree(in);
-	return err;
-}
-EXPORT_SYMBOL_GPL(mlx5_core_mad_ifc);
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 54299251d40d..4e444863054a 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -897,8 +897,6 @@ int mlx5_core_query_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey,
 			 u32 *out, int outlen);
 int mlx5_core_alloc_pd(struct mlx5_core_dev *dev, u32 *pdn);
 int mlx5_core_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn);
-int mlx5_core_mad_ifc(struct mlx5_core_dev *dev, const void *inb, void *outb,
-		      u16 opmod, u8 port);
 int mlx5_pagealloc_init(struct mlx5_core_dev *dev);
 void mlx5_pagealloc_cleanup(struct mlx5_core_dev *dev);
 void mlx5_pagealloc_start(struct mlx5_core_dev *dev);
-- 
cgit v1.2.3


From 16118794ede91aac1a73abe15de22d3de9d2b775 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 11 Jan 2019 14:33:17 +0100
Subject: posix-cpu-timers: Remove private interval storage

Posix CPU timers store the interval in private storage for historical
reasons (it_interval used to be a non scalar representation on 32bit
systems). This is gone and there is no reason for duplicated storage
anymore.

Use it_interval everywhere.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: John Stultz <john.stultz@linaro.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: "H.J. Lu" <hjl.tools@gmail.com>
Link: https://lkml.kernel.org/r/20190111133500.945255655@linutronix.de
---
 include/linux/posix-timers.h   |  2 +-
 kernel/time/posix-cpu-timers.c | 13 ++++++-------
 2 files changed, 7 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index e96581ca7c9d..b20798fc5191 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -12,7 +12,7 @@ struct siginfo;
 
 struct cpu_timer_list {
 	struct list_head entry;
-	u64 expires, incr;
+	u64 expires;
 	struct task_struct *task;
 	int firing;
 };
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index 80f955210861..0a426f4e3125 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -67,13 +67,13 @@ static void bump_cpu_timer(struct k_itimer *timer, u64 now)
 	int i;
 	u64 delta, incr;
 
-	if (timer->it.cpu.incr == 0)
+	if (!timer->it_interval)
 		return;
 
 	if (now < timer->it.cpu.expires)
 		return;
 
-	incr = timer->it.cpu.incr;
+	incr = timer->it_interval;
 	delta = now + incr - timer->it.cpu.expires;
 
 	/* Don't use (incr*2 < delta), incr*2 might overflow. */
@@ -520,7 +520,7 @@ static void cpu_timer_fire(struct k_itimer *timer)
 		 */
 		wake_up_process(timer->it_process);
 		timer->it.cpu.expires = 0;
-	} else if (timer->it.cpu.incr == 0) {
+	} else if (!timer->it_interval) {
 		/*
 		 * One-shot timer.  Clear it as soon as it's fired.
 		 */
@@ -606,7 +606,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags,
 	 */
 
 	ret = 0;
-	old_incr = timer->it.cpu.incr;
+	old_incr = timer->it_interval;
 	old_expires = timer->it.cpu.expires;
 	if (unlikely(timer->it.cpu.firing)) {
 		timer->it.cpu.firing = -1;
@@ -684,8 +684,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags,
 	 * Install the new reload setting, and
 	 * set up the signal and overrun bookkeeping.
 	 */
-	timer->it.cpu.incr = timespec64_to_ns(&new->it_interval);
-	timer->it_interval = ns_to_ktime(timer->it.cpu.incr);
+	timer->it_interval = timespec64_to_ktime(new->it_interval);
 
 	/*
 	 * This acts as a modification timestamp for the timer,
@@ -724,7 +723,7 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec64 *itp
 	/*
 	 * Easy part: convert the reload time.
 	 */
-	itp->it_interval = ns_to_timespec64(timer->it.cpu.incr);
+	itp->it_interval = ktime_to_timespec64(timer->it_interval);
 
 	if (!timer->it.cpu.expires)
 		return;
-- 
cgit v1.2.3


From 3d21ef0b49f84d3341984caafc5c658739674927 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Fri, 11 Jan 2019 15:58:39 +0100
Subject: ALSA: pcm: Suspend streams globally via device type PM ops

Until now we rely on each driver calling snd_pcm_suspend*() explicitly
at its own PM handling.  However, this can be done far more easily by
setting the PM ops to each actual snd_pcm device object.

This patch adds the device_type object for PCM stream and assigns to
each PCM stream object.  The type contains only the PM ops for system
suspend; we don't need to deal with the resume in general.

The suspend hook simply calls snd_pcm_suspend_all() for the given PCM
streams.  This implies that the PM order is correctly put, i.e. PCM is
suspended before the main (or codec) driver, which should be true in
general.  If a special ordering is needed, you'd need to adjust the
device PM order manually later.

This patch introduces a new flag, snd_pcm.no_device_suspend, too.
With this flag set, the PCM device object won't invoke
snd_pcm_suspend_all() by itself.  This is needed for ASoC who wants to
manage the PM call orders in its serialized way, and the flag is set
in soc_new_pcm() as default.

For the non-ASoC world, we can get rid of the manual snd_pcm_suspend
calls.  This will be done in the later patches.

Reviewed-by: Jaroslav Kysela <perex@perex.cz>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/sound/pcm.h |  1 +
 sound/core/pcm.c    | 26 ++++++++++++++++++++++++++
 sound/soc/soc-pcm.c |  1 +
 3 files changed, 28 insertions(+)

(limited to 'include')

diff --git a/include/sound/pcm.h b/include/sound/pcm.h
index d6bd3caf6878..04e97564949c 100644
--- a/include/sound/pcm.h
+++ b/include/sound/pcm.h
@@ -538,6 +538,7 @@ struct snd_pcm {
 	void (*private_free) (struct snd_pcm *pcm);
 	bool internal; /* pcm is for internal use only */
 	bool nonatomic; /* whole PCM operations are in non-atomic context */
+	bool no_device_suspend; /* don't invoke device PM suspend */
 #if IS_ENABLED(CONFIG_SND_PCM_OSS)
 	struct snd_pcm_oss oss;
 #endif
diff --git a/sound/core/pcm.c b/sound/core/pcm.c
index 01b9d62eef14..ca1ea3cf9350 100644
--- a/sound/core/pcm.c
+++ b/sound/core/pcm.c
@@ -683,6 +683,31 @@ static inline int snd_pcm_substream_proc_done(struct snd_pcm_substream *substrea
 
 static const struct attribute_group *pcm_dev_attr_groups[];
 
+/*
+ * PM callbacks: we need to deal only with suspend here, as the resume is
+ * triggered either from user-space or the driver's resume callback
+ */
+#ifdef CONFIG_PM_SLEEP
+static int do_pcm_suspend(struct device *dev)
+{
+	struct snd_pcm_str *pstr = container_of(dev, struct snd_pcm_str, dev);
+
+	if (!pstr->pcm->no_device_suspend)
+		snd_pcm_suspend_all(pstr->pcm);
+	return 0;
+}
+#endif
+
+static const struct dev_pm_ops pcm_dev_pm_ops = {
+	SET_SYSTEM_SLEEP_PM_OPS(do_pcm_suspend, NULL)
+};
+
+/* device type for PCM -- basically only for passing PM callbacks */
+static const struct device_type pcm_dev_type = {
+	.name = "pcm",
+	.pm = &pcm_dev_pm_ops,
+};
+
 /**
  * snd_pcm_new_stream - create a new PCM stream
  * @pcm: the pcm instance
@@ -713,6 +738,7 @@ int snd_pcm_new_stream(struct snd_pcm *pcm, int stream, int substream_count)
 
 	snd_device_initialize(&pstr->dev, pcm->card);
 	pstr->dev.groups = pcm_dev_attr_groups;
+	pstr->dev.type = &pcm_dev_type;
 	dev_set_name(&pstr->dev, "pcmC%iD%i%c", pcm->card->number, pcm->device,
 		     stream == SNDRV_PCM_STREAM_PLAYBACK ? 'p' : 'c');
 
diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c
index 03f36e534050..485eec5be608 100644
--- a/sound/soc/soc-pcm.c
+++ b/sound/soc/soc-pcm.c
@@ -3155,6 +3155,7 @@ int soc_new_pcm(struct snd_soc_pcm_runtime *rtd, int num)
 	}
 
 	pcm->private_free = soc_pcm_private_free;
+	pcm->no_device_suspend = true;
 out:
 	dev_info(rtd->card->dev, "%s <-> %s mapping ok\n",
 		 (rtd->num_codecs > 1) ? "multicodec" : rtd->codec_dai->name,
-- 
cgit v1.2.3


From ce7f93e2bd6f649980846914e4a04ad6ba141fa6 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Tue, 15 Jan 2019 10:54:02 +0100
Subject: ALSA: pcm: Make snd_pcm_suspend() local static

snd_pcm_suspend() is no longer called from outside, so let's make it
local static.  Also drop a superfluous NULL check there.

Reviewed-by: Jaroslav Kysela <perex@perex.cz>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/sound/pcm.h     |  5 -----
 sound/core/pcm_native.c | 11 +++--------
 2 files changed, 3 insertions(+), 13 deletions(-)

(limited to 'include')

diff --git a/include/sound/pcm.h b/include/sound/pcm.h
index 04e97564949c..2c30c1ad1b0d 100644
--- a/include/sound/pcm.h
+++ b/include/sound/pcm.h
@@ -582,13 +582,8 @@ int snd_pcm_stop(struct snd_pcm_substream *substream, snd_pcm_state_t status);
 int snd_pcm_drain_done(struct snd_pcm_substream *substream);
 int snd_pcm_stop_xrun(struct snd_pcm_substream *substream);
 #ifdef CONFIG_PM
-int snd_pcm_suspend(struct snd_pcm_substream *substream);
 int snd_pcm_suspend_all(struct snd_pcm *pcm);
 #else
-static inline int snd_pcm_suspend(struct snd_pcm_substream *substream)
-{
-	return 0;
-}
 static inline int snd_pcm_suspend_all(struct snd_pcm *pcm)
 {
 	return 0;
diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c
index 818dff1de545..26afb6b0889a 100644
--- a/sound/core/pcm_native.c
+++ b/sound/core/pcm_native.c
@@ -1460,29 +1460,24 @@ static const struct action_ops snd_pcm_action_suspend = {
 	.post_action = snd_pcm_post_suspend
 };
 
-/**
+/*
  * snd_pcm_suspend - trigger SUSPEND to all linked streams
  * @substream: the PCM substream
  *
  * After this call, all streams are changed to SUSPENDED state.
  *
- * Return: Zero if successful (or @substream is %NULL), or a negative error
- * code.
+ * Return: Zero if successful, or a negative error code.
  */
-int snd_pcm_suspend(struct snd_pcm_substream *substream)
+static int snd_pcm_suspend(struct snd_pcm_substream *substream)
 {
 	int err;
 	unsigned long flags;
 
-	if (! substream)
-		return 0;
-
 	snd_pcm_stream_lock_irqsave(substream, flags);
 	err = snd_pcm_action(&snd_pcm_action_suspend, substream, 0);
 	snd_pcm_stream_unlock_irqrestore(substream, flags);
 	return err;
 }
-EXPORT_SYMBOL(snd_pcm_suspend);
 
 /**
  * snd_pcm_suspend_all - trigger SUSPEND to all substreams in the given pcm
-- 
cgit v1.2.3


From f13c274da3896da705169409ad9b41df1ed84a33 Mon Sep 17 00:00:00 2001
From: Bob Moore <robert.moore@intel.com>
Date: Mon, 14 Jan 2019 09:55:23 -0800
Subject: ACPICA: Convert more ACPI errors to firmware errors

ACPICA commit f3198c12f2df9d170b3da891a180b774cfe01e59

Also adds a new firmware error function, acpi_bios_exception.

Link: https://github.com/acpica/acpica/commit/f3198c12
Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Erik Schmauss <erik.schmauss@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/acpica/dsopcode.c  | 15 +++++++-------
 drivers/acpi/acpica/exoparg2.c  |  8 ++++----
 drivers/acpi/acpica/utxferror.c | 44 +++++++++++++++++++++++++++++++++++++++++
 include/acpi/acoutput.h         |  2 ++
 include/acpi/acpixf.h           |  6 ++++++
 5 files changed, 64 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/drivers/acpi/acpica/dsopcode.c b/drivers/acpi/acpica/dsopcode.c
index 78f9de260d5f..a0b821031063 100644
--- a/drivers/acpi/acpica/dsopcode.c
+++ b/drivers/acpi/acpica/dsopcode.c
@@ -130,8 +130,8 @@ acpi_ds_init_buffer_field(u16 aml_opcode,
 		/* Must have a valid (>0) bit count */
 
 		if (bit_count == 0) {
-			ACPI_ERROR((AE_INFO,
-				    "Attempt to CreateField of length zero"));
+			ACPI_BIOS_ERROR((AE_INFO,
+					 "Attempt to CreateField of length zero"));
 			status = AE_AML_OPERAND_VALUE;
 			goto cleanup;
 		}
@@ -194,12 +194,13 @@ acpi_ds_init_buffer_field(u16 aml_opcode,
 	/* Entire field must fit within the current length of the buffer */
 
 	if ((bit_offset + bit_count) > (8 * (u32)buffer_desc->buffer.length)) {
-		ACPI_ERROR((AE_INFO,
-			    "Field [%4.4s] at bit offset/length %u/%u "
-			    "exceeds size of target Buffer (%u bits)",
-			    acpi_ut_get_node_name(result_desc), bit_offset,
-			    bit_count, 8 * (u32)buffer_desc->buffer.length));
 		status = AE_AML_BUFFER_LIMIT;
+		ACPI_BIOS_EXCEPTION((AE_INFO, status,
+				     "Field [%4.4s] at bit offset/length %u/%u "
+				     "exceeds size of target Buffer (%u bits)",
+				     acpi_ut_get_node_name(result_desc),
+				     bit_offset, bit_count,
+				     8 * (u32)buffer_desc->buffer.length));
 		goto cleanup;
 	}
 
diff --git a/drivers/acpi/acpica/exoparg2.c b/drivers/acpi/acpica/exoparg2.c
index 3a477566ba1b..e9509b1c9a3a 100644
--- a/drivers/acpi/acpica/exoparg2.c
+++ b/drivers/acpi/acpica/exoparg2.c
@@ -390,10 +390,10 @@ acpi_status acpi_ex_opcode_2A_1T_1R(struct acpi_walk_state *walk_state)
 		/* Failure means that the Index was beyond the end of the object */
 
 		if (ACPI_FAILURE(status)) {
-			ACPI_EXCEPTION((AE_INFO, status,
-					"Index (0x%X%8.8X) is beyond end of object (length 0x%X)",
-					ACPI_FORMAT_UINT64(index),
-					(u32)length));
+			ACPI_BIOS_EXCEPTION((AE_INFO, status,
+					     "Index (0x%X%8.8X) is beyond end of object (length 0x%X)",
+					     ACPI_FORMAT_UINT64(index),
+					     (u32)length));
 			goto cleanup;
 		}
 
diff --git a/drivers/acpi/acpica/utxferror.c b/drivers/acpi/acpica/utxferror.c
index 6bb85d691fcb..a1ed7fced4db 100644
--- a/drivers/acpi/acpica/utxferror.c
+++ b/drivers/acpi/acpica/utxferror.c
@@ -185,6 +185,50 @@ acpi_bios_error(const char *module_name,
 
 ACPI_EXPORT_SYMBOL(acpi_bios_error)
 
+/*******************************************************************************
+ *
+ * FUNCTION:    acpi_bios_exception
+ *
+ * PARAMETERS:  module_name         - Caller's module name (for error output)
+ *              line_number         - Caller's line number (for error output)
+ *              status              - Status value to be decoded/formatted
+ *              format              - Printf format string + additional args
+ *
+ * RETURN:      None
+ *
+ * DESCRIPTION: Print an "ACPI Firmware Error" message with module/line/version
+ *              info as well as decoded acpi_status.
+ *
+ ******************************************************************************/
+void ACPI_INTERNAL_VAR_XFACE
+acpi_bios_exception(const char *module_name,
+		    u32 line_number,
+		    acpi_status status, const char *format, ...)
+{
+	va_list arg_list;
+
+	ACPI_MSG_REDIRECT_BEGIN;
+
+	/* For AE_OK, just print the message */
+
+	if (ACPI_SUCCESS(status)) {
+		acpi_os_printf(ACPI_MSG_BIOS_ERROR);
+
+	} else {
+		acpi_os_printf(ACPI_MSG_BIOS_ERROR "%s, ",
+			       acpi_format_exception(status));
+	}
+
+	va_start(arg_list, format);
+	acpi_os_vprintf(format, arg_list);
+	ACPI_MSG_SUFFIX;
+	va_end(arg_list);
+
+	ACPI_MSG_REDIRECT_END;
+}
+
+ACPI_EXPORT_SYMBOL(acpi_bios_exception)
+
 /*******************************************************************************
  *
  * FUNCTION:    acpi_bios_warning
diff --git a/include/acpi/acoutput.h b/include/acpi/acoutput.h
index 6db9a6d40c85..93a280703318 100644
--- a/include/acpi/acoutput.h
+++ b/include/acpi/acoutput.h
@@ -201,6 +201,7 @@
 #define ACPI_EXCEPTION(plist)           acpi_exception plist
 #define ACPI_ERROR(plist)               acpi_error plist
 #define ACPI_BIOS_WARNING(plist)        acpi_bios_warning plist
+#define ACPI_BIOS_EXCEPTION(plist)      acpi_bios_exception plist
 #define ACPI_BIOS_ERROR(plist)          acpi_bios_error plist
 #define ACPI_DEBUG_OBJECT(obj,l,i)      acpi_ex_do_debug_object(obj,l,i)
 
@@ -213,6 +214,7 @@
 #define ACPI_EXCEPTION(plist)
 #define ACPI_ERROR(plist)
 #define ACPI_BIOS_WARNING(plist)
+#define ACPI_BIOS_EXCEPTION(plist)
 #define ACPI_BIOS_ERROR(plist)
 #define ACPI_DEBUG_OBJECT(obj,l,i)
 
diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h
index 7aa38b648564..f0adda015fc7 100644
--- a/include/acpi/acpixf.h
+++ b/include/acpi/acpixf.h
@@ -903,6 +903,12 @@ ACPI_MSG_DEPENDENT_RETURN_VOID(ACPI_PRINTF_LIKE(3)
 				acpi_bios_error(const char *module_name,
 						u32 line_number,
 						const char *format, ...))
+ACPI_MSG_DEPENDENT_RETURN_VOID(ACPI_PRINTF_LIKE(4)
+				void ACPI_INTERNAL_VAR_XFACE
+				acpi_bios_exception(const char *module_name,
+						    u32 line_number,
+						    acpi_status status,
+						    const char *format, ...))
 ACPI_MSG_DEPENDENT_RETURN_VOID(ACPI_PRINTF_LIKE(3)
 				void ACPI_INTERNAL_VAR_XFACE
 				acpi_bios_warning(const char *module_name,
-- 
cgit v1.2.3


From 840c02ca2215af648c781ae680d93d8aecd083b7 Mon Sep 17 00:00:00 2001
From: Bob Moore <robert.moore@intel.com>
Date: Mon, 14 Jan 2019 09:55:25 -0800
Subject: ACPICA: All acpica: Update copyrights to 2019

ACPICA commit 62f4f98e941d86e41969bf2ab5a93b8dc94dc49e

The update includes userspace tool signons.

Link: https://github.com/acpica/acpica/commit/62f4f98e
Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Erik Schmauss <erik.schmauss@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/acpica/acapps.h                             | 4 ++--
 drivers/acpi/acpica/accommon.h                           | 2 +-
 drivers/acpi/acpica/acconvert.h                          | 2 +-
 drivers/acpi/acpica/acdebug.h                            | 2 +-
 drivers/acpi/acpica/acdispat.h                           | 2 +-
 drivers/acpi/acpica/acevents.h                           | 2 +-
 drivers/acpi/acpica/acglobal.h                           | 2 +-
 drivers/acpi/acpica/achware.h                            | 2 +-
 drivers/acpi/acpica/acinterp.h                           | 2 +-
 drivers/acpi/acpica/aclocal.h                            | 2 +-
 drivers/acpi/acpica/acmacros.h                           | 2 +-
 drivers/acpi/acpica/acnamesp.h                           | 2 +-
 drivers/acpi/acpica/acobject.h                           | 2 +-
 drivers/acpi/acpica/acopcode.h                           | 2 +-
 drivers/acpi/acpica/acparser.h                           | 2 +-
 drivers/acpi/acpica/acpredef.h                           | 2 +-
 drivers/acpi/acpica/acresrc.h                            | 2 +-
 drivers/acpi/acpica/acstruct.h                           | 2 +-
 drivers/acpi/acpica/actables.h                           | 2 +-
 drivers/acpi/acpica/acutils.h                            | 2 +-
 drivers/acpi/acpica/amlcode.h                            | 2 +-
 drivers/acpi/acpica/amlresrc.h                           | 2 +-
 drivers/acpi/acpica/dbhistry.c                           | 2 +-
 drivers/acpi/acpica/dsargs.c                             | 2 +-
 drivers/acpi/acpica/dscontrol.c                          | 2 +-
 drivers/acpi/acpica/dsdebug.c                            | 2 +-
 drivers/acpi/acpica/dsfield.c                            | 2 +-
 drivers/acpi/acpica/dsinit.c                             | 2 +-
 drivers/acpi/acpica/dsmethod.c                           | 2 +-
 drivers/acpi/acpica/dsobject.c                           | 2 +-
 drivers/acpi/acpica/dsopcode.c                           | 2 +-
 drivers/acpi/acpica/dspkginit.c                          | 2 +-
 drivers/acpi/acpica/dswexec.c                            | 2 +-
 drivers/acpi/acpica/dswload.c                            | 2 +-
 drivers/acpi/acpica/dswload2.c                           | 2 +-
 drivers/acpi/acpica/dswscope.c                           | 2 +-
 drivers/acpi/acpica/dswstate.c                           | 2 +-
 drivers/acpi/acpica/evevent.c                            | 2 +-
 drivers/acpi/acpica/evglock.c                            | 2 +-
 drivers/acpi/acpica/evgpe.c                              | 2 +-
 drivers/acpi/acpica/evgpeblk.c                           | 2 +-
 drivers/acpi/acpica/evgpeinit.c                          | 2 +-
 drivers/acpi/acpica/evgpeutil.c                          | 2 +-
 drivers/acpi/acpica/evhandler.c                          | 2 +-
 drivers/acpi/acpica/evmisc.c                             | 2 +-
 drivers/acpi/acpica/evregion.c                           | 2 +-
 drivers/acpi/acpica/evrgnini.c                           | 2 +-
 drivers/acpi/acpica/evxface.c                            | 2 +-
 drivers/acpi/acpica/evxfevnt.c                           | 2 +-
 drivers/acpi/acpica/evxfgpe.c                            | 2 +-
 drivers/acpi/acpica/evxfregn.c                           | 2 +-
 drivers/acpi/acpica/exconcat.c                           | 2 +-
 drivers/acpi/acpica/exconfig.c                           | 2 +-
 drivers/acpi/acpica/exconvrt.c                           | 2 +-
 drivers/acpi/acpica/excreate.c                           | 2 +-
 drivers/acpi/acpica/exdebug.c                            | 2 +-
 drivers/acpi/acpica/exdump.c                             | 2 +-
 drivers/acpi/acpica/exfield.c                            | 2 +-
 drivers/acpi/acpica/exfldio.c                            | 2 +-
 drivers/acpi/acpica/exmisc.c                             | 2 +-
 drivers/acpi/acpica/exmutex.c                            | 2 +-
 drivers/acpi/acpica/exnames.c                            | 2 +-
 drivers/acpi/acpica/exoparg1.c                           | 2 +-
 drivers/acpi/acpica/exoparg2.c                           | 2 +-
 drivers/acpi/acpica/exoparg3.c                           | 2 +-
 drivers/acpi/acpica/exoparg6.c                           | 2 +-
 drivers/acpi/acpica/exprep.c                             | 2 +-
 drivers/acpi/acpica/exregion.c                           | 2 +-
 drivers/acpi/acpica/exresnte.c                           | 2 +-
 drivers/acpi/acpica/exresolv.c                           | 2 +-
 drivers/acpi/acpica/exresop.c                            | 2 +-
 drivers/acpi/acpica/exserial.c                           | 2 +-
 drivers/acpi/acpica/exstore.c                            | 2 +-
 drivers/acpi/acpica/exstoren.c                           | 2 +-
 drivers/acpi/acpica/exstorob.c                           | 2 +-
 drivers/acpi/acpica/exsystem.c                           | 2 +-
 drivers/acpi/acpica/extrace.c                            | 2 +-
 drivers/acpi/acpica/exutils.c                            | 2 +-
 drivers/acpi/acpica/hwacpi.c                             | 2 +-
 drivers/acpi/acpica/hwesleep.c                           | 2 +-
 drivers/acpi/acpica/hwgpe.c                              | 2 +-
 drivers/acpi/acpica/hwsleep.c                            | 2 +-
 drivers/acpi/acpica/hwtimer.c                            | 2 +-
 drivers/acpi/acpica/hwvalid.c                            | 2 +-
 drivers/acpi/acpica/hwxface.c                            | 2 +-
 drivers/acpi/acpica/hwxfsleep.c                          | 2 +-
 drivers/acpi/acpica/nsarguments.c                        | 2 +-
 drivers/acpi/acpica/nsconvert.c                          | 2 +-
 drivers/acpi/acpica/nsdump.c                             | 2 +-
 drivers/acpi/acpica/nsdumpdv.c                           | 2 +-
 drivers/acpi/acpica/nsinit.c                             | 2 +-
 drivers/acpi/acpica/nsload.c                             | 2 +-
 drivers/acpi/acpica/nsparse.c                            | 2 +-
 drivers/acpi/acpica/nspredef.c                           | 2 +-
 drivers/acpi/acpica/nsprepkg.c                           | 2 +-
 drivers/acpi/acpica/nsrepair.c                           | 2 +-
 drivers/acpi/acpica/nsrepair2.c                          | 2 +-
 drivers/acpi/acpica/nsutils.c                            | 2 +-
 drivers/acpi/acpica/nswalk.c                             | 2 +-
 drivers/acpi/acpica/nsxfname.c                           | 2 +-
 drivers/acpi/acpica/psargs.c                             | 2 +-
 drivers/acpi/acpica/psloop.c                             | 2 +-
 drivers/acpi/acpica/psobject.c                           | 2 +-
 drivers/acpi/acpica/psopcode.c                           | 2 +-
 drivers/acpi/acpica/psopinfo.c                           | 2 +-
 drivers/acpi/acpica/psparse.c                            | 2 +-
 drivers/acpi/acpica/psscope.c                            | 2 +-
 drivers/acpi/acpica/pstree.c                             | 2 +-
 drivers/acpi/acpica/psutils.c                            | 2 +-
 drivers/acpi/acpica/pswalk.c                             | 2 +-
 drivers/acpi/acpica/psxface.c                            | 2 +-
 drivers/acpi/acpica/tbdata.c                             | 2 +-
 drivers/acpi/acpica/tbfadt.c                             | 2 +-
 drivers/acpi/acpica/tbfind.c                             | 2 +-
 drivers/acpi/acpica/tbinstal.c                           | 2 +-
 drivers/acpi/acpica/tbprint.c                            | 2 +-
 drivers/acpi/acpica/tbutils.c                            | 2 +-
 drivers/acpi/acpica/tbxface.c                            | 2 +-
 drivers/acpi/acpica/tbxfload.c                           | 2 +-
 drivers/acpi/acpica/tbxfroot.c                           | 2 +-
 drivers/acpi/acpica/utaddress.c                          | 2 +-
 drivers/acpi/acpica/utalloc.c                            | 2 +-
 drivers/acpi/acpica/utascii.c                            | 2 +-
 drivers/acpi/acpica/utbuffer.c                           | 2 +-
 drivers/acpi/acpica/utcache.c                            | 2 +-
 drivers/acpi/acpica/utcopy.c                             | 2 +-
 drivers/acpi/acpica/utdebug.c                            | 2 +-
 drivers/acpi/acpica/utdecode.c                           | 2 +-
 drivers/acpi/acpica/uteval.c                             | 2 +-
 drivers/acpi/acpica/utglobal.c                           | 2 +-
 drivers/acpi/acpica/uthex.c                              | 2 +-
 drivers/acpi/acpica/utids.c                              | 2 +-
 drivers/acpi/acpica/utinit.c                             | 2 +-
 drivers/acpi/acpica/utlock.c                             | 2 +-
 drivers/acpi/acpica/utobject.c                           | 2 +-
 drivers/acpi/acpica/utosi.c                              | 2 +-
 drivers/acpi/acpica/utpredef.c                           | 2 +-
 drivers/acpi/acpica/utprint.c                            | 2 +-
 drivers/acpi/acpica/uttrack.c                            | 2 +-
 drivers/acpi/acpica/utuuid.c                             | 2 +-
 drivers/acpi/acpica/utxface.c                            | 2 +-
 drivers/acpi/acpica/utxfinit.c                           | 2 +-
 include/acpi/acbuffer.h                                  | 2 +-
 include/acpi/acconfig.h                                  | 2 +-
 include/acpi/acexcep.h                                   | 2 +-
 include/acpi/acnames.h                                   | 2 +-
 include/acpi/acoutput.h                                  | 2 +-
 include/acpi/acpi.h                                      | 2 +-
 include/acpi/acpiosxf.h                                  | 2 +-
 include/acpi/acpixf.h                                    | 2 +-
 include/acpi/acrestyp.h                                  | 2 +-
 include/acpi/actbl.h                                     | 2 +-
 include/acpi/actbl1.h                                    | 2 +-
 include/acpi/actbl2.h                                    | 2 +-
 include/acpi/actbl3.h                                    | 2 +-
 include/acpi/actypes.h                                   | 2 +-
 include/acpi/acuuid.h                                    | 2 +-
 include/acpi/platform/acenv.h                            | 2 +-
 include/acpi/platform/acenvex.h                          | 2 +-
 include/acpi/platform/acgcc.h                            | 2 +-
 include/acpi/platform/acgccex.h                          | 2 +-
 include/acpi/platform/acintel.h                          | 2 +-
 include/acpi/platform/aclinux.h                          | 2 +-
 include/acpi/platform/aclinuxex.h                        | 2 +-
 tools/power/acpi/common/cmfsize.c                        | 2 +-
 tools/power/acpi/common/getopt.c                         | 2 +-
 tools/power/acpi/os_specific/service_layers/oslinuxtbl.c | 2 +-
 tools/power/acpi/os_specific/service_layers/osunixdir.c  | 2 +-
 tools/power/acpi/os_specific/service_layers/osunixmap.c  | 2 +-
 tools/power/acpi/os_specific/service_layers/osunixxf.c   | 2 +-
 tools/power/acpi/tools/acpidump/acpidump.h               | 2 +-
 tools/power/acpi/tools/acpidump/apdump.c                 | 2 +-
 tools/power/acpi/tools/acpidump/apfiles.c                | 2 +-
 tools/power/acpi/tools/acpidump/apmain.c                 | 2 +-
 174 files changed, 175 insertions(+), 175 deletions(-)

(limited to 'include')

diff --git a/drivers/acpi/acpica/acapps.h b/drivers/acpi/acpica/acapps.h
index 5a9c2febc0fb..863ade9add6d 100644
--- a/drivers/acpi/acpica/acapps.h
+++ b/drivers/acpi/acpica/acapps.h
@@ -3,7 +3,7 @@
  *
  * Module Name: acapps - common include for ACPI applications/tools
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
@@ -17,7 +17,7 @@
 /* Common info for tool signons */
 
 #define ACPICA_NAME                 "Intel ACPI Component Architecture"
-#define ACPICA_COPYRIGHT            "Copyright (c) 2000 - 2018 Intel Corporation"
+#define ACPICA_COPYRIGHT            "Copyright (c) 2000 - 2019 Intel Corporation"
 
 #if ACPI_MACHINE_WIDTH == 64
 #define ACPI_WIDTH          " (64-bit version)"
diff --git a/drivers/acpi/acpica/accommon.h b/drivers/acpi/acpica/accommon.h
index 8bc935977d8e..54f81eac7ec9 100644
--- a/drivers/acpi/acpica/accommon.h
+++ b/drivers/acpi/acpica/accommon.h
@@ -3,7 +3,7 @@
  *
  * Name: accommon.h - Common include files for generation of ACPICA source
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/acconvert.h b/drivers/acpi/acpica/acconvert.h
index 4ebe18826646..d5478cd4a857 100644
--- a/drivers/acpi/acpica/acconvert.h
+++ b/drivers/acpi/acpica/acconvert.h
@@ -3,7 +3,7 @@
  *
  * Module Name: acapps - common include for ACPI applications/tools
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/acdebug.h b/drivers/acpi/acpica/acdebug.h
index 57d9495e5933..3a339045b1bf 100644
--- a/drivers/acpi/acpica/acdebug.h
+++ b/drivers/acpi/acpica/acdebug.h
@@ -3,7 +3,7 @@
  *
  * Name: acdebug.h - ACPI/AML debugger
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/acdispat.h b/drivers/acpi/acpica/acdispat.h
index e577f3a40e6a..82f81501566b 100644
--- a/drivers/acpi/acpica/acdispat.h
+++ b/drivers/acpi/acpica/acdispat.h
@@ -3,7 +3,7 @@
  *
  * Name: acdispat.h - dispatcher (parser to interpreter interface)
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/acevents.h b/drivers/acpi/acpica/acevents.h
index b412aa909907..831660179662 100644
--- a/drivers/acpi/acpica/acevents.h
+++ b/drivers/acpi/acpica/acevents.h
@@ -3,7 +3,7 @@
  *
  * Name: acevents.h - Event subcomponent prototypes and defines
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/acglobal.h b/drivers/acpi/acpica/acglobal.h
index 7a0aeb70ec34..d056a1845613 100644
--- a/drivers/acpi/acpica/acglobal.h
+++ b/drivers/acpi/acpica/acglobal.h
@@ -3,7 +3,7 @@
  *
  * Name: acglobal.h - Declarations for global variables
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/achware.h b/drivers/acpi/acpica/achware.h
index ef99e2fc37f8..bcf8f7501db7 100644
--- a/drivers/acpi/acpica/achware.h
+++ b/drivers/acpi/acpica/achware.h
@@ -3,7 +3,7 @@
  *
  * Name: achware.h -- hardware specific interfaces
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/acinterp.h b/drivers/acpi/acpica/acinterp.h
index c5b2be0b6613..20706adbc148 100644
--- a/drivers/acpi/acpica/acinterp.h
+++ b/drivers/acpi/acpica/acinterp.h
@@ -3,7 +3,7 @@
  *
  * Name: acinterp.h - Interpreter subcomponent prototypes and defines
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/aclocal.h b/drivers/acpi/acpica/aclocal.h
index 99b0da899109..18ec6627d674 100644
--- a/drivers/acpi/acpica/aclocal.h
+++ b/drivers/acpi/acpica/aclocal.h
@@ -3,7 +3,7 @@
  *
  * Name: aclocal.h - Internal data types used across the ACPI subsystem
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/acmacros.h b/drivers/acpi/acpica/acmacros.h
index de52cd6e868a..99131cd4e389 100644
--- a/drivers/acpi/acpica/acmacros.h
+++ b/drivers/acpi/acpica/acmacros.h
@@ -3,7 +3,7 @@
  *
  * Name: acmacros.h - C macros for the entire subsystem.
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/acnamesp.h b/drivers/acpi/acpica/acnamesp.h
index 9bd25f36c608..39812fc4386a 100644
--- a/drivers/acpi/acpica/acnamesp.h
+++ b/drivers/acpi/acpica/acnamesp.h
@@ -3,7 +3,7 @@
  *
  * Name: acnamesp.h - Namespace subcomponent prototypes and defines
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/acobject.h b/drivers/acpi/acpica/acobject.h
index ac992b6ebce9..0edf8fca7567 100644
--- a/drivers/acpi/acpica/acobject.h
+++ b/drivers/acpi/acpica/acobject.h
@@ -3,7 +3,7 @@
  *
  * Name: acobject.h - Definition of union acpi_operand_object  (Internal object only)
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/acopcode.h b/drivers/acpi/acpica/acopcode.h
index 818eba413614..9d78134428e3 100644
--- a/drivers/acpi/acpica/acopcode.h
+++ b/drivers/acpi/acpica/acopcode.h
@@ -3,7 +3,7 @@
  *
  * Name: acopcode.h - AML opcode information for the AML parser and interpreter
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/acparser.h b/drivers/acpi/acpica/acparser.h
index ab48196ae55e..6e32c97cba6c 100644
--- a/drivers/acpi/acpica/acparser.h
+++ b/drivers/acpi/acpica/acparser.h
@@ -3,7 +3,7 @@
  *
  * Module Name: acparser.h - AML Parser subcomponent prototypes and defines
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/acpredef.h b/drivers/acpi/acpica/acpredef.h
index d31bb04facb6..cc96b40032b3 100644
--- a/drivers/acpi/acpica/acpredef.h
+++ b/drivers/acpi/acpica/acpredef.h
@@ -3,7 +3,7 @@
  *
  * Name: acpredef - Information table for ACPI predefined methods and objects
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/acresrc.h b/drivers/acpi/acpica/acresrc.h
index 59ae8b1a6e40..422cd8f2b92e 100644
--- a/drivers/acpi/acpica/acresrc.h
+++ b/drivers/acpi/acpica/acresrc.h
@@ -3,7 +3,7 @@
  *
  * Name: acresrc.h - Resource Manager function prototypes
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/acstruct.h b/drivers/acpi/acpica/acstruct.h
index 14be32961b4c..8a4e6b4aaf2c 100644
--- a/drivers/acpi/acpica/acstruct.h
+++ b/drivers/acpi/acpica/acstruct.h
@@ -3,7 +3,7 @@
  *
  * Name: acstruct.h - Internal structs
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/actables.h b/drivers/acpi/acpica/actables.h
index 12fac33ce77e..dfbf1dbd4033 100644
--- a/drivers/acpi/acpica/actables.h
+++ b/drivers/acpi/acpica/actables.h
@@ -3,7 +3,7 @@
  *
  * Name: actables.h - ACPI table management
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/acutils.h b/drivers/acpi/acpica/acutils.h
index 3374d41582b5..9022537567e9 100644
--- a/drivers/acpi/acpica/acutils.h
+++ b/drivers/acpi/acpica/acutils.h
@@ -3,7 +3,7 @@
  *
  * Name: acutils.h -- prototypes for the common (subsystem-wide) procedures
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/amlcode.h b/drivers/acpi/acpica/amlcode.h
index 6c05355447c1..49e412edd7c6 100644
--- a/drivers/acpi/acpica/amlcode.h
+++ b/drivers/acpi/acpica/amlcode.h
@@ -5,7 +5,7 @@
  *                   Declarations and definitions contained herein are derived
  *                   directly from the ACPI specification.
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/amlresrc.h b/drivers/acpi/acpica/amlresrc.h
index cdb590176e9d..7c3bd4ab60fc 100644
--- a/drivers/acpi/acpica/amlresrc.h
+++ b/drivers/acpi/acpica/amlresrc.h
@@ -3,7 +3,7 @@
  *
  * Module Name: amlresrc.h - AML resource descriptors
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/dbhistry.c b/drivers/acpi/acpica/dbhistry.c
index b0b9a26c7db5..7809bd94a18d 100644
--- a/drivers/acpi/acpica/dbhistry.c
+++ b/drivers/acpi/acpica/dbhistry.c
@@ -3,7 +3,7 @@
  *
  * Module Name: dbhistry - debugger HISTORY command
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/dsargs.c b/drivers/acpi/acpica/dsargs.c
index 6b15625e8099..85b34d02233e 100644
--- a/drivers/acpi/acpica/dsargs.c
+++ b/drivers/acpi/acpica/dsargs.c
@@ -4,7 +4,7 @@
  * Module Name: dsargs - Support for execution of dynamic arguments for static
  *                       objects (regions, fields, buffer fields, etc.)
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/dscontrol.c b/drivers/acpi/acpica/dscontrol.c
index 0da96268deb5..4847f89c678c 100644
--- a/drivers/acpi/acpica/dscontrol.c
+++ b/drivers/acpi/acpica/dscontrol.c
@@ -4,7 +4,7 @@
  * Module Name: dscontrol - Support for execution control opcodes -
  *                          if/else/while/return
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/dsdebug.c b/drivers/acpi/acpica/dsdebug.c
index 9d33f0bb2885..0d3e1ced1f57 100644
--- a/drivers/acpi/acpica/dsdebug.c
+++ b/drivers/acpi/acpica/dsdebug.c
@@ -3,7 +3,7 @@
  *
  * Module Name: dsdebug - Parser/Interpreter interface - debugging
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/dsfield.c b/drivers/acpi/acpica/dsfield.c
index 30fe89545d6a..d51216c9d891 100644
--- a/drivers/acpi/acpica/dsfield.c
+++ b/drivers/acpi/acpica/dsfield.c
@@ -3,7 +3,7 @@
  *
  * Module Name: dsfield - Dispatcher field routines
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/dsinit.c b/drivers/acpi/acpica/dsinit.c
index e8de1b0ce2f5..a4a24ffe5fae 100644
--- a/drivers/acpi/acpica/dsinit.c
+++ b/drivers/acpi/acpica/dsinit.c
@@ -3,7 +3,7 @@
  *
  * Module Name: dsinit - Object initialization namespace walk
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/dsmethod.c b/drivers/acpi/acpica/dsmethod.c
index c1a4d02fafd5..f59b4d944f7f 100644
--- a/drivers/acpi/acpica/dsmethod.c
+++ b/drivers/acpi/acpica/dsmethod.c
@@ -3,7 +3,7 @@
  *
  * Module Name: dsmethod - Parser/Interpreter interface - control method parsing
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/dsobject.c b/drivers/acpi/acpica/dsobject.c
index 6a9cc613adaa..179129a2deb1 100644
--- a/drivers/acpi/acpica/dsobject.c
+++ b/drivers/acpi/acpica/dsobject.c
@@ -3,7 +3,7 @@
  *
  * Module Name: dsobject - Dispatcher object management routines
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/dsopcode.c b/drivers/acpi/acpica/dsopcode.c
index a0b821031063..cab14713e591 100644
--- a/drivers/acpi/acpica/dsopcode.c
+++ b/drivers/acpi/acpica/dsopcode.c
@@ -3,7 +3,7 @@
  *
  * Module Name: dsopcode - Dispatcher support for regions and fields
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/dspkginit.c b/drivers/acpi/acpica/dspkginit.c
index 584853385268..997faa10f615 100644
--- a/drivers/acpi/acpica/dspkginit.c
+++ b/drivers/acpi/acpica/dspkginit.c
@@ -3,7 +3,7 @@
  *
  * Module Name: dspkginit - Completion of deferred package initialization
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/dswexec.c b/drivers/acpi/acpica/dswexec.c
index 1504b93cc5f4..d75aae304595 100644
--- a/drivers/acpi/acpica/dswexec.c
+++ b/drivers/acpi/acpica/dswexec.c
@@ -4,7 +4,7 @@
  * Module Name: dswexec - Dispatcher method execution callbacks;
  *                        dispatch to interpreter.
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/dswload.c b/drivers/acpi/acpica/dswload.c
index e2ef09643d50..c88fd31208a5 100644
--- a/drivers/acpi/acpica/dswload.c
+++ b/drivers/acpi/acpica/dswload.c
@@ -3,7 +3,7 @@
  *
  * Module Name: dswload - Dispatcher first pass namespace load callbacks
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/dswload2.c b/drivers/acpi/acpica/dswload2.c
index 9a309f5c4de8..98cdc24c640f 100644
--- a/drivers/acpi/acpica/dswload2.c
+++ b/drivers/acpi/acpica/dswload2.c
@@ -3,7 +3,7 @@
  *
  * Module Name: dswload2 - Dispatcher second pass namespace load callbacks
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/dswscope.c b/drivers/acpi/acpica/dswscope.c
index 7592176a8fa2..39acf7b286da 100644
--- a/drivers/acpi/acpica/dswscope.c
+++ b/drivers/acpi/acpica/dswscope.c
@@ -3,7 +3,7 @@
  *
  * Module Name: dswscope - Scope stack manipulation
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/dswstate.c b/drivers/acpi/acpica/dswstate.c
index 4c1ec202d5ab..de79f835a373 100644
--- a/drivers/acpi/acpica/dswstate.c
+++ b/drivers/acpi/acpica/dswstate.c
@@ -3,7 +3,7 @@
  *
  * Module Name: dswstate - Dispatcher parse tree walk management routines
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/evevent.c b/drivers/acpi/acpica/evevent.c
index b3d07cc14d75..9e2f5a05c066 100644
--- a/drivers/acpi/acpica/evevent.c
+++ b/drivers/acpi/acpica/evevent.c
@@ -3,7 +3,7 @@
  *
  * Module Name: evevent - Fixed Event handling and dispatch
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/evglock.c b/drivers/acpi/acpica/evglock.c
index 1b8a662a14a9..5c77bee5d31f 100644
--- a/drivers/acpi/acpica/evglock.c
+++ b/drivers/acpi/acpica/evglock.c
@@ -3,7 +3,7 @@
  *
  * Module Name: evglock - Global Lock support
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/evgpe.c b/drivers/acpi/acpica/evgpe.c
index e10fec99a182..a5d435849d0d 100644
--- a/drivers/acpi/acpica/evgpe.c
+++ b/drivers/acpi/acpica/evgpe.c
@@ -3,7 +3,7 @@
  *
  * Module Name: evgpe - General Purpose Event handling and dispatch
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/evgpeblk.c b/drivers/acpi/acpica/evgpeblk.c
index b253063b09d3..328d1d6123ad 100644
--- a/drivers/acpi/acpica/evgpeblk.c
+++ b/drivers/acpi/acpica/evgpeblk.c
@@ -3,7 +3,7 @@
  *
  * Module Name: evgpeblk - GPE block creation and initialization.
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/evgpeinit.c b/drivers/acpi/acpica/evgpeinit.c
index 1f686750bb1a..c92d2f6ebe01 100644
--- a/drivers/acpi/acpica/evgpeinit.c
+++ b/drivers/acpi/acpica/evgpeinit.c
@@ -3,7 +3,7 @@
  *
  * Module Name: evgpeinit - System GPE initialization and update
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/evgpeutil.c b/drivers/acpi/acpica/evgpeutil.c
index 0fb6c70f44ed..917892227e09 100644
--- a/drivers/acpi/acpica/evgpeutil.c
+++ b/drivers/acpi/acpica/evgpeutil.c
@@ -3,7 +3,7 @@
  *
  * Module Name: evgpeutil - GPE utilities
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/evhandler.c b/drivers/acpi/acpica/evhandler.c
index 4ed1e67db6be..3ef4e27995f0 100644
--- a/drivers/acpi/acpica/evhandler.c
+++ b/drivers/acpi/acpica/evhandler.c
@@ -3,7 +3,7 @@
  *
  * Module Name: evhandler - Support for Address Space handlers
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/evmisc.c b/drivers/acpi/acpica/evmisc.c
index baadd635b5af..d45f7639f7ee 100644
--- a/drivers/acpi/acpica/evmisc.c
+++ b/drivers/acpi/acpica/evmisc.c
@@ -3,7 +3,7 @@
  *
  * Module Name: evmisc - Miscellaneous event manager support functions
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/evregion.c b/drivers/acpi/acpica/evregion.c
index 49decca4e08f..b683b3ad6baa 100644
--- a/drivers/acpi/acpica/evregion.c
+++ b/drivers/acpi/acpica/evregion.c
@@ -3,7 +3,7 @@
  *
  * Module Name: evregion - Operation Region support
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/evrgnini.c b/drivers/acpi/acpica/evrgnini.c
index 17df5dacd43c..163dd4a142eb 100644
--- a/drivers/acpi/acpica/evrgnini.c
+++ b/drivers/acpi/acpica/evrgnini.c
@@ -3,7 +3,7 @@
  *
  * Module Name: evrgnini- ACPI address_space (op_region) init
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/evxface.c b/drivers/acpi/acpica/evxface.c
index febc332b00ac..3df00eb6621b 100644
--- a/drivers/acpi/acpica/evxface.c
+++ b/drivers/acpi/acpica/evxface.c
@@ -3,7 +3,7 @@
  *
  * Module Name: evxface - External interfaces for ACPI events
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/evxfevnt.c b/drivers/acpi/acpica/evxfevnt.c
index 970e940bdb17..e528fe56b755 100644
--- a/drivers/acpi/acpica/evxfevnt.c
+++ b/drivers/acpi/acpica/evxfevnt.c
@@ -3,7 +3,7 @@
  *
  * Module Name: evxfevnt - External Interfaces, ACPI event disable/enable
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/evxfgpe.c b/drivers/acpi/acpica/evxfgpe.c
index b2d5f66cc1b0..fb1d215dd288 100644
--- a/drivers/acpi/acpica/evxfgpe.c
+++ b/drivers/acpi/acpica/evxfgpe.c
@@ -3,7 +3,7 @@
  *
  * Module Name: evxfgpe - External Interfaces for General Purpose Events (GPEs)
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/evxfregn.c b/drivers/acpi/acpica/evxfregn.c
index 3b3a25d9f0e6..47265b073e6f 100644
--- a/drivers/acpi/acpica/evxfregn.c
+++ b/drivers/acpi/acpica/evxfregn.c
@@ -4,7 +4,7 @@
  * Module Name: evxfregn - External Interfaces, ACPI Operation Regions and
  *                         Address Spaces.
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/exconcat.c b/drivers/acpi/acpica/exconcat.c
index 5e75c510ca25..c7af07566b7b 100644
--- a/drivers/acpi/acpica/exconcat.c
+++ b/drivers/acpi/acpica/exconcat.c
@@ -3,7 +3,7 @@
  *
  * Module Name: exconcat - Concatenate-type AML operators
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/exconfig.c b/drivers/acpi/acpica/exconfig.c
index 2373a7492151..587aeeeb5070 100644
--- a/drivers/acpi/acpica/exconfig.c
+++ b/drivers/acpi/acpica/exconfig.c
@@ -3,7 +3,7 @@
  *
  * Module Name: exconfig - Namespace reconfiguration (Load/Unload opcodes)
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/exconvrt.c b/drivers/acpi/acpica/exconvrt.c
index 1a70b80cc406..f5675f1b1928 100644
--- a/drivers/acpi/acpica/exconvrt.c
+++ b/drivers/acpi/acpica/exconvrt.c
@@ -3,7 +3,7 @@
  *
  * Module Name: exconvrt - Object conversion routines
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/excreate.c b/drivers/acpi/acpica/excreate.c
index 3304c6b1e8a7..f376fc00064e 100644
--- a/drivers/acpi/acpica/excreate.c
+++ b/drivers/acpi/acpica/excreate.c
@@ -3,7 +3,7 @@
  *
  * Module Name: excreate - Named object creation
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/exdebug.c b/drivers/acpi/acpica/exdebug.c
index ebbc244039ab..b1aeec8cac55 100644
--- a/drivers/acpi/acpica/exdebug.c
+++ b/drivers/acpi/acpica/exdebug.c
@@ -3,7 +3,7 @@
  *
  * Module Name: exdebug - Support for stores to the AML Debug Object
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/exdump.c b/drivers/acpi/acpica/exdump.c
index f71dfa1e90e1..6526b2deeaad 100644
--- a/drivers/acpi/acpica/exdump.c
+++ b/drivers/acpi/acpica/exdump.c
@@ -3,7 +3,7 @@
  *
  * Module Name: exdump - Interpreter debug output routines
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/exfield.c b/drivers/acpi/acpica/exfield.c
index e5798f15793a..7dcdde094582 100644
--- a/drivers/acpi/acpica/exfield.c
+++ b/drivers/acpi/acpica/exfield.c
@@ -3,7 +3,7 @@
  *
  * Module Name: exfield - AML execution - field_unit read/write
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/exfldio.c b/drivers/acpi/acpica/exfldio.c
index 516994133128..95a0dcb4f7b9 100644
--- a/drivers/acpi/acpica/exfldio.c
+++ b/drivers/acpi/acpica/exfldio.c
@@ -3,7 +3,7 @@
  *
  * Module Name: exfldio - Aml Field I/O
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/exmisc.c b/drivers/acpi/acpica/exmisc.c
index d91f15cdf3ae..60e854965af9 100644
--- a/drivers/acpi/acpica/exmisc.c
+++ b/drivers/acpi/acpica/exmisc.c
@@ -3,7 +3,7 @@
  *
  * Module Name: exmisc - ACPI AML (p-code) execution - specific opcodes
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/exmutex.c b/drivers/acpi/acpica/exmutex.c
index c06079774bad..775cd62af5b3 100644
--- a/drivers/acpi/acpica/exmutex.c
+++ b/drivers/acpi/acpica/exmutex.c
@@ -3,7 +3,7 @@
  *
  * Module Name: exmutex - ASL Mutex Acquire/Release functions
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/exnames.c b/drivers/acpi/acpica/exnames.c
index 7eed79dcda83..bd68d66e89f0 100644
--- a/drivers/acpi/acpica/exnames.c
+++ b/drivers/acpi/acpica/exnames.c
@@ -3,7 +3,7 @@
  *
  * Module Name: exnames - interpreter/scanner name load/execute
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/exoparg1.c b/drivers/acpi/acpica/exoparg1.c
index ba9fbae0cf91..06e35ea09823 100644
--- a/drivers/acpi/acpica/exoparg1.c
+++ b/drivers/acpi/acpica/exoparg1.c
@@ -3,7 +3,7 @@
  *
  * Module Name: exoparg1 - AML execution - opcodes with 1 argument
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/exoparg2.c b/drivers/acpi/acpica/exoparg2.c
index e9509b1c9a3a..5e4a31a11df4 100644
--- a/drivers/acpi/acpica/exoparg2.c
+++ b/drivers/acpi/acpica/exoparg2.c
@@ -3,7 +3,7 @@
  *
  * Module Name: exoparg2 - AML execution - opcodes with 2 arguments
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/exoparg3.c b/drivers/acpi/acpica/exoparg3.c
index 764fa6f924ff..a4ebce417930 100644
--- a/drivers/acpi/acpica/exoparg3.c
+++ b/drivers/acpi/acpica/exoparg3.c
@@ -3,7 +3,7 @@
  *
  * Module Name: exoparg3 - AML execution - opcodes with 3 arguments
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/exoparg6.c b/drivers/acpi/acpica/exoparg6.c
index 3941525f3d6b..31385a0b2dab 100644
--- a/drivers/acpi/acpica/exoparg6.c
+++ b/drivers/acpi/acpica/exoparg6.c
@@ -3,7 +3,7 @@
  *
  * Module Name: exoparg6 - AML execution - opcodes with 6 arguments
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/exprep.c b/drivers/acpi/acpica/exprep.c
index 738f3c732363..728d752f7adc 100644
--- a/drivers/acpi/acpica/exprep.c
+++ b/drivers/acpi/acpica/exprep.c
@@ -3,7 +3,7 @@
  *
  * Module Name: exprep - ACPI AML field prep utilities
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/exregion.c b/drivers/acpi/acpica/exregion.c
index 2c58f5e00b1a..c08521194b29 100644
--- a/drivers/acpi/acpica/exregion.c
+++ b/drivers/acpi/acpica/exregion.c
@@ -3,7 +3,7 @@
  *
  * Module Name: exregion - ACPI default op_region (address space) handlers
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/exresnte.c b/drivers/acpi/acpica/exresnte.c
index ea4b0fe674f1..b223d01e6bf8 100644
--- a/drivers/acpi/acpica/exresnte.c
+++ b/drivers/acpi/acpica/exresnte.c
@@ -3,7 +3,7 @@
  *
  * Module Name: exresnte - AML Interpreter object resolution
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/exresolv.c b/drivers/acpi/acpica/exresolv.c
index 5e42c7de46fa..36da5c0ef69c 100644
--- a/drivers/acpi/acpica/exresolv.c
+++ b/drivers/acpi/acpica/exresolv.c
@@ -3,7 +3,7 @@
  *
  * Module Name: exresolv - AML Interpreter object resolution
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/exresop.c b/drivers/acpi/acpica/exresop.c
index d94190bc5985..bdfe4d33b483 100644
--- a/drivers/acpi/acpica/exresop.c
+++ b/drivers/acpi/acpica/exresop.c
@@ -3,7 +3,7 @@
  *
  * Module Name: exresop - AML Interpreter operand/object resolution
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/exserial.c b/drivers/acpi/acpica/exserial.c
index ec61553c4483..89bc0a2d8163 100644
--- a/drivers/acpi/acpica/exserial.c
+++ b/drivers/acpi/acpica/exserial.c
@@ -3,7 +3,7 @@
  *
  * Module Name: exserial - field_unit support for serial address spaces
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/exstore.c b/drivers/acpi/acpica/exstore.c
index 75d5665b7b2f..7f3c3571c292 100644
--- a/drivers/acpi/acpica/exstore.c
+++ b/drivers/acpi/acpica/exstore.c
@@ -3,7 +3,7 @@
  *
  * Module Name: exstore - AML Interpreter object store support
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/exstoren.c b/drivers/acpi/acpica/exstoren.c
index 31cba19652ed..4e43c8277f07 100644
--- a/drivers/acpi/acpica/exstoren.c
+++ b/drivers/acpi/acpica/exstoren.c
@@ -4,7 +4,7 @@
  * Module Name: exstoren - AML Interpreter object store support,
  *                        Store to Node (namespace object)
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/exstorob.c b/drivers/acpi/acpica/exstorob.c
index 4cd82ff509bc..dc9e2b1c1ad9 100644
--- a/drivers/acpi/acpica/exstorob.c
+++ b/drivers/acpi/acpica/exstorob.c
@@ -3,7 +3,7 @@
  *
  * Module Name: exstorob - AML object store support, store to object
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/exsystem.c b/drivers/acpi/acpica/exsystem.c
index ec8b5a22cad4..a538f7799b78 100644
--- a/drivers/acpi/acpica/exsystem.c
+++ b/drivers/acpi/acpica/exsystem.c
@@ -3,7 +3,7 @@
  *
  * Module Name: exsystem - Interface to OS services
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/extrace.c b/drivers/acpi/acpica/extrace.c
index 9bd3fa56b51a..db7f93ca539f 100644
--- a/drivers/acpi/acpica/extrace.c
+++ b/drivers/acpi/acpica/extrace.c
@@ -3,7 +3,7 @@
  *
  * Module Name: extrace - Support for interpreter execution tracing
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/exutils.c b/drivers/acpi/acpica/exutils.c
index bd22e27adf9b..a1f26b0ad449 100644
--- a/drivers/acpi/acpica/exutils.c
+++ b/drivers/acpi/acpica/exutils.c
@@ -3,7 +3,7 @@
  *
  * Module Name: exutils - interpreter/scanner utilities
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/hwacpi.c b/drivers/acpi/acpica/hwacpi.c
index 525e6ea5c114..926f7e080f22 100644
--- a/drivers/acpi/acpica/hwacpi.c
+++ b/drivers/acpi/acpica/hwacpi.c
@@ -3,7 +3,7 @@
  *
  * Module Name: hwacpi - ACPI Hardware Initialization/Mode Interface
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/hwesleep.c b/drivers/acpi/acpica/hwesleep.c
index e0ad3f11142e..dee3affaca49 100644
--- a/drivers/acpi/acpica/hwesleep.c
+++ b/drivers/acpi/acpica/hwesleep.c
@@ -4,7 +4,7 @@
  * Name: hwesleep.c - ACPI Hardware Sleep/Wake Support functions for the
  *                    extended FADT-V5 sleep registers.
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/hwgpe.c b/drivers/acpi/acpica/hwgpe.c
index 2d2e2e41a685..565bd3f29f31 100644
--- a/drivers/acpi/acpica/hwgpe.c
+++ b/drivers/acpi/acpica/hwgpe.c
@@ -3,7 +3,7 @@
  *
  * Module Name: hwgpe - Low level GPE enable/disable/clear functions
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/hwsleep.c b/drivers/acpi/acpica/hwsleep.c
index d8b8fc2ff563..b62db8ec446f 100644
--- a/drivers/acpi/acpica/hwsleep.c
+++ b/drivers/acpi/acpica/hwsleep.c
@@ -4,7 +4,7 @@
  * Name: hwsleep.c - ACPI Hardware Sleep/Wake Support functions for the
  *                   original/legacy sleep/PM registers.
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/hwtimer.c b/drivers/acpi/acpica/hwtimer.c
index 5d5e27146fc2..2fb9f75d71c5 100644
--- a/drivers/acpi/acpica/hwtimer.c
+++ b/drivers/acpi/acpica/hwtimer.c
@@ -3,7 +3,7 @@
  *
  * Name: hwtimer.c - ACPI Power Management Timer Interface
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/hwvalid.c b/drivers/acpi/acpica/hwvalid.c
index 24f9b61aa404..cd576153257c 100644
--- a/drivers/acpi/acpica/hwvalid.c
+++ b/drivers/acpi/acpica/hwvalid.c
@@ -3,7 +3,7 @@
  *
  * Module Name: hwvalid - I/O request validation
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/hwxface.c b/drivers/acpi/acpica/hwxface.c
index 6e39a771a56e..c4fd97104024 100644
--- a/drivers/acpi/acpica/hwxface.c
+++ b/drivers/acpi/acpica/hwxface.c
@@ -3,7 +3,7 @@
  *
  * Module Name: hwxface - Public ACPICA hardware interfaces
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/hwxfsleep.c b/drivers/acpi/acpica/hwxfsleep.c
index 3f22f7dd4556..669fa73dd794 100644
--- a/drivers/acpi/acpica/hwxfsleep.c
+++ b/drivers/acpi/acpica/hwxfsleep.c
@@ -3,7 +3,7 @@
  *
  * Name: hwxfsleep.c - ACPI Hardware Sleep/Wake External Interfaces
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/nsarguments.c b/drivers/acpi/acpica/nsarguments.c
index b9ede797b654..0e97ed38973f 100644
--- a/drivers/acpi/acpica/nsarguments.c
+++ b/drivers/acpi/acpica/nsarguments.c
@@ -3,7 +3,7 @@
  *
  * Module Name: nsarguments - Validation of args for ACPI predefined methods
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/nsconvert.c b/drivers/acpi/acpica/nsconvert.c
index f9527346b0f7..14cbf63f1991 100644
--- a/drivers/acpi/acpica/nsconvert.c
+++ b/drivers/acpi/acpica/nsconvert.c
@@ -4,7 +4,7 @@
  * Module Name: nsconvert - Object conversions for objects returned by
  *                          predefined methods
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/nsdump.c b/drivers/acpi/acpica/nsdump.c
index 90ccffcd770b..15070bd0c28a 100644
--- a/drivers/acpi/acpica/nsdump.c
+++ b/drivers/acpi/acpica/nsdump.c
@@ -3,7 +3,7 @@
  *
  * Module Name: nsdump - table dumping routines for debug
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/nsdumpdv.c b/drivers/acpi/acpica/nsdumpdv.c
index 2b291c500fb0..73e5c83c8c9f 100644
--- a/drivers/acpi/acpica/nsdumpdv.c
+++ b/drivers/acpi/acpica/nsdumpdv.c
@@ -3,7 +3,7 @@
  *
  * Module Name: nsdump - table dumping routines for debug
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/nsinit.c b/drivers/acpi/acpica/nsinit.c
index d77257d1c827..19fb8dda870f 100644
--- a/drivers/acpi/acpica/nsinit.c
+++ b/drivers/acpi/acpica/nsinit.c
@@ -3,7 +3,7 @@
  *
  * Module Name: nsinit - namespace initialization
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/nsload.c b/drivers/acpi/acpica/nsload.c
index 04bc73e82aed..6b90aa89f942 100644
--- a/drivers/acpi/acpica/nsload.c
+++ b/drivers/acpi/acpica/nsload.c
@@ -3,7 +3,7 @@
  *
  * Module Name: nsload - namespace loading/expanding/contracting procedures
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/nsparse.c b/drivers/acpi/acpica/nsparse.c
index 488ff39d86f7..bfa408f7fd39 100644
--- a/drivers/acpi/acpica/nsparse.c
+++ b/drivers/acpi/acpica/nsparse.c
@@ -3,7 +3,7 @@
  *
  * Module Name: nsparse - namespace interface to AML parser
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/nspredef.c b/drivers/acpi/acpica/nspredef.c
index 29c68b15a64f..2f9d93122d0c 100644
--- a/drivers/acpi/acpica/nspredef.c
+++ b/drivers/acpi/acpica/nspredef.c
@@ -3,7 +3,7 @@
  *
  * Module Name: nspredef - Validation of ACPI predefined methods and objects
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/nsprepkg.c b/drivers/acpi/acpica/nsprepkg.c
index 51523473e7fe..9a80e3b23496 100644
--- a/drivers/acpi/acpica/nsprepkg.c
+++ b/drivers/acpi/acpica/nsprepkg.c
@@ -3,7 +3,7 @@
  *
  * Module Name: nsprepkg - Validation of package objects for predefined names
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/nsrepair.c b/drivers/acpi/acpica/nsrepair.c
index ff2ab8fbec38..0aacfa48e20d 100644
--- a/drivers/acpi/acpica/nsrepair.c
+++ b/drivers/acpi/acpica/nsrepair.c
@@ -3,7 +3,7 @@
  *
  * Module Name: nsrepair - Repair for objects returned by predefined methods
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/nsrepair2.c b/drivers/acpi/acpica/nsrepair2.c
index a3bd6280882c..d5804a6d1d65 100644
--- a/drivers/acpi/acpica/nsrepair2.c
+++ b/drivers/acpi/acpica/nsrepair2.c
@@ -4,7 +4,7 @@
  * Module Name: nsrepair2 - Repair for objects returned by specific
  *                          predefined methods
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/nsutils.c b/drivers/acpi/acpica/nsutils.c
index a2bf4b2caa6c..0c5662c4168c 100644
--- a/drivers/acpi/acpica/nsutils.c
+++ b/drivers/acpi/acpica/nsutils.c
@@ -4,7 +4,7 @@
  * Module Name: nsutils - Utilities for accessing ACPI namespace, accessing
  *                        parents and siblings and Scope manipulation
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/nswalk.c b/drivers/acpi/acpica/nswalk.c
index e9a061da9bb2..ceea6af79d12 100644
--- a/drivers/acpi/acpica/nswalk.c
+++ b/drivers/acpi/acpica/nswalk.c
@@ -3,7 +3,7 @@
  *
  * Module Name: nswalk - Functions for walking the ACPI namespace
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/nsxfname.c b/drivers/acpi/acpica/nsxfname.c
index b2915c9cceaf..de2d3135d6a9 100644
--- a/drivers/acpi/acpica/nsxfname.c
+++ b/drivers/acpi/acpica/nsxfname.c
@@ -4,7 +4,7 @@
  * Module Name: nsxfname - Public interfaces to the ACPI subsystem
  *                         ACPI Namespace oriented interfaces
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/psargs.c b/drivers/acpi/acpica/psargs.c
index 176d28d60125..9d9d442cd999 100644
--- a/drivers/acpi/acpica/psargs.c
+++ b/drivers/acpi/acpica/psargs.c
@@ -3,7 +3,7 @@
  *
  * Module Name: psargs - Parse AML opcode arguments
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/psloop.c b/drivers/acpi/acpica/psloop.c
index e00d1af6fa80..4796f17805ac 100644
--- a/drivers/acpi/acpica/psloop.c
+++ b/drivers/acpi/acpica/psloop.c
@@ -3,7 +3,7 @@
  *
  * Module Name: psloop - Main AML parse loop
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/psobject.c b/drivers/acpi/acpica/psobject.c
index e1fd819a2955..98e5c7400e54 100644
--- a/drivers/acpi/acpica/psobject.c
+++ b/drivers/acpi/acpica/psobject.c
@@ -3,7 +3,7 @@
  *
  * Module Name: psobject - Support for parse objects
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/psopcode.c b/drivers/acpi/acpica/psopcode.c
index 8d7dc98bad17..43775c5ce17c 100644
--- a/drivers/acpi/acpica/psopcode.c
+++ b/drivers/acpi/acpica/psopcode.c
@@ -3,7 +3,7 @@
  *
  * Module Name: psopcode - Parser/Interpreter opcode information table
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/psopinfo.c b/drivers/acpi/acpica/psopinfo.c
index f310954eea59..15e7563829f1 100644
--- a/drivers/acpi/acpica/psopinfo.c
+++ b/drivers/acpi/acpica/psopinfo.c
@@ -3,7 +3,7 @@
  *
  * Module Name: psopinfo - AML opcode information functions and dispatch tables
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/psparse.c b/drivers/acpi/acpica/psparse.c
index 65603473b6cb..3f381ffd9581 100644
--- a/drivers/acpi/acpica/psparse.c
+++ b/drivers/acpi/acpica/psparse.c
@@ -3,7 +3,7 @@
  *
  * Module Name: psparse - Parser top level AML parse routines
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/psscope.c b/drivers/acpi/acpica/psscope.c
index 00c67bc249aa..f153ca804740 100644
--- a/drivers/acpi/acpica/psscope.c
+++ b/drivers/acpi/acpica/psscope.c
@@ -3,7 +3,7 @@
  *
  * Module Name: psscope - Parser scope stack management routines
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/pstree.c b/drivers/acpi/acpica/pstree.c
index 64a8329a17f1..22d8a2becdd0 100644
--- a/drivers/acpi/acpica/pstree.c
+++ b/drivers/acpi/acpica/pstree.c
@@ -3,7 +3,7 @@
  *
  * Module Name: pstree - Parser op tree manipulation/traversal/search
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/psutils.c b/drivers/acpi/acpica/psutils.c
index ef8a5805a836..2512f584fa3c 100644
--- a/drivers/acpi/acpica/psutils.c
+++ b/drivers/acpi/acpica/psutils.c
@@ -3,7 +3,7 @@
  *
  * Module Name: psutils - Parser miscellaneous utilities (Parser only)
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/pswalk.c b/drivers/acpi/acpica/pswalk.c
index bd6af8c87d48..cf91841297c2 100644
--- a/drivers/acpi/acpica/pswalk.c
+++ b/drivers/acpi/acpica/pswalk.c
@@ -3,7 +3,7 @@
  *
  * Module Name: pswalk - Parser routines to walk parsed op tree(s)
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/psxface.c b/drivers/acpi/acpica/psxface.c
index 5743b22399a0..ee2ee2c858f2 100644
--- a/drivers/acpi/acpica/psxface.c
+++ b/drivers/acpi/acpica/psxface.c
@@ -3,7 +3,7 @@
  *
  * Module Name: psxface - Parser external interfaces
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/tbdata.c b/drivers/acpi/acpica/tbdata.c
index 862149c8a208..0cecd0039acf 100644
--- a/drivers/acpi/acpica/tbdata.c
+++ b/drivers/acpi/acpica/tbdata.c
@@ -3,7 +3,7 @@
  *
  * Module Name: tbdata - Table manager data structure functions
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/tbfadt.c b/drivers/acpi/acpica/tbfadt.c
index 99d325a51816..2c67351d5a79 100644
--- a/drivers/acpi/acpica/tbfadt.c
+++ b/drivers/acpi/acpica/tbfadt.c
@@ -3,7 +3,7 @@
  *
  * Module Name: tbfadt   - FADT table utilities
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/tbfind.c b/drivers/acpi/acpica/tbfind.c
index f00694b1d000..951bd8e1c50a 100644
--- a/drivers/acpi/acpica/tbfind.c
+++ b/drivers/acpi/acpica/tbfind.c
@@ -3,7 +3,7 @@
  *
  * Module Name: tbfind   - find table
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/tbinstal.c b/drivers/acpi/acpica/tbinstal.c
index 5f8e7b561c90..be6642bf6366 100644
--- a/drivers/acpi/acpica/tbinstal.c
+++ b/drivers/acpi/acpica/tbinstal.c
@@ -3,7 +3,7 @@
  *
  * Module Name: tbinstal - ACPI table installation and removal
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/tbprint.c b/drivers/acpi/acpica/tbprint.c
index e303418a895b..9b5df95d881b 100644
--- a/drivers/acpi/acpica/tbprint.c
+++ b/drivers/acpi/acpica/tbprint.c
@@ -3,7 +3,7 @@
  *
  * Module Name: tbprint - Table output utilities
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/tbutils.c b/drivers/acpi/acpica/tbutils.c
index b526096560b5..2469e01310e2 100644
--- a/drivers/acpi/acpica/tbutils.c
+++ b/drivers/acpi/acpica/tbutils.c
@@ -3,7 +3,7 @@
  *
  * Module Name: tbutils - ACPI Table utilities
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/tbxface.c b/drivers/acpi/acpica/tbxface.c
index e4d0dc8948cd..593c20c20104 100644
--- a/drivers/acpi/acpica/tbxface.c
+++ b/drivers/acpi/acpica/tbxface.c
@@ -3,7 +3,7 @@
  *
  * Module Name: tbxface - ACPI table-oriented external interfaces
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/tbxfload.c b/drivers/acpi/acpica/tbxfload.c
index 9011297552af..2c573fcc8c70 100644
--- a/drivers/acpi/acpica/tbxfload.c
+++ b/drivers/acpi/acpica/tbxfload.c
@@ -3,7 +3,7 @@
  *
  * Module Name: tbxfload - Table load/unload external interfaces
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/tbxfroot.c b/drivers/acpi/acpica/tbxfroot.c
index 483d0ce5180a..e2859d09ca2e 100644
--- a/drivers/acpi/acpica/tbxfroot.c
+++ b/drivers/acpi/acpica/tbxfroot.c
@@ -3,7 +3,7 @@
  *
  * Module Name: tbxfroot - Find the root ACPI table (RSDT)
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/utaddress.c b/drivers/acpi/acpica/utaddress.c
index dbabe680ff58..bb260376bd59 100644
--- a/drivers/acpi/acpica/utaddress.c
+++ b/drivers/acpi/acpica/utaddress.c
@@ -3,7 +3,7 @@
  *
  * Module Name: utaddress - op_region address range check
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/utalloc.c b/drivers/acpi/acpica/utalloc.c
index 8cbcd7d6bd5e..d64da4d9e8d0 100644
--- a/drivers/acpi/acpica/utalloc.c
+++ b/drivers/acpi/acpica/utalloc.c
@@ -3,7 +3,7 @@
  *
  * Module Name: utalloc - local memory allocation routines
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/utascii.c b/drivers/acpi/acpica/utascii.c
index 04ff61e284f5..79d7426fd7bf 100644
--- a/drivers/acpi/acpica/utascii.c
+++ b/drivers/acpi/acpica/utascii.c
@@ -3,7 +3,7 @@
  *
  * Module Name: utascii - Utility ascii functions
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/utbuffer.c b/drivers/acpi/acpica/utbuffer.c
index fffa6f5ae59e..61db9967ebe4 100644
--- a/drivers/acpi/acpica/utbuffer.c
+++ b/drivers/acpi/acpica/utbuffer.c
@@ -3,7 +3,7 @@
  *
  * Module Name: utbuffer - Buffer dump routines
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/utcache.c b/drivers/acpi/acpica/utcache.c
index 97d6ec174c28..8533fce7fa93 100644
--- a/drivers/acpi/acpica/utcache.c
+++ b/drivers/acpi/acpica/utcache.c
@@ -3,7 +3,7 @@
  *
  * Module Name: utcache - local cache allocation routines
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/utcopy.c b/drivers/acpi/acpica/utcopy.c
index a872ed7879ca..1fb8327f3c3b 100644
--- a/drivers/acpi/acpica/utcopy.c
+++ b/drivers/acpi/acpica/utcopy.c
@@ -3,7 +3,7 @@
  *
  * Module Name: utcopy - Internal to external object translation utilities
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/utdebug.c b/drivers/acpi/acpica/utdebug.c
index aabdc25effd9..01b1b36c8a8e 100644
--- a/drivers/acpi/acpica/utdebug.c
+++ b/drivers/acpi/acpica/utdebug.c
@@ -3,7 +3,7 @@
  *
  * Module Name: utdebug - Debug print/trace routines
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/utdecode.c b/drivers/acpi/acpica/utdecode.c
index d0a375c3f4e2..6be69c28c34e 100644
--- a/drivers/acpi/acpica/utdecode.c
+++ b/drivers/acpi/acpica/utdecode.c
@@ -3,7 +3,7 @@
  *
  * Module Name: utdecode - Utility decoding routines (value-to-string)
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/uteval.c b/drivers/acpi/acpica/uteval.c
index c56ae6e058d5..558a9f3b0678 100644
--- a/drivers/acpi/acpica/uteval.c
+++ b/drivers/acpi/acpica/uteval.c
@@ -3,7 +3,7 @@
  *
  * Module Name: uteval - Object evaluation
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/utglobal.c b/drivers/acpi/acpica/utglobal.c
index f8c5b49344df..b0622ec4bb85 100644
--- a/drivers/acpi/acpica/utglobal.c
+++ b/drivers/acpi/acpica/utglobal.c
@@ -3,7 +3,7 @@
  *
  * Module Name: utglobal - Global variables for the ACPI subsystem
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/uthex.c b/drivers/acpi/acpica/uthex.c
index 3d63a9e8da4f..b6da135d5f41 100644
--- a/drivers/acpi/acpica/uthex.c
+++ b/drivers/acpi/acpica/uthex.c
@@ -3,7 +3,7 @@
  *
  * Module Name: uthex -- Hex/ASCII support functions
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/utids.c b/drivers/acpi/acpica/utids.c
index 70e6bf1107a1..e805abdd95b8 100644
--- a/drivers/acpi/acpica/utids.c
+++ b/drivers/acpi/acpica/utids.c
@@ -3,7 +3,7 @@
  *
  * Module Name: utids - support for device Ids - HID, UID, CID, SUB, CLS
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/utinit.c b/drivers/acpi/acpica/utinit.c
index 0646ed62b351..bc124591320e 100644
--- a/drivers/acpi/acpica/utinit.c
+++ b/drivers/acpi/acpica/utinit.c
@@ -3,7 +3,7 @@
  *
  * Module Name: utinit - Common ACPI subsystem initialization
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/utlock.c b/drivers/acpi/acpica/utlock.c
index d61e01bd01a3..8b4ff11d617a 100644
--- a/drivers/acpi/acpica/utlock.c
+++ b/drivers/acpi/acpica/utlock.c
@@ -3,7 +3,7 @@
  *
  * Module Name: utlock - Reader/Writer lock interfaces
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/utobject.c b/drivers/acpi/acpica/utobject.c
index ae6d8cc18cec..eee97a902696 100644
--- a/drivers/acpi/acpica/utobject.c
+++ b/drivers/acpi/acpica/utobject.c
@@ -3,7 +3,7 @@
  *
  * Module Name: utobject - ACPI object create/delete/size/cache routines
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/utosi.c b/drivers/acpi/acpica/utosi.c
index 902a47463abf..688c61a90725 100644
--- a/drivers/acpi/acpica/utosi.c
+++ b/drivers/acpi/acpica/utosi.c
@@ -3,7 +3,7 @@
  *
  * Module Name: utosi - Support for the _OSI predefined control method
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/utpredef.c b/drivers/acpi/acpica/utpredef.c
index 65ca9807c2a8..a9f08f43c685 100644
--- a/drivers/acpi/acpica/utpredef.c
+++ b/drivers/acpi/acpica/utpredef.c
@@ -3,7 +3,7 @@
  *
  * Module Name: utpredef - support functions for predefined names
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/utprint.c b/drivers/acpi/acpica/utprint.c
index a98c334c3bb7..5839f2fa7400 100644
--- a/drivers/acpi/acpica/utprint.c
+++ b/drivers/acpi/acpica/utprint.c
@@ -3,7 +3,7 @@
  *
  * Module Name: utprint - Formatted printing routines
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/uttrack.c b/drivers/acpi/acpica/uttrack.c
index f6b55e2482ee..8052f7ef5025 100644
--- a/drivers/acpi/acpica/uttrack.c
+++ b/drivers/acpi/acpica/uttrack.c
@@ -3,7 +3,7 @@
  *
  * Module Name: uttrack - Memory allocation tracking routines (debug only)
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/utuuid.c b/drivers/acpi/acpica/utuuid.c
index 59ae118092a3..0a7cf8007643 100644
--- a/drivers/acpi/acpica/utuuid.c
+++ b/drivers/acpi/acpica/utuuid.c
@@ -3,7 +3,7 @@
  *
  * Module Name: utuuid -- UUID support functions
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/utxface.c b/drivers/acpi/acpica/utxface.c
index d2d6cc065181..f497c4b30e65 100644
--- a/drivers/acpi/acpica/utxface.c
+++ b/drivers/acpi/acpica/utxface.c
@@ -3,7 +3,7 @@
  *
  * Module Name: utxface - External interfaces, miscellaneous utility functions
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/drivers/acpi/acpica/utxfinit.c b/drivers/acpi/acpica/utxfinit.c
index e3c60f57c9f0..9f3b1e3a09de 100644
--- a/drivers/acpi/acpica/utxfinit.c
+++ b/drivers/acpi/acpica/utxfinit.c
@@ -3,7 +3,7 @@
  *
  * Module Name: utxfinit - External interfaces for ACPICA initialization
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/include/acpi/acbuffer.h b/include/acpi/acbuffer.h
index 6488d572739c..c2acd29f973d 100644
--- a/include/acpi/acbuffer.h
+++ b/include/acpi/acbuffer.h
@@ -3,7 +3,7 @@
  *
  * Name: acbuffer.h - Support for buffers returned by ACPI predefined names
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/include/acpi/acconfig.h b/include/acpi/acconfig.h
index 53c088247d36..298e15100789 100644
--- a/include/acpi/acconfig.h
+++ b/include/acpi/acconfig.h
@@ -3,7 +3,7 @@
  *
  * Name: acconfig.h - Global configuration constants
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/include/acpi/acexcep.h b/include/acpi/acexcep.h
index 09f46050961f..6b4fed5b9d11 100644
--- a/include/acpi/acexcep.h
+++ b/include/acpi/acexcep.h
@@ -3,7 +3,7 @@
  *
  * Name: acexcep.h - Exception codes returned by the ACPI subsystem
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/include/acpi/acnames.h b/include/acpi/acnames.h
index 6f69a4f638f8..8b3eae96706a 100644
--- a/include/acpi/acnames.h
+++ b/include/acpi/acnames.h
@@ -3,7 +3,7 @@
  *
  * Name: acnames.h - Global names and strings
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/include/acpi/acoutput.h b/include/acpi/acoutput.h
index 93a280703318..30b1ae53689f 100644
--- a/include/acpi/acoutput.h
+++ b/include/acpi/acoutput.h
@@ -3,7 +3,7 @@
  *
  * Name: acoutput.h -- debug output
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/include/acpi/acpi.h b/include/acpi/acpi.h
index ccdc5981bc91..bc7d39ecf574 100644
--- a/include/acpi/acpi.h
+++ b/include/acpi/acpi.h
@@ -3,7 +3,7 @@
  *
  * Name: acpi.h - Master public include file used to interface to ACPICA
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/include/acpi/acpiosxf.h b/include/acpi/acpiosxf.h
index eb1f21af7556..1b59fb61f67d 100644
--- a/include/acpi/acpiosxf.h
+++ b/include/acpi/acpiosxf.h
@@ -5,7 +5,7 @@
  *                    interfaces must be implemented by OSL to interface the
  *                    ACPI components to the host operating system.
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h
index f0adda015fc7..3a3febc9b979 100644
--- a/include/acpi/acpixf.h
+++ b/include/acpi/acpixf.h
@@ -3,7 +3,7 @@
  *
  * Name: acpixf.h - External interfaces to the ACPI subsystem
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/include/acpi/acrestyp.h b/include/acpi/acrestyp.h
index 724ad5f29a16..000145ded0ec 100644
--- a/include/acpi/acrestyp.h
+++ b/include/acpi/acrestyp.h
@@ -3,7 +3,7 @@
  *
  * Name: acrestyp.h - Defines, types, and structures for resource descriptors
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/include/acpi/actbl.h b/include/acpi/actbl.h
index 0a977eca0a74..65cc9cbf1141 100644
--- a/include/acpi/actbl.h
+++ b/include/acpi/actbl.h
@@ -3,7 +3,7 @@
  *
  * Name: actbl.h - Basic ACPI Table Definitions
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/include/acpi/actbl1.h b/include/acpi/actbl1.h
index ab424509cae9..45b0f532f63b 100644
--- a/include/acpi/actbl1.h
+++ b/include/acpi/actbl1.h
@@ -3,7 +3,7 @@
  *
  * Name: actbl1.h - Additional ACPI table definitions
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/include/acpi/actbl2.h b/include/acpi/actbl2.h
index c50ef7e6b942..be01efdede32 100644
--- a/include/acpi/actbl2.h
+++ b/include/acpi/actbl2.h
@@ -3,7 +3,7 @@
  *
  * Name: actbl2.h - ACPI Table Definitions (tables not in ACPI spec)
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/include/acpi/actbl3.h b/include/acpi/actbl3.h
index ea1ca49c9c1b..a6014e9a7113 100644
--- a/include/acpi/actbl3.h
+++ b/include/acpi/actbl3.h
@@ -3,7 +3,7 @@
  *
  * Name: actbl3.h - ACPI Table Definitions
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h
index 2590627dbfcc..913a7cfb61e6 100644
--- a/include/acpi/actypes.h
+++ b/include/acpi/actypes.h
@@ -3,7 +3,7 @@
  *
  * Name: actypes.h - Common data types for the entire ACPI subsystem
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/include/acpi/acuuid.h b/include/acpi/acuuid.h
index e63f214531ad..23262cab047a 100644
--- a/include/acpi/acuuid.h
+++ b/include/acpi/acuuid.h
@@ -3,7 +3,7 @@
  *
  * Name: acuuid.h - ACPI-related UUID/GUID definitions
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/include/acpi/platform/acenv.h b/include/acpi/platform/acenv.h
index f444e5b0fdaa..35ab3f87cc29 100644
--- a/include/acpi/platform/acenv.h
+++ b/include/acpi/platform/acenv.h
@@ -3,7 +3,7 @@
  *
  * Name: acenv.h - Host and compiler configuration
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/include/acpi/platform/acenvex.h b/include/acpi/platform/acenvex.h
index 47d690eafe4c..2e36c8344897 100644
--- a/include/acpi/platform/acenvex.h
+++ b/include/acpi/platform/acenvex.h
@@ -3,7 +3,7 @@
  *
  * Name: acenvex.h - Extra host and compiler configuration
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/include/acpi/platform/acgcc.h b/include/acpi/platform/acgcc.h
index 085db95a3dae..6a0705b433d2 100644
--- a/include/acpi/platform/acgcc.h
+++ b/include/acpi/platform/acgcc.h
@@ -3,7 +3,7 @@
  *
  * Name: acgcc.h - GCC specific defines, etc.
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/include/acpi/platform/acgccex.h b/include/acpi/platform/acgccex.h
index 5d2b667af829..8dda2856aca1 100644
--- a/include/acpi/platform/acgccex.h
+++ b/include/acpi/platform/acgccex.h
@@ -3,7 +3,7 @@
  *
  * Name: acgccex.h - Extra GCC specific defines, etc.
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/include/acpi/platform/acintel.h b/include/acpi/platform/acintel.h
index 626265833a54..d2cc247248cb 100644
--- a/include/acpi/platform/acintel.h
+++ b/include/acpi/platform/acintel.h
@@ -3,7 +3,7 @@
  *
  * Name: acintel.h - VC specific defines, etc.
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/include/acpi/platform/aclinux.h b/include/acpi/platform/aclinux.h
index e3d21d014fcc..9ff328fd946a 100644
--- a/include/acpi/platform/aclinux.h
+++ b/include/acpi/platform/aclinux.h
@@ -3,7 +3,7 @@
  *
  * Name: aclinux.h - OS specific defines, etc. for Linux
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/include/acpi/platform/aclinuxex.h b/include/acpi/platform/aclinuxex.h
index d754a1b12721..cc4f1eb53cba 100644
--- a/include/acpi/platform/aclinuxex.h
+++ b/include/acpi/platform/aclinuxex.h
@@ -3,7 +3,7 @@
  *
  * Name: aclinuxex.h - Extra OS specific defines, etc. for Linux
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/tools/power/acpi/common/cmfsize.c b/tools/power/acpi/common/cmfsize.c
index ff8025de8237..28c11c6b4d06 100644
--- a/tools/power/acpi/common/cmfsize.c
+++ b/tools/power/acpi/common/cmfsize.c
@@ -3,7 +3,7 @@
  *
  * Module Name: cfsize - Common get file size function
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/tools/power/acpi/common/getopt.c b/tools/power/acpi/common/getopt.c
index 7be89b873661..6b41d8b64a00 100644
--- a/tools/power/acpi/common/getopt.c
+++ b/tools/power/acpi/common/getopt.c
@@ -3,7 +3,7 @@
  *
  * Module Name: getopt
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c b/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c
index a20c703f8b7d..2a1fd9182f94 100644
--- a/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c
+++ b/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c
@@ -3,7 +3,7 @@
  *
  * Module Name: oslinuxtbl - Linux OSL for obtaining ACPI tables
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/tools/power/acpi/os_specific/service_layers/osunixdir.c b/tools/power/acpi/os_specific/service_layers/osunixdir.c
index 4fd44e218a83..30913f124dd5 100644
--- a/tools/power/acpi/os_specific/service_layers/osunixdir.c
+++ b/tools/power/acpi/os_specific/service_layers/osunixdir.c
@@ -3,7 +3,7 @@
  *
  * Module Name: osunixdir - Unix directory access interfaces
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/tools/power/acpi/os_specific/service_layers/osunixmap.c b/tools/power/acpi/os_specific/service_layers/osunixmap.c
index 93d8359309b6..29dfb47adfeb 100644
--- a/tools/power/acpi/os_specific/service_layers/osunixmap.c
+++ b/tools/power/acpi/os_specific/service_layers/osunixmap.c
@@ -3,7 +3,7 @@
  *
  * Module Name: osunixmap - Unix OSL for file mappings
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/tools/power/acpi/os_specific/service_layers/osunixxf.c b/tools/power/acpi/os_specific/service_layers/osunixxf.c
index 8a88e87778bd..83d3b3b829b8 100644
--- a/tools/power/acpi/os_specific/service_layers/osunixxf.c
+++ b/tools/power/acpi/os_specific/service_layers/osunixxf.c
@@ -3,7 +3,7 @@
  *
  * Module Name: osunixxf - UNIX OSL interfaces
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/tools/power/acpi/tools/acpidump/acpidump.h b/tools/power/acpi/tools/acpidump/acpidump.h
index f69f1f559743..2eb0aaa4f462 100644
--- a/tools/power/acpi/tools/acpidump/acpidump.h
+++ b/tools/power/acpi/tools/acpidump/acpidump.h
@@ -3,7 +3,7 @@
  *
  * Module Name: acpidump.h - Include file for acpi_dump utility
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/tools/power/acpi/tools/acpidump/apdump.c b/tools/power/acpi/tools/acpidump/apdump.c
index a9848de32d8e..e256c2ac5ddc 100644
--- a/tools/power/acpi/tools/acpidump/apdump.c
+++ b/tools/power/acpi/tools/acpidump/apdump.c
@@ -3,7 +3,7 @@
  *
  * Module Name: apdump - Dump routines for ACPI tables (acpidump)
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/tools/power/acpi/tools/acpidump/apfiles.c b/tools/power/acpi/tools/acpidump/apfiles.c
index e86207e5afcf..49972bc78bc5 100644
--- a/tools/power/acpi/tools/acpidump/apfiles.c
+++ b/tools/power/acpi/tools/acpidump/apfiles.c
@@ -3,7 +3,7 @@
  *
  * Module Name: apfiles - File-related functions for acpidump utility
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
diff --git a/tools/power/acpi/tools/acpidump/apmain.c b/tools/power/acpi/tools/acpidump/apmain.c
index 2d9b94b631cb..d8f1b57537d3 100644
--- a/tools/power/acpi/tools/acpidump/apmain.c
+++ b/tools/power/acpi/tools/acpidump/apmain.c
@@ -3,7 +3,7 @@
  *
  * Module Name: apmain - Main module for the acpidump utility
  *
- * Copyright (C) 2000 - 2018, Intel Corp.
+ * Copyright (C) 2000 - 2019, Intel Corp.
  *
  *****************************************************************************/
 
-- 
cgit v1.2.3


From 075470d88930dd16858a844f9dfb611c11f798d3 Mon Sep 17 00:00:00 2001
From: Bob Moore <robert.moore@intel.com>
Date: Mon, 14 Jan 2019 09:55:26 -0800
Subject: ACPICA: Update version to 20190108

ACPICA commit aace846bb8f9b5877e100017f37fefc09dd2fde6

Version 20190108

Link: https://github.com/acpica/acpica/commit/aace846b
Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Erik Schmauss <erik.schmauss@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/acpi/acpixf.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h
index 3a3febc9b979..4cc74954fea3 100644
--- a/include/acpi/acpixf.h
+++ b/include/acpi/acpixf.h
@@ -12,7 +12,7 @@
 
 /* Current ACPICA subsystem version in YYYYMMDD format */
 
-#define ACPI_CA_VERSION                 0x20181213
+#define ACPI_CA_VERSION                 0x20190108
 
 #include <acpi/acconfig.h>
 #include <acpi/actypes.h>
-- 
cgit v1.2.3


From 8a62ffe2753a845272f4f2100b5fca0b6053ff6f Mon Sep 17 00:00:00 2001
From: Vincent Guittot <vincent.guittot@linaro.org>
Date: Fri, 21 Dec 2018 11:33:54 +0100
Subject: PM-runtime: Add new interface to get accounted time

Some drivers (like i915/drm) needs to get the accounted suspended time.
pm_runtime_suspended_time() will return the suspended accounted time
in ns unit.

Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/power/runtime.c | 15 +++++++++++++++
 include/linux/pm_runtime.h   |  2 ++
 2 files changed, 17 insertions(+)

(limited to 'include')

diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
index 457be03b744d..a453090c9449 100644
--- a/drivers/base/power/runtime.c
+++ b/drivers/base/power/runtime.c
@@ -88,6 +88,21 @@ static void __update_runtime_status(struct device *dev, enum rpm_status status)
 	dev->power.runtime_status = status;
 }
 
+u64 pm_runtime_suspended_time(struct device *dev)
+{
+	unsigned long flags, time;
+
+	spin_lock_irqsave(&dev->power.lock, flags);
+
+	update_pm_runtime_accounting(dev);
+	time = dev->power.suspended_jiffies;
+
+	spin_unlock_irqrestore(&dev->power.lock, flags);
+
+	return jiffies_to_nsecs(time);
+}
+EXPORT_SYMBOL_GPL(pm_runtime_suspended_time);
+
 /**
  * pm_runtime_deactivate_timer - Deactivate given device's suspend timer.
  * @dev: Device to handle.
diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h
index 54af4eef169f..a370006921c0 100644
--- a/include/linux/pm_runtime.h
+++ b/include/linux/pm_runtime.h
@@ -113,6 +113,8 @@ static inline bool pm_runtime_is_irq_safe(struct device *dev)
 	return dev->power.irq_safe;
 }
 
+extern u64 pm_runtime_suspended_time(struct device *dev);
+
 #else /* !CONFIG_PM */
 
 static inline bool queue_pm_work(struct work_struct *work) { return false; }
-- 
cgit v1.2.3


From b33a02aadcc6330a61e511240b634dc11112e65e Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Wed, 9 Jan 2019 17:24:55 +0200
Subject: i2c: acpi: Move I2C bits from acpi.h to i2c.h

As discussed previously the best location for certain bus related bits,
e.g. I2C, is its own realm of the headers.

In order to uncontaminate acpi.h move the I2C bits to i2c.h.

There is no functional change intended.

Link: https://lkml.org/lkml/2018/11/28/744
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 include/linux/acpi.h | 11 -----------
 include/linux/i2c.h  | 10 ++++++++++
 2 files changed, 10 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 87715f20b69a..13f5cb2c4763 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -1061,17 +1061,6 @@ static inline int acpi_dev_gpio_irq_get(struct acpi_device *adev, int index)
 }
 #endif
 
-#if defined(CONFIG_ACPI) && IS_ENABLED(CONFIG_I2C)
-bool i2c_acpi_get_i2c_resource(struct acpi_resource *ares,
-			       struct acpi_resource_i2c_serialbus **i2c);
-#else
-static inline bool i2c_acpi_get_i2c_resource(struct acpi_resource *ares,
-					     struct acpi_resource_i2c_serialbus **i2c)
-{
-	return false;
-}
-#endif
-
 /* Device properties */
 
 #ifdef CONFIG_ACPI
diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index cba59d66c00d..1f45331924d6 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -967,11 +967,21 @@ static inline int of_i2c_get_board_info(struct device *dev,
 
 #endif /* CONFIG_OF */
 
+struct acpi_resource;
+struct acpi_resource_i2c_serialbus;
+
 #if IS_ENABLED(CONFIG_ACPI)
+bool i2c_acpi_get_i2c_resource(struct acpi_resource *ares,
+			       struct acpi_resource_i2c_serialbus **i2c);
 u32 i2c_acpi_find_bus_speed(struct device *dev);
 struct i2c_client *i2c_acpi_new_device(struct device *dev, int index,
 				       struct i2c_board_info *info);
 #else
+static inline bool i2c_acpi_get_i2c_resource(struct acpi_resource *ares,
+					     struct acpi_resource_i2c_serialbus **i2c)
+{
+	return false;
+}
 static inline u32 i2c_acpi_find_bus_speed(struct device *dev)
 {
 	return 0;
-- 
cgit v1.2.3


From 063755ab1d1c1127adc09703185967862584935b Mon Sep 17 00:00:00 2001
From: Philippe Schenker <philippe.schenker@toradex.com>
Date: Fri, 21 Dec 2018 14:46:31 +0100
Subject: mfd: stmpe: Move ADC related defines to MFD header

Move defines that are ADC related to the header of the overlying MFD,
so they can be used from multiple sub-devices.

Signed-off-by: Philippe Schenker <philippe.schenker@toradex.com>
Acked-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/input/touchscreen/stmpe-ts.c | 34 +++++++++++++---------------------
 include/linux/mfd/stmpe.h            | 11 +++++++++++
 2 files changed, 24 insertions(+), 21 deletions(-)

(limited to 'include')

diff --git a/drivers/input/touchscreen/stmpe-ts.c b/drivers/input/touchscreen/stmpe-ts.c
index 2a78e27b4495..c5d9006588a2 100644
--- a/drivers/input/touchscreen/stmpe-ts.c
+++ b/drivers/input/touchscreen/stmpe-ts.c
@@ -49,17 +49,6 @@
 
 #define STMPE_IRQ_TOUCH_DET		0
 
-#define SAMPLE_TIME(x)			((x & 0xf) << 4)
-#define MOD_12B(x)			((x & 0x1) << 3)
-#define REF_SEL(x)			((x & 0x1) << 1)
-#define ADC_FREQ(x)			(x & 0x3)
-#define AVE_CTRL(x)			((x & 0x3) << 6)
-#define DET_DELAY(x)			((x & 0x7) << 3)
-#define SETTLING(x)			(x & 0x7)
-#define FRACTION_Z(x)			(x & 0x7)
-#define I_DRIVE(x)			(x & 0x1)
-#define OP_MODE(x)			((x & 0x7) << 1)
-
 #define STMPE_TS_NAME			"stmpe-ts"
 #define XY_MASK				0xfff
 
@@ -213,9 +202,10 @@ static int stmpe_init_hw(struct stmpe_touch *ts)
 		return ret;
 	}
 
-	adc_ctrl1 = SAMPLE_TIME(ts->sample_time) | MOD_12B(ts->mod_12b) |
-		REF_SEL(ts->ref_sel);
-	adc_ctrl1_mask = SAMPLE_TIME(0xff) | MOD_12B(0xff) | REF_SEL(0xff);
+	adc_ctrl1 = STMPE_SAMPLE_TIME(ts->sample_time) |
+		    STMPE_MOD_12B(ts->mod_12b) | STMPE_REF_SEL(ts->ref_sel);
+	adc_ctrl1_mask = STMPE_SAMPLE_TIME(0xff) | STMPE_MOD_12B(0xff) |
+			 STMPE_REF_SEL(0xff);
 
 	ret = stmpe_set_bits(stmpe, STMPE_REG_ADC_CTRL1,
 			adc_ctrl1_mask, adc_ctrl1);
@@ -225,15 +215,17 @@ static int stmpe_init_hw(struct stmpe_touch *ts)
 	}
 
 	ret = stmpe_set_bits(stmpe, STMPE_REG_ADC_CTRL2,
-			ADC_FREQ(0xff), ADC_FREQ(ts->adc_freq));
+			STMPE_ADC_FREQ(0xff), STMPE_ADC_FREQ(ts->adc_freq));
 	if (ret) {
 		dev_err(dev, "Could not setup ADC\n");
 		return ret;
 	}
 
-	tsc_cfg = AVE_CTRL(ts->ave_ctrl) | DET_DELAY(ts->touch_det_delay) |
-			SETTLING(ts->settling);
-	tsc_cfg_mask = AVE_CTRL(0xff) | DET_DELAY(0xff) | SETTLING(0xff);
+	tsc_cfg = STMPE_AVE_CTRL(ts->ave_ctrl) |
+		  STMPE_DET_DELAY(ts->touch_det_delay) |
+		  STMPE_SETTLING(ts->settling);
+	tsc_cfg_mask = STMPE_AVE_CTRL(0xff) | STMPE_DET_DELAY(0xff) |
+		       STMPE_SETTLING(0xff);
 
 	ret = stmpe_set_bits(stmpe, STMPE_REG_TSC_CFG, tsc_cfg_mask, tsc_cfg);
 	if (ret) {
@@ -242,14 +234,14 @@ static int stmpe_init_hw(struct stmpe_touch *ts)
 	}
 
 	ret = stmpe_set_bits(stmpe, STMPE_REG_TSC_FRACTION_Z,
-			FRACTION_Z(0xff), FRACTION_Z(ts->fraction_z));
+			STMPE_FRACTION_Z(0xff), STMPE_FRACTION_Z(ts->fraction_z));
 	if (ret) {
 		dev_err(dev, "Could not config touch\n");
 		return ret;
 	}
 
 	ret = stmpe_set_bits(stmpe, STMPE_REG_TSC_I_DRIVE,
-			I_DRIVE(0xff), I_DRIVE(ts->i_drive));
+			STMPE_I_DRIVE(0xff), STMPE_I_DRIVE(ts->i_drive));
 	if (ret) {
 		dev_err(dev, "Could not config touch\n");
 		return ret;
@@ -263,7 +255,7 @@ static int stmpe_init_hw(struct stmpe_touch *ts)
 	}
 
 	ret = stmpe_set_bits(stmpe, STMPE_REG_TSC_CTRL,
-			OP_MODE(0xff), OP_MODE(OP_MOD_XYZ));
+			STMPE_OP_MODE(0xff), STMPE_OP_MODE(OP_MOD_XYZ));
 	if (ret) {
 		dev_err(dev, "Could not set mode\n");
 		return ret;
diff --git a/include/linux/mfd/stmpe.h b/include/linux/mfd/stmpe.h
index 4a827af17e59..c0353f6431f9 100644
--- a/include/linux/mfd/stmpe.h
+++ b/include/linux/mfd/stmpe.h
@@ -10,6 +10,17 @@
 
 #include <linux/mutex.h>
 
+#define STMPE_SAMPLE_TIME(x)	((x & 0xf) << 4)
+#define STMPE_MOD_12B(x)	((x & 0x1) << 3)
+#define STMPE_REF_SEL(x)	((x & 0x1) << 1)
+#define STMPE_ADC_FREQ(x)	(x & 0x3)
+#define STMPE_AVE_CTRL(x)	((x & 0x3) << 6)
+#define STMPE_DET_DELAY(x)	((x & 0x7) << 3)
+#define STMPE_SETTLING(x)	(x & 0x7)
+#define STMPE_FRACTION_Z(x)	(x & 0x7)
+#define STMPE_I_DRIVE(x)	(x & 0x1)
+#define STMPE_OP_MODE(x)	((x & 0x7) << 1)
+
 struct device;
 struct regulator;
 
-- 
cgit v1.2.3


From 6377cfa3b857ced301f2079ac97de6c19057ab65 Mon Sep 17 00:00:00 2001
From: Stefan Agner <stefan@agner.ch>
Date: Fri, 21 Dec 2018 14:46:32 +0100
Subject: mfd: stmpe: Preparations for STMPE ADC driver

This prepares the MFD for the STMPE ADC driver. This commit introduces
devicetree settings that are used by the ADC and adds an init function.
Common ADC settings that are shared with the touchscreen driver can now
reside in the overlying MFD.

Signed-off-by: Stefan Agner <stefan@agner.ch>
Signed-off-by: Max Krummenacher <max.krummenacher@toradex.com>
Signed-off-by: Philippe Schenker <philippe.schenker@toradex.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/Kconfig       |  3 ++-
 drivers/mfd/stmpe.c       | 68 +++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/mfd/stmpe.h | 10 +++++++
 3 files changed, 80 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index 8c5dfdce4326..bba159e8eaa4 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -1204,7 +1204,7 @@ config MFD_STMPE
 
 	  Currently supported devices are:
 
-		STMPE811: GPIO, Touchscreen
+		STMPE811: GPIO, Touchscreen, ADC
 		STMPE1601: GPIO, Keypad
 		STMPE1801: GPIO, Keypad
 		STMPE2401: GPIO, Keypad
@@ -1217,6 +1217,7 @@ config MFD_STMPE
 		GPIO: stmpe-gpio
 		Keypad: stmpe-keypad
 		Touchscreen: stmpe-ts
+		ADC: stmpe-adc
 
 menu "STMicroelectronics STMPE Interface Drivers"
 depends on MFD_STMPE
diff --git a/drivers/mfd/stmpe.c b/drivers/mfd/stmpe.c
index 566caca4efd8..f582531a8f3e 100644
--- a/drivers/mfd/stmpe.c
+++ b/drivers/mfd/stmpe.c
@@ -463,6 +463,28 @@ static const struct mfd_cell stmpe_ts_cell = {
 	.num_resources	= ARRAY_SIZE(stmpe_ts_resources),
 };
 
+/*
+ * ADC (STMPE811)
+ */
+
+static struct resource stmpe_adc_resources[] = {
+	{
+		.name	= "STMPE_TEMP_SENS",
+		.flags	= IORESOURCE_IRQ,
+	},
+	{
+		.name	= "STMPE_ADC",
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static const struct mfd_cell stmpe_adc_cell = {
+	.name		= "stmpe-adc",
+	.of_compatible	= "st,stmpe-adc",
+	.resources	= stmpe_adc_resources,
+	.num_resources	= ARRAY_SIZE(stmpe_adc_resources),
+};
+
 /*
  * STMPE811 or STMPE610
  */
@@ -497,6 +519,11 @@ static struct stmpe_variant_block stmpe811_blocks[] = {
 		.irq	= STMPE811_IRQ_TOUCH_DET,
 		.block	= STMPE_BLOCK_TOUCHSCREEN,
 	},
+	{
+		.cell	= &stmpe_adc_cell,
+		.irq	= STMPE811_IRQ_TEMP_SENS,
+		.block	= STMPE_BLOCK_ADC,
+	},
 };
 
 static int stmpe811_enable(struct stmpe *stmpe, unsigned int blocks,
@@ -517,6 +544,35 @@ static int stmpe811_enable(struct stmpe *stmpe, unsigned int blocks,
 				enable ? 0 : mask);
 }
 
+int stmpe811_adc_common_init(struct stmpe *stmpe)
+{
+	int ret;
+	u8 adc_ctrl1, adc_ctrl1_mask;
+
+	adc_ctrl1 = STMPE_SAMPLE_TIME(stmpe->sample_time) |
+		    STMPE_MOD_12B(stmpe->mod_12b) |
+		    STMPE_REF_SEL(stmpe->ref_sel);
+	adc_ctrl1_mask = STMPE_SAMPLE_TIME(0xff) | STMPE_MOD_12B(0xff) |
+			 STMPE_REF_SEL(0xff);
+
+	ret = stmpe_set_bits(stmpe, STMPE811_REG_ADC_CTRL1,
+			adc_ctrl1_mask, adc_ctrl1);
+	if (ret) {
+		dev_err(stmpe->dev, "Could not setup ADC\n");
+		return ret;
+	}
+
+	ret = stmpe_set_bits(stmpe, STMPE811_REG_ADC_CTRL2,
+			STMPE_ADC_FREQ(0xff), STMPE_ADC_FREQ(stmpe->adc_freq));
+	if (ret) {
+		dev_err(stmpe->dev, "Could not setup ADC\n");
+		return ret;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(stmpe811_adc_common_init);
+
 static int stmpe811_get_altfunc(struct stmpe *stmpe, enum stmpe_block block)
 {
 	/* 0 for touchscreen, 1 for GPIO */
@@ -1325,6 +1381,7 @@ int stmpe_probe(struct stmpe_client_info *ci, enum stmpe_partnum partnum)
 	struct device_node *np = ci->dev->of_node;
 	struct stmpe *stmpe;
 	int ret;
+	u32 val;
 
 	pdata = devm_kzalloc(ci->dev, sizeof(*pdata), GFP_KERNEL);
 	if (!pdata)
@@ -1342,6 +1399,15 @@ int stmpe_probe(struct stmpe_client_info *ci, enum stmpe_partnum partnum)
 	mutex_init(&stmpe->irq_lock);
 	mutex_init(&stmpe->lock);
 
+	if (!of_property_read_u32(np, "st,sample-time", &val))
+		stmpe->sample_time = val;
+	if (!of_property_read_u32(np, "st,mod-12b", &val))
+		stmpe->mod_12b = val;
+	if (!of_property_read_u32(np, "st,ref-sel", &val))
+		stmpe->ref_sel = val;
+	if (!of_property_read_u32(np, "st,adc-freq", &val))
+		stmpe->adc_freq = val;
+
 	stmpe->dev = ci->dev;
 	stmpe->client = ci->client;
 	stmpe->pdata = pdata;
@@ -1433,6 +1499,8 @@ int stmpe_remove(struct stmpe *stmpe)
 	if (!IS_ERR(stmpe->vcc))
 		regulator_disable(stmpe->vcc);
 
+	__stmpe_disable(stmpe, STMPE_BLOCK_ADC);
+
 	mfd_remove_devices(stmpe->dev);
 
 	return 0;
diff --git a/include/linux/mfd/stmpe.h b/include/linux/mfd/stmpe.h
index c0353f6431f9..07f55aac9390 100644
--- a/include/linux/mfd/stmpe.h
+++ b/include/linux/mfd/stmpe.h
@@ -21,6 +21,9 @@
 #define STMPE_I_DRIVE(x)	(x & 0x1)
 #define STMPE_OP_MODE(x)	((x & 0x7) << 1)
 
+#define STMPE811_REG_ADC_CTRL1	0x20
+#define STMPE811_REG_ADC_CTRL2	0x21
+
 struct device;
 struct regulator;
 
@@ -134,6 +137,12 @@ struct stmpe {
 	u8 ier[2];
 	u8 oldier[2];
 	struct stmpe_platform_data *pdata;
+
+	/* For devices that use an ADC */
+	u8 sample_time;
+	u8 mod_12b;
+	u8 ref_sel;
+	u8 adc_freq;
 };
 
 extern int stmpe_reg_write(struct stmpe *stmpe, u8 reg, u8 data);
@@ -147,6 +156,7 @@ extern int stmpe_set_altfunc(struct stmpe *stmpe, u32 pins,
 			     enum stmpe_block block);
 extern int stmpe_enable(struct stmpe *stmpe, unsigned int blocks);
 extern int stmpe_disable(struct stmpe *stmpe, unsigned int blocks);
+extern int stmpe811_adc_common_init(struct stmpe *stmpe);
 
 #define STMPE_GPIO_NOREQ_811_TOUCH	(0xf0)
 
-- 
cgit v1.2.3


From 1d7ae53b152dbc5ba0a4f6a83ecc42ac66f52d11 Mon Sep 17 00:00:00 2001
From: Dmitry Osipenko <digetx@gmail.com>
Date: Wed, 12 Dec 2018 23:38:47 +0300
Subject: iommu: Introduce iotlb_sync_map callback

Introduce iotlb_sync_map() callback that is invoked in the end of
iommu_map(). This new callback allows IOMMU drivers to avoid syncing
after mapping of each contiguous chunk and sync only when the whole
mapping is completed, optimizing performance of the mapping operation.

Signed-off-by: Dmitry Osipenko <digetx@gmail.com>
Reviewed-by: Robin Murphy <robin.murphy@arm.com>
Reviewed-by: Thierry Reding <treding@nvidia.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/iommu.c | 8 ++++++--
 include/linux/iommu.h | 1 +
 2 files changed, 7 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 3ed4db334341..ed0e63f2cd9b 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -1585,13 +1585,14 @@ static size_t iommu_pgsize(struct iommu_domain *domain,
 int iommu_map(struct iommu_domain *domain, unsigned long iova,
 	      phys_addr_t paddr, size_t size, int prot)
 {
+	const struct iommu_ops *ops = domain->ops;
 	unsigned long orig_iova = iova;
 	unsigned int min_pagesz;
 	size_t orig_size = size;
 	phys_addr_t orig_paddr = paddr;
 	int ret = 0;
 
-	if (unlikely(domain->ops->map == NULL ||
+	if (unlikely(ops->map == NULL ||
 		     domain->pgsize_bitmap == 0UL))
 		return -ENODEV;
 
@@ -1620,7 +1621,7 @@ int iommu_map(struct iommu_domain *domain, unsigned long iova,
 		pr_debug("mapping: iova 0x%lx pa %pa pgsize 0x%zx\n",
 			 iova, &paddr, pgsize);
 
-		ret = domain->ops->map(domain, iova, paddr, pgsize, prot);
+		ret = ops->map(domain, iova, paddr, pgsize, prot);
 		if (ret)
 			break;
 
@@ -1629,6 +1630,9 @@ int iommu_map(struct iommu_domain *domain, unsigned long iova,
 		size -= pgsize;
 	}
 
+	if (ops->iotlb_sync_map)
+		ops->iotlb_sync_map(domain);
+
 	/* unroll mapping in case something went wrong */
 	if (ret)
 		iommu_unmap(domain, orig_iova, orig_size - size);
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index e90da6b6f3d1..477ef47c357c 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -201,6 +201,7 @@ struct iommu_ops {
 	void (*flush_iotlb_all)(struct iommu_domain *domain);
 	void (*iotlb_range_add)(struct iommu_domain *domain,
 				unsigned long iova, size_t size);
+	void (*iotlb_sync_map)(struct iommu_domain *domain);
 	void (*iotlb_sync)(struct iommu_domain *domain);
 	phys_addr_t (*iova_to_phys)(struct iommu_domain *domain, dma_addr_t iova);
 	int (*add_device)(struct device *dev);
-- 
cgit v1.2.3


From 96efa118c03648fdc76acad9ca8fe018a6be7145 Mon Sep 17 00:00:00 2001
From: Dmitry Osipenko <digetx@gmail.com>
Date: Wed, 12 Dec 2018 23:38:52 +0300
Subject: memory: tegra: Adapt to Tegra20 device-tree binding changes

The tegra20-mc device-tree binding has been changed, GART has been
squashed into Memory Controller and now the clock property is mandatory
for Tegra20, the DT compatible has been changed as well. Adapt driver to
the DT changes.

Signed-off-by: Dmitry Osipenko <digetx@gmail.com>
Acked-by: Thierry Reding <treding@nvidia.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/memory/tegra/mc.c | 21 ++++++++-------------
 drivers/memory/tegra/mc.h |  6 ------
 include/soc/tegra/mc.h    |  2 +-
 3 files changed, 9 insertions(+), 20 deletions(-)

(limited to 'include')

diff --git a/drivers/memory/tegra/mc.c b/drivers/memory/tegra/mc.c
index b99f3c620f6c..59db13287b47 100644
--- a/drivers/memory/tegra/mc.c
+++ b/drivers/memory/tegra/mc.c
@@ -51,7 +51,7 @@
 
 static const struct of_device_id tegra_mc_of_match[] = {
 #ifdef CONFIG_ARCH_TEGRA_2x_SOC
-	{ .compatible = "nvidia,tegra20-mc", .data = &tegra20_mc_soc },
+	{ .compatible = "nvidia,tegra20-mc-gart", .data = &tegra20_mc_soc },
 #endif
 #ifdef CONFIG_ARCH_TEGRA_3x_SOC
 	{ .compatible = "nvidia,tegra30-mc", .data = &tegra30_mc_soc },
@@ -638,24 +638,19 @@ static int tegra_mc_probe(struct platform_device *pdev)
 	if (IS_ERR(mc->regs))
 		return PTR_ERR(mc->regs);
 
+	mc->clk = devm_clk_get(&pdev->dev, "mc");
+	if (IS_ERR(mc->clk)) {
+		dev_err(&pdev->dev, "failed to get MC clock: %ld\n",
+			PTR_ERR(mc->clk));
+		return PTR_ERR(mc->clk);
+	}
+
 #ifdef CONFIG_ARCH_TEGRA_2x_SOC
 	if (mc->soc == &tegra20_mc_soc) {
-		res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
-		mc->regs2 = devm_ioremap_resource(&pdev->dev, res);
-		if (IS_ERR(mc->regs2))
-			return PTR_ERR(mc->regs2);
-
 		isr = tegra20_mc_irq;
 	} else
 #endif
 	{
-		mc->clk = devm_clk_get(&pdev->dev, "mc");
-		if (IS_ERR(mc->clk)) {
-			dev_err(&pdev->dev, "failed to get MC clock: %ld\n",
-				PTR_ERR(mc->clk));
-			return PTR_ERR(mc->clk);
-		}
-
 		err = tegra_mc_setup_latency_allowance(mc);
 		if (err < 0) {
 			dev_err(&pdev->dev, "failed to setup latency allowance: %d\n",
diff --git a/drivers/memory/tegra/mc.h b/drivers/memory/tegra/mc.h
index 01065f12ebeb..9856f085e487 100644
--- a/drivers/memory/tegra/mc.h
+++ b/drivers/memory/tegra/mc.h
@@ -26,18 +26,12 @@
 
 static inline u32 mc_readl(struct tegra_mc *mc, unsigned long offset)
 {
-	if (mc->regs2 && offset >= 0x24)
-		return readl(mc->regs2 + offset - 0x3c);
-
 	return readl(mc->regs + offset);
 }
 
 static inline void mc_writel(struct tegra_mc *mc, u32 value,
 			     unsigned long offset)
 {
-	if (mc->regs2 && offset >= 0x24)
-		return writel(value, mc->regs2 + offset - 0x3c);
-
 	writel(value, mc->regs + offset);
 }
 
diff --git a/include/soc/tegra/mc.h b/include/soc/tegra/mc.h
index b43f37fea096..db5bfdf589b4 100644
--- a/include/soc/tegra/mc.h
+++ b/include/soc/tegra/mc.h
@@ -144,7 +144,7 @@ struct tegra_mc_soc {
 struct tegra_mc {
 	struct device *dev;
 	struct tegra_smmu *smmu;
-	void __iomem *regs, *regs2;
+	void __iomem *regs;
 	struct clk *clk;
 	int irq;
 
-- 
cgit v1.2.3


From ce2785a75dbca27375f3723f4e697a2a8dc096ee Mon Sep 17 00:00:00 2001
From: Dmitry Osipenko <digetx@gmail.com>
Date: Wed, 12 Dec 2018 23:38:56 +0300
Subject: iommu/tegra: gart: Integrate with Memory Controller driver

The device-tree binding has been changed. There is no separate GART device
anymore, it is squashed into the Memory Controller. Integrate GART module
with the MC in a way it is done for the SMMU on Tegra30+.

Signed-off-by: Dmitry Osipenko <digetx@gmail.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/Kconfig      |  1 +
 drivers/iommu/tegra-gart.c | 71 ++++++++++++----------------------------------
 drivers/memory/tegra/mc.c  | 43 ++++++++++++++++++++++++++++
 include/soc/tegra/mc.h     | 25 ++++++++++++++++
 4 files changed, 87 insertions(+), 53 deletions(-)

(limited to 'include')

diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index d9a25715650e..83c099bb7288 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -282,6 +282,7 @@ config ROCKCHIP_IOMMU
 config TEGRA_IOMMU_GART
 	bool "Tegra GART IOMMU Support"
 	depends on ARCH_TEGRA_2x_SOC
+	depends on TEGRA_MC
 	select IOMMU_API
 	help
 	  Enables support for remapping discontiguous physical memory
diff --git a/drivers/iommu/tegra-gart.c b/drivers/iommu/tegra-gart.c
index 835fea461c59..b35ffa312a83 100644
--- a/drivers/iommu/tegra-gart.c
+++ b/drivers/iommu/tegra-gart.c
@@ -19,16 +19,17 @@
  * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
  */
 
-#include <linux/init.h>
 #include <linux/io.h>
 #include <linux/iommu.h>
 #include <linux/list.h>
 #include <linux/moduleparam.h>
-#include <linux/of_device.h>
+#include <linux/platform_device.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <linux/vmalloc.h>
 
+#include <soc/tegra/mc.h>
+
 /* bitmap of the page sizes currently supported */
 #define GART_IOMMU_PGSIZES	(SZ_4K)
 
@@ -397,9 +398,8 @@ static const struct iommu_ops gart_iommu_ops = {
 	.iotlb_sync	= gart_iommu_sync,
 };
 
-static int tegra_gart_suspend(struct device *dev)
+int tegra_gart_suspend(struct gart_device *gart)
 {
-	struct gart_device *gart = dev_get_drvdata(dev);
 	unsigned long iova;
 	u32 *data = gart->savedata;
 	unsigned long flags;
@@ -411,9 +411,8 @@ static int tegra_gart_suspend(struct device *dev)
 	return 0;
 }
 
-static int tegra_gart_resume(struct device *dev)
+int tegra_gart_resume(struct gart_device *gart)
 {
-	struct gart_device *gart = dev_get_drvdata(dev);
 	unsigned long flags;
 
 	spin_lock_irqsave(&gart->pte_lock, flags);
@@ -422,41 +421,33 @@ static int tegra_gart_resume(struct device *dev)
 	return 0;
 }
 
-static int tegra_gart_probe(struct platform_device *pdev)
+struct gart_device *tegra_gart_probe(struct device *dev, struct tegra_mc *mc)
 {
 	struct gart_device *gart;
-	struct resource *res, *res_remap;
+	struct resource *res_remap;
 	void __iomem *gart_regs;
-	struct device *dev = &pdev->dev;
 	int ret;
 
 	BUILD_BUG_ON(PAGE_SHIFT != GART_PAGE_SHIFT);
 
 	/* the GART memory aperture is required */
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	res_remap = platform_get_resource(pdev, IORESOURCE_MEM, 1);
-	if (!res || !res_remap) {
+	res_remap = platform_get_resource(to_platform_device(dev),
+					  IORESOURCE_MEM, 1);
+	if (!res_remap) {
 		dev_err(dev, "GART memory aperture expected\n");
-		return -ENXIO;
+		return ERR_PTR(-ENXIO);
 	}
 
 	gart = devm_kzalloc(dev, sizeof(*gart), GFP_KERNEL);
 	if (!gart) {
 		dev_err(dev, "failed to allocate gart_device\n");
-		return -ENOMEM;
-	}
-
-	gart_regs = devm_ioremap(dev, res->start, resource_size(res));
-	if (!gart_regs) {
-		dev_err(dev, "failed to remap GART registers\n");
-		return -ENXIO;
+		return ERR_PTR(-ENOMEM);
 	}
 
-	ret = iommu_device_sysfs_add(&gart->iommu, &pdev->dev, NULL,
-				     dev_name(&pdev->dev));
+	ret = iommu_device_sysfs_add(&gart->iommu, dev, NULL, "gart");
 	if (ret) {
 		dev_err(dev, "Failed to register IOMMU in sysfs\n");
-		return ret;
+		return ERR_PTR(ret);
 	}
 
 	iommu_device_set_ops(&gart->iommu, &gart_iommu_ops);
@@ -468,7 +459,8 @@ static int tegra_gart_probe(struct platform_device *pdev)
 		goto remove_sysfs;
 	}
 
-	gart->dev = &pdev->dev;
+	gart->dev = dev;
+	gart_regs = mc->regs + GART_REG_BASE;
 	spin_lock_init(&gart->pte_lock);
 	spin_lock_init(&gart->client_lock);
 	INIT_LIST_HEAD(&gart->client);
@@ -483,46 +475,19 @@ static int tegra_gart_probe(struct platform_device *pdev)
 		goto unregister_iommu;
 	}
 
-	platform_set_drvdata(pdev, gart);
 	do_gart_setup(gart, NULL);
 
 	gart_handle = gart;
 
-	return 0;
+	return gart;
 
 unregister_iommu:
 	iommu_device_unregister(&gart->iommu);
 remove_sysfs:
 	iommu_device_sysfs_remove(&gart->iommu);
 
-	return ret;
-}
-
-static const struct dev_pm_ops tegra_gart_pm_ops = {
-	.suspend	= tegra_gart_suspend,
-	.resume		= tegra_gart_resume,
-};
-
-static const struct of_device_id tegra_gart_of_match[] = {
-	{ .compatible = "nvidia,tegra20-gart", },
-	{ },
-};
-
-static struct platform_driver tegra_gart_driver = {
-	.probe		= tegra_gart_probe,
-	.driver = {
-		.name	= "tegra-gart",
-		.pm	= &tegra_gart_pm_ops,
-		.of_match_table = tegra_gart_of_match,
-		.suppress_bind_attrs = true,
-	},
-};
-
-static int __init tegra_gart_init(void)
-{
-	return platform_driver_register(&tegra_gart_driver);
+	return ERR_PTR(ret);
 }
-subsys_initcall(tegra_gart_init);
 
 module_param(gart_debug, bool, 0644);
 MODULE_PARM_DESC(gart_debug, "Enable GART debugging");
diff --git a/drivers/memory/tegra/mc.c b/drivers/memory/tegra/mc.c
index 55ecfb2d8cfd..e684e234361a 100644
--- a/drivers/memory/tegra/mc.c
+++ b/drivers/memory/tegra/mc.c
@@ -702,13 +702,56 @@ static int tegra_mc_probe(struct platform_device *pdev)
 				PTR_ERR(mc->smmu));
 	}
 
+	if (IS_ENABLED(CONFIG_TEGRA_IOMMU_GART) && !mc->soc->smmu) {
+		mc->gart = tegra_gart_probe(&pdev->dev, mc);
+		if (IS_ERR(mc->gart)) {
+			dev_err(&pdev->dev, "failed to probe GART: %ld\n",
+				PTR_ERR(mc->gart));
+			mc->gart = NULL;
+		}
+	}
+
+	return 0;
+}
+
+static int tegra_mc_suspend(struct device *dev)
+{
+	struct tegra_mc *mc = dev_get_drvdata(dev);
+	int err;
+
+	if (IS_ENABLED(CONFIG_TEGRA_IOMMU_GART) && mc->gart) {
+		err = tegra_gart_suspend(mc->gart);
+		if (err)
+			return err;
+	}
+
 	return 0;
 }
 
+static int tegra_mc_resume(struct device *dev)
+{
+	struct tegra_mc *mc = dev_get_drvdata(dev);
+	int err;
+
+	if (IS_ENABLED(CONFIG_TEGRA_IOMMU_GART) && mc->gart) {
+		err = tegra_gart_resume(mc->gart);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+static const struct dev_pm_ops tegra_mc_pm_ops = {
+	.suspend = tegra_mc_suspend,
+	.resume = tegra_mc_resume,
+};
+
 static struct platform_driver tegra_mc_driver = {
 	.driver = {
 		.name = "tegra-mc",
 		.of_match_table = tegra_mc_of_match,
+		.pm = &tegra_mc_pm_ops,
 		.suppress_bind_attrs = true,
 	},
 	.prevent_deferred_probe = true,
diff --git a/include/soc/tegra/mc.h b/include/soc/tegra/mc.h
index db5bfdf589b4..e489a028ec9f 100644
--- a/include/soc/tegra/mc.h
+++ b/include/soc/tegra/mc.h
@@ -9,6 +9,7 @@
 #ifndef __SOC_TEGRA_MC_H__
 #define __SOC_TEGRA_MC_H__
 
+#include <linux/err.h>
 #include <linux/reset-controller.h>
 #include <linux/types.h>
 
@@ -77,6 +78,7 @@ struct tegra_smmu_soc {
 
 struct tegra_mc;
 struct tegra_smmu;
+struct gart_device;
 
 #ifdef CONFIG_TEGRA_IOMMU_SMMU
 struct tegra_smmu *tegra_smmu_probe(struct device *dev,
@@ -96,6 +98,28 @@ static inline void tegra_smmu_remove(struct tegra_smmu *smmu)
 }
 #endif
 
+#ifdef CONFIG_TEGRA_IOMMU_GART
+struct gart_device *tegra_gart_probe(struct device *dev, struct tegra_mc *mc);
+int tegra_gart_suspend(struct gart_device *gart);
+int tegra_gart_resume(struct gart_device *gart);
+#else
+static inline struct gart_device *
+tegra_gart_probe(struct device *dev, struct tegra_mc *mc)
+{
+	return ERR_PTR(-ENODEV);
+}
+
+static inline int tegra_gart_suspend(struct gart_device *gart)
+{
+	return -ENODEV;
+}
+
+static inline int tegra_gart_resume(struct gart_device *gart)
+{
+	return -ENODEV;
+}
+#endif
+
 struct tegra_mc_reset {
 	const char *name;
 	unsigned long id;
@@ -144,6 +168,7 @@ struct tegra_mc_soc {
 struct tegra_mc {
 	struct device *dev;
 	struct tegra_smmu *smmu;
+	struct gart_device *gart;
 	void __iomem *regs;
 	struct clk *clk;
 	int irq;
-- 
cgit v1.2.3


From 3eafbd3a7745e1579665cbb8c3959a74a666b8c9 Mon Sep 17 00:00:00 2001
From: Pascal PAILLET-LME <p.paillet@st.com>
Date: Mon, 14 Jan 2019 10:05:15 +0000
Subject: dt-bindings: mfd: Document STPMIC1

STPMIC1 is a PMIC from STMicroelectronics. The STPMIC1 integrates 10
regulators, 3 power switches, a watchdog and an input for a power on key.

Signed-off-by: Pascal Paillet <p.paillet@st.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 .../devicetree/bindings/mfd/st,stpmic1.txt         | 61 ++++++++++++++++++++++
 include/dt-bindings/mfd/st,stpmic1.h               | 50 ++++++++++++++++++
 2 files changed, 111 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/mfd/st,stpmic1.txt
 create mode 100644 include/dt-bindings/mfd/st,stpmic1.h

(limited to 'include')

diff --git a/Documentation/devicetree/bindings/mfd/st,stpmic1.txt b/Documentation/devicetree/bindings/mfd/st,stpmic1.txt
new file mode 100644
index 000000000000..afd45c089585
--- /dev/null
+++ b/Documentation/devicetree/bindings/mfd/st,stpmic1.txt
@@ -0,0 +1,61 @@
+* STMicroelectronics STPMIC1 Power Management IC
+
+Required properties:
+- compatible:		: "st,stpmic1"
+- reg:			: The I2C slave address for the STPMIC1 chip.
+- interrupts:		: The interrupt line the device is connected to.
+- #interrupt-cells:	: Should be 1.
+- interrupt-controller:	: Marks the device node as an interrupt controller.
+			    Interrupt numbers are defined at
+			    dt-bindings/mfd/st,stpmic1.h.
+
+STPMIC1 consists in a varied group of sub-devices.
+Each sub-device binding is be described in own documentation file.
+
+Device			 Description
+------			------------
+st,stpmic1-onkey	: Power on key, see ../input/st,stpmic1-onkey.txt
+st,stpmic1-regulators	: Regulators, see ../regulator/st,stpmic1-regulator.txt
+st,stpmic1-wdt		: Watchdog, see ../watchdog/st,stpmic1-wdt.txt
+
+Example:
+
+#include <dt-bindings/mfd/st,stpmic1.h>
+
+pmic: pmic@33 {
+	compatible = "st,stpmic1";
+	reg = <0x33>;
+	interrupt-parent = <&gpioa>;
+	interrupts = <0 2>;
+
+	interrupt-controller;
+	#interrupt-cells = <2>;
+
+	onkey {
+		compatible = "st,stpmic1-onkey";
+		interrupts = <IT_PONKEY_F 0>,<IT_PONKEY_R 1>;
+		interrupt-names = "onkey-falling", "onkey-rising";
+		power-off-time-sec = <10>;
+	};
+
+	watchdog {
+		compatible = "st,stpmic1-wdt";
+	};
+
+	regulators {
+		compatible = "st,stpmic1-regulators";
+
+		vdd_core: buck1 {
+			regulator-name = "vdd_core";
+			regulator-boot-on;
+			regulator-min-microvolt = <700000>;
+			regulator-max-microvolt = <1200000>;
+		};
+		vdd: buck3 {
+			regulator-name = "vdd";
+			regulator-min-microvolt = <3300000>;
+			regulator-max-microvolt = <3300000>;
+			regulator-boot-on;
+			regulator-pull-down;
+		};
+	};
diff --git a/include/dt-bindings/mfd/st,stpmic1.h b/include/dt-bindings/mfd/st,stpmic1.h
new file mode 100644
index 000000000000..321cd08797d9
--- /dev/null
+++ b/include/dt-bindings/mfd/st,stpmic1.h
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) STMicroelectronics 2018 - All Rights Reserved
+ * Author: Philippe Peurichard <philippe.peurichard@st.com>,
+ * Pascal Paillet <p.paillet@st.com> for STMicroelectronics.
+ */
+
+#ifndef __DT_BINDINGS_STPMIC1_H__
+#define __DT_BINDINGS_STPMIC1_H__
+
+/* IRQ definitions */
+#define IT_PONKEY_F	0
+#define IT_PONKEY_R	1
+#define IT_WAKEUP_F	2
+#define IT_WAKEUP_R	3
+#define IT_VBUS_OTG_F	4
+#define IT_VBUS_OTG_R	5
+#define IT_SWOUT_F	6
+#define IT_SWOUT_R	7
+
+#define IT_CURLIM_BUCK1	8
+#define IT_CURLIM_BUCK2	9
+#define IT_CURLIM_BUCK3	10
+#define IT_CURLIM_BUCK4	11
+#define IT_OCP_OTG	12
+#define IT_OCP_SWOUT	13
+#define IT_OCP_BOOST	14
+#define IT_OVP_BOOST	15
+
+#define IT_CURLIM_LDO1	16
+#define IT_CURLIM_LDO2	17
+#define IT_CURLIM_LDO3	18
+#define IT_CURLIM_LDO4	19
+#define IT_CURLIM_LDO5	20
+#define IT_CURLIM_LDO6	21
+#define IT_SHORT_SWOTG	22
+#define IT_SHORT_SWOUT	23
+
+#define IT_TWARN_F	24
+#define IT_TWARN_R	25
+#define IT_VINLOW_F	26
+#define IT_VINLOW_R	27
+#define IT_SWIN_F	30
+#define IT_SWIN_R	31
+
+/* BUCK MODES definitions */
+#define STPMIC1_BUCK_MODE_NORMAL 0
+#define STPMIC1_BUCK_MODE_LP 2
+
+#endif /* __DT_BINDINGS_STPMIC1_H__ */
-- 
cgit v1.2.3


From 51908d2e9b7c7730608a19f24fc8718af745bb2f Mon Sep 17 00:00:00 2001
From: Pascal PAILLET-LME <p.paillet@st.com>
Date: Mon, 14 Jan 2019 10:05:16 +0000
Subject: mfd: stpmic1: Add STPMIC1 driver

STPMIC1 is a PMIC from STMicroelectronics. The STPMIC1 integrates 10
regulators, 3 power switches, a watchdog and an input for a power on key.

Signed-off-by: Pascal Paillet <p.paillet@st.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/Kconfig         |  16 ++++
 drivers/mfd/Makefile        |   1 +
 drivers/mfd/stpmic1.c       | 213 ++++++++++++++++++++++++++++++++++++++++++++
 include/linux/mfd/stpmic1.h | 212 +++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 442 insertions(+)
 create mode 100644 drivers/mfd/stpmic1.c
 create mode 100644 include/linux/mfd/stpmic1.h

(limited to 'include')

diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index 8c5dfdce4326..0761cb83d174 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -1871,6 +1871,22 @@ config MFD_STM32_TIMERS
 	  for PWM and IIO Timer. This driver allow to share the
 	  registers between the others drivers.
 
+config MFD_STPMIC1
+	tristate "Support for STPMIC1 PMIC"
+	depends on (I2C=y && OF)
+	select REGMAP_I2C
+	select REGMAP_IRQ
+	select MFD_CORE
+	help
+	  Support for ST Microelectronics STPMIC1 PMIC. STPMIC1 has power on
+	  key, watchdog and regulator functionalities which are supported via
+	  the relevant subsystems. This driver provides core support for the
+	  STPMIC1. In order to use the actual functionaltiy of the device other
+	  drivers must be enabled.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called stpmic1.
+
 menu "Multimedia Capabilities Port drivers"
 	depends on ARCH_SA1100
 
diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
index 12980a4ad460..a62fb0112d9f 100644
--- a/drivers/mfd/Makefile
+++ b/drivers/mfd/Makefile
@@ -233,6 +233,7 @@ obj-$(CONFIG_INTEL_SOC_PMIC_CHTDC_TI)	+= intel_soc_pmic_chtdc_ti.o
 obj-$(CONFIG_MFD_MT6397)	+= mt6397-core.o
 
 obj-$(CONFIG_MFD_ALTERA_A10SR)	+= altera-a10sr.o
+obj-$(CONFIG_MFD_STPMIC1)	+= stpmic1.o
 obj-$(CONFIG_MFD_SUN4I_GPADC)	+= sun4i-gpadc.o
 
 obj-$(CONFIG_MFD_STM32_LPTIMER)	+= stm32-lptimer.o
diff --git a/drivers/mfd/stpmic1.c b/drivers/mfd/stpmic1.c
new file mode 100644
index 000000000000..7dfbe8906cb8
--- /dev/null
+++ b/drivers/mfd/stpmic1.c
@@ -0,0 +1,213 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) STMicroelectronics 2018
+// Author: Pascal Paillet <p.paillet@st.com>
+
+#include <linux/i2c.h>
+#include <linux/interrupt.h>
+#include <linux/mfd/core.h>
+#include <linux/mfd/stpmic1.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_irq.h>
+#include <linux/of_platform.h>
+#include <linux/pm_wakeirq.h>
+#include <linux/regmap.h>
+
+#include <dt-bindings/mfd/st,stpmic1.h>
+
+#define STPMIC1_MAIN_IRQ 0
+
+static const struct regmap_range stpmic1_readable_ranges[] = {
+	regmap_reg_range(TURN_ON_SR, VERSION_SR),
+	regmap_reg_range(SWOFF_PWRCTRL_CR, LDO6_STDBY_CR),
+	regmap_reg_range(BST_SW_CR, BST_SW_CR),
+	regmap_reg_range(INT_PENDING_R1, INT_PENDING_R4),
+	regmap_reg_range(INT_CLEAR_R1, INT_CLEAR_R4),
+	regmap_reg_range(INT_MASK_R1, INT_MASK_R4),
+	regmap_reg_range(INT_SET_MASK_R1, INT_SET_MASK_R4),
+	regmap_reg_range(INT_CLEAR_MASK_R1, INT_CLEAR_MASK_R4),
+	regmap_reg_range(INT_SRC_R1, INT_SRC_R1),
+};
+
+static const struct regmap_range stpmic1_writeable_ranges[] = {
+	regmap_reg_range(SWOFF_PWRCTRL_CR, LDO6_STDBY_CR),
+	regmap_reg_range(BST_SW_CR, BST_SW_CR),
+	regmap_reg_range(INT_CLEAR_R1, INT_CLEAR_R4),
+	regmap_reg_range(INT_SET_MASK_R1, INT_SET_MASK_R4),
+	regmap_reg_range(INT_CLEAR_MASK_R1, INT_CLEAR_MASK_R4),
+};
+
+static const struct regmap_range stpmic1_volatile_ranges[] = {
+	regmap_reg_range(TURN_ON_SR, VERSION_SR),
+	regmap_reg_range(WCHDG_CR, WCHDG_CR),
+	regmap_reg_range(INT_PENDING_R1, INT_PENDING_R4),
+	regmap_reg_range(INT_SRC_R1, INT_SRC_R4),
+};
+
+static const struct regmap_access_table stpmic1_readable_table = {
+	.yes_ranges = stpmic1_readable_ranges,
+	.n_yes_ranges = ARRAY_SIZE(stpmic1_readable_ranges),
+};
+
+static const struct regmap_access_table stpmic1_writeable_table = {
+	.yes_ranges = stpmic1_writeable_ranges,
+	.n_yes_ranges = ARRAY_SIZE(stpmic1_writeable_ranges),
+};
+
+static const struct regmap_access_table stpmic1_volatile_table = {
+	.yes_ranges = stpmic1_volatile_ranges,
+	.n_yes_ranges = ARRAY_SIZE(stpmic1_volatile_ranges),
+};
+
+const struct regmap_config stpmic1_regmap_config = {
+	.reg_bits = 8,
+	.val_bits = 8,
+	.cache_type = REGCACHE_RBTREE,
+	.max_register = PMIC_MAX_REGISTER_ADDRESS,
+	.rd_table = &stpmic1_readable_table,
+	.wr_table = &stpmic1_writeable_table,
+	.volatile_table = &stpmic1_volatile_table,
+};
+
+static const struct regmap_irq stpmic1_irqs[] = {
+	REGMAP_IRQ_REG(IT_PONKEY_F, 0, 0x01),
+	REGMAP_IRQ_REG(IT_PONKEY_R, 0, 0x02),
+	REGMAP_IRQ_REG(IT_WAKEUP_F, 0, 0x04),
+	REGMAP_IRQ_REG(IT_WAKEUP_R, 0, 0x08),
+	REGMAP_IRQ_REG(IT_VBUS_OTG_F, 0, 0x10),
+	REGMAP_IRQ_REG(IT_VBUS_OTG_R, 0, 0x20),
+	REGMAP_IRQ_REG(IT_SWOUT_F, 0, 0x40),
+	REGMAP_IRQ_REG(IT_SWOUT_R, 0, 0x80),
+
+	REGMAP_IRQ_REG(IT_CURLIM_BUCK1, 1, 0x01),
+	REGMAP_IRQ_REG(IT_CURLIM_BUCK2, 1, 0x02),
+	REGMAP_IRQ_REG(IT_CURLIM_BUCK3, 1, 0x04),
+	REGMAP_IRQ_REG(IT_CURLIM_BUCK4, 1, 0x08),
+	REGMAP_IRQ_REG(IT_OCP_OTG, 1, 0x10),
+	REGMAP_IRQ_REG(IT_OCP_SWOUT, 1, 0x20),
+	REGMAP_IRQ_REG(IT_OCP_BOOST, 1, 0x40),
+	REGMAP_IRQ_REG(IT_OVP_BOOST, 1, 0x80),
+
+	REGMAP_IRQ_REG(IT_CURLIM_LDO1, 2, 0x01),
+	REGMAP_IRQ_REG(IT_CURLIM_LDO2, 2, 0x02),
+	REGMAP_IRQ_REG(IT_CURLIM_LDO3, 2, 0x04),
+	REGMAP_IRQ_REG(IT_CURLIM_LDO4, 2, 0x08),
+	REGMAP_IRQ_REG(IT_CURLIM_LDO5, 2, 0x10),
+	REGMAP_IRQ_REG(IT_CURLIM_LDO6, 2, 0x20),
+	REGMAP_IRQ_REG(IT_SHORT_SWOTG, 2, 0x40),
+	REGMAP_IRQ_REG(IT_SHORT_SWOUT, 2, 0x80),
+
+	REGMAP_IRQ_REG(IT_TWARN_F, 3, 0x01),
+	REGMAP_IRQ_REG(IT_TWARN_R, 3, 0x02),
+	REGMAP_IRQ_REG(IT_VINLOW_F, 3, 0x04),
+	REGMAP_IRQ_REG(IT_VINLOW_R, 3, 0x08),
+	REGMAP_IRQ_REG(IT_SWIN_F, 3, 0x40),
+	REGMAP_IRQ_REG(IT_SWIN_R, 3, 0x80),
+};
+
+static const struct regmap_irq_chip stpmic1_regmap_irq_chip = {
+	.name = "pmic_irq",
+	.status_base = INT_PENDING_R1,
+	.mask_base = INT_CLEAR_MASK_R1,
+	.unmask_base = INT_SET_MASK_R1,
+	.ack_base = INT_CLEAR_R1,
+	.num_regs = STPMIC1_PMIC_NUM_IRQ_REGS,
+	.irqs = stpmic1_irqs,
+	.num_irqs = ARRAY_SIZE(stpmic1_irqs),
+};
+
+static int stpmic1_probe(struct i2c_client *i2c,
+			 const struct i2c_device_id *id)
+{
+	struct stpmic1 *ddata;
+	struct device *dev = &i2c->dev;
+	int ret;
+	struct device_node *np = dev->of_node;
+	u32 reg;
+
+	ddata = devm_kzalloc(dev, sizeof(struct stpmic1), GFP_KERNEL);
+	if (!ddata)
+		return -ENOMEM;
+
+	i2c_set_clientdata(i2c, ddata);
+	ddata->dev = dev;
+
+	ddata->regmap = devm_regmap_init_i2c(i2c, &stpmic1_regmap_config);
+	if (IS_ERR(ddata->regmap))
+		return PTR_ERR(ddata->regmap);
+
+	ddata->irq = of_irq_get(np, STPMIC1_MAIN_IRQ);
+	if (ddata->irq < 0) {
+		dev_err(dev, "Failed to get main IRQ: %d\n", ddata->irq);
+		return ddata->irq;
+	}
+
+	ret = regmap_read(ddata->regmap, VERSION_SR, &reg);
+	if (ret) {
+		dev_err(dev, "Unable to read PMIC version\n");
+		return ret;
+	}
+	dev_info(dev, "PMIC Chip Version: 0x%x\n", reg);
+
+	/* Initialize PMIC IRQ Chip & associated IRQ domains */
+	ret = devm_regmap_add_irq_chip(dev, ddata->regmap, ddata->irq,
+				       IRQF_ONESHOT | IRQF_SHARED,
+				       0, &stpmic1_regmap_irq_chip,
+				       &ddata->irq_data);
+	if (ret) {
+		dev_err(dev, "IRQ Chip registration failed: %d\n", ret);
+		return ret;
+	}
+
+	return devm_of_platform_populate(dev);
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int stpmic1_suspend(struct device *dev)
+{
+	struct i2c_client *i2c = to_i2c_client(dev);
+	struct stpmic1 *pmic_dev = i2c_get_clientdata(i2c);
+
+	disable_irq(pmic_dev->irq);
+
+	return 0;
+}
+
+static int stpmic1_resume(struct device *dev)
+{
+	struct i2c_client *i2c = to_i2c_client(dev);
+	struct stpmic1 *pmic_dev = i2c_get_clientdata(i2c);
+	int ret;
+
+	ret = regcache_sync(pmic_dev->regmap);
+	if (ret)
+		return ret;
+
+	enable_irq(pmic_dev->irq);
+
+	return 0;
+}
+#endif
+
+static SIMPLE_DEV_PM_OPS(stpmic1_pm, stpmic1_suspend, stpmic1_resume);
+
+static const struct of_device_id stpmic1_of_match[] = {
+	{ .compatible = "st,stpmic1", },
+	{},
+};
+MODULE_DEVICE_TABLE(of, stpmic1_of_match);
+
+static struct i2c_driver stpmic1_driver = {
+	.driver = {
+		.name = "stpmic1",
+		.of_match_table = of_match_ptr(stpmic1_of_match),
+		.pm = &stpmic1_pm,
+	},
+	.probe = stpmic1_probe,
+};
+
+module_i2c_driver(stpmic1_driver);
+
+MODULE_DESCRIPTION("STPMIC1 PMIC Driver");
+MODULE_AUTHOR("Pascal Paillet <p.paillet@st.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/include/linux/mfd/stpmic1.h b/include/linux/mfd/stpmic1.h
new file mode 100644
index 000000000000..fa3f99f7e9a1
--- /dev/null
+++ b/include/linux/mfd/stpmic1.h
@@ -0,0 +1,212 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) STMicroelectronics 2018 - All Rights Reserved
+ * Author: Philippe Peurichard <philippe.peurichard@st.com>,
+ * Pascal Paillet <p.paillet@st.com> for STMicroelectronics.
+ */
+
+#ifndef __LINUX_MFD_STPMIC1_H
+#define __LINUX_MFD_STPMIC1_H
+
+#define TURN_ON_SR		0x1
+#define TURN_OFF_SR		0x2
+#define ICC_LDO_TURN_OFF_SR	0x3
+#define ICC_BUCK_TURN_OFF_SR	0x4
+#define RREQ_STATE_SR		0x5
+#define VERSION_SR		0x6
+
+#define SWOFF_PWRCTRL_CR	0x10
+#define PADS_PULL_CR		0x11
+#define BUCKS_PD_CR		0x12
+#define LDO14_PD_CR		0x13
+#define LDO56_VREF_PD_CR	0x14
+#define VBUS_DET_VIN_CR		0x15
+#define PKEY_TURNOFF_CR		0x16
+#define BUCKS_MASK_RANK_CR	0x17
+#define BUCKS_MASK_RESET_CR	0x18
+#define LDOS_MASK_RANK_CR	0x19
+#define LDOS_MASK_RESET_CR	0x1A
+#define WCHDG_CR		0x1B
+#define WCHDG_TIMER_CR		0x1C
+#define BUCKS_ICCTO_CR		0x1D
+#define LDOS_ICCTO_CR		0x1E
+
+#define BUCK1_ACTIVE_CR		0x20
+#define BUCK2_ACTIVE_CR		0x21
+#define BUCK3_ACTIVE_CR		0x22
+#define BUCK4_ACTIVE_CR		0x23
+#define VREF_DDR_ACTIVE_CR	0x24
+#define LDO1_ACTIVE_CR		0x25
+#define LDO2_ACTIVE_CR		0x26
+#define LDO3_ACTIVE_CR		0x27
+#define LDO4_ACTIVE_CR		0x28
+#define LDO5_ACTIVE_CR		0x29
+#define LDO6_ACTIVE_CR		0x2A
+
+#define BUCK1_STDBY_CR		0x30
+#define BUCK2_STDBY_CR		0x31
+#define BUCK3_STDBY_CR		0x32
+#define BUCK4_STDBY_CR		0x33
+#define VREF_DDR_STDBY_CR	0x34
+#define LDO1_STDBY_CR		0x35
+#define LDO2_STDBY_CR		0x36
+#define LDO3_STDBY_CR		0x37
+#define LDO4_STDBY_CR		0x38
+#define LDO5_STDBY_CR		0x39
+#define LDO6_STDBY_CR		0x3A
+
+#define BST_SW_CR		0x40
+
+#define INT_PENDING_R1		0x50
+#define INT_PENDING_R2		0x51
+#define INT_PENDING_R3		0x52
+#define INT_PENDING_R4		0x53
+
+#define INT_DBG_LATCH_R1	0x60
+#define INT_DBG_LATCH_R2	0x61
+#define INT_DBG_LATCH_R3	0x62
+#define INT_DBG_LATCH_R4	0x63
+
+#define INT_CLEAR_R1		0x70
+#define INT_CLEAR_R2		0x71
+#define INT_CLEAR_R3		0x72
+#define INT_CLEAR_R4		0x73
+
+#define INT_MASK_R1		0x80
+#define INT_MASK_R2		0x81
+#define INT_MASK_R3		0x82
+#define INT_MASK_R4		0x83
+
+#define INT_SET_MASK_R1		0x90
+#define INT_SET_MASK_R2		0x91
+#define INT_SET_MASK_R3		0x92
+#define INT_SET_MASK_R4		0x93
+
+#define INT_CLEAR_MASK_R1	0xA0
+#define INT_CLEAR_MASK_R2	0xA1
+#define INT_CLEAR_MASK_R3	0xA2
+#define INT_CLEAR_MASK_R4	0xA3
+
+#define INT_SRC_R1		0xB0
+#define INT_SRC_R2		0xB1
+#define INT_SRC_R3		0xB2
+#define INT_SRC_R4		0xB3
+
+#define PMIC_MAX_REGISTER_ADDRESS INT_SRC_R4
+
+#define STPMIC1_PMIC_NUM_IRQ_REGS 4
+
+#define TURN_OFF_SR_ICC_EVENT	0x08
+
+#define LDO_VOLTAGE_MASK		GENMASK(6, 2)
+#define BUCK_VOLTAGE_MASK		GENMASK(7, 2)
+#define LDO_BUCK_VOLTAGE_SHIFT		2
+
+#define LDO_ENABLE_MASK			BIT(0)
+#define BUCK_ENABLE_MASK		BIT(0)
+
+#define BUCK_HPLP_ENABLE_MASK		BIT(1)
+#define BUCK_HPLP_SHIFT			1
+
+#define STDBY_ENABLE_MASK  BIT(0)
+
+#define BUCKS_PD_CR_REG_MASK	GENMASK(7, 0)
+#define BUCK_MASK_RANK_REGISTER_MASK	GENMASK(3, 0)
+#define BUCK_MASK_RESET_REGISTER_MASK	GENMASK(3, 0)
+#define LDO1234_PULL_DOWN_REGISTER_MASK	GENMASK(7, 0)
+#define LDO56_VREF_PD_CR_REG_MASK	GENMASK(5, 0)
+#define LDO_MASK_RANK_REGISTER_MASK	GENMASK(5, 0)
+#define LDO_MASK_RESET_REGISTER_MASK	GENMASK(5, 0)
+
+#define BUCK1_PULL_DOWN_REG		BUCKS_PD_CR
+#define BUCK1_PULL_DOWN_MASK		BIT(0)
+#define BUCK2_PULL_DOWN_REG		BUCKS_PD_CR
+#define BUCK2_PULL_DOWN_MASK		BIT(2)
+#define BUCK3_PULL_DOWN_REG		BUCKS_PD_CR
+#define BUCK3_PULL_DOWN_MASK		BIT(4)
+#define BUCK4_PULL_DOWN_REG		BUCKS_PD_CR
+#define BUCK4_PULL_DOWN_MASK		BIT(6)
+
+#define LDO1_PULL_DOWN_REG		LDO14_PD_CR
+#define LDO1_PULL_DOWN_MASK		BIT(0)
+#define LDO2_PULL_DOWN_REG		LDO14_PD_CR
+#define LDO2_PULL_DOWN_MASK		BIT(2)
+#define LDO3_PULL_DOWN_REG		LDO14_PD_CR
+#define LDO3_PULL_DOWN_MASK		BIT(4)
+#define LDO4_PULL_DOWN_REG		LDO14_PD_CR
+#define LDO4_PULL_DOWN_MASK		BIT(6)
+#define LDO5_PULL_DOWN_REG		LDO56_VREF_PD_CR
+#define LDO5_PULL_DOWN_MASK		BIT(0)
+#define LDO6_PULL_DOWN_REG		LDO56_VREF_PD_CR
+#define LDO6_PULL_DOWN_MASK		BIT(2)
+#define VREF_DDR_PULL_DOWN_REG		LDO56_VREF_PD_CR
+#define VREF_DDR_PULL_DOWN_MASK		BIT(4)
+
+#define BUCKS_ICCTO_CR_REG_MASK	GENMASK(6, 0)
+#define LDOS_ICCTO_CR_REG_MASK	GENMASK(5, 0)
+
+#define LDO_BYPASS_MASK			BIT(7)
+
+/* Main PMIC Control Register
+ * SWOFF_PWRCTRL_CR
+ * Address : 0x10
+ */
+#define ICC_EVENT_ENABLED		BIT(4)
+#define PWRCTRL_POLARITY_HIGH		BIT(3)
+#define PWRCTRL_PIN_VALID		BIT(2)
+#define RESTART_REQUEST_ENABLED		BIT(1)
+#define SOFTWARE_SWITCH_OFF_ENABLED	BIT(0)
+
+/* Main PMIC PADS Control Register
+ * PADS_PULL_CR
+ * Address : 0x11
+ */
+#define WAKEUP_DETECTOR_DISABLED	BIT(4)
+#define PWRCTRL_PD_ACTIVE		BIT(3)
+#define PWRCTRL_PU_ACTIVE		BIT(2)
+#define WAKEUP_PD_ACTIVE		BIT(1)
+#define PONKEY_PU_INACTIVE		BIT(0)
+
+/* Main PMIC VINLOW Control Register
+ * VBUS_DET_VIN_CRC DMSC
+ * Address : 0x15
+ */
+#define SWIN_DETECTOR_ENABLED		BIT(7)
+#define SWOUT_DETECTOR_ENABLED		BIT(6)
+#define VINLOW_ENABLED			BIT(0)
+#define VINLOW_CTRL_REG_MASK		GENMASK(7, 0)
+
+/* USB Control Register
+ * Address : 0x40
+ */
+#define BOOST_OVP_DISABLED		BIT(7)
+#define VBUS_OTG_DETECTION_DISABLED	BIT(6)
+#define SW_OUT_DISCHARGE		BIT(5)
+#define VBUS_OTG_DISCHARGE		BIT(4)
+#define OCP_LIMIT_HIGH			BIT(3)
+#define SWIN_SWOUT_ENABLED		BIT(2)
+#define USBSW_OTG_SWITCH_ENABLED	BIT(1)
+#define BOOST_ENABLED			BIT(0)
+
+/* PKEY_TURNOFF_CR
+ * Address : 0x16
+ */
+#define PONKEY_PWR_OFF			BIT(7)
+#define PONKEY_CC_FLAG_CLEAR		BIT(6)
+#define PONKEY_TURNOFF_TIMER_MASK	GENMASK(3, 0)
+#define PONKEY_TURNOFF_MASK		GENMASK(7, 0)
+
+/*
+ * struct stpmic1 - stpmic1 master device for sub-drivers
+ * @dev: master device of the chip (can be used to access platform data)
+ * @irq: main IRQ number
+ * @regmap_irq_chip_data: irq chip data
+ */
+struct stpmic1 {
+	struct device *dev;
+	struct regmap *regmap;
+	int irq;
+	struct regmap_irq_chip_data *irq_data;
+};
+
+#endif /*  __LINUX_MFD_STPMIC1_H */
-- 
cgit v1.2.3


From c1d5fb019f65e1093c77734356f97b6e7ac4d629 Mon Sep 17 00:00:00 2001
From: "Matwey V. Kornilov" <matwey@sai.msu.ru>
Date: Fri, 9 Nov 2018 14:03:26 -0500
Subject: media: usb: pwc: Introduce TRACE_EVENTs for pwc_isoc_handler()

There were reports that PWC-based webcams don't work at some
embedded ARM platforms. [1] Isochronous transfer handler seems to
work too long leading to the issues in MUSB USB host subsystem.
Also note, that urb->giveback() handlers are still called with
disabled interrupts. In order to be able to measure performance of
PWC driver, traces are introduced in URB handler section.

[1] https://www.spinics.net/lists/linux-usb/msg165735.html

Signed-off-by: Matwey V. Kornilov <matwey@sai.msu.ru>
Reviewed-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 drivers/media/usb/pwc/pwc-if.c |  7 +++++
 include/trace/events/pwc.h     | 65 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 72 insertions(+)
 create mode 100644 include/trace/events/pwc.h

(limited to 'include')

diff --git a/drivers/media/usb/pwc/pwc-if.c b/drivers/media/usb/pwc/pwc-if.c
index 72704f4d5330..53c111bd5a22 100644
--- a/drivers/media/usb/pwc/pwc-if.c
+++ b/drivers/media/usb/pwc/pwc-if.c
@@ -76,6 +76,9 @@
 #include "pwc-dec23.h"
 #include "pwc-dec1.h"
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/pwc.h>
+
 /* Function prototypes and driver templates */
 
 /* hotplug device table support */
@@ -260,6 +263,8 @@ static void pwc_isoc_handler(struct urb *urb)
 	int i, fst, flen;
 	unsigned char *iso_buf = NULL;
 
+	trace_pwc_handler_enter(urb, pdev);
+
 	if (urb->status == -ENOENT || urb->status == -ECONNRESET ||
 	    urb->status == -ESHUTDOWN) {
 		PWC_DEBUG_OPEN("URB (%p) unlinked %ssynchronously.\n",
@@ -348,6 +353,8 @@ static void pwc_isoc_handler(struct urb *urb)
 	}
 
 handler_end:
+	trace_pwc_handler_exit(urb, pdev);
+
 	i = usb_submit_urb(urb, GFP_ATOMIC);
 	if (i != 0)
 		PWC_ERROR("Error (%d) re-submitting urb in pwc_isoc_handler.\n", i);
diff --git a/include/trace/events/pwc.h b/include/trace/events/pwc.h
new file mode 100644
index 000000000000..a2da764a3b41
--- /dev/null
+++ b/include/trace/events/pwc.h
@@ -0,0 +1,65 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#if !defined(_TRACE_PWC_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_PWC_H
+
+#include <linux/usb.h>
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM pwc
+
+TRACE_EVENT(pwc_handler_enter,
+	TP_PROTO(struct urb *urb, struct pwc_device *pdev),
+	TP_ARGS(urb, pdev),
+	TP_STRUCT__entry(
+		__field(struct urb*, urb)
+		__field(struct pwc_frame_buf*, fbuf)
+		__field(int, urb__status)
+		__field(u32, urb__actual_length)
+		__field(int, fbuf__filled)
+		__string(name, pdev->v4l2_dev.name)
+	),
+	TP_fast_assign(
+		__entry->urb = urb;
+		__entry->fbuf = pdev->fill_buf;
+		__entry->urb__status = urb->status;
+		__entry->urb__actual_length = urb->actual_length;
+		__entry->fbuf__filled = (pdev->fill_buf
+					 ? pdev->fill_buf->filled : 0);
+		__assign_str(name, pdev->v4l2_dev.name);
+	),
+	TP_printk("dev=%s (fbuf=%p filled=%d) urb=%p (status=%d actual_length=%u)",
+		__get_str(name),
+		__entry->fbuf,
+		__entry->fbuf__filled,
+		__entry->urb,
+		__entry->urb__status,
+		__entry->urb__actual_length)
+);
+
+TRACE_EVENT(pwc_handler_exit,
+	TP_PROTO(struct urb *urb, struct pwc_device *pdev),
+	TP_ARGS(urb, pdev),
+	TP_STRUCT__entry(
+		__field(struct urb*, urb)
+		__field(struct pwc_frame_buf*, fbuf)
+		__field(int, fbuf__filled)
+		__string(name, pdev->v4l2_dev.name)
+	),
+	TP_fast_assign(
+		__entry->urb = urb;
+		__entry->fbuf = pdev->fill_buf;
+		__entry->fbuf__filled = pdev->fill_buf->filled;
+		__assign_str(name, pdev->v4l2_dev.name);
+	),
+	TP_printk(" dev=%s (fbuf=%p filled=%d) urb=%p",
+		__get_str(name),
+		__entry->fbuf,
+		__entry->fbuf__filled,
+		__entry->urb)
+);
+
+#endif /* _TRACE_PWC_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
-- 
cgit v1.2.3


From 66e988e96b31d0c06471234cabb0a77316841fc3 Mon Sep 17 00:00:00 2001
From: Andrzej Pietrasiewicz <andrzej.p@samsung.com>
Date: Thu, 13 Dec 2018 08:31:07 -0500
Subject: media: Change Andrzej Pietrasiewicz's e-mail address

My @samsung.com address is going to cease existing soon, so change it to
an address which can actually be used to contact me.

Signed-off-by: Andrzej Pietrasiewicz <andrzej.p@samsung.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 drivers/media/common/videobuf2/videobuf2-dma-sg.c | 2 +-
 drivers/media/platform/s5p-jpeg/jpeg-core.c       | 4 ++--
 drivers/media/platform/s5p-jpeg/jpeg-core.h       | 2 +-
 drivers/media/platform/s5p-jpeg/jpeg-hw-s5p.c     | 2 +-
 drivers/media/platform/s5p-jpeg/jpeg-hw-s5p.h     | 2 +-
 drivers/media/platform/s5p-jpeg/jpeg-regs.h       | 2 +-
 include/media/videobuf2-dma-sg.h                  | 2 +-
 7 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/drivers/media/common/videobuf2/videobuf2-dma-sg.c b/drivers/media/common/videobuf2/videobuf2-dma-sg.c
index 015e737095cd..f02876d971d7 100644
--- a/drivers/media/common/videobuf2/videobuf2-dma-sg.c
+++ b/drivers/media/common/videobuf2/videobuf2-dma-sg.c
@@ -3,7 +3,7 @@
  *
  * Copyright (C) 2010 Samsung Electronics
  *
- * Author: Andrzej Pietrasiewicz <andrzej.p@samsung.com>
+ * Author: Andrzej Pietrasiewicz <andrzejtp2010@gmail.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
diff --git a/drivers/media/platform/s5p-jpeg/jpeg-core.c b/drivers/media/platform/s5p-jpeg/jpeg-core.c
index 3f9000b70385..580d6d02557b 100644
--- a/drivers/media/platform/s5p-jpeg/jpeg-core.c
+++ b/drivers/media/platform/s5p-jpeg/jpeg-core.c
@@ -3,7 +3,7 @@
  * Copyright (c) 2011-2014 Samsung Electronics Co., Ltd.
  *		http://www.samsung.com
  *
- * Author: Andrzej Pietrasiewicz <andrzej.p@samsung.com>
+ * Author: Andrzej Pietrasiewicz <andrzejtp2010@gmail.com>
  * Author: Jacek Anaszewski <j.anaszewski@samsung.com>
  *
  * This program is free software; you can redistribute it and/or modify
@@ -3220,7 +3220,7 @@ static struct platform_driver s5p_jpeg_driver = {
 
 module_platform_driver(s5p_jpeg_driver);
 
-MODULE_AUTHOR("Andrzej Pietrasiewicz <andrzej.p@samsung.com>");
+MODULE_AUTHOR("Andrzej Pietrasiewicz <andrzejtp2010@gmail.com>");
 MODULE_AUTHOR("Jacek Anaszewski <j.anaszewski@samsung.com>");
 MODULE_DESCRIPTION("Samsung JPEG codec driver");
 MODULE_LICENSE("GPL");
diff --git a/drivers/media/platform/s5p-jpeg/jpeg-core.h b/drivers/media/platform/s5p-jpeg/jpeg-core.h
index a46465e10351..90fda4b720eb 100644
--- a/drivers/media/platform/s5p-jpeg/jpeg-core.h
+++ b/drivers/media/platform/s5p-jpeg/jpeg-core.h
@@ -3,7 +3,7 @@
  * Copyright (c) 2011 Samsung Electronics Co., Ltd.
  *		http://www.samsung.com
  *
- * Author: Andrzej Pietrasiewicz <andrzej.p@samsung.com>
+ * Author: Andrzej Pietrasiewicz <andrzejtp2010@gmail.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
diff --git a/drivers/media/platform/s5p-jpeg/jpeg-hw-s5p.c b/drivers/media/platform/s5p-jpeg/jpeg-hw-s5p.c
index b5f20e722b63..59c6263a71bf 100644
--- a/drivers/media/platform/s5p-jpeg/jpeg-hw-s5p.c
+++ b/drivers/media/platform/s5p-jpeg/jpeg-hw-s5p.c
@@ -3,7 +3,7 @@
  * Copyright (c) 2011 Samsung Electronics Co., Ltd.
  *		http://www.samsung.com
  *
- * Author: Andrzej Pietrasiewicz <andrzej.p@samsung.com>
+ * Author: Andrzej Pietrasiewicz <andrzejtp2010@gmail.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
diff --git a/drivers/media/platform/s5p-jpeg/jpeg-hw-s5p.h b/drivers/media/platform/s5p-jpeg/jpeg-hw-s5p.h
index f208fa3ed738..bfe746f8f750 100644
--- a/drivers/media/platform/s5p-jpeg/jpeg-hw-s5p.h
+++ b/drivers/media/platform/s5p-jpeg/jpeg-hw-s5p.h
@@ -3,7 +3,7 @@
  * Copyright (c) 2011 Samsung Electronics Co., Ltd.
  *		http://www.samsung.com
  *
- * Author: Andrzej Pietrasiewicz <andrzej.p@samsung.com>
+ * Author: Andrzej Pietrasiewicz <andrzejtp2010@gmail.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
diff --git a/drivers/media/platform/s5p-jpeg/jpeg-regs.h b/drivers/media/platform/s5p-jpeg/jpeg-regs.h
index df790b10140c..574f0e8021e5 100644
--- a/drivers/media/platform/s5p-jpeg/jpeg-regs.h
+++ b/drivers/media/platform/s5p-jpeg/jpeg-regs.h
@@ -5,7 +5,7 @@
  * Copyright (c) 2011-2014 Samsung Electronics Co., Ltd.
  *		http://www.samsung.com
  *
- * Author: Andrzej Pietrasiewicz <andrzej.p@samsung.com>
+ * Author: Andrzej Pietrasiewicz <andrzejtp2010@gmail.com>
  * Author: Jacek Anaszewski <j.anaszewski@samsung.com>
  *
  * This program is free software; you can redistribute it and/or modify
diff --git a/include/media/videobuf2-dma-sg.h b/include/media/videobuf2-dma-sg.h
index 52afa0e2bb17..f28fcb0cfac7 100644
--- a/include/media/videobuf2-dma-sg.h
+++ b/include/media/videobuf2-dma-sg.h
@@ -3,7 +3,7 @@
  *
  * Copyright (C) 2010 Samsung Electronics
  *
- * Author: Andrzej Pietrasiewicz <andrzej.p@samsung.com>
+ * Author: Andrzej Pietrasiewicz <andrzejtp2010@gmail.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
-- 
cgit v1.2.3


From 4b837c6d7ee771f68a30f362c9f68171a95be222 Mon Sep 17 00:00:00 2001
From: Sakari Ailus <sakari.ailus@linux.intel.com>
Date: Mon, 14 Jan 2019 09:01:54 -0500
Subject: media: v4l: uAPI: V4L2_BUF_TYPE_META_OUTPUT is an output buffer type

V4L2_BUF_TYPE_META_OUTPUT was added by commit 72148d1a57e7
("media: v4l: Add support for V4L2_BUF_TYPE_META_OUTPUT") but the patch
missed adding the type to the macro telling whether a given type is an
output type or not. Do that now. Getting this wrong leads to handling the
buffer as a capture buffer in a lot of places.

Fixes: 72148d1a57e7 ("media: v4l: Add support for V4L2_BUF_TYPE_META_OUTPUT")

Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 include/uapi/linux/videodev2.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
index d6eed479c3a6..c0c36c165bf4 100644
--- a/include/uapi/linux/videodev2.h
+++ b/include/uapi/linux/videodev2.h
@@ -161,7 +161,8 @@ enum v4l2_buf_type {
 	 || (type) == V4L2_BUF_TYPE_VIDEO_OUTPUT_OVERLAY	\
 	 || (type) == V4L2_BUF_TYPE_VBI_OUTPUT			\
 	 || (type) == V4L2_BUF_TYPE_SLICED_VBI_OUTPUT		\
-	 || (type) == V4L2_BUF_TYPE_SDR_OUTPUT)
+	 || (type) == V4L2_BUF_TYPE_SDR_OUTPUT			\
+	 || (type) == V4L2_BUF_TYPE_META_OUTPUT)
 
 enum v4l2_tuner_type {
 	V4L2_TUNER_RADIO	     = 1,
-- 
cgit v1.2.3


From 50656bad786d001b294764e9f047c5d5b3e4db75 Mon Sep 17 00:00:00 2001
From: Philipp Zabel <p.zabel@pengutronix.de>
Date: Thu, 10 Jan 2019 11:56:09 -0500
Subject: media: v4l2-ctrl: Add control to enable h.264 constrained intra
 prediction

Allow to enable h.264 constrained intra prediction (macroblocks using
intra prediction modes are not allowed to use residual data and decoded
samples of neighboring macroblocks coded using inter prediction modes).
This control directly corresponds to the constrained_intra_pred_flag
field in the h.264 picture parameter set.

Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/media/uapi/v4l/extended-controls.rst | 4 ++++
 drivers/media/v4l2-core/v4l2-ctrls.c               | 2 ++
 include/uapi/linux/v4l2-controls.h                 | 1 +
 3 files changed, 7 insertions(+)

(limited to 'include')

diff --git a/Documentation/media/uapi/v4l/extended-controls.rst b/Documentation/media/uapi/v4l/extended-controls.rst
index af4273aa5e85..235d0c293983 100644
--- a/Documentation/media/uapi/v4l/extended-controls.rst
+++ b/Documentation/media/uapi/v4l/extended-controls.rst
@@ -1154,6 +1154,10 @@ enum v4l2_mpeg_video_h264_entropy_mode -
 ``V4L2_CID_MPEG_VIDEO_H264_8X8_TRANSFORM (boolean)``
     Enable 8X8 transform for H264. Applicable to the H264 encoder.
 
+``V4L2_CID_MPEG_VIDEO_H264_CONSTRAINED_INTRA_PREDICTION (boolean)``
+    Enable constrained intra prediction for H264. Applicable to the H264
+    encoder.
+
 ``V4L2_CID_MPEG_VIDEO_CYCLIC_INTRA_REFRESH_MB (integer)``
     Cyclic intra macroblock refresh. This is the number of continuous
     macroblocks refreshed every frame. Each frame a successive set of
diff --git a/drivers/media/v4l2-core/v4l2-ctrls.c b/drivers/media/v4l2-core/v4l2-ctrls.c
index 8c47d8f00429..50b56185c02e 100644
--- a/drivers/media/v4l2-core/v4l2-ctrls.c
+++ b/drivers/media/v4l2-core/v4l2-ctrls.c
@@ -825,6 +825,8 @@ const char *v4l2_ctrl_get_name(u32 id)
 	case V4L2_CID_MPEG_VIDEO_H264_HIERARCHICAL_CODING_LAYER:return "H264 Number of HC Layers";
 	case V4L2_CID_MPEG_VIDEO_H264_HIERARCHICAL_CODING_LAYER_QP:
 								return "H264 Set QP Value for HC Layers";
+	case V4L2_CID_MPEG_VIDEO_H264_CONSTRAINED_INTRA_PREDICTION:
+								return "H264 Constrained Intra Pred";
 	case V4L2_CID_MPEG_VIDEO_MPEG4_I_FRAME_QP:		return "MPEG4 I-Frame QP Value";
 	case V4L2_CID_MPEG_VIDEO_MPEG4_P_FRAME_QP:		return "MPEG4 P-Frame QP Value";
 	case V4L2_CID_MPEG_VIDEO_MPEG4_B_FRAME_QP:		return "MPEG4 B-Frame QP Value";
diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h
index 3dcfc6148f99..fd65c710b144 100644
--- a/include/uapi/linux/v4l2-controls.h
+++ b/include/uapi/linux/v4l2-controls.h
@@ -533,6 +533,7 @@ enum v4l2_mpeg_video_h264_hierarchical_coding_type {
 };
 #define V4L2_CID_MPEG_VIDEO_H264_HIERARCHICAL_CODING_LAYER	(V4L2_CID_MPEG_BASE+381)
 #define V4L2_CID_MPEG_VIDEO_H264_HIERARCHICAL_CODING_LAYER_QP	(V4L2_CID_MPEG_BASE+382)
+#define V4L2_CID_MPEG_VIDEO_H264_CONSTRAINED_INTRA_PREDICTION	(V4L2_CID_MPEG_BASE+383)
 #define V4L2_CID_MPEG_VIDEO_MPEG4_I_FRAME_QP	(V4L2_CID_MPEG_BASE+400)
 #define V4L2_CID_MPEG_VIDEO_MPEG4_P_FRAME_QP	(V4L2_CID_MPEG_BASE+401)
 #define V4L2_CID_MPEG_VIDEO_MPEG4_B_FRAME_QP	(V4L2_CID_MPEG_BASE+402)
-- 
cgit v1.2.3


From d034696cbe5a6e00f76ca4b7869c6cdef66aebd5 Mon Sep 17 00:00:00 2001
From: Philipp Zabel <p.zabel@pengutronix.de>
Date: Thu, 10 Jan 2019 11:56:10 -0500
Subject: media: v4l2-ctrl: Add control for h.264 chroma qp offset

Allow to add fixed quantization parameter offset between luma and
chroma quantization parameters. This control directly corresponds
to the chroma_qp_index_offset field of the h.264 picture parameter
set.

Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/media/uapi/v4l/extended-controls.rst | 5 +++++
 drivers/media/v4l2-core/v4l2-ctrls.c               | 1 +
 include/uapi/linux/v4l2-controls.h                 | 1 +
 3 files changed, 7 insertions(+)

(limited to 'include')

diff --git a/Documentation/media/uapi/v4l/extended-controls.rst b/Documentation/media/uapi/v4l/extended-controls.rst
index 235d0c293983..00934efdc9e4 100644
--- a/Documentation/media/uapi/v4l/extended-controls.rst
+++ b/Documentation/media/uapi/v4l/extended-controls.rst
@@ -1158,6 +1158,11 @@ enum v4l2_mpeg_video_h264_entropy_mode -
     Enable constrained intra prediction for H264. Applicable to the H264
     encoder.
 
+``V4L2_CID_MPEG_VIDEO_H264_CHROMA_QP_INDEX_OFFSET (integer)``
+    Specify the offset that should be added to the luma quantization
+    parameter to determine the chroma quantization parameter. Applicable
+    to the H264 encoder.
+
 ``V4L2_CID_MPEG_VIDEO_CYCLIC_INTRA_REFRESH_MB (integer)``
     Cyclic intra macroblock refresh. This is the number of continuous
     macroblocks refreshed every frame. Each frame a successive set of
diff --git a/drivers/media/v4l2-core/v4l2-ctrls.c b/drivers/media/v4l2-core/v4l2-ctrls.c
index 50b56185c02e..99308dac2daa 100644
--- a/drivers/media/v4l2-core/v4l2-ctrls.c
+++ b/drivers/media/v4l2-core/v4l2-ctrls.c
@@ -827,6 +827,7 @@ const char *v4l2_ctrl_get_name(u32 id)
 								return "H264 Set QP Value for HC Layers";
 	case V4L2_CID_MPEG_VIDEO_H264_CONSTRAINED_INTRA_PREDICTION:
 								return "H264 Constrained Intra Pred";
+	case V4L2_CID_MPEG_VIDEO_H264_CHROMA_QP_INDEX_OFFSET:	return "H264 Chroma QP Index Offset";
 	case V4L2_CID_MPEG_VIDEO_MPEG4_I_FRAME_QP:		return "MPEG4 I-Frame QP Value";
 	case V4L2_CID_MPEG_VIDEO_MPEG4_P_FRAME_QP:		return "MPEG4 P-Frame QP Value";
 	case V4L2_CID_MPEG_VIDEO_MPEG4_B_FRAME_QP:		return "MPEG4 B-Frame QP Value";
diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h
index fd65c710b144..06479f2fb3ae 100644
--- a/include/uapi/linux/v4l2-controls.h
+++ b/include/uapi/linux/v4l2-controls.h
@@ -534,6 +534,7 @@ enum v4l2_mpeg_video_h264_hierarchical_coding_type {
 #define V4L2_CID_MPEG_VIDEO_H264_HIERARCHICAL_CODING_LAYER	(V4L2_CID_MPEG_BASE+381)
 #define V4L2_CID_MPEG_VIDEO_H264_HIERARCHICAL_CODING_LAYER_QP	(V4L2_CID_MPEG_BASE+382)
 #define V4L2_CID_MPEG_VIDEO_H264_CONSTRAINED_INTRA_PREDICTION	(V4L2_CID_MPEG_BASE+383)
+#define V4L2_CID_MPEG_VIDEO_H264_CHROMA_QP_INDEX_OFFSET		(V4L2_CID_MPEG_BASE+384)
 #define V4L2_CID_MPEG_VIDEO_MPEG4_I_FRAME_QP	(V4L2_CID_MPEG_BASE+400)
 #define V4L2_CID_MPEG_VIDEO_MPEG4_P_FRAME_QP	(V4L2_CID_MPEG_BASE+401)
 #define V4L2_CID_MPEG_VIDEO_MPEG4_B_FRAME_QP	(V4L2_CID_MPEG_BASE+402)
-- 
cgit v1.2.3


From 1c3721b1f22286033abeda30b7e12439b083ed0f Mon Sep 17 00:00:00 2001
From: Steve Longerbeam <slongerbeam@gmail.com>
Date: Wed, 9 Jan 2019 13:30:04 -0500
Subject: media: videodev2.h: Add more field helper macros

Adds two helper macros:

V4L2_FIELD_IS_SEQUENTIAL: returns true if the given field type is
'sequential', that is a full frame is transmitted, or exists in
memory, as all top field lines followed by all bottom field lines,
or vice-versa.

V4L2_FIELD_IS_INTERLACED: returns true if the given field type is
'interlaced', that is a full frame is transmitted, or exists in
memory, as top field lines interlaced with bottom field lines.

Signed-off-by: Steve Longerbeam <slongerbeam@gmail.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 include/uapi/linux/videodev2.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
index c0c36c165bf4..9a920f071ff9 100644
--- a/include/uapi/linux/videodev2.h
+++ b/include/uapi/linux/videodev2.h
@@ -130,6 +130,13 @@ enum v4l2_field {
 	((field) == V4L2_FIELD_BOTTOM ||\
 	 (field) == V4L2_FIELD_TOP ||\
 	 (field) == V4L2_FIELD_ALTERNATE)
+#define V4L2_FIELD_IS_INTERLACED(field) \
+	((field) == V4L2_FIELD_INTERLACED ||\
+	 (field) == V4L2_FIELD_INTERLACED_TB ||\
+	 (field) == V4L2_FIELD_INTERLACED_BT)
+#define V4L2_FIELD_IS_SEQUENTIAL(field) \
+	((field) == V4L2_FIELD_SEQ_TB ||\
+	 (field) == V4L2_FIELD_SEQ_BT)
 
 enum v4l2_buf_type {
 	V4L2_BUF_TYPE_VIDEO_CAPTURE        = 1,
-- 
cgit v1.2.3


From fc8c723852380b0f4349bb06cf553a6bba47462e Mon Sep 17 00:00:00 2001
From: Steve Longerbeam <slongerbeam@gmail.com>
Date: Wed, 9 Jan 2019 13:30:05 -0500
Subject: media: gpu: ipu-csi: Swap fields according to input/output field
 types

The function ipu_csi_init_interface() was inverting the F-bit for
NTSC case, in the CCIR_CODE_1/2 registers. The result being that
for NTSC bottom-top field order, the CSI would swap fields and
capture in top-bottom order.

Instead, base field swap on the field order of the input to the CSI,
and the field order of the requested output. If the input/output
fields are sequential but different, swap fields, otherwise do
not swap. This requires passing both the input and output mbus
frame formats to ipu_csi_init_interface().

Move this code to a new private function ipu_csi_set_bt_interlaced_codes()
that programs the CCIR_CODE_1/2 registers for interlaced BT.656 (and
possibly interlaced BT.1120 in the future).

When detecting input video standard from the input frame width/height,
make sure to double height if input field type is alternate, since
in that case input height only includes lines for one field.

Signed-off-by: Steve Longerbeam <slongerbeam@gmail.com>
Reviewed-by: Philipp Zabel <p.zabel@pengutronix.de>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 drivers/gpu/ipu-v3/ipu-csi.c              | 126 ++++++++++++++++++++----------
 drivers/staging/media/imx/imx-media-csi.c |   7 +-
 include/video/imx-ipu-v3.h                |   5 +-
 3 files changed, 89 insertions(+), 49 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/ipu-v3/ipu-csi.c b/drivers/gpu/ipu-v3/ipu-csi.c
index aa0e30a2ba18..d1e575571a8d 100644
--- a/drivers/gpu/ipu-v3/ipu-csi.c
+++ b/drivers/gpu/ipu-v3/ipu-csi.c
@@ -325,12 +325,21 @@ static int mbus_code_to_bus_cfg(struct ipu_csi_bus_config *cfg, u32 mbus_code,
 	return 0;
 }
 
+/* translate alternate field mode based on given standard */
+static inline enum v4l2_field
+ipu_csi_translate_field(enum v4l2_field field, v4l2_std_id std)
+{
+	return (field != V4L2_FIELD_ALTERNATE) ? field :
+		((std & V4L2_STD_525_60) ?
+		 V4L2_FIELD_SEQ_BT : V4L2_FIELD_SEQ_TB);
+}
+
 /*
  * Fill a CSI bus config struct from mbus_config and mbus_framefmt.
  */
 static int fill_csi_bus_cfg(struct ipu_csi_bus_config *csicfg,
-				 struct v4l2_mbus_config *mbus_cfg,
-				 struct v4l2_mbus_framefmt *mbus_fmt)
+			    const struct v4l2_mbus_config *mbus_cfg,
+			    const struct v4l2_mbus_framefmt *mbus_fmt)
 {
 	int ret;
 
@@ -374,22 +383,76 @@ static int fill_csi_bus_cfg(struct ipu_csi_bus_config *csicfg,
 	return 0;
 }
 
+static int
+ipu_csi_set_bt_interlaced_codes(struct ipu_csi *csi,
+				const struct v4l2_mbus_framefmt *infmt,
+				const struct v4l2_mbus_framefmt *outfmt,
+				v4l2_std_id std)
+{
+	enum v4l2_field infield, outfield;
+	bool swap_fields;
+
+	/* get translated field type of input and output */
+	infield = ipu_csi_translate_field(infmt->field, std);
+	outfield = ipu_csi_translate_field(outfmt->field, std);
+
+	/*
+	 * Write the H-V-F codes the CSI will match against the
+	 * incoming data for start/end of active and blanking
+	 * field intervals. If input and output field types are
+	 * sequential but not the same (one is SEQ_BT and the other
+	 * is SEQ_TB), swap the F-bit so that the CSI will capture
+	 * field 1 lines before field 0 lines.
+	 */
+	swap_fields = (V4L2_FIELD_IS_SEQUENTIAL(infield) &&
+		       V4L2_FIELD_IS_SEQUENTIAL(outfield) &&
+		       infield != outfield);
+
+	if (!swap_fields) {
+		/*
+		 * Field0BlankEnd  = 110, Field0BlankStart  = 010
+		 * Field0ActiveEnd = 100, Field0ActiveStart = 000
+		 * Field1BlankEnd  = 111, Field1BlankStart  = 011
+		 * Field1ActiveEnd = 101, Field1ActiveStart = 001
+		 */
+		ipu_csi_write(csi, 0x40596 | CSI_CCIR_ERR_DET_EN,
+			      CSI_CCIR_CODE_1);
+		ipu_csi_write(csi, 0xD07DF, CSI_CCIR_CODE_2);
+	} else {
+		dev_dbg(csi->ipu->dev, "capture field swap\n");
+
+		/* same as above but with F-bit inverted */
+		ipu_csi_write(csi, 0xD07DF | CSI_CCIR_ERR_DET_EN,
+			      CSI_CCIR_CODE_1);
+		ipu_csi_write(csi, 0x40596, CSI_CCIR_CODE_2);
+	}
+
+	ipu_csi_write(csi, 0xFF0000, CSI_CCIR_CODE_3);
+
+	return 0;
+}
+
+
 int ipu_csi_init_interface(struct ipu_csi *csi,
-			   struct v4l2_mbus_config *mbus_cfg,
-			   struct v4l2_mbus_framefmt *mbus_fmt)
+			   const struct v4l2_mbus_config *mbus_cfg,
+			   const struct v4l2_mbus_framefmt *infmt,
+			   const struct v4l2_mbus_framefmt *outfmt)
 {
 	struct ipu_csi_bus_config cfg;
 	unsigned long flags;
 	u32 width, height, data = 0;
+	v4l2_std_id std;
 	int ret;
 
-	ret = fill_csi_bus_cfg(&cfg, mbus_cfg, mbus_fmt);
+	ret = fill_csi_bus_cfg(&cfg, mbus_cfg, infmt);
 	if (ret < 0)
 		return ret;
 
 	/* set default sensor frame width and height */
-	width = mbus_fmt->width;
-	height = mbus_fmt->height;
+	width = infmt->width;
+	height = infmt->height;
+	if (infmt->field == V4L2_FIELD_ALTERNATE)
+		height *= 2;
 
 	/* Set the CSI_SENS_CONF register remaining fields */
 	data |= cfg.data_width << CSI_SENS_CONF_DATA_WIDTH_SHIFT |
@@ -416,42 +479,22 @@ int ipu_csi_init_interface(struct ipu_csi *csi,
 		ipu_csi_write(csi, 0xFF0000, CSI_CCIR_CODE_3);
 		break;
 	case IPU_CSI_CLK_MODE_CCIR656_INTERLACED:
-		if (mbus_fmt->width == 720 && mbus_fmt->height == 576) {
-			/*
-			 * PAL case
-			 *
-			 * Field0BlankEnd = 0x6, Field0BlankStart = 0x2,
-			 * Field0ActiveEnd = 0x4, Field0ActiveStart = 0
-			 * Field1BlankEnd = 0x7, Field1BlankStart = 0x3,
-			 * Field1ActiveEnd = 0x5, Field1ActiveStart = 0x1
-			 */
-			height = 625; /* framelines for PAL */
-
-			ipu_csi_write(csi, 0x40596 | CSI_CCIR_ERR_DET_EN,
-					  CSI_CCIR_CODE_1);
-			ipu_csi_write(csi, 0xD07DF, CSI_CCIR_CODE_2);
-			ipu_csi_write(csi, 0xFF0000, CSI_CCIR_CODE_3);
-		} else if (mbus_fmt->width == 720 && mbus_fmt->height == 480) {
-			/*
-			 * NTSC case
-			 *
-			 * Field0BlankEnd = 0x7, Field0BlankStart = 0x3,
-			 * Field0ActiveEnd = 0x5, Field0ActiveStart = 0x1
-			 * Field1BlankEnd = 0x6, Field1BlankStart = 0x2,
-			 * Field1ActiveEnd = 0x4, Field1ActiveStart = 0
-			 */
-			height = 525; /* framelines for NTSC */
-
-			ipu_csi_write(csi, 0xD07DF | CSI_CCIR_ERR_DET_EN,
-					  CSI_CCIR_CODE_1);
-			ipu_csi_write(csi, 0x40596, CSI_CCIR_CODE_2);
-			ipu_csi_write(csi, 0xFF0000, CSI_CCIR_CODE_3);
+		if (width == 720 && height == 480) {
+			std = V4L2_STD_NTSC;
+			height = 525;
+		} else if (width == 720 && height == 576) {
+			std = V4L2_STD_PAL;
+			height = 625;
 		} else {
 			dev_err(csi->ipu->dev,
-				"Unsupported CCIR656 interlaced video mode\n");
-			spin_unlock_irqrestore(&csi->lock, flags);
-			return -EINVAL;
+				"Unsupported interlaced video mode\n");
+			ret = -EINVAL;
+			goto out_unlock;
 		}
+
+		ret = ipu_csi_set_bt_interlaced_codes(csi, infmt, outfmt, std);
+		if (ret)
+			goto out_unlock;
 		break;
 	case IPU_CSI_CLK_MODE_CCIR1120_PROGRESSIVE_DDR:
 	case IPU_CSI_CLK_MODE_CCIR1120_PROGRESSIVE_SDR:
@@ -476,9 +519,10 @@ int ipu_csi_init_interface(struct ipu_csi *csi,
 	dev_dbg(csi->ipu->dev, "CSI_ACT_FRM_SIZE = 0x%08X\n",
 		ipu_csi_read(csi, CSI_ACT_FRM_SIZE));
 
+out_unlock:
 	spin_unlock_irqrestore(&csi->lock, flags);
 
-	return 0;
+	return ret;
 }
 EXPORT_SYMBOL_GPL(ipu_csi_init_interface);
 
diff --git a/drivers/staging/media/imx/imx-media-csi.c b/drivers/staging/media/imx/imx-media-csi.c
index 4223f8d418ae..c2a8d9cd31b7 100644
--- a/drivers/staging/media/imx/imx-media-csi.c
+++ b/drivers/staging/media/imx/imx-media-csi.c
@@ -679,12 +679,7 @@ static int csi_setup(struct csi_priv *priv)
 		priv->upstream_ep.bus.parallel.flags :
 		priv->upstream_ep.bus.mipi_csi2.flags;
 
-	/*
-	 * we need to pass input frame to CSI interface, but
-	 * with translated field type from output format
-	 */
 	if_fmt = *infmt;
-	if_fmt.field = outfmt->field;
 	crop = priv->crop;
 
 	/*
@@ -702,7 +697,7 @@ static int csi_setup(struct csi_priv *priv)
 			     priv->crop.width == 2 * priv->compose.width,
 			     priv->crop.height == 2 * priv->compose.height);
 
-	ipu_csi_init_interface(priv->csi, &mbus_cfg, &if_fmt);
+	ipu_csi_init_interface(priv->csi, &mbus_cfg, &if_fmt, outfmt);
 
 	ipu_csi_set_dest(priv->csi, priv->dest);
 
diff --git a/include/video/imx-ipu-v3.h b/include/video/imx-ipu-v3.h
index e582e8e7527a..bbc8481f567d 100644
--- a/include/video/imx-ipu-v3.h
+++ b/include/video/imx-ipu-v3.h
@@ -354,8 +354,9 @@ int ipu_prg_channel_configure(struct ipuv3_channel *ipu_chan,
  */
 struct ipu_csi;
 int ipu_csi_init_interface(struct ipu_csi *csi,
-			   struct v4l2_mbus_config *mbus_cfg,
-			   struct v4l2_mbus_framefmt *mbus_fmt);
+			   const struct v4l2_mbus_config *mbus_cfg,
+			   const struct v4l2_mbus_framefmt *infmt,
+			   const struct v4l2_mbus_framefmt *outfmt);
 bool ipu_csi_is_interlaced(struct ipu_csi *csi);
 void ipu_csi_get_window(struct ipu_csi *csi, struct v4l2_rect *w);
 void ipu_csi_set_window(struct ipu_csi *csi, struct v4l2_rect *w);
-- 
cgit v1.2.3


From 9b5c8d5ffb20cc4f4b1476cb1ad57e0f9b934788 Mon Sep 17 00:00:00 2001
From: Steve Longerbeam <slongerbeam@gmail.com>
Date: Wed, 9 Jan 2019 13:30:06 -0500
Subject: media: gpu: ipu-v3: Add planar support to interlaced scan

To support interlaced scan with planar formats, cpmem SLUV must
be programmed with the correct chroma line stride. For full and
partial planar 4:2:2 (YUV422P, NV16), chroma line stride must
be doubled. For full and partial planar 4:2:0 (YUV420, YVU420, NV12),
chroma line stride must _not_ be doubled, since a single chroma line
is shared by two luma lines.

Signed-off-by: Steve Longerbeam <slongerbeam@gmail.com>
Reviewed-by: Philipp Zabel <p.zabel@pengutronix.de>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 drivers/gpu/ipu-v3/ipu-cpmem.c              | 26 ++++++++++++++++++++++++--
 drivers/staging/media/imx/imx-ic-prpencvf.c |  3 ++-
 drivers/staging/media/imx/imx-media-csi.c   |  3 ++-
 include/video/imx-ipu-v3.h                  |  3 ++-
 4 files changed, 30 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/ipu-v3/ipu-cpmem.c b/drivers/gpu/ipu-v3/ipu-cpmem.c
index 163fadb8a33a..d047a6867c59 100644
--- a/drivers/gpu/ipu-v3/ipu-cpmem.c
+++ b/drivers/gpu/ipu-v3/ipu-cpmem.c
@@ -277,9 +277,10 @@ void ipu_cpmem_set_uv_offset(struct ipuv3_channel *ch, u32 u_off, u32 v_off)
 }
 EXPORT_SYMBOL_GPL(ipu_cpmem_set_uv_offset);
 
-void ipu_cpmem_interlaced_scan(struct ipuv3_channel *ch, int stride)
+void ipu_cpmem_interlaced_scan(struct ipuv3_channel *ch, int stride,
+			       u32 pixelformat)
 {
-	u32 ilo, sly;
+	u32 ilo, sly, sluv;
 
 	if (stride < 0) {
 		stride = -stride;
@@ -290,9 +291,30 @@ void ipu_cpmem_interlaced_scan(struct ipuv3_channel *ch, int stride)
 
 	sly = (stride * 2) - 1;
 
+	switch (pixelformat) {
+	case V4L2_PIX_FMT_YUV420:
+	case V4L2_PIX_FMT_YVU420:
+		sluv = stride / 2 - 1;
+		break;
+	case V4L2_PIX_FMT_NV12:
+		sluv = stride - 1;
+		break;
+	case V4L2_PIX_FMT_YUV422P:
+		sluv = stride - 1;
+		break;
+	case V4L2_PIX_FMT_NV16:
+		sluv = stride * 2 - 1;
+		break;
+	default:
+		sluv = 0;
+		break;
+	}
+
 	ipu_ch_param_write_field(ch, IPU_FIELD_SO, 1);
 	ipu_ch_param_write_field(ch, IPU_FIELD_ILO, ilo);
 	ipu_ch_param_write_field(ch, IPU_FIELD_SLY, sly);
+	if (sluv)
+		ipu_ch_param_write_field(ch, IPU_FIELD_SLUV, sluv);
 };
 EXPORT_SYMBOL_GPL(ipu_cpmem_interlaced_scan);
 
diff --git a/drivers/staging/media/imx/imx-ic-prpencvf.c b/drivers/staging/media/imx/imx-ic-prpencvf.c
index 28f41caba05d..af7224846bd5 100644
--- a/drivers/staging/media/imx/imx-ic-prpencvf.c
+++ b/drivers/staging/media/imx/imx-ic-prpencvf.c
@@ -412,7 +412,8 @@ static int prp_setup_channel(struct prp_priv *priv,
 	if (image.pix.field == V4L2_FIELD_NONE &&
 	    V4L2_FIELD_HAS_BOTH(infmt->field) &&
 	    channel == priv->out_ch)
-		ipu_cpmem_interlaced_scan(channel, image.pix.bytesperline);
+		ipu_cpmem_interlaced_scan(channel, image.pix.bytesperline,
+					  image.pix.pixelformat);
 
 	ret = ipu_ic_task_idma_init(priv->ic, channel,
 				    image.pix.width, image.pix.height,
diff --git a/drivers/staging/media/imx/imx-media-csi.c b/drivers/staging/media/imx/imx-media-csi.c
index c2a8d9cd31b7..da4808348845 100644
--- a/drivers/staging/media/imx/imx-media-csi.c
+++ b/drivers/staging/media/imx/imx-media-csi.c
@@ -512,7 +512,8 @@ static int csi_idmac_setup_channel(struct csi_priv *priv)
 	if (image.pix.field == V4L2_FIELD_NONE &&
 	    V4L2_FIELD_HAS_BOTH(infmt->field))
 		ipu_cpmem_interlaced_scan(priv->idmac_ch,
-					  image.pix.bytesperline);
+					  image.pix.bytesperline,
+					  image.pix.pixelformat);
 
 	ipu_idmac_set_double_buffer(priv->idmac_ch, true);
 
diff --git a/include/video/imx-ipu-v3.h b/include/video/imx-ipu-v3.h
index bbc8481f567d..c887f4bee5f8 100644
--- a/include/video/imx-ipu-v3.h
+++ b/include/video/imx-ipu-v3.h
@@ -258,7 +258,8 @@ void ipu_cpmem_set_stride(struct ipuv3_channel *ch, int stride);
 void ipu_cpmem_set_high_priority(struct ipuv3_channel *ch);
 void ipu_cpmem_set_buffer(struct ipuv3_channel *ch, int bufnum, dma_addr_t buf);
 void ipu_cpmem_set_uv_offset(struct ipuv3_channel *ch, u32 u_off, u32 v_off);
-void ipu_cpmem_interlaced_scan(struct ipuv3_channel *ch, int stride);
+void ipu_cpmem_interlaced_scan(struct ipuv3_channel *ch, int stride,
+			       u32 pixelformat);
 void ipu_cpmem_set_axi_id(struct ipuv3_channel *ch, u32 id);
 int ipu_cpmem_get_burstsize(struct ipuv3_channel *ch);
 void ipu_cpmem_set_burstsize(struct ipuv3_channel *ch, int burstsize);
-- 
cgit v1.2.3


From 8e1f456129e61371fb190c71ea182a9f6e21282e Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzk@kernel.org>
Date: Wed, 9 Jan 2019 15:44:46 +0100
Subject: leds: Add helper for getting default pattern from Device Tree

Multiple LED triggers might need to access default pattern so add a
helper for that.

Signed-off-by: Krzysztof Kozlowski <krzk@kernel.org>
Acked-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Jacek Anaszewski <jacek.anaszewski@gmail.com>
---
 drivers/leds/led-core.c | 30 ++++++++++++++++++++++++++++++
 include/linux/leds.h    | 13 +++++++++++++
 2 files changed, 43 insertions(+)

(limited to 'include')

diff --git a/drivers/leds/led-core.c b/drivers/leds/led-core.c
index ede4fa0ac2cc..e3da7c03da1b 100644
--- a/drivers/leds/led-core.c
+++ b/drivers/leds/led-core.c
@@ -16,7 +16,9 @@
 #include <linux/list.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
+#include <linux/of.h>
 #include <linux/rwsem.h>
+#include <linux/slab.h>
 #include "leds.h"
 
 DECLARE_RWSEM(leds_list_lock);
@@ -310,6 +312,34 @@ int led_update_brightness(struct led_classdev *led_cdev)
 }
 EXPORT_SYMBOL_GPL(led_update_brightness);
 
+u32 *led_get_default_pattern(struct led_classdev *led_cdev, unsigned int *size)
+{
+	struct device_node *np = dev_of_node(led_cdev->dev);
+	u32 *pattern;
+	int count;
+
+	if (!np)
+		return NULL;
+
+	count = of_property_count_u32_elems(np, "led-pattern");
+	if (count < 0)
+		return NULL;
+
+	pattern = kcalloc(count, sizeof(*pattern), GFP_KERNEL);
+	if (!pattern)
+		return NULL;
+
+	if (of_property_read_u32_array(np, "led-pattern", pattern, count)) {
+		kfree(pattern);
+		return NULL;
+	}
+
+	*size = count;
+
+	return pattern;
+}
+EXPORT_SYMBOL_GPL(led_get_default_pattern);
+
 /* Caller must ensure led_cdev->led_access held */
 void led_sysfs_disable(struct led_classdev *led_cdev)
 {
diff --git a/include/linux/leds.h b/include/linux/leds.h
index 5263f87e1d2c..78204650fe2a 100644
--- a/include/linux/leds.h
+++ b/include/linux/leds.h
@@ -218,6 +218,19 @@ extern int led_set_brightness_sync(struct led_classdev *led_cdev,
  */
 extern int led_update_brightness(struct led_classdev *led_cdev);
 
+/**
+ * led_get_default_pattern - return default pattern
+ *
+ * @led_cdev: the LED to get default pattern for
+ * @size:     pointer for storing the number of elements in returned array,
+ *            modified only if return != NULL
+ *
+ * Return:    Allocated array of integers with default pattern from device tree
+ *            or NULL.  Caller is responsible for kfree().
+ */
+extern u32 *led_get_default_pattern(struct led_classdev *led_cdev,
+				    unsigned int *size);
+
 /**
  * led_sysfs_disable - disable LED sysfs interface
  * @led_cdev: the LED to set
-- 
cgit v1.2.3


From da7b9e9b00d4859d0fa563d7fe9a2b2dff59e852 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Tue, 15 Jan 2019 14:43:04 -0800
Subject: net: dsa: Add ndo_get_phys_port_name() for CPU port

There is not currently way to infer the port number through sysfs that
is being used as the CPU port number. Overlay a ndo_get_phys_port_name()
operation onto the DSA master network device in order to retrieve that
information.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dsa.h |  5 +++++
 net/dsa/master.c  | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 60 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/dsa.h b/include/net/dsa.h
index b3eefe8e18fd..2f1daf29131a 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -208,6 +208,11 @@ struct dsa_port {
 	 * Original copy of the master netdev ethtool_ops
 	 */
 	const struct ethtool_ops *orig_ethtool_ops;
+
+	/*
+	 * Original copy of the master netdev net_device_ops
+	 */
+	const struct net_device_ops *orig_ndo_ops;
 };
 
 struct dsa_switch {
diff --git a/net/dsa/master.c b/net/dsa/master.c
index 71bb15f491c8..79e97d2f2d9b 100644
--- a/net/dsa/master.c
+++ b/net/dsa/master.c
@@ -126,6 +126,17 @@ static void dsa_master_get_strings(struct net_device *dev, uint32_t stringset,
 	}
 }
 
+static int dsa_master_get_phys_port_name(struct net_device *dev,
+					 char *name, size_t len)
+{
+	struct dsa_port *cpu_dp = dev->dsa_ptr;
+
+	if (snprintf(name, len, "p%d", cpu_dp->index) >= len)
+		return -EINVAL;
+
+	return 0;
+}
+
 static int dsa_master_ethtool_setup(struct net_device *dev)
 {
 	struct dsa_port *cpu_dp = dev->dsa_ptr;
@@ -158,6 +169,38 @@ static void dsa_master_ethtool_teardown(struct net_device *dev)
 	cpu_dp->orig_ethtool_ops = NULL;
 }
 
+static int dsa_master_ndo_setup(struct net_device *dev)
+{
+	struct dsa_port *cpu_dp = dev->dsa_ptr;
+	struct dsa_switch *ds = cpu_dp->ds;
+	struct net_device_ops *ops;
+
+	if (dev->netdev_ops->ndo_get_phys_port_name)
+		return 0;
+
+	ops = devm_kzalloc(ds->dev, sizeof(*ops), GFP_KERNEL);
+	if (!ops)
+		return -ENOMEM;
+
+	cpu_dp->orig_ndo_ops = dev->netdev_ops;
+	if (cpu_dp->orig_ndo_ops)
+		memcpy(ops, cpu_dp->orig_ndo_ops, sizeof(*ops));
+
+	ops->ndo_get_phys_port_name = dsa_master_get_phys_port_name;
+
+	dev->netdev_ops  = ops;
+
+	return 0;
+}
+
+static void dsa_master_ndo_teardown(struct net_device *dev)
+{
+	struct dsa_port *cpu_dp = dev->dsa_ptr;
+
+	dev->netdev_ops = cpu_dp->orig_ndo_ops;
+	cpu_dp->orig_ndo_ops = NULL;
+}
+
 static ssize_t tagging_show(struct device *d, struct device_attribute *attr,
 			    char *buf)
 {
@@ -223,16 +266,27 @@ int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp)
 	if (ret)
 		return ret;
 
+	ret = dsa_master_ndo_setup(dev);
+	if (ret)
+		goto out_err_ethtool_teardown;
+
 	ret = sysfs_create_group(&dev->dev.kobj, &dsa_group);
 	if (ret)
-		dsa_master_ethtool_teardown(dev);
+		goto out_err_ndo_teardown;
+
+	return ret;
 
+out_err_ndo_teardown:
+	dsa_master_ndo_teardown(dev);
+out_err_ethtool_teardown:
+	dsa_master_ethtool_teardown(dev);
 	return ret;
 }
 
 void dsa_master_teardown(struct net_device *dev)
 {
 	sysfs_remove_group(&dev->dev.kobj, &dsa_group);
+	dsa_master_ndo_teardown(dev);
 	dsa_master_ethtool_teardown(dev);
 	dsa_master_reset_mtu(dev);
 
-- 
cgit v1.2.3


From fcd44b64b1eb0a33f6cc14f21dcb927ffd664af3 Mon Sep 17 00:00:00 2001
From: Yogesh Narayan Gaur <yogeshnarayan.gaur@nxp.com>
Date: Tue, 15 Jan 2019 10:05:10 +0000
Subject: mtd: spi-nor: add opcodes for octal Read/Write commands

- Add opcodes for octal I/O commands
  * Read  : 1-1-8 and 1-8-8 protocol
  * Write : 1-1-8 and 1-8-8 protocol
  * opcodes for 4-byte address mode command

- Entry of macros in _convert_3to4_xxx function

- Add flag SPI_NOR_OCTAL_READ specifying flash support octal read
  commands. This flag is required for flashes which didn't provides
  support for auto detection of Octal mode capabilities i.e. not
  seems to support newer JESD216C standard.

Signed-off-by: Vignesh R <vigneshr@ti.com>
Signed-off-by: Yogesh Narayan Gaur <yogeshnarayan.gaur@nxp.com>
Reviewed-by: Tudor Ambarus <tudor.ambarus@microchip.com>
Signed-off-by: Boris Brezillon <bbrezillon@kernel.org>
---
 drivers/mtd/spi-nor/spi-nor.c | 16 ++++++++++++++--
 include/linux/mtd/spi-nor.h   | 16 ++++++++++++----
 2 files changed, 26 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c
index 6e13bbd1aaa5..872d70722672 100644
--- a/drivers/mtd/spi-nor/spi-nor.c
+++ b/drivers/mtd/spi-nor/spi-nor.c
@@ -68,7 +68,7 @@ enum spi_nor_read_command_index {
 	SNOR_CMD_READ_4_4_4,
 	SNOR_CMD_READ_1_4_4_DTR,
 
-	/* Octo SPI */
+	/* Octal SPI */
 	SNOR_CMD_READ_1_1_8,
 	SNOR_CMD_READ_1_8_8,
 	SNOR_CMD_READ_8_8_8,
@@ -85,7 +85,7 @@ enum spi_nor_pp_command_index {
 	SNOR_CMD_PP_1_4_4,
 	SNOR_CMD_PP_4_4_4,
 
-	/* Octo SPI */
+	/* Octal SPI */
 	SNOR_CMD_PP_1_1_8,
 	SNOR_CMD_PP_1_8_8,
 	SNOR_CMD_PP_8_8_8,
@@ -278,6 +278,7 @@ struct flash_info {
 #define NO_CHIP_ERASE		BIT(12) /* Chip does not support chip erase */
 #define SPI_NOR_SKIP_SFDP	BIT(13)	/* Skip parsing of SFDP tables */
 #define USE_CLSR		BIT(14)	/* use CLSR command */
+#define SPI_NOR_OCTAL_READ	BIT(15)	/* Flash supports Octal Read */
 
 	/* Part specific fixup hooks. */
 	const struct spi_nor_fixups *fixups;
@@ -398,6 +399,8 @@ static u8 spi_nor_convert_3to4_read(u8 opcode)
 		{ SPINOR_OP_READ_1_2_2,	SPINOR_OP_READ_1_2_2_4B },
 		{ SPINOR_OP_READ_1_1_4,	SPINOR_OP_READ_1_1_4_4B },
 		{ SPINOR_OP_READ_1_4_4,	SPINOR_OP_READ_1_4_4_4B },
+		{ SPINOR_OP_READ_1_1_8,	SPINOR_OP_READ_1_1_8_4B },
+		{ SPINOR_OP_READ_1_8_8,	SPINOR_OP_READ_1_8_8_4B },
 
 		{ SPINOR_OP_READ_1_1_1_DTR,	SPINOR_OP_READ_1_1_1_DTR_4B },
 		{ SPINOR_OP_READ_1_2_2_DTR,	SPINOR_OP_READ_1_2_2_DTR_4B },
@@ -414,6 +417,8 @@ static u8 spi_nor_convert_3to4_program(u8 opcode)
 		{ SPINOR_OP_PP,		SPINOR_OP_PP_4B },
 		{ SPINOR_OP_PP_1_1_4,	SPINOR_OP_PP_1_1_4_4B },
 		{ SPINOR_OP_PP_1_4_4,	SPINOR_OP_PP_1_4_4_4B },
+		{ SPINOR_OP_PP_1_1_8,	SPINOR_OP_PP_1_1_8_4B },
+		{ SPINOR_OP_PP_1_8_8,	SPINOR_OP_PP_1_8_8_4B },
 	};
 
 	return spi_nor_convert_opcode(opcode, spi_nor_3to4_program,
@@ -3591,6 +3596,13 @@ static int spi_nor_init_params(struct spi_nor *nor,
 					  SNOR_PROTO_1_1_4);
 	}
 
+	if (info->flags & SPI_NOR_OCTAL_READ) {
+		params->hwcaps.mask |= SNOR_HWCAPS_READ_1_1_8;
+		spi_nor_set_read_settings(&params->reads[SNOR_CMD_READ_1_1_8],
+					  0, 8, SPINOR_OP_READ_1_1_8,
+					  SNOR_PROTO_1_1_8);
+	}
+
 	/* Page Program settings. */
 	params->hwcaps.mask |= SNOR_HWCAPS_PP;
 	spi_nor_set_pp_settings(&params->page_programs[SNOR_CMD_PP],
diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h
index fa2d89e38e40..2353af8bac99 100644
--- a/include/linux/mtd/spi-nor.h
+++ b/include/linux/mtd/spi-nor.h
@@ -46,9 +46,13 @@
 #define SPINOR_OP_READ_1_2_2	0xbb	/* Read data bytes (Dual I/O SPI) */
 #define SPINOR_OP_READ_1_1_4	0x6b	/* Read data bytes (Quad Output SPI) */
 #define SPINOR_OP_READ_1_4_4	0xeb	/* Read data bytes (Quad I/O SPI) */
+#define SPINOR_OP_READ_1_1_8	0x8b	/* Read data bytes (Octal Output SPI) */
+#define SPINOR_OP_READ_1_8_8	0xcb	/* Read data bytes (Octal I/O SPI) */
 #define SPINOR_OP_PP		0x02	/* Page program (up to 256 bytes) */
 #define SPINOR_OP_PP_1_1_4	0x32	/* Quad page program */
 #define SPINOR_OP_PP_1_4_4	0x38	/* Quad page program */
+#define SPINOR_OP_PP_1_1_8	0x82	/* Octal page program */
+#define SPINOR_OP_PP_1_8_8	0xc2	/* Octal page program */
 #define SPINOR_OP_BE_4K		0x20	/* Erase 4KiB block */
 #define SPINOR_OP_BE_4K_PMC	0xd7	/* Erase 4KiB block on PMC chips */
 #define SPINOR_OP_BE_32K	0x52	/* Erase 32KiB block */
@@ -69,9 +73,13 @@
 #define SPINOR_OP_READ_1_2_2_4B	0xbc	/* Read data bytes (Dual I/O SPI) */
 #define SPINOR_OP_READ_1_1_4_4B	0x6c	/* Read data bytes (Quad Output SPI) */
 #define SPINOR_OP_READ_1_4_4_4B	0xec	/* Read data bytes (Quad I/O SPI) */
+#define SPINOR_OP_READ_1_1_8_4B	0x7c	/* Read data bytes (Octal Output SPI) */
+#define SPINOR_OP_READ_1_8_8_4B	0xcc	/* Read data bytes (Octal I/O SPI) */
 #define SPINOR_OP_PP_4B		0x12	/* Page program (up to 256 bytes) */
 #define SPINOR_OP_PP_1_1_4_4B	0x34	/* Quad page program */
 #define SPINOR_OP_PP_1_4_4_4B	0x3e	/* Quad page program */
+#define SPINOR_OP_PP_1_1_8_4B	0x84	/* Octal page program */
+#define SPINOR_OP_PP_1_8_8_4B	0x8e	/* Octal page program */
 #define SPINOR_OP_BE_4K_4B	0x21	/* Erase 4KiB block */
 #define SPINOR_OP_BE_32K_4B	0x5c	/* Erase 32KiB block */
 #define SPINOR_OP_SE_4B		0xdc	/* Sector erase (usually 64KiB) */
@@ -458,7 +466,7 @@ struct spi_nor_hwcaps {
 /*
  *(Fast) Read capabilities.
  * MUST be ordered by priority: the higher bit position, the higher priority.
- * As a matter of performances, it is relevant to use Octo SPI protocols first,
+ * As a matter of performances, it is relevant to use Octal SPI protocols first,
  * then Quad SPI protocols before Dual SPI protocols, Fast Read and lastly
  * (Slow) Read.
  */
@@ -479,7 +487,7 @@ struct spi_nor_hwcaps {
 #define SNOR_HWCAPS_READ_4_4_4		BIT(9)
 #define SNOR_HWCAPS_READ_1_4_4_DTR	BIT(10)
 
-#define SNOR_HWCPAS_READ_OCTO		GENMASK(14, 11)
+#define SNOR_HWCPAS_READ_OCTAL		GENMASK(14, 11)
 #define SNOR_HWCAPS_READ_1_1_8		BIT(11)
 #define SNOR_HWCAPS_READ_1_8_8		BIT(12)
 #define SNOR_HWCAPS_READ_8_8_8		BIT(13)
@@ -488,7 +496,7 @@ struct spi_nor_hwcaps {
 /*
  * Page Program capabilities.
  * MUST be ordered by priority: the higher bit position, the higher priority.
- * Like (Fast) Read capabilities, Octo/Quad SPI protocols are preferred to the
+ * Like (Fast) Read capabilities, Octal/Quad SPI protocols are preferred to the
  * legacy SPI 1-1-1 protocol.
  * Note that Dual Page Programs are not supported because there is no existing
  * JEDEC/SFDP standard to define them. Also at this moment no SPI flash memory
@@ -502,7 +510,7 @@ struct spi_nor_hwcaps {
 #define SNOR_HWCAPS_PP_1_4_4	BIT(18)
 #define SNOR_HWCAPS_PP_4_4_4	BIT(19)
 
-#define SNOR_HWCAPS_PP_OCTO	GENMASK(22, 20)
+#define SNOR_HWCAPS_PP_OCTAL	GENMASK(22, 20)
 #define SNOR_HWCAPS_PP_1_1_8	BIT(20)
 #define SNOR_HWCAPS_PP_1_8_8	BIT(21)
 #define SNOR_HWCAPS_PP_8_8_8	BIT(22)
-- 
cgit v1.2.3


From b172fd0c898022c47161a99cb40be5304b0d3fd0 Mon Sep 17 00:00:00 2001
From: Alban Bedel <albeu@free.fr>
Date: Wed, 16 Jan 2019 19:55:46 +0100
Subject: spi: ath79: Enable support for compile test

To allow building this driver in compile test we need to remove all
dependency on headers from arch/mips/include. To allow this we
explicitly define all the registers locally instead of using
ar71xx_regs.h and we move the platform data struct definition to
include/linux/platform_data/spi-ath79.h.

Signed-off-by: Alban Bedel <albeu@free.fr>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 arch/mips/ath79/dev-spi.h                             |  2 +-
 arch/mips/include/asm/mach-ath79/ath79_spi_platform.h | 19 -------------------
 drivers/spi/Kconfig                                   |  2 +-
 drivers/spi/spi-ath79.c                               | 15 ++++++++++++---
 include/linux/platform_data/spi-ath79.h               | 19 +++++++++++++++++++
 5 files changed, 33 insertions(+), 24 deletions(-)
 delete mode 100644 arch/mips/include/asm/mach-ath79/ath79_spi_platform.h
 create mode 100644 include/linux/platform_data/spi-ath79.h

(limited to 'include')

diff --git a/arch/mips/ath79/dev-spi.h b/arch/mips/ath79/dev-spi.h
index d732565ca736..6e15bc8651be 100644
--- a/arch/mips/ath79/dev-spi.h
+++ b/arch/mips/ath79/dev-spi.h
@@ -13,7 +13,7 @@
 #define _ATH79_DEV_SPI_H
 
 #include <linux/spi/spi.h>
-#include <asm/mach-ath79/ath79_spi_platform.h>
+#include <linux/platform_data/spi-ath79.h>
 
 void ath79_register_spi(struct ath79_spi_platform_data *pdata,
 			 struct spi_board_info const *info,
diff --git a/arch/mips/include/asm/mach-ath79/ath79_spi_platform.h b/arch/mips/include/asm/mach-ath79/ath79_spi_platform.h
deleted file mode 100644
index aa71216edf99..000000000000
--- a/arch/mips/include/asm/mach-ath79/ath79_spi_platform.h
+++ /dev/null
@@ -1,19 +0,0 @@
-/*
- *  Platform data definition for Atheros AR71XX/AR724X/AR913X SPI controller
- *
- *  Copyright (C) 2008-2010 Gabor Juhos <juhosg@openwrt.org>
- *
- *  This program is free software; you can redistribute it and/or modify it
- *  under the terms of the GNU General Public License version 2 as published
- *  by the Free Software Foundation.
- */
-
-#ifndef _ATH79_SPI_PLATFORM_H
-#define _ATH79_SPI_PLATFORM_H
-
-struct ath79_spi_platform_data {
-	unsigned	bus_num;
-	unsigned	num_chipselect;
-};
-
-#endif /* _ATH79_SPI_PLATFORM_H */
diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig
index 128892c7e21e..71d3d2d5e5d1 100644
--- a/drivers/spi/Kconfig
+++ b/drivers/spi/Kconfig
@@ -63,7 +63,7 @@ config SPI_ALTERA
 
 config SPI_ATH79
 	tristate "Atheros AR71XX/AR724X/AR913X SPI controller driver"
-	depends on ATH79
+	depends on ATH79 || COMPILE_TEST
 	select SPI_BITBANG
 	help
 	  This enables support for the SPI controller present on the
diff --git a/drivers/spi/spi-ath79.c b/drivers/spi/spi-ath79.c
index edf695a359f4..09c4fb7fcf7a 100644
--- a/drivers/spi/spi-ath79.c
+++ b/drivers/spi/spi-ath79.c
@@ -23,15 +23,24 @@
 #include <linux/bitops.h>
 #include <linux/clk.h>
 #include <linux/err.h>
-
-#include <asm/mach-ath79/ar71xx_regs.h>
-#include <asm/mach-ath79/ath79_spi_platform.h>
+#include <linux/platform_data/spi-ath79.h>
 
 #define DRV_NAME	"ath79-spi"
 
 #define ATH79_SPI_RRW_DELAY_FACTOR	12000
 #define MHZ				(1000 * 1000)
 
+#define AR71XX_SPI_REG_FS		0x00	/* Function Select */
+#define AR71XX_SPI_REG_CTRL		0x04	/* SPI Control */
+#define AR71XX_SPI_REG_IOC		0x08	/* SPI I/O Control */
+#define AR71XX_SPI_REG_RDS		0x0c	/* Read Data Shift */
+
+#define AR71XX_SPI_FS_GPIO		BIT(0)	/* Enable GPIO mode */
+
+#define AR71XX_SPI_IOC_DO		BIT(0)	/* Data Out pin */
+#define AR71XX_SPI_IOC_CLK		BIT(8)	/* CLK pin */
+#define AR71XX_SPI_IOC_CS(n)		BIT(16 + (n))
+
 struct ath79_spi {
 	struct spi_bitbang	bitbang;
 	u32			ioc_base;
diff --git a/include/linux/platform_data/spi-ath79.h b/include/linux/platform_data/spi-ath79.h
new file mode 100644
index 000000000000..aa71216edf99
--- /dev/null
+++ b/include/linux/platform_data/spi-ath79.h
@@ -0,0 +1,19 @@
+/*
+ *  Platform data definition for Atheros AR71XX/AR724X/AR913X SPI controller
+ *
+ *  Copyright (C) 2008-2010 Gabor Juhos <juhosg@openwrt.org>
+ *
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ */
+
+#ifndef _ATH79_SPI_PLATFORM_H
+#define _ATH79_SPI_PLATFORM_H
+
+struct ath79_spi_platform_data {
+	unsigned	bus_num;
+	unsigned	num_chipselect;
+};
+
+#endif /* _ATH79_SPI_PLATFORM_H */
-- 
cgit v1.2.3


From adbb78e1104a006275e0e8e37526f617bb2a3d91 Mon Sep 17 00:00:00 2001
From: Ulrich Hecht <ulrich.hecht+renesas@gmail.com>
Date: Thu, 10 Jan 2019 16:32:32 +0100
Subject: ARM: dts: r8a7778: Add HSCIF0/1 support

Add HSCIF0/1 clocks and device nodes, based on Rev. 1.00 of the R-Car
M1A datasheet.

Signed-off-by: Ulrich Hecht <ulrich.hecht+renesas@gmail.com>
[geert: Squashed two patches]
[geert: Correct HSCIF1 module clock index]
[geert: Correct reg properties for non-LPAE]
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Simon Horman <horms+renesas@verge.net.au>
---
 arch/arm/boot/dts/r8a7778.dtsi            | 28 ++++++++++++++++++++++++++++
 include/dt-bindings/clock/r8a7778-clock.h |  2 ++
 2 files changed, 30 insertions(+)

(limited to 'include')

diff --git a/arch/arm/boot/dts/r8a7778.dtsi b/arch/arm/boot/dts/r8a7778.dtsi
index 05db0ccad7a6..10d996d2941f 100644
--- a/arch/arm/boot/dts/r8a7778.dtsi
+++ b/arch/arm/boot/dts/r8a7778.dtsi
@@ -367,6 +367,30 @@
 		status = "disabled";
 	};
 
+	hscif0: serial@ffe48000 {
+		compatible = "renesas,hscif-r8a7778",
+			     "renesas,rcar-gen1-hscif", "renesas,hscif";
+		reg = <0xffe48000 96>;
+		interrupts = <GIC_SPI 118 IRQ_TYPE_LEVEL_HIGH>;
+		clocks = <&mstp0_clks R8A7778_CLK_HSCIF0>,
+			 <&cpg_clocks R8A7778_CLK_S>, <&scif_clk>;
+		clock-names = "fck", "brg_int", "scif_clk";
+		power-domains = <&cpg_clocks>;
+		status = "disabled";
+	};
+
+	hscif1: serial@ffe49000 {
+		compatible = "renesas,hscif-r8a7778",
+			     "renesas,rcar-gen1-hscif", "renesas,hscif";
+		reg = <0xffe49000 96>;
+		interrupts = <GIC_SPI 119 IRQ_TYPE_LEVEL_HIGH>;
+		clocks = <&mstp0_clks R8A7778_CLK_HSCIF1>,
+			 <&cpg_clocks R8A7778_CLK_S>, <&scif_clk>;
+		clock-names = "fck", "brg_int", "scif_clk";
+		power-domains = <&cpg_clocks>;
+		status = "disabled";
+	};
+
 	mmcif: mmc@ffe4e000 {
 		compatible = "renesas,mmcif-r8a7778", "renesas,sh-mmcif";
 		reg = <0xffe4e000 0x100>;
@@ -535,6 +559,8 @@
 				 <&cpg_clocks R8A7778_CLK_P>,
 				 <&cpg_clocks R8A7778_CLK_P>,
 				 <&cpg_clocks R8A7778_CLK_P>,
+				 <&cpg_clocks R8A7778_CLK_S>,
+				 <&cpg_clocks R8A7778_CLK_S>,
 				 <&cpg_clocks R8A7778_CLK_P>,
 				 <&cpg_clocks R8A7778_CLK_P>,
 				 <&cpg_clocks R8A7778_CLK_P>,
@@ -551,6 +577,7 @@
 				R8A7778_CLK_SCIF0 R8A7778_CLK_SCIF1
 				R8A7778_CLK_SCIF2 R8A7778_CLK_SCIF3
 				R8A7778_CLK_SCIF4 R8A7778_CLK_SCIF5
+				R8A7778_CLK_HSCIF0 R8A7778_CLK_HSCIF1
 				R8A7778_CLK_TMU0 R8A7778_CLK_TMU1
 				R8A7778_CLK_TMU2 R8A7778_CLK_SSI0
 				R8A7778_CLK_SSI1 R8A7778_CLK_SSI2
@@ -560,6 +587,7 @@
 			clock-output-names =
 				"i2c0", "i2c1", "i2c2", "i2c3", "scif0",
 				"scif1", "scif2", "scif3", "scif4", "scif5",
+				"hscif0", "hscif1",
 				"tmu0", "tmu1", "tmu2", "ssi0", "ssi1",
 				"ssi2", "ssi3", "sru", "hspi";
 		};
diff --git a/include/dt-bindings/clock/r8a7778-clock.h b/include/dt-bindings/clock/r8a7778-clock.h
index f6b07c5399de..d0bff9ec5c66 100644
--- a/include/dt-bindings/clock/r8a7778-clock.h
+++ b/include/dt-bindings/clock/r8a7778-clock.h
@@ -30,6 +30,8 @@
 #define R8A7778_CLK_SCIF3	23
 #define R8A7778_CLK_SCIF4	22
 #define R8A7778_CLK_SCIF5	21
+#define R8A7778_CLK_HSCIF0	19
+#define R8A7778_CLK_HSCIF1	18
 #define R8A7778_CLK_TMU0	16
 #define R8A7778_CLK_TMU1	15
 #define R8A7778_CLK_TMU2	14
-- 
cgit v1.2.3


From 6d7fbce7da0cd06ff3f3f30e009a15a6243f0bc0 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 17 Jan 2019 12:02:57 -0500
Subject: kill kernfs_pin_sb()

unused now and impossible to use safely anyway.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/kernfs/mount.c      | 30 ------------------------------
 include/linux/kernfs.h |  1 -
 2 files changed, 31 deletions(-)

(limited to 'include')

diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c
index d71c9405874a..4d303047a4f8 100644
--- a/fs/kernfs/mount.c
+++ b/fs/kernfs/mount.c
@@ -377,36 +377,6 @@ void kernfs_kill_sb(struct super_block *sb)
 	kfree(info);
 }
 
-/**
- * kernfs_pin_sb: try to pin the superblock associated with a kernfs_root
- * @kernfs_root: the kernfs_root in question
- * @ns: the namespace tag
- *
- * Pin the superblock so the superblock won't be destroyed in subsequent
- * operations.  This can be used to block ->kill_sb() which may be useful
- * for kernfs users which dynamically manage superblocks.
- *
- * Returns NULL if there's no superblock associated to this kernfs_root, or
- * -EINVAL if the superblock is being freed.
- */
-struct super_block *kernfs_pin_sb(struct kernfs_root *root, const void *ns)
-{
-	struct kernfs_super_info *info;
-	struct super_block *sb = NULL;
-
-	mutex_lock(&kernfs_mutex);
-	list_for_each_entry(info, &root->supers, node) {
-		if (info->ns == ns) {
-			sb = info->sb;
-			if (!atomic_inc_not_zero(&info->sb->s_active))
-				sb = ERR_PTR(-EINVAL);
-			break;
-		}
-	}
-	mutex_unlock(&kernfs_mutex);
-	return sb;
-}
-
 void __init kernfs_init(void)
 {
 
diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h
index 5b36b1287a5a..44acb4c3659c 100644
--- a/include/linux/kernfs.h
+++ b/include/linux/kernfs.h
@@ -357,7 +357,6 @@ struct dentry *kernfs_mount_ns(struct file_system_type *fs_type, int flags,
 			       struct kernfs_root *root, unsigned long magic,
 			       bool *new_sb_created, const void *ns);
 void kernfs_kill_sb(struct super_block *sb);
-struct super_block *kernfs_pin_sb(struct kernfs_root *root, const void *ns);
 
 void kernfs_init(void);
 
-- 
cgit v1.2.3


From ecfc937210e5fdc6554e49b2a735ff22e72ae3f0 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Tue, 15 Jan 2019 15:06:11 -0800
Subject: net: dsa: Split platform data to header file

Instead of having net/dsa.h contain both the internal switch tree/driver
structures, split the relevant platform_data parts into
include/linux/platform_data/dsa.h and make that header be included by
net/dsa.h in order not to break any setup. A subsequent set of patches
will update code including net/dsa.h to include only the platform_data
header.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 MAINTAINERS                       |  1 +
 include/linux/platform_data/dsa.h | 68 +++++++++++++++++++++++++++++++++++++++
 include/net/dsa.h                 | 61 +----------------------------------
 3 files changed, 70 insertions(+), 60 deletions(-)
 create mode 100644 include/linux/platform_data/dsa.h

(limited to 'include')

diff --git a/MAINTAINERS b/MAINTAINERS
index 4d04cebb4a71..a592b9992b46 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -10576,6 +10576,7 @@ F:	Documentation/devicetree/bindings/net/dsa/
 F:	net/dsa/
 F:	include/net/dsa.h
 F:	include/linux/dsa/
+F:	include/linux/platform_data/dsa.h
 F:	drivers/net/dsa/
 
 NETWORKING [GENERAL]
diff --git a/include/linux/platform_data/dsa.h b/include/linux/platform_data/dsa.h
new file mode 100644
index 000000000000..d4d9bf2060a6
--- /dev/null
+++ b/include/linux/platform_data/dsa.h
@@ -0,0 +1,68 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __DSA_PDATA_H
+#define __DSA_PDATA_H
+
+struct device;
+struct net_device;
+
+#define DSA_MAX_SWITCHES	4
+#define DSA_MAX_PORTS		12
+#define DSA_RTABLE_NONE		-1
+
+struct dsa_chip_data {
+	/*
+	 * How to access the switch configuration registers.
+	 */
+	struct device	*host_dev;
+	int		sw_addr;
+
+	/*
+	 * Reference to network devices
+	 */
+	struct device	*netdev[DSA_MAX_PORTS];
+
+	/* set to size of eeprom if supported by the switch */
+	int		eeprom_len;
+
+	/* Device tree node pointer for this specific switch chip
+	 * used during switch setup in case additional properties
+	 * and resources needs to be used
+	 */
+	struct device_node *of_node;
+
+	/*
+	 * The names of the switch's ports.  Use "cpu" to
+	 * designate the switch port that the cpu is connected to,
+	 * "dsa" to indicate that this port is a DSA link to
+	 * another switch, NULL to indicate the port is unused,
+	 * or any other string to indicate this is a physical port.
+	 */
+	char		*port_names[DSA_MAX_PORTS];
+	struct device_node *port_dn[DSA_MAX_PORTS];
+
+	/*
+	 * An array of which element [a] indicates which port on this
+	 * switch should be used to send packets to that are destined
+	 * for switch a. Can be NULL if there is only one switch chip.
+	 */
+	s8		rtable[DSA_MAX_SWITCHES];
+};
+
+struct dsa_platform_data {
+	/*
+	 * Reference to a Linux network interface that connects
+	 * to the root switch chip of the tree.
+	 */
+	struct device	*netdev;
+	struct net_device *of_netdev;
+
+	/*
+	 * Info structs describing each of the switch chips
+	 * connected via this network interface.
+	 */
+	int		nr_chips;
+	struct dsa_chip_data	*chip;
+};
+
+
+#endif /* __DSA_PDATA_H */
diff --git a/include/net/dsa.h b/include/net/dsa.h
index 2f1daf29131a..7f2a668ef2cc 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -21,6 +21,7 @@
 #include <linux/ethtool.h>
 #include <linux/net_tstamp.h>
 #include <linux/phy.h>
+#include <linux/platform_data/dsa.h>
 #include <net/devlink.h>
 #include <net/switchdev.h>
 
@@ -44,66 +45,6 @@ enum dsa_tag_protocol {
 	DSA_TAG_LAST,		/* MUST BE LAST */
 };
 
-#define DSA_MAX_SWITCHES	4
-#define DSA_MAX_PORTS		12
-
-#define DSA_RTABLE_NONE		-1
-
-struct dsa_chip_data {
-	/*
-	 * How to access the switch configuration registers.
-	 */
-	struct device	*host_dev;
-	int		sw_addr;
-
-	/*
-	 * Reference to network devices
-	 */
-	struct device	*netdev[DSA_MAX_PORTS];
-
-	/* set to size of eeprom if supported by the switch */
-	int		eeprom_len;
-
-	/* Device tree node pointer for this specific switch chip
-	 * used during switch setup in case additional properties
-	 * and resources needs to be used
-	 */
-	struct device_node *of_node;
-
-	/*
-	 * The names of the switch's ports.  Use "cpu" to
-	 * designate the switch port that the cpu is connected to,
-	 * "dsa" to indicate that this port is a DSA link to
-	 * another switch, NULL to indicate the port is unused,
-	 * or any other string to indicate this is a physical port.
-	 */
-	char		*port_names[DSA_MAX_PORTS];
-	struct device_node *port_dn[DSA_MAX_PORTS];
-
-	/*
-	 * An array of which element [a] indicates which port on this
-	 * switch should be used to send packets to that are destined
-	 * for switch a. Can be NULL if there is only one switch chip.
-	 */
-	s8		rtable[DSA_MAX_SWITCHES];
-};
-
-struct dsa_platform_data {
-	/*
-	 * Reference to a Linux network interface that connects
-	 * to the root switch chip of the tree.
-	 */
-	struct device	*netdev;
-	struct net_device *of_netdev;
-
-	/*
-	 * Info structs describing each of the switch chips
-	 * connected via this network interface.
-	 */
-	int		nr_chips;
-	struct dsa_chip_data	*chip;
-};
-
 struct packet_type;
 struct dsa_switch;
 
-- 
cgit v1.2.3


From 8cfb5faf32e85b62f08cfe242ce80b2864d0b8f3 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Tue, 15 Jan 2019 15:06:13 -0800
Subject: net: dsa: Include platform_data header file

b53 and mv88e6xxx support passing platform_data, and now that we have
split the platform_data portion from the main net/dsa.h header file,
include only the relevant parts.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/platform_data/b53.h       | 2 +-
 include/linux/platform_data/mv88e6xxx.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/platform_data/b53.h b/include/linux/platform_data/b53.h
index 8eaef2f2b691..c3b61ead41f2 100644
--- a/include/linux/platform_data/b53.h
+++ b/include/linux/platform_data/b53.h
@@ -20,7 +20,7 @@
 #define __B53_H
 
 #include <linux/kernel.h>
-#include <net/dsa.h>
+#include <linux/platform_data/dsa.h>
 
 struct b53_platform_data {
 	/* Must be first such that dsa_register_switch() can access it */
diff --git a/include/linux/platform_data/mv88e6xxx.h b/include/linux/platform_data/mv88e6xxx.h
index f63af2955ea0..963730b44aea 100644
--- a/include/linux/platform_data/mv88e6xxx.h
+++ b/include/linux/platform_data/mv88e6xxx.h
@@ -2,7 +2,7 @@
 #ifndef __DSA_MV88E6XXX_H
 #define __DSA_MV88E6XXX_H
 
-#include <net/dsa.h>
+#include <linux/platform_data/dsa.h>
 
 struct dsa_mv88e6xxx_pdata {
 	/* Must be first, such that dsa_register_switch() can access this
-- 
cgit v1.2.3


From 5db5ea995fc2fa89fdef61ef3a658cbb41a24222 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Tue, 15 Jan 2019 15:09:35 -0800
Subject: net: phy: Add helpers to determine if PHY driver is generic

We are already checking in phy_detach() that the PHY driver is of
generic kind (1G or 10G) and we are going to make use of that in the SFP
layer as well for 1000BaseT SFP modules, so expose helper functions to
return that information.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/phy_device.c | 34 ++++++++++++++++++++++++++++++++--
 include/linux/phy.h          |  3 +++
 2 files changed, 35 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index bb1410821ce4..c02669270c41 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -1277,6 +1277,36 @@ struct phy_device *phy_attach(struct net_device *dev, const char *bus_id,
 }
 EXPORT_SYMBOL(phy_attach);
 
+static bool phy_driver_is_genphy_kind(struct phy_device *phydev,
+				      struct device_driver *driver)
+{
+	struct device *d = &phydev->mdio.dev;
+	bool ret = false;
+
+	if (!phydev->drv)
+		return ret;
+
+	get_device(d);
+	ret = d->driver == driver;
+	put_device(d);
+
+	return ret;
+}
+
+bool phy_driver_is_genphy(struct phy_device *phydev)
+{
+	return phy_driver_is_genphy_kind(phydev,
+					 &genphy_driver.mdiodrv.driver);
+}
+EXPORT_SYMBOL_GPL(phy_driver_is_genphy);
+
+bool phy_driver_is_genphy_10g(struct phy_device *phydev)
+{
+	return phy_driver_is_genphy_kind(phydev,
+					 &genphy_10g_driver.mdiodrv.driver);
+}
+EXPORT_SYMBOL_GPL(phy_driver_is_genphy_10g);
+
 /**
  * phy_detach - detach a PHY device from its network device
  * @phydev: target phy_device struct
@@ -1308,8 +1338,8 @@ void phy_detach(struct phy_device *phydev)
 	 * from the generic driver so that there's a chance a
 	 * real driver could be loaded
 	 */
-	if (phydev->mdio.dev.driver == &genphy_10g_driver.mdiodrv.driver ||
-	    phydev->mdio.dev.driver == &genphy_driver.mdiodrv.driver)
+	if (phy_driver_is_genphy(phydev) ||
+	    phy_driver_is_genphy_10g(phydev))
 		device_release_driver(&phydev->mdio.dev);
 
 	/*
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 3b051f761450..f1c19bf8c658 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -1183,4 +1183,7 @@ module_exit(phy_module_exit)
 #define module_phy_driver(__phy_drivers)				\
 	phy_module_driver(__phy_drivers, ARRAY_SIZE(__phy_drivers))
 
+bool phy_driver_is_genphy(struct phy_device *phydev);
+bool phy_driver_is_genphy_10g(struct phy_device *phydev);
+
 #endif /* __PHY_H */
-- 
cgit v1.2.3


From 44021606298870e4adc641ef3927e7bb47ca8236 Mon Sep 17 00:00:00 2001
From: Yangtao Li <tiny.windzz@gmail.com>
Date: Tue, 15 Jan 2019 12:22:10 -0500
Subject: cpuidle: use BIT() for idle state flags and remove
 CPUIDLE_DRIVER_FLAGS_MASK

Use BIT() macro to do a small tidy-up.

CPUIDLE_DRIVER_FLAGS_MASK is not used, so remove it.

Signed-off-by: Yangtao Li <tiny.windzz@gmail.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/cpuidle.h | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
index 4dff74f48d4b..3b39472324a3 100644
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -69,11 +69,9 @@ struct cpuidle_state {
 
 /* Idle State Flags */
 #define CPUIDLE_FLAG_NONE       (0x00)
-#define CPUIDLE_FLAG_POLLING	(0x01) /* polling state */
-#define CPUIDLE_FLAG_COUPLED	(0x02) /* state applies to multiple cpus */
-#define CPUIDLE_FLAG_TIMER_STOP (0x04)  /* timer is stopped on this state */
-
-#define CPUIDLE_DRIVER_FLAGS_MASK (0xFFFF0000)
+#define CPUIDLE_FLAG_POLLING	BIT(0) /* polling state */
+#define CPUIDLE_FLAG_COUPLED	BIT(1) /* state applies to multiple cpus */
+#define CPUIDLE_FLAG_TIMER_STOP BIT(2) /* timer is stopped on this state */
 
 struct cpuidle_device_kobj;
 struct cpuidle_state_kobj;
-- 
cgit v1.2.3


From 692d7b5d1f9125a1cf0595e979e3b5fb7210547e Mon Sep 17 00:00:00 2001
From: Vakul Garg <vakul.garg@nxp.com>
Date: Wed, 16 Jan 2019 10:40:16 +0000
Subject: tls: Fix recvmsg() to be able to peek across multiple records

This fixes recvmsg() to be able to peek across multiple tls records.
Without this patch, the tls's selftests test case
'recv_peek_large_buf_mult_recs' fails. Each tls receive context now
maintains a 'rx_list' to retain incoming skb carrying tls records. If a
tls record needs to be retained e.g. for peek case or for the case when
the buffer passed to recvmsg() has a length smaller than decrypted
record length, then it is added to 'rx_list'. Additionally, records are
added in 'rx_list' if the crypto operation runs in async mode. The
records are dequeued from 'rx_list' after the decrypted data is consumed
by copying into the buffer passed to recvmsg(). In case, the MSG_PEEK
flag is used in recvmsg(), then records are not consumed or removed
from the 'rx_list'.

Signed-off-by: Vakul Garg <vakul.garg@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tls.h |   3 +-
 net/tls/tls_sw.c  | 266 ++++++++++++++++++++++++++++++++++++++++--------------
 2 files changed, 198 insertions(+), 71 deletions(-)

(limited to 'include')

diff --git a/include/net/tls.h b/include/net/tls.h
index 2a6ac8d642af..90bf52db573e 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -145,12 +145,13 @@ struct tls_sw_context_tx {
 struct tls_sw_context_rx {
 	struct crypto_aead *aead_recv;
 	struct crypto_wait async_wait;
-
 	struct strparser strp;
+	struct sk_buff_head rx_list;	/* list of decrypted 'data' records */
 	void (*saved_data_ready)(struct sock *sk);
 
 	struct sk_buff *recv_pkt;
 	u8 control;
+	int async_capable;
 	bool decrypted;
 	atomic_t decrypt_pending;
 	bool async_notify;
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index b8e50e22b777..86b9527c4826 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -124,6 +124,7 @@ static void tls_decrypt_done(struct crypto_async_request *req, int err)
 {
 	struct aead_request *aead_req = (struct aead_request *)req;
 	struct scatterlist *sgout = aead_req->dst;
+	struct scatterlist *sgin = aead_req->src;
 	struct tls_sw_context_rx *ctx;
 	struct tls_context *tls_ctx;
 	struct scatterlist *sg;
@@ -134,12 +135,16 @@ static void tls_decrypt_done(struct crypto_async_request *req, int err)
 	skb = (struct sk_buff *)req->data;
 	tls_ctx = tls_get_ctx(skb->sk);
 	ctx = tls_sw_ctx_rx(tls_ctx);
-	pending = atomic_dec_return(&ctx->decrypt_pending);
 
 	/* Propagate if there was an err */
 	if (err) {
 		ctx->async_wait.err = err;
 		tls_err_abort(skb->sk, err);
+	} else {
+		struct strp_msg *rxm = strp_msg(skb);
+
+		rxm->offset += tls_ctx->rx.prepend_size;
+		rxm->full_len -= tls_ctx->rx.overhead_size;
 	}
 
 	/* After using skb->sk to propagate sk through crypto async callback
@@ -147,18 +152,21 @@ static void tls_decrypt_done(struct crypto_async_request *req, int err)
 	 */
 	skb->sk = NULL;
 
-	/* Release the skb, pages and memory allocated for crypto req */
-	kfree_skb(skb);
 
-	/* Skip the first S/G entry as it points to AAD */
-	for_each_sg(sg_next(sgout), sg, UINT_MAX, pages) {
-		if (!sg)
-			break;
-		put_page(sg_page(sg));
+	/* Free the destination pages if skb was not decrypted inplace */
+	if (sgout != sgin) {
+		/* Skip the first S/G entry as it points to AAD */
+		for_each_sg(sg_next(sgout), sg, UINT_MAX, pages) {
+			if (!sg)
+				break;
+			put_page(sg_page(sg));
+		}
 	}
 
 	kfree(aead_req);
 
+	pending = atomic_dec_return(&ctx->decrypt_pending);
+
 	if (!pending && READ_ONCE(ctx->async_notify))
 		complete(&ctx->async_wait.completion);
 }
@@ -1271,7 +1279,7 @@ out:
 static int decrypt_internal(struct sock *sk, struct sk_buff *skb,
 			    struct iov_iter *out_iov,
 			    struct scatterlist *out_sg,
-			    int *chunk, bool *zc)
+			    int *chunk, bool *zc, bool async)
 {
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
 	struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
@@ -1371,13 +1379,13 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb,
 fallback_to_reg_recv:
 		sgout = sgin;
 		pages = 0;
-		*chunk = 0;
+		*chunk = data_len;
 		*zc = false;
 	}
 
 	/* Prepare and submit AEAD request */
 	err = tls_do_decryption(sk, skb, sgin, sgout, iv,
-				data_len, aead_req, *zc);
+				data_len, aead_req, async);
 	if (err == -EINPROGRESS)
 		return err;
 
@@ -1390,7 +1398,8 @@ fallback_to_reg_recv:
 }
 
 static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb,
-			      struct iov_iter *dest, int *chunk, bool *zc)
+			      struct iov_iter *dest, int *chunk, bool *zc,
+			      bool async)
 {
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
 	struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
@@ -1403,7 +1412,7 @@ static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb,
 		return err;
 #endif
 	if (!ctx->decrypted) {
-		err = decrypt_internal(sk, skb, dest, NULL, chunk, zc);
+		err = decrypt_internal(sk, skb, dest, NULL, chunk, zc, async);
 		if (err < 0) {
 			if (err == -EINPROGRESS)
 				tls_advance_record_sn(sk, &tls_ctx->rx);
@@ -1429,7 +1438,7 @@ int decrypt_skb(struct sock *sk, struct sk_buff *skb,
 	bool zc = true;
 	int chunk;
 
-	return decrypt_internal(sk, skb, NULL, sgout, &chunk, &zc);
+	return decrypt_internal(sk, skb, NULL, sgout, &chunk, &zc, false);
 }
 
 static bool tls_sw_advance_skb(struct sock *sk, struct sk_buff *skb,
@@ -1456,6 +1465,77 @@ static bool tls_sw_advance_skb(struct sock *sk, struct sk_buff *skb,
 	return true;
 }
 
+/* This function traverses the rx_list in tls receive context to copies the
+ * decrypted data records into the buffer provided by caller zero copy is not
+ * true. Further, the records are removed from the rx_list if it is not a peek
+ * case and the record has been consumed completely.
+ */
+static int process_rx_list(struct tls_sw_context_rx *ctx,
+			   struct msghdr *msg,
+			   size_t skip,
+			   size_t len,
+			   bool zc,
+			   bool is_peek)
+{
+	struct sk_buff *skb = skb_peek(&ctx->rx_list);
+	ssize_t copied = 0;
+
+	while (skip && skb) {
+		struct strp_msg *rxm = strp_msg(skb);
+
+		if (skip < rxm->full_len)
+			break;
+
+		skip = skip - rxm->full_len;
+		skb = skb_peek_next(skb, &ctx->rx_list);
+	}
+
+	while (len && skb) {
+		struct sk_buff *next_skb;
+		struct strp_msg *rxm = strp_msg(skb);
+		int chunk = min_t(unsigned int, rxm->full_len - skip, len);
+
+		if (!zc || (rxm->full_len - skip) > len) {
+			int err = skb_copy_datagram_msg(skb, rxm->offset + skip,
+						    msg, chunk);
+			if (err < 0)
+				return err;
+		}
+
+		len = len - chunk;
+		copied = copied + chunk;
+
+		/* Consume the data from record if it is non-peek case*/
+		if (!is_peek) {
+			rxm->offset = rxm->offset + chunk;
+			rxm->full_len = rxm->full_len - chunk;
+
+			/* Return if there is unconsumed data in the record */
+			if (rxm->full_len - skip)
+				break;
+		}
+
+		/* The remaining skip-bytes must lie in 1st record in rx_list.
+		 * So from the 2nd record, 'skip' should be 0.
+		 */
+		skip = 0;
+
+		if (msg)
+			msg->msg_flags |= MSG_EOR;
+
+		next_skb = skb_peek_next(skb, &ctx->rx_list);
+
+		if (!is_peek) {
+			skb_unlink(skb, &ctx->rx_list);
+			kfree_skb(skb);
+		}
+
+		skb = next_skb;
+	}
+
+	return copied;
+}
+
 int tls_sw_recvmsg(struct sock *sk,
 		   struct msghdr *msg,
 		   size_t len,
@@ -1466,7 +1546,8 @@ int tls_sw_recvmsg(struct sock *sk,
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
 	struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
 	struct sk_psock *psock;
-	unsigned char control;
+	unsigned char control = 0;
+	ssize_t decrypted = 0;
 	struct strp_msg *rxm;
 	struct sk_buff *skb;
 	ssize_t copied = 0;
@@ -1474,6 +1555,7 @@ int tls_sw_recvmsg(struct sock *sk,
 	int target, err = 0;
 	long timeo;
 	bool is_kvec = iov_iter_is_kvec(&msg->msg_iter);
+	bool is_peek = flags & MSG_PEEK;
 	int num_async = 0;
 
 	flags |= nonblock;
@@ -1484,11 +1566,28 @@ int tls_sw_recvmsg(struct sock *sk,
 	psock = sk_psock_get(sk);
 	lock_sock(sk);
 
-	target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);
-	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
+	/* Process pending decrypted records. It must be non-zero-copy */
+	err = process_rx_list(ctx, msg, 0, len, false, is_peek);
+	if (err < 0) {
+		tls_err_abort(sk, err);
+		goto end;
+	} else {
+		copied = err;
+	}
+
+	len = len - copied;
+	if (len) {
+		target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);
+		timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
+	} else {
+		goto recv_end;
+	}
+
 	do {
-		bool zc = false;
+		bool retain_skb = false;
 		bool async = false;
+		bool zc = false;
+		int to_decrypt;
 		int chunk = 0;
 
 		skb = tls_wait_data(sk, psock, flags, timeo, &err);
@@ -1498,7 +1597,7 @@ int tls_sw_recvmsg(struct sock *sk,
 							    msg, len, flags);
 
 				if (ret > 0) {
-					copied += ret;
+					decrypted += ret;
 					len -= ret;
 					continue;
 				}
@@ -1525,70 +1624,70 @@ int tls_sw_recvmsg(struct sock *sk,
 			goto recv_end;
 		}
 
-		if (!ctx->decrypted) {
-			int to_copy = rxm->full_len - tls_ctx->rx.overhead_size;
+		to_decrypt = rxm->full_len - tls_ctx->rx.overhead_size;
 
-			if (!is_kvec && to_copy <= len &&
-			    likely(!(flags & MSG_PEEK)))
-				zc = true;
+		if (to_decrypt <= len && !is_kvec && !is_peek)
+			zc = true;
 
-			err = decrypt_skb_update(sk, skb, &msg->msg_iter,
-						 &chunk, &zc);
-			if (err < 0 && err != -EINPROGRESS) {
-				tls_err_abort(sk, EBADMSG);
-				goto recv_end;
-			}
-
-			if (err == -EINPROGRESS) {
-				async = true;
-				num_async++;
-				goto pick_next_record;
-			}
-
-			ctx->decrypted = true;
+		err = decrypt_skb_update(sk, skb, &msg->msg_iter,
+					 &chunk, &zc, ctx->async_capable);
+		if (err < 0 && err != -EINPROGRESS) {
+			tls_err_abort(sk, EBADMSG);
+			goto recv_end;
 		}
 
-		if (!zc) {
-			chunk = min_t(unsigned int, rxm->full_len, len);
+		if (err == -EINPROGRESS) {
+			async = true;
+			num_async++;
+			goto pick_next_record;
+		} else {
+			if (!zc) {
+				if (rxm->full_len > len) {
+					retain_skb = true;
+					chunk = len;
+				} else {
+					chunk = rxm->full_len;
+				}
+
+				err = skb_copy_datagram_msg(skb, rxm->offset,
+							    msg, chunk);
+				if (err < 0)
+					goto recv_end;
 
-			err = skb_copy_datagram_msg(skb, rxm->offset, msg,
-						    chunk);
-			if (err < 0)
-				goto recv_end;
+				if (!is_peek) {
+					rxm->offset = rxm->offset + chunk;
+					rxm->full_len = rxm->full_len - chunk;
+				}
+			}
 		}
 
 pick_next_record:
-		copied += chunk;
+		if (chunk > len)
+			chunk = len;
+
+		decrypted += chunk;
 		len -= chunk;
-		if (likely(!(flags & MSG_PEEK))) {
-			u8 control = ctx->control;
-
-			/* For async, drop current skb reference */
-			if (async)
-				skb = NULL;
-
-			if (tls_sw_advance_skb(sk, skb, chunk)) {
-				/* Return full control message to
-				 * userspace before trying to parse
-				 * another message type
-				 */
-				msg->msg_flags |= MSG_EOR;
-				if (control != TLS_RECORD_TYPE_DATA)
-					goto recv_end;
-			} else {
-				break;
-			}
-		} else {
-			/* MSG_PEEK right now cannot look beyond current skb
-			 * from strparser, meaning we cannot advance skb here
-			 * and thus unpause strparser since we'd loose original
-			 * one.
+
+		/* For async or peek case, queue the current skb */
+		if (async || is_peek || retain_skb) {
+			skb_queue_tail(&ctx->rx_list, skb);
+			skb = NULL;
+		}
+
+		if (tls_sw_advance_skb(sk, skb, chunk)) {
+			/* Return full control message to
+			 * userspace before trying to parse
+			 * another message type
 			 */
+			msg->msg_flags |= MSG_EOR;
+			if (ctx->control != TLS_RECORD_TYPE_DATA)
+				goto recv_end;
+		} else {
 			break;
 		}
 
 		/* If we have a new message from strparser, continue now. */
-		if (copied >= target && !ctx->recv_pkt)
+		if (decrypted >= target && !ctx->recv_pkt)
 			break;
 	} while (len);
 
@@ -1602,13 +1701,33 @@ recv_end:
 				/* one of async decrypt failed */
 				tls_err_abort(sk, err);
 				copied = 0;
+				decrypted = 0;
+				goto end;
 			}
 		} else {
 			reinit_completion(&ctx->async_wait.completion);
 		}
 		WRITE_ONCE(ctx->async_notify, false);
+
+		/* Drain records from the rx_list & copy if required */
+		if (is_peek || is_kvec)
+			err = process_rx_list(ctx, msg, copied,
+					      decrypted, false, is_peek);
+		else
+			err = process_rx_list(ctx, msg, 0,
+					      decrypted, true, is_peek);
+		if (err < 0) {
+			tls_err_abort(sk, err);
+			copied = 0;
+			goto end;
+		}
+
+		WARN_ON(decrypted != err);
 	}
 
+	copied += decrypted;
+
+end:
 	release_sock(sk);
 	if (psock)
 		sk_psock_put(sk, psock);
@@ -1645,7 +1764,7 @@ ssize_t tls_sw_splice_read(struct socket *sock,  loff_t *ppos,
 	}
 
 	if (!ctx->decrypted) {
-		err = decrypt_skb_update(sk, skb, NULL, &chunk, &zc);
+		err = decrypt_skb_update(sk, skb, NULL, &chunk, &zc, false);
 
 		if (err < 0) {
 			tls_err_abort(sk, EBADMSG);
@@ -1832,6 +1951,7 @@ void tls_sw_release_resources_rx(struct sock *sk)
 	if (ctx->aead_recv) {
 		kfree_skb(ctx->recv_pkt);
 		ctx->recv_pkt = NULL;
+		skb_queue_purge(&ctx->rx_list);
 		crypto_free_aead(ctx->aead_recv);
 		strp_stop(&ctx->strp);
 		write_lock_bh(&sk->sk_callback_lock);
@@ -1881,6 +2001,7 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
 	struct crypto_aead **aead;
 	struct strp_callbacks cb;
 	u16 nonce_size, tag_size, iv_size, rec_seq_size;
+	struct crypto_tfm *tfm;
 	char *iv, *rec_seq;
 	int rc = 0;
 
@@ -1927,6 +2048,7 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
 		crypto_init_wait(&sw_ctx_rx->async_wait);
 		crypto_info = &ctx->crypto_recv.info;
 		cctx = &ctx->rx;
+		skb_queue_head_init(&sw_ctx_rx->rx_list);
 		aead = &sw_ctx_rx->aead_recv;
 	}
 
@@ -1994,6 +2116,10 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
 		goto free_aead;
 
 	if (sw_ctx_rx) {
+		tfm = crypto_aead_tfm(sw_ctx_rx->aead_recv);
+		sw_ctx_rx->async_capable =
+			tfm->__crt_alg->cra_flags & CRYPTO_ALG_ASYNC;
+
 		/* Set up strparser */
 		memset(&cb, 0, sizeof(cb));
 		cb.rcv_msg = tls_queue;
-- 
cgit v1.2.3


From f5dd3d0c9638a9d9a02b5964c4ad636f06cf7e2c Mon Sep 17 00:00:00 2001
From: David Herrmann <dh.herrmann@gmail.com>
Date: Tue, 15 Jan 2019 14:42:14 +0100
Subject: net: introduce SO_BINDTOIFINDEX sockopt

This introduces a new generic SOL_SOCKET-level socket option called
SO_BINDTOIFINDEX. It behaves similar to SO_BINDTODEVICE, but takes a
network interface index as argument, rather than the network interface
name.

User-space often refers to network-interfaces via their index, but has
to temporarily resolve it to a name for a call into SO_BINDTODEVICE.
This might pose problems when the network-device is renamed
asynchronously by other parts of the system. When this happens, the
SO_BINDTODEVICE might either fail, or worse, it might bind to the wrong
device.

In most cases user-space only ever operates on devices which they
either manage themselves, or otherwise have a guarantee that the device
name will not change (e.g., devices that are UP cannot be renamed).
However, particularly in libraries this guarantee is non-obvious and it
would be nice if that race-condition would simply not exist. It would
make it easier for those libraries to operate even in situations where
the device-name might change under the hood.

A real use-case that we recently hit is trying to start the network
stack early in the initrd but make it survive into the real system.
Existing distributions rename network-interfaces during the transition
from initrd into the real system. This, obviously, cannot affect
devices that are up and running (unless you also consider moving them
between network-namespaces). However, the network manager now has to
make sure its management engine for dormant devices will not run in
parallel to these renames. Particularly, when you offload operations
like DHCP into separate processes, these might setup their sockets
early, and thus have to resolve the device-name possibly running into
this race-condition.

By avoiding a call to resolve the device-name, we no longer depend on
the name and can run network setup of dormant devices in parallel to
the transition off the initrd. The SO_BINDTOIFINDEX ioctl plugs this
race.

Reviewed-by: Tom Gundersen <teg@jklm.no>
Signed-off-by: David Herrmann <dh.herrmann@gmail.com>
Acked-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/alpha/include/uapi/asm/socket.h  |  2 ++
 arch/ia64/include/uapi/asm/socket.h   |  2 ++
 arch/mips/include/uapi/asm/socket.h   |  2 ++
 arch/parisc/include/uapi/asm/socket.h |  2 ++
 arch/s390/include/uapi/asm/socket.h   |  2 ++
 arch/sparc/include/uapi/asm/socket.h  |  2 ++
 arch/xtensa/include/uapi/asm/socket.h |  2 ++
 include/uapi/asm-generic/socket.h     |  2 ++
 net/core/sock.c                       | 46 +++++++++++++++++++++++++++--------
 9 files changed, 52 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h
index 065fb372e355..b1c9b542c021 100644
--- a/arch/alpha/include/uapi/asm/socket.h
+++ b/arch/alpha/include/uapi/asm/socket.h
@@ -115,4 +115,6 @@
 #define SO_TXTIME		61
 #define SCM_TXTIME		SO_TXTIME
 
+#define SO_BINDTOIFINDEX	62
+
 #endif /* _UAPI_ASM_SOCKET_H */
diff --git a/arch/ia64/include/uapi/asm/socket.h b/arch/ia64/include/uapi/asm/socket.h
index c872c4e6bafb..ba0d245f9576 100644
--- a/arch/ia64/include/uapi/asm/socket.h
+++ b/arch/ia64/include/uapi/asm/socket.h
@@ -117,4 +117,6 @@
 #define SO_TXTIME		61
 #define SCM_TXTIME		SO_TXTIME
 
+#define SO_BINDTOIFINDEX	62
+
 #endif /* _ASM_IA64_SOCKET_H */
diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h
index 71370fb3ceef..73e25e35d803 100644
--- a/arch/mips/include/uapi/asm/socket.h
+++ b/arch/mips/include/uapi/asm/socket.h
@@ -126,4 +126,6 @@
 #define SO_TXTIME		61
 #define SCM_TXTIME		SO_TXTIME
 
+#define SO_BINDTOIFINDEX	62
+
 #endif /* _UAPI_ASM_SOCKET_H */
diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h
index 061b9cf2a779..52bed5976cbe 100644
--- a/arch/parisc/include/uapi/asm/socket.h
+++ b/arch/parisc/include/uapi/asm/socket.h
@@ -107,4 +107,6 @@
 #define SO_TXTIME		0x4036
 #define SCM_TXTIME		SO_TXTIME
 
+#define SO_BINDTOIFINDEX	0x4037
+
 #endif /* _UAPI_ASM_SOCKET_H */
diff --git a/arch/s390/include/uapi/asm/socket.h b/arch/s390/include/uapi/asm/socket.h
index 39d901476ee5..49c971587087 100644
--- a/arch/s390/include/uapi/asm/socket.h
+++ b/arch/s390/include/uapi/asm/socket.h
@@ -114,4 +114,6 @@
 #define SO_TXTIME		61
 #define SCM_TXTIME		SO_TXTIME
 
+#define SO_BINDTOIFINDEX	62
+
 #endif /* _ASM_SOCKET_H */
diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h
index 7ea35e5601b6..bbdb81594dd4 100644
--- a/arch/sparc/include/uapi/asm/socket.h
+++ b/arch/sparc/include/uapi/asm/socket.h
@@ -104,6 +104,8 @@
 #define SO_TXTIME		0x003f
 #define SCM_TXTIME		SO_TXTIME
 
+#define SO_BINDTOIFINDEX	0x0041
+
 /* Security levels - as per NRL IPv6 - don't actually do anything */
 #define SO_SECURITY_AUTHENTICATION		0x5001
 #define SO_SECURITY_ENCRYPTION_TRANSPORT	0x5002
diff --git a/arch/xtensa/include/uapi/asm/socket.h b/arch/xtensa/include/uapi/asm/socket.h
index 1de07a7f7680..b434217783d0 100644
--- a/arch/xtensa/include/uapi/asm/socket.h
+++ b/arch/xtensa/include/uapi/asm/socket.h
@@ -119,4 +119,6 @@
 #define SO_TXTIME		61
 #define SCM_TXTIME		SO_TXTIME
 
+#define SO_BINDTOIFINDEX	62
+
 #endif	/* _XTENSA_SOCKET_H */
diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h
index a12692e5f7a8..3066ab3853a8 100644
--- a/include/uapi/asm-generic/socket.h
+++ b/include/uapi/asm-generic/socket.h
@@ -110,4 +110,6 @@
 #define SO_TXTIME		61
 #define SCM_TXTIME		SO_TXTIME
 
+#define SO_BINDTOIFINDEX	62
+
 #endif /* __ASM_GENERIC_SOCKET_H */
diff --git a/net/core/sock.c b/net/core/sock.c
index 6aa2e7e0b4fb..b53764ebb973 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -520,20 +520,43 @@ struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie)
 }
 EXPORT_SYMBOL(sk_dst_check);
 
-static int sock_setbindtodevice(struct sock *sk, char __user *optval,
-				int optlen)
+static int sock_setbindtodevice_locked(struct sock *sk, int ifindex)
 {
 	int ret = -ENOPROTOOPT;
 #ifdef CONFIG_NETDEVICES
 	struct net *net = sock_net(sk);
-	char devname[IFNAMSIZ];
-	int index;
 
 	/* Sorry... */
 	ret = -EPERM;
 	if (!ns_capable(net->user_ns, CAP_NET_RAW))
 		goto out;
 
+	ret = -EINVAL;
+	if (ifindex < 0)
+		goto out;
+
+	sk->sk_bound_dev_if = ifindex;
+	if (sk->sk_prot->rehash)
+		sk->sk_prot->rehash(sk);
+	sk_dst_reset(sk);
+
+	ret = 0;
+
+out:
+#endif
+
+	return ret;
+}
+
+static int sock_setbindtodevice(struct sock *sk, char __user *optval,
+				int optlen)
+{
+	int ret = -ENOPROTOOPT;
+#ifdef CONFIG_NETDEVICES
+	struct net *net = sock_net(sk);
+	char devname[IFNAMSIZ];
+	int index;
+
 	ret = -EINVAL;
 	if (optlen < 0)
 		goto out;
@@ -566,14 +589,9 @@ static int sock_setbindtodevice(struct sock *sk, char __user *optval,
 	}
 
 	lock_sock(sk);
-	sk->sk_bound_dev_if = index;
-	if (sk->sk_prot->rehash)
-		sk->sk_prot->rehash(sk);
-	sk_dst_reset(sk);
+	ret = sock_setbindtodevice_locked(sk, index);
 	release_sock(sk);
 
-	ret = 0;
-
 out:
 #endif
 
@@ -1055,6 +1073,10 @@ set_rcvbuf:
 		}
 		break;
 
+	case SO_BINDTOIFINDEX:
+		ret = sock_setbindtodevice_locked(sk, val);
+		break;
+
 	default:
 		ret = -ENOPROTOOPT;
 		break;
@@ -1399,6 +1421,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
 				  SOF_TXTIME_REPORT_ERRORS : 0;
 		break;
 
+	case SO_BINDTOIFINDEX:
+		v.val = sk->sk_bound_dev_if;
+		break;
+
 	default:
 		/* We implement the SO_SNDLOWAT etc to not be settable
 		 * (1003.1g 7).
-- 
cgit v1.2.3


From 87b0984ebfabafcfe959e52ca5cdab5eeb2d60c0 Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@mellanox.com>
Date: Wed, 16 Jan 2019 23:06:50 +0000
Subject: net: Add extack argument to ndo_fdb_add()

Drivers may not be able to support certain FDB entries, and an error
code is insufficient to give clear hints as to the reasons of rejection.

In order to make it possible to communicate the rejection reason, extend
ndo_fdb_add() with an extack argument. Adapt the existing
implementations of ndo_fdb_add() to take the parameter (and ignore it).
Pass the extack parameter when invoking ndo_fdb_add() from rtnl_fdb_add().

Signed-off-by: Petr Machata <petrm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/intel/i40e/i40e_main.c      | 3 ++-
 drivers/net/ethernet/intel/ice/ice_main.c        | 3 ++-
 drivers/net/ethernet/intel/igb/igb_main.c        | 3 ++-
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c    | 3 ++-
 drivers/net/ethernet/mscc/ocelot.c               | 3 ++-
 drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c | 3 ++-
 drivers/net/macvlan.c                            | 3 ++-
 drivers/net/vxlan.c                              | 3 ++-
 include/linux/netdevice.h                        | 6 ++++--
 net/bridge/br_fdb.c                              | 3 ++-
 net/bridge/br_private.h                          | 3 ++-
 net/core/rtnetlink.c                             | 5 +++--
 net/dsa/dsa_priv.h                               | 3 ++-
 net/dsa/slave.c                                  | 3 ++-
 14 files changed, 31 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index f52e2c46e6a7..0ee641c41be4 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -11644,7 +11644,8 @@ static int i40e_get_phys_port_id(struct net_device *netdev,
 static int i40e_ndo_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
 			    struct net_device *dev,
 			    const unsigned char *addr, u16 vid,
-			    u16 flags)
+			    u16 flags,
+			    struct netlink_ext_ack *extack)
 {
 	struct i40e_netdev_priv *np = netdev_priv(dev);
 	struct i40e_pf *pf = np->vsi->back;
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index f4bf6bda32a9..48f033928aa2 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -2438,7 +2438,8 @@ static void ice_set_rx_mode(struct net_device *netdev)
  */
 static int ice_fdb_add(struct ndmsg *ndm, struct nlattr __always_unused *tb[],
 		       struct net_device *dev, const unsigned char *addr,
-		       u16 vid, u16 flags)
+		       u16 vid, u16 flags,
+		       struct netlink_ext_ack *extack)
 {
 	int err;
 
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index 87bdf1604ae2..3615e2e52399 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -2486,7 +2486,8 @@ static int igb_set_features(struct net_device *netdev,
 static int igb_ndo_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
 			   struct net_device *dev,
 			   const unsigned char *addr, u16 vid,
-			   u16 flags)
+			   u16 flags,
+			   struct netlink_ext_ack *extack)
 {
 	/* guarantee we can provide a unique filter for the unicast address */
 	if (is_unicast_ether_addr(addr) || is_link_local_ether_addr(addr)) {
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index daff8183534b..b53087a980ef 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -9910,7 +9910,8 @@ static void ixgbe_del_udp_tunnel_port(struct net_device *dev,
 static int ixgbe_ndo_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
 			     struct net_device *dev,
 			     const unsigned char *addr, u16 vid,
-			     u16 flags)
+			     u16 flags,
+			     struct netlink_ext_ack *extack)
 {
 	/* guarantee we can provide a unique filter for the unicast address */
 	if (is_unicast_ether_addr(addr) || is_link_local_ether_addr(addr)) {
diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c
index 215a45374d7b..c6a575eb0ff5 100644
--- a/drivers/net/ethernet/mscc/ocelot.c
+++ b/drivers/net/ethernet/mscc/ocelot.c
@@ -721,7 +721,8 @@ static void ocelot_get_stats64(struct net_device *dev,
 
 static int ocelot_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
 			  struct net_device *dev, const unsigned char *addr,
-			  u16 vid, u16 flags)
+			  u16 vid, u16 flags,
+			  struct netlink_ext_ack *extack)
 {
 	struct ocelot_port *port = netdev_priv(dev);
 	struct ocelot *ocelot = port->ocelot;
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
index 16d0479f6891..7a873002e626 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
@@ -396,7 +396,8 @@ static int qlcnic_fdb_del(struct ndmsg *ndm, struct nlattr *tb[],
 
 static int qlcnic_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
 			struct net_device *netdev,
-			const unsigned char *addr, u16 vid, u16 flags)
+			const unsigned char *addr, u16 vid, u16 flags,
+			struct netlink_ext_ack *extack)
 {
 	struct qlcnic_adapter *adapter = netdev_priv(netdev);
 	int err = 0;
diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index fc726ce4c164..084a1b3fbc80 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -963,7 +963,8 @@ static int macvlan_vlan_rx_kill_vid(struct net_device *dev,
 static int macvlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
 			   struct net_device *dev,
 			   const unsigned char *addr, u16 vid,
-			   u16 flags)
+			   u16 flags,
+			   struct netlink_ext_ack *extack)
 {
 	struct macvlan_dev *vlan = netdev_priv(dev);
 	int err = -EINVAL;
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 83f65eb3085f..11f38fd71678 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -1087,7 +1087,8 @@ static int vxlan_fdb_parse(struct nlattr *tb[], struct vxlan_dev *vxlan,
 /* Add static entry (via netlink) */
 static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
 			 struct net_device *dev,
-			 const unsigned char *addr, u16 vid, u16 flags)
+			 const unsigned char *addr, u16 vid, u16 flags,
+			 struct netlink_ext_ack *extack)
 {
 	struct vxlan_dev *vxlan = netdev_priv(dev);
 	/* struct net *net = dev_net(vxlan->dev); */
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 1377d085ef99..a57b9a853aab 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1152,7 +1152,8 @@ struct dev_ifalias {
  *
  * int (*ndo_fdb_add)(struct ndmsg *ndm, struct nlattr *tb[],
  *		      struct net_device *dev,
- *		      const unsigned char *addr, u16 vid, u16 flags)
+ *		      const unsigned char *addr, u16 vid, u16 flags,
+ *		      struct netlink_ext_ack *extack);
  *	Adds an FDB entry to dev for addr.
  * int (*ndo_fdb_del)(struct ndmsg *ndm, struct nlattr *tb[],
  *		      struct net_device *dev,
@@ -1376,7 +1377,8 @@ struct net_device_ops {
 					       struct net_device *dev,
 					       const unsigned char *addr,
 					       u16 vid,
-					       u16 flags);
+					       u16 flags,
+					       struct netlink_ext_ack *extack);
 	int			(*ndo_fdb_del)(struct ndmsg *ndm,
 					       struct nlattr *tb[],
 					       struct net_device *dev,
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index fe3c758791ca..6664cb8590f8 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -915,7 +915,8 @@ static int __br_fdb_add(struct ndmsg *ndm, struct net_bridge *br,
 /* Add new permanent fdb entry with RTM_NEWNEIGH */
 int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
 	       struct net_device *dev,
-	       const unsigned char *addr, u16 vid, u16 nlh_flags)
+	       const unsigned char *addr, u16 vid, u16 nlh_flags,
+	       struct netlink_ext_ack *extack)
 {
 	struct net_bridge_vlan_group *vg;
 	struct net_bridge_port *p = NULL;
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index eabf8bf28a3f..00deef7fc1f3 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -573,7 +573,8 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
 int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[],
 		  struct net_device *dev, const unsigned char *addr, u16 vid);
 int br_fdb_add(struct ndmsg *nlh, struct nlattr *tb[], struct net_device *dev,
-	       const unsigned char *addr, u16 vid, u16 nlh_flags);
+	       const unsigned char *addr, u16 vid, u16 nlh_flags,
+	       struct netlink_ext_ack *extack);
 int br_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
 		struct net_device *dev, struct net_device *fdev, int *idx);
 int br_fdb_get(struct sk_buff *skb, struct nlattr *tb[], struct net_device *dev,
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 5ea1bed08ede..b302df0cd5ae 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -3639,7 +3639,7 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh,
 		const struct net_device_ops *ops = br_dev->netdev_ops;
 
 		err = ops->ndo_fdb_add(ndm, tb, dev, addr, vid,
-				       nlh->nlmsg_flags);
+				       nlh->nlmsg_flags, extack);
 		if (err)
 			goto out;
 		else
@@ -3651,7 +3651,8 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh,
 		if (dev->netdev_ops->ndo_fdb_add)
 			err = dev->netdev_ops->ndo_fdb_add(ndm, tb, dev, addr,
 							   vid,
-							   nlh->nlmsg_flags);
+							   nlh->nlmsg_flags,
+							   extack);
 		else
 			err = ndo_dflt_fdb_add(ndm, tb, dev, addr, vid,
 					       nlh->nlmsg_flags);
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index 026a05774bf7..1f4972dab9f2 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -103,7 +103,8 @@ static inline void dsa_legacy_unregister(void) { }
 int dsa_legacy_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
 		       struct net_device *dev,
 		       const unsigned char *addr, u16 vid,
-		       u16 flags);
+		       u16 flags,
+		       struct netlink_ext_ack *extack);
 int dsa_legacy_fdb_del(struct ndmsg *ndm, struct nlattr *tb[],
 		       struct net_device *dev,
 		       const unsigned char *addr, u16 vid);
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index a3fcc1d01615..d5680a98a7f0 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -1009,7 +1009,8 @@ static const struct ethtool_ops dsa_slave_ethtool_ops = {
 int dsa_legacy_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
 		       struct net_device *dev,
 		       const unsigned char *addr, u16 vid,
-		       u16 flags)
+		       u16 flags,
+		       struct netlink_ext_ack *extack)
 {
 	struct dsa_port *dp = dsa_slave_to_port(dev);
 
-- 
cgit v1.2.3


From 4c59b7d16096e5e2cfbc7f8cba972eb72f6b96c0 Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@mellanox.com>
Date: Wed, 16 Jan 2019 23:06:54 +0000
Subject: vxlan: Add extack to switchdev operations

There are four sources of VXLAN switchdev notifier calls:

- the changelink() link operation, which already supports extack,
- ndo_fdb_add() which got extack support in a previous patch,
- FDB updates due to packet forwarding,
- and vxlan_fdb_replay().

Extend vxlan_fdb_switchdev_call_notifiers() to include extack in the
switchdev message that it sends, and propagate the argument upwards to
the callers. For the first two cases, pass in the extack gotten through
the operation. For case #3, pass in NULL.

To cover the last case, extend vxlan_fdb_replay() to take extack
argument, which might come from whatever operation necessitated the FDB
replay.

Signed-off-by: Petr Machata <petrm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/mellanox/mlxsw/spectrum_nve_vxlan.c   |  3 +-
 drivers/net/vxlan.c                                | 68 +++++++++++++---------
 include/net/vxlan.h                                |  6 +-
 3 files changed, 46 insertions(+), 31 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve_vxlan.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve_vxlan.c
index bc565f2aa121..9ba0b83bd949 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve_vxlan.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve_vxlan.c
@@ -217,7 +217,8 @@ mlxsw_sp_nve_vxlan_fdb_replay(const struct net_device *nve_dev, __be32 vni,
 {
 	if (WARN_ON(!netif_is_vxlan(nve_dev)))
 		return -EINVAL;
-	return vxlan_fdb_replay(nve_dev, vni, &mlxsw_sp_switchdev_notifier);
+	return vxlan_fdb_replay(nve_dev, vni, &mlxsw_sp_switchdev_notifier,
+				extack);
 }
 
 static void
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 11f38fd71678..a9e90159bb61 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -361,10 +361,11 @@ errout:
 static void vxlan_fdb_switchdev_notifier_info(const struct vxlan_dev *vxlan,
 			    const struct vxlan_fdb *fdb,
 			    const struct vxlan_rdst *rd,
+			    struct netlink_ext_ack *extack,
 			    struct switchdev_notifier_vxlan_fdb_info *fdb_info)
 {
 	fdb_info->info.dev = vxlan->dev;
-	fdb_info->info.extack = NULL;
+	fdb_info->info.extack = extack;
 	fdb_info->remote_ip = rd->remote_ip;
 	fdb_info->remote_port = rd->remote_port;
 	fdb_info->remote_vni = rd->remote_vni;
@@ -378,7 +379,8 @@ static void vxlan_fdb_switchdev_notifier_info(const struct vxlan_dev *vxlan,
 static int vxlan_fdb_switchdev_call_notifiers(struct vxlan_dev *vxlan,
 					      struct vxlan_fdb *fdb,
 					      struct vxlan_rdst *rd,
-					      bool adding)
+					      bool adding,
+					      struct netlink_ext_ack *extack)
 {
 	struct switchdev_notifier_vxlan_fdb_info info;
 	enum switchdev_notifier_type notifier_type;
@@ -389,14 +391,15 @@ static int vxlan_fdb_switchdev_call_notifiers(struct vxlan_dev *vxlan,
 
 	notifier_type = adding ? SWITCHDEV_VXLAN_FDB_ADD_TO_DEVICE
 			       : SWITCHDEV_VXLAN_FDB_DEL_TO_DEVICE;
-	vxlan_fdb_switchdev_notifier_info(vxlan, fdb, rd, &info);
+	vxlan_fdb_switchdev_notifier_info(vxlan, fdb, rd, NULL, &info);
 	ret = call_switchdev_notifiers(notifier_type, vxlan->dev,
 				       &info.info);
 	return notifier_to_errno(ret);
 }
 
 static int vxlan_fdb_notify(struct vxlan_dev *vxlan, struct vxlan_fdb *fdb,
-			    struct vxlan_rdst *rd, int type, bool swdev_notify)
+			    struct vxlan_rdst *rd, int type, bool swdev_notify,
+			    struct netlink_ext_ack *extack)
 {
 	int err;
 
@@ -404,13 +407,13 @@ static int vxlan_fdb_notify(struct vxlan_dev *vxlan, struct vxlan_fdb *fdb,
 		switch (type) {
 		case RTM_NEWNEIGH:
 			err = vxlan_fdb_switchdev_call_notifiers(vxlan, fdb, rd,
-								 true);
+								 true, extack);
 			if (err)
 				return err;
 			break;
 		case RTM_DELNEIGH:
 			vxlan_fdb_switchdev_call_notifiers(vxlan, fdb, rd,
-							   false);
+							   false, extack);
 			break;
 		}
 	}
@@ -430,7 +433,7 @@ static void vxlan_ip_miss(struct net_device *dev, union vxlan_addr *ipa)
 		.remote_vni = cpu_to_be32(VXLAN_N_VID),
 	};
 
-	vxlan_fdb_notify(vxlan, &f, &remote, RTM_GETNEIGH, true);
+	vxlan_fdb_notify(vxlan, &f, &remote, RTM_GETNEIGH, true, NULL);
 }
 
 static void vxlan_fdb_miss(struct vxlan_dev *vxlan, const u8 eth_addr[ETH_ALEN])
@@ -442,7 +445,7 @@ static void vxlan_fdb_miss(struct vxlan_dev *vxlan, const u8 eth_addr[ETH_ALEN])
 
 	memcpy(f.eth_addr, eth_addr, ETH_ALEN);
 
-	vxlan_fdb_notify(vxlan, &f, &remote, RTM_GETNEIGH, true);
+	vxlan_fdb_notify(vxlan, &f, &remote, RTM_GETNEIGH, true, NULL);
 }
 
 /* Hash Ethernet address */
@@ -552,7 +555,7 @@ int vxlan_fdb_find_uc(struct net_device *dev, const u8 *mac, __be32 vni,
 	}
 
 	rdst = first_remote_rcu(f);
-	vxlan_fdb_switchdev_notifier_info(vxlan, f, rdst, fdb_info);
+	vxlan_fdb_switchdev_notifier_info(vxlan, f, rdst, NULL, fdb_info);
 
 out:
 	rcu_read_unlock();
@@ -563,19 +566,21 @@ EXPORT_SYMBOL_GPL(vxlan_fdb_find_uc);
 static int vxlan_fdb_notify_one(struct notifier_block *nb,
 				const struct vxlan_dev *vxlan,
 				const struct vxlan_fdb *f,
-				const struct vxlan_rdst *rdst)
+				const struct vxlan_rdst *rdst,
+				struct netlink_ext_ack *extack)
 {
 	struct switchdev_notifier_vxlan_fdb_info fdb_info;
 	int rc;
 
-	vxlan_fdb_switchdev_notifier_info(vxlan, f, rdst, &fdb_info);
+	vxlan_fdb_switchdev_notifier_info(vxlan, f, rdst, extack, &fdb_info);
 	rc = nb->notifier_call(nb, SWITCHDEV_VXLAN_FDB_ADD_TO_DEVICE,
 			       &fdb_info);
 	return notifier_to_errno(rc);
 }
 
 int vxlan_fdb_replay(const struct net_device *dev, __be32 vni,
-		     struct notifier_block *nb)
+		     struct notifier_block *nb,
+		     struct netlink_ext_ack *extack)
 {
 	struct vxlan_dev *vxlan;
 	struct vxlan_rdst *rdst;
@@ -593,7 +598,8 @@ int vxlan_fdb_replay(const struct net_device *dev, __be32 vni,
 			if (f->vni == vni) {
 				list_for_each_entry(rdst, &f->remotes, list) {
 					rc = vxlan_fdb_notify_one(nb, vxlan,
-								  f, rdst);
+								  f, rdst,
+								  extack);
 					if (rc)
 						goto out;
 				}
@@ -857,7 +863,7 @@ static void vxlan_fdb_destroy(struct vxlan_dev *vxlan, struct vxlan_fdb *f,
 	if (do_notify)
 		list_for_each_entry(rd, &f->remotes, list)
 			vxlan_fdb_notify(vxlan, f, rd, RTM_DELNEIGH,
-					 swdev_notify);
+					 swdev_notify, NULL);
 
 	hlist_del_rcu(&f->hlist);
 	call_rcu(&f->rcu, vxlan_fdb_free);
@@ -869,7 +875,8 @@ static int vxlan_fdb_update_existing(struct vxlan_dev *vxlan,
 				     __be16 port, __be32 vni,
 				     __u32 ifindex, __u16 ndm_flags,
 				     struct vxlan_fdb *f,
-				     bool swdev_notify)
+				     bool swdev_notify,
+				     struct netlink_ext_ack *extack)
 {
 	__u16 fdb_flags = (ndm_flags & ~NTF_USE);
 	struct vxlan_rdst *rd = NULL;
@@ -924,7 +931,7 @@ static int vxlan_fdb_update_existing(struct vxlan_dev *vxlan,
 			rd = first_remote_rtnl(f);
 
 		err = vxlan_fdb_notify(vxlan, f, rd, RTM_NEWNEIGH,
-				       swdev_notify);
+				       swdev_notify, extack);
 		if (err)
 			goto err_notify;
 	}
@@ -944,7 +951,8 @@ static int vxlan_fdb_update_create(struct vxlan_dev *vxlan,
 				   __u16 state, __u16 flags,
 				   __be16 port, __be32 src_vni, __be32 vni,
 				   __u32 ifindex, __u16 ndm_flags,
-				   bool swdev_notify)
+				   bool swdev_notify,
+				   struct netlink_ext_ack *extack)
 {
 	__u16 fdb_flags = (ndm_flags & ~NTF_USE);
 	struct vxlan_fdb *f;
@@ -962,7 +970,7 @@ static int vxlan_fdb_update_create(struct vxlan_dev *vxlan,
 		return rc;
 
 	rc = vxlan_fdb_notify(vxlan, f, first_remote_rtnl(f), RTM_NEWNEIGH,
-			      swdev_notify);
+			      swdev_notify, extack);
 	if (rc)
 		goto err_notify;
 
@@ -979,7 +987,8 @@ static int vxlan_fdb_update(struct vxlan_dev *vxlan,
 			    __u16 state, __u16 flags,
 			    __be16 port, __be32 src_vni, __be32 vni,
 			    __u32 ifindex, __u16 ndm_flags,
-			    bool swdev_notify)
+			    bool swdev_notify,
+			    struct netlink_ext_ack *extack)
 {
 	struct vxlan_fdb *f;
 
@@ -993,14 +1002,14 @@ static int vxlan_fdb_update(struct vxlan_dev *vxlan,
 
 		return vxlan_fdb_update_existing(vxlan, ip, state, flags, port,
 						 vni, ifindex, ndm_flags, f,
-						 swdev_notify);
+						 swdev_notify, extack);
 	} else {
 		if (!(flags & NLM_F_CREATE))
 			return -ENOENT;
 
 		return vxlan_fdb_update_create(vxlan, mac, ip, state, flags,
 					       port, src_vni, vni, ifindex,
-					       ndm_flags, swdev_notify);
+					       ndm_flags, swdev_notify, extack);
 	}
 }
 
@@ -1016,7 +1025,7 @@ static void vxlan_fdb_dst_destroy(struct vxlan_dev *vxlan, struct vxlan_fdb *f,
 				  struct vxlan_rdst *rd, bool swdev_notify)
 {
 	list_del_rcu(&rd->list);
-	vxlan_fdb_notify(vxlan, f, rd, RTM_DELNEIGH, swdev_notify);
+	vxlan_fdb_notify(vxlan, f, rd, RTM_DELNEIGH, swdev_notify, NULL);
 	call_rcu(&rd->rcu, vxlan_dst_free);
 }
 
@@ -1118,7 +1127,7 @@ static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
 	err = vxlan_fdb_update(vxlan, addr, &ip, ndm->ndm_state, flags,
 			       port, src_vni, vni, ifindex,
 			       ndm->ndm_flags | NTF_VXLAN_ADDED_BY_USER,
-			       true);
+			       true, extack);
 	spin_unlock_bh(&vxlan->hash_lock);
 
 	return err;
@@ -1286,7 +1295,7 @@ static bool vxlan_snoop(struct net_device *dev,
 
 		rdst->remote_ip = *src_ip;
 		f->updated = jiffies;
-		vxlan_fdb_notify(vxlan, f, rdst, RTM_NEWNEIGH, true);
+		vxlan_fdb_notify(vxlan, f, rdst, RTM_NEWNEIGH, true, NULL);
 	} else {
 		/* learned new entry */
 		spin_lock(&vxlan->hash_lock);
@@ -1299,7 +1308,7 @@ static bool vxlan_snoop(struct net_device *dev,
 					 vxlan->cfg.dst_port,
 					 vni,
 					 vxlan->default_dst.remote_vni,
-					 ifindex, NTF_SELF, true);
+					 ifindex, NTF_SELF, true, NULL);
 		spin_unlock(&vxlan->hash_lock);
 	}
 
@@ -3543,7 +3552,7 @@ static int __vxlan_dev_create(struct net *net, struct net_device *dev,
 	/* notify default fdb entry */
 	if (f) {
 		err = vxlan_fdb_notify(vxlan, f, first_remote_rtnl(f),
-				       RTM_NEWNEIGH, true);
+				       RTM_NEWNEIGH, true, extack);
 		if (err)
 			goto errout;
 	}
@@ -3818,7 +3827,7 @@ static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[],
 					       vxlan->cfg.dst_port,
 					       conf.vni, conf.vni,
 					       conf.remote_ifindex,
-					       NTF_SELF, true);
+					       NTF_SELF, true, extack);
 			if (err) {
 				spin_unlock_bh(&vxlan->hash_lock);
 				return err;
@@ -4113,8 +4122,11 @@ vxlan_fdb_external_learn_add(struct net_device *dev,
 			     struct switchdev_notifier_vxlan_fdb_info *fdb_info)
 {
 	struct vxlan_dev *vxlan = netdev_priv(dev);
+	struct netlink_ext_ack *extack;
 	int err;
 
+	extack = switchdev_notifier_info_to_extack(&fdb_info->info);
+
 	spin_lock_bh(&vxlan->hash_lock);
 	err = vxlan_fdb_update(vxlan, fdb_info->eth_addr, &fdb_info->remote_ip,
 			       NUD_REACHABLE,
@@ -4124,7 +4136,7 @@ vxlan_fdb_external_learn_add(struct net_device *dev,
 			       fdb_info->remote_vni,
 			       fdb_info->remote_ifindex,
 			       NTF_USE | NTF_SELF | NTF_EXT_LEARNED,
-			       false);
+			       false, extack);
 	spin_unlock_bh(&vxlan->hash_lock);
 
 	return err;
diff --git a/include/net/vxlan.h b/include/net/vxlan.h
index 236403eb5ba6..09767819c3d4 100644
--- a/include/net/vxlan.h
+++ b/include/net/vxlan.h
@@ -428,7 +428,8 @@ struct switchdev_notifier_vxlan_fdb_info {
 int vxlan_fdb_find_uc(struct net_device *dev, const u8 *mac, __be32 vni,
 		      struct switchdev_notifier_vxlan_fdb_info *fdb_info);
 int vxlan_fdb_replay(const struct net_device *dev, __be32 vni,
-		     struct notifier_block *nb);
+		     struct notifier_block *nb,
+		     struct netlink_ext_ack *extack);
 void vxlan_fdb_clear_offload(const struct net_device *dev, __be32 vni);
 
 #else
@@ -440,7 +441,8 @@ vxlan_fdb_find_uc(struct net_device *dev, const u8 *mac, __be32 vni,
 }
 
 static inline int vxlan_fdb_replay(const struct net_device *dev, __be32 vni,
-				   struct notifier_block *nb)
+				   struct notifier_block *nb,
+				   struct netlink_ext_ack *extack)
 {
 	return -EOPNOTSUPP;
 }
-- 
cgit v1.2.3


From 6685987c29582afc79b7fa3998dfbf36b4295791 Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@mellanox.com>
Date: Wed, 16 Jan 2019 23:06:56 +0000
Subject: switchdev: Add extack argument to call_switchdev_notifiers()

A follow-up patch will enable vetoing of FDB entries. Make it possible
to communicate details of why an FDB entry is not acceptable back to the
user.

Signed-off-by: Petr Machata <petrm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/switchdev.txt                   |  2 +-
 drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c    |  6 ++++--
 drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c | 12 ++++++------
 drivers/net/ethernet/rocker/rocker_main.c                |  2 +-
 drivers/net/ethernet/rocker/rocker_ofdpa.c               |  4 ++--
 drivers/net/vxlan.c                                      |  2 +-
 include/net/switchdev.h                                  |  6 ++++--
 net/bridge/br_switchdev.c                                |  2 +-
 net/dsa/slave.c                                          |  2 +-
 net/switchdev/switchdev.c                                |  5 +++--
 10 files changed, 24 insertions(+), 19 deletions(-)

(limited to 'include')

diff --git a/Documentation/networking/switchdev.txt b/Documentation/networking/switchdev.txt
index 82236a17b5e6..f3244d87512a 100644
--- a/Documentation/networking/switchdev.txt
+++ b/Documentation/networking/switchdev.txt
@@ -196,7 +196,7 @@ The switch device will learn/forget source MAC address/VLAN on ingress packets
 and notify the switch driver of the mac/vlan/port tuples.  The switch driver,
 in turn, will notify the bridge driver using the switchdev notifier call:
 
-	err = call_switchdev_notifiers(val, dev, info);
+	err = call_switchdev_notifiers(val, dev, info, extack);
 
 Where val is SWITCHDEV_FDB_ADD when learning and SWITCHDEV_FDB_DEL when
 forgetting, and info points to a struct switchdev_notifier_fdb_info.  On
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 98e5ffd71b91..042341c7f6b9 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -7294,7 +7294,8 @@ static void mlxsw_sp_rif_vlan_fdb_del(struct mlxsw_sp_rif *rif, const char *mac)
 
 	info.addr = mac;
 	info.vid = vid;
-	call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE, dev, &info.info);
+	call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE, dev, &info.info,
+				 NULL);
 }
 
 static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_vlan_ops = {
@@ -7381,7 +7382,8 @@ static void mlxsw_sp_rif_fid_fdb_del(struct mlxsw_sp_rif *rif, const char *mac)
 
 	info.addr = mac;
 	info.vid = 0;
-	call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE, dev, &info.info);
+	call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE, dev, &info.info,
+				 NULL);
 }
 
 static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_fid_ops = {
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
index 0abbaa0fbf14..71aad9d4e160 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
@@ -2443,7 +2443,7 @@ static void mlxsw_sp_fdb_vxlan_call_notifiers(struct net_device *dev,
 	ether_addr_copy(info.eth_addr, mac);
 	info.vni = vni;
 	info.offloaded = adding;
-	call_switchdev_notifiers(type, dev, &info.info);
+	call_switchdev_notifiers(type, dev, &info.info, NULL);
 }
 
 static void mlxsw_sp_fdb_nve_call_notifiers(struct net_device *dev,
@@ -2468,7 +2468,7 @@ mlxsw_sp_fdb_call_notifiers(enum switchdev_notifier_type type,
 	info.addr = mac;
 	info.vid = vid;
 	info.offloaded = offloaded;
-	call_switchdev_notifiers(type, dev, &info.info);
+	call_switchdev_notifiers(type, dev, &info.info, NULL);
 }
 
 static void mlxsw_sp_fdb_notify_mac_process(struct mlxsw_sp *mlxsw_sp,
@@ -2819,7 +2819,7 @@ mlxsw_sp_switchdev_bridge_vxlan_fdb_event(struct mlxsw_sp *mlxsw_sp,
 			return;
 		vxlan_fdb_info.offloaded = true;
 		call_switchdev_notifiers(SWITCHDEV_VXLAN_FDB_OFFLOADED, dev,
-					 &vxlan_fdb_info.info);
+					 &vxlan_fdb_info.info, NULL);
 		mlxsw_sp_fdb_call_notifiers(SWITCHDEV_FDB_OFFLOADED,
 					    vxlan_fdb_info.eth_addr,
 					    fdb_info->vid, dev, true);
@@ -2832,7 +2832,7 @@ mlxsw_sp_switchdev_bridge_vxlan_fdb_event(struct mlxsw_sp *mlxsw_sp,
 						     false);
 		vxlan_fdb_info.offloaded = false;
 		call_switchdev_notifiers(SWITCHDEV_VXLAN_FDB_OFFLOADED, dev,
-					 &vxlan_fdb_info.info);
+					 &vxlan_fdb_info.info, NULL);
 		break;
 	}
 }
@@ -2977,7 +2977,7 @@ mlxsw_sp_switchdev_vxlan_fdb_add(struct mlxsw_sp *mlxsw_sp,
 		}
 		vxlan_fdb_info->offloaded = true;
 		call_switchdev_notifiers(SWITCHDEV_VXLAN_FDB_OFFLOADED, dev,
-					 &vxlan_fdb_info->info);
+					 &vxlan_fdb_info->info, NULL);
 		mlxsw_sp_fid_put(fid);
 		return;
 	}
@@ -2998,7 +2998,7 @@ mlxsw_sp_switchdev_vxlan_fdb_add(struct mlxsw_sp *mlxsw_sp,
 		goto err_fdb_tunnel_uc_op;
 	vxlan_fdb_info->offloaded = true;
 	call_switchdev_notifiers(SWITCHDEV_VXLAN_FDB_OFFLOADED, dev,
-				 &vxlan_fdb_info->info);
+				 &vxlan_fdb_info->info, NULL);
 	mlxsw_sp_fdb_call_notifiers(SWITCHDEV_FDB_OFFLOADED,
 				    vxlan_fdb_info->eth_addr, vid, dev, true);
 
diff --git a/drivers/net/ethernet/rocker/rocker_main.c b/drivers/net/ethernet/rocker/rocker_main.c
index 6213827e3956..62a205eba9f7 100644
--- a/drivers/net/ethernet/rocker/rocker_main.c
+++ b/drivers/net/ethernet/rocker/rocker_main.c
@@ -2725,7 +2725,7 @@ rocker_fdb_offload_notify(struct rocker_port *rocker_port,
 	info.vid = recv_info->vid;
 	info.offloaded = true;
 	call_switchdev_notifiers(SWITCHDEV_FDB_OFFLOADED,
-				 rocker_port->dev, &info.info);
+				 rocker_port->dev, &info.info, NULL);
 }
 
 static void rocker_switchdev_event_work(struct work_struct *work)
diff --git a/drivers/net/ethernet/rocker/rocker_ofdpa.c b/drivers/net/ethernet/rocker/rocker_ofdpa.c
index 6473cc68c2d5..bea7895930f6 100644
--- a/drivers/net/ethernet/rocker/rocker_ofdpa.c
+++ b/drivers/net/ethernet/rocker/rocker_ofdpa.c
@@ -1833,10 +1833,10 @@ static void ofdpa_port_fdb_learn_work(struct work_struct *work)
 	rtnl_lock();
 	if (learned && removing)
 		call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE,
-					 lw->ofdpa_port->dev, &info.info);
+					 lw->ofdpa_port->dev, &info.info, NULL);
 	else if (learned && !removing)
 		call_switchdev_notifiers(SWITCHDEV_FDB_ADD_TO_BRIDGE,
-					 lw->ofdpa_port->dev, &info.info);
+					 lw->ofdpa_port->dev, &info.info, NULL);
 	rtnl_unlock();
 
 	kfree(work);
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index a9e90159bb61..ef45c3c925be 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -393,7 +393,7 @@ static int vxlan_fdb_switchdev_call_notifiers(struct vxlan_dev *vxlan,
 			       : SWITCHDEV_VXLAN_FDB_DEL_TO_DEVICE;
 	vxlan_fdb_switchdev_notifier_info(vxlan, fdb, rd, NULL, &info);
 	ret = call_switchdev_notifiers(notifier_type, vxlan->dev,
-				       &info.info);
+				       &info.info, extack);
 	return notifier_to_errno(ret);
 }
 
diff --git a/include/net/switchdev.h b/include/net/switchdev.h
index a7fdab5ee6c3..63843ae5dc81 100644
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -195,7 +195,8 @@ int switchdev_port_obj_del(struct net_device *dev,
 int register_switchdev_notifier(struct notifier_block *nb);
 int unregister_switchdev_notifier(struct notifier_block *nb);
 int call_switchdev_notifiers(unsigned long val, struct net_device *dev,
-			     struct switchdev_notifier_info *info);
+			     struct switchdev_notifier_info *info,
+			     struct netlink_ext_ack *extack);
 
 int register_switchdev_blocking_notifier(struct notifier_block *nb);
 int unregister_switchdev_blocking_notifier(struct notifier_block *nb);
@@ -267,7 +268,8 @@ static inline int unregister_switchdev_notifier(struct notifier_block *nb)
 
 static inline int call_switchdev_notifiers(unsigned long val,
 					   struct net_device *dev,
-					   struct switchdev_notifier_info *info)
+					   struct switchdev_notifier_info *info,
+					   struct netlink_ext_ack *extack)
 {
 	return NOTIFY_DONE;
 }
diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c
index 035ff59d9cbd..4d2b9eb7604a 100644
--- a/net/bridge/br_switchdev.c
+++ b/net/bridge/br_switchdev.c
@@ -113,7 +113,7 @@ br_switchdev_fdb_call_notifiers(bool adding, const unsigned char *mac,
 	info.added_by_user = added_by_user;
 	info.offloaded = offloaded;
 	notifier_type = adding ? SWITCHDEV_FDB_ADD_TO_DEVICE : SWITCHDEV_FDB_DEL_TO_DEVICE;
-	call_switchdev_notifiers(notifier_type, dev, &info.info);
+	call_switchdev_notifiers(notifier_type, dev, &info.info, NULL);
 }
 
 void
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index d5680a98a7f0..91de3a663226 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -1451,7 +1451,7 @@ static void dsa_slave_switchdev_event_work(struct work_struct *work)
 		}
 		fdb_info->offloaded = true;
 		call_switchdev_notifiers(SWITCHDEV_FDB_OFFLOADED, dev,
-					 &fdb_info->info);
+					 &fdb_info->info, NULL);
 		break;
 
 	case SWITCHDEV_FDB_DEL_TO_DEVICE:
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index 5df9d1138ac9..cd78253de31d 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -556,10 +556,11 @@ EXPORT_SYMBOL_GPL(unregister_switchdev_notifier);
  *	Call all network notifier blocks.
  */
 int call_switchdev_notifiers(unsigned long val, struct net_device *dev,
-			     struct switchdev_notifier_info *info)
+			     struct switchdev_notifier_info *info,
+			     struct netlink_ext_ack *extack)
 {
 	info->dev = dev;
-	info->extack = NULL;
+	info->extack = extack;
 	return atomic_notifier_call_chain(&switchdev_notif_chain, val, info);
 }
 EXPORT_SYMBOL_GPL(call_switchdev_notifiers);
-- 
cgit v1.2.3


From 8b59bfe83cf15f755024e88812e057af7341f525 Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Thu, 17 Jan 2019 15:22:20 +0800
Subject: qed: remove duplicated include from qed_if.h

Remove duplicated include.

Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Acked-by: Denis Bolotin <dbolotin@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/qed/qed_if.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h
index 91c536a01b56..5f818fda96bd 100644
--- a/include/linux/qed/qed_if.h
+++ b/include/linux/qed/qed_if.h
@@ -38,7 +38,6 @@
 #include <linux/netdevice.h>
 #include <linux/pci.h>
 #include <linux/skbuff.h>
-#include <linux/types.h>
 #include <asm/byteorder.h>
 #include <linux/io.h>
 #include <linux/compiler.h>
-- 
cgit v1.2.3


From 58fa4a410fc31afe08d0d0c6b6d8860c22ec17c2 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 16 Jan 2019 14:15:20 +0100
Subject: ipc: introduce ksys_ipc()/compat_ksys_ipc() for s390

The sys_ipc() and compat_ksys_ipc() functions are meant to only
be used from the system call table, not called by another function.

Introduce ksys_*() interfaces for this purpose, as we have done
for many other system calls.

Link: https://lore.kernel.org/lkml/20190116131527.2071570-3-arnd@arndb.de
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Heiko Carstens <heiko.carstens@de.ibm.com>
[heiko.carstens@de.ibm.com: compile fix for !CONFIG_COMPAT]
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/kernel/compat_linux.c |  2 +-
 arch/s390/kernel/sys_s390.c     |  4 +++-
 include/linux/syscalls.h        |  4 ++++
 ipc/syscall.c                   | 20 ++++++++++++++++----
 kernel/sys_ni.c                 |  1 +
 5 files changed, 25 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c
index 8ac38d51ed7d..a47f6d3c6d5b 100644
--- a/arch/s390/kernel/compat_linux.c
+++ b/arch/s390/kernel/compat_linux.c
@@ -296,7 +296,7 @@ COMPAT_SYSCALL_DEFINE5(s390_ipc, uint, call, int, first, compat_ulong_t, second,
 {
 	if (call >> 16)		/* hack for backward compatibility */
 		return -EINVAL;
-	return compat_sys_ipc(call, first, second, third, ptr, third);
+	return compat_ksys_ipc(call, first, second, third, ptr, third);
 }
 #endif
 
diff --git a/arch/s390/kernel/sys_s390.c b/arch/s390/kernel/sys_s390.c
index 560bdaf8a74f..fd0cbbed4d9f 100644
--- a/arch/s390/kernel/sys_s390.c
+++ b/arch/s390/kernel/sys_s390.c
@@ -58,6 +58,7 @@ out:
 	return error;
 }
 
+#ifdef CONFIG_SYSVIPC
 /*
  * sys_ipc() is the de-multiplexer for the SysV IPC calls.
  */
@@ -74,8 +75,9 @@ SYSCALL_DEFINE5(s390_ipc, uint, call, int, first, unsigned long, second,
 	 * Therefore we can call the generic variant by simply passing the
 	 * third parameter also as fifth parameter.
 	 */
-	return sys_ipc(call, first, second, third, ptr, third);
+	return ksys_ipc(call, first, second, third, ptr, third);
 }
+#endif /* CONFIG_SYSVIPC */
 
 SYSCALL_DEFINE1(s390_personality, unsigned int, personality)
 {
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 257cccba3062..fb63045a0fb6 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -1185,6 +1185,10 @@ unsigned long ksys_mmap_pgoff(unsigned long addr, unsigned long len,
 			      unsigned long prot, unsigned long flags,
 			      unsigned long fd, unsigned long pgoff);
 ssize_t ksys_readahead(int fd, loff_t offset, size_t count);
+int ksys_ipc(unsigned int call, int first, unsigned long second,
+	unsigned long third, void __user * ptr, long fifth);
+int compat_ksys_ipc(u32 call, int first, int second,
+	u32 third, u32 ptr, u32 fifth);
 
 /*
  * The following kernel syscall equivalents are just wrappers to fs-internal
diff --git a/ipc/syscall.c b/ipc/syscall.c
index 1ac06e3983c0..3cf8ad703a4d 100644
--- a/ipc/syscall.c
+++ b/ipc/syscall.c
@@ -17,8 +17,8 @@
 #include <linux/shm.h>
 #include <linux/uaccess.h>
 
-SYSCALL_DEFINE6(ipc, unsigned int, call, int, first, unsigned long, second,
-		unsigned long, third, void __user *, ptr, long, fifth)
+int ksys_ipc(unsigned int call, int first, unsigned long second,
+	unsigned long third, void __user * ptr, long fifth)
 {
 	int version, ret;
 
@@ -106,6 +106,12 @@ SYSCALL_DEFINE6(ipc, unsigned int, call, int, first, unsigned long, second,
 		return -ENOSYS;
 	}
 }
+
+SYSCALL_DEFINE6(ipc, unsigned int, call, int, first, unsigned long, second,
+		unsigned long, third, void __user *, ptr, long, fifth)
+{
+	return ksys_ipc(call, first, second, third, ptr, fifth);
+}
 #endif
 
 #ifdef CONFIG_COMPAT
@@ -121,8 +127,8 @@ struct compat_ipc_kludge {
 };
 
 #ifdef CONFIG_ARCH_WANT_OLD_COMPAT_IPC
-COMPAT_SYSCALL_DEFINE6(ipc, u32, call, int, first, int, second,
-	u32, third, compat_uptr_t, ptr, u32, fifth)
+int compat_ksys_ipc(u32 call, int first, int second,
+	u32 third, compat_uptr_t ptr, u32 fifth)
 {
 	int version;
 	u32 pad;
@@ -195,5 +201,11 @@ COMPAT_SYSCALL_DEFINE6(ipc, u32, call, int, first, int, second,
 
 	return -ENOSYS;
 }
+
+COMPAT_SYSCALL_DEFINE6(ipc, u32, call, int, first, int, second,
+	u32, third, compat_uptr_t, ptr, u32, fifth)
+{
+	return compat_ksys_ipc(call, first, second, third, ptr, fifth);
+}
 #endif
 #endif
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index ab9d0e3c6d50..bc934f31ab10 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -366,6 +366,7 @@ COND_SYSCALL(kexec_file_load);
 /* s390 */
 COND_SYSCALL(s390_pci_mmio_read);
 COND_SYSCALL(s390_pci_mmio_write);
+COND_SYSCALL(s390_ipc);
 COND_SYSCALL_COMPAT(s390_ipc);
 
 /* powerpc */
-- 
cgit v1.2.3


From 5f620bb6439ea8f354cfe4c7d47887df9d3acaf0 Mon Sep 17 00:00:00 2001
From: Ran Wang <ran.wang_1@nxp.com>
Date: Thu, 17 Jan 2019 09:10:55 +0000
Subject: drivers: usb :fsl: Remove USB Errata checking code

Remove USB errata checking code from driver. Applicability of erratum
is retrieved by reading corresponding property in device tree.
This property is written during device tree fixup.

Besides, replace spaces with tabs to make code aligned.

Signed-off-by: Ramneek Mehresh <ramneek.mehresh@nxp.com>
Signed-off-by: Nikhil Badola <nikhil.badola@freescale.com>
Signed-off-by: Yinbo Zhu <yinbo.zhu@nxp.com>
Signed-off-by: Ran Wang <ran.wang_1@nxp.com>
Acked-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/host/ehci-fsl.c      | 7 +------
 drivers/usb/host/fsl-mph-dr-of.c | 6 ++++++
 include/linux/fsl_devices.h      | 7 ++++---
 3 files changed, 11 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/drivers/usb/host/ehci-fsl.c b/drivers/usb/host/ehci-fsl.c
index 0a867d96c126..e3d0c1c25160 100644
--- a/drivers/usb/host/ehci-fsl.c
+++ b/drivers/usb/host/ehci-fsl.c
@@ -304,14 +304,9 @@ static int ehci_fsl_usb_setup(struct ehci_hcd *ehci)
 			return -EINVAL;
 
 	if (pdata->operating_mode == FSL_USB2_MPH_HOST) {
-		unsigned int chip, rev, svr;
-
-		svr = mfspr(SPRN_SVR);
-		chip = svr >> 16;
-		rev = (svr >> 4) & 0xf;
 
 		/* Deal with USB Erratum #14 on MPC834x Rev 1.0 & 1.1 chips */
-		if ((rev == 1) && (chip >= 0x8050) && (chip <= 0x8055))
+		if (pdata->has_fsl_erratum_14 == 1)
 			ehci->has_fsl_port_bug = 1;
 
 		if (pdata->port_enables & FSL_USB2_PORT0_ENABLED)
diff --git a/drivers/usb/host/fsl-mph-dr-of.c b/drivers/usb/host/fsl-mph-dr-of.c
index 677f9d592109..4f8b8a08c914 100644
--- a/drivers/usb/host/fsl-mph-dr-of.c
+++ b/drivers/usb/host/fsl-mph-dr-of.c
@@ -225,6 +225,12 @@ static int fsl_usb2_mph_dr_of_probe(struct platform_device *ofdev)
 	pdata->has_fsl_erratum_a005697 =
 		of_property_read_bool(np, "fsl,usb_erratum-a005697");
 
+	if (of_get_property(np, "fsl,usb_erratum_14", NULL))
+		pdata->has_fsl_erratum_14 = 1;
+	else
+		pdata->has_fsl_erratum_14 = 0;
+
+
 	/*
 	 * Determine whether phy_clk_valid needs to be checked
 	 * by reading property in device tree
diff --git a/include/linux/fsl_devices.h b/include/linux/fsl_devices.h
index 60cef8227534..5da56a674f2f 100644
--- a/include/linux/fsl_devices.h
+++ b/include/linux/fsl_devices.h
@@ -98,10 +98,11 @@ struct fsl_usb2_platform_data {
 
 	unsigned	suspended:1;
 	unsigned	already_suspended:1;
-	unsigned        has_fsl_erratum_a007792:1;
-	unsigned        has_fsl_erratum_a005275:1;
+	unsigned	has_fsl_erratum_a007792:1;
+	unsigned	has_fsl_erratum_14:1;
+	unsigned	has_fsl_erratum_a005275:1;
 	unsigned	has_fsl_erratum_a005697:1;
-	unsigned        check_phy_clk_valid:1;
+	unsigned	check_phy_clk_valid:1;
 
 	/* register save area for suspend/resume */
 	u32		pm_command;
-- 
cgit v1.2.3


From 73381da5f9ec333b1fbe38a156e7764a90e36a74 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Thu, 10 Jan 2019 12:17:54 -0800
Subject: crypto: streebog - use correct endianness type

streebog_uint512::qword needs to be __le64, not u64.  This fixes a large
number of sparse warnings:

    crypto/streebog_generic.c:25:9: warning: incorrect type in initializer (different base types)
    crypto/streebog_generic.c:25:9:    expected unsigned long long
    crypto/streebog_generic.c:25:9:    got restricted __le64 [usertype]
    [omitted many similar warnings]

No actual change in behavior.

Cc: Vitaly Chikunov <vt@altlinux.org>
Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/streebog_generic.c | 2 +-
 include/crypto/streebog.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/crypto/streebog_generic.c b/crypto/streebog_generic.c
index 03272a22afce..5a2eafed9c29 100644
--- a/crypto/streebog_generic.c
+++ b/crypto/streebog_generic.c
@@ -960,7 +960,7 @@ static int streebog_init(struct shash_desc *desc)
 	memset(ctx, 0, sizeof(struct streebog_state));
 	for (i = 0; i < 8; i++) {
 		if (digest_size == STREEBOG256_DIGEST_SIZE)
-			ctx->h.qword[i] = 0x0101010101010101ULL;
+			ctx->h.qword[i] = cpu_to_le64(0x0101010101010101ULL);
 	}
 	return 0;
 }
diff --git a/include/crypto/streebog.h b/include/crypto/streebog.h
index 4af119f7e07b..856e32af8657 100644
--- a/include/crypto/streebog.h
+++ b/include/crypto/streebog.h
@@ -19,7 +19,7 @@
 #define STREEBOG_BLOCK_SIZE	64
 
 struct streebog_uint512 {
-	u64 qword[8];
+	__le64 qword[8];
 };
 
 struct streebog_state {
-- 
cgit v1.2.3


From e17568e15830c6d158f6492b5b60ccdeb0b7b20b Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Thu, 10 Jan 2019 12:17:56 -0800
Subject: crypto: user - forward declare crypto_nlsk

Move the declaration of crypto_nlsk into internal/cryptouser.h.  This
fixes the following sparse warning:

    crypto/crypto_user_base.c:41:13: warning: symbol 'crypto_nlsk' was not declared. Should it be static?

Cc: Corentin Labbe <clabbe@baylibre.com>
Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/crypto_user_stat.c            | 2 --
 include/crypto/internal/cryptouser.h | 2 ++
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/crypto/crypto_user_stat.c b/crypto/crypto_user_stat.c
index 3e9a53233d80..d4d1cb2bedd7 100644
--- a/crypto/crypto_user_stat.c
+++ b/crypto/crypto_user_stat.c
@@ -22,8 +22,6 @@
 
 static DEFINE_MUTEX(crypto_cfg_mutex);
 
-extern struct sock *crypto_nlsk;
-
 struct crypto_dump_info {
 	struct sk_buff *in_skb;
 	struct sk_buff *out_skb;
diff --git a/include/crypto/internal/cryptouser.h b/include/crypto/internal/cryptouser.h
index 40623f4457df..8c602b187c58 100644
--- a/include/crypto/internal/cryptouser.h
+++ b/include/crypto/internal/cryptouser.h
@@ -1,6 +1,8 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 #include <net/netlink.h>
 
+extern struct sock *crypto_nlsk;
+
 struct crypto_alg *crypto_alg_match(struct crypto_user_alg *p, int exact);
 
 #ifdef CONFIG_CRYPTO_STATS
-- 
cgit v1.2.3


From 466e0759269d31485074126700574230bfff3b1c Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Thu, 10 Jan 2019 12:18:00 -0800
Subject: crypto: af_alg - make some functions static

Some exported functions in af_alg.c aren't used outside of that file.
Therefore, un-export them and make them 'static'.

Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/af_alg.c         | 20 +++++++-------------
 include/crypto/if_alg.h |  7 -------
 2 files changed, 7 insertions(+), 20 deletions(-)

(limited to 'include')

diff --git a/crypto/af_alg.c b/crypto/af_alg.c
index 17eb09d222ff..ccae4a7ada8a 100644
--- a/crypto/af_alg.c
+++ b/crypto/af_alg.c
@@ -425,12 +425,12 @@ int af_alg_make_sg(struct af_alg_sgl *sgl, struct iov_iter *iter, int len)
 }
 EXPORT_SYMBOL_GPL(af_alg_make_sg);
 
-void af_alg_link_sg(struct af_alg_sgl *sgl_prev, struct af_alg_sgl *sgl_new)
+static void af_alg_link_sg(struct af_alg_sgl *sgl_prev,
+			   struct af_alg_sgl *sgl_new)
 {
 	sg_unmark_end(sgl_prev->sg + sgl_prev->npages - 1);
 	sg_chain(sgl_prev->sg, sgl_prev->npages + 1, sgl_new->sg);
 }
-EXPORT_SYMBOL_GPL(af_alg_link_sg);
 
 void af_alg_free_sg(struct af_alg_sgl *sgl)
 {
@@ -441,7 +441,7 @@ void af_alg_free_sg(struct af_alg_sgl *sgl)
 }
 EXPORT_SYMBOL_GPL(af_alg_free_sg);
 
-int af_alg_cmsg_send(struct msghdr *msg, struct af_alg_control *con)
+static int af_alg_cmsg_send(struct msghdr *msg, struct af_alg_control *con)
 {
 	struct cmsghdr *cmsg;
 
@@ -480,7 +480,6 @@ int af_alg_cmsg_send(struct msghdr *msg, struct af_alg_control *con)
 
 	return 0;
 }
-EXPORT_SYMBOL_GPL(af_alg_cmsg_send);
 
 /**
  * af_alg_alloc_tsgl - allocate the TX SGL
@@ -488,7 +487,7 @@ EXPORT_SYMBOL_GPL(af_alg_cmsg_send);
  * @sk socket of connection to user space
  * @return: 0 upon success, < 0 upon error
  */
-int af_alg_alloc_tsgl(struct sock *sk)
+static int af_alg_alloc_tsgl(struct sock *sk)
 {
 	struct alg_sock *ask = alg_sk(sk);
 	struct af_alg_ctx *ctx = ask->private;
@@ -517,7 +516,6 @@ int af_alg_alloc_tsgl(struct sock *sk)
 
 	return 0;
 }
-EXPORT_SYMBOL_GPL(af_alg_alloc_tsgl);
 
 /**
  * aead_count_tsgl - Count number of TX SG entries
@@ -654,7 +652,7 @@ EXPORT_SYMBOL_GPL(af_alg_pull_tsgl);
  *
  * @areq Request holding the TX and RX SGL
  */
-void af_alg_free_areq_sgls(struct af_alg_async_req *areq)
+static void af_alg_free_areq_sgls(struct af_alg_async_req *areq)
 {
 	struct sock *sk = areq->sk;
 	struct alg_sock *ask = alg_sk(sk);
@@ -683,7 +681,6 @@ void af_alg_free_areq_sgls(struct af_alg_async_req *areq)
 		sock_kfree_s(sk, tsgl, areq->tsgl_entries * sizeof(*tsgl));
 	}
 }
-EXPORT_SYMBOL_GPL(af_alg_free_areq_sgls);
 
 /**
  * af_alg_wait_for_wmem - wait for availability of writable memory
@@ -692,7 +689,7 @@ EXPORT_SYMBOL_GPL(af_alg_free_areq_sgls);
  * @flags If MSG_DONTWAIT is set, then only report if function would sleep
  * @return 0 when writable memory is available, < 0 upon error
  */
-int af_alg_wait_for_wmem(struct sock *sk, unsigned int flags)
+static int af_alg_wait_for_wmem(struct sock *sk, unsigned int flags)
 {
 	DEFINE_WAIT_FUNC(wait, woken_wake_function);
 	int err = -ERESTARTSYS;
@@ -717,7 +714,6 @@ int af_alg_wait_for_wmem(struct sock *sk, unsigned int flags)
 
 	return err;
 }
-EXPORT_SYMBOL_GPL(af_alg_wait_for_wmem);
 
 /**
  * af_alg_wmem_wakeup - wakeup caller when writable memory is available
@@ -786,8 +782,7 @@ EXPORT_SYMBOL_GPL(af_alg_wait_for_data);
  *
  * @sk socket of connection to user space
  */
-
-void af_alg_data_wakeup(struct sock *sk)
+static void af_alg_data_wakeup(struct sock *sk)
 {
 	struct alg_sock *ask = alg_sk(sk);
 	struct af_alg_ctx *ctx = ask->private;
@@ -805,7 +800,6 @@ void af_alg_data_wakeup(struct sock *sk)
 	sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
 	rcu_read_unlock();
 }
-EXPORT_SYMBOL_GPL(af_alg_data_wakeup);
 
 /**
  * af_alg_sendmsg - implementation of sendmsg system call handler
diff --git a/include/crypto/if_alg.h b/include/crypto/if_alg.h
index 482461d8931d..0d464db74bf5 100644
--- a/include/crypto/if_alg.h
+++ b/include/crypto/if_alg.h
@@ -169,9 +169,6 @@ int af_alg_accept(struct sock *sk, struct socket *newsock, bool kern);
 
 int af_alg_make_sg(struct af_alg_sgl *sgl, struct iov_iter *iter, int len);
 void af_alg_free_sg(struct af_alg_sgl *sgl);
-void af_alg_link_sg(struct af_alg_sgl *sgl_prev, struct af_alg_sgl *sgl_new);
-
-int af_alg_cmsg_send(struct msghdr *msg, struct af_alg_control *con);
 
 static inline struct alg_sock *alg_sk(struct sock *sk)
 {
@@ -230,15 +227,11 @@ static inline bool af_alg_readable(struct sock *sk)
 	return PAGE_SIZE <= af_alg_rcvbuf(sk);
 }
 
-int af_alg_alloc_tsgl(struct sock *sk);
 unsigned int af_alg_count_tsgl(struct sock *sk, size_t bytes, size_t offset);
 void af_alg_pull_tsgl(struct sock *sk, size_t used, struct scatterlist *dst,
 		      size_t dst_offset);
-void af_alg_free_areq_sgls(struct af_alg_async_req *areq);
-int af_alg_wait_for_wmem(struct sock *sk, unsigned int flags);
 void af_alg_wmem_wakeup(struct sock *sk);
 int af_alg_wait_for_data(struct sock *sk, unsigned flags);
-void af_alg_data_wakeup(struct sock *sk);
 int af_alg_sendmsg(struct socket *sock, struct msghdr *msg, size_t size,
 		   unsigned int ivsize);
 ssize_t af_alg_sendpage(struct socket *sock, struct page *page,
-- 
cgit v1.2.3


From 2ff5c5a1dc6e6c502e0a3e49db4e792804e43693 Mon Sep 17 00:00:00 2001
From: Martin Hostettler <textshell@uchuujin.de>
Date: Sat, 15 Dec 2018 15:34:20 +0100
Subject: vt: refactor vc_ques to allow of other private sequences.

The vc_ques keeps track if a csi sequence is a private DEC control
function beginning with '?'. Nowadays some private control functions
begin with '>' and '='. Switch the code to instead use a new 3-bit
vc_priv that allows for all private use parameter prefixes.

Signed-off-by: Martin Hostettler <textshell@uchuujin.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/vt/vt.c            | 20 +++++++++++---------
 include/linux/console_struct.h |  2 +-
 2 files changed, 12 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c
index ec55336abf95..b59feeaaf02b 100644
--- a/drivers/tty/vt/vt.c
+++ b/drivers/tty/vt/vt.c
@@ -1341,6 +1341,8 @@ struct vc_data *vc_deallocate(unsigned int currcons)
  *	VT102 emulator
  */
 
+enum { EPecma = 0, EPdec, EPeq, EPgt, EPlt};
+
 #define set_kbd(vc, x)	vt_set_kbd_mode_bit((vc)->vc_num, (x))
 #define clr_kbd(vc, x)	vt_clr_kbd_mode_bit((vc)->vc_num, (x))
 #define is_kbd(vc, x)	vt_get_kbd_mode_bit((vc)->vc_num, (x))
@@ -1814,7 +1816,7 @@ static void set_mode(struct vc_data *vc, int on_off)
 	int i;
 
 	for (i = 0; i <= vc->vc_npar; i++)
-		if (vc->vc_ques) {
+		if (vc->vc_priv == EPdec) {
 			switch(vc->vc_par[i]) {	/* DEC private modes set/reset */
 			case 1:			/* Cursor keys send ^[Ox/^[[x */
 				if (on_off)
@@ -2030,7 +2032,7 @@ static void reset_terminal(struct vc_data *vc, int do_clear)
 	vc->vc_top		= 0;
 	vc->vc_bottom		= vc->vc_rows;
 	vc->vc_state		= ESnormal;
-	vc->vc_ques		= 0;
+	vc->vc_priv		= EPecma;
 	vc->vc_translate	= set_translate(LAT1_MAP, vc);
 	vc->vc_G0_charset	= LAT1_MAP;
 	vc->vc_G1_charset	= GRAF_MAP;
@@ -2234,8 +2236,8 @@ static void do_con_trol(struct tty_struct *tty, struct vc_data *vc, int c)
 			vc->vc_state=ESfunckey;
 			return;
 		}
-		vc->vc_ques = (c == '?');
-		if (vc->vc_ques)
+		vc->vc_priv = (c == '?') ? EPdec : EPecma;
+		if (vc->vc_priv != EPecma)
 			return;
 		/* fall through */
 	case ESgetpars:
@@ -2256,7 +2258,7 @@ static void do_con_trol(struct tty_struct *tty, struct vc_data *vc, int c)
 			set_mode(vc, 0);
 			return;
 		case 'c':
-			if (vc->vc_ques) {
+			if (vc->vc_priv == EPdec) {
 				if (vc->vc_par[0])
 					vc->vc_cursor_type = vc->vc_par[0] | (vc->vc_par[1] << 8) | (vc->vc_par[2] << 16);
 				else
@@ -2265,7 +2267,7 @@ static void do_con_trol(struct tty_struct *tty, struct vc_data *vc, int c)
 			}
 			break;
 		case 'm':
-			if (vc->vc_ques) {
+			if (vc->vc_priv == EPdec) {
 				clear_selection();
 				if (vc->vc_par[0])
 					vc->vc_complement_mask = vc->vc_par[0] << 8 | vc->vc_par[1];
@@ -2275,7 +2277,7 @@ static void do_con_trol(struct tty_struct *tty, struct vc_data *vc, int c)
 			}
 			break;
 		case 'n':
-			if (!vc->vc_ques) {
+			if (vc->vc_priv == EPecma) {
 				if (vc->vc_par[0] == 5)
 					status_report(tty);
 				else if (vc->vc_par[0] == 6)
@@ -2283,8 +2285,8 @@ static void do_con_trol(struct tty_struct *tty, struct vc_data *vc, int c)
 			}
 			return;
 		}
-		if (vc->vc_ques) {
-			vc->vc_ques = 0;
+		if (vc->vc_priv != EPecma) {
+			vc->vc_priv = EPecma;
 			return;
 		}
 		switch(c) {
diff --git a/include/linux/console_struct.h b/include/linux/console_struct.h
index ab137f97ecbd..ed798e114663 100644
--- a/include/linux/console_struct.h
+++ b/include/linux/console_struct.h
@@ -119,7 +119,7 @@ struct vc_data {
 	unsigned int	vc_s_blink	: 1;
 	unsigned int	vc_s_reverse	: 1;
 	/* misc */
-	unsigned int	vc_ques		: 1;
+	unsigned int	vc_priv		: 3;
 	unsigned int	vc_need_wrap	: 1;
 	unsigned int	vc_can_do_color	: 1;
 	unsigned int	vc_report_mouse : 2;
-- 
cgit v1.2.3


From d152159b89118841ebc0f7be2aadf79a22c6c501 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Tue, 8 Jan 2019 15:45:58 +0100
Subject: netfilter: nf_tables: prepare nft_object for lookups via hashtable

Add a 'key' structure for object, so we can look them up by name + table
combination (the name can be the same in each table).

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h | 19 ++++++++++++++-----
 net/netfilter/nf_tables_api.c     | 27 ++++++++++++++++-----------
 net/netfilter/nft_objref.c        |  2 +-
 net/netfilter/nft_quota.c         |  2 +-
 4 files changed, 32 insertions(+), 18 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 841835a387e1..325d0a6b808b 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -1011,12 +1011,22 @@ void nft_unregister_expr(struct nft_expr_type *);
 int nft_verdict_dump(struct sk_buff *skb, int type,
 		     const struct nft_verdict *v);
 
+/**
+ *	struct nft_object_hash_key - key to lookup nft_object
+ *
+ *	@name: name of the stateful object to look up
+ *	@table: table the object belongs to
+ */
+struct nft_object_hash_key {
+	const char                      *name;
+	const struct nft_table          *table;
+};
+
 /**
  *	struct nft_object - nf_tables stateful object
  *
  *	@list: table stateful object list node
- *	@table: table this object belongs to
- *	@name: name of this stateful object
+ *	@key:  keys that identify this object
  *	@genmask: generation mask
  *	@use: number of references to this stateful object
  *	@handle: unique object handle
@@ -1025,8 +1035,7 @@ int nft_verdict_dump(struct sk_buff *skb, int type,
  */
 struct nft_object {
 	struct list_head		list;
-	char				*name;
-	struct nft_table		*table;
+	struct nft_object_hash_key	key;
 	u32				genmask:2,
 					use:30;
 	u64				handle;
@@ -1047,7 +1056,7 @@ struct nft_object *nft_obj_lookup(const struct nft_table *table,
 				  const struct nlattr *nla, u32 objtype,
 				  u8 genmask);
 
-void nft_obj_notify(struct net *net, struct nft_table *table,
+void nft_obj_notify(struct net *net, const struct nft_table *table,
 		    struct nft_object *obj, u32 portid, u32 seq,
 		    int event, int family, int report, gfp_t gfp);
 
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 2b0a93300dd7..5e213941e85b 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -3853,7 +3853,7 @@ static int nf_tables_fill_setelem(struct sk_buff *skb,
 
 	if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF) &&
 	    nla_put_string(skb, NFTA_SET_ELEM_OBJREF,
-			   (*nft_set_ext_obj(ext))->name) < 0)
+			   (*nft_set_ext_obj(ext))->key.name) < 0)
 		goto nla_put_failure;
 
 	if (nft_set_ext_exists(ext, NFT_SET_EXT_FLAGS) &&
@@ -4826,7 +4826,7 @@ struct nft_object *nft_obj_lookup(const struct nft_table *table,
 	struct nft_object *obj;
 
 	list_for_each_entry_rcu(obj, &table->objects, list) {
-		if (!nla_strcmp(nla, obj->name) &&
+		if (!nla_strcmp(nla, obj->key.name) &&
 		    objtype == obj->ops->type->type &&
 		    nft_active_genmask(obj, genmask))
 			return obj;
@@ -5014,11 +5014,11 @@ static int nf_tables_newobj(struct net *net, struct sock *nlsk,
 		err = PTR_ERR(obj);
 		goto err1;
 	}
-	obj->table = table;
+	obj->key.table = table;
 	obj->handle = nf_tables_alloc_handle(table);
 
-	obj->name = nla_strdup(nla[NFTA_OBJ_NAME], GFP_KERNEL);
-	if (!obj->name) {
+	obj->key.name = nla_strdup(nla[NFTA_OBJ_NAME], GFP_KERNEL);
+	if (!obj->key.name) {
 		err = -ENOMEM;
 		goto err2;
 	}
@@ -5031,7 +5031,7 @@ static int nf_tables_newobj(struct net *net, struct sock *nlsk,
 	table->use++;
 	return 0;
 err3:
-	kfree(obj->name);
+	kfree(obj->key.name);
 err2:
 	if (obj->ops->destroy)
 		obj->ops->destroy(&ctx, obj);
@@ -5060,7 +5060,7 @@ static int nf_tables_fill_obj_info(struct sk_buff *skb, struct net *net,
 	nfmsg->res_id		= htons(net->nft.base_seq & 0xffff);
 
 	if (nla_put_string(skb, NFTA_OBJ_TABLE, table->name) ||
-	    nla_put_string(skb, NFTA_OBJ_NAME, obj->name) ||
+	    nla_put_string(skb, NFTA_OBJ_NAME, obj->key.name) ||
 	    nla_put_be32(skb, NFTA_OBJ_TYPE, htonl(obj->ops->type->type)) ||
 	    nla_put_be32(skb, NFTA_OBJ_USE, htonl(obj->use)) ||
 	    nft_object_dump(skb, NFTA_OBJ_DATA, obj, reset) ||
@@ -5246,7 +5246,7 @@ static void nft_obj_destroy(const struct nft_ctx *ctx, struct nft_object *obj)
 		obj->ops->destroy(ctx, obj);
 
 	module_put(obj->ops->type->owner);
-	kfree(obj->name);
+	kfree(obj->key.name);
 	kfree(obj);
 }
 
@@ -5297,7 +5297,7 @@ static int nf_tables_delobj(struct net *net, struct sock *nlsk,
 	return nft_delobj(&ctx, obj);
 }
 
-void nft_obj_notify(struct net *net, struct nft_table *table,
+void nft_obj_notify(struct net *net, const struct nft_table *table,
 		    struct nft_object *obj, u32 portid, u32 seq, int event,
 		    int family, int report, gfp_t gfp)
 {
@@ -6404,6 +6404,11 @@ static void nf_tables_commit_chain(struct net *net, struct nft_chain *chain)
 		nf_tables_commit_chain_free_rules_old(g0);
 }
 
+static void nft_obj_del(struct nft_object *obj)
+{
+	list_del_rcu(&obj->list);
+}
+
 static void nft_chain_del(struct nft_chain *chain)
 {
 	struct nft_table *table = chain->table;
@@ -6580,7 +6585,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
 			nft_trans_destroy(trans);
 			break;
 		case NFT_MSG_DELOBJ:
-			list_del_rcu(&nft_trans_obj(trans)->list);
+			nft_obj_del(nft_trans_obj(trans));
 			nf_tables_obj_notify(&trans->ctx, nft_trans_obj(trans),
 					     NFT_MSG_DELOBJ);
 			break;
@@ -7330,7 +7335,7 @@ static void __nft_release_tables(struct net *net)
 			nft_set_destroy(set);
 		}
 		list_for_each_entry_safe(obj, ne, &table->objects, list) {
-			list_del(&obj->list);
+			nft_obj_del(obj);
 			table->use--;
 			nft_obj_destroy(&ctx, obj);
 		}
diff --git a/net/netfilter/nft_objref.c b/net/netfilter/nft_objref.c
index a3185ca2a3a9..58eb75ad61bf 100644
--- a/net/netfilter/nft_objref.c
+++ b/net/netfilter/nft_objref.c
@@ -53,7 +53,7 @@ static int nft_objref_dump(struct sk_buff *skb, const struct nft_expr *expr)
 {
 	const struct nft_object *obj = nft_objref_priv(expr);
 
-	if (nla_put_string(skb, NFTA_OBJREF_IMM_NAME, obj->name) ||
+	if (nla_put_string(skb, NFTA_OBJREF_IMM_NAME, obj->key.name) ||
 	    nla_put_be32(skb, NFTA_OBJREF_IMM_TYPE,
 			 htonl(obj->ops->type->type)))
 		goto nla_put_failure;
diff --git a/net/netfilter/nft_quota.c b/net/netfilter/nft_quota.c
index 0ed124a93fcf..354cde67bca9 100644
--- a/net/netfilter/nft_quota.c
+++ b/net/netfilter/nft_quota.c
@@ -61,7 +61,7 @@ static void nft_quota_obj_eval(struct nft_object *obj,
 
 	if (overquota &&
 	    !test_and_set_bit(NFT_QUOTA_DEPLETED_BIT, &priv->flags))
-		nft_obj_notify(nft_net(pkt), obj->table, obj, 0, 0,
+		nft_obj_notify(nft_net(pkt), obj->key.table, obj, 0, 0,
 			       NFT_MSG_NEWOBJ, nft_pf(pkt), 0, GFP_ATOMIC);
 }
 
-- 
cgit v1.2.3


From 4d44175aa5bb5f68772b1eb0306554812294ca52 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Tue, 8 Jan 2019 15:45:59 +0100
Subject: netfilter: nf_tables: handle nft_object lookups via rhltable

Instead of linear search, use rhlist interface to look up the objects.
This fixes rulesets with thousands of named objects (quota, counters and
the like).

We only use a single table for this and consider the address of the
table we're doing the lookup in as a part of the key.

This reduces restore time of a sample ruleset with ~20k named counters
from 37 seconds to 0.8 seconds.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h |   7 ++-
 net/netfilter/nf_tables_api.c     | 103 +++++++++++++++++++++++++++++++++-----
 net/netfilter/nft_objref.c        |   3 +-
 3 files changed, 98 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 325d0a6b808b..45eba7d7ab38 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -1027,14 +1027,16 @@ struct nft_object_hash_key {
  *
  *	@list: table stateful object list node
  *	@key:  keys that identify this object
+ *	@rhlhead: nft_objname_ht node
  *	@genmask: generation mask
  *	@use: number of references to this stateful object
  *	@handle: unique object handle
  *	@ops: object operations
- * 	@data: object data, layout depends on type
+ *	@data: object data, layout depends on type
  */
 struct nft_object {
 	struct list_head		list;
+	struct rhlist_head		rhlhead;
 	struct nft_object_hash_key	key;
 	u32				genmask:2,
 					use:30;
@@ -1052,7 +1054,8 @@ static inline void *nft_obj_data(const struct nft_object *obj)
 
 #define nft_expr_obj(expr)	*((struct nft_object **)nft_expr_priv(expr))
 
-struct nft_object *nft_obj_lookup(const struct nft_table *table,
+struct nft_object *nft_obj_lookup(const struct net *net,
+				  const struct nft_table *table,
 				  const struct nlattr *nla, u32 objtype,
 				  u8 genmask);
 
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 5e213941e85b..621ff834d3a4 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -37,10 +37,16 @@ enum {
 	NFT_VALIDATE_DO,
 };
 
+static struct rhltable nft_objname_ht;
+
 static u32 nft_chain_hash(const void *data, u32 len, u32 seed);
 static u32 nft_chain_hash_obj(const void *data, u32 len, u32 seed);
 static int nft_chain_hash_cmp(struct rhashtable_compare_arg *, const void *);
 
+static u32 nft_objname_hash(const void *data, u32 len, u32 seed);
+static u32 nft_objname_hash_obj(const void *data, u32 len, u32 seed);
+static int nft_objname_hash_cmp(struct rhashtable_compare_arg *, const void *);
+
 static const struct rhashtable_params nft_chain_ht_params = {
 	.head_offset		= offsetof(struct nft_chain, rhlhead),
 	.key_offset		= offsetof(struct nft_chain, name),
@@ -51,6 +57,15 @@ static const struct rhashtable_params nft_chain_ht_params = {
 	.automatic_shrinking	= true,
 };
 
+static const struct rhashtable_params nft_objname_ht_params = {
+	.head_offset		= offsetof(struct nft_object, rhlhead),
+	.key_offset		= offsetof(struct nft_object, key),
+	.hashfn			= nft_objname_hash,
+	.obj_hashfn		= nft_objname_hash_obj,
+	.obj_cmpfn		= nft_objname_hash_cmp,
+	.automatic_shrinking	= true,
+};
+
 static void nft_validate_state_update(struct net *net, u8 new_validate_state)
 {
 	switch (net->nft.validate_state) {
@@ -814,6 +829,34 @@ static int nft_chain_hash_cmp(struct rhashtable_compare_arg *arg,
 	return strcmp(chain->name, name);
 }
 
+static u32 nft_objname_hash(const void *data, u32 len, u32 seed)
+{
+	const struct nft_object_hash_key *k = data;
+
+	seed ^= hash_ptr(k->table, 32);
+
+	return jhash(k->name, strlen(k->name), seed);
+}
+
+static u32 nft_objname_hash_obj(const void *data, u32 len, u32 seed)
+{
+	const struct nft_object *obj = data;
+
+	return nft_objname_hash(&obj->key, 0, seed);
+}
+
+static int nft_objname_hash_cmp(struct rhashtable_compare_arg *arg,
+				const void *ptr)
+{
+	const struct nft_object_hash_key *k = arg->key;
+	const struct nft_object *obj = ptr;
+
+	if (obj->key.table != k->table)
+		return -1;
+
+	return strcmp(obj->key.name, k->name);
+}
+
 static int nf_tables_newtable(struct net *net, struct sock *nlsk,
 			      struct sk_buff *skb, const struct nlmsghdr *nlh,
 			      const struct nlattr * const nla[],
@@ -1070,7 +1113,7 @@ nft_chain_lookup_byhandle(const struct nft_table *table, u64 handle, u8 genmask)
 	return ERR_PTR(-ENOENT);
 }
 
-static bool lockdep_commit_lock_is_held(struct net *net)
+static bool lockdep_commit_lock_is_held(const struct net *net)
 {
 #ifdef CONFIG_PROVE_LOCKING
 	return lockdep_is_held(&net->nft.commit_mutex);
@@ -4386,7 +4429,8 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
 			err = -EINVAL;
 			goto err2;
 		}
-		obj = nft_obj_lookup(ctx->table, nla[NFTA_SET_ELEM_OBJREF],
+		obj = nft_obj_lookup(ctx->net, ctx->table,
+				     nla[NFTA_SET_ELEM_OBJREF],
 				     set->objtype, genmask);
 		if (IS_ERR(obj)) {
 			err = PTR_ERR(obj);
@@ -4819,18 +4863,36 @@ void nft_unregister_obj(struct nft_object_type *obj_type)
 }
 EXPORT_SYMBOL_GPL(nft_unregister_obj);
 
-struct nft_object *nft_obj_lookup(const struct nft_table *table,
+struct nft_object *nft_obj_lookup(const struct net *net,
+				  const struct nft_table *table,
 				  const struct nlattr *nla, u32 objtype,
 				  u8 genmask)
 {
+	struct nft_object_hash_key k = { .table = table };
+	char search[NFT_OBJ_MAXNAMELEN];
+	struct rhlist_head *tmp, *list;
 	struct nft_object *obj;
 
-	list_for_each_entry_rcu(obj, &table->objects, list) {
-		if (!nla_strcmp(nla, obj->key.name) &&
-		    objtype == obj->ops->type->type &&
-		    nft_active_genmask(obj, genmask))
+	nla_strlcpy(search, nla, sizeof(search));
+	k.name = search;
+
+	WARN_ON_ONCE(!rcu_read_lock_held() &&
+		     !lockdep_commit_lock_is_held(net));
+
+	rcu_read_lock();
+	list = rhltable_lookup(&nft_objname_ht, &k, nft_objname_ht_params);
+	if (!list)
+		goto out;
+
+	rhl_for_each_entry_rcu(obj, tmp, list, rhlhead) {
+		if (objtype == obj->ops->type->type &&
+		    nft_active_genmask(obj, genmask)) {
+			rcu_read_unlock();
 			return obj;
+		}
 	}
+out:
+	rcu_read_unlock();
 	return ERR_PTR(-ENOENT);
 }
 EXPORT_SYMBOL_GPL(nft_obj_lookup);
@@ -4988,7 +5050,7 @@ static int nf_tables_newobj(struct net *net, struct sock *nlsk,
 	}
 
 	objtype = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE]));
-	obj = nft_obj_lookup(table, nla[NFTA_OBJ_NAME], objtype, genmask);
+	obj = nft_obj_lookup(net, table, nla[NFTA_OBJ_NAME], objtype, genmask);
 	if (IS_ERR(obj)) {
 		err = PTR_ERR(obj);
 		if (err != -ENOENT) {
@@ -5027,9 +5089,18 @@ static int nf_tables_newobj(struct net *net, struct sock *nlsk,
 	if (err < 0)
 		goto err3;
 
+	err = rhltable_insert(&nft_objname_ht, &obj->rhlhead,
+			      nft_objname_ht_params);
+	if (err < 0)
+		goto err4;
+
 	list_add_tail_rcu(&obj->list, &table->objects);
 	table->use++;
 	return 0;
+err4:
+	/* queued in transaction log */
+	INIT_LIST_HEAD(&obj->list);
+	return err;
 err3:
 	kfree(obj->key.name);
 err2:
@@ -5215,7 +5286,7 @@ static int nf_tables_getobj(struct net *net, struct sock *nlsk,
 	}
 
 	objtype = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE]));
-	obj = nft_obj_lookup(table, nla[NFTA_OBJ_NAME], objtype, genmask);
+	obj = nft_obj_lookup(net, table, nla[NFTA_OBJ_NAME], objtype, genmask);
 	if (IS_ERR(obj)) {
 		NL_SET_BAD_ATTR(extack, nla[NFTA_OBJ_NAME]);
 		return PTR_ERR(obj);
@@ -5280,7 +5351,7 @@ static int nf_tables_delobj(struct net *net, struct sock *nlsk,
 		obj = nft_obj_lookup_byhandle(table, attr, objtype, genmask);
 	} else {
 		attr = nla[NFTA_OBJ_NAME];
-		obj = nft_obj_lookup(table, attr, objtype, genmask);
+		obj = nft_obj_lookup(net, table, attr, objtype, genmask);
 	}
 
 	if (IS_ERR(obj)) {
@@ -6406,6 +6477,7 @@ static void nf_tables_commit_chain(struct net *net, struct nft_chain *chain)
 
 static void nft_obj_del(struct nft_object *obj)
 {
+	rhltable_remove(&nft_objname_ht, &obj->rhlhead, nft_objname_ht_params);
 	list_del_rcu(&obj->list);
 }
 
@@ -6721,7 +6793,7 @@ static int __nf_tables_abort(struct net *net)
 			break;
 		case NFT_MSG_NEWOBJ:
 			trans->ctx.table->use--;
-			list_del_rcu(&nft_trans_obj(trans)->list);
+			nft_obj_del(nft_trans_obj(trans));
 			break;
 		case NFT_MSG_DELOBJ:
 			trans->ctx.table->use++;
@@ -7397,12 +7469,18 @@ static int __init nf_tables_module_init(void)
 	if (err < 0)
 		goto err3;
 
+	err = rhltable_init(&nft_objname_ht, &nft_objname_ht_params);
+	if (err < 0)
+		goto err4;
+
 	/* must be last */
 	err = nfnetlink_subsys_register(&nf_tables_subsys);
 	if (err < 0)
-		goto err4;
+		goto err5;
 
 	return err;
+err5:
+	rhltable_destroy(&nft_objname_ht);
 err4:
 	unregister_netdevice_notifier(&nf_tables_flowtable_notifier);
 err3:
@@ -7422,6 +7500,7 @@ static void __exit nf_tables_module_exit(void)
 	unregister_pernet_subsys(&nf_tables_net_ops);
 	cancel_work_sync(&trans_destroy_work);
 	rcu_barrier();
+	rhltable_destroy(&nft_objname_ht);
 	nf_tables_core_module_exit();
 }
 
diff --git a/net/netfilter/nft_objref.c b/net/netfilter/nft_objref.c
index 58eb75ad61bf..c1f2adf198a0 100644
--- a/net/netfilter/nft_objref.c
+++ b/net/netfilter/nft_objref.c
@@ -38,7 +38,8 @@ static int nft_objref_init(const struct nft_ctx *ctx,
 		return -EINVAL;
 
 	objtype = ntohl(nla_get_be32(tb[NFTA_OBJREF_IMM_TYPE]));
-	obj = nft_obj_lookup(ctx->table, tb[NFTA_OBJREF_IMM_NAME], objtype,
+	obj = nft_obj_lookup(ctx->net, ctx->table,
+			     tb[NFTA_OBJREF_IMM_NAME], objtype,
 			     genmask);
 	if (IS_ERR(obj))
 		return -ENOENT;
-- 
cgit v1.2.3


From 10870dd89e956d911d1a39474c0bf4a18c72cffc Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Tue, 8 Jan 2019 17:35:34 +0100
Subject: netfilter: nf_tables: add direct calls for all builtin expressions

With CONFIG_RETPOLINE its faster to add an if (ptr == &foo_func)
check and and use direct calls for all the built-in expressions.

~15% improvement in pathological cases.

checkpatch doesn't like the X macro due to the embedded return statement,
but the macro has a very limited scope so I don't think its a problem.

I would like to avoid bugs of the form
  If (e->ops->eval == (unsigned long)nft_foo_eval)
	 nft_bar_eval();

and open-coded if ()/else if()/else cascade, thus the macro.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables_core.h | 16 ++++++++++++++++
 net/netfilter/nf_tables_core.c         | 25 ++++++++++++++++++-------
 net/netfilter/nft_bitwise.c            |  5 ++---
 net/netfilter/nft_byteorder.c          |  6 +++---
 net/netfilter/nft_cmp.c                |  6 +++---
 net/netfilter/nft_dynset.c             |  5 ++---
 net/netfilter/nft_immediate.c          |  6 +++---
 net/netfilter/nft_payload.c            |  6 +++---
 net/netfilter/nft_range.c              |  5 ++---
 net/netfilter/nft_rt.c                 |  6 +++---
 10 files changed, 55 insertions(+), 31 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h
index 2046d104f323..7281895fa6d9 100644
--- a/include/net/netfilter/nf_tables_core.h
+++ b/include/net/netfilter/nf_tables_core.h
@@ -80,6 +80,22 @@ struct nft_regs;
 struct nft_pktinfo;
 void nft_meta_get_eval(const struct nft_expr *expr,
 		       struct nft_regs *regs, const struct nft_pktinfo *pkt);
+void nft_cmp_eval(const struct nft_expr *expr,
+		  struct nft_regs *regs, const struct nft_pktinfo *pkt);
 void nft_lookup_eval(const struct nft_expr *expr,
 		     struct nft_regs *regs, const struct nft_pktinfo *pkt);
+void nft_payload_eval(const struct nft_expr *expr,
+		      struct nft_regs *regs, const struct nft_pktinfo *pkt);
+void nft_immediate_eval(const struct nft_expr *expr,
+			struct nft_regs *regs, const struct nft_pktinfo *pkt);
+void nft_bitwise_eval(const struct nft_expr *expr,
+		      struct nft_regs *regs, const struct nft_pktinfo *pkt);
+void nft_range_eval(const struct nft_expr *expr,
+		    struct nft_regs *regs, const struct nft_pktinfo *pkt);
+void nft_byteorder_eval(const struct nft_expr *expr,
+			struct nft_regs *regs, const struct nft_pktinfo *pkt);
+void nft_dynset_eval(const struct nft_expr *expr,
+		     struct nft_regs *regs, const struct nft_pktinfo *pkt);
+void nft_rt_get_eval(const struct nft_expr *expr,
+		     struct nft_regs *regs, const struct nft_pktinfo *pkt);
 #endif /* _NET_NF_TABLES_CORE_H */
diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
index a50500232b0a..2a00aef7b6d4 100644
--- a/net/netfilter/nf_tables_core.c
+++ b/net/netfilter/nf_tables_core.c
@@ -124,14 +124,25 @@ static void expr_call_ops_eval(const struct nft_expr *expr,
 			       struct nft_regs *regs,
 			       struct nft_pktinfo *pkt)
 {
+#ifdef CONFIG_RETPOLINE
 	unsigned long e = (unsigned long)expr->ops->eval;
-
-	if (e == (unsigned long)nft_meta_get_eval)
-		nft_meta_get_eval(expr, regs, pkt);
-	else if (e == (unsigned long)nft_lookup_eval)
-		nft_lookup_eval(expr, regs, pkt);
-	else
-		expr->ops->eval(expr, regs, pkt);
+#define X(e, fun) \
+	do { if ((e) == (unsigned long)(fun)) \
+		return fun(expr, regs, pkt); } while (0)
+
+	X(e, nft_payload_eval);
+	X(e, nft_cmp_eval);
+	X(e, nft_meta_get_eval);
+	X(e, nft_lookup_eval);
+	X(e, nft_range_eval);
+	X(e, nft_immediate_eval);
+	X(e, nft_byteorder_eval);
+	X(e, nft_dynset_eval);
+	X(e, nft_rt_get_eval);
+	X(e, nft_bitwise_eval);
+#undef  X
+#endif /* CONFIG_RETPOLINE */
+	expr->ops->eval(expr, regs, pkt);
 }
 
 unsigned int
diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c
index fff8073e2a56..2c75b9e0474e 100644
--- a/net/netfilter/nft_bitwise.c
+++ b/net/netfilter/nft_bitwise.c
@@ -25,9 +25,8 @@ struct nft_bitwise {
 	struct nft_data		xor;
 };
 
-static void nft_bitwise_eval(const struct nft_expr *expr,
-			     struct nft_regs *regs,
-			     const struct nft_pktinfo *pkt)
+void nft_bitwise_eval(const struct nft_expr *expr,
+		      struct nft_regs *regs, const struct nft_pktinfo *pkt)
 {
 	const struct nft_bitwise *priv = nft_expr_priv(expr);
 	const u32 *src = &regs->data[priv->sreg];
diff --git a/net/netfilter/nft_byteorder.c b/net/netfilter/nft_byteorder.c
index 13d4e421a6b3..19dbc34cc75e 100644
--- a/net/netfilter/nft_byteorder.c
+++ b/net/netfilter/nft_byteorder.c
@@ -26,9 +26,9 @@ struct nft_byteorder {
 	u8			size;
 };
 
-static void nft_byteorder_eval(const struct nft_expr *expr,
-			       struct nft_regs *regs,
-			       const struct nft_pktinfo *pkt)
+void nft_byteorder_eval(const struct nft_expr *expr,
+			struct nft_regs *regs,
+			const struct nft_pktinfo *pkt)
 {
 	const struct nft_byteorder *priv = nft_expr_priv(expr);
 	u32 *src = &regs->data[priv->sreg];
diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c
index 79d48c1d06f4..f9f1fa66a16e 100644
--- a/net/netfilter/nft_cmp.c
+++ b/net/netfilter/nft_cmp.c
@@ -24,9 +24,9 @@ struct nft_cmp_expr {
 	enum nft_cmp_ops	op:8;
 };
 
-static void nft_cmp_eval(const struct nft_expr *expr,
-			 struct nft_regs *regs,
-			 const struct nft_pktinfo *pkt)
+void nft_cmp_eval(const struct nft_expr *expr,
+		  struct nft_regs *regs,
+		  const struct nft_pktinfo *pkt)
 {
 	const struct nft_cmp_expr *priv = nft_expr_priv(expr);
 	int d;
diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c
index 07d4efd3d851..9658493d37d4 100644
--- a/net/netfilter/nft_dynset.c
+++ b/net/netfilter/nft_dynset.c
@@ -62,9 +62,8 @@ err1:
 	return NULL;
 }
 
-static void nft_dynset_eval(const struct nft_expr *expr,
-			    struct nft_regs *regs,
-			    const struct nft_pktinfo *pkt)
+void nft_dynset_eval(const struct nft_expr *expr,
+		     struct nft_regs *regs, const struct nft_pktinfo *pkt)
 {
 	const struct nft_dynset *priv = nft_expr_priv(expr);
 	struct nft_set *set = priv->set;
diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c
index 0777a93211e2..3e5ed787b1d4 100644
--- a/net/netfilter/nft_immediate.c
+++ b/net/netfilter/nft_immediate.c
@@ -17,9 +17,9 @@
 #include <net/netfilter/nf_tables_core.h>
 #include <net/netfilter/nf_tables.h>
 
-static void nft_immediate_eval(const struct nft_expr *expr,
-			       struct nft_regs *regs,
-			       const struct nft_pktinfo *pkt)
+void nft_immediate_eval(const struct nft_expr *expr,
+			struct nft_regs *regs,
+			const struct nft_pktinfo *pkt)
 {
 	const struct nft_immediate_expr *priv = nft_expr_priv(expr);
 
diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c
index e110b0ebbf58..54e15de4b79a 100644
--- a/net/netfilter/nft_payload.c
+++ b/net/netfilter/nft_payload.c
@@ -70,9 +70,9 @@ nft_payload_copy_vlan(u32 *d, const struct sk_buff *skb, u8 offset, u8 len)
 	return skb_copy_bits(skb, offset + mac_off, dst_u8, len) == 0;
 }
 
-static void nft_payload_eval(const struct nft_expr *expr,
-			     struct nft_regs *regs,
-			     const struct nft_pktinfo *pkt)
+void nft_payload_eval(const struct nft_expr *expr,
+		      struct nft_regs *regs,
+		      const struct nft_pktinfo *pkt)
 {
 	const struct nft_payload *priv = nft_expr_priv(expr);
 	const struct sk_buff *skb = pkt->skb;
diff --git a/net/netfilter/nft_range.c b/net/netfilter/nft_range.c
index cedb96c3619f..529ac8acb19d 100644
--- a/net/netfilter/nft_range.c
+++ b/net/netfilter/nft_range.c
@@ -23,9 +23,8 @@ struct nft_range_expr {
 	enum nft_range_ops	op:8;
 };
 
-static void nft_range_eval(const struct nft_expr *expr,
-			 struct nft_regs *regs,
-			 const struct nft_pktinfo *pkt)
+void nft_range_eval(const struct nft_expr *expr,
+		    struct nft_regs *regs, const struct nft_pktinfo *pkt)
 {
 	const struct nft_range_expr *priv = nft_expr_priv(expr);
 	int d1, d2;
diff --git a/net/netfilter/nft_rt.c b/net/netfilter/nft_rt.c
index f35fa33913ae..c48daed5c46b 100644
--- a/net/netfilter/nft_rt.c
+++ b/net/netfilter/nft_rt.c
@@ -53,9 +53,9 @@ static u16 get_tcpmss(const struct nft_pktinfo *pkt, const struct dst_entry *skb
 	return mtu - minlen;
 }
 
-static void nft_rt_get_eval(const struct nft_expr *expr,
-			    struct nft_regs *regs,
-			    const struct nft_pktinfo *pkt)
+void nft_rt_get_eval(const struct nft_expr *expr,
+		     struct nft_regs *regs,
+		     const struct nft_pktinfo *pkt)
 {
 	const struct nft_rt *priv = nft_expr_priv(expr);
 	const struct sk_buff *skb = pkt->skb;
-- 
cgit v1.2.3


From 8e2f311a68494a6677c1724bdcb10bada21af37c Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Fri, 11 Jan 2019 14:46:15 +0100
Subject: netfilter: physdev: relax br_netfilter dependency

Following command:
  iptables -D FORWARD -m physdev ...
causes connectivity loss in some setups.

Reason is that iptables userspace will probe kernel for the module revision
of the physdev patch, and physdev has an artificial dependency on
br_netfilter (xt_physdev use makes no sense unless a br_netfilter module
is loaded).

This causes the "phydev" module to be loaded, which in turn enables the
"call-iptables" infrastructure.

bridged packets might then get dropped by the iptables ruleset.

The better fix would be to change the "call-iptables" defaults to 0 and
enforce explicit setting to 1, but that breaks backwards compatibility.

This does the next best thing: add a request_module call to checkentry.
This was a stray '-D ... -m physdev' won't activate br_netfilter
anymore.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/br_netfilter.h | 1 -
 net/bridge/br_netfilter_hooks.c      | 5 -----
 net/netfilter/xt_physdev.c           | 9 +++++++--
 3 files changed, 7 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/br_netfilter.h b/include/net/netfilter/br_netfilter.h
index 4cd56808ac4e..89808ce293c4 100644
--- a/include/net/netfilter/br_netfilter.h
+++ b/include/net/netfilter/br_netfilter.h
@@ -43,7 +43,6 @@ static inline struct rtable *bridge_parent_rtable(const struct net_device *dev)
 }
 
 struct net_device *setup_pre_routing(struct sk_buff *skb);
-void br_netfilter_enable(void);
 
 #if IS_ENABLED(CONFIG_IPV6)
 int br_validate_ipv6(struct net *net, struct sk_buff *skb);
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index c93c35bb73dd..40d058378b52 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -881,11 +881,6 @@ static const struct nf_br_ops br_ops = {
 	.br_dev_xmit_hook =	br_nf_dev_xmit,
 };
 
-void br_netfilter_enable(void)
-{
-}
-EXPORT_SYMBOL_GPL(br_netfilter_enable);
-
 /* For br_nf_post_routing, we need (prio = NF_BR_PRI_LAST), because
  * br_dev_queue_push_xmit is called afterwards */
 static const struct nf_hook_ops br_nf_ops[] = {
diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c
index 4034d70bff39..b2e39cb6a590 100644
--- a/net/netfilter/xt_physdev.c
+++ b/net/netfilter/xt_physdev.c
@@ -96,8 +96,7 @@ match_outdev:
 static int physdev_mt_check(const struct xt_mtchk_param *par)
 {
 	const struct xt_physdev_info *info = par->matchinfo;
-
-	br_netfilter_enable();
+	static bool brnf_probed __read_mostly;
 
 	if (!(info->bitmask & XT_PHYSDEV_OP_MASK) ||
 	    info->bitmask & ~XT_PHYSDEV_OP_MASK)
@@ -111,6 +110,12 @@ static int physdev_mt_check(const struct xt_mtchk_param *par)
 		if (par->hook_mask & (1 << NF_INET_LOCAL_OUT))
 			return -EINVAL;
 	}
+
+	if (!brnf_probed) {
+		brnf_probed = true;
+		request_module("br_netfilter");
+	}
+
 	return 0;
 }
 
-- 
cgit v1.2.3


From 75dd48e2e420a3cbbe56dd7adfcc6f142c948272 Mon Sep 17 00:00:00 2001
From: Phil Sutter <phil@nwl.cc>
Date: Mon, 14 Jan 2019 18:41:35 +0100
Subject: netfilter: nf_tables: Support RULE_ID reference in new rule

To allow for a batch to contain rules in arbitrary ordering, introduce
NFTA_RULE_POSITION_ID attribute which works just like NFTA_RULE_POSITION
but contains the ID of another rule within the same batch. This helps
iptables-nft-restore handling dumps with mixed insert/append commands
correctly.

Note that NFTA_RULE_POSITION takes precedence over
NFTA_RULE_POSITION_ID, so if the former is present, the latter is
ignored.

Signed-off-by: Phil Sutter <phil@nwl.cc>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netfilter/nf_tables.h | 2 ++
 net/netfilter/nf_tables_api.c            | 9 +++++++++
 2 files changed, 11 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index 7de4f1bdaf06..99ca95b830b6 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -219,6 +219,7 @@ enum nft_chain_attributes {
  * @NFTA_RULE_POSITION: numeric handle of the previous rule (NLA_U64)
  * @NFTA_RULE_USERDATA: user data (NLA_BINARY, NFT_USERDATA_MAXLEN)
  * @NFTA_RULE_ID: uniquely identifies a rule in a transaction (NLA_U32)
+ * @NFTA_RULE_POSITION_ID: transaction unique identifier of the previous rule (NLA_U32)
  */
 enum nft_rule_attributes {
 	NFTA_RULE_UNSPEC,
@@ -231,6 +232,7 @@ enum nft_rule_attributes {
 	NFTA_RULE_USERDATA,
 	NFTA_RULE_PAD,
 	NFTA_RULE_ID,
+	NFTA_RULE_POSITION_ID,
 	__NFTA_RULE_MAX
 };
 #define NFTA_RULE_MAX		(__NFTA_RULE_MAX - 1)
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 621ff834d3a4..d88c86c5b433 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -2610,6 +2610,9 @@ static int nft_table_validate(struct net *net, const struct nft_table *table)
 	return 0;
 }
 
+static struct nft_rule *nft_rule_lookup_byid(const struct net *net,
+					     const struct nlattr *nla);
+
 #define NFT_RULE_MAXEXPRS	128
 
 static int nf_tables_newrule(struct net *net, struct sock *nlsk,
@@ -2679,6 +2682,12 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk,
 				NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_POSITION]);
 				return PTR_ERR(old_rule);
 			}
+		} else if (nla[NFTA_RULE_POSITION_ID]) {
+			old_rule = nft_rule_lookup_byid(net, nla[NFTA_RULE_POSITION_ID]);
+			if (IS_ERR(old_rule)) {
+				NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_POSITION_ID]);
+				return PTR_ERR(old_rule);
+			}
 		}
 	}
 
-- 
cgit v1.2.3


From a47c5404816254d6cc4787f335c4720066a538e6 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Tue, 15 Jan 2019 22:03:32 +0100
Subject: netfilter: conntrack: handle builtin l4proto packet functions via
 direct calls

The l4 protocol trackers are invoked via indirect call: l4proto->packet().

With one exception (gre), all l4trackers are builtin, so we can make
.packet optional and use a direct call for most protocols.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack_l4proto.h | 37 +++++++++++++++++++++++
 net/netfilter/nf_conntrack_core.c            | 45 +++++++++++++++++++++++++++-
 net/netfilter/nf_conntrack_proto_dccp.c      |  8 ++---
 net/netfilter/nf_conntrack_proto_icmp.c      | 12 +++-----
 net/netfilter/nf_conntrack_proto_icmpv6.c    | 10 +++----
 net/netfilter/nf_conntrack_proto_sctp.c      | 12 ++++----
 net/netfilter/nf_conntrack_proto_tcp.c       | 11 ++++---
 net/netfilter/nf_conntrack_proto_udp.c       | 22 +++++++-------
 8 files changed, 113 insertions(+), 44 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h
index ae7b86f587f2..6ca4355ae3dc 100644
--- a/include/net/netfilter/nf_conntrack_l4proto.h
+++ b/include/net/netfilter/nf_conntrack_l4proto.h
@@ -99,6 +99,43 @@ int nf_conntrack_icmpv6_error(struct nf_conn *tmpl,
 			      struct sk_buff *skb,
 			      unsigned int dataoff,
 			      const struct nf_hook_state *state);
+
+int nf_conntrack_icmp_packet(struct nf_conn *ct,
+			     struct sk_buff *skb,
+			     enum ip_conntrack_info ctinfo,
+			     const struct nf_hook_state *state);
+
+int nf_conntrack_icmpv6_packet(struct nf_conn *ct,
+			       struct sk_buff *skb,
+			       enum ip_conntrack_info ctinfo,
+			       const struct nf_hook_state *state);
+
+int nf_conntrack_udp_packet(struct nf_conn *ct,
+			    struct sk_buff *skb,
+			    unsigned int dataoff,
+			    enum ip_conntrack_info ctinfo,
+			    const struct nf_hook_state *state);
+int nf_conntrack_udplite_packet(struct nf_conn *ct,
+				struct sk_buff *skb,
+				unsigned int dataoff,
+				enum ip_conntrack_info ctinfo,
+				const struct nf_hook_state *state);
+int nf_conntrack_tcp_packet(struct nf_conn *ct,
+			    struct sk_buff *skb,
+			    unsigned int dataoff,
+			    enum ip_conntrack_info ctinfo,
+			    const struct nf_hook_state *state);
+int nf_conntrack_dccp_packet(struct nf_conn *ct,
+			     struct sk_buff *skb,
+			     unsigned int dataoff,
+			     enum ip_conntrack_info ctinfo,
+			     const struct nf_hook_state *state);
+int nf_conntrack_sctp_packet(struct nf_conn *ct,
+			     struct sk_buff *skb,
+			     unsigned int dataoff,
+			     enum ip_conntrack_info ctinfo,
+			     const struct nf_hook_state *state);
+
 /* Existing built-in generic protocol */
 extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_generic;
 
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 741b533148ba..5dfd95ae510d 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1522,6 +1522,45 @@ nf_conntrack_handle_icmp(struct nf_conn *tmpl,
 	return ret;
 }
 
+/* Returns verdict for packet, or -1 for invalid. */
+static int nf_conntrack_handle_packet(struct nf_conn *ct,
+				      struct sk_buff *skb,
+				      unsigned int dataoff,
+				      enum ip_conntrack_info ctinfo,
+				      const struct nf_hook_state *state)
+{
+	switch (nf_ct_protonum(ct)) {
+	case IPPROTO_TCP:
+		return nf_conntrack_tcp_packet(ct, skb, dataoff,
+					       ctinfo, state);
+	case IPPROTO_UDP:
+		return nf_conntrack_udp_packet(ct, skb, dataoff,
+					       ctinfo, state);
+	case IPPROTO_ICMP:
+		return nf_conntrack_icmp_packet(ct, skb, ctinfo, state);
+	case IPPROTO_ICMPV6:
+		return nf_conntrack_icmpv6_packet(ct, skb, ctinfo, state);
+#ifdef CONFIG_NF_CT_PROTO_UDPLITE
+	case IPPROTO_UDPLITE:
+		return nf_conntrack_udplite_packet(ct, skb, dataoff,
+						   ctinfo, state);
+#endif
+#ifdef CONFIG_NF_CT_PROTO_SCTP
+	case IPPROTO_SCTP:
+		return nf_conntrack_sctp_packet(ct, skb, dataoff,
+						ctinfo, state);
+#endif
+#ifdef CONFIG_NF_CT_PROTO_DCCP
+	case IPPROTO_DCCP:
+		return nf_conntrack_dccp_packet(ct, skb, dataoff,
+						ctinfo, state);
+#endif
+	}
+
+	WARN_ON_ONCE(1);
+	return -NF_ACCEPT;
+}
+
 unsigned int
 nf_conntrack_in(struct sk_buff *skb, const struct nf_hook_state *state)
 {
@@ -1583,7 +1622,11 @@ repeat:
 		goto out;
 	}
 
-	ret = l4proto->packet(ct, skb, dataoff, ctinfo, state);
+	if (l4proto->packet)
+		ret = l4proto->packet(ct, skb, dataoff, ctinfo, state);
+	else
+		ret = nf_conntrack_handle_packet(ct, skb, dataoff, ctinfo, state);
+
 	if (ret <= 0) {
 		/* Invalid: inverse of the return code tells
 		 * the netfilter core what to do */
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index 023c1445bc39..84408ec80624 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -472,9 +472,10 @@ out_invalid:
 	return true;
 }
 
-static int dccp_packet(struct nf_conn *ct, struct sk_buff *skb,
-		       unsigned int dataoff, enum ip_conntrack_info ctinfo,
-		       const struct nf_hook_state *state)
+int nf_conntrack_dccp_packet(struct nf_conn *ct, struct sk_buff *skb,
+			     unsigned int dataoff,
+			     enum ip_conntrack_info ctinfo,
+			     const struct nf_hook_state *state)
 {
 	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
 	struct dccp_hdr _dh, *dh;
@@ -839,7 +840,6 @@ static struct nf_proto_net *dccp_get_net_proto(struct net *net)
 
 const struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp = {
 	.l4proto		= IPPROTO_DCCP,
-	.packet			= dccp_packet,
 	.can_early_drop		= dccp_can_early_drop,
 #ifdef CONFIG_NF_CONNTRACK_PROCFS
 	.print_conntrack	= dccp_print_conntrack,
diff --git a/net/netfilter/nf_conntrack_proto_icmp.c b/net/netfilter/nf_conntrack_proto_icmp.c
index de64d8a5fdfd..381a7f3ea68a 100644
--- a/net/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/netfilter/nf_conntrack_proto_icmp.c
@@ -68,11 +68,10 @@ static bool icmp_invert_tuple(struct nf_conntrack_tuple *tuple,
 }
 
 /* Returns verdict for packet, or -1 for invalid. */
-static int icmp_packet(struct nf_conn *ct,
-		       struct sk_buff *skb,
-		       unsigned int dataoff,
-		       enum ip_conntrack_info ctinfo,
-		       const struct nf_hook_state *state)
+int nf_conntrack_icmp_packet(struct nf_conn *ct,
+			     struct sk_buff *skb,
+			     enum ip_conntrack_info ctinfo,
+			     const struct nf_hook_state *state)
 {
 	/* Do not immediately delete the connection after the first
 	   successful reply to avoid excessive conntrackd traffic
@@ -350,9 +349,6 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp =
 	.l4proto		= IPPROTO_ICMP,
 	.pkt_to_tuple		= icmp_pkt_to_tuple,
 	.invert_tuple		= icmp_invert_tuple,
-	.packet			= icmp_packet,
-	.destroy		= NULL,
-	.me			= NULL,
 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
 	.tuple_to_nlattr	= icmp_tuple_to_nlattr,
 	.nlattr_tuple_size	= icmp_nlattr_tuple_size,
diff --git a/net/netfilter/nf_conntrack_proto_icmpv6.c b/net/netfilter/nf_conntrack_proto_icmpv6.c
index a15eefb8e317..93f27d0bcb75 100644
--- a/net/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/netfilter/nf_conntrack_proto_icmpv6.c
@@ -86,11 +86,10 @@ static unsigned int *icmpv6_get_timeouts(struct net *net)
 }
 
 /* Returns verdict for packet, or -1 for invalid. */
-static int icmpv6_packet(struct nf_conn *ct,
-		         struct sk_buff *skb,
-		         unsigned int dataoff,
-		         enum ip_conntrack_info ctinfo,
-		         const struct nf_hook_state *state)
+int nf_conntrack_icmpv6_packet(struct nf_conn *ct,
+			       struct sk_buff *skb,
+			       enum ip_conntrack_info ctinfo,
+			       const struct nf_hook_state *state)
 {
 	unsigned int *timeout = nf_ct_timeout_lookup(ct);
 	static const u8 valid_new[] = {
@@ -361,7 +360,6 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 =
 	.l4proto		= IPPROTO_ICMPV6,
 	.pkt_to_tuple		= icmpv6_pkt_to_tuple,
 	.invert_tuple		= icmpv6_invert_tuple,
-	.packet			= icmpv6_packet,
 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
 	.tuple_to_nlattr	= icmpv6_tuple_to_nlattr,
 	.nlattr_tuple_size	= icmpv6_nlattr_tuple_size,
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index d53e3e78f605..952f2cc0fd74 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -357,11 +357,11 @@ out_invalid:
 }
 
 /* Returns verdict for packet, or -NF_ACCEPT for invalid. */
-static int sctp_packet(struct nf_conn *ct,
-		       struct sk_buff *skb,
-		       unsigned int dataoff,
-		       enum ip_conntrack_info ctinfo,
-		       const struct nf_hook_state *state)
+int nf_conntrack_sctp_packet(struct nf_conn *ct,
+			     struct sk_buff *skb,
+			     unsigned int dataoff,
+			     enum ip_conntrack_info ctinfo,
+			     const struct nf_hook_state *state)
 {
 	enum sctp_conntrack new_state, old_state;
 	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
@@ -759,9 +759,7 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp = {
 #ifdef CONFIG_NF_CONNTRACK_PROCFS
 	.print_conntrack	= sctp_print_conntrack,
 #endif
-	.packet 		= sctp_packet,
 	.can_early_drop		= sctp_can_early_drop,
-	.me 			= THIS_MODULE,
 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
 	.nlattr_size		= SCTP_NLATTR_SIZE,
 	.to_nlattr		= sctp_to_nlattr,
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 4dcbd51a8e97..32aac9a835d4 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -829,11 +829,11 @@ static noinline bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
 }
 
 /* Returns verdict for packet, or -1 for invalid. */
-static int tcp_packet(struct nf_conn *ct,
-		      struct sk_buff *skb,
-		      unsigned int dataoff,
-		      enum ip_conntrack_info ctinfo,
-		      const struct nf_hook_state *state)
+int nf_conntrack_tcp_packet(struct nf_conn *ct,
+			    struct sk_buff *skb,
+			    unsigned int dataoff,
+			    enum ip_conntrack_info ctinfo,
+			    const struct nf_hook_state *state)
 {
 	struct net *net = nf_ct_net(ct);
 	struct nf_tcp_net *tn = nf_tcp_pernet(net);
@@ -1535,7 +1535,6 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp =
 #ifdef CONFIG_NF_CONNTRACK_PROCFS
 	.print_conntrack 	= tcp_print_conntrack,
 #endif
-	.packet 		= tcp_packet,
 	.can_early_drop		= tcp_can_early_drop,
 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
 	.to_nlattr		= tcp_to_nlattr,
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
index b4f5d5e82031..e1f409aa0006 100644
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -85,11 +85,11 @@ static bool udp_error(struct sk_buff *skb,
 }
 
 /* Returns verdict for packet, and may modify conntracktype */
-static int udp_packet(struct nf_conn *ct,
-		      struct sk_buff *skb,
-		      unsigned int dataoff,
-		      enum ip_conntrack_info ctinfo,
-		      const struct nf_hook_state *state)
+int nf_conntrack_udp_packet(struct nf_conn *ct,
+			    struct sk_buff *skb,
+			    unsigned int dataoff,
+			    enum ip_conntrack_info ctinfo,
+			    const struct nf_hook_state *state)
 {
 	unsigned int *timeouts;
 
@@ -177,11 +177,11 @@ static bool udplite_error(struct sk_buff *skb,
 }
 
 /* Returns verdict for packet, and may modify conntracktype */
-static int udplite_packet(struct nf_conn *ct,
-			  struct sk_buff *skb,
-			  unsigned int dataoff,
-			  enum ip_conntrack_info ctinfo,
-			  const struct nf_hook_state *state)
+int nf_conntrack_udplite_packet(struct nf_conn *ct,
+				struct sk_buff *skb,
+				unsigned int dataoff,
+				enum ip_conntrack_info ctinfo,
+				const struct nf_hook_state *state)
 {
 	unsigned int *timeouts;
 
@@ -319,7 +319,6 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp =
 {
 	.l4proto		= IPPROTO_UDP,
 	.allow_clash		= true,
-	.packet			= udp_packet,
 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
 	.tuple_to_nlattr	= nf_ct_port_tuple_to_nlattr,
 	.nlattr_to_tuple	= nf_ct_port_nlattr_to_tuple,
@@ -344,7 +343,6 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite =
 {
 	.l4proto		= IPPROTO_UDPLITE,
 	.allow_clash		= true,
-	.packet			= udplite_packet,
 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
 	.tuple_to_nlattr	= nf_ct_port_tuple_to_nlattr,
 	.nlattr_to_tuple	= nf_ct_port_nlattr_to_tuple,
-- 
cgit v1.2.3


From e2e48b471634faa6958718c27208d366b13fe878 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Tue, 15 Jan 2019 22:03:33 +0100
Subject: netfilter: conntrack: handle icmp pkt_to_tuple helper via direct
 calls

rather than handling them via indirect call, use a direct one instead.
This leaves GRE as the last user of this indirect call facility.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack_l4proto.h | 10 ++++++++++
 net/netfilter/nf_conntrack_core.c            |  6 ++++++
 net/netfilter/nf_conntrack_proto_icmp.c      |  5 ++---
 net/netfilter/nf_conntrack_proto_icmpv6.c    |  9 ++++-----
 4 files changed, 22 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h
index 6ca4355ae3dc..46d554806eb3 100644
--- a/include/net/netfilter/nf_conntrack_l4proto.h
+++ b/include/net/netfilter/nf_conntrack_l4proto.h
@@ -90,6 +90,16 @@ struct nf_conntrack_l4proto {
 	struct module *me;
 };
 
+bool icmp_pkt_to_tuple(const struct sk_buff *skb,
+		       unsigned int dataoff,
+		       struct net *net,
+		       struct nf_conntrack_tuple *tuple);
+
+bool icmpv6_pkt_to_tuple(const struct sk_buff *skb,
+			 unsigned int dataoff,
+			 struct net *net,
+			 struct nf_conntrack_tuple *tuple);
+
 int nf_conntrack_icmpv4_error(struct nf_conn *tmpl,
 			      struct sk_buff *skb,
 			      unsigned int dataoff,
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 5dfd95ae510d..b3840d36c3a6 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -274,6 +274,12 @@ nf_ct_get_tuple(const struct sk_buff *skb,
 	tuple->dst.protonum = protonum;
 	tuple->dst.dir = IP_CT_DIR_ORIGINAL;
 
+	switch (protonum) {
+	case IPPROTO_ICMPV6:
+		return icmpv6_pkt_to_tuple(skb, dataoff, net, tuple);
+	case IPPROTO_ICMP:
+		return icmp_pkt_to_tuple(skb, dataoff, net, tuple);
+	}
 	if (unlikely(l4proto->pkt_to_tuple))
 		return l4proto->pkt_to_tuple(skb, dataoff, net, tuple);
 
diff --git a/net/netfilter/nf_conntrack_proto_icmp.c b/net/netfilter/nf_conntrack_proto_icmp.c
index 381a7f3ea68a..805c1fe5b837 100644
--- a/net/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/netfilter/nf_conntrack_proto_icmp.c
@@ -25,8 +25,8 @@
 
 static const unsigned int nf_ct_icmp_timeout = 30*HZ;
 
-static bool icmp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
-			      struct net *net, struct nf_conntrack_tuple *tuple)
+bool icmp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
+		       struct net *net, struct nf_conntrack_tuple *tuple)
 {
 	const struct icmphdr *hp;
 	struct icmphdr _hdr;
@@ -347,7 +347,6 @@ static struct nf_proto_net *icmp_get_net_proto(struct net *net)
 const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp =
 {
 	.l4proto		= IPPROTO_ICMP,
-	.pkt_to_tuple		= icmp_pkt_to_tuple,
 	.invert_tuple		= icmp_invert_tuple,
 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
 	.tuple_to_nlattr	= icmp_tuple_to_nlattr,
diff --git a/net/netfilter/nf_conntrack_proto_icmpv6.c b/net/netfilter/nf_conntrack_proto_icmpv6.c
index 93f27d0bcb75..20cd55e55e41 100644
--- a/net/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/netfilter/nf_conntrack_proto_icmpv6.c
@@ -30,10 +30,10 @@
 
 static const unsigned int nf_ct_icmpv6_timeout = 30*HZ;
 
-static bool icmpv6_pkt_to_tuple(const struct sk_buff *skb,
-				unsigned int dataoff,
-				struct net *net,
-				struct nf_conntrack_tuple *tuple)
+bool icmpv6_pkt_to_tuple(const struct sk_buff *skb,
+			 unsigned int dataoff,
+			 struct net *net,
+			 struct nf_conntrack_tuple *tuple)
 {
 	const struct icmp6hdr *hp;
 	struct icmp6hdr _hdr;
@@ -358,7 +358,6 @@ static struct nf_proto_net *icmpv6_get_net_proto(struct net *net)
 const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 =
 {
 	.l4proto		= IPPROTO_ICMPV6,
-	.pkt_to_tuple		= icmpv6_pkt_to_tuple,
 	.invert_tuple		= icmpv6_invert_tuple,
 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
 	.tuple_to_nlattr	= icmpv6_tuple_to_nlattr,
-- 
cgit v1.2.3


From 202e651cd43c69a43f75b445e90f55b59f9af0ad Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Tue, 15 Jan 2019 22:03:34 +0100
Subject: netfilter: conntrack: gre: convert rwlock to rcu

We can use gre.  Lock is only needed when a new expectation is added.

In case a single spinlock proves to be problematic we can either add one
per netns or use an array of locks combined with net_hash_mix() or similar
to pick the 'correct' one.

But given this is only needed for an expectation rather than per packet
a single one should be ok.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/nf_conntrack_proto_gre.h |  1 +
 net/netfilter/nf_conntrack_proto_gre.c           | 37 ++++++++++--------------
 2 files changed, 16 insertions(+), 22 deletions(-)

(limited to 'include')

diff --git a/include/linux/netfilter/nf_conntrack_proto_gre.h b/include/linux/netfilter/nf_conntrack_proto_gre.h
index 6989e2e4eabf..222c9d3d453f 100644
--- a/include/linux/netfilter/nf_conntrack_proto_gre.h
+++ b/include/linux/netfilter/nf_conntrack_proto_gre.h
@@ -19,6 +19,7 @@ struct nf_conn;
 struct nf_ct_gre_keymap {
 	struct list_head list;
 	struct nf_conntrack_tuple tuple;
+	struct rcu_head rcu;
 };
 
 enum grep_conntrack {
diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
index 8899b51aad44..34dd89485be2 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -49,6 +49,8 @@ static const unsigned int gre_timeouts[GRE_CT_MAX] = {
 };
 
 static unsigned int proto_gre_net_id __read_mostly;
+/* used when expectation is added */
+static DEFINE_SPINLOCK(keymap_lock);
 
 static inline struct netns_proto_gre *gre_pernet(struct net *net)
 {
@@ -60,12 +62,12 @@ static void nf_ct_gre_keymap_flush(struct net *net)
 	struct netns_proto_gre *net_gre = gre_pernet(net);
 	struct nf_ct_gre_keymap *km, *tmp;
 
-	write_lock_bh(&net_gre->keymap_lock);
+	spin_lock_bh(&keymap_lock);
 	list_for_each_entry_safe(km, tmp, &net_gre->keymap_list, list) {
-		list_del(&km->list);
-		kfree(km);
+		list_del_rcu(&km->list);
+		kfree_rcu(km, rcu);
 	}
-	write_unlock_bh(&net_gre->keymap_lock);
+	spin_unlock_bh(&keymap_lock);
 }
 
 static inline int gre_key_cmpfn(const struct nf_ct_gre_keymap *km,
@@ -85,14 +87,12 @@ static __be16 gre_keymap_lookup(struct net *net, struct nf_conntrack_tuple *t)
 	struct nf_ct_gre_keymap *km;
 	__be16 key = 0;
 
-	read_lock_bh(&net_gre->keymap_lock);
-	list_for_each_entry(km, &net_gre->keymap_list, list) {
+	list_for_each_entry_rcu(km, &net_gre->keymap_list, list) {
 		if (gre_key_cmpfn(km, t)) {
 			key = km->tuple.src.u.gre.key;
 			break;
 		}
 	}
-	read_unlock_bh(&net_gre->keymap_lock);
 
 	pr_debug("lookup src key 0x%x for ", key);
 	nf_ct_dump_tuple(t);
@@ -112,14 +112,10 @@ int nf_ct_gre_keymap_add(struct nf_conn *ct, enum ip_conntrack_dir dir,
 	kmp = &ct_pptp_info->keymap[dir];
 	if (*kmp) {
 		/* check whether it's a retransmission */
-		read_lock_bh(&net_gre->keymap_lock);
-		list_for_each_entry(km, &net_gre->keymap_list, list) {
-			if (gre_key_cmpfn(km, t) && km == *kmp) {
-				read_unlock_bh(&net_gre->keymap_lock);
+		list_for_each_entry_rcu(km, &net_gre->keymap_list, list) {
+			if (gre_key_cmpfn(km, t) && km == *kmp)
 				return 0;
-			}
 		}
-		read_unlock_bh(&net_gre->keymap_lock);
 		pr_debug("trying to override keymap_%s for ct %p\n",
 			 dir == IP_CT_DIR_REPLY ? "reply" : "orig", ct);
 		return -EEXIST;
@@ -134,9 +130,9 @@ int nf_ct_gre_keymap_add(struct nf_conn *ct, enum ip_conntrack_dir dir,
 	pr_debug("adding new entry %p: ", km);
 	nf_ct_dump_tuple(&km->tuple);
 
-	write_lock_bh(&net_gre->keymap_lock);
+	spin_lock_bh(&keymap_lock);
 	list_add_tail(&km->list, &net_gre->keymap_list);
-	write_unlock_bh(&net_gre->keymap_lock);
+	spin_unlock_bh(&keymap_lock);
 
 	return 0;
 }
@@ -145,24 +141,22 @@ EXPORT_SYMBOL_GPL(nf_ct_gre_keymap_add);
 /* destroy the keymap entries associated with specified master ct */
 void nf_ct_gre_keymap_destroy(struct nf_conn *ct)
 {
-	struct net *net = nf_ct_net(ct);
-	struct netns_proto_gre *net_gre = gre_pernet(net);
 	struct nf_ct_pptp_master *ct_pptp_info = nfct_help_data(ct);
 	enum ip_conntrack_dir dir;
 
 	pr_debug("entering for ct %p\n", ct);
 
-	write_lock_bh(&net_gre->keymap_lock);
+	spin_lock_bh(&keymap_lock);
 	for (dir = IP_CT_DIR_ORIGINAL; dir < IP_CT_DIR_MAX; dir++) {
 		if (ct_pptp_info->keymap[dir]) {
 			pr_debug("removing %p from list\n",
 				 ct_pptp_info->keymap[dir]);
-			list_del(&ct_pptp_info->keymap[dir]->list);
-			kfree(ct_pptp_info->keymap[dir]);
+			list_del_rcu(&ct_pptp_info->keymap[dir]->list);
+			kfree_rcu(ct_pptp_info->keymap[dir], rcu);
 			ct_pptp_info->keymap[dir] = NULL;
 		}
 	}
-	write_unlock_bh(&net_gre->keymap_lock);
+	spin_unlock_bh(&keymap_lock);
 }
 EXPORT_SYMBOL_GPL(nf_ct_gre_keymap_destroy);
 
@@ -365,7 +359,6 @@ static int gre_init_net(struct net *net)
 	struct nf_proto_net *nf = &net_gre->nf;
 	int i;
 
-	rwlock_init(&net_gre->keymap_lock);
 	INIT_LIST_HEAD(&net_gre->keymap_list);
 	for (i = 0; i < GRE_CT_MAX; i++)
 		net_gre->gre_timeouts[i] = gre_timeouts[i];
-- 
cgit v1.2.3


From 22fc4c4c9fd60427bcda00878cee94e7622cfa7a Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Tue, 15 Jan 2019 22:03:35 +0100
Subject: netfilter: conntrack: gre: switch module to be built-in

This makes the last of the modular l4 trackers 'bool'.

After this, all infrastructure to handle dynamic l4 protocol registration
becomes obsolete and can be removed in followup patches.

Old:
302824 net/netfilter/nf_conntrack.ko
 21504 net/netfilter/nf_conntrack_proto_gre.ko

New:
313728 net/netfilter/nf_conntrack.ko

Old:
   text	   data	    bss	    dec	    hex	filename
   6281	   1732	      4	   8017	   1f51	nf_conntrack_proto_gre.ko
 108356	  20613	    236	 129205	  1f8b5	nf_conntrack.ko
New:
 112095	  21381	    240	 133716	  20a54	nf_conntrack.ko

The size increase is only temporary.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/nf_conntrack_proto_gre.h | 14 +---
 include/net/netfilter/ipv4/nf_conntrack_ipv4.h   |  3 +
 include/net/netfilter/nf_conntrack_l4proto.h     |  7 ++
 include/net/netns/conntrack.h                    | 17 +++++
 net/netfilter/Kconfig                            |  2 +-
 net/netfilter/Makefile                           |  3 +-
 net/netfilter/nf_conntrack_proto.c               |  7 +-
 net/netfilter/nf_conntrack_proto_gre.c           | 93 +++++-------------------
 net/netfilter/nfnetlink_cttimeout.c              |  7 +-
 9 files changed, 55 insertions(+), 98 deletions(-)

(limited to 'include')

diff --git a/include/linux/netfilter/nf_conntrack_proto_gre.h b/include/linux/netfilter/nf_conntrack_proto_gre.h
index 222c9d3d453f..59714e9ee4ef 100644
--- a/include/linux/netfilter/nf_conntrack_proto_gre.h
+++ b/include/linux/netfilter/nf_conntrack_proto_gre.h
@@ -22,23 +22,11 @@ struct nf_ct_gre_keymap {
 	struct rcu_head rcu;
 };
 
-enum grep_conntrack {
-	GRE_CT_UNREPLIED,
-	GRE_CT_REPLIED,
-	GRE_CT_MAX
-};
-
-struct netns_proto_gre {
-	struct nf_proto_net	nf;
-	rwlock_t		keymap_lock;
-	struct list_head	keymap_list;
-	unsigned int		gre_timeouts[GRE_CT_MAX];
-};
-
 /* add new tuple->key_reply pair to keymap */
 int nf_ct_gre_keymap_add(struct nf_conn *ct, enum ip_conntrack_dir dir,
 			 struct nf_conntrack_tuple *t);
 
+void nf_ct_gre_keymap_flush(struct net *net);
 /* delete keymap entries */
 void nf_ct_gre_keymap_destroy(struct nf_conn *ct);
 
diff --git a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h
index 135ee702c7b0..2c8c2b023848 100644
--- a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h
+++ b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h
@@ -22,5 +22,8 @@ extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp;
 #ifdef CONFIG_NF_CT_PROTO_UDPLITE
 extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite;
 #endif
+#ifdef CONFIG_NF_CT_PROTO_GRE
+extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_gre;
+#endif
 
 #endif /*_NF_CONNTRACK_IPV4_H*/
diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h
index 46d554806eb3..fded3f164dcc 100644
--- a/include/net/netfilter/nf_conntrack_l4proto.h
+++ b/include/net/netfilter/nf_conntrack_l4proto.h
@@ -239,4 +239,11 @@ static inline struct nf_sctp_net *nf_sctp_pernet(struct net *net)
 }
 #endif
 
+#ifdef CONFIG_NF_CT_PROTO_GRE
+static inline struct nf_gre_net *nf_gre_pernet(struct net *net)
+{
+	return &net->ct.nf_ct_proto.gre;
+}
+#endif
+
 #endif /*_NF_CONNTRACK_PROTOCOL_H*/
diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h
index 51cba0b8adf5..c72f413a2d4d 100644
--- a/include/net/netns/conntrack.h
+++ b/include/net/netns/conntrack.h
@@ -70,6 +70,20 @@ struct nf_sctp_net {
 };
 #endif
 
+#ifdef CONFIG_NF_CT_PROTO_GRE
+enum gre_conntrack {
+	GRE_CT_UNREPLIED,
+	GRE_CT_REPLIED,
+	GRE_CT_MAX
+};
+
+struct nf_gre_net {
+	struct nf_proto_net	nf;
+	struct list_head	keymap_list;
+	unsigned int		timeouts[GRE_CT_MAX];
+};
+#endif
+
 struct nf_ip_net {
 	struct nf_generic_net   generic;
 	struct nf_tcp_net	tcp;
@@ -82,6 +96,9 @@ struct nf_ip_net {
 #ifdef CONFIG_NF_CT_PROTO_SCTP
 	struct nf_sctp_net	sctp;
 #endif
+#ifdef CONFIG_NF_CT_PROTO_GRE
+	struct nf_gre_net	gre;
+#endif
 };
 
 struct ct_pcpu {
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index beb3a69ce1d4..fefd63a243f2 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -174,7 +174,7 @@ config NF_CT_PROTO_DCCP
 	  If unsure, say Y.
 
 config NF_CT_PROTO_GRE
-	tristate
+	bool
 
 config NF_CT_PROTO_SCTP
 	bool 'SCTP protocol connection tracking support'
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 1ae65a314d7a..e66067befa42 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -13,6 +13,7 @@ nf_conntrack-$(CONFIG_NF_CONNTRACK_EVENTS) += nf_conntrack_ecache.o
 nf_conntrack-$(CONFIG_NF_CONNTRACK_LABELS) += nf_conntrack_labels.o
 nf_conntrack-$(CONFIG_NF_CT_PROTO_DCCP) += nf_conntrack_proto_dccp.o
 nf_conntrack-$(CONFIG_NF_CT_PROTO_SCTP) += nf_conntrack_proto_sctp.o
+nf_conntrack-$(CONFIG_NF_CT_PROTO_GRE) += nf_conntrack_proto_gre.o
 
 obj-$(CONFIG_NETFILTER) = netfilter.o
 
@@ -25,8 +26,6 @@ obj-$(CONFIG_NETFILTER_NETLINK_OSF) += nfnetlink_osf.o
 # connection tracking
 obj-$(CONFIG_NF_CONNTRACK) += nf_conntrack.o
 
-obj-$(CONFIG_NF_CT_PROTO_GRE) += nf_conntrack_proto_gre.o
-
 # netlink interface for nf_conntrack
 obj-$(CONFIG_NF_CT_NETLINK) += nf_conntrack_netlink.o
 obj-$(CONFIG_NF_CT_NETLINK_TIMEOUT) += nfnetlink_cttimeout.o
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index 2bbc32d939e4..e113bb2dc88d 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -817,6 +817,9 @@ static const struct nf_conntrack_l4proto * const builtin_l4proto[] = {
 #ifdef CONFIG_NF_CT_PROTO_UDPLITE
 	&nf_conntrack_l4proto_udplite,
 #endif
+#ifdef CONFIG_NF_CT_PROTO_GRE
+	&nf_conntrack_l4proto_gre,
+#endif
 #if IS_ENABLED(CONFIG_IPV6)
 	&nf_conntrack_l4proto_icmpv6,
 #endif /* CONFIG_IPV6 */
@@ -897,9 +900,11 @@ void nf_conntrack_proto_pernet_fini(struct net *net)
 					ARRAY_SIZE(builtin_l4proto));
 	pn->users--;
 	nf_ct_l4proto_unregister_sysctl(pn);
+#ifdef CONFIG_NF_CT_PROTO_GRE
+	nf_ct_gre_keymap_flush(net);
+#endif
 }
 
-
 module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint,
 		  &nf_conntrack_htable_size, 0600);
 
diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
index 34dd89485be2..68f9bfb79c4e 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -48,18 +48,17 @@ static const unsigned int gre_timeouts[GRE_CT_MAX] = {
 	[GRE_CT_REPLIED]	= 180*HZ,
 };
 
-static unsigned int proto_gre_net_id __read_mostly;
 /* used when expectation is added */
 static DEFINE_SPINLOCK(keymap_lock);
 
-static inline struct netns_proto_gre *gre_pernet(struct net *net)
+static inline struct nf_gre_net *gre_pernet(struct net *net)
 {
-	return net_generic(net, proto_gre_net_id);
+	return &net->ct.nf_ct_proto.gre;
 }
 
-static void nf_ct_gre_keymap_flush(struct net *net)
+void nf_ct_gre_keymap_flush(struct net *net)
 {
-	struct netns_proto_gre *net_gre = gre_pernet(net);
+	struct nf_gre_net *net_gre = gre_pernet(net);
 	struct nf_ct_gre_keymap *km, *tmp;
 
 	spin_lock_bh(&keymap_lock);
@@ -83,7 +82,7 @@ static inline int gre_key_cmpfn(const struct nf_ct_gre_keymap *km,
 /* look up the source key for a given tuple */
 static __be16 gre_keymap_lookup(struct net *net, struct nf_conntrack_tuple *t)
 {
-	struct netns_proto_gre *net_gre = gre_pernet(net);
+	struct nf_gre_net *net_gre = gre_pernet(net);
 	struct nf_ct_gre_keymap *km;
 	__be16 key = 0;
 
@@ -105,7 +104,7 @@ int nf_ct_gre_keymap_add(struct nf_conn *ct, enum ip_conntrack_dir dir,
 			 struct nf_conntrack_tuple *t)
 {
 	struct net *net = nf_ct_net(ct);
-	struct netns_proto_gre *net_gre = gre_pernet(net);
+	struct nf_gre_net *net_gre = gre_pernet(net);
 	struct nf_ct_pptp_master *ct_pptp_info = nfct_help_data(ct);
 	struct nf_ct_gre_keymap **kmp, *km;
 
@@ -210,7 +209,7 @@ static void gre_print_conntrack(struct seq_file *s, struct nf_conn *ct)
 
 static unsigned int *gre_get_timeouts(struct net *net)
 {
-	return gre_pernet(net)->gre_timeouts;
+	return gre_pernet(net)->timeouts;
 }
 
 /* Returns verdict for packet, and may modify conntrack */
@@ -272,13 +271,13 @@ static int gre_timeout_nlattr_to_obj(struct nlattr *tb[],
 				     struct net *net, void *data)
 {
 	unsigned int *timeouts = data;
-	struct netns_proto_gre *net_gre = gre_pernet(net);
+	struct nf_gre_net *net_gre = gre_pernet(net);
 
 	if (!timeouts)
 		timeouts = gre_get_timeouts(net);
 	/* set default timeouts for GRE. */
-	timeouts[GRE_CT_UNREPLIED] = net_gre->gre_timeouts[GRE_CT_UNREPLIED];
-	timeouts[GRE_CT_REPLIED] = net_gre->gre_timeouts[GRE_CT_REPLIED];
+	timeouts[GRE_CT_UNREPLIED] = net_gre->timeouts[GRE_CT_UNREPLIED];
+	timeouts[GRE_CT_REPLIED] = net_gre->timeouts[GRE_CT_REPLIED];
 
 	if (tb[CTA_TIMEOUT_GRE_UNREPLIED]) {
 		timeouts[GRE_CT_UNREPLIED] =
@@ -332,10 +331,11 @@ static struct ctl_table gre_sysctl_table[] = {
 };
 #endif
 
-static int gre_kmemdup_sysctl_table(struct net *net, struct nf_proto_net *nf,
-				    struct netns_proto_gre *net_gre)
+static int gre_kmemdup_sysctl_table(struct net *net)
 {
 #ifdef CONFIG_SYSCTL
+	struct nf_gre_net *net_gre = gre_pernet(net);
+	struct nf_proto_net *nf = &net_gre->nf;
 	int i;
 
 	if (nf->ctl_table)
@@ -348,26 +348,25 @@ static int gre_kmemdup_sysctl_table(struct net *net, struct nf_proto_net *nf,
 		return -ENOMEM;
 
 	for (i = 0; i < GRE_CT_MAX; i++)
-		nf->ctl_table[i].data = &net_gre->gre_timeouts[i];
+		nf->ctl_table[i].data = &net_gre->timeouts[i];
 #endif
 	return 0;
 }
 
 static int gre_init_net(struct net *net)
 {
-	struct netns_proto_gre *net_gre = gre_pernet(net);
-	struct nf_proto_net *nf = &net_gre->nf;
+	struct nf_gre_net *net_gre = gre_pernet(net);
 	int i;
 
 	INIT_LIST_HEAD(&net_gre->keymap_list);
 	for (i = 0; i < GRE_CT_MAX; i++)
-		net_gre->gre_timeouts[i] = gre_timeouts[i];
+		net_gre->timeouts[i] = gre_timeouts[i];
 
-	return gre_kmemdup_sysctl_table(net, nf, net_gre);
+	return gre_kmemdup_sysctl_table(net);
 }
 
 /* protocol helper struct */
-static const struct nf_conntrack_l4proto nf_conntrack_l4proto_gre4 = {
+const struct nf_conntrack_l4proto nf_conntrack_l4proto_gre = {
 	.l4proto	 = IPPROTO_GRE,
 	.pkt_to_tuple	 = gre_pkt_to_tuple,
 #ifdef CONFIG_NF_CONNTRACK_PROCFS
@@ -391,61 +390,5 @@ static const struct nf_conntrack_l4proto nf_conntrack_l4proto_gre4 = {
 		.nla_policy	= gre_timeout_nla_policy,
 	},
 #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
-	.net_id		= &proto_gre_net_id,
 	.init_net	= gre_init_net,
 };
-
-static int proto_gre_net_init(struct net *net)
-{
-	int ret = 0;
-
-	ret = nf_ct_l4proto_pernet_register_one(net,
-						&nf_conntrack_l4proto_gre4);
-	if (ret < 0)
-		pr_err("nf_conntrack_gre4: pernet registration failed.\n");
-	return ret;
-}
-
-static void proto_gre_net_exit(struct net *net)
-{
-	nf_ct_l4proto_pernet_unregister_one(net, &nf_conntrack_l4proto_gre4);
-	nf_ct_gre_keymap_flush(net);
-}
-
-static struct pernet_operations proto_gre_net_ops = {
-	.init = proto_gre_net_init,
-	.exit = proto_gre_net_exit,
-	.id   = &proto_gre_net_id,
-	.size = sizeof(struct netns_proto_gre),
-};
-
-static int __init nf_ct_proto_gre_init(void)
-{
-	int ret;
-
-	BUILD_BUG_ON(offsetof(struct netns_proto_gre, nf) != 0);
-
-	ret = register_pernet_subsys(&proto_gre_net_ops);
-	if (ret < 0)
-		goto out_pernet;
-	ret = nf_ct_l4proto_register_one(&nf_conntrack_l4proto_gre4);
-	if (ret < 0)
-		goto out_gre4;
-
-	return 0;
-out_gre4:
-	unregister_pernet_subsys(&proto_gre_net_ops);
-out_pernet:
-	return ret;
-}
-
-static void __exit nf_ct_proto_gre_fini(void)
-{
-	nf_ct_l4proto_unregister_one(&nf_conntrack_l4proto_gre4);
-	unregister_pernet_subsys(&proto_gre_net_ops);
-}
-
-module_init(nf_ct_proto_gre_init);
-module_exit(nf_ct_proto_gre_fini);
-
-MODULE_LICENSE("GPL");
diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c
index 109b0d27345a..0e3e1a018206 100644
--- a/net/netfilter/nfnetlink_cttimeout.c
+++ b/net/netfilter/nfnetlink_cttimeout.c
@@ -474,12 +474,7 @@ static int cttimeout_default_get(struct net *net, struct sock *ctnl,
 		break;
 	case IPPROTO_GRE:
 #ifdef CONFIG_NF_CT_PROTO_GRE
-		if (l4proto->net_id) {
-			struct netns_proto_gre *net_gre;
-
-			net_gre = net_generic(net, *l4proto->net_id);
-			timeouts = net_gre->gre_timeouts;
-		}
+		timeouts = nf_gre_pernet(net)->timeouts;
 #endif
 		break;
 	case 255:
-- 
cgit v1.2.3


From 751fc301ecbd0fc08d4d6c388f170e2081df26e6 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Tue, 15 Jan 2019 22:03:36 +0100
Subject: netfilter: conntrack: remove net_id

No users anymore.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack_l4proto.h | 2 +-
 net/netfilter/nf_conntrack_proto.c           | 8 ++------
 2 files changed, 3 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h
index fded3f164dcc..3585f8666fc0 100644
--- a/include/net/netfilter/nf_conntrack_l4proto.h
+++ b/include/net/netfilter/nf_conntrack_l4proto.h
@@ -79,7 +79,7 @@ struct nf_conntrack_l4proto {
 	/* Print out the private part of the conntrack. */
 	void (*print_conntrack)(struct seq_file *s, struct nf_conn *);
 #endif
-	unsigned int	*net_id;
+
 	/* Init l4proto pernet data */
 	int (*init_net)(struct net *net);
 
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index e113bb2dc88d..47b9294a86f7 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -163,13 +163,9 @@ static int kill_l4proto(struct nf_conn *i, void *data)
 static struct nf_proto_net *nf_ct_l4proto_net(struct net *net,
 				const struct nf_conntrack_l4proto *l4proto)
 {
-	if (l4proto->get_net_proto) {
-		/* statically built-in protocols use static per-net */
+	if (l4proto->get_net_proto)
 		return l4proto->get_net_proto(net);
-	} else if (l4proto->net_id) {
-		/* ... and loadable protocols use dynamic per-net */
-		return net_generic(net, *l4proto->net_id);
-	}
+
 	return NULL;
 }
 
-- 
cgit v1.2.3


From df5e1629087a45ca915fa0f69ea662175261855e Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Tue, 15 Jan 2019 22:03:37 +0100
Subject: netfilter: conntrack: remove pkt_to_tuple callback

GRE is now builtin, so we can handle it via direct call and
remove the callback.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/nf_conntrack_proto_gre.h |  2 ++
 include/net/netfilter/nf_conntrack_l4proto.h     |  5 -----
 net/netfilter/nf_conntrack_core.c                |  6 ++++--
 net/netfilter/nf_conntrack_proto_generic.c       | 11 -----------
 net/netfilter/nf_conntrack_proto_gre.c           |  5 ++---
 5 files changed, 8 insertions(+), 21 deletions(-)

(limited to 'include')

diff --git a/include/linux/netfilter/nf_conntrack_proto_gre.h b/include/linux/netfilter/nf_conntrack_proto_gre.h
index 59714e9ee4ef..25f9a770fb84 100644
--- a/include/linux/netfilter/nf_conntrack_proto_gre.h
+++ b/include/linux/netfilter/nf_conntrack_proto_gre.h
@@ -30,5 +30,7 @@ void nf_ct_gre_keymap_flush(struct net *net);
 /* delete keymap entries */
 void nf_ct_gre_keymap_destroy(struct nf_conn *ct);
 
+bool gre_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
+		      struct net *net, struct nf_conntrack_tuple *tuple);
 #endif /* __KERNEL__ */
 #endif /* _CONNTRACK_PROTO_GRE_H */
diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h
index 3585f8666fc0..0d4b0398aeb9 100644
--- a/include/net/netfilter/nf_conntrack_l4proto.h
+++ b/include/net/netfilter/nf_conntrack_l4proto.h
@@ -27,11 +27,6 @@ struct nf_conntrack_l4proto {
 	/* protoinfo nlattr size, closes a hole */
 	u16 nlattr_size;
 
-	/* Try to fill in the third arg: dataoff is offset past network protocol
-           hdr.  Return true if possible. */
-	bool (*pkt_to_tuple)(const struct sk_buff *skb, unsigned int dataoff,
-			     struct net *net, struct nf_conntrack_tuple *tuple);
-
 	/* Invert the per-proto part of the tuple: ie. turn xmit into reply.
 	 * Only used by icmp, most protocols use a generic version.
 	 */
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index b3840d36c3a6..b71e271f2b44 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -279,9 +279,11 @@ nf_ct_get_tuple(const struct sk_buff *skb,
 		return icmpv6_pkt_to_tuple(skb, dataoff, net, tuple);
 	case IPPROTO_ICMP:
 		return icmp_pkt_to_tuple(skb, dataoff, net, tuple);
+#ifdef CONFIG_NF_CT_PROTO_GRE
+	case IPPROTO_GRE:
+		return gre_pkt_to_tuple(skb, dataoff, net, tuple);
+#endif
 	}
-	if (unlikely(l4proto->pkt_to_tuple))
-		return l4proto->pkt_to_tuple(skb, dataoff, net, tuple);
 
 	/* Actually only need first 4 bytes to get ports. */
 	inet_hdr = skb_header_pointer(skb, dataoff, sizeof(_inet_hdr), &_inet_hdr);
diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c
index 5da19d5fbc76..5a5bf7cb6508 100644
--- a/net/netfilter/nf_conntrack_proto_generic.c
+++ b/net/netfilter/nf_conntrack_proto_generic.c
@@ -27,16 +27,6 @@ static bool nf_generic_should_process(u8 proto)
 	}
 }
 
-static bool generic_pkt_to_tuple(const struct sk_buff *skb,
-				 unsigned int dataoff,
-				 struct net *net, struct nf_conntrack_tuple *tuple)
-{
-	tuple->src.u.all = 0;
-	tuple->dst.u.all = 0;
-
-	return true;
-}
-
 /* Returns verdict for packet, or -1 for invalid. */
 static int generic_packet(struct nf_conn *ct,
 			  struct sk_buff *skb,
@@ -149,7 +139,6 @@ static struct nf_proto_net *generic_get_net_proto(struct net *net)
 const struct nf_conntrack_l4proto nf_conntrack_l4proto_generic =
 {
 	.l4proto		= 255,
-	.pkt_to_tuple		= generic_pkt_to_tuple,
 	.packet			= generic_packet,
 #ifdef CONFIG_NF_CONNTRACK_TIMEOUT
 	.ctnl_timeout		= {
diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
index 68f9bfb79c4e..04bc982b274d 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -162,8 +162,8 @@ EXPORT_SYMBOL_GPL(nf_ct_gre_keymap_destroy);
 /* PUBLIC CONNTRACK PROTO HELPER FUNCTIONS */
 
 /* gre hdr info to tuple */
-static bool gre_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
-			     struct net *net, struct nf_conntrack_tuple *tuple)
+bool gre_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
+		      struct net *net, struct nf_conntrack_tuple *tuple)
 {
 	const struct pptp_gre_header *pgrehdr;
 	struct pptp_gre_header _pgrehdr;
@@ -368,7 +368,6 @@ static int gre_init_net(struct net *net)
 /* protocol helper struct */
 const struct nf_conntrack_l4proto nf_conntrack_l4proto_gre = {
 	.l4proto	 = IPPROTO_GRE,
-	.pkt_to_tuple	 = gre_pkt_to_tuple,
 #ifdef CONFIG_NF_CONNTRACK_PROCFS
 	.print_conntrack = gre_print_conntrack,
 #endif
-- 
cgit v1.2.3


From 197c4300aec09b7cd2df124cea92f9f5a82efe42 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Tue, 15 Jan 2019 22:03:38 +0100
Subject: netfilter: conntrack: remove invert_tuple callback

Only used by icmp(v6).  Prefer a direct call and remove this
function from the l4proto struct.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack_l4proto.h | 11 +++++------
 net/netfilter/nf_conntrack_core.c            |  8 ++++++--
 net/netfilter/nf_conntrack_proto_icmp.c      |  5 ++---
 net/netfilter/nf_conntrack_proto_icmpv6.c    |  5 ++---
 4 files changed, 15 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h
index 0d4b0398aeb9..6cec8337e684 100644
--- a/include/net/netfilter/nf_conntrack_l4proto.h
+++ b/include/net/netfilter/nf_conntrack_l4proto.h
@@ -27,12 +27,6 @@ struct nf_conntrack_l4proto {
 	/* protoinfo nlattr size, closes a hole */
 	u16 nlattr_size;
 
-	/* Invert the per-proto part of the tuple: ie. turn xmit into reply.
-	 * Only used by icmp, most protocols use a generic version.
-	 */
-	bool (*invert_tuple)(struct nf_conntrack_tuple *inverse,
-			     const struct nf_conntrack_tuple *orig);
-
 	/* Returns verdict for packet, or -1 for invalid. */
 	int (*packet)(struct nf_conn *ct,
 		      struct sk_buff *skb,
@@ -95,6 +89,11 @@ bool icmpv6_pkt_to_tuple(const struct sk_buff *skb,
 			 struct net *net,
 			 struct nf_conntrack_tuple *tuple);
 
+bool nf_conntrack_invert_icmp_tuple(struct nf_conntrack_tuple *tuple,
+				    const struct nf_conntrack_tuple *orig);
+bool nf_conntrack_invert_icmpv6_tuple(struct nf_conntrack_tuple *tuple,
+				      const struct nf_conntrack_tuple *orig);
+
 int nf_conntrack_icmpv4_error(struct nf_conn *tmpl,
 			      struct sk_buff *skb,
 			      unsigned int dataoff,
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index b71e271f2b44..d56cb0fc82b6 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -423,8 +423,12 @@ nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse,
 
 	inverse->dst.protonum = orig->dst.protonum;
 
-	if (unlikely(l4proto->invert_tuple))
-		return l4proto->invert_tuple(inverse, orig);
+	switch (orig->dst.protonum) {
+	case IPPROTO_ICMP:
+		return nf_conntrack_invert_icmp_tuple(inverse, orig);
+	case IPPROTO_ICMPV6:
+		return nf_conntrack_invert_icmpv6_tuple(inverse, orig);
+	}
 
 	inverse->src.u.all = orig->dst.u.all;
 	inverse->dst.u.all = orig->src.u.all;
diff --git a/net/netfilter/nf_conntrack_proto_icmp.c b/net/netfilter/nf_conntrack_proto_icmp.c
index 805c1fe5b837..d28c1d7633b2 100644
--- a/net/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/netfilter/nf_conntrack_proto_icmp.c
@@ -54,8 +54,8 @@ static const u_int8_t invmap[] = {
 	[ICMP_ADDRESSREPLY] = ICMP_ADDRESS + 1
 };
 
-static bool icmp_invert_tuple(struct nf_conntrack_tuple *tuple,
-			      const struct nf_conntrack_tuple *orig)
+bool nf_conntrack_invert_icmp_tuple(struct nf_conntrack_tuple *tuple,
+				    const struct nf_conntrack_tuple *orig)
 {
 	if (orig->dst.u.icmp.type >= sizeof(invmap) ||
 	    !invmap[orig->dst.u.icmp.type])
@@ -347,7 +347,6 @@ static struct nf_proto_net *icmp_get_net_proto(struct net *net)
 const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp =
 {
 	.l4proto		= IPPROTO_ICMP,
-	.invert_tuple		= icmp_invert_tuple,
 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
 	.tuple_to_nlattr	= icmp_tuple_to_nlattr,
 	.nlattr_tuple_size	= icmp_nlattr_tuple_size,
diff --git a/net/netfilter/nf_conntrack_proto_icmpv6.c b/net/netfilter/nf_conntrack_proto_icmpv6.c
index 20cd55e55e41..2910dcdea134 100644
--- a/net/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/netfilter/nf_conntrack_proto_icmpv6.c
@@ -67,8 +67,8 @@ static const u_int8_t noct_valid_new[] = {
 	[ICMPV6_MLD2_REPORT - 130] = 1
 };
 
-static bool icmpv6_invert_tuple(struct nf_conntrack_tuple *tuple,
-				const struct nf_conntrack_tuple *orig)
+bool nf_conntrack_invert_icmpv6_tuple(struct nf_conntrack_tuple *tuple,
+				      const struct nf_conntrack_tuple *orig)
 {
 	int type = orig->dst.u.icmp.type - 128;
 	if (type < 0 || type >= sizeof(invmap) || !invmap[type])
@@ -358,7 +358,6 @@ static struct nf_proto_net *icmpv6_get_net_proto(struct net *net)
 const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 =
 {
 	.l4proto		= IPPROTO_ICMPV6,
-	.invert_tuple		= icmpv6_invert_tuple,
 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
 	.tuple_to_nlattr	= icmpv6_tuple_to_nlattr,
 	.nlattr_tuple_size	= icmpv6_nlattr_tuple_size,
-- 
cgit v1.2.3


From b184356d0a003ac5e82b4adf37f2d334df1d213c Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Tue, 15 Jan 2019 22:03:39 +0100
Subject: netfilter: conntrack: remove module owner field

No need to get/put module owner reference, none of these can be removed
anymore.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack_l4proto.h | 4 ----
 net/netfilter/nf_conntrack_proto.c           | 8 --------
 net/netfilter/nf_conntrack_proto_gre.c       | 1 -
 net/netfilter/nfnetlink_cttimeout.c          | 6 ------
 net/netfilter/nft_ct.c                       | 2 --
 5 files changed, 21 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h
index 6cec8337e684..fd8954429e87 100644
--- a/include/net/netfilter/nf_conntrack_l4proto.h
+++ b/include/net/netfilter/nf_conntrack_l4proto.h
@@ -74,9 +74,6 @@ struct nf_conntrack_l4proto {
 
 	/* Return the per-net protocol part. */
 	struct nf_proto_net *(*get_net_proto)(struct net *net);
-
-	/* Module (if any) which this is connected to. */
-	struct module *me;
 };
 
 bool icmp_pkt_to_tuple(const struct sk_buff *skb,
@@ -148,7 +145,6 @@ extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_generic;
 const struct nf_conntrack_l4proto *__nf_ct_l4proto_find(u8 l4proto);
 
 const struct nf_conntrack_l4proto *nf_ct_l4proto_find_get(u8 l4proto);
-void nf_ct_l4proto_put(const struct nf_conntrack_l4proto *p);
 
 /* Protocol pernet registration. */
 int nf_ct_l4proto_pernet_register_one(struct net *net,
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index 47b9294a86f7..3455f993cc68 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -139,20 +139,12 @@ const struct nf_conntrack_l4proto *nf_ct_l4proto_find_get(u8 l4num)
 
 	rcu_read_lock();
 	p = __nf_ct_l4proto_find(l4num);
-	if (!try_module_get(p->me))
-		p = &nf_conntrack_l4proto_generic;
 	rcu_read_unlock();
 
 	return p;
 }
 EXPORT_SYMBOL_GPL(nf_ct_l4proto_find_get);
 
-void nf_ct_l4proto_put(const struct nf_conntrack_l4proto *p)
-{
-	module_put(p->me);
-}
-EXPORT_SYMBOL_GPL(nf_ct_l4proto_put);
-
 static int kill_l4proto(struct nf_conn *i, void *data)
 {
 	const struct nf_conntrack_l4proto *l4proto;
diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
index 04bc982b274d..0c26cfd5d29f 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -373,7 +373,6 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_gre = {
 #endif
 	.packet		 = gre_packet,
 	.destroy	 = gre_destroy,
-	.me 		 = THIS_MODULE,
 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
 	.tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
 	.nlattr_tuple_size = nf_ct_port_nlattr_tuple_size,
diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c
index 0e3e1a018206..37b4f84ac153 100644
--- a/net/netfilter/nfnetlink_cttimeout.c
+++ b/net/netfilter/nfnetlink_cttimeout.c
@@ -152,7 +152,6 @@ static int cttimeout_new_timeout(struct net *net, struct sock *ctnl,
 err:
 	kfree(timeout);
 err_proto_put:
-	nf_ct_l4proto_put(l4proto);
 	return ret;
 }
 
@@ -302,7 +301,6 @@ static int ctnl_timeout_try_del(struct net *net, struct ctnl_timeout *timeout)
 	if (refcount_dec_if_one(&timeout->refcnt)) {
 		/* We are protected by nfnl mutex. */
 		list_del_rcu(&timeout->head);
-		nf_ct_l4proto_put(timeout->timeout.l4proto);
 		nf_ct_untimeout(net, &timeout->timeout);
 		kfree_rcu(timeout, rcu_head);
 	} else {
@@ -372,10 +370,8 @@ static int cttimeout_default_set(struct net *net, struct sock *ctnl,
 	if (ret < 0)
 		goto err;
 
-	nf_ct_l4proto_put(l4proto);
 	return 0;
 err:
-	nf_ct_l4proto_put(l4proto);
 	return ret;
 }
 
@@ -511,7 +507,6 @@ static int cttimeout_default_get(struct net *net, struct sock *ctnl,
 	/* this avoids a loop in nfnetlink. */
 	return ret == -EAGAIN ? -ENOBUFS : ret;
 err:
-	nf_ct_l4proto_put(l4proto);
 	return err;
 }
 
@@ -592,7 +587,6 @@ static void __net_exit cttimeout_net_exit(struct net *net)
 
 	list_for_each_entry_safe(cur, tmp, &net->nfct_timeout_list, head) {
 		list_del_rcu(&cur->head);
-		nf_ct_l4proto_put(cur->timeout.l4proto);
 
 		if (refcount_dec_and_test(&cur->refcnt))
 			kfree_rcu(cur, rcu_head);
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index 586627c361df..3249cc059048 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -902,7 +902,6 @@ static int nft_ct_timeout_obj_init(const struct nft_ctx *ctx,
 err_free_timeout:
 	kfree(timeout);
 err_proto_put:
-	nf_ct_l4proto_put(l4proto);
 	return ret;
 }
 
@@ -913,7 +912,6 @@ static void nft_ct_timeout_obj_destroy(const struct nft_ctx *ctx,
 	struct nf_ct_timeout *timeout = priv->timeout;
 
 	nf_ct_untimeout(ctx->net, timeout);
-	nf_ct_l4proto_put(timeout->l4proto);
 	nf_ct_netns_put(ctx->net, ctx->family);
 	kfree(priv->timeout);
 }
-- 
cgit v1.2.3


From 44fb87f6351d04fafae2b17df5f46f1ce22c6f93 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Tue, 15 Jan 2019 22:03:40 +0100
Subject: netfilter: conntrack: remove remaining l4proto indirect packet calls

Now that all l4trackers are builtin, no need to use a mix of direct and
indirect calls.
This removes the last two users: gre and the generic l4 protocol
tracker.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack_l4proto.h | 12 ++++------
 net/netfilter/nf_conntrack_core.c            | 26 +++++++++++++++------
 net/netfilter/nf_conntrack_proto_generic.c   | 35 ----------------------------
 net/netfilter/nf_conntrack_proto_gre.c       | 11 ++++-----
 4 files changed, 29 insertions(+), 55 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h
index fd8954429e87..dda028996559 100644
--- a/include/net/netfilter/nf_conntrack_l4proto.h
+++ b/include/net/netfilter/nf_conntrack_l4proto.h
@@ -27,13 +27,6 @@ struct nf_conntrack_l4proto {
 	/* protoinfo nlattr size, closes a hole */
 	u16 nlattr_size;
 
-	/* Returns verdict for packet, or -1 for invalid. */
-	int (*packet)(struct nf_conn *ct,
-		      struct sk_buff *skb,
-		      unsigned int dataoff,
-		      enum ip_conntrack_info ctinfo,
-		      const struct nf_hook_state *state);
-
 	/* Called when a conntrack entry is destroyed */
 	void (*destroy)(struct nf_conn *ct);
 
@@ -136,6 +129,11 @@ int nf_conntrack_sctp_packet(struct nf_conn *ct,
 			     unsigned int dataoff,
 			     enum ip_conntrack_info ctinfo,
 			     const struct nf_hook_state *state);
+int nf_conntrack_gre_packet(struct nf_conn *ct,
+			    struct sk_buff *skb,
+			    unsigned int dataoff,
+			    enum ip_conntrack_info ctinfo,
+			    const struct nf_hook_state *state);
 
 /* Existing built-in generic protocol */
 extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_generic;
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index d56cb0fc82b6..264074f04615 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1534,6 +1534,18 @@ nf_conntrack_handle_icmp(struct nf_conn *tmpl,
 	return ret;
 }
 
+static int generic_packet(struct nf_conn *ct, struct sk_buff *skb,
+			  enum ip_conntrack_info ctinfo)
+{
+	const unsigned int *timeout = nf_ct_timeout_lookup(ct);
+
+	if (!timeout)
+		timeout = &nf_generic_pernet(nf_ct_net(ct))->timeout;
+
+	nf_ct_refresh_acct(ct, ctinfo, skb, *timeout);
+	return NF_ACCEPT;
+}
+
 /* Returns verdict for packet, or -1 for invalid. */
 static int nf_conntrack_handle_packet(struct nf_conn *ct,
 				      struct sk_buff *skb,
@@ -1566,11 +1578,15 @@ static int nf_conntrack_handle_packet(struct nf_conn *ct,
 	case IPPROTO_DCCP:
 		return nf_conntrack_dccp_packet(ct, skb, dataoff,
 						ctinfo, state);
+#endif
+#ifdef CONFIG_NF_CT_PROTO_GRE
+	case IPPROTO_GRE:
+		return nf_conntrack_gre_packet(ct, skb, dataoff,
+					       ctinfo, state);
 #endif
 	}
 
-	WARN_ON_ONCE(1);
-	return -NF_ACCEPT;
+	return generic_packet(ct, skb, ctinfo);
 }
 
 unsigned int
@@ -1634,11 +1650,7 @@ repeat:
 		goto out;
 	}
 
-	if (l4proto->packet)
-		ret = l4proto->packet(ct, skb, dataoff, ctinfo, state);
-	else
-		ret = nf_conntrack_handle_packet(ct, skb, dataoff, ctinfo, state);
-
+	ret = nf_conntrack_handle_packet(ct, skb, dataoff, ctinfo, state);
 	if (ret <= 0) {
 		/* Invalid: inverse of the return code tells
 		 * the netfilter core what to do */
diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c
index 5a5bf7cb6508..78358d192c60 100644
--- a/net/netfilter/nf_conntrack_proto_generic.c
+++ b/net/netfilter/nf_conntrack_proto_generic.c
@@ -15,40 +15,6 @@
 
 static const unsigned int nf_ct_generic_timeout = 600*HZ;
 
-static bool nf_generic_should_process(u8 proto)
-{
-	switch (proto) {
-#ifdef CONFIG_NF_CT_PROTO_GRE_MODULE
-	case IPPROTO_GRE:
-		return false;
-#endif
-	default:
-		return true;
-	}
-}
-
-/* Returns verdict for packet, or -1 for invalid. */
-static int generic_packet(struct nf_conn *ct,
-			  struct sk_buff *skb,
-			  unsigned int dataoff,
-			  enum ip_conntrack_info ctinfo,
-			  const struct nf_hook_state *state)
-{
-	const unsigned int *timeout = nf_ct_timeout_lookup(ct);
-
-	if (!nf_generic_should_process(nf_ct_protonum(ct))) {
-		pr_warn_once("conntrack: generic helper won't handle protocol %d. Please consider loading the specific helper module.\n",
-			     nf_ct_protonum(ct));
-		return -NF_ACCEPT;
-	}
-
-	if (!timeout)
-		timeout = &nf_generic_pernet(nf_ct_net(ct))->timeout;
-
-	nf_ct_refresh_acct(ct, ctinfo, skb, *timeout);
-	return NF_ACCEPT;
-}
-
 #ifdef CONFIG_NF_CONNTRACK_TIMEOUT
 
 #include <linux/netfilter/nfnetlink.h>
@@ -139,7 +105,6 @@ static struct nf_proto_net *generic_get_net_proto(struct net *net)
 const struct nf_conntrack_l4proto nf_conntrack_l4proto_generic =
 {
 	.l4proto		= 255,
-	.packet			= generic_packet,
 #ifdef CONFIG_NF_CONNTRACK_TIMEOUT
 	.ctnl_timeout		= {
 		.nlattr_to_obj	= generic_timeout_nlattr_to_obj,
diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
index 0c26cfd5d29f..f6391991bcf6 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -213,11 +213,11 @@ static unsigned int *gre_get_timeouts(struct net *net)
 }
 
 /* Returns verdict for packet, and may modify conntrack */
-static int gre_packet(struct nf_conn *ct,
-		      struct sk_buff *skb,
-		      unsigned int dataoff,
-		      enum ip_conntrack_info ctinfo,
-		      const struct nf_hook_state *state)
+int nf_conntrack_gre_packet(struct nf_conn *ct,
+			    struct sk_buff *skb,
+			    unsigned int dataoff,
+			    enum ip_conntrack_info ctinfo,
+			    const struct nf_hook_state *state)
 {
 	if (state->pf != NFPROTO_IPV4)
 		return -NF_ACCEPT;
@@ -371,7 +371,6 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_gre = {
 #ifdef CONFIG_NF_CONNTRACK_PROCFS
 	.print_conntrack = gre_print_conntrack,
 #endif
-	.packet		 = gre_packet,
 	.destroy	 = gre_destroy,
 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
 	.tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
-- 
cgit v1.2.3


From edf0338dabf0f330873f7a9f84ea9f9ac431c0a8 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Tue, 15 Jan 2019 22:03:41 +0100
Subject: netfilter: conntrack: remove pernet l4 proto register interface

No used anymore.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack_l4proto.h | 12 ------------
 net/netfilter/nf_conntrack_proto.c           | 28 ++++++++++++----------------
 2 files changed, 12 insertions(+), 28 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h
index dda028996559..87d9c198c05e 100644
--- a/include/net/netfilter/nf_conntrack_l4proto.h
+++ b/include/net/netfilter/nf_conntrack_l4proto.h
@@ -144,18 +144,6 @@ const struct nf_conntrack_l4proto *__nf_ct_l4proto_find(u8 l4proto);
 
 const struct nf_conntrack_l4proto *nf_ct_l4proto_find_get(u8 l4proto);
 
-/* Protocol pernet registration. */
-int nf_ct_l4proto_pernet_register_one(struct net *net,
-				const struct nf_conntrack_l4proto *proto);
-void nf_ct_l4proto_pernet_unregister_one(struct net *net,
-				const struct nf_conntrack_l4proto *proto);
-int nf_ct_l4proto_pernet_register(struct net *net,
-				  const struct nf_conntrack_l4proto *const proto[],
-				  unsigned int num_proto);
-void nf_ct_l4proto_pernet_unregister(struct net *net,
-				const struct nf_conntrack_l4proto *const proto[],
-				unsigned int num_proto);
-
 /* Protocol global registration. */
 int nf_ct_l4proto_register_one(const struct nf_conntrack_l4proto *proto);
 void nf_ct_l4proto_unregister_one(const struct nf_conntrack_l4proto *proto);
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index 3455f993cc68..13b782cc6a7a 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -221,8 +221,8 @@ out_unlock:
 }
 EXPORT_SYMBOL_GPL(nf_ct_l4proto_register_one);
 
-int nf_ct_l4proto_pernet_register_one(struct net *net,
-				const struct nf_conntrack_l4proto *l4proto)
+static int nf_ct_l4proto_pernet_register_one(struct net *net,
+					     const struct nf_conntrack_l4proto *l4proto)
 {
 	int ret = 0;
 	struct nf_proto_net *pn = NULL;
@@ -245,7 +245,6 @@ int nf_ct_l4proto_pernet_register_one(struct net *net,
 out:
 	return ret;
 }
-EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_register_one);
 
 static void __nf_ct_l4proto_unregister_one(const struct nf_conntrack_l4proto *l4proto)
 
@@ -272,7 +271,7 @@ void nf_ct_l4proto_unregister_one(const struct nf_conntrack_l4proto *l4proto)
 }
 EXPORT_SYMBOL_GPL(nf_ct_l4proto_unregister_one);
 
-void nf_ct_l4proto_pernet_unregister_one(struct net *net,
+static void nf_ct_l4proto_pernet_unregister_one(struct net *net,
 				const struct nf_conntrack_l4proto *l4proto)
 {
 	struct nf_proto_net *pn = nf_ct_l4proto_net(net, l4proto);
@@ -283,7 +282,6 @@ void nf_ct_l4proto_pernet_unregister_one(struct net *net,
 	pn->users--;
 	nf_ct_l4proto_unregister_sysctl(pn);
 }
-EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_unregister_one);
 
 static void
 nf_ct_l4proto_unregister(const struct nf_conntrack_l4proto * const l4proto[],
@@ -322,7 +320,15 @@ nf_ct_l4proto_register(const struct nf_conntrack_l4proto * const l4proto[],
 	return ret;
 }
 
-int nf_ct_l4proto_pernet_register(struct net *net,
+static void nf_ct_l4proto_pernet_unregister(struct net *net,
+				const struct nf_conntrack_l4proto *const l4proto[],
+				unsigned int num_proto)
+{
+	while (num_proto-- != 0)
+		nf_ct_l4proto_pernet_unregister_one(net, l4proto[num_proto]);
+}
+
+static int nf_ct_l4proto_pernet_register(struct net *net,
 				  const struct nf_conntrack_l4proto *const l4proto[],
 				  unsigned int num_proto)
 {
@@ -341,16 +347,6 @@ int nf_ct_l4proto_pernet_register(struct net *net,
 	}
 	return ret;
 }
-EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_register);
-
-void nf_ct_l4proto_pernet_unregister(struct net *net,
-				const struct nf_conntrack_l4proto *const l4proto[],
-				unsigned int num_proto)
-{
-	while (num_proto-- != 0)
-		nf_ct_l4proto_pernet_unregister_one(net, l4proto[num_proto]);
-}
-EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_unregister);
 
 static unsigned int nf_confirm(struct sk_buff *skb,
 			       unsigned int protoff,
-- 
cgit v1.2.3


From 303e0c5589592e4f623bfcaf4292a1ed816328ad Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Tue, 15 Jan 2019 22:03:42 +0100
Subject: netfilter: conntrack: avoid unneeded nf_conntrack_l4proto lookups

after removal of the packet and invert function pointers, several
places do not need to lookup the l4proto structure anymore.

Remove those lookups.
The function nf_ct_invert_tuplepr becomes redundant, replace
it with nf_ct_invert_tuple everywhere.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack.h      |  2 --
 include/net/netfilter/nf_conntrack_core.h |  3 +-
 net/ipv4/netfilter/nf_nat_l3proto_ipv4.c  |  2 +-
 net/ipv6/netfilter/nf_nat_l3proto_ipv6.c  |  2 +-
 net/netfilter/nf_conntrack_core.c         | 53 ++++++-------------------------
 net/netfilter/nf_conntrack_pptp.c         |  2 +-
 net/netfilter/nf_conntrack_proto_icmp.c   |  6 +---
 net/netfilter/nf_conntrack_proto_icmpv6.c |  6 +---
 net/netfilter/nf_nat_core.c               | 12 +++----
 net/openvswitch/conntrack.c               |  2 +-
 10 files changed, 22 insertions(+), 68 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index 249d0a5b12b8..b5aac5ae5129 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -187,8 +187,6 @@ bool nf_ct_delete(struct nf_conn *ct, u32 pid, int report);
 bool nf_ct_get_tuplepr(const struct sk_buff *skb, unsigned int nhoff,
 		       u_int16_t l3num, struct net *net,
 		       struct nf_conntrack_tuple *tuple);
-bool nf_ct_invert_tuplepr(struct nf_conntrack_tuple *inverse,
-			  const struct nf_conntrack_tuple *orig);
 
 void __nf_ct_refresh_acct(struct nf_conn *ct, enum ip_conntrack_info ctinfo,
 			  const struct sk_buff *skb,
diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h
index afc9b3620473..235c182022b2 100644
--- a/include/net/netfilter/nf_conntrack_core.h
+++ b/include/net/netfilter/nf_conntrack_core.h
@@ -39,8 +39,7 @@ void nf_conntrack_init_end(void);
 void nf_conntrack_cleanup_end(void);
 
 bool nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse,
-			const struct nf_conntrack_tuple *orig,
-			const struct nf_conntrack_l4proto *l4proto);
+			const struct nf_conntrack_tuple *orig);
 
 /* Find a connection corresponding to a tuple. */
 struct nf_conntrack_tuple_hash *
diff --git a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
index 2687db015b6f..e26165af45cb 100644
--- a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
@@ -214,7 +214,7 @@ int nf_nat_icmp_reply_translation(struct sk_buff *skb,
 	}
 
 	/* Change outer to look like the reply to an incoming packet */
-	nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);
+	nf_ct_invert_tuple(&target, &ct->tuplehash[!dir].tuple);
 	if (!nf_nat_ipv4_manip_pkt(skb, 0, &target, manip))
 		return 0;
 
diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
index 23022447eb49..9c914db44bec 100644
--- a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
@@ -225,7 +225,7 @@ int nf_nat_icmpv6_reply_translation(struct sk_buff *skb,
 						     skb->len - hdrlen, 0));
 	}
 
-	nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);
+	nf_ct_invert_tuple(&target, &ct->tuplehash[!dir].tuple);
 	if (!nf_nat_ipv6_manip_pkt(skb, 0, &target, manip))
 		return 0;
 
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 264074f04615..728d2b5bdb1a 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -229,8 +229,7 @@ nf_ct_get_tuple(const struct sk_buff *skb,
 		u_int16_t l3num,
 		u_int8_t protonum,
 		struct net *net,
-		struct nf_conntrack_tuple *tuple,
-		const struct nf_conntrack_l4proto *l4proto)
+		struct nf_conntrack_tuple *tuple)
 {
 	unsigned int size;
 	const __be32 *ap;
@@ -374,33 +373,20 @@ bool nf_ct_get_tuplepr(const struct sk_buff *skb, unsigned int nhoff,
 		       u_int16_t l3num,
 		       struct net *net, struct nf_conntrack_tuple *tuple)
 {
-	const struct nf_conntrack_l4proto *l4proto;
 	u8 protonum;
 	int protoff;
-	int ret;
-
-	rcu_read_lock();
 
 	protoff = get_l4proto(skb, nhoff, l3num, &protonum);
-	if (protoff <= 0) {
-		rcu_read_unlock();
+	if (protoff <= 0)
 		return false;
-	}
 
-	l4proto = __nf_ct_l4proto_find(protonum);
-
-	ret = nf_ct_get_tuple(skb, nhoff, protoff, l3num, protonum, net, tuple,
-			      l4proto);
-
-	rcu_read_unlock();
-	return ret;
+	return nf_ct_get_tuple(skb, nhoff, protoff, l3num, protonum, net, tuple);
 }
 EXPORT_SYMBOL_GPL(nf_ct_get_tuplepr);
 
 bool
 nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse,
-		   const struct nf_conntrack_tuple *orig,
-		   const struct nf_conntrack_l4proto *l4proto)
+		   const struct nf_conntrack_tuple *orig)
 {
 	memset(inverse, 0, sizeof(*inverse));
 
@@ -1354,7 +1340,6 @@ EXPORT_SYMBOL_GPL(nf_conntrack_free);
 static noinline struct nf_conntrack_tuple_hash *
 init_conntrack(struct net *net, struct nf_conn *tmpl,
 	       const struct nf_conntrack_tuple *tuple,
-	       const struct nf_conntrack_l4proto *l4proto,
 	       struct sk_buff *skb,
 	       unsigned int dataoff, u32 hash)
 {
@@ -1367,7 +1352,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
 	struct nf_conn_timeout *timeout_ext;
 	struct nf_conntrack_zone tmp;
 
-	if (!nf_ct_invert_tuple(&repl_tuple, tuple, l4proto)) {
+	if (!nf_ct_invert_tuple(&repl_tuple, tuple)) {
 		pr_debug("Can't invert tuple.\n");
 		return NULL;
 	}
@@ -1449,7 +1434,6 @@ resolve_normal_ct(struct nf_conn *tmpl,
 		  struct sk_buff *skb,
 		  unsigned int dataoff,
 		  u_int8_t protonum,
-		  const struct nf_conntrack_l4proto *l4proto,
 		  const struct nf_hook_state *state)
 {
 	const struct nf_conntrack_zone *zone;
@@ -1462,7 +1446,7 @@ resolve_normal_ct(struct nf_conn *tmpl,
 
 	if (!nf_ct_get_tuple(skb, skb_network_offset(skb),
 			     dataoff, state->pf, protonum, state->net,
-			     &tuple, l4proto)) {
+			     &tuple)) {
 		pr_debug("Can't get tuple\n");
 		return 0;
 	}
@@ -1472,7 +1456,7 @@ resolve_normal_ct(struct nf_conn *tmpl,
 	hash = hash_conntrack_raw(&tuple, state->net);
 	h = __nf_conntrack_find_get(state->net, zone, &tuple, hash);
 	if (!h) {
-		h = init_conntrack(state->net, tmpl, &tuple, l4proto,
+		h = init_conntrack(state->net, tmpl, &tuple,
 				   skb, dataoff, hash);
 		if (!h)
 			return 0;
@@ -1592,7 +1576,6 @@ static int nf_conntrack_handle_packet(struct nf_conn *ct,
 unsigned int
 nf_conntrack_in(struct sk_buff *skb, const struct nf_hook_state *state)
 {
-	const struct nf_conntrack_l4proto *l4proto;
 	enum ip_conntrack_info ctinfo;
 	struct nf_conn *ct, *tmpl;
 	u_int8_t protonum;
@@ -1619,8 +1602,6 @@ nf_conntrack_in(struct sk_buff *skb, const struct nf_hook_state *state)
 		goto out;
 	}
 
-	l4proto = __nf_ct_l4proto_find(protonum);
-
 	if (protonum == IPPROTO_ICMP || protonum == IPPROTO_ICMPV6) {
 		ret = nf_conntrack_handle_icmp(tmpl, skb, dataoff,
 					       protonum, state);
@@ -1634,7 +1615,7 @@ nf_conntrack_in(struct sk_buff *skb, const struct nf_hook_state *state)
 	}
 repeat:
 	ret = resolve_normal_ct(tmpl, skb, dataoff,
-				protonum, l4proto, state);
+				protonum, state);
 	if (ret < 0) {
 		/* Too stressed to deal. */
 		NF_CT_STAT_INC_ATOMIC(state->net, drop);
@@ -1681,19 +1662,6 @@ out:
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_in);
 
-bool nf_ct_invert_tuplepr(struct nf_conntrack_tuple *inverse,
-			  const struct nf_conntrack_tuple *orig)
-{
-	bool ret;
-
-	rcu_read_lock();
-	ret = nf_ct_invert_tuple(inverse, orig,
-				 __nf_ct_l4proto_find(orig->dst.protonum));
-	rcu_read_unlock();
-	return ret;
-}
-EXPORT_SYMBOL_GPL(nf_ct_invert_tuplepr);
-
 /* Alter reply tuple (maybe alter helper).  This is for NAT, and is
    implicitly racy: see __nf_conntrack_confirm */
 void nf_conntrack_alter_reply(struct nf_conn *ct,
@@ -1824,7 +1792,6 @@ static void nf_conntrack_attach(struct sk_buff *nskb, const struct sk_buff *skb)
 
 static int nf_conntrack_update(struct net *net, struct sk_buff *skb)
 {
-	const struct nf_conntrack_l4proto *l4proto;
 	struct nf_conntrack_tuple_hash *h;
 	struct nf_conntrack_tuple tuple;
 	enum ip_conntrack_info ctinfo;
@@ -1845,10 +1812,8 @@ static int nf_conntrack_update(struct net *net, struct sk_buff *skb)
 	if (dataoff <= 0)
 		return -1;
 
-	l4proto = nf_ct_l4proto_find_get(l4num);
-
 	if (!nf_ct_get_tuple(skb, skb_network_offset(skb), dataoff, l3num,
-			     l4num, net, &tuple, l4proto))
+			     l4num, net, &tuple))
 		return -1;
 
 	if (ct->status & IPS_SRC_NAT) {
diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c
index 11562f2a08bb..976f1dcb97f0 100644
--- a/net/netfilter/nf_conntrack_pptp.c
+++ b/net/netfilter/nf_conntrack_pptp.c
@@ -121,7 +121,7 @@ static void pptp_expectfn(struct nf_conn *ct,
 		struct nf_conntrack_expect *exp_other;
 
 		/* obviously this tuple inversion only works until you do NAT */
-		nf_ct_invert_tuplepr(&inv_t, &exp->tuple);
+		nf_ct_invert_tuple(&inv_t, &exp->tuple);
 		pr_debug("trying to unexpect other dir: ");
 		nf_ct_dump_tuple(&inv_t);
 
diff --git a/net/netfilter/nf_conntrack_proto_icmp.c b/net/netfilter/nf_conntrack_proto_icmp.c
index d28c1d7633b2..1007efae741d 100644
--- a/net/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/netfilter/nf_conntrack_proto_icmp.c
@@ -109,7 +109,6 @@ icmp_error_message(struct nf_conn *tmpl, struct sk_buff *skb,
 		   const struct nf_hook_state *state)
 {
 	struct nf_conntrack_tuple innertuple, origtuple;
-	const struct nf_conntrack_l4proto *innerproto;
 	const struct nf_conntrack_tuple_hash *h;
 	const struct nf_conntrack_zone *zone;
 	enum ip_conntrack_info ctinfo;
@@ -127,12 +126,9 @@ icmp_error_message(struct nf_conn *tmpl, struct sk_buff *skb,
 		return -NF_ACCEPT;
 	}
 
-	/* rcu_read_lock()ed by nf_hook_thresh */
-	innerproto = __nf_ct_l4proto_find(origtuple.dst.protonum);
-
 	/* Ordinarily, we'd expect the inverted tupleproto, but it's
 	   been preserved inside the ICMP. */
-	if (!nf_ct_invert_tuple(&innertuple, &origtuple, innerproto)) {
+	if (!nf_ct_invert_tuple(&innertuple, &origtuple)) {
 		pr_debug("icmp_error_message: no match\n");
 		return -NF_ACCEPT;
 	}
diff --git a/net/netfilter/nf_conntrack_proto_icmpv6.c b/net/netfilter/nf_conntrack_proto_icmpv6.c
index 2910dcdea134..6c93c091a8dd 100644
--- a/net/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/netfilter/nf_conntrack_proto_icmpv6.c
@@ -130,7 +130,6 @@ icmpv6_error_message(struct net *net, struct nf_conn *tmpl,
 {
 	struct nf_conntrack_tuple intuple, origtuple;
 	const struct nf_conntrack_tuple_hash *h;
-	const struct nf_conntrack_l4proto *inproto;
 	enum ip_conntrack_info ctinfo;
 	struct nf_conntrack_zone tmp;
 
@@ -146,12 +145,9 @@ icmpv6_error_message(struct net *net, struct nf_conn *tmpl,
 		return -NF_ACCEPT;
 	}
 
-	/* rcu_read_lock()ed by nf_hook_thresh */
-	inproto = __nf_ct_l4proto_find(origtuple.dst.protonum);
-
 	/* Ordinarily, we'd expect the inverted tupleproto, but it's
 	   been preserved inside the ICMP. */
-	if (!nf_ct_invert_tuple(&intuple, &origtuple, inproto)) {
+	if (!nf_ct_invert_tuple(&intuple, &origtuple)) {
 		pr_debug("icmpv6_error: Can't invert tuple\n");
 		return -NF_ACCEPT;
 	}
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index d159e9e7835b..44f97b3a215a 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -158,7 +158,7 @@ nf_nat_used_tuple(const struct nf_conntrack_tuple *tuple,
 	 */
 	struct nf_conntrack_tuple reply;
 
-	nf_ct_invert_tuplepr(&reply, tuple);
+	nf_ct_invert_tuple(&reply, tuple);
 	return nf_conntrack_tuple_taken(&reply, ignored_conntrack);
 }
 EXPORT_SYMBOL(nf_nat_used_tuple);
@@ -253,7 +253,7 @@ find_appropriate_src(struct net *net,
 		    net_eq(net, nf_ct_net(ct)) &&
 		    nf_ct_zone_equal(ct, zone, IP_CT_DIR_ORIGINAL)) {
 			/* Copy source part from reply tuple. */
-			nf_ct_invert_tuplepr(result,
+			nf_ct_invert_tuple(result,
 				       &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
 			result->dst = tuple->dst;
 
@@ -560,8 +560,8 @@ nf_nat_setup_info(struct nf_conn *ct,
 	 * manipulations (future optimization: if num_manips == 0,
 	 * orig_tp = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple)
 	 */
-	nf_ct_invert_tuplepr(&curr_tuple,
-			     &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
+	nf_ct_invert_tuple(&curr_tuple,
+			   &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
 
 	get_unique_tuple(&new_tuple, &curr_tuple, range, ct, maniptype);
 
@@ -569,7 +569,7 @@ nf_nat_setup_info(struct nf_conn *ct,
 		struct nf_conntrack_tuple reply;
 
 		/* Alter conntrack table so will recognize replies. */
-		nf_ct_invert_tuplepr(&reply, &new_tuple);
+		nf_ct_invert_tuple(&reply, &new_tuple);
 		nf_conntrack_alter_reply(ct, &reply);
 
 		/* Non-atomic: we own this at the moment. */
@@ -640,7 +640,7 @@ static unsigned int nf_nat_manip_pkt(struct sk_buff *skb, struct nf_conn *ct,
 	struct nf_conntrack_tuple target;
 
 	/* We are aiming to look like inverse of other direction. */
-	nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);
+	nf_ct_invert_tuple(&target, &ct->tuplehash[!dir].tuple);
 
 	l3proto = __nf_nat_l3proto_find(target.src.l3num);
 	if (!l3proto->manip_pkt(skb, 0, &target, mtype))
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index cd94f925495a..35884f836260 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -622,7 +622,7 @@ ovs_ct_find_existing(struct net *net, const struct nf_conntrack_zone *zone,
 	if (natted) {
 		struct nf_conntrack_tuple inverse;
 
-		if (!nf_ct_invert_tuplepr(&inverse, &tuple)) {
+		if (!nf_ct_invert_tuple(&inverse, &tuple)) {
 			pr_debug("ovs_ct_find_existing: Inversion failed!\n");
 			return NULL;
 		}
-- 
cgit v1.2.3


From 70aed4647cbd63e3371386ddfaeb195413f38d99 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Tue, 15 Jan 2019 22:03:44 +0100
Subject: netfilter: conntrack: remove sysctl registration helpers

After previous patch these are not used anymore.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netns/conntrack.h      |  4 --
 net/netfilter/nf_conntrack_proto.c | 77 +-------------------------------------
 2 files changed, 1 insertion(+), 80 deletions(-)

(limited to 'include')

diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h
index c72f413a2d4d..b8faa72e0f5b 100644
--- a/include/net/netns/conntrack.h
+++ b/include/net/netns/conntrack.h
@@ -19,10 +19,6 @@ struct ctl_table_header;
 struct nf_conntrack_ecache;
 
 struct nf_proto_net {
-#ifdef CONFIG_SYSCTL
-	struct ctl_table_header *ctl_table_header;
-	struct ctl_table        *ctl_table;
-#endif
 	unsigned int		users;
 };
 
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index 13b782cc6a7a..c8f024d731c8 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -48,35 +48,6 @@ static struct nf_conntrack_l4proto __rcu *nf_ct_protos[MAX_NF_CT_PROTO + 1] __re
 static DEFINE_MUTEX(nf_ct_proto_mutex);
 
 #ifdef CONFIG_SYSCTL
-static int
-nf_ct_register_sysctl(struct net *net,
-		      struct ctl_table_header **header,
-		      const char *path,
-		      struct ctl_table *table)
-{
-	if (*header == NULL) {
-		*header = register_net_sysctl(net, path, table);
-		if (*header == NULL)
-			return -ENOMEM;
-	}
-
-	return 0;
-}
-
-static void
-nf_ct_unregister_sysctl(struct ctl_table_header **header,
-			struct ctl_table **table,
-			unsigned int users)
-{
-	if (users > 0)
-		return;
-
-	unregister_net_sysctl_table(*header);
-	kfree(*table);
-	*header = NULL;
-	*table = NULL;
-}
-
 __printf(5, 6)
 void nf_l4proto_log_invalid(const struct sk_buff *skb,
 			    struct net *net,
@@ -161,40 +132,6 @@ static struct nf_proto_net *nf_ct_l4proto_net(struct net *net,
 	return NULL;
 }
 
-static
-int nf_ct_l4proto_register_sysctl(struct net *net,
-				  struct nf_proto_net *pn)
-{
-	int err = 0;
-
-#ifdef CONFIG_SYSCTL
-	if (pn->ctl_table != NULL) {
-		err = nf_ct_register_sysctl(net,
-					    &pn->ctl_table_header,
-					    "net/netfilter",
-					    pn->ctl_table);
-		if (err < 0) {
-			if (!pn->users) {
-				kfree(pn->ctl_table);
-				pn->ctl_table = NULL;
-			}
-		}
-	}
-#endif /* CONFIG_SYSCTL */
-	return err;
-}
-
-static
-void nf_ct_l4proto_unregister_sysctl(struct nf_proto_net *pn)
-{
-#ifdef CONFIG_SYSCTL
-	if (pn->ctl_table_header != NULL)
-		nf_ct_unregister_sysctl(&pn->ctl_table_header,
-					&pn->ctl_table,
-					pn->users);
-#endif /* CONFIG_SYSCTL */
-}
-
 /* FIXME: Allow NULL functions and sub in pointers to generic for
    them. --RR */
 int nf_ct_l4proto_register_one(const struct nf_conntrack_l4proto *l4proto)
@@ -237,10 +174,6 @@ static int nf_ct_l4proto_pernet_register_one(struct net *net,
 	if (pn == NULL)
 		goto out;
 
-	ret = nf_ct_l4proto_register_sysctl(net, pn);
-	if (ret < 0)
-		goto out;
-
 	pn->users++;
 out:
 	return ret;
@@ -280,7 +213,6 @@ static void nf_ct_l4proto_pernet_unregister_one(struct net *net,
 		return;
 
 	pn->users--;
-	nf_ct_l4proto_unregister_sysctl(pn);
 }
 
 static void
@@ -859,17 +791,11 @@ int nf_conntrack_proto_pernet_init(struct net *net)
 	err = nf_conntrack_l4proto_generic.init_net(net);
 	if (err < 0)
 		return err;
-	err = nf_ct_l4proto_register_sysctl(net,
-					    pn);
-	if (err < 0)
-		return err;
 
 	err = nf_ct_l4proto_pernet_register(net, builtin_l4proto,
 					    ARRAY_SIZE(builtin_l4proto));
-	if (err < 0) {
-		nf_ct_l4proto_unregister_sysctl(pn);
+	if (err < 0)
 		return err;
-	}
 
 	pn->users++;
 	return 0;
@@ -883,7 +809,6 @@ void nf_conntrack_proto_pernet_fini(struct net *net)
 	nf_ct_l4proto_pernet_unregister(net, builtin_l4proto,
 					ARRAY_SIZE(builtin_l4proto));
 	pn->users--;
-	nf_ct_l4proto_unregister_sysctl(pn);
 #ifdef CONFIG_NF_CT_PROTO_GRE
 	nf_ct_gre_keymap_flush(net);
 #endif
-- 
cgit v1.2.3


From 2a389de86e4a5d0bd3abed9e6fee27050652d339 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Tue, 15 Jan 2019 22:03:45 +0100
Subject: netfilter: conntrack: remove l4proto init and get_net callbacks

Those were needed we still had modular trackers.
As we don't have those anymore, prefer direct calls and remove all
the (un)register infrastructure associated with this.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack_l4proto.h |  15 ++--
 include/net/netns/conntrack.h                |  11 ---
 net/netfilter/nf_conntrack_proto.c           | 103 ++++-----------------------
 net/netfilter/nf_conntrack_proto_dccp.c      |  42 ++++-------
 net/netfilter/nf_conntrack_proto_generic.c   |  11 +--
 net/netfilter/nf_conntrack_proto_gre.c       |   5 +-
 net/netfilter/nf_conntrack_proto_icmp.c      |  12 +---
 net/netfilter/nf_conntrack_proto_icmpv6.c    |  12 +---
 net/netfilter/nf_conntrack_proto_sctp.c      |  29 +++-----
 net/netfilter/nf_conntrack_proto_tcp.c       |  35 +++------
 net/netfilter/nf_conntrack_proto_udp.c       |  24 ++-----
 11 files changed, 65 insertions(+), 234 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h
index 87d9c198c05e..d5909e51ca92 100644
--- a/include/net/netfilter/nf_conntrack_l4proto.h
+++ b/include/net/netfilter/nf_conntrack_l4proto.h
@@ -61,12 +61,6 @@ struct nf_conntrack_l4proto {
 	/* Print out the private part of the conntrack. */
 	void (*print_conntrack)(struct seq_file *s, struct nf_conn *);
 #endif
-
-	/* Init l4proto pernet data */
-	int (*init_net)(struct net *net);
-
-	/* Return the per-net protocol part. */
-	struct nf_proto_net *(*get_net_proto)(struct net *net);
 };
 
 bool icmp_pkt_to_tuple(const struct sk_buff *skb,
@@ -135,6 +129,15 @@ int nf_conntrack_gre_packet(struct nf_conn *ct,
 			    enum ip_conntrack_info ctinfo,
 			    const struct nf_hook_state *state);
 
+void nf_conntrack_generic_init_net(struct net *net);
+void nf_conntrack_tcp_init_net(struct net *net);
+void nf_conntrack_udp_init_net(struct net *net);
+void nf_conntrack_gre_init_net(struct net *net);
+void nf_conntrack_dccp_init_net(struct net *net);
+void nf_conntrack_sctp_init_net(struct net *net);
+void nf_conntrack_icmp_init_net(struct net *net);
+void nf_conntrack_icmpv6_init_net(struct net *net);
+
 /* Existing built-in generic protocol */
 extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_generic;
 
diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h
index b8faa72e0f5b..f19b53130bf7 100644
--- a/include/net/netns/conntrack.h
+++ b/include/net/netns/conntrack.h
@@ -18,17 +18,11 @@
 struct ctl_table_header;
 struct nf_conntrack_ecache;
 
-struct nf_proto_net {
-	unsigned int		users;
-};
-
 struct nf_generic_net {
-	struct nf_proto_net pn;
 	unsigned int timeout;
 };
 
 struct nf_tcp_net {
-	struct nf_proto_net pn;
 	unsigned int timeouts[TCP_CONNTRACK_TIMEOUT_MAX];
 	unsigned int tcp_loose;
 	unsigned int tcp_be_liberal;
@@ -42,18 +36,15 @@ enum udp_conntrack {
 };
 
 struct nf_udp_net {
-	struct nf_proto_net pn;
 	unsigned int timeouts[UDP_CT_MAX];
 };
 
 struct nf_icmp_net {
-	struct nf_proto_net pn;
 	unsigned int timeout;
 };
 
 #ifdef CONFIG_NF_CT_PROTO_DCCP
 struct nf_dccp_net {
-	struct nf_proto_net pn;
 	int dccp_loose;
 	unsigned int dccp_timeout[CT_DCCP_MAX + 1];
 };
@@ -61,7 +52,6 @@ struct nf_dccp_net {
 
 #ifdef CONFIG_NF_CT_PROTO_SCTP
 struct nf_sctp_net {
-	struct nf_proto_net pn;
 	unsigned int timeouts[SCTP_CONNTRACK_MAX];
 };
 #endif
@@ -74,7 +64,6 @@ enum gre_conntrack {
 };
 
 struct nf_gre_net {
-	struct nf_proto_net	nf;
 	struct list_head	keymap_list;
 	unsigned int		timeouts[GRE_CT_MAX];
 };
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index c8f024d731c8..e6bc02c13f0f 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -123,15 +123,6 @@ static int kill_l4proto(struct nf_conn *i, void *data)
 	return nf_ct_protonum(i) == l4proto->l4proto;
 }
 
-static struct nf_proto_net *nf_ct_l4proto_net(struct net *net,
-				const struct nf_conntrack_l4proto *l4proto)
-{
-	if (l4proto->get_net_proto)
-		return l4proto->get_net_proto(net);
-
-	return NULL;
-}
-
 /* FIXME: Allow NULL functions and sub in pointers to generic for
    them. --RR */
 int nf_ct_l4proto_register_one(const struct nf_conntrack_l4proto *l4proto)
@@ -158,27 +149,6 @@ out_unlock:
 }
 EXPORT_SYMBOL_GPL(nf_ct_l4proto_register_one);
 
-static int nf_ct_l4proto_pernet_register_one(struct net *net,
-					     const struct nf_conntrack_l4proto *l4proto)
-{
-	int ret = 0;
-	struct nf_proto_net *pn = NULL;
-
-	if (l4proto->init_net) {
-		ret = l4proto->init_net(net);
-		if (ret < 0)
-			goto out;
-	}
-
-	pn = nf_ct_l4proto_net(net, l4proto);
-	if (pn == NULL)
-		goto out;
-
-	pn->users++;
-out:
-	return ret;
-}
-
 static void __nf_ct_l4proto_unregister_one(const struct nf_conntrack_l4proto *l4proto)
 
 {
@@ -204,17 +174,6 @@ void nf_ct_l4proto_unregister_one(const struct nf_conntrack_l4proto *l4proto)
 }
 EXPORT_SYMBOL_GPL(nf_ct_l4proto_unregister_one);
 
-static void nf_ct_l4proto_pernet_unregister_one(struct net *net,
-				const struct nf_conntrack_l4proto *l4proto)
-{
-	struct nf_proto_net *pn = nf_ct_l4proto_net(net, l4proto);
-
-	if (pn == NULL)
-		return;
-
-	pn->users--;
-}
-
 static void
 nf_ct_l4proto_unregister(const struct nf_conntrack_l4proto * const l4proto[],
 			 unsigned int num_proto)
@@ -252,34 +211,6 @@ nf_ct_l4proto_register(const struct nf_conntrack_l4proto * const l4proto[],
 	return ret;
 }
 
-static void nf_ct_l4proto_pernet_unregister(struct net *net,
-				const struct nf_conntrack_l4proto *const l4proto[],
-				unsigned int num_proto)
-{
-	while (num_proto-- != 0)
-		nf_ct_l4proto_pernet_unregister_one(net, l4proto[num_proto]);
-}
-
-static int nf_ct_l4proto_pernet_register(struct net *net,
-				  const struct nf_conntrack_l4proto *const l4proto[],
-				  unsigned int num_proto)
-{
-	int ret = -EINVAL;
-	unsigned int i;
-
-	for (i = 0; i < num_proto; i++) {
-		ret = nf_ct_l4proto_pernet_register_one(net, l4proto[i]);
-		if (ret < 0)
-			break;
-	}
-	if (i != num_proto) {
-		pr_err("nf_conntrack %d: pernet registration failed\n",
-		       l4proto[i]->l4proto);
-		nf_ct_l4proto_pernet_unregister(net, l4proto, i);
-	}
-	return ret;
-}
-
 static unsigned int nf_confirm(struct sk_buff *skb,
 			       unsigned int protoff,
 			       struct nf_conn *ct,
@@ -784,31 +715,25 @@ void nf_conntrack_proto_fini(void)
 
 int nf_conntrack_proto_pernet_init(struct net *net)
 {
-	int err;
-	struct nf_proto_net *pn = nf_ct_l4proto_net(net,
-					&nf_conntrack_l4proto_generic);
-
-	err = nf_conntrack_l4proto_generic.init_net(net);
-	if (err < 0)
-		return err;
-
-	err = nf_ct_l4proto_pernet_register(net, builtin_l4proto,
-					    ARRAY_SIZE(builtin_l4proto));
-	if (err < 0)
-		return err;
-
-	pn->users++;
+	nf_conntrack_generic_init_net(net);
+	nf_conntrack_udp_init_net(net);
+	nf_conntrack_tcp_init_net(net);
+	nf_conntrack_icmp_init_net(net);
+	nf_conntrack_icmpv6_init_net(net);
+#ifdef CONFIG_NF_CT_PROTO_DCCP
+	nf_conntrack_dccp_init_net(net);
+#endif
+#ifdef CONFIG_NF_CT_PROTO_SCTP
+	nf_conntrack_sctp_init_net(net);
+#endif
+#ifdef CONFIG_NF_CT_PROTO_GRE
+	nf_conntrack_gre_init_net(net);
+#endif
 	return 0;
 }
 
 void nf_conntrack_proto_pernet_fini(struct net *net)
 {
-	struct nf_proto_net *pn = nf_ct_l4proto_net(net,
-					&nf_conntrack_l4proto_generic);
-
-	nf_ct_l4proto_pernet_unregister(net, builtin_l4proto,
-					ARRAY_SIZE(builtin_l4proto));
-	pn->users--;
 #ifdef CONFIG_NF_CT_PROTO_GRE
 	nf_ct_gre_keymap_flush(net);
 #endif
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index 31ba88311bc4..6fca80587505 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -724,34 +724,24 @@ dccp_timeout_nla_policy[CTA_TIMEOUT_DCCP_MAX+1] = {
 };
 #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
 
-static int dccp_init_net(struct net *net)
+void nf_conntrack_dccp_init_net(struct net *net)
 {
 	struct nf_dccp_net *dn = nf_dccp_pernet(net);
-	struct nf_proto_net *pn = &dn->pn;
-
-	if (!pn->users) {
-		/* default values */
-		dn->dccp_loose = 1;
-		dn->dccp_timeout[CT_DCCP_REQUEST]	= 2 * DCCP_MSL;
-		dn->dccp_timeout[CT_DCCP_RESPOND]	= 4 * DCCP_MSL;
-		dn->dccp_timeout[CT_DCCP_PARTOPEN]	= 4 * DCCP_MSL;
-		dn->dccp_timeout[CT_DCCP_OPEN]		= 12 * 3600 * HZ;
-		dn->dccp_timeout[CT_DCCP_CLOSEREQ]	= 64 * HZ;
-		dn->dccp_timeout[CT_DCCP_CLOSING]	= 64 * HZ;
-		dn->dccp_timeout[CT_DCCP_TIMEWAIT]	= 2 * DCCP_MSL;
-
-		/* timeouts[0] is unused, make it same as SYN_SENT so
-		 * ->timeouts[0] contains 'new' timeout, like udp or icmp.
-		 */
-		dn->dccp_timeout[CT_DCCP_NONE] = dn->dccp_timeout[CT_DCCP_REQUEST];
-	}
-
-	return 0;
-}
 
-static struct nf_proto_net *dccp_get_net_proto(struct net *net)
-{
-	return &net->ct.nf_ct_proto.dccp.pn;
+	/* default values */
+	dn->dccp_loose = 1;
+	dn->dccp_timeout[CT_DCCP_REQUEST]	= 2 * DCCP_MSL;
+	dn->dccp_timeout[CT_DCCP_RESPOND]	= 4 * DCCP_MSL;
+	dn->dccp_timeout[CT_DCCP_PARTOPEN]	= 4 * DCCP_MSL;
+	dn->dccp_timeout[CT_DCCP_OPEN]		= 12 * 3600 * HZ;
+	dn->dccp_timeout[CT_DCCP_CLOSEREQ]	= 64 * HZ;
+	dn->dccp_timeout[CT_DCCP_CLOSING]	= 64 * HZ;
+	dn->dccp_timeout[CT_DCCP_TIMEWAIT]	= 2 * DCCP_MSL;
+
+	/* timeouts[0] is unused, make it same as SYN_SENT so
+	 * ->timeouts[0] contains 'new' timeout, like udp or icmp.
+	 */
+	dn->dccp_timeout[CT_DCCP_NONE] = dn->dccp_timeout[CT_DCCP_REQUEST];
 }
 
 const struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp = {
@@ -778,6 +768,4 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp = {
 		.nla_policy	= dccp_timeout_nla_policy,
 	},
 #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
-	.init_net		= dccp_init_net,
-	.get_net_proto		= dccp_get_net_proto,
 };
diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c
index 0edbf82594d0..0f526fafecae 100644
--- a/net/netfilter/nf_conntrack_proto_generic.c
+++ b/net/netfilter/nf_conntrack_proto_generic.c
@@ -60,18 +60,11 @@ generic_timeout_nla_policy[CTA_TIMEOUT_GENERIC_MAX+1] = {
 };
 #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
 
-static int generic_init_net(struct net *net)
+void nf_conntrack_generic_init_net(struct net *net)
 {
 	struct nf_generic_net *gn = nf_generic_pernet(net);
 
 	gn->timeout = nf_ct_generic_timeout;
-
-	return 0;
-}
-
-static struct nf_proto_net *generic_get_net_proto(struct net *net)
-{
-	return &net->ct.nf_ct_proto.generic.pn;
 }
 
 const struct nf_conntrack_l4proto nf_conntrack_l4proto_generic =
@@ -86,6 +79,4 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_generic =
 		.nla_policy	= generic_timeout_nla_policy,
 	},
 #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
-	.init_net		= generic_init_net,
-	.get_net_proto		= generic_get_net_proto,
 };
diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
index e573ec0fa12b..fa765d2fd586 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -313,7 +313,7 @@ gre_timeout_nla_policy[CTA_TIMEOUT_GRE_MAX+1] = {
 };
 #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
 
-static int gre_init_net(struct net *net)
+void nf_conntrack_gre_init_net(struct net *net)
 {
 	struct nf_gre_net *net_gre = gre_pernet(net);
 	int i;
@@ -321,8 +321,6 @@ static int gre_init_net(struct net *net)
 	INIT_LIST_HEAD(&net_gre->keymap_list);
 	for (i = 0; i < GRE_CT_MAX; i++)
 		net_gre->timeouts[i] = gre_timeouts[i];
-
-	return 0;
 }
 
 /* protocol helper struct */
@@ -347,5 +345,4 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_gre = {
 		.nla_policy	= gre_timeout_nla_policy,
 	},
 #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
-	.init_net	= gre_init_net,
 };
diff --git a/net/netfilter/nf_conntrack_proto_icmp.c b/net/netfilter/nf_conntrack_proto_icmp.c
index eb77f747759f..7df477996b16 100644
--- a/net/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/netfilter/nf_conntrack_proto_icmp.c
@@ -298,19 +298,11 @@ icmp_timeout_nla_policy[CTA_TIMEOUT_ICMP_MAX+1] = {
 };
 #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
 
-
-static int icmp_init_net(struct net *net)
+void nf_conntrack_icmp_init_net(struct net *net)
 {
 	struct nf_icmp_net *in = nf_icmp_pernet(net);
 
 	in->timeout = nf_ct_icmp_timeout;
-
-	return 0;
-}
-
-static struct nf_proto_net *icmp_get_net_proto(struct net *net)
-{
-	return &net->ct.nf_ct_proto.icmp.pn;
 }
 
 const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp =
@@ -331,6 +323,4 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp =
 		.nla_policy	= icmp_timeout_nla_policy,
 	},
 #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
-	.init_net		= icmp_init_net,
-	.get_net_proto		= icmp_get_net_proto,
 };
diff --git a/net/netfilter/nf_conntrack_proto_icmpv6.c b/net/netfilter/nf_conntrack_proto_icmpv6.c
index d243ef8a128e..bec4a3211658 100644
--- a/net/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/netfilter/nf_conntrack_proto_icmpv6.c
@@ -309,19 +309,11 @@ icmpv6_timeout_nla_policy[CTA_TIMEOUT_ICMPV6_MAX+1] = {
 };
 #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
 
-
-static int icmpv6_init_net(struct net *net)
+void nf_conntrack_icmpv6_init_net(struct net *net)
 {
 	struct nf_icmp_net *in = nf_icmpv6_pernet(net);
 
 	in->timeout = nf_ct_icmpv6_timeout;
-
-	return 0;
-}
-
-static struct nf_proto_net *icmpv6_get_net_proto(struct net *net)
-{
-	return &net->ct.nf_ct_proto.icmpv6.pn;
 }
 
 const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 =
@@ -342,6 +334,4 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 =
 		.nla_policy	= icmpv6_timeout_nla_policy,
 	},
 #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
-	.init_net		= icmpv6_init_net,
-	.get_net_proto		= icmpv6_get_net_proto,
 };
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index 31130f218d8f..a7818101ad80 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -642,29 +642,18 @@ sctp_timeout_nla_policy[CTA_TIMEOUT_SCTP_MAX+1] = {
 };
 #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
 
-static int sctp_init_net(struct net *net)
+void nf_conntrack_sctp_init_net(struct net *net)
 {
 	struct nf_sctp_net *sn = nf_sctp_pernet(net);
-	struct nf_proto_net *pn = &sn->pn;
-
-	if (!pn->users) {
-		int i;
-
-		for (i = 0; i < SCTP_CONNTRACK_MAX; i++)
-			sn->timeouts[i] = sctp_timeouts[i];
-
-		/* timeouts[0] is unused, init it so ->timeouts[0] contains
-		 * 'new' timeout, like udp or icmp.
-		 */
-		sn->timeouts[0] = sctp_timeouts[SCTP_CONNTRACK_CLOSED];
-	}
+	int i;
 
-	return 0;
-}
+	for (i = 0; i < SCTP_CONNTRACK_MAX; i++)
+		sn->timeouts[i] = sctp_timeouts[i];
 
-static struct nf_proto_net *sctp_get_net_proto(struct net *net)
-{
-	return &net->ct.nf_ct_proto.sctp.pn;
+	/* timeouts[0] is unused, init it so ->timeouts[0] contains
+	 * 'new' timeout, like udp or icmp.
+	 */
+	sn->timeouts[0] = sctp_timeouts[SCTP_CONNTRACK_CLOSED];
 }
 
 const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp = {
@@ -691,6 +680,4 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp = {
 		.nla_policy	= sctp_timeout_nla_policy,
 	},
 #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
-	.init_net		= sctp_init_net,
-	.get_net_proto		= sctp_get_net_proto,
 };
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 422bdedac0ed..01c748fa8913 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -1387,32 +1387,21 @@ static const struct nla_policy tcp_timeout_nla_policy[CTA_TIMEOUT_TCP_MAX+1] = {
 };
 #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
 
-static int tcp_init_net(struct net *net)
+void nf_conntrack_tcp_init_net(struct net *net)
 {
 	struct nf_tcp_net *tn = nf_tcp_pernet(net);
-	struct nf_proto_net *pn = &tn->pn;
-
-	if (!pn->users) {
-		int i;
-
-		for (i = 0; i < TCP_CONNTRACK_TIMEOUT_MAX; i++)
-			tn->timeouts[i] = tcp_timeouts[i];
-
-		/* timeouts[0] is unused, make it same as SYN_SENT so
-		 * ->timeouts[0] contains 'new' timeout, like udp or icmp.
-		 */
-		tn->timeouts[0] = tcp_timeouts[TCP_CONNTRACK_SYN_SENT];
-		tn->tcp_loose = nf_ct_tcp_loose;
-		tn->tcp_be_liberal = nf_ct_tcp_be_liberal;
-		tn->tcp_max_retrans = nf_ct_tcp_max_retrans;
-	}
+	int i;
 
-	return 0;
-}
+	for (i = 0; i < TCP_CONNTRACK_TIMEOUT_MAX; i++)
+		tn->timeouts[i] = tcp_timeouts[i];
 
-static struct nf_proto_net *tcp_get_net_proto(struct net *net)
-{
-	return &net->ct.nf_ct_proto.tcp.pn;
+	/* timeouts[0] is unused, make it same as SYN_SENT so
+	 * ->timeouts[0] contains 'new' timeout, like udp or icmp.
+	 */
+	tn->timeouts[0] = tcp_timeouts[TCP_CONNTRACK_SYN_SENT];
+	tn->tcp_loose = nf_ct_tcp_loose;
+	tn->tcp_be_liberal = nf_ct_tcp_be_liberal;
+	tn->tcp_max_retrans = nf_ct_tcp_max_retrans;
 }
 
 const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp =
@@ -1441,6 +1430,4 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp =
 		.nla_policy	= tcp_timeout_nla_policy,
 	},
 #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
-	.init_net		= tcp_init_net,
-	.get_net_proto		= tcp_get_net_proto,
 };
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
index 6e81e79844d7..951366dfbec3 100644
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -260,25 +260,13 @@ udp_timeout_nla_policy[CTA_TIMEOUT_UDP_MAX+1] = {
 };
 #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
 
-
-static int udp_init_net(struct net *net)
+void nf_conntrack_udp_init_net(struct net *net)
 {
 	struct nf_udp_net *un = nf_udp_pernet(net);
-	struct nf_proto_net *pn = &un->pn;
-
-	if (!pn->users) {
-		int i;
-
-		for (i = 0; i < UDP_CT_MAX; i++)
-			un->timeouts[i] = udp_timeouts[i];
-	}
+	int i;
 
-	return 0;
-}
-
-static struct nf_proto_net *udp_get_net_proto(struct net *net)
-{
-	return &net->ct.nf_ct_proto.udp.pn;
+	for (i = 0; i < UDP_CT_MAX; i++)
+		un->timeouts[i] = udp_timeouts[i];
 }
 
 const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp =
@@ -300,8 +288,6 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp =
 		.nla_policy	= udp_timeout_nla_policy,
 	},
 #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
-	.init_net		= udp_init_net,
-	.get_net_proto		= udp_get_net_proto,
 };
 
 #ifdef CONFIG_NF_CT_PROTO_UDPLITE
@@ -324,7 +310,5 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite =
 		.nla_policy	= udp_timeout_nla_policy,
 	},
 #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
-	.init_net		= udp_init_net,
-	.get_net_proto		= udp_get_net_proto,
 };
 #endif
-- 
cgit v1.2.3


From e56894356f60f9d11bdf53ee5a050a235f6d2b48 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Tue, 15 Jan 2019 22:03:46 +0100
Subject: netfilter: conntrack: remove l4proto destroy hook

Only one user (gre), add a direct call and remove this facility.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack_l4proto.h |  3 ---
 net/netfilter/nf_conntrack_core.c            | 15 +++++++++++----
 net/netfilter/nf_conntrack_proto_gre.c       | 14 --------------
 3 files changed, 11 insertions(+), 21 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h
index d5909e51ca92..5d1419ac6a38 100644
--- a/include/net/netfilter/nf_conntrack_l4proto.h
+++ b/include/net/netfilter/nf_conntrack_l4proto.h
@@ -27,9 +27,6 @@ struct nf_conntrack_l4proto {
 	/* protoinfo nlattr size, closes a hole */
 	u16 nlattr_size;
 
-	/* Called when a conntrack entry is destroyed */
-	void (*destroy)(struct nf_conn *ct);
-
 	/* called by gc worker if table is full */
 	bool (*can_early_drop)(const struct nf_conn *ct);
 
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 728d2b5bdb1a..52e6c5c6f202 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -524,11 +524,18 @@ void nf_ct_tmpl_free(struct nf_conn *tmpl)
 }
 EXPORT_SYMBOL_GPL(nf_ct_tmpl_free);
 
+static void destroy_gre_conntrack(struct nf_conn *ct)
+{
+	struct nf_conn *master = ct->master;
+
+	if (master)
+		nf_ct_gre_keymap_destroy(master);
+}
+
 static void
 destroy_conntrack(struct nf_conntrack *nfct)
 {
 	struct nf_conn *ct = (struct nf_conn *)nfct;
-	const struct nf_conntrack_l4proto *l4proto;
 
 	pr_debug("destroy_conntrack(%p)\n", ct);
 	WARN_ON(atomic_read(&nfct->use) != 0);
@@ -537,9 +544,9 @@ destroy_conntrack(struct nf_conntrack *nfct)
 		nf_ct_tmpl_free(ct);
 		return;
 	}
-	l4proto = __nf_ct_l4proto_find(nf_ct_protonum(ct));
-	if (l4proto->destroy)
-		l4proto->destroy(ct);
+
+	if (unlikely(nf_ct_protonum(ct) == IPPROTO_GRE))
+		destroy_gre_conntrack(ct);
 
 	local_bh_disable();
 	/* Expectations will have been removed in clean_from_lists,
diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
index fa765d2fd586..ee9ab10a32e4 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -249,19 +249,6 @@ int nf_conntrack_gre_packet(struct nf_conn *ct,
 	return NF_ACCEPT;
 }
 
-/* Called when a conntrack entry has already been removed from the hashes
- * and is about to be deleted from memory */
-static void gre_destroy(struct nf_conn *ct)
-{
-	struct nf_conn *master = ct->master;
-	pr_debug(" entering\n");
-
-	if (!master)
-		pr_debug("no master !?!\n");
-	else
-		nf_ct_gre_keymap_destroy(master);
-}
-
 #ifdef CONFIG_NF_CONNTRACK_TIMEOUT
 
 #include <linux/netfilter/nfnetlink.h>
@@ -329,7 +316,6 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_gre = {
 #ifdef CONFIG_NF_CONNTRACK_PROCFS
 	.print_conntrack = gre_print_conntrack,
 #endif
-	.destroy	 = gre_destroy,
 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
 	.tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
 	.nlattr_tuple_size = nf_ct_port_nlattr_tuple_size,
-- 
cgit v1.2.3


From 4a60dc748d121b52533a2956567df4f87a3835b1 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Tue, 15 Jan 2019 22:03:47 +0100
Subject: netfilter: conntrack: remove nf_ct_l4proto_find_get

Its now same as __nf_ct_l4proto_find(), so rename that to
nf_ct_l4proto_find and use it everywhere.

It never returns NULL and doesn't need locks or reference counts.

Before this series:
302824  net/netfilter/nf_conntrack.ko
 21504  net/netfilter/nf_conntrack_proto_gre.ko

  text	   data	    bss	    dec	    hex	filename
  6281	   1732	      4	   8017	   1f51	nf_conntrack_proto_gre.ko
108356	  20613	    236	 129205	  1f8b5	nf_conntrack.ko

After:
294864  net/netfilter/nf_conntrack.ko
  text	   data	    bss	    dec	    hex	filename
106979	  19557	    240	 126776	  1ef38	nf_conntrack.ko

so, even with builtin gre, total size got reduced.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack_core.h    |   2 +-
 include/net/netfilter/nf_conntrack_l4proto.h |   8 +-
 net/netfilter/nf_conntrack_core.c            |  11 +-
 net/netfilter/nf_conntrack_expect.c          |   2 +-
 net/netfilter/nf_conntrack_netlink.c         |  14 +--
 net/netfilter/nf_conntrack_proto.c           | 175 ++++-----------------------
 net/netfilter/nf_conntrack_standalone.c      |   3 +-
 net/netfilter/nf_flow_table_core.c           |   2 +-
 net/netfilter/nfnetlink_cttimeout.c          |   6 +-
 net/netfilter/nft_ct.c                       |   2 +-
 net/netfilter/xt_CT.c                        |   2 +-
 11 files changed, 45 insertions(+), 182 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h
index 235c182022b2..ae41e92251dd 100644
--- a/include/net/netfilter/nf_conntrack_core.h
+++ b/include/net/netfilter/nf_conntrack_core.h
@@ -26,7 +26,7 @@ int nf_conntrack_init_net(struct net *net);
 void nf_conntrack_cleanup_net(struct net *net);
 void nf_conntrack_cleanup_net_list(struct list_head *net_exit_list);
 
-int nf_conntrack_proto_pernet_init(struct net *net);
+void nf_conntrack_proto_pernet_init(struct net *net);
 void nf_conntrack_proto_pernet_fini(struct net *net);
 
 int nf_conntrack_proto_init(void);
diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h
index 5d1419ac6a38..778087591983 100644
--- a/include/net/netfilter/nf_conntrack_l4proto.h
+++ b/include/net/netfilter/nf_conntrack_l4proto.h
@@ -140,13 +140,7 @@ extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_generic;
 
 #define MAX_NF_CT_PROTO IPPROTO_UDPLITE
 
-const struct nf_conntrack_l4proto *__nf_ct_l4proto_find(u8 l4proto);
-
-const struct nf_conntrack_l4proto *nf_ct_l4proto_find_get(u8 l4proto);
-
-/* Protocol global registration. */
-int nf_ct_l4proto_register_one(const struct nf_conntrack_l4proto *proto);
-void nf_ct_l4proto_unregister_one(const struct nf_conntrack_l4proto *proto);
+const struct nf_conntrack_l4proto *nf_ct_l4proto_find(u8 l4proto);
 
 /* Generic netlink helpers */
 int nf_ct_port_tuple_to_nlattr(struct sk_buff *skb,
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 52e6c5c6f202..171659aa69a1 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -845,7 +845,7 @@ static int nf_ct_resolve_clash(struct net *net, struct sk_buff *skb,
 	enum ip_conntrack_info oldinfo;
 	struct nf_conn *loser_ct = nf_ct_get(skb, &oldinfo);
 
-	l4proto = __nf_ct_l4proto_find(nf_ct_protonum(ct));
+	l4proto = nf_ct_l4proto_find(nf_ct_protonum(ct));
 	if (l4proto->allow_clash &&
 	    !nf_ct_is_dying(ct) &&
 	    atomic_inc_not_zero(&ct->ct_general.use)) {
@@ -1117,7 +1117,7 @@ static bool gc_worker_can_early_drop(const struct nf_conn *ct)
 	if (!test_bit(IPS_ASSURED_BIT, &ct->status))
 		return true;
 
-	l4proto = __nf_ct_l4proto_find(nf_ct_protonum(ct));
+	l4proto = nf_ct_l4proto_find(nf_ct_protonum(ct));
 	if (l4proto->can_early_drop && l4proto->can_early_drop(ct))
 		return true;
 
@@ -2452,15 +2452,10 @@ int nf_conntrack_init_net(struct net *net)
 	nf_conntrack_tstamp_pernet_init(net);
 	nf_conntrack_ecache_pernet_init(net);
 	nf_conntrack_helper_pernet_init(net);
+	nf_conntrack_proto_pernet_init(net);
 
-	ret = nf_conntrack_proto_pernet_init(net);
-	if (ret < 0)
-		goto err_proto;
 	return 0;
 
-err_proto:
-	nf_conntrack_ecache_pernet_fini(net);
-	nf_conntrack_expect_pernet_fini(net);
 err_expect:
 	free_percpu(net->ct.stat);
 err_pcpu_lists:
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 3034038bfdf0..334d6e5b7762 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -610,7 +610,7 @@ static int exp_seq_show(struct seq_file *s, void *v)
 		   expect->tuple.src.l3num,
 		   expect->tuple.dst.protonum);
 	print_tuple(s, &expect->tuple,
-		    __nf_ct_l4proto_find(expect->tuple.dst.protonum));
+		    nf_ct_l4proto_find(expect->tuple.dst.protonum));
 
 	if (expect->flags & NF_CT_EXPECT_PERMANENT) {
 		seq_puts(s, "PERMANENT");
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 1213beb5a714..8071bb04a849 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -134,7 +134,7 @@ static int ctnetlink_dump_tuples(struct sk_buff *skb,
 	ret = ctnetlink_dump_tuples_ip(skb, tuple);
 
 	if (ret >= 0) {
-		l4proto = __nf_ct_l4proto_find(tuple->dst.protonum);
+		l4proto = nf_ct_l4proto_find(tuple->dst.protonum);
 		ret = ctnetlink_dump_tuples_proto(skb, tuple, l4proto);
 	}
 	rcu_read_unlock();
@@ -182,7 +182,7 @@ static int ctnetlink_dump_protoinfo(struct sk_buff *skb, struct nf_conn *ct)
 	struct nlattr *nest_proto;
 	int ret;
 
-	l4proto = __nf_ct_l4proto_find(nf_ct_protonum(ct));
+	l4proto = nf_ct_l4proto_find(nf_ct_protonum(ct));
 	if (!l4proto->to_nlattr)
 		return 0;
 
@@ -590,7 +590,7 @@ static size_t ctnetlink_proto_size(const struct nf_conn *ct)
 	len = nla_policy_len(cta_ip_nla_policy, CTA_IP_MAX + 1);
 	len *= 3u; /* ORIG, REPLY, MASTER */
 
-	l4proto = __nf_ct_l4proto_find(nf_ct_protonum(ct));
+	l4proto = nf_ct_l4proto_find(nf_ct_protonum(ct));
 	len += l4proto->nlattr_size;
 	if (l4proto->nlattr_tuple_size) {
 		len4 = l4proto->nlattr_tuple_size();
@@ -1059,7 +1059,7 @@ static int ctnetlink_parse_tuple_proto(struct nlattr *attr,
 	tuple->dst.protonum = nla_get_u8(tb[CTA_PROTO_NUM]);
 
 	rcu_read_lock();
-	l4proto = __nf_ct_l4proto_find(tuple->dst.protonum);
+	l4proto = nf_ct_l4proto_find(tuple->dst.protonum);
 
 	if (likely(l4proto->nlattr_to_tuple)) {
 		ret = nla_validate_nested(attr, CTA_PROTO_MAX,
@@ -1722,11 +1722,9 @@ static int ctnetlink_change_protoinfo(struct nf_conn *ct,
 	if (err < 0)
 		return err;
 
-	rcu_read_lock();
-	l4proto = __nf_ct_l4proto_find(nf_ct_protonum(ct));
+	l4proto = nf_ct_l4proto_find(nf_ct_protonum(ct));
 	if (l4proto->from_nlattr)
 		err = l4proto->from_nlattr(tb, ct);
-	rcu_read_unlock();
 
 	return err;
 }
@@ -2676,7 +2674,7 @@ static int ctnetlink_exp_dump_mask(struct sk_buff *skb,
 	rcu_read_lock();
 	ret = ctnetlink_dump_tuples_ip(skb, &m);
 	if (ret >= 0) {
-		l4proto = __nf_ct_l4proto_find(tuple->dst.protonum);
+		l4proto = nf_ct_l4proto_find(tuple->dst.protonum);
 	ret = ctnetlink_dump_tuples_proto(skb, &m, l4proto);
 	}
 	rcu_read_unlock();
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index e6bc02c13f0f..aa8d3fe0b37f 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -43,8 +43,6 @@
 
 extern unsigned int nf_conntrack_net_id;
 
-static struct nf_conntrack_l4proto __rcu *nf_ct_protos[MAX_NF_CT_PROTO + 1] __read_mostly;
-
 static DEFINE_MUTEX(nf_ct_proto_mutex);
 
 #ifdef CONFIG_SYSCTL
@@ -95,121 +93,32 @@ void nf_ct_l4proto_log_invalid(const struct sk_buff *skb,
 EXPORT_SYMBOL_GPL(nf_ct_l4proto_log_invalid);
 #endif
 
-const struct nf_conntrack_l4proto *__nf_ct_l4proto_find(u8 l4proto)
-{
-	if (unlikely(l4proto >= ARRAY_SIZE(nf_ct_protos)))
-		return &nf_conntrack_l4proto_generic;
-
-	return rcu_dereference(nf_ct_protos[l4proto]);
-}
-EXPORT_SYMBOL_GPL(__nf_ct_l4proto_find);
-
-const struct nf_conntrack_l4proto *nf_ct_l4proto_find_get(u8 l4num)
-{
-	const struct nf_conntrack_l4proto *p;
-
-	rcu_read_lock();
-	p = __nf_ct_l4proto_find(l4num);
-	rcu_read_unlock();
-
-	return p;
-}
-EXPORT_SYMBOL_GPL(nf_ct_l4proto_find_get);
-
-static int kill_l4proto(struct nf_conn *i, void *data)
+const struct nf_conntrack_l4proto *nf_ct_l4proto_find(u8 l4proto)
 {
-	const struct nf_conntrack_l4proto *l4proto;
-	l4proto = data;
-	return nf_ct_protonum(i) == l4proto->l4proto;
-}
-
-/* FIXME: Allow NULL functions and sub in pointers to generic for
-   them. --RR */
-int nf_ct_l4proto_register_one(const struct nf_conntrack_l4proto *l4proto)
-{
-	int ret = 0;
-
-	if ((l4proto->to_nlattr && l4proto->nlattr_size == 0) ||
-	    (l4proto->tuple_to_nlattr && !l4proto->nlattr_tuple_size))
-		return -EINVAL;
-
-	mutex_lock(&nf_ct_proto_mutex);
-	if (rcu_dereference_protected(
-			nf_ct_protos[l4proto->l4proto],
-			lockdep_is_held(&nf_ct_proto_mutex)
-			) != &nf_conntrack_l4proto_generic) {
-		ret = -EBUSY;
-		goto out_unlock;
+	switch (l4proto) {
+	case IPPROTO_UDP: return &nf_conntrack_l4proto_udp;
+	case IPPROTO_TCP: return &nf_conntrack_l4proto_tcp;
+	case IPPROTO_ICMP: return &nf_conntrack_l4proto_icmp;
+#ifdef CONFIG_NF_CT_PROTO_DCCP
+	case IPPROTO_DCCP: return &nf_conntrack_l4proto_dccp;
+#endif
+#ifdef CONFIG_NF_CT_PROTO_SCTP
+	case IPPROTO_SCTP: return &nf_conntrack_l4proto_sctp;
+#endif
+#ifdef CONFIG_NF_CT_PROTO_UDPLITE
+	case IPPROTO_UDPLITE: return &nf_conntrack_l4proto_udplite;
+#endif
+#ifdef CONFIG_NF_CT_PROTO_GRE
+	case IPPROTO_GRE: return &nf_conntrack_l4proto_gre;
+#endif
+#if IS_ENABLED(CONFIG_IPV6)
+	case IPPROTO_ICMPV6: return &nf_conntrack_l4proto_icmpv6;
+#endif /* CONFIG_IPV6 */
 	}
 
-	rcu_assign_pointer(nf_ct_protos[l4proto->l4proto], l4proto);
-out_unlock:
-	mutex_unlock(&nf_ct_proto_mutex);
-	return ret;
-}
-EXPORT_SYMBOL_GPL(nf_ct_l4proto_register_one);
-
-static void __nf_ct_l4proto_unregister_one(const struct nf_conntrack_l4proto *l4proto)
-
-{
-	BUG_ON(l4proto->l4proto >= ARRAY_SIZE(nf_ct_protos));
-
-	BUG_ON(rcu_dereference_protected(
-			nf_ct_protos[l4proto->l4proto],
-			lockdep_is_held(&nf_ct_proto_mutex)
-			) != l4proto);
-	rcu_assign_pointer(nf_ct_protos[l4proto->l4proto],
-			   &nf_conntrack_l4proto_generic);
-}
-
-void nf_ct_l4proto_unregister_one(const struct nf_conntrack_l4proto *l4proto)
-{
-	mutex_lock(&nf_ct_proto_mutex);
-	__nf_ct_l4proto_unregister_one(l4proto);
-	mutex_unlock(&nf_ct_proto_mutex);
-
-	synchronize_net();
-	/* Remove all contrack entries for this protocol */
-	nf_ct_iterate_destroy(kill_l4proto, (void *)l4proto);
-}
-EXPORT_SYMBOL_GPL(nf_ct_l4proto_unregister_one);
-
-static void
-nf_ct_l4proto_unregister(const struct nf_conntrack_l4proto * const l4proto[],
-			 unsigned int num_proto)
-{
-	int i;
-
-	mutex_lock(&nf_ct_proto_mutex);
-	for (i = 0; i < num_proto; i++)
-		__nf_ct_l4proto_unregister_one(l4proto[i]);
-	mutex_unlock(&nf_ct_proto_mutex);
-
-	synchronize_net();
-
-	for (i = 0; i < num_proto; i++)
-		nf_ct_iterate_destroy(kill_l4proto, (void *)l4proto[i]);
-}
-
-static int
-nf_ct_l4proto_register(const struct nf_conntrack_l4proto * const l4proto[],
-		       unsigned int num_proto)
-{
-	int ret = -EINVAL;
-	unsigned int i;
-
-	for (i = 0; i < num_proto; i++) {
-		ret = nf_ct_l4proto_register_one(l4proto[i]);
-		if (ret < 0)
-			break;
-	}
-	if (i != num_proto) {
-		pr_err("nf_conntrack: can't register l4 %d proto.\n",
-		       l4proto[i]->l4proto);
-		nf_ct_l4proto_unregister(l4proto, i);
-	}
-	return ret;
-}
+	return &nf_conntrack_l4proto_generic;
+};
+EXPORT_SYMBOL_GPL(nf_ct_l4proto_find);
 
 static unsigned int nf_confirm(struct sk_buff *skb,
 			       unsigned int protoff,
@@ -651,30 +560,9 @@ void nf_ct_netns_put(struct net *net, uint8_t nfproto)
 }
 EXPORT_SYMBOL_GPL(nf_ct_netns_put);
 
-static const struct nf_conntrack_l4proto * const builtin_l4proto[] = {
-	&nf_conntrack_l4proto_tcp,
-	&nf_conntrack_l4proto_udp,
-	&nf_conntrack_l4proto_icmp,
-#ifdef CONFIG_NF_CT_PROTO_DCCP
-	&nf_conntrack_l4proto_dccp,
-#endif
-#ifdef CONFIG_NF_CT_PROTO_SCTP
-	&nf_conntrack_l4proto_sctp,
-#endif
-#ifdef CONFIG_NF_CT_PROTO_UDPLITE
-	&nf_conntrack_l4proto_udplite,
-#endif
-#ifdef CONFIG_NF_CT_PROTO_GRE
-	&nf_conntrack_l4proto_gre,
-#endif
-#if IS_ENABLED(CONFIG_IPV6)
-	&nf_conntrack_l4proto_icmpv6,
-#endif /* CONFIG_IPV6 */
-};
-
 int nf_conntrack_proto_init(void)
 {
-	int ret = 0, i;
+	int ret;
 
 	ret = nf_register_sockopt(&so_getorigdst);
 	if (ret < 0)
@@ -686,18 +574,8 @@ int nf_conntrack_proto_init(void)
 		goto cleanup_sockopt;
 #endif
 
-	for (i = 0; i < ARRAY_SIZE(nf_ct_protos); i++)
-		RCU_INIT_POINTER(nf_ct_protos[i],
-				 &nf_conntrack_l4proto_generic);
-
-	ret = nf_ct_l4proto_register(builtin_l4proto,
-				     ARRAY_SIZE(builtin_l4proto));
-	if (ret < 0)
-		goto cleanup_sockopt2;
-
 	return ret;
-cleanup_sockopt2:
-	nf_unregister_sockopt(&so_getorigdst);
+
 #if IS_ENABLED(CONFIG_IPV6)
 cleanup_sockopt:
 	nf_unregister_sockopt(&so_getorigdst6);
@@ -713,7 +591,7 @@ void nf_conntrack_proto_fini(void)
 #endif
 }
 
-int nf_conntrack_proto_pernet_init(struct net *net)
+void nf_conntrack_proto_pernet_init(struct net *net)
 {
 	nf_conntrack_generic_init_net(net);
 	nf_conntrack_udp_init_net(net);
@@ -729,7 +607,6 @@ int nf_conntrack_proto_pernet_init(struct net *net)
 #ifdef CONFIG_NF_CT_PROTO_GRE
 	nf_conntrack_gre_init_net(net);
 #endif
-	return 0;
 }
 
 void nf_conntrack_proto_pernet_fini(struct net *net)
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index d848de713dc0..ddfca5f1784c 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -310,8 +310,7 @@ static int ct_seq_show(struct seq_file *s, void *v)
 	if (!net_eq(nf_ct_net(ct), net))
 		goto release;
 
-	l4proto = __nf_ct_l4proto_find(nf_ct_protonum(ct));
-	WARN_ON(!l4proto);
+	l4proto = nf_ct_l4proto_find(nf_ct_protonum(ct));
 
 	ret = -ENOSPC;
 	seq_printf(s, "%-8s %u %-8s %u ",
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index fa0844e2a68d..8099f0f778ab 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -120,7 +120,7 @@ static void flow_offload_fixup_ct_state(struct nf_conn *ct)
 	if (l4num == IPPROTO_TCP)
 		flow_offload_fixup_tcp(&ct->proto.tcp);
 
-	l4proto = __nf_ct_l4proto_find(l4num);
+	l4proto = nf_ct_l4proto_find(l4num);
 	if (!l4proto)
 		return;
 
diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c
index 37b4f84ac153..c69b11ca5aad 100644
--- a/net/netfilter/nfnetlink_cttimeout.c
+++ b/net/netfilter/nfnetlink_cttimeout.c
@@ -122,7 +122,7 @@ static int cttimeout_new_timeout(struct net *net, struct sock *ctnl,
 		return -EBUSY;
 	}
 
-	l4proto = nf_ct_l4proto_find_get(l4num);
+	l4proto = nf_ct_l4proto_find(l4num);
 
 	/* This protocol is not supportted, skip. */
 	if (l4proto->l4proto != l4num) {
@@ -357,7 +357,7 @@ static int cttimeout_default_set(struct net *net, struct sock *ctnl,
 		return -EINVAL;
 
 	l4num = nla_get_u8(cda[CTA_TIMEOUT_L4PROTO]);
-	l4proto = nf_ct_l4proto_find_get(l4num);
+	l4proto = nf_ct_l4proto_find(l4num);
 
 	/* This protocol is not supported, skip. */
 	if (l4proto->l4proto != l4num) {
@@ -438,7 +438,7 @@ static int cttimeout_default_get(struct net *net, struct sock *ctnl,
 
 	l3num = ntohs(nla_get_be16(cda[CTA_TIMEOUT_L3PROTO]));
 	l4num = nla_get_u8(cda[CTA_TIMEOUT_L4PROTO]);
-	l4proto = nf_ct_l4proto_find_get(l4num);
+	l4proto = nf_ct_l4proto_find(l4num);
 
 	err = -EOPNOTSUPP;
 	if (l4proto->l4proto != l4num)
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index 3249cc059048..7b717fad6cdc 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -870,7 +870,7 @@ static int nft_ct_timeout_obj_init(const struct nft_ctx *ctx,
 	l4num = nla_get_u8(tb[NFTA_CT_TIMEOUT_L4PROTO]);
 	priv->l4proto = l4num;
 
-	l4proto = nf_ct_l4proto_find_get(l4num);
+	l4proto = nf_ct_l4proto_find(l4num);
 
 	if (l4proto->l4proto != l4num) {
 		ret = -EOPNOTSUPP;
diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c
index 2c7a4b80206f..0fa863f57575 100644
--- a/net/netfilter/xt_CT.c
+++ b/net/netfilter/xt_CT.c
@@ -159,7 +159,7 @@ xt_ct_set_timeout(struct nf_conn *ct, const struct xt_tgchk_param *par,
 	/* Make sure the timeout policy matches any existing protocol tracker,
 	 * otherwise default to generic.
 	 */
-	l4proto = __nf_ct_l4proto_find(proto);
+	l4proto = nf_ct_l4proto_find(proto);
 	if (timeout->l4proto->l4proto != l4proto->l4proto) {
 		ret = -EINVAL;
 		pr_info_ratelimited("Timeout policy `%s' can only be used by L%d protocol number %d\n",
-- 
cgit v1.2.3


From 0fb4d21956f4a9af225594a46857ccf29bd747bc Mon Sep 17 00:00:00 2001
From: wenxu <wenxu@ucloud.cn>
Date: Wed, 16 Jan 2019 07:53:51 +0800
Subject: netfilter: nft_meta: Add NFT_META_I/OIFKIND meta type

In the ip_rcv the skb goes through the PREROUTING hook first, then kicks
in vrf device and go through the same hook again. When conntrack dnat
works with vrf, there will be some conflict with rules because the
packet goes through the hook twice with different nf status.

ip link add user1 type vrf table 1
ip link add user2 type vrf table 2
ip l set dev tun1 master user1
ip l set dev tun2 master user2

nft add table firewall
nft add chain firewall zones { type filter hook prerouting  priority - 300 \; }
nft add rule firewall zones counter ct zone set iif map { "tun1" : 1, "tun2" : 2 }
nft add chain firewall rule-1000-ingress
nft add rule firewall rule-1000-ingress ct zone 1 tcp dport 22 ct state new counter accept
nft add rule firewall rule-1000-ingress counter drop
nft add chain firewall rule-1000-egress
nft add rule firewall rule-1000-egress tcp dport 22 ct state new counter drop
nft add rule firewall rule-1000-egress counter accept

nft add chain firewall rules-all { type filter hook prerouting priority - 150 \; }
nft add rule firewall rules-all ip daddr vmap { "2.2.2.11" : jump rule-1000-ingress }
nft add rule firewall rules-all ct zone vmap { 1 : jump rule-1000-egress }

nft add rule firewall dnat-all ct zone vmap { 1 : jump dnat-1000 }
nft add rule firewall dnat-1000 ip daddr 2.2.2.11 counter dnat to 10.0.0.7

For a package with ip daddr 2.2.2.11 and tcp dport 22, first time accept in the
rule-1000-ingress and dnat to 10.0.0.7. Then second time the packet goto the wrong
chain rule-1000-egress which leads the packet drop

With this patch, userspace can add the 'don't re-do entire ruleset for
vrf' policy itself via:

nft add rule firewall rules-all meta iifkind "vrf" counter accept

Signed-off-by: wenxu <wenxu@ucloud.cn>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netfilter/nf_tables.h |  4 ++++
 net/netfilter/nft_meta.c                 | 12 ++++++++++++
 2 files changed, 16 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index 99ca95b830b6..0ba8f48bdf0b 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -791,6 +791,8 @@ enum nft_exthdr_attributes {
  * @NFT_META_CGROUP: socket control group (skb->sk->sk_classid)
  * @NFT_META_PRANDOM: a 32bit pseudo-random number
  * @NFT_META_SECPATH: boolean, secpath_exists (!!skb->sp)
+ * @NFT_META_IIFKIND: packet input interface kind name (dev->rtnl_link_ops->kind)
+ * @NFT_META_OIFKIND: packet output interface kind name (dev->rtnl_link_ops->kind)
  */
 enum nft_meta_keys {
 	NFT_META_LEN,
@@ -819,6 +821,8 @@ enum nft_meta_keys {
 	NFT_META_CGROUP,
 	NFT_META_PRANDOM,
 	NFT_META_SECPATH,
+	NFT_META_IIFKIND,
+	NFT_META_OIFKIND,
 };
 
 /**
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index 6df486c5ebd3..987d2d6ce624 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -244,6 +244,16 @@ void nft_meta_get_eval(const struct nft_expr *expr,
 		strncpy((char *)dest, p->br->dev->name, IFNAMSIZ);
 		return;
 #endif
+	case NFT_META_IIFKIND:
+		if (in == NULL || in->rtnl_link_ops == NULL)
+			goto err;
+		strncpy((char *)dest, in->rtnl_link_ops->kind, IFNAMSIZ);
+		break;
+	case NFT_META_OIFKIND:
+		if (out == NULL || out->rtnl_link_ops == NULL)
+			goto err;
+		strncpy((char *)dest, out->rtnl_link_ops->kind, IFNAMSIZ);
+		break;
 	default:
 		WARN_ON(1);
 		goto err;
@@ -340,6 +350,8 @@ static int nft_meta_get_init(const struct nft_ctx *ctx,
 		break;
 	case NFT_META_IIFNAME:
 	case NFT_META_OIFNAME:
+	case NFT_META_IIFKIND:
+	case NFT_META_OIFKIND:
 		len = IFNAMSIZ;
 		break;
 	case NFT_META_PRANDOM:
-- 
cgit v1.2.3


From 472caa69183f7eaf30ecb34451826dd5b98d7b3a Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Thu, 17 Jan 2019 00:11:43 +0100
Subject: netfilter: nat: un-export nf_nat_used_tuple

Not used since 203f2e78200c27e ("netfilter: nat: remove l4proto->unique_tuple")

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_nat.h | 4 ----
 net/netfilter/nf_nat_core.c    | 3 +--
 2 files changed, 1 insertion(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h
index a17eb2f8d40e..8aff77cafb8b 100644
--- a/include/net/netfilter/nf_nat.h
+++ b/include/net/netfilter/nf_nat.h
@@ -47,10 +47,6 @@ extern unsigned int nf_nat_alloc_null_binding(struct nf_conn *ct,
 
 struct nf_conn_nat *nf_ct_nat_ext_add(struct nf_conn *ct);
 
-/* Is this tuple already taken? (not by us)*/
-int nf_nat_used_tuple(const struct nf_conntrack_tuple *tuple,
-		      const struct nf_conn *ignored_conntrack);
-
 static inline struct nf_conn_nat *nfct_nat(const struct nf_conn *ct)
 {
 #if defined(CONFIG_NF_NAT) || defined(CONFIG_NF_NAT_MODULE)
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 44f97b3a215a..35e61038ae96 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -146,7 +146,7 @@ hash_by_src(const struct net *n, const struct nf_conntrack_tuple *tuple)
 }
 
 /* Is this tuple already taken? (not by us) */
-int
+static int
 nf_nat_used_tuple(const struct nf_conntrack_tuple *tuple,
 		  const struct nf_conn *ignored_conntrack)
 {
@@ -161,7 +161,6 @@ nf_nat_used_tuple(const struct nf_conntrack_tuple *tuple,
 	nf_ct_invert_tuple(&reply, tuple);
 	return nf_conntrack_tuple_taken(&reply, ignored_conntrack);
 }
-EXPORT_SYMBOL(nf_nat_used_tuple);
 
 static bool nf_nat_inet_in_range(const struct nf_conntrack_tuple *t,
 				 const struct nf_nat_range2 *range)
-- 
cgit v1.2.3


From 0123a75e1d57c3df31e536868339c98c02c14917 Mon Sep 17 00:00:00 2001
From: Laura Garcia Liebana <nevola@gmail.com>
Date: Fri, 18 Jan 2019 14:36:29 +0100
Subject: Revert "netfilter: nft_hash: add map lookups for hashing operations"

A better way to implement this from userspace has been found without
specific code in the kernel side, revert this.

Fixes: b9ccc07e3f31 ("netfilter: nft_hash: add map lookups for hashing operations")
Signed-off-by: Laura Garcia Liebana <nevola@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netfilter/nf_tables.h |   4 +-
 net/netfilter/nft_hash.c                 | 121 -------------------------------
 2 files changed, 2 insertions(+), 123 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index 0ba8f48bdf0b..030302893d96 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -877,8 +877,8 @@ enum nft_hash_attributes {
 	NFTA_HASH_SEED,
 	NFTA_HASH_OFFSET,
 	NFTA_HASH_TYPE,
-	NFTA_HASH_SET_NAME,
-	NFTA_HASH_SET_ID,
+	NFTA_HASH_SET_NAME,	/* deprecated */
+	NFTA_HASH_SET_ID,	/* deprecated */
 	__NFTA_HASH_MAX,
 };
 #define NFTA_HASH_MAX	(__NFTA_HASH_MAX - 1)
diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c
index c2d237144f74..ea658e6c53e3 100644
--- a/net/netfilter/nft_hash.c
+++ b/net/netfilter/nft_hash.c
@@ -25,7 +25,6 @@ struct nft_jhash {
 	u32			modulus;
 	u32			seed;
 	u32			offset;
-	struct nft_set		*map;
 };
 
 static void nft_jhash_eval(const struct nft_expr *expr,
@@ -42,33 +41,10 @@ static void nft_jhash_eval(const struct nft_expr *expr,
 	regs->data[priv->dreg] = h + priv->offset;
 }
 
-static void nft_jhash_map_eval(const struct nft_expr *expr,
-			       struct nft_regs *regs,
-			       const struct nft_pktinfo *pkt)
-{
-	struct nft_jhash *priv = nft_expr_priv(expr);
-	const void *data = &regs->data[priv->sreg];
-	const struct nft_set *map = priv->map;
-	const struct nft_set_ext *ext;
-	u32 result;
-	bool found;
-
-	result = reciprocal_scale(jhash(data, priv->len, priv->seed),
-					priv->modulus) + priv->offset;
-
-	found = map->ops->lookup(nft_net(pkt), map, &result, &ext);
-	if (!found)
-		return;
-
-	nft_data_copy(&regs->data[priv->dreg],
-		      nft_set_ext_data(ext), map->dlen);
-}
-
 struct nft_symhash {
 	enum nft_registers      dreg:8;
 	u32			modulus;
 	u32			offset;
-	struct nft_set		*map;
 };
 
 static void nft_symhash_eval(const struct nft_expr *expr,
@@ -84,28 +60,6 @@ static void nft_symhash_eval(const struct nft_expr *expr,
 	regs->data[priv->dreg] = h + priv->offset;
 }
 
-static void nft_symhash_map_eval(const struct nft_expr *expr,
-				 struct nft_regs *regs,
-				 const struct nft_pktinfo *pkt)
-{
-	struct nft_symhash *priv = nft_expr_priv(expr);
-	struct sk_buff *skb = pkt->skb;
-	const struct nft_set *map = priv->map;
-	const struct nft_set_ext *ext;
-	u32 result;
-	bool found;
-
-	result = reciprocal_scale(__skb_get_hash_symmetric(skb),
-				  priv->modulus) + priv->offset;
-
-	found = map->ops->lookup(nft_net(pkt), map, &result, &ext);
-	if (!found)
-		return;
-
-	nft_data_copy(&regs->data[priv->dreg],
-		      nft_set_ext_data(ext), map->dlen);
-}
-
 static const struct nla_policy nft_hash_policy[NFTA_HASH_MAX + 1] = {
 	[NFTA_HASH_SREG]	= { .type = NLA_U32 },
 	[NFTA_HASH_DREG]	= { .type = NLA_U32 },
@@ -114,9 +68,6 @@ static const struct nla_policy nft_hash_policy[NFTA_HASH_MAX + 1] = {
 	[NFTA_HASH_SEED]	= { .type = NLA_U32 },
 	[NFTA_HASH_OFFSET]	= { .type = NLA_U32 },
 	[NFTA_HASH_TYPE]	= { .type = NLA_U32 },
-	[NFTA_HASH_SET_NAME]	= { .type = NLA_STRING,
-				    .len = NFT_SET_MAXNAMELEN - 1 },
-	[NFTA_HASH_SET_ID]	= { .type = NLA_U32 },
 };
 
 static int nft_jhash_init(const struct nft_ctx *ctx,
@@ -166,20 +117,6 @@ static int nft_jhash_init(const struct nft_ctx *ctx,
 					   NFT_DATA_VALUE, sizeof(u32));
 }
 
-static int nft_jhash_map_init(const struct nft_ctx *ctx,
-			      const struct nft_expr *expr,
-			      const struct nlattr * const tb[])
-{
-	struct nft_jhash *priv = nft_expr_priv(expr);
-	u8 genmask = nft_genmask_next(ctx->net);
-
-	nft_jhash_init(ctx, expr, tb);
-	priv->map = nft_set_lookup_global(ctx->net, ctx->table,
-					  tb[NFTA_HASH_SET_NAME],
-					  tb[NFTA_HASH_SET_ID], genmask);
-	return PTR_ERR_OR_ZERO(priv->map);
-}
-
 static int nft_symhash_init(const struct nft_ctx *ctx,
 			    const struct nft_expr *expr,
 			    const struct nlattr * const tb[])
@@ -206,20 +143,6 @@ static int nft_symhash_init(const struct nft_ctx *ctx,
 					   NFT_DATA_VALUE, sizeof(u32));
 }
 
-static int nft_symhash_map_init(const struct nft_ctx *ctx,
-				const struct nft_expr *expr,
-				const struct nlattr * const tb[])
-{
-	struct nft_jhash *priv = nft_expr_priv(expr);
-	u8 genmask = nft_genmask_next(ctx->net);
-
-	nft_symhash_init(ctx, expr, tb);
-	priv->map = nft_set_lookup_global(ctx->net, ctx->table,
-					  tb[NFTA_HASH_SET_NAME],
-					  tb[NFTA_HASH_SET_ID], genmask);
-	return PTR_ERR_OR_ZERO(priv->map);
-}
-
 static int nft_jhash_dump(struct sk_buff *skb,
 			  const struct nft_expr *expr)
 {
@@ -247,18 +170,6 @@ nla_put_failure:
 	return -1;
 }
 
-static int nft_jhash_map_dump(struct sk_buff *skb,
-			       const struct nft_expr *expr)
-{
-	const struct nft_jhash *priv = nft_expr_priv(expr);
-
-	if (nft_jhash_dump(skb, expr) ||
-	    nla_put_string(skb, NFTA_HASH_SET_NAME, priv->map->name))
-		return -1;
-
-	return 0;
-}
-
 static int nft_symhash_dump(struct sk_buff *skb,
 			    const struct nft_expr *expr)
 {
@@ -279,18 +190,6 @@ nla_put_failure:
 	return -1;
 }
 
-static int nft_symhash_map_dump(struct sk_buff *skb,
-				const struct nft_expr *expr)
-{
-	const struct nft_symhash *priv = nft_expr_priv(expr);
-
-	if (nft_symhash_dump(skb, expr) ||
-	    nla_put_string(skb, NFTA_HASH_SET_NAME, priv->map->name))
-		return -1;
-
-	return 0;
-}
-
 static struct nft_expr_type nft_hash_type;
 static const struct nft_expr_ops nft_jhash_ops = {
 	.type		= &nft_hash_type,
@@ -300,14 +199,6 @@ static const struct nft_expr_ops nft_jhash_ops = {
 	.dump		= nft_jhash_dump,
 };
 
-static const struct nft_expr_ops nft_jhash_map_ops = {
-	.type		= &nft_hash_type,
-	.size		= NFT_EXPR_SIZE(sizeof(struct nft_jhash)),
-	.eval		= nft_jhash_map_eval,
-	.init		= nft_jhash_map_init,
-	.dump		= nft_jhash_map_dump,
-};
-
 static const struct nft_expr_ops nft_symhash_ops = {
 	.type		= &nft_hash_type,
 	.size		= NFT_EXPR_SIZE(sizeof(struct nft_symhash)),
@@ -316,14 +207,6 @@ static const struct nft_expr_ops nft_symhash_ops = {
 	.dump		= nft_symhash_dump,
 };
 
-static const struct nft_expr_ops nft_symhash_map_ops = {
-	.type		= &nft_hash_type,
-	.size		= NFT_EXPR_SIZE(sizeof(struct nft_symhash)),
-	.eval		= nft_symhash_map_eval,
-	.init		= nft_symhash_map_init,
-	.dump		= nft_symhash_map_dump,
-};
-
 static const struct nft_expr_ops *
 nft_hash_select_ops(const struct nft_ctx *ctx,
 		    const struct nlattr * const tb[])
@@ -336,12 +219,8 @@ nft_hash_select_ops(const struct nft_ctx *ctx,
 	type = ntohl(nla_get_be32(tb[NFTA_HASH_TYPE]));
 	switch (type) {
 	case NFT_HASH_SYM:
-		if (tb[NFTA_HASH_SET_NAME])
-			return &nft_symhash_map_ops;
 		return &nft_symhash_ops;
 	case NFT_HASH_JENKINS:
-		if (tb[NFTA_HASH_SET_NAME])
-			return &nft_jhash_map_ops;
 		return &nft_jhash_ops;
 	default:
 		break;
-- 
cgit v1.2.3


From 570d0200123fb4f809aa2f6226e93a458d664d70 Mon Sep 17 00:00:00 2001
From: Wei Yang <richardw.yang@linux.intel.com>
Date: Fri, 18 Jan 2019 10:34:59 +0800
Subject: driver core: move device->knode_class to device_private

As the description of struct device_private says, it stores data which
is private to driver core. And it already has similar fields like:
knode_parent, knode_driver, knode_driver and knode_bus. This look it is
more proper to put knode_class together with those fields to make it
private to driver core.

This patch move device->knode_class to device_private to make it comply
with code convention.

Signed-off-by: Wei Yang <richardw.yang@linux.intel.com>
Reviewed-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/base.h    |  4 ++++
 drivers/base/class.c   | 14 ++++++++++----
 drivers/base/core.c    |  4 ++--
 include/linux/device.h |  1 -
 4 files changed, 16 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/drivers/base/base.h b/drivers/base/base.h
index 7a419a7a6235..37329a668935 100644
--- a/drivers/base/base.h
+++ b/drivers/base/base.h
@@ -60,6 +60,7 @@ struct driver_private {
  * @knode_parent - node in sibling list
  * @knode_driver - node in driver list
  * @knode_bus - node in bus list
+ * @knode_class - node in class list
  * @deferred_probe - entry in deferred_probe_list which is used to retry the
  *	binding of drivers which were unable to get all the resources needed by
  *	the device; typically because it depends on another driver getting
@@ -74,6 +75,7 @@ struct device_private {
 	struct klist_node knode_parent;
 	struct klist_node knode_driver;
 	struct klist_node knode_bus;
+	struct klist_node knode_class;
 	struct list_head deferred_probe;
 	struct device *device;
 };
@@ -83,6 +85,8 @@ struct device_private {
 	container_of(obj, struct device_private, knode_driver)
 #define to_device_private_bus(obj)	\
 	container_of(obj, struct device_private, knode_bus)
+#define to_device_private_class(obj)	\
+	container_of(obj, struct device_private, knode_class)
 
 /* initialisation functions */
 extern int devices_init(void);
diff --git a/drivers/base/class.c b/drivers/base/class.c
index 54def4e02f00..d8a6a5864c2e 100644
--- a/drivers/base/class.c
+++ b/drivers/base/class.c
@@ -117,16 +117,22 @@ static void class_put(struct class *cls)
 		kset_put(&cls->p->subsys);
 }
 
+static struct device *klist_class_to_dev(struct klist_node *n)
+{
+	struct device_private *p = to_device_private_class(n);
+	return p->device;
+}
+
 static void klist_class_dev_get(struct klist_node *n)
 {
-	struct device *dev = container_of(n, struct device, knode_class);
+	struct device *dev = klist_class_to_dev(n);
 
 	get_device(dev);
 }
 
 static void klist_class_dev_put(struct klist_node *n)
 {
-	struct device *dev = container_of(n, struct device, knode_class);
+	struct device *dev = klist_class_to_dev(n);
 
 	put_device(dev);
 }
@@ -277,7 +283,7 @@ void class_dev_iter_init(struct class_dev_iter *iter, struct class *class,
 	struct klist_node *start_knode = NULL;
 
 	if (start)
-		start_knode = &start->knode_class;
+		start_knode = &start->p->knode_class;
 	klist_iter_init_node(&class->p->klist_devices, &iter->ki, start_knode);
 	iter->type = type;
 }
@@ -304,7 +310,7 @@ struct device *class_dev_iter_next(struct class_dev_iter *iter)
 		knode = klist_next(&iter->ki);
 		if (!knode)
 			return NULL;
-		dev = container_of(knode, struct device, knode_class);
+		dev = klist_class_to_dev(knode);
 		if (!iter->type || iter->type == dev->type)
 			return dev;
 	}
diff --git a/drivers/base/core.c b/drivers/base/core.c
index 0073b09bb99f..4a4b6f8cbc4f 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -1966,7 +1966,7 @@ int device_add(struct device *dev)
 	if (dev->class) {
 		mutex_lock(&dev->class->p->mutex);
 		/* tie the class to the device */
-		klist_add_tail(&dev->knode_class,
+		klist_add_tail(&dev->p->knode_class,
 			       &dev->class->p->klist_devices);
 
 		/* notify any interfaces that the device is here */
@@ -2105,7 +2105,7 @@ void device_del(struct device *dev)
 			if (class_intf->remove_dev)
 				class_intf->remove_dev(dev, class_intf);
 		/* remove the device from the class list */
-		klist_del(&dev->knode_class);
+		klist_del(&dev->p->knode_class);
 		mutex_unlock(&dev->class->p->mutex);
 	}
 	device_remove_file(dev, &dev_attr_uevent);
diff --git a/include/linux/device.h b/include/linux/device.h
index 6cb4640b6160..d0e452fd0bff 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -1035,7 +1035,6 @@ struct device {
 	spinlock_t		devres_lock;
 	struct list_head	devres_head;
 
-	struct klist_node	knode_class;
 	struct class		*class;
 	const struct attribute_group **groups;	/* optional groups */
 
-- 
cgit v1.2.3


From 62bc79d35ebb55451112979babea864975cfd16d Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Fri, 18 Jan 2019 11:20:41 +0900
Subject: ASoC: soc.h: add explanation of legacy/modern style of dai_link

Current ALSA SoC is assuming 1 CPU 1 Platform (= DMA) style system.
Because of this background, it is directly using
xxx_name / xxx_of_node / xxx_dai_name on dai_link.
Let's call it as legacy style here.

More complex style system like multi CPU multi Platform (= DMA) will
coming. To supporting it, we can use snd_soc_dai_link_component on
dai_link. Let's call it as modern style here.
But current ALSA SoC can't support it so far. Thus, we need to have
multi CPU / multi Codec / multi Platform style in the future on ALSA SoC.

Currently we already have multi Codec support. Platform is starting to
use modern style on dai_link, but still style only. Multi Platform is
not yet implemented. And we still don't have multi CPU support on ALSA
SoC, and not have modern style either.

Currently, if driver is using legacy style Codec/Platform, it will be
converted to modern style on soc-core. This means, we are using glue code
for legacy vs modern style so far on ALSA SoC.
We can fully switch to modern style on all drivers if ALSA SoC supported
modern style for CPU, and then, legacy style code will be removed from
ALSA SoC.
Untile then, we need to keep both legacy/modern style and its glue code.
This patch adds such future plan and background on soc.h

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/soc.h  | 36 ++++++++++++++++++++++++++++++++++++
 sound/soc/soc-core.c | 20 ++++++++++++++++++--
 2 files changed, 54 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/sound/soc.h b/include/sound/soc.h
index e665f111b0d2..c31b6d122ff6 100644
--- a/include/sound/soc.h
+++ b/include/sound/soc.h
@@ -891,6 +891,18 @@ struct snd_soc_dai_link {
 	/* config - must be set by machine driver */
 	const char *name;			/* Codec name */
 	const char *stream_name;		/* Stream name */
+
+	/*
+	 *	cpu_name
+	 *	cpu_of_node
+	 *	cpu_dai_name
+	 *
+	 * These are legacy style, and will be replaced to
+	 * modern style (= snd_soc_dai_link_component) in the future,
+	 * but, not yet supported so far.
+	 * If modern style was supported for CPU, all driver will switch
+	 * to use it, and, legacy style code will be removed from ALSA SoC.
+	 */
 	/*
 	 * You MAY specify the link's CPU-side device, either by device name,
 	 * or by DT/OF node, but not both. If this information is omitted,
@@ -906,6 +918,19 @@ struct snd_soc_dai_link {
 	 * only, which only works well when that device exposes a single DAI.
 	 */
 	const char *cpu_dai_name;
+
+	/*
+	 *	codec_name
+	 *	codec_of_node
+	 *	codec_dai_name
+	 *
+	 * These are legacy style, it will be converted to modern style
+	 * (= snd_soc_dai_link_component) automatically in soc-core
+	 * if driver is using legacy style.
+	 * Driver shouldn't use both legacy and modern style in the same time.
+	 * If modern style was supported for CPU, all driver will switch
+	 * to use it, and, legacy style code will be removed from ALSA SoC.
+	 */
 	/*
 	 * You MUST specify the link's codec, either by device name, or by
 	 * DT/OF node, but not both.
@@ -918,6 +943,17 @@ struct snd_soc_dai_link {
 	struct snd_soc_dai_link_component *codecs;
 	unsigned int num_codecs;
 
+	/*
+	 *	platform_name
+	 *	platform_of_node
+	 *
+	 * These are legacy style, it will be converted to modern style
+	 * (= snd_soc_dai_link_component) automatically in soc-core
+	 * if driver is using legacy style.
+	 * Driver shouldn't use both legacy and modern style in the same time.
+	 * If modern style was supported for CPU, all driver will switch
+	 * to use it, and, legacy style code will be removed from ALSA SoC.
+	 */
 	/*
 	 * You MAY specify the link's platform/PCM/DMA driver, either by
 	 * device name, or by DT/OF node, but not both. Some forms of link
diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c
index 890d6c9c2752..de2851f1b3df 100644
--- a/sound/soc/soc-core.c
+++ b/sound/soc/soc-core.c
@@ -1029,9 +1029,14 @@ static int snd_soc_init_platform(struct snd_soc_card *card,
 	struct snd_soc_dai_link_component *platform = dai_link->platform;
 
 	/*
-	 * FIXME
+	 * REMOVE ME
 	 *
-	 * this function should be removed in the future
+	 * This is glue code for Legacy vs Modern dai_link.
+	 * This function will be removed if all derivers are switched to
+	 * modern style dai_link.
+	 * Driver shouldn't use both legacy and modern style in the same time.
+	 * see
+	 *	soc.h :: struct snd_soc_dai_link
 	 */
 	/* convert Legacy platform link */
 	if (!platform || dai_link->legacy_platform) {
@@ -1059,6 +1064,17 @@ static int snd_soc_init_platform(struct snd_soc_card *card,
 static int snd_soc_init_multicodec(struct snd_soc_card *card,
 				   struct snd_soc_dai_link *dai_link)
 {
+	/*
+	 * REMOVE ME
+	 *
+	 * This is glue code for Legacy vs Modern dai_link.
+	 * This function will be removed if all derivers are switched to
+	 * modern style dai_link.
+	 * Driver shouldn't use both legacy and modern style in the same time.
+	 * see
+	 *	soc.h :: struct snd_soc_dai_link
+	 */
+
 	/* Legacy codec/codec_dai link is a single entry in multicodec */
 	if (dai_link->codec_name || dai_link->codec_of_node ||
 	    dai_link->codec_dai_name) {
-- 
cgit v1.2.3


From 1cfb2a512e74e577bb0ed7c8d76df90a41a83f6a Mon Sep 17 00:00:00 2001
From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Date: Fri, 18 Jan 2019 19:15:59 +0900
Subject: LSM: Make lsm_early_cred() and lsm_early_task() local functions.

Since current->cred == current->real_cred when ordered_lsm_init()
is called, and lsm_early_cred()/lsm_early_task() need to be called
between the amount of required bytes is determined and module specific
initialization function is called, we can move these calls from
individual modules to ordered_lsm_init().

Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Acked-by: Casey Schaufler <casey@schaufler-ca.com>
Signed-off-by: James Morris <james.morris@microsoft.com>
---
 include/linux/lsm_hooks.h  |  5 -----
 security/apparmor/lsm.c    |  2 --
 security/security.c        | 27 +++++++++++----------------
 security/selinux/hooks.c   |  1 -
 security/smack/smack_lsm.c |  2 --
 security/tomoyo/tomoyo.c   |  1 -
 6 files changed, 11 insertions(+), 27 deletions(-)

(limited to 'include')

diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h
index 195707210975..22fc786d723a 100644
--- a/include/linux/lsm_hooks.h
+++ b/include/linux/lsm_hooks.h
@@ -2112,9 +2112,4 @@ static inline void security_delete_hooks(struct security_hook_list *hooks,
 
 extern int lsm_inode_alloc(struct inode *inode);
 
-#ifdef CONFIG_SECURITY
-void __init lsm_early_cred(struct cred *cred);
-void __init lsm_early_task(struct task_struct *task);
-#endif
-
 #endif /* ! __LINUX_LSM_HOOKS_H */
diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c
index b6c395e2acd0..bb5a02d2439f 100644
--- a/security/apparmor/lsm.c
+++ b/security/apparmor/lsm.c
@@ -1484,8 +1484,6 @@ static int __init set_init_ctx(void)
 {
 	struct cred *cred = (struct cred *)current->real_cred;
 
-	lsm_early_cred(cred);
-	lsm_early_task(current);
 	set_cred_label(cred, aa_get_label(ns_unconfined(root_ns)));
 
 	return 0;
diff --git a/security/security.c b/security/security.c
index a618e22df5c6..992b612c819a 100644
--- a/security/security.c
+++ b/security/security.c
@@ -278,6 +278,9 @@ static void __init ordered_lsm_parse(const char *order, const char *origin)
 	kfree(sep);
 }
 
+static void __init lsm_early_cred(struct cred *cred);
+static void __init lsm_early_task(struct task_struct *task);
+
 static void __init ordered_lsm_init(void)
 {
 	struct lsm_info **lsm;
@@ -312,6 +315,8 @@ static void __init ordered_lsm_init(void)
 						    blob_sizes.lbs_inode, 0,
 						    SLAB_PANIC, NULL);
 
+	lsm_early_cred((struct cred *) current->cred);
+	lsm_early_task(current);
 	for (lsm = ordered_lsms; *lsm; lsm++)
 		initialize_lsm(*lsm);
 
@@ -465,17 +470,12 @@ static int lsm_cred_alloc(struct cred *cred, gfp_t gfp)
  * lsm_early_cred - during initialization allocate a composite cred blob
  * @cred: the cred that needs a blob
  *
- * Allocate the cred blob for all the modules if it's not already there
+ * Allocate the cred blob for all the modules
  */
-void __init lsm_early_cred(struct cred *cred)
+static void __init lsm_early_cred(struct cred *cred)
 {
-	int rc;
+	int rc = lsm_cred_alloc(cred, GFP_KERNEL);
 
-	if (cred == NULL)
-		panic("%s: NULL cred.\n", __func__);
-	if (cred->security != NULL)
-		return;
-	rc = lsm_cred_alloc(cred, GFP_KERNEL);
 	if (rc)
 		panic("%s: Early cred alloc failed.\n", __func__);
 }
@@ -589,17 +589,12 @@ int lsm_msg_msg_alloc(struct msg_msg *mp)
  * lsm_early_task - during initialization allocate a composite task blob
  * @task: the task that needs a blob
  *
- * Allocate the task blob for all the modules if it's not already there
+ * Allocate the task blob for all the modules
  */
-void __init lsm_early_task(struct task_struct *task)
+static void __init lsm_early_task(struct task_struct *task)
 {
-	int rc;
+	int rc = lsm_task_alloc(task);
 
-	if (task == NULL)
-		panic("%s: task cred.\n", __func__);
-	if (task->security != NULL)
-		return;
-	rc = lsm_task_alloc(task);
 	if (rc)
 		panic("%s: Early task alloc failed.\n", __func__);
 }
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index b2ee49f938f1..5d92167dbe05 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -207,7 +207,6 @@ static void cred_init_security(void)
 	struct cred *cred = (struct cred *) current->real_cred;
 	struct task_security_struct *tsec;
 
-	lsm_early_cred(cred);
 	tsec = selinux_cred(cred);
 	tsec->osid = tsec->sid = SECINITSID_KERNEL;
 }
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
index 0b848b1f6366..79d6d2a6a0bc 100644
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -4671,8 +4671,6 @@ static __init int smack_init(void)
 	if (!smack_inode_cache)
 		return -ENOMEM;
 
-	lsm_early_cred(cred);
-
 	/*
 	 * Set the security state for the initial task.
 	 */
diff --git a/security/tomoyo/tomoyo.c b/security/tomoyo/tomoyo.c
index 066c0daf0efc..2b3eee06004b 100644
--- a/security/tomoyo/tomoyo.c
+++ b/security/tomoyo/tomoyo.c
@@ -566,7 +566,6 @@ static int __init tomoyo_init(void)
 	/* register ourselves with the security framework */
 	security_add_hooks(tomoyo_hooks, ARRAY_SIZE(tomoyo_hooks), "tomoyo");
 	printk(KERN_INFO "TOMOYO Linux initialized\n");
-	lsm_early_cred(cred);
 	blob = tomoyo_cred(cred);
 	*blob = &tomoyo_kernel_domain;
 	tomoyo_mm_init();
-- 
cgit v1.2.3


From 7527a7b157d1191b23562ed70154ae93bd65f845 Mon Sep 17 00:00:00 2001
From: Parav Pandit <parav@mellanox.com>
Date: Thu, 17 Jan 2019 20:14:15 +0200
Subject: IB/core: Simplify rdma cgroup registration

RDMA cgroup registration routine always returns success, so simplify
function to be void and run clang formatter over whole CONFIG_CGROUP_RDMA
art of core_priv.h.

This reduces unwinding error path for regular registration and future net
namespace change functionality for rdma device.

Signed-off-by: Parav Pandit <parav@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/core/cgroup.c    |  5 ++---
 drivers/infiniband/core/core_priv.h | 17 +++++++++++------
 drivers/infiniband/core/device.c    |  8 +-------
 include/linux/cgroup_rdma.h         |  2 +-
 kernel/cgroup/rdma.c                |  5 +----
 5 files changed, 16 insertions(+), 21 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/cgroup.c b/drivers/infiniband/core/cgroup.c
index 126ac5f99db7..388fd04e5f63 100644
--- a/drivers/infiniband/core/cgroup.c
+++ b/drivers/infiniband/core/cgroup.c
@@ -21,12 +21,11 @@
  * Register with the rdma cgroup. Should be called before
  * exposing rdma device to user space applications to avoid
  * resource accounting leak.
- * Returns 0 on success or otherwise failure code.
  */
-int ib_device_register_rdmacg(struct ib_device *device)
+void ib_device_register_rdmacg(struct ib_device *device)
 {
 	device->cg_device.name = device->name;
-	return rdmacg_register_device(&device->cg_device);
+	rdmacg_register_device(&device->cg_device);
 }
 
 /**
diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h
index aca75c74e451..42a49982f66e 100644
--- a/drivers/infiniband/core/core_priv.h
+++ b/drivers/infiniband/core/core_priv.h
@@ -115,7 +115,7 @@ void ib_cache_cleanup_one(struct ib_device *device);
 void ib_cache_release_one(struct ib_device *device);
 
 #ifdef CONFIG_CGROUP_RDMA
-int ib_device_register_rdmacg(struct ib_device *device);
+void ib_device_register_rdmacg(struct ib_device *device);
 void ib_device_unregister_rdmacg(struct ib_device *device);
 
 int ib_rdmacg_try_charge(struct ib_rdmacg_object *cg_obj,
@@ -126,21 +126,26 @@ void ib_rdmacg_uncharge(struct ib_rdmacg_object *cg_obj,
 			struct ib_device *device,
 			enum rdmacg_resource_type resource_index);
 #else
-static inline int ib_device_register_rdmacg(struct ib_device *device)
-{ return 0; }
+static inline void ib_device_register_rdmacg(struct ib_device *device)
+{
+}
 
 static inline void ib_device_unregister_rdmacg(struct ib_device *device)
-{ }
+{
+}
 
 static inline int ib_rdmacg_try_charge(struct ib_rdmacg_object *cg_obj,
 				       struct ib_device *device,
 				       enum rdmacg_resource_type resource_index)
-{ return 0; }
+{
+	return 0;
+}
 
 static inline void ib_rdmacg_uncharge(struct ib_rdmacg_object *cg_obj,
 				      struct ib_device *device,
 				      enum rdmacg_resource_type resource_index)
-{ }
+{
+}
 #endif
 
 static inline bool rdma_is_upper_dev_rcu(struct net_device *dev,
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 4a9aa6d10c5e..200431c540f2 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -599,12 +599,7 @@ int ib_register_device(struct ib_device *device, const char *name)
 
 	device->index = __dev_new_index();
 
-	ret = ib_device_register_rdmacg(device);
-	if (ret) {
-		dev_warn(&device->dev,
-			 "Couldn't register device with rdma cgroup\n");
-		goto dev_cleanup;
-	}
+	ib_device_register_rdmacg(device);
 
 	ret = ib_device_register_sysfs(device);
 	if (ret) {
@@ -627,7 +622,6 @@ int ib_register_device(struct ib_device *device, const char *name)
 
 cg_cleanup:
 	ib_device_unregister_rdmacg(device);
-dev_cleanup:
 	cleanup_device(device);
 out:
 	mutex_unlock(&device_mutex);
diff --git a/include/linux/cgroup_rdma.h b/include/linux/cgroup_rdma.h
index e94290b29e99..ef1bae2983f3 100644
--- a/include/linux/cgroup_rdma.h
+++ b/include/linux/cgroup_rdma.h
@@ -39,7 +39,7 @@ struct rdmacg_device {
  * APIs for RDMA/IB stack to publish when a device wants to
  * participate in resource accounting
  */
-int rdmacg_register_device(struct rdmacg_device *device);
+void rdmacg_register_device(struct rdmacg_device *device);
 void rdmacg_unregister_device(struct rdmacg_device *device);
 
 /* APIs for RDMA/IB stack to charge/uncharge pool specific resources */
diff --git a/kernel/cgroup/rdma.c b/kernel/cgroup/rdma.c
index d3bbb757ee49..1d75ae7f1cb7 100644
--- a/kernel/cgroup/rdma.c
+++ b/kernel/cgroup/rdma.c
@@ -313,10 +313,8 @@ EXPORT_SYMBOL(rdmacg_try_charge);
  * If IB stack wish a device to participate in rdma cgroup resource
  * tracking, it must invoke this API to register with rdma cgroup before
  * any user space application can start using the RDMA resources.
- * Returns 0 on success or EINVAL when table length given is beyond
- * supported size.
  */
-int rdmacg_register_device(struct rdmacg_device *device)
+void rdmacg_register_device(struct rdmacg_device *device)
 {
 	INIT_LIST_HEAD(&device->dev_node);
 	INIT_LIST_HEAD(&device->rpools);
@@ -324,7 +322,6 @@ int rdmacg_register_device(struct rdmacg_device *device)
 	mutex_lock(&rdmacg_mutex);
 	list_add_tail(&device->dev_node, &rdmacg_devices);
 	mutex_unlock(&rdmacg_mutex);
-	return 0;
 }
 EXPORT_SYMBOL(rdmacg_register_device);
 
-- 
cgit v1.2.3


From e302c2a5fe0ca63b8fcc93389917625f486e0670 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Wed, 16 Jan 2019 19:47:57 +0100
Subject: net: phy: remove state PHY_CHANGELINK

Since recent changes to the phylib state machine state PHY_CHANGELINK
isn't used any longer. Therefore let's remove it.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/phy.c | 2 --
 include/linux/phy.h   | 6 ------
 2 files changed, 8 deletions(-)

(limited to 'include')

diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index 241fb83ef4de..b0632e859564 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -47,7 +47,6 @@ static const char *phy_state_to_str(enum phy_state st)
 	PHY_STATE_STR(RUNNING)
 	PHY_STATE_STR(NOLINK)
 	PHY_STATE_STR(FORCING)
-	PHY_STATE_STR(CHANGELINK)
 	PHY_STATE_STR(HALTED)
 	PHY_STATE_STR(RESUMING)
 	}
@@ -939,7 +938,6 @@ void phy_state_machine(struct work_struct *work)
 		break;
 	case PHY_NOLINK:
 	case PHY_RUNNING:
-	case PHY_CHANGELINK:
 	case PHY_RESUMING:
 		err = phy_check_link_status(phydev);
 		break;
diff --git a/include/linux/phy.h b/include/linux/phy.h
index f1c19bf8c658..232d93b9cea4 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -304,11 +304,6 @@ struct phy_device *mdiobus_scan(struct mii_bus *bus, int addr);
  * - irq or timer will set NOLINK if link goes down
  * - phy_stop moves to HALTED
  *
- * CHANGELINK: PHY experienced a change in link state
- * - timer moves to RUNNING if link
- * - timer moves to NOLINK if the link is down
- * - phy_stop moves to HALTED
- *
  * HALTED: PHY is up, but no polling or interrupts are done. Or
  * PHY is in an error state.
  *
@@ -327,7 +322,6 @@ enum phy_state {
 	PHY_RUNNING,
 	PHY_NOLINK,
 	PHY_FORCING,
-	PHY_CHANGELINK,
 	PHY_RESUMING
 };
 
-- 
cgit v1.2.3


From 340a6f3d2d52a1b72f3454818e53293807f8f127 Mon Sep 17 00:00:00 2001
From: Yafang Shao <laoar.shao@gmail.com>
Date: Thu, 17 Jan 2019 18:03:14 +0800
Subject: tcp: declare tcp_mmap() only when CONFIG_MMU is set

Since tcp_mmap() is defined when CONFIG_MMU is set.

Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index e0a65c067662..5c950180d61b 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -406,8 +406,10 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
 		int flags, int *addr_len);
 int tcp_set_rcvlowat(struct sock *sk, int val);
 void tcp_data_ready(struct sock *sk);
+#ifdef CONFIG_MMU
 int tcp_mmap(struct file *file, struct socket *sock,
 	     struct vm_area_struct *vma);
+#endif
 void tcp_parse_options(const struct net *net, const struct sk_buff *skb,
 		       struct tcp_options_received *opt_rx,
 		       int estab, struct tcp_fastopen_cookie *foc);
-- 
cgit v1.2.3


From bb658ab7b8f2828b35c207a95cb0c05965721022 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Thu, 17 Jan 2019 20:09:21 +0100
Subject: net: phy: remove phy_stop_interrupts

Interrupts have been disabled in phy_stop() already. So we can remove
phy_stop_interrupts() and free the interrupt in phy_disconnect()
directly.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/phy.c        | 17 -----------------
 drivers/net/phy/phy_device.c |  4 ++--
 include/linux/phy.h          |  1 -
 3 files changed, 2 insertions(+), 20 deletions(-)

(limited to 'include')

diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index 37cf39fdcc91..f7a92e7edff7 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -818,23 +818,6 @@ int phy_start_interrupts(struct phy_device *phydev)
 }
 EXPORT_SYMBOL(phy_start_interrupts);
 
-/**
- * phy_stop_interrupts - disable interrupts from a PHY device
- * @phydev: target phy_device struct
- */
-int phy_stop_interrupts(struct phy_device *phydev)
-{
-	int err = phy_disable_interrupts(phydev);
-
-	if (err)
-		phy_error(phydev);
-
-	free_irq(phydev->irq, phydev);
-
-	return err;
-}
-EXPORT_SYMBOL(phy_stop_interrupts);
-
 /**
  * phy_stop - Bring down the PHY link, and stop checking the status
  * @phydev: target phy_device struct
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index e269a355012d..7b3164174251 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -1002,8 +1002,8 @@ void phy_disconnect(struct phy_device *phydev)
 	if (phy_is_started(phydev))
 		phy_stop(phydev);
 
-	if (phydev->irq > 0)
-		phy_stop_interrupts(phydev);
+	if (phy_interrupt_is_valid(phydev))
+		free_irq(phydev->irq, phydev);
 
 	phydev->adjust_link = NULL;
 
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 232d93b9cea4..0990f913d649 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -951,7 +951,6 @@ int phy_aneg_done(struct phy_device *phydev);
 int phy_speed_down(struct phy_device *phydev, bool sync);
 int phy_speed_up(struct phy_device *phydev);
 
-int phy_stop_interrupts(struct phy_device *phydev);
 int phy_restart_aneg(struct phy_device *phydev);
 int phy_reset_after_clk_enable(struct phy_device *phydev);
 
-- 
cgit v1.2.3


From f88c19aab5f34835f1ba467c5b508ec4f782f07f Mon Sep 17 00:00:00 2001
From: Cong Wang <xiyou.wangcong@gmail.com>
Date: Thu, 17 Jan 2019 12:44:25 -0800
Subject: net_sched: add hit counter for matchall

Although matchall always matches packets, however, it still
relies on a protocol match first. So it is still useful to have
such a counter for matchall. Of course, unlike u32, every time
we hit a matchall filter, it is always a success, so we don't
have to distinguish them.

Sample output:

filter protocol 802.1Q pref 100 matchall chain 0
filter protocol 802.1Q pref 100 matchall chain 0 handle 0x1
  not_in_hw (rule hit 10)
	action order 1: vlan  pop continue
	 index 1 ref 1 bind 1 installed 40 sec used 1 sec
	Action statistics:
	Sent 836 bytes 10 pkt (dropped 0, overlimits 0 requeues 0)
	backlog 0b 0p requeues 0

Reported-by: Martin Olsson <martin.olsson+netdev@sentorsecurity.com>
Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Cc: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/pkt_cls.h |  6 ++++++
 net/sched/cls_matchall.c     | 24 ++++++++++++++++++++++++
 2 files changed, 30 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h
index 95d0db2a8350..32a3416b51c3 100644
--- a/include/uapi/linux/pkt_cls.h
+++ b/include/uapi/linux/pkt_cls.h
@@ -527,11 +527,17 @@ enum {
 
 /* Match-all classifier */
 
+struct tc_matchall_pcnt {
+	__u64 rhit;
+};
+
 enum {
 	TCA_MATCHALL_UNSPEC,
 	TCA_MATCHALL_CLASSID,
 	TCA_MATCHALL_ACT,
 	TCA_MATCHALL_FLAGS,
+	TCA_MATCHALL_PCNT,
+	TCA_MATCHALL_PAD,
 	__TCA_MATCHALL_MAX,
 };
 
diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c
index 0e408ee9dcec..a1b803fd372e 100644
--- a/net/sched/cls_matchall.c
+++ b/net/sched/cls_matchall.c
@@ -12,6 +12,7 @@
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/module.h>
+#include <linux/percpu.h>
 
 #include <net/sch_generic.h>
 #include <net/pkt_cls.h>
@@ -22,6 +23,7 @@ struct cls_mall_head {
 	u32 handle;
 	u32 flags;
 	unsigned int in_hw_count;
+	struct tc_matchall_pcnt __percpu *pf;
 	struct rcu_work rwork;
 };
 
@@ -34,6 +36,7 @@ static int mall_classify(struct sk_buff *skb, const struct tcf_proto *tp,
 		return -1;
 
 	*res = head->res;
+	__this_cpu_inc(head->pf->rhit);
 	return tcf_exts_exec(skb, &head->exts, res);
 }
 
@@ -46,6 +49,7 @@ static void __mall_destroy(struct cls_mall_head *head)
 {
 	tcf_exts_destroy(&head->exts);
 	tcf_exts_put_net(&head->exts);
+	free_percpu(head->pf);
 	kfree(head);
 }
 
@@ -192,6 +196,11 @@ static int mall_change(struct net *net, struct sk_buff *in_skb,
 		handle = 1;
 	new->handle = handle;
 	new->flags = flags;
+	new->pf = alloc_percpu(struct tc_matchall_pcnt);
+	if (!new->pf) {
+		err = -ENOMEM;
+		goto err_alloc_percpu;
+	}
 
 	err = mall_set_parms(net, tp, new, base, tb, tca[TCA_RATE], ovr,
 			     extack);
@@ -214,6 +223,8 @@ static int mall_change(struct net *net, struct sk_buff *in_skb,
 
 err_replace_hw_filter:
 err_set_parms:
+	free_percpu(new->pf);
+err_alloc_percpu:
 	tcf_exts_destroy(&new->exts);
 err_exts_init:
 	kfree(new);
@@ -270,8 +281,10 @@ static int mall_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb,
 static int mall_dump(struct net *net, struct tcf_proto *tp, void *fh,
 		     struct sk_buff *skb, struct tcmsg *t)
 {
+	struct tc_matchall_pcnt gpf = {};
 	struct cls_mall_head *head = fh;
 	struct nlattr *nest;
+	int cpu;
 
 	if (!head)
 		return skb->len;
@@ -289,6 +302,17 @@ static int mall_dump(struct net *net, struct tcf_proto *tp, void *fh,
 	if (head->flags && nla_put_u32(skb, TCA_MATCHALL_FLAGS, head->flags))
 		goto nla_put_failure;
 
+	for_each_possible_cpu(cpu) {
+		struct tc_matchall_pcnt *pf = per_cpu_ptr(head->pf, cpu);
+
+		gpf.rhit += pf->rhit;
+	}
+
+	if (nla_put_64bit(skb, TCA_MATCHALL_PCNT,
+			  sizeof(struct tc_matchall_pcnt),
+			  &gpf, TCA_MATCHALL_PAD))
+		goto nla_put_failure;
+
 	if (tcf_exts_dump(skb, &head->exts))
 		goto nla_put_failure;
 
-- 
cgit v1.2.3


From cb5ccfbe73b389470e1dc11061bb185ef4bc9aec Mon Sep 17 00:00:00 2001
From: Eran Ben Elisha <eranbe@mellanox.com>
Date: Thu, 17 Jan 2019 23:59:10 +0200
Subject: devlink: Add health buffer support

Devlink health buffer is a mechanism to pass descriptors between drivers
and devlink. The API allows the driver to add objects, object pair,
value array (nested attributes), value and name.

Driver can use this API to fill the buffers in a format which can be
translated by the devlink to the netlink message.

In order to fulfill it, an internal buffer descriptor is defined. This
will hold the data and metadata per each attribute and by used to pass
actual commands to the netlink.

This mechanism will be later used in devlink health for dump and diagnose
data store by the drivers.

Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
Reviewed-by: Moshe Shemesh <moshe@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/devlink.h        |  76 +++++++
 include/uapi/linux/devlink.h |   8 +
 net/core/devlink.c           | 501 +++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 585 insertions(+)

(limited to 'include')

diff --git a/include/net/devlink.h b/include/net/devlink.h
index 67f4293bc970..77c77319290a 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -423,6 +423,8 @@ struct devlink_region;
 
 typedef void devlink_snapshot_data_dest_t(const void *data);
 
+struct devlink_health_buffer;
+
 struct devlink_ops {
 	int (*reload)(struct devlink *devlink, struct netlink_ext_ack *extack);
 	int (*port_type_set)(struct devlink_port *devlink_port,
@@ -584,6 +586,22 @@ int devlink_region_snapshot_create(struct devlink_region *region, u64 data_len,
 				   u8 *data, u32 snapshot_id,
 				   devlink_snapshot_data_dest_t *data_destructor);
 
+int devlink_health_buffer_nest_start(struct devlink_health_buffer *buffer,
+				     int attrtype);
+void devlink_health_buffer_nest_end(struct devlink_health_buffer *buffer);
+void devlink_health_buffer_nest_cancel(struct devlink_health_buffer *buffer);
+int devlink_health_buffer_put_object_name(struct devlink_health_buffer *buffer,
+					  char *name);
+int devlink_health_buffer_put_value_u8(struct devlink_health_buffer *buffer,
+				       u8 value);
+int devlink_health_buffer_put_value_u32(struct devlink_health_buffer *buffer,
+					u32 value);
+int devlink_health_buffer_put_value_u64(struct devlink_health_buffer *buffer,
+					u64 value);
+int devlink_health_buffer_put_value_string(struct devlink_health_buffer *buffer,
+					   char *name);
+int devlink_health_buffer_put_value_data(struct devlink_health_buffer *buffer,
+					 void *data, int len);
 #else
 
 static inline struct devlink *devlink_alloc(const struct devlink_ops *ops,
@@ -844,6 +862,64 @@ devlink_region_snapshot_create(struct devlink_region *region, u64 data_len,
 	return 0;
 }
 
+static inline int
+devlink_health_buffer_nest_start(struct devlink_health_buffer *buffer,
+				 int attrtype)
+{
+	return 0;
+}
+
+static inline void
+devlink_health_buffer_nest_end(struct devlink_health_buffer *buffer)
+{
+}
+
+static inline void
+devlink_health_buffer_nest_cancel(struct devlink_health_buffer *buffer)
+{
+}
+
+static inline int
+devlink_health_buffer_put_object_name(struct devlink_health_buffer *buffer,
+				      char *name)
+{
+	return 0;
+}
+
+static inline int
+devlink_health_buffer_put_value_u8(struct devlink_health_buffer *buffer,
+				   u8 value)
+{
+	return 0;
+}
+
+static inline int
+devlink_health_buffer_put_value_u32(struct devlink_health_buffer *buffer,
+				    u32 value)
+{
+	return 0;
+}
+
+static inline int
+devlink_health_buffer_put_value_u64(struct devlink_health_buffer *buffer,
+				    u64 value)
+{
+	return 0;
+}
+
+static inline int
+devlink_health_buffer_put_value_string(struct devlink_health_buffer *buffer,
+				       char *name)
+{
+	return 0;
+}
+
+static inline int
+devlink_health_buffer_put_value_data(struct devlink_health_buffer *buffer,
+				     void *data, int len)
+{
+	return 0;
+}
 #endif
 
 #endif /* _NET_DEVLINK_H_ */
diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index 6e52d3660654..cff0e0cb5ac2 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -285,6 +285,14 @@ enum devlink_attr {
 	DEVLINK_ATTR_REGION_CHUNK_ADDR,         /* u64 */
 	DEVLINK_ATTR_REGION_CHUNK_LEN,          /* u64 */
 
+	DEVLINK_ATTR_HEALTH_BUFFER_OBJECT,		/* nested */
+	DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_PAIR,		/* nested */
+	DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_NAME,		/* string */
+	DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE,	/* nested */
+	DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_ARRAY,	/* nested */
+	DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_TYPE,	/* u8 */
+	DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_DATA,	/* dynamic */
+
 	/* add new attributes above here, update the policy in devlink.c */
 
 	__DEVLINK_ATTR_MAX,
diff --git a/net/core/devlink.c b/net/core/devlink.c
index abb0da9d7b4b..8984501edade 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -3597,6 +3597,507 @@ out:
 	return 0;
 }
 
+#define DEVLINK_HEALTH_BUFFER_SIZE (4096 - GENL_HDRLEN)
+#define DEVLINK_HEALTH_BUFFER_DATA_SIZE (DEVLINK_HEALTH_BUFFER_SIZE / 2)
+#define DEVLINK_HEALTH_SIZE_TO_BUFFERS(size) DIV_ROUND_UP(size, DEVLINK_HEALTH_BUFFER_DATA_SIZE)
+#define DEVLINK_HEALTH_BUFFER_MAX_CHUNK 1024
+
+struct devlink_health_buffer {
+	void *data;
+	u64 offset;
+	u64 bytes_left;
+	u64 bytes_left_metadata;
+	u64 max_nested_depth;
+	u64 curr_nest;
+};
+
+struct devlink_health_buffer_desc {
+	int attrtype;
+	u16 len;
+	u8 nla_type;
+	u8 nest_end;
+	int value[0];
+};
+
+static void
+devlink_health_buffers_reset(struct devlink_health_buffer **buffers_list,
+			     u64 num_of_buffers)
+{
+	u64 i;
+
+	for (i = 0; i < num_of_buffers; i++) {
+		memset(buffers_list[i]->data, 0, DEVLINK_HEALTH_BUFFER_SIZE);
+		buffers_list[i]->offset = 0;
+		buffers_list[i]->bytes_left = DEVLINK_HEALTH_BUFFER_DATA_SIZE;
+		buffers_list[i]->bytes_left_metadata =
+			DEVLINK_HEALTH_BUFFER_DATA_SIZE;
+		buffers_list[i]->max_nested_depth = 0;
+		buffers_list[i]->curr_nest = 0;
+	}
+}
+
+static void
+devlink_health_buffers_destroy(struct devlink_health_buffer **buffers_list,
+			       u64 size);
+
+static struct devlink_health_buffer **
+devlink_health_buffers_create(u64 size)
+{
+	struct devlink_health_buffer **buffers_list;
+	u64 num_of_buffers = DEVLINK_HEALTH_SIZE_TO_BUFFERS(size);
+	u64 i;
+
+	buffers_list = kcalloc(num_of_buffers,
+			       sizeof(struct devlink_health_buffer *),
+			       GFP_KERNEL);
+	if (!buffers_list)
+		return NULL;
+
+	for (i = 0; i < num_of_buffers; i++) {
+		struct devlink_health_buffer *buffer;
+		void *data;
+
+		buffer = kzalloc(sizeof(*buffer), GFP_KERNEL);
+		data = kzalloc(DEVLINK_HEALTH_BUFFER_SIZE, GFP_KERNEL);
+		if (!buffer || !data) {
+			kfree(buffer);
+			kfree(data);
+			goto buffers_cleanup;
+		}
+		buffers_list[i] = buffer;
+		buffer->data = data;
+	}
+	devlink_health_buffers_reset(buffers_list, num_of_buffers);
+
+	return buffers_list;
+
+buffers_cleanup:
+	devlink_health_buffers_destroy(buffers_list, --i);
+	kfree(buffers_list);
+	return NULL;
+}
+
+static void
+devlink_health_buffers_destroy(struct devlink_health_buffer **buffers_list,
+			       u64 num_of_buffers)
+{
+	u64 i;
+
+	for (i = 0; i < num_of_buffers; i++) {
+		kfree(buffers_list[i]->data);
+		kfree(buffers_list[i]);
+	}
+}
+
+void
+devlink_health_buffer_offset_inc(struct devlink_health_buffer *buffer,
+				 int len)
+{
+	buffer->offset += len;
+}
+
+/* In order to store a nest, need two descriptors, for start and end */
+#define DEVLINK_HEALTH_BUFFER_NEST_SIZE (sizeof(struct devlink_health_buffer_desc) * 2)
+
+int devlink_health_buffer_verify_len(struct devlink_health_buffer *buffer,
+				     int len, int metadata_len)
+{
+	if (len > DEVLINK_HEALTH_BUFFER_DATA_SIZE)
+		return -EINVAL;
+
+	if (buffer->bytes_left < len ||
+	    buffer->bytes_left_metadata < metadata_len)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static struct devlink_health_buffer_desc *
+devlink_health_buffer_get_desc_from_offset(struct devlink_health_buffer *buffer)
+{
+	return buffer->data + buffer->offset;
+}
+
+int
+devlink_health_buffer_nest_start(struct devlink_health_buffer *buffer,
+				 int attrtype)
+{
+	struct devlink_health_buffer_desc *desc;
+	int err;
+
+	err = devlink_health_buffer_verify_len(buffer, 0,
+					       DEVLINK_HEALTH_BUFFER_NEST_SIZE);
+	if (err)
+		return err;
+
+	if (attrtype != DEVLINK_ATTR_HEALTH_BUFFER_OBJECT &&
+	    attrtype != DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_PAIR &&
+	    attrtype != DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE &&
+	    attrtype != DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_ARRAY)
+		return -EINVAL;
+
+	desc = devlink_health_buffer_get_desc_from_offset(buffer);
+
+	desc->attrtype = attrtype;
+	buffer->bytes_left_metadata -= DEVLINK_HEALTH_BUFFER_NEST_SIZE;
+	devlink_health_buffer_offset_inc(buffer, sizeof(*desc));
+
+	buffer->curr_nest++;
+	buffer->max_nested_depth = max(buffer->max_nested_depth,
+				       buffer->curr_nest);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(devlink_health_buffer_nest_start);
+
+enum devlink_health_buffer_nest_end_cancel {
+	DEVLINK_HEALTH_BUFFER_NEST_END = 1,
+	DEVLINK_HEALTH_BUFFER_NEST_CANCEL,
+};
+
+static void
+devlink_health_buffer_nest_end_cancel(struct devlink_health_buffer *buffer,
+				      enum devlink_health_buffer_nest_end_cancel nest)
+{
+	struct devlink_health_buffer_desc *desc;
+
+	WARN_ON(!buffer->curr_nest);
+	buffer->curr_nest--;
+
+	desc = devlink_health_buffer_get_desc_from_offset(buffer);
+	desc->nest_end = nest;
+	devlink_health_buffer_offset_inc(buffer, sizeof(*desc));
+}
+
+void devlink_health_buffer_nest_end(struct devlink_health_buffer *buffer)
+{
+	devlink_health_buffer_nest_end_cancel(buffer,
+					      DEVLINK_HEALTH_BUFFER_NEST_END);
+}
+EXPORT_SYMBOL_GPL(devlink_health_buffer_nest_end);
+
+void devlink_health_buffer_nest_cancel(struct devlink_health_buffer *buffer)
+{
+	devlink_health_buffer_nest_end_cancel(buffer,
+					      DEVLINK_HEALTH_BUFFER_NEST_CANCEL);
+}
+EXPORT_SYMBOL_GPL(devlink_health_buffer_nest_cancel);
+
+int
+devlink_health_buffer_put_object_name(struct devlink_health_buffer *buffer,
+				      char *name)
+{
+	struct devlink_health_buffer_desc *desc;
+	int err;
+
+	err = devlink_health_buffer_verify_len(buffer, strlen(name) + 1,
+					       sizeof(*desc));
+	if (err)
+		return err;
+
+	desc = devlink_health_buffer_get_desc_from_offset(buffer);
+	desc->attrtype = DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_NAME;
+	desc->nla_type = NLA_NUL_STRING;
+	desc->len = strlen(name) + 1;
+	memcpy(&desc->value, name, desc->len);
+	devlink_health_buffer_offset_inc(buffer, sizeof(*desc) + desc->len);
+
+	buffer->bytes_left_metadata -= sizeof(*desc);
+	buffer->bytes_left -= (strlen(name) + 1);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(devlink_health_buffer_put_object_name);
+
+static int
+devlink_health_buffer_put_value(struct devlink_health_buffer *buffer,
+				u8 nla_type, void *value, int len)
+{
+	struct devlink_health_buffer_desc *desc;
+	int err;
+
+	err = devlink_health_buffer_verify_len(buffer, len, sizeof(*desc));
+	if (err)
+		return err;
+
+	desc = devlink_health_buffer_get_desc_from_offset(buffer);
+	desc->attrtype = DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_DATA;
+	desc->nla_type = nla_type;
+	desc->len = len;
+	memcpy(&desc->value, value, len);
+	devlink_health_buffer_offset_inc(buffer, sizeof(*desc) + desc->len);
+
+	buffer->bytes_left_metadata -= sizeof(*desc);
+	buffer->bytes_left -= len;
+
+	return 0;
+}
+
+int
+devlink_health_buffer_put_value_u8(struct devlink_health_buffer *buffer,
+				   u8 value)
+{
+	int err;
+
+	err = devlink_health_buffer_put_value(buffer, NLA_U8, &value,
+					      sizeof(value));
+	if (err)
+		return err;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(devlink_health_buffer_put_value_u8);
+
+int
+devlink_health_buffer_put_value_u32(struct devlink_health_buffer *buffer,
+				    u32 value)
+{
+	int err;
+
+	err = devlink_health_buffer_put_value(buffer, NLA_U32, &value,
+					      sizeof(value));
+	if (err)
+		return err;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(devlink_health_buffer_put_value_u32);
+
+int
+devlink_health_buffer_put_value_u64(struct devlink_health_buffer *buffer,
+				    u64 value)
+{
+	int err;
+
+	err = devlink_health_buffer_put_value(buffer, NLA_U64, &value,
+					      sizeof(value));
+	if (err)
+		return err;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(devlink_health_buffer_put_value_u64);
+
+int
+devlink_health_buffer_put_value_string(struct devlink_health_buffer *buffer,
+				       char *name)
+{
+	int err;
+
+	if (strlen(name) + 1 > DEVLINK_HEALTH_BUFFER_MAX_CHUNK)
+		return -EINVAL;
+
+	err = devlink_health_buffer_put_value(buffer, NLA_NUL_STRING, name,
+					      strlen(name) + 1);
+	if (err)
+		return err;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(devlink_health_buffer_put_value_string);
+
+int
+devlink_health_buffer_put_value_data(struct devlink_health_buffer *buffer,
+				     void *data, int len)
+{
+	int err;
+
+	if (len > DEVLINK_HEALTH_BUFFER_MAX_CHUNK)
+		return -EINVAL;
+
+	err = devlink_health_buffer_put_value(buffer, NLA_BINARY, data, len);
+	if (err)
+		return err;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(devlink_health_buffer_put_value_data);
+
+static int
+devlink_health_buffer_fill_data(struct sk_buff *skb,
+				struct devlink_health_buffer_desc *desc)
+{
+	int err = -EINVAL;
+
+	switch (desc->nla_type) {
+	case NLA_U8:
+		err = nla_put_u8(skb, DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_DATA,
+				 *(u8 *)desc->value);
+		break;
+	case NLA_U32:
+		err = nla_put_u32(skb, DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_DATA,
+				  *(u32 *)desc->value);
+		break;
+	case NLA_U64:
+		err = nla_put_u64_64bit(skb,
+					DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_DATA,
+					*(u64 *)desc->value, DEVLINK_ATTR_PAD);
+		break;
+	case NLA_NUL_STRING:
+		err = nla_put_string(skb,
+				     DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_DATA,
+				     (char *)&desc->value);
+		break;
+	case NLA_BINARY:
+		err = nla_put(skb, DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_DATA,
+			      desc->len, (void *)&desc->value);
+		break;
+	}
+
+	return err;
+}
+
+static int
+devlink_health_buffer_fill_type(struct sk_buff *skb,
+				struct devlink_health_buffer_desc *desc)
+{
+	int err = -EINVAL;
+
+	switch (desc->nla_type) {
+	case NLA_U8:
+		err = nla_put_u8(skb, DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_TYPE,
+				 NLA_U8);
+		break;
+	case NLA_U32:
+		err = nla_put_u8(skb, DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_TYPE,
+				 NLA_U32);
+		break;
+	case NLA_U64:
+		err = nla_put_u8(skb, DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_TYPE,
+				 NLA_U64);
+		break;
+	case NLA_NUL_STRING:
+		err = nla_put_u8(skb, DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_TYPE,
+				 NLA_NUL_STRING);
+		break;
+	case NLA_BINARY:
+		err = nla_put_u8(skb, DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_TYPE,
+				 NLA_BINARY);
+		break;
+	}
+
+	return err;
+}
+
+static inline struct devlink_health_buffer_desc *
+devlink_health_buffer_get_next_desc(struct devlink_health_buffer_desc *desc)
+{
+	return (void *)&desc->value + desc->len;
+}
+
+static int
+devlink_health_buffer_prepare_skb(struct sk_buff *skb,
+				  struct devlink_health_buffer *buffer)
+{
+	struct devlink_health_buffer_desc *last_desc, *desc;
+	struct nlattr **buffer_nlattr;
+	int err;
+	int i = 0;
+
+	buffer_nlattr = kcalloc(buffer->max_nested_depth,
+				sizeof(*buffer_nlattr), GFP_KERNEL);
+	if (!buffer_nlattr)
+		return -EINVAL;
+
+	last_desc = devlink_health_buffer_get_desc_from_offset(buffer);
+	desc = buffer->data;
+	while (desc != last_desc) {
+		switch (desc->attrtype) {
+		case DEVLINK_ATTR_HEALTH_BUFFER_OBJECT:
+		case DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_PAIR:
+		case DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE:
+		case DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_ARRAY:
+			buffer_nlattr[i] = nla_nest_start(skb, desc->attrtype);
+			if (!buffer_nlattr[i])
+				goto nla_put_failure;
+			i++;
+			break;
+		case DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_DATA:
+			err = devlink_health_buffer_fill_data(skb, desc);
+			if (err)
+				goto nla_put_failure;
+			err = devlink_health_buffer_fill_type(skb, desc);
+			if (err)
+				goto nla_put_failure;
+			break;
+		case DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_NAME:
+			err = nla_put_string(skb, desc->attrtype,
+					     (char *)&desc->value);
+			if (err)
+				goto nla_put_failure;
+			break;
+		default:
+			WARN_ON(!desc->nest_end);
+			WARN_ON(i <= 0);
+			if (desc->nest_end == DEVLINK_HEALTH_BUFFER_NEST_END)
+				nla_nest_end(skb, buffer_nlattr[--i]);
+			else
+				nla_nest_cancel(skb, buffer_nlattr[--i]);
+			break;
+		}
+		desc = devlink_health_buffer_get_next_desc(desc);
+	}
+
+	return 0;
+
+nla_put_failure:
+	kfree(buffer_nlattr);
+	return err;
+}
+
+static int
+devlink_health_buffer_snd(struct genl_info *info,
+			  enum devlink_command cmd, int flags,
+			  struct devlink_health_buffer **buffers_array,
+			  u64 num_of_buffers)
+{
+	struct sk_buff *skb;
+	struct nlmsghdr *nlh;
+	void *hdr;
+	int err;
+	u64 i;
+
+	for (i = 0; i < num_of_buffers; i++) {
+		/* Skip buffer if driver did not fill it up with any data */
+		if (!buffers_array[i]->offset)
+			continue;
+
+		skb = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
+		if (!skb)
+			return -ENOMEM;
+
+		hdr = genlmsg_put(skb, info->snd_portid, info->snd_seq,
+				  &devlink_nl_family, NLM_F_MULTI, cmd);
+		if (!hdr)
+			goto nla_put_failure;
+
+		err = devlink_health_buffer_prepare_skb(skb, buffers_array[i]);
+		if (err)
+			goto nla_put_failure;
+
+		genlmsg_end(skb, hdr);
+		err = genlmsg_reply(skb, info);
+		if (err)
+			return err;
+	}
+
+	skb = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!skb)
+		return -ENOMEM;
+	nlh = nlmsg_put(skb, info->snd_portid, info->snd_seq,
+			NLMSG_DONE, 0, flags | NLM_F_MULTI);
+	err = genlmsg_reply(skb, info);
+	if (err)
+		return err;
+
+	return 0;
+
+nla_put_failure:
+	err = -EIO;
+	nlmsg_free(skb);
+	return err;
+}
+
 static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
 	[DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING },
 	[DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING },
-- 
cgit v1.2.3


From 880ee82f0313453ec5a6cb122866ac057263066b Mon Sep 17 00:00:00 2001
From: Eran Ben Elisha <eranbe@mellanox.com>
Date: Thu, 17 Jan 2019 23:59:11 +0200
Subject: devlink: Add health reporter create/destroy functionality

Devlink health reporter is an instance for reporting, diagnosing and
recovering from run time errors discovered by the reporters.
Define it's data structure and supported operations.
In addition, expose devlink API to create and destroy a reporter.
Each devlink instance will hold it's own reporters list.

As part of the allocation, driver shall provide a set of callbacks which
will be used the devlink in order to handle health reports and user
commands related to this reporter. In addition, driver is entitled to
provide some priv pointer, which can be fetched from the reporter by
devlink_health_reporter_priv function.

For each reporter, devlink will hold a metadata of statistics,
buffers and status.

Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
Reviewed-by: Moshe Shemesh <moshe@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/devlink.h |  59 +++++++++++++++++++++++
 net/core/devlink.c    | 127 ++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 186 insertions(+)

(limited to 'include')

diff --git a/include/net/devlink.h b/include/net/devlink.h
index 77c77319290a..7fe30d67678a 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -30,6 +30,7 @@ struct devlink {
 	struct list_head param_list;
 	struct list_head region_list;
 	u32 snapshot_id;
+	struct list_head reporter_list;
 	struct devlink_dpipe_headers *dpipe_headers;
 	const struct devlink_ops *ops;
 	struct device *dev;
@@ -424,6 +425,34 @@ struct devlink_region;
 typedef void devlink_snapshot_data_dest_t(const void *data);
 
 struct devlink_health_buffer;
+struct devlink_health_reporter;
+
+/**
+ * struct devlink_health_reporter_ops - Reporter operations
+ * @name: reporter name
+ * dump_size: dump buffer size allocated by the devlink
+ * diagnose_size: diagnose buffer size allocated by the devlink
+ * recover: callback to recover from reported error
+ *          if priv_ctx is NULL, run a full recover
+ * dump: callback to dump an object
+ *       if priv_ctx is NULL, run a full dump
+ * diagnose: callback to diagnose the current status
+ */
+
+struct devlink_health_reporter_ops {
+	char *name;
+	unsigned int dump_size;
+	unsigned int diagnose_size;
+	int (*recover)(struct devlink_health_reporter *reporter,
+		       void *priv_ctx);
+	int (*dump)(struct devlink_health_reporter *reporter,
+		    struct devlink_health_buffer **buffers_array,
+		    unsigned int buffer_size, unsigned int num_buffers,
+		    void *priv_ctx);
+	int (*diagnose)(struct devlink_health_reporter *reporter,
+			struct devlink_health_buffer **buffers_array,
+			unsigned int buffer_size, unsigned int num_buffers);
+};
 
 struct devlink_ops {
 	int (*reload)(struct devlink *devlink, struct netlink_ext_ack *extack);
@@ -602,6 +631,16 @@ int devlink_health_buffer_put_value_string(struct devlink_health_buffer *buffer,
 					   char *name);
 int devlink_health_buffer_put_value_data(struct devlink_health_buffer *buffer,
 					 void *data, int len);
+struct devlink_health_reporter *
+devlink_health_reporter_create(struct devlink *devlink,
+			       const struct devlink_health_reporter_ops *ops,
+			       u64 graceful_period, bool auto_recover,
+			       void *priv);
+void
+devlink_health_reporter_destroy(struct devlink_health_reporter *reporter);
+
+void *
+devlink_health_reporter_priv(struct devlink_health_reporter *reporter);
 #else
 
 static inline struct devlink *devlink_alloc(const struct devlink_ops *ops,
@@ -920,6 +959,26 @@ devlink_health_buffer_put_value_data(struct devlink_health_buffer *buffer,
 {
 	return 0;
 }
+
+static inline struct devlink_health_reporter *
+devlink_health_reporter_create(struct devlink *devlink,
+			       const struct devlink_health_reporter_ops *ops,
+			       u64 graceful_period, bool auto_recover,
+			       void *priv)
+{
+	return NULL;
+}
+
+static inline void
+devlink_health_reporter_destroy(struct devlink_health_reporter *reporter)
+{
+}
+
+static inline void *
+devlink_health_reporter_priv(struct devlink_health_reporter *reporter)
+{
+	return NULL;
+}
 #endif
 
 #endif /* _NET_DEVLINK_H_ */
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 8984501edade..fec169a28dba 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -4098,6 +4098,132 @@ nla_put_failure:
 	return err;
 }
 
+struct devlink_health_reporter {
+	struct list_head list;
+	struct devlink_health_buffer **dump_buffers_array;
+	struct mutex dump_lock; /* lock parallel read/write from dump buffers */
+	struct devlink_health_buffer **diagnose_buffers_array;
+	struct mutex diagnose_lock; /* lock parallel read/write from diagnose buffers */
+	void *priv;
+	const struct devlink_health_reporter_ops *ops;
+	struct devlink *devlink;
+	u64 graceful_period;
+	bool auto_recover;
+	u8 health_state;
+};
+
+void *
+devlink_health_reporter_priv(struct devlink_health_reporter *reporter)
+{
+	return reporter->priv;
+}
+EXPORT_SYMBOL_GPL(devlink_health_reporter_priv);
+
+static struct devlink_health_reporter *
+devlink_health_reporter_find_by_name(struct devlink *devlink,
+				     const char *reporter_name)
+{
+	struct devlink_health_reporter *reporter;
+
+	list_for_each_entry(reporter, &devlink->reporter_list, list)
+		if (!strcmp(reporter->ops->name, reporter_name))
+			return reporter;
+	return NULL;
+}
+
+/**
+ *	devlink_health_reporter_create - create devlink health reporter
+ *
+ *	@devlink: devlink
+ *	@ops: ops
+ *	@graceful_period: to avoid recovery loops, in msecs
+ *	@auto_recover: auto recover when error occurs
+ *	@priv: priv
+ */
+struct devlink_health_reporter *
+devlink_health_reporter_create(struct devlink *devlink,
+			       const struct devlink_health_reporter_ops *ops,
+			       u64 graceful_period, bool auto_recover,
+			       void *priv)
+{
+	struct devlink_health_reporter *reporter;
+
+	mutex_lock(&devlink->lock);
+	if (devlink_health_reporter_find_by_name(devlink, ops->name)) {
+		reporter = ERR_PTR(-EEXIST);
+		goto unlock;
+	}
+
+	if (WARN_ON(ops->dump && !ops->dump_size) ||
+	    WARN_ON(ops->diagnose && !ops->diagnose_size) ||
+	    WARN_ON(auto_recover && !ops->recover) ||
+	    WARN_ON(graceful_period && !ops->recover)) {
+		reporter = ERR_PTR(-EINVAL);
+		goto unlock;
+	}
+
+	reporter = kzalloc(sizeof(*reporter), GFP_KERNEL);
+	if (!reporter) {
+		reporter = ERR_PTR(-ENOMEM);
+		goto unlock;
+	}
+
+	if (ops->dump) {
+		reporter->dump_buffers_array =
+			devlink_health_buffers_create(ops->dump_size);
+		if (!reporter->dump_buffers_array) {
+			kfree(reporter);
+			reporter = ERR_PTR(-ENOMEM);
+			goto unlock;
+		}
+	}
+
+	if (ops->diagnose) {
+		reporter->diagnose_buffers_array =
+			devlink_health_buffers_create(ops->diagnose_size);
+		if (!reporter->diagnose_buffers_array) {
+			devlink_health_buffers_destroy(reporter->dump_buffers_array,
+						       DEVLINK_HEALTH_SIZE_TO_BUFFERS(ops->dump_size));
+			kfree(reporter);
+			reporter = ERR_PTR(-ENOMEM);
+			goto unlock;
+		}
+	}
+
+	list_add_tail(&reporter->list, &devlink->reporter_list);
+	mutex_init(&reporter->dump_lock);
+	mutex_init(&reporter->diagnose_lock);
+
+	reporter->priv = priv;
+	reporter->ops = ops;
+	reporter->devlink = devlink;
+	reporter->graceful_period = graceful_period;
+	reporter->auto_recover = auto_recover;
+unlock:
+	mutex_unlock(&devlink->lock);
+	return reporter;
+}
+EXPORT_SYMBOL_GPL(devlink_health_reporter_create);
+
+/**
+ *	devlink_health_reporter_destroy - destroy devlink health reporter
+ *
+ *	@reporter: devlink health reporter to destroy
+ */
+void
+devlink_health_reporter_destroy(struct devlink_health_reporter *reporter)
+{
+	mutex_lock(&reporter->devlink->lock);
+	list_del(&reporter->list);
+	devlink_health_buffers_destroy(reporter->dump_buffers_array,
+				       DEVLINK_HEALTH_SIZE_TO_BUFFERS(reporter->ops->dump_size));
+	devlink_health_buffers_destroy(reporter->diagnose_buffers_array,
+				       DEVLINK_HEALTH_SIZE_TO_BUFFERS(reporter->ops->diagnose_size));
+	kfree(reporter);
+	mutex_unlock(&reporter->devlink->lock);
+}
+EXPORT_SYMBOL_GPL(devlink_health_reporter_destroy);
+
 static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
 	[DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING },
 	[DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING },
@@ -4383,6 +4509,7 @@ struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size)
 	INIT_LIST_HEAD(&devlink->resource_list);
 	INIT_LIST_HEAD(&devlink->param_list);
 	INIT_LIST_HEAD(&devlink->region_list);
+	INIT_LIST_HEAD(&devlink->reporter_list);
 	mutex_init(&devlink->lock);
 	return devlink;
 }
-- 
cgit v1.2.3


From c7af343b4e33578b7de91786a3f639c8cfa0d97b Mon Sep 17 00:00:00 2001
From: Eran Ben Elisha <eranbe@mellanox.com>
Date: Thu, 17 Jan 2019 23:59:12 +0200
Subject: devlink: Add health report functionality

Upon error discover, every driver can report it to the devlink health
mechanism via devlink_health_report function, using the appropriate
reporter registered to it. Driver can pass error specific context which
will be delivered to it as part of the dump / recovery callbacks.

Once an error is reported, devlink health will do the following actions:
* A log is being send to the kernel trace events buffer
* Health status and statistics are being updated for the reporter instance
* Object dump is being taken and stored at the reporter instance (as long
  as there is no other dump which is already stored)
* Auto recovery attempt is being done. depends on:
  - Auto Recovery configuration
  - Grace period vs. time since last recover

Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
Reviewed-by: Moshe Shemesh <moshe@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/devlink.h          |  9 ++++
 include/trace/events/devlink.h | 62 ++++++++++++++++++++++++++++
 net/core/devlink.c             | 93 ++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 164 insertions(+)

(limited to 'include')

diff --git a/include/net/devlink.h b/include/net/devlink.h
index 7fe30d67678a..a81a1b7a67d7 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -641,6 +641,8 @@ devlink_health_reporter_destroy(struct devlink_health_reporter *reporter);
 
 void *
 devlink_health_reporter_priv(struct devlink_health_reporter *reporter);
+int devlink_health_report(struct devlink_health_reporter *reporter,
+			  const char *msg, void *priv_ctx);
 #else
 
 static inline struct devlink *devlink_alloc(const struct devlink_ops *ops,
@@ -979,6 +981,13 @@ devlink_health_reporter_priv(struct devlink_health_reporter *reporter)
 {
 	return NULL;
 }
+
+static inline int
+devlink_health_report(struct devlink_health_reporter *reporter,
+		      const char *msg, void *priv_ctx)
+{
+	return 0;
+}
 #endif
 
 #endif /* _NET_DEVLINK_H_ */
diff --git a/include/trace/events/devlink.h b/include/trace/events/devlink.h
index 44acfbca1266..7e39d2fc7c75 100644
--- a/include/trace/events/devlink.h
+++ b/include/trace/events/devlink.h
@@ -46,6 +46,65 @@ TRACE_EVENT(devlink_hwmsg,
 		  (int) __entry->len, __get_dynamic_array(buf), __entry->len)
 );
 
+TRACE_EVENT(devlink_health_report,
+	TP_PROTO(const struct devlink *devlink, const char *reporter_name,
+		 const char *msg),
+
+	TP_ARGS(devlink, reporter_name, msg),
+
+	TP_STRUCT__entry(
+		__string(bus_name, devlink->dev->bus->name)
+		__string(dev_name, dev_name(devlink->dev))
+		__string(driver_name, devlink->dev->driver->name)
+		__string(reporter_name, msg)
+		__string(msg, msg)
+	),
+
+	TP_fast_assign(
+		__assign_str(bus_name, devlink->dev->bus->name);
+		__assign_str(dev_name, dev_name(devlink->dev));
+		__assign_str(driver_name, devlink->dev->driver->name);
+		__assign_str(reporter_name, reporter_name);
+		__assign_str(msg, msg);
+	),
+
+	TP_printk("bus_name=%s dev_name=%s driver_name=%s reporter_name=%s: %s",
+		  __get_str(bus_name), __get_str(dev_name),
+		  __get_str(driver_name), __get_str(reporter_name),
+		  __get_str(msg))
+);
+
+TRACE_EVENT(devlink_health_recover_aborted,
+	TP_PROTO(const struct devlink *devlink, const char *reporter_name,
+		 bool health_state, u64 time_since_last_recover),
+
+	TP_ARGS(devlink, reporter_name, health_state, time_since_last_recover),
+
+	TP_STRUCT__entry(
+		__string(bus_name, devlink->dev->bus->name)
+		__string(dev_name, dev_name(devlink->dev))
+		__string(driver_name, devlink->dev->driver->name)
+		__string(reporter_name, reporter_name)
+		__field(bool, health_state)
+		__field(u64, time_since_last_recover)
+	),
+
+	TP_fast_assign(
+		__assign_str(bus_name, devlink->dev->bus->name);
+		__assign_str(dev_name, dev_name(devlink->dev));
+		__assign_str(driver_name, devlink->dev->driver->name);
+		__assign_str(reporter_name, reporter_name);
+		__entry->health_state = health_state;
+		__entry->time_since_last_recover = time_since_last_recover;
+	),
+
+	TP_printk("bus_name=%s dev_name=%s driver_name=%s reporter_name=%s: health_state=%d time_since_last_recover = %llu recover aborted",
+		  __get_str(bus_name), __get_str(dev_name),
+		  __get_str(driver_name), __get_str(reporter_name),
+		  __entry->health_state,
+		  __entry->time_since_last_recover)
+);
+
 #endif /* _TRACE_DEVLINK_H */
 
 /* This part must be outside protection */
@@ -64,6 +123,9 @@ static inline void trace_devlink_hwmsg(const struct devlink *devlink,
 {
 }
 
+static inline void trace_devlink_health(const char *msg)
+{
+}
 #endif /* _TRACE_DEVLINK_H */
 
 #endif
diff --git a/net/core/devlink.c b/net/core/devlink.c
index fec169a28dba..943d3e7dea6a 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -4110,6 +4110,16 @@ struct devlink_health_reporter {
 	u64 graceful_period;
 	bool auto_recover;
 	u8 health_state;
+	u8 dump_avail;
+	u64 dump_ts;
+	u64 error_count;
+	u64 recovery_count;
+	u64 last_recovery_ts;
+};
+
+enum devlink_health_reporter_state {
+	DEVLINK_HEALTH_REPORTER_STATE_HEALTHY,
+	DEVLINK_HEALTH_REPORTER_STATE_ERROR,
 };
 
 void *
@@ -4224,6 +4234,89 @@ devlink_health_reporter_destroy(struct devlink_health_reporter *reporter)
 }
 EXPORT_SYMBOL_GPL(devlink_health_reporter_destroy);
 
+static int
+devlink_health_reporter_recover(struct devlink_health_reporter *reporter,
+				void *priv_ctx)
+{
+	int err;
+
+	if (!reporter->ops->recover)
+		return -EOPNOTSUPP;
+
+	err = reporter->ops->recover(reporter, priv_ctx);
+	if (err)
+		return err;
+
+	reporter->recovery_count++;
+	reporter->health_state = DEVLINK_HEALTH_REPORTER_STATE_HEALTHY;
+	reporter->last_recovery_ts = jiffies;
+
+	return 0;
+}
+
+static int devlink_health_do_dump(struct devlink_health_reporter *reporter,
+				  void *priv_ctx)
+{
+	int err;
+
+	if (!reporter->ops->dump)
+		return 0;
+
+	if (reporter->dump_avail)
+		return 0;
+
+	devlink_health_buffers_reset(reporter->dump_buffers_array,
+				     DEVLINK_HEALTH_SIZE_TO_BUFFERS(reporter->ops->dump_size));
+	err = reporter->ops->dump(reporter, reporter->dump_buffers_array,
+				     DEVLINK_HEALTH_BUFFER_SIZE,
+				     DEVLINK_HEALTH_SIZE_TO_BUFFERS(reporter->ops->dump_size),
+				     priv_ctx);
+	if (!err) {
+		reporter->dump_avail = true;
+		reporter->dump_ts = jiffies;
+	}
+
+	return err;
+}
+
+int devlink_health_report(struct devlink_health_reporter *reporter,
+			  const char *msg, void *priv_ctx)
+{
+	struct devlink *devlink = reporter->devlink;
+	int err = 0;
+
+	/* write a log message of the current error */
+	WARN_ON(!msg);
+	trace_devlink_health_report(devlink, reporter->ops->name, msg);
+	reporter->error_count++;
+
+	/* abort if the previous error wasn't recovered */
+	if (reporter->auto_recover &&
+	    (reporter->health_state != DEVLINK_HEALTH_REPORTER_STATE_HEALTHY ||
+	     jiffies - reporter->last_recovery_ts <
+	     msecs_to_jiffies(reporter->graceful_period))) {
+		trace_devlink_health_recover_aborted(devlink,
+						     reporter->ops->name,
+						     reporter->health_state,
+						     jiffies -
+						     reporter->last_recovery_ts);
+		return -ECANCELED;
+	}
+
+	reporter->health_state = DEVLINK_HEALTH_REPORTER_STATE_ERROR;
+
+	mutex_lock(&reporter->dump_lock);
+	/* store current dump of current error, for later analysis */
+	devlink_health_do_dump(reporter, priv_ctx);
+	mutex_unlock(&reporter->dump_lock);
+
+	if (reporter->auto_recover)
+		err = devlink_health_reporter_recover(reporter, priv_ctx);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(devlink_health_report);
+
 static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
 	[DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING },
 	[DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING },
-- 
cgit v1.2.3


From ff253fedab961b22117a73ab808fcfa9e6852b50 Mon Sep 17 00:00:00 2001
From: Eran Ben Elisha <eranbe@mellanox.com>
Date: Thu, 17 Jan 2019 23:59:13 +0200
Subject: devlink: Add health get command

Add devlink health get command to provide reporter/s data for user space.
Add the ability to get data per reporter or dump data from all available
reporters.

Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
Reviewed-by: Moshe Shemesh <moshe@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/devlink.h |  12 ++++
 net/core/devlink.c           | 152 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 164 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index cff0e0cb5ac2..c05470578b99 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -89,6 +89,8 @@ enum devlink_command {
 	DEVLINK_CMD_REGION_DEL,
 	DEVLINK_CMD_REGION_READ,
 
+	DEVLINK_CMD_HEALTH_REPORTER_GET,
+
 	/* add new commands above here */
 	__DEVLINK_CMD_MAX,
 	DEVLINK_CMD_MAX = __DEVLINK_CMD_MAX - 1
@@ -293,6 +295,16 @@ enum devlink_attr {
 	DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_TYPE,	/* u8 */
 	DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_DATA,	/* dynamic */
 
+	DEVLINK_ATTR_HEALTH_REPORTER,			/* nested */
+	DEVLINK_ATTR_HEALTH_REPORTER_NAME,		/* string */
+	DEVLINK_ATTR_HEALTH_REPORTER_STATE,		/* u8 */
+	DEVLINK_ATTR_HEALTH_REPORTER_ERR,		/* u64 */
+	DEVLINK_ATTR_HEALTH_REPORTER_RECOVER,		/* u64 */
+	DEVLINK_ATTR_HEALTH_REPORTER_DUMP_AVAIL,	/* u8 */
+	DEVLINK_ATTR_HEALTH_REPORTER_DUMP_TS,		/* u64 */
+	DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD,	/* u64 */
+	DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER,	/* u8 */
+
 	/* add new attributes above here, update the policy in devlink.c */
 
 	__DEVLINK_ATTR_MAX,
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 943d3e7dea6a..2ba9275449c2 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -4317,6 +4317,149 @@ int devlink_health_report(struct devlink_health_reporter *reporter,
 }
 EXPORT_SYMBOL_GPL(devlink_health_report);
 
+static struct devlink_health_reporter *
+devlink_health_reporter_get_from_info(struct devlink *devlink,
+				      struct genl_info *info)
+{
+	char *reporter_name;
+
+	if (!info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_NAME])
+		return NULL;
+
+	reporter_name =
+		nla_data(info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_NAME]);
+	return devlink_health_reporter_find_by_name(devlink, reporter_name);
+}
+
+static int
+devlink_nl_health_reporter_fill(struct sk_buff *msg,
+				struct devlink *devlink,
+				struct devlink_health_reporter *reporter,
+				enum devlink_command cmd, u32 portid,
+				u32 seq, int flags)
+{
+	struct nlattr *reporter_attr;
+	void *hdr;
+
+	hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
+	if (!hdr)
+		return -EMSGSIZE;
+
+	if (devlink_nl_put_handle(msg, devlink))
+		goto genlmsg_cancel;
+
+	reporter_attr = nla_nest_start(msg, DEVLINK_ATTR_HEALTH_REPORTER);
+	if (!reporter_attr)
+		goto genlmsg_cancel;
+	if (nla_put_string(msg, DEVLINK_ATTR_HEALTH_REPORTER_NAME,
+			   reporter->ops->name))
+		goto reporter_nest_cancel;
+	if (nla_put_u8(msg, DEVLINK_ATTR_HEALTH_REPORTER_STATE,
+		       reporter->health_state))
+		goto reporter_nest_cancel;
+	if (nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_ERR,
+			      reporter->error_count, DEVLINK_ATTR_PAD))
+		goto reporter_nest_cancel;
+	if (nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_RECOVER,
+			      reporter->recovery_count, DEVLINK_ATTR_PAD))
+		goto reporter_nest_cancel;
+	if (nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD,
+			      reporter->graceful_period,
+			      DEVLINK_ATTR_PAD))
+		goto reporter_nest_cancel;
+	if (nla_put_u8(msg, DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER,
+		       reporter->auto_recover))
+		goto reporter_nest_cancel;
+	if (nla_put_u8(msg, DEVLINK_ATTR_HEALTH_REPORTER_DUMP_AVAIL,
+		       reporter->dump_avail))
+		goto reporter_nest_cancel;
+	if (reporter->dump_avail &&
+	    nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_DUMP_TS,
+			      jiffies_to_msecs(reporter->dump_ts),
+			      DEVLINK_ATTR_PAD))
+		goto reporter_nest_cancel;
+
+	nla_nest_end(msg, reporter_attr);
+	genlmsg_end(msg, hdr);
+	return 0;
+
+reporter_nest_cancel:
+	nla_nest_end(msg, reporter_attr);
+genlmsg_cancel:
+	genlmsg_cancel(msg, hdr);
+	return -EMSGSIZE;
+}
+
+static int devlink_nl_cmd_health_reporter_get_doit(struct sk_buff *skb,
+						   struct genl_info *info)
+{
+	struct devlink *devlink = info->user_ptr[0];
+	struct devlink_health_reporter *reporter;
+	struct sk_buff *msg;
+	int err;
+
+	reporter = devlink_health_reporter_get_from_info(devlink, info);
+	if (!reporter)
+		return -EINVAL;
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+
+	err = devlink_nl_health_reporter_fill(msg, devlink, reporter,
+					      DEVLINK_CMD_HEALTH_REPORTER_GET,
+					      info->snd_portid, info->snd_seq,
+					      0);
+	if (err) {
+		nlmsg_free(msg);
+		return err;
+	}
+
+	return genlmsg_reply(msg, info);
+}
+
+static int
+devlink_nl_cmd_health_reporter_get_dumpit(struct sk_buff *msg,
+					  struct netlink_callback *cb)
+{
+	struct devlink_health_reporter *reporter;
+	struct devlink *devlink;
+	int start = cb->args[0];
+	int idx = 0;
+	int err;
+
+	mutex_lock(&devlink_mutex);
+	list_for_each_entry(devlink, &devlink_list, list) {
+		if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+			continue;
+		mutex_lock(&devlink->lock);
+		list_for_each_entry(reporter, &devlink->reporter_list,
+				    list) {
+			if (idx < start) {
+				idx++;
+				continue;
+			}
+			err = devlink_nl_health_reporter_fill(msg, devlink,
+							      reporter,
+							      DEVLINK_CMD_HEALTH_REPORTER_GET,
+							      NETLINK_CB(cb->skb).portid,
+							      cb->nlh->nlmsg_seq,
+							      NLM_F_MULTI);
+			if (err) {
+				mutex_unlock(&devlink->lock);
+				goto out;
+			}
+			idx++;
+		}
+		mutex_unlock(&devlink->lock);
+	}
+out:
+	mutex_unlock(&devlink_mutex);
+
+	cb->args[0] = idx;
+	return msg->len;
+}
+
 static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
 	[DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING },
 	[DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING },
@@ -4342,6 +4485,7 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
 	[DEVLINK_ATTR_PARAM_VALUE_CMODE] = { .type = NLA_U8 },
 	[DEVLINK_ATTR_REGION_NAME] = { .type = NLA_NUL_STRING },
 	[DEVLINK_ATTR_REGION_SNAPSHOT_ID] = { .type = NLA_U32 },
+	[DEVLINK_ATTR_HEALTH_REPORTER_NAME] = { .type = NLA_NUL_STRING },
 };
 
 static const struct genl_ops devlink_nl_ops[] = {
@@ -4562,6 +4706,14 @@ static const struct genl_ops devlink_nl_ops[] = {
 		.flags = GENL_ADMIN_PERM,
 		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
 	},
+	{
+		.cmd = DEVLINK_CMD_HEALTH_REPORTER_GET,
+		.doit = devlink_nl_cmd_health_reporter_get_doit,
+		.dumpit = devlink_nl_cmd_health_reporter_get_dumpit,
+		.policy = devlink_nl_policy,
+		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+		/* can be retrieved by unprivileged users */
+	},
 };
 
 static struct genl_family devlink_nl_family __ro_after_init = {
-- 
cgit v1.2.3


From 6f9d56132eb6d2603d4273cfc65bed914ec47acb Mon Sep 17 00:00:00 2001
From: Eran Ben Elisha <eranbe@mellanox.com>
Date: Thu, 17 Jan 2019 23:59:14 +0200
Subject: devlink: Add health set command

Add devlink health set command, in order to set configuration parameters
for a specific reporter.
Supported parameters are:
- graceful_period: Time interval between auto recoveries (in msec)
- auto_recover: Determines if the devlink shall execute recover upon
		receiving error for the reporter

Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
Reviewed-by: Moshe Shemesh <moshe@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/devlink.h |  1 +
 net/core/devlink.c           | 36 ++++++++++++++++++++++++++++++++++++
 2 files changed, 37 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index c05470578b99..49ad5a76b121 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -90,6 +90,7 @@ enum devlink_command {
 	DEVLINK_CMD_REGION_READ,
 
 	DEVLINK_CMD_HEALTH_REPORTER_GET,
+	DEVLINK_CMD_HEALTH_REPORTER_SET,
 
 	/* add new commands above here */
 	__DEVLINK_CMD_MAX,
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 2ba9275449c2..a34414bf1c27 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -4460,6 +4460,33 @@ out:
 	return msg->len;
 }
 
+static int
+devlink_nl_cmd_health_reporter_set_doit(struct sk_buff *skb,
+					struct genl_info *info)
+{
+	struct devlink *devlink = info->user_ptr[0];
+	struct devlink_health_reporter *reporter;
+
+	reporter = devlink_health_reporter_get_from_info(devlink, info);
+	if (!reporter)
+		return -EINVAL;
+
+	if (!reporter->ops->recover &&
+	    (info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD] ||
+	     info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER]))
+		return -EINVAL;
+
+	if (info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD])
+		reporter->graceful_period =
+			nla_get_u64(info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD]);
+
+	if (info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER])
+		reporter->auto_recover =
+			nla_get_u8(info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER]);
+
+	return 0;
+}
+
 static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
 	[DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING },
 	[DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING },
@@ -4486,6 +4513,8 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
 	[DEVLINK_ATTR_REGION_NAME] = { .type = NLA_NUL_STRING },
 	[DEVLINK_ATTR_REGION_SNAPSHOT_ID] = { .type = NLA_U32 },
 	[DEVLINK_ATTR_HEALTH_REPORTER_NAME] = { .type = NLA_NUL_STRING },
+	[DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD] = { .type = NLA_U64 },
+	[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER] = { .type = NLA_U8 },
 };
 
 static const struct genl_ops devlink_nl_ops[] = {
@@ -4714,6 +4743,13 @@ static const struct genl_ops devlink_nl_ops[] = {
 		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
 		/* can be retrieved by unprivileged users */
 	},
+	{
+		.cmd = DEVLINK_CMD_HEALTH_REPORTER_SET,
+		.doit = devlink_nl_cmd_health_reporter_set_doit,
+		.policy = devlink_nl_policy,
+		.flags = GENL_ADMIN_PERM,
+		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+	},
 };
 
 static struct genl_family devlink_nl_family __ro_after_init = {
-- 
cgit v1.2.3


From fcd852c69d776c0f46c8f79e8e431e5cc6ddc7b7 Mon Sep 17 00:00:00 2001
From: Eran Ben Elisha <eranbe@mellanox.com>
Date: Thu, 17 Jan 2019 23:59:15 +0200
Subject: devlink: Add health recover command

Add devlink health recover command to the uapi, in order to allow the user
to execute a recover operation over a specific reporter.

Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
Reviewed-by: Moshe Shemesh <moshe@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/devlink.h |  1 +
 net/core/devlink.c           | 20 ++++++++++++++++++++
 2 files changed, 21 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index 49ad5a76b121..1c186fd77343 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -91,6 +91,7 @@ enum devlink_command {
 
 	DEVLINK_CMD_HEALTH_REPORTER_GET,
 	DEVLINK_CMD_HEALTH_REPORTER_SET,
+	DEVLINK_CMD_HEALTH_REPORTER_RECOVER,
 
 	/* add new commands above here */
 	__DEVLINK_CMD_MAX,
diff --git a/net/core/devlink.c b/net/core/devlink.c
index a34414bf1c27..b224d0d31c0c 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -4487,6 +4487,19 @@ devlink_nl_cmd_health_reporter_set_doit(struct sk_buff *skb,
 	return 0;
 }
 
+static int devlink_nl_cmd_health_reporter_recover_doit(struct sk_buff *skb,
+						       struct genl_info *info)
+{
+	struct devlink *devlink = info->user_ptr[0];
+	struct devlink_health_reporter *reporter;
+
+	reporter = devlink_health_reporter_get_from_info(devlink, info);
+	if (!reporter)
+		return -EINVAL;
+
+	return devlink_health_reporter_recover(reporter, NULL);
+}
+
 static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
 	[DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING },
 	[DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING },
@@ -4750,6 +4763,13 @@ static const struct genl_ops devlink_nl_ops[] = {
 		.flags = GENL_ADMIN_PERM,
 		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
 	},
+	{
+		.cmd = DEVLINK_CMD_HEALTH_REPORTER_RECOVER,
+		.doit = devlink_nl_cmd_health_reporter_recover_doit,
+		.policy = devlink_nl_policy,
+		.flags = GENL_ADMIN_PERM,
+		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+	},
 };
 
 static struct genl_family devlink_nl_family __ro_after_init = {
-- 
cgit v1.2.3


From 8a66704a13d9713593342e29b4f0c19762f5746b Mon Sep 17 00:00:00 2001
From: Eran Ben Elisha <eranbe@mellanox.com>
Date: Thu, 17 Jan 2019 23:59:16 +0200
Subject: devlink: Add health diagnose command

Add devlink health diagnose command, in order to run a diagnose
operation over a specific reporter.

It is expected from driver's callback for diagnose command to fill it
via the buffer descriptors API. Devlink will parse it and convert it to
netlink nla API in order to pass it to the user.

Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
Reviewed-by: Moshe Shemesh <moshe@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/devlink.h |  1 +
 net/core/devlink.c           | 51 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 52 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index 1c186fd77343..51b4d7612cf8 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -92,6 +92,7 @@ enum devlink_command {
 	DEVLINK_CMD_HEALTH_REPORTER_GET,
 	DEVLINK_CMD_HEALTH_REPORTER_SET,
 	DEVLINK_CMD_HEALTH_REPORTER_RECOVER,
+	DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE,
 
 	/* add new commands above here */
 	__DEVLINK_CMD_MAX,
diff --git a/net/core/devlink.c b/net/core/devlink.c
index b224d0d31c0c..57252ca31e1e 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -4500,6 +4500,50 @@ static int devlink_nl_cmd_health_reporter_recover_doit(struct sk_buff *skb,
 	return devlink_health_reporter_recover(reporter, NULL);
 }
 
+static int devlink_nl_cmd_health_reporter_diagnose_doit(struct sk_buff *skb,
+							struct genl_info *info)
+{
+	struct devlink *devlink = info->user_ptr[0];
+	struct devlink_health_reporter *reporter;
+	u64 num_of_buffers;
+	int err;
+
+	reporter = devlink_health_reporter_get_from_info(devlink, info);
+	if (!reporter)
+		return -EINVAL;
+
+	if (!reporter->ops->diagnose)
+		return -EOPNOTSUPP;
+
+	num_of_buffers =
+		DEVLINK_HEALTH_SIZE_TO_BUFFERS(reporter->ops->diagnose_size);
+
+	mutex_lock(&reporter->diagnose_lock);
+	devlink_health_buffers_reset(reporter->diagnose_buffers_array,
+				     num_of_buffers);
+
+	err = reporter->ops->diagnose(reporter,
+				      reporter->diagnose_buffers_array,
+				      DEVLINK_HEALTH_BUFFER_SIZE,
+				      num_of_buffers);
+	if (err)
+		goto out;
+
+	err = devlink_health_buffer_snd(info,
+					DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE,
+					0, reporter->diagnose_buffers_array,
+					num_of_buffers);
+	if (err)
+		goto out;
+
+	mutex_unlock(&reporter->diagnose_lock);
+	return 0;
+
+out:
+	mutex_unlock(&reporter->diagnose_lock);
+	return err;
+}
+
 static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
 	[DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING },
 	[DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING },
@@ -4770,6 +4814,13 @@ static const struct genl_ops devlink_nl_ops[] = {
 		.flags = GENL_ADMIN_PERM,
 		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
 	},
+	{
+		.cmd = DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE,
+		.doit = devlink_nl_cmd_health_reporter_diagnose_doit,
+		.policy = devlink_nl_policy,
+		.flags = GENL_ADMIN_PERM,
+		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+	},
 };
 
 static struct genl_family devlink_nl_family __ro_after_init = {
-- 
cgit v1.2.3


From 12bd0dcefe88782ac1c9fff632958dd1b71d27e5 Mon Sep 17 00:00:00 2001
From: Eran Ben Elisha <eranbe@mellanox.com>
Date: Thu, 17 Jan 2019 23:59:17 +0200
Subject: devlink: Add health dump {get,clear} commands

Add devlink health dump commands, in order to run an dump operation
over a specific reporter.

The supported operations are dump_get in order to get last saved
dump (if not exist, dump now) and dump_clear to clear last saved
dump.

It is expected from driver's callback for diagnose command to fill it
via the buffer descriptors API. Devlink will parse it and convert it to
netlink nla API in order to pass it to the user.

Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
Reviewed-by: Moshe Shemesh <moshe@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/devlink.h |  2 ++
 net/core/devlink.c           | 75 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 77 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index 51b4d7612cf8..6b26bb2ce4dc 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -93,6 +93,8 @@ enum devlink_command {
 	DEVLINK_CMD_HEALTH_REPORTER_SET,
 	DEVLINK_CMD_HEALTH_REPORTER_RECOVER,
 	DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE,
+	DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET,
+	DEVLINK_CMD_HEALTH_REPORTER_DUMP_CLEAR,
 
 	/* add new commands above here */
 	__DEVLINK_CMD_MAX,
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 57252ca31e1e..60248a53c0ad 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -4544,6 +4544,65 @@ out:
 	return err;
 }
 
+static void
+devlink_health_dump_clear(struct devlink_health_reporter *reporter)
+{
+	reporter->dump_avail = false;
+	reporter->dump_ts = 0;
+	devlink_health_buffers_reset(reporter->dump_buffers_array,
+				     DEVLINK_HEALTH_SIZE_TO_BUFFERS(reporter->ops->dump_size));
+}
+
+static int devlink_nl_cmd_health_reporter_dump_get_doit(struct sk_buff *skb,
+							struct genl_info *info)
+{
+	struct devlink *devlink = info->user_ptr[0];
+	struct devlink_health_reporter *reporter;
+	u64 num_of_buffers;
+	int err;
+
+	reporter = devlink_health_reporter_get_from_info(devlink, info);
+	if (!reporter)
+		return -EINVAL;
+
+	if (!reporter->ops->dump)
+		return -EOPNOTSUPP;
+
+	num_of_buffers =
+		DEVLINK_HEALTH_SIZE_TO_BUFFERS(reporter->ops->dump_size);
+
+	mutex_lock(&reporter->dump_lock);
+	err = devlink_health_do_dump(reporter, NULL);
+	if (err)
+		goto out;
+
+	err = devlink_health_buffer_snd(info,
+					DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET,
+					0, reporter->dump_buffers_array,
+					num_of_buffers);
+
+out:
+	mutex_unlock(&reporter->dump_lock);
+	return err;
+}
+
+static int
+devlink_nl_cmd_health_reporter_dump_clear_doit(struct sk_buff *skb,
+					       struct genl_info *info)
+{
+	struct devlink *devlink = info->user_ptr[0];
+	struct devlink_health_reporter *reporter;
+
+	reporter = devlink_health_reporter_get_from_info(devlink, info);
+	if (!reporter)
+		return -EINVAL;
+
+	mutex_lock(&reporter->dump_lock);
+	devlink_health_dump_clear(reporter);
+	mutex_unlock(&reporter->dump_lock);
+	return 0;
+}
+
 static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
 	[DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING },
 	[DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING },
@@ -4821,6 +4880,22 @@ static const struct genl_ops devlink_nl_ops[] = {
 		.flags = GENL_ADMIN_PERM,
 		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
 	},
+	{
+		.cmd = DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET,
+		.doit = devlink_nl_cmd_health_reporter_dump_get_doit,
+		.policy = devlink_nl_policy,
+		.flags = GENL_ADMIN_PERM,
+		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
+				  DEVLINK_NL_FLAG_NO_LOCK,
+	},
+	{
+		.cmd = DEVLINK_CMD_HEALTH_REPORTER_DUMP_CLEAR,
+		.doit = devlink_nl_cmd_health_reporter_dump_clear_doit,
+		.policy = devlink_nl_policy,
+		.flags = GENL_ADMIN_PERM,
+		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
+				  DEVLINK_NL_FLAG_NO_LOCK,
+	},
 };
 
 static struct genl_family devlink_nl_family __ro_after_init = {
-- 
cgit v1.2.3


From 1866760096bf40bcf6977a9076b3026598bc12ee Mon Sep 17 00:00:00 2001
From: Toke Høiland-Jørgensen <toke@toke.dk>
Date: Tue, 18 Dec 2018 17:02:06 -0800
Subject: mac80211: Add TXQ scheduling API
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This adds an API to mac80211 to handle scheduling of TXQs. The interface
between driver and mac80211 for TXQ handling is changed by adding two new
functions: ieee80211_next_txq(), which will return the next TXQ to schedule
in the current round-robin rotation, and ieee80211_return_txq(), which the
driver uses to indicate that it has finished scheduling a TXQ (which will
then be put back in the scheduling rotation if it isn't empty).

The driver must call ieee80211_txq_schedule_start() at the start of each
scheduling session, and ieee80211_txq_schedule_end() at the end. The API
then guarantees that the same TXQ is not returned twice in the same
session (so a driver can loop on ieee80211_next_txq() without worrying
about breaking the loop.

Usage of the new API is optional, so drivers can be ported one at a time.
In this patch, the actual scheduling performed by mac80211 is simple
round-robin, but a subsequent commit adds airtime fairness awareness to the
scheduler.

Signed-off-by: Toke Høiland-Jørgensen <toke@toke.dk>
[minor kernel-doc fix, propagate sparse locking checks out]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h     | 63 +++++++++++++++++++++++++++++++++++++++++++---
 net/mac80211/agg-tx.c      |  2 +-
 net/mac80211/driver-ops.h  |  9 +++++++
 net/mac80211/ieee80211_i.h |  9 +++++++
 net/mac80211/main.c        |  5 ++++
 net/mac80211/sta_info.c    |  2 +-
 net/mac80211/tx.c          | 60 ++++++++++++++++++++++++++++++++++++++++++-
 7 files changed, 143 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 88219cc137c3..e8a057f071c4 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -108,9 +108,15 @@
  * The driver is expected to initialize its private per-queue data for stations
  * and interfaces in the .add_interface and .sta_add ops.
  *
- * The driver can't access the queue directly. To dequeue a frame, it calls
- * ieee80211_tx_dequeue(). Whenever mac80211 adds a new frame to a queue, it
- * calls the .wake_tx_queue driver op.
+ * The driver can't access the queue directly. To dequeue a frame from a
+ * txq, it calls ieee80211_tx_dequeue(). Whenever mac80211 adds a new frame to a
+ * queue, it calls the .wake_tx_queue driver op.
+ *
+ * Drivers can optionally delegate responsibility for scheduling queues to
+ * mac80211, to take advantage of airtime fairness accounting. In this case, to
+ * obtain the next queue to pull frames from, the driver calls
+ * ieee80211_next_txq(). The driver is then expected to return the txq using
+ * ieee80211_return_txq().
  *
  * For AP powersave TIM handling, the driver only needs to indicate if it has
  * buffered packets in the driver specific data structures by calling
@@ -6103,7 +6109,8 @@ void ieee80211_unreserve_tid(struct ieee80211_sta *sta, u8 tid);
  * ieee80211_tx_dequeue - dequeue a packet from a software tx queue
  *
  * @hw: pointer as obtained from ieee80211_alloc_hw()
- * @txq: pointer obtained from station or virtual interface
+ * @txq: pointer obtained from station or virtual interface, or from
+ *	ieee80211_next_txq()
  *
  * Returns the skb if successful, %NULL if no frame was available.
  *
@@ -6118,6 +6125,54 @@ void ieee80211_unreserve_tid(struct ieee80211_sta *sta, u8 tid);
 struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw,
 				     struct ieee80211_txq *txq);
 
+/**
+ * ieee80211_next_txq - get next tx queue to pull packets from
+ *
+ * @hw: pointer as obtained from ieee80211_alloc_hw()
+ * @ac: AC number to return packets from.
+ *
+ * Should only be called between calls to ieee80211_txq_schedule_start()
+ * and ieee80211_txq_schedule_end().
+ * Returns the next txq if successful, %NULL if no queue is eligible. If a txq
+ * is returned, it should be returned with ieee80211_return_txq() after the
+ * driver has finished scheduling it.
+ */
+struct ieee80211_txq *ieee80211_next_txq(struct ieee80211_hw *hw, u8 ac);
+
+/**
+ * ieee80211_return_txq - return a TXQ previously acquired by ieee80211_next_txq()
+ *
+ * @hw: pointer as obtained from ieee80211_alloc_hw()
+ * @txq: pointer obtained from station or virtual interface
+ *
+ * Should only be called between calls to ieee80211_txq_schedule_start()
+ * and ieee80211_txq_schedule_end().
+ */
+void ieee80211_return_txq(struct ieee80211_hw *hw, struct ieee80211_txq *txq);
+
+/**
+ * ieee80211_txq_schedule_start - acquire locks for safe scheduling of an AC
+ *
+ * @hw: pointer as obtained from ieee80211_alloc_hw()
+ * @ac: AC number to acquire locks for
+ *
+ * Acquire locks needed to schedule TXQs from the given AC. Should be called
+ * before ieee80211_next_txq() or ieee80211_return_txq().
+ */
+void ieee80211_txq_schedule_start(struct ieee80211_hw *hw, u8 ac)
+	__acquires(txq_lock);
+
+/**
+ * ieee80211_txq_schedule_end - release locks for safe scheduling of an AC
+ *
+ * @hw: pointer as obtained from ieee80211_alloc_hw()
+ * @ac: AC number to acquire locks for
+ *
+ * Release locks previously acquired by ieee80211_txq_schedule_end().
+ */
+void ieee80211_txq_schedule_end(struct ieee80211_hw *hw, u8 ac)
+	__releases(txq_lock);
+
 /**
  * ieee80211_txq_get_depth - get pending frame/byte count of given txq
  *
diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c
index 69e831bc317b..e94b1a0407af 100644
--- a/net/mac80211/agg-tx.c
+++ b/net/mac80211/agg-tx.c
@@ -229,7 +229,7 @@ ieee80211_agg_start_txq(struct sta_info *sta, int tid, bool enable)
 	clear_bit(IEEE80211_TXQ_STOP, &txqi->flags);
 	local_bh_disable();
 	rcu_read_lock();
-	drv_wake_tx_queue(sta->sdata->local, txqi);
+	schedule_and_wake_txq(sta->sdata->local, txqi);
 	rcu_read_unlock();
 	local_bh_enable();
 }
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index 3e0d5922a440..1aab1734b26f 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -1173,6 +1173,15 @@ static inline void drv_wake_tx_queue(struct ieee80211_local *local,
 	local->ops->wake_tx_queue(&local->hw, &txq->txq);
 }
 
+static inline void schedule_and_wake_txq(struct ieee80211_local *local,
+					 struct txq_info *txqi)
+{
+	spin_lock_bh(&local->active_txq_lock[txqi->txq.ac]);
+	ieee80211_return_txq(&local->hw, &txqi->txq);
+	spin_unlock_bh(&local->active_txq_lock[txqi->txq.ac]);
+	drv_wake_tx_queue(local, txqi);
+}
+
 static inline int drv_can_aggregate_in_amsdu(struct ieee80211_local *local,
 					     struct sk_buff *head,
 					     struct sk_buff *skb)
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 7dfb4e2f98b2..d1db27b1e989 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -831,6 +831,8 @@ enum txq_info_flags {
  *	a fq_flow which is already owned by a different tin
  * @def_cvars: codel vars for @def_flow
  * @frags: used to keep fragments created after dequeue
+ * @schedule_order: used with ieee80211_local->active_txqs
+ * @schedule_round: counter to prevent infinite loops on TXQ scheduling
  */
 struct txq_info {
 	struct fq_tin tin;
@@ -838,6 +840,8 @@ struct txq_info {
 	struct codel_vars def_cvars;
 	struct codel_stats cstats;
 	struct sk_buff_head frags;
+	struct list_head schedule_order;
+	u16 schedule_round;
 	unsigned long flags;
 
 	/* keep last! */
@@ -1129,6 +1133,11 @@ struct ieee80211_local {
 	struct codel_vars *cvars;
 	struct codel_params cparams;
 
+	/* protects active_txqs and txqi->schedule_order */
+	spinlock_t active_txq_lock[IEEE80211_NUM_ACS];
+	struct list_head active_txqs[IEEE80211_NUM_ACS];
+	u16 schedule_round[IEEE80211_NUM_ACS];
+
 	const struct ieee80211_ops *ops;
 
 	/*
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 87a729926734..9b9d6cadf56e 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -663,6 +663,11 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len,
 	spin_lock_init(&local->rx_path_lock);
 	spin_lock_init(&local->queue_stop_reason_lock);
 
+	for (i = 0; i < IEEE80211_NUM_ACS; i++) {
+		INIT_LIST_HEAD(&local->active_txqs[i]);
+		spin_lock_init(&local->active_txq_lock[i]);
+	}
+
 	INIT_LIST_HEAD(&local->chanctx_list);
 	mutex_init(&local->chanctx_mtx);
 
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index c4a8f115ed33..83e1c316a29e 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -1249,7 +1249,7 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta)
 		if (!sta->sta.txq[i] || !txq_has_queue(sta->sta.txq[i]))
 			continue;
 
-		drv_wake_tx_queue(local, to_txq_info(sta->sta.txq[i]));
+		schedule_and_wake_txq(local, to_txq_info(sta->sta.txq[i]));
 	}
 
 	skb_queue_head_init(&pending);
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index f170d6c6629a..544da6411620 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1449,6 +1449,7 @@ void ieee80211_txq_init(struct ieee80211_sub_if_data *sdata,
 	codel_vars_init(&txqi->def_cvars);
 	codel_stats_init(&txqi->cstats);
 	__skb_queue_head_init(&txqi->frags);
+	INIT_LIST_HEAD(&txqi->schedule_order);
 
 	txqi->txq.vif = &sdata->vif;
 
@@ -1489,6 +1490,9 @@ void ieee80211_txq_purge(struct ieee80211_local *local,
 
 	fq_tin_reset(fq, tin, fq_skb_free_func);
 	ieee80211_purge_tx_queue(&local->hw, &txqi->frags);
+	spin_lock_bh(&local->active_txq_lock[txqi->txq.ac]);
+	list_del_init(&txqi->schedule_order);
+	spin_unlock_bh(&local->active_txq_lock[txqi->txq.ac]);
 }
 
 void ieee80211_txq_set_params(struct ieee80211_local *local)
@@ -1605,7 +1609,7 @@ static bool ieee80211_queue_skb(struct ieee80211_local *local,
 	ieee80211_txq_enqueue(local, txqi, skb);
 	spin_unlock_bh(&fq->lock);
 
-	drv_wake_tx_queue(local, txqi);
+	schedule_and_wake_txq(local, txqi);
 
 	return true;
 }
@@ -3630,6 +3634,60 @@ out:
 }
 EXPORT_SYMBOL(ieee80211_tx_dequeue);
 
+struct ieee80211_txq *ieee80211_next_txq(struct ieee80211_hw *hw, u8 ac)
+{
+	struct ieee80211_local *local = hw_to_local(hw);
+	struct txq_info *txqi = NULL;
+
+	lockdep_assert_held(&local->active_txq_lock[ac]);
+
+	txqi = list_first_entry_or_null(&local->active_txqs[ac],
+					struct txq_info,
+					schedule_order);
+
+	if (!txqi || txqi->schedule_round == local->schedule_round[ac])
+		return NULL;
+
+	list_del_init(&txqi->schedule_order);
+	txqi->schedule_round = local->schedule_round[ac];
+	return &txqi->txq;
+}
+EXPORT_SYMBOL(ieee80211_next_txq);
+
+void ieee80211_return_txq(struct ieee80211_hw *hw,
+			  struct ieee80211_txq *txq)
+{
+	struct ieee80211_local *local = hw_to_local(hw);
+	struct txq_info *txqi = to_txq_info(txq);
+
+	lockdep_assert_held(&local->active_txq_lock[txq->ac]);
+
+	if (list_empty(&txqi->schedule_order) &&
+	    (!skb_queue_empty(&txqi->frags) || txqi->tin.backlog_packets))
+		list_add_tail(&txqi->schedule_order,
+			      &local->active_txqs[txq->ac]);
+}
+EXPORT_SYMBOL(ieee80211_return_txq);
+
+void ieee80211_txq_schedule_start(struct ieee80211_hw *hw, u8 ac)
+	__acquires(txq_lock)
+{
+	struct ieee80211_local *local = hw_to_local(hw);
+
+	spin_lock_bh(&local->active_txq_lock[ac]);
+	local->schedule_round[ac]++;
+}
+EXPORT_SYMBOL(ieee80211_txq_schedule_start);
+
+void ieee80211_txq_schedule_end(struct ieee80211_hw *hw, u8 ac)
+	__releases(txq_lock)
+{
+	struct ieee80211_local *local = hw_to_local(hw);
+
+	spin_unlock_bh(&local->active_txq_lock[ac]);
+}
+EXPORT_SYMBOL(ieee80211_txq_schedule_end);
+
 void __ieee80211_subif_start_xmit(struct sk_buff *skb,
 				  struct net_device *dev,
 				  u32 info_flags)
-- 
cgit v1.2.3


From 36647055b37ec78e9068f470f14e7cd75c001c22 Mon Sep 17 00:00:00 2001
From: Toke Høiland-Jørgensen <toke@toke.dk>
Date: Tue, 18 Dec 2018 17:02:07 -0800
Subject: cfg80211: Add airtime statistics and settings
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This adds TX airtime statistics to the cfg80211 station dump (to go along
with the RX info already present), and adds a new parameter to set the
airtime weight of each station. The latter allows userspace to implement
policies for different stations by varying their weights.

Signed-off-by: Toke Høiland-Jørgensen <toke@toke.dk>
[rmanohar@codeaurora.org: fixed checkpatch warnings]
Signed-off-by: Rajkumar Manoharan <rmanohar@codeaurora.org>
[move airtime weight != 0 check into policy]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h       | 11 ++++++++++-
 include/uapi/linux/nl80211.h | 15 +++++++++++++++
 net/wireless/nl80211.c       | 24 ++++++++++++++++++++++++
 3 files changed, 49 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index e0c41eb1c860..1691f52fcc80 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1003,6 +1003,7 @@ enum station_parameters_apply_mask {
  * @support_p2p_ps: information if station supports P2P PS mechanism
  * @he_capa: HE capabilities of station
  * @he_capa_len: the length of the HE capabilities
+ * @airtime_weight: airtime scheduler weight for this station
  */
 struct station_parameters {
 	const u8 *supported_rates;
@@ -1032,6 +1033,7 @@ struct station_parameters {
 	int support_p2p_ps;
 	const struct ieee80211_he_cap_elem *he_capa;
 	u8 he_capa_len;
+	u16 airtime_weight;
 };
 
 /**
@@ -1300,6 +1302,8 @@ struct cfg80211_tid_stats {
  *	from this peer
  * @connected_to_gate: true if mesh STA has a path to mesh gate
  * @rx_duration: aggregate PPDU duration(usecs) for all the frames from a peer
+ * @tx_duration: aggregate PPDU duration(usecs) for all the frames to a peer
+ * @airtime_weight: current airtime scheduling weight
  * @pertid: per-TID statistics, see &struct cfg80211_tid_stats, using the last
  *	(IEEE80211_NUM_TIDS) index for MSDUs not encapsulated in QoS-MPDUs.
  *	Note that this doesn't use the @filled bit, but is used if non-NULL.
@@ -1350,8 +1354,9 @@ struct station_info {
 
 	u32 expected_throughput;
 
-	u64 rx_beacon;
+	u64 tx_duration;
 	u64 rx_duration;
+	u64 rx_beacon;
 	u8 rx_beacon_signal_avg;
 	u8 connected_to_gate;
 
@@ -1359,6 +1364,8 @@ struct station_info {
 	s8 ack_signal;
 	s8 avg_ack_signal;
 
+	u16 airtime_weight;
+
 	u32 rx_mpdu_count;
 	u32 fcs_err_count;
 };
@@ -2391,6 +2398,8 @@ enum wiphy_params_flags {
 	WIPHY_PARAM_TXQ_QUANTUM		= 1 << 8,
 };
 
+#define IEEE80211_DEFAULT_AIRTIME_WEIGHT	256
+
 /**
  * struct cfg80211_pmksa - PMK Security Association
  *
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 31ae5c7f10e3..ebe79e12c82e 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -2299,6 +2299,9 @@ enum nl80211_commands {
  *	This is also used for capability advertisement in the wiphy information,
  *	with the appropriate sub-attributes.
  *
+ * @NL80211_ATTR_AIRTIME_WEIGHT: Station's weight when scheduled by the airtime
+ *	scheduler.
+ *
  * @NUM_NL80211_ATTR: total number of nl80211_attrs available
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
@@ -2748,6 +2751,8 @@ enum nl80211_attrs {
 
 	NL80211_ATTR_PEER_MEASUREMENTS,
 
+	NL80211_ATTR_AIRTIME_WEIGHT,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
@@ -3125,6 +3130,9 @@ enum nl80211_sta_bss_param {
  *	might not be fully accurate.
  * @NL80211_STA_INFO_CONNECTED_TO_GATE: set to true if STA has a path to a
  *	mesh gate (u8, 0 or 1)
+ * @NL80211_STA_INFO_TX_DURATION: aggregate PPDU duration for all frames
+ *	sent to the station (u64, usec)
+ * @NL80211_STA_INFO_AIRTIME_WEIGHT: current airtime weight for station (u16)
  * @__NL80211_STA_INFO_AFTER_LAST: internal
  * @NL80211_STA_INFO_MAX: highest possible station info attribute
  */
@@ -3168,6 +3176,8 @@ enum nl80211_sta_info {
 	NL80211_STA_INFO_RX_MPDUS,
 	NL80211_STA_INFO_FCS_ERROR_COUNT,
 	NL80211_STA_INFO_CONNECTED_TO_GATE,
+	NL80211_STA_INFO_TX_DURATION,
+	NL80211_STA_INFO_AIRTIME_WEIGHT,
 
 	/* keep last */
 	__NL80211_STA_INFO_AFTER_LAST,
@@ -5316,6 +5326,10 @@ enum nl80211_feature_flags {
  *      if this flag is not set. Ignoring this can leak clear text packets and/or
  *      freeze the connection.
  *
+ * @NL80211_EXT_FEATURE_AIRTIME_FAIRNESS: Driver supports getting airtime
+ *	fairness for transmitted packets and has enabled airtime fairness
+ *	scheduling.
+ *
  * @NUM_NL80211_EXT_FEATURES: number of extended features.
  * @MAX_NL80211_EXT_FEATURES: highest extended feature index.
  */
@@ -5355,6 +5369,7 @@ enum nl80211_ext_feature_index {
 	NL80211_EXT_FEATURE_SCAN_MIN_PREQ_CONTENT,
 	NL80211_EXT_FEATURE_CAN_REPLACE_PTK0,
 	NL80211_EXT_FEATURE_ENABLE_FTM_RESPONDER,
+	NL80211_EXT_FEATURE_AIRTIME_FAIRNESS,
 
 	/* add new features before the definition below */
 	NUM_NL80211_EXT_FEATURES,
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 5e49492d5911..a89688929b16 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -557,6 +557,7 @@ const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
 	[NL80211_ATTR_PEER_MEASUREMENTS] =
 		NLA_POLICY_NESTED(NL80211_PMSR_FTM_REQ_ATTR_MAX,
 				  nl80211_pmsr_attr_policy),
+	[NL80211_ATTR_AIRTIME_WEIGHT] = NLA_POLICY_MIN(NLA_U16, 1),
 };
 
 /* policy for the key attributes */
@@ -4851,6 +4852,11 @@ static int nl80211_send_station(struct sk_buff *msg, u32 cmd, u32 portid,
 	PUT_SINFO(PLID, plid, u16);
 	PUT_SINFO(PLINK_STATE, plink_state, u8);
 	PUT_SINFO_U64(RX_DURATION, rx_duration);
+	PUT_SINFO_U64(TX_DURATION, tx_duration);
+
+	if (wiphy_ext_feature_isset(&rdev->wiphy,
+				    NL80211_EXT_FEATURE_AIRTIME_FAIRNESS))
+		PUT_SINFO(AIRTIME_WEIGHT, airtime_weight, u16);
 
 	switch (rdev->wiphy.signal_type) {
 	case CFG80211_SIGNAL_TYPE_MBM:
@@ -5470,6 +5476,15 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info)
 			nla_get_u8(info->attrs[NL80211_ATTR_OPMODE_NOTIF]);
 	}
 
+	if (info->attrs[NL80211_ATTR_AIRTIME_WEIGHT])
+		params.airtime_weight =
+			nla_get_u16(info->attrs[NL80211_ATTR_AIRTIME_WEIGHT]);
+
+	if (params.airtime_weight &&
+	    !wiphy_ext_feature_isset(&rdev->wiphy,
+				     NL80211_EXT_FEATURE_AIRTIME_FAIRNESS))
+		return -EOPNOTSUPP;
+
 	/* Include parameters for TDLS peer (will check later) */
 	err = nl80211_set_station_tdls(info, &params);
 	if (err)
@@ -5598,6 +5613,15 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info)
 		params.plink_action =
 			nla_get_u8(info->attrs[NL80211_ATTR_STA_PLINK_ACTION]);
 
+	if (info->attrs[NL80211_ATTR_AIRTIME_WEIGHT])
+		params.airtime_weight =
+			nla_get_u16(info->attrs[NL80211_ATTR_AIRTIME_WEIGHT]);
+
+	if (params.airtime_weight &&
+	    !wiphy_ext_feature_isset(&rdev->wiphy,
+				     NL80211_EXT_FEATURE_AIRTIME_FAIRNESS))
+		return -EOPNOTSUPP;
+
 	err = nl80211_parse_sta_channel_info(info, &params);
 	if (err)
 		return err;
-- 
cgit v1.2.3


From b4809e9484da147dc82a31b961df111eee72201a Mon Sep 17 00:00:00 2001
From: Toke Høiland-Jørgensen <toke@toke.dk>
Date: Tue, 18 Dec 2018 17:02:08 -0800
Subject: mac80211: Add airtime accounting and scheduling to TXQs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This adds airtime accounting and scheduling to the mac80211 TXQ
scheduler. A new callback, ieee80211_sta_register_airtime(), is added
that drivers can call to report airtime usage for stations.

When airtime information is present, mac80211 will schedule TXQs
(through ieee80211_next_txq()) in a way that enforces airtime fairness
between active stations. This scheduling works the same way as the ath9k
in-driver airtime fairness scheduling. If no airtime usage is reported
by the driver, the scheduler will default to round-robin scheduling.

For drivers that don't control TXQ scheduling in software, a new API
function, ieee80211_txq_may_transmit(), is added which the driver can use
to check if the TXQ is eligible for transmission, or should be throttled to
enforce fairness. Calls to this function must also be enclosed in
ieee80211_txq_schedule_{start,end}() calls to ensure proper locking.

The API ieee80211_txq_may_transmit() also ensures that TXQ list will be
aligned aginst driver's own round-robin scheduler list. i.e it rotates
the TXQ list till it makes the requested node becomes the first entry
in TXQ list. Thus both the TXQ list and driver's list are in sync.

Co-developed-by: Rajkumar Manoharan <rmanohar@codeaurora.org>
Signed-off-by: Louie Lu <git@louie.lu>
[added debugfs write op to reset airtime counter]
Signed-off-by: Toke Høiland-Jørgensen <toke@toke.dk>
Signed-off-by: Rajkumar Manoharan <rmanohar@codeaurora.org>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h     | 59 ++++++++++++++++++++++++++++++
 net/mac80211/cfg.c         |  3 ++
 net/mac80211/debugfs.c     |  3 ++
 net/mac80211/debugfs_sta.c | 68 +++++++++++++++++++++++++++++++++--
 net/mac80211/ieee80211_i.h |  2 ++
 net/mac80211/main.c        |  4 +++
 net/mac80211/sta_info.c    | 44 +++++++++++++++++++++--
 net/mac80211/sta_info.h    | 13 +++++++
 net/mac80211/status.c      |  6 ++++
 net/mac80211/tx.c          | 90 +++++++++++++++++++++++++++++++++++++++++++---
 10 files changed, 282 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index e8a057f071c4..5d36eacf401e 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -2360,6 +2360,9 @@ enum ieee80211_hw_flags {
  * @tx_sk_pacing_shift: Pacing shift to set on TCP sockets when frames from
  *	them are encountered. The default should typically not be changed,
  *	unless the driver has good reasons for needing more buffers.
+ *
+ * @weight_multipler: Driver specific airtime weight multiplier used while
+ *	refilling deficit of each TXQ.
  */
 struct ieee80211_hw {
 	struct ieee80211_conf conf;
@@ -2396,6 +2399,7 @@ struct ieee80211_hw {
 	const struct ieee80211_cipher_scheme *cipher_schemes;
 	u8 max_nan_de_entries;
 	u8 tx_sk_pacing_shift;
+	u8 weight_multiplier;
 };
 
 static inline bool _ieee80211_hw_check(struct ieee80211_hw *hw,
@@ -5407,6 +5411,34 @@ void ieee80211_sta_eosp(struct ieee80211_sta *pubsta);
  */
 void ieee80211_send_eosp_nullfunc(struct ieee80211_sta *pubsta, int tid);
 
+/**
+ * ieee80211_sta_register_airtime - register airtime usage for a sta/tid
+ *
+ * Register airtime usage for a given sta on a given tid. The driver can call
+ * this function to notify mac80211 that a station used a certain amount of
+ * airtime. This information will be used by the TXQ scheduler to schedule
+ * stations in a way that ensures airtime fairness.
+ *
+ * The reported airtime should as a minimum include all time that is spent
+ * transmitting to the remote station, including overhead and padding, but not
+ * including time spent waiting for a TXOP. If the time is not reported by the
+ * hardware it can in some cases be calculated from the rate and known frame
+ * composition. When possible, the time should include any failed transmission
+ * attempts.
+ *
+ * The driver can either call this function synchronously for every packet or
+ * aggregate, or asynchronously as airtime usage information becomes available.
+ * TX and RX airtime can be reported together, or separately by setting one of
+ * them to 0.
+ *
+ * @pubsta: the station
+ * @tid: the TID to register airtime for
+ * @tx_airtime: airtime used during TX (in usec)
+ * @rx_airtime: airtime used during RX (in usec)
+ */
+void ieee80211_sta_register_airtime(struct ieee80211_sta *pubsta, u8 tid,
+				    u32 tx_airtime, u32 rx_airtime);
+
 /**
  * ieee80211_iter_keys - iterate keys programmed into the device
  * @hw: pointer obtained from ieee80211_alloc_hw()
@@ -6173,6 +6205,33 @@ void ieee80211_txq_schedule_start(struct ieee80211_hw *hw, u8 ac)
 void ieee80211_txq_schedule_end(struct ieee80211_hw *hw, u8 ac)
 	__releases(txq_lock);
 
+/**
+ * ieee80211_txq_may_transmit - check whether TXQ is allowed to transmit
+ *
+ * This function is used to check whether given txq is allowed to transmit by
+ * the airtime scheduler, and can be used by drivers to access the airtime
+ * fairness accounting without going using the scheduling order enfored by
+ * next_txq().
+ *
+ * Returns %true if the airtime scheduler thinks the TXQ should be allowed to
+ * transmit, and %false if it should be throttled. This function can also have
+ * the side effect of rotating the TXQ in the scheduler rotation, which will
+ * eventually bring the deficit to positive and allow the station to transmit
+ * again.
+ *
+ * The API ieee80211_txq_may_transmit() also ensures that TXQ list will be
+ * aligned aginst driver's own round-robin scheduler list. i.e it rotates
+ * the TXQ list till it makes the requested node becomes the first entry
+ * in TXQ list. Thus both the TXQ list and driver's list are in sync. If this
+ * function returns %true, the driver is expected to schedule packets
+ * for transmission, and then return the TXQ through ieee80211_return_txq().
+ *
+ * @hw: pointer as obtained from ieee80211_alloc_hw()
+ * @txq: pointer obtained from station or virtual interface
+ */
+bool ieee80211_txq_may_transmit(struct ieee80211_hw *hw,
+				struct ieee80211_txq *txq);
+
 /**
  * ieee80211_txq_get_depth - get pending frame/byte count of given txq
  *
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index de65fe3ed9cc..83ee573b1804 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -1447,6 +1447,9 @@ static int sta_apply_parameters(struct ieee80211_local *local,
 	if (ieee80211_vif_is_mesh(&sdata->vif))
 		sta_apply_mesh_params(local, sta, params);
 
+	if (params->airtime_weight)
+		sta->airtime_weight = params->airtime_weight;
+
 	/* set the STA state after all sta info from usermode has been set */
 	if (test_sta_flag(sta, WLAN_STA_TDLS_PEER) ||
 	    set & BIT(NL80211_STA_FLAG_ASSOCIATED)) {
diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c
index 3fe541e358f3..81c5fec2eae7 100644
--- a/net/mac80211/debugfs.c
+++ b/net/mac80211/debugfs.c
@@ -383,6 +383,9 @@ void debugfs_hw_add(struct ieee80211_local *local)
 	if (local->ops->wake_tx_queue)
 		DEBUGFS_ADD_MODE(aqm, 0600);
 
+	debugfs_create_u16("airtime_flags", 0600,
+			   phyd, &local->airtime_flags);
+
 	statsd = debugfs_create_dir("statistics", phyd);
 
 	/* if the dir failed, don't put all the other things into the root! */
diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c
index b753194710ad..3aa618dcc58e 100644
--- a/net/mac80211/debugfs_sta.c
+++ b/net/mac80211/debugfs_sta.c
@@ -181,9 +181,9 @@ static ssize_t sta_aqm_read(struct file *file, char __user *userbuf,
 			       txqi->tin.tx_bytes,
 			       txqi->tin.tx_packets,
 			       txqi->flags,
-			       txqi->flags & (1<<IEEE80211_TXQ_STOP) ? "STOP" : "RUN",
-			       txqi->flags & (1<<IEEE80211_TXQ_AMPDU) ? " AMPDU" : "",
-			       txqi->flags & (1<<IEEE80211_TXQ_NO_AMSDU) ? " NO-AMSDU" : "");
+			       test_bit(IEEE80211_TXQ_STOP, &txqi->flags) ? "STOP" : "RUN",
+			       test_bit(IEEE80211_TXQ_AMPDU, &txqi->flags) ? " AMPDU" : "",
+			       test_bit(IEEE80211_TXQ_NO_AMSDU, &txqi->flags) ? " NO-AMSDU" : "");
 	}
 
 	rcu_read_unlock();
@@ -195,6 +195,64 @@ static ssize_t sta_aqm_read(struct file *file, char __user *userbuf,
 }
 STA_OPS(aqm);
 
+static ssize_t sta_airtime_read(struct file *file, char __user *userbuf,
+				size_t count, loff_t *ppos)
+{
+	struct sta_info *sta = file->private_data;
+	struct ieee80211_local *local = sta->sdata->local;
+	size_t bufsz = 200;
+	char *buf = kzalloc(bufsz, GFP_KERNEL), *p = buf;
+	u64 rx_airtime = 0, tx_airtime = 0;
+	s64 deficit[IEEE80211_NUM_ACS];
+	ssize_t rv;
+	int ac;
+
+	if (!buf)
+		return -ENOMEM;
+
+	for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
+		spin_lock_bh(&local->active_txq_lock[ac]);
+		rx_airtime += sta->airtime[ac].rx_airtime;
+		tx_airtime += sta->airtime[ac].tx_airtime;
+		deficit[ac] = sta->airtime[ac].deficit;
+		spin_unlock_bh(&local->active_txq_lock[ac]);
+	}
+
+	p += scnprintf(p, bufsz + buf - p,
+		"RX: %llu us\nTX: %llu us\nWeight: %u\n"
+		"Deficit: VO: %lld us VI: %lld us BE: %lld us BK: %lld us\n",
+		rx_airtime,
+		tx_airtime,
+		sta->airtime_weight,
+		deficit[0],
+		deficit[1],
+		deficit[2],
+		deficit[3]);
+
+	rv = simple_read_from_buffer(userbuf, count, ppos, buf, p - buf);
+	kfree(buf);
+	return rv;
+}
+
+static ssize_t sta_airtime_write(struct file *file, const char __user *userbuf,
+				 size_t count, loff_t *ppos)
+{
+	struct sta_info *sta = file->private_data;
+	struct ieee80211_local *local = sta->sdata->local;
+	int ac;
+
+	for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
+		spin_lock_bh(&local->active_txq_lock[ac]);
+		sta->airtime[ac].rx_airtime = 0;
+		sta->airtime[ac].tx_airtime = 0;
+		sta->airtime[ac].deficit = sta->airtime_weight;
+		spin_unlock_bh(&local->active_txq_lock[ac]);
+	}
+
+	return count;
+}
+STA_OPS_RW(airtime);
+
 static ssize_t sta_agg_status_read(struct file *file, char __user *userbuf,
 					size_t count, loff_t *ppos)
 {
@@ -906,6 +964,10 @@ void ieee80211_sta_debugfs_add(struct sta_info *sta)
 	if (local->ops->wake_tx_queue)
 		DEBUGFS_ADD(aqm);
 
+	if (wiphy_ext_feature_isset(local->hw.wiphy,
+				    NL80211_EXT_FEATURE_AIRTIME_FAIRNESS))
+		DEBUGFS_ADD(airtime);
+
 	if (sizeof(sta->driver_buffered_tids) == sizeof(u32))
 		debugfs_create_x32("driver_buffered_tids", 0400,
 				   sta->debugfs_dir,
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index d1db27b1e989..056b16bce3b0 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -1138,6 +1138,8 @@ struct ieee80211_local {
 	struct list_head active_txqs[IEEE80211_NUM_ACS];
 	u16 schedule_round[IEEE80211_NUM_ACS];
 
+	u16 airtime_flags;
+
 	const struct ieee80211_ops *ops;
 
 	/*
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 9b9d6cadf56e..896f17d726d0 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -667,6 +667,7 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len,
 		INIT_LIST_HEAD(&local->active_txqs[i]);
 		spin_lock_init(&local->active_txq_lock[i]);
 	}
+	local->airtime_flags = AIRTIME_USE_TX | AIRTIME_USE_RX;
 
 	INIT_LIST_HEAD(&local->chanctx_list);
 	mutex_init(&local->chanctx_mtx);
@@ -1153,6 +1154,9 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
 	if (!local->hw.max_nan_de_entries)
 		local->hw.max_nan_de_entries = IEEE80211_MAX_NAN_INSTANCE_ID;
 
+	if (!local->hw.weight_multiplier)
+		local->hw.weight_multiplier = 1;
+
 	result = ieee80211_wep_init(local);
 	if (result < 0)
 		wiphy_debug(local->hw.wiphy, "Failed to initialize wep: %d\n",
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 83e1c316a29e..11f058987a54 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -90,7 +90,6 @@ static void __cleanup_single_sta(struct sta_info *sta)
 	struct tid_ampdu_tx *tid_tx;
 	struct ieee80211_sub_if_data *sdata = sta->sdata;
 	struct ieee80211_local *local = sdata->local;
-	struct fq *fq = &local->fq;
 	struct ps_data *ps;
 
 	if (test_sta_flag(sta, WLAN_STA_PS_STA) ||
@@ -120,9 +119,7 @@ static void __cleanup_single_sta(struct sta_info *sta)
 
 			txqi = to_txq_info(sta->sta.txq[i]);
 
-			spin_lock_bh(&fq->lock);
 			ieee80211_txq_purge(local, txqi);
-			spin_unlock_bh(&fq->lock);
 		}
 	}
 
@@ -387,9 +384,12 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
 	if (sta_prepare_rate_control(local, sta, gfp))
 		goto free_txq;
 
+	sta->airtime_weight = IEEE80211_DEFAULT_AIRTIME_WEIGHT;
+
 	for (i = 0; i < IEEE80211_NUM_ACS; i++) {
 		skb_queue_head_init(&sta->ps_tx_buf[i]);
 		skb_queue_head_init(&sta->tx_filtered[i]);
+		sta->airtime[i].deficit = sta->airtime_weight;
 	}
 
 	for (i = 0; i < IEEE80211_NUM_TIDS; i++)
@@ -1826,6 +1826,27 @@ void ieee80211_sta_set_buffered(struct ieee80211_sta *pubsta,
 }
 EXPORT_SYMBOL(ieee80211_sta_set_buffered);
 
+void ieee80211_sta_register_airtime(struct ieee80211_sta *pubsta, u8 tid,
+				    u32 tx_airtime, u32 rx_airtime)
+{
+	struct sta_info *sta = container_of(pubsta, struct sta_info, sta);
+	struct ieee80211_local *local = sta->sdata->local;
+	u8 ac = ieee80211_ac_from_tid(tid);
+	u32 airtime = 0;
+
+	if (sta->local->airtime_flags & AIRTIME_USE_TX)
+		airtime += tx_airtime;
+	if (sta->local->airtime_flags & AIRTIME_USE_RX)
+		airtime += rx_airtime;
+
+	spin_lock_bh(&local->active_txq_lock[ac]);
+	sta->airtime[ac].tx_airtime += tx_airtime;
+	sta->airtime[ac].rx_airtime += rx_airtime;
+	sta->airtime[ac].deficit -= airtime;
+	spin_unlock_bh(&local->active_txq_lock[ac]);
+}
+EXPORT_SYMBOL(ieee80211_sta_register_airtime);
+
 int sta_info_move_state(struct sta_info *sta,
 			enum ieee80211_sta_state new_state)
 {
@@ -2188,6 +2209,23 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo,
 		sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_FAILED);
 	}
 
+	if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_RX_DURATION))) {
+		for (ac = 0; ac < IEEE80211_NUM_ACS; ac++)
+			sinfo->rx_duration += sta->airtime[ac].rx_airtime;
+		sinfo->filled |= BIT_ULL(NL80211_STA_INFO_RX_DURATION);
+	}
+
+	if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_TX_DURATION))) {
+		for (ac = 0; ac < IEEE80211_NUM_ACS; ac++)
+			sinfo->tx_duration += sta->airtime[ac].tx_airtime;
+		sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_DURATION);
+	}
+
+	if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_AIRTIME_WEIGHT))) {
+		sinfo->airtime_weight = sta->airtime_weight;
+		sinfo->filled |= BIT_ULL(NL80211_STA_INFO_AIRTIME_WEIGHT);
+	}
+
 	sinfo->rx_dropped_misc = sta->rx_stats.dropped;
 	if (sta->pcpu_rx_stats) {
 		for_each_possible_cpu(cpu) {
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 8eb29041be54..9a380803e597 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -127,6 +127,16 @@ enum ieee80211_agg_stop_reason {
 	AGG_STOP_DESTROY_STA,
 };
 
+/* Debugfs flags to enable/disable use of RX/TX airtime in scheduler */
+#define AIRTIME_USE_TX		BIT(0)
+#define AIRTIME_USE_RX		BIT(1)
+
+struct airtime_info {
+	u64 rx_airtime;
+	u64 tx_airtime;
+	s64 deficit;
+};
+
 struct sta_info;
 
 /**
@@ -565,6 +575,9 @@ struct sta_info {
 	} tx_stats;
 	u16 tid_seq[IEEE80211_QOS_CTL_TID_MASK + 1];
 
+	struct airtime_info airtime[IEEE80211_NUM_ACS];
+	u16 airtime_weight;
+
 	/*
 	 * Aggregation information, locked with lock.
 	 */
diff --git a/net/mac80211/status.c b/net/mac80211/status.c
index 3f0b96e1e02f..5b9952b1caf3 100644
--- a/net/mac80211/status.c
+++ b/net/mac80211/status.c
@@ -823,6 +823,12 @@ static void __ieee80211_tx_status(struct ieee80211_hw *hw,
 			ieee80211_sta_tx_notify(sta->sdata, (void *) skb->data,
 						acked, info->status.tx_time);
 
+		if (info->status.tx_time &&
+		    wiphy_ext_feature_isset(local->hw.wiphy,
+					    NL80211_EXT_FEATURE_AIRTIME_FAIRNESS))
+			ieee80211_sta_register_airtime(&sta->sta, tid,
+						       info->status.tx_time, 0);
+
 		if (ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) {
 			if (info->flags & IEEE80211_TX_STAT_ACK) {
 				if (sta->status_stats.lost_packets)
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 544da6411620..f46d8d822f86 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1488,8 +1488,11 @@ void ieee80211_txq_purge(struct ieee80211_local *local,
 	struct fq *fq = &local->fq;
 	struct fq_tin *tin = &txqi->tin;
 
+	spin_lock_bh(&fq->lock);
 	fq_tin_reset(fq, tin, fq_skb_free_func);
 	ieee80211_purge_tx_queue(&local->hw, &txqi->frags);
+	spin_unlock_bh(&fq->lock);
+
 	spin_lock_bh(&local->active_txq_lock[txqi->txq.ac]);
 	list_del_init(&txqi->schedule_order);
 	spin_unlock_bh(&local->active_txq_lock[txqi->txq.ac]);
@@ -3641,11 +3644,28 @@ struct ieee80211_txq *ieee80211_next_txq(struct ieee80211_hw *hw, u8 ac)
 
 	lockdep_assert_held(&local->active_txq_lock[ac]);
 
+ begin:
 	txqi = list_first_entry_or_null(&local->active_txqs[ac],
 					struct txq_info,
 					schedule_order);
+	if (!txqi)
+		return NULL;
+
+	if (txqi->txq.sta) {
+		struct sta_info *sta = container_of(txqi->txq.sta,
+						struct sta_info, sta);
+
+		if (sta->airtime[txqi->txq.ac].deficit < 0) {
+			sta->airtime[txqi->txq.ac].deficit +=
+				sta->airtime_weight;
+			list_move_tail(&txqi->schedule_order,
+				       &local->active_txqs[txqi->txq.ac]);
+			goto begin;
+		}
+	}
+
 
-	if (!txqi || txqi->schedule_round == local->schedule_round[ac])
+	if (txqi->schedule_round == local->schedule_round[ac])
 		return NULL;
 
 	list_del_init(&txqi->schedule_order);
@@ -3663,12 +3683,74 @@ void ieee80211_return_txq(struct ieee80211_hw *hw,
 	lockdep_assert_held(&local->active_txq_lock[txq->ac]);
 
 	if (list_empty(&txqi->schedule_order) &&
-	    (!skb_queue_empty(&txqi->frags) || txqi->tin.backlog_packets))
-		list_add_tail(&txqi->schedule_order,
-			      &local->active_txqs[txq->ac]);
+	    (!skb_queue_empty(&txqi->frags) || txqi->tin.backlog_packets)) {
+		/* If airtime accounting is active, always enqueue STAs at the
+		 * head of the list to ensure that they only get moved to the
+		 * back by the airtime DRR scheduler once they have a negative
+		 * deficit. A station that already has a negative deficit will
+		 * get immediately moved to the back of the list on the next
+		 * call to ieee80211_next_txq().
+		 */
+		if (txqi->txq.sta &&
+		    wiphy_ext_feature_isset(local->hw.wiphy,
+					    NL80211_EXT_FEATURE_AIRTIME_FAIRNESS))
+			list_add(&txqi->schedule_order,
+				 &local->active_txqs[txq->ac]);
+		else
+			list_add_tail(&txqi->schedule_order,
+				      &local->active_txqs[txq->ac]);
+	}
 }
 EXPORT_SYMBOL(ieee80211_return_txq);
 
+bool ieee80211_txq_may_transmit(struct ieee80211_hw *hw,
+				struct ieee80211_txq *txq)
+{
+	struct ieee80211_local *local = hw_to_local(hw);
+	struct txq_info *iter, *tmp, *txqi = to_txq_info(txq);
+	struct sta_info *sta;
+	u8 ac = txq->ac;
+
+	lockdep_assert_held(&local->active_txq_lock[ac]);
+
+	if (!txqi->txq.sta)
+		goto out;
+
+	if (list_empty(&txqi->schedule_order))
+		goto out;
+
+	list_for_each_entry_safe(iter, tmp, &local->active_txqs[ac],
+				 schedule_order) {
+		if (iter == txqi)
+			break;
+
+		if (!iter->txq.sta) {
+			list_move_tail(&iter->schedule_order,
+				       &local->active_txqs[ac]);
+			continue;
+		}
+		sta = container_of(iter->txq.sta, struct sta_info, sta);
+		if (sta->airtime[ac].deficit < 0)
+			sta->airtime[ac].deficit += sta->airtime_weight;
+		list_move_tail(&iter->schedule_order, &local->active_txqs[ac]);
+	}
+
+	sta = container_of(txqi->txq.sta, struct sta_info, sta);
+	if (sta->airtime[ac].deficit >= 0)
+		goto out;
+
+	sta->airtime[ac].deficit += sta->airtime_weight;
+	list_move_tail(&txqi->schedule_order, &local->active_txqs[ac]);
+
+	return false;
+out:
+	if (!list_empty(&txqi->schedule_order))
+		list_del_init(&txqi->schedule_order);
+
+	return true;
+}
+EXPORT_SYMBOL(ieee80211_txq_may_transmit);
+
 void ieee80211_txq_schedule_start(struct ieee80211_hw *hw, u8 ac)
 	__acquires(txq_lock)
 {
-- 
cgit v1.2.3


From cc24163690997c685641d84e77ff6f1c592b06fe Mon Sep 17 00:00:00 2001
From: Julan Hsu <julanhsu@google.com>
Date: Tue, 15 Jan 2019 15:28:42 -0800
Subject: nl80211/mac80211: mesh: add hop count to mpath info

Expose hop count to destination information in mpath info

Signed-off-by: Julan Hsu <julanhsu@google.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h       | 4 ++++
 include/uapi/linux/nl80211.h | 2 ++
 net/mac80211/cfg.c           | 4 +++-
 net/mac80211/mesh_hwmp.c     | 5 +++++
 net/wireless/nl80211.c       | 5 ++++-
 5 files changed, 18 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 1691f52fcc80..37816786d3e1 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1429,6 +1429,7 @@ enum monitor_flags {
  * @MPATH_INFO_DISCOVERY_TIMEOUT: @discovery_timeout filled
  * @MPATH_INFO_DISCOVERY_RETRIES: @discovery_retries filled
  * @MPATH_INFO_FLAGS: @flags filled
+ * @MPATH_INFO_HOP_COUNT: @hop_count filled
  */
 enum mpath_info_flags {
 	MPATH_INFO_FRAME_QLEN		= BIT(0),
@@ -1438,6 +1439,7 @@ enum mpath_info_flags {
 	MPATH_INFO_DISCOVERY_TIMEOUT	= BIT(4),
 	MPATH_INFO_DISCOVERY_RETRIES	= BIT(5),
 	MPATH_INFO_FLAGS		= BIT(6),
+	MPATH_INFO_HOP_COUNT		= BIT(7)
 };
 
 /**
@@ -1457,6 +1459,7 @@ enum mpath_info_flags {
  *	This number should increase every time the list of mesh paths
  *	changes, i.e. when a station is added or removed, so that
  *	userspace can tell whether it got a consistent snapshot.
+ * @hop_count: hops to destination
  */
 struct mpath_info {
 	u32 filled;
@@ -1467,6 +1470,7 @@ struct mpath_info {
 	u32 discovery_timeout;
 	u8 discovery_retries;
 	u8 flags;
+	u8 hop_count;
 
 	int generation;
 };
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index ebe79e12c82e..213a1d7c1063 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -3287,6 +3287,7 @@ enum nl80211_mpath_flags {
  * 	&enum nl80211_mpath_flags;
  * @NL80211_MPATH_INFO_DISCOVERY_TIMEOUT: total path discovery timeout, in msec
  * @NL80211_MPATH_INFO_DISCOVERY_RETRIES: mesh path discovery retries
+ * @NL80211_MPATH_INFO_HOP_COUNT: hop count to destination
  * @NL80211_MPATH_INFO_MAX: highest mesh path information attribute number
  *	currently defind
  * @__NL80211_MPATH_INFO_AFTER_LAST: internal use
@@ -3300,6 +3301,7 @@ enum nl80211_mpath_info {
 	NL80211_MPATH_INFO_FLAGS,
 	NL80211_MPATH_INFO_DISCOVERY_TIMEOUT,
 	NL80211_MPATH_INFO_DISCOVERY_RETRIES,
+	NL80211_MPATH_INFO_HOP_COUNT,
 
 	/* keep last */
 	__NL80211_MPATH_INFO_AFTER_LAST,
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 83ee573b1804..52cbaaf5caea 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -1745,7 +1745,8 @@ static void mpath_set_pinfo(struct mesh_path *mpath, u8 *next_hop,
 			MPATH_INFO_EXPTIME |
 			MPATH_INFO_DISCOVERY_TIMEOUT |
 			MPATH_INFO_DISCOVERY_RETRIES |
-			MPATH_INFO_FLAGS;
+			MPATH_INFO_FLAGS |
+			MPATH_INFO_HOP_COUNT;
 
 	pinfo->frame_qlen = mpath->frame_queue.qlen;
 	pinfo->sn = mpath->sn;
@@ -1765,6 +1766,7 @@ static void mpath_set_pinfo(struct mesh_path *mpath, u8 *next_hop,
 		pinfo->flags |= NL80211_MPATH_FLAG_FIXED;
 	if (mpath->flags & MESH_PATH_RESOLVED)
 		pinfo->flags |= NL80211_MPATH_FLAG_RESOLVED;
+	pinfo->hop_count = mpath->hop_count;
 }
 
 static int ieee80211_get_mpath(struct wiphy *wiphy, struct net_device *dev,
diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c
index 6950cd0bf594..6d1190b3332f 100644
--- a/net/mac80211/mesh_hwmp.c
+++ b/net/mac80211/mesh_hwmp.c
@@ -386,6 +386,7 @@ static u32 hwmp_route_info_get(struct ieee80211_sub_if_data *sdata,
 	unsigned long orig_lifetime, exp_time;
 	u32 last_hop_metric, new_metric;
 	bool process = true;
+	u8 hopcount;
 
 	rcu_read_lock();
 	sta = sta_info_get(sdata, mgmt->sa);
@@ -404,6 +405,7 @@ static u32 hwmp_route_info_get(struct ieee80211_sub_if_data *sdata,
 		orig_sn = PREQ_IE_ORIG_SN(hwmp_ie);
 		orig_lifetime = PREQ_IE_LIFETIME(hwmp_ie);
 		orig_metric = PREQ_IE_METRIC(hwmp_ie);
+		hopcount = PREQ_IE_HOPCOUNT(hwmp_ie) + 1;
 		break;
 	case MPATH_PREP:
 		/* Originator here refers to the MP that was the target in the
@@ -415,6 +417,7 @@ static u32 hwmp_route_info_get(struct ieee80211_sub_if_data *sdata,
 		orig_sn = PREP_IE_TARGET_SN(hwmp_ie);
 		orig_lifetime = PREP_IE_LIFETIME(hwmp_ie);
 		orig_metric = PREP_IE_METRIC(hwmp_ie);
+		hopcount = PREP_IE_HOPCOUNT(hwmp_ie) + 1;
 		break;
 	default:
 		rcu_read_unlock();
@@ -482,6 +485,7 @@ static u32 hwmp_route_info_get(struct ieee80211_sub_if_data *sdata,
 			mpath->sn = orig_sn;
 			mpath->exp_time = time_after(mpath->exp_time, exp_time)
 					  ?  mpath->exp_time : exp_time;
+			mpath->hop_count = hopcount;
 			mesh_path_activate(mpath);
 			spin_unlock_bh(&mpath->state_lock);
 			ewma_mesh_fail_avg_init(&sta->mesh->fail_avg);
@@ -523,6 +527,7 @@ static u32 hwmp_route_info_get(struct ieee80211_sub_if_data *sdata,
 			mpath->metric = last_hop_metric;
 			mpath->exp_time = time_after(mpath->exp_time, exp_time)
 					  ?  mpath->exp_time : exp_time;
+			mpath->hop_count = 1;
 			mesh_path_activate(mpath);
 			spin_unlock_bh(&mpath->state_lock);
 			ewma_mesh_fail_avg_init(&sta->mesh->fail_avg);
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index a89688929b16..159125e16c79 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -5827,7 +5827,10 @@ static int nl80211_send_mpath(struct sk_buff *msg, u32 portid, u32 seq,
 			 pinfo->discovery_timeout)) ||
 	    ((pinfo->filled & MPATH_INFO_DISCOVERY_RETRIES) &&
 	     nla_put_u8(msg, NL80211_MPATH_INFO_DISCOVERY_RETRIES,
-			pinfo->discovery_retries)))
+			pinfo->discovery_retries)) ||
+	    ((pinfo->filled & MPATH_INFO_HOP_COUNT) &&
+	     nla_put_u8(msg, NL80211_MPATH_INFO_HOP_COUNT,
+			pinfo->hop_count)))
 		goto nla_put_failure;
 
 	nla_nest_end(msg, pinfoattr);
-- 
cgit v1.2.3


From 540bbcb930ed2fc9d6a57e0babea00027a7ecc67 Mon Sep 17 00:00:00 2001
From: Julan Hsu <julanhsu@google.com>
Date: Tue, 15 Jan 2019 15:28:43 -0800
Subject: nl80211/mac80211: mesh: add mesh path change count to mpath info

Expose path change count to destination in mpath info

Signed-off-by: Julan Hsu <julanhsu@google.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h       | 5 ++++-
 include/uapi/linux/nl80211.h | 4 +++-
 net/mac80211/cfg.c           | 4 +++-
 net/mac80211/mesh.h          | 2 ++
 net/mac80211/mesh_hwmp.c     | 4 ++++
 net/wireless/nl80211.c       | 5 ++++-
 6 files changed, 20 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 37816786d3e1..9c1d7979c200 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1439,7 +1439,8 @@ enum mpath_info_flags {
 	MPATH_INFO_DISCOVERY_TIMEOUT	= BIT(4),
 	MPATH_INFO_DISCOVERY_RETRIES	= BIT(5),
 	MPATH_INFO_FLAGS		= BIT(6),
-	MPATH_INFO_HOP_COUNT		= BIT(7)
+	MPATH_INFO_HOP_COUNT		= BIT(7),
+	MPATH_INFO_PATH_CHANGE		= BIT(8),
 };
 
 /**
@@ -1460,6 +1461,7 @@ enum mpath_info_flags {
  *	changes, i.e. when a station is added or removed, so that
  *	userspace can tell whether it got a consistent snapshot.
  * @hop_count: hops to destination
+ * @path_change_count: total number of path changes to destination
  */
 struct mpath_info {
 	u32 filled;
@@ -1471,6 +1473,7 @@ struct mpath_info {
 	u8 discovery_retries;
 	u8 flags;
 	u8 hop_count;
+	u32 path_change_count;
 
 	int generation;
 };
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 213a1d7c1063..426db4d8f71c 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -3288,8 +3288,9 @@ enum nl80211_mpath_flags {
  * @NL80211_MPATH_INFO_DISCOVERY_TIMEOUT: total path discovery timeout, in msec
  * @NL80211_MPATH_INFO_DISCOVERY_RETRIES: mesh path discovery retries
  * @NL80211_MPATH_INFO_HOP_COUNT: hop count to destination
+ * @NL80211_MPATH_INFO_PATH_CHANGE: total number of path changes to destination
  * @NL80211_MPATH_INFO_MAX: highest mesh path information attribute number
- *	currently defind
+ *	currently defined
  * @__NL80211_MPATH_INFO_AFTER_LAST: internal use
  */
 enum nl80211_mpath_info {
@@ -3302,6 +3303,7 @@ enum nl80211_mpath_info {
 	NL80211_MPATH_INFO_DISCOVERY_TIMEOUT,
 	NL80211_MPATH_INFO_DISCOVERY_RETRIES,
 	NL80211_MPATH_INFO_HOP_COUNT,
+	NL80211_MPATH_INFO_PATH_CHANGE,
 
 	/* keep last */
 	__NL80211_MPATH_INFO_AFTER_LAST,
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 52cbaaf5caea..e5e0f100389c 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -1746,7 +1746,8 @@ static void mpath_set_pinfo(struct mesh_path *mpath, u8 *next_hop,
 			MPATH_INFO_DISCOVERY_TIMEOUT |
 			MPATH_INFO_DISCOVERY_RETRIES |
 			MPATH_INFO_FLAGS |
-			MPATH_INFO_HOP_COUNT;
+			MPATH_INFO_HOP_COUNT |
+			MPATH_INFO_PATH_CHANGE;
 
 	pinfo->frame_qlen = mpath->frame_queue.qlen;
 	pinfo->sn = mpath->sn;
@@ -1767,6 +1768,7 @@ static void mpath_set_pinfo(struct mesh_path *mpath, u8 *next_hop,
 	if (mpath->flags & MESH_PATH_RESOLVED)
 		pinfo->flags |= NL80211_MPATH_FLAG_RESOLVED;
 	pinfo->hop_count = mpath->hop_count;
+	pinfo->path_change_count = mpath->path_change_count;
 }
 
 static int ieee80211_get_mpath(struct wiphy *wiphy, struct net_device *dev,
diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h
index cad6592c52a1..8b26858ab4d5 100644
--- a/net/mac80211/mesh.h
+++ b/net/mac80211/mesh.h
@@ -94,6 +94,7 @@ enum mesh_deferred_task_flags {
  * @last_preq_to_root: Timestamp of last PREQ sent to root
  * @is_root: the destination station of this path is a root node
  * @is_gate: the destination station of this path is a mesh gate
+ * @path_change_count: the number of path changes to destination
  *
  *
  * The dst address is unique in the mesh path table. Since the mesh_path is
@@ -124,6 +125,7 @@ struct mesh_path {
 	unsigned long last_preq_to_root;
 	bool is_root;
 	bool is_gate;
+	u32 path_change_count;
 };
 
 /**
diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c
index 6d1190b3332f..a0aebf44493f 100644
--- a/net/mac80211/mesh_hwmp.c
+++ b/net/mac80211/mesh_hwmp.c
@@ -479,6 +479,8 @@ static u32 hwmp_route_info_get(struct ieee80211_sub_if_data *sdata,
 		}
 
 		if (fresh_info) {
+			if (rcu_access_pointer(mpath->next_hop) != sta)
+				mpath->path_change_count++;
 			mesh_path_assign_nexthop(mpath, sta);
 			mpath->flags |= MESH_PATH_SN_VALID;
 			mpath->metric = new_metric;
@@ -523,6 +525,8 @@ static u32 hwmp_route_info_get(struct ieee80211_sub_if_data *sdata,
 		}
 
 		if (fresh_info) {
+			if (rcu_access_pointer(mpath->next_hop) != sta)
+				mpath->path_change_count++;
 			mesh_path_assign_nexthop(mpath, sta);
 			mpath->metric = last_hop_metric;
 			mpath->exp_time = time_after(mpath->exp_time, exp_time)
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 159125e16c79..e5f9c9ceb6c9 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -5830,7 +5830,10 @@ static int nl80211_send_mpath(struct sk_buff *msg, u32 portid, u32 seq,
 			pinfo->discovery_retries)) ||
 	    ((pinfo->filled & MPATH_INFO_HOP_COUNT) &&
 	     nla_put_u8(msg, NL80211_MPATH_INFO_HOP_COUNT,
-			pinfo->hop_count)))
+			pinfo->hop_count)) ||
+	    ((pinfo->filled & MPATH_INFO_PATH_CHANGE) &&
+	     nla_put_u32(msg, NL80211_MPATH_INFO_PATH_CHANGE,
+			 pinfo->path_change_count)))
 		goto nla_put_failure;
 
 	nla_nest_end(msg, pinfoattr);
-- 
cgit v1.2.3


From 77f7ffdc335de8541a2a40618781ed0b76cee9f7 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Wed, 16 Jan 2019 22:32:12 +0100
Subject: mac80211: minstrel_ht: add flag to indicate missing/inaccurate tx
 A-MPDU length

Some hardware (e.g. MediaTek MT7603) cannot report A-MPDU length in tx status
information. Add support for a flag to indicate that, to allow minstrel_ht
to use a fixed value in its internal calculation (which gives better results
than just defaulting to 1).

Signed-off-by: Felix Fietkau <nbd@nbd.name>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h                     |  4 ++++
 net/mac80211/debugfs.c                     |  1 +
 net/mac80211/rc80211_minstrel_ht.c         | 25 ++++++++++++++++++++-----
 net/mac80211/rc80211_minstrel_ht_debugfs.c |  7 ++++---
 4 files changed, 29 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 5d36eacf401e..a285c2bfd14e 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -2190,6 +2190,9 @@ struct ieee80211_txq {
  *	MMPDUs on station interfaces. This of course requires the driver to use
  *	TXQs to start with.
  *
+ * @IEEE80211_HW_TX_STATUS_NO_AMPDU_LEN: Driver does not report accurate A-MPDU
+ *	length in tx status information
+ *
  * @NUM_IEEE80211_HW_FLAGS: number of hardware flags, used for sizing arrays
  */
 enum ieee80211_hw_flags {
@@ -2238,6 +2241,7 @@ enum ieee80211_hw_flags {
 	IEEE80211_HW_BUFF_MMPDU_TXQ,
 	IEEE80211_HW_SUPPORTS_VHT_EXT_NSS_BW,
 	IEEE80211_HW_STA_MMPDU_TXQ,
+	IEEE80211_HW_TX_STATUS_NO_AMPDU_LEN,
 
 	/* keep last, obviously */
 	NUM_IEEE80211_HW_FLAGS
diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c
index 81c5fec2eae7..343ad0a915e4 100644
--- a/net/mac80211/debugfs.c
+++ b/net/mac80211/debugfs.c
@@ -218,6 +218,7 @@ static const char *hw_flag_names[] = {
 	FLAG(BUFF_MMPDU_TXQ),
 	FLAG(SUPPORTS_VHT_EXT_NSS_BW),
 	FLAG(STA_MMPDU_TXQ),
+	FLAG(TX_STATUS_NO_AMPDU_LEN),
 #undef FLAG
 };
 
diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c
index f466ec37d161..ccaf951e4e31 100644
--- a/net/mac80211/rc80211_minstrel_ht.c
+++ b/net/mac80211/rc80211_minstrel_ht.c
@@ -294,6 +294,15 @@ minstrel_get_ratestats(struct minstrel_ht_sta *mi, int index)
 	return &mi->groups[index / MCS_GROUP_RATES].rates[index % MCS_GROUP_RATES];
 }
 
+static unsigned int
+minstrel_ht_avg_ampdu_len(struct minstrel_ht_sta *mi)
+{
+	if (!mi->avg_ampdu_len)
+		return AVG_AMPDU_SIZE;
+
+	return MINSTREL_TRUNC(mi->avg_ampdu_len);
+}
+
 /*
  * Return current throughput based on the average A-MPDU length, taking into
  * account the expected number of retransmissions and their expected length
@@ -309,7 +318,7 @@ minstrel_ht_get_tp_avg(struct minstrel_ht_sta *mi, int group, int rate,
 		return 0;
 
 	if (group != MINSTREL_CCK_GROUP)
-		nsecs = 1000 * mi->overhead / MINSTREL_TRUNC(mi->avg_ampdu_len);
+		nsecs = 1000 * mi->overhead / minstrel_ht_avg_ampdu_len(mi);
 
 	nsecs += minstrel_mcs_groups[group].duration[rate] <<
 		 minstrel_mcs_groups[group].shift;
@@ -503,8 +512,12 @@ minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi)
 	u16 tmp_cck_tp_rate[MAX_THR_RATES], index;
 
 	if (mi->ampdu_packets > 0) {
-		mi->avg_ampdu_len = minstrel_ewma(mi->avg_ampdu_len,
-			MINSTREL_FRAC(mi->ampdu_len, mi->ampdu_packets), EWMA_LEVEL);
+		if (!ieee80211_hw_check(mp->hw, TX_STATUS_NO_AMPDU_LEN))
+			mi->avg_ampdu_len = minstrel_ewma(mi->avg_ampdu_len,
+				MINSTREL_FRAC(mi->ampdu_len, mi->ampdu_packets),
+					      EWMA_LEVEL);
+		else
+			mi->avg_ampdu_len = 0;
 		mi->ampdu_len = 0;
 		mi->ampdu_packets = 0;
 	}
@@ -709,7 +722,9 @@ minstrel_ht_tx_status(void *priv, struct ieee80211_supported_band *sband,
 	mi->ampdu_len += info->status.ampdu_len;
 
 	if (!mi->sample_wait && !mi->sample_tries && mi->sample_count > 0) {
-		mi->sample_wait = 16 + 2 * MINSTREL_TRUNC(mi->avg_ampdu_len);
+		int avg_ampdu_len = minstrel_ht_avg_ampdu_len(mi);
+
+		mi->sample_wait = 16 + 2 * avg_ampdu_len;
 		mi->sample_tries = 1;
 		mi->sample_count--;
 	}
@@ -777,7 +792,7 @@ minstrel_calc_retransmit(struct minstrel_priv *mp, struct minstrel_ht_sta *mi,
 	unsigned int cw = mp->cw_min;
 	unsigned int ctime = 0;
 	unsigned int t_slot = 9; /* FIXME */
-	unsigned int ampdu_len = MINSTREL_TRUNC(mi->avg_ampdu_len);
+	unsigned int ampdu_len = minstrel_ht_avg_ampdu_len(mi);
 	unsigned int overhead = 0, overhead_rtscts = 0;
 
 	mrs = minstrel_get_ratestats(mi, index);
diff --git a/net/mac80211/rc80211_minstrel_ht_debugfs.c b/net/mac80211/rc80211_minstrel_ht_debugfs.c
index 57820a5f2c16..31641d0b0f5c 100644
--- a/net/mac80211/rc80211_minstrel_ht_debugfs.c
+++ b/net/mac80211/rc80211_minstrel_ht_debugfs.c
@@ -160,9 +160,10 @@ minstrel_ht_stats_open(struct inode *inode, struct file *file)
 			"lookaround %d\n",
 			max(0, (int) mi->total_packets - (int) mi->sample_packets),
 			mi->sample_packets);
-	p += sprintf(p, "Average # of aggregated frames per A-MPDU: %d.%d\n",
-		MINSTREL_TRUNC(mi->avg_ampdu_len),
-		MINSTREL_TRUNC(mi->avg_ampdu_len * 10) % 10);
+	if (mi->avg_ampdu_len)
+		p += sprintf(p, "Average # of aggregated frames per A-MPDU: %d.%d\n",
+			MINSTREL_TRUNC(mi->avg_ampdu_len),
+			MINSTREL_TRUNC(mi->avg_ampdu_len * 10) % 10);
 	ms->len = p - ms->buf;
 	WARN_ON(ms->len + sizeof(*ms) > 32768);
 
-- 
cgit v1.2.3


From 5f2939d933e9616b8b779fa46963689000f1a8f4 Mon Sep 17 00:00:00 2001
From: Toke Høiland-Jørgensen <toke@redhat.com>
Date: Wed, 9 Jan 2019 17:10:57 +0100
Subject: sch_api: Change signature of qdisc_tree_reduce_backlog() to use ints
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There are now several places where qdisc_tree_reduce_backlog() is called
with a negative number of packets (to signal an increase in number of
packets in the queue). Rather than rely on overflow behaviour, change the
function signature to use signed integers to communicate this usage to
people reading the code.

Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sch_generic.h | 3 +--
 net/sched/sch_api.c       | 3 +--
 2 files changed, 2 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 9481f2c142e2..7a4957599874 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -580,8 +580,7 @@ struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue,
 void qdisc_reset(struct Qdisc *qdisc);
 void qdisc_put(struct Qdisc *qdisc);
 void qdisc_put_unlocked(struct Qdisc *qdisc);
-void qdisc_tree_reduce_backlog(struct Qdisc *qdisc, unsigned int n,
-			       unsigned int len);
+void qdisc_tree_reduce_backlog(struct Qdisc *qdisc, int n, int len);
 #ifdef CONFIG_NET_SCHED
 int qdisc_offload_dump_helper(struct Qdisc *q, enum tc_setup_type type,
 			      void *type_data);
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 7e4d1ccf4c87..03e26e8d0ec9 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -758,8 +758,7 @@ static u32 qdisc_alloc_handle(struct net_device *dev)
 	return 0;
 }
 
-void qdisc_tree_reduce_backlog(struct Qdisc *sch, unsigned int n,
-			       unsigned int len)
+void qdisc_tree_reduce_backlog(struct Qdisc *sch, int n, int len)
 {
 	bool qdisc_is_offloaded = sch->flags & TCQ_F_OFFLOADED;
 	const struct Qdisc_class_ops *cops;
-- 
cgit v1.2.3


From 59c28058fa7bb1cc7ab8b2c5607093cbbefafeb4 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Fri, 18 Jan 2019 10:46:13 -0800
Subject: net: netlink: add helper to retrieve NETLINK_F_STRICT_CHK

Dumps can read state of the NETLINK_F_STRICT_CHK flag from
a field in the callback structure.  For non-dump GET requests
we need a way to access the state of that flag from a socket.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netlink.h  | 1 +
 net/netlink/af_netlink.c | 8 ++++++++
 2 files changed, 9 insertions(+)

(limited to 'include')

diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index 4e8add270200..593d1b9c33a8 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -126,6 +126,7 @@ void __netlink_clear_multicast_users(struct sock *sk, unsigned int group);
 void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err,
 		 const struct netlink_ext_ack *extack);
 int netlink_has_listeners(struct sock *sk, unsigned int group);
+bool netlink_strict_get_check(struct sk_buff *skb);
 
 int netlink_unicast(struct sock *ssk, struct sk_buff *skb, __u32 portid, int nonblock);
 int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, __u32 portid,
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 3c023d6120f6..8fa35df94c07 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1371,6 +1371,14 @@ int netlink_has_listeners(struct sock *sk, unsigned int group)
 }
 EXPORT_SYMBOL_GPL(netlink_has_listeners);
 
+bool netlink_strict_get_check(struct sk_buff *skb)
+{
+	const struct netlink_sock *nlk = nlk_sk(NETLINK_CB(skb).sk);
+
+	return nlk->flags & NETLINK_F_STRICT_CHK;
+}
+EXPORT_SYMBOL_GPL(netlink_strict_get_check);
+
 static int netlink_broadcast_deliver(struct sock *sk, struct sk_buff *skb)
 {
 	struct netlink_sock *nlk = nlk_sk(sk);
-- 
cgit v1.2.3


From 5954894ba3723995fbeab77bef62bb4878a654bb Mon Sep 17 00:00:00 2001
From: Cong Wang <xiyou.wangcong@gmail.com>
Date: Thu, 17 Jan 2019 17:14:01 -0800
Subject: net_sched: add performance counters for basic filter

Similar to u32 filter, it is useful to know how many times
we reach each basic filter and how many times we pass the
ematch attached to it.

Sample output:

filter protocol arp pref 49152 basic chain 0
filter protocol arp pref 49152 basic chain 0 handle 0x1  (rule hit 3 success 3)
	action order 1: gact action pass
	 random type none pass val 0
	 index 1 ref 1 bind 1 installed 81 sec used 4 sec
	Action statistics:
	Sent 126 bytes 3 pkt (dropped 0, overlimits 0 requeues 0)
	backlog 0b 0p requeues 0

Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Cc: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/pkt_cls.h |  7 +++++++
 net/sched/cls_basic.c        | 25 +++++++++++++++++++++++++
 2 files changed, 32 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h
index 32a3416b51c3..02ac251be8c4 100644
--- a/include/uapi/linux/pkt_cls.h
+++ b/include/uapi/linux/pkt_cls.h
@@ -333,12 +333,19 @@ enum {
 
 /* Basic filter */
 
+struct tc_basic_pcnt {
+	__u64 rcnt;
+	__u64 rhit;
+};
+
 enum {
 	TCA_BASIC_UNSPEC,
 	TCA_BASIC_CLASSID,
 	TCA_BASIC_EMATCHES,
 	TCA_BASIC_ACT,
 	TCA_BASIC_POLICE,
+	TCA_BASIC_PCNT,
+	TCA_BASIC_PAD,
 	__TCA_BASIC_MAX
 };
 
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index 6a5dce8baf19..4a57fec6f306 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -18,6 +18,7 @@
 #include <linux/rtnetlink.h>
 #include <linux/skbuff.h>
 #include <linux/idr.h>
+#include <linux/percpu.h>
 #include <net/netlink.h>
 #include <net/act_api.h>
 #include <net/pkt_cls.h>
@@ -35,6 +36,7 @@ struct basic_filter {
 	struct tcf_result	res;
 	struct tcf_proto	*tp;
 	struct list_head	link;
+	struct tc_basic_pcnt __percpu *pf;
 	struct rcu_work		rwork;
 };
 
@@ -46,8 +48,10 @@ static int basic_classify(struct sk_buff *skb, const struct tcf_proto *tp,
 	struct basic_filter *f;
 
 	list_for_each_entry_rcu(f, &head->flist, link) {
+		__this_cpu_inc(f->pf->rcnt);
 		if (!tcf_em_tree_match(skb, &f->ematches, NULL))
 			continue;
+		__this_cpu_inc(f->pf->rhit);
 		*res = f->res;
 		r = tcf_exts_exec(skb, &f->exts, res);
 		if (r < 0)
@@ -89,6 +93,7 @@ static void __basic_delete_filter(struct basic_filter *f)
 	tcf_exts_destroy(&f->exts);
 	tcf_em_tree_destroy(&f->ematches);
 	tcf_exts_put_net(&f->exts);
+	free_percpu(f->pf);
 	kfree(f);
 }
 
@@ -208,6 +213,11 @@ static int basic_change(struct net *net, struct sk_buff *in_skb,
 	if (err)
 		goto errout;
 	fnew->handle = handle;
+	fnew->pf = alloc_percpu(struct tc_basic_pcnt);
+	if (!fnew->pf) {
+		err = -ENOMEM;
+		goto errout;
+	}
 
 	err = basic_set_parms(net, tp, fnew, base, tb, tca[TCA_RATE], ovr,
 			      extack);
@@ -231,6 +241,7 @@ static int basic_change(struct net *net, struct sk_buff *in_skb,
 
 	return 0;
 errout:
+	free_percpu(fnew->pf);
 	tcf_exts_destroy(&fnew->exts);
 	kfree(fnew);
 	return err;
@@ -265,8 +276,10 @@ static void basic_bind_class(void *fh, u32 classid, unsigned long cl)
 static int basic_dump(struct net *net, struct tcf_proto *tp, void *fh,
 		      struct sk_buff *skb, struct tcmsg *t)
 {
+	struct tc_basic_pcnt gpf = {};
 	struct basic_filter *f = fh;
 	struct nlattr *nest;
+	int cpu;
 
 	if (f == NULL)
 		return skb->len;
@@ -281,6 +294,18 @@ static int basic_dump(struct net *net, struct tcf_proto *tp, void *fh,
 	    nla_put_u32(skb, TCA_BASIC_CLASSID, f->res.classid))
 		goto nla_put_failure;
 
+	for_each_possible_cpu(cpu) {
+		struct tc_basic_pcnt *pf = per_cpu_ptr(f->pf, cpu);
+
+		gpf.rcnt += pf->rcnt;
+		gpf.rhit += pf->rhit;
+	}
+
+	if (nla_put_64bit(skb, TCA_BASIC_PCNT,
+			  sizeof(struct tc_basic_pcnt),
+			  &gpf, TCA_BASIC_PAD))
+		goto nla_put_failure;
+
 	if (tcf_exts_dump(skb, &f->exts) < 0 ||
 	    tcf_em_tree_dump(skb, &f->ematches, TCA_BASIC_EMATCHES) < 0)
 		goto nla_put_failure;
-- 
cgit v1.2.3


From f5d782d46aa5d4dd369e6560ce5227136b58926f Mon Sep 17 00:00:00 2001
From: Sebastian Reichel <sebastian.reichel@collabora.com>
Date: Thu, 13 Dec 2018 02:38:58 +0100
Subject: power: supply: isp1704: switch to gpiod API

This migrates isp1704 driver from old GPIO API to new descriptor
based GPIO API and drops useless platform data as a side-effect.

Migration is simple, since all mainline users are DT based and
DT API does not change. Out of tree users of the platform data
need to migrate to gpiod_lookup_table as described here:

Documentation/driver-api/gpio/board.rst

Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Acked-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 drivers/power/supply/isp1704_charger.c | 60 +++++++---------------------------
 include/linux/power/isp1704_charger.h  | 30 -----------------
 2 files changed, 12 insertions(+), 78 deletions(-)
 delete mode 100644 include/linux/power/isp1704_charger.h

(limited to 'include')

diff --git a/drivers/power/supply/isp1704_charger.c b/drivers/power/supply/isp1704_charger.c
index 95af5f305838..a63cb5dcfa08 100644
--- a/drivers/power/supply/isp1704_charger.c
+++ b/drivers/power/supply/isp1704_charger.c
@@ -30,13 +30,12 @@
 #include <linux/power_supply.h>
 #include <linux/delay.h>
 #include <linux/of.h>
-#include <linux/of_gpio.h>
 
+#include <linux/gpio/consumer.h>
 #include <linux/usb/otg.h>
 #include <linux/usb/ulpi.h>
 #include <linux/usb/ch9.h>
 #include <linux/usb/gadget.h>
-#include <linux/power/isp1704_charger.h>
 
 /* Vendor specific Power Control register */
 #define ISP1704_PWR_CTRL		0x3d
@@ -60,6 +59,7 @@ struct isp1704_charger {
 	struct device			*dev;
 	struct power_supply		*psy;
 	struct power_supply_desc	psy_desc;
+	struct gpio_desc		*enable_gpio;
 	struct usb_phy			*phy;
 	struct notifier_block		nb;
 	struct work_struct		work;
@@ -81,18 +81,9 @@ static inline int isp1704_write(struct isp1704_charger *isp, u32 reg, u32 val)
 	return usb_phy_io_write(isp->phy, val, reg);
 }
 
-/*
- * Disable/enable the power from the isp1704 if a function for it
- * has been provided with platform data.
- */
 static void isp1704_charger_set_power(struct isp1704_charger *isp, bool on)
 {
-	struct isp1704_charger_data	*board = isp->dev->platform_data;
-
-	if (board && board->set_power)
-		board->set_power(on);
-	else if (board)
-		gpio_set_value(board->enable_gpio, on);
+	gpiod_set_value(isp->enable_gpio, on);
 }
 
 /*
@@ -405,46 +396,19 @@ static int isp1704_charger_probe(struct platform_device *pdev)
 	int			ret = -ENODEV;
 	struct power_supply_config psy_cfg = {};
 
-	struct isp1704_charger_data *pdata = dev_get_platdata(&pdev->dev);
-	struct device_node *np = pdev->dev.of_node;
-
-	if (np) {
-		int gpio = of_get_named_gpio(np, "nxp,enable-gpio", 0);
-
-		if (gpio < 0) {
-			dev_err(&pdev->dev, "missing DT GPIO nxp,enable-gpio\n");
-			return gpio;
-		}
-
-		pdata = devm_kzalloc(&pdev->dev,
-			sizeof(struct isp1704_charger_data), GFP_KERNEL);
-		if (!pdata) {
-			ret = -ENOMEM;
-			goto fail0;
-		}
-		pdata->enable_gpio = gpio;
-
-		dev_info(&pdev->dev, "init gpio %d\n", pdata->enable_gpio);
-
-		ret = devm_gpio_request_one(&pdev->dev, pdata->enable_gpio,
-					GPIOF_OUT_INIT_HIGH, "isp1704_reset");
-		if (ret) {
-			dev_err(&pdev->dev, "gpio request failed\n");
-			goto fail0;
-		}
-	}
-
-	if (!pdata) {
-		dev_err(&pdev->dev, "missing platform data!\n");
-		return -ENODEV;
-	}
-
-
 	isp = devm_kzalloc(&pdev->dev, sizeof(*isp), GFP_KERNEL);
 	if (!isp)
 		return -ENOMEM;
 
-	if (np)
+	isp->enable_gpio = devm_gpiod_get(&pdev->dev, "nxp,enable",
+					  GPIOD_OUT_HIGH);
+	if (IS_ERR(isp->enable_gpio)) {
+		ret = PTR_ERR(isp->enable_gpio);
+		dev_err(&pdev->dev, "Could not get reset gpio: %d\n", ret);
+		return ret;
+	}
+
+	if (pdev->dev.of_node)
 		isp->phy = devm_usb_get_phy_by_phandle(&pdev->dev, "usb-phy", 0);
 	else
 		isp->phy = devm_usb_get_phy(&pdev->dev, USB_PHY_TYPE_USB2);
diff --git a/include/linux/power/isp1704_charger.h b/include/linux/power/isp1704_charger.h
deleted file mode 100644
index 0105d9e7af85..000000000000
--- a/include/linux/power/isp1704_charger.h
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- * ISP1704 USB Charger Detection driver
- *
- * Copyright (C) 2011 Nokia Corporation
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-
-#ifndef __ISP1704_CHARGER_H
-#define __ISP1704_CHARGER_H
-
-struct isp1704_charger_data {
-	void		(*set_power)(bool on);
-	int		enable_gpio;
-};
-
-#endif
-- 
cgit v1.2.3


From 486efe9f8e30bac1e236f867df164f4966f3e207 Mon Sep 17 00:00:00 2001
From: Andrew Murray <andrew.murray@arm.com>
Date: Thu, 10 Jan 2019 13:53:24 +0000
Subject: perf/core: Add function to test for event exclusion flags

Add a function that tests if any of the perf event exclusion flags
are set on a given event.

Signed-off-by: Andrew Murray <andrew.murray@arm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Matt Turner <mattst88@gmail.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Richard Henderson <rth@twiddle.net>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Sascha Hauer <s.hauer@pengutronix.de>
Cc: Shawn Guo <shawnguo@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Deacon <will.deacon@arm.com>
Cc: linux-arm-kernel@lists.infradead.org
Cc: linuxppc-dev@lists.ozlabs.org
Cc: robin.murphy@arm.com
Cc: suzuki.poulose@arm.com
Link: https://lkml.kernel.org/r/1547128414-50693-3-git-send-email-andrew.murray@arm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/perf_event.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 1d5c551a5add..54a78d22f0a6 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1004,6 +1004,15 @@ perf_event__output_id_sample(struct perf_event *event,
 extern void
 perf_log_lost_samples(struct perf_event *event, u64 lost);
 
+static inline bool event_has_any_exclude_flag(struct perf_event *event)
+{
+	struct perf_event_attr *attr = &event->attr;
+
+	return attr->exclude_idle || attr->exclude_user ||
+	       attr->exclude_kernel || attr->exclude_hv ||
+	       attr->exclude_guest || attr->exclude_host;
+}
+
 static inline bool is_sampling_event(struct perf_event *event)
 {
 	return event->attr.sample_period != 0;
-- 
cgit v1.2.3


From cc6795aeffea0a80d0baf9ad31ba926a6c42cef5 Mon Sep 17 00:00:00 2001
From: Andrew Murray <andrew.murray@arm.com>
Date: Thu, 10 Jan 2019 13:53:25 +0000
Subject: perf/core: Add PERF_PMU_CAP_NO_EXCLUDE for exclusion incapable PMUs

Many PMU drivers do not have the capability to exclude counting events
that occur in specific contexts such as idle, kernel, guest, etc. These
drivers indicate this by returning an error in their event_init upon
testing the events attribute flags. This approach is error prone and
often inconsistent.

Let's instead allow PMU drivers to advertise their inability to exclude
based on context via a new capability: PERF_PMU_CAP_NO_EXCLUDE. This
allows the perf core to reject requests for exclusion events where
there is no support in the PMU.

Signed-off-by: Andrew Murray <andrew.murray@arm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Matt Turner <mattst88@gmail.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Richard Henderson <rth@twiddle.net>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Sascha Hauer <s.hauer@pengutronix.de>
Cc: Shawn Guo <shawnguo@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Deacon <will.deacon@arm.com>
Cc: linux-arm-kernel@lists.infradead.org
Cc: linuxppc-dev@lists.ozlabs.org
Cc: robin.murphy@arm.com
Cc: suzuki.poulose@arm.com
Link: https://lkml.kernel.org/r/1547128414-50693-4-git-send-email-andrew.murray@arm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/perf_event.h | 1 +
 kernel/events/core.c       | 9 +++++++++
 2 files changed, 10 insertions(+)

(limited to 'include')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 54a78d22f0a6..cec02dc63b51 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -244,6 +244,7 @@ struct perf_event;
 #define PERF_PMU_CAP_EXCLUSIVE			0x10
 #define PERF_PMU_CAP_ITRACE			0x20
 #define PERF_PMU_CAP_HETEROGENEOUS_CPUS		0x40
+#define PERF_PMU_CAP_NO_EXCLUDE			0x80
 
 /**
  * struct pmu - generic performance monitoring unit
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 3cd13a30f732..fbe59b793b36 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -9772,6 +9772,15 @@ static int perf_try_init_event(struct pmu *pmu, struct perf_event *event)
 	if (ctx)
 		perf_event_ctx_unlock(event->group_leader, ctx);
 
+	if (!ret) {
+		if (pmu->capabilities & PERF_PMU_CAP_NO_EXCLUDE &&
+				event_has_any_exclude_flag(event)) {
+			if (event->destroy)
+				event->destroy(event);
+			ret = -EINVAL;
+		}
+	}
+
 	if (ret)
 		module_put(pmu->module);
 
-- 
cgit v1.2.3


From ad07c8ceb6631a83b62d405a61448bba92adac68 Mon Sep 17 00:00:00 2001
From: Andrew Murray <andrew.murray@arm.com>
Date: Thu, 10 Jan 2019 13:53:34 +0000
Subject: perf/core: Remove unused perf_flags

Now that perf_flags is not used we remove it.

Signed-off-by: Andrew Murray <andrew.murray@arm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Matt Turner <mattst88@gmail.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Richard Henderson <rth@twiddle.net>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Sascha Hauer <s.hauer@pengutronix.de>
Cc: Shawn Guo <shawnguo@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Deacon <will.deacon@arm.com>
Cc: linux-arm-kernel@lists.infradead.org
Cc: linuxppc-dev@lists.ozlabs.org
Cc: robin.murphy@arm.com
Cc: suzuki.poulose@arm.com
Link: https://lkml.kernel.org/r/1547128414-50693-13-git-send-email-andrew.murray@arm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/uapi/linux/perf_event.h       | 2 --
 tools/include/uapi/linux/perf_event.h | 2 --
 2 files changed, 4 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 9de8780ac8d9..ea19b5d491bf 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -445,8 +445,6 @@ struct perf_event_query_bpf {
 	__u32	ids[0];
 };
 
-#define perf_flags(attr)	(*(&(attr)->read_format + 1))
-
 /*
  * Ioctls that can be done on a perf event fd:
  */
diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h
index 9de8780ac8d9..ea19b5d491bf 100644
--- a/tools/include/uapi/linux/perf_event.h
+++ b/tools/include/uapi/linux/perf_event.h
@@ -445,8 +445,6 @@ struct perf_event_query_bpf {
 	__u32	ids[0];
 };
 
-#define perf_flags(attr)	(*(&(attr)->read_format + 1))
-
 /*
  * Ioctls that can be done on a perf event fd:
  */
-- 
cgit v1.2.3


From 8321be6a9df5c5cfbf3fb5f716caf8698a5a7016 Mon Sep 17 00:00:00 2001
From: Amit Kucheria <amit.kucheria@linaro.org>
Date: Mon, 21 Jan 2019 14:17:37 +0530
Subject: cpufreq: Replace open-coded << with BIT()

Minor clean-up to use BIT() and keep checkpatch happy. Clean up the
comment formatting while we're at it to make it easier to read.

Signed-off-by: Amit Kucheria <amit.kucheria@linaro.org>
Reviewed-by: Stephen Boyd <swboyd@chromium.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/cpufreq.h | 25 +++++++++++++------------
 1 file changed, 13 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index c86d6d8bdfed..bd7fbd6a4478 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -346,14 +346,15 @@ struct cpufreq_driver {
 };
 
 /* flags */
-#define CPUFREQ_STICKY		(1 << 0)	/* driver isn't removed even if
-						   all ->init() calls failed */
-#define CPUFREQ_CONST_LOOPS	(1 << 1)	/* loops_per_jiffy or other
-						   kernel "constants" aren't
-						   affected by frequency
-						   transitions */
-#define CPUFREQ_PM_NO_WARN	(1 << 2)	/* don't warn on suspend/resume
-						   speed mismatches */
+
+/* driver isn't removed even if all ->init() calls failed */
+#define CPUFREQ_STICKY				BIT(0)
+
+/* loops_per_jiffy or other kernel "constants" aren't affected by frequency transitions */
+#define CPUFREQ_CONST_LOOPS			BIT(1)
+
+/* don't warn on suspend/resume speed mismatches */
+#define CPUFREQ_PM_NO_WARN			BIT(2)
 
 /*
  * This should be set by platforms having multiple clock-domains, i.e.
@@ -361,14 +362,14 @@ struct cpufreq_driver {
  * be created in cpu/cpu<num>/cpufreq/ directory and so they can use the same
  * governor with different tunables for different clusters.
  */
-#define CPUFREQ_HAVE_GOVERNOR_PER_POLICY (1 << 3)
+#define CPUFREQ_HAVE_GOVERNOR_PER_POLICY	BIT(3)
 
 /*
  * Driver will do POSTCHANGE notifications from outside of their ->target()
  * routine and so must set cpufreq_driver->flags with this flag, so that core
  * can handle them specially.
  */
-#define CPUFREQ_ASYNC_NOTIFICATION  (1 << 4)
+#define CPUFREQ_ASYNC_NOTIFICATION		BIT(4)
 
 /*
  * Set by drivers which want cpufreq core to check if CPU is running at a
@@ -377,13 +378,13 @@ struct cpufreq_driver {
  * from the table. And if that fails, we will stop further boot process by
  * issuing a BUG_ON().
  */
-#define CPUFREQ_NEED_INITIAL_FREQ_CHECK	(1 << 5)
+#define CPUFREQ_NEED_INITIAL_FREQ_CHECK	BIT(5)
 
 /*
  * Set by drivers to disallow use of governors with "dynamic_switching" flag
  * set.
  */
-#define CPUFREQ_NO_AUTO_DYNAMIC_SWITCHING (1 << 6)
+#define CPUFREQ_NO_AUTO_DYNAMIC_SWITCHING	BIT(6)
 
 int cpufreq_register_driver(struct cpufreq_driver *driver_data);
 int cpufreq_unregister_driver(struct cpufreq_driver *driver_data);
-- 
cgit v1.2.3


From 2a6efbc6da5d248c4fbe7fdb7de7f3eff1de95a4 Mon Sep 17 00:00:00 2001
From: Fabrizio Castro <fabrizio.castro@bp.renesas.com>
Date: Wed, 16 Jan 2019 19:45:45 +0000
Subject: clk: renesas: r8a774c0: Add missing CANFD clock

This patch adds the missing CANFD clock to the r8a774c0 specific
clock driver.

Signed-off-by: Fabrizio Castro <fabrizio.castro@bp.renesas.com>
Reviewed-by: Chris Paterson <Chris.Paterson2@renesas.com>
Reviewed-by: Simon Horman <horms+renesas@verge.net.au>
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
---
 drivers/clk/renesas/r8a774c0-cpg-mssr.c       | 4 ++++
 include/dt-bindings/clock/r8a774c0-cpg-mssr.h | 1 +
 2 files changed, 5 insertions(+)

(limited to 'include')

diff --git a/drivers/clk/renesas/r8a774c0-cpg-mssr.c b/drivers/clk/renesas/r8a774c0-cpg-mssr.c
index 10b96895d452..28bcc8105d57 100644
--- a/drivers/clk/renesas/r8a774c0-cpg-mssr.c
+++ b/drivers/clk/renesas/r8a774c0-cpg-mssr.c
@@ -33,6 +33,7 @@ enum clk_ids {
 	CLK_PLL1,
 	CLK_PLL3,
 	CLK_PLL0D4,
+	CLK_PLL0D6,
 	CLK_PLL0D8,
 	CLK_PLL0D20,
 	CLK_PLL0D24,
@@ -61,6 +62,7 @@ static const struct cpg_core_clk r8a774c0_core_clks[] __initconst = {
 
 	DEF_FIXED(".pll0",     CLK_PLL0,           CLK_MAIN,	   1, 100),
 	DEF_FIXED(".pll0d4",   CLK_PLL0D4,         CLK_PLL0,       4, 1),
+	DEF_FIXED(".pll0d6",   CLK_PLL0D6,         CLK_PLL0,       6, 1),
 	DEF_FIXED(".pll0d8",   CLK_PLL0D8,         CLK_PLL0,       8, 1),
 	DEF_FIXED(".pll0d20",  CLK_PLL0D20,        CLK_PLL0,      20, 1),
 	DEF_FIXED(".pll0d24",  CLK_PLL0D24,        CLK_PLL0,      24, 1),
@@ -112,6 +114,7 @@ static const struct cpg_core_clk r8a774c0_core_clks[] __initconst = {
 	DEF_GEN3_PE("s3d2c",   R8A774C0_CLK_S3D2C, CLK_S3, 2, CLK_PE, 2),
 	DEF_GEN3_PE("s3d4c",   R8A774C0_CLK_S3D4C, CLK_S3, 4, CLK_PE, 4),
 
+	DEF_DIV6P1("canfd",    R8A774C0_CLK_CANFD, CLK_PLL0D6, 0x244),
 	DEF_DIV6P1("csi0",     R8A774C0_CLK_CSI0,  CLK_PLL1D2, 0x00c),
 	DEF_DIV6P1("mso",      R8A774C0_CLK_MSO,   CLK_PLL1D2, 0x014),
 
@@ -187,6 +190,7 @@ static const struct mssr_mod_clk r8a774c0_mod_clks[] __initconst = {
 	DEF_MOD("gpio2",		 910,	R8A774C0_CLK_S3D4),
 	DEF_MOD("gpio1",		 911,	R8A774C0_CLK_S3D4),
 	DEF_MOD("gpio0",		 912,	R8A774C0_CLK_S3D4),
+	DEF_MOD("can-fd",		 914,	R8A774C0_CLK_S3D2),
 	DEF_MOD("can-if1",		 915,	R8A774C0_CLK_S3D4),
 	DEF_MOD("can-if0",		 916,	R8A774C0_CLK_S3D4),
 	DEF_MOD("i2c6",			 918,	R8A774C0_CLK_S3D2),
diff --git a/include/dt-bindings/clock/r8a774c0-cpg-mssr.h b/include/dt-bindings/clock/r8a774c0-cpg-mssr.h
index 8fe51b6aca28..8ad9cd6be8e9 100644
--- a/include/dt-bindings/clock/r8a774c0-cpg-mssr.h
+++ b/include/dt-bindings/clock/r8a774c0-cpg-mssr.h
@@ -56,5 +56,6 @@
 #define R8A774C0_CLK_CSI0		45
 #define R8A774C0_CLK_CP			46
 #define R8A774C0_CLK_CPEX		47
+#define R8A774C0_CLK_CANFD		48
 
 #endif /* __DT_BINDINGS_CLOCK_R8A774C0_CPG_MSSR_H__ */
-- 
cgit v1.2.3


From 9d034e151b407cbd2c66bc4c48b423f814533374 Mon Sep 17 00:00:00 2001
From: Fabrizio Castro <fabrizio.castro@bp.renesas.com>
Date: Thu, 17 Jan 2019 14:54:14 +0000
Subject: clk: renesas: r8a774a1: Add missing CANFD clock

This patch adds the missing CANFD clock to the r8a774a1 specific
clock driver.

Signed-off-by: Fabrizio Castro <fabrizio.castro@bp.renesas.com>
Reviewed-by: Chris Paterson <Chris.Paterson2@renesas.com>
Reviewed-by: Simon Horman <horms+renesas@verge.net.au>
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
---
 drivers/clk/renesas/r8a774a1-cpg-mssr.c       | 2 ++
 include/dt-bindings/clock/r8a774a1-cpg-mssr.h | 1 +
 2 files changed, 3 insertions(+)

(limited to 'include')

diff --git a/drivers/clk/renesas/r8a774a1-cpg-mssr.c b/drivers/clk/renesas/r8a774a1-cpg-mssr.c
index 10e852518870..e10374119a91 100644
--- a/drivers/clk/renesas/r8a774a1-cpg-mssr.c
+++ b/drivers/clk/renesas/r8a774a1-cpg-mssr.c
@@ -102,6 +102,7 @@ static const struct cpg_core_clk r8a774a1_core_clks[] __initconst = {
 	DEF_FIXED("cp",         R8A774A1_CLK_CP,    CLK_EXTAL,      2, 1),
 	DEF_FIXED("cpex",       R8A774A1_CLK_CPEX,  CLK_EXTAL,      2, 1),
 
+	DEF_DIV6P1("canfd",     R8A774A1_CLK_CANFD, CLK_PLL1_DIV4, 0x244),
 	DEF_DIV6P1("csi0",      R8A774A1_CLK_CSI0,  CLK_PLL1_DIV4, 0x00c),
 	DEF_DIV6P1("mso",       R8A774A1_CLK_MSO,   CLK_PLL1_DIV4, 0x014),
 	DEF_DIV6P1("hdmi",      R8A774A1_CLK_HDMI,  CLK_PLL1_DIV4, 0x250),
@@ -191,6 +192,7 @@ static const struct mssr_mod_clk r8a774a1_mod_clks[] __initconst = {
 	DEF_MOD("gpio2",		 910,	R8A774A1_CLK_S3D4),
 	DEF_MOD("gpio1",		 911,	R8A774A1_CLK_S3D4),
 	DEF_MOD("gpio0",		 912,	R8A774A1_CLK_S3D4),
+	DEF_MOD("can-fd",		 914,	R8A774A1_CLK_S3D2),
 	DEF_MOD("can-if1",		 915,	R8A774A1_CLK_S3D4),
 	DEF_MOD("can-if0",		 916,	R8A774A1_CLK_S3D4),
 	DEF_MOD("i2c6",			 918,	R8A774A1_CLK_S0D6),
diff --git a/include/dt-bindings/clock/r8a774a1-cpg-mssr.h b/include/dt-bindings/clock/r8a774a1-cpg-mssr.h
index 9bc5d45ff4b5..e355363f40c2 100644
--- a/include/dt-bindings/clock/r8a774a1-cpg-mssr.h
+++ b/include/dt-bindings/clock/r8a774a1-cpg-mssr.h
@@ -54,5 +54,6 @@
 #define R8A774A1_CLK_CPEX		43
 #define R8A774A1_CLK_R			44
 #define R8A774A1_CLK_OSC		45
+#define R8A774A1_CLK_CANFD		46
 
 #endif /* __DT_BINDINGS_CLOCK_R8A774A1_CPG_MSSR_H__ */
-- 
cgit v1.2.3


From a41c4cb913b53bf74f1ec66a4b96057626c87009 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Sun, 13 Jan 2019 09:40:21 +0100
Subject: ALSA: pcm: Make PCM linked list consistent while re-grouping

Make a common helper to re-assign the PCM link using list_move() instead
of open code with manual list_del() and list_add_tail().  This assures
the consistency and we can get rid of snd_pcm_group.count field -- its
purpose is only to check whether the list is singular, and we can know
it by list_is_singular() call now.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/sound/pcm.h     |  1 -
 sound/core/pcm_native.c | 34 ++++++++++++++++++++--------------
 2 files changed, 20 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/include/sound/pcm.h b/include/sound/pcm.h
index d6bd3caf6878..e1c747c70883 100644
--- a/include/sound/pcm.h
+++ b/include/sound/pcm.h
@@ -439,7 +439,6 @@ struct snd_pcm_group {		/* keep linked substreams */
 	spinlock_t lock;
 	struct mutex mutex;
 	struct list_head substreams;
-	int count;
 };
 
 struct pid;
diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c
index 9e4e289e5703..1a56bb1ad780 100644
--- a/sound/core/pcm_native.c
+++ b/sound/core/pcm_native.c
@@ -1131,6 +1131,13 @@ static int snd_pcm_action_single(const struct action_ops *ops,
 	return res;
 }
 
+static void snd_pcm_group_assign(struct snd_pcm_substream *substream,
+				 struct snd_pcm_group *new_group)
+{
+	substream->group = new_group;
+	list_move(&substream->link_list, &new_group->substreams);
+}
+
 /*
  *  Note: call with stream lock
  */
@@ -1995,14 +2002,10 @@ static int snd_pcm_link(struct snd_pcm_substream *substream, int fd)
 		goto _end;
 	}
 	if (!snd_pcm_stream_linked(substream)) {
-		substream->group = group;
+		snd_pcm_group_assign(substream, group);
 		group = NULL;
-		list_add_tail(&substream->link_list, &substream->group->substreams);
-		substream->group->count = 1;
 	}
-	list_add_tail(&substream1->link_list, &substream->group->substreams);
-	substream->group->count++;
-	substream1->group = substream->group;
+	snd_pcm_group_assign(substream1, substream->group);
  _end:
 	write_unlock_irq(&snd_pcm_link_rwlock);
 	up_write(&snd_pcm_link_rwsem);
@@ -2015,14 +2018,13 @@ static int snd_pcm_link(struct snd_pcm_substream *substream, int fd)
 
 static void relink_to_local(struct snd_pcm_substream *substream)
 {
-	substream->group = &substream->self_group;
-	INIT_LIST_HEAD(&substream->self_group.substreams);
-	list_add_tail(&substream->link_list, &substream->self_group.substreams);
+	snd_pcm_group_assign(substream, &substream->self_group);
 }
 
 static int snd_pcm_unlink(struct snd_pcm_substream *substream)
 {
 	struct snd_pcm_substream *s;
+	struct snd_pcm_group *group;
 	int res = 0;
 
 	down_write_nonfifo(&snd_pcm_link_rwsem);
@@ -2031,16 +2033,20 @@ static int snd_pcm_unlink(struct snd_pcm_substream *substream)
 		res = -EALREADY;
 		goto _end;
 	}
-	list_del(&substream->link_list);
-	substream->group->count--;
-	if (substream->group->count == 1) {	/* detach the last stream, too */
+
+	group = substream->group;
+
+	relink_to_local(substream);
+
+	/* detach the last stream, too */
+	if (list_is_singular(&group->substreams)) {
 		snd_pcm_group_for_each_entry(s, substream) {
 			relink_to_local(s);
 			break;
 		}
-		kfree(substream->group);
+		kfree(group);
 	}
-	relink_to_local(substream);
+
        _end:
 	write_unlock_irq(&snd_pcm_link_rwlock);
 	up_write(&snd_pcm_link_rwsem);
-- 
cgit v1.2.3


From bbe7449e2599b58cf7b995461e2189998111f907 Mon Sep 17 00:00:00 2001
From: Phillip Potter <phil@philpotter.co.uk>
Date: Mon, 21 Jan 2019 00:54:27 +0000
Subject: fs: common implementation of file type

Many file systems use a copy&paste implementation
of dirent to on-disk file type conversions.

Create a common implementation to be used by file systems
with some useful conversion helpers to reduce open coded
file type conversions in file system code.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Phillip Potter <phil@philpotter.co.uk>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 MAINTAINERS              |   1 +
 fs/Makefile              |   3 +-
 fs/fs_types.c            | 105 +++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/fs.h       |  17 +-------
 include/linux/fs_types.h |  75 +++++++++++++++++++++++++++++++++
 5 files changed, 184 insertions(+), 17 deletions(-)
 create mode 100644 fs/fs_types.c
 create mode 100644 include/linux/fs_types.h

(limited to 'include')

diff --git a/MAINTAINERS b/MAINTAINERS
index 51029a425dbe..0afaaf0aa6be 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5881,6 +5881,7 @@ L:	linux-fsdevel@vger.kernel.org
 S:	Maintained
 F:	fs/*
 F:	include/linux/fs.h
+F:	include/linux/fs_types.h
 F:	include/uapi/linux/fs.h
 
 FINTEK F75375S HARDWARE MONITOR AND FAN CONTROLLER DRIVER
diff --git a/fs/Makefile b/fs/Makefile
index 293733f61594..23fcd8c164a3 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -12,7 +12,8 @@ obj-y :=	open.o read_write.o file_table.o super.o \
 		attr.o bad_inode.o file.o filesystems.o namespace.o \
 		seq_file.o xattr.o libfs.o fs-writeback.o \
 		pnode.o splice.o sync.o utimes.o d_path.o \
-		stack.o fs_struct.o statfs.o fs_pin.o nsfs.o
+		stack.o fs_struct.o statfs.o fs_pin.o nsfs.o \
+		fs_types.o
 
 ifeq ($(CONFIG_BLOCK),y)
 obj-y +=	buffer.o block_dev.o direct-io.o mpage.o
diff --git a/fs/fs_types.c b/fs/fs_types.c
new file mode 100644
index 000000000000..78365e5dc08c
--- /dev/null
+++ b/fs/fs_types.c
@@ -0,0 +1,105 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/fs.h>
+#include <linux/export.h>
+
+/*
+ * fs on-disk file type to dirent file type conversion
+ */
+static const unsigned char fs_dtype_by_ftype[FT_MAX] = {
+	[FT_UNKNOWN]	= DT_UNKNOWN,
+	[FT_REG_FILE]	= DT_REG,
+	[FT_DIR]	= DT_DIR,
+	[FT_CHRDEV]	= DT_CHR,
+	[FT_BLKDEV]	= DT_BLK,
+	[FT_FIFO]	= DT_FIFO,
+	[FT_SOCK]	= DT_SOCK,
+	[FT_SYMLINK]	= DT_LNK
+};
+
+/**
+ * fs_ftype_to_dtype() - fs on-disk file type to dirent type.
+ * @filetype: The on-disk file type to convert.
+ *
+ * This function converts the on-disk file type value (FT_*) to the directory
+ * entry type (DT_*).
+ *
+ * Context: Any context.
+ * Return:
+ * * DT_UNKNOWN		- Unknown type
+ * * DT_FIFO		- FIFO
+ * * DT_CHR		- Character device
+ * * DT_DIR		- Directory
+ * * DT_BLK		- Block device
+ * * DT_REG		- Regular file
+ * * DT_LNK		- Symbolic link
+ * * DT_SOCK		- Local-domain socket
+ */
+unsigned char fs_ftype_to_dtype(unsigned int filetype)
+{
+	if (filetype >= FT_MAX)
+		return DT_UNKNOWN;
+
+	return fs_dtype_by_ftype[filetype];
+}
+EXPORT_SYMBOL_GPL(fs_ftype_to_dtype);
+
+/*
+ * dirent file type to fs on-disk file type conversion
+ * Values not initialized explicitly are FT_UNKNOWN (0).
+ */
+static const unsigned char fs_ftype_by_dtype[DT_MAX] = {
+	[DT_REG]	= FT_REG_FILE,
+	[DT_DIR]	= FT_DIR,
+	[DT_LNK]	= FT_SYMLINK,
+	[DT_CHR]	= FT_CHRDEV,
+	[DT_BLK]	= FT_BLKDEV,
+	[DT_FIFO]	= FT_FIFO,
+	[DT_SOCK]	= FT_SOCK,
+};
+
+/**
+ * fs_umode_to_ftype() - file mode to on-disk file type.
+ * @mode: The file mode to convert.
+ *
+ * This function converts the file mode value to the on-disk file type (FT_*).
+ *
+ * Context: Any context.
+ * Return:
+ * * FT_UNKNOWN		- Unknown type
+ * * FT_REG_FILE	- Regular file
+ * * FT_DIR		- Directory
+ * * FT_CHRDEV		- Character device
+ * * FT_BLKDEV		- Block device
+ * * FT_FIFO		- FIFO
+ * * FT_SOCK		- Local-domain socket
+ * * FT_SYMLINK		- Symbolic link
+ */
+unsigned char fs_umode_to_ftype(umode_t mode)
+{
+	return fs_ftype_by_dtype[S_DT(mode)];
+}
+EXPORT_SYMBOL_GPL(fs_umode_to_ftype);
+
+/**
+ * fs_umode_to_dtype() - file mode to dirent file type.
+ * @mode: The file mode to convert.
+ *
+ * This function converts the file mode value to the directory
+ * entry type (DT_*).
+ *
+ * Context: Any context.
+ * Return:
+ * * DT_UNKNOWN		- Unknown type
+ * * DT_FIFO		- FIFO
+ * * DT_CHR		- Character device
+ * * DT_DIR		- Directory
+ * * DT_BLK		- Block device
+ * * DT_REG		- Regular file
+ * * DT_LNK		- Symbolic link
+ * * DT_SOCK		- Local-domain socket
+ */
+unsigned char fs_umode_to_dtype(umode_t mode)
+{
+	return fs_ftype_to_dtype(fs_umode_to_ftype(mode));
+}
+EXPORT_SYMBOL_GPL(fs_umode_to_dtype);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 811c77743dad..92966678539d 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -37,6 +37,7 @@
 #include <linux/uuid.h>
 #include <linux/errseq.h>
 #include <linux/ioprio.h>
+#include <linux/fs_types.h>
 
 #include <asm/byteorder.h>
 #include <uapi/linux/fs.h>
@@ -1699,22 +1700,6 @@ int fiemap_fill_next_extent(struct fiemap_extent_info *info, u64 logical,
 			    u64 phys, u64 len, u32 flags);
 int fiemap_check_flags(struct fiemap_extent_info *fieinfo, u32 fs_flags);
 
-/*
- * File types
- *
- * NOTE! These match bits 12..15 of stat.st_mode
- * (ie "(i_mode >> 12) & 15").
- */
-#define DT_UNKNOWN	0
-#define DT_FIFO		1
-#define DT_CHR		2
-#define DT_DIR		4
-#define DT_BLK		6
-#define DT_REG		8
-#define DT_LNK		10
-#define DT_SOCK		12
-#define DT_WHT		14
-
 /*
  * This is the "filldir" function type, used by readdir() to let
  * the kernel specify what kind of dirent layout it wants to have.
diff --git a/include/linux/fs_types.h b/include/linux/fs_types.h
new file mode 100644
index 000000000000..54816791196f
--- /dev/null
+++ b/include/linux/fs_types.h
@@ -0,0 +1,75 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_FS_TYPES_H
+#define _LINUX_FS_TYPES_H
+
+/*
+ * This is a header for the common implementation of dirent
+ * to fs on-disk file type conversion.  Although the fs on-disk
+ * bits are specific to every file system, in practice, many
+ * file systems use the exact same on-disk format to describe
+ * the lower 3 file type bits that represent the 7 POSIX file
+ * types.
+ *
+ * It is important to note that the definitions in this
+ * header MUST NOT change. This would break both the
+ * userspace ABI and the on-disk format of filesystems
+ * using this code.
+ *
+ * All those file systems can use this generic code for the
+ * conversions.
+ */
+
+/*
+ * struct dirent file types
+ * exposed to user via getdents(2), readdir(3)
+ *
+ * These match bits 12..15 of stat.st_mode
+ * (ie "(i_mode >> 12) & 15").
+ */
+#define S_DT_SHIFT	12
+#define S_DT(mode)	(((mode) & S_IFMT) >> S_DT_SHIFT)
+#define S_DT_MASK	(S_IFMT >> S_DT_SHIFT)
+
+/* these are defined by POSIX and also present in glibc's dirent.h */
+#define DT_UNKNOWN	0
+#define DT_FIFO		1
+#define DT_CHR		2
+#define DT_DIR		4
+#define DT_BLK		6
+#define DT_REG		8
+#define DT_LNK		10
+#define DT_SOCK		12
+#define DT_WHT		14
+
+#define DT_MAX		(S_DT_MASK + 1) /* 16 */
+
+/*
+ * fs on-disk file types.
+ * Only the low 3 bits are used for the POSIX file types.
+ * Other bits are reserved for fs private use.
+ * These definitions are shared and used by multiple filesystems,
+ * and MUST NOT change under any circumstances.
+ *
+ * Note that no fs currently stores the whiteout type on-disk,
+ * so whiteout dirents are exposed to user as DT_CHR.
+ */
+#define FT_UNKNOWN	0
+#define FT_REG_FILE	1
+#define FT_DIR		2
+#define FT_CHRDEV	3
+#define FT_BLKDEV	4
+#define FT_FIFO		5
+#define FT_SOCK		6
+#define FT_SYMLINK	7
+
+#define FT_MAX		8
+
+/*
+ * declarations for helper functions, accompanying implementation
+ * is in fs/fs_types.c
+ */
+extern unsigned char fs_ftype_to_dtype(unsigned int filetype);
+extern unsigned char fs_umode_to_ftype(umode_t mode);
+extern unsigned char fs_umode_to_dtype(umode_t mode);
+
+#endif
-- 
cgit v1.2.3


From 386a35eb70569b3158392eb573fe42589a669da4 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Date: Thu, 17 Jan 2019 11:33:42 -0200
Subject: media: tw9910.h: remove obsolete soc_camera.h include.

This include isn't use anymore, so drop it.

Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Acked-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 include/media/i2c/tw9910.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/media/i2c/tw9910.h b/include/media/i2c/tw9910.h
index bec8f7bce745..2f93799d5a21 100644
--- a/include/media/i2c/tw9910.h
+++ b/include/media/i2c/tw9910.h
@@ -16,8 +16,6 @@
 #ifndef __TW9910_H__
 #define __TW9910_H__
 
-#include <media/soc_camera.h>
-
 /**
  * tw9910_mpout_pin - MPOUT (multi-purpose output) pin functions
  */
-- 
cgit v1.2.3


From 43a445f188e1a4d58e1be052b23afc9aacccbdd8 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Date: Thu, 17 Jan 2019 11:41:23 -0200
Subject: media: sh_mobile_ceu_camera: remove obsolete soc_camera driver

This driver got converted to not depend on soc_camera in commit
32e5a70dc8f4 ("media: platform: Add Renesas CEU driver").

There's no sense in keeping the old version there.

Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Acked-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 drivers/media/platform/soc_camera/Kconfig          |    9 -
 drivers/media/platform/soc_camera/Makefile         |    3 -
 .../platform/soc_camera/sh_mobile_ceu_camera.c     | 1810 --------------------
 include/media/drv-intf/sh_mobile_ceu.h             |   29 -
 4 files changed, 1851 deletions(-)
 delete mode 100644 drivers/media/platform/soc_camera/sh_mobile_ceu_camera.c
 delete mode 100644 include/media/drv-intf/sh_mobile_ceu.h

(limited to 'include')

diff --git a/drivers/media/platform/soc_camera/Kconfig b/drivers/media/platform/soc_camera/Kconfig
index 669d116b8f09..94907d0611d6 100644
--- a/drivers/media/platform/soc_camera/Kconfig
+++ b/drivers/media/platform/soc_camera/Kconfig
@@ -15,12 +15,3 @@ config SOC_CAMERA_PLATFORM
 	depends on SOC_CAMERA
 	help
 	  This is a generic SoC camera platform driver, useful for testing
-
-config VIDEO_SH_MOBILE_CEU
-	tristate "SuperH Mobile CEU Interface driver"
-	depends on VIDEO_DEV && SOC_CAMERA && HAVE_CLK
-	depends on ARCH_SHMOBILE || COMPILE_TEST
-	select VIDEOBUF2_DMA_CONTIG
-	select SOC_CAMERA_SCALE_CROP
-	---help---
-	  This is a v4l2 driver for the SuperH Mobile CEU Interface
diff --git a/drivers/media/platform/soc_camera/Makefile b/drivers/media/platform/soc_camera/Makefile
index 07a451e8b228..aa4b855a0882 100644
--- a/drivers/media/platform/soc_camera/Makefile
+++ b/drivers/media/platform/soc_camera/Makefile
@@ -4,6 +4,3 @@ obj-$(CONFIG_SOC_CAMERA_SCALE_CROP)	+= soc_scale_crop.o
 # a platform subdevice driver stub, allowing to support cameras by adding a
 # couple of callback functions to the board code
 obj-$(CONFIG_SOC_CAMERA_PLATFORM)	+= soc_camera_platform.o
-
-# soc-camera host drivers have to be linked after camera drivers
-obj-$(CONFIG_VIDEO_SH_MOBILE_CEU)	+= sh_mobile_ceu_camera.o
diff --git a/drivers/media/platform/soc_camera/sh_mobile_ceu_camera.c b/drivers/media/platform/soc_camera/sh_mobile_ceu_camera.c
deleted file mode 100644
index 6803f744e307..000000000000
--- a/drivers/media/platform/soc_camera/sh_mobile_ceu_camera.c
+++ /dev/null
@@ -1,1810 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0+
-/*
- * V4L2 Driver for SuperH Mobile CEU interface
- *
- * Copyright (C) 2008 Magnus Damm
- *
- * Based on V4L2 Driver for PXA camera host - "pxa_camera.c",
- *
- * Copyright (C) 2006, Sascha Hauer, Pengutronix
- * Copyright (C) 2008, Guennadi Liakhovetski <kernel@pengutronix.de>
- */
-
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/io.h>
-#include <linux/completion.h>
-#include <linux/delay.h>
-#include <linux/dma-mapping.h>
-#include <linux/err.h>
-#include <linux/errno.h>
-#include <linux/fs.h>
-#include <linux/interrupt.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/moduleparam.h>
-#include <linux/of.h>
-#include <linux/time.h>
-#include <linux/slab.h>
-#include <linux/device.h>
-#include <linux/platform_device.h>
-#include <linux/videodev2.h>
-#include <linux/pm_runtime.h>
-#include <linux/sched.h>
-
-#include <media/v4l2-async.h>
-#include <media/v4l2-common.h>
-#include <media/v4l2-dev.h>
-#include <media/soc_camera.h>
-#include <media/drv-intf/sh_mobile_ceu.h>
-#include <media/videobuf2-dma-contig.h>
-#include <media/v4l2-mediabus.h>
-#include <media/drv-intf/soc_mediabus.h>
-
-#include "soc_scale_crop.h"
-
-/* register offsets for sh7722 / sh7723 */
-
-#define CAPSR  0x00 /* Capture start register */
-#define CAPCR  0x04 /* Capture control register */
-#define CAMCR  0x08 /* Capture interface control register */
-#define CMCYR  0x0c /* Capture interface cycle  register */
-#define CAMOR  0x10 /* Capture interface offset register */
-#define CAPWR  0x14 /* Capture interface width register */
-#define CAIFR  0x18 /* Capture interface input format register */
-#define CSTCR  0x20 /* Camera strobe control register (<= sh7722) */
-#define CSECR  0x24 /* Camera strobe emission count register (<= sh7722) */
-#define CRCNTR 0x28 /* CEU register control register */
-#define CRCMPR 0x2c /* CEU register forcible control register */
-#define CFLCR  0x30 /* Capture filter control register */
-#define CFSZR  0x34 /* Capture filter size clip register */
-#define CDWDR  0x38 /* Capture destination width register */
-#define CDAYR  0x3c /* Capture data address Y register */
-#define CDACR  0x40 /* Capture data address C register */
-#define CDBYR  0x44 /* Capture data bottom-field address Y register */
-#define CDBCR  0x48 /* Capture data bottom-field address C register */
-#define CBDSR  0x4c /* Capture bundle destination size register */
-#define CFWCR  0x5c /* Firewall operation control register */
-#define CLFCR  0x60 /* Capture low-pass filter control register */
-#define CDOCR  0x64 /* Capture data output control register */
-#define CDDCR  0x68 /* Capture data complexity level register */
-#define CDDAR  0x6c /* Capture data complexity level address register */
-#define CEIER  0x70 /* Capture event interrupt enable register */
-#define CETCR  0x74 /* Capture event flag clear register */
-#define CSTSR  0x7c /* Capture status register */
-#define CSRTR  0x80 /* Capture software reset register */
-#define CDSSR  0x84 /* Capture data size register */
-#define CDAYR2 0x90 /* Capture data address Y register 2 */
-#define CDACR2 0x94 /* Capture data address C register 2 */
-#define CDBYR2 0x98 /* Capture data bottom-field address Y register 2 */
-#define CDBCR2 0x9c /* Capture data bottom-field address C register 2 */
-
-#undef DEBUG_GEOMETRY
-#ifdef DEBUG_GEOMETRY
-#define dev_geo	dev_info
-#else
-#define dev_geo	dev_dbg
-#endif
-
-/* per video frame buffer */
-struct sh_mobile_ceu_buffer {
-	struct vb2_v4l2_buffer vb; /* v4l buffer must be first */
-	struct list_head queue;
-};
-
-struct sh_mobile_ceu_dev {
-	struct soc_camera_host ici;
-
-	unsigned int irq;
-	void __iomem *base;
-	size_t video_limit;
-	size_t buf_total;
-
-	spinlock_t lock;		/* Protects video buffer lists */
-	struct list_head capture;
-	struct vb2_v4l2_buffer *active;
-
-	struct sh_mobile_ceu_info *pdata;
-	struct completion complete;
-
-	u32 cflcr;
-
-	/* static max sizes either from platform data or default */
-	int max_width;
-	int max_height;
-
-	enum v4l2_field field;
-	int sequence;
-	unsigned long flags;
-
-	unsigned int image_mode:1;
-	unsigned int is_16bit:1;
-	unsigned int frozen:1;
-};
-
-struct sh_mobile_ceu_cam {
-	/* CEU offsets within the camera output, before the CEU scaler */
-	unsigned int ceu_left;
-	unsigned int ceu_top;
-	/* Client output, as seen by the CEU */
-	unsigned int width;
-	unsigned int height;
-	/*
-	 * User window from S_SELECTION / G_SELECTION, produced by client cropping and
-	 * scaling, CEU scaling and CEU cropping, mapped back onto the client
-	 * input window
-	 */
-	struct v4l2_rect subrect;
-	/* Camera cropping rectangle */
-	struct v4l2_rect rect;
-	const struct soc_mbus_pixelfmt *extra_fmt;
-	u32 code;
-};
-
-static struct sh_mobile_ceu_buffer *to_ceu_vb(struct vb2_v4l2_buffer *vbuf)
-{
-	return container_of(vbuf, struct sh_mobile_ceu_buffer, vb);
-}
-
-static void ceu_write(struct sh_mobile_ceu_dev *priv,
-		      unsigned long reg_offs, u32 data)
-{
-	iowrite32(data, priv->base + reg_offs);
-}
-
-static u32 ceu_read(struct sh_mobile_ceu_dev *priv, unsigned long reg_offs)
-{
-	return ioread32(priv->base + reg_offs);
-}
-
-static int sh_mobile_ceu_soft_reset(struct sh_mobile_ceu_dev *pcdev)
-{
-	int i, success = 0;
-
-	ceu_write(pcdev, CAPSR, 1 << 16); /* reset */
-
-	/* wait CSTSR.CPTON bit */
-	for (i = 0; i < 1000; i++) {
-		if (!(ceu_read(pcdev, CSTSR) & 1)) {
-			success++;
-			break;
-		}
-		udelay(1);
-	}
-
-	/* wait CAPSR.CPKIL bit */
-	for (i = 0; i < 1000; i++) {
-		if (!(ceu_read(pcdev, CAPSR) & (1 << 16))) {
-			success++;
-			break;
-		}
-		udelay(1);
-	}
-
-	if (2 != success) {
-		dev_warn(pcdev->ici.v4l2_dev.dev, "soft reset time out\n");
-		return -EIO;
-	}
-
-	return 0;
-}
-
-/*
- *  Videobuf operations
- */
-
-/*
- * .queue_setup() is called to check, whether the driver can accept the
- *		  requested number of buffers and to fill in plane sizes
- *		  for the current frame format if required
- */
-static int sh_mobile_ceu_videobuf_setup(struct vb2_queue *vq,
-			unsigned int *count, unsigned int *num_planes,
-			unsigned int sizes[], struct device *alloc_devs[])
-{
-	struct soc_camera_device *icd = soc_camera_from_vb2q(vq);
-	struct soc_camera_host *ici = to_soc_camera_host(icd->parent);
-	struct sh_mobile_ceu_dev *pcdev = ici->priv;
-
-	if (!vq->num_buffers)
-		pcdev->sequence = 0;
-
-	if (!*count)
-		*count = 2;
-
-	/* Called from VIDIOC_REQBUFS or in compatibility mode */
-	if (!*num_planes)
-		sizes[0] = icd->sizeimage;
-	else if (sizes[0] < icd->sizeimage)
-		return -EINVAL;
-
-	/* If *num_planes != 0, we have already verified *count. */
-	if (pcdev->video_limit) {
-		size_t size = PAGE_ALIGN(sizes[0]) * *count;
-
-		if (size + pcdev->buf_total > pcdev->video_limit)
-			*count = (pcdev->video_limit - pcdev->buf_total) /
-				PAGE_ALIGN(sizes[0]);
-	}
-
-	*num_planes = 1;
-
-	dev_dbg(icd->parent, "count=%d, size=%u\n", *count, sizes[0]);
-
-	return 0;
-}
-
-#define CEU_CETCR_MAGIC 0x0317f313 /* acknowledge magical interrupt sources */
-#define CEU_CETCR_IGRW (1 << 4) /* prohibited register access interrupt bit */
-#define CEU_CEIER_CPEIE (1 << 0) /* one-frame capture end interrupt */
-#define CEU_CEIER_VBP   (1 << 20) /* vbp error */
-#define CEU_CAPCR_CTNCP (1 << 16) /* continuous capture mode (if set) */
-#define CEU_CEIER_MASK (CEU_CEIER_CPEIE | CEU_CEIER_VBP)
-
-
-/*
- * return value doesn't reflex the success/failure to queue the new buffer,
- * but rather the status of the previous buffer.
- */
-static int sh_mobile_ceu_capture(struct sh_mobile_ceu_dev *pcdev)
-{
-	struct soc_camera_device *icd = pcdev->ici.icd;
-	dma_addr_t phys_addr_top, phys_addr_bottom;
-	unsigned long top1, top2;
-	unsigned long bottom1, bottom2;
-	u32 status;
-	bool planar;
-	int ret = 0;
-
-	/*
-	 * The hardware is _very_ picky about this sequence. Especially
-	 * the CEU_CETCR_MAGIC value. It seems like we need to acknowledge
-	 * several not-so-well documented interrupt sources in CETCR.
-	 */
-	ceu_write(pcdev, CEIER, ceu_read(pcdev, CEIER) & ~CEU_CEIER_MASK);
-	status = ceu_read(pcdev, CETCR);
-	ceu_write(pcdev, CETCR, ~status & CEU_CETCR_MAGIC);
-	if (!pcdev->frozen)
-		ceu_write(pcdev, CEIER, ceu_read(pcdev, CEIER) | CEU_CEIER_MASK);
-	ceu_write(pcdev, CAPCR, ceu_read(pcdev, CAPCR) & ~CEU_CAPCR_CTNCP);
-	ceu_write(pcdev, CETCR, CEU_CETCR_MAGIC ^ CEU_CETCR_IGRW);
-
-	/*
-	 * When a VBP interrupt occurs, a capture end interrupt does not occur
-	 * and the image of that frame is not captured correctly. So, soft reset
-	 * is needed here.
-	 */
-	if (status & CEU_CEIER_VBP) {
-		sh_mobile_ceu_soft_reset(pcdev);
-		ret = -EIO;
-	}
-
-	if (pcdev->frozen) {
-		complete(&pcdev->complete);
-		return ret;
-	}
-
-	if (!pcdev->active)
-		return ret;
-
-	if (V4L2_FIELD_INTERLACED_BT == pcdev->field) {
-		top1	= CDBYR;
-		top2	= CDBCR;
-		bottom1	= CDAYR;
-		bottom2	= CDACR;
-	} else {
-		top1	= CDAYR;
-		top2	= CDACR;
-		bottom1	= CDBYR;
-		bottom2	= CDBCR;
-	}
-
-	phys_addr_top =
-		vb2_dma_contig_plane_dma_addr(&pcdev->active->vb2_buf, 0);
-
-	switch (icd->current_fmt->host_fmt->fourcc) {
-	case V4L2_PIX_FMT_NV12:
-	case V4L2_PIX_FMT_NV21:
-	case V4L2_PIX_FMT_NV16:
-	case V4L2_PIX_FMT_NV61:
-		planar = true;
-		break;
-	default:
-		planar = false;
-	}
-
-	ceu_write(pcdev, top1, phys_addr_top);
-	if (V4L2_FIELD_NONE != pcdev->field) {
-		phys_addr_bottom = phys_addr_top + icd->bytesperline;
-		ceu_write(pcdev, bottom1, phys_addr_bottom);
-	}
-
-	if (planar) {
-		phys_addr_top += icd->bytesperline * icd->user_height;
-		ceu_write(pcdev, top2, phys_addr_top);
-		if (V4L2_FIELD_NONE != pcdev->field) {
-			phys_addr_bottom = phys_addr_top + icd->bytesperline;
-			ceu_write(pcdev, bottom2, phys_addr_bottom);
-		}
-	}
-
-	ceu_write(pcdev, CAPSR, 0x1); /* start capture */
-
-	return ret;
-}
-
-static int sh_mobile_ceu_videobuf_prepare(struct vb2_buffer *vb)
-{
-	struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
-	struct sh_mobile_ceu_buffer *buf = to_ceu_vb(vbuf);
-
-	/* Added list head initialization on alloc */
-	WARN(!list_empty(&buf->queue), "Buffer %p on queue!\n", vb);
-
-	return 0;
-}
-
-static void sh_mobile_ceu_videobuf_queue(struct vb2_buffer *vb)
-{
-	struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
-	struct soc_camera_device *icd = soc_camera_from_vb2q(vb->vb2_queue);
-	struct soc_camera_host *ici = to_soc_camera_host(icd->parent);
-	struct sh_mobile_ceu_dev *pcdev = ici->priv;
-	struct sh_mobile_ceu_buffer *buf = to_ceu_vb(vbuf);
-	unsigned long size;
-
-	size = icd->sizeimage;
-
-	if (vb2_plane_size(vb, 0) < size) {
-		dev_err(icd->parent, "Buffer #%d too small (%lu < %lu)\n",
-			vb->index, vb2_plane_size(vb, 0), size);
-		goto error;
-	}
-
-	vb2_set_plane_payload(vb, 0, size);
-
-	dev_dbg(icd->parent, "%s (vb=0x%p) 0x%p %lu\n", __func__,
-		vb, vb2_plane_vaddr(vb, 0), vb2_get_plane_payload(vb, 0));
-
-#ifdef DEBUG
-	/*
-	 * This can be useful if you want to see if we actually fill
-	 * the buffer with something
-	 */
-	if (vb2_plane_vaddr(vb, 0))
-		memset(vb2_plane_vaddr(vb, 0), 0xaa, vb2_get_plane_payload(vb, 0));
-#endif
-
-	spin_lock_irq(&pcdev->lock);
-	list_add_tail(&buf->queue, &pcdev->capture);
-
-	if (!pcdev->active) {
-		/*
-		 * Because there were no active buffer at this moment,
-		 * we are not interested in the return value of
-		 * sh_mobile_ceu_capture here.
-		 */
-		pcdev->active = vbuf;
-		sh_mobile_ceu_capture(pcdev);
-	}
-	spin_unlock_irq(&pcdev->lock);
-
-	return;
-
-error:
-	vb2_buffer_done(vb, VB2_BUF_STATE_ERROR);
-}
-
-static void sh_mobile_ceu_videobuf_release(struct vb2_buffer *vb)
-{
-	struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
-	struct soc_camera_device *icd = soc_camera_from_vb2q(vb->vb2_queue);
-	struct soc_camera_host *ici = to_soc_camera_host(icd->parent);
-	struct sh_mobile_ceu_buffer *buf = to_ceu_vb(vbuf);
-	struct sh_mobile_ceu_dev *pcdev = ici->priv;
-
-	spin_lock_irq(&pcdev->lock);
-
-	if (pcdev->active == vbuf) {
-		/* disable capture (release DMA buffer), reset */
-		ceu_write(pcdev, CAPSR, 1 << 16);
-		pcdev->active = NULL;
-	}
-
-	/*
-	 * Doesn't hurt also if the list is empty, but it hurts, if queuing the
-	 * buffer failed, and .buf_init() hasn't been called
-	 */
-	if (buf->queue.next)
-		list_del_init(&buf->queue);
-
-	pcdev->buf_total -= PAGE_ALIGN(vb2_plane_size(vb, 0));
-	dev_dbg(icd->parent, "%s() %zu bytes buffers\n", __func__,
-		pcdev->buf_total);
-
-	spin_unlock_irq(&pcdev->lock);
-}
-
-static int sh_mobile_ceu_videobuf_init(struct vb2_buffer *vb)
-{
-	struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
-	struct soc_camera_device *icd = soc_camera_from_vb2q(vb->vb2_queue);
-	struct soc_camera_host *ici = to_soc_camera_host(icd->parent);
-	struct sh_mobile_ceu_dev *pcdev = ici->priv;
-
-	pcdev->buf_total += PAGE_ALIGN(vb2_plane_size(vb, 0));
-	dev_dbg(icd->parent, "%s() %zu bytes buffers\n", __func__,
-		pcdev->buf_total);
-
-	/* This is for locking debugging only */
-	INIT_LIST_HEAD(&to_ceu_vb(vbuf)->queue);
-	return 0;
-}
-
-static void sh_mobile_ceu_stop_streaming(struct vb2_queue *q)
-{
-	struct soc_camera_device *icd = soc_camera_from_vb2q(q);
-	struct soc_camera_host *ici = to_soc_camera_host(icd->parent);
-	struct sh_mobile_ceu_dev *pcdev = ici->priv;
-	struct list_head *buf_head, *tmp;
-	struct vb2_v4l2_buffer *vbuf;
-
-	spin_lock_irq(&pcdev->lock);
-
-	pcdev->active = NULL;
-
-	list_for_each_safe(buf_head, tmp, &pcdev->capture) {
-		vbuf = &list_entry(buf_head, struct sh_mobile_ceu_buffer,
-				   queue)->vb;
-		vb2_buffer_done(&vbuf->vb2_buf, VB2_BUF_STATE_DONE);
-		list_del_init(buf_head);
-	}
-
-	spin_unlock_irq(&pcdev->lock);
-
-	sh_mobile_ceu_soft_reset(pcdev);
-}
-
-static const struct vb2_ops sh_mobile_ceu_videobuf_ops = {
-	.queue_setup	= sh_mobile_ceu_videobuf_setup,
-	.buf_prepare	= sh_mobile_ceu_videobuf_prepare,
-	.buf_queue	= sh_mobile_ceu_videobuf_queue,
-	.buf_cleanup	= sh_mobile_ceu_videobuf_release,
-	.buf_init	= sh_mobile_ceu_videobuf_init,
-	.wait_prepare	= vb2_ops_wait_prepare,
-	.wait_finish	= vb2_ops_wait_finish,
-	.stop_streaming	= sh_mobile_ceu_stop_streaming,
-};
-
-static irqreturn_t sh_mobile_ceu_irq(int irq, void *data)
-{
-	struct sh_mobile_ceu_dev *pcdev = data;
-	struct vb2_v4l2_buffer *vbuf;
-	int ret;
-
-	spin_lock(&pcdev->lock);
-
-	vbuf = pcdev->active;
-	if (!vbuf)
-		/* Stale interrupt from a released buffer */
-		goto out;
-
-	list_del_init(&to_ceu_vb(vbuf)->queue);
-
-	if (!list_empty(&pcdev->capture))
-		pcdev->active = &list_entry(pcdev->capture.next,
-					    struct sh_mobile_ceu_buffer, queue)->vb;
-	else
-		pcdev->active = NULL;
-
-	ret = sh_mobile_ceu_capture(pcdev);
-	vbuf->vb2_buf.timestamp = ktime_get_ns();
-	if (!ret) {
-		vbuf->field = pcdev->field;
-		vbuf->sequence = pcdev->sequence++;
-	}
-	vb2_buffer_done(&vbuf->vb2_buf,
-			ret < 0 ? VB2_BUF_STATE_ERROR : VB2_BUF_STATE_DONE);
-
-out:
-	spin_unlock(&pcdev->lock);
-
-	return IRQ_HANDLED;
-}
-
-static int sh_mobile_ceu_add_device(struct soc_camera_device *icd)
-{
-	dev_info(icd->parent,
-		 "SuperH Mobile CEU driver attached to camera %d\n",
-		 icd->devnum);
-
-	return 0;
-}
-
-static void sh_mobile_ceu_remove_device(struct soc_camera_device *icd)
-{
-	dev_info(icd->parent,
-		 "SuperH Mobile CEU driver detached from camera %d\n",
-		 icd->devnum);
-}
-
-/* Called with .host_lock held */
-static int sh_mobile_ceu_clock_start(struct soc_camera_host *ici)
-{
-	struct sh_mobile_ceu_dev *pcdev = ici->priv;
-
-	pm_runtime_get_sync(ici->v4l2_dev.dev);
-
-	pcdev->buf_total = 0;
-
-	sh_mobile_ceu_soft_reset(pcdev);
-
-	return 0;
-}
-
-/* Called with .host_lock held */
-static void sh_mobile_ceu_clock_stop(struct soc_camera_host *ici)
-{
-	struct sh_mobile_ceu_dev *pcdev = ici->priv;
-
-	/* disable capture, disable interrupts */
-	ceu_write(pcdev, CEIER, 0);
-	sh_mobile_ceu_soft_reset(pcdev);
-
-	/* make sure active buffer is canceled */
-	spin_lock_irq(&pcdev->lock);
-	if (pcdev->active) {
-		list_del_init(&to_ceu_vb(pcdev->active)->queue);
-		vb2_buffer_done(&pcdev->active->vb2_buf, VB2_BUF_STATE_ERROR);
-		pcdev->active = NULL;
-	}
-	spin_unlock_irq(&pcdev->lock);
-
-	pm_runtime_put(ici->v4l2_dev.dev);
-}
-
-/*
- * See chapter 29.4.12 "Capture Filter Control Register (CFLCR)"
- * in SH7722 Hardware Manual
- */
-static unsigned int size_dst(unsigned int src, unsigned int scale)
-{
-	unsigned int mant_pre = scale >> 12;
-	if (!src || !scale)
-		return src;
-	return ((mant_pre + 2 * (src - 1)) / (2 * mant_pre) - 1) *
-		mant_pre * 4096 / scale + 1;
-}
-
-static u16 calc_scale(unsigned int src, unsigned int *dst)
-{
-	u16 scale;
-
-	if (src == *dst)
-		return 0;
-
-	scale = (src * 4096 / *dst) & ~7;
-
-	while (scale > 4096 && size_dst(src, scale) < *dst)
-		scale -= 8;
-
-	*dst = size_dst(src, scale);
-
-	return scale;
-}
-
-/* rect is guaranteed to not exceed the scaled camera rectangle */
-static void sh_mobile_ceu_set_rect(struct soc_camera_device *icd)
-{
-	struct soc_camera_host *ici = to_soc_camera_host(icd->parent);
-	struct sh_mobile_ceu_cam *cam = icd->host_priv;
-	struct sh_mobile_ceu_dev *pcdev = ici->priv;
-	unsigned int height, width, cdwdr_width, in_width, in_height;
-	unsigned int left_offset, top_offset;
-	u32 camor;
-
-	dev_geo(icd->parent, "Crop %ux%u@%u:%u\n",
-		icd->user_width, icd->user_height, cam->ceu_left, cam->ceu_top);
-
-	left_offset	= cam->ceu_left;
-	top_offset	= cam->ceu_top;
-
-	WARN_ON(icd->user_width & 3 || icd->user_height & 3);
-
-	width = icd->user_width;
-
-	if (pcdev->image_mode) {
-		in_width = cam->width;
-		if (!pcdev->is_16bit) {
-			in_width *= 2;
-			left_offset *= 2;
-		}
-	} else {
-		unsigned int w_factor;
-
-		switch (icd->current_fmt->host_fmt->packing) {
-		case SOC_MBUS_PACKING_2X8_PADHI:
-			w_factor = 2;
-			break;
-		default:
-			w_factor = 1;
-		}
-
-		in_width = cam->width * w_factor;
-		left_offset *= w_factor;
-	}
-
-	cdwdr_width = icd->bytesperline;
-
-	height = icd->user_height;
-	in_height = cam->height;
-	if (V4L2_FIELD_NONE != pcdev->field) {
-		height = (height / 2) & ~3;
-		in_height /= 2;
-		top_offset /= 2;
-		cdwdr_width *= 2;
-	}
-
-	/* Set CAMOR, CAPWR, CFSZR, take care of CDWDR */
-	camor = left_offset | (top_offset << 16);
-
-	dev_geo(icd->parent,
-		"CAMOR 0x%x, CAPWR 0x%x, CFSZR 0x%x, CDWDR 0x%x\n", camor,
-		(in_height << 16) | in_width, (height << 16) | width,
-		cdwdr_width);
-
-	ceu_write(pcdev, CAMOR, camor);
-	ceu_write(pcdev, CAPWR, (in_height << 16) | in_width);
-	/* CFSZR clipping is applied _after_ the scaling filter (CFLCR) */
-	ceu_write(pcdev, CFSZR, (height << 16) | width);
-	ceu_write(pcdev, CDWDR, cdwdr_width);
-}
-
-static u32 capture_save_reset(struct sh_mobile_ceu_dev *pcdev)
-{
-	u32 capsr = ceu_read(pcdev, CAPSR);
-	ceu_write(pcdev, CAPSR, 1 << 16); /* reset, stop capture */
-	return capsr;
-}
-
-static void capture_restore(struct sh_mobile_ceu_dev *pcdev, u32 capsr)
-{
-	unsigned long timeout = jiffies + 10 * HZ;
-
-	/*
-	 * Wait until the end of the current frame. It can take a long time,
-	 * but if it has been aborted by a CAPSR reset, it shoule exit sooner.
-	 */
-	while ((ceu_read(pcdev, CSTSR) & 1) && time_before(jiffies, timeout))
-		msleep(1);
-
-	if (time_after(jiffies, timeout)) {
-		dev_err(pcdev->ici.v4l2_dev.dev,
-			"Timeout waiting for frame end! Interface problem?\n");
-		return;
-	}
-
-	/* Wait until reset clears, this shall not hang... */
-	while (ceu_read(pcdev, CAPSR) & (1 << 16))
-		udelay(10);
-
-	/* Anything to restore? */
-	if (capsr & ~(1 << 16))
-		ceu_write(pcdev, CAPSR, capsr);
-}
-
-#define CEU_BUS_FLAGS (V4L2_MBUS_MASTER |	\
-		V4L2_MBUS_PCLK_SAMPLE_RISING |	\
-		V4L2_MBUS_HSYNC_ACTIVE_HIGH |	\
-		V4L2_MBUS_HSYNC_ACTIVE_LOW |	\
-		V4L2_MBUS_VSYNC_ACTIVE_HIGH |	\
-		V4L2_MBUS_VSYNC_ACTIVE_LOW |	\
-		V4L2_MBUS_DATA_ACTIVE_HIGH)
-
-/* Capture is not running, no interrupts, no locking needed */
-static int sh_mobile_ceu_set_bus_param(struct soc_camera_device *icd)
-{
-	struct soc_camera_host *ici = to_soc_camera_host(icd->parent);
-	struct sh_mobile_ceu_dev *pcdev = ici->priv;
-	struct v4l2_subdev *sd = soc_camera_to_subdev(icd);
-	struct sh_mobile_ceu_cam *cam = icd->host_priv;
-	struct v4l2_mbus_config cfg = {.type = V4L2_MBUS_PARALLEL,};
-	unsigned long value, common_flags = CEU_BUS_FLAGS;
-	u32 capsr = capture_save_reset(pcdev);
-	unsigned int yuv_lineskip;
-	int ret;
-
-	/*
-	 * If the client doesn't implement g_mbus_config, we just use our
-	 * platform data
-	 */
-	ret = v4l2_subdev_call(sd, video, g_mbus_config, &cfg);
-	if (!ret) {
-		common_flags = soc_mbus_config_compatible(&cfg,
-							  common_flags);
-		if (!common_flags)
-			return -EINVAL;
-	} else if (ret != -ENOIOCTLCMD) {
-		return ret;
-	}
-
-	/* Make choises, based on platform preferences */
-	if ((common_flags & V4L2_MBUS_HSYNC_ACTIVE_HIGH) &&
-	    (common_flags & V4L2_MBUS_HSYNC_ACTIVE_LOW)) {
-		if (pcdev->flags & SH_CEU_FLAG_HSYNC_LOW)
-			common_flags &= ~V4L2_MBUS_HSYNC_ACTIVE_HIGH;
-		else
-			common_flags &= ~V4L2_MBUS_HSYNC_ACTIVE_LOW;
-	}
-
-	if ((common_flags & V4L2_MBUS_VSYNC_ACTIVE_HIGH) &&
-	    (common_flags & V4L2_MBUS_VSYNC_ACTIVE_LOW)) {
-		if (pcdev->flags & SH_CEU_FLAG_VSYNC_LOW)
-			common_flags &= ~V4L2_MBUS_VSYNC_ACTIVE_HIGH;
-		else
-			common_flags &= ~V4L2_MBUS_VSYNC_ACTIVE_LOW;
-	}
-
-	cfg.flags = common_flags;
-	ret = v4l2_subdev_call(sd, video, s_mbus_config, &cfg);
-	if (ret < 0 && ret != -ENOIOCTLCMD)
-		return ret;
-
-	if (icd->current_fmt->host_fmt->bits_per_sample > 8)
-		pcdev->is_16bit = 1;
-	else
-		pcdev->is_16bit = 0;
-
-	ceu_write(pcdev, CRCNTR, 0);
-	ceu_write(pcdev, CRCMPR, 0);
-
-	value = 0x00000010; /* data fetch by default */
-	yuv_lineskip = 0x10;
-
-	switch (icd->current_fmt->host_fmt->fourcc) {
-	case V4L2_PIX_FMT_NV12:
-	case V4L2_PIX_FMT_NV21:
-		/* convert 4:2:2 -> 4:2:0 */
-		yuv_lineskip = 0; /* skip for NV12/21, no skip for NV16/61 */
-		/* fall-through */
-	case V4L2_PIX_FMT_NV16:
-	case V4L2_PIX_FMT_NV61:
-		switch (cam->code) {
-		case MEDIA_BUS_FMT_UYVY8_2X8:
-			value = 0x00000000; /* Cb0, Y0, Cr0, Y1 */
-			break;
-		case MEDIA_BUS_FMT_VYUY8_2X8:
-			value = 0x00000100; /* Cr0, Y0, Cb0, Y1 */
-			break;
-		case MEDIA_BUS_FMT_YUYV8_2X8:
-			value = 0x00000200; /* Y0, Cb0, Y1, Cr0 */
-			break;
-		case MEDIA_BUS_FMT_YVYU8_2X8:
-			value = 0x00000300; /* Y0, Cr0, Y1, Cb0 */
-			break;
-		default:
-			BUG();
-		}
-	}
-
-	if (icd->current_fmt->host_fmt->fourcc == V4L2_PIX_FMT_NV21 ||
-	    icd->current_fmt->host_fmt->fourcc == V4L2_PIX_FMT_NV61)
-		value ^= 0x00000100; /* swap U, V to change from NV1x->NVx1 */
-
-	value |= common_flags & V4L2_MBUS_VSYNC_ACTIVE_LOW ? 1 << 1 : 0;
-	value |= common_flags & V4L2_MBUS_HSYNC_ACTIVE_LOW ? 1 << 0 : 0;
-
-	if (pcdev->is_16bit)
-		value |= 1 << 12;
-	else if (pcdev->flags & SH_CEU_FLAG_LOWER_8BIT)
-		value |= 2 << 12;
-
-	ceu_write(pcdev, CAMCR, value);
-
-	ceu_write(pcdev, CAPCR, 0x00300000);
-
-	switch (pcdev->field) {
-	case V4L2_FIELD_INTERLACED_TB:
-		value = 0x101;
-		break;
-	case V4L2_FIELD_INTERLACED_BT:
-		value = 0x102;
-		break;
-	default:
-		value = 0;
-		break;
-	}
-	ceu_write(pcdev, CAIFR, value);
-
-	sh_mobile_ceu_set_rect(icd);
-	mdelay(1);
-
-	dev_geo(icd->parent, "CFLCR 0x%x\n", pcdev->cflcr);
-	ceu_write(pcdev, CFLCR, pcdev->cflcr);
-
-	/*
-	 * A few words about byte order (observed in Big Endian mode)
-	 *
-	 * In data fetch mode bytes are received in chunks of 8 bytes.
-	 * D0, D1, D2, D3, D4, D5, D6, D7 (D0 received first)
-	 *
-	 * The data is however by default written to memory in reverse order:
-	 * D7, D6, D5, D4, D3, D2, D1, D0 (D7 written to lowest byte)
-	 *
-	 * The lowest three bits of CDOCR allows us to do swapping,
-	 * using 7 we swap the data bytes to match the incoming order:
-	 * D0, D1, D2, D3, D4, D5, D6, D7
-	 */
-	value = 0x00000007 | yuv_lineskip;
-
-	ceu_write(pcdev, CDOCR, value);
-	ceu_write(pcdev, CFWCR, 0); /* keep "datafetch firewall" disabled */
-
-	capture_restore(pcdev, capsr);
-
-	/* not in bundle mode: skip CBDSR, CDAYR2, CDACR2, CDBYR2, CDBCR2 */
-	return 0;
-}
-
-static int sh_mobile_ceu_try_bus_param(struct soc_camera_device *icd,
-				       unsigned char buswidth)
-{
-	struct v4l2_subdev *sd = soc_camera_to_subdev(icd);
-	unsigned long common_flags = CEU_BUS_FLAGS;
-	struct v4l2_mbus_config cfg = {.type = V4L2_MBUS_PARALLEL,};
-	int ret;
-
-	ret = v4l2_subdev_call(sd, video, g_mbus_config, &cfg);
-	if (!ret)
-		common_flags = soc_mbus_config_compatible(&cfg,
-							  common_flags);
-	else if (ret != -ENOIOCTLCMD)
-		return ret;
-
-	if (!common_flags || buswidth > 16)
-		return -EINVAL;
-
-	return 0;
-}
-
-static const struct soc_mbus_pixelfmt sh_mobile_ceu_formats[] = {
-	{
-		.fourcc			= V4L2_PIX_FMT_NV12,
-		.name			= "NV12",
-		.bits_per_sample	= 8,
-		.packing		= SOC_MBUS_PACKING_1_5X8,
-		.order			= SOC_MBUS_ORDER_LE,
-		.layout			= SOC_MBUS_LAYOUT_PLANAR_2Y_C,
-	}, {
-		.fourcc			= V4L2_PIX_FMT_NV21,
-		.name			= "NV21",
-		.bits_per_sample	= 8,
-		.packing		= SOC_MBUS_PACKING_1_5X8,
-		.order			= SOC_MBUS_ORDER_LE,
-		.layout			= SOC_MBUS_LAYOUT_PLANAR_2Y_C,
-	}, {
-		.fourcc			= V4L2_PIX_FMT_NV16,
-		.name			= "NV16",
-		.bits_per_sample	= 8,
-		.packing		= SOC_MBUS_PACKING_2X8_PADHI,
-		.order			= SOC_MBUS_ORDER_LE,
-		.layout			= SOC_MBUS_LAYOUT_PLANAR_Y_C,
-	}, {
-		.fourcc			= V4L2_PIX_FMT_NV61,
-		.name			= "NV61",
-		.bits_per_sample	= 8,
-		.packing		= SOC_MBUS_PACKING_2X8_PADHI,
-		.order			= SOC_MBUS_ORDER_LE,
-		.layout			= SOC_MBUS_LAYOUT_PLANAR_Y_C,
-	},
-};
-
-/* This will be corrected as we get more formats */
-static bool sh_mobile_ceu_packing_supported(const struct soc_mbus_pixelfmt *fmt)
-{
-	return	fmt->packing == SOC_MBUS_PACKING_NONE ||
-		(fmt->bits_per_sample == 8 &&
-		 fmt->packing == SOC_MBUS_PACKING_1_5X8) ||
-		(fmt->bits_per_sample == 8 &&
-		 fmt->packing == SOC_MBUS_PACKING_2X8_PADHI) ||
-		(fmt->bits_per_sample > 8 &&
-		 fmt->packing == SOC_MBUS_PACKING_EXTEND16);
-}
-
-static struct soc_camera_device *ctrl_to_icd(struct v4l2_ctrl *ctrl)
-{
-	return container_of(ctrl->handler, struct soc_camera_device,
-							ctrl_handler);
-}
-
-static int sh_mobile_ceu_s_ctrl(struct v4l2_ctrl *ctrl)
-{
-	struct soc_camera_device *icd = ctrl_to_icd(ctrl);
-	struct soc_camera_host *ici = to_soc_camera_host(icd->parent);
-	struct sh_mobile_ceu_dev *pcdev = ici->priv;
-
-	switch (ctrl->id) {
-	case V4L2_CID_SHARPNESS:
-		switch (icd->current_fmt->host_fmt->fourcc) {
-		case V4L2_PIX_FMT_NV12:
-		case V4L2_PIX_FMT_NV21:
-		case V4L2_PIX_FMT_NV16:
-		case V4L2_PIX_FMT_NV61:
-			ceu_write(pcdev, CLFCR, !ctrl->val);
-			return 0;
-		}
-		break;
-	}
-
-	return -EINVAL;
-}
-
-static const struct v4l2_ctrl_ops sh_mobile_ceu_ctrl_ops = {
-	.s_ctrl = sh_mobile_ceu_s_ctrl,
-};
-
-static int sh_mobile_ceu_get_formats(struct soc_camera_device *icd, unsigned int idx,
-				     struct soc_camera_format_xlate *xlate)
-{
-	struct v4l2_subdev *sd = soc_camera_to_subdev(icd);
-	struct device *dev = icd->parent;
-	struct soc_camera_host *ici = to_soc_camera_host(dev);
-	struct sh_mobile_ceu_dev *pcdev = ici->priv;
-	int ret, k, n;
-	int formats = 0;
-	struct sh_mobile_ceu_cam *cam;
-	struct v4l2_subdev_mbus_code_enum code = {
-		.which = V4L2_SUBDEV_FORMAT_ACTIVE,
-		.index = idx,
-	};
-	const struct soc_mbus_pixelfmt *fmt;
-
-	ret = v4l2_subdev_call(sd, pad, enum_mbus_code, NULL, &code);
-	if (ret < 0)
-		/* No more formats */
-		return 0;
-
-	fmt = soc_mbus_get_fmtdesc(code.code);
-	if (!fmt) {
-		dev_warn(dev, "unsupported format code #%u: %d\n", idx, code.code);
-		return 0;
-	}
-
-	ret = sh_mobile_ceu_try_bus_param(icd, fmt->bits_per_sample);
-	if (ret < 0)
-		return 0;
-
-	if (!icd->host_priv) {
-		struct v4l2_subdev_format fmt = {
-			.which = V4L2_SUBDEV_FORMAT_ACTIVE,
-		};
-		struct v4l2_mbus_framefmt *mf = &fmt.format;
-		struct v4l2_rect rect;
-		int shift = 0;
-
-		/* Add our control */
-		v4l2_ctrl_new_std(&icd->ctrl_handler, &sh_mobile_ceu_ctrl_ops,
-				  V4L2_CID_SHARPNESS, 0, 1, 1, 1);
-		if (icd->ctrl_handler.error)
-			return icd->ctrl_handler.error;
-
-		/* FIXME: subwindow is lost between close / open */
-
-		/* Cache current client geometry */
-		ret = soc_camera_client_g_rect(sd, &rect);
-		if (ret < 0)
-			return ret;
-
-		/* First time */
-		ret = v4l2_subdev_call(sd, pad, get_fmt, NULL, &fmt);
-		if (ret < 0)
-			return ret;
-
-		/*
-		 * All currently existing CEU implementations support 2560x1920
-		 * or larger frames. If the sensor is proposing too big a frame,
-		 * don't bother with possibly supportred by the CEU larger
-		 * sizes, just try VGA multiples. If needed, this can be
-		 * adjusted in the future.
-		 */
-		while ((mf->width > pcdev->max_width ||
-			mf->height > pcdev->max_height) && shift < 4) {
-			/* Try 2560x1920, 1280x960, 640x480, 320x240 */
-			mf->width	= 2560 >> shift;
-			mf->height	= 1920 >> shift;
-			ret = v4l2_device_call_until_err(sd->v4l2_dev,
-					soc_camera_grp_id(icd), pad,
-					set_fmt, NULL, &fmt);
-			if (ret < 0)
-				return ret;
-			shift++;
-		}
-
-		if (shift == 4) {
-			dev_err(dev, "Failed to configure the client below %ux%x\n",
-				mf->width, mf->height);
-			return -EIO;
-		}
-
-		dev_geo(dev, "camera fmt %ux%u\n", mf->width, mf->height);
-
-		cam = kzalloc(sizeof(*cam), GFP_KERNEL);
-		if (!cam)
-			return -ENOMEM;
-
-		/* We are called with current camera crop, initialise subrect with it */
-		cam->rect	= rect;
-		cam->subrect	= rect;
-
-		cam->width	= mf->width;
-		cam->height	= mf->height;
-
-		icd->host_priv = cam;
-	} else {
-		cam = icd->host_priv;
-	}
-
-	/* Beginning of a pass */
-	if (!idx)
-		cam->extra_fmt = NULL;
-
-	switch (code.code) {
-	case MEDIA_BUS_FMT_UYVY8_2X8:
-	case MEDIA_BUS_FMT_VYUY8_2X8:
-	case MEDIA_BUS_FMT_YUYV8_2X8:
-	case MEDIA_BUS_FMT_YVYU8_2X8:
-		if (cam->extra_fmt)
-			break;
-
-		/*
-		 * Our case is simple so far: for any of the above four camera
-		 * formats we add all our four synthesized NV* formats, so,
-		 * just marking the device with a single flag suffices. If
-		 * the format generation rules are more complex, you would have
-		 * to actually hang your already added / counted formats onto
-		 * the host_priv pointer and check whether the format you're
-		 * going to add now is already there.
-		 */
-		cam->extra_fmt = sh_mobile_ceu_formats;
-
-		n = ARRAY_SIZE(sh_mobile_ceu_formats);
-		formats += n;
-		for (k = 0; xlate && k < n; k++) {
-			xlate->host_fmt	= &sh_mobile_ceu_formats[k];
-			xlate->code	= code.code;
-			xlate++;
-			dev_dbg(dev, "Providing format %s using code %d\n",
-				sh_mobile_ceu_formats[k].name, code.code);
-		}
-		break;
-	default:
-		if (!sh_mobile_ceu_packing_supported(fmt))
-			return 0;
-	}
-
-	/* Generic pass-through */
-	formats++;
-	if (xlate) {
-		xlate->host_fmt	= fmt;
-		xlate->code	= code.code;
-		xlate++;
-		dev_dbg(dev, "Providing format %s in pass-through mode\n",
-			fmt->name);
-	}
-
-	return formats;
-}
-
-static void sh_mobile_ceu_put_formats(struct soc_camera_device *icd)
-{
-	kfree(icd->host_priv);
-	icd->host_priv = NULL;
-}
-
-#define scale_down(size, scale) soc_camera_shift_scale(size, 12, scale)
-#define calc_generic_scale(in, out) soc_camera_calc_scale(in, 12, out)
-
-/*
- * CEU can scale and crop, but we don't want to waste bandwidth and kill the
- * framerate by always requesting the maximum image from the client. See
- * Documentation/media/v4l-drivers/sh_mobile_ceu_camera.rst for a description of
- * scaling and cropping algorithms and for the meaning of referenced here steps.
- */
-static int sh_mobile_ceu_set_selection(struct soc_camera_device *icd,
-				       struct v4l2_selection *sel)
-{
-	struct v4l2_rect *rect = &sel->r;
-	struct device *dev = icd->parent;
-	struct soc_camera_host *ici = to_soc_camera_host(dev);
-	struct sh_mobile_ceu_dev *pcdev = ici->priv;
-	struct v4l2_selection cam_sel;
-	struct sh_mobile_ceu_cam *cam = icd->host_priv;
-	struct v4l2_rect *cam_rect = &cam_sel.r;
-	struct v4l2_subdev *sd = soc_camera_to_subdev(icd);
-	struct v4l2_subdev_format fmt = {
-		.which = V4L2_SUBDEV_FORMAT_ACTIVE,
-	};
-	struct v4l2_mbus_framefmt *mf = &fmt.format;
-	unsigned int scale_cam_h, scale_cam_v, scale_ceu_h, scale_ceu_v,
-		out_width, out_height;
-	int interm_width, interm_height;
-	u32 capsr, cflcr;
-	int ret;
-
-	dev_geo(dev, "S_SELECTION(%ux%u@%u:%u)\n", rect->width, rect->height,
-		rect->left, rect->top);
-
-	/* During camera cropping its output window can change too, stop CEU */
-	capsr = capture_save_reset(pcdev);
-	dev_dbg(dev, "CAPSR 0x%x, CFLCR 0x%x\n", capsr, pcdev->cflcr);
-
-	/*
-	 * 1. - 2. Apply iterative camera S_SELECTION for new input window, read back
-	 * actual camera rectangle.
-	 */
-	ret = soc_camera_client_s_selection(sd, sel, &cam_sel,
-				       &cam->rect, &cam->subrect);
-	if (ret < 0)
-		return ret;
-
-	dev_geo(dev, "1-2: camera cropped to %ux%u@%u:%u\n",
-		cam_rect->width, cam_rect->height,
-		cam_rect->left, cam_rect->top);
-
-	/* On success cam_crop contains current camera crop */
-
-	/* 3. Retrieve camera output window */
-	ret = v4l2_subdev_call(sd, pad, get_fmt, NULL, &fmt);
-	if (ret < 0)
-		return ret;
-
-	if (mf->width > pcdev->max_width || mf->height > pcdev->max_height)
-		return -EINVAL;
-
-	/* 4. Calculate camera scales */
-	scale_cam_h	= calc_generic_scale(cam_rect->width, mf->width);
-	scale_cam_v	= calc_generic_scale(cam_rect->height, mf->height);
-
-	/* Calculate intermediate window */
-	interm_width	= scale_down(rect->width, scale_cam_h);
-	interm_height	= scale_down(rect->height, scale_cam_v);
-
-	if (interm_width < icd->user_width) {
-		u32 new_scale_h;
-
-		new_scale_h = calc_generic_scale(rect->width, icd->user_width);
-
-		mf->width = scale_down(cam_rect->width, new_scale_h);
-	}
-
-	if (interm_height < icd->user_height) {
-		u32 new_scale_v;
-
-		new_scale_v = calc_generic_scale(rect->height, icd->user_height);
-
-		mf->height = scale_down(cam_rect->height, new_scale_v);
-	}
-
-	if (interm_width < icd->user_width || interm_height < icd->user_height) {
-		ret = v4l2_device_call_until_err(sd->v4l2_dev,
-					soc_camera_grp_id(icd), pad,
-					set_fmt, NULL, &fmt);
-		if (ret < 0)
-			return ret;
-
-		dev_geo(dev, "New camera output %ux%u\n", mf->width, mf->height);
-		scale_cam_h	= calc_generic_scale(cam_rect->width, mf->width);
-		scale_cam_v	= calc_generic_scale(cam_rect->height, mf->height);
-		interm_width	= scale_down(rect->width, scale_cam_h);
-		interm_height	= scale_down(rect->height, scale_cam_v);
-	}
-
-	/* Cache camera output window */
-	cam->width	= mf->width;
-	cam->height	= mf->height;
-
-	if (pcdev->image_mode) {
-		out_width	= min(interm_width, icd->user_width);
-		out_height	= min(interm_height, icd->user_height);
-	} else {
-		out_width	= interm_width;
-		out_height	= interm_height;
-	}
-
-	/*
-	 * 5. Calculate CEU scales from camera scales from results of (5) and
-	 *    the user window
-	 */
-	scale_ceu_h	= calc_scale(interm_width, &out_width);
-	scale_ceu_v	= calc_scale(interm_height, &out_height);
-
-	dev_geo(dev, "5: CEU scales %u:%u\n", scale_ceu_h, scale_ceu_v);
-
-	/* Apply CEU scales. */
-	cflcr = scale_ceu_h | (scale_ceu_v << 16);
-	if (cflcr != pcdev->cflcr) {
-		pcdev->cflcr = cflcr;
-		ceu_write(pcdev, CFLCR, cflcr);
-	}
-
-	icd->user_width	 = out_width & ~3;
-	icd->user_height = out_height & ~3;
-	/* Offsets are applied at the CEU scaling filter input */
-	cam->ceu_left	 = scale_down(rect->left - cam_rect->left, scale_cam_h) & ~1;
-	cam->ceu_top	 = scale_down(rect->top - cam_rect->top, scale_cam_v) & ~1;
-
-	/* 6. Use CEU cropping to crop to the new window. */
-	sh_mobile_ceu_set_rect(icd);
-
-	cam->subrect = *rect;
-
-	dev_geo(dev, "6: CEU cropped to %ux%u@%u:%u\n",
-		icd->user_width, icd->user_height,
-		cam->ceu_left, cam->ceu_top);
-
-	/* Restore capture. The CE bit can be cleared by the hardware */
-	if (pcdev->active)
-		capsr |= 1;
-	capture_restore(pcdev, capsr);
-
-	/* Even if only camera cropping succeeded */
-	return ret;
-}
-
-static int sh_mobile_ceu_get_selection(struct soc_camera_device *icd,
-				       struct v4l2_selection *sel)
-{
-	struct sh_mobile_ceu_cam *cam = icd->host_priv;
-
-	sel->r = cam->subrect;
-
-	return 0;
-}
-
-/* Similar to set_crop multistage iterative algorithm */
-static int sh_mobile_ceu_set_fmt(struct soc_camera_device *icd,
-				 struct v4l2_format *f)
-{
-	struct device *dev = icd->parent;
-	struct soc_camera_host *ici = to_soc_camera_host(dev);
-	struct sh_mobile_ceu_dev *pcdev = ici->priv;
-	struct sh_mobile_ceu_cam *cam = icd->host_priv;
-	struct v4l2_pix_format *pix = &f->fmt.pix;
-	struct v4l2_mbus_framefmt mf;
-	__u32 pixfmt = pix->pixelformat;
-	const struct soc_camera_format_xlate *xlate;
-	unsigned int ceu_sub_width = pcdev->max_width,
-		ceu_sub_height = pcdev->max_height;
-	u16 scale_v, scale_h;
-	int ret;
-	bool image_mode;
-	enum v4l2_field field;
-
-	switch (pix->field) {
-	default:
-		pix->field = V4L2_FIELD_NONE;
-		/* fall-through */
-	case V4L2_FIELD_INTERLACED_TB:
-	case V4L2_FIELD_INTERLACED_BT:
-	case V4L2_FIELD_NONE:
-		field = pix->field;
-		break;
-	case V4L2_FIELD_INTERLACED:
-		field = V4L2_FIELD_INTERLACED_TB;
-		break;
-	}
-
-	xlate = soc_camera_xlate_by_fourcc(icd, pixfmt);
-	if (!xlate) {
-		dev_warn(dev, "Format %x not found\n", pixfmt);
-		return -EINVAL;
-	}
-
-	/* 1.-4. Calculate desired client output geometry */
-	soc_camera_calc_client_output(icd, &cam->rect, &cam->subrect, pix, &mf, 12);
-	mf.field	= pix->field;
-	mf.colorspace	= pix->colorspace;
-	mf.code		= xlate->code;
-
-	switch (pixfmt) {
-	case V4L2_PIX_FMT_NV12:
-	case V4L2_PIX_FMT_NV21:
-	case V4L2_PIX_FMT_NV16:
-	case V4L2_PIX_FMT_NV61:
-		image_mode = true;
-		break;
-	default:
-		image_mode = false;
-	}
-
-	dev_geo(dev, "S_FMT(pix=0x%x, fld 0x%x, code 0x%x, %ux%u)\n", pixfmt, mf.field, mf.code,
-		pix->width, pix->height);
-
-	dev_geo(dev, "4: request camera output %ux%u\n", mf.width, mf.height);
-
-	/* 5. - 9. */
-	ret = soc_camera_client_scale(icd, &cam->rect, &cam->subrect,
-				&mf, &ceu_sub_width, &ceu_sub_height,
-				image_mode && V4L2_FIELD_NONE == field, 12);
-
-	dev_geo(dev, "5-9: client scale return %d\n", ret);
-
-	/* Done with the camera. Now see if we can improve the result */
-
-	dev_geo(dev, "fmt %ux%u, requested %ux%u\n",
-		mf.width, mf.height, pix->width, pix->height);
-	if (ret < 0)
-		return ret;
-
-	if (mf.code != xlate->code)
-		return -EINVAL;
-
-	/* 9. Prepare CEU crop */
-	cam->width = mf.width;
-	cam->height = mf.height;
-
-	/* 10. Use CEU scaling to scale to the requested user window. */
-
-	/* We cannot scale up */
-	if (pix->width > ceu_sub_width)
-		ceu_sub_width = pix->width;
-
-	if (pix->height > ceu_sub_height)
-		ceu_sub_height = pix->height;
-
-	pix->colorspace = mf.colorspace;
-
-	if (image_mode) {
-		/* Scale pix->{width x height} down to width x height */
-		scale_h		= calc_scale(ceu_sub_width, &pix->width);
-		scale_v		= calc_scale(ceu_sub_height, &pix->height);
-	} else {
-		pix->width	= ceu_sub_width;
-		pix->height	= ceu_sub_height;
-		scale_h		= 0;
-		scale_v		= 0;
-	}
-
-	pcdev->cflcr = scale_h | (scale_v << 16);
-
-	/*
-	 * We have calculated CFLCR, the actual configuration will be performed
-	 * in sh_mobile_ceu_set_bus_param()
-	 */
-
-	dev_geo(dev, "10: W: %u : 0x%x = %u, H: %u : 0x%x = %u\n",
-		ceu_sub_width, scale_h, pix->width,
-		ceu_sub_height, scale_v, pix->height);
-
-	cam->code		= xlate->code;
-	icd->current_fmt	= xlate;
-
-	pcdev->field = field;
-	pcdev->image_mode = image_mode;
-
-	/* CFSZR requirement */
-	pix->width	&= ~3;
-	pix->height	&= ~3;
-
-	return 0;
-}
-
-#define CEU_CHDW_MAX	8188U	/* Maximum line stride */
-
-static int sh_mobile_ceu_try_fmt(struct soc_camera_device *icd,
-				 struct v4l2_format *f)
-{
-	struct soc_camera_host *ici = to_soc_camera_host(icd->parent);
-	struct sh_mobile_ceu_dev *pcdev = ici->priv;
-	const struct soc_camera_format_xlate *xlate;
-	struct v4l2_pix_format *pix = &f->fmt.pix;
-	struct v4l2_subdev *sd = soc_camera_to_subdev(icd);
-	struct v4l2_subdev_pad_config pad_cfg;
-	struct v4l2_subdev_format format = {
-		.which = V4L2_SUBDEV_FORMAT_TRY,
-	};
-	struct v4l2_mbus_framefmt *mf = &format.format;
-	__u32 pixfmt = pix->pixelformat;
-	int width, height;
-	int ret;
-
-	dev_geo(icd->parent, "TRY_FMT(pix=0x%x, %ux%u)\n",
-		 pixfmt, pix->width, pix->height);
-
-	xlate = soc_camera_xlate_by_fourcc(icd, pixfmt);
-	if (!xlate) {
-		xlate = icd->current_fmt;
-		dev_dbg(icd->parent, "Format %x not found, keeping %x\n",
-			pixfmt, xlate->host_fmt->fourcc);
-		pixfmt = xlate->host_fmt->fourcc;
-		pix->pixelformat = pixfmt;
-		pix->colorspace = icd->colorspace;
-	}
-
-	/* FIXME: calculate using depth and bus width */
-
-	/* CFSZR requires height and width to be 4-pixel aligned */
-	v4l_bound_align_image(&pix->width, 2, pcdev->max_width, 2,
-			      &pix->height, 4, pcdev->max_height, 2, 0);
-
-	width = pix->width;
-	height = pix->height;
-
-	/* limit to sensor capabilities */
-	mf->width	= pix->width;
-	mf->height	= pix->height;
-	mf->field	= pix->field;
-	mf->code	= xlate->code;
-	mf->colorspace	= pix->colorspace;
-
-	ret = v4l2_device_call_until_err(sd->v4l2_dev, soc_camera_grp_id(icd),
-					 pad, set_fmt, &pad_cfg, &format);
-	if (ret < 0)
-		return ret;
-
-	pix->width	= mf->width;
-	pix->height	= mf->height;
-	pix->field	= mf->field;
-	pix->colorspace	= mf->colorspace;
-
-	switch (pixfmt) {
-	case V4L2_PIX_FMT_NV12:
-	case V4L2_PIX_FMT_NV21:
-	case V4L2_PIX_FMT_NV16:
-	case V4L2_PIX_FMT_NV61:
-		/* FIXME: check against rect_max after converting soc-camera */
-		/* We can scale precisely, need a bigger image from camera */
-		if (pix->width < width || pix->height < height) {
-			/*
-			 * We presume, the sensor behaves sanely, i.e., if
-			 * requested a bigger rectangle, it will not return a
-			 * smaller one.
-			 */
-			mf->width = pcdev->max_width;
-			mf->height = pcdev->max_height;
-			ret = v4l2_device_call_until_err(sd->v4l2_dev,
-					soc_camera_grp_id(icd), pad,
-					set_fmt, &pad_cfg, &format);
-			if (ret < 0) {
-				/* Shouldn't actually happen... */
-				dev_err(icd->parent,
-					"FIXME: client try_fmt() = %d\n", ret);
-				return ret;
-			}
-		}
-		/* We will scale exactly */
-		if (mf->width > width)
-			pix->width = width;
-		if (mf->height > height)
-			pix->height = height;
-
-		pix->bytesperline = max(pix->bytesperline, pix->width);
-		pix->bytesperline = min(pix->bytesperline, CEU_CHDW_MAX);
-		pix->bytesperline &= ~3;
-		break;
-
-	default:
-		/* Configurable stride isn't supported in pass-through mode. */
-		pix->bytesperline  = 0;
-	}
-
-	pix->width	&= ~3;
-	pix->height	&= ~3;
-	pix->sizeimage	= 0;
-
-	dev_geo(icd->parent, "%s(): return %d, fmt 0x%x, %ux%u\n",
-		__func__, ret, pix->pixelformat, pix->width, pix->height);
-
-	return ret;
-}
-
-static int sh_mobile_ceu_set_liveselection(struct soc_camera_device *icd,
-					   struct v4l2_selection *sel)
-{
-	struct v4l2_subdev *sd = soc_camera_to_subdev(icd);
-	struct soc_camera_host *ici = to_soc_camera_host(icd->parent);
-	struct sh_mobile_ceu_dev *pcdev = ici->priv;
-	u32 out_width = icd->user_width, out_height = icd->user_height;
-	int ret;
-
-	/* Freeze queue */
-	pcdev->frozen = 1;
-	/* Wait for frame */
-	ret = wait_for_completion_interruptible(&pcdev->complete);
-	/* Stop the client */
-	ret = v4l2_subdev_call(sd, video, s_stream, 0);
-	if (ret < 0)
-		dev_warn(icd->parent,
-			 "Client failed to stop the stream: %d\n", ret);
-	else
-		/* Do the crop, if it fails, there's nothing more we can do */
-		sh_mobile_ceu_set_selection(icd, sel);
-
-	dev_geo(icd->parent, "Output after crop: %ux%u\n", icd->user_width, icd->user_height);
-
-	if (icd->user_width != out_width || icd->user_height != out_height) {
-		struct v4l2_format f = {
-			.type	= V4L2_BUF_TYPE_VIDEO_CAPTURE,
-			.fmt.pix	= {
-				.width		= out_width,
-				.height		= out_height,
-				.pixelformat	= icd->current_fmt->host_fmt->fourcc,
-				.field		= pcdev->field,
-				.colorspace	= icd->colorspace,
-			},
-		};
-		ret = sh_mobile_ceu_set_fmt(icd, &f);
-		if (!ret && (out_width != f.fmt.pix.width ||
-			     out_height != f.fmt.pix.height))
-			ret = -EINVAL;
-		if (!ret) {
-			icd->user_width		= out_width & ~3;
-			icd->user_height	= out_height & ~3;
-			ret = sh_mobile_ceu_set_bus_param(icd);
-		}
-	}
-
-	/* Thaw the queue */
-	pcdev->frozen = 0;
-	spin_lock_irq(&pcdev->lock);
-	sh_mobile_ceu_capture(pcdev);
-	spin_unlock_irq(&pcdev->lock);
-	/* Start the client */
-	ret = v4l2_subdev_call(sd, video, s_stream, 1);
-	return ret;
-}
-
-static __poll_t sh_mobile_ceu_poll(struct file *file, poll_table *pt)
-{
-	struct soc_camera_device *icd = file->private_data;
-
-	return vb2_poll(&icd->vb2_vidq, file, pt);
-}
-
-static int sh_mobile_ceu_querycap(struct soc_camera_host *ici,
-				  struct v4l2_capability *cap)
-{
-	strscpy(cap->card, "SuperH_Mobile_CEU", sizeof(cap->card));
-	strscpy(cap->driver, "sh_mobile_ceu", sizeof(cap->driver));
-	strscpy(cap->bus_info, "platform:sh_mobile_ceu", sizeof(cap->bus_info));
-	cap->device_caps = V4L2_CAP_VIDEO_CAPTURE | V4L2_CAP_STREAMING;
-	cap->capabilities = cap->device_caps | V4L2_CAP_DEVICE_CAPS;
-
-	return 0;
-}
-
-static int sh_mobile_ceu_init_videobuf(struct vb2_queue *q,
-				       struct soc_camera_device *icd)
-{
-	struct soc_camera_host *ici = to_soc_camera_host(icd->parent);
-
-	q->type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
-	q->io_modes = VB2_MMAP | VB2_USERPTR;
-	q->drv_priv = icd;
-	q->ops = &sh_mobile_ceu_videobuf_ops;
-	q->mem_ops = &vb2_dma_contig_memops;
-	q->buf_struct_size = sizeof(struct sh_mobile_ceu_buffer);
-	q->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC;
-	q->lock = &ici->host_lock;
-	q->dev = ici->v4l2_dev.dev;
-
-	return vb2_queue_init(q);
-}
-
-static struct soc_camera_host_ops sh_mobile_ceu_host_ops = {
-	.owner		= THIS_MODULE,
-	.add		= sh_mobile_ceu_add_device,
-	.remove		= sh_mobile_ceu_remove_device,
-	.clock_start	= sh_mobile_ceu_clock_start,
-	.clock_stop	= sh_mobile_ceu_clock_stop,
-	.get_formats	= sh_mobile_ceu_get_formats,
-	.put_formats	= sh_mobile_ceu_put_formats,
-	.get_selection	= sh_mobile_ceu_get_selection,
-	.set_selection	= sh_mobile_ceu_set_selection,
-	.set_liveselection	= sh_mobile_ceu_set_liveselection,
-	.set_fmt	= sh_mobile_ceu_set_fmt,
-	.try_fmt	= sh_mobile_ceu_try_fmt,
-	.poll		= sh_mobile_ceu_poll,
-	.querycap	= sh_mobile_ceu_querycap,
-	.set_bus_param	= sh_mobile_ceu_set_bus_param,
-	.init_videobuf2	= sh_mobile_ceu_init_videobuf,
-};
-
-struct bus_wait {
-	struct notifier_block	notifier;
-	struct completion	completion;
-	struct device		*dev;
-};
-
-static int bus_notify(struct notifier_block *nb,
-		      unsigned long action, void *data)
-{
-	struct device *dev = data;
-	struct bus_wait *wait = container_of(nb, struct bus_wait, notifier);
-
-	if (wait->dev != dev)
-		return NOTIFY_DONE;
-
-	switch (action) {
-	case BUS_NOTIFY_UNBOUND_DRIVER:
-		/* Protect from module unloading */
-		wait_for_completion(&wait->completion);
-		return NOTIFY_OK;
-	}
-	return NOTIFY_DONE;
-}
-
-static int sh_mobile_ceu_probe(struct platform_device *pdev)
-{
-	struct sh_mobile_ceu_dev *pcdev;
-	struct resource *res;
-	void __iomem *base;
-	unsigned int irq;
-	int err;
-	struct bus_wait wait = {
-		.completion = COMPLETION_INITIALIZER_ONSTACK(wait.completion),
-		.notifier.notifier_call = bus_notify,
-	};
-
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	irq = platform_get_irq(pdev, 0);
-	if (!res || (int)irq <= 0) {
-		dev_err(&pdev->dev, "Not enough CEU platform resources.\n");
-		return -ENODEV;
-	}
-
-	pcdev = devm_kzalloc(&pdev->dev, sizeof(*pcdev), GFP_KERNEL);
-	if (!pcdev) {
-		dev_err(&pdev->dev, "Could not allocate pcdev\n");
-		return -ENOMEM;
-	}
-
-	INIT_LIST_HEAD(&pcdev->capture);
-	spin_lock_init(&pcdev->lock);
-	init_completion(&pcdev->complete);
-
-	pcdev->pdata = pdev->dev.platform_data;
-	if (!pcdev->pdata && !pdev->dev.of_node) {
-		dev_err(&pdev->dev, "CEU platform data not set.\n");
-		return -EINVAL;
-	}
-
-	/* TODO: implement per-device bus flags */
-	if (pcdev->pdata) {
-		pcdev->max_width = pcdev->pdata->max_width;
-		pcdev->max_height = pcdev->pdata->max_height;
-		pcdev->flags = pcdev->pdata->flags;
-	}
-	pcdev->field = V4L2_FIELD_NONE;
-
-	if (!pcdev->max_width) {
-		unsigned int v;
-		err = of_property_read_u32(pdev->dev.of_node, "renesas,max-width", &v);
-		if (!err)
-			pcdev->max_width = v;
-
-		if (!pcdev->max_width)
-			pcdev->max_width = 2560;
-	}
-	if (!pcdev->max_height) {
-		unsigned int v;
-		err = of_property_read_u32(pdev->dev.of_node, "renesas,max-height", &v);
-		if (!err)
-			pcdev->max_height = v;
-
-		if (!pcdev->max_height)
-			pcdev->max_height = 1920;
-	}
-
-	base = devm_ioremap_resource(&pdev->dev, res);
-	if (IS_ERR(base))
-		return PTR_ERR(base);
-
-	pcdev->irq = irq;
-	pcdev->base = base;
-	pcdev->video_limit = 0; /* only enabled if second resource exists */
-
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
-	if (res) {
-		err = dma_declare_coherent_memory(&pdev->dev, res->start,
-						  res->start,
-						  resource_size(res),
-						  DMA_MEMORY_EXCLUSIVE);
-		if (err) {
-			dev_err(&pdev->dev, "Unable to declare CEU memory.\n");
-			return err;
-		}
-
-		pcdev->video_limit = resource_size(res);
-	}
-
-	/* request irq */
-	err = devm_request_irq(&pdev->dev, pcdev->irq, sh_mobile_ceu_irq,
-			       0, dev_name(&pdev->dev), pcdev);
-	if (err) {
-		dev_err(&pdev->dev, "Unable to register CEU interrupt.\n");
-		goto exit_release_mem;
-	}
-
-	pm_suspend_ignore_children(&pdev->dev, true);
-	pm_runtime_enable(&pdev->dev);
-	pm_runtime_resume(&pdev->dev);
-
-	pcdev->ici.priv = pcdev;
-	pcdev->ici.v4l2_dev.dev = &pdev->dev;
-	pcdev->ici.nr = pdev->id;
-	pcdev->ici.drv_name = dev_name(&pdev->dev);
-	pcdev->ici.ops = &sh_mobile_ceu_host_ops;
-	pcdev->ici.capabilities = SOCAM_HOST_CAP_STRIDE;
-
-	if (pcdev->pdata && pcdev->pdata->asd_sizes) {
-		pcdev->ici.asd = pcdev->pdata->asd;
-		pcdev->ici.asd_sizes = pcdev->pdata->asd_sizes;
-	}
-
-	err = soc_camera_host_register(&pcdev->ici);
-	if (err)
-		goto exit_free_clk;
-
-	return 0;
-
-exit_free_clk:
-	pm_runtime_disable(&pdev->dev);
-exit_release_mem:
-	if (platform_get_resource(pdev, IORESOURCE_MEM, 1))
-		dma_release_declared_memory(&pdev->dev);
-	return err;
-}
-
-static int sh_mobile_ceu_remove(struct platform_device *pdev)
-{
-	struct soc_camera_host *soc_host = to_soc_camera_host(&pdev->dev);
-
-	soc_camera_host_unregister(soc_host);
-	pm_runtime_disable(&pdev->dev);
-	if (platform_get_resource(pdev, IORESOURCE_MEM, 1))
-		dma_release_declared_memory(&pdev->dev);
-
-	return 0;
-}
-
-static int sh_mobile_ceu_runtime_nop(struct device *dev)
-{
-	/* Runtime PM callback shared between ->runtime_suspend()
-	 * and ->runtime_resume(). Simply returns success.
-	 *
-	 * This driver re-initializes all registers after
-	 * pm_runtime_get_sync() anyway so there is no need
-	 * to save and restore registers here.
-	 */
-	return 0;
-}
-
-static const struct dev_pm_ops sh_mobile_ceu_dev_pm_ops = {
-	.runtime_suspend = sh_mobile_ceu_runtime_nop,
-	.runtime_resume = sh_mobile_ceu_runtime_nop,
-};
-
-static const struct of_device_id sh_mobile_ceu_of_match[] = {
-	{ .compatible = "renesas,sh-mobile-ceu" },
-	{ }
-};
-MODULE_DEVICE_TABLE(of, sh_mobile_ceu_of_match);
-
-static struct platform_driver sh_mobile_ceu_driver = {
-	.driver		= {
-		.name	= "sh_mobile_ceu",
-		.pm	= &sh_mobile_ceu_dev_pm_ops,
-		.of_match_table = sh_mobile_ceu_of_match,
-	},
-	.probe		= sh_mobile_ceu_probe,
-	.remove		= sh_mobile_ceu_remove,
-};
-
-module_platform_driver(sh_mobile_ceu_driver);
-
-MODULE_DESCRIPTION("SuperH Mobile CEU driver");
-MODULE_AUTHOR("Magnus Damm");
-MODULE_LICENSE("GPL");
-MODULE_VERSION("0.1.0");
-MODULE_ALIAS("platform:sh_mobile_ceu");
diff --git a/include/media/drv-intf/sh_mobile_ceu.h b/include/media/drv-intf/sh_mobile_ceu.h
deleted file mode 100644
index 555f0ecc0fde..000000000000
--- a/include/media/drv-intf/sh_mobile_ceu.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __ASM_SH_MOBILE_CEU_H__
-#define __ASM_SH_MOBILE_CEU_H__
-
-#define SH_CEU_FLAG_USE_8BIT_BUS	(1 << 0) /* use  8bit bus width */
-#define SH_CEU_FLAG_USE_16BIT_BUS	(1 << 1) /* use 16bit bus width */
-#define SH_CEU_FLAG_HSYNC_LOW		(1 << 2) /* default High if possible */
-#define SH_CEU_FLAG_VSYNC_LOW		(1 << 3) /* default High if possible */
-#define SH_CEU_FLAG_LOWER_8BIT		(1 << 4) /* default upper 8bit */
-
-struct device;
-struct resource;
-
-struct sh_mobile_ceu_companion {
-	u32		num_resources;
-	struct resource	*resource;
-	int		id;
-	void		*platform_data;
-};
-
-struct sh_mobile_ceu_info {
-	unsigned long flags;
-	int max_width;
-	int max_height;
-	struct v4l2_async_subdev **asd;	/* Flat array, arranged in groups */
-	unsigned int *asd_sizes;	/* 0-terminated array pf asd group sizes */
-};
-
-#endif /* __ASM_SH_MOBILE_CEU_H__ */
-- 
cgit v1.2.3


From dc60a4cfb77c891f67f31953025208067b05883c Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Date: Thu, 17 Jan 2019 11:47:55 -0200
Subject: media: soc_camera_platform: remove obsolete soc_camera test driver

This is a test stub driver for soc_camera. Since soc_camera is
being deprecated (and in fact, nobody is using it anymore)
there's no sense in keeping this test driver.

Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Acked-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 drivers/media/platform/soc_camera/Kconfig          |   6 -
 drivers/media/platform/soc_camera/Makefile         |   4 -
 .../platform/soc_camera/soc_camera_platform.c      | 188 ---------------------
 .../platform_data/media/soc_camera_platform.h      |  83 ---------
 4 files changed, 281 deletions(-)
 delete mode 100644 drivers/media/platform/soc_camera/soc_camera_platform.c
 delete mode 100644 include/linux/platform_data/media/soc_camera_platform.h

(limited to 'include')

diff --git a/drivers/media/platform/soc_camera/Kconfig b/drivers/media/platform/soc_camera/Kconfig
index d471d34b884c..8f9b3bac5450 100644
--- a/drivers/media/platform/soc_camera/Kconfig
+++ b/drivers/media/platform/soc_camera/Kconfig
@@ -6,9 +6,3 @@ config SOC_CAMERA
 	  SoC Camera is a common API to several cameras, not connecting
 	  over a bus like PCI or USB. For example some i2c camera connected
 	  directly to the data bus of an SoC.
-
-config SOC_CAMERA_PLATFORM
-	tristate "platform camera support"
-	depends on SOC_CAMERA
-	help
-	  This is a generic SoC camera platform driver, useful for testing
diff --git a/drivers/media/platform/soc_camera/Makefile b/drivers/media/platform/soc_camera/Makefile
index 2cb7022e073b..85d5e74f3b2b 100644
--- a/drivers/media/platform/soc_camera/Makefile
+++ b/drivers/media/platform/soc_camera/Makefile
@@ -1,5 +1 @@
 obj-$(CONFIG_SOC_CAMERA)		+= soc_camera.o soc_mediabus.o
-
-# a platform subdevice driver stub, allowing to support cameras by adding a
-# couple of callback functions to the board code
-obj-$(CONFIG_SOC_CAMERA_PLATFORM)	+= soc_camera_platform.o
diff --git a/drivers/media/platform/soc_camera/soc_camera_platform.c b/drivers/media/platform/soc_camera/soc_camera_platform.c
deleted file mode 100644
index 79fbe1fea95f..000000000000
--- a/drivers/media/platform/soc_camera/soc_camera_platform.c
+++ /dev/null
@@ -1,188 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Generic Platform Camera Driver
- *
- * Copyright (C) 2008 Magnus Damm
- * Based on mt9m001 driver,
- * Copyright (C) 2008, Guennadi Liakhovetski <kernel@pengutronix.de>
- */
-
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/delay.h>
-#include <linux/platform_device.h>
-#include <linux/videodev2.h>
-#include <media/v4l2-subdev.h>
-#include <media/soc_camera.h>
-#include <linux/platform_data/media/soc_camera_platform.h>
-
-struct soc_camera_platform_priv {
-	struct v4l2_subdev subdev;
-};
-
-static struct soc_camera_platform_priv *get_priv(struct platform_device *pdev)
-{
-	struct v4l2_subdev *subdev = platform_get_drvdata(pdev);
-	return container_of(subdev, struct soc_camera_platform_priv, subdev);
-}
-
-static int soc_camera_platform_s_stream(struct v4l2_subdev *sd, int enable)
-{
-	struct soc_camera_platform_info *p = v4l2_get_subdevdata(sd);
-	return p->set_capture(p, enable);
-}
-
-static int soc_camera_platform_fill_fmt(struct v4l2_subdev *sd,
-		struct v4l2_subdev_pad_config *cfg,
-		struct v4l2_subdev_format *format)
-{
-	struct soc_camera_platform_info *p = v4l2_get_subdevdata(sd);
-	struct v4l2_mbus_framefmt *mf = &format->format;
-
-	mf->width	= p->format.width;
-	mf->height	= p->format.height;
-	mf->code	= p->format.code;
-	mf->colorspace	= p->format.colorspace;
-	mf->field	= p->format.field;
-
-	return 0;
-}
-
-static int soc_camera_platform_s_power(struct v4l2_subdev *sd, int on)
-{
-	struct soc_camera_platform_info *p = v4l2_get_subdevdata(sd);
-
-	return soc_camera_set_power(p->icd->control, &p->icd->sdesc->subdev_desc, NULL, on);
-}
-
-static const struct v4l2_subdev_core_ops platform_subdev_core_ops = {
-	.s_power = soc_camera_platform_s_power,
-};
-
-static int soc_camera_platform_enum_mbus_code(struct v4l2_subdev *sd,
-		struct v4l2_subdev_pad_config *cfg,
-		struct v4l2_subdev_mbus_code_enum *code)
-{
-	struct soc_camera_platform_info *p = v4l2_get_subdevdata(sd);
-
-	if (code->pad || code->index)
-		return -EINVAL;
-
-	code->code = p->format.code;
-	return 0;
-}
-
-static int soc_camera_platform_get_selection(struct v4l2_subdev *sd,
-		struct v4l2_subdev_pad_config *cfg,
-		struct v4l2_subdev_selection *sel)
-{
-	struct soc_camera_platform_info *p = v4l2_get_subdevdata(sd);
-
-	if (sel->which != V4L2_SUBDEV_FORMAT_ACTIVE)
-		return -EINVAL;
-
-	switch (sel->target) {
-	case V4L2_SEL_TGT_CROP_BOUNDS:
-	case V4L2_SEL_TGT_CROP_DEFAULT:
-	case V4L2_SEL_TGT_CROP:
-		sel->r.left = 0;
-		sel->r.top = 0;
-		sel->r.width = p->format.width;
-		sel->r.height = p->format.height;
-		return 0;
-	default:
-		return -EINVAL;
-	}
-}
-
-static int soc_camera_platform_g_mbus_config(struct v4l2_subdev *sd,
-					     struct v4l2_mbus_config *cfg)
-{
-	struct soc_camera_platform_info *p = v4l2_get_subdevdata(sd);
-
-	cfg->flags = p->mbus_param;
-	cfg->type = p->mbus_type;
-
-	return 0;
-}
-
-static const struct v4l2_subdev_video_ops platform_subdev_video_ops = {
-	.s_stream	= soc_camera_platform_s_stream,
-	.g_mbus_config	= soc_camera_platform_g_mbus_config,
-};
-
-static const struct v4l2_subdev_pad_ops platform_subdev_pad_ops = {
-	.enum_mbus_code = soc_camera_platform_enum_mbus_code,
-	.get_selection	= soc_camera_platform_get_selection,
-	.get_fmt	= soc_camera_platform_fill_fmt,
-	.set_fmt	= soc_camera_platform_fill_fmt,
-};
-
-static const struct v4l2_subdev_ops platform_subdev_ops = {
-	.core	= &platform_subdev_core_ops,
-	.video	= &platform_subdev_video_ops,
-	.pad	= &platform_subdev_pad_ops,
-};
-
-static int soc_camera_platform_probe(struct platform_device *pdev)
-{
-	struct soc_camera_host *ici;
-	struct soc_camera_platform_priv *priv;
-	struct soc_camera_platform_info *p = pdev->dev.platform_data;
-	struct soc_camera_device *icd;
-
-	if (!p)
-		return -EINVAL;
-
-	if (!p->icd) {
-		dev_err(&pdev->dev,
-			"Platform has not set soc_camera_device pointer!\n");
-		return -EINVAL;
-	}
-
-	priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
-	if (!priv)
-		return -ENOMEM;
-
-	icd = p->icd;
-
-	/* soc-camera convention: control's drvdata points to the subdev */
-	platform_set_drvdata(pdev, &priv->subdev);
-	/* Set the control device reference */
-	icd->control = &pdev->dev;
-
-	ici = to_soc_camera_host(icd->parent);
-
-	v4l2_subdev_init(&priv->subdev, &platform_subdev_ops);
-	v4l2_set_subdevdata(&priv->subdev, p);
-	strscpy(priv->subdev.name, dev_name(&pdev->dev),
-		sizeof(priv->subdev.name));
-
-	return v4l2_device_register_subdev(&ici->v4l2_dev, &priv->subdev);
-}
-
-static int soc_camera_platform_remove(struct platform_device *pdev)
-{
-	struct soc_camera_platform_priv *priv = get_priv(pdev);
-	struct soc_camera_platform_info *p = v4l2_get_subdevdata(&priv->subdev);
-
-	p->icd->control = NULL;
-	v4l2_device_unregister_subdev(&priv->subdev);
-	return 0;
-}
-
-static struct platform_driver soc_camera_platform_driver = {
-	.driver		= {
-		.name	= "soc_camera_platform",
-	},
-	.probe		= soc_camera_platform_probe,
-	.remove		= soc_camera_platform_remove,
-};
-
-module_platform_driver(soc_camera_platform_driver);
-
-MODULE_DESCRIPTION("SoC Camera Platform driver");
-MODULE_AUTHOR("Magnus Damm");
-MODULE_LICENSE("GPL v2");
-MODULE_ALIAS("platform:soc_camera_platform");
diff --git a/include/linux/platform_data/media/soc_camera_platform.h b/include/linux/platform_data/media/soc_camera_platform.h
deleted file mode 100644
index 1e5065dab430..000000000000
--- a/include/linux/platform_data/media/soc_camera_platform.h
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * Generic Platform Camera Driver Header
- *
- * Copyright (C) 2008 Magnus Damm
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#ifndef __SOC_CAMERA_H__
-#define __SOC_CAMERA_H__
-
-#include <linux/videodev2.h>
-#include <media/soc_camera.h>
-#include <media/v4l2-mediabus.h>
-
-struct device;
-
-struct soc_camera_platform_info {
-	const char *format_name;
-	unsigned long format_depth;
-	struct v4l2_mbus_framefmt format;
-	unsigned long mbus_param;
-	enum v4l2_mbus_type mbus_type;
-	struct soc_camera_device *icd;
-	int (*set_capture)(struct soc_camera_platform_info *info, int enable);
-};
-
-static inline void soc_camera_platform_release(struct platform_device **pdev)
-{
-	*pdev = NULL;
-}
-
-static inline int soc_camera_platform_add(struct soc_camera_device *icd,
-					  struct platform_device **pdev,
-					  struct soc_camera_link *plink,
-					  void (*release)(struct device *dev),
-					  int id)
-{
-	struct soc_camera_subdev_desc *ssdd =
-		(struct soc_camera_subdev_desc *)plink;
-	struct soc_camera_platform_info *info = ssdd->drv_priv;
-	int ret;
-
-	if (&icd->sdesc->subdev_desc != ssdd)
-		return -ENODEV;
-
-	if (*pdev)
-		return -EBUSY;
-
-	*pdev = platform_device_alloc("soc_camera_platform", id);
-	if (!*pdev)
-		return -ENOMEM;
-
-	info->icd = icd;
-
-	(*pdev)->dev.platform_data = info;
-	(*pdev)->dev.release = release;
-
-	ret = platform_device_add(*pdev);
-	if (ret < 0) {
-		platform_device_put(*pdev);
-		*pdev = NULL;
-		info->icd = NULL;
-	}
-
-	return ret;
-}
-
-static inline void soc_camera_platform_del(const struct soc_camera_device *icd,
-					   struct platform_device *pdev,
-					   const struct soc_camera_link *plink)
-{
-	const struct soc_camera_subdev_desc *ssdd =
-		(const struct soc_camera_subdev_desc *)plink;
-	if (&icd->sdesc->subdev_desc != ssdd || !pdev)
-		return;
-
-	platform_device_unregister(pdev);
-}
-
-#endif /* __SOC_CAMERA_H__ */
-- 
cgit v1.2.3


From 1fc1b63638da1accb27264a507b23aa6863c3852 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <bbrezillon@kernel.org>
Date: Sat, 19 Jan 2019 16:04:12 +0100
Subject: spi: spi-mem: Add devm_spi_mem_dirmap_{create,destroy}()

Since direct mapping descriptors usually the same lifetime as the SPI
MEM device adding devm_ variants of the spi_mem_dirmap_{create,destroy}()
should greatly simplify error/remove path of spi-mem drivers making use
of the direct mapping API.

Signed-off-by: Boris Brezillon <bbrezillon@kernel.org>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-mem.c       | 69 +++++++++++++++++++++++++++++++++++++++++++++
 include/linux/spi/spi-mem.h |  5 ++++
 2 files changed, 74 insertions(+)

(limited to 'include')

diff --git a/drivers/spi/spi-mem.c b/drivers/spi/spi-mem.c
index 5217a5628be2..08e326a124cc 100644
--- a/drivers/spi/spi-mem.c
+++ b/drivers/spi/spi-mem.c
@@ -551,6 +551,75 @@ void spi_mem_dirmap_destroy(struct spi_mem_dirmap_desc *desc)
 }
 EXPORT_SYMBOL_GPL(spi_mem_dirmap_destroy);
 
+static void devm_spi_mem_dirmap_release(struct device *dev, void *res)
+{
+	struct spi_mem_dirmap_desc *desc = *(struct spi_mem_dirmap_desc **)res;
+
+	spi_mem_dirmap_destroy(desc);
+}
+
+/**
+ * devm_spi_mem_dirmap_create() - Create a direct mapping descriptor and attach
+ *				  it to a device
+ * @dev: device the dirmap desc will be attached to
+ * @mem: SPI mem device this direct mapping should be created for
+ * @info: direct mapping information
+ *
+ * devm_ variant of the spi_mem_dirmap_create() function. See
+ * spi_mem_dirmap_create() for more details.
+ *
+ * Return: a valid pointer in case of success, and ERR_PTR() otherwise.
+ */
+struct spi_mem_dirmap_desc *
+devm_spi_mem_dirmap_create(struct device *dev, struct spi_mem *mem,
+			   const struct spi_mem_dirmap_info *info)
+{
+	struct spi_mem_dirmap_desc **ptr, *desc;
+
+	ptr = devres_alloc(devm_spi_mem_dirmap_release, sizeof(*ptr),
+			   GFP_KERNEL);
+	if (!ptr)
+		return ERR_PTR(-ENOMEM);
+
+	desc = spi_mem_dirmap_create(mem, info);
+	if (IS_ERR(desc)) {
+		devres_free(ptr);
+	} else {
+		*ptr = desc;
+		devres_add(dev, ptr);
+	}
+
+	return desc;
+}
+EXPORT_SYMBOL_GPL(devm_spi_mem_dirmap_create);
+
+static int devm_spi_mem_dirmap_match(struct device *dev, void *res, void *data)
+{
+        struct spi_mem_dirmap_desc **ptr = res;
+
+        if (WARN_ON(!ptr || !*ptr))
+                return 0;
+
+	return *ptr == data;
+}
+
+/**
+ * devm_spi_mem_dirmap_destroy() - Destroy a direct mapping descriptor attached
+ *				   to a device
+ * @dev: device the dirmap desc is attached to
+ * @desc: the direct mapping descriptor to destroy
+ *
+ * devm_ variant of the spi_mem_dirmap_destroy() function. See
+ * spi_mem_dirmap_destroy() for more details.
+ */
+void devm_spi_mem_dirmap_destroy(struct device *dev,
+				 struct spi_mem_dirmap_desc *desc)
+{
+	devres_release(dev, devm_spi_mem_dirmap_release,
+		       devm_spi_mem_dirmap_match, desc);
+}
+EXPORT_SYMBOL_GPL(devm_spi_mem_dirmap_destroy);
+
 /**
  * spi_mem_dirmap_dirmap_read() - Read data through a direct mapping
  * @desc: direct mapping descriptor
diff --git a/include/linux/spi/spi-mem.h b/include/linux/spi/spi-mem.h
index 3fe24500c5ee..3703d0dcac2e 100644
--- a/include/linux/spi/spi-mem.h
+++ b/include/linux/spi/spi-mem.h
@@ -330,6 +330,11 @@ ssize_t spi_mem_dirmap_read(struct spi_mem_dirmap_desc *desc,
 			    u64 offs, size_t len, void *buf);
 ssize_t spi_mem_dirmap_write(struct spi_mem_dirmap_desc *desc,
 			     u64 offs, size_t len, const void *buf);
+struct spi_mem_dirmap_desc *
+devm_spi_mem_dirmap_create(struct device *dev, struct spi_mem *mem,
+			   const struct spi_mem_dirmap_info *info);
+void devm_spi_mem_dirmap_destroy(struct device *dev,
+				 struct spi_mem_dirmap_desc *desc);
 
 int spi_mem_driver_register_with_owner(struct spi_mem_driver *drv,
 				       struct module *owner);
-- 
cgit v1.2.3


From 910fdcabedd2354d161b1beab6ad7dc7e859651d Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Mon, 21 Jan 2019 09:32:32 +0900
Subject: ASoC: soc-core: add .num_platform for dai_link

Current snd_soc_dai_link is starting to use snd_soc_dai_link_component
(= modern) style for Platform, but it is still assuming single Platform
so far. We will need to have multi Platform support in the not far
future.

Currently only simple card is using it as sound card driver,
and other drivers are converted to it from legacy style by
snd_soc_init_platform().
To avoid future problem of multi Platform support, let's add
num_platforms before it is too late.

In the same time, to make it same naming mothed, "platform" should
be "platforms". This patch fixup it too.

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/simple_card_utils.h     |  2 +-
 include/sound/soc.h                   |  3 ++-
 sound/soc/generic/audio-graph-card.c  |  5 +++--
 sound/soc/generic/simple-card-utils.c |  4 ++--
 sound/soc/generic/simple-card.c       |  7 ++++---
 sound/soc/soc-core.c                  | 23 ++++++++++++++++-------
 6 files changed, 28 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/include/sound/simple_card_utils.h b/include/sound/simple_card_utils.h
index 6d69ed2bd7b1..ab5a2ba09c07 100644
--- a/include/sound/simple_card_utils.h
+++ b/include/sound/simple_card_utils.h
@@ -75,7 +75,7 @@ void asoc_simple_card_clk_disable(struct asoc_simple_dai *dai);
 				   &dai_link->codec_dai_name,			\
 				   list_name, cells_name, NULL)
 #define asoc_simple_card_parse_platform(node, dai_link, list_name, cells_name)	\
-	asoc_simple_card_parse_dai(node, dai_link->platform,					\
+	asoc_simple_card_parse_dai(node, dai_link->platforms,			\
 		&dai_link->platform_of_node,					\
 		NULL, list_name, cells_name, NULL)
 int asoc_simple_card_parse_dai(struct device_node *node,
diff --git a/include/sound/soc.h b/include/sound/soc.h
index c31b6d122ff6..3089257ead95 100644
--- a/include/sound/soc.h
+++ b/include/sound/soc.h
@@ -961,7 +961,8 @@ struct snd_soc_dai_link {
 	 */
 	const char *platform_name;
 	struct device_node *platform_of_node;
-	struct snd_soc_dai_link_component *platform;
+	struct snd_soc_dai_link_component *platforms;
+	unsigned int num_platforms;
 
 	int id;	/* optional ID for machine driver link identification */
 
diff --git a/sound/soc/generic/audio-graph-card.c b/sound/soc/generic/audio-graph-card.c
index 3ec96cdc683b..42b077c6be4c 100644
--- a/sound/soc/generic/audio-graph-card.c
+++ b/sound/soc/generic/audio-graph-card.c
@@ -26,7 +26,7 @@ struct graph_priv {
 		struct asoc_simple_dai *cpu_dai;
 		struct asoc_simple_dai *codec_dai;
 		struct snd_soc_dai_link_component codecs; /* single codec */
-		struct snd_soc_dai_link_component platform;
+		struct snd_soc_dai_link_component platforms;
 		struct asoc_simple_card_data adata;
 		struct snd_soc_codec_conf *codec_conf;
 		unsigned int mclk_fs;
@@ -687,7 +687,8 @@ static int graph_probe(struct platform_device *pdev)
 	for (i = 0; i < li.link; i++) {
 		dai_link[i].codecs	= &dai_props[i].codecs;
 		dai_link[i].num_codecs	= 1;
-		dai_link[i].platform	= &dai_props[i].platform;
+		dai_link[i].platforms	= &dai_props[i].platforms;
+		dai_link[i].num_platforms = 1;
 	}
 
 	priv->pa_gpio = devm_gpiod_get_optional(dev, "pa", GPIOD_OUT_LOW);
diff --git a/sound/soc/generic/simple-card-utils.c b/sound/soc/generic/simple-card-utils.c
index 336895f7fd1e..3c0901df5796 100644
--- a/sound/soc/generic/simple-card-utils.c
+++ b/sound/soc/generic/simple-card-utils.c
@@ -397,8 +397,8 @@ EXPORT_SYMBOL_GPL(asoc_simple_card_init_dai);
 int asoc_simple_card_canonicalize_dailink(struct snd_soc_dai_link *dai_link)
 {
 	/* Assumes platform == cpu */
-	if (!dai_link->platform->of_node)
-		dai_link->platform->of_node = dai_link->cpu_of_node;
+	if (!dai_link->platforms->of_node)
+		dai_link->platforms->of_node = dai_link->cpu_of_node;
 
 	return 0;
 
diff --git a/sound/soc/generic/simple-card.c b/sound/soc/generic/simple-card.c
index 479de236e694..d8a0d1ec256e 100644
--- a/sound/soc/generic/simple-card.c
+++ b/sound/soc/generic/simple-card.c
@@ -21,7 +21,7 @@ struct simple_priv {
 		struct asoc_simple_dai *cpu_dai;
 		struct asoc_simple_dai *codec_dai;
 		struct snd_soc_dai_link_component codecs; /* single codec */
-		struct snd_soc_dai_link_component platform;
+		struct snd_soc_dai_link_component platforms;
 		struct asoc_simple_card_data adata;
 		struct snd_soc_codec_conf *codec_conf;
 		unsigned int mclk_fs;
@@ -732,7 +732,8 @@ static int simple_probe(struct platform_device *pdev)
 	for (i = 0; i < li.link; i++) {
 		dai_link[i].codecs	= &dai_props[i].codecs;
 		dai_link[i].num_codecs	= 1;
-		dai_link[i].platform	= &dai_props[i].platform;
+		dai_link[i].platforms	= &dai_props[i].platforms;
+		dai_link[i].num_platforms = 1;
 	}
 
 	priv->dai_props		= dai_props;
@@ -782,7 +783,7 @@ static int simple_probe(struct platform_device *pdev)
 		codecs->name		= cinfo->codec;
 		codecs->dai_name	= cinfo->codec_dai.name;
 
-		platform		= dai_link->platform;
+		platform		= dai_link->platforms;
 		platform->name		= cinfo->platform;
 
 		card->name		= (cinfo->card) ? cinfo->card : cinfo->name;
diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c
index de2851f1b3df..2c63921675d5 100644
--- a/sound/soc/soc-core.c
+++ b/sound/soc/soc-core.c
@@ -915,7 +915,7 @@ static int soc_bind_dai_link(struct snd_soc_card *card,
 
 	/* find one from the set of registered platforms */
 	for_each_component(component) {
-		if (!snd_soc_is_matching_component(dai_link->platform,
+		if (!snd_soc_is_matching_component(dai_link->platforms,
 						   component))
 			continue;
 
@@ -1026,7 +1026,7 @@ static void soc_remove_dai_links(struct snd_soc_card *card)
 static int snd_soc_init_platform(struct snd_soc_card *card,
 				 struct snd_soc_dai_link *dai_link)
 {
-	struct snd_soc_dai_link_component *platform = dai_link->platform;
+	struct snd_soc_dai_link_component *platform = dai_link->platforms;
 
 	/*
 	 * REMOVE ME
@@ -1046,7 +1046,8 @@ static int snd_soc_init_platform(struct snd_soc_card *card,
 		if (!platform)
 			return -ENOMEM;
 
-		dai_link->platform	  = platform;
+		dai_link->platforms	  = platform;
+		dai_link->num_platforms	  = 1;
 		dai_link->legacy_platform = 1;
 		platform->name		  = dai_link->platform_name;
 		platform->of_node	  = dai_link->platform_of_node;
@@ -1136,11 +1137,19 @@ static int soc_init_dai_link(struct snd_soc_card *card,
 		}
 	}
 
+	/* FIXME */
+	if (link->num_platforms > 1) {
+		dev_err(card->dev,
+			"ASoC: multi platform is not yet supported %s\n",
+			link->name);
+		return -EINVAL;
+	}
+
 	/*
 	 * Platform may be specified by either name or OF node, but
 	 * can be left unspecified, and a dummy platform will be used.
 	 */
-	if (link->platform->name && link->platform->of_node) {
+	if (link->platforms->name && link->platforms->of_node) {
 		dev_err(card->dev,
 			"ASoC: Both platform name/of_node are set for %s\n",
 			link->name);
@@ -1151,8 +1160,8 @@ static int soc_init_dai_link(struct snd_soc_card *card,
 	 * Defer card registartion if platform dai component is not added to
 	 * component list.
 	 */
-	if ((link->platform->of_node || link->platform->name) &&
-	    !soc_find_component(link->platform->of_node, link->platform->name))
+	if ((link->platforms->of_node || link->platforms->name) &&
+	    !soc_find_component(link->platforms->of_node, link->platforms->name))
 		return -EPROBE_DEFER;
 
 	/*
@@ -1956,7 +1965,7 @@ static void soc_check_tplg_fes(struct snd_soc_card *card)
 				dev_err(card->dev, "init platform error");
 				continue;
 			}
-			dai_link->platform->name = component->name;
+			dai_link->platforms->name = component->name;
 
 			/* convert non BE into BE */
 			dai_link->no_pcm = 1;
-- 
cgit v1.2.3


From fe7ed4dec2e6289eab81dd18c0d613c0851d85a1 Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Mon, 21 Jan 2019 16:40:59 +0900
Subject: ASoC: simple-card: rename to asoc_simple_card_canonicalize_platform()

Current simple-card is using asoc_simple_card_canonicalize_dailink().
Its naming is "dailink", but is for "platform".
We already have asoc_simple_card_canonicalize_cpu() for "cpu",
let's follow same naming rule.
It never return error, so, void function is better idea.

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/simple_card_utils.h     |  2 +-
 sound/soc/generic/audio-graph-card.c  | 11 +++--------
 sound/soc/generic/simple-card-utils.c |  7 ++-----
 sound/soc/generic/simple-card.c       | 11 +++--------
 4 files changed, 9 insertions(+), 22 deletions(-)

(limited to 'include')

diff --git a/include/sound/simple_card_utils.h b/include/sound/simple_card_utils.h
index ab5a2ba09c07..7afe45389972 100644
--- a/include/sound/simple_card_utils.h
+++ b/include/sound/simple_card_utils.h
@@ -108,7 +108,7 @@ int asoc_simple_card_parse_graph_dai(struct device_node *ep,
 int asoc_simple_card_init_dai(struct snd_soc_dai *dai,
 			      struct asoc_simple_dai *simple_dai);
 
-int asoc_simple_card_canonicalize_dailink(struct snd_soc_dai_link *dai_link);
+void asoc_simple_card_canonicalize_platform(struct snd_soc_dai_link *dai_link);
 void asoc_simple_card_canonicalize_cpu(struct snd_soc_dai_link *dai_link,
 				      int is_single_links);
 
diff --git a/sound/soc/generic/audio-graph-card.c b/sound/soc/generic/audio-graph-card.c
index 42b077c6be4c..bb12351330e8 100644
--- a/sound/soc/generic/audio-graph-card.c
+++ b/sound/soc/generic/audio-graph-card.c
@@ -307,14 +307,12 @@ static int graph_dai_link_of_dpcm(struct graph_priv *priv,
 					     "prefix");
 	}
 
+	asoc_simple_card_canonicalize_platform(dai_link);
+
 	ret = asoc_simple_card_of_parse_tdm(ep, dai);
 	if (ret)
 		return ret;
 
-	ret = asoc_simple_card_canonicalize_dailink(dai_link);
-	if (ret < 0)
-		return ret;
-
 	ret = asoc_simple_card_parse_daifmt(dev, cpu_ep, codec_ep,
 					    NULL, &dai_link->dai_fmt);
 	if (ret < 0)
@@ -405,10 +403,6 @@ static int graph_dai_link_of(struct graph_priv *priv,
 	if (ret < 0)
 		return ret;
 
-	ret = asoc_simple_card_canonicalize_dailink(dai_link);
-	if (ret < 0)
-		return ret;
-
 	ret = asoc_simple_card_set_dailink_name(dev, dai_link,
 						"%s-%s",
 						dai_link->cpu_dai_name,
@@ -419,6 +413,7 @@ static int graph_dai_link_of(struct graph_priv *priv,
 	dai_link->ops = &graph_ops;
 	dai_link->init = graph_dai_init;
 
+	asoc_simple_card_canonicalize_platform(dai_link);
 	asoc_simple_card_canonicalize_cpu(dai_link,
 		of_graph_get_endpoint_count(dai_link->cpu_of_node) == 1);
 
diff --git a/sound/soc/generic/simple-card-utils.c b/sound/soc/generic/simple-card-utils.c
index 3c0901df5796..5c1424f03620 100644
--- a/sound/soc/generic/simple-card-utils.c
+++ b/sound/soc/generic/simple-card-utils.c
@@ -394,16 +394,13 @@ int asoc_simple_card_init_dai(struct snd_soc_dai *dai,
 }
 EXPORT_SYMBOL_GPL(asoc_simple_card_init_dai);
 
-int asoc_simple_card_canonicalize_dailink(struct snd_soc_dai_link *dai_link)
+void asoc_simple_card_canonicalize_platform(struct snd_soc_dai_link *dai_link)
 {
 	/* Assumes platform == cpu */
 	if (!dai_link->platforms->of_node)
 		dai_link->platforms->of_node = dai_link->cpu_of_node;
-
-	return 0;
-
 }
-EXPORT_SYMBOL_GPL(asoc_simple_card_canonicalize_dailink);
+EXPORT_SYMBOL_GPL(asoc_simple_card_canonicalize_platform);
 
 void asoc_simple_card_canonicalize_cpu(struct snd_soc_dai_link *dai_link,
 				       int is_single_links)
diff --git a/sound/soc/generic/simple-card.c b/sound/soc/generic/simple-card.c
index d8a0d1ec256e..08df261024cf 100644
--- a/sound/soc/generic/simple-card.c
+++ b/sound/soc/generic/simple-card.c
@@ -297,14 +297,12 @@ static int simple_dai_link_of_dpcm(struct simple_priv *priv,
 
 	simple_get_conversion(dev, np, &dai_props->adata);
 
+	asoc_simple_card_canonicalize_platform(dai_link);
+
 	ret = asoc_simple_card_of_parse_tdm(np, dai);
 	if (ret)
 		return ret;
 
-	ret = asoc_simple_card_canonicalize_dailink(dai_link);
-	if (ret < 0)
-		return ret;
-
 	snprintf(prop, sizeof(prop), "%smclk-fs", prefix);
 	of_property_read_u32(top,  PREFIX "mclk-fs", &dai_props->mclk_fs);
 	of_property_read_u32(node, prop, &dai_props->mclk_fs);
@@ -409,10 +407,6 @@ static int simple_dai_link_of(struct simple_priv *priv,
 	if (ret < 0)
 		goto dai_link_of_err;
 
-	ret = asoc_simple_card_canonicalize_dailink(dai_link);
-	if (ret < 0)
-		goto dai_link_of_err;
-
 	ret = asoc_simple_card_set_dailink_name(dev, dai_link,
 						"%s-%s",
 						dai_link->cpu_dai_name,
@@ -424,6 +418,7 @@ static int simple_dai_link_of(struct simple_priv *priv,
 	dai_link->init = simple_dai_init;
 
 	asoc_simple_card_canonicalize_cpu(dai_link, single_cpu);
+	asoc_simple_card_canonicalize_platform(dai_link);
 
 dai_link_of_err:
 	of_node_put(node);
-- 
cgit v1.2.3


From cf5c6c211b7e9eb4f4219f83671432c9ef257187 Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Thu, 17 Jan 2019 15:25:04 +0800
Subject: perf: Remove duplicated workqueue.h include from perf_event.h

It is already included a little bit higher up in that file.

Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20190117072504.14428-1-yuehaibing@huawei.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 include/linux/perf_event.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index cec02dc63b51..f8ec36197718 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -53,7 +53,6 @@ struct perf_guest_info_callbacks {
 #include <linux/atomic.h>
 #include <linux/sysfs.h>
 #include <linux/perf_regs.h>
-#include <linux/workqueue.h>
 #include <linux/cgroup.h>
 #include <asm/local.h>
 
-- 
cgit v1.2.3


From 5620196951192f7cd2da0a04e7c0113f40bfc14e Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Fri, 11 Jan 2019 13:20:20 -0300
Subject: perf: Make perf_event_output() propagate the output() return

For the original mode of operation it isn't needed, since we report back
errors via PERF_RECORD_LOST records in the ring buffer, but for use in
bpf_perf_event_output() it is convenient to return the errors, basically
-ENOSPC.

Currently bpf_perf_event_output() returns an error indication, the last
thing it does, which is to push it to the ring buffer is that can fail
and if so, this failure won't be reported back to its users, fix it.

Reported-by: Jamal Hadi Salim <jhs@mojatatu.com>
Tested-by: Jamal Hadi Salim <jhs@mojatatu.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexei Starovoitov <alexei.starovoitov@gmail.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Link: https://lkml.kernel.org/r/20190118150938.GN5823@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 include/linux/perf_event.h                       |  6 +++---
 kernel/events/core.c                             | 11 +++++++----
 kernel/trace/bpf_trace.c                         |  3 +--
 tools/perf/examples/bpf/augmented_raw_syscalls.c |  4 ++--
 tools/perf/examples/bpf/augmented_syscalls.c     | 14 +++++++-------
 tools/perf/examples/bpf/etcsnoop.c               | 10 +++++-----
 6 files changed, 25 insertions(+), 23 deletions(-)

(limited to 'include')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index f8ec36197718..4eb88065a9b5 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -978,9 +978,9 @@ extern void perf_event_output_forward(struct perf_event *event,
 extern void perf_event_output_backward(struct perf_event *event,
 				       struct perf_sample_data *data,
 				       struct pt_regs *regs);
-extern void perf_event_output(struct perf_event *event,
-			      struct perf_sample_data *data,
-			      struct pt_regs *regs);
+extern int perf_event_output(struct perf_event *event,
+			     struct perf_sample_data *data,
+			     struct pt_regs *regs);
 
 static inline bool
 is_default_overflow_handler(struct perf_event *event)
diff --git a/kernel/events/core.c b/kernel/events/core.c
index fbe59b793b36..bc525cd1615c 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -6489,7 +6489,7 @@ void perf_prepare_sample(struct perf_event_header *header,
 		data->phys_addr = perf_virt_to_phys(data->addr);
 }
 
-static __always_inline void
+static __always_inline int
 __perf_event_output(struct perf_event *event,
 		    struct perf_sample_data *data,
 		    struct pt_regs *regs,
@@ -6499,13 +6499,15 @@ __perf_event_output(struct perf_event *event,
 {
 	struct perf_output_handle handle;
 	struct perf_event_header header;
+	int err;
 
 	/* protect the callchain buffers */
 	rcu_read_lock();
 
 	perf_prepare_sample(&header, data, event, regs);
 
-	if (output_begin(&handle, event, header.size))
+	err = output_begin(&handle, event, header.size);
+	if (err)
 		goto exit;
 
 	perf_output_sample(&handle, &header, data, event);
@@ -6514,6 +6516,7 @@ __perf_event_output(struct perf_event *event,
 
 exit:
 	rcu_read_unlock();
+	return err;
 }
 
 void
@@ -6532,12 +6535,12 @@ perf_event_output_backward(struct perf_event *event,
 	__perf_event_output(event, data, regs, perf_output_begin_backward);
 }
 
-void
+int
 perf_event_output(struct perf_event *event,
 		  struct perf_sample_data *data,
 		  struct pt_regs *regs)
 {
-	__perf_event_output(event, data, regs, perf_output_begin);
+	return __perf_event_output(event, data, regs, perf_output_begin);
 }
 
 /*
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 8b068adb9da1..088c2032ceaf 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -431,8 +431,7 @@ __bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map,
 	if (unlikely(event->oncpu != cpu))
 		return -EOPNOTSUPP;
 
-	perf_event_output(event, sd, regs);
-	return 0;
+	return perf_event_output(event, sd, regs);
 }
 
 BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map,
diff --git a/tools/perf/examples/bpf/augmented_raw_syscalls.c b/tools/perf/examples/bpf/augmented_raw_syscalls.c
index 53c233370fae..9e9d4c66e53c 100644
--- a/tools/perf/examples/bpf/augmented_raw_syscalls.c
+++ b/tools/perf/examples/bpf/augmented_raw_syscalls.c
@@ -141,8 +141,8 @@ int sys_enter(struct syscall_enter_args *args)
 		len = sizeof(augmented_args.args);
 	}
 
-	perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, &augmented_args, len);
-	return 0;
+	/* If perf_event_output fails, return non-zero so that it gets recorded unaugmented */
+	return perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, &augmented_args, len);
 }
 
 SEC("raw_syscalls:sys_exit")
diff --git a/tools/perf/examples/bpf/augmented_syscalls.c b/tools/perf/examples/bpf/augmented_syscalls.c
index 2ae44813ef2d..b7dba114e36c 100644
--- a/tools/perf/examples/bpf/augmented_syscalls.c
+++ b/tools/perf/examples/bpf/augmented_syscalls.c
@@ -55,9 +55,9 @@ int syscall_enter(syscall)(struct syscall_enter_##syscall##_args *args)				\
 		len -= sizeof(augmented_args.filename.value) - augmented_args.filename.size;	\
 		len &= sizeof(augmented_args.filename.value) - 1;				\
 	}											\
-	perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, 			\
-			  &augmented_args, len);						\
-	return 0;										\
+	/* If perf_event_output fails, return non-zero so that it gets recorded unaugmented */	\
+	return perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, 		\
+				 &augmented_args, len);						\
 }												\
 int syscall_exit(syscall)(struct syscall_exit_args *args)					\
 {												\
@@ -125,10 +125,10 @@ int syscall_enter(syscall)(struct syscall_enter_##syscall##_args *args)				\
 /*		addrlen = augmented_args.args.addrlen;				     */		\
 /*										     */		\
 	probe_read(&augmented_args.addr, addrlen, args->addr_ptr); 				\
-	perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, 			\
-			  &augmented_args, 							\
-			  sizeof(augmented_args) - sizeof(augmented_args.addr) + addrlen);	\
-	return 0;										\
+	/* If perf_event_output fails, return non-zero so that it gets recorded unaugmented */	\
+	return perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, 		\
+				 &augmented_args, 						\
+				sizeof(augmented_args) - sizeof(augmented_args.addr) + addrlen);\
 }												\
 int syscall_exit(syscall)(struct syscall_exit_args *args)					\
 {												\
diff --git a/tools/perf/examples/bpf/etcsnoop.c b/tools/perf/examples/bpf/etcsnoop.c
index b59e8812ee8c..550e69c2e8d1 100644
--- a/tools/perf/examples/bpf/etcsnoop.c
+++ b/tools/perf/examples/bpf/etcsnoop.c
@@ -49,11 +49,11 @@ int syscall_enter(syscall)(struct syscall_enter_##syscall##_args *args)				\
 						      args->filename_ptr); 			\
 	if (__builtin_memcmp(augmented_args.filename.value, etc, 4) != 0)			\
 		return 0;									\
-	perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, 			\
-			  &augmented_args, 							\
-			  (sizeof(augmented_args) - sizeof(augmented_args.filename.value) +	\
-			   augmented_args.filename.size));					\
-	return 0;										\
+	/* If perf_event_output fails, return non-zero so that it gets recorded unaugmented */	\
+	return perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, 		\
+				 &augmented_args,						\
+				 (sizeof(augmented_args) - sizeof(augmented_args.filename.value) + \
+				 augmented_args.filename.size));				\
 }
 
 struct syscall_enter_openat_args {
-- 
cgit v1.2.3


From 76193a94522f1d4edf2447a536f3f796ce56343b Mon Sep 17 00:00:00 2001
From: Song Liu <songliubraving@fb.com>
Date: Thu, 17 Jan 2019 08:15:13 -0800
Subject: perf, bpf: Introduce PERF_RECORD_KSYMBOL

For better performance analysis of dynamically JITed and loaded kernel
functions, such as BPF programs, this patch introduces
PERF_RECORD_KSYMBOL, a new perf_event_type that exposes kernel symbol
register/unregister information to user space.

The following data structure is used for PERF_RECORD_KSYMBOL.

    /*
     * struct {
     *      struct perf_event_header        header;
     *      u64                             addr;
     *      u32                             len;
     *      u16                             ksym_type;
     *      u16                             flags;
     *      char                            name[];
     *      struct sample_id                sample_id;
     * };
     */

Signed-off-by: Song Liu <songliubraving@fb.com>
Reviewed-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: kernel-team@fb.com
Cc: netdev@vger.kernel.org
Link: http://lkml.kernel.org/r/20190117161521.1341602-2-songliubraving@fb.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 include/linux/perf_event.h      |  8 ++++
 include/uapi/linux/perf_event.h | 26 ++++++++++-
 kernel/events/core.c            | 98 ++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 130 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 4eb88065a9b5..136fe0495374 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1122,6 +1122,10 @@ static inline void perf_event_task_sched_out(struct task_struct *prev,
 }
 
 extern void perf_event_mmap(struct vm_area_struct *vma);
+
+extern void perf_event_ksymbol(u16 ksym_type, u64 addr, u32 len,
+			       bool unregister, const char *sym);
+
 extern struct perf_guest_info_callbacks *perf_guest_cbs;
 extern int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks);
 extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks);
@@ -1342,6 +1346,10 @@ static inline int perf_unregister_guest_info_callbacks
 (struct perf_guest_info_callbacks *callbacks)				{ return 0; }
 
 static inline void perf_event_mmap(struct vm_area_struct *vma)		{ }
+
+typedef int (perf_ksymbol_get_name_f)(char *name, int name_len, void *data);
+static inline void perf_event_ksymbol(u16 ksym_type, u64 addr, u32 len,
+				      bool unregister, const char *sym)	{ }
 static inline void perf_event_exec(void)				{ }
 static inline void perf_event_comm(struct task_struct *tsk, bool exec)	{ }
 static inline void perf_event_namespaces(struct task_struct *tsk)	{ }
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index ea19b5d491bf..1dee5c8f166b 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -372,7 +372,8 @@ struct perf_event_attr {
 				context_switch :  1, /* context switch data */
 				write_backward :  1, /* Write ring buffer from end to beginning */
 				namespaces     :  1, /* include namespaces data */
-				__reserved_1   : 35;
+				ksymbol        :  1, /* include ksymbol events */
+				__reserved_1   : 34;
 
 	union {
 		__u32		wakeup_events;	  /* wakeup every n events */
@@ -963,9 +964,32 @@ enum perf_event_type {
 	 */
 	PERF_RECORD_NAMESPACES			= 16,
 
+	/*
+	 * Record ksymbol register/unregister events:
+	 *
+	 * struct {
+	 *	struct perf_event_header	header;
+	 *	u64				addr;
+	 *	u32				len;
+	 *	u16				ksym_type;
+	 *	u16				flags;
+	 *	char				name[];
+	 *	struct sample_id		sample_id;
+	 * };
+	 */
+	PERF_RECORD_KSYMBOL			= 17,
+
 	PERF_RECORD_MAX,			/* non-ABI */
 };
 
+enum perf_record_ksymbol_type {
+	PERF_RECORD_KSYMBOL_TYPE_UNKNOWN	= 0,
+	PERF_RECORD_KSYMBOL_TYPE_BPF		= 1,
+	PERF_RECORD_KSYMBOL_TYPE_MAX		/* non-ABI */
+};
+
+#define PERF_RECORD_KSYMBOL_FLAGS_UNREGISTER	(1 << 0)
+
 #define PERF_MAX_STACK_DEPTH		127
 #define PERF_MAX_CONTEXTS_PER_STACK	  8
 
diff --git a/kernel/events/core.c b/kernel/events/core.c
index bc525cd1615c..e04ab5f325cf 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -385,6 +385,7 @@ static atomic_t nr_namespaces_events __read_mostly;
 static atomic_t nr_task_events __read_mostly;
 static atomic_t nr_freq_events __read_mostly;
 static atomic_t nr_switch_events __read_mostly;
+static atomic_t nr_ksymbol_events __read_mostly;
 
 static LIST_HEAD(pmus);
 static DEFINE_MUTEX(pmus_lock);
@@ -4235,7 +4236,7 @@ static bool is_sb_event(struct perf_event *event)
 
 	if (attr->mmap || attr->mmap_data || attr->mmap2 ||
 	    attr->comm || attr->comm_exec ||
-	    attr->task ||
+	    attr->task || attr->ksymbol ||
 	    attr->context_switch)
 		return true;
 	return false;
@@ -4305,6 +4306,8 @@ static void unaccount_event(struct perf_event *event)
 		dec = true;
 	if (has_branch_stack(event))
 		dec = true;
+	if (event->attr.ksymbol)
+		atomic_dec(&nr_ksymbol_events);
 
 	if (dec) {
 		if (!atomic_add_unless(&perf_sched_count, -1, 1))
@@ -7653,6 +7656,97 @@ static void perf_log_throttle(struct perf_event *event, int enable)
 	perf_output_end(&handle);
 }
 
+/*
+ * ksymbol register/unregister tracking
+ */
+
+struct perf_ksymbol_event {
+	const char	*name;
+	int		name_len;
+	struct {
+		struct perf_event_header        header;
+		u64				addr;
+		u32				len;
+		u16				ksym_type;
+		u16				flags;
+	} event_id;
+};
+
+static int perf_event_ksymbol_match(struct perf_event *event)
+{
+	return event->attr.ksymbol;
+}
+
+static void perf_event_ksymbol_output(struct perf_event *event, void *data)
+{
+	struct perf_ksymbol_event *ksymbol_event = data;
+	struct perf_output_handle handle;
+	struct perf_sample_data sample;
+	int ret;
+
+	if (!perf_event_ksymbol_match(event))
+		return;
+
+	perf_event_header__init_id(&ksymbol_event->event_id.header,
+				   &sample, event);
+	ret = perf_output_begin(&handle, event,
+				ksymbol_event->event_id.header.size);
+	if (ret)
+		return;
+
+	perf_output_put(&handle, ksymbol_event->event_id);
+	__output_copy(&handle, ksymbol_event->name, ksymbol_event->name_len);
+	perf_event__output_id_sample(event, &handle, &sample);
+
+	perf_output_end(&handle);
+}
+
+void perf_event_ksymbol(u16 ksym_type, u64 addr, u32 len, bool unregister,
+			const char *sym)
+{
+	struct perf_ksymbol_event ksymbol_event;
+	char name[KSYM_NAME_LEN];
+	u16 flags = 0;
+	int name_len;
+
+	if (!atomic_read(&nr_ksymbol_events))
+		return;
+
+	if (ksym_type >= PERF_RECORD_KSYMBOL_TYPE_MAX ||
+	    ksym_type == PERF_RECORD_KSYMBOL_TYPE_UNKNOWN)
+		goto err;
+
+	strlcpy(name, sym, KSYM_NAME_LEN);
+	name_len = strlen(name) + 1;
+	while (!IS_ALIGNED(name_len, sizeof(u64)))
+		name[name_len++] = '\0';
+	BUILD_BUG_ON(KSYM_NAME_LEN % sizeof(u64));
+
+	if (unregister)
+		flags |= PERF_RECORD_KSYMBOL_FLAGS_UNREGISTER;
+
+	ksymbol_event = (struct perf_ksymbol_event){
+		.name = name,
+		.name_len = name_len,
+		.event_id = {
+			.header = {
+				.type = PERF_RECORD_KSYMBOL,
+				.size = sizeof(ksymbol_event.event_id) +
+					name_len,
+			},
+			.addr = addr,
+			.len = len,
+			.ksym_type = ksym_type,
+			.flags = flags,
+		},
+	};
+
+	perf_iterate_sb(perf_event_ksymbol_output, &ksymbol_event, NULL);
+	return;
+err:
+	WARN_ONCE(1, "%s: Invalid KSYMBOL type 0x%x\n", __func__, ksym_type);
+}
+
 void perf_event_itrace_started(struct perf_event *event)
 {
 	event->attach_state |= PERF_ATTACH_ITRACE;
@@ -9912,6 +10006,8 @@ static void account_event(struct perf_event *event)
 		inc = true;
 	if (is_cgroup_event(event))
 		inc = true;
+	if (event->attr.ksymbol)
+		atomic_inc(&nr_ksymbol_events);
 
 	if (inc) {
 		/*
-- 
cgit v1.2.3


From 6ee52e2a3fe4ea35520720736e6791df1fb67106 Mon Sep 17 00:00:00 2001
From: Song Liu <songliubraving@fb.com>
Date: Thu, 17 Jan 2019 08:15:15 -0800
Subject: perf, bpf: Introduce PERF_RECORD_BPF_EVENT

For better performance analysis of BPF programs, this patch introduces
PERF_RECORD_BPF_EVENT, a new perf_event_type that exposes BPF program
load/unload information to user space.

Each BPF program may contain up to BPF_MAX_SUBPROGS (256) sub programs.
The following example shows kernel symbols for a BPF program with 7 sub
programs:

    ffffffffa0257cf9 t bpf_prog_b07ccb89267cf242_F
    ffffffffa02592e1 t bpf_prog_2dcecc18072623fc_F
    ffffffffa025b0e9 t bpf_prog_bb7a405ebaec5d5c_F
    ffffffffa025dd2c t bpf_prog_a7540d4a39ec1fc7_F
    ffffffffa025fcca t bpf_prog_05762d4ade0e3737_F
    ffffffffa026108f t bpf_prog_db4bd11e35df90d4_F
    ffffffffa0263f00 t bpf_prog_89d64e4abf0f0126_F
    ffffffffa0257cf9 t bpf_prog_ae31629322c4b018__dummy_tracepoi

When a bpf program is loaded, PERF_RECORD_KSYMBOL is generated for each
of these sub programs. Therefore, PERF_RECORD_BPF_EVENT is not needed
for simple profiling.

For annotation, user space need to listen to PERF_RECORD_BPF_EVENT and
gather more information about these (sub) programs via sys_bpf.

Signed-off-by: Song Liu <songliubraving@fb.com>
Reviewed-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradeaed.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: kernel-team@fb.com
Cc: netdev@vger.kernel.org
Link: http://lkml.kernel.org/r/20190117161521.1341602-4-songliubraving@fb.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 include/linux/filter.h          |   7 +++
 include/linux/perf_event.h      |   6 +++
 include/uapi/linux/perf_event.h |  29 +++++++++-
 kernel/bpf/core.c               |   2 +-
 kernel/bpf/syscall.c            |   2 +
 kernel/events/core.c            | 115 ++++++++++++++++++++++++++++++++++++++++
 6 files changed, 159 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/filter.h b/include/linux/filter.h
index ad106d845b22..d531d4250bff 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -951,6 +951,7 @@ bpf_address_lookup(unsigned long addr, unsigned long *size,
 
 void bpf_prog_kallsyms_add(struct bpf_prog *fp);
 void bpf_prog_kallsyms_del(struct bpf_prog *fp);
+void bpf_get_prog_name(const struct bpf_prog *prog, char *sym);
 
 #else /* CONFIG_BPF_JIT */
 
@@ -1006,6 +1007,12 @@ static inline void bpf_prog_kallsyms_add(struct bpf_prog *fp)
 static inline void bpf_prog_kallsyms_del(struct bpf_prog *fp)
 {
 }
+
+static inline void bpf_get_prog_name(const struct bpf_prog *prog, char *sym)
+{
+	sym[0] = '\0';
+}
+
 #endif /* CONFIG_BPF_JIT */
 
 void bpf_prog_kallsyms_del_subprogs(struct bpf_prog *fp);
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 136fe0495374..a79e59fc3b7d 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1125,6 +1125,9 @@ extern void perf_event_mmap(struct vm_area_struct *vma);
 
 extern void perf_event_ksymbol(u16 ksym_type, u64 addr, u32 len,
 			       bool unregister, const char *sym);
+extern void perf_event_bpf_event(struct bpf_prog *prog,
+				 enum perf_bpf_event_type type,
+				 u16 flags);
 
 extern struct perf_guest_info_callbacks *perf_guest_cbs;
 extern int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks);
@@ -1350,6 +1353,9 @@ static inline void perf_event_mmap(struct vm_area_struct *vma)		{ }
 typedef int (perf_ksymbol_get_name_f)(char *name, int name_len, void *data);
 static inline void perf_event_ksymbol(u16 ksym_type, u64 addr, u32 len,
 				      bool unregister, const char *sym)	{ }
+static inline void perf_event_bpf_event(struct bpf_prog *prog,
+					enum perf_bpf_event_type type,
+					u16 flags)			{ }
 static inline void perf_event_exec(void)				{ }
 static inline void perf_event_comm(struct task_struct *tsk, bool exec)	{ }
 static inline void perf_event_namespaces(struct task_struct *tsk)	{ }
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 1dee5c8f166b..7198ddd0c6b1 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -373,7 +373,8 @@ struct perf_event_attr {
 				write_backward :  1, /* Write ring buffer from end to beginning */
 				namespaces     :  1, /* include namespaces data */
 				ksymbol        :  1, /* include ksymbol events */
-				__reserved_1   : 34;
+				bpf_event      :  1, /* include bpf events */
+				__reserved_1   : 33;
 
 	union {
 		__u32		wakeup_events;	  /* wakeup every n events */
@@ -979,6 +980,25 @@ enum perf_event_type {
 	 */
 	PERF_RECORD_KSYMBOL			= 17,
 
+	/*
+	 * Record bpf events:
+	 *  enum perf_bpf_event_type {
+	 *	PERF_BPF_EVENT_UNKNOWN		= 0,
+	 *	PERF_BPF_EVENT_PROG_LOAD	= 1,
+	 *	PERF_BPF_EVENT_PROG_UNLOAD	= 2,
+	 *  };
+	 *
+	 * struct {
+	 *	struct perf_event_header	header;
+	 *	u16				type;
+	 *	u16				flags;
+	 *	u32				id;
+	 *	u8				tag[BPF_TAG_SIZE];
+	 *	struct sample_id		sample_id;
+	 * };
+	 */
+	PERF_RECORD_BPF_EVENT			= 18,
+
 	PERF_RECORD_MAX,			/* non-ABI */
 };
 
@@ -990,6 +1010,13 @@ enum perf_record_ksymbol_type {
 
 #define PERF_RECORD_KSYMBOL_FLAGS_UNREGISTER	(1 << 0)
 
+enum perf_bpf_event_type {
+	PERF_BPF_EVENT_UNKNOWN		= 0,
+	PERF_BPF_EVENT_PROG_LOAD	= 1,
+	PERF_BPF_EVENT_PROG_UNLOAD	= 2,
+	PERF_BPF_EVENT_MAX,		/* non-ABI */
+};
+
 #define PERF_MAX_STACK_DEPTH		127
 #define PERF_MAX_CONTEXTS_PER_STACK	  8
 
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index f908b9356025..19c49313c709 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -495,7 +495,7 @@ bpf_get_prog_addr_region(const struct bpf_prog *prog,
 	*symbol_end   = addr + hdr->pages * PAGE_SIZE;
 }
 
-static void bpf_get_prog_name(const struct bpf_prog *prog, char *sym)
+void bpf_get_prog_name(const struct bpf_prog *prog, char *sym)
 {
 	const char *end = sym + KSYM_NAME_LEN;
 	const struct btf_type *type;
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index b155cd17c1bd..30ebd085790b 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1211,6 +1211,7 @@ static void __bpf_prog_put_rcu(struct rcu_head *rcu)
 static void __bpf_prog_put(struct bpf_prog *prog, bool do_idr_lock)
 {
 	if (atomic_dec_and_test(&prog->aux->refcnt)) {
+		perf_event_bpf_event(prog, PERF_BPF_EVENT_PROG_UNLOAD, 0);
 		/* bpf_prog_free_id() must be called first */
 		bpf_prog_free_id(prog, do_idr_lock);
 		bpf_prog_kallsyms_del_all(prog);
@@ -1554,6 +1555,7 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
 	}
 
 	bpf_prog_kallsyms_add(prog);
+	perf_event_bpf_event(prog, PERF_BPF_EVENT_PROG_LOAD, 0);
 	return err;
 
 free_used_maps:
diff --git a/kernel/events/core.c b/kernel/events/core.c
index e04ab5f325cf..236bb8ddb7bc 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -386,6 +386,7 @@ static atomic_t nr_task_events __read_mostly;
 static atomic_t nr_freq_events __read_mostly;
 static atomic_t nr_switch_events __read_mostly;
 static atomic_t nr_ksymbol_events __read_mostly;
+static atomic_t nr_bpf_events __read_mostly;
 
 static LIST_HEAD(pmus);
 static DEFINE_MUTEX(pmus_lock);
@@ -4308,6 +4309,8 @@ static void unaccount_event(struct perf_event *event)
 		dec = true;
 	if (event->attr.ksymbol)
 		atomic_dec(&nr_ksymbol_events);
+	if (event->attr.bpf_event)
+		atomic_dec(&nr_bpf_events);
 
 	if (dec) {
 		if (!atomic_add_unless(&perf_sched_count, -1, 1))
@@ -7747,6 +7750,116 @@ err:
 	WARN_ONCE(1, "%s: Invalid KSYMBOL type 0x%x\n", __func__, ksym_type);
 }
 
+/*
+ * bpf program load/unload tracking
+ */
+
+struct perf_bpf_event {
+	struct bpf_prog	*prog;
+	struct {
+		struct perf_event_header        header;
+		u16				type;
+		u16				flags;
+		u32				id;
+		u8				tag[BPF_TAG_SIZE];
+	} event_id;
+};
+
+static int perf_event_bpf_match(struct perf_event *event)
+{
+	return event->attr.bpf_event;
+}
+
+static void perf_event_bpf_output(struct perf_event *event, void *data)
+{
+	struct perf_bpf_event *bpf_event = data;
+	struct perf_output_handle handle;
+	struct perf_sample_data sample;
+	int ret;
+
+	if (!perf_event_bpf_match(event))
+		return;
+
+	perf_event_header__init_id(&bpf_event->event_id.header,
+				   &sample, event);
+	ret = perf_output_begin(&handle, event,
+				bpf_event->event_id.header.size);
+	if (ret)
+		return;
+
+	perf_output_put(&handle, bpf_event->event_id);
+	perf_event__output_id_sample(event, &handle, &sample);
+
+	perf_output_end(&handle);
+}
+
+static void perf_event_bpf_emit_ksymbols(struct bpf_prog *prog,
+					 enum perf_bpf_event_type type)
+{
+	bool unregister = type == PERF_BPF_EVENT_PROG_UNLOAD;
+	char sym[KSYM_NAME_LEN];
+	int i;
+
+	if (prog->aux->func_cnt == 0) {
+		bpf_get_prog_name(prog, sym);
+		perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_BPF,
+				   (u64)(unsigned long)prog->bpf_func,
+				   prog->jited_len, unregister, sym);
+	} else {
+		for (i = 0; i < prog->aux->func_cnt; i++) {
+			struct bpf_prog *subprog = prog->aux->func[i];
+
+			bpf_get_prog_name(subprog, sym);
+			perf_event_ksymbol(
+				PERF_RECORD_KSYMBOL_TYPE_BPF,
+				(u64)(unsigned long)subprog->bpf_func,
+				subprog->jited_len, unregister, sym);
+		}
+	}
+}
+
+void perf_event_bpf_event(struct bpf_prog *prog,
+			  enum perf_bpf_event_type type,
+			  u16 flags)
+{
+	struct perf_bpf_event bpf_event;
+
+	if (type <= PERF_BPF_EVENT_UNKNOWN ||
+	    type >= PERF_BPF_EVENT_MAX)
+		return;
+
+	switch (type) {
+	case PERF_BPF_EVENT_PROG_LOAD:
+	case PERF_BPF_EVENT_PROG_UNLOAD:
+		if (atomic_read(&nr_ksymbol_events))
+			perf_event_bpf_emit_ksymbols(prog, type);
+		break;
+	default:
+		break;
+	}
+
+	if (!atomic_read(&nr_bpf_events))
+		return;
+
+	bpf_event = (struct perf_bpf_event){
+		.prog = prog,
+		.event_id = {
+			.header = {
+				.type = PERF_RECORD_BPF_EVENT,
+				.size = sizeof(bpf_event.event_id),
+			},
+			.type = type,
+			.flags = flags,
+			.id = prog->aux->id,
+		},
+	};
+
+	BUILD_BUG_ON(BPF_TAG_SIZE % sizeof(u64));
+
+	memcpy(bpf_event.event_id.tag, prog->tag, BPF_TAG_SIZE);
+	perf_iterate_sb(perf_event_bpf_output, &bpf_event, NULL);
+}
+
 void perf_event_itrace_started(struct perf_event *event)
 {
 	event->attach_state |= PERF_ATTACH_ITRACE;
@@ -10008,6 +10121,8 @@ static void account_event(struct perf_event *event)
 		inc = true;
 	if (event->attr.ksymbol)
 		atomic_inc(&nr_ksymbol_events);
+	if (event->attr.bpf_event)
+		atomic_inc(&nr_bpf_events);
 
 	if (inc) {
 		/*
-- 
cgit v1.2.3


From 534fd7aac56a7994d16032f32123def9923e339f Mon Sep 17 00:00:00 2001
From: Yishai Hadas <yishaih@mellanox.com>
Date: Sun, 13 Jan 2019 16:01:17 +0200
Subject: IB/mlx5: Manage indirection mkey upon DEVX flow for ODP

Manage indirection mkey upon DEVX flow to support ODP.

To support a page fault event on the indirection mkey it needs to be part
of the device mkey radix tree.

Both the creation and the deletion flows for a DEVX object which is
indirection mkey were adapted to handle that.

Signed-off-by: Yishai Hadas <yishaih@mellanox.com>
Reviewed-by: Artemy Kovalyov <artemyko@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/hw/mlx5/devx.c    | 89 +++++++++++++++++++++++++++++++++++-
 drivers/infiniband/hw/mlx5/main.c    |  1 +
 drivers/infiniband/hw/mlx5/mlx5_ib.h |  6 +++
 include/linux/mlx5/driver.h          |  1 +
 4 files changed, 96 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c
index b7ff2138ac2a..bbf9a26d8fa6 100644
--- a/drivers/infiniband/hw/mlx5/devx.c
+++ b/drivers/infiniband/hw/mlx5/devx.c
@@ -17,12 +17,18 @@
 #define UVERBS_MODULE_NAME mlx5_ib
 #include <rdma/uverbs_named_ioctl.h>
 
+enum devx_obj_flags {
+	DEVX_OBJ_FLAGS_INDIRECT_MKEY = 1 << 0,
+};
+
 #define MLX5_MAX_DESTROY_INBOX_SIZE_DW MLX5_ST_SZ_DW(delete_fte_in)
 struct devx_obj {
 	struct mlx5_core_dev	*mdev;
 	u64			obj_id;
 	u32			dinlen; /* destroy inbox length */
 	u32			dinbox[MLX5_MAX_DESTROY_INBOX_SIZE_DW];
+	u32			flags;
+	struct mlx5_ib_devx_mr	devx_mr;
 };
 
 struct devx_umem {
@@ -1011,6 +1017,36 @@ static void devx_obj_build_destroy_cmd(void *in, void *out, void *din,
 	}
 }
 
+static int devx_handle_mkey_indirect(struct devx_obj *obj,
+				     struct mlx5_ib_dev *dev,
+				     void *in, void *out)
+{
+	struct mlx5_mkey_table *table = &dev->mdev->priv.mkey_table;
+	struct mlx5_ib_devx_mr *devx_mr = &obj->devx_mr;
+	unsigned long flags;
+	struct mlx5_core_mkey *mkey;
+	void *mkc;
+	u8 key;
+	int err;
+
+	mkey = &devx_mr->mmkey;
+	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
+	key = MLX5_GET(mkc, mkc, mkey_7_0);
+	mkey->key = mlx5_idx_to_mkey(
+			MLX5_GET(create_mkey_out, out, mkey_index)) | key;
+	mkey->type = MLX5_MKEY_INDIRECT_DEVX;
+	mkey->iova = MLX5_GET64(mkc, mkc, start_addr);
+	mkey->size = MLX5_GET64(mkc, mkc, len);
+	mkey->pd = MLX5_GET(mkc, mkc, pd);
+	devx_mr->ndescs = MLX5_GET(mkc, mkc, translations_octword_size);
+
+	write_lock_irqsave(&table->lock, flags);
+	err = radix_tree_insert(&table->tree, mlx5_base_mkey(mkey->key),
+				mkey);
+	write_unlock_irqrestore(&table->lock, flags);
+	return err;
+}
+
 static int devx_handle_mkey_create(struct mlx5_ib_dev *dev,
 				   struct devx_obj *obj,
 				   void *in, int in_len)
@@ -1030,13 +1066,45 @@ static int devx_handle_mkey_create(struct mlx5_ib_dev *dev,
 	access_mode |= MLX5_GET(mkc, mkc, access_mode_4_2) << 2;
 
 	if (access_mode == MLX5_MKC_ACCESS_MODE_KLMS ||
-		access_mode == MLX5_MKC_ACCESS_MODE_KSM)
+		access_mode == MLX5_MKC_ACCESS_MODE_KSM) {
+		if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING))
+			obj->flags |= DEVX_OBJ_FLAGS_INDIRECT_MKEY;
 		return 0;
+	}
 
 	MLX5_SET(create_mkey_in, in, mkey_umem_valid, 1);
 	return 0;
 }
 
+static void devx_free_indirect_mkey(struct rcu_head *rcu)
+{
+	kfree(container_of(rcu, struct devx_obj, devx_mr.rcu));
+}
+
+/* This function to delete from the radix tree needs to be called before
+ * destroying the underlying mkey. Otherwise a race might occur in case that
+ * other thread will get the same mkey before this one will be deleted,
+ * in that case it will fail via inserting to the tree its own data.
+ *
+ * Note:
+ * An error in the destroy is not expected unless there is some other indirect
+ * mkey which points to this one. In a kernel cleanup flow it will be just
+ * destroyed in the iterative destruction call. In a user flow, in case
+ * the application didn't close in the expected order it's its own problem,
+ * the mkey won't be part of the tree, in both cases the kernel is safe.
+ */
+static void devx_cleanup_mkey(struct devx_obj *obj)
+{
+	struct mlx5_mkey_table *table = &obj->mdev->priv.mkey_table;
+	struct mlx5_core_mkey *del_mkey;
+	unsigned long flags;
+
+	write_lock_irqsave(&table->lock, flags);
+	del_mkey = radix_tree_delete(&table->tree,
+				     mlx5_base_mkey(obj->devx_mr.mmkey.key));
+	write_unlock_irqrestore(&table->lock, flags);
+}
+
 static int devx_obj_cleanup(struct ib_uobject *uobject,
 			    enum rdma_remove_reason why)
 {
@@ -1044,10 +1112,21 @@ static int devx_obj_cleanup(struct ib_uobject *uobject,
 	struct devx_obj *obj = uobject->object;
 	int ret;
 
+	if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY)
+		devx_cleanup_mkey(obj);
+
 	ret = mlx5_cmd_exec(obj->mdev, obj->dinbox, obj->dinlen, out, sizeof(out));
 	if (ib_is_destroy_retryable(ret, why, uobject))
 		return ret;
 
+	if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) {
+		struct mlx5_ib_dev *dev = to_mdev(uobject->context->device);
+
+		call_srcu(&dev->mr_srcu, &obj->devx_mr.rcu,
+			  devx_free_indirect_mkey);
+		return ret;
+	}
+
 	kfree(obj);
 	return ret;
 }
@@ -1108,6 +1187,12 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)(
 				   &obj_id);
 	WARN_ON(obj->dinlen > MLX5_MAX_DESTROY_INBOX_SIZE_DW * sizeof(u32));
 
+	if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) {
+		err = devx_handle_mkey_indirect(obj, dev, cmd_in, cmd_out);
+		if (err)
+			goto obj_destroy;
+	}
+
 	err = uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT, cmd_out, cmd_out_len);
 	if (err)
 		goto obj_destroy;
@@ -1116,6 +1201,8 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)(
 	return 0;
 
 obj_destroy:
+	if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY)
+		devx_cleanup_mkey(obj);
 	mlx5_cmd_exec(obj->mdev, obj->dinbox, obj->dinlen, out, sizeof(out));
 obj_free:
 	kfree(obj);
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 61064b7171fc..ae00f994673b 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -5724,6 +5724,7 @@ void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev)
 {
 	mlx5_ib_cleanup_multiport_master(dev);
 	if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) {
+		srcu_barrier(&dev->mr_srcu);
 		cleanup_srcu_struct(&dev->mr_srcu);
 		drain_workqueue(dev->advise_mr_wq);
 		destroy_workqueue(dev->advise_mr_wq);
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index b0a37ca2a714..819207190343 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -602,6 +602,12 @@ struct mlx5_ib_mw {
 	int			ndescs;
 };
 
+struct mlx5_ib_devx_mr {
+	struct mlx5_core_mkey	mmkey;
+	int			ndescs;
+	struct rcu_head		rcu;
+};
+
 struct mlx5_ib_umr_context {
 	struct ib_cqe		cqe;
 	enum ib_wc_status	status;
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index b6f5839f129a..619d6fee96a1 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -364,6 +364,7 @@ struct mlx5_core_sig_ctx {
 enum {
 	MLX5_MKEY_MR = 1,
 	MLX5_MKEY_MW,
+	MLX5_MKEY_INDIRECT_DEVX,
 };
 
 struct mlx5_core_mkey {
-- 
cgit v1.2.3


From c4f5627f7eeecde1bb6b646d8c0907b96dc2b2a6 Mon Sep 17 00:00:00 2001
From: Matthias Kaehlcke <mka@chromium.org>
Date: Wed, 2 Jan 2019 16:11:20 -0800
Subject: Bluetooth: Fix locking in bt_accept_enqueue() for BH context

With commit e16337622016 ("Bluetooth: Handle bt_accept_enqueue() socket
atomically") lock_sock[_nested]() is used to acquire the socket lock
before manipulating the socket. lock_sock[_nested]() may block, which
is problematic since bt_accept_enqueue() can be called in bottom half
context (e.g. from rfcomm_connect_ind()):

[<ffffff80080d81ec>] __might_sleep+0x4c/0x80
[<ffffff800876c7b0>] lock_sock_nested+0x24/0x58
[<ffffff8000d7c27c>] bt_accept_enqueue+0x48/0xd4 [bluetooth]
[<ffffff8000e67d8c>] rfcomm_connect_ind+0x190/0x218 [rfcomm]

Add a parameter to bt_accept_enqueue() to indicate whether the
function is called from BH context, and acquire the socket lock
with bh_lock_sock_nested() if that's the case.

Also adapt all callers of bt_accept_enqueue() to pass the new
parameter:

- l2cap_sock_new_connection_cb()
  - uses lock_sock() to lock the parent socket => process context

- rfcomm_connect_ind()
  - acquires the parent socket lock with bh_lock_sock() => BH
    context

- __sco_chan_add()
  - called from sco_chan_add(), which is called from sco_connect().
    parent is NULL, hence bt_accept_enqueue() isn't called in this
    code path and we can ignore it
  - also called from sco_conn_ready(). uses bh_lock_sock() to acquire
    the parent lock => BH context

Fixes: e16337622016 ("Bluetooth: Handle bt_accept_enqueue() socket atomically")
Signed-off-by: Matthias Kaehlcke <mka@chromium.org>
Reviewed-by: Douglas Anderson <dianders@chromium.org>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Cc: stable@vger.kernel.org
---
 include/net/bluetooth/bluetooth.h |  2 +-
 net/bluetooth/af_bluetooth.c      | 16 +++++++++++++---
 net/bluetooth/l2cap_sock.c        |  2 +-
 net/bluetooth/rfcomm/sock.c       |  2 +-
 net/bluetooth/sco.c               |  2 +-
 5 files changed, 17 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h
index ec9d6bc65855..fabee6db0abb 100644
--- a/include/net/bluetooth/bluetooth.h
+++ b/include/net/bluetooth/bluetooth.h
@@ -276,7 +276,7 @@ int  bt_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
 int  bt_sock_wait_state(struct sock *sk, int state, unsigned long timeo);
 int  bt_sock_wait_ready(struct sock *sk, unsigned long flags);
 
-void bt_accept_enqueue(struct sock *parent, struct sock *sk);
+void bt_accept_enqueue(struct sock *parent, struct sock *sk, bool bh);
 void bt_accept_unlink(struct sock *sk);
 struct sock *bt_accept_dequeue(struct sock *parent, struct socket *newsock);
 
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index deacc52d7ff1..8d12198eaa94 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -154,15 +154,25 @@ void bt_sock_unlink(struct bt_sock_list *l, struct sock *sk)
 }
 EXPORT_SYMBOL(bt_sock_unlink);
 
-void bt_accept_enqueue(struct sock *parent, struct sock *sk)
+void bt_accept_enqueue(struct sock *parent, struct sock *sk, bool bh)
 {
 	BT_DBG("parent %p, sk %p", parent, sk);
 
 	sock_hold(sk);
-	lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
+
+	if (bh)
+		bh_lock_sock_nested(sk);
+	else
+		lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
+
 	list_add_tail(&bt_sk(sk)->accept_q, &bt_sk(parent)->accept_q);
 	bt_sk(sk)->parent = parent;
-	release_sock(sk);
+
+	if (bh)
+		bh_unlock_sock(sk);
+	else
+		release_sock(sk);
+
 	parent->sk_ack_backlog++;
 }
 EXPORT_SYMBOL(bt_accept_enqueue);
diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index 686bdc6b35b0..a3a2cd55e23a 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -1252,7 +1252,7 @@ static struct l2cap_chan *l2cap_sock_new_connection_cb(struct l2cap_chan *chan)
 
 	l2cap_sock_init(sk, parent);
 
-	bt_accept_enqueue(parent, sk);
+	bt_accept_enqueue(parent, sk, false);
 
 	release_sock(parent);
 
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index aa0db1d1bd9b..b1f49fcc0478 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -988,7 +988,7 @@ int rfcomm_connect_ind(struct rfcomm_session *s, u8 channel, struct rfcomm_dlc *
 	rfcomm_pi(sk)->channel = channel;
 
 	sk->sk_state = BT_CONFIG;
-	bt_accept_enqueue(parent, sk);
+	bt_accept_enqueue(parent, sk, true);
 
 	/* Accept connection and return socket DLC */
 	*d = rfcomm_pi(sk)->dlc;
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index 529b38996d8b..9a580999ca57 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -193,7 +193,7 @@ static void __sco_chan_add(struct sco_conn *conn, struct sock *sk,
 	conn->sk = sk;
 
 	if (parent)
-		bt_accept_enqueue(parent, sk);
+		bt_accept_enqueue(parent, sk, true);
 }
 
 static int sco_chan_add(struct sco_conn *conn, struct sock *sk,
-- 
cgit v1.2.3


From 1278cf66cf4b1c3d30e311200b50c45457c92baa Mon Sep 17 00:00:00 2001
From: Finn Thain <fthain@telegraphics.com.au>
Date: Tue, 15 Jan 2019 15:18:56 +1100
Subject: nvram: Replace nvram_* function exports with static functions

Replace nvram_* functions with static functions in nvram.h. These will
become wrappers for struct nvram_ops method calls.

This patch effectively disables existing NVRAM functionality so as to
allow the rest of the series to be bisected without build failures.
That functionality is gradually re-implemented in subsequent patches.

Replace the sole validate-checksum-and-read-byte sequence with a call to
nvram_read() which will gain the same semantics in subsequent patches.

Remove unused exports.

Acked-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Finn Thain <fthain@telegraphics.com.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/m68k/atari/nvram.c   | 39 +++------------------------------------
 drivers/char/nvram.c      | 27 +++++----------------------
 drivers/scsi/atari_scsi.c |  8 +++++---
 include/linux/nvram.h     | 32 +++++++++++++++++++++++++-------
 4 files changed, 38 insertions(+), 68 deletions(-)

(limited to 'include')

diff --git a/arch/m68k/atari/nvram.c b/arch/m68k/atari/nvram.c
index a8c457e40b0b..1d767847ffa6 100644
--- a/arch/m68k/atari/nvram.c
+++ b/arch/m68k/atari/nvram.c
@@ -34,38 +34,17 @@
  * periodic 11 min sync from kernel/time/ntp.c vs. this driver.)
  */
 
-unsigned char __nvram_read_byte(int i)
+static unsigned char __nvram_read_byte(int i)
 {
 	return CMOS_READ(NVRAM_FIRST_BYTE + i);
 }
 
-unsigned char nvram_read_byte(int i)
-{
-	unsigned long flags;
-	unsigned char c;
-
-	spin_lock_irqsave(&rtc_lock, flags);
-	c = __nvram_read_byte(i);
-	spin_unlock_irqrestore(&rtc_lock, flags);
-	return c;
-}
-EXPORT_SYMBOL(nvram_read_byte);
-
 /* This races nicely with trying to read with checksum checking */
-void __nvram_write_byte(unsigned char c, int i)
+static void __nvram_write_byte(unsigned char c, int i)
 {
 	CMOS_WRITE(c, NVRAM_FIRST_BYTE + i);
 }
 
-void nvram_write_byte(unsigned char c, int i)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&rtc_lock, flags);
-	__nvram_write_byte(c, i);
-	spin_unlock_irqrestore(&rtc_lock, flags);
-}
-
 /* On Ataris, the checksum is over all bytes except the checksum bytes
  * themselves; these are at the very end.
  */
@@ -73,7 +52,7 @@ void nvram_write_byte(unsigned char c, int i)
 #define ATARI_CKS_RANGE_END	47
 #define ATARI_CKS_LOC		48
 
-int __nvram_check_checksum(void)
+static int __nvram_check_checksum(void)
 {
 	int i;
 	unsigned char sum = 0;
@@ -84,18 +63,6 @@ int __nvram_check_checksum(void)
 	       (__nvram_read_byte(ATARI_CKS_LOC + 1) == (sum & 0xff));
 }
 
-int nvram_check_checksum(void)
-{
-	unsigned long flags;
-	int rv;
-
-	spin_lock_irqsave(&rtc_lock, flags);
-	rv = __nvram_check_checksum();
-	spin_unlock_irqrestore(&rtc_lock, flags);
-	return rv;
-}
-EXPORT_SYMBOL(nvram_check_checksum);
-
 static void __nvram_set_checksum(void)
 {
 	int i;
diff --git a/drivers/char/nvram.c b/drivers/char/nvram.c
index c660cff9faf4..c98775bfd896 100644
--- a/drivers/char/nvram.c
+++ b/drivers/char/nvram.c
@@ -74,13 +74,12 @@ static int nvram_open_mode;	/* special open modes */
  * periodic 11 min sync from kernel/time/ntp.c vs. this driver.)
  */
 
-unsigned char __nvram_read_byte(int i)
+static unsigned char __nvram_read_byte(int i)
 {
 	return CMOS_READ(NVRAM_FIRST_BYTE + i);
 }
-EXPORT_SYMBOL(__nvram_read_byte);
 
-unsigned char nvram_read_byte(int i)
+static unsigned char pc_nvram_read_byte(int i)
 {
 	unsigned long flags;
 	unsigned char c;
@@ -90,16 +89,14 @@ unsigned char nvram_read_byte(int i)
 	spin_unlock_irqrestore(&rtc_lock, flags);
 	return c;
 }
-EXPORT_SYMBOL(nvram_read_byte);
 
 /* This races nicely with trying to read with checksum checking (nvram_read) */
-void __nvram_write_byte(unsigned char c, int i)
+static void __nvram_write_byte(unsigned char c, int i)
 {
 	CMOS_WRITE(c, NVRAM_FIRST_BYTE + i);
 }
-EXPORT_SYMBOL(__nvram_write_byte);
 
-void nvram_write_byte(unsigned char c, int i)
+static void pc_nvram_write_byte(unsigned char c, int i)
 {
 	unsigned long flags;
 
@@ -107,14 +104,13 @@ void nvram_write_byte(unsigned char c, int i)
 	__nvram_write_byte(c, i);
 	spin_unlock_irqrestore(&rtc_lock, flags);
 }
-EXPORT_SYMBOL(nvram_write_byte);
 
 /* On PCs, the checksum is built only over bytes 2..31 */
 #define PC_CKS_RANGE_START	2
 #define PC_CKS_RANGE_END	31
 #define PC_CKS_LOC		32
 
-int __nvram_check_checksum(void)
+static int __nvram_check_checksum(void)
 {
 	int i;
 	unsigned short sum = 0;
@@ -126,19 +122,6 @@ int __nvram_check_checksum(void)
 	    __nvram_read_byte(PC_CKS_LOC+1);
 	return (sum & 0xffff) == expect;
 }
-EXPORT_SYMBOL(__nvram_check_checksum);
-
-int nvram_check_checksum(void)
-{
-	unsigned long flags;
-	int rv;
-
-	spin_lock_irqsave(&rtc_lock, flags);
-	rv = __nvram_check_checksum();
-	spin_unlock_irqrestore(&rtc_lock, flags);
-	return rv;
-}
-EXPORT_SYMBOL(nvram_check_checksum);
 
 static void __nvram_set_checksum(void)
 {
diff --git a/drivers/scsi/atari_scsi.c b/drivers/scsi/atari_scsi.c
index 78b43200c99e..e809493d0d06 100644
--- a/drivers/scsi/atari_scsi.c
+++ b/drivers/scsi/atari_scsi.c
@@ -759,13 +759,15 @@ static int __init atari_scsi_probe(struct platform_device *pdev)
 		atari_scsi_template.this_id = setup_hostid & 7;
 	} else if (IS_REACHABLE(CONFIG_NVRAM)) {
 		/* Test if a host id is set in the NVRam */
-		if (ATARIHW_PRESENT(TT_CLK) && nvram_check_checksum()) {
-			unsigned char b = nvram_read_byte(16);
+		if (ATARIHW_PRESENT(TT_CLK)) {
+			unsigned char b;
+			loff_t offset = 16;
+			ssize_t count = nvram_read(&b, 1, &offset);
 
 			/* Arbitration enabled? (for TOS)
 			 * If yes, use configured host ID
 			 */
-			if (b & 0x80)
+			if ((count == 1) && (b & 0x80))
 				atari_scsi_template.this_id = b & 7;
 		}
 	}
diff --git a/include/linux/nvram.h b/include/linux/nvram.h
index 28bfb9ab94ca..eb5b52a9a747 100644
--- a/include/linux/nvram.h
+++ b/include/linux/nvram.h
@@ -2,13 +2,31 @@
 #ifndef _LINUX_NVRAM_H
 #define _LINUX_NVRAM_H
 
+#include <linux/errno.h>
 #include <uapi/linux/nvram.h>
 
-/* __foo is foo without grabbing the rtc_lock - get it yourself */
-extern unsigned char __nvram_read_byte(int i);
-extern unsigned char nvram_read_byte(int i);
-extern void __nvram_write_byte(unsigned char c, int i);
-extern void nvram_write_byte(unsigned char c, int i);
-extern int __nvram_check_checksum(void);
-extern int nvram_check_checksum(void);
+static inline ssize_t nvram_get_size(void)
+{
+	return -ENODEV;
+}
+
+static inline unsigned char nvram_read_byte(int addr)
+{
+	return 0xFF;
+}
+
+static inline void nvram_write_byte(unsigned char val, int addr)
+{
+}
+
+static inline ssize_t nvram_read(char *buf, size_t count, loff_t *ppos)
+{
+	return -ENODEV;
+}
+
+static inline ssize_t nvram_write(char *buf, size_t count, loff_t *ppos)
+{
+	return -ENODEV;
+}
+
 #endif  /* _LINUX_NVRAM_H */
-- 
cgit v1.2.3


From a084dbf6592c22468eb946014b2e731fb42da7a9 Mon Sep 17 00:00:00 2001
From: Finn Thain <fthain@telegraphics.com.au>
Date: Tue, 15 Jan 2019 15:18:56 +1100
Subject: m68k/atari: Implement arch_nvram_ops struct

By implementing an arch_nvram_ops struct, a platform can re-use the
drivers/char/nvram.c module without needing any arch-specific code
in that module. Atari does so here.

Acked-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Finn Thain <fthain@telegraphics.com.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/m68k/atari/nvram.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/nvram.h   | 14 ++++++++++++++
 2 files changed, 63 insertions(+)

(limited to 'include')

diff --git a/arch/m68k/atari/nvram.c b/arch/m68k/atari/nvram.c
index 1d767847ffa6..e75adebe6e7d 100644
--- a/arch/m68k/atari/nvram.c
+++ b/arch/m68k/atari/nvram.c
@@ -74,6 +74,55 @@ static void __nvram_set_checksum(void)
 	__nvram_write_byte(sum, ATARI_CKS_LOC + 1);
 }
 
+static ssize_t atari_nvram_read(char *buf, size_t count, loff_t *ppos)
+{
+	char *p = buf;
+	loff_t i;
+
+	spin_lock_irq(&rtc_lock);
+	if (!__nvram_check_checksum()) {
+		spin_unlock_irq(&rtc_lock);
+		return -EIO;
+	}
+	for (i = *ppos; count > 0 && i < NVRAM_BYTES; --count, ++i, ++p)
+		*p = __nvram_read_byte(i);
+	spin_unlock_irq(&rtc_lock);
+
+	*ppos = i;
+	return p - buf;
+}
+
+static ssize_t atari_nvram_write(char *buf, size_t count, loff_t *ppos)
+{
+	char *p = buf;
+	loff_t i;
+
+	spin_lock_irq(&rtc_lock);
+	if (!__nvram_check_checksum()) {
+		spin_unlock_irq(&rtc_lock);
+		return -EIO;
+	}
+	for (i = *ppos; count > 0 && i < NVRAM_BYTES; --count, ++i, ++p)
+		__nvram_write_byte(*p, i);
+	__nvram_set_checksum();
+	spin_unlock_irq(&rtc_lock);
+
+	*ppos = i;
+	return p - buf;
+}
+
+static ssize_t atari_nvram_get_size(void)
+{
+	return NVRAM_BYTES;
+}
+
+const struct nvram_ops arch_nvram_ops = {
+	.read           = atari_nvram_read,
+	.write          = atari_nvram_write,
+	.get_size       = atari_nvram_get_size,
+};
+EXPORT_SYMBOL(arch_nvram_ops);
+
 #ifdef CONFIG_PROC_FS
 static struct {
 	unsigned char val;
diff --git a/include/linux/nvram.h b/include/linux/nvram.h
index eb5b52a9a747..a1e01dc89759 100644
--- a/include/linux/nvram.h
+++ b/include/linux/nvram.h
@@ -5,8 +5,18 @@
 #include <linux/errno.h>
 #include <uapi/linux/nvram.h>
 
+struct nvram_ops {
+	ssize_t         (*get_size)(void);
+	ssize_t         (*read)(char *, size_t, loff_t *);
+	ssize_t         (*write)(char *, size_t, loff_t *);
+};
+
+extern const struct nvram_ops arch_nvram_ops;
+
 static inline ssize_t nvram_get_size(void)
 {
+	if (arch_nvram_ops.get_size)
+		return arch_nvram_ops.get_size();
 	return -ENODEV;
 }
 
@@ -21,11 +31,15 @@ static inline void nvram_write_byte(unsigned char val, int addr)
 
 static inline ssize_t nvram_read(char *buf, size_t count, loff_t *ppos)
 {
+	if (arch_nvram_ops.read)
+		return arch_nvram_ops.read(buf, count, ppos);
 	return -ENODEV;
 }
 
 static inline ssize_t nvram_write(char *buf, size_t count, loff_t *ppos)
 {
+	if (arch_nvram_ops.write)
+		return arch_nvram_ops.write(buf, count, ppos);
 	return -ENODEV;
 }
 
-- 
cgit v1.2.3


From a156c7ba669c65b55c7afcc3994e1199cc0cad47 Mon Sep 17 00:00:00 2001
From: Finn Thain <fthain@telegraphics.com.au>
Date: Tue, 15 Jan 2019 15:18:56 +1100
Subject: powerpc: Replace nvram_* extern declarations with standard header

Remove the nvram_read_byte() and nvram_write_byte() declarations in
powerpc/include/asm/nvram.h and use the cross-platform static functions
in linux/nvram.h instead.

Tested-by: Stan Johnson <userm57@yahoo.com>
Signed-off-by: Finn Thain <fthain@telegraphics.com.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/powerpc/include/asm/nvram.h           |  6 ------
 arch/powerpc/kernel/setup_32.c             | 25 +------------------------
 drivers/char/generic_nvram.c               |  1 +
 drivers/video/fbdev/matrox/matroxfb_base.c |  2 +-
 include/linux/nvram.h                      |  3 +++
 5 files changed, 6 insertions(+), 31 deletions(-)

(limited to 'include')

diff --git a/arch/powerpc/include/asm/nvram.h b/arch/powerpc/include/asm/nvram.h
index 09a518bb7c03..56a388da9c4f 100644
--- a/arch/powerpc/include/asm/nvram.h
+++ b/arch/powerpc/include/asm/nvram.h
@@ -98,10 +98,4 @@ extern int nvram_write_os_partition(struct nvram_os_partition *part,
 				    unsigned int err_type,
 				    unsigned int error_log_cnt);
 
-/* Determine NVRAM size */
-extern ssize_t nvram_get_size(void);
-
-/* Normal access to NVRAM */
-extern unsigned char nvram_read_byte(int i);
-extern void nvram_write_byte(unsigned char c, int i);
 #endif /* _ASM_POWERPC_NVRAM_H */
diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
index 947f904688b0..f5107796e2d7 100644
--- a/arch/powerpc/kernel/setup_32.c
+++ b/arch/powerpc/kernel/setup_32.c
@@ -17,6 +17,7 @@
 #include <linux/console.h>
 #include <linux/memblock.h>
 #include <linux/export.h>
+#include <linux/nvram.h>
 
 #include <asm/io.h>
 #include <asm/prom.h>
@@ -149,30 +150,6 @@ __setup("l3cr=", ppc_setup_l3cr);
 
 #ifdef CONFIG_GENERIC_NVRAM
 
-/* Generic nvram hooks used by drivers/char/gen_nvram.c */
-unsigned char nvram_read_byte(int addr)
-{
-	if (ppc_md.nvram_read_val)
-		return ppc_md.nvram_read_val(addr);
-	return 0xff;
-}
-EXPORT_SYMBOL(nvram_read_byte);
-
-void nvram_write_byte(unsigned char val, int addr)
-{
-	if (ppc_md.nvram_write_val)
-		ppc_md.nvram_write_val(addr, val);
-}
-EXPORT_SYMBOL(nvram_write_byte);
-
-ssize_t nvram_get_size(void)
-{
-	if (ppc_md.nvram_size)
-		return ppc_md.nvram_size();
-	return -1;
-}
-EXPORT_SYMBOL(nvram_get_size);
-
 void nvram_sync(void)
 {
 	if (ppc_md.nvram_sync)
diff --git a/drivers/char/generic_nvram.c b/drivers/char/generic_nvram.c
index ff5394f47587..0c22b9503e84 100644
--- a/drivers/char/generic_nvram.c
+++ b/drivers/char/generic_nvram.c
@@ -20,6 +20,7 @@
 #include <linux/fcntl.h>
 #include <linux/init.h>
 #include <linux/mutex.h>
+#include <linux/nvram.h>
 #include <linux/pagemap.h>
 #include <linux/uaccess.h>
 #include <asm/nvram.h>
diff --git a/drivers/video/fbdev/matrox/matroxfb_base.c b/drivers/video/fbdev/matrox/matroxfb_base.c
index 838869c6490c..0a4e5bad33f4 100644
--- a/drivers/video/fbdev/matrox/matroxfb_base.c
+++ b/drivers/video/fbdev/matrox/matroxfb_base.c
@@ -111,12 +111,12 @@
 #include "matroxfb_g450.h"
 #include <linux/matroxfb.h>
 #include <linux/interrupt.h>
+#include <linux/nvram.h>
 #include <linux/slab.h>
 #include <linux/uaccess.h>
 
 #ifdef CONFIG_PPC_PMAC
 #include <asm/machdep.h>
-unsigned char nvram_read_byte(int);
 static int default_vmode = VMODE_NVRAM;
 static int default_cmode = CMODE_NVRAM;
 #endif
diff --git a/include/linux/nvram.h b/include/linux/nvram.h
index a1e01dc89759..79431dab87a1 100644
--- a/include/linux/nvram.h
+++ b/include/linux/nvram.h
@@ -15,8 +15,11 @@ extern const struct nvram_ops arch_nvram_ops;
 
 static inline ssize_t nvram_get_size(void)
 {
+#ifdef CONFIG_PPC
+#else
 	if (arch_nvram_ops.get_size)
 		return arch_nvram_ops.get_size();
+#endif
 	return -ENODEV;
 }
 
-- 
cgit v1.2.3


From d5bbb5021ce8d9ff561c7469f5b4589ccb3bc4a6 Mon Sep 17 00:00:00 2001
From: Finn Thain <fthain@telegraphics.com.au>
Date: Tue, 15 Jan 2019 15:18:56 +1100
Subject: char/nvram: Adopt arch_nvram_ops

NVRAMs on different platforms and architectures have different attributes
and access methods. E.g. some platforms have byte-at-a-time accessor
functions while others have byte-range accessor functions. Some have
checksum functionality while others do not. By calling ops struct methods
via the common wrapper functions, the nvram module and other drivers can
make use of the available NVRAM functionality in a portable way.

Signed-off-by: Finn Thain <fthain@telegraphics.com.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/char/nvram.c  | 30 ++++++++++++++++++++++++------
 include/linux/nvram.h | 32 ++++++++++++++++++++++++++++++++
 2 files changed, 56 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/drivers/char/nvram.c b/drivers/char/nvram.c
index c98775bfd896..2df391f78986 100644
--- a/drivers/char/nvram.c
+++ b/drivers/char/nvram.c
@@ -52,9 +52,11 @@ static DEFINE_MUTEX(nvram_mutex);
 static DEFINE_SPINLOCK(nvram_state_lock);
 static int nvram_open_cnt;	/* #times opened */
 static int nvram_open_mode;	/* special open modes */
+static ssize_t nvram_size;
 #define NVRAM_WRITE		1 /* opened for writing (exclusive) */
 #define NVRAM_EXCL		2 /* opened with O_EXCL */
 
+#ifdef CONFIG_X86
 /*
  * These functions are provided to be called internally or by other parts of
  * the kernel. It's up to the caller to ensure correct checksum before reading
@@ -145,6 +147,19 @@ void nvram_set_checksum(void)
 }
 #endif  /*  0  */
 
+static ssize_t pc_nvram_get_size(void)
+{
+	return NVRAM_BYTES;
+}
+
+const struct nvram_ops arch_nvram_ops = {
+	.read_byte      = pc_nvram_read_byte,
+	.write_byte     = pc_nvram_write_byte,
+	.get_size       = pc_nvram_get_size,
+};
+EXPORT_SYMBOL(arch_nvram_ops);
+#endif /* CONFIG_X86 */
+
 /*
  * The are the file operation function for user access to /dev/nvram
  */
@@ -152,7 +167,7 @@ void nvram_set_checksum(void)
 static loff_t nvram_misc_llseek(struct file *file, loff_t offset, int origin)
 {
 	return generic_file_llseek_size(file, offset, origin, MAX_LFS_FILESIZE,
-					NVRAM_BYTES);
+					nvram_size);
 }
 
 static ssize_t nvram_misc_read(struct file *file, char __user *buf,
@@ -303,8 +318,7 @@ static int nvram_misc_release(struct inode *inode, struct file *file)
 	return 0;
 }
 
-#ifdef CONFIG_PROC_FS
-
+#if defined(CONFIG_X86) && defined(CONFIG_PROC_FS)
 static const char * const floppy_types[] = {
 	"none", "5.25'' 360k", "5.25'' 1.2M", "3.5'' 720k", "3.5'' 1.44M",
 	"3.5'' 2.88M", "3.5'' 2.88M"
@@ -394,7 +408,7 @@ static int nvram_proc_read(struct seq_file *seq, void *offset)
 
 	return 0;
 }
-#endif /* CONFIG_PROC_FS */
+#endif /* CONFIG_X86 && CONFIG_PROC_FS */
 
 static const struct file_operations nvram_misc_fops = {
 	.owner		= THIS_MODULE,
@@ -416,13 +430,17 @@ static int __init nvram_module_init(void)
 {
 	int ret;
 
+	nvram_size = nvram_get_size();
+	if (nvram_size < 0)
+		return nvram_size;
+
 	ret = misc_register(&nvram_misc);
 	if (ret) {
 		pr_err("nvram: can't misc_register on minor=%d\n", NVRAM_MINOR);
 		return ret;
 	}
 
-#ifdef CONFIG_PROC_FS
+#if defined(CONFIG_X86) && defined(CONFIG_PROC_FS)
 	if (!proc_create_single("driver/nvram", 0, NULL, nvram_proc_read)) {
 		pr_err("nvram: can't create /proc/driver/nvram\n");
 		misc_deregister(&nvram_misc);
@@ -436,7 +454,7 @@ static int __init nvram_module_init(void)
 
 static void __exit nvram_module_exit(void)
 {
-#ifdef CONFIG_PROC_FS
+#if defined(CONFIG_X86) && defined(CONFIG_PROC_FS)
 	remove_proc_entry("driver/nvram", NULL);
 #endif
 	misc_deregister(&nvram_misc);
diff --git a/include/linux/nvram.h b/include/linux/nvram.h
index 79431dab87a1..bb4ea8cc6ea6 100644
--- a/include/linux/nvram.h
+++ b/include/linux/nvram.h
@@ -5,8 +5,30 @@
 #include <linux/errno.h>
 #include <uapi/linux/nvram.h>
 
+/**
+ * struct nvram_ops - NVRAM functionality made available to drivers
+ * @read: validate checksum (if any) then load a range of bytes from NVRAM
+ * @write: store a range of bytes to NVRAM then update checksum (if any)
+ * @read_byte: load a single byte from NVRAM
+ * @write_byte: store a single byte to NVRAM
+ * @get_size: return the fixed number of bytes in the NVRAM
+ *
+ * Architectures which provide an nvram ops struct need not implement all
+ * of these methods. If the NVRAM hardware can be accessed only one byte
+ * at a time then it may be sufficient to provide .read_byte and .write_byte.
+ * If the NVRAM has a checksum (and it is to be checked) the .read and
+ * .write methods can be used to implement that efficiently.
+ *
+ * Portable drivers may use the wrapper functions defined here.
+ * The nvram_read() and nvram_write() functions call the .read and .write
+ * methods when available and fall back on the .read_byte and .write_byte
+ * methods otherwise.
+ */
+
 struct nvram_ops {
 	ssize_t         (*get_size)(void);
+	unsigned char   (*read_byte)(int);
+	void            (*write_byte)(unsigned char, int);
 	ssize_t         (*read)(char *, size_t, loff_t *);
 	ssize_t         (*write)(char *, size_t, loff_t *);
 };
@@ -25,11 +47,21 @@ static inline ssize_t nvram_get_size(void)
 
 static inline unsigned char nvram_read_byte(int addr)
 {
+#ifdef CONFIG_PPC
+#else
+	if (arch_nvram_ops.read_byte)
+		return arch_nvram_ops.read_byte(addr);
+#endif
 	return 0xFF;
 }
 
 static inline void nvram_write_byte(unsigned char val, int addr)
 {
+#ifdef CONFIG_PPC
+#else
+	if (arch_nvram_ops.write_byte)
+		arch_nvram_ops.write_byte(val, addr);
+#endif
 }
 
 static inline ssize_t nvram_read(char *buf, size_t count, loff_t *ppos)
-- 
cgit v1.2.3


From 2d58636e0af724f38acad25246c1625efec36122 Mon Sep 17 00:00:00 2001
From: Finn Thain <fthain@telegraphics.com.au>
Date: Tue, 15 Jan 2019 15:18:56 +1100
Subject: char/nvram: Allow the set_checksum and initialize ioctls to be
 omitted

The drivers/char/nvram.c module has previously supported only RTC "CMOS"
NVRAM, for which it provides appropriate checksum ioctls. Make these
ioctls optional so the module can be re-used with other kinds of NVRAM.

The ops struct methods that implement the ioctls now return error
codes so that a multi-platform kernel binary can do the right thing when
running on hardware without a suitable NVRAM.

Signed-off-by: Finn Thain <fthain@telegraphics.com.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/char/nvram.c  | 70 +++++++++++++++++++++++++++++----------------------
 include/linux/nvram.h |  2 ++
 2 files changed, 42 insertions(+), 30 deletions(-)

(limited to 'include')

diff --git a/drivers/char/nvram.c b/drivers/char/nvram.c
index 2df391f78986..f88ef41d0598 100644
--- a/drivers/char/nvram.c
+++ b/drivers/char/nvram.c
@@ -136,16 +136,25 @@ static void __nvram_set_checksum(void)
 	__nvram_write_byte(sum & 0xff, PC_CKS_LOC + 1);
 }
 
-#if 0
-void nvram_set_checksum(void)
+static long pc_nvram_set_checksum(void)
 {
-	unsigned long flags;
+	spin_lock_irq(&rtc_lock);
+	__nvram_set_checksum();
+	spin_unlock_irq(&rtc_lock);
+	return 0;
+}
 
-	spin_lock_irqsave(&rtc_lock, flags);
+static long pc_nvram_initialize(void)
+{
+	ssize_t i;
+
+	spin_lock_irq(&rtc_lock);
+	for (i = 0; i < NVRAM_BYTES; ++i)
+		__nvram_write_byte(0, i);
 	__nvram_set_checksum();
-	spin_unlock_irqrestore(&rtc_lock, flags);
+	spin_unlock_irq(&rtc_lock);
+	return 0;
 }
-#endif  /*  0  */
 
 static ssize_t pc_nvram_get_size(void)
 {
@@ -156,6 +165,8 @@ const struct nvram_ops arch_nvram_ops = {
 	.read_byte      = pc_nvram_read_byte,
 	.write_byte     = pc_nvram_write_byte,
 	.get_size       = pc_nvram_get_size,
+	.set_checksum   = pc_nvram_set_checksum,
+	.initialize     = pc_nvram_initialize,
 };
 EXPORT_SYMBOL(arch_nvram_ops);
 #endif /* CONFIG_X86 */
@@ -241,51 +252,50 @@ checksum_err:
 static long nvram_misc_ioctl(struct file *file, unsigned int cmd,
 			     unsigned long arg)
 {
-	int i;
+	long ret = -ENOTTY;
 
 	switch (cmd) {
-
 	case NVRAM_INIT:
 		/* initialize NVRAM contents and checksum */
 		if (!capable(CAP_SYS_ADMIN))
 			return -EACCES;
 
-		mutex_lock(&nvram_mutex);
-		spin_lock_irq(&rtc_lock);
-
-		for (i = 0; i < NVRAM_BYTES; ++i)
-			__nvram_write_byte(0, i);
-		__nvram_set_checksum();
-
-		spin_unlock_irq(&rtc_lock);
-		mutex_unlock(&nvram_mutex);
-		return 0;
-
+		if (arch_nvram_ops.initialize != NULL) {
+			mutex_lock(&nvram_mutex);
+			ret = arch_nvram_ops.initialize();
+			mutex_unlock(&nvram_mutex);
+		}
+		break;
 	case NVRAM_SETCKS:
 		/* just set checksum, contents unchanged (maybe useful after
 		 * checksum garbaged somehow...) */
 		if (!capable(CAP_SYS_ADMIN))
 			return -EACCES;
 
-		mutex_lock(&nvram_mutex);
-		spin_lock_irq(&rtc_lock);
-		__nvram_set_checksum();
-		spin_unlock_irq(&rtc_lock);
-		mutex_unlock(&nvram_mutex);
-		return 0;
-
-	default:
-		return -ENOTTY;
+		if (arch_nvram_ops.set_checksum != NULL) {
+			mutex_lock(&nvram_mutex);
+			ret = arch_nvram_ops.set_checksum();
+			mutex_unlock(&nvram_mutex);
+		}
+		break;
 	}
+	return ret;
 }
 
 static int nvram_misc_open(struct inode *inode, struct file *file)
 {
 	spin_lock(&nvram_state_lock);
 
+	/* Prevent multiple readers/writers if desired. */
 	if ((nvram_open_cnt && (file->f_flags & O_EXCL)) ||
-	    (nvram_open_mode & NVRAM_EXCL) ||
-	    ((file->f_mode & FMODE_WRITE) && (nvram_open_mode & NVRAM_WRITE))) {
+	    (nvram_open_mode & NVRAM_EXCL)) {
+		spin_unlock(&nvram_state_lock);
+		return -EBUSY;
+	}
+
+	/* Prevent multiple writers if the set_checksum ioctl is implemented. */
+	if ((arch_nvram_ops.set_checksum != NULL) &&
+	    (file->f_mode & FMODE_WRITE) && (nvram_open_mode & NVRAM_WRITE)) {
 		spin_unlock(&nvram_state_lock);
 		return -EBUSY;
 	}
diff --git a/include/linux/nvram.h b/include/linux/nvram.h
index bb4ea8cc6ea6..31c763087746 100644
--- a/include/linux/nvram.h
+++ b/include/linux/nvram.h
@@ -31,6 +31,8 @@ struct nvram_ops {
 	void            (*write_byte)(unsigned char, int);
 	ssize_t         (*read)(char *, size_t, loff_t *);
 	ssize_t         (*write)(char *, size_t, loff_t *);
+	long            (*initialize)(void);
+	long            (*set_checksum)(void);
 };
 
 extern const struct nvram_ops arch_nvram_ops;
-- 
cgit v1.2.3


From 109b3a89a7c48405d61a05d7a1720581a4f1574c Mon Sep 17 00:00:00 2001
From: Finn Thain <fthain@telegraphics.com.au>
Date: Tue, 15 Jan 2019 15:18:56 +1100
Subject: char/nvram: Implement NVRAM read/write methods

Refactor the RTC "CMOS" NVRAM functions so that they can be used as
arch_nvram_ops methods. Checksumming logic is moved from the misc device
operations to the nvram read/write operations. This makes the misc device
implementation more generic.

This preserves the locking mechanism such that "read if checksum valid"
and "write and update checksum" remain atomic operations.

Some platforms implement byte-range read/write methods which are similar
to file_operations struct methods. Other platforms provide only
byte-at-a-time methods. The former are more efficient but may be
unavailable so fall back on the latter methods when necessary.

Tested-by: Stan Johnson <userm57@yahoo.com>
Signed-off-by: Finn Thain <fthain@telegraphics.com.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/char/nvram.c  | 120 +++++++++++++++++++++++++++++++-------------------
 include/linux/nvram.h |  32 +++++++++++++-
 2 files changed, 104 insertions(+), 48 deletions(-)

(limited to 'include')

diff --git a/drivers/char/nvram.c b/drivers/char/nvram.c
index f88ef41d0598..adcc213c331e 100644
--- a/drivers/char/nvram.c
+++ b/drivers/char/nvram.c
@@ -41,6 +41,7 @@
 #include <linux/init.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
+#include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <linux/io.h>
 #include <linux/uaccess.h>
@@ -161,7 +162,46 @@ static ssize_t pc_nvram_get_size(void)
 	return NVRAM_BYTES;
 }
 
+static ssize_t pc_nvram_read(char *buf, size_t count, loff_t *ppos)
+{
+	char *p = buf;
+	loff_t i;
+
+	spin_lock_irq(&rtc_lock);
+	if (!__nvram_check_checksum()) {
+		spin_unlock_irq(&rtc_lock);
+		return -EIO;
+	}
+	for (i = *ppos; count > 0 && i < NVRAM_BYTES; --count, ++i, ++p)
+		*p = __nvram_read_byte(i);
+	spin_unlock_irq(&rtc_lock);
+
+	*ppos = i;
+	return p - buf;
+}
+
+static ssize_t pc_nvram_write(char *buf, size_t count, loff_t *ppos)
+{
+	char *p = buf;
+	loff_t i;
+
+	spin_lock_irq(&rtc_lock);
+	if (!__nvram_check_checksum()) {
+		spin_unlock_irq(&rtc_lock);
+		return -EIO;
+	}
+	for (i = *ppos; count > 0 && i < NVRAM_BYTES; --count, ++i, ++p)
+		__nvram_write_byte(*p, i);
+	__nvram_set_checksum();
+	spin_unlock_irq(&rtc_lock);
+
+	*ppos = i;
+	return p - buf;
+}
+
 const struct nvram_ops arch_nvram_ops = {
+	.read           = pc_nvram_read,
+	.write          = pc_nvram_write,
 	.read_byte      = pc_nvram_read_byte,
 	.write_byte     = pc_nvram_write_byte,
 	.get_size       = pc_nvram_get_size,
@@ -184,69 +224,57 @@ static loff_t nvram_misc_llseek(struct file *file, loff_t offset, int origin)
 static ssize_t nvram_misc_read(struct file *file, char __user *buf,
 			       size_t count, loff_t *ppos)
 {
-	unsigned char contents[NVRAM_BYTES];
-	unsigned i = *ppos;
-	unsigned char *tmp;
-
-	spin_lock_irq(&rtc_lock);
+	char *tmp;
+	ssize_t ret;
 
-	if (!__nvram_check_checksum())
-		goto checksum_err;
 
-	for (tmp = contents; count-- > 0 && i < NVRAM_BYTES; ++i, ++tmp)
-		*tmp = __nvram_read_byte(i);
+	if (!access_ok(buf, count))
+		return -EFAULT;
+	if (*ppos >= nvram_size)
+		return 0;
 
-	spin_unlock_irq(&rtc_lock);
+	count = min_t(size_t, count, nvram_size - *ppos);
+	count = min_t(size_t, count, PAGE_SIZE);
 
-	if (copy_to_user(buf, contents, tmp - contents))
-		return -EFAULT;
+	tmp = kmalloc(count, GFP_KERNEL);
+	if (!tmp)
+		return -ENOMEM;
 
-	*ppos = i;
+	ret = nvram_read(tmp, count, ppos);
+	if (ret <= 0)
+		goto out;
 
-	return tmp - contents;
+	if (copy_to_user(buf, tmp, ret)) {
+		*ppos -= ret;
+		ret = -EFAULT;
+	}
 
-checksum_err:
-	spin_unlock_irq(&rtc_lock);
-	return -EIO;
+out:
+	kfree(tmp);
+	return ret;
 }
 
 static ssize_t nvram_misc_write(struct file *file, const char __user *buf,
 				size_t count, loff_t *ppos)
 {
-	unsigned char contents[NVRAM_BYTES];
-	unsigned i = *ppos;
-	unsigned char *tmp;
-
-	if (i >= NVRAM_BYTES)
-		return 0;	/* Past EOF */
-
-	if (count > NVRAM_BYTES - i)
-		count = NVRAM_BYTES - i;
-	if (count > NVRAM_BYTES)
-		return -EFAULT;	/* Can't happen, but prove it to gcc */
+	char *tmp;
+	ssize_t ret;
 
-	if (copy_from_user(contents, buf, count))
+	if (!access_ok(buf, count))
 		return -EFAULT;
+	if (*ppos >= nvram_size)
+		return 0;
 
-	spin_lock_irq(&rtc_lock);
-
-	if (!__nvram_check_checksum())
-		goto checksum_err;
-
-	for (tmp = contents; count--; ++i, ++tmp)
-		__nvram_write_byte(*tmp, i);
+	count = min_t(size_t, count, nvram_size - *ppos);
+	count = min_t(size_t, count, PAGE_SIZE);
 
-	__nvram_set_checksum();
-
-	spin_unlock_irq(&rtc_lock);
+	tmp = memdup_user(buf, count);
+	if (IS_ERR(tmp))
+		return PTR_ERR(tmp);
 
-	*ppos = i;
-
-	return tmp - contents;
-
-checksum_err:
-	spin_unlock_irq(&rtc_lock);
-	return -EIO;
+	ret = nvram_write(tmp, count, ppos);
+	kfree(tmp);
+	return ret;
 }
 
 static long nvram_misc_ioctl(struct file *file, unsigned int cmd,
diff --git a/include/linux/nvram.h b/include/linux/nvram.h
index 31c763087746..9df85703735c 100644
--- a/include/linux/nvram.h
+++ b/include/linux/nvram.h
@@ -66,18 +66,46 @@ static inline void nvram_write_byte(unsigned char val, int addr)
 #endif
 }
 
+static inline ssize_t nvram_read_bytes(char *buf, size_t count, loff_t *ppos)
+{
+	ssize_t nvram_size = nvram_get_size();
+	loff_t i;
+	char *p = buf;
+
+	if (nvram_size < 0)
+		return nvram_size;
+	for (i = *ppos; count > 0 && i < nvram_size; ++i, ++p, --count)
+		*p = nvram_read_byte(i);
+	*ppos = i;
+	return p - buf;
+}
+
+static inline ssize_t nvram_write_bytes(char *buf, size_t count, loff_t *ppos)
+{
+	ssize_t nvram_size = nvram_get_size();
+	loff_t i;
+	char *p = buf;
+
+	if (nvram_size < 0)
+		return nvram_size;
+	for (i = *ppos; count > 0 && i < nvram_size; ++i, ++p, --count)
+		nvram_write_byte(*p, i);
+	*ppos = i;
+	return p - buf;
+}
+
 static inline ssize_t nvram_read(char *buf, size_t count, loff_t *ppos)
 {
 	if (arch_nvram_ops.read)
 		return arch_nvram_ops.read(buf, count, ppos);
-	return -ENODEV;
+	return nvram_read_bytes(buf, count, ppos);
 }
 
 static inline ssize_t nvram_write(char *buf, size_t count, loff_t *ppos)
 {
 	if (arch_nvram_ops.write)
 		return arch_nvram_ops.write(buf, count, ppos);
-	return -ENODEV;
+	return nvram_write_bytes(buf, count, ppos);
 }
 
 #endif  /* _LINUX_NVRAM_H */
-- 
cgit v1.2.3


From aefcb7460e0b5f35f72601b7a98eec5ca1639cf2 Mon Sep 17 00:00:00 2001
From: Finn Thain <fthain@telegraphics.com.au>
Date: Tue, 15 Jan 2019 15:18:56 +1100
Subject: m68k/mac: Fix PRAM accessors

PMU-based m68k Macs pre-date PowerMac-style NVRAM. Use the appropriate
PMU commands. Also implement the missing XPRAM accessors for VIA-based
Macs.

Acked-by: Geert Uytterhoeven <geert@linux-m68k.org>
Tested-by: Stan Johnson <userm57@yahoo.com>
Signed-off-by: Finn Thain <fthain@telegraphics.com.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/m68k/mac/misc.c     | 43 +++++++++++++++++++++++++++++++++----------
 include/uapi/linux/pmu.h |  2 ++
 2 files changed, 35 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/arch/m68k/mac/misc.c b/arch/m68k/mac/misc.c
index af000a015f68..d016ca2e0d10 100644
--- a/arch/m68k/mac/misc.c
+++ b/arch/m68k/mac/misc.c
@@ -66,23 +66,22 @@ static unsigned char pmu_pram_read_byte(int offset)
 {
 	struct adb_request req;
 
-	if (pmu_request(&req, NULL, 3, PMU_READ_NVRAM,
-			(offset >> 8) & 0xFF, offset & 0xFF) < 0)
+	if (pmu_request(&req, NULL, 3, PMU_READ_XPRAM,
+	                offset & 0xFF, 1) < 0)
 		return 0;
-	while (!req.complete)
-		pmu_poll();
-	return req.reply[3];
+	pmu_wait_complete(&req);
+
+	return req.reply[0];
 }
 
 static void pmu_pram_write_byte(unsigned char data, int offset)
 {
 	struct adb_request req;
 
-	if (pmu_request(&req, NULL, 4, PMU_WRITE_NVRAM,
-			(offset >> 8) & 0xFF, offset & 0xFF, data) < 0)
+	if (pmu_request(&req, NULL, 4, PMU_WRITE_XPRAM,
+	                offset & 0xFF, 1, data) < 0)
 		return;
-	while (!req.complete)
-		pmu_poll();
+	pmu_wait_complete(&req);
 }
 #endif /* CONFIG_ADB_PMU */
 
@@ -151,6 +150,16 @@ static void via_rtc_send(__u8 data)
 #define RTC_REG_SECONDS_3       3
 #define RTC_REG_WRITE_PROTECT   13
 
+/*
+ * Inside Mac has no information about two-byte RTC commands but
+ * the MAME/MESS source code has the essentials.
+ */
+
+#define RTC_REG_XPRAM           14
+#define RTC_CMD_XPRAM_READ      (RTC_CMD_READ(RTC_REG_XPRAM) << 8)
+#define RTC_CMD_XPRAM_WRITE     (RTC_CMD_WRITE(RTC_REG_XPRAM) << 8)
+#define RTC_CMD_XPRAM_ARG(a)    (((a & 0xE0) << 3) | ((a & 0x1F) << 2))
+
 /*
  * Execute a VIA PRAM/RTC command. For read commands
  * data should point to a one-byte buffer for the
@@ -198,11 +207,25 @@ static void via_rtc_command(int command, __u8 *data)
 
 static unsigned char via_pram_read_byte(int offset)
 {
-	return 0;
+	unsigned char temp;
+
+	via_rtc_command(RTC_CMD_XPRAM_READ | RTC_CMD_XPRAM_ARG(offset), &temp);
+
+	return temp;
 }
 
 static void via_pram_write_byte(unsigned char data, int offset)
 {
+	unsigned char temp;
+
+	temp = 0x55;
+	via_rtc_command(RTC_CMD_WRITE(RTC_REG_WRITE_PROTECT), &temp);
+
+	temp = data;
+	via_rtc_command(RTC_CMD_XPRAM_WRITE | RTC_CMD_XPRAM_ARG(offset), &temp);
+
+	temp = 0x55 | RTC_FLG_WRITE_PROTECT;
+	via_rtc_command(RTC_CMD_WRITE(RTC_REG_WRITE_PROTECT), &temp);
 }
 
 /*
diff --git a/include/uapi/linux/pmu.h b/include/uapi/linux/pmu.h
index 97256f90e6df..f2fc1bd80017 100644
--- a/include/uapi/linux/pmu.h
+++ b/include/uapi/linux/pmu.h
@@ -19,7 +19,9 @@
 #define PMU_POWER_CTRL		0x11	/* control power of some devices */
 #define PMU_ADB_CMD		0x20	/* send ADB packet */
 #define PMU_ADB_POLL_OFF	0x21	/* disable ADB auto-poll */
+#define PMU_WRITE_XPRAM		0x32	/* write eXtended Parameter RAM */
 #define PMU_WRITE_NVRAM		0x33	/* write non-volatile RAM */
+#define PMU_READ_XPRAM		0x3a	/* read eXtended Parameter RAM */
 #define PMU_READ_NVRAM		0x3b	/* read non-volatile RAM */
 #define PMU_SET_RTC		0x30	/* set real-time clock */
 #define PMU_READ_RTC		0x38	/* read real-time clock */
-- 
cgit v1.2.3


From 95ac14b8a32817dcd1f13ae4787891484966d2d5 Mon Sep 17 00:00:00 2001
From: Finn Thain <fthain@telegraphics.com.au>
Date: Tue, 15 Jan 2019 15:18:56 +1100
Subject: powerpc: Implement nvram ioctls

Add the powerpc-specific ioctls to the nvram module. This allows the nvram
module to replace the generic_nvram module.

Tested-by: Stan Johnson <userm57@yahoo.com>
Signed-off-by: Finn Thain <fthain@telegraphics.com.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/char/nvram.c  | 38 ++++++++++++++++++++++++++++++++++++++
 include/linux/nvram.h |  2 ++
 2 files changed, 40 insertions(+)

(limited to 'include')

diff --git a/drivers/char/nvram.c b/drivers/char/nvram.c
index c9e295d73dc5..944f05fddacd 100644
--- a/drivers/char/nvram.c
+++ b/drivers/char/nvram.c
@@ -48,6 +48,9 @@
 #include <linux/mutex.h>
 #include <linux/pagemap.h>
 
+#ifdef CONFIG_PPC
+#include <asm/nvram.h>
+#endif
 
 static DEFINE_MUTEX(nvram_mutex);
 static DEFINE_SPINLOCK(nvram_state_lock);
@@ -283,6 +286,38 @@ static long nvram_misc_ioctl(struct file *file, unsigned int cmd,
 	long ret = -ENOTTY;
 
 	switch (cmd) {
+#ifdef CONFIG_PPC
+	case OBSOLETE_PMAC_NVRAM_GET_OFFSET:
+		pr_warn("nvram: Using obsolete PMAC_NVRAM_GET_OFFSET ioctl\n");
+		/* fall through */
+	case IOC_NVRAM_GET_OFFSET:
+		ret = -EINVAL;
+#ifdef CONFIG_PPC_PMAC
+		if (machine_is(powermac)) {
+			int part, offset;
+
+			if (copy_from_user(&part, (void __user *)arg,
+					   sizeof(part)) != 0)
+				return -EFAULT;
+			if (part < pmac_nvram_OF || part > pmac_nvram_NR)
+				return -EINVAL;
+			offset = pmac_get_partition(part);
+			if (copy_to_user((void __user *)arg,
+					 &offset, sizeof(offset)) != 0)
+				return -EFAULT;
+			ret = 0;
+		}
+#endif
+		break;
+	case IOC_NVRAM_SYNC:
+		if (ppc_md.nvram_sync != NULL) {
+			mutex_lock(&nvram_mutex);
+			ppc_md.nvram_sync();
+			mutex_unlock(&nvram_mutex);
+		}
+		ret = 0;
+		break;
+#elif defined(CONFIG_X86) || defined(CONFIG_M68K)
 	case NVRAM_INIT:
 		/* initialize NVRAM contents and checksum */
 		if (!capable(CAP_SYS_ADMIN))
@@ -306,6 +341,7 @@ static long nvram_misc_ioctl(struct file *file, unsigned int cmd,
 			mutex_unlock(&nvram_mutex);
 		}
 		break;
+#endif /* CONFIG_X86 || CONFIG_M68K */
 	}
 	return ret;
 }
@@ -321,12 +357,14 @@ static int nvram_misc_open(struct inode *inode, struct file *file)
 		return -EBUSY;
 	}
 
+#if defined(CONFIG_X86) || defined(CONFIG_M68K)
 	/* Prevent multiple writers if the set_checksum ioctl is implemented. */
 	if ((arch_nvram_ops.set_checksum != NULL) &&
 	    (file->f_mode & FMODE_WRITE) && (nvram_open_mode & NVRAM_WRITE)) {
 		spin_unlock(&nvram_state_lock);
 		return -EBUSY;
 	}
+#endif
 
 	if (file->f_flags & O_EXCL)
 		nvram_open_mode |= NVRAM_EXCL;
diff --git a/include/linux/nvram.h b/include/linux/nvram.h
index 9df85703735c..9e3a957c8f1f 100644
--- a/include/linux/nvram.h
+++ b/include/linux/nvram.h
@@ -31,8 +31,10 @@ struct nvram_ops {
 	void            (*write_byte)(unsigned char, int);
 	ssize_t         (*read)(char *, size_t, loff_t *);
 	ssize_t         (*write)(char *, size_t, loff_t *);
+#if defined(CONFIG_X86) || defined(CONFIG_M68K)
 	long            (*initialize)(void);
 	long            (*set_checksum)(void);
+#endif
 };
 
 extern const struct nvram_ops arch_nvram_ops;
-- 
cgit v1.2.3


From f9c3a570f5fc584f2ca2dd222d1b8c8537fc55f6 Mon Sep 17 00:00:00 2001
From: Finn Thain <fthain@telegraphics.com.au>
Date: Tue, 15 Jan 2019 15:18:56 +1100
Subject: powerpc: Enable HAVE_ARCH_NVRAM_OPS and disable GENERIC_NVRAM

Switch PPC32 kernels from the generic_nvram module to the nvram module.

Also fix a theoretical bug where CHRP omits the chrp_nvram_init() call
when CONFIG_NVRAM_MODULE=m.

Tested-by: Stan Johnson <userm57@yahoo.com>
Signed-off-by: Finn Thain <fthain@telegraphics.com.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/powerpc/Kconfig                    |  6 +-----
 arch/powerpc/include/asm/nvram.h        |  3 ---
 arch/powerpc/kernel/setup_32.c          | 11 -----------
 arch/powerpc/platforms/chrp/Makefile    |  2 +-
 arch/powerpc/platforms/chrp/setup.c     |  2 +-
 arch/powerpc/platforms/powermac/setup.c |  3 +--
 drivers/char/Kconfig                    | 19 +++++++++----------
 include/linux/nvram.h                   | 20 ++++++++++++++++++++
 8 files changed, 33 insertions(+), 33 deletions(-)

(limited to 'include')

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 2890d36eb531..f62e6a3f9c4e 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -178,6 +178,7 @@ config PPC
 	select HAVE_ARCH_KGDB
 	select HAVE_ARCH_MMAP_RND_BITS
 	select HAVE_ARCH_MMAP_RND_COMPAT_BITS	if COMPAT
+	select HAVE_ARCH_NVRAM_OPS		if PPC32
 	select HAVE_ARCH_SECCOMP_FILTER
 	select HAVE_ARCH_TRACEHOOK
 	select HAVE_CBPF_JIT			if !PPC64
@@ -274,11 +275,6 @@ config SYSVIPC_COMPAT
 	depends on COMPAT && SYSVIPC
 	default y
 
-# All PPC32s use generic nvram driver through ppc_md
-config GENERIC_NVRAM
-	bool
-	default y if PPC32
-
 config SCHED_OMIT_FRAME_POINTER
 	bool
 	default y
diff --git a/arch/powerpc/include/asm/nvram.h b/arch/powerpc/include/asm/nvram.h
index 56a388da9c4f..629a5cdcc865 100644
--- a/arch/powerpc/include/asm/nvram.h
+++ b/arch/powerpc/include/asm/nvram.h
@@ -78,9 +78,6 @@ extern int	pmac_get_partition(int partition);
 extern u8	pmac_xpram_read(int xpaddr);
 extern void	pmac_xpram_write(int xpaddr, u8 data);
 
-/* Synchronize NVRAM */
-extern void	nvram_sync(void);
-
 /* Initialize NVRAM OS partition */
 extern int __init nvram_init_os_partition(struct nvram_os_partition *part);
 
diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
index f5107796e2d7..c31082233a25 100644
--- a/arch/powerpc/kernel/setup_32.c
+++ b/arch/powerpc/kernel/setup_32.c
@@ -148,17 +148,6 @@ static int __init ppc_setup_l3cr(char *str)
 }
 __setup("l3cr=", ppc_setup_l3cr);
 
-#ifdef CONFIG_GENERIC_NVRAM
-
-void nvram_sync(void)
-{
-	if (ppc_md.nvram_sync)
-		ppc_md.nvram_sync();
-}
-EXPORT_SYMBOL(nvram_sync);
-
-#endif /* CONFIG_NVRAM */
-
 static int __init ppc_init(void)
 {
 	/* clear the progress line */
diff --git a/arch/powerpc/platforms/chrp/Makefile b/arch/powerpc/platforms/chrp/Makefile
index 4b3bfadc70fa..dc3465cc8bc6 100644
--- a/arch/powerpc/platforms/chrp/Makefile
+++ b/arch/powerpc/platforms/chrp/Makefile
@@ -1,3 +1,3 @@
 obj-y				+= setup.o time.o pegasos_eth.o pci.o
 obj-$(CONFIG_SMP)		+= smp.o
-obj-$(CONFIG_NVRAM)		+= nvram.o
+obj-$(CONFIG_NVRAM:m=y)		+= nvram.o
diff --git a/arch/powerpc/platforms/chrp/setup.c b/arch/powerpc/platforms/chrp/setup.c
index e66644e0fb40..e8e804289c8e 100644
--- a/arch/powerpc/platforms/chrp/setup.c
+++ b/arch/powerpc/platforms/chrp/setup.c
@@ -550,7 +550,7 @@ static void __init chrp_init_IRQ(void)
 static void __init
 chrp_init2(void)
 {
-#ifdef CONFIG_NVRAM
+#if IS_ENABLED(CONFIG_NVRAM)
 	chrp_nvram_init();
 #endif
 
diff --git a/arch/powerpc/platforms/powermac/setup.c b/arch/powerpc/platforms/powermac/setup.c
index 2e8221e20ee8..b47f49cf9c4d 100644
--- a/arch/powerpc/platforms/powermac/setup.c
+++ b/arch/powerpc/platforms/powermac/setup.c
@@ -316,8 +316,7 @@ static void __init pmac_setup_arch(void)
 	find_via_pmu();
 	smu_init();
 
-#if defined(CONFIG_NVRAM) || defined(CONFIG_NVRAM_MODULE) || \
-    defined(CONFIG_PPC64)
+#if IS_ENABLED(CONFIG_NVRAM) || defined(CONFIG_PPC64)
 	pmac_nvram_init();
 #endif
 #ifdef CONFIG_PPC32
diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig
index ce9979529cf3..72866a004f07 100644
--- a/drivers/char/Kconfig
+++ b/drivers/char/Kconfig
@@ -244,25 +244,24 @@ source "drivers/char/hw_random/Kconfig"
 
 config NVRAM
 	tristate "/dev/nvram support"
-	depends on X86 || GENERIC_NVRAM || HAVE_ARCH_NVRAM_OPS
-	default M68K
+	depends on X86 || HAVE_ARCH_NVRAM_OPS
+	default M68K || PPC
 	---help---
 	  If you say Y here and create a character special file /dev/nvram
 	  with major number 10 and minor number 144 using mknod ("man mknod"),
-	  you get read and write access to the extra bytes of non-volatile
-	  memory in the real time clock (RTC), which is contained in every PC
-	  and most Ataris.  The actual number of bytes varies, depending on the
-	  nvram in the system, but is usually 114 (128-14 for the RTC).
-
-	  This memory is conventionally called "CMOS RAM" on PCs and "NVRAM"
-	  on Ataris. /dev/nvram may be used to view settings there, or to
-	  change them (with some utility). It could also be used to frequently
+	  you get read and write access to the non-volatile memory.
+
+	  /dev/nvram may be used to view settings in NVRAM or to change them
+	  (with some utility). It could also be used to frequently
 	  save a few bits of very important data that may not be lost over
 	  power-off and for which writing to disk is too insecure. Note
 	  however that most NVRAM space in a PC belongs to the BIOS and you
 	  should NEVER idly tamper with it. See Ralf Brown's interrupt list
 	  for a guide to the use of CMOS bytes by your BIOS.
 
+	  This memory is conventionally called "NVRAM" on PowerPC machines,
+	  "CMOS RAM" on PCs, "NVRAM" on Ataris and "PRAM" on Macintoshes.
+
 	  To compile this driver as a module, choose M here: the
 	  module will be called nvram.
 
diff --git a/include/linux/nvram.h b/include/linux/nvram.h
index 9e3a957c8f1f..d29d9c93a927 100644
--- a/include/linux/nvram.h
+++ b/include/linux/nvram.h
@@ -5,6 +5,10 @@
 #include <linux/errno.h>
 #include <uapi/linux/nvram.h>
 
+#ifdef CONFIG_PPC
+#include <asm/machdep.h>
+#endif
+
 /**
  * struct nvram_ops - NVRAM functionality made available to drivers
  * @read: validate checksum (if any) then load a range of bytes from NVRAM
@@ -42,6 +46,8 @@ extern const struct nvram_ops arch_nvram_ops;
 static inline ssize_t nvram_get_size(void)
 {
 #ifdef CONFIG_PPC
+	if (ppc_md.nvram_size)
+		return ppc_md.nvram_size();
 #else
 	if (arch_nvram_ops.get_size)
 		return arch_nvram_ops.get_size();
@@ -52,6 +58,8 @@ static inline ssize_t nvram_get_size(void)
 static inline unsigned char nvram_read_byte(int addr)
 {
 #ifdef CONFIG_PPC
+	if (ppc_md.nvram_read_val)
+		return ppc_md.nvram_read_val(addr);
 #else
 	if (arch_nvram_ops.read_byte)
 		return arch_nvram_ops.read_byte(addr);
@@ -62,6 +70,8 @@ static inline unsigned char nvram_read_byte(int addr)
 static inline void nvram_write_byte(unsigned char val, int addr)
 {
 #ifdef CONFIG_PPC
+	if (ppc_md.nvram_write_val)
+		ppc_md.nvram_write_val(addr, val);
 #else
 	if (arch_nvram_ops.write_byte)
 		arch_nvram_ops.write_byte(val, addr);
@@ -98,15 +108,25 @@ static inline ssize_t nvram_write_bytes(char *buf, size_t count, loff_t *ppos)
 
 static inline ssize_t nvram_read(char *buf, size_t count, loff_t *ppos)
 {
+#ifdef CONFIG_PPC
+	if (ppc_md.nvram_read)
+		return ppc_md.nvram_read(buf, count, ppos);
+#else
 	if (arch_nvram_ops.read)
 		return arch_nvram_ops.read(buf, count, ppos);
+#endif
 	return nvram_read_bytes(buf, count, ppos);
 }
 
 static inline ssize_t nvram_write(char *buf, size_t count, loff_t *ppos)
 {
+#ifdef CONFIG_PPC
+	if (ppc_md.nvram_write)
+		return ppc_md.nvram_write(buf, count, ppos);
+#else
 	if (arch_nvram_ops.write)
 		return arch_nvram_ops.write(buf, count, ppos);
+#endif
 	return nvram_write_bytes(buf, count, ppos);
 }
 
-- 
cgit v1.2.3


From 8092e79204e7884f4bee3584ecfe6cf4a124d129 Mon Sep 17 00:00:00 2001
From: Andrey Smirnov <andrew.smirnov@gmail.com>
Date: Thu, 20 Dec 2018 23:28:37 -0800
Subject: ihex: Share code between ihex_validate_fw() and ihex_next_binrec()

Convert both ihex_validate_fw() and ihex_next_binrec() to use a helper
function to calculate next record offest. This way we only have one
place implementing next record offset calculation logic. No functional
change intended.

Cc: Chris Healy <cphealy@gmail.com>
Cc: Kyle McMartin <kyle@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Masahiro Yamada <yamada.masahiro@socionext.com>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: linux-kernel <linux-kernel@vger.kernel.org>
Signed-off-by: Andrey Smirnov <andrew.smirnov@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/ihex.h | 21 +++++++++++++--------
 1 file changed, 13 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/ihex.h b/include/linux/ihex.h
index 75c194391869..9c701521176b 100644
--- a/include/linux/ihex.h
+++ b/include/linux/ihex.h
@@ -23,29 +23,34 @@ struct ihex_binrec {
 
 /* Find the next record, taking into account the 4-byte alignment */
 static inline const struct ihex_binrec *
-ihex_next_binrec(const struct ihex_binrec *rec)
+__ihex_next_binrec(const struct ihex_binrec *rec)
 {
 	int next = ((be16_to_cpu(rec->len) + 5) & ~3) - 2;
 	rec = (void *)&rec->data[next];
 
+	return rec;
+}
+
+static inline const struct ihex_binrec *
+ihex_next_binrec(const struct ihex_binrec *rec)
+{
+	rec = __ihex_next_binrec(rec);
+
 	return be16_to_cpu(rec->len) ? rec : NULL;
 }
 
 /* Check that ihex_next_binrec() won't take us off the end of the image... */
 static inline int ihex_validate_fw(const struct firmware *fw)
 {
-	const struct ihex_binrec *rec;
-	size_t ofs = 0;
+	const struct ihex_binrec *end, *rec;
 
-	while (ofs <= fw->size - sizeof(*rec)) {
-		rec = (void *)&fw->data[ofs];
+	rec = (const void *)fw->data;
+	end = (const void *)&fw->data[fw->size - sizeof(*end)];
 
+	for (; rec <= end; rec = __ihex_next_binrec(rec)) {
 		/* Zero length marks end of records */
 		if (!be16_to_cpu(rec->len))
 			return 0;
-
-		/* Point to next record... */
-		ofs += (sizeof(*rec) + be16_to_cpu(rec->len) + 3) & ~3;
 	}
 	return -EINVAL;
 }
-- 
cgit v1.2.3


From 5158c36ec9d0b3343f58987cec7ebfd866331fd0 Mon Sep 17 00:00:00 2001
From: Andrey Smirnov <andrew.smirnov@gmail.com>
Date: Thu, 20 Dec 2018 23:28:38 -0800
Subject: ihex: Check if zero-length record is at the end of the blob

When verifying the validity of IHEX file we need to make sure that
zero-length record we found is located at the end of the file. Not
doing that could result in an invalid file with a bogus zero-length in
the middle short-circuiting the check and being reported as valid.

Cc: Chris Healy <cphealy@gmail.com>
Cc: Kyle McMartin <kyle@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Masahiro Yamada <yamada.masahiro@socionext.com>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: linux-kernel <linux-kernel@vger.kernel.org>
Signed-off-by: Andrey Smirnov <andrew.smirnov@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/ihex.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/ihex.h b/include/linux/ihex.h
index 9c701521176b..9130f307a420 100644
--- a/include/linux/ihex.h
+++ b/include/linux/ihex.h
@@ -49,7 +49,7 @@ static inline int ihex_validate_fw(const struct firmware *fw)
 
 	for (; rec <= end; rec = __ihex_next_binrec(rec)) {
 		/* Zero length marks end of records */
-		if (!be16_to_cpu(rec->len))
+		if (rec == end && !be16_to_cpu(rec->len))
 			return 0;
 	}
 	return -EINVAL;
-- 
cgit v1.2.3


From 9fb4ab4d3dd665a62da9c176a89e7c7feaf5d9e4 Mon Sep 17 00:00:00 2001
From: Andrey Smirnov <andrew.smirnov@gmail.com>
Date: Thu, 20 Dec 2018 23:28:39 -0800
Subject: ihex: Simplify next record offset calculation

Next record calucaltion can be reduced to a much more tivial ALIGN
operation as follows:

1. Splitting 5 into 2 + 3 we get

   next = ((be16_to_cpu(rec->len) + 2 + 3) & ~3) - 2            (1)

2. Using ALIGN macro we reduce (1) to:

   ALIGN(be16_to_cpu(rec->len) + 2, 4) - 2                      (2)

3. Subsituting 'next' in original next record calucation we get:

   (void *)&rec->data[ALIGN(be16_to_cpu(rec->len) + 2, 4) - 2]  (3)

4. Converting array index to pointer arithmetic we convert (3) into:

   (void *)rec + sizeof(*rec) +
   	 ALIGN(be16_to_cpu(rec->len) + 2, 4) - 2		(4)

5. Subsituting sizeof(*rec) with its value, 6, and substracting 2,
   in (4) we get:

   (void *)rec + ALIGN(be16_to_cpu(rec->len) + 2, 4) + 4        (5)

6. Since ALIGN(X, 4) + 4 == ALIGN(X + 4, 4), (5) can be converted to:

   (void *)rec + ALIGN(be16_to_cpu(rec->len) + 6, 4)            (6)

5. Subsituting 6 in (6) to sizeof(*rec) we get:

   (void *)rec + ALIGN(be16_to_cpu(rec->len) + sizeof(*rec), 4) (7)

Using expression (7) should make it more clear that next record is
located by adding full size of the current record (payload + auxiliary
data) aligned to 4 bytes, to the location of the current one. No
functional change intended.

Cc: Chris Healy <cphealy@gmail.com>
Cc: Kyle McMartin <kyle@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Masahiro Yamada <yamada.masahiro@socionext.com>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: linux-kernel <linux-kernel@vger.kernel.org>
Signed-off-by: Andrey Smirnov <andrew.smirnov@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/ihex.h | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/ihex.h b/include/linux/ihex.h
index 9130f307a420..98cb5ce0b0a0 100644
--- a/include/linux/ihex.h
+++ b/include/linux/ihex.h
@@ -21,14 +21,18 @@ struct ihex_binrec {
 	uint8_t data[0];
 } __attribute__((packed));
 
+static inline uint16_t ihex_binrec_size(const struct ihex_binrec *p)
+{
+	return be16_to_cpu(p->len) + sizeof(*p);
+}
+
 /* Find the next record, taking into account the 4-byte alignment */
 static inline const struct ihex_binrec *
 __ihex_next_binrec(const struct ihex_binrec *rec)
 {
-	int next = ((be16_to_cpu(rec->len) + 5) & ~3) - 2;
-	rec = (void *)&rec->data[next];
+	const void *p = rec;
 
-	return rec;
+	return p + ALIGN(ihex_binrec_size(rec), 4);
 }
 
 static inline const struct ihex_binrec *
-- 
cgit v1.2.3


From 11f1ceca7031deefc1a34236ab7b94360016b71d Mon Sep 17 00:00:00 2001
From: Georgi Djakov <georgi.djakov@linaro.org>
Date: Wed, 16 Jan 2019 18:10:56 +0200
Subject: interconnect: Add generic on-chip interconnect API

This patch introduces a new API to get requirements and configure the
interconnect buses across the entire chipset to fit with the current
demand.

The API is using a consumer/provider-based model, where the providers are
the interconnect buses and the consumers could be various drivers.
The consumers request interconnect resources (path) between endpoints and
set the desired constraints on this data flow path. The providers receive
requests from consumers and aggregate these requests for all master-slave
pairs on that path. Then the providers configure each node along the path
to support a bandwidth that satisfies all bandwidth requests that cross
through that node. The topology could be complicated and multi-tiered and
is SoC specific.

Reviewed-by: Evan Green <evgreen@chromium.org>
Signed-off-by: Georgi Djakov <georgi.djakov@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/interconnect/interconnect.rst |  94 +++++
 drivers/Kconfig                             |   2 +
 drivers/Makefile                            |   1 +
 drivers/interconnect/Kconfig                |  10 +
 drivers/interconnect/Makefile               |   5 +
 drivers/interconnect/core.c                 | 567 ++++++++++++++++++++++++++++
 include/linux/interconnect-provider.h       | 125 ++++++
 include/linux/interconnect.h                |  52 +++
 8 files changed, 856 insertions(+)
 create mode 100644 Documentation/interconnect/interconnect.rst
 create mode 100644 drivers/interconnect/Kconfig
 create mode 100644 drivers/interconnect/Makefile
 create mode 100644 drivers/interconnect/core.c
 create mode 100644 include/linux/interconnect-provider.h
 create mode 100644 include/linux/interconnect.h

(limited to 'include')

diff --git a/Documentation/interconnect/interconnect.rst b/Documentation/interconnect/interconnect.rst
new file mode 100644
index 000000000000..b8107dcc4cd3
--- /dev/null
+++ b/Documentation/interconnect/interconnect.rst
@@ -0,0 +1,94 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=====================================
+GENERIC SYSTEM INTERCONNECT SUBSYSTEM
+=====================================
+
+Introduction
+------------
+
+This framework is designed to provide a standard kernel interface to control
+the settings of the interconnects on an SoC. These settings can be throughput,
+latency and priority between multiple interconnected devices or functional
+blocks. This can be controlled dynamically in order to save power or provide
+maximum performance.
+
+The interconnect bus is hardware with configurable parameters, which can be
+set on a data path according to the requests received from various drivers.
+An example of interconnect buses are the interconnects between various
+components or functional blocks in chipsets. There can be multiple interconnects
+on an SoC that can be multi-tiered.
+
+Below is a simplified diagram of a real-world SoC interconnect bus topology.
+
+::
+
+ +----------------+    +----------------+
+ | HW Accelerator |--->|      M NoC     |<---------------+
+ +----------------+    +----------------+                |
+                         |      |                    +------------+
+  +-----+  +-------------+      V       +------+     |            |
+  | DDR |  |                +--------+  | PCIe |     |            |
+  +-----+  |                | Slaves |  +------+     |            |
+    ^ ^    |                +--------+     |         |   C NoC    |
+    | |    V                               V         |            |
+ +------------------+   +------------------------+   |            |   +-----+
+ |                  |-->|                        |-->|            |-->| CPU |
+ |                  |-->|                        |<--|            |   +-----+
+ |     Mem NoC      |   |         S NoC          |   +------------+
+ |                  |<--|                        |---------+    |
+ |                  |<--|                        |<------+ |    |   +--------+
+ +------------------+   +------------------------+       | |    +-->| Slaves |
+   ^  ^    ^    ^          ^                             | |        +--------+
+   |  |    |    |          |                             | V
+ +------+  |  +-----+   +-----+  +---------+   +----------------+   +--------+
+ | CPUs |  |  | GPU |   | DSP |  | Masters |-->|       P NoC    |-->| Slaves |
+ +------+  |  +-----+   +-----+  +---------+   +----------------+   +--------+
+           |
+       +-------+
+       | Modem |
+       +-------+
+
+Terminology
+-----------
+
+Interconnect provider is the software definition of the interconnect hardware.
+The interconnect providers on the above diagram are M NoC, S NoC, C NoC, P NoC
+and Mem NoC.
+
+Interconnect node is the software definition of the interconnect hardware
+port. Each interconnect provider consists of multiple interconnect nodes,
+which are connected to other SoC components including other interconnect
+providers. The point on the diagram where the CPUs connect to the memory is
+called an interconnect node, which belongs to the Mem NoC interconnect provider.
+
+Interconnect endpoints are the first or the last element of the path. Every
+endpoint is a node, but not every node is an endpoint.
+
+Interconnect path is everything between two endpoints including all the nodes
+that have to be traversed to reach from a source to destination node. It may
+include multiple master-slave pairs across several interconnect providers.
+
+Interconnect consumers are the entities which make use of the data paths exposed
+by the providers. The consumers send requests to providers requesting various
+throughput, latency and priority. Usually the consumers are device drivers, that
+send request based on their needs. An example for a consumer is a video decoder
+that supports various formats and image sizes.
+
+Interconnect providers
+----------------------
+
+Interconnect provider is an entity that implements methods to initialize and
+configure interconnect bus hardware. The interconnect provider drivers should
+be registered with the interconnect provider core.
+
+.. kernel-doc:: include/linux/interconnect-provider.h
+
+Interconnect consumers
+----------------------
+
+Interconnect consumers are the clients which use the interconnect APIs to
+get paths between endpoints and set their bandwidth/latency/QoS requirements
+for these interconnect paths.
+
+.. kernel-doc:: include/linux/interconnect.h
diff --git a/drivers/Kconfig b/drivers/Kconfig
index 4f9f99057ff8..45f9decb9848 100644
--- a/drivers/Kconfig
+++ b/drivers/Kconfig
@@ -228,4 +228,6 @@ source "drivers/siox/Kconfig"
 
 source "drivers/slimbus/Kconfig"
 
+source "drivers/interconnect/Kconfig"
+
 endmenu
diff --git a/drivers/Makefile b/drivers/Makefile
index e1ce029d28fd..bb15b9d0e793 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -186,3 +186,4 @@ obj-$(CONFIG_MULTIPLEXER)	+= mux/
 obj-$(CONFIG_UNISYS_VISORBUS)	+= visorbus/
 obj-$(CONFIG_SIOX)		+= siox/
 obj-$(CONFIG_GNSS)		+= gnss/
+obj-$(CONFIG_INTERCONNECT)	+= interconnect/
diff --git a/drivers/interconnect/Kconfig b/drivers/interconnect/Kconfig
new file mode 100644
index 000000000000..a261c7d41deb
--- /dev/null
+++ b/drivers/interconnect/Kconfig
@@ -0,0 +1,10 @@
+menuconfig INTERCONNECT
+	tristate "On-Chip Interconnect management support"
+	help
+	  Support for management of the on-chip interconnects.
+
+	  This framework is designed to provide a generic interface for
+	  managing the interconnects in a SoC.
+
+	  If unsure, say no.
+
diff --git a/drivers/interconnect/Makefile b/drivers/interconnect/Makefile
new file mode 100644
index 000000000000..7a01f33b5593
--- /dev/null
+++ b/drivers/interconnect/Makefile
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0
+
+icc-core-objs				:= core.o
+
+obj-$(CONFIG_INTERCONNECT)		+= icc-core.o
diff --git a/drivers/interconnect/core.c b/drivers/interconnect/core.c
new file mode 100644
index 000000000000..2b937b4f43c4
--- /dev/null
+++ b/drivers/interconnect/core.c
@@ -0,0 +1,567 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Interconnect framework core driver
+ *
+ * Copyright (c) 2017-2019, Linaro Ltd.
+ * Author: Georgi Djakov <georgi.djakov@linaro.org>
+ */
+
+#include <linux/device.h>
+#include <linux/idr.h>
+#include <linux/init.h>
+#include <linux/interconnect.h>
+#include <linux/interconnect-provider.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/slab.h>
+#include <linux/overflow.h>
+
+static DEFINE_IDR(icc_idr);
+static LIST_HEAD(icc_providers);
+static DEFINE_MUTEX(icc_lock);
+
+/**
+ * struct icc_req - constraints that are attached to each node
+ * @req_node: entry in list of requests for the particular @node
+ * @node: the interconnect node to which this constraint applies
+ * @dev: reference to the device that sets the constraints
+ * @avg_bw: an integer describing the average bandwidth in kBps
+ * @peak_bw: an integer describing the peak bandwidth in kBps
+ */
+struct icc_req {
+	struct hlist_node req_node;
+	struct icc_node *node;
+	struct device *dev;
+	u32 avg_bw;
+	u32 peak_bw;
+};
+
+/**
+ * struct icc_path - interconnect path structure
+ * @num_nodes: number of hops (nodes)
+ * @reqs: array of the requests applicable to this path of nodes
+ */
+struct icc_path {
+	size_t num_nodes;
+	struct icc_req reqs[];
+};
+
+static struct icc_node *node_find(const int id)
+{
+	return idr_find(&icc_idr, id);
+}
+
+static struct icc_path *path_init(struct device *dev, struct icc_node *dst,
+				  ssize_t num_nodes)
+{
+	struct icc_node *node = dst;
+	struct icc_path *path;
+	int i;
+
+	path = kzalloc(struct_size(path, reqs, num_nodes), GFP_KERNEL);
+	if (!path)
+		return ERR_PTR(-ENOMEM);
+
+	path->num_nodes = num_nodes;
+
+	for (i = num_nodes - 1; i >= 0; i--) {
+		node->provider->users++;
+		hlist_add_head(&path->reqs[i].req_node, &node->req_list);
+		path->reqs[i].node = node;
+		path->reqs[i].dev = dev;
+		/* reference to previous node was saved during path traversal */
+		node = node->reverse;
+	}
+
+	return path;
+}
+
+static struct icc_path *path_find(struct device *dev, struct icc_node *src,
+				  struct icc_node *dst)
+{
+	struct icc_path *path = ERR_PTR(-EPROBE_DEFER);
+	struct icc_node *n, *node = NULL;
+	struct list_head traverse_list;
+	struct list_head edge_list;
+	struct list_head visited_list;
+	size_t i, depth = 1;
+	bool found = false;
+
+	INIT_LIST_HEAD(&traverse_list);
+	INIT_LIST_HEAD(&edge_list);
+	INIT_LIST_HEAD(&visited_list);
+
+	list_add(&src->search_list, &traverse_list);
+	src->reverse = NULL;
+
+	do {
+		list_for_each_entry_safe(node, n, &traverse_list, search_list) {
+			if (node == dst) {
+				found = true;
+				list_splice_init(&edge_list, &visited_list);
+				list_splice_init(&traverse_list, &visited_list);
+				break;
+			}
+			for (i = 0; i < node->num_links; i++) {
+				struct icc_node *tmp = node->links[i];
+
+				if (!tmp) {
+					path = ERR_PTR(-ENOENT);
+					goto out;
+				}
+
+				if (tmp->is_traversed)
+					continue;
+
+				tmp->is_traversed = true;
+				tmp->reverse = node;
+				list_add_tail(&tmp->search_list, &edge_list);
+			}
+		}
+
+		if (found)
+			break;
+
+		list_splice_init(&traverse_list, &visited_list);
+		list_splice_init(&edge_list, &traverse_list);
+
+		/* count the hops including the source */
+		depth++;
+
+	} while (!list_empty(&traverse_list));
+
+out:
+
+	/* reset the traversed state */
+	list_for_each_entry_reverse(n, &visited_list, search_list)
+		n->is_traversed = false;
+
+	if (found)
+		path = path_init(dev, dst, depth);
+
+	return path;
+}
+
+/*
+ * We want the path to honor all bandwidth requests, so the average and peak
+ * bandwidth requirements from each consumer are aggregated at each node.
+ * The aggregation is platform specific, so each platform can customize it by
+ * implementing its own aggregate() function.
+ */
+
+static int aggregate_requests(struct icc_node *node)
+{
+	struct icc_provider *p = node->provider;
+	struct icc_req *r;
+
+	node->avg_bw = 0;
+	node->peak_bw = 0;
+
+	hlist_for_each_entry(r, &node->req_list, req_node)
+		p->aggregate(node, r->avg_bw, r->peak_bw,
+			     &node->avg_bw, &node->peak_bw);
+
+	return 0;
+}
+
+static int apply_constraints(struct icc_path *path)
+{
+	struct icc_node *next, *prev = NULL;
+	int ret = -EINVAL;
+	int i;
+
+	for (i = 0; i < path->num_nodes; i++) {
+		next = path->reqs[i].node;
+
+		/*
+		 * Both endpoints should be valid master-slave pairs of the
+		 * same interconnect provider that will be configured.
+		 */
+		if (!prev || next->provider != prev->provider) {
+			prev = next;
+			continue;
+		}
+
+		/* set the constraints */
+		ret = next->provider->set(prev, next);
+		if (ret)
+			goto out;
+
+		prev = next;
+	}
+out:
+	return ret;
+}
+
+/**
+ * icc_set_bw() - set bandwidth constraints on an interconnect path
+ * @path: reference to the path returned by icc_get()
+ * @avg_bw: average bandwidth in kilobytes per second
+ * @peak_bw: peak bandwidth in kilobytes per second
+ *
+ * This function is used by an interconnect consumer to express its own needs
+ * in terms of bandwidth for a previously requested path between two endpoints.
+ * The requests are aggregated and each node is updated accordingly. The entire
+ * path is locked by a mutex to ensure that the set() is completed.
+ * The @path can be NULL when the "interconnects" DT properties is missing,
+ * which will mean that no constraints will be set.
+ *
+ * Returns 0 on success, or an appropriate error code otherwise.
+ */
+int icc_set_bw(struct icc_path *path, u32 avg_bw, u32 peak_bw)
+{
+	struct icc_node *node;
+	size_t i;
+	int ret;
+
+	if (!path)
+		return 0;
+
+	mutex_lock(&icc_lock);
+
+	for (i = 0; i < path->num_nodes; i++) {
+		node = path->reqs[i].node;
+
+		/* update the consumer request for this path */
+		path->reqs[i].avg_bw = avg_bw;
+		path->reqs[i].peak_bw = peak_bw;
+
+		/* aggregate requests for this node */
+		aggregate_requests(node);
+	}
+
+	ret = apply_constraints(path);
+	if (ret)
+		pr_debug("interconnect: error applying constraints (%d)\n",
+			 ret);
+
+	mutex_unlock(&icc_lock);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(icc_set_bw);
+
+/**
+ * icc_get() - return a handle for path between two endpoints
+ * @dev: the device requesting the path
+ * @src_id: source device port id
+ * @dst_id: destination device port id
+ *
+ * This function will search for a path between two endpoints and return an
+ * icc_path handle on success. Use icc_put() to release
+ * constraints when they are not needed anymore.
+ * If the interconnect API is disabled, NULL is returned and the consumer
+ * drivers will still build. Drivers are free to handle this specifically,
+ * but they don't have to.
+ *
+ * Return: icc_path pointer on success, ERR_PTR() on error or NULL if the
+ * interconnect API is disabled.
+ */
+struct icc_path *icc_get(struct device *dev, const int src_id, const int dst_id)
+{
+	struct icc_node *src, *dst;
+	struct icc_path *path = ERR_PTR(-EPROBE_DEFER);
+
+	mutex_lock(&icc_lock);
+
+	src = node_find(src_id);
+	if (!src)
+		goto out;
+
+	dst = node_find(dst_id);
+	if (!dst)
+		goto out;
+
+	path = path_find(dev, src, dst);
+	if (IS_ERR(path))
+		dev_err(dev, "%s: invalid path=%ld\n", __func__, PTR_ERR(path));
+
+out:
+	mutex_unlock(&icc_lock);
+	return path;
+}
+EXPORT_SYMBOL_GPL(icc_get);
+
+/**
+ * icc_put() - release the reference to the icc_path
+ * @path: interconnect path
+ *
+ * Use this function to release the constraints on a path when the path is
+ * no longer needed. The constraints will be re-aggregated.
+ */
+void icc_put(struct icc_path *path)
+{
+	struct icc_node *node;
+	size_t i;
+	int ret;
+
+	if (!path || WARN_ON(IS_ERR(path)))
+		return;
+
+	ret = icc_set_bw(path, 0, 0);
+	if (ret)
+		pr_err("%s: error (%d)\n", __func__, ret);
+
+	mutex_lock(&icc_lock);
+	for (i = 0; i < path->num_nodes; i++) {
+		node = path->reqs[i].node;
+		hlist_del(&path->reqs[i].req_node);
+		if (!WARN_ON(!node->provider->users))
+			node->provider->users--;
+	}
+	mutex_unlock(&icc_lock);
+
+	kfree(path);
+}
+EXPORT_SYMBOL_GPL(icc_put);
+
+static struct icc_node *icc_node_create_nolock(int id)
+{
+	struct icc_node *node;
+
+	/* check if node already exists */
+	node = node_find(id);
+	if (node)
+		return node;
+
+	node = kzalloc(sizeof(*node), GFP_KERNEL);
+	if (!node)
+		return ERR_PTR(-ENOMEM);
+
+	id = idr_alloc(&icc_idr, node, id, id + 1, GFP_KERNEL);
+	if (id < 0) {
+		WARN(1, "%s: couldn't get idr\n", __func__);
+		kfree(node);
+		return ERR_PTR(id);
+	}
+
+	node->id = id;
+
+	return node;
+}
+
+/**
+ * icc_node_create() - create a node
+ * @id: node id
+ *
+ * Return: icc_node pointer on success, or ERR_PTR() on error
+ */
+struct icc_node *icc_node_create(int id)
+{
+	struct icc_node *node;
+
+	mutex_lock(&icc_lock);
+
+	node = icc_node_create_nolock(id);
+
+	mutex_unlock(&icc_lock);
+
+	return node;
+}
+EXPORT_SYMBOL_GPL(icc_node_create);
+
+/**
+ * icc_node_destroy() - destroy a node
+ * @id: node id
+ */
+void icc_node_destroy(int id)
+{
+	struct icc_node *node;
+
+	mutex_lock(&icc_lock);
+
+	node = node_find(id);
+	if (node) {
+		idr_remove(&icc_idr, node->id);
+		WARN_ON(!hlist_empty(&node->req_list));
+	}
+
+	mutex_unlock(&icc_lock);
+
+	kfree(node);
+}
+EXPORT_SYMBOL_GPL(icc_node_destroy);
+
+/**
+ * icc_link_create() - create a link between two nodes
+ * @node: source node id
+ * @dst_id: destination node id
+ *
+ * Create a link between two nodes. The nodes might belong to different
+ * interconnect providers and the @dst_id node might not exist (if the
+ * provider driver has not probed yet). So just create the @dst_id node
+ * and when the actual provider driver is probed, the rest of the node
+ * data is filled.
+ *
+ * Return: 0 on success, or an error code otherwise
+ */
+int icc_link_create(struct icc_node *node, const int dst_id)
+{
+	struct icc_node *dst;
+	struct icc_node **new;
+	int ret = 0;
+
+	if (!node->provider)
+		return -EINVAL;
+
+	mutex_lock(&icc_lock);
+
+	dst = node_find(dst_id);
+	if (!dst) {
+		dst = icc_node_create_nolock(dst_id);
+
+		if (IS_ERR(dst)) {
+			ret = PTR_ERR(dst);
+			goto out;
+		}
+	}
+
+	new = krealloc(node->links,
+		       (node->num_links + 1) * sizeof(*node->links),
+		       GFP_KERNEL);
+	if (!new) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	node->links = new;
+	node->links[node->num_links++] = dst;
+
+out:
+	mutex_unlock(&icc_lock);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(icc_link_create);
+
+/**
+ * icc_link_destroy() - destroy a link between two nodes
+ * @src: pointer to source node
+ * @dst: pointer to destination node
+ *
+ * Return: 0 on success, or an error code otherwise
+ */
+int icc_link_destroy(struct icc_node *src, struct icc_node *dst)
+{
+	struct icc_node **new;
+	size_t slot;
+	int ret = 0;
+
+	if (IS_ERR_OR_NULL(src))
+		return -EINVAL;
+
+	if (IS_ERR_OR_NULL(dst))
+		return -EINVAL;
+
+	mutex_lock(&icc_lock);
+
+	for (slot = 0; slot < src->num_links; slot++)
+		if (src->links[slot] == dst)
+			break;
+
+	if (WARN_ON(slot == src->num_links)) {
+		ret = -ENXIO;
+		goto out;
+	}
+
+	src->links[slot] = src->links[--src->num_links];
+
+	new = krealloc(src->links, src->num_links * sizeof(*src->links),
+		       GFP_KERNEL);
+	if (new)
+		src->links = new;
+
+out:
+	mutex_unlock(&icc_lock);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(icc_link_destroy);
+
+/**
+ * icc_node_add() - add interconnect node to interconnect provider
+ * @node: pointer to the interconnect node
+ * @provider: pointer to the interconnect provider
+ */
+void icc_node_add(struct icc_node *node, struct icc_provider *provider)
+{
+	mutex_lock(&icc_lock);
+
+	node->provider = provider;
+	list_add_tail(&node->node_list, &provider->nodes);
+
+	mutex_unlock(&icc_lock);
+}
+EXPORT_SYMBOL_GPL(icc_node_add);
+
+/**
+ * icc_node_del() - delete interconnect node from interconnect provider
+ * @node: pointer to the interconnect node
+ */
+void icc_node_del(struct icc_node *node)
+{
+	mutex_lock(&icc_lock);
+
+	list_del(&node->node_list);
+
+	mutex_unlock(&icc_lock);
+}
+EXPORT_SYMBOL_GPL(icc_node_del);
+
+/**
+ * icc_provider_add() - add a new interconnect provider
+ * @provider: the interconnect provider that will be added into topology
+ *
+ * Return: 0 on success, or an error code otherwise
+ */
+int icc_provider_add(struct icc_provider *provider)
+{
+	if (WARN_ON(!provider->set))
+		return -EINVAL;
+
+	mutex_lock(&icc_lock);
+
+	INIT_LIST_HEAD(&provider->nodes);
+	list_add_tail(&provider->provider_list, &icc_providers);
+
+	mutex_unlock(&icc_lock);
+
+	dev_dbg(provider->dev, "interconnect provider added to topology\n");
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(icc_provider_add);
+
+/**
+ * icc_provider_del() - delete previously added interconnect provider
+ * @provider: the interconnect provider that will be removed from topology
+ *
+ * Return: 0 on success, or an error code otherwise
+ */
+int icc_provider_del(struct icc_provider *provider)
+{
+	mutex_lock(&icc_lock);
+	if (provider->users) {
+		pr_warn("interconnect provider still has %d users\n",
+			provider->users);
+		mutex_unlock(&icc_lock);
+		return -EBUSY;
+	}
+
+	if (!list_empty(&provider->nodes)) {
+		pr_warn("interconnect provider still has nodes\n");
+		mutex_unlock(&icc_lock);
+		return -EBUSY;
+	}
+
+	list_del(&provider->provider_list);
+	mutex_unlock(&icc_lock);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(icc_provider_del);
+
+MODULE_AUTHOR("Georgi Djakov <georgi.djakov@linaro.org>");
+MODULE_DESCRIPTION("Interconnect Driver Core");
+MODULE_LICENSE("GPL v2");
diff --git a/include/linux/interconnect-provider.h b/include/linux/interconnect-provider.h
new file mode 100644
index 000000000000..78208a754181
--- /dev/null
+++ b/include/linux/interconnect-provider.h
@@ -0,0 +1,125 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2018, Linaro Ltd.
+ * Author: Georgi Djakov <georgi.djakov@linaro.org>
+ */
+
+#ifndef __LINUX_INTERCONNECT_PROVIDER_H
+#define __LINUX_INTERCONNECT_PROVIDER_H
+
+#include <linux/interconnect.h>
+
+#define icc_units_to_bps(bw)  ((bw) * 1000ULL)
+
+struct icc_node;
+
+/**
+ * struct icc_provider - interconnect provider (controller) entity that might
+ * provide multiple interconnect controls
+ *
+ * @provider_list: list of the registered interconnect providers
+ * @nodes: internal list of the interconnect provider nodes
+ * @set: pointer to device specific set operation function
+ * @aggregate: pointer to device specific aggregate operation function
+ * @dev: the device this interconnect provider belongs to
+ * @users: count of active users
+ * @data: pointer to private data
+ */
+struct icc_provider {
+	struct list_head	provider_list;
+	struct list_head	nodes;
+	int (*set)(struct icc_node *src, struct icc_node *dst);
+	int (*aggregate)(struct icc_node *node, u32 avg_bw, u32 peak_bw,
+			 u32 *agg_avg, u32 *agg_peak);
+	struct device		*dev;
+	int			users;
+	void			*data;
+};
+
+/**
+ * struct icc_node - entity that is part of the interconnect topology
+ *
+ * @id: platform specific node id
+ * @name: node name used in debugfs
+ * @links: a list of targets pointing to where we can go next when traversing
+ * @num_links: number of links to other interconnect nodes
+ * @provider: points to the interconnect provider of this node
+ * @node_list: the list entry in the parent provider's "nodes" list
+ * @search_list: list used when walking the nodes graph
+ * @reverse: pointer to previous node when walking the nodes graph
+ * @is_traversed: flag that is used when walking the nodes graph
+ * @req_list: a list of QoS constraint requests associated with this node
+ * @avg_bw: aggregated value of average bandwidth requests from all consumers
+ * @peak_bw: aggregated value of peak bandwidth requests from all consumers
+ * @data: pointer to private data
+ */
+struct icc_node {
+	int			id;
+	const char              *name;
+	struct icc_node		**links;
+	size_t			num_links;
+
+	struct icc_provider	*provider;
+	struct list_head	node_list;
+	struct list_head	search_list;
+	struct icc_node		*reverse;
+	u8			is_traversed:1;
+	struct hlist_head	req_list;
+	u32			avg_bw;
+	u32			peak_bw;
+	void			*data;
+};
+
+#if IS_ENABLED(CONFIG_INTERCONNECT)
+
+struct icc_node *icc_node_create(int id);
+void icc_node_destroy(int id);
+int icc_link_create(struct icc_node *node, const int dst_id);
+int icc_link_destroy(struct icc_node *src, struct icc_node *dst);
+void icc_node_add(struct icc_node *node, struct icc_provider *provider);
+void icc_node_del(struct icc_node *node);
+int icc_provider_add(struct icc_provider *provider);
+int icc_provider_del(struct icc_provider *provider);
+
+#else
+
+static inline struct icc_node *icc_node_create(int id)
+{
+	return ERR_PTR(-ENOTSUPP);
+}
+
+void icc_node_destroy(int id)
+{
+}
+
+static inline int icc_link_create(struct icc_node *node, const int dst_id)
+{
+	return -ENOTSUPP;
+}
+
+int icc_link_destroy(struct icc_node *src, struct icc_node *dst)
+{
+	return -ENOTSUPP;
+}
+
+void icc_node_add(struct icc_node *node, struct icc_provider *provider)
+{
+}
+
+void icc_node_del(struct icc_node *node)
+{
+}
+
+static inline int icc_provider_add(struct icc_provider *provider)
+{
+	return -ENOTSUPP;
+}
+
+static inline int icc_provider_del(struct icc_provider *provider)
+{
+	return -ENOTSUPP;
+}
+
+#endif /* CONFIG_INTERCONNECT */
+
+#endif /* __LINUX_INTERCONNECT_PROVIDER_H */
diff --git a/include/linux/interconnect.h b/include/linux/interconnect.h
new file mode 100644
index 000000000000..c331afb3a2c8
--- /dev/null
+++ b/include/linux/interconnect.h
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2018-2019, Linaro Ltd.
+ * Author: Georgi Djakov <georgi.djakov@linaro.org>
+ */
+
+#ifndef __LINUX_INTERCONNECT_H
+#define __LINUX_INTERCONNECT_H
+
+#include <linux/mutex.h>
+#include <linux/types.h>
+
+/* macros for converting to icc units */
+#define Bps_to_icc(x)	((x) / 1000)
+#define kBps_to_icc(x)	(x)
+#define MBps_to_icc(x)	((x) * 1000)
+#define GBps_to_icc(x)	((x) * 1000 * 1000)
+#define bps_to_icc(x)	(1)
+#define kbps_to_icc(x)	((x) / 8 + ((x) % 8 ? 1 : 0))
+#define Mbps_to_icc(x)	((x) * 1000 / 8)
+#define Gbps_to_icc(x)	((x) * 1000 * 1000 / 8)
+
+struct icc_path;
+struct device;
+
+#if IS_ENABLED(CONFIG_INTERCONNECT)
+
+struct icc_path *icc_get(struct device *dev, const int src_id,
+			 const int dst_id);
+void icc_put(struct icc_path *path);
+int icc_set_bw(struct icc_path *path, u32 avg_bw, u32 peak_bw);
+
+#else
+
+static inline struct icc_path *icc_get(struct device *dev, const int src_id,
+				       const int dst_id)
+{
+	return NULL;
+}
+
+static inline void icc_put(struct icc_path *path)
+{
+}
+
+static inline int icc_set_bw(struct icc_path *path, u32 avg_bw, u32 peak_bw)
+{
+	return 0;
+}
+
+#endif /* CONFIG_INTERCONNECT */
+
+#endif /* __LINUX_INTERCONNECT_H */
-- 
cgit v1.2.3


From 87e3031b6fbd83ea83adf1bf9602bcce313ee787 Mon Sep 17 00:00:00 2001
From: Georgi Djakov <georgi.djakov@linaro.org>
Date: Wed, 16 Jan 2019 18:10:58 +0200
Subject: interconnect: Allow endpoints translation via DT

Currently we support only platform data for specifying the interconnect
endpoints. As now the endpoints are hard-coded into the consumer driver
this may lead to complications when a single driver is used by multiple
SoCs, which may have different interconnect topology.
To avoid cluttering the consumer drivers, introduce a translation function
to help us get the board specific interconnect data from device-tree.

Reviewed-by: Evan Green <evgreen@chromium.org>
Signed-off-by: Georgi Djakov <georgi.djakov@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/interconnect/core.c           | 149 ++++++++++++++++++++++++++++++++++
 include/linux/interconnect-provider.h |  17 ++++
 include/linux/interconnect.h          |   7 ++
 3 files changed, 173 insertions(+)

(limited to 'include')

diff --git a/drivers/interconnect/core.c b/drivers/interconnect/core.c
index 2b937b4f43c4..a8c2bd35197f 100644
--- a/drivers/interconnect/core.c
+++ b/drivers/interconnect/core.c
@@ -15,6 +15,7 @@
 #include <linux/module.h>
 #include <linux/mutex.h>
 #include <linux/slab.h>
+#include <linux/of.h>
 #include <linux/overflow.h>
 
 static DEFINE_IDR(icc_idr);
@@ -194,6 +195,152 @@ out:
 	return ret;
 }
 
+/* of_icc_xlate_onecell() - Translate function using a single index.
+ * @spec: OF phandle args to map into an interconnect node.
+ * @data: private data (pointer to struct icc_onecell_data)
+ *
+ * This is a generic translate function that can be used to model simple
+ * interconnect providers that have one device tree node and provide
+ * multiple interconnect nodes. A single cell is used as an index into
+ * an array of icc nodes specified in the icc_onecell_data struct when
+ * registering the provider.
+ */
+struct icc_node *of_icc_xlate_onecell(struct of_phandle_args *spec,
+				      void *data)
+{
+	struct icc_onecell_data *icc_data = data;
+	unsigned int idx = spec->args[0];
+
+	if (idx >= icc_data->num_nodes) {
+		pr_err("%s: invalid index %u\n", __func__, idx);
+		return ERR_PTR(-EINVAL);
+	}
+
+	return icc_data->nodes[idx];
+}
+EXPORT_SYMBOL_GPL(of_icc_xlate_onecell);
+
+/**
+ * of_icc_get_from_provider() - Look-up interconnect node
+ * @spec: OF phandle args to use for look-up
+ *
+ * Looks for interconnect provider under the node specified by @spec and if
+ * found, uses xlate function of the provider to map phandle args to node.
+ *
+ * Returns a valid pointer to struct icc_node on success or ERR_PTR()
+ * on failure.
+ */
+static struct icc_node *of_icc_get_from_provider(struct of_phandle_args *spec)
+{
+	struct icc_node *node = ERR_PTR(-EPROBE_DEFER);
+	struct icc_provider *provider;
+
+	if (!spec || spec->args_count != 1)
+		return ERR_PTR(-EINVAL);
+
+	mutex_lock(&icc_lock);
+	list_for_each_entry(provider, &icc_providers, provider_list) {
+		if (provider->dev->of_node == spec->np)
+			node = provider->xlate(spec, provider->data);
+		if (!IS_ERR(node))
+			break;
+	}
+	mutex_unlock(&icc_lock);
+
+	return node;
+}
+
+/**
+ * of_icc_get() - get a path handle from a DT node based on name
+ * @dev: device pointer for the consumer device
+ * @name: interconnect path name
+ *
+ * This function will search for a path between two endpoints and return an
+ * icc_path handle on success. Use icc_put() to release constraints when they
+ * are not needed anymore.
+ * If the interconnect API is disabled, NULL is returned and the consumer
+ * drivers will still build. Drivers are free to handle this specifically,
+ * but they don't have to.
+ *
+ * Return: icc_path pointer on success or ERR_PTR() on error. NULL is returned
+ * when the API is disabled or the "interconnects" DT property is missing.
+ */
+struct icc_path *of_icc_get(struct device *dev, const char *name)
+{
+	struct icc_path *path = ERR_PTR(-EPROBE_DEFER);
+	struct icc_node *src_node, *dst_node;
+	struct device_node *np = NULL;
+	struct of_phandle_args src_args, dst_args;
+	int idx = 0;
+	int ret;
+
+	if (!dev || !dev->of_node)
+		return ERR_PTR(-ENODEV);
+
+	np = dev->of_node;
+
+	/*
+	 * When the consumer DT node do not have "interconnects" property
+	 * return a NULL path to skip setting constraints.
+	 */
+	if (!of_find_property(np, "interconnects", NULL))
+		return NULL;
+
+	/*
+	 * We use a combination of phandle and specifier for endpoint. For now
+	 * lets support only global ids and extend this in the future if needed
+	 * without breaking DT compatibility.
+	 */
+	if (name) {
+		idx = of_property_match_string(np, "interconnect-names", name);
+		if (idx < 0)
+			return ERR_PTR(idx);
+	}
+
+	ret = of_parse_phandle_with_args(np, "interconnects",
+					 "#interconnect-cells", idx * 2,
+					 &src_args);
+	if (ret)
+		return ERR_PTR(ret);
+
+	of_node_put(src_args.np);
+
+	ret = of_parse_phandle_with_args(np, "interconnects",
+					 "#interconnect-cells", idx * 2 + 1,
+					 &dst_args);
+	if (ret)
+		return ERR_PTR(ret);
+
+	of_node_put(dst_args.np);
+
+	src_node = of_icc_get_from_provider(&src_args);
+
+	if (IS_ERR(src_node)) {
+		if (PTR_ERR(src_node) != -EPROBE_DEFER)
+			dev_err(dev, "error finding src node: %ld\n",
+				PTR_ERR(src_node));
+		return ERR_CAST(src_node);
+	}
+
+	dst_node = of_icc_get_from_provider(&dst_args);
+
+	if (IS_ERR(dst_node)) {
+		if (PTR_ERR(dst_node) != -EPROBE_DEFER)
+			dev_err(dev, "error finding dst node: %ld\n",
+				PTR_ERR(dst_node));
+		return ERR_CAST(dst_node);
+	}
+
+	mutex_lock(&icc_lock);
+	path = path_find(dev, src_node, dst_node);
+	if (IS_ERR(path))
+		dev_err(dev, "%s: invalid path=%ld\n", __func__, PTR_ERR(path));
+	mutex_unlock(&icc_lock);
+
+	return path;
+}
+EXPORT_SYMBOL_GPL(of_icc_get);
+
 /**
  * icc_set_bw() - set bandwidth constraints on an interconnect path
  * @path: reference to the path returned by icc_get()
@@ -519,6 +666,8 @@ int icc_provider_add(struct icc_provider *provider)
 {
 	if (WARN_ON(!provider->set))
 		return -EINVAL;
+	if (WARN_ON(!provider->xlate))
+		return -EINVAL;
 
 	mutex_lock(&icc_lock);
 
diff --git a/include/linux/interconnect-provider.h b/include/linux/interconnect-provider.h
index 78208a754181..63caccadc2db 100644
--- a/include/linux/interconnect-provider.h
+++ b/include/linux/interconnect-provider.h
@@ -12,6 +12,21 @@
 #define icc_units_to_bps(bw)  ((bw) * 1000ULL)
 
 struct icc_node;
+struct of_phandle_args;
+
+/**
+ * struct icc_onecell_data - driver data for onecell interconnect providers
+ *
+ * @num_nodes: number of nodes in this device
+ * @nodes: array of pointers to the nodes in this device
+ */
+struct icc_onecell_data {
+	unsigned int num_nodes;
+	struct icc_node *nodes[];
+};
+
+struct icc_node *of_icc_xlate_onecell(struct of_phandle_args *spec,
+				      void *data);
 
 /**
  * struct icc_provider - interconnect provider (controller) entity that might
@@ -21,6 +36,7 @@ struct icc_node;
  * @nodes: internal list of the interconnect provider nodes
  * @set: pointer to device specific set operation function
  * @aggregate: pointer to device specific aggregate operation function
+ * @xlate: provider-specific callback for mapping nodes from phandle arguments
  * @dev: the device this interconnect provider belongs to
  * @users: count of active users
  * @data: pointer to private data
@@ -31,6 +47,7 @@ struct icc_provider {
 	int (*set)(struct icc_node *src, struct icc_node *dst);
 	int (*aggregate)(struct icc_node *node, u32 avg_bw, u32 peak_bw,
 			 u32 *agg_avg, u32 *agg_peak);
+	struct icc_node* (*xlate)(struct of_phandle_args *spec, void *data);
 	struct device		*dev;
 	int			users;
 	void			*data;
diff --git a/include/linux/interconnect.h b/include/linux/interconnect.h
index c331afb3a2c8..dc25864755ba 100644
--- a/include/linux/interconnect.h
+++ b/include/linux/interconnect.h
@@ -27,6 +27,7 @@ struct device;
 
 struct icc_path *icc_get(struct device *dev, const int src_id,
 			 const int dst_id);
+struct icc_path *of_icc_get(struct device *dev, const char *name);
 void icc_put(struct icc_path *path);
 int icc_set_bw(struct icc_path *path, u32 avg_bw, u32 peak_bw);
 
@@ -38,6 +39,12 @@ static inline struct icc_path *icc_get(struct device *dev, const int src_id,
 	return NULL;
 }
 
+static inline struct icc_path *of_icc_get(struct device *dev,
+					  const char *name)
+{
+	return NULL;
+}
+
 static inline void icc_put(struct icc_path *path)
 {
 }
-- 
cgit v1.2.3


From b5d2f741077abc71205e60b5bbd7dfa07b9d6953 Mon Sep 17 00:00:00 2001
From: David Dai <daidavid1@codeaurora.org>
Date: Wed, 16 Jan 2019 18:11:00 +0200
Subject: interconnect: qcom: Add sdm845 interconnect provider driver

Introduce Qualcomm SDM845 specific provider driver using the
interconnect framework.

Signed-off-by: David Dai <daidavid1@codeaurora.org>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Georgi Djakov <georgi.djakov@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 .../bindings/interconnect/qcom,sdm845.txt          |  24 +
 drivers/interconnect/Kconfig                       |   5 +
 drivers/interconnect/Makefile                      |   1 +
 drivers/interconnect/qcom/Kconfig                  |  13 +
 drivers/interconnect/qcom/Makefile                 |   5 +
 drivers/interconnect/qcom/sdm845.c                 | 838 +++++++++++++++++++++
 include/dt-bindings/interconnect/qcom,sdm845.h     | 143 ++++
 7 files changed, 1029 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/interconnect/qcom,sdm845.txt
 create mode 100644 drivers/interconnect/qcom/Kconfig
 create mode 100644 drivers/interconnect/qcom/Makefile
 create mode 100644 drivers/interconnect/qcom/sdm845.c
 create mode 100644 include/dt-bindings/interconnect/qcom,sdm845.h

(limited to 'include')

diff --git a/Documentation/devicetree/bindings/interconnect/qcom,sdm845.txt b/Documentation/devicetree/bindings/interconnect/qcom,sdm845.txt
new file mode 100644
index 000000000000..5c4f1d911630
--- /dev/null
+++ b/Documentation/devicetree/bindings/interconnect/qcom,sdm845.txt
@@ -0,0 +1,24 @@
+Qualcomm SDM845 Network-On-Chip interconnect driver binding
+-----------------------------------------------------------
+
+SDM845 interconnect providers support system bandwidth requirements through
+RPMh hardware accelerators known as Bus Clock Manager (BCM). The provider is
+able to communicate with the BCM through the Resource State Coordinator (RSC)
+associated with each execution environment. Provider nodes must reside within
+an RPMh device node pertaining to their RSC and each provider maps to a single
+RPMh resource.
+
+Required properties :
+- compatible : shall contain only one of the following:
+			"qcom,sdm845-rsc-hlos"
+- #interconnect-cells : should contain 1
+
+Examples:
+
+apps_rsc: rsc {
+	rsc_hlos: interconnect {
+		compatible = "qcom,sdm845-rsc-hlos";
+		#interconnect-cells = <1>;
+	};
+};
+
diff --git a/drivers/interconnect/Kconfig b/drivers/interconnect/Kconfig
index a261c7d41deb..07a8276fa35a 100644
--- a/drivers/interconnect/Kconfig
+++ b/drivers/interconnect/Kconfig
@@ -8,3 +8,8 @@ menuconfig INTERCONNECT
 
 	  If unsure, say no.
 
+if INTERCONNECT
+
+source "drivers/interconnect/qcom/Kconfig"
+
+endif
diff --git a/drivers/interconnect/Makefile b/drivers/interconnect/Makefile
index 7a01f33b5593..28f2ab0824d5 100644
--- a/drivers/interconnect/Makefile
+++ b/drivers/interconnect/Makefile
@@ -3,3 +3,4 @@
 icc-core-objs				:= core.o
 
 obj-$(CONFIG_INTERCONNECT)		+= icc-core.o
+obj-$(CONFIG_INTERCONNECT_QCOM)		+= qcom/
diff --git a/drivers/interconnect/qcom/Kconfig b/drivers/interconnect/qcom/Kconfig
new file mode 100644
index 000000000000..290d330abe5a
--- /dev/null
+++ b/drivers/interconnect/qcom/Kconfig
@@ -0,0 +1,13 @@
+config INTERCONNECT_QCOM
+	bool "Qualcomm Network-on-Chip interconnect drivers"
+	depends on ARCH_QCOM
+	help
+	  Support for Qualcomm's Network-on-Chip interconnect hardware.
+
+config INTERCONNECT_QCOM_SDM845
+	tristate "Qualcomm SDM845 interconnect driver"
+	depends on INTERCONNECT_QCOM
+	depends on (QCOM_RPMH && QCOM_COMMAND_DB && OF) || COMPILE_TEST
+	help
+	  This is a driver for the Qualcomm Network-on-Chip on sdm845-based
+	  platforms.
diff --git a/drivers/interconnect/qcom/Makefile b/drivers/interconnect/qcom/Makefile
new file mode 100644
index 000000000000..1c1cea690f92
--- /dev/null
+++ b/drivers/interconnect/qcom/Makefile
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0
+
+qnoc-sdm845-objs			:= sdm845.o
+
+obj-$(CONFIG_INTERCONNECT_QCOM_SDM845) += qnoc-sdm845.o
diff --git a/drivers/interconnect/qcom/sdm845.c b/drivers/interconnect/qcom/sdm845.c
new file mode 100644
index 000000000000..4915b78da673
--- /dev/null
+++ b/drivers/interconnect/qcom/sdm845.c
@@ -0,0 +1,838 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2018, The Linux Foundation. All rights reserved.
+ *
+ */
+
+#include <asm/div64.h>
+#include <dt-bindings/interconnect/qcom,sdm845.h>
+#include <linux/device.h>
+#include <linux/interconnect.h>
+#include <linux/interconnect-provider.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/sort.h>
+
+#include <soc/qcom/cmd-db.h>
+#include <soc/qcom/rpmh.h>
+#include <soc/qcom/tcs.h>
+
+#define BCM_TCS_CMD_COMMIT_SHFT		30
+#define BCM_TCS_CMD_COMMIT_MASK		0x40000000
+#define BCM_TCS_CMD_VALID_SHFT		29
+#define BCM_TCS_CMD_VALID_MASK		0x20000000
+#define BCM_TCS_CMD_VOTE_X_SHFT		14
+#define BCM_TCS_CMD_VOTE_MASK		0x3fff
+#define BCM_TCS_CMD_VOTE_Y_SHFT		0
+#define BCM_TCS_CMD_VOTE_Y_MASK		0xfffc000
+
+#define BCM_TCS_CMD(commit, valid, vote_x, vote_y)		\
+	(((commit) << BCM_TCS_CMD_COMMIT_SHFT) |		\
+	((valid) << BCM_TCS_CMD_VALID_SHFT) |			\
+	((cpu_to_le32(vote_x) &					\
+	BCM_TCS_CMD_VOTE_MASK) << BCM_TCS_CMD_VOTE_X_SHFT) |	\
+	((cpu_to_le32(vote_y) &					\
+	BCM_TCS_CMD_VOTE_MASK) << BCM_TCS_CMD_VOTE_Y_SHFT))
+
+#define to_qcom_provider(_provider) \
+	container_of(_provider, struct qcom_icc_provider, provider)
+
+struct qcom_icc_provider {
+	struct icc_provider provider;
+	struct device *dev;
+	struct qcom_icc_bcm **bcms;
+	size_t num_bcms;
+};
+
+/**
+ * struct bcm_db - Auxiliary data pertaining to each Bus Clock Manager (BCM)
+ * @unit: divisor used to convert bytes/sec bw value to an RPMh msg
+ * @width: multiplier used to convert bytes/sec bw value to an RPMh msg
+ * @vcd: virtual clock domain that this bcm belongs to
+ * @reserved: reserved field
+ */
+struct bcm_db {
+	__le32 unit;
+	__le16 width;
+	u8 vcd;
+	u8 reserved;
+};
+
+#define SDM845_MAX_LINKS	43
+#define SDM845_MAX_BCMS		30
+#define SDM845_MAX_BCM_PER_NODE	2
+#define SDM845_MAX_VCD		10
+
+/**
+ * struct qcom_icc_node - Qualcomm specific interconnect nodes
+ * @name: the node name used in debugfs
+ * @links: an array of nodes where we can go next while traversing
+ * @id: a unique node identifier
+ * @num_links: the total number of @links
+ * @channels: num of channels at this node
+ * @buswidth: width of the interconnect between a node and the bus
+ * @sum_avg: current sum aggregate value of all avg bw requests
+ * @max_peak: current max aggregate value of all peak bw requests
+ * @bcms: list of bcms associated with this logical node
+ * @num_bcms: num of @bcms
+ */
+struct qcom_icc_node {
+	const char *name;
+	u16 links[SDM845_MAX_LINKS];
+	u16 id;
+	u16 num_links;
+	u16 channels;
+	u16 buswidth;
+	u64 sum_avg;
+	u64 max_peak;
+	struct qcom_icc_bcm *bcms[SDM845_MAX_BCM_PER_NODE];
+	size_t num_bcms;
+};
+
+/**
+ * struct qcom_icc_bcm - Qualcomm specific hardware accelerator nodes
+ * known as Bus Clock Manager (BCM)
+ * @name: the bcm node name used to fetch BCM data from command db
+ * @type: latency or bandwidth bcm
+ * @addr: address offsets used when voting to RPMH
+ * @vote_x: aggregated threshold values, represents sum_bw when @type is bw bcm
+ * @vote_y: aggregated threshold values, represents peak_bw when @type is bw bcm
+ * @dirty: flag used to indicate whether the bcm needs to be committed
+ * @keepalive: flag used to indicate whether a keepalive is required
+ * @aux_data: auxiliary data used when calculating threshold values and
+ * communicating with RPMh
+ * @list: used to link to other bcms when compiling lists for commit
+ * @num_nodes: total number of @num_nodes
+ * @nodes: list of qcom_icc_nodes that this BCM encapsulates
+ */
+struct qcom_icc_bcm {
+	const char *name;
+	u32 type;
+	u32 addr;
+	u64 vote_x;
+	u64 vote_y;
+	bool dirty;
+	bool keepalive;
+	struct bcm_db aux_data;
+	struct list_head list;
+	size_t num_nodes;
+	struct qcom_icc_node *nodes[];
+};
+
+struct qcom_icc_fabric {
+	struct qcom_icc_node **nodes;
+	size_t num_nodes;
+};
+
+struct qcom_icc_desc {
+	struct qcom_icc_node **nodes;
+	size_t num_nodes;
+	struct qcom_icc_bcm **bcms;
+	size_t num_bcms;
+};
+
+#define DEFINE_QNODE(_name, _id, _channels, _buswidth,			\
+			_numlinks, ...)					\
+		static struct qcom_icc_node _name = {			\
+		.id = _id,						\
+		.name = #_name,						\
+		.channels = _channels,					\
+		.buswidth = _buswidth,					\
+		.num_links = _numlinks,					\
+		.links = { __VA_ARGS__ },				\
+	}
+
+DEFINE_QNODE(qhm_a1noc_cfg, MASTER_A1NOC_CFG, 1, 4, 1, SLAVE_SERVICE_A1NOC);
+DEFINE_QNODE(qhm_qup1, MASTER_BLSP_1, 1, 4, 1, SLAVE_A1NOC_SNOC);
+DEFINE_QNODE(qhm_tsif, MASTER_TSIF, 1, 4, 1, SLAVE_A1NOC_SNOC);
+DEFINE_QNODE(xm_sdc2, MASTER_SDCC_2, 1, 8, 1, SLAVE_A1NOC_SNOC);
+DEFINE_QNODE(xm_sdc4, MASTER_SDCC_4, 1, 8, 1, SLAVE_A1NOC_SNOC);
+DEFINE_QNODE(xm_ufs_card, MASTER_UFS_CARD, 1, 8, 1, SLAVE_A1NOC_SNOC);
+DEFINE_QNODE(xm_ufs_mem, MASTER_UFS_MEM, 1, 8, 1, SLAVE_A1NOC_SNOC);
+DEFINE_QNODE(xm_pcie_0, MASTER_PCIE_0, 1, 8, 1, SLAVE_ANOC_PCIE_A1NOC_SNOC);
+DEFINE_QNODE(qhm_a2noc_cfg, MASTER_A2NOC_CFG, 1, 4, 1, SLAVE_SERVICE_A2NOC);
+DEFINE_QNODE(qhm_qdss_bam, MASTER_QDSS_BAM, 1, 4, 1, SLAVE_A2NOC_SNOC);
+DEFINE_QNODE(qhm_qup2, MASTER_BLSP_2, 1, 4, 1, SLAVE_A2NOC_SNOC);
+DEFINE_QNODE(qnm_cnoc, MASTER_CNOC_A2NOC, 1, 8, 1, SLAVE_A2NOC_SNOC);
+DEFINE_QNODE(qxm_crypto, MASTER_CRYPTO, 1, 8, 1, SLAVE_A2NOC_SNOC);
+DEFINE_QNODE(qxm_ipa, MASTER_IPA, 1, 8, 1, SLAVE_A2NOC_SNOC);
+DEFINE_QNODE(xm_pcie3_1, MASTER_PCIE_1, 1, 8, 1, SLAVE_ANOC_PCIE_SNOC);
+DEFINE_QNODE(xm_qdss_etr, MASTER_QDSS_ETR, 1, 8, 1, SLAVE_A2NOC_SNOC);
+DEFINE_QNODE(xm_usb3_0, MASTER_USB3_0, 1, 8, 1, SLAVE_A2NOC_SNOC);
+DEFINE_QNODE(xm_usb3_1, MASTER_USB3_1, 1, 8, 1, SLAVE_A2NOC_SNOC);
+DEFINE_QNODE(qxm_camnoc_hf0_uncomp, MASTER_CAMNOC_HF0_UNCOMP, 1, 32, 1, SLAVE_CAMNOC_UNCOMP);
+DEFINE_QNODE(qxm_camnoc_hf1_uncomp, MASTER_CAMNOC_HF1_UNCOMP, 1, 32, 1, SLAVE_CAMNOC_UNCOMP);
+DEFINE_QNODE(qxm_camnoc_sf_uncomp, MASTER_CAMNOC_SF_UNCOMP, 1, 32, 1, SLAVE_CAMNOC_UNCOMP);
+DEFINE_QNODE(qhm_spdm, MASTER_SPDM, 1, 4, 1, SLAVE_CNOC_A2NOC);
+DEFINE_QNODE(qhm_tic, MASTER_TIC, 1, 4, 43, SLAVE_A1NOC_CFG, SLAVE_A2NOC_CFG, SLAVE_AOP, SLAVE_AOSS, SLAVE_CAMERA_CFG, SLAVE_CLK_CTL, SLAVE_CDSP_CFG, SLAVE_RBCPR_CX_CFG, SLAVE_CRYPTO_0_CFG, SLAVE_DCC_CFG, SLAVE_CNOC_DDRSS, SLAVE_DISPLAY_CFG, SLAVE_GLM, SLAVE_GFX3D_CFG, SLAVE_IMEM_CFG, SLAVE_IPA_CFG, SLAVE_CNOC_MNOC_CFG, SLAVE_PCIE_0_CFG, SLAVE_PCIE_1_CFG, SLAVE_PDM, SLAVE_SOUTH_PHY_CFG, SLAVE_PIMEM_CFG, SLAVE_PRNG, SLAVE_QDSS_CFG, SLAVE_BLSP_2, SLAVE_BLSP_1, SLAVE_SDCC_2, SLAVE_SDCC_4, SLAVE_SNOC_CFG, SLAVE_SPDM_WRAPPER, SLAVE_SPSS_CFG, SLAVE_TCSR, SLAVE_TLMM_NORTH, SLAVE_TLMM_SOUTH, SLAVE_TSIF, SLAVE_UFS_CARD_CFG, SLAVE_UFS_MEM_CFG, SLAVE_USB3_0, SLAVE_USB3_1, SLAVE_VENUS_CFG, SLAVE_VSENSE_CTRL_CFG, SLAVE_CNOC_A2NOC, SLAVE_SERVICE_CNOC);
+DEFINE_QNODE(qnm_snoc, MASTER_SNOC_CNOC, 1, 8, 42, SLAVE_A1NOC_CFG, SLAVE_A2NOC_CFG, SLAVE_AOP, SLAVE_AOSS, SLAVE_CAMERA_CFG, SLAVE_CLK_CTL, SLAVE_CDSP_CFG, SLAVE_RBCPR_CX_CFG, SLAVE_CRYPTO_0_CFG, SLAVE_DCC_CFG, SLAVE_CNOC_DDRSS, SLAVE_DISPLAY_CFG, SLAVE_GLM, SLAVE_GFX3D_CFG, SLAVE_IMEM_CFG, SLAVE_IPA_CFG, SLAVE_CNOC_MNOC_CFG, SLAVE_PCIE_0_CFG, SLAVE_PCIE_1_CFG, SLAVE_PDM, SLAVE_SOUTH_PHY_CFG, SLAVE_PIMEM_CFG, SLAVE_PRNG, SLAVE_QDSS_CFG, SLAVE_BLSP_2, SLAVE_BLSP_1, SLAVE_SDCC_2, SLAVE_SDCC_4, SLAVE_SNOC_CFG, SLAVE_SPDM_WRAPPER, SLAVE_SPSS_CFG, SLAVE_TCSR, SLAVE_TLMM_NORTH, SLAVE_TLMM_SOUTH, SLAVE_TSIF, SLAVE_UFS_CARD_CFG, SLAVE_UFS_MEM_CFG, SLAVE_USB3_0, SLAVE_USB3_1, SLAVE_VENUS_CFG, SLAVE_VSENSE_CTRL_CFG, SLAVE_SERVICE_CNOC);
+DEFINE_QNODE(xm_qdss_dap, MASTER_QDSS_DAP, 1, 8, 43, SLAVE_A1NOC_CFG, SLAVE_A2NOC_CFG, SLAVE_AOP, SLAVE_AOSS, SLAVE_CAMERA_CFG, SLAVE_CLK_CTL, SLAVE_CDSP_CFG, SLAVE_RBCPR_CX_CFG, SLAVE_CRYPTO_0_CFG, SLAVE_DCC_CFG, SLAVE_CNOC_DDRSS, SLAVE_DISPLAY_CFG, SLAVE_GLM, SLAVE_GFX3D_CFG, SLAVE_IMEM_CFG, SLAVE_IPA_CFG, SLAVE_CNOC_MNOC_CFG, SLAVE_PCIE_0_CFG, SLAVE_PCIE_1_CFG, SLAVE_PDM, SLAVE_SOUTH_PHY_CFG, SLAVE_PIMEM_CFG, SLAVE_PRNG, SLAVE_QDSS_CFG, SLAVE_BLSP_2, SLAVE_BLSP_1, SLAVE_SDCC_2, SLAVE_SDCC_4, SLAVE_SNOC_CFG, SLAVE_SPDM_WRAPPER, SLAVE_SPSS_CFG, SLAVE_TCSR, SLAVE_TLMM_NORTH, SLAVE_TLMM_SOUTH, SLAVE_TSIF, SLAVE_UFS_CARD_CFG, SLAVE_UFS_MEM_CFG, SLAVE_USB3_0, SLAVE_USB3_1, SLAVE_VENUS_CFG, SLAVE_VSENSE_CTRL_CFG, SLAVE_CNOC_A2NOC, SLAVE_SERVICE_CNOC);
+DEFINE_QNODE(qhm_cnoc, MASTER_CNOC_DC_NOC, 1, 4, 2, SLAVE_LLCC_CFG, SLAVE_MEM_NOC_CFG);
+DEFINE_QNODE(acm_l3, MASTER_APPSS_PROC, 1, 16, 3, SLAVE_GNOC_SNOC, SLAVE_GNOC_MEM_NOC, SLAVE_SERVICE_GNOC);
+DEFINE_QNODE(pm_gnoc_cfg, MASTER_GNOC_CFG, 1, 4, 1, SLAVE_SERVICE_GNOC);
+DEFINE_QNODE(llcc_mc, MASTER_LLCC, 4, 4, 1, SLAVE_EBI1);
+DEFINE_QNODE(acm_tcu, MASTER_TCU_0, 1, 8, 3, SLAVE_MEM_NOC_GNOC, SLAVE_LLCC, SLAVE_MEM_NOC_SNOC);
+DEFINE_QNODE(qhm_memnoc_cfg, MASTER_MEM_NOC_CFG, 1, 4, 2, SLAVE_MSS_PROC_MS_MPU_CFG, SLAVE_SERVICE_MEM_NOC);
+DEFINE_QNODE(qnm_apps, MASTER_GNOC_MEM_NOC, 2, 32, 1, SLAVE_LLCC);
+DEFINE_QNODE(qnm_mnoc_hf, MASTER_MNOC_HF_MEM_NOC, 2, 32, 2, SLAVE_MEM_NOC_GNOC, SLAVE_LLCC);
+DEFINE_QNODE(qnm_mnoc_sf, MASTER_MNOC_SF_MEM_NOC, 1, 32, 3, SLAVE_MEM_NOC_GNOC, SLAVE_LLCC, SLAVE_MEM_NOC_SNOC);
+DEFINE_QNODE(qnm_snoc_gc, MASTER_SNOC_GC_MEM_NOC, 1, 8, 1, SLAVE_LLCC);
+DEFINE_QNODE(qnm_snoc_sf, MASTER_SNOC_SF_MEM_NOC, 1, 16, 2, SLAVE_MEM_NOC_GNOC, SLAVE_LLCC);
+DEFINE_QNODE(qxm_gpu, MASTER_GFX3D, 2, 32, 3, SLAVE_MEM_NOC_GNOC, SLAVE_LLCC, SLAVE_MEM_NOC_SNOC);
+DEFINE_QNODE(qhm_mnoc_cfg, MASTER_CNOC_MNOC_CFG, 1, 4, 1, SLAVE_SERVICE_MNOC);
+DEFINE_QNODE(qxm_camnoc_hf0, MASTER_CAMNOC_HF0, 1, 32, 1, SLAVE_MNOC_HF_MEM_NOC);
+DEFINE_QNODE(qxm_camnoc_hf1, MASTER_CAMNOC_HF1, 1, 32, 1, SLAVE_MNOC_HF_MEM_NOC);
+DEFINE_QNODE(qxm_camnoc_sf, MASTER_CAMNOC_SF, 1, 32, 1, SLAVE_MNOC_SF_MEM_NOC);
+DEFINE_QNODE(qxm_mdp0, MASTER_MDP0, 1, 32, 1, SLAVE_MNOC_HF_MEM_NOC);
+DEFINE_QNODE(qxm_mdp1, MASTER_MDP1, 1, 32, 1, SLAVE_MNOC_HF_MEM_NOC);
+DEFINE_QNODE(qxm_rot, MASTER_ROTATOR, 1, 32, 1, SLAVE_MNOC_SF_MEM_NOC);
+DEFINE_QNODE(qxm_venus0, MASTER_VIDEO_P0, 1, 32, 1, SLAVE_MNOC_SF_MEM_NOC);
+DEFINE_QNODE(qxm_venus1, MASTER_VIDEO_P1, 1, 32, 1, SLAVE_MNOC_SF_MEM_NOC);
+DEFINE_QNODE(qxm_venus_arm9, MASTER_VIDEO_PROC, 1, 8, 1, SLAVE_MNOC_SF_MEM_NOC);
+DEFINE_QNODE(qhm_snoc_cfg, MASTER_SNOC_CFG, 1, 4, 1, SLAVE_SERVICE_SNOC);
+DEFINE_QNODE(qnm_aggre1_noc, MASTER_A1NOC_SNOC, 1, 16, 6, SLAVE_APPSS, SLAVE_SNOC_CNOC, SLAVE_SNOC_MEM_NOC_SF, SLAVE_IMEM, SLAVE_PIMEM, SLAVE_QDSS_STM);
+DEFINE_QNODE(qnm_aggre2_noc, MASTER_A2NOC_SNOC, 1, 16, 9, SLAVE_APPSS, SLAVE_SNOC_CNOC, SLAVE_SNOC_MEM_NOC_SF, SLAVE_IMEM, SLAVE_PCIE_0, SLAVE_PCIE_1, SLAVE_PIMEM, SLAVE_QDSS_STM, SLAVE_TCU);
+DEFINE_QNODE(qnm_gladiator_sodv, MASTER_GNOC_SNOC, 1, 8, 8, SLAVE_APPSS, SLAVE_SNOC_CNOC, SLAVE_IMEM, SLAVE_PCIE_0, SLAVE_PCIE_1, SLAVE_PIMEM, SLAVE_QDSS_STM, SLAVE_TCU);
+DEFINE_QNODE(qnm_memnoc, MASTER_MEM_NOC_SNOC, 1, 8, 5, SLAVE_APPSS, SLAVE_SNOC_CNOC, SLAVE_IMEM, SLAVE_PIMEM, SLAVE_QDSS_STM);
+DEFINE_QNODE(qnm_pcie_anoc, MASTER_ANOC_PCIE_SNOC, 1, 16, 5, SLAVE_APPSS, SLAVE_SNOC_CNOC, SLAVE_SNOC_MEM_NOC_SF, SLAVE_IMEM, SLAVE_QDSS_STM);
+DEFINE_QNODE(qxm_pimem, MASTER_PIMEM, 1, 8, 2, SLAVE_SNOC_MEM_NOC_GC, SLAVE_IMEM);
+DEFINE_QNODE(xm_gic, MASTER_GIC, 1, 8, 2, SLAVE_SNOC_MEM_NOC_GC, SLAVE_IMEM);
+DEFINE_QNODE(qns_a1noc_snoc, SLAVE_A1NOC_SNOC, 1, 16, 1, MASTER_A1NOC_SNOC);
+DEFINE_QNODE(srvc_aggre1_noc, SLAVE_SERVICE_A1NOC, 1, 4, 0);
+DEFINE_QNODE(qns_pcie_a1noc_snoc, SLAVE_ANOC_PCIE_A1NOC_SNOC, 1, 16, 1, MASTER_ANOC_PCIE_SNOC);
+DEFINE_QNODE(qns_a2noc_snoc, SLAVE_A2NOC_SNOC, 1, 16, 1, MASTER_A2NOC_SNOC);
+DEFINE_QNODE(qns_pcie_snoc, SLAVE_ANOC_PCIE_SNOC, 1, 16, 1, MASTER_ANOC_PCIE_SNOC);
+DEFINE_QNODE(srvc_aggre2_noc, SLAVE_SERVICE_A2NOC, 1, 4, 0);
+DEFINE_QNODE(qns_camnoc_uncomp, SLAVE_CAMNOC_UNCOMP, 1, 32, 0);
+DEFINE_QNODE(qhs_a1_noc_cfg, SLAVE_A1NOC_CFG, 1, 4, 1, MASTER_A1NOC_CFG);
+DEFINE_QNODE(qhs_a2_noc_cfg, SLAVE_A2NOC_CFG, 1, 4, 1, MASTER_A2NOC_CFG);
+DEFINE_QNODE(qhs_aop, SLAVE_AOP, 1, 4, 0);
+DEFINE_QNODE(qhs_aoss, SLAVE_AOSS, 1, 4, 0);
+DEFINE_QNODE(qhs_camera_cfg, SLAVE_CAMERA_CFG, 1, 4, 0);
+DEFINE_QNODE(qhs_clk_ctl, SLAVE_CLK_CTL, 1, 4, 0);
+DEFINE_QNODE(qhs_compute_dsp_cfg, SLAVE_CDSP_CFG, 1, 4, 0);
+DEFINE_QNODE(qhs_cpr_cx, SLAVE_RBCPR_CX_CFG, 1, 4, 0);
+DEFINE_QNODE(qhs_crypto0_cfg, SLAVE_CRYPTO_0_CFG, 1, 4, 0);
+DEFINE_QNODE(qhs_dcc_cfg, SLAVE_DCC_CFG, 1, 4, 1, MASTER_CNOC_DC_NOC);
+DEFINE_QNODE(qhs_ddrss_cfg, SLAVE_CNOC_DDRSS, 1, 4, 0);
+DEFINE_QNODE(qhs_display_cfg, SLAVE_DISPLAY_CFG, 1, 4, 0);
+DEFINE_QNODE(qhs_glm, SLAVE_GLM, 1, 4, 0);
+DEFINE_QNODE(qhs_gpuss_cfg, SLAVE_GFX3D_CFG, 1, 8, 0);
+DEFINE_QNODE(qhs_imem_cfg, SLAVE_IMEM_CFG, 1, 4, 0);
+DEFINE_QNODE(qhs_ipa, SLAVE_IPA_CFG, 1, 4, 0);
+DEFINE_QNODE(qhs_mnoc_cfg, SLAVE_CNOC_MNOC_CFG, 1, 4, 1, MASTER_CNOC_MNOC_CFG);
+DEFINE_QNODE(qhs_pcie0_cfg, SLAVE_PCIE_0_CFG, 1, 4, 0);
+DEFINE_QNODE(qhs_pcie_gen3_cfg, SLAVE_PCIE_1_CFG, 1, 4, 0);
+DEFINE_QNODE(qhs_pdm, SLAVE_PDM, 1, 4, 0);
+DEFINE_QNODE(qhs_phy_refgen_south, SLAVE_SOUTH_PHY_CFG, 1, 4, 0);
+DEFINE_QNODE(qhs_pimem_cfg, SLAVE_PIMEM_CFG, 1, 4, 0);
+DEFINE_QNODE(qhs_prng, SLAVE_PRNG, 1, 4, 0);
+DEFINE_QNODE(qhs_qdss_cfg, SLAVE_QDSS_CFG, 1, 4, 0);
+DEFINE_QNODE(qhs_qupv3_north, SLAVE_BLSP_2, 1, 4, 0);
+DEFINE_QNODE(qhs_qupv3_south, SLAVE_BLSP_1, 1, 4, 0);
+DEFINE_QNODE(qhs_sdc2, SLAVE_SDCC_2, 1, 4, 0);
+DEFINE_QNODE(qhs_sdc4, SLAVE_SDCC_4, 1, 4, 0);
+DEFINE_QNODE(qhs_snoc_cfg, SLAVE_SNOC_CFG, 1, 4, 1, MASTER_SNOC_CFG);
+DEFINE_QNODE(qhs_spdm, SLAVE_SPDM_WRAPPER, 1, 4, 0);
+DEFINE_QNODE(qhs_spss_cfg, SLAVE_SPSS_CFG, 1, 4, 0);
+DEFINE_QNODE(qhs_tcsr, SLAVE_TCSR, 1, 4, 0);
+DEFINE_QNODE(qhs_tlmm_north, SLAVE_TLMM_NORTH, 1, 4, 0);
+DEFINE_QNODE(qhs_tlmm_south, SLAVE_TLMM_SOUTH, 1, 4, 0);
+DEFINE_QNODE(qhs_tsif, SLAVE_TSIF, 1, 4, 0);
+DEFINE_QNODE(qhs_ufs_card_cfg, SLAVE_UFS_CARD_CFG, 1, 4, 0);
+DEFINE_QNODE(qhs_ufs_mem_cfg, SLAVE_UFS_MEM_CFG, 1, 4, 0);
+DEFINE_QNODE(qhs_usb3_0, SLAVE_USB3_0, 1, 4, 0);
+DEFINE_QNODE(qhs_usb3_1, SLAVE_USB3_1, 1, 4, 0);
+DEFINE_QNODE(qhs_venus_cfg, SLAVE_VENUS_CFG, 1, 4, 0);
+DEFINE_QNODE(qhs_vsense_ctrl_cfg, SLAVE_VSENSE_CTRL_CFG, 1, 4, 0);
+DEFINE_QNODE(qns_cnoc_a2noc, SLAVE_CNOC_A2NOC, 1, 8, 1, MASTER_CNOC_A2NOC);
+DEFINE_QNODE(srvc_cnoc, SLAVE_SERVICE_CNOC, 1, 4, 0);
+DEFINE_QNODE(qhs_llcc, SLAVE_LLCC_CFG, 1, 4, 0);
+DEFINE_QNODE(qhs_memnoc, SLAVE_MEM_NOC_CFG, 1, 4, 1, MASTER_MEM_NOC_CFG);
+DEFINE_QNODE(qns_gladiator_sodv, SLAVE_GNOC_SNOC, 1, 8, 1, MASTER_GNOC_SNOC);
+DEFINE_QNODE(qns_gnoc_memnoc, SLAVE_GNOC_MEM_NOC, 2, 32, 1, MASTER_GNOC_MEM_NOC);
+DEFINE_QNODE(srvc_gnoc, SLAVE_SERVICE_GNOC, 1, 4, 0);
+DEFINE_QNODE(ebi, SLAVE_EBI1, 4, 4, 0);
+DEFINE_QNODE(qhs_mdsp_ms_mpu_cfg, SLAVE_MSS_PROC_MS_MPU_CFG, 1, 4, 0);
+DEFINE_QNODE(qns_apps_io, SLAVE_MEM_NOC_GNOC, 1, 32, 0);
+DEFINE_QNODE(qns_llcc, SLAVE_LLCC, 4, 16, 1, MASTER_LLCC);
+DEFINE_QNODE(qns_memnoc_snoc, SLAVE_MEM_NOC_SNOC, 1, 8, 1, MASTER_MEM_NOC_SNOC);
+DEFINE_QNODE(srvc_memnoc, SLAVE_SERVICE_MEM_NOC, 1, 4, 0);
+DEFINE_QNODE(qns2_mem_noc, SLAVE_MNOC_SF_MEM_NOC, 1, 32, 1, MASTER_MNOC_SF_MEM_NOC);
+DEFINE_QNODE(qns_mem_noc_hf, SLAVE_MNOC_HF_MEM_NOC, 2, 32, 1, MASTER_MNOC_HF_MEM_NOC);
+DEFINE_QNODE(srvc_mnoc, SLAVE_SERVICE_MNOC, 1, 4, 0);
+DEFINE_QNODE(qhs_apss, SLAVE_APPSS, 1, 8, 0);
+DEFINE_QNODE(qns_cnoc, SLAVE_SNOC_CNOC, 1, 8, 1, MASTER_SNOC_CNOC);
+DEFINE_QNODE(qns_memnoc_gc, SLAVE_SNOC_MEM_NOC_GC, 1, 8, 1, MASTER_SNOC_GC_MEM_NOC);
+DEFINE_QNODE(qns_memnoc_sf, SLAVE_SNOC_MEM_NOC_SF, 1, 16, 1, MASTER_SNOC_SF_MEM_NOC);
+DEFINE_QNODE(qxs_imem, SLAVE_IMEM, 1, 8, 0);
+DEFINE_QNODE(qxs_pcie, SLAVE_PCIE_0, 1, 8, 0);
+DEFINE_QNODE(qxs_pcie_gen3, SLAVE_PCIE_1, 1, 8, 0);
+DEFINE_QNODE(qxs_pimem, SLAVE_PIMEM, 1, 8, 0);
+DEFINE_QNODE(srvc_snoc, SLAVE_SERVICE_SNOC, 1, 4, 0);
+DEFINE_QNODE(xs_qdss_stm, SLAVE_QDSS_STM, 1, 4, 0);
+DEFINE_QNODE(xs_sys_tcu_cfg, SLAVE_TCU, 1, 8, 0);
+
+#define DEFINE_QBCM(_name, _bcmname, _keepalive, _numnodes, ...)	\
+		static struct qcom_icc_bcm _name = {			\
+		.name = _bcmname,					\
+		.keepalive = _keepalive,				\
+		.num_nodes = _numnodes,					\
+		.nodes = { __VA_ARGS__ },				\
+	}
+
+DEFINE_QBCM(bcm_acv, "ACV", false, 1, &ebi);
+DEFINE_QBCM(bcm_mc0, "MC0", true, 1, &ebi);
+DEFINE_QBCM(bcm_sh0, "SH0", true, 1, &qns_llcc);
+DEFINE_QBCM(bcm_mm0, "MM0", false, 1, &qns_mem_noc_hf);
+DEFINE_QBCM(bcm_sh1, "SH1", false, 1, &qns_apps_io);
+DEFINE_QBCM(bcm_mm1, "MM1", false, 7, &qxm_camnoc_hf0_uncomp, &qxm_camnoc_hf1_uncomp, &qxm_camnoc_sf_uncomp, &qxm_camnoc_hf0, &qxm_camnoc_hf1, &qxm_mdp0, &qxm_mdp1);
+DEFINE_QBCM(bcm_sh2, "SH2", false, 1, &qns_memnoc_snoc);
+DEFINE_QBCM(bcm_mm2, "MM2", false, 1, &qns2_mem_noc);
+DEFINE_QBCM(bcm_sh3, "SH3", false, 1, &acm_tcu);
+DEFINE_QBCM(bcm_mm3, "MM3", false, 5, &qxm_camnoc_sf, &qxm_rot, &qxm_venus0, &qxm_venus1, &qxm_venus_arm9);
+DEFINE_QBCM(bcm_sh5, "SH5", false, 1, &qnm_apps);
+DEFINE_QBCM(bcm_sn0, "SN0", true, 1, &qns_memnoc_sf);
+DEFINE_QBCM(bcm_ce0, "CE0", false, 1, &qxm_crypto);
+DEFINE_QBCM(bcm_cn0, "CN0", false, 47, &qhm_spdm, &qhm_tic, &qnm_snoc, &xm_qdss_dap, &qhs_a1_noc_cfg, &qhs_a2_noc_cfg, &qhs_aop, &qhs_aoss, &qhs_camera_cfg, &qhs_clk_ctl, &qhs_compute_dsp_cfg, &qhs_cpr_cx, &qhs_crypto0_cfg, &qhs_dcc_cfg, &qhs_ddrss_cfg, &qhs_display_cfg, &qhs_glm, &qhs_gpuss_cfg, &qhs_imem_cfg, &qhs_ipa, &qhs_mnoc_cfg, &qhs_pcie0_cfg, &qhs_pcie_gen3_cfg, &qhs_pdm, &qhs_phy_refgen_south, &qhs_pimem_cfg, &qhs_prng, &qhs_qdss_cfg, &qhs_qupv3_north, &qhs_qupv3_south, &qhs_sdc2, &qhs_sdc4, &qhs_snoc_cfg, &qhs_spdm, &qhs_spss_cfg, &qhs_tcsr, &qhs_tlmm_north, &qhs_tlmm_south, &qhs_tsif, &qhs_ufs_card_cfg, &qhs_ufs_mem_cfg, &qhs_usb3_0, &qhs_usb3_1, &qhs_venus_cfg, &qhs_vsense_ctrl_cfg, &qns_cnoc_a2noc, &srvc_cnoc);
+DEFINE_QBCM(bcm_qup0, "QUP0", false, 2, &qhm_qup1, &qhm_qup2);
+DEFINE_QBCM(bcm_sn1, "SN1", false, 1, &qxs_imem);
+DEFINE_QBCM(bcm_sn2, "SN2", false, 1, &qns_memnoc_gc);
+DEFINE_QBCM(bcm_sn3, "SN3", false, 1, &qns_cnoc);
+DEFINE_QBCM(bcm_sn4, "SN4", false, 1, &qxm_pimem);
+DEFINE_QBCM(bcm_sn5, "SN5", false, 1, &xs_qdss_stm);
+DEFINE_QBCM(bcm_sn6, "SN6", false, 3, &qhs_apss, &srvc_snoc, &xs_sys_tcu_cfg);
+DEFINE_QBCM(bcm_sn7, "SN7", false, 1, &qxs_pcie);
+DEFINE_QBCM(bcm_sn8, "SN8", false, 1, &qxs_pcie_gen3);
+DEFINE_QBCM(bcm_sn9, "SN9", false, 2, &srvc_aggre1_noc, &qnm_aggre1_noc);
+DEFINE_QBCM(bcm_sn11, "SN11", false, 2, &srvc_aggre2_noc, &qnm_aggre2_noc);
+DEFINE_QBCM(bcm_sn12, "SN12", false, 2, &qnm_gladiator_sodv, &xm_gic);
+DEFINE_QBCM(bcm_sn14, "SN14", false, 1, &qnm_pcie_anoc);
+DEFINE_QBCM(bcm_sn15, "SN15", false, 1, &qnm_memnoc);
+
+static struct qcom_icc_node *rsc_hlos_nodes[] = {
+	[MASTER_APPSS_PROC] = &acm_l3,
+	[MASTER_TCU_0] = &acm_tcu,
+	[MASTER_LLCC] = &llcc_mc,
+	[MASTER_GNOC_CFG] = &pm_gnoc_cfg,
+	[MASTER_A1NOC_CFG] = &qhm_a1noc_cfg,
+	[MASTER_A2NOC_CFG] = &qhm_a2noc_cfg,
+	[MASTER_CNOC_DC_NOC] = &qhm_cnoc,
+	[MASTER_MEM_NOC_CFG] = &qhm_memnoc_cfg,
+	[MASTER_CNOC_MNOC_CFG] = &qhm_mnoc_cfg,
+	[MASTER_QDSS_BAM] = &qhm_qdss_bam,
+	[MASTER_BLSP_1] = &qhm_qup1,
+	[MASTER_BLSP_2] = &qhm_qup2,
+	[MASTER_SNOC_CFG] = &qhm_snoc_cfg,
+	[MASTER_SPDM] = &qhm_spdm,
+	[MASTER_TIC] = &qhm_tic,
+	[MASTER_TSIF] = &qhm_tsif,
+	[MASTER_A1NOC_SNOC] = &qnm_aggre1_noc,
+	[MASTER_A2NOC_SNOC] = &qnm_aggre2_noc,
+	[MASTER_GNOC_MEM_NOC] = &qnm_apps,
+	[MASTER_CNOC_A2NOC] = &qnm_cnoc,
+	[MASTER_GNOC_SNOC] = &qnm_gladiator_sodv,
+	[MASTER_MEM_NOC_SNOC] = &qnm_memnoc,
+	[MASTER_MNOC_HF_MEM_NOC] = &qnm_mnoc_hf,
+	[MASTER_MNOC_SF_MEM_NOC] = &qnm_mnoc_sf,
+	[MASTER_ANOC_PCIE_SNOC] = &qnm_pcie_anoc,
+	[MASTER_SNOC_CNOC] = &qnm_snoc,
+	[MASTER_SNOC_GC_MEM_NOC] = &qnm_snoc_gc,
+	[MASTER_SNOC_SF_MEM_NOC] = &qnm_snoc_sf,
+	[MASTER_CAMNOC_HF0] = &qxm_camnoc_hf0,
+	[MASTER_CAMNOC_HF0_UNCOMP] = &qxm_camnoc_hf0_uncomp,
+	[MASTER_CAMNOC_HF1] = &qxm_camnoc_hf1,
+	[MASTER_CAMNOC_HF1_UNCOMP] = &qxm_camnoc_hf1_uncomp,
+	[MASTER_CAMNOC_SF] = &qxm_camnoc_sf,
+	[MASTER_CAMNOC_SF_UNCOMP] = &qxm_camnoc_sf_uncomp,
+	[MASTER_CRYPTO] = &qxm_crypto,
+	[MASTER_GFX3D] = &qxm_gpu,
+	[MASTER_IPA] = &qxm_ipa,
+	[MASTER_MDP0] = &qxm_mdp0,
+	[MASTER_MDP1] = &qxm_mdp1,
+	[MASTER_PIMEM] = &qxm_pimem,
+	[MASTER_ROTATOR] = &qxm_rot,
+	[MASTER_VIDEO_P0] = &qxm_venus0,
+	[MASTER_VIDEO_P1] = &qxm_venus1,
+	[MASTER_VIDEO_PROC] = &qxm_venus_arm9,
+	[MASTER_GIC] = &xm_gic,
+	[MASTER_PCIE_1] = &xm_pcie3_1,
+	[MASTER_PCIE_0] = &xm_pcie_0,
+	[MASTER_QDSS_DAP] = &xm_qdss_dap,
+	[MASTER_QDSS_ETR] = &xm_qdss_etr,
+	[MASTER_SDCC_2] = &xm_sdc2,
+	[MASTER_SDCC_4] = &xm_sdc4,
+	[MASTER_UFS_CARD] = &xm_ufs_card,
+	[MASTER_UFS_MEM] = &xm_ufs_mem,
+	[MASTER_USB3_0] = &xm_usb3_0,
+	[MASTER_USB3_1] = &xm_usb3_1,
+	[SLAVE_EBI1] = &ebi,
+	[SLAVE_A1NOC_CFG] = &qhs_a1_noc_cfg,
+	[SLAVE_A2NOC_CFG] = &qhs_a2_noc_cfg,
+	[SLAVE_AOP] = &qhs_aop,
+	[SLAVE_AOSS] = &qhs_aoss,
+	[SLAVE_APPSS] = &qhs_apss,
+	[SLAVE_CAMERA_CFG] = &qhs_camera_cfg,
+	[SLAVE_CLK_CTL] = &qhs_clk_ctl,
+	[SLAVE_CDSP_CFG] = &qhs_compute_dsp_cfg,
+	[SLAVE_RBCPR_CX_CFG] = &qhs_cpr_cx,
+	[SLAVE_CRYPTO_0_CFG] = &qhs_crypto0_cfg,
+	[SLAVE_DCC_CFG] = &qhs_dcc_cfg,
+	[SLAVE_CNOC_DDRSS] = &qhs_ddrss_cfg,
+	[SLAVE_DISPLAY_CFG] = &qhs_display_cfg,
+	[SLAVE_GLM] = &qhs_glm,
+	[SLAVE_GFX3D_CFG] = &qhs_gpuss_cfg,
+	[SLAVE_IMEM_CFG] = &qhs_imem_cfg,
+	[SLAVE_IPA_CFG] = &qhs_ipa,
+	[SLAVE_LLCC_CFG] = &qhs_llcc,
+	[SLAVE_MSS_PROC_MS_MPU_CFG] = &qhs_mdsp_ms_mpu_cfg,
+	[SLAVE_MEM_NOC_CFG] = &qhs_memnoc,
+	[SLAVE_CNOC_MNOC_CFG] = &qhs_mnoc_cfg,
+	[SLAVE_PCIE_0_CFG] = &qhs_pcie0_cfg,
+	[SLAVE_PCIE_1_CFG] = &qhs_pcie_gen3_cfg,
+	[SLAVE_PDM] = &qhs_pdm,
+	[SLAVE_SOUTH_PHY_CFG] = &qhs_phy_refgen_south,
+	[SLAVE_PIMEM_CFG] = &qhs_pimem_cfg,
+	[SLAVE_PRNG] = &qhs_prng,
+	[SLAVE_QDSS_CFG] = &qhs_qdss_cfg,
+	[SLAVE_BLSP_2] = &qhs_qupv3_north,
+	[SLAVE_BLSP_1] = &qhs_qupv3_south,
+	[SLAVE_SDCC_2] = &qhs_sdc2,
+	[SLAVE_SDCC_4] = &qhs_sdc4,
+	[SLAVE_SNOC_CFG] = &qhs_snoc_cfg,
+	[SLAVE_SPDM_WRAPPER] = &qhs_spdm,
+	[SLAVE_SPSS_CFG] = &qhs_spss_cfg,
+	[SLAVE_TCSR] = &qhs_tcsr,
+	[SLAVE_TLMM_NORTH] = &qhs_tlmm_north,
+	[SLAVE_TLMM_SOUTH] = &qhs_tlmm_south,
+	[SLAVE_TSIF] = &qhs_tsif,
+	[SLAVE_UFS_CARD_CFG] = &qhs_ufs_card_cfg,
+	[SLAVE_UFS_MEM_CFG] = &qhs_ufs_mem_cfg,
+	[SLAVE_USB3_0] = &qhs_usb3_0,
+	[SLAVE_USB3_1] = &qhs_usb3_1,
+	[SLAVE_VENUS_CFG] = &qhs_venus_cfg,
+	[SLAVE_VSENSE_CTRL_CFG] = &qhs_vsense_ctrl_cfg,
+	[SLAVE_MNOC_SF_MEM_NOC] = &qns2_mem_noc,
+	[SLAVE_A1NOC_SNOC] = &qns_a1noc_snoc,
+	[SLAVE_A2NOC_SNOC] = &qns_a2noc_snoc,
+	[SLAVE_MEM_NOC_GNOC] = &qns_apps_io,
+	[SLAVE_CAMNOC_UNCOMP] = &qns_camnoc_uncomp,
+	[SLAVE_SNOC_CNOC] = &qns_cnoc,
+	[SLAVE_CNOC_A2NOC] = &qns_cnoc_a2noc,
+	[SLAVE_GNOC_SNOC] = &qns_gladiator_sodv,
+	[SLAVE_GNOC_MEM_NOC] = &qns_gnoc_memnoc,
+	[SLAVE_LLCC] = &qns_llcc,
+	[SLAVE_MNOC_HF_MEM_NOC] = &qns_mem_noc_hf,
+	[SLAVE_SNOC_MEM_NOC_GC] = &qns_memnoc_gc,
+	[SLAVE_SNOC_MEM_NOC_SF] = &qns_memnoc_sf,
+	[SLAVE_MEM_NOC_SNOC] = &qns_memnoc_snoc,
+	[SLAVE_ANOC_PCIE_A1NOC_SNOC] = &qns_pcie_a1noc_snoc,
+	[SLAVE_ANOC_PCIE_SNOC] = &qns_pcie_snoc,
+	[SLAVE_IMEM] = &qxs_imem,
+	[SLAVE_PCIE_0] = &qxs_pcie,
+	[SLAVE_PCIE_1] = &qxs_pcie_gen3,
+	[SLAVE_PIMEM] = &qxs_pimem,
+	[SLAVE_SERVICE_A1NOC] = &srvc_aggre1_noc,
+	[SLAVE_SERVICE_A2NOC] = &srvc_aggre2_noc,
+	[SLAVE_SERVICE_CNOC] = &srvc_cnoc,
+	[SLAVE_SERVICE_GNOC] = &srvc_gnoc,
+	[SLAVE_SERVICE_MEM_NOC] = &srvc_memnoc,
+	[SLAVE_SERVICE_MNOC] = &srvc_mnoc,
+	[SLAVE_SERVICE_SNOC] = &srvc_snoc,
+	[SLAVE_QDSS_STM] = &xs_qdss_stm,
+	[SLAVE_TCU] = &xs_sys_tcu_cfg,
+};
+
+static struct qcom_icc_bcm *rsc_hlos_bcms[] = {
+	&bcm_acv,
+	&bcm_mc0,
+	&bcm_sh0,
+	&bcm_mm0,
+	&bcm_sh1,
+	&bcm_mm1,
+	&bcm_sh2,
+	&bcm_mm2,
+	&bcm_sh3,
+	&bcm_mm3,
+	&bcm_sh5,
+	&bcm_sn0,
+	&bcm_ce0,
+	&bcm_cn0,
+	&bcm_qup0,
+	&bcm_sn1,
+	&bcm_sn2,
+	&bcm_sn3,
+	&bcm_sn4,
+	&bcm_sn5,
+	&bcm_sn6,
+	&bcm_sn7,
+	&bcm_sn8,
+	&bcm_sn9,
+	&bcm_sn11,
+	&bcm_sn12,
+	&bcm_sn14,
+	&bcm_sn15,
+};
+
+static struct qcom_icc_desc sdm845_rsc_hlos = {
+	.nodes = rsc_hlos_nodes,
+	.num_nodes = ARRAY_SIZE(rsc_hlos_nodes),
+	.bcms = rsc_hlos_bcms,
+	.num_bcms = ARRAY_SIZE(rsc_hlos_bcms),
+};
+
+static int qcom_icc_bcm_init(struct qcom_icc_bcm *bcm, struct device *dev)
+{
+	struct qcom_icc_node *qn;
+	const struct bcm_db *data;
+	size_t data_count;
+	int i;
+
+	bcm->addr = cmd_db_read_addr(bcm->name);
+	if (!bcm->addr) {
+		dev_err(dev, "%s could not find RPMh address\n",
+			bcm->name);
+		return -EINVAL;
+	}
+
+	data = cmd_db_read_aux_data(bcm->name, &data_count);
+	if (IS_ERR(data)) {
+		dev_err(dev, "%s command db read error (%ld)\n",
+			bcm->name, PTR_ERR(data));
+		return PTR_ERR(data);
+	}
+	if (!data_count) {
+		dev_err(dev, "%s command db missing or partial aux data\n",
+			bcm->name);
+		return -EINVAL;
+	}
+
+	bcm->aux_data.unit = le32_to_cpu(data->unit);
+	bcm->aux_data.width = le16_to_cpu(data->width);
+	bcm->aux_data.vcd = data->vcd;
+	bcm->aux_data.reserved = data->reserved;
+
+	/*
+	 * Link Qnodes to their respective BCMs
+	 */
+	for (i = 0; i < bcm->num_nodes; i++) {
+		qn = bcm->nodes[i];
+		qn->bcms[qn->num_bcms] = bcm;
+		qn->num_bcms++;
+	}
+
+	return 0;
+}
+
+inline void tcs_cmd_gen(struct tcs_cmd *cmd, u64 vote_x, u64 vote_y,
+			u32 addr, bool commit)
+{
+	bool valid = true;
+
+	if (!cmd)
+		return;
+
+	if (vote_x == 0 && vote_y == 0)
+		valid = false;
+
+	if (vote_x > BCM_TCS_CMD_VOTE_MASK)
+		vote_x = BCM_TCS_CMD_VOTE_MASK;
+
+	if (vote_y > BCM_TCS_CMD_VOTE_MASK)
+		vote_y = BCM_TCS_CMD_VOTE_MASK;
+
+	cmd->addr = addr;
+	cmd->data = BCM_TCS_CMD(commit, valid, vote_x, vote_y);
+
+	/*
+	 * Set the wait for completion flag on command that need to be completed
+	 * before the next command.
+	 */
+	if (commit)
+		cmd->wait = true;
+}
+
+static void tcs_list_gen(struct list_head *bcm_list,
+			 struct tcs_cmd tcs_list[SDM845_MAX_VCD],
+			 int n[SDM845_MAX_VCD])
+{
+	struct qcom_icc_bcm *bcm;
+	bool commit;
+	size_t idx = 0, batch = 0, cur_vcd_size = 0;
+
+	memset(n, 0, sizeof(int) * SDM845_MAX_VCD);
+
+	list_for_each_entry(bcm, bcm_list, list) {
+		commit = false;
+		cur_vcd_size++;
+		if ((list_is_last(&bcm->list, bcm_list)) ||
+		    bcm->aux_data.vcd != list_next_entry(bcm, list)->aux_data.vcd) {
+			commit = true;
+			cur_vcd_size = 0;
+		}
+		tcs_cmd_gen(&tcs_list[idx], bcm->vote_x, bcm->vote_y,
+			    bcm->addr, commit);
+		idx++;
+		n[batch]++;
+		/*
+		 * Batch the BCMs in such a way that we do not split them in
+		 * multiple payloads when they are under the same VCD. This is
+		 * to ensure that every BCM is committed since we only set the
+		 * commit bit on the last BCM request of every VCD.
+		 */
+		if (n[batch] >= MAX_RPMH_PAYLOAD) {
+			if (!commit) {
+				n[batch] -= cur_vcd_size;
+				n[batch + 1] = cur_vcd_size;
+			}
+			batch++;
+		}
+	}
+}
+
+static void bcm_aggregate(struct qcom_icc_bcm *bcm)
+{
+	size_t i;
+	u64 agg_avg = 0;
+	u64 agg_peak = 0;
+	u64 temp;
+
+	for (i = 0; i < bcm->num_nodes; i++) {
+		temp = bcm->nodes[i]->sum_avg * bcm->aux_data.width;
+		do_div(temp, bcm->nodes[i]->buswidth * bcm->nodes[i]->channels);
+		agg_avg = max(agg_avg, temp);
+
+		temp = bcm->nodes[i]->max_peak * bcm->aux_data.width;
+		do_div(temp, bcm->nodes[i]->buswidth);
+		agg_peak = max(agg_peak, temp);
+	}
+
+	temp = agg_avg * 1000ULL;
+	do_div(temp, bcm->aux_data.unit);
+	bcm->vote_x = temp;
+
+	temp = agg_peak * 1000ULL;
+	do_div(temp, bcm->aux_data.unit);
+	bcm->vote_y = temp;
+
+	if (bcm->keepalive && bcm->vote_x == 0 && bcm->vote_y == 0) {
+		bcm->vote_x = 1;
+		bcm->vote_y = 1;
+	}
+
+	bcm->dirty = false;
+}
+
+static int qcom_icc_aggregate(struct icc_node *node, u32 avg_bw,
+			      u32 peak_bw, u32 *agg_avg, u32 *agg_peak)
+{
+	size_t i;
+	struct qcom_icc_node *qn;
+
+	qn = node->data;
+
+	*agg_avg += avg_bw;
+	*agg_peak = max_t(u32, *agg_peak, peak_bw);
+
+	qn->sum_avg = *agg_avg;
+	qn->max_peak = *agg_peak;
+
+	for (i = 0; i < qn->num_bcms; i++)
+		qn->bcms[i]->dirty = true;
+
+	return 0;
+}
+
+static int qcom_icc_set(struct icc_node *src, struct icc_node *dst)
+{
+	struct qcom_icc_provider *qp;
+	struct icc_node *node;
+	struct tcs_cmd cmds[SDM845_MAX_BCMS];
+	struct list_head commit_list;
+	int commit_idx[SDM845_MAX_VCD];
+	int ret = 0, i;
+
+	if (!src)
+		node = dst;
+	else
+		node = src;
+
+	qp = to_qcom_provider(node->provider);
+
+	INIT_LIST_HEAD(&commit_list);
+
+	for (i = 0; i < qp->num_bcms; i++) {
+		if (qp->bcms[i]->dirty) {
+			bcm_aggregate(qp->bcms[i]);
+			list_add_tail(&qp->bcms[i]->list, &commit_list);
+		}
+	}
+
+	/*
+	 * Construct the command list based on a pre ordered list of BCMs
+	 * based on VCD.
+	 */
+	tcs_list_gen(&commit_list, cmds, commit_idx);
+
+	if (!commit_idx[0])
+		return ret;
+
+	ret = rpmh_invalidate(qp->dev);
+	if (ret) {
+		pr_err("Error invalidating RPMH client (%d)\n", ret);
+		return ret;
+	}
+
+	ret = rpmh_write_batch(qp->dev, RPMH_ACTIVE_ONLY_STATE,
+			       cmds, commit_idx);
+	if (ret) {
+		pr_err("Error sending AMC RPMH requests (%d)\n", ret);
+		return ret;
+	}
+
+	return ret;
+}
+
+static int cmp_vcd(const void *_l, const void *_r)
+{
+	const struct qcom_icc_bcm **l = (const struct qcom_icc_bcm **)_l;
+	const struct qcom_icc_bcm **r = (const struct qcom_icc_bcm **)_r;
+
+	if (l[0]->aux_data.vcd < r[0]->aux_data.vcd)
+		return -1;
+	else if (l[0]->aux_data.vcd == r[0]->aux_data.vcd)
+		return 0;
+	else
+		return 1;
+}
+
+static int qnoc_probe(struct platform_device *pdev)
+{
+	const struct qcom_icc_desc *desc;
+	struct icc_onecell_data *data;
+	struct icc_provider *provider;
+	struct qcom_icc_node **qnodes;
+	struct qcom_icc_provider *qp;
+	struct icc_node *node;
+	size_t num_nodes, i;
+	int ret;
+
+	desc = of_device_get_match_data(&pdev->dev);
+	if (!desc)
+		return -EINVAL;
+
+	qnodes = desc->nodes;
+	num_nodes = desc->num_nodes;
+
+	qp = devm_kzalloc(&pdev->dev, sizeof(*qp), GFP_KERNEL);
+	if (!qp)
+		return -ENOMEM;
+
+	data = devm_kcalloc(&pdev->dev, num_nodes, sizeof(*node), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	provider = &qp->provider;
+	provider->dev = &pdev->dev;
+	provider->set = qcom_icc_set;
+	provider->aggregate = qcom_icc_aggregate;
+	provider->xlate = of_icc_xlate_onecell;
+	INIT_LIST_HEAD(&provider->nodes);
+	provider->data = data;
+
+	qp->dev = &pdev->dev;
+	qp->bcms = desc->bcms;
+	qp->num_bcms = desc->num_bcms;
+
+	ret = icc_provider_add(provider);
+	if (ret) {
+		dev_err(&pdev->dev, "error adding interconnect provider\n");
+		return ret;
+	}
+
+	for (i = 0; i < num_nodes; i++) {
+		size_t j;
+
+		node = icc_node_create(qnodes[i]->id);
+		if (IS_ERR(node)) {
+			ret = PTR_ERR(node);
+			goto err;
+		}
+
+		node->name = qnodes[i]->name;
+		node->data = qnodes[i];
+		icc_node_add(node, provider);
+
+		dev_dbg(&pdev->dev, "registered node %p %s %d\n", node,
+			qnodes[i]->name, node->id);
+
+		/* populate links */
+		for (j = 0; j < qnodes[i]->num_links; j++)
+			icc_link_create(node, qnodes[i]->links[j]);
+
+		data->nodes[i] = node;
+	}
+	data->num_nodes = num_nodes;
+
+	for (i = 0; i < qp->num_bcms; i++)
+		qcom_icc_bcm_init(qp->bcms[i], &pdev->dev);
+
+	/*
+	 * Pre sort the BCMs based on VCD for ease of generating a command list
+	 * that groups the BCMs with the same VCD together. VCDs are numbered
+	 * with lowest being the most expensive time wise, ensuring that
+	 * those commands are being sent the earliest in the queue.
+	 */
+	sort(qp->bcms, qp->num_bcms, sizeof(*qp->bcms), cmp_vcd, NULL);
+
+	platform_set_drvdata(pdev, qp);
+
+	dev_dbg(&pdev->dev, "Registered SDM845 ICC\n");
+
+	return ret;
+err:
+	list_for_each_entry(node, &provider->nodes, node_list) {
+		icc_node_del(node);
+		icc_node_destroy(node->id);
+	}
+
+	icc_provider_del(provider);
+	return ret;
+}
+
+static int qnoc_remove(struct platform_device *pdev)
+{
+	struct qcom_icc_provider *qp = platform_get_drvdata(pdev);
+	struct icc_provider *provider = &qp->provider;
+	struct icc_node *n;
+
+	list_for_each_entry(n, &provider->nodes, node_list) {
+		icc_node_del(n);
+		icc_node_destroy(n->id);
+	}
+
+	return icc_provider_del(provider);
+}
+
+static const struct of_device_id qnoc_of_match[] = {
+	{ .compatible = "qcom,sdm845-rsc-hlos", .data = &sdm845_rsc_hlos },
+	{ },
+};
+MODULE_DEVICE_TABLE(of, qnoc_of_match);
+
+static struct platform_driver qnoc_driver = {
+	.probe = qnoc_probe,
+	.remove = qnoc_remove,
+	.driver = {
+		.name = "qnoc-sdm845",
+		.of_match_table = qnoc_of_match,
+	},
+};
+module_platform_driver(qnoc_driver);
+
+MODULE_AUTHOR("David Dai <daidavid1@codeaurora.org>");
+MODULE_DESCRIPTION("Qualcomm sdm845 NoC driver");
+MODULE_LICENSE("GPL v2");
diff --git a/include/dt-bindings/interconnect/qcom,sdm845.h b/include/dt-bindings/interconnect/qcom,sdm845.h
new file mode 100644
index 000000000000..7b2393be7361
--- /dev/null
+++ b/include/dt-bindings/interconnect/qcom,sdm845.h
@@ -0,0 +1,143 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Qualcomm SDM845 interconnect IDs
+ *
+ * Copyright (c) 2018, Linaro Ltd.
+ * Author: Georgi Djakov <georgi.djakov@linaro.org>
+ */
+
+#ifndef __DT_BINDINGS_INTERCONNECT_QCOM_SDM845_H
+#define __DT_BINDINGS_INTERCONNECT_QCOM_SDM845_H
+
+#define MASTER_A1NOC_CFG		0
+#define MASTER_BLSP_1			1
+#define MASTER_TSIF			2
+#define MASTER_SDCC_2			3
+#define MASTER_SDCC_4			4
+#define MASTER_UFS_CARD			5
+#define MASTER_UFS_MEM			6
+#define MASTER_PCIE_0			7
+#define MASTER_A2NOC_CFG		8
+#define MASTER_QDSS_BAM			9
+#define MASTER_BLSP_2			10
+#define MASTER_CNOC_A2NOC		11
+#define MASTER_CRYPTO			12
+#define MASTER_IPA			13
+#define MASTER_PCIE_1			14
+#define MASTER_QDSS_ETR			15
+#define MASTER_USB3_0			16
+#define MASTER_USB3_1			17
+#define MASTER_CAMNOC_HF0_UNCOMP	18
+#define MASTER_CAMNOC_HF1_UNCOMP	19
+#define MASTER_CAMNOC_SF_UNCOMP		20
+#define MASTER_SPDM			21
+#define MASTER_TIC			22
+#define MASTER_SNOC_CNOC		23
+#define MASTER_QDSS_DAP			24
+#define MASTER_CNOC_DC_NOC		25
+#define MASTER_APPSS_PROC		26
+#define MASTER_GNOC_CFG			27
+#define MASTER_LLCC			28
+#define MASTER_TCU_0			29
+#define MASTER_MEM_NOC_CFG		30
+#define MASTER_GNOC_MEM_NOC		31
+#define MASTER_MNOC_HF_MEM_NOC		32
+#define MASTER_MNOC_SF_MEM_NOC		33
+#define MASTER_SNOC_GC_MEM_NOC		34
+#define MASTER_SNOC_SF_MEM_NOC		35
+#define MASTER_GFX3D			36
+#define MASTER_CNOC_MNOC_CFG		37
+#define MASTER_CAMNOC_HF0		38
+#define MASTER_CAMNOC_HF1		39
+#define MASTER_CAMNOC_SF		40
+#define MASTER_MDP0			41
+#define MASTER_MDP1			42
+#define MASTER_ROTATOR			43
+#define MASTER_VIDEO_P0			44
+#define MASTER_VIDEO_P1			45
+#define MASTER_VIDEO_PROC		46
+#define MASTER_SNOC_CFG			47
+#define MASTER_A1NOC_SNOC		48
+#define MASTER_A2NOC_SNOC		49
+#define MASTER_GNOC_SNOC		50
+#define MASTER_MEM_NOC_SNOC		51
+#define MASTER_ANOC_PCIE_SNOC		52
+#define MASTER_PIMEM			53
+#define MASTER_GIC			54
+#define SLAVE_A1NOC_SNOC		55
+#define SLAVE_SERVICE_A1NOC		56
+#define SLAVE_ANOC_PCIE_A1NOC_SNOC	57
+#define SLAVE_A2NOC_SNOC		58
+#define SLAVE_ANOC_PCIE_SNOC		59
+#define SLAVE_SERVICE_A2NOC		60
+#define SLAVE_CAMNOC_UNCOMP		61
+#define SLAVE_A1NOC_CFG			62
+#define SLAVE_A2NOC_CFG			63
+#define SLAVE_AOP			64
+#define SLAVE_AOSS			65
+#define SLAVE_CAMERA_CFG		66
+#define SLAVE_CLK_CTL			67
+#define SLAVE_CDSP_CFG			68
+#define SLAVE_RBCPR_CX_CFG		69
+#define SLAVE_CRYPTO_0_CFG		70
+#define SLAVE_DCC_CFG			71
+#define SLAVE_CNOC_DDRSS		72
+#define SLAVE_DISPLAY_CFG		73
+#define SLAVE_GLM			74
+#define SLAVE_GFX3D_CFG			75
+#define SLAVE_IMEM_CFG			76
+#define SLAVE_IPA_CFG			77
+#define SLAVE_CNOC_MNOC_CFG		78
+#define SLAVE_PCIE_0_CFG		79
+#define SLAVE_PCIE_1_CFG		80
+#define SLAVE_PDM			81
+#define SLAVE_SOUTH_PHY_CFG		82
+#define SLAVE_PIMEM_CFG			83
+#define SLAVE_PRNG			84
+#define SLAVE_QDSS_CFG			85
+#define SLAVE_BLSP_2			86
+#define SLAVE_BLSP_1			87
+#define SLAVE_SDCC_2			88
+#define SLAVE_SDCC_4			89
+#define SLAVE_SNOC_CFG			90
+#define SLAVE_SPDM_WRAPPER		91
+#define SLAVE_SPSS_CFG			92
+#define SLAVE_TCSR			93
+#define SLAVE_TLMM_NORTH		94
+#define SLAVE_TLMM_SOUTH		95
+#define SLAVE_TSIF			96
+#define SLAVE_UFS_CARD_CFG		97
+#define SLAVE_UFS_MEM_CFG		98
+#define SLAVE_USB3_0			99
+#define SLAVE_USB3_1			100
+#define SLAVE_VENUS_CFG			101
+#define SLAVE_VSENSE_CTRL_CFG		102
+#define SLAVE_CNOC_A2NOC		103
+#define SLAVE_SERVICE_CNOC		104
+#define SLAVE_LLCC_CFG			105
+#define SLAVE_MEM_NOC_CFG		106
+#define SLAVE_GNOC_SNOC			107
+#define SLAVE_GNOC_MEM_NOC		108
+#define SLAVE_SERVICE_GNOC		109
+#define SLAVE_EBI1			110
+#define SLAVE_MSS_PROC_MS_MPU_CFG	111
+#define SLAVE_MEM_NOC_GNOC		112
+#define SLAVE_LLCC			113
+#define SLAVE_MEM_NOC_SNOC		114
+#define SLAVE_SERVICE_MEM_NOC		115
+#define SLAVE_MNOC_SF_MEM_NOC		116
+#define SLAVE_MNOC_HF_MEM_NOC		117
+#define SLAVE_SERVICE_MNOC		118
+#define SLAVE_APPSS			119
+#define SLAVE_SNOC_CNOC			120
+#define SLAVE_SNOC_MEM_NOC_GC		121
+#define SLAVE_SNOC_MEM_NOC_SF		122
+#define SLAVE_IMEM			123
+#define SLAVE_PCIE_0			124
+#define SLAVE_PCIE_1			125
+#define SLAVE_PIMEM			126
+#define SLAVE_SERVICE_SNOC		127
+#define SLAVE_QDSS_STM			128
+#define SLAVE_TCU			129
+
+#endif
-- 
cgit v1.2.3


From 79bf0cbd86ac4887a7ac897fec8f011a763e23ba Mon Sep 17 00:00:00 2001
From: Logan Gunthorpe <logang@deltatee.com>
Date: Wed, 16 Jan 2019 11:25:20 -0700
Subject: iomap: introduce io{read|write}64_{lo_hi|hi_lo}

In order to provide non-atomic functions for io{read|write}64 that will
use readq and writeq when appropriate. We define a number of variants
of these functions in the generic iomap that will do non-atomic
operations on pio but atomic operations on mmio.

These functions are only defined if readq and writeq are defined. If
they are not, then the wrappers that always use non-atomic operations
from include/linux/io-64-nonatomic*.h will be used.

Signed-off-by: Logan Gunthorpe <logang@deltatee.com>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Suresh Warrier <warrier@linux.vnet.ibm.com>
Cc: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/powerpc/include/asm/io.h |   2 +
 include/asm-generic/iomap.h   |  22 +++++++
 lib/iomap.c                   | 132 ++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 156 insertions(+)

(limited to 'include')

diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h
index 7f19fbd3ba55..4b73847e9b95 100644
--- a/arch/powerpc/include/asm/io.h
+++ b/arch/powerpc/include/asm/io.h
@@ -783,8 +783,10 @@ extern void __iounmap_at(void *ea, unsigned long size);
 
 #define mmio_read16be(addr)		readw_be(addr)
 #define mmio_read32be(addr)		readl_be(addr)
+#define mmio_read64be(addr)		readq_be(addr)
 #define mmio_write16be(val, addr)	writew_be(val, addr)
 #define mmio_write32be(val, addr)	writel_be(val, addr)
+#define mmio_write64be(val, addr)	writeq_be(val, addr)
 #define mmio_insb(addr, dst, count)	readsb(addr, dst, count)
 #define mmio_insw(addr, dst, count)	readsw(addr, dst, count)
 #define mmio_insl(addr, dst, count)	readsl(addr, dst, count)
diff --git a/include/asm-generic/iomap.h b/include/asm-generic/iomap.h
index 5b63b94ef6b5..a008f504a2d0 100644
--- a/include/asm-generic/iomap.h
+++ b/include/asm-generic/iomap.h
@@ -36,6 +36,17 @@ extern u64 ioread64(void __iomem *);
 extern u64 ioread64be(void __iomem *);
 #endif
 
+#ifdef readq
+#define ioread64_lo_hi ioread64_lo_hi
+#define ioread64_hi_lo ioread64_hi_lo
+#define ioread64be_lo_hi ioread64be_lo_hi
+#define ioread64be_hi_lo ioread64be_hi_lo
+extern u64 ioread64_lo_hi(void __iomem *addr);
+extern u64 ioread64_hi_lo(void __iomem *addr);
+extern u64 ioread64be_lo_hi(void __iomem *addr);
+extern u64 ioread64be_hi_lo(void __iomem *addr);
+#endif
+
 extern void iowrite8(u8, void __iomem *);
 extern void iowrite16(u16, void __iomem *);
 extern void iowrite16be(u16, void __iomem *);
@@ -46,6 +57,17 @@ extern void iowrite64(u64, void __iomem *);
 extern void iowrite64be(u64, void __iomem *);
 #endif
 
+#ifdef writeq
+#define iowrite64_lo_hi iowrite64_lo_hi
+#define iowrite64_hi_lo iowrite64_hi_lo
+#define iowrite64be_lo_hi iowrite64be_lo_hi
+#define iowrite64be_hi_lo iowrite64be_hi_lo
+extern void iowrite64_lo_hi(u64 val, void __iomem *addr);
+extern void iowrite64_hi_lo(u64 val, void __iomem *addr);
+extern void iowrite64be_lo_hi(u64 val, void __iomem *addr);
+extern void iowrite64be_hi_lo(u64 val, void __iomem *addr);
+#endif
+
 /*
  * "string" versions of the above. Note that they
  * use native byte ordering for the accesses (on
diff --git a/lib/iomap.c b/lib/iomap.c
index 2c293b22569f..e909ab71e995 100644
--- a/lib/iomap.c
+++ b/lib/iomap.c
@@ -67,6 +67,7 @@ static void bad_io_access(unsigned long port, const char *access)
 #ifndef mmio_read16be
 #define mmio_read16be(addr) swab16(readw(addr))
 #define mmio_read32be(addr) swab32(readl(addr))
+#define mmio_read64be(addr) swab64(readq(addr))
 #endif
 
 unsigned int ioread8(void __iomem *addr)
@@ -100,6 +101,80 @@ EXPORT_SYMBOL(ioread16be);
 EXPORT_SYMBOL(ioread32);
 EXPORT_SYMBOL(ioread32be);
 
+#ifdef readq
+static u64 pio_read64_lo_hi(unsigned long port)
+{
+	u64 lo, hi;
+
+	lo = inl(port);
+	hi = inl(port + sizeof(u32));
+
+	return lo | (hi << 32);
+}
+
+static u64 pio_read64_hi_lo(unsigned long port)
+{
+	u64 lo, hi;
+
+	hi = inl(port + sizeof(u32));
+	lo = inl(port);
+
+	return lo | (hi << 32);
+}
+
+static u64 pio_read64be_lo_hi(unsigned long port)
+{
+	u64 lo, hi;
+
+	lo = pio_read32be(port + sizeof(u32));
+	hi = pio_read32be(port);
+
+	return lo | (hi << 32);
+}
+
+static u64 pio_read64be_hi_lo(unsigned long port)
+{
+	u64 lo, hi;
+
+	hi = pio_read32be(port);
+	lo = pio_read32be(port + sizeof(u32));
+
+	return lo | (hi << 32);
+}
+
+u64 ioread64_lo_hi(void __iomem *addr)
+{
+	IO_COND(addr, return pio_read64_lo_hi(port), return readq(addr));
+	return 0xffffffffffffffffULL;
+}
+
+u64 ioread64_hi_lo(void __iomem *addr)
+{
+	IO_COND(addr, return pio_read64_hi_lo(port), return readq(addr));
+	return 0xffffffffffffffffULL;
+}
+
+u64 ioread64be_lo_hi(void __iomem *addr)
+{
+	IO_COND(addr, return pio_read64be_lo_hi(port),
+		return mmio_read64be(addr));
+	return 0xffffffffffffffffULL;
+}
+
+u64 ioread64be_hi_lo(void __iomem *addr)
+{
+	IO_COND(addr, return pio_read64be_hi_lo(port),
+		return mmio_read64be(addr));
+	return 0xffffffffffffffffULL;
+}
+
+EXPORT_SYMBOL(ioread64_lo_hi);
+EXPORT_SYMBOL(ioread64_hi_lo);
+EXPORT_SYMBOL(ioread64be_lo_hi);
+EXPORT_SYMBOL(ioread64be_hi_lo);
+
+#endif /* readq */
+
 #ifndef pio_write16be
 #define pio_write16be(val,port) outw(swab16(val),port)
 #define pio_write32be(val,port) outl(swab32(val),port)
@@ -108,6 +183,7 @@ EXPORT_SYMBOL(ioread32be);
 #ifndef mmio_write16be
 #define mmio_write16be(val,port) writew(swab16(val),port)
 #define mmio_write32be(val,port) writel(swab32(val),port)
+#define mmio_write64be(val,port) writeq(swab64(val),port)
 #endif
 
 void iowrite8(u8 val, void __iomem *addr)
@@ -136,6 +212,62 @@ EXPORT_SYMBOL(iowrite16be);
 EXPORT_SYMBOL(iowrite32);
 EXPORT_SYMBOL(iowrite32be);
 
+#ifdef writeq
+static void pio_write64_lo_hi(u64 val, unsigned long port)
+{
+	outl(val, port);
+	outl(val >> 32, port + sizeof(u32));
+}
+
+static void pio_write64_hi_lo(u64 val, unsigned long port)
+{
+	outl(val >> 32, port + sizeof(u32));
+	outl(val, port);
+}
+
+static void pio_write64be_lo_hi(u64 val, unsigned long port)
+{
+	pio_write32be(val, port + sizeof(u32));
+	pio_write32be(val >> 32, port);
+}
+
+static void pio_write64be_hi_lo(u64 val, unsigned long port)
+{
+	pio_write32be(val >> 32, port);
+	pio_write32be(val, port + sizeof(u32));
+}
+
+void iowrite64_lo_hi(u64 val, void __iomem *addr)
+{
+	IO_COND(addr, pio_write64_lo_hi(val, port),
+		writeq(val, addr));
+}
+
+void iowrite64_hi_lo(u64 val, void __iomem *addr)
+{
+	IO_COND(addr, pio_write64_hi_lo(val, port),
+		writeq(val, addr));
+}
+
+void iowrite64be_lo_hi(u64 val, void __iomem *addr)
+{
+	IO_COND(addr, pio_write64be_lo_hi(val, port),
+		mmio_write64be(val, addr));
+}
+
+void iowrite64be_hi_lo(u64 val, void __iomem *addr)
+{
+	IO_COND(addr, pio_write64be_hi_lo(val, port),
+		mmio_write64be(val, addr));
+}
+
+EXPORT_SYMBOL(iowrite64_lo_hi);
+EXPORT_SYMBOL(iowrite64_hi_lo);
+EXPORT_SYMBOL(iowrite64be_lo_hi);
+EXPORT_SYMBOL(iowrite64be_hi_lo);
+
+#endif /* readq */
+
 /*
  * These are the "repeat MMIO read/write" functions.
  * Note the "__raw" accesses, since we don't want to
-- 
cgit v1.2.3


From c81d64d3dc1f2decf8f3a9354416b7496b5c389b Mon Sep 17 00:00:00 2001
From: Logan Gunthorpe <logang@deltatee.com>
Date: Wed, 16 Jan 2019 11:25:21 -0700
Subject: io-64-nonatomic: add io{read|write}64[be]{_lo_hi|_hi_lo} macros

This patch adds generic io{read|write}64[be]{_lo_hi|_hi_lo} macros if
they are not already defined by the architecture. (As they are provided
by the generic iomap library).

The patch also points io{read|write}64[be] to the variant specified by the
header name.

This is because new drivers are encouraged to use ioreadXX, et al instead
of readX[1], et al -- and mixing ioreadXX with readq is pretty ugly.

[1] LDD3: section 9.4.2

Signed-off-by: Logan Gunthorpe <logang@deltatee.com>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Alan Cox <gnomes@lxorguk.ukuu.org.uk>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/io-64-nonatomic-hi-lo.h | 64 +++++++++++++++++++++++++++++++++++
 include/linux/io-64-nonatomic-lo-hi.h | 64 +++++++++++++++++++++++++++++++++++
 2 files changed, 128 insertions(+)

(limited to 'include')

diff --git a/include/linux/io-64-nonatomic-hi-lo.h b/include/linux/io-64-nonatomic-hi-lo.h
index 862d786a904f..ae21b72cce85 100644
--- a/include/linux/io-64-nonatomic-hi-lo.h
+++ b/include/linux/io-64-nonatomic-hi-lo.h
@@ -55,4 +55,68 @@ static inline void hi_lo_writeq_relaxed(__u64 val, volatile void __iomem *addr)
 #define writeq_relaxed hi_lo_writeq_relaxed
 #endif
 
+#ifndef ioread64_hi_lo
+#define ioread64_hi_lo ioread64_hi_lo
+static inline u64 ioread64_hi_lo(void __iomem *addr)
+{
+	u32 low, high;
+
+	high = ioread32(addr + sizeof(u32));
+	low = ioread32(addr);
+
+	return low + ((u64)high << 32);
+}
+#endif
+
+#ifndef iowrite64_hi_lo
+#define iowrite64_hi_lo iowrite64_hi_lo
+static inline void iowrite64_hi_lo(u64 val, void __iomem *addr)
+{
+	iowrite32(val >> 32, addr + sizeof(u32));
+	iowrite32(val, addr);
+}
+#endif
+
+#ifndef ioread64be_hi_lo
+#define ioread64be_hi_lo ioread64be_hi_lo
+static inline u64 ioread64be_hi_lo(void __iomem *addr)
+{
+	u32 low, high;
+
+	high = ioread32be(addr);
+	low = ioread32be(addr + sizeof(u32));
+
+	return low + ((u64)high << 32);
+}
+#endif
+
+#ifndef iowrite64be_hi_lo
+#define iowrite64be_hi_lo iowrite64be_hi_lo
+static inline void iowrite64be_hi_lo(u64 val, void __iomem *addr)
+{
+	iowrite32be(val >> 32, addr);
+	iowrite32be(val, addr + sizeof(u32));
+}
+#endif
+
+#ifndef ioread64
+#define ioread64_is_nonatomic
+#define ioread64 ioread64_hi_lo
+#endif
+
+#ifndef iowrite64
+#define iowrite64_is_nonatomic
+#define iowrite64 iowrite64_hi_lo
+#endif
+
+#ifndef ioread64be
+#define ioread64be_is_nonatomic
+#define ioread64be ioread64be_hi_lo
+#endif
+
+#ifndef iowrite64be
+#define iowrite64be_is_nonatomic
+#define iowrite64be iowrite64be_hi_lo
+#endif
+
 #endif	/* _LINUX_IO_64_NONATOMIC_HI_LO_H_ */
diff --git a/include/linux/io-64-nonatomic-lo-hi.h b/include/linux/io-64-nonatomic-lo-hi.h
index d042e7bb5adb..faaa842dbdb9 100644
--- a/include/linux/io-64-nonatomic-lo-hi.h
+++ b/include/linux/io-64-nonatomic-lo-hi.h
@@ -55,4 +55,68 @@ static inline void lo_hi_writeq_relaxed(__u64 val, volatile void __iomem *addr)
 #define writeq_relaxed lo_hi_writeq_relaxed
 #endif
 
+#ifndef ioread64_lo_hi
+#define ioread64_lo_hi ioread64_lo_hi
+static inline u64 ioread64_lo_hi(void __iomem *addr)
+{
+	u32 low, high;
+
+	low = ioread32(addr);
+	high = ioread32(addr + sizeof(u32));
+
+	return low + ((u64)high << 32);
+}
+#endif
+
+#ifndef iowrite64_lo_hi
+#define iowrite64_lo_hi iowrite64_lo_hi
+static inline void iowrite64_lo_hi(u64 val, void __iomem *addr)
+{
+	iowrite32(val, addr);
+	iowrite32(val >> 32, addr + sizeof(u32));
+}
+#endif
+
+#ifndef ioread64be_lo_hi
+#define ioread64be_lo_hi ioread64be_lo_hi
+static inline u64 ioread64be_lo_hi(void __iomem *addr)
+{
+	u32 low, high;
+
+	low = ioread32be(addr + sizeof(u32));
+	high = ioread32be(addr);
+
+	return low + ((u64)high << 32);
+}
+#endif
+
+#ifndef iowrite64be_lo_hi
+#define iowrite64be_lo_hi iowrite64be_lo_hi
+static inline void iowrite64be_lo_hi(u64 val, void __iomem *addr)
+{
+	iowrite32be(val, addr + sizeof(u32));
+	iowrite32be(val >> 32, addr);
+}
+#endif
+
+#ifndef ioread64
+#define ioread64_is_nonatomic
+#define ioread64 ioread64_lo_hi
+#endif
+
+#ifndef iowrite64
+#define iowrite64_is_nonatomic
+#define iowrite64 iowrite64_lo_hi
+#endif
+
+#ifndef ioread64be
+#define ioread64be_is_nonatomic
+#define ioread64be ioread64be_lo_hi
+#endif
+
+#ifndef iowrite64be
+#define iowrite64be_is_nonatomic
+#define iowrite64be iowrite64be_lo_hi
+#endif
+
 #endif	/* _LINUX_IO_64_NONATOMIC_LO_HI_H_ */
-- 
cgit v1.2.3


From ec74136ded792deed80780a2f8baf3521eeb72f9 Mon Sep 17 00:00:00 2001
From: Todd Kjos <tkjos@android.com>
Date: Mon, 14 Jan 2019 09:10:21 -0800
Subject: binder: create node flag to request sender's security context

To allow servers to verify client identity, allow a node
flag to be set that causes the sender's security context
to be delivered with the transaction. The BR_TRANSACTION
command is extended in BR_TRANSACTION_SEC_CTX to
contain a pointer to the security context string.

Signed-off-by: Todd Kjos <tkjos@google.com>
Reviewed-by: Joel Fernandes (Google) <joel@joelfernandes.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/android/binder.c            | 106 ++++++++++++++++++++++++++++--------
 include/uapi/linux/android/binder.h |  19 +++++++
 2 files changed, 102 insertions(+), 23 deletions(-)

(limited to 'include')

diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index cdfc87629efb..5f6ef5e63b91 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -329,6 +329,8 @@ struct binder_error {
  *                        (invariant after initialized)
  * @min_priority:         minimum scheduling priority
  *                        (invariant after initialized)
+ * @txn_security_ctx:     require sender's security context
+ *                        (invariant after initialized)
  * @async_todo:           list of async work items
  *                        (protected by @proc->inner_lock)
  *
@@ -365,6 +367,7 @@ struct binder_node {
 		 * invariant after initialization
 		 */
 		u8 accept_fds:1;
+		u8 txn_security_ctx:1;
 		u8 min_priority;
 	};
 	bool has_async_transaction;
@@ -615,6 +618,7 @@ struct binder_transaction {
 	long	saved_priority;
 	kuid_t	sender_euid;
 	struct list_head fd_fixups;
+	binder_uintptr_t security_ctx;
 	/**
 	 * @lock:  protects @from, @to_proc, and @to_thread
 	 *
@@ -1152,6 +1156,7 @@ static struct binder_node *binder_init_node_ilocked(
 	node->work.type = BINDER_WORK_NODE;
 	node->min_priority = flags & FLAT_BINDER_FLAG_PRIORITY_MASK;
 	node->accept_fds = !!(flags & FLAT_BINDER_FLAG_ACCEPTS_FDS);
+	node->txn_security_ctx = !!(flags & FLAT_BINDER_FLAG_TXN_SECURITY_CTX);
 	spin_lock_init(&node->lock);
 	INIT_LIST_HEAD(&node->work.entry);
 	INIT_LIST_HEAD(&node->async_todo);
@@ -2778,6 +2783,8 @@ static void binder_transaction(struct binder_proc *proc,
 	binder_size_t last_fixup_min_off = 0;
 	struct binder_context *context = proc->context;
 	int t_debug_id = atomic_inc_return(&binder_last_id);
+	char *secctx = NULL;
+	u32 secctx_sz = 0;
 
 	e = binder_transaction_log_add(&binder_transaction_log);
 	e->debug_id = t_debug_id;
@@ -3020,6 +3027,20 @@ static void binder_transaction(struct binder_proc *proc,
 	t->flags = tr->flags;
 	t->priority = task_nice(current);
 
+	if (target_node && target_node->txn_security_ctx) {
+		u32 secid;
+
+		security_task_getsecid(proc->tsk, &secid);
+		ret = security_secid_to_secctx(secid, &secctx, &secctx_sz);
+		if (ret) {
+			return_error = BR_FAILED_REPLY;
+			return_error_param = ret;
+			return_error_line = __LINE__;
+			goto err_get_secctx_failed;
+		}
+		extra_buffers_size += ALIGN(secctx_sz, sizeof(u64));
+	}
+
 	trace_binder_transaction(reply, t, target_node);
 
 	t->buffer = binder_alloc_new_buf(&target_proc->alloc, tr->data_size,
@@ -3036,6 +3057,19 @@ static void binder_transaction(struct binder_proc *proc,
 		t->buffer = NULL;
 		goto err_binder_alloc_buf_failed;
 	}
+	if (secctx) {
+		size_t buf_offset = ALIGN(tr->data_size, sizeof(void *)) +
+				    ALIGN(tr->offsets_size, sizeof(void *)) +
+				    ALIGN(extra_buffers_size, sizeof(void *)) -
+				    ALIGN(secctx_sz, sizeof(u64));
+		char *kptr = t->buffer->data + buf_offset;
+
+		t->security_ctx = (uintptr_t)kptr +
+		    binder_alloc_get_user_buffer_offset(&target_proc->alloc);
+		memcpy(kptr, secctx, secctx_sz);
+		security_release_secctx(secctx, secctx_sz);
+		secctx = NULL;
+	}
 	t->buffer->debug_id = t->debug_id;
 	t->buffer->transaction = t;
 	t->buffer->target_node = target_node;
@@ -3305,6 +3339,9 @@ err_copy_data_failed:
 	t->buffer->transaction = NULL;
 	binder_alloc_free_buf(&target_proc->alloc, t->buffer);
 err_binder_alloc_buf_failed:
+	if (secctx)
+		security_release_secctx(secctx, secctx_sz);
+err_get_secctx_failed:
 	kfree(tcomplete);
 	binder_stats_deleted(BINDER_STAT_TRANSACTION_COMPLETE);
 err_alloc_tcomplete_failed:
@@ -4036,11 +4073,13 @@ retry:
 
 	while (1) {
 		uint32_t cmd;
-		struct binder_transaction_data tr;
+		struct binder_transaction_data_secctx tr;
+		struct binder_transaction_data *trd = &tr.transaction_data;
 		struct binder_work *w = NULL;
 		struct list_head *list = NULL;
 		struct binder_transaction *t = NULL;
 		struct binder_thread *t_from;
+		size_t trsize = sizeof(*trd);
 
 		binder_inner_proc_lock(proc);
 		if (!binder_worklist_empty_ilocked(&thread->todo))
@@ -4240,8 +4279,8 @@ retry:
 		if (t->buffer->target_node) {
 			struct binder_node *target_node = t->buffer->target_node;
 
-			tr.target.ptr = target_node->ptr;
-			tr.cookie =  target_node->cookie;
+			trd->target.ptr = target_node->ptr;
+			trd->cookie =  target_node->cookie;
 			t->saved_priority = task_nice(current);
 			if (t->priority < target_node->min_priority &&
 			    !(t->flags & TF_ONE_WAY))
@@ -4251,22 +4290,23 @@ retry:
 				binder_set_nice(target_node->min_priority);
 			cmd = BR_TRANSACTION;
 		} else {
-			tr.target.ptr = 0;
-			tr.cookie = 0;
+			trd->target.ptr = 0;
+			trd->cookie = 0;
 			cmd = BR_REPLY;
 		}
-		tr.code = t->code;
-		tr.flags = t->flags;
-		tr.sender_euid = from_kuid(current_user_ns(), t->sender_euid);
+		trd->code = t->code;
+		trd->flags = t->flags;
+		trd->sender_euid = from_kuid(current_user_ns(), t->sender_euid);
 
 		t_from = binder_get_txn_from(t);
 		if (t_from) {
 			struct task_struct *sender = t_from->proc->tsk;
 
-			tr.sender_pid = task_tgid_nr_ns(sender,
-							task_active_pid_ns(current));
+			trd->sender_pid =
+				task_tgid_nr_ns(sender,
+						task_active_pid_ns(current));
 		} else {
-			tr.sender_pid = 0;
+			trd->sender_pid = 0;
 		}
 
 		ret = binder_apply_fd_fixups(t);
@@ -4297,15 +4337,20 @@ retry:
 			}
 			continue;
 		}
-		tr.data_size = t->buffer->data_size;
-		tr.offsets_size = t->buffer->offsets_size;
-		tr.data.ptr.buffer = (binder_uintptr_t)
+		trd->data_size = t->buffer->data_size;
+		trd->offsets_size = t->buffer->offsets_size;
+		trd->data.ptr.buffer = (binder_uintptr_t)
 			((uintptr_t)t->buffer->data +
 			binder_alloc_get_user_buffer_offset(&proc->alloc));
-		tr.data.ptr.offsets = tr.data.ptr.buffer +
+		trd->data.ptr.offsets = trd->data.ptr.buffer +
 					ALIGN(t->buffer->data_size,
 					    sizeof(void *));
 
+		tr.secctx = t->security_ctx;
+		if (t->security_ctx) {
+			cmd = BR_TRANSACTION_SEC_CTX;
+			trsize = sizeof(tr);
+		}
 		if (put_user(cmd, (uint32_t __user *)ptr)) {
 			if (t_from)
 				binder_thread_dec_tmpref(t_from);
@@ -4316,7 +4361,7 @@ retry:
 			return -EFAULT;
 		}
 		ptr += sizeof(uint32_t);
-		if (copy_to_user(ptr, &tr, sizeof(tr))) {
+		if (copy_to_user(ptr, &tr, trsize)) {
 			if (t_from)
 				binder_thread_dec_tmpref(t_from);
 
@@ -4325,7 +4370,7 @@ retry:
 
 			return -EFAULT;
 		}
-		ptr += sizeof(tr);
+		ptr += trsize;
 
 		trace_binder_transaction_received(t);
 		binder_stat_br(proc, thread, cmd);
@@ -4333,16 +4378,18 @@ retry:
 			     "%d:%d %s %d %d:%d, cmd %d size %zd-%zd ptr %016llx-%016llx\n",
 			     proc->pid, thread->pid,
 			     (cmd == BR_TRANSACTION) ? "BR_TRANSACTION" :
-			     "BR_REPLY",
+				(cmd == BR_TRANSACTION_SEC_CTX) ?
+				     "BR_TRANSACTION_SEC_CTX" : "BR_REPLY",
 			     t->debug_id, t_from ? t_from->proc->pid : 0,
 			     t_from ? t_from->pid : 0, cmd,
 			     t->buffer->data_size, t->buffer->offsets_size,
-			     (u64)tr.data.ptr.buffer, (u64)tr.data.ptr.offsets);
+			     (u64)trd->data.ptr.buffer,
+			     (u64)trd->data.ptr.offsets);
 
 		if (t_from)
 			binder_thread_dec_tmpref(t_from);
 		t->buffer->allow_user_free = 1;
-		if (cmd == BR_TRANSACTION && !(t->flags & TF_ONE_WAY)) {
+		if (cmd != BR_REPLY && !(t->flags & TF_ONE_WAY)) {
 			binder_inner_proc_lock(thread->proc);
 			t->to_parent = thread->transaction_stack;
 			t->to_thread = thread;
@@ -4690,7 +4737,8 @@ out:
 	return ret;
 }
 
-static int binder_ioctl_set_ctx_mgr(struct file *filp)
+static int binder_ioctl_set_ctx_mgr(struct file *filp,
+				    struct flat_binder_object *fbo)
 {
 	int ret = 0;
 	struct binder_proc *proc = filp->private_data;
@@ -4719,7 +4767,7 @@ static int binder_ioctl_set_ctx_mgr(struct file *filp)
 	} else {
 		context->binder_context_mgr_uid = curr_euid;
 	}
-	new_node = binder_new_node(proc, NULL);
+	new_node = binder_new_node(proc, fbo);
 	if (!new_node) {
 		ret = -ENOMEM;
 		goto out;
@@ -4842,8 +4890,20 @@ static long binder_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 		binder_inner_proc_unlock(proc);
 		break;
 	}
+	case BINDER_SET_CONTEXT_MGR_EXT: {
+		struct flat_binder_object fbo;
+
+		if (copy_from_user(&fbo, ubuf, sizeof(fbo))) {
+			ret = -EINVAL;
+			goto err;
+		}
+		ret = binder_ioctl_set_ctx_mgr(filp, &fbo);
+		if (ret)
+			goto err;
+		break;
+	}
 	case BINDER_SET_CONTEXT_MGR:
-		ret = binder_ioctl_set_ctx_mgr(filp);
+		ret = binder_ioctl_set_ctx_mgr(filp, NULL);
 		if (ret)
 			goto err;
 		break;
diff --git a/include/uapi/linux/android/binder.h b/include/uapi/linux/android/binder.h
index b9ba520f7e4b..2832134e5397 100644
--- a/include/uapi/linux/android/binder.h
+++ b/include/uapi/linux/android/binder.h
@@ -41,6 +41,14 @@ enum {
 enum {
 	FLAT_BINDER_FLAG_PRIORITY_MASK = 0xff,
 	FLAT_BINDER_FLAG_ACCEPTS_FDS = 0x100,
+
+	/**
+	 * @FLAT_BINDER_FLAG_TXN_SECURITY_CTX: request security contexts
+	 *
+	 * Only when set, causes senders to include their security
+	 * context
+	 */
+	FLAT_BINDER_FLAG_TXN_SECURITY_CTX = 0x1000,
 };
 
 #ifdef BINDER_IPC_32BIT
@@ -218,6 +226,7 @@ struct binder_node_info_for_ref {
 #define BINDER_VERSION			_IOWR('b', 9, struct binder_version)
 #define BINDER_GET_NODE_DEBUG_INFO	_IOWR('b', 11, struct binder_node_debug_info)
 #define BINDER_GET_NODE_INFO_FOR_REF	_IOWR('b', 12, struct binder_node_info_for_ref)
+#define BINDER_SET_CONTEXT_MGR_EXT	_IOW('b', 13, struct flat_binder_object)
 
 /*
  * NOTE: Two special error codes you should check for when calling
@@ -276,6 +285,11 @@ struct binder_transaction_data {
 	} data;
 };
 
+struct binder_transaction_data_secctx {
+	struct binder_transaction_data transaction_data;
+	binder_uintptr_t secctx;
+};
+
 struct binder_transaction_data_sg {
 	struct binder_transaction_data transaction_data;
 	binder_size_t buffers_size;
@@ -311,6 +325,11 @@ enum binder_driver_return_protocol {
 	BR_OK = _IO('r', 1),
 	/* No parameters! */
 
+	BR_TRANSACTION_SEC_CTX = _IOR('r', 2,
+				      struct binder_transaction_data_secctx),
+	/*
+	 * binder_transaction_data_secctx: the received command.
+	 */
 	BR_TRANSACTION = _IOR('r', 2, struct binder_transaction_data),
 	BR_REPLY = _IOR('r', 3, struct binder_transaction_data),
 	/*
-- 
cgit v1.2.3


From 51c48b310183ab6ba5419edfc6a8de889cc04521 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bhelgaas@google.com>
Date: Sat, 19 Jan 2019 11:35:04 -0600
Subject: PCI: Probe bridge window attributes once at enumeration-time

pci_bridge_check_ranges() determines whether a bridge supports the optional
I/O and prefetchable memory windows and sets the flag bits in the bridge
resources.  This *could* be done once during enumeration except that the
resource allocation code completely clears the flag bits, e.g., in the
pci_assign_unassigned_bridge_resources() path.

The problem with pci_bridge_check_ranges() in the resource allocation path
is that we may allocate resources after devices have been claimed by
drivers, and pci_bridge_check_ranges() *changes* the window registers to
determine whether they're writable.  This may break concurrent accesses to
devices behind the bridge.

Add a new pci_read_bridge_windows() to determine whether a bridge supports
the optional windows, call it once during enumeration, remember the
results, and change pci_bridge_check_ranges() so it doesn't touch the
bridge windows but sets the flag bits based on those remembered results.

Link: https://lore.kernel.org/linux-pci/1506151482-113560-1-git-send-email-wangzhou1@hisilicon.com
Link: https://lists.gnu.org/archive/html/qemu-devel/2018-12/msg02082.html
Reported-by: Yandong Xu <xuyandong2@huawei.com>
Tested-by: Yandong Xu <xuyandong2@huawei.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Cc: Michael S. Tsirkin <mst@redhat.com>
Cc: Sagi Grimberg <sagi@grimberg.me>
Cc: Ofer Hayut <ofer@lightbitslabs.com>
Cc: Roy Shterman <roys@lightbitslabs.com>
Cc: Keith Busch <keith.busch@intel.com>
Cc: Zhou Wang <wangzhou1@hisilicon.com>
---
 drivers/pci/probe.c     | 52 +++++++++++++++++++++++++++++++++++++++++++++++++
 drivers/pci/setup-bus.c | 45 ++++--------------------------------------
 include/linux/pci.h     |  3 +++
 3 files changed, 59 insertions(+), 41 deletions(-)

(limited to 'include')

diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 257b9f6f2ebb..2ef8b954c65a 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -348,6 +348,57 @@ static void pci_read_bases(struct pci_dev *dev, unsigned int howmany, int rom)
 	}
 }
 
+static void pci_read_bridge_windows(struct pci_dev *bridge)
+{
+	u16 io;
+	u32 pmem, tmp;
+
+	pci_read_config_word(bridge, PCI_IO_BASE, &io);
+	if (!io) {
+		pci_write_config_word(bridge, PCI_IO_BASE, 0xe0f0);
+		pci_read_config_word(bridge, PCI_IO_BASE, &io);
+		pci_write_config_word(bridge, PCI_IO_BASE, 0x0);
+	}
+	if (io)
+		bridge->io_window = 1;
+
+	/*
+	 * DECchip 21050 pass 2 errata: the bridge may miss an address
+	 * disconnect boundary by one PCI data phase.  Workaround: do not
+	 * use prefetching on this device.
+	 */
+	if (bridge->vendor == PCI_VENDOR_ID_DEC && bridge->device == 0x0001)
+		return;
+
+	pci_read_config_dword(bridge, PCI_PREF_MEMORY_BASE, &pmem);
+	if (!pmem) {
+		pci_write_config_dword(bridge, PCI_PREF_MEMORY_BASE,
+					       0xffe0fff0);
+		pci_read_config_dword(bridge, PCI_PREF_MEMORY_BASE, &pmem);
+		pci_write_config_dword(bridge, PCI_PREF_MEMORY_BASE, 0x0);
+	}
+	if (!pmem)
+		return;
+
+	bridge->pref_window = 1;
+
+	if ((pmem & PCI_PREF_RANGE_TYPE_MASK) == PCI_PREF_RANGE_TYPE_64) {
+
+		/*
+		 * Bridge claims to have a 64-bit prefetchable memory
+		 * window; verify that the upper bits are actually
+		 * writable.
+		 */
+		pci_read_config_dword(bridge, PCI_PREF_BASE_UPPER32, &pmem);
+		pci_write_config_dword(bridge, PCI_PREF_BASE_UPPER32,
+				       0xffffffff);
+		pci_read_config_dword(bridge, PCI_PREF_BASE_UPPER32, &tmp);
+		pci_write_config_dword(bridge, PCI_PREF_BASE_UPPER32, pmem);
+		if (tmp)
+			bridge->pref_64_window = 1;
+	}
+}
+
 static void pci_read_bridge_io(struct pci_bus *child)
 {
 	struct pci_dev *dev = child->self;
@@ -1739,6 +1790,7 @@ int pci_setup_device(struct pci_dev *dev)
 		pci_read_irq(dev);
 		dev->transparent = ((dev->class & 0xff) == 1);
 		pci_read_bases(dev, 2, PCI_ROM_ADDRESS1);
+		pci_read_bridge_windows(dev);
 		set_pcie_hotplug_bridge(dev);
 		pos = pci_find_capability(dev, PCI_CAP_ID_SSVID);
 		if (pos) {
diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c
index ed960436df5e..1941bb0a6c13 100644
--- a/drivers/pci/setup-bus.c
+++ b/drivers/pci/setup-bus.c
@@ -735,58 +735,21 @@ int pci_claim_bridge_resource(struct pci_dev *bridge, int i)
    base/limit registers must be read-only and read as 0. */
 static void pci_bridge_check_ranges(struct pci_bus *bus)
 {
-	u16 io;
-	u32 pmem;
 	struct pci_dev *bridge = bus->self;
-	struct resource *b_res;
+	struct resource *b_res = &bridge->resource[PCI_BRIDGE_RESOURCES];
 
-	b_res = &bridge->resource[PCI_BRIDGE_RESOURCES];
 	b_res[1].flags |= IORESOURCE_MEM;
 
-	pci_read_config_word(bridge, PCI_IO_BASE, &io);
-	if (!io) {
-		pci_write_config_word(bridge, PCI_IO_BASE, 0xe0f0);
-		pci_read_config_word(bridge, PCI_IO_BASE, &io);
-		pci_write_config_word(bridge, PCI_IO_BASE, 0x0);
-	}
-	if (io)
+	if (bridge->io_window)
 		b_res[0].flags |= IORESOURCE_IO;
 
-	/*  DECchip 21050 pass 2 errata: the bridge may miss an address
-	    disconnect boundary by one PCI data phase.
-	    Workaround: do not use prefetching on this device. */
-	if (bridge->vendor == PCI_VENDOR_ID_DEC && bridge->device == 0x0001)
-		return;
-
-	pci_read_config_dword(bridge, PCI_PREF_MEMORY_BASE, &pmem);
-	if (!pmem) {
-		pci_write_config_dword(bridge, PCI_PREF_MEMORY_BASE,
-					       0xffe0fff0);
-		pci_read_config_dword(bridge, PCI_PREF_MEMORY_BASE, &pmem);
-		pci_write_config_dword(bridge, PCI_PREF_MEMORY_BASE, 0x0);
-	}
-	if (pmem) {
+	if (bridge->pref_window) {
 		b_res[2].flags |= IORESOURCE_MEM | IORESOURCE_PREFETCH;
-		if ((pmem & PCI_PREF_RANGE_TYPE_MASK) ==
-		    PCI_PREF_RANGE_TYPE_64) {
+		if (bridge->pref_64_window) {
 			b_res[2].flags |= IORESOURCE_MEM_64;
 			b_res[2].flags |= PCI_PREF_RANGE_TYPE_64;
 		}
 	}
-
-	/* double check if bridge does support 64 bit pref */
-	if (b_res[2].flags & IORESOURCE_MEM_64) {
-		u32 mem_base_hi, tmp;
-		pci_read_config_dword(bridge, PCI_PREF_BASE_UPPER32,
-					 &mem_base_hi);
-		pci_write_config_dword(bridge, PCI_PREF_BASE_UPPER32,
-					       0xffffffff);
-		pci_read_config_dword(bridge, PCI_PREF_BASE_UPPER32, &tmp);
-		if (!tmp)
-			b_res[2].flags &= ~IORESOURCE_MEM_64;
-		pci_write_config_dword(bridge, PCI_PREF_BASE_UPPER32,
-				       mem_base_hi);
-	}
 }
 
 /* Helper function for sizing routines: find first available
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 65f1d8c2f082..40b327b814aa 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -373,6 +373,9 @@ struct pci_dev {
 	bool		match_driver;		/* Skip attaching driver */
 
 	unsigned int	transparent:1;		/* Subtractive decode bridge */
+	unsigned int	io_window:1;		/* Bridge has I/O window */
+	unsigned int	pref_window:1;		/* Bridge has pref mem window */
+	unsigned int	pref_64_window:1;	/* Pref mem window is 64-bit */
 	unsigned int	multifunction:1;	/* Multi-function device */
 
 	unsigned int	is_busmaster:1;		/* Is busmaster */
-- 
cgit v1.2.3


From 856c395cfa63b94a1d8215182f0243c222f6f927 Mon Sep 17 00:00:00 2001
From: Cong Wang <xiyou.wangcong@gmail.com>
Date: Thu, 17 Jan 2019 23:27:11 -0800
Subject: net: introduce a knob to control whether to inherit devconf config

There have been many people complaining about the inconsistent
behaviors of IPv4 and IPv6 devconf when creating new network
namespaces.  Currently, for IPv4, we inherit all current settings
from init_net, but for IPv6 we reset all setting to default.

This patch introduces a new /proc file
/proc/sys/net/core/devconf_inherit_init_net to control the
behavior of whether to inhert sysctl current settings from init_net.
This file itself is only available in init_net.

As demonstrated below:

Initial setup in init_net:
 # cat /proc/sys/net/ipv4/conf/all/rp_filter
 2
 # cat /proc/sys/net/ipv6/conf/all/accept_dad
 1

Default value 0 (current behavior):
 # ip netns del test
 # ip netns add test
 # ip netns exec test cat /proc/sys/net/ipv4/conf/all/rp_filter
 2
 # ip netns exec test cat /proc/sys/net/ipv6/conf/all/accept_dad
 0

Set to 1 (inherit from init_net):
 # echo 1 > /proc/sys/net/core/devconf_inherit_init_net
 # ip netns del test
 # ip netns add test
 # ip netns exec test cat /proc/sys/net/ipv4/conf/all/rp_filter
 2
 # ip netns exec test cat /proc/sys/net/ipv6/conf/all/accept_dad
 1

Set to 2 (reset to default):
 # echo 2 > /proc/sys/net/core/devconf_inherit_init_net
 # ip netns del test
 # ip netns add test
 # ip netns exec test cat /proc/sys/net/ipv4/conf/all/rp_filter
 0
 # ip netns exec test cat /proc/sys/net/ipv6/conf/all/accept_dad
 0

Set to a value out of range (invalid):
 # echo 3 > /proc/sys/net/core/devconf_inherit_init_net
 -bash: echo: write error: Invalid argument
 # echo -1 > /proc/sys/net/core/devconf_inherit_init_net
 -bash: echo: write error: Invalid argument

Reported-by: Zhu Yanjun <Yanjun.Zhu@windriver.com>
Reported-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
Cc: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Acked-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Acked-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/sysctl/net.txt | 14 ++++++++++++++
 include/linux/netdevice.h    |  1 +
 net/core/sysctl_net_core.c   | 18 ++++++++++++++++++
 net/ipv4/devinet.c           | 43 ++++++++++++++++++++-----------------------
 net/ipv6/addrconf.c          |  5 +++++
 5 files changed, 58 insertions(+), 23 deletions(-)

(limited to 'include')

diff --git a/Documentation/sysctl/net.txt b/Documentation/sysctl/net.txt
index 2793d4eac55f..bc0680706870 100644
--- a/Documentation/sysctl/net.txt
+++ b/Documentation/sysctl/net.txt
@@ -291,6 +291,20 @@ user space is responsible for creating them if needed.
 
 Default : 0  (for compatibility reasons)
 
+devconf_inherit_init_net
+----------------------------
+
+Controls if a new network namespace should inherit all current
+settings under /proc/sys/net/{ipv4,ipv6}/conf/{all,default}/. By
+default, we keep the current behavior: for IPv4 we inherit all current
+settings from init_net and for IPv6 we reset all settings to default.
+
+If set to 1, both IPv4 and IPv6 settings are forced to inherit from
+current ones in init_net. If set to 2, both IPv4 and IPv6 settings are
+forced to reset to their default values.
+
+Default : 0  (for compatibility reasons)
+
 2. /proc/sys/net/unix - Parameters for Unix domain sockets
 -------------------------------------------------------
 
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index a57b9a853aab..e675ef97a426 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -630,6 +630,7 @@ struct netdev_queue {
 } ____cacheline_aligned_in_smp;
 
 extern int sysctl_fb_tunnels_only_for_init_net;
+extern int sysctl_devconf_inherit_init_net;
 
 static inline bool net_has_fallback_tunnels(const struct net *net)
 {
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index d67ec17f2cc8..84bf2861f45f 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -36,6 +36,15 @@ static int net_msg_warn;	/* Unused, but still a sysctl */
 int sysctl_fb_tunnels_only_for_init_net __read_mostly = 0;
 EXPORT_SYMBOL(sysctl_fb_tunnels_only_for_init_net);
 
+/* 0 - Keep current behavior:
+ *     IPv4: inherit all current settings from init_net
+ *     IPv6: reset all settings to default
+ * 1 - Both inherit all current settings from init_net
+ * 2 - Both reset all settings to default
+ */
+int sysctl_devconf_inherit_init_net __read_mostly;
+EXPORT_SYMBOL(sysctl_devconf_inherit_init_net);
+
 #ifdef CONFIG_RPS
 static int rps_sock_flow_sysctl(struct ctl_table *table, int write,
 				void __user *buffer, size_t *lenp, loff_t *ppos)
@@ -544,6 +553,15 @@ static struct ctl_table net_core_table[] = {
 		.extra1		= &zero,
 		.extra2		= &one,
 	},
+	{
+		.procname	= "devconf_inherit_init_net",
+		.data		= &sysctl_devconf_inherit_init_net,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &zero,
+		.extra2		= &two,
+	},
 	{ }
 };
 
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index cd027639df2f..cd9033245b98 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -2591,32 +2591,32 @@ static __net_init int devinet_init_net(struct net *net)
 	int err;
 	struct ipv4_devconf *all, *dflt;
 #ifdef CONFIG_SYSCTL
-	struct ctl_table *tbl = ctl_forward_entry;
+	struct ctl_table *tbl;
 	struct ctl_table_header *forw_hdr;
 #endif
 
 	err = -ENOMEM;
-	all = &ipv4_devconf;
-	dflt = &ipv4_devconf_dflt;
-
-	if (!net_eq(net, &init_net)) {
-		all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
-		if (!all)
-			goto err_alloc_all;
+	all = kmemdup(&ipv4_devconf, sizeof(ipv4_devconf), GFP_KERNEL);
+	if (!all)
+		goto err_alloc_all;
 
-		dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
-		if (!dflt)
-			goto err_alloc_dflt;
+	dflt = kmemdup(&ipv4_devconf_dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
+	if (!dflt)
+		goto err_alloc_dflt;
 
 #ifdef CONFIG_SYSCTL
-		tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
-		if (!tbl)
-			goto err_alloc_ctl;
+	tbl = kmemdup(ctl_forward_entry, sizeof(ctl_forward_entry), GFP_KERNEL);
+	if (!tbl)
+		goto err_alloc_ctl;
 
-		tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
-		tbl[0].extra1 = all;
-		tbl[0].extra2 = net;
+	tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
+	tbl[0].extra1 = all;
+	tbl[0].extra2 = net;
 #endif
+
+	if (sysctl_devconf_inherit_init_net != 2 && !net_eq(net, &init_net)) {
+		memcpy(all, init_net.ipv4.devconf_all, sizeof(ipv4_devconf));
+		memcpy(dflt, init_net.ipv4.devconf_dflt, sizeof(ipv4_devconf_dflt));
 	}
 
 #ifdef CONFIG_SYSCTL
@@ -2646,15 +2646,12 @@ err_reg_ctl:
 err_reg_dflt:
 	__devinet_sysctl_unregister(net, all, NETCONFA_IFINDEX_ALL);
 err_reg_all:
-	if (tbl != ctl_forward_entry)
-		kfree(tbl);
+	kfree(tbl);
 err_alloc_ctl:
 #endif
-	if (dflt != &ipv4_devconf_dflt)
-		kfree(dflt);
+	kfree(dflt);
 err_alloc_dflt:
-	if (all != &ipv4_devconf)
-		kfree(all);
+	kfree(all);
 err_alloc_all:
 	return err;
 }
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 57198b3c86da..48cd36311901 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -6902,6 +6902,11 @@ static int __net_init addrconf_init_net(struct net *net)
 	if (!dflt)
 		goto err_alloc_dflt;
 
+	if (sysctl_devconf_inherit_init_net == 1 && !net_eq(net, &init_net)) {
+		memcpy(all, init_net.ipv6.devconf_all, sizeof(ipv6_devconf));
+		memcpy(dflt, init_net.ipv6.devconf_dflt, sizeof(ipv6_devconf_dflt));
+	}
+
 	/* these will be inherited by all namespaces */
 	dflt->autoconf = ipv6_defaults.autoconf;
 	dflt->disable_ipv6 = ipv6_defaults.disable_ipv6;
-- 
cgit v1.2.3


From 9b56e0d0cc6353865c5bbc41a18161215063fd33 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Fri, 11 Jan 2019 15:22:30 +0100
Subject: MIPS: ath79: add helpers for setting clocks and expose the ref clock

Preparation for transitioning the legacy clock setup code over
to OF.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
Signed-off-by: John Crispin <john@phrozen.org>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Paul Burton <paul.burton@mips.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: James Hogan <jhogan@kernel.org>
Cc: Rob Herring <robh+dt@kernel.org>
Cc: Pengutronix Kernel Team <kernel@pengutronix.de>
Cc: linux-mips@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: devicetree@vger.kernel.org
---
 arch/mips/ath79/clock.c               | 128 ++++++++++++++++++----------------
 include/dt-bindings/clock/ath79-clk.h |   3 +-
 2 files changed, 68 insertions(+), 63 deletions(-)

(limited to 'include')

diff --git a/arch/mips/ath79/clock.c b/arch/mips/ath79/clock.c
index cf9158e3c2d9..50bc3b01a4c4 100644
--- a/arch/mips/ath79/clock.c
+++ b/arch/mips/ath79/clock.c
@@ -37,20 +37,46 @@ static struct clk_onecell_data clk_data = {
 	.clk_num = ARRAY_SIZE(clks),
 };
 
-static struct clk *__init ath79_add_sys_clkdev(
-	const char *id, unsigned long rate)
+static const char * const clk_names[ATH79_CLK_END] = {
+	[ATH79_CLK_CPU] = "cpu",
+	[ATH79_CLK_DDR] = "ddr",
+	[ATH79_CLK_AHB] = "ahb",
+	[ATH79_CLK_REF] = "ref",
+};
+
+static const char * __init ath79_clk_name(int type)
 {
-	struct clk *clk;
-	int err;
+	BUG_ON(type >= ARRAY_SIZE(clk_names) || !clk_names[type]);
+	return clk_names[type];
+}
 
-	clk = clk_register_fixed_rate(NULL, id, NULL, 0, rate);
+static void __init __ath79_set_clk(int type, const char *name, struct clk *clk)
+{
 	if (IS_ERR(clk))
-		panic("failed to allocate %s clock structure", id);
+		panic("failed to allocate %s clock structure", clk_names[type]);
 
-	err = clk_register_clkdev(clk, id, NULL);
-	if (err)
-		panic("unable to register %s clock device", id);
+	clks[type] = clk;
+	clk_register_clkdev(clk, name, NULL);
+}
 
+static struct clk * __init ath79_set_clk(int type, unsigned long rate)
+{
+	const char *name = ath79_clk_name(type);
+	struct clk *clk;
+
+	clk = clk_register_fixed_rate(NULL, name, NULL, 0, rate);
+	__ath79_set_clk(type, name, clk);
+	return clk;
+}
+
+static struct clk * __init ath79_set_ff_clk(int type, const char *parent,
+					    unsigned int mult, unsigned int div)
+{
+	const char *name = ath79_clk_name(type);
+	struct clk *clk;
+
+	clk = clk_register_fixed_factor(NULL, name, parent, 0, mult, div);
+	__ath79_set_clk(type, name, clk);
 	return clk;
 }
 
@@ -80,27 +106,15 @@ static void __init ar71xx_clocks_init(void)
 	div = (((pll >> AR71XX_AHB_DIV_SHIFT) & AR71XX_AHB_DIV_MASK) + 1) * 2;
 	ahb_rate = cpu_rate / div;
 
-	ath79_add_sys_clkdev("ref", ref_rate);
-	clks[ATH79_CLK_CPU] = ath79_add_sys_clkdev("cpu", cpu_rate);
-	clks[ATH79_CLK_DDR] = ath79_add_sys_clkdev("ddr", ddr_rate);
-	clks[ATH79_CLK_AHB] = ath79_add_sys_clkdev("ahb", ahb_rate);
+	ath79_set_clk(ATH79_CLK_REF, ref_rate);
+	ath79_set_clk(ATH79_CLK_CPU, cpu_rate);
+	ath79_set_clk(ATH79_CLK_DDR, ddr_rate);
+	ath79_set_clk(ATH79_CLK_AHB, ahb_rate);
 
 	clk_add_alias("wdt", NULL, "ahb", NULL);
 	clk_add_alias("uart", NULL, "ahb", NULL);
 }
 
-static struct clk * __init ath79_reg_ffclk(const char *name,
-		const char *parent_name, unsigned int mult, unsigned int div)
-{
-	struct clk *clk;
-
-	clk = clk_register_fixed_factor(NULL, name, parent_name, 0, mult, div);
-	if (IS_ERR(clk))
-		panic("failed to allocate %s clock structure", name);
-
-	return clk;
-}
-
 static void __init ar724x_clk_init(struct clk *ref_clk, void __iomem *pll_base)
 {
 	u32 pll;
@@ -114,24 +128,19 @@ static void __init ar724x_clk_init(struct clk *ref_clk, void __iomem *pll_base)
 	ddr_div = ((pll >> AR724X_DDR_DIV_SHIFT) & AR724X_DDR_DIV_MASK) + 1;
 	ahb_div = (((pll >> AR724X_AHB_DIV_SHIFT) & AR724X_AHB_DIV_MASK) + 1) * 2;
 
-	clks[ATH79_CLK_CPU] = ath79_reg_ffclk("cpu", "ref", mult, div);
-	clks[ATH79_CLK_DDR] = ath79_reg_ffclk("ddr", "ref", mult, div * ddr_div);
-	clks[ATH79_CLK_AHB] = ath79_reg_ffclk("ahb", "ref", mult, div * ahb_div);
+	ath79_set_ff_clk(ATH79_CLK_CPU, "ref", mult, div);
+	ath79_set_ff_clk(ATH79_CLK_DDR, "ref", mult, div * ddr_div);
+	ath79_set_ff_clk(ATH79_CLK_AHB, "ref", mult, div * ahb_div);
 }
 
 static void __init ar724x_clocks_init(void)
 {
 	struct clk *ref_clk;
 
-	ref_clk = ath79_add_sys_clkdev("ref", AR724X_BASE_FREQ);
+	ref_clk = ath79_set_clk(ATH79_CLK_REF, AR724X_BASE_FREQ);
 
 	ar724x_clk_init(ref_clk, ath79_pll_base);
 
-	/* just make happy plat_time_init() from arch/mips/ath79/setup.c */
-	clk_register_clkdev(clks[ATH79_CLK_CPU], "cpu", NULL);
-	clk_register_clkdev(clks[ATH79_CLK_DDR], "ddr", NULL);
-	clk_register_clkdev(clks[ATH79_CLK_AHB], "ahb", NULL);
-
 	clk_add_alias("wdt", NULL, "ahb", NULL);
 	clk_add_alias("uart", NULL, "ahb", NULL);
 }
@@ -186,12 +195,12 @@ static void __init ar9330_clk_init(struct clk *ref_clk, void __iomem *pll_base)
 		     AR933X_PLL_CLOCK_CTRL_AHB_DIV_MASK) + 1;
 	}
 
-	clks[ATH79_CLK_CPU] = ath79_reg_ffclk("cpu", "ref",
-					ninit_mul, ref_div * out_div * cpu_div);
-	clks[ATH79_CLK_DDR] = ath79_reg_ffclk("ddr", "ref",
-					ninit_mul, ref_div * out_div * ddr_div);
-	clks[ATH79_CLK_AHB] = ath79_reg_ffclk("ahb", "ref",
-					ninit_mul, ref_div * out_div * ahb_div);
+	ath79_set_ff_clk(ATH79_CLK_CPU, "ref", ninit_mul,
+			 ref_div * out_div * cpu_div);
+	ath79_set_ff_clk(ATH79_CLK_DDR, "ref", ninit_mul,
+			 ref_div * out_div * ddr_div);
+	ath79_set_ff_clk(ATH79_CLK_AHB, "ref", ninit_mul,
+			 ref_div * out_div * ahb_div);
 }
 
 static void __init ar933x_clocks_init(void)
@@ -206,15 +215,10 @@ static void __init ar933x_clocks_init(void)
 	else
 		ref_rate = (25 * 1000 * 1000);
 
-	ref_clk = ath79_add_sys_clkdev("ref", ref_rate);
+	ref_clk = ath79_set_clk(ATH79_CLK_REF, ref_rate);
 
 	ar9330_clk_init(ref_clk, ath79_pll_base);
 
-	/* just make happy plat_time_init() from arch/mips/ath79/setup.c */
-	clk_register_clkdev(clks[ATH79_CLK_CPU], "cpu", NULL);
-	clk_register_clkdev(clks[ATH79_CLK_DDR], "ddr", NULL);
-	clk_register_clkdev(clks[ATH79_CLK_AHB], "ahb", NULL);
-
 	clk_add_alias("wdt", NULL, "ahb", NULL);
 	clk_add_alias("uart", NULL, "ref", NULL);
 }
@@ -344,10 +348,10 @@ static void __init ar934x_clocks_init(void)
 	else
 		ahb_rate = cpu_pll / (postdiv + 1);
 
-	ath79_add_sys_clkdev("ref", ref_rate);
-	clks[ATH79_CLK_CPU] = ath79_add_sys_clkdev("cpu", cpu_rate);
-	clks[ATH79_CLK_DDR] = ath79_add_sys_clkdev("ddr", ddr_rate);
-	clks[ATH79_CLK_AHB] = ath79_add_sys_clkdev("ahb", ahb_rate);
+	ath79_set_clk(ATH79_CLK_REF, ref_rate);
+	ath79_set_clk(ATH79_CLK_CPU, cpu_rate);
+	ath79_set_clk(ATH79_CLK_DDR, ddr_rate);
+	ath79_set_clk(ATH79_CLK_AHB, ahb_rate);
 
 	clk_add_alias("wdt", NULL, "ref", NULL);
 	clk_add_alias("uart", NULL, "ref", NULL);
@@ -431,10 +435,10 @@ static void __init qca953x_clocks_init(void)
 	else
 		ahb_rate = cpu_pll / (postdiv + 1);
 
-	ath79_add_sys_clkdev("ref", ref_rate);
-	ath79_add_sys_clkdev("cpu", cpu_rate);
-	ath79_add_sys_clkdev("ddr", ddr_rate);
-	ath79_add_sys_clkdev("ahb", ahb_rate);
+	ath79_set_clk(ATH79_CLK_REF, ref_rate);
+	ath79_set_clk(ATH79_CLK_CPU, cpu_rate);
+	ath79_set_clk(ATH79_CLK_DDR, ddr_rate);
+	ath79_set_clk(ATH79_CLK_AHB, ahb_rate);
 
 	clk_add_alias("wdt", NULL, "ref", NULL);
 	clk_add_alias("uart", NULL, "ref", NULL);
@@ -516,10 +520,10 @@ static void __init qca955x_clocks_init(void)
 	else
 		ahb_rate = cpu_pll / (postdiv + 1);
 
-	ath79_add_sys_clkdev("ref", ref_rate);
-	clks[ATH79_CLK_CPU] = ath79_add_sys_clkdev("cpu", cpu_rate);
-	clks[ATH79_CLK_DDR] = ath79_add_sys_clkdev("ddr", ddr_rate);
-	clks[ATH79_CLK_AHB] = ath79_add_sys_clkdev("ahb", ahb_rate);
+	ath79_set_clk(ATH79_CLK_REF, ref_rate);
+	ath79_set_clk(ATH79_CLK_CPU, cpu_rate);
+	ath79_set_clk(ATH79_CLK_DDR, ddr_rate);
+	ath79_set_clk(ATH79_CLK_AHB, ahb_rate);
 
 	clk_add_alias("wdt", NULL, "ref", NULL);
 	clk_add_alias("uart", NULL, "ref", NULL);
@@ -620,10 +624,10 @@ static void __init qca956x_clocks_init(void)
 	else
 		ahb_rate = cpu_pll / (postdiv + 1);
 
-	ath79_add_sys_clkdev("ref", ref_rate);
-	ath79_add_sys_clkdev("cpu", cpu_rate);
-	ath79_add_sys_clkdev("ddr", ddr_rate);
-	ath79_add_sys_clkdev("ahb", ahb_rate);
+	ath79_set_clk(ATH79_CLK_REF, ref_rate);
+	ath79_set_clk(ATH79_CLK_CPU, cpu_rate);
+	ath79_set_clk(ATH79_CLK_DDR, ddr_rate);
+	ath79_set_clk(ATH79_CLK_AHB, ahb_rate);
 
 	clk_add_alias("wdt", NULL, "ref", NULL);
 	clk_add_alias("uart", NULL, "ref", NULL);
diff --git a/include/dt-bindings/clock/ath79-clk.h b/include/dt-bindings/clock/ath79-clk.h
index 27359ad83904..262d7c5eb248 100644
--- a/include/dt-bindings/clock/ath79-clk.h
+++ b/include/dt-bindings/clock/ath79-clk.h
@@ -13,7 +13,8 @@
 #define ATH79_CLK_CPU		0
 #define ATH79_CLK_DDR		1
 #define ATH79_CLK_AHB		2
+#define ATH79_CLK_REF		3
 
-#define ATH79_CLK_END		3
+#define ATH79_CLK_END		4
 
 #endif /* __DT_BINDINGS_ATH79_CLK_H */
-- 
cgit v1.2.3


From 6810ed320ec621b37c395be67125f1f456f1e55a Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Fri, 11 Jan 2019 15:22:35 +0100
Subject: MIPS: ath79: export switch MDIO reference clock

On AR934x, the MDIO reference clock can be configured to a fixed 100 MHz
clock. If that feature is not used, it defaults to the main reference
clock, like on all other SoC.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
Signed-off-by: John Crispin <john@phrozen.org>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Paul Burton <paul.burton@mips.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: James Hogan <jhogan@kernel.org>
Cc: Rob Herring <robh+dt@kernel.org>
Cc: Pengutronix Kernel Team <kernel@pengutronix.de>
Cc: linux-mips@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: devicetree@vger.kernel.org
---
 arch/mips/ath79/clock.c               | 8 ++++++++
 include/dt-bindings/clock/ath79-clk.h | 3 ++-
 2 files changed, 10 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/arch/mips/ath79/clock.c b/arch/mips/ath79/clock.c
index c234818b30e1..699f00f096cb 100644
--- a/arch/mips/ath79/clock.c
+++ b/arch/mips/ath79/clock.c
@@ -42,6 +42,7 @@ static const char * const clk_names[ATH79_CLK_END] = {
 	[ATH79_CLK_DDR] = "ddr",
 	[ATH79_CLK_AHB] = "ahb",
 	[ATH79_CLK_REF] = "ref",
+	[ATH79_CLK_MDIO] = "mdio",
 };
 
 static const char * __init ath79_clk_name(int type)
@@ -342,6 +343,10 @@ static void __init ar934x_clocks_init(void __iomem *pll_base)
 	ath79_set_clk(ATH79_CLK_DDR, ddr_rate);
 	ath79_set_clk(ATH79_CLK_AHB, ahb_rate);
 
+	clk_ctrl = __raw_readl(pll_base + AR934X_PLL_SWITCH_CLOCK_CONTROL_REG);
+	if (clk_ctrl & AR934X_PLL_SWITCH_CLOCK_CONTROL_MDIO_CLK_SEL)
+		ath79_set_clk(ATH79_CLK_MDIO, 100 * 1000 * 1000);
+
 	iounmap(dpll_base);
 }
 
@@ -698,6 +703,9 @@ static void __init ath79_clocks_init_dt(struct device_node *np)
 	else if (of_device_is_compatible(np, "qca,qca9560-pll"))
 		qca956x_clocks_init(pll_base);
 
+	if (!clks[ATH79_CLK_MDIO])
+		clks[ATH79_CLK_MDIO] = clks[ATH79_CLK_REF];
+
 	if (of_clk_add_provider(np, of_clk_src_onecell_get, &clk_data)) {
 		pr_err("%pOF: could not register clk provider\n", np);
 		goto err_iounmap;
diff --git a/include/dt-bindings/clock/ath79-clk.h b/include/dt-bindings/clock/ath79-clk.h
index 262d7c5eb248..dcc679a7ad85 100644
--- a/include/dt-bindings/clock/ath79-clk.h
+++ b/include/dt-bindings/clock/ath79-clk.h
@@ -14,7 +14,8 @@
 #define ATH79_CLK_DDR		1
 #define ATH79_CLK_AHB		2
 #define ATH79_CLK_REF		3
+#define ATH79_CLK_MDIO		4
 
-#define ATH79_CLK_END		4
+#define ATH79_CLK_END		5
 
 #endif /* __DT_BINDINGS_ATH79_CLK_H */
-- 
cgit v1.2.3


From 267c095aa2d9126059c1f5a65c660d5a71833e3f Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Date: Fri, 18 Jan 2019 14:30:22 +0200
Subject: bonding: add 3ad stats

Count the following types of 3ad packets per slave:
 - rx/tx lacpdu
 - rx/tx marker
 - rx/tx marker response
 - rx illegal lacpdus (right now counted on wrong length)
 - rx unknown lacpdu type
 - rx unknown marker type

The counters are using atomic64 since this is not fast path.

Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_3ad.c | 28 +++++++++++++++++++++++++++-
 include/net/bond_3ad.h         | 14 ++++++++++++++
 2 files changed, 41 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
index d8ef2350c68d..d1d8cb6b8cdc 100644
--- a/drivers/net/bonding/bond_3ad.c
+++ b/drivers/net/bonding/bond_3ad.c
@@ -851,6 +851,8 @@ static int ad_lacpdu_send(struct port *port)
 	if (!skb)
 		return -ENOMEM;
 
+	atomic64_inc(&SLAVE_AD_INFO(slave)->stats.lacpdu_tx);
+
 	skb->dev = slave->dev;
 	skb_reset_mac_header(skb);
 	skb->network_header = skb->mac_header + ETH_HLEN;
@@ -892,6 +894,15 @@ static int ad_marker_send(struct port *port, struct bond_marker *marker)
 	if (!skb)
 		return -ENOMEM;
 
+	switch (marker->tlv_type) {
+	case AD_MARKER_INFORMATION_SUBTYPE:
+		atomic64_inc(&SLAVE_AD_INFO(slave)->stats.marker_tx);
+		break;
+	case AD_MARKER_RESPONSE_SUBTYPE:
+		atomic64_inc(&SLAVE_AD_INFO(slave)->stats.marker_resp_tx);
+		break;
+	}
+
 	skb_reserve(skb, 16);
 
 	skb->dev = slave->dev;
@@ -1086,6 +1097,9 @@ static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port)
 	 */
 	last_state = port->sm_rx_state;
 
+	if (lacpdu)
+		atomic64_inc(&SLAVE_AD_INFO(port->slave)->stats.lacpdu_rx);
+
 	/* check if state machine should change state */
 
 	/* first, check if port was reinitialized */
@@ -1922,6 +1936,8 @@ static void ad_marker_info_received(struct bond_marker *marker_info,
 {
 	struct bond_marker marker;
 
+	atomic64_inc(&SLAVE_AD_INFO(port->slave)->stats.marker_rx);
+
 	/* copy the received marker data to the response marker */
 	memcpy(&marker, marker_info, sizeof(struct bond_marker));
 	/* change the marker subtype to marker response */
@@ -1946,6 +1962,8 @@ static void ad_marker_info_received(struct bond_marker *marker_info,
 static void ad_marker_response_received(struct bond_marker *marker,
 					struct port *port)
 {
+	atomic64_inc(&SLAVE_AD_INFO(port->slave)->stats.marker_resp_rx);
+
 	/* DO NOTHING, SINCE WE DECIDED NOT TO IMPLEMENT THIS FEATURE FOR NOW */
 }
 
@@ -2358,6 +2376,7 @@ static int bond_3ad_rx_indication(struct lacpdu *lacpdu, struct slave *slave)
 	int ret = RX_HANDLER_ANOTHER;
 	struct bond_marker *marker;
 	struct port *port;
+	atomic64_t *stat;
 
 	port = &(SLAVE_AD_INFO(slave)->port);
 	if (!port->slave) {
@@ -2397,7 +2416,12 @@ static int bond_3ad_rx_indication(struct lacpdu *lacpdu, struct slave *slave)
 		default:
 			netdev_dbg(slave->bond->dev, "Received an unknown Marker subtype on slot %d\n",
 				   port->actor_port_number);
+			stat = &SLAVE_AD_INFO(slave)->stats.marker_unknown_rx;
+			atomic64_inc(stat);
 		}
+		break;
+	default:
+		atomic64_inc(&SLAVE_AD_INFO(slave)->stats.lacpdu_unknown_rx);
 	}
 
 	return ret;
@@ -2635,8 +2659,10 @@ int bond_3ad_lacpdu_recv(const struct sk_buff *skb, struct bonding *bond,
 		return RX_HANDLER_ANOTHER;
 
 	lacpdu = skb_header_pointer(skb, 0, sizeof(_lacpdu), &_lacpdu);
-	if (!lacpdu)
+	if (!lacpdu) {
+		atomic64_inc(&SLAVE_AD_INFO(slave)->stats.lacpdu_illegal_rx);
 		return RX_HANDLER_ANOTHER;
+	}
 
 	return bond_3ad_rx_indication(lacpdu, slave);
 }
diff --git a/include/net/bond_3ad.h b/include/net/bond_3ad.h
index fc3111515f5c..30e60dba1b2d 100644
--- a/include/net/bond_3ad.h
+++ b/include/net/bond_3ad.h
@@ -180,6 +180,19 @@ struct port;
 #pragma pack(8)
 #endif
 
+struct bond_3ad_stats {
+	atomic64_t lacpdu_rx;
+	atomic64_t lacpdu_tx;
+	atomic64_t lacpdu_unknown_rx;
+	atomic64_t lacpdu_illegal_rx;
+
+	atomic64_t marker_rx;
+	atomic64_t marker_tx;
+	atomic64_t marker_resp_rx;
+	atomic64_t marker_resp_tx;
+	atomic64_t marker_unknown_rx;
+};
+
 /* aggregator structure(43.4.5 in the 802.3ad standard) */
 typedef struct aggregator {
 	struct mac_addr aggregator_mac_address;
@@ -272,6 +285,7 @@ struct ad_bond_info {
 struct ad_slave_info {
 	struct aggregator aggregator;	/* 802.3ad aggregator structure */
 	struct port port;		/* 802.3ad port structure */
+	struct bond_3ad_stats stats;
 	u16 id;
 };
 
-- 
cgit v1.2.3


From a258aeacd7f0dc10bb45caa7e92a3ea3ca1a76e9 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Date: Fri, 18 Jan 2019 14:30:23 +0200
Subject: bonding: add support for xstats and export 3ad stats

This patch adds support for extended statistics (xstats) call to the
bonding. The first user would be the 3ad code which counts the following
events:
 - LACPDU Rx/Tx
 - LACPDU unknown type Rx
 - LACPDU illegal Rx
 - Marker Rx/Tx
 - Marker response Rx/Tx
 - Marker unknown type Rx

All of these are exported via netlink as separate attributes to be
easily extensible as we plan to add more in the future.
Similar to how the bridge and other xstats exports, the structure
inside is:
 [ IFLA_STATS_LINK_XSTATS ]
   -> [ LINK_XSTATS_TYPE_BOND ]
        -> [ BOND_XSTATS_3AD ]
             -> [ 3ad stats attributes ]

With this structure it's easy to add more stat types later.

Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_3ad.c     | 83 ++++++++++++++++++++++++++++++++++++++
 drivers/net/bonding/bond_netlink.c | 71 ++++++++++++++++++++++++++++++++
 include/net/bond_3ad.h             |  3 ++
 include/uapi/linux/if_bonding.h    | 24 +++++++++++
 include/uapi/linux/if_link.h       |  1 +
 5 files changed, 182 insertions(+)

(limited to 'include')

diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
index d1d8cb6b8cdc..d30c21b34858 100644
--- a/drivers/net/bonding/bond_3ad.c
+++ b/drivers/net/bonding/bond_3ad.c
@@ -31,6 +31,7 @@
 #include <net/net_namespace.h>
 #include <net/bonding.h>
 #include <net/bond_3ad.h>
+#include <net/netlink.h>
 
 /* General definitions */
 #define AD_SHORT_TIMEOUT           1
@@ -2696,3 +2697,85 @@ void bond_3ad_update_lacp_rate(struct bonding *bond)
 	}
 	spin_unlock_bh(&bond->mode_lock);
 }
+
+void bond_3ad_stats_add(struct slave *slave, struct bond_3ad_stats *stats)
+{
+	struct bond_3ad_stats *rstats = &SLAVE_AD_INFO(slave)->stats;
+	u64 stat;
+
+	atomic64_add(atomic64_read(&rstats->lacpdu_rx), &stats->lacpdu_rx);
+	atomic64_add(atomic64_read(&rstats->lacpdu_tx), &stats->lacpdu_tx);
+
+	stat = atomic64_read(&rstats->lacpdu_unknown_rx);
+	atomic64_add(stat, &stats->lacpdu_unknown_rx);
+	stat = atomic64_read(&rstats->lacpdu_illegal_rx);
+	atomic64_add(stat, &stats->lacpdu_illegal_rx);
+
+	atomic64_add(atomic64_read(&rstats->marker_rx), &stats->marker_rx);
+	atomic64_add(atomic64_read(&rstats->marker_tx), &stats->marker_tx);
+
+	stat = atomic64_read(&rstats->marker_resp_rx);
+	atomic64_add(stat, &stats->marker_resp_rx);
+	stat = atomic64_read(&rstats->marker_resp_tx);
+	atomic64_add(stat, &stats->marker_resp_tx);
+	stat = atomic64_read(&rstats->marker_unknown_rx);
+	atomic64_add(stat, &stats->marker_unknown_rx);
+}
+
+size_t bond_3ad_stats_size(void)
+{
+	return nla_total_size_64bit(sizeof(u64)) + /* BOND_3AD_STAT_LACPDU_RX */
+	       nla_total_size_64bit(sizeof(u64)) + /* BOND_3AD_STAT_LACPDU_TX */
+	       nla_total_size_64bit(sizeof(u64)) + /* BOND_3AD_STAT_LACPDU_UNKNOWN_RX */
+	       nla_total_size_64bit(sizeof(u64)) + /* BOND_3AD_STAT_LACPDU_ILLEGAL_RX */
+	       nla_total_size_64bit(sizeof(u64)) + /* BOND_3AD_STAT_MARKER_RX */
+	       nla_total_size_64bit(sizeof(u64)) + /* BOND_3AD_STAT_MARKER_TX */
+	       nla_total_size_64bit(sizeof(u64)) + /* BOND_3AD_STAT_MARKER_RESP_RX */
+	       nla_total_size_64bit(sizeof(u64)) + /* BOND_3AD_STAT_MARKER_RESP_TX */
+	       nla_total_size_64bit(sizeof(u64)); /* BOND_3AD_STAT_MARKER_UNKNOWN_RX */
+}
+
+int bond_3ad_stats_fill(struct sk_buff *skb, struct bond_3ad_stats *stats)
+{
+	u64 val;
+
+	val = atomic64_read(&stats->lacpdu_rx);
+	if (nla_put_u64_64bit(skb, BOND_3AD_STAT_LACPDU_RX, val,
+			      BOND_3AD_STAT_PAD))
+		return -EMSGSIZE;
+	val = atomic64_read(&stats->lacpdu_tx);
+	if (nla_put_u64_64bit(skb, BOND_3AD_STAT_LACPDU_TX, val,
+			      BOND_3AD_STAT_PAD))
+		return -EMSGSIZE;
+	val = atomic64_read(&stats->lacpdu_unknown_rx);
+	if (nla_put_u64_64bit(skb, BOND_3AD_STAT_LACPDU_UNKNOWN_RX, val,
+			      BOND_3AD_STAT_PAD))
+		return -EMSGSIZE;
+	val = atomic64_read(&stats->lacpdu_illegal_rx);
+	if (nla_put_u64_64bit(skb, BOND_3AD_STAT_LACPDU_ILLEGAL_RX, val,
+			      BOND_3AD_STAT_PAD))
+		return -EMSGSIZE;
+
+	val = atomic64_read(&stats->marker_rx);
+	if (nla_put_u64_64bit(skb, BOND_3AD_STAT_MARKER_RX, val,
+			      BOND_3AD_STAT_PAD))
+		return -EMSGSIZE;
+	val = atomic64_read(&stats->marker_tx);
+	if (nla_put_u64_64bit(skb, BOND_3AD_STAT_MARKER_TX, val,
+			      BOND_3AD_STAT_PAD))
+		return -EMSGSIZE;
+	val = atomic64_read(&stats->marker_resp_rx);
+	if (nla_put_u64_64bit(skb, BOND_3AD_STAT_MARKER_RESP_RX, val,
+			      BOND_3AD_STAT_PAD))
+		return -EMSGSIZE;
+	val = atomic64_read(&stats->marker_resp_tx);
+	if (nla_put_u64_64bit(skb, BOND_3AD_STAT_MARKER_RESP_TX, val,
+			      BOND_3AD_STAT_PAD))
+		return -EMSGSIZE;
+	val = atomic64_read(&stats->marker_unknown_rx);
+	if (nla_put_u64_64bit(skb, BOND_3AD_STAT_MARKER_UNKNOWN_RX, val,
+			      BOND_3AD_STAT_PAD))
+		return -EMSGSIZE;
+
+	return 0;
+}
diff --git a/drivers/net/bonding/bond_netlink.c b/drivers/net/bonding/bond_netlink.c
index 6b9ad8673218..d1338fbe1830 100644
--- a/drivers/net/bonding/bond_netlink.c
+++ b/drivers/net/bonding/bond_netlink.c
@@ -675,6 +675,75 @@ nla_put_failure:
 	return -EMSGSIZE;
 }
 
+static size_t bond_get_linkxstats_size(const struct net_device *dev, int attr)
+{
+	switch (attr) {
+	case IFLA_STATS_LINK_XSTATS:
+	case IFLA_STATS_LINK_XSTATS_SLAVE:
+		break;
+	default:
+		return 0;
+	}
+
+	return bond_3ad_stats_size() + nla_total_size(0);
+}
+
+static int bond_fill_linkxstats(struct sk_buff *skb,
+				const struct net_device *dev,
+				int *prividx, int attr)
+{
+	struct nlattr *nla __maybe_unused;
+	struct slave *slave = NULL;
+	struct nlattr *nest, *nest2;
+	struct bonding *bond;
+
+	switch (attr) {
+	case IFLA_STATS_LINK_XSTATS:
+		bond = netdev_priv(dev);
+		break;
+	case IFLA_STATS_LINK_XSTATS_SLAVE:
+		slave = bond_slave_get_rtnl(dev);
+		if (!slave)
+			return 0;
+		bond = slave->bond;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	nest = nla_nest_start(skb, LINK_XSTATS_TYPE_BOND);
+	if (!nest)
+		return -EMSGSIZE;
+	if (BOND_MODE(bond) == BOND_MODE_8023AD) {
+		struct bond_3ad_stats stats;
+		struct list_head *iter;
+
+		memset(&stats, 0, sizeof(stats));
+		if (slave) {
+			bond_3ad_stats_add(slave, &stats);
+		} else {
+			bond_for_each_slave(bond, slave, iter)
+				bond_3ad_stats_add(slave, &stats);
+		}
+
+		nest2 = nla_nest_start(skb, BOND_XSTATS_3AD);
+		if (!nest2) {
+			nla_nest_end(skb, nest);
+			return -EMSGSIZE;
+		}
+
+		if (bond_3ad_stats_fill(skb, &stats)) {
+			nla_nest_cancel(skb, nest2);
+			nla_nest_end(skb, nest);
+			return -EMSGSIZE;
+		}
+		nla_nest_end(skb, nest2);
+	}
+	nla_nest_end(skb, nest);
+
+	return 0;
+}
+
 struct rtnl_link_ops bond_link_ops __read_mostly = {
 	.kind			= "bond",
 	.priv_size		= sizeof(struct bonding),
@@ -689,6 +758,8 @@ struct rtnl_link_ops bond_link_ops __read_mostly = {
 	.get_num_tx_queues	= bond_get_num_tx_queues,
 	.get_num_rx_queues	= bond_get_num_tx_queues, /* Use the same number
 							     as for TX queues */
+	.fill_linkxstats        = bond_fill_linkxstats,
+	.get_linkxstats_size    = bond_get_linkxstats_size,
 	.slave_maxtype		= IFLA_BOND_SLAVE_MAX,
 	.slave_policy		= bond_slave_policy,
 	.slave_changelink	= bond_slave_changelink,
diff --git a/include/net/bond_3ad.h b/include/net/bond_3ad.h
index 30e60dba1b2d..25aaf49d19be 100644
--- a/include/net/bond_3ad.h
+++ b/include/net/bond_3ad.h
@@ -321,5 +321,8 @@ int bond_3ad_lacpdu_recv(const struct sk_buff *skb, struct bonding *bond,
 int bond_3ad_set_carrier(struct bonding *bond);
 void bond_3ad_update_lacp_rate(struct bonding *bond);
 void bond_3ad_update_ad_actor_settings(struct bonding *bond);
+void bond_3ad_stats_add(struct slave *slave, struct bond_3ad_stats *stats);
+int bond_3ad_stats_fill(struct sk_buff *skb, struct bond_3ad_stats *stats);
+size_t bond_3ad_stats_size(void);
 #endif /* _NET_BOND_3AD_H */
 
diff --git a/include/uapi/linux/if_bonding.h b/include/uapi/linux/if_bonding.h
index 61a1bf6e865e..790585f0e61b 100644
--- a/include/uapi/linux/if_bonding.h
+++ b/include/uapi/linux/if_bonding.h
@@ -117,6 +117,30 @@ struct ad_info {
 	__u8 partner_system[ETH_ALEN];
 };
 
+/* Embedded inside LINK_XSTATS_TYPE_BOND */
+enum {
+	BOND_XSTATS_UNSPEC,
+	BOND_XSTATS_3AD,
+	__BOND_XSTATS_MAX
+};
+#define BOND_XSTATS_MAX (__BOND_XSTATS_MAX - 1)
+
+/* Embedded inside BOND_XSTATS_3AD */
+enum {
+	BOND_3AD_STAT_LACPDU_RX,
+	BOND_3AD_STAT_LACPDU_TX,
+	BOND_3AD_STAT_LACPDU_UNKNOWN_RX,
+	BOND_3AD_STAT_LACPDU_ILLEGAL_RX,
+	BOND_3AD_STAT_MARKER_RX,
+	BOND_3AD_STAT_MARKER_TX,
+	BOND_3AD_STAT_MARKER_RESP_RX,
+	BOND_3AD_STAT_MARKER_RESP_TX,
+	BOND_3AD_STAT_MARKER_UNKNOWN_RX,
+	BOND_3AD_STAT_PAD,
+	__BOND_3AD_STAT_MAX
+};
+#define BOND_3AD_STAT_MAX (__BOND_3AD_STAT_MAX - 1)
+
 #endif /* _LINUX_IF_BONDING_H */
 
 /*
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index d6533828123a..5b225ff63b48 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -925,6 +925,7 @@ enum {
 enum {
 	LINK_XSTATS_TYPE_UNSPEC,
 	LINK_XSTATS_TYPE_BRIDGE,
+	LINK_XSTATS_TYPE_BOND,
 	__LINK_XSTATS_TYPE_MAX
 };
 #define LINK_XSTATS_TYPE_MAX (__LINK_XSTATS_TYPE_MAX - 1)
-- 
cgit v1.2.3


From 5b93ac542301026eff8954589cf59f801d03db3e Mon Sep 17 00:00:00 2001
From: Rajendra Nayak <rnayak@codeaurora.org>
Date: Thu, 10 Jan 2019 09:32:02 +0530
Subject: OPP: Add support for parsing the 'opp-level' property

Now that the OPP bindings are updated to include an optional
'opp-level' property, add support to parse it from device tree
and store it as part of dev_pm_opp structure.
Also add and export an helper 'dev_pm_opp_get_level()' that can be
used to get the level value read from device tree when present.

Reviewed-by: Stephen Boyd <swboyd@chromium.org>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rajendra Nayak <rnayak@codeaurora.org>
Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Signed-off-by: Andy Gross <andy.gross@linaro.org>
---
 drivers/opp/core.c     | 18 ++++++++++++++++++
 drivers/opp/of.c       |  2 ++
 drivers/opp/opp.h      |  2 ++
 include/linux/pm_opp.h |  7 +++++++
 4 files changed, 29 insertions(+)

(limited to 'include')

diff --git a/drivers/opp/core.c b/drivers/opp/core.c
index e5507add8f04..90b78a122be9 100644
--- a/drivers/opp/core.c
+++ b/drivers/opp/core.c
@@ -130,6 +130,24 @@ unsigned long dev_pm_opp_get_freq(struct dev_pm_opp *opp)
 }
 EXPORT_SYMBOL_GPL(dev_pm_opp_get_freq);
 
+/**
+ * dev_pm_opp_get_level() - Gets the level corresponding to an available opp
+ * @opp:	opp for which level value has to be returned for
+ *
+ * Return: level read from device tree corresponding to the opp, else
+ * return 0.
+ */
+unsigned int dev_pm_opp_get_level(struct dev_pm_opp *opp)
+{
+	if (IS_ERR_OR_NULL(opp) || !opp->available) {
+		pr_err("%s: Invalid parameters\n", __func__);
+		return 0;
+	}
+
+	return opp->level;
+}
+EXPORT_SYMBOL_GPL(dev_pm_opp_get_level);
+
 /**
  * dev_pm_opp_is_turbo() - Returns if opp is turbo OPP or not
  * @opp: opp for which turbo mode is being verified
diff --git a/drivers/opp/of.c b/drivers/opp/of.c
index 06f0f632ec47..1779f2c93291 100644
--- a/drivers/opp/of.c
+++ b/drivers/opp/of.c
@@ -594,6 +594,8 @@ static struct dev_pm_opp *_opp_add_static_v2(struct opp_table *opp_table,
 		new_opp->rate = (unsigned long)rate;
 	}
 
+	of_property_read_u32(np, "opp-level", &new_opp->level);
+
 	/* Check if the OPP supports hardware's hierarchy of versions or not */
 	if (!_opp_is_supported(dev, opp_table, np)) {
 		dev_dbg(dev, "OPP not supported by hardware: %llu\n", rate);
diff --git a/drivers/opp/opp.h b/drivers/opp/opp.h
index e24d81497375..4458175aa661 100644
--- a/drivers/opp/opp.h
+++ b/drivers/opp/opp.h
@@ -60,6 +60,7 @@ extern struct list_head opp_tables;
  * @suspend:	true if suspend OPP
  * @pstate: Device's power domain's performance state.
  * @rate:	Frequency in hertz
+ * @level:	Performance level
  * @supplies:	Power supplies voltage/current values
  * @clock_latency_ns: Latency (in nanoseconds) of switching to this OPP's
  *		frequency from any other OPP's frequency.
@@ -80,6 +81,7 @@ struct dev_pm_opp {
 	bool suspend;
 	unsigned int pstate;
 	unsigned long rate;
+	unsigned int level;
 
 	struct dev_pm_opp_supply *supplies;
 
diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h
index 0a2a88e5a383..473d2c7516f0 100644
--- a/include/linux/pm_opp.h
+++ b/include/linux/pm_opp.h
@@ -86,6 +86,8 @@ unsigned long dev_pm_opp_get_voltage(struct dev_pm_opp *opp);
 
 unsigned long dev_pm_opp_get_freq(struct dev_pm_opp *opp);
 
+unsigned int dev_pm_opp_get_level(struct dev_pm_opp *opp);
+
 bool dev_pm_opp_is_turbo(struct dev_pm_opp *opp);
 
 int dev_pm_opp_get_opp_count(struct device *dev);
@@ -157,6 +159,11 @@ static inline unsigned long dev_pm_opp_get_freq(struct dev_pm_opp *opp)
 	return 0;
 }
 
+static inline unsigned int dev_pm_opp_get_level(struct dev_pm_opp *opp)
+{
+	return 0;
+}
+
 static inline bool dev_pm_opp_is_turbo(struct dev_pm_opp *opp)
 {
 	return false;
-- 
cgit v1.2.3


From c6e6eff4d48f8b7180f9f94f1615ffe345fd7e5c Mon Sep 17 00:00:00 2001
From: Rajendra Nayak <rnayak@codeaurora.org>
Date: Thu, 10 Jan 2019 09:32:03 +0530
Subject: dt-bindings: power: Add qcom rpm power domain driver bindings

Add DT bindings to describe the rpm/rpmh power domains found on Qualcomm
Technologies, Inc. SoCs. These power domains communicate a performance
state to RPM/RPMh, which then translates it into corresponding voltage on a
PMIC rail.

Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
Reviewed-by: Stephen Boyd <swboyd@chromium.org>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Rajendra Nayak <rnayak@codeaurora.org>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Signed-off-by: Andy Gross <andy.gross@linaro.org>
---
 .../devicetree/bindings/power/qcom,rpmpd.txt       | 145 +++++++++++++++++++++
 include/dt-bindings/power/qcom-rpmpd.h             |  39 ++++++
 2 files changed, 184 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/power/qcom,rpmpd.txt
 create mode 100644 include/dt-bindings/power/qcom-rpmpd.h

(limited to 'include')

diff --git a/Documentation/devicetree/bindings/power/qcom,rpmpd.txt b/Documentation/devicetree/bindings/power/qcom,rpmpd.txt
new file mode 100644
index 000000000000..980e5413d18f
--- /dev/null
+++ b/Documentation/devicetree/bindings/power/qcom,rpmpd.txt
@@ -0,0 +1,145 @@
+Qualcomm RPM/RPMh Power domains
+
+For RPM/RPMh Power domains, we communicate a performance state to RPM/RPMh
+which then translates it into a corresponding voltage on a rail
+
+Required Properties:
+ - compatible: Should be one of the following
+	* qcom,msm8996-rpmpd: RPM Power domain for the msm8996 family of SoC
+	* qcom,sdm845-rpmhpd: RPMh Power domain for the sdm845 family of SoC
+ - #power-domain-cells: number of cells in Power domain specifier
+	must be 1.
+ - operating-points-v2: Phandle to the OPP table for the Power domain.
+	Refer to Documentation/devicetree/bindings/power/power_domain.txt
+	and Documentation/devicetree/bindings/opp/opp.txt for more details
+
+Refer to <dt-bindings/power/qcom-rpmpd.h> for the level values for
+various OPPs for different platforms as well as Power domain indexes
+
+Example: rpmh power domain controller and OPP table
+
+#include <dt-bindings/power/qcom-rpmhpd.h>
+
+opp-level values specified in the OPP tables for RPMh power domains
+should use the RPMH_REGULATOR_LEVEL_* constants from
+<dt-bindings/power/qcom-rpmhpd.h>
+
+	rpmhpd: power-controller {
+		compatible = "qcom,sdm845-rpmhpd";
+		#power-domain-cells = <1>;
+		operating-points-v2 = <&rpmhpd_opp_table>;
+
+		rpmhpd_opp_table: opp-table {
+			compatible = "operating-points-v2";
+
+			rpmhpd_opp_ret: opp1 {
+				opp-level = <RPMH_REGULATOR_LEVEL_RETENTION>;
+			};
+
+			rpmhpd_opp_min_svs: opp2 {
+				opp-level = <RPMH_REGULATOR_LEVEL_MIN_SVS>;
+			};
+
+			rpmhpd_opp_low_svs: opp3 {
+				opp-level = <RPMH_REGULATOR_LEVEL_LOW_SVS>;
+			};
+
+			rpmhpd_opp_svs: opp4 {
+				opp-level = <RPMH_REGULATOR_LEVEL_SVS>;
+			};
+
+			rpmhpd_opp_svs_l1: opp5 {
+				opp-level = <RPMH_REGULATOR_LEVEL_SVS_L1>;
+			};
+
+			rpmhpd_opp_nom: opp6 {
+				opp-level = <RPMH_REGULATOR_LEVEL_NOM>;
+			};
+
+			rpmhpd_opp_nom_l1: opp7 {
+				opp-level = <RPMH_REGULATOR_LEVEL_NOM_L1>;
+			};
+
+			rpmhpd_opp_nom_l2: opp8 {
+				opp-level = <RPMH_REGULATOR_LEVEL_NOM_L2>;
+			};
+
+			rpmhpd_opp_turbo: opp9 {
+				opp-level = <RPMH_REGULATOR_LEVEL_TURBO>;
+			};
+
+			rpmhpd_opp_turbo_l1: opp10 {
+				opp-level = <RPMH_REGULATOR_LEVEL_TURBO_L1>;
+			};
+		};
+	};
+
+Example: rpm power domain controller and OPP table
+
+	rpmpd: power-controller {
+		compatible = "qcom,msm8996-rpmpd";
+		#power-domain-cells = <1>;
+		operating-points-v2 = <&rpmpd_opp_table>;
+
+		rpmpd_opp_table: opp-table {
+			compatible = "operating-points-v2";
+
+			rpmpd_opp_low: opp1 {
+				opp-level = <1>;
+			};
+
+			rpmpd_opp_ret: opp2 {
+				opp-level = <2>;
+			};
+
+			rpmpd_opp_svs: opp3 {
+				opp-level = <3>;
+			};
+
+			rpmpd_opp_normal: opp4 {
+				opp-level = <4>;
+			};
+
+			rpmpd_opp_high: opp5 {
+				opp-level = <5>;
+			};
+
+			rpmpd_opp_turbo: opp6 {
+				opp-level = <6>;
+			};
+		};
+	};
+
+Example: Client/Consumer device using OPP table
+
+	leaky-device0@12350000 {
+		compatible = "foo,i-leak-current";
+		reg = <0x12350000 0x1000>;
+		power-domains = <&rpmhpd SDM845_MX>;
+		operating-points-v2 = <&leaky_opp_table>;
+	};
+
+
+	leaky_opp_table: opp-table {
+		compatible = "operating-points-v2";
+
+		opp1 {
+			opp-hz = /bits/ 64 <144000>;
+			required-opps = <&rpmhpd_opp_low>;
+		};
+
+		opp2 {
+			opp-hz = /bits/ 64 <400000>;
+			required-opps = <&rpmhpd_opp_ret>;
+		};
+
+		opp3 {
+			opp-hz = /bits/ 64 <20000000>;
+			required-opps = <&rpmpd_opp_svs>;
+		};
+
+		opp4 {
+			opp-hz = /bits/ 64 <25000000>;
+			required-opps = <&rpmpd_opp_normal>;
+		};
+	};
diff --git a/include/dt-bindings/power/qcom-rpmpd.h b/include/dt-bindings/power/qcom-rpmpd.h
new file mode 100644
index 000000000000..87d9c6611682
--- /dev/null
+++ b/include/dt-bindings/power/qcom-rpmpd.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018, The Linux Foundation. All rights reserved. */
+
+#ifndef _DT_BINDINGS_POWER_QCOM_RPMPD_H
+#define _DT_BINDINGS_POWER_QCOM_RPMPD_H
+
+/* SDM845 Power Domain Indexes */
+#define SDM845_EBI	0
+#define SDM845_MX	1
+#define SDM845_MX_AO	2
+#define SDM845_CX	3
+#define SDM845_CX_AO	4
+#define SDM845_LMX	5
+#define SDM845_LCX	6
+#define SDM845_GFX	7
+#define SDM845_MSS	8
+
+/* SDM845 Power Domain performance levels */
+#define RPMH_REGULATOR_LEVEL_RETENTION	16
+#define RPMH_REGULATOR_LEVEL_MIN_SVS	48
+#define RPMH_REGULATOR_LEVEL_LOW_SVS	64
+#define RPMH_REGULATOR_LEVEL_SVS	128
+#define RPMH_REGULATOR_LEVEL_SVS_L1	192
+#define RPMH_REGULATOR_LEVEL_NOM	256
+#define RPMH_REGULATOR_LEVEL_NOM_L1	320
+#define RPMH_REGULATOR_LEVEL_NOM_L2	336
+#define RPMH_REGULATOR_LEVEL_TURBO	384
+#define RPMH_REGULATOR_LEVEL_TURBO_L1	416
+
+/* MSM8996 Power Domain Indexes */
+#define MSM8996_VDDCX		0
+#define MSM8996_VDDCX_AO	1
+#define MSM8996_VDDCX_VFC	2
+#define MSM8996_VDDMX		3
+#define MSM8996_VDDMX_AO	4
+#define MSM8996_VDDSSCX		5
+#define MSM8996_VDDSSCX_VFC	6
+
+#endif
-- 
cgit v1.2.3


From ba5ea614622dca6d675b4cc8a97270569ae13a23 Mon Sep 17 00:00:00 2001
From: Linus Lüssing <linus.luessing@c0d3.blue>
Date: Mon, 21 Jan 2019 07:26:25 +0100
Subject: bridge: simplify ip_mc_check_igmp() and ipv6_mc_check_mld() calls
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch refactors ip_mc_check_igmp(), ipv6_mc_check_mld() and
their callers (more precisely, the Linux bridge) to not rely on
the skb_trimmed parameter anymore.

An skb with its tail trimmed to the IP packet length was initially
introduced for the following three reasons:

1) To be able to verify the ICMPv6 checksum.
2) To be able to distinguish the version of an IGMP or MLD query.
   They are distinguishable only by their size.
3) To avoid parsing data for an IGMPv3 or MLDv2 report that is
   beyond the IP packet but still within the skb.

The first case still uses a cloned and potentially trimmed skb to
verfiy. However, there is no need to propagate it to the caller.
For the second and third case explicit IP packet length checks were
added.

This hopefully makes ip_mc_check_igmp() and ipv6_mc_check_mld() easier
to read and verfiy, as well as easier to use.

Signed-off-by: Linus Lüssing <linus.luessing@c0d3.blue>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/igmp.h       | 11 ++++++++-
 include/linux/ip.h         |  5 ++++
 include/linux/ipv6.h       |  6 +++++
 include/net/addrconf.h     | 12 +++++++++-
 net/batman-adv/multicast.c |  4 ++--
 net/bridge/br_multicast.c  | 57 +++++++++++++++++++++++-----------------------
 net/ipv4/igmp.c            | 23 ++++---------------
 net/ipv6/mcast_snoop.c     | 24 ++++---------------
 8 files changed, 70 insertions(+), 72 deletions(-)

(limited to 'include')

diff --git a/include/linux/igmp.h b/include/linux/igmp.h
index 119f53941c12..8b4348f69bc5 100644
--- a/include/linux/igmp.h
+++ b/include/linux/igmp.h
@@ -18,6 +18,7 @@
 #include <linux/skbuff.h>
 #include <linux/timer.h>
 #include <linux/in.h>
+#include <linux/ip.h>
 #include <linux/refcount.h>
 #include <uapi/linux/igmp.h>
 
@@ -106,6 +107,14 @@ struct ip_mc_list {
 #define IGMPV3_QQIC(value) IGMPV3_EXP(0x80, 4, 3, value)
 #define IGMPV3_MRC(value) IGMPV3_EXP(0x80, 4, 3, value)
 
+static inline int ip_mc_may_pull(struct sk_buff *skb, unsigned int len)
+{
+	if (skb_transport_offset(skb) + ip_transport_len(skb) < len)
+		return -EINVAL;
+
+	return pskb_may_pull(skb, len);
+}
+
 extern int ip_check_mc_rcu(struct in_device *dev, __be32 mc_addr, __be32 src_addr, u8 proto);
 extern int igmp_rcv(struct sk_buff *);
 extern int ip_mc_join_group(struct sock *sk, struct ip_mreqn *imr);
@@ -130,6 +139,6 @@ extern void ip_mc_unmap(struct in_device *);
 extern void ip_mc_remap(struct in_device *);
 extern void ip_mc_dec_group(struct in_device *in_dev, __be32 addr);
 extern void ip_mc_inc_group(struct in_device *in_dev, __be32 addr);
-int ip_mc_check_igmp(struct sk_buff *skb, struct sk_buff **skb_trimmed);
+int ip_mc_check_igmp(struct sk_buff *skb);
 
 #endif
diff --git a/include/linux/ip.h b/include/linux/ip.h
index 492bc6513533..482b7b7c9f30 100644
--- a/include/linux/ip.h
+++ b/include/linux/ip.h
@@ -34,4 +34,9 @@ static inline struct iphdr *ipip_hdr(const struct sk_buff *skb)
 {
 	return (struct iphdr *)skb_transport_header(skb);
 }
+
+static inline unsigned int ip_transport_len(const struct sk_buff *skb)
+{
+	return ntohs(ip_hdr(skb)->tot_len) - skb_network_header_len(skb);
+}
 #endif	/* _LINUX_IP_H */
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 495e834c1367..6d45ce784bea 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -104,6 +104,12 @@ static inline struct ipv6hdr *ipipv6_hdr(const struct sk_buff *skb)
 	return (struct ipv6hdr *)skb_transport_header(skb);
 }
 
+static inline unsigned int ipv6_transport_len(const struct sk_buff *skb)
+{
+	return ntohs(ipv6_hdr(skb)->payload_len) + sizeof(struct ipv6hdr) -
+	       skb_network_header_len(skb);
+}
+
 /* 
    This structure contains results of exthdrs parsing
    as offsets from skb->nh.
diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index 1656c5978498..daf11dcb0f70 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -49,6 +49,7 @@ struct prefix_info {
 	struct in6_addr		prefix;
 };
 
+#include <linux/ipv6.h>
 #include <linux/netdevice.h>
 #include <net/if_inet6.h>
 #include <net/ipv6.h>
@@ -201,6 +202,15 @@ u32 ipv6_addr_label(struct net *net, const struct in6_addr *addr,
 /*
  *	multicast prototypes (mcast.c)
  */
+static inline int ipv6_mc_may_pull(struct sk_buff *skb,
+				   unsigned int len)
+{
+	if (skb_transport_offset(skb) + ipv6_transport_len(skb) < len)
+		return -EINVAL;
+
+	return pskb_may_pull(skb, len);
+}
+
 int ipv6_sock_mc_join(struct sock *sk, int ifindex,
 		      const struct in6_addr *addr);
 int ipv6_sock_mc_drop(struct sock *sk, int ifindex,
@@ -219,7 +229,7 @@ void ipv6_mc_unmap(struct inet6_dev *idev);
 void ipv6_mc_remap(struct inet6_dev *idev);
 void ipv6_mc_init_dev(struct inet6_dev *idev);
 void ipv6_mc_destroy_dev(struct inet6_dev *idev);
-int ipv6_mc_check_mld(struct sk_buff *skb, struct sk_buff **skb_trimmed);
+int ipv6_mc_check_mld(struct sk_buff *skb);
 void addrconf_dad_failure(struct sk_buff *skb, struct inet6_ifaddr *ifp);
 
 bool ipv6_chk_mcast_addr(struct net_device *dev, const struct in6_addr *group,
diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c
index 69244e4598f5..1dd70f048e7b 100644
--- a/net/batman-adv/multicast.c
+++ b/net/batman-adv/multicast.c
@@ -674,7 +674,7 @@ static void batadv_mcast_mla_update(struct work_struct *work)
  */
 static bool batadv_mcast_is_report_ipv4(struct sk_buff *skb)
 {
-	if (ip_mc_check_igmp(skb, NULL) < 0)
+	if (ip_mc_check_igmp(skb) < 0)
 		return false;
 
 	switch (igmp_hdr(skb)->type) {
@@ -741,7 +741,7 @@ static int batadv_mcast_forw_mode_check_ipv4(struct batadv_priv *bat_priv,
  */
 static bool batadv_mcast_is_report_ipv6(struct sk_buff *skb)
 {
-	if (ipv6_mc_check_mld(skb, NULL) < 0)
+	if (ipv6_mc_check_mld(skb) < 0)
 		return false;
 
 	switch (icmp6_hdr(skb)->icmp6_type) {
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 3aeff0895669..156c4905639e 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -938,7 +938,7 @@ static int br_ip4_multicast_igmp3_report(struct net_bridge *br,
 
 	for (i = 0; i < num; i++) {
 		len += sizeof(*grec);
-		if (!pskb_may_pull(skb, len))
+		if (!ip_mc_may_pull(skb, len))
 			return -EINVAL;
 
 		grec = (void *)(skb->data + len - sizeof(*grec));
@@ -946,7 +946,7 @@ static int br_ip4_multicast_igmp3_report(struct net_bridge *br,
 		type = grec->grec_type;
 
 		len += ntohs(grec->grec_nsrcs) * 4;
-		if (!pskb_may_pull(skb, len))
+		if (!ip_mc_may_pull(skb, len))
 			return -EINVAL;
 
 		/* We treat this as an IGMPv2 report for now. */
@@ -985,15 +985,17 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br,
 					struct sk_buff *skb,
 					u16 vid)
 {
+	unsigned int nsrcs_offset;
 	const unsigned char *src;
 	struct icmp6hdr *icmp6h;
 	struct mld2_grec *grec;
+	unsigned int grec_len;
 	int i;
 	int len;
 	int num;
 	int err = 0;
 
-	if (!pskb_may_pull(skb, sizeof(*icmp6h)))
+	if (!ipv6_mc_may_pull(skb, sizeof(*icmp6h)))
 		return -EINVAL;
 
 	icmp6h = icmp6_hdr(skb);
@@ -1003,21 +1005,25 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br,
 	for (i = 0; i < num; i++) {
 		__be16 *nsrcs, _nsrcs;
 
-		nsrcs = skb_header_pointer(skb,
-					   len + offsetof(struct mld2_grec,
-							  grec_nsrcs),
+		nsrcs_offset = len + offsetof(struct mld2_grec, grec_nsrcs);
+
+		if (skb_transport_offset(skb) + ipv6_transport_len(skb) <
+		    nsrcs_offset + sizeof(_nsrcs))
+			return -EINVAL;
+
+		nsrcs = skb_header_pointer(skb, nsrcs_offset,
 					   sizeof(_nsrcs), &_nsrcs);
 		if (!nsrcs)
 			return -EINVAL;
 
-		if (!pskb_may_pull(skb,
-				   len + sizeof(*grec) +
-				   sizeof(struct in6_addr) * ntohs(*nsrcs)))
+		grec_len = sizeof(*grec) +
+			   sizeof(struct in6_addr) * ntohs(*nsrcs);
+
+		if (!ipv6_mc_may_pull(skb, len + grec_len))
 			return -EINVAL;
 
 		grec = (struct mld2_grec *)(skb->data + len);
-		len += sizeof(*grec) +
-		       sizeof(struct in6_addr) * ntohs(*nsrcs);
+		len += grec_len;
 
 		/* We treat these as MLDv1 reports for now. */
 		switch (grec->grec_type) {
@@ -1219,6 +1225,7 @@ static void br_ip4_multicast_query(struct net_bridge *br,
 				   struct sk_buff *skb,
 				   u16 vid)
 {
+	unsigned int transport_len = ip_transport_len(skb);
 	const struct iphdr *iph = ip_hdr(skb);
 	struct igmphdr *ih = igmp_hdr(skb);
 	struct net_bridge_mdb_entry *mp;
@@ -1228,7 +1235,6 @@ static void br_ip4_multicast_query(struct net_bridge *br,
 	struct br_ip saddr;
 	unsigned long max_delay;
 	unsigned long now = jiffies;
-	unsigned int offset = skb_transport_offset(skb);
 	__be32 group;
 
 	spin_lock(&br->multicast_lock);
@@ -1238,14 +1244,14 @@ static void br_ip4_multicast_query(struct net_bridge *br,
 
 	group = ih->group;
 
-	if (skb->len == offset + sizeof(*ih)) {
+	if (transport_len == sizeof(*ih)) {
 		max_delay = ih->code * (HZ / IGMP_TIMER_SCALE);
 
 		if (!max_delay) {
 			max_delay = 10 * HZ;
 			group = 0;
 		}
-	} else if (skb->len >= offset + sizeof(*ih3)) {
+	} else if (transport_len >= sizeof(*ih3)) {
 		ih3 = igmpv3_query_hdr(skb);
 		if (ih3->nsrcs)
 			goto out;
@@ -1296,6 +1302,7 @@ static int br_ip6_multicast_query(struct net_bridge *br,
 				  struct sk_buff *skb,
 				  u16 vid)
 {
+	unsigned int transport_len = ipv6_transport_len(skb);
 	const struct ipv6hdr *ip6h = ipv6_hdr(skb);
 	struct mld_msg *mld;
 	struct net_bridge_mdb_entry *mp;
@@ -1315,7 +1322,7 @@ static int br_ip6_multicast_query(struct net_bridge *br,
 	    (port && port->state == BR_STATE_DISABLED))
 		goto out;
 
-	if (skb->len == offset + sizeof(*mld)) {
+	if (transport_len == sizeof(*mld)) {
 		if (!pskb_may_pull(skb, offset + sizeof(*mld))) {
 			err = -EINVAL;
 			goto out;
@@ -1581,12 +1588,11 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br,
 				 struct sk_buff *skb,
 				 u16 vid)
 {
-	struct sk_buff *skb_trimmed = NULL;
 	const unsigned char *src;
 	struct igmphdr *ih;
 	int err;
 
-	err = ip_mc_check_igmp(skb, &skb_trimmed);
+	err = ip_mc_check_igmp(skb);
 
 	if (err == -ENOMSG) {
 		if (!ipv4_is_local_multicast(ip_hdr(skb)->daddr)) {
@@ -1612,19 +1618,16 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br,
 		err = br_ip4_multicast_add_group(br, port, ih->group, vid, src);
 		break;
 	case IGMPV3_HOST_MEMBERSHIP_REPORT:
-		err = br_ip4_multicast_igmp3_report(br, port, skb_trimmed, vid);
+		err = br_ip4_multicast_igmp3_report(br, port, skb, vid);
 		break;
 	case IGMP_HOST_MEMBERSHIP_QUERY:
-		br_ip4_multicast_query(br, port, skb_trimmed, vid);
+		br_ip4_multicast_query(br, port, skb, vid);
 		break;
 	case IGMP_HOST_LEAVE_MESSAGE:
 		br_ip4_multicast_leave_group(br, port, ih->group, vid, src);
 		break;
 	}
 
-	if (skb_trimmed && skb_trimmed != skb)
-		kfree_skb(skb_trimmed);
-
 	br_multicast_count(br, port, skb, BR_INPUT_SKB_CB(skb)->igmp,
 			   BR_MCAST_DIR_RX);
 
@@ -1637,12 +1640,11 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br,
 				 struct sk_buff *skb,
 				 u16 vid)
 {
-	struct sk_buff *skb_trimmed = NULL;
 	const unsigned char *src;
 	struct mld_msg *mld;
 	int err;
 
-	err = ipv6_mc_check_mld(skb, &skb_trimmed);
+	err = ipv6_mc_check_mld(skb);
 
 	if (err == -ENOMSG) {
 		if (!ipv6_addr_is_ll_all_nodes(&ipv6_hdr(skb)->daddr))
@@ -1664,10 +1666,10 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br,
 						 src);
 		break;
 	case ICMPV6_MLD2_REPORT:
-		err = br_ip6_multicast_mld2_report(br, port, skb_trimmed, vid);
+		err = br_ip6_multicast_mld2_report(br, port, skb, vid);
 		break;
 	case ICMPV6_MGM_QUERY:
-		err = br_ip6_multicast_query(br, port, skb_trimmed, vid);
+		err = br_ip6_multicast_query(br, port, skb, vid);
 		break;
 	case ICMPV6_MGM_REDUCTION:
 		src = eth_hdr(skb)->h_source;
@@ -1675,9 +1677,6 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br,
 		break;
 	}
 
-	if (skb_trimmed && skb_trimmed != skb)
-		kfree_skb(skb_trimmed);
-
 	br_multicast_count(br, port, skb, BR_INPUT_SKB_CB(skb)->igmp,
 			   BR_MCAST_DIR_RX);
 
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 765b2b32c4a4..b1f6d93282d7 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -1544,7 +1544,7 @@ static inline __sum16 ip_mc_validate_checksum(struct sk_buff *skb)
 	return skb_checksum_simple_validate(skb);
 }
 
-static int __ip_mc_check_igmp(struct sk_buff *skb, struct sk_buff **skb_trimmed)
+static int __ip_mc_check_igmp(struct sk_buff *skb)
 
 {
 	struct sk_buff *skb_chk;
@@ -1566,16 +1566,10 @@ static int __ip_mc_check_igmp(struct sk_buff *skb, struct sk_buff **skb_trimmed)
 	if (ret)
 		goto err;
 
-	if (skb_trimmed)
-		*skb_trimmed = skb_chk;
-	/* free now unneeded clone */
-	else if (skb_chk != skb)
-		kfree_skb(skb_chk);
-
 	ret = 0;
 
 err:
-	if (ret && skb_chk && skb_chk != skb)
+	if (skb_chk && skb_chk != skb)
 		kfree_skb(skb_chk);
 
 	return ret;
@@ -1584,7 +1578,6 @@ err:
 /**
  * ip_mc_check_igmp - checks whether this is a sane IGMP packet
  * @skb: the skb to validate
- * @skb_trimmed: to store an skb pointer trimmed to IPv4 packet tail (optional)
  *
  * Checks whether an IPv4 packet is a valid IGMP packet. If so sets
  * skb transport header accordingly and returns zero.
@@ -1594,18 +1587,10 @@ err:
  * -ENOMSG: IP header validation succeeded but it is not an IGMP packet.
  * -ENOMEM: A memory allocation failure happened.
  *
- * Optionally, an skb pointer might be provided via skb_trimmed (or set it
- * to NULL): After parsing an IGMP packet successfully it will point to
- * an skb which has its tail aligned to the IP packet end. This might
- * either be the originally provided skb or a trimmed, cloned version if
- * the skb frame had data beyond the IP packet. A cloned skb allows us
- * to leave the original skb and its full frame unchanged (which might be
- * desirable for layer 2 frame jugglers).
- *
  * Caller needs to set the skb network header and free any returned skb if it
  * differs from the provided skb.
  */
-int ip_mc_check_igmp(struct sk_buff *skb, struct sk_buff **skb_trimmed)
+int ip_mc_check_igmp(struct sk_buff *skb)
 {
 	int ret = ip_mc_check_iphdr(skb);
 
@@ -1615,7 +1600,7 @@ int ip_mc_check_igmp(struct sk_buff *skb, struct sk_buff **skb_trimmed)
 	if (ip_hdr(skb)->protocol != IPPROTO_IGMP)
 		return -ENOMSG;
 
-	return __ip_mc_check_igmp(skb, skb_trimmed);
+	return __ip_mc_check_igmp(skb);
 }
 EXPORT_SYMBOL(ip_mc_check_igmp);
 
diff --git a/net/ipv6/mcast_snoop.c b/net/ipv6/mcast_snoop.c
index 9405b04eecc6..1a917dc80d5e 100644
--- a/net/ipv6/mcast_snoop.c
+++ b/net/ipv6/mcast_snoop.c
@@ -136,8 +136,7 @@ static inline __sum16 ipv6_mc_validate_checksum(struct sk_buff *skb)
 	return skb_checksum_validate(skb, IPPROTO_ICMPV6, ip6_compute_pseudo);
 }
 
-static int __ipv6_mc_check_mld(struct sk_buff *skb,
-			       struct sk_buff **skb_trimmed)
+static int __ipv6_mc_check_mld(struct sk_buff *skb)
 
 {
 	struct sk_buff *skb_chk = NULL;
@@ -160,16 +159,10 @@ static int __ipv6_mc_check_mld(struct sk_buff *skb,
 	if (ret)
 		goto err;
 
-	if (skb_trimmed)
-		*skb_trimmed = skb_chk;
-	/* free now unneeded clone */
-	else if (skb_chk != skb)
-		kfree_skb(skb_chk);
-
 	ret = 0;
 
 err:
-	if (ret && skb_chk && skb_chk != skb)
+	if (skb_chk && skb_chk != skb)
 		kfree_skb(skb_chk);
 
 	return ret;
@@ -178,7 +171,6 @@ err:
 /**
  * ipv6_mc_check_mld - checks whether this is a sane MLD packet
  * @skb: the skb to validate
- * @skb_trimmed: to store an skb pointer trimmed to IPv6 packet tail (optional)
  *
  * Checks whether an IPv6 packet is a valid MLD packet. If so sets
  * skb transport header accordingly and returns zero.
@@ -188,18 +180,10 @@ err:
  * -ENOMSG: IP header validation succeeded but it is not an MLD packet.
  * -ENOMEM: A memory allocation failure happened.
  *
- * Optionally, an skb pointer might be provided via skb_trimmed (or set it
- * to NULL): After parsing an MLD packet successfully it will point to
- * an skb which has its tail aligned to the IP packet end. This might
- * either be the originally provided skb or a trimmed, cloned version if
- * the skb frame had data beyond the IP packet. A cloned skb allows us
- * to leave the original skb and its full frame unchanged (which might be
- * desirable for layer 2 frame jugglers).
- *
  * Caller needs to set the skb network header and free any returned skb if it
  * differs from the provided skb.
  */
-int ipv6_mc_check_mld(struct sk_buff *skb, struct sk_buff **skb_trimmed)
+int ipv6_mc_check_mld(struct sk_buff *skb)
 {
 	int ret;
 
@@ -211,6 +195,6 @@ int ipv6_mc_check_mld(struct sk_buff *skb, struct sk_buff **skb_trimmed)
 	if (ret < 0)
 		return ret;
 
-	return __ipv6_mc_check_mld(skb, skb_trimmed);
+	return __ipv6_mc_check_mld(skb);
 }
 EXPORT_SYMBOL(ipv6_mc_check_mld);
-- 
cgit v1.2.3


From 4effd28c1245303dce7fd290c501ac2c11052114 Mon Sep 17 00:00:00 2001
From: Linus Lüssing <linus.luessing@c0d3.blue>
Date: Mon, 21 Jan 2019 07:26:27 +0100
Subject: bridge: join all-snoopers multicast address
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Next to snooping IGMP/MLD queries RFC4541, section 2.1.1.a) recommends
to snoop multicast router advertisements to detect multicast routers.

Multicast router advertisements are sent to an "all-snoopers"
multicast address. To be able to receive them reliably, we need to
join this group.

Otherwise other snooping switches might refrain from forwarding these
advertisements to us.

Signed-off-by: Linus Lüssing <linus.luessing@c0d3.blue>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/in.h   |  9 +++---
 net/bridge/br_multicast.c | 72 ++++++++++++++++++++++++++++++++++++++++++++++-
 net/ipv6/mcast.c          |  2 ++
 3 files changed, 78 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h
index a55cb8b10165..e7ad9d350a28 100644
--- a/include/uapi/linux/in.h
+++ b/include/uapi/linux/in.h
@@ -292,10 +292,11 @@ struct sockaddr_in {
 #define	IN_LOOPBACK(a)		((((long int) (a)) & 0xff000000) == 0x7f000000)
 
 /* Defines for Multicast INADDR */
-#define INADDR_UNSPEC_GROUP   	0xe0000000U	/* 224.0.0.0   */
-#define INADDR_ALLHOSTS_GROUP 	0xe0000001U	/* 224.0.0.1   */
-#define INADDR_ALLRTRS_GROUP    0xe0000002U	/* 224.0.0.2 */
-#define INADDR_MAX_LOCAL_GROUP  0xe00000ffU	/* 224.0.0.255 */
+#define INADDR_UNSPEC_GROUP		0xe0000000U	/* 224.0.0.0   */
+#define INADDR_ALLHOSTS_GROUP		0xe0000001U	/* 224.0.0.1   */
+#define INADDR_ALLRTRS_GROUP		0xe0000002U	/* 224.0.0.2 */
+#define INADDR_ALLSNOOPERS_GROUP	0xe000006aU	/* 224.0.0.106 */
+#define INADDR_MAX_LOCAL_GROUP		0xe00000ffU	/* 224.0.0.255 */
 #endif
 
 /* <asm/byteorder.h> contains the htonl type stuff.. */
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 156c4905639e..2366f4a2780e 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -1780,6 +1780,68 @@ void br_multicast_init(struct net_bridge *br)
 	INIT_HLIST_HEAD(&br->mdb_list);
 }
 
+static void br_ip4_multicast_join_snoopers(struct net_bridge *br)
+{
+	struct in_device *in_dev = in_dev_get(br->dev);
+
+	if (!in_dev)
+		return;
+
+	ip_mc_inc_group(in_dev, htonl(INADDR_ALLSNOOPERS_GROUP));
+	in_dev_put(in_dev);
+}
+
+#if IS_ENABLED(CONFIG_IPV6)
+static void br_ip6_multicast_join_snoopers(struct net_bridge *br)
+{
+	struct in6_addr addr;
+
+	ipv6_addr_set(&addr, htonl(0xff020000), 0, 0, htonl(0x6a));
+	ipv6_dev_mc_inc(br->dev, &addr);
+}
+#else
+static inline void br_ip6_multicast_join_snoopers(struct net_bridge *br)
+{
+}
+#endif
+
+static void br_multicast_join_snoopers(struct net_bridge *br)
+{
+	br_ip4_multicast_join_snoopers(br);
+	br_ip6_multicast_join_snoopers(br);
+}
+
+static void br_ip4_multicast_leave_snoopers(struct net_bridge *br)
+{
+	struct in_device *in_dev = in_dev_get(br->dev);
+
+	if (WARN_ON(!in_dev))
+		return;
+
+	ip_mc_dec_group(in_dev, htonl(INADDR_ALLSNOOPERS_GROUP));
+	in_dev_put(in_dev);
+}
+
+#if IS_ENABLED(CONFIG_IPV6)
+static void br_ip6_multicast_leave_snoopers(struct net_bridge *br)
+{
+	struct in6_addr addr;
+
+	ipv6_addr_set(&addr, htonl(0xff020000), 0, 0, htonl(0x6a));
+	ipv6_dev_mc_dec(br->dev, &addr);
+}
+#else
+static inline void br_ip6_multicast_leave_snoopers(struct net_bridge *br)
+{
+}
+#endif
+
+static void br_multicast_leave_snoopers(struct net_bridge *br)
+{
+	br_ip4_multicast_leave_snoopers(br);
+	br_ip6_multicast_leave_snoopers(br);
+}
+
 static void __br_multicast_open(struct net_bridge *br,
 				struct bridge_mcast_own_query *query)
 {
@@ -1793,6 +1855,9 @@ static void __br_multicast_open(struct net_bridge *br,
 
 void br_multicast_open(struct net_bridge *br)
 {
+	if (br_opt_get(br, BROPT_MULTICAST_ENABLED))
+		br_multicast_join_snoopers(br);
+
 	__br_multicast_open(br, &br->ip4_own_query);
 #if IS_ENABLED(CONFIG_IPV6)
 	__br_multicast_open(br, &br->ip6_own_query);
@@ -1808,6 +1873,9 @@ void br_multicast_stop(struct net_bridge *br)
 	del_timer_sync(&br->ip6_other_query.timer);
 	del_timer_sync(&br->ip6_own_query.timer);
 #endif
+
+	if (br_opt_get(br, BROPT_MULTICAST_ENABLED))
+		br_multicast_leave_snoopers(br);
 }
 
 void br_multicast_dev_del(struct net_bridge *br)
@@ -1943,8 +2011,10 @@ int br_multicast_toggle(struct net_bridge *br, unsigned long val)
 
 	br_mc_disabled_update(br->dev, val);
 	br_opt_toggle(br, BROPT_MULTICAST_ENABLED, !!val);
-	if (!br_opt_get(br, BROPT_MULTICAST_ENABLED))
+	if (!br_opt_get(br, BROPT_MULTICAST_ENABLED)) {
+		br_multicast_leave_snoopers(br);
 		goto unlock;
+	}
 
 	if (!netif_running(br->dev))
 		goto unlock;
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 21f6deb2aec9..42f3f5cd349f 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -940,6 +940,7 @@ int ipv6_dev_mc_inc(struct net_device *dev, const struct in6_addr *addr)
 {
 	return __ipv6_dev_mc_inc(dev, addr, MCAST_EXCLUDE);
 }
+EXPORT_SYMBOL(ipv6_dev_mc_inc);
 
 /*
  *	device multicast group del
@@ -987,6 +988,7 @@ int ipv6_dev_mc_dec(struct net_device *dev, const struct in6_addr *addr)
 
 	return err;
 }
+EXPORT_SYMBOL(ipv6_dev_mc_dec);
 
 /*
  *	check if the interface/address pair is valid
-- 
cgit v1.2.3


From 4b3087c7e37f9e499127201849e33960dc81da11 Mon Sep 17 00:00:00 2001
From: Linus Lüssing <linus.luessing@c0d3.blue>
Date: Mon, 21 Jan 2019 07:26:28 +0100
Subject: bridge: Snoop Multicast Router Advertisements
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When multiple multicast routers are present in a broadcast domain then
only one of them will be detectable via IGMP/MLD query snooping. The
multicast router with the lowest IP address will become the selected and
active querier while all other multicast routers will then refrain from
sending queries.

To detect such rather silent multicast routers, too, RFC4286
("Multicast Router Discovery") provides a standardized protocol to
detect multicast routers for multicast snooping switches.

This patch implements the necessary MRD Advertisement message parsing
and after successful processing adds such routers to the internal
multicast router list.

Signed-off-by: Linus Lüssing <linus.luessing@c0d3.blue>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/in.h          |  5 +++++
 include/net/addrconf.h      | 15 +++++++++++++
 include/uapi/linux/icmpv6.h |  2 ++
 include/uapi/linux/igmp.h   |  1 +
 net/bridge/br_multicast.c   | 55 +++++++++++++++++++++++++++++++++++++++++++++
 net/ipv6/mcast_snoop.c      |  5 ++++-
 6 files changed, 82 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/in.h b/include/linux/in.h
index 31b493734763..435e7f2a513a 100644
--- a/include/linux/in.h
+++ b/include/linux/in.h
@@ -60,6 +60,11 @@ static inline bool ipv4_is_lbcast(__be32 addr)
 	return addr == htonl(INADDR_BROADCAST);
 }
 
+static inline bool ipv4_is_all_snoopers(__be32 addr)
+{
+	return addr == htonl(INADDR_ALLSNOOPERS_GROUP);
+}
+
 static inline bool ipv4_is_zeronet(__be32 addr)
 {
 	return (addr & htonl(0xff000000)) == htonl(0x00000000);
diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index daf11dcb0f70..20d523ee2fec 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -229,6 +229,7 @@ void ipv6_mc_unmap(struct inet6_dev *idev);
 void ipv6_mc_remap(struct inet6_dev *idev);
 void ipv6_mc_init_dev(struct inet6_dev *idev);
 void ipv6_mc_destroy_dev(struct inet6_dev *idev);
+int ipv6_mc_check_icmpv6(struct sk_buff *skb);
 int ipv6_mc_check_mld(struct sk_buff *skb);
 void addrconf_dad_failure(struct sk_buff *skb, struct inet6_ifaddr *ifp);
 
@@ -499,6 +500,20 @@ static inline bool ipv6_addr_is_solict_mult(const struct in6_addr *addr)
 #endif
 }
 
+static inline bool ipv6_addr_is_all_snoopers(const struct in6_addr *addr)
+{
+#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64
+	__be64 *p = (__be64 *)addr;
+
+	return ((p[0] ^ cpu_to_be64(0xff02000000000000UL)) |
+		(p[1] ^ cpu_to_be64(0x6a))) == 0UL;
+#else
+	return ((addr->s6_addr32[0] ^ htonl(0xff020000)) |
+		addr->s6_addr32[1] | addr->s6_addr32[2] |
+		(addr->s6_addr32[3] ^ htonl(0x0000006a))) == 0;
+#endif
+}
+
 #ifdef CONFIG_PROC_FS
 int if6_proc_init(void);
 void if6_proc_exit(void);
diff --git a/include/uapi/linux/icmpv6.h b/include/uapi/linux/icmpv6.h
index caf8dc019250..325395f56bfa 100644
--- a/include/uapi/linux/icmpv6.h
+++ b/include/uapi/linux/icmpv6.h
@@ -108,6 +108,8 @@ struct icmp6hdr {
 #define ICMPV6_MOBILE_PREFIX_SOL	146
 #define ICMPV6_MOBILE_PREFIX_ADV	147
 
+#define ICMPV6_MRDISC_ADV		151
+
 /*
  *	Codes for Destination Unreachable
  */
diff --git a/include/uapi/linux/igmp.h b/include/uapi/linux/igmp.h
index 7e44ac02ca18..90c28bc466c6 100644
--- a/include/uapi/linux/igmp.h
+++ b/include/uapi/linux/igmp.h
@@ -93,6 +93,7 @@ struct igmpv3_query {
 #define IGMP_MTRACE_RESP		0x1e
 #define IGMP_MTRACE			0x1f
 
+#define IGMP_MRDISC_ADV			0x30	/* From RFC4286 */
 
 /*
  *	Use the BSD names for these for compatibility
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 2366f4a2780e..2c46c7aca571 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -14,6 +14,7 @@
 #include <linux/export.h>
 #include <linux/if_ether.h>
 #include <linux/igmp.h>
+#include <linux/in.h>
 #include <linux/jhash.h>
 #include <linux/kernel.h>
 #include <linux/log2.h>
@@ -29,10 +30,12 @@
 #include <net/ip.h>
 #include <net/switchdev.h>
 #if IS_ENABLED(CONFIG_IPV6)
+#include <linux/icmpv6.h>
 #include <net/ipv6.h>
 #include <net/mld.h>
 #include <net/ip6_checksum.h>
 #include <net/addrconf.h>
+#include <net/ipv6.h>
 #endif
 
 #include "br_private.h"
@@ -1583,6 +1586,19 @@ static void br_multicast_pim(struct net_bridge *br,
 	br_multicast_mark_router(br, port);
 }
 
+static int br_ip4_multicast_mrd_rcv(struct net_bridge *br,
+				    struct net_bridge_port *port,
+				    struct sk_buff *skb)
+{
+	if (ip_hdr(skb)->protocol != IPPROTO_IGMP ||
+	    igmp_hdr(skb)->type != IGMP_MRDISC_ADV)
+		return -ENOMSG;
+
+	br_multicast_mark_router(br, port);
+
+	return 0;
+}
+
 static int br_multicast_ipv4_rcv(struct net_bridge *br,
 				 struct net_bridge_port *port,
 				 struct sk_buff *skb,
@@ -1600,7 +1616,15 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br,
 		} else if (pim_ipv4_all_pim_routers(ip_hdr(skb)->daddr)) {
 			if (ip_hdr(skb)->protocol == IPPROTO_PIM)
 				br_multicast_pim(br, port, skb);
+		} else if (ipv4_is_all_snoopers(ip_hdr(skb)->daddr)) {
+			err = br_ip4_multicast_mrd_rcv(br, port, skb);
+
+			if (err < 0 && err != -ENOMSG) {
+				br_multicast_err_count(br, port, skb->protocol);
+				return err;
+			}
 		}
+
 		return 0;
 	} else if (err < 0) {
 		br_multicast_err_count(br, port, skb->protocol);
@@ -1635,6 +1659,27 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br,
 }
 
 #if IS_ENABLED(CONFIG_IPV6)
+static int br_ip6_multicast_mrd_rcv(struct net_bridge *br,
+				    struct net_bridge_port *port,
+				    struct sk_buff *skb)
+{
+	int ret;
+
+	if (ipv6_hdr(skb)->nexthdr != IPPROTO_ICMPV6)
+		return -ENOMSG;
+
+	ret = ipv6_mc_check_icmpv6(skb);
+	if (ret < 0)
+		return ret;
+
+	if (icmp6_hdr(skb)->icmp6_type != ICMPV6_MRDISC_ADV)
+		return -ENOMSG;
+
+	br_multicast_mark_router(br, port);
+
+	return 0;
+}
+
 static int br_multicast_ipv6_rcv(struct net_bridge *br,
 				 struct net_bridge_port *port,
 				 struct sk_buff *skb,
@@ -1649,6 +1694,16 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br,
 	if (err == -ENOMSG) {
 		if (!ipv6_addr_is_ll_all_nodes(&ipv6_hdr(skb)->daddr))
 			BR_INPUT_SKB_CB(skb)->mrouters_only = 1;
+
+		if (ipv6_addr_is_all_snoopers(&ipv6_hdr(skb)->daddr)) {
+			err = br_ip6_multicast_mrd_rcv(br, port, skb);
+
+			if (err < 0 && err != -ENOMSG) {
+				br_multicast_err_count(br, port, skb->protocol);
+				return err;
+			}
+		}
+
 		return 0;
 	} else if (err < 0) {
 		br_multicast_err_count(br, port, skb->protocol);
diff --git a/net/ipv6/mcast_snoop.c b/net/ipv6/mcast_snoop.c
index a72ddfc40eb3..55e2ac179f28 100644
--- a/net/ipv6/mcast_snoop.c
+++ b/net/ipv6/mcast_snoop.c
@@ -41,6 +41,8 @@ static int ipv6_mc_check_ip6hdr(struct sk_buff *skb)
 	if (skb->len < len || len <= offset)
 		return -EINVAL;
 
+	skb_set_transport_header(skb, offset);
+
 	return 0;
 }
 
@@ -142,7 +144,7 @@ static inline __sum16 ipv6_mc_validate_checksum(struct sk_buff *skb)
 	return skb_checksum_validate(skb, IPPROTO_ICMPV6, ip6_compute_pseudo);
 }
 
-static int ipv6_mc_check_icmpv6(struct sk_buff *skb)
+int ipv6_mc_check_icmpv6(struct sk_buff *skb)
 {
 	unsigned int len = skb_transport_offset(skb) + sizeof(struct icmp6hdr);
 	unsigned int transport_len = ipv6_transport_len(skb);
@@ -161,6 +163,7 @@ static int ipv6_mc_check_icmpv6(struct sk_buff *skb)
 
 	return 0;
 }
+EXPORT_SYMBOL(ipv6_mc_check_icmpv6);
 
 /**
  * ipv6_mc_check_mld - checks whether this is a sane MLD packet
-- 
cgit v1.2.3


From 6815d8b09282c1df8e016bd2fabf25ada6d4462b Mon Sep 17 00:00:00 2001
From: Yangbo Lu <yangbo.lu@nxp.com>
Date: Mon, 21 Jan 2019 18:41:39 +0800
Subject: ptp_qoriq: support external trigger stamp FIFO

The external trigger stamp FIFO was introduced as a new feature
for QorIQ 1588 timer IP block. This patch is to support it by
adding a new dts property "fsl,extts-fifo". Any QorIQ 1588 timer
supporting this feature is required to add this property in its
dts node.

In addition, the FIFO should be cleaned up before enabling external
trigger interrupts. Otherwise, there will be interrupts immediately
just after enabling external trigger interrupts.

Signed-off-by: Yangbo Lu <yangbo.lu@nxp.com>
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/ptp/ptp_qoriq.c       | 68 ++++++++++++++++++++++++++++++++++---------
 include/linux/fsl/ptp_qoriq.h |  3 ++
 2 files changed, 57 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/drivers/ptp/ptp_qoriq.c b/drivers/ptp/ptp_qoriq.c
index 274321471d50..a2e7702db3a4 100644
--- a/drivers/ptp/ptp_qoriq.c
+++ b/drivers/ptp/ptp_qoriq.c
@@ -88,6 +88,49 @@ static void set_fipers(struct qoriq_ptp *qoriq_ptp)
 	qoriq_write(&regs->fiper_regs->tmr_fiper2, qoriq_ptp->tmr_fiper2);
 }
 
+static int extts_clean_up(struct qoriq_ptp *qoriq_ptp, int index,
+			  bool update_event)
+{
+	struct qoriq_ptp_registers *regs = &qoriq_ptp->regs;
+	struct ptp_clock_event event;
+	void __iomem *reg_etts_l;
+	void __iomem *reg_etts_h;
+	u32 valid, stat, lo, hi;
+
+	switch (index) {
+	case 0:
+		valid = ETS1_VLD;
+		reg_etts_l = &regs->etts_regs->tmr_etts1_l;
+		reg_etts_h = &regs->etts_regs->tmr_etts1_h;
+		break;
+	case 1:
+		valid = ETS2_VLD;
+		reg_etts_l = &regs->etts_regs->tmr_etts2_l;
+		reg_etts_h = &regs->etts_regs->tmr_etts2_h;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	event.type = PTP_CLOCK_EXTTS;
+	event.index = index;
+
+	do {
+		lo = qoriq_read(reg_etts_l);
+		hi = qoriq_read(reg_etts_h);
+
+		if (update_event) {
+			event.timestamp = ((u64) hi) << 32;
+			event.timestamp |= lo;
+			ptp_clock_event(qoriq_ptp->clock, &event);
+		}
+
+		stat = qoriq_read(&regs->ctrl_regs->tmr_stat);
+	} while (qoriq_ptp->extts_fifo_support && (stat & valid));
+
+	return 0;
+}
+
 /*
  * Interrupt service routine
  */
@@ -111,24 +154,12 @@ static irqreturn_t isr(int irq, void *priv)
 
 	if (irqs & ETS1) {
 		ack |= ETS1;
-		hi = qoriq_read(&regs->etts_regs->tmr_etts1_h);
-		lo = qoriq_read(&regs->etts_regs->tmr_etts1_l);
-		event.type = PTP_CLOCK_EXTTS;
-		event.index = 0;
-		event.timestamp = ((u64) hi) << 32;
-		event.timestamp |= lo;
-		ptp_clock_event(qoriq_ptp->clock, &event);
+		extts_clean_up(qoriq_ptp, 0, true);
 	}
 
 	if (irqs & ETS2) {
 		ack |= ETS2;
-		hi = qoriq_read(&regs->etts_regs->tmr_etts2_h);
-		lo = qoriq_read(&regs->etts_regs->tmr_etts2_l);
-		event.type = PTP_CLOCK_EXTTS;
-		event.index = 1;
-		event.timestamp = ((u64) hi) << 32;
-		event.timestamp |= lo;
-		ptp_clock_event(qoriq_ptp->clock, &event);
+		extts_clean_up(qoriq_ptp, 1, true);
 	}
 
 	if (irqs & ALM2) {
@@ -278,6 +309,10 @@ static int ptp_qoriq_enable(struct ptp_clock_info *ptp,
 		default:
 			return -EINVAL;
 		}
+
+		if (on)
+			extts_clean_up(qoriq_ptp, rq->extts.index, false);
+
 		break;
 	case PTP_CLK_REQ_PPS:
 		bit = PP1EN;
@@ -441,6 +476,11 @@ static int qoriq_ptp_probe(struct platform_device *dev)
 	if (of_property_read_u32(node, "fsl,cksel", &qoriq_ptp->cksel))
 		qoriq_ptp->cksel = DEFAULT_CKSEL;
 
+	if (of_property_read_bool(node, "fsl,extts-fifo"))
+		qoriq_ptp->extts_fifo_support = true;
+	else
+		qoriq_ptp->extts_fifo_support = false;
+
 	if (of_property_read_u32(node,
 				 "fsl,tclk-period", &qoriq_ptp->tclk_period) ||
 	    of_property_read_u32(node,
diff --git a/include/linux/fsl/ptp_qoriq.h b/include/linux/fsl/ptp_qoriq.h
index c1f003aadcce..43b4b442f6a4 100644
--- a/include/linux/fsl/ptp_qoriq.h
+++ b/include/linux/fsl/ptp_qoriq.h
@@ -120,6 +120,8 @@ struct qoriq_ptp_registers {
 /* Bit definitions for the TMR_STAT register */
 #define STAT_VEC_SHIFT        (0) /* Timer general purpose status vector */
 #define STAT_VEC_MASK         (0x3f)
+#define ETS1_VLD              (1<<24)
+#define ETS2_VLD              (1<<25)
 
 /* Bit definitions for the TMR_PRSC register */
 #define PRSC_OCK_SHIFT        (0) /* Output clock division/prescale factor. */
@@ -141,6 +143,7 @@ struct qoriq_ptp {
 	struct ptp_clock *clock;
 	struct ptp_clock_info caps;
 	struct resource *rsrc;
+	bool extts_fifo_support;
 	int irq;
 	int phc_index;
 	u64 alarm_interval; /* for periodic alarm */
-- 
cgit v1.2.3


From 19df7510d5cf077c2e88a7690fb7617e6d341beb Mon Sep 17 00:00:00 2001
From: Yangbo Lu <yangbo.lu@nxp.com>
Date: Mon, 21 Jan 2019 18:41:42 +0800
Subject: ptp: add debugfs support for ptp_qoriq

This patch is to add debugfs support for ptp_qoriq. Current debugfs
supports to control fiper1/fiper2 loopback mode. If the loopback mode
is enabled, the fiper1/fiper2 pulse is looped back into trigger1/
trigger2 input. This is very useful for validating hardware and driver
without external hardware. Below is an example to enable fiper1 loopback.

echo 1 > /sys/kernel/debug/2d10e00.ptp_clock/fiper1-loopback

Signed-off-by: Yangbo Lu <yangbo.lu@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/ptp/Kconfig             |   2 +-
 drivers/ptp/Makefile            |   4 +-
 drivers/ptp/ptp_qoriq.c         |   3 ++
 drivers/ptp/ptp_qoriq_debugfs.c | 101 ++++++++++++++++++++++++++++++++++++++++
 include/linux/fsl/ptp_qoriq.h   |  12 +++++
 5 files changed, 120 insertions(+), 2 deletions(-)
 create mode 100644 drivers/ptp/ptp_qoriq_debugfs.c

(limited to 'include')

diff --git a/drivers/ptp/Kconfig b/drivers/ptp/Kconfig
index d137c480db46..aeb4a8b2e0af 100644
--- a/drivers/ptp/Kconfig
+++ b/drivers/ptp/Kconfig
@@ -53,7 +53,7 @@ config PTP_1588_CLOCK_QORIQ
 	  packets using the SO_TIMESTAMPING API.
 
 	  To compile this driver as a module, choose M here: the module
-	  will be called ptp_qoriq.
+	  will be called ptp-qoriq.
 
 config PTP_1588_CLOCK_IXP46X
 	tristate "Intel IXP46x as PTP clock"
diff --git a/drivers/ptp/Makefile b/drivers/ptp/Makefile
index 19efa9cfa950..677d1d178a3e 100644
--- a/drivers/ptp/Makefile
+++ b/drivers/ptp/Makefile
@@ -9,4 +9,6 @@ obj-$(CONFIG_PTP_1588_CLOCK_DTE)	+= ptp_dte.o
 obj-$(CONFIG_PTP_1588_CLOCK_IXP46X)	+= ptp_ixp46x.o
 obj-$(CONFIG_PTP_1588_CLOCK_PCH)	+= ptp_pch.o
 obj-$(CONFIG_PTP_1588_CLOCK_KVM)	+= ptp_kvm.o
-obj-$(CONFIG_PTP_1588_CLOCK_QORIQ)	+= ptp_qoriq.o
+obj-$(CONFIG_PTP_1588_CLOCK_QORIQ)	+= ptp-qoriq.o
+ptp-qoriq-y				+= ptp_qoriq.o
+ptp-qoriq-$(CONFIG_DEBUG_FS)		+= ptp_qoriq_debugfs.o
diff --git a/drivers/ptp/ptp_qoriq.c b/drivers/ptp/ptp_qoriq.c
index a2e7702db3a4..43416b2e8a13 100644
--- a/drivers/ptp/ptp_qoriq.c
+++ b/drivers/ptp/ptp_qoriq.c
@@ -471,6 +471,7 @@ static int qoriq_ptp_probe(struct platform_device *dev)
 
 	err = -EINVAL;
 
+	qoriq_ptp->dev = &dev->dev;
 	qoriq_ptp->caps = ptp_qoriq_caps;
 
 	if (of_property_read_u32(node, "fsl,cksel", &qoriq_ptp->cksel))
@@ -572,6 +573,7 @@ static int qoriq_ptp_probe(struct platform_device *dev)
 	}
 	qoriq_ptp->phc_index = ptp_clock_index(qoriq_ptp->clock);
 
+	ptp_qoriq_create_debugfs(qoriq_ptp);
 	platform_set_drvdata(dev, qoriq_ptp);
 
 	return 0;
@@ -597,6 +599,7 @@ static int qoriq_ptp_remove(struct platform_device *dev)
 	qoriq_write(&regs->ctrl_regs->tmr_temask, 0);
 	qoriq_write(&regs->ctrl_regs->tmr_ctrl,   0);
 
+	ptp_qoriq_remove_debugfs(qoriq_ptp);
 	ptp_clock_unregister(qoriq_ptp->clock);
 	iounmap(qoriq_ptp->base);
 	release_resource(qoriq_ptp->rsrc);
diff --git a/drivers/ptp/ptp_qoriq_debugfs.c b/drivers/ptp/ptp_qoriq_debugfs.c
new file mode 100644
index 000000000000..d904332b240d
--- /dev/null
+++ b/drivers/ptp/ptp_qoriq_debugfs.c
@@ -0,0 +1,101 @@
+// SPDX-License-Identifier: GPL-2.0+
+/* Copyright 2019 NXP
+ */
+#include <linux/device.h>
+#include <linux/debugfs.h>
+#include <linux/fsl/ptp_qoriq.h>
+
+static int ptp_qoriq_fiper1_lpbk_get(void *data, u64 *val)
+{
+	struct qoriq_ptp *qoriq_ptp = data;
+	struct qoriq_ptp_registers *regs = &qoriq_ptp->regs;
+	u32 ctrl;
+
+	ctrl = qoriq_read(&regs->ctrl_regs->tmr_ctrl);
+	*val = ctrl & PP1L ? 1 : 0;
+
+	return 0;
+}
+
+static int ptp_qoriq_fiper1_lpbk_set(void *data, u64 val)
+{
+	struct qoriq_ptp *qoriq_ptp = data;
+	struct qoriq_ptp_registers *regs = &qoriq_ptp->regs;
+	u32 ctrl;
+
+	ctrl = qoriq_read(&regs->ctrl_regs->tmr_ctrl);
+	if (val == 0)
+		ctrl &= ~PP1L;
+	else
+		ctrl |= PP1L;
+
+	qoriq_write(&regs->ctrl_regs->tmr_ctrl, ctrl);
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(ptp_qoriq_fiper1_fops, ptp_qoriq_fiper1_lpbk_get,
+			ptp_qoriq_fiper1_lpbk_set, "%llu\n");
+
+static int ptp_qoriq_fiper2_lpbk_get(void *data, u64 *val)
+{
+	struct qoriq_ptp *qoriq_ptp = data;
+	struct qoriq_ptp_registers *regs = &qoriq_ptp->regs;
+	u32 ctrl;
+
+	ctrl = qoriq_read(&regs->ctrl_regs->tmr_ctrl);
+	*val = ctrl & PP2L ? 1 : 0;
+
+	return 0;
+}
+
+static int ptp_qoriq_fiper2_lpbk_set(void *data, u64 val)
+{
+	struct qoriq_ptp *qoriq_ptp = data;
+	struct qoriq_ptp_registers *regs = &qoriq_ptp->regs;
+	u32 ctrl;
+
+	ctrl = qoriq_read(&regs->ctrl_regs->tmr_ctrl);
+	if (val == 0)
+		ctrl &= ~PP2L;
+	else
+		ctrl |= PP2L;
+
+	qoriq_write(&regs->ctrl_regs->tmr_ctrl, ctrl);
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(ptp_qoriq_fiper2_fops, ptp_qoriq_fiper2_lpbk_get,
+			ptp_qoriq_fiper2_lpbk_set, "%llu\n");
+
+void ptp_qoriq_create_debugfs(struct qoriq_ptp *qoriq_ptp)
+{
+	struct dentry *root;
+
+	root = debugfs_create_dir(dev_name(qoriq_ptp->dev), NULL);
+	if (IS_ERR(root))
+		return;
+	if (!root)
+		goto err_root;
+
+	qoriq_ptp->debugfs_root = root;
+
+	if (!debugfs_create_file("fiper1-loopback", 0600, root, qoriq_ptp,
+				 &ptp_qoriq_fiper1_fops))
+		goto err_node;
+	if (!debugfs_create_file("fiper2-loopback", 0600, root, qoriq_ptp,
+				 &ptp_qoriq_fiper2_fops))
+		goto err_node;
+	return;
+
+err_node:
+	debugfs_remove_recursive(root);
+	qoriq_ptp->debugfs_root = NULL;
+err_root:
+	dev_err(qoriq_ptp->dev, "failed to initialize debugfs\n");
+}
+
+void ptp_qoriq_remove_debugfs(struct qoriq_ptp *qoriq_ptp)
+{
+	debugfs_remove_recursive(qoriq_ptp->debugfs_root);
+	qoriq_ptp->debugfs_root = NULL;
+}
diff --git a/include/linux/fsl/ptp_qoriq.h b/include/linux/fsl/ptp_qoriq.h
index 43b4b442f6a4..94e9797e434c 100644
--- a/include/linux/fsl/ptp_qoriq.h
+++ b/include/linux/fsl/ptp_qoriq.h
@@ -143,6 +143,8 @@ struct qoriq_ptp {
 	struct ptp_clock *clock;
 	struct ptp_clock_info caps;
 	struct resource *rsrc;
+	struct dentry *debugfs_root;
+	struct device *dev;
 	bool extts_fifo_support;
 	int irq;
 	int phc_index;
@@ -169,4 +171,14 @@ static inline void qoriq_write(unsigned __iomem *addr, u32 val)
 	iowrite32be(val, addr);
 }
 
+#ifdef CONFIG_DEBUG_FS
+void ptp_qoriq_create_debugfs(struct qoriq_ptp *qoriq_ptp);
+void ptp_qoriq_remove_debugfs(struct qoriq_ptp *qoriq_ptp);
+#else
+static inline void ptp_qoriq_create_debugfs(struct qoriq_ptp *qoriq_ptp)
+{ }
+static inline void ptp_qoriq_remove_debugfs(struct qoriq_ptp *qoriq_ptp)
+{ }
+#endif
+
 #endif
-- 
cgit v1.2.3


From f57f3df03a8e6010e321fa0258d3e054713c3cb7 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Sun, 13 Jan 2019 09:50:33 +0100
Subject: ALSA: pcm: More fine-grained PCM link locking

We have currently two global locks, a rwlock and a rwsem, that are
used for managing linking the PCM streams.  Due to these global locks,
once when a linked stream is used, the lock granularity suffers a
lot.

This patch attempts to eliminate the former global lock for atomic
ops.  The latter rwsem needs remaining because of the loosy way of the
loop calls in snd_pcm_action_nonatomic(), as well as for avoiding the
deadlock at linking.  However, these are used far rarely, actually
only by two actions (prepare and  reset), where both are no timing
critical ones.  So this can be still seen as a good improvement.

The basic strategy to eliminate the rwlock is to assure group->lock at
adding or removing a stream to / from the group.  Since we already
takes the group lock whenever taking the all substream locks under the
group, this shouldn't be a big problem.  The reference to group
pointer in snd_pcm_substream object is protected by the stream lock
itself.

However, there are still pitfalls: a race window at re-locking and the
lifecycle of group object.  The former is a small race window for
dereferencing the substream group object opened while snd_pcm_action()
performs re-locking to avoid ABBA deadlocks.  This includes the unlink
of group during that window, too.  And the latter is the kfree
performed after all streams are removed from the group while it's
still dereferenced.

For addressing these corner cases, two new tricks are introduced:
- After re-locking, the group assigned to the stream is checked again;
  if the group is changed, we retry the whole procedure.
- Introduce a refcount to snd_pcm_group object, so that it's freed
  only when it's empty and really no one refers to it.

(Some readers might wonder why not RCU for the latter.  RCU in this
case would cost more than refcounting, unfortunately.  We take the
group lock sooner or later, hence the performance improvement by RCU
would be negligible.  Meanwhile, because we need to deal with
schedulable context depending on the pcm->nonatomic flag, it'll become
dynamic RCU/SRCU switch, and the grace period may become too long.)

Along with these changes, there are a significant amount of code
refactoring.  The complex group re-lock & ref code is factored out to
snd_pcm_stream_group_ref() function, for example.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/sound/pcm.h     |   2 +
 sound/core/pcm_native.c | 166 +++++++++++++++++++++++++++++++++++-------------
 2 files changed, 124 insertions(+), 44 deletions(-)

(limited to 'include')

diff --git a/include/sound/pcm.h b/include/sound/pcm.h
index e1c747c70883..3bde24575a99 100644
--- a/include/sound/pcm.h
+++ b/include/sound/pcm.h
@@ -30,6 +30,7 @@
 #include <linux/mm.h>
 #include <linux/bitops.h>
 #include <linux/pm_qos.h>
+#include <linux/refcount.h>
 
 #define snd_pcm_substream_chip(substream) ((substream)->private_data)
 #define snd_pcm_chip(pcm) ((pcm)->private_data)
@@ -439,6 +440,7 @@ struct snd_pcm_group {		/* keep linked substreams */
 	spinlock_t lock;
 	struct mutex mutex;
 	struct list_head substreams;
+	refcount_t refs;
 };
 
 struct pid;
diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c
index fb45386270d5..cbde23fc67a9 100644
--- a/sound/core/pcm_native.c
+++ b/sound/core/pcm_native.c
@@ -85,7 +85,6 @@ static int snd_pcm_open(struct file *file, struct snd_pcm *pcm, int stream);
  *
  */
 
-static DEFINE_RWLOCK(snd_pcm_link_rwlock);
 static DECLARE_RWSEM(snd_pcm_link_rwsem);
 
 /* Writer in rwsem may block readers even during its waiting in queue,
@@ -105,8 +104,24 @@ void snd_pcm_group_init(struct snd_pcm_group *group)
 	spin_lock_init(&group->lock);
 	mutex_init(&group->mutex);
 	INIT_LIST_HEAD(&group->substreams);
+	refcount_set(&group->refs, 0);
 }
 
+/* define group lock helpers */
+#define DEFINE_PCM_GROUP_LOCK(action, mutex_action) \
+static void snd_pcm_group_ ## action(struct snd_pcm_group *group, bool nonatomic) \
+{ \
+	if (nonatomic) \
+		mutex_ ## mutex_action(&group->mutex); \
+	else \
+		spin_ ## action(&group->lock); \
+}
+
+DEFINE_PCM_GROUP_LOCK(lock, lock);
+DEFINE_PCM_GROUP_LOCK(unlock, unlock);
+DEFINE_PCM_GROUP_LOCK(lock_irq, lock);
+DEFINE_PCM_GROUP_LOCK(unlock_irq, unlock);
+
 #define PCM_LOCK_DEFAULT	0
 #define PCM_LOCK_IRQ	1
 #define PCM_LOCK_IRQSAVE	2
@@ -116,21 +131,19 @@ static unsigned long __snd_pcm_stream_lock_mode(struct snd_pcm_substream *substr
 {
 	unsigned long flags = 0;
 	if (substream->pcm->nonatomic) {
-		down_read_nested(&snd_pcm_link_rwsem, SINGLE_DEPTH_NESTING);
 		mutex_lock(&substream->self_group.mutex);
 	} else {
 		switch (mode) {
 		case PCM_LOCK_DEFAULT:
-			read_lock(&snd_pcm_link_rwlock);
+			spin_lock(&substream->self_group.lock);
 			break;
 		case PCM_LOCK_IRQ:
-			read_lock_irq(&snd_pcm_link_rwlock);
+			spin_lock_irq(&substream->self_group.lock);
 			break;
 		case PCM_LOCK_IRQSAVE:
-			read_lock_irqsave(&snd_pcm_link_rwlock, flags);
+			spin_lock_irqsave(&substream->self_group.lock, flags);
 			break;
 		}
-		spin_lock(&substream->self_group.lock);
 	}
 	return flags;
 }
@@ -140,19 +153,16 @@ static void __snd_pcm_stream_unlock_mode(struct snd_pcm_substream *substream,
 {
 	if (substream->pcm->nonatomic) {
 		mutex_unlock(&substream->self_group.mutex);
-		up_read(&snd_pcm_link_rwsem);
 	} else {
-		spin_unlock(&substream->self_group.lock);
-
 		switch (mode) {
 		case PCM_LOCK_DEFAULT:
-			read_unlock(&snd_pcm_link_rwlock);
+			spin_unlock(&substream->self_group.lock);
 			break;
 		case PCM_LOCK_IRQ:
-			read_unlock_irq(&snd_pcm_link_rwlock);
+			spin_unlock_irq(&substream->self_group.lock);
 			break;
 		case PCM_LOCK_IRQSAVE:
-			read_unlock_irqrestore(&snd_pcm_link_rwlock, flags);
+			spin_unlock_irqrestore(&substream->self_group.lock, flags);
 			break;
 		}
 	}
@@ -1138,6 +1148,61 @@ static void snd_pcm_group_assign(struct snd_pcm_substream *substream,
 	list_move(&substream->link_list, &new_group->substreams);
 }
 
+/*
+ * Unref and unlock the group, but keep the stream lock;
+ * when the group becomes empty and no longer referred, destroy itself
+ */
+static void snd_pcm_group_unref(struct snd_pcm_group *group,
+				struct snd_pcm_substream *substream)
+{
+	bool do_free;
+
+	if (!group)
+		return;
+	do_free = refcount_dec_and_test(&group->refs) &&
+		list_empty(&group->substreams);
+	snd_pcm_group_unlock(group, substream->pcm->nonatomic);
+	if (do_free)
+		kfree(group);
+}
+
+/*
+ * Lock the group inside a stream lock and reference it;
+ * return the locked group object, or NULL if not linked
+ */
+static struct snd_pcm_group *
+snd_pcm_stream_group_ref(struct snd_pcm_substream *substream)
+{
+	bool nonatomic = substream->pcm->nonatomic;
+	struct snd_pcm_group *group;
+	bool trylock;
+
+	for (;;) {
+		if (!snd_pcm_stream_linked(substream))
+			return NULL;
+		group = substream->group;
+		/* block freeing the group object */
+		refcount_inc(&group->refs);
+
+		trylock = nonatomic ? mutex_trylock(&group->mutex) :
+			spin_trylock(&group->lock);
+		if (trylock)
+			break; /* OK */
+
+		/* re-lock for avoiding ABBA deadlock */
+		snd_pcm_stream_unlock(substream);
+		snd_pcm_group_lock(group, nonatomic);
+		snd_pcm_stream_lock(substream);
+
+		/* check the group again; the above opens a small race window */
+		if (substream->group == group)
+			break; /* OK */
+		/* group changed, try again */
+		snd_pcm_group_unref(group, substream);
+	}
+	return group;
+}
+
 /*
  *  Note: call with stream lock
  */
@@ -1145,28 +1210,15 @@ static int snd_pcm_action(const struct action_ops *ops,
 			  struct snd_pcm_substream *substream,
 			  int state)
 {
+	struct snd_pcm_group *group;
 	int res;
 
-	if (!snd_pcm_stream_linked(substream))
-		return snd_pcm_action_single(ops, substream, state);
-
-	if (substream->pcm->nonatomic) {
-		if (!mutex_trylock(&substream->group->mutex)) {
-			mutex_unlock(&substream->self_group.mutex);
-			mutex_lock(&substream->group->mutex);
-			mutex_lock(&substream->self_group.mutex);
-		}
+	group = snd_pcm_stream_group_ref(substream);
+	if (group)
 		res = snd_pcm_action_group(ops, substream, state, 1);
-		mutex_unlock(&substream->group->mutex);
-	} else {
-		if (!spin_trylock(&substream->group->lock)) {
-			spin_unlock(&substream->self_group.lock);
-			spin_lock(&substream->group->lock);
-			spin_lock(&substream->self_group.lock);
-		}
-		res = snd_pcm_action_group(ops, substream, state, 1);
-		spin_unlock(&substream->group->lock);
-	}
+	else
+		res = snd_pcm_action_single(ops, substream, state);
+	snd_pcm_group_unref(group, substream);
 	return res;
 }
 
@@ -1193,6 +1245,7 @@ static int snd_pcm_action_nonatomic(const struct action_ops *ops,
 {
 	int res;
 
+	/* Guarantee the group members won't change during non-atomic action */
 	down_read(&snd_pcm_link_rwsem);
 	if (snd_pcm_stream_linked(substream))
 		res = snd_pcm_action_group(ops, substream, state, 0);
@@ -1821,6 +1874,7 @@ static int snd_pcm_drain(struct snd_pcm_substream *substream,
 	struct snd_card *card;
 	struct snd_pcm_runtime *runtime;
 	struct snd_pcm_substream *s;
+	struct snd_pcm_group *group;
 	wait_queue_entry_t wait;
 	int result = 0;
 	int nonblock = 0;
@@ -1837,7 +1891,6 @@ static int snd_pcm_drain(struct snd_pcm_substream *substream,
 	} else if (substream->f_flags & O_NONBLOCK)
 		nonblock = 1;
 
-	down_read(&snd_pcm_link_rwsem);
 	snd_pcm_stream_lock_irq(substream);
 	/* resume pause */
 	if (runtime->status->state == SNDRV_PCM_STATE_PAUSED)
@@ -1862,6 +1915,7 @@ static int snd_pcm_drain(struct snd_pcm_substream *substream,
 		}
 		/* find a substream to drain */
 		to_check = NULL;
+		group = snd_pcm_stream_group_ref(substream);
 		snd_pcm_group_for_each_entry(s, substream) {
 			if (s->stream != SNDRV_PCM_STREAM_PLAYBACK)
 				continue;
@@ -1871,12 +1925,12 @@ static int snd_pcm_drain(struct snd_pcm_substream *substream,
 				break;
 			}
 		}
+		snd_pcm_group_unref(group, substream);
 		if (!to_check)
 			break; /* all drained */
 		init_waitqueue_entry(&wait, current);
 		add_wait_queue(&to_check->sleep, &wait);
 		snd_pcm_stream_unlock_irq(substream);
-		up_read(&snd_pcm_link_rwsem);
 		if (runtime->no_period_wakeup)
 			tout = MAX_SCHEDULE_TIMEOUT;
 		else {
@@ -1888,9 +1942,17 @@ static int snd_pcm_drain(struct snd_pcm_substream *substream,
 			tout = msecs_to_jiffies(tout * 1000);
 		}
 		tout = schedule_timeout_interruptible(tout);
-		down_read(&snd_pcm_link_rwsem);
+
 		snd_pcm_stream_lock_irq(substream);
-		remove_wait_queue(&to_check->sleep, &wait);
+		group = snd_pcm_stream_group_ref(substream);
+		snd_pcm_group_for_each_entry(s, substream) {
+			if (s->runtime == to_check) {
+				remove_wait_queue(&to_check->sleep, &wait);
+				break;
+			}
+		}
+		snd_pcm_group_unref(group, substream);
+
 		if (card->shutdown) {
 			result = -ENODEV;
 			break;
@@ -1910,7 +1972,6 @@ static int snd_pcm_drain(struct snd_pcm_substream *substream,
 
  unlock:
 	snd_pcm_stream_unlock_irq(substream);
-	up_read(&snd_pcm_link_rwsem);
 
 	return result;
 }
@@ -1972,7 +2033,8 @@ static int snd_pcm_link(struct snd_pcm_substream *substream, int fd)
 	int res = 0;
 	struct snd_pcm_file *pcm_file;
 	struct snd_pcm_substream *substream1;
-	struct snd_pcm_group *group;
+	struct snd_pcm_group *group, *target_group;
+	bool nonatomic = substream->pcm->nonatomic;
 	struct fd f = fdget(fd);
 
 	if (!f.file)
@@ -1989,8 +2051,8 @@ static int snd_pcm_link(struct snd_pcm_substream *substream, int fd)
 		goto _nolock;
 	}
 	snd_pcm_group_init(group);
+
 	down_write_nonfifo(&snd_pcm_link_rwsem);
-	write_lock_irq(&snd_pcm_link_rwlock);
 	if (substream->runtime->status->state == SNDRV_PCM_STATE_OPEN ||
 	    substream->runtime->status->state != substream1->runtime->status->state ||
 	    substream->pcm->nonatomic != substream1->pcm->nonatomic) {
@@ -2001,13 +2063,21 @@ static int snd_pcm_link(struct snd_pcm_substream *substream, int fd)
 		res = -EALREADY;
 		goto _end;
 	}
+
+	snd_pcm_stream_lock_irq(substream);
 	if (!snd_pcm_stream_linked(substream)) {
 		snd_pcm_group_assign(substream, group);
-		group = NULL;
+		group = NULL; /* assigned, don't free this one below */
 	}
-	snd_pcm_group_assign(substream1, substream->group);
+	target_group = substream->group;
+	snd_pcm_stream_unlock_irq(substream);
+
+	snd_pcm_group_lock_irq(target_group, nonatomic);
+	snd_pcm_stream_lock(substream1);
+	snd_pcm_group_assign(substream1, target_group);
+	snd_pcm_stream_unlock(substream1);
+	snd_pcm_group_unlock_irq(target_group, nonatomic);
  _end:
-	write_unlock_irq(&snd_pcm_link_rwlock);
 	up_write(&snd_pcm_link_rwsem);
  _nolock:
 	kfree(group);
@@ -2018,22 +2088,27 @@ static int snd_pcm_link(struct snd_pcm_substream *substream, int fd)
 
 static void relink_to_local(struct snd_pcm_substream *substream)
 {
+	snd_pcm_stream_lock(substream);
 	snd_pcm_group_assign(substream, &substream->self_group);
+	snd_pcm_stream_unlock(substream);
 }
 
 static int snd_pcm_unlink(struct snd_pcm_substream *substream)
 {
 	struct snd_pcm_group *group;
+	bool nonatomic = substream->pcm->nonatomic;
+	bool do_free = false;
 	int res = 0;
 
 	down_write_nonfifo(&snd_pcm_link_rwsem);
-	write_lock_irq(&snd_pcm_link_rwlock);
+
 	if (!snd_pcm_stream_linked(substream)) {
 		res = -EALREADY;
 		goto _end;
 	}
 
 	group = substream->group;
+	snd_pcm_group_lock_irq(group, nonatomic);
 
 	relink_to_local(substream);
 
@@ -2042,11 +2117,14 @@ static int snd_pcm_unlink(struct snd_pcm_substream *substream)
 		relink_to_local(list_first_entry(&group->substreams,
 						 struct snd_pcm_substream,
 						 link_list));
-		kfree(group);
+		do_free = !refcount_read(&group->refs);
 	}
 
+	snd_pcm_group_unlock_irq(group, nonatomic);
+	if (do_free)
+		kfree(group);
+
        _end:
-	write_unlock_irq(&snd_pcm_link_rwlock);
 	up_write(&snd_pcm_link_rwsem);
 	return res;
 }
-- 
cgit v1.2.3


From 51eea52d26d4939b788b7244c28cf47e902b4c4c Mon Sep 17 00:00:00 2001
From: Lubomir Rintel <lkundrak@v3.sk>
Date: Wed, 16 Jan 2019 16:13:31 +0100
Subject: pxa2xx: replace spi_master with spi_controller

It's also a slave controller driver now, calling it "master" is slightly
misleading.

Signed-off-by: Lubomir Rintel <lkundrak@v3.sk>
Acked-by: Robert Jarzmik <robert.jarzmik@free.fr>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 Documentation/spi/pxa2xx       |  10 +--
 arch/arm/mach-pxa/cm-x255.c    |   2 +-
 arch/arm/mach-pxa/cm-x270.c    |   2 +-
 arch/arm/mach-pxa/corgi.c      |   2 +-
 arch/arm/mach-pxa/devices.c    |   2 +-
 arch/arm/mach-pxa/em-x270.c    |   4 +-
 arch/arm/mach-pxa/hx4700.c     |   2 +-
 arch/arm/mach-pxa/icontrol.c   |   4 +-
 arch/arm/mach-pxa/littleton.c  |   2 +-
 arch/arm/mach-pxa/lubbock.c    |   2 +-
 arch/arm/mach-pxa/magician.c   |   2 +-
 arch/arm/mach-pxa/pcm027.c     |   2 +-
 arch/arm/mach-pxa/poodle.c     |   2 +-
 arch/arm/mach-pxa/spitz.c      |   2 +-
 arch/arm/mach-pxa/stargate2.c  |   6 +-
 arch/arm/mach-pxa/tosa.c       |   2 +-
 arch/arm/mach-pxa/z2.c         |   4 +-
 arch/arm/mach-pxa/zeus.c       |   2 +-
 drivers/spi/spi-pxa2xx-dma.c   |  58 +++++++--------
 drivers/spi/spi-pxa2xx-pci.c   |   4 +-
 drivers/spi/spi-pxa2xx.c       | 156 ++++++++++++++++++++---------------------
 drivers/spi/spi-pxa2xx.h       |   4 +-
 include/linux/spi/pxa2xx_spi.h |   4 +-
 23 files changed, 140 insertions(+), 140 deletions(-)

(limited to 'include')

diff --git a/Documentation/spi/pxa2xx b/Documentation/spi/pxa2xx
index 13a0b7fb192f..551325b66b23 100644
--- a/Documentation/spi/pxa2xx
+++ b/Documentation/spi/pxa2xx
@@ -21,15 +21,15 @@ Typically a SPI master is defined in the arch/.../mach-*/board-*.c as a
 "platform device".  The master configuration is passed to the driver via a table
 found in include/linux/spi/pxa2xx_spi.h:
 
-struct pxa2xx_spi_master {
+struct pxa2xx_spi_controller {
 	u16 num_chipselect;
 	u8 enable_dma;
 };
 
-The "pxa2xx_spi_master.num_chipselect" field is used to determine the number of
+The "pxa2xx_spi_controller.num_chipselect" field is used to determine the number of
 slave device (chips) attached to this SPI master.
 
-The "pxa2xx_spi_master.enable_dma" field informs the driver that SSP DMA should
+The "pxa2xx_spi_controller.enable_dma" field informs the driver that SSP DMA should
 be used.  This caused the driver to acquire two DMA channels: rx_channel and
 tx_channel.  The rx_channel has a higher DMA service priority the tx_channel.
 See the "PXA2xx Developer Manual" section "DMA Controller".
@@ -51,7 +51,7 @@ static struct resource pxa_spi_nssp_resources[] = {
 	},
 };
 
-static struct pxa2xx_spi_master pxa_nssp_master_info = {
+static struct pxa2xx_spi_controller pxa_nssp_master_info = {
 	.num_chipselect = 1, /* Matches the number of chips attached to NSSP */
 	.enable_dma = 1, /* Enables NSSP DMA */
 };
@@ -206,7 +206,7 @@ DMA and PIO I/O Support
 -----------------------
 The pxa2xx_spi driver supports both DMA and interrupt driven PIO message
 transfers.  The driver defaults to PIO mode and DMA transfers must be enabled
-by setting the "enable_dma" flag in the "pxa2xx_spi_master" structure.  The DMA
+by setting the "enable_dma" flag in the "pxa2xx_spi_controller" structure.  The DMA
 mode supports both coherent and stream based DMA mappings.
 
 The following logic is used to determine the type of I/O to be used on
diff --git a/arch/arm/mach-pxa/cm-x255.c b/arch/arm/mach-pxa/cm-x255.c
index fa8e7dd4d898..4401dfcd7e68 100644
--- a/arch/arm/mach-pxa/cm-x255.c
+++ b/arch/arm/mach-pxa/cm-x255.c
@@ -98,7 +98,7 @@ static unsigned long cmx255_pin_config[] = {
 };
 
 #if defined(CONFIG_SPI_PXA2XX)
-static struct pxa2xx_spi_master pxa_ssp_master_info = {
+static struct pxa2xx_spi_controller pxa_ssp_master_info = {
 	.num_chipselect	= 1,
 };
 
diff --git a/arch/arm/mach-pxa/cm-x270.c b/arch/arm/mach-pxa/cm-x270.c
index f7081a50dc67..279eeca7add0 100644
--- a/arch/arm/mach-pxa/cm-x270.c
+++ b/arch/arm/mach-pxa/cm-x270.c
@@ -313,7 +313,7 @@ static inline void cmx270_init_mmc(void) {}
 #endif
 
 #if defined(CONFIG_SPI_PXA2XX) || defined(CONFIG_SPI_PXA2XX_MODULE)
-static struct pxa2xx_spi_master cm_x270_spi_info = {
+static struct pxa2xx_spi_controller cm_x270_spi_info = {
 	.num_chipselect	= 1,
 	.enable_dma	= 1,
 };
diff --git a/arch/arm/mach-pxa/corgi.c b/arch/arm/mach-pxa/corgi.c
index c9732cace5e3..7ecf559bd71c 100644
--- a/arch/arm/mach-pxa/corgi.c
+++ b/arch/arm/mach-pxa/corgi.c
@@ -530,7 +530,7 @@ static struct pxa2xx_udc_mach_info udc_info __initdata = {
 };
 
 #if IS_ENABLED(CONFIG_SPI_PXA2XX)
-static struct pxa2xx_spi_master corgi_spi_info = {
+static struct pxa2xx_spi_controller corgi_spi_info = {
 	.num_chipselect	= 3,
 };
 
diff --git a/arch/arm/mach-pxa/devices.c b/arch/arm/mach-pxa/devices.c
index a24783a03827..524d6093e0c7 100644
--- a/arch/arm/mach-pxa/devices.c
+++ b/arch/arm/mach-pxa/devices.c
@@ -1065,7 +1065,7 @@ struct platform_device pxa93x_device_gpio = {
 
 /* pxa2xx-spi platform-device ID equals respective SSP platform-device ID + 1.
  * See comment in arch/arm/mach-pxa/ssp.c::ssp_probe() */
-void __init pxa2xx_set_spi_info(unsigned id, struct pxa2xx_spi_master *info)
+void __init pxa2xx_set_spi_info(unsigned id, struct pxa2xx_spi_controller *info)
 {
 	struct platform_device *pd;
 
diff --git a/arch/arm/mach-pxa/em-x270.c b/arch/arm/mach-pxa/em-x270.c
index 32c1edeb3f14..5e372760f16e 100644
--- a/arch/arm/mach-pxa/em-x270.c
+++ b/arch/arm/mach-pxa/em-x270.c
@@ -689,7 +689,7 @@ static inline void em_x270_init_lcd(void) {}
 #endif
 
 #if defined(CONFIG_SPI_PXA2XX) || defined(CONFIG_SPI_PXA2XX_MODULE)
-static struct pxa2xx_spi_master em_x270_spi_info = {
+static struct pxa2xx_spi_controller em_x270_spi_info = {
 	.num_chipselect	= 1,
 };
 
@@ -703,7 +703,7 @@ static struct tdo24m_platform_data em_x270_tdo24m_pdata = {
 	.model = TDO35S,
 };
 
-static struct pxa2xx_spi_master em_x270_spi_2_info = {
+static struct pxa2xx_spi_controller em_x270_spi_2_info = {
 	.num_chipselect	= 1,
 	.enable_dma	= 1,
 };
diff --git a/arch/arm/mach-pxa/hx4700.c b/arch/arm/mach-pxa/hx4700.c
index b79b757fdd41..c3b47557b3f2 100644
--- a/arch/arm/mach-pxa/hx4700.c
+++ b/arch/arm/mach-pxa/hx4700.c
@@ -629,7 +629,7 @@ static struct spi_board_info tsc2046_board_info[] __initdata = {
 	},
 };
 
-static struct pxa2xx_spi_master pxa_ssp2_master_info = {
+static struct pxa2xx_spi_controller pxa_ssp2_master_info = {
 	.num_chipselect = 1,
 	.enable_dma     = 1,
 };
diff --git a/arch/arm/mach-pxa/icontrol.c b/arch/arm/mach-pxa/icontrol.c
index cbaf4f6edcda..7e30452e3840 100644
--- a/arch/arm/mach-pxa/icontrol.c
+++ b/arch/arm/mach-pxa/icontrol.c
@@ -115,12 +115,12 @@ static struct spi_board_info mcp251x_board_info[] = {
 	}
 };
 
-static struct pxa2xx_spi_master pxa_ssp3_spi_master_info = {
+static struct pxa2xx_spi_controller pxa_ssp3_spi_master_info = {
 	.num_chipselect = 2,
 	.enable_dma     = 1
 };
 
-static struct pxa2xx_spi_master pxa_ssp4_spi_master_info = {
+static struct pxa2xx_spi_controller pxa_ssp4_spi_master_info = {
 	.num_chipselect = 2,
 	.enable_dma     = 1
 };
diff --git a/arch/arm/mach-pxa/littleton.c b/arch/arm/mach-pxa/littleton.c
index 39db4898dc4a..464b8bd2bcb9 100644
--- a/arch/arm/mach-pxa/littleton.c
+++ b/arch/arm/mach-pxa/littleton.c
@@ -191,7 +191,7 @@ static inline void littleton_init_lcd(void) {};
 #endif /* CONFIG_FB_PXA || CONFIG_FB_PXA_MODULE */
 
 #if defined(CONFIG_SPI_PXA2XX) || defined(CONFIG_SPI_PXA2XX_MODULE)
-static struct pxa2xx_spi_master littleton_spi_info = {
+static struct pxa2xx_spi_controller littleton_spi_info = {
 	.num_chipselect		= 1,
 };
 
diff --git a/arch/arm/mach-pxa/lubbock.c b/arch/arm/mach-pxa/lubbock.c
index a1391e113ef4..c1bd0d544981 100644
--- a/arch/arm/mach-pxa/lubbock.c
+++ b/arch/arm/mach-pxa/lubbock.c
@@ -197,7 +197,7 @@ static struct platform_device sa1111_device = {
  * (to J5) and poking board registers (as done below).  Else it's only useful
  * for the temperature sensors.
  */
-static struct pxa2xx_spi_master pxa_ssp_master_info = {
+static struct pxa2xx_spi_controller pxa_ssp_master_info = {
 	.num_chipselect	= 1,
 };
 
diff --git a/arch/arm/mach-pxa/magician.c b/arch/arm/mach-pxa/magician.c
index 08b079653c3f..618bcff4cdc9 100644
--- a/arch/arm/mach-pxa/magician.c
+++ b/arch/arm/mach-pxa/magician.c
@@ -932,7 +932,7 @@ struct pxa2xx_spi_chip tsc2046_chip_info = {
 	.gpio_cs	= GPIO14_MAGICIAN_TSC2046_CS,
 };
 
-static struct pxa2xx_spi_master magician_spi_info = {
+static struct pxa2xx_spi_controller magician_spi_info = {
 	.num_chipselect	= 1,
 	.enable_dma	= 1,
 };
diff --git a/arch/arm/mach-pxa/pcm027.c b/arch/arm/mach-pxa/pcm027.c
index ccca9f7575c3..e2e613449660 100644
--- a/arch/arm/mach-pxa/pcm027.c
+++ b/arch/arm/mach-pxa/pcm027.c
@@ -132,7 +132,7 @@ static struct platform_device smc91x_device = {
 /*
  * SPI host and devices
  */
-static struct pxa2xx_spi_master pxa_ssp_master_info = {
+static struct pxa2xx_spi_controller pxa_ssp_master_info = {
 	.num_chipselect	= 1,
 };
 
diff --git a/arch/arm/mach-pxa/poodle.c b/arch/arm/mach-pxa/poodle.c
index c2a43d4cfd3e..9450a523cd0b 100644
--- a/arch/arm/mach-pxa/poodle.c
+++ b/arch/arm/mach-pxa/poodle.c
@@ -196,7 +196,7 @@ struct platform_device poodle_locomo_device = {
 EXPORT_SYMBOL(poodle_locomo_device);
 
 #if defined(CONFIG_SPI_PXA2XX) || defined(CONFIG_SPI_PXA2XX_MODULE)
-static struct pxa2xx_spi_master poodle_spi_info = {
+static struct pxa2xx_spi_controller poodle_spi_info = {
 	.num_chipselect	= 1,
 };
 
diff --git a/arch/arm/mach-pxa/spitz.c b/arch/arm/mach-pxa/spitz.c
index 306818e2cf54..8dac824a85df 100644
--- a/arch/arm/mach-pxa/spitz.c
+++ b/arch/arm/mach-pxa/spitz.c
@@ -572,7 +572,7 @@ static struct spi_board_info spitz_spi_devices[] = {
 	},
 };
 
-static struct pxa2xx_spi_master spitz_spi_info = {
+static struct pxa2xx_spi_controller spitz_spi_info = {
 	.num_chipselect	= 3,
 };
 
diff --git a/arch/arm/mach-pxa/stargate2.c b/arch/arm/mach-pxa/stargate2.c
index e0d6c872270a..c28d19b126a7 100644
--- a/arch/arm/mach-pxa/stargate2.c
+++ b/arch/arm/mach-pxa/stargate2.c
@@ -337,15 +337,15 @@ static struct platform_device stargate2_flash_device = {
 	.num_resources = 1,
 };
 
-static struct pxa2xx_spi_master pxa_ssp_master_0_info = {
+static struct pxa2xx_spi_controller pxa_ssp_master_0_info = {
 	.num_chipselect = 1,
 };
 
-static struct pxa2xx_spi_master pxa_ssp_master_1_info = {
+static struct pxa2xx_spi_controller pxa_ssp_master_1_info = {
 	.num_chipselect = 1,
 };
 
-static struct pxa2xx_spi_master pxa_ssp_master_2_info = {
+static struct pxa2xx_spi_controller pxa_ssp_master_2_info = {
 	.num_chipselect = 1,
 };
 
diff --git a/arch/arm/mach-pxa/tosa.c b/arch/arm/mach-pxa/tosa.c
index e8a93c088c35..7439798d58e4 100644
--- a/arch/arm/mach-pxa/tosa.c
+++ b/arch/arm/mach-pxa/tosa.c
@@ -813,7 +813,7 @@ static struct platform_device tosa_bt_device = {
 	.dev.platform_data = &tosa_bt_data,
 };
 
-static struct pxa2xx_spi_master pxa_ssp_master_info = {
+static struct pxa2xx_spi_controller pxa_ssp_master_info = {
 	.num_chipselect	= 1,
 };
 
diff --git a/arch/arm/mach-pxa/z2.c b/arch/arm/mach-pxa/z2.c
index e2353e75bb28..ad082e11e2a4 100644
--- a/arch/arm/mach-pxa/z2.c
+++ b/arch/arm/mach-pxa/z2.c
@@ -607,12 +607,12 @@ static struct spi_board_info spi_board_info[] __initdata = {
 },
 };
 
-static struct pxa2xx_spi_master pxa_ssp1_master_info = {
+static struct pxa2xx_spi_controller pxa_ssp1_master_info = {
 	.num_chipselect	= 1,
 	.enable_dma	= 1,
 };
 
-static struct pxa2xx_spi_master pxa_ssp2_master_info = {
+static struct pxa2xx_spi_controller pxa_ssp2_master_info = {
 	.num_chipselect	= 1,
 };
 
diff --git a/arch/arm/mach-pxa/zeus.c b/arch/arm/mach-pxa/zeus.c
index c411f79d4cb5..bdbcf46595f9 100644
--- a/arch/arm/mach-pxa/zeus.c
+++ b/arch/arm/mach-pxa/zeus.c
@@ -391,7 +391,7 @@ static struct platform_device zeus_sram_device = {
 };
 
 /* SPI interface on SSP3 */
-static struct pxa2xx_spi_master pxa2xx_spi_ssp3_master_info = {
+static struct pxa2xx_spi_controller pxa2xx_spi_ssp3_master_info = {
 	.num_chipselect = 1,
 	.enable_dma     = 1,
 };
diff --git a/drivers/spi/spi-pxa2xx-dma.c b/drivers/spi/spi-pxa2xx-dma.c
index 2fa7f4b43492..15592598273e 100644
--- a/drivers/spi/spi-pxa2xx-dma.c
+++ b/drivers/spi/spi-pxa2xx-dma.c
@@ -23,7 +23,7 @@
 static void pxa2xx_spi_dma_transfer_complete(struct driver_data *drv_data,
 					     bool error)
 {
-	struct spi_message *msg = drv_data->master->cur_msg;
+	struct spi_message *msg = drv_data->controller->cur_msg;
 
 	/*
 	 * It is possible that one CPU is handling ROR interrupt and other
@@ -59,7 +59,7 @@ static void pxa2xx_spi_dma_transfer_complete(struct driver_data *drv_data,
 			msg->status = -EIO;
 		}
 
-		spi_finalize_current_transfer(drv_data->master);
+		spi_finalize_current_transfer(drv_data->controller);
 	}
 }
 
@@ -74,7 +74,7 @@ pxa2xx_spi_dma_prepare_one(struct driver_data *drv_data,
 			   struct spi_transfer *xfer)
 {
 	struct chip_data *chip =
-		spi_get_ctldata(drv_data->master->cur_msg->spi);
+		spi_get_ctldata(drv_data->controller->cur_msg->spi);
 	enum dma_slave_buswidth width;
 	struct dma_slave_config cfg;
 	struct dma_chan *chan;
@@ -102,14 +102,14 @@ pxa2xx_spi_dma_prepare_one(struct driver_data *drv_data,
 		cfg.dst_maxburst = chip->dma_burst_size;
 
 		sgt = &xfer->tx_sg;
-		chan = drv_data->master->dma_tx;
+		chan = drv_data->controller->dma_tx;
 	} else {
 		cfg.src_addr = drv_data->ssdr_physical;
 		cfg.src_addr_width = width;
 		cfg.src_maxburst = chip->dma_burst_size;
 
 		sgt = &xfer->rx_sg;
-		chan = drv_data->master->dma_rx;
+		chan = drv_data->controller->dma_rx;
 	}
 
 	ret = dmaengine_slave_config(chan, &cfg);
@@ -130,8 +130,8 @@ irqreturn_t pxa2xx_spi_dma_transfer(struct driver_data *drv_data)
 	if (status & SSSR_ROR) {
 		dev_err(&drv_data->pdev->dev, "FIFO overrun\n");
 
-		dmaengine_terminate_async(drv_data->master->dma_rx);
-		dmaengine_terminate_async(drv_data->master->dma_tx);
+		dmaengine_terminate_async(drv_data->controller->dma_rx);
+		dmaengine_terminate_async(drv_data->controller->dma_tx);
 
 		pxa2xx_spi_dma_transfer_complete(drv_data, true);
 		return IRQ_HANDLED;
@@ -171,15 +171,15 @@ int pxa2xx_spi_dma_prepare(struct driver_data *drv_data,
 	return 0;
 
 err_rx:
-	dmaengine_terminate_async(drv_data->master->dma_tx);
+	dmaengine_terminate_async(drv_data->controller->dma_tx);
 err_tx:
 	return err;
 }
 
 void pxa2xx_spi_dma_start(struct driver_data *drv_data)
 {
-	dma_async_issue_pending(drv_data->master->dma_rx);
-	dma_async_issue_pending(drv_data->master->dma_tx);
+	dma_async_issue_pending(drv_data->controller->dma_rx);
+	dma_async_issue_pending(drv_data->controller->dma_tx);
 
 	atomic_set(&drv_data->dma_running, 1);
 }
@@ -187,30 +187,30 @@ void pxa2xx_spi_dma_start(struct driver_data *drv_data)
 void pxa2xx_spi_dma_stop(struct driver_data *drv_data)
 {
 	atomic_set(&drv_data->dma_running, 0);
-	dmaengine_terminate_sync(drv_data->master->dma_rx);
-	dmaengine_terminate_sync(drv_data->master->dma_tx);
+	dmaengine_terminate_sync(drv_data->controller->dma_rx);
+	dmaengine_terminate_sync(drv_data->controller->dma_tx);
 }
 
 int pxa2xx_spi_dma_setup(struct driver_data *drv_data)
 {
-	struct pxa2xx_spi_master *pdata = drv_data->master_info;
+	struct pxa2xx_spi_controller *pdata = drv_data->controller_info;
 	struct device *dev = &drv_data->pdev->dev;
-	struct spi_controller *master = drv_data->master;
+	struct spi_controller *controller = drv_data->controller;
 	dma_cap_mask_t mask;
 
 	dma_cap_zero(mask);
 	dma_cap_set(DMA_SLAVE, mask);
 
-	master->dma_tx = dma_request_slave_channel_compat(mask,
+	controller->dma_tx = dma_request_slave_channel_compat(mask,
 				pdata->dma_filter, pdata->tx_param, dev, "tx");
-	if (!master->dma_tx)
+	if (!controller->dma_tx)
 		return -ENODEV;
 
-	master->dma_rx = dma_request_slave_channel_compat(mask,
+	controller->dma_rx = dma_request_slave_channel_compat(mask,
 				pdata->dma_filter, pdata->rx_param, dev, "rx");
-	if (!master->dma_rx) {
-		dma_release_channel(master->dma_tx);
-		master->dma_tx = NULL;
+	if (!controller->dma_rx) {
+		dma_release_channel(controller->dma_tx);
+		controller->dma_tx = NULL;
 		return -ENODEV;
 	}
 
@@ -219,17 +219,17 @@ int pxa2xx_spi_dma_setup(struct driver_data *drv_data)
 
 void pxa2xx_spi_dma_release(struct driver_data *drv_data)
 {
-	struct spi_controller *master = drv_data->master;
+	struct spi_controller *controller = drv_data->controller;
 
-	if (master->dma_rx) {
-		dmaengine_terminate_sync(master->dma_rx);
-		dma_release_channel(master->dma_rx);
-		master->dma_rx = NULL;
+	if (controller->dma_rx) {
+		dmaengine_terminate_sync(controller->dma_rx);
+		dma_release_channel(controller->dma_rx);
+		controller->dma_rx = NULL;
 	}
-	if (master->dma_tx) {
-		dmaengine_terminate_sync(master->dma_tx);
-		dma_release_channel(master->dma_tx);
-		master->dma_tx = NULL;
+	if (controller->dma_tx) {
+		dmaengine_terminate_sync(controller->dma_tx);
+		dma_release_channel(controller->dma_tx);
+		controller->dma_tx = NULL;
 	}
 }
 
diff --git a/drivers/spi/spi-pxa2xx-pci.c b/drivers/spi/spi-pxa2xx-pci.c
index 869f188b02eb..1727fdfbac28 100644
--- a/drivers/spi/spi-pxa2xx-pci.c
+++ b/drivers/spi/spi-pxa2xx-pci.c
@@ -197,7 +197,7 @@ static int pxa2xx_spi_pci_probe(struct pci_dev *dev,
 	struct platform_device_info pi;
 	int ret;
 	struct platform_device *pdev;
-	struct pxa2xx_spi_master spi_pdata;
+	struct pxa2xx_spi_controller spi_pdata;
 	struct ssp_device *ssp;
 	struct pxa_spi_info *c;
 	char buf[40];
@@ -265,7 +265,7 @@ static int pxa2xx_spi_pci_probe(struct pci_dev *dev,
 static void pxa2xx_spi_pci_remove(struct pci_dev *dev)
 {
 	struct platform_device *pdev = pci_get_drvdata(dev);
-	struct pxa2xx_spi_master *spi_pdata;
+	struct pxa2xx_spi_controller *spi_pdata;
 
 	spi_pdata = dev_get_platdata(&pdev->dev);
 
diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c
index d84b893a64d7..69e874a2ad1e 100644
--- a/drivers/spi/spi-pxa2xx.c
+++ b/drivers/spi/spi-pxa2xx.c
@@ -328,7 +328,7 @@ static void lpss_ssp_setup(struct driver_data *drv_data)
 	__lpss_ssp_write_priv(drv_data, config->reg_cs_ctrl, value);
 
 	/* Enable multiblock DMA transfers */
-	if (drv_data->master_info->enable_dma) {
+	if (drv_data->controller_info->enable_dma) {
 		__lpss_ssp_write_priv(drv_data, config->reg_ssp, 1);
 
 		if (config->reg_general >= 0) {
@@ -368,7 +368,7 @@ static void lpss_ssp_select_cs(struct spi_device *spi,
 		__lpss_ssp_write_priv(drv_data,
 				      config->reg_cs_ctrl, value);
 		ndelay(1000000000 /
-		       (drv_data->master->max_speed_hz / 2));
+		       (drv_data->controller->max_speed_hz / 2));
 	}
 }
 
@@ -567,7 +567,7 @@ static int u32_reader(struct driver_data *drv_data)
 static void reset_sccr1(struct driver_data *drv_data)
 {
 	struct chip_data *chip =
-		spi_get_ctldata(drv_data->master->cur_msg->spi);
+		spi_get_ctldata(drv_data->controller->cur_msg->spi);
 	u32 sccr1_reg;
 
 	sccr1_reg = pxa2xx_spi_read(drv_data, SSCR1) & ~drv_data->int_cr1;
@@ -599,8 +599,8 @@ static void int_error_stop(struct driver_data *drv_data, const char* msg)
 
 	dev_err(&drv_data->pdev->dev, "%s\n", msg);
 
-	drv_data->master->cur_msg->status = -EIO;
-	spi_finalize_current_transfer(drv_data->master);
+	drv_data->controller->cur_msg->status = -EIO;
+	spi_finalize_current_transfer(drv_data->controller);
 }
 
 static void int_transfer_complete(struct driver_data *drv_data)
@@ -611,7 +611,7 @@ static void int_transfer_complete(struct driver_data *drv_data)
 	if (!pxa25x_ssp_comp(drv_data))
 		pxa2xx_spi_write(drv_data, SSTO, 0);
 
-	spi_finalize_current_transfer(drv_data->master);
+	spi_finalize_current_transfer(drv_data->controller);
 }
 
 static irqreturn_t interrupt_transfer(struct driver_data *drv_data)
@@ -747,7 +747,7 @@ static irqreturn_t ssp_int(int irq, void *dev_id)
 	pxa2xx_spi_write(drv_data, SSCR1, sccr1_reg & ~drv_data->int_cr1);
 	pxa2xx_spi_write(drv_data, SSCR1, sccr1_reg);
 
-	if (!drv_data->master->cur_msg) {
+	if (!drv_data->controller->cur_msg) {
 		handle_bad_msg(drv_data);
 		/* Never fail */
 		return IRQ_HANDLED;
@@ -879,7 +879,7 @@ static unsigned int quark_x1000_get_clk_div(int rate, u32 *dds)
 
 static unsigned int ssp_get_clk_div(struct driver_data *drv_data, int rate)
 {
-	unsigned long ssp_clk = drv_data->master->max_speed_hz;
+	unsigned long ssp_clk = drv_data->controller->max_speed_hz;
 	const struct ssp_device *ssp = drv_data->ssp;
 
 	rate = min_t(int, ssp_clk, rate);
@@ -894,7 +894,7 @@ static unsigned int pxa2xx_ssp_get_clk_div(struct driver_data *drv_data,
 					   int rate)
 {
 	struct chip_data *chip =
-		spi_get_ctldata(drv_data->master->cur_msg->spi);
+		spi_get_ctldata(drv_data->controller->cur_msg->spi);
 	unsigned int clk_div;
 
 	switch (drv_data->ssp_type) {
@@ -908,7 +908,7 @@ static unsigned int pxa2xx_ssp_get_clk_div(struct driver_data *drv_data,
 	return clk_div << 8;
 }
 
-static bool pxa2xx_spi_can_dma(struct spi_controller *master,
+static bool pxa2xx_spi_can_dma(struct spi_controller *controller,
 			       struct spi_device *spi,
 			       struct spi_transfer *xfer)
 {
@@ -919,12 +919,12 @@ static bool pxa2xx_spi_can_dma(struct spi_controller *master,
 	       xfer->len >= chip->dma_burst_size;
 }
 
-static int pxa2xx_spi_transfer_one(struct spi_controller *master,
+static int pxa2xx_spi_transfer_one(struct spi_controller *controller,
 				   struct spi_device *spi,
 				   struct spi_transfer *transfer)
 {
-	struct driver_data *drv_data = spi_controller_get_devdata(master);
-	struct spi_message *message = master->cur_msg;
+	struct driver_data *drv_data = spi_controller_get_devdata(controller);
+	struct spi_message *message = controller->cur_msg;
 	struct chip_data *chip = spi_get_ctldata(message->spi);
 	u32 dma_thresh = chip->dma_threshold;
 	u32 dma_burst = chip->dma_burst_size;
@@ -1006,9 +1006,9 @@ static int pxa2xx_spi_transfer_one(struct spi_controller *master,
 					     "DMA burst size reduced to match bits_per_word\n");
 	}
 
-	dma_mapped = master->can_dma &&
-		     master->can_dma(master, message->spi, transfer) &&
-		     master->cur_msg_mapped;
+	dma_mapped = controller->can_dma &&
+		     controller->can_dma(controller, message->spi, transfer) &&
+		     controller->cur_msg_mapped;
 	if (dma_mapped) {
 
 		/* Ensure we have the correct interrupt handler */
@@ -1036,12 +1036,12 @@ static int pxa2xx_spi_transfer_one(struct spi_controller *master,
 	cr0 = pxa2xx_configure_sscr0(drv_data, clk_div, bits);
 	if (!pxa25x_ssp_comp(drv_data))
 		dev_dbg(&message->spi->dev, "%u Hz actual, %s\n",
-			master->max_speed_hz
+			controller->max_speed_hz
 				/ (1 + ((cr0 & SSCR0_SCR(0xfff)) >> 8)),
 			dma_mapped ? "DMA" : "PIO");
 	else
 		dev_dbg(&message->spi->dev, "%u Hz actual, %s\n",
-			master->max_speed_hz / 2
+			controller->max_speed_hz / 2
 				/ (1 + ((cr0 & SSCR0_SCR(0x0ff)) >> 8)),
 			dma_mapped ? "DMA" : "PIO");
 
@@ -1092,7 +1092,7 @@ static int pxa2xx_spi_transfer_one(struct spi_controller *master,
 		}
 	}
 
-	if (spi_controller_is_slave(master)) {
+	if (spi_controller_is_slave(controller)) {
 		while (drv_data->write(drv_data))
 			;
 		if (drv_data->gpiod_ready) {
@@ -1111,9 +1111,9 @@ static int pxa2xx_spi_transfer_one(struct spi_controller *master,
 	return 1;
 }
 
-static int pxa2xx_spi_slave_abort(struct spi_master *master)
+static int pxa2xx_spi_slave_abort(struct spi_controller *controller)
 {
-	struct driver_data *drv_data = spi_controller_get_devdata(master);
+	struct driver_data *drv_data = spi_controller_get_devdata(controller);
 
 	/* Stop and reset SSP */
 	write_SSSR_CS(drv_data, drv_data->clear_sr);
@@ -1126,16 +1126,16 @@ static int pxa2xx_spi_slave_abort(struct spi_master *master)
 
 	dev_dbg(&drv_data->pdev->dev, "transfer aborted\n");
 
-	drv_data->master->cur_msg->status = -EINTR;
-	spi_finalize_current_transfer(drv_data->master);
+	drv_data->controller->cur_msg->status = -EINTR;
+	spi_finalize_current_transfer(drv_data->controller);
 
 	return 0;
 }
 
-static void pxa2xx_spi_handle_err(struct spi_controller *master,
+static void pxa2xx_spi_handle_err(struct spi_controller *controller,
 				 struct spi_message *msg)
 {
-	struct driver_data *drv_data = spi_controller_get_devdata(master);
+	struct driver_data *drv_data = spi_controller_get_devdata(controller);
 
 	/* Disable the SSP */
 	pxa2xx_spi_write(drv_data, SSCR0,
@@ -1159,9 +1159,9 @@ static void pxa2xx_spi_handle_err(struct spi_controller *master,
 		pxa2xx_spi_dma_stop(drv_data);
 }
 
-static int pxa2xx_spi_unprepare_transfer(struct spi_controller *master)
+static int pxa2xx_spi_unprepare_transfer(struct spi_controller *controller)
 {
-	struct driver_data *drv_data = spi_controller_get_devdata(master);
+	struct driver_data *drv_data = spi_controller_get_devdata(controller);
 
 	/* Disable the SSP now */
 	pxa2xx_spi_write(drv_data, SSCR0,
@@ -1260,7 +1260,7 @@ static int setup(struct spi_device *spi)
 		break;
 	default:
 		tx_hi_thres = 0;
-		if (spi_controller_is_slave(drv_data->master)) {
+		if (spi_controller_is_slave(drv_data->controller)) {
 			tx_thres = 1;
 			rx_thres = 2;
 		} else {
@@ -1287,7 +1287,7 @@ static int setup(struct spi_device *spi)
 
 			chip->frm = spi->chip_select;
 		}
-		chip->enable_dma = drv_data->master_info->enable_dma;
+		chip->enable_dma = drv_data->controller_info->enable_dma;
 		chip->timeout = TIMOUT_DFLT;
 	}
 
@@ -1310,7 +1310,7 @@ static int setup(struct spi_device *spi)
 		if (chip_info->enable_loopback)
 			chip->cr1 = SSCR1_LBM;
 	}
-	if (spi_controller_is_slave(drv_data->master)) {
+	if (spi_controller_is_slave(drv_data->controller)) {
 		chip->cr1 |= SSCR1_SCFR;
 		chip->cr1 |= SSCR1_SCLKDIR;
 		chip->cr1 |= SSCR1_SFRMDIR;
@@ -1497,10 +1497,10 @@ static bool pxa2xx_spi_idma_filter(struct dma_chan *chan, void *param)
 
 #endif /* CONFIG_PCI */
 
-static struct pxa2xx_spi_master *
+static struct pxa2xx_spi_controller *
 pxa2xx_spi_init_pdata(struct platform_device *pdev)
 {
-	struct pxa2xx_spi_master *pdata;
+	struct pxa2xx_spi_controller *pdata;
 	struct acpi_device *adev;
 	struct ssp_device *ssp;
 	struct resource *res;
@@ -1568,10 +1568,10 @@ pxa2xx_spi_init_pdata(struct platform_device *pdev)
 	return pdata;
 }
 
-static int pxa2xx_spi_fw_translate_cs(struct spi_controller *master,
+static int pxa2xx_spi_fw_translate_cs(struct spi_controller *controller,
 				      unsigned int cs)
 {
-	struct driver_data *drv_data = spi_controller_get_devdata(master);
+	struct driver_data *drv_data = spi_controller_get_devdata(controller);
 
 	if (has_acpi_companion(&drv_data->pdev->dev)) {
 		switch (drv_data->ssp_type) {
@@ -1595,8 +1595,8 @@ static int pxa2xx_spi_fw_translate_cs(struct spi_controller *master,
 static int pxa2xx_spi_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
-	struct pxa2xx_spi_master *platform_info;
-	struct spi_controller *master;
+	struct pxa2xx_spi_controller *platform_info;
+	struct spi_controller *controller;
 	struct driver_data *drv_data;
 	struct ssp_device *ssp;
 	const struct lpss_config *config;
@@ -1622,37 +1622,37 @@ static int pxa2xx_spi_probe(struct platform_device *pdev)
 	}
 
 	if (platform_info->is_slave)
-		master = spi_alloc_slave(dev, sizeof(struct driver_data));
+		controller = spi_alloc_slave(dev, sizeof(struct driver_data));
 	else
-		master = spi_alloc_master(dev, sizeof(struct driver_data));
+		controller = spi_alloc_master(dev, sizeof(struct driver_data));
 
-	if (!master) {
-		dev_err(&pdev->dev, "cannot alloc spi_master\n");
+	if (!controller) {
+		dev_err(&pdev->dev, "cannot alloc spi_controller\n");
 		pxa_ssp_free(ssp);
 		return -ENOMEM;
 	}
-	drv_data = spi_controller_get_devdata(master);
-	drv_data->master = master;
-	drv_data->master_info = platform_info;
+	drv_data = spi_controller_get_devdata(controller);
+	drv_data->controller = controller;
+	drv_data->controller_info = platform_info;
 	drv_data->pdev = pdev;
 	drv_data->ssp = ssp;
 
-	master->dev.of_node = pdev->dev.of_node;
+	controller->dev.of_node = pdev->dev.of_node;
 	/* the spi->mode bits understood by this driver: */
-	master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH | SPI_LOOP;
-
-	master->bus_num = ssp->port_id;
-	master->dma_alignment = DMA_ALIGNMENT;
-	master->cleanup = cleanup;
-	master->setup = setup;
-	master->set_cs = pxa2xx_spi_set_cs;
-	master->transfer_one = pxa2xx_spi_transfer_one;
-	master->slave_abort = pxa2xx_spi_slave_abort;
-	master->handle_err = pxa2xx_spi_handle_err;
-	master->unprepare_transfer_hardware = pxa2xx_spi_unprepare_transfer;
-	master->fw_translate_cs = pxa2xx_spi_fw_translate_cs;
-	master->auto_runtime_pm = true;
-	master->flags = SPI_CONTROLLER_MUST_RX | SPI_CONTROLLER_MUST_TX;
+	controller->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH | SPI_LOOP;
+
+	controller->bus_num = ssp->port_id;
+	controller->dma_alignment = DMA_ALIGNMENT;
+	controller->cleanup = cleanup;
+	controller->setup = setup;
+	controller->set_cs = pxa2xx_spi_set_cs;
+	controller->transfer_one = pxa2xx_spi_transfer_one;
+	controller->slave_abort = pxa2xx_spi_slave_abort;
+	controller->handle_err = pxa2xx_spi_handle_err;
+	controller->unprepare_transfer_hardware = pxa2xx_spi_unprepare_transfer;
+	controller->fw_translate_cs = pxa2xx_spi_fw_translate_cs;
+	controller->auto_runtime_pm = true;
+	controller->flags = SPI_CONTROLLER_MUST_RX | SPI_CONTROLLER_MUST_TX;
 
 	drv_data->ssp_type = ssp->type;
 
@@ -1661,10 +1661,10 @@ static int pxa2xx_spi_probe(struct platform_device *pdev)
 	if (pxa25x_ssp_comp(drv_data)) {
 		switch (drv_data->ssp_type) {
 		case QUARK_X1000_SSP:
-			master->bits_per_word_mask = SPI_BPW_RANGE_MASK(4, 32);
+			controller->bits_per_word_mask = SPI_BPW_RANGE_MASK(4, 32);
 			break;
 		default:
-			master->bits_per_word_mask = SPI_BPW_RANGE_MASK(4, 16);
+			controller->bits_per_word_mask = SPI_BPW_RANGE_MASK(4, 16);
 			break;
 		}
 
@@ -1673,7 +1673,7 @@ static int pxa2xx_spi_probe(struct platform_device *pdev)
 		drv_data->clear_sr = SSSR_ROR;
 		drv_data->mask_sr = SSSR_RFS | SSSR_TFS | SSSR_ROR;
 	} else {
-		master->bits_per_word_mask = SPI_BPW_RANGE_MASK(4, 32);
+		controller->bits_per_word_mask = SPI_BPW_RANGE_MASK(4, 32);
 		drv_data->int_cr1 = SSCR1_TIE | SSCR1_RIE | SSCR1_TINTE;
 		drv_data->dma_cr1 = DEFAULT_DMA_CR1;
 		drv_data->clear_sr = SSSR_ROR | SSSR_TINT;
@@ -1685,7 +1685,7 @@ static int pxa2xx_spi_probe(struct platform_device *pdev)
 			drv_data);
 	if (status < 0) {
 		dev_err(&pdev->dev, "cannot get IRQ %d\n", ssp->irq);
-		goto out_error_master_alloc;
+		goto out_error_controller_alloc;
 	}
 
 	/* Setup DMA if requested */
@@ -1695,7 +1695,7 @@ static int pxa2xx_spi_probe(struct platform_device *pdev)
 			dev_dbg(dev, "no DMA channels available, using PIO\n");
 			platform_info->enable_dma = false;
 		} else {
-			master->can_dma = pxa2xx_spi_can_dma;
+			controller->can_dma = pxa2xx_spi_can_dma;
 		}
 	}
 
@@ -1704,7 +1704,7 @@ static int pxa2xx_spi_probe(struct platform_device *pdev)
 	if (status)
 		goto out_error_dma_irq_alloc;
 
-	master->max_speed_hz = clk_get_rate(ssp->clk);
+	controller->max_speed_hz = clk_get_rate(ssp->clk);
 
 	/* Load default SSP configuration */
 	pxa2xx_spi_write(drv_data, SSCR0, 0);
@@ -1727,7 +1727,7 @@ static int pxa2xx_spi_probe(struct platform_device *pdev)
 		break;
 	default:
 
-		if (spi_controller_is_slave(master)) {
+		if (spi_controller_is_slave(controller)) {
 			tmp = SSCR1_SCFR |
 			      SSCR1_SCLKDIR |
 			      SSCR1_SFRMDIR |
@@ -1740,7 +1740,7 @@ static int pxa2xx_spi_probe(struct platform_device *pdev)
 		}
 		pxa2xx_spi_write(drv_data, SSCR1, tmp);
 		tmp = SSCR0_Motorola | SSCR0_DataSize(8);
-		if (!spi_controller_is_slave(master))
+		if (!spi_controller_is_slave(controller))
 			tmp |= SSCR0_SCR(2);
 		pxa2xx_spi_write(drv_data, SSCR0, tmp);
 		break;
@@ -1765,24 +1765,24 @@ static int pxa2xx_spi_probe(struct platform_device *pdev)
 			platform_info->num_chipselect = config->cs_num;
 		}
 	}
-	master->num_chipselect = platform_info->num_chipselect;
+	controller->num_chipselect = platform_info->num_chipselect;
 
 	count = gpiod_count(&pdev->dev, "cs");
 	if (count > 0) {
 		int i;
 
-		master->num_chipselect = max_t(int, count,
-			master->num_chipselect);
+		controller->num_chipselect = max_t(int, count,
+			controller->num_chipselect);
 
 		drv_data->cs_gpiods = devm_kcalloc(&pdev->dev,
-			master->num_chipselect, sizeof(struct gpio_desc *),
+			controller->num_chipselect, sizeof(struct gpio_desc *),
 			GFP_KERNEL);
 		if (!drv_data->cs_gpiods) {
 			status = -ENOMEM;
 			goto out_error_clock_enabled;
 		}
 
-		for (i = 0; i < master->num_chipselect; i++) {
+		for (i = 0; i < controller->num_chipselect; i++) {
 			struct gpio_desc *gpiod;
 
 			gpiod = devm_gpiod_get_index(dev, "cs", i, GPIOD_ASIS);
@@ -1815,9 +1815,9 @@ static int pxa2xx_spi_probe(struct platform_device *pdev)
 
 	/* Register with the SPI framework */
 	platform_set_drvdata(pdev, drv_data);
-	status = devm_spi_register_controller(&pdev->dev, master);
+	status = devm_spi_register_controller(&pdev->dev, controller);
 	if (status != 0) {
-		dev_err(&pdev->dev, "problem registering spi master\n");
+		dev_err(&pdev->dev, "problem registering spi controller\n");
 		goto out_error_clock_enabled;
 	}
 
@@ -1832,8 +1832,8 @@ out_error_dma_irq_alloc:
 	pxa2xx_spi_dma_release(drv_data);
 	free_irq(ssp->irq, drv_data);
 
-out_error_master_alloc:
-	spi_controller_put(master);
+out_error_controller_alloc:
+	spi_controller_put(controller);
 	pxa_ssp_free(ssp);
 	return status;
 }
@@ -1854,7 +1854,7 @@ static int pxa2xx_spi_remove(struct platform_device *pdev)
 	clk_disable_unprepare(ssp->clk);
 
 	/* Release DMA */
-	if (drv_data->master_info->enable_dma)
+	if (drv_data->controller_info->enable_dma)
 		pxa2xx_spi_dma_release(drv_data);
 
 	pm_runtime_put_noidle(&pdev->dev);
@@ -1876,7 +1876,7 @@ static int pxa2xx_spi_suspend(struct device *dev)
 	struct ssp_device *ssp = drv_data->ssp;
 	int status;
 
-	status = spi_controller_suspend(drv_data->master);
+	status = spi_controller_suspend(drv_data->controller);
 	if (status != 0)
 		return status;
 	pxa2xx_spi_write(drv_data, SSCR0, 0);
@@ -1901,7 +1901,7 @@ static int pxa2xx_spi_resume(struct device *dev)
 	}
 
 	/* Start the queue running */
-	return spi_controller_resume(drv_data->master);
+	return spi_controller_resume(drv_data->controller);
 }
 #endif
 
diff --git a/drivers/spi/spi-pxa2xx.h b/drivers/spi/spi-pxa2xx.h
index 4e324da66ef7..aba777b4502d 100644
--- a/drivers/spi/spi-pxa2xx.h
+++ b/drivers/spi/spi-pxa2xx.h
@@ -31,10 +31,10 @@ struct driver_data {
 
 	/* SPI framework hookup */
 	enum pxa_ssp_type ssp_type;
-	struct spi_controller *master;
+	struct spi_controller *controller;
 
 	/* PXA hookup */
-	struct pxa2xx_spi_master *master_info;
+	struct pxa2xx_spi_controller *controller_info;
 
 	/* SSP register addresses */
 	void __iomem *ioaddr;
diff --git a/include/linux/spi/pxa2xx_spi.h b/include/linux/spi/pxa2xx_spi.h
index b0674e330ef6..c1c59473cef9 100644
--- a/include/linux/spi/pxa2xx_spi.h
+++ b/include/linux/spi/pxa2xx_spi.h
@@ -22,7 +22,7 @@
 struct dma_chan;
 
 /* device.platform_data for SSP controller devices */
-struct pxa2xx_spi_master {
+struct pxa2xx_spi_controller {
 	u16 num_chipselect;
 	u8 enable_dma;
 	bool is_slave;
@@ -54,7 +54,7 @@ struct pxa2xx_spi_chip {
 
 #include <linux/clk.h>
 
-extern void pxa2xx_set_spi_info(unsigned id, struct pxa2xx_spi_master *info);
+extern void pxa2xx_set_spi_info(unsigned id, struct pxa2xx_spi_controller *info);
 
 #endif
 #endif
-- 
cgit v1.2.3


From a2d21848d9211dad5e786aa7368709ca8938834e Mon Sep 17 00:00:00 2001
From: Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
Date: Tue, 22 Jan 2019 11:42:24 +0200
Subject: regmap: regmap-irq: Add main status register support

There is bunch of devices with multiple logical blocks which
can generate interrupts. It's not a rare case that the interrupt
reason registers are arranged so that there is own status/ack/mask
register for each logical block. In some devices there is also a
'main interrupt register(s)' which can indicate what sub blocks
have interrupts pending.

When such a device is connected via slow bus like i2c the main
part of interrupt handling latency can be caused by bus accesses.
On systems where it is expected that only one (or few) sub blocks
have active interrupts we can reduce the latency by only reading
the main register and those sub registers which have active
interrupts. Support this with regmap-irq for simple cases where
main register does not require acking or masking.

Signed-off-by: Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/base/regmap/regmap-irq.c | 99 ++++++++++++++++++++++++++++++++++++++--
 include/linux/regmap.h           | 31 +++++++++++++
 2 files changed, 126 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/drivers/base/regmap/regmap-irq.c b/drivers/base/regmap/regmap-irq.c
index 1bd1145ad8b5..22778e9501ff 100644
--- a/drivers/base/regmap/regmap-irq.c
+++ b/drivers/base/regmap/regmap-irq.c
@@ -35,6 +35,7 @@ struct regmap_irq_chip_data {
 	int wake_count;
 
 	void *status_reg_buf;
+	unsigned int *main_status_buf;
 	unsigned int *status_buf;
 	unsigned int *mask_buf;
 	unsigned int *mask_buf_def;
@@ -326,6 +327,33 @@ static const struct irq_chip regmap_irq_chip = {
 	.irq_set_wake		= regmap_irq_set_wake,
 };
 
+static inline int read_sub_irq_data(struct regmap_irq_chip_data *data,
+					   unsigned int b)
+{
+	const struct regmap_irq_chip *chip = data->chip;
+	struct regmap *map = data->map;
+	struct regmap_irq_sub_irq_map *subreg;
+	int i, ret = 0;
+
+	if (!chip->sub_reg_offsets) {
+		/* Assume linear mapping */
+		ret = regmap_read(map, chip->status_base +
+				  (b * map->reg_stride * data->irq_reg_stride),
+				   &data->status_buf[b]);
+	} else {
+		subreg = &chip->sub_reg_offsets[b];
+		for (i = 0; i < subreg->num_regs; i++) {
+			unsigned int offset = subreg->offset[i];
+
+			ret = regmap_read(map, chip->status_base + offset,
+					  &data->status_buf[offset]);
+			if (ret)
+				break;
+		}
+	}
+	return ret;
+}
+
 static irqreturn_t regmap_irq_thread(int irq, void *d)
 {
 	struct regmap_irq_chip_data *data = d;
@@ -349,11 +377,65 @@ static irqreturn_t regmap_irq_thread(int irq, void *d)
 	}
 
 	/*
-	 * Read in the statuses, using a single bulk read if possible
-	 * in order to reduce the I/O overheads.
+	 * Read only registers with active IRQs if the chip has 'main status
+	 * register'. Else read in the statuses, using a single bulk read if
+	 * possible in order to reduce the I/O overheads.
 	 */
-	if (!map->use_single_read && map->reg_stride == 1 &&
-	    data->irq_reg_stride == 1) {
+
+	if (chip->num_main_regs) {
+		unsigned int max_main_bits;
+		unsigned long size;
+
+		size = chip->num_regs * sizeof(unsigned int);
+
+		max_main_bits = (chip->num_main_status_bits) ?
+				 chip->num_main_status_bits : chip->num_regs;
+		/* Clear the status buf as we don't read all status regs */
+		memset(data->status_buf, 0, size);
+
+		/* We could support bulk read for main status registers
+		 * but I don't expect to see devices with really many main
+		 * status registers so let's only support single reads for the
+		 * sake of simplicity. and add bulk reads only if needed
+		 */
+		for (i = 0; i < chip->num_main_regs; i++) {
+			ret = regmap_read(map, chip->main_status +
+				  (i * map->reg_stride
+				   * data->irq_reg_stride),
+				  &data->main_status_buf[i]);
+			if (ret) {
+				dev_err(map->dev,
+					"Failed to read IRQ status %d\n",
+					ret);
+				goto exit;
+			}
+		}
+
+		/* Read sub registers with active IRQs */
+		for (i = 0; i < chip->num_main_regs; i++) {
+			unsigned int b;
+			const unsigned long mreg = data->main_status_buf[i];
+
+			for_each_set_bit(b, &mreg, map->format.val_bytes * 8) {
+				if (i * map->format.val_bytes * 8 + b >
+				    max_main_bits)
+					break;
+				ret = read_sub_irq_data(data, b);
+
+				if (ret != 0) {
+					dev_err(map->dev,
+						"Failed to read IRQ status %d\n",
+						ret);
+					if (chip->runtime_pm)
+						pm_runtime_put(map->dev);
+					goto exit;
+				}
+			}
+
+		}
+	} else if (!map->use_single_read && map->reg_stride == 1 &&
+		   data->irq_reg_stride == 1) {
+
 		u8 *buf8 = data->status_reg_buf;
 		u16 *buf16 = data->status_reg_buf;
 		u32 *buf32 = data->status_reg_buf;
@@ -518,6 +600,15 @@ int regmap_add_irq_chip(struct regmap *map, int irq, int irq_flags,
 	if (!d)
 		return -ENOMEM;
 
+	if (chip->num_main_regs) {
+		d->main_status_buf = kcalloc(chip->num_main_regs,
+					     sizeof(unsigned int),
+					     GFP_KERNEL);
+
+		if (!d->main_status_buf)
+			goto err_alloc;
+	}
+
 	d->status_buf = kcalloc(chip->num_regs, sizeof(unsigned int),
 				GFP_KERNEL);
 	if (!d->status_buf)
diff --git a/include/linux/regmap.h b/include/linux/regmap.h
index 1781b6cb793c..daeec7dbd65c 100644
--- a/include/linux/regmap.h
+++ b/include/linux/regmap.h
@@ -1131,11 +1131,37 @@ struct regmap_irq {
 		.reg_offset = (_id) / (_reg_bits),	\
 	}
 
+#define REGMAP_IRQ_MAIN_REG_OFFSET(arr)				\
+	{ .num_regs = ARRAY_SIZE((arr)), .offset = &(arr)[0] }
+
+struct regmap_irq_sub_irq_map {
+	unsigned int num_regs;
+	unsigned int *offset;
+};
+
 /**
  * struct regmap_irq_chip - Description of a generic regmap irq_chip.
  *
  * @name:        Descriptive name for IRQ controller.
  *
+ * @main_status: Base main status register address. For chips which have
+ *		 interrupts arranged in separate sub-irq blocks with own IRQ
+ *		 registers and which have a main IRQ registers indicating
+ *		 sub-irq blocks with unhandled interrupts. For such chips fill
+ *		 sub-irq register information in status_base, mask_base and
+ *		 ack_base.
+ * @num_main_status_bits: Should be given to chips where number of meaningfull
+ *			  main status bits differs from num_regs.
+ * @sub_reg_offsets: arrays of mappings from main register bits to sub irq
+ *		     registers. First item in array describes the registers
+ *		     for first main status bit. Second array for second bit etc.
+ *		     Offset is given as sub register status offset to
+ *		     status_base. Should contain num_regs arrays.
+ *		     Can be provided for chips with more complex mapping than
+ *		     1.st bit to 1.st sub-reg, 2.nd bit to 2.nd sub-reg, ...
+ * @num_main_regs: Number of 'main status' irq registers for chips which have
+ *		   main_status set.
+ *
  * @status_base: Base status register address.
  * @mask_base:   Base mask register address.
  * @mask_writeonly: Base mask register is write only.
@@ -1181,6 +1207,11 @@ struct regmap_irq {
 struct regmap_irq_chip {
 	const char *name;
 
+	unsigned int main_status;
+	unsigned int num_main_status_bits;
+	struct regmap_irq_sub_irq_map *sub_reg_offsets;
+	int num_main_regs;
+
 	unsigned int status_base;
 	unsigned int mask_base;
 	unsigned int unmask_base;
-- 
cgit v1.2.3


From f0125f1a559be1033055f44e511174aaa75b60cc Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Wed, 23 Jan 2019 17:29:53 +0000
Subject: spi: Go back to immediate teardown

Commit 412e6037324 ("spi: core: avoid waking pump thread from spi_sync
instead run teardown delayed") introduced regressions on some boards,
apparently connected to spi_mem not triggering shutdown properly any
more.  Since we've thus far been unable to figure out exactly where the
breakage is revert the optimisation for now.

Reported-by: Jon Hunter <jonathanh@nvidia.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
Cc: kernel@martin.sperl.org
---
 drivers/spi/spi.c       | 122 +++++++++++++-----------------------------------
 include/linux/spi/spi.h |   2 -
 2 files changed, 33 insertions(+), 91 deletions(-)

(limited to 'include')

diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index 06b9139664a3..13f447a67d67 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -1225,7 +1225,7 @@ static void __spi_pump_messages(struct spi_controller *ctlr, bool in_kthread)
 		return;
 	}
 
-	/* If another context is idling the device then defer to kthread */
+	/* If another context is idling the device then defer */
 	if (ctlr->idling) {
 		kthread_queue_work(&ctlr->kworker, &ctlr->pump_messages);
 		spin_unlock_irqrestore(&ctlr->queue_lock, flags);
@@ -1239,10 +1239,34 @@ static void __spi_pump_messages(struct spi_controller *ctlr, bool in_kthread)
 			return;
 		}
 
-		/* schedule idle teardown with a delay of 1 second */
-		kthread_mod_delayed_work(&ctlr->kworker,
-					 &ctlr->pump_idle_teardown,
-					 HZ);
+		/* Only do teardown in the thread */
+		if (!in_kthread) {
+			kthread_queue_work(&ctlr->kworker,
+					   &ctlr->pump_messages);
+			spin_unlock_irqrestore(&ctlr->queue_lock, flags);
+			return;
+		}
+
+		ctlr->busy = false;
+		ctlr->idling = true;
+		spin_unlock_irqrestore(&ctlr->queue_lock, flags);
+
+		kfree(ctlr->dummy_rx);
+		ctlr->dummy_rx = NULL;
+		kfree(ctlr->dummy_tx);
+		ctlr->dummy_tx = NULL;
+		if (ctlr->unprepare_transfer_hardware &&
+		    ctlr->unprepare_transfer_hardware(ctlr))
+			dev_err(&ctlr->dev,
+				"failed to unprepare transfer hardware\n");
+		if (ctlr->auto_runtime_pm) {
+			pm_runtime_mark_last_busy(ctlr->dev.parent);
+			pm_runtime_put_autosuspend(ctlr->dev.parent);
+		}
+		trace_spi_controller_idle(ctlr);
+
+		spin_lock_irqsave(&ctlr->queue_lock, flags);
+		ctlr->idling = false;
 		spin_unlock_irqrestore(&ctlr->queue_lock, flags);
 		return;
 	}
@@ -1335,77 +1359,6 @@ static void spi_pump_messages(struct kthread_work *work)
 	__spi_pump_messages(ctlr, true);
 }
 
-/**
- * spi_pump_idle_teardown - kthread delayed work function which tears down
- *                          the controller settings after some delay
- * @work: pointer to kthread work struct contained in the controller struct
- */
-static void spi_pump_idle_teardown(struct kthread_work *work)
-{
-	struct spi_controller *ctlr =
-		container_of(work, struct spi_controller,
-			     pump_idle_teardown.work);
-	unsigned long flags;
-
-	/* Lock queue */
-	spin_lock_irqsave(&ctlr->queue_lock, flags);
-
-	/* Make sure we are not already running a message */
-	if (ctlr->cur_msg)
-		goto out;
-
-	/* if there is anything in the list then exit */
-	if (!list_empty(&ctlr->queue))
-		goto out;
-
-	/* if the controller is running then exit */
-	if (ctlr->running)
-		goto out;
-
-	/* if the controller is busy then exit */
-	if (ctlr->busy)
-		goto out;
-
-	/* if the controller is idling then exit
-	 * this is actually a bit strange and would indicate that
-	 * this function is scheduled twice, which should not happen
-	 */
-	if (ctlr->idling)
-		goto out;
-
-	/* set up the initial states */
-	ctlr->busy = false;
-	ctlr->idling = true;
-	spin_unlock_irqrestore(&ctlr->queue_lock, flags);
-
-	/* free dummy receive buffers */
-	kfree(ctlr->dummy_rx);
-	ctlr->dummy_rx = NULL;
-	kfree(ctlr->dummy_tx);
-	ctlr->dummy_tx = NULL;
-
-	/* unprepare hardware */
-	if (ctlr->unprepare_transfer_hardware &&
-	    ctlr->unprepare_transfer_hardware(ctlr))
-		dev_err(&ctlr->dev,
-			"failed to unprepare transfer hardware\n");
-	/* handle pm */
-	if (ctlr->auto_runtime_pm) {
-		pm_runtime_mark_last_busy(ctlr->dev.parent);
-		pm_runtime_put_autosuspend(ctlr->dev.parent);
-	}
-
-	/* mark controller as idle */
-	trace_spi_controller_idle(ctlr);
-
-	/* finally put us from idling into stopped */
-	spin_lock_irqsave(&ctlr->queue_lock, flags);
-	ctlr->idling = false;
-
-out:
-	spin_unlock_irqrestore(&ctlr->queue_lock, flags);
-}
-
 static int spi_init_queue(struct spi_controller *ctlr)
 {
 	struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 };
@@ -1421,8 +1374,7 @@ static int spi_init_queue(struct spi_controller *ctlr)
 		return PTR_ERR(ctlr->kworker_task);
 	}
 	kthread_init_work(&ctlr->pump_messages, spi_pump_messages);
-	kthread_init_delayed_work(&ctlr->pump_idle_teardown,
-				  spi_pump_idle_teardown);
+
 	/*
 	 * Controller config will indicate if this controller should run the
 	 * message pump with high (realtime) priority to reduce the transfer
@@ -1494,16 +1446,7 @@ void spi_finalize_current_message(struct spi_controller *ctlr)
 	spin_lock_irqsave(&ctlr->queue_lock, flags);
 	ctlr->cur_msg = NULL;
 	ctlr->cur_msg_prepared = false;
-
-	/* if there is something queued, then wake the queue */
-	if (!list_empty(&ctlr->queue))
-		kthread_queue_work(&ctlr->kworker, &ctlr->pump_messages);
-	else
-		/* otherwise schedule delayed teardown */
-		kthread_mod_delayed_work(&ctlr->kworker,
-					 &ctlr->pump_idle_teardown,
-					 HZ);
-
+	kthread_queue_work(&ctlr->kworker, &ctlr->pump_messages);
 	spin_unlock_irqrestore(&ctlr->queue_lock, flags);
 
 	trace_spi_message_done(mesg);
@@ -1608,7 +1551,7 @@ static int __spi_queued_transfer(struct spi_device *spi,
 	msg->status = -EINPROGRESS;
 
 	list_add_tail(&msg->queue, &ctlr->queue);
-	if (need_pump)
+	if (!ctlr->busy && need_pump)
 		kthread_queue_work(&ctlr->kworker, &ctlr->pump_messages);
 
 	spin_unlock_irqrestore(&ctlr->queue_lock, flags);
@@ -3783,3 +3726,4 @@ err0:
  * include needing to have boardinfo data structures be much more public.
  */
 postcore_initcall(spi_init);
+
diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index 79ad62e2487c..916bba47d156 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -334,7 +334,6 @@ static inline void spi_unregister_driver(struct spi_driver *sdrv)
  * @kworker: thread struct for message pump
  * @kworker_task: pointer to task for message pump kworker thread
  * @pump_messages: work struct for scheduling work to the message pump
- * @pump_idle_teardown: work structure for scheduling a teardown delayed
  * @queue_lock: spinlock to syncronise access to message queue
  * @queue: message queue
  * @idling: the device is entering idle state
@@ -533,7 +532,6 @@ struct spi_controller {
 	struct kthread_worker		kworker;
 	struct task_struct		*kworker_task;
 	struct kthread_work		pump_messages;
-	struct kthread_delayed_work     pump_idle_teardown;
 	spinlock_t			queue_lock;
 	struct list_head		queue;
 	struct spi_message		*cur_msg;
-- 
cgit v1.2.3


From 52875a04f4b26e7ef30a288ea096f7cfec0e93cd Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Tue, 22 Jan 2019 22:45:20 -0800
Subject: bpf: verifier: remove dead code

Instead of overwriting dead code with jmp -1 instructions
remove it completely for root.  Adjust verifier state and
line info appropriately.

v2:
 - adjust func_info (Alexei);
 - make sure first instruction retains line info (Alexei).
v4: (Yonghong)
 - remove unnecessary if (!insn to remove) checks;
 - always keep last line info if first live instruction lacks one.
v5: (Martin Lau)
 - improve and clarify comments.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Acked-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/filter.h |   1 +
 kernel/bpf/core.c      |  12 ++++
 kernel/bpf/verifier.c  | 176 ++++++++++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 186 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/filter.h b/include/linux/filter.h
index ad106d845b22..be9af6b4a9e4 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -778,6 +778,7 @@ static inline bool bpf_dump_raw_ok(void)
 
 struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
 				       const struct bpf_insn *patch, u32 len);
+int bpf_remove_insns(struct bpf_prog *prog, u32 off, u32 cnt);
 
 void bpf_clear_redirect_map(struct bpf_map *map);
 
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index ad08ba341197..2a81b8af3748 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -462,6 +462,18 @@ struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
 	return prog_adj;
 }
 
+int bpf_remove_insns(struct bpf_prog *prog, u32 off, u32 cnt)
+{
+	/* Branch offsets can't overflow when program is shrinking, no need
+	 * to call bpf_adj_branches(..., true) here
+	 */
+	memmove(prog->insnsi + off, prog->insnsi + off + cnt,
+		sizeof(struct bpf_insn) * (prog->len - off - cnt));
+	prog->len -= cnt;
+
+	return WARN_ON_ONCE(bpf_adj_branches(prog, off, off + cnt, off, false));
+}
+
 void bpf_prog_kallsyms_del_subprogs(struct bpf_prog *fp)
 {
 	int i;
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index bf1f98e8beb6..099b2541f87f 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -6432,6 +6432,150 @@ static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 of
 	return new_prog;
 }
 
+static int adjust_subprog_starts_after_remove(struct bpf_verifier_env *env,
+					      u32 off, u32 cnt)
+{
+	int i, j;
+
+	/* find first prog starting at or after off (first to remove) */
+	for (i = 0; i < env->subprog_cnt; i++)
+		if (env->subprog_info[i].start >= off)
+			break;
+	/* find first prog starting at or after off + cnt (first to stay) */
+	for (j = i; j < env->subprog_cnt; j++)
+		if (env->subprog_info[j].start >= off + cnt)
+			break;
+	/* if j doesn't start exactly at off + cnt, we are just removing
+	 * the front of previous prog
+	 */
+	if (env->subprog_info[j].start != off + cnt)
+		j--;
+
+	if (j > i) {
+		struct bpf_prog_aux *aux = env->prog->aux;
+		int move;
+
+		/* move fake 'exit' subprog as well */
+		move = env->subprog_cnt + 1 - j;
+
+		memmove(env->subprog_info + i,
+			env->subprog_info + j,
+			sizeof(*env->subprog_info) * move);
+		env->subprog_cnt -= j - i;
+
+		/* remove func_info */
+		if (aux->func_info) {
+			move = aux->func_info_cnt - j;
+
+			memmove(aux->func_info + i,
+				aux->func_info + j,
+				sizeof(*aux->func_info) * move);
+			aux->func_info_cnt -= j - i;
+			/* func_info->insn_off is set after all code rewrites,
+			 * in adjust_btf_func() - no need to adjust
+			 */
+		}
+	} else {
+		/* convert i from "first prog to remove" to "first to adjust" */
+		if (env->subprog_info[i].start == off)
+			i++;
+	}
+
+	/* update fake 'exit' subprog as well */
+	for (; i <= env->subprog_cnt; i++)
+		env->subprog_info[i].start -= cnt;
+
+	return 0;
+}
+
+static int bpf_adj_linfo_after_remove(struct bpf_verifier_env *env, u32 off,
+				      u32 cnt)
+{
+	struct bpf_prog *prog = env->prog;
+	u32 i, l_off, l_cnt, nr_linfo;
+	struct bpf_line_info *linfo;
+
+	nr_linfo = prog->aux->nr_linfo;
+	if (!nr_linfo)
+		return 0;
+
+	linfo = prog->aux->linfo;
+
+	/* find first line info to remove, count lines to be removed */
+	for (i = 0; i < nr_linfo; i++)
+		if (linfo[i].insn_off >= off)
+			break;
+
+	l_off = i;
+	l_cnt = 0;
+	for (; i < nr_linfo; i++)
+		if (linfo[i].insn_off < off + cnt)
+			l_cnt++;
+		else
+			break;
+
+	/* First live insn doesn't match first live linfo, it needs to "inherit"
+	 * last removed linfo.  prog is already modified, so prog->len == off
+	 * means no live instructions after (tail of the program was removed).
+	 */
+	if (prog->len != off && l_cnt &&
+	    (i == nr_linfo || linfo[i].insn_off != off + cnt)) {
+		l_cnt--;
+		linfo[--i].insn_off = off + cnt;
+	}
+
+	/* remove the line info which refer to the removed instructions */
+	if (l_cnt) {
+		memmove(linfo + l_off, linfo + i,
+			sizeof(*linfo) * (nr_linfo - i));
+
+		prog->aux->nr_linfo -= l_cnt;
+		nr_linfo = prog->aux->nr_linfo;
+	}
+
+	/* pull all linfo[i].insn_off >= off + cnt in by cnt */
+	for (i = l_off; i < nr_linfo; i++)
+		linfo[i].insn_off -= cnt;
+
+	/* fix up all subprogs (incl. 'exit') which start >= off */
+	for (i = 0; i <= env->subprog_cnt; i++)
+		if (env->subprog_info[i].linfo_idx > l_off) {
+			/* program may have started in the removed region but
+			 * may not be fully removed
+			 */
+			if (env->subprog_info[i].linfo_idx >= l_off + l_cnt)
+				env->subprog_info[i].linfo_idx -= l_cnt;
+			else
+				env->subprog_info[i].linfo_idx = l_off;
+		}
+
+	return 0;
+}
+
+static int verifier_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt)
+{
+	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
+	unsigned int orig_prog_len = env->prog->len;
+	int err;
+
+	err = bpf_remove_insns(env->prog, off, cnt);
+	if (err)
+		return err;
+
+	err = adjust_subprog_starts_after_remove(env, off, cnt);
+	if (err)
+		return err;
+
+	err = bpf_adj_linfo_after_remove(env, off, cnt);
+	if (err)
+		return err;
+
+	memmove(aux_data + off,	aux_data + off + cnt,
+		sizeof(*aux_data) * (orig_prog_len - off - cnt));
+
+	return 0;
+}
+
 /* The verifier does more data flow analysis than llvm and will not
  * explore branches that are dead at run time. Malicious programs can
  * have dead code too. Therefore replace all dead at-run-time code
@@ -6492,6 +6636,30 @@ static void opt_hard_wire_dead_code_branches(struct bpf_verifier_env *env)
 	}
 }
 
+static int opt_remove_dead_code(struct bpf_verifier_env *env)
+{
+	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
+	int insn_cnt = env->prog->len;
+	int i, err;
+
+	for (i = 0; i < insn_cnt; i++) {
+		int j;
+
+		j = 0;
+		while (i + j < insn_cnt && !aux_data[i + j].seen)
+			j++;
+		if (!j)
+			continue;
+
+		err = verifier_remove_insns(env, i, j);
+		if (err)
+			return err;
+		insn_cnt = env->prog->len;
+	}
+
+	return 0;
+}
+
 /* convert load instructions that access fields of a context type into a
  * sequence of instructions that access fields of the underlying structure:
  *     struct __sk_buff    -> struct sk_buff
@@ -7282,11 +7450,13 @@ skip_full_check:
 	if (is_priv) {
 		if (ret == 0)
 			opt_hard_wire_dead_code_branches(env);
+		if (ret == 0)
+			ret = opt_remove_dead_code(env);
+	} else {
+		if (ret == 0)
+			sanitize_dead_code(env);
 	}
 
-	if (ret == 0)
-		sanitize_dead_code(env);
-
 	if (ret == 0)
 		/* program is valid, convert *(u32*)(ctx + off) accesses */
 		ret = convert_ctx_accesses(env);
-- 
cgit v1.2.3


From 9e4c24e7ee7dfd3898269519103e823892b730d8 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Tue, 22 Jan 2019 22:45:23 -0800
Subject: bpf: verifier: record original instruction index

The communication between the verifier and advanced JITs is based
on instruction indexes.  We have to keep them stable throughout
the optimizations otherwise referring to a particular instruction
gets messy quickly.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf_verifier.h | 1 +
 kernel/bpf/verifier.c        | 8 +++++---
 2 files changed, 6 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 573cca00a0e6..f3ae00ee5516 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -187,6 +187,7 @@ struct bpf_insn_aux_data {
 	int sanitize_stack_off; /* stack slot to be cleared */
 	bool seen; /* this insn was processed by the verifier */
 	u8 alu_state; /* used in combination with alu_limit */
+	unsigned int orig_idx; /* original instruction index */
 };
 
 #define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index f39bca188a5c..f2c49b4235df 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -7371,7 +7371,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
 {
 	struct bpf_verifier_env *env;
 	struct bpf_verifier_log *log;
-	int ret = -EINVAL;
+	int i, len, ret = -EINVAL;
 	bool is_priv;
 
 	/* no program is valid */
@@ -7386,12 +7386,14 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
 		return -ENOMEM;
 	log = &env->log;
 
+	len = (*prog)->len;
 	env->insn_aux_data =
-		vzalloc(array_size(sizeof(struct bpf_insn_aux_data),
-				   (*prog)->len));
+		vzalloc(array_size(sizeof(struct bpf_insn_aux_data), len));
 	ret = -ENOMEM;
 	if (!env->insn_aux_data)
 		goto err_free_env;
+	for (i = 0; i < len; i++)
+		env->insn_aux_data[i].orig_idx = i;
 	env->prog = *prog;
 	env->ops = bpf_verifier_ops[env->prog->type];
 
-- 
cgit v1.2.3


From 08ca90afba255d05dc3253caa44056e7aecbe8c5 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Tue, 22 Jan 2019 22:45:24 -0800
Subject: bpf: notify offload JITs about optimizations

Let offload JITs know when instructions are replaced and optimized
out, so they can update their state appropriately.  The optimizations
are best effort, if JIT returns an error from any callback verifier
will stop notifying it as state may now be out of sync, but the
verifier continues making progress.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf.h          |  7 +++++++
 include/linux/bpf_verifier.h |  5 +++++
 kernel/bpf/offload.c         | 35 +++++++++++++++++++++++++++++++++++
 kernel/bpf/verifier.c        |  6 ++++++
 4 files changed, 53 insertions(+)

(limited to 'include')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index e734f163bd0b..3851529062ec 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -268,9 +268,15 @@ struct bpf_verifier_ops {
 };
 
 struct bpf_prog_offload_ops {
+	/* verifier basic callbacks */
 	int (*insn_hook)(struct bpf_verifier_env *env,
 			 int insn_idx, int prev_insn_idx);
 	int (*finalize)(struct bpf_verifier_env *env);
+	/* verifier optimization callbacks (called after .finalize) */
+	int (*replace_insn)(struct bpf_verifier_env *env, u32 off,
+			    struct bpf_insn *insn);
+	int (*remove_insns)(struct bpf_verifier_env *env, u32 off, u32 cnt);
+	/* program management callbacks */
 	int (*prepare)(struct bpf_prog *prog);
 	int (*translate)(struct bpf_prog *prog);
 	void (*destroy)(struct bpf_prog *prog);
@@ -283,6 +289,7 @@ struct bpf_prog_offload {
 	void			*dev_priv;
 	struct list_head	offloads;
 	bool			dev_state;
+	bool			opt_failed;
 	void			*jited_image;
 	u32			jited_len;
 };
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index f3ae00ee5516..0620e418dde5 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -266,5 +266,10 @@ int bpf_prog_offload_verifier_prep(struct bpf_prog *prog);
 int bpf_prog_offload_verify_insn(struct bpf_verifier_env *env,
 				 int insn_idx, int prev_insn_idx);
 int bpf_prog_offload_finalize(struct bpf_verifier_env *env);
+void
+bpf_prog_offload_replace_insn(struct bpf_verifier_env *env, u32 off,
+			      struct bpf_insn *insn);
+void
+bpf_prog_offload_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt);
 
 #endif /* _LINUX_BPF_VERIFIER_H */
diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c
index 54cf2b9c44a4..39dba8c90331 100644
--- a/kernel/bpf/offload.c
+++ b/kernel/bpf/offload.c
@@ -173,6 +173,41 @@ int bpf_prog_offload_finalize(struct bpf_verifier_env *env)
 	return ret;
 }
 
+void
+bpf_prog_offload_replace_insn(struct bpf_verifier_env *env, u32 off,
+			      struct bpf_insn *insn)
+{
+	const struct bpf_prog_offload_ops *ops;
+	struct bpf_prog_offload *offload;
+	int ret = -EOPNOTSUPP;
+
+	down_read(&bpf_devs_lock);
+	offload = env->prog->aux->offload;
+	if (offload) {
+		ops = offload->offdev->ops;
+		if (!offload->opt_failed && ops->replace_insn)
+			ret = ops->replace_insn(env, off, insn);
+		offload->opt_failed |= ret;
+	}
+	up_read(&bpf_devs_lock);
+}
+
+void
+bpf_prog_offload_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt)
+{
+	struct bpf_prog_offload *offload;
+	int ret = -EOPNOTSUPP;
+
+	down_read(&bpf_devs_lock);
+	offload = env->prog->aux->offload;
+	if (offload) {
+		if (!offload->opt_failed && offload->offdev->ops->remove_insns)
+			ret = offload->offdev->ops->remove_insns(env, off, cnt);
+		offload->opt_failed |= ret;
+	}
+	up_read(&bpf_devs_lock);
+}
+
 static void __bpf_prog_offload_destroy(struct bpf_prog *prog)
 {
 	struct bpf_prog_offload *offload = prog->aux->offload;
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index f2c49b4235df..8cfe39ef770f 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -6558,6 +6558,9 @@ static int verifier_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt)
 	unsigned int orig_prog_len = env->prog->len;
 	int err;
 
+	if (bpf_prog_is_dev_bound(env->prog->aux))
+		bpf_prog_offload_remove_insns(env, off, cnt);
+
 	err = bpf_remove_insns(env->prog, off, cnt);
 	if (err)
 		return err;
@@ -6632,6 +6635,9 @@ static void opt_hard_wire_dead_code_branches(struct bpf_verifier_env *env)
 		else
 			continue;
 
+		if (bpf_prog_is_dev_bound(env->prog->aux))
+			bpf_prog_offload_replace_insn(env, i, &ja);
+
 		memcpy(insn, &ja, sizeof(ja));
 	}
 }
-- 
cgit v1.2.3


From 643fa9612bf1a29153eee46fd398117632f93cbe Mon Sep 17 00:00:00 2001
From: Chandan Rajendra <chandan@linux.vnet.ibm.com>
Date: Wed, 12 Dec 2018 15:20:12 +0530
Subject: fscrypt: remove filesystem specific build config option

In order to have a common code base for fscrypt "post read" processing
for all filesystems which support encryption, this commit removes
filesystem specific build config option (e.g. CONFIG_EXT4_FS_ENCRYPTION)
and replaces it with a build option (i.e. CONFIG_FS_ENCRYPTION) whose
value affects all the filesystems making use of fscrypt.

Reviewed-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Chandan Rajendra <chandan@linux.vnet.ibm.com>
Signed-off-by: Eric Biggers <ebiggers@google.com>
---
 Documentation/filesystems/fscrypt.rst   |   4 +-
 arch/mips/configs/generic_defconfig     |   2 +-
 arch/nds32/configs/defconfig            |   2 +-
 arch/s390/configs/debug_defconfig       |   2 +-
 arch/s390/configs/performance_defconfig |   2 +-
 fs/crypto/Kconfig                       |   5 +-
 fs/crypto/fscrypt_private.h             |   1 -
 fs/ext4/Kconfig                         |  15 --
 fs/ext4/dir.c                           |   2 -
 fs/ext4/ext4.h                          |   7 +-
 fs/ext4/inode.c                         |   8 +-
 fs/ext4/ioctl.c                         |   4 +-
 fs/ext4/namei.c                         |  10 +-
 fs/ext4/page-io.c                       |   6 +-
 fs/ext4/readpage.c                      |   2 +-
 fs/ext4/super.c                         |   6 +-
 fs/ext4/sysfs.c                         |   4 +-
 fs/f2fs/Kconfig                         |  12 +-
 fs/f2fs/f2fs.h                          |   7 +-
 fs/f2fs/super.c                         |   8 +-
 fs/f2fs/sysfs.c                         |   4 +-
 fs/ubifs/Kconfig                        |  12 +-
 fs/ubifs/Makefile                       |   2 +-
 fs/ubifs/ioctl.c                        |   4 +-
 fs/ubifs/sb.c                           |   2 +-
 fs/ubifs/super.c                        |   2 +-
 fs/ubifs/ubifs.h                        |   5 +-
 include/linux/fs.h                      |   4 +-
 include/linux/fscrypt.h                 | 416 +++++++++++++++++++++++++++++++-
 include/linux/fscrypt_notsupp.h         | 231 ------------------
 include/linux/fscrypt_supp.h            | 204 ----------------
 31 files changed, 460 insertions(+), 535 deletions(-)
 delete mode 100644 include/linux/fscrypt_notsupp.h
 delete mode 100644 include/linux/fscrypt_supp.h

(limited to 'include')

diff --git a/Documentation/filesystems/fscrypt.rst b/Documentation/filesystems/fscrypt.rst
index 3a7b60521b94..43dd989e2a3f 100644
--- a/Documentation/filesystems/fscrypt.rst
+++ b/Documentation/filesystems/fscrypt.rst
@@ -343,9 +343,9 @@ FS_IOC_SET_ENCRYPTION_POLICY can fail with the following errors:
 - ``ENOTEMPTY``: the file is unencrypted and is a nonempty directory
 - ``ENOTTY``: this type of filesystem does not implement encryption
 - ``EOPNOTSUPP``: the kernel was not configured with encryption
-  support for this filesystem, or the filesystem superblock has not
+  support for filesystems, or the filesystem superblock has not
   had encryption enabled on it.  (For example, to use encryption on an
-  ext4 filesystem, CONFIG_EXT4_ENCRYPTION must be enabled in the
+  ext4 filesystem, CONFIG_FS_ENCRYPTION must be enabled in the
   kernel config, and the superblock must have had the "encrypt"
   feature flag enabled using ``tune2fs -O encrypt`` or ``mkfs.ext4 -O
   encrypt``.)
diff --git a/arch/mips/configs/generic_defconfig b/arch/mips/configs/generic_defconfig
index 7c138dab87df..5d80521e5d5a 100644
--- a/arch/mips/configs/generic_defconfig
+++ b/arch/mips/configs/generic_defconfig
@@ -59,7 +59,7 @@ CONFIG_HID_MONTEREY=y
 CONFIG_EXT4_FS=y
 CONFIG_EXT4_FS_POSIX_ACL=y
 CONFIG_EXT4_FS_SECURITY=y
-CONFIG_EXT4_ENCRYPTION=y
+CONFIG_FS_ENCRYPTION=y
 CONFIG_FANOTIFY=y
 CONFIG_FUSE_FS=y
 CONFIG_CUSE=y
diff --git a/arch/nds32/configs/defconfig b/arch/nds32/configs/defconfig
index 2546d8770785..65ce9259081b 100644
--- a/arch/nds32/configs/defconfig
+++ b/arch/nds32/configs/defconfig
@@ -74,7 +74,7 @@ CONFIG_GENERIC_PHY=y
 CONFIG_EXT4_FS=y
 CONFIG_EXT4_FS_POSIX_ACL=y
 CONFIG_EXT4_FS_SECURITY=y
-CONFIG_EXT4_ENCRYPTION=y
+CONFIG_FS_ENCRYPTION=y
 CONFIG_FUSE_FS=y
 CONFIG_MSDOS_FS=y
 CONFIG_VFAT_FS=y
diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig
index c69cb04b7a59..9824c7bad9d4 100644
--- a/arch/s390/configs/debug_defconfig
+++ b/arch/s390/configs/debug_defconfig
@@ -500,7 +500,6 @@ CONFIG_S390_AP_IOMMU=y
 CONFIG_EXT4_FS=y
 CONFIG_EXT4_FS_POSIX_ACL=y
 CONFIG_EXT4_FS_SECURITY=y
-CONFIG_EXT4_ENCRYPTION=y
 CONFIG_JBD2_DEBUG=y
 CONFIG_JFS_FS=m
 CONFIG_JFS_POSIX_ACL=y
@@ -520,6 +519,7 @@ CONFIG_BTRFS_DEBUG=y
 CONFIG_NILFS2_FS=m
 CONFIG_FS_DAX=y
 CONFIG_EXPORTFS_BLOCK_OPS=y
+CONFIG_FS_ENCRYPTION=y
 CONFIG_FANOTIFY=y
 CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y
 CONFIG_QUOTA_NETLINK_INTERFACE=y
diff --git a/arch/s390/configs/performance_defconfig b/arch/s390/configs/performance_defconfig
index 32f539dc9c19..4fcbe5792744 100644
--- a/arch/s390/configs/performance_defconfig
+++ b/arch/s390/configs/performance_defconfig
@@ -497,7 +497,6 @@ CONFIG_S390_AP_IOMMU=y
 CONFIG_EXT4_FS=y
 CONFIG_EXT4_FS_POSIX_ACL=y
 CONFIG_EXT4_FS_SECURITY=y
-CONFIG_EXT4_ENCRYPTION=y
 CONFIG_JBD2_DEBUG=y
 CONFIG_JFS_FS=m
 CONFIG_JFS_POSIX_ACL=y
@@ -515,6 +514,7 @@ CONFIG_BTRFS_FS_POSIX_ACL=y
 CONFIG_NILFS2_FS=m
 CONFIG_FS_DAX=y
 CONFIG_EXPORTFS_BLOCK_OPS=y
+CONFIG_FS_ENCRYPTION=y
 CONFIG_FANOTIFY=y
 CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y
 CONFIG_QUOTA_NETLINK_INTERFACE=y
diff --git a/fs/crypto/Kconfig b/fs/crypto/Kconfig
index 284b589b4774..f0de238000c0 100644
--- a/fs/crypto/Kconfig
+++ b/fs/crypto/Kconfig
@@ -1,5 +1,5 @@
 config FS_ENCRYPTION
-	tristate "FS Encryption (Per-file encryption)"
+	bool "FS Encryption (Per-file encryption)"
 	select CRYPTO
 	select CRYPTO_AES
 	select CRYPTO_CBC
@@ -12,4 +12,5 @@ config FS_ENCRYPTION
 	  Enable encryption of files and directories.  This
 	  feature is similar to ecryptfs, but it is more memory
 	  efficient since it avoids caching the encrypted and
-	  decrypted pages in the page cache.
+	  decrypted pages in the page cache.  Currently Ext4,
+	  F2FS and UBIFS make use of this feature.
diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h
index 7424f851eb5c..7da276159593 100644
--- a/fs/crypto/fscrypt_private.h
+++ b/fs/crypto/fscrypt_private.h
@@ -12,7 +12,6 @@
 #ifndef _FSCRYPT_PRIVATE_H
 #define _FSCRYPT_PRIVATE_H
 
-#define __FS_HAS_ENCRYPTION 1
 #include <linux/fscrypt.h>
 #include <crypto/hash.h>
 
diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig
index a453cc87082b..031e5a82d556 100644
--- a/fs/ext4/Kconfig
+++ b/fs/ext4/Kconfig
@@ -96,21 +96,6 @@ config EXT4_FS_SECURITY
 	  If you are not using a security module that requires using
 	  extended attributes for file security labels, say N.
 
-config EXT4_ENCRYPTION
-	bool "Ext4 Encryption"
-	depends on EXT4_FS
-	select FS_ENCRYPTION
-	help
-	  Enable encryption of ext4 files and directories.  This
-	  feature is similar to ecryptfs, but it is more memory
-	  efficient since it avoids caching the encrypted and
-	  decrypted pages in the page cache.
-
-config EXT4_FS_ENCRYPTION
-	bool
-	default y
-	depends on EXT4_ENCRYPTION
-
 config EXT4_DEBUG
 	bool "EXT4 debugging support"
 	depends on EXT4_FS
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index fb7a64ea5679..0ccd51f72048 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -283,9 +283,7 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
 done:
 	err = 0;
 errout:
-#ifdef CONFIG_EXT4_FS_ENCRYPTION
 	fscrypt_fname_free_buffer(&fstr);
-#endif
 	brelse(bh);
 	return err;
 }
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index afdb9ad8be0e..5012ddb6daf9 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -40,7 +40,6 @@
 #include <linux/compat.h>
 #endif
 
-#define __FS_HAS_ENCRYPTION IS_ENABLED(CONFIG_EXT4_FS_ENCRYPTION)
 #include <linux/fscrypt.h>
 
 #include <linux/compiler.h>
@@ -1326,7 +1325,7 @@ struct ext4_super_block {
 #define EXT4_MF_FS_ABORTED		0x0002	/* Fatal error detected */
 #define EXT4_MF_TEST_DUMMY_ENCRYPTION	0x0004
 
-#ifdef CONFIG_EXT4_FS_ENCRYPTION
+#ifdef CONFIG_FS_ENCRYPTION
 #define DUMMY_ENCRYPTION_ENABLED(sbi) (unlikely((sbi)->s_mount_flags & \
 						EXT4_MF_TEST_DUMMY_ENCRYPTION))
 #else
@@ -2051,7 +2050,7 @@ struct ext4_filename {
 	const struct qstr *usr_fname;
 	struct fscrypt_str disk_name;
 	struct dx_hash_info hinfo;
-#ifdef CONFIG_EXT4_FS_ENCRYPTION
+#ifdef CONFIG_FS_ENCRYPTION
 	struct fscrypt_str crypto_buf;
 #endif
 };
@@ -2279,7 +2278,7 @@ extern unsigned ext4_free_clusters_after_init(struct super_block *sb,
 					      struct ext4_group_desc *gdp);
 ext4_fsblk_t ext4_inode_to_goal_block(struct inode *);
 
-#ifdef CONFIG_EXT4_FS_ENCRYPTION
+#ifdef CONFIG_FS_ENCRYPTION
 static inline int ext4_fname_setup_filename(struct inode *dir,
 			const struct qstr *iname,
 			int lookup, struct ext4_filename *fname)
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 71bd2d28f58d..4356ef6d728e 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1150,7 +1150,7 @@ int do_journal_get_write_access(handle_t *handle,
 	return ret;
 }
 
-#ifdef CONFIG_EXT4_FS_ENCRYPTION
+#ifdef CONFIG_FS_ENCRYPTION
 static int ext4_block_write_begin(struct page *page, loff_t pos, unsigned len,
 				  get_block_t *get_block)
 {
@@ -1302,7 +1302,7 @@ retry_journal:
 	/* In case writeback began while the page was unlocked */
 	wait_for_stable_page(page);
 
-#ifdef CONFIG_EXT4_FS_ENCRYPTION
+#ifdef CONFIG_FS_ENCRYPTION
 	if (ext4_should_dioread_nolock(inode))
 		ret = ext4_block_write_begin(page, pos, len,
 					     ext4_get_block_unwritten);
@@ -3104,7 +3104,7 @@ retry_journal:
 	/* In case writeback began while the page was unlocked */
 	wait_for_stable_page(page);
 
-#ifdef CONFIG_EXT4_FS_ENCRYPTION
+#ifdef CONFIG_FS_ENCRYPTION
 	ret = ext4_block_write_begin(page, pos, len,
 				     ext4_da_get_block_prep);
 #else
@@ -3879,7 +3879,7 @@ static ssize_t ext4_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
 	loff_t offset = iocb->ki_pos;
 	ssize_t ret;
 
-#ifdef CONFIG_EXT4_FS_ENCRYPTION
+#ifdef CONFIG_FS_ENCRYPTION
 	if (IS_ENCRYPTED(inode) && S_ISREG(inode->i_mode))
 		return 0;
 #endif
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index d37dafa1d133..d26bcac291bb 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -210,7 +210,7 @@ journal_err_out:
 	return err;
 }
 
-#ifdef CONFIG_EXT4_FS_ENCRYPTION
+#ifdef CONFIG_FS_ENCRYPTION
 static int uuid_is_zero(__u8 u[16])
 {
 	int	i;
@@ -978,7 +978,7 @@ resizefs_out:
 		return fscrypt_ioctl_set_policy(filp, (const void __user *)arg);
 
 	case EXT4_IOC_GET_ENCRYPTION_PWSALT: {
-#ifdef CONFIG_EXT4_FS_ENCRYPTION
+#ifdef CONFIG_FS_ENCRYPTION
 		int err, err2;
 		struct ext4_sb_info *sbi = EXT4_SB(sb);
 		handle_t *handle;
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index be6cb69beb12..980166a8122a 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -612,7 +612,7 @@ static struct stats dx_show_leaf(struct inode *dir,
 		{
 			if (show_names)
 			{
-#ifdef CONFIG_EXT4_FS_ENCRYPTION
+#ifdef CONFIG_FS_ENCRYPTION
 				int len;
 				char *name;
 				struct fscrypt_str fname_crypto_str =
@@ -984,7 +984,7 @@ static int htree_dirblock_to_tree(struct file *dir_file,
 	top = (struct ext4_dir_entry_2 *) ((char *) de +
 					   dir->i_sb->s_blocksize -
 					   EXT4_DIR_REC_LEN(0));
-#ifdef CONFIG_EXT4_FS_ENCRYPTION
+#ifdef CONFIG_FS_ENCRYPTION
 	/* Check if the directory is encrypted */
 	if (IS_ENCRYPTED(dir)) {
 		err = fscrypt_get_encryption_info(dir);
@@ -1047,7 +1047,7 @@ static int htree_dirblock_to_tree(struct file *dir_file,
 	}
 errout:
 	brelse(bh);
-#ifdef CONFIG_EXT4_FS_ENCRYPTION
+#ifdef CONFIG_FS_ENCRYPTION
 	fscrypt_fname_free_buffer(&fname_crypto_str);
 #endif
 	return count;
@@ -1267,7 +1267,7 @@ static inline bool ext4_match(const struct ext4_filename *fname,
 
 	f.usr_fname = fname->usr_fname;
 	f.disk_name = fname->disk_name;
-#ifdef CONFIG_EXT4_FS_ENCRYPTION
+#ifdef CONFIG_FS_ENCRYPTION
 	f.crypto_buf = fname->crypto_buf;
 #endif
 	return fscrypt_match_name(&f, de->name, de->name_len);
@@ -1498,7 +1498,7 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
 	ext4_lblk_t block;
 	int retval;
 
-#ifdef CONFIG_EXT4_FS_ENCRYPTION
+#ifdef CONFIG_FS_ENCRYPTION
 	*res_dir = NULL;
 #endif
 	frame = dx_probe(fname, dir, NULL, frames);
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index c398b55da854..b9d6cabe2ea8 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -66,7 +66,7 @@ static void ext4_finish_bio(struct bio *bio)
 
 	bio_for_each_segment_all(bvec, bio, i) {
 		struct page *page = bvec->bv_page;
-#ifdef CONFIG_EXT4_FS_ENCRYPTION
+#ifdef CONFIG_FS_ENCRYPTION
 		struct page *data_page = NULL;
 #endif
 		struct buffer_head *bh, *head;
@@ -78,7 +78,7 @@ static void ext4_finish_bio(struct bio *bio)
 		if (!page)
 			continue;
 
-#ifdef CONFIG_EXT4_FS_ENCRYPTION
+#ifdef CONFIG_FS_ENCRYPTION
 		if (!page->mapping) {
 			/* The bounce data pages are unmapped. */
 			data_page = page;
@@ -111,7 +111,7 @@ static void ext4_finish_bio(struct bio *bio)
 		bit_spin_unlock(BH_Uptodate_Lock, &head->b_state);
 		local_irq_restore(flags);
 		if (!under_io) {
-#ifdef CONFIG_EXT4_FS_ENCRYPTION
+#ifdef CONFIG_FS_ENCRYPTION
 			if (data_page)
 				fscrypt_restore_control_page(data_page);
 #endif
diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c
index 52d3ff5a9db1..b18881eb8da6 100644
--- a/fs/ext4/readpage.c
+++ b/fs/ext4/readpage.c
@@ -49,7 +49,7 @@
 
 static inline bool ext4_bio_encrypted(struct bio *bio)
 {
-#ifdef CONFIG_EXT4_FS_ENCRYPTION
+#ifdef CONFIG_FS_ENCRYPTION
 	return unlikely(bio->bi_private != NULL);
 #else
 	return false;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index fb12d3c17c1b..60da0a6e4d86 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1232,7 +1232,7 @@ static int bdev_try_to_free_page(struct super_block *sb, struct page *page,
 	return try_to_free_buffers(page);
 }
 
-#ifdef CONFIG_EXT4_FS_ENCRYPTION
+#ifdef CONFIG_FS_ENCRYPTION
 static int ext4_get_context(struct inode *inode, void *ctx, size_t len)
 {
 	return ext4_xattr_get(inode, EXT4_XATTR_INDEX_ENCRYPTION,
@@ -1922,7 +1922,7 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token,
 		*journal_ioprio =
 			IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, arg);
 	} else if (token == Opt_test_dummy_encryption) {
-#ifdef CONFIG_EXT4_FS_ENCRYPTION
+#ifdef CONFIG_FS_ENCRYPTION
 		sbi->s_mount_flags |= EXT4_MF_TEST_DUMMY_ENCRYPTION;
 		ext4_msg(sb, KERN_WARNING,
 			 "Test dummy encryption mode enabled");
@@ -4167,7 +4167,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 	sb->s_op = &ext4_sops;
 	sb->s_export_op = &ext4_export_ops;
 	sb->s_xattr = ext4_xattr_handlers;
-#ifdef CONFIG_EXT4_FS_ENCRYPTION
+#ifdef CONFIG_FS_ENCRYPTION
 	sb->s_cop = &ext4_cryptops;
 #endif
 #ifdef CONFIG_QUOTA
diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c
index 9212a026a1f1..5e4e78fc0b3a 100644
--- a/fs/ext4/sysfs.c
+++ b/fs/ext4/sysfs.c
@@ -224,7 +224,7 @@ static struct attribute *ext4_attrs[] = {
 EXT4_ATTR_FEATURE(lazy_itable_init);
 EXT4_ATTR_FEATURE(batched_discard);
 EXT4_ATTR_FEATURE(meta_bg_resize);
-#ifdef CONFIG_EXT4_FS_ENCRYPTION
+#ifdef CONFIG_FS_ENCRYPTION
 EXT4_ATTR_FEATURE(encryption);
 #endif
 EXT4_ATTR_FEATURE(metadata_csum_seed);
@@ -233,7 +233,7 @@ static struct attribute *ext4_feat_attrs[] = {
 	ATTR_LIST(lazy_itable_init),
 	ATTR_LIST(batched_discard),
 	ATTR_LIST(meta_bg_resize),
-#ifdef CONFIG_EXT4_FS_ENCRYPTION
+#ifdef CONFIG_FS_ENCRYPTION
 	ATTR_LIST(encryption),
 #endif
 	ATTR_LIST(metadata_csum_seed),
diff --git a/fs/f2fs/Kconfig b/fs/f2fs/Kconfig
index 9a20ef42fadd..e57cc754d543 100644
--- a/fs/f2fs/Kconfig
+++ b/fs/f2fs/Kconfig
@@ -3,6 +3,7 @@ config F2FS_FS
 	depends on BLOCK
 	select CRYPTO
 	select CRYPTO_CRC32
+	select F2FS_FS_XATTR if FS_ENCRYPTION
 	help
 	  F2FS is based on Log-structured File System (LFS), which supports
 	  versatile "flash-friendly" features. The design has been focused on
@@ -70,17 +71,6 @@ config F2FS_CHECK_FS
 
 	  If you want to improve the performance, say N.
 
-config F2FS_FS_ENCRYPTION
-	bool "F2FS Encryption"
-	depends on F2FS_FS
-	depends on F2FS_FS_XATTR
-	select FS_ENCRYPTION
-	help
-	  Enable encryption of f2fs files and directories.  This
-	  feature is similar to ecryptfs, but it is more memory
-	  efficient since it avoids caching the encrypted and
-	  decrypted pages in the page cache.
-
 config F2FS_IO_TRACE
 	bool "F2FS IO tracer"
 	depends on F2FS_FS
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 9ef6f38e51cc..95cc885ccb2f 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -24,7 +24,6 @@
 #include <linux/quotaops.h>
 #include <crypto/hash.h>
 
-#define __FS_HAS_ENCRYPTION IS_ENABLED(CONFIG_F2FS_FS_ENCRYPTION)
 #include <linux/fscrypt.h>
 
 #ifdef CONFIG_F2FS_CHECK_FS
@@ -1137,7 +1136,7 @@ enum fsync_mode {
 	FSYNC_MODE_NOBARRIER,	/* fsync behaves nobarrier based on posix */
 };
 
-#ifdef CONFIG_F2FS_FS_ENCRYPTION
+#ifdef CONFIG_FS_ENCRYPTION
 #define DUMMY_ENCRYPTION_ENABLED(sbi) \
 			(unlikely(F2FS_OPTION(sbi).test_dummy_encryption))
 #else
@@ -3470,7 +3469,7 @@ static inline bool f2fs_encrypted_file(struct inode *inode)
 
 static inline void f2fs_set_encrypted_inode(struct inode *inode)
 {
-#ifdef CONFIG_F2FS_FS_ENCRYPTION
+#ifdef CONFIG_FS_ENCRYPTION
 	file_set_encrypt(inode);
 	f2fs_set_inode_flags(inode);
 #endif
@@ -3549,7 +3548,7 @@ static inline void set_opt_mode(struct f2fs_sb_info *sbi, unsigned int mt)
 
 static inline bool f2fs_may_encrypt(struct inode *inode)
 {
-#ifdef CONFIG_F2FS_FS_ENCRYPTION
+#ifdef CONFIG_FS_ENCRYPTION
 	umode_t mode = inode->i_mode;
 
 	return (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode));
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index c46a1d4318d4..0f3db3a8e5cb 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -757,7 +757,7 @@ static int parse_options(struct super_block *sb, char *options)
 			kvfree(name);
 			break;
 		case Opt_test_dummy_encryption:
-#ifdef CONFIG_F2FS_FS_ENCRYPTION
+#ifdef CONFIG_FS_ENCRYPTION
 			if (!f2fs_sb_has_encrypt(sbi)) {
 				f2fs_msg(sb, KERN_ERR, "Encrypt feature is off");
 				return -EINVAL;
@@ -1390,7 +1390,7 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
 		seq_printf(seq, ",whint_mode=%s", "user-based");
 	else if (F2FS_OPTION(sbi).whint_mode == WHINT_MODE_FS)
 		seq_printf(seq, ",whint_mode=%s", "fs-based");
-#ifdef CONFIG_F2FS_FS_ENCRYPTION
+#ifdef CONFIG_FS_ENCRYPTION
 	if (F2FS_OPTION(sbi).test_dummy_encryption)
 		seq_puts(seq, ",test_dummy_encryption");
 #endif
@@ -2154,7 +2154,7 @@ static const struct super_operations f2fs_sops = {
 	.remount_fs	= f2fs_remount,
 };
 
-#ifdef CONFIG_F2FS_FS_ENCRYPTION
+#ifdef CONFIG_FS_ENCRYPTION
 static int f2fs_get_context(struct inode *inode, void *ctx, size_t len)
 {
 	return f2fs_getxattr(inode, F2FS_XATTR_INDEX_ENCRYPTION,
@@ -3116,7 +3116,7 @@ try_onemore:
 #endif
 
 	sb->s_op = &f2fs_sops;
-#ifdef CONFIG_F2FS_FS_ENCRYPTION
+#ifdef CONFIG_FS_ENCRYPTION
 	sb->s_cop = &f2fs_cryptops;
 #endif
 	sb->s_xattr = f2fs_xattr_handlers;
diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
index 0575edbe3ed6..70da6801c86f 100644
--- a/fs/f2fs/sysfs.c
+++ b/fs/f2fs/sysfs.c
@@ -431,7 +431,7 @@ F2FS_GENERAL_RO_ATTR(lifetime_write_kbytes);
 F2FS_GENERAL_RO_ATTR(features);
 F2FS_GENERAL_RO_ATTR(current_reserved_blocks);
 
-#ifdef CONFIG_F2FS_FS_ENCRYPTION
+#ifdef CONFIG_FS_ENCRYPTION
 F2FS_FEATURE_RO_ATTR(encryption, FEAT_CRYPTO);
 #endif
 #ifdef CONFIG_BLK_DEV_ZONED
@@ -492,7 +492,7 @@ static struct attribute *f2fs_attrs[] = {
 };
 
 static struct attribute *f2fs_feat_attrs[] = {
-#ifdef CONFIG_F2FS_FS_ENCRYPTION
+#ifdef CONFIG_FS_ENCRYPTION
 	ATTR_LIST(encryption),
 #endif
 #ifdef CONFIG_BLK_DEV_ZONED
diff --git a/fs/ubifs/Kconfig b/fs/ubifs/Kconfig
index bc1e082d921d..9da2f135121b 100644
--- a/fs/ubifs/Kconfig
+++ b/fs/ubifs/Kconfig
@@ -8,6 +8,7 @@ config UBIFS_FS
 	select CRYPTO_LZO if UBIFS_FS_LZO
 	select CRYPTO_DEFLATE if UBIFS_FS_ZLIB
 	select CRYPTO_HASH_INFO
+	select UBIFS_FS_XATTR if FS_ENCRYPTION
 	depends on MTD_UBI
 	help
 	  UBIFS is a file system for flash devices which works on top of UBI.
@@ -60,17 +61,6 @@ config UBIFS_FS_XATTR
 
 	  If unsure, say Y.
 
-config UBIFS_FS_ENCRYPTION
-	bool "UBIFS Encryption"
-	depends on UBIFS_FS_XATTR && BLOCK
-	select FS_ENCRYPTION
-	default n
-	help
-	  Enable encryption of UBIFS files and directories. This
-	  feature is similar to ecryptfs, but it is more memory
-	  efficient since it avoids caching the encrypted and
-	  decrypted pages in the page cache.
-
 config UBIFS_FS_SECURITY
 	bool "UBIFS Security Labels"
 	depends on UBIFS_FS_XATTR
diff --git a/fs/ubifs/Makefile b/fs/ubifs/Makefile
index 5f838319c8d5..5c4b845754a7 100644
--- a/fs/ubifs/Makefile
+++ b/fs/ubifs/Makefile
@@ -6,6 +6,6 @@ ubifs-y += tnc.o master.o scan.o replay.o log.o commit.o gc.o orphan.o
 ubifs-y += budget.o find.o tnc_commit.o compress.o lpt.o lprops.o
 ubifs-y += recovery.o ioctl.o lpt_commit.o tnc_misc.o debug.o
 ubifs-y += misc.o
-ubifs-$(CONFIG_UBIFS_FS_ENCRYPTION) += crypto.o
+ubifs-$(CONFIG_FS_ENCRYPTION) += crypto.o
 ubifs-$(CONFIG_UBIFS_FS_XATTR) += xattr.o
 ubifs-$(CONFIG_UBIFS_FS_AUTHENTICATION) += auth.o
diff --git a/fs/ubifs/ioctl.c b/fs/ubifs/ioctl.c
index 0164bcc827f8..0f9c362a3402 100644
--- a/fs/ubifs/ioctl.c
+++ b/fs/ubifs/ioctl.c
@@ -185,7 +185,7 @@ long ubifs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 		return err;
 	}
 	case FS_IOC_SET_ENCRYPTION_POLICY: {
-#ifdef CONFIG_UBIFS_FS_ENCRYPTION
+#ifdef CONFIG_FS_ENCRYPTION
 		struct ubifs_info *c = inode->i_sb->s_fs_info;
 
 		err = ubifs_enable_encryption(c);
@@ -198,7 +198,7 @@ long ubifs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 #endif
 	}
 	case FS_IOC_GET_ENCRYPTION_POLICY: {
-#ifdef CONFIG_UBIFS_FS_ENCRYPTION
+#ifdef CONFIG_FS_ENCRYPTION
 		return fscrypt_ioctl_get_policy(file, (void __user *)arg);
 #else
 		return -EOPNOTSUPP;
diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c
index 3da90c951c23..67fac1e8adfb 100644
--- a/fs/ubifs/sb.c
+++ b/fs/ubifs/sb.c
@@ -748,7 +748,7 @@ int ubifs_read_superblock(struct ubifs_info *c)
 		goto out;
 	}
 
-#ifndef CONFIG_UBIFS_FS_ENCRYPTION
+#ifndef CONFIG_FS_ENCRYPTION
 	if (c->encrypted) {
 		ubifs_err(c, "file system contains encrypted files but UBIFS"
 			     " was built without crypto support.");
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 1fac1133dadd..8dc2818fdd84 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -2146,7 +2146,7 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent)
 #ifdef CONFIG_UBIFS_FS_XATTR
 	sb->s_xattr = ubifs_xattr_handlers;
 #endif
-#ifdef CONFIG_UBIFS_FS_ENCRYPTION
+#ifdef CONFIG_FS_ENCRYPTION
 	sb->s_cop = &ubifs_crypt_operations;
 #endif
 
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index 38401adaa00d..1ae12900e01d 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -43,7 +43,6 @@
 #include <crypto/hash.h>
 #include <crypto/algapi.h>
 
-#define __FS_HAS_ENCRYPTION IS_ENABLED(CONFIG_UBIFS_FS_ENCRYPTION)
 #include <linux/fscrypt.h>
 
 #include "ubifs-media.h"
@@ -142,7 +141,7 @@
  */
 #define WORST_COMPR_FACTOR 2
 
-#ifdef CONFIG_UBIFS_FS_ENCRYPTION
+#ifdef CONFIG_FS_ENCRYPTION
 #define UBIFS_CIPHER_BLOCK_SIZE FS_CRYPTO_BLOCK_SIZE
 #else
 #define UBIFS_CIPHER_BLOCK_SIZE 0
@@ -2072,7 +2071,7 @@ int ubifs_decompress(const struct ubifs_info *c, const void *buf, int len,
 #include "misc.h"
 #include "key.h"
 
-#ifndef CONFIG_UBIFS_FS_ENCRYPTION
+#ifndef CONFIG_FS_ENCRYPTION
 static inline int ubifs_encrypt(const struct inode *inode,
 				struct ubifs_data_node *dn,
 				unsigned int in_len, unsigned int *out_len,
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 811c77743dad..ba7889bb9ef6 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -698,7 +698,7 @@ struct inode {
 	struct fsnotify_mark_connector __rcu	*i_fsnotify_marks;
 #endif
 
-#if IS_ENABLED(CONFIG_FS_ENCRYPTION)
+#ifdef CONFIG_FS_ENCRYPTION
 	struct fscrypt_info	*i_crypt_info;
 #endif
 
@@ -1403,7 +1403,7 @@ struct super_block {
 	void                    *s_security;
 #endif
 	const struct xattr_handler **s_xattr;
-#if IS_ENABLED(CONFIG_FS_ENCRYPTION)
+#ifdef CONFIG_FS_ENCRYPTION
 	const struct fscrypt_operations	*s_cop;
 #endif
 	struct hlist_bl_head	s_roots;	/* alternate root dentries for NFS */
diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h
index 952ab97af325..eec604840568 100644
--- a/include/linux/fscrypt.h
+++ b/include/linux/fscrypt.h
@@ -2,9 +2,8 @@
 /*
  * fscrypt.h: declarations for per-file encryption
  *
- * Filesystems that implement per-file encryption include this header
- * file with the __FS_HAS_ENCRYPTION set according to whether that filesystem
- * is being built with encryption support or not.
+ * Filesystems that implement per-file encryption must include this header
+ * file.
  *
  * Copyright (C) 2015, Google, Inc.
  *
@@ -15,6 +14,8 @@
 #define _LINUX_FSCRYPT_H
 
 #include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
 
 #define FS_CRYPTO_BLOCK_SIZE		16
 
@@ -42,11 +43,410 @@ struct fscrypt_name {
 /* Maximum value for the third parameter of fscrypt_operations.set_context(). */
 #define FSCRYPT_SET_CONTEXT_MAX_SIZE	28
 
-#if __FS_HAS_ENCRYPTION
-#include <linux/fscrypt_supp.h>
-#else
-#include <linux/fscrypt_notsupp.h>
-#endif
+#ifdef CONFIG_FS_ENCRYPTION
+/*
+ * fscrypt superblock flags
+ */
+#define FS_CFLG_OWN_PAGES (1U << 1)
+
+/*
+ * crypto operations for filesystems
+ */
+struct fscrypt_operations {
+	unsigned int flags;
+	const char *key_prefix;
+	int (*get_context)(struct inode *, void *, size_t);
+	int (*set_context)(struct inode *, const void *, size_t, void *);
+	bool (*dummy_context)(struct inode *);
+	bool (*empty_dir)(struct inode *);
+	unsigned int max_namelen;
+};
+
+struct fscrypt_ctx {
+	union {
+		struct {
+			struct page *bounce_page;	/* Ciphertext page */
+			struct page *control_page;	/* Original page  */
+		} w;
+		struct {
+			struct bio *bio;
+			struct work_struct work;
+		} r;
+		struct list_head free_list;	/* Free list */
+	};
+	u8 flags;				/* Flags */
+};
+
+static inline bool fscrypt_has_encryption_key(const struct inode *inode)
+{
+	return (inode->i_crypt_info != NULL);
+}
+
+static inline bool fscrypt_dummy_context_enabled(struct inode *inode)
+{
+	return inode->i_sb->s_cop->dummy_context &&
+		inode->i_sb->s_cop->dummy_context(inode);
+}
+
+/* crypto.c */
+extern void fscrypt_enqueue_decrypt_work(struct work_struct *);
+extern struct fscrypt_ctx *fscrypt_get_ctx(const struct inode *, gfp_t);
+extern void fscrypt_release_ctx(struct fscrypt_ctx *);
+extern struct page *fscrypt_encrypt_page(const struct inode *, struct page *,
+						unsigned int, unsigned int,
+						u64, gfp_t);
+extern int fscrypt_decrypt_page(const struct inode *, struct page *, unsigned int,
+				unsigned int, u64);
+
+static inline struct page *fscrypt_control_page(struct page *page)
+{
+	return ((struct fscrypt_ctx *)page_private(page))->w.control_page;
+}
+
+extern void fscrypt_restore_control_page(struct page *);
+
+/* policy.c */
+extern int fscrypt_ioctl_set_policy(struct file *, const void __user *);
+extern int fscrypt_ioctl_get_policy(struct file *, void __user *);
+extern int fscrypt_has_permitted_context(struct inode *, struct inode *);
+extern int fscrypt_inherit_context(struct inode *, struct inode *,
+					void *, bool);
+/* keyinfo.c */
+extern int fscrypt_get_encryption_info(struct inode *);
+extern void fscrypt_put_encryption_info(struct inode *);
+
+/* fname.c */
+extern int fscrypt_setup_filename(struct inode *, const struct qstr *,
+				int lookup, struct fscrypt_name *);
+
+static inline void fscrypt_free_filename(struct fscrypt_name *fname)
+{
+	kfree(fname->crypto_buf.name);
+}
+
+extern int fscrypt_fname_alloc_buffer(const struct inode *, u32,
+				struct fscrypt_str *);
+extern void fscrypt_fname_free_buffer(struct fscrypt_str *);
+extern int fscrypt_fname_disk_to_usr(struct inode *, u32, u32,
+			const struct fscrypt_str *, struct fscrypt_str *);
+
+#define FSCRYPT_FNAME_MAX_UNDIGESTED_SIZE	32
+
+/* Extracts the second-to-last ciphertext block; see explanation below */
+#define FSCRYPT_FNAME_DIGEST(name, len)	\
+	((name) + round_down((len) - FS_CRYPTO_BLOCK_SIZE - 1, \
+			     FS_CRYPTO_BLOCK_SIZE))
+
+#define FSCRYPT_FNAME_DIGEST_SIZE	FS_CRYPTO_BLOCK_SIZE
+
+/**
+ * fscrypt_digested_name - alternate identifier for an on-disk filename
+ *
+ * When userspace lists an encrypted directory without access to the key,
+ * filenames whose ciphertext is longer than FSCRYPT_FNAME_MAX_UNDIGESTED_SIZE
+ * bytes are shown in this abbreviated form (base64-encoded) rather than as the
+ * full ciphertext (base64-encoded).  This is necessary to allow supporting
+ * filenames up to NAME_MAX bytes, since base64 encoding expands the length.
+ *
+ * To make it possible for filesystems to still find the correct directory entry
+ * despite not knowing the full on-disk name, we encode any filesystem-specific
+ * 'hash' and/or 'minor_hash' which the filesystem may need for its lookups,
+ * followed by the second-to-last ciphertext block of the filename.  Due to the
+ * use of the CBC-CTS encryption mode, the second-to-last ciphertext block
+ * depends on the full plaintext.  (Note that ciphertext stealing causes the
+ * last two blocks to appear "flipped".)  This makes accidental collisions very
+ * unlikely: just a 1 in 2^128 chance for two filenames to collide even if they
+ * share the same filesystem-specific hashes.
+ *
+ * However, this scheme isn't immune to intentional collisions, which can be
+ * created by anyone able to create arbitrary plaintext filenames and view them
+ * without the key.  Making the "digest" be a real cryptographic hash like
+ * SHA-256 over the full ciphertext would prevent this, although it would be
+ * less efficient and harder to implement, especially since the filesystem would
+ * need to calculate it for each directory entry examined during a search.
+ */
+struct fscrypt_digested_name {
+	u32 hash;
+	u32 minor_hash;
+	u8 digest[FSCRYPT_FNAME_DIGEST_SIZE];
+};
+
+/**
+ * fscrypt_match_name() - test whether the given name matches a directory entry
+ * @fname: the name being searched for
+ * @de_name: the name from the directory entry
+ * @de_name_len: the length of @de_name in bytes
+ *
+ * Normally @fname->disk_name will be set, and in that case we simply compare
+ * that to the name stored in the directory entry.  The only exception is that
+ * if we don't have the key for an encrypted directory and a filename in it is
+ * very long, then we won't have the full disk_name and we'll instead need to
+ * match against the fscrypt_digested_name.
+ *
+ * Return: %true if the name matches, otherwise %false.
+ */
+static inline bool fscrypt_match_name(const struct fscrypt_name *fname,
+				      const u8 *de_name, u32 de_name_len)
+{
+	if (unlikely(!fname->disk_name.name)) {
+		const struct fscrypt_digested_name *n =
+			(const void *)fname->crypto_buf.name;
+		if (WARN_ON_ONCE(fname->usr_fname->name[0] != '_'))
+			return false;
+		if (de_name_len <= FSCRYPT_FNAME_MAX_UNDIGESTED_SIZE)
+			return false;
+		return !memcmp(FSCRYPT_FNAME_DIGEST(de_name, de_name_len),
+			       n->digest, FSCRYPT_FNAME_DIGEST_SIZE);
+	}
+
+	if (de_name_len != fname->disk_name.len)
+		return false;
+	return !memcmp(de_name, fname->disk_name.name, fname->disk_name.len);
+}
+
+/* bio.c */
+extern void fscrypt_decrypt_bio(struct bio *);
+extern void fscrypt_enqueue_decrypt_bio(struct fscrypt_ctx *ctx,
+					struct bio *bio);
+extern void fscrypt_pullback_bio_page(struct page **, bool);
+extern int fscrypt_zeroout_range(const struct inode *, pgoff_t, sector_t,
+				 unsigned int);
+
+/* hooks.c */
+extern int fscrypt_file_open(struct inode *inode, struct file *filp);
+extern int __fscrypt_prepare_link(struct inode *inode, struct inode *dir);
+extern int __fscrypt_prepare_rename(struct inode *old_dir,
+				    struct dentry *old_dentry,
+				    struct inode *new_dir,
+				    struct dentry *new_dentry,
+				    unsigned int flags);
+extern int __fscrypt_prepare_lookup(struct inode *dir, struct dentry *dentry);
+extern int __fscrypt_prepare_symlink(struct inode *dir, unsigned int len,
+				     unsigned int max_len,
+				     struct fscrypt_str *disk_link);
+extern int __fscrypt_encrypt_symlink(struct inode *inode, const char *target,
+				     unsigned int len,
+				     struct fscrypt_str *disk_link);
+extern const char *fscrypt_get_symlink(struct inode *inode, const void *caddr,
+				       unsigned int max_size,
+				       struct delayed_call *done);
+#else  /* !CONFIG_FS_ENCRYPTION */
+
+static inline bool fscrypt_has_encryption_key(const struct inode *inode)
+{
+	return false;
+}
+
+static inline bool fscrypt_dummy_context_enabled(struct inode *inode)
+{
+	return false;
+}
+
+/* crypto.c */
+static inline void fscrypt_enqueue_decrypt_work(struct work_struct *work)
+{
+}
+
+static inline struct fscrypt_ctx *fscrypt_get_ctx(const struct inode *inode,
+						  gfp_t gfp_flags)
+{
+	return ERR_PTR(-EOPNOTSUPP);
+}
+
+static inline void fscrypt_release_ctx(struct fscrypt_ctx *ctx)
+{
+	return;
+}
+
+static inline struct page *fscrypt_encrypt_page(const struct inode *inode,
+						struct page *page,
+						unsigned int len,
+						unsigned int offs,
+						u64 lblk_num, gfp_t gfp_flags)
+{
+	return ERR_PTR(-EOPNOTSUPP);
+}
+
+static inline int fscrypt_decrypt_page(const struct inode *inode,
+				       struct page *page,
+				       unsigned int len, unsigned int offs,
+				       u64 lblk_num)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline struct page *fscrypt_control_page(struct page *page)
+{
+	WARN_ON_ONCE(1);
+	return ERR_PTR(-EINVAL);
+}
+
+static inline void fscrypt_restore_control_page(struct page *page)
+{
+	return;
+}
+
+/* policy.c */
+static inline int fscrypt_ioctl_set_policy(struct file *filp,
+					   const void __user *arg)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int fscrypt_ioctl_get_policy(struct file *filp, void __user *arg)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int fscrypt_has_permitted_context(struct inode *parent,
+						struct inode *child)
+{
+	return 0;
+}
+
+static inline int fscrypt_inherit_context(struct inode *parent,
+					  struct inode *child,
+					  void *fs_data, bool preload)
+{
+	return -EOPNOTSUPP;
+}
+
+/* keyinfo.c */
+static inline int fscrypt_get_encryption_info(struct inode *inode)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline void fscrypt_put_encryption_info(struct inode *inode)
+{
+	return;
+}
+
+ /* fname.c */
+static inline int fscrypt_setup_filename(struct inode *dir,
+					 const struct qstr *iname,
+					 int lookup, struct fscrypt_name *fname)
+{
+	if (IS_ENCRYPTED(dir))
+		return -EOPNOTSUPP;
+
+	memset(fname, 0, sizeof(struct fscrypt_name));
+	fname->usr_fname = iname;
+	fname->disk_name.name = (unsigned char *)iname->name;
+	fname->disk_name.len = iname->len;
+	return 0;
+}
+
+static inline void fscrypt_free_filename(struct fscrypt_name *fname)
+{
+	return;
+}
+
+static inline int fscrypt_fname_alloc_buffer(const struct inode *inode,
+					     u32 max_encrypted_len,
+					     struct fscrypt_str *crypto_str)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline void fscrypt_fname_free_buffer(struct fscrypt_str *crypto_str)
+{
+	return;
+}
+
+static inline int fscrypt_fname_disk_to_usr(struct inode *inode,
+					    u32 hash, u32 minor_hash,
+					    const struct fscrypt_str *iname,
+					    struct fscrypt_str *oname)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline bool fscrypt_match_name(const struct fscrypt_name *fname,
+				      const u8 *de_name, u32 de_name_len)
+{
+	/* Encryption support disabled; use standard comparison */
+	if (de_name_len != fname->disk_name.len)
+		return false;
+	return !memcmp(de_name, fname->disk_name.name, fname->disk_name.len);
+}
+
+/* bio.c */
+static inline void fscrypt_decrypt_bio(struct bio *bio)
+{
+}
+
+static inline void fscrypt_enqueue_decrypt_bio(struct fscrypt_ctx *ctx,
+					       struct bio *bio)
+{
+}
+
+static inline void fscrypt_pullback_bio_page(struct page **page, bool restore)
+{
+	return;
+}
+
+static inline int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk,
+					sector_t pblk, unsigned int len)
+{
+	return -EOPNOTSUPP;
+}
+
+/* hooks.c */
+
+static inline int fscrypt_file_open(struct inode *inode, struct file *filp)
+{
+	if (IS_ENCRYPTED(inode))
+		return -EOPNOTSUPP;
+	return 0;
+}
+
+static inline int __fscrypt_prepare_link(struct inode *inode,
+					 struct inode *dir)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int __fscrypt_prepare_rename(struct inode *old_dir,
+					   struct dentry *old_dentry,
+					   struct inode *new_dir,
+					   struct dentry *new_dentry,
+					   unsigned int flags)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int __fscrypt_prepare_lookup(struct inode *dir,
+					   struct dentry *dentry)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int __fscrypt_prepare_symlink(struct inode *dir,
+					    unsigned int len,
+					    unsigned int max_len,
+					    struct fscrypt_str *disk_link)
+{
+	return -EOPNOTSUPP;
+}
+
+
+static inline int __fscrypt_encrypt_symlink(struct inode *inode,
+					    const char *target,
+					    unsigned int len,
+					    struct fscrypt_str *disk_link)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline const char *fscrypt_get_symlink(struct inode *inode,
+					      const void *caddr,
+					      unsigned int max_size,
+					      struct delayed_call *done)
+{
+	return ERR_PTR(-EOPNOTSUPP);
+}
+#endif	/* !CONFIG_FS_ENCRYPTION */
 
 /**
  * fscrypt_require_key - require an inode's encryption key
diff --git a/include/linux/fscrypt_notsupp.h b/include/linux/fscrypt_notsupp.h
deleted file mode 100644
index ee8b43e4c15a..000000000000
--- a/include/linux/fscrypt_notsupp.h
+++ /dev/null
@@ -1,231 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * fscrypt_notsupp.h
- *
- * This stubs out the fscrypt functions for filesystems configured without
- * encryption support.
- *
- * Do not include this file directly. Use fscrypt.h instead!
- */
-#ifndef _LINUX_FSCRYPT_H
-#error "Incorrect include of linux/fscrypt_notsupp.h!"
-#endif
-
-#ifndef _LINUX_FSCRYPT_NOTSUPP_H
-#define _LINUX_FSCRYPT_NOTSUPP_H
-
-static inline bool fscrypt_has_encryption_key(const struct inode *inode)
-{
-	return false;
-}
-
-static inline bool fscrypt_dummy_context_enabled(struct inode *inode)
-{
-	return false;
-}
-
-/* crypto.c */
-static inline void fscrypt_enqueue_decrypt_work(struct work_struct *work)
-{
-}
-
-static inline struct fscrypt_ctx *fscrypt_get_ctx(const struct inode *inode,
-						  gfp_t gfp_flags)
-{
-	return ERR_PTR(-EOPNOTSUPP);
-}
-
-static inline void fscrypt_release_ctx(struct fscrypt_ctx *ctx)
-{
-	return;
-}
-
-static inline struct page *fscrypt_encrypt_page(const struct inode *inode,
-						struct page *page,
-						unsigned int len,
-						unsigned int offs,
-						u64 lblk_num, gfp_t gfp_flags)
-{
-	return ERR_PTR(-EOPNOTSUPP);
-}
-
-static inline int fscrypt_decrypt_page(const struct inode *inode,
-				       struct page *page,
-				       unsigned int len, unsigned int offs,
-				       u64 lblk_num)
-{
-	return -EOPNOTSUPP;
-}
-
-static inline struct page *fscrypt_control_page(struct page *page)
-{
-	WARN_ON_ONCE(1);
-	return ERR_PTR(-EINVAL);
-}
-
-static inline void fscrypt_restore_control_page(struct page *page)
-{
-	return;
-}
-
-/* policy.c */
-static inline int fscrypt_ioctl_set_policy(struct file *filp,
-					   const void __user *arg)
-{
-	return -EOPNOTSUPP;
-}
-
-static inline int fscrypt_ioctl_get_policy(struct file *filp, void __user *arg)
-{
-	return -EOPNOTSUPP;
-}
-
-static inline int fscrypt_has_permitted_context(struct inode *parent,
-						struct inode *child)
-{
-	return 0;
-}
-
-static inline int fscrypt_inherit_context(struct inode *parent,
-					  struct inode *child,
-					  void *fs_data, bool preload)
-{
-	return -EOPNOTSUPP;
-}
-
-/* keyinfo.c */
-static inline int fscrypt_get_encryption_info(struct inode *inode)
-{
-	return -EOPNOTSUPP;
-}
-
-static inline void fscrypt_put_encryption_info(struct inode *inode)
-{
-	return;
-}
-
- /* fname.c */
-static inline int fscrypt_setup_filename(struct inode *dir,
-					 const struct qstr *iname,
-					 int lookup, struct fscrypt_name *fname)
-{
-	if (IS_ENCRYPTED(dir))
-		return -EOPNOTSUPP;
-
-	memset(fname, 0, sizeof(struct fscrypt_name));
-	fname->usr_fname = iname;
-	fname->disk_name.name = (unsigned char *)iname->name;
-	fname->disk_name.len = iname->len;
-	return 0;
-}
-
-static inline void fscrypt_free_filename(struct fscrypt_name *fname)
-{
-	return;
-}
-
-static inline int fscrypt_fname_alloc_buffer(const struct inode *inode,
-					     u32 max_encrypted_len,
-					     struct fscrypt_str *crypto_str)
-{
-	return -EOPNOTSUPP;
-}
-
-static inline void fscrypt_fname_free_buffer(struct fscrypt_str *crypto_str)
-{
-	return;
-}
-
-static inline int fscrypt_fname_disk_to_usr(struct inode *inode,
-					    u32 hash, u32 minor_hash,
-					    const struct fscrypt_str *iname,
-					    struct fscrypt_str *oname)
-{
-	return -EOPNOTSUPP;
-}
-
-static inline bool fscrypt_match_name(const struct fscrypt_name *fname,
-				      const u8 *de_name, u32 de_name_len)
-{
-	/* Encryption support disabled; use standard comparison */
-	if (de_name_len != fname->disk_name.len)
-		return false;
-	return !memcmp(de_name, fname->disk_name.name, fname->disk_name.len);
-}
-
-/* bio.c */
-static inline void fscrypt_decrypt_bio(struct bio *bio)
-{
-}
-
-static inline void fscrypt_enqueue_decrypt_bio(struct fscrypt_ctx *ctx,
-					       struct bio *bio)
-{
-}
-
-static inline void fscrypt_pullback_bio_page(struct page **page, bool restore)
-{
-	return;
-}
-
-static inline int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk,
-					sector_t pblk, unsigned int len)
-{
-	return -EOPNOTSUPP;
-}
-
-/* hooks.c */
-
-static inline int fscrypt_file_open(struct inode *inode, struct file *filp)
-{
-	if (IS_ENCRYPTED(inode))
-		return -EOPNOTSUPP;
-	return 0;
-}
-
-static inline int __fscrypt_prepare_link(struct inode *inode,
-					 struct inode *dir)
-{
-	return -EOPNOTSUPP;
-}
-
-static inline int __fscrypt_prepare_rename(struct inode *old_dir,
-					   struct dentry *old_dentry,
-					   struct inode *new_dir,
-					   struct dentry *new_dentry,
-					   unsigned int flags)
-{
-	return -EOPNOTSUPP;
-}
-
-static inline int __fscrypt_prepare_lookup(struct inode *dir,
-					   struct dentry *dentry)
-{
-	return -EOPNOTSUPP;
-}
-
-static inline int __fscrypt_prepare_symlink(struct inode *dir,
-					    unsigned int len,
-					    unsigned int max_len,
-					    struct fscrypt_str *disk_link)
-{
-	return -EOPNOTSUPP;
-}
-
-static inline int __fscrypt_encrypt_symlink(struct inode *inode,
-					    const char *target,
-					    unsigned int len,
-					    struct fscrypt_str *disk_link)
-{
-	return -EOPNOTSUPP;
-}
-
-static inline const char *fscrypt_get_symlink(struct inode *inode,
-					      const void *caddr,
-					      unsigned int max_size,
-					      struct delayed_call *done)
-{
-	return ERR_PTR(-EOPNOTSUPP);
-}
-
-#endif	/* _LINUX_FSCRYPT_NOTSUPP_H */
diff --git a/include/linux/fscrypt_supp.h b/include/linux/fscrypt_supp.h
deleted file mode 100644
index 6456c6b2005f..000000000000
--- a/include/linux/fscrypt_supp.h
+++ /dev/null
@@ -1,204 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * fscrypt_supp.h
- *
- * Do not include this file directly. Use fscrypt.h instead!
- */
-#ifndef _LINUX_FSCRYPT_H
-#error "Incorrect include of linux/fscrypt_supp.h!"
-#endif
-
-#ifndef _LINUX_FSCRYPT_SUPP_H
-#define _LINUX_FSCRYPT_SUPP_H
-
-#include <linux/mm.h>
-#include <linux/slab.h>
-
-/*
- * fscrypt superblock flags
- */
-#define FS_CFLG_OWN_PAGES (1U << 1)
-
-/*
- * crypto operations for filesystems
- */
-struct fscrypt_operations {
-	unsigned int flags;
-	const char *key_prefix;
-	int (*get_context)(struct inode *, void *, size_t);
-	int (*set_context)(struct inode *, const void *, size_t, void *);
-	bool (*dummy_context)(struct inode *);
-	bool (*empty_dir)(struct inode *);
-	unsigned int max_namelen;
-};
-
-struct fscrypt_ctx {
-	union {
-		struct {
-			struct page *bounce_page;	/* Ciphertext page */
-			struct page *control_page;	/* Original page  */
-		} w;
-		struct {
-			struct bio *bio;
-			struct work_struct work;
-		} r;
-		struct list_head free_list;	/* Free list */
-	};
-	u8 flags;				/* Flags */
-};
-
-static inline bool fscrypt_has_encryption_key(const struct inode *inode)
-{
-	return (inode->i_crypt_info != NULL);
-}
-
-static inline bool fscrypt_dummy_context_enabled(struct inode *inode)
-{
-	return inode->i_sb->s_cop->dummy_context &&
-		inode->i_sb->s_cop->dummy_context(inode);
-}
-
-/* crypto.c */
-extern void fscrypt_enqueue_decrypt_work(struct work_struct *);
-extern struct fscrypt_ctx *fscrypt_get_ctx(const struct inode *, gfp_t);
-extern void fscrypt_release_ctx(struct fscrypt_ctx *);
-extern struct page *fscrypt_encrypt_page(const struct inode *, struct page *,
-						unsigned int, unsigned int,
-						u64, gfp_t);
-extern int fscrypt_decrypt_page(const struct inode *, struct page *, unsigned int,
-				unsigned int, u64);
-
-static inline struct page *fscrypt_control_page(struct page *page)
-{
-	return ((struct fscrypt_ctx *)page_private(page))->w.control_page;
-}
-
-extern void fscrypt_restore_control_page(struct page *);
-
-/* policy.c */
-extern int fscrypt_ioctl_set_policy(struct file *, const void __user *);
-extern int fscrypt_ioctl_get_policy(struct file *, void __user *);
-extern int fscrypt_has_permitted_context(struct inode *, struct inode *);
-extern int fscrypt_inherit_context(struct inode *, struct inode *,
-					void *, bool);
-/* keyinfo.c */
-extern int fscrypt_get_encryption_info(struct inode *);
-extern void fscrypt_put_encryption_info(struct inode *);
-
-/* fname.c */
-extern int fscrypt_setup_filename(struct inode *, const struct qstr *,
-				int lookup, struct fscrypt_name *);
-
-static inline void fscrypt_free_filename(struct fscrypt_name *fname)
-{
-	kfree(fname->crypto_buf.name);
-}
-
-extern int fscrypt_fname_alloc_buffer(const struct inode *, u32,
-				struct fscrypt_str *);
-extern void fscrypt_fname_free_buffer(struct fscrypt_str *);
-extern int fscrypt_fname_disk_to_usr(struct inode *, u32, u32,
-			const struct fscrypt_str *, struct fscrypt_str *);
-
-#define FSCRYPT_FNAME_MAX_UNDIGESTED_SIZE	32
-
-/* Extracts the second-to-last ciphertext block; see explanation below */
-#define FSCRYPT_FNAME_DIGEST(name, len)	\
-	((name) + round_down((len) - FS_CRYPTO_BLOCK_SIZE - 1, \
-			     FS_CRYPTO_BLOCK_SIZE))
-
-#define FSCRYPT_FNAME_DIGEST_SIZE	FS_CRYPTO_BLOCK_SIZE
-
-/**
- * fscrypt_digested_name - alternate identifier for an on-disk filename
- *
- * When userspace lists an encrypted directory without access to the key,
- * filenames whose ciphertext is longer than FSCRYPT_FNAME_MAX_UNDIGESTED_SIZE
- * bytes are shown in this abbreviated form (base64-encoded) rather than as the
- * full ciphertext (base64-encoded).  This is necessary to allow supporting
- * filenames up to NAME_MAX bytes, since base64 encoding expands the length.
- *
- * To make it possible for filesystems to still find the correct directory entry
- * despite not knowing the full on-disk name, we encode any filesystem-specific
- * 'hash' and/or 'minor_hash' which the filesystem may need for its lookups,
- * followed by the second-to-last ciphertext block of the filename.  Due to the
- * use of the CBC-CTS encryption mode, the second-to-last ciphertext block
- * depends on the full plaintext.  (Note that ciphertext stealing causes the
- * last two blocks to appear "flipped".)  This makes accidental collisions very
- * unlikely: just a 1 in 2^128 chance for two filenames to collide even if they
- * share the same filesystem-specific hashes.
- *
- * However, this scheme isn't immune to intentional collisions, which can be
- * created by anyone able to create arbitrary plaintext filenames and view them
- * without the key.  Making the "digest" be a real cryptographic hash like
- * SHA-256 over the full ciphertext would prevent this, although it would be
- * less efficient and harder to implement, especially since the filesystem would
- * need to calculate it for each directory entry examined during a search.
- */
-struct fscrypt_digested_name {
-	u32 hash;
-	u32 minor_hash;
-	u8 digest[FSCRYPT_FNAME_DIGEST_SIZE];
-};
-
-/**
- * fscrypt_match_name() - test whether the given name matches a directory entry
- * @fname: the name being searched for
- * @de_name: the name from the directory entry
- * @de_name_len: the length of @de_name in bytes
- *
- * Normally @fname->disk_name will be set, and in that case we simply compare
- * that to the name stored in the directory entry.  The only exception is that
- * if we don't have the key for an encrypted directory and a filename in it is
- * very long, then we won't have the full disk_name and we'll instead need to
- * match against the fscrypt_digested_name.
- *
- * Return: %true if the name matches, otherwise %false.
- */
-static inline bool fscrypt_match_name(const struct fscrypt_name *fname,
-				      const u8 *de_name, u32 de_name_len)
-{
-	if (unlikely(!fname->disk_name.name)) {
-		const struct fscrypt_digested_name *n =
-			(const void *)fname->crypto_buf.name;
-		if (WARN_ON_ONCE(fname->usr_fname->name[0] != '_'))
-			return false;
-		if (de_name_len <= FSCRYPT_FNAME_MAX_UNDIGESTED_SIZE)
-			return false;
-		return !memcmp(FSCRYPT_FNAME_DIGEST(de_name, de_name_len),
-			       n->digest, FSCRYPT_FNAME_DIGEST_SIZE);
-	}
-
-	if (de_name_len != fname->disk_name.len)
-		return false;
-	return !memcmp(de_name, fname->disk_name.name, fname->disk_name.len);
-}
-
-/* bio.c */
-extern void fscrypt_decrypt_bio(struct bio *);
-extern void fscrypt_enqueue_decrypt_bio(struct fscrypt_ctx *ctx,
-					struct bio *bio);
-extern void fscrypt_pullback_bio_page(struct page **, bool);
-extern int fscrypt_zeroout_range(const struct inode *, pgoff_t, sector_t,
-				 unsigned int);
-
-/* hooks.c */
-extern int fscrypt_file_open(struct inode *inode, struct file *filp);
-extern int __fscrypt_prepare_link(struct inode *inode, struct inode *dir);
-extern int __fscrypt_prepare_rename(struct inode *old_dir,
-				    struct dentry *old_dentry,
-				    struct inode *new_dir,
-				    struct dentry *new_dentry,
-				    unsigned int flags);
-extern int __fscrypt_prepare_lookup(struct inode *dir, struct dentry *dentry);
-extern int __fscrypt_prepare_symlink(struct inode *dir, unsigned int len,
-				     unsigned int max_len,
-				     struct fscrypt_str *disk_link);
-extern int __fscrypt_encrypt_symlink(struct inode *inode, const char *target,
-				     unsigned int len,
-				     struct fscrypt_str *disk_link);
-extern const char *fscrypt_get_symlink(struct inode *inode, const void *caddr,
-				       unsigned int max_size,
-				       struct delayed_call *done);
-
-#endif	/* _LINUX_FSCRYPT_SUPP_H */
-- 
cgit v1.2.3


From f5e55e777cc93eae1416f0fa4908e8846b6d7825 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Tue, 22 Jan 2019 16:20:21 -0800
Subject: fscrypt: return -EXDEV for incompatible rename or link into encrypted
 dir

Currently, trying to rename or link a regular file, directory, or
symlink into an encrypted directory fails with EPERM when the source
file is unencrypted or is encrypted with a different encryption policy,
and is on the same mountpoint.  It is correct for the operation to fail,
but the choice of EPERM breaks tools like 'mv' that know to copy rather
than rename if they see EXDEV, but don't know what to do with EPERM.

Our original motivation for EPERM was to encourage users to securely
handle their data.  Encrypting files by "moving" them into an encrypted
directory can be insecure because the unencrypted data may remain in
free space on disk, where it can later be recovered by an attacker.
It's much better to encrypt the data from the start, or at least try to
securely delete the source data e.g. using the 'shred' program.

However, the current behavior hasn't been effective at achieving its
goal because users tend to be confused, hack around it, and complain;
see e.g. https://github.com/google/fscrypt/issues/76.  And in some cases
it's actually inconsistent or unnecessary.  For example, 'mv'-ing files
between differently encrypted directories doesn't work even in cases
where it can be secure, such as when in userspace the same passphrase
protects both directories.  Yet, you *can* already 'mv' unencrypted
files into an encrypted directory if the source files are on a different
mountpoint, even though doing so is often insecure.

There are probably better ways to teach users to securely handle their
files.  For example, the 'fscrypt' userspace tool could provide a
command that migrates unencrypted files into an encrypted directory,
acting like 'shred' on the source files and providing appropriate
warnings depending on the type of the source filesystem and disk.

Receiving errors on unimportant files might also force some users to
disable encryption, thus making the behavior counterproductive.  It's
desirable to make encryption as unobtrusive as possible.

Therefore, change the error code from EPERM to EXDEV so that tools
looking for EXDEV will fall back to a copy.

This, of course, doesn't prevent users from still doing the right things
to securely manage their files.  Note that this also matches the
behavior when a file is renamed between two project quota hierarchies;
so there's precedent for using EXDEV for things other than mountpoints.

xfstests generic/398 will require an update with this change.

[Rewritten from an earlier patch series by Michael Halcrow.]

Cc: Michael Halcrow <mhalcrow@google.com>
Cc: Joe Richey <joerichey@google.com>
Signed-off-by: Eric Biggers <ebiggers@google.com>
---
 Documentation/filesystems/fscrypt.rst | 12 ++++++++++--
 fs/crypto/hooks.c                     |  6 +++---
 fs/crypto/policy.c                    |  3 +--
 include/linux/fscrypt.h               |  4 ++--
 4 files changed, 16 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/Documentation/filesystems/fscrypt.rst b/Documentation/filesystems/fscrypt.rst
index 43dd989e2a3f..08c23b60e016 100644
--- a/Documentation/filesystems/fscrypt.rst
+++ b/Documentation/filesystems/fscrypt.rst
@@ -451,10 +451,18 @@ astute users may notice some differences in behavior:
 - Unencrypted files, or files encrypted with a different encryption
   policy (i.e. different key, modes, or flags), cannot be renamed or
   linked into an encrypted directory; see `Encryption policy
-  enforcement`_.  Attempts to do so will fail with EPERM.  However,
+  enforcement`_.  Attempts to do so will fail with EXDEV.  However,
   encrypted files can be renamed within an encrypted directory, or
   into an unencrypted directory.
 
+  Note: "moving" an unencrypted file into an encrypted directory, e.g.
+  with the `mv` program, is implemented in userspace by a copy
+  followed by a delete.  Be aware that the original unencrypted data
+  may remain recoverable from free space on the disk; prefer to keep
+  all files encrypted from the very beginning.  The `shred` program
+  may be used to overwrite the source files but isn't guaranteed to be
+  effective on all filesystems and storage devices.
+
 - Direct I/O is not supported on encrypted files.  Attempts to use
   direct I/O on such files will fall back to buffered I/O.
 
@@ -541,7 +549,7 @@ not be encrypted.
 Except for those special files, it is forbidden to have unencrypted
 files, or files encrypted with a different encryption policy, in an
 encrypted directory tree.  Attempts to link or rename such a file into
-an encrypted directory will fail with EPERM.  This is also enforced
+an encrypted directory will fail with EXDEV.  This is also enforced
 during ->lookup() to provide limited protection against offline
 attacks that try to disable or downgrade encryption in known locations
 where applications may later write sensitive data.  It is recommended
diff --git a/fs/crypto/hooks.c b/fs/crypto/hooks.c
index 926e5df20ec3..56debb1fcf5e 100644
--- a/fs/crypto/hooks.c
+++ b/fs/crypto/hooks.c
@@ -58,7 +58,7 @@ int __fscrypt_prepare_link(struct inode *inode, struct inode *dir)
 		return err;
 
 	if (!fscrypt_has_permitted_context(dir, inode))
-		return -EPERM;
+		return -EXDEV;
 
 	return 0;
 }
@@ -82,13 +82,13 @@ int __fscrypt_prepare_rename(struct inode *old_dir, struct dentry *old_dentry,
 		if (IS_ENCRYPTED(new_dir) &&
 		    !fscrypt_has_permitted_context(new_dir,
 						   d_inode(old_dentry)))
-			return -EPERM;
+			return -EXDEV;
 
 		if ((flags & RENAME_EXCHANGE) &&
 		    IS_ENCRYPTED(old_dir) &&
 		    !fscrypt_has_permitted_context(old_dir,
 						   d_inode(new_dentry)))
-			return -EPERM;
+			return -EXDEV;
 	}
 	return 0;
 }
diff --git a/fs/crypto/policy.c b/fs/crypto/policy.c
index f490de921ce8..bd7eaf9b3f00 100644
--- a/fs/crypto/policy.c
+++ b/fs/crypto/policy.c
@@ -151,8 +151,7 @@ EXPORT_SYMBOL(fscrypt_ioctl_get_policy);
  * malicious offline violations of this constraint, while the link and rename
  * checks are needed to prevent online violations of this constraint.
  *
- * Return: 1 if permitted, 0 if forbidden.  If forbidden, the caller must fail
- * the filesystem operation with EPERM.
+ * Return: 1 if permitted, 0 if forbidden.
  */
 int fscrypt_has_permitted_context(struct inode *parent, struct inode *child)
 {
diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h
index eec604840568..e5194fc3983e 100644
--- a/include/linux/fscrypt.h
+++ b/include/linux/fscrypt.h
@@ -489,7 +489,7 @@ static inline int fscrypt_require_key(struct inode *inode)
  * in an encrypted directory tree use the same encryption policy.
  *
  * Return: 0 on success, -ENOKEY if the directory's encryption key is missing,
- * -EPERM if the link would result in an inconsistent encryption policy, or
+ * -EXDEV if the link would result in an inconsistent encryption policy, or
  * another -errno code.
  */
 static inline int fscrypt_prepare_link(struct dentry *old_dentry,
@@ -519,7 +519,7 @@ static inline int fscrypt_prepare_link(struct dentry *old_dentry,
  * We also verify that the rename will not violate the constraint that all files
  * in an encrypted directory tree use the same encryption policy.
  *
- * Return: 0 on success, -ENOKEY if an encryption key is missing, -EPERM if the
+ * Return: 0 on success, -ENOKEY if an encryption key is missing, -EXDEV if the
  * rename would cause inconsistent encryption policies, or another -errno code.
  */
 static inline int fscrypt_prepare_rename(struct inode *old_dir,
-- 
cgit v1.2.3


From d9ff286a0f59fa7843549e49bd240393dd7d8b87 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 23 Jan 2019 09:22:27 -0800
Subject: bpf: allow BPF programs access skb_shared_info->gso_segs field

This adds the ability to read gso_segs from a BPF program.

v3: Use BPF_REG_AX instead of BPF_REG_TMP for the temporary register,
    as suggested by Martin.

v2: refined Eddie Hao patch to address Alexei feedback.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Eddie Hao <eddieh@google.com>
Cc: Martin KaFai Lau <kafai@fb.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/uapi/linux/bpf.h                    |  1 +
 net/core/filter.c                           | 21 +++++++++++++++++
 tools/include/uapi/linux/bpf.h              |  1 +
 tools/testing/selftests/bpf/test_verifier.c | 36 +++++++++++++++++++++++++++++
 4 files changed, 59 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 91c43884f295..2940a9854f6d 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -2540,6 +2540,7 @@ struct __sk_buff {
 	__bpf_md_ptr(struct bpf_flow_keys *, flow_keys);
 	__u64 tstamp;
 	__u32 wire_len;
+	__u32 gso_segs;
 };
 
 struct bpf_tunnel_key {
diff --git a/net/core/filter.c b/net/core/filter.c
index 2b3b436ef545..8e587dd1da20 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -6700,6 +6700,27 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
 							     target_size));
 		break;
 
+	case offsetof(struct __sk_buff, gso_segs):
+		/* si->dst_reg = skb_shinfo(SKB); */
+#ifdef NET_SKBUFF_DATA_USES_OFFSET
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, head),
+				      si->dst_reg, si->src_reg,
+				      offsetof(struct sk_buff, head));
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, end),
+				      BPF_REG_AX, si->src_reg,
+				      offsetof(struct sk_buff, end));
+		*insn++ = BPF_ALU64_REG(BPF_ADD, si->dst_reg, BPF_REG_AX);
+#else
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, end),
+				      si->dst_reg, si->src_reg,
+				      offsetof(struct sk_buff, end));
+#endif
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct skb_shared_info, gso_segs),
+				      si->dst_reg, si->dst_reg,
+				      bpf_target_off(struct skb_shared_info,
+						     gso_segs, 2,
+						     target_size));
+		break;
 	case offsetof(struct __sk_buff, wire_len):
 		BUILD_BUG_ON(FIELD_SIZEOF(struct qdisc_skb_cb, pkt_len) != 4);
 
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 91c43884f295..2940a9854f6d 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -2540,6 +2540,7 @@ struct __sk_buff {
 	__bpf_md_ptr(struct bpf_flow_keys *, flow_keys);
 	__u64 tstamp;
 	__u32 wire_len;
+	__u32 gso_segs;
 };
 
 struct bpf_tunnel_key {
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 4f67afeec8d9..e4fef6ca8071 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -5663,6 +5663,42 @@ static struct bpf_test tests[] = {
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
 	},
+	{
+		"read gso_segs from CGROUP_SKB",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, gso_segs)),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+	},
+	{
+		"write gso_segs from CGROUP_SKB",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, gso_segs)),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = REJECT,
+		.result_unpriv = REJECT,
+		.errstr = "invalid bpf_context access off=164 size=4",
+		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+	},
+	{
+		"read gso_segs from CLS",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, gso_segs)),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
 	{
 		"multiple registers share map_lookup_elem result",
 		.insns = {
-- 
cgit v1.2.3


From e355477ed9e4f401e3931043df97325d38552d54 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@mellanox.com>
Date: Fri, 18 Jan 2019 16:33:10 -0800
Subject: net/mlx5: Make mlx5_cmd_exec_cb() a safe API

APIs that have deferred callbacks should have some kind of cleanup
function that callers can use to fence the callbacks. Otherwise things
like module unloading can lead to dangling function pointers, or worse.

The IB MR code is the only place that calls this function and had a
really poor attempt at creating this fence. Provide a good version in
the core code as future patches will add more places that need this
fence.

Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Yishai Hadas <yishaih@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/infiniband/hw/mlx5/mlx5_ib.h          |  2 +
 drivers/infiniband/hw/mlx5/mr.c               | 39 ++++---------------
 drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 55 ++++++++++++++++++++++++---
 drivers/net/ethernet/mellanox/mlx5/core/mr.c  | 11 +++---
 include/linux/mlx5/driver.h                   | 32 +++++++++++++---
 5 files changed, 91 insertions(+), 48 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index efe383c0ac86..eedba0d2ec4b 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -587,6 +587,7 @@ struct mlx5_ib_mr {
 	struct mlx5_ib_mr      *parent;
 	atomic_t		num_leaf_free;
 	wait_queue_head_t       q_leaf_free;
+	struct mlx5_async_work  cb_work;
 };
 
 struct mlx5_ib_mw {
@@ -944,6 +945,7 @@ struct mlx5_ib_dev {
 	struct mlx5_memic	memic;
 	u16			devx_whitelist_uid;
 	struct mlx5_srq_table   srq_table;
+	struct mlx5_async_ctx   async_ctx;
 };
 
 static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq)
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index fd6ea1f75085..bf2b6ea23851 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -123,9 +123,10 @@ static void update_odp_mr(struct mlx5_ib_mr *mr)
 }
 #endif
 
-static void reg_mr_callback(int status, void *context)
+static void reg_mr_callback(int status, struct mlx5_async_work *context)
 {
-	struct mlx5_ib_mr *mr = context;
+	struct mlx5_ib_mr *mr =
+		container_of(context, struct mlx5_ib_mr, cb_work);
 	struct mlx5_ib_dev *dev = mr->dev;
 	struct mlx5_mr_cache *cache = &dev->cache;
 	int c = order2idx(dev, mr->order);
@@ -216,9 +217,9 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
 		ent->pending++;
 		spin_unlock_irq(&ent->lock);
 		err = mlx5_core_create_mkey_cb(dev->mdev, &mr->mmkey,
-					       in, inlen,
+					       &dev->async_ctx, in, inlen,
 					       mr->out, sizeof(mr->out),
-					       reg_mr_callback, mr);
+					       reg_mr_callback, &mr->cb_work);
 		if (err) {
 			spin_lock_irq(&ent->lock);
 			ent->pending--;
@@ -679,6 +680,7 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
 		return -ENOMEM;
 	}
 
+	mlx5_cmd_init_async_ctx(dev->mdev, &dev->async_ctx);
 	timer_setup(&dev->delay_timer, delay_time_func, 0);
 	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
 		ent = &cache->ent[i];
@@ -725,33 +727,6 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
 	return 0;
 }
 
-static void wait_for_async_commands(struct mlx5_ib_dev *dev)
-{
-	struct mlx5_mr_cache *cache = &dev->cache;
-	struct mlx5_cache_ent *ent;
-	int total = 0;
-	int i;
-	int j;
-
-	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
-		ent = &cache->ent[i];
-		for (j = 0 ; j < 1000; j++) {
-			if (!ent->pending)
-				break;
-			msleep(50);
-		}
-	}
-	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
-		ent = &cache->ent[i];
-		total += ent->pending;
-	}
-
-	if (total)
-		mlx5_ib_warn(dev, "aborted while there are %d pending mr requests\n", total);
-	else
-		mlx5_ib_warn(dev, "done with all pending requests\n");
-}
-
 int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev)
 {
 	int i;
@@ -763,12 +738,12 @@ int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev)
 	flush_workqueue(dev->cache.wq);
 
 	mlx5_mr_cache_debugfs_cleanup(dev);
+	mlx5_cmd_cleanup_async_ctx(&dev->async_ctx);
 
 	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++)
 		clean_keys(dev, i);
 
 	destroy_workqueue(dev->cache.wq);
-	wait_for_async_commands(dev);
 	del_timer_sync(&dev->delay_timer);
 
 	return 0;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index 3e0fa8a8077b..a25a8c6f938e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -1711,12 +1711,57 @@ int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
 }
 EXPORT_SYMBOL(mlx5_cmd_exec);
 
-int mlx5_cmd_exec_cb(struct mlx5_core_dev *dev, void *in, int in_size,
-		     void *out, int out_size, mlx5_cmd_cbk_t callback,
-		     void *context)
+void mlx5_cmd_init_async_ctx(struct mlx5_core_dev *dev,
+			     struct mlx5_async_ctx *ctx)
 {
-	return cmd_exec(dev, in, in_size, out, out_size, callback, context,
-			false);
+	ctx->dev = dev;
+	/* Starts at 1 to avoid doing wake_up if we are not cleaning up */
+	atomic_set(&ctx->num_inflight, 1);
+	init_waitqueue_head(&ctx->wait);
+}
+EXPORT_SYMBOL(mlx5_cmd_init_async_ctx);
+
+/**
+ * mlx5_cmd_cleanup_async_ctx - Clean up an async_ctx
+ * @ctx: The ctx to clean
+ *
+ * Upon return all callbacks given to mlx5_cmd_exec_cb() have been called. The
+ * caller must ensure that mlx5_cmd_exec_cb() is not called during or after
+ * the call mlx5_cleanup_async_ctx().
+ */
+void mlx5_cmd_cleanup_async_ctx(struct mlx5_async_ctx *ctx)
+{
+	atomic_dec(&ctx->num_inflight);
+	wait_event(ctx->wait, atomic_read(&ctx->num_inflight) == 0);
+}
+EXPORT_SYMBOL(mlx5_cmd_cleanup_async_ctx);
+
+static void mlx5_cmd_exec_cb_handler(int status, void *_work)
+{
+	struct mlx5_async_work *work = _work;
+	struct mlx5_async_ctx *ctx = work->ctx;
+
+	work->user_callback(status, work);
+	if (atomic_dec_and_test(&ctx->num_inflight))
+		wake_up(&ctx->wait);
+}
+
+int mlx5_cmd_exec_cb(struct mlx5_async_ctx *ctx, void *in, int in_size,
+		     void *out, int out_size, mlx5_async_cbk_t callback,
+		     struct mlx5_async_work *work)
+{
+	int ret;
+
+	work->ctx = ctx;
+	work->user_callback = callback;
+	if (WARN_ON(!atomic_inc_not_zero(&ctx->num_inflight)))
+		return -EIO;
+	ret = cmd_exec(ctx->dev, in, in_size, out, out_size,
+		       mlx5_cmd_exec_cb_handler, work, false);
+	if (ret && atomic_dec_and_test(&ctx->num_inflight))
+		wake_up(&ctx->wait);
+
+	return ret;
 }
 EXPORT_SYMBOL(mlx5_cmd_exec_cb);
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c b/drivers/net/ethernet/mellanox/mlx5/core/mr.c
index 0670165afd5f..ea744d8466ea 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mr.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mr.c
@@ -51,9 +51,10 @@ void mlx5_cleanup_mkey_table(struct mlx5_core_dev *dev)
 
 int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev,
 			     struct mlx5_core_mkey *mkey,
-			     u32 *in, int inlen,
-			     u32 *out, int outlen,
-			     mlx5_cmd_cbk_t callback, void *context)
+			     struct mlx5_async_ctx *async_ctx, u32 *in,
+			     int inlen, u32 *out, int outlen,
+			     mlx5_async_cbk_t callback,
+			     struct mlx5_async_work *context)
 {
 	struct mlx5_mkey_table *table = &dev->priv.mkey_table;
 	u32 lout[MLX5_ST_SZ_DW(create_mkey_out)] = {0};
@@ -71,7 +72,7 @@ int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev,
 	MLX5_SET(mkc, mkc, mkey_7_0, key);
 
 	if (callback)
-		return mlx5_cmd_exec_cb(dev, in, inlen, out, outlen,
+		return mlx5_cmd_exec_cb(async_ctx, in, inlen, out, outlen,
 					callback, context);
 
 	err = mlx5_cmd_exec(dev, in, inlen, lout, sizeof(lout));
@@ -105,7 +106,7 @@ int mlx5_core_create_mkey(struct mlx5_core_dev *dev,
 			  struct mlx5_core_mkey *mkey,
 			  u32 *in, int inlen)
 {
-	return mlx5_core_create_mkey_cb(dev, mkey, in, inlen,
+	return mlx5_core_create_mkey_cb(dev, mkey, NULL, in, inlen,
 					NULL, 0, NULL, NULL);
 }
 EXPORT_SYMBOL(mlx5_core_create_mkey);
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 4e444863054a..039c9398614c 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -850,11 +850,30 @@ void mlx5_cmd_cleanup(struct mlx5_core_dev *dev);
 void mlx5_cmd_use_events(struct mlx5_core_dev *dev);
 void mlx5_cmd_use_polling(struct mlx5_core_dev *dev);
 
+struct mlx5_async_ctx {
+	struct mlx5_core_dev *dev;
+	atomic_t num_inflight;
+	struct wait_queue_head wait;
+};
+
+struct mlx5_async_work;
+
+typedef void (*mlx5_async_cbk_t)(int status, struct mlx5_async_work *context);
+
+struct mlx5_async_work {
+	struct mlx5_async_ctx *ctx;
+	mlx5_async_cbk_t user_callback;
+};
+
+void mlx5_cmd_init_async_ctx(struct mlx5_core_dev *dev,
+			     struct mlx5_async_ctx *ctx);
+void mlx5_cmd_cleanup_async_ctx(struct mlx5_async_ctx *ctx);
+int mlx5_cmd_exec_cb(struct mlx5_async_ctx *ctx, void *in, int in_size,
+		     void *out, int out_size, mlx5_async_cbk_t callback,
+		     struct mlx5_async_work *work);
+
 int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
 		  int out_size);
-int mlx5_cmd_exec_cb(struct mlx5_core_dev *dev, void *in, int in_size,
-		     void *out, int out_size, mlx5_cmd_cbk_t callback,
-		     void *context);
 int mlx5_cmd_exec_polling(struct mlx5_core_dev *dev, void *in, int in_size,
 			  void *out, int out_size);
 void mlx5_cmd_mbox_status(void *out, u8 *status, u32 *syndrome);
@@ -885,9 +904,10 @@ void mlx5_init_mkey_table(struct mlx5_core_dev *dev);
 void mlx5_cleanup_mkey_table(struct mlx5_core_dev *dev);
 int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev,
 			     struct mlx5_core_mkey *mkey,
-			     u32 *in, int inlen,
-			     u32 *out, int outlen,
-			     mlx5_cmd_cbk_t callback, void *context);
+			     struct mlx5_async_ctx *async_ctx, u32 *in,
+			     int inlen, u32 *out, int outlen,
+			     mlx5_async_cbk_t callback,
+			     struct mlx5_async_work *context);
 int mlx5_core_create_mkey(struct mlx5_core_dev *dev,
 			  struct mlx5_core_mkey *mkey,
 			  u32 *in, int inlen);
-- 
cgit v1.2.3


From de89750c56f4bf2f04492c6ce298911381a7597a Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Wed, 23 Jan 2019 12:47:34 +0100
Subject: ALSA: pcm: Drop unused snd_pcm_substream.file field

It's assigned but nowhere used.  Let's remove it.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/sound/pcm.h      | 1 -
 sound/core/oss/pcm_oss.c | 1 -
 sound/core/pcm_native.c  | 4 +---
 3 files changed, 1 insertion(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/sound/pcm.h b/include/sound/pcm.h
index 2c30c1ad1b0d..a20d3a48df00 100644
--- a/include/sound/pcm.h
+++ b/include/sound/pcm.h
@@ -470,7 +470,6 @@ struct snd_pcm_substream {
 	struct snd_pcm_group self_group;	/* fake group for non linked substream (with substream lock inside) */
 	struct snd_pcm_group *group;		/* pointer to current group */
 	/* -- assigned files -- */
-	void *file;
 	int ref_count;
 	atomic_t mmap_count;
 	unsigned int f_flags;
diff --git a/sound/core/oss/pcm_oss.c b/sound/core/oss/pcm_oss.c
index 467039b342b5..d5b0d7ba83c4 100644
--- a/sound/core/oss/pcm_oss.c
+++ b/sound/core/oss/pcm_oss.c
@@ -2427,7 +2427,6 @@ static int snd_pcm_oss_open_file(struct file *file,
 		}
 
 		pcm_oss_file->streams[idx] = substream;
-		substream->file = pcm_oss_file;
 		snd_pcm_oss_init_substream(substream, &setup[idx], minor);
 	}
 	
diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c
index 26afb6b0889a..63640d3af9db 100644
--- a/sound/core/pcm_native.c
+++ b/sound/core/pcm_native.c
@@ -2452,10 +2452,8 @@ static int snd_pcm_open_file(struct file *file,
 		return -ENOMEM;
 	}
 	pcm_file->substream = substream;
-	if (substream->ref_count == 1) {
-		substream->file = pcm_file;
+	if (substream->ref_count == 1)
 		substream->pcm_release = pcm_release_private;
-	}
 	file->private_data = pcm_file;
 
 	return 0;
-- 
cgit v1.2.3


From 480e32ebd524ffdf3d50cc5cac179fb9e44a552d Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Wed, 23 Jan 2019 16:44:38 +0100
Subject: ALSA: pcm: Simplify proc file destruction

The proc files are recursively freed by calling with the root
snd_info_entry object, so we don't have to keep each object for
releasing one by one.  Move the release of the PCM stream proc root at
the beginning, so that we can remove the redundant code and resource.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/sound/pcm.h     | 11 ---------
 sound/core/pcm.c        | 66 +++++++++----------------------------------------
 sound/core/pcm_memory.c | 16 ++----------
 3 files changed, 13 insertions(+), 80 deletions(-)

(limited to 'include')

diff --git a/include/sound/pcm.h b/include/sound/pcm.h
index a20d3a48df00..eae6d2b82d7a 100644
--- a/include/sound/pcm.h
+++ b/include/sound/pcm.h
@@ -481,15 +481,6 @@ struct snd_pcm_substream {
 #endif
 #ifdef CONFIG_SND_VERBOSE_PROCFS
 	struct snd_info_entry *proc_root;
-	struct snd_info_entry *proc_info_entry;
-	struct snd_info_entry *proc_hw_params_entry;
-	struct snd_info_entry *proc_sw_params_entry;
-	struct snd_info_entry *proc_status_entry;
-	struct snd_info_entry *proc_prealloc_entry;
-	struct snd_info_entry *proc_prealloc_max_entry;
-#ifdef CONFIG_SND_PCM_XRUN_DEBUG
-	struct snd_info_entry *proc_xrun_injection_entry;
-#endif
 #endif /* CONFIG_SND_VERBOSE_PROCFS */
 	/* misc flags */
 	unsigned int hw_opened: 1;
@@ -511,10 +502,8 @@ struct snd_pcm_str {
 #endif
 #ifdef CONFIG_SND_VERBOSE_PROCFS
 	struct snd_info_entry *proc_root;
-	struct snd_info_entry *proc_info_entry;
 #ifdef CONFIG_SND_PCM_XRUN_DEBUG
 	unsigned int xrun_debug;	/* 0 = disabled, 1 = verbose, 2 = stacktrace */
-	struct snd_info_entry *proc_xrun_debug_entry;
 #endif
 #endif
 	struct snd_kcontrol *chmap_kctl; /* channel-mapping controls */
diff --git a/sound/core/pcm.c b/sound/core/pcm.c
index ca1ea3cf9350..bca0bdf3e33c 100644
--- a/sound/core/pcm.c
+++ b/sound/core/pcm.c
@@ -536,12 +536,9 @@ static int snd_pcm_stream_proc_init(struct snd_pcm_str *pstr)
 	entry = snd_info_create_card_entry(pcm->card, "info", pstr->proc_root);
 	if (entry) {
 		snd_info_set_text_ops(entry, pstr, snd_pcm_stream_proc_info_read);
-		if (snd_info_register(entry) < 0) {
+		if (snd_info_register(entry) < 0)
 			snd_info_free_entry(entry);
-			entry = NULL;
-		}
 	}
-	pstr->proc_info_entry = entry;
 
 #ifdef CONFIG_SND_PCM_XRUN_DEBUG
 	entry = snd_info_create_card_entry(pcm->card, "xrun_debug",
@@ -551,24 +548,15 @@ static int snd_pcm_stream_proc_init(struct snd_pcm_str *pstr)
 		entry->c.text.write = snd_pcm_xrun_debug_write;
 		entry->mode |= 0200;
 		entry->private_data = pstr;
-		if (snd_info_register(entry) < 0) {
+		if (snd_info_register(entry) < 0)
 			snd_info_free_entry(entry);
-			entry = NULL;
-		}
 	}
-	pstr->proc_xrun_debug_entry = entry;
 #endif
 	return 0;
 }
 
 static int snd_pcm_stream_proc_done(struct snd_pcm_str *pstr)
 {
-#ifdef CONFIG_SND_PCM_XRUN_DEBUG
-	snd_info_free_entry(pstr->proc_xrun_debug_entry);
-	pstr->proc_xrun_debug_entry = NULL;
-#endif
-	snd_info_free_entry(pstr->proc_info_entry);
-	pstr->proc_info_entry = NULL;
 	snd_info_free_entry(pstr->proc_root);
 	pstr->proc_root = NULL;
 	return 0;
@@ -597,45 +585,33 @@ static int snd_pcm_substream_proc_init(struct snd_pcm_substream *substream)
 	if (entry) {
 		snd_info_set_text_ops(entry, substream,
 				      snd_pcm_substream_proc_info_read);
-		if (snd_info_register(entry) < 0) {
+		if (snd_info_register(entry) < 0)
 			snd_info_free_entry(entry);
-			entry = NULL;
-		}
 	}
-	substream->proc_info_entry = entry;
 	entry = snd_info_create_card_entry(card, "hw_params",
 					   substream->proc_root);
 	if (entry) {
 		snd_info_set_text_ops(entry, substream,
 				      snd_pcm_substream_proc_hw_params_read);
-		if (snd_info_register(entry) < 0) {
+		if (snd_info_register(entry) < 0)
 			snd_info_free_entry(entry);
-			entry = NULL;
-		}
 	}
-	substream->proc_hw_params_entry = entry;
 	entry = snd_info_create_card_entry(card, "sw_params",
 					   substream->proc_root);
 	if (entry) {
 		snd_info_set_text_ops(entry, substream,
 				      snd_pcm_substream_proc_sw_params_read);
-		if (snd_info_register(entry) < 0) {
+		if (snd_info_register(entry) < 0)
 			snd_info_free_entry(entry);
-			entry = NULL;
-		}
 	}
-	substream->proc_sw_params_entry = entry;
 	entry = snd_info_create_card_entry(card, "status",
 					   substream->proc_root);
 	if (entry) {
 		snd_info_set_text_ops(entry, substream,
 				      snd_pcm_substream_proc_status_read);
-		if (snd_info_register(entry) < 0) {
+		if (snd_info_register(entry) < 0)
 			snd_info_free_entry(entry);
-			entry = NULL;
-		}
 	}
-	substream->proc_status_entry = entry;
 
 #ifdef CONFIG_SND_PCM_XRUN_DEBUG
 	entry = snd_info_create_card_entry(card, "xrun_injection",
@@ -645,40 +621,18 @@ static int snd_pcm_substream_proc_init(struct snd_pcm_substream *substream)
 		entry->c.text.read = NULL;
 		entry->c.text.write = snd_pcm_xrun_injection_write;
 		entry->mode = S_IFREG | 0200;
-		if (snd_info_register(entry) < 0) {
+		if (snd_info_register(entry) < 0)
 			snd_info_free_entry(entry);
-			entry = NULL;
-		}
 	}
-	substream->proc_xrun_injection_entry = entry;
 #endif /* CONFIG_SND_PCM_XRUN_DEBUG */
 
 	return 0;
 }
 
-static int snd_pcm_substream_proc_done(struct snd_pcm_substream *substream)
-{
-	snd_info_free_entry(substream->proc_info_entry);
-	substream->proc_info_entry = NULL;
-	snd_info_free_entry(substream->proc_hw_params_entry);
-	substream->proc_hw_params_entry = NULL;
-	snd_info_free_entry(substream->proc_sw_params_entry);
-	substream->proc_sw_params_entry = NULL;
-	snd_info_free_entry(substream->proc_status_entry);
-	substream->proc_status_entry = NULL;
-#ifdef CONFIG_SND_PCM_XRUN_DEBUG
-	snd_info_free_entry(substream->proc_xrun_injection_entry);
-	substream->proc_xrun_injection_entry = NULL;
-#endif
-	snd_info_free_entry(substream->proc_root);
-	substream->proc_root = NULL;
-	return 0;
-}
 #else /* !CONFIG_SND_VERBOSE_PROCFS */
 static inline int snd_pcm_stream_proc_init(struct snd_pcm_str *pstr) { return 0; }
 static inline int snd_pcm_stream_proc_done(struct snd_pcm_str *pstr) { return 0; }
 static inline int snd_pcm_substream_proc_init(struct snd_pcm_substream *substream) { return 0; }
-static inline int snd_pcm_substream_proc_done(struct snd_pcm_substream *substream) { return 0; }
 #endif /* CONFIG_SND_VERBOSE_PROCFS */
 
 static const struct attribute_group *pcm_dev_attr_groups[];
@@ -911,15 +865,17 @@ static void snd_pcm_free_stream(struct snd_pcm_str * pstr)
 #if IS_ENABLED(CONFIG_SND_PCM_OSS)
 	struct snd_pcm_oss_setup *setup, *setupn;
 #endif
+
+	/* free all proc files under the stream */
+	snd_pcm_stream_proc_done(pstr);
+
 	substream = pstr->substream;
 	while (substream) {
 		substream_next = substream->next;
 		snd_pcm_timer_done(substream);
-		snd_pcm_substream_proc_done(substream);
 		kfree(substream);
 		substream = substream_next;
 	}
-	snd_pcm_stream_proc_done(pstr);
 #if IS_ENABLED(CONFIG_SND_PCM_OSS)
 	for (setup = pstr->oss.setup_list; setup; setup = setupn) {
 		setupn = setup->next;
diff --git a/sound/core/pcm_memory.c b/sound/core/pcm_memory.c
index 4b5356a10315..9a98bc61461f 100644
--- a/sound/core/pcm_memory.c
+++ b/sound/core/pcm_memory.c
@@ -93,12 +93,6 @@ static void snd_pcm_lib_preallocate_dma_free(struct snd_pcm_substream *substream
 int snd_pcm_lib_preallocate_free(struct snd_pcm_substream *substream)
 {
 	snd_pcm_lib_preallocate_dma_free(substream);
-#ifdef CONFIG_SND_VERBOSE_PROCFS
-	snd_info_free_entry(substream->proc_prealloc_max_entry);
-	substream->proc_prealloc_max_entry = NULL;
-	snd_info_free_entry(substream->proc_prealloc_entry);
-	substream->proc_prealloc_entry = NULL;
-#endif
 	return 0;
 }
 
@@ -203,21 +197,15 @@ static inline void preallocate_info_init(struct snd_pcm_substream *substream)
 		entry->c.text.write = snd_pcm_lib_preallocate_proc_write;
 		entry->mode |= 0200;
 		entry->private_data = substream;
-		if (snd_info_register(entry) < 0) {
+		if (snd_info_register(entry) < 0)
 			snd_info_free_entry(entry);
-			entry = NULL;
-		}
 	}
-	substream->proc_prealloc_entry = entry;
 	if ((entry = snd_info_create_card_entry(substream->pcm->card, "prealloc_max", substream->proc_root)) != NULL) {
 		entry->c.text.read = snd_pcm_lib_preallocate_max_proc_read;
 		entry->private_data = substream;
-		if (snd_info_register(entry) < 0) {
+		if (snd_info_register(entry) < 0)
 			snd_info_free_entry(entry);
-			entry = NULL;
-		}
 	}
-	substream->proc_prealloc_max_entry = entry;
 }
 
 #else /* !CONFIG_SND_VERBOSE_PROCFS */
-- 
cgit v1.2.3


From ef74f70e5a10cc2a78cc5529e564170cabcda9af Mon Sep 17 00:00:00 2001
From: Brian Masney <masneyb@onstation.org>
Date: Sat, 19 Jan 2019 15:42:42 -0500
Subject: gpio: add irq domain activate/deactivate functions

This adds the two new functions gpiochip_irq_domain_activate and
gpiochip_irq_domain_deactivate that can be used as the activate and
deactivate functions in the struct irq_domain_ops. This is for
situations where only gpiochip_{lock,unlock}_as_irq needs to be called.
SPMI and SSBI GPIO are two users that will initially use these
functions.

Signed-off-by: Brian Masney <masneyb@onstation.org>
Suggested-by: Stephen Boyd <sboyd@kernel.org>
Reviewed-by: Stephen Boyd <sboyd@kernel.org>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/gpio/gpiolib.c      | 37 +++++++++++++++++++++++++++++++++++++
 include/linux/gpio/driver.h |  5 +++++
 2 files changed, 42 insertions(+)

(limited to 'include')

diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index 1651d7f0a303..361a09c8138a 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -1775,6 +1775,43 @@ static const struct irq_domain_ops gpiochip_domain_ops = {
 	.xlate	= irq_domain_xlate_twocell,
 };
 
+/**
+ * gpiochip_irq_domain_activate() - Lock a GPIO to be used as an IRQ
+ * @domain: The IRQ domain used by this IRQ chip
+ * @data: Outermost irq_data associated with the IRQ
+ * @reserve: If set, only reserve an interrupt vector instead of assigning one
+ *
+ * This function is a wrapper that calls gpiochip_lock_as_irq() and is to be
+ * used as the activate function for the &struct irq_domain_ops. The host_data
+ * for the IRQ domain must be the &struct gpio_chip.
+ */
+int gpiochip_irq_domain_activate(struct irq_domain *domain,
+				 struct irq_data *data, bool reserve)
+{
+	struct gpio_chip *chip = domain->host_data;
+
+	return gpiochip_lock_as_irq(chip, data->hwirq);
+}
+EXPORT_SYMBOL_GPL(gpiochip_irq_domain_activate);
+
+/**
+ * gpiochip_irq_domain_deactivate() - Unlock a GPIO used as an IRQ
+ * @domain: The IRQ domain used by this IRQ chip
+ * @data: Outermost irq_data associated with the IRQ
+ *
+ * This function is a wrapper that will call gpiochip_unlock_as_irq() and is to
+ * be used as the deactivate function for the &struct irq_domain_ops. The
+ * host_data for the IRQ domain must be the &struct gpio_chip.
+ */
+void gpiochip_irq_domain_deactivate(struct irq_domain *domain,
+				    struct irq_data *data)
+{
+	struct gpio_chip *chip = domain->host_data;
+
+	return gpiochip_unlock_as_irq(chip, data->hwirq);
+}
+EXPORT_SYMBOL_GPL(gpiochip_irq_domain_deactivate);
+
 static int gpiochip_to_irq(struct gpio_chip *chip, unsigned offset)
 {
 	if (!gpiochip_irqchip_irq_valid(chip, offset))
diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h
index 07cddbf45186..01497910f023 100644
--- a/include/linux/gpio/driver.h
+++ b/include/linux/gpio/driver.h
@@ -472,6 +472,11 @@ int gpiochip_irq_map(struct irq_domain *d, unsigned int irq,
 		     irq_hw_number_t hwirq);
 void gpiochip_irq_unmap(struct irq_domain *d, unsigned int irq);
 
+int gpiochip_irq_domain_activate(struct irq_domain *domain,
+				 struct irq_data *data, bool reserve);
+void gpiochip_irq_domain_deactivate(struct irq_domain *domain,
+				    struct irq_data *data);
+
 void gpiochip_set_chained_irqchip(struct gpio_chip *gpiochip,
 		struct irq_chip *irqchip,
 		unsigned int parent_irq,
-- 
cgit v1.2.3


From da6a496a34f2fdcab14362cdc5068aac385e7b47 Mon Sep 17 00:00:00 2001
From: Moni Shoua <monis@mellanox.com>
Date: Tue, 22 Jan 2019 09:16:08 +0200
Subject: IB/mlx5: Ranges in implicit ODP MR inherit its write access

A sub-range in ODP implicit MR should take its write permission from the
MR and not be set always to allow.

Fixes: d07d1d70ce1a ("IB/umem: Update on demand page (ODP) support")
Signed-off-by: Moni Shoua <monis@mellanox.com>
Reviewed-by: Artemy Kovalyov <artemyko@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/core/umem_odp.c | 5 +++--
 drivers/infiniband/hw/mlx5/odp.c   | 2 +-
 include/rdma/ib_umem_odp.h         | 2 +-
 3 files changed, 5 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c
index dc1d7cb15cfa..eb8a5eb65bfa 100644
--- a/drivers/infiniband/core/umem_odp.c
+++ b/drivers/infiniband/core/umem_odp.c
@@ -332,9 +332,10 @@ static void put_per_mm(struct ib_umem_odp *umem_odp)
 	mmu_notifier_call_srcu(&per_mm->rcu, free_per_mm);
 }
 
-struct ib_umem_odp *ib_alloc_odp_umem(struct ib_ucontext_per_mm *per_mm,
+struct ib_umem_odp *ib_alloc_odp_umem(struct ib_umem_odp *root,
 				      unsigned long addr, size_t size)
 {
+	struct ib_ucontext_per_mm *per_mm = root->per_mm;
 	struct ib_ucontext *ctx = per_mm->context;
 	struct ib_umem_odp *odp_data;
 	struct ib_umem *umem;
@@ -349,7 +350,7 @@ struct ib_umem_odp *ib_alloc_odp_umem(struct ib_ucontext_per_mm *per_mm,
 	umem->length     = size;
 	umem->address    = addr;
 	umem->page_shift = PAGE_SHIFT;
-	umem->writable   = 1;
+	umem->writable   = root->umem.writable;
 	umem->is_odp = 1;
 	odp_data->per_mm = per_mm;
 
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index 3abdfc3584c0..3e0d5885c026 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -439,7 +439,7 @@ next_mr:
 		if (nentries)
 			nentries++;
 	} else {
-		odp = ib_alloc_odp_umem(odp_mr->per_mm, addr,
+		odp = ib_alloc_odp_umem(odp_mr, addr,
 					MLX5_IMR_MTT_SIZE);
 		if (IS_ERR(odp)) {
 			mutex_unlock(&odp_mr->umem_mutex);
diff --git a/include/rdma/ib_umem_odp.h b/include/rdma/ib_umem_odp.h
index d3725cf13ecd..d0024f53626e 100644
--- a/include/rdma/ib_umem_odp.h
+++ b/include/rdma/ib_umem_odp.h
@@ -116,7 +116,7 @@ struct ib_ucontext_per_mm {
 };
 
 int ib_umem_odp_get(struct ib_umem_odp *umem_odp, int access);
-struct ib_umem_odp *ib_alloc_odp_umem(struct ib_ucontext_per_mm *per_mm,
+struct ib_umem_odp *ib_alloc_odp_umem(struct ib_umem_odp *root_umem,
 				      unsigned long addr, size_t size);
 void ib_umem_odp_release(struct ib_umem_odp *umem_odp);
 
-- 
cgit v1.2.3


From 61b2fe3c62e5269408e264b2348f96467246d537 Mon Sep 17 00:00:00 2001
From: Moni Shoua <monis@mellanox.com>
Date: Tue, 22 Jan 2019 09:16:09 +0200
Subject: IB/mlx5: Remove dead code

When CONFIG_INFINIBAND_ON_DEMAND_PAGING is not set there is no caller to
ib_alloc_odp_umem() so let's remove it.

Signed-off-by: Moni Shoua <monis@mellanox.com>
Reviewed-by: Artemy Kovalyov <artemyko@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 include/rdma/ib_umem_odp.h | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'include')

diff --git a/include/rdma/ib_umem_odp.h b/include/rdma/ib_umem_odp.h
index d0024f53626e..dadc96dea39c 100644
--- a/include/rdma/ib_umem_odp.h
+++ b/include/rdma/ib_umem_odp.h
@@ -169,12 +169,6 @@ static inline int ib_umem_odp_get(struct ib_umem_odp *umem_odp, int access)
 	return -EINVAL;
 }
 
-static inline struct ib_umem_odp *
-ib_alloc_odp_umem(struct ib_ucontext *context, unsigned long addr, size_t size)
-{
-	return ERR_PTR(-EINVAL);
-}
-
 static inline void ib_umem_odp_release(struct ib_umem_odp *umem_odp) {}
 
 #endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
-- 
cgit v1.2.3


From 1d0dc06930a917eaca4156193c6c49f798b95ce7 Mon Sep 17 00:00:00 2001
From: Björn Töpel <bjorn.topel@intel.com>
Date: Thu, 24 Jan 2019 19:59:37 +0100
Subject: net: xsk: track AF_XDP sockets on a per-netns list
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Track each AF_XDP socket in a per-netns list. This will be used later
by the sock_diag interface for querying sockets from userspace.

Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/net/net_namespace.h |  4 ++++
 include/net/netns/xdp.h     | 13 +++++++++++++
 net/xdp/xsk.c               | 30 ++++++++++++++++++++++++++++++
 3 files changed, 47 insertions(+)
 create mode 100644 include/net/netns/xdp.h

(limited to 'include')

diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 99d4148e0f90..a68ced28d8f4 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -31,6 +31,7 @@
 #include <net/netns/xfrm.h>
 #include <net/netns/mpls.h>
 #include <net/netns/can.h>
+#include <net/netns/xdp.h>
 #include <linux/ns_common.h>
 #include <linux/idr.h>
 #include <linux/skbuff.h>
@@ -160,6 +161,9 @@ struct net {
 #endif
 #if IS_ENABLED(CONFIG_CAN)
 	struct netns_can	can;
+#endif
+#ifdef CONFIG_XDP_SOCKETS
+	struct netns_xdp	xdp;
 #endif
 	struct sock		*diag_nlsk;
 	atomic_t		fnhe_genid;
diff --git a/include/net/netns/xdp.h b/include/net/netns/xdp.h
new file mode 100644
index 000000000000..e5734261ba0a
--- /dev/null
+++ b/include/net/netns/xdp.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __NETNS_XDP_H__
+#define __NETNS_XDP_H__
+
+#include <linux/rculist.h>
+#include <linux/mutex.h>
+
+struct netns_xdp {
+	struct mutex		lock;
+	struct hlist_head	list;
+};
+
+#endif /* __NETNS_XDP_H__ */
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index a03268454a27..80ca48cefc42 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -350,6 +350,10 @@ static int xsk_release(struct socket *sock)
 
 	net = sock_net(sk);
 
+	mutex_lock(&net->xdp.lock);
+	sk_del_node_init_rcu(sk);
+	mutex_unlock(&net->xdp.lock);
+
 	local_bh_disable();
 	sock_prot_inuse_add(net, sk->sk_prot, -1);
 	local_bh_enable();
@@ -746,6 +750,10 @@ static int xsk_create(struct net *net, struct socket *sock, int protocol,
 	mutex_init(&xs->mutex);
 	spin_lock_init(&xs->tx_completion_lock);
 
+	mutex_lock(&net->xdp.lock);
+	sk_add_node_rcu(sk, &net->xdp.list);
+	mutex_unlock(&net->xdp.lock);
+
 	local_bh_disable();
 	sock_prot_inuse_add(net, &xsk_proto, 1);
 	local_bh_enable();
@@ -759,6 +767,23 @@ static const struct net_proto_family xsk_family_ops = {
 	.owner	= THIS_MODULE,
 };
 
+static int __net_init xsk_net_init(struct net *net)
+{
+	mutex_init(&net->xdp.lock);
+	INIT_HLIST_HEAD(&net->xdp.list);
+	return 0;
+}
+
+static void __net_exit xsk_net_exit(struct net *net)
+{
+	WARN_ON_ONCE(!hlist_empty(&net->xdp.list));
+}
+
+static struct pernet_operations xsk_net_ops = {
+	.init = xsk_net_init,
+	.exit = xsk_net_exit,
+};
+
 static int __init xsk_init(void)
 {
 	int err;
@@ -771,8 +796,13 @@ static int __init xsk_init(void)
 	if (err)
 		goto out_proto;
 
+	err = register_pernet_subsys(&xsk_net_ops);
+	if (err)
+		goto out_sk;
 	return 0;
 
+out_sk:
+	sock_unregister(PF_XDP);
 out_proto:
 	proto_unregister(&xsk_proto);
 out:
-- 
cgit v1.2.3


From 50e74c0131a5b3a3e387798a5705158c04fb3bd0 Mon Sep 17 00:00:00 2001
From: Björn Töpel <bjorn.topel@intel.com>
Date: Thu, 24 Jan 2019 19:59:38 +0100
Subject: xsk: add id to umem
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This commit adds an id to the umem structure. The id uniquely
identifies a umem instance, and will be exposed to user-space via the
socket monitoring interface.

Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/net/xdp_sock.h |  1 +
 net/xdp/xdp_umem.c     | 13 +++++++++++++
 2 files changed, 14 insertions(+)

(limited to 'include')

diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
index 13acb9803a6d..61cf7dbb6782 100644
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -42,6 +42,7 @@ struct xdp_umem {
 	struct work_struct work;
 	struct page **pgs;
 	u32 npgs;
+	int id;
 	struct net_device *dev;
 	struct xdp_umem_fq_reuse *fq_reuse;
 	u16 queue_id;
diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c
index a264cf2accd0..eabdb0f59031 100644
--- a/net/xdp/xdp_umem.c
+++ b/net/xdp/xdp_umem.c
@@ -13,12 +13,15 @@
 #include <linux/mm.h>
 #include <linux/netdevice.h>
 #include <linux/rtnetlink.h>
+#include <linux/idr.h>
 
 #include "xdp_umem.h"
 #include "xsk_queue.h"
 
 #define XDP_UMEM_MIN_CHUNK_SIZE 2048
 
+static DEFINE_IDA(umem_ida);
+
 void xdp_add_sk_umem(struct xdp_umem *umem, struct xdp_sock *xs)
 {
 	unsigned long flags;
@@ -183,6 +186,8 @@ static void xdp_umem_release(struct xdp_umem *umem)
 
 	xdp_umem_clear_dev(umem);
 
+	ida_simple_remove(&umem_ida, umem->id);
+
 	if (umem->fq) {
 		xskq_destroy(umem->fq);
 		umem->fq = NULL;
@@ -389,8 +394,16 @@ struct xdp_umem *xdp_umem_create(struct xdp_umem_reg *mr)
 	if (!umem)
 		return ERR_PTR(-ENOMEM);
 
+	err = ida_simple_get(&umem_ida, 0, 0, GFP_KERNEL);
+	if (err < 0) {
+		kfree(umem);
+		return ERR_PTR(err);
+	}
+	umem->id = err;
+
 	err = xdp_umem_reg(umem, mr);
 	if (err) {
+		ida_simple_remove(&umem_ida, umem->id);
 		kfree(umem);
 		return ERR_PTR(err);
 	}
-- 
cgit v1.2.3


From a36b38aa2af61146ea80980a01cf6e952ab021c1 Mon Sep 17 00:00:00 2001
From: Björn Töpel <bjorn.topel@intel.com>
Date: Thu, 24 Jan 2019 19:59:39 +0100
Subject: xsk: add sock_diag interface for AF_XDP
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch adds the sock_diag interface for querying sockets from user
space. Tools like iproute2 ss(8) can use this interface to list open
AF_XDP sockets.

The user-space ABI is defined in linux/xdp_diag.h and includes netlink
request and response structs. The request can query sockets and the
response contains socket information about the rings, umems, inode and
more.

Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/uapi/linux/xdp_diag.h |  72 ++++++++++++++++
 net/xdp/Kconfig               |   8 ++
 net/xdp/Makefile              |   1 +
 net/xdp/xsk.c                 |   6 +-
 net/xdp/xsk.h                 |  12 +++
 net/xdp/xsk_diag.c            | 191 ++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 285 insertions(+), 5 deletions(-)
 create mode 100644 include/uapi/linux/xdp_diag.h
 create mode 100644 net/xdp/xsk.h
 create mode 100644 net/xdp/xsk_diag.c

(limited to 'include')

diff --git a/include/uapi/linux/xdp_diag.h b/include/uapi/linux/xdp_diag.h
new file mode 100644
index 000000000000..78b2591a7782
--- /dev/null
+++ b/include/uapi/linux/xdp_diag.h
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * xdp_diag: interface for query/monitor XDP sockets
+ * Copyright(c) 2019 Intel Corporation.
+ */
+
+#ifndef _LINUX_XDP_DIAG_H
+#define _LINUX_XDP_DIAG_H
+
+#include <linux/types.h>
+
+struct xdp_diag_req {
+	__u8	sdiag_family;
+	__u8	sdiag_protocol;
+	__u16	pad;
+	__u32	xdiag_ino;
+	__u32	xdiag_show;
+	__u32	xdiag_cookie[2];
+};
+
+struct xdp_diag_msg {
+	__u8	xdiag_family;
+	__u8	xdiag_type;
+	__u16	pad;
+	__u32	xdiag_ino;
+	__u32	xdiag_cookie[2];
+};
+
+#define XDP_SHOW_INFO		(1 << 0) /* Basic information */
+#define XDP_SHOW_RING_CFG	(1 << 1)
+#define XDP_SHOW_UMEM		(1 << 2)
+#define XDP_SHOW_MEMINFO	(1 << 3)
+
+enum {
+	XDP_DIAG_NONE,
+	XDP_DIAG_INFO,
+	XDP_DIAG_UID,
+	XDP_DIAG_RX_RING,
+	XDP_DIAG_TX_RING,
+	XDP_DIAG_UMEM,
+	XDP_DIAG_UMEM_FILL_RING,
+	XDP_DIAG_UMEM_COMPLETION_RING,
+	XDP_DIAG_MEMINFO,
+	__XDP_DIAG_MAX,
+};
+
+#define XDP_DIAG_MAX (__XDP_DIAG_MAX - 1)
+
+struct xdp_diag_info {
+	__u32	ifindex;
+	__u32	queue_id;
+};
+
+struct xdp_diag_ring {
+	__u32	entries; /*num descs */
+};
+
+#define XDP_DU_F_ZEROCOPY (1 << 0)
+
+struct xdp_diag_umem {
+	__u64	size;
+	__u32	id;
+	__u32	num_pages;
+	__u32	chunk_size;
+	__u32	headroom;
+	__u32	ifindex;
+	__u32	queue_id;
+	__u32	flags;
+	__u32	refs;
+};
+
+#endif /* _LINUX_XDP_DIAG_H */
diff --git a/net/xdp/Kconfig b/net/xdp/Kconfig
index 90e4a7152854..0255b33cff4b 100644
--- a/net/xdp/Kconfig
+++ b/net/xdp/Kconfig
@@ -5,3 +5,11 @@ config XDP_SOCKETS
 	help
 	  XDP sockets allows a channel between XDP programs and
 	  userspace applications.
+
+config XDP_SOCKETS_DIAG
+	tristate "XDP sockets: monitoring interface"
+	depends on XDP_SOCKETS
+	default n
+	help
+	  Support for PF_XDP sockets monitoring interface used by the ss tool.
+	  If unsure, say Y.
diff --git a/net/xdp/Makefile b/net/xdp/Makefile
index 04f073146256..59dbfdf93dca 100644
--- a/net/xdp/Makefile
+++ b/net/xdp/Makefile
@@ -1 +1,2 @@
 obj-$(CONFIG_XDP_SOCKETS) += xsk.o xdp_umem.o xsk_queue.o
+obj-$(CONFIG_XDP_SOCKETS_DIAG) += xsk_diag.o
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 80ca48cefc42..949d3bbccb2f 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -27,14 +27,10 @@
 
 #include "xsk_queue.h"
 #include "xdp_umem.h"
+#include "xsk.h"
 
 #define TX_BATCH_SIZE 16
 
-static struct xdp_sock *xdp_sk(struct sock *sk)
-{
-	return (struct xdp_sock *)sk;
-}
-
 bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs)
 {
 	return READ_ONCE(xs->rx) &&  READ_ONCE(xs->umem) &&
diff --git a/net/xdp/xsk.h b/net/xdp/xsk.h
new file mode 100644
index 000000000000..ba8120610426
--- /dev/null
+++ b/net/xdp/xsk.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2019 Intel Corporation. */
+
+#ifndef XSK_H_
+#define XSK_H_
+
+static inline struct xdp_sock *xdp_sk(struct sock *sk)
+{
+	return (struct xdp_sock *)sk;
+}
+
+#endif /* XSK_H_ */
diff --git a/net/xdp/xsk_diag.c b/net/xdp/xsk_diag.c
new file mode 100644
index 000000000000..661d007c3b28
--- /dev/null
+++ b/net/xdp/xsk_diag.c
@@ -0,0 +1,191 @@
+// SPDX-License-Identifier: GPL-2.0
+/* XDP sockets monitoring support
+ *
+ * Copyright(c) 2019 Intel Corporation.
+ *
+ * Author: Björn Töpel <bjorn.topel@intel.com>
+ */
+
+#include <linux/module.h>
+#include <net/xdp_sock.h>
+#include <linux/xdp_diag.h>
+#include <linux/sock_diag.h>
+
+#include "xsk_queue.h"
+#include "xsk.h"
+
+static int xsk_diag_put_info(const struct xdp_sock *xs, struct sk_buff *nlskb)
+{
+	struct xdp_diag_info di = {};
+
+	di.ifindex = xs->dev ? xs->dev->ifindex : 0;
+	di.queue_id = xs->queue_id;
+	return nla_put(nlskb, XDP_DIAG_INFO, sizeof(di), &di);
+}
+
+static int xsk_diag_put_ring(const struct xsk_queue *queue, int nl_type,
+			     struct sk_buff *nlskb)
+{
+	struct xdp_diag_ring dr = {};
+
+	dr.entries = queue->nentries;
+	return nla_put(nlskb, nl_type, sizeof(dr), &dr);
+}
+
+static int xsk_diag_put_rings_cfg(const struct xdp_sock *xs,
+				  struct sk_buff *nlskb)
+{
+	int err = 0;
+
+	if (xs->rx)
+		err = xsk_diag_put_ring(xs->rx, XDP_DIAG_RX_RING, nlskb);
+	if (!err && xs->tx)
+		err = xsk_diag_put_ring(xs->tx, XDP_DIAG_TX_RING, nlskb);
+	return err;
+}
+
+static int xsk_diag_put_umem(const struct xdp_sock *xs, struct sk_buff *nlskb)
+{
+	struct xdp_umem *umem = xs->umem;
+	struct xdp_diag_umem du = {};
+	int err;
+
+	if (!umem)
+		return 0;
+
+	du.id = umem->id;
+	du.size = umem->size;
+	du.num_pages = umem->npgs;
+	du.chunk_size = (__u32)(~umem->chunk_mask + 1);
+	du.headroom = umem->headroom;
+	du.ifindex = umem->dev ? umem->dev->ifindex : 0;
+	du.queue_id = umem->queue_id;
+	du.flags = 0;
+	if (umem->zc)
+		du.flags |= XDP_DU_F_ZEROCOPY;
+	du.refs = refcount_read(&umem->users);
+
+	err = nla_put(nlskb, XDP_DIAG_UMEM, sizeof(du), &du);
+
+	if (!err && umem->fq)
+		err = xsk_diag_put_ring(xs->tx, XDP_DIAG_UMEM_FILL_RING, nlskb);
+	if (!err && umem->cq) {
+		err = xsk_diag_put_ring(xs->tx, XDP_DIAG_UMEM_COMPLETION_RING,
+					nlskb);
+	}
+	return err;
+}
+
+static int xsk_diag_fill(struct sock *sk, struct sk_buff *nlskb,
+			 struct xdp_diag_req *req,
+			 struct user_namespace *user_ns,
+			 u32 portid, u32 seq, u32 flags, int sk_ino)
+{
+	struct xdp_sock *xs = xdp_sk(sk);
+	struct xdp_diag_msg *msg;
+	struct nlmsghdr *nlh;
+
+	nlh = nlmsg_put(nlskb, portid, seq, SOCK_DIAG_BY_FAMILY, sizeof(*msg),
+			flags);
+	if (!nlh)
+		return -EMSGSIZE;
+
+	msg = nlmsg_data(nlh);
+	memset(msg, 0, sizeof(*msg));
+	msg->xdiag_family = AF_XDP;
+	msg->xdiag_type = sk->sk_type;
+	msg->xdiag_ino = sk_ino;
+	sock_diag_save_cookie(sk, msg->xdiag_cookie);
+
+	if ((req->xdiag_show & XDP_SHOW_INFO) && xsk_diag_put_info(xs, nlskb))
+		goto out_nlmsg_trim;
+
+	if ((req->xdiag_show & XDP_SHOW_INFO) &&
+	    nla_put_u32(nlskb, XDP_DIAG_UID,
+			from_kuid_munged(user_ns, sock_i_uid(sk))))
+		goto out_nlmsg_trim;
+
+	if ((req->xdiag_show & XDP_SHOW_RING_CFG) &&
+	    xsk_diag_put_rings_cfg(xs, nlskb))
+		goto out_nlmsg_trim;
+
+	if ((req->xdiag_show & XDP_SHOW_UMEM) &&
+	    xsk_diag_put_umem(xs, nlskb))
+		goto out_nlmsg_trim;
+
+	if ((req->xdiag_show & XDP_SHOW_MEMINFO) &&
+	    sock_diag_put_meminfo(sk, nlskb, XDP_DIAG_MEMINFO))
+		goto out_nlmsg_trim;
+
+	nlmsg_end(nlskb, nlh);
+	return 0;
+
+out_nlmsg_trim:
+	nlmsg_cancel(nlskb, nlh);
+	return -EMSGSIZE;
+}
+
+static int xsk_diag_dump(struct sk_buff *nlskb, struct netlink_callback *cb)
+{
+	struct xdp_diag_req *req = nlmsg_data(cb->nlh);
+	struct net *net = sock_net(nlskb->sk);
+	int num = 0, s_num = cb->args[0];
+	struct sock *sk;
+
+	mutex_lock(&net->xdp.lock);
+
+	sk_for_each(sk, &net->xdp.list) {
+		if (!net_eq(sock_net(sk), net))
+			continue;
+		if (num++ < s_num)
+			continue;
+
+		if (xsk_diag_fill(sk, nlskb, req,
+				  sk_user_ns(NETLINK_CB(cb->skb).sk),
+				  NETLINK_CB(cb->skb).portid,
+				  cb->nlh->nlmsg_seq, NLM_F_MULTI,
+				  sock_i_ino(sk)) < 0) {
+			num--;
+			break;
+		}
+	}
+
+	mutex_unlock(&net->xdp.lock);
+	cb->args[0] = num;
+	return nlskb->len;
+}
+
+static int xsk_diag_handler_dump(struct sk_buff *nlskb, struct nlmsghdr *hdr)
+{
+	struct netlink_dump_control c = { .dump = xsk_diag_dump };
+	int hdrlen = sizeof(struct xdp_diag_req);
+	struct net *net = sock_net(nlskb->sk);
+
+	if (nlmsg_len(hdr) < hdrlen)
+		return -EINVAL;
+
+	if (!(hdr->nlmsg_flags & NLM_F_DUMP))
+		return -EOPNOTSUPP;
+
+	return netlink_dump_start(net->diag_nlsk, nlskb, hdr, &c);
+}
+
+static const struct sock_diag_handler xsk_diag_handler = {
+	.family = AF_XDP,
+	.dump = xsk_diag_handler_dump,
+};
+
+static int __init xsk_diag_init(void)
+{
+	return sock_diag_register(&xsk_diag_handler);
+}
+
+static void __exit xsk_diag_exit(void)
+{
+	sock_diag_unregister(&xsk_diag_handler);
+}
+
+module_init(xsk_diag_init);
+module_exit(xsk_diag_exit);
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, AF_XDP);
-- 
cgit v1.2.3


From 434a4315b9617bf1742bc64712bf44a208502f7f Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Wed, 23 Jan 2019 07:31:58 +0100
Subject: net: phy: change phy_start_interrupts to phy_request_interrupt

Now that we enable the interrupts in phy_start() we don't have to do it
before. Therefore remove enabling interrupts from phy_start_interrupts()
and rename this function to reflect the changed functionality.

v2:
- improve warning to clearly state that we fall back to polling

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/phy.c        | 23 +++++++++++------------
 drivers/net/phy/phy_device.c |  4 ++--
 drivers/net/phy/phylink.c    |  4 ++--
 include/linux/phy.h          |  2 +-
 4 files changed, 16 insertions(+), 17 deletions(-)

(limited to 'include')

diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index 079b6a617fc8..d12aa512b7f5 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -785,28 +785,27 @@ static int phy_enable_interrupts(struct phy_device *phydev)
 }
 
 /**
- * phy_start_interrupts - request and enable interrupts for a PHY device
+ * phy_request_interrupt - request interrupt for a PHY device
  * @phydev: target phy_device struct
  *
  * Description: Request the interrupt for the given PHY.
  *   If this fails, then we set irq to PHY_POLL.
- *   Otherwise, we enable the interrupts in the PHY.
  *   This should only be called with a valid IRQ number.
- *   Returns 0 on success or < 0 on error.
  */
-int phy_start_interrupts(struct phy_device *phydev)
+void phy_request_interrupt(struct phy_device *phydev)
 {
-	if (request_threaded_irq(phydev->irq, NULL, phy_interrupt,
-				 IRQF_ONESHOT | IRQF_SHARED,
-				 phydev_name(phydev), phydev) < 0) {
-		phydev_warn(phydev, "Can't get IRQ %d\n", phydev->irq);
+	int err;
+
+	err = request_threaded_irq(phydev->irq, NULL, phy_interrupt,
+				   IRQF_ONESHOT | IRQF_SHARED,
+				   phydev_name(phydev), phydev);
+	if (err) {
+		phydev_warn(phydev, "Error %d requesting IRQ %d, falling back to polling\n",
+			    err, phydev->irq);
 		phydev->irq = PHY_POLL;
-		return 0;
 	}
-
-	return phy_enable_interrupts(phydev);
 }
-EXPORT_SYMBOL(phy_start_interrupts);
+EXPORT_SYMBOL(phy_request_interrupt);
 
 /**
  * phy_stop - Bring down the PHY link, and stop checking the status
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index 64c25a0684ac..891e0178b97f 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -951,8 +951,8 @@ int phy_connect_direct(struct net_device *dev, struct phy_device *phydev,
 		return rc;
 
 	phy_prepare_link(phydev, handler);
-	if (phydev->irq > 0)
-		phy_start_interrupts(phydev);
+	if (phy_interrupt_is_valid(phydev))
+		phy_request_interrupt(phydev);
 
 	return 0;
 }
diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c
index c1b6e05ba60c..2e21ce42e388 100644
--- a/drivers/net/phy/phylink.c
+++ b/drivers/net/phy/phylink.c
@@ -676,8 +676,8 @@ static int phylink_bringup_phy(struct phylink *pl, struct phy_device *phy)
 		   __ETHTOOL_LINK_MODE_MASK_NBITS, pl->supported,
 		   __ETHTOOL_LINK_MODE_MASK_NBITS, phy->advertising);
 
-	if (phy->irq > 0)
-		phy_start_interrupts(phy);
+	if (phy_interrupt_is_valid(phy))
+		phy_request_interrupt(phy);
 
 	return 0;
 }
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 1f3873a2ff29..70f83d0d7469 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -1047,7 +1047,7 @@ void phy_ethtool_ksettings_get(struct phy_device *phydev,
 int phy_ethtool_ksettings_set(struct phy_device *phydev,
 			      const struct ethtool_link_ksettings *cmd);
 int phy_mii_ioctl(struct phy_device *phydev, struct ifreq *ifr, int cmd);
-int phy_start_interrupts(struct phy_device *phydev);
+void phy_request_interrupt(struct phy_device *phydev);
 void phy_print_status(struct phy_device *phydev);
 int phy_set_max_speed(struct phy_device *phydev, u32 max_speed);
 void phy_remove_link_mode(struct phy_device *phydev, u32 link_mode);
-- 
cgit v1.2.3


From 949e7cea0cbccd7e2afc4b374e2be998b8fcef78 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Date: Wed, 23 Jan 2019 19:14:50 +0200
Subject: bonding: count master 3ad stats separately

I made a dumb mistake when I summed up the slave stats, obviously slaves
can come and go which would make the master stats unreliable.
Count and export the master stats separately.

Fixes: a258aeacd7f0 ("bonding: add support for xstats and export 3ad stats")
Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_3ad.c     | 39 +++++++++++++-------------------------
 drivers/net/bonding/bond_netlink.c | 18 +++++++-----------
 include/net/bond_3ad.h             |  2 +-
 3 files changed, 21 insertions(+), 38 deletions(-)

(limited to 'include')

diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
index d30c21b34858..9274dcc6e9b0 100644
--- a/drivers/net/bonding/bond_3ad.c
+++ b/drivers/net/bonding/bond_3ad.c
@@ -853,6 +853,7 @@ static int ad_lacpdu_send(struct port *port)
 		return -ENOMEM;
 
 	atomic64_inc(&SLAVE_AD_INFO(slave)->stats.lacpdu_tx);
+	atomic64_inc(&BOND_AD_INFO(slave->bond).stats.lacpdu_tx);
 
 	skb->dev = slave->dev;
 	skb_reset_mac_header(skb);
@@ -898,9 +899,11 @@ static int ad_marker_send(struct port *port, struct bond_marker *marker)
 	switch (marker->tlv_type) {
 	case AD_MARKER_INFORMATION_SUBTYPE:
 		atomic64_inc(&SLAVE_AD_INFO(slave)->stats.marker_tx);
+		atomic64_inc(&BOND_AD_INFO(slave->bond).stats.marker_tx);
 		break;
 	case AD_MARKER_RESPONSE_SUBTYPE:
 		atomic64_inc(&SLAVE_AD_INFO(slave)->stats.marker_resp_tx);
+		atomic64_inc(&BOND_AD_INFO(slave->bond).stats.marker_resp_tx);
 		break;
 	}
 
@@ -1098,9 +1101,10 @@ static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port)
 	 */
 	last_state = port->sm_rx_state;
 
-	if (lacpdu)
+	if (lacpdu) {
 		atomic64_inc(&SLAVE_AD_INFO(port->slave)->stats.lacpdu_rx);
-
+		atomic64_inc(&BOND_AD_INFO(port->slave->bond).stats.lacpdu_rx);
+	}
 	/* check if state machine should change state */
 
 	/* first, check if port was reinitialized */
@@ -1938,6 +1942,7 @@ static void ad_marker_info_received(struct bond_marker *marker_info,
 	struct bond_marker marker;
 
 	atomic64_inc(&SLAVE_AD_INFO(port->slave)->stats.marker_rx);
+	atomic64_inc(&BOND_AD_INFO(port->slave->bond).stats.marker_rx);
 
 	/* copy the received marker data to the response marker */
 	memcpy(&marker, marker_info, sizeof(struct bond_marker));
@@ -1964,6 +1969,7 @@ static void ad_marker_response_received(struct bond_marker *marker,
 					struct port *port)
 {
 	atomic64_inc(&SLAVE_AD_INFO(port->slave)->stats.marker_resp_rx);
+	atomic64_inc(&BOND_AD_INFO(port->slave->bond).stats.marker_resp_rx);
 
 	/* DO NOTHING, SINCE WE DECIDED NOT TO IMPLEMENT THIS FEATURE FOR NOW */
 }
@@ -2374,6 +2380,7 @@ re_arm:
  */
 static int bond_3ad_rx_indication(struct lacpdu *lacpdu, struct slave *slave)
 {
+	struct bonding *bond = slave->bond;
 	int ret = RX_HANDLER_ANOTHER;
 	struct bond_marker *marker;
 	struct port *port;
@@ -2419,10 +2426,13 @@ static int bond_3ad_rx_indication(struct lacpdu *lacpdu, struct slave *slave)
 				   port->actor_port_number);
 			stat = &SLAVE_AD_INFO(slave)->stats.marker_unknown_rx;
 			atomic64_inc(stat);
+			stat = &BOND_AD_INFO(bond).stats.marker_unknown_rx;
+			atomic64_inc(stat);
 		}
 		break;
 	default:
 		atomic64_inc(&SLAVE_AD_INFO(slave)->stats.lacpdu_unknown_rx);
+		atomic64_inc(&BOND_AD_INFO(bond).stats.lacpdu_unknown_rx);
 	}
 
 	return ret;
@@ -2662,6 +2672,7 @@ int bond_3ad_lacpdu_recv(const struct sk_buff *skb, struct bonding *bond,
 	lacpdu = skb_header_pointer(skb, 0, sizeof(_lacpdu), &_lacpdu);
 	if (!lacpdu) {
 		atomic64_inc(&SLAVE_AD_INFO(slave)->stats.lacpdu_illegal_rx);
+		atomic64_inc(&BOND_AD_INFO(bond).stats.lacpdu_illegal_rx);
 		return RX_HANDLER_ANOTHER;
 	}
 
@@ -2698,30 +2709,6 @@ void bond_3ad_update_lacp_rate(struct bonding *bond)
 	spin_unlock_bh(&bond->mode_lock);
 }
 
-void bond_3ad_stats_add(struct slave *slave, struct bond_3ad_stats *stats)
-{
-	struct bond_3ad_stats *rstats = &SLAVE_AD_INFO(slave)->stats;
-	u64 stat;
-
-	atomic64_add(atomic64_read(&rstats->lacpdu_rx), &stats->lacpdu_rx);
-	atomic64_add(atomic64_read(&rstats->lacpdu_tx), &stats->lacpdu_tx);
-
-	stat = atomic64_read(&rstats->lacpdu_unknown_rx);
-	atomic64_add(stat, &stats->lacpdu_unknown_rx);
-	stat = atomic64_read(&rstats->lacpdu_illegal_rx);
-	atomic64_add(stat, &stats->lacpdu_illegal_rx);
-
-	atomic64_add(atomic64_read(&rstats->marker_rx), &stats->marker_rx);
-	atomic64_add(atomic64_read(&rstats->marker_tx), &stats->marker_tx);
-
-	stat = atomic64_read(&rstats->marker_resp_rx);
-	atomic64_add(stat, &stats->marker_resp_rx);
-	stat = atomic64_read(&rstats->marker_resp_tx);
-	atomic64_add(stat, &stats->marker_resp_tx);
-	stat = atomic64_read(&rstats->marker_unknown_rx);
-	atomic64_add(stat, &stats->marker_unknown_rx);
-}
-
 size_t bond_3ad_stats_size(void)
 {
 	return nla_total_size_64bit(sizeof(u64)) + /* BOND_3AD_STAT_LACPDU_RX */
diff --git a/drivers/net/bonding/bond_netlink.c b/drivers/net/bonding/bond_netlink.c
index d1338fbe1830..b286f591242e 100644
--- a/drivers/net/bonding/bond_netlink.c
+++ b/drivers/net/bonding/bond_netlink.c
@@ -715,16 +715,12 @@ static int bond_fill_linkxstats(struct sk_buff *skb,
 	if (!nest)
 		return -EMSGSIZE;
 	if (BOND_MODE(bond) == BOND_MODE_8023AD) {
-		struct bond_3ad_stats stats;
-		struct list_head *iter;
-
-		memset(&stats, 0, sizeof(stats));
-		if (slave) {
-			bond_3ad_stats_add(slave, &stats);
-		} else {
-			bond_for_each_slave(bond, slave, iter)
-				bond_3ad_stats_add(slave, &stats);
-		}
+		struct bond_3ad_stats *stats;
+
+		if (slave)
+			stats = &SLAVE_AD_INFO(slave)->stats;
+		else
+			stats = &BOND_AD_INFO(bond).stats;
 
 		nest2 = nla_nest_start(skb, BOND_XSTATS_3AD);
 		if (!nest2) {
@@ -732,7 +728,7 @@ static int bond_fill_linkxstats(struct sk_buff *skb,
 			return -EMSGSIZE;
 		}
 
-		if (bond_3ad_stats_fill(skb, &stats)) {
+		if (bond_3ad_stats_fill(skb, stats)) {
 			nla_nest_cancel(skb, nest2);
 			nla_nest_end(skb, nest);
 			return -EMSGSIZE;
diff --git a/include/net/bond_3ad.h b/include/net/bond_3ad.h
index 25aaf49d19be..c781e1afd683 100644
--- a/include/net/bond_3ad.h
+++ b/include/net/bond_3ad.h
@@ -278,6 +278,7 @@ struct ad_system {
 
 struct ad_bond_info {
 	struct ad_system system;	/* 802.3ad system structure */
+	struct bond_3ad_stats stats;
 	u32 agg_select_timer;		/* Timer to select aggregator after all adapter's hand shakes */
 	u16 aggregator_identifier;
 };
@@ -321,7 +322,6 @@ int bond_3ad_lacpdu_recv(const struct sk_buff *skb, struct bonding *bond,
 int bond_3ad_set_carrier(struct bonding *bond);
 void bond_3ad_update_lacp_rate(struct bonding *bond);
 void bond_3ad_update_ad_actor_settings(struct bonding *bond);
-void bond_3ad_stats_add(struct slave *slave, struct bond_3ad_stats *stats);
 int bond_3ad_stats_fill(struct sk_buff *skb, struct bond_3ad_stats *stats);
 size_t bond_3ad_stats_size(void);
 #endif /* _NET_BOND_3AD_H */
-- 
cgit v1.2.3


From 78dc70ebaa38aa303274e333be6c98eef87619e2 Mon Sep 17 00:00:00 2001
From: Priyaranjan Jha <priyarjha@google.com>
Date: Wed, 23 Jan 2019 12:04:54 -0800
Subject: tcp_bbr: adapt cwnd based on ack aggregation estimation

Aggregation effects are extremely common with wifi, cellular, and cable
modem link technologies, ACK decimation in middleboxes, and LRO and GRO
in receiving hosts. The aggregation can happen in either direction,
data or ACKs, but in either case the aggregation effect is visible
to the sender in the ACK stream.

Previously BBR's sending was often limited by cwnd under severe ACK
aggregation/decimation because BBR sized the cwnd at 2*BDP. If packets
were acked in bursts after long delays (e.g. one ACK acking 5*BDP after
5*RTT), BBR's sending was halted after sending 2*BDP over 2*RTT, leaving
the bottleneck idle for potentially long periods. Note that loss-based
congestion control does not have this issue because when facing
aggregation it continues increasing cwnd after bursts of ACKs, growing
cwnd until the buffer is full.

To achieve good throughput in the presence of aggregation effects, this
algorithm allows the BBR sender to put extra data in flight to keep the
bottleneck utilized during silences in the ACK stream that it has evidence
to suggest were caused by aggregation.

A summary of the algorithm: when a burst of packets are acked by a
stretched ACK or a burst of ACKs or both, BBR first estimates the expected
amount of data that should have been acked, based on its estimated
bandwidth. Then the surplus ("extra_acked") is recorded in a windowed-max
filter to estimate the recent level of observed ACK aggregation. Then cwnd
is increased by the ACK aggregation estimate. The larger cwnd avoids BBR
being cwnd-limited in the face of ACK silences that recent history suggests
were caused by aggregation. As a sanity check, the ACK aggregation degree
is upper-bounded by the cwnd (at the time of measurement) and a global max
of BW * 100ms. The algorithm is further described by the following
presentation:
https://datatracker.ietf.org/meeting/101/materials/slides-101-iccrg-an-update-on-bbr-work-at-google-00

In our internal testing, we observed a significant increase in BBR
throughput (measured using netperf), in a basic wifi setup.
- Host1 (sender on ethernet) -> AP -> Host2 (receiver on wifi)
- 2.4 GHz -> BBR before: ~73 Mbps; BBR after: ~102 Mbps; CUBIC: ~100 Mbps
- 5.0 GHz -> BBR before: ~362 Mbps; BBR after: ~593 Mbps; CUBIC: ~601 Mbps

Also, this code is running globally on YouTube TCP connections and produced
significant bandwidth increases for YouTube traffic.

This is based on Ian Swett's max_ack_height_ algorithm from the
QUIC BBR implementation.

Signed-off-by: Priyaranjan Jha <priyarjha@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet_connection_sock.h |   4 +-
 net/ipv4/tcp_bbr.c                 | 122 ++++++++++++++++++++++++++++++++++++-
 2 files changed, 123 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index 371b3b45fd5c..fe0d9b44d6fc 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -139,8 +139,8 @@ struct inet_connection_sock {
 	} icsk_mtup;
 	u32			  icsk_user_timeout;
 
-	u64			  icsk_ca_priv[88 / sizeof(u64)];
-#define ICSK_CA_PRIV_SIZE      (11 * sizeof(u64))
+	u64			  icsk_ca_priv[104 / sizeof(u64)];
+#define ICSK_CA_PRIV_SIZE      (13 * sizeof(u64))
 };
 
 #define ICSK_TIME_RETRANS	1	/* Retransmit timer */
diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c
index 6b6c7f14ccf9..56be7d27f208 100644
--- a/net/ipv4/tcp_bbr.c
+++ b/net/ipv4/tcp_bbr.c
@@ -115,6 +115,14 @@ struct bbr {
 		unused_b:5;
 	u32	prior_cwnd;	/* prior cwnd upon entering loss recovery */
 	u32	full_bw;	/* recent bw, to estimate if pipe is full */
+
+	/* For tracking ACK aggregation: */
+	u64	ack_epoch_mstamp;	/* start of ACK sampling epoch */
+	u16	extra_acked[2];		/* max excess data ACKed in epoch */
+	u32	ack_epoch_acked:20,	/* packets (S)ACKed in sampling epoch */
+		extra_acked_win_rtts:5,	/* age of extra_acked, in round trips */
+		extra_acked_win_idx:1,	/* current index in extra_acked array */
+		unused_c:6;
 };
 
 #define CYCLE_LEN	8	/* number of phases in a pacing gain cycle */
@@ -182,6 +190,15 @@ static const u32 bbr_lt_bw_diff = 4000 / 8;
 /* If we estimate we're policed, use lt_bw for this many round trips: */
 static const u32 bbr_lt_bw_max_rtts = 48;
 
+/* Gain factor for adding extra_acked to target cwnd: */
+static const int bbr_extra_acked_gain = BBR_UNIT;
+/* Window length of extra_acked window. */
+static const u32 bbr_extra_acked_win_rtts = 5;
+/* Max allowed val for ack_epoch_acked, after which sampling epoch is reset */
+static const u32 bbr_ack_epoch_acked_reset_thresh = 1U << 20;
+/* Time period for clamping cwnd increment due to ack aggregation */
+static const u32 bbr_extra_acked_max_us = 100 * 1000;
+
 static void bbr_check_probe_rtt_done(struct sock *sk);
 
 /* Do we estimate that STARTUP filled the pipe? */
@@ -208,6 +225,16 @@ static u32 bbr_bw(const struct sock *sk)
 	return bbr->lt_use_bw ? bbr->lt_bw : bbr_max_bw(sk);
 }
 
+/* Return maximum extra acked in past k-2k round trips,
+ * where k = bbr_extra_acked_win_rtts.
+ */
+static u16 bbr_extra_acked(const struct sock *sk)
+{
+	struct bbr *bbr = inet_csk_ca(sk);
+
+	return max(bbr->extra_acked[0], bbr->extra_acked[1]);
+}
+
 /* Return rate in bytes per second, optionally with a gain.
  * The order here is chosen carefully to avoid overflow of u64. This should
  * work for input rates of up to 2.9Tbit/sec and gain of 2.89x.
@@ -305,6 +332,8 @@ static void bbr_cwnd_event(struct sock *sk, enum tcp_ca_event event)
 
 	if (event == CA_EVENT_TX_START && tp->app_limited) {
 		bbr->idle_restart = 1;
+		bbr->ack_epoch_mstamp = tp->tcp_mstamp;
+		bbr->ack_epoch_acked = 0;
 		/* Avoid pointless buffer overflows: pace at est. bw if we don't
 		 * need more speed (we're restarting from idle and app-limited).
 		 */
@@ -418,6 +447,22 @@ static u32 bbr_packets_in_net_at_edt(struct sock *sk, u32 inflight_now)
 	return inflight_at_edt - interval_delivered;
 }
 
+/* Find the cwnd increment based on estimate of ack aggregation */
+static u32 bbr_ack_aggregation_cwnd(struct sock *sk)
+{
+	u32 max_aggr_cwnd, aggr_cwnd = 0;
+
+	if (bbr_extra_acked_gain && bbr_full_bw_reached(sk)) {
+		max_aggr_cwnd = ((u64)bbr_bw(sk) * bbr_extra_acked_max_us)
+				/ BW_UNIT;
+		aggr_cwnd = (bbr_extra_acked_gain * bbr_extra_acked(sk))
+			     >> BBR_SCALE;
+		aggr_cwnd = min(aggr_cwnd, max_aggr_cwnd);
+	}
+
+	return aggr_cwnd;
+}
+
 /* An optimization in BBR to reduce losses: On the first round of recovery, we
  * follow the packet conservation principle: send P packets per P packets acked.
  * After that, we slow-start and send at most 2*P packets per P packets acked.
@@ -478,9 +523,15 @@ static void bbr_set_cwnd(struct sock *sk, const struct rate_sample *rs,
 	if (bbr_set_cwnd_to_recover_or_restore(sk, rs, acked, &cwnd))
 		goto done;
 
-	/* If we're below target cwnd, slow start cwnd toward target cwnd. */
 	target_cwnd = bbr_bdp(sk, bw, gain);
+
+	/* Increment the cwnd to account for excess ACKed data that seems
+	 * due to aggregation (of data and/or ACKs) visible in the ACK stream.
+	 */
+	target_cwnd += bbr_ack_aggregation_cwnd(sk);
 	target_cwnd = bbr_quantization_budget(sk, target_cwnd, gain);
+
+	/* If we're below target cwnd, slow start cwnd toward target cwnd. */
 	if (bbr_full_bw_reached(sk))  /* only cut cwnd if we filled the pipe */
 		cwnd = min(cwnd + acked, target_cwnd);
 	else if (cwnd < target_cwnd || tp->delivered < TCP_INIT_CWND)
@@ -745,6 +796,67 @@ static void bbr_update_bw(struct sock *sk, const struct rate_sample *rs)
 	}
 }
 
+/* Estimates the windowed max degree of ack aggregation.
+ * This is used to provision extra in-flight data to keep sending during
+ * inter-ACK silences.
+ *
+ * Degree of ack aggregation is estimated as extra data acked beyond expected.
+ *
+ * max_extra_acked = "maximum recent excess data ACKed beyond max_bw * interval"
+ * cwnd += max_extra_acked
+ *
+ * Max extra_acked is clamped by cwnd and bw * bbr_extra_acked_max_us (100 ms).
+ * Max filter is an approximate sliding window of 5-10 (packet timed) round
+ * trips.
+ */
+static void bbr_update_ack_aggregation(struct sock *sk,
+				       const struct rate_sample *rs)
+{
+	u32 epoch_us, expected_acked, extra_acked;
+	struct bbr *bbr = inet_csk_ca(sk);
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	if (!bbr_extra_acked_gain || rs->acked_sacked <= 0 ||
+	    rs->delivered < 0 || rs->interval_us <= 0)
+		return;
+
+	if (bbr->round_start) {
+		bbr->extra_acked_win_rtts = min(0x1F,
+						bbr->extra_acked_win_rtts + 1);
+		if (bbr->extra_acked_win_rtts >= bbr_extra_acked_win_rtts) {
+			bbr->extra_acked_win_rtts = 0;
+			bbr->extra_acked_win_idx = bbr->extra_acked_win_idx ?
+						   0 : 1;
+			bbr->extra_acked[bbr->extra_acked_win_idx] = 0;
+		}
+	}
+
+	/* Compute how many packets we expected to be delivered over epoch. */
+	epoch_us = tcp_stamp_us_delta(tp->delivered_mstamp,
+				      bbr->ack_epoch_mstamp);
+	expected_acked = ((u64)bbr_bw(sk) * epoch_us) / BW_UNIT;
+
+	/* Reset the aggregation epoch if ACK rate is below expected rate or
+	 * significantly large no. of ack received since epoch (potentially
+	 * quite old epoch).
+	 */
+	if (bbr->ack_epoch_acked <= expected_acked ||
+	    (bbr->ack_epoch_acked + rs->acked_sacked >=
+	     bbr_ack_epoch_acked_reset_thresh)) {
+		bbr->ack_epoch_acked = 0;
+		bbr->ack_epoch_mstamp = tp->delivered_mstamp;
+		expected_acked = 0;
+	}
+
+	/* Compute excess data delivered, beyond what was expected. */
+	bbr->ack_epoch_acked = min_t(u32, 0xFFFFF,
+				     bbr->ack_epoch_acked + rs->acked_sacked);
+	extra_acked = bbr->ack_epoch_acked - expected_acked;
+	extra_acked = min(extra_acked, tp->snd_cwnd);
+	if (extra_acked > bbr->extra_acked[bbr->extra_acked_win_idx])
+		bbr->extra_acked[bbr->extra_acked_win_idx] = extra_acked;
+}
+
 /* Estimate when the pipe is full, using the change in delivery rate: BBR
  * estimates that STARTUP filled the pipe if the estimated bw hasn't changed by
  * at least bbr_full_bw_thresh (25%) after bbr_full_bw_cnt (3) non-app-limited
@@ -899,6 +1011,7 @@ static void bbr_update_gains(struct sock *sk)
 static void bbr_update_model(struct sock *sk, const struct rate_sample *rs)
 {
 	bbr_update_bw(sk, rs);
+	bbr_update_ack_aggregation(sk, rs);
 	bbr_update_cycle_phase(sk, rs);
 	bbr_check_full_bw_reached(sk, rs);
 	bbr_check_drain(sk, rs);
@@ -950,6 +1063,13 @@ static void bbr_init(struct sock *sk)
 	bbr_reset_lt_bw_sampling(sk);
 	bbr_reset_startup_mode(sk);
 
+	bbr->ack_epoch_mstamp = tp->tcp_mstamp;
+	bbr->ack_epoch_acked = 0;
+	bbr->extra_acked_win_rtts = 0;
+	bbr->extra_acked_win_idx = 0;
+	bbr->extra_acked[0] = 0;
+	bbr->extra_acked[1] = 0;
+
 	cmpxchg(&sk->sk_pacing_status, SK_PACING_NONE, SK_PACING_NEEDED);
 }
 
-- 
cgit v1.2.3


From e2bef3847e3d0d57dcf316de50c4b1a5a91816b7 Mon Sep 17 00:00:00 2001
From: Rajat Jain <rajatja@google.com>
Date: Thu, 24 Jan 2019 15:28:13 -0800
Subject: Bluetooth: Allow driver specific cmd timeout handling

Add a hook to allow the BT driver to do device or command specific
handling in case of timeouts. This is to be used by Intel driver to
reset the device after certain number of timeouts.

Signed-off-by: Rajat Jain <rajatja@google.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci_core.h | 1 +
 net/bluetooth/hci_core.c         | 3 +++
 2 files changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index e5ea633ea368..094e61e07030 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -437,6 +437,7 @@ struct hci_dev {
 	int (*post_init)(struct hci_dev *hdev);
 	int (*set_diag)(struct hci_dev *hdev, bool enable);
 	int (*set_bdaddr)(struct hci_dev *hdev, const bdaddr_t *bdaddr);
+	void (*cmd_timeout)(struct hci_dev *hdev);
 };
 
 #define HCI_PHY_HANDLE(handle)	(handle & 0xff)
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 7352fe85674b..75793265ba9e 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -2578,6 +2578,9 @@ static void hci_cmd_timeout(struct work_struct *work)
 		bt_dev_err(hdev, "command tx timeout");
 	}
 
+	if (hdev->cmd_timeout)
+		hdev->cmd_timeout(hdev);
+
 	atomic_set(&hdev->cmd_cnt, 1);
 	queue_work(hdev->workqueue, &hdev->cmd_work);
 }
-- 
cgit v1.2.3


From 12ea20f61637539dfac6e6e41beb07c21ba890d8 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 18 Jan 2019 00:14:19 +0100
Subject: crypto: morus - Cleanup license mess

Precise and non-ambiguous license information is important. The recently
added morus header files have a SPDX license identifier, which is nice, but
at the same time they have a contradictionary license boiler plate text.

  SPDX-License-Identifier: GPL-2.0

versus

  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License as published by the Free
  * Software Foundation; either version 2 of the License, or (at your option)
  * any later version

Oh well.

As the other morus related files are licensed under the GPL v2 or later,
it's assumed that the boiler plate code is correct, but the SPDX license
identifier is wrong.

Fix the SPDX identifier and remove the boiler plate as it is redundant.

Fixes: 56e8e57fc3a7 ("crypto: morus - Add common SIMD glue code for MORUS")
Fixes: 396be41f16fd ("crypto: morus - Add generic MORUS AEAD implementations")
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Ondrej Mosnacek <omosnacek@gmail.com>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: linux-crypto@vger.kernel.org
Acked-by: Ondrej Mosnacek <omosnacek@gmail.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/crypto/morus1280_glue.h | 7 +------
 include/crypto/morus640_glue.h  | 7 +------
 include/crypto/morus_common.h   | 7 +------
 3 files changed, 3 insertions(+), 18 deletions(-)

(limited to 'include')

diff --git a/include/crypto/morus1280_glue.h b/include/crypto/morus1280_glue.h
index ba782e10065e..ad2aa743dd99 100644
--- a/include/crypto/morus1280_glue.h
+++ b/include/crypto/morus1280_glue.h
@@ -1,15 +1,10 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /*
  * The MORUS-1280 Authenticated-Encryption Algorithm
  *   Common glue skeleton -- header file
  *
  * Copyright (c) 2016-2018 Ondrej Mosnacek <omosnacek@gmail.com>
  * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
  */
 
 #ifndef _CRYPTO_MORUS1280_GLUE_H
diff --git a/include/crypto/morus640_glue.h b/include/crypto/morus640_glue.h
index 27fa790a2362..df8e1103ff94 100644
--- a/include/crypto/morus640_glue.h
+++ b/include/crypto/morus640_glue.h
@@ -1,15 +1,10 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /*
  * The MORUS-640 Authenticated-Encryption Algorithm
  *   Common glue skeleton -- header file
  *
  * Copyright (c) 2016-2018 Ondrej Mosnacek <omosnacek@gmail.com>
  * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
  */
 
 #ifndef _CRYPTO_MORUS640_GLUE_H
diff --git a/include/crypto/morus_common.h b/include/crypto/morus_common.h
index 39f28c749951..969510a9a56c 100644
--- a/include/crypto/morus_common.h
+++ b/include/crypto/morus_common.h
@@ -1,15 +1,10 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0-or-later */
 /*
  * The MORUS Authenticated-Encryption Algorithm
  *   Common definitions
  *
  * Copyright (c) 2016-2018 Ondrej Mosnacek <omosnacek@gmail.com>
  * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
  */
 
 #ifndef _CRYPTO_MORUS_COMMON_H
-- 
cgit v1.2.3


From 9572442dcf487e534e70b30f43e21a364cf483e9 Mon Sep 17 00:00:00 2001
From: Xiongfeng Wang <xiongfeng.wang@linaro.org>
Date: Fri, 18 Jan 2019 13:58:11 +0800
Subject: crypto: api - add a helper to (un)register a array of templates

This patch add a helper to (un)register a array of templates. The
following patches will use this helper to simplify the code.

Signed-off-by: Xiongfeng Wang <xiongfeng.wang@linaro.org>
Reviewed-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/algapi.c         | 27 +++++++++++++++++++++++++++
 include/crypto/algapi.h |  2 ++
 2 files changed, 29 insertions(+)

(limited to 'include')

diff --git a/crypto/algapi.c b/crypto/algapi.c
index 713baabeb643..4c9c86b55738 100644
--- a/crypto/algapi.c
+++ b/crypto/algapi.c
@@ -494,6 +494,24 @@ out:
 }
 EXPORT_SYMBOL_GPL(crypto_register_template);
 
+int crypto_register_templates(struct crypto_template *tmpls, int count)
+{
+	int i, err;
+
+	for (i = 0; i < count; i++) {
+		err = crypto_register_template(&tmpls[i]);
+		if (err)
+			goto out;
+	}
+	return 0;
+
+out:
+	for (--i; i >= 0; --i)
+		crypto_unregister_template(&tmpls[i]);
+	return err;
+}
+EXPORT_SYMBOL_GPL(crypto_register_templates);
+
 void crypto_unregister_template(struct crypto_template *tmpl)
 {
 	struct crypto_instance *inst;
@@ -523,6 +541,15 @@ void crypto_unregister_template(struct crypto_template *tmpl)
 }
 EXPORT_SYMBOL_GPL(crypto_unregister_template);
 
+void crypto_unregister_templates(struct crypto_template *tmpls, int count)
+{
+	int i;
+
+	for (i = count - 1; i >= 0; --i)
+		crypto_unregister_template(&tmpls[i]);
+}
+EXPORT_SYMBOL_GPL(crypto_unregister_templates);
+
 static struct crypto_template *__crypto_lookup_template(const char *name)
 {
 	struct crypto_template *q, *tmpl = NULL;
diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h
index 093869f175d6..4be38cd0b8d5 100644
--- a/include/crypto/algapi.h
+++ b/include/crypto/algapi.h
@@ -143,7 +143,9 @@ extern const struct crypto_type crypto_blkcipher_type;
 void crypto_mod_put(struct crypto_alg *alg);
 
 int crypto_register_template(struct crypto_template *tmpl);
+int crypto_register_templates(struct crypto_template *tmpls, int count);
 void crypto_unregister_template(struct crypto_template *tmpl);
+void crypto_unregister_templates(struct crypto_template *tmpls, int count);
 struct crypto_template *crypto_lookup_template(const char *name);
 
 int crypto_register_instance(struct crypto_template *tmpl,
-- 
cgit v1.2.3


From 231baecdef7a906579925ccf1bd45aa734f32320 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Fri, 18 Jan 2019 22:48:00 -0800
Subject: crypto: clarify name of WEAK_KEY request flag

CRYPTO_TFM_REQ_WEAK_KEY confuses newcomers to the crypto API because it
sounds like it is requesting a weak key.  Actually, it is requesting
that weak keys be forbidden (for algorithms that have the notion of
"weak keys"; currently only DES and XTS do).

Also it is only one letter away from CRYPTO_TFM_RES_WEAK_KEY, with which
it can be easily confused.  (This in fact happened in the UX500 driver,
though just in some debugging messages.)

Therefore, make the intent clear by renaming it to
CRYPTO_TFM_REQ_FORBID_WEAK_KEYS.

Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/s390/crypto/des_s390.c                        |  4 ++--
 arch/sparc/crypto/des_glue.c                       |  4 ++--
 crypto/des_generic.c                               |  4 ++--
 crypto/testmgr.c                                   | 14 +++++++-------
 crypto/testmgr.h                                   |  4 ++--
 drivers/crypto/atmel-tdes.c                        |  2 +-
 drivers/crypto/bcm/cipher.c                        |  4 ++--
 drivers/crypto/ccp/ccp-crypto-des3.c               |  2 +-
 drivers/crypto/ccree/cc_cipher.c                   |  3 ++-
 drivers/crypto/hifn_795x.c                         |  3 ++-
 drivers/crypto/inside-secure/safexcel_cipher.c     |  2 +-
 drivers/crypto/ixp4xx_crypto.c                     |  4 ++--
 drivers/crypto/marvell/cipher.c                    |  2 +-
 drivers/crypto/n2_core.c                           |  2 +-
 drivers/crypto/omap-des.c                          |  2 +-
 drivers/crypto/picoxcell_crypto.c                  |  3 ++-
 drivers/crypto/qce/ablkcipher.c                    |  4 ++--
 drivers/crypto/rockchip/rk3288_crypto_ablkcipher.c |  2 +-
 drivers/crypto/sunxi-ss/sun4i-ss-cipher.c          |  2 +-
 drivers/crypto/talitos.c                           |  2 +-
 drivers/crypto/ux500/cryp/cryp_core.c              | 20 +++++++++++---------
 fs/crypto/keyinfo.c                                |  4 ++--
 fs/ecryptfs/crypto.c                               |  5 +++--
 include/crypto/xts.h                               |  4 ++--
 include/linux/crypto.h                             |  2 +-
 25 files changed, 55 insertions(+), 49 deletions(-)

(limited to 'include')

diff --git a/arch/s390/crypto/des_s390.c b/arch/s390/crypto/des_s390.c
index 5346b5a80bb6..0d15383d0ff1 100644
--- a/arch/s390/crypto/des_s390.c
+++ b/arch/s390/crypto/des_s390.c
@@ -38,7 +38,7 @@ static int des_setkey(struct crypto_tfm *tfm, const u8 *key,
 
 	/* check for weak keys */
 	if (!des_ekey(tmp, key) &&
-	    (tfm->crt_flags & CRYPTO_TFM_REQ_WEAK_KEY)) {
+	    (tfm->crt_flags & CRYPTO_TFM_REQ_FORBID_WEAK_KEYS)) {
 		tfm->crt_flags |= CRYPTO_TFM_RES_WEAK_KEY;
 		return -EINVAL;
 	}
@@ -228,7 +228,7 @@ static int des3_setkey(struct crypto_tfm *tfm, const u8 *key,
 	if (!(crypto_memneq(key, &key[DES_KEY_SIZE], DES_KEY_SIZE) &&
 	    crypto_memneq(&key[DES_KEY_SIZE], &key[DES_KEY_SIZE * 2],
 			  DES_KEY_SIZE)) &&
-	    (tfm->crt_flags & CRYPTO_TFM_REQ_WEAK_KEY)) {
+	    (tfm->crt_flags & CRYPTO_TFM_REQ_FORBID_WEAK_KEYS)) {
 		tfm->crt_flags |= CRYPTO_TFM_RES_WEAK_KEY;
 		return -EINVAL;
 	}
diff --git a/arch/sparc/crypto/des_glue.c b/arch/sparc/crypto/des_glue.c
index 56499ea39fd3..4884315daff4 100644
--- a/arch/sparc/crypto/des_glue.c
+++ b/arch/sparc/crypto/des_glue.c
@@ -53,7 +53,7 @@ static int des_set_key(struct crypto_tfm *tfm, const u8 *key,
 	 * weak key detection code.
 	 */
 	ret = des_ekey(tmp, key);
-	if (unlikely(ret == 0) && (*flags & CRYPTO_TFM_REQ_WEAK_KEY)) {
+	if (unlikely(ret == 0) && (*flags & CRYPTO_TFM_REQ_FORBID_WEAK_KEYS)) {
 		*flags |= CRYPTO_TFM_RES_WEAK_KEY;
 		return -EINVAL;
 	}
@@ -209,7 +209,7 @@ static int des3_ede_set_key(struct crypto_tfm *tfm, const u8 *key,
 
 	if (unlikely(!((K[0] ^ K[2]) | (K[1] ^ K[3])) ||
 		     !((K[2] ^ K[4]) | (K[3] ^ K[5]))) &&
-		     (*flags & CRYPTO_TFM_REQ_WEAK_KEY)) {
+		     (*flags & CRYPTO_TFM_REQ_FORBID_WEAK_KEYS)) {
 		*flags |= CRYPTO_TFM_RES_WEAK_KEY;
 		return -EINVAL;
 	}
diff --git a/crypto/des_generic.c b/crypto/des_generic.c
index a71720544d11..1e6621665dd9 100644
--- a/crypto/des_generic.c
+++ b/crypto/des_generic.c
@@ -789,7 +789,7 @@ static int des_setkey(struct crypto_tfm *tfm, const u8 *key,
 	/* Expand to tmp */
 	ret = des_ekey(tmp, key);
 
-	if (unlikely(ret == 0) && (*flags & CRYPTO_TFM_REQ_WEAK_KEY)) {
+	if (unlikely(ret == 0) && (*flags & CRYPTO_TFM_REQ_FORBID_WEAK_KEYS)) {
 		*flags |= CRYPTO_TFM_RES_WEAK_KEY;
 		return -EINVAL;
 	}
@@ -866,7 +866,7 @@ int __des3_ede_setkey(u32 *expkey, u32 *flags, const u8 *key,
 
 	if (unlikely(!((K[0] ^ K[2]) | (K[1] ^ K[3])) ||
 		     !((K[2] ^ K[4]) | (K[3] ^ K[5]))) &&
-		     (*flags & CRYPTO_TFM_REQ_WEAK_KEY)) {
+		     (*flags & CRYPTO_TFM_REQ_FORBID_WEAK_KEYS)) {
 		*flags |= CRYPTO_TFM_RES_WEAK_KEY;
 		return -EINVAL;
 	}
diff --git a/crypto/testmgr.c b/crypto/testmgr.c
index e4f3f5f688e7..4ac3d22256c3 100644
--- a/crypto/testmgr.c
+++ b/crypto/testmgr.c
@@ -706,7 +706,8 @@ static int __test_aead(struct crypto_aead *tfm, int enc,
 
 		crypto_aead_clear_flags(tfm, ~0);
 		if (template[i].wk)
-			crypto_aead_set_flags(tfm, CRYPTO_TFM_REQ_WEAK_KEY);
+			crypto_aead_set_flags(tfm,
+					      CRYPTO_TFM_REQ_FORBID_WEAK_KEYS);
 
 		if (template[i].klen > MAX_KEYLEN) {
 			pr_err("alg: aead%s: setkey failed on test %d for %s: key size %d > %d\n",
@@ -820,7 +821,8 @@ static int __test_aead(struct crypto_aead *tfm, int enc,
 
 		crypto_aead_clear_flags(tfm, ~0);
 		if (template[i].wk)
-			crypto_aead_set_flags(tfm, CRYPTO_TFM_REQ_WEAK_KEY);
+			crypto_aead_set_flags(tfm,
+					      CRYPTO_TFM_REQ_FORBID_WEAK_KEYS);
 		if (template[i].klen > MAX_KEYLEN) {
 			pr_err("alg: aead%s: setkey failed on test %d for %s: key size %d > %d\n",
 			       d, j, algo, template[i].klen, MAX_KEYLEN);
@@ -1078,7 +1080,7 @@ static int test_cipher(struct crypto_cipher *tfm, int enc,
 
 		crypto_cipher_clear_flags(tfm, ~0);
 		if (template[i].wk)
-			crypto_cipher_set_flags(tfm, CRYPTO_TFM_REQ_WEAK_KEY);
+			crypto_cipher_set_flags(tfm, CRYPTO_TFM_REQ_FORBID_WEAK_KEYS);
 
 		ret = crypto_cipher_setkey(tfm, template[i].key,
 					   template[i].klen);
@@ -1194,8 +1196,7 @@ static int __test_skcipher(struct crypto_skcipher *tfm, int enc,
 
 		crypto_skcipher_clear_flags(tfm, ~0);
 		if (template[i].wk)
-			crypto_skcipher_set_flags(tfm,
-						  CRYPTO_TFM_REQ_WEAK_KEY);
+			crypto_skcipher_set_flags(tfm, CRYPTO_TFM_REQ_FORBID_WEAK_KEYS);
 
 		ret = crypto_skcipher_setkey(tfm, template[i].key,
 					     template[i].klen);
@@ -1265,8 +1266,7 @@ static int __test_skcipher(struct crypto_skcipher *tfm, int enc,
 		j++;
 		crypto_skcipher_clear_flags(tfm, ~0);
 		if (template[i].wk)
-			crypto_skcipher_set_flags(tfm,
-						  CRYPTO_TFM_REQ_WEAK_KEY);
+			crypto_skcipher_set_flags(tfm, CRYPTO_TFM_REQ_FORBID_WEAK_KEYS);
 
 		ret = crypto_skcipher_setkey(tfm, template[i].key,
 					     template[i].klen);
diff --git a/crypto/testmgr.h b/crypto/testmgr.h
index 95297240b0f1..d8f6035c7ff2 100644
--- a/crypto/testmgr.h
+++ b/crypto/testmgr.h
@@ -50,7 +50,7 @@ struct hash_testvec {
  * @ctext:	Pointer to ciphertext
  * @len:	Length of @ptext and @ctext in bytes
  * @fail:	If set to one, the test need to fail
- * @wk:		Does the test need CRYPTO_TFM_REQ_WEAK_KEY
+ * @wk:		Does the test need CRYPTO_TFM_REQ_FORBID_WEAK_KEYS?
  * 		( e.g. test needs to fail due to a weak key )
  * @np: 	numbers of SG to distribute data in (from 1 to MAX_TAP)
  * @tap:	How to distribute data in @np SGs
@@ -91,7 +91,7 @@ struct cipher_testvec {
  * @anp:	Numbers of SG to distribute assoc data in
  * @fail:	setkey() failure expected?
  * @novrfy:	Decryption verification failure expected?
- * @wk:		Does the test need CRYPTO_TFM_REQ_WEAK_KEY?
+ * @wk:		Does the test need CRYPTO_TFM_REQ_FORBID_WEAK_KEYS?
  *		(e.g. setkey() needs to fail due to a weak key)
  * @klen:	Length of @key in bytes
  * @plen:	Length of @ptext in bytes
diff --git a/drivers/crypto/atmel-tdes.c b/drivers/crypto/atmel-tdes.c
index 438e1ffb2ec0..65bf1a299562 100644
--- a/drivers/crypto/atmel-tdes.c
+++ b/drivers/crypto/atmel-tdes.c
@@ -785,7 +785,7 @@ static int atmel_des_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
 	}
 
 	err = des_ekey(tmp, key);
-	if (err == 0 && (ctfm->crt_flags & CRYPTO_TFM_REQ_WEAK_KEY)) {
+	if (err == 0 && (ctfm->crt_flags & CRYPTO_TFM_REQ_FORBID_WEAK_KEYS)) {
 		ctfm->crt_flags |= CRYPTO_TFM_RES_WEAK_KEY;
 		return -EINVAL;
 	}
diff --git a/drivers/crypto/bcm/cipher.c b/drivers/crypto/bcm/cipher.c
index 2099d7bcfd44..28f592f7e1b7 100644
--- a/drivers/crypto/bcm/cipher.c
+++ b/drivers/crypto/bcm/cipher.c
@@ -1818,7 +1818,7 @@ static int des_setkey(struct crypto_ablkcipher *cipher, const u8 *key,
 	if (keylen == DES_KEY_SIZE) {
 		if (des_ekey(tmp, key) == 0) {
 			if (crypto_ablkcipher_get_flags(cipher) &
-			    CRYPTO_TFM_REQ_WEAK_KEY) {
+			    CRYPTO_TFM_REQ_FORBID_WEAK_KEYS) {
 				u32 flags = CRYPTO_TFM_RES_WEAK_KEY;
 
 				crypto_ablkcipher_set_flags(cipher, flags);
@@ -2872,7 +2872,7 @@ static int aead_authenc_setkey(struct crypto_aead *cipher,
 
 			if (des_ekey(tmp, keys.enckey) == 0) {
 				if (crypto_aead_get_flags(cipher) &
-				    CRYPTO_TFM_REQ_WEAK_KEY) {
+				    CRYPTO_TFM_REQ_FORBID_WEAK_KEYS) {
 					crypto_aead_set_flags(cipher, flags);
 					return -EINVAL;
 				}
diff --git a/drivers/crypto/ccp/ccp-crypto-des3.c b/drivers/crypto/ccp/ccp-crypto-des3.c
index ae87b741f9d5..c2ff551d215b 100644
--- a/drivers/crypto/ccp/ccp-crypto-des3.c
+++ b/drivers/crypto/ccp/ccp-crypto-des3.c
@@ -57,7 +57,7 @@ static int ccp_des3_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
 
 	if (unlikely(!((K[0] ^ K[2]) | (K[1] ^ K[3])) ||
 		     !((K[2] ^ K[4]) | (K[3] ^ K[5]))) &&
-		     (*flags & CRYPTO_TFM_REQ_WEAK_KEY)) {
+		     (*flags & CRYPTO_TFM_REQ_FORBID_WEAK_KEYS)) {
 		*flags |= CRYPTO_TFM_RES_WEAK_KEY;
 		return -EINVAL;
 	}
diff --git a/drivers/crypto/ccree/cc_cipher.c b/drivers/crypto/ccree/cc_cipher.c
index e202d7c7ea00..5e3361a363b5 100644
--- a/drivers/crypto/ccree/cc_cipher.c
+++ b/drivers/crypto/ccree/cc_cipher.c
@@ -352,7 +352,8 @@ static int cc_cipher_setkey(struct crypto_skcipher *sktfm, const u8 *key,
 			dev_dbg(dev, "weak 3DES key");
 			return -EINVAL;
 		} else if (!des_ekey(tmp, key) &&
-		    (crypto_tfm_get_flags(tfm) & CRYPTO_TFM_REQ_WEAK_KEY)) {
+			   (crypto_tfm_get_flags(tfm) &
+			    CRYPTO_TFM_REQ_FORBID_WEAK_KEYS)) {
 			tfm->crt_flags |= CRYPTO_TFM_RES_WEAK_KEY;
 			dev_dbg(dev, "weak DES key");
 			return -EINVAL;
diff --git a/drivers/crypto/hifn_795x.c b/drivers/crypto/hifn_795x.c
index a5a36fe7bf2c..dad212cabe63 100644
--- a/drivers/crypto/hifn_795x.c
+++ b/drivers/crypto/hifn_795x.c
@@ -1961,7 +1961,8 @@ static int hifn_setkey(struct crypto_ablkcipher *cipher, const u8 *key,
 		u32 tmp[DES_EXPKEY_WORDS];
 		int ret = des_ekey(tmp, key);
 
-		if (unlikely(ret == 0) && (tfm->crt_flags & CRYPTO_TFM_REQ_WEAK_KEY)) {
+		if (unlikely(ret == 0) &&
+		    (tfm->crt_flags & CRYPTO_TFM_REQ_FORBID_WEAK_KEYS)) {
 			tfm->crt_flags |= CRYPTO_TFM_RES_WEAK_KEY;
 			return -EINVAL;
 		}
diff --git a/drivers/crypto/inside-secure/safexcel_cipher.c b/drivers/crypto/inside-secure/safexcel_cipher.c
index d531c14020dc..7ef30a98cb24 100644
--- a/drivers/crypto/inside-secure/safexcel_cipher.c
+++ b/drivers/crypto/inside-secure/safexcel_cipher.c
@@ -940,7 +940,7 @@ static int safexcel_des_setkey(struct crypto_skcipher *ctfm, const u8 *key,
 	}
 
 	ret = des_ekey(tmp, key);
-	if (!ret && (tfm->crt_flags & CRYPTO_TFM_REQ_WEAK_KEY)) {
+	if (!ret && (tfm->crt_flags & CRYPTO_TFM_REQ_FORBID_WEAK_KEYS)) {
 		tfm->crt_flags |= CRYPTO_TFM_RES_WEAK_KEY;
 		return -EINVAL;
 	}
diff --git a/drivers/crypto/ixp4xx_crypto.c b/drivers/crypto/ixp4xx_crypto.c
index 19fba998b86b..95c1af227bd5 100644
--- a/drivers/crypto/ixp4xx_crypto.c
+++ b/drivers/crypto/ixp4xx_crypto.c
@@ -847,7 +847,7 @@ static int ablk_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
 		goto out;
 
 	if (*flags & CRYPTO_TFM_RES_WEAK_KEY) {
-		if (*flags & CRYPTO_TFM_REQ_WEAK_KEY) {
+		if (*flags & CRYPTO_TFM_REQ_FORBID_WEAK_KEYS) {
 			ret = -EINVAL;
 		} else {
 			*flags &= ~CRYPTO_TFM_RES_WEAK_KEY;
@@ -1125,7 +1125,7 @@ static int aead_setup(struct crypto_aead *tfm, unsigned int authsize)
 		goto out;
 
 	if (*flags & CRYPTO_TFM_RES_WEAK_KEY) {
-		if (*flags & CRYPTO_TFM_REQ_WEAK_KEY) {
+		if (*flags & CRYPTO_TFM_REQ_FORBID_WEAK_KEYS) {
 			ret = -EINVAL;
 			goto out;
 		} else {
diff --git a/drivers/crypto/marvell/cipher.c b/drivers/crypto/marvell/cipher.c
index 0ae84ec9e21c..066830dcc53e 100644
--- a/drivers/crypto/marvell/cipher.c
+++ b/drivers/crypto/marvell/cipher.c
@@ -286,7 +286,7 @@ static int mv_cesa_des_setkey(struct crypto_skcipher *cipher, const u8 *key,
 	}
 
 	ret = des_ekey(tmp, key);
-	if (!ret && (tfm->crt_flags & CRYPTO_TFM_REQ_WEAK_KEY)) {
+	if (!ret && (tfm->crt_flags & CRYPTO_TFM_REQ_FORBID_WEAK_KEYS)) {
 		tfm->crt_flags |= CRYPTO_TFM_RES_WEAK_KEY;
 		return -EINVAL;
 	}
diff --git a/drivers/crypto/n2_core.c b/drivers/crypto/n2_core.c
index 55f34cfc43ff..9450c41211b2 100644
--- a/drivers/crypto/n2_core.c
+++ b/drivers/crypto/n2_core.c
@@ -772,7 +772,7 @@ static int n2_des_setkey(struct crypto_ablkcipher *cipher, const u8 *key,
 	}
 
 	err = des_ekey(tmp, key);
-	if (err == 0 && (tfm->crt_flags & CRYPTO_TFM_REQ_WEAK_KEY)) {
+	if (err == 0 && (tfm->crt_flags & CRYPTO_TFM_REQ_FORBID_WEAK_KEYS)) {
 		tfm->crt_flags |= CRYPTO_TFM_RES_WEAK_KEY;
 		return -EINVAL;
 	}
diff --git a/drivers/crypto/omap-des.c b/drivers/crypto/omap-des.c
index 6369019219d4..1ba2633e90d6 100644
--- a/drivers/crypto/omap-des.c
+++ b/drivers/crypto/omap-des.c
@@ -662,7 +662,7 @@ static int omap_des_setkey(struct crypto_ablkcipher *cipher, const u8 *key,
 	pr_debug("enter, keylen: %d\n", keylen);
 
 	/* Do we need to test against weak key? */
-	if (tfm->crt_flags & CRYPTO_TFM_REQ_WEAK_KEY) {
+	if (tfm->crt_flags & CRYPTO_TFM_REQ_FORBID_WEAK_KEYS) {
 		u32 tmp[DES_EXPKEY_WORDS];
 		int ret = des_ekey(tmp, key);
 
diff --git a/drivers/crypto/picoxcell_crypto.c b/drivers/crypto/picoxcell_crypto.c
index 17068b55fea5..1b3acdeffede 100644
--- a/drivers/crypto/picoxcell_crypto.c
+++ b/drivers/crypto/picoxcell_crypto.c
@@ -759,7 +759,8 @@ static int spacc_des_setkey(struct crypto_ablkcipher *cipher, const u8 *key,
 	}
 
 	if (unlikely(!des_ekey(tmp, key)) &&
-	    (crypto_ablkcipher_get_flags(cipher) & CRYPTO_TFM_REQ_WEAK_KEY)) {
+	    (crypto_ablkcipher_get_flags(cipher) &
+	     CRYPTO_TFM_REQ_FORBID_WEAK_KEYS)) {
 		tfm->crt_flags |= CRYPTO_TFM_RES_WEAK_KEY;
 		return -EINVAL;
 	}
diff --git a/drivers/crypto/qce/ablkcipher.c b/drivers/crypto/qce/ablkcipher.c
index 25c13e26d012..154b6baa124e 100644
--- a/drivers/crypto/qce/ablkcipher.c
+++ b/drivers/crypto/qce/ablkcipher.c
@@ -180,8 +180,8 @@ static int qce_ablkcipher_setkey(struct crypto_ablkcipher *ablk, const u8 *key,
 		u32 tmp[DES_EXPKEY_WORDS];
 
 		ret = des_ekey(tmp, key);
-		if (!ret && crypto_ablkcipher_get_flags(ablk) &
-		    CRYPTO_TFM_REQ_WEAK_KEY)
+		if (!ret && (crypto_ablkcipher_get_flags(ablk) &
+			     CRYPTO_TFM_REQ_FORBID_WEAK_KEYS))
 			goto weakkey;
 	}
 
diff --git a/drivers/crypto/rockchip/rk3288_crypto_ablkcipher.c b/drivers/crypto/rockchip/rk3288_crypto_ablkcipher.c
index 639c15c5364b..87dd571466c1 100644
--- a/drivers/crypto/rockchip/rk3288_crypto_ablkcipher.c
+++ b/drivers/crypto/rockchip/rk3288_crypto_ablkcipher.c
@@ -60,7 +60,7 @@ static int rk_tdes_setkey(struct crypto_ablkcipher *cipher,
 
 	if (keylen == DES_KEY_SIZE) {
 		if (!des_ekey(tmp, key) &&
-		    (tfm->crt_flags & CRYPTO_TFM_REQ_WEAK_KEY)) {
+		    (tfm->crt_flags & CRYPTO_TFM_REQ_FORBID_WEAK_KEYS)) {
 			tfm->crt_flags |= CRYPTO_TFM_RES_WEAK_KEY;
 			return -EINVAL;
 		}
diff --git a/drivers/crypto/sunxi-ss/sun4i-ss-cipher.c b/drivers/crypto/sunxi-ss/sun4i-ss-cipher.c
index 5cf64746731a..54fd714d53ca 100644
--- a/drivers/crypto/sunxi-ss/sun4i-ss-cipher.c
+++ b/drivers/crypto/sunxi-ss/sun4i-ss-cipher.c
@@ -517,7 +517,7 @@ int sun4i_ss_des_setkey(struct crypto_skcipher *tfm, const u8 *key,
 	flags = crypto_skcipher_get_flags(tfm);
 
 	ret = des_ekey(tmp, key);
-	if (unlikely(!ret) && (flags & CRYPTO_TFM_REQ_WEAK_KEY)) {
+	if (unlikely(!ret) && (flags & CRYPTO_TFM_REQ_FORBID_WEAK_KEYS)) {
 		crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_WEAK_KEY);
 		dev_dbg(ss->dev, "Weak key %u\n", keylen);
 		return -EINVAL;
diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c
index f8e2c5c3f4eb..de78b54bcfb1 100644
--- a/drivers/crypto/talitos.c
+++ b/drivers/crypto/talitos.c
@@ -1535,7 +1535,7 @@ static int ablkcipher_setkey(struct crypto_ablkcipher *cipher,
 	}
 
 	if (unlikely(crypto_ablkcipher_get_flags(cipher) &
-		     CRYPTO_TFM_REQ_WEAK_KEY) &&
+		     CRYPTO_TFM_REQ_FORBID_WEAK_KEYS) &&
 	    !des_ekey(tmp, key)) {
 		crypto_ablkcipher_set_flags(cipher, CRYPTO_TFM_RES_WEAK_KEY);
 		return -EINVAL;
diff --git a/drivers/crypto/ux500/cryp/cryp_core.c b/drivers/crypto/ux500/cryp/cryp_core.c
index db94f89d8d11..3235611928f2 100644
--- a/drivers/crypto/ux500/cryp/cryp_core.c
+++ b/drivers/crypto/ux500/cryp/cryp_core.c
@@ -1000,10 +1000,11 @@ static int des_ablkcipher_setkey(struct crypto_ablkcipher *cipher,
 	}
 
 	ret = des_ekey(tmp, key);
-	if (unlikely(ret == 0) && (*flags & CRYPTO_TFM_REQ_WEAK_KEY)) {
+	if (unlikely(ret == 0) &&
+	    (*flags & CRYPTO_TFM_REQ_FORBID_WEAK_KEYS)) {
 		*flags |= CRYPTO_TFM_RES_WEAK_KEY;
-		pr_debug(DEV_DBG_NAME " [%s]: CRYPTO_TFM_REQ_WEAK_KEY",
-				__func__);
+		pr_debug(DEV_DBG_NAME " [%s]: CRYPTO_TFM_RES_WEAK_KEY",
+			 __func__);
 		return -EINVAL;
 	}
 
@@ -1034,18 +1035,19 @@ static int des3_ablkcipher_setkey(struct crypto_ablkcipher *cipher,
 	/* Checking key interdependency for weak key detection. */
 	if (unlikely(!((K[0] ^ K[2]) | (K[1] ^ K[3])) ||
 				!((K[2] ^ K[4]) | (K[3] ^ K[5]))) &&
-			(*flags & CRYPTO_TFM_REQ_WEAK_KEY)) {
+			(*flags & CRYPTO_TFM_REQ_FORBID_WEAK_KEYS)) {
 		*flags |= CRYPTO_TFM_RES_WEAK_KEY;
-		pr_debug(DEV_DBG_NAME " [%s]: CRYPTO_TFM_REQ_WEAK_KEY",
-				__func__);
+		pr_debug(DEV_DBG_NAME " [%s]: CRYPTO_TFM_RES_WEAK_KEY",
+			 __func__);
 		return -EINVAL;
 	}
 	for (i = 0; i < 3; i++) {
 		ret = des_ekey(tmp, key + i*DES_KEY_SIZE);
-		if (unlikely(ret == 0) && (*flags & CRYPTO_TFM_REQ_WEAK_KEY)) {
+		if (unlikely(ret == 0) &&
+		    (*flags & CRYPTO_TFM_REQ_FORBID_WEAK_KEYS)) {
 			*flags |= CRYPTO_TFM_RES_WEAK_KEY;
-			pr_debug(DEV_DBG_NAME " [%s]: "
-					"CRYPTO_TFM_REQ_WEAK_KEY", __func__);
+			pr_debug(DEV_DBG_NAME " [%s]: CRYPTO_TFM_RES_WEAK_KEY",
+				 __func__);
 			return -EINVAL;
 		}
 	}
diff --git a/fs/crypto/keyinfo.c b/fs/crypto/keyinfo.c
index 1e11a683f63d..322ce9686bdb 100644
--- a/fs/crypto/keyinfo.c
+++ b/fs/crypto/keyinfo.c
@@ -47,7 +47,7 @@ static int derive_key_aes(const u8 *master_key,
 		tfm = NULL;
 		goto out;
 	}
-	crypto_skcipher_set_flags(tfm, CRYPTO_TFM_REQ_WEAK_KEY);
+	crypto_skcipher_set_flags(tfm, CRYPTO_TFM_REQ_FORBID_WEAK_KEYS);
 	req = skcipher_request_alloc(tfm, GFP_NOFS);
 	if (!req) {
 		res = -ENOMEM;
@@ -257,7 +257,7 @@ allocate_skcipher_for_mode(struct fscrypt_mode *mode, const u8 *raw_key,
 			mode->friendly_name,
 			crypto_skcipher_alg(tfm)->base.cra_driver_name);
 	}
-	crypto_skcipher_set_flags(tfm, CRYPTO_TFM_REQ_WEAK_KEY);
+	crypto_skcipher_set_flags(tfm, CRYPTO_TFM_REQ_FORBID_WEAK_KEYS);
 	err = crypto_skcipher_setkey(tfm, raw_key, mode->keysize);
 	if (err)
 		goto err_free_tfm;
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index 4dd842f72846..f664da55234e 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -610,7 +610,8 @@ int ecryptfs_init_crypt_ctx(struct ecryptfs_crypt_stat *crypt_stat)
 				full_alg_name);
 		goto out_free;
 	}
-	crypto_skcipher_set_flags(crypt_stat->tfm, CRYPTO_TFM_REQ_WEAK_KEY);
+	crypto_skcipher_set_flags(crypt_stat->tfm,
+				  CRYPTO_TFM_REQ_FORBID_WEAK_KEYS);
 	rc = 0;
 out_free:
 	kfree(full_alg_name);
@@ -1590,7 +1591,7 @@ ecryptfs_process_key_cipher(struct crypto_skcipher **key_tfm,
 		       "[%s]; rc = [%d]\n", full_alg_name, rc);
 		goto out;
 	}
-	crypto_skcipher_set_flags(*key_tfm, CRYPTO_TFM_REQ_WEAK_KEY);
+	crypto_skcipher_set_flags(*key_tfm, CRYPTO_TFM_REQ_FORBID_WEAK_KEYS);
 	if (*key_size == 0)
 		*key_size = crypto_skcipher_default_keysize(*key_tfm);
 	get_random_bytes(dummy_key, *key_size);
diff --git a/include/crypto/xts.h b/include/crypto/xts.h
index 34d94c95445a..75fd96ff976b 100644
--- a/include/crypto/xts.h
+++ b/include/crypto/xts.h
@@ -47,8 +47,8 @@ static inline int xts_verify_key(struct crypto_skcipher *tfm,
 	}
 
 	/* ensure that the AES and tweak key are not identical */
-	if ((fips_enabled || crypto_skcipher_get_flags(tfm) &
-			     CRYPTO_TFM_REQ_WEAK_KEY) &&
+	if ((fips_enabled || (crypto_skcipher_get_flags(tfm) &
+			      CRYPTO_TFM_REQ_FORBID_WEAK_KEYS)) &&
 	    !crypto_memneq(key, key + (keylen / 2), keylen / 2)) {
 		crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_WEAK_KEY);
 		return -EINVAL;
diff --git a/include/linux/crypto.h b/include/linux/crypto.h
index c3c98a62e503..f2565a103158 100644
--- a/include/linux/crypto.h
+++ b/include/linux/crypto.h
@@ -118,7 +118,7 @@
 #define CRYPTO_TFM_REQ_MASK		0x000fff00
 #define CRYPTO_TFM_RES_MASK		0xfff00000
 
-#define CRYPTO_TFM_REQ_WEAK_KEY		0x00000100
+#define CRYPTO_TFM_REQ_FORBID_WEAK_KEYS	0x00000100
 #define CRYPTO_TFM_REQ_MAY_SLEEP	0x00000200
 #define CRYPTO_TFM_REQ_MAY_BACKLOG	0x00000400
 #define CRYPTO_TFM_RES_WEAK_KEY		0x00100000
-- 
cgit v1.2.3


From 390298e86f8a2e8b428ffb259a323667229ff822 Mon Sep 17 00:00:00 2001
From: Toke Høiland-Jørgensen <toke@redhat.com>
Date: Tue, 22 Jan 2019 15:20:16 +0100
Subject: mac80211: Expose ieee80211_schedule_txq() function
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Since we reworked ieee80211_return_txq() so it assumes that the caller
takes care of logging, we need another function that can be called without
holding any locks. Introduce ieee80211_schedule_txq() which serves this
purpose.

Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h    | 13 +++++++++++++
 net/mac80211/driver-ops.h |  4 +---
 net/mac80211/tx.c         | 13 +++++++++++++
 3 files changed, 27 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index a285c2bfd14e..294a8a36012a 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -6209,6 +6209,19 @@ void ieee80211_txq_schedule_start(struct ieee80211_hw *hw, u8 ac)
 void ieee80211_txq_schedule_end(struct ieee80211_hw *hw, u8 ac)
 	__releases(txq_lock);
 
+/**
+ * ieee80211_schedule_txq - schedule a TXQ for transmission
+ *
+ * @hw: pointer as obtained from ieee80211_alloc_hw()
+ * @txq: pointer obtained from station or virtual interface
+ *
+ * Schedules a TXQ for transmission if it is not already scheduled. Takes a
+ * lock, which means it must *not* be called between
+ * ieee80211_txq_schedule_start() and ieee80211_txq_schedule_end()
+ */
+void ieee80211_schedule_txq(struct ieee80211_hw *hw, struct ieee80211_txq *txq)
+	__acquires(txq_lock) __releases(txq_lock);
+
 /**
  * ieee80211_txq_may_transmit - check whether TXQ is allowed to transmit
  *
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index 1aab1734b26f..ba3c07b10cd0 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -1176,9 +1176,7 @@ static inline void drv_wake_tx_queue(struct ieee80211_local *local,
 static inline void schedule_and_wake_txq(struct ieee80211_local *local,
 					 struct txq_info *txqi)
 {
-	spin_lock_bh(&local->active_txq_lock[txqi->txq.ac]);
-	ieee80211_return_txq(&local->hw, &txqi->txq);
-	spin_unlock_bh(&local->active_txq_lock[txqi->txq.ac]);
+	ieee80211_schedule_txq(&local->hw, &txqi->txq);
 	drv_wake_tx_queue(local, txqi);
 }
 
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index f46d8d822f86..037f95e39a45 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -3703,6 +3703,19 @@ void ieee80211_return_txq(struct ieee80211_hw *hw,
 }
 EXPORT_SYMBOL(ieee80211_return_txq);
 
+void ieee80211_schedule_txq(struct ieee80211_hw *hw,
+			    struct ieee80211_txq *txq)
+	__acquires(txq_lock) __releases(txq_lock)
+{
+	struct ieee80211_local *local = hw_to_local(hw);
+	struct txq_info *txqi = to_txq_info(txq);
+
+	spin_lock_bh(&local->active_txq_lock[txq->ac]);
+	ieee80211_return_txq(hw, txq);
+	spin_unlock_bh(&local->active_txq_lock[txq->ac]);
+}
+EXPORT_SYMBOL(ieee80211_schedule_txq);
+
 bool ieee80211_txq_may_transmit(struct ieee80211_hw *hw,
 				struct ieee80211_txq *txq)
 {
-- 
cgit v1.2.3


From ab4dfa20534e32e48de6b761b42d943518fb26f7 Mon Sep 17 00:00:00 2001
From: Veerendranath Jakkam <vjakkam@codeaurora.org>
Date: Wed, 19 Dec 2018 22:52:25 +0530
Subject: cfg80211: Allow drivers to advertise supported AKM suites

There was no such capability advertisement from the driver and thus the
current user space has to assume the driver to support all the AKMs. While
that may be the case with some drivers (e.g., mac80211-based ones), there
are cfg80211-based drivers that implement SME and have constraints on
which AKMs can be supported (e.g., such drivers may need an update to
support SAE AKM using NL80211_CMD_EXTERNAL_AUTH). Allow such drivers to
advertise the exact set of supported AKMs so that user space tools can
determine what network profile options should be allowed to be configured.

Signed-off-by: Veerendranath Jakkam <vjakkam@codeaurora.org>
[pmsr data might be big, start a new netlink message section]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h       | 5 +++++
 include/uapi/linux/nl80211.h | 6 ++++++
 net/wireless/nl80211.c       | 9 +++++++++
 3 files changed, 20 insertions(+)

(limited to 'include')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 9c1d7979c200..b61ac6e9de08 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -4128,6 +4128,8 @@ struct cfg80211_pmsr_capabilities {
  * @signal_type: signal type reported in &struct cfg80211_bss.
  * @cipher_suites: supported cipher suites
  * @n_cipher_suites: number of supported cipher suites
+ * @akm_suites: supported AKM suites
+ * @n_akm_suites: number of supported AKM suites
  * @retry_short: Retry limit for short frames (dot11ShortRetryLimit)
  * @retry_long: Retry limit for long frames (dot11LongRetryLimit)
  * @frag_threshold: Fragmentation threshold (dot11FragmentationThreshold);
@@ -4326,6 +4328,9 @@ struct wiphy {
 	int n_cipher_suites;
 	const u32 *cipher_suites;
 
+	int n_akm_suites;
+	const u32 *akm_suites;
+
 	u8 retry_short;
 	u8 retry_long;
 	u32 frag_threshold;
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 426db4d8f71c..5f9d5cd458a1 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -1565,6 +1565,12 @@ enum nl80211_commands {
  *	(a u32 with flags from &enum nl80211_wpa_versions).
  * @NL80211_ATTR_AKM_SUITES: Used with CONNECT, ASSOCIATE, and NEW_BEACON to
  *	indicate which key management algorithm(s) to use (an array of u32).
+ *	This attribute is also sent in response to @NL80211_CMD_GET_WIPHY,
+ *	indicating the supported AKM suites, intended for specific drivers which
+ *	implement SME and have constraints on which AKMs are supported and also
+ *	the cases where an AKM support is offloaded to the driver/firmware.
+ *	If there is no such notification from the driver, user space should
+ *	assume the driver supports all the AKM suites.
  *
  * @NL80211_ATTR_REQ_IE: (Re)association request information elements as
  *	sent out by the card, for ROAM and successful CONNECT events.
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index e5f9c9ceb6c9..eb4437fa0539 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -2279,6 +2279,15 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev,
 		if (nl80211_send_pmsr_capa(rdev, msg))
 			goto nla_put_failure;
 
+		state->split_start++;
+		break;
+	case 15:
+		if (rdev->wiphy.akm_suites &&
+		    nla_put(msg, NL80211_ATTR_AKM_SUITES,
+			    sizeof(u32) * rdev->wiphy.n_akm_suites,
+			    rdev->wiphy.akm_suites))
+			goto nla_put_failure;
+
 		/* done */
 		state->split_start = 0;
 		break;
-- 
cgit v1.2.3


From d32dde2c5a110f2d69add70d7a9f1c20d82d0ef1 Mon Sep 17 00:00:00 2001
From: Jon Hunter <jonathanh@nvidia.com>
Date: Fri, 25 Jan 2019 11:22:51 +0100
Subject: soc/tegra: pmc: Make tegra_powergate_is_powered() a local function

Now there are no more external users of tegra_powergate_is_powered(),
make this a local function.

Signed-off-by: Jon Hunter <jonathanh@nvidia.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/soc/tegra/pmc.c | 2 +-
 include/soc/tegra/pmc.h | 6 ------
 2 files changed, 1 insertion(+), 7 deletions(-)

(limited to 'include')

diff --git a/drivers/soc/tegra/pmc.c b/drivers/soc/tegra/pmc.c
index 50ebf98e04ee..986f2171ff38 100644
--- a/drivers/soc/tegra/pmc.c
+++ b/drivers/soc/tegra/pmc.c
@@ -637,7 +637,7 @@ EXPORT_SYMBOL(tegra_powergate_power_off);
  * tegra_powergate_is_powered() - check if partition is powered
  * @id: partition ID
  */
-int tegra_powergate_is_powered(unsigned int id)
+static int tegra_powergate_is_powered(unsigned int id)
 {
 	if (!tegra_powergate_is_valid(id))
 		return -EINVAL;
diff --git a/include/soc/tegra/pmc.h b/include/soc/tegra/pmc.h
index a9db1b501de1..b32ee5d82dd4 100644
--- a/include/soc/tegra/pmc.h
+++ b/include/soc/tegra/pmc.h
@@ -161,7 +161,6 @@ enum tegra_io_pad {
 #define TEGRA_IO_RAIL_LVDS	TEGRA_IO_PAD_LVDS
 
 #ifdef CONFIG_SOC_TEGRA_PMC
-int tegra_powergate_is_powered(unsigned int id);
 int tegra_powergate_power_on(unsigned int id);
 int tegra_powergate_power_off(unsigned int id);
 int tegra_powergate_remove_clamping(unsigned int id);
@@ -182,11 +181,6 @@ void tegra_pmc_set_suspend_mode(enum tegra_suspend_mode mode);
 void tegra_pmc_enter_suspend_mode(enum tegra_suspend_mode mode);
 
 #else
-static inline int tegra_powergate_is_powered(unsigned int id)
-{
-	return -ENOSYS;
-}
-
 static inline int tegra_powergate_power_on(unsigned int id)
 {
 	return -ENOSYS;
-- 
cgit v1.2.3


From cdfa358b248efd36c6a9cb4d4d0a3ba7509f8387 Mon Sep 17 00:00:00 2001
From: Timo Alho <talho@nvidia.com>
Date: Thu, 24 Jan 2019 19:03:53 +0200
Subject: firmware: tegra: Refactor BPMP driver

Split BPMP driver into common and chip specific parts to facilitate
adding support for previous and future Tegra chips that are using BPMP
as co-processor.

Signed-off-by: Timo Alho <talho@nvidia.com>
Acked-by: Jon Hunter <jonathanh@nvidia.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/firmware/tegra/Makefile        |   1 +
 drivers/firmware/tegra/bpmp-private.h  |  28 +++
 drivers/firmware/tegra/bpmp-tegra186.c | 310 +++++++++++++++++++++++++++++++++
 drivers/firmware/tegra/bpmp.c          | 257 ++++++---------------------
 include/soc/tegra/bpmp.h               |  12 +-
 5 files changed, 400 insertions(+), 208 deletions(-)
 create mode 100644 drivers/firmware/tegra/bpmp-private.h
 create mode 100644 drivers/firmware/tegra/bpmp-tegra186.c

(limited to 'include')

diff --git a/drivers/firmware/tegra/Makefile b/drivers/firmware/tegra/Makefile
index 1b826dcca719..559355674bca 100644
--- a/drivers/firmware/tegra/Makefile
+++ b/drivers/firmware/tegra/Makefile
@@ -1,4 +1,5 @@
 tegra-bpmp-y			= bpmp.o
+tegra-bpmp-$(CONFIG_ARCH_TEGRA_186_SOC)	+= bpmp-tegra186.o
 tegra-bpmp-$(CONFIG_DEBUG_FS)	+= bpmp-debugfs.o
 obj-$(CONFIG_TEGRA_BPMP)	+= tegra-bpmp.o
 obj-$(CONFIG_TEGRA_IVC)		+= ivc.o
diff --git a/drivers/firmware/tegra/bpmp-private.h b/drivers/firmware/tegra/bpmp-private.h
new file mode 100644
index 000000000000..6fb10f1cfb8f
--- /dev/null
+++ b/drivers/firmware/tegra/bpmp-private.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2018, NVIDIA CORPORATION.
+ */
+
+#ifndef __FIRMWARE_TEGRA_BPMP_PRIVATE_H
+#define __FIRMWARE_TEGRA_BPMP_PRIVATE_H
+
+#include <soc/tegra/bpmp.h>
+
+struct tegra_bpmp_ops {
+	int (*init)(struct tegra_bpmp *bpmp);
+	void (*deinit)(struct tegra_bpmp *bpmp);
+	bool (*is_response_ready)(struct tegra_bpmp_channel *channel);
+	bool (*is_request_ready)(struct tegra_bpmp_channel *channel);
+	int (*ack_response)(struct tegra_bpmp_channel *channel);
+	int (*ack_request)(struct tegra_bpmp_channel *channel);
+	bool (*is_response_channel_free)(struct tegra_bpmp_channel *channel);
+	bool (*is_request_channel_free)(struct tegra_bpmp_channel *channel);
+	int (*post_response)(struct tegra_bpmp_channel *channel);
+	int (*post_request)(struct tegra_bpmp_channel *channel);
+	int (*ring_doorbell)(struct tegra_bpmp *bpmp);
+	int (*resume)(struct tegra_bpmp *bpmp);
+};
+
+extern const struct tegra_bpmp_ops tegra186_bpmp_ops;
+
+#endif
diff --git a/drivers/firmware/tegra/bpmp-tegra186.c b/drivers/firmware/tegra/bpmp-tegra186.c
new file mode 100644
index 000000000000..89bb32b97345
--- /dev/null
+++ b/drivers/firmware/tegra/bpmp-tegra186.c
@@ -0,0 +1,310 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2018, NVIDIA CORPORATION.
+ */
+
+#include <linux/clk/tegra.h>
+#include <linux/genalloc.h>
+#include <linux/mailbox_client.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+#include <linux/semaphore.h>
+
+#include <soc/tegra/bpmp.h>
+#include <soc/tegra/bpmp-abi.h>
+#include <soc/tegra/ivc.h>
+
+#include "bpmp-private.h"
+
+struct tegra186_bpmp {
+	struct tegra_bpmp *parent;
+
+	struct {
+		struct gen_pool *pool;
+		dma_addr_t phys;
+		void *virt;
+	} tx, rx;
+
+	struct {
+		struct mbox_client client;
+		struct mbox_chan *channel;
+	} mbox;
+};
+
+static inline struct tegra_bpmp *
+mbox_client_to_bpmp(struct mbox_client *client)
+{
+	struct tegra186_bpmp *priv;
+
+	priv = container_of(client, struct tegra186_bpmp, mbox.client);
+
+	return priv->parent;
+}
+
+static bool tegra186_bpmp_is_message_ready(struct tegra_bpmp_channel *channel)
+{
+	void *frame;
+
+	frame = tegra_ivc_read_get_next_frame(channel->ivc);
+	if (IS_ERR(frame)) {
+		channel->ib = NULL;
+		return false;
+	}
+
+	channel->ib = frame;
+
+	return true;
+}
+
+static bool tegra186_bpmp_is_channel_free(struct tegra_bpmp_channel *channel)
+{
+	void *frame;
+
+	frame = tegra_ivc_write_get_next_frame(channel->ivc);
+	if (IS_ERR(frame)) {
+		channel->ob = NULL;
+		return false;
+	}
+
+	channel->ob = frame;
+
+	return true;
+}
+
+static int tegra186_bpmp_ack_message(struct tegra_bpmp_channel *channel)
+{
+	return tegra_ivc_read_advance(channel->ivc);
+}
+
+static int tegra186_bpmp_post_message(struct tegra_bpmp_channel *channel)
+{
+	return tegra_ivc_write_advance(channel->ivc);
+}
+
+static int tegra186_bpmp_ring_doorbell(struct tegra_bpmp *bpmp)
+{
+	struct tegra186_bpmp *priv = bpmp->priv;
+	int err;
+
+	err = mbox_send_message(priv->mbox.channel, NULL);
+	if (err < 0)
+		return err;
+
+	mbox_client_txdone(priv->mbox.channel, 0);
+
+	return 0;
+}
+
+static void tegra186_bpmp_ivc_notify(struct tegra_ivc *ivc, void *data)
+{
+	struct tegra_bpmp *bpmp = data;
+	struct tegra186_bpmp *priv = bpmp->priv;
+
+	if (WARN_ON(priv->mbox.channel == NULL))
+		return;
+
+	tegra186_bpmp_ring_doorbell(bpmp);
+}
+
+static int tegra186_bpmp_channel_init(struct tegra_bpmp_channel *channel,
+				      struct tegra_bpmp *bpmp,
+				      unsigned int index)
+{
+	struct tegra186_bpmp *priv = bpmp->priv;
+	size_t message_size, queue_size;
+	unsigned int offset;
+	int err;
+
+	channel->ivc = devm_kzalloc(bpmp->dev, sizeof(*channel->ivc),
+				    GFP_KERNEL);
+	if (!channel->ivc)
+		return -ENOMEM;
+
+	message_size = tegra_ivc_align(MSG_MIN_SZ);
+	queue_size = tegra_ivc_total_queue_size(message_size);
+	offset = queue_size * index;
+
+	err = tegra_ivc_init(channel->ivc, NULL,
+			     priv->rx.virt + offset, priv->rx.phys + offset,
+			     priv->tx.virt + offset, priv->tx.phys + offset,
+			     1, message_size, tegra186_bpmp_ivc_notify,
+			     bpmp);
+	if (err < 0) {
+		dev_err(bpmp->dev, "failed to setup IVC for channel %u: %d\n",
+			index, err);
+		return err;
+	}
+
+	init_completion(&channel->completion);
+	channel->bpmp = bpmp;
+
+	return 0;
+}
+
+static void tegra186_bpmp_channel_reset(struct tegra_bpmp_channel *channel)
+{
+	/* reset the channel state */
+	tegra_ivc_reset(channel->ivc);
+
+	/* sync the channel state with BPMP */
+	while (tegra_ivc_notified(channel->ivc))
+		;
+}
+
+static void tegra186_bpmp_channel_cleanup(struct tegra_bpmp_channel *channel)
+{
+	tegra_ivc_cleanup(channel->ivc);
+}
+
+static void mbox_handle_rx(struct mbox_client *client, void *data)
+{
+	struct tegra_bpmp *bpmp = mbox_client_to_bpmp(client);
+
+	tegra_bpmp_handle_rx(bpmp);
+}
+
+static int tegra186_bpmp_init(struct tegra_bpmp *bpmp)
+{
+	struct tegra186_bpmp *priv;
+	unsigned int i;
+	int err;
+
+	priv = devm_kzalloc(bpmp->dev, sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	bpmp->priv = priv;
+	priv->parent = bpmp;
+
+	priv->tx.pool = of_gen_pool_get(bpmp->dev->of_node, "shmem", 0);
+	if (!priv->tx.pool) {
+		dev_err(bpmp->dev, "TX shmem pool not found\n");
+		return -ENOMEM;
+	}
+
+	priv->tx.virt = gen_pool_dma_alloc(priv->tx.pool, 4096, &priv->tx.phys);
+	if (!priv->tx.virt) {
+		dev_err(bpmp->dev, "failed to allocate from TX pool\n");
+		return -ENOMEM;
+	}
+
+	priv->rx.pool = of_gen_pool_get(bpmp->dev->of_node, "shmem", 1);
+	if (!priv->rx.pool) {
+		dev_err(bpmp->dev, "RX shmem pool not found\n");
+		err = -ENOMEM;
+		goto free_tx;
+	}
+
+	priv->rx.virt = gen_pool_dma_alloc(priv->rx.pool, 4096, &priv->rx.phys);
+	if (!priv->rx.virt) {
+		dev_err(bpmp->dev, "failed to allocate from RX pool\n");
+		err = -ENOMEM;
+		goto free_tx;
+	}
+
+	err = tegra186_bpmp_channel_init(bpmp->tx_channel, bpmp,
+					 bpmp->soc->channels.cpu_tx.offset);
+	if (err < 0)
+		goto free_rx;
+
+	err = tegra186_bpmp_channel_init(bpmp->rx_channel, bpmp,
+					 bpmp->soc->channels.cpu_rx.offset);
+	if (err < 0)
+		goto cleanup_tx_channel;
+
+	for (i = 0; i < bpmp->threaded.count; i++) {
+		unsigned int index = bpmp->soc->channels.thread.offset + i;
+
+		err = tegra186_bpmp_channel_init(&bpmp->threaded_channels[i],
+						 bpmp, index);
+		if (err < 0)
+			goto cleanup_channels;
+	}
+
+	/* mbox registration */
+	priv->mbox.client.dev = bpmp->dev;
+	priv->mbox.client.rx_callback = mbox_handle_rx;
+	priv->mbox.client.tx_block = false;
+	priv->mbox.client.knows_txdone = false;
+
+	priv->mbox.channel = mbox_request_channel(&priv->mbox.client, 0);
+	if (IS_ERR(priv->mbox.channel)) {
+		err = PTR_ERR(priv->mbox.channel);
+		dev_err(bpmp->dev, "failed to get HSP mailbox: %d\n", err);
+		goto cleanup_channels;
+	}
+
+	tegra186_bpmp_channel_reset(bpmp->tx_channel);
+	tegra186_bpmp_channel_reset(bpmp->rx_channel);
+
+	for (i = 0; i < bpmp->threaded.count; i++)
+		tegra186_bpmp_channel_reset(&bpmp->threaded_channels[i]);
+
+	return 0;
+
+cleanup_channels:
+	for (i = 0; i < bpmp->threaded.count; i++) {
+		if (!bpmp->threaded_channels[i].bpmp)
+			continue;
+
+		tegra186_bpmp_channel_cleanup(&bpmp->threaded_channels[i]);
+	}
+
+	tegra186_bpmp_channel_cleanup(bpmp->rx_channel);
+cleanup_tx_channel:
+	tegra186_bpmp_channel_cleanup(bpmp->tx_channel);
+free_rx:
+	gen_pool_free(priv->rx.pool, (unsigned long)priv->rx.virt, 4096);
+free_tx:
+	gen_pool_free(priv->tx.pool, (unsigned long)priv->tx.virt, 4096);
+
+	return err;
+}
+
+static void tegra186_bpmp_deinit(struct tegra_bpmp *bpmp)
+{
+	struct tegra186_bpmp *priv = bpmp->priv;
+	unsigned int i;
+
+	mbox_free_channel(priv->mbox.channel);
+
+	for (i = 0; i < bpmp->threaded.count; i++)
+		tegra186_bpmp_channel_cleanup(&bpmp->threaded_channels[i]);
+
+	tegra186_bpmp_channel_cleanup(bpmp->rx_channel);
+	tegra186_bpmp_channel_cleanup(bpmp->tx_channel);
+
+	gen_pool_free(priv->rx.pool, (unsigned long)priv->rx.virt, 4096);
+	gen_pool_free(priv->tx.pool, (unsigned long)priv->tx.virt, 4096);
+}
+
+static int tegra186_bpmp_resume(struct tegra_bpmp *bpmp)
+{
+	unsigned int i;
+
+	/* reset message channels */
+	tegra186_bpmp_channel_reset(bpmp->tx_channel);
+	tegra186_bpmp_channel_reset(bpmp->rx_channel);
+
+	for (i = 0; i < bpmp->threaded.count; i++)
+		tegra186_bpmp_channel_reset(&bpmp->threaded_channels[i]);
+
+	return 0;
+}
+
+const struct tegra_bpmp_ops tegra186_bpmp_ops = {
+	.init = tegra186_bpmp_init,
+	.deinit = tegra186_bpmp_deinit,
+	.is_response_ready = tegra186_bpmp_is_message_ready,
+	.is_request_ready = tegra186_bpmp_is_message_ready,
+	.ack_response = tegra186_bpmp_ack_message,
+	.ack_request = tegra186_bpmp_ack_message,
+	.is_response_channel_free = tegra186_bpmp_is_channel_free,
+	.is_request_channel_free = tegra186_bpmp_is_channel_free,
+	.post_response = tegra186_bpmp_post_message,
+	.post_request = tegra186_bpmp_post_message,
+	.ring_doorbell = tegra186_bpmp_ring_doorbell,
+	.resume = tegra186_bpmp_resume,
+};
diff --git a/drivers/firmware/tegra/bpmp.c b/drivers/firmware/tegra/bpmp.c
index e7fd68ebedd0..2b498deacdbc 100644
--- a/drivers/firmware/tegra/bpmp.c
+++ b/drivers/firmware/tegra/bpmp.c
@@ -26,6 +26,8 @@
 #include <soc/tegra/bpmp-abi.h>
 #include <soc/tegra/ivc.h>
 
+#include "bpmp-private.h"
+
 #define MSG_ACK		BIT(0)
 #define MSG_RING	BIT(1)
 #define TAG_SZ		32
@@ -36,6 +38,14 @@ mbox_client_to_bpmp(struct mbox_client *client)
 	return container_of(client, struct tegra_bpmp, mbox.client);
 }
 
+static inline const struct tegra_bpmp_ops *
+channel_to_ops(struct tegra_bpmp_channel *channel)
+{
+	struct tegra_bpmp *bpmp = channel->bpmp;
+
+	return bpmp->soc->ops;
+}
+
 struct tegra_bpmp *tegra_bpmp_get(struct device *dev)
 {
 	struct platform_device *pdev;
@@ -98,22 +108,16 @@ static bool tegra_bpmp_message_valid(const struct tegra_bpmp_message *msg)
 
 static bool tegra_bpmp_is_response_ready(struct tegra_bpmp_channel *channel)
 {
-	void *frame;
+	const struct tegra_bpmp_ops *ops = channel_to_ops(channel);
 
-	frame = tegra_ivc_read_get_next_frame(channel->ivc);
-	if (IS_ERR(frame)) {
-		channel->ib = NULL;
-		return false;
-	}
-
-	channel->ib = frame;
-
-	return true;
+	return ops->is_response_ready(channel);
 }
 
 static bool tegra_bpmp_is_request_ready(struct tegra_bpmp_channel *channel)
 {
-	return tegra_bpmp_is_response_ready(channel);
+	const struct tegra_bpmp_ops *ops = channel_to_ops(channel);
+
+	return ops->is_request_ready(channel);
 }
 
 static int tegra_bpmp_wait_response(struct tegra_bpmp_channel *channel)
@@ -133,34 +137,32 @@ static int tegra_bpmp_wait_response(struct tegra_bpmp_channel *channel)
 
 static int tegra_bpmp_ack_response(struct tegra_bpmp_channel *channel)
 {
-	return tegra_ivc_read_advance(channel->ivc);
+	const struct tegra_bpmp_ops *ops = channel_to_ops(channel);
+
+	return ops->ack_response(channel);
 }
 
 static int tegra_bpmp_ack_request(struct tegra_bpmp_channel *channel)
 {
-	return tegra_ivc_read_advance(channel->ivc);
+	const struct tegra_bpmp_ops *ops = channel_to_ops(channel);
+
+	return ops->ack_request(channel);
 }
 
 static bool
 tegra_bpmp_is_request_channel_free(struct tegra_bpmp_channel *channel)
 {
-	void *frame;
-
-	frame = tegra_ivc_write_get_next_frame(channel->ivc);
-	if (IS_ERR(frame)) {
-		channel->ob = NULL;
-		return false;
-	}
-
-	channel->ob = frame;
+	const struct tegra_bpmp_ops *ops = channel_to_ops(channel);
 
-	return true;
+	return ops->is_request_channel_free(channel);
 }
 
 static bool
 tegra_bpmp_is_response_channel_free(struct tegra_bpmp_channel *channel)
 {
-	return tegra_bpmp_is_request_channel_free(channel);
+	const struct tegra_bpmp_ops *ops = channel_to_ops(channel);
+
+	return ops->is_response_channel_free(channel);
 }
 
 static int
@@ -183,28 +185,21 @@ tegra_bpmp_wait_request_channel_free(struct tegra_bpmp_channel *channel)
 
 static int tegra_bpmp_post_request(struct tegra_bpmp_channel *channel)
 {
-	return tegra_ivc_write_advance(channel->ivc);
+	const struct tegra_bpmp_ops *ops = channel_to_ops(channel);
+
+	return ops->post_request(channel);
 }
 
 static int tegra_bpmp_post_response(struct tegra_bpmp_channel *channel)
 {
-	return tegra_ivc_write_advance(channel->ivc);
+	const struct tegra_bpmp_ops *ops = channel_to_ops(channel);
+
+	return ops->post_response(channel);
 }
 
 static int tegra_bpmp_ring_doorbell(struct tegra_bpmp *bpmp)
 {
-	int err;
-
-	if (WARN_ON(bpmp->mbox.channel == NULL))
-		return -EINVAL;
-
-	err = mbox_send_message(bpmp->mbox.channel, NULL);
-	if (err < 0)
-		return err;
-
-	mbox_client_txdone(bpmp->mbox.channel, 0);
-
-	return 0;
+	return bpmp->soc->ops->ring_doorbell(bpmp);
 }
 
 static ssize_t __tegra_bpmp_channel_read(struct tegra_bpmp_channel *channel,
@@ -668,9 +663,8 @@ static void tegra_bpmp_channel_signal(struct tegra_bpmp_channel *channel)
 	complete(&channel->completion);
 }
 
-static void tegra_bpmp_handle_rx(struct mbox_client *client, void *data)
+void tegra_bpmp_handle_rx(struct tegra_bpmp *bpmp)
 {
-	struct tegra_bpmp *bpmp = mbox_client_to_bpmp(client);
 	struct tegra_bpmp_channel *channel;
 	unsigned int i, count;
 	unsigned long *busy;
@@ -698,66 +692,9 @@ static void tegra_bpmp_handle_rx(struct mbox_client *client, void *data)
 	spin_unlock(&bpmp->lock);
 }
 
-static void tegra_bpmp_ivc_notify(struct tegra_ivc *ivc, void *data)
-{
-	struct tegra_bpmp *bpmp = data;
-
-	WARN_ON(tegra_bpmp_ring_doorbell(bpmp));
-}
-
-static int tegra_bpmp_channel_init(struct tegra_bpmp_channel *channel,
-				   struct tegra_bpmp *bpmp,
-				   unsigned int index)
-{
-	size_t message_size, queue_size;
-	unsigned int offset;
-	int err;
-
-	channel->ivc = devm_kzalloc(bpmp->dev, sizeof(*channel->ivc),
-				    GFP_KERNEL);
-	if (!channel->ivc)
-		return -ENOMEM;
-
-	message_size = tegra_ivc_align(MSG_MIN_SZ);
-	queue_size = tegra_ivc_total_queue_size(message_size);
-	offset = queue_size * index;
-
-	err = tegra_ivc_init(channel->ivc, NULL,
-			     bpmp->rx.virt + offset, bpmp->rx.phys + offset,
-			     bpmp->tx.virt + offset, bpmp->tx.phys + offset,
-			     1, message_size, tegra_bpmp_ivc_notify,
-			     bpmp);
-	if (err < 0) {
-		dev_err(bpmp->dev, "failed to setup IVC for channel %u: %d\n",
-			index, err);
-		return err;
-	}
-
-	init_completion(&channel->completion);
-	channel->bpmp = bpmp;
-
-	return 0;
-}
-
-static void tegra_bpmp_channel_reset(struct tegra_bpmp_channel *channel)
-{
-	/* reset the channel state */
-	tegra_ivc_reset(channel->ivc);
-
-	/* sync the channel state with BPMP */
-	while (tegra_ivc_notified(channel->ivc))
-		;
-}
-
-static void tegra_bpmp_channel_cleanup(struct tegra_bpmp_channel *channel)
-{
-	tegra_ivc_cleanup(channel->ivc);
-}
-
 static int tegra_bpmp_probe(struct platform_device *pdev)
 {
 	struct tegra_bpmp *bpmp;
-	unsigned int i;
 	char tag[TAG_SZ];
 	size_t size;
 	int err;
@@ -769,32 +706,6 @@ static int tegra_bpmp_probe(struct platform_device *pdev)
 	bpmp->soc = of_device_get_match_data(&pdev->dev);
 	bpmp->dev = &pdev->dev;
 
-	bpmp->tx.pool = of_gen_pool_get(pdev->dev.of_node, "shmem", 0);
-	if (!bpmp->tx.pool) {
-		dev_err(&pdev->dev, "TX shmem pool not found\n");
-		return -ENOMEM;
-	}
-
-	bpmp->tx.virt = gen_pool_dma_alloc(bpmp->tx.pool, 4096, &bpmp->tx.phys);
-	if (!bpmp->tx.virt) {
-		dev_err(&pdev->dev, "failed to allocate from TX pool\n");
-		return -ENOMEM;
-	}
-
-	bpmp->rx.pool = of_gen_pool_get(pdev->dev.of_node, "shmem", 1);
-	if (!bpmp->rx.pool) {
-		dev_err(&pdev->dev, "RX shmem pool not found\n");
-		err = -ENOMEM;
-		goto free_tx;
-	}
-
-	bpmp->rx.virt = gen_pool_dma_alloc(bpmp->rx.pool, 4096, &bpmp->rx.phys);
-	if (!bpmp->rx.virt) {
-		dev_err(&pdev->dev, "failed to allocate from RX pool\n");
-		err = -ENOMEM;
-		goto free_tx;
-	}
-
 	INIT_LIST_HEAD(&bpmp->mrqs);
 	spin_lock_init(&bpmp->lock);
 
@@ -804,81 +715,38 @@ static int tegra_bpmp_probe(struct platform_device *pdev)
 	size = BITS_TO_LONGS(bpmp->threaded.count) * sizeof(long);
 
 	bpmp->threaded.allocated = devm_kzalloc(&pdev->dev, size, GFP_KERNEL);
-	if (!bpmp->threaded.allocated) {
-		err = -ENOMEM;
-		goto free_rx;
-	}
+	if (!bpmp->threaded.allocated)
+		return -ENOMEM;
 
 	bpmp->threaded.busy = devm_kzalloc(&pdev->dev, size, GFP_KERNEL);
-	if (!bpmp->threaded.busy) {
-		err = -ENOMEM;
-		goto free_rx;
-	}
+	if (!bpmp->threaded.busy)
+		return -ENOMEM;
 
 	spin_lock_init(&bpmp->atomic_tx_lock);
 	bpmp->tx_channel = devm_kzalloc(&pdev->dev, sizeof(*bpmp->tx_channel),
 					GFP_KERNEL);
-	if (!bpmp->tx_channel) {
-		err = -ENOMEM;
-		goto free_rx;
-	}
+	if (!bpmp->tx_channel)
+		return -ENOMEM;
 
 	bpmp->rx_channel = devm_kzalloc(&pdev->dev, sizeof(*bpmp->rx_channel),
 	                                GFP_KERNEL);
-	if (!bpmp->rx_channel) {
-		err = -ENOMEM;
-		goto free_rx;
-	}
+	if (!bpmp->rx_channel)
+		return -ENOMEM;
 
 	bpmp->threaded_channels = devm_kcalloc(&pdev->dev, bpmp->threaded.count,
 					       sizeof(*bpmp->threaded_channels),
 					       GFP_KERNEL);
-	if (!bpmp->threaded_channels) {
-		err = -ENOMEM;
-		goto free_rx;
-	}
-
-	err = tegra_bpmp_channel_init(bpmp->tx_channel, bpmp,
-				      bpmp->soc->channels.cpu_tx.offset);
-	if (err < 0)
-		goto free_rx;
+	if (!bpmp->threaded_channels)
+		return -ENOMEM;
 
-	err = tegra_bpmp_channel_init(bpmp->rx_channel, bpmp,
-				      bpmp->soc->channels.cpu_rx.offset);
+	err = bpmp->soc->ops->init(bpmp);
 	if (err < 0)
-		goto cleanup_tx_channel;
-
-	for (i = 0; i < bpmp->threaded.count; i++) {
-		err = tegra_bpmp_channel_init(
-			&bpmp->threaded_channels[i], bpmp,
-			bpmp->soc->channels.thread.offset + i);
-		if (err < 0)
-			goto cleanup_threaded_channels;
-	}
-
-	/* mbox registration */
-	bpmp->mbox.client.dev = &pdev->dev;
-	bpmp->mbox.client.rx_callback = tegra_bpmp_handle_rx;
-	bpmp->mbox.client.tx_block = false;
-	bpmp->mbox.client.knows_txdone = false;
-
-	bpmp->mbox.channel = mbox_request_channel(&bpmp->mbox.client, 0);
-	if (IS_ERR(bpmp->mbox.channel)) {
-		err = PTR_ERR(bpmp->mbox.channel);
-		dev_err(&pdev->dev, "failed to get HSP mailbox: %d\n", err);
-		goto cleanup_threaded_channels;
-	}
-
-	/* reset message channels */
-	tegra_bpmp_channel_reset(bpmp->tx_channel);
-	tegra_bpmp_channel_reset(bpmp->rx_channel);
-	for (i = 0; i < bpmp->threaded.count; i++)
-		tegra_bpmp_channel_reset(&bpmp->threaded_channels[i]);
+		return err;
 
 	err = tegra_bpmp_request_mrq(bpmp, MRQ_PING,
 				     tegra_bpmp_mrq_handle_ping, bpmp);
 	if (err < 0)
-		goto free_mbox;
+		goto deinit;
 
 	err = tegra_bpmp_ping(bpmp);
 	if (err < 0) {
@@ -920,37 +788,21 @@ static int tegra_bpmp_probe(struct platform_device *pdev)
 
 free_mrq:
 	tegra_bpmp_free_mrq(bpmp, MRQ_PING, bpmp);
-free_mbox:
-	mbox_free_channel(bpmp->mbox.channel);
-cleanup_threaded_channels:
-	for (i = 0; i < bpmp->threaded.count; i++) {
-		if (bpmp->threaded_channels[i].bpmp)
-			tegra_bpmp_channel_cleanup(&bpmp->threaded_channels[i]);
-	}
+deinit:
+	if (bpmp->soc->ops->deinit)
+		bpmp->soc->ops->deinit(bpmp);
 
-	tegra_bpmp_channel_cleanup(bpmp->rx_channel);
-cleanup_tx_channel:
-	tegra_bpmp_channel_cleanup(bpmp->tx_channel);
-free_rx:
-	gen_pool_free(bpmp->rx.pool, (unsigned long)bpmp->rx.virt, 4096);
-free_tx:
-	gen_pool_free(bpmp->tx.pool, (unsigned long)bpmp->tx.virt, 4096);
 	return err;
 }
 
 static int __maybe_unused tegra_bpmp_resume(struct device *dev)
 {
 	struct tegra_bpmp *bpmp = dev_get_drvdata(dev);
-	unsigned int i;
-
-	/* reset message channels */
-	tegra_bpmp_channel_reset(bpmp->tx_channel);
-	tegra_bpmp_channel_reset(bpmp->rx_channel);
-
-	for (i = 0; i < bpmp->threaded.count; i++)
-		tegra_bpmp_channel_reset(&bpmp->threaded_channels[i]);
 
-	return 0;
+	if (bpmp->soc->ops->resume)
+		return bpmp->soc->ops->resume(bpmp);
+	else
+		return 0;
 }
 
 static SIMPLE_DEV_PM_OPS(tegra_bpmp_pm_ops, NULL, tegra_bpmp_resume);
@@ -971,6 +823,7 @@ static const struct tegra_bpmp_soc tegra186_soc = {
 			.timeout = 0,
 		},
 	},
+	.ops = &tegra186_bpmp_ops,
 	.num_resets = 193,
 };
 
diff --git a/include/soc/tegra/bpmp.h b/include/soc/tegra/bpmp.h
index b02f926a0216..bdd1bd107aba 100644
--- a/include/soc/tegra/bpmp.h
+++ b/include/soc/tegra/bpmp.h
@@ -23,6 +23,7 @@
 #include <soc/tegra/bpmp-abi.h>
 
 struct tegra_bpmp_clk;
+struct tegra_bpmp_ops;
 
 struct tegra_bpmp_soc {
 	struct {
@@ -32,6 +33,8 @@ struct tegra_bpmp_soc {
 			unsigned int timeout;
 		} cpu_tx, thread, cpu_rx;
 	} channels;
+
+	const struct tegra_bpmp_ops *ops;
 	unsigned int num_resets;
 };
 
@@ -63,12 +66,7 @@ struct tegra_bpmp_mrq {
 struct tegra_bpmp {
 	const struct tegra_bpmp_soc *soc;
 	struct device *dev;
-
-	struct {
-		struct gen_pool *pool;
-		dma_addr_t phys;
-		void *virt;
-	} tx, rx;
+	void *priv;
 
 	struct {
 		struct mbox_client client;
@@ -173,6 +171,8 @@ static inline bool tegra_bpmp_mrq_is_supported(struct tegra_bpmp *bpmp,
 }
 #endif
 
+void tegra_bpmp_handle_rx(struct tegra_bpmp *bpmp);
+
 #if IS_ENABLED(CONFIG_CLK_TEGRA_BPMP)
 int tegra_bpmp_init_clocks(struct tegra_bpmp *bpmp);
 #else
-- 
cgit v1.2.3


From 139251fc220830cc49b71331d281a8ad03a08ab7 Mon Sep 17 00:00:00 2001
From: Timo Alho <talho@nvidia.com>
Date: Thu, 24 Jan 2019 19:03:54 +0200
Subject: firmware: tegra: add bpmp driver for Tegra210

This patch adds driver for Tegra210 BPMP firmware.

The BPMP is a specific processor in Tegra210 chip, which runs firmware
for assisting in entering deep low power states (suspend to ram), and
offloading DRAM memory clock scaling on some platforms.

Based on work by Sivaram Nair <sivaramn@nvidia.com>

Signed-off-by: Timo Alho <talho@nvidia.com>
Acked-by: Jon Hunter <jonathanh@nvidia.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/firmware/tegra/Makefile        |   1 +
 drivers/firmware/tegra/bpmp-private.h  |   1 +
 drivers/firmware/tegra/bpmp-tegra210.c | 243 +++++++++++++++++++++++++++++++++
 drivers/firmware/tegra/bpmp.c          |  46 +++++--
 include/soc/tegra/bpmp.h               |   1 +
 5 files changed, 283 insertions(+), 9 deletions(-)
 create mode 100644 drivers/firmware/tegra/bpmp-tegra210.c

(limited to 'include')

diff --git a/drivers/firmware/tegra/Makefile b/drivers/firmware/tegra/Makefile
index 559355674bca..ba45e58f7647 100644
--- a/drivers/firmware/tegra/Makefile
+++ b/drivers/firmware/tegra/Makefile
@@ -1,4 +1,5 @@
 tegra-bpmp-y			= bpmp.o
+tegra-bpmp-$(CONFIG_ARCH_TEGRA_210_SOC)	+= bpmp-tegra210.o
 tegra-bpmp-$(CONFIG_ARCH_TEGRA_186_SOC)	+= bpmp-tegra186.o
 tegra-bpmp-$(CONFIG_DEBUG_FS)	+= bpmp-debugfs.o
 obj-$(CONFIG_TEGRA_BPMP)	+= tegra-bpmp.o
diff --git a/drivers/firmware/tegra/bpmp-private.h b/drivers/firmware/tegra/bpmp-private.h
index 6fb10f1cfb8f..07c3d46abb87 100644
--- a/drivers/firmware/tegra/bpmp-private.h
+++ b/drivers/firmware/tegra/bpmp-private.h
@@ -24,5 +24,6 @@ struct tegra_bpmp_ops {
 };
 
 extern const struct tegra_bpmp_ops tegra186_bpmp_ops;
+extern const struct tegra_bpmp_ops tegra210_bpmp_ops;
 
 #endif
diff --git a/drivers/firmware/tegra/bpmp-tegra210.c b/drivers/firmware/tegra/bpmp-tegra210.c
new file mode 100644
index 000000000000..ae15940a078e
--- /dev/null
+++ b/drivers/firmware/tegra/bpmp-tegra210.c
@@ -0,0 +1,243 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2018, NVIDIA CORPORATION.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/io.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+
+#include <soc/tegra/bpmp.h>
+
+#include "bpmp-private.h"
+
+#define TRIGGER_OFFSET		0x000
+#define RESULT_OFFSET(id)	(0xc00 + id * 4)
+#define TRIGGER_ID_SHIFT	16
+#define TRIGGER_CMD_GET		4
+
+#define STA_OFFSET		0
+#define SET_OFFSET		4
+#define CLR_OFFSET		8
+
+#define CH_MASK(ch)	(0x3 << ((ch) * 2))
+#define SL_SIGL(ch)	(0x0 << ((ch) * 2))
+#define SL_QUED(ch)	(0x1 << ((ch) * 2))
+#define MA_FREE(ch)	(0x2 << ((ch) * 2))
+#define MA_ACKD(ch)	(0x3 << ((ch) * 2))
+
+struct tegra210_bpmp {
+	void __iomem *atomics;
+	void __iomem *arb_sema;
+	struct irq_data *tx_irq_data;
+};
+
+static u32 bpmp_channel_status(struct tegra_bpmp *bpmp, unsigned int index)
+{
+	struct tegra210_bpmp *priv = bpmp->priv;
+
+	return __raw_readl(priv->arb_sema + STA_OFFSET) & CH_MASK(index);
+}
+
+static bool tegra210_bpmp_is_response_ready(struct tegra_bpmp_channel *channel)
+{
+	unsigned int index = channel->index;
+
+	return bpmp_channel_status(channel->bpmp, index) == MA_ACKD(index);
+}
+
+static bool tegra210_bpmp_is_request_ready(struct tegra_bpmp_channel *channel)
+{
+	unsigned int index = channel->index;
+
+	return bpmp_channel_status(channel->bpmp, index) == SL_SIGL(index);
+}
+
+static bool
+tegra210_bpmp_is_request_channel_free(struct tegra_bpmp_channel *channel)
+{
+	unsigned int index = channel->index;
+
+	return bpmp_channel_status(channel->bpmp, index) == MA_FREE(index);
+}
+
+static bool
+tegra210_bpmp_is_response_channel_free(struct tegra_bpmp_channel *channel)
+{
+	unsigned int index = channel->index;
+
+	return bpmp_channel_status(channel->bpmp, index) == SL_QUED(index);
+}
+
+static int tegra210_bpmp_post_request(struct tegra_bpmp_channel *channel)
+{
+	struct tegra210_bpmp *priv = channel->bpmp->priv;
+
+	__raw_writel(CH_MASK(channel->index), priv->arb_sema + CLR_OFFSET);
+
+	return 0;
+}
+
+static int tegra210_bpmp_post_response(struct tegra_bpmp_channel *channel)
+{
+	struct tegra210_bpmp *priv = channel->bpmp->priv;
+
+	__raw_writel(MA_ACKD(channel->index), priv->arb_sema + SET_OFFSET);
+
+	return 0;
+}
+
+static int tegra210_bpmp_ack_response(struct tegra_bpmp_channel *channel)
+{
+	struct tegra210_bpmp *priv = channel->bpmp->priv;
+
+	__raw_writel(MA_ACKD(channel->index) ^ MA_FREE(channel->index),
+		     priv->arb_sema + CLR_OFFSET);
+
+	return 0;
+}
+
+static int tegra210_bpmp_ack_request(struct tegra_bpmp_channel *channel)
+{
+	struct tegra210_bpmp *priv = channel->bpmp->priv;
+
+	__raw_writel(SL_QUED(channel->index), priv->arb_sema + SET_OFFSET);
+
+	return 0;
+}
+
+static int tegra210_bpmp_ring_doorbell(struct tegra_bpmp *bpmp)
+{
+	struct tegra210_bpmp *priv = bpmp->priv;
+	struct irq_data *irq_data = priv->tx_irq_data;
+
+	/*
+	 * Tegra Legacy Interrupt Controller (LIC) is used to notify BPMP of
+	 * available messages
+	 */
+	if (irq_data->chip->irq_retrigger)
+		return irq_data->chip->irq_retrigger(irq_data);
+
+	return -EINVAL;
+}
+
+static irqreturn_t rx_irq(int irq, void *data)
+{
+	struct tegra_bpmp *bpmp = data;
+
+	tegra_bpmp_handle_rx(bpmp);
+
+	return IRQ_HANDLED;
+}
+
+static int tegra210_bpmp_channel_init(struct tegra_bpmp_channel *channel,
+				      struct tegra_bpmp *bpmp,
+				      unsigned int index)
+{
+	struct tegra210_bpmp *priv = bpmp->priv;
+	u32 address;
+	void *p;
+
+	/* Retrieve channel base address from BPMP */
+	writel(index << TRIGGER_ID_SHIFT | TRIGGER_CMD_GET,
+	       priv->atomics + TRIGGER_OFFSET);
+	address = readl(priv->atomics + RESULT_OFFSET(index));
+
+	p = devm_ioremap(bpmp->dev, address, 0x80);
+	if (!p)
+		return -ENOMEM;
+
+	channel->ib = p;
+	channel->ob = p;
+	channel->index = index;
+	init_completion(&channel->completion);
+	channel->bpmp = bpmp;
+
+	return 0;
+}
+
+static int tegra210_bpmp_init(struct tegra_bpmp *bpmp)
+{
+	struct platform_device *pdev = to_platform_device(bpmp->dev);
+	struct tegra210_bpmp *priv;
+	struct resource *res;
+	unsigned int i;
+	int err;
+
+	priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	bpmp->priv = priv;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	priv->atomics = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(priv->atomics))
+		return PTR_ERR(priv->atomics);
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	priv->arb_sema = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(priv->arb_sema))
+		return PTR_ERR(priv->arb_sema);
+
+	err = tegra210_bpmp_channel_init(bpmp->tx_channel, bpmp,
+					 bpmp->soc->channels.cpu_tx.offset);
+	if (err < 0)
+		return err;
+
+	err = tegra210_bpmp_channel_init(bpmp->rx_channel, bpmp,
+					 bpmp->soc->channels.cpu_rx.offset);
+	if (err < 0)
+		return err;
+
+	for (i = 0; i < bpmp->threaded.count; i++) {
+		unsigned int index = bpmp->soc->channels.thread.offset + i;
+
+		err = tegra210_bpmp_channel_init(&bpmp->threaded_channels[i],
+						 bpmp, index);
+		if (err < 0)
+			return err;
+	}
+
+	err = platform_get_irq_byname(pdev, "tx");
+	if (err < 0) {
+		dev_err(&pdev->dev, "failed to get TX IRQ: %d\n", err);
+		return err;
+	}
+
+	priv->tx_irq_data = irq_get_irq_data(err);
+	if (!priv->tx_irq_data) {
+		dev_err(&pdev->dev, "failed to get IRQ data for TX IRQ\n");
+		return err;
+	}
+
+	err = platform_get_irq_byname(pdev, "rx");
+	if (err < 0) {
+		dev_err(&pdev->dev, "failed to get rx IRQ: %d\n", err);
+		return err;
+	}
+
+	err = devm_request_irq(&pdev->dev, err, rx_irq,
+			       IRQF_NO_SUSPEND, dev_name(&pdev->dev), bpmp);
+	if (err < 0) {
+		dev_err(&pdev->dev, "failed to request IRQ: %d\n", err);
+		return err;
+	}
+
+	return 0;
+}
+
+const struct tegra_bpmp_ops tegra210_bpmp_ops = {
+	.init = tegra210_bpmp_init,
+	.is_response_ready = tegra210_bpmp_is_response_ready,
+	.is_request_ready = tegra210_bpmp_is_request_ready,
+	.ack_response = tegra210_bpmp_ack_response,
+	.ack_request = tegra210_bpmp_ack_request,
+	.is_response_channel_free = tegra210_bpmp_is_response_channel_free,
+	.is_request_channel_free = tegra210_bpmp_is_request_channel_free,
+	.post_response = tegra210_bpmp_post_response,
+	.post_request = tegra210_bpmp_post_request,
+	.ring_doorbell = tegra210_bpmp_ring_doorbell,
+};
diff --git a/drivers/firmware/tegra/bpmp.c b/drivers/firmware/tegra/bpmp.c
index 2b498deacdbc..8e3f79959d48 100644
--- a/drivers/firmware/tegra/bpmp.c
+++ b/drivers/firmware/tegra/bpmp.c
@@ -768,17 +768,23 @@ static int tegra_bpmp_probe(struct platform_device *pdev)
 	if (err < 0)
 		goto free_mrq;
 
-	err = tegra_bpmp_init_clocks(bpmp);
-	if (err < 0)
-		goto free_mrq;
+	if (of_find_property(pdev->dev.of_node, "#clock-cells", NULL)) {
+		err = tegra_bpmp_init_clocks(bpmp);
+		if (err < 0)
+			goto free_mrq;
+	}
 
-	err = tegra_bpmp_init_resets(bpmp);
-	if (err < 0)
-		goto free_mrq;
+	if (of_find_property(pdev->dev.of_node, "#reset-cells", NULL)) {
+		err = tegra_bpmp_init_resets(bpmp);
+		if (err < 0)
+			goto free_mrq;
+	}
 
-	err = tegra_bpmp_init_powergates(bpmp);
-	if (err < 0)
-		goto free_mrq;
+	if (of_find_property(pdev->dev.of_node, "#power-domain-cells", NULL)) {
+		err = tegra_bpmp_init_powergates(bpmp);
+		if (err < 0)
+			goto free_mrq;
+	}
 
 	err = tegra_bpmp_init_debugfs(bpmp);
 	if (err < 0)
@@ -827,8 +833,30 @@ static const struct tegra_bpmp_soc tegra186_soc = {
 	.num_resets = 193,
 };
 
+static const struct tegra_bpmp_soc tegra210_soc = {
+	.channels = {
+		.cpu_tx = {
+			.offset = 0,
+			.count = 1,
+			.timeout = 60 * USEC_PER_SEC,
+		},
+		.thread = {
+			.offset = 4,
+			.count = 1,
+			.timeout = 600 * USEC_PER_SEC,
+		},
+		.cpu_rx = {
+			.offset = 8,
+			.count = 1,
+			.timeout = 0,
+		},
+	},
+	.ops = &tegra210_bpmp_ops,
+};
+
 static const struct of_device_id tegra_bpmp_match[] = {
 	{ .compatible = "nvidia,tegra186-bpmp", .data = &tegra186_soc },
+	{ .compatible = "nvidia,tegra210-bpmp", .data = &tegra210_soc },
 	{ }
 };
 
diff --git a/include/soc/tegra/bpmp.h b/include/soc/tegra/bpmp.h
index bdd1bd107aba..45960aa08f4a 100644
--- a/include/soc/tegra/bpmp.h
+++ b/include/soc/tegra/bpmp.h
@@ -50,6 +50,7 @@ struct tegra_bpmp_channel {
 	struct tegra_bpmp_mb_data *ob;
 	struct completion completion;
 	struct tegra_ivc *ivc;
+	unsigned int index;
 };
 
 typedef void (*tegra_bpmp_mrq_handler_t)(unsigned int mrq,
-- 
cgit v1.2.3


From 9359a8cbcc77d4afb07444f341e39015d232bd55 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Fri, 25 Jan 2019 16:30:35 +0200
Subject: RAS: Use consistent types for UUIDs

The commit

  297b64c74385 ("ras: acpi / apei: generate trace event for unrecognized CPER section")

brought inconsistency in UUID types which are used across the RAS
subsystem.

Fix this by using guid_t everywhere.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Cc: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Cc: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Cc: Bjorn Helgaas <bhelgaas@google.com>
Cc: Thomas Tai <thomas.tai@oracle.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Tyler Baicar <tbaicar@codeaurora.org>
Link: https://lkml.kernel.org/r/20190125143035.81589-1-andriy.shevchenko@linux.intel.com
---
 drivers/ras/ras.c       | 2 +-
 include/ras/ras_event.h | 8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/drivers/ras/ras.c b/drivers/ras/ras.c
index 3f38907320dc..95540ea8dd9d 100644
--- a/drivers/ras/ras.c
+++ b/drivers/ras/ras.c
@@ -14,7 +14,7 @@
 #define TRACE_INCLUDE_PATH ../../include/ras
 #include <ras/ras_event.h>
 
-void log_non_standard_event(const uuid_le *sec_type, const uuid_le *fru_id,
+void log_non_standard_event(const guid_t *sec_type, const guid_t *fru_id,
 			    const char *fru_text, const u8 sev, const u8 *err,
 			    const u32 len)
 {
diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h
index a0794632fd01..36c5c5e38c1d 100644
--- a/include/ras/ras_event.h
+++ b/include/ras/ras_event.h
@@ -27,7 +27,7 @@
 TRACE_EVENT(extlog_mem_event,
 	TP_PROTO(struct cper_sec_mem_err *mem,
 		 u32 err_seq,
-		 const uuid_le *fru_id,
+		 const guid_t *fru_id,
 		 const char *fru_text,
 		 u8 sev),
 
@@ -39,7 +39,7 @@ TRACE_EVENT(extlog_mem_event,
 		__field(u8, sev)
 		__field(u64, pa)
 		__field(u8, pa_mask_lsb)
-		__field_struct(uuid_le, fru_id)
+		__field_struct(guid_t, fru_id)
 		__string(fru_text, fru_text)
 		__field_struct(struct cper_mem_err_compact, data)
 	),
@@ -218,8 +218,8 @@ TRACE_EVENT(arm_event,
  */
 TRACE_EVENT(non_standard_event,
 
-	TP_PROTO(const uuid_le *sec_type,
-		 const uuid_le *fru_id,
+	TP_PROTO(const guid_t *sec_type,
+		 const guid_t *fru_id,
 		 const char *fru_text,
 		 const u8 sev,
 		 const u8 *err,
-- 
cgit v1.2.3


From 275f22148e8720e84b180d9e0cdf8abfd69bac5b Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 31 Dec 2018 22:22:40 +0100
Subject: ipc: rename old-style shmctl/semctl/msgctl syscalls

The behavior of these system calls is slightly different between
architectures, as determined by the CONFIG_ARCH_WANT_IPC_PARSE_VERSION
symbol. Most architectures that implement the split IPC syscalls don't set
that symbol and only get the modern version, but alpha, arm, microblaze,
mips-n32, mips-n64 and xtensa expect the caller to pass the IPC_64 flag.

For the architectures that so far only implement sys_ipc(), i.e. m68k,
mips-o32, powerpc, s390, sh, sparc, and x86-32, we want the new behavior
when adding the split syscalls, so we need to distinguish between the
two groups of architectures.

The method I picked for this distinction is to have a separate system call
entry point: sys_old_*ctl() now uses ipc_parse_version, while sys_*ctl()
does not. The system call tables of the five architectures are changed
accordingly.

As an additional benefit, we no longer need the configuration specific
definition for ipc_parse_version(), it always does the same thing now,
but simply won't get called on architectures with the modern interface.

A small downside is that on architectures that do set
ARCH_WANT_IPC_PARSE_VERSION, we now have an extra set of entry points
that are never called. They only add a few bytes of bloat, so it seems
better to keep them compared to adding yet another Kconfig symbol.
I considered adding new syscall numbers for the IPC_64 variants for
consistency, but decided against that for now.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 arch/alpha/kernel/syscalls/syscall.tbl      |  6 ++---
 arch/arm/tools/syscall.tbl                  |  6 ++---
 arch/arm64/include/asm/unistd32.h           |  6 ++---
 arch/microblaze/kernel/syscalls/syscall.tbl |  6 ++---
 arch/mips/kernel/syscalls/syscall_n32.tbl   |  6 ++---
 arch/mips/kernel/syscalls/syscall_n64.tbl   |  6 ++---
 arch/xtensa/kernel/syscalls/syscall.tbl     |  6 ++---
 include/linux/syscalls.h                    |  3 +++
 ipc/msg.c                                   | 39 +++++++++++++++++++++++-----
 ipc/sem.c                                   | 39 +++++++++++++++++++++++-----
 ipc/shm.c                                   | 40 ++++++++++++++++++++++++-----
 ipc/syscall.c                               | 12 ++++-----
 ipc/util.h                                  | 21 +++++----------
 kernel/sys_ni.c                             |  3 +++
 14 files changed, 137 insertions(+), 62 deletions(-)

(limited to 'include')

diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl
index f920b65e8c49..b0e247287908 100644
--- a/arch/alpha/kernel/syscalls/syscall.tbl
+++ b/arch/alpha/kernel/syscalls/syscall.tbl
@@ -174,17 +174,17 @@
 187	common	osf_alt_sigpending		sys_ni_syscall
 188	common	osf_alt_setsid			sys_ni_syscall
 199	common	osf_swapon			sys_swapon
-200	common	msgctl				sys_msgctl
+200	common	msgctl				sys_old_msgctl
 201	common	msgget				sys_msgget
 202	common	msgrcv				sys_msgrcv
 203	common	msgsnd				sys_msgsnd
-204	common	semctl				sys_semctl
+204	common	semctl				sys_old_semctl
 205	common	semget				sys_semget
 206	common	semop				sys_semop
 207	common	osf_utsname			sys_osf_utsname
 208	common	lchown				sys_lchown
 209	common	shmat				sys_shmat
-210	common	shmctl				sys_shmctl
+210	common	shmctl				sys_old_shmctl
 211	common	shmdt				sys_shmdt
 212	common	shmget				sys_shmget
 213	common	osf_mvalid			sys_ni_syscall
diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl
index 20ed7e026723..b54b7f2bc24a 100644
--- a/arch/arm/tools/syscall.tbl
+++ b/arch/arm/tools/syscall.tbl
@@ -314,15 +314,15 @@
 297	common	recvmsg			sys_recvmsg
 298	common	semop			sys_semop		sys_oabi_semop
 299	common	semget			sys_semget
-300	common	semctl			sys_semctl
+300	common	semctl			sys_old_semctl
 301	common	msgsnd			sys_msgsnd
 302	common	msgrcv			sys_msgrcv
 303	common	msgget			sys_msgget
-304	common	msgctl			sys_msgctl
+304	common	msgctl			sys_old_msgctl
 305	common	shmat			sys_shmat
 306	common	shmdt			sys_shmdt
 307	common	shmget			sys_shmget
-308	common	shmctl			sys_shmctl
+308	common	shmctl			sys_old_shmctl
 309	common	add_key			sys_add_key
 310	common	request_key		sys_request_key
 311	common	keyctl			sys_keyctl
diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h
index 8ca1d4c304f4..d10cce69a4b0 100644
--- a/arch/arm64/include/asm/unistd32.h
+++ b/arch/arm64/include/asm/unistd32.h
@@ -622,7 +622,7 @@ __SYSCALL(__NR_semop, sys_semop)
 #define __NR_semget 299
 __SYSCALL(__NR_semget, sys_semget)
 #define __NR_semctl 300
-__SYSCALL(__NR_semctl, compat_sys_semctl)
+__SYSCALL(__NR_semctl, compat_sys_old_semctl)
 #define __NR_msgsnd 301
 __SYSCALL(__NR_msgsnd, compat_sys_msgsnd)
 #define __NR_msgrcv 302
@@ -630,7 +630,7 @@ __SYSCALL(__NR_msgrcv, compat_sys_msgrcv)
 #define __NR_msgget 303
 __SYSCALL(__NR_msgget, sys_msgget)
 #define __NR_msgctl 304
-__SYSCALL(__NR_msgctl, compat_sys_msgctl)
+__SYSCALL(__NR_msgctl, compat_sys_old_msgctl)
 #define __NR_shmat 305
 __SYSCALL(__NR_shmat, compat_sys_shmat)
 #define __NR_shmdt 306
@@ -638,7 +638,7 @@ __SYSCALL(__NR_shmdt, sys_shmdt)
 #define __NR_shmget 307
 __SYSCALL(__NR_shmget, sys_shmget)
 #define __NR_shmctl 308
-__SYSCALL(__NR_shmctl, compat_sys_shmctl)
+__SYSCALL(__NR_shmctl, compat_sys_old_shmctl)
 #define __NR_add_key 309
 __SYSCALL(__NR_add_key, sys_add_key)
 #define __NR_request_key 310
diff --git a/arch/microblaze/kernel/syscalls/syscall.tbl b/arch/microblaze/kernel/syscalls/syscall.tbl
index a24d09e937dd..7cc0f9554da3 100644
--- a/arch/microblaze/kernel/syscalls/syscall.tbl
+++ b/arch/microblaze/kernel/syscalls/syscall.tbl
@@ -335,15 +335,15 @@
 325	common	semtimedop			sys_semtimedop
 326	common	timerfd_settime			sys_timerfd_settime
 327	common	timerfd_gettime			sys_timerfd_gettime
-328	common	semctl				sys_semctl
+328	common	semctl				sys_old_semctl
 329	common	semget				sys_semget
 330	common	semop				sys_semop
-331	common	msgctl				sys_msgctl
+331	common	msgctl				sys_old_msgctl
 332	common	msgget				sys_msgget
 333	common	msgrcv				sys_msgrcv
 334	common	msgsnd				sys_msgsnd
 335	common	shmat				sys_shmat
-336	common	shmctl				sys_shmctl
+336	common	shmctl				sys_old_shmctl
 337	common	shmdt				sys_shmdt
 338	common	shmget				sys_shmget
 339	common	signalfd4			sys_signalfd4
diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl
index 53d5862649ae..cc134b1211aa 100644
--- a/arch/mips/kernel/syscalls/syscall_n32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n32.tbl
@@ -37,7 +37,7 @@
 27	n32	madvise				sys_madvise
 28	n32	shmget				sys_shmget
 29	n32	shmat				sys_shmat
-30	n32	shmctl				compat_sys_shmctl
+30	n32	shmctl				compat_sys_old_shmctl
 31	n32	dup				sys_dup
 32	n32	dup2				sys_dup2
 33	n32	pause				sys_pause
@@ -71,12 +71,12 @@
 61	n32	uname				sys_newuname
 62	n32	semget				sys_semget
 63	n32	semop				sys_semop
-64	n32	semctl				compat_sys_semctl
+64	n32	semctl				compat_sys_old_semctl
 65	n32	shmdt				sys_shmdt
 66	n32	msgget				sys_msgget
 67	n32	msgsnd				compat_sys_msgsnd
 68	n32	msgrcv				compat_sys_msgrcv
-69	n32	msgctl				compat_sys_msgctl
+69	n32	msgctl				compat_sys_old_msgctl
 70	n32	fcntl				compat_sys_fcntl
 71	n32	flock				sys_flock
 72	n32	fsync				sys_fsync
diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/syscalls/syscall_n64.tbl
index a8286ccbb66c..af0da757a7b2 100644
--- a/arch/mips/kernel/syscalls/syscall_n64.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n64.tbl
@@ -37,7 +37,7 @@
 27	n64	madvise				sys_madvise
 28	n64	shmget				sys_shmget
 29	n64	shmat				sys_shmat
-30	n64	shmctl				sys_shmctl
+30	n64	shmctl				sys_old_shmctl
 31	n64	dup				sys_dup
 32	n64	dup2				sys_dup2
 33	n64	pause				sys_pause
@@ -71,12 +71,12 @@
 61	n64	uname				sys_newuname
 62	n64	semget				sys_semget
 63	n64	semop				sys_semop
-64	n64	semctl				sys_semctl
+64	n64	semctl				sys_old_semctl
 65	n64	shmdt				sys_shmdt
 66	n64	msgget				sys_msgget
 67	n64	msgsnd				sys_msgsnd
 68	n64	msgrcv				sys_msgrcv
-69	n64	msgctl				sys_msgctl
+69	n64	msgctl				sys_old_msgctl
 70	n64	fcntl				sys_fcntl
 71	n64	flock				sys_flock
 72	n64	fsync				sys_fsync
diff --git a/arch/xtensa/kernel/syscalls/syscall.tbl b/arch/xtensa/kernel/syscalls/syscall.tbl
index 69cf91b03b26..f8befa11b0c4 100644
--- a/arch/xtensa/kernel/syscalls/syscall.tbl
+++ b/arch/xtensa/kernel/syscalls/syscall.tbl
@@ -103,7 +103,7 @@
 91	common	madvise				sys_madvise
 92	common	shmget				sys_shmget
 93	common	shmat				xtensa_shmat
-94	common	shmctl				sys_shmctl
+94	common	shmctl				sys_old_shmctl
 95	common	shmdt				sys_shmdt
 # Socket Operations
 96	common	socket				sys_socket
@@ -177,12 +177,12 @@
 161	common	semtimedop			sys_semtimedop
 162	common	semget				sys_semget
 163	common	semop				sys_semop
-164	common	semctl				sys_semctl
+164	common	semctl				sys_old_semctl
 165	common	available165			sys_ni_syscall
 166	common	msgget				sys_msgget
 167	common	msgsnd				sys_msgsnd
 168	common	msgrcv				sys_msgrcv
-169	common	msgctl				sys_msgctl
+169	common	msgctl				sys_old_msgctl
 170	common	available170			sys_ni_syscall
 # File System
 171	common	umount2				sys_umount
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index fb63045a0fb6..938d8908b9e0 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -717,6 +717,7 @@ asmlinkage long sys_mq_getsetattr(mqd_t mqdes, const struct mq_attr __user *mqst
 
 /* ipc/msg.c */
 asmlinkage long sys_msgget(key_t key, int msgflg);
+asmlinkage long sys_old_msgctl(int msqid, int cmd, struct msqid_ds __user *buf);
 asmlinkage long sys_msgctl(int msqid, int cmd, struct msqid_ds __user *buf);
 asmlinkage long sys_msgrcv(int msqid, struct msgbuf __user *msgp,
 				size_t msgsz, long msgtyp, int msgflg);
@@ -726,6 +727,7 @@ asmlinkage long sys_msgsnd(int msqid, struct msgbuf __user *msgp,
 /* ipc/sem.c */
 asmlinkage long sys_semget(key_t key, int nsems, int semflg);
 asmlinkage long sys_semctl(int semid, int semnum, int cmd, unsigned long arg);
+asmlinkage long sys_old_semctl(int semid, int semnum, int cmd, unsigned long arg);
 asmlinkage long sys_semtimedop(int semid, struct sembuf __user *sops,
 				unsigned nsops,
 				const struct __kernel_timespec __user *timeout);
@@ -734,6 +736,7 @@ asmlinkage long sys_semop(int semid, struct sembuf __user *sops,
 
 /* ipc/shm.c */
 asmlinkage long sys_shmget(key_t key, size_t size, int flag);
+asmlinkage long sys_old_shmctl(int shmid, int cmd, struct shmid_ds __user *buf);
 asmlinkage long sys_shmctl(int shmid, int cmd, struct shmid_ds __user *buf);
 asmlinkage long sys_shmat(int shmid, char __user *shmaddr, int shmflg);
 asmlinkage long sys_shmdt(char __user *shmaddr);
diff --git a/ipc/msg.c b/ipc/msg.c
index 0833c6405915..8dec945fa030 100644
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -567,9 +567,8 @@ out_unlock:
 	return err;
 }
 
-long ksys_msgctl(int msqid, int cmd, struct msqid_ds __user *buf)
+static long ksys_msgctl(int msqid, int cmd, struct msqid_ds __user *buf, int version)
 {
-	int version;
 	struct ipc_namespace *ns;
 	struct msqid64_ds msqid64;
 	int err;
@@ -577,7 +576,6 @@ long ksys_msgctl(int msqid, int cmd, struct msqid_ds __user *buf)
 	if (msqid < 0 || cmd < 0)
 		return -EINVAL;
 
-	version = ipc_parse_version(&cmd);
 	ns = current->nsproxy->ipc_ns;
 
 	switch (cmd) {
@@ -613,9 +611,23 @@ long ksys_msgctl(int msqid, int cmd, struct msqid_ds __user *buf)
 
 SYSCALL_DEFINE3(msgctl, int, msqid, int, cmd, struct msqid_ds __user *, buf)
 {
-	return ksys_msgctl(msqid, cmd, buf);
+	return ksys_msgctl(msqid, cmd, buf, IPC_64);
 }
 
+#ifdef CONFIG_ARCH_WANT_IPC_PARSE_VERSION
+long ksys_old_msgctl(int msqid, int cmd, struct msqid_ds __user *buf)
+{
+	int version = ipc_parse_version(&cmd);
+
+	return ksys_msgctl(msqid, cmd, buf, version);
+}
+
+SYSCALL_DEFINE3(old_msgctl, int, msqid, int, cmd, struct msqid_ds __user *, buf)
+{
+	return ksys_old_msgctl(msqid, cmd, buf);
+}
+#endif
+
 #ifdef CONFIG_COMPAT
 
 struct compat_msqid_ds {
@@ -689,12 +701,11 @@ static int copy_compat_msqid_to_user(void __user *buf, struct msqid64_ds *in,
 	}
 }
 
-long compat_ksys_msgctl(int msqid, int cmd, void __user *uptr)
+static long compat_ksys_msgctl(int msqid, int cmd, void __user *uptr, int version)
 {
 	struct ipc_namespace *ns;
 	int err;
 	struct msqid64_ds msqid64;
-	int version = compat_ipc_parse_version(&cmd);
 
 	ns = current->nsproxy->ipc_ns;
 
@@ -734,8 +745,22 @@ long compat_ksys_msgctl(int msqid, int cmd, void __user *uptr)
 
 COMPAT_SYSCALL_DEFINE3(msgctl, int, msqid, int, cmd, void __user *, uptr)
 {
-	return compat_ksys_msgctl(msqid, cmd, uptr);
+	return compat_ksys_msgctl(msqid, cmd, uptr, IPC_64);
 }
+
+#ifdef CONFIG_ARCH_WANT_COMPAT_IPC_PARSE_VERSION
+long compat_ksys_old_msgctl(int msqid, int cmd, void __user *uptr)
+{
+	int version = compat_ipc_parse_version(&cmd);
+
+	return compat_ksys_msgctl(msqid, cmd, uptr, version);
+}
+
+COMPAT_SYSCALL_DEFINE3(old_msgctl, int, msqid, int, cmd, void __user *, uptr)
+{
+	return compat_ksys_old_msgctl(msqid, cmd, uptr);
+}
+#endif
 #endif
 
 static int testmsg(struct msg_msg *msg, long type, int mode)
diff --git a/ipc/sem.c b/ipc/sem.c
index 745dc6187e84..d1efff3a81bb 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -1634,9 +1634,8 @@ out_up:
 	return err;
 }
 
-long ksys_semctl(int semid, int semnum, int cmd, unsigned long arg)
+static long ksys_semctl(int semid, int semnum, int cmd, unsigned long arg, int version)
 {
-	int version;
 	struct ipc_namespace *ns;
 	void __user *p = (void __user *)arg;
 	struct semid64_ds semid64;
@@ -1645,7 +1644,6 @@ long ksys_semctl(int semid, int semnum, int cmd, unsigned long arg)
 	if (semid < 0)
 		return -EINVAL;
 
-	version = ipc_parse_version(&cmd);
 	ns = current->nsproxy->ipc_ns;
 
 	switch (cmd) {
@@ -1691,9 +1689,23 @@ long ksys_semctl(int semid, int semnum, int cmd, unsigned long arg)
 
 SYSCALL_DEFINE4(semctl, int, semid, int, semnum, int, cmd, unsigned long, arg)
 {
-	return ksys_semctl(semid, semnum, cmd, arg);
+	return ksys_semctl(semid, semnum, cmd, arg, IPC_64);
 }
 
+#ifdef CONFIG_ARCH_WANT_IPC_PARSE_VERSION
+long ksys_old_semctl(int semid, int semnum, int cmd, unsigned long arg)
+{
+	int version = ipc_parse_version(&cmd);
+
+	return ksys_semctl(semid, semnum, cmd, arg, version);
+}
+
+SYSCALL_DEFINE4(old_semctl, int, semid, int, semnum, int, cmd, unsigned long, arg)
+{
+	return ksys_old_semctl(semid, semnum, cmd, arg);
+}
+#endif
+
 #ifdef CONFIG_COMPAT
 
 struct compat_semid_ds {
@@ -1744,12 +1756,11 @@ static int copy_compat_semid_to_user(void __user *buf, struct semid64_ds *in,
 	}
 }
 
-long compat_ksys_semctl(int semid, int semnum, int cmd, int arg)
+static long compat_ksys_semctl(int semid, int semnum, int cmd, int arg, int version)
 {
 	void __user *p = compat_ptr(arg);
 	struct ipc_namespace *ns;
 	struct semid64_ds semid64;
-	int version = compat_ipc_parse_version(&cmd);
 	int err;
 
 	ns = current->nsproxy->ipc_ns;
@@ -1792,8 +1803,22 @@ long compat_ksys_semctl(int semid, int semnum, int cmd, int arg)
 
 COMPAT_SYSCALL_DEFINE4(semctl, int, semid, int, semnum, int, cmd, int, arg)
 {
-	return compat_ksys_semctl(semid, semnum, cmd, arg);
+	return compat_ksys_semctl(semid, semnum, cmd, arg, IPC_64);
 }
+
+#ifdef CONFIG_ARCH_WANT_COMPAT_IPC_PARSE_VERSION
+long compat_ksys_old_semctl(int semid, int semnum, int cmd, int arg)
+{
+	int version = compat_ipc_parse_version(&cmd);
+
+	return compat_ksys_semctl(semid, semnum, cmd, arg, version);
+}
+
+COMPAT_SYSCALL_DEFINE4(old_semctl, int, semid, int, semnum, int, cmd, int, arg)
+{
+	return compat_ksys_old_semctl(semid, semnum, cmd, arg);
+}
+#endif
 #endif
 
 /* If the task doesn't already have a undo_list, then allocate one
diff --git a/ipc/shm.c b/ipc/shm.c
index 0842411cb0e9..ce1ca9f7c6e9 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -1137,16 +1137,15 @@ out_unlock1:
 	return err;
 }
 
-long ksys_shmctl(int shmid, int cmd, struct shmid_ds __user *buf)
+static long ksys_shmctl(int shmid, int cmd, struct shmid_ds __user *buf, int version)
 {
-	int err, version;
+	int err;
 	struct ipc_namespace *ns;
 	struct shmid64_ds sem64;
 
 	if (cmd < 0 || shmid < 0)
 		return -EINVAL;
 
-	version = ipc_parse_version(&cmd);
 	ns = current->nsproxy->ipc_ns;
 
 	switch (cmd) {
@@ -1194,8 +1193,22 @@ long ksys_shmctl(int shmid, int cmd, struct shmid_ds __user *buf)
 
 SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf)
 {
-	return ksys_shmctl(shmid, cmd, buf);
+	return ksys_shmctl(shmid, cmd, buf, IPC_64);
+}
+
+#ifdef CONFIG_ARCH_WANT_IPC_PARSE_VERSION
+long ksys_old_shmctl(int shmid, int cmd, struct shmid_ds __user *buf)
+{
+	int version = ipc_parse_version(&cmd);
+
+	return ksys_shmctl(shmid, cmd, buf, version);
+}
+
+SYSCALL_DEFINE3(old_shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf)
+{
+	return ksys_old_shmctl(shmid, cmd, buf);
 }
+#endif
 
 #ifdef CONFIG_COMPAT
 
@@ -1319,11 +1332,10 @@ static int copy_compat_shmid_from_user(struct shmid64_ds *out, void __user *buf,
 	}
 }
 
-long compat_ksys_shmctl(int shmid, int cmd, void __user *uptr)
+long compat_ksys_shmctl(int shmid, int cmd, void __user *uptr, int version)
 {
 	struct ipc_namespace *ns;
 	struct shmid64_ds sem64;
-	int version = compat_ipc_parse_version(&cmd);
 	int err;
 
 	ns = current->nsproxy->ipc_ns;
@@ -1378,8 +1390,22 @@ long compat_ksys_shmctl(int shmid, int cmd, void __user *uptr)
 
 COMPAT_SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, void __user *, uptr)
 {
-	return compat_ksys_shmctl(shmid, cmd, uptr);
+	return compat_ksys_shmctl(shmid, cmd, uptr, IPC_64);
 }
+
+#ifdef CONFIG_ARCH_WANT_COMPAT_IPC_PARSE_VERSION
+long compat_ksys_old_shmctl(int shmid, int cmd, void __user *uptr)
+{
+	int version = compat_ipc_parse_version(&cmd);
+
+	return compat_ksys_shmctl(shmid, cmd, uptr, version);
+}
+
+COMPAT_SYSCALL_DEFINE3(old_shmctl, int, shmid, int, cmd, void __user *, uptr)
+{
+	return compat_ksys_old_shmctl(shmid, cmd, uptr);
+}
+#endif
 #endif
 
 /*
diff --git a/ipc/syscall.c b/ipc/syscall.c
index 3cf8ad703a4d..581bdff4e7c5 100644
--- a/ipc/syscall.c
+++ b/ipc/syscall.c
@@ -47,7 +47,7 @@ int ksys_ipc(unsigned int call, int first, unsigned long second,
 			return -EINVAL;
 		if (get_user(arg, (unsigned long __user *) ptr))
 			return -EFAULT;
-		return ksys_semctl(first, second, third, arg);
+		return ksys_old_semctl(first, second, third, arg);
 	}
 
 	case MSGSND:
@@ -75,7 +75,7 @@ int ksys_ipc(unsigned int call, int first, unsigned long second,
 	case MSGGET:
 		return ksys_msgget((key_t) first, second);
 	case MSGCTL:
-		return ksys_msgctl(first, second,
+		return ksys_old_msgctl(first, second,
 				   (struct msqid_ds __user *)ptr);
 
 	case SHMAT:
@@ -100,7 +100,7 @@ int ksys_ipc(unsigned int call, int first, unsigned long second,
 	case SHMGET:
 		return ksys_shmget(first, second, third);
 	case SHMCTL:
-		return ksys_shmctl(first, second,
+		return ksys_old_shmctl(first, second,
 				   (struct shmid_ds __user *) ptr);
 	default:
 		return -ENOSYS;
@@ -152,7 +152,7 @@ int compat_ksys_ipc(u32 call, int first, int second,
 			return -EINVAL;
 		if (get_user(pad, (u32 __user *) compat_ptr(ptr)))
 			return -EFAULT;
-		return compat_ksys_semctl(first, second, third, pad);
+		return compat_ksys_old_semctl(first, second, third, pad);
 
 	case MSGSND:
 		return compat_ksys_msgsnd(first, ptr, second, third);
@@ -177,7 +177,7 @@ int compat_ksys_ipc(u32 call, int first, int second,
 	case MSGGET:
 		return ksys_msgget(first, second);
 	case MSGCTL:
-		return compat_ksys_msgctl(first, second, compat_ptr(ptr));
+		return compat_ksys_old_msgctl(first, second, compat_ptr(ptr));
 
 	case SHMAT: {
 		int err;
@@ -196,7 +196,7 @@ int compat_ksys_ipc(u32 call, int first, int second,
 	case SHMGET:
 		return ksys_shmget(first, (unsigned int)second, third);
 	case SHMCTL:
-		return compat_ksys_shmctl(first, second, compat_ptr(ptr));
+		return compat_ksys_old_shmctl(first, second, compat_ptr(ptr));
 	}
 
 	return -ENOSYS;
diff --git a/ipc/util.h b/ipc/util.h
index d768fdbed515..e272be622ae7 100644
--- a/ipc/util.h
+++ b/ipc/util.h
@@ -160,10 +160,7 @@ static inline void ipc_update_pid(struct pid **pos, struct pid *pid)
 	}
 }
 
-#ifndef CONFIG_ARCH_WANT_IPC_PARSE_VERSION
-/* On IA-64, we always use the "64-bit version" of the IPC structures.  */
-# define ipc_parse_version(cmd)	IPC_64
-#else
+#ifdef CONFIG_ARCH_WANT_IPC_PARSE_VERSION
 int ipc_parse_version(int *cmd);
 #endif
 
@@ -246,13 +243,9 @@ int get_compat_ipc64_perm(struct ipc64_perm *,
 
 static inline int compat_ipc_parse_version(int *cmd)
 {
-#ifdef	CONFIG_ARCH_WANT_COMPAT_IPC_PARSE_VERSION
 	int version = *cmd & IPC_64;
 	*cmd &= ~IPC_64;
 	return version;
-#else
-	return IPC_64;
-#endif
 }
 #endif
 
@@ -261,29 +254,29 @@ long ksys_semtimedop(int semid, struct sembuf __user *tsops,
 		     unsigned int nsops,
 		     const struct __kernel_timespec __user *timeout);
 long ksys_semget(key_t key, int nsems, int semflg);
-long ksys_semctl(int semid, int semnum, int cmd, unsigned long arg);
+long ksys_old_semctl(int semid, int semnum, int cmd, unsigned long arg);
 long ksys_msgget(key_t key, int msgflg);
-long ksys_msgctl(int msqid, int cmd, struct msqid_ds __user *buf);
+long ksys_old_msgctl(int msqid, int cmd, struct msqid_ds __user *buf);
 long ksys_msgrcv(int msqid, struct msgbuf __user *msgp, size_t msgsz,
 		 long msgtyp, int msgflg);
 long ksys_msgsnd(int msqid, struct msgbuf __user *msgp, size_t msgsz,
 		 int msgflg);
 long ksys_shmget(key_t key, size_t size, int shmflg);
 long ksys_shmdt(char __user *shmaddr);
-long ksys_shmctl(int shmid, int cmd, struct shmid_ds __user *buf);
+long ksys_old_shmctl(int shmid, int cmd, struct shmid_ds __user *buf);
 
 /* for CONFIG_ARCH_WANT_OLD_COMPAT_IPC */
 long compat_ksys_semtimedop(int semid, struct sembuf __user *tsems,
 			    unsigned int nsops,
 			    const struct old_timespec32 __user *timeout);
 #ifdef CONFIG_COMPAT
-long compat_ksys_semctl(int semid, int semnum, int cmd, int arg);
-long compat_ksys_msgctl(int msqid, int cmd, void __user *uptr);
+long compat_ksys_old_semctl(int semid, int semnum, int cmd, int arg);
+long compat_ksys_old_msgctl(int msqid, int cmd, void __user *uptr);
 long compat_ksys_msgrcv(int msqid, compat_uptr_t msgp, compat_ssize_t msgsz,
 			compat_long_t msgtyp, int msgflg);
 long compat_ksys_msgsnd(int msqid, compat_uptr_t msgp,
 		       compat_ssize_t msgsz, int msgflg);
-long compat_ksys_shmctl(int shmid, int cmd, void __user *uptr);
+long compat_ksys_old_shmctl(int shmid, int cmd, void __user *uptr);
 #endif /* CONFIG_COMPAT */
 
 #endif
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index bc934f31ab10..ce04431a40d1 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -197,6 +197,7 @@ COND_SYSCALL_COMPAT(mq_getsetattr);
 
 /* ipc/msg.c */
 COND_SYSCALL(msgget);
+COND_SYSCALL(old_msgctl);
 COND_SYSCALL(msgctl);
 COND_SYSCALL_COMPAT(msgctl);
 COND_SYSCALL(msgrcv);
@@ -206,6 +207,7 @@ COND_SYSCALL_COMPAT(msgsnd);
 
 /* ipc/sem.c */
 COND_SYSCALL(semget);
+COND_SYSCALL(old_semctl);
 COND_SYSCALL(semctl);
 COND_SYSCALL_COMPAT(semctl);
 COND_SYSCALL(semtimedop);
@@ -214,6 +216,7 @@ COND_SYSCALL(semop);
 
 /* ipc/shm.c */
 COND_SYSCALL(shmget);
+COND_SYSCALL(old_shmctl);
 COND_SYSCALL(shmctl);
 COND_SYSCALL_COMPAT(shmctl);
 COND_SYSCALL(shmat);
-- 
cgit v1.2.3


From 0d6040d4681735dfc47565de288525de405a5c99 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 31 Dec 2018 14:38:26 +0100
Subject: arch: add split IPC system calls where needed

The IPC system call handling is highly inconsistent across architectures,
some use sys_ipc, some use separate calls, and some use both.  We also
have some architectures that require passing IPC_64 in the flags, and
others that set it implicitly.

For the addition of a y2038 safe semtimedop() system call, I chose to only
support the separate entry points, but that requires first supporting
the regular ones with their own syscall numbers.

The IPC_64 is now implied by the new semctl/shmctl/msgctl system
calls even on the architectures that require passing it with the ipc()
multiplexer.

I'm not adding the new semtimedop() or semop() on 32-bit architectures,
those will get implemented using the new semtimedop_time64() version
that gets added along with the other time64 calls.
Three 64-bit architectures (powerpc, s390 and sparc) get semtimedop().

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Geert Uytterhoeven <geert@linux-m68k.org>
Acked-by: Heiko Carstens <heiko.carstens@de.ibm.com>
---
 arch/m68k/kernel/syscalls/syscall.tbl     | 11 +++++++++++
 arch/mips/kernel/syscalls/syscall_o32.tbl | 11 +++++++++++
 arch/powerpc/kernel/syscalls/syscall.tbl  | 13 +++++++++++++
 arch/s390/kernel/syscalls/syscall.tbl     | 12 ++++++++++++
 arch/sh/kernel/syscalls/syscall.tbl       | 11 +++++++++++
 arch/sparc/kernel/syscalls/syscall.tbl    | 12 ++++++++++++
 arch/x86/entry/syscalls/syscall_32.tbl    | 11 +++++++++++
 arch/x86/entry/syscalls/syscall_64.tbl    |  2 ++
 include/uapi/asm-generic/unistd.h         |  1 +
 9 files changed, 84 insertions(+)

(limited to 'include')

diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl
index 85779d6ef935..5354ba02eed2 100644
--- a/arch/m68k/kernel/syscalls/syscall.tbl
+++ b/arch/m68k/kernel/syscalls/syscall.tbl
@@ -388,3 +388,14 @@
 378	common	pwritev2			sys_pwritev2
 379	common	statx				sys_statx
 380	common	seccomp				sys_seccomp
+# room for arch specific calls
+393	common	semget				sys_semget
+394	common	semctl				sys_semctl
+395	common	shmget				sys_shmget
+396	common	shmctl				sys_shmctl
+397	common	shmat				sys_shmat
+398	common	shmdt				sys_shmdt
+399	common	msgget				sys_msgget
+400	common	msgsnd				sys_msgsnd
+401	common	msgrcv				sys_msgrcv
+402	common	msgctl				sys_msgctl
diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl
index 3d5a47b80d2b..fa47ea8cc6ef 100644
--- a/arch/mips/kernel/syscalls/syscall_o32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_o32.tbl
@@ -380,3 +380,14 @@
 366	o32	statx				sys_statx
 367	o32	rseq				sys_rseq
 368	o32	io_pgetevents			sys_io_pgetevents		compat_sys_io_pgetevents
+# room for arch specific calls
+393	o32	semget				sys_semget
+394	o32	semctl				sys_semctl			compat_sys_semctl
+395	o32	shmget				sys_shmget
+396	o32	shmctl				sys_shmctl			compat_sys_shmctl
+397	o32	shmat				sys_shmat			compat_sys_shmat
+398	o32	shmdt				sys_shmdt
+399	o32	msgget				sys_msgget
+400	o32	msgsnd				sys_msgsnd			compat_sys_msgsnd
+401	o32	msgrcv				sys_msgrcv			compat_sys_msgrcv
+402	o32	msgctl				sys_msgctl			compat_sys_msgctl
diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl
index db3bbb8744af..7555874ce39c 100644
--- a/arch/powerpc/kernel/syscalls/syscall.tbl
+++ b/arch/powerpc/kernel/syscalls/syscall.tbl
@@ -414,6 +414,7 @@
 363	spu	switch_endian			sys_ni_syscall
 364	common	userfaultfd			sys_userfaultfd
 365	common	membarrier			sys_membarrier
+# 366-377 originally left for IPC, now unused
 378	nospu	mlock2				sys_mlock2
 379	nospu	copy_file_range			sys_copy_file_range
 380	common	preadv2				sys_preadv2			compat_sys_preadv2
@@ -425,3 +426,15 @@
 386	nospu	pkey_mprotect			sys_pkey_mprotect
 387	nospu	rseq				sys_rseq
 388	nospu	io_pgetevents			sys_io_pgetevents		compat_sys_io_pgetevents
+# room for arch specific syscalls
+392	64	semtimedop			sys_semtimedop
+393	common	semget				sys_semget
+394	common	semctl				sys_semctl			compat_sys_semctl
+395	common	shmget				sys_shmget
+396	common	shmctl				sys_shmctl			compat_sys_shmctl
+397	common	shmat				sys_shmat			compat_sys_shmat
+398	common	shmdt				sys_shmdt
+399	common	msgget				sys_msgget
+400	common	msgsnd				sys_msgsnd			compat_sys_msgsnd
+401	common	msgrcv				sys_msgrcv			compat_sys_msgrcv
+402	common	msgctl				sys_msgctl			compat_sys_msgctl
diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl
index 7413fd318e2a..0bccb01c6202 100644
--- a/arch/s390/kernel/syscalls/syscall.tbl
+++ b/arch/s390/kernel/syscalls/syscall.tbl
@@ -391,3 +391,15 @@
 381  common	kexec_file_load		sys_kexec_file_load		sys_kexec_file_load
 382  common	io_pgetevents		sys_io_pgetevents		compat_sys_io_pgetevents
 383  common	rseq			sys_rseq			sys_rseq
+# room for arch specific syscalls
+392	64	semtimedop		sys_semtimedop			-
+393  common	semget			sys_semget			sys_semget
+394  common	semctl			sys_semctl			compat_sys_semctl
+395  common	shmget			sys_shmget			sys_shmget
+396  common	shmctl			sys_shmctl			compat_sys_shmctl
+397  common	shmat			sys_shmat			compat_sys_shmat
+398  common	shmdt			sys_shmdt 			sys_shmdt
+399  common	msgget			sys_msgget			sys_msgget
+400  common	msgsnd			sys_msgsnd			compat_sys_msgsnd
+401  common	msgrcv			sys_msgrcv			compat_sys_msgrcv
+402  common	msgctl			sys_msgctl			compat_sys_msgctl
diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/syscall.tbl
index a70db013dbc7..6d0b84e3ef2d 100644
--- a/arch/sh/kernel/syscalls/syscall.tbl
+++ b/arch/sh/kernel/syscalls/syscall.tbl
@@ -391,3 +391,14 @@
 381	common	preadv2				sys_preadv2
 382	common	pwritev2			sys_pwritev2
 383	common	statx				sys_statx
+# room for arch specific syscalls
+393	common	semget				sys_semget
+394	common	semctl				sys_semctl
+395	common	shmget				sys_shmget
+396	common	shmctl				sys_shmctl
+397	common	shmat				sys_shmat
+398	common	shmdt				sys_shmdt
+399	common	msgget				sys_msgget
+400	common	msgsnd				sys_msgsnd
+401	common	msgrcv				sys_msgrcv
+402	common	msgctl				sys_msgctl
diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl
index c8c77c05ea97..8c9580302422 100644
--- a/arch/sparc/kernel/syscalls/syscall.tbl
+++ b/arch/sparc/kernel/syscalls/syscall.tbl
@@ -407,3 +407,15 @@
 359	common	pwritev2		sys_pwritev2			compat_sys_pwritev2
 360	common	statx			sys_statx
 361	common	io_pgetevents		sys_io_pgetevents		compat_sys_io_pgetevents
+# room for arch specific syscalls
+392	64	semtimedop			sys_semtimedop
+393	common	semget			sys_semget
+394	common	semctl			sys_semctl			compat_sys_semctl
+395	common	shmget			sys_shmget
+396	common	shmctl			sys_shmctl			compat_sys_shmctl
+397	common	shmat			sys_shmat			compat_sys_shmat
+398	common	shmdt			sys_shmdt
+399	common	msgget			sys_msgget
+400	common	msgsnd			sys_msgsnd			compat_sys_msgsnd
+401	common	msgrcv			sys_msgrcv			compat_sys_msgrcv
+402	common	msgctl			sys_msgctl			compat_sys_msgctl
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index 3cf7b533b3d1..2be1d0eb7754 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -398,3 +398,14 @@
 384	i386	arch_prctl		sys_arch_prctl			__ia32_compat_sys_arch_prctl
 385	i386	io_pgetevents		sys_io_pgetevents		__ia32_compat_sys_io_pgetevents
 386	i386	rseq			sys_rseq			__ia32_sys_rseq
+# don't use numbers 387 through 392, add new calls at the end
+393	i386	semget			sys_semget    			__ia32_sys_semget
+394	i386	semctl			sys_semctl    			__ia32_compat_sys_semctl
+395	i386	shmget			sys_shmget    			__ia32_sys_shmget
+396	i386	shmctl			sys_shmctl    			__ia32_compat_sys_shmctl
+397	i386	shmat			sys_shmat     			__ia32_compat_sys_shmat
+398	i386	shmdt			sys_shmdt     			__ia32_sys_shmdt
+399	i386	msgget			sys_msgget    			__ia32_sys_msgget
+400	i386	msgsnd			sys_msgsnd    			__ia32_compat_sys_msgsnd
+401	i386	msgrcv			sys_msgrcv    			__ia32_compat_sys_msgrcv
+402	i386	msgctl			sys_msgctl    			__ia32_compat_sys_msgctl
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index f0b1709a5ffb..9d8128ef50a9 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -343,6 +343,8 @@
 332	common	statx			__x64_sys_statx
 333	common	io_pgetevents		__x64_sys_io_pgetevents
 334	common	rseq			__x64_sys_rseq
+# don't use numbers 387 through 423, add new calls after the last
+# 'common' entry
 
 #
 # x32-specific system call numbers start at 512 to avoid cache impact
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index d90127298f12..509484dbfd5d 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -740,6 +740,7 @@ __SC_COMP(__NR_io_pgetevents, sys_io_pgetevents, compat_sys_io_pgetevents)
 __SYSCALL(__NR_rseq, sys_rseq)
 #define __NR_kexec_file_load 294
 __SYSCALL(__NR_kexec_file_load,     sys_kexec_file_load)
+/* 295 through 402 are unassigned to sync up with generic numbers, don't use */
 
 #undef __NR_syscalls
 #define __NR_syscalls 295
-- 
cgit v1.2.3


From 4b7d248b3a1de483ffe9d05c1debbf32a544164d Mon Sep 17 00:00:00 2001
From: Richard Guy Briggs <rgb@redhat.com>
Date: Tue, 22 Jan 2019 17:06:39 -0500
Subject: audit: move loginuid and sessionid from CONFIG_AUDITSYSCALL to
 CONFIG_AUDIT

loginuid and sessionid (and audit_log_session_info) should be part of
CONFIG_AUDIT scope and not CONFIG_AUDITSYSCALL since it is used in
CONFIG_CHANGE, ANOM_LINK, FEATURE_CHANGE (and INTEGRITY_RULE), none of
which are otherwise dependent on AUDITSYSCALL.

Please see github issue
https://github.com/linux-audit/audit-kernel/issues/104

Signed-off-by: Richard Guy Briggs <rgb@redhat.com>
[PM: tweaked subject line for better grep'ing]
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
 fs/proc/base.c        |  6 ++--
 include/linux/audit.h | 42 +++++++++++++------------
 include/linux/sched.h |  2 +-
 init/init_task.c      |  2 +-
 kernel/audit.c        | 85 +++++++++++++++++++++++++++++++++++++++++++++++++++
 kernel/auditsc.c      | 84 --------------------------------------------------
 6 files changed, 113 insertions(+), 108 deletions(-)

(limited to 'include')

diff --git a/fs/proc/base.c b/fs/proc/base.c
index 633a63462573..a23651ce6960 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1210,7 +1210,7 @@ static const struct file_operations proc_oom_score_adj_operations = {
 	.llseek		= default_llseek,
 };
 
-#ifdef CONFIG_AUDITSYSCALL
+#ifdef CONFIG_AUDIT
 #define TMPBUFLEN 11
 static ssize_t proc_loginuid_read(struct file * file, char __user * buf,
 				  size_t count, loff_t *ppos)
@@ -3002,7 +3002,7 @@ static const struct pid_entry tgid_base_stuff[] = {
 	ONE("oom_score",  S_IRUGO, proc_oom_score),
 	REG("oom_adj",    S_IRUGO|S_IWUSR, proc_oom_adj_operations),
 	REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations),
-#ifdef CONFIG_AUDITSYSCALL
+#ifdef CONFIG_AUDIT
 	REG("loginuid",   S_IWUSR|S_IRUGO, proc_loginuid_operations),
 	REG("sessionid",  S_IRUGO, proc_sessionid_operations),
 #endif
@@ -3390,7 +3390,7 @@ static const struct pid_entry tid_base_stuff[] = {
 	ONE("oom_score", S_IRUGO, proc_oom_score),
 	REG("oom_adj",   S_IRUGO|S_IWUSR, proc_oom_adj_operations),
 	REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations),
-#ifdef CONFIG_AUDITSYSCALL
+#ifdef CONFIG_AUDIT
 	REG("loginuid",  S_IWUSR|S_IRUGO, proc_loginuid_operations),
 	REG("sessionid",  S_IRUGO, proc_sessionid_operations),
 #endif
diff --git a/include/linux/audit.h b/include/linux/audit.h
index a625c29a2ea2..ecb5d317d6a2 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -159,6 +159,18 @@ extern int		    audit_update_lsm_rules(void);
 extern int audit_rule_change(int type, int seq, void *data, size_t datasz);
 extern int audit_list_rules_send(struct sk_buff *request_skb, int seq);
 
+extern int audit_set_loginuid(kuid_t loginuid);
+
+static inline kuid_t audit_get_loginuid(struct task_struct *tsk)
+{
+	return tsk->loginuid;
+}
+
+static inline unsigned int audit_get_sessionid(struct task_struct *tsk)
+{
+	return tsk->sessionid;
+}
+
 extern u32 audit_enabled;
 #else /* CONFIG_AUDIT */
 static inline __printf(4, 5)
@@ -201,6 +213,17 @@ static inline int audit_log_task_context(struct audit_buffer *ab)
 }
 static inline void audit_log_task_info(struct audit_buffer *ab)
 { }
+
+static inline kuid_t audit_get_loginuid(struct task_struct *tsk)
+{
+	return INVALID_UID;
+}
+
+static inline unsigned int audit_get_sessionid(struct task_struct *tsk)
+{
+	return AUDIT_SID_UNSET;
+}
+
 #define audit_enabled AUDIT_OFF
 #endif /* CONFIG_AUDIT */
 
@@ -323,17 +346,6 @@ static inline void audit_ptrace(struct task_struct *t)
 extern unsigned int audit_serial(void);
 extern int auditsc_get_stamp(struct audit_context *ctx,
 			      struct timespec64 *t, unsigned int *serial);
-extern int audit_set_loginuid(kuid_t loginuid);
-
-static inline kuid_t audit_get_loginuid(struct task_struct *tsk)
-{
-	return tsk->loginuid;
-}
-
-static inline unsigned int audit_get_sessionid(struct task_struct *tsk)
-{
-	return tsk->sessionid;
-}
 
 extern void __audit_ipc_obj(struct kern_ipc_perm *ipcp);
 extern void __audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, umode_t mode);
@@ -519,14 +531,6 @@ static inline int auditsc_get_stamp(struct audit_context *ctx,
 {
 	return 0;
 }
-static inline kuid_t audit_get_loginuid(struct task_struct *tsk)
-{
-	return INVALID_UID;
-}
-static inline unsigned int audit_get_sessionid(struct task_struct *tsk)
-{
-	return AUDIT_SID_UNSET;
-}
 static inline void audit_ipc_obj(struct kern_ipc_perm *ipcp)
 { }
 static inline void audit_ipc_set_perm(unsigned long qbytes, uid_t uid,
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 89541d248893..f9788bb122c5 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -886,7 +886,7 @@ struct task_struct {
 	struct callback_head		*task_works;
 
 	struct audit_context		*audit_context;
-#ifdef CONFIG_AUDITSYSCALL
+#ifdef CONFIG_AUDIT
 	kuid_t				loginuid;
 	unsigned int			sessionid;
 #endif
diff --git a/init/init_task.c b/init/init_task.c
index 5aebe3be4d7c..39c3109acc1a 100644
--- a/init/init_task.c
+++ b/init/init_task.c
@@ -121,7 +121,7 @@ struct task_struct init_task
 	.thread_pid	= &init_struct_pid,
 	.thread_group	= LIST_HEAD_INIT(init_task.thread_group),
 	.thread_node	= LIST_HEAD_INIT(init_signals.thread_head),
-#ifdef CONFIG_AUDITSYSCALL
+#ifdef CONFIG_AUDIT
 	.loginuid	= INVALID_UID,
 	.sessionid	= AUDIT_SID_UNSET,
 #endif
diff --git a/kernel/audit.c b/kernel/audit.c
index c2a7662cc254..2a32f304223d 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -2335,6 +2335,91 @@ void audit_log_link_denied(const char *operation)
 	audit_log_end(ab);
 }
 
+/* global counter which is incremented every time something logs in */
+static atomic_t session_id = ATOMIC_INIT(0);
+
+static int audit_set_loginuid_perm(kuid_t loginuid)
+{
+	/* if we are unset, we don't need privs */
+	if (!audit_loginuid_set(current))
+		return 0;
+	/* if AUDIT_FEATURE_LOGINUID_IMMUTABLE means never ever allow a change*/
+	if (is_audit_feature_set(AUDIT_FEATURE_LOGINUID_IMMUTABLE))
+		return -EPERM;
+	/* it is set, you need permission */
+	if (!capable(CAP_AUDIT_CONTROL))
+		return -EPERM;
+	/* reject if this is not an unset and we don't allow that */
+	if (is_audit_feature_set(AUDIT_FEATURE_ONLY_UNSET_LOGINUID)
+				 && uid_valid(loginuid))
+		return -EPERM;
+	return 0;
+}
+
+static void audit_log_set_loginuid(kuid_t koldloginuid, kuid_t kloginuid,
+				   unsigned int oldsessionid,
+				   unsigned int sessionid, int rc)
+{
+	struct audit_buffer *ab;
+	uid_t uid, oldloginuid, loginuid;
+	struct tty_struct *tty;
+
+	if (!audit_enabled)
+		return;
+
+	ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_LOGIN);
+	if (!ab)
+		return;
+
+	uid = from_kuid(&init_user_ns, task_uid(current));
+	oldloginuid = from_kuid(&init_user_ns, koldloginuid);
+	loginuid = from_kuid(&init_user_ns, kloginuid),
+	tty = audit_get_tty();
+
+	audit_log_format(ab, "pid=%d uid=%u", task_tgid_nr(current), uid);
+	audit_log_task_context(ab);
+	audit_log_format(ab, " old-auid=%u auid=%u tty=%s old-ses=%u ses=%u res=%d",
+			 oldloginuid, loginuid, tty ? tty_name(tty) : "(none)",
+			 oldsessionid, sessionid, !rc);
+	audit_put_tty(tty);
+	audit_log_end(ab);
+}
+
+/**
+ * audit_set_loginuid - set current task's loginuid
+ * @loginuid: loginuid value
+ *
+ * Returns 0.
+ *
+ * Called (set) from fs/proc/base.c::proc_loginuid_write().
+ */
+int audit_set_loginuid(kuid_t loginuid)
+{
+	unsigned int oldsessionid, sessionid = AUDIT_SID_UNSET;
+	kuid_t oldloginuid;
+	int rc;
+
+	oldloginuid = audit_get_loginuid(current);
+	oldsessionid = audit_get_sessionid(current);
+
+	rc = audit_set_loginuid_perm(loginuid);
+	if (rc)
+		goto out;
+
+	/* are we setting or clearing? */
+	if (uid_valid(loginuid)) {
+		sessionid = (unsigned int)atomic_inc_return(&session_id);
+		if (unlikely(sessionid == AUDIT_SID_UNSET))
+			sessionid = (unsigned int)atomic_inc_return(&session_id);
+	}
+
+	current->sessionid = sessionid;
+	current->loginuid = loginuid;
+out:
+	audit_log_set_loginuid(oldloginuid, loginuid, oldsessionid, sessionid, rc);
+	return rc;
+}
+
 /**
  * audit_log_end - end one audit record
  * @ab: the audit_buffer
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index b585ceb2f7a2..572d247957fb 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -1983,90 +1983,6 @@ int auditsc_get_stamp(struct audit_context *ctx,
 	return 1;
 }
 
-/* global counter which is incremented every time something logs in */
-static atomic_t session_id = ATOMIC_INIT(0);
-
-static int audit_set_loginuid_perm(kuid_t loginuid)
-{
-	/* if we are unset, we don't need privs */
-	if (!audit_loginuid_set(current))
-		return 0;
-	/* if AUDIT_FEATURE_LOGINUID_IMMUTABLE means never ever allow a change*/
-	if (is_audit_feature_set(AUDIT_FEATURE_LOGINUID_IMMUTABLE))
-		return -EPERM;
-	/* it is set, you need permission */
-	if (!capable(CAP_AUDIT_CONTROL))
-		return -EPERM;
-	/* reject if this is not an unset and we don't allow that */
-	if (is_audit_feature_set(AUDIT_FEATURE_ONLY_UNSET_LOGINUID) && uid_valid(loginuid))
-		return -EPERM;
-	return 0;
-}
-
-static void audit_log_set_loginuid(kuid_t koldloginuid, kuid_t kloginuid,
-				   unsigned int oldsessionid, unsigned int sessionid,
-				   int rc)
-{
-	struct audit_buffer *ab;
-	uid_t uid, oldloginuid, loginuid;
-	struct tty_struct *tty;
-
-	if (!audit_enabled)
-		return;
-
-	ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_LOGIN);
-	if (!ab)
-		return;
-
-	uid = from_kuid(&init_user_ns, task_uid(current));
-	oldloginuid = from_kuid(&init_user_ns, koldloginuid);
-	loginuid = from_kuid(&init_user_ns, kloginuid),
-	tty = audit_get_tty();
-
-	audit_log_format(ab, "pid=%d uid=%u", task_tgid_nr(current), uid);
-	audit_log_task_context(ab);
-	audit_log_format(ab, " old-auid=%u auid=%u tty=%s old-ses=%u ses=%u res=%d",
-			 oldloginuid, loginuid, tty ? tty_name(tty) : "(none)",
-			 oldsessionid, sessionid, !rc);
-	audit_put_tty(tty);
-	audit_log_end(ab);
-}
-
-/**
- * audit_set_loginuid - set current task's audit_context loginuid
- * @loginuid: loginuid value
- *
- * Returns 0.
- *
- * Called (set) from fs/proc/base.c::proc_loginuid_write().
- */
-int audit_set_loginuid(kuid_t loginuid)
-{
-	unsigned int oldsessionid, sessionid = AUDIT_SID_UNSET;
-	kuid_t oldloginuid;
-	int rc;
-
-	oldloginuid = audit_get_loginuid(current);
-	oldsessionid = audit_get_sessionid(current);
-
-	rc = audit_set_loginuid_perm(loginuid);
-	if (rc)
-		goto out;
-
-	/* are we setting or clearing? */
-	if (uid_valid(loginuid)) {
-		sessionid = (unsigned int)atomic_inc_return(&session_id);
-		if (unlikely(sessionid == AUDIT_SID_UNSET))
-			sessionid = (unsigned int)atomic_inc_return(&session_id);
-	}
-
-	current->sessionid = sessionid;
-	current->loginuid = loginuid;
-out:
-	audit_log_set_loginuid(oldloginuid, loginuid, oldsessionid, sessionid, rc);
-	return rc;
-}
-
 /**
  * __audit_mq_open - record audit data for a POSIX MQ open
  * @oflag: open flag
-- 
cgit v1.2.3


From 2fec30e245a3b46fef89c4cb1f74eefc5fbb29a6 Mon Sep 17 00:00:00 2001
From: Richard Guy Briggs <rgb@redhat.com>
Date: Wed, 23 Jan 2019 21:36:25 -0500
Subject: audit: add support for fcaps v3

V3 namespaced file capabilities were introduced in
commit 8db6c34f1dbc ("Introduce v3 namespaced file capabilities")

Add support for these by adding the "frootid" field to the existing
fcaps fields in the NAME and BPRM_FCAPS records.

Please see github issue
https://github.com/linux-audit/audit-kernel/issues/103

Signed-off-by: Richard Guy Briggs <rgb@redhat.com>
Acked-by: Serge Hallyn <serge@hallyn.com>
[PM: comment tweak to fit an 80 char line width]
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
 include/linux/capability.h | 5 +++--
 kernel/audit.c             | 6 ++++--
 kernel/audit.h             | 1 +
 kernel/auditsc.c           | 4 ++++
 security/commoncap.c       | 2 ++
 5 files changed, 14 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/capability.h b/include/linux/capability.h
index f640dcbc880c..b769330e9380 100644
--- a/include/linux/capability.h
+++ b/include/linux/capability.h
@@ -14,7 +14,7 @@
 #define _LINUX_CAPABILITY_H
 
 #include <uapi/linux/capability.h>
-
+#include <linux/uidgid.h>
 
 #define _KERNEL_CAPABILITY_VERSION _LINUX_CAPABILITY_VERSION_3
 #define _KERNEL_CAPABILITY_U32S    _LINUX_CAPABILITY_U32S_3
@@ -25,11 +25,12 @@ typedef struct kernel_cap_struct {
 	__u32 cap[_KERNEL_CAPABILITY_U32S];
 } kernel_cap_t;
 
-/* exact same as vfs_cap_data but in cpu endian and always filled completely */
+/* same as vfs_ns_cap_data but in cpu endian and always filled completely */
 struct cpu_vfs_cap_data {
 	__u32 magic_etc;
 	kernel_cap_t permitted;
 	kernel_cap_t inheritable;
+	kuid_t rootid;
 };
 
 #define _USER_CAP_HEADER_SIZE  (sizeof(struct __user_cap_header_struct))
diff --git a/kernel/audit.c b/kernel/audit.c
index 2a32f304223d..3f3f1888cac7 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -2084,8 +2084,9 @@ static void audit_log_fcaps(struct audit_buffer *ab, struct audit_names *name)
 {
 	audit_log_cap(ab, "cap_fp", &name->fcap.permitted);
 	audit_log_cap(ab, "cap_fi", &name->fcap.inheritable);
-	audit_log_format(ab, " cap_fe=%d cap_fver=%x",
-			 name->fcap.fE, name->fcap_ver);
+	audit_log_format(ab, " cap_fe=%d cap_fver=%x cap_frootid=%d",
+			 name->fcap.fE, name->fcap_ver,
+			 from_kuid(&init_user_ns, name->fcap.rootid));
 }
 
 static inline int audit_copy_fcaps(struct audit_names *name,
@@ -2104,6 +2105,7 @@ static inline int audit_copy_fcaps(struct audit_names *name,
 	name->fcap.permitted = caps.permitted;
 	name->fcap.inheritable = caps.inheritable;
 	name->fcap.fE = !!(caps.magic_etc & VFS_CAP_FLAGS_EFFECTIVE);
+	name->fcap.rootid = caps.rootid;
 	name->fcap_ver = (caps.magic_etc & VFS_CAP_REVISION_MASK) >>
 				VFS_CAP_REVISION_SHIFT;
 
diff --git a/kernel/audit.h b/kernel/audit.h
index 6ffb70575082..deefdbe61a47 100644
--- a/kernel/audit.h
+++ b/kernel/audit.h
@@ -69,6 +69,7 @@ struct audit_cap_data {
 		kernel_cap_t	effective;	/* effective set of process */
 	};
 	kernel_cap_t		ambient;
+	kuid_t			rootid;
 };
 
 /* When fs/namei.c:getname() is called, we store the pointer in name and bump
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 572d247957fb..c16beb25fd0a 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -1358,6 +1358,9 @@ static void audit_log_exit(void)
 			audit_log_cap(ab, "pi", &axs->new_pcap.inheritable);
 			audit_log_cap(ab, "pe", &axs->new_pcap.effective);
 			audit_log_cap(ab, "pa", &axs->new_pcap.ambient);
+			audit_log_format(ab, " frootid=%d",
+					 from_kuid(&init_user_ns,
+						   axs->fcap.rootid));
 			break; }
 
 		}
@@ -2271,6 +2274,7 @@ int __audit_log_bprm_fcaps(struct linux_binprm *bprm,
 	ax->fcap.permitted = vcaps.permitted;
 	ax->fcap.inheritable = vcaps.inheritable;
 	ax->fcap.fE = !!(vcaps.magic_etc & VFS_CAP_FLAGS_EFFECTIVE);
+	ax->fcap.rootid = vcaps.rootid;
 	ax->fcap_ver = (vcaps.magic_etc & VFS_CAP_REVISION_MASK) >> VFS_CAP_REVISION_SHIFT;
 
 	ax->old_pcap.permitted   = old->cap_permitted;
diff --git a/security/commoncap.c b/security/commoncap.c
index 232db019f051..c097f3568001 100644
--- a/security/commoncap.c
+++ b/security/commoncap.c
@@ -643,6 +643,8 @@ int get_vfs_caps_from_disk(const struct dentry *dentry, struct cpu_vfs_cap_data
 	cpu_caps->permitted.cap[CAP_LAST_U32] &= CAP_LAST_U32_VALID_MASK;
 	cpu_caps->inheritable.cap[CAP_LAST_U32] &= CAP_LAST_U32_VALID_MASK;
 
+	cpu_caps->rootid = rootkuid;
+
 	return 0;
 }
 
-- 
cgit v1.2.3


From 30e5c2c6bf285d93dee4c45f23da95d7d50b125a Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Fri, 25 Jan 2019 10:53:23 -0800
Subject: net: Revert devlink health changes.

This reverts the devlink health changes from 9/17/2019,
Jiri wants things to be designed differently and it was
agreed that the easiest way to do this is start from the
beginning again.

Commits reverted:

cb5ccfbe73b389470e1dc11061bb185ef4bc9aec
880ee82f0313453ec5a6cb122866ac057263066b
c7af343b4e33578b7de91786a3f639c8cfa0d97b
ff253fedab961b22117a73ab808fcfa9e6852b50
6f9d56132eb6d2603d4273cfc65bed914ec47acb
fcd852c69d776c0f46c8f79e8e431e5cc6ddc7b7
8a66704a13d9713593342e29b4f0c19762f5746b
12bd0dcefe88782ac1c9fff632958dd1b71d27e5
aba25279c10094c5c97d09c3491ca86d00b4ad5e
ce019faa70f81555fa17ebc1d5a03651f2e7e15a
b8c45a033acc607201588f7665ba84207e5149e0

And the follow-on build fix:

o33a0efa4baecd689da9474ce0e8b673eb6931c60

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/devlink-health.txt        |   86 --
 drivers/net/ethernet/mellanox/mlx5/core/Makefile   |    2 +-
 drivers/net/ethernet/mellanox/mlx5/core/en.h       |   18 +-
 .../net/ethernet/mellanox/mlx5/core/en/reporter.h  |   15 -
 .../ethernet/mellanox/mlx5/core/en/reporter_tx.c   |  356 -------
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c  |  186 +++-
 drivers/net/ethernet/mellanox/mlx5/core/en_tx.c    |    2 +-
 include/net/devlink.h                              |  144 ---
 include/trace/events/devlink.h                     |   62 --
 include/uapi/linux/devlink.h                       |   25 -
 net/core/devlink.c                                 | 1058 --------------------
 11 files changed, 169 insertions(+), 1785 deletions(-)
 delete mode 100644 Documentation/networking/devlink-health.txt
 delete mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/reporter.h
 delete mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c

(limited to 'include')

diff --git a/Documentation/networking/devlink-health.txt b/Documentation/networking/devlink-health.txt
deleted file mode 100644
index 1db3fbea0831..000000000000
--- a/Documentation/networking/devlink-health.txt
+++ /dev/null
@@ -1,86 +0,0 @@
-The health mechanism is targeted for Real Time Alerting, in order to know when
-something bad had happened to a PCI device
-- Provide alert debug information
-- Self healing
-- If problem needs vendor support, provide a way to gather all needed debugging
-  information.
-
-The main idea is to unify and centralize driver health reports in the
-generic devlink instance and allow the user to set different
-attributes of the health reporting and recovery procedures.
-
-The devlink health reporter:
-Device driver creates a "health reporter" per each error/health type.
-Error/Health type can be a known/generic (eg pci error, fw error, rx/tx error)
-or unknown (driver specific).
-For each registered health reporter a driver can issue error/health reports
-asynchronously. All health reports handling is done by devlink.
-Device driver can provide specific callbacks for each "health reporter", e.g.
- - Recovery procedures
- - Diagnostics and object dump procedures
- - OOB initial parameters
-Different parts of the driver can register different types of health reporters
-with different handlers.
-
-Once an error is reported, devlink health will do the following actions:
-  * A log is being send to the kernel trace events buffer
-  * Health status and statistics are being updated for the reporter instance
-  * Object dump is being taken and saved at the reporter instance (as long as
-    there is no other dump which is already stored)
-  * Auto recovery attempt is being done. Depends on:
-    - Auto-recovery configuration
-    - Grace period vs. time passed since last recover
-
-The user interface:
-User can access/change each reporter's parameters and driver specific callbacks
-via devlink, e.g per error type (per health reporter)
- - Configure reporter's generic parameters (like: disable/enable auto recovery)
- - Invoke recovery procedure
- - Run diagnostics
- - Object dump
-
-The devlink health interface (via netlink):
-DEVLINK_CMD_HEALTH_REPORTER_GET
-  Retrieves status and configuration info per DEV and reporter.
-DEVLINK_CMD_HEALTH_REPORTER_SET
-  Allows reporter-related configuration setting.
-DEVLINK_CMD_HEALTH_REPORTER_RECOVER
-  Triggers a reporter's recovery procedure.
-DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE
-  Retrieves diagnostics data from a reporter on a device.
-DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET
-  Retrieves the last stored dump. Devlink health
-  saves a single dump. If an dump is not already stored by the devlink
-  for this reporter, devlink generates a new dump.
-  dump output is defined by the reporter.
-DEVLINK_CMD_HEALTH_REPORTER_DUMP_CLEAR
-  Clears the last saved dump file for the specified reporter.
-
-
-                                               netlink
-                                      +--------------------------+
-                                      |                          |
-                                      |            +             |
-                                      |            |             |
-                                      +--------------------------+
-                                                   |request for ops
-                                                   |(diagnose,
- mlx5_core                             devlink     |recover,
-                                                   |dump)
-+--------+                            +--------------------------+
-|        |                            |    reporter|             |
-|        |                            |  +---------v----------+  |
-|        |   ops execution            |  |                    |  |
-|     <----------------------------------+                    |  |
-|        |                            |  |                    |  |
-|        |                            |  + ^------------------+  |
-|        |                            |    | request for ops     |
-|        |                            |    | (recover, dump)     |
-|        |                            |    |                     |
-|        |                            |  +-+------------------+  |
-|        |     health report          |  | health handler     |  |
-|        +------------------------------->                    |  |
-|        |                            |  +--------------------+  |
-|        |     health reporter create |                          |
-|        +---------------------------->                          |
-+--------+                            +--------------------------+
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index 6bb2a860b15b..9de9abacf7f6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -22,7 +22,7 @@ mlx5_core-y :=	main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
 #
 mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \
 		en_tx.o en_rx.o en_dim.o en_txrx.o en/xdp.o en_stats.o \
-		en_selftest.o en/port.o en/monitor_stats.o en/reporter_tx.o
+		en_selftest.o en/port.o en/monitor_stats.o
 
 #
 # Netdev extra
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 27e276c9bf84..8fa8fdd30b85 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -388,7 +388,10 @@ struct mlx5e_txqsq {
 	struct mlx5e_channel      *channel;
 	int                        txq_ix;
 	u32                        rate_limit;
-	struct work_struct         recover_work;
+	struct mlx5e_txqsq_recover {
+		struct work_struct         recover_work;
+		u64                        last_recover;
+	} recover;
 } ____cacheline_aligned_in_smp;
 
 struct mlx5e_dma_info {
@@ -679,13 +682,6 @@ struct mlx5e_rss_params {
 	u8	hfunc;
 };
 
-struct mlx5e_modify_sq_param {
-	int curr_state;
-	int next_state;
-	int rl_update;
-	int rl_index;
-};
-
 struct mlx5e_priv {
 	/* priv data path fields - start */
 	struct mlx5e_txqsq *txq2sq[MLX5E_MAX_NUM_CHANNELS * MLX5E_MAX_NUM_TC];
@@ -741,7 +737,6 @@ struct mlx5e_priv {
 #ifdef CONFIG_MLX5_EN_TLS
 	struct mlx5e_tls          *tls;
 #endif
-	struct devlink_health_reporter *tx_reporter;
 };
 
 struct mlx5e_profile {
@@ -871,11 +866,6 @@ void mlx5e_set_rq_type(struct mlx5_core_dev *mdev, struct mlx5e_params *params);
 void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev,
 			       struct mlx5e_params *params);
 
-int mlx5e_modify_sq(struct mlx5_core_dev *mdev, u32 sqn,
-		    struct mlx5e_modify_sq_param *p);
-void mlx5e_activate_txqsq(struct mlx5e_txqsq *sq);
-void mlx5e_tx_disable_queue(struct netdev_queue *txq);
-
 static inline bool mlx5e_tunnel_inner_ft_supported(struct mlx5_core_dev *mdev)
 {
 	return (MLX5_CAP_ETH(mdev, tunnel_stateless_gre) &&
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter.h b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter.h
deleted file mode 100644
index 2335c5b48820..000000000000
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
-/* Copyright (c) 2018 Mellanox Technologies. */
-
-#ifndef __MLX5E_EN_REPORTER_H
-#define __MLX5E_EN_REPORTER_H
-
-#include <linux/mlx5/driver.h>
-#include "en.h"
-
-int mlx5e_tx_reporter_create(struct mlx5e_priv *priv);
-void mlx5e_tx_reporter_destroy(struct mlx5e_priv *priv);
-void mlx5e_tx_reporter_err_cqe(struct mlx5e_txqsq *sq);
-void mlx5e_tx_reporter_timeout(struct mlx5e_txqsq *sq);
-
-#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
deleted file mode 100644
index d9675afbb924..000000000000
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
+++ /dev/null
@@ -1,356 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
-/* Copyright (c) 2018 Mellanox Technologies. */
-
-#include <net/devlink.h>
-#include "reporter.h"
-#include "lib/eq.h"
-
-#define MLX5E_TX_REPORTER_PER_SQ_MAX_LEN 256
-
-struct mlx5e_tx_err_ctx {
-	int (*recover)(struct mlx5e_txqsq *sq);
-	struct mlx5e_txqsq *sq;
-};
-
-static int mlx5e_wait_for_sq_flush(struct mlx5e_txqsq *sq)
-{
-	unsigned long exp_time = jiffies + msecs_to_jiffies(2000);
-
-	while (time_before(jiffies, exp_time)) {
-		if (sq->cc == sq->pc)
-			return 0;
-
-		msleep(20);
-	}
-
-	netdev_err(sq->channel->netdev,
-		   "Wait for SQ 0x%x flush timeout (sq cc = 0x%x, sq pc = 0x%x)\n",
-		   sq->sqn, sq->cc, sq->pc);
-
-	return -ETIMEDOUT;
-}
-
-static void mlx5e_reset_txqsq_cc_pc(struct mlx5e_txqsq *sq)
-{
-	WARN_ONCE(sq->cc != sq->pc,
-		  "SQ 0x%x: cc (0x%x) != pc (0x%x)\n",
-		  sq->sqn, sq->cc, sq->pc);
-	sq->cc = 0;
-	sq->dma_fifo_cc = 0;
-	sq->pc = 0;
-}
-
-static int mlx5e_sq_to_ready(struct mlx5e_txqsq *sq, int curr_state)
-{
-	struct mlx5_core_dev *mdev = sq->channel->mdev;
-	struct net_device *dev = sq->channel->netdev;
-	struct mlx5e_modify_sq_param msp = {0};
-	int err;
-
-	msp.curr_state = curr_state;
-	msp.next_state = MLX5_SQC_STATE_RST;
-
-	err = mlx5e_modify_sq(mdev, sq->sqn, &msp);
-	if (err) {
-		netdev_err(dev, "Failed to move sq 0x%x to reset\n", sq->sqn);
-		return err;
-	}
-
-	memset(&msp, 0, sizeof(msp));
-	msp.curr_state = MLX5_SQC_STATE_RST;
-	msp.next_state = MLX5_SQC_STATE_RDY;
-
-	err = mlx5e_modify_sq(mdev, sq->sqn, &msp);
-	if (err) {
-		netdev_err(dev, "Failed to move sq 0x%x to ready\n", sq->sqn);
-		return err;
-	}
-
-	return 0;
-}
-
-static int mlx5e_tx_reporter_err_cqe_recover(struct mlx5e_txqsq *sq)
-{
-	struct mlx5_core_dev *mdev = sq->channel->mdev;
-	struct net_device *dev = sq->channel->netdev;
-	u8 state;
-	int err;
-
-	if (!test_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state))
-		return 0;
-
-	err = mlx5_core_query_sq_state(mdev, sq->sqn, &state);
-	if (err) {
-		netdev_err(dev, "Failed to query SQ 0x%x state. err = %d\n",
-			   sq->sqn, err);
-		return err;
-	}
-
-	if (state != MLX5_RQC_STATE_ERR) {
-		netdev_err(dev, "SQ 0x%x not in ERROR state\n", sq->sqn);
-		return -EINVAL;
-	}
-
-	mlx5e_tx_disable_queue(sq->txq);
-
-	err = mlx5e_wait_for_sq_flush(sq);
-	if (err)
-		return err;
-
-	/* At this point, no new packets will arrive from the stack as TXQ is
-	 * marked with QUEUE_STATE_DRV_XOFF. In addition, NAPI cleared all
-	 * pending WQEs.  SQ can safely reset the SQ.
-	 */
-
-	err = mlx5e_sq_to_ready(sq, state);
-	if (err)
-		return err;
-
-	mlx5e_reset_txqsq_cc_pc(sq);
-	sq->stats->recover++;
-	mlx5e_activate_txqsq(sq);
-
-	return 0;
-}
-
-void mlx5e_tx_reporter_err_cqe(struct mlx5e_txqsq *sq)
-{
-	char err_str[MLX5E_TX_REPORTER_PER_SQ_MAX_LEN];
-	struct mlx5e_tx_err_ctx err_ctx = {0};
-
-	err_ctx.sq       = sq;
-	err_ctx.recover  = mlx5e_tx_reporter_err_cqe_recover;
-	sprintf(err_str, "ERR CQE on SQ: 0x%x", sq->sqn);
-
-	devlink_health_report(sq->channel->priv->tx_reporter, err_str,
-			      &err_ctx);
-}
-
-static int mlx5e_tx_reporter_timeout_recover(struct mlx5e_txqsq *sq)
-{
-	struct mlx5_eq_comp *eq = sq->cq.mcq.eq;
-	u32 eqe_count;
-
-	netdev_err(sq->channel->netdev, "EQ 0x%x: Cons = 0x%x, irqn = 0x%x\n",
-		   eq->core.eqn, eq->core.cons_index, eq->core.irqn);
-
-	eqe_count = mlx5_eq_poll_irq_disabled(eq);
-	if (!eqe_count) {
-		clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
-		return 1;
-	}
-
-	netdev_err(sq->channel->netdev, "Recover %d eqes on EQ 0x%x\n",
-		   eqe_count, eq->core.eqn);
-	sq->channel->stats->eq_rearm++;
-	return 0;
-}
-
-void mlx5e_tx_reporter_timeout(struct mlx5e_txqsq *sq)
-{
-	struct mlx5e_tx_err_ctx err_ctx;
-	char err_str[MLX5E_TX_REPORTER_PER_SQ_MAX_LEN];
-
-	err_ctx.sq       = sq;
-	err_ctx.recover  = mlx5e_tx_reporter_timeout_recover;
-	sprintf(err_str,
-		"TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x, usecs since last trans: %u\n",
-		sq->channel->ix, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc,
-		jiffies_to_usecs(jiffies - sq->txq->trans_start));
-	devlink_health_report(sq->channel->priv->tx_reporter, err_str,
-			      &err_ctx);
-}
-
-/* state lock cannot be grabbed within this function.
- * It can cause a dead lock or a read-after-free.
- */
-int mlx5e_tx_reporter_recover_from_ctx(struct mlx5e_tx_err_ctx *err_ctx)
-{
-	return err_ctx->recover(err_ctx->sq);
-}
-
-static int mlx5e_tx_reporter_recover_all(struct mlx5e_priv *priv)
-{
-	int err;
-
-	mutex_lock(&priv->state_lock);
-	mlx5e_close_locked(priv->netdev);
-	err = mlx5e_open_locked(priv->netdev);
-	mutex_unlock(&priv->state_lock);
-
-	return err;
-}
-
-static int mlx5e_tx_reporter_recover(struct devlink_health_reporter *reporter,
-				     void *context)
-{
-	struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
-	struct mlx5e_tx_err_ctx *err_ctx = context;
-
-	return err_ctx ? mlx5e_tx_reporter_recover_from_ctx(err_ctx) :
-			 mlx5e_tx_reporter_recover_all(priv);
-}
-
-static int
-mlx5e_tx_reporter_build_diagnose_output(struct devlink_health_buffer *buffer,
-					u32 sqn, u8 state, u8 stopped)
-{
-	int err, i;
-	int nest = 0;
-	char name[20];
-
-	err = devlink_health_buffer_nest_start(buffer,
-					       DEVLINK_ATTR_HEALTH_BUFFER_OBJECT);
-	if (err)
-		goto buffer_error;
-	nest++;
-
-	err = devlink_health_buffer_nest_start(buffer,
-					       DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_PAIR);
-	if (err)
-		goto buffer_error;
-	nest++;
-
-	sprintf(name, "SQ 0x%x", sqn);
-	err = devlink_health_buffer_put_object_name(buffer, name);
-	if (err)
-		goto buffer_error;
-
-	err = devlink_health_buffer_nest_start(buffer,
-					       DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE);
-	if (err)
-		goto buffer_error;
-	nest++;
-
-	err = devlink_health_buffer_nest_start(buffer,
-					       DEVLINK_ATTR_HEALTH_BUFFER_OBJECT);
-	if (err)
-		goto buffer_error;
-	nest++;
-
-	err = devlink_health_buffer_nest_start(buffer,
-					       DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_PAIR);
-	if (err)
-		goto buffer_error;
-	nest++;
-
-	err = devlink_health_buffer_put_object_name(buffer, "HW state");
-	if (err)
-		goto buffer_error;
-
-	err = devlink_health_buffer_nest_start(buffer,
-					       DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE);
-	if (err)
-		goto buffer_error;
-	nest++;
-
-	err = devlink_health_buffer_put_value_u8(buffer, state);
-	if (err)
-		goto buffer_error;
-
-	devlink_health_buffer_nest_end(buffer); /* DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE */
-	nest--;
-
-	devlink_health_buffer_nest_end(buffer); /* DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_PAIR */
-	nest--;
-
-	err = devlink_health_buffer_nest_start(buffer,
-					       DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_PAIR);
-	if (err)
-		goto buffer_error;
-	nest++;
-
-	err = devlink_health_buffer_put_object_name(buffer, "stopped");
-	if (err)
-		goto buffer_error;
-
-	err = devlink_health_buffer_nest_start(buffer,
-					       DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE);
-	if (err)
-		goto buffer_error;
-	nest++;
-
-	err = devlink_health_buffer_put_value_u8(buffer, stopped);
-	if (err)
-		goto buffer_error;
-
-	for (i = 0; i < nest; i++)
-		devlink_health_buffer_nest_end(buffer);
-
-	return 0;
-
-buffer_error:
-	for (i = 0; i < nest; i++)
-		devlink_health_buffer_nest_cancel(buffer);
-	return err;
-}
-
-static int mlx5e_tx_reporter_diagnose(struct devlink_health_reporter *reporter,
-				      struct devlink_health_buffer **buffers_array,
-				      unsigned int buffer_size,
-				      unsigned int num_buffers)
-{
-	struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
-	unsigned int buff = 0;
-	int i = 0, err = 0;
-
-	if (buffer_size < MLX5E_TX_REPORTER_PER_SQ_MAX_LEN)
-		return -ENOMEM;
-
-	mutex_lock(&priv->state_lock);
-
-	if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
-		mutex_unlock(&priv->state_lock);
-		return 0;
-	}
-
-	while (i < priv->channels.num * priv->channels.params.num_tc) {
-		struct mlx5e_txqsq *sq = priv->txq2sq[i];
-		u8 state;
-
-		err = mlx5_core_query_sq_state(priv->mdev, sq->sqn, &state);
-		if (err)
-			break;
-
-		err = mlx5e_tx_reporter_build_diagnose_output(buffers_array[buff],
-							      sq->sqn, state,
-							      netif_xmit_stopped(sq->txq));
-		if (err) {
-			if (++buff == num_buffers)
-				break;
-		} else {
-			i++;
-		}
-	}
-
-	mutex_unlock(&priv->state_lock);
-	return err;
-}
-
-static const struct devlink_health_reporter_ops mlx5_tx_reporter_ops = {
-		.name = "TX",
-		.recover = mlx5e_tx_reporter_recover,
-		.diagnose_size = MLX5E_MAX_NUM_CHANNELS * MLX5E_MAX_NUM_TC *
-				 MLX5E_TX_REPORTER_PER_SQ_MAX_LEN,
-		.diagnose = mlx5e_tx_reporter_diagnose,
-		.dump_size = 0,
-		.dump = NULL,
-};
-
-#define MLX5_REPORTER_TX_GRACEFUL_PERIOD 500
-int mlx5e_tx_reporter_create(struct mlx5e_priv *priv)
-{
-	struct mlx5_core_dev *mdev = priv->mdev;
-	struct devlink *devlink = priv_to_devlink(mdev);
-
-	priv->tx_reporter =
-		devlink_health_reporter_create(devlink, &mlx5_tx_reporter_ops,
-					       MLX5_REPORTER_TX_GRACEFUL_PERIOD,
-					       true, priv);
-	return PTR_ERR_OR_ZERO(priv->tx_reporter);
-}
-
-void mlx5e_tx_reporter_destroy(struct mlx5e_priv *priv)
-{
-	devlink_health_reporter_destroy(priv->tx_reporter);
-}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index dee0c8f3d4e9..8cfd2ec7c0a2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -51,7 +51,6 @@
 #include "en/xdp.h"
 #include "lib/eq.h"
 #include "en/monitor_stats.h"
-#include "en/reporter.h"
 
 struct mlx5e_rq_param {
 	u32			rqc[MLX5_ST_SZ_DW(rqc)];
@@ -1161,7 +1160,7 @@ static int mlx5e_alloc_txqsq_db(struct mlx5e_txqsq *sq, int numa)
 	return 0;
 }
 
-static void mlx5e_tx_err_cqe_work(struct work_struct *recover_work);
+static void mlx5e_sq_recover(struct work_struct *work);
 static int mlx5e_alloc_txqsq(struct mlx5e_channel *c,
 			     int txq_ix,
 			     struct mlx5e_params *params,
@@ -1183,7 +1182,7 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c,
 	sq->uar_map   = mdev->mlx5e_res.bfreg.map;
 	sq->min_inline_mode = params->tx_min_inline_mode;
 	sq->stats     = &c->priv->channel_stats[c->ix].sq[tc];
-	INIT_WORK(&sq->recover_work, mlx5e_tx_err_cqe_work);
+	INIT_WORK(&sq->recover.recover_work, mlx5e_sq_recover);
 	if (MLX5_IPSEC_DEV(c->priv->mdev))
 		set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state);
 	if (mlx5_accel_is_tls_device(c->priv->mdev))
@@ -1271,8 +1270,15 @@ static int mlx5e_create_sq(struct mlx5_core_dev *mdev,
 	return err;
 }
 
-int mlx5e_modify_sq(struct mlx5_core_dev *mdev, u32 sqn,
-		    struct mlx5e_modify_sq_param *p)
+struct mlx5e_modify_sq_param {
+	int curr_state;
+	int next_state;
+	bool rl_update;
+	int rl_index;
+};
+
+static int mlx5e_modify_sq(struct mlx5_core_dev *mdev, u32 sqn,
+			   struct mlx5e_modify_sq_param *p)
 {
 	void *in;
 	void *sqc;
@@ -1370,7 +1376,17 @@ err_free_txqsq:
 	return err;
 }
 
-void mlx5e_activate_txqsq(struct mlx5e_txqsq *sq)
+static void mlx5e_reset_txqsq_cc_pc(struct mlx5e_txqsq *sq)
+{
+	WARN_ONCE(sq->cc != sq->pc,
+		  "SQ 0x%x: cc (0x%x) != pc (0x%x)\n",
+		  sq->sqn, sq->cc, sq->pc);
+	sq->cc = 0;
+	sq->dma_fifo_cc = 0;
+	sq->pc = 0;
+}
+
+static void mlx5e_activate_txqsq(struct mlx5e_txqsq *sq)
 {
 	sq->txq = netdev_get_tx_queue(sq->channel->netdev, sq->txq_ix);
 	clear_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state);
@@ -1379,7 +1395,7 @@ void mlx5e_activate_txqsq(struct mlx5e_txqsq *sq)
 	netif_tx_start_queue(sq->txq);
 }
 
-void mlx5e_tx_disable_queue(struct netdev_queue *txq)
+static inline void netif_tx_disable_queue(struct netdev_queue *txq)
 {
 	__netif_tx_lock_bh(txq);
 	netif_tx_stop_queue(txq);
@@ -1395,7 +1411,7 @@ static void mlx5e_deactivate_txqsq(struct mlx5e_txqsq *sq)
 	/* prevent netif_tx_wake_queue */
 	napi_synchronize(&c->napi);
 
-	mlx5e_tx_disable_queue(sq->txq);
+	netif_tx_disable_queue(sq->txq);
 
 	/* last doorbell out, godspeed .. */
 	if (mlx5e_wqc_has_room_for(wq, sq->cc, sq->pc, 1)) {
@@ -1415,7 +1431,6 @@ static void mlx5e_close_txqsq(struct mlx5e_txqsq *sq)
 	struct mlx5_rate_limit rl = {0};
 
 	cancel_work_sync(&sq->dim.work);
-	cancel_work_sync(&sq->recover_work);
 	mlx5e_destroy_sq(mdev, sq->sqn);
 	if (sq->rate_limit) {
 		rl.rate = sq->rate_limit;
@@ -1425,15 +1440,105 @@ static void mlx5e_close_txqsq(struct mlx5e_txqsq *sq)
 	mlx5e_free_txqsq(sq);
 }
 
-static void mlx5e_tx_err_cqe_work(struct work_struct *recover_work)
+static int mlx5e_wait_for_sq_flush(struct mlx5e_txqsq *sq)
+{
+	unsigned long exp_time = jiffies + msecs_to_jiffies(2000);
+
+	while (time_before(jiffies, exp_time)) {
+		if (sq->cc == sq->pc)
+			return 0;
+
+		msleep(20);
+	}
+
+	netdev_err(sq->channel->netdev,
+		   "Wait for SQ 0x%x flush timeout (sq cc = 0x%x, sq pc = 0x%x)\n",
+		   sq->sqn, sq->cc, sq->pc);
+
+	return -ETIMEDOUT;
+}
+
+static int mlx5e_sq_to_ready(struct mlx5e_txqsq *sq, int curr_state)
 {
-	struct mlx5e_txqsq *sq = container_of(recover_work, struct mlx5e_txqsq,
-					      recover_work);
+	struct mlx5_core_dev *mdev = sq->channel->mdev;
+	struct net_device *dev = sq->channel->netdev;
+	struct mlx5e_modify_sq_param msp = {0};
+	int err;
 
-	if (!sq->channel->priv->tx_reporter)
+	msp.curr_state = curr_state;
+	msp.next_state = MLX5_SQC_STATE_RST;
+
+	err = mlx5e_modify_sq(mdev, sq->sqn, &msp);
+	if (err) {
+		netdev_err(dev, "Failed to move sq 0x%x to reset\n", sq->sqn);
+		return err;
+	}
+
+	memset(&msp, 0, sizeof(msp));
+	msp.curr_state = MLX5_SQC_STATE_RST;
+	msp.next_state = MLX5_SQC_STATE_RDY;
+
+	err = mlx5e_modify_sq(mdev, sq->sqn, &msp);
+	if (err) {
+		netdev_err(dev, "Failed to move sq 0x%x to ready\n", sq->sqn);
+		return err;
+	}
+
+	return 0;
+}
+
+static void mlx5e_sq_recover(struct work_struct *work)
+{
+	struct mlx5e_txqsq_recover *recover =
+		container_of(work, struct mlx5e_txqsq_recover,
+			     recover_work);
+	struct mlx5e_txqsq *sq = container_of(recover, struct mlx5e_txqsq,
+					      recover);
+	struct mlx5_core_dev *mdev = sq->channel->mdev;
+	struct net_device *dev = sq->channel->netdev;
+	u8 state;
+	int err;
+
+	err = mlx5_core_query_sq_state(mdev, sq->sqn, &state);
+	if (err) {
+		netdev_err(dev, "Failed to query SQ 0x%x state. err = %d\n",
+			   sq->sqn, err);
+		return;
+	}
+
+	if (state != MLX5_RQC_STATE_ERR) {
+		netdev_err(dev, "SQ 0x%x not in ERROR state\n", sq->sqn);
+		return;
+	}
+
+	netif_tx_disable_queue(sq->txq);
+
+	if (mlx5e_wait_for_sq_flush(sq))
 		return;
 
-	mlx5e_tx_reporter_err_cqe(sq);
+	/* If the interval between two consecutive recovers per SQ is too
+	 * short, don't recover to avoid infinite loop of ERR_CQE -> recover.
+	 * If we reached this state, there is probably a bug that needs to be
+	 * fixed. let's keep the queue close and let tx timeout cleanup.
+	 */
+	if (jiffies_to_msecs(jiffies - recover->last_recover) <
+	    MLX5E_SQ_RECOVER_MIN_INTERVAL) {
+		netdev_err(dev, "Recover SQ 0x%x canceled, too many error CQEs\n",
+			   sq->sqn);
+		return;
+	}
+
+	/* At this point, no new packets will arrive from the stack as TXQ is
+	 * marked with QUEUE_STATE_DRV_XOFF. In addition, NAPI cleared all
+	 * pending WQEs.  SQ can safely reset the SQ.
+	 */
+	if (mlx5e_sq_to_ready(sq, state))
+		return;
+
+	mlx5e_reset_txqsq_cc_pc(sq);
+	sq->stats->recover++;
+	recover->last_recover = jiffies;
+	mlx5e_activate_txqsq(sq);
 }
 
 static int mlx5e_open_icosq(struct mlx5e_channel *c,
@@ -3102,7 +3207,6 @@ static void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv)
 {
 	int tc;
 
-	mlx5e_tx_reporter_destroy(priv);
 	for (tc = 0; tc < priv->profile->max_tc; tc++)
 		mlx5e_destroy_tis(priv->mdev, priv->tisn[tc]);
 }
@@ -4074,14 +4178,31 @@ netdev_features_t mlx5e_features_check(struct sk_buff *skb,
 	return features;
 }
 
+static bool mlx5e_tx_timeout_eq_recover(struct net_device *dev,
+					struct mlx5e_txqsq *sq)
+{
+	struct mlx5_eq_comp *eq = sq->cq.mcq.eq;
+	u32 eqe_count;
+
+	netdev_err(dev, "EQ 0x%x: Cons = 0x%x, irqn = 0x%x\n",
+		   eq->core.eqn, eq->core.cons_index, eq->core.irqn);
+
+	eqe_count = mlx5_eq_poll_irq_disabled(eq);
+	if (!eqe_count)
+		return false;
+
+	netdev_err(dev, "Recover %d eqes on EQ 0x%x\n", eqe_count, eq->core.eqn);
+	sq->channel->stats->eq_rearm++;
+	return true;
+}
+
 static void mlx5e_tx_timeout_work(struct work_struct *work)
 {
 	struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv,
 					       tx_timeout_work);
-	int i;
-
-	if (!priv->tx_reporter)
-		return;
+	struct net_device *dev = priv->netdev;
+	bool reopen_channels = false;
+	int i, err;
 
 	rtnl_lock();
 	mutex_lock(&priv->state_lock);
@@ -4090,16 +4211,36 @@ static void mlx5e_tx_timeout_work(struct work_struct *work)
 		goto unlock;
 
 	for (i = 0; i < priv->channels.num * priv->channels.params.num_tc; i++) {
-		struct netdev_queue *dev_queue =
-			netdev_get_tx_queue(priv->netdev, i);
+		struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, i);
 		struct mlx5e_txqsq *sq = priv->txq2sq[i];
 
 		if (!netif_xmit_stopped(dev_queue))
 			continue;
 
-		mlx5e_tx_reporter_timeout(sq);
+		netdev_err(dev,
+			   "TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x, usecs since last trans: %u\n",
+			   i, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc,
+			   jiffies_to_usecs(jiffies - dev_queue->trans_start));
+
+		/* If we recover a lost interrupt, most likely TX timeout will
+		 * be resolved, skip reopening channels
+		 */
+		if (!mlx5e_tx_timeout_eq_recover(dev, sq)) {
+			clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
+			reopen_channels = true;
+		}
 	}
 
+	if (!reopen_channels)
+		goto unlock;
+
+	mlx5e_close_locked(dev);
+	err = mlx5e_open_locked(dev);
+	if (err)
+		netdev_err(priv->netdev,
+			   "mlx5e_open_locked failed recovering from a tx_timeout, err(%d).\n",
+			   err);
+
 unlock:
 	mutex_unlock(&priv->state_lock);
 	rtnl_unlock();
@@ -4767,7 +4908,6 @@ static int mlx5e_init_nic_tx(struct mlx5e_priv *priv)
 #ifdef CONFIG_MLX5_CORE_EN_DCB
 	mlx5e_dcbnl_initialize(priv);
 #endif
-	mlx5e_tx_reporter_create(priv);
 	return 0;
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
index a8e052a5ce36..598ad7e4d5c9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -514,7 +514,7 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
 				mlx5e_dump_error_cqe(sq,
 						     (struct mlx5_err_cqe *)cqe);
 				queue_work(cq->channel->priv->wq,
-					   &sq->recover_work);
+					   &sq->recover.recover_work);
 			}
 			stats->cqe_err++;
 		}
diff --git a/include/net/devlink.h b/include/net/devlink.h
index a81a1b7a67d7..67f4293bc970 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -30,7 +30,6 @@ struct devlink {
 	struct list_head param_list;
 	struct list_head region_list;
 	u32 snapshot_id;
-	struct list_head reporter_list;
 	struct devlink_dpipe_headers *dpipe_headers;
 	const struct devlink_ops *ops;
 	struct device *dev;
@@ -424,36 +423,6 @@ struct devlink_region;
 
 typedef void devlink_snapshot_data_dest_t(const void *data);
 
-struct devlink_health_buffer;
-struct devlink_health_reporter;
-
-/**
- * struct devlink_health_reporter_ops - Reporter operations
- * @name: reporter name
- * dump_size: dump buffer size allocated by the devlink
- * diagnose_size: diagnose buffer size allocated by the devlink
- * recover: callback to recover from reported error
- *          if priv_ctx is NULL, run a full recover
- * dump: callback to dump an object
- *       if priv_ctx is NULL, run a full dump
- * diagnose: callback to diagnose the current status
- */
-
-struct devlink_health_reporter_ops {
-	char *name;
-	unsigned int dump_size;
-	unsigned int diagnose_size;
-	int (*recover)(struct devlink_health_reporter *reporter,
-		       void *priv_ctx);
-	int (*dump)(struct devlink_health_reporter *reporter,
-		    struct devlink_health_buffer **buffers_array,
-		    unsigned int buffer_size, unsigned int num_buffers,
-		    void *priv_ctx);
-	int (*diagnose)(struct devlink_health_reporter *reporter,
-			struct devlink_health_buffer **buffers_array,
-			unsigned int buffer_size, unsigned int num_buffers);
-};
-
 struct devlink_ops {
 	int (*reload)(struct devlink *devlink, struct netlink_ext_ack *extack);
 	int (*port_type_set)(struct devlink_port *devlink_port,
@@ -615,34 +584,6 @@ int devlink_region_snapshot_create(struct devlink_region *region, u64 data_len,
 				   u8 *data, u32 snapshot_id,
 				   devlink_snapshot_data_dest_t *data_destructor);
 
-int devlink_health_buffer_nest_start(struct devlink_health_buffer *buffer,
-				     int attrtype);
-void devlink_health_buffer_nest_end(struct devlink_health_buffer *buffer);
-void devlink_health_buffer_nest_cancel(struct devlink_health_buffer *buffer);
-int devlink_health_buffer_put_object_name(struct devlink_health_buffer *buffer,
-					  char *name);
-int devlink_health_buffer_put_value_u8(struct devlink_health_buffer *buffer,
-				       u8 value);
-int devlink_health_buffer_put_value_u32(struct devlink_health_buffer *buffer,
-					u32 value);
-int devlink_health_buffer_put_value_u64(struct devlink_health_buffer *buffer,
-					u64 value);
-int devlink_health_buffer_put_value_string(struct devlink_health_buffer *buffer,
-					   char *name);
-int devlink_health_buffer_put_value_data(struct devlink_health_buffer *buffer,
-					 void *data, int len);
-struct devlink_health_reporter *
-devlink_health_reporter_create(struct devlink *devlink,
-			       const struct devlink_health_reporter_ops *ops,
-			       u64 graceful_period, bool auto_recover,
-			       void *priv);
-void
-devlink_health_reporter_destroy(struct devlink_health_reporter *reporter);
-
-void *
-devlink_health_reporter_priv(struct devlink_health_reporter *reporter);
-int devlink_health_report(struct devlink_health_reporter *reporter,
-			  const char *msg, void *priv_ctx);
 #else
 
 static inline struct devlink *devlink_alloc(const struct devlink_ops *ops,
@@ -903,91 +844,6 @@ devlink_region_snapshot_create(struct devlink_region *region, u64 data_len,
 	return 0;
 }
 
-static inline int
-devlink_health_buffer_nest_start(struct devlink_health_buffer *buffer,
-				 int attrtype)
-{
-	return 0;
-}
-
-static inline void
-devlink_health_buffer_nest_end(struct devlink_health_buffer *buffer)
-{
-}
-
-static inline void
-devlink_health_buffer_nest_cancel(struct devlink_health_buffer *buffer)
-{
-}
-
-static inline int
-devlink_health_buffer_put_object_name(struct devlink_health_buffer *buffer,
-				      char *name)
-{
-	return 0;
-}
-
-static inline int
-devlink_health_buffer_put_value_u8(struct devlink_health_buffer *buffer,
-				   u8 value)
-{
-	return 0;
-}
-
-static inline int
-devlink_health_buffer_put_value_u32(struct devlink_health_buffer *buffer,
-				    u32 value)
-{
-	return 0;
-}
-
-static inline int
-devlink_health_buffer_put_value_u64(struct devlink_health_buffer *buffer,
-				    u64 value)
-{
-	return 0;
-}
-
-static inline int
-devlink_health_buffer_put_value_string(struct devlink_health_buffer *buffer,
-				       char *name)
-{
-	return 0;
-}
-
-static inline int
-devlink_health_buffer_put_value_data(struct devlink_health_buffer *buffer,
-				     void *data, int len)
-{
-	return 0;
-}
-
-static inline struct devlink_health_reporter *
-devlink_health_reporter_create(struct devlink *devlink,
-			       const struct devlink_health_reporter_ops *ops,
-			       u64 graceful_period, bool auto_recover,
-			       void *priv)
-{
-	return NULL;
-}
-
-static inline void
-devlink_health_reporter_destroy(struct devlink_health_reporter *reporter)
-{
-}
-
-static inline void *
-devlink_health_reporter_priv(struct devlink_health_reporter *reporter)
-{
-	return NULL;
-}
-
-static inline int
-devlink_health_report(struct devlink_health_reporter *reporter,
-		      const char *msg, void *priv_ctx)
-{
-	return 0;
-}
 #endif
 
 #endif /* _NET_DEVLINK_H_ */
diff --git a/include/trace/events/devlink.h b/include/trace/events/devlink.h
index 7e39d2fc7c75..44acfbca1266 100644
--- a/include/trace/events/devlink.h
+++ b/include/trace/events/devlink.h
@@ -46,65 +46,6 @@ TRACE_EVENT(devlink_hwmsg,
 		  (int) __entry->len, __get_dynamic_array(buf), __entry->len)
 );
 
-TRACE_EVENT(devlink_health_report,
-	TP_PROTO(const struct devlink *devlink, const char *reporter_name,
-		 const char *msg),
-
-	TP_ARGS(devlink, reporter_name, msg),
-
-	TP_STRUCT__entry(
-		__string(bus_name, devlink->dev->bus->name)
-		__string(dev_name, dev_name(devlink->dev))
-		__string(driver_name, devlink->dev->driver->name)
-		__string(reporter_name, msg)
-		__string(msg, msg)
-	),
-
-	TP_fast_assign(
-		__assign_str(bus_name, devlink->dev->bus->name);
-		__assign_str(dev_name, dev_name(devlink->dev));
-		__assign_str(driver_name, devlink->dev->driver->name);
-		__assign_str(reporter_name, reporter_name);
-		__assign_str(msg, msg);
-	),
-
-	TP_printk("bus_name=%s dev_name=%s driver_name=%s reporter_name=%s: %s",
-		  __get_str(bus_name), __get_str(dev_name),
-		  __get_str(driver_name), __get_str(reporter_name),
-		  __get_str(msg))
-);
-
-TRACE_EVENT(devlink_health_recover_aborted,
-	TP_PROTO(const struct devlink *devlink, const char *reporter_name,
-		 bool health_state, u64 time_since_last_recover),
-
-	TP_ARGS(devlink, reporter_name, health_state, time_since_last_recover),
-
-	TP_STRUCT__entry(
-		__string(bus_name, devlink->dev->bus->name)
-		__string(dev_name, dev_name(devlink->dev))
-		__string(driver_name, devlink->dev->driver->name)
-		__string(reporter_name, reporter_name)
-		__field(bool, health_state)
-		__field(u64, time_since_last_recover)
-	),
-
-	TP_fast_assign(
-		__assign_str(bus_name, devlink->dev->bus->name);
-		__assign_str(dev_name, dev_name(devlink->dev));
-		__assign_str(driver_name, devlink->dev->driver->name);
-		__assign_str(reporter_name, reporter_name);
-		__entry->health_state = health_state;
-		__entry->time_since_last_recover = time_since_last_recover;
-	),
-
-	TP_printk("bus_name=%s dev_name=%s driver_name=%s reporter_name=%s: health_state=%d time_since_last_recover = %llu recover aborted",
-		  __get_str(bus_name), __get_str(dev_name),
-		  __get_str(driver_name), __get_str(reporter_name),
-		  __entry->health_state,
-		  __entry->time_since_last_recover)
-);
-
 #endif /* _TRACE_DEVLINK_H */
 
 /* This part must be outside protection */
@@ -123,9 +64,6 @@ static inline void trace_devlink_hwmsg(const struct devlink *devlink,
 {
 }
 
-static inline void trace_devlink_health(const char *msg)
-{
-}
 #endif /* _TRACE_DEVLINK_H */
 
 #endif
diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index 6b26bb2ce4dc..6e52d3660654 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -89,13 +89,6 @@ enum devlink_command {
 	DEVLINK_CMD_REGION_DEL,
 	DEVLINK_CMD_REGION_READ,
 
-	DEVLINK_CMD_HEALTH_REPORTER_GET,
-	DEVLINK_CMD_HEALTH_REPORTER_SET,
-	DEVLINK_CMD_HEALTH_REPORTER_RECOVER,
-	DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE,
-	DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET,
-	DEVLINK_CMD_HEALTH_REPORTER_DUMP_CLEAR,
-
 	/* add new commands above here */
 	__DEVLINK_CMD_MAX,
 	DEVLINK_CMD_MAX = __DEVLINK_CMD_MAX - 1
@@ -292,24 +285,6 @@ enum devlink_attr {
 	DEVLINK_ATTR_REGION_CHUNK_ADDR,         /* u64 */
 	DEVLINK_ATTR_REGION_CHUNK_LEN,          /* u64 */
 
-	DEVLINK_ATTR_HEALTH_BUFFER_OBJECT,		/* nested */
-	DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_PAIR,		/* nested */
-	DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_NAME,		/* string */
-	DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE,	/* nested */
-	DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_ARRAY,	/* nested */
-	DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_TYPE,	/* u8 */
-	DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_DATA,	/* dynamic */
-
-	DEVLINK_ATTR_HEALTH_REPORTER,			/* nested */
-	DEVLINK_ATTR_HEALTH_REPORTER_NAME,		/* string */
-	DEVLINK_ATTR_HEALTH_REPORTER_STATE,		/* u8 */
-	DEVLINK_ATTR_HEALTH_REPORTER_ERR,		/* u64 */
-	DEVLINK_ATTR_HEALTH_REPORTER_RECOVER,		/* u64 */
-	DEVLINK_ATTR_HEALTH_REPORTER_DUMP_AVAIL,	/* u8 */
-	DEVLINK_ATTR_HEALTH_REPORTER_DUMP_TS,		/* u64 */
-	DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD,	/* u64 */
-	DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER,	/* u8 */
-
 	/* add new attributes above here, update the policy in devlink.c */
 
 	__DEVLINK_ATTR_MAX,
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 24f266468ca5..abb0da9d7b4b 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -3597,1015 +3597,6 @@ out:
 	return 0;
 }
 
-#define DEVLINK_HEALTH_BUFFER_SIZE (4096 - GENL_HDRLEN)
-#define DEVLINK_HEALTH_BUFFER_DATA_SIZE (DEVLINK_HEALTH_BUFFER_SIZE / 2)
-#define DEVLINK_HEALTH_SIZE_TO_BUFFERS(size) DIV_ROUND_UP_ULL(size, DEVLINK_HEALTH_BUFFER_DATA_SIZE)
-#define DEVLINK_HEALTH_BUFFER_MAX_CHUNK 1024
-
-struct devlink_health_buffer {
-	void *data;
-	u64 offset;
-	u64 bytes_left;
-	u64 bytes_left_metadata;
-	u64 max_nested_depth;
-	u64 curr_nest;
-};
-
-struct devlink_health_buffer_desc {
-	int attrtype;
-	u16 len;
-	u8 nla_type;
-	u8 nest_end;
-	int value[0];
-};
-
-static void
-devlink_health_buffers_reset(struct devlink_health_buffer **buffers_list,
-			     u64 num_of_buffers)
-{
-	u64 i;
-
-	for (i = 0; i < num_of_buffers; i++) {
-		memset(buffers_list[i]->data, 0, DEVLINK_HEALTH_BUFFER_SIZE);
-		buffers_list[i]->offset = 0;
-		buffers_list[i]->bytes_left = DEVLINK_HEALTH_BUFFER_DATA_SIZE;
-		buffers_list[i]->bytes_left_metadata =
-			DEVLINK_HEALTH_BUFFER_DATA_SIZE;
-		buffers_list[i]->max_nested_depth = 0;
-		buffers_list[i]->curr_nest = 0;
-	}
-}
-
-static void
-devlink_health_buffers_destroy(struct devlink_health_buffer **buffers_list,
-			       u64 size);
-
-static struct devlink_health_buffer **
-devlink_health_buffers_create(u64 size)
-{
-	struct devlink_health_buffer **buffers_list;
-	u64 num_of_buffers = DEVLINK_HEALTH_SIZE_TO_BUFFERS(size);
-	u64 i;
-
-	buffers_list = kcalloc(num_of_buffers,
-			       sizeof(struct devlink_health_buffer *),
-			       GFP_KERNEL);
-	if (!buffers_list)
-		return NULL;
-
-	for (i = 0; i < num_of_buffers; i++) {
-		struct devlink_health_buffer *buffer;
-		void *data;
-
-		buffer = kzalloc(sizeof(*buffer), GFP_KERNEL);
-		data = kzalloc(DEVLINK_HEALTH_BUFFER_SIZE, GFP_KERNEL);
-		if (!buffer || !data) {
-			kfree(buffer);
-			kfree(data);
-			goto buffers_cleanup;
-		}
-		buffers_list[i] = buffer;
-		buffer->data = data;
-	}
-	devlink_health_buffers_reset(buffers_list, num_of_buffers);
-
-	return buffers_list;
-
-buffers_cleanup:
-	devlink_health_buffers_destroy(buffers_list, --i);
-	kfree(buffers_list);
-	return NULL;
-}
-
-static void
-devlink_health_buffers_destroy(struct devlink_health_buffer **buffers_list,
-			       u64 num_of_buffers)
-{
-	u64 i;
-
-	for (i = 0; i < num_of_buffers; i++) {
-		kfree(buffers_list[i]->data);
-		kfree(buffers_list[i]);
-	}
-}
-
-void
-devlink_health_buffer_offset_inc(struct devlink_health_buffer *buffer,
-				 int len)
-{
-	buffer->offset += len;
-}
-
-/* In order to store a nest, need two descriptors, for start and end */
-#define DEVLINK_HEALTH_BUFFER_NEST_SIZE (sizeof(struct devlink_health_buffer_desc) * 2)
-
-int devlink_health_buffer_verify_len(struct devlink_health_buffer *buffer,
-				     int len, int metadata_len)
-{
-	if (len > DEVLINK_HEALTH_BUFFER_DATA_SIZE)
-		return -EINVAL;
-
-	if (buffer->bytes_left < len ||
-	    buffer->bytes_left_metadata < metadata_len)
-		return -ENOMEM;
-
-	return 0;
-}
-
-static struct devlink_health_buffer_desc *
-devlink_health_buffer_get_desc_from_offset(struct devlink_health_buffer *buffer)
-{
-	return buffer->data + buffer->offset;
-}
-
-int
-devlink_health_buffer_nest_start(struct devlink_health_buffer *buffer,
-				 int attrtype)
-{
-	struct devlink_health_buffer_desc *desc;
-	int err;
-
-	err = devlink_health_buffer_verify_len(buffer, 0,
-					       DEVLINK_HEALTH_BUFFER_NEST_SIZE);
-	if (err)
-		return err;
-
-	if (attrtype != DEVLINK_ATTR_HEALTH_BUFFER_OBJECT &&
-	    attrtype != DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_PAIR &&
-	    attrtype != DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE &&
-	    attrtype != DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_ARRAY)
-		return -EINVAL;
-
-	desc = devlink_health_buffer_get_desc_from_offset(buffer);
-
-	desc->attrtype = attrtype;
-	buffer->bytes_left_metadata -= DEVLINK_HEALTH_BUFFER_NEST_SIZE;
-	devlink_health_buffer_offset_inc(buffer, sizeof(*desc));
-
-	buffer->curr_nest++;
-	buffer->max_nested_depth = max(buffer->max_nested_depth,
-				       buffer->curr_nest);
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(devlink_health_buffer_nest_start);
-
-enum devlink_health_buffer_nest_end_cancel {
-	DEVLINK_HEALTH_BUFFER_NEST_END = 1,
-	DEVLINK_HEALTH_BUFFER_NEST_CANCEL,
-};
-
-static void
-devlink_health_buffer_nest_end_cancel(struct devlink_health_buffer *buffer,
-				      enum devlink_health_buffer_nest_end_cancel nest)
-{
-	struct devlink_health_buffer_desc *desc;
-
-	WARN_ON(!buffer->curr_nest);
-	buffer->curr_nest--;
-
-	desc = devlink_health_buffer_get_desc_from_offset(buffer);
-	desc->nest_end = nest;
-	devlink_health_buffer_offset_inc(buffer, sizeof(*desc));
-}
-
-void devlink_health_buffer_nest_end(struct devlink_health_buffer *buffer)
-{
-	devlink_health_buffer_nest_end_cancel(buffer,
-					      DEVLINK_HEALTH_BUFFER_NEST_END);
-}
-EXPORT_SYMBOL_GPL(devlink_health_buffer_nest_end);
-
-void devlink_health_buffer_nest_cancel(struct devlink_health_buffer *buffer)
-{
-	devlink_health_buffer_nest_end_cancel(buffer,
-					      DEVLINK_HEALTH_BUFFER_NEST_CANCEL);
-}
-EXPORT_SYMBOL_GPL(devlink_health_buffer_nest_cancel);
-
-int
-devlink_health_buffer_put_object_name(struct devlink_health_buffer *buffer,
-				      char *name)
-{
-	struct devlink_health_buffer_desc *desc;
-	int err;
-
-	err = devlink_health_buffer_verify_len(buffer, strlen(name) + 1,
-					       sizeof(*desc));
-	if (err)
-		return err;
-
-	desc = devlink_health_buffer_get_desc_from_offset(buffer);
-	desc->attrtype = DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_NAME;
-	desc->nla_type = NLA_NUL_STRING;
-	desc->len = strlen(name) + 1;
-	memcpy(&desc->value, name, desc->len);
-	devlink_health_buffer_offset_inc(buffer, sizeof(*desc) + desc->len);
-
-	buffer->bytes_left_metadata -= sizeof(*desc);
-	buffer->bytes_left -= (strlen(name) + 1);
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(devlink_health_buffer_put_object_name);
-
-static int
-devlink_health_buffer_put_value(struct devlink_health_buffer *buffer,
-				u8 nla_type, void *value, int len)
-{
-	struct devlink_health_buffer_desc *desc;
-	int err;
-
-	err = devlink_health_buffer_verify_len(buffer, len, sizeof(*desc));
-	if (err)
-		return err;
-
-	desc = devlink_health_buffer_get_desc_from_offset(buffer);
-	desc->attrtype = DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_DATA;
-	desc->nla_type = nla_type;
-	desc->len = len;
-	memcpy(&desc->value, value, len);
-	devlink_health_buffer_offset_inc(buffer, sizeof(*desc) + desc->len);
-
-	buffer->bytes_left_metadata -= sizeof(*desc);
-	buffer->bytes_left -= len;
-
-	return 0;
-}
-
-int
-devlink_health_buffer_put_value_u8(struct devlink_health_buffer *buffer,
-				   u8 value)
-{
-	int err;
-
-	err = devlink_health_buffer_put_value(buffer, NLA_U8, &value,
-					      sizeof(value));
-	if (err)
-		return err;
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(devlink_health_buffer_put_value_u8);
-
-int
-devlink_health_buffer_put_value_u32(struct devlink_health_buffer *buffer,
-				    u32 value)
-{
-	int err;
-
-	err = devlink_health_buffer_put_value(buffer, NLA_U32, &value,
-					      sizeof(value));
-	if (err)
-		return err;
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(devlink_health_buffer_put_value_u32);
-
-int
-devlink_health_buffer_put_value_u64(struct devlink_health_buffer *buffer,
-				    u64 value)
-{
-	int err;
-
-	err = devlink_health_buffer_put_value(buffer, NLA_U64, &value,
-					      sizeof(value));
-	if (err)
-		return err;
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(devlink_health_buffer_put_value_u64);
-
-int
-devlink_health_buffer_put_value_string(struct devlink_health_buffer *buffer,
-				       char *name)
-{
-	int err;
-
-	if (strlen(name) + 1 > DEVLINK_HEALTH_BUFFER_MAX_CHUNK)
-		return -EINVAL;
-
-	err = devlink_health_buffer_put_value(buffer, NLA_NUL_STRING, name,
-					      strlen(name) + 1);
-	if (err)
-		return err;
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(devlink_health_buffer_put_value_string);
-
-int
-devlink_health_buffer_put_value_data(struct devlink_health_buffer *buffer,
-				     void *data, int len)
-{
-	int err;
-
-	if (len > DEVLINK_HEALTH_BUFFER_MAX_CHUNK)
-		return -EINVAL;
-
-	err = devlink_health_buffer_put_value(buffer, NLA_BINARY, data, len);
-	if (err)
-		return err;
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(devlink_health_buffer_put_value_data);
-
-static int
-devlink_health_buffer_fill_data(struct sk_buff *skb,
-				struct devlink_health_buffer_desc *desc)
-{
-	int err = -EINVAL;
-
-	switch (desc->nla_type) {
-	case NLA_U8:
-		err = nla_put_u8(skb, DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_DATA,
-				 *(u8 *)desc->value);
-		break;
-	case NLA_U32:
-		err = nla_put_u32(skb, DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_DATA,
-				  *(u32 *)desc->value);
-		break;
-	case NLA_U64:
-		err = nla_put_u64_64bit(skb,
-					DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_DATA,
-					*(u64 *)desc->value, DEVLINK_ATTR_PAD);
-		break;
-	case NLA_NUL_STRING:
-		err = nla_put_string(skb,
-				     DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_DATA,
-				     (char *)&desc->value);
-		break;
-	case NLA_BINARY:
-		err = nla_put(skb, DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_DATA,
-			      desc->len, (void *)&desc->value);
-		break;
-	}
-
-	return err;
-}
-
-static int
-devlink_health_buffer_fill_type(struct sk_buff *skb,
-				struct devlink_health_buffer_desc *desc)
-{
-	int err = -EINVAL;
-
-	switch (desc->nla_type) {
-	case NLA_U8:
-		err = nla_put_u8(skb, DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_TYPE,
-				 NLA_U8);
-		break;
-	case NLA_U32:
-		err = nla_put_u8(skb, DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_TYPE,
-				 NLA_U32);
-		break;
-	case NLA_U64:
-		err = nla_put_u8(skb, DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_TYPE,
-				 NLA_U64);
-		break;
-	case NLA_NUL_STRING:
-		err = nla_put_u8(skb, DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_TYPE,
-				 NLA_NUL_STRING);
-		break;
-	case NLA_BINARY:
-		err = nla_put_u8(skb, DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_TYPE,
-				 NLA_BINARY);
-		break;
-	}
-
-	return err;
-}
-
-static inline struct devlink_health_buffer_desc *
-devlink_health_buffer_get_next_desc(struct devlink_health_buffer_desc *desc)
-{
-	return (void *)&desc->value + desc->len;
-}
-
-static int
-devlink_health_buffer_prepare_skb(struct sk_buff *skb,
-				  struct devlink_health_buffer *buffer)
-{
-	struct devlink_health_buffer_desc *last_desc, *desc;
-	struct nlattr **buffer_nlattr;
-	int err;
-	int i = 0;
-
-	buffer_nlattr = kcalloc(buffer->max_nested_depth,
-				sizeof(*buffer_nlattr), GFP_KERNEL);
-	if (!buffer_nlattr)
-		return -EINVAL;
-
-	last_desc = devlink_health_buffer_get_desc_from_offset(buffer);
-	desc = buffer->data;
-	while (desc != last_desc) {
-		switch (desc->attrtype) {
-		case DEVLINK_ATTR_HEALTH_BUFFER_OBJECT:
-		case DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_PAIR:
-		case DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE:
-		case DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_ARRAY:
-			buffer_nlattr[i] = nla_nest_start(skb, desc->attrtype);
-			if (!buffer_nlattr[i])
-				goto nla_put_failure;
-			i++;
-			break;
-		case DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_VALUE_DATA:
-			err = devlink_health_buffer_fill_data(skb, desc);
-			if (err)
-				goto nla_put_failure;
-			err = devlink_health_buffer_fill_type(skb, desc);
-			if (err)
-				goto nla_put_failure;
-			break;
-		case DEVLINK_ATTR_HEALTH_BUFFER_OBJECT_NAME:
-			err = nla_put_string(skb, desc->attrtype,
-					     (char *)&desc->value);
-			if (err)
-				goto nla_put_failure;
-			break;
-		default:
-			WARN_ON(!desc->nest_end);
-			WARN_ON(i <= 0);
-			if (desc->nest_end == DEVLINK_HEALTH_BUFFER_NEST_END)
-				nla_nest_end(skb, buffer_nlattr[--i]);
-			else
-				nla_nest_cancel(skb, buffer_nlattr[--i]);
-			break;
-		}
-		desc = devlink_health_buffer_get_next_desc(desc);
-	}
-
-	return 0;
-
-nla_put_failure:
-	kfree(buffer_nlattr);
-	return err;
-}
-
-static int
-devlink_health_buffer_snd(struct genl_info *info,
-			  enum devlink_command cmd, int flags,
-			  struct devlink_health_buffer **buffers_array,
-			  u64 num_of_buffers)
-{
-	struct sk_buff *skb;
-	struct nlmsghdr *nlh;
-	void *hdr;
-	int err;
-	u64 i;
-
-	for (i = 0; i < num_of_buffers; i++) {
-		/* Skip buffer if driver did not fill it up with any data */
-		if (!buffers_array[i]->offset)
-			continue;
-
-		skb = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
-		if (!skb)
-			return -ENOMEM;
-
-		hdr = genlmsg_put(skb, info->snd_portid, info->snd_seq,
-				  &devlink_nl_family, NLM_F_MULTI, cmd);
-		if (!hdr)
-			goto nla_put_failure;
-
-		err = devlink_health_buffer_prepare_skb(skb, buffers_array[i]);
-		if (err)
-			goto nla_put_failure;
-
-		genlmsg_end(skb, hdr);
-		err = genlmsg_reply(skb, info);
-		if (err)
-			return err;
-	}
-
-	skb = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
-	if (!skb)
-		return -ENOMEM;
-	nlh = nlmsg_put(skb, info->snd_portid, info->snd_seq,
-			NLMSG_DONE, 0, flags | NLM_F_MULTI);
-	if (!nlh)
-		goto nla_put_failure;
-
-	err = genlmsg_reply(skb, info);
-	if (err)
-		return err;
-
-	return 0;
-
-nla_put_failure:
-	err = -EIO;
-	nlmsg_free(skb);
-	return err;
-}
-
-struct devlink_health_reporter {
-	struct list_head list;
-	struct devlink_health_buffer **dump_buffers_array;
-	struct mutex dump_lock; /* lock parallel read/write from dump buffers */
-	struct devlink_health_buffer **diagnose_buffers_array;
-	struct mutex diagnose_lock; /* lock parallel read/write from diagnose buffers */
-	void *priv;
-	const struct devlink_health_reporter_ops *ops;
-	struct devlink *devlink;
-	u64 graceful_period;
-	bool auto_recover;
-	u8 health_state;
-	u8 dump_avail;
-	u64 dump_ts;
-	u64 error_count;
-	u64 recovery_count;
-	u64 last_recovery_ts;
-};
-
-enum devlink_health_reporter_state {
-	DEVLINK_HEALTH_REPORTER_STATE_HEALTHY,
-	DEVLINK_HEALTH_REPORTER_STATE_ERROR,
-};
-
-void *
-devlink_health_reporter_priv(struct devlink_health_reporter *reporter)
-{
-	return reporter->priv;
-}
-EXPORT_SYMBOL_GPL(devlink_health_reporter_priv);
-
-static struct devlink_health_reporter *
-devlink_health_reporter_find_by_name(struct devlink *devlink,
-				     const char *reporter_name)
-{
-	struct devlink_health_reporter *reporter;
-
-	list_for_each_entry(reporter, &devlink->reporter_list, list)
-		if (!strcmp(reporter->ops->name, reporter_name))
-			return reporter;
-	return NULL;
-}
-
-/**
- *	devlink_health_reporter_create - create devlink health reporter
- *
- *	@devlink: devlink
- *	@ops: ops
- *	@graceful_period: to avoid recovery loops, in msecs
- *	@auto_recover: auto recover when error occurs
- *	@priv: priv
- */
-struct devlink_health_reporter *
-devlink_health_reporter_create(struct devlink *devlink,
-			       const struct devlink_health_reporter_ops *ops,
-			       u64 graceful_period, bool auto_recover,
-			       void *priv)
-{
-	struct devlink_health_reporter *reporter;
-
-	mutex_lock(&devlink->lock);
-	if (devlink_health_reporter_find_by_name(devlink, ops->name)) {
-		reporter = ERR_PTR(-EEXIST);
-		goto unlock;
-	}
-
-	if (WARN_ON(ops->dump && !ops->dump_size) ||
-	    WARN_ON(ops->diagnose && !ops->diagnose_size) ||
-	    WARN_ON(auto_recover && !ops->recover) ||
-	    WARN_ON(graceful_period && !ops->recover)) {
-		reporter = ERR_PTR(-EINVAL);
-		goto unlock;
-	}
-
-	reporter = kzalloc(sizeof(*reporter), GFP_KERNEL);
-	if (!reporter) {
-		reporter = ERR_PTR(-ENOMEM);
-		goto unlock;
-	}
-
-	if (ops->dump) {
-		reporter->dump_buffers_array =
-			devlink_health_buffers_create(ops->dump_size);
-		if (!reporter->dump_buffers_array) {
-			kfree(reporter);
-			reporter = ERR_PTR(-ENOMEM);
-			goto unlock;
-		}
-	}
-
-	if (ops->diagnose) {
-		reporter->diagnose_buffers_array =
-			devlink_health_buffers_create(ops->diagnose_size);
-		if (!reporter->diagnose_buffers_array) {
-			devlink_health_buffers_destroy(reporter->dump_buffers_array,
-						       DEVLINK_HEALTH_SIZE_TO_BUFFERS(ops->dump_size));
-			kfree(reporter);
-			reporter = ERR_PTR(-ENOMEM);
-			goto unlock;
-		}
-	}
-
-	list_add_tail(&reporter->list, &devlink->reporter_list);
-	mutex_init(&reporter->dump_lock);
-	mutex_init(&reporter->diagnose_lock);
-
-	reporter->priv = priv;
-	reporter->ops = ops;
-	reporter->devlink = devlink;
-	reporter->graceful_period = graceful_period;
-	reporter->auto_recover = auto_recover;
-unlock:
-	mutex_unlock(&devlink->lock);
-	return reporter;
-}
-EXPORT_SYMBOL_GPL(devlink_health_reporter_create);
-
-/**
- *	devlink_health_reporter_destroy - destroy devlink health reporter
- *
- *	@reporter: devlink health reporter to destroy
- */
-void
-devlink_health_reporter_destroy(struct devlink_health_reporter *reporter)
-{
-	mutex_lock(&reporter->devlink->lock);
-	list_del(&reporter->list);
-	devlink_health_buffers_destroy(reporter->dump_buffers_array,
-				       DEVLINK_HEALTH_SIZE_TO_BUFFERS(reporter->ops->dump_size));
-	devlink_health_buffers_destroy(reporter->diagnose_buffers_array,
-				       DEVLINK_HEALTH_SIZE_TO_BUFFERS(reporter->ops->diagnose_size));
-	kfree(reporter);
-	mutex_unlock(&reporter->devlink->lock);
-}
-EXPORT_SYMBOL_GPL(devlink_health_reporter_destroy);
-
-static int
-devlink_health_reporter_recover(struct devlink_health_reporter *reporter,
-				void *priv_ctx)
-{
-	int err;
-
-	if (!reporter->ops->recover)
-		return -EOPNOTSUPP;
-
-	err = reporter->ops->recover(reporter, priv_ctx);
-	if (err)
-		return err;
-
-	reporter->recovery_count++;
-	reporter->health_state = DEVLINK_HEALTH_REPORTER_STATE_HEALTHY;
-	reporter->last_recovery_ts = jiffies;
-
-	return 0;
-}
-
-static int devlink_health_do_dump(struct devlink_health_reporter *reporter,
-				  void *priv_ctx)
-{
-	int err;
-
-	if (!reporter->ops->dump)
-		return 0;
-
-	if (reporter->dump_avail)
-		return 0;
-
-	devlink_health_buffers_reset(reporter->dump_buffers_array,
-				     DEVLINK_HEALTH_SIZE_TO_BUFFERS(reporter->ops->dump_size));
-	err = reporter->ops->dump(reporter, reporter->dump_buffers_array,
-				     DEVLINK_HEALTH_BUFFER_SIZE,
-				     DEVLINK_HEALTH_SIZE_TO_BUFFERS(reporter->ops->dump_size),
-				     priv_ctx);
-	if (!err) {
-		reporter->dump_avail = true;
-		reporter->dump_ts = jiffies;
-	}
-
-	return err;
-}
-
-int devlink_health_report(struct devlink_health_reporter *reporter,
-			  const char *msg, void *priv_ctx)
-{
-	struct devlink *devlink = reporter->devlink;
-	int err = 0;
-
-	/* write a log message of the current error */
-	WARN_ON(!msg);
-	trace_devlink_health_report(devlink, reporter->ops->name, msg);
-	reporter->error_count++;
-
-	/* abort if the previous error wasn't recovered */
-	if (reporter->auto_recover &&
-	    (reporter->health_state != DEVLINK_HEALTH_REPORTER_STATE_HEALTHY ||
-	     jiffies - reporter->last_recovery_ts <
-	     msecs_to_jiffies(reporter->graceful_period))) {
-		trace_devlink_health_recover_aborted(devlink,
-						     reporter->ops->name,
-						     reporter->health_state,
-						     jiffies -
-						     reporter->last_recovery_ts);
-		return -ECANCELED;
-	}
-
-	reporter->health_state = DEVLINK_HEALTH_REPORTER_STATE_ERROR;
-
-	mutex_lock(&reporter->dump_lock);
-	/* store current dump of current error, for later analysis */
-	devlink_health_do_dump(reporter, priv_ctx);
-	mutex_unlock(&reporter->dump_lock);
-
-	if (reporter->auto_recover)
-		err = devlink_health_reporter_recover(reporter, priv_ctx);
-
-	return err;
-}
-EXPORT_SYMBOL_GPL(devlink_health_report);
-
-static struct devlink_health_reporter *
-devlink_health_reporter_get_from_info(struct devlink *devlink,
-				      struct genl_info *info)
-{
-	char *reporter_name;
-
-	if (!info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_NAME])
-		return NULL;
-
-	reporter_name =
-		nla_data(info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_NAME]);
-	return devlink_health_reporter_find_by_name(devlink, reporter_name);
-}
-
-static int
-devlink_nl_health_reporter_fill(struct sk_buff *msg,
-				struct devlink *devlink,
-				struct devlink_health_reporter *reporter,
-				enum devlink_command cmd, u32 portid,
-				u32 seq, int flags)
-{
-	struct nlattr *reporter_attr;
-	void *hdr;
-
-	hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
-	if (!hdr)
-		return -EMSGSIZE;
-
-	if (devlink_nl_put_handle(msg, devlink))
-		goto genlmsg_cancel;
-
-	reporter_attr = nla_nest_start(msg, DEVLINK_ATTR_HEALTH_REPORTER);
-	if (!reporter_attr)
-		goto genlmsg_cancel;
-	if (nla_put_string(msg, DEVLINK_ATTR_HEALTH_REPORTER_NAME,
-			   reporter->ops->name))
-		goto reporter_nest_cancel;
-	if (nla_put_u8(msg, DEVLINK_ATTR_HEALTH_REPORTER_STATE,
-		       reporter->health_state))
-		goto reporter_nest_cancel;
-	if (nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_ERR,
-			      reporter->error_count, DEVLINK_ATTR_PAD))
-		goto reporter_nest_cancel;
-	if (nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_RECOVER,
-			      reporter->recovery_count, DEVLINK_ATTR_PAD))
-		goto reporter_nest_cancel;
-	if (nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD,
-			      reporter->graceful_period,
-			      DEVLINK_ATTR_PAD))
-		goto reporter_nest_cancel;
-	if (nla_put_u8(msg, DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER,
-		       reporter->auto_recover))
-		goto reporter_nest_cancel;
-	if (nla_put_u8(msg, DEVLINK_ATTR_HEALTH_REPORTER_DUMP_AVAIL,
-		       reporter->dump_avail))
-		goto reporter_nest_cancel;
-	if (reporter->dump_avail &&
-	    nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_DUMP_TS,
-			      jiffies_to_msecs(reporter->dump_ts),
-			      DEVLINK_ATTR_PAD))
-		goto reporter_nest_cancel;
-
-	nla_nest_end(msg, reporter_attr);
-	genlmsg_end(msg, hdr);
-	return 0;
-
-reporter_nest_cancel:
-	nla_nest_end(msg, reporter_attr);
-genlmsg_cancel:
-	genlmsg_cancel(msg, hdr);
-	return -EMSGSIZE;
-}
-
-static int devlink_nl_cmd_health_reporter_get_doit(struct sk_buff *skb,
-						   struct genl_info *info)
-{
-	struct devlink *devlink = info->user_ptr[0];
-	struct devlink_health_reporter *reporter;
-	struct sk_buff *msg;
-	int err;
-
-	reporter = devlink_health_reporter_get_from_info(devlink, info);
-	if (!reporter)
-		return -EINVAL;
-
-	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
-	if (!msg)
-		return -ENOMEM;
-
-	err = devlink_nl_health_reporter_fill(msg, devlink, reporter,
-					      DEVLINK_CMD_HEALTH_REPORTER_GET,
-					      info->snd_portid, info->snd_seq,
-					      0);
-	if (err) {
-		nlmsg_free(msg);
-		return err;
-	}
-
-	return genlmsg_reply(msg, info);
-}
-
-static int
-devlink_nl_cmd_health_reporter_get_dumpit(struct sk_buff *msg,
-					  struct netlink_callback *cb)
-{
-	struct devlink_health_reporter *reporter;
-	struct devlink *devlink;
-	int start = cb->args[0];
-	int idx = 0;
-	int err;
-
-	mutex_lock(&devlink_mutex);
-	list_for_each_entry(devlink, &devlink_list, list) {
-		if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
-			continue;
-		mutex_lock(&devlink->lock);
-		list_for_each_entry(reporter, &devlink->reporter_list,
-				    list) {
-			if (idx < start) {
-				idx++;
-				continue;
-			}
-			err = devlink_nl_health_reporter_fill(msg, devlink,
-							      reporter,
-							      DEVLINK_CMD_HEALTH_REPORTER_GET,
-							      NETLINK_CB(cb->skb).portid,
-							      cb->nlh->nlmsg_seq,
-							      NLM_F_MULTI);
-			if (err) {
-				mutex_unlock(&devlink->lock);
-				goto out;
-			}
-			idx++;
-		}
-		mutex_unlock(&devlink->lock);
-	}
-out:
-	mutex_unlock(&devlink_mutex);
-
-	cb->args[0] = idx;
-	return msg->len;
-}
-
-static int
-devlink_nl_cmd_health_reporter_set_doit(struct sk_buff *skb,
-					struct genl_info *info)
-{
-	struct devlink *devlink = info->user_ptr[0];
-	struct devlink_health_reporter *reporter;
-
-	reporter = devlink_health_reporter_get_from_info(devlink, info);
-	if (!reporter)
-		return -EINVAL;
-
-	if (!reporter->ops->recover &&
-	    (info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD] ||
-	     info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER]))
-		return -EINVAL;
-
-	if (info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD])
-		reporter->graceful_period =
-			nla_get_u64(info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD]);
-
-	if (info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER])
-		reporter->auto_recover =
-			nla_get_u8(info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER]);
-
-	return 0;
-}
-
-static int devlink_nl_cmd_health_reporter_recover_doit(struct sk_buff *skb,
-						       struct genl_info *info)
-{
-	struct devlink *devlink = info->user_ptr[0];
-	struct devlink_health_reporter *reporter;
-
-	reporter = devlink_health_reporter_get_from_info(devlink, info);
-	if (!reporter)
-		return -EINVAL;
-
-	return devlink_health_reporter_recover(reporter, NULL);
-}
-
-static int devlink_nl_cmd_health_reporter_diagnose_doit(struct sk_buff *skb,
-							struct genl_info *info)
-{
-	struct devlink *devlink = info->user_ptr[0];
-	struct devlink_health_reporter *reporter;
-	u64 num_of_buffers;
-	int err;
-
-	reporter = devlink_health_reporter_get_from_info(devlink, info);
-	if (!reporter)
-		return -EINVAL;
-
-	if (!reporter->ops->diagnose)
-		return -EOPNOTSUPP;
-
-	num_of_buffers =
-		DEVLINK_HEALTH_SIZE_TO_BUFFERS(reporter->ops->diagnose_size);
-
-	mutex_lock(&reporter->diagnose_lock);
-	devlink_health_buffers_reset(reporter->diagnose_buffers_array,
-				     num_of_buffers);
-
-	err = reporter->ops->diagnose(reporter,
-				      reporter->diagnose_buffers_array,
-				      DEVLINK_HEALTH_BUFFER_SIZE,
-				      num_of_buffers);
-	if (err)
-		goto out;
-
-	err = devlink_health_buffer_snd(info,
-					DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE,
-					0, reporter->diagnose_buffers_array,
-					num_of_buffers);
-	if (err)
-		goto out;
-
-	mutex_unlock(&reporter->diagnose_lock);
-	return 0;
-
-out:
-	mutex_unlock(&reporter->diagnose_lock);
-	return err;
-}
-
-static void
-devlink_health_dump_clear(struct devlink_health_reporter *reporter)
-{
-	reporter->dump_avail = false;
-	reporter->dump_ts = 0;
-	devlink_health_buffers_reset(reporter->dump_buffers_array,
-				     DEVLINK_HEALTH_SIZE_TO_BUFFERS(reporter->ops->dump_size));
-}
-
-static int devlink_nl_cmd_health_reporter_dump_get_doit(struct sk_buff *skb,
-							struct genl_info *info)
-{
-	struct devlink *devlink = info->user_ptr[0];
-	struct devlink_health_reporter *reporter;
-	u64 num_of_buffers;
-	int err;
-
-	reporter = devlink_health_reporter_get_from_info(devlink, info);
-	if (!reporter)
-		return -EINVAL;
-
-	if (!reporter->ops->dump)
-		return -EOPNOTSUPP;
-
-	num_of_buffers =
-		DEVLINK_HEALTH_SIZE_TO_BUFFERS(reporter->ops->dump_size);
-
-	mutex_lock(&reporter->dump_lock);
-	err = devlink_health_do_dump(reporter, NULL);
-	if (err)
-		goto out;
-
-	err = devlink_health_buffer_snd(info,
-					DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET,
-					0, reporter->dump_buffers_array,
-					num_of_buffers);
-
-out:
-	mutex_unlock(&reporter->dump_lock);
-	return err;
-}
-
-static int
-devlink_nl_cmd_health_reporter_dump_clear_doit(struct sk_buff *skb,
-					       struct genl_info *info)
-{
-	struct devlink *devlink = info->user_ptr[0];
-	struct devlink_health_reporter *reporter;
-
-	reporter = devlink_health_reporter_get_from_info(devlink, info);
-	if (!reporter)
-		return -EINVAL;
-
-	mutex_lock(&reporter->dump_lock);
-	devlink_health_dump_clear(reporter);
-	mutex_unlock(&reporter->dump_lock);
-	return 0;
-}
-
 static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
 	[DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING },
 	[DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING },
@@ -4631,9 +3622,6 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
 	[DEVLINK_ATTR_PARAM_VALUE_CMODE] = { .type = NLA_U8 },
 	[DEVLINK_ATTR_REGION_NAME] = { .type = NLA_NUL_STRING },
 	[DEVLINK_ATTR_REGION_SNAPSHOT_ID] = { .type = NLA_U32 },
-	[DEVLINK_ATTR_HEALTH_REPORTER_NAME] = { .type = NLA_NUL_STRING },
-	[DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD] = { .type = NLA_U64 },
-	[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER] = { .type = NLA_U8 },
 };
 
 static const struct genl_ops devlink_nl_ops[] = {
@@ -4854,51 +3842,6 @@ static const struct genl_ops devlink_nl_ops[] = {
 		.flags = GENL_ADMIN_PERM,
 		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
 	},
-	{
-		.cmd = DEVLINK_CMD_HEALTH_REPORTER_GET,
-		.doit = devlink_nl_cmd_health_reporter_get_doit,
-		.dumpit = devlink_nl_cmd_health_reporter_get_dumpit,
-		.policy = devlink_nl_policy,
-		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
-		/* can be retrieved by unprivileged users */
-	},
-	{
-		.cmd = DEVLINK_CMD_HEALTH_REPORTER_SET,
-		.doit = devlink_nl_cmd_health_reporter_set_doit,
-		.policy = devlink_nl_policy,
-		.flags = GENL_ADMIN_PERM,
-		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
-	},
-	{
-		.cmd = DEVLINK_CMD_HEALTH_REPORTER_RECOVER,
-		.doit = devlink_nl_cmd_health_reporter_recover_doit,
-		.policy = devlink_nl_policy,
-		.flags = GENL_ADMIN_PERM,
-		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
-	},
-	{
-		.cmd = DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE,
-		.doit = devlink_nl_cmd_health_reporter_diagnose_doit,
-		.policy = devlink_nl_policy,
-		.flags = GENL_ADMIN_PERM,
-		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
-	},
-	{
-		.cmd = DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET,
-		.doit = devlink_nl_cmd_health_reporter_dump_get_doit,
-		.policy = devlink_nl_policy,
-		.flags = GENL_ADMIN_PERM,
-		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
-				  DEVLINK_NL_FLAG_NO_LOCK,
-	},
-	{
-		.cmd = DEVLINK_CMD_HEALTH_REPORTER_DUMP_CLEAR,
-		.doit = devlink_nl_cmd_health_reporter_dump_clear_doit,
-		.policy = devlink_nl_policy,
-		.flags = GENL_ADMIN_PERM,
-		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
-				  DEVLINK_NL_FLAG_NO_LOCK,
-	},
 };
 
 static struct genl_family devlink_nl_family __ro_after_init = {
@@ -4939,7 +3882,6 @@ struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size)
 	INIT_LIST_HEAD(&devlink->resource_list);
 	INIT_LIST_HEAD(&devlink->param_list);
 	INIT_LIST_HEAD(&devlink->region_list);
-	INIT_LIST_HEAD(&devlink->reporter_list);
 	mutex_init(&devlink->lock);
 	return devlink;
 }
-- 
cgit v1.2.3


From 40852275a94afb3e836be9248399e036982d1a79 Mon Sep 17 00:00:00 2001
From: Micah Morton <mortonm@chromium.org>
Date: Tue, 22 Jan 2019 14:42:09 -0800
Subject: LSM: add SafeSetID module that gates setid calls

This change ensures that the set*uid family of syscalls in kernel/sys.c
(setreuid, setuid, setresuid, setfsuid) all call ns_capable_common with
the CAP_OPT_INSETID flag, so capability checks in the security_capable
hook can know whether they are being called from within a set*uid
syscall. This change is a no-op by itself, but is needed for the
proposed SafeSetID LSM.

Signed-off-by: Micah Morton <mortonm@chromium.org>
Acked-by: Kees Cook <keescook@chromium.org>
Signed-off-by: James Morris <james.morris@microsoft.com>
---
 include/linux/capability.h |  5 +++++
 kernel/capability.c        | 19 +++++++++++++++++++
 kernel/sys.c               | 10 +++++-----
 3 files changed, 29 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/capability.h b/include/linux/capability.h
index f640dcbc880c..c3f9a4d558a0 100644
--- a/include/linux/capability.h
+++ b/include/linux/capability.h
@@ -209,6 +209,7 @@ extern bool has_ns_capability_noaudit(struct task_struct *t,
 extern bool capable(int cap);
 extern bool ns_capable(struct user_namespace *ns, int cap);
 extern bool ns_capable_noaudit(struct user_namespace *ns, int cap);
+extern bool ns_capable_setid(struct user_namespace *ns, int cap);
 #else
 static inline bool has_capability(struct task_struct *t, int cap)
 {
@@ -240,6 +241,10 @@ static inline bool ns_capable_noaudit(struct user_namespace *ns, int cap)
 {
 	return true;
 }
+static inline bool ns_capable_setid(struct user_namespace *ns, int cap)
+{
+	return true;
+}
 #endif /* CONFIG_MULTIUSER */
 extern bool privileged_wrt_inode_uidgid(struct user_namespace *ns, const struct inode *inode);
 extern bool capable_wrt_inode_uidgid(const struct inode *inode, int cap);
diff --git a/kernel/capability.c b/kernel/capability.c
index cfbbcb68e11e..1444f3954d75 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -415,6 +415,25 @@ bool ns_capable_noaudit(struct user_namespace *ns, int cap)
 }
 EXPORT_SYMBOL(ns_capable_noaudit);
 
+/**
+ * ns_capable_setid - Determine if the current task has a superior capability
+ * in effect, while signalling that this check is being done from within a
+ * setid syscall.
+ * @ns:  The usernamespace we want the capability in
+ * @cap: The capability to be tested for
+ *
+ * Return true if the current task has the given superior capability currently
+ * available for use, false if not.
+ *
+ * This sets PF_SUPERPRIV on the task if the capability is available on the
+ * assumption that it's about to be used.
+ */
+bool ns_capable_setid(struct user_namespace *ns, int cap)
+{
+	return ns_capable_common(ns, cap, CAP_OPT_INSETID);
+}
+EXPORT_SYMBOL(ns_capable_setid);
+
 /**
  * capable - Determine if the current task has a superior capability in effect
  * @cap: The capability to be tested for
diff --git a/kernel/sys.c b/kernel/sys.c
index f7eb62eceb24..c5f875048aef 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -516,7 +516,7 @@ long __sys_setreuid(uid_t ruid, uid_t euid)
 		new->uid = kruid;
 		if (!uid_eq(old->uid, kruid) &&
 		    !uid_eq(old->euid, kruid) &&
-		    !ns_capable(old->user_ns, CAP_SETUID))
+		    !ns_capable_setid(old->user_ns, CAP_SETUID))
 			goto error;
 	}
 
@@ -525,7 +525,7 @@ long __sys_setreuid(uid_t ruid, uid_t euid)
 		if (!uid_eq(old->uid, keuid) &&
 		    !uid_eq(old->euid, keuid) &&
 		    !uid_eq(old->suid, keuid) &&
-		    !ns_capable(old->user_ns, CAP_SETUID))
+		    !ns_capable_setid(old->user_ns, CAP_SETUID))
 			goto error;
 	}
 
@@ -584,7 +584,7 @@ long __sys_setuid(uid_t uid)
 	old = current_cred();
 
 	retval = -EPERM;
-	if (ns_capable(old->user_ns, CAP_SETUID)) {
+	if (ns_capable_setid(old->user_ns, CAP_SETUID)) {
 		new->suid = new->uid = kuid;
 		if (!uid_eq(kuid, old->uid)) {
 			retval = set_user(new);
@@ -646,7 +646,7 @@ long __sys_setresuid(uid_t ruid, uid_t euid, uid_t suid)
 	old = current_cred();
 
 	retval = -EPERM;
-	if (!ns_capable(old->user_ns, CAP_SETUID)) {
+	if (!ns_capable_setid(old->user_ns, CAP_SETUID)) {
 		if (ruid != (uid_t) -1        && !uid_eq(kruid, old->uid) &&
 		    !uid_eq(kruid, old->euid) && !uid_eq(kruid, old->suid))
 			goto error;
@@ -814,7 +814,7 @@ long __sys_setfsuid(uid_t uid)
 
 	if (uid_eq(kuid, old->uid)  || uid_eq(kuid, old->euid)  ||
 	    uid_eq(kuid, old->suid) || uid_eq(kuid, old->fsuid) ||
-	    ns_capable(old->user_ns, CAP_SETUID)) {
+	    ns_capable_setid(old->user_ns, CAP_SETUID)) {
 		if (!uid_eq(kuid, old->fsuid)) {
 			new->fsuid = kuid;
 			if (security_task_fix_setuid(new, old, LSM_SETID_FS) == 0)
-- 
cgit v1.2.3


From fe4943702c850fa07f963eaa6f1530d9d4c2da78 Mon Sep 17 00:00:00 2001
From: Srinivas Dasari <dasaris@codeaurora.org>
Date: Wed, 23 Jan 2019 18:06:56 +0530
Subject: cfg80211: Authentication offload to user space in AP mode

commit 40cbfa90218b ("cfg80211/nl80211: Optional authentication
offload to userspace")' introduced authentication offload to user
space by the host drivers in station mode. This commit extends
the same for the AP mode too.

Extend NL80211_ATTR_EXTERNAL_AUTH_SUPPORT to also claim the
support of external authentication from the user space in AP mode.
A new flag parameter is introduced in cfg80211_ap_settings to
intend the same while "start ap".

Host driver to use NL80211_CMD_FRAME interface to transmit and
receive the authentication frames to / from the user space.

Host driver to indicate the flag NL80211_RXMGMT_FLAG_EXTERNAL_AUTH
while sending the authentication frame to the user space. This
intends to the user space that the driver wishes it to process
the authentication frame for certain protocols, though it had
initially advertised the support for SME functionality.

User space shall accordingly do the authentication and indicate
its final status through the command NL80211_CMD_EXTERNAL_AUTH.
Allow the command even if userspace doesn't include the attribute
NL80211_ATTR_SSID for AP interface.

Host driver shall continue with the association sequence and
indicate the STA connection status through cfg80211_new_sta.

To facilitate the host drivers in AP mode for matching the pmkid
by the stations during the association, NL80211_CMD_EXTERNAL_AUTH
is also enhanced to include the pmkid to drivers after
the authentication.
This pmkid can also be used in the STA mode to include in the
association request.

Also modify nl80211_external_auth to not mandate SSID in AP mode.

Signed-off-by: Srinivas Dasari <dasaris@codeaurora.org>
[remove useless nla_get_flag() usage]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h       | 15 +++++++++++++++
 include/uapi/linux/nl80211.h | 13 +++++++++----
 net/wireless/nl80211.c       | 25 ++++++++++++++++++-------
 3 files changed, 42 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index b61ac6e9de08..7033c90850b0 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -835,6 +835,17 @@ struct cfg80211_bitrate_mask {
 	} control[NUM_NL80211_BANDS];
 };
 
+/**
+ * enum cfg80211_ap_settings_flags - AP settings flags
+ *
+ * Used by cfg80211_ap_settings
+ *
+ * @AP_SETTINGS_EXTERNAL_AUTH_SUPPORT: AP supports external authentication
+ */
+enum cfg80211_ap_settings_flags {
+	AP_SETTINGS_EXTERNAL_AUTH_SUPPORT = BIT(0),
+};
+
 /**
  * struct cfg80211_ap_settings - AP configuration
  *
@@ -865,6 +876,7 @@ struct cfg80211_bitrate_mask {
  * @he_cap: HE capabilities (or %NULL if HE isn't enabled)
  * @ht_required: stations must support HT
  * @vht_required: stations must support VHT
+ * @flags: flags, as defined in enum cfg80211_ap_settings_flags
  */
 struct cfg80211_ap_settings {
 	struct cfg80211_chan_def chandef;
@@ -890,6 +902,7 @@ struct cfg80211_ap_settings {
 	const struct ieee80211_vht_cap *vht_cap;
 	const struct ieee80211_he_cap_elem *he_cap;
 	bool ht_required, vht_required;
+	u32 flags;
 };
 
 /**
@@ -2831,6 +2844,7 @@ struct cfg80211_pmk_conf {
  *	use %WLAN_STATUS_UNSPECIFIED_FAILURE if user space cannot give you
  *	the real status code for failures. Used only for the authentication
  *	response command interface (user space to driver).
+ * @pmkid: The identifier to refer a PMKSA.
  */
 struct cfg80211_external_auth_params {
 	enum nl80211_external_auth_action action;
@@ -2838,6 +2852,7 @@ struct cfg80211_external_auth_params {
 	struct cfg80211_ssid ssid;
 	unsigned int key_mgmt_suite;
 	u16 status;
+	const u8 *pmkid;
 };
 
 /**
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 5f9d5cd458a1..8b0fdb9e133b 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -2266,10 +2266,10 @@ enum nl80211_commands {
  *     &enum nl80211_external_auth_action value). This is used with the
  *     %NL80211_CMD_EXTERNAL_AUTH request event.
  * @NL80211_ATTR_EXTERNAL_AUTH_SUPPORT: Flag attribute indicating that the user
- *     space supports external authentication. This attribute shall be used
- *     only with %NL80211_CMD_CONNECT request. The driver may offload
- *     authentication processing to user space if this capability is indicated
- *     in NL80211_CMD_CONNECT requests from the user space.
+ *	space supports external authentication. This attribute shall be used
+ *	with %NL80211_CMD_CONNECT and %NL80211_CMD_START_AP request. The driver
+ *	may offload authentication processing to user space if this capability
+ *	is indicated in the respective requests from the user space.
  *
  * @NL80211_ATTR_NSS: Station's New/updated  RX_NSS value notified using this
  *	u8 attribute. This is used with %NL80211_CMD_STA_OPMODE_CHANGED.
@@ -5631,9 +5631,14 @@ enum nl80211_crit_proto_id {
  * Used by cfg80211_rx_mgmt()
  *
  * @NL80211_RXMGMT_FLAG_ANSWERED: frame was answered by device/driver.
+ * @NL80211_RXMGMT_FLAG_EXTERNAL_AUTH: Host driver intends to offload
+ *	the authentication. Exclusively defined for host drivers that
+ *	advertises the SME functionality but would like the userspace
+ *	to handle certain authentication algorithms (e.g. SAE).
  */
 enum nl80211_rxmgmt_flags {
 	NL80211_RXMGMT_FLAG_ANSWERED = 1 << 0,
+	NL80211_RXMGMT_FLAG_EXTERNAL_AUTH = 1 << 1,
 };
 
 /*
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index eb4437fa0539..dc96077afe5e 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -4550,6 +4550,9 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info)
 
 	nl80211_calculate_ap_params(&params);
 
+	if (info->attrs[NL80211_ATTR_EXTERNAL_AUTH_SUPPORT])
+		params.flags |= AP_SETTINGS_EXTERNAL_AUTH_SUPPORT;
+
 	wdev_lock(wdev);
 	err = rdev_start_ap(rdev, dev, &params);
 	if (!err) {
@@ -13086,7 +13089,9 @@ static int nl80211_external_auth(struct sk_buff *skb, struct genl_info *info)
 	if (!rdev->ops->external_auth)
 		return -EOPNOTSUPP;
 
-	if (!info->attrs[NL80211_ATTR_SSID])
+	if (!info->attrs[NL80211_ATTR_SSID] &&
+	    dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP &&
+	    dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO)
 		return -EINVAL;
 
 	if (!info->attrs[NL80211_ATTR_BSSID])
@@ -13097,18 +13102,24 @@ static int nl80211_external_auth(struct sk_buff *skb, struct genl_info *info)
 
 	memset(&params, 0, sizeof(params));
 
-	params.ssid.ssid_len = nla_len(info->attrs[NL80211_ATTR_SSID]);
-	if (params.ssid.ssid_len == 0 ||
-	    params.ssid.ssid_len > IEEE80211_MAX_SSID_LEN)
-		return -EINVAL;
-	memcpy(params.ssid.ssid, nla_data(info->attrs[NL80211_ATTR_SSID]),
-	       params.ssid.ssid_len);
+	if (info->attrs[NL80211_ATTR_SSID]) {
+		params.ssid.ssid_len = nla_len(info->attrs[NL80211_ATTR_SSID]);
+		if (params.ssid.ssid_len == 0 ||
+		    params.ssid.ssid_len > IEEE80211_MAX_SSID_LEN)
+			return -EINVAL;
+		memcpy(params.ssid.ssid,
+		       nla_data(info->attrs[NL80211_ATTR_SSID]),
+		       params.ssid.ssid_len);
+	}
 
 	memcpy(params.bssid, nla_data(info->attrs[NL80211_ATTR_BSSID]),
 	       ETH_ALEN);
 
 	params.status = nla_get_u16(info->attrs[NL80211_ATTR_STATUS_CODE]);
 
+	if (info->attrs[NL80211_ATTR_PMKID])
+		params.pmkid = nla_data(info->attrs[NL80211_ATTR_PMKID]);
+
 	return rdev_external_auth(rdev, dev, &params);
 }
 
-- 
cgit v1.2.3


From 6c900360e7c0df6a4846ac97d7b548d72cd801b0 Mon Sep 17 00:00:00 2001
From: Liangwei Dong <liangwei@codeaurora.org>
Date: Fri, 18 Jan 2019 16:54:38 +0530
Subject: nl80211: Allow set/del pmksa operations for AP

Host drivers may offload authentication to the user space
through the commit ("cfg80211: Authentication offload to
user space in AP mode").

This interface can be used to implement SAE by having the
userspace do authentication/PMKID key derivation and driver
handle the association.

A step ahead, this interface can get further optimized if the
PMKID is passed to the host driver and also have it respond to
the association request by the STA on a valid PMKID.

This commit enables the userspace to pass the PMKID to the host
drivers through the set/del pmksa operations in AP mode.

Set/Del pmksa is now restricted to STA/P2P client mode only and
thus the drivers might not expect them in any other(AP) mode.

This commit also introduces a feature flag
NL80211_EXT_FEATURE_AP_PMKSA_CACHING (johannes: renamed) to
maintain the backward compatibility of such an expectation by
the host drivers. These operations are allowed in AP mode only
when the drivers advertize the capability through this flag.

Signed-off-by: Liangwei Dong <liangwei@codeaurora.org>
Signed-off-by: Srinivas Dasari <dasaris@codeaurora.org>
[rename flag to NL80211_EXT_FEATURE_AP_PMKSA_CACHING]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/uapi/linux/nl80211.h | 4 ++++
 net/wireless/nl80211.c       | 5 ++++-
 2 files changed, 8 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 8b0fdb9e133b..dd4f86ee286e 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -5340,6 +5340,9 @@ enum nl80211_feature_flags {
  *	fairness for transmitted packets and has enabled airtime fairness
  *	scheduling.
  *
+ * @NL80211_EXT_FEATURE_AP_PMKSA_CACHING: Driver/device supports PMKSA caching
+ *	(set/del PMKSA operations) in AP mode.
+ *
  * @NUM_NL80211_EXT_FEATURES: number of extended features.
  * @MAX_NL80211_EXT_FEATURES: highest extended feature index.
  */
@@ -5380,6 +5383,7 @@ enum nl80211_ext_feature_index {
 	NL80211_EXT_FEATURE_CAN_REPLACE_PTK0,
 	NL80211_EXT_FEATURE_ENABLE_FTM_RESPONDER,
 	NL80211_EXT_FEATURE_AIRTIME_FAIRNESS,
+	NL80211_EXT_FEATURE_AP_PMKSA_CACHING,
 
 	/* add new features before the definition below */
 	NUM_NL80211_EXT_FEATURES,
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index dc96077afe5e..af89e5c9fd0a 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -9899,7 +9899,10 @@ static int nl80211_setdel_pmksa(struct sk_buff *skb, struct genl_info *info)
 	}
 
 	if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION &&
-	    dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_CLIENT)
+	    dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_CLIENT &&
+	    !(dev->ieee80211_ptr->iftype == NL80211_IFTYPE_AP &&
+	      wiphy_ext_feature_isset(&rdev->wiphy,
+				      NL80211_EXT_FEATURE_AP_PMKSA_CACHING)))
 		return -EOPNOTSUPP;
 
 	switch (info->genlhdr->cmd) {
-- 
cgit v1.2.3


From 6ba7d681aca22e53385bdb35b1d7662e61905760 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.ibm.com>
Date: Wed, 9 Jan 2019 15:22:03 -0800
Subject: rcu: Remove wrapper definitions for obsolete RCU update functions

None of synchronize_rcu_bh, synchronize_rcu_bh_expedited, call_rcu_bh,
rcu_barrier_bh, synchronize_sched, synchronize_sched_expedited,
call_rcu_sched, rcu_barrier_sched, get_state_synchronize_sched, and
cond_synchronize_sched are actually used.  This commit therefore removes
their trivial wrapper-function definitions.

Signed-off-by: Paul E. McKenney <paulmck@linux.ibm.com>
---
 include/linux/rcupdate.h | 53 ------------------------------------------------
 1 file changed, 53 deletions(-)

(limited to 'include')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 4db8bcacc51a..0e39e0d2629e 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -896,57 +896,4 @@ rcu_head_after_call_rcu(struct rcu_head *rhp, rcu_callback_t f)
 	return false;
 }
 
-
-/* Transitional pre-consolidation compatibility definitions. */
-
-static inline void synchronize_rcu_bh(void)
-{
-	synchronize_rcu();
-}
-
-static inline void synchronize_rcu_bh_expedited(void)
-{
-	synchronize_rcu_expedited();
-}
-
-static inline void call_rcu_bh(struct rcu_head *head, rcu_callback_t func)
-{
-	call_rcu(head, func);
-}
-
-static inline void rcu_barrier_bh(void)
-{
-	rcu_barrier();
-}
-
-static inline void synchronize_sched(void)
-{
-	synchronize_rcu();
-}
-
-static inline void synchronize_sched_expedited(void)
-{
-	synchronize_rcu_expedited();
-}
-
-static inline void call_rcu_sched(struct rcu_head *head, rcu_callback_t func)
-{
-	call_rcu(head, func);
-}
-
-static inline void rcu_barrier_sched(void)
-{
-	rcu_barrier();
-}
-
-static inline unsigned long get_state_synchronize_sched(void)
-{
-	return get_state_synchronize_rcu();
-}
-
-static inline void cond_synchronize_sched(unsigned long oldstate)
-{
-	cond_synchronize_rcu(oldstate);
-}
-
 #endif /* __LINUX_RCUPDATE_H */
-- 
cgit v1.2.3


From 2aa5503026ceaa8860697b93c9e5bbbcd025ba89 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.ibm.com>
Date: Tue, 20 Nov 2018 08:29:35 -0800
Subject: rcu: Docbook for rcu_head_init() and rcu_head_after_call_rcu()

This commit adds the missing asterisks required to make Sphinx pick up
the current header comments for these two functions.

Signed-off-by: Paul E. McKenney <paulmck@linux.ibm.com>
---
 include/linux/rcupdate.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 0e39e0d2629e..632113946757 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -859,7 +859,7 @@ static inline notrace void rcu_read_unlock_sched_notrace(void)
 
 /* Has the specified rcu_head structure been handed to call_rcu()? */
 
-/*
+/**
  * rcu_head_init - Initialize rcu_head for rcu_head_after_call_rcu()
  * @rhp: The rcu_head structure to initialize.
  *
@@ -874,10 +874,10 @@ static inline void rcu_head_init(struct rcu_head *rhp)
 	rhp->func = (rcu_callback_t)~0L;
 }
 
-/*
+/**
  * rcu_head_after_call_rcu - Has this rcu_head been passed to call_rcu()?
  * @rhp: The rcu_head structure to test.
- * @func: The function passed to call_rcu() along with @rhp.
+ * @f: The function passed to call_rcu() along with @rhp.
  *
  * Returns @true if the @rhp has been passed to call_rcu() with @func,
  * and @false otherwise.  Emits a warning in any other case, including
-- 
cgit v1.2.3


From c98cac603f1ce7d00e2a802b5640bced3bc3c1f2 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.ibm.com>
Date: Wed, 21 Nov 2018 11:35:03 -0800
Subject: rcu: Rename rcu_check_callbacks() to rcu_sched_clock_irq()

The name rcu_check_callbacks() arguably made sense back in the early
2000s when RCU was quite a bit simpler than it is today, but it has
become quite misleading, especially with the advent of dyntick-idle
and NO_HZ_FULL.  The rcu_check_callbacks() function is RCU's hook into
the scheduling-clock interrupt, and is now but one of many ways that
callbacks get promoted to invocable state.

This commit therefore changes the name to rcu_sched_clock_irq(),
which is the same number of characters and clearly indicates this
function's relation to the rest of the Linux kernel.  In addition, for
the sake of consistency, rcu_flavor_check_callbacks() is also renamed
to rcu_flavor_sched_clock_irq().

While in the area, the header comments for both functions are reworked.

Signed-off-by: Paul E. McKenney <paulmck@linux.ibm.com>
---
 .../Memory-Ordering/Tree-RCU-Memory-Ordering.html  |  4 ++--
 .../TreeRCU-callback-invocation.svg                |  2 +-
 .../RCU/Design/Memory-Ordering/TreeRCU-gp.svg      |  4 ++--
 .../RCU/Design/Memory-Ordering/TreeRCU-qs.svg      |  2 +-
 include/linux/rcupdate.h                           |  2 +-
 kernel/rcu/tiny.c                                  |  2 +-
 kernel/rcu/tree.c                                  | 18 ++++++++--------
 kernel/rcu/tree.h                                  |  2 +-
 kernel/rcu/tree_plugin.h                           | 24 +++++++++-------------
 kernel/time/timer.c                                |  2 +-
 10 files changed, 29 insertions(+), 33 deletions(-)

(limited to 'include')

diff --git a/Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.html b/Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.html
index e4d94fba6c89..a3acfd49255f 100644
--- a/Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.html
+++ b/Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.html
@@ -485,7 +485,7 @@ section that the grace period must wait on.
 noted by <tt>rcu_node_context_switch()</tt> on the left.
 On the other hand, if the CPU takes a scheduler-clock interrupt
 while executing in usermode, a quiescent state will be noted by
-<tt>rcu_check_callbacks()</tt> on the right.
+<tt>rcu_sched_clock_irq()</tt> on the right.
 Either way, the passage through a quiescent state will be noted
 in a per-CPU variable.
 
@@ -651,7 +651,7 @@ to end.
 These callbacks are identified by <tt>rcu_advance_cbs()</tt>,
 which is usually invoked by <tt>__note_gp_changes()</tt>.
 As shown in the diagram below, this invocation can be triggered by
-the scheduling-clock interrupt (<tt>rcu_check_callbacks()</tt> on
+the scheduling-clock interrupt (<tt>rcu_sched_clock_irq()</tt> on
 the left) or by idle entry (<tt>rcu_cleanup_after_idle()</tt> on
 the right, but only for kernels build with
 <tt>CONFIG_RCU_FAST_NO_HZ=y</tt>).
diff --git a/Documentation/RCU/Design/Memory-Ordering/TreeRCU-callback-invocation.svg b/Documentation/RCU/Design/Memory-Ordering/TreeRCU-callback-invocation.svg
index 832408313d93..3fcf0c17cef2 100644
--- a/Documentation/RCU/Design/Memory-Ordering/TreeRCU-callback-invocation.svg
+++ b/Documentation/RCU/Design/Memory-Ordering/TreeRCU-callback-invocation.svg
@@ -349,7 +349,7 @@
        font-weight="bold"
        font-size="192"
        id="text202-7-5"
-       style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">rcu_check_callbacks()</text>
+       style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">rcu_sched_clock_irq()</text>
     <rect
        x="7069.6187"
        y="5087.4678"
diff --git a/Documentation/RCU/Design/Memory-Ordering/TreeRCU-gp.svg b/Documentation/RCU/Design/Memory-Ordering/TreeRCU-gp.svg
index acd73c7ad0f4..f0bbe6f8d729 100644
--- a/Documentation/RCU/Design/Memory-Ordering/TreeRCU-gp.svg
+++ b/Documentation/RCU/Design/Memory-Ordering/TreeRCU-gp.svg
@@ -3902,7 +3902,7 @@
          font-style="normal"
          y="-4418.6582"
          x="3745.7725"
-         xml:space="preserve">rcu_check_callbacks()</text>
+         xml:space="preserve">rcu_sched_clock_irq()</text>
     </g>
     <g
        transform="translate(-850.30204,55463.106)"
@@ -4968,7 +4968,7 @@
        font-weight="bold"
        font-size="192"
        id="text202-7-5-19"
-       style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">rcu_check_callbacks()</text>
+       style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">rcu_sched_clock_irq()</text>
     <rect
        x="5314.2671"
        y="82817.688"
diff --git a/Documentation/RCU/Design/Memory-Ordering/TreeRCU-qs.svg b/Documentation/RCU/Design/Memory-Ordering/TreeRCU-qs.svg
index 149bec2a4493..3596ffdd4685 100644
--- a/Documentation/RCU/Design/Memory-Ordering/TreeRCU-qs.svg
+++ b/Documentation/RCU/Design/Memory-Ordering/TreeRCU-qs.svg
@@ -775,7 +775,7 @@
          font-style="normal"
          y="-4418.6582"
          x="3745.7725"
-         xml:space="preserve">rcu_check_callbacks()</text>
+         xml:space="preserve">rcu_sched_clock_irq()</text>
     </g>
     <g
        transform="translate(399.7744,828.86448)"
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 632113946757..6f8f047c4068 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -89,7 +89,7 @@ static inline int rcu_preempt_depth(void)
 /* Internal to kernel */
 void rcu_init(void);
 extern int rcu_scheduler_active __read_mostly;
-void rcu_check_callbacks(int user);
+void rcu_sched_clock_irq(int user);
 void rcu_report_dead(unsigned int cpu);
 void rcutree_migrate_callbacks(int cpu);
 
diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c
index 5f5963ba313e..d7a9135b9471 100644
--- a/kernel/rcu/tiny.c
+++ b/kernel/rcu/tiny.c
@@ -76,7 +76,7 @@ void rcu_qs(void)
  * be called from hardirq context.  It is normally called from the
  * scheduling-clock interrupt.
  */
-void rcu_check_callbacks(int user)
+void rcu_sched_clock_irq(int user)
 {
 	if (user) {
 		rcu_qs();
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 1c4add096078..874054b30fe6 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -1139,7 +1139,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
 	}
 
 	/*
-	 * NO_HZ_FULL CPUs can run in-kernel without rcu_check_callbacks!
+	 * NO_HZ_FULL CPUs can run in-kernel without rcu_sched_clock_irq!
 	 * The above code handles this, but only for straight cond_resched().
 	 * And some in-kernel loops check need_resched() before calling
 	 * cond_resched(), which defeats the above code for CPUs that are
@@ -2532,14 +2532,14 @@ static void rcu_do_batch(struct rcu_data *rdp)
 }
 
 /*
- * Check to see if this CPU is in a non-context-switch quiescent state
- * (user mode or idle loop for rcu, non-softirq execution for rcu_bh).
- * Also schedule RCU core processing.
- *
- * This function must be called from hardirq context.  It is normally
- * invoked from the scheduling-clock interrupt.
+ * This function is invoked from each scheduling-clock interrupt,
+ * and checks to see if this CPU is in a non-context-switch quiescent
+ * state, for example, user mode or idle loop.  It also schedules RCU
+ * core processing.  If the current grace period has gone on too long,
+ * it will ask the scheduler to manufacture a context switch for the sole
+ * purpose of providing a providing the needed quiescent state.
  */
-void rcu_check_callbacks(int user)
+void rcu_sched_clock_irq(int user)
 {
 	trace_rcu_utilization(TPS("Start scheduler-tick"));
 	raw_cpu_inc(rcu_data.ticks_this_gp);
@@ -2552,7 +2552,7 @@ void rcu_check_callbacks(int user)
 		}
 		__this_cpu_write(rcu_data.rcu_urgent_qs, false);
 	}
-	rcu_flavor_check_callbacks(user);
+	rcu_flavor_sched_clock_irq(user);
 	if (rcu_pending())
 		invoke_rcu_core();
 
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index 149557b7c39c..f37f54cc9080 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -417,7 +417,7 @@ static void rcu_print_detail_task_stall(void);
 static int rcu_print_task_stall(struct rcu_node *rnp);
 static int rcu_print_task_exp_stall(struct rcu_node *rnp);
 static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp);
-static void rcu_flavor_check_callbacks(int user);
+static void rcu_flavor_sched_clock_irq(int user);
 void call_rcu(struct rcu_head *head, rcu_callback_t func);
 static void dump_blkd_tasks(struct rcu_node *rnp, int ncheck);
 static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags);
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 8ceed9e25ad5..cdff9bc0c64b 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -297,7 +297,7 @@ static void rcu_qs(void)
 				       __this_cpu_read(rcu_data.gp_seq),
 				       TPS("cpuqs"));
 		__this_cpu_write(rcu_data.cpu_no_qs.b.norm, false);
-		barrier(); /* Coordinate with rcu_flavor_check_callbacks(). */
+		barrier(); /* Coordinate with rcu_flavor_sched_clock_irq(). */
 		current->rcu_read_unlock_special.b.need_qs = false;
 	}
 }
@@ -778,13 +778,13 @@ static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp)
 }
 
 /*
- * Check for a quiescent state from the current CPU.  When a task blocks,
- * the task is recorded in the corresponding CPU's rcu_node structure,
- * which is checked elsewhere.
- *
- * Caller must disable hard irqs.
+ * Check for a quiescent state from the current CPU, including voluntary
+ * context switches for Tasks RCU.  When a task blocks, the task is
+ * recorded in the corresponding CPU's rcu_node structure, which is checked
+ * elsewhere, hence this function need only check for quiescent states
+ * related to the current CPU, not to those related to tasks.
  */
-static void rcu_flavor_check_callbacks(int user)
+static void rcu_flavor_sched_clock_irq(int user)
 {
 	struct task_struct *t = current;
 
@@ -1030,14 +1030,10 @@ static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp)
 }
 
 /*
- * Check to see if this CPU is in a non-context-switch quiescent state
- * (user mode or idle loop for rcu, non-softirq execution for rcu_bh).
- * Also schedule RCU core processing.
- *
- * This function must be called from hardirq context.  It is normally
- * invoked from the scheduling-clock interrupt.
+ * Check to see if this CPU is in a non-context-switch quiescent state,
+ * namely user mode and idle loop.
  */
-static void rcu_flavor_check_callbacks(int user)
+static void rcu_flavor_sched_clock_irq(int user)
 {
 	if (user || rcu_is_cpu_rrupt_from_idle()) {
 
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 444156debfa0..6eb7cc4b6d52 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -1632,7 +1632,7 @@ void update_process_times(int user_tick)
 	/* Note: this timer irq context must be accounted for as well. */
 	account_process_tick(p, user_tick);
 	run_local_timers();
-	rcu_check_callbacks(user_tick);
+	rcu_sched_clock_irq(user_tick);
 #ifdef CONFIG_IRQ_WORK
 	if (in_irq())
 		irq_work_tick();
-- 
cgit v1.2.3


From 423a86a610cad121742ebe698ef98a3b4c87b5dd Mon Sep 17 00:00:00 2001
From: "Joel Fernandes (Google)" <joel@joelfernandes.org>
Date: Wed, 12 Dec 2018 14:37:10 -0800
Subject: rcu: Add sparse check to rcu_assign_pointer()

The rcu_assign_pointer() function currently doesn't do any sparse checking
on the assigned-to pointer.  So its possible that a pointer that is
not __rcu annotated is assigned with rcu_assign_pointer without sparse
complaints.  Because rcu_dereference() already does such checking,
this commit makes rcu_assign_pointer() to do the same. The extra
error could be helpful in cases where an RCU pointer is assigned with
rcu_assign_pointer() but not annotated with __rcu.

This doesn't generate any code in the normal case because __CHECKER__ is
defined only in the context of sparse.

This commit also renames rcu_dereference_sparse() to rcu_check_parse()
since the checking now happens not only during derereferencing but also
during assignment.

Test: Introduced an rcu_assign_pointer in code and checked the output of
sparse with and without this change. The change correctly causes sparse
to throw an error.

Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.ibm.com>
---
 include/linux/rcupdate.h | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 6f8f047c4068..4a2cce4d4bd9 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -309,16 +309,16 @@ static inline void rcu_preempt_sleep_check(void) { }
  */
 
 #ifdef __CHECKER__
-#define rcu_dereference_sparse(p, space) \
+#define rcu_check_sparse(p, space) \
 	((void)(((typeof(*p) space *)p) == p))
 #else /* #ifdef __CHECKER__ */
-#define rcu_dereference_sparse(p, space)
+#define rcu_check_sparse(p, space)
 #endif /* #else #ifdef __CHECKER__ */
 
 #define __rcu_access_pointer(p, space) \
 ({ \
 	typeof(*p) *_________p1 = (typeof(*p) *__force)READ_ONCE(p); \
-	rcu_dereference_sparse(p, space); \
+	rcu_check_sparse(p, space); \
 	((typeof(*p) __force __kernel *)(_________p1)); \
 })
 #define __rcu_dereference_check(p, c, space) \
@@ -326,13 +326,13 @@ static inline void rcu_preempt_sleep_check(void) { }
 	/* Dependency order vs. p above. */ \
 	typeof(*p) *________p1 = (typeof(*p) *__force)READ_ONCE(p); \
 	RCU_LOCKDEP_WARN(!(c), "suspicious rcu_dereference_check() usage"); \
-	rcu_dereference_sparse(p, space); \
+	rcu_check_sparse(p, space); \
 	((typeof(*p) __force __kernel *)(________p1)); \
 })
 #define __rcu_dereference_protected(p, c, space) \
 ({ \
 	RCU_LOCKDEP_WARN(!(c), "suspicious rcu_dereference_protected() usage"); \
-	rcu_dereference_sparse(p, space); \
+	rcu_check_sparse(p, space); \
 	((typeof(*p) __force __kernel *)(p)); \
 })
 #define rcu_dereference_raw(p) \
@@ -382,6 +382,7 @@ static inline void rcu_preempt_sleep_check(void) { }
 #define rcu_assign_pointer(p, v)					      \
 ({									      \
 	uintptr_t _r_a_p__v = (uintptr_t)(v);				      \
+	rcu_check_sparse(p, __rcu);				      \
 									      \
 	if (__builtin_constant_p(v) && (_r_a_p__v) == (uintptr_t)NULL)	      \
 		WRITE_ONCE((p), (typeof(p))(_r_a_p__v));		      \
@@ -785,7 +786,7 @@ static inline notrace void rcu_read_unlock_sched_notrace(void)
  */
 #define RCU_INIT_POINTER(p, v) \
 	do { \
-		rcu_dereference_sparse(p, __rcu); \
+		rcu_check_sparse(p, __rcu); \
 		WRITE_ONCE(p, RCU_INITIALIZER(v)); \
 	} while (0)
 
-- 
cgit v1.2.3


From 728e3e6178aa5790066b642dd2ebc9e9bdacff1b Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.ibm.com>
Date: Tue, 8 Jan 2019 10:13:44 -0800
Subject: include/asm-generic: Remove spin_is_locked() comment

The WARN_ON_SMP() comment header suggests using spin_is_locked() to
check for locks being held.  But these days we prefer lockdep_assert_held(),
so this commit removes that suggestion.

Signed-off-by: Paul E. McKenney <paulmck@linux.ibm.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: <linux-arch@vger.kernel.org>
---
 include/asm-generic/bug.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h
index 20561a60db9c..0e9bd9c83870 100644
--- a/include/asm-generic/bug.h
+++ b/include/asm-generic/bug.h
@@ -211,9 +211,6 @@ void __warn(const char *file, int line, void *caller, unsigned taint,
 /*
  * WARN_ON_SMP() is for cases that the warning is either
  * meaningless for !SMP or may even cause failures.
- * This is usually used for cases that we have
- * WARN_ON(!spin_is_locked(&lock)) checks, as spin_is_locked()
- * returns 0 for uniprocessor settings.
  * It can also be used with values that are only defined
  * on SMP:
  *
-- 
cgit v1.2.3


From c8ca1aa774b20f182733d1661f3b6aa3105338e7 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.ibm.com>
Date: Fri, 30 Nov 2018 10:06:46 -0800
Subject: srcu: Check for invalid idx argument in srcu_read_unlock()

The current SRCU implementation has an idx argument of zero or one,
and never anything else.  This commit therefore adds a WARN_ON_ONCE()
to complain if this restriction is violated.

Signed-off-by: Paul E. McKenney <paulmck@linux.ibm.com>
---
 include/linux/srcu.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index c614375cd264..33cf83b9bda8 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -223,6 +223,7 @@ srcu_read_lock_notrace(struct srcu_struct *ssp) __acquires(ssp)
 static inline void srcu_read_unlock(struct srcu_struct *ssp, int idx)
 	__releases(ssp)
 {
+	WARN_ON_ONCE(idx & ~0x1);
 	rcu_lock_release(&(ssp)->dep_map);
 	__srcu_read_unlock(ssp, idx);
 }
-- 
cgit v1.2.3


From e81baf4cb19a9b428ba477fd0423f81672a58817 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Tue, 11 Dec 2018 12:12:38 +0100
Subject: srcu: Remove srcu_queue_delayed_work_on()

srcu_queue_delayed_work_on() disables preemption (and therefore CPU
hotplug in RCU's case) and then checks based on its own accounting if a
CPU is online. If the CPU is online it uses queue_delayed_work_on()
otherwise it fallbacks to queue_delayed_work().
The problem here is that queue_work() on -RT does not work with disabled
preemption.

queue_work_on() works also on an offlined CPU. queue_delayed_work_on()
has the problem that it is possible to program a timer on an offlined
CPU. This timer will fire once the CPU is online again. But until then,
the timer remains programmed and nothing will happen.

Add a local timer which will fire (as requested per delay) on the local
CPU and then enqueue the work on the specific CPU.

RCUtorture testing with SRCU-P for 24h showed no problems.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Paul E. McKenney <paulmck@linux.ibm.com>
---
 include/linux/srcutree.h |  3 ++-
 kernel/rcu/srcutree.c    | 55 +++++++++++++++++++++---------------------------
 kernel/rcu/tree.c        |  4 ----
 kernel/rcu/tree.h        |  8 -------
 4 files changed, 26 insertions(+), 44 deletions(-)

(limited to 'include')

diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h
index 6f292bd3e7db..0faa978c9880 100644
--- a/include/linux/srcutree.h
+++ b/include/linux/srcutree.h
@@ -45,7 +45,8 @@ struct srcu_data {
 	unsigned long srcu_gp_seq_needed;	/* Furthest future GP needed. */
 	unsigned long srcu_gp_seq_needed_exp;	/* Furthest future exp GP. */
 	bool srcu_cblist_invoking;		/* Invoking these CBs? */
-	struct delayed_work work;		/* Context for CB invoking. */
+	struct timer_list delay_work;		/* Delay for CB invoking */
+	struct work_struct work;		/* Context for CB invoking. */
 	struct rcu_head srcu_barrier_head;	/* For srcu_barrier() use. */
 	struct srcu_node *mynode;		/* Leaf srcu_node. */
 	unsigned long grpmask;			/* Mask for leaf srcu_node */
diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
index 3600d88d8956..7f041f2435df 100644
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -58,6 +58,7 @@ static bool __read_mostly srcu_init_done;
 static void srcu_invoke_callbacks(struct work_struct *work);
 static void srcu_reschedule(struct srcu_struct *ssp, unsigned long delay);
 static void process_srcu(struct work_struct *work);
+static void srcu_delay_timer(struct timer_list *t);
 
 /* Wrappers for lock acquisition and release, see raw_spin_lock_rcu_node(). */
 #define spin_lock_rcu_node(p)					\
@@ -156,7 +157,8 @@ static void init_srcu_struct_nodes(struct srcu_struct *ssp, bool is_static)
 			snp->grphi = cpu;
 		}
 		sdp->cpu = cpu;
-		INIT_DELAYED_WORK(&sdp->work, srcu_invoke_callbacks);
+		INIT_WORK(&sdp->work, srcu_invoke_callbacks);
+		timer_setup(&sdp->delay_work, srcu_delay_timer, 0);
 		sdp->ssp = ssp;
 		sdp->grpmask = 1 << (cpu - sdp->mynode->grplo);
 		if (is_static)
@@ -386,13 +388,19 @@ void _cleanup_srcu_struct(struct srcu_struct *ssp, bool quiesced)
 	} else {
 		flush_delayed_work(&ssp->work);
 	}
-	for_each_possible_cpu(cpu)
+	for_each_possible_cpu(cpu) {
+		struct srcu_data *sdp = per_cpu_ptr(ssp->sda, cpu);
+
 		if (quiesced) {
-			if (WARN_ON(delayed_work_pending(&per_cpu_ptr(ssp->sda, cpu)->work)))
+			if (WARN_ON(timer_pending(&sdp->delay_work)))
+				return; /* Just leak it! */
+			if (WARN_ON(work_pending(&sdp->work)))
 				return; /* Just leak it! */
 		} else {
-			flush_delayed_work(&per_cpu_ptr(ssp->sda, cpu)->work);
+			del_timer_sync(&sdp->delay_work);
+			flush_work(&sdp->work);
 		}
+	}
 	if (WARN_ON(rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq)) != SRCU_STATE_IDLE) ||
 	    WARN_ON(srcu_readers_active(ssp))) {
 		pr_info("%s: Active srcu_struct %p state: %d\n",
@@ -463,39 +471,23 @@ static void srcu_gp_start(struct srcu_struct *ssp)
 	WARN_ON_ONCE(state != SRCU_STATE_SCAN1);
 }
 
-/*
- * Track online CPUs to guide callback workqueue placement.
- */
-DEFINE_PER_CPU(bool, srcu_online);
 
-void srcu_online_cpu(unsigned int cpu)
+static void srcu_delay_timer(struct timer_list *t)
 {
-	WRITE_ONCE(per_cpu(srcu_online, cpu), true);
-}
+	struct srcu_data *sdp = container_of(t, struct srcu_data, delay_work);
 
-void srcu_offline_cpu(unsigned int cpu)
-{
-	WRITE_ONCE(per_cpu(srcu_online, cpu), false);
+	queue_work_on(sdp->cpu, rcu_gp_wq, &sdp->work);
 }
 
-/*
- * Place the workqueue handler on the specified CPU if online, otherwise
- * just run it whereever.  This is useful for placing workqueue handlers
- * that are to invoke the specified CPU's callbacks.
- */
-static bool srcu_queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
-				       struct delayed_work *dwork,
+static void srcu_queue_delayed_work_on(struct srcu_data *sdp,
 				       unsigned long delay)
 {
-	bool ret;
+	if (!delay) {
+		queue_work_on(sdp->cpu, rcu_gp_wq, &sdp->work);
+		return;
+	}
 
-	preempt_disable();
-	if (READ_ONCE(per_cpu(srcu_online, cpu)))
-		ret = queue_delayed_work_on(cpu, wq, dwork, delay);
-	else
-		ret = queue_delayed_work(wq, dwork, delay);
-	preempt_enable();
-	return ret;
+	timer_reduce(&sdp->delay_work, jiffies + delay);
 }
 
 /*
@@ -504,7 +496,7 @@ static bool srcu_queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
  */
 static void srcu_schedule_cbs_sdp(struct srcu_data *sdp, unsigned long delay)
 {
-	srcu_queue_delayed_work_on(sdp->cpu, rcu_gp_wq, &sdp->work, delay);
+	srcu_queue_delayed_work_on(sdp, delay);
 }
 
 /*
@@ -1186,7 +1178,8 @@ static void srcu_invoke_callbacks(struct work_struct *work)
 	struct srcu_data *sdp;
 	struct srcu_struct *ssp;
 
-	sdp = container_of(work, struct srcu_data, work.work);
+	sdp = container_of(work, struct srcu_data, work);
+
 	ssp = sdp->ssp;
 	rcu_cblist_init(&ready_cbs);
 	spin_lock_irq_rcu_node(sdp);
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 1c4add096078..127255795859 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -3408,8 +3408,6 @@ int rcutree_online_cpu(unsigned int cpu)
 	raw_spin_lock_irqsave_rcu_node(rnp, flags);
 	rnp->ffmask |= rdp->grpmask;
 	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
-	if (IS_ENABLED(CONFIG_TREE_SRCU))
-		srcu_online_cpu(cpu);
 	if (rcu_scheduler_active == RCU_SCHEDULER_INACTIVE)
 		return 0; /* Too early in boot for scheduler work. */
 	sync_sched_exp_online_cleanup(cpu);
@@ -3434,8 +3432,6 @@ int rcutree_offline_cpu(unsigned int cpu)
 	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 
 	rcutree_affinity_setting(cpu, cpu);
-	if (IS_ENABLED(CONFIG_TREE_SRCU))
-		srcu_offline_cpu(cpu);
 	return 0;
 }
 
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index 149557b7c39c..4bba017c703c 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -458,11 +458,3 @@ static void rcu_bind_gp_kthread(void);
 static bool rcu_nohz_full_cpu(void);
 static void rcu_dynticks_task_enter(void);
 static void rcu_dynticks_task_exit(void);
-
-#ifdef CONFIG_SRCU
-void srcu_online_cpu(unsigned int cpu);
-void srcu_offline_cpu(unsigned int cpu);
-#else /* #ifdef CONFIG_SRCU */
-void srcu_online_cpu(unsigned int cpu) { }
-void srcu_offline_cpu(unsigned int cpu) { }
-#endif /* #else #ifdef CONFIG_SRCU */
-- 
cgit v1.2.3


From 3a6cb58f159e64241b2af9374acad41a70939349 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.ibm.com>
Date: Mon, 10 Dec 2018 09:44:52 -0800
Subject: rcutorture: Add grace period after CPU offline

Beyond a certain point in the CPU-hotplug offline process, timers get
stranded on the outgoing CPU, and won't fire until that CPU comes back
online, which might well be never.  This commit therefore adds a hook
in torture_onoff_init() that is invoked from torture_offline(), which
rcutorture uses to occasionally wait for a grace period.  This should
result in failures for RCU implementations that rely on stranded timers
eventually firing in the absence of the CPU coming back online.

Reported-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Paul E. McKenney <paulmck@linux.ibm.com>
---
 include/linux/torture.h      |  3 ++-
 kernel/locking/locktorture.c |  2 +-
 kernel/rcu/rcutorture.c      | 11 ++++++++++-
 kernel/torture.c             |  6 +++++-
 4 files changed, 18 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/torture.h b/include/linux/torture.h
index 48fad21109fc..f2d3bcbf4337 100644
--- a/include/linux/torture.h
+++ b/include/linux/torture.h
@@ -50,11 +50,12 @@
 	do { if (verbose) pr_alert("%s" TORTURE_FLAG "!!! %s\n", torture_type, s); } while (0)
 
 /* Definitions for online/offline exerciser. */
+typedef void torture_ofl_func(void);
 bool torture_offline(int cpu, long *n_onl_attempts, long *n_onl_successes,
 		     unsigned long *sum_offl, int *min_onl, int *max_onl);
 bool torture_online(int cpu, long *n_onl_attempts, long *n_onl_successes,
 		    unsigned long *sum_onl, int *min_onl, int *max_onl);
-int torture_onoff_init(long ooholdoff, long oointerval);
+int torture_onoff_init(long ooholdoff, long oointerval, torture_ofl_func *f);
 void torture_onoff_stats(void);
 bool torture_onoff_failures(void);
 
diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c
index 7d0b0ed74404..c8b348097bb5 100644
--- a/kernel/locking/locktorture.c
+++ b/kernel/locking/locktorture.c
@@ -970,7 +970,7 @@ static int __init lock_torture_init(void)
 	/* Prepare torture context. */
 	if (onoff_interval > 0) {
 		firsterr = torture_onoff_init(onoff_holdoff * HZ,
-					      onoff_interval * HZ);
+					      onoff_interval * HZ, NULL);
 		if (firsterr)
 			goto unwind;
 	}
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index 0955f3a20952..9eb9235c1ec9 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -2243,6 +2243,14 @@ static void rcu_test_debug_objects(void)
 #endif /* #else #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD */
 }
 
+static void rcutorture_sync(void)
+{
+	static unsigned long n;
+
+	if (cur_ops->sync && !(++n & 0xfff))
+		cur_ops->sync();
+}
+
 static int __init
 rcu_torture_init(void)
 {
@@ -2404,7 +2412,8 @@ rcu_torture_init(void)
 	firsterr = torture_shutdown_init(shutdown_secs, rcu_torture_cleanup);
 	if (firsterr)
 		goto unwind;
-	firsterr = torture_onoff_init(onoff_holdoff * HZ, onoff_interval);
+	firsterr = torture_onoff_init(onoff_holdoff * HZ, onoff_interval,
+				      rcutorture_sync);
 	if (firsterr)
 		goto unwind;
 	firsterr = rcu_torture_stall_init();
diff --git a/kernel/torture.c b/kernel/torture.c
index bbf6d473e50c..a03ff722352b 100644
--- a/kernel/torture.c
+++ b/kernel/torture.c
@@ -75,6 +75,7 @@ static DEFINE_MUTEX(fullstop_mutex);
 static struct task_struct *onoff_task;
 static long onoff_holdoff;
 static long onoff_interval;
+static torture_ofl_func *onoff_f;
 static long n_offline_attempts;
 static long n_offline_successes;
 static unsigned long sum_offline;
@@ -118,6 +119,8 @@ bool torture_offline(int cpu, long *n_offl_attempts, long *n_offl_successes,
 			pr_alert("%s" TORTURE_FLAG
 				 "torture_onoff task: offlined %d\n",
 				 torture_type, cpu);
+		if (onoff_f)
+			onoff_f();
 		(*n_offl_successes)++;
 		delta = jiffies - starttime;
 		*sum_offl += delta;
@@ -243,11 +246,12 @@ stop:
 /*
  * Initiate online-offline handling.
  */
-int torture_onoff_init(long ooholdoff, long oointerval)
+int torture_onoff_init(long ooholdoff, long oointerval, torture_ofl_func *f)
 {
 #ifdef CONFIG_HOTPLUG_CPU
 	onoff_holdoff = ooholdoff;
 	onoff_interval = oointerval;
+	onoff_f = f;
 	if (onoff_interval <= 0)
 		return 0;
 	return torture_create_kthread(torture_onoff, NULL, onoff_task);
-- 
cgit v1.2.3


From c23f35d19db3b36ffb9e04b08f1d91565d15f84f Mon Sep 17 00:00:00 2001
From: Peter Oskolkov <posk@google.com>
Date: Tue, 22 Jan 2019 10:02:50 -0800
Subject: net: IP defrag: encapsulate rbtree defrag code into callable
 functions

This is a refactoring patch: without changing runtime behavior,
it moves rbtree-related code from IPv4-specific files/functions
into .h/.c defrag files shared with IPv6 defragmentation code.

Signed-off-by: Peter Oskolkov <posk@google.com>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Florian Westphal <fw@strlen.de>
Cc: Tom Herbert <tom@herbertland.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet_frag.h  |  16 ++-
 net/ipv4/inet_fragment.c | 293 +++++++++++++++++++++++++++++++++++++++++++++++
 net/ipv4/ip_fragment.c   | 289 +++++-----------------------------------------
 3 files changed, 334 insertions(+), 264 deletions(-)

(limited to 'include')

diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
index 1662cbc0b46b..b02bf737d019 100644
--- a/include/net/inet_frag.h
+++ b/include/net/inet_frag.h
@@ -77,8 +77,8 @@ struct inet_frag_queue {
 	struct timer_list	timer;
 	spinlock_t		lock;
 	refcount_t		refcnt;
-	struct sk_buff		*fragments;  /* Used in IPv6. */
-	struct rb_root		rb_fragments; /* Used in IPv4. */
+	struct sk_buff		*fragments;  /* used in 6lopwpan IPv6. */
+	struct rb_root		rb_fragments; /* Used in IPv4/IPv6. */
 	struct sk_buff		*fragments_tail;
 	struct sk_buff		*last_run_head;
 	ktime_t			stamp;
@@ -153,4 +153,16 @@ static inline void add_frag_mem_limit(struct netns_frags *nf, long val)
 
 extern const u8 ip_frag_ecn_table[16];
 
+/* Return values of inet_frag_queue_insert() */
+#define IPFRAG_OK	0
+#define IPFRAG_DUP	1
+#define IPFRAG_OVERLAP	2
+int inet_frag_queue_insert(struct inet_frag_queue *q, struct sk_buff *skb,
+			   int offset, int end);
+void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb,
+			      struct sk_buff *parent);
+void inet_frag_reasm_finish(struct inet_frag_queue *q, struct sk_buff *head,
+			    void *reasm_data);
+struct sk_buff *inet_frag_pull_head(struct inet_frag_queue *q);
+
 #endif
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 760a9e52e02b..9f69411251d0 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -25,6 +25,62 @@
 #include <net/sock.h>
 #include <net/inet_frag.h>
 #include <net/inet_ecn.h>
+#include <net/ip.h>
+#include <net/ipv6.h>
+
+/* Use skb->cb to track consecutive/adjacent fragments coming at
+ * the end of the queue. Nodes in the rb-tree queue will
+ * contain "runs" of one or more adjacent fragments.
+ *
+ * Invariants:
+ * - next_frag is NULL at the tail of a "run";
+ * - the head of a "run" has the sum of all fragment lengths in frag_run_len.
+ */
+struct ipfrag_skb_cb {
+	union {
+		struct inet_skb_parm	h4;
+		struct inet6_skb_parm	h6;
+	};
+	struct sk_buff		*next_frag;
+	int			frag_run_len;
+};
+
+#define FRAG_CB(skb)		((struct ipfrag_skb_cb *)((skb)->cb))
+
+static void fragcb_clear(struct sk_buff *skb)
+{
+	RB_CLEAR_NODE(&skb->rbnode);
+	FRAG_CB(skb)->next_frag = NULL;
+	FRAG_CB(skb)->frag_run_len = skb->len;
+}
+
+/* Append skb to the last "run". */
+static void fragrun_append_to_last(struct inet_frag_queue *q,
+				   struct sk_buff *skb)
+{
+	fragcb_clear(skb);
+
+	FRAG_CB(q->last_run_head)->frag_run_len += skb->len;
+	FRAG_CB(q->fragments_tail)->next_frag = skb;
+	q->fragments_tail = skb;
+}
+
+/* Create a new "run" with the skb. */
+static void fragrun_create(struct inet_frag_queue *q, struct sk_buff *skb)
+{
+	BUILD_BUG_ON(sizeof(struct ipfrag_skb_cb) > sizeof(skb->cb));
+	fragcb_clear(skb);
+
+	if (q->last_run_head)
+		rb_link_node(&skb->rbnode, &q->last_run_head->rbnode,
+			     &q->last_run_head->rbnode.rb_right);
+	else
+		rb_link_node(&skb->rbnode, NULL, &q->rb_fragments.rb_node);
+	rb_insert_color(&skb->rbnode, &q->rb_fragments);
+
+	q->fragments_tail = skb;
+	q->last_run_head = skb;
+}
 
 /* Given the OR values of all fragments, apply RFC 3168 5.3 requirements
  * Value : 0xff if frame should be dropped.
@@ -123,6 +179,28 @@ static void inet_frag_destroy_rcu(struct rcu_head *head)
 	kmem_cache_free(f->frags_cachep, q);
 }
 
+unsigned int inet_frag_rbtree_purge(struct rb_root *root)
+{
+	struct rb_node *p = rb_first(root);
+	unsigned int sum = 0;
+
+	while (p) {
+		struct sk_buff *skb = rb_entry(p, struct sk_buff, rbnode);
+
+		p = rb_next(p);
+		rb_erase(&skb->rbnode, root);
+		while (skb) {
+			struct sk_buff *next = FRAG_CB(skb)->next_frag;
+
+			sum += skb->truesize;
+			kfree_skb(skb);
+			skb = next;
+		}
+	}
+	return sum;
+}
+EXPORT_SYMBOL(inet_frag_rbtree_purge);
+
 void inet_frag_destroy(struct inet_frag_queue *q)
 {
 	struct sk_buff *fp;
@@ -224,3 +302,218 @@ struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, void *key)
 	return fq;
 }
 EXPORT_SYMBOL(inet_frag_find);
+
+int inet_frag_queue_insert(struct inet_frag_queue *q, struct sk_buff *skb,
+			   int offset, int end)
+{
+	struct sk_buff *last = q->fragments_tail;
+
+	/* RFC5722, Section 4, amended by Errata ID : 3089
+	 *                          When reassembling an IPv6 datagram, if
+	 *   one or more its constituent fragments is determined to be an
+	 *   overlapping fragment, the entire datagram (and any constituent
+	 *   fragments) MUST be silently discarded.
+	 *
+	 * Duplicates, however, should be ignored (i.e. skb dropped, but the
+	 * queue/fragments kept for later reassembly).
+	 */
+	if (!last)
+		fragrun_create(q, skb);  /* First fragment. */
+	else if (last->ip_defrag_offset + last->len < end) {
+		/* This is the common case: skb goes to the end. */
+		/* Detect and discard overlaps. */
+		if (offset < last->ip_defrag_offset + last->len)
+			return IPFRAG_OVERLAP;
+		if (offset == last->ip_defrag_offset + last->len)
+			fragrun_append_to_last(q, skb);
+		else
+			fragrun_create(q, skb);
+	} else {
+		/* Binary search. Note that skb can become the first fragment,
+		 * but not the last (covered above).
+		 */
+		struct rb_node **rbn, *parent;
+
+		rbn = &q->rb_fragments.rb_node;
+		do {
+			struct sk_buff *curr;
+			int curr_run_end;
+
+			parent = *rbn;
+			curr = rb_to_skb(parent);
+			curr_run_end = curr->ip_defrag_offset +
+					FRAG_CB(curr)->frag_run_len;
+			if (end <= curr->ip_defrag_offset)
+				rbn = &parent->rb_left;
+			else if (offset >= curr_run_end)
+				rbn = &parent->rb_right;
+			else if (offset >= curr->ip_defrag_offset &&
+				 end <= curr_run_end)
+				return IPFRAG_DUP;
+			else
+				return IPFRAG_OVERLAP;
+		} while (*rbn);
+		/* Here we have parent properly set, and rbn pointing to
+		 * one of its NULL left/right children. Insert skb.
+		 */
+		fragcb_clear(skb);
+		rb_link_node(&skb->rbnode, parent, rbn);
+		rb_insert_color(&skb->rbnode, &q->rb_fragments);
+	}
+
+	skb->ip_defrag_offset = offset;
+
+	return IPFRAG_OK;
+}
+EXPORT_SYMBOL(inet_frag_queue_insert);
+
+void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb,
+			      struct sk_buff *parent)
+{
+	struct sk_buff *fp, *head = skb_rb_first(&q->rb_fragments);
+	struct sk_buff **nextp;
+	int delta;
+
+	if (head != skb) {
+		fp = skb_clone(skb, GFP_ATOMIC);
+		if (!fp)
+			return NULL;
+		FRAG_CB(fp)->next_frag = FRAG_CB(skb)->next_frag;
+		if (RB_EMPTY_NODE(&skb->rbnode))
+			FRAG_CB(parent)->next_frag = fp;
+		else
+			rb_replace_node(&skb->rbnode, &fp->rbnode,
+					&q->rb_fragments);
+		if (q->fragments_tail == skb)
+			q->fragments_tail = fp;
+		skb_morph(skb, head);
+		FRAG_CB(skb)->next_frag = FRAG_CB(head)->next_frag;
+		rb_replace_node(&head->rbnode, &skb->rbnode,
+				&q->rb_fragments);
+		consume_skb(head);
+		head = skb;
+	}
+	WARN_ON(head->ip_defrag_offset != 0);
+
+	delta = -head->truesize;
+
+	/* Head of list must not be cloned. */
+	if (skb_unclone(head, GFP_ATOMIC))
+		return NULL;
+
+	delta += head->truesize;
+	if (delta)
+		add_frag_mem_limit(q->net, delta);
+
+	/* If the first fragment is fragmented itself, we split
+	 * it to two chunks: the first with data and paged part
+	 * and the second, holding only fragments.
+	 */
+	if (skb_has_frag_list(head)) {
+		struct sk_buff *clone;
+		int i, plen = 0;
+
+		clone = alloc_skb(0, GFP_ATOMIC);
+		if (!clone)
+			return NULL;
+		skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list;
+		skb_frag_list_init(head);
+		for (i = 0; i < skb_shinfo(head)->nr_frags; i++)
+			plen += skb_frag_size(&skb_shinfo(head)->frags[i]);
+		clone->data_len = head->data_len - plen;
+		clone->len = clone->data_len;
+		head->truesize += clone->truesize;
+		clone->csum = 0;
+		clone->ip_summed = head->ip_summed;
+		add_frag_mem_limit(q->net, clone->truesize);
+		skb_shinfo(head)->frag_list = clone;
+		nextp = &clone->next;
+	} else {
+		nextp = &skb_shinfo(head)->frag_list;
+	}
+
+	return nextp;
+}
+EXPORT_SYMBOL(inet_frag_reasm_prepare);
+
+void inet_frag_reasm_finish(struct inet_frag_queue *q, struct sk_buff *head,
+			    void *reasm_data)
+{
+	struct sk_buff **nextp = (struct sk_buff **)reasm_data;
+	struct rb_node *rbn;
+	struct sk_buff *fp;
+
+	skb_push(head, head->data - skb_network_header(head));
+
+	/* Traverse the tree in order, to build frag_list. */
+	fp = FRAG_CB(head)->next_frag;
+	rbn = rb_next(&head->rbnode);
+	rb_erase(&head->rbnode, &q->rb_fragments);
+	while (rbn || fp) {
+		/* fp points to the next sk_buff in the current run;
+		 * rbn points to the next run.
+		 */
+		/* Go through the current run. */
+		while (fp) {
+			*nextp = fp;
+			nextp = &fp->next;
+			fp->prev = NULL;
+			memset(&fp->rbnode, 0, sizeof(fp->rbnode));
+			fp->sk = NULL;
+			head->data_len += fp->len;
+			head->len += fp->len;
+			if (head->ip_summed != fp->ip_summed)
+				head->ip_summed = CHECKSUM_NONE;
+			else if (head->ip_summed == CHECKSUM_COMPLETE)
+				head->csum = csum_add(head->csum, fp->csum);
+			head->truesize += fp->truesize;
+			fp = FRAG_CB(fp)->next_frag;
+		}
+		/* Move to the next run. */
+		if (rbn) {
+			struct rb_node *rbnext = rb_next(rbn);
+
+			fp = rb_to_skb(rbn);
+			rb_erase(rbn, &q->rb_fragments);
+			rbn = rbnext;
+		}
+	}
+	sub_frag_mem_limit(q->net, head->truesize);
+
+	*nextp = NULL;
+	skb_mark_not_on_list(head);
+	head->prev = NULL;
+	head->tstamp = q->stamp;
+}
+EXPORT_SYMBOL(inet_frag_reasm_finish);
+
+struct sk_buff *inet_frag_pull_head(struct inet_frag_queue *q)
+{
+	struct sk_buff *head;
+
+	if (q->fragments) {
+		head = q->fragments;
+		q->fragments = head->next;
+	} else {
+		struct sk_buff *skb;
+
+		head = skb_rb_first(&q->rb_fragments);
+		if (!head)
+			return NULL;
+		skb = FRAG_CB(head)->next_frag;
+		if (skb)
+			rb_replace_node(&head->rbnode, &skb->rbnode,
+					&q->rb_fragments);
+		else
+			rb_erase(&head->rbnode, &q->rb_fragments);
+		memset(&head->rbnode, 0, sizeof(head->rbnode));
+		barrier();
+	}
+	if (head == q->fragments_tail)
+		q->fragments_tail = NULL;
+
+	sub_frag_mem_limit(q->net, head->truesize);
+
+	return head;
+}
+EXPORT_SYMBOL(inet_frag_pull_head);
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 867be8f7f1fa..486ecb0aeb87 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -57,57 +57,6 @@
  */
 static const char ip_frag_cache_name[] = "ip4-frags";
 
-/* Use skb->cb to track consecutive/adjacent fragments coming at
- * the end of the queue. Nodes in the rb-tree queue will
- * contain "runs" of one or more adjacent fragments.
- *
- * Invariants:
- * - next_frag is NULL at the tail of a "run";
- * - the head of a "run" has the sum of all fragment lengths in frag_run_len.
- */
-struct ipfrag_skb_cb {
-	struct inet_skb_parm	h;
-	struct sk_buff		*next_frag;
-	int			frag_run_len;
-};
-
-#define FRAG_CB(skb)		((struct ipfrag_skb_cb *)((skb)->cb))
-
-static void ip4_frag_init_run(struct sk_buff *skb)
-{
-	BUILD_BUG_ON(sizeof(struct ipfrag_skb_cb) > sizeof(skb->cb));
-
-	FRAG_CB(skb)->next_frag = NULL;
-	FRAG_CB(skb)->frag_run_len = skb->len;
-}
-
-/* Append skb to the last "run". */
-static void ip4_frag_append_to_last_run(struct inet_frag_queue *q,
-					struct sk_buff *skb)
-{
-	RB_CLEAR_NODE(&skb->rbnode);
-	FRAG_CB(skb)->next_frag = NULL;
-
-	FRAG_CB(q->last_run_head)->frag_run_len += skb->len;
-	FRAG_CB(q->fragments_tail)->next_frag = skb;
-	q->fragments_tail = skb;
-}
-
-/* Create a new "run" with the skb. */
-static void ip4_frag_create_run(struct inet_frag_queue *q, struct sk_buff *skb)
-{
-	if (q->last_run_head)
-		rb_link_node(&skb->rbnode, &q->last_run_head->rbnode,
-			     &q->last_run_head->rbnode.rb_right);
-	else
-		rb_link_node(&skb->rbnode, NULL, &q->rb_fragments.rb_node);
-	rb_insert_color(&skb->rbnode, &q->rb_fragments);
-
-	ip4_frag_init_run(skb);
-	q->fragments_tail = skb;
-	q->last_run_head = skb;
-}
-
 /* Describe an entry in the "incomplete datagrams" queue. */
 struct ipq {
 	struct inet_frag_queue q;
@@ -212,27 +161,9 @@ static void ip_expire(struct timer_list *t)
 	 * pull the head out of the tree in order to be able to
 	 * deal with head->dev.
 	 */
-	if (qp->q.fragments) {
-		head = qp->q.fragments;
-		qp->q.fragments = head->next;
-	} else {
-		head = skb_rb_first(&qp->q.rb_fragments);
-		if (!head)
-			goto out;
-		if (FRAG_CB(head)->next_frag)
-			rb_replace_node(&head->rbnode,
-					&FRAG_CB(head)->next_frag->rbnode,
-					&qp->q.rb_fragments);
-		else
-			rb_erase(&head->rbnode, &qp->q.rb_fragments);
-		memset(&head->rbnode, 0, sizeof(head->rbnode));
-		barrier();
-	}
-	if (head == qp->q.fragments_tail)
-		qp->q.fragments_tail = NULL;
-
-	sub_frag_mem_limit(qp->q.net, head->truesize);
-
+	head = inet_frag_pull_head(&qp->q);
+	if (!head)
+		goto out;
 	head->dev = dev_get_by_index_rcu(net, qp->iif);
 	if (!head->dev)
 		goto out;
@@ -344,12 +275,10 @@ static int ip_frag_reinit(struct ipq *qp)
 static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
 {
 	struct net *net = container_of(qp->q.net, struct net, ipv4.frags);
-	struct rb_node **rbn, *parent;
-	struct sk_buff *skb1, *prev_tail;
-	int ihl, end, skb1_run_end;
+	int ihl, end, flags, offset;
+	struct sk_buff *prev_tail;
 	struct net_device *dev;
 	unsigned int fragsize;
-	int flags, offset;
 	int err = -ENOENT;
 	u8 ecn;
 
@@ -413,62 +342,13 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
 	/* Makes sure compiler wont do silly aliasing games */
 	barrier();
 
-	/* RFC5722, Section 4, amended by Errata ID : 3089
-	 *                          When reassembling an IPv6 datagram, if
-	 *   one or more its constituent fragments is determined to be an
-	 *   overlapping fragment, the entire datagram (and any constituent
-	 *   fragments) MUST be silently discarded.
-	 *
-	 * We do the same here for IPv4 (and increment an snmp counter) but
-	 * we do not want to drop the whole queue in response to a duplicate
-	 * fragment.
-	 */
-
-	err = -EINVAL;
-	/* Find out where to put this fragment.  */
 	prev_tail = qp->q.fragments_tail;
-	if (!prev_tail)
-		ip4_frag_create_run(&qp->q, skb);  /* First fragment. */
-	else if (prev_tail->ip_defrag_offset + prev_tail->len < end) {
-		/* This is the common case: skb goes to the end. */
-		/* Detect and discard overlaps. */
-		if (offset < prev_tail->ip_defrag_offset + prev_tail->len)
-			goto overlap;
-		if (offset == prev_tail->ip_defrag_offset + prev_tail->len)
-			ip4_frag_append_to_last_run(&qp->q, skb);
-		else
-			ip4_frag_create_run(&qp->q, skb);
-	} else {
-		/* Binary search. Note that skb can become the first fragment,
-		 * but not the last (covered above).
-		 */
-		rbn = &qp->q.rb_fragments.rb_node;
-		do {
-			parent = *rbn;
-			skb1 = rb_to_skb(parent);
-			skb1_run_end = skb1->ip_defrag_offset +
-				       FRAG_CB(skb1)->frag_run_len;
-			if (end <= skb1->ip_defrag_offset)
-				rbn = &parent->rb_left;
-			else if (offset >= skb1_run_end)
-				rbn = &parent->rb_right;
-			else if (offset >= skb1->ip_defrag_offset &&
-				 end <= skb1_run_end)
-				goto err; /* No new data, potential duplicate */
-			else
-				goto overlap; /* Found an overlap */
-		} while (*rbn);
-		/* Here we have parent properly set, and rbn pointing to
-		 * one of its NULL left/right children. Insert skb.
-		 */
-		ip4_frag_init_run(skb);
-		rb_link_node(&skb->rbnode, parent, rbn);
-		rb_insert_color(&skb->rbnode, &qp->q.rb_fragments);
-	}
+	err = inet_frag_queue_insert(&qp->q, skb, offset, end);
+	if (err)
+		goto insert_error;
 
 	if (dev)
 		qp->iif = dev->ifindex;
-	skb->ip_defrag_offset = offset;
 
 	qp->q.stamp = skb->tstamp;
 	qp->q.meat += skb->len;
@@ -501,10 +381,16 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
 	skb_dst_drop(skb);
 	return -EINPROGRESS;
 
-overlap:
+insert_error:
+	if (err == IPFRAG_DUP) {
+		kfree_skb(skb);
+		return -EINVAL;
+	}
+	err = -EINVAL;
 	__IP_INC_STATS(net, IPSTATS_MIB_REASM_OVERLAPS);
 discard_qp:
 	inet_frag_kill(&qp->q);
+	__IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS);
 err:
 	kfree_skb(skb);
 	return err;
@@ -516,13 +402,8 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb,
 {
 	struct net *net = container_of(qp->q.net, struct net, ipv4.frags);
 	struct iphdr *iph;
-	struct sk_buff *fp, *head = skb_rb_first(&qp->q.rb_fragments);
-	struct sk_buff **nextp; /* To build frag_list. */
-	struct rb_node *rbn;
-	int len;
-	int ihlen;
-	int delta;
-	int err;
+	void *reasm_data;
+	int len, err;
 	u8 ecn;
 
 	ipq_kill(qp);
@@ -532,117 +413,23 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb,
 		err = -EINVAL;
 		goto out_fail;
 	}
-	/* Make the one we just received the head. */
-	if (head != skb) {
-		fp = skb_clone(skb, GFP_ATOMIC);
-		if (!fp)
-			goto out_nomem;
-		FRAG_CB(fp)->next_frag = FRAG_CB(skb)->next_frag;
-		if (RB_EMPTY_NODE(&skb->rbnode))
-			FRAG_CB(prev_tail)->next_frag = fp;
-		else
-			rb_replace_node(&skb->rbnode, &fp->rbnode,
-					&qp->q.rb_fragments);
-		if (qp->q.fragments_tail == skb)
-			qp->q.fragments_tail = fp;
-		skb_morph(skb, head);
-		FRAG_CB(skb)->next_frag = FRAG_CB(head)->next_frag;
-		rb_replace_node(&head->rbnode, &skb->rbnode,
-				&qp->q.rb_fragments);
-		consume_skb(head);
-		head = skb;
-	}
 
-	WARN_ON(head->ip_defrag_offset != 0);
-
-	/* Allocate a new buffer for the datagram. */
-	ihlen = ip_hdrlen(head);
-	len = ihlen + qp->q.len;
+	/* Make the one we just received the head. */
+	reasm_data = inet_frag_reasm_prepare(&qp->q, skb, prev_tail);
+	if (!reasm_data)
+		goto out_nomem;
 
+	len = ip_hdrlen(skb) + qp->q.len;
 	err = -E2BIG;
 	if (len > 65535)
 		goto out_oversize;
 
-	delta = - head->truesize;
-
-	/* Head of list must not be cloned. */
-	if (skb_unclone(head, GFP_ATOMIC))
-		goto out_nomem;
-
-	delta += head->truesize;
-	if (delta)
-		add_frag_mem_limit(qp->q.net, delta);
-
-	/* If the first fragment is fragmented itself, we split
-	 * it to two chunks: the first with data and paged part
-	 * and the second, holding only fragments. */
-	if (skb_has_frag_list(head)) {
-		struct sk_buff *clone;
-		int i, plen = 0;
-
-		clone = alloc_skb(0, GFP_ATOMIC);
-		if (!clone)
-			goto out_nomem;
-		skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list;
-		skb_frag_list_init(head);
-		for (i = 0; i < skb_shinfo(head)->nr_frags; i++)
-			plen += skb_frag_size(&skb_shinfo(head)->frags[i]);
-		clone->len = clone->data_len = head->data_len - plen;
-		head->truesize += clone->truesize;
-		clone->csum = 0;
-		clone->ip_summed = head->ip_summed;
-		add_frag_mem_limit(qp->q.net, clone->truesize);
-		skb_shinfo(head)->frag_list = clone;
-		nextp = &clone->next;
-	} else {
-		nextp = &skb_shinfo(head)->frag_list;
-	}
-
-	skb_push(head, head->data - skb_network_header(head));
+	inet_frag_reasm_finish(&qp->q, skb, reasm_data);
 
-	/* Traverse the tree in order, to build frag_list. */
-	fp = FRAG_CB(head)->next_frag;
-	rbn = rb_next(&head->rbnode);
-	rb_erase(&head->rbnode, &qp->q.rb_fragments);
-	while (rbn || fp) {
-		/* fp points to the next sk_buff in the current run;
-		 * rbn points to the next run.
-		 */
-		/* Go through the current run. */
-		while (fp) {
-			*nextp = fp;
-			nextp = &fp->next;
-			fp->prev = NULL;
-			memset(&fp->rbnode, 0, sizeof(fp->rbnode));
-			fp->sk = NULL;
-			head->data_len += fp->len;
-			head->len += fp->len;
-			if (head->ip_summed != fp->ip_summed)
-				head->ip_summed = CHECKSUM_NONE;
-			else if (head->ip_summed == CHECKSUM_COMPLETE)
-				head->csum = csum_add(head->csum, fp->csum);
-			head->truesize += fp->truesize;
-			fp = FRAG_CB(fp)->next_frag;
-		}
-		/* Move to the next run. */
-		if (rbn) {
-			struct rb_node *rbnext = rb_next(rbn);
-
-			fp = rb_to_skb(rbn);
-			rb_erase(rbn, &qp->q.rb_fragments);
-			rbn = rbnext;
-		}
-	}
-	sub_frag_mem_limit(qp->q.net, head->truesize);
-
-	*nextp = NULL;
-	skb_mark_not_on_list(head);
-	head->prev = NULL;
-	head->dev = dev;
-	head->tstamp = qp->q.stamp;
-	IPCB(head)->frag_max_size = max(qp->max_df_size, qp->q.max_size);
+	skb->dev = dev;
+	IPCB(skb)->frag_max_size = max(qp->max_df_size, qp->q.max_size);
 
-	iph = ip_hdr(head);
+	iph = ip_hdr(skb);
 	iph->tot_len = htons(len);
 	iph->tos |= ecn;
 
@@ -655,7 +442,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb,
 	 * from one very small df-fragment and one large non-df frag.
 	 */
 	if (qp->max_df_size == qp->q.max_size) {
-		IPCB(head)->flags |= IPSKB_FRAG_PMTU;
+		IPCB(skb)->flags |= IPSKB_FRAG_PMTU;
 		iph->frag_off = htons(IP_DF);
 	} else {
 		iph->frag_off = 0;
@@ -753,28 +540,6 @@ struct sk_buff *ip_check_defrag(struct net *net, struct sk_buff *skb, u32 user)
 }
 EXPORT_SYMBOL(ip_check_defrag);
 
-unsigned int inet_frag_rbtree_purge(struct rb_root *root)
-{
-	struct rb_node *p = rb_first(root);
-	unsigned int sum = 0;
-
-	while (p) {
-		struct sk_buff *skb = rb_entry(p, struct sk_buff, rbnode);
-
-		p = rb_next(p);
-		rb_erase(&skb->rbnode, root);
-		while (skb) {
-			struct sk_buff *next = FRAG_CB(skb)->next_frag;
-
-			sum += skb->truesize;
-			kfree_skb(skb);
-			skb = next;
-		}
-	}
-	return sum;
-}
-EXPORT_SYMBOL(inet_frag_rbtree_purge);
-
 #ifdef CONFIG_SYSCTL
 static int dist_min;
 
-- 
cgit v1.2.3


From d4289fcc9b16b89619ee1c54f829e05e56de8b9a Mon Sep 17 00:00:00 2001
From: Peter Oskolkov <posk@google.com>
Date: Tue, 22 Jan 2019 10:02:51 -0800
Subject: net: IP6 defrag: use rbtrees for IPv6 defrag

Currently, IPv6 defragmentation code drops non-last fragments that
are smaller than 1280 bytes: see
commit 0ed4229b08c1 ("ipv6: defrag: drop non-last frags smaller than min mtu")

This behavior is not specified in IPv6 RFCs and appears to break
compatibility with some IPv6 implemenations, as reported here:
https://www.spinics.net/lists/netdev/msg543846.html

This patch re-uses common IP defragmentation queueing and reassembly
code in IPv6, removing the 1280 byte restriction.

Signed-off-by: Peter Oskolkov <posk@google.com>
Reported-by: Tom Herbert <tom@herbertland.com>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ipv6_frag.h |  11 ++-
 net/ipv6/reassembly.c   | 233 +++++++++++++-----------------------------------
 2 files changed, 71 insertions(+), 173 deletions(-)

(limited to 'include')

diff --git a/include/net/ipv6_frag.h b/include/net/ipv6_frag.h
index 6ced1e6899b6..28aa9b30aece 100644
--- a/include/net/ipv6_frag.h
+++ b/include/net/ipv6_frag.h
@@ -82,8 +82,15 @@ ip6frag_expire_frag_queue(struct net *net, struct frag_queue *fq)
 	__IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT);
 
 	/* Don't send error if the first segment did not arrive. */
-	head = fq->q.fragments;
-	if (!(fq->q.flags & INET_FRAG_FIRST_IN) || !head)
+	if (!(fq->q.flags & INET_FRAG_FIRST_IN))
+		goto out;
+
+	/* sk_buff::dev and sk_buff::rbnode are unionized. So we
+	 * pull the head out of the tree in order to be able to
+	 * deal with head->dev.
+	 */
+	head = inet_frag_pull_head(&fq->q);
+	if (!head)
 		goto out;
 
 	head->dev = dev;
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 36a3d8dc61f5..24264d0a4b85 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -69,8 +69,8 @@ static u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h)
 
 static struct inet_frags ip6_frags;
 
-static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
-			  struct net_device *dev);
+static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *skb,
+			  struct sk_buff *prev_tail, struct net_device *dev);
 
 static void ip6_frag_expire(struct timer_list *t)
 {
@@ -111,21 +111,26 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
 			  struct frag_hdr *fhdr, int nhoff,
 			  u32 *prob_offset)
 {
-	struct sk_buff *prev, *next;
-	struct net_device *dev;
-	int offset, end, fragsize;
 	struct net *net = dev_net(skb_dst(skb)->dev);
+	int offset, end, fragsize;
+	struct sk_buff *prev_tail;
+	struct net_device *dev;
+	int err = -ENOENT;
 	u8 ecn;
 
 	if (fq->q.flags & INET_FRAG_COMPLETE)
 		goto err;
 
+	err = -EINVAL;
 	offset = ntohs(fhdr->frag_off) & ~0x7;
 	end = offset + (ntohs(ipv6_hdr(skb)->payload_len) -
 			((u8 *)(fhdr + 1) - (u8 *)(ipv6_hdr(skb) + 1)));
 
 	if ((unsigned int)end > IPV6_MAXPLEN) {
 		*prob_offset = (u8 *)&fhdr->frag_off - skb_network_header(skb);
+		/* note that if prob_offset is set, the skb is freed elsewhere,
+		 * we do not free it here.
+		 */
 		return -1;
 	}
 
@@ -170,62 +175,27 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
 	if (end == offset)
 		goto discard_fq;
 
+	err = -ENOMEM;
 	/* Point into the IP datagram 'data' part. */
 	if (!pskb_pull(skb, (u8 *) (fhdr + 1) - skb->data))
 		goto discard_fq;
 
-	if (pskb_trim_rcsum(skb, end - offset))
+	err = pskb_trim_rcsum(skb, end - offset);
+	if (err)
 		goto discard_fq;
 
-	/* Find out which fragments are in front and at the back of us
-	 * in the chain of fragments so far.  We must know where to put
-	 * this fragment, right?
-	 */
-	prev = fq->q.fragments_tail;
-	if (!prev || prev->ip_defrag_offset < offset) {
-		next = NULL;
-		goto found;
-	}
-	prev = NULL;
-	for (next = fq->q.fragments; next != NULL; next = next->next) {
-		if (next->ip_defrag_offset >= offset)
-			break;	/* bingo! */
-		prev = next;
-	}
-
-found:
-	/* RFC5722, Section 4, amended by Errata ID : 3089
-	 *                          When reassembling an IPv6 datagram, if
-	 *   one or more its constituent fragments is determined to be an
-	 *   overlapping fragment, the entire datagram (and any constituent
-	 *   fragments) MUST be silently discarded.
-	 */
-
-	/* Check for overlap with preceding fragment. */
-	if (prev &&
-	    (prev->ip_defrag_offset + prev->len) > offset)
-		goto discard_fq;
-
-	/* Look for overlap with succeeding segment. */
-	if (next && next->ip_defrag_offset < end)
-		goto discard_fq;
-
-	/* Note : skb->ip_defrag_offset and skb->sk share the same location */
+	/* Note : skb->rbnode and skb->dev share the same location. */
 	dev = skb->dev;
-	if (dev)
-		fq->iif = dev->ifindex;
 	/* Makes sure compiler wont do silly aliasing games */
 	barrier();
-	skb->ip_defrag_offset = offset;
 
-	/* Insert this fragment in the chain of fragments. */
-	skb->next = next;
-	if (!next)
-		fq->q.fragments_tail = skb;
-	if (prev)
-		prev->next = skb;
-	else
-		fq->q.fragments = skb;
+	prev_tail = fq->q.fragments_tail;
+	err = inet_frag_queue_insert(&fq->q, skb, offset, end);
+	if (err)
+		goto insert_error;
+
+	if (dev)
+		fq->iif = dev->ifindex;
 
 	fq->q.stamp = skb->tstamp;
 	fq->q.meat += skb->len;
@@ -246,44 +216,48 @@ found:
 
 	if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
 	    fq->q.meat == fq->q.len) {
-		int res;
 		unsigned long orefdst = skb->_skb_refdst;
 
 		skb->_skb_refdst = 0UL;
-		res = ip6_frag_reasm(fq, prev, dev);
+		err = ip6_frag_reasm(fq, skb, prev_tail, dev);
 		skb->_skb_refdst = orefdst;
-		return res;
+		return err;
 	}
 
 	skb_dst_drop(skb);
-	return -1;
+	return -EINPROGRESS;
 
+insert_error:
+	if (err == IPFRAG_DUP) {
+		kfree_skb(skb);
+		return -EINVAL;
+	}
+	err = -EINVAL;
+	__IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
+			IPSTATS_MIB_REASM_OVERLAPS);
 discard_fq:
 	inet_frag_kill(&fq->q);
-err:
 	__IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
 			IPSTATS_MIB_REASMFAILS);
+err:
 	kfree_skb(skb);
-	return -1;
+	return err;
 }
 
 /*
  *	Check if this packet is complete.
- *	Returns NULL on failure by any reason, and pointer
- *	to current nexthdr field in reassembled frame.
  *
  *	It is called with locked fq, and caller must check that
  *	queue is eligible for reassembly i.e. it is not COMPLETE,
  *	the last and the first frames arrived and all the bits are here.
  */
-static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
-			  struct net_device *dev)
+static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *skb,
+			  struct sk_buff *prev_tail, struct net_device *dev)
 {
 	struct net *net = container_of(fq->q.net, struct net, ipv6.frags);
-	struct sk_buff *fp, *head = fq->q.fragments;
-	int    payload_len, delta;
 	unsigned int nhoff;
-	int sum_truesize;
+	void *reasm_data;
+	int payload_len;
 	u8 ecn;
 
 	inet_frag_kill(&fq->q);
@@ -292,121 +266,40 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
 	if (unlikely(ecn == 0xff))
 		goto out_fail;
 
-	/* Make the one we just received the head. */
-	if (prev) {
-		head = prev->next;
-		fp = skb_clone(head, GFP_ATOMIC);
-
-		if (!fp)
-			goto out_oom;
-
-		fp->next = head->next;
-		if (!fp->next)
-			fq->q.fragments_tail = fp;
-		prev->next = fp;
-
-		skb_morph(head, fq->q.fragments);
-		head->next = fq->q.fragments->next;
-
-		consume_skb(fq->q.fragments);
-		fq->q.fragments = head;
-	}
-
-	WARN_ON(head == NULL);
-	WARN_ON(head->ip_defrag_offset != 0);
+	reasm_data = inet_frag_reasm_prepare(&fq->q, skb, prev_tail);
+	if (!reasm_data)
+		goto out_oom;
 
-	/* Unfragmented part is taken from the first segment. */
-	payload_len = ((head->data - skb_network_header(head)) -
+	payload_len = ((skb->data - skb_network_header(skb)) -
 		       sizeof(struct ipv6hdr) + fq->q.len -
 		       sizeof(struct frag_hdr));
 	if (payload_len > IPV6_MAXPLEN)
 		goto out_oversize;
 
-	delta = - head->truesize;
-
-	/* Head of list must not be cloned. */
-	if (skb_unclone(head, GFP_ATOMIC))
-		goto out_oom;
-
-	delta += head->truesize;
-	if (delta)
-		add_frag_mem_limit(fq->q.net, delta);
-
-	/* If the first fragment is fragmented itself, we split
-	 * it to two chunks: the first with data and paged part
-	 * and the second, holding only fragments. */
-	if (skb_has_frag_list(head)) {
-		struct sk_buff *clone;
-		int i, plen = 0;
-
-		clone = alloc_skb(0, GFP_ATOMIC);
-		if (!clone)
-			goto out_oom;
-		clone->next = head->next;
-		head->next = clone;
-		skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list;
-		skb_frag_list_init(head);
-		for (i = 0; i < skb_shinfo(head)->nr_frags; i++)
-			plen += skb_frag_size(&skb_shinfo(head)->frags[i]);
-		clone->len = clone->data_len = head->data_len - plen;
-		head->data_len -= clone->len;
-		head->len -= clone->len;
-		clone->csum = 0;
-		clone->ip_summed = head->ip_summed;
-		add_frag_mem_limit(fq->q.net, clone->truesize);
-	}
-
 	/* We have to remove fragment header from datagram and to relocate
 	 * header in order to calculate ICV correctly. */
 	nhoff = fq->nhoffset;
-	skb_network_header(head)[nhoff] = skb_transport_header(head)[0];
-	memmove(head->head + sizeof(struct frag_hdr), head->head,
-		(head->data - head->head) - sizeof(struct frag_hdr));
-	if (skb_mac_header_was_set(head))
-		head->mac_header += sizeof(struct frag_hdr);
-	head->network_header += sizeof(struct frag_hdr);
-
-	skb_reset_transport_header(head);
-	skb_push(head, head->data - skb_network_header(head));
-
-	sum_truesize = head->truesize;
-	for (fp = head->next; fp;) {
-		bool headstolen;
-		int delta;
-		struct sk_buff *next = fp->next;
-
-		sum_truesize += fp->truesize;
-		if (head->ip_summed != fp->ip_summed)
-			head->ip_summed = CHECKSUM_NONE;
-		else if (head->ip_summed == CHECKSUM_COMPLETE)
-			head->csum = csum_add(head->csum, fp->csum);
-
-		if (skb_try_coalesce(head, fp, &headstolen, &delta)) {
-			kfree_skb_partial(fp, headstolen);
-		} else {
-			fp->sk = NULL;
-			if (!skb_shinfo(head)->frag_list)
-				skb_shinfo(head)->frag_list = fp;
-			head->data_len += fp->len;
-			head->len += fp->len;
-			head->truesize += fp->truesize;
-		}
-		fp = next;
-	}
-	sub_frag_mem_limit(fq->q.net, sum_truesize);
+	skb_network_header(skb)[nhoff] = skb_transport_header(skb)[0];
+	memmove(skb->head + sizeof(struct frag_hdr), skb->head,
+		(skb->data - skb->head) - sizeof(struct frag_hdr));
+	if (skb_mac_header_was_set(skb))
+		skb->mac_header += sizeof(struct frag_hdr);
+	skb->network_header += sizeof(struct frag_hdr);
+
+	skb_reset_transport_header(skb);
+
+	inet_frag_reasm_finish(&fq->q, skb, reasm_data);
 
-	skb_mark_not_on_list(head);
-	head->dev = dev;
-	head->tstamp = fq->q.stamp;
-	ipv6_hdr(head)->payload_len = htons(payload_len);
-	ipv6_change_dsfield(ipv6_hdr(head), 0xff, ecn);
-	IP6CB(head)->nhoff = nhoff;
-	IP6CB(head)->flags |= IP6SKB_FRAGMENTED;
-	IP6CB(head)->frag_max_size = fq->q.max_size;
+	skb->dev = dev;
+	ipv6_hdr(skb)->payload_len = htons(payload_len);
+	ipv6_change_dsfield(ipv6_hdr(skb), 0xff, ecn);
+	IP6CB(skb)->nhoff = nhoff;
+	IP6CB(skb)->flags |= IP6SKB_FRAGMENTED;
+	IP6CB(skb)->frag_max_size = fq->q.max_size;
 
 	/* Yes, and fold redundant checksum back. 8) */
-	skb_postpush_rcsum(head, skb_network_header(head),
-			   skb_network_header_len(head));
+	skb_postpush_rcsum(skb, skb_network_header(skb),
+			   skb_network_header_len(skb));
 
 	rcu_read_lock();
 	__IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMOKS);
@@ -414,6 +307,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
 	fq->q.fragments = NULL;
 	fq->q.rb_fragments = RB_ROOT;
 	fq->q.fragments_tail = NULL;
+	fq->q.last_run_head = NULL;
 	return 1;
 
 out_oversize:
@@ -464,10 +358,6 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
 		return 1;
 	}
 
-	if (skb->len - skb_network_offset(skb) < IPV6_MIN_MTU &&
-	    fhdr->frag_off & htons(IP6_MF))
-		goto fail_hdr;
-
 	iif = skb->dev ? skb->dev->ifindex : 0;
 	fq = fq_find(net, fhdr->identification, hdr, iif);
 	if (fq) {
@@ -485,6 +375,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
 		if (prob_offset) {
 			__IP6_INC_STATS(net, __in6_dev_get_safely(skb->dev),
 					IPSTATS_MIB_INHDRERRORS);
+			/* icmpv6_param_prob() calls kfree_skb(skb) */
 			icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, prob_offset);
 		}
 		return ret;
-- 
cgit v1.2.3


From f859a448470304135f7a1af0083b99e188873bb4 Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Fri, 25 Jan 2019 11:17:23 -0500
Subject: tcp: allow zerocopy with fastopen

Accept MSG_ZEROCOPY in all the TCP states that allow sendmsg. Remove
the explicit check for ESTABLISHED and CLOSE_WAIT states.

This requires correctly handling zerocopy state (uarg, sk_zckey) in
all paths reachable from other TCP states. Such as the EPIPE case
in sk_stream_wait_connect, which a sendmsg() in incorrect state will
now hit. Most paths are already safe.

Only extension needed is for TCP Fastopen active open. This can build
an skb with data in tcp_send_syn_data. Pass the uarg along with other
fastopen state, so that this skb also generates a zerocopy
notification on release.

Tested with active and passive tcp fastopen packetdrill scripts at
https://github.com/wdebruij/packetdrill/commit/1747eef03d25a2404e8132817d0f1244fd6f129d

Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h     |  1 +
 net/ipv4/tcp.c        | 11 ++++-------
 net/ipv4/tcp_output.c |  1 +
 3 files changed, 6 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 5c950180d61b..a6e0355921e1 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1608,6 +1608,7 @@ struct tcp_fastopen_request {
 	struct msghdr			*data;  /* data in MSG_FASTOPEN */
 	size_t				size;
 	int				copied;	/* queued in tcp_connect() */
+	struct ubuf_info		*uarg;
 };
 void tcp_free_fastopen_req(struct tcp_sock *tp);
 void tcp_fastopen_destroy_cipher(struct sock *sk);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 5f099c9d04e5..12ba21433dd0 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1127,7 +1127,8 @@ void tcp_free_fastopen_req(struct tcp_sock *tp)
 }
 
 static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
-				int *copied, size_t size)
+				int *copied, size_t size,
+				struct ubuf_info *uarg)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct inet_sock *inet = inet_sk(sk);
@@ -1147,6 +1148,7 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
 		return -ENOBUFS;
 	tp->fastopen_req->data = msg;
 	tp->fastopen_req->size = size;
+	tp->fastopen_req->uarg = uarg;
 
 	if (inet->defer_connect) {
 		err = tcp_connect(sk);
@@ -1186,11 +1188,6 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
 	flags = msg->msg_flags;
 
 	if (flags & MSG_ZEROCOPY && size && sock_flag(sk, SOCK_ZEROCOPY)) {
-		if ((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) {
-			err = -EINVAL;
-			goto out_err;
-		}
-
 		skb = tcp_write_queue_tail(sk);
 		uarg = sock_zerocopy_realloc(sk, size, skb_zcopy(skb));
 		if (!uarg) {
@@ -1205,7 +1202,7 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
 
 	if (unlikely(flags & MSG_FASTOPEN || inet_sk(sk)->defer_connect) &&
 	    !tp->repair) {
-		err = tcp_sendmsg_fastopen(sk, msg, &copied_syn, size);
+		err = tcp_sendmsg_fastopen(sk, msg, &copied_syn, size, uarg);
 		if (err == -EINPROGRESS && copied_syn > 0)
 			goto out;
 		else if (err)
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 6527f61f59ff..26a2948dca95 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -3455,6 +3455,7 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
 			skb_trim(syn_data, copied);
 			space = copied;
 		}
+		skb_zcopy_set(syn_data, fo->uarg, NULL);
 	}
 	/* No more data pending in inet_wait_for_connect() */
 	if (space == fo->size)
-- 
cgit v1.2.3


From c8b34e680a090ab8dc42f8331020e31052b49a70 Mon Sep 17 00:00:00 2001
From: wenxu <wenxu@ucloud.cn>
Date: Tue, 22 Jan 2019 18:39:50 +0800
Subject: ip_tunnel: Add tnl_update_pmtu in ip_md_tunnel_xmit

Add tnl_update_pmtu in ip_md_tunnel_xmit to dynamic modify
the pmtu which packet send through collect_metadata mode
ip tunnel

Signed-off-by: wenxu <wenxu@ucloud.cn>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip_tunnels.h |  2 +-
 net/ipv4/ip_tunnel.c     | 37 +++++++++++++++++++++++++++----------
 net/ipv4/ipip.c          |  2 +-
 3 files changed, 29 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index 34f019650941..f069f64ebf29 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -267,7 +267,7 @@ void ip_tunnel_delete_nets(struct list_head *list_net, unsigned int id,
 void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
 		    const struct iphdr *tnl_params, const u8 protocol);
 void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
-		       const u8 proto);
+		       const u8 proto, int tunnel_hlen);
 int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd);
 int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict);
 int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu);
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index f98fc4c983a5..6233a7c434b1 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -501,15 +501,19 @@ EXPORT_SYMBOL_GPL(ip_tunnel_encap_setup);
 
 static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
 			    struct rtable *rt, __be16 df,
-			    const struct iphdr *inner_iph)
+			    const struct iphdr *inner_iph,
+			    int tunnel_hlen, __be32 dst, bool md)
 {
 	struct ip_tunnel *tunnel = netdev_priv(dev);
-	int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len;
+	int pkt_size;
 	int mtu;
 
+	tunnel_hlen = md ? tunnel_hlen : tunnel->hlen;
+	pkt_size = skb->len - tunnel_hlen - dev->hard_header_len;
+
 	if (df)
 		mtu = dst_mtu(&rt->dst) - dev->hard_header_len
-					- sizeof(struct iphdr) - tunnel->hlen;
+					- sizeof(struct iphdr) - tunnel_hlen;
 	else
 		mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
 
@@ -527,11 +531,13 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
 #if IS_ENABLED(CONFIG_IPV6)
 	else if (skb->protocol == htons(ETH_P_IPV6)) {
 		struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
+		__be32 daddr;
+
+		daddr = md ? dst : tunnel->parms.iph.daddr;
 
 		if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
 			   mtu >= IPV6_MIN_MTU) {
-			if ((tunnel->parms.iph.daddr &&
-			    !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
+			if ((daddr && !ipv4_is_multicast(daddr)) ||
 			    rt6->rt6i_dst.plen == 128) {
 				rt6->rt6i_flags |= RTF_MODIFIED;
 				dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
@@ -548,7 +554,8 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
 	return 0;
 }
 
-void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, u8 proto)
+void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
+		       u8 proto, int tunnel_hlen)
 {
 	struct ip_tunnel *tunnel = netdev_priv(dev);
 	u32 headroom = sizeof(struct iphdr);
@@ -598,6 +605,15 @@ void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, u8 proto)
 		dev->stats.collisions++;
 		goto tx_error;
 	}
+
+	if (key->tun_flags & TUNNEL_DONT_FRAGMENT)
+		df = htons(IP_DF);
+	if (tnl_update_pmtu(dev, skb, rt, df, inner_iph, tunnel_hlen,
+			    key->u.ipv4.dst, true)) {
+		ip_rt_put(rt);
+		goto tx_error;
+	}
+
 	tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
 	ttl = key->ttl;
 	if (ttl == 0) {
@@ -608,10 +624,10 @@ void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, u8 proto)
 		else
 			ttl = ip4_dst_hoplimit(&rt->dst);
 	}
-	if (key->tun_flags & TUNNEL_DONT_FRAGMENT)
-		df = htons(IP_DF);
-	else if (skb->protocol == htons(ETH_P_IP))
+
+	if (!df && skb->protocol == htons(ETH_P_IP))
 		df = inner_iph->frag_off & htons(IP_DF);
+
 	headroom += LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len;
 	if (headroom > dev->needed_headroom)
 		dev->needed_headroom = headroom;
@@ -741,7 +757,8 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
 		goto tx_error;
 	}
 
-	if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off, inner_iph)) {
+	if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off, inner_iph,
+			    0, 0, false)) {
 		ip_rt_put(rt);
 		goto tx_error;
 	}
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 57c5dd283a2c..fe10b9a2efc8 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -302,7 +302,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb,
 	skb_set_inner_ipproto(skb, ipproto);
 
 	if (tunnel->collect_md)
-		ip_md_tunnel_xmit(skb, dev, ipproto);
+		ip_md_tunnel_xmit(skb, dev, ipproto, 0);
 	else
 		ip_tunnel_xmit(skb, dev, tiph, ipproto);
 	return NETDEV_TX_OK;
-- 
cgit v1.2.3


From 9b28aa1d0eae1be1016c8f4ba504545caff01da3 Mon Sep 17 00:00:00 2001
From: Vadim Pasternak <vadimp@mellanox.com>
Date: Wed, 12 Dec 2018 23:59:13 +0000
Subject: platform_data/mlxreg: Document fixes for core platform data

Remove "led" from the description, since the structure
"mlxreg_core_platform_data" is used not only for led data.

Signed-off-by: Vadim Pasternak <vadimp@mellanox.com>
Signed-off-by: Darren Hart (VMware) <dvhart@infradead.org>
---
 include/linux/platform_data/mlxreg.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/platform_data/mlxreg.h b/include/linux/platform_data/mlxreg.h
index 19f5cb618c55..d823713f94ec 100644
--- a/include/linux/platform_data/mlxreg.h
+++ b/include/linux/platform_data/mlxreg.h
@@ -107,9 +107,9 @@ struct mlxreg_core_item {
 /**
  * struct mlxreg_core_platform_data - platform data:
  *
- * @led_data: led private data;
+ * @data: instance private data;
  * @regmap: register map of parent device;
- * @counter: number of led instances;
+ * @counter: number of instances;
  */
 struct mlxreg_core_platform_data {
 	struct mlxreg_core_data *data;
-- 
cgit v1.2.3


From 946e4e02b11889cb161b15ff4712a8ba21a50eb6 Mon Sep 17 00:00:00 2001
From: Vadim Pasternak <vadimp@mellanox.com>
Date: Wed, 12 Dec 2018 23:59:14 +0000
Subject: platform_data/mlxreg: Add capability field to core platform data

Add capability field to "mlxreg_core_platform_data" structure.
The purpose of this register is to provide additional info to platform
driver through the atribute related capability register.

Signed-off-by: Vadim Pasternak <vadimp@mellanox.com>
Signed-off-by: Darren Hart (VMware) <dvhart@infradead.org>
---
 include/linux/platform_data/mlxreg.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/platform_data/mlxreg.h b/include/linux/platform_data/mlxreg.h
index d823713f94ec..1b2f86f96743 100644
--- a/include/linux/platform_data/mlxreg.h
+++ b/include/linux/platform_data/mlxreg.h
@@ -61,6 +61,7 @@ struct mlxreg_hotplug_device {
  * @reg: attribute register;
  * @mask: attribute access mask;
  * @bit: attribute effective bit;
+ * @capability: attribute capability register;
  * @mode: access mode;
  * @np - pointer to node platform associated with attribute;
  * @hpdev - hotplug device data;
@@ -72,6 +73,7 @@ struct mlxreg_core_data {
 	u32 reg;
 	u32 mask;
 	u32 bit;
+	u32 capability;
 	umode_t	mode;
 	struct device_node *np;
 	struct mlxreg_hotplug_device hpdev;
-- 
cgit v1.2.3


From d405c7407a5468d4fc11724d76063e0647d80106 Mon Sep 17 00:00:00 2001
From: Jiong Wang <jiong.wang@netronome.com>
Date: Sat, 26 Jan 2019 12:25:59 -0500
Subject: bpf: allocate 0x06 to new eBPF instruction class JMP32

The new eBPF instruction class JMP32 uses the reserved class number 0x6.
Kernel BPF ISA documentation updated accordingly.

Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Jiong Wang <jiong.wang@netronome.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 Documentation/networking/filter.txt | 15 ++++++++-------
 include/uapi/linux/bpf.h            |  1 +
 tools/include/uapi/linux/bpf.h      |  1 +
 3 files changed, 10 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/Documentation/networking/filter.txt b/Documentation/networking/filter.txt
index 2196b824e96c..01603bc2eff1 100644
--- a/Documentation/networking/filter.txt
+++ b/Documentation/networking/filter.txt
@@ -865,7 +865,7 @@ Three LSB bits store instruction class which is one of:
   BPF_STX   0x03          BPF_STX   0x03
   BPF_ALU   0x04          BPF_ALU   0x04
   BPF_JMP   0x05          BPF_JMP   0x05
-  BPF_RET   0x06          [ class 6 unused, for future if needed ]
+  BPF_RET   0x06          BPF_JMP32 0x06
   BPF_MISC  0x07          BPF_ALU64 0x07
 
 When BPF_CLASS(code) == BPF_ALU or BPF_JMP, 4th bit encodes source operand ...
@@ -902,9 +902,9 @@ If BPF_CLASS(code) == BPF_ALU or BPF_ALU64 [ in eBPF ], BPF_OP(code) is one of:
   BPF_ARSH  0xc0  /* eBPF only: sign extending shift right */
   BPF_END   0xd0  /* eBPF only: endianness conversion */
 
-If BPF_CLASS(code) == BPF_JMP, BPF_OP(code) is one of:
+If BPF_CLASS(code) == BPF_JMP or BPF_JMP32 [ in eBPF ], BPF_OP(code) is one of:
 
-  BPF_JA    0x00
+  BPF_JA    0x00  /* BPF_JMP only */
   BPF_JEQ   0x10
   BPF_JGT   0x20
   BPF_JGE   0x30
@@ -912,8 +912,8 @@ If BPF_CLASS(code) == BPF_JMP, BPF_OP(code) is one of:
   BPF_JNE   0x50  /* eBPF only: jump != */
   BPF_JSGT  0x60  /* eBPF only: signed '>' */
   BPF_JSGE  0x70  /* eBPF only: signed '>=' */
-  BPF_CALL  0x80  /* eBPF only: function call */
-  BPF_EXIT  0x90  /* eBPF only: function return */
+  BPF_CALL  0x80  /* eBPF BPF_JMP only: function call */
+  BPF_EXIT  0x90  /* eBPF BPF_JMP only: function return */
   BPF_JLT   0xa0  /* eBPF only: unsigned '<' */
   BPF_JLE   0xb0  /* eBPF only: unsigned '<=' */
   BPF_JSLT  0xc0  /* eBPF only: signed '<' */
@@ -936,8 +936,9 @@ Classic BPF wastes the whole BPF_RET class to represent a single 'ret'
 operation. Classic BPF_RET | BPF_K means copy imm32 into return register
 and perform function exit. eBPF is modeled to match CPU, so BPF_JMP | BPF_EXIT
 in eBPF means function exit only. The eBPF program needs to store return
-value into register R0 before doing a BPF_EXIT. Class 6 in eBPF is currently
-unused and reserved for future use.
+value into register R0 before doing a BPF_EXIT. Class 6 in eBPF is used as
+BPF_JMP32 to mean exactly the same operations as BPF_JMP, but with 32-bit wide
+operands for the comparisons instead.
 
 For load and store instructions the 8-bit 'code' field is divided as:
 
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 2940a9854f6d..60b99b730a41 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -14,6 +14,7 @@
 /* Extended instruction set based on top of classic BPF */
 
 /* instruction classes */
+#define BPF_JMP32	0x06	/* jmp mode in word width */
 #define BPF_ALU64	0x07	/* alu mode in double word width */
 
 /* ld/ldx fields */
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 2940a9854f6d..60b99b730a41 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -14,6 +14,7 @@
 /* Extended instruction set based on top of classic BPF */
 
 /* instruction classes */
+#define BPF_JMP32	0x06	/* jmp mode in word width */
 #define BPF_ALU64	0x07	/* alu mode in double word width */
 
 /* ld/ldx fields */
-- 
cgit v1.2.3


From a7b76c8857692b0fce063b94ed83da11c396d341 Mon Sep 17 00:00:00 2001
From: Jiong Wang <jiong.wang@netronome.com>
Date: Sat, 26 Jan 2019 12:26:05 -0500
Subject: bpf: JIT blinds support JMP32

This patch adds JIT blinds support for JMP32.

Like BPF_JMP_REG/IMM, JMP32 version are needed for building raw bpf insn.
They are added to both include/linux/filter.h and
tools/include/linux/filter.h.

Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Jiong Wang <jiong.wang@netronome.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/filter.h       | 20 ++++++++++++++++++++
 kernel/bpf/core.c            | 21 +++++++++++++++++++++
 tools/include/linux/filter.h | 20 ++++++++++++++++++++
 3 files changed, 61 insertions(+)

(limited to 'include')

diff --git a/include/linux/filter.h b/include/linux/filter.h
index be9af6b4a9e4..e4b473f85b46 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -277,6 +277,26 @@ struct sock_reuseport;
 		.off   = OFF,					\
 		.imm   = IMM })
 
+/* Like BPF_JMP_REG, but with 32-bit wide operands for comparison. */
+
+#define BPF_JMP32_REG(OP, DST, SRC, OFF)			\
+	((struct bpf_insn) {					\
+		.code  = BPF_JMP32 | BPF_OP(OP) | BPF_X,	\
+		.dst_reg = DST,					\
+		.src_reg = SRC,					\
+		.off   = OFF,					\
+		.imm   = 0 })
+
+/* Like BPF_JMP_IMM, but with 32-bit wide operands for comparison. */
+
+#define BPF_JMP32_IMM(OP, DST, IMM, OFF)			\
+	((struct bpf_insn) {					\
+		.code  = BPF_JMP32 | BPF_OP(OP) | BPF_K,	\
+		.dst_reg = DST,					\
+		.src_reg = 0,					\
+		.off   = OFF,					\
+		.imm   = IMM })
+
 /* Unconditional jumps, goto pc + off16 */
 
 #define BPF_JMP_A(OFF)						\
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index bba11c2565ee..a7bcb23bee84 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -949,6 +949,27 @@ static int bpf_jit_blind_insn(const struct bpf_insn *from,
 		*to++ = BPF_JMP_REG(from->code, from->dst_reg, BPF_REG_AX, off);
 		break;
 
+	case BPF_JMP32 | BPF_JEQ  | BPF_K:
+	case BPF_JMP32 | BPF_JNE  | BPF_K:
+	case BPF_JMP32 | BPF_JGT  | BPF_K:
+	case BPF_JMP32 | BPF_JLT  | BPF_K:
+	case BPF_JMP32 | BPF_JGE  | BPF_K:
+	case BPF_JMP32 | BPF_JLE  | BPF_K:
+	case BPF_JMP32 | BPF_JSGT | BPF_K:
+	case BPF_JMP32 | BPF_JSLT | BPF_K:
+	case BPF_JMP32 | BPF_JSGE | BPF_K:
+	case BPF_JMP32 | BPF_JSLE | BPF_K:
+	case BPF_JMP32 | BPF_JSET | BPF_K:
+		/* Accommodate for extra offset in case of a backjump. */
+		off = from->off;
+		if (off < 0)
+			off -= 2;
+		*to++ = BPF_ALU32_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm);
+		*to++ = BPF_ALU32_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
+		*to++ = BPF_JMP32_REG(from->code, from->dst_reg, BPF_REG_AX,
+				      off);
+		break;
+
 	case BPF_LD | BPF_IMM | BPF_DW:
 		*to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ aux[1].imm);
 		*to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
diff --git a/tools/include/linux/filter.h b/tools/include/linux/filter.h
index af55acf73e75..cce0b02c0e28 100644
--- a/tools/include/linux/filter.h
+++ b/tools/include/linux/filter.h
@@ -199,6 +199,16 @@
 		.off   = OFF,					\
 		.imm   = 0 })
 
+/* Like BPF_JMP_REG, but with 32-bit wide operands for comparison. */
+
+#define BPF_JMP32_REG(OP, DST, SRC, OFF)			\
+	((struct bpf_insn) {					\
+		.code  = BPF_JMP32 | BPF_OP(OP) | BPF_X,	\
+		.dst_reg = DST,					\
+		.src_reg = SRC,					\
+		.off   = OFF,					\
+		.imm   = 0 })
+
 /* Conditional jumps against immediates, if (dst_reg 'op' imm32) goto pc + off16 */
 
 #define BPF_JMP_IMM(OP, DST, IMM, OFF)				\
@@ -209,6 +219,16 @@
 		.off   = OFF,					\
 		.imm   = IMM })
 
+/* Like BPF_JMP_IMM, but with 32-bit wide operands for comparison. */
+
+#define BPF_JMP32_IMM(OP, DST, IMM, OFF)			\
+	((struct bpf_insn) {					\
+		.code  = BPF_JMP32 | BPF_OP(OP) | BPF_K,	\
+		.dst_reg = DST,					\
+		.src_reg = 0,					\
+		.off   = OFF,					\
+		.imm   = IMM })
+
 /* Unconditional jumps, goto pc + off16 */
 
 #define BPF_JMP_A(OFF)						\
-- 
cgit v1.2.3


From 8d5d0cfb63cbcb4005e19a332b31d687b1d01e58 Mon Sep 17 00:00:00 2001
From: Quentin Perret <quentin.perret@arm.com>
Date: Mon, 3 Dec 2018 09:56:23 +0000
Subject: sched/topology: Introduce a sysctl for Energy Aware Scheduling

In its current state, Energy Aware Scheduling (EAS) starts automatically
on asymmetric platforms having an Energy Model (EM). However, there are
users who want to have an EM (for thermal management for example), but
don't want EAS with it.

In order to let users disable EAS explicitly, introduce a new sysctl
called 'sched_energy_aware'. It is enabled by default so that EAS can
start automatically on platforms where it makes sense. Flipping it to 0
rebuilds the scheduling domains and disables EAS.

Signed-off-by: Quentin Perret <quentin.perret@arm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: adharmap@codeaurora.org
Cc: chris.redpath@arm.com
Cc: currojerez@riseup.net
Cc: dietmar.eggemann@arm.com
Cc: edubezval@gmail.com
Cc: gregkh@linuxfoundation.org
Cc: javi.merino@kernel.org
Cc: joel@joelfernandes.org
Cc: juri.lelli@redhat.com
Cc: morten.rasmussen@arm.com
Cc: patrick.bellasi@arm.com
Cc: pkondeti@codeaurora.org
Cc: rjw@rjwysocki.net
Cc: skannan@codeaurora.org
Cc: smuckle@google.com
Cc: srinivas.pandruvada@linux.intel.com
Cc: thara.gopinath@linaro.org
Cc: tkjos@google.com
Cc: valentin.schneider@arm.com
Cc: vincent.guittot@linaro.org
Cc: viresh.kumar@linaro.org
Link: https://lkml.kernel.org/r/20181203095628.11858-11-quentin.perret@arm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 Documentation/sysctl/kernel.txt | 12 ++++++++++++
 include/linux/sched/sysctl.h    |  7 +++++++
 kernel/sched/topology.c         | 29 +++++++++++++++++++++++++++++
 kernel/sysctl.c                 | 11 +++++++++++
 4 files changed, 59 insertions(+)

(limited to 'include')

diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt
index c0527d8a468a..379063e58326 100644
--- a/Documentation/sysctl/kernel.txt
+++ b/Documentation/sysctl/kernel.txt
@@ -79,6 +79,7 @@ show up in /proc/sys/kernel:
 - reboot-cmd                  [ SPARC only ]
 - rtsig-max
 - rtsig-nr
+- sched_energy_aware
 - seccomp/                    ==> Documentation/userspace-api/seccomp_filter.rst
 - sem
 - sem_next_id		      [ sysv ipc ]
@@ -890,6 +891,17 @@ rtsig-nr shows the number of RT signals currently queued.
 
 ==============================================================
 
+sched_energy_aware:
+
+Enables/disables Energy Aware Scheduling (EAS). EAS starts
+automatically on platforms where it can run (that is,
+platforms with asymmetric CPU topologies and having an Energy
+Model available). If your platform happens to meet the
+requirements for EAS but you do not want to use it, change
+this value to 0.
+
+==============================================================
+
 sched_schedstats:
 
 Enables/disables scheduler statistics. Enabling this feature
diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index a9c32daeb9d8..99ce6d728df7 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -83,4 +83,11 @@ extern int sysctl_schedstats(struct ctl_table *table, int write,
 				 void __user *buffer, size_t *lenp,
 				 loff_t *ppos);
 
+#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL)
+extern unsigned int sysctl_sched_energy_aware;
+extern int sched_energy_aware_handler(struct ctl_table *table, int write,
+				 void __user *buffer, size_t *lenp,
+				 loff_t *ppos);
+#endif
+
 #endif /* _LINUX_SCHED_SYSCTL_H */
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index 3f35ba1d8fde..50c3fc316c54 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -203,9 +203,35 @@ sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent)
 
 DEFINE_STATIC_KEY_FALSE(sched_energy_present);
 #if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL)
+unsigned int sysctl_sched_energy_aware = 1;
 DEFINE_MUTEX(sched_energy_mutex);
 bool sched_energy_update;
 
+#ifdef CONFIG_PROC_SYSCTL
+int sched_energy_aware_handler(struct ctl_table *table, int write,
+			 void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	int ret, state;
+
+	if (write && !capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+	if (!ret && write) {
+		state = static_branch_unlikely(&sched_energy_present);
+		if (state != sysctl_sched_energy_aware) {
+			mutex_lock(&sched_energy_mutex);
+			sched_energy_update = 1;
+			rebuild_sched_domains();
+			sched_energy_update = 0;
+			mutex_unlock(&sched_energy_mutex);
+		}
+	}
+
+	return ret;
+}
+#endif
+
 static void free_pd(struct perf_domain *pd)
 {
 	struct perf_domain *tmp;
@@ -322,6 +348,9 @@ static bool build_perf_domains(const struct cpumask *cpu_map)
 	struct cpufreq_policy *policy;
 	struct cpufreq_governor *gov;
 
+	if (!sysctl_sched_energy_aware)
+		goto free;
+
 	/* EAS is enabled for asymmetric CPU capacity topologies. */
 	if (!per_cpu(sd_asym_cpucapacity, cpu)) {
 		if (sched_debug()) {
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index ba4d9e85feb8..987ae08147bf 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -467,6 +467,17 @@ static struct ctl_table kern_table[] = {
 		.extra1		= &one,
 	},
 #endif
+#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL)
+	{
+		.procname	= "sched_energy_aware",
+		.data		= &sysctl_sched_energy_aware,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= sched_energy_aware_handler,
+		.extra1		= &zero,
+		.extra2		= &one,
+	},
+#endif
 #ifdef CONFIG_PROVE_LOCKING
 	{
 		.procname	= "prove_locking",
-- 
cgit v1.2.3


From 31954cd8bb667030b1c0d3d77f28fe71f06999f9 Mon Sep 17 00:00:00 2001
From: Wei Wang <weiwan@google.com>
Date: Fri, 25 Jan 2019 10:53:19 -0800
Subject: tcp: Refactor pingpong code

Instead of using pingpong as a single bit information, we refactor the
code to treat it as a counter. When interactive session is detected,
we set pingpong count to TCP_PINGPONG_THRESH. And when pingpong count
is >= TCP_PINGPONG_THRESH, we consider the session in pingpong mode.

This patch is a pure refactor and sets foundation for the next patch.
This patch itself does not change any pingpong logic.

Signed-off-by: Wei Wang <weiwan@google.com>
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet_connection_sock.h | 17 +++++++++++++++++
 net/dccp/input.c                   |  2 +-
 net/dccp/timer.c                   |  4 ++--
 net/ipv4/tcp.c                     | 10 +++++-----
 net/ipv4/tcp_input.c               |  8 ++++----
 net/ipv4/tcp_ipv4.c                |  2 +-
 net/ipv4/tcp_output.c              |  4 ++--
 net/ipv4/tcp_timer.c               |  4 ++--
 net/ipv6/tcp_ipv6.c                |  2 +-
 9 files changed, 35 insertions(+), 18 deletions(-)

(limited to 'include')

diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index fe0d9b44d6fc..179609d1d1ea 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -314,4 +314,21 @@ int inet_csk_compat_setsockopt(struct sock *sk, int level, int optname,
 			       char __user *optval, unsigned int optlen);
 
 struct dst_entry *inet_csk_update_pmtu(struct sock *sk, u32 mtu);
+
+#define TCP_PINGPONG_THRESH	1
+
+static inline void inet_csk_enter_pingpong_mode(struct sock *sk)
+{
+	inet_csk(sk)->icsk_ack.pingpong = TCP_PINGPONG_THRESH;
+}
+
+static inline void inet_csk_exit_pingpong_mode(struct sock *sk)
+{
+	inet_csk(sk)->icsk_ack.pingpong = 0;
+}
+
+static inline bool inet_csk_in_pingpong_mode(struct sock *sk)
+{
+	return inet_csk(sk)->icsk_ack.pingpong >= TCP_PINGPONG_THRESH;
+}
 #endif /* _INET_CONNECTION_SOCK_H */
diff --git a/net/dccp/input.c b/net/dccp/input.c
index 85d6c879383d..8d03707abdac 100644
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -480,7 +480,7 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk,
 			sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT);
 		}
 
-		if (sk->sk_write_pending || icsk->icsk_ack.pingpong ||
+		if (sk->sk_write_pending || inet_csk_in_pingpong_mode(sk) ||
 		    icsk->icsk_accept_queue.rskq_defer_accept) {
 			/* Save one ACK. Data will be ready after
 			 * several ticks, if write_pending is set.
diff --git a/net/dccp/timer.c b/net/dccp/timer.c
index 1501a20a94ca..74e138495d67 100644
--- a/net/dccp/timer.c
+++ b/net/dccp/timer.c
@@ -199,7 +199,7 @@ static void dccp_delack_timer(struct timer_list *t)
 	icsk->icsk_ack.pending &= ~ICSK_ACK_TIMER;
 
 	if (inet_csk_ack_scheduled(sk)) {
-		if (!icsk->icsk_ack.pingpong) {
+		if (!inet_csk_in_pingpong_mode(sk)) {
 			/* Delayed ACK missed: inflate ATO. */
 			icsk->icsk_ack.ato = min(icsk->icsk_ack.ato << 1,
 						 icsk->icsk_rto);
@@ -207,7 +207,7 @@ static void dccp_delack_timer(struct timer_list *t)
 			/* Delayed ACK missed: leave pingpong mode and
 			 * deflate ATO.
 			 */
-			icsk->icsk_ack.pingpong = 0;
+			inet_csk_exit_pingpong_mode(sk);
 			icsk->icsk_ack.ato = TCP_ATO_MIN;
 		}
 		dccp_send_ack(sk);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 12ba21433dd0..6f8d292ad501 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1551,7 +1551,7 @@ static void tcp_cleanup_rbuf(struct sock *sk, int copied)
 		    (copied > 0 &&
 		     ((icsk->icsk_ack.pending & ICSK_ACK_PUSHED2) ||
 		      ((icsk->icsk_ack.pending & ICSK_ACK_PUSHED) &&
-		       !icsk->icsk_ack.pingpong)) &&
+		       !inet_csk_in_pingpong_mode(sk))) &&
 		      !atomic_read(&sk->sk_rmem_alloc)))
 			time_to_ack = true;
 	}
@@ -2984,16 +2984,16 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
 
 	case TCP_QUICKACK:
 		if (!val) {
-			icsk->icsk_ack.pingpong = 1;
+			inet_csk_enter_pingpong_mode(sk);
 		} else {
-			icsk->icsk_ack.pingpong = 0;
+			inet_csk_exit_pingpong_mode(sk);
 			if ((1 << sk->sk_state) &
 			    (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT) &&
 			    inet_csk_ack_scheduled(sk)) {
 				icsk->icsk_ack.pending |= ICSK_ACK_PUSHED;
 				tcp_cleanup_rbuf(sk, 1);
 				if (!(val & 1))
-					icsk->icsk_ack.pingpong = 1;
+					inet_csk_enter_pingpong_mode(sk);
 			}
 		}
 		break;
@@ -3407,7 +3407,7 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
 		return 0;
 	}
 	case TCP_QUICKACK:
-		val = !icsk->icsk_ack.pingpong;
+		val = !inet_csk_in_pingpong_mode(sk);
 		break;
 
 	case TCP_CONGESTION:
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 76858b14ebe9..7a027dec649b 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -221,7 +221,7 @@ void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks)
 	struct inet_connection_sock *icsk = inet_csk(sk);
 
 	tcp_incr_quickack(sk, max_quickacks);
-	icsk->icsk_ack.pingpong = 0;
+	inet_csk_exit_pingpong_mode(sk);
 	icsk->icsk_ack.ato = TCP_ATO_MIN;
 }
 EXPORT_SYMBOL(tcp_enter_quickack_mode);
@@ -236,7 +236,7 @@ static bool tcp_in_quickack_mode(struct sock *sk)
 	const struct dst_entry *dst = __sk_dst_get(sk);
 
 	return (dst && dst_metric(dst, RTAX_QUICKACK)) ||
-		(icsk->icsk_ack.quick && !icsk->icsk_ack.pingpong);
+		(icsk->icsk_ack.quick && !inet_csk_in_pingpong_mode(sk));
 }
 
 static void tcp_ecn_queue_cwr(struct tcp_sock *tp)
@@ -4094,7 +4094,7 @@ void tcp_fin(struct sock *sk)
 	case TCP_ESTABLISHED:
 		/* Move to CLOSE_WAIT */
 		tcp_set_state(sk, TCP_CLOSE_WAIT);
-		inet_csk(sk)->icsk_ack.pingpong = 1;
+		inet_csk_enter_pingpong_mode(sk);
 		break;
 
 	case TCP_CLOSE_WAIT:
@@ -5889,7 +5889,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
 			return -1;
 		if (sk->sk_write_pending ||
 		    icsk->icsk_accept_queue.rskq_defer_accept ||
-		    icsk->icsk_ack.pingpong) {
+		    inet_csk_in_pingpong_mode(sk)) {
 			/* Save one ACK. Data will be ready after
 			 * several ticks, if write_pending is set.
 			 *
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index efc6fef692ff..662b034f1795 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2437,7 +2437,7 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
 		refcount_read(&sk->sk_refcnt), sk,
 		jiffies_to_clock_t(icsk->icsk_rto),
 		jiffies_to_clock_t(icsk->icsk_ack.ato),
-		(icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
+		(icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sk),
 		tp->snd_cwnd,
 		state == TCP_LISTEN ?
 		    fastopenq->max_qlen :
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 26a2948dca95..06228e2d010e 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -171,7 +171,7 @@ static void tcp_event_data_sent(struct tcp_sock *tp,
 	 * packet, enter pingpong mode.
 	 */
 	if ((u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato)
-		icsk->icsk_ack.pingpong = 1;
+		inet_csk_enter_pingpong_mode(sk);
 }
 
 /* Account for an ACK we sent. */
@@ -3569,7 +3569,7 @@ void tcp_send_delayed_ack(struct sock *sk)
 		const struct tcp_sock *tp = tcp_sk(sk);
 		int max_ato = HZ / 2;
 
-		if (icsk->icsk_ack.pingpong ||
+		if (inet_csk_in_pingpong_mode(sk) ||
 		    (icsk->icsk_ack.pending & ICSK_ACK_PUSHED))
 			max_ato = TCP_DELACK_MAX;
 
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index d7399a89469d..f0c86398e6a7 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -277,14 +277,14 @@ void tcp_delack_timer_handler(struct sock *sk)
 	icsk->icsk_ack.pending &= ~ICSK_ACK_TIMER;
 
 	if (inet_csk_ack_scheduled(sk)) {
-		if (!icsk->icsk_ack.pingpong) {
+		if (!inet_csk_in_pingpong_mode(sk)) {
 			/* Delayed ACK missed: inflate ATO. */
 			icsk->icsk_ack.ato = min(icsk->icsk_ack.ato << 1, icsk->icsk_rto);
 		} else {
 			/* Delayed ACK missed: leave pingpong mode and
 			 * deflate ATO.
 			 */
-			icsk->icsk_ack.pingpong = 0;
+			inet_csk_exit_pingpong_mode(sk);
 			icsk->icsk_ack.ato      = TCP_ATO_MIN;
 		}
 		tcp_mstamp_refresh(tcp_sk(sk));
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index b81eb7cb815e..e51cda79f0cc 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1864,7 +1864,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
 		   refcount_read(&sp->sk_refcnt), sp,
 		   jiffies_to_clock_t(icsk->icsk_rto),
 		   jiffies_to_clock_t(icsk->icsk_ack.ato),
-		   (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
+		   (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
 		   tp->snd_cwnd,
 		   state == TCP_LISTEN ?
 			fastopenq->max_qlen :
-- 
cgit v1.2.3


From 4a41f453bedfd5e9cd040bad509d9da49feb3e2c Mon Sep 17 00:00:00 2001
From: Wei Wang <weiwan@google.com>
Date: Fri, 25 Jan 2019 10:53:20 -0800
Subject: tcp: change pingpong threshold to 3

In order to be more confident about an on-going interactive session, we
increment pingpong count by 1 for every interactive transaction and we
adjust TCP_PINGPONG_THRESH to 3.
This means, we only consider a session in pingpong mode after we see 3
interactive transactions, and start to activate delayed acks in quick
ack mode.
And in order to not over-count the credits, we only increase pingpong
count for the first packet sent in response for the previous received
packet.
This is mainly to prevent delaying the ack immediately after some
handshake protocol but no real interactive traffic pattern afterwards.

Signed-off-by: Wei Wang <weiwan@google.com>
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet_connection_sock.h | 10 +++++++++-
 net/ipv4/tcp_output.c              | 15 +++++++++------
 2 files changed, 18 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index 179609d1d1ea..ff40e1d08157 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -315,7 +315,7 @@ int inet_csk_compat_setsockopt(struct sock *sk, int level, int optname,
 
 struct dst_entry *inet_csk_update_pmtu(struct sock *sk, u32 mtu);
 
-#define TCP_PINGPONG_THRESH	1
+#define TCP_PINGPONG_THRESH	3
 
 static inline void inet_csk_enter_pingpong_mode(struct sock *sk)
 {
@@ -331,4 +331,12 @@ static inline bool inet_csk_in_pingpong_mode(struct sock *sk)
 {
 	return inet_csk(sk)->icsk_ack.pingpong >= TCP_PINGPONG_THRESH;
 }
+
+static inline void inet_csk_inc_pingpong_cnt(struct sock *sk)
+{
+	struct inet_connection_sock *icsk = inet_csk(sk);
+
+	if (icsk->icsk_ack.pingpong < U8_MAX)
+		icsk->icsk_ack.pingpong++;
+}
 #endif /* _INET_CONNECTION_SOCK_H */
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 06228e2d010e..96bdb8eae9bb 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -165,13 +165,16 @@ static void tcp_event_data_sent(struct tcp_sock *tp,
 	if (tcp_packets_in_flight(tp) == 0)
 		tcp_ca_event(sk, CA_EVENT_TX_START);
 
-	tp->lsndtime = now;
-
-	/* If it is a reply for ato after last received
-	 * packet, enter pingpong mode.
+	/* If this is the first data packet sent in response to the
+	 * previous received data,
+	 * and it is a reply for ato after last received packet,
+	 * increase pingpong count.
 	 */
-	if ((u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato)
-		inet_csk_enter_pingpong_mode(sk);
+	if (before(tp->lsndtime, icsk->icsk_ack.lrcvtime) &&
+	    (u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato)
+		inet_csk_inc_pingpong_cnt(sk);
+
+	tp->lsndtime = now;
 }
 
 /* Account for an ACK we sent. */
-- 
cgit v1.2.3


From fe19a8fea7cb59f202ddd30ec2fa9f5bd907b3d2 Mon Sep 17 00:00:00 2001
From: Matteo Croce <mcroce@redhat.com>
Date: Sat, 19 Jan 2019 15:22:38 +0100
Subject: ipvs: avoid indirect calls when calculating checksums

The function pointer ip_vs_protocol->csum_check is only used in protocol
specific code, and never in the generic one.
Remove the function pointer from struct ip_vs_protocol and call the
checksum functions directly.
This reduces the performance impact of the Spectre mitigation, and
should give a small improvement even with RETPOLINES disabled.

Signed-off-by: Matteo Croce <mcroce@redhat.com>
Acked-by: Julian Anastasov <ja@ssi.bg>
Acked-by: Simon Horman <horms@verge.net.au>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/ip_vs.h                     |  3 ---
 net/netfilter/ipvs/ip_vs_proto_ah_esp.c |  2 --
 net/netfilter/ipvs/ip_vs_proto_sctp.c   |  8 +++++---
 net/netfilter/ipvs/ip_vs_proto_tcp.c    | 12 +++++++-----
 net/netfilter/ipvs/ip_vs_proto_udp.c    | 12 +++++++-----
 5 files changed, 19 insertions(+), 18 deletions(-)

(limited to 'include')

diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index a0d2e0bb9a94..047f9a5ccaad 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -453,9 +453,6 @@ struct ip_vs_protocol {
 	int (*dnat_handler)(struct sk_buff *skb, struct ip_vs_protocol *pp,
 			    struct ip_vs_conn *cp, struct ip_vs_iphdr *iph);
 
-	int (*csum_check)(int af, struct sk_buff *skb,
-			  struct ip_vs_protocol *pp);
-
 	const char *(*state_name)(int state);
 
 	void (*state_transition)(struct ip_vs_conn *cp, int direction,
diff --git a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c
index 5320d39976e1..480598cb0f05 100644
--- a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c
@@ -129,7 +129,6 @@ struct ip_vs_protocol ip_vs_protocol_ah = {
 	.conn_out_get =		ah_esp_conn_out_get,
 	.snat_handler =		NULL,
 	.dnat_handler =		NULL,
-	.csum_check =		NULL,
 	.state_transition =	NULL,
 	.register_app =		NULL,
 	.unregister_app =	NULL,
@@ -152,7 +151,6 @@ struct ip_vs_protocol ip_vs_protocol_esp = {
 	.conn_out_get =		ah_esp_conn_out_get,
 	.snat_handler =		NULL,
 	.dnat_handler =		NULL,
-	.csum_check =		NULL,
 	.state_transition =	NULL,
 	.register_app =		NULL,
 	.unregister_app =	NULL,
diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c
index b0cd7d08f2a7..bc3d1625ecc8 100644
--- a/net/netfilter/ipvs/ip_vs_proto_sctp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c
@@ -9,6 +9,9 @@
 #include <net/sctp/checksum.h>
 #include <net/ip_vs.h>
 
+static int
+sctp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp);
+
 static int
 sctp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
 		   struct ip_vs_proto_data *pd,
@@ -105,7 +108,7 @@ sctp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
 		int ret;
 
 		/* Some checks before mangling */
-		if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
+		if (!sctp_csum_check(cp->af, skb, pp))
 			return 0;
 
 		/* Call application helper if needed */
@@ -152,7 +155,7 @@ sctp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
 		int ret;
 
 		/* Some checks before mangling */
-		if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
+		if (!sctp_csum_check(cp->af, skb, pp))
 			return 0;
 
 		/* Call application helper if needed */
@@ -587,7 +590,6 @@ struct ip_vs_protocol ip_vs_protocol_sctp = {
 	.conn_out_get	= ip_vs_conn_out_get_proto,
 	.snat_handler	= sctp_snat_handler,
 	.dnat_handler	= sctp_dnat_handler,
-	.csum_check	= sctp_csum_check,
 	.state_name	= sctp_state_name,
 	.state_transition = sctp_state_transition,
 	.app_conn_bind	= sctp_app_conn_bind,
diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c
index 1770fc6ce960..6a275f989085 100644
--- a/net/netfilter/ipvs/ip_vs_proto_tcp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c
@@ -31,6 +31,9 @@
 
 #include <net/ip_vs.h>
 
+static int
+tcp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp);
+
 static int
 tcp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
 		  struct ip_vs_proto_data *pd,
@@ -166,7 +169,7 @@ tcp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
 		int ret;
 
 		/* Some checks before mangling */
-		if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
+		if (!tcp_csum_check(cp->af, skb, pp))
 			return 0;
 
 		/* Call application helper if needed */
@@ -192,7 +195,7 @@ tcp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
 		tcp_fast_csum_update(cp->af, tcph, &cp->daddr, &cp->vaddr,
 				     cp->dport, cp->vport);
 		if (skb->ip_summed == CHECKSUM_COMPLETE)
-			skb->ip_summed = (cp->app && pp->csum_check) ?
+			skb->ip_summed = cp->app ?
 					 CHECKSUM_UNNECESSARY : CHECKSUM_NONE;
 	} else {
 		/* full checksum calculation */
@@ -244,7 +247,7 @@ tcp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
 		int ret;
 
 		/* Some checks before mangling */
-		if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
+		if (!tcp_csum_check(cp->af, skb, pp))
 			return 0;
 
 		/*
@@ -275,7 +278,7 @@ tcp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
 		tcp_fast_csum_update(cp->af, tcph, &cp->vaddr, &cp->daddr,
 				     cp->vport, cp->dport);
 		if (skb->ip_summed == CHECKSUM_COMPLETE)
-			skb->ip_summed = (cp->app && pp->csum_check) ?
+			skb->ip_summed = cp->app ?
 					 CHECKSUM_UNNECESSARY : CHECKSUM_NONE;
 	} else {
 		/* full checksum calculation */
@@ -736,7 +739,6 @@ struct ip_vs_protocol ip_vs_protocol_tcp = {
 	.conn_out_get =		ip_vs_conn_out_get_proto,
 	.snat_handler =		tcp_snat_handler,
 	.dnat_handler =		tcp_dnat_handler,
-	.csum_check =		tcp_csum_check,
 	.state_name =		tcp_state_name,
 	.state_transition =	tcp_state_transition,
 	.app_conn_bind =	tcp_app_conn_bind,
diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c
index 0f53c49025f8..3285718264d5 100644
--- a/net/netfilter/ipvs/ip_vs_proto_udp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_udp.c
@@ -28,6 +28,9 @@
 #include <net/ip.h>
 #include <net/ip6_checksum.h>
 
+static int
+udp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp);
+
 static int
 udp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
 		  struct ip_vs_proto_data *pd,
@@ -156,7 +159,7 @@ udp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
 		int ret;
 
 		/* Some checks before mangling */
-		if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
+		if (!udp_csum_check(cp->af, skb, pp))
 			return 0;
 
 		/*
@@ -186,7 +189,7 @@ udp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
 		udp_fast_csum_update(cp->af, udph, &cp->daddr, &cp->vaddr,
 				     cp->dport, cp->vport);
 		if (skb->ip_summed == CHECKSUM_COMPLETE)
-			skb->ip_summed = (cp->app && pp->csum_check) ?
+			skb->ip_summed = cp->app ?
 					 CHECKSUM_UNNECESSARY : CHECKSUM_NONE;
 	} else {
 		/* full checksum calculation */
@@ -239,7 +242,7 @@ udp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
 		int ret;
 
 		/* Some checks before mangling */
-		if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
+		if (!udp_csum_check(cp->af, skb, pp))
 			return 0;
 
 		/*
@@ -270,7 +273,7 @@ udp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
 		udp_fast_csum_update(cp->af, udph, &cp->vaddr, &cp->daddr,
 				     cp->vport, cp->dport);
 		if (skb->ip_summed == CHECKSUM_COMPLETE)
-			skb->ip_summed = (cp->app && pp->csum_check) ?
+			skb->ip_summed = cp->app ?
 					 CHECKSUM_UNNECESSARY : CHECKSUM_NONE;
 	} else {
 		/* full checksum calculation */
@@ -494,7 +497,6 @@ struct ip_vs_protocol ip_vs_protocol_udp = {
 	.conn_out_get =		ip_vs_conn_out_get_proto,
 	.snat_handler =		udp_snat_handler,
 	.dnat_handler =		udp_dnat_handler,
-	.csum_check =		udp_csum_check,
 	.state_transition =	udp_state_transition,
 	.state_name =		udp_state_name,
 	.register_app =		udp_register_app,
-- 
cgit v1.2.3


From 4cab5bf616f6513b2111ce80f634a9a07c037a0a Mon Sep 17 00:00:00 2001
From: Andrey Smirnov <andrew.smirnov@gmail.com>
Date: Mon, 21 Jan 2019 18:10:42 -0800
Subject: dt-bindings: reset: imx7: Document usage on i.MX8MQ SoCs

The driver now supports i.MX8MQ, so update bindings accordingly.

Cc: p.zabel@pengutronix.de
Cc: Fabio Estevam <fabio.estevam@nxp.com>
Cc: cphealy@gmail.com
Cc: l.stach@pengutronix.de
Cc: Leonard Crestez <leonard.crestez@nxp.com>
Cc: "A.s. Dong" <aisheng.dong@nxp.com>
Cc: Richard Zhu <hongxing.zhu@nxp.com>
Cc: linux-imx@nxp.com
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Andrey Smirnov <andrew.smirnov@gmail.com>
Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
---
 .../devicetree/bindings/reset/fsl,imx7-src.txt     |  7 ++-
 include/dt-bindings/reset/imx8mq-reset.h           | 64 ++++++++++++++++++++++
 2 files changed, 69 insertions(+), 2 deletions(-)
 create mode 100644 include/dt-bindings/reset/imx8mq-reset.h

(limited to 'include')

diff --git a/Documentation/devicetree/bindings/reset/fsl,imx7-src.txt b/Documentation/devicetree/bindings/reset/fsl,imx7-src.txt
index 1ab1d109318e..2ecf33815d18 100644
--- a/Documentation/devicetree/bindings/reset/fsl,imx7-src.txt
+++ b/Documentation/devicetree/bindings/reset/fsl,imx7-src.txt
@@ -5,7 +5,9 @@ Please also refer to reset.txt in this directory for common reset
 controller binding usage.
 
 Required properties:
-- compatible: Should be "fsl,imx7d-src", "syscon"
+- compatible:
+	- For i.MX7 SoCs should be "fsl,imx7d-src", "syscon"
+	- For i.MX8MQ SoCs should be "fsl,imx8mq-src", "syscon"
 - reg: should be register base and length as documented in the
   datasheet
 - interrupts: Should contain SRC interrupt
@@ -44,4 +46,5 @@ Example:
 
 
 For list of all valid reset indicies see
-<dt-bindings/reset/imx7-reset.h>
+<dt-bindings/reset/imx7-reset.h> for i.MX7 and
+<dt-bindings/reset/imx8mq-reset.h> for i.MX8MQ
diff --git a/include/dt-bindings/reset/imx8mq-reset.h b/include/dt-bindings/reset/imx8mq-reset.h
new file mode 100644
index 000000000000..57c592498aa0
--- /dev/null
+++ b/include/dt-bindings/reset/imx8mq-reset.h
@@ -0,0 +1,64 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2018 Zodiac Inflight Innovations
+ *
+ * Author: Andrey Smirnov <andrew.smirnov@gmail.com>
+ */
+
+#ifndef DT_BINDING_RESET_IMX8MQ_H
+#define DT_BINDING_RESET_IMX8MQ_H
+
+#define IMX8MQ_RESET_A53_CORE_POR_RESET0	0
+#define IMX8MQ_RESET_A53_CORE_POR_RESET1	1
+#define IMX8MQ_RESET_A53_CORE_POR_RESET2	2
+#define IMX8MQ_RESET_A53_CORE_POR_RESET3	3
+#define IMX8MQ_RESET_A53_CORE_RESET0		4
+#define IMX8MQ_RESET_A53_CORE_RESET1		5
+#define IMX8MQ_RESET_A53_CORE_RESET2		6
+#define IMX8MQ_RESET_A53_CORE_RESET3		7
+#define IMX8MQ_RESET_A53_DBG_RESET0		8
+#define IMX8MQ_RESET_A53_DBG_RESET1		9
+#define IMX8MQ_RESET_A53_DBG_RESET2		10
+#define IMX8MQ_RESET_A53_DBG_RESET3		11
+#define IMX8MQ_RESET_A53_ETM_RESET0		12
+#define IMX8MQ_RESET_A53_ETM_RESET1		13
+#define IMX8MQ_RESET_A53_ETM_RESET2		14
+#define IMX8MQ_RESET_A53_ETM_RESET3		15
+#define IMX8MQ_RESET_A53_SOC_DBG_RESET		16
+#define IMX8MQ_RESET_A53_L2RESET		17
+#define IMX8MQ_RESET_SW_NON_SCLR_M4C_RST	18
+#define IMX8MQ_RESET_OTG1_PHY_RESET		19
+#define IMX8MQ_RESET_OTG2_PHY_RESET		20
+#define IMX8MQ_RESET_MIPI_DSI_RESET_BYTE_N	21
+#define IMX8MQ_RESET_MIPI_DSI_RESET_N		22
+#define IMX8MQ_RESET_MIPI_DIS_DPI_RESET_N	23
+#define IMX8MQ_RESET_MIPI_DIS_ESC_RESET_N	24
+#define IMX8MQ_RESET_MIPI_DIS_PCLK_RESET_N	25
+#define IMX8MQ_RESET_PCIEPHY			26
+#define IMX8MQ_RESET_PCIEPHY_PERST		27
+#define IMX8MQ_RESET_PCIE_CTRL_APPS_EN		28
+#define IMX8MQ_RESET_PCIE_CTRL_APPS_TURNOFF	29
+#define IMX8MQ_RESET_HDMI_PHY_APB_RESET		30
+#define IMX8MQ_RESET_DISP_RESET			31
+#define IMX8MQ_RESET_GPU_RESET			32
+#define IMX8MQ_RESET_VPU_RESET			33
+#define IMX8MQ_RESET_PCIEPHY2			34
+#define IMX8MQ_RESET_PCIEPHY2_PERST		35
+#define IMX8MQ_RESET_PCIE2_CTRL_APPS_EN		36
+#define IMX8MQ_RESET_PCIE2_CTRL_APPS_TURNOFF	37
+#define IMX8MQ_RESET_MIPI_CSI1_CORE_RESET	38
+#define IMX8MQ_RESET_MIPI_CSI1_PHY_REF_RESET	39
+#define IMX8MQ_RESET_MIPI_CSI1_ESC_RESET	40
+#define IMX8MQ_RESET_MIPI_CSI2_CORE_RESET	41
+#define IMX8MQ_RESET_MIPI_CSI2_PHY_REF_RESET	42
+#define IMX8MQ_RESET_MIPI_CSI2_ESC_RESET	43
+#define IMX8MQ_RESET_DDRC1_PRST			44
+#define IMX8MQ_RESET_DDRC1_CORE_RESET		45
+#define IMX8MQ_RESET_DDRC1_PHY_RESET		46
+#define IMX8MQ_RESET_DDRC2_PRST			47
+#define IMX8MQ_RESET_DDRC2_CORE_RESET		48
+#define IMX8MQ_RESET_DDRC2_PHY_RESET		49
+
+#define IMX8MQ_RESET_NUM			50
+
+#endif
-- 
cgit v1.2.3


From fdce60787f6215607dc7ac910cbaf4416684b589 Mon Sep 17 00:00:00 2001
From: Philipp Zabel <p.zabel@pengutronix.de>
Date: Thu, 13 Dec 2018 12:22:32 +0100
Subject: reset: sunxi: declare sun6i_reset_init in a header file

Avoid declaring extern functions in c files. To make sure function
definition and usage don't get out of sync, declare sun6i_reset_init
in a common header.

Suggested-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
---
 arch/arm/mach-sunxi/sunxi.c | 2 +-
 drivers/reset/reset-sunxi.c | 1 +
 include/linux/reset/sunxi.h | 7 +++++++
 3 files changed, 9 insertions(+), 1 deletion(-)
 create mode 100644 include/linux/reset/sunxi.h

(limited to 'include')

diff --git a/arch/arm/mach-sunxi/sunxi.c b/arch/arm/mach-sunxi/sunxi.c
index 8a7f301839c2..933b6930f024 100644
--- a/arch/arm/mach-sunxi/sunxi.c
+++ b/arch/arm/mach-sunxi/sunxi.c
@@ -14,6 +14,7 @@
 #include <linux/clocksource.h>
 #include <linux/init.h>
 #include <linux/platform_device.h>
+#include <linux/reset/sunxi.h>
 
 #include <asm/mach/arch.h>
 #include <asm/secure_cntvoff.h>
@@ -37,7 +38,6 @@ static const char * const sun6i_board_dt_compat[] = {
 	NULL,
 };
 
-extern void __init sun6i_reset_init(void);
 static void __init sun6i_timer_init(void)
 {
 	of_clk_init(NULL);
diff --git a/drivers/reset/reset-sunxi.c b/drivers/reset/reset-sunxi.c
index db9a1a75523f..b06d724d8f21 100644
--- a/drivers/reset/reset-sunxi.c
+++ b/drivers/reset/reset-sunxi.c
@@ -18,6 +18,7 @@
 #include <linux/of_address.h>
 #include <linux/platform_device.h>
 #include <linux/reset-controller.h>
+#include <linux/reset/sunxi.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <linux/types.h>
diff --git a/include/linux/reset/sunxi.h b/include/linux/reset/sunxi.h
new file mode 100644
index 000000000000..1ad7fffb413e
--- /dev/null
+++ b/include/linux/reset/sunxi.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __LINUX_RESET_SUNXI_H__
+#define __LINUX_RESET_SUNXI_H__
+
+void __init sun6i_reset_init(void);
+
+#endif /* __LINUX_RESET_SUNXI_H__ */
-- 
cgit v1.2.3


From cdbeb315ed8dcc142a68054899cedd6e4f1fea3f Mon Sep 17 00:00:00 2001
From: Philipp Zabel <p.zabel@pengutronix.de>
Date: Thu, 13 Dec 2018 12:24:36 +0100
Subject: reset: socfpga: declare socfpga_reset_init in a header file

Avoid declaring extern functions in c files. To make sure function
definition and usage don't get out of sync, declare socfpga_reset_init
in a common header.

Suggested-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
Acked-by: Dinh Nguyen <dinguyen@kernel.org>
---
 arch/arm/mach-socfpga/socfpga.c | 3 +--
 drivers/reset/reset-socfpga.c   | 2 +-
 include/linux/reset/socfpga.h   | 7 +++++++
 3 files changed, 9 insertions(+), 3 deletions(-)
 create mode 100644 include/linux/reset/socfpga.h

(limited to 'include')

diff --git a/arch/arm/mach-socfpga/socfpga.c b/arch/arm/mach-socfpga/socfpga.c
index afd98971d903..816da0eb6616 100644
--- a/arch/arm/mach-socfpga/socfpga.c
+++ b/arch/arm/mach-socfpga/socfpga.c
@@ -19,6 +19,7 @@
 #include <linux/of_irq.h>
 #include <linux/of_platform.h>
 #include <linux/reboot.h>
+#include <linux/reset/socfpga.h>
 
 #include <asm/hardware/cache-l2x0.h>
 #include <asm/mach/arch.h>
@@ -32,8 +33,6 @@ void __iomem *rst_manager_base_addr;
 void __iomem *sdr_ctl_base_addr;
 unsigned long socfpga_cpu1start_addr;
 
-extern void __init socfpga_reset_init(void);
-
 static void __init socfpga_sysmgr_init(void)
 {
 	struct device_node *np;
diff --git a/drivers/reset/reset-socfpga.c b/drivers/reset/reset-socfpga.c
index 318cfc51c441..96953992c2bb 100644
--- a/drivers/reset/reset-socfpga.c
+++ b/drivers/reset/reset-socfpga.c
@@ -11,6 +11,7 @@
 #include <linux/of_address.h>
 #include <linux/platform_device.h>
 #include <linux/reset-controller.h>
+#include <linux/reset/socfpga.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <linux/types.h>
@@ -18,7 +19,6 @@
 #include "reset-simple.h"
 
 #define SOCFPGA_NR_BANKS	8
-void __init socfpga_reset_init(void);
 
 static int a10_reset_init(struct device_node *np)
 {
diff --git a/include/linux/reset/socfpga.h b/include/linux/reset/socfpga.h
new file mode 100644
index 000000000000..b11a2047c342
--- /dev/null
+++ b/include/linux/reset/socfpga.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __LINUX_RESET_SOCFPGA_H__
+#define __LINUX_RESET_SOCFPGA_H__
+
+void __init socfpga_reset_init(void);
+
+#endif /* __LINUX_RESET_SOCFPGA_H__ */
-- 
cgit v1.2.3


From 83f529281d7aa42b10c2c5cb64fcbd2c7cab4409 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Sun, 27 Jan 2019 19:18:57 +0100
Subject: netfilter: ipv4: remove useless export_symbol

Only one caller; place it where needed and get rid of the EXPORT_SYMBOL.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter_ipv4.h |  6 ------
 net/ipv4/netfilter.c           | 18 ------------------
 net/netfilter/utils.c          | 19 +++++++++++++++++++
 3 files changed, 19 insertions(+), 24 deletions(-)

(limited to 'include')

diff --git a/include/linux/netfilter_ipv4.h b/include/linux/netfilter_ipv4.h
index 95ab5cc64422..082e2c41b7ff 100644
--- a/include/linux/netfilter_ipv4.h
+++ b/include/linux/netfilter_ipv4.h
@@ -25,7 +25,6 @@ __sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook,
 		       unsigned int dataoff, u_int8_t protocol);
 int nf_ip_route(struct net *net, struct dst_entry **dst, struct flowi *fl,
 		bool strict);
-int nf_ip_reroute(struct sk_buff *skb, const struct nf_queue_entry *entry);
 #else
 static inline __sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook,
 				     unsigned int dataoff, u_int8_t protocol)
@@ -37,11 +36,6 @@ static inline int nf_ip_route(struct net *net, struct dst_entry **dst,
 {
 	return -EOPNOTSUPP;
 }
-static inline int nf_ip_reroute(struct sk_buff *skb,
-				const struct nf_queue_entry *entry)
-{
-	return -EOPNOTSUPP;
-}
 #endif /* CONFIG_INET */
 
 #endif /*__LINUX_IP_NETFILTER_H*/
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index 8d2e5dc9a827..a058213b77a7 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -80,24 +80,6 @@ int ip_route_me_harder(struct net *net, struct sk_buff *skb, unsigned int addr_t
 }
 EXPORT_SYMBOL(ip_route_me_harder);
 
-int nf_ip_reroute(struct sk_buff *skb, const struct nf_queue_entry *entry)
-{
-	const struct ip_rt_info *rt_info = nf_queue_entry_reroute(entry);
-
-	if (entry->state.hook == NF_INET_LOCAL_OUT) {
-		const struct iphdr *iph = ip_hdr(skb);
-
-		if (!(iph->tos == rt_info->tos &&
-		      skb->mark == rt_info->mark &&
-		      iph->daddr == rt_info->daddr &&
-		      iph->saddr == rt_info->saddr))
-			return ip_route_me_harder(entry->state.net, skb,
-						  RTN_UNSPEC);
-	}
-	return 0;
-}
-EXPORT_SYMBOL_GPL(nf_ip_reroute);
-
 int nf_ip_route(struct net *net, struct dst_entry **dst, struct flowi *fl,
 		bool strict __always_unused)
 {
diff --git a/net/netfilter/utils.c b/net/netfilter/utils.c
index e8da9a9bba73..55af9f247993 100644
--- a/net/netfilter/utils.c
+++ b/net/netfilter/utils.c
@@ -180,6 +180,25 @@ int nf_route(struct net *net, struct dst_entry **dst, struct flowi *fl,
 }
 EXPORT_SYMBOL_GPL(nf_route);
 
+static int nf_ip_reroute(struct sk_buff *skb, const struct nf_queue_entry *entry)
+{
+#ifdef CONFIG_INET
+	const struct ip_rt_info *rt_info = nf_queue_entry_reroute(entry);
+
+	if (entry->state.hook == NF_INET_LOCAL_OUT) {
+		const struct iphdr *iph = ip_hdr(skb);
+
+		if (!(iph->tos == rt_info->tos &&
+		      skb->mark == rt_info->mark &&
+		      iph->daddr == rt_info->daddr &&
+		      iph->saddr == rt_info->saddr))
+			return ip_route_me_harder(entry->state.net, skb,
+						  RTN_UNSPEC);
+	}
+#endif
+	return 0;
+}
+
 int nf_reroute(struct sk_buff *skb, struct nf_queue_entry *entry)
 {
 	const struct nf_ipv6_ops *v6ops;
-- 
cgit v1.2.3


From 5e484ec1758b95e6420787fc17f0e8c5e152c264 Mon Sep 17 00:00:00 2001
From: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Date: Fri, 25 Jan 2019 14:16:17 -0600
Subject: ASoC: soc-acpi: add static inline fallbacks when CONFIG_ACPI=n

Fix compilation issues reported by 0day-Kbuild with sparc64 w/ SOF.

Signed-off-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/soc-acpi.h | 28 +++++++++++++++++++++-------
 1 file changed, 21 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/sound/soc-acpi.h b/include/sound/soc-acpi.h
index 266e64e3c24c..6cbbeed9cdd0 100644
--- a/include/sound/soc-acpi.h
+++ b/include/sound/soc-acpi.h
@@ -22,20 +22,37 @@ struct snd_soc_acpi_package_context {
 #define SND_ACPI_I2C_ID_LEN (4 + ACPI_ID_LEN + 3 + 1)
 
 #if IS_ENABLED(CONFIG_ACPI)
+/* acpi match */
+struct snd_soc_acpi_mach *
+snd_soc_acpi_find_machine(struct snd_soc_acpi_mach *machines);
+
 bool snd_soc_acpi_find_package_from_hid(const u8 hid[ACPI_ID_LEN],
 				    struct snd_soc_acpi_package_context *ctx);
+
+/* check all codecs */
+struct snd_soc_acpi_mach *snd_soc_acpi_codec_list(void *arg);
+
 #else
+/* acpi match */
+static inline struct snd_soc_acpi_mach *
+snd_soc_acpi_find_machine(struct snd_soc_acpi_mach *machines)
+{
+	return NULL;
+}
+
 static inline bool
 snd_soc_acpi_find_package_from_hid(const u8 hid[ACPI_ID_LEN],
 				   struct snd_soc_acpi_package_context *ctx)
 {
 	return false;
 }
-#endif
 
-/* acpi match */
-struct snd_soc_acpi_mach *
-snd_soc_acpi_find_machine(struct snd_soc_acpi_mach *machines);
+/* check all codecs */
+static inline struct snd_soc_acpi_mach *snd_soc_acpi_codec_list(void *arg)
+{
+	return NULL;
+}
+#endif
 
 /**
  * snd_soc_acpi_mach_params: interface for machine driver configuration
@@ -105,7 +122,4 @@ struct snd_soc_acpi_codecs {
 	u8 codecs[SND_SOC_ACPI_MAX_CODECS][ACPI_ID_LEN];
 };
 
-/* check all codecs */
-struct snd_soc_acpi_mach *snd_soc_acpi_codec_list(void *arg);
-
 #endif
-- 
cgit v1.2.3


From 87eff9af7efb154cc4a940ed12efc803a0bf3fba Mon Sep 17 00:00:00 2001
From: Vladimir Zapolskiy <vz@mleia.com>
Date: Tue, 22 Jan 2019 23:18:21 +0200
Subject: pinctrl: remove pinctrl/machine.h inclusion from pinctrl/pinconf.h

The change adds explicit inclusion of linux/pinctrl/machine.h header
to the only needed pinctrl-madera-core.c file, and therefore inclusion
of pinctrl/machine.h header from pinctrl/pinconf.h can be removed.

The change is preparatory to a follow-up reversal of commit f07512e615dd
("pinctrl/pinconfig: add debug interface").

Signed-off-by: Vladimir Zapolskiy <vz@mleia.com>
Cc: Charles Keepax <ckeepax@opensource.cirrus.com>
Reviewed-by Richard Fitzgerald <rf@opensource.cirrus.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/pinctrl/cirrus/pinctrl-madera-core.c | 1 +
 include/linux/pinctrl/pinconf.h              | 2 --
 2 files changed, 1 insertion(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/pinctrl/cirrus/pinctrl-madera-core.c b/drivers/pinctrl/cirrus/pinctrl-madera-core.c
index a5dda832024a..7c9694593f79 100644
--- a/drivers/pinctrl/cirrus/pinctrl-madera-core.c
+++ b/drivers/pinctrl/cirrus/pinctrl-madera-core.c
@@ -14,6 +14,7 @@
 #include <linux/platform_device.h>
 #include <linux/regmap.h>
 #include <linux/slab.h>
+#include <linux/pinctrl/machine.h>
 #include <linux/pinctrl/pinctrl.h>
 #include <linux/pinctrl/pinmux.h>
 #include <linux/pinctrl/pinconf.h>
diff --git a/include/linux/pinctrl/pinconf.h b/include/linux/pinctrl/pinconf.h
index 8dd85d302b90..109468d9d849 100644
--- a/include/linux/pinctrl/pinconf.h
+++ b/include/linux/pinctrl/pinconf.h
@@ -14,8 +14,6 @@
 
 #ifdef CONFIG_PINCONF
 
-#include <linux/pinctrl/machine.h>
-
 struct pinctrl_dev;
 struct seq_file;
 
-- 
cgit v1.2.3


From e73339037f6b6d65e84f5fd42e56dd3cdf0d9e9c Mon Sep 17 00:00:00 2001
From: Vladimir Zapolskiy <vz@mleia.com>
Date: Tue, 22 Jan 2019 23:18:22 +0200
Subject: pinctrl: remove unused 'pinconf-config' debugfs interface

The main goal of the change is to remove .pin_config_dbg_parse_modify
callback before a driver with its support appears. So far the in-kernel
interface did not attract any users since its introduction 5 years ago.

Originally .pin_config_dbg_parse_modify callback and the associated
'pinconf-config' debugfs file were introduced in commit f07512e615dd
("pinctrl/pinconfig: add debug interface"), a short description of
'pinconf-config' usage for debugging can be expressed this way:

Write to 'pinconf-config' (see pinconf_dbg_config_write() function):

% echo -n modify $map_type $device_name $state_name $pin_name $config > \
	/sys/kernel/debug/pinctrl/$pinctrl/pinconf-config

It supposes to update a global (therefore single!) 'pinconf_dbg_conf'
variable with an alternative setting, the arguments should match
an existing pinconf device and some registered pinctrl mapping 'map':

* $map_type is either 'config_pin' or 'config_group', it should match
  'map->type' value of PIN_MAP_TYPE_CONFIGS_PIN or
   PIN_MAP_TYPE_CONFIGS_GROUP accordingly,
* $device_name should match 'map->dev_name' string value,
* $state_name should match 'map->name' string value,
* $pin_name should match 'map->data.configs.group_or_pin' string value,

If all above has matched, then $config is a new value to be set by calling
pinconfops->pin_config_dbg_parse_modify(pctldev, config, matched_config).

After a successful write into 'pinconf-config' a user can read the file
to get information about that single modified pin configuration.

The fact is .pin_config_dbg_parse_modify callback has never been defined
in 'struct pinconf_ops' of any pinconf driver, thus an actual modification
of a pin or group state on any present pinconf controller does not happen,
and it declares that all related code is no more than dead code.

I discovered the issue while attempting to add .pin_config_dbg_parse_modify
support in some drivers and found that too short 'MAX_NAME_LEN' set by

  drivers/pinctrl/pinconf.c:372:#define MAX_NAME_LEN 15

is practically insufficient to store a regular pinctrl device name,
which are like 'e6060000.pin-controller-sh-pfc' or pin names like
'MX6QDL_PAD_ENET_REF_CLK', thus it is another indicator that the code
is barely usable, insufficiently tested and unprepossessing.

Of course it might be possible to increase MAX_NAME_LEN, and then add
.pin_config_dbg_parse_modify callbacks to the drivers, but the whole
idea of such a limited debug option looks inviable. A more flexible
way to functionally substitute the original approach is to implicitly
or explicitly use pinctrl_select_state() function whenever needed.

Signed-off-by: Vladimir Zapolskiy <vz@mleia.com>
Cc: Laurent Meunier <laurent.meunier@st.com>
Cc: Masahiro Yamada <yamada.masahiro@socionext.com>
Cc: Russell King <linux@arm.linux.org.uk>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/pinctrl/pinconf.c       | 222 ----------------------------------------
 include/linux/pinctrl/pinconf.h |   4 -
 2 files changed, 226 deletions(-)

(limited to 'include')

diff --git a/drivers/pinctrl/pinconf.c b/drivers/pinctrl/pinconf.c
index 2c7229380f08..2678603df14b 100644
--- a/drivers/pinctrl/pinconf.c
+++ b/drivers/pinctrl/pinconf.c
@@ -17,7 +17,6 @@
 #include <linux/slab.h>
 #include <linux/debugfs.h>
 #include <linux/seq_file.h>
-#include <linux/uaccess.h>
 #include <linux/pinctrl/machine.h>
 #include <linux/pinctrl/pinctrl.h>
 #include <linux/pinctrl/pinconf.h>
@@ -369,225 +368,6 @@ static int pinconf_groups_show(struct seq_file *s, void *what)
 DEFINE_SHOW_ATTRIBUTE(pinconf_pins);
 DEFINE_SHOW_ATTRIBUTE(pinconf_groups);
 
-#define MAX_NAME_LEN 15
-
-struct dbg_cfg {
-	enum pinctrl_map_type map_type;
-	char dev_name[MAX_NAME_LEN + 1];
-	char state_name[MAX_NAME_LEN + 1];
-	char pin_name[MAX_NAME_LEN + 1];
-};
-
-/*
- * Goal is to keep this structure as global in order to simply read the
- * pinconf-config file after a write to check config is as expected
- */
-static struct dbg_cfg pinconf_dbg_conf;
-
-/**
- * pinconf_dbg_config_print() - display the pinctrl config from the pinctrl
- * map, of the dev/pin/state that was last written to pinconf-config file.
- * @s: string filled in  with config description
- * @d: not used
- */
-static int pinconf_dbg_config_print(struct seq_file *s, void *d)
-{
-	struct pinctrl_maps *maps_node;
-	const struct pinctrl_map *map;
-	const struct pinctrl_map *found = NULL;
-	struct pinctrl_dev *pctldev;
-	struct dbg_cfg *dbg = &pinconf_dbg_conf;
-	int i;
-
-	mutex_lock(&pinctrl_maps_mutex);
-
-	/* Parse the pinctrl map and look for the elected pin/state */
-	for_each_maps(maps_node, i, map) {
-		if (map->type != dbg->map_type)
-			continue;
-		if (strcmp(map->dev_name, dbg->dev_name))
-			continue;
-		if (strcmp(map->name, dbg->state_name))
-			continue;
-
-		if (!strcmp(map->data.configs.group_or_pin, dbg->pin_name)) {
-			/* We found the right pin */
-			found = map;
-			break;
-		}
-	}
-
-	if (!found) {
-		seq_printf(s, "No config found for dev/state/pin, expected:\n");
-		seq_printf(s, "Searched dev:%s\n", dbg->dev_name);
-		seq_printf(s, "Searched state:%s\n", dbg->state_name);
-		seq_printf(s, "Searched pin:%s\n", dbg->pin_name);
-		seq_printf(s, "Use: modify config_pin <devname> "\
-				"<state> <pinname> <value>\n");
-		goto exit;
-	}
-
-	pctldev = get_pinctrl_dev_from_devname(found->ctrl_dev_name);
-	seq_printf(s, "Dev %s has config of %s in state %s:\n",
-		   dbg->dev_name, dbg->pin_name, dbg->state_name);
-	pinconf_show_config(s, pctldev, found->data.configs.configs,
-			    found->data.configs.num_configs);
-
-exit:
-	mutex_unlock(&pinctrl_maps_mutex);
-
-	return 0;
-}
-
-/**
- * pinconf_dbg_config_write() - modify the pinctrl config in the pinctrl
- * map, of a dev/pin/state entry based on user entries to pinconf-config
- * @user_buf: contains the modification request with expected format:
- *     modify <config> <devicename> <state> <name> <newvalue>
- * modify is literal string, alternatives like add/delete not supported yet
- * <config> is the configuration to be changed. Supported configs are
- *     "config_pin" or "config_group", alternatives like config_mux are not
- *     supported yet.
- * <devicename> <state> <name> are values that should match the pinctrl-maps
- * <newvalue> reflects the new config and is driver dependent
- */
-static ssize_t pinconf_dbg_config_write(struct file *file,
-	const char __user *user_buf, size_t count, loff_t *ppos)
-{
-	struct pinctrl_maps *maps_node;
-	const struct pinctrl_map *map;
-	const struct pinctrl_map *found = NULL;
-	struct pinctrl_dev *pctldev;
-	const struct pinconf_ops *confops = NULL;
-	struct dbg_cfg *dbg = &pinconf_dbg_conf;
-	const struct pinctrl_map_configs *configs;
-	char config[MAX_NAME_LEN + 1];
-	char buf[128];
-	char *b = &buf[0];
-	int buf_size;
-	char *token;
-	int i;
-
-	/* Get userspace string and assure termination */
-	buf_size = min(count, sizeof(buf) - 1);
-	if (copy_from_user(buf, user_buf, buf_size))
-		return -EFAULT;
-	buf[buf_size] = 0;
-
-	/*
-	 * need to parse entry and extract parameters:
-	 * modify configs_pin devicename state pinname newvalue
-	 */
-
-	/* Get arg: 'modify' */
-	token = strsep(&b, " ");
-	if (!token)
-		return -EINVAL;
-	if (strcmp(token, "modify"))
-		return -EINVAL;
-
-	/*
-	 * Get arg type: "config_pin" and "config_group"
-	 *                types are supported so far
-	 */
-	token = strsep(&b, " ");
-	if (!token)
-		return -EINVAL;
-	if (!strcmp(token, "config_pin"))
-		dbg->map_type = PIN_MAP_TYPE_CONFIGS_PIN;
-	else if (!strcmp(token, "config_group"))
-		dbg->map_type = PIN_MAP_TYPE_CONFIGS_GROUP;
-	else
-		return -EINVAL;
-
-	/* get arg 'device_name' */
-	token = strsep(&b, " ");
-	if (!token)
-		return -EINVAL;
-	if (strlen(token) >= MAX_NAME_LEN)
-		return -EINVAL;
-	strncpy(dbg->dev_name, token, MAX_NAME_LEN);
-
-	/* get arg 'state_name' */
-	token = strsep(&b, " ");
-	if (!token)
-		return -EINVAL;
-	if (strlen(token) >= MAX_NAME_LEN)
-		return -EINVAL;
-	strncpy(dbg->state_name, token, MAX_NAME_LEN);
-
-	/* get arg 'pin_name' */
-	token = strsep(&b, " ");
-	if (!token)
-		return -EINVAL;
-	if (strlen(token) >= MAX_NAME_LEN)
-		return -EINVAL;
-	strncpy(dbg->pin_name, token, MAX_NAME_LEN);
-
-	/* get new_value of config' */
-	token = strsep(&b, " ");
-	if (!token)
-		return -EINVAL;
-	if (strlen(token) >= MAX_NAME_LEN)
-		return -EINVAL;
-	strncpy(config, token, MAX_NAME_LEN);
-
-	mutex_lock(&pinctrl_maps_mutex);
-
-	/* Parse the pinctrl map and look for the selected dev/state/pin */
-	for_each_maps(maps_node, i, map) {
-		if (strcmp(map->dev_name, dbg->dev_name))
-			continue;
-		if (map->type != dbg->map_type)
-			continue;
-		if (strcmp(map->name, dbg->state_name))
-			continue;
-
-		/*  we found the right pin / state, so overwrite config */
-		if (!strcmp(map->data.configs.group_or_pin, dbg->pin_name)) {
-			found = map;
-			break;
-		}
-	}
-
-	if (!found) {
-		count = -EINVAL;
-		goto exit;
-	}
-
-	pctldev = get_pinctrl_dev_from_devname(found->ctrl_dev_name);
-	if (pctldev)
-		confops = pctldev->desc->confops;
-
-	if (confops && confops->pin_config_dbg_parse_modify) {
-		configs = &found->data.configs;
-		for (i = 0; i < configs->num_configs; i++) {
-			confops->pin_config_dbg_parse_modify(pctldev,
-						     config,
-						     &configs->configs[i]);
-		}
-	}
-
-exit:
-	mutex_unlock(&pinctrl_maps_mutex);
-
-	return count;
-}
-
-static int pinconf_dbg_config_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, pinconf_dbg_config_print, inode->i_private);
-}
-
-static const struct file_operations pinconf_dbg_pinconfig_fops = {
-	.open = pinconf_dbg_config_open,
-	.write = pinconf_dbg_config_write,
-	.read = seq_read,
-	.llseek = seq_lseek,
-	.release = single_release,
-	.owner = THIS_MODULE,
-};
-
 void pinconf_init_device_debugfs(struct dentry *devroot,
 			 struct pinctrl_dev *pctldev)
 {
@@ -595,8 +375,6 @@ void pinconf_init_device_debugfs(struct dentry *devroot,
 			    devroot, pctldev, &pinconf_pins_fops);
 	debugfs_create_file("pinconf-groups", S_IFREG | S_IRUGO,
 			    devroot, pctldev, &pinconf_groups_fops);
-	debugfs_create_file("pinconf-config",  (S_IRUGO | S_IWUSR | S_IWGRP),
-			    devroot, pctldev, &pinconf_dbg_pinconfig_fops);
 }
 
 #endif
diff --git a/include/linux/pinctrl/pinconf.h b/include/linux/pinctrl/pinconf.h
index 109468d9d849..93c9dd133e9d 100644
--- a/include/linux/pinctrl/pinconf.h
+++ b/include/linux/pinctrl/pinconf.h
@@ -29,7 +29,6 @@ struct seq_file;
  * @pin_config_group_get: get configurations for an entire pin group; should
  *	return -ENOTSUPP and -EINVAL using the same rules as pin_config_get.
  * @pin_config_group_set: configure all pins in a group
- * @pin_config_dbg_parse_modify: optional debugfs to modify a pin configuration
  * @pin_config_dbg_show: optional debugfs display hook that will provide
  *	per-device info for a certain pin in debugfs
  * @pin_config_group_dbg_show: optional debugfs display hook that will provide
@@ -55,9 +54,6 @@ struct pinconf_ops {
 				     unsigned selector,
 				     unsigned long *configs,
 				     unsigned num_configs);
-	int (*pin_config_dbg_parse_modify) (struct pinctrl_dev *pctldev,
-					   const char *arg,
-					   unsigned long *config);
 	void (*pin_config_dbg_show) (struct pinctrl_dev *pctldev,
 				     struct seq_file *s,
 				     unsigned offset);
-- 
cgit v1.2.3


From cb50358b83846e4dcb37137c431327c4dd68561b Mon Sep 17 00:00:00 2001
From: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Date: Fri, 25 Jan 2019 14:34:55 -0600
Subject: ASoC: add helper to change platform name for all dailinks

To reuse the same machine drivers with Atom/SST, Skylake and SOF, we
need to change the default platform_name (or platforms->name in the
"modern" representation).

So far, this override was done with an automatic override, which was
broken by a set of changes for DT platforms related to deferred probe
handling.

This automatic override is actually not really needed, the machine
driver can already receive the platform name as a platform_data
parameter. This is used e.g. for HDaudio support where we have
different PCI aliases used for different platforms. We can reuse the
same mechanism and modify the machine drivers to override the dailinks
prior to registrating the card.

This will require additional work for SOF, but with this helper it'll
be just two lines of additional code per machine driver which is
reused, not the end of the world.

This helper can be simplified when all drivers have transitioned to
the "modern" representation of dailinks.

Signed-off-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/soc.h | 31 +++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)

(limited to 'include')

diff --git a/include/sound/soc.h b/include/sound/soc.h
index 3089257ead95..95689680336b 100644
--- a/include/sound/soc.h
+++ b/include/sound/soc.h
@@ -1580,6 +1580,37 @@ struct snd_soc_dai *snd_soc_card_get_codec_dai(struct snd_soc_card *card,
 	return NULL;
 }
 
+static inline
+int snd_soc_fixup_dai_links_platform_name(struct snd_soc_card *card,
+					  const char *platform_name)
+{
+	struct snd_soc_dai_link *dai_link;
+	const char *name;
+	int i;
+
+	if (!platform_name) /* nothing to do */
+		return 0;
+
+	/* set platform name for each dailink */
+	for_each_card_prelinks(card, i, dai_link) {
+		name = devm_kstrdup(card->dev, platform_name, GFP_KERNEL);
+		if (!name)
+			return -ENOMEM;
+
+		if (dai_link->platforms)
+			/* only single platform is supported for now */
+			dai_link->platforms->name = name;
+		else
+			/*
+			 * legacy mode, this case will be removed when all
+			 * derivers are switched to modern style dai_link.
+			 */
+			dai_link->platform_name = name;
+	}
+
+	return 0;
+}
+
 #ifdef CONFIG_DEBUG_FS
 extern struct dentry *snd_soc_debugfs_root;
 #endif
-- 
cgit v1.2.3


From 64515dc899df898991b2b7e56f69f56f014ea888 Mon Sep 17 00:00:00 2001
From: Tomer Tayar <tomer.tayar@cavium.com>
Date: Mon, 28 Jan 2019 19:27:55 +0200
Subject: qed: Add infrastructure for error detection and recovery

This patch adds the detection and handling of a parity error ("process kill
event"), including the update of the protocol drivers, and the prevention
of any HW access that will lead to device access towards the host while
recovery is in progress.
It also provides the means for the protocol drivers to trigger a recovery
process on their decision.

Signed-off-by: Tomer Tayar <tomer.tayar@cavium.com>
Signed-off-by: Ariel Elior <ariel.elior@cavium.com>
Signed-off-by: Michal Kalderon <michal.kalderon@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed.h          |  4 ++
 drivers/net/ethernet/qlogic/qed/qed_dev.c      | 41 +++++++----
 drivers/net/ethernet/qlogic/qed/qed_hsi.h      |  2 +-
 drivers/net/ethernet/qlogic/qed/qed_hw.c       | 11 +++
 drivers/net/ethernet/qlogic/qed/qed_main.c     | 30 ++++++++
 drivers/net/ethernet/qlogic/qed/qed_mcp.c      | 94 ++++++++++++++++++++++++++
 drivers/net/ethernet/qlogic/qed/qed_mcp.h      | 32 +++++++++
 drivers/net/ethernet/qlogic/qed/qed_reg_addr.h |  2 +
 drivers/net/ethernet/qlogic/qed/qed_spq.c      | 22 ++++++
 drivers/net/ethernet/qlogic/qed/qed_sriov.c    |  9 ++-
 include/linux/qed/qed_if.h                     | 20 ++++++
 11 files changed, 251 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h
index b352e313e1f6..3b0955d34716 100644
--- a/drivers/net/ethernet/qlogic/qed/qed.h
+++ b/drivers/net/ethernet/qlogic/qed/qed.h
@@ -804,6 +804,9 @@ struct qed_dev {
 
 	u32				mcp_nvm_resp;
 
+	/* Recovery */
+	bool recov_in_prog;
+
 	/* Linux specific here */
 	struct  qede_dev		*edev;
 	struct  pci_dev			*pdev;
@@ -943,6 +946,7 @@ void qed_link_update(struct qed_hwfn *hwfn, struct qed_ptt *ptt);
 u32 qed_unzip_data(struct qed_hwfn *p_hwfn,
 		   u32 input_len, u8 *input_buf,
 		   u32 max_size, u8 *unzip_buf);
+void qed_schedule_recovery_handler(struct qed_hwfn *p_hwfn);
 void qed_get_protocol_stats(struct qed_dev *cdev,
 			    enum qed_mcp_protocol_type type,
 			    union qed_mcp_protocol_stats *stats);
diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c
index fa5f07e65672..b17003d9066c 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dev.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c
@@ -2140,6 +2140,11 @@ int qed_hw_init(struct qed_dev *cdev, struct qed_hw_init_params *p_params)
 			   "Load request was sent. Load code: 0x%x\n",
 			   load_code);
 
+		/* Only relevant for recovery:
+		 * Clear the indication after LOAD_REQ is responded by the MFW.
+		 */
+		cdev->recov_in_prog = false;
+
 		qed_mcp_set_capabilities(p_hwfn, p_hwfn->p_main_ptt);
 
 		qed_reset_mb_shadow(p_hwfn, p_hwfn->p_main_ptt);
@@ -2291,6 +2296,9 @@ static void qed_hw_timers_stop(struct qed_dev *cdev,
 	qed_wr(p_hwfn, p_ptt, TM_REG_PF_ENABLE_CONN, 0x0);
 	qed_wr(p_hwfn, p_ptt, TM_REG_PF_ENABLE_TASK, 0x0);
 
+	if (cdev->recov_in_prog)
+		return;
+
 	for (i = 0; i < QED_HW_STOP_RETRY_LIMIT; i++) {
 		if ((!qed_rd(p_hwfn, p_ptt,
 			     TM_REG_PF_SCAN_ACTIVE_CONN)) &&
@@ -2353,12 +2361,14 @@ int qed_hw_stop(struct qed_dev *cdev)
 		p_hwfn->hw_init_done = false;
 
 		/* Send unload command to MCP */
-		rc = qed_mcp_unload_req(p_hwfn, p_ptt);
-		if (rc) {
-			DP_NOTICE(p_hwfn,
-				  "Failed sending a UNLOAD_REQ command. rc = %d.\n",
-				  rc);
-			rc2 = -EINVAL;
+		if (!cdev->recov_in_prog) {
+			rc = qed_mcp_unload_req(p_hwfn, p_ptt);
+			if (rc) {
+				DP_NOTICE(p_hwfn,
+					  "Failed sending a UNLOAD_REQ command. rc = %d.\n",
+					  rc);
+				rc2 = -EINVAL;
+			}
 		}
 
 		qed_slowpath_irq_sync(p_hwfn);
@@ -2400,16 +2410,18 @@ int qed_hw_stop(struct qed_dev *cdev)
 		qed_wr(p_hwfn, p_ptt, DORQ_REG_PF_DB_ENABLE, 0);
 		qed_wr(p_hwfn, p_ptt, QM_REG_PF_EN, 0);
 
-		qed_mcp_unload_done(p_hwfn, p_ptt);
-		if (rc) {
-			DP_NOTICE(p_hwfn,
-				  "Failed sending a UNLOAD_DONE command. rc = %d.\n",
-				  rc);
-			rc2 = -EINVAL;
+		if (!cdev->recov_in_prog) {
+			rc = qed_mcp_unload_done(p_hwfn, p_ptt);
+			if (rc) {
+				DP_NOTICE(p_hwfn,
+					  "Failed sending a UNLOAD_DONE command. rc = %d.\n",
+					  rc);
+				rc2 = -EINVAL;
+			}
 		}
 	}
 
-	if (IS_PF(cdev)) {
+	if (IS_PF(cdev) && !cdev->recov_in_prog) {
 		p_hwfn = QED_LEADING_HWFN(cdev);
 		p_ptt = QED_LEADING_HWFN(cdev)->p_main_ptt;
 
@@ -3459,6 +3471,7 @@ static int qed_hw_prepare_single(struct qed_hwfn *p_hwfn,
 				 void __iomem *p_doorbells,
 				 enum qed_pci_personality personality)
 {
+	struct qed_dev *cdev = p_hwfn->cdev;
 	int rc = 0;
 
 	/* Split PCI bars evenly between hwfns */
@@ -3511,7 +3524,7 @@ static int qed_hw_prepare_single(struct qed_hwfn *p_hwfn,
 	/* Sending a mailbox to the MFW should be done after qed_get_hw_info()
 	 * is called as it sets the ports number in an engine.
 	 */
-	if (IS_LEAD_HWFN(p_hwfn)) {
+	if (IS_LEAD_HWFN(p_hwfn) && !cdev->recov_in_prog) {
 		rc = qed_mcp_initiate_pf_flr(p_hwfn, p_hwfn->p_main_ptt);
 		if (rc)
 			DP_NOTICE(p_hwfn, "Failed to initiate PF FLR\n");
diff --git a/drivers/net/ethernet/qlogic/qed/qed_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_hsi.h
index b13cfb449d8f..417121e74ee9 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_hsi.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h
@@ -12827,7 +12827,7 @@ enum MFW_DRV_MSG_TYPE {
 	MFW_DRV_MSG_LLDP_DATA_UPDATED,
 	MFW_DRV_MSG_DCBX_REMOTE_MIB_UPDATED,
 	MFW_DRV_MSG_DCBX_OPERATIONAL_MIB_UPDATED,
-	MFW_DRV_MSG_RESERVED4,
+	MFW_DRV_MSG_ERROR_RECOVERY,
 	MFW_DRV_MSG_BW_UPDATE,
 	MFW_DRV_MSG_S_TAG_UPDATE,
 	MFW_DRV_MSG_GET_LAN_STATS,
diff --git a/drivers/net/ethernet/qlogic/qed/qed_hw.c b/drivers/net/ethernet/qlogic/qed/qed_hw.c
index 70504dcf4087..72ec1c6bdf70 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_hw.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_hw.c
@@ -703,6 +703,17 @@ static int qed_dmae_execute_command(struct qed_hwfn *p_hwfn,
 	int qed_status = 0;
 	u32 offset = 0;
 
+	if (p_hwfn->cdev->recov_in_prog) {
+		DP_VERBOSE(p_hwfn,
+			   NETIF_MSG_HW,
+			   "Recovery is in progress. Avoid DMAE transaction [{src: addr 0x%llx, type %d}, {dst: addr 0x%llx, type %d}, size %d].\n",
+			   src_addr, src_type, dst_addr, dst_type,
+			   size_in_dwords);
+
+		/* Let the flow complete w/o any error handling */
+		return 0;
+	}
+
 	qed_dmae_opcode(p_hwfn,
 			(src_type == QED_DMAE_ADDRESS_GRC),
 			(dst_type == QED_DMAE_ADDRESS_GRC),
diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c
index 6adf5bda9811..b47352643fb5 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_main.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_main.c
@@ -359,6 +359,8 @@ static struct qed_dev *qed_probe(struct pci_dev *pdev,
 
 	qed_init_dp(cdev, params->dp_module, params->dp_level);
 
+	cdev->recov_in_prog = params->recov_in_prog;
+
 	rc = qed_init_pci(cdev, pdev);
 	if (rc) {
 		DP_ERR(cdev, "init pci failed\n");
@@ -2203,6 +2205,15 @@ static int qed_nvm_get_image(struct qed_dev *cdev, enum qed_nvm_images type,
 	return qed_mcp_get_nvm_image(hwfn, type, buf, len);
 }
 
+void qed_schedule_recovery_handler(struct qed_hwfn *p_hwfn)
+{
+	struct qed_common_cb_ops *ops = p_hwfn->cdev->protocol_ops.common;
+	void *cookie = p_hwfn->cdev->ops_cookie;
+
+	if (ops && ops->schedule_recovery_handler)
+		ops->schedule_recovery_handler(cookie);
+}
+
 static int qed_set_coalesce(struct qed_dev *cdev, u16 rx_coal, u16 tx_coal,
 			    void *handle)
 {
@@ -2226,6 +2237,23 @@ static int qed_set_led(struct qed_dev *cdev, enum qed_led_mode mode)
 	return status;
 }
 
+static int qed_recovery_process(struct qed_dev *cdev)
+{
+	struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev);
+	struct qed_ptt *p_ptt;
+	int rc = 0;
+
+	p_ptt = qed_ptt_acquire(p_hwfn);
+	if (!p_ptt)
+		return -EAGAIN;
+
+	rc = qed_start_recovery_process(p_hwfn, p_ptt);
+
+	qed_ptt_release(p_hwfn, p_ptt);
+
+	return rc;
+}
+
 static int qed_update_wol(struct qed_dev *cdev, bool enabled)
 {
 	struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev);
@@ -2380,6 +2408,8 @@ const struct qed_common_ops qed_common_ops_pass = {
 	.nvm_get_image = &qed_nvm_get_image,
 	.set_coalesce = &qed_set_coalesce,
 	.set_led = &qed_set_led,
+	.recovery_process = &qed_recovery_process,
+	.recovery_prolog = &qed_recovery_prolog,
 	.update_drv_state = &qed_update_drv_state,
 	.update_mac = &qed_update_mac,
 	.update_mtu = &qed_update_mtu,
diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
index 1024484d7dd8..bb8541847aa5 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
@@ -1549,6 +1549,60 @@ int qed_mcp_set_link(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, bool b_up)
 	return 0;
 }
 
+u32 qed_get_process_kill_counter(struct qed_hwfn *p_hwfn,
+				 struct qed_ptt *p_ptt)
+{
+	u32 path_offsize_addr, path_offsize, path_addr, proc_kill_cnt;
+
+	if (IS_VF(p_hwfn->cdev))
+		return -EINVAL;
+
+	path_offsize_addr = SECTION_OFFSIZE_ADDR(p_hwfn->mcp_info->public_base,
+						 PUBLIC_PATH);
+	path_offsize = qed_rd(p_hwfn, p_ptt, path_offsize_addr);
+	path_addr = SECTION_ADDR(path_offsize, QED_PATH_ID(p_hwfn));
+
+	proc_kill_cnt = qed_rd(p_hwfn, p_ptt,
+			       path_addr +
+			       offsetof(struct public_path, process_kill)) &
+			PROCESS_KILL_COUNTER_MASK;
+
+	return proc_kill_cnt;
+}
+
+static void qed_mcp_handle_process_kill(struct qed_hwfn *p_hwfn,
+					struct qed_ptt *p_ptt)
+{
+	struct qed_dev *cdev = p_hwfn->cdev;
+	u32 proc_kill_cnt;
+
+	/* Prevent possible attentions/interrupts during the recovery handling
+	 * and till its load phase, during which they will be re-enabled.
+	 */
+	qed_int_igu_disable_int(p_hwfn, p_ptt);
+
+	DP_NOTICE(p_hwfn, "Received a process kill indication\n");
+
+	/* The following operations should be done once, and thus in CMT mode
+	 * are carried out by only the first HW function.
+	 */
+	if (p_hwfn != QED_LEADING_HWFN(cdev))
+		return;
+
+	if (cdev->recov_in_prog) {
+		DP_NOTICE(p_hwfn,
+			  "Ignoring the indication since a recovery process is already in progress\n");
+		return;
+	}
+
+	cdev->recov_in_prog = true;
+
+	proc_kill_cnt = qed_get_process_kill_counter(p_hwfn, p_ptt);
+	DP_NOTICE(p_hwfn, "Process kill counter: %d\n", proc_kill_cnt);
+
+	qed_schedule_recovery_handler(p_hwfn);
+}
+
 static void qed_mcp_send_protocol_stats(struct qed_hwfn *p_hwfn,
 					struct qed_ptt *p_ptt,
 					enum MFW_DRV_MSG_TYPE type)
@@ -1779,6 +1833,9 @@ int qed_mcp_handle_events(struct qed_hwfn *p_hwfn,
 		case MFW_DRV_MSG_TRANSCEIVER_STATE_CHANGE:
 			qed_mcp_handle_transceiver_change(p_hwfn, p_ptt);
 			break;
+		case MFW_DRV_MSG_ERROR_RECOVERY:
+			qed_mcp_handle_process_kill(p_hwfn, p_ptt);
+			break;
 		case MFW_DRV_MSG_GET_LAN_STATS:
 		case MFW_DRV_MSG_GET_FCOE_STATS:
 		case MFW_DRV_MSG_GET_ISCSI_STATS:
@@ -2324,6 +2381,43 @@ int qed_mcp_get_flash_size(struct qed_hwfn *p_hwfn,
 	return 0;
 }
 
+int qed_start_recovery_process(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
+{
+	struct qed_dev *cdev = p_hwfn->cdev;
+
+	if (cdev->recov_in_prog) {
+		DP_NOTICE(p_hwfn,
+			  "Avoid triggering a recovery since such a process is already in progress\n");
+		return -EAGAIN;
+	}
+
+	DP_NOTICE(p_hwfn, "Triggering a recovery process\n");
+	qed_wr(p_hwfn, p_ptt, MISC_REG_AEU_GENERAL_ATTN_35, 0x1);
+
+	return 0;
+}
+
+#define QED_RECOVERY_PROLOG_SLEEP_MS    100
+
+int qed_recovery_prolog(struct qed_dev *cdev)
+{
+	struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev);
+	struct qed_ptt *p_ptt = p_hwfn->p_main_ptt;
+	int rc;
+
+	/* Allow ongoing PCIe transactions to complete */
+	msleep(QED_RECOVERY_PROLOG_SLEEP_MS);
+
+	/* Clear the PF's internal FID_enable in the PXP */
+	rc = qed_pglueb_set_pfid_enable(p_hwfn, p_ptt, false);
+	if (rc)
+		DP_NOTICE(p_hwfn,
+			  "qed_pglueb_set_pfid_enable() failed. rc = %d.\n",
+			  rc);
+
+	return rc;
+}
+
 static int
 qed_mcp_config_vf_msix_bb(struct qed_hwfn *p_hwfn,
 			  struct qed_ptt *p_ptt, u8 vf_id, u8 num)
diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.h b/drivers/net/ethernet/qlogic/qed/qed_mcp.h
index 387c5e649136..6e1d72a669ae 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_mcp.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.h
@@ -440,6 +440,38 @@ qed_mcp_send_drv_version(struct qed_hwfn *p_hwfn,
 			 struct qed_ptt *p_ptt,
 			 struct qed_mcp_drv_version *p_ver);
 
+/**
+ * @brief Read the MFW process kill counter
+ *
+ * @param p_hwfn
+ * @param p_ptt
+ *
+ * @return u32
+ */
+u32 qed_get_process_kill_counter(struct qed_hwfn *p_hwfn,
+				 struct qed_ptt *p_ptt);
+
+/**
+ * @brief Trigger a recovery process
+ *
+ *  @param p_hwfn
+ *  @param p_ptt
+ *
+ * @return int
+ */
+int qed_start_recovery_process(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt);
+
+/**
+ * @brief A recovery handler must call this function as its first step.
+ *        It is assumed that the handler is not run from an interrupt context.
+ *
+ *  @param cdev
+ *  @param p_ptt
+ *
+ * @return int
+ */
+int qed_recovery_prolog(struct qed_dev *cdev);
+
 /**
  * @brief Notify MFW about the change in base device properties
  *
diff --git a/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h b/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h
index 8939ed6e08b7..5ce825ca5f24 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h
@@ -518,6 +518,8 @@
 	0x180824UL
 #define  MISC_REG_AEU_GENERAL_ATTN_0 \
 	0x008400UL
+#define MISC_REG_AEU_GENERAL_ATTN_35 \
+	0x00848cUL
 #define  CAU_REG_SB_ADDR_MEMORY \
 	0x1c8000UL
 #define  CAU_REG_SB_VAR_MEMORY \
diff --git a/drivers/net/ethernet/qlogic/qed/qed_spq.c b/drivers/net/ethernet/qlogic/qed/qed_spq.c
index eb88bbc6b193..3e0f7c46bb1b 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_spq.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_spq.c
@@ -790,6 +790,17 @@ static int qed_spq_pend_post(struct qed_hwfn *p_hwfn)
 				 SPQ_HIGH_PRI_RESERVE_DEFAULT);
 }
 
+static void qed_spq_recov_set_ret_code(struct qed_spq_entry *p_ent,
+				       u8 *fw_return_code)
+{
+	if (!fw_return_code)
+		return;
+
+	if (p_ent->elem.hdr.protocol_id == PROTOCOLID_ROCE ||
+	    p_ent->elem.hdr.protocol_id == PROTOCOLID_IWARP)
+		*fw_return_code = RDMA_RETURN_OK;
+}
+
 /* Avoid overriding of SPQ entries when getting out-of-order completions, by
  * marking the completions in a bitmap and increasing the chain consumer only
  * for the first successive completed entries.
@@ -825,6 +836,17 @@ int qed_spq_post(struct qed_hwfn *p_hwfn,
 		return -EINVAL;
 	}
 
+	if (p_hwfn->cdev->recov_in_prog) {
+		DP_VERBOSE(p_hwfn,
+			   QED_MSG_SPQ,
+			   "Recovery is in progress. Skip spq post [cmd %02x protocol %02x]\n",
+			   p_ent->elem.hdr.cmd_id, p_ent->elem.hdr.protocol_id);
+
+		/* Let the flow complete w/o any error handling */
+		qed_spq_recov_set_ret_code(p_ent, fw_return_code);
+		return 0;
+	}
+
 	/* Complete the entry */
 	rc = qed_spq_fill_entry(p_hwfn, p_ent);
 
diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.c b/drivers/net/ethernet/qlogic/qed/qed_sriov.c
index ca6290fa0f30..71e28be58102 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_sriov.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.c
@@ -4447,6 +4447,13 @@ int qed_sriov_disable(struct qed_dev *cdev, bool pci_enabled)
 	if (cdev->p_iov_info && cdev->p_iov_info->num_vfs && pci_enabled)
 		pci_disable_sriov(cdev->pdev);
 
+	if (cdev->recov_in_prog) {
+		DP_VERBOSE(cdev,
+			   QED_MSG_IOV,
+			   "Skip SRIOV disable operations in the device since a recovery is in progress\n");
+		goto out;
+	}
+
 	for_each_hwfn(cdev, i) {
 		struct qed_hwfn *hwfn = &cdev->hwfns[i];
 		struct qed_ptt *ptt = qed_ptt_acquire(hwfn);
@@ -4486,7 +4493,7 @@ int qed_sriov_disable(struct qed_dev *cdev, bool pci_enabled)
 
 		qed_ptt_release(hwfn, ptt);
 	}
-
+out:
 	qed_iov_set_vfs_to_disable(cdev, false);
 
 	return 0;
diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h
index 5f818fda96bd..35170f74ed80 100644
--- a/include/linux/qed/qed_if.h
+++ b/include/linux/qed/qed_if.h
@@ -763,6 +763,7 @@ struct qed_probe_params {
 	u32 dp_module;
 	u8 dp_level;
 	bool is_vf;
+	bool recov_in_prog;
 };
 
 #define QED_DRV_VER_STR_SIZE 12
@@ -809,6 +810,7 @@ struct qed_common_cb_ops {
 	void (*arfs_filter_op)(void *dev, void *fltr, u8 fw_rc);
 	void	(*link_update)(void			*dev,
 			       struct qed_link_output	*link);
+	void (*schedule_recovery_handler)(void *dev);
 	void	(*dcbx_aen)(void *dev, struct qed_dcbx_get *get, u32 mib_type);
 	void (*get_generic_tlv_data)(void *dev, struct qed_generic_tlvs *data);
 	void (*get_protocol_tlv_data)(void *dev, void *data);
@@ -1056,6 +1058,24 @@ struct qed_common_ops {
 	int (*db_recovery_del)(struct qed_dev *cdev,
 			       void __iomem *db_addr, void *db_data);
 
+/**
+ * @brief recovery_process - Trigger a recovery process
+ *
+ * @param cdev
+ *
+ * @return 0 on success, error otherwise.
+ */
+	int (*recovery_process)(struct qed_dev *cdev);
+
+/**
+ * @brief recovery_prolog - Execute the prolog operations of a recovery process
+ *
+ * @param cdev
+ *
+ * @return 0 on success, error otherwise.
+ */
+	int (*recovery_prolog)(struct qed_dev *cdev);
+
 /**
  * @brief update_drv_state - API to inform the change in the driver state.
  *
-- 
cgit v1.2.3


From ccc67ef50b9085b895738d7720840eb6fe98745e Mon Sep 17 00:00:00 2001
From: Tomer Tayar <tomer.tayar@cavium.com>
Date: Mon, 28 Jan 2019 19:27:56 +0200
Subject: qede: Error recovery process

This patch adds the error recovery process in the qede driver.
The process includes a partial/customized driver unload and load, which
allows it to look like a short suspend period to the kernel while
preserving the net devices' state.

Signed-off-by: Tomer Tayar <tomer.tayar@cavium.com>
Signed-off-by: Ariel Elior <ariel.elior@cavium.com>
Signed-off-by: Michal Kalderon <michal.kalderon@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qede/qede.h      |   3 +
 drivers/net/ethernet/qlogic/qede/qede_main.c | 292 ++++++++++++++++++++++-----
 drivers/net/ethernet/qlogic/qede/qede_rdma.c |  63 ++++--
 include/linux/qed/qede_rdma.h                |  10 +-
 4 files changed, 294 insertions(+), 74 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h
index 613249d1e967..843416404aeb 100644
--- a/drivers/net/ethernet/qlogic/qede/qede.h
+++ b/drivers/net/ethernet/qlogic/qede/qede.h
@@ -162,6 +162,7 @@ struct qede_rdma_dev {
 	struct list_head entry;
 	struct list_head rdma_event_list;
 	struct workqueue_struct *rdma_wq;
+	bool exp_recovery;
 };
 
 struct qede_ptp;
@@ -264,6 +265,7 @@ struct qede_dev {
 enum QEDE_STATE {
 	QEDE_STATE_CLOSED,
 	QEDE_STATE_OPEN,
+	QEDE_STATE_RECOVERY,
 };
 
 #define HILO_U64(hi, lo)		((((u64)(hi)) << 32) + (lo))
@@ -462,6 +464,7 @@ struct qede_fastpath {
 #define QEDE_CSUM_UNNECESSARY		BIT(1)
 #define QEDE_TUNN_CSUM_UNNECESSARY	BIT(2)
 
+#define QEDE_SP_RECOVERY		0
 #define QEDE_SP_RX_MODE			1
 
 #ifdef CONFIG_RFS_ACCEL
diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c
index 5a74fcbdbc2b..6b4d96635238 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_main.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_main.c
@@ -133,23 +133,12 @@ static int qede_probe(struct pci_dev *pdev, const struct pci_device_id *id);
 static void qede_remove(struct pci_dev *pdev);
 static void qede_shutdown(struct pci_dev *pdev);
 static void qede_link_update(void *dev, struct qed_link_output *link);
+static void qede_schedule_recovery_handler(void *dev);
+static void qede_recovery_handler(struct qede_dev *edev);
 static void qede_get_eth_tlv_data(void *edev, void *data);
 static void qede_get_generic_tlv_data(void *edev,
 				      struct qed_generic_tlvs *data);
 
-/* The qede lock is used to protect driver state change and driver flows that
- * are not reentrant.
- */
-void __qede_lock(struct qede_dev *edev)
-{
-	mutex_lock(&edev->qede_lock);
-}
-
-void __qede_unlock(struct qede_dev *edev)
-{
-	mutex_unlock(&edev->qede_lock);
-}
-
 #ifdef CONFIG_QED_SRIOV
 static int qede_set_vf_vlan(struct net_device *ndev, int vf, u16 vlan, u8 qos,
 			    __be16 vlan_proto)
@@ -231,6 +220,7 @@ static struct qed_eth_cb_ops qede_ll_ops = {
 		.arfs_filter_op = qede_arfs_filter_op,
 #endif
 		.link_update = qede_link_update,
+		.schedule_recovery_handler = qede_schedule_recovery_handler,
 		.get_generic_tlv_data = qede_get_generic_tlv_data,
 		.get_protocol_tlv_data = qede_get_eth_tlv_data,
 	},
@@ -950,11 +940,57 @@ err:
 	return -ENOMEM;
 }
 
+/* The qede lock is used to protect driver state change and driver flows that
+ * are not reentrant.
+ */
+void __qede_lock(struct qede_dev *edev)
+{
+	mutex_lock(&edev->qede_lock);
+}
+
+void __qede_unlock(struct qede_dev *edev)
+{
+	mutex_unlock(&edev->qede_lock);
+}
+
+/* This version of the lock should be used when acquiring the RTNL lock is also
+ * needed in addition to the internal qede lock.
+ */
+void qede_lock(struct qede_dev *edev)
+{
+	rtnl_lock();
+	__qede_lock(edev);
+}
+
+void qede_unlock(struct qede_dev *edev)
+{
+	__qede_unlock(edev);
+	rtnl_unlock();
+}
+
 static void qede_sp_task(struct work_struct *work)
 {
 	struct qede_dev *edev = container_of(work, struct qede_dev,
 					     sp_task.work);
 
+	/* The locking scheme depends on the specific flag:
+	 * In case of QEDE_SP_RECOVERY, acquiring the RTNL lock is required to
+	 * ensure that ongoing flows are ended and new ones are not started.
+	 * In other cases - only the internal qede lock should be acquired.
+	 */
+
+	if (test_and_clear_bit(QEDE_SP_RECOVERY, &edev->sp_flags)) {
+#ifdef CONFIG_QED_SRIOV
+		/* SRIOV must be disabled outside the lock to avoid a deadlock.
+		 * The recovery of the active VFs is currently not supported.
+		 */
+		qede_sriov_configure(edev->pdev, 0);
+#endif
+		qede_lock(edev);
+		qede_recovery_handler(edev);
+		qede_unlock(edev);
+	}
+
 	__qede_lock(edev);
 
 	if (test_and_clear_bit(QEDE_SP_RX_MODE, &edev->sp_flags))
@@ -1031,6 +1067,7 @@ static void qede_log_probe(struct qede_dev *edev)
 
 enum qede_probe_mode {
 	QEDE_PROBE_NORMAL,
+	QEDE_PROBE_RECOVERY,
 };
 
 static int __qede_probe(struct pci_dev *pdev, u32 dp_module, u8 dp_level,
@@ -1051,6 +1088,7 @@ static int __qede_probe(struct pci_dev *pdev, u32 dp_module, u8 dp_level,
 	probe_params.dp_module = dp_module;
 	probe_params.dp_level = dp_level;
 	probe_params.is_vf = is_vf;
+	probe_params.recov_in_prog = (mode == QEDE_PROBE_RECOVERY);
 	cdev = qed_ops->common->probe(pdev, &probe_params);
 	if (!cdev) {
 		rc = -ENODEV;
@@ -1078,11 +1116,20 @@ static int __qede_probe(struct pci_dev *pdev, u32 dp_module, u8 dp_level,
 	if (rc)
 		goto err2;
 
-	edev = qede_alloc_etherdev(cdev, pdev, &dev_info, dp_module,
-				   dp_level);
-	if (!edev) {
-		rc = -ENOMEM;
-		goto err2;
+	if (mode != QEDE_PROBE_RECOVERY) {
+		edev = qede_alloc_etherdev(cdev, pdev, &dev_info, dp_module,
+					   dp_level);
+		if (!edev) {
+			rc = -ENOMEM;
+			goto err2;
+		}
+	} else {
+		struct net_device *ndev = pci_get_drvdata(pdev);
+
+		edev = netdev_priv(ndev);
+		edev->cdev = cdev;
+		memset(&edev->stats, 0, sizeof(edev->stats));
+		memcpy(&edev->dev_info, &dev_info, sizeof(dev_info));
 	}
 
 	if (is_vf)
@@ -1090,28 +1137,31 @@ static int __qede_probe(struct pci_dev *pdev, u32 dp_module, u8 dp_level,
 
 	qede_init_ndev(edev);
 
-	rc = qede_rdma_dev_add(edev);
+	rc = qede_rdma_dev_add(edev, (mode == QEDE_PROBE_RECOVERY));
 	if (rc)
 		goto err3;
 
-	/* Prepare the lock prior to the registration of the netdev,
-	 * as once it's registered we might reach flows requiring it
-	 * [it's even possible to reach a flow needing it directly
-	 * from there, although it's unlikely].
-	 */
-	INIT_DELAYED_WORK(&edev->sp_task, qede_sp_task);
-	mutex_init(&edev->qede_lock);
-	rc = register_netdev(edev->ndev);
-	if (rc) {
-		DP_NOTICE(edev, "Cannot register net-device\n");
-		goto err4;
+	if (mode != QEDE_PROBE_RECOVERY) {
+		/* Prepare the lock prior to the registration of the netdev,
+		 * as once it's registered we might reach flows requiring it
+		 * [it's even possible to reach a flow needing it directly
+		 * from there, although it's unlikely].
+		 */
+		INIT_DELAYED_WORK(&edev->sp_task, qede_sp_task);
+		mutex_init(&edev->qede_lock);
+
+		rc = register_netdev(edev->ndev);
+		if (rc) {
+			DP_NOTICE(edev, "Cannot register net-device\n");
+			goto err4;
+		}
 	}
 
 	edev->ops->common->set_name(cdev, edev->ndev->name);
 
 	/* PTP not supported on VFs */
 	if (!is_vf)
-		qede_ptp_enable(edev, true);
+		qede_ptp_enable(edev, (mode == QEDE_PROBE_NORMAL));
 
 	edev->ops->register_ops(cdev, &qede_ll_ops, edev);
 
@@ -1126,7 +1176,7 @@ static int __qede_probe(struct pci_dev *pdev, u32 dp_module, u8 dp_level,
 	return 0;
 
 err4:
-	qede_rdma_dev_remove(edev);
+	qede_rdma_dev_remove(edev, (mode == QEDE_PROBE_RECOVERY));
 err3:
 	free_netdev(edev->ndev);
 err2:
@@ -1162,6 +1212,7 @@ static int qede_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 
 enum qede_remove_mode {
 	QEDE_REMOVE_NORMAL,
+	QEDE_REMOVE_RECOVERY,
 };
 
 static void __qede_remove(struct pci_dev *pdev, enum qede_remove_mode mode)
@@ -1172,15 +1223,19 @@ static void __qede_remove(struct pci_dev *pdev, enum qede_remove_mode mode)
 
 	DP_INFO(edev, "Starting qede_remove\n");
 
-	qede_rdma_dev_remove(edev);
-	unregister_netdev(ndev);
-	cancel_delayed_work_sync(&edev->sp_task);
+	qede_rdma_dev_remove(edev, (mode == QEDE_REMOVE_RECOVERY));
 
-	qede_ptp_disable(edev);
+	if (mode != QEDE_REMOVE_RECOVERY) {
+		unregister_netdev(ndev);
 
-	edev->ops->common->set_power_state(cdev, PCI_D0);
+		cancel_delayed_work_sync(&edev->sp_task);
 
-	pci_set_drvdata(pdev, NULL);
+		edev->ops->common->set_power_state(cdev, PCI_D0);
+
+		pci_set_drvdata(pdev, NULL);
+	}
+
+	qede_ptp_disable(edev);
 
 	/* Use global ops since we've freed edev */
 	qed_ops->common->slowpath_stop(cdev);
@@ -1194,7 +1249,8 @@ static void __qede_remove(struct pci_dev *pdev, enum qede_remove_mode mode)
 	 * [e.g., QED register callbacks] won't break anything when
 	 * accessing the netdevice.
 	 */
-	 free_netdev(ndev);
+	if (mode != QEDE_REMOVE_RECOVERY)
+		free_netdev(ndev);
 
 	dev_info(&pdev->dev, "Ending qede_remove successfully\n");
 }
@@ -1539,6 +1595,58 @@ static int qede_alloc_mem_load(struct qede_dev *edev)
 	return 0;
 }
 
+static void qede_empty_tx_queue(struct qede_dev *edev,
+				struct qede_tx_queue *txq)
+{
+	unsigned int pkts_compl = 0, bytes_compl = 0;
+	struct netdev_queue *netdev_txq;
+	int rc, len = 0;
+
+	netdev_txq = netdev_get_tx_queue(edev->ndev, txq->ndev_txq_id);
+
+	while (qed_chain_get_cons_idx(&txq->tx_pbl) !=
+	       qed_chain_get_prod_idx(&txq->tx_pbl)) {
+		DP_VERBOSE(edev, NETIF_MSG_IFDOWN,
+			   "Freeing a packet on tx queue[%d]: chain_cons 0x%x, chain_prod 0x%x\n",
+			   txq->index, qed_chain_get_cons_idx(&txq->tx_pbl),
+			   qed_chain_get_prod_idx(&txq->tx_pbl));
+
+		rc = qede_free_tx_pkt(edev, txq, &len);
+		if (rc) {
+			DP_NOTICE(edev,
+				  "Failed to free a packet on tx queue[%d]: chain_cons 0x%x, chain_prod 0x%x\n",
+				  txq->index,
+				  qed_chain_get_cons_idx(&txq->tx_pbl),
+				  qed_chain_get_prod_idx(&txq->tx_pbl));
+			break;
+		}
+
+		bytes_compl += len;
+		pkts_compl++;
+		txq->sw_tx_cons++;
+	}
+
+	netdev_tx_completed_queue(netdev_txq, pkts_compl, bytes_compl);
+}
+
+static void qede_empty_tx_queues(struct qede_dev *edev)
+{
+	int i;
+
+	for_each_queue(i)
+		if (edev->fp_array[i].type & QEDE_FASTPATH_TX) {
+			int cos;
+
+			for_each_cos_in_txq(edev, cos) {
+				struct qede_fastpath *fp;
+
+				fp = &edev->fp_array[i];
+				qede_empty_tx_queue(edev,
+						    &fp->txq[cos]);
+			}
+		}
+}
+
 /* This function inits fp content and resets the SB, RXQ and TXQ structures */
 static void qede_init_fp(struct qede_dev *edev)
 {
@@ -2053,6 +2161,7 @@ out:
 
 enum qede_unload_mode {
 	QEDE_UNLOAD_NORMAL,
+	QEDE_UNLOAD_RECOVERY,
 };
 
 static void qede_unload(struct qede_dev *edev, enum qede_unload_mode mode,
@@ -2068,7 +2177,8 @@ static void qede_unload(struct qede_dev *edev, enum qede_unload_mode mode,
 
 	clear_bit(QEDE_FLAGS_LINK_REQUESTED, &edev->flags);
 
-	edev->state = QEDE_STATE_CLOSED;
+	if (mode != QEDE_UNLOAD_RECOVERY)
+		edev->state = QEDE_STATE_CLOSED;
 
 	qede_rdma_dev_event_close(edev);
 
@@ -2076,17 +2186,20 @@ static void qede_unload(struct qede_dev *edev, enum qede_unload_mode mode,
 	netif_tx_disable(edev->ndev);
 	netif_carrier_off(edev->ndev);
 
-	/* Reset the link */
-	memset(&link_params, 0, sizeof(link_params));
-	link_params.link_up = false;
-	edev->ops->common->set_link(edev->cdev, &link_params);
-	rc = qede_stop_queues(edev);
-	if (rc) {
-		qede_sync_free_irqs(edev);
-		goto out;
-	}
+	if (mode != QEDE_UNLOAD_RECOVERY) {
+		/* Reset the link */
+		memset(&link_params, 0, sizeof(link_params));
+		link_params.link_up = false;
+		edev->ops->common->set_link(edev->cdev, &link_params);
 
-	DP_INFO(edev, "Stopped Queues\n");
+		rc = qede_stop_queues(edev);
+		if (rc) {
+			qede_sync_free_irqs(edev);
+			goto out;
+		}
+
+		DP_INFO(edev, "Stopped Queues\n");
+	}
 
 	qede_vlan_mark_nonconfigured(edev);
 	edev->ops->fastpath_stop(edev->cdev);
@@ -2102,18 +2215,26 @@ static void qede_unload(struct qede_dev *edev, enum qede_unload_mode mode,
 
 	qede_napi_disable_remove(edev);
 
+	if (mode == QEDE_UNLOAD_RECOVERY)
+		qede_empty_tx_queues(edev);
+
 	qede_free_mem_load(edev);
 	qede_free_fp_array(edev);
 
 out:
 	if (!is_locked)
 		__qede_unlock(edev);
+
+	if (mode != QEDE_UNLOAD_RECOVERY)
+		DP_NOTICE(edev, "Link is down\n");
+
 	DP_INFO(edev, "Ending qede unload\n");
 }
 
 enum qede_load_mode {
 	QEDE_LOAD_NORMAL,
 	QEDE_LOAD_RELOAD,
+	QEDE_LOAD_RECOVERY,
 };
 
 static int qede_load(struct qede_dev *edev, enum qede_load_mode mode,
@@ -2293,6 +2414,77 @@ static void qede_link_update(void *dev, struct qed_link_output *link)
 	}
 }
 
+static void qede_schedule_recovery_handler(void *dev)
+{
+	struct qede_dev *edev = dev;
+
+	if (edev->state == QEDE_STATE_RECOVERY) {
+		DP_NOTICE(edev,
+			  "Avoid scheduling a recovery handling since already in recovery state\n");
+		return;
+	}
+
+	set_bit(QEDE_SP_RECOVERY, &edev->sp_flags);
+	schedule_delayed_work(&edev->sp_task, 0);
+
+	DP_INFO(edev, "Scheduled a recovery handler\n");
+}
+
+static void qede_recovery_failed(struct qede_dev *edev)
+{
+	netdev_err(edev->ndev, "Recovery handling has failed. Power cycle is needed.\n");
+
+	netif_device_detach(edev->ndev);
+
+	if (edev->cdev)
+		edev->ops->common->set_power_state(edev->cdev, PCI_D3hot);
+}
+
+static void qede_recovery_handler(struct qede_dev *edev)
+{
+	u32 curr_state = edev->state;
+	int rc;
+
+	DP_NOTICE(edev, "Starting a recovery process\n");
+
+	/* No need to acquire first the qede_lock since is done by qede_sp_task
+	 * before calling this function.
+	 */
+	edev->state = QEDE_STATE_RECOVERY;
+
+	edev->ops->common->recovery_prolog(edev->cdev);
+
+	if (curr_state == QEDE_STATE_OPEN)
+		qede_unload(edev, QEDE_UNLOAD_RECOVERY, true);
+
+	__qede_remove(edev->pdev, QEDE_REMOVE_RECOVERY);
+
+	rc = __qede_probe(edev->pdev, edev->dp_module, edev->dp_level,
+			  IS_VF(edev), QEDE_PROBE_RECOVERY);
+	if (rc) {
+		edev->cdev = NULL;
+		goto err;
+	}
+
+	if (curr_state == QEDE_STATE_OPEN) {
+		rc = qede_load(edev, QEDE_LOAD_RECOVERY, true);
+		if (rc)
+			goto err;
+
+		qede_config_rx_mode(edev->ndev);
+		udp_tunnel_get_rx_info(edev->ndev);
+	}
+
+	edev->state = curr_state;
+
+	DP_NOTICE(edev, "Recovery handling is done\n");
+
+	return;
+
+err:
+	qede_recovery_failed(edev);
+}
+
 static bool qede_is_txq_full(struct qede_dev *edev, struct qede_tx_queue *txq)
 {
 	struct netdev_queue *netdev_txq;
diff --git a/drivers/net/ethernet/qlogic/qede/qede_rdma.c b/drivers/net/ethernet/qlogic/qede/qede_rdma.c
index 1900bf7e67d1..ffabc2d2f082 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_rdma.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_rdma.c
@@ -50,6 +50,8 @@ static void _qede_rdma_dev_add(struct qede_dev *edev)
 	if (!qedr_drv)
 		return;
 
+	/* Leftovers from previous error recovery */
+	edev->rdma_info.exp_recovery = false;
 	edev->rdma_info.qedr_dev = qedr_drv->add(edev->cdev, edev->pdev,
 						 edev->ndev);
 }
@@ -87,21 +89,26 @@ static void qede_rdma_destroy_wq(struct qede_dev *edev)
 	destroy_workqueue(edev->rdma_info.rdma_wq);
 }
 
-int qede_rdma_dev_add(struct qede_dev *edev)
+int qede_rdma_dev_add(struct qede_dev *edev, bool recovery)
 {
-	int rc = 0;
+	int rc;
 
-	if (qede_rdma_supported(edev)) {
-		rc = qede_rdma_create_wq(edev);
-		if (rc)
-			return rc;
+	if (!qede_rdma_supported(edev))
+		return 0;
 
-		INIT_LIST_HEAD(&edev->rdma_info.entry);
-		mutex_lock(&qedr_dev_list_lock);
-		list_add_tail(&edev->rdma_info.entry, &qedr_dev_list);
-		_qede_rdma_dev_add(edev);
-		mutex_unlock(&qedr_dev_list_lock);
-	}
+	/* Cannot start qedr while recovering since it wasn't fully stopped */
+	if (recovery)
+		return 0;
+
+	rc = qede_rdma_create_wq(edev);
+	if (rc)
+		return rc;
+
+	INIT_LIST_HEAD(&edev->rdma_info.entry);
+	mutex_lock(&qedr_dev_list_lock);
+	list_add_tail(&edev->rdma_info.entry, &qedr_dev_list);
+	_qede_rdma_dev_add(edev);
+	mutex_unlock(&qedr_dev_list_lock);
 
 	return rc;
 }
@@ -110,19 +117,30 @@ static void _qede_rdma_dev_remove(struct qede_dev *edev)
 {
 	if (qedr_drv && qedr_drv->remove && edev->rdma_info.qedr_dev)
 		qedr_drv->remove(edev->rdma_info.qedr_dev);
-	edev->rdma_info.qedr_dev = NULL;
 }
 
-void qede_rdma_dev_remove(struct qede_dev *edev)
+void qede_rdma_dev_remove(struct qede_dev *edev, bool recovery)
 {
 	if (!qede_rdma_supported(edev))
 		return;
 
-	qede_rdma_destroy_wq(edev);
-	mutex_lock(&qedr_dev_list_lock);
-	_qede_rdma_dev_remove(edev);
-	list_del(&edev->rdma_info.entry);
-	mutex_unlock(&qedr_dev_list_lock);
+	/* Cannot remove qedr while recovering since it wasn't fully stopped */
+	if (!recovery) {
+		qede_rdma_destroy_wq(edev);
+		mutex_lock(&qedr_dev_list_lock);
+		if (!edev->rdma_info.exp_recovery)
+			_qede_rdma_dev_remove(edev);
+		edev->rdma_info.qedr_dev = NULL;
+		list_del(&edev->rdma_info.entry);
+		mutex_unlock(&qedr_dev_list_lock);
+	} else {
+		if (!edev->rdma_info.exp_recovery) {
+			mutex_lock(&qedr_dev_list_lock);
+			_qede_rdma_dev_remove(edev);
+			mutex_unlock(&qedr_dev_list_lock);
+		}
+		edev->rdma_info.exp_recovery = true;
+	}
 }
 
 static void _qede_rdma_dev_open(struct qede_dev *edev)
@@ -204,7 +222,8 @@ void qede_rdma_unregister_driver(struct qedr_driver *drv)
 
 	mutex_lock(&qedr_dev_list_lock);
 	list_for_each_entry(edev, &qedr_dev_list, rdma_info.entry) {
-		if (edev->rdma_info.qedr_dev)
+		/* If device has experienced recovery it was already removed */
+		if (edev->rdma_info.qedr_dev && !edev->rdma_info.exp_recovery)
 			_qede_rdma_dev_remove(edev);
 	}
 	qedr_drv = NULL;
@@ -284,6 +303,10 @@ static void qede_rdma_add_event(struct qede_dev *edev,
 {
 	struct qede_rdma_event_work *event_node;
 
+	/* If a recovery was experienced avoid adding the event */
+	if (edev->rdma_info.exp_recovery)
+		return;
+
 	if (!edev->rdma_info.qedr_dev)
 		return;
 
diff --git a/include/linux/qed/qede_rdma.h b/include/linux/qed/qede_rdma.h
index 9904617a9730..5a00c7a473bf 100644
--- a/include/linux/qed/qede_rdma.h
+++ b/include/linux/qed/qede_rdma.h
@@ -74,21 +74,23 @@ void qede_rdma_unregister_driver(struct qedr_driver *drv);
 bool qede_rdma_supported(struct qede_dev *dev);
 
 #if IS_ENABLED(CONFIG_QED_RDMA)
-int qede_rdma_dev_add(struct qede_dev *dev);
+int qede_rdma_dev_add(struct qede_dev *dev, bool recovery);
 void qede_rdma_dev_event_open(struct qede_dev *dev);
 void qede_rdma_dev_event_close(struct qede_dev *dev);
-void qede_rdma_dev_remove(struct qede_dev *dev);
+void qede_rdma_dev_remove(struct qede_dev *dev, bool recovery);
 void qede_rdma_event_changeaddr(struct qede_dev *edr);
 
 #else
-static inline int qede_rdma_dev_add(struct qede_dev *dev)
+static inline int qede_rdma_dev_add(struct qede_dev *dev,
+				    bool recovery)
 {
 	return 0;
 }
 
 static inline void qede_rdma_dev_event_open(struct qede_dev *dev) {}
 static inline void qede_rdma_dev_event_close(struct qede_dev *dev) {}
-static inline void qede_rdma_dev_remove(struct qede_dev *dev) {}
+static inline void qede_rdma_dev_remove(struct qede_dev *dev,
+					bool recovery) {}
 static inline void qede_rdma_event_changeaddr(struct qede_dev *edr) {}
 #endif
 #endif
-- 
cgit v1.2.3


From c8aa703822bf811269975cf7251b5eaad4c38e9c Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <sdf@google.com>
Date: Mon, 28 Jan 2019 08:53:53 -0800
Subject: net/flow_dissector: move bpf case into __skb_flow_bpf_dissect

This way, we can reuse it for flow dissector in BPF_PROG_TEST_RUN.

No functional changes.

Signed-off-by: Stanislav Fomichev <sdf@google.com>
Acked-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/skbuff.h    |  5 +++
 net/core/flow_dissector.c | 92 +++++++++++++++++++++++++++--------------------
 2 files changed, 59 insertions(+), 38 deletions(-)

(limited to 'include')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 93f56fddd92a..be762fc34ff3 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1221,6 +1221,11 @@ static inline int skb_flow_dissector_bpf_prog_detach(const union bpf_attr *attr)
 }
 #endif
 
+struct bpf_flow_keys;
+bool __skb_flow_bpf_dissect(struct bpf_prog *prog,
+			    const struct sk_buff *skb,
+			    struct flow_dissector *flow_dissector,
+			    struct bpf_flow_keys *flow_keys);
 bool __skb_flow_dissect(const struct sk_buff *skb,
 			struct flow_dissector *flow_dissector,
 			void *target_container,
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 9f2840510e63..bb1a54747d64 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -683,6 +683,46 @@ static void __skb_flow_bpf_to_target(const struct bpf_flow_keys *flow_keys,
 	}
 }
 
+bool __skb_flow_bpf_dissect(struct bpf_prog *prog,
+			    const struct sk_buff *skb,
+			    struct flow_dissector *flow_dissector,
+			    struct bpf_flow_keys *flow_keys)
+{
+	struct bpf_skb_data_end cb_saved;
+	struct bpf_skb_data_end *cb;
+	u32 result;
+
+	/* Note that even though the const qualifier is discarded
+	 * throughout the execution of the BPF program, all changes(the
+	 * control block) are reverted after the BPF program returns.
+	 * Therefore, __skb_flow_dissect does not alter the skb.
+	 */
+
+	cb = (struct bpf_skb_data_end *)skb->cb;
+
+	/* Save Control Block */
+	memcpy(&cb_saved, cb, sizeof(cb_saved));
+	memset(cb, 0, sizeof(*cb));
+
+	/* Pass parameters to the BPF program */
+	memset(flow_keys, 0, sizeof(*flow_keys));
+	cb->qdisc_cb.flow_keys = flow_keys;
+	flow_keys->nhoff = skb_network_offset(skb);
+	flow_keys->thoff = flow_keys->nhoff;
+
+	bpf_compute_data_pointers((struct sk_buff *)skb);
+	result = BPF_PROG_RUN(prog, skb);
+
+	/* Restore state */
+	memcpy(cb, &cb_saved, sizeof(cb_saved));
+
+	flow_keys->nhoff = clamp_t(u16, flow_keys->nhoff, 0, skb->len);
+	flow_keys->thoff = clamp_t(u16, flow_keys->thoff,
+				   flow_keys->nhoff, skb->len);
+
+	return result == BPF_OK;
+}
+
 /**
  * __skb_flow_dissect - extract the flow_keys struct and return it
  * @skb: sk_buff to extract the flow from, can be NULL if the rest are specified
@@ -714,7 +754,6 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
 	struct flow_dissector_key_vlan *key_vlan;
 	enum flow_dissect_ret fdret;
 	enum flow_dissector_key_id dissector_vlan = FLOW_DISSECTOR_KEY_MAX;
-	struct bpf_prog *attached = NULL;
 	int num_hdrs = 0;
 	u8 ip_proto = 0;
 	bool ret;
@@ -754,53 +793,30 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
 					      FLOW_DISSECTOR_KEY_BASIC,
 					      target_container);
 
-	rcu_read_lock();
 	if (skb) {
+		struct bpf_flow_keys flow_keys;
+		struct bpf_prog *attached = NULL;
+
+		rcu_read_lock();
+
 		if (skb->dev)
 			attached = rcu_dereference(dev_net(skb->dev)->flow_dissector_prog);
 		else if (skb->sk)
 			attached = rcu_dereference(sock_net(skb->sk)->flow_dissector_prog);
 		else
 			WARN_ON_ONCE(1);
-	}
-	if (attached) {
-		/* Note that even though the const qualifier is discarded
-		 * throughout the execution of the BPF program, all changes(the
-		 * control block) are reverted after the BPF program returns.
-		 * Therefore, __skb_flow_dissect does not alter the skb.
-		 */
-		struct bpf_flow_keys flow_keys = {};
-		struct bpf_skb_data_end cb_saved;
-		struct bpf_skb_data_end *cb;
-		u32 result;
-
-		cb = (struct bpf_skb_data_end *)skb->cb;
-
-		/* Save Control Block */
-		memcpy(&cb_saved, cb, sizeof(cb_saved));
-		memset(cb, 0, sizeof(cb_saved));
 
-		/* Pass parameters to the BPF program */
-		cb->qdisc_cb.flow_keys = &flow_keys;
-		flow_keys.nhoff = nhoff;
-		flow_keys.thoff = nhoff;
-
-		bpf_compute_data_pointers((struct sk_buff *)skb);
-		result = BPF_PROG_RUN(attached, skb);
-
-		/* Restore state */
-		memcpy(cb, &cb_saved, sizeof(cb_saved));
-
-		flow_keys.nhoff = clamp_t(u16, flow_keys.nhoff, 0, skb->len);
-		flow_keys.thoff = clamp_t(u16, flow_keys.thoff,
-					  flow_keys.nhoff, skb->len);
-
-		__skb_flow_bpf_to_target(&flow_keys, flow_dissector,
-					 target_container);
+		if (attached) {
+			ret = __skb_flow_bpf_dissect(attached, skb,
+						     flow_dissector,
+						     &flow_keys);
+			__skb_flow_bpf_to_target(&flow_keys, flow_dissector,
+						 target_container);
+			rcu_read_unlock();
+			return ret;
+		}
 		rcu_read_unlock();
-		return result == BPF_OK;
 	}
-	rcu_read_unlock();
 
 	if (dissector_uses_key(flow_dissector,
 			       FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
-- 
cgit v1.2.3


From b7a1848e8398b8396c990279e6a10272d818577e Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <sdf@google.com>
Date: Mon, 28 Jan 2019 08:53:54 -0800
Subject: bpf: add BPF_PROG_TEST_RUN support for flow dissector

The input is packet data, the output is struct bpf_flow_key. This should
make it easy to test flow dissector programs without elaborate
setup.

Signed-off-by: Stanislav Fomichev <sdf@google.com>
Acked-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/bpf.h |  3 ++
 net/bpf/test_run.c  | 82 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 net/core/filter.c   |  1 +
 3 files changed, 86 insertions(+)

(limited to 'include')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 3851529062ec..0394f1f9213b 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -404,6 +404,9 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
 			  union bpf_attr __user *uattr);
 int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
 			  union bpf_attr __user *uattr);
+int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
+				     const union bpf_attr *kattr,
+				     union bpf_attr __user *uattr);
 
 /* an array of programs to be executed under rcu_lock.
  *
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index fa2644d276ef..2c5172b33209 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -240,3 +240,85 @@ out:
 	kfree(data);
 	return ret;
 }
+
+int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
+				     const union bpf_attr *kattr,
+				     union bpf_attr __user *uattr)
+{
+	u32 size = kattr->test.data_size_in;
+	u32 repeat = kattr->test.repeat;
+	struct bpf_flow_keys flow_keys;
+	u64 time_start, time_spent = 0;
+	struct bpf_skb_data_end *cb;
+	u32 retval, duration;
+	struct sk_buff *skb;
+	struct sock *sk;
+	void *data;
+	int ret;
+	u32 i;
+
+	if (prog->type != BPF_PROG_TYPE_FLOW_DISSECTOR)
+		return -EINVAL;
+
+	data = bpf_test_init(kattr, size, NET_SKB_PAD + NET_IP_ALIGN,
+			     SKB_DATA_ALIGN(sizeof(struct skb_shared_info)));
+	if (IS_ERR(data))
+		return PTR_ERR(data);
+
+	sk = kzalloc(sizeof(*sk), GFP_USER);
+	if (!sk) {
+		kfree(data);
+		return -ENOMEM;
+	}
+	sock_net_set(sk, current->nsproxy->net_ns);
+	sock_init_data(NULL, sk);
+
+	skb = build_skb(data, 0);
+	if (!skb) {
+		kfree(data);
+		kfree(sk);
+		return -ENOMEM;
+	}
+	skb->sk = sk;
+
+	skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
+	__skb_put(skb, size);
+	skb->protocol = eth_type_trans(skb,
+				       current->nsproxy->net_ns->loopback_dev);
+	skb_reset_network_header(skb);
+
+	cb = (struct bpf_skb_data_end *)skb->cb;
+	cb->qdisc_cb.flow_keys = &flow_keys;
+
+	if (!repeat)
+		repeat = 1;
+
+	time_start = ktime_get_ns();
+	for (i = 0; i < repeat; i++) {
+		preempt_disable();
+		rcu_read_lock();
+		retval = __skb_flow_bpf_dissect(prog, skb,
+						&flow_keys_dissector,
+						&flow_keys);
+		rcu_read_unlock();
+		preempt_enable();
+
+		if (need_resched()) {
+			if (signal_pending(current))
+				break;
+			time_spent += ktime_get_ns() - time_start;
+			cond_resched();
+			time_start = ktime_get_ns();
+		}
+	}
+	time_spent += ktime_get_ns() - time_start;
+	do_div(time_spent, repeat);
+	duration = time_spent > U32_MAX ? U32_MAX : (u32)time_spent;
+
+	ret = bpf_test_finish(kattr, uattr, &flow_keys, sizeof(flow_keys),
+			      retval, duration);
+
+	kfree_skb(skb);
+	kfree(sk);
+	return ret;
+}
diff --git a/net/core/filter.c b/net/core/filter.c
index 8e587dd1da20..8ce421796ac6 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -7711,6 +7711,7 @@ const struct bpf_verifier_ops flow_dissector_verifier_ops = {
 };
 
 const struct bpf_prog_ops flow_dissector_prog_ops = {
+	.test_run		= bpf_prog_test_run_flow_dissector,
 };
 
 int sk_detach_filter(struct sock *sk)
-- 
cgit v1.2.3


From 59abc8cc05856cb31d6e07a8e25f0d749d559233 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Wed, 23 Jan 2019 11:08:05 -0800
Subject: scsi: libsas: Remove scsi_to_u32()

Since the function scsi_to_u32() is identical to get_unaligned_be32(),
change all scsi_to_u32() calls into get_unaligned_be32() calls.

Cc: Jian Luo <luojian5@huawei.com>
Cc: John Garry <john.garry@huawei.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Reviewed-by: John Garry <john.garry@huawei.com>
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/libsas/sas_expander.c | 9 +++++----
 include/scsi/scsi.h                | 6 ------
 2 files changed, 5 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c
index 83e3715d30e4..b13b245791b0 100644
--- a/drivers/scsi/libsas/sas_expander.c
+++ b/drivers/scsi/libsas/sas_expander.c
@@ -25,6 +25,7 @@
 #include <linux/scatterlist.h>
 #include <linux/blkdev.h>
 #include <linux/slab.h>
+#include <asm/unaligned.h>
 
 #include "sas_internal.h"
 
@@ -696,10 +697,10 @@ int sas_smp_get_phy_events(struct sas_phy *phy)
 	if (res)
 		goto out;
 
-	phy->invalid_dword_count = scsi_to_u32(&resp[12]);
-	phy->running_disparity_error_count = scsi_to_u32(&resp[16]);
-	phy->loss_of_dword_sync_count = scsi_to_u32(&resp[20]);
-	phy->phy_reset_problem_count = scsi_to_u32(&resp[24]);
+	phy->invalid_dword_count = get_unaligned_be32(&resp[12]);
+	phy->running_disparity_error_count = get_unaligned_be32(&resp[16]);
+	phy->loss_of_dword_sync_count = get_unaligned_be32(&resp[20]);
+	phy->phy_reset_problem_count = get_unaligned_be32(&resp[24]);
 
  out:
 	kfree(req);
diff --git a/include/scsi/scsi.h b/include/scsi/scsi.h
index eb7853c1a23b..5339baadc082 100644
--- a/include/scsi/scsi.h
+++ b/include/scsi/scsi.h
@@ -274,10 +274,4 @@ static inline int scsi_is_wlun(u64 lun)
 /* Used to obtain the PCI location of a device */
 #define SCSI_IOCTL_GET_PCI		0x5387
 
-/* Pull a u32 out of a SCSI message (using BE SCSI conventions) */
-static inline __u32 scsi_to_u32(__u8 *ptr)
-{
-	return (ptr[0]<<24) + (ptr[1]<<16) + (ptr[2]<<8) + ptr[3];
-}
-
 #endif /* _SCSI_SCSI_H */
-- 
cgit v1.2.3


From 2b6e492467c78183bb629bb0a100ea3509b615a5 Mon Sep 17 00:00:00 2001
From: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Date: Wed, 23 Jan 2019 17:44:16 +0300
Subject: device property: Fix the length used in PROPERTY_ENTRY_STRING()

With string type property entries we need to use
sizeof(const char *) instead of the number of characters as
the length of the entry.

If the string was shorter then sizeof(const char *),
attempts to read it would have failed with -EOVERFLOW. The
problem has been hidden because all build-in string
properties have had a string longer then 8 characters until
now.

Fixes: a85f42047533 ("device property: helper macros for property entry creation")
Cc: 4.5+ <stable@vger.kernel.org> # 4.5+
Signed-off-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/property.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/property.h b/include/linux/property.h
index 3789ec755fb6..65d3420dd5d1 100644
--- a/include/linux/property.h
+++ b/include/linux/property.h
@@ -258,7 +258,7 @@ struct property_entry {
 #define PROPERTY_ENTRY_STRING(_name_, _val_)		\
 (struct property_entry) {				\
 	.name = _name_,					\
-	.length = sizeof(_val_),			\
+	.length = sizeof(const char *),			\
 	.type = DEV_PROP_STRING,			\
 	{ .value = { .str = _val_ } },			\
 }
-- 
cgit v1.2.3


From 625c85a62cb7d3c79f6e16de3cfa972033658250 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Fri, 25 Jan 2019 12:53:07 +0530
Subject: cpufreq: Use struct kobj_attribute instead of struct global_attr

The cpufreq_global_kobject is created using kobject_create_and_add()
helper, which assigns the kobj_type as dynamic_kobj_ktype and show/store
routines are set to kobj_attr_show() and kobj_attr_store().

These routines pass struct kobj_attribute as an argument to the
show/store callbacks. But all the cpufreq files created using the
cpufreq_global_kobject expect the argument to be of type struct
attribute. Things work fine currently as no one accesses the "attr"
argument. We may not see issues even if the argument is used, as struct
kobj_attribute has struct attribute as its first element and so they
will both get same address.

But this is logically incorrect and we should rather use struct
kobj_attribute instead of struct global_attr in the cpufreq core and
drivers and the show/store callbacks should take struct kobj_attribute
as argument instead.

This bug is caught using CFI CLANG builds in android kernel which
catches mismatch in function prototypes for such callbacks.

Reported-by: Donghee Han <dh.han@samsung.com>
Reported-by: Sangkyu Kim <skwith.kim@samsung.com>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/cpufreq.c      |  6 +++---
 drivers/cpufreq/intel_pstate.c | 23 ++++++++++++-----------
 include/linux/cpufreq.h        | 12 ++----------
 3 files changed, 17 insertions(+), 24 deletions(-)

(limited to 'include')

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index a8fa684f5f90..3eff158d9750 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -545,13 +545,13 @@ EXPORT_SYMBOL_GPL(cpufreq_policy_transition_delay_us);
  *                          SYSFS INTERFACE                          *
  *********************************************************************/
 static ssize_t show_boost(struct kobject *kobj,
-				 struct attribute *attr, char *buf)
+			  struct kobj_attribute *attr, char *buf)
 {
 	return sprintf(buf, "%d\n", cpufreq_driver->boost_enabled);
 }
 
-static ssize_t store_boost(struct kobject *kobj, struct attribute *attr,
-				  const char *buf, size_t count)
+static ssize_t store_boost(struct kobject *kobj, struct kobj_attribute *attr,
+			   const char *buf, size_t count)
 {
 	int ret, enable;
 
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index dd66decf2087..5ab6a4fe93aa 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -895,7 +895,7 @@ static void intel_pstate_update_policies(void)
 /************************** sysfs begin ************************/
 #define show_one(file_name, object)					\
 	static ssize_t show_##file_name					\
-	(struct kobject *kobj, struct attribute *attr, char *buf)	\
+	(struct kobject *kobj, struct kobj_attribute *attr, char *buf)	\
 	{								\
 		return sprintf(buf, "%u\n", global.object);		\
 	}
@@ -904,7 +904,7 @@ static ssize_t intel_pstate_show_status(char *buf);
 static int intel_pstate_update_status(const char *buf, size_t size);
 
 static ssize_t show_status(struct kobject *kobj,
-			   struct attribute *attr, char *buf)
+			   struct kobj_attribute *attr, char *buf)
 {
 	ssize_t ret;
 
@@ -915,7 +915,7 @@ static ssize_t show_status(struct kobject *kobj,
 	return ret;
 }
 
-static ssize_t store_status(struct kobject *a, struct attribute *b,
+static ssize_t store_status(struct kobject *a, struct kobj_attribute *b,
 			    const char *buf, size_t count)
 {
 	char *p = memchr(buf, '\n', count);
@@ -929,7 +929,7 @@ static ssize_t store_status(struct kobject *a, struct attribute *b,
 }
 
 static ssize_t show_turbo_pct(struct kobject *kobj,
-				struct attribute *attr, char *buf)
+				struct kobj_attribute *attr, char *buf)
 {
 	struct cpudata *cpu;
 	int total, no_turbo, turbo_pct;
@@ -955,7 +955,7 @@ static ssize_t show_turbo_pct(struct kobject *kobj,
 }
 
 static ssize_t show_num_pstates(struct kobject *kobj,
-				struct attribute *attr, char *buf)
+				struct kobj_attribute *attr, char *buf)
 {
 	struct cpudata *cpu;
 	int total;
@@ -976,7 +976,7 @@ static ssize_t show_num_pstates(struct kobject *kobj,
 }
 
 static ssize_t show_no_turbo(struct kobject *kobj,
-			     struct attribute *attr, char *buf)
+			     struct kobj_attribute *attr, char *buf)
 {
 	ssize_t ret;
 
@@ -998,7 +998,7 @@ static ssize_t show_no_turbo(struct kobject *kobj,
 	return ret;
 }
 
-static ssize_t store_no_turbo(struct kobject *a, struct attribute *b,
+static ssize_t store_no_turbo(struct kobject *a, struct kobj_attribute *b,
 			      const char *buf, size_t count)
 {
 	unsigned int input;
@@ -1045,7 +1045,7 @@ static ssize_t store_no_turbo(struct kobject *a, struct attribute *b,
 	return count;
 }
 
-static ssize_t store_max_perf_pct(struct kobject *a, struct attribute *b,
+static ssize_t store_max_perf_pct(struct kobject *a, struct kobj_attribute *b,
 				  const char *buf, size_t count)
 {
 	unsigned int input;
@@ -1075,7 +1075,7 @@ static ssize_t store_max_perf_pct(struct kobject *a, struct attribute *b,
 	return count;
 }
 
-static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b,
+static ssize_t store_min_perf_pct(struct kobject *a, struct kobj_attribute *b,
 				  const char *buf, size_t count)
 {
 	unsigned int input;
@@ -1107,12 +1107,13 @@ static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b,
 }
 
 static ssize_t show_hwp_dynamic_boost(struct kobject *kobj,
-				struct attribute *attr, char *buf)
+				struct kobj_attribute *attr, char *buf)
 {
 	return sprintf(buf, "%u\n", hwp_boost);
 }
 
-static ssize_t store_hwp_dynamic_boost(struct kobject *a, struct attribute *b,
+static ssize_t store_hwp_dynamic_boost(struct kobject *a,
+				       struct kobj_attribute *b,
 				       const char *buf, size_t count)
 {
 	unsigned int input;
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index bd7fbd6a4478..c19142911554 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -254,20 +254,12 @@ __ATTR(_name, 0644, show_##_name, store_##_name)
 static struct freq_attr _name =			\
 __ATTR(_name, 0200, NULL, store_##_name)
 
-struct global_attr {
-	struct attribute attr;
-	ssize_t (*show)(struct kobject *kobj,
-			struct attribute *attr, char *buf);
-	ssize_t (*store)(struct kobject *a, struct attribute *b,
-			 const char *c, size_t count);
-};
-
 #define define_one_global_ro(_name)		\
-static struct global_attr _name =		\
+static struct kobj_attribute _name =		\
 __ATTR(_name, 0444, show_##_name, NULL)
 
 #define define_one_global_rw(_name)		\
-static struct global_attr _name =		\
+static struct kobj_attribute _name =		\
 __ATTR(_name, 0644, show_##_name, store_##_name)
 
 
-- 
cgit v1.2.3


From bc3843d4d357061d92e7800c7da342e2d068772c Mon Sep 17 00:00:00 2001
From: Nava kishore Manne <nava.manne@xilinx.com>
Date: Fri, 25 Jan 2019 13:16:52 +0530
Subject: firmware: xilinx: Add reset API's

This Patch Adds reset API's to support release, assert
and status functionalities by using firmware interface.

Signed-off-by: Nava kishore Manne <nava.manne@xilinx.com>
Signed-off-by: Michal Simek <michal.simek@xilinx.com>
---
 drivers/firmware/xilinx/zynqmp.c     |  40 +++++++++++
 include/linux/firmware/xlnx-zynqmp.h | 136 +++++++++++++++++++++++++++++++++++
 2 files changed, 176 insertions(+)

(limited to 'include')

diff --git a/drivers/firmware/xilinx/zynqmp.c b/drivers/firmware/xilinx/zynqmp.c
index 9a1c72a9280f..70b50377ae5f 100644
--- a/drivers/firmware/xilinx/zynqmp.c
+++ b/drivers/firmware/xilinx/zynqmp.c
@@ -469,6 +469,44 @@ static int zynqmp_pm_ioctl(u32 node_id, u32 ioctl_id, u32 arg1, u32 arg2,
 				   arg1, arg2, out);
 }
 
+/**
+ * zynqmp_pm_reset_assert - Request setting of reset (1 - assert, 0 - release)
+ * @reset:		Reset to be configured
+ * @assert_flag:	Flag stating should reset be asserted (1) or
+ *			released (0)
+ *
+ * Return: Returns status, either success or error+reason
+ */
+static int zynqmp_pm_reset_assert(const enum zynqmp_pm_reset reset,
+				  const enum zynqmp_pm_reset_action assert_flag)
+{
+	return zynqmp_pm_invoke_fn(PM_RESET_ASSERT, reset, assert_flag,
+				   0, 0, NULL);
+}
+
+/**
+ * zynqmp_pm_reset_get_status - Get status of the reset
+ * @reset:      Reset whose status should be returned
+ * @status:     Returned status
+ *
+ * Return: Returns status, either success or error+reason
+ */
+static int zynqmp_pm_reset_get_status(const enum zynqmp_pm_reset reset,
+				      u32 *status)
+{
+	u32 ret_payload[PAYLOAD_ARG_CNT];
+	int ret;
+
+	if (!status)
+		return -EINVAL;
+
+	ret = zynqmp_pm_invoke_fn(PM_RESET_GET_STATUS, reset, 0,
+				  0, 0, ret_payload);
+	*status = ret_payload[1];
+
+	return ret;
+}
+
 static const struct zynqmp_eemi_ops eemi_ops = {
 	.get_api_version = zynqmp_pm_get_api_version,
 	.query_data = zynqmp_pm_query_data,
@@ -482,6 +520,8 @@ static const struct zynqmp_eemi_ops eemi_ops = {
 	.clock_setparent = zynqmp_pm_clock_setparent,
 	.clock_getparent = zynqmp_pm_clock_getparent,
 	.ioctl = zynqmp_pm_ioctl,
+	.reset_assert = zynqmp_pm_reset_assert,
+	.reset_get_status = zynqmp_pm_reset_get_status,
 };
 
 /**
diff --git a/include/linux/firmware/xlnx-zynqmp.h b/include/linux/firmware/xlnx-zynqmp.h
index 3c3c28eff56a..07c587a0b06e 100644
--- a/include/linux/firmware/xlnx-zynqmp.h
+++ b/include/linux/firmware/xlnx-zynqmp.h
@@ -34,6 +34,8 @@
 
 enum pm_api_id {
 	PM_GET_API_VERSION = 1,
+	PM_RESET_ASSERT = 17,
+	PM_RESET_GET_STATUS,
 	PM_IOCTL = 34,
 	PM_QUERY_DATA,
 	PM_CLOCK_ENABLE,
@@ -75,6 +77,137 @@ enum pm_query_id {
 	PM_QID_CLOCK_GET_NUM_CLOCKS = 12,
 };
 
+enum zynqmp_pm_reset_action {
+	PM_RESET_ACTION_RELEASE,
+	PM_RESET_ACTION_ASSERT,
+	PM_RESET_ACTION_PULSE,
+};
+
+enum zynqmp_pm_reset {
+	ZYNQMP_PM_RESET_START = 1000,
+	ZYNQMP_PM_RESET_PCIE_CFG = ZYNQMP_PM_RESET_START,
+	ZYNQMP_PM_RESET_PCIE_BRIDGE,
+	ZYNQMP_PM_RESET_PCIE_CTRL,
+	ZYNQMP_PM_RESET_DP,
+	ZYNQMP_PM_RESET_SWDT_CRF,
+	ZYNQMP_PM_RESET_AFI_FM5,
+	ZYNQMP_PM_RESET_AFI_FM4,
+	ZYNQMP_PM_RESET_AFI_FM3,
+	ZYNQMP_PM_RESET_AFI_FM2,
+	ZYNQMP_PM_RESET_AFI_FM1,
+	ZYNQMP_PM_RESET_AFI_FM0,
+	ZYNQMP_PM_RESET_GDMA,
+	ZYNQMP_PM_RESET_GPU_PP1,
+	ZYNQMP_PM_RESET_GPU_PP0,
+	ZYNQMP_PM_RESET_GPU,
+	ZYNQMP_PM_RESET_GT,
+	ZYNQMP_PM_RESET_SATA,
+	ZYNQMP_PM_RESET_ACPU3_PWRON,
+	ZYNQMP_PM_RESET_ACPU2_PWRON,
+	ZYNQMP_PM_RESET_ACPU1_PWRON,
+	ZYNQMP_PM_RESET_ACPU0_PWRON,
+	ZYNQMP_PM_RESET_APU_L2,
+	ZYNQMP_PM_RESET_ACPU3,
+	ZYNQMP_PM_RESET_ACPU2,
+	ZYNQMP_PM_RESET_ACPU1,
+	ZYNQMP_PM_RESET_ACPU0,
+	ZYNQMP_PM_RESET_DDR,
+	ZYNQMP_PM_RESET_APM_FPD,
+	ZYNQMP_PM_RESET_SOFT,
+	ZYNQMP_PM_RESET_GEM0,
+	ZYNQMP_PM_RESET_GEM1,
+	ZYNQMP_PM_RESET_GEM2,
+	ZYNQMP_PM_RESET_GEM3,
+	ZYNQMP_PM_RESET_QSPI,
+	ZYNQMP_PM_RESET_UART0,
+	ZYNQMP_PM_RESET_UART1,
+	ZYNQMP_PM_RESET_SPI0,
+	ZYNQMP_PM_RESET_SPI1,
+	ZYNQMP_PM_RESET_SDIO0,
+	ZYNQMP_PM_RESET_SDIO1,
+	ZYNQMP_PM_RESET_CAN0,
+	ZYNQMP_PM_RESET_CAN1,
+	ZYNQMP_PM_RESET_I2C0,
+	ZYNQMP_PM_RESET_I2C1,
+	ZYNQMP_PM_RESET_TTC0,
+	ZYNQMP_PM_RESET_TTC1,
+	ZYNQMP_PM_RESET_TTC2,
+	ZYNQMP_PM_RESET_TTC3,
+	ZYNQMP_PM_RESET_SWDT_CRL,
+	ZYNQMP_PM_RESET_NAND,
+	ZYNQMP_PM_RESET_ADMA,
+	ZYNQMP_PM_RESET_GPIO,
+	ZYNQMP_PM_RESET_IOU_CC,
+	ZYNQMP_PM_RESET_TIMESTAMP,
+	ZYNQMP_PM_RESET_RPU_R50,
+	ZYNQMP_PM_RESET_RPU_R51,
+	ZYNQMP_PM_RESET_RPU_AMBA,
+	ZYNQMP_PM_RESET_OCM,
+	ZYNQMP_PM_RESET_RPU_PGE,
+	ZYNQMP_PM_RESET_USB0_CORERESET,
+	ZYNQMP_PM_RESET_USB1_CORERESET,
+	ZYNQMP_PM_RESET_USB0_HIBERRESET,
+	ZYNQMP_PM_RESET_USB1_HIBERRESET,
+	ZYNQMP_PM_RESET_USB0_APB,
+	ZYNQMP_PM_RESET_USB1_APB,
+	ZYNQMP_PM_RESET_IPI,
+	ZYNQMP_PM_RESET_APM_LPD,
+	ZYNQMP_PM_RESET_RTC,
+	ZYNQMP_PM_RESET_SYSMON,
+	ZYNQMP_PM_RESET_AFI_FM6,
+	ZYNQMP_PM_RESET_LPD_SWDT,
+	ZYNQMP_PM_RESET_FPD,
+	ZYNQMP_PM_RESET_RPU_DBG1,
+	ZYNQMP_PM_RESET_RPU_DBG0,
+	ZYNQMP_PM_RESET_DBG_LPD,
+	ZYNQMP_PM_RESET_DBG_FPD,
+	ZYNQMP_PM_RESET_APLL,
+	ZYNQMP_PM_RESET_DPLL,
+	ZYNQMP_PM_RESET_VPLL,
+	ZYNQMP_PM_RESET_IOPLL,
+	ZYNQMP_PM_RESET_RPLL,
+	ZYNQMP_PM_RESET_GPO3_PL_0,
+	ZYNQMP_PM_RESET_GPO3_PL_1,
+	ZYNQMP_PM_RESET_GPO3_PL_2,
+	ZYNQMP_PM_RESET_GPO3_PL_3,
+	ZYNQMP_PM_RESET_GPO3_PL_4,
+	ZYNQMP_PM_RESET_GPO3_PL_5,
+	ZYNQMP_PM_RESET_GPO3_PL_6,
+	ZYNQMP_PM_RESET_GPO3_PL_7,
+	ZYNQMP_PM_RESET_GPO3_PL_8,
+	ZYNQMP_PM_RESET_GPO3_PL_9,
+	ZYNQMP_PM_RESET_GPO3_PL_10,
+	ZYNQMP_PM_RESET_GPO3_PL_11,
+	ZYNQMP_PM_RESET_GPO3_PL_12,
+	ZYNQMP_PM_RESET_GPO3_PL_13,
+	ZYNQMP_PM_RESET_GPO3_PL_14,
+	ZYNQMP_PM_RESET_GPO3_PL_15,
+	ZYNQMP_PM_RESET_GPO3_PL_16,
+	ZYNQMP_PM_RESET_GPO3_PL_17,
+	ZYNQMP_PM_RESET_GPO3_PL_18,
+	ZYNQMP_PM_RESET_GPO3_PL_19,
+	ZYNQMP_PM_RESET_GPO3_PL_20,
+	ZYNQMP_PM_RESET_GPO3_PL_21,
+	ZYNQMP_PM_RESET_GPO3_PL_22,
+	ZYNQMP_PM_RESET_GPO3_PL_23,
+	ZYNQMP_PM_RESET_GPO3_PL_24,
+	ZYNQMP_PM_RESET_GPO3_PL_25,
+	ZYNQMP_PM_RESET_GPO3_PL_26,
+	ZYNQMP_PM_RESET_GPO3_PL_27,
+	ZYNQMP_PM_RESET_GPO3_PL_28,
+	ZYNQMP_PM_RESET_GPO3_PL_29,
+	ZYNQMP_PM_RESET_GPO3_PL_30,
+	ZYNQMP_PM_RESET_GPO3_PL_31,
+	ZYNQMP_PM_RESET_RPU_LS,
+	ZYNQMP_PM_RESET_PS_ONLY,
+	ZYNQMP_PM_RESET_PL,
+	ZYNQMP_PM_RESET_PS_PL0,
+	ZYNQMP_PM_RESET_PS_PL1,
+	ZYNQMP_PM_RESET_PS_PL2,
+	ZYNQMP_PM_RESET_PS_PL3,
+	ZYNQMP_PM_RESET_END = ZYNQMP_PM_RESET_PS_PL3
+};
+
 /**
  * struct zynqmp_pm_query_data - PM query data
  * @qid:	query ID
@@ -102,6 +235,9 @@ struct zynqmp_eemi_ops {
 	int (*clock_setparent)(u32 clock_id, u32 parent_id);
 	int (*clock_getparent)(u32 clock_id, u32 *parent_id);
 	int (*ioctl)(u32 node_id, u32 ioctl_id, u32 arg1, u32 arg2, u32 *out);
+	int (*reset_assert)(const enum zynqmp_pm_reset reset,
+			    const enum zynqmp_pm_reset_action assert_flag);
+	int (*reset_get_status)(const enum zynqmp_pm_reset reset, u32 *status);
 };
 
 #if IS_REACHABLE(CONFIG_ARCH_ZYNQMP)
-- 
cgit v1.2.3


From 3f1b66be4aaa5dbe0a16197bfdfc355cf1da7701 Mon Sep 17 00:00:00 2001
From: Nava kishore Manne <nava.manne@xilinx.com>
Date: Fri, 25 Jan 2019 13:16:53 +0530
Subject: dt-bindings: reset: Add bindings for ZynqMP reset driver

Add documentation to describe Xilinx ZynqMP reset driver
bindings.

Signed-off-by: Nava kishore Manne <nava.manne@xilinx.com>
Signed-off-by: Jolly Shah <jollys@xilinx.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Michal Simek <michal.simek@xilinx.com>
---
 .../bindings/reset/xlnx,zynqmp-reset.txt           |  52 +++++++++
 include/dt-bindings/reset/xlnx-zynqmp-resets.h     | 130 +++++++++++++++++++++
 2 files changed, 182 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/reset/xlnx,zynqmp-reset.txt
 create mode 100644 include/dt-bindings/reset/xlnx-zynqmp-resets.h

(limited to 'include')

diff --git a/Documentation/devicetree/bindings/reset/xlnx,zynqmp-reset.txt b/Documentation/devicetree/bindings/reset/xlnx,zynqmp-reset.txt
new file mode 100644
index 000000000000..27a45fe5ecf1
--- /dev/null
+++ b/Documentation/devicetree/bindings/reset/xlnx,zynqmp-reset.txt
@@ -0,0 +1,52 @@
+--------------------------------------------------------------------------
+ =  Zynq UltraScale+ MPSoC reset driver binding =
+--------------------------------------------------------------------------
+The Zynq UltraScale+ MPSoC has several different resets.
+
+See Chapter 36 of the Zynq UltraScale+ MPSoC TRM (UG) for more information
+about zynqmp resets.
+
+Please also refer to reset.txt in this directory for common reset
+controller binding usage.
+
+Required Properties:
+- compatible:	"xlnx,zynqmp-reset"
+- #reset-cells:	Specifies the number of cells needed to encode reset
+		line, should be 1
+
+-------
+Example
+-------
+
+firmware {
+	zynqmp_firmware: zynqmp-firmware {
+		compatible = "xlnx,zynqmp-firmware";
+		method = "smc";
+
+		zynqmp_reset: reset-controller {
+			compatible = "xlnx,zynqmp-reset";
+			#reset-cells = <1>;
+		};
+	};
+};
+
+Specifying reset lines connected to IP modules
+==============================================
+
+Device nodes that need access to reset lines should
+specify them as a reset phandle in their corresponding node as
+specified in reset.txt.
+
+For list of all valid reset indicies see
+<dt-bindings/reset/xlnx-zynqmp-resets.h>
+
+Example:
+
+serdes: zynqmp_phy@fd400000 {
+	...
+
+	resets = <&zynqmp_reset ZYNQMP_RESET_SATA>;
+	reset-names = "sata_rst";
+
+	...
+};
diff --git a/include/dt-bindings/reset/xlnx-zynqmp-resets.h b/include/dt-bindings/reset/xlnx-zynqmp-resets.h
new file mode 100644
index 000000000000..d44525b9f8db
--- /dev/null
+++ b/include/dt-bindings/reset/xlnx-zynqmp-resets.h
@@ -0,0 +1,130 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *  Copyright (C) 2018 Xilinx, Inc.
+ */
+
+#ifndef _DT_BINDINGS_ZYNQMP_RESETS_H
+#define _DT_BINDINGS_ZYNQMP_RESETS_H
+
+#define		ZYNQMP_RESET_PCIE_CFG		0
+#define		ZYNQMP_RESET_PCIE_BRIDGE	1
+#define		ZYNQMP_RESET_PCIE_CTRL		2
+#define		ZYNQMP_RESET_DP			3
+#define		ZYNQMP_RESET_SWDT_CRF		4
+#define		ZYNQMP_RESET_AFI_FM5		5
+#define		ZYNQMP_RESET_AFI_FM4		6
+#define		ZYNQMP_RESET_AFI_FM3		7
+#define		ZYNQMP_RESET_AFI_FM2		8
+#define		ZYNQMP_RESET_AFI_FM1		9
+#define		ZYNQMP_RESET_AFI_FM0		10
+#define		ZYNQMP_RESET_GDMA		11
+#define		ZYNQMP_RESET_GPU_PP1		12
+#define		ZYNQMP_RESET_GPU_PP0		13
+#define		ZYNQMP_RESET_GPU		14
+#define		ZYNQMP_RESET_GT			15
+#define		ZYNQMP_RESET_SATA		16
+#define		ZYNQMP_RESET_ACPU3_PWRON	17
+#define		ZYNQMP_RESET_ACPU2_PWRON	18
+#define		ZYNQMP_RESET_ACPU1_PWRON	19
+#define		ZYNQMP_RESET_ACPU0_PWRON	20
+#define		ZYNQMP_RESET_APU_L2		21
+#define		ZYNQMP_RESET_ACPU3		22
+#define		ZYNQMP_RESET_ACPU2		23
+#define		ZYNQMP_RESET_ACPU1		24
+#define		ZYNQMP_RESET_ACPU0		25
+#define		ZYNQMP_RESET_DDR		26
+#define		ZYNQMP_RESET_APM_FPD		27
+#define		ZYNQMP_RESET_SOFT		28
+#define		ZYNQMP_RESET_GEM0		29
+#define		ZYNQMP_RESET_GEM1		30
+#define		ZYNQMP_RESET_GEM2		31
+#define		ZYNQMP_RESET_GEM3		32
+#define		ZYNQMP_RESET_QSPI		33
+#define		ZYNQMP_RESET_UART0		34
+#define		ZYNQMP_RESET_UART1		35
+#define		ZYNQMP_RESET_SPI0		36
+#define		ZYNQMP_RESET_SPI1		37
+#define		ZYNQMP_RESET_SDIO0		38
+#define		ZYNQMP_RESET_SDIO1		39
+#define		ZYNQMP_RESET_CAN0		40
+#define		ZYNQMP_RESET_CAN1		41
+#define		ZYNQMP_RESET_I2C0		42
+#define		ZYNQMP_RESET_I2C1		43
+#define		ZYNQMP_RESET_TTC0		44
+#define		ZYNQMP_RESET_TTC1		45
+#define		ZYNQMP_RESET_TTC2		46
+#define		ZYNQMP_RESET_TTC3		47
+#define		ZYNQMP_RESET_SWDT_CRL		48
+#define		ZYNQMP_RESET_NAND		49
+#define		ZYNQMP_RESET_ADMA		50
+#define		ZYNQMP_RESET_GPIO		51
+#define		ZYNQMP_RESET_IOU_CC		52
+#define		ZYNQMP_RESET_TIMESTAMP		53
+#define		ZYNQMP_RESET_RPU_R50		54
+#define		ZYNQMP_RESET_RPU_R51		55
+#define		ZYNQMP_RESET_RPU_AMBA		56
+#define		ZYNQMP_RESET_OCM		57
+#define		ZYNQMP_RESET_RPU_PGE		58
+#define		ZYNQMP_RESET_USB0_CORERESET	59
+#define		ZYNQMP_RESET_USB1_CORERESET	60
+#define		ZYNQMP_RESET_USB0_HIBERRESET	61
+#define		ZYNQMP_RESET_USB1_HIBERRESET	62
+#define		ZYNQMP_RESET_USB0_APB		63
+#define		ZYNQMP_RESET_USB1_APB		64
+#define		ZYNQMP_RESET_IPI		65
+#define		ZYNQMP_RESET_APM_LPD		66
+#define		ZYNQMP_RESET_RTC		67
+#define		ZYNQMP_RESET_SYSMON		68
+#define		ZYNQMP_RESET_AFI_FM6		69
+#define		ZYNQMP_RESET_LPD_SWDT		70
+#define		ZYNQMP_RESET_FPD		71
+#define		ZYNQMP_RESET_RPU_DBG1		72
+#define		ZYNQMP_RESET_RPU_DBG0		73
+#define		ZYNQMP_RESET_DBG_LPD		74
+#define		ZYNQMP_RESET_DBG_FPD		75
+#define		ZYNQMP_RESET_APLL		76
+#define		ZYNQMP_RESET_DPLL		77
+#define		ZYNQMP_RESET_VPLL		78
+#define		ZYNQMP_RESET_IOPLL		79
+#define		ZYNQMP_RESET_RPLL		80
+#define		ZYNQMP_RESET_GPO3_PL_0		81
+#define		ZYNQMP_RESET_GPO3_PL_1		82
+#define		ZYNQMP_RESET_GPO3_PL_2		83
+#define		ZYNQMP_RESET_GPO3_PL_3		84
+#define		ZYNQMP_RESET_GPO3_PL_4		85
+#define		ZYNQMP_RESET_GPO3_PL_5		86
+#define		ZYNQMP_RESET_GPO3_PL_6		87
+#define		ZYNQMP_RESET_GPO3_PL_7		88
+#define		ZYNQMP_RESET_GPO3_PL_8		89
+#define		ZYNQMP_RESET_GPO3_PL_9		90
+#define		ZYNQMP_RESET_GPO3_PL_10		91
+#define		ZYNQMP_RESET_GPO3_PL_11		92
+#define		ZYNQMP_RESET_GPO3_PL_12		93
+#define		ZYNQMP_RESET_GPO3_PL_13		94
+#define		ZYNQMP_RESET_GPO3_PL_14		95
+#define		ZYNQMP_RESET_GPO3_PL_15		96
+#define		ZYNQMP_RESET_GPO3_PL_16		97
+#define		ZYNQMP_RESET_GPO3_PL_17		98
+#define		ZYNQMP_RESET_GPO3_PL_18		99
+#define		ZYNQMP_RESET_GPO3_PL_19		100
+#define		ZYNQMP_RESET_GPO3_PL_20		101
+#define		ZYNQMP_RESET_GPO3_PL_21		102
+#define		ZYNQMP_RESET_GPO3_PL_22		103
+#define		ZYNQMP_RESET_GPO3_PL_23		104
+#define		ZYNQMP_RESET_GPO3_PL_24		105
+#define		ZYNQMP_RESET_GPO3_PL_25		106
+#define		ZYNQMP_RESET_GPO3_PL_26		107
+#define		ZYNQMP_RESET_GPO3_PL_27		108
+#define		ZYNQMP_RESET_GPO3_PL_28		109
+#define		ZYNQMP_RESET_GPO3_PL_29		110
+#define		ZYNQMP_RESET_GPO3_PL_30		111
+#define		ZYNQMP_RESET_GPO3_PL_31		112
+#define		ZYNQMP_RESET_RPU_LS		113
+#define		ZYNQMP_RESET_PS_ONLY		114
+#define		ZYNQMP_RESET_PL			115
+#define		ZYNQMP_RESET_PS_PL0		116
+#define		ZYNQMP_RESET_PS_PL1		117
+#define		ZYNQMP_RESET_PS_PL2		118
+#define		ZYNQMP_RESET_PS_PL3		119
+
+#endif
-- 
cgit v1.2.3


From 5c30f43f0625a792c30e465f21dbeb1bb4dfc40b Mon Sep 17 00:00:00 2001
From: Ranjani Sridharan <ranjani.sridharan@linux.intel.com>
Date: Fri, 25 Jan 2019 14:06:46 -0600
Subject: ASoC: topology: add SND_SOC_DOBJ_GRAPH type for dapm routes

Add a new dobj type SND_SOC_DOBJ_GRAPH for dapm routes
and add snd_soc_dobj member to struct snd_soc_dapm_route.
This enables device drivers to save driver specific
data pertaining to dapm routes and also be able
to clean up the data when the driver module is unloaded.

Also, reorder the snd_soc_dobj_type types to align with
matching topology header types.

Signed-off-by: Ranjani Sridharan <ranjani.sridharan@linux.intel.com>
Signed-off-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/soc-dapm.h     | 2 ++
 include/sound/soc-topology.h | 7 ++++---
 2 files changed, 6 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/sound/soc-dapm.h b/include/sound/soc-dapm.h
index bd8163f151cb..46f2ba3ffcb7 100644
--- a/include/sound/soc-dapm.h
+++ b/include/sound/soc-dapm.h
@@ -540,6 +540,8 @@ struct snd_soc_dapm_route {
 	/* Note: currently only supported for links where source is a supply */
 	int (*connected)(struct snd_soc_dapm_widget *source,
 			 struct snd_soc_dapm_widget *sink);
+
+	struct snd_soc_dobj dobj;
 };
 
 /* dapm audio path between two widgets */
diff --git a/include/sound/soc-topology.h b/include/sound/soc-topology.h
index fa4b8413d2e2..8c43cfc240fa 100644
--- a/include/sound/soc-topology.h
+++ b/include/sound/soc-topology.h
@@ -38,12 +38,13 @@ struct snd_soc_dapm_route;
 enum snd_soc_dobj_type {
 	SND_SOC_DOBJ_NONE		= 0,	/* object is not dynamic */
 	SND_SOC_DOBJ_MIXER,
-	SND_SOC_DOBJ_ENUM,
 	SND_SOC_DOBJ_BYTES,
-	SND_SOC_DOBJ_PCM,
+	SND_SOC_DOBJ_ENUM,
+	SND_SOC_DOBJ_GRAPH,
+	SND_SOC_DOBJ_WIDGET,
 	SND_SOC_DOBJ_DAI_LINK,
+	SND_SOC_DOBJ_PCM,
 	SND_SOC_DOBJ_CODEC_LINK,
-	SND_SOC_DOBJ_WIDGET,
 };
 
 /* dynamic control object */
-- 
cgit v1.2.3


From 15917dc02841862840efcbfe1da0830f88078b5c Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 19 Dec 2018 13:04:41 +0100
Subject: sched: Remove stale PF_MUTEX_TESTER bit

The RTMUTEX tester was removed long ago but the PF bit stayed
around. Remove it and free up the space.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/sched.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index d2f90fa92468..e2bba022827d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1409,7 +1409,6 @@ extern struct pid *cad_pid;
 #define PF_UMH			0x02000000	/* I'm an Usermodehelper process */
 #define PF_NO_SETAFFINITY	0x04000000	/* Userland is not allowed to meddle with cpus_allowed */
 #define PF_MCE_EARLY		0x08000000      /* Early kill for mce process policy */
-#define PF_MUTEX_TESTER		0x20000000	/* Thread belongs to the rt mutex tester */
 #define PF_FREEZER_SKIP		0x40000000	/* Freezer should not count it as freezable */
 #define PF_SUSPEND_TASK		0x80000000      /* This thread called freeze_processes() and should not be frozen */
 
-- 
cgit v1.2.3


From 6bf8f22aea0ddd93af822aed8afeeee4acdf7694 Mon Sep 17 00:00:00 2001
From: Yishai Hadas <yishaih@mellanox.com>
Date: Tue, 22 Jan 2019 08:29:56 +0200
Subject: IB/mlx5: Introduce MLX5_IB_OBJECT_DEVX_ASYNC_CMD_FD

Introduce MLX5_IB_OBJECT_DEVX_ASYNC_CMD_FD and its initial implementation.

This object is from type class FD and will be used to read DEVX async
commands completion.

The core layer should allow the driver to set object from type FD in a
safe mode, this option was added with a matching comment in place.

Signed-off-by: Yishai Hadas <yishaih@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/core/rdma_core.c      |  1 +
 drivers/infiniband/core/uverbs_uapi.c    | 15 ++++--
 drivers/infiniband/hw/mlx5/devx.c        | 83 ++++++++++++++++++++++++++++++++
 include/rdma/uverbs_types.h              |  1 +
 include/uapi/rdma/mlx5_user_ioctl_cmds.h |  9 ++++
 5 files changed, 104 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c
index 6c4747e61d2b..a260d2f8e0b7 100644
--- a/drivers/infiniband/core/rdma_core.c
+++ b/drivers/infiniband/core/rdma_core.c
@@ -801,6 +801,7 @@ void uverbs_close_fd(struct file *f)
 	/* Pairs with filp->private_data in alloc_begin_fd_uobject */
 	uverbs_uobject_put(uobj);
 }
+EXPORT_SYMBOL(uverbs_close_fd);
 
 /*
  * Drop the ucontext off the ufile and completely disconnect it from the
diff --git a/drivers/infiniband/core/uverbs_uapi.c b/drivers/infiniband/core/uverbs_uapi.c
index 9ae08e4b78a3..7a987acf0c0b 100644
--- a/drivers/infiniband/core/uverbs_uapi.c
+++ b/drivers/infiniband/core/uverbs_uapi.c
@@ -188,13 +188,18 @@ static int uapi_merge_obj_tree(struct uverbs_api *uapi,
 		obj_elm->type_attrs = obj->type_attrs;
 		obj_elm->type_class = obj->type_attrs->type_class;
 		/*
-		 * Today drivers are only permitted to use idr_class
-		 * types. They cannot use FD types because we currently have
-		 * no way to revoke the fops pointer after device
-		 * disassociation.
+		 * Today drivers are only permitted to use idr_class and
+		 * fd_class types. We can revoke the IDR types during
+		 * disassociation, and the FD types require the driver to use
+		 * struct file_operations.owner to prevent the driver module
+		 * code from unloading while the file is open. This provides
+		 * enough safety that uverbs_close_fd() will continue to work.
+		 * Drivers using FD are responsible to handle disassociation of
+		 * the device on their own.
 		 */
 		if (WARN_ON(is_driver &&
-			    obj->type_attrs->type_class != &uverbs_idr_class))
+			    obj->type_attrs->type_class != &uverbs_idr_class &&
+			    obj->type_attrs->type_class != &uverbs_fd_class))
 			return -EINVAL;
 	}
 
diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c
index 5a588f3cfb1b..9933bcf83a6b 100644
--- a/drivers/infiniband/hw/mlx5/devx.c
+++ b/drivers/infiniband/hw/mlx5/devx.c
@@ -1168,6 +1168,38 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_QUERY)(
 			      cmd_out, cmd_out_len);
 }
 
+struct devx_async_event_queue {
+	spinlock_t		lock;
+	wait_queue_head_t	poll_wait;
+	struct list_head	event_list;
+};
+
+struct devx_async_cmd_event_file {
+	struct ib_uobject		uobj;
+	struct devx_async_event_queue	ev_queue;
+};
+
+static void devx_init_event_queue(struct devx_async_event_queue *ev_queue)
+{
+	spin_lock_init(&ev_queue->lock);
+	INIT_LIST_HEAD(&ev_queue->event_list);
+	init_waitqueue_head(&ev_queue->poll_wait);
+}
+
+static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_ASYNC_CMD_FD_ALLOC)(
+	struct uverbs_attr_bundle *attrs)
+{
+	struct devx_async_cmd_event_file *ev_file;
+
+	struct ib_uobject *uobj = uverbs_attr_get_uobject(
+		attrs, MLX5_IB_ATTR_DEVX_ASYNC_CMD_FD_ALLOC_HANDLE);
+
+	ev_file = container_of(uobj, struct devx_async_cmd_event_file,
+			       uobj);
+	devx_init_event_queue(&ev_file->ev_queue);
+	return 0;
+}
+
 static int devx_umem_get(struct mlx5_ib_dev *dev, struct ib_ucontext *ucontext,
 			 struct uverbs_attr_bundle *attrs,
 			 struct devx_umem *obj)
@@ -1313,6 +1345,38 @@ static int devx_umem_cleanup(struct ib_uobject *uobject,
 	return 0;
 }
 
+static ssize_t devx_async_cmd_event_read(struct file *filp, char __user *buf,
+					 size_t count, loff_t *pos)
+{
+	return -EINVAL;
+}
+
+static int devx_async_cmd_event_close(struct inode *inode, struct file *filp)
+{
+	uverbs_close_fd(filp);
+	return 0;
+}
+
+static __poll_t devx_async_cmd_event_poll(struct file *filp,
+					      struct poll_table_struct *wait)
+{
+	return 0;
+}
+
+const struct file_operations devx_async_cmd_event_fops = {
+	.owner	 = THIS_MODULE,
+	.read	 = devx_async_cmd_event_read,
+	.poll    = devx_async_cmd_event_poll,
+	.release = devx_async_cmd_event_close,
+	.llseek	 = no_llseek,
+};
+
+static int devx_hot_unplug_async_cmd_event_file(struct ib_uobject *uobj,
+						   enum rdma_remove_reason why)
+{
+	return 0;
+};
+
 DECLARE_UVERBS_NAMED_METHOD(
 	MLX5_IB_METHOD_DEVX_UMEM_REG,
 	UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_UMEM_REG_HANDLE,
@@ -1440,6 +1504,22 @@ DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_DEVX_UMEM,
 			    &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_UMEM_REG),
 			    &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_UMEM_DEREG));
 
+
+DECLARE_UVERBS_NAMED_METHOD(
+	MLX5_IB_METHOD_DEVX_ASYNC_CMD_FD_ALLOC,
+	UVERBS_ATTR_FD(MLX5_IB_ATTR_DEVX_ASYNC_CMD_FD_ALLOC_HANDLE,
+			MLX5_IB_OBJECT_DEVX_ASYNC_CMD_FD,
+			UVERBS_ACCESS_NEW,
+			UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_OBJECT(
+	MLX5_IB_OBJECT_DEVX_ASYNC_CMD_FD,
+	UVERBS_TYPE_ALLOC_FD(sizeof(struct devx_async_cmd_event_file),
+			     devx_hot_unplug_async_cmd_event_file,
+			     &devx_async_cmd_event_fops, "[devx_async_cmd]",
+			     O_RDONLY),
+	&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_ASYNC_CMD_FD_ALLOC));
+
 static bool devx_is_supported(struct ib_device *device)
 {
 	struct mlx5_ib_dev *dev = to_mdev(device);
@@ -1457,5 +1537,8 @@ const struct uapi_definition mlx5_ib_devx_defs[] = {
 	UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
 		MLX5_IB_OBJECT_DEVX_UMEM,
 		UAPI_DEF_IS_OBJ_SUPPORTED(devx_is_supported)),
+	UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
+		MLX5_IB_OBJECT_DEVX_ASYNC_CMD_FD,
+		UAPI_DEF_IS_OBJ_SUPPORTED(devx_is_supported)),
 	{},
 };
diff --git a/include/rdma/uverbs_types.h b/include/rdma/uverbs_types.h
index acb1bfa3cc99..175d761695e1 100644
--- a/include/rdma/uverbs_types.h
+++ b/include/rdma/uverbs_types.h
@@ -157,6 +157,7 @@ struct uverbs_obj_fd_type {
 
 extern const struct uverbs_obj_type_class uverbs_idr_class;
 extern const struct uverbs_obj_type_class uverbs_fd_class;
+void uverbs_close_fd(struct file *f);
 
 #define UVERBS_BUILD_BUG_ON(cond) (sizeof(char[1 - 2 * !!(cond)]) -	\
 				   sizeof(char))
diff --git a/include/uapi/rdma/mlx5_user_ioctl_cmds.h b/include/uapi/rdma/mlx5_user_ioctl_cmds.h
index b8d121d457f1..6ceae29d77cd 100644
--- a/include/uapi/rdma/mlx5_user_ioctl_cmds.h
+++ b/include/uapi/rdma/mlx5_user_ioctl_cmds.h
@@ -113,11 +113,20 @@ enum mlx5_ib_devx_umem_methods {
 	MLX5_IB_METHOD_DEVX_UMEM_DEREG,
 };
 
+enum mlx5_ib_devx_async_cmd_fd_alloc_attrs {
+	MLX5_IB_ATTR_DEVX_ASYNC_CMD_FD_ALLOC_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+};
+
+enum mlx5_ib_devx_async_cmd_fd_methods {
+	MLX5_IB_METHOD_DEVX_ASYNC_CMD_FD_ALLOC = (1U << UVERBS_ID_NS_SHIFT),
+};
+
 enum mlx5_ib_objects {
 	MLX5_IB_OBJECT_DEVX = (1U << UVERBS_ID_NS_SHIFT),
 	MLX5_IB_OBJECT_DEVX_OBJ,
 	MLX5_IB_OBJECT_DEVX_UMEM,
 	MLX5_IB_OBJECT_FLOW_MATCHER,
+	MLX5_IB_OBJECT_DEVX_ASYNC_CMD_FD,
 };
 
 enum mlx5_ib_flow_matcher_create_attrs {
-- 
cgit v1.2.3


From a124edba26270697540f1058bfcd490c1c65b116 Mon Sep 17 00:00:00 2001
From: Yishai Hadas <yishaih@mellanox.com>
Date: Tue, 22 Jan 2019 08:29:57 +0200
Subject: IB/mlx5: Introduce async DEVX obj query API

Introduce async DEVX obj query API to get the command response back to
user space once it's ready without blocking when calling the firmware.

The event's data includes a header with some meta data then the firmware
output command data.

The header includes:
- The input 'wr_id' to let application recognizing the response.

The input FD attribute is used to have the event data ready on.
Downstream patches from this series will implement the file ops to let
application read it.

Signed-off-by: Yishai Hadas <yishaih@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/hw/mlx5/devx.c         | 164 +++++++++++++++++++++++++++++-
 include/uapi/rdma/mlx5_user_ioctl_cmds.h  |   9 ++
 include/uapi/rdma/mlx5_user_ioctl_verbs.h |   5 +
 3 files changed, 177 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c
index 9933bcf83a6b..9ca116155f9c 100644
--- a/drivers/infiniband/hw/mlx5/devx.c
+++ b/drivers/infiniband/hw/mlx5/devx.c
@@ -8,6 +8,7 @@
 #include <rdma/uverbs_types.h>
 #include <rdma/uverbs_ioctl.h>
 #include <rdma/mlx5_user_ioctl_cmds.h>
+#include <rdma/mlx5_user_ioctl_verbs.h>
 #include <rdma/ib_umem.h>
 #include <rdma/uverbs_std_types.h>
 #include <linux/mlx5/driver.h>
@@ -17,6 +18,16 @@
 #define UVERBS_MODULE_NAME mlx5_ib
 #include <rdma/uverbs_named_ioctl.h>
 
+struct devx_async_data {
+	struct mlx5_ib_dev *mdev;
+	struct list_head list;
+	struct ib_uobject *fd_uobj;
+	struct mlx5_async_work cb_work;
+	u16 cmd_out_len;
+	/* must be last field in this structure */
+	struct mlx5_ib_uapi_devx_async_cmd_hdr hdr;
+};
+
 #define MLX5_MAX_DESTROY_INBOX_SIZE_DW MLX5_ST_SZ_DW(delete_fte_in)
 struct devx_obj {
 	struct mlx5_core_dev	*mdev;
@@ -1172,11 +1183,13 @@ struct devx_async_event_queue {
 	spinlock_t		lock;
 	wait_queue_head_t	poll_wait;
 	struct list_head	event_list;
+	atomic_t		bytes_in_use;
 };
 
 struct devx_async_cmd_event_file {
 	struct ib_uobject		uobj;
 	struct devx_async_event_queue	ev_queue;
+	struct mlx5_async_ctx		async_ctx;
 };
 
 static void devx_init_event_queue(struct devx_async_event_queue *ev_queue)
@@ -1184,6 +1197,7 @@ static void devx_init_event_queue(struct devx_async_event_queue *ev_queue)
 	spin_lock_init(&ev_queue->lock);
 	INIT_LIST_HEAD(&ev_queue->event_list);
 	init_waitqueue_head(&ev_queue->poll_wait);
+	atomic_set(&ev_queue->bytes_in_use, 0);
 }
 
 static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_ASYNC_CMD_FD_ALLOC)(
@@ -1193,13 +1207,123 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_ASYNC_CMD_FD_ALLOC)(
 
 	struct ib_uobject *uobj = uverbs_attr_get_uobject(
 		attrs, MLX5_IB_ATTR_DEVX_ASYNC_CMD_FD_ALLOC_HANDLE);
+	struct mlx5_ib_dev *mdev = to_mdev(uobj->context->device);
 
 	ev_file = container_of(uobj, struct devx_async_cmd_event_file,
 			       uobj);
 	devx_init_event_queue(&ev_file->ev_queue);
+	mlx5_cmd_init_async_ctx(mdev->mdev, &ev_file->async_ctx);
 	return 0;
 }
 
+static void devx_query_callback(int status, struct mlx5_async_work *context)
+{
+	struct devx_async_data *async_data =
+		container_of(context, struct devx_async_data, cb_work);
+	struct ib_uobject *fd_uobj = async_data->fd_uobj;
+	struct devx_async_cmd_event_file *ev_file;
+	struct devx_async_event_queue *ev_queue;
+	unsigned long flags;
+
+	ev_file = container_of(fd_uobj, struct devx_async_cmd_event_file,
+			       uobj);
+	ev_queue = &ev_file->ev_queue;
+
+	spin_lock_irqsave(&ev_queue->lock, flags);
+	list_add_tail(&async_data->list, &ev_queue->event_list);
+	spin_unlock_irqrestore(&ev_queue->lock, flags);
+
+	wake_up_interruptible(&ev_queue->poll_wait);
+	fput(fd_uobj->object);
+}
+
+#define MAX_ASYNC_BYTES_IN_USE (1024 * 1024) /* 1MB */
+
+static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_ASYNC_QUERY)(
+	struct uverbs_attr_bundle *attrs)
+{
+	void *cmd_in = uverbs_attr_get_alloced_ptr(attrs,
+				MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_CMD_IN);
+	struct ib_uobject *uobj = uverbs_attr_get_uobject(
+				attrs,
+				MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_HANDLE);
+	u16 cmd_out_len;
+	struct mlx5_ib_ucontext *c = to_mucontext(uobj->context);
+	struct ib_uobject *fd_uobj;
+	int err;
+	int uid;
+	struct mlx5_ib_dev *mdev = to_mdev(uobj->context->device);
+	struct devx_async_cmd_event_file *ev_file;
+	struct devx_async_data *async_data;
+
+	uid = devx_get_uid(c, cmd_in);
+	if (uid < 0)
+		return uid;
+
+	if (!devx_is_obj_query_cmd(cmd_in))
+		return -EINVAL;
+
+	err = uverbs_get_const(&cmd_out_len, attrs,
+			       MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_OUT_LEN);
+	if (err)
+		return err;
+
+	if (!devx_is_valid_obj_id(uobj, cmd_in))
+		return -EINVAL;
+
+	fd_uobj = uverbs_attr_get_uobject(attrs,
+				MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_FD);
+	if (IS_ERR(fd_uobj))
+		return PTR_ERR(fd_uobj);
+
+	ev_file = container_of(fd_uobj, struct devx_async_cmd_event_file,
+			       uobj);
+
+	if (atomic_add_return(cmd_out_len, &ev_file->ev_queue.bytes_in_use) >
+			MAX_ASYNC_BYTES_IN_USE) {
+		atomic_sub(cmd_out_len, &ev_file->ev_queue.bytes_in_use);
+		return -EAGAIN;
+	}
+
+	async_data = kvzalloc(struct_size(async_data, hdr.out_data,
+					  cmd_out_len), GFP_KERNEL);
+	if (!async_data) {
+		err = -ENOMEM;
+		goto sub_bytes;
+	}
+
+	err = uverbs_copy_from(&async_data->hdr.wr_id, attrs,
+			       MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_WR_ID);
+	if (err)
+		goto free_async;
+
+	async_data->cmd_out_len = cmd_out_len;
+	async_data->mdev = mdev;
+	async_data->fd_uobj = fd_uobj;
+
+	get_file(fd_uobj->object);
+	MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid);
+	err = mlx5_cmd_exec_cb(&ev_file->async_ctx, cmd_in,
+		    uverbs_attr_get_len(attrs,
+				MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_CMD_IN),
+		    async_data->hdr.out_data,
+		    async_data->cmd_out_len,
+		    devx_query_callback, &async_data->cb_work);
+
+	if (err)
+		goto cb_err;
+
+	return 0;
+
+cb_err:
+	fput(fd_uobj->object);
+free_async:
+	kvfree(async_data);
+sub_bytes:
+	atomic_sub(cmd_out_len, &ev_file->ev_queue.bytes_in_use);
+	return err;
+}
+
 static int devx_umem_get(struct mlx5_ib_dev *dev, struct ib_ucontext *ucontext,
 			 struct uverbs_attr_bundle *attrs,
 			 struct devx_umem *obj)
@@ -1353,6 +1477,17 @@ static ssize_t devx_async_cmd_event_read(struct file *filp, char __user *buf,
 
 static int devx_async_cmd_event_close(struct inode *inode, struct file *filp)
 {
+	struct ib_uobject *uobj = filp->private_data;
+	struct devx_async_cmd_event_file *comp_ev_file = container_of(
+		uobj, struct devx_async_cmd_event_file, uobj);
+	struct devx_async_data *entry, *tmp;
+
+	spin_lock_irq(&comp_ev_file->ev_queue.lock);
+	list_for_each_entry_safe(entry, tmp,
+				 &comp_ev_file->ev_queue.event_list, list)
+		kvfree(entry);
+	spin_unlock_irq(&comp_ev_file->ev_queue.lock);
+
 	uverbs_close_fd(filp);
 	return 0;
 }
@@ -1374,6 +1509,11 @@ const struct file_operations devx_async_cmd_event_fops = {
 static int devx_hot_unplug_async_cmd_event_file(struct ib_uobject *uobj,
 						   enum rdma_remove_reason why)
 {
+	struct devx_async_cmd_event_file *comp_ev_file =
+		container_of(uobj, struct devx_async_cmd_event_file,
+			     uobj);
+
+	mlx5_cmd_cleanup_async_ctx(&comp_ev_file->async_ctx);
 	return 0;
 };
 
@@ -1487,6 +1627,27 @@ DECLARE_UVERBS_NAMED_METHOD(
 		UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)),
 		UA_MANDATORY));
 
+DECLARE_UVERBS_NAMED_METHOD(
+	MLX5_IB_METHOD_DEVX_OBJ_ASYNC_QUERY,
+	UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_QUERY_HANDLE,
+			UVERBS_IDR_ANY_OBJECT,
+			UVERBS_ACCESS_READ,
+			UA_MANDATORY),
+	UVERBS_ATTR_PTR_IN(
+		MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN,
+		UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)),
+		UA_MANDATORY,
+		UA_ALLOC_AND_COPY),
+	UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_OUT_LEN,
+		u16, UA_MANDATORY),
+	UVERBS_ATTR_FD(MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_FD,
+		MLX5_IB_OBJECT_DEVX_ASYNC_CMD_FD,
+		UVERBS_ACCESS_READ,
+		UA_MANDATORY),
+	UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_WR_ID,
+		UVERBS_ATTR_TYPE(u64),
+		UA_MANDATORY));
+
 DECLARE_UVERBS_GLOBAL_METHODS(MLX5_IB_OBJECT_DEVX,
 			      &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OTHER),
 			      &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_QUERY_UAR),
@@ -1497,7 +1658,8 @@ DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_DEVX_OBJ,
 			    &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_CREATE),
 			    &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_DESTROY),
 			    &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_MODIFY),
-			    &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_QUERY));
+			    &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_QUERY),
+			    &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_ASYNC_QUERY));
 
 DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_DEVX_UMEM,
 			    UVERBS_TYPE_ALLOC_IDR(devx_umem_cleanup),
diff --git a/include/uapi/rdma/mlx5_user_ioctl_cmds.h b/include/uapi/rdma/mlx5_user_ioctl_cmds.h
index 6ceae29d77cd..8149d224030b 100644
--- a/include/uapi/rdma/mlx5_user_ioctl_cmds.h
+++ b/include/uapi/rdma/mlx5_user_ioctl_cmds.h
@@ -84,6 +84,14 @@ enum mlx5_ib_devx_obj_query_attrs {
 	MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_OUT,
 };
 
+enum mlx5_ib_devx_obj_query_async_attrs {
+	MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+	MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_CMD_IN,
+	MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_FD,
+	MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_WR_ID,
+	MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_OUT_LEN,
+};
+
 enum  mlx5_ib_devx_query_eqn_attrs {
 	MLX5_IB_ATTR_DEVX_QUERY_EQN_USER_VEC = (1U << UVERBS_ID_NS_SHIFT),
 	MLX5_IB_ATTR_DEVX_QUERY_EQN_DEV_EQN,
@@ -94,6 +102,7 @@ enum mlx5_ib_devx_obj_methods {
 	MLX5_IB_METHOD_DEVX_OBJ_DESTROY,
 	MLX5_IB_METHOD_DEVX_OBJ_MODIFY,
 	MLX5_IB_METHOD_DEVX_OBJ_QUERY,
+	MLX5_IB_METHOD_DEVX_OBJ_ASYNC_QUERY,
 };
 
 enum mlx5_ib_devx_umem_reg_attrs {
diff --git a/include/uapi/rdma/mlx5_user_ioctl_verbs.h b/include/uapi/rdma/mlx5_user_ioctl_verbs.h
index 4ef62c0e8452..4a701033b93f 100644
--- a/include/uapi/rdma/mlx5_user_ioctl_verbs.h
+++ b/include/uapi/rdma/mlx5_user_ioctl_verbs.h
@@ -51,5 +51,10 @@ enum mlx5_ib_uapi_flow_action_packet_reformat_type {
 	MLX5_IB_UAPI_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L3_TUNNEL = 0x3,
 };
 
+struct mlx5_ib_uapi_devx_async_cmd_hdr {
+	__aligned_u64	wr_id;
+	__u8		out_data[];
+};
+
 #endif
 
-- 
cgit v1.2.3


From 71368af9027f18fe5d1c6f372cfdff7e4bde8b48 Mon Sep 17 00:00:00 2001
From: Waiman Long <longman@redhat.com>
Date: Wed, 16 Jan 2019 17:01:36 -0500
Subject: x86/speculation: Add PR_SPEC_DISABLE_NOEXEC

With the default SPEC_STORE_BYPASS_SECCOMP/SPEC_STORE_BYPASS_PRCTL mode,
the TIF_SSBD bit will be inherited when a new task is fork'ed or cloned.
It will also remain when a new program is execve'ed.

Only certain class of applications (like Java) that can run on behalf of
multiple users on a single thread will require disabling speculative store
bypass for security purposes. Those applications will call prctl(2) at
startup time to disable SSB. They won't rely on the fact the SSB might have
been disabled. Other applications that don't need SSBD will just move on
without checking if SSBD has been turned on or not.

The fact that the TIF_SSBD is inherited across execve(2) boundary will
cause performance of applications that don't need SSBD but their
predecessors have SSBD on to be unwittingly impacted especially if they
write to memory a lot.

To remedy this problem, a new PR_SPEC_DISABLE_NOEXEC argument for the
PR_SET_SPECULATION_CTRL option of prctl(2) is added to allow applications
to specify that the SSBD feature bit on the task structure should be
cleared whenever a new program is being execve'ed.

Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Waiman Long <longman@redhat.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: linux-doc@vger.kernel.org
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: David Woodhouse <dwmw@amazon.co.uk>
Cc: Jiri Kosina <jikos@kernel.org>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Tim Chen <tim.c.chen@linux.intel.com>
Cc: KarimAllah Ahmed <karahmed@amazon.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Link: https://lkml.kernel.org/r/1547676096-3281-1-git-send-email-longman@redhat.com
---
 Documentation/userspace-api/spec_ctrl.rst | 27 +++++++++++++++------------
 arch/x86/kernel/cpu/bugs.c                | 12 ++++++++++++
 arch/x86/kernel/process.c                 | 12 ++++++++++++
 include/linux/sched.h                     |  5 +++++
 include/uapi/linux/prctl.h                |  1 +
 tools/include/uapi/linux/prctl.h          |  1 +
 6 files changed, 46 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/Documentation/userspace-api/spec_ctrl.rst b/Documentation/userspace-api/spec_ctrl.rst
index c4dbe6f7cdae..1129c7550a48 100644
--- a/Documentation/userspace-api/spec_ctrl.rst
+++ b/Documentation/userspace-api/spec_ctrl.rst
@@ -28,18 +28,20 @@ PR_GET_SPECULATION_CTRL returns the state of the speculation misfeature
 which is selected with arg2 of prctl(2). The return value uses bits 0-3 with
 the following meaning:
 
-==== ===================== ===================================================
-Bit  Define                Description
-==== ===================== ===================================================
-0    PR_SPEC_PRCTL         Mitigation can be controlled per task by
-                           PR_SET_SPECULATION_CTRL.
-1    PR_SPEC_ENABLE        The speculation feature is enabled, mitigation is
-                           disabled.
-2    PR_SPEC_DISABLE       The speculation feature is disabled, mitigation is
-                           enabled.
-3    PR_SPEC_FORCE_DISABLE Same as PR_SPEC_DISABLE, but cannot be undone. A
-                           subsequent prctl(..., PR_SPEC_ENABLE) will fail.
-==== ===================== ===================================================
+==== ====================== ==================================================
+Bit  Define                 Description
+==== ====================== ==================================================
+0    PR_SPEC_PRCTL          Mitigation can be controlled per task by
+                            PR_SET_SPECULATION_CTRL.
+1    PR_SPEC_ENABLE         The speculation feature is enabled, mitigation is
+                            disabled.
+2    PR_SPEC_DISABLE        The speculation feature is disabled, mitigation is
+                            enabled.
+3    PR_SPEC_FORCE_DISABLE  Same as PR_SPEC_DISABLE, but cannot be undone. A
+                            subsequent prctl(..., PR_SPEC_ENABLE) will fail.
+4    PR_SPEC_DISABLE_NOEXEC Same as PR_SPEC_DISABLE, but the state will be
+                            cleared on :manpage:`execve(2)`.
+==== ====================== ==================================================
 
 If all bits are 0 the CPU is not affected by the speculation misfeature.
 
@@ -92,6 +94,7 @@ Speculation misfeature controls
    * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_ENABLE, 0, 0);
    * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_DISABLE, 0, 0);
    * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_FORCE_DISABLE, 0, 0);
+   * prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_DISABLE_NOEXEC, 0, 0);
 
 - PR_SPEC_INDIR_BRANCH: Indirect Branch Speculation in User Processes
                         (Mitigate Spectre V2 style attacks against user processes)
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 1de0f4170178..2faeaf46347a 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -798,15 +798,25 @@ static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl)
 		if (task_spec_ssb_force_disable(task))
 			return -EPERM;
 		task_clear_spec_ssb_disable(task);
+		task_clear_spec_ssb_noexec(task);
 		task_update_spec_tif(task);
 		break;
 	case PR_SPEC_DISABLE:
 		task_set_spec_ssb_disable(task);
+		task_clear_spec_ssb_noexec(task);
 		task_update_spec_tif(task);
 		break;
 	case PR_SPEC_FORCE_DISABLE:
 		task_set_spec_ssb_disable(task);
 		task_set_spec_ssb_force_disable(task);
+		task_clear_spec_ssb_noexec(task);
+		task_update_spec_tif(task);
+		break;
+	case PR_SPEC_DISABLE_NOEXEC:
+		if (task_spec_ssb_force_disable(task))
+			return -EPERM;
+		task_set_spec_ssb_disable(task);
+		task_set_spec_ssb_noexec(task);
 		task_update_spec_tif(task);
 		break;
 	default:
@@ -885,6 +895,8 @@ static int ssb_prctl_get(struct task_struct *task)
 	case SPEC_STORE_BYPASS_PRCTL:
 		if (task_spec_ssb_force_disable(task))
 			return PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE;
+		if (task_spec_ssb_noexec(task))
+			return PR_SPEC_PRCTL | PR_SPEC_DISABLE_NOEXEC;
 		if (task_spec_ssb_disable(task))
 			return PR_SPEC_PRCTL | PR_SPEC_DISABLE;
 		return PR_SPEC_PRCTL | PR_SPEC_ENABLE;
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 90ae0ca51083..58ac7be52c7a 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -255,6 +255,18 @@ void arch_setup_new_exec(void)
 	/* If cpuid was previously disabled for this task, re-enable it. */
 	if (test_thread_flag(TIF_NOCPUID))
 		enable_cpuid();
+
+	/*
+	 * Don't inherit TIF_SSBD across exec boundary when
+	 * PR_SPEC_DISABLE_NOEXEC is used.
+	 */
+	if (test_thread_flag(TIF_SSBD) &&
+	    task_spec_ssb_noexec(current)) {
+		clear_thread_flag(TIF_SSBD);
+		task_clear_spec_ssb_disable(current);
+		task_clear_spec_ssb_noexec(current);
+		speculation_ctrl_update(task_thread_info(current)->flags);
+	}
 }
 
 static inline void switch_to_bitmap(struct thread_struct *prev,
diff --git a/include/linux/sched.h b/include/linux/sched.h
index d2f90fa92468..fc836dc71bba 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1459,6 +1459,7 @@ static inline bool is_percpu_thread(void)
 #define PFA_SPEC_SSB_FORCE_DISABLE	4	/* Speculative Store Bypass force disabled*/
 #define PFA_SPEC_IB_DISABLE		5	/* Indirect branch speculation restricted */
 #define PFA_SPEC_IB_FORCE_DISABLE	6	/* Indirect branch speculation permanently restricted */
+#define PFA_SPEC_SSB_NOEXEC		7	/* Speculative Store Bypass clear on execve() */
 
 #define TASK_PFA_TEST(name, func)					\
 	static inline bool task_##func(struct task_struct *p)		\
@@ -1487,6 +1488,10 @@ TASK_PFA_TEST(SPEC_SSB_DISABLE, spec_ssb_disable)
 TASK_PFA_SET(SPEC_SSB_DISABLE, spec_ssb_disable)
 TASK_PFA_CLEAR(SPEC_SSB_DISABLE, spec_ssb_disable)
 
+TASK_PFA_TEST(SPEC_SSB_NOEXEC, spec_ssb_noexec)
+TASK_PFA_SET(SPEC_SSB_NOEXEC, spec_ssb_noexec)
+TASK_PFA_CLEAR(SPEC_SSB_NOEXEC, spec_ssb_noexec)
+
 TASK_PFA_TEST(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable)
 TASK_PFA_SET(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable)
 
diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index b4875a93363a..094bb03b9cc2 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -219,6 +219,7 @@ struct prctl_mm_map {
 # define PR_SPEC_ENABLE			(1UL << 1)
 # define PR_SPEC_DISABLE		(1UL << 2)
 # define PR_SPEC_FORCE_DISABLE		(1UL << 3)
+# define PR_SPEC_DISABLE_NOEXEC		(1UL << 4)
 
 /* Reset arm64 pointer authentication keys */
 #define PR_PAC_RESET_KEYS		54
diff --git a/tools/include/uapi/linux/prctl.h b/tools/include/uapi/linux/prctl.h
index b4875a93363a..094bb03b9cc2 100644
--- a/tools/include/uapi/linux/prctl.h
+++ b/tools/include/uapi/linux/prctl.h
@@ -219,6 +219,7 @@ struct prctl_mm_map {
 # define PR_SPEC_ENABLE			(1UL << 1)
 # define PR_SPEC_DISABLE		(1UL << 2)
 # define PR_SPEC_FORCE_DISABLE		(1UL << 3)
+# define PR_SPEC_DISABLE_NOEXEC		(1UL << 4)
 
 /* Reset arm64 pointer authentication keys */
 #define PR_PAC_RESET_KEYS		54
-- 
cgit v1.2.3


From 1194c4133195dfcb6c5fc0935d54bbed872a5285 Mon Sep 17 00:00:00 2001
From: Dexuan Cui <decui@microsoft.com>
Date: Tue, 29 Jan 2019 00:56:17 +0000
Subject: nfit: Add Hyper-V NVDIMM DSM command set to white list

Add the Hyper-V _DSM command set to the white list of NVDIMM command
sets.

This command set is documented at http://www.uefi.org/RFIC_LIST
(see "Virtual NVDIMM 0x1901").

Signed-off-by: Dexuan Cui <decui@microsoft.com>
Reviewed-by: Michael Kelley <mikelley@microsoft.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/acpi/nfit/core.c   | 17 ++++++++++++++---
 drivers/acpi/nfit/nfit.h   |  6 +++++-
 include/uapi/linux/ndctl.h |  1 +
 3 files changed, 20 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c
index 1598e3a121a6..4a7e8b1fa43b 100644
--- a/drivers/acpi/nfit/core.c
+++ b/drivers/acpi/nfit/core.c
@@ -1846,9 +1846,17 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc,
 	dev_set_drvdata(&adev_dimm->dev, nfit_mem);
 
 	/*
-	 * Until standardization materializes we need to consider 4
-	 * different command sets.  Note, that checking for function0 (bit0)
-	 * tells us if any commands are reachable through this GUID.
+	 * There are 4 "legacy" NVDIMM command sets
+	 * (NVDIMM_FAMILY_{INTEL,MSFT,HPE1,HPE2}) that were created before
+	 * an EFI working group was established to constrain this
+	 * proliferation. The nfit driver probes for the supported command
+	 * set by GUID. Note, if you're a platform developer looking to add
+	 * a new command set to this probe, consider using an existing set,
+	 * or otherwise seek approval to publish the command set at
+	 * http://www.uefi.org/RFIC_LIST.
+	 *
+	 * Note, that checking for function0 (bit0) tells us if any commands
+	 * are reachable through this GUID.
 	 */
 	for (i = 0; i <= NVDIMM_FAMILY_MAX; i++)
 		if (acpi_check_dsm(adev_dimm->handle, to_nfit_uuid(i), 1, 1))
@@ -1871,6 +1879,8 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc,
 			dsm_mask &= ~(1 << 8);
 	} else if (nfit_mem->family == NVDIMM_FAMILY_MSFT) {
 		dsm_mask = 0xffffffff;
+	} else if (nfit_mem->family == NVDIMM_FAMILY_HYPERV) {
+		dsm_mask = 0x1f;
 	} else {
 		dev_dbg(dev, "unknown dimm command family\n");
 		nfit_mem->family = -1;
@@ -3714,6 +3724,7 @@ static __init int nfit_init(void)
 	guid_parse(UUID_NFIT_DIMM_N_HPE1, &nfit_uuid[NFIT_DEV_DIMM_N_HPE1]);
 	guid_parse(UUID_NFIT_DIMM_N_HPE2, &nfit_uuid[NFIT_DEV_DIMM_N_HPE2]);
 	guid_parse(UUID_NFIT_DIMM_N_MSFT, &nfit_uuid[NFIT_DEV_DIMM_N_MSFT]);
+	guid_parse(UUID_NFIT_DIMM_N_HYPERV, &nfit_uuid[NFIT_DEV_DIMM_N_HYPERV]);
 
 	nfit_wq = create_singlethread_workqueue("nfit");
 	if (!nfit_wq)
diff --git a/drivers/acpi/nfit/nfit.h b/drivers/acpi/nfit/nfit.h
index 33691aecfcee..4de167b4f76f 100644
--- a/drivers/acpi/nfit/nfit.h
+++ b/drivers/acpi/nfit/nfit.h
@@ -34,11 +34,14 @@
 /* https://msdn.microsoft.com/library/windows/hardware/mt604741 */
 #define UUID_NFIT_DIMM_N_MSFT "1ee68b36-d4bd-4a1a-9a16-4f8e53d46e05"
 
+/* http://www.uefi.org/RFIC_LIST (see "Virtual NVDIMM 0x1901") */
+#define UUID_NFIT_DIMM_N_HYPERV "5746c5f2-a9a2-4264-ad0e-e4ddc9e09e80"
+
 #define ACPI_NFIT_MEM_FAILED_MASK (ACPI_NFIT_MEM_SAVE_FAILED \
 		| ACPI_NFIT_MEM_RESTORE_FAILED | ACPI_NFIT_MEM_FLUSH_FAILED \
 		| ACPI_NFIT_MEM_NOT_ARMED | ACPI_NFIT_MEM_MAP_FAILED)
 
-#define NVDIMM_FAMILY_MAX NVDIMM_FAMILY_MSFT
+#define NVDIMM_FAMILY_MAX NVDIMM_FAMILY_HYPERV
 
 #define NVDIMM_STANDARD_CMDMASK \
 (1 << ND_CMD_SMART | 1 << ND_CMD_SMART_THRESHOLD | 1 << ND_CMD_DIMM_FLAGS \
@@ -94,6 +97,7 @@ enum nfit_uuids {
 	NFIT_DEV_DIMM_N_HPE1 = NVDIMM_FAMILY_HPE1,
 	NFIT_DEV_DIMM_N_HPE2 = NVDIMM_FAMILY_HPE2,
 	NFIT_DEV_DIMM_N_MSFT = NVDIMM_FAMILY_MSFT,
+	NFIT_DEV_DIMM_N_HYPERV = NVDIMM_FAMILY_HYPERV,
 	NFIT_SPA_VOLATILE,
 	NFIT_SPA_PM,
 	NFIT_SPA_DCR,
diff --git a/include/uapi/linux/ndctl.h b/include/uapi/linux/ndctl.h
index f57c9e434d2d..de5d90212409 100644
--- a/include/uapi/linux/ndctl.h
+++ b/include/uapi/linux/ndctl.h
@@ -243,6 +243,7 @@ struct nd_cmd_pkg {
 #define NVDIMM_FAMILY_HPE1 1
 #define NVDIMM_FAMILY_HPE2 2
 #define NVDIMM_FAMILY_MSFT 3
+#define NVDIMM_FAMILY_HYPERV 4
 
 #define ND_IOCTL_CALL			_IOWR(ND_IOCTL, ND_CMD_CALL,\
 					struct nd_cmd_pkg)
-- 
cgit v1.2.3


From 39e6160e14bd011988e4f88fd3d3f1205c3b9776 Mon Sep 17 00:00:00 2001
From: Vasundhara Volam <vasundhara-v.volam@broadcom.com>
Date: Mon, 28 Jan 2019 18:00:20 +0530
Subject: devlink: Add devlink_param for port register and unregister

Add functions to register and unregister for the driver supported
configuration parameters table per port.

v7->v8:
- Order the definitions following way as suggested by Jiri.
__devlink_params_register
__devlink_params_unregister
devlink_params_register
devlink_params_unregister
devlink_port_params_register
devlink_port_params_unregister
- Append with Acked-by: Jiri Pirko <jiri@mellanox.com>.

v2->v3:
- Add a helper __devlink_params_register() with common code used by
  both devlink_params_register() and devlink_port_params_register().

Cc: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Vasundhara Volam <vasundhara-v.volam@broadcom.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/devlink.h |  22 +++++++++
 net/core/devlink.c    | 133 +++++++++++++++++++++++++++++++++++---------------
 2 files changed, 117 insertions(+), 38 deletions(-)

(limited to 'include')

diff --git a/include/net/devlink.h b/include/net/devlink.h
index 67f4293bc970..7fc0748c58e3 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -48,6 +48,7 @@ struct devlink_port_attrs {
 
 struct devlink_port {
 	struct list_head list;
+	struct list_head param_list;
 	struct devlink *devlink;
 	unsigned index;
 	bool registered;
@@ -567,6 +568,12 @@ int devlink_params_register(struct devlink *devlink,
 void devlink_params_unregister(struct devlink *devlink,
 			       const struct devlink_param *params,
 			       size_t params_count);
+int devlink_port_params_register(struct devlink_port *devlink_port,
+				 const struct devlink_param *params,
+				 size_t params_count);
+void devlink_port_params_unregister(struct devlink_port *devlink_port,
+				    const struct devlink_param *params,
+				    size_t params_count);
 int devlink_param_driverinit_value_get(struct devlink *devlink, u32 param_id,
 				       union devlink_param_value *init_val);
 int devlink_param_driverinit_value_set(struct devlink *devlink, u32 param_id,
@@ -791,6 +798,21 @@ devlink_params_unregister(struct devlink *devlink,
 
 }
 
+static inline int
+devlink_port_params_register(struct devlink_port *devlink_port,
+			     const struct devlink_param *params,
+			     size_t params_count)
+{
+	return 0;
+}
+
+static inline void
+devlink_port_params_unregister(struct devlink_port *devlink_port,
+			       const struct devlink_param *params,
+			       size_t params_count)
+{
+}
+
 static inline int
 devlink_param_driverinit_value_get(struct devlink *devlink, u32 param_id,
 				   union devlink_param_value *init_val)
diff --git a/net/core/devlink.c b/net/core/devlink.c
index abb0da9d7b4b..371481ca2afd 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -3147,12 +3147,12 @@ static int devlink_nl_cmd_param_set_doit(struct sk_buff *skb,
 }
 
 static int devlink_param_register_one(struct devlink *devlink,
+				      struct list_head *param_list,
 				      const struct devlink_param *param)
 {
 	struct devlink_param_item *param_item;
 
-	if (devlink_param_find_by_name(&devlink->param_list,
-				       param->name))
+	if (devlink_param_find_by_name(param_list, param->name))
 		return -EEXIST;
 
 	if (param->supported_cmodes == BIT(DEVLINK_PARAM_CMODE_DRIVERINIT))
@@ -3165,18 +3165,18 @@ static int devlink_param_register_one(struct devlink *devlink,
 		return -ENOMEM;
 	param_item->param = param;
 
-	list_add_tail(&param_item->list, &devlink->param_list);
+	list_add_tail(&param_item->list, param_list);
 	devlink_param_notify(devlink, param_item, DEVLINK_CMD_PARAM_NEW);
 	return 0;
 }
 
 static void devlink_param_unregister_one(struct devlink *devlink,
+					 struct list_head *param_list,
 					 const struct devlink_param *param)
 {
 	struct devlink_param_item *param_item;
 
-	param_item = devlink_param_find_by_name(&devlink->param_list,
-						param->name);
+	param_item = devlink_param_find_by_name(param_list, param->name);
 	WARN_ON(!param_item);
 	devlink_param_notify(devlink, param_item, DEVLINK_CMD_PARAM_DEL);
 	list_del(&param_item->list);
@@ -3954,6 +3954,7 @@ int devlink_port_register(struct devlink *devlink,
 	devlink_port->index = port_index;
 	devlink_port->registered = true;
 	list_add_tail(&devlink_port->list, &devlink->port_list);
+	INIT_LIST_HEAD(&devlink_port->param_list);
 	mutex_unlock(&devlink->lock);
 	devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW);
 	return 0;
@@ -4471,18 +4472,20 @@ out:
 }
 EXPORT_SYMBOL_GPL(devlink_resource_occ_get_unregister);
 
-/**
- *	devlink_params_register - register configuration parameters
- *
- *	@devlink: devlink
- *	@params: configuration parameters array
- *	@params_count: number of parameters provided
- *
- *	Register the configuration parameters supported by the driver.
- */
-int devlink_params_register(struct devlink *devlink,
-			    const struct devlink_param *params,
-			    size_t params_count)
+static int devlink_param_verify(const struct devlink_param *param)
+{
+	if (!param || !param->name || !param->supported_cmodes)
+		return -EINVAL;
+	if (param->generic)
+		return devlink_param_generic_verify(param);
+	else
+		return devlink_param_driver_verify(param);
+}
+
+static int __devlink_params_register(struct devlink *devlink,
+				     struct list_head *param_list,
+				     const struct devlink_param *params,
+				     size_t params_count)
 {
 	const struct devlink_param *param = params;
 	int i;
@@ -4490,20 +4493,11 @@ int devlink_params_register(struct devlink *devlink,
 
 	mutex_lock(&devlink->lock);
 	for (i = 0; i < params_count; i++, param++) {
-		if (!param || !param->name || !param->supported_cmodes) {
-			err = -EINVAL;
+		err = devlink_param_verify(param);
+		if (err)
 			goto rollback;
-		}
-		if (param->generic) {
-			err = devlink_param_generic_verify(param);
-			if (err)
-				goto rollback;
-		} else {
-			err = devlink_param_driver_verify(param);
-			if (err)
-				goto rollback;
-		}
-		err = devlink_param_register_one(devlink, param);
+
+		err = devlink_param_register_one(devlink, param_list, param);
 		if (err)
 			goto rollback;
 	}
@@ -4515,11 +4509,42 @@ rollback:
 	if (!i)
 		goto unlock;
 	for (param--; i > 0; i--, param--)
-		devlink_param_unregister_one(devlink, param);
+		devlink_param_unregister_one(devlink, param_list, param);
 unlock:
 	mutex_unlock(&devlink->lock);
 	return err;
 }
+
+static void __devlink_params_unregister(struct devlink *devlink,
+					struct list_head *param_list,
+					const struct devlink_param *params,
+					size_t params_count)
+{
+	const struct devlink_param *param = params;
+	int i;
+
+	mutex_lock(&devlink->lock);
+	for (i = 0; i < params_count; i++, param++)
+		devlink_param_unregister_one(devlink, param_list, param);
+	mutex_unlock(&devlink->lock);
+}
+
+/**
+ *	devlink_params_register - register configuration parameters
+ *
+ *	@devlink: devlink
+ *	@params: configuration parameters array
+ *	@params_count: number of parameters provided
+ *
+ *	Register the configuration parameters supported by the driver.
+ */
+int devlink_params_register(struct devlink *devlink,
+			    const struct devlink_param *params,
+			    size_t params_count)
+{
+	return __devlink_params_register(devlink, &devlink->param_list, params,
+					 params_count);
+}
 EXPORT_SYMBOL_GPL(devlink_params_register);
 
 /**
@@ -4532,16 +4557,48 @@ void devlink_params_unregister(struct devlink *devlink,
 			       const struct devlink_param *params,
 			       size_t params_count)
 {
-	const struct devlink_param *param = params;
-	int i;
-
-	mutex_lock(&devlink->lock);
-	for (i = 0; i < params_count; i++, param++)
-		devlink_param_unregister_one(devlink, param);
-	mutex_unlock(&devlink->lock);
+	return __devlink_params_unregister(devlink, &devlink->param_list,
+					   params, params_count);
 }
 EXPORT_SYMBOL_GPL(devlink_params_unregister);
 
+/**
+ *	devlink_port_params_register - register port configuration parameters
+ *
+ *	@devlink_port: devlink port
+ *	@params: configuration parameters array
+ *	@params_count: number of parameters provided
+ *
+ *	Register the configuration parameters supported by the port.
+ */
+int devlink_port_params_register(struct devlink_port *devlink_port,
+				 const struct devlink_param *params,
+				 size_t params_count)
+{
+	return __devlink_params_register(devlink_port->devlink,
+					 &devlink_port->param_list, params,
+					 params_count);
+}
+EXPORT_SYMBOL_GPL(devlink_port_params_register);
+
+/**
+ *	devlink_port_params_unregister - unregister port configuration
+ *	parameters
+ *
+ *	@devlink_port: devlink port
+ *	@params: configuration parameters array
+ *	@params_count: number of parameters provided
+ */
+void devlink_port_params_unregister(struct devlink_port *devlink_port,
+				    const struct devlink_param *params,
+				    size_t params_count)
+{
+	return __devlink_params_unregister(devlink_port->devlink,
+					   &devlink_port->param_list,
+					   params, params_count);
+}
+EXPORT_SYMBOL_GPL(devlink_port_params_unregister);
+
 /**
  *	devlink_param_driverinit_value_get - get configuration parameter
  *					     value for driver initializing
-- 
cgit v1.2.3


From f4601dee25d5fe8010023552b10879f3d62e45ce Mon Sep 17 00:00:00 2001
From: Vasundhara Volam <vasundhara-v.volam@broadcom.com>
Date: Mon, 28 Jan 2019 18:00:21 +0530
Subject: devlink: Add port param get command

Add port param get command which gets data per parameter.
It also has option to dump the parameters data per port.

v7->v8: Append "Acked-by: Jiri Pirko <jiri@mellanox.com>"

Cc: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Vasundhara Volam <vasundhara-v.volam@broadcom.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/devlink.h |   2 +
 net/core/devlink.c           | 102 ++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 97 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index 6e52d3660654..448973beac9d 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -89,6 +89,8 @@ enum devlink_command {
 	DEVLINK_CMD_REGION_DEL,
 	DEVLINK_CMD_REGION_READ,
 
+	DEVLINK_CMD_PORT_PARAM_GET,	/* can dump */
+
 	/* add new commands above here */
 	__DEVLINK_CMD_MAX,
 	DEVLINK_CMD_MAX = __DEVLINK_CMD_MAX - 1
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 371481ca2afd..66313dcdbdca 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -2843,6 +2843,7 @@ nla_put_failure:
 }
 
 static int devlink_nl_param_fill(struct sk_buff *msg, struct devlink *devlink,
+				 unsigned int port_index,
 				 struct devlink_param_item *param_item,
 				 enum devlink_command cmd,
 				 u32 portid, u32 seq, int flags)
@@ -2880,6 +2881,11 @@ static int devlink_nl_param_fill(struct sk_buff *msg, struct devlink *devlink,
 
 	if (devlink_nl_put_handle(msg, devlink))
 		goto genlmsg_cancel;
+
+	if (cmd == DEVLINK_CMD_PORT_PARAM_GET)
+		if (nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX, port_index))
+			goto genlmsg_cancel;
+
 	param_attr = nla_nest_start(msg, DEVLINK_ATTR_PARAM);
 	if (!param_attr)
 		goto genlmsg_cancel;
@@ -2933,7 +2939,7 @@ static void devlink_param_notify(struct devlink *devlink,
 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
 	if (!msg)
 		return;
-	err = devlink_nl_param_fill(msg, devlink, param_item, cmd, 0, 0, 0);
+	err = devlink_nl_param_fill(msg, devlink, 0, param_item, cmd, 0, 0, 0);
 	if (err) {
 		nlmsg_free(msg);
 		return;
@@ -2962,7 +2968,7 @@ static int devlink_nl_cmd_param_get_dumpit(struct sk_buff *msg,
 				idx++;
 				continue;
 			}
-			err = devlink_nl_param_fill(msg, devlink, param_item,
+			err = devlink_nl_param_fill(msg, devlink, 0, param_item,
 						    DEVLINK_CMD_PARAM_GET,
 						    NETLINK_CB(cb->skb).portid,
 						    cb->nlh->nlmsg_seq,
@@ -3051,7 +3057,7 @@ devlink_param_value_get_from_info(const struct devlink_param *param,
 }
 
 static struct devlink_param_item *
-devlink_param_get_from_info(struct devlink *devlink,
+devlink_param_get_from_info(struct list_head *param_list,
 			    struct genl_info *info)
 {
 	char *param_name;
@@ -3060,7 +3066,7 @@ devlink_param_get_from_info(struct devlink *devlink,
 		return NULL;
 
 	param_name = nla_data(info->attrs[DEVLINK_ATTR_PARAM_NAME]);
-	return devlink_param_find_by_name(&devlink->param_list, param_name);
+	return devlink_param_find_by_name(param_list, param_name);
 }
 
 static int devlink_nl_cmd_param_get_doit(struct sk_buff *skb,
@@ -3071,7 +3077,7 @@ static int devlink_nl_cmd_param_get_doit(struct sk_buff *skb,
 	struct sk_buff *msg;
 	int err;
 
-	param_item = devlink_param_get_from_info(devlink, info);
+	param_item = devlink_param_get_from_info(&devlink->param_list, info);
 	if (!param_item)
 		return -EINVAL;
 
@@ -3079,7 +3085,7 @@ static int devlink_nl_cmd_param_get_doit(struct sk_buff *skb,
 	if (!msg)
 		return -ENOMEM;
 
-	err = devlink_nl_param_fill(msg, devlink, param_item,
+	err = devlink_nl_param_fill(msg, devlink, 0, param_item,
 				    DEVLINK_CMD_PARAM_GET,
 				    info->snd_portid, info->snd_seq, 0);
 	if (err) {
@@ -3102,7 +3108,7 @@ static int devlink_nl_cmd_param_set_doit(struct sk_buff *skb,
 	union devlink_param_value value;
 	int err = 0;
 
-	param_item = devlink_param_get_from_info(devlink, info);
+	param_item = devlink_param_get_from_info(&devlink->param_list, info);
 	if (!param_item)
 		return -EINVAL;
 	param = param_item->param;
@@ -3183,6 +3189,80 @@ static void devlink_param_unregister_one(struct devlink *devlink,
 	kfree(param_item);
 }
 
+static int devlink_nl_cmd_port_param_get_dumpit(struct sk_buff *msg,
+						struct netlink_callback *cb)
+{
+	struct devlink_param_item *param_item;
+	struct devlink_port *devlink_port;
+	struct devlink *devlink;
+	int start = cb->args[0];
+	int idx = 0;
+	int err;
+
+	mutex_lock(&devlink_mutex);
+	list_for_each_entry(devlink, &devlink_list, list) {
+		if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+			continue;
+		mutex_lock(&devlink->lock);
+		list_for_each_entry(devlink_port, &devlink->port_list, list) {
+			list_for_each_entry(param_item,
+					    &devlink_port->param_list, list) {
+				if (idx < start) {
+					idx++;
+					continue;
+				}
+				err = devlink_nl_param_fill(msg,
+						devlink_port->devlink,
+						devlink_port->index, param_item,
+						DEVLINK_CMD_PORT_PARAM_GET,
+						NETLINK_CB(cb->skb).portid,
+						cb->nlh->nlmsg_seq,
+						NLM_F_MULTI);
+				if (err) {
+					mutex_unlock(&devlink->lock);
+					goto out;
+				}
+				idx++;
+			}
+		}
+		mutex_unlock(&devlink->lock);
+	}
+out:
+	mutex_unlock(&devlink_mutex);
+
+	cb->args[0] = idx;
+	return msg->len;
+}
+
+static int devlink_nl_cmd_port_param_get_doit(struct sk_buff *skb,
+					      struct genl_info *info)
+{
+	struct devlink_port *devlink_port = info->user_ptr[0];
+	struct devlink_param_item *param_item;
+	struct sk_buff *msg;
+	int err;
+
+	param_item = devlink_param_get_from_info(&devlink_port->param_list,
+						 info);
+	if (!param_item)
+		return -EINVAL;
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+
+	err = devlink_nl_param_fill(msg, devlink_port->devlink,
+				    devlink_port->index, param_item,
+				    DEVLINK_CMD_PORT_PARAM_GET,
+				    info->snd_portid, info->snd_seq, 0);
+	if (err) {
+		nlmsg_free(msg);
+		return err;
+	}
+
+	return genlmsg_reply(msg, info);
+}
+
 static int devlink_nl_region_snapshot_id_put(struct sk_buff *msg,
 					     struct devlink *devlink,
 					     struct devlink_snapshot *snapshot)
@@ -3820,6 +3900,14 @@ static const struct genl_ops devlink_nl_ops[] = {
 		.flags = GENL_ADMIN_PERM,
 		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
 	},
+	{
+		.cmd = DEVLINK_CMD_PORT_PARAM_GET,
+		.doit = devlink_nl_cmd_port_param_get_doit,
+		.dumpit = devlink_nl_cmd_port_param_get_dumpit,
+		.policy = devlink_nl_policy,
+		.internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
+		/* can be retrieved by unprivileged users */
+	},
 	{
 		.cmd = DEVLINK_CMD_REGION_GET,
 		.doit = devlink_nl_cmd_region_get_doit,
-- 
cgit v1.2.3


From 9c54873b4e2ee22507627b1adac9e3a8407741bd Mon Sep 17 00:00:00 2001
From: Vasundhara Volam <vasundhara-v.volam@broadcom.com>
Date: Mon, 28 Jan 2019 18:00:22 +0530
Subject: devlink: Add port param set command

Add port param set command to set the value for a parameter.
Value can be set to any of the supported configuration modes.

v7->v8: Append "Acked-by: Jiri Pirko <jiri@mellanox.com>"

Cc: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Vasundhara Volam <vasundhara-v.volam@broadcom.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/devlink.h |  1 +
 net/core/devlink.c           | 37 ++++++++++++++++++++++++++++++++-----
 2 files changed, 33 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index 448973beac9d..3658fb20b190 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -90,6 +90,7 @@ enum devlink_command {
 	DEVLINK_CMD_REGION_READ,
 
 	DEVLINK_CMD_PORT_PARAM_GET,	/* can dump */
+	DEVLINK_CMD_PORT_PARAM_SET,
 
 	/* add new commands above here */
 	__DEVLINK_CMD_MAX,
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 66313dcdbdca..113ad9f529e1 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -3096,10 +3096,11 @@ static int devlink_nl_cmd_param_get_doit(struct sk_buff *skb,
 	return genlmsg_reply(msg, info);
 }
 
-static int devlink_nl_cmd_param_set_doit(struct sk_buff *skb,
-					 struct genl_info *info)
+static int __devlink_nl_cmd_param_set_doit(struct devlink *devlink,
+					   struct list_head *param_list,
+					   struct genl_info *info,
+					   enum devlink_command cmd)
 {
-	struct devlink *devlink = info->user_ptr[0];
 	enum devlink_param_type param_type;
 	struct devlink_param_gset_ctx ctx;
 	enum devlink_param_cmode cmode;
@@ -3108,7 +3109,7 @@ static int devlink_nl_cmd_param_set_doit(struct sk_buff *skb,
 	union devlink_param_value value;
 	int err = 0;
 
-	param_item = devlink_param_get_from_info(&devlink->param_list, info);
+	param_item = devlink_param_get_from_info(param_list, info);
 	if (!param_item)
 		return -EINVAL;
 	param = param_item->param;
@@ -3148,10 +3149,19 @@ static int devlink_nl_cmd_param_set_doit(struct sk_buff *skb,
 			return err;
 	}
 
-	devlink_param_notify(devlink, param_item, DEVLINK_CMD_PARAM_NEW);
+	devlink_param_notify(devlink, param_item, cmd);
 	return 0;
 }
 
+static int devlink_nl_cmd_param_set_doit(struct sk_buff *skb,
+					 struct genl_info *info)
+{
+	struct devlink *devlink = info->user_ptr[0];
+
+	return __devlink_nl_cmd_param_set_doit(devlink, &devlink->param_list,
+					       info, DEVLINK_CMD_PARAM_NEW);
+}
+
 static int devlink_param_register_one(struct devlink *devlink,
 				      struct list_head *param_list,
 				      const struct devlink_param *param)
@@ -3263,6 +3273,16 @@ static int devlink_nl_cmd_port_param_get_doit(struct sk_buff *skb,
 	return genlmsg_reply(msg, info);
 }
 
+static int devlink_nl_cmd_port_param_set_doit(struct sk_buff *skb,
+					      struct genl_info *info)
+{
+	struct devlink_port *devlink_port = info->user_ptr[0];
+
+	return __devlink_nl_cmd_param_set_doit(devlink_port->devlink,
+					       &devlink_port->param_list,
+					       info, 0);
+}
+
 static int devlink_nl_region_snapshot_id_put(struct sk_buff *msg,
 					     struct devlink *devlink,
 					     struct devlink_snapshot *snapshot)
@@ -3908,6 +3928,13 @@ static const struct genl_ops devlink_nl_ops[] = {
 		.internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
 		/* can be retrieved by unprivileged users */
 	},
+	{
+		.cmd = DEVLINK_CMD_PORT_PARAM_SET,
+		.doit = devlink_nl_cmd_port_param_set_doit,
+		.policy = devlink_nl_policy,
+		.flags = GENL_ADMIN_PERM,
+		.internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
+	},
 	{
 		.cmd = DEVLINK_CMD_REGION_GET,
 		.doit = devlink_nl_cmd_region_get_doit,
-- 
cgit v1.2.3


From ffd19b9a490a6b022b58bada054ee130fff06a1c Mon Sep 17 00:00:00 2001
From: Vasundhara Volam <vasundhara-v.volam@broadcom.com>
Date: Mon, 28 Jan 2019 18:00:23 +0530
Subject: devlink: Add support for driverinit get value for devlink_port

Add support for "driverinit" configuration mode value for devlink_port
configuration parameters. Add devlink_port_param_driverinit_value_get()
function to help the driver get the value from devlink_port.

Also, move the common code to __devlink_param_driverinit_value_get()
to be used by both device and port params.

v7->v8:
-Add the missing devlink_port_param_driverinit_value_get() declaration.
-Also, order devlink_port_param_driverinit_value_get() after
devlink_param_driverinit_value_get/set() calls

Cc: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Vasundhara Volam <vasundhara-v.volam@broadcom.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/devlink.h | 12 +++++++++
 net/core/devlink.c    | 67 ++++++++++++++++++++++++++++++++++++++-------------
 2 files changed, 62 insertions(+), 17 deletions(-)

(limited to 'include')

diff --git a/include/net/devlink.h b/include/net/devlink.h
index 7fc0748c58e3..3943072d3552 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -578,6 +578,10 @@ int devlink_param_driverinit_value_get(struct devlink *devlink, u32 param_id,
 				       union devlink_param_value *init_val);
 int devlink_param_driverinit_value_set(struct devlink *devlink, u32 param_id,
 				       union devlink_param_value init_val);
+int
+devlink_port_param_driverinit_value_get(struct devlink_port *devlink_port,
+					u32 param_id,
+					union devlink_param_value *init_val);
 void devlink_param_value_changed(struct devlink *devlink, u32 param_id);
 void devlink_param_value_str_fill(union devlink_param_value *dst_val,
 				  const char *src);
@@ -827,6 +831,14 @@ devlink_param_driverinit_value_set(struct devlink *devlink, u32 param_id,
 	return -EOPNOTSUPP;
 }
 
+static inline int
+devlink_port_param_driverinit_value_get(struct devlink_port *devlink_port,
+					u32 param_id,
+					union devlink_param_value *init_val)
+{
+	return -EOPNOTSUPP;
+}
+
 static inline void
 devlink_param_value_changed(struct devlink *devlink, u32 param_id)
 {
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 113ad9f529e1..fdfdb9b571a9 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -4714,26 +4714,13 @@ void devlink_port_params_unregister(struct devlink_port *devlink_port,
 }
 EXPORT_SYMBOL_GPL(devlink_port_params_unregister);
 
-/**
- *	devlink_param_driverinit_value_get - get configuration parameter
- *					     value for driver initializing
- *
- *	@devlink: devlink
- *	@param_id: parameter ID
- *	@init_val: value of parameter in driverinit configuration mode
- *
- *	This function should be used by the driver to get driverinit
- *	configuration for initialization after reload command.
- */
-int devlink_param_driverinit_value_get(struct devlink *devlink, u32 param_id,
-				       union devlink_param_value *init_val)
+static int
+__devlink_param_driverinit_value_get(struct list_head *param_list, u32 param_id,
+				     union devlink_param_value *init_val)
 {
 	struct devlink_param_item *param_item;
 
-	if (!devlink->ops || !devlink->ops->reload)
-		return -EOPNOTSUPP;
-
-	param_item = devlink_param_find_by_id(&devlink->param_list, param_id);
+	param_item = devlink_param_find_by_id(param_list, param_id);
 	if (!param_item)
 		return -EINVAL;
 
@@ -4749,6 +4736,27 @@ int devlink_param_driverinit_value_get(struct devlink *devlink, u32 param_id,
 
 	return 0;
 }
+
+/**
+ *	devlink_param_driverinit_value_get - get configuration parameter
+ *					     value for driver initializing
+ *
+ *	@devlink: devlink
+ *	@param_id: parameter ID
+ *	@init_val: value of parameter in driverinit configuration mode
+ *
+ *	This function should be used by the driver to get driverinit
+ *	configuration for initialization after reload command.
+ */
+int devlink_param_driverinit_value_get(struct devlink *devlink, u32 param_id,
+				       union devlink_param_value *init_val)
+{
+	if (!devlink->ops || !devlink->ops->reload)
+		return -EOPNOTSUPP;
+
+	return __devlink_param_driverinit_value_get(&devlink->param_list,
+						    param_id, init_val);
+}
 EXPORT_SYMBOL_GPL(devlink_param_driverinit_value_get);
 
 /**
@@ -4787,6 +4795,31 @@ int devlink_param_driverinit_value_set(struct devlink *devlink, u32 param_id,
 }
 EXPORT_SYMBOL_GPL(devlink_param_driverinit_value_set);
 
+/**
+ *	devlink_port_param_driverinit_value_get - get configuration parameter
+ *						value for driver initializing
+ *
+ *	@devlink_port: devlink_port
+ *	@param_id: parameter ID
+ *	@init_val: value of parameter in driverinit configuration mode
+ *
+ *	This function should be used by the driver to get driverinit
+ *	configuration for initialization after reload command.
+ */
+int devlink_port_param_driverinit_value_get(struct devlink_port *devlink_port,
+					    u32 param_id,
+					    union devlink_param_value *init_val)
+{
+	struct devlink *devlink = devlink_port->devlink;
+
+	if (!devlink->ops || !devlink->ops->reload)
+		return -EOPNOTSUPP;
+
+	return __devlink_param_driverinit_value_get(&devlink_port->param_list,
+						    param_id, init_val);
+}
+EXPORT_SYMBOL_GPL(devlink_port_param_driverinit_value_get);
+
 /**
  *	devlink_param_value_changed - notify devlink on a parameter's value
  *				      change. Should be called by the driver
-- 
cgit v1.2.3


From 5473a7bdad78f2efe8ee508e8c7bbb762896e78f Mon Sep 17 00:00:00 2001
From: Vasundhara Volam <vasundhara-v.volam@broadcom.com>
Date: Mon, 28 Jan 2019 18:00:24 +0530
Subject: devlink: Add support for driverinit set value for devlink_port

Add support for "driverinit" configuration mode value for devlink_port
configuration parameters. Add devlink_port_param_driverinit_value_set()
function to help the driver set the value to devlink_port.

Also, move the common code to __devlink_param_driverinit_value_set()
to be used by both device and port params.

v7->v8:
Re-order the definitions as follows:
__devlink_param_driverinit_value_get
__devlink_param_driverinit_value_set
devlink_param_driverinit_value_get
devlink_param_driverinit_value_set
devlink_port_param_driverinit_value_get
devlink_port_param_driverinit_value_set

Cc: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Vasundhara Volam <vasundhara-v.volam@broadcom.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/devlink.h | 11 ++++++++
 net/core/devlink.c    | 70 ++++++++++++++++++++++++++++++++++++++-------------
 2 files changed, 63 insertions(+), 18 deletions(-)

(limited to 'include')

diff --git a/include/net/devlink.h b/include/net/devlink.h
index 3943072d3552..ae2ccf297946 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -582,6 +582,9 @@ int
 devlink_port_param_driverinit_value_get(struct devlink_port *devlink_port,
 					u32 param_id,
 					union devlink_param_value *init_val);
+int devlink_port_param_driverinit_value_set(struct devlink_port *devlink_port,
+					    u32 param_id,
+					    union devlink_param_value init_val);
 void devlink_param_value_changed(struct devlink *devlink, u32 param_id);
 void devlink_param_value_str_fill(union devlink_param_value *dst_val,
 				  const char *src);
@@ -839,6 +842,14 @@ devlink_port_param_driverinit_value_get(struct devlink_port *devlink_port,
 	return -EOPNOTSUPP;
 }
 
+static inline int
+devlink_port_param_driverinit_value_set(struct devlink_port *devlink_port,
+					u32 param_id,
+					union devlink_param_value init_val)
+{
+	return -EOPNOTSUPP;
+}
+
 static inline void
 devlink_param_value_changed(struct devlink *devlink, u32 param_id)
 {
diff --git a/net/core/devlink.c b/net/core/devlink.c
index fdfdb9b571a9..55456cc36833 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -4737,6 +4737,32 @@ __devlink_param_driverinit_value_get(struct list_head *param_list, u32 param_id,
 	return 0;
 }
 
+static int
+__devlink_param_driverinit_value_set(struct devlink *devlink,
+				     struct list_head *param_list, u32 param_id,
+				     union devlink_param_value init_val,
+				     enum devlink_command cmd)
+{
+	struct devlink_param_item *param_item;
+
+	param_item = devlink_param_find_by_id(param_list, param_id);
+	if (!param_item)
+		return -EINVAL;
+
+	if (!devlink_param_cmode_is_supported(param_item->param,
+					      DEVLINK_PARAM_CMODE_DRIVERINIT))
+		return -EOPNOTSUPP;
+
+	if (param_item->param->type == DEVLINK_PARAM_TYPE_STRING)
+		strcpy(param_item->driverinit_value.vstr, init_val.vstr);
+	else
+		param_item->driverinit_value = init_val;
+	param_item->driverinit_value_valid = true;
+
+	devlink_param_notify(devlink, param_item, DEVLINK_CMD_PARAM_NEW);
+	return 0;
+}
+
 /**
  *	devlink_param_driverinit_value_get - get configuration parameter
  *					     value for driver initializing
@@ -4774,24 +4800,10 @@ EXPORT_SYMBOL_GPL(devlink_param_driverinit_value_get);
 int devlink_param_driverinit_value_set(struct devlink *devlink, u32 param_id,
 				       union devlink_param_value init_val)
 {
-	struct devlink_param_item *param_item;
-
-	param_item = devlink_param_find_by_id(&devlink->param_list, param_id);
-	if (!param_item)
-		return -EINVAL;
-
-	if (!devlink_param_cmode_is_supported(param_item->param,
-					      DEVLINK_PARAM_CMODE_DRIVERINIT))
-		return -EOPNOTSUPP;
-
-	if (param_item->param->type == DEVLINK_PARAM_TYPE_STRING)
-		strcpy(param_item->driverinit_value.vstr, init_val.vstr);
-	else
-		param_item->driverinit_value = init_val;
-	param_item->driverinit_value_valid = true;
-
-	devlink_param_notify(devlink, param_item, DEVLINK_CMD_PARAM_NEW);
-	return 0;
+	return __devlink_param_driverinit_value_set(devlink,
+						    &devlink->param_list,
+						    param_id, init_val,
+						    DEVLINK_CMD_PARAM_NEW);
 }
 EXPORT_SYMBOL_GPL(devlink_param_driverinit_value_set);
 
@@ -4820,6 +4832,28 @@ int devlink_port_param_driverinit_value_get(struct devlink_port *devlink_port,
 }
 EXPORT_SYMBOL_GPL(devlink_port_param_driverinit_value_get);
 
+/**
+ *     devlink_port_param_driverinit_value_set - set value of configuration
+ *                                               parameter for driverinit
+ *                                               configuration mode
+ *
+ *     @devlink_port: devlink_port
+ *     @param_id: parameter ID
+ *     @init_val: value of parameter to set for driverinit configuration mode
+ *
+ *     This function should be used by the driver to set driverinit
+ *     configuration mode default value.
+ */
+int devlink_port_param_driverinit_value_set(struct devlink_port *devlink_port,
+					    u32 param_id,
+					    union devlink_param_value init_val)
+{
+	return __devlink_param_driverinit_value_set(devlink_port->devlink,
+						    &devlink_port->param_list,
+						    param_id, init_val, 0);
+}
+EXPORT_SYMBOL_GPL(devlink_port_param_driverinit_value_set);
+
 /**
  *	devlink_param_value_changed - notify devlink on a parameter's value
  *				      change. Should be called by the driver
-- 
cgit v1.2.3


From c1e5786d6771c67fe044c3bcaa23e631e0503261 Mon Sep 17 00:00:00 2001
From: Vasundhara Volam <vasundhara-v.volam@broadcom.com>
Date: Mon, 28 Jan 2019 18:00:25 +0530
Subject: devlink: Add devlink notifications support for port params

Add notification call for devlink port param set, register and unregister
functions.
Add devlink_port_param_value_changed() function to enable the driver notify
devlink on value change. Driver should use this function after value was
changed on any configuration mode part to driverinit.

v7->v8:
Order devlink_port_param_value_changed() definitions followed by
devlink_param_value_changed()

Cc: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Vasundhara Volam <vasundhara-v.volam@broadcom.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/devlink.h        |   8 ++++
 include/uapi/linux/devlink.h |   2 +
 net/core/devlink.c           | 111 ++++++++++++++++++++++++++++++++-----------
 3 files changed, 94 insertions(+), 27 deletions(-)

(limited to 'include')

diff --git a/include/net/devlink.h b/include/net/devlink.h
index ae2ccf297946..ceb5e89d74d6 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -586,6 +586,8 @@ int devlink_port_param_driverinit_value_set(struct devlink_port *devlink_port,
 					    u32 param_id,
 					    union devlink_param_value init_val);
 void devlink_param_value_changed(struct devlink *devlink, u32 param_id);
+void devlink_port_param_value_changed(struct devlink_port *devlink_port,
+				      u32 param_id);
 void devlink_param_value_str_fill(union devlink_param_value *dst_val,
 				  const char *src);
 struct devlink_region *devlink_region_create(struct devlink *devlink,
@@ -855,6 +857,12 @@ devlink_param_value_changed(struct devlink *devlink, u32 param_id)
 {
 }
 
+static inline void
+devlink_port_param_value_changed(struct devlink_port *devlink_port,
+				 u32 param_id)
+{
+}
+
 static inline void
 devlink_param_value_str_fill(union devlink_param_value *dst_val,
 			     const char *src)
diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index 3658fb20b190..61b4447a6c5b 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -91,6 +91,8 @@ enum devlink_command {
 
 	DEVLINK_CMD_PORT_PARAM_GET,	/* can dump */
 	DEVLINK_CMD_PORT_PARAM_SET,
+	DEVLINK_CMD_PORT_PARAM_NEW,
+	DEVLINK_CMD_PORT_PARAM_DEL,
 
 	/* add new commands above here */
 	__DEVLINK_CMD_MAX,
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 55456cc36833..451ab4725340 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -2882,7 +2882,9 @@ static int devlink_nl_param_fill(struct sk_buff *msg, struct devlink *devlink,
 	if (devlink_nl_put_handle(msg, devlink))
 		goto genlmsg_cancel;
 
-	if (cmd == DEVLINK_CMD_PORT_PARAM_GET)
+	if (cmd == DEVLINK_CMD_PORT_PARAM_GET ||
+	    cmd == DEVLINK_CMD_PORT_PARAM_NEW ||
+	    cmd == DEVLINK_CMD_PORT_PARAM_DEL)
 		if (nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX, port_index))
 			goto genlmsg_cancel;
 
@@ -2928,18 +2930,22 @@ genlmsg_cancel:
 }
 
 static void devlink_param_notify(struct devlink *devlink,
+				 unsigned int port_index,
 				 struct devlink_param_item *param_item,
 				 enum devlink_command cmd)
 {
 	struct sk_buff *msg;
 	int err;
 
-	WARN_ON(cmd != DEVLINK_CMD_PARAM_NEW && cmd != DEVLINK_CMD_PARAM_DEL);
+	WARN_ON(cmd != DEVLINK_CMD_PARAM_NEW && cmd != DEVLINK_CMD_PARAM_DEL &&
+		cmd != DEVLINK_CMD_PORT_PARAM_NEW &&
+		cmd != DEVLINK_CMD_PORT_PARAM_DEL);
 
 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
 	if (!msg)
 		return;
-	err = devlink_nl_param_fill(msg, devlink, 0, param_item, cmd, 0, 0, 0);
+	err = devlink_nl_param_fill(msg, devlink, port_index, param_item, cmd,
+				    0, 0, 0);
 	if (err) {
 		nlmsg_free(msg);
 		return;
@@ -3097,6 +3103,7 @@ static int devlink_nl_cmd_param_get_doit(struct sk_buff *skb,
 }
 
 static int __devlink_nl_cmd_param_set_doit(struct devlink *devlink,
+					   unsigned int port_index,
 					   struct list_head *param_list,
 					   struct genl_info *info,
 					   enum devlink_command cmd)
@@ -3149,7 +3156,7 @@ static int __devlink_nl_cmd_param_set_doit(struct devlink *devlink,
 			return err;
 	}
 
-	devlink_param_notify(devlink, param_item, cmd);
+	devlink_param_notify(devlink, port_index, param_item, cmd);
 	return 0;
 }
 
@@ -3158,13 +3165,15 @@ static int devlink_nl_cmd_param_set_doit(struct sk_buff *skb,
 {
 	struct devlink *devlink = info->user_ptr[0];
 
-	return __devlink_nl_cmd_param_set_doit(devlink, &devlink->param_list,
+	return __devlink_nl_cmd_param_set_doit(devlink, 0, &devlink->param_list,
 					       info, DEVLINK_CMD_PARAM_NEW);
 }
 
 static int devlink_param_register_one(struct devlink *devlink,
+				      unsigned int port_index,
 				      struct list_head *param_list,
-				      const struct devlink_param *param)
+				      const struct devlink_param *param,
+				      enum devlink_command cmd)
 {
 	struct devlink_param_item *param_item;
 
@@ -3182,19 +3191,21 @@ static int devlink_param_register_one(struct devlink *devlink,
 	param_item->param = param;
 
 	list_add_tail(&param_item->list, param_list);
-	devlink_param_notify(devlink, param_item, DEVLINK_CMD_PARAM_NEW);
+	devlink_param_notify(devlink, port_index, param_item, cmd);
 	return 0;
 }
 
 static void devlink_param_unregister_one(struct devlink *devlink,
+					 unsigned int port_index,
 					 struct list_head *param_list,
-					 const struct devlink_param *param)
+					 const struct devlink_param *param,
+					 enum devlink_command cmd)
 {
 	struct devlink_param_item *param_item;
 
 	param_item = devlink_param_find_by_name(param_list, param->name);
 	WARN_ON(!param_item);
-	devlink_param_notify(devlink, param_item, DEVLINK_CMD_PARAM_DEL);
+	devlink_param_notify(devlink, port_index, param_item, cmd);
 	list_del(&param_item->list);
 	kfree(param_item);
 }
@@ -3279,8 +3290,9 @@ static int devlink_nl_cmd_port_param_set_doit(struct sk_buff *skb,
 	struct devlink_port *devlink_port = info->user_ptr[0];
 
 	return __devlink_nl_cmd_param_set_doit(devlink_port->devlink,
-					       &devlink_port->param_list,
-					       info, 0);
+					       devlink_port->index,
+					       &devlink_port->param_list, info,
+					       DEVLINK_CMD_PORT_PARAM_NEW);
 }
 
 static int devlink_nl_region_snapshot_id_put(struct sk_buff *msg,
@@ -4598,9 +4610,12 @@ static int devlink_param_verify(const struct devlink_param *param)
 }
 
 static int __devlink_params_register(struct devlink *devlink,
+				     unsigned int port_index,
 				     struct list_head *param_list,
 				     const struct devlink_param *params,
-				     size_t params_count)
+				     size_t params_count,
+				     enum devlink_command reg_cmd,
+				     enum devlink_command unreg_cmd)
 {
 	const struct devlink_param *param = params;
 	int i;
@@ -4612,7 +4627,8 @@ static int __devlink_params_register(struct devlink *devlink,
 		if (err)
 			goto rollback;
 
-		err = devlink_param_register_one(devlink, param_list, param);
+		err = devlink_param_register_one(devlink, port_index,
+						 param_list, param, reg_cmd);
 		if (err)
 			goto rollback;
 	}
@@ -4624,23 +4640,27 @@ rollback:
 	if (!i)
 		goto unlock;
 	for (param--; i > 0; i--, param--)
-		devlink_param_unregister_one(devlink, param_list, param);
+		devlink_param_unregister_one(devlink, port_index, param_list,
+					     param, unreg_cmd);
 unlock:
 	mutex_unlock(&devlink->lock);
 	return err;
 }
 
 static void __devlink_params_unregister(struct devlink *devlink,
+					unsigned int port_index,
 					struct list_head *param_list,
 					const struct devlink_param *params,
-					size_t params_count)
+					size_t params_count,
+					enum devlink_command cmd)
 {
 	const struct devlink_param *param = params;
 	int i;
 
 	mutex_lock(&devlink->lock);
 	for (i = 0; i < params_count; i++, param++)
-		devlink_param_unregister_one(devlink, param_list, param);
+		devlink_param_unregister_one(devlink, 0, param_list, param,
+					     cmd);
 	mutex_unlock(&devlink->lock);
 }
 
@@ -4657,8 +4677,10 @@ int devlink_params_register(struct devlink *devlink,
 			    const struct devlink_param *params,
 			    size_t params_count)
 {
-	return __devlink_params_register(devlink, &devlink->param_list, params,
-					 params_count);
+	return __devlink_params_register(devlink, 0, &devlink->param_list,
+					 params, params_count,
+					 DEVLINK_CMD_PARAM_NEW,
+					 DEVLINK_CMD_PARAM_DEL);
 }
 EXPORT_SYMBOL_GPL(devlink_params_register);
 
@@ -4672,8 +4694,9 @@ void devlink_params_unregister(struct devlink *devlink,
 			       const struct devlink_param *params,
 			       size_t params_count)
 {
-	return __devlink_params_unregister(devlink, &devlink->param_list,
-					   params, params_count);
+	return __devlink_params_unregister(devlink, 0, &devlink->param_list,
+					   params, params_count,
+					   DEVLINK_CMD_PARAM_DEL);
 }
 EXPORT_SYMBOL_GPL(devlink_params_unregister);
 
@@ -4691,8 +4714,11 @@ int devlink_port_params_register(struct devlink_port *devlink_port,
 				 size_t params_count)
 {
 	return __devlink_params_register(devlink_port->devlink,
+					 devlink_port->index,
 					 &devlink_port->param_list, params,
-					 params_count);
+					 params_count,
+					 DEVLINK_CMD_PORT_PARAM_NEW,
+					 DEVLINK_CMD_PORT_PARAM_DEL);
 }
 EXPORT_SYMBOL_GPL(devlink_port_params_register);
 
@@ -4709,8 +4735,10 @@ void devlink_port_params_unregister(struct devlink_port *devlink_port,
 				    size_t params_count)
 {
 	return __devlink_params_unregister(devlink_port->devlink,
+					   devlink_port->index,
 					   &devlink_port->param_list,
-					   params, params_count);
+					   params, params_count,
+					   DEVLINK_CMD_PORT_PARAM_DEL);
 }
 EXPORT_SYMBOL_GPL(devlink_port_params_unregister);
 
@@ -4739,6 +4767,7 @@ __devlink_param_driverinit_value_get(struct list_head *param_list, u32 param_id,
 
 static int
 __devlink_param_driverinit_value_set(struct devlink *devlink,
+				     unsigned int port_index,
 				     struct list_head *param_list, u32 param_id,
 				     union devlink_param_value init_val,
 				     enum devlink_command cmd)
@@ -4759,7 +4788,7 @@ __devlink_param_driverinit_value_set(struct devlink *devlink,
 		param_item->driverinit_value = init_val;
 	param_item->driverinit_value_valid = true;
 
-	devlink_param_notify(devlink, param_item, DEVLINK_CMD_PARAM_NEW);
+	devlink_param_notify(devlink, port_index, param_item, cmd);
 	return 0;
 }
 
@@ -4800,7 +4829,7 @@ EXPORT_SYMBOL_GPL(devlink_param_driverinit_value_get);
 int devlink_param_driverinit_value_set(struct devlink *devlink, u32 param_id,
 				       union devlink_param_value init_val)
 {
-	return __devlink_param_driverinit_value_set(devlink,
+	return __devlink_param_driverinit_value_set(devlink, 0,
 						    &devlink->param_list,
 						    param_id, init_val,
 						    DEVLINK_CMD_PARAM_NEW);
@@ -4849,8 +4878,10 @@ int devlink_port_param_driverinit_value_set(struct devlink_port *devlink_port,
 					    union devlink_param_value init_val)
 {
 	return __devlink_param_driverinit_value_set(devlink_port->devlink,
+						    devlink_port->index,
 						    &devlink_port->param_list,
-						    param_id, init_val, 0);
+						    param_id, init_val,
+						    DEVLINK_CMD_PORT_PARAM_NEW);
 }
 EXPORT_SYMBOL_GPL(devlink_port_param_driverinit_value_set);
 
@@ -4865,7 +4896,6 @@ EXPORT_SYMBOL_GPL(devlink_port_param_driverinit_value_set);
  *	This function should be used by the driver to notify devlink on value
  *	change, excluding driverinit configuration mode.
  *	For driverinit configuration mode driver should use the function
- *	devlink_param_driverinit_value_set() instead.
  */
 void devlink_param_value_changed(struct devlink *devlink, u32 param_id)
 {
@@ -4874,10 +4904,37 @@ void devlink_param_value_changed(struct devlink *devlink, u32 param_id)
 	param_item = devlink_param_find_by_id(&devlink->param_list, param_id);
 	WARN_ON(!param_item);
 
-	devlink_param_notify(devlink, param_item, DEVLINK_CMD_PARAM_NEW);
+	devlink_param_notify(devlink, 0, param_item, DEVLINK_CMD_PARAM_NEW);
 }
 EXPORT_SYMBOL_GPL(devlink_param_value_changed);
 
+/**
+ *     devlink_port_param_value_changed - notify devlink on a parameter's value
+ *                                      change. Should be called by the driver
+ *                                      right after the change.
+ *
+ *     @devlink_port: devlink_port
+ *     @param_id: parameter ID
+ *
+ *     This function should be used by the driver to notify devlink on value
+ *     change, excluding driverinit configuration mode.
+ *     For driverinit configuration mode driver should use the function
+ *     devlink_port_param_driverinit_value_set() instead.
+ */
+void devlink_port_param_value_changed(struct devlink_port *devlink_port,
+				      u32 param_id)
+{
+	struct devlink_param_item *param_item;
+
+	param_item = devlink_param_find_by_id(&devlink_port->param_list,
+					      param_id);
+	WARN_ON(!param_item);
+
+	devlink_param_notify(devlink_port->devlink, devlink_port->index,
+			     param_item, DEVLINK_CMD_PORT_PARAM_NEW);
+}
+EXPORT_SYMBOL_GPL(devlink_port_param_value_changed);
+
 /**
  *	devlink_param_value_str_fill - Safely fill-up the string preventing
  *				       from overflow of the preallocated buffer
-- 
cgit v1.2.3


From b639583f9e36d044ac1b13090ae812266992cbac Mon Sep 17 00:00:00 2001
From: Vasundhara Volam <vasundhara-v.volam@broadcom.com>
Date: Mon, 28 Jan 2019 18:00:26 +0530
Subject: devlink: Add a generic wake_on_lan port parameter

wake_on_lan - Enables Wake on Lan for this port. If enabled,
the controller asserts a wake pin based on the WOL type.

v2->v3:
- Define only WOL types used now and define them as bitfield, so that
  mutliple WOL types can be enabled upon power on.
- Modify "wake-on-lan" name to "wake_on_lan" to be symmetric with
  previous definitions.
- Rename DEVLINK_PARAM_WOL_XXX to DEVLINK_PARAM_WAKE_XXX to be
  symmetrical with ethtool WOL definitions.

Cc: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Vasundhara Volam <vasundhara-v.volam@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/devlink.h | 8 ++++++++
 net/core/devlink.c    | 5 +++++
 2 files changed, 13 insertions(+)

(limited to 'include')

diff --git a/include/net/devlink.h b/include/net/devlink.h
index ceb5e89d74d6..85c9eabaf056 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -367,12 +367,17 @@ enum devlink_param_generic_id {
 	DEVLINK_PARAM_GENERIC_ID_MSIX_VEC_PER_PF_MAX,
 	DEVLINK_PARAM_GENERIC_ID_MSIX_VEC_PER_PF_MIN,
 	DEVLINK_PARAM_GENERIC_ID_FW_LOAD_POLICY,
+	DEVLINK_PARAM_GENERIC_ID_WOL,
 
 	/* add new param generic ids above here*/
 	__DEVLINK_PARAM_GENERIC_ID_MAX,
 	DEVLINK_PARAM_GENERIC_ID_MAX = __DEVLINK_PARAM_GENERIC_ID_MAX - 1,
 };
 
+enum devlink_param_wol_types {
+	DEVLINK_PARAM_WAKE_MAGIC = (1 << 0),
+};
+
 #define DEVLINK_PARAM_GENERIC_INT_ERR_RESET_NAME "internal_error_reset"
 #define DEVLINK_PARAM_GENERIC_INT_ERR_RESET_TYPE DEVLINK_PARAM_TYPE_BOOL
 
@@ -397,6 +402,9 @@ enum devlink_param_generic_id {
 #define DEVLINK_PARAM_GENERIC_FW_LOAD_POLICY_NAME "fw_load_policy"
 #define DEVLINK_PARAM_GENERIC_FW_LOAD_POLICY_TYPE DEVLINK_PARAM_TYPE_U8
 
+#define DEVLINK_PARAM_GENERIC_WOL_NAME "wake_on_lan"
+#define DEVLINK_PARAM_GENERIC_WOL_TYPE DEVLINK_PARAM_TYPE_U8
+
 #define DEVLINK_PARAM_GENERIC(_id, _cmodes, _get, _set, _validate)	\
 {									\
 	.id = DEVLINK_PARAM_GENERIC_ID_##_id,				\
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 451ab4725340..e6f170caf449 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -2697,6 +2697,11 @@ static const struct devlink_param devlink_param_generic[] = {
 		.name = DEVLINK_PARAM_GENERIC_FW_LOAD_POLICY_NAME,
 		.type = DEVLINK_PARAM_GENERIC_FW_LOAD_POLICY_TYPE,
 	},
+	{
+		.id = DEVLINK_PARAM_GENERIC_ID_WOL,
+		.name = DEVLINK_PARAM_GENERIC_WOL_NAME,
+		.type = DEVLINK_PARAM_GENERIC_WOL_TYPE,
+	},
 };
 
 static int devlink_param_generic_verify(const struct devlink_param *param)
-- 
cgit v1.2.3


From 2d908b38d40921a03225d42fd6e48eb51bffd606 Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Wed, 23 Jan 2019 11:28:19 +0100
Subject: serial: Add Tegra Combined UART driver

The Tegra Combined UART (TCU) is a mailbox-based mechanism that allows
multiplexing multiple "virtual UARTs" into a single hardware serial
port. The TCU is the primary serial port on Tegra194 devices.

Add a TCU driver utilizing the mailbox framework, as the used mailboxes
are part of Tegra HSP blocks that are already controlled by the Tegra
HSP mailbox driver.

Based on work by  Mikko Perttunen <mperttunen@nvidia.com>.

Signed-off-by: Thierry Reding <treding@nvidia.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/Kconfig       |  22 +++
 drivers/tty/serial/Makefile      |   1 +
 drivers/tty/serial/tegra-tcu.c   | 298 +++++++++++++++++++++++++++++++++++++++
 include/uapi/linux/serial_core.h |   3 +
 4 files changed, 324 insertions(+)
 create mode 100644 drivers/tty/serial/tegra-tcu.c

(limited to 'include')

diff --git a/drivers/tty/serial/Kconfig b/drivers/tty/serial/Kconfig
index 089a6f285d5e..72966bc0ac76 100644
--- a/drivers/tty/serial/Kconfig
+++ b/drivers/tty/serial/Kconfig
@@ -335,6 +335,28 @@ config SERIAL_TEGRA
 	  are enabled). This driver uses the APB DMA to achieve higher baudrate
 	  and better performance.
 
+config SERIAL_TEGRA_TCU
+	tristate "NVIDIA Tegra Combined UART"
+	depends on ARCH_TEGRA && TEGRA_HSP_MBOX
+	select SERIAL_CORE
+	help
+	  Support for the mailbox-based TCU (Tegra Combined UART) serial port.
+	  TCU is a virtual serial port that allows multiplexing multiple data
+	  streams into a single hardware serial port.
+
+config SERIAL_TEGRA_TCU_CONSOLE
+	bool "Support for console on a Tegra TCU serial port"
+	depends on SERIAL_TEGRA_TCU=y
+	select SERIAL_CORE_CONSOLE
+	default y
+	---help---
+	  If you say Y here, it will be possible to use a the Tegra TCU as the
+	  system console (the system console is the device which receives all
+	  kernel messages and warnings and which allows logins in single user
+	  mode).
+
+	  If unsure, say Y.
+
 config SERIAL_MAX3100
 	tristate "MAX3100 support"
 	depends on SPI
diff --git a/drivers/tty/serial/Makefile b/drivers/tty/serial/Makefile
index 1511e8a9f856..40b702aaa85e 100644
--- a/drivers/tty/serial/Makefile
+++ b/drivers/tty/serial/Makefile
@@ -77,6 +77,7 @@ obj-$(CONFIG_SERIAL_LANTIQ)	+= lantiq.o
 obj-$(CONFIG_SERIAL_XILINX_PS_UART) += xilinx_uartps.o
 obj-$(CONFIG_SERIAL_SIRFSOC) += sirfsoc_uart.o
 obj-$(CONFIG_SERIAL_TEGRA) += serial-tegra.o
+obj-$(CONFIG_SERIAL_TEGRA_TCU) += tegra-tcu.o
 obj-$(CONFIG_SERIAL_AR933X)   += ar933x_uart.o
 obj-$(CONFIG_SERIAL_EFM32_UART) += efm32-uart.o
 obj-$(CONFIG_SERIAL_ARC)	+= arc_uart.o
diff --git a/drivers/tty/serial/tegra-tcu.c b/drivers/tty/serial/tegra-tcu.c
new file mode 100644
index 000000000000..aaf8748a6147
--- /dev/null
+++ b/drivers/tty/serial/tegra-tcu.c
@@ -0,0 +1,298 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2018, NVIDIA CORPORATION.  All rights reserved.
+ */
+
+#include <linux/console.h>
+#include <linux/mailbox_client.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+#include <linux/serial.h>
+#include <linux/serial_core.h>
+#include <linux/slab.h>
+#include <linux/tty.h>
+#include <linux/tty_flip.h>
+
+#define TCU_MBOX_BYTE(i, x)			((x) << (i * 8))
+#define TCU_MBOX_BYTE_V(x, i)			(((x) >> (i * 8)) & 0xff)
+#define TCU_MBOX_NUM_BYTES(x)			((x) << 24)
+#define TCU_MBOX_NUM_BYTES_V(x)			(((x) >> 24) & 0x3)
+
+struct tegra_tcu {
+	struct uart_driver driver;
+#if IS_ENABLED(CONFIG_SERIAL_TEGRA_TCU_CONSOLE)
+	struct console console;
+#endif
+	struct uart_port port;
+
+	struct mbox_client tx_client, rx_client;
+	struct mbox_chan *tx, *rx;
+};
+
+static unsigned int tegra_tcu_uart_tx_empty(struct uart_port *port)
+{
+	return TIOCSER_TEMT;
+}
+
+static void tegra_tcu_uart_set_mctrl(struct uart_port *port, unsigned int mctrl)
+{
+}
+
+static unsigned int tegra_tcu_uart_get_mctrl(struct uart_port *port)
+{
+	return 0;
+}
+
+static void tegra_tcu_uart_stop_tx(struct uart_port *port)
+{
+}
+
+static void tegra_tcu_write_one(struct tegra_tcu *tcu, u32 value,
+				unsigned int count)
+{
+	void *msg;
+
+	value |= TCU_MBOX_NUM_BYTES(count);
+	msg = (void *)(unsigned long)value;
+	mbox_send_message(tcu->tx, msg);
+	mbox_flush(tcu->tx, 1000);
+}
+
+static void tegra_tcu_write(struct tegra_tcu *tcu, const char *s,
+			    unsigned int count)
+{
+	unsigned int written = 0, i = 0;
+	bool insert_nl = false;
+	u32 value = 0;
+
+	while (i < count) {
+		if (insert_nl) {
+			value |= TCU_MBOX_BYTE(written++, '\n');
+			insert_nl = false;
+			i++;
+		} else if (s[i] == '\n') {
+			value |= TCU_MBOX_BYTE(written++, '\r');
+			insert_nl = true;
+		} else {
+			value |= TCU_MBOX_BYTE(written++, s[i++]);
+		}
+
+		if (written == 3) {
+			tegra_tcu_write_one(tcu, value, 3);
+			value = written = 0;
+		}
+	}
+
+	if (written)
+		tegra_tcu_write_one(tcu, value, written);
+}
+
+static void tegra_tcu_uart_start_tx(struct uart_port *port)
+{
+	struct tegra_tcu *tcu = port->private_data;
+	struct circ_buf *xmit = &port->state->xmit;
+	unsigned long count;
+
+	for (;;) {
+		count = CIRC_CNT_TO_END(xmit->head, xmit->tail, UART_XMIT_SIZE);
+		if (!count)
+			break;
+
+		tegra_tcu_write(tcu, &xmit->buf[xmit->tail], count);
+		xmit->tail = (xmit->tail + count) & (UART_XMIT_SIZE - 1);
+	}
+
+	uart_write_wakeup(port);
+}
+
+static void tegra_tcu_uart_stop_rx(struct uart_port *port)
+{
+}
+
+static void tegra_tcu_uart_break_ctl(struct uart_port *port, int ctl)
+{
+}
+
+static int tegra_tcu_uart_startup(struct uart_port *port)
+{
+	return 0;
+}
+
+static void tegra_tcu_uart_shutdown(struct uart_port *port)
+{
+}
+
+static void tegra_tcu_uart_set_termios(struct uart_port *port,
+				       struct ktermios *new,
+				       struct ktermios *old)
+{
+}
+
+static const struct uart_ops tegra_tcu_uart_ops = {
+	.tx_empty = tegra_tcu_uart_tx_empty,
+	.set_mctrl = tegra_tcu_uart_set_mctrl,
+	.get_mctrl = tegra_tcu_uart_get_mctrl,
+	.stop_tx = tegra_tcu_uart_stop_tx,
+	.start_tx = tegra_tcu_uart_start_tx,
+	.stop_rx = tegra_tcu_uart_stop_rx,
+	.break_ctl = tegra_tcu_uart_break_ctl,
+	.startup = tegra_tcu_uart_startup,
+	.shutdown = tegra_tcu_uart_shutdown,
+	.set_termios = tegra_tcu_uart_set_termios,
+};
+
+#if IS_ENABLED(CONFIG_SERIAL_TEGRA_TCU_CONSOLE)
+static void tegra_tcu_console_write(struct console *cons, const char *s,
+				    unsigned int count)
+{
+	struct tegra_tcu *tcu = container_of(cons, struct tegra_tcu, console);
+
+	tegra_tcu_write(tcu, s, count);
+}
+
+static int tegra_tcu_console_setup(struct console *cons, char *options)
+{
+	return 0;
+}
+#endif
+
+static void tegra_tcu_receive(struct mbox_client *cl, void *msg)
+{
+	struct tegra_tcu *tcu = container_of(cl, struct tegra_tcu, rx_client);
+	struct tty_port *port = &tcu->port.state->port;
+	u32 value = (u32)(unsigned long)msg;
+	unsigned int num_bytes, i;
+
+	num_bytes = TCU_MBOX_NUM_BYTES_V(value);
+
+	for (i = 0; i < num_bytes; i++)
+		tty_insert_flip_char(port, TCU_MBOX_BYTE_V(value, i),
+				     TTY_NORMAL);
+
+	tty_flip_buffer_push(port);
+}
+
+static int tegra_tcu_probe(struct platform_device *pdev)
+{
+	struct uart_port *port;
+	struct tegra_tcu *tcu;
+	int err;
+
+	tcu = devm_kzalloc(&pdev->dev, sizeof(*tcu), GFP_KERNEL);
+	if (!tcu)
+		return -ENOMEM;
+
+	tcu->tx_client.dev = &pdev->dev;
+	tcu->rx_client.dev = &pdev->dev;
+	tcu->rx_client.rx_callback = tegra_tcu_receive;
+
+	tcu->tx = mbox_request_channel_byname(&tcu->tx_client, "tx");
+	if (IS_ERR(tcu->tx)) {
+		err = PTR_ERR(tcu->tx);
+		dev_err(&pdev->dev, "failed to get tx mailbox: %d\n", err);
+		return err;
+	}
+
+	tcu->rx = mbox_request_channel_byname(&tcu->rx_client, "rx");
+	if (IS_ERR(tcu->rx)) {
+		err = PTR_ERR(tcu->rx);
+		dev_err(&pdev->dev, "failed to get rx mailbox: %d\n", err);
+		goto free_tx;
+	}
+
+#if IS_ENABLED(CONFIG_SERIAL_TEGRA_TCU_CONSOLE)
+	/* setup the console */
+	strcpy(tcu->console.name, "ttyTCU");
+	tcu->console.device = uart_console_device;
+	tcu->console.flags = CON_PRINTBUFFER | CON_ANYTIME;
+	tcu->console.index = -1;
+	tcu->console.write = tegra_tcu_console_write;
+	tcu->console.setup = tegra_tcu_console_setup;
+	tcu->console.data = &tcu->driver;
+#endif
+
+	/* setup the driver */
+	tcu->driver.owner = THIS_MODULE;
+	tcu->driver.driver_name = "tegra-tcu";
+	tcu->driver.dev_name = "ttyTCU";
+#if IS_ENABLED(CONFIG_SERIAL_TEGRA_TCU_CONSOLE)
+	tcu->driver.cons = &tcu->console;
+#endif
+	tcu->driver.nr = 1;
+
+	err = uart_register_driver(&tcu->driver);
+	if (err) {
+		dev_err(&pdev->dev, "failed to register UART driver: %d\n",
+			err);
+		goto free_rx;
+	}
+
+	/* setup the port */
+	port = &tcu->port;
+	spin_lock_init(&port->lock);
+	port->dev = &pdev->dev;
+	port->type = PORT_TEGRA_TCU;
+	port->ops = &tegra_tcu_uart_ops;
+	port->fifosize = 1;
+	port->iotype = UPIO_MEM;
+	port->flags = UPF_BOOT_AUTOCONF;
+	port->private_data = tcu;
+
+	err = uart_add_one_port(&tcu->driver, port);
+	if (err) {
+		dev_err(&pdev->dev, "failed to add UART port: %d\n", err);
+		goto unregister_uart;
+	}
+
+	platform_set_drvdata(pdev, tcu);
+#if IS_ENABLED(CONFIG_SERIAL_TEGRA_TCU_CONSOLE)
+	register_console(&tcu->console);
+#endif
+
+	return 0;
+
+unregister_uart:
+	uart_unregister_driver(&tcu->driver);
+free_rx:
+	mbox_free_channel(tcu->rx);
+free_tx:
+	mbox_free_channel(tcu->tx);
+
+	return err;
+}
+
+static int tegra_tcu_remove(struct platform_device *pdev)
+{
+	struct tegra_tcu *tcu = platform_get_drvdata(pdev);
+
+#if IS_ENABLED(CONFIG_SERIAL_TEGRA_TCU_CONSOLE)
+	unregister_console(&tcu->console);
+#endif
+	uart_remove_one_port(&tcu->driver, &tcu->port);
+	uart_unregister_driver(&tcu->driver);
+	mbox_free_channel(tcu->rx);
+	mbox_free_channel(tcu->tx);
+
+	return 0;
+}
+
+static const struct of_device_id tegra_tcu_match[] = {
+	{ .compatible = "nvidia,tegra194-tcu" },
+	{ }
+};
+
+static struct platform_driver tegra_tcu_driver = {
+	.driver = {
+		.name = "tegra-tcu",
+		.of_match_table = tegra_tcu_match,
+	},
+	.probe = tegra_tcu_probe,
+	.remove = tegra_tcu_remove,
+};
+module_platform_driver(tegra_tcu_driver);
+
+MODULE_AUTHOR("Mikko Perttunen <mperttunen@nvidia.com>");
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("NVIDIA Tegra Combined UART driver");
diff --git a/include/uapi/linux/serial_core.h b/include/uapi/linux/serial_core.h
index df4a7534e239..6009ee2c2e99 100644
--- a/include/uapi/linux/serial_core.h
+++ b/include/uapi/linux/serial_core.h
@@ -79,6 +79,9 @@
 /* Nuvoton UART */
 #define PORT_NPCM	40
 
+/* NVIDIA Tegra Combined UART */
+#define PORT_TEGRA_TCU	41
+
 /* Intel EG20 */
 #define PORT_PCH_8LINE	44
 #define PORT_PCH_2LINE	45
-- 
cgit v1.2.3


From 80df2704a375bb4b3c9c5cce9c00052361b16d61 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Mon, 28 Jan 2019 15:08:23 +0800
Subject: sctp: introduce SCTP_FUTURE/CURRENT/ALL_ASSOC

This patch is to add 3 constants SCTP_FUTURE_ASSOC,
SCTP_CURRENT_ASSOC and SCTP_ALL_ASSOC for reserved
assoc_ids, as defined in rfc6458#section-7.2.

And add the process for them when doing lookup and
inserting in sctp_id2assoc and sctp_assoc_set_id.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/sctp.h | 4 ++++
 net/sctp/associola.c      | 7 +++++--
 net/sctp/socket.c         | 2 +-
 3 files changed, 10 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h
index d584073532b8..b8f2c4d56532 100644
--- a/include/uapi/linux/sctp.h
+++ b/include/uapi/linux/sctp.h
@@ -59,6 +59,10 @@
 
 typedef __s32 sctp_assoc_t;
 
+#define SCTP_FUTURE_ASSOC	0
+#define SCTP_CURRENT_ASSOC	1
+#define SCTP_ALL_ASSOC		2
+
 /* The following symbols come from the Sockets API Extensions for
  * SCTP <draft-ietf-tsvwg-sctpsocket-07.txt>.
  */
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 201c888604e4..b99f163e33ac 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -1651,8 +1651,11 @@ int sctp_assoc_set_id(struct sctp_association *asoc, gfp_t gfp)
 	if (preload)
 		idr_preload(gfp);
 	spin_lock_bh(&sctp_assocs_id_lock);
-	/* 0 is not a valid assoc_id, must be >= 1 */
-	ret = idr_alloc_cyclic(&sctp_assocs_id, asoc, 1, 0, GFP_NOWAIT);
+	/* 0, 1, 2 are used as SCTP_FUTURE_ASSOC, SCTP_CURRENT_ASSOC and
+	 * SCTP_ALL_ASSOC, so an available id must be > SCTP_ALL_ASSOC.
+	 */
+	ret = idr_alloc_cyclic(&sctp_assocs_id, asoc, SCTP_ALL_ASSOC + 1, 0,
+			       GFP_NOWAIT);
 	spin_unlock_bh(&sctp_assocs_id_lock);
 	if (preload)
 		idr_preload_end();
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index f93c3cf9e567..a52d132470ea 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -248,7 +248,7 @@ struct sctp_association *sctp_id2assoc(struct sock *sk, sctp_assoc_t id)
 	}
 
 	/* Otherwise this is a UDP-style socket. */
-	if (!id || (id == (sctp_assoc_t)-1))
+	if (id <= SCTP_ALL_ASSOC)
 		return NULL;
 
 	spin_lock_bh(&sctp_assocs_id_lock);
-- 
cgit v1.2.3


From 8add543e369d67ccd42f7e67d68866b4d606f632 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Mon, 28 Jan 2019 15:08:29 +0800
Subject: sctp: add SCTP_FUTURE_ASSOC for SCTP_PEER_ADDR_THLDS sockopt

Check with SCTP_FUTURE_ASSOC instead in
sctp_set/getsockopt_paddr_thresholds, it's compatible with 0.

It also adds pf_retrans in sctp_sock to support SCTP_FUTURE_ASSOC.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/structs.h |  2 ++
 net/sctp/associola.c       |  2 +-
 net/sctp/socket.c          | 58 ++++++++++++++++++++++++++++++++--------------
 3 files changed, 43 insertions(+), 19 deletions(-)

(limited to 'include')

diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 003020eb6e66..a5a46b1c9261 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -199,6 +199,8 @@ struct sctp_sock {
 	__u32 flowlabel;
 	__u8  dscp;
 
+	int pf_retrans;
+
 	/* The initial Path MTU to use for new associations. */
 	__u32 pathmtu;
 
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index b99f163e33ac..d2c7d0d2abc1 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -101,7 +101,7 @@ static struct sctp_association *sctp_association_init(
 	 * socket values.
 	 */
 	asoc->max_retrans = sp->assocparams.sasoc_asocmaxrxt;
-	asoc->pf_retrans  = net->sctp.pf_retrans;
+	asoc->pf_retrans  = sp->pf_retrans;
 
 	asoc->rto_initial = msecs_to_jiffies(sp->rtoinfo.srto_initial);
 	asoc->rto_max = msecs_to_jiffies(sp->rtoinfo.srto_max);
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 7df2123ae64c..1d49b7dfcba2 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -3888,11 +3888,25 @@ static int sctp_setsockopt_paddr_thresholds(struct sock *sk,
 			   sizeof(struct sctp_paddrthlds)))
 		return -EFAULT;
 
-
-	if (sctp_is_any(sk, (const union sctp_addr *)&val.spt_address)) {
-		asoc = sctp_id2assoc(sk, val.spt_assoc_id);
-		if (!asoc)
+	if (!sctp_is_any(sk, (const union sctp_addr *)&val.spt_address)) {
+		trans = sctp_addr_id2transport(sk, &val.spt_address,
+					       val.spt_assoc_id);
+		if (!trans)
 			return -ENOENT;
+
+		if (val.spt_pathmaxrxt)
+			trans->pathmaxrxt = val.spt_pathmaxrxt;
+		trans->pf_retrans = val.spt_pathpfthld;
+
+		return 0;
+	}
+
+	asoc = sctp_id2assoc(sk, val.spt_assoc_id);
+	if (!asoc && val.spt_assoc_id != SCTP_FUTURE_ASSOC &&
+	    sctp_style(sk, UDP))
+		return -EINVAL;
+
+	if (asoc) {
 		list_for_each_entry(trans, &asoc->peer.transport_addr_list,
 				    transports) {
 			if (val.spt_pathmaxrxt)
@@ -3904,14 +3918,11 @@ static int sctp_setsockopt_paddr_thresholds(struct sock *sk,
 			asoc->pathmaxrxt = val.spt_pathmaxrxt;
 		asoc->pf_retrans = val.spt_pathpfthld;
 	} else {
-		trans = sctp_addr_id2transport(sk, &val.spt_address,
-					       val.spt_assoc_id);
-		if (!trans)
-			return -ENOENT;
+		struct sctp_sock *sp = sctp_sk(sk);
 
 		if (val.spt_pathmaxrxt)
-			trans->pathmaxrxt = val.spt_pathmaxrxt;
-		trans->pf_retrans = val.spt_pathpfthld;
+			sp->pathmaxrxt = val.spt_pathmaxrxt;
+		sp->pf_retrans = val.spt_pathpfthld;
 	}
 
 	return 0;
@@ -4781,6 +4792,7 @@ static int sctp_init_sock(struct sock *sk)
 	 */
 	sp->hbinterval  = net->sctp.hb_interval;
 	sp->pathmaxrxt  = net->sctp.max_retrans_path;
+	sp->pf_retrans  = net->sctp.pf_retrans;
 	sp->pathmtu     = 0; /* allow default discovery */
 	sp->sackdelay   = net->sctp.sack_timeout;
 	sp->sackfreq	= 2;
@@ -6917,14 +6929,7 @@ static int sctp_getsockopt_paddr_thresholds(struct sock *sk,
 	if (copy_from_user(&val, (struct sctp_paddrthlds __user *)optval, len))
 		return -EFAULT;
 
-	if (sctp_is_any(sk, (const union sctp_addr *)&val.spt_address)) {
-		asoc = sctp_id2assoc(sk, val.spt_assoc_id);
-		if (!asoc)
-			return -ENOENT;
-
-		val.spt_pathpfthld = asoc->pf_retrans;
-		val.spt_pathmaxrxt = asoc->pathmaxrxt;
-	} else {
+	if (!sctp_is_any(sk, (const union sctp_addr *)&val.spt_address)) {
 		trans = sctp_addr_id2transport(sk, &val.spt_address,
 					       val.spt_assoc_id);
 		if (!trans)
@@ -6932,6 +6937,23 @@ static int sctp_getsockopt_paddr_thresholds(struct sock *sk,
 
 		val.spt_pathmaxrxt = trans->pathmaxrxt;
 		val.spt_pathpfthld = trans->pf_retrans;
+
+		return 0;
+	}
+
+	asoc = sctp_id2assoc(sk, val.spt_assoc_id);
+	if (!asoc && val.spt_assoc_id != SCTP_FUTURE_ASSOC &&
+	    sctp_style(sk, UDP))
+		return -EINVAL;
+
+	if (asoc) {
+		val.spt_pathpfthld = asoc->pf_retrans;
+		val.spt_pathmaxrxt = asoc->pathmaxrxt;
+	} else {
+		struct sctp_sock *sp = sctp_sk(sk);
+
+		val.spt_pathpfthld = sp->pf_retrans;
+		val.spt_pathmaxrxt = sp->pathmaxrxt;
 	}
 
 	if (put_user(len, optlen) || copy_to_user(optval, &val, len))
-- 
cgit v1.2.3


From 7efba10d6bd22030fb5931e50bf97496f932f00e Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Mon, 28 Jan 2019 15:08:46 +0800
Subject: sctp: add SCTP_FUTURE_ASOC and SCTP_CURRENT_ASSOC for
 SCTP_STREAM_SCHEDULER sockopt

Check with SCTP_ALL_ASSOC instead in sctp_setsockopt_scheduler and
check with SCTP_FUTURE_ASSOC instead in sctp_getsockopt_scheduler,
it's compatible with 0.

SCTP_CURRENT_ASSOC is supported for SCTP_STREAM_SCHEDULER in this
patch. It also adds default_ss in sctp_sock to support
SCTP_FUTURE_ASSOC.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/structs.h |  2 ++
 net/sctp/outqueue.c        |  2 +-
 net/sctp/socket.c          | 44 +++++++++++++++++++++++++++++++-------------
 3 files changed, 34 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index a5a46b1c9261..58e4b23cecf4 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -211,6 +211,8 @@ struct sctp_sock {
 	/* Flags controlling Heartbeat, SACK delay, and Path MTU Discovery. */
 	__u32 param_flags;
 
+	__u32 default_ss;
+
 	struct sctp_rtoinfo rtoinfo;
 	struct sctp_paddrparams paddrparam;
 	struct sctp_assocparams assocparams;
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index c37e1c2dec9d..fd33281999b5 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -212,7 +212,7 @@ void sctp_outq_init(struct sctp_association *asoc, struct sctp_outq *q)
 	INIT_LIST_HEAD(&q->retransmit);
 	INIT_LIST_HEAD(&q->sacked);
 	INIT_LIST_HEAD(&q->abandoned);
-	sctp_sched_set_sched(asoc, SCTP_SS_DEFAULT);
+	sctp_sched_set_sched(asoc, sctp_sk(asoc->base.sk)->default_ss);
 }
 
 /* Free the outqueue structure and any related pending chunks.
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 00b0f6659251..9644bdc8e85c 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -4352,29 +4352,44 @@ static int sctp_setsockopt_scheduler(struct sock *sk,
 				     char __user *optval,
 				     unsigned int optlen)
 {
+	struct sctp_sock *sp = sctp_sk(sk);
 	struct sctp_association *asoc;
 	struct sctp_assoc_value params;
-	int retval = -EINVAL;
+	int retval = 0;
 
 	if (optlen < sizeof(params))
-		goto out;
+		return -EINVAL;
 
 	optlen = sizeof(params);
-	if (copy_from_user(&params, optval, optlen)) {
-		retval = -EFAULT;
-		goto out;
-	}
+	if (copy_from_user(&params, optval, optlen))
+		return -EFAULT;
 
 	if (params.assoc_value > SCTP_SS_MAX)
-		goto out;
+		return -EINVAL;
 
 	asoc = sctp_id2assoc(sk, params.assoc_id);
-	if (!asoc)
-		goto out;
+	if (!asoc && params.assoc_id > SCTP_ALL_ASSOC &&
+	    sctp_style(sk, UDP))
+		return -EINVAL;
 
-	retval = sctp_sched_set_sched(asoc, params.assoc_value);
+	if (asoc)
+		return sctp_sched_set_sched(asoc, params.assoc_value);
+
+	if (params.assoc_id == SCTP_FUTURE_ASSOC ||
+	    params.assoc_id == SCTP_ALL_ASSOC)
+		sp->default_ss = params.assoc_value;
+
+	if (params.assoc_id == SCTP_CURRENT_ASSOC ||
+	    params.assoc_id == SCTP_ALL_ASSOC) {
+		list_for_each_entry(asoc, &sp->ep->asocs, asocs) {
+			int ret = sctp_sched_set_sched(asoc,
+						       params.assoc_value);
+
+			if (ret && !retval)
+				retval = ret;
+		}
+	}
 
-out:
 	return retval;
 }
 
@@ -5005,6 +5020,7 @@ static int sctp_init_sock(struct sock *sk)
 	sp->param_flags = SPP_HB_ENABLE |
 			  SPP_PMTUD_ENABLE |
 			  SPP_SACKDELAY_ENABLE;
+	sp->default_ss = SCTP_SS_DEFAULT;
 
 	/* If enabled no SCTP message fragmentation will be performed.
 	 * Configure through SCTP_DISABLE_FRAGMENTS socket option.
@@ -7572,12 +7588,14 @@ static int sctp_getsockopt_scheduler(struct sock *sk, int len,
 		goto out;
 
 	asoc = sctp_id2assoc(sk, params.assoc_id);
-	if (!asoc) {
+	if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC &&
+	    sctp_style(sk, UDP)) {
 		retval = -EINVAL;
 		goto out;
 	}
 
-	params.assoc_value = sctp_sched_get_sched(asoc);
+	params.assoc_value = asoc ? sctp_sched_get_sched(asoc)
+				  : sctp_sk(sk)->default_ss;
 
 	if (put_user(len, optlen))
 		goto out;
-- 
cgit v1.2.3


From fab940755d1d78377901450b6ee7c77356e06821 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp@suse.de>
Date: Sun, 27 Jan 2019 14:03:57 +0100
Subject: x86/hw_breakpoints, kprobes: Remove kprobes ifdeffery

Remove the ifdeffery in the breakpoint parsing arch_build_bp_info() by
adding a within_kprobe_blacklist() stub for the !CONFIG_KPROBES case.

It is returning true when kprobes are not enabled to mean that any
address is within the kprobes blacklist on such kernels and thus not
allow kernel breakpoints on non-kprobes kernels.

Signed-off-by: Borislav Petkov <bp@suse.de>
Acked-by: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Frederic Weisbecker <frederic@kernel.org>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "Naveen N. Rao" <naveen.n.rao@linux.ibm.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: https://lkml.kernel.org/r/20190127131237.4557-1-bp@alien8.de
---
 arch/x86/kernel/hw_breakpoint.c | 4 ----
 include/linux/kprobes.h         | 5 +++++
 2 files changed, 5 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c
index 7d6f91f2869a..ff9bfd40429e 100644
--- a/arch/x86/kernel/hw_breakpoint.c
+++ b/arch/x86/kernel/hw_breakpoint.c
@@ -261,12 +261,8 @@ static int arch_build_bp_info(struct perf_event *bp,
 		 * allow kernel breakpoints at all.
 		 */
 		if (attr->bp_addr >= TASK_SIZE_MAX) {
-#ifdef CONFIG_KPROBES
 			if (within_kprobe_blacklist(attr->bp_addr))
 				return -EINVAL;
-#else
-			return -EINVAL;
-#endif
 		}
 
 		hw->type = X86_BREAKPOINT_EXECUTE;
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index e07e91daaacc..201f0f2683f2 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -442,6 +442,11 @@ static inline int enable_kprobe(struct kprobe *kp)
 {
 	return -ENOSYS;
 }
+
+static inline bool within_kprobe_blacklist(unsigned long addr)
+{
+	return true;
+}
 #endif /* CONFIG_KPROBES */
 static inline int disable_kretprobe(struct kretprobe *rp)
 {
-- 
cgit v1.2.3


From a97cfe4de1bee89db3286ad0a4201dc6d2b72456 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@mellanox.com>
Date: Wed, 30 Jan 2019 08:58:34 +0000
Subject: mlxsw: spectrum_acl: Add C-TCAM spill tracepoint

Add some visibility to the rule addition process and trace whenever rule
spilled into C-TCAM.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/mellanox/mlxsw/spectrum_acl_atcam.c   |  3 ++
 include/trace/events/mlxsw.h                       | 38 ++++++++++++++++++++++
 2 files changed, 41 insertions(+)
 create mode 100644 include/trace/events/mlxsw.h

(limited to 'include')

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_atcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_atcam.c
index cda0a7170c34..a74a390901ac 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_atcam.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_atcam.c
@@ -7,6 +7,8 @@
 #include <linux/gfp.h>
 #include <linux/refcount.h>
 #include <linux/rhashtable.h>
+#define CREATE_TRACE_POINTS
+#include <trace/events/mlxsw.h>
 
 #include "reg.h"
 #include "core.h"
@@ -578,6 +580,7 @@ int mlxsw_sp_acl_atcam_entry_add(struct mlxsw_sp *mlxsw_sp,
 	/* It is possible we failed to add the rule to the A-TCAM due to
 	 * exceeded number of masks. Try to spill into C-TCAM.
 	 */
+	trace_mlxsw_sp_acl_atcam_entry_add_ctcam_spill(mlxsw_sp, aregion);
 	err = mlxsw_sp_acl_ctcam_entry_add(mlxsw_sp, &aregion->cregion,
 					   &achunk->cchunk, &aentry->centry,
 					   rulei, true);
diff --git a/include/trace/events/mlxsw.h b/include/trace/events/mlxsw.h
new file mode 100644
index 000000000000..6c2bafcade18
--- /dev/null
+++ b/include/trace/events/mlxsw.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */
+/* Copyright (c) 2019 Mellanox Technologies. All rights reserved */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM mlxsw
+
+#if !defined(_MLXSW_TRACEPOINT_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _MLXSW_TRACEPOINT_H
+
+#include <linux/tracepoint.h>
+
+struct mlxsw_sp;
+struct mlxsw_sp_acl_atcam_region;
+
+TRACE_EVENT(mlxsw_sp_acl_atcam_entry_add_ctcam_spill,
+	TP_PROTO(const struct mlxsw_sp *mlxsw_sp,
+		 const struct mlxsw_sp_acl_atcam_region *aregion),
+
+	TP_ARGS(mlxsw_sp, aregion),
+
+	TP_STRUCT__entry(
+		__field(const void *, mlxsw_sp)
+		__field(const void *, aregion)
+	),
+
+	TP_fast_assign(
+		__entry->mlxsw_sp = mlxsw_sp;
+		__entry->aregion = aregion;
+	),
+
+	TP_printk("mlxsw_sp %p, aregion %p",
+		  __entry->mlxsw_sp, __entry->aregion)
+);
+
+#endif /* _MLXSW_TRACEPOINT_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
-- 
cgit v1.2.3


From 5c238a8b599f1ae25eaeb08ad0e9e13e2b9eb023 Mon Sep 17 00:00:00 2001
From: Amit Kucheria <amit.kucheria@linaro.org>
Date: Wed, 30 Jan 2019 10:52:01 +0530
Subject: cpufreq: Auto-register the driver as a thermal cooling device if
 asked

All cpufreq drivers do similar things to register as a cooling device.
Provide a cpufreq driver flag so drivers can just ask the cpufreq core
to register the cooling device on their behalf. This allows us to get
rid of duplicated code in the drivers.

In order to allow this, we add a struct thermal_cooling_device pointer
to struct cpufreq_policy so that drivers don't need to store it in a
private data structure.

Suggested-by: Stephen Boyd <swboyd@chromium.org>
Suggested-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Amit Kucheria <amit.kucheria@linaro.org>
Reviewed-by: Matthias Kaehlcke <mka@chromium.org>
Tested-by: Matthias Kaehlcke <mka@chromium.org>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Reviewed-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/cpufreq.c | 11 +++++++++++
 include/linux/cpufreq.h   |  9 +++++++++
 2 files changed, 20 insertions(+)

(limited to 'include')

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 3eff158d9750..96a69c67a545 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -19,6 +19,7 @@
 
 #include <linux/cpu.h>
 #include <linux/cpufreq.h>
+#include <linux/cpu_cooling.h>
 #include <linux/delay.h>
 #include <linux/device.h>
 #include <linux/init.h>
@@ -1316,6 +1317,10 @@ static int cpufreq_online(unsigned int cpu)
 	if (cpufreq_driver->ready)
 		cpufreq_driver->ready(policy);
 
+	if (IS_ENABLED(CONFIG_CPU_THERMAL) &&
+	    cpufreq_driver->flags & CPUFREQ_IS_COOLING_DEV)
+		policy->cdev = of_cpufreq_cooling_register(policy);
+
 	pr_debug("initialization complete\n");
 
 	return 0;
@@ -1403,6 +1408,12 @@ static int cpufreq_offline(unsigned int cpu)
 		goto unlock;
 	}
 
+	if (IS_ENABLED(CONFIG_CPU_THERMAL) &&
+	    cpufreq_driver->flags & CPUFREQ_IS_COOLING_DEV) {
+		cpufreq_cooling_unregister(policy->cdev);
+		policy->cdev = NULL;
+	}
+
 	if (cpufreq_driver->stop_cpu)
 		cpufreq_driver->stop_cpu(policy);
 
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index c19142911554..9db074ecbbd7 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -151,6 +151,9 @@ struct cpufreq_policy {
 
 	/* For cpufreq driver's internal use */
 	void			*driver_data;
+
+	/* Pointer to the cooling device if used for thermal mitigation */
+	struct thermal_cooling_device *cdev;
 };
 
 /* Only for ACPI */
@@ -378,6 +381,12 @@ struct cpufreq_driver {
  */
 #define CPUFREQ_NO_AUTO_DYNAMIC_SWITCHING	BIT(6)
 
+/*
+ * Set by drivers that want the core to automatically register the cpufreq
+ * driver as a thermal cooling device.
+ */
+#define CPUFREQ_IS_COOLING_DEV			BIT(7)
+
 int cpufreq_register_driver(struct cpufreq_driver *driver_data);
 int cpufreq_unregister_driver(struct cpufreq_driver *driver_data);
 
-- 
cgit v1.2.3


From 0b5cb3300ae59ed7e93b465dfa2384a6a4df8eb4 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Tue, 22 Jan 2019 10:25:20 -0800
Subject: RDMA/srp: Increase max_segment_size

The default behavior of the SCSI core is to set the block layer request
queue parameter max_segment_size to 64 KB. That means that elements of
scatterlists are limited to 64 KB. Since RDMA adapters support larger
sizes, increase max_segment_size for the SRP initiator.

Notes:
- The SCSI max_segment_size parameter was introduced in kernel v5.0. See
  also commit 50c2e9107f17 ("scsi: introduce a max_segment_size
  host_template parameters").
- Some other block drivers already set max_segment_size to UINT_MAX,
  e.g. nbd and rbd.

Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/ulp/srp/ib_srp.c |  1 +
 include/rdma/ib_verbs.h             | 13 +++++++++++++
 2 files changed, 14 insertions(+)

(limited to 'include')

diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index 0f855294ff3f..29fe46dbdbf1 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -3823,6 +3823,7 @@ static ssize_t srp_create_target(struct device *dev,
 	target_host->max_id      = 1;
 	target_host->max_lun     = -1LL;
 	target_host->max_cmd_len = sizeof ((struct srp_cmd *) (void *) 0L)->cdb;
+	target_host->max_segment_size = ib_dma_max_seg_size(ibdev);
 
 	target = host_to_target(target_host);
 
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 94b6e1dd4dab..71ea144ec823 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -3715,6 +3715,19 @@ static inline unsigned int ib_sg_dma_len(struct ib_device *dev,
 	return sg_dma_len(sg);
 }
 
+/**
+ * ib_dma_max_seg_size - Return the size limit of a single DMA transfer
+ * @dev: The device to query
+ *
+ * The returned value represents a size in bytes.
+ */
+static inline unsigned int ib_dma_max_seg_size(struct ib_device *dev)
+{
+	struct device_dma_parameters *p = dev->dma_device->dma_parms;
+
+	return p ? p->max_segment_size : UINT_MAX;
+}
+
 /**
  * ib_dma_sync_single_for_cpu - Prepare DMA region to be accessed by CPU
  * @dev: The device for which the DMA address was created
-- 
cgit v1.2.3


From 9bc61ab18b1d41f26dc06b9e6d3c203e65f83fe6 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Sun, 4 Nov 2018 03:19:03 -0500
Subject: vfs: Introduce fs_context, switch vfs_kern_mount() to it.

Introduce a filesystem context concept to be used during superblock
creation for mount and superblock reconfiguration for remount.  This is
allocated at the beginning of the mount procedure and into it is placed:

 (1) Filesystem type.

 (2) Namespaces.

 (3) Source/Device names (there may be multiple).

 (4) Superblock flags (SB_*).

 (5) Security details.

 (6) Filesystem-specific data, as set by the mount options.

Accessor functions are then provided to set up a context, parameterise it
from monolithic mount data (the data page passed to mount(2)) and tear it
down again.

A legacy wrapper is provided that implements what will be the basic
operations, wrapping access to filesystems that aren't yet aware of the
fs_context.

Finally, vfs_kern_mount() is changed to make use of the fs_context and
mount_fs() is replaced by vfs_get_tree(), called from vfs_kern_mount().
[AV -- add missing kstrdup()]
[AV -- put_cred() can be unconditional - fc->cred can't be NULL]
[AV -- take legacy_validate() contents into legacy_parse_monolithic()]
[AV -- merge KERNEL_MOUNT and USER_MOUNT]
[AV -- don't unlock superblock on success return from vfs_get_tree()]
[AV -- kill 'reference' argument of init_fs_context()]

Signed-off-by: David Howells <dhowells@redhat.com>
Co-developed-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/Makefile                |   3 +-
 fs/fs_context.c            | 182 +++++++++++++++++++++++++++++++++++++++++++++
 fs/internal.h              |   9 ++-
 fs/namespace.c             |  46 ++++++++----
 fs/super.c                 |  50 ++++++-------
 include/linux/fs_context.h |  64 ++++++++++++++++
 6 files changed, 310 insertions(+), 44 deletions(-)
 create mode 100644 fs/fs_context.c
 create mode 100644 include/linux/fs_context.h

(limited to 'include')

diff --git a/fs/Makefile b/fs/Makefile
index 293733f61594..5563cf34f7c2 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -12,7 +12,8 @@ obj-y :=	open.o read_write.o file_table.o super.o \
 		attr.o bad_inode.o file.o filesystems.o namespace.o \
 		seq_file.o xattr.o libfs.o fs-writeback.o \
 		pnode.o splice.o sync.o utimes.o d_path.o \
-		stack.o fs_struct.o statfs.o fs_pin.o nsfs.o
+		stack.o fs_struct.o statfs.o fs_pin.o nsfs.o \
+		fs_context.o
 
 ifeq ($(CONFIG_BLOCK),y)
 obj-y +=	buffer.o block_dev.o direct-io.o mpage.o
diff --git a/fs/fs_context.c b/fs/fs_context.c
new file mode 100644
index 000000000000..4294091b689d
--- /dev/null
+++ b/fs/fs_context.c
@@ -0,0 +1,182 @@
+/* Provide a way to create a superblock configuration context within the kernel
+ * that allows a superblock to be set up prior to mounting.
+ *
+ * Copyright (C) 2017 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/fs_context.h>
+#include <linux/fs.h>
+#include <linux/mount.h>
+#include <linux/nsproxy.h>
+#include <linux/slab.h>
+#include <linux/magic.h>
+#include <linux/security.h>
+#include <linux/mnt_namespace.h>
+#include <linux/pid_namespace.h>
+#include <linux/user_namespace.h>
+#include <net/net_namespace.h>
+#include "mount.h"
+#include "internal.h"
+
+struct legacy_fs_context {
+	char			*legacy_data;	/* Data page for legacy filesystems */
+	size_t			data_size;
+};
+
+static int legacy_init_fs_context(struct fs_context *fc);
+
+/**
+ * alloc_fs_context - Create a filesystem context.
+ * @fs_type: The filesystem type.
+ * @reference: The dentry from which this one derives (or NULL)
+ * @sb_flags: Filesystem/superblock flags (SB_*)
+ * @sb_flags_mask: Applicable members of @sb_flags
+ * @purpose: The purpose that this configuration shall be used for.
+ *
+ * Open a filesystem and create a mount context.  The mount context is
+ * initialised with the supplied flags and, if a submount/automount from
+ * another superblock (referred to by @reference) is supplied, may have
+ * parameters such as namespaces copied across from that superblock.
+ */
+static struct fs_context *alloc_fs_context(struct file_system_type *fs_type,
+				      struct dentry *reference,
+				      unsigned int sb_flags,
+				      unsigned int sb_flags_mask,
+				      enum fs_context_purpose purpose)
+{
+	struct fs_context *fc;
+	int ret = -ENOMEM;
+
+	fc = kzalloc(sizeof(struct fs_context), GFP_KERNEL);
+	if (!fc)
+		return ERR_PTR(-ENOMEM);
+
+	fc->purpose	= purpose;
+	fc->sb_flags	= sb_flags;
+	fc->sb_flags_mask = sb_flags_mask;
+	fc->fs_type	= get_filesystem(fs_type);
+	fc->cred	= get_current_cred();
+	fc->net_ns	= get_net(current->nsproxy->net_ns);
+
+	switch (purpose) {
+	case FS_CONTEXT_FOR_MOUNT:
+		fc->user_ns = get_user_ns(fc->cred->user_ns);
+		break;
+	}
+
+	ret = legacy_init_fs_context(fc);
+	if (ret < 0)
+		goto err_fc;
+	fc->need_free = true;
+	return fc;
+
+err_fc:
+	put_fs_context(fc);
+	return ERR_PTR(ret);
+}
+
+struct fs_context *fs_context_for_mount(struct file_system_type *fs_type,
+					unsigned int sb_flags)
+{
+	return alloc_fs_context(fs_type, NULL, sb_flags, 0,
+					FS_CONTEXT_FOR_MOUNT);
+}
+EXPORT_SYMBOL(fs_context_for_mount);
+
+static void legacy_fs_context_free(struct fs_context *fc);
+/**
+ * put_fs_context - Dispose of a superblock configuration context.
+ * @fc: The context to dispose of.
+ */
+void put_fs_context(struct fs_context *fc)
+{
+	struct super_block *sb;
+
+	if (fc->root) {
+		sb = fc->root->d_sb;
+		dput(fc->root);
+		fc->root = NULL;
+		deactivate_super(sb);
+	}
+
+	if (fc->need_free)
+		legacy_fs_context_free(fc);
+
+	security_free_mnt_opts(&fc->security);
+	if (fc->net_ns)
+		put_net(fc->net_ns);
+	put_user_ns(fc->user_ns);
+	put_cred(fc->cred);
+	kfree(fc->subtype);
+	put_filesystem(fc->fs_type);
+	kfree(fc->source);
+	kfree(fc);
+}
+EXPORT_SYMBOL(put_fs_context);
+
+/*
+ * Free the config for a filesystem that doesn't support fs_context.
+ */
+static void legacy_fs_context_free(struct fs_context *fc)
+{
+	kfree(fc->fs_private);
+}
+
+/*
+ * Add monolithic mount data.
+ */
+static int legacy_parse_monolithic(struct fs_context *fc, void *data)
+{
+	struct legacy_fs_context *ctx = fc->fs_private;
+	ctx->legacy_data = data;
+	if (!ctx->legacy_data)
+		return 0;
+	if (fc->fs_type->fs_flags & FS_BINARY_MOUNTDATA)
+		return 0;
+	return security_sb_eat_lsm_opts(ctx->legacy_data, &fc->security);
+}
+
+/*
+ * Get a mountable root with the legacy mount command.
+ */
+int legacy_get_tree(struct fs_context *fc)
+{
+	struct legacy_fs_context *ctx = fc->fs_private;
+	struct super_block *sb;
+	struct dentry *root;
+
+	root = fc->fs_type->mount(fc->fs_type, fc->sb_flags,
+				      fc->source, ctx->legacy_data);
+	if (IS_ERR(root))
+		return PTR_ERR(root);
+
+	sb = root->d_sb;
+	BUG_ON(!sb);
+
+	fc->root = root;
+	return 0;
+}
+
+/*
+ * Initialise a legacy context for a filesystem that doesn't support
+ * fs_context.
+ */
+static int legacy_init_fs_context(struct fs_context *fc)
+{
+	fc->fs_private = kzalloc(sizeof(struct legacy_fs_context), GFP_KERNEL);
+	if (!fc->fs_private)
+		return -ENOMEM;
+	return 0;
+}
+
+int parse_monolithic_mount_data(struct fs_context *fc, void *data)
+{
+	return legacy_parse_monolithic(fc, data);
+}
diff --git a/fs/internal.h b/fs/internal.h
index d410186bc369..f85c3212d25d 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -17,6 +17,7 @@ struct linux_binprm;
 struct path;
 struct mount;
 struct shrink_control;
+struct fs_context;
 
 /*
  * block_dev.c
@@ -51,6 +52,12 @@ int __generic_write_end(struct inode *inode, loff_t pos, unsigned copied,
  */
 extern void __init chrdev_init(void);
 
+/*
+ * fs_context.c
+ */
+extern int legacy_get_tree(struct fs_context *fc);
+extern int parse_monolithic_mount_data(struct fs_context *, void *);
+
 /*
  * namei.c
  */
@@ -101,8 +108,6 @@ extern struct file *alloc_empty_file_noaccount(int, const struct cred *);
  */
 extern int do_remount_sb(struct super_block *, int, void *, int);
 extern bool trylock_super(struct super_block *sb);
-extern struct dentry *mount_fs(struct file_system_type *,
-			       int, const char *, void *);
 extern struct super_block *user_get_super(dev_t);
 
 /*
diff --git a/fs/namespace.c b/fs/namespace.c
index f0b8a8ca08df..3f2fd7a34733 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -27,6 +27,7 @@
 #include <linux/task_work.h>
 #include <linux/sched/task.h>
 #include <uapi/linux/mount.h>
+#include <linux/fs_context.h>
 
 #include "pnode.h"
 #include "internal.h"
@@ -940,36 +941,53 @@ static struct mount *skip_mnt_tree(struct mount *p)
 	return p;
 }
 
-struct vfsmount *
-vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void *data)
+struct vfsmount *vfs_kern_mount(struct file_system_type *type,
+				int flags, const char *name,
+				void *data)
 {
+	struct fs_context *fc;
 	struct mount *mnt;
-	struct dentry *root;
+	int ret = 0;
 
 	if (!type)
 		return ERR_PTR(-ENODEV);
 
+	fc = fs_context_for_mount(type, flags);
+	if (IS_ERR(fc))
+		return ERR_CAST(fc);
+
+	if (name) {
+		fc->source = kstrdup(name, GFP_KERNEL);
+		if (!fc->source)
+			ret = -ENOMEM;
+	}
+	if (!ret)
+		ret = parse_monolithic_mount_data(fc, data);
+	if (!ret)
+		ret = vfs_get_tree(fc);
+	if (ret) {
+		put_fs_context(fc);
+		return ERR_PTR(ret);
+	}
+	up_write(&fc->root->d_sb->s_umount);
 	mnt = alloc_vfsmnt(name);
-	if (!mnt)
+	if (!mnt) {
+		put_fs_context(fc);
 		return ERR_PTR(-ENOMEM);
+	}
 
 	if (flags & SB_KERNMOUNT)
 		mnt->mnt.mnt_flags = MNT_INTERNAL;
 
-	root = mount_fs(type, flags, name, data);
-	if (IS_ERR(root)) {
-		mnt_free_id(mnt);
-		free_vfsmnt(mnt);
-		return ERR_CAST(root);
-	}
-
-	mnt->mnt.mnt_root = root;
-	mnt->mnt.mnt_sb = root->d_sb;
+	atomic_inc(&fc->root->d_sb->s_active);
+	mnt->mnt.mnt_root = dget(fc->root);
+	mnt->mnt.mnt_sb = fc->root->d_sb;
 	mnt->mnt_mountpoint = mnt->mnt.mnt_root;
 	mnt->mnt_parent = mnt;
 	lock_mount_hash();
-	list_add_tail(&mnt->mnt_instance, &root->d_sb->s_mounts);
+	list_add_tail(&mnt->mnt_instance, &fc->root->d_sb->s_mounts);
 	unlock_mount_hash();
+	put_fs_context(fc);
 	return &mnt->mnt;
 }
 EXPORT_SYMBOL_GPL(vfs_kern_mount);
diff --git a/fs/super.c b/fs/super.c
index 48e25eba8465..fc3887277ad1 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -35,6 +35,7 @@
 #include <linux/fsnotify.h>
 #include <linux/lockdep.h>
 #include <linux/user_namespace.h>
+#include <linux/fs_context.h>
 #include <uapi/linux/mount.h>
 #include "internal.h"
 
@@ -1241,27 +1242,24 @@ struct dentry *mount_single(struct file_system_type *fs_type,
 }
 EXPORT_SYMBOL(mount_single);
 
-struct dentry *
-mount_fs(struct file_system_type *type, int flags, const char *name, void *data)
+/**
+ * vfs_get_tree - Get the mountable root
+ * @fc: The superblock configuration context.
+ *
+ * The filesystem is invoked to get or create a superblock which can then later
+ * be used for mounting.  The filesystem places a pointer to the root to be
+ * used for mounting in @fc->root.
+ */
+int vfs_get_tree(struct fs_context *fc)
 {
-	struct dentry *root;
 	struct super_block *sb;
-	int error = -ENOMEM;
-	void *sec_opts = NULL;
+	int error;
 
-	if (data && !(type->fs_flags & FS_BINARY_MOUNTDATA)) {
-		error = security_sb_eat_lsm_opts(data, &sec_opts);
-		if (error)
-			return ERR_PTR(error);
-	}
+	error = legacy_get_tree(fc);
+	if (error < 0)
+		return error;
 
-	root = type->mount(type, flags, name, data);
-	if (IS_ERR(root)) {
-		error = PTR_ERR(root);
-		goto out_free_secdata;
-	}
-	sb = root->d_sb;
-	BUG_ON(!sb);
+	sb = fc->root->d_sb;
 	WARN_ON(!sb->s_bdi);
 
 	/*
@@ -1273,11 +1271,11 @@ mount_fs(struct file_system_type *type, int flags, const char *name, void *data)
 	smp_wmb();
 	sb->s_flags |= SB_BORN;
 
-	error = security_sb_set_mnt_opts(sb, sec_opts, 0, NULL);
+	error = security_sb_set_mnt_opts(sb, fc->security, 0, NULL);
 	if (error)
 		goto out_sb;
 
-	if (!(flags & (MS_KERNMOUNT|MS_SUBMOUNT))) {
+	if (!(fc->sb_flags & (MS_KERNMOUNT|MS_SUBMOUNT))) {
 		error = security_sb_kern_mount(sb);
 		if (error)
 			goto out_sb;
@@ -1290,18 +1288,16 @@ mount_fs(struct file_system_type *type, int flags, const char *name, void *data)
 	 * violate this rule.
 	 */
 	WARN((sb->s_maxbytes < 0), "%s set sb->s_maxbytes to "
-		"negative value (%lld)\n", type->name, sb->s_maxbytes);
+		"negative value (%lld)\n", fc->fs_type->name, sb->s_maxbytes);
 
-	up_write(&sb->s_umount);
-	security_free_mnt_opts(&sec_opts);
-	return root;
+	return 0;
 out_sb:
-	dput(root);
+	dput(fc->root);
+	fc->root = NULL;
 	deactivate_locked_super(sb);
-out_free_secdata:
-	security_free_mnt_opts(&sec_opts);
-	return ERR_PTR(error);
+	return error;
 }
+EXPORT_SYMBOL(vfs_get_tree);
 
 /*
  * Setup private BDI for given superblock. It gets automatically cleaned up
diff --git a/include/linux/fs_context.h b/include/linux/fs_context.h
new file mode 100644
index 000000000000..9805514444c9
--- /dev/null
+++ b/include/linux/fs_context.h
@@ -0,0 +1,64 @@
+/* Filesystem superblock creation and reconfiguration context.
+ *
+ * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#ifndef _LINUX_FS_CONTEXT_H
+#define _LINUX_FS_CONTEXT_H
+
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/security.h>
+
+struct cred;
+struct dentry;
+struct file_operations;
+struct file_system_type;
+struct net;
+struct user_namespace;
+
+enum fs_context_purpose {
+	FS_CONTEXT_FOR_MOUNT,		/* New superblock for explicit mount */
+};
+
+/*
+ * Filesystem context for holding the parameters used in the creation or
+ * reconfiguration of a superblock.
+ *
+ * Superblock creation fills in ->root whereas reconfiguration begins with this
+ * already set.
+ *
+ * See Documentation/filesystems/mounting.txt
+ */
+struct fs_context {
+	struct file_system_type	*fs_type;
+	void			*fs_private;	/* The filesystem's context */
+	struct dentry		*root;		/* The root and superblock */
+	struct user_namespace	*user_ns;	/* The user namespace for this mount */
+	struct net		*net_ns;	/* The network namespace for this mount */
+	const struct cred	*cred;		/* The mounter's credentials */
+	const char		*source;	/* The source name (eg. dev path) */
+	const char		*subtype;	/* The subtype to set on the superblock */
+	void			*security;	/* Linux S&M options */
+	unsigned int		sb_flags;	/* Proposed superblock flags (SB_*) */
+	unsigned int		sb_flags_mask;	/* Superblock flags that were changed */
+	enum fs_context_purpose	purpose:8;
+	bool			need_free:1;	/* Need to call ops->free() */
+};
+
+/*
+ * fs_context manipulation functions.
+ */
+extern struct fs_context *fs_context_for_mount(struct file_system_type *fs_type,
+						unsigned int sb_flags);
+
+extern int vfs_get_tree(struct fs_context *fc);
+extern void put_fs_context(struct fs_context *fc);
+
+#endif /* _LINUX_FS_CONTEXT_H */
-- 
cgit v1.2.3


From 8f2918898eb5fe25845dde7f4a77bda0e2966e05 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 4 Nov 2018 06:48:34 -0500
Subject: new helpers: vfs_create_mount(), fc_mount()

Create a new helper, vfs_create_mount(), that creates a detached vfsmount
object from an fs_context that has a superblock attached to it.

Almost all uses will be paired with immediately preceding vfs_get_tree();
add a helper for such combination.

Switch vfs_kern_mount() to use this.

NOTE: mild behaviour change; passing NULL as 'device name' to
something like procfs will change /proc/*/mountstats - "device none"
instead on "no device".  That is consistent with /proc/mounts et.al.

[do'h - EXPORT_SYMBOL_GPL slipped in by mistake; removed]
[AV -- remove confused comment from vfs_create_mount()]
[AV -- removed the second argument]

Reviewed-by: David Howells <dhowells@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namespace.c        | 76 +++++++++++++++++++++++++++++++++++----------------
 include/linux/mount.h |  3 ++
 2 files changed, 55 insertions(+), 24 deletions(-)

(limited to 'include')

diff --git a/fs/namespace.c b/fs/namespace.c
index 3f2fd7a34733..156771f5745a 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -941,12 +941,59 @@ static struct mount *skip_mnt_tree(struct mount *p)
 	return p;
 }
 
+/**
+ * vfs_create_mount - Create a mount for a configured superblock
+ * @fc: The configuration context with the superblock attached
+ *
+ * Create a mount to an already configured superblock.  If necessary, the
+ * caller should invoke vfs_get_tree() before calling this.
+ *
+ * Note that this does not attach the mount to anything.
+ */
+struct vfsmount *vfs_create_mount(struct fs_context *fc)
+{
+	struct mount *mnt;
+
+	if (!fc->root)
+		return ERR_PTR(-EINVAL);
+
+	mnt = alloc_vfsmnt(fc->source ?: "none");
+	if (!mnt)
+		return ERR_PTR(-ENOMEM);
+
+	if (fc->sb_flags & SB_KERNMOUNT)
+		mnt->mnt.mnt_flags = MNT_INTERNAL;
+
+	atomic_inc(&fc->root->d_sb->s_active);
+	mnt->mnt.mnt_sb		= fc->root->d_sb;
+	mnt->mnt.mnt_root	= dget(fc->root);
+	mnt->mnt_mountpoint	= mnt->mnt.mnt_root;
+	mnt->mnt_parent		= mnt;
+
+	lock_mount_hash();
+	list_add_tail(&mnt->mnt_instance, &mnt->mnt.mnt_sb->s_mounts);
+	unlock_mount_hash();
+	return &mnt->mnt;
+}
+EXPORT_SYMBOL(vfs_create_mount);
+
+struct vfsmount *fc_mount(struct fs_context *fc)
+{
+	int err = vfs_get_tree(fc);
+	if (!err) {
+		up_write(&fc->root->d_sb->s_umount);
+		return vfs_create_mount(fc);
+	}
+	return ERR_PTR(err);
+}
+EXPORT_SYMBOL(fc_mount);
+
 struct vfsmount *vfs_kern_mount(struct file_system_type *type,
 				int flags, const char *name,
 				void *data)
 {
 	struct fs_context *fc;
-	struct mount *mnt;
+	struct vfsmount *mnt;
 	int ret = 0;
 
 	if (!type)
@@ -964,31 +1011,12 @@ struct vfsmount *vfs_kern_mount(struct file_system_type *type,
 	if (!ret)
 		ret = parse_monolithic_mount_data(fc, data);
 	if (!ret)
-		ret = vfs_get_tree(fc);
-	if (ret) {
-		put_fs_context(fc);
-		return ERR_PTR(ret);
-	}
-	up_write(&fc->root->d_sb->s_umount);
-	mnt = alloc_vfsmnt(name);
-	if (!mnt) {
-		put_fs_context(fc);
-		return ERR_PTR(-ENOMEM);
-	}
-
-	if (flags & SB_KERNMOUNT)
-		mnt->mnt.mnt_flags = MNT_INTERNAL;
+		mnt = fc_mount(fc);
+	else
+		mnt = ERR_PTR(ret);
 
-	atomic_inc(&fc->root->d_sb->s_active);
-	mnt->mnt.mnt_root = dget(fc->root);
-	mnt->mnt.mnt_sb = fc->root->d_sb;
-	mnt->mnt_mountpoint = mnt->mnt.mnt_root;
-	mnt->mnt_parent = mnt;
-	lock_mount_hash();
-	list_add_tail(&mnt->mnt_instance, &fc->root->d_sb->s_mounts);
-	unlock_mount_hash();
 	put_fs_context(fc);
-	return &mnt->mnt;
+	return mnt;
 }
 EXPORT_SYMBOL_GPL(vfs_kern_mount);
 
diff --git a/include/linux/mount.h b/include/linux/mount.h
index 037eed52164b..9197ddbf35fb 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -21,6 +21,7 @@ struct super_block;
 struct vfsmount;
 struct dentry;
 struct mnt_namespace;
+struct fs_context;
 
 #define MNT_NOSUID	0x01
 #define MNT_NODEV	0x02
@@ -88,6 +89,8 @@ struct path;
 extern struct vfsmount *clone_private_mount(const struct path *path);
 
 struct file_system_type;
+extern struct vfsmount *fc_mount(struct fs_context *fc);
+extern struct vfsmount *vfs_create_mount(struct fs_context *fc);
 extern struct vfsmount *vfs_kern_mount(struct file_system_type *type,
 				      int flags, const char *name,
 				      void *data);
-- 
cgit v1.2.3


From a0c9a8b8fd9fd572b0d60276beb2142c8f59f9b8 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 4 Nov 2018 07:18:51 -0500
Subject: teach vfs_get_tree() to handle subtype, switch do_new_mount() to it

Roll the handling of subtypes into do_new_mount() and vfs_get_tree().  The
former determines any subtype string and hangs it off the fs_context; the
latter applies it.

Make do_new_mount() create, parameterise and commit an fs_context and
create a mount for itself rather than calling vfs_kern_mount().

[AV -- missing kstrdup()]
[AV -- ... and no kstrdup() if we get to setting ->s_submount - we
simply transfer it from fc, leaving NULL behind]
[AV -- constify ->s_submount, while we are at it]

Reviewed-by: David Howells <dhowells@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namespace.c     | 77 ++++++++++++++++++++++++++++++++----------------------
 fs/super.c         |  5 ++++
 include/linux/fs.h |  2 +-
 3 files changed, 52 insertions(+), 32 deletions(-)

(limited to 'include')

diff --git a/fs/namespace.c b/fs/namespace.c
index 156771f5745a..0354cb6ac2d3 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2479,29 +2479,6 @@ out:
 	return err;
 }
 
-static struct vfsmount *fs_set_subtype(struct vfsmount *mnt, const char *fstype)
-{
-	int err;
-	const char *subtype = strchr(fstype, '.');
-	if (subtype) {
-		subtype++;
-		err = -EINVAL;
-		if (!subtype[0])
-			goto err;
-	} else
-		subtype = "";
-
-	mnt->mnt_sb->s_subtype = kstrdup(subtype, GFP_KERNEL);
-	err = -ENOMEM;
-	if (!mnt->mnt_sb->s_subtype)
-		goto err;
-	return mnt;
-
- err:
-	mntput(mnt);
-	return ERR_PTR(err);
-}
-
 /*
  * add a mount into a namespace's mount tree
  */
@@ -2557,7 +2534,9 @@ static int do_new_mount(struct path *path, const char *fstype, int sb_flags,
 {
 	struct file_system_type *type;
 	struct vfsmount *mnt;
-	int err;
+	struct fs_context *fc;
+	const char *subtype = NULL;
+	int err = 0;
 
 	if (!fstype)
 		return -EINVAL;
@@ -2566,23 +2545,59 @@ static int do_new_mount(struct path *path, const char *fstype, int sb_flags,
 	if (!type)
 		return -ENODEV;
 
-	mnt = vfs_kern_mount(type, sb_flags, name, data);
-	if (!IS_ERR(mnt) && (type->fs_flags & FS_HAS_SUBTYPE) &&
-	    !mnt->mnt_sb->s_subtype)
-		mnt = fs_set_subtype(mnt, fstype);
+	if (type->fs_flags & FS_HAS_SUBTYPE) {
+		subtype = strchr(fstype, '.');
+		if (subtype) {
+			subtype++;
+			if (!*subtype) {
+				put_filesystem(type);
+				return -EINVAL;
+			}
+		} else {
+			subtype = "";
+		}
+	}
 
+	fc = fs_context_for_mount(type, sb_flags);
 	put_filesystem(type);
-	if (IS_ERR(mnt))
-		return PTR_ERR(mnt);
+	if (IS_ERR(fc))
+		return PTR_ERR(fc);
+
+	if (subtype) {
+		fc->subtype = kstrdup(subtype, GFP_KERNEL);
+		if (!fc->subtype)
+			err = -ENOMEM;
+	}
+	if (!err && name) {
+		fc->source = kstrdup(name, GFP_KERNEL);
+		if (!fc->source)
+			err = -ENOMEM;
+	}
+	if (!err)
+		err = parse_monolithic_mount_data(fc, data);
+	if (!err)
+		err = vfs_get_tree(fc);
+	if (err)
+		goto out;
+
+	up_write(&fc->root->d_sb->s_umount);
+	mnt = vfs_create_mount(fc);
+	if (IS_ERR(mnt)) {
+		err = PTR_ERR(mnt);
+		goto out;
+	}
 
 	if (mount_too_revealing(mnt, &mnt_flags)) {
 		mntput(mnt);
-		return -EPERM;
+		err = -EPERM;
+		goto out;
 	}
 
 	err = do_add_mount(real_mount(mnt), path, mnt_flags);
 	if (err)
 		mntput(mnt);
+out:
+	put_fs_context(fc);
 	return err;
 }
 
diff --git a/fs/super.c b/fs/super.c
index fc3887277ad1..b91b6df05b67 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -1262,6 +1262,11 @@ int vfs_get_tree(struct fs_context *fc)
 	sb = fc->root->d_sb;
 	WARN_ON(!sb->s_bdi);
 
+	if (fc->subtype && !sb->s_subtype) {
+		sb->s_subtype = fc->subtype;
+		fc->subtype = NULL;
+	}
+
 	/*
 	 * Write barrier is for super_cache_count(). We place it before setting
 	 * SB_BORN as the data dependency between the two functions is the
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 811c77743dad..36fff12ab890 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1447,7 +1447,7 @@ struct super_block {
 	 * Filesystem subtype.  If non-empty the filesystem type field
 	 * in /proc/mounts will be "type.subtype"
 	 */
-	char *s_subtype;
+	const char *s_subtype;
 
 	const struct dentry_operations *s_d_op; /* default d_op for dentries */
 
-- 
cgit v1.2.3


From 8d0347f6c3a9d4953ddd636a31c6584da082e084 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Sun, 4 Nov 2018 09:28:36 -0500
Subject: convert do_remount_sb() to fs_context

Replace do_remount_sb() with a function, reconfigure_super(), that's
fs_context aware.  The fs_context is expected to be parameterised already
and have ->root pointing to the superblock to be reconfigured.

A legacy wrapper is provided that is intended to be called from the
fs_context ops when those appear, but for now is called directly from
reconfigure_super().  This wrapper invokes the ->remount_fs() superblock op
for the moment.  It is intended that the remount_fs() op will be phased
out.

The fs_context->purpose is set to FS_CONTEXT_FOR_RECONFIGURE to indicate
that the context is being used for reconfiguration.

do_umount_root() is provided to consolidate remount-to-R/O for umount and
emergency remount by creating a context and invoking reconfiguration.

do_remount(), do_umount() and do_emergency_remount_callback() are switched
to use the new process.

[AV -- fold UMOUNT and EMERGENCY_REMOUNT in; fixes the
umount / bug, gets rid of pointless complexity]
[AV -- set ->net_ns in all cases; nfs remount will need that]
[AV -- shift security_sb_remount() call into reconfigure_super(); the callers
that didn't do security_sb_remount() have NULL fc->security anyway, so it's
a no-op for them]

Signed-off-by: David Howells <dhowells@redhat.com>
Co-developed-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/fs_context.c            |  35 ++++++++++++++-
 fs/internal.h              |   3 +-
 fs/namespace.c             |  61 ++++++++++++++++----------
 fs/super.c                 | 107 +++++++++++++++++++++++++++++++--------------
 include/linux/fs.h         |   1 +
 include/linux/fs_context.h |   4 ++
 6 files changed, 152 insertions(+), 59 deletions(-)

(limited to 'include')

diff --git a/fs/fs_context.c b/fs/fs_context.c
index 857cd46a687b..5e2c3aba1dd8 100644
--- a/fs/fs_context.c
+++ b/fs/fs_context.c
@@ -69,6 +69,13 @@ static struct fs_context *alloc_fs_context(struct file_system_type *fs_type,
 	case FS_CONTEXT_FOR_MOUNT:
 		fc->user_ns = get_user_ns(fc->cred->user_ns);
 		break;
+	case FS_CONTEXT_FOR_RECONFIGURE:
+		/* We don't pin any namespaces as the superblock's
+		 * subscriptions cannot be changed at this point.
+		 */
+		atomic_inc(&reference->d_sb->s_active);
+		fc->root = dget(reference);
+		break;
 	}
 
 	ret = legacy_init_fs_context(fc);
@@ -90,6 +97,15 @@ struct fs_context *fs_context_for_mount(struct file_system_type *fs_type,
 }
 EXPORT_SYMBOL(fs_context_for_mount);
 
+struct fs_context *fs_context_for_reconfigure(struct dentry *dentry,
+					unsigned int sb_flags,
+					unsigned int sb_flags_mask)
+{
+	return alloc_fs_context(dentry->d_sb->s_type, dentry, sb_flags,
+				sb_flags_mask, FS_CONTEXT_FOR_RECONFIGURE);
+}
+EXPORT_SYMBOL(fs_context_for_reconfigure);
+
 void fc_drop_locked(struct fs_context *fc)
 {
 	struct super_block *sb = fc->root->d_sb;
@@ -99,6 +115,7 @@ void fc_drop_locked(struct fs_context *fc)
 }
 
 static void legacy_fs_context_free(struct fs_context *fc);
+
 /**
  * put_fs_context - Dispose of a superblock configuration context.
  * @fc: The context to dispose of.
@@ -118,8 +135,7 @@ void put_fs_context(struct fs_context *fc)
 		legacy_fs_context_free(fc);
 
 	security_free_mnt_opts(&fc->security);
-	if (fc->net_ns)
-		put_net(fc->net_ns);
+	put_net(fc->net_ns);
 	put_user_ns(fc->user_ns);
 	put_cred(fc->cred);
 	kfree(fc->subtype);
@@ -172,6 +188,21 @@ int legacy_get_tree(struct fs_context *fc)
 	return 0;
 }
 
+/*
+ * Handle remount.
+ */
+int legacy_reconfigure(struct fs_context *fc)
+{
+	struct legacy_fs_context *ctx = fc->fs_private;
+	struct super_block *sb = fc->root->d_sb;
+
+	if (!sb->s_op->remount_fs)
+		return 0;
+
+	return sb->s_op->remount_fs(sb, &fc->sb_flags,
+				    ctx ? ctx->legacy_data : NULL);
+}
+
 /*
  * Initialise a legacy context for a filesystem that doesn't support
  * fs_context.
diff --git a/fs/internal.h b/fs/internal.h
index 6af26d897034..016a5b8dd305 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -56,6 +56,7 @@ extern void __init chrdev_init(void);
  * fs_context.c
  */
 extern int legacy_get_tree(struct fs_context *fc);
+extern int legacy_reconfigure(struct fs_context *fc);
 extern int parse_monolithic_mount_data(struct fs_context *, void *);
 extern void fc_drop_locked(struct fs_context *);
 
@@ -107,7 +108,7 @@ extern struct file *alloc_empty_file_noaccount(int, const struct cred *);
 /*
  * super.c
  */
-extern int do_remount_sb(struct super_block *, int, void *, int);
+extern int reconfigure_super(struct fs_context *);
 extern bool trylock_super(struct super_block *sb);
 extern struct super_block *user_get_super(dev_t);
 
diff --git a/fs/namespace.c b/fs/namespace.c
index 750500c6c33d..931228d8518a 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1489,6 +1489,29 @@ static void umount_tree(struct mount *mnt, enum umount_tree_flags how)
 
 static void shrink_submounts(struct mount *mnt);
 
+static int do_umount_root(struct super_block *sb)
+{
+	int ret = 0;
+
+	down_write(&sb->s_umount);
+	if (!sb_rdonly(sb)) {
+		struct fs_context *fc;
+
+		fc = fs_context_for_reconfigure(sb->s_root, SB_RDONLY,
+						SB_RDONLY);
+		if (IS_ERR(fc)) {
+			ret = PTR_ERR(fc);
+		} else {
+			ret = parse_monolithic_mount_data(fc, NULL);
+			if (!ret)
+				ret = reconfigure_super(fc);
+			put_fs_context(fc);
+		}
+	}
+	up_write(&sb->s_umount);
+	return ret;
+}
+
 static int do_umount(struct mount *mnt, int flags)
 {
 	struct super_block *sb = mnt->mnt.mnt_sb;
@@ -1554,11 +1577,7 @@ static int do_umount(struct mount *mnt, int flags)
 		 */
 		if (!ns_capable(sb->s_user_ns, CAP_SYS_ADMIN))
 			return -EPERM;
-		down_write(&sb->s_umount);
-		if (!sb_rdonly(sb))
-			retval = do_remount_sb(sb, SB_RDONLY, NULL, 0);
-		up_write(&sb->s_umount);
-		return retval;
+		return do_umount_root(sb);
 	}
 
 	namespace_lock();
@@ -2367,7 +2386,7 @@ static int do_remount(struct path *path, int ms_flags, int sb_flags,
 	int err;
 	struct super_block *sb = path->mnt->mnt_sb;
 	struct mount *mnt = real_mount(path->mnt);
-	void *sec_opts = NULL;
+	struct fs_context *fc;
 
 	if (!check_mnt(mnt))
 		return -EINVAL;
@@ -2378,24 +2397,22 @@ static int do_remount(struct path *path, int ms_flags, int sb_flags,
 	if (!can_change_locked_flags(mnt, mnt_flags))
 		return -EPERM;
 
-	if (data && !(sb->s_type->fs_flags & FS_BINARY_MOUNTDATA)) {
-		err = security_sb_eat_lsm_opts(data, &sec_opts);
-		if (err)
-			return err;
-	}
-	err = security_sb_remount(sb, sec_opts);
-	security_free_mnt_opts(&sec_opts);
-	if (err)
-		return err;
+	fc = fs_context_for_reconfigure(path->dentry, sb_flags, MS_RMT_MASK);
+	if (IS_ERR(fc))
+		return PTR_ERR(fc);
 
-	down_write(&sb->s_umount);
-	err = -EPERM;
-	if (ns_capable(sb->s_user_ns, CAP_SYS_ADMIN)) {
-		err = do_remount_sb(sb, sb_flags, data, 0);
-		if (!err)
-			set_mount_attributes(mnt, mnt_flags);
+	err = parse_monolithic_mount_data(fc, data);
+	if (!err) {
+		down_write(&sb->s_umount);
+		err = -EPERM;
+		if (ns_capable(sb->s_user_ns, CAP_SYS_ADMIN)) {
+			err = reconfigure_super(fc);
+			if (!err)
+				set_mount_attributes(mnt, mnt_flags);
+		}
+		up_write(&sb->s_umount);
 	}
-	up_write(&sb->s_umount);
+	put_fs_context(fc);
 	return err;
 }
 
diff --git a/fs/super.c b/fs/super.c
index 11e2a6cb3baf..50553233dd15 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -836,28 +836,35 @@ rescan:
 }
 
 /**
- *	do_remount_sb - asks filesystem to change mount options.
- *	@sb:	superblock in question
- *	@sb_flags: revised superblock flags
- *	@data:	the rest of options
- *      @force: whether or not to force the change
+ * reconfigure_super - asks filesystem to change superblock parameters
+ * @fc: The superblock and configuration
  *
- *	Alters the mount options of a mounted file system.
+ * Alters the configuration parameters of a live superblock.
  */
-int do_remount_sb(struct super_block *sb, int sb_flags, void *data, int force)
+int reconfigure_super(struct fs_context *fc)
 {
+	struct super_block *sb = fc->root->d_sb;
 	int retval;
-	int remount_ro;
+	bool remount_ro = false;
+	bool force = fc->sb_flags & SB_FORCE;
 
+	if (fc->sb_flags_mask & ~MS_RMT_MASK)
+		return -EINVAL;
 	if (sb->s_writers.frozen != SB_UNFROZEN)
 		return -EBUSY;
 
+	retval = security_sb_remount(sb, fc->security);
+	if (retval)
+		return retval;
+
+	if (fc->sb_flags_mask & SB_RDONLY) {
 #ifdef CONFIG_BLOCK
-	if (!(sb_flags & SB_RDONLY) && bdev_read_only(sb->s_bdev))
-		return -EACCES;
+		if (!(fc->sb_flags & SB_RDONLY) && bdev_read_only(sb->s_bdev))
+			return -EACCES;
 #endif
 
-	remount_ro = (sb_flags & SB_RDONLY) && !sb_rdonly(sb);
+		remount_ro = (fc->sb_flags & SB_RDONLY) && !sb_rdonly(sb);
+	}
 
 	if (remount_ro) {
 		if (!hlist_empty(&sb->s_pins)) {
@@ -868,13 +875,14 @@ int do_remount_sb(struct super_block *sb, int sb_flags, void *data, int force)
 				return 0;
 			if (sb->s_writers.frozen != SB_UNFROZEN)
 				return -EBUSY;
-			remount_ro = (sb_flags & SB_RDONLY) && !sb_rdonly(sb);
+			remount_ro = !sb_rdonly(sb);
 		}
 	}
 	shrink_dcache_sb(sb);
 
-	/* If we are remounting RDONLY and current sb is read/write,
-	   make sure there are no rw files opened */
+	/* If we are reconfiguring to RDONLY and current sb is read/write,
+	 * make sure there are no files open for writing.
+	 */
 	if (remount_ro) {
 		if (force) {
 			sb->s_readonly_remount = 1;
@@ -886,17 +894,17 @@ int do_remount_sb(struct super_block *sb, int sb_flags, void *data, int force)
 		}
 	}
 
-	if (sb->s_op->remount_fs) {
-		retval = sb->s_op->remount_fs(sb, &sb_flags, data);
-		if (retval) {
-			if (!force)
-				goto cancel_readonly;
-			/* If forced remount, go ahead despite any errors */
-			WARN(1, "forced remount of a %s fs returned %i\n",
-			     sb->s_type->name, retval);
-		}
+	retval = legacy_reconfigure(fc);
+	if (retval) {
+		if (!force)
+			goto cancel_readonly;
+		/* If forced remount, go ahead despite any errors */
+		WARN(1, "forced remount of a %s fs returned %i\n",
+		     sb->s_type->name, retval);
 	}
-	sb->s_flags = (sb->s_flags & ~MS_RMT_MASK) | (sb_flags & MS_RMT_MASK);
+
+	WRITE_ONCE(sb->s_flags, ((sb->s_flags & ~fc->sb_flags_mask) |
+				 (fc->sb_flags & fc->sb_flags_mask)));
 	/* Needs to be ordered wrt mnt_is_readonly() */
 	smp_wmb();
 	sb->s_readonly_remount = 0;
@@ -923,10 +931,15 @@ static void do_emergency_remount_callback(struct super_block *sb)
 	down_write(&sb->s_umount);
 	if (sb->s_root && sb->s_bdev && (sb->s_flags & SB_BORN) &&
 	    !sb_rdonly(sb)) {
-		/*
-		 * What lock protects sb->s_flags??
-		 */
-		do_remount_sb(sb, SB_RDONLY, NULL, 1);
+		struct fs_context *fc;
+
+		fc = fs_context_for_reconfigure(sb->s_root,
+					SB_RDONLY | SB_FORCE, SB_RDONLY);
+		if (!IS_ERR(fc)) {
+			if (parse_monolithic_mount_data(fc, NULL) == 0)
+				(void)reconfigure_super(fc);
+			put_fs_context(fc);
+		}
 	}
 	up_write(&sb->s_umount);
 }
@@ -1213,6 +1226,31 @@ struct dentry *mount_nodev(struct file_system_type *fs_type,
 }
 EXPORT_SYMBOL(mount_nodev);
 
+static int reconfigure_single(struct super_block *s,
+			      int flags, void *data)
+{
+	struct fs_context *fc;
+	int ret;
+
+	/* The caller really need to be passing fc down into mount_single(),
+	 * then a chunk of this can be removed.  [Bollocks -- AV]
+	 * Better yet, reconfiguration shouldn't happen, but rather the second
+	 * mount should be rejected if the parameters are not compatible.
+	 */
+	fc = fs_context_for_reconfigure(s->s_root, flags, MS_RMT_MASK);
+	if (IS_ERR(fc))
+		return PTR_ERR(fc);
+
+	ret = parse_monolithic_mount_data(fc, data);
+	if (ret < 0)
+		goto out;
+
+	ret = reconfigure_super(fc);
+out:
+	put_fs_context(fc);
+	return ret;
+}
+
 static int compare_single(struct super_block *s, void *p)
 {
 	return 1;
@@ -1230,13 +1268,14 @@ struct dentry *mount_single(struct file_system_type *fs_type,
 		return ERR_CAST(s);
 	if (!s->s_root) {
 		error = fill_super(s, data, flags & SB_SILENT ? 1 : 0);
-		if (error) {
-			deactivate_locked_super(s);
-			return ERR_PTR(error);
-		}
-		s->s_flags |= SB_ACTIVE;
+		if (!error)
+			s->s_flags |= SB_ACTIVE;
 	} else {
-		do_remount_sb(s, flags, data, 0);
+		error = reconfigure_single(s, flags, data);
+	}
+	if (unlikely(error)) {
+		deactivate_locked_super(s);
+		return ERR_PTR(error);
 	}
 	return dget(s->s_root);
 }
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 36fff12ab890..c65d02c5c512 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1337,6 +1337,7 @@ extern int send_sigurg(struct fown_struct *fown);
 
 /* These sb flags are internal to the kernel */
 #define SB_SUBMOUNT     (1<<26)
+#define SB_FORCE    	(1<<27)
 #define SB_NOSEC	(1<<28)
 #define SB_BORN		(1<<29)
 #define SB_ACTIVE	(1<<30)
diff --git a/include/linux/fs_context.h b/include/linux/fs_context.h
index 9805514444c9..98772f882a3e 100644
--- a/include/linux/fs_context.h
+++ b/include/linux/fs_context.h
@@ -25,6 +25,7 @@ struct user_namespace;
 
 enum fs_context_purpose {
 	FS_CONTEXT_FOR_MOUNT,		/* New superblock for explicit mount */
+	FS_CONTEXT_FOR_RECONFIGURE,	/* Superblock reconfiguration (remount) */
 };
 
 /*
@@ -57,6 +58,9 @@ struct fs_context {
  */
 extern struct fs_context *fs_context_for_mount(struct file_system_type *fs_type,
 						unsigned int sb_flags);
+extern struct fs_context *fs_context_for_reconfigure(struct dentry *dentry,
+						unsigned int sb_flags,
+						unsigned int sb_flags_mask);
 
 extern int vfs_get_tree(struct fs_context *fc);
 extern void put_fs_context(struct fs_context *fc);
-- 
cgit v1.2.3


From e1a91586d5da6f879b6dd385a2e7227bf1653570 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 23 Dec 2018 16:25:31 -0500
Subject: fs_context flavour for submounts

This is an eventual replacement for vfs_submount() uses.  Unlike the
"mount" and "remount" cases, the users of that thing are not in VFS -
they are buried in various ->d_automount() instances and rather than
converting them all at once we introduce the (thankfully small and
simple) infrastructure here and deal with the prospective users in
afs, nfs, etc. parts of the series.

Here we just introduce a new constructor (fs_context_for_submount())
along with the corresponding enum constant to be put into fc->purpose
for those.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/fs_context.c            | 10 ++++++++++
 include/linux/fs_context.h |  3 +++
 2 files changed, 13 insertions(+)

(limited to 'include')

diff --git a/fs/fs_context.c b/fs/fs_context.c
index 5e2c3aba1dd8..2bd652b6e848 100644
--- a/fs/fs_context.c
+++ b/fs/fs_context.c
@@ -69,6 +69,9 @@ static struct fs_context *alloc_fs_context(struct file_system_type *fs_type,
 	case FS_CONTEXT_FOR_MOUNT:
 		fc->user_ns = get_user_ns(fc->cred->user_ns);
 		break;
+	case FS_CONTEXT_FOR_SUBMOUNT:
+		fc->user_ns = get_user_ns(reference->d_sb->s_user_ns);
+		break;
 	case FS_CONTEXT_FOR_RECONFIGURE:
 		/* We don't pin any namespaces as the superblock's
 		 * subscriptions cannot be changed at this point.
@@ -106,6 +109,13 @@ struct fs_context *fs_context_for_reconfigure(struct dentry *dentry,
 }
 EXPORT_SYMBOL(fs_context_for_reconfigure);
 
+struct fs_context *fs_context_for_submount(struct file_system_type *type,
+					   struct dentry *reference)
+{
+	return alloc_fs_context(type, reference, 0, 0, FS_CONTEXT_FOR_SUBMOUNT);
+}
+EXPORT_SYMBOL(fs_context_for_submount);
+
 void fc_drop_locked(struct fs_context *fc)
 {
 	struct super_block *sb = fc->root->d_sb;
diff --git a/include/linux/fs_context.h b/include/linux/fs_context.h
index 98772f882a3e..7feb018c7a9e 100644
--- a/include/linux/fs_context.h
+++ b/include/linux/fs_context.h
@@ -25,6 +25,7 @@ struct user_namespace;
 
 enum fs_context_purpose {
 	FS_CONTEXT_FOR_MOUNT,		/* New superblock for explicit mount */
+	FS_CONTEXT_FOR_SUBMOUNT,	/* New superblock for automatic submount */
 	FS_CONTEXT_FOR_RECONFIGURE,	/* Superblock reconfiguration (remount) */
 };
 
@@ -61,6 +62,8 @@ extern struct fs_context *fs_context_for_mount(struct file_system_type *fs_type,
 extern struct fs_context *fs_context_for_reconfigure(struct dentry *dentry,
 						unsigned int sb_flags,
 						unsigned int sb_flags_mask);
+extern struct fs_context *fs_context_for_submount(struct file_system_type *fs_type,
+						struct dentry *reference);
 
 extern int vfs_get_tree(struct fs_context *fc);
 extern void put_fs_context(struct fs_context *fc);
-- 
cgit v1.2.3


From f3a09c92018a91ad0981146a4ac59414f814d801 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 23 Dec 2018 18:55:56 -0500
Subject: introduce fs_context methods

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/fs_context.c            | 28 ++++++++++++++++++++++------
 fs/internal.h              |  2 --
 fs/super.c                 | 36 ++++++++++++++++++++++++++++--------
 include/linux/fs.h         |  2 ++
 include/linux/fs_context.h | 13 +++++++++++++
 5 files changed, 65 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/fs/fs_context.c b/fs/fs_context.c
index 2bd652b6e848..825d1b2c8807 100644
--- a/fs/fs_context.c
+++ b/fs/fs_context.c
@@ -51,6 +51,7 @@ static struct fs_context *alloc_fs_context(struct file_system_type *fs_type,
 				      unsigned int sb_flags_mask,
 				      enum fs_context_purpose purpose)
 {
+	int (*init_fs_context)(struct fs_context *);
 	struct fs_context *fc;
 	int ret = -ENOMEM;
 
@@ -81,7 +82,12 @@ static struct fs_context *alloc_fs_context(struct file_system_type *fs_type,
 		break;
 	}
 
-	ret = legacy_init_fs_context(fc);
+	/* TODO: Make all filesystems support this unconditionally */
+	init_fs_context = fc->fs_type->init_fs_context;
+	if (!init_fs_context)
+		init_fs_context = legacy_init_fs_context;
+
+	ret = init_fs_context(fc);
 	if (ret < 0)
 		goto err_fc;
 	fc->need_free = true;
@@ -141,8 +147,8 @@ void put_fs_context(struct fs_context *fc)
 		deactivate_super(sb);
 	}
 
-	if (fc->need_free)
-		legacy_fs_context_free(fc);
+	if (fc->need_free && fc->ops && fc->ops->free)
+		fc->ops->free(fc);
 
 	security_free_mnt_opts(&fc->security);
 	put_net(fc->net_ns);
@@ -180,7 +186,7 @@ static int legacy_parse_monolithic(struct fs_context *fc, void *data)
 /*
  * Get a mountable root with the legacy mount command.
  */
-int legacy_get_tree(struct fs_context *fc)
+static int legacy_get_tree(struct fs_context *fc)
 {
 	struct legacy_fs_context *ctx = fc->fs_private;
 	struct super_block *sb;
@@ -201,7 +207,7 @@ int legacy_get_tree(struct fs_context *fc)
 /*
  * Handle remount.
  */
-int legacy_reconfigure(struct fs_context *fc)
+static int legacy_reconfigure(struct fs_context *fc)
 {
 	struct legacy_fs_context *ctx = fc->fs_private;
 	struct super_block *sb = fc->root->d_sb;
@@ -213,6 +219,13 @@ int legacy_reconfigure(struct fs_context *fc)
 				    ctx ? ctx->legacy_data : NULL);
 }
 
+const struct fs_context_operations legacy_fs_context_ops = {
+	.free			= legacy_fs_context_free,
+	.parse_monolithic	= legacy_parse_monolithic,
+	.get_tree		= legacy_get_tree,
+	.reconfigure		= legacy_reconfigure,
+};
+
 /*
  * Initialise a legacy context for a filesystem that doesn't support
  * fs_context.
@@ -222,10 +235,13 @@ static int legacy_init_fs_context(struct fs_context *fc)
 	fc->fs_private = kzalloc(sizeof(struct legacy_fs_context), GFP_KERNEL);
 	if (!fc->fs_private)
 		return -ENOMEM;
+	fc->ops = &legacy_fs_context_ops;
 	return 0;
 }
 
 int parse_monolithic_mount_data(struct fs_context *fc, void *data)
 {
-	return legacy_parse_monolithic(fc, data);
+	int (*monolithic_mount_data)(struct fs_context *, void *);
+	monolithic_mount_data = fc->ops->parse_monolithic;
+	return monolithic_mount_data(fc, data);
 }
diff --git a/fs/internal.h b/fs/internal.h
index 016a5b8dd305..8f8d07cc433f 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -55,8 +55,6 @@ extern void __init chrdev_init(void);
 /*
  * fs_context.c
  */
-extern int legacy_get_tree(struct fs_context *fc);
-extern int legacy_reconfigure(struct fs_context *fc);
 extern int parse_monolithic_mount_data(struct fs_context *, void *);
 extern void fc_drop_locked(struct fs_context *);
 
diff --git a/fs/super.c b/fs/super.c
index 50553233dd15..76b3181c782d 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -894,13 +894,15 @@ int reconfigure_super(struct fs_context *fc)
 		}
 	}
 
-	retval = legacy_reconfigure(fc);
-	if (retval) {
-		if (!force)
-			goto cancel_readonly;
-		/* If forced remount, go ahead despite any errors */
-		WARN(1, "forced remount of a %s fs returned %i\n",
-		     sb->s_type->name, retval);
+	if (fc->ops->reconfigure) {
+		retval = fc->ops->reconfigure(fc);
+		if (retval) {
+			if (!force)
+				goto cancel_readonly;
+			/* If forced remount, go ahead despite any errors */
+			WARN(1, "forced remount of a %s fs returned %i\n",
+			     sb->s_type->name, retval);
+		}
 	}
 
 	WRITE_ONCE(sb->s_flags, ((sb->s_flags & ~fc->sb_flags_mask) |
@@ -1294,10 +1296,28 @@ int vfs_get_tree(struct fs_context *fc)
 	struct super_block *sb;
 	int error;
 
-	error = legacy_get_tree(fc);
+	if (fc->fs_type->fs_flags & FS_REQUIRES_DEV && !fc->source)
+		return -ENOENT;
+
+	if (fc->root)
+		return -EBUSY;
+
+	/* Get the mountable root in fc->root, with a ref on the root and a ref
+	 * on the superblock.
+	 */
+	error = fc->ops->get_tree(fc);
 	if (error < 0)
 		return error;
 
+	if (!fc->root) {
+		pr_err("Filesystem %s get_tree() didn't set fc->root\n",
+		       fc->fs_type->name);
+		/* We don't know what the locking state of the superblock is -
+		 * if there is a superblock.
+		 */
+		BUG();
+	}
+
 	sb = fc->root->d_sb;
 	WARN_ON(!sb->s_bdi);
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index c65d02c5c512..8d578a9e1e8c 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -61,6 +61,7 @@ struct workqueue_struct;
 struct iov_iter;
 struct fscrypt_info;
 struct fscrypt_operations;
+struct fs_context;
 
 extern void __init inode_init(void);
 extern void __init inode_init_early(void);
@@ -2173,6 +2174,7 @@ struct file_system_type {
 #define FS_HAS_SUBTYPE		4
 #define FS_USERNS_MOUNT		8	/* Can be mounted by userns root */
 #define FS_RENAME_DOES_D_MOVE	32768	/* FS will handle d_move() during rename() internally. */
+	int (*init_fs_context)(struct fs_context *);
 	struct dentry *(*mount) (struct file_system_type *, int,
 		       const char *, void *);
 	void (*kill_sb) (struct super_block *);
diff --git a/include/linux/fs_context.h b/include/linux/fs_context.h
index 7feb018c7a9e..087c12954360 100644
--- a/include/linux/fs_context.h
+++ b/include/linux/fs_context.h
@@ -20,8 +20,13 @@ struct cred;
 struct dentry;
 struct file_operations;
 struct file_system_type;
+struct mnt_namespace;
 struct net;
+struct pid_namespace;
+struct super_block;
 struct user_namespace;
+struct vfsmount;
+struct path;
 
 enum fs_context_purpose {
 	FS_CONTEXT_FOR_MOUNT,		/* New superblock for explicit mount */
@@ -39,6 +44,7 @@ enum fs_context_purpose {
  * See Documentation/filesystems/mounting.txt
  */
 struct fs_context {
+	const struct fs_context_operations *ops;
 	struct file_system_type	*fs_type;
 	void			*fs_private;	/* The filesystem's context */
 	struct dentry		*root;		/* The root and superblock */
@@ -54,6 +60,13 @@ struct fs_context {
 	bool			need_free:1;	/* Need to call ops->free() */
 };
 
+struct fs_context_operations {
+	void (*free)(struct fs_context *fc);
+	int (*parse_monolithic)(struct fs_context *fc, void *data);
+	int (*get_tree)(struct fs_context *fc);
+	int (*reconfigure)(struct fs_context *fc);
+};
+
 /*
  * fs_context manipulation functions.
  */
-- 
cgit v1.2.3


From c6b82263f9c6e745eb4c5dfc2578d147c4cd7604 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 1 Nov 2018 23:07:23 +0000
Subject: vfs: Introduce logging functions

Introduce a set of logging functions through which informational messages,
warnings and error messages incurred by the mount procedure can be logged
and, in a future patch, passed to userspace instead by way of the
filesystem configuration context file descriptor.

There are four functions:

 (1) infof(const char *fmt, ...);

     Logs an informational message.

 (2) warnf(const char *fmt, ...);

     Logs a warning message.

 (3) errorf(const char *fmt, ...);

     Logs an error message.

 (4) invalf(const char *fmt, ...);

     As errof(), but returns -EINVAL so can be used on a return statement.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/fs_context.h | 42 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 42 insertions(+)

(limited to 'include')

diff --git a/include/linux/fs_context.h b/include/linux/fs_context.h
index 087c12954360..d208cc40b868 100644
--- a/include/linux/fs_context.h
+++ b/include/linux/fs_context.h
@@ -81,4 +81,46 @@ extern struct fs_context *fs_context_for_submount(struct file_system_type *fs_ty
 extern int vfs_get_tree(struct fs_context *fc);
 extern void put_fs_context(struct fs_context *fc);
 
+#define logfc(FC, FMT, ...) pr_notice(FMT, ## __VA_ARGS__)
+
+/**
+ * infof - Store supplementary informational message
+ * @fc: The context in which to log the informational message
+ * @fmt: The format string
+ *
+ * Store the supplementary informational message for the process if the process
+ * has enabled the facility.
+ */
+#define infof(fc, fmt, ...) ({ logfc(fc, fmt, ## __VA_ARGS__); })
+
+/**
+ * warnf - Store supplementary warning message
+ * @fc: The context in which to log the error message
+ * @fmt: The format string
+ *
+ * Store the supplementary warning message for the process if the process has
+ * enabled the facility.
+ */
+#define warnf(fc, fmt, ...) ({ logfc(fc, fmt, ## __VA_ARGS__); })
+
+/**
+ * errorf - Store supplementary error message
+ * @fc: The context in which to log the error message
+ * @fmt: The format string
+ *
+ * Store the supplementary error message for the process if the process has
+ * enabled the facility.
+ */
+#define errorf(fc, fmt, ...) ({ logfc(fc, fmt, ## __VA_ARGS__); })
+
+/**
+ * invalf - Store supplementary invalid argument error message
+ * @fc: The context in which to log the error message
+ * @fmt: The format string
+ *
+ * Store the supplementary error message for the process if the process has
+ * enabled the facility and return -EINVAL.
+ */
+#define invalf(fc, fmt, ...) ({	errorf(fc, fmt, ## __VA_ARGS__); -EINVAL; })
+
 #endif /* _LINUX_FS_CONTEXT_H */
-- 
cgit v1.2.3


From 459cc69fa4c17caf21de596693d8a07170820a58 Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Wed, 30 Jan 2019 12:49:11 +0200
Subject: RDMA: Provide safe ib_alloc_device() function

All callers to ib_alloc_device() provide a larger size than struct
ib_device and rely on the fact that struct ib_device is embedded in their
driver specific structure as the first member.

Provide a safer variant of ib_alloc_device() that checks and enforces this
approach to make sure the drivers are using it right.

Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/core/device.c               | 6 +++---
 drivers/infiniband/hw/bnxt_re/main.c           | 2 +-
 drivers/infiniband/hw/cxgb3/iwch.c             | 2 +-
 drivers/infiniband/hw/cxgb4/device.c           | 2 +-
 drivers/infiniband/hw/hns/hns_roce_hw_v1.c     | 2 +-
 drivers/infiniband/hw/hns/hns_roce_hw_v2.c     | 2 +-
 drivers/infiniband/hw/i40iw/i40iw_verbs.c      | 2 +-
 drivers/infiniband/hw/mlx4/main.c              | 2 +-
 drivers/infiniband/hw/mlx5/ib_rep.c            | 2 +-
 drivers/infiniband/hw/mlx5/main.c              | 2 +-
 drivers/infiniband/hw/mthca/mthca_main.c       | 2 +-
 drivers/infiniband/hw/nes/nes_verbs.c          | 2 +-
 drivers/infiniband/hw/ocrdma/ocrdma_main.c     | 2 +-
 drivers/infiniband/hw/qedr/main.c              | 2 +-
 drivers/infiniband/hw/usnic/usnic_ib_main.c    | 2 +-
 drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c | 2 +-
 drivers/infiniband/sw/rdmavt/vt.c              | 2 +-
 drivers/infiniband/sw/rxe/rxe_net.c            | 2 +-
 include/rdma/ib_verbs.h                        | 8 +++++++-
 19 files changed, 27 insertions(+), 21 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 200431c540f2..b511cfa00bdb 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -268,7 +268,7 @@ static struct class ib_class = {
 };
 
 /**
- * ib_alloc_device - allocate an IB device struct
+ * _ib_alloc_device - allocate an IB device struct
  * @size:size of structure to allocate
  *
  * Low-level drivers should use ib_alloc_device() to allocate &struct
@@ -277,7 +277,7 @@ static struct class ib_class = {
  * ib_dealloc_device() must be used to free structures allocated with
  * ib_alloc_device().
  */
-struct ib_device *ib_alloc_device(size_t size)
+struct ib_device *_ib_alloc_device(size_t size)
 {
 	struct ib_device *device;
 
@@ -303,7 +303,7 @@ struct ib_device *ib_alloc_device(size_t size)
 
 	return device;
 }
-EXPORT_SYMBOL(ib_alloc_device);
+EXPORT_SYMBOL(_ib_alloc_device);
 
 /**
  * ib_dealloc_device - free an IB device struct
diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c
index 16eecfa5882c..08777506d0b1 100644
--- a/drivers/infiniband/hw/bnxt_re/main.c
+++ b/drivers/infiniband/hw/bnxt_re/main.c
@@ -688,7 +688,7 @@ static struct bnxt_re_dev *bnxt_re_dev_add(struct net_device *netdev,
 	struct bnxt_re_dev *rdev;
 
 	/* Allocate bnxt_re_dev instance here */
-	rdev = (struct bnxt_re_dev *)ib_alloc_device(sizeof(*rdev));
+	rdev = ib_alloc_device(bnxt_re_dev, ibdev);
 	if (!rdev) {
 		dev_err(NULL, "%s: bnxt_re_dev allocation failure!",
 			ROCE_DRV_MODULE_NAME);
diff --git a/drivers/infiniband/hw/cxgb3/iwch.c b/drivers/infiniband/hw/cxgb3/iwch.c
index 591de319c178..fb03bc492ef7 100644
--- a/drivers/infiniband/hw/cxgb3/iwch.c
+++ b/drivers/infiniband/hw/cxgb3/iwch.c
@@ -146,7 +146,7 @@ static void open_rnic_dev(struct t3cdev *tdev)
 
 	pr_debug("%s t3cdev %p\n", __func__,  tdev);
 	pr_info_once("Chelsio T3 RDMA Driver - version %s\n", DRV_VERSION);
-	rnicp = (struct iwch_dev *)ib_alloc_device(sizeof(*rnicp));
+	rnicp = ib_alloc_device(iwch_dev, ibdev);
 	if (!rnicp) {
 		pr_err("Cannot allocate ib device\n");
 		return;
diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c
index 9c10fff6dcfb..4b4e2464b705 100644
--- a/drivers/infiniband/hw/cxgb4/device.c
+++ b/drivers/infiniband/hw/cxgb4/device.c
@@ -966,7 +966,7 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop)
 		pr_info("%s: On-Chip Queues not supported on this device\n",
 			pci_name(infop->pdev));
 
-	devp = (struct c4iw_dev *)ib_alloc_device(sizeof(*devp));
+	devp = ib_alloc_device(c4iw_dev, ibdev);
 	if (!devp) {
 		pr_err("Cannot allocate ib device\n");
 		return ERR_PTR(-ENOMEM);
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
index b74c742b000c..fa08c22aad66 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
@@ -5002,7 +5002,7 @@ static int hns_roce_probe(struct platform_device *pdev)
 	struct hns_roce_dev *hr_dev;
 	struct device *dev = &pdev->dev;
 
-	hr_dev = (struct hns_roce_dev *)ib_alloc_device(sizeof(*hr_dev));
+	hr_dev = ib_alloc_device(hns_roce_dev, ib_dev);
 	if (!hr_dev)
 		return -ENOMEM;
 
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index 5c483b437bdd..48a5d6548cd4 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -6059,7 +6059,7 @@ static int hns_roce_hw_v2_init_instance(struct hnae3_handle *handle)
 	struct hns_roce_dev *hr_dev;
 	int ret;
 
-	hr_dev = (struct hns_roce_dev *)ib_alloc_device(sizeof(*hr_dev));
+	hr_dev = ib_alloc_device(hns_roce_dev, ib_dev);
 	if (!hr_dev)
 		return -ENOMEM;
 
diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
index 12b31a8440be..d4ab46dd9e6c 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
@@ -2762,7 +2762,7 @@ static struct i40iw_ib_device *i40iw_init_rdma_device(struct i40iw_device *iwdev
 	struct net_device *netdev = iwdev->netdev;
 	struct pci_dev *pcidev = (struct pci_dev *)iwdev->hw.dev_context;
 
-	iwibdev = (struct i40iw_ib_device *)ib_alloc_device(sizeof(*iwibdev));
+	iwibdev = ib_alloc_device(i40iw_ib_device, ibdev);
 	if (!iwibdev) {
 		i40iw_pr_err("iwdev == NULL\n");
 		return NULL;
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index dc2ffd293a11..d66002a31000 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -2635,7 +2635,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
 	if (num_ports == 0)
 		return NULL;
 
-	ibdev = (struct mlx4_ib_dev *) ib_alloc_device(sizeof *ibdev);
+	ibdev = ib_alloc_device(mlx4_ib_dev, ib_dev);
 	if (!ibdev) {
 		dev_err(&dev->persist->pdev->dev,
 			"Device struct alloc failed\n");
diff --git a/drivers/infiniband/hw/mlx5/ib_rep.c b/drivers/infiniband/hw/mlx5/ib_rep.c
index 46a9ddc8ca56..6d7b8bad4b61 100644
--- a/drivers/infiniband/hw/mlx5/ib_rep.c
+++ b/drivers/infiniband/hw/mlx5/ib_rep.c
@@ -70,7 +70,7 @@ mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
 {
 	struct mlx5_ib_dev *ibdev;
 
-	ibdev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*ibdev));
+	ibdev = ib_alloc_device(mlx5_ib_dev, ib_dev);
 	if (!ibdev)
 		return -ENOMEM;
 
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 96bf1b2f9dd7..8161acda64e6 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -6508,7 +6508,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
 	if (mlx5_core_is_mp_slave(mdev) && ll == IB_LINK_LAYER_ETHERNET)
 		return mlx5_ib_add_slave_port(mdev);
 
-	dev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*dev));
+	dev = ib_alloc_device(mlx5_ib_dev, ib_dev);
 	if (!dev)
 		return NULL;
 
diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c
index 92c49bff22bc..fe9654a7af71 100644
--- a/drivers/infiniband/hw/mthca/mthca_main.c
+++ b/drivers/infiniband/hw/mthca/mthca_main.c
@@ -961,7 +961,7 @@ static int __mthca_init_one(struct pci_dev *pdev, int hca_type)
 	/* We can handle large RDMA requests, so allow larger segments. */
 	dma_set_max_seg_size(&pdev->dev, 1024 * 1024 * 1024);
 
-	mdev = (struct mthca_dev *) ib_alloc_device(sizeof *mdev);
+	mdev = ib_alloc_device(mthca_dev, ib_dev);
 	if (!mdev) {
 		dev_err(&pdev->dev, "Device struct alloc failed, "
 			"aborting.\n");
diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c
index 034156f7e9ed..6eb991d40035 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.c
+++ b/drivers/infiniband/hw/nes/nes_verbs.c
@@ -3669,7 +3669,7 @@ struct nes_ib_device *nes_init_ofa_device(struct net_device *netdev)
 	struct nes_vnic *nesvnic = netdev_priv(netdev);
 	struct nes_device *nesdev = nesvnic->nesdev;
 
-	nesibdev = (struct nes_ib_device *)ib_alloc_device(sizeof(struct nes_ib_device));
+	nesibdev = ib_alloc_device(nes_ib_device, ibdev);
 	if (nesibdev == NULL) {
 		return NULL;
 	}
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
index b0491b9ecfe4..88970a6bb555 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
@@ -297,7 +297,7 @@ static struct ocrdma_dev *ocrdma_add(struct be_dev_info *dev_info)
 	u8 lstate = 0;
 	struct ocrdma_dev *dev;
 
-	dev = (struct ocrdma_dev *)ib_alloc_device(sizeof(struct ocrdma_dev));
+	dev = ib_alloc_device(ocrdma_dev, ibdev);
 	if (!dev) {
 		pr_err("Unable to allocate ib device\n");
 		return NULL;
diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c
index f85e72b65a10..878e9e23652b 100644
--- a/drivers/infiniband/hw/qedr/main.c
+++ b/drivers/infiniband/hw/qedr/main.c
@@ -853,7 +853,7 @@ static struct qedr_dev *qedr_add(struct qed_dev *cdev, struct pci_dev *pdev,
 	struct qedr_dev *dev;
 	int rc = 0;
 
-	dev = (struct qedr_dev *)ib_alloc_device(sizeof(*dev));
+	dev = ib_alloc_device(qedr_dev, ibdev);
 	if (!dev) {
 		pr_err("Unable to allocate ib device\n");
 		return NULL;
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c b/drivers/infiniband/hw/usnic/usnic_ib_main.c
index 3201dd1899c7..0c0a288cb585 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_main.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c
@@ -372,7 +372,7 @@ static void *usnic_ib_device_add(struct pci_dev *dev)
 	usnic_dbg("\n");
 	netdev = pci_get_drvdata(dev);
 
-	us_ibdev = (struct usnic_ib_dev *)ib_alloc_device(sizeof(*us_ibdev));
+	us_ibdev = ib_alloc_device(usnic_ib_dev, ib_dev);
 	if (!us_ibdev) {
 		usnic_err("Device %s context alloc failed\n",
 				netdev_name(pci_get_drvdata(dev)));
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
index a5f02276d903..e582beaf9430 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
@@ -795,7 +795,7 @@ static int pvrdma_pci_probe(struct pci_dev *pdev,
 	dev_dbg(&pdev->dev, "initializing driver %s\n", pci_name(pdev));
 
 	/* Allocate zero-out device */
-	dev = (struct pvrdma_dev *)ib_alloc_device(sizeof(*dev));
+	dev = ib_alloc_device(pvrdma_dev, ib_dev);
 	if (!dev) {
 		dev_err(&pdev->dev, "failed to allocate IB device\n");
 		return -ENOMEM;
diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c
index 7de7389d0235..b3f0c5578925 100644
--- a/drivers/infiniband/sw/rdmavt/vt.c
+++ b/drivers/infiniband/sw/rdmavt/vt.c
@@ -91,7 +91,7 @@ struct rvt_dev_info *rvt_alloc_device(size_t size, int nports)
 {
 	struct rvt_dev_info *rdi;
 
-	rdi = (struct rvt_dev_info *)ib_alloc_device(size);
+	rdi = container_of(_ib_alloc_device(size), struct rvt_dev_info, ibdev);
 	if (!rdi)
 		return rdi;
 
diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c
index 8fd03ae20efc..19f3c69916b1 100644
--- a/drivers/infiniband/sw/rxe/rxe_net.c
+++ b/drivers/infiniband/sw/rxe/rxe_net.c
@@ -555,7 +555,7 @@ struct rxe_dev *rxe_net_add(struct net_device *ndev)
 	int err;
 	struct rxe_dev *rxe = NULL;
 
-	rxe = (struct rxe_dev *)ib_alloc_device(sizeof(*rxe));
+	rxe = ib_alloc_device(rxe_dev, ib_dev);
 	if (!rxe)
 		return NULL;
 
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 71ea144ec823..a1a1e710642c 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -2621,7 +2621,13 @@ struct ib_client {
 	struct list_head list;
 };
 
-struct ib_device *ib_alloc_device(size_t size);
+struct ib_device *_ib_alloc_device(size_t size);
+#define ib_alloc_device(drv_struct, member)                                    \
+	container_of(_ib_alloc_device(sizeof(struct drv_struct) +              \
+				      BUILD_BUG_ON_ZERO(offsetof(              \
+					      struct drv_struct, member))),    \
+		     struct drv_struct, member)
+
 void ib_dealloc_device(struct ib_device *device);
 
 void ib_get_device_fw_str(struct ib_device *device, char *str);
-- 
cgit v1.2.3


From b7bb367afa4bf9de60830683305c63030c3e581d Mon Sep 17 00:00:00 2001
From: Jonas Bonn <jonas@norrbonn.se>
Date: Wed, 30 Jan 2019 09:40:04 +0100
Subject: spi: support inter-word delay requirement for devices

Some devices are slow and cannot keep up with the SPI bus and therefore
require a short delay between words of the SPI transfer.

The example of this that I'm looking at is a SAMA5D2 with a minimum SPI
clock of 400kHz talking to an AVR-based SPI slave.  The AVR cannot put
bytes on the bus fast enough to keep up with the SoC's SPI controller
even at the lowest bus speed.

This patch introduces the ability to specify a required inter-word
delay for SPI devices.  It is up to the controller driver to configure
itself accordingly in order to introduce the requested delay.

Note that, for spi_transfer, there is already a field word_delay that
provides similar functionality.  This field, however, is specified in
clock cycles (and worse, SPI controller cycles, not SCK cycles); that
makes this value dependent on the master clock instead of the device
clock for which the delay is intended to provide some relief.  This
patch leaves this old word_delay in place and provides a time-based
word_delay_us alongside it; the new field fits in the struct padding
so struct size is constant.  There is only one in-kernel user of the
word_delay field and presumably that driver could be reworked to use
the time-based value instead.

The time-based delay is limited to 8 bits as these delays are intended
to be short.  The SAMA5D2 that I've tested this on limits delays to a
maximum of ~100us, which is already many word-transfer periods even at
the minimum transfer speed supported by the controller.

Signed-off-by: Jonas Bonn <jonas@norrbonn.se>
CC: Mark Brown <broonie@kernel.org>
CC: Rob Herring <robh+dt@kernel.org>
CC: Mark Rutland <mark.rutland@arm.com>
CC: linux-spi@vger.kernel.org
CC: devicetree@vger.kernel.org
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi.c       | 5 +++++
 include/linux/spi/spi.h | 6 ++++++
 2 files changed, 11 insertions(+)

(limited to 'include')

diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index 0e0f2c62973c..2f7176f07591 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -3050,6 +3050,8 @@ static int __spi_validate(struct spi_device *spi, struct spi_message *message)
 	 * it is not set for this transfer.
 	 * Set transfer tx_nbits and rx_nbits as single transfer default
 	 * (SPI_NBITS_SINGLE) if it is not set for this transfer.
+	 * Ensure transfer word_delay is at least as long as that required by
+	 * device itself.
 	 */
 	message->frame_length = 0;
 	list_for_each_entry(xfer, &message->transfers, transfer_list) {
@@ -3120,6 +3122,9 @@ static int __spi_validate(struct spi_device *spi, struct spi_message *message)
 				!(spi->mode & SPI_RX_QUAD))
 				return -EINVAL;
 		}
+
+		if (xfer->word_delay_usecs < spi->word_delay_usecs)
+			xfer->word_delay_usecs = spi->word_delay_usecs;
 	}
 
 	message->status = -EINPROGRESS;
diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index 916bba47d156..662b336aa2e4 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -122,6 +122,8 @@ void spi_statistics_add_transfer_stats(struct spi_statistics *stats,
  *	the spi_master.
  * @cs_gpiod: gpio descriptor of the chipselect line (optional, NULL when
  *	not using a GPIO line)
+ * @word_delay_usecs: microsecond delay to be inserted between consecutive
+ *	words of a transfer
  *
  * @statistics: statistics for the spi_device
  *
@@ -169,6 +171,7 @@ struct spi_device {
 	const char		*driver_override;
 	int			cs_gpio;	/* LEGACY: chip select gpio */
 	struct gpio_desc	*cs_gpiod;	/* chip select gpio desc */
+	uint8_t			word_delay_usecs; /* inter-word delay */
 
 	/* the statistics */
 	struct spi_statistics	statistics;
@@ -721,6 +724,8 @@ extern void spi_res_release(struct spi_controller *ctlr,
  * @delay_usecs: microseconds to delay after this transfer before
  *	(optionally) changing the chipselect status, then starting
  *	the next transfer or completing this @spi_message.
+ * @word_delay_usecs: microseconds to inter word delay after each word size
+ *	(set by bits_per_word) transmission.
  * @word_delay: clock cycles to inter word delay after each word size
  *	(set by bits_per_word) transmission.
  * @transfer_list: transfers are sequenced through @spi_message.transfers
@@ -803,6 +808,7 @@ struct spi_transfer {
 #define	SPI_NBITS_DUAL		0x02 /* 2bits transfer */
 #define	SPI_NBITS_QUAD		0x04 /* 4bits transfer */
 	u8		bits_per_word;
+	u8		word_delay_usecs;
 	u16		delay_usecs;
 	u32		speed_hz;
 	u16		word_delay;
-- 
cgit v1.2.3


From 57d4657716aca81ef4d7ec23e8123d26e3d28954 Mon Sep 17 00:00:00 2001
From: Richard Guy Briggs <rgb@redhat.com>
Date: Wed, 23 Jan 2019 13:35:00 -0500
Subject: audit: ignore fcaps on umount

Don't fetch fcaps when umount2 is called to avoid a process hang while
it waits for the missing resource to (possibly never) re-appear.

Note the comment above user_path_mountpoint_at():
 * A umount is a special case for path walking. We're not actually interested
 * in the inode in this situation, and ESTALE errors can be a problem.  We
 * simply want track down the dentry and vfsmount attached at the mountpoint
 * and avoid revalidating the last component.

This can happen on ceph, cifs, 9p, lustre, fuse (gluster) or NFS.

Please see the github issue tracker
https://github.com/linux-audit/audit-kernel/issues/100

Signed-off-by: Richard Guy Briggs <rgb@redhat.com>
[PM: merge fuzz in audit_log_fcaps()]
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
 fs/namei.c            |  2 +-
 fs/namespace.c        |  2 ++
 include/linux/audit.h | 15 ++++++++++-----
 include/linux/namei.h |  3 +++
 kernel/audit.c        | 10 +++++++++-
 kernel/audit.h        |  2 +-
 kernel/auditsc.c      |  6 +++---
 7 files changed, 29 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/fs/namei.c b/fs/namei.c
index 914178cdbe94..87d7710a2e1d 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2720,7 +2720,7 @@ filename_mountpoint(int dfd, struct filename *name, struct path *path,
 	if (unlikely(error == -ESTALE))
 		error = path_mountpoint(&nd, flags | LOOKUP_REVAL, path);
 	if (likely(!error))
-		audit_inode(name, path->dentry, 0);
+		audit_inode(name, path->dentry, flags & LOOKUP_NO_EVAL);
 	restore_nameidata();
 	putname(name);
 	return error;
diff --git a/fs/namespace.c b/fs/namespace.c
index a677b59efd74..e5de0e372df2 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1640,6 +1640,8 @@ int ksys_umount(char __user *name, int flags)
 	if (!(flags & UMOUNT_NOFOLLOW))
 		lookup_flags |= LOOKUP_FOLLOW;
 
+	lookup_flags |= LOOKUP_NO_EVAL;
+
 	retval = user_path_mountpoint_at(AT_FDCWD, name, lookup_flags, &path);
 	if (retval)
 		goto out;
diff --git a/include/linux/audit.h b/include/linux/audit.h
index ecb5d317d6a2..29251b18331a 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -25,6 +25,7 @@
 
 #include <linux/sched.h>
 #include <linux/ptrace.h>
+#include <linux/namei.h>  /* LOOKUP_* */
 #include <uapi/linux/audit.h>
 
 #define AUDIT_INO_UNSET ((unsigned long)-1)
@@ -248,6 +249,7 @@ extern void __audit_getname(struct filename *name);
 
 #define AUDIT_INODE_PARENT	1	/* dentry represents the parent */
 #define AUDIT_INODE_HIDDEN	2	/* audit record should be hidden */
+#define AUDIT_INODE_NOEVAL	4	/* audit record incomplete */
 extern void __audit_inode(struct filename *name, const struct dentry *dentry,
 				unsigned int flags);
 extern void __audit_file(const struct file *);
@@ -308,12 +310,15 @@ static inline void audit_getname(struct filename *name)
 }
 static inline void audit_inode(struct filename *name,
 				const struct dentry *dentry,
-				unsigned int parent) {
+				unsigned int flags) {
 	if (unlikely(!audit_dummy_context())) {
-		unsigned int flags = 0;
-		if (parent)
-			flags |= AUDIT_INODE_PARENT;
-		__audit_inode(name, dentry, flags);
+		unsigned int aflags = 0;
+
+		if (flags & LOOKUP_PARENT)
+			aflags |= AUDIT_INODE_PARENT;
+		if (flags & LOOKUP_NO_EVAL)
+			aflags |= AUDIT_INODE_NOEVAL;
+		__audit_inode(name, dentry, aflags);
 	}
 }
 static inline void audit_file(struct file *file)
diff --git a/include/linux/namei.h b/include/linux/namei.h
index a78606e8e3df..9138b4471dbf 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -24,6 +24,8 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT, LAST_BIND};
  *  - internal "there are more path components" flag
  *  - dentry cache is untrusted; force a real lookup
  *  - suppress terminal automount
+ *  - skip revalidation
+ *  - don't fetch xattrs on audit_inode
  */
 #define LOOKUP_FOLLOW		0x0001
 #define LOOKUP_DIRECTORY	0x0002
@@ -33,6 +35,7 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT, LAST_BIND};
 #define LOOKUP_REVAL		0x0020
 #define LOOKUP_RCU		0x0040
 #define LOOKUP_NO_REVAL		0x0080
+#define LOOKUP_NO_EVAL		0x0100
 
 /*
  * Intent data
diff --git a/kernel/audit.c b/kernel/audit.c
index 3f3f1888cac7..b7177a8def2e 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -2082,6 +2082,10 @@ void audit_log_cap(struct audit_buffer *ab, char *prefix, kernel_cap_t *cap)
 
 static void audit_log_fcaps(struct audit_buffer *ab, struct audit_names *name)
 {
+	if (name->fcap_ver == -1) {
+		audit_log_format(ab, " cap_fe=? cap_fver=? cap_fp=? cap_fi=?");
+		return;
+	}
 	audit_log_cap(ab, "cap_fp", &name->fcap.permitted);
 	audit_log_cap(ab, "cap_fi", &name->fcap.inheritable);
 	audit_log_format(ab, " cap_fe=%d cap_fver=%x cap_frootid=%d",
@@ -2114,7 +2118,7 @@ static inline int audit_copy_fcaps(struct audit_names *name,
 
 /* Copy inode data into an audit_names. */
 void audit_copy_inode(struct audit_names *name, const struct dentry *dentry,
-		      struct inode *inode)
+		      struct inode *inode, unsigned int flags)
 {
 	name->ino   = inode->i_ino;
 	name->dev   = inode->i_sb->s_dev;
@@ -2123,6 +2127,10 @@ void audit_copy_inode(struct audit_names *name, const struct dentry *dentry,
 	name->gid   = inode->i_gid;
 	name->rdev  = inode->i_rdev;
 	security_inode_getsecid(inode, &name->osid);
+	if (flags & AUDIT_INODE_NOEVAL) {
+		name->fcap_ver = -1;
+		return;
+	}
 	audit_copy_fcaps(name, dentry);
 }
 
diff --git a/kernel/audit.h b/kernel/audit.h
index 9acb8691ed87..002f0f7ba732 100644
--- a/kernel/audit.h
+++ b/kernel/audit.h
@@ -215,7 +215,7 @@ extern void audit_log_session_info(struct audit_buffer *ab);
 
 extern void audit_copy_inode(struct audit_names *name,
 			     const struct dentry *dentry,
-			     struct inode *inode);
+			     struct inode *inode, unsigned int flags);
 extern void audit_log_cap(struct audit_buffer *ab, char *prefix,
 			  kernel_cap_t *cap);
 extern void audit_log_name(struct audit_context *context,
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index a2696ce790f9..68da71001096 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -1856,7 +1856,7 @@ out:
 		n->type = AUDIT_TYPE_NORMAL;
 	}
 	handle_path(dentry);
-	audit_copy_inode(n, dentry, inode);
+	audit_copy_inode(n, dentry, inode, flags & AUDIT_INODE_NOEVAL);
 }
 
 void __audit_file(const struct file *file)
@@ -1955,7 +1955,7 @@ void __audit_inode_child(struct inode *parent,
 		n = audit_alloc_name(context, AUDIT_TYPE_PARENT);
 		if (!n)
 			return;
-		audit_copy_inode(n, NULL, parent);
+		audit_copy_inode(n, NULL, parent, 0);
 	}
 
 	if (!found_child) {
@@ -1974,7 +1974,7 @@ void __audit_inode_child(struct inode *parent,
 	}
 
 	if (inode)
-		audit_copy_inode(found_child, dentry, inode);
+		audit_copy_inode(found_child, dentry, inode, 0);
 	else
 		found_child->ino = AUDIT_INO_UNSET;
 }
-- 
cgit v1.2.3


From 6780c4fa9d6e091b2f206ac429a40e2e8d2e45f3 Mon Sep 17 00:00:00 2001
From: Gal Pressman <galpress@amazon.com>
Date: Tue, 22 Jan 2019 10:08:22 +0200
Subject: RDMA: Add indication for in kernel API support to IB device

Drivers that do not provide kernel verbs support should not be used by ib
kernel clients at all.

In case a device does not implement all mandatory verbs for kverbs usage
mark it as a non kverbs provider and prevent its usage for all clients
except for uverbs.

The device is marked as a non kverbs provider using the 'kverbs_provider'
flag which should only be set by the core code.  The clients can choose
whether kverbs are requested for its usage using the 'no_kverbs_req' flag
which is currently set for uverbs only.

This patch allows drivers to remove mandatory verbs stubs and simply set
the callbacks to NULL. The IB device will be registered as a non-kverbs
provider. Note that verbs that are required for the device registration
process must be implemented.

Signed-off-by: Gal Pressman <galpress@amazon.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/core/device.c      | 10 ++++++----
 drivers/infiniband/core/uverbs_main.c |  1 +
 include/rdma/ib_verbs.h               |  5 +++++
 3 files changed, 12 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index b511cfa00bdb..9d2e108235e9 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -121,13 +121,12 @@ static int ib_device_check_mandatory(struct ib_device *device)
 	};
 	int i;
 
+	device->kverbs_provider = true;
 	for (i = 0; i < ARRAY_SIZE(mandatory_table); ++i) {
 		if (!*(void **) ((void *) &device->ops +
 				 mandatory_table[i].offset)) {
-			dev_warn(&device->dev,
-				 "Device is missing mandatory function %s\n",
-				 mandatory_table[i].name);
-			return -EINVAL;
+			device->kverbs_provider = false;
+			break;
 		}
 	}
 
@@ -325,6 +324,9 @@ static int add_client_context(struct ib_device *device, struct ib_client *client
 {
 	struct ib_client_data *context;
 
+	if (!device->kverbs_provider && !client->no_kverbs_req)
+		return -EOPNOTSUPP;
+
 	context = kmalloc(sizeof(*context), GFP_KERNEL);
 	if (!context)
 		return -ENOMEM;
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 996f167d1436..d628747e058c 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -1151,6 +1151,7 @@ static const struct file_operations uverbs_mmap_fops = {
 
 static struct ib_client uverbs_client = {
 	.name   = "uverbs",
+	.no_kverbs_req = true,
 	.add    = ib_uverbs_add_one,
 	.remove = ib_uverbs_remove_one
 };
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index a1a1e710642c..4183a03b46b5 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -2565,6 +2565,8 @@ struct ib_device {
 	__be64			     node_guid;
 	u32			     local_dma_lkey;
 	u16                          is_switch:1;
+	/* Indicates kernel verbs support, should not be used in drivers */
+	u16                          kverbs_provider:1;
 	u8                           node_type;
 	u8                           phys_port_cnt;
 	struct ib_device_attr        attrs;
@@ -2619,6 +2621,9 @@ struct ib_client {
 			const struct sockaddr *addr,
 			void *client_data);
 	struct list_head list;
+
+	/* kverbs are not required by the client */
+	u8 no_kverbs_req:1;
 };
 
 struct ib_device *_ib_alloc_device(size_t size);
-- 
cgit v1.2.3


From 0ad699c0edc97a864177679dd67f2ccd73b07cb7 Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Wed, 30 Jan 2019 12:48:58 +0200
Subject: RDMA/core: Simplify restrack interface

In the current implementation, we have one restrack root per-device and
all users are simply providing it directly. Let's simplify the interface
and have callers provide the ib_device and internally access the
restrack_root.

Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/core/device.c   |  4 ++--
 drivers/infiniband/core/nldev.c    |  4 ++--
 drivers/infiniband/core/restrack.c | 26 +++++++++++++++++++++-----
 include/rdma/restrack.h            | 23 ++++-------------------
 4 files changed, 29 insertions(+), 28 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 9d2e108235e9..919e94ff4b25 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -287,7 +287,7 @@ struct ib_device *_ib_alloc_device(size_t size)
 	if (!device)
 		return NULL;
 
-	rdma_restrack_init(&device->res);
+	rdma_restrack_init(device);
 
 	device->dev.class = &ib_class;
 	device_initialize(&device->dev);
@@ -315,7 +315,7 @@ void ib_dealloc_device(struct ib_device *device)
 	WARN_ON(!list_empty(&device->client_data_list));
 	WARN_ON(device->reg_state != IB_DEV_UNREGISTERED &&
 		device->reg_state != IB_DEV_UNINITIALIZED);
-	rdma_restrack_clean(&device->res);
+	rdma_restrack_clean(device);
 	put_device(&device->dev);
 }
 EXPORT_SYMBOL(ib_dealloc_device);
diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c
index 1742ff4fbf79..ee98fc9058b1 100644
--- a/drivers/infiniband/core/nldev.c
+++ b/drivers/infiniband/core/nldev.c
@@ -314,7 +314,6 @@ static int fill_res_info(struct sk_buff *msg, struct ib_device *device)
 		[RDMA_RESTRACK_CTX] = "ctx",
 	};
 
-	struct rdma_restrack_root *res = &device->res;
 	struct nlattr *table_attr;
 	int ret, i, curr;
 
@@ -328,7 +327,8 @@ static int fill_res_info(struct sk_buff *msg, struct ib_device *device)
 	for (i = 0; i < RDMA_RESTRACK_MAX; i++) {
 		if (!names[i])
 			continue;
-		curr = rdma_restrack_count(res, i, task_active_pid_ns(current));
+		curr = rdma_restrack_count(device, i,
+					   task_active_pid_ns(current));
 		ret = fill_res_info_entry(msg, names[i], curr);
 		if (ret)
 			goto err;
diff --git a/drivers/infiniband/core/restrack.c b/drivers/infiniband/core/restrack.c
index 46a5c553c624..0ade3da0a5c7 100644
--- a/drivers/infiniband/core/restrack.c
+++ b/drivers/infiniband/core/restrack.c
@@ -18,8 +18,14 @@ static int fill_res_noop(struct sk_buff *msg,
 	return 0;
 }
 
-void rdma_restrack_init(struct rdma_restrack_root *res)
+/**
+ * rdma_restrack_init() - initialize resource tracking
+ * @dev:  IB device
+ */
+void rdma_restrack_init(struct ib_device *dev)
 {
+	struct rdma_restrack_root *res = &dev->res;
+
 	init_rwsem(&res->rwsem);
 	res->fill_res_entry = fill_res_noop;
 }
@@ -38,11 +44,15 @@ static const char *type2str(enum rdma_restrack_type type)
 	return names[type];
 };
 
-void rdma_restrack_clean(struct rdma_restrack_root *res)
+/**
+ * rdma_restrack_clean() - clean resource tracking
+ * @dev:  IB device
+ */
+void rdma_restrack_clean(struct ib_device *dev)
 {
+	struct rdma_restrack_root *res = &dev->res;
 	struct rdma_restrack_entry *e;
 	char buf[TASK_COMM_LEN];
-	struct ib_device *dev;
 	const char *owner;
 	int bkt;
 
@@ -72,10 +82,16 @@ void rdma_restrack_clean(struct rdma_restrack_root *res)
 	pr_err("restrack: %s", CUT_HERE);
 }
 
-int rdma_restrack_count(struct rdma_restrack_root *res,
-			enum rdma_restrack_type type,
+/**
+ * rdma_restrack_count() - the current usage of specific object
+ * @dev:  IB device
+ * @type: actual type of object to operate
+ * @ns:   PID namespace
+ */
+int rdma_restrack_count(struct ib_device *dev, enum rdma_restrack_type type,
 			struct pid_namespace *ns)
 {
+	struct rdma_restrack_root *res = &dev->res;
 	struct rdma_restrack_entry *e;
 	u32 cnt = 0;
 
diff --git a/include/rdma/restrack.h b/include/rdma/restrack.h
index 8f179be9d9a9..f756fc48eee5 100644
--- a/include/rdma/restrack.h
+++ b/include/rdma/restrack.h
@@ -49,6 +49,7 @@ enum rdma_restrack_type {
 };
 
 #define RDMA_RESTRACK_HASH_BITS	8
+struct ib_device;
 struct rdma_restrack_entry;
 
 /**
@@ -122,25 +123,9 @@ struct rdma_restrack_entry {
 	bool			user;
 };
 
-/**
- * rdma_restrack_init() - initialize resource tracking
- * @res:  resource tracking root
- */
-void rdma_restrack_init(struct rdma_restrack_root *res);
-
-/**
- * rdma_restrack_clean() - clean resource tracking
- * @res:  resource tracking root
- */
-void rdma_restrack_clean(struct rdma_restrack_root *res);
-
-/**
- * rdma_restrack_count() - the current usage of specific object
- * @res:  resource entry
- * @type: actual type of object to operate
- * @ns:   PID namespace
- */
-int rdma_restrack_count(struct rdma_restrack_root *res,
+void rdma_restrack_init(struct ib_device *dev);
+void rdma_restrack_clean(struct ib_device *dev);
+int rdma_restrack_count(struct ib_device *dev,
 			enum rdma_restrack_type type,
 			struct pid_namespace *ns);
 
-- 
cgit v1.2.3


From 02da37509705d3ba6a58fe4799a0caf6b4baecb0 Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Wed, 30 Jan 2019 12:49:02 +0200
Subject: RDMA/core: Use the ops infrastructure to keep all callbacks in one
 place

As preparation to hide rdma_restrack_root, refactor the code to use the
ops structure instead of a special callback which is hidden in
rdma_restrack_root.

Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/core/device.c       |  1 +
 drivers/infiniband/core/nldev.c        | 28 ++++++++++++++++++----------
 drivers/infiniband/core/restrack.c     |  7 -------
 drivers/infiniband/hw/cxgb4/provider.c |  2 +-
 include/rdma/ib_verbs.h                |  5 +++++
 include/rdma/restrack.h                |  7 -------
 6 files changed, 25 insertions(+), 25 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 919e94ff4b25..b9f725df4195 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -1263,6 +1263,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
 	SET_DEVICE_OP(dev_ops, disassociate_ucontext);
 	SET_DEVICE_OP(dev_ops, drain_rq);
 	SET_DEVICE_OP(dev_ops, drain_sq);
+	SET_DEVICE_OP(dev_ops, fill_res_entry);
 	SET_DEVICE_OP(dev_ops, get_dev_fw_str);
 	SET_DEVICE_OP(dev_ops, get_dma_mr);
 	SET_DEVICE_OP(dev_ops, get_hw_stats);
diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c
index ee98fc9058b1..25a248847575 100644
--- a/drivers/infiniband/core/nldev.c
+++ b/drivers/infiniband/core/nldev.c
@@ -361,11 +361,19 @@ static int fill_res_name_pid(struct sk_buff *msg,
 	return 0;
 }
 
+static bool fill_res_entry(struct ib_device *dev, struct sk_buff *msg,
+			   struct rdma_restrack_entry *res)
+{
+	if (!dev->ops.fill_res_entry)
+		return false;
+	return dev->ops.fill_res_entry(msg, res);
+}
+
 static int fill_res_qp_entry(struct sk_buff *msg, bool has_cap_net_admin,
 			     struct rdma_restrack_entry *res, uint32_t port)
 {
 	struct ib_qp *qp = container_of(res, struct ib_qp, res);
-	struct rdma_restrack_root *resroot = &qp->device->res;
+	struct ib_device *dev = qp->device;
 	struct ib_qp_init_attr qp_init_attr;
 	struct nlattr *entry_attr;
 	struct ib_qp_attr qp_attr;
@@ -415,7 +423,7 @@ static int fill_res_qp_entry(struct sk_buff *msg, bool has_cap_net_admin,
 	if (fill_res_name_pid(msg, res))
 		goto err;
 
-	if (resroot->fill_res_entry(msg, res))
+	if (fill_res_entry(dev, msg, res))
 		goto err;
 
 	nla_nest_end(msg, entry_attr);
@@ -432,7 +440,7 @@ static int fill_res_cm_id_entry(struct sk_buff *msg, bool has_cap_net_admin,
 {
 	struct rdma_id_private *id_priv =
 				container_of(res, struct rdma_id_private, res);
-	struct rdma_restrack_root *resroot = &id_priv->id.device->res;
+	struct ib_device *dev = id_priv->id.device;
 	struct rdma_cm_id *cm_id = &id_priv->id;
 	struct nlattr *entry_attr;
 
@@ -474,7 +482,7 @@ static int fill_res_cm_id_entry(struct sk_buff *msg, bool has_cap_net_admin,
 	if (fill_res_name_pid(msg, res))
 		goto err;
 
-	if (resroot->fill_res_entry(msg, res))
+	if (fill_res_entry(dev, msg, res))
 		goto err;
 
 	nla_nest_end(msg, entry_attr);
@@ -490,7 +498,7 @@ static int fill_res_cq_entry(struct sk_buff *msg, bool has_cap_net_admin,
 			     struct rdma_restrack_entry *res, uint32_t port)
 {
 	struct ib_cq *cq = container_of(res, struct ib_cq, res);
-	struct rdma_restrack_root *resroot = &cq->device->res;
+	struct ib_device *dev = cq->device;
 	struct nlattr *entry_attr;
 
 	entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_CQ_ENTRY);
@@ -511,7 +519,7 @@ static int fill_res_cq_entry(struct sk_buff *msg, bool has_cap_net_admin,
 	if (fill_res_name_pid(msg, res))
 		goto err;
 
-	if (resroot->fill_res_entry(msg, res))
+	if (fill_res_entry(dev, msg, res))
 		goto err;
 
 	nla_nest_end(msg, entry_attr);
@@ -527,7 +535,7 @@ static int fill_res_mr_entry(struct sk_buff *msg, bool has_cap_net_admin,
 			     struct rdma_restrack_entry *res, uint32_t port)
 {
 	struct ib_mr *mr = container_of(res, struct ib_mr, res);
-	struct rdma_restrack_root *resroot = &mr->pd->device->res;
+	struct ib_device *dev = mr->pd->device;
 	struct nlattr *entry_attr;
 
 	entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_MR_ENTRY);
@@ -548,7 +556,7 @@ static int fill_res_mr_entry(struct sk_buff *msg, bool has_cap_net_admin,
 	if (fill_res_name_pid(msg, res))
 		goto err;
 
-	if (resroot->fill_res_entry(msg, res))
+	if (fill_res_entry(dev, msg, res))
 		goto err;
 
 	nla_nest_end(msg, entry_attr);
@@ -564,7 +572,7 @@ static int fill_res_pd_entry(struct sk_buff *msg, bool has_cap_net_admin,
 			     struct rdma_restrack_entry *res, uint32_t port)
 {
 	struct ib_pd *pd = container_of(res, struct ib_pd, res);
-	struct rdma_restrack_root *resroot = &pd->device->res;
+	struct ib_device *dev = pd->device;
 	struct nlattr *entry_attr;
 
 	entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_PD_ENTRY);
@@ -591,7 +599,7 @@ static int fill_res_pd_entry(struct sk_buff *msg, bool has_cap_net_admin,
 	if (fill_res_name_pid(msg, res))
 		goto err;
 
-	if (resroot->fill_res_entry(msg, res))
+	if (fill_res_entry(dev, msg, res))
 		goto err;
 
 	nla_nest_end(msg, entry_attr);
diff --git a/drivers/infiniband/core/restrack.c b/drivers/infiniband/core/restrack.c
index bd7770ed4174..f80b37d437ac 100644
--- a/drivers/infiniband/core/restrack.c
+++ b/drivers/infiniband/core/restrack.c
@@ -12,12 +12,6 @@
 
 #include "cma_priv.h"
 
-static int fill_res_noop(struct sk_buff *msg,
-			 struct rdma_restrack_entry *entry)
-{
-	return 0;
-}
-
 /**
  * rdma_restrack_init() - initialize resource tracking
  * @dev:  IB device
@@ -27,7 +21,6 @@ void rdma_restrack_init(struct ib_device *dev)
 	struct rdma_restrack_root *res = &dev->res;
 
 	init_rwsem(&res->rwsem);
-	res->fill_res_entry = fill_res_noop;
 }
 
 static const char *type2str(enum rdma_restrack_type type)
diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c
index f977f8e7e162..cb5b713bbf39 100644
--- a/drivers/infiniband/hw/cxgb4/provider.c
+++ b/drivers/infiniband/hw/cxgb4/provider.c
@@ -549,6 +549,7 @@ static const struct ib_device_ops c4iw_dev_ops = {
 	.destroy_cq = c4iw_destroy_cq,
 	.destroy_qp = c4iw_destroy_qp,
 	.destroy_srq = c4iw_destroy_srq,
+	.fill_res_entry = fill_res_entry,
 	.get_dev_fw_str = get_dev_fw_str,
 	.get_dma_mr = c4iw_get_dma_mr,
 	.get_hw_stats = c4iw_get_mib,
@@ -629,7 +630,6 @@ void c4iw_register_device(struct work_struct *work)
 	dev->ibdev.iwcm->add_ref = c4iw_qp_add_ref;
 	dev->ibdev.iwcm->rem_ref = c4iw_qp_rem_ref;
 	dev->ibdev.iwcm->get_qp = c4iw_get_qp;
-	dev->ibdev.res.fill_res_entry = fill_res_entry;
 	memcpy(dev->ibdev.iwcm->ifname, dev->rdev.lldi.ports[0]->name,
 	       sizeof(dev->ibdev.iwcm->ifname));
 
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 4183a03b46b5..5fc3be884444 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -2510,6 +2510,11 @@ struct ib_device_ops {
 	 */
 	int (*init_port)(struct ib_device *device, u8 port_num,
 			 struct kobject *port_sysfs);
+	/**
+	 * Allows rdma drivers to add their own restrack attributes.
+	 */
+	int (*fill_res_entry)(struct sk_buff *msg,
+			      struct rdma_restrack_entry *entry);
 };
 
 struct ib_device {
diff --git a/include/rdma/restrack.h b/include/rdma/restrack.h
index f756fc48eee5..cc66cc7a11d3 100644
--- a/include/rdma/restrack.h
+++ b/include/rdma/restrack.h
@@ -65,13 +65,6 @@ struct rdma_restrack_root {
 	 * @hash: global database for all resources per-device
 	 */
 	DECLARE_HASHTABLE(hash, RDMA_RESTRACK_HASH_BITS);
-	/**
-	 * @fill_res_entry: driver-specific fill function
-	 *
-	 * Allows rdma drivers to add their own restrack attributes.
-	 */
-	int (*fill_res_entry)(struct sk_buff *msg,
-			      struct rdma_restrack_entry *entry);
 };
 
 /**
-- 
cgit v1.2.3


From befa618112a0a4590ce21d70aa35c9d341337774 Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <sdf@google.com>
Date: Mon, 28 Jan 2019 09:21:19 -0800
Subject: bpf: BPF_PROG_TYPE_CGROUP_{SKB, SOCK, SOCK_ADDR} require cgroups
 enabled

There is no way to exercise appropriate attach points without cgroups
enabled. This lets test_verifier correctly skip tests for these
prog_types if kernel was compiled without BPF cgroup support.

Signed-off-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/bpf_types.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index 44d9ab4809bd..08bf2f1fe553 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -6,9 +6,11 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_SOCKET_FILTER, sk_filter)
 BPF_PROG_TYPE(BPF_PROG_TYPE_SCHED_CLS, tc_cls_act)
 BPF_PROG_TYPE(BPF_PROG_TYPE_SCHED_ACT, tc_cls_act)
 BPF_PROG_TYPE(BPF_PROG_TYPE_XDP, xdp)
+#ifdef CONFIG_CGROUP_BPF
 BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SKB, cg_skb)
 BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCK, cg_sock)
 BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCK_ADDR, cg_sock_addr)
+#endif
 BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_IN, lwt_in)
 BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_OUT, lwt_out)
 BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_XMIT, lwt_xmit)
-- 
cgit v1.2.3


From 116bfa96a255123ed209da6544f74a4f2eaca5da Mon Sep 17 00:00:00 2001
From: Valdis Kletnieks <valdis.kletnieks@vt.edu>
Date: Tue, 29 Jan 2019 01:04:25 -0500
Subject: bpf: fix missing prototype warnings

Compiling with W=1 generates warnings:

  CC      kernel/bpf/core.o
kernel/bpf/core.c:721:12: warning: no previous prototype for ?bpf_jit_alloc_exec_limit? [-Wmissing-prototypes]
  721 | u64 __weak bpf_jit_alloc_exec_limit(void)
      |            ^~~~~~~~~~~~~~~~~~~~~~~~
kernel/bpf/core.c:757:14: warning: no previous prototype for ?bpf_jit_alloc_exec? [-Wmissing-prototypes]
  757 | void *__weak bpf_jit_alloc_exec(unsigned long size)
      |              ^~~~~~~~~~~~~~~~~~
kernel/bpf/core.c:762:13: warning: no previous prototype for ?bpf_jit_free_exec? [-Wmissing-prototypes]
  762 | void __weak bpf_jit_free_exec(void *addr)
      |             ^~~~~~~~~~~~~~~~~

All three are weak functions that archs can override, provide
proper prototypes for when a new arch provides their own.

Signed-off-by: Valdis Kletnieks <valdis.kletnieks@vt.edu>
Acked-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/filter.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/filter.h b/include/linux/filter.h
index e4b473f85b46..7317376734f7 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -880,7 +880,9 @@ bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr,
 		     unsigned int alignment,
 		     bpf_jit_fill_hole_t bpf_fill_ill_insns);
 void bpf_jit_binary_free(struct bpf_binary_header *hdr);
-
+u64 bpf_jit_alloc_exec_limit(void);
+void *bpf_jit_alloc_exec(unsigned long size);
+void bpf_jit_free_exec(void *addr);
 void bpf_jit_free(struct bpf_prog *fp);
 
 int bpf_jit_get_func_addr(const struct bpf_prog *prog,
-- 
cgit v1.2.3


From 1832f4ef5867fd3898d8a6c6c1978b75d76fc246 Mon Sep 17 00:00:00 2001
From: Valdis Kletnieks <valdis.kletnieks@vt.edu>
Date: Tue, 29 Jan 2019 01:47:06 -0500
Subject: bpf, cgroups: clean up kerneldoc warnings

Building with W=1 reveals some bitrot:

  CC      kernel/bpf/cgroup.o
kernel/bpf/cgroup.c:238: warning: Function parameter or member 'flags' not described in '__cgroup_bpf_attach'
kernel/bpf/cgroup.c:367: warning: Function parameter or member 'unused_flags' not described in '__cgroup_bpf_detach'

Add a kerneldoc line for 'flags'.

Fixing the warning for 'unused_flags' is best approached by
removing the unused parameter on the function call.

Signed-off-by: Valdis Kletnieks <valdis.kletnieks@vt.edu>
Acked-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/bpf-cgroup.h | 2 +-
 kernel/bpf/cgroup.c        | 3 ++-
 kernel/cgroup/cgroup.c     | 2 +-
 3 files changed, 4 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index 588dd5f0bd85..695b2a880d9a 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -78,7 +78,7 @@ int cgroup_bpf_inherit(struct cgroup *cgrp);
 int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
 			enum bpf_attach_type type, u32 flags);
 int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
-			enum bpf_attach_type type, u32 flags);
+			enum bpf_attach_type type);
 int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
 		       union bpf_attr __user *uattr);
 
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index ab612fe9862f..d78cfec5807d 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -230,6 +230,7 @@ cleanup:
  * @cgrp: The cgroup which descendants to traverse
  * @prog: A program to attach
  * @type: Type of attach operation
+ * @flags: Option flags
  *
  * Must be called with cgroup_mutex held.
  */
@@ -363,7 +364,7 @@ cleanup:
  * Must be called with cgroup_mutex held.
  */
 int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
-			enum bpf_attach_type type, u32 unused_flags)
+			enum bpf_attach_type type)
 {
 	struct list_head *progs = &cgrp->bpf.progs[type];
 	enum bpf_cgroup_storage_type stype;
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index f31bd61c9466..9f617605dacb 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -5996,7 +5996,7 @@ int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
 	int ret;
 
 	mutex_lock(&cgroup_mutex);
-	ret = __cgroup_bpf_detach(cgrp, prog, type, flags);
+	ret = __cgroup_bpf_detach(cgrp, prog, type);
 	mutex_unlock(&cgroup_mutex);
 	return ret;
 }
-- 
cgit v1.2.3


From a08c2a5a31941131c41feaa0429e4c8854cf48f2 Mon Sep 17 00:00:00 2001
From: Thara Gopinath <thara.gopinath@linaro.org>
Date: Wed, 23 Jan 2019 08:50:14 +0100
Subject: PM-runtime: Replace jiffies-based accounting with ktime-based
 accounting

Replace jiffies-based accounting for runtime_active_time and
runtime_suspended_time with ktime-based accounting. This makes the
runtime debug counters inline with genpd and other PM subsytems which
use ktime-based accounting.

Timekeeping is initialized before driver_init(). It's only at that time
that PM-runtime can be enabled.

Signed-off-by: Thara Gopinath <thara.gopinath@linaro.org>
[switch from ktime to raw nsec]
Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/power/runtime.c | 17 +++++++++--------
 drivers/base/power/sysfs.c   | 11 ++++++++---
 include/linux/pm.h           |  6 +++---
 3 files changed, 20 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
index f23b7ecfce3b..eb1a3b878e1e 100644
--- a/drivers/base/power/runtime.c
+++ b/drivers/base/power/runtime.c
@@ -66,8 +66,8 @@ static int rpm_suspend(struct device *dev, int rpmflags);
  */
 void update_pm_runtime_accounting(struct device *dev)
 {
-	unsigned long now = jiffies;
-	unsigned long delta;
+	u64 now = ktime_to_ns(ktime_get());
+	u64 delta;
 
 	delta = now - dev->power.accounting_timestamp;
 
@@ -77,9 +77,9 @@ void update_pm_runtime_accounting(struct device *dev)
 		return;
 
 	if (dev->power.runtime_status == RPM_SUSPENDED)
-		dev->power.suspended_jiffies += delta;
+		dev->power.suspended_time += delta;
 	else
-		dev->power.active_jiffies += delta;
+		dev->power.active_time += delta;
 }
 
 static void __update_runtime_status(struct device *dev, enum rpm_status status)
@@ -90,16 +90,17 @@ static void __update_runtime_status(struct device *dev, enum rpm_status status)
 
 u64 pm_runtime_suspended_time(struct device *dev)
 {
-	unsigned long flags, time;
+	u64 time;
+	unsigned long flags;
 
 	spin_lock_irqsave(&dev->power.lock, flags);
 
 	update_pm_runtime_accounting(dev);
-	time = dev->power.suspended_jiffies;
+	time = dev->power.suspended_time;
 
 	spin_unlock_irqrestore(&dev->power.lock, flags);
 
-	return jiffies_to_nsecs(time);
+	return time;
 }
 EXPORT_SYMBOL_GPL(pm_runtime_suspended_time);
 
@@ -1314,7 +1315,7 @@ void pm_runtime_enable(struct device *dev)
 
 		/* About to enable runtime pm, set accounting_timestamp to now */
 		if (!dev->power.disable_depth)
-			dev->power.accounting_timestamp = jiffies;
+			dev->power.accounting_timestamp = ktime_to_ns(ktime_get());
 	} else {
 		dev_warn(dev, "Unbalanced %s!\n", __func__);
 	}
diff --git a/drivers/base/power/sysfs.c b/drivers/base/power/sysfs.c
index d713738ce796..96c8a227610a 100644
--- a/drivers/base/power/sysfs.c
+++ b/drivers/base/power/sysfs.c
@@ -125,9 +125,12 @@ static ssize_t runtime_active_time_show(struct device *dev,
 				struct device_attribute *attr, char *buf)
 {
 	int ret;
+	u64 tmp;
 	spin_lock_irq(&dev->power.lock);
 	update_pm_runtime_accounting(dev);
-	ret = sprintf(buf, "%i\n", jiffies_to_msecs(dev->power.active_jiffies));
+	tmp = dev->power.active_time;
+	do_div(tmp, NSEC_PER_MSEC);
+	ret = sprintf(buf, "%llu\n", tmp);
 	spin_unlock_irq(&dev->power.lock);
 	return ret;
 }
@@ -138,10 +141,12 @@ static ssize_t runtime_suspended_time_show(struct device *dev,
 				struct device_attribute *attr, char *buf)
 {
 	int ret;
+	u64 tmp;
 	spin_lock_irq(&dev->power.lock);
 	update_pm_runtime_accounting(dev);
-	ret = sprintf(buf, "%i\n",
-		jiffies_to_msecs(dev->power.suspended_jiffies));
+	tmp = dev->power.suspended_time;
+	do_div(tmp, NSEC_PER_MSEC);
+	ret = sprintf(buf, "%llu\n", tmp);
 	spin_unlock_irq(&dev->power.lock);
 	return ret;
 }
diff --git a/include/linux/pm.h b/include/linux/pm.h
index 0bd9de116826..3d2cbf947768 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -633,9 +633,9 @@ struct dev_pm_info {
 	int			runtime_error;
 	int			autosuspend_delay;
 	u64			last_busy;
-	unsigned long		active_jiffies;
-	unsigned long		suspended_jiffies;
-	unsigned long		accounting_timestamp;
+	u64			active_time;
+	u64			suspended_time;
+	u64			accounting_timestamp;
 #endif
 	struct pm_subsys_data	*subsys_data;  /* Owned by the subsystem. */
 	void (*set_latency_tolerance)(struct device *, s32);
-- 
cgit v1.2.3


From 03535e7a3a9937da99ee18304309e0574d2504fc Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil@xs4all.nl>
Date: Thu, 24 Jan 2019 06:47:49 -0200
Subject: media: vb2: vb2_find_timestamp: drop restriction on buffer state

There really is no reason why vb2_find_timestamp can't just find
buffers in any state. Drop that part of the test.

This also means that vb->timestamp should only be set to 0 when
the driver doesn't copy timestamps.

This change allows for more efficient pipelining (i.e. you can use
a buffer for a reference frame even when it is queued).

Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Reviewed-by: Tomasz Figa <tfiga@chromium.org>
Reviewed-by: Alexandre Courbot <acourbot@chromium.org>
Reviewed-by: Paul Kocialkowski <paul.kocialkowski@bootlin.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 drivers/media/common/videobuf2/videobuf2-v4l2.c | 11 +++--------
 include/media/videobuf2-v4l2.h                  |  3 +--
 2 files changed, 4 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/drivers/media/common/videobuf2/videobuf2-v4l2.c b/drivers/media/common/videobuf2/videobuf2-v4l2.c
index 75ea90e795d8..2f3b3ca5bde6 100644
--- a/drivers/media/common/videobuf2/videobuf2-v4l2.c
+++ b/drivers/media/common/videobuf2/videobuf2-v4l2.c
@@ -567,7 +567,7 @@ static int __fill_vb2_buffer(struct vb2_buffer *vb, struct vb2_plane *planes)
 	struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb);
 	unsigned int plane;
 
-	if (!vb->vb2_queue->is_output || !vb->vb2_queue->copy_timestamp)
+	if (!vb->vb2_queue->copy_timestamp)
 		vb->timestamp = 0;
 
 	for (plane = 0; plane < vb->num_planes; ++plane) {
@@ -594,14 +594,9 @@ int vb2_find_timestamp(const struct vb2_queue *q, u64 timestamp,
 {
 	unsigned int i;
 
-	for (i = start_idx; i < q->num_buffers; i++) {
-		struct vb2_buffer *vb = q->bufs[i];
-
-		if ((vb->state == VB2_BUF_STATE_DEQUEUED ||
-		     vb->state == VB2_BUF_STATE_DONE) &&
-		    vb->timestamp == timestamp)
+	for (i = start_idx; i < q->num_buffers; i++)
+		if (q->bufs[i]->timestamp == timestamp)
 			return i;
-	}
 	return -1;
 }
 EXPORT_SYMBOL_GPL(vb2_find_timestamp);
diff --git a/include/media/videobuf2-v4l2.h b/include/media/videobuf2-v4l2.h
index a9961bc776dc..8a10889dc2fd 100644
--- a/include/media/videobuf2-v4l2.h
+++ b/include/media/videobuf2-v4l2.h
@@ -59,8 +59,7 @@ struct vb2_v4l2_buffer {
  * vb2_find_timestamp() - Find buffer with given timestamp in the queue
  *
  * @q:		pointer to &struct vb2_queue with videobuf2 queue.
- * @timestamp:	the timestamp to find. Only buffers in state DEQUEUED or DONE
- *		are considered.
+ * @timestamp:	the timestamp to find.
  * @start_idx:	the start index (usually 0) in the buffer array to start
  *		searching from. Note that there may be multiple buffers
  *		with the same timestamp value, so you can restart the search
-- 
cgit v1.2.3


From 28d77c21cbeb2c6039d48ef88401b87a56a7a07f Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Date: Wed, 16 Jan 2019 10:01:13 -0200
Subject: media: vb2: add buf_out_validate callback

When queueing a buffer to a request the 'field' value is not validated.
That field is only validated when the _buf_prepare() is called,
which happens when the request is queued.

However, this validation should happen at QBUF time, since you want
to know about this as soon as possible. Also, the spec requires that
the 'field' value is validated at QBUF time.

This patch adds a new buf_out_validate callback to validate the
output buffer at buf_prepare time or when QBUF queues an unprepared
buffer to a request. This callback is mandatory for output queues
that support requests.

This issue was found by v4l2-compliance since it failed to replace
V4L2_FIELD_ANY by a proper field value when testing the vivid video
output in combination with requests.

Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 drivers/media/common/videobuf2/videobuf2-core.c | 22 +++++++++++++++++++---
 include/media/videobuf2-core.h                  |  5 +++++
 2 files changed, 24 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/drivers/media/common/videobuf2/videobuf2-core.c b/drivers/media/common/videobuf2/videobuf2-core.c
index ce9294a635cc..e07b6bdb6982 100644
--- a/drivers/media/common/videobuf2/videobuf2-core.c
+++ b/drivers/media/common/videobuf2/videobuf2-core.c
@@ -499,9 +499,9 @@ static int __vb2_queue_free(struct vb2_queue *q, unsigned int buffers)
 			pr_info("     buf_init: %u buf_cleanup: %u buf_prepare: %u buf_finish: %u\n",
 				vb->cnt_buf_init, vb->cnt_buf_cleanup,
 				vb->cnt_buf_prepare, vb->cnt_buf_finish);
-			pr_info("     buf_queue: %u buf_done: %u buf_request_complete: %u\n",
-				vb->cnt_buf_queue, vb->cnt_buf_done,
-				vb->cnt_buf_request_complete);
+			pr_info("     buf_out_validate: %u buf_queue: %u buf_done: %u buf_request_complete: %u\n",
+				vb->cnt_buf_out_validate, vb->cnt_buf_queue,
+				vb->cnt_buf_done, vb->cnt_buf_request_complete);
 			pr_info("     alloc: %u put: %u prepare: %u finish: %u mmap: %u\n",
 				vb->cnt_mem_alloc, vb->cnt_mem_put,
 				vb->cnt_mem_prepare, vb->cnt_mem_finish,
@@ -1277,6 +1277,14 @@ static int __buf_prepare(struct vb2_buffer *vb)
 		return 0;
 	WARN_ON(vb->synced);
 
+	if (q->is_output) {
+		ret = call_vb_qop(vb, buf_out_validate, vb);
+		if (ret) {
+			dprintk(1, "buffer validation failed\n");
+			return ret;
+		}
+	}
+
 	vb->state = VB2_BUF_STATE_PREPARING;
 
 	switch (q->memory) {
@@ -1523,6 +1531,14 @@ int vb2_core_qbuf(struct vb2_queue *q, unsigned int index, void *pb,
 			return -EINVAL;
 		}
 
+		if (q->is_output && !vb->prepared) {
+			ret = call_vb_qop(vb, buf_out_validate, vb);
+			if (ret) {
+				dprintk(1, "buffer validation failed\n");
+				return ret;
+			}
+		}
+
 		media_request_object_init(&vb->req_obj);
 
 		/* Make sure the request is in a safe state for updating. */
diff --git a/include/media/videobuf2-core.h b/include/media/videobuf2-core.h
index 4a737b2c610b..4849b865b908 100644
--- a/include/media/videobuf2-core.h
+++ b/include/media/videobuf2-core.h
@@ -296,6 +296,7 @@ struct vb2_buffer {
 	u32		cnt_mem_num_users;
 	u32		cnt_mem_mmap;
 
+	u32		cnt_buf_out_validate;
 	u32		cnt_buf_init;
 	u32		cnt_buf_prepare;
 	u32		cnt_buf_finish;
@@ -342,6 +343,9 @@ struct vb2_buffer {
  * @wait_finish:	reacquire all locks released in the previous callback;
  *			required to continue operation after sleeping while
  *			waiting for a new buffer to arrive.
+ * @buf_out_validate:	called when the output buffer is prepared or queued
+ *			to a request; drivers can use this to validate
+ *			userspace-provided information; optional.
  * @buf_init:		called once after allocating a buffer (in MMAP case)
  *			or after acquiring a new USERPTR buffer; drivers may
  *			perform additional buffer-related initialization;
@@ -409,6 +413,7 @@ struct vb2_ops {
 	void (*wait_prepare)(struct vb2_queue *q);
 	void (*wait_finish)(struct vb2_queue *q);
 
+	int (*buf_out_validate)(struct vb2_buffer *vb);
 	int (*buf_init)(struct vb2_buffer *vb);
 	int (*buf_prepare)(struct vb2_buffer *vb);
 	void (*buf_finish)(struct vb2_buffer *vb);
-- 
cgit v1.2.3


From 8204e0c1113d6b7f599bcd7ebfbfde72e76c102f Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@linux.intel.com>
Date: Tue, 22 Jan 2019 10:39:26 -0800
Subject: workqueue: Provide queue_work_node to queue work near a given NUMA
 node

Provide a new function, queue_work_node, which is meant to schedule work on
a "random" CPU of the requested NUMA node. The main motivation for this is
to help assist asynchronous init to better improve boot times for devices
that are local to a specific node.

For now we just default to the first CPU that is in the intersection of the
cpumask of the node and the online cpumask. The only exception is if the
CPU is local to the node we will just use the current CPU. This should work
for our purposes as we are currently only using this for unbound work so
the CPU will be translated to a node anyway instead of being directly used.

As we are only using the first CPU to represent the NUMA node for now I am
limiting the scope of the function so that it can only be used with unbound
workqueues.

Acked-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Acked-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Alexander Duyck <alexander.h.duyck@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/workqueue.h |  2 ++
 kernel/workqueue.c        | 84 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 86 insertions(+)

(limited to 'include')

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 60d673e15632..1f50c1e586e7 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -463,6 +463,8 @@ int workqueue_set_unbound_cpumask(cpumask_var_t cpumask);
 
 extern bool queue_work_on(int cpu, struct workqueue_struct *wq,
 			struct work_struct *work);
+extern bool queue_work_node(int node, struct workqueue_struct *wq,
+			    struct work_struct *work);
 extern bool queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
 			struct delayed_work *work, unsigned long delay);
 extern bool mod_delayed_work_on(int cpu, struct workqueue_struct *wq,
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 392be4b252f6..d5a26e456f7a 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -1492,6 +1492,90 @@ bool queue_work_on(int cpu, struct workqueue_struct *wq,
 }
 EXPORT_SYMBOL(queue_work_on);
 
+/**
+ * workqueue_select_cpu_near - Select a CPU based on NUMA node
+ * @node: NUMA node ID that we want to select a CPU from
+ *
+ * This function will attempt to find a "random" cpu available on a given
+ * node. If there are no CPUs available on the given node it will return
+ * WORK_CPU_UNBOUND indicating that we should just schedule to any
+ * available CPU if we need to schedule this work.
+ */
+static int workqueue_select_cpu_near(int node)
+{
+	int cpu;
+
+	/* No point in doing this if NUMA isn't enabled for workqueues */
+	if (!wq_numa_enabled)
+		return WORK_CPU_UNBOUND;
+
+	/* Delay binding to CPU if node is not valid or online */
+	if (node < 0 || node >= MAX_NUMNODES || !node_online(node))
+		return WORK_CPU_UNBOUND;
+
+	/* Use local node/cpu if we are already there */
+	cpu = raw_smp_processor_id();
+	if (node == cpu_to_node(cpu))
+		return cpu;
+
+	/* Use "random" otherwise know as "first" online CPU of node */
+	cpu = cpumask_any_and(cpumask_of_node(node), cpu_online_mask);
+
+	/* If CPU is valid return that, otherwise just defer */
+	return cpu < nr_cpu_ids ? cpu : WORK_CPU_UNBOUND;
+}
+
+/**
+ * queue_work_node - queue work on a "random" cpu for a given NUMA node
+ * @node: NUMA node that we are targeting the work for
+ * @wq: workqueue to use
+ * @work: work to queue
+ *
+ * We queue the work to a "random" CPU within a given NUMA node. The basic
+ * idea here is to provide a way to somehow associate work with a given
+ * NUMA node.
+ *
+ * This function will only make a best effort attempt at getting this onto
+ * the right NUMA node. If no node is requested or the requested node is
+ * offline then we just fall back to standard queue_work behavior.
+ *
+ * Currently the "random" CPU ends up being the first available CPU in the
+ * intersection of cpu_online_mask and the cpumask of the node, unless we
+ * are running on the node. In that case we just use the current CPU.
+ *
+ * Return: %false if @work was already on a queue, %true otherwise.
+ */
+bool queue_work_node(int node, struct workqueue_struct *wq,
+		     struct work_struct *work)
+{
+	unsigned long flags;
+	bool ret = false;
+
+	/*
+	 * This current implementation is specific to unbound workqueues.
+	 * Specifically we only return the first available CPU for a given
+	 * node instead of cycling through individual CPUs within the node.
+	 *
+	 * If this is used with a per-cpu workqueue then the logic in
+	 * workqueue_select_cpu_near would need to be updated to allow for
+	 * some round robin type logic.
+	 */
+	WARN_ON_ONCE(!(wq->flags & WQ_UNBOUND));
+
+	local_irq_save(flags);
+
+	if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
+		int cpu = workqueue_select_cpu_near(node);
+
+		__queue_work(cpu, wq, work);
+		ret = true;
+	}
+
+	local_irq_restore(flags);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(queue_work_node);
+
 void delayed_work_timer_fn(struct timer_list *t)
 {
 	struct delayed_work *dwork = from_timer(dwork, t, timer);
-- 
cgit v1.2.3


From 6be9238e5cb64741ff95c3ae440b112753ad93de Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@linux.intel.com>
Date: Tue, 22 Jan 2019 10:39:31 -0800
Subject: async: Add support for queueing on specific NUMA node

Introduce four new variants of the async_schedule_ functions that allow
scheduling on a specific NUMA node.

The first two functions are async_schedule_near and
async_schedule_near_domain end up mapping to async_schedule and
async_schedule_domain, but provide NUMA node specific functionality. They
replace the original functions which were moved to inline function
definitions that call the new functions while passing NUMA_NO_NODE.

The second two functions are async_schedule_dev and
async_schedule_dev_domain which provide NUMA specific functionality when
passing a device as the data member and that device has a NUMA node other
than NUMA_NO_NODE.

The main motivation behind this is to address the need to be able to
schedule device specific init work on specific NUMA nodes in order to
improve performance of memory initialization.

I have seen a significant improvement in initialziation time for persistent
memory as a result of this approach. In the case of 3TB of memory on a
single node the initialization time in the worst case went from 36s down to
about 26s for a 10s improvement. As such the data shows a general benefit
for affinitizing the async work to the node local to the device.

Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Alexander Duyck <alexander.h.duyck@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/async.h | 82 +++++++++++++++++++++++++++++++++++++++++++++++++--
 kernel/async.c        | 53 ++++++++++++++++++---------------
 2 files changed, 108 insertions(+), 27 deletions(-)

(limited to 'include')

diff --git a/include/linux/async.h b/include/linux/async.h
index 6b0226bdaadc..f81d6dbffe68 100644
--- a/include/linux/async.h
+++ b/include/linux/async.h
@@ -14,6 +14,8 @@
 
 #include <linux/types.h>
 #include <linux/list.h>
+#include <linux/numa.h>
+#include <linux/device.h>
 
 typedef u64 async_cookie_t;
 typedef void (*async_func_t) (void *data, async_cookie_t cookie);
@@ -37,9 +39,83 @@ struct async_domain {
 	struct async_domain _name = { .pending = LIST_HEAD_INIT(_name.pending), \
 				      .registered = 0 }
 
-extern async_cookie_t async_schedule(async_func_t func, void *data);
-extern async_cookie_t async_schedule_domain(async_func_t func, void *data,
-					    struct async_domain *domain);
+async_cookie_t async_schedule_node(async_func_t func, void *data,
+				   int node);
+async_cookie_t async_schedule_node_domain(async_func_t func, void *data,
+					  int node,
+					  struct async_domain *domain);
+
+/**
+ * async_schedule - schedule a function for asynchronous execution
+ * @func: function to execute asynchronously
+ * @data: data pointer to pass to the function
+ *
+ * Returns an async_cookie_t that may be used for checkpointing later.
+ * Note: This function may be called from atomic or non-atomic contexts.
+ */
+static inline async_cookie_t async_schedule(async_func_t func, void *data)
+{
+	return async_schedule_node(func, data, NUMA_NO_NODE);
+}
+
+/**
+ * async_schedule_domain - schedule a function for asynchronous execution within a certain domain
+ * @func: function to execute asynchronously
+ * @data: data pointer to pass to the function
+ * @domain: the domain
+ *
+ * Returns an async_cookie_t that may be used for checkpointing later.
+ * @domain may be used in the async_synchronize_*_domain() functions to
+ * wait within a certain synchronization domain rather than globally.
+ * Note: This function may be called from atomic or non-atomic contexts.
+ */
+static inline async_cookie_t
+async_schedule_domain(async_func_t func, void *data,
+		      struct async_domain *domain)
+{
+	return async_schedule_node_domain(func, data, NUMA_NO_NODE, domain);
+}
+
+/**
+ * async_schedule_dev - A device specific version of async_schedule
+ * @func: function to execute asynchronously
+ * @dev: device argument to be passed to function
+ *
+ * Returns an async_cookie_t that may be used for checkpointing later.
+ * @dev is used as both the argument for the function and to provide NUMA
+ * context for where to run the function. By doing this we can try to
+ * provide for the best possible outcome by operating on the device on the
+ * CPUs closest to the device.
+ * Note: This function may be called from atomic or non-atomic contexts.
+ */
+static inline async_cookie_t
+async_schedule_dev(async_func_t func, struct device *dev)
+{
+	return async_schedule_node(func, dev, dev_to_node(dev));
+}
+
+/**
+ * async_schedule_dev_domain - A device specific version of async_schedule_domain
+ * @func: function to execute asynchronously
+ * @dev: device argument to be passed to function
+ * @domain: the domain
+ *
+ * Returns an async_cookie_t that may be used for checkpointing later.
+ * @dev is used as both the argument for the function and to provide NUMA
+ * context for where to run the function. By doing this we can try to
+ * provide for the best possible outcome by operating on the device on the
+ * CPUs closest to the device.
+ * @domain may be used in the async_synchronize_*_domain() functions to
+ * wait within a certain synchronization domain rather than globally.
+ * Note: This function may be called from atomic or non-atomic contexts.
+ */
+static inline async_cookie_t
+async_schedule_dev_domain(async_func_t func, struct device *dev,
+			  struct async_domain *domain)
+{
+	return async_schedule_node_domain(func, dev, dev_to_node(dev), domain);
+}
+
 void async_unregister_domain(struct async_domain *domain);
 extern void async_synchronize_full(void);
 extern void async_synchronize_full_domain(struct async_domain *domain);
diff --git a/kernel/async.c b/kernel/async.c
index a893d6170944..f6bd0d9885e1 100644
--- a/kernel/async.c
+++ b/kernel/async.c
@@ -149,7 +149,25 @@ static void async_run_entry_fn(struct work_struct *work)
 	wake_up(&async_done);
 }
 
-static async_cookie_t __async_schedule(async_func_t func, void *data, struct async_domain *domain)
+/**
+ * async_schedule_node_domain - NUMA specific version of async_schedule_domain
+ * @func: function to execute asynchronously
+ * @data: data pointer to pass to the function
+ * @node: NUMA node that we want to schedule this on or close to
+ * @domain: the domain
+ *
+ * Returns an async_cookie_t that may be used for checkpointing later.
+ * @domain may be used in the async_synchronize_*_domain() functions to
+ * wait within a certain synchronization domain rather than globally.
+ *
+ * Note: This function may be called from atomic or non-atomic contexts.
+ *
+ * The node requested will be honored on a best effort basis. If the node
+ * has no CPUs associated with it then the work is distributed among all
+ * available CPUs.
+ */
+async_cookie_t async_schedule_node_domain(async_func_t func, void *data,
+					  int node, struct async_domain *domain)
 {
 	struct async_entry *entry;
 	unsigned long flags;
@@ -195,43 +213,30 @@ static async_cookie_t __async_schedule(async_func_t func, void *data, struct asy
 	current->flags |= PF_USED_ASYNC;
 
 	/* schedule for execution */
-	queue_work(system_unbound_wq, &entry->work);
+	queue_work_node(node, system_unbound_wq, &entry->work);
 
 	return newcookie;
 }
+EXPORT_SYMBOL_GPL(async_schedule_node_domain);
 
 /**
- * async_schedule - schedule a function for asynchronous execution
+ * async_schedule_node - NUMA specific version of async_schedule
  * @func: function to execute asynchronously
  * @data: data pointer to pass to the function
+ * @node: NUMA node that we want to schedule this on or close to
  *
  * Returns an async_cookie_t that may be used for checkpointing later.
  * Note: This function may be called from atomic or non-atomic contexts.
- */
-async_cookie_t async_schedule(async_func_t func, void *data)
-{
-	return __async_schedule(func, data, &async_dfl_domain);
-}
-EXPORT_SYMBOL_GPL(async_schedule);
-
-/**
- * async_schedule_domain - schedule a function for asynchronous execution within a certain domain
- * @func: function to execute asynchronously
- * @data: data pointer to pass to the function
- * @domain: the domain
  *
- * Returns an async_cookie_t that may be used for checkpointing later.
- * @domain may be used in the async_synchronize_*_domain() functions to
- * wait within a certain synchronization domain rather than globally.  A
- * synchronization domain is specified via @domain.  Note: This function
- * may be called from atomic or non-atomic contexts.
+ * The node requested will be honored on a best effort basis. If the node
+ * has no CPUs associated with it then the work is distributed among all
+ * available CPUs.
  */
-async_cookie_t async_schedule_domain(async_func_t func, void *data,
-				     struct async_domain *domain)
+async_cookie_t async_schedule_node(async_func_t func, void *data, int node)
 {
-	return __async_schedule(func, data, domain);
+	return async_schedule_node_domain(func, data, node, &async_dfl_domain);
 }
-EXPORT_SYMBOL_GPL(async_schedule_domain);
+EXPORT_SYMBOL_GPL(async_schedule_node);
 
 /**
  * async_synchronize_full - synchronize all asynchronous function calls
-- 
cgit v1.2.3


From 51bee5abeab2058ea5813c5615d6197a23dbf041 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Mon, 28 Jan 2019 17:00:13 +0100
Subject: cgroup/pids: turn cgroup_subsys->free() into cgroup_subsys->release()
 to fix the accounting

The only user of cgroup_subsys->free() callback is pids_cgrp_subsys which
needs pids_free() to uncharge the pid.

However, ->free() is called from __put_task_struct()->cgroup_free() and this
is too late. Even the trivial program which does

	for (;;) {
		int pid = fork();
		assert(pid >= 0);
		if (pid)
			wait(NULL);
		else
			exit(0);
	}

can run out of limits because release_task()->call_rcu(delayed_put_task_struct)
implies an RCU gp after the task/pid goes away and before the final put().

Test-case:

	mkdir -p /tmp/CG
	mount -t cgroup2 none /tmp/CG
	echo '+pids' > /tmp/CG/cgroup.subtree_control

	mkdir /tmp/CG/PID
	echo 2 > /tmp/CG/PID/pids.max

	perl -e 'while ($p = fork) { wait; } $p // die "fork failed: $!\n"' &
	echo $! > /tmp/CG/PID/cgroup.procs

Without this patch the forking process fails soon after migration.

Rename cgroup_subsys->free() to cgroup_subsys->release() and move the callsite
into the new helper, cgroup_release(), called by release_task() which actually
frees the pid(s).

Reported-by: Herton R. Krzesinski <hkrzesin@redhat.com>
Reported-by: Jan Stancek <jstancek@redhat.com>
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/cgroup-defs.h |  2 +-
 include/linux/cgroup.h      |  2 ++
 kernel/cgroup/cgroup.c      | 15 +++++++++------
 kernel/cgroup/pids.c        |  4 ++--
 kernel/exit.c               |  1 +
 5 files changed, 15 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index 8fcbae1b8db0..120d1d40704b 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -602,7 +602,7 @@ struct cgroup_subsys {
 	void (*cancel_fork)(struct task_struct *task);
 	void (*fork)(struct task_struct *task);
 	void (*exit)(struct task_struct *task);
-	void (*free)(struct task_struct *task);
+	void (*release)(struct task_struct *task);
 	void (*bind)(struct cgroup_subsys_state *root_css);
 
 	bool early_init:1;
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 9968332cceed..81f58b4a5418 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -121,6 +121,7 @@ extern int cgroup_can_fork(struct task_struct *p);
 extern void cgroup_cancel_fork(struct task_struct *p);
 extern void cgroup_post_fork(struct task_struct *p);
 void cgroup_exit(struct task_struct *p);
+void cgroup_release(struct task_struct *p);
 void cgroup_free(struct task_struct *p);
 
 int cgroup_init_early(void);
@@ -697,6 +698,7 @@ static inline int cgroup_can_fork(struct task_struct *p) { return 0; }
 static inline void cgroup_cancel_fork(struct task_struct *p) {}
 static inline void cgroup_post_fork(struct task_struct *p) {}
 static inline void cgroup_exit(struct task_struct *p) {}
+static inline void cgroup_release(struct task_struct *p) {}
 static inline void cgroup_free(struct task_struct *p) {}
 
 static inline int cgroup_init_early(void) { return 0; }
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index f31bd61c9466..f4418371c83b 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -197,7 +197,7 @@ static u64 css_serial_nr_next = 1;
  */
 static u16 have_fork_callback __read_mostly;
 static u16 have_exit_callback __read_mostly;
-static u16 have_free_callback __read_mostly;
+static u16 have_release_callback __read_mostly;
 static u16 have_canfork_callback __read_mostly;
 
 /* cgroup namespace for init task */
@@ -5313,7 +5313,7 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss, bool early)
 
 	have_fork_callback |= (bool)ss->fork << ss->id;
 	have_exit_callback |= (bool)ss->exit << ss->id;
-	have_free_callback |= (bool)ss->free << ss->id;
+	have_release_callback |= (bool)ss->release << ss->id;
 	have_canfork_callback |= (bool)ss->can_fork << ss->id;
 
 	/* At system boot, before all subsystems have been
@@ -5749,16 +5749,19 @@ void cgroup_exit(struct task_struct *tsk)
 	} while_each_subsys_mask();
 }
 
-void cgroup_free(struct task_struct *task)
+void cgroup_release(struct task_struct *task)
 {
-	struct css_set *cset = task_css_set(task);
 	struct cgroup_subsys *ss;
 	int ssid;
 
-	do_each_subsys_mask(ss, ssid, have_free_callback) {
-		ss->free(task);
+	do_each_subsys_mask(ss, ssid, have_release_callback) {
+		ss->release(task);
 	} while_each_subsys_mask();
+}
 
+void cgroup_free(struct task_struct *task)
+{
+	struct css_set *cset = task_css_set(task);
 	put_css_set(cset);
 }
 
diff --git a/kernel/cgroup/pids.c b/kernel/cgroup/pids.c
index 9829c67ebc0a..c9960baaa14f 100644
--- a/kernel/cgroup/pids.c
+++ b/kernel/cgroup/pids.c
@@ -247,7 +247,7 @@ static void pids_cancel_fork(struct task_struct *task)
 	pids_uncharge(pids, 1);
 }
 
-static void pids_free(struct task_struct *task)
+static void pids_release(struct task_struct *task)
 {
 	struct pids_cgroup *pids = css_pids(task_css(task, pids_cgrp_id));
 
@@ -342,7 +342,7 @@ struct cgroup_subsys pids_cgrp_subsys = {
 	.cancel_attach 	= pids_cancel_attach,
 	.can_fork	= pids_can_fork,
 	.cancel_fork	= pids_cancel_fork,
-	.free		= pids_free,
+	.release	= pids_release,
 	.legacy_cftypes	= pids_files,
 	.dfl_cftypes	= pids_files,
 	.threaded	= true,
diff --git a/kernel/exit.c b/kernel/exit.c
index 3fb7be001964..c2b8443f30b4 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -219,6 +219,7 @@ repeat:
 	}
 
 	write_unlock_irq(&tasklist_lock);
+	cgroup_release(p);
 	release_thread(p);
 	call_rcu(&p->rcu, delayed_put_task_struct);
 
-- 
cgit v1.2.3


From ddf922c31fedd19c5b89a269c35e5c8b68c64327 Mon Sep 17 00:00:00 2001
From: Kaike Wan <kaike.wan@intel.com>
Date: Wed, 23 Jan 2019 19:21:01 -0800
Subject: IB/hfi1, IB/rdmavt: Allow for extending of QP's s_ack_queue

The OPFN protocol uses the COMPARE_SWAP request to exchange data
between the requester and the responder and therefore needs to
be stored in the QP's s_ack_queue when the request is received
on the responder side. However, because the user does not know
anything about the OPFN protocol, this extra entry in the
queue cannot be advertised to the user. This patch adds an extra
entry in a QP's s_ack_queue.

Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Mitko Haralanov <mitko.haralanov@intel.com>
Signed-off-by: Kaike Wan <kaike.wan@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/hfi1/rc.c    | 13 +++++++------
 drivers/infiniband/hw/hfi1/verbs.c |  1 +
 include/rdma/rdma_vt.h             | 10 +++++++++-
 3 files changed, 17 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c
index 940e9236c328..8970fc7ffd4b 100644
--- a/drivers/infiniband/hw/hfi1/rc.c
+++ b/drivers/infiniband/hw/hfi1/rc.c
@@ -122,7 +122,8 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp,
 		 * response has been sent instead of only being
 		 * constructed.
 		 */
-		if (++qp->s_tail_ack_queue > HFI1_MAX_RDMA_ATOMIC)
+		if (++qp->s_tail_ack_queue >
+		    rvt_size_atomic(ib_to_rvt(qp->ibqp.device)))
 			qp->s_tail_ack_queue = 0;
 		/* FALLTHROUGH */
 	case OP(SEND_ONLY):
@@ -1818,7 +1819,7 @@ static noinline int rc_rcv_error(struct ib_other_headers *ohdr, void *data,
 		if (i)
 			prev = i - 1;
 		else
-			prev = HFI1_MAX_RDMA_ATOMIC;
+			prev = rvt_size_atomic(ib_to_rvt(qp->ibqp.device));
 		if (prev == qp->r_head_ack_queue) {
 			e = NULL;
 			break;
@@ -1942,7 +1943,7 @@ static inline void update_ack_queue(struct rvt_qp *qp, unsigned n)
 	unsigned next;
 
 	next = n + 1;
-	if (next > HFI1_MAX_RDMA_ATOMIC)
+	if (next > rvt_size_atomic(ib_to_rvt(qp->ibqp.device)))
 		next = 0;
 	qp->s_tail_ack_queue = next;
 	qp->s_ack_state = OP(ACKNOWLEDGE);
@@ -2298,8 +2299,8 @@ send_last:
 		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ)))
 			goto nack_inv;
 		next = qp->r_head_ack_queue + 1;
-		/* s_ack_queue is size HFI1_MAX_RDMA_ATOMIC+1 so use > not >= */
-		if (next > HFI1_MAX_RDMA_ATOMIC)
+		/* s_ack_queue is size rvt_size_atomic()+1 so use > not >= */
+		if (next > rvt_size_atomic(ib_to_rvt(qp->ibqp.device)))
 			next = 0;
 		spin_lock_irqsave(&qp->s_lock, flags);
 		if (unlikely(next == qp->s_tail_ack_queue)) {
@@ -2373,7 +2374,7 @@ send_last:
 		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
 			goto nack_inv;
 		next = qp->r_head_ack_queue + 1;
-		if (next > HFI1_MAX_RDMA_ATOMIC)
+		if (next > rvt_size_atomic(ib_to_rvt(qp->ibqp.device)))
 			next = 0;
 		spin_lock_irqsave(&qp->s_lock, flags);
 		if (unlikely(next == qp->s_tail_ack_queue)) {
diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c
index c980345cf1e1..ec3899c0874c 100644
--- a/drivers/infiniband/hw/hfi1/verbs.c
+++ b/drivers/infiniband/hw/hfi1/verbs.c
@@ -1735,6 +1735,7 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
 	dd->verbs_dev.rdi.dparms.sge_copy_mode = sge_copy_mode;
 	dd->verbs_dev.rdi.dparms.wss_threshold = wss_threshold;
 	dd->verbs_dev.rdi.dparms.wss_clean_period = wss_clean_period;
+	dd->verbs_dev.rdi.dparms.extra_rdma_atomic = 1;
 
 	/* post send table */
 	dd->verbs_dev.rdi.post_parms = hfi1_post_parms;
diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h
index acb3bc96dfa7..168e40be183c 100644
--- a/include/rdma/rdma_vt.h
+++ b/include/rdma/rdma_vt.h
@@ -182,6 +182,7 @@ struct rvt_driver_params {
 	u32 max_mad_size;
 	u8 qos_shift;
 	u8 max_rdma_atomic;
+	u8 extra_rdma_atomic;
 	u8 reserved_operations;
 };
 
@@ -519,7 +520,14 @@ static inline unsigned rvt_get_npkeys(struct rvt_dev_info *rdi)
  */
 static inline unsigned int rvt_max_atomic(struct rvt_dev_info *rdi)
 {
-	return rdi->dparms.max_rdma_atomic + 1;
+	return rdi->dparms.max_rdma_atomic +
+		rdi->dparms.extra_rdma_atomic + 1;
+}
+
+static inline unsigned int rvt_size_atomic(struct rvt_dev_info *rdi)
+{
+	return rdi->dparms.max_rdma_atomic +
+		rdi->dparms.extra_rdma_atomic;
 }
 
 /*
-- 
cgit v1.2.3


From 90462a5bd30c6ed91c6758e59537d047d7878ff9 Mon Sep 17 00:00:00 2001
From: Richard Guy Briggs <rgb@redhat.com>
Date: Thu, 31 Jan 2019 11:52:11 -0500
Subject: audit: remove unused actx param from audit_rule_match

The audit_rule_match() struct audit_context *actx parameter is not used
by any in-tree consumers (selinux, apparmour, integrity, smack).

The audit context is an internal audit structure that should only be
accessed by audit accessor functions.

It was part of commit 03d37d25e0f9 ("LSM/Audit: Introduce generic
Audit LSM hooks") but appears to have never been used.

Remove it.

Please see the github issue
https://github.com/linux-audit/audit-kernel/issues/107

Signed-off-by: Richard Guy Briggs <rgb@redhat.com>
[PM: fixed the referenced commit title]
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
 include/linux/lsm_hooks.h           |  4 +---
 include/linux/security.h            |  5 ++---
 kernel/auditfilter.c                |  2 +-
 kernel/auditsc.c                    | 21 ++++++++++++---------
 security/apparmor/audit.c           |  3 +--
 security/apparmor/include/audit.h   |  3 +--
 security/integrity/ima/ima.h        |  3 +--
 security/integrity/ima/ima_policy.c |  6 ++----
 security/security.c                 |  6 ++----
 security/selinux/include/audit.h    |  4 +---
 security/selinux/ss/services.c      |  3 +--
 security/smack/smack_lsm.c          |  4 +---
 12 files changed, 26 insertions(+), 38 deletions(-)

(limited to 'include')

diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h
index 9a0bdf91e646..d0b5c7a05832 100644
--- a/include/linux/lsm_hooks.h
+++ b/include/linux/lsm_hooks.h
@@ -1344,7 +1344,6 @@
  *	@field contains the field which relates to current LSM.
  *	@op contains the operator that will be used for matching.
  *	@rule points to the audit rule that will be checked against.
- *	@actx points to the audit context associated with the check.
  *	Return 1 if secid matches the rule, 0 if it does not, -ERRNO on failure.
  *
  * @audit_rule_free:
@@ -1764,8 +1763,7 @@ union security_list_options {
 	int (*audit_rule_init)(u32 field, u32 op, char *rulestr,
 				void **lsmrule);
 	int (*audit_rule_known)(struct audit_krule *krule);
-	int (*audit_rule_match)(u32 secid, u32 field, u32 op, void *lsmrule,
-				struct audit_context *actx);
+	int (*audit_rule_match)(u32 secid, u32 field, u32 op, void *lsmrule);
 	void (*audit_rule_free)(void *lsmrule);
 #endif /* CONFIG_AUDIT */
 
diff --git a/include/linux/security.h b/include/linux/security.h
index dbfb5a66babb..e8febec62ffb 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -1674,8 +1674,7 @@ static inline int security_key_getsecurity(struct key *key, char **_buffer)
 #ifdef CONFIG_SECURITY
 int security_audit_rule_init(u32 field, u32 op, char *rulestr, void **lsmrule);
 int security_audit_rule_known(struct audit_krule *krule);
-int security_audit_rule_match(u32 secid, u32 field, u32 op, void *lsmrule,
-			      struct audit_context *actx);
+int security_audit_rule_match(u32 secid, u32 field, u32 op, void *lsmrule);
 void security_audit_rule_free(void *lsmrule);
 
 #else
@@ -1692,7 +1691,7 @@ static inline int security_audit_rule_known(struct audit_krule *krule)
 }
 
 static inline int security_audit_rule_match(u32 secid, u32 field, u32 op,
-				   void *lsmrule, struct audit_context *actx)
+					    void *lsmrule)
 {
 	return 0;
 }
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index 26a80a9d43a9..add360b46b38 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -1355,7 +1355,7 @@ int audit_filter(int msgtype, unsigned int listtype)
 				if (f->lsm_rule) {
 					security_task_getsecid(current, &sid);
 					result = security_audit_rule_match(sid,
-							f->type, f->op, f->lsm_rule, NULL);
+						   f->type, f->op, f->lsm_rule);
 				}
 				break;
 			case AUDIT_EXE:
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 68da71001096..7d37cb1e4aef 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -631,9 +631,8 @@ static int audit_filter_rules(struct task_struct *tsk,
 					need_sid = 0;
 				}
 				result = security_audit_rule_match(sid, f->type,
-				                                  f->op,
-				                                  f->lsm_rule,
-				                                  ctx);
+								   f->op,
+								   f->lsm_rule);
 			}
 			break;
 		case AUDIT_OBJ_USER:
@@ -647,13 +646,17 @@ static int audit_filter_rules(struct task_struct *tsk,
 				/* Find files that match */
 				if (name) {
 					result = security_audit_rule_match(
-					           name->osid, f->type, f->op,
-					           f->lsm_rule, ctx);
+								name->osid,
+								f->type,
+								f->op,
+								f->lsm_rule);
 				} else if (ctx) {
 					list_for_each_entry(n, &ctx->names_list, list) {
-						if (security_audit_rule_match(n->osid, f->type,
-									      f->op, f->lsm_rule,
-									      ctx)) {
+						if (security_audit_rule_match(
+								n->osid,
+								f->type,
+								f->op,
+								f->lsm_rule)) {
 							++result;
 							break;
 						}
@@ -664,7 +667,7 @@ static int audit_filter_rules(struct task_struct *tsk,
 					break;
 				if (security_audit_rule_match(ctx->ipc.osid,
 							      f->type, f->op,
-							      f->lsm_rule, ctx))
+							      f->lsm_rule))
 					++result;
 			}
 			break;
diff --git a/security/apparmor/audit.c b/security/apparmor/audit.c
index eeaddfe0c0fb..5a8b9cded4f2 100644
--- a/security/apparmor/audit.c
+++ b/security/apparmor/audit.c
@@ -225,8 +225,7 @@ int aa_audit_rule_known(struct audit_krule *rule)
 	return 0;
 }
 
-int aa_audit_rule_match(u32 sid, u32 field, u32 op, void *vrule,
-			struct audit_context *actx)
+int aa_audit_rule_match(u32 sid, u32 field, u32 op, void *vrule)
 {
 	struct aa_audit_rule *rule = vrule;
 	struct aa_label *label;
diff --git a/security/apparmor/include/audit.h b/security/apparmor/include/audit.h
index b8c8b1066b0a..ee559bc2acb8 100644
--- a/security/apparmor/include/audit.h
+++ b/security/apparmor/include/audit.h
@@ -192,7 +192,6 @@ static inline int complain_error(int error)
 void aa_audit_rule_free(void *vrule);
 int aa_audit_rule_init(u32 field, u32 op, char *rulestr, void **vrule);
 int aa_audit_rule_known(struct audit_krule *rule);
-int aa_audit_rule_match(u32 sid, u32 field, u32 op, void *vrule,
-			struct audit_context *actx);
+int aa_audit_rule_match(u32 sid, u32 field, u32 op, void *vrule);
 
 #endif /* __AA_AUDIT_H */
diff --git a/security/integrity/ima/ima.h b/security/integrity/ima/ima.h
index cc12f3449a72..026163f37ba1 100644
--- a/security/integrity/ima/ima.h
+++ b/security/integrity/ima/ima.h
@@ -307,8 +307,7 @@ static inline int security_filter_rule_init(u32 field, u32 op, char *rulestr,
 }
 
 static inline int security_filter_rule_match(u32 secid, u32 field, u32 op,
-					     void *lsmrule,
-					     struct audit_context *actx)
+					     void *lsmrule)
 {
 	return -EINVAL;
 }
diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c
index 8bc8a1c8cb3f..26fa9d9723f6 100644
--- a/security/integrity/ima/ima_policy.c
+++ b/security/integrity/ima/ima_policy.c
@@ -340,8 +340,7 @@ retry:
 			rc = security_filter_rule_match(osid,
 							rule->lsm[i].type,
 							Audit_equal,
-							rule->lsm[i].rule,
-							NULL);
+							rule->lsm[i].rule);
 			break;
 		case LSM_SUBJ_USER:
 		case LSM_SUBJ_ROLE:
@@ -349,8 +348,7 @@ retry:
 			rc = security_filter_rule_match(secid,
 							rule->lsm[i].type,
 							Audit_equal,
-							rule->lsm[i].rule,
-							NULL);
+							rule->lsm[i].rule);
 		default:
 			break;
 		}
diff --git a/security/security.c b/security/security.c
index f1b8d2587639..5f954b179a8e 100644
--- a/security/security.c
+++ b/security/security.c
@@ -1783,11 +1783,9 @@ void security_audit_rule_free(void *lsmrule)
 	call_void_hook(audit_rule_free, lsmrule);
 }
 
-int security_audit_rule_match(u32 secid, u32 field, u32 op, void *lsmrule,
-			      struct audit_context *actx)
+int security_audit_rule_match(u32 secid, u32 field, u32 op, void *lsmrule)
 {
-	return call_int_hook(audit_rule_match, 0, secid, field, op, lsmrule,
-				actx);
+	return call_int_hook(audit_rule_match, 0, secid, field, op, lsmrule);
 }
 #endif /* CONFIG_AUDIT */
 
diff --git a/security/selinux/include/audit.h b/security/selinux/include/audit.h
index 1bdf973433cc..e51a81ffb8c9 100644
--- a/security/selinux/include/audit.h
+++ b/security/selinux/include/audit.h
@@ -46,13 +46,11 @@ void selinux_audit_rule_free(void *rule);
  *	@field: the field this rule refers to
  *	@op: the operater the rule uses
  *	@rule: pointer to the audit rule to check against
- *	@actx: the audit context (can be NULL) associated with the check
  *
  *	Returns 1 if the context id matches the rule, 0 if it does not, and
  *	-errno on failure.
  */
-int selinux_audit_rule_match(u32 sid, u32 field, u32 op, void *rule,
-			     struct audit_context *actx);
+int selinux_audit_rule_match(u32 sid, u32 field, u32 op, void *rule);
 
 /**
  *	selinux_audit_rule_known - check to see if rule contains selinux fields.
diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c
index dd44126c8d14..0b7e33f6aa59 100644
--- a/security/selinux/ss/services.c
+++ b/security/selinux/ss/services.c
@@ -3376,8 +3376,7 @@ int selinux_audit_rule_known(struct audit_krule *rule)
 	return 0;
 }
 
-int selinux_audit_rule_match(u32 sid, u32 field, u32 op, void *vrule,
-			     struct audit_context *actx)
+int selinux_audit_rule_match(u32 sid, u32 field, u32 op, void *vrule)
 {
 	struct selinux_state *state = &selinux_state;
 	struct context *ctxt;
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
index 430d4f35e55c..403513df42fc 100644
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -4393,13 +4393,11 @@ static int smack_audit_rule_known(struct audit_krule *krule)
  * @field: audit rule flags given from user-space
  * @op: required testing operator
  * @vrule: smack internal rule presentation
- * @actx: audit context associated with the check
  *
  * The core Audit hook. It's used to take the decision of
  * whether to audit or not to audit a given object.
  */
-static int smack_audit_rule_match(u32 secid, u32 field, u32 op, void *vrule,
-				  struct audit_context *actx)
+static int smack_audit_rule_match(u32 secid, u32 field, u32 op, void *vrule)
 {
 	struct smack_known *skp;
 	char *rule = vrule;
-- 
cgit v1.2.3


From a0ce2f0aa6ad97c3d4927bf2ca54bcebdf062d55 Mon Sep 17 00:00:00 2001
From: Jann Horn <jannh@google.com>
Date: Wed, 23 Jan 2019 15:19:17 +0100
Subject: splice: don't merge into linked buffers

Before this patch, it was possible for two pipes to affect each other after
data had been transferred between them with tee():

============
$ cat tee_test.c

int main(void) {
  int pipe_a[2];
  if (pipe(pipe_a)) err(1, "pipe");
  int pipe_b[2];
  if (pipe(pipe_b)) err(1, "pipe");
  if (write(pipe_a[1], "abcd", 4) != 4) err(1, "write");
  if (tee(pipe_a[0], pipe_b[1], 2, 0) != 2) err(1, "tee");
  if (write(pipe_b[1], "xx", 2) != 2) err(1, "write");

  char buf[5];
  if (read(pipe_a[0], buf, 4) != 4) err(1, "read");
  buf[4] = 0;
  printf("got back: '%s'\n", buf);
}
$ gcc -o tee_test tee_test.c
$ ./tee_test
got back: 'abxx'
$
============

As suggested by Al Viro, fix it by creating a separate type for
non-mergeable pipe buffers, then changing the types of buffers in
splice_pipe_to_pipe() and link_pipe().

Cc: <stable@vger.kernel.org>
Fixes: 7c77f0b3f920 ("splice: implement pipe to pipe splicing")
Fixes: 70524490ee2e ("[PATCH] splice: add support for sys_tee()")
Suggested-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Jann Horn <jannh@google.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/pipe.c                 | 14 ++++++++++++++
 fs/splice.c               |  4 ++++
 include/linux/pipe_fs_i.h |  1 +
 3 files changed, 19 insertions(+)

(limited to 'include')

diff --git a/fs/pipe.c b/fs/pipe.c
index bdc5d3c0977d..c51750ed4011 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -234,6 +234,14 @@ static const struct pipe_buf_operations anon_pipe_buf_ops = {
 	.get = generic_pipe_buf_get,
 };
 
+static const struct pipe_buf_operations anon_pipe_buf_nomerge_ops = {
+	.can_merge = 0,
+	.confirm = generic_pipe_buf_confirm,
+	.release = anon_pipe_buf_release,
+	.steal = anon_pipe_buf_steal,
+	.get = generic_pipe_buf_get,
+};
+
 static const struct pipe_buf_operations packet_pipe_buf_ops = {
 	.can_merge = 0,
 	.confirm = generic_pipe_buf_confirm,
@@ -242,6 +250,12 @@ static const struct pipe_buf_operations packet_pipe_buf_ops = {
 	.get = generic_pipe_buf_get,
 };
 
+void pipe_buf_mark_unmergeable(struct pipe_buffer *buf)
+{
+	if (buf->ops == &anon_pipe_buf_ops)
+		buf->ops = &anon_pipe_buf_nomerge_ops;
+}
+
 static ssize_t
 pipe_read(struct kiocb *iocb, struct iov_iter *to)
 {
diff --git a/fs/splice.c b/fs/splice.c
index de2ede048473..90c29675d573 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -1597,6 +1597,8 @@ retry:
 			 */
 			obuf->flags &= ~PIPE_BUF_FLAG_GIFT;
 
+			pipe_buf_mark_unmergeable(obuf);
+
 			obuf->len = len;
 			opipe->nrbufs++;
 			ibuf->offset += obuf->len;
@@ -1671,6 +1673,8 @@ static int link_pipe(struct pipe_inode_info *ipipe,
 		 */
 		obuf->flags &= ~PIPE_BUF_FLAG_GIFT;
 
+		pipe_buf_mark_unmergeable(obuf);
+
 		if (obuf->len > len)
 			obuf->len = len;
 
diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
index 5a3bb3b7c9ad..3ecd7ea212ae 100644
--- a/include/linux/pipe_fs_i.h
+++ b/include/linux/pipe_fs_i.h
@@ -182,6 +182,7 @@ void generic_pipe_buf_get(struct pipe_inode_info *, struct pipe_buffer *);
 int generic_pipe_buf_confirm(struct pipe_inode_info *, struct pipe_buffer *);
 int generic_pipe_buf_steal(struct pipe_inode_info *, struct pipe_buffer *);
 void generic_pipe_buf_release(struct pipe_inode_info *, struct pipe_buffer *);
+void pipe_buf_mark_unmergeable(struct pipe_buffer *buf);
 
 extern const struct pipe_buf_operations nosteal_pipe_buf_ops;
 
-- 
cgit v1.2.3


From 01e7187b41191376cee8bea8de9f907b001e87b4 Mon Sep 17 00:00:00 2001
From: Jann Horn <jannh@google.com>
Date: Wed, 23 Jan 2019 15:19:18 +0100
Subject: pipe: stop using ->can_merge

Al Viro pointed out that since there is only one pipe buffer type to which
new data can be appended, it isn't necessary to have a ->can_merge field in
struct pipe_buf_operations, we can just check for a magic type.

Suggested-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Jann Horn <jannh@google.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/pipe.c                 | 20 ++++++++++++++++----
 fs/splice.c               |  4 ----
 include/linux/pipe_fs_i.h |  7 -------
 kernel/relay.c            |  1 -
 kernel/trace/trace.c      |  2 --
 net/smc/smc_rx.c          |  1 -
 6 files changed, 16 insertions(+), 19 deletions(-)

(limited to 'include')

diff --git a/fs/pipe.c b/fs/pipe.c
index c51750ed4011..0ff09b490ddf 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -226,8 +226,8 @@ void generic_pipe_buf_release(struct pipe_inode_info *pipe,
 }
 EXPORT_SYMBOL(generic_pipe_buf_release);
 
+/* New data written to a pipe may be appended to a buffer with this type. */
 static const struct pipe_buf_operations anon_pipe_buf_ops = {
-	.can_merge = 1,
 	.confirm = generic_pipe_buf_confirm,
 	.release = anon_pipe_buf_release,
 	.steal = anon_pipe_buf_steal,
@@ -235,7 +235,6 @@ static const struct pipe_buf_operations anon_pipe_buf_ops = {
 };
 
 static const struct pipe_buf_operations anon_pipe_buf_nomerge_ops = {
-	.can_merge = 0,
 	.confirm = generic_pipe_buf_confirm,
 	.release = anon_pipe_buf_release,
 	.steal = anon_pipe_buf_steal,
@@ -243,19 +242,32 @@ static const struct pipe_buf_operations anon_pipe_buf_nomerge_ops = {
 };
 
 static const struct pipe_buf_operations packet_pipe_buf_ops = {
-	.can_merge = 0,
 	.confirm = generic_pipe_buf_confirm,
 	.release = anon_pipe_buf_release,
 	.steal = anon_pipe_buf_steal,
 	.get = generic_pipe_buf_get,
 };
 
+/**
+ * pipe_buf_mark_unmergeable - mark a &struct pipe_buffer as unmergeable
+ * @buf:	the buffer to mark
+ *
+ * Description:
+ *	This function ensures that no future writes will be merged into the
+ *	given &struct pipe_buffer. This is necessary when multiple pipe buffers
+ *	share the same backing page.
+ */
 void pipe_buf_mark_unmergeable(struct pipe_buffer *buf)
 {
 	if (buf->ops == &anon_pipe_buf_ops)
 		buf->ops = &anon_pipe_buf_nomerge_ops;
 }
 
+static bool pipe_buf_can_merge(struct pipe_buffer *buf)
+{
+	return buf->ops == &anon_pipe_buf_ops;
+}
+
 static ssize_t
 pipe_read(struct kiocb *iocb, struct iov_iter *to)
 {
@@ -393,7 +405,7 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
 		struct pipe_buffer *buf = pipe->bufs + lastbuf;
 		int offset = buf->offset + buf->len;
 
-		if (buf->ops->can_merge && offset + chars <= PAGE_SIZE) {
+		if (pipe_buf_can_merge(buf) && offset + chars <= PAGE_SIZE) {
 			ret = pipe_buf_confirm(pipe, buf);
 			if (ret)
 				goto out;
diff --git a/fs/splice.c b/fs/splice.c
index 90c29675d573..fc71e9733f7a 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -138,7 +138,6 @@ error:
 }
 
 const struct pipe_buf_operations page_cache_pipe_buf_ops = {
-	.can_merge = 0,
 	.confirm = page_cache_pipe_buf_confirm,
 	.release = page_cache_pipe_buf_release,
 	.steal = page_cache_pipe_buf_steal,
@@ -156,7 +155,6 @@ static int user_page_pipe_buf_steal(struct pipe_inode_info *pipe,
 }
 
 static const struct pipe_buf_operations user_page_pipe_buf_ops = {
-	.can_merge = 0,
 	.confirm = generic_pipe_buf_confirm,
 	.release = page_cache_pipe_buf_release,
 	.steal = user_page_pipe_buf_steal,
@@ -326,7 +324,6 @@ ssize_t generic_file_splice_read(struct file *in, loff_t *ppos,
 EXPORT_SYMBOL(generic_file_splice_read);
 
 const struct pipe_buf_operations default_pipe_buf_ops = {
-	.can_merge = 0,
 	.confirm = generic_pipe_buf_confirm,
 	.release = generic_pipe_buf_release,
 	.steal = generic_pipe_buf_steal,
@@ -341,7 +338,6 @@ static int generic_pipe_buf_nosteal(struct pipe_inode_info *pipe,
 
 /* Pipe buffer operations for a socket and similar. */
 const struct pipe_buf_operations nosteal_pipe_buf_ops = {
-	.can_merge = 0,
 	.confirm = generic_pipe_buf_confirm,
 	.release = generic_pipe_buf_release,
 	.steal = generic_pipe_buf_nosteal,
diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
index 3ecd7ea212ae..787d224ff43e 100644
--- a/include/linux/pipe_fs_i.h
+++ b/include/linux/pipe_fs_i.h
@@ -73,13 +73,6 @@ struct pipe_inode_info {
  * in fs/pipe.c for the pipe and generic variants of these hooks.
  */
 struct pipe_buf_operations {
-	/*
-	 * This is set to 1, if the generic pipe read/write may coalesce
-	 * data into an existing buffer. If this is set to 0, a new pipe
-	 * page segment is always used for new data.
-	 */
-	int can_merge;
-
 	/*
 	 * ->confirm() verifies that the data in the pipe buffer is there
 	 * and that the contents are good. If the pages in the pipe belong
diff --git a/kernel/relay.c b/kernel/relay.c
index 04f248644e06..db3e419c25a6 100644
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -1175,7 +1175,6 @@ static void relay_pipe_buf_release(struct pipe_inode_info *pipe,
 }
 
 static const struct pipe_buf_operations relay_pipe_buf_ops = {
-	.can_merge = 0,
 	.confirm = generic_pipe_buf_confirm,
 	.release = relay_pipe_buf_release,
 	.steal = generic_pipe_buf_steal,
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index c521b7347482..f380139e972c 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -5823,7 +5823,6 @@ static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
 }
 
 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
-	.can_merge		= 0,
 	.confirm		= generic_pipe_buf_confirm,
 	.release		= generic_pipe_buf_release,
 	.steal			= generic_pipe_buf_steal,
@@ -6843,7 +6842,6 @@ static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
 
 /* Pipe buffer operations for a buffer. */
 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
-	.can_merge		= 0,
 	.confirm		= generic_pipe_buf_confirm,
 	.release		= buffer_pipe_buf_release,
 	.steal			= generic_pipe_buf_steal,
diff --git a/net/smc/smc_rx.c b/net/smc/smc_rx.c
index bbcf0fe4ae10..413a6abf227e 100644
--- a/net/smc/smc_rx.c
+++ b/net/smc/smc_rx.c
@@ -136,7 +136,6 @@ static int smc_rx_pipe_buf_nosteal(struct pipe_inode_info *pipe,
 }
 
 static const struct pipe_buf_operations smc_pipe_ops = {
-	.can_merge = 0,
 	.confirm = generic_pipe_buf_confirm,
 	.release = smc_rx_pipe_buf_release,
 	.steal = smc_rx_pipe_buf_nosteal,
-- 
cgit v1.2.3


From 4bc59c2f7e306775f3d2e1bbafaa854dd1e09335 Mon Sep 17 00:00:00 2001
From: Enric Balletbo i Serra <enric.balletbo@collabora.com>
Date: Wed, 12 Dec 2018 18:33:56 +0100
Subject: mfd / platform: cros_ec: Use devm_mfd_add_devices

Use devm_mfd_add_devices() for adding cros-ec core MFD child devices. This
reduces the need of remove callback from platform/chrome for removing the
MFD child devices.

Signed-off-by: Enric Balletbo i Serra <enric.balletbo@collabora.com>
Reviewed-by: Guenter Roeck <groeck@chromium.org>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/cros_ec.c                 | 14 +++-----------
 drivers/platform/chrome/cros_ec_i2c.c | 10 ----------
 drivers/platform/chrome/cros_ec_lpc.c |  4 ----
 drivers/platform/chrome/cros_ec_spi.c | 11 -----------
 include/linux/mfd/cros_ec.h           | 10 ----------
 5 files changed, 3 insertions(+), 46 deletions(-)

(limited to 'include')

diff --git a/drivers/mfd/cros_ec.c b/drivers/mfd/cros_ec.c
index fe6f83766144..6acfe036d522 100644
--- a/drivers/mfd/cros_ec.c
+++ b/drivers/mfd/cros_ec.c
@@ -129,8 +129,8 @@ int cros_ec_register(struct cros_ec_device *ec_dev)
 		}
 	}
 
-	err = mfd_add_devices(ec_dev->dev, PLATFORM_DEVID_AUTO, &ec_cell, 1,
-			      NULL, ec_dev->irq, NULL);
+	err = devm_mfd_add_devices(ec_dev->dev, PLATFORM_DEVID_AUTO, &ec_cell,
+				   1, NULL, ec_dev->irq, NULL);
 	if (err) {
 		dev_err(dev,
 			"Failed to register Embedded Controller subdevice %d\n",
@@ -147,7 +147,7 @@ int cros_ec_register(struct cros_ec_device *ec_dev)
 		 * - the EC is responsive at init time (it is not true for a
 		 *   sensor hub.
 		 */
-		err = mfd_add_devices(ec_dev->dev, PLATFORM_DEVID_AUTO,
+		err = devm_mfd_add_devices(ec_dev->dev, PLATFORM_DEVID_AUTO,
 				      &ec_pd_cell, 1, NULL, ec_dev->irq, NULL);
 		if (err) {
 			dev_err(dev,
@@ -181,14 +181,6 @@ int cros_ec_register(struct cros_ec_device *ec_dev)
 }
 EXPORT_SYMBOL(cros_ec_register);
 
-int cros_ec_remove(struct cros_ec_device *ec_dev)
-{
-	mfd_remove_devices(ec_dev->dev);
-
-	return 0;
-}
-EXPORT_SYMBOL(cros_ec_remove);
-
 #ifdef CONFIG_PM_SLEEP
 int cros_ec_suspend(struct cros_ec_device *ec_dev)
 {
diff --git a/drivers/platform/chrome/cros_ec_i2c.c b/drivers/platform/chrome/cros_ec_i2c.c
index ef9b4763356f..9a009eaa4ada 100644
--- a/drivers/platform/chrome/cros_ec_i2c.c
+++ b/drivers/platform/chrome/cros_ec_i2c.c
@@ -317,15 +317,6 @@ static int cros_ec_i2c_probe(struct i2c_client *client,
 	return 0;
 }
 
-static int cros_ec_i2c_remove(struct i2c_client *client)
-{
-	struct cros_ec_device *ec_dev = i2c_get_clientdata(client);
-
-	cros_ec_remove(ec_dev);
-
-	return 0;
-}
-
 #ifdef CONFIG_PM_SLEEP
 static int cros_ec_i2c_suspend(struct device *dev)
 {
@@ -376,7 +367,6 @@ static struct i2c_driver cros_ec_driver = {
 		.pm	= &cros_ec_i2c_pm_ops,
 	},
 	.probe		= cros_ec_i2c_probe,
-	.remove		= cros_ec_i2c_remove,
 	.id_table	= cros_ec_i2c_id,
 };
 
diff --git a/drivers/platform/chrome/cros_ec_lpc.c b/drivers/platform/chrome/cros_ec_lpc.c
index e1b75775cd4a..14684a56e40f 100644
--- a/drivers/platform/chrome/cros_ec_lpc.c
+++ b/drivers/platform/chrome/cros_ec_lpc.c
@@ -327,7 +327,6 @@ static int cros_ec_lpc_probe(struct platform_device *pdev)
 
 static int cros_ec_lpc_remove(struct platform_device *pdev)
 {
-	struct cros_ec_device *ec_dev;
 	struct acpi_device *adev;
 
 	adev = ACPI_COMPANION(&pdev->dev);
@@ -335,9 +334,6 @@ static int cros_ec_lpc_remove(struct platform_device *pdev)
 		acpi_remove_notify_handler(adev->handle, ACPI_ALL_NOTIFY,
 					   cros_ec_lpc_acpi_notify);
 
-	ec_dev = platform_get_drvdata(pdev);
-	cros_ec_remove(ec_dev);
-
 	return 0;
 }
 
diff --git a/drivers/platform/chrome/cros_ec_spi.c b/drivers/platform/chrome/cros_ec_spi.c
index 2060d1483043..6cfbc2835beb 100644
--- a/drivers/platform/chrome/cros_ec_spi.c
+++ b/drivers/platform/chrome/cros_ec_spi.c
@@ -685,16 +685,6 @@ static int cros_ec_spi_probe(struct spi_device *spi)
 	return 0;
 }
 
-static int cros_ec_spi_remove(struct spi_device *spi)
-{
-	struct cros_ec_device *ec_dev;
-
-	ec_dev = spi_get_drvdata(spi);
-	cros_ec_remove(ec_dev);
-
-	return 0;
-}
-
 #ifdef CONFIG_PM_SLEEP
 static int cros_ec_spi_suspend(struct device *dev)
 {
@@ -733,7 +723,6 @@ static struct spi_driver cros_ec_driver_spi = {
 		.pm	= &cros_ec_spi_pm_ops,
 	},
 	.probe		= cros_ec_spi_probe,
-	.remove		= cros_ec_spi_remove,
 	.id_table	= cros_ec_spi_id,
 };
 
diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h
index de8b588c8776..977ebaa78e99 100644
--- a/include/linux/mfd/cros_ec.h
+++ b/include/linux/mfd/cros_ec.h
@@ -281,16 +281,6 @@ int cros_ec_cmd_xfer(struct cros_ec_device *ec_dev,
 int cros_ec_cmd_xfer_status(struct cros_ec_device *ec_dev,
 			    struct cros_ec_command *msg);
 
-/**
- * cros_ec_remove() - Remove a ChromeOS EC.
- * @ec_dev: Device to register.
- *
- * Call this to deregister a ChromeOS EC, then clean up any private data.
- *
- * Return: 0 on success or negative error code.
- */
-int cros_ec_remove(struct cros_ec_device *ec_dev);
-
 /**
  * cros_ec_register() - Register a new ChromeOS EC, using the provided info.
  * @ec_dev: Device to register.
-- 
cgit v1.2.3


From ecf8a6cd949ef236ce435ae488ceb6b3354e677e Mon Sep 17 00:00:00 2001
From: Enric Balletbo i Serra <enric.balletbo@collabora.com>
Date: Wed, 12 Dec 2018 18:33:57 +0100
Subject: mfd / platform: cros_ec: Move lightbar attributes to its own driver

The entire way how cros sysfs attibutes are created is broken.
cros_ec_lightbar should be its own driver and its attributes should be
associated with a lightbar driver not the mfd driver. In order to retain
the path, the lightbar attributes are attached to the cros_class.

The patch also adds the sysfs documentation.

Signed-off-by: Enric Balletbo i Serra <enric.balletbo@collabora.com>
Reviewed-by: Guenter Roeck <groeck@chromium.org>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 .../sysfs-class-chromeos-driver-cros-ec-lightbar   | 74 +++++++++++++++++
 drivers/mfd/cros_ec_dev.c                          | 24 +++---
 drivers/mfd/cros_ec_dev.h                          |  6 --
 drivers/platform/chrome/Kconfig                    | 11 +++
 drivers/platform/chrome/Makefile                   |  3 +-
 drivers/platform/chrome/cros_ec_lightbar.c         | 95 +++++++++++++++++-----
 include/linux/mfd/cros_ec.h                        |  1 -
 7 files changed, 173 insertions(+), 41 deletions(-)
 create mode 100644 Documentation/ABI/testing/sysfs-class-chromeos-driver-cros-ec-lightbar

(limited to 'include')

diff --git a/Documentation/ABI/testing/sysfs-class-chromeos-driver-cros-ec-lightbar b/Documentation/ABI/testing/sysfs-class-chromeos-driver-cros-ec-lightbar
new file mode 100644
index 000000000000..57a037791403
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-chromeos-driver-cros-ec-lightbar
@@ -0,0 +1,74 @@
+What:		/sys/class/chromeos/<ec-device-name>/lightbar/brightness
+Date:		August 2015
+KernelVersion:	4.2
+Description:
+		Writing to this file adjusts the overall brightness of
+		the lightbar, separate from any color intensity. The
+		valid range is 0 (off) to 255 (maximum brightness).
+
+What:		/sys/class/chromeos/<ec-device-name>/lightbar/interval_msec
+Date:		August 2015
+KernelVersion:	4.2
+Description:
+		The lightbar is controlled by an embedded controller (EC),
+		which also manages the keyboard, battery charging, fans,
+		and other system hardware. To prevent unprivileged users
+		from interfering with the other EC functions, the rate at
+		which the lightbar control files can be read or written is
+		limited.
+
+		Reading this file will return the number of milliseconds
+		that must elapse between accessing any of the lightbar
+		functions through this interface. Going faster will simply
+		block until the necessary interval has lapsed. The interval
+		applies uniformly to all accesses of any kind by any user.
+
+What:		/sys/class/chromeos/<ec-device-name>/lightbar/led_rgb
+Date:		August 2015
+KernelVersion:	4.2
+Description:
+		This allows you to control each LED segment. If the
+		lightbar is already running one of the automatic
+		sequences, you probably won’t see anything change because
+		your color setting will be almost immediately replaced.
+		To get useful results, you should stop the lightbar
+		sequence first.
+
+		The values written to this file are sets of four integers,
+		indicating LED, RED, GREEN, BLUE. The LED number is 0 to 3
+		to select a single segment, or 4 to set all four segments
+		to the same value at once. The RED, GREEN, and BLUE
+		numbers should be in the range 0 (off) to 255 (maximum).
+		You can update more than one segment at a time by writing
+		more than one set of four integers.
+
+What:		/sys/class/chromeos/<ec-device-name>/lightbar/program
+Date:		August 2015
+KernelVersion:	4.2
+Description:
+		This allows you to upload and run custom lightbar sequences.
+
+What:		/sys/class/chromeos/<ec-device-name>/lightbar/sequence
+Date:		August 2015
+KernelVersion:	4.2
+Description:
+		The Pixel lightbar has a number of built-in sequences
+		that it displays under various conditions, such as at
+		power on, shut down, or while running. Reading from this
+		file displays the current sequence that the lightbar is
+		displaying. Writing to this file allows you to change the
+		sequence.
+
+What:		/sys/class/chromeos/<ec-device-name>/lightbar/userspace_control
+Date:		August 2015
+KernelVersion:	4.2
+Description:
+		This allows you to take the control of the lightbar. This
+		prevents the kernel from going through its normal
+		sequences.
+
+What:		/sys/class/chromeos/<ec-device-name>/lightbar/version
+Date:		August 2015
+KernelVersion:	4.2
+Description:
+		Show the information about the lightbar version.
diff --git a/drivers/mfd/cros_ec_dev.c b/drivers/mfd/cros_ec_dev.c
index 2d0fee488c5a..b227718e0ec2 100644
--- a/drivers/mfd/cros_ec_dev.c
+++ b/drivers/mfd/cros_ec_dev.c
@@ -36,7 +36,6 @@ static int ec_major;
 
 static const struct attribute_group *cros_ec_groups[] = {
 	&cros_ec_attr_group,
-	&cros_ec_lightbar_attr_group,
 	&cros_ec_vbc_attr_group,
 	NULL,
 };
@@ -395,6 +394,10 @@ static const struct mfd_cell cros_usbpd_charger_cells[] = {
 	{ .name = "cros-usbpd-charger" }
 };
 
+static const struct mfd_cell cros_ec_platform_cells[] = {
+	{ .name = "cros-ec-lightbar" },
+};
+
 static int ec_device_probe(struct platform_device *pdev)
 {
 	int retval = -ENOMEM;
@@ -470,9 +473,6 @@ static int ec_device_probe(struct platform_device *pdev)
 				retval);
 	}
 
-	/* Take control of the lightbar from the EC. */
-	lb_manual_suspend_ctrl(ec, 1);
-
 	/* We can now add the sysfs class, we know which parameter to show */
 	retval = cdev_device_add(&ec->cdev, &ec->class_dev);
 	if (retval) {
@@ -480,6 +480,15 @@ static int ec_device_probe(struct platform_device *pdev)
 		goto failed;
 	}
 
+	retval = mfd_add_devices(ec->dev, PLATFORM_DEVID_AUTO,
+				 cros_ec_platform_cells,
+				 ARRAY_SIZE(cros_ec_platform_cells),
+				 NULL, 0, NULL);
+	if (retval)
+		dev_warn(ec->dev,
+			 "failed to add cros-ec platform devices: %d\n",
+			 retval);
+
 	if (cros_ec_debugfs_init(ec))
 		dev_warn(dev, "failed to create debugfs directory\n");
 
@@ -494,9 +503,6 @@ static int ec_device_remove(struct platform_device *pdev)
 {
 	struct cros_ec_dev *ec = dev_get_drvdata(&pdev->dev);
 
-	/* Let the EC take over the lightbar again. */
-	lb_manual_suspend_ctrl(ec, 0);
-
 	cros_ec_debugfs_remove(ec);
 
 	mfd_remove_devices(ec->dev);
@@ -525,8 +531,6 @@ static __maybe_unused int ec_device_suspend(struct device *dev)
 
 	cros_ec_debugfs_suspend(ec);
 
-	lb_suspend(ec);
-
 	return 0;
 }
 
@@ -536,8 +540,6 @@ static __maybe_unused int ec_device_resume(struct device *dev)
 
 	cros_ec_debugfs_resume(ec);
 
-	lb_resume(ec);
-
 	return 0;
 }
 
diff --git a/drivers/mfd/cros_ec_dev.h b/drivers/mfd/cros_ec_dev.h
index 978d836a0248..ec750433455a 100644
--- a/drivers/mfd/cros_ec_dev.h
+++ b/drivers/mfd/cros_ec_dev.h
@@ -44,10 +44,4 @@ struct cros_ec_readmem {
 #define CROS_EC_DEV_IOCXCMD   _IOWR(CROS_EC_DEV_IOC, 0, struct cros_ec_command)
 #define CROS_EC_DEV_IOCRDMEM  _IOWR(CROS_EC_DEV_IOC, 1, struct cros_ec_readmem)
 
-/* Lightbar utilities */
-extern bool ec_has_lightbar(struct cros_ec_dev *ec);
-extern int lb_manual_suspend_ctrl(struct cros_ec_dev *ec, uint8_t enable);
-extern int lb_suspend(struct cros_ec_dev *ec);
-extern int lb_resume(struct cros_ec_dev *ec);
-
 #endif /* _CROS_EC_DEV_H_ */
diff --git a/drivers/platform/chrome/Kconfig b/drivers/platform/chrome/Kconfig
index 16b1615958aa..6c05752a3334 100644
--- a/drivers/platform/chrome/Kconfig
+++ b/drivers/platform/chrome/Kconfig
@@ -111,4 +111,15 @@ config CROS_KBD_LED_BACKLIGHT
 	  To compile this driver as a module, choose M here: the
 	  module will be called cros_kbd_led_backlight.
 
+config CROS_EC_LIGHTBAR
+	tristate "Chromebook Pixel's lightbar support"
+	depends on MFD_CROS_EC_CHARDEV
+	default MFD_CROS_EC_CHARDEV
+	help
+	  This option exposes the Chromebook Pixel's lightbar to
+	  userspace.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called cros_ec_lightbar.
+
 endif # CHROMEOS_PLATFORMS
diff --git a/drivers/platform/chrome/Makefile b/drivers/platform/chrome/Makefile
index cd591bf872bb..3c29a4b405d5 100644
--- a/drivers/platform/chrome/Makefile
+++ b/drivers/platform/chrome/Makefile
@@ -3,7 +3,7 @@
 obj-$(CONFIG_CHROMEOS_LAPTOP)		+= chromeos_laptop.o
 obj-$(CONFIG_CHROMEOS_PSTORE)		+= chromeos_pstore.o
 obj-$(CONFIG_CHROMEOS_TBMC)		+= chromeos_tbmc.o
-cros_ec_ctl-objs			:= cros_ec_sysfs.o cros_ec_lightbar.o \
+cros_ec_ctl-objs			:= cros_ec_sysfs.o \
 					   cros_ec_vbc.o cros_ec_debugfs.o
 obj-$(CONFIG_CROS_EC_CTL)		+= cros_ec_ctl.o
 obj-$(CONFIG_CROS_EC_I2C)		+= cros_ec_i2c.o
@@ -13,3 +13,4 @@ cros_ec_lpcs-$(CONFIG_CROS_EC_LPC_MEC)	+= cros_ec_lpc_mec.o
 obj-$(CONFIG_CROS_EC_LPC)		+= cros_ec_lpcs.o
 obj-$(CONFIG_CROS_EC_PROTO)		+= cros_ec_proto.o
 obj-$(CONFIG_CROS_KBD_LED_BACKLIGHT)	+= cros_kbd_led_backlight.o
+obj-$(CONFIG_CROS_EC_LIGHTBAR)		+= cros_ec_lightbar.o
diff --git a/drivers/platform/chrome/cros_ec_lightbar.c b/drivers/platform/chrome/cros_ec_lightbar.c
index 68193bb53383..80eed6317570 100644
--- a/drivers/platform/chrome/cros_ec_lightbar.c
+++ b/drivers/platform/chrome/cros_ec_lightbar.c
@@ -33,6 +33,8 @@
 #include <linux/uaccess.h>
 #include <linux/slab.h>
 
+#define DRV_NAME "cros-ec-lightbar"
+
 /* Rate-limit the lightbar interface to prevent DoS. */
 static unsigned long lb_interval_jiffies = 50 * HZ / 1000;
 
@@ -373,7 +375,7 @@ error:
 	return ret;
 }
 
-int lb_manual_suspend_ctrl(struct cros_ec_dev *ec, uint8_t enable)
+static int lb_manual_suspend_ctrl(struct cros_ec_dev *ec, uint8_t enable)
 {
 	struct ec_params_lightbar *param;
 	struct cros_ec_command *msg;
@@ -408,25 +410,6 @@ error:
 
 	return ret;
 }
-EXPORT_SYMBOL(lb_manual_suspend_ctrl);
-
-int lb_suspend(struct cros_ec_dev *ec)
-{
-	if (userspace_control || ec != ec_with_lightbar)
-		return 0;
-
-	return lb_send_empty_cmd(ec, LIGHTBAR_CMD_SUSPEND);
-}
-EXPORT_SYMBOL(lb_suspend);
-
-int lb_resume(struct cros_ec_dev *ec)
-{
-	if (userspace_control || ec != ec_with_lightbar)
-		return 0;
-
-	return lb_send_empty_cmd(ec, LIGHTBAR_CMD_RESUME);
-}
-EXPORT_SYMBOL(lb_resume);
 
 static ssize_t sequence_store(struct device *dev, struct device_attribute *attr,
 			      const char *buf, size_t count)
@@ -584,7 +567,7 @@ static struct attribute *__lb_cmds_attrs[] = {
 	NULL,
 };
 
-bool ec_has_lightbar(struct cros_ec_dev *ec)
+static bool ec_has_lightbar(struct cros_ec_dev *ec)
 {
 	return !!get_lightbar_version(ec, NULL, NULL);
 }
@@ -616,4 +599,72 @@ struct attribute_group cros_ec_lightbar_attr_group = {
 	.attrs = __lb_cmds_attrs,
 	.is_visible = cros_ec_lightbar_attrs_are_visible,
 };
-EXPORT_SYMBOL(cros_ec_lightbar_attr_group);
+
+static int cros_ec_lightbar_probe(struct platform_device *pd)
+{
+	struct cros_ec_dev *ec_dev = dev_get_drvdata(pd->dev.parent);
+	struct device *dev = &pd->dev;
+	int ret;
+
+	/* Take control of the lightbar from the EC. */
+	lb_manual_suspend_ctrl(ec_dev, 1);
+
+	ret = sysfs_create_group(&ec_dev->class_dev.kobj,
+				 &cros_ec_lightbar_attr_group);
+	if (ret < 0)
+		dev_err(dev, "failed to create %s attributes. err=%d\n",
+			cros_ec_lightbar_attr_group.name, ret);
+
+	return ret;
+}
+
+static int cros_ec_lightbar_remove(struct platform_device *pd)
+{
+	struct cros_ec_dev *ec_dev = dev_get_drvdata(pd->dev.parent);
+
+	sysfs_remove_group(&ec_dev->class_dev.kobj,
+			   &cros_ec_lightbar_attr_group);
+
+	/* Let the EC take over the lightbar again. */
+	lb_manual_suspend_ctrl(ec_dev, 0);
+
+	return 0;
+}
+
+static int __maybe_unused cros_ec_lightbar_resume(struct device *dev)
+{
+	struct cros_ec_dev *ec_dev = dev_get_drvdata(dev);
+
+	if (userspace_control || ec_dev != ec_with_lightbar)
+		return 0;
+
+	return lb_send_empty_cmd(ec_dev, LIGHTBAR_CMD_RESUME);
+}
+
+static int __maybe_unused cros_ec_lightbar_suspend(struct device *dev)
+{
+	struct cros_ec_dev *ec_dev = dev_get_drvdata(dev);
+
+	if (userspace_control || ec_dev != ec_with_lightbar)
+		return 0;
+
+	return lb_send_empty_cmd(ec_dev, LIGHTBAR_CMD_SUSPEND);
+}
+
+static SIMPLE_DEV_PM_OPS(cros_ec_lightbar_pm_ops,
+			 cros_ec_lightbar_suspend, cros_ec_lightbar_resume);
+
+static struct platform_driver cros_ec_lightbar_driver = {
+	.driver = {
+		.name = DRV_NAME,
+		.pm = &cros_ec_lightbar_pm_ops,
+	},
+	.probe = cros_ec_lightbar_probe,
+	.remove = cros_ec_lightbar_remove,
+};
+
+module_platform_driver(cros_ec_lightbar_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Expose the Chromebook Pixel's lightbar to userspace");
+MODULE_ALIAS("platform:" DRV_NAME);
diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h
index 977ebaa78e99..1e9b569564ea 100644
--- a/include/linux/mfd/cros_ec.h
+++ b/include/linux/mfd/cros_ec.h
@@ -327,7 +327,6 @@ u32 cros_ec_get_host_event(struct cros_ec_device *ec_dev);
 
 /* sysfs stuff */
 extern struct attribute_group cros_ec_attr_group;
-extern struct attribute_group cros_ec_lightbar_attr_group;
 extern struct attribute_group cros_ec_vbc_attr_group;
 
 /* debugfs stuff */
-- 
cgit v1.2.3


From acb9900f9e8074858738f48bee9a705138961258 Mon Sep 17 00:00:00 2001
From: Enric Balletbo i Serra <enric.balletbo@collabora.com>
Date: Wed, 12 Dec 2018 18:33:58 +0100
Subject: mfd / platform: cros_ec: Move vbc attributes to its own driver

The entire way how cros sysfs attibutes are created is broken.
cros_ec_vbc should be its own driver and its attributes should be
associated with a vbc driver not the mfd driver. In order to retain
the path, the vbc attributes are attached to the cros_class.

The patch also adds the sysfs documentation.

Signed-off-by: Enric Balletbo i Serra <enric.balletbo@collabora.com>
Reviewed-by: Guenter Roeck <groeck@chromium.org>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 .../sysfs-class-chromeos-driver-cros-ec-vbc        |  6 +++
 drivers/mfd/cros_ec_dev.c                          |  2 +-
 drivers/platform/chrome/Kconfig                    | 11 ++++++
 drivers/platform/chrome/Makefile                   |  3 +-
 drivers/platform/chrome/cros_ec_vbc.c              | 43 +++++++++++++++++++++-
 include/linux/mfd/cros_ec.h                        |  1 -
 6 files changed, 62 insertions(+), 4 deletions(-)
 create mode 100644 Documentation/ABI/testing/sysfs-class-chromeos-driver-cros-ec-vbc

(limited to 'include')

diff --git a/Documentation/ABI/testing/sysfs-class-chromeos-driver-cros-ec-vbc b/Documentation/ABI/testing/sysfs-class-chromeos-driver-cros-ec-vbc
new file mode 100644
index 000000000000..38c5aaaaa89a
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-chromeos-driver-cros-ec-vbc
@@ -0,0 +1,6 @@
+What:		/sys/class/chromeos/<ec-device-name>/vbc/vboot_context
+Date:		October 2015
+KernelVersion:	4.4
+Description:
+		Read/write the verified boot context data included on a
+		small nvram space on some EC implementations.
diff --git a/drivers/mfd/cros_ec_dev.c b/drivers/mfd/cros_ec_dev.c
index b227718e0ec2..40c98808fa1c 100644
--- a/drivers/mfd/cros_ec_dev.c
+++ b/drivers/mfd/cros_ec_dev.c
@@ -36,7 +36,6 @@ static int ec_major;
 
 static const struct attribute_group *cros_ec_groups[] = {
 	&cros_ec_attr_group,
-	&cros_ec_vbc_attr_group,
 	NULL,
 };
 
@@ -396,6 +395,7 @@ static const struct mfd_cell cros_usbpd_charger_cells[] = {
 
 static const struct mfd_cell cros_ec_platform_cells[] = {
 	{ .name = "cros-ec-lightbar" },
+	{ .name = "cros-ec-vbc" },
 };
 
 static int ec_device_probe(struct platform_device *pdev)
diff --git a/drivers/platform/chrome/Kconfig b/drivers/platform/chrome/Kconfig
index 6c05752a3334..1e9dc9626e84 100644
--- a/drivers/platform/chrome/Kconfig
+++ b/drivers/platform/chrome/Kconfig
@@ -122,4 +122,15 @@ config CROS_EC_LIGHTBAR
 	  To compile this driver as a module, choose M here: the
 	  module will be called cros_ec_lightbar.
 
+config CROS_EC_VBC
+	tristate "ChromeOS EC vboot context support"
+	depends on MFD_CROS_EC_CHARDEV && OF
+	default MFD_CROS_EC_CHARDEV
+	help
+	  This option exposes the ChromeOS EC vboot context nvram to
+	  userspace.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called cros_ec_vbc.
+
 endif # CHROMEOS_PLATFORMS
diff --git a/drivers/platform/chrome/Makefile b/drivers/platform/chrome/Makefile
index 3c29a4b405d5..4081b7179df7 100644
--- a/drivers/platform/chrome/Makefile
+++ b/drivers/platform/chrome/Makefile
@@ -4,7 +4,7 @@ obj-$(CONFIG_CHROMEOS_LAPTOP)		+= chromeos_laptop.o
 obj-$(CONFIG_CHROMEOS_PSTORE)		+= chromeos_pstore.o
 obj-$(CONFIG_CHROMEOS_TBMC)		+= chromeos_tbmc.o
 cros_ec_ctl-objs			:= cros_ec_sysfs.o \
-					   cros_ec_vbc.o cros_ec_debugfs.o
+					   cros_ec_debugfs.o
 obj-$(CONFIG_CROS_EC_CTL)		+= cros_ec_ctl.o
 obj-$(CONFIG_CROS_EC_I2C)		+= cros_ec_i2c.o
 obj-$(CONFIG_CROS_EC_SPI)		+= cros_ec_spi.o
@@ -14,3 +14,4 @@ obj-$(CONFIG_CROS_EC_LPC)		+= cros_ec_lpcs.o
 obj-$(CONFIG_CROS_EC_PROTO)		+= cros_ec_proto.o
 obj-$(CONFIG_CROS_KBD_LED_BACKLIGHT)	+= cros_kbd_led_backlight.o
 obj-$(CONFIG_CROS_EC_LIGHTBAR)		+= cros_ec_lightbar.o
+obj-$(CONFIG_CROS_EC_VBC)		+= cros_ec_vbc.o
diff --git a/drivers/platform/chrome/cros_ec_vbc.c b/drivers/platform/chrome/cros_ec_vbc.c
index 5356f26bc022..da3bbf05e86f 100644
--- a/drivers/platform/chrome/cros_ec_vbc.c
+++ b/drivers/platform/chrome/cros_ec_vbc.c
@@ -22,8 +22,11 @@
 #include <linux/platform_device.h>
 #include <linux/mfd/cros_ec.h>
 #include <linux/mfd/cros_ec_commands.h>
+#include <linux/module.h>
 #include <linux/slab.h>
 
+#define DRV_NAME "cros-ec-vbc"
+
 static ssize_t vboot_context_read(struct file *filp, struct kobject *kobj,
 				  struct bin_attribute *att, char *buf,
 				  loff_t pos, size_t count)
@@ -132,4 +135,42 @@ struct attribute_group cros_ec_vbc_attr_group = {
 	.bin_attrs = cros_ec_vbc_bin_attrs,
 	.is_bin_visible = cros_ec_vbc_is_visible,
 };
-EXPORT_SYMBOL(cros_ec_vbc_attr_group);
+
+static int cros_ec_vbc_probe(struct platform_device *pd)
+{
+	struct cros_ec_dev *ec_dev = dev_get_drvdata(pd->dev.parent);
+	struct device *dev = &pd->dev;
+	int ret;
+
+	ret = sysfs_create_group(&ec_dev->class_dev.kobj,
+				 &cros_ec_vbc_attr_group);
+	if (ret < 0)
+		dev_err(dev, "failed to create %s attributes. err=%d\n",
+			cros_ec_vbc_attr_group.name, ret);
+
+	return ret;
+}
+
+static int cros_ec_vbc_remove(struct platform_device *pd)
+{
+	struct cros_ec_dev *ec_dev = dev_get_drvdata(pd->dev.parent);
+
+	sysfs_remove_group(&ec_dev->class_dev.kobj,
+			   &cros_ec_vbc_attr_group);
+
+	return 0;
+}
+
+static struct platform_driver cros_ec_vbc_driver = {
+	.driver = {
+		.name = DRV_NAME,
+	},
+	.probe = cros_ec_vbc_probe,
+	.remove = cros_ec_vbc_remove,
+};
+
+module_platform_driver(cros_ec_vbc_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Expose the vboot context nvram to userspace");
+MODULE_ALIAS("platform:" DRV_NAME);
diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h
index 1e9b569564ea..fdc3152cca1d 100644
--- a/include/linux/mfd/cros_ec.h
+++ b/include/linux/mfd/cros_ec.h
@@ -327,7 +327,6 @@ u32 cros_ec_get_host_event(struct cros_ec_device *ec_dev);
 
 /* sysfs stuff */
 extern struct attribute_group cros_ec_attr_group;
-extern struct attribute_group cros_ec_vbc_attr_group;
 
 /* debugfs stuff */
 int cros_ec_debugfs_init(struct cros_ec_dev *ec);
-- 
cgit v1.2.3


From 6fce0a2cf5a050e8a3326556d7d293e69be303be Mon Sep 17 00:00:00 2001
From: Enric Balletbo i Serra <enric.balletbo@collabora.com>
Date: Wed, 12 Dec 2018 18:33:59 +0100
Subject: mfd / platform: cros_ec: Move debugfs attributes to its own driver

The entire way how cros debugfs attibutes are created is broken.
cros_ec_debugfs should be its own driver and its attributes should be
associated with a debugfs driver not the mfd driver.

Signed-off-by: Enric Balletbo i Serra <enric.balletbo@collabora.com>
Reviewed-by: Guenter Roeck <groeck@chromium.org>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/cros_ec_dev.c                 | 41 +-------------------
 drivers/platform/chrome/Kconfig           | 11 ++++++
 drivers/platform/chrome/Makefile          |  4 +-
 drivers/platform/chrome/cros_ec_debugfs.c | 62 +++++++++++++++++++++----------
 include/linux/mfd/cros_ec.h               |  6 ---
 5 files changed, 56 insertions(+), 68 deletions(-)

(limited to 'include')

diff --git a/drivers/mfd/cros_ec_dev.c b/drivers/mfd/cros_ec_dev.c
index 40c98808fa1c..9955937b821d 100644
--- a/drivers/mfd/cros_ec_dev.c
+++ b/drivers/mfd/cros_ec_dev.c
@@ -394,6 +394,7 @@ static const struct mfd_cell cros_usbpd_charger_cells[] = {
 };
 
 static const struct mfd_cell cros_ec_platform_cells[] = {
+	{ .name = "cros-ec-debugfs" },
 	{ .name = "cros-ec-lightbar" },
 	{ .name = "cros-ec-vbc" },
 };
@@ -489,9 +490,6 @@ static int ec_device_probe(struct platform_device *pdev)
 			 "failed to add cros-ec platform devices: %d\n",
 			 retval);
 
-	if (cros_ec_debugfs_init(ec))
-		dev_warn(dev, "failed to create debugfs directory\n");
-
 	return 0;
 
 failed:
@@ -503,62 +501,25 @@ static int ec_device_remove(struct platform_device *pdev)
 {
 	struct cros_ec_dev *ec = dev_get_drvdata(&pdev->dev);
 
-	cros_ec_debugfs_remove(ec);
-
 	mfd_remove_devices(ec->dev);
 	cdev_del(&ec->cdev);
 	device_unregister(&ec->class_dev);
 	return 0;
 }
 
-static void ec_device_shutdown(struct platform_device *pdev)
-{
-	struct cros_ec_dev *ec = dev_get_drvdata(&pdev->dev);
-
-	/* Be sure to clear up debugfs delayed works */
-	cros_ec_debugfs_remove(ec);
-}
-
 static const struct platform_device_id cros_ec_id[] = {
 	{ DRV_NAME, 0 },
 	{ /* sentinel */ }
 };
 MODULE_DEVICE_TABLE(platform, cros_ec_id);
 
-static __maybe_unused int ec_device_suspend(struct device *dev)
-{
-	struct cros_ec_dev *ec = dev_get_drvdata(dev);
-
-	cros_ec_debugfs_suspend(ec);
-
-	return 0;
-}
-
-static __maybe_unused int ec_device_resume(struct device *dev)
-{
-	struct cros_ec_dev *ec = dev_get_drvdata(dev);
-
-	cros_ec_debugfs_resume(ec);
-
-	return 0;
-}
-
-static const struct dev_pm_ops cros_ec_dev_pm_ops = {
-#ifdef CONFIG_PM_SLEEP
-	.suspend = ec_device_suspend,
-	.resume = ec_device_resume,
-#endif
-};
-
 static struct platform_driver cros_ec_dev_driver = {
 	.driver = {
 		.name = DRV_NAME,
-		.pm = &cros_ec_dev_pm_ops,
 	},
 	.id_table = cros_ec_id,
 	.probe = ec_device_probe,
 	.remove = ec_device_remove,
-	.shutdown = ec_device_shutdown,
 };
 
 static int __init cros_ec_dev_init(void)
diff --git a/drivers/platform/chrome/Kconfig b/drivers/platform/chrome/Kconfig
index 1e9dc9626e84..6cbf5b69d156 100644
--- a/drivers/platform/chrome/Kconfig
+++ b/drivers/platform/chrome/Kconfig
@@ -133,4 +133,15 @@ config CROS_EC_VBC
 	  To compile this driver as a module, choose M here: the
 	  module will be called cros_ec_vbc.
 
+config CROS_EC_DEBUGFS
+	tristate "Export ChromeOS EC internals in DebugFS"
+	depends on MFD_CROS_EC_CHARDEV && DEBUG_FS
+	default MFD_CROS_EC_CHARDEV
+	help
+	  This option exposes the ChromeOS EC device internals to
+	  userspace.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called cros_ec_debugfs.
+
 endif # CHROMEOS_PLATFORMS
diff --git a/drivers/platform/chrome/Makefile b/drivers/platform/chrome/Makefile
index 4081b7179df7..12a5c4d18c17 100644
--- a/drivers/platform/chrome/Makefile
+++ b/drivers/platform/chrome/Makefile
@@ -3,8 +3,7 @@
 obj-$(CONFIG_CHROMEOS_LAPTOP)		+= chromeos_laptop.o
 obj-$(CONFIG_CHROMEOS_PSTORE)		+= chromeos_pstore.o
 obj-$(CONFIG_CHROMEOS_TBMC)		+= chromeos_tbmc.o
-cros_ec_ctl-objs			:= cros_ec_sysfs.o \
-					   cros_ec_debugfs.o
+cros_ec_ctl-objs			:= cros_ec_sysfs.o
 obj-$(CONFIG_CROS_EC_CTL)		+= cros_ec_ctl.o
 obj-$(CONFIG_CROS_EC_I2C)		+= cros_ec_i2c.o
 obj-$(CONFIG_CROS_EC_SPI)		+= cros_ec_spi.o
@@ -15,3 +14,4 @@ obj-$(CONFIG_CROS_EC_PROTO)		+= cros_ec_proto.o
 obj-$(CONFIG_CROS_KBD_LED_BACKLIGHT)	+= cros_kbd_led_backlight.o
 obj-$(CONFIG_CROS_EC_LIGHTBAR)		+= cros_ec_lightbar.o
 obj-$(CONFIG_CROS_EC_VBC)		+= cros_ec_vbc.o
+obj-$(CONFIG_CROS_EC_DEBUGFS)		+= cros_ec_debugfs.o
diff --git a/drivers/platform/chrome/cros_ec_debugfs.c b/drivers/platform/chrome/cros_ec_debugfs.c
index c62ee8e610a0..6cacac53dfce 100644
--- a/drivers/platform/chrome/cros_ec_debugfs.c
+++ b/drivers/platform/chrome/cros_ec_debugfs.c
@@ -23,12 +23,16 @@
 #include <linux/fs.h>
 #include <linux/mfd/cros_ec.h>
 #include <linux/mfd/cros_ec_commands.h>
+#include <linux/module.h>
 #include <linux/mutex.h>
+#include <linux/platform_device.h>
 #include <linux/poll.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/wait.h>
 
+#define DRV_NAME "cros-ec-debugfs"
+
 #define LOG_SHIFT		14
 #define LOG_SIZE		(1 << LOG_SHIFT)
 #define LOG_POLL_SEC		10
@@ -423,8 +427,9 @@ static int cros_ec_create_pdinfo(struct cros_ec_debugfs *debug_info)
 	return 0;
 }
 
-int cros_ec_debugfs_init(struct cros_ec_dev *ec)
+static int cros_ec_debugfs_probe(struct platform_device *pd)
 {
+	struct cros_ec_dev *ec = dev_get_drvdata(pd->dev.parent);
 	struct cros_ec_platform *ec_platform = dev_get_platdata(ec->dev);
 	const char *name = ec_platform->ec_name;
 	struct cros_ec_debugfs *debug_info;
@@ -453,40 +458,57 @@ int cros_ec_debugfs_init(struct cros_ec_dev *ec)
 
 	ec->debug_info = debug_info;
 
+	dev_set_drvdata(&pd->dev, ec);
+
 	return 0;
 
 remove_debugfs:
 	debugfs_remove_recursive(debug_info->dir);
 	return ret;
 }
-EXPORT_SYMBOL(cros_ec_debugfs_init);
 
-void cros_ec_debugfs_remove(struct cros_ec_dev *ec)
+static int cros_ec_debugfs_remove(struct platform_device *pd)
 {
-	if (!ec->debug_info)
-		return;
+	struct cros_ec_dev *ec = dev_get_drvdata(pd->dev.parent);
 
 	debugfs_remove_recursive(ec->debug_info->dir);
 	cros_ec_cleanup_console_log(ec->debug_info);
+
+	return 0;
 }
-EXPORT_SYMBOL(cros_ec_debugfs_remove);
 
-void cros_ec_debugfs_suspend(struct cros_ec_dev *ec)
+static int __maybe_unused cros_ec_debugfs_suspend(struct device *dev)
 {
-	/*
-	 * cros_ec_debugfs_init() failures are non-fatal; it's also possible
-	 * that we initted things but decided that console log wasn't supported.
-	 * We'll use the same set of checks that cros_ec_debugfs_remove() +
-	 * cros_ec_cleanup_console_log() end up using to handle those cases.
-	 */
-	if (ec->debug_info && ec->debug_info->log_buffer.buf)
-		cancel_delayed_work_sync(&ec->debug_info->log_poll_work);
+	struct cros_ec_dev *ec = dev_get_drvdata(dev);
+
+	cancel_delayed_work_sync(&ec->debug_info->log_poll_work);
+
+	return 0;
 }
-EXPORT_SYMBOL(cros_ec_debugfs_suspend);
 
-void cros_ec_debugfs_resume(struct cros_ec_dev *ec)
+static int __maybe_unused cros_ec_debugfs_resume(struct device *dev)
 {
-	if (ec->debug_info && ec->debug_info->log_buffer.buf)
-		schedule_delayed_work(&ec->debug_info->log_poll_work, 0);
+	struct cros_ec_dev *ec = dev_get_drvdata(dev);
+
+	schedule_delayed_work(&ec->debug_info->log_poll_work, 0);
+
+	return 0;
 }
-EXPORT_SYMBOL(cros_ec_debugfs_resume);
+
+static SIMPLE_DEV_PM_OPS(cros_ec_debugfs_pm_ops,
+			 cros_ec_debugfs_suspend, cros_ec_debugfs_resume);
+
+static struct platform_driver cros_ec_debugfs_driver = {
+	.driver = {
+		.name = DRV_NAME,
+		.pm = &cros_ec_debugfs_pm_ops,
+	},
+	.probe = cros_ec_debugfs_probe,
+	.remove = cros_ec_debugfs_remove,
+};
+
+module_platform_driver(cros_ec_debugfs_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Debug logs for ChromeOS EC");
+MODULE_ALIAS("platform:" DRV_NAME);
diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h
index fdc3152cca1d..e50860d190db 100644
--- a/include/linux/mfd/cros_ec.h
+++ b/include/linux/mfd/cros_ec.h
@@ -328,10 +328,4 @@ u32 cros_ec_get_host_event(struct cros_ec_device *ec_dev);
 /* sysfs stuff */
 extern struct attribute_group cros_ec_attr_group;
 
-/* debugfs stuff */
-int cros_ec_debugfs_init(struct cros_ec_dev *ec);
-void cros_ec_debugfs_remove(struct cros_ec_dev *ec);
-void cros_ec_debugfs_suspend(struct cros_ec_dev *ec);
-void cros_ec_debugfs_resume(struct cros_ec_dev *ec);
-
 #endif /* __LINUX_MFD_CROS_EC_H */
-- 
cgit v1.2.3


From 6fd7f2bbd4422e7635bc771cd1ec440378158cb1 Mon Sep 17 00:00:00 2001
From: Enric Balletbo i Serra <enric.balletbo@collabora.com>
Date: Wed, 12 Dec 2018 18:34:00 +0100
Subject: mfd / platform: cros_ec: Move device sysfs attributes to its own
 driver

The entire way how cros debugfs attibutes are created is broken.
cros_ec_sysfs should be its own driver and its attributes should be
associated with the sysfs driver not the mfd driver.

The patch also adds the sysfs documentation.

Signed-off-by: Enric Balletbo i Serra <enric.balletbo@collabora.com>
Reviewed-by: Guenter Roeck <groeck@chromium.org>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 Documentation/ABI/testing/sysfs-class-chromeos | 32 +++++++++++++++++++++++
 drivers/mfd/Kconfig                            |  1 -
 drivers/mfd/cros_ec_dev.c                      |  7 +----
 drivers/platform/chrome/Kconfig                | 14 +++++++---
 drivers/platform/chrome/Makefile               |  3 +--
 drivers/platform/chrome/cros_ec_lightbar.c     |  2 +-
 drivers/platform/chrome/cros_ec_sysfs.c        | 36 +++++++++++++++++++++++++-
 include/linux/mfd/cros_ec.h                    |  3 ---
 8 files changed, 81 insertions(+), 17 deletions(-)
 create mode 100644 Documentation/ABI/testing/sysfs-class-chromeos

(limited to 'include')

diff --git a/Documentation/ABI/testing/sysfs-class-chromeos b/Documentation/ABI/testing/sysfs-class-chromeos
new file mode 100644
index 000000000000..5819699d66ec
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-chromeos
@@ -0,0 +1,32 @@
+What:		/sys/class/chromeos/<ec-device-name>/flashinfo
+Date:		August 2015
+KernelVersion:	4.2
+Description:
+		Show the EC flash information.
+
+What:		/sys/class/chromeos/<ec-device-name>/kb_wake_angle
+Date:		March 2018
+KernelVersion:	4.17
+Description:
+		Control the keyboard wake lid angle. Values are between
+		0 and 360. This file will also show the keyboard wake lid
+		angle by querying the hardware.
+
+What:		/sys/class/chromeos/<ec-device-name>/reboot
+Date:		August 2015
+KernelVersion:	4.2
+Description:
+		Tell the EC to reboot in various ways. Options are:
+		"cancel": Cancel a pending reboot.
+		"ro": Jump to RO without rebooting.
+		"rw": Jump to RW without rebooting.
+		"cold": Cold reboot.
+		"disable-jump": Disable jump until next reboot.
+		"hibernate": Hibernate the EC.
+		"at-shutdown": Reboot after an AP shutdown.
+
+What:		/sys/class/chromeos/<ec-device-name>/version
+Date:		August 2015
+KernelVersion:	4.2
+Description:
+		Show the information about the EC software and hardware.
diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index f461460a2aeb..2acc105e43cc 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -215,7 +215,6 @@ config MFD_CROS_EC
 config MFD_CROS_EC_CHARDEV
         tristate "Chrome OS Embedded Controller userspace device interface"
         depends on MFD_CROS_EC
-        select CROS_EC_CTL
         ---help---
           This driver adds support to talk with the ChromeOS EC from userspace.
 
diff --git a/drivers/mfd/cros_ec_dev.c b/drivers/mfd/cros_ec_dev.c
index 9955937b821d..b9ec2a798dbb 100644
--- a/drivers/mfd/cros_ec_dev.c
+++ b/drivers/mfd/cros_ec_dev.c
@@ -34,15 +34,9 @@
 #define CROS_MAX_DEV 128
 static int ec_major;
 
-static const struct attribute_group *cros_ec_groups[] = {
-	&cros_ec_attr_group,
-	NULL,
-};
-
 static struct class cros_class = {
 	.owner          = THIS_MODULE,
 	.name           = "chromeos",
-	.dev_groups     = cros_ec_groups,
 };
 
 /* Basic communication */
@@ -396,6 +390,7 @@ static const struct mfd_cell cros_usbpd_charger_cells[] = {
 static const struct mfd_cell cros_ec_platform_cells[] = {
 	{ .name = "cros-ec-debugfs" },
 	{ .name = "cros-ec-lightbar" },
+	{ .name = "cros-ec-sysfs" },
 	{ .name = "cros-ec-vbc" },
 };
 
diff --git a/drivers/platform/chrome/Kconfig b/drivers/platform/chrome/Kconfig
index 6cbf5b69d156..5e2fde5ff63d 100644
--- a/drivers/platform/chrome/Kconfig
+++ b/drivers/platform/chrome/Kconfig
@@ -49,9 +49,6 @@ config CHROMEOS_TBMC
 	  To compile this driver as a module, choose M here: the
 	  module will be called chromeos_tbmc.
 
-config CROS_EC_CTL
-        tristate
-
 config CROS_EC_I2C
 	tristate "ChromeOS Embedded Controller (I2C)"
 	depends on MFD_CROS_EC && I2C
@@ -144,4 +141,15 @@ config CROS_EC_DEBUGFS
 	  To compile this driver as a module, choose M here: the
 	  module will be called cros_ec_debugfs.
 
+config CROS_EC_SYSFS
+	tristate "ChromeOS EC control and information through sysfs"
+	depends on MFD_CROS_EC_CHARDEV && SYSFS
+	default MFD_CROS_EC_CHARDEV
+	help
+	  This option exposes some sysfs attributes to control and get
+	  information from ChromeOS EC.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called cros_ec_sysfs.
+
 endif # CHROMEOS_PLATFORMS
diff --git a/drivers/platform/chrome/Makefile b/drivers/platform/chrome/Makefile
index 12a5c4d18c17..fdbee501931b 100644
--- a/drivers/platform/chrome/Makefile
+++ b/drivers/platform/chrome/Makefile
@@ -3,8 +3,6 @@
 obj-$(CONFIG_CHROMEOS_LAPTOP)		+= chromeos_laptop.o
 obj-$(CONFIG_CHROMEOS_PSTORE)		+= chromeos_pstore.o
 obj-$(CONFIG_CHROMEOS_TBMC)		+= chromeos_tbmc.o
-cros_ec_ctl-objs			:= cros_ec_sysfs.o
-obj-$(CONFIG_CROS_EC_CTL)		+= cros_ec_ctl.o
 obj-$(CONFIG_CROS_EC_I2C)		+= cros_ec_i2c.o
 obj-$(CONFIG_CROS_EC_SPI)		+= cros_ec_spi.o
 cros_ec_lpcs-objs			:= cros_ec_lpc.o cros_ec_lpc_reg.o
@@ -15,3 +13,4 @@ obj-$(CONFIG_CROS_KBD_LED_BACKLIGHT)	+= cros_kbd_led_backlight.o
 obj-$(CONFIG_CROS_EC_LIGHTBAR)		+= cros_ec_lightbar.o
 obj-$(CONFIG_CROS_EC_VBC)		+= cros_ec_vbc.o
 obj-$(CONFIG_CROS_EC_DEBUGFS)		+= cros_ec_debugfs.o
+obj-$(CONFIG_CROS_EC_SYSFS)		+= cros_ec_sysfs.o
diff --git a/drivers/platform/chrome/cros_ec_lightbar.c b/drivers/platform/chrome/cros_ec_lightbar.c
index 80eed6317570..c22318ba93aa 100644
--- a/drivers/platform/chrome/cros_ec_lightbar.c
+++ b/drivers/platform/chrome/cros_ec_lightbar.c
@@ -594,7 +594,7 @@ static umode_t cros_ec_lightbar_attrs_are_visible(struct kobject *kobj,
 	return 0;
 }
 
-struct attribute_group cros_ec_lightbar_attr_group = {
+static struct attribute_group cros_ec_lightbar_attr_group = {
 	.name = "lightbar",
 	.attrs = __lb_cmds_attrs,
 	.is_visible = cros_ec_lightbar_attrs_are_visible,
diff --git a/drivers/platform/chrome/cros_ec_sysfs.c b/drivers/platform/chrome/cros_ec_sysfs.c
index f34a50121064..0ff5aa30c070 100644
--- a/drivers/platform/chrome/cros_ec_sysfs.c
+++ b/drivers/platform/chrome/cros_ec_sysfs.c
@@ -34,6 +34,8 @@
 #include <linux/types.h>
 #include <linux/uaccess.h>
 
+#define DRV_NAME "cros-ec-sysfs"
+
 /* Accessor functions */
 
 static ssize_t reboot_show(struct device *dev,
@@ -353,7 +355,39 @@ struct attribute_group cros_ec_attr_group = {
 	.attrs = __ec_attrs,
 	.is_visible = cros_ec_ctrl_visible,
 };
-EXPORT_SYMBOL(cros_ec_attr_group);
+
+static int cros_ec_sysfs_probe(struct platform_device *pd)
+{
+	struct cros_ec_dev *ec_dev = dev_get_drvdata(pd->dev.parent);
+	struct device *dev = &pd->dev;
+	int ret;
+
+	ret = sysfs_create_group(&ec_dev->class_dev.kobj, &cros_ec_attr_group);
+	if (ret < 0)
+		dev_err(dev, "failed to create attributes. err=%d\n", ret);
+
+	return ret;
+}
+
+static int cros_ec_sysfs_remove(struct platform_device *pd)
+{
+	struct cros_ec_dev *ec_dev = dev_get_drvdata(pd->dev.parent);
+
+	sysfs_remove_group(&ec_dev->class_dev.kobj, &cros_ec_attr_group);
+
+	return 0;
+}
+
+static struct platform_driver cros_ec_sysfs_driver = {
+	.driver = {
+		.name = DRV_NAME,
+	},
+	.probe = cros_ec_sysfs_probe,
+	.remove = cros_ec_sysfs_remove,
+};
+
+module_platform_driver(cros_ec_sysfs_driver);
 
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("ChromeOS EC control driver");
+MODULE_ALIAS("platform:" DRV_NAME);
diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h
index e50860d190db..8f2a8918bfa3 100644
--- a/include/linux/mfd/cros_ec.h
+++ b/include/linux/mfd/cros_ec.h
@@ -325,7 +325,4 @@ int cros_ec_get_next_event(struct cros_ec_device *ec_dev, bool *wake_event);
  */
 u32 cros_ec_get_host_event(struct cros_ec_device *ec_dev);
 
-/* sysfs stuff */
-extern struct attribute_group cros_ec_attr_group;
-
 #endif /* __LINUX_MFD_CROS_EC_H */
-- 
cgit v1.2.3


From efb5a790dfc33b36bc64dd5a41ffc3ae5a709770 Mon Sep 17 00:00:00 2001
From: Paul Gortmaker <paul.gortmaker@windriver.com>
Date: Sun, 13 Jan 2019 13:36:46 -0500
Subject: mfd: wm831x-core: Drop unused module infrastructure from non-modular
 code

The Kconfig currently controlling compilation of this code is:

drivers/mfd/Kconfig:config MFD_WM831X
drivers/mfd/Kconfig:    bool

...meaning that it currently is not being built as a module by anyone.

Lets remove the couple traces of modular infrastructure use, so that
when reading the driver there is no doubt it is builtin-only.

We delete the MODULE_LICENSE tag etc. since all that information
is already contained at the top of the file in the comments.

Previous demodularizaion work has made wm831x_device_exit() no longer
used, so it is also removed from the 831x core code.

Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Acked-by: Charles Keepax <ckeepax@opensource.cirrus.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/wm831x-core.c       | 15 ++-------------
 include/linux/mfd/wm831x/core.h |  1 -
 2 files changed, 2 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/drivers/mfd/wm831x-core.c b/drivers/mfd/wm831x-core.c
index e70d35ef5c6d..25fbbaf39cb9 100644
--- a/drivers/mfd/wm831x-core.c
+++ b/drivers/mfd/wm831x-core.c
@@ -13,7 +13,8 @@
  */
 
 #include <linux/kernel.h>
-#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/export.h>
 #include <linux/bcd.h>
 #include <linux/delay.h>
 #include <linux/mfd/core.h>
@@ -1892,14 +1893,6 @@ err:
 	return ret;
 }
 
-void wm831x_device_exit(struct wm831x *wm831x)
-{
-	wm831x_otp_exit(wm831x);
-	mfd_remove_devices(wm831x->dev);
-	free_irq(wm831x_irq(wm831x, WM831X_IRQ_AUXADC_DATA), wm831x);
-	wm831x_irq_exit(wm831x);
-}
-
 int wm831x_device_suspend(struct wm831x *wm831x)
 {
 	int reg, mask;
@@ -1944,7 +1937,3 @@ void wm831x_device_shutdown(struct wm831x *wm831x)
 	}
 }
 EXPORT_SYMBOL_GPL(wm831x_device_shutdown);
-
-MODULE_DESCRIPTION("Core support for the WM831X AudioPlus PMIC");
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Mark Brown");
diff --git a/include/linux/mfd/wm831x/core.h b/include/linux/mfd/wm831x/core.h
index b49fa67612f1..6fcb8eb00282 100644
--- a/include/linux/mfd/wm831x/core.h
+++ b/include/linux/mfd/wm831x/core.h
@@ -418,7 +418,6 @@ int wm831x_bulk_read(struct wm831x *wm831x, unsigned short reg,
 		     int count, u16 *buf);
 
 int wm831x_device_init(struct wm831x *wm831x, int irq);
-void wm831x_device_exit(struct wm831x *wm831x);
 int wm831x_device_suspend(struct wm831x *wm831x);
 void wm831x_device_shutdown(struct wm831x *wm831x);
 int wm831x_irq_init(struct wm831x *wm831x, int irq);
-- 
cgit v1.2.3


From 0db88688e1bb0180d6348742bdba8927cd0e5670 Mon Sep 17 00:00:00 2001
From: Paul Gortmaker <paul.gortmaker@windriver.com>
Date: Sun, 13 Jan 2019 13:36:48 -0500
Subject: mfd: wm8350-core: Drop unused module infrastructure from non-modular
 code

The Kconfig currently controlling compilation of this code is:

drivers/mfd/Kconfig:config MFD_WM8350
drivers/mfd/Kconfig:    bool

...meaning that it currently is not being built as a module by anyone.

Lets remove the couple traces of modular infrastructure use, so that
when reading the driver there is no doubt it is builtin-only.

We delete the MODULE_LICENSE tag etc. since all that information
is already contained at the top of the file in the comments.

We replace module.h with init.h and export.h ; the latter since the
file does export some symbols.

Previous demodularizaion work has made wm8350_device_exit() no longer
used, so it is also removed from the 8350 core code.

Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Acked-by: Charles Keepax <ckeepax@opensource.cirrus.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/wm8350-core.c       | 30 ++----------------------------
 include/linux/mfd/wm8350/core.h |  1 -
 2 files changed, 2 insertions(+), 29 deletions(-)

(limited to 'include')

diff --git a/drivers/mfd/wm8350-core.c b/drivers/mfd/wm8350-core.c
index 8a07c5634aee..9e1070f26b11 100644
--- a/drivers/mfd/wm8350-core.c
+++ b/drivers/mfd/wm8350-core.c
@@ -13,7 +13,8 @@
  */
 
 #include <linux/kernel.h>
-#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/export.h>
 #include <linux/slab.h>
 #include <linux/bug.h>
 #include <linux/device.h>
@@ -442,30 +443,3 @@ err:
 	return ret;
 }
 EXPORT_SYMBOL_GPL(wm8350_device_init);
-
-void wm8350_device_exit(struct wm8350 *wm8350)
-{
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(wm8350->pmic.led); i++)
-		platform_device_unregister(wm8350->pmic.led[i].pdev);
-
-	for (i = 0; i < ARRAY_SIZE(wm8350->pmic.pdev); i++)
-		platform_device_unregister(wm8350->pmic.pdev[i]);
-
-	platform_device_unregister(wm8350->wdt.pdev);
-	platform_device_unregister(wm8350->rtc.pdev);
-	platform_device_unregister(wm8350->power.pdev);
-	platform_device_unregister(wm8350->hwmon.pdev);
-	platform_device_unregister(wm8350->gpio.pdev);
-	platform_device_unregister(wm8350->codec.pdev);
-
-	if (wm8350->irq_base)
-		free_irq(wm8350->irq_base + WM8350_IRQ_AUXADC_DATARDY, wm8350);
-
-	wm8350_irq_exit(wm8350);
-}
-EXPORT_SYMBOL_GPL(wm8350_device_exit);
-
-MODULE_DESCRIPTION("WM8350 AudioPlus PMIC core driver");
-MODULE_LICENSE("GPL");
diff --git a/include/linux/mfd/wm8350/core.h b/include/linux/mfd/wm8350/core.h
index 509481d9cf19..202d9bde2c7c 100644
--- a/include/linux/mfd/wm8350/core.h
+++ b/include/linux/mfd/wm8350/core.h
@@ -643,7 +643,6 @@ struct wm8350_platform_data {
  */
 int wm8350_device_init(struct wm8350 *wm8350, int irq,
 		       struct wm8350_platform_data *pdata);
-void wm8350_device_exit(struct wm8350 *wm8350);
 
 /*
  * WM8350 device IO
-- 
cgit v1.2.3


From d57f72875eed3f26afaca176c0f425f209bc99d7 Mon Sep 17 00:00:00 2001
From: Christian Hohnstaedt <Christian.Hohnstaedt@wago.com>
Date: Mon, 14 Jan 2019 09:16:34 +0100
Subject: mfd: tps65218.c: Add input voltage options

These options apply to all regulators in this chip.

ti,strict-supply-voltage-supervision:
  Set STRICT flag in CONFIG1
ti,under-voltage-limit-microvolt:
  Select 2.75, 2.95, 3.25 or 3.35 V UVLO in CONFIG1
ti,under-voltage-hyst-microvolt:
  Select 200mV or 400mV UVLOHYS in CONFIG2

Signed-off-by: Christian Hohnstaedt <Christian.Hohnstaedt@wago.com>
Tested-by: Keerthy <j-keerthy@ti.com>
Reviewed-by: Keerthy <j-keerthy@ti.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/tps65218.c       | 89 ++++++++++++++++++++++++++++++++++++++++++++
 include/linux/mfd/tps65218.h |  4 ++
 2 files changed, 93 insertions(+)

(limited to 'include')

diff --git a/drivers/mfd/tps65218.c b/drivers/mfd/tps65218.c
index 8bcdecf494d0..a62ea4cb8be7 100644
--- a/drivers/mfd/tps65218.c
+++ b/drivers/mfd/tps65218.c
@@ -211,6 +211,83 @@ static const struct of_device_id of_tps65218_match_table[] = {
 };
 MODULE_DEVICE_TABLE(of, of_tps65218_match_table);
 
+static int tps65218_voltage_set_strict(struct tps65218 *tps)
+{
+	u32 strict;
+
+	if (of_property_read_u32(tps->dev->of_node,
+				 "ti,strict-supply-voltage-supervision",
+				 &strict))
+		return 0;
+
+	if (strict != 0 && strict != 1) {
+		dev_err(tps->dev,
+			"Invalid ti,strict-supply-voltage-supervision value\n");
+		return -EINVAL;
+	}
+
+	tps65218_update_bits(tps, TPS65218_REG_CONFIG1,
+			     TPS65218_CONFIG1_STRICT,
+			     strict ? TPS65218_CONFIG1_STRICT : 0,
+			     TPS65218_PROTECT_L1);
+	return 0;
+}
+
+static int tps65218_voltage_set_uv_hyst(struct tps65218 *tps)
+{
+	u32 hyst;
+
+	if (of_property_read_u32(tps->dev->of_node,
+				 "ti,under-voltage-hyst-microvolt", &hyst))
+		return 0;
+
+	if (hyst != 400000 && hyst != 200000) {
+		dev_err(tps->dev,
+			"Invalid ti,under-voltage-hyst-microvolt value\n");
+		return -EINVAL;
+	}
+
+	tps65218_update_bits(tps, TPS65218_REG_CONFIG2,
+			     TPS65218_CONFIG2_UVLOHYS,
+			     hyst == 400000 ? TPS65218_CONFIG2_UVLOHYS : 0,
+			     TPS65218_PROTECT_L1);
+	return 0;
+}
+
+static int tps65218_voltage_set_uvlo(struct tps65218 *tps)
+{
+	u32 uvlo;
+	int uvloval;
+
+	if (of_property_read_u32(tps->dev->of_node,
+				 "ti,under-voltage-limit-microvolt", &uvlo))
+		return 0;
+
+	switch (uvlo) {
+	case 2750000:
+		uvloval = TPS65218_CONFIG1_UVLO_2750000;
+		break;
+	case 2950000:
+		uvloval = TPS65218_CONFIG1_UVLO_2950000;
+		break;
+	case 3250000:
+		uvloval = TPS65218_CONFIG1_UVLO_3250000;
+		break;
+	case 3350000:
+		uvloval = TPS65218_CONFIG1_UVLO_3350000;
+		break;
+	default:
+		dev_err(tps->dev,
+			"Invalid ti,under-voltage-limit-microvolt value\n");
+		return -EINVAL;
+	}
+
+	tps65218_update_bits(tps, TPS65218_REG_CONFIG1,
+			     TPS65218_CONFIG1_UVLO_MASK, uvloval,
+			     TPS65218_PROTECT_L1);
+	return 0;
+}
+
 static int tps65218_probe(struct i2c_client *client,
 				const struct i2c_device_id *ids)
 {
@@ -249,6 +326,18 @@ static int tps65218_probe(struct i2c_client *client,
 
 	tps->rev = chipid & TPS65218_CHIPID_REV_MASK;
 
+	ret = tps65218_voltage_set_strict(tps);
+	if (ret)
+		return ret;
+
+	ret = tps65218_voltage_set_uvlo(tps);
+	if (ret)
+		return ret;
+
+	ret = tps65218_voltage_set_uv_hyst(tps);
+	if (ret)
+		return ret;
+
 	ret = mfd_add_devices(tps->dev, PLATFORM_DEVID_AUTO, tps65218_cells,
 			      ARRAY_SIZE(tps65218_cells), NULL, 0,
 			      regmap_irq_get_domain(tps->irq_data));
diff --git a/include/linux/mfd/tps65218.h b/include/linux/mfd/tps65218.h
index c204d9a79436..3cbe103495ab 100644
--- a/include/linux/mfd/tps65218.h
+++ b/include/linux/mfd/tps65218.h
@@ -137,6 +137,10 @@
 #define TPS65218_CONFIG1_PGDLY_MASK	0x18
 #define TPS65218_CONFIG1_STRICT		BIT(2)
 #define TPS65218_CONFIG1_UVLO_MASK	0x3
+#define TPS65218_CONFIG1_UVLO_2750000	0x0
+#define TPS65218_CONFIG1_UVLO_2950000	0x1
+#define TPS65218_CONFIG1_UVLO_3250000	0x2
+#define TPS65218_CONFIG1_UVLO_3350000	0x3
 
 #define TPS65218_CONFIG2_DC12_RST	BIT(7)
 #define TPS65218_CONFIG2_UVLOHYS	BIT(6)
-- 
cgit v1.2.3


From cfced786969c2a3e1bca45d7055a00311d93ae6c Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 4 Jan 2019 18:20:05 +0100
Subject: dma-mapping: remove the default map_resource implementation

Instead provide a proper implementation in the direct mapping code, and
also wire it up for arm and powerpc, leaving an error return for all the
IOMMU or virtual mapping instances for which we'd have to wire up an
actual implementation

Signed-off-by: Christoph Hellwig <hch@lst.de>
Tested-by: Marek Szyprowski <m.szyprowski@samsung.com>
---
 arch/arm/mm/dma-mapping.c         |  2 ++
 arch/powerpc/kernel/dma-swiotlb.c |  1 +
 arch/powerpc/kernel/dma.c         |  1 +
 include/linux/dma-mapping.h       | 12 +++++++-----
 kernel/dma/direct.c               | 14 ++++++++++++++
 5 files changed, 25 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index f1e2922e447c..3c8534904209 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -188,6 +188,7 @@ const struct dma_map_ops arm_dma_ops = {
 	.unmap_page		= arm_dma_unmap_page,
 	.map_sg			= arm_dma_map_sg,
 	.unmap_sg		= arm_dma_unmap_sg,
+	.map_resource		= dma_direct_map_resource,
 	.sync_single_for_cpu	= arm_dma_sync_single_for_cpu,
 	.sync_single_for_device	= arm_dma_sync_single_for_device,
 	.sync_sg_for_cpu	= arm_dma_sync_sg_for_cpu,
@@ -211,6 +212,7 @@ const struct dma_map_ops arm_coherent_dma_ops = {
 	.get_sgtable		= arm_dma_get_sgtable,
 	.map_page		= arm_coherent_dma_map_page,
 	.map_sg			= arm_dma_map_sg,
+	.map_resource		= dma_direct_map_resource,
 	.dma_supported		= arm_dma_supported,
 };
 EXPORT_SYMBOL(arm_coherent_dma_ops);
diff --git a/arch/powerpc/kernel/dma-swiotlb.c b/arch/powerpc/kernel/dma-swiotlb.c
index 7d5fc9751622..fbb2506a414e 100644
--- a/arch/powerpc/kernel/dma-swiotlb.c
+++ b/arch/powerpc/kernel/dma-swiotlb.c
@@ -55,6 +55,7 @@ const struct dma_map_ops powerpc_swiotlb_dma_ops = {
 	.dma_supported = swiotlb_dma_supported,
 	.map_page = dma_direct_map_page,
 	.unmap_page = dma_direct_unmap_page,
+	.map_resource = dma_direct_map_resource,
 	.sync_single_for_cpu = dma_direct_sync_single_for_cpu,
 	.sync_single_for_device = dma_direct_sync_single_for_device,
 	.sync_sg_for_cpu = dma_direct_sync_sg_for_cpu,
diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c
index b1903ebb2e9c..258b9e8ebb99 100644
--- a/arch/powerpc/kernel/dma.c
+++ b/arch/powerpc/kernel/dma.c
@@ -273,6 +273,7 @@ const struct dma_map_ops dma_nommu_ops = {
 	.dma_supported			= dma_nommu_dma_supported,
 	.map_page			= dma_nommu_map_page,
 	.unmap_page			= dma_nommu_unmap_page,
+	.map_resource			= dma_direct_map_resource,
 	.get_required_mask		= dma_nommu_get_required_mask,
 #ifdef CONFIG_NOT_COHERENT_CACHE
 	.sync_single_for_cpu 		= dma_nommu_sync_single,
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index f6ded992c183..9842085e6774 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -208,6 +208,8 @@ dma_addr_t dma_direct_map_page(struct device *dev, struct page *page,
 		unsigned long attrs);
 int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents,
 		enum dma_data_direction dir, unsigned long attrs);
+dma_addr_t dma_direct_map_resource(struct device *dev, phys_addr_t paddr,
+		size_t size, enum dma_data_direction dir, unsigned long attrs);
 
 #if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \
     defined(CONFIG_SWIOTLB)
@@ -346,19 +348,19 @@ static inline dma_addr_t dma_map_resource(struct device *dev,
 					  unsigned long attrs)
 {
 	const struct dma_map_ops *ops = get_dma_ops(dev);
-	dma_addr_t addr;
+	dma_addr_t addr = DMA_MAPPING_ERROR;
 
 	BUG_ON(!valid_dma_direction(dir));
 
 	/* Don't allow RAM to be mapped */
 	BUG_ON(pfn_valid(PHYS_PFN(phys_addr)));
 
-	addr = phys_addr;
-	if (ops && ops->map_resource)
+	if (dma_is_direct(ops))
+		addr = dma_direct_map_resource(dev, phys_addr, size, dir, attrs);
+	else if (ops->map_resource)
 		addr = ops->map_resource(dev, phys_addr, size, dir, attrs);
 
 	debug_dma_map_resource(dev, phys_addr, size, dir, addr);
-
 	return addr;
 }
 
@@ -369,7 +371,7 @@ static inline void dma_unmap_resource(struct device *dev, dma_addr_t addr,
 	const struct dma_map_ops *ops = get_dma_ops(dev);
 
 	BUG_ON(!valid_dma_direction(dir));
-	if (ops && ops->unmap_resource)
+	if (!dma_is_direct(ops) && ops->unmap_resource)
 		ops->unmap_resource(dev, addr, size, dir, attrs);
 	debug_dma_unmap_resource(dev, addr, size, dir);
 }
diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
index 355d16acee6d..25bd19974223 100644
--- a/kernel/dma/direct.c
+++ b/kernel/dma/direct.c
@@ -356,6 +356,20 @@ out_unmap:
 }
 EXPORT_SYMBOL(dma_direct_map_sg);
 
+dma_addr_t dma_direct_map_resource(struct device *dev, phys_addr_t paddr,
+		size_t size, enum dma_data_direction dir, unsigned long attrs)
+{
+	dma_addr_t dma_addr = paddr;
+
+	if (unlikely(!dma_direct_possible(dev, dma_addr, size))) {
+		report_addr(dev, dma_addr, size);
+		return DMA_MAPPING_ERROR;
+	}
+
+	return dma_addr;
+}
+EXPORT_SYMBOL(dma_direct_map_resource);
+
 /*
  * Because 32-bit DMA masks are so common we expect every architecture to be
  * able to satisfy them - either by not supporting more physical memory, or by
-- 
cgit v1.2.3


From 645386dfe6307dbb28f10a4513792a59beda0efa Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 4 Jan 2019 17:17:53 +0100
Subject: dma-mapping: don't BUG when calling dma_map_resource on RAM

Use WARN_ON_ONCE to print a stack trace and return a proper error
code instead.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Robin Murphy <robin.murphy@arm.com>
Tested-by: Marek Szyprowski <m.szyprowski@samsung.com>
---
 include/linux/dma-mapping.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index 9842085e6774..b904d55247ab 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -353,7 +353,8 @@ static inline dma_addr_t dma_map_resource(struct device *dev,
 	BUG_ON(!valid_dma_direction(dir));
 
 	/* Don't allow RAM to be mapped */
-	BUG_ON(pfn_valid(PHYS_PFN(phys_addr)));
+	if (WARN_ON_ONCE(pfn_valid(PHYS_PFN(phys_addr))))
+		return DMA_MAPPING_ERROR;
 
 	if (dma_is_direct(ops))
 		addr = dma_direct_map_resource(dev, phys_addr, size, dir, attrs);
-- 
cgit v1.2.3


From e2f3cd831a280fc226118d9369bf3f77aab58c56 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Fri, 1 Feb 2019 01:49:14 +0100
Subject: driver core: Fix handling of runtime PM flags in device_link_add()

After commit ead18c23c263 ("driver core: Introduce device links
reference counting"), if there is a link between the given supplier
and the given consumer already, device_link_add() will refcount it
and return it unconditionally without updating its flags.  It is
possible, however, that the second (or any subsequent) caller of
device_link_add() for the same consumer-supplier pair will pass
DL_FLAG_PM_RUNTIME, possibly along with DL_FLAG_RPM_ACTIVE, in flags
to it and the existing link may not behave as expected then.

First, if DL_FLAG_PM_RUNTIME is not set in the existing link's flags
at all, it needs to be set like during the original initialization of
the link.

Second, if DL_FLAG_RPM_ACTIVE is passed to device_link_add() in flags
(in addition to DL_FLAG_PM_RUNTIME), the existing link should to be
updated to reflect the "active" runtime PM configuration of the
consumer-supplier pair and extra care must be taken here to avoid
possible destructive races with runtime PM of the consumer.

To that end, redefine the rpm_active field in struct device_link
as a refcount, initialize it to 1 and make rpm_resume() (for the
consumer) and device_link_add() increment it whenever they acquire
a runtime PM reference on the supplier device.  Accordingly, make
rpm_suspend() (for the consumer) and pm_runtime_clean_up_links()
decrement it and drop runtime PM references to the supplier
device in a loop until rpm_active becones 1 again.

Fixes: ead18c23c263 ("driver core: Introduce device links reference counting")
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/core.c          | 45 +++++++++++++++++++++++++++++---------------
 drivers/base/power/runtime.c | 26 +++++++++++--------------
 include/linux/device.h       |  2 +-
 3 files changed, 42 insertions(+), 31 deletions(-)

(limited to 'include')

diff --git a/drivers/base/core.c b/drivers/base/core.c
index 50610cd87e71..8611385e44b5 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -165,6 +165,19 @@ void device_pm_move_to_tail(struct device *dev)
 	device_links_read_unlock(idx);
 }
 
+static void device_link_rpm_prepare(struct device *consumer,
+				    struct device *supplier)
+{
+	pm_runtime_new_link(consumer);
+	/*
+	 * If the link is being added by the consumer driver at probe time,
+	 * balance the decrementation of the supplier's runtime PM usage counter
+	 * after consumer probe in driver_probe_device().
+	 */
+	if (consumer->links.status == DL_DEV_PROBING)
+		pm_runtime_get_noresume(supplier);
+}
+
 /**
  * device_link_add - Create a link between two devices.
  * @consumer: Consumer end of the link.
@@ -201,7 +214,6 @@ struct device_link *device_link_add(struct device *consumer,
 				    struct device *supplier, u32 flags)
 {
 	struct device_link *link;
-	bool rpm_put_supplier = false;
 
 	if (!consumer || !supplier ||
 	    (flags & DL_FLAG_STATELESS &&
@@ -213,7 +225,6 @@ struct device_link *device_link_add(struct device *consumer,
 			pm_runtime_put_noidle(supplier);
 			return NULL;
 		}
-		rpm_put_supplier = true;
 	}
 
 	device_links_write_lock();
@@ -249,6 +260,15 @@ struct device_link *device_link_add(struct device *consumer,
 		if (flags & DL_FLAG_AUTOREMOVE_SUPPLIER)
 			link->flags |= DL_FLAG_AUTOREMOVE_SUPPLIER;
 
+		if (flags & DL_FLAG_PM_RUNTIME) {
+			if (!(link->flags & DL_FLAG_PM_RUNTIME)) {
+				device_link_rpm_prepare(consumer, supplier);
+				link->flags |= DL_FLAG_PM_RUNTIME;
+			}
+			if (flags & DL_FLAG_RPM_ACTIVE)
+				refcount_inc(&link->rpm_active);
+		}
+
 		kref_get(&link->kref);
 		goto out;
 	}
@@ -257,20 +277,15 @@ struct device_link *device_link_add(struct device *consumer,
 	if (!link)
 		goto out;
 
+	refcount_set(&link->rpm_active, 1);
+
 	if (flags & DL_FLAG_PM_RUNTIME) {
-		if (flags & DL_FLAG_RPM_ACTIVE) {
-			link->rpm_active = true;
-			rpm_put_supplier = false;
-		}
-		pm_runtime_new_link(consumer);
-		/*
-		 * If the link is being added by the consumer driver at probe
-		 * time, balance the decrementation of the supplier's runtime PM
-		 * usage counter after consumer probe in driver_probe_device().
-		 */
-		if (consumer->links.status == DL_DEV_PROBING)
-			pm_runtime_get_noresume(supplier);
+		if (flags & DL_FLAG_RPM_ACTIVE)
+			refcount_inc(&link->rpm_active);
+
+		device_link_rpm_prepare(consumer, supplier);
 	}
+
 	get_device(supplier);
 	link->supplier = supplier;
 	INIT_LIST_HEAD(&link->s_node);
@@ -333,7 +348,7 @@ struct device_link *device_link_add(struct device *consumer,
 	device_pm_unlock();
 	device_links_write_unlock();
 
-	if (rpm_put_supplier)
+	if ((flags & DL_FLAG_PM_RUNTIME && flags & DL_FLAG_RPM_ACTIVE) && !link)
 		pm_runtime_put(supplier);
 
 	return link;
diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
index 457be03b744d..8bc9a432de70 100644
--- a/drivers/base/power/runtime.c
+++ b/drivers/base/power/runtime.c
@@ -259,11 +259,8 @@ static int rpm_get_suppliers(struct device *dev)
 	list_for_each_entry_rcu(link, &dev->links.suppliers, c_node) {
 		int retval;
 
-		if (!(link->flags & DL_FLAG_PM_RUNTIME))
-			continue;
-
-		if (READ_ONCE(link->status) == DL_STATE_SUPPLIER_UNBIND ||
-		    link->rpm_active)
+		if (!(link->flags & DL_FLAG_PM_RUNTIME) ||
+		    READ_ONCE(link->status) == DL_STATE_SUPPLIER_UNBIND)
 			continue;
 
 		retval = pm_runtime_get_sync(link->supplier);
@@ -272,7 +269,7 @@ static int rpm_get_suppliers(struct device *dev)
 			pm_runtime_put_noidle(link->supplier);
 			return retval;
 		}
-		link->rpm_active = true;
+		refcount_inc(&link->rpm_active);
 	}
 	return 0;
 }
@@ -281,12 +278,13 @@ static void rpm_put_suppliers(struct device *dev)
 {
 	struct device_link *link;
 
-	list_for_each_entry_rcu(link, &dev->links.suppliers, c_node)
-		if (link->rpm_active &&
-		    READ_ONCE(link->status) != DL_STATE_SUPPLIER_UNBIND) {
+	list_for_each_entry_rcu(link, &dev->links.suppliers, c_node) {
+		if (READ_ONCE(link->status) == DL_STATE_SUPPLIER_UNBIND)
+			continue;
+
+		while (refcount_dec_not_one(&link->rpm_active))
 			pm_runtime_put(link->supplier);
-			link->rpm_active = false;
-		}
+	}
 }
 
 /**
@@ -1539,7 +1537,7 @@ void pm_runtime_remove(struct device *dev)
  *
  * Check links from this device to any consumers and if any of them have active
  * runtime PM references to the device, drop the usage counter of the device
- * (once per link).
+ * (as many times as needed).
  *
  * Links with the DL_FLAG_STATELESS flag set are ignored.
  *
@@ -1561,10 +1559,8 @@ void pm_runtime_clean_up_links(struct device *dev)
 		if (link->flags & DL_FLAG_STATELESS)
 			continue;
 
-		if (link->rpm_active) {
+		while (refcount_dec_not_one(&link->rpm_active))
 			pm_runtime_put_noidle(dev);
-			link->rpm_active = false;
-		}
 	}
 
 	device_links_read_unlock(idx);
diff --git a/include/linux/device.h b/include/linux/device.h
index d0e452fd0bff..5f49d2eff6ed 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -853,7 +853,7 @@ struct device_link {
 	struct list_head c_node;
 	enum device_link_state status;
 	u32 flags;
-	bool rpm_active;
+	refcount_t rpm_active;
 	struct kref kref;
 #ifdef CONFIG_SRCU
 	struct rcu_head rcu_head;
-- 
cgit v1.2.3


From e7dd40105aac9ba051e44ad711123bc53a5e4c71 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Fri, 1 Feb 2019 01:59:42 +0100
Subject: driver core: Add device link flag DL_FLAG_AUTOPROBE_CONSUMER

Add a new device link flag, DL_FLAG_AUTOPROBE_CONSUMER, to request the
driver core to probe for a consumer driver automatically after binding
a driver to the supplier device on a persistent managed device link.

As unbinding the supplier driver on a managed device link causes the
consumer driver to be detached from its device automatically, this
flag provides a complementary mechanism which is needed to address
some "composite device" use cases.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/driver-api/device_link.rst |  9 +++++++++
 drivers/base/core.c                      | 16 +++++++++++++++-
 drivers/base/dd.c                        |  2 +-
 include/linux/device.h                   |  3 +++
 4 files changed, 28 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/Documentation/driver-api/device_link.rst b/Documentation/driver-api/device_link.rst
index e249e074a8d2..c764755121c7 100644
--- a/Documentation/driver-api/device_link.rst
+++ b/Documentation/driver-api/device_link.rst
@@ -94,6 +94,15 @@ Similarly, when the device link is added from supplier's ``->probe`` callback,
 ``DL_FLAG_AUTOREMOVE_SUPPLIER`` causes the device link to be automatically
 purged when the supplier fails to probe or later unbinds.
 
+If neither ``DL_FLAG_AUTOREMOVE_CONSUMER`` nor ``DL_FLAG_AUTOREMOVE_SUPPLIER``
+is set, ``DL_FLAG_AUTOPROBE_CONSUMER`` can be used to request the driver core
+to probe for a driver for the consumer driver on the link automatically after
+a driver has been bound to the supplier device.
+
+Note, however, that any combinations of ``DL_FLAG_AUTOREMOVE_CONSUMER``,
+``DL_FLAG_AUTOREMOVE_SUPPLIER`` or ``DL_FLAG_AUTOPROBE_CONSUMER`` with
+``DL_FLAG_STATELESS`` are invalid and cannot be used.
+
 Limitations
 ===========
 
diff --git a/drivers/base/core.c b/drivers/base/core.c
index 9d49b461b1d9..abfce4f613f8 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -208,6 +208,12 @@ static void device_link_rpm_prepare(struct device *consumer,
  * the link will be maintained until one of the devices pointed to by it (either
  * the consumer or the supplier) is unregistered.
  *
+ * Also, if DL_FLAG_STATELESS, DL_FLAG_AUTOREMOVE_CONSUMER and
+ * DL_FLAG_AUTOREMOVE_SUPPLIER are not set in @flags (that is, a persistent
+ * managed device link is being added), the DL_FLAG_AUTOPROBE_CONSUMER flag can
+ * be used to request the driver core to automaticall probe for a consmer
+ * driver after successfully binding a driver to the supplier device.
+ *
  * The combination of DL_FLAG_STATELESS and either DL_FLAG_AUTOREMOVE_CONSUMER
  * or DL_FLAG_AUTOREMOVE_SUPPLIER set in @flags at the same time is invalid and
  * will cause NULL to be returned upfront.
@@ -228,7 +234,12 @@ struct device_link *device_link_add(struct device *consumer,
 
 	if (!consumer || !supplier ||
 	    (flags & DL_FLAG_STATELESS &&
-	     flags & (DL_FLAG_AUTOREMOVE_CONSUMER | DL_FLAG_AUTOREMOVE_SUPPLIER)))
+	     flags & (DL_FLAG_AUTOREMOVE_CONSUMER |
+		      DL_FLAG_AUTOREMOVE_SUPPLIER |
+		      DL_FLAG_AUTOPROBE_CONSUMER)) ||
+	    (flags & DL_FLAG_AUTOPROBE_CONSUMER &&
+	     flags & (DL_FLAG_AUTOREMOVE_CONSUMER |
+		      DL_FLAG_AUTOREMOVE_SUPPLIER)))
 		return NULL;
 
 	if (flags & DL_FLAG_PM_RUNTIME && flags & DL_FLAG_RPM_ACTIVE) {
@@ -589,6 +600,9 @@ void device_links_driver_bound(struct device *dev)
 
 		WARN_ON(link->status != DL_STATE_DORMANT);
 		WRITE_ONCE(link->status, DL_STATE_AVAILABLE);
+
+		if (link->flags & DL_FLAG_AUTOPROBE_CONSUMER)
+			driver_deferred_probe_add(link->consumer);
 	}
 
 	list_for_each_entry(link, &dev->links.suppliers, c_node) {
diff --git a/drivers/base/dd.c b/drivers/base/dd.c
index aa6a9c613595..2e898cbba79b 100644
--- a/drivers/base/dd.c
+++ b/drivers/base/dd.c
@@ -116,7 +116,7 @@ static void deferred_probe_work_func(struct work_struct *work)
 }
 static DECLARE_WORK(deferred_probe_work, deferred_probe_work_func);
 
-static void driver_deferred_probe_add(struct device *dev)
+void driver_deferred_probe_add(struct device *dev)
 {
 	mutex_lock(&deferred_probe_mutex);
 	if (list_empty(&dev->p->deferred_probe)) {
diff --git a/include/linux/device.h b/include/linux/device.h
index 5f49d2eff6ed..0ab0a3a80ec3 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -341,6 +341,7 @@ struct device *driver_find_device(struct device_driver *drv,
 				  struct device *start, void *data,
 				  int (*match)(struct device *dev, void *data));
 
+void driver_deferred_probe_add(struct device *dev);
 int driver_deferred_probe_check_state(struct device *dev);
 
 /**
@@ -827,12 +828,14 @@ enum device_link_state {
  * PM_RUNTIME: If set, the runtime PM framework will use this link.
  * RPM_ACTIVE: Run pm_runtime_get_sync() on the supplier during link creation.
  * AUTOREMOVE_SUPPLIER: Remove the link automatically on supplier driver unbind.
+ * AUTOPROBE_CONSUMER: Probe consumer driver automatically after supplier binds.
  */
 #define DL_FLAG_STATELESS		BIT(0)
 #define DL_FLAG_AUTOREMOVE_CONSUMER	BIT(1)
 #define DL_FLAG_PM_RUNTIME		BIT(2)
 #define DL_FLAG_RPM_ACTIVE		BIT(3)
 #define DL_FLAG_AUTOREMOVE_SUPPLIER	BIT(4)
+#define DL_FLAG_AUTOPROBE_CONSUMER	BIT(5)
 
 /**
  * struct device_link - Device link representation.
-- 
cgit v1.2.3


From cb86880ee493ea41e95476be054a605b3e434035 Mon Sep 17 00:00:00 2001
From: Toke Høiland-Jørgensen <toke@redhat.com>
Date: Tue, 29 Jan 2019 11:31:26 +0100
Subject: mac80211: Fix documentation strings for airtime-related variables
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There was a typo in the documentation for weight_multiplier in mac80211.h,
and the doc was missing entirely for airtime and airtime_weight in sta_info.h.

Reported-by: kbuild test robot <lkp@intel.com>
Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h  | 2 +-
 net/mac80211/sta_info.h | 3 +++
 2 files changed, 4 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 294a8a36012a..396b0c079c3b 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -2365,7 +2365,7 @@ enum ieee80211_hw_flags {
  *	them are encountered. The default should typically not be changed,
  *	unless the driver has good reasons for needing more buffers.
  *
- * @weight_multipler: Driver specific airtime weight multiplier used while
+ * @weight_multiplier: Driver specific airtime weight multiplier used while
  *	refilling deficit of each TXQ.
  */
 struct ieee80211_hw {
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 90e03ebccd3f..05647d835894 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -473,6 +473,9 @@ struct ieee80211_sta_rx_stats {
  * @last_seq_ctrl: last received seq/frag number from this STA (per TID
  *	plus one for non-QoS frames)
  * @tid_seq: per-TID sequence numbers for sending to this STA
+ * @airtime: per-AC struct airtime_info describing airtime statistics for this
+ *	station
+ * @airtime_weight: station weight for airtime fairness calculation purposes
  * @ampdu_mlme: A-MPDU state machine state
  * @mesh: mesh STA information
  * @debugfs_dir: debug filesystem directory dentry
-- 
cgit v1.2.3


From 5ac4a12df58121f0cf874b518406ec2a8e2d4ac4 Mon Sep 17 00:00:00 2001
From: Matteo Croce <mcroce@redhat.com>
Date: Tue, 29 Jan 2019 19:25:53 +0100
Subject: cfg80211: fix typo

Fix spelling mistake in cfg80211.h: "lenght" -> "length".
The typo is also in the special comment block which
translates to documentation.

Signed-off-by: Matteo Croce <mcroce@redhat.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 7033c90850b0..5862f1efb52c 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -5604,7 +5604,7 @@ void cfg80211_ibss_joined(struct net_device *dev, const u8 *bssid,
  * @dev: network device
  * @macaddr: the MAC address of the new candidate
  * @ie: information elements advertised by the peer candidate
- * @ie_len: lenght of the information elements buffer
+ * @ie_len: length of the information elements buffer
  * @gfp: allocation flags
  *
  * This function notifies cfg80211 that the mesh peer candidate has been
-- 
cgit v1.2.3


From 23323289b1543b445d1f10aeab1bcfd84f2ca50a Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 25 Jan 2019 10:08:28 +0100
Subject: netlink: reduce NLA_POLICY_NESTED{,_ARRAY} arguments

In typical cases, there's no need to pass both the maxattr
and the policy array pointer, as the maxattr should just be
ARRAY_SIZE(policy) - 1. Therefore, to be less error prone,
just remove the maxattr argument from the default macros
and deduce the size accordingly.

Leave the original macros with a leading underscore to use
here and in case somebody needs to pass a policy pointer
where the policy isn't declared in the same place and thus
ARRAY_SIZE() cannot be used.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/netlink.h  |  8 ++++++--
 net/wireless/nl80211.c | 15 +++++----------
 2 files changed, 11 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/include/net/netlink.h b/include/net/netlink.h
index 4c1e99303b5a..23f27b0b3cef 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -306,10 +306,14 @@ struct nla_policy {
 #define NLA_POLICY_ETH_ADDR		NLA_POLICY_EXACT_LEN(ETH_ALEN)
 #define NLA_POLICY_ETH_ADDR_COMPAT	NLA_POLICY_EXACT_LEN_WARN(ETH_ALEN)
 
-#define NLA_POLICY_NESTED(maxattr, policy) \
+#define _NLA_POLICY_NESTED(maxattr, policy) \
 	{ .type = NLA_NESTED, .validation_data = policy, .len = maxattr }
-#define NLA_POLICY_NESTED_ARRAY(maxattr, policy) \
+#define _NLA_POLICY_NESTED_ARRAY(maxattr, policy) \
 	{ .type = NLA_NESTED_ARRAY, .validation_data = policy, .len = maxattr }
+#define NLA_POLICY_NESTED(policy) \
+	_NLA_POLICY_NESTED(ARRAY_SIZE(policy) - 1, policy)
+#define NLA_POLICY_NESTED_ARRAY(policy) \
+	_NLA_POLICY_NESTED_ARRAY(ARRAY_SIZE(policy) - 1, policy)
 
 #define __NLA_ENSURE(condition) BUILD_BUG_ON_ZERO(!(condition))
 #define NLA_ENSURE_INT_TYPE(tp)				\
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index ed53f8332ec8..a3cc039b9f55 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -259,15 +259,13 @@ nl80211_pmsr_ftm_req_attr_policy[NL80211_PMSR_FTM_REQ_ATTR_MAX + 1] = {
 static const struct nla_policy
 nl80211_pmsr_req_data_policy[NL80211_PMSR_TYPE_MAX + 1] = {
 	[NL80211_PMSR_TYPE_FTM] =
-		NLA_POLICY_NESTED(NL80211_PMSR_FTM_REQ_ATTR_MAX,
-				  nl80211_pmsr_ftm_req_attr_policy),
+		NLA_POLICY_NESTED(nl80211_pmsr_ftm_req_attr_policy),
 };
 
 static const struct nla_policy
 nl80211_pmsr_req_attr_policy[NL80211_PMSR_REQ_ATTR_MAX + 1] = {
 	[NL80211_PMSR_REQ_ATTR_DATA] =
-		NLA_POLICY_NESTED(NL80211_PMSR_TYPE_MAX,
-				  nl80211_pmsr_req_data_policy),
+		NLA_POLICY_NESTED(nl80211_pmsr_req_data_policy),
 	[NL80211_PMSR_REQ_ATTR_GET_AP_TSF] = { .type = NLA_FLAG },
 };
 
@@ -280,8 +278,7 @@ nl80211_psmr_peer_attr_policy[NL80211_PMSR_PEER_ATTR_MAX + 1] = {
 	 */
 	[NL80211_PMSR_PEER_ATTR_CHAN] = { .type = NLA_NESTED },
 	[NL80211_PMSR_PEER_ATTR_REQ] =
-		NLA_POLICY_NESTED(NL80211_PMSR_REQ_ATTR_MAX,
-				  nl80211_pmsr_req_attr_policy),
+		NLA_POLICY_NESTED(nl80211_pmsr_req_attr_policy),
 	[NL80211_PMSR_PEER_ATTR_RESP] = { .type = NLA_REJECT },
 };
 
@@ -292,8 +289,7 @@ nl80211_pmsr_attr_policy[NL80211_PMSR_ATTR_MAX + 1] = {
 	[NL80211_PMSR_ATTR_RANDOMIZE_MAC_ADDR] = { .type = NLA_REJECT },
 	[NL80211_PMSR_ATTR_TYPE_CAPA] = { .type = NLA_REJECT },
 	[NL80211_PMSR_ATTR_PEERS] =
-		NLA_POLICY_NESTED_ARRAY(NL80211_PMSR_PEER_ATTR_MAX,
-					nl80211_psmr_peer_attr_policy),
+		NLA_POLICY_NESTED_ARRAY(nl80211_psmr_peer_attr_policy),
 };
 
 const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
@@ -555,8 +551,7 @@ const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
 	},
 	[NL80211_ATTR_TIMEOUT] = NLA_POLICY_MIN(NLA_U32, 1),
 	[NL80211_ATTR_PEER_MEASUREMENTS] =
-		NLA_POLICY_NESTED(NL80211_PMSR_ATTR_MAX,
-				  nl80211_pmsr_attr_policy),
+		NLA_POLICY_NESTED(nl80211_pmsr_attr_policy),
 	[NL80211_ATTR_AIRTIME_WEIGHT] = NLA_POLICY_MIN(NLA_U16, 1),
 };
 
-- 
cgit v1.2.3


From 9874b71fa19c3ae4d13e02081326975ab2734061 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 1 Feb 2019 11:52:44 +0100
Subject: cfg80211: add missing documentation that kernel-doc warns about

Add the missing documentation that kernel-doc continually warns
about, to get rid of all that noise.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 5862f1efb52c..7f2739a90bdb 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1443,6 +1443,7 @@ enum monitor_flags {
  * @MPATH_INFO_DISCOVERY_RETRIES: @discovery_retries filled
  * @MPATH_INFO_FLAGS: @flags filled
  * @MPATH_INFO_HOP_COUNT: @hop_count filled
+ * @MPATH_INFO_PATH_CHANGE: @path_change_count filled
  */
 enum mpath_info_flags {
 	MPATH_INFO_FRAME_QLEN		= BIT(0),
@@ -4609,6 +4610,17 @@ struct cfg80211_cqm_config;
  * @mesh_id_len: (private) Used by the internal configuration code
  * @mesh_id_up_len: (private) Used by the internal configuration code
  * @wext: (private) Used by the internal wireless extensions compat code
+ * @wext.ibss: (private) IBSS data part of wext handling
+ * @wext.connect: (private) connection handling data
+ * @wext.keys: (private) (WEP) key data
+ * @wext.ie: (private) extra elements for association
+ * @wext.ie_len: (private) length of extra elements
+ * @wext.bssid: (private) selected network BSSID
+ * @wext.ssid: (private) selected network SSID
+ * @wext.default_key: (private) selected default key index
+ * @wext.default_mgmt_key: (private) selected default management key index
+ * @wext.prev_bssid: (private) previous BSSID for reassociation
+ * @wext.prev_bssid_valid: (private) previous BSSID validity
  * @use_4addr: indicates 4addr mode is used on this interface, must be
  *	set by driver (if supported) on add_interface BEFORE registering the
  *	netdev and may otherwise be used by driver read-only, will be update
@@ -4708,7 +4720,8 @@ struct wireless_dev {
 		struct cfg80211_cached_keys *keys;
 		const u8 *ie;
 		size_t ie_len;
-		u8 bssid[ETH_ALEN], prev_bssid[ETH_ALEN];
+		u8 bssid[ETH_ALEN];
+		u8 prev_bssid[ETH_ALEN];
 		u8 ssid[IEEE80211_MAX_SSID_LEN];
 		s8 default_key, default_mgmt_key;
 		bool prev_bssid_valid;
-- 
cgit v1.2.3


From 7d4194633b29342d93501b53accebf638da134ad Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 1 Feb 2019 11:53:32 +0100
Subject: mac80211: fix missing/malformed documentation

Fix the missing and malformed documentation that kernel-doc and
sphinx warn about. While at it, also add some things to the docs
to fix missing links.

Sadly, the only way I could find to fix this was to add some
trailing whitespace.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 Documentation/driver-api/80211/mac80211.rst |  3 ++
 include/net/mac80211.h                      | 44 +++++++++++++++++++++++------
 net/mac80211/sta_info.h                     | 18 ++++++++++++
 3 files changed, 57 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/Documentation/driver-api/80211/mac80211.rst b/Documentation/driver-api/80211/mac80211.rst
index 85a8335e80b6..eab40bcf3987 100644
--- a/Documentation/driver-api/80211/mac80211.rst
+++ b/Documentation/driver-api/80211/mac80211.rst
@@ -125,6 +125,9 @@ functions/definitions
 .. kernel-doc:: include/net/mac80211.h
    :functions: ieee80211_rx_status
 
+.. kernel-doc:: include/net/mac80211.h
+   :functions: mac80211_rx_encoding_flags
+
 .. kernel-doc:: include/net/mac80211.h
    :functions: mac80211_rx_flags
 
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 396b0c079c3b..de866a7253c9 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -942,8 +942,32 @@ ieee80211_rate_get_vht_nss(const struct ieee80211_tx_rate *rate)
  * @band: the band to transmit on (use for checking for races)
  * @hw_queue: HW queue to put the frame on, skb_get_queue_mapping() gives the AC
  * @ack_frame_id: internal frame ID for TX status, used internally
- * @control: union for control data
- * @status: union for status data
+ * @control: union part for control data
+ * @control.rates: TX rates array to try
+ * @control.rts_cts_rate_idx: rate for RTS or CTS
+ * @control.use_rts: use RTS
+ * @control.use_cts_prot: use RTS/CTS
+ * @control.short_preamble: use short preamble (CCK only)
+ * @control.skip_table: skip externally configured rate table
+ * @control.jiffies: timestamp for expiry on powersave clients
+ * @control.vif: virtual interface (may be NULL)
+ * @control.hw_key: key to encrypt with (may be NULL)
+ * @control.flags: control flags, see &enum mac80211_tx_control_flags
+ * @control.enqueue_time: enqueue time (for iTXQs)
+ * @driver_rates: alias to @control.rates to reserve space
+ * @pad: padding
+ * @rate_driver_data: driver use area if driver needs @control.rates
+ * @status: union part for status data
+ * @status.rates: attempted rates
+ * @status.ack_signal: ACK signal
+ * @status.ampdu_ack_len: AMPDU ack length
+ * @status.ampdu_len: AMPDU length
+ * @status.antenna: (legacy, kept only for iwlegacy)
+ * @status.tx_time: airtime consumed for transmission
+ * @status.is_valid_ack_signal: ACK signal is valid
+ * @status.status_driver_data: driver use area
+ * @ack: union part for pure ACK data
+ * @ack.cookie: cookie for the ACK
  * @driver_data: array of driver_data pointers
  * @ampdu_ack_len: number of acked aggregated frames.
  * 	relevant only if IEEE80211_TX_STAT_AMPDU was set.
@@ -1163,6 +1187,7 @@ ieee80211_tx_info_clear_status(struct ieee80211_tx_info *info)
  * @RX_FLAG_AMPDU_EOF_BIT_KNOWN: The EOF value is known
  * @RX_FLAG_RADIOTAP_HE: HE radiotap data is present
  *	(&struct ieee80211_radiotap_he, mac80211 will fill in
+ *	
  *	 - DATA3_DATA_MCS
  *	 - DATA3_DATA_DCM
  *	 - DATA3_CODING
@@ -1170,6 +1195,7 @@ ieee80211_tx_info_clear_status(struct ieee80211_tx_info *info)
  *	 - DATA5_DATA_BW_RU_ALLOC
  *	 - DATA6_NSTS
  *	 - DATA3_STBC
+ *	
  *	from the RX info data, so leave those zeroed when building this data)
  * @RX_FLAG_RADIOTAP_HE_MU: HE MU radiotap data is present
  *	(&struct ieee80211_radiotap_he_mu)
@@ -1220,7 +1246,7 @@ enum mac80211_rx_flags {
  * @RX_ENC_FLAG_HT_GF: This frame was received in a HT-greenfield transmission,
  *	if the driver fills this value it should add
  *	%IEEE80211_RADIOTAP_MCS_HAVE_FMT
- *	to hw.radiotap_mcs_details to advertise that fact
+ *	to @hw.radiotap_mcs_details to advertise that fact.
  * @RX_ENC_FLAG_LDPC: LDPC was used
  * @RX_ENC_FLAG_STBC_MASK: STBC 2 bit bitmask. 1 - Nss=1, 2 - Nss=2, 3 - Nss=3
  * @RX_ENC_FLAG_BF: packet was beamformed
@@ -2333,12 +2359,14 @@ enum ieee80211_hw_flags {
  * @radiotap_he: HE radiotap validity flags
  *
  * @radiotap_timestamp: Information for the radiotap timestamp field; if the
- *	'units_pos' member is set to a non-negative value it must be set to
- *	a combination of a IEEE80211_RADIOTAP_TIMESTAMP_UNIT_* and a
- *	IEEE80211_RADIOTAP_TIMESTAMP_SPOS_* value, and then the timestamp
+ *	@units_pos member is set to a non-negative value then the timestamp
  *	field will be added and populated from the &struct ieee80211_rx_status
- *	device_timestamp. If the 'accuracy' member is non-negative, it's put
- *	into the accuracy radiotap field and the accuracy known flag is set.
+ *	device_timestamp.
+ * @radiotap_timestamp.units_pos: Must be set to a combination of a
+ *	IEEE80211_RADIOTAP_TIMESTAMP_UNIT_* and a
+ *	IEEE80211_RADIOTAP_TIMESTAMP_SPOS_* value.
+ * @radiotap_timestamp.accuracy: If non-negative, fills the accuracy in the
+ *	radiotap field and the accuracy known flag will be set.
  *
  * @netdev_features: netdev features to be set in each netdev created
  *	from this HW. Note that not all features are usable with mac80211,
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 05647d835894..71f7e4973329 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -497,10 +497,28 @@ struct ieee80211_sta_rx_stats {
  * @tdls_chandef: a TDLS peer can have a wider chandef that is compatible to
  *	the BSS one.
  * @tx_stats: TX statistics
+ * @tx_stats.packets: # of packets transmitted
+ * @tx_stats.bytes: # of bytes in all packets transmitted
+ * @tx_stats.last_rate: last TX rate
+ * @tx_stats.msdu: # of transmitted MSDUs per TID
  * @rx_stats: RX statistics
+ * @rx_stats_avg: averaged RX statistics
+ * @rx_stats_avg.signal: averaged signal
+ * @rx_stats_avg.chain_signal: averaged per-chain signal
  * @pcpu_rx_stats: per-CPU RX statistics, assigned only if the driver needs
  *	this (by advertising the USES_RSS hw flag)
  * @status_stats: TX status statistics
+ * @status_stats.filtered: # of filtered frames
+ * @status_stats.retry_failed: # of frames that failed after retry
+ * @status_stats.retry_count: # of retries attempted
+ * @status_stats.lost_packets: # of lost packets
+ * @status_stats.last_tdls_pkt_time: timestamp of last TDLS packet
+ * @status_stats.msdu_retries: # of MSDU retries
+ * @status_stats.msdu_failed: # of failed MSDUs
+ * @status_stats.last_ack: last ack timestamp (jiffies)
+ * @status_stats.last_ack_signal: last ACK signal
+ * @status_stats.ack_signal_filled: last ACK signal validity
+ * @status_stats.avg_ack_signal: average ACK signal
  */
 struct sta_info {
 	/* General information, mostly static */
-- 
cgit v1.2.3


From 7403e48d7abd9a0114e9056e7615cfbbb5123b15 Mon Sep 17 00:00:00 2001
From: Kamil Konieczny <k.konieczny@partner.samsung.com>
Date: Tue, 22 Jan 2019 10:02:31 +0100
Subject: clk: samsung: dt-bindings: Add Exynos5433 IMEM CMU clock IDs

Add DT bindings to describe the IMEM CMU clocks for the SlimSSS
(Slim Security SubSystem).

Signed-off-by: Kamil Konieczny <k.konieczny@partner.samsung.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Acked-by:  Chanwoo Choi <cw00.choi@samsung.com>
[s.nawrocki@samsung.com: edited commit description]
Signed-off-by: Sylwester Nawrocki <s.nawrocki@samsung.com>
---
 include/dt-bindings/clock/exynos5433.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/dt-bindings/clock/exynos5433.h b/include/dt-bindings/clock/exynos5433.h
index 98bd85ce1e45..7cfce3a62050 100644
--- a/include/dt-bindings/clock/exynos5433.h
+++ b/include/dt-bindings/clock/exynos5433.h
@@ -1406,4 +1406,10 @@
 
 #define CAM1_NR_CLK					113
 
+/* CMU_IMEM */
+#define CLK_ACLK_SLIMSSS		2
+#define CLK_PCLK_SLIMSSS		35
+
+#define IMEM_NR_CLK			36
+
 #endif /* _DT_BINDINGS_CLOCK_EXYNOS5433_H */
-- 
cgit v1.2.3


From b80a40c659ba38e1295ab7e55b2c781abee64739 Mon Sep 17 00:00:00 2001
From: Kamil Konieczny <k.konieczny@partner.samsung.com>
Date: Tue, 22 Jan 2019 10:02:29 +0100
Subject: clk: samsung: exynos5433: Fix name typo in sssx

Fix typo in sssx name, there should be three letters 's'.

Acked-by: Rob Herring <robh@kernel.org>
Reviewed-by: Chanwoo Choi <cw00.choi@samsung.com>
Signed-off-by: Kamil Konieczny <k.konieczny@partner.samsung.com>
Signed-off-by: Sylwester Nawrocki <s.nawrocki@samsung.com>
---
 drivers/clk/samsung/clk-exynos5433.c   | 2 +-
 include/dt-bindings/clock/exynos5433.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/clk/samsung/clk-exynos5433.c b/drivers/clk/samsung/clk-exynos5433.c
index ea47f49abc7f..24c3360db65b 100644
--- a/drivers/clk/samsung/clk-exynos5433.c
+++ b/drivers/clk/samsung/clk-exynos5433.c
@@ -559,7 +559,7 @@ static const struct samsung_gate_clock top_gate_clks[] __initconst = {
 	/* ENABLE_ACLK_TOP */
 	GATE(CLK_ACLK_G3D_400, "aclk_g3d_400", "div_aclk_g3d_400",
 			ENABLE_ACLK_TOP, 30, CLK_IS_CRITICAL, 0),
-	GATE(CLK_ACLK_IMEM_SSX_266, "aclk_imem_ssx_266",
+	GATE(CLK_ACLK_IMEM_SSSX_266, "aclk_imem_sssx_266",
 			"div_aclk_imem_sssx_266", ENABLE_ACLK_TOP,
 			29, CLK_IGNORE_UNUSED, 0),
 	GATE(CLK_ACLK_BUS0_400, "aclk_bus0_400", "div_aclk_bus0_400",
diff --git a/include/dt-bindings/clock/exynos5433.h b/include/dt-bindings/clock/exynos5433.h
index 7cfce3a62050..25ffa53573a5 100644
--- a/include/dt-bindings/clock/exynos5433.h
+++ b/include/dt-bindings/clock/exynos5433.h
@@ -156,7 +156,7 @@
 #define CLK_ACLK_G2D_266		220
 #define CLK_ACLK_G2D_400		221
 #define CLK_ACLK_G3D_400		222
-#define CLK_ACLK_IMEM_SSX_266		223
+#define CLK_ACLK_IMEM_SSSX_266		223
 #define CLK_ACLK_BUS0_400		224
 #define CLK_ACLK_BUS1_400		225
 #define CLK_ACLK_IMEM_200		226
-- 
cgit v1.2.3


From d0a060be573bfbf8753a15dca35497db5e968bb0 Mon Sep 17 00:00:00 2001
From: Kristina Martsenko <kristina.martsenko@arm.com>
Date: Wed, 30 Jan 2019 12:02:44 +0000
Subject: arm64: add ptrace regsets for ptrauth key management

Add two new ptrace regsets, which can be used to request and change the
pointer authentication keys of a thread. NT_ARM_PACA_KEYS gives access
to the instruction/data address keys, and NT_ARM_PACG_KEYS to the
generic authentication key. The keys are also part of the core dump file
of the process.

The regsets are only exposed if the kernel is compiled with
CONFIG_CHECKPOINT_RESTORE=y, as the only intended use case is
checkpointing and restoring processes that are using pointer
authentication. (This can be changed later if there are other use
cases.)

Reviewed-by: Dave Martin <Dave.Martin@arm.com>
Signed-off-by: Kristina Martsenko <kristina.martsenko@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 Documentation/arm64/pointer-authentication.txt |   5 +
 arch/arm64/include/uapi/asm/ptrace.h           |  13 +++
 arch/arm64/kernel/ptrace.c                     | 147 +++++++++++++++++++++++++
 include/uapi/linux/elf.h                       |   2 +
 4 files changed, 167 insertions(+)

(limited to 'include')

diff --git a/Documentation/arm64/pointer-authentication.txt b/Documentation/arm64/pointer-authentication.txt
index a25cd21290e9..5baca42ba146 100644
--- a/Documentation/arm64/pointer-authentication.txt
+++ b/Documentation/arm64/pointer-authentication.txt
@@ -78,6 +78,11 @@ bits can vary between the two. Note that the masks apply to TTBR0
 addresses, and are not valid to apply to TTBR1 addresses (e.g. kernel
 pointers).
 
+Additionally, when CONFIG_CHECKPOINT_RESTORE is also set, the kernel
+will expose the NT_ARM_PACA_KEYS and NT_ARM_PACG_KEYS regsets (struct
+user_pac_address_keys and struct user_pac_generic_keys). These can be
+used to get and set the keys for a thread.
+
 
 Virtualization
 --------------
diff --git a/arch/arm64/include/uapi/asm/ptrace.h b/arch/arm64/include/uapi/asm/ptrace.h
index 28d77c9ed531..d78623acb649 100644
--- a/arch/arm64/include/uapi/asm/ptrace.h
+++ b/arch/arm64/include/uapi/asm/ptrace.h
@@ -233,6 +233,19 @@ struct user_pac_mask {
 	__u64		insn_mask;
 };
 
+/* pointer authentication keys (NT_ARM_PACA_KEYS, NT_ARM_PACG_KEYS) */
+
+struct user_pac_address_keys {
+	__uint128_t	apiakey;
+	__uint128_t	apibkey;
+	__uint128_t	apdakey;
+	__uint128_t	apdbkey;
+};
+
+struct user_pac_generic_keys {
+	__uint128_t	apgakey;
+};
+
 #endif /* __ASSEMBLY__ */
 
 #endif /* _UAPI__ASM_PTRACE_H */
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index 9dce33b0e260..a86413be5a2d 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -979,6 +979,131 @@ static int pac_mask_get(struct task_struct *target,
 
 	return user_regset_copyout(&pos, &count, &kbuf, &ubuf, &uregs, 0, -1);
 }
+
+#ifdef CONFIG_CHECKPOINT_RESTORE
+static __uint128_t pac_key_to_user(const struct ptrauth_key *key)
+{
+	return (__uint128_t)key->hi << 64 | key->lo;
+}
+
+static struct ptrauth_key pac_key_from_user(__uint128_t ukey)
+{
+	struct ptrauth_key key = {
+		.lo = (unsigned long)ukey,
+		.hi = (unsigned long)(ukey >> 64),
+	};
+
+	return key;
+}
+
+static void pac_address_keys_to_user(struct user_pac_address_keys *ukeys,
+				     const struct ptrauth_keys *keys)
+{
+	ukeys->apiakey = pac_key_to_user(&keys->apia);
+	ukeys->apibkey = pac_key_to_user(&keys->apib);
+	ukeys->apdakey = pac_key_to_user(&keys->apda);
+	ukeys->apdbkey = pac_key_to_user(&keys->apdb);
+}
+
+static void pac_address_keys_from_user(struct ptrauth_keys *keys,
+				       const struct user_pac_address_keys *ukeys)
+{
+	keys->apia = pac_key_from_user(ukeys->apiakey);
+	keys->apib = pac_key_from_user(ukeys->apibkey);
+	keys->apda = pac_key_from_user(ukeys->apdakey);
+	keys->apdb = pac_key_from_user(ukeys->apdbkey);
+}
+
+static int pac_address_keys_get(struct task_struct *target,
+				const struct user_regset *regset,
+				unsigned int pos, unsigned int count,
+				void *kbuf, void __user *ubuf)
+{
+	struct ptrauth_keys *keys = &target->thread.keys_user;
+	struct user_pac_address_keys user_keys;
+
+	if (!system_supports_address_auth())
+		return -EINVAL;
+
+	pac_address_keys_to_user(&user_keys, keys);
+
+	return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+				   &user_keys, 0, -1);
+}
+
+static int pac_address_keys_set(struct task_struct *target,
+				const struct user_regset *regset,
+				unsigned int pos, unsigned int count,
+				const void *kbuf, const void __user *ubuf)
+{
+	struct ptrauth_keys *keys = &target->thread.keys_user;
+	struct user_pac_address_keys user_keys;
+	int ret;
+
+	if (!system_supports_address_auth())
+		return -EINVAL;
+
+	pac_address_keys_to_user(&user_keys, keys);
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				 &user_keys, 0, -1);
+	if (ret)
+		return ret;
+	pac_address_keys_from_user(keys, &user_keys);
+
+	return 0;
+}
+
+static void pac_generic_keys_to_user(struct user_pac_generic_keys *ukeys,
+				     const struct ptrauth_keys *keys)
+{
+	ukeys->apgakey = pac_key_to_user(&keys->apga);
+}
+
+static void pac_generic_keys_from_user(struct ptrauth_keys *keys,
+				       const struct user_pac_generic_keys *ukeys)
+{
+	keys->apga = pac_key_from_user(ukeys->apgakey);
+}
+
+static int pac_generic_keys_get(struct task_struct *target,
+				const struct user_regset *regset,
+				unsigned int pos, unsigned int count,
+				void *kbuf, void __user *ubuf)
+{
+	struct ptrauth_keys *keys = &target->thread.keys_user;
+	struct user_pac_generic_keys user_keys;
+
+	if (!system_supports_generic_auth())
+		return -EINVAL;
+
+	pac_generic_keys_to_user(&user_keys, keys);
+
+	return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+				   &user_keys, 0, -1);
+}
+
+static int pac_generic_keys_set(struct task_struct *target,
+				const struct user_regset *regset,
+				unsigned int pos, unsigned int count,
+				const void *kbuf, const void __user *ubuf)
+{
+	struct ptrauth_keys *keys = &target->thread.keys_user;
+	struct user_pac_generic_keys user_keys;
+	int ret;
+
+	if (!system_supports_generic_auth())
+		return -EINVAL;
+
+	pac_generic_keys_to_user(&user_keys, keys);
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				 &user_keys, 0, -1);
+	if (ret)
+		return ret;
+	pac_generic_keys_from_user(keys, &user_keys);
+
+	return 0;
+}
+#endif /* CONFIG_CHECKPOINT_RESTORE */
 #endif /* CONFIG_ARM64_PTR_AUTH */
 
 enum aarch64_regset {
@@ -995,6 +1120,10 @@ enum aarch64_regset {
 #endif
 #ifdef CONFIG_ARM64_PTR_AUTH
 	REGSET_PAC_MASK,
+#ifdef CONFIG_CHECKPOINT_RESTORE
+	REGSET_PACA_KEYS,
+	REGSET_PACG_KEYS,
+#endif
 #endif
 };
 
@@ -1074,6 +1203,24 @@ static const struct user_regset aarch64_regsets[] = {
 		.get = pac_mask_get,
 		/* this cannot be set dynamically */
 	},
+#ifdef CONFIG_CHECKPOINT_RESTORE
+	[REGSET_PACA_KEYS] = {
+		.core_note_type = NT_ARM_PACA_KEYS,
+		.n = sizeof(struct user_pac_address_keys) / sizeof(__uint128_t),
+		.size = sizeof(__uint128_t),
+		.align = sizeof(__uint128_t),
+		.get = pac_address_keys_get,
+		.set = pac_address_keys_set,
+	},
+	[REGSET_PACG_KEYS] = {
+		.core_note_type = NT_ARM_PACG_KEYS,
+		.n = sizeof(struct user_pac_generic_keys) / sizeof(__uint128_t),
+		.size = sizeof(__uint128_t),
+		.align = sizeof(__uint128_t),
+		.get = pac_generic_keys_get,
+		.set = pac_generic_keys_set,
+	},
+#endif
 #endif
 };
 
diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h
index e4d6ddd93567..34c02e4290fe 100644
--- a/include/uapi/linux/elf.h
+++ b/include/uapi/linux/elf.h
@@ -421,6 +421,8 @@ typedef struct elf64_shdr {
 #define NT_ARM_SYSTEM_CALL	0x404	/* ARM system call number */
 #define NT_ARM_SVE	0x405		/* ARM Scalable Vector Extension registers */
 #define NT_ARM_PAC_MASK		0x406	/* ARM pointer authentication code masks */
+#define NT_ARM_PACA_KEYS	0x407	/* ARM pointer authentication address keys */
+#define NT_ARM_PACG_KEYS	0x408	/* ARM pointer authentication generic key */
 #define NT_ARC_V2	0x600		/* ARCv2 accumulator/extra registers */
 #define NT_VMCOREDD	0x700		/* Vmcore Device Dump Note */
 #define NT_MIPS_DSP	0x800		/* MIPS DSP ASE registers */
-- 
cgit v1.2.3


From 42bf4152d8a79f89f5456dee63a1f364fbce2dd6 Mon Sep 17 00:00:00 2001
From: Sumit Garg <sumit.garg@linaro.org>
Date: Tue, 29 Jan 2019 11:19:36 +0530
Subject: tee: add supp_nowait flag in tee_context struct

This flag indicates that requests in this context should not wait for
tee-supplicant daemon to be started if not present and just return
with an error code. It is needed for requests which should be
non-blocking in nature like ones arising from TEE based kernel drivers
or any in kernel api that uses TEE internal client interface.

Signed-off-by: Sumit Garg <sumit.garg@linaro.org>
Reviewed-by: Daniel Thompson <daniel.thompson@linaro.org>
Signed-off-by: Jens Wiklander <jens.wiklander@linaro.org>
---
 drivers/tee/optee/supp.c | 10 +++++++++-
 drivers/tee/tee_core.c   | 13 +++++++++++++
 include/linux/tee_drv.h  |  6 ++++++
 3 files changed, 28 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/tee/optee/supp.c b/drivers/tee/optee/supp.c
index 43626e15703a..92f56b8645e3 100644
--- a/drivers/tee/optee/supp.c
+++ b/drivers/tee/optee/supp.c
@@ -88,10 +88,18 @@ u32 optee_supp_thrd_req(struct tee_context *ctx, u32 func, size_t num_params,
 {
 	struct optee *optee = tee_get_drvdata(ctx->teedev);
 	struct optee_supp *supp = &optee->supp;
-	struct optee_supp_req *req = kzalloc(sizeof(*req), GFP_KERNEL);
+	struct optee_supp_req *req;
 	bool interruptable;
 	u32 ret;
 
+	/*
+	 * Return in case there is no supplicant available and
+	 * non-blocking request.
+	 */
+	if (!supp->ctx && ctx->supp_nowait)
+		return TEEC_ERROR_COMMUNICATION;
+
+	req = kzalloc(sizeof(*req), GFP_KERNEL);
 	if (!req)
 		return TEEC_ERROR_OUT_OF_MEMORY;
 
diff --git a/drivers/tee/tee_core.c b/drivers/tee/tee_core.c
index 7b2bb4c50058..adf2588282fc 100644
--- a/drivers/tee/tee_core.c
+++ b/drivers/tee/tee_core.c
@@ -106,6 +106,11 @@ static int tee_open(struct inode *inode, struct file *filp)
 	if (IS_ERR(ctx))
 		return PTR_ERR(ctx);
 
+	/*
+	 * Default user-space behaviour is to wait for tee-supplicant
+	 * if not present for any requests in this context.
+	 */
+	ctx->supp_nowait = false;
 	filp->private_data = ctx;
 	return 0;
 }
@@ -982,6 +987,14 @@ tee_client_open_context(struct tee_context *start,
 	} while (IS_ERR(ctx) && PTR_ERR(ctx) != -ENOMEM);
 
 	put_device(put_dev);
+	/*
+	 * Default behaviour for in kernel client is to not wait for
+	 * tee-supplicant if not present for any requests in this context.
+	 * Also this flag could be configured again before call to
+	 * tee_client_open_session() if any in kernel client requires
+	 * different behaviour.
+	 */
+	ctx->supp_nowait = true;
 	return ctx;
 }
 EXPORT_SYMBOL_GPL(tee_client_open_context);
diff --git a/include/linux/tee_drv.h b/include/linux/tee_drv.h
index 6cfe05893a76..5076502c07d7 100644
--- a/include/linux/tee_drv.h
+++ b/include/linux/tee_drv.h
@@ -47,6 +47,11 @@ struct tee_shm_pool;
  * @releasing:  flag that indicates if context is being released right now.
  *		It is needed to break circular dependency on context during
  *              shared memory release.
+ * @supp_nowait: flag that indicates that requests in this context should not
+ *              wait for tee-supplicant daemon to be started if not present
+ *              and just return with an error code. It is needed for requests
+ *              that arises from TEE based kernel drivers that should be
+ *              non-blocking in nature.
  */
 struct tee_context {
 	struct tee_device *teedev;
@@ -54,6 +59,7 @@ struct tee_context {
 	void *data;
 	struct kref refcount;
 	bool releasing;
+	bool supp_nowait;
 };
 
 struct tee_param_memref {
-- 
cgit v1.2.3


From 0fc1db9d105915021260eb241661b8e96f5c0f1a Mon Sep 17 00:00:00 2001
From: Sumit Garg <sumit.garg@linaro.org>
Date: Tue, 29 Jan 2019 11:19:35 +0530
Subject: tee: add bus driver framework for TEE based devices

Introduce a generic TEE bus driver concept for TEE based kernel drivers
which would like to communicate with TEE based devices/services. Also
add support in module device table for these new TEE based devices.

In this TEE bus concept, devices/services are identified via Universally
Unique Identifier (UUID) and drivers register a table of device UUIDs
which they can support.

So this TEE bus framework registers following apis:
- match(): Iterates over the driver UUID table to find a corresponding
  match for device UUID. If a match is found, then this particular device
  is probed via corresponding probe api registered by the driver. This
  process happens whenever a device or a driver is registered with TEE
  bus.
- uevent(): Notifies user-space (udev) whenever a new device is registered
  on this bus for auto-loading of modularized drivers.

Also this framework allows for device enumeration to be specific to
corresponding TEE implementation like OP-TEE etc.

Signed-off-by: Sumit Garg <sumit.garg@linaro.org>
Reviewed-by: Daniel Thompson <daniel.thompson@linaro.org>
Reviewed-by: Bhupesh Sharma <bhsharma@redhat.com>
Signed-off-by: Jens Wiklander <jens.wiklander@linaro.org>
---
 drivers/tee/tee_core.c            | 54 ++++++++++++++++++++++++++++++++++++---
 include/linux/mod_devicetable.h   |  9 +++++++
 include/linux/tee_drv.h           | 32 ++++++++++++++++++++++-
 scripts/mod/devicetable-offsets.c |  3 +++
 scripts/mod/file2alias.c          | 19 ++++++++++++++
 5 files changed, 112 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/drivers/tee/tee_core.c b/drivers/tee/tee_core.c
index adf2588282fc..25f3b9cc8908 100644
--- a/drivers/tee/tee_core.c
+++ b/drivers/tee/tee_core.c
@@ -15,7 +15,6 @@
 #define pr_fmt(fmt) "%s: " fmt, __func__
 
 #include <linux/cdev.h>
-#include <linux/device.h>
 #include <linux/fs.h>
 #include <linux/idr.h>
 #include <linux/module.h>
@@ -1040,6 +1039,39 @@ int tee_client_invoke_func(struct tee_context *ctx,
 }
 EXPORT_SYMBOL_GPL(tee_client_invoke_func);
 
+static int tee_client_device_match(struct device *dev,
+				   struct device_driver *drv)
+{
+	const struct tee_client_device_id *id_table;
+	struct tee_client_device *tee_device;
+
+	id_table = to_tee_client_driver(drv)->id_table;
+	tee_device = to_tee_client_device(dev);
+
+	while (!uuid_is_null(&id_table->uuid)) {
+		if (uuid_equal(&tee_device->id.uuid, &id_table->uuid))
+			return 1;
+		id_table++;
+	}
+
+	return 0;
+}
+
+static int tee_client_device_uevent(struct device *dev,
+				    struct kobj_uevent_env *env)
+{
+	uuid_t *dev_id = &to_tee_client_device(dev)->id.uuid;
+
+	return add_uevent_var(env, "MODALIAS=tee:%pUb", dev_id);
+}
+
+struct bus_type tee_bus_type = {
+	.name		= "tee",
+	.match		= tee_client_device_match,
+	.uevent		= tee_client_device_uevent,
+};
+EXPORT_SYMBOL_GPL(tee_bus_type);
+
 static int __init tee_init(void)
 {
 	int rc;
@@ -1053,18 +1085,32 @@ static int __init tee_init(void)
 	rc = alloc_chrdev_region(&tee_devt, 0, TEE_NUM_DEVICES, "tee");
 	if (rc) {
 		pr_err("failed to allocate char dev region\n");
-		class_destroy(tee_class);
-		tee_class = NULL;
+		goto out_unreg_class;
+	}
+
+	rc = bus_register(&tee_bus_type);
+	if (rc) {
+		pr_err("failed to register tee bus\n");
+		goto out_unreg_chrdev;
 	}
 
+	return 0;
+
+out_unreg_chrdev:
+	unregister_chrdev_region(tee_devt, TEE_NUM_DEVICES);
+out_unreg_class:
+	class_destroy(tee_class);
+	tee_class = NULL;
+
 	return rc;
 }
 
 static void __exit tee_exit(void)
 {
+	bus_unregister(&tee_bus_type);
+	unregister_chrdev_region(tee_devt, TEE_NUM_DEVICES);
 	class_destroy(tee_class);
 	tee_class = NULL;
-	unregister_chrdev_region(tee_devt, TEE_NUM_DEVICES);
 }
 
 subsys_initcall(tee_init);
diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h
index f9bd2f34b99f..14eaeeb46f41 100644
--- a/include/linux/mod_devicetable.h
+++ b/include/linux/mod_devicetable.h
@@ -779,4 +779,13 @@ struct typec_device_id {
 	kernel_ulong_t driver_data;
 };
 
+/**
+ * struct tee_client_device_id - tee based device identifier
+ * @uuid: For TEE based client devices we use the device uuid as
+ *        the identifier.
+ */
+struct tee_client_device_id {
+	uuid_t uuid;
+};
+
 #endif /* LINUX_MOD_DEVICETABLE_H */
diff --git a/include/linux/tee_drv.h b/include/linux/tee_drv.h
index 5076502c07d7..56d7f1b4516d 100644
--- a/include/linux/tee_drv.h
+++ b/include/linux/tee_drv.h
@@ -15,11 +15,14 @@
 #ifndef __TEE_DRV_H
 #define __TEE_DRV_H
 
-#include <linux/types.h>
+#include <linux/device.h>
 #include <linux/idr.h>
 #include <linux/kref.h>
 #include <linux/list.h>
+#include <linux/mod_devicetable.h>
 #include <linux/tee.h>
+#include <linux/types.h>
+#include <linux/uuid.h>
 
 /*
  * The file describes the API provided by the generic TEE driver to the
@@ -544,4 +547,31 @@ static inline bool tee_param_is_memref(struct tee_param *param)
 	}
 }
 
+extern struct bus_type tee_bus_type;
+
+/**
+ * struct tee_client_device - tee based device
+ * @id:			device identifier
+ * @dev:		device structure
+ */
+struct tee_client_device {
+	struct tee_client_device_id id;
+	struct device dev;
+};
+
+#define to_tee_client_device(d) container_of(d, struct tee_client_device, dev)
+
+/**
+ * struct tee_client_driver - tee client driver
+ * @id_table:		device id table supported by this driver
+ * @driver:		driver structure
+ */
+struct tee_client_driver {
+	const struct tee_client_device_id *id_table;
+	struct device_driver driver;
+};
+
+#define to_tee_client_driver(d) \
+		container_of(d, struct tee_client_driver, driver)
+
 #endif /*__TEE_DRV_H*/
diff --git a/scripts/mod/devicetable-offsets.c b/scripts/mod/devicetable-offsets.c
index 293004499b4d..160718383a71 100644
--- a/scripts/mod/devicetable-offsets.c
+++ b/scripts/mod/devicetable-offsets.c
@@ -225,5 +225,8 @@ int main(void)
 	DEVID_FIELD(typec_device_id, svid);
 	DEVID_FIELD(typec_device_id, mode);
 
+	DEVID(tee_client_device_id);
+	DEVID_FIELD(tee_client_device_id, uuid);
+
 	return 0;
 }
diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c
index a37af7d71973..d0e41723627f 100644
--- a/scripts/mod/file2alias.c
+++ b/scripts/mod/file2alias.c
@@ -37,6 +37,9 @@ typedef unsigned char	__u8;
 typedef struct {
 	__u8 b[16];
 } uuid_le;
+typedef struct {
+	__u8 b[16];
+} uuid_t;
 
 /* Big exception to the "don't include kernel headers into userspace, which
  * even potentially has different endianness and word sizes, since
@@ -1287,6 +1290,21 @@ static int do_typec_entry(const char *filename, void *symval, char *alias)
 	return 1;
 }
 
+/* Looks like: tee:uuid */
+static int do_tee_entry(const char *filename, void *symval, char *alias)
+{
+	DEF_FIELD(symval, tee_client_device_id, uuid);
+
+	sprintf(alias, "tee:%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x",
+		uuid.b[0], uuid.b[1], uuid.b[2], uuid.b[3], uuid.b[4],
+		uuid.b[5], uuid.b[6], uuid.b[7], uuid.b[8], uuid.b[9],
+		uuid.b[10], uuid.b[11], uuid.b[12], uuid.b[13], uuid.b[14],
+		uuid.b[15]);
+
+	add_wildcard(alias);
+	return 1;
+}
+
 /* Does namelen bytes of name exactly match the symbol? */
 static bool sym_is(const char *name, unsigned namelen, const char *symbol)
 {
@@ -1357,6 +1375,7 @@ static const struct devtable devtable[] = {
 	{"fslmc", SIZE_fsl_mc_device_id, do_fsl_mc_entry},
 	{"tbsvc", SIZE_tb_service_id, do_tbsvc_entry},
 	{"typec", SIZE_typec_device_id, do_typec_entry},
+	{"tee", SIZE_tee_client_device_id, do_tee_entry},
 };
 
 /* Create MODULE_ALIAS() statements.
-- 
cgit v1.2.3


From 0b6a2c9cf4a00f54a0916499ece8a5cf3cced385 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Fri, 1 Feb 2019 12:14:53 +0100
Subject: ALSA: isa: Avoid passing NULL to memory allocators

We used to pass NULL to memory allocators for ISA devices due to
historical reasons.  But we prefer rather a proper device object to be
assigned, so let's fix it by replacing snd_dma_isa_data() call with
card->dev reference, and kill snd_dma_isa_data() definition.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 Documentation/sound/kernel-api/writing-an-alsa-driver.rst | 10 +++++-----
 include/sound/memalloc.h                                  |  1 -
 sound/isa/ad1816a/ad1816a_lib.c                           |  2 +-
 sound/isa/cmi8330.c                                       |  2 +-
 sound/isa/es1688/es1688_lib.c                             |  2 +-
 sound/isa/es18xx.c                                        |  2 +-
 sound/isa/gus/gus_pcm.c                                   |  4 ++--
 sound/isa/sb/sb16_main.c                                  |  2 +-
 sound/isa/sb/sb8_main.c                                   |  2 +-
 sound/isa/sscape.c                                        |  7 ++++---
 sound/isa/wss/wss_lib.c                                   |  2 +-
 11 files changed, 18 insertions(+), 18 deletions(-)

(limited to 'include')

diff --git a/Documentation/sound/kernel-api/writing-an-alsa-driver.rst b/Documentation/sound/kernel-api/writing-an-alsa-driver.rst
index 7c2f2032d30a..6b154dbb02cc 100644
--- a/Documentation/sound/kernel-api/writing-an-alsa-driver.rst
+++ b/Documentation/sound/kernel-api/writing-an-alsa-driver.rst
@@ -3520,14 +3520,14 @@ allocator will try to get an area as large as possible within the
 given size.
 
 The second argument (type) and the third argument (device pointer) are
-dependent on the bus. In the case of the ISA bus, pass
-:c:func:`snd_dma_isa_data()` as the third argument with
+dependent on the bus. For normal devices, pass the device pointer
+(typically identical as ``card->dev``) to the third argument with
 ``SNDRV_DMA_TYPE_DEV`` type. For the continuous buffer unrelated to the
 bus can be pre-allocated with ``SNDRV_DMA_TYPE_CONTINUOUS`` type and the
 ``snd_dma_continuous_data(GFP_KERNEL)`` device pointer, where
-``GFP_KERNEL`` is the kernel allocation flag to use. For the PCI
-scatter-gather buffers, use ``SNDRV_DMA_TYPE_DEV_SG`` with
-``snd_dma_pci_data(pci)`` (see the `Non-Contiguous Buffers`_
+``GFP_KERNEL`` is the kernel allocation flag to use. For the
+scatter-gather buffers, use ``SNDRV_DMA_TYPE_DEV_SG`` with the device
+pointer (see the `Non-Contiguous Buffers`_
 section).
 
 Once the buffer is pre-allocated, you can use the allocator in the
diff --git a/include/sound/memalloc.h b/include/sound/memalloc.h
index af3fa577fa06..1ac0dd82a916 100644
--- a/include/sound/memalloc.h
+++ b/include/sound/memalloc.h
@@ -37,7 +37,6 @@ struct snd_dma_device {
 };
 
 #define snd_dma_pci_data(pci)	(&(pci)->dev)
-#define snd_dma_isa_data()	NULL
 #define snd_dma_continuous_data(x)	((struct device *)(__force unsigned long)(x))
 
 
diff --git a/sound/isa/ad1816a/ad1816a_lib.c b/sound/isa/ad1816a/ad1816a_lib.c
index 61e8c7e524db..94b381a78e9e 100644
--- a/sound/isa/ad1816a/ad1816a_lib.c
+++ b/sound/isa/ad1816a/ad1816a_lib.c
@@ -693,7 +693,7 @@ int snd_ad1816a_pcm(struct snd_ad1816a *chip, int device)
 	snd_ad1816a_init(chip);
 
 	snd_pcm_lib_preallocate_pages_for_all(pcm, SNDRV_DMA_TYPE_DEV,
-					      snd_dma_isa_data(),
+					      chip->card->dev,
 					      64*1024, chip->dma1 > 3 || chip->dma2 > 3 ? 128*1024 : 64*1024);
 
 	chip->pcm = pcm;
diff --git a/sound/isa/cmi8330.c b/sound/isa/cmi8330.c
index 7e5aa06414c4..1868b73aa49c 100644
--- a/sound/isa/cmi8330.c
+++ b/sound/isa/cmi8330.c
@@ -470,7 +470,7 @@ static int snd_cmi8330_pcm(struct snd_card *card, struct snd_cmi8330 *chip)
 	snd_pcm_set_ops(pcm, SNDRV_PCM_STREAM_CAPTURE, &chip->streams[SNDRV_PCM_STREAM_CAPTURE].ops);
 
 	snd_pcm_lib_preallocate_pages_for_all(pcm, SNDRV_DMA_TYPE_DEV,
-					      snd_dma_isa_data(),
+					      card->dev,
 					      64*1024, 128*1024);
 	chip->pcm = pcm;
 
diff --git a/sound/isa/es1688/es1688_lib.c b/sound/isa/es1688/es1688_lib.c
index 50cdce0e8946..da341969e650 100644
--- a/sound/isa/es1688/es1688_lib.c
+++ b/sound/isa/es1688/es1688_lib.c
@@ -746,7 +746,7 @@ int snd_es1688_pcm(struct snd_card *card, struct snd_es1688 *chip, int device)
 	chip->pcm = pcm;
 
 	snd_pcm_lib_preallocate_pages_for_all(pcm, SNDRV_DMA_TYPE_DEV,
-					      snd_dma_isa_data(),
+					      card->dev,
 					      64*1024, 64*1024);
 	return 0;
 }
diff --git a/sound/isa/es18xx.c b/sound/isa/es18xx.c
index 77aa9a27fb3b..07abc7f7840c 100644
--- a/sound/isa/es18xx.c
+++ b/sound/isa/es18xx.c
@@ -1717,7 +1717,7 @@ static int snd_es18xx_pcm(struct snd_card *card, int device)
         chip->pcm = pcm;
 
 	snd_pcm_lib_preallocate_pages_for_all(pcm, SNDRV_DMA_TYPE_DEV,
-					      snd_dma_isa_data(),
+					      card->dev,
 					      64*1024,
 					      chip->dma1 > 3 || chip->dma2 > 3 ? 128*1024 : 64*1024);
 	return 0;
diff --git a/sound/isa/gus/gus_pcm.c b/sound/isa/gus/gus_pcm.c
index 131b28997e1d..b9efc6dff45d 100644
--- a/sound/isa/gus/gus_pcm.c
+++ b/sound/isa/gus/gus_pcm.c
@@ -891,7 +891,7 @@ int snd_gf1_pcm_new(struct snd_gus_card *gus, int pcm_dev, int control_index)
 
 	for (substream = pcm->streams[SNDRV_PCM_STREAM_PLAYBACK].substream; substream; substream = substream->next)
 		snd_pcm_lib_preallocate_pages(substream, SNDRV_DMA_TYPE_DEV,
-					      snd_dma_isa_data(),
+					      card->dev,
 					      64*1024, gus->gf1.dma1 > 3 ? 128*1024 : 64*1024);
 	
 	pcm->info_flags = 0;
@@ -901,7 +901,7 @@ int snd_gf1_pcm_new(struct snd_gus_card *gus, int pcm_dev, int control_index)
 		if (gus->gf1.dma2 == gus->gf1.dma1)
 			pcm->info_flags |= SNDRV_PCM_INFO_HALF_DUPLEX;
 		snd_pcm_lib_preallocate_pages(pcm->streams[SNDRV_PCM_STREAM_CAPTURE].substream,
-					      SNDRV_DMA_TYPE_DEV, snd_dma_isa_data(),
+					      SNDRV_DMA_TYPE_DEV, card->dev,
 					      64*1024, gus->gf1.dma2 > 3 ? 128*1024 : 64*1024);
 	}
 	strcpy(pcm->name, pcm->id);
diff --git a/sound/isa/sb/sb16_main.c b/sound/isa/sb/sb16_main.c
index 981d65d122b6..473ec74ae48c 100644
--- a/sound/isa/sb/sb16_main.c
+++ b/sound/isa/sb/sb16_main.c
@@ -889,7 +889,7 @@ int snd_sb16dsp_pcm(struct snd_sb *chip, int device)
 	}
 
 	snd_pcm_lib_preallocate_pages_for_all(pcm, SNDRV_DMA_TYPE_DEV,
-					      snd_dma_isa_data(),
+					      card->dev,
 					      64*1024, 128*1024);
 	return 0;
 }
diff --git a/sound/isa/sb/sb8_main.c b/sound/isa/sb/sb8_main.c
index 8288fae90085..97645a732a71 100644
--- a/sound/isa/sb/sb8_main.c
+++ b/sound/isa/sb/sb8_main.c
@@ -610,7 +610,7 @@ int snd_sb8dsp_pcm(struct snd_sb *chip, int device)
 	if (chip->dma8 > 3 || chip->dma16 >= 0)
 		max_prealloc = 128 * 1024;
 	snd_pcm_lib_preallocate_pages_for_all(pcm, SNDRV_DMA_TYPE_DEV,
-					      snd_dma_isa_data(),
+					      card->dev,
 					      64*1024, max_prealloc);
 
 	return 0;
diff --git a/sound/isa/sscape.c b/sound/isa/sscape.c
index 733adee5afbf..8181db4db019 100644
--- a/sound/isa/sscape.c
+++ b/sound/isa/sscape.c
@@ -167,12 +167,13 @@ static inline struct soundscape *get_card_soundscape(struct snd_card *c)
  * I think this means that the memory has to map to
  * contiguous pages of physical memory.
  */
-static struct snd_dma_buffer *get_dmabuf(struct snd_dma_buffer *buf,
+static struct snd_dma_buffer *get_dmabuf(struct soundscape *s,
+					 struct snd_dma_buffer *buf,
 					 unsigned long size)
 {
 	if (buf) {
 		if (snd_dma_alloc_pages_fallback(SNDRV_DMA_TYPE_DEV,
-						 snd_dma_isa_data(),
+						 s->chip->card->dev,
 						 size, buf) < 0) {
 			snd_printk(KERN_ERR "sscape: Failed to allocate "
 					    "%lu bytes for DMA\n",
@@ -443,7 +444,7 @@ static int upload_dma_data(struct soundscape *s, const unsigned char *data,
 	int ret;
 	unsigned char val;
 
-	if (!get_dmabuf(&dma, PAGE_ALIGN(32 * 1024)))
+	if (!get_dmabuf(s, &dma, PAGE_ALIGN(32 * 1024)))
 		return -ENOMEM;
 
 	spin_lock_irqsave(&s->lock, flags);
diff --git a/sound/isa/wss/wss_lib.c b/sound/isa/wss/wss_lib.c
index b11ef97bce1b..0dfb8065b403 100644
--- a/sound/isa/wss/wss_lib.c
+++ b/sound/isa/wss/wss_lib.c
@@ -1942,7 +1942,7 @@ int snd_wss_pcm(struct snd_wss *chip, int device)
 	strcpy(pcm->name, snd_wss_chip_id(chip));
 
 	snd_pcm_lib_preallocate_pages_for_all(pcm, SNDRV_DMA_TYPE_DEV,
-					      snd_dma_isa_data(),
+					      chip->card->dev,
 					      64*1024, chip->dma1 > 3 || chip->dma2 > 3 ? 128*1024 : 64*1024);
 
 	chip->pcm = pcm;
-- 
cgit v1.2.3


From d83525ca62cf8ebe3271d14c36fb900c294274a2 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Thu, 31 Jan 2019 15:40:04 -0800
Subject: bpf: introduce bpf_spin_lock

Introduce 'struct bpf_spin_lock' and bpf_spin_lock/unlock() helpers to let
bpf program serialize access to other variables.

Example:
struct hash_elem {
    int cnt;
    struct bpf_spin_lock lock;
};
struct hash_elem * val = bpf_map_lookup_elem(&hash_map, &key);
if (val) {
    bpf_spin_lock(&val->lock);
    val->cnt++;
    bpf_spin_unlock(&val->lock);
}

Restrictions and safety checks:
- bpf_spin_lock is only allowed inside HASH and ARRAY maps.
- BTF description of the map is mandatory for safety analysis.
- bpf program can take one bpf_spin_lock at a time, since two or more can
  cause dead locks.
- only one 'struct bpf_spin_lock' is allowed per map element.
  It drastically simplifies implementation yet allows bpf program to use
  any number of bpf_spin_locks.
- when bpf_spin_lock is taken the calls (either bpf2bpf or helpers) are not allowed.
- bpf program must bpf_spin_unlock() before return.
- bpf program can access 'struct bpf_spin_lock' only via
  bpf_spin_lock()/bpf_spin_unlock() helpers.
- load/store into 'struct bpf_spin_lock lock;' field is not allowed.
- to use bpf_spin_lock() helper the BTF description of map value must be
  a struct and have 'struct bpf_spin_lock anyname;' field at the top level.
  Nested lock inside another struct is not allowed.
- syscall map_lookup doesn't copy bpf_spin_lock field to user space.
- syscall map_update and program map_update do not update bpf_spin_lock field.
- bpf_spin_lock cannot be on the stack or inside networking packet.
  bpf_spin_lock can only be inside HASH or ARRAY map value.
- bpf_spin_lock is available to root only and to all program types.
- bpf_spin_lock is not allowed in inner maps of map-in-map.
- ld_abs is not allowed inside spin_lock-ed region.
- tracing progs and socket filter progs cannot use bpf_spin_lock due to
  insufficient preemption checks

Implementation details:
- cgroup-bpf class of programs can nest with xdp/tc programs.
  Hence bpf_spin_lock is equivalent to spin_lock_irqsave.
  Other solutions to avoid nested bpf_spin_lock are possible.
  Like making sure that all networking progs run with softirq disabled.
  spin_lock_irqsave is the simplest and doesn't add overhead to the
  programs that don't use it.
- arch_spinlock_t is used when its implemented as queued_spin_lock
- archs can force their own arch_spinlock_t
- on architectures where queued_spin_lock is not available and
  sizeof(arch_spinlock_t) != sizeof(__u32) trivial lock is used.
- presence of bpf_spin_lock inside map value could have been indicated via
  extra flag during map_create, but specifying it via BTF is cleaner.
  It provides introspection for map key/value and reduces user mistakes.

Next steps:
- allow bpf_spin_lock in other map types (like cgroup local storage)
- introduce BPF_F_LOCK flag for bpf_map_update() syscall and helper
  to request kernel to grab bpf_spin_lock before rewriting the value.
  That will serialize access to map elements.

Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/bpf.h          |  37 +++++++++-
 include/linux/bpf_verifier.h |   1 +
 include/linux/btf.h          |   1 +
 include/uapi/linux/bpf.h     |   7 +-
 kernel/Kconfig.locks         |   3 +
 kernel/bpf/arraymap.c        |   7 +-
 kernel/bpf/btf.c             |  42 +++++++++++
 kernel/bpf/core.c            |   2 +
 kernel/bpf/hashtab.c         |  21 +++---
 kernel/bpf/helpers.c         |  80 ++++++++++++++++++++
 kernel/bpf/map_in_map.c      |   5 ++
 kernel/bpf/syscall.c         |  21 +++++-
 kernel/bpf/verifier.c        | 169 ++++++++++++++++++++++++++++++++++++++++++-
 net/core/filter.c            |  16 +++-
 14 files changed, 386 insertions(+), 26 deletions(-)

(limited to 'include')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 0394f1f9213b..2ae615b48bb8 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -72,14 +72,15 @@ struct bpf_map {
 	u32 value_size;
 	u32 max_entries;
 	u32 map_flags;
-	u32 pages;
+	int spin_lock_off; /* >=0 valid offset, <0 error */
 	u32 id;
 	int numa_node;
 	u32 btf_key_type_id;
 	u32 btf_value_type_id;
 	struct btf *btf;
+	u32 pages;
 	bool unpriv_array;
-	/* 55 bytes hole */
+	/* 51 bytes hole */
 
 	/* The 3rd and 4th cacheline with misc members to avoid false sharing
 	 * particularly with refcounting.
@@ -91,6 +92,34 @@ struct bpf_map {
 	char name[BPF_OBJ_NAME_LEN];
 };
 
+static inline bool map_value_has_spin_lock(const struct bpf_map *map)
+{
+	return map->spin_lock_off >= 0;
+}
+
+static inline void check_and_init_map_lock(struct bpf_map *map, void *dst)
+{
+	if (likely(!map_value_has_spin_lock(map)))
+		return;
+	*(struct bpf_spin_lock *)(dst + map->spin_lock_off) =
+		(struct bpf_spin_lock){};
+}
+
+/* copy everything but bpf_spin_lock */
+static inline void copy_map_value(struct bpf_map *map, void *dst, void *src)
+{
+	if (unlikely(map_value_has_spin_lock(map))) {
+		u32 off = map->spin_lock_off;
+
+		memcpy(dst, src, off);
+		memcpy(dst + off + sizeof(struct bpf_spin_lock),
+		       src + off + sizeof(struct bpf_spin_lock),
+		       map->value_size - off - sizeof(struct bpf_spin_lock));
+	} else {
+		memcpy(dst, src, map->value_size);
+	}
+}
+
 struct bpf_offload_dev;
 struct bpf_offloaded_map;
 
@@ -162,6 +191,7 @@ enum bpf_arg_type {
 	ARG_PTR_TO_CTX,		/* pointer to context */
 	ARG_ANYTHING,		/* any (initialized) argument is ok */
 	ARG_PTR_TO_SOCKET,	/* pointer to bpf_sock */
+	ARG_PTR_TO_SPIN_LOCK,	/* pointer to bpf_spin_lock */
 };
 
 /* type of values returned from helper functions */
@@ -879,7 +909,8 @@ extern const struct bpf_func_proto bpf_msg_redirect_hash_proto;
 extern const struct bpf_func_proto bpf_msg_redirect_map_proto;
 extern const struct bpf_func_proto bpf_sk_redirect_hash_proto;
 extern const struct bpf_func_proto bpf_sk_redirect_map_proto;
-
+extern const struct bpf_func_proto bpf_spin_lock_proto;
+extern const struct bpf_func_proto bpf_spin_unlock_proto;
 extern const struct bpf_func_proto bpf_get_local_storage_proto;
 
 /* Shared helpers among cBPF and eBPF. */
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 0620e418dde5..69f7a3449eda 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -148,6 +148,7 @@ struct bpf_verifier_state {
 	/* call stack tracking */
 	struct bpf_func_state *frame[MAX_CALL_FRAMES];
 	u32 curframe;
+	u32 active_spin_lock;
 	bool speculative;
 };
 
diff --git a/include/linux/btf.h b/include/linux/btf.h
index 12502e25e767..455d31b55828 100644
--- a/include/linux/btf.h
+++ b/include/linux/btf.h
@@ -50,6 +50,7 @@ u32 btf_id(const struct btf *btf);
 bool btf_member_is_reg_int(const struct btf *btf, const struct btf_type *s,
 			   const struct btf_member *m,
 			   u32 expected_offset, u32 expected_size);
+int btf_find_spin_lock(const struct btf *btf, const struct btf_type *t);
 
 #ifdef CONFIG_BPF_SYSCALL
 const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 60b99b730a41..86f7c438d40f 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -2422,7 +2422,9 @@ union bpf_attr {
 	FN(map_peek_elem),		\
 	FN(msg_push_data),		\
 	FN(msg_pop_data),		\
-	FN(rc_pointer_rel),
+	FN(rc_pointer_rel),		\
+	FN(spin_lock),			\
+	FN(spin_unlock),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
@@ -3056,4 +3058,7 @@ struct bpf_line_info {
 	__u32	line_col;
 };
 
+struct bpf_spin_lock {
+	__u32	val;
+};
 #endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/kernel/Kconfig.locks b/kernel/Kconfig.locks
index 84d882f3e299..fbba478ae522 100644
--- a/kernel/Kconfig.locks
+++ b/kernel/Kconfig.locks
@@ -242,6 +242,9 @@ config QUEUED_SPINLOCKS
 	def_bool y if ARCH_USE_QUEUED_SPINLOCKS
 	depends on SMP
 
+config BPF_ARCH_SPINLOCK
+	bool
+
 config ARCH_USE_QUEUED_RWLOCKS
 	bool
 
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 25632a75d630..d6d979910a2a 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -270,9 +270,10 @@ static int array_map_update_elem(struct bpf_map *map, void *key, void *value,
 		memcpy(this_cpu_ptr(array->pptrs[index & array->index_mask]),
 		       value, map->value_size);
 	else
-		memcpy(array->value +
-		       array->elem_size * (index & array->index_mask),
-		       value, map->value_size);
+		copy_map_value(map,
+			       array->value +
+			       array->elem_size * (index & array->index_mask),
+			       value);
 	return 0;
 }
 
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 3d661f0606fe..7019c1f05cab 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -355,6 +355,11 @@ static bool btf_type_is_struct(const struct btf_type *t)
 	return kind == BTF_KIND_STRUCT || kind == BTF_KIND_UNION;
 }
 
+static bool __btf_type_is_struct(const struct btf_type *t)
+{
+	return BTF_INFO_KIND(t->info) == BTF_KIND_STRUCT;
+}
+
 static bool btf_type_is_array(const struct btf_type *t)
 {
 	return BTF_INFO_KIND(t->info) == BTF_KIND_ARRAY;
@@ -2045,6 +2050,43 @@ static void btf_struct_log(struct btf_verifier_env *env,
 	btf_verifier_log(env, "size=%u vlen=%u", t->size, btf_type_vlen(t));
 }
 
+/* find 'struct bpf_spin_lock' in map value.
+ * return >= 0 offset if found
+ * and < 0 in case of error
+ */
+int btf_find_spin_lock(const struct btf *btf, const struct btf_type *t)
+{
+	const struct btf_member *member;
+	u32 i, off = -ENOENT;
+
+	if (!__btf_type_is_struct(t))
+		return -EINVAL;
+
+	for_each_member(i, t, member) {
+		const struct btf_type *member_type = btf_type_by_id(btf,
+								    member->type);
+		if (!__btf_type_is_struct(member_type))
+			continue;
+		if (member_type->size != sizeof(struct bpf_spin_lock))
+			continue;
+		if (strcmp(__btf_name_by_offset(btf, member_type->name_off),
+			   "bpf_spin_lock"))
+			continue;
+		if (off != -ENOENT)
+			/* only one 'struct bpf_spin_lock' is allowed */
+			return -E2BIG;
+		off = btf_member_bit_offset(t, member);
+		if (off % 8)
+			/* valid C code cannot generate such BTF */
+			return -EINVAL;
+		off /= 8;
+		if (off % __alignof__(struct bpf_spin_lock))
+			/* valid struct bpf_spin_lock will be 4 byte aligned */
+			return -EINVAL;
+	}
+	return off;
+}
+
 static void btf_struct_seq_show(const struct btf *btf, const struct btf_type *t,
 				u32 type_id, void *data, u8 bits_offset,
 				struct seq_file *m)
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index f13c543b7b36..ef88b167959d 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -2002,6 +2002,8 @@ const struct bpf_func_proto bpf_map_delete_elem_proto __weak;
 const struct bpf_func_proto bpf_map_push_elem_proto __weak;
 const struct bpf_func_proto bpf_map_pop_elem_proto __weak;
 const struct bpf_func_proto bpf_map_peek_elem_proto __weak;
+const struct bpf_func_proto bpf_spin_lock_proto __weak;
+const struct bpf_func_proto bpf_spin_unlock_proto __weak;
 
 const struct bpf_func_proto bpf_get_prandom_u32_proto __weak;
 const struct bpf_func_proto bpf_get_smp_processor_id_proto __weak;
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 4b7c76765d9d..6d3b22c5ad68 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -718,21 +718,12 @@ static bool fd_htab_map_needs_adjust(const struct bpf_htab *htab)
 	       BITS_PER_LONG == 64;
 }
 
-static u32 htab_size_value(const struct bpf_htab *htab, bool percpu)
-{
-	u32 size = htab->map.value_size;
-
-	if (percpu || fd_htab_map_needs_adjust(htab))
-		size = round_up(size, 8);
-	return size;
-}
-
 static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
 					 void *value, u32 key_size, u32 hash,
 					 bool percpu, bool onallcpus,
 					 struct htab_elem *old_elem)
 {
-	u32 size = htab_size_value(htab, percpu);
+	u32 size = htab->map.value_size;
 	bool prealloc = htab_is_prealloc(htab);
 	struct htab_elem *l_new, **pl_new;
 	void __percpu *pptr;
@@ -770,10 +761,13 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
 			l_new = ERR_PTR(-ENOMEM);
 			goto dec_count;
 		}
+		check_and_init_map_lock(&htab->map,
+					l_new->key + round_up(key_size, 8));
 	}
 
 	memcpy(l_new->key, key, key_size);
 	if (percpu) {
+		size = round_up(size, 8);
 		if (prealloc) {
 			pptr = htab_elem_get_ptr(l_new, key_size);
 		} else {
@@ -791,8 +785,13 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
 
 		if (!prealloc)
 			htab_elem_set_ptr(l_new, key_size, pptr);
-	} else {
+	} else if (fd_htab_map_needs_adjust(htab)) {
+		size = round_up(size, 8);
 		memcpy(l_new->key + round_up(key_size, 8), value, size);
+	} else {
+		copy_map_value(&htab->map,
+			       l_new->key + round_up(key_size, 8),
+			       value);
 	}
 
 	l_new->hash = hash;
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index a74972b07e74..fbe544761628 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -221,6 +221,86 @@ const struct bpf_func_proto bpf_get_current_comm_proto = {
 	.arg2_type	= ARG_CONST_SIZE,
 };
 
+#if defined(CONFIG_QUEUED_SPINLOCKS) || defined(CONFIG_BPF_ARCH_SPINLOCK)
+
+static inline void __bpf_spin_lock(struct bpf_spin_lock *lock)
+{
+	arch_spinlock_t *l = (void *)lock;
+	union {
+		__u32 val;
+		arch_spinlock_t lock;
+	} u = { .lock = __ARCH_SPIN_LOCK_UNLOCKED };
+
+	compiletime_assert(u.val == 0, "__ARCH_SPIN_LOCK_UNLOCKED not 0");
+	BUILD_BUG_ON(sizeof(*l) != sizeof(__u32));
+	BUILD_BUG_ON(sizeof(*lock) != sizeof(__u32));
+	arch_spin_lock(l);
+}
+
+static inline void __bpf_spin_unlock(struct bpf_spin_lock *lock)
+{
+	arch_spinlock_t *l = (void *)lock;
+
+	arch_spin_unlock(l);
+}
+
+#else
+
+static inline void __bpf_spin_lock(struct bpf_spin_lock *lock)
+{
+	atomic_t *l = (void *)lock;
+
+	BUILD_BUG_ON(sizeof(*l) != sizeof(*lock));
+	do {
+		atomic_cond_read_relaxed(l, !VAL);
+	} while (atomic_xchg(l, 1));
+}
+
+static inline void __bpf_spin_unlock(struct bpf_spin_lock *lock)
+{
+	atomic_t *l = (void *)lock;
+
+	atomic_set_release(l, 0);
+}
+
+#endif
+
+static DEFINE_PER_CPU(unsigned long, irqsave_flags);
+
+notrace BPF_CALL_1(bpf_spin_lock, struct bpf_spin_lock *, lock)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	__bpf_spin_lock(lock);
+	__this_cpu_write(irqsave_flags, flags);
+	return 0;
+}
+
+const struct bpf_func_proto bpf_spin_lock_proto = {
+	.func		= bpf_spin_lock,
+	.gpl_only	= false,
+	.ret_type	= RET_VOID,
+	.arg1_type	= ARG_PTR_TO_SPIN_LOCK,
+};
+
+notrace BPF_CALL_1(bpf_spin_unlock, struct bpf_spin_lock *, lock)
+{
+	unsigned long flags;
+
+	flags = __this_cpu_read(irqsave_flags);
+	__bpf_spin_unlock(lock);
+	local_irq_restore(flags);
+	return 0;
+}
+
+const struct bpf_func_proto bpf_spin_unlock_proto = {
+	.func		= bpf_spin_unlock,
+	.gpl_only	= false,
+	.ret_type	= RET_VOID,
+	.arg1_type	= ARG_PTR_TO_SPIN_LOCK,
+};
+
 #ifdef CONFIG_CGROUPS
 BPF_CALL_0(bpf_get_current_cgroup_id)
 {
diff --git a/kernel/bpf/map_in_map.c b/kernel/bpf/map_in_map.c
index 52378d3e34b3..583346a0ab29 100644
--- a/kernel/bpf/map_in_map.c
+++ b/kernel/bpf/map_in_map.c
@@ -37,6 +37,11 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd)
 		return ERR_PTR(-EINVAL);
 	}
 
+	if (map_value_has_spin_lock(inner_map)) {
+		fdput(f);
+		return ERR_PTR(-ENOTSUPP);
+	}
+
 	inner_map_meta_size = sizeof(*inner_map_meta);
 	/* In some cases verifier needs to access beyond just base map. */
 	if (inner_map->ops == &array_map_ops)
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index b155cd17c1bd..ebf0a673cb83 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -463,7 +463,7 @@ int map_check_no_btf(const struct bpf_map *map,
 	return -ENOTSUPP;
 }
 
-static int map_check_btf(const struct bpf_map *map, const struct btf *btf,
+static int map_check_btf(struct bpf_map *map, const struct btf *btf,
 			 u32 btf_key_id, u32 btf_value_id)
 {
 	const struct btf_type *key_type, *value_type;
@@ -478,6 +478,21 @@ static int map_check_btf(const struct bpf_map *map, const struct btf *btf,
 	if (!value_type || value_size != map->value_size)
 		return -EINVAL;
 
+	map->spin_lock_off = btf_find_spin_lock(btf, value_type);
+
+	if (map_value_has_spin_lock(map)) {
+		if (map->map_type != BPF_MAP_TYPE_HASH &&
+		    map->map_type != BPF_MAP_TYPE_ARRAY)
+			return -ENOTSUPP;
+		if (map->spin_lock_off + sizeof(struct bpf_spin_lock) >
+		    map->value_size) {
+			WARN_ONCE(1,
+				  "verifier bug spin_lock_off %d value_size %d\n",
+				  map->spin_lock_off, map->value_size);
+			return -EFAULT;
+		}
+	}
+
 	if (map->ops->map_check_btf)
 		ret = map->ops->map_check_btf(map, btf, key_type, value_type);
 
@@ -542,6 +557,8 @@ static int map_create(union bpf_attr *attr)
 		map->btf = btf;
 		map->btf_key_type_id = attr->btf_key_type_id;
 		map->btf_value_type_id = attr->btf_value_type_id;
+	} else {
+		map->spin_lock_off = -EINVAL;
 	}
 
 	err = security_bpf_map_alloc(map);
@@ -740,7 +757,7 @@ static int map_lookup_elem(union bpf_attr *attr)
 			err = -ENOENT;
 		} else {
 			err = 0;
-			memcpy(value, ptr, value_size);
+			copy_map_value(map, value, ptr);
 		}
 		rcu_read_unlock();
 	}
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 8c1c21cd50b4..38892bdee651 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -213,6 +213,7 @@ struct bpf_call_arg_meta {
 	s64 msize_smax_value;
 	u64 msize_umax_value;
 	int ptr_id;
+	int func_id;
 };
 
 static DEFINE_MUTEX(bpf_verifier_lock);
@@ -351,6 +352,12 @@ static bool reg_is_refcounted(const struct bpf_reg_state *reg)
 	return type_is_refcounted(reg->type);
 }
 
+static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg)
+{
+	return reg->type == PTR_TO_MAP_VALUE &&
+		map_value_has_spin_lock(reg->map_ptr);
+}
+
 static bool reg_is_refcounted_or_null(const struct bpf_reg_state *reg)
 {
 	return type_is_refcounted_or_null(reg->type);
@@ -712,6 +719,7 @@ static int copy_verifier_state(struct bpf_verifier_state *dst_state,
 	}
 	dst_state->speculative = src->speculative;
 	dst_state->curframe = src->curframe;
+	dst_state->active_spin_lock = src->active_spin_lock;
 	for (i = 0; i <= src->curframe; i++) {
 		dst = dst_state->frame[i];
 		if (!dst) {
@@ -1483,6 +1491,21 @@ static int check_map_access(struct bpf_verifier_env *env, u32 regno,
 	if (err)
 		verbose(env, "R%d max value is outside of the array range\n",
 			regno);
+
+	if (map_value_has_spin_lock(reg->map_ptr)) {
+		u32 lock = reg->map_ptr->spin_lock_off;
+
+		/* if any part of struct bpf_spin_lock can be touched by
+		 * load/store reject this program.
+		 * To check that [x1, x2) overlaps with [y1, y2)
+		 * it is sufficient to check x1 < y2 && y1 < x2.
+		 */
+		if (reg->smin_value + off < lock + sizeof(struct bpf_spin_lock) &&
+		     lock < reg->umax_value + off + size) {
+			verbose(env, "bpf_spin_lock cannot be accessed directly by load/store\n");
+			return -EACCES;
+		}
+	}
 	return err;
 }
 
@@ -2192,6 +2215,91 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
 	}
 }
 
+/* Implementation details:
+ * bpf_map_lookup returns PTR_TO_MAP_VALUE_OR_NULL
+ * Two bpf_map_lookups (even with the same key) will have different reg->id.
+ * For traditional PTR_TO_MAP_VALUE the verifier clears reg->id after
+ * value_or_null->value transition, since the verifier only cares about
+ * the range of access to valid map value pointer and doesn't care about actual
+ * address of the map element.
+ * For maps with 'struct bpf_spin_lock' inside map value the verifier keeps
+ * reg->id > 0 after value_or_null->value transition. By doing so
+ * two bpf_map_lookups will be considered two different pointers that
+ * point to different bpf_spin_locks.
+ * The verifier allows taking only one bpf_spin_lock at a time to avoid
+ * dead-locks.
+ * Since only one bpf_spin_lock is allowed the checks are simpler than
+ * reg_is_refcounted() logic. The verifier needs to remember only
+ * one spin_lock instead of array of acquired_refs.
+ * cur_state->active_spin_lock remembers which map value element got locked
+ * and clears it after bpf_spin_unlock.
+ */
+static int process_spin_lock(struct bpf_verifier_env *env, int regno,
+			     bool is_lock)
+{
+	struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
+	struct bpf_verifier_state *cur = env->cur_state;
+	bool is_const = tnum_is_const(reg->var_off);
+	struct bpf_map *map = reg->map_ptr;
+	u64 val = reg->var_off.value;
+
+	if (reg->type != PTR_TO_MAP_VALUE) {
+		verbose(env, "R%d is not a pointer to map_value\n", regno);
+		return -EINVAL;
+	}
+	if (!is_const) {
+		verbose(env,
+			"R%d doesn't have constant offset. bpf_spin_lock has to be at the constant offset\n",
+			regno);
+		return -EINVAL;
+	}
+	if (!map->btf) {
+		verbose(env,
+			"map '%s' has to have BTF in order to use bpf_spin_lock\n",
+			map->name);
+		return -EINVAL;
+	}
+	if (!map_value_has_spin_lock(map)) {
+		if (map->spin_lock_off == -E2BIG)
+			verbose(env,
+				"map '%s' has more than one 'struct bpf_spin_lock'\n",
+				map->name);
+		else if (map->spin_lock_off == -ENOENT)
+			verbose(env,
+				"map '%s' doesn't have 'struct bpf_spin_lock'\n",
+				map->name);
+		else
+			verbose(env,
+				"map '%s' is not a struct type or bpf_spin_lock is mangled\n",
+				map->name);
+		return -EINVAL;
+	}
+	if (map->spin_lock_off != val + reg->off) {
+		verbose(env, "off %lld doesn't point to 'struct bpf_spin_lock'\n",
+			val + reg->off);
+		return -EINVAL;
+	}
+	if (is_lock) {
+		if (cur->active_spin_lock) {
+			verbose(env,
+				"Locking two bpf_spin_locks are not allowed\n");
+			return -EINVAL;
+		}
+		cur->active_spin_lock = reg->id;
+	} else {
+		if (!cur->active_spin_lock) {
+			verbose(env, "bpf_spin_unlock without taking a lock\n");
+			return -EINVAL;
+		}
+		if (cur->active_spin_lock != reg->id) {
+			verbose(env, "bpf_spin_unlock of different lock\n");
+			return -EINVAL;
+		}
+		cur->active_spin_lock = 0;
+	}
+	return 0;
+}
+
 static bool arg_type_is_mem_ptr(enum bpf_arg_type type)
 {
 	return type == ARG_PTR_TO_MEM ||
@@ -2268,6 +2376,17 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
 			return -EFAULT;
 		}
 		meta->ptr_id = reg->id;
+	} else if (arg_type == ARG_PTR_TO_SPIN_LOCK) {
+		if (meta->func_id == BPF_FUNC_spin_lock) {
+			if (process_spin_lock(env, regno, true))
+				return -EACCES;
+		} else if (meta->func_id == BPF_FUNC_spin_unlock) {
+			if (process_spin_lock(env, regno, false))
+				return -EACCES;
+		} else {
+			verbose(env, "verifier internal error\n");
+			return -EFAULT;
+		}
 	} else if (arg_type_is_mem_ptr(arg_type)) {
 		expected_type = PTR_TO_STACK;
 		/* One exception here. In case function allows for NULL to be
@@ -2887,6 +3006,7 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
 		return err;
 	}
 
+	meta.func_id = func_id;
 	/* check args */
 	err = check_func_arg(env, BPF_REG_1, fn->arg1_type, &meta);
 	if (err)
@@ -4473,7 +4593,8 @@ static void mark_ptr_or_null_reg(struct bpf_func_state *state,
 		} else if (reg->type == PTR_TO_SOCKET_OR_NULL) {
 			reg->type = PTR_TO_SOCKET;
 		}
-		if (is_null || !reg_is_refcounted(reg)) {
+		if (is_null || !(reg_is_refcounted(reg) ||
+				 reg_may_point_to_spin_lock(reg))) {
 			/* We don't need id from this point onwards anymore,
 			 * thus we should better reset it, so that state
 			 * pruning has chances to take effect.
@@ -4871,6 +4992,11 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
 		return err;
 	}
 
+	if (env->cur_state->active_spin_lock) {
+		verbose(env, "BPF_LD_[ABS|IND] cannot be used inside bpf_spin_lock-ed region\n");
+		return -EINVAL;
+	}
+
 	if (regs[BPF_REG_6].type != PTR_TO_CTX) {
 		verbose(env,
 			"at the time of BPF_LD_ABS|IND R6 != pointer to skb\n");
@@ -5607,8 +5733,11 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
 	case PTR_TO_MAP_VALUE:
 		/* If the new min/max/var_off satisfy the old ones and
 		 * everything else matches, we are OK.
-		 * We don't care about the 'id' value, because nothing
-		 * uses it for PTR_TO_MAP_VALUE (only for ..._OR_NULL)
+		 * 'id' is not compared, since it's only used for maps with
+		 * bpf_spin_lock inside map element and in such cases if
+		 * the rest of the prog is valid for one map element then
+		 * it's valid for all map elements regardless of the key
+		 * used in bpf_map_lookup()
 		 */
 		return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 &&
 		       range_within(rold, rcur) &&
@@ -5811,6 +5940,9 @@ static bool states_equal(struct bpf_verifier_env *env,
 	if (old->speculative && !cur->speculative)
 		return false;
 
+	if (old->active_spin_lock != cur->active_spin_lock)
+		return false;
+
 	/* for states to be equal callsites have to be the same
 	 * and all frame states need to be equivalent
 	 */
@@ -6229,6 +6361,12 @@ static int do_check(struct bpf_verifier_env *env)
 					return -EINVAL;
 				}
 
+				if (env->cur_state->active_spin_lock &&
+				    (insn->src_reg == BPF_PSEUDO_CALL ||
+				     insn->imm != BPF_FUNC_spin_unlock)) {
+					verbose(env, "function calls are not allowed while holding a lock\n");
+					return -EINVAL;
+				}
 				if (insn->src_reg == BPF_PSEUDO_CALL)
 					err = check_func_call(env, insn, &env->insn_idx);
 				else
@@ -6259,6 +6397,11 @@ static int do_check(struct bpf_verifier_env *env)
 					return -EINVAL;
 				}
 
+				if (env->cur_state->active_spin_lock) {
+					verbose(env, "bpf_spin_unlock is missing\n");
+					return -EINVAL;
+				}
+
 				if (state->curframe) {
 					/* exit from nested function */
 					env->prev_insn_idx = env->insn_idx;
@@ -6356,6 +6499,19 @@ static int check_map_prealloc(struct bpf_map *map)
 		!(map->map_flags & BPF_F_NO_PREALLOC);
 }
 
+static bool is_tracing_prog_type(enum bpf_prog_type type)
+{
+	switch (type) {
+	case BPF_PROG_TYPE_KPROBE:
+	case BPF_PROG_TYPE_TRACEPOINT:
+	case BPF_PROG_TYPE_PERF_EVENT:
+	case BPF_PROG_TYPE_RAW_TRACEPOINT:
+		return true;
+	default:
+		return false;
+	}
+}
+
 static int check_map_prog_compatibility(struct bpf_verifier_env *env,
 					struct bpf_map *map,
 					struct bpf_prog *prog)
@@ -6378,6 +6534,13 @@ static int check_map_prog_compatibility(struct bpf_verifier_env *env,
 		}
 	}
 
+	if ((is_tracing_prog_type(prog->type) ||
+	     prog->type == BPF_PROG_TYPE_SOCKET_FILTER) &&
+	    map_value_has_spin_lock(map)) {
+		verbose(env, "tracing progs cannot use bpf_spin_lock yet\n");
+		return -EINVAL;
+	}
+
 	if ((bpf_prog_is_dev_bound(prog->aux) || bpf_map_is_dev_bound(map)) &&
 	    !bpf_offload_prog_map_match(prog, map)) {
 		verbose(env, "offload device mismatch between prog and map\n");
diff --git a/net/core/filter.c b/net/core/filter.c
index 41984ad4b9b4..3a49f68eda10 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -5314,10 +5314,20 @@ bpf_base_func_proto(enum bpf_func_id func_id)
 		return &bpf_tail_call_proto;
 	case BPF_FUNC_ktime_get_ns:
 		return &bpf_ktime_get_ns_proto;
+	default:
+		break;
+	}
+
+	if (!capable(CAP_SYS_ADMIN))
+		return NULL;
+
+	switch (func_id) {
+	case BPF_FUNC_spin_lock:
+		return &bpf_spin_lock_proto;
+	case BPF_FUNC_spin_unlock:
+		return &bpf_spin_unlock_proto;
 	case BPF_FUNC_trace_printk:
-		if (capable(CAP_SYS_ADMIN))
-			return bpf_get_trace_printk_proto();
-		/* else, fall through */
+		return bpf_get_trace_printk_proto();
 	default:
 		return NULL;
 	}
-- 
cgit v1.2.3


From 96049f3afd50fe8db69fa0068cdca822e747b1e4 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Thu, 31 Jan 2019 15:40:09 -0800
Subject: bpf: introduce BPF_F_LOCK flag

Introduce BPF_F_LOCK flag for map_lookup and map_update syscall commands
and for map_update() helper function.
In all these cases take a lock of existing element (which was provided
in BTF description) before copying (in or out) the rest of map value.

Implementation details that are part of uapi:

Array:
The array map takes the element lock for lookup/update.

Hash:
hash map also takes the lock for lookup/update and tries to avoid the bucket lock.
If old element exists it takes the element lock and updates the element in place.
If element doesn't exist it allocates new one and inserts into hash table
while holding the bucket lock.
In rare case the hashmap has to take both the bucket lock and the element lock
to update old value in place.

Cgroup local storage:
It is similar to array. update in place and lookup are done with lock taken.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/bpf.h        |  2 ++
 include/uapi/linux/bpf.h   |  1 +
 kernel/bpf/arraymap.c      | 24 ++++++++++++++++--------
 kernel/bpf/hashtab.c       | 42 +++++++++++++++++++++++++++++++++++++++---
 kernel/bpf/helpers.c       | 16 ++++++++++++++++
 kernel/bpf/local_storage.c | 14 +++++++++++++-
 kernel/bpf/syscall.c       | 25 +++++++++++++++++++++++--
 7 files changed, 110 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 2ae615b48bb8..bd169a7bcc93 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -119,6 +119,8 @@ static inline void copy_map_value(struct bpf_map *map, void *dst, void *src)
 		memcpy(dst, src, map->value_size);
 	}
 }
+void copy_map_value_locked(struct bpf_map *map, void *dst, void *src,
+			   bool lock_src);
 
 struct bpf_offload_dev;
 struct bpf_offloaded_map;
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 86f7c438d40f..1777fa0c61e4 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -267,6 +267,7 @@ enum bpf_attach_type {
 #define BPF_ANY		0 /* create new element or update existing */
 #define BPF_NOEXIST	1 /* create new element if it didn't exist */
 #define BPF_EXIST	2 /* update existing element */
+#define BPF_F_LOCK	4 /* spin_lock-ed map_lookup/map_update */
 
 /* flags for BPF_MAP_CREATE command */
 #define BPF_F_NO_PREALLOC	(1U << 0)
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index d6d979910a2a..c72e0d8e1e65 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -253,8 +253,9 @@ static int array_map_update_elem(struct bpf_map *map, void *key, void *value,
 {
 	struct bpf_array *array = container_of(map, struct bpf_array, map);
 	u32 index = *(u32 *)key;
+	char *val;
 
-	if (unlikely(map_flags > BPF_EXIST))
+	if (unlikely((map_flags & ~BPF_F_LOCK) > BPF_EXIST))
 		/* unknown flags */
 		return -EINVAL;
 
@@ -262,18 +263,25 @@ static int array_map_update_elem(struct bpf_map *map, void *key, void *value,
 		/* all elements were pre-allocated, cannot insert a new one */
 		return -E2BIG;
 
-	if (unlikely(map_flags == BPF_NOEXIST))
+	if (unlikely(map_flags & BPF_NOEXIST))
 		/* all elements already exist */
 		return -EEXIST;
 
-	if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
+	if (unlikely((map_flags & BPF_F_LOCK) &&
+		     !map_value_has_spin_lock(map)))
+		return -EINVAL;
+
+	if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
 		memcpy(this_cpu_ptr(array->pptrs[index & array->index_mask]),
 		       value, map->value_size);
-	else
-		copy_map_value(map,
-			       array->value +
-			       array->elem_size * (index & array->index_mask),
-			       value);
+	} else {
+		val = array->value +
+			array->elem_size * (index & array->index_mask);
+		if (map_flags & BPF_F_LOCK)
+			copy_map_value_locked(map, val, value, false);
+		else
+			copy_map_value(map, val, value);
+	}
 	return 0;
 }
 
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 6d3b22c5ad68..937776531998 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -804,11 +804,11 @@ dec_count:
 static int check_flags(struct bpf_htab *htab, struct htab_elem *l_old,
 		       u64 map_flags)
 {
-	if (l_old && map_flags == BPF_NOEXIST)
+	if (l_old && (map_flags & ~BPF_F_LOCK) == BPF_NOEXIST)
 		/* elem already exists */
 		return -EEXIST;
 
-	if (!l_old && map_flags == BPF_EXIST)
+	if (!l_old && (map_flags & ~BPF_F_LOCK) == BPF_EXIST)
 		/* elem doesn't exist, cannot update it */
 		return -ENOENT;
 
@@ -827,7 +827,7 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
 	u32 key_size, hash;
 	int ret;
 
-	if (unlikely(map_flags > BPF_EXIST))
+	if (unlikely((map_flags & ~BPF_F_LOCK) > BPF_EXIST))
 		/* unknown flags */
 		return -EINVAL;
 
@@ -840,6 +840,28 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
 	b = __select_bucket(htab, hash);
 	head = &b->head;
 
+	if (unlikely(map_flags & BPF_F_LOCK)) {
+		if (unlikely(!map_value_has_spin_lock(map)))
+			return -EINVAL;
+		/* find an element without taking the bucket lock */
+		l_old = lookup_nulls_elem_raw(head, hash, key, key_size,
+					      htab->n_buckets);
+		ret = check_flags(htab, l_old, map_flags);
+		if (ret)
+			return ret;
+		if (l_old) {
+			/* grab the element lock and update value in place */
+			copy_map_value_locked(map,
+					      l_old->key + round_up(key_size, 8),
+					      value, false);
+			return 0;
+		}
+		/* fall through, grab the bucket lock and lookup again.
+		 * 99.9% chance that the element won't be found,
+		 * but second lookup under lock has to be done.
+		 */
+	}
+
 	/* bpf_map_update_elem() can be called in_irq() */
 	raw_spin_lock_irqsave(&b->lock, flags);
 
@@ -849,6 +871,20 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
 	if (ret)
 		goto err;
 
+	if (unlikely(l_old && (map_flags & BPF_F_LOCK))) {
+		/* first lookup without the bucket lock didn't find the element,
+		 * but second lookup with the bucket lock found it.
+		 * This case is highly unlikely, but has to be dealt with:
+		 * grab the element lock in addition to the bucket lock
+		 * and update element in place
+		 */
+		copy_map_value_locked(map,
+				      l_old->key + round_up(key_size, 8),
+				      value, false);
+		ret = 0;
+		goto err;
+	}
+
 	l_new = alloc_htab_elem(htab, key, value, key_size, hash, false, false,
 				l_old);
 	if (IS_ERR(l_new)) {
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index fbe544761628..a411fc17d265 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -301,6 +301,22 @@ const struct bpf_func_proto bpf_spin_unlock_proto = {
 	.arg1_type	= ARG_PTR_TO_SPIN_LOCK,
 };
 
+void copy_map_value_locked(struct bpf_map *map, void *dst, void *src,
+			   bool lock_src)
+{
+	struct bpf_spin_lock *lock;
+
+	if (lock_src)
+		lock = src + map->spin_lock_off;
+	else
+		lock = dst + map->spin_lock_off;
+	preempt_disable();
+	____bpf_spin_lock(lock);
+	copy_map_value(map, dst, src);
+	____bpf_spin_unlock(lock);
+	preempt_enable();
+}
+
 #ifdef CONFIG_CGROUPS
 BPF_CALL_0(bpf_get_current_cgroup_id)
 {
diff --git a/kernel/bpf/local_storage.c b/kernel/bpf/local_storage.c
index 0295427f06e2..6b572e2de7fb 100644
--- a/kernel/bpf/local_storage.c
+++ b/kernel/bpf/local_storage.c
@@ -131,7 +131,14 @@ static int cgroup_storage_update_elem(struct bpf_map *map, void *_key,
 	struct bpf_cgroup_storage *storage;
 	struct bpf_storage_buffer *new;
 
-	if (flags != BPF_ANY && flags != BPF_EXIST)
+	if (unlikely(flags & ~(BPF_F_LOCK | BPF_EXIST | BPF_NOEXIST)))
+		return -EINVAL;
+
+	if (unlikely(flags & BPF_NOEXIST))
+		return -EINVAL;
+
+	if (unlikely((flags & BPF_F_LOCK) &&
+		     !map_value_has_spin_lock(map)))
 		return -EINVAL;
 
 	storage = cgroup_storage_lookup((struct bpf_cgroup_storage_map *)map,
@@ -139,6 +146,11 @@ static int cgroup_storage_update_elem(struct bpf_map *map, void *_key,
 	if (!storage)
 		return -ENOENT;
 
+	if (flags & BPF_F_LOCK) {
+		copy_map_value_locked(map, storage->buf->data, value, false);
+		return 0;
+	}
+
 	new = kmalloc_node(sizeof(struct bpf_storage_buffer) +
 			   map->value_size,
 			   __GFP_ZERO | GFP_ATOMIC | __GFP_NOWARN,
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index b29e6dc44650..0834958f1dc4 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -682,7 +682,7 @@ static void *__bpf_copy_key(void __user *ukey, u64 key_size)
 }
 
 /* last field in 'union bpf_attr' used by this command */
-#define BPF_MAP_LOOKUP_ELEM_LAST_FIELD value
+#define BPF_MAP_LOOKUP_ELEM_LAST_FIELD flags
 
 static int map_lookup_elem(union bpf_attr *attr)
 {
@@ -698,6 +698,9 @@ static int map_lookup_elem(union bpf_attr *attr)
 	if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM))
 		return -EINVAL;
 
+	if (attr->flags & ~BPF_F_LOCK)
+		return -EINVAL;
+
 	f = fdget(ufd);
 	map = __bpf_map_get(f);
 	if (IS_ERR(map))
@@ -708,6 +711,12 @@ static int map_lookup_elem(union bpf_attr *attr)
 		goto err_put;
 	}
 
+	if ((attr->flags & BPF_F_LOCK) &&
+	    !map_value_has_spin_lock(map)) {
+		err = -EINVAL;
+		goto err_put;
+	}
+
 	key = __bpf_copy_key(ukey, map->key_size);
 	if (IS_ERR(key)) {
 		err = PTR_ERR(key);
@@ -758,7 +767,13 @@ static int map_lookup_elem(union bpf_attr *attr)
 			err = -ENOENT;
 		} else {
 			err = 0;
-			copy_map_value(map, value, ptr);
+			if (attr->flags & BPF_F_LOCK)
+				/* lock 'ptr' and copy everything but lock */
+				copy_map_value_locked(map, value, ptr, true);
+			else
+				copy_map_value(map, value, ptr);
+			/* mask lock, since value wasn't zero inited */
+			check_and_init_map_lock(map, value);
 		}
 		rcu_read_unlock();
 	}
@@ -818,6 +833,12 @@ static int map_update_elem(union bpf_attr *attr)
 		goto err_put;
 	}
 
+	if ((attr->flags & BPF_F_LOCK) &&
+	    !map_value_has_spin_lock(map)) {
+		err = -EINVAL;
+		goto err_put;
+	}
+
 	key = __bpf_copy_key(ukey, map->key_size);
 	if (IS_ERR(key)) {
 		err = PTR_ERR(key);
-- 
cgit v1.2.3


From fb99bce7120014307dde57b3d7def6977a9a62a1 Mon Sep 17 00:00:00 2001
From: Dave Watson <davejwatson@fb.com>
Date: Wed, 30 Jan 2019 21:58:05 +0000
Subject: net: tls: Support 256 bit keys

Wire up support for 256 bit keys from the setsockopt to the crypto
framework

Signed-off-by: Dave Watson <davejwatson@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tls.h                 |  5 +++-
 include/uapi/linux/tls.h          | 15 ++++++++++
 net/tls/tls_main.c                | 33 +++++++++++++++++++--
 net/tls/tls_sw.c                  | 29 +++++++++++++++---
 tools/testing/selftests/net/tls.c | 62 +++++++++++++++++++++++++++++++++++++++
 5 files changed, 137 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/net/tls.h b/include/net/tls.h
index 4592606e136a..da616db48413 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -206,7 +206,10 @@ struct cipher_context {
 
 union tls_crypto_context {
 	struct tls_crypto_info info;
-	struct tls12_crypto_info_aes_gcm_128 aes_gcm_128;
+	union {
+		struct tls12_crypto_info_aes_gcm_128 aes_gcm_128;
+		struct tls12_crypto_info_aes_gcm_256 aes_gcm_256;
+	};
 };
 
 struct tls_context {
diff --git a/include/uapi/linux/tls.h b/include/uapi/linux/tls.h
index ff02287495ac..9affceaa3db4 100644
--- a/include/uapi/linux/tls.h
+++ b/include/uapi/linux/tls.h
@@ -59,6 +59,13 @@
 #define TLS_CIPHER_AES_GCM_128_TAG_SIZE		16
 #define TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE		8
 
+#define TLS_CIPHER_AES_GCM_256				52
+#define TLS_CIPHER_AES_GCM_256_IV_SIZE			8
+#define TLS_CIPHER_AES_GCM_256_KEY_SIZE		32
+#define TLS_CIPHER_AES_GCM_256_SALT_SIZE		4
+#define TLS_CIPHER_AES_GCM_256_TAG_SIZE		16
+#define TLS_CIPHER_AES_GCM_256_REC_SEQ_SIZE		8
+
 #define TLS_SET_RECORD_TYPE	1
 #define TLS_GET_RECORD_TYPE	2
 
@@ -75,4 +82,12 @@ struct tls12_crypto_info_aes_gcm_128 {
 	unsigned char rec_seq[TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE];
 };
 
+struct tls12_crypto_info_aes_gcm_256 {
+	struct tls_crypto_info info;
+	unsigned char iv[TLS_CIPHER_AES_GCM_256_IV_SIZE];
+	unsigned char key[TLS_CIPHER_AES_GCM_256_KEY_SIZE];
+	unsigned char salt[TLS_CIPHER_AES_GCM_256_SALT_SIZE];
+	unsigned char rec_seq[TLS_CIPHER_AES_GCM_256_REC_SEQ_SIZE];
+};
+
 #endif /* _UAPI_LINUX_TLS_H */
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index d36d095cbcf0..0f028cfdf835 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -372,6 +372,30 @@ static int do_tls_getsockopt_tx(struct sock *sk, char __user *optval,
 			rc = -EFAULT;
 		break;
 	}
+	case TLS_CIPHER_AES_GCM_256: {
+		struct tls12_crypto_info_aes_gcm_256 *
+		  crypto_info_aes_gcm_256 =
+		  container_of(crypto_info,
+			       struct tls12_crypto_info_aes_gcm_256,
+			       info);
+
+		if (len != sizeof(*crypto_info_aes_gcm_256)) {
+			rc = -EINVAL;
+			goto out;
+		}
+		lock_sock(sk);
+		memcpy(crypto_info_aes_gcm_256->iv,
+		       ctx->tx.iv + TLS_CIPHER_AES_GCM_256_SALT_SIZE,
+		       TLS_CIPHER_AES_GCM_256_IV_SIZE);
+		memcpy(crypto_info_aes_gcm_256->rec_seq, ctx->tx.rec_seq,
+		       TLS_CIPHER_AES_GCM_256_REC_SEQ_SIZE);
+		release_sock(sk);
+		if (copy_to_user(optval,
+				 crypto_info_aes_gcm_256,
+				 sizeof(*crypto_info_aes_gcm_256)))
+			rc = -EFAULT;
+		break;
+	}
 	default:
 		rc = -EINVAL;
 	}
@@ -412,6 +436,7 @@ static int do_tls_setsockopt_conf(struct sock *sk, char __user *optval,
 {
 	struct tls_crypto_info *crypto_info;
 	struct tls_context *ctx = tls_get_ctx(sk);
+	size_t optsize;
 	int rc = 0;
 	int conf;
 
@@ -444,8 +469,12 @@ static int do_tls_setsockopt_conf(struct sock *sk, char __user *optval,
 	}
 
 	switch (crypto_info->cipher_type) {
-	case TLS_CIPHER_AES_GCM_128: {
-		if (optlen != sizeof(struct tls12_crypto_info_aes_gcm_128)) {
+	case TLS_CIPHER_AES_GCM_128:
+	case TLS_CIPHER_AES_GCM_256: {
+		optsize = crypto_info->cipher_type == TLS_CIPHER_AES_GCM_128 ?
+			sizeof(struct tls12_crypto_info_aes_gcm_128) :
+			sizeof(struct tls12_crypto_info_aes_gcm_256);
+		if (optlen != optsize) {
 			rc = -EINVAL;
 			goto err_crypto_info;
 		}
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 3f2a6af27e62..9326c06c2ffe 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -1999,6 +1999,7 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
 {
 	struct tls_crypto_info *crypto_info;
 	struct tls12_crypto_info_aes_gcm_128 *gcm_128_info;
+	struct tls12_crypto_info_aes_gcm_256 *gcm_256_info;
 	struct tls_sw_context_tx *sw_ctx_tx = NULL;
 	struct tls_sw_context_rx *sw_ctx_rx = NULL;
 	struct cipher_context *cctx;
@@ -2006,7 +2007,8 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
 	struct strp_callbacks cb;
 	u16 nonce_size, tag_size, iv_size, rec_seq_size;
 	struct crypto_tfm *tfm;
-	char *iv, *rec_seq;
+	char *iv, *rec_seq, *key, *salt;
+	size_t keysize;
 	int rc = 0;
 
 	if (!ctx) {
@@ -2067,6 +2069,24 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
 		 ((struct tls12_crypto_info_aes_gcm_128 *)crypto_info)->rec_seq;
 		gcm_128_info =
 			(struct tls12_crypto_info_aes_gcm_128 *)crypto_info;
+		keysize = TLS_CIPHER_AES_GCM_128_KEY_SIZE;
+		key = gcm_128_info->key;
+		salt = gcm_128_info->salt;
+		break;
+	}
+	case TLS_CIPHER_AES_GCM_256: {
+		nonce_size = TLS_CIPHER_AES_GCM_256_IV_SIZE;
+		tag_size = TLS_CIPHER_AES_GCM_256_TAG_SIZE;
+		iv_size = TLS_CIPHER_AES_GCM_256_IV_SIZE;
+		iv = ((struct tls12_crypto_info_aes_gcm_256 *)crypto_info)->iv;
+		rec_seq_size = TLS_CIPHER_AES_GCM_256_REC_SEQ_SIZE;
+		rec_seq =
+		 ((struct tls12_crypto_info_aes_gcm_256 *)crypto_info)->rec_seq;
+		gcm_256_info =
+			(struct tls12_crypto_info_aes_gcm_256 *)crypto_info;
+		keysize = TLS_CIPHER_AES_GCM_256_KEY_SIZE;
+		key = gcm_256_info->key;
+		salt = gcm_256_info->salt;
 		break;
 	}
 	default:
@@ -2090,7 +2110,8 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
 		rc = -ENOMEM;
 		goto free_priv;
 	}
-	memcpy(cctx->iv, gcm_128_info->salt, TLS_CIPHER_AES_GCM_128_SALT_SIZE);
+	/* Note: 128 & 256 bit salt are the same size */
+	memcpy(cctx->iv, salt, TLS_CIPHER_AES_GCM_128_SALT_SIZE);
 	memcpy(cctx->iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, iv, iv_size);
 	cctx->rec_seq_size = rec_seq_size;
 	cctx->rec_seq = kmemdup(rec_seq, rec_seq_size, GFP_KERNEL);
@@ -2110,8 +2131,8 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
 
 	ctx->push_pending_record = tls_sw_push_pending_record;
 
-	rc = crypto_aead_setkey(*aead, gcm_128_info->key,
-				TLS_CIPHER_AES_GCM_128_KEY_SIZE);
+	rc = crypto_aead_setkey(*aead, key, keysize);
+
 	if (rc)
 		goto free_aead;
 
diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c
index ff68ed19c0ef..c356f481de79 100644
--- a/tools/testing/selftests/net/tls.c
+++ b/tools/testing/selftests/net/tls.c
@@ -763,4 +763,66 @@ TEST_F(tls, control_msg)
 	EXPECT_EQ(memcmp(buf, test_str, send_len), 0);
 }
 
+TEST(keysizes) {
+	struct tls12_crypto_info_aes_gcm_256 tls12;
+	struct sockaddr_in addr;
+	int sfd, ret, fd, cfd;
+	socklen_t len;
+	bool notls;
+
+	notls = false;
+	len = sizeof(addr);
+
+	memset(&tls12, 0, sizeof(tls12));
+	tls12.info.version = TLS_1_2_VERSION;
+	tls12.info.cipher_type = TLS_CIPHER_AES_GCM_256;
+
+	addr.sin_family = AF_INET;
+	addr.sin_addr.s_addr = htonl(INADDR_ANY);
+	addr.sin_port = 0;
+
+	fd = socket(AF_INET, SOCK_STREAM, 0);
+	sfd = socket(AF_INET, SOCK_STREAM, 0);
+
+	ret = bind(sfd, &addr, sizeof(addr));
+	ASSERT_EQ(ret, 0);
+	ret = listen(sfd, 10);
+	ASSERT_EQ(ret, 0);
+
+	ret = getsockname(sfd, &addr, &len);
+	ASSERT_EQ(ret, 0);
+
+	ret = connect(fd, &addr, sizeof(addr));
+	ASSERT_EQ(ret, 0);
+
+	ret = setsockopt(fd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls"));
+	if (ret != 0) {
+		notls = true;
+		printf("Failure setting TCP_ULP, testing without tls\n");
+	}
+
+	if (!notls) {
+		ret = setsockopt(fd, SOL_TLS, TLS_TX, &tls12,
+				 sizeof(tls12));
+		EXPECT_EQ(ret, 0);
+	}
+
+	cfd = accept(sfd, &addr, &len);
+	ASSERT_GE(cfd, 0);
+
+	if (!notls) {
+		ret = setsockopt(cfd, IPPROTO_TCP, TCP_ULP, "tls",
+				 sizeof("tls"));
+		EXPECT_EQ(ret, 0);
+
+		ret = setsockopt(cfd, SOL_TLS, TLS_RX, &tls12,
+				 sizeof(tls12));
+		EXPECT_EQ(ret, 0);
+	}
+
+	close(sfd);
+	close(fd);
+	close(cfd);
+}
+
 TEST_HARNESS_MAIN
-- 
cgit v1.2.3


From a2ef9b6a22bd22841bde53e52cc50476fb4d1a5d Mon Sep 17 00:00:00 2001
From: Dave Watson <davejwatson@fb.com>
Date: Wed, 30 Jan 2019 21:58:12 +0000
Subject: net: tls: Refactor tls aad space size calculation

TLS 1.3 has a different AAD size, use a variable in the code to
make TLS 1.3 support easy.

Signed-off-by: Dave Watson <davejwatson@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tls.h |  1 +
 net/tls/tls_sw.c  | 17 +++++++++--------
 2 files changed, 10 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/net/tls.h b/include/net/tls.h
index da616db48413..754b130672f0 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -202,6 +202,7 @@ struct cipher_context {
 	char *iv;
 	u16 rec_seq_size;
 	char *rec_seq;
+	u16 aad_size;
 };
 
 union tls_crypto_context {
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 9326c06c2ffe..7b6386f4c685 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -185,7 +185,7 @@ static int tls_do_decryption(struct sock *sk,
 	int ret;
 
 	aead_request_set_tfm(aead_req, ctx->aead_recv);
-	aead_request_set_ad(aead_req, TLS_AAD_SPACE_SIZE);
+	aead_request_set_ad(aead_req, tls_ctx->rx.aad_size);
 	aead_request_set_crypt(aead_req, sgin, sgout,
 			       data_len + tls_ctx->rx.tag_size,
 			       (u8 *)iv_recv);
@@ -289,12 +289,12 @@ static struct tls_rec *tls_get_rec(struct sock *sk)
 
 	sg_init_table(rec->sg_aead_in, 2);
 	sg_set_buf(&rec->sg_aead_in[0], rec->aad_space,
-		   sizeof(rec->aad_space));
+		   tls_ctx->tx.aad_size);
 	sg_unmark_end(&rec->sg_aead_in[1]);
 
 	sg_init_table(rec->sg_aead_out, 2);
 	sg_set_buf(&rec->sg_aead_out[0], rec->aad_space,
-		   sizeof(rec->aad_space));
+		   tls_ctx->tx.aad_size);
 	sg_unmark_end(&rec->sg_aead_out[1]);
 
 	return rec;
@@ -455,7 +455,7 @@ static int tls_do_encryption(struct sock *sk,
 	msg_en->sg.curr = start;
 
 	aead_request_set_tfm(aead_req, ctx->aead_send);
-	aead_request_set_ad(aead_req, TLS_AAD_SPACE_SIZE);
+	aead_request_set_ad(aead_req, tls_ctx->tx.aad_size);
 	aead_request_set_crypt(aead_req, rec->sg_aead_in,
 			       rec->sg_aead_out,
 			       data_len, rec->iv_data);
@@ -1317,7 +1317,7 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb,
 
 	aead_size = sizeof(*aead_req) + crypto_aead_reqsize(ctx->aead_recv);
 	mem_size = aead_size + (nsg * sizeof(struct scatterlist));
-	mem_size = mem_size + TLS_AAD_SPACE_SIZE;
+	mem_size = mem_size + tls_ctx->rx.aad_size;
 	mem_size = mem_size + crypto_aead_ivsize(ctx->aead_recv);
 
 	/* Allocate a single block of memory which contains
@@ -1333,7 +1333,7 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb,
 	sgin = (struct scatterlist *)(mem + aead_size);
 	sgout = sgin + n_sgin;
 	aad = (u8 *)(sgout + n_sgout);
-	iv = aad + TLS_AAD_SPACE_SIZE;
+	iv = aad + tls_ctx->rx.aad_size;
 
 	/* Prepare IV */
 	err = skb_copy_bits(skb, rxm->offset + TLS_HEADER_SIZE,
@@ -1352,7 +1352,7 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb,
 
 	/* Prepare sgin */
 	sg_init_table(sgin, n_sgin);
-	sg_set_buf(&sgin[0], aad, TLS_AAD_SPACE_SIZE);
+	sg_set_buf(&sgin[0], aad, tls_ctx->rx.aad_size);
 	err = skb_to_sgvec(skb, &sgin[1],
 			   rxm->offset + tls_ctx->rx.prepend_size,
 			   rxm->full_len - tls_ctx->rx.prepend_size);
@@ -1364,7 +1364,7 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb,
 	if (n_sgout) {
 		if (out_iov) {
 			sg_init_table(sgout, n_sgout);
-			sg_set_buf(&sgout[0], aad, TLS_AAD_SPACE_SIZE);
+			sg_set_buf(&sgout[0], aad, tls_ctx->rx.aad_size);
 
 			*chunk = 0;
 			err = tls_setup_from_iter(sk, out_iov, data_len,
@@ -2100,6 +2100,7 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
 		goto free_priv;
 	}
 
+	cctx->aad_size = TLS_AAD_SPACE_SIZE;
 	cctx->prepend_size = TLS_HEADER_SIZE + nonce_size;
 	cctx->tag_size = tag_size;
 	cctx->overhead_size = cctx->prepend_size + cctx->tag_size;
-- 
cgit v1.2.3


From 130b392c6cd6b2aed1b7eb32253d4920babb4891 Mon Sep 17 00:00:00 2001
From: Dave Watson <davejwatson@fb.com>
Date: Wed, 30 Jan 2019 21:58:31 +0000
Subject: net: tls: Add tls 1.3 support

TLS 1.3 has minor changes from TLS 1.2 at the record layer.

* Header now hardcodes the same version and application content type in
  the header.
* The real content type is appended after the data, before encryption (or
  after decryption).
* The IV is xored with the sequence number, instead of concatinating four
  bytes of IV with the explicit IV.
* Zero-padding:  No exlicit length is given, we search backwards from the
  end of the decrypted data for the first non-zero byte, which is the
  content type.  Currently recv supports reading zero-padding, but there
  is no way for send to add zero padding.

Signed-off-by: Dave Watson <davejwatson@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tls.h             |  66 +++++++++++++++++-------
 include/uapi/linux/tls.h      |   4 ++
 net/tls/tls_device.c          |   5 +-
 net/tls/tls_device_fallback.c |   3 +-
 net/tls/tls_main.c            |   3 +-
 net/tls/tls_sw.c              | 116 ++++++++++++++++++++++++++++++++++--------
 6 files changed, 154 insertions(+), 43 deletions(-)

(limited to 'include')

diff --git a/include/net/tls.h b/include/net/tls.h
index 754b130672f0..004bf01ce868 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -119,6 +119,9 @@ struct tls_rec {
 	/* AAD | msg_encrypted.sg.data (data contains overhead for hdr & iv & tag) */
 	struct scatterlist sg_aead_out[2];
 
+	char content_type;
+	struct scatterlist sg_content_type;
+
 	char aad_space[TLS_AAD_SPACE_SIZE];
 	u8 iv_data[TLS_CIPHER_AES_GCM_128_IV_SIZE +
 		   TLS_CIPHER_AES_GCM_128_SALT_SIZE];
@@ -203,6 +206,7 @@ struct cipher_context {
 	u16 rec_seq_size;
 	char *rec_seq;
 	u16 aad_size;
+	u16 tail_size;
 };
 
 union tls_crypto_context {
@@ -397,49 +401,77 @@ static inline bool tls_bigint_increment(unsigned char *seq, int len)
 }
 
 static inline void tls_advance_record_sn(struct sock *sk,
-					 struct cipher_context *ctx)
+					 struct cipher_context *ctx,
+					 int version)
 {
 	if (tls_bigint_increment(ctx->rec_seq, ctx->rec_seq_size))
 		tls_err_abort(sk, EBADMSG);
-	tls_bigint_increment(ctx->iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE,
-			     ctx->iv_size);
+
+	if (version != TLS_1_3_VERSION) {
+		tls_bigint_increment(ctx->iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE,
+				     ctx->iv_size);
+	}
 }
 
 static inline void tls_fill_prepend(struct tls_context *ctx,
 			     char *buf,
 			     size_t plaintext_len,
-			     unsigned char record_type)
+			     unsigned char record_type,
+			     int version)
 {
 	size_t pkt_len, iv_size = ctx->tx.iv_size;
 
-	pkt_len = plaintext_len + iv_size + ctx->tx.tag_size;
+	pkt_len = plaintext_len + ctx->tx.tag_size;
+	if (version != TLS_1_3_VERSION) {
+		pkt_len += iv_size;
+
+		memcpy(buf + TLS_NONCE_OFFSET,
+		       ctx->tx.iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, iv_size);
+	}
 
 	/* we cover nonce explicit here as well, so buf should be of
 	 * size KTLS_DTLS_HEADER_SIZE + KTLS_DTLS_NONCE_EXPLICIT_SIZE
 	 */
-	buf[0] = record_type;
-	buf[1] = TLS_VERSION_MINOR(ctx->crypto_send.info.version);
-	buf[2] = TLS_VERSION_MAJOR(ctx->crypto_send.info.version);
+	buf[0] = version == TLS_1_3_VERSION ?
+		   TLS_RECORD_TYPE_DATA : record_type;
+	/* Note that VERSION must be TLS_1_2 for both TLS1.2 and TLS1.3 */
+	buf[1] = TLS_1_2_VERSION_MINOR;
+	buf[2] = TLS_1_2_VERSION_MAJOR;
 	/* we can use IV for nonce explicit according to spec */
 	buf[3] = pkt_len >> 8;
 	buf[4] = pkt_len & 0xFF;
-	memcpy(buf + TLS_NONCE_OFFSET,
-	       ctx->tx.iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, iv_size);
 }
 
 static inline void tls_make_aad(char *buf,
 				size_t size,
 				char *record_sequence,
 				int record_sequence_size,
-				unsigned char record_type)
+				unsigned char record_type,
+				int version)
+{
+	if (version != TLS_1_3_VERSION) {
+		memcpy(buf, record_sequence, record_sequence_size);
+		buf += 8;
+	} else {
+		size += TLS_CIPHER_AES_GCM_128_TAG_SIZE;
+	}
+
+	buf[0] = version == TLS_1_3_VERSION ?
+		  TLS_RECORD_TYPE_DATA : record_type;
+	buf[1] = TLS_1_2_VERSION_MAJOR;
+	buf[2] = TLS_1_2_VERSION_MINOR;
+	buf[3] = size >> 8;
+	buf[4] = size & 0xFF;
+}
+
+static inline void xor_iv_with_seq(int version, char *iv, char *seq)
 {
-	memcpy(buf, record_sequence, record_sequence_size);
+	int i;
 
-	buf[8] = record_type;
-	buf[9] = TLS_1_2_VERSION_MAJOR;
-	buf[10] = TLS_1_2_VERSION_MINOR;
-	buf[11] = size >> 8;
-	buf[12] = size & 0xFF;
+	if (version == TLS_1_3_VERSION) {
+		for (i = 0; i < 8; i++)
+			iv[i + 4] ^= seq[i];
+	}
 }
 
 static inline struct tls_context *tls_get_ctx(const struct sock *sk)
diff --git a/include/uapi/linux/tls.h b/include/uapi/linux/tls.h
index 9affceaa3db4..401d6f01de6a 100644
--- a/include/uapi/linux/tls.h
+++ b/include/uapi/linux/tls.h
@@ -51,6 +51,10 @@
 #define TLS_1_2_VERSION_MINOR	0x3
 #define TLS_1_2_VERSION		TLS_VERSION_NUMBER(TLS_1_2)
 
+#define TLS_1_3_VERSION_MAJOR	0x3
+#define TLS_1_3_VERSION_MINOR	0x4
+#define TLS_1_3_VERSION		TLS_VERSION_NUMBER(TLS_1_3)
+
 /* Supported ciphers */
 #define TLS_CIPHER_AES_GCM_128				51
 #define TLS_CIPHER_AES_GCM_128_IV_SIZE			8
diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
index d753e362d2d9..7ee9008b2187 100644
--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -257,7 +257,8 @@ static int tls_push_record(struct sock *sk,
 	tls_fill_prepend(ctx,
 			 skb_frag_address(frag),
 			 record->len - ctx->tx.prepend_size,
-			 record_type);
+			 record_type,
+			 ctx->crypto_send.info.version);
 
 	/* HW doesn't care about the data in the tag, because it fills it. */
 	dummy_tag_frag.page = skb_frag_page(frag);
@@ -270,7 +271,7 @@ static int tls_push_record(struct sock *sk,
 	spin_unlock_irq(&offload_ctx->lock);
 	offload_ctx->open_record = NULL;
 	set_bit(TLS_PENDING_CLOSED_RECORD, &ctx->flags);
-	tls_advance_record_sn(sk, &ctx->tx);
+	tls_advance_record_sn(sk, &ctx->tx, ctx->crypto_send.info.version);
 
 	for (i = 0; i < record->num_frags; i++) {
 		frag = &record->frags[i];
diff --git a/net/tls/tls_device_fallback.c b/net/tls/tls_device_fallback.c
index 450a6dbc5a88..54c3a758f2a7 100644
--- a/net/tls/tls_device_fallback.c
+++ b/net/tls/tls_device_fallback.c
@@ -73,7 +73,8 @@ static int tls_enc_record(struct aead_request *aead_req,
 	len -= TLS_CIPHER_AES_GCM_128_IV_SIZE;
 
 	tls_make_aad(aad, len - TLS_CIPHER_AES_GCM_128_TAG_SIZE,
-		     (char *)&rcd_sn, sizeof(rcd_sn), buf[0]);
+		(char *)&rcd_sn, sizeof(rcd_sn), buf[0],
+		TLS_1_2_VERSION);
 
 	memcpy(iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, buf + TLS_HEADER_SIZE,
 	       TLS_CIPHER_AES_GCM_128_IV_SIZE);
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index 0f028cfdf835..d1c2fd9a3f63 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -463,7 +463,8 @@ static int do_tls_setsockopt_conf(struct sock *sk, char __user *optval,
 	}
 
 	/* check version */
-	if (crypto_info->version != TLS_1_2_VERSION) {
+	if (crypto_info->version != TLS_1_2_VERSION &&
+	    crypto_info->version != TLS_1_3_VERSION) {
 		rc = -ENOTSUPP;
 		goto err_crypto_info;
 	}
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 34f3523f668e..06d7ae97b929 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -120,6 +120,34 @@ static int skb_nsg(struct sk_buff *skb, int offset, int len)
         return __skb_nsg(skb, offset, len, 0);
 }
 
+static int padding_length(struct tls_sw_context_rx *ctx,
+			  struct tls_context *tls_ctx, struct sk_buff *skb)
+{
+	struct strp_msg *rxm = strp_msg(skb);
+	int sub = 0;
+
+	/* Determine zero-padding length */
+	if (tls_ctx->crypto_recv.info.version == TLS_1_3_VERSION) {
+		char content_type = 0;
+		int err;
+		int back = 17;
+
+		while (content_type == 0) {
+			if (back > rxm->full_len)
+				return -EBADMSG;
+			err = skb_copy_bits(skb,
+					    rxm->offset + rxm->full_len - back,
+					    &content_type, 1);
+			if (content_type)
+				break;
+			sub++;
+			back++;
+		}
+		ctx->control = content_type;
+	}
+	return sub;
+}
+
 static void tls_decrypt_done(struct crypto_async_request *req, int err)
 {
 	struct aead_request *aead_req = (struct aead_request *)req;
@@ -142,7 +170,7 @@ static void tls_decrypt_done(struct crypto_async_request *req, int err)
 		tls_err_abort(skb->sk, err);
 	} else {
 		struct strp_msg *rxm = strp_msg(skb);
-
+		rxm->full_len -= padding_length(ctx, tls_ctx, skb);
 		rxm->offset += tls_ctx->rx.prepend_size;
 		rxm->full_len -= tls_ctx->rx.overhead_size;
 	}
@@ -448,6 +476,8 @@ static int tls_do_encryption(struct sock *sk,
 	int rc;
 
 	memcpy(rec->iv_data, tls_ctx->tx.iv, sizeof(rec->iv_data));
+	xor_iv_with_seq(tls_ctx->crypto_send.info.version, rec->iv_data,
+			tls_ctx->tx.rec_seq);
 
 	sge->offset += tls_ctx->tx.prepend_size;
 	sge->length -= tls_ctx->tx.prepend_size;
@@ -483,7 +513,8 @@ static int tls_do_encryption(struct sock *sk,
 
 	/* Unhook the record from context if encryption is not failure */
 	ctx->open_rec = NULL;
-	tls_advance_record_sn(sk, &tls_ctx->tx);
+	tls_advance_record_sn(sk, &tls_ctx->tx,
+			      tls_ctx->crypto_send.info.version);
 	return rc;
 }
 
@@ -640,7 +671,17 @@ static int tls_push_record(struct sock *sk, int flags,
 
 	i = msg_pl->sg.end;
 	sk_msg_iter_var_prev(i);
-	sg_mark_end(sk_msg_elem(msg_pl, i));
+
+	rec->content_type = record_type;
+	if (tls_ctx->crypto_send.info.version == TLS_1_3_VERSION) {
+		/* Add content type to end of message.  No padding added */
+		sg_set_buf(&rec->sg_content_type, &rec->content_type, 1);
+		sg_mark_end(&rec->sg_content_type);
+		sg_chain(msg_pl->sg.data, msg_pl->sg.end + 1,
+			 &rec->sg_content_type);
+	} else {
+		sg_mark_end(sk_msg_elem(msg_pl, i));
+	}
 
 	i = msg_pl->sg.start;
 	sg_chain(rec->sg_aead_in, 2, rec->inplace_crypto ?
@@ -653,18 +694,22 @@ static int tls_push_record(struct sock *sk, int flags,
 	i = msg_en->sg.start;
 	sg_chain(rec->sg_aead_out, 2, &msg_en->sg.data[i]);
 
-	tls_make_aad(rec->aad_space, msg_pl->sg.size,
+	tls_make_aad(rec->aad_space, msg_pl->sg.size + tls_ctx->tx.tail_size,
 		     tls_ctx->tx.rec_seq, tls_ctx->tx.rec_seq_size,
-		     record_type);
+		     record_type,
+		     tls_ctx->crypto_send.info.version);
 
 	tls_fill_prepend(tls_ctx,
 			 page_address(sg_page(&msg_en->sg.data[i])) +
-			 msg_en->sg.data[i].offset, msg_pl->sg.size,
-			 record_type);
+			 msg_en->sg.data[i].offset,
+			 msg_pl->sg.size + tls_ctx->tx.tail_size,
+			 record_type,
+			 tls_ctx->crypto_send.info.version);
 
 	tls_ctx->pending_open_record_frags = false;
 
-	rc = tls_do_encryption(sk, tls_ctx, ctx, req, msg_pl->sg.size, i);
+	rc = tls_do_encryption(sk, tls_ctx, ctx, req,
+			       msg_pl->sg.size + tls_ctx->tx.tail_size, i);
 	if (rc < 0) {
 		if (rc != -EINPROGRESS) {
 			tls_err_abort(sk, EBADMSG);
@@ -1292,7 +1337,8 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb,
 	u8 *aad, *iv, *mem = NULL;
 	struct scatterlist *sgin = NULL;
 	struct scatterlist *sgout = NULL;
-	const int data_len = rxm->full_len - tls_ctx->rx.overhead_size;
+	const int data_len = rxm->full_len - tls_ctx->rx.overhead_size +
+		tls_ctx->rx.tail_size;
 
 	if (*zc && (out_iov || out_sg)) {
 		if (out_iov)
@@ -1343,12 +1389,20 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb,
 		kfree(mem);
 		return err;
 	}
-	memcpy(iv, tls_ctx->rx.iv, TLS_CIPHER_AES_GCM_128_SALT_SIZE);
+	if (tls_ctx->crypto_recv.info.version == TLS_1_3_VERSION)
+		memcpy(iv, tls_ctx->rx.iv, crypto_aead_ivsize(ctx->aead_recv));
+	else
+		memcpy(iv, tls_ctx->rx.iv, TLS_CIPHER_AES_GCM_128_SALT_SIZE);
+
+	xor_iv_with_seq(tls_ctx->crypto_recv.info.version, iv,
+			tls_ctx->rx.rec_seq);
 
 	/* Prepare AAD */
-	tls_make_aad(aad, rxm->full_len - tls_ctx->rx.overhead_size,
+	tls_make_aad(aad, rxm->full_len - tls_ctx->rx.overhead_size +
+		     tls_ctx->rx.tail_size,
 		     tls_ctx->rx.rec_seq, tls_ctx->rx.rec_seq_size,
-		     ctx->control);
+		     ctx->control,
+		     tls_ctx->crypto_recv.info.version);
 
 	/* Prepare sgin */
 	sg_init_table(sgin, n_sgin);
@@ -1405,6 +1459,7 @@ static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb,
 {
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
 	struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
+	int version = tls_ctx->crypto_recv.info.version;
 	struct strp_msg *rxm = strp_msg(skb);
 	int err = 0;
 
@@ -1417,13 +1472,17 @@ static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb,
 		err = decrypt_internal(sk, skb, dest, NULL, chunk, zc, async);
 		if (err < 0) {
 			if (err == -EINPROGRESS)
-				tls_advance_record_sn(sk, &tls_ctx->rx);
+				tls_advance_record_sn(sk, &tls_ctx->rx,
+						      version);
 
 			return err;
 		}
+
+		rxm->full_len -= padding_length(ctx, tls_ctx, skb);
+
 		rxm->offset += tls_ctx->rx.prepend_size;
 		rxm->full_len -= tls_ctx->rx.overhead_size;
-		tls_advance_record_sn(sk, &tls_ctx->rx);
+		tls_advance_record_sn(sk, &tls_ctx->rx, version);
 		ctx->decrypted = true;
 		ctx->saved_data_ready(sk);
 	} else {
@@ -1611,7 +1670,8 @@ int tls_sw_recvmsg(struct sock *sk,
 		to_decrypt = rxm->full_len - tls_ctx->rx.overhead_size;
 
 		if (to_decrypt <= len && !is_kvec && !is_peek &&
-		    ctx->control == TLS_RECORD_TYPE_DATA)
+		    ctx->control == TLS_RECORD_TYPE_DATA &&
+		    tls_ctx->crypto_recv.info.version != TLS_1_3_VERSION)
 			zc = true;
 
 		err = decrypt_skb_update(sk, skb, &msg->msg_iter,
@@ -1835,9 +1895,12 @@ static int tls_read_size(struct strparser *strp, struct sk_buff *skb)
 
 	data_len = ((header[4] & 0xFF) | (header[3] << 8));
 
-	cipher_overhead = tls_ctx->rx.tag_size + tls_ctx->rx.iv_size;
+	cipher_overhead = tls_ctx->rx.tag_size;
+	if (tls_ctx->crypto_recv.info.version != TLS_1_3_VERSION)
+		cipher_overhead += tls_ctx->rx.iv_size;
 
-	if (data_len > TLS_MAX_PAYLOAD_SIZE + cipher_overhead) {
+	if (data_len > TLS_MAX_PAYLOAD_SIZE + cipher_overhead +
+	    tls_ctx->rx.tail_size) {
 		ret = -EMSGSIZE;
 		goto read_failure;
 	}
@@ -1846,12 +1909,12 @@ static int tls_read_size(struct strparser *strp, struct sk_buff *skb)
 		goto read_failure;
 	}
 
-	if (header[1] != TLS_VERSION_MINOR(tls_ctx->crypto_recv.info.version) ||
-	    header[2] != TLS_VERSION_MAJOR(tls_ctx->crypto_recv.info.version)) {
+	/* Note that both TLS1.3 and TLS1.2 use TLS_1_2 version here */
+	if (header[1] != TLS_1_2_VERSION_MINOR ||
+	    header[2] != TLS_1_2_VERSION_MAJOR) {
 		ret = -EINVAL;
 		goto read_failure;
 	}
-
 #ifdef CONFIG_TLS_DEVICE
 	handle_device_resync(strp->sk, TCP_SKB_CB(skb)->seq + rxm->offset,
 			     *(u64*)tls_ctx->rx.rec_seq);
@@ -2100,10 +2163,19 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
 		goto free_priv;
 	}
 
-	cctx->aad_size = TLS_AAD_SPACE_SIZE;
+	if (crypto_info->version == TLS_1_3_VERSION) {
+		nonce_size = 0;
+		cctx->aad_size = TLS_HEADER_SIZE;
+		cctx->tail_size = 1;
+	} else {
+		cctx->aad_size = TLS_AAD_SPACE_SIZE;
+		cctx->tail_size = 0;
+	}
+
 	cctx->prepend_size = TLS_HEADER_SIZE + nonce_size;
 	cctx->tag_size = tag_size;
-	cctx->overhead_size = cctx->prepend_size + cctx->tag_size;
+	cctx->overhead_size = cctx->prepend_size + cctx->tag_size +
+		cctx->tail_size;
 	cctx->iv_size = iv_size;
 	cctx->iv = kmalloc(iv_size + TLS_CIPHER_AES_GCM_128_SALT_SIZE,
 			   GFP_KERNEL);
-- 
cgit v1.2.3


From 5b053e121ffdec851dc3a7046e9bece287a3c5b1 Mon Sep 17 00:00:00 2001
From: Dave Watson <davejwatson@fb.com>
Date: Wed, 30 Jan 2019 22:08:21 +0000
Subject: net: tls: Set async_capable for tls zerocopy only if we see
 EINPROGRESS

Currently we don't zerocopy if the crypto framework async bit is set.
However some crypto algorithms (such as x86 AESNI) support async,
but in the context of sendmsg, will never run asynchronously.  Instead,
check for actual EINPROGRESS return code before assuming algorithm is
async.

Signed-off-by: Dave Watson <davejwatson@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tls.h | 1 +
 net/tls/tls_sw.c  | 4 ++--
 2 files changed, 3 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/tls.h b/include/net/tls.h
index 004bf01ce868..a93a8ed8f716 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -142,6 +142,7 @@ struct tls_sw_context_tx {
 	struct list_head tx_list;
 	atomic_t encrypt_pending;
 	int async_notify;
+	int async_capable;
 
 #define BIT_TX_SCHEDULED	0
 	unsigned long tx_bitmask;
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 06d7ae97b929..8051a9164139 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -718,6 +718,7 @@ static int tls_push_record(struct sock *sk, int flags,
 				tls_merge_open_record(sk, rec, tmp, orig_end);
 			}
 		}
+		ctx->async_capable = 1;
 		return rc;
 	} else if (split) {
 		msg_pl = &tmp->msg_plaintext;
@@ -859,8 +860,7 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
 	long timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
 	struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
-	struct crypto_tfm *tfm = crypto_aead_tfm(ctx->aead_send);
-	bool async_capable = tfm->__crt_alg->cra_flags & CRYPTO_ALG_ASYNC;
+	bool async_capable = ctx->async_capable;
 	unsigned char record_type = TLS_RECORD_TYPE_DATA;
 	bool is_kvec = iov_iter_is_kvec(&msg->msg_iter);
 	bool eor = !(msg->msg_flags & MSG_MORE);
-- 
cgit v1.2.3


From b8580e9de48bf32b884910d22330ef2fa027cf01 Mon Sep 17 00:00:00 2001
From: Shunyong Yang <shunyong.yang@hxt-semitech.com>
Date: Fri, 1 Feb 2019 17:11:14 -0600
Subject: PCI: Add HXT vendor ID

Add the HXT vendor ID to pci_ids.h.

Signed-off-by: Shunyong Yang <shunyong.yang@hxt-semitech.com>
[bhelgaas: split to separate patch]
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Sinan Kaya <okaya@kernel.org>
CC: Joey Zheng <yu.zheng@hxt-semitech.com>
---
 include/linux/pci_ids.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 5eaf39dbc388..26420e619dd7 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2573,6 +2573,8 @@
 
 #define PCI_VENDOR_ID_HYGON		0x1d94
 
+#define PCI_VENDOR_ID_HXT		0x1dbf
+
 #define PCI_VENDOR_ID_TEKRAM		0x1de1
 #define PCI_DEVICE_ID_TEKRAM_DC290	0xdc29
 
-- 
cgit v1.2.3


From 0ce26a1c31ca928df4dfc7504c8898b71ff9f5d5 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Fri, 1 Feb 2019 17:24:52 -0600
Subject: PCI: Move Rohm Vendor ID to generic list

Move the Rohm Vendor ID to pci_ids.h instead of defining it in several
drivers.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Mark Brown <broonie@kernel.org>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/dma/pch_dma.c                                | 1 -
 drivers/gpio/gpio-ml-ioh.c                           | 2 --
 drivers/gpio/gpio-pch.c                              | 1 -
 drivers/i2c/busses/i2c-eg20t.c                       | 1 -
 drivers/misc/pch_phub.c                              | 1 -
 drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c | 7 ++-----
 drivers/spi/spi-topcliff-pch.c                       | 1 -
 drivers/tty/serial/pch_uart.c                        | 2 --
 drivers/usb/gadget/udc/pch_udc.c                     | 1 -
 include/linux/pci_ids.h                              | 2 ++
 10 files changed, 4 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/drivers/dma/pch_dma.c b/drivers/dma/pch_dma.c
index afd8f27bda96..538b6e0e17bb 100644
--- a/drivers/dma/pch_dma.c
+++ b/drivers/dma/pch_dma.c
@@ -972,7 +972,6 @@ static void pch_dma_remove(struct pci_dev *pdev)
 }
 
 /* PCI Device ID of DMA device */
-#define PCI_VENDOR_ID_ROHM             0x10DB
 #define PCI_DEVICE_ID_EG20T_PCH_DMA_8CH        0x8810
 #define PCI_DEVICE_ID_EG20T_PCH_DMA_4CH        0x8815
 #define PCI_DEVICE_ID_ML7213_DMA1_8CH	0x8026
diff --git a/drivers/gpio/gpio-ml-ioh.c b/drivers/gpio/gpio-ml-ioh.c
index 51c7d1b84c2e..0c076dce9e17 100644
--- a/drivers/gpio/gpio-ml-ioh.c
+++ b/drivers/gpio/gpio-ml-ioh.c
@@ -31,8 +31,6 @@
 
 #define IOH_IRQ_BASE		0
 
-#define PCI_VENDOR_ID_ROHM             0x10DB
-
 struct ioh_reg_comn {
 	u32	ien;
 	u32	istatus;
diff --git a/drivers/gpio/gpio-pch.c b/drivers/gpio/gpio-pch.c
index ee79e5f88b5a..1d99293096f2 100644
--- a/drivers/gpio/gpio-pch.c
+++ b/drivers/gpio/gpio-pch.c
@@ -437,7 +437,6 @@ static int __maybe_unused pch_gpio_resume(struct device *dev)
 
 static SIMPLE_DEV_PM_OPS(pch_gpio_pm_ops, pch_gpio_suspend, pch_gpio_resume);
 
-#define PCI_VENDOR_ID_ROHM             0x10DB
 static const struct pci_device_id pch_gpio_pcidev_id[] = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x8803) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_ROHM, 0x8014) },
diff --git a/drivers/i2c/busses/i2c-eg20t.c b/drivers/i2c/busses/i2c-eg20t.c
index 835d54ac2971..231675b10376 100644
--- a/drivers/i2c/busses/i2c-eg20t.c
+++ b/drivers/i2c/busses/i2c-eg20t.c
@@ -177,7 +177,6 @@ static wait_queue_head_t pch_event;
 static DEFINE_MUTEX(pch_mutex);
 
 /* Definition for ML7213 by LAPIS Semiconductor */
-#define PCI_VENDOR_ID_ROHM		0x10DB
 #define PCI_DEVICE_ID_ML7213_I2C	0x802D
 #define PCI_DEVICE_ID_ML7223_I2C	0x8010
 #define PCI_DEVICE_ID_ML7831_I2C	0x8817
diff --git a/drivers/misc/pch_phub.c b/drivers/misc/pch_phub.c
index 540845651b8c..309703e9c42e 100644
--- a/drivers/misc/pch_phub.c
+++ b/drivers/misc/pch_phub.c
@@ -64,7 +64,6 @@
 #define CLKCFG_UARTCLKSEL			(1 << 18)
 
 /* Macros for ML7213 */
-#define PCI_VENDOR_ID_ROHM			0x10db
 #define PCI_DEVICE_ID_ROHM_ML7213_PHUB		0x801A
 
 /* Macros for ML7223 */
diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c
index 43c0c10dfeb7..3a4225837049 100644
--- a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c
+++ b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c
@@ -27,7 +27,6 @@
 #define DRV_VERSION     "1.01"
 const char pch_driver_version[] = DRV_VERSION;
 
-#define PCI_DEVICE_ID_INTEL_IOH1_GBE	0x8802		/* Pci device ID */
 #define PCH_GBE_MAR_ENTRIES		16
 #define PCH_GBE_SHORT_PKT		64
 #define DSC_INIT16			0xC000
@@ -37,11 +36,9 @@ const char pch_driver_version[] = DRV_VERSION;
 #define PCH_GBE_PCI_BAR			1
 #define PCH_GBE_RESERVE_MEMORY		0x200000	/* 2MB */
 
-/* Macros for ML7223 */
-#define PCI_VENDOR_ID_ROHM			0x10db
-#define PCI_DEVICE_ID_ROHM_ML7223_GBE		0x8013
+#define PCI_DEVICE_ID_INTEL_IOH1_GBE		0x8802
 
-/* Macros for ML7831 */
+#define PCI_DEVICE_ID_ROHM_ML7223_GBE		0x8013
 #define PCI_DEVICE_ID_ROHM_ML7831_GBE		0x8802
 
 #define PCH_GBE_TX_WEIGHT         64
diff --git a/drivers/spi/spi-topcliff-pch.c b/drivers/spi/spi-topcliff-pch.c
index 97d137591b18..d794180e83dc 100644
--- a/drivers/spi/spi-topcliff-pch.c
+++ b/drivers/spi/spi-topcliff-pch.c
@@ -92,7 +92,6 @@
 #define PCH_MAX_SPBR		1023
 
 /* Definition for ML7213/ML7223/ML7831 by LAPIS Semiconductor */
-#define PCI_VENDOR_ID_ROHM		0x10DB
 #define PCI_DEVICE_ID_ML7213_SPI	0x802c
 #define PCI_DEVICE_ID_ML7223_SPI	0x800F
 #define PCI_DEVICE_ID_ML7831_SPI	0x8816
diff --git a/drivers/tty/serial/pch_uart.c b/drivers/tty/serial/pch_uart.c
index 9ed121f08a54..6157213a8359 100644
--- a/drivers/tty/serial/pch_uart.c
+++ b/drivers/tty/serial/pch_uart.c
@@ -192,8 +192,6 @@ enum {
 #define PCH_UART_HAL_LOOP		(PCH_UART_MCR_LOOP)
 #define PCH_UART_HAL_AFE		(PCH_UART_MCR_AFE)
 
-#define PCI_VENDOR_ID_ROHM		0x10DB
-
 #define BOTH_EMPTY (UART_LSR_TEMT | UART_LSR_THRE)
 
 #define DEFAULT_UARTCLK   1843200 /*   1.8432 MHz */
diff --git a/drivers/usb/gadget/udc/pch_udc.c b/drivers/usb/gadget/udc/pch_udc.c
index 55c8c8abeacd..cded51f36fc1 100644
--- a/drivers/usb/gadget/udc/pch_udc.c
+++ b/drivers/usb/gadget/udc/pch_udc.c
@@ -368,7 +368,6 @@ struct pch_udc_dev {
 #define PCI_DEVICE_ID_INTEL_QUARK_X1000_UDC	0x0939
 #define PCI_DEVICE_ID_INTEL_EG20T_UDC		0x8808
 
-#define PCI_VENDOR_ID_ROHM		0x10DB
 #define PCI_DEVICE_ID_ML7213_IOH_UDC	0x801D
 #define PCI_DEVICE_ID_ML7831_IOH_UDC	0x8808
 
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 26420e619dd7..70e86148cb1e 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -1140,6 +1140,8 @@
 #define PCI_VENDOR_ID_TCONRAD		0x10da
 #define PCI_DEVICE_ID_TCONRAD_TOKENRING	0x0508
 
+#define PCI_VENDOR_ID_ROHM		0x10db
+
 #define PCI_VENDOR_ID_NVIDIA			0x10de
 #define PCI_DEVICE_ID_NVIDIA_TNT		0x0020
 #define PCI_DEVICE_ID_NVIDIA_TNT2		0x0028
-- 
cgit v1.2.3


From f9cf22882c606f3ffe06f620bb6d03b9eff18d3d Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Thu, 31 Jan 2019 10:50:40 -0800
Subject: devlink: add device information API

ethtool -i has served us well for a long time, but its showing
its limitations more and more. The device information should
also be reported per device not per-netdev.

Lay foundation for a simple devlink-based way of reading device
info. Add driver name and device serial number as initial pieces
of information exposed via this new API.

v3:
 - rename helpers (Jiri);
 - rename driver name attr (Jiri);
 - remove double spacing in commit message (Jiri).
RFC v2:
 - wrap the skb into an opaque structure (Jiri);
 - allow the serial number of be any length (Jiri & Andrew);
 - add driver name (Jonathan).

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/devlink.h        |  18 +++++++
 include/uapi/linux/devlink.h |   5 ++
 net/core/devlink.c           | 112 +++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 135 insertions(+)

(limited to 'include')

diff --git a/include/net/devlink.h b/include/net/devlink.h
index 85c9eabaf056..a6d0a530483d 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -429,6 +429,7 @@ enum devlink_param_wol_types {
 }
 
 struct devlink_region;
+struct devlink_info_req;
 
 typedef void devlink_snapshot_data_dest_t(const void *data);
 
@@ -484,6 +485,8 @@ struct devlink_ops {
 	int (*eswitch_encap_mode_get)(struct devlink *devlink, u8 *p_encap_mode);
 	int (*eswitch_encap_mode_set)(struct devlink *devlink, u8 encap_mode,
 				      struct netlink_ext_ack *extack);
+	int (*info_get)(struct devlink *devlink, struct devlink_info_req *req,
+			struct netlink_ext_ack *extack);
 };
 
 static inline void *devlink_priv(struct devlink *devlink)
@@ -607,6 +610,10 @@ u32 devlink_region_shapshot_id_get(struct devlink *devlink);
 int devlink_region_snapshot_create(struct devlink_region *region, u64 data_len,
 				   u8 *data, u32 snapshot_id,
 				   devlink_snapshot_data_dest_t *data_destructor);
+int devlink_info_serial_number_put(struct devlink_info_req *req,
+				   const char *sn);
+int devlink_info_driver_name_put(struct devlink_info_req *req,
+				 const char *name);
 
 #else
 
@@ -905,6 +912,17 @@ devlink_region_snapshot_create(struct devlink_region *region, u64 data_len,
 	return 0;
 }
 
+static inline int
+devlink_info_driver_name_put(struct devlink_info_req *req, const char *name)
+{
+	return 0;
+}
+
+static inline int
+devlink_info_serial_number_put(struct devlink_info_req *req, const char *sn)
+{
+	return 0;
+}
 #endif
 
 #endif /* _NET_DEVLINK_H_ */
diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index 61b4447a6c5b..142710d45093 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -94,6 +94,8 @@ enum devlink_command {
 	DEVLINK_CMD_PORT_PARAM_NEW,
 	DEVLINK_CMD_PORT_PARAM_DEL,
 
+	DEVLINK_CMD_INFO_GET,		/* can dump */
+
 	/* add new commands above here */
 	__DEVLINK_CMD_MAX,
 	DEVLINK_CMD_MAX = __DEVLINK_CMD_MAX - 1
@@ -290,6 +292,9 @@ enum devlink_attr {
 	DEVLINK_ATTR_REGION_CHUNK_ADDR,         /* u64 */
 	DEVLINK_ATTR_REGION_CHUNK_LEN,          /* u64 */
 
+	DEVLINK_ATTR_INFO_DRIVER_NAME,		/* string */
+	DEVLINK_ATTR_INFO_SERIAL_NUMBER,	/* string */
+
 	/* add new attributes above here, update the policy in devlink.c */
 
 	__DEVLINK_ATTR_MAX,
diff --git a/net/core/devlink.c b/net/core/devlink.c
index e6f170caf449..f456f6aa3d40 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -3714,6 +3714,110 @@ out:
 	return 0;
 }
 
+struct devlink_info_req {
+	struct sk_buff *msg;
+};
+
+int devlink_info_driver_name_put(struct devlink_info_req *req, const char *name)
+{
+	return nla_put_string(req->msg, DEVLINK_ATTR_INFO_DRIVER_NAME, name);
+}
+EXPORT_SYMBOL_GPL(devlink_info_driver_name_put);
+
+int devlink_info_serial_number_put(struct devlink_info_req *req, const char *sn)
+{
+	return nla_put_string(req->msg, DEVLINK_ATTR_INFO_SERIAL_NUMBER, sn);
+}
+EXPORT_SYMBOL_GPL(devlink_info_serial_number_put);
+
+static int
+devlink_nl_info_fill(struct sk_buff *msg, struct devlink *devlink,
+		     enum devlink_command cmd, u32 portid,
+		     u32 seq, int flags, struct netlink_ext_ack *extack)
+{
+	struct devlink_info_req req;
+	void *hdr;
+	int err;
+
+	hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
+	if (!hdr)
+		return -EMSGSIZE;
+
+	err = -EMSGSIZE;
+	if (devlink_nl_put_handle(msg, devlink))
+		goto err_cancel_msg;
+
+	req.msg = msg;
+	err = devlink->ops->info_get(devlink, &req, extack);
+	if (err)
+		goto err_cancel_msg;
+
+	genlmsg_end(msg, hdr);
+	return 0;
+
+err_cancel_msg:
+	genlmsg_cancel(msg, hdr);
+	return err;
+}
+
+static int devlink_nl_cmd_info_get_doit(struct sk_buff *skb,
+					struct genl_info *info)
+{
+	struct devlink *devlink = info->user_ptr[0];
+	struct sk_buff *msg;
+	int err;
+
+	if (!devlink->ops || !devlink->ops->info_get)
+		return -EOPNOTSUPP;
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+
+	err = devlink_nl_info_fill(msg, devlink, DEVLINK_CMD_INFO_GET,
+				   info->snd_portid, info->snd_seq, 0,
+				   info->extack);
+	if (err) {
+		nlmsg_free(msg);
+		return err;
+	}
+
+	return genlmsg_reply(msg, info);
+}
+
+static int devlink_nl_cmd_info_get_dumpit(struct sk_buff *msg,
+					  struct netlink_callback *cb)
+{
+	struct devlink *devlink;
+	int start = cb->args[0];
+	int idx = 0;
+	int err;
+
+	mutex_lock(&devlink_mutex);
+	list_for_each_entry(devlink, &devlink_list, list) {
+		if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+			continue;
+		if (idx < start) {
+			idx++;
+			continue;
+		}
+
+		mutex_lock(&devlink->lock);
+		err = devlink_nl_info_fill(msg, devlink, DEVLINK_CMD_INFO_GET,
+					   NETLINK_CB(cb->skb).portid,
+					   cb->nlh->nlmsg_seq, NLM_F_MULTI,
+					   cb->extack);
+		mutex_unlock(&devlink->lock);
+		if (err)
+			break;
+		idx++;
+	}
+	mutex_unlock(&devlink_mutex);
+
+	cb->args[0] = idx;
+	return msg->len;
+}
+
 static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
 	[DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING },
 	[DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING },
@@ -3974,6 +4078,14 @@ static const struct genl_ops devlink_nl_ops[] = {
 		.flags = GENL_ADMIN_PERM,
 		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
 	},
+	{
+		.cmd = DEVLINK_CMD_INFO_GET,
+		.doit = devlink_nl_cmd_info_get_doit,
+		.dumpit = devlink_nl_cmd_info_get_dumpit,
+		.policy = devlink_nl_policy,
+		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+		/* can be retrieved by unprivileged users */
+	},
 };
 
 static struct genl_family devlink_nl_family __ro_after_init = {
-- 
cgit v1.2.3


From fc6fae7dd987dccce3f322c32dc26b52d69ad00e Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Thu, 31 Jan 2019 10:50:41 -0800
Subject: devlink: add version reporting to devlink info API

ethtool -i has a few fixed-size fields which can be used to report
firmware version and expansion ROM version. Unfortunately, modern
hardware has more firmware components. There is usually some
datapath microcode, management controller, PXE drivers, and a
CPLD load. Running ethtool -i on modern controllers reveals the
fact that vendors cram multiple values into firmware version field.

Here are some examples from systems I could lay my hands on quickly:

tg3:  "FFV20.2.17 bc 5720-v1.39"
i40e: "6.01 0x800034a4 1.1747.0"
nfp:  "0.0.3.5 0.25 sriov-2.1.16 nic"

Add a new devlink API to allow retrieving multiple versions, and
provide user-readable name for those versions.

While at it break down the versions into three categories:
 - fixed - this is the board/fixed component version, usually vendors
           report information like the board version in the PCI VPD,
           but it will benefit from naming and common API as well;
 - running - this is the running firmware version;
 - stored - this is firmware in the flash, after firmware update
            this value will reflect the flashed version, while the
            running version may only be updated after reboot.

v3:
 - add per-type helpers instead of using the special argument (Jiri).
RFCv2:
 - remove the nesting in attr DEVLINK_ATTR_INFO_VERSIONS (now
   versions are mixed with other info attrs)l
 - have the driver report versions from the same callback as
   other info.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/devlink.h        | 33 +++++++++++++++++++++++++
 include/uapi/linux/devlink.h |  5 ++++
 net/core/devlink.c           | 57 ++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 95 insertions(+)

(limited to 'include')

diff --git a/include/net/devlink.h b/include/net/devlink.h
index a6d0a530483d..6dc0ef964392 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -614,6 +614,15 @@ int devlink_info_serial_number_put(struct devlink_info_req *req,
 				   const char *sn);
 int devlink_info_driver_name_put(struct devlink_info_req *req,
 				 const char *name);
+int devlink_info_version_fixed_put(struct devlink_info_req *req,
+				   const char *version_name,
+				   const char *version_value);
+int devlink_info_version_stored_put(struct devlink_info_req *req,
+				    const char *version_name,
+				    const char *version_value);
+int devlink_info_version_running_put(struct devlink_info_req *req,
+				     const char *version_name,
+				     const char *version_value);
 
 #else
 
@@ -923,6 +932,30 @@ devlink_info_serial_number_put(struct devlink_info_req *req, const char *sn)
 {
 	return 0;
 }
+
+static inline int
+devlink_info_version_fixed_put(struct devlink_info_req *req,
+			       const char *version_name,
+			       const char *version_value)
+{
+	return 0;
+}
+
+static inline int
+devlink_info_version_stored_put(struct devlink_info_req *req,
+				const char *version_name,
+				const char *version_value)
+{
+	return 0;
+}
+
+static inline int
+devlink_info_version_running_put(struct devlink_info_req *req,
+				 const char *version_name,
+				 const char *version_value)
+{
+	return 0;
+}
 #endif
 
 #endif /* _NET_DEVLINK_H_ */
diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index 142710d45093..7fffd879c328 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -294,6 +294,11 @@ enum devlink_attr {
 
 	DEVLINK_ATTR_INFO_DRIVER_NAME,		/* string */
 	DEVLINK_ATTR_INFO_SERIAL_NUMBER,	/* string */
+	DEVLINK_ATTR_INFO_VERSION_FIXED,	/* nested */
+	DEVLINK_ATTR_INFO_VERSION_RUNNING,	/* nested */
+	DEVLINK_ATTR_INFO_VERSION_STORED,	/* nested */
+	DEVLINK_ATTR_INFO_VERSION_NAME,		/* string */
+	DEVLINK_ATTR_INFO_VERSION_VALUE,	/* string */
 
 	/* add new attributes above here, update the policy in devlink.c */
 
diff --git a/net/core/devlink.c b/net/core/devlink.c
index f456f6aa3d40..e31b6d617837 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -3730,6 +3730,63 @@ int devlink_info_serial_number_put(struct devlink_info_req *req, const char *sn)
 }
 EXPORT_SYMBOL_GPL(devlink_info_serial_number_put);
 
+static int devlink_info_version_put(struct devlink_info_req *req, int attr,
+				    const char *version_name,
+				    const char *version_value)
+{
+	struct nlattr *nest;
+	int err;
+
+	nest = nla_nest_start(req->msg, attr);
+	if (!nest)
+		return -EMSGSIZE;
+
+	err = nla_put_string(req->msg, DEVLINK_ATTR_INFO_VERSION_NAME,
+			     version_name);
+	if (err)
+		goto nla_put_failure;
+
+	err = nla_put_string(req->msg, DEVLINK_ATTR_INFO_VERSION_VALUE,
+			     version_value);
+	if (err)
+		goto nla_put_failure;
+
+	nla_nest_end(req->msg, nest);
+
+	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(req->msg, nest);
+	return err;
+}
+
+int devlink_info_version_fixed_put(struct devlink_info_req *req,
+				   const char *version_name,
+				   const char *version_value)
+{
+	return devlink_info_version_put(req, DEVLINK_ATTR_INFO_VERSION_FIXED,
+					version_name, version_value);
+}
+EXPORT_SYMBOL_GPL(devlink_info_version_fixed_put);
+
+int devlink_info_version_stored_put(struct devlink_info_req *req,
+				    const char *version_name,
+				    const char *version_value)
+{
+	return devlink_info_version_put(req, DEVLINK_ATTR_INFO_VERSION_STORED,
+					version_name, version_value);
+}
+EXPORT_SYMBOL_GPL(devlink_info_version_stored_put);
+
+int devlink_info_version_running_put(struct devlink_info_req *req,
+				     const char *version_name,
+				     const char *version_value)
+{
+	return devlink_info_version_put(req, DEVLINK_ATTR_INFO_VERSION_RUNNING,
+					version_name, version_value);
+}
+EXPORT_SYMBOL_GPL(devlink_info_version_running_put);
+
 static int
 devlink_nl_info_fill(struct sk_buff *msg, struct devlink *devlink,
 		     enum devlink_command cmd, u32 portid,
-- 
cgit v1.2.3


From 785bd550c4fb0e12cb29dd24cf0a0be103f145a0 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Thu, 31 Jan 2019 10:50:42 -0800
Subject: devlink: add generic info version names

Add defines and docs for generic info versions.

v3:
 - add docs;
 - separate patch (Jiri).

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/devlink-info-versions.rst | 38 ++++++++++++++++++++++
 Documentation/networking/index.rst                 |  1 +
 include/net/devlink.h                              | 14 ++++++++
 3 files changed, 53 insertions(+)
 create mode 100644 Documentation/networking/devlink-info-versions.rst

(limited to 'include')

diff --git a/Documentation/networking/devlink-info-versions.rst b/Documentation/networking/devlink-info-versions.rst
new file mode 100644
index 000000000000..7d4ecf6b6f34
--- /dev/null
+++ b/Documentation/networking/devlink-info-versions.rst
@@ -0,0 +1,38 @@
+.. SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+
+=====================
+Devlink info versions
+=====================
+
+board.id
+========
+
+Unique identifier of the board design.
+
+board.rev
+=========
+
+Board design revision.
+
+fw.mgmt
+=======
+
+Control unit firmware version. This firmware is responsible for house
+keeping tasks, PHY control etc. but not the packet-by-packet data path
+operation.
+
+fw.app
+======
+
+Data path microcode controlling high-speed packet processing.
+
+fw.undi
+=======
+
+UNDI software, may include the UEFI driver, firmware or both.
+
+fw.ncsi
+=======
+
+Version of the software responsible for supporting/handling the
+Network Controller Sideband Interface.
diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index f1627ca2a0ea..9a32451cd201 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -24,6 +24,7 @@ Contents:
    device_drivers/intel/i40e
    device_drivers/intel/iavf
    device_drivers/intel/ice
+   devlink-info-versions
    kapi
    z8530book
    msg_zerocopy
diff --git a/include/net/devlink.h b/include/net/devlink.h
index 6dc0ef964392..6b417f141fd6 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -428,6 +428,20 @@ enum devlink_param_wol_types {
 	.validate = _validate,						\
 }
 
+/* Part number, identifier of board design */
+#define DEVLINK_INFO_VERSION_GENERIC_BOARD_ID	"board.id"
+/* Revision of board design */
+#define DEVLINK_INFO_VERSION_GENERIC_BOARD_REV	"board.rev"
+
+/* Control processor FW version */
+#define DEVLINK_INFO_VERSION_GENERIC_FW_MGMT	"fw.mgmt"
+/* Data path microcode controlling high-speed packet processing */
+#define DEVLINK_INFO_VERSION_GENERIC_FW_APP	"fw.app"
+/* UNDI software version */
+#define DEVLINK_INFO_VERSION_GENERIC_FW_UNDI	"fw.undi"
+/* NCSI support/handler version */
+#define DEVLINK_INFO_VERSION_GENERIC_FW_NCSI	"fw.ncsi"
+
 struct devlink_region;
 struct devlink_info_req;
 
-- 
cgit v1.2.3


From ddb6e99e2db14d4b3c22a0dbddc6a09234856bb7 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Thu, 31 Jan 2019 10:50:47 -0800
Subject: ethtool: add compat for devlink info

If driver did not fill the fw_version field, try to call into
the new devlink get_info op and collect the versions that way.
We assume ethtool was always reporting running versions.

v4:
 - use IS_REACHABLE() to avoid problems with DEVLINK=m (kbuildbot).
v3 (Jiri):
 - do a dump and then parse it instead of special handling;
 - concatenate all versions (well, all that fit :)).

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/devlink.h | 10 ++++++++
 net/core/devlink.c    | 63 +++++++++++++++++++++++++++++++++++++++++++++++++++
 net/core/ethtool.c    |  7 ++++++
 3 files changed, 80 insertions(+)

(limited to 'include')

diff --git a/include/net/devlink.h b/include/net/devlink.h
index 6b417f141fd6..1c8523920f66 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -972,4 +972,14 @@ devlink_info_version_running_put(struct devlink_info_req *req,
 }
 #endif
 
+#if IS_REACHABLE(CONFIG_NET_DEVLINK)
+void devlink_compat_running_version(struct net_device *dev,
+				    char *buf, size_t len);
+#else
+static inline void
+devlink_compat_running_version(struct net_device *dev, char *buf, size_t len)
+{
+}
+#endif
+
 #endif /* _NET_DEVLINK_H_ */
diff --git a/net/core/devlink.c b/net/core/devlink.c
index e31b6d617837..eb839d74bcc0 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -5278,6 +5278,69 @@ unlock:
 }
 EXPORT_SYMBOL_GPL(devlink_region_snapshot_create);
 
+static void __devlink_compat_running_version(struct devlink *devlink,
+					     char *buf, size_t len)
+{
+	const struct nlattr *nlattr;
+	struct devlink_info_req req;
+	struct sk_buff *msg;
+	int rem, err;
+
+	if (!devlink->ops->info_get)
+		return;
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return;
+
+	req.msg = msg;
+	err = devlink->ops->info_get(devlink, &req, NULL);
+	if (err)
+		goto free_msg;
+
+	nla_for_each_attr(nlattr, (void *)msg->data, msg->len, rem) {
+		const struct nlattr *kv;
+		int rem_kv;
+
+		if (nla_type(nlattr) != DEVLINK_ATTR_INFO_VERSION_RUNNING)
+			continue;
+
+		nla_for_each_nested(kv, nlattr, rem_kv) {
+			if (nla_type(kv) != DEVLINK_ATTR_INFO_VERSION_VALUE)
+				continue;
+
+			strlcat(buf, nla_data(kv), len);
+			strlcat(buf, " ", len);
+		}
+	}
+free_msg:
+	nlmsg_free(msg);
+}
+
+void devlink_compat_running_version(struct net_device *dev,
+				    char *buf, size_t len)
+{
+	struct devlink_port *devlink_port;
+	struct devlink *devlink;
+
+	mutex_lock(&devlink_mutex);
+	list_for_each_entry(devlink, &devlink_list, list) {
+		mutex_lock(&devlink->lock);
+		list_for_each_entry(devlink_port, &devlink->port_list, list) {
+			if (devlink_port->type == DEVLINK_PORT_TYPE_ETH ||
+			    devlink_port->type_dev == dev) {
+				__devlink_compat_running_version(devlink,
+								 buf, len);
+				mutex_unlock(&devlink->lock);
+				goto out;
+			}
+		}
+		mutex_unlock(&devlink->lock);
+	}
+out:
+	mutex_unlock(&devlink_mutex);
+}
+
 static int __init devlink_module_init(void)
 {
 	return genl_register_family(&devlink_nl_family);
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 3fe6e9da3579..45c0a6e3d6ad 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -27,6 +27,7 @@
 #include <linux/rtnetlink.h>
 #include <linux/sched/signal.h>
 #include <linux/net.h>
+#include <net/devlink.h>
 #include <net/xdp_sock.h>
 
 /*
@@ -803,6 +804,12 @@ static noinline_for_stack int ethtool_get_drvinfo(struct net_device *dev,
 	if (ops->get_eeprom_len)
 		info.eedump_len = ops->get_eeprom_len(dev);
 
+	rtnl_unlock();
+	if (!info.fw_version[0])
+		devlink_compat_running_version(dev, info.fw_version,
+					       sizeof(info.fw_version));
+	rtnl_lock();
+
 	if (copy_to_user(useraddr, &info, sizeof(info)))
 		return -EFAULT;
 	return 0;
-- 
cgit v1.2.3


From f38ab20b749da84e3df1f8c9240ddc791b0d5983 Mon Sep 17 00:00:00 2001
From: Daniel Drake <drake@endlessm.com>
Date: Thu, 20 Dec 2018 14:59:33 +0800
Subject: iio: st_accel: use ACPI orientation data

Platform-specific ST accelerometer mount matrix information can be
provided by returning a package of 6 integers from the ACPI _ONT
method. This has been seen on Acer products such as Veriton Z4860G,
Z6860G and A890, which include a ST SMO8840 sensor. We have also
confirmed experimentally that the Windows driver uses such information.

The _ONT data format was explained by a ST vendor contact. However,
strangely enough, the _ONT transformations must be applied after first
applying another mount matrix which we determined experimentally. ST
have not commented on why this is the case, but we imagine that perhaps
earlier devices (before _ONT was introduced) required this translation
and hence it became 'standard.'

Interpret the _ONT data and export the equivalent mount matrix to
userspace.

If no _ONT data is present, no mount matrix is exported.

Signed-off-by: Daniel Drake <drake@endlessm.com>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/accel/st_accel_core.c     | 171 +++++++++++++++++++++++++++++++++-
 include/linux/iio/common/st_sensors.h |   1 +
 2 files changed, 171 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/iio/accel/st_accel_core.c b/drivers/iio/accel/st_accel_core.c
index f7b471121508..a3c0916479fa 100644
--- a/drivers/iio/accel/st_accel_core.c
+++ b/drivers/iio/accel/st_accel_core.c
@@ -11,6 +11,7 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/slab.h>
+#include <linux/acpi.h>
 #include <linux/errno.h>
 #include <linux/types.h>
 #include <linux/mutex.h>
@@ -918,12 +919,167 @@ static const struct iio_trigger_ops st_accel_trigger_ops = {
 #define ST_ACCEL_TRIGGER_OPS NULL
 #endif
 
+static const struct iio_mount_matrix *
+get_mount_matrix(const struct iio_dev *indio_dev,
+		 const struct iio_chan_spec *chan)
+{
+	struct st_sensor_data *adata = iio_priv(indio_dev);
+
+	return adata->mount_matrix;
+}
+
+static const struct iio_chan_spec_ext_info mount_matrix_ext_info[] = {
+	IIO_MOUNT_MATRIX(IIO_SHARED_BY_ALL, get_mount_matrix),
+	{ },
+};
+
+/* Read ST-specific _ONT orientation data from ACPI and generate an
+ * appropriate mount matrix.
+ */
+static int apply_acpi_orientation(struct iio_dev *indio_dev,
+				  struct iio_chan_spec *channels)
+{
+#ifdef CONFIG_ACPI
+	struct st_sensor_data *adata = iio_priv(indio_dev);
+	struct acpi_buffer buffer = {ACPI_ALLOCATE_BUFFER, NULL};
+	struct acpi_device *adev;
+	union acpi_object *ont;
+	union acpi_object *elements;
+	acpi_status status;
+	int ret = -EINVAL;
+	unsigned int val;
+	int i, j;
+	int final_ont[3][3] = { { 0 }, };
+
+	/* For some reason, ST's _ONT translation does not apply directly
+	 * to the data read from the sensor. Another translation must be
+	 * performed first, as described by the matrix below. Perhaps
+	 * ST required this specific translation for the first product
+	 * where the device was mounted?
+	 */
+	const int default_ont[3][3] = {
+		{  0,  1,  0 },
+		{ -1,  0,  0 },
+		{  0,  0, -1 },
+	};
+
+
+	adev = ACPI_COMPANION(adata->dev);
+	if (!adev)
+		return 0;
+
+	/* Read _ONT data, which should be a package of 6 integers. */
+	status = acpi_evaluate_object(adev->handle, "_ONT", NULL, &buffer);
+	if (status == AE_NOT_FOUND) {
+		return 0;
+	} else if (ACPI_FAILURE(status)) {
+		dev_warn(&indio_dev->dev, "failed to execute _ONT: %d\n",
+			 status);
+		return status;
+	}
+
+	ont = buffer.pointer;
+	if (ont->type != ACPI_TYPE_PACKAGE || ont->package.count != 6)
+		goto out;
+
+	/* The first 3 integers provide axis order information.
+	 * e.g. 0 1 2 would indicate normal X,Y,Z ordering.
+	 * e.g. 1 0 2 indicates that data arrives in order Y,X,Z.
+	 */
+	elements = ont->package.elements;
+	for (i = 0; i < 3; i++) {
+		if (elements[i].type != ACPI_TYPE_INTEGER)
+			goto out;
+
+		val = elements[i].integer.value;
+		if (val < 0 || val > 2)
+			goto out;
+
+		/* Avoiding full matrix multiplication, we simply reorder the
+		 * columns in the default_ont matrix according to the
+		 * ordering provided by _ONT.
+		 */
+		final_ont[0][i] = default_ont[0][val];
+		final_ont[1][i] = default_ont[1][val];
+		final_ont[2][i] = default_ont[2][val];
+	}
+
+	/* The final 3 integers provide sign flip information.
+	 * 0 means no change, 1 means flip.
+	 * e.g. 0 0 1 means that Z data should be sign-flipped.
+	 * This is applied after the axis reordering from above.
+	 */
+	elements += 3;
+	for (i = 0; i < 3; i++) {
+		if (elements[i].type != ACPI_TYPE_INTEGER)
+			goto out;
+
+		val = elements[i].integer.value;
+		if (val != 0 && val != 1)
+			goto out;
+		if (!val)
+			continue;
+
+		/* Flip the values in the indicated column */
+		final_ont[0][i] *= -1;
+		final_ont[1][i] *= -1;
+		final_ont[2][i] *= -1;
+	}
+
+	/* Convert our integer matrix to a string-based iio_mount_matrix */
+	adata->mount_matrix = devm_kmalloc(&indio_dev->dev,
+					   sizeof(*adata->mount_matrix),
+					   GFP_KERNEL);
+	if (!adata->mount_matrix) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	for (i = 0; i < 3; i++) {
+		for (j = 0; j < 3; j++) {
+			int matrix_val = final_ont[i][j];
+			char *str_value;
+
+			switch (matrix_val) {
+			case -1:
+				str_value = "-1";
+				break;
+			case 0:
+				str_value = "0";
+				break;
+			case 1:
+				str_value = "1";
+				break;
+			default:
+				goto out;
+			}
+			adata->mount_matrix->rotation[i * 3 + j] = str_value;
+		}
+	}
+
+	/* Expose the mount matrix via ext_info */
+	for (i = 0; i < indio_dev->num_channels; i++)
+		channels[i].ext_info = mount_matrix_ext_info;
+
+	ret = 0;
+	dev_info(&indio_dev->dev, "computed mount matrix from ACPI\n");
+
+out:
+	kfree(buffer.pointer);
+	return ret;
+#else /* !CONFIG_ACPI */
+	return 0;
+#endif
+}
+
 int st_accel_common_probe(struct iio_dev *indio_dev)
 {
 	struct st_sensor_data *adata = iio_priv(indio_dev);
 	struct st_sensors_platform_data *pdata =
 		(struct st_sensors_platform_data *)adata->dev->platform_data;
 	int irq = adata->get_irq_data_ready(indio_dev);
+	struct iio_chan_spec *channels;
+	size_t channels_size;
 	int err;
 
 	indio_dev->modes = INDIO_DIRECT_MODE;
@@ -942,9 +1098,22 @@ int st_accel_common_probe(struct iio_dev *indio_dev)
 
 	adata->num_data_channels = ST_ACCEL_NUMBER_DATA_CHANNELS;
 	adata->multiread_bit = adata->sensor_settings->multi_read_bit;
-	indio_dev->channels = adata->sensor_settings->ch;
 	indio_dev->num_channels = ST_SENSORS_NUMBER_ALL_CHANNELS;
 
+	channels_size = indio_dev->num_channels * sizeof(struct iio_chan_spec);
+	channels = devm_kmemdup(&indio_dev->dev,
+				adata->sensor_settings->ch,
+				channels_size, GFP_KERNEL);
+	if (!channels) {
+		err = -ENOMEM;
+		goto st_accel_power_off;
+	}
+
+	if (apply_acpi_orientation(indio_dev, channels))
+		dev_warn(&indio_dev->dev,
+			 "failed to apply ACPI orientation data: %d\n", err);
+
+	indio_dev->channels = channels;
 	adata->current_fullscale = (struct st_sensor_fullscale_avl *)
 					&adata->sensor_settings->fs.fs_avl[0];
 	adata->odr = adata->sensor_settings->odr.odr_avl[0].hz;
diff --git a/include/linux/iio/common/st_sensors.h b/include/linux/iio/common/st_sensors.h
index 8092b8e7f37e..45e9667f0a8c 100644
--- a/include/linux/iio/common/st_sensors.h
+++ b/include/linux/iio/common/st_sensors.h
@@ -260,6 +260,7 @@ struct st_sensor_settings {
 struct st_sensor_data {
 	struct device *dev;
 	struct iio_trigger *trig;
+	struct iio_mount_matrix *mount_matrix;
 	struct st_sensor_settings *sensor_settings;
 	struct st_sensor_fullscale_avl *current_fullscale;
 	struct regulator *vdd;
-- 
cgit v1.2.3


From 078a85f2806f0ffd11289009462a6a390f9adb5c Mon Sep 17 00:00:00 2001
From: Charles Keepax <ckeepax@opensource.cirrus.com>
Date: Thu, 31 Jan 2019 13:30:18 +0000
Subject: ASoC: dapm: Only power up active channels from a DAI

Currently all widgets attached to a DAI link will be powered
up when the DAI is active, however this may include routes
that are not actually in use if there are unused channels
available on the DAI.

The macros for creating AIF widgets already include an entry for
slot, it is proposed to change that to channel. The effective
difference here being respresenting the logical channel index
rather than the physical slot index. The CODECs currently
using the slot entry on the DAPM_AIF macros are using it in
a manner consistent with this, the CODECs not using it just
have the field set to zero.

A variable is added to snd_soc_dapm_widget to represent
this channel index and then for each AIF widget attached to
a DAI this is compared against the number of channels on
the stream. Enabling the links for those which will be in
use. This has the nice property that the CODECs which haven't
used the slot/channel entry in the macro will function exactly
as before due to all the AIF widgets having a channel of zero
and a stream by definition having at least one channel.

Signed-off-by: Charles Keepax <ckeepax@opensource.cirrus.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/soc-dapm.h | 22 +++++++++-----
 sound/soc/soc-dapm.c     | 76 ++++++++++++++++++++++++++++++++++++++++++++++++
 sound/soc/soc-pcm.c      |  4 +++
 3 files changed, 94 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/sound/soc-dapm.h b/include/sound/soc-dapm.h
index 46f2ba3ffcb7..79b4ddfb8e9e 100644
--- a/include/sound/soc-dapm.h
+++ b/include/sound/soc-dapm.h
@@ -214,21 +214,21 @@ struct device;
 	.event_flags = SND_SOC_DAPM_POST_PMU | SND_SOC_DAPM_POST_PMD}
 
 /* stream domain */
-#define SND_SOC_DAPM_AIF_IN(wname, stname, wslot, wreg, wshift, winvert) \
+#define SND_SOC_DAPM_AIF_IN(wname, stname, wchan, wreg, wshift, winvert) \
 {	.id = snd_soc_dapm_aif_in, .name = wname, .sname = stname, \
-	SND_SOC_DAPM_INIT_REG_VAL(wreg, wshift, winvert), }
-#define SND_SOC_DAPM_AIF_IN_E(wname, stname, wslot, wreg, wshift, winvert, \
+	.channel = wchan, SND_SOC_DAPM_INIT_REG_VAL(wreg, wshift, winvert), }
+#define SND_SOC_DAPM_AIF_IN_E(wname, stname, wchan, wreg, wshift, winvert, \
 			      wevent, wflags)				\
 {	.id = snd_soc_dapm_aif_in, .name = wname, .sname = stname, \
-	SND_SOC_DAPM_INIT_REG_VAL(wreg, wshift, winvert), \
+	.channel = wchan, SND_SOC_DAPM_INIT_REG_VAL(wreg, wshift, winvert), \
 	.event = wevent, .event_flags = wflags }
-#define SND_SOC_DAPM_AIF_OUT(wname, stname, wslot, wreg, wshift, winvert) \
+#define SND_SOC_DAPM_AIF_OUT(wname, stname, wchan, wreg, wshift, winvert) \
 {	.id = snd_soc_dapm_aif_out, .name = wname, .sname = stname, \
-	SND_SOC_DAPM_INIT_REG_VAL(wreg, wshift, winvert), }
-#define SND_SOC_DAPM_AIF_OUT_E(wname, stname, wslot, wreg, wshift, winvert, \
+	.channel = wchan, SND_SOC_DAPM_INIT_REG_VAL(wreg, wshift, winvert), }
+#define SND_SOC_DAPM_AIF_OUT_E(wname, stname, wchan, wreg, wshift, winvert, \
 			     wevent, wflags)				\
 {	.id = snd_soc_dapm_aif_out, .name = wname, .sname = stname, \
-	SND_SOC_DAPM_INIT_REG_VAL(wreg, wshift, winvert), \
+	.channel = wchan, SND_SOC_DAPM_INIT_REG_VAL(wreg, wshift, winvert), \
 	.event = wevent, .event_flags = wflags }
 #define SND_SOC_DAPM_DAC(wname, stname, wreg, wshift, winvert) \
 {	.id = snd_soc_dapm_dac, .name = wname, .sname = stname, \
@@ -407,6 +407,10 @@ int snd_soc_dapm_new_dai_widgets(struct snd_soc_dapm_context *dapm,
 int snd_soc_dapm_link_dai_widgets(struct snd_soc_card *card);
 void snd_soc_dapm_connect_dai_link_widgets(struct snd_soc_card *card);
 
+int snd_soc_dapm_update_dai(struct snd_pcm_substream *substream,
+			    struct snd_pcm_hw_params *params,
+			    struct snd_soc_dai *dai);
+
 /* dapm path setup */
 int snd_soc_dapm_new_widgets(struct snd_soc_card *card);
 void snd_soc_dapm_free(struct snd_soc_dapm_context *dapm);
@@ -627,6 +631,8 @@ struct snd_soc_dapm_widget {
 	int endpoints[2];
 
 	struct clk *clk;
+
+	int channel;
 };
 
 struct snd_soc_dapm_update {
diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c
index e71cd5b660ad..36d964a52874 100644
--- a/sound/soc/soc-dapm.c
+++ b/sound/soc/soc-dapm.c
@@ -2541,6 +2541,78 @@ int snd_soc_dapm_sync(struct snd_soc_dapm_context *dapm)
 }
 EXPORT_SYMBOL_GPL(snd_soc_dapm_sync);
 
+static int dapm_update_dai_chan(struct snd_soc_dapm_path *p,
+				struct snd_soc_dapm_widget *w,
+				int channels)
+{
+	switch (w->id) {
+	case snd_soc_dapm_aif_out:
+	case snd_soc_dapm_aif_in:
+		break;
+	default:
+		return 0;
+	}
+
+	dev_dbg(w->dapm->dev, "%s DAI route %s -> %s\n",
+		w->channel < channels ? "Connecting" : "Disconnecting",
+		p->source->name, p->sink->name);
+
+	if (w->channel < channels)
+		soc_dapm_connect_path(p, true, "dai update");
+	else
+		soc_dapm_connect_path(p, false, "dai update");
+
+	return 0;
+}
+
+static int dapm_update_dai_unlocked(struct snd_pcm_substream *substream,
+				    struct snd_pcm_hw_params *params,
+				    struct snd_soc_dai *dai)
+{
+	int dir = substream->stream;
+	int channels = params_channels(params);
+	struct snd_soc_dapm_path *p;
+	struct snd_soc_dapm_widget *w;
+	int ret;
+
+	if (dir == SNDRV_PCM_STREAM_PLAYBACK)
+		w = dai->playback_widget;
+	else
+		w = dai->capture_widget;
+
+	dev_dbg(dai->dev, "Update DAI routes for %s %s\n", dai->name,
+		dir == SNDRV_PCM_STREAM_PLAYBACK ? "playback" : "capture");
+
+	snd_soc_dapm_widget_for_each_sink_path(w, p) {
+		ret = dapm_update_dai_chan(p, p->sink, channels);
+		if (ret < 0)
+			return ret;
+	}
+
+	snd_soc_dapm_widget_for_each_source_path(w, p) {
+		ret = dapm_update_dai_chan(p, p->source, channels);
+		if (ret < 0)
+			return ret;
+	}
+
+	return 0;
+}
+
+int snd_soc_dapm_update_dai(struct snd_pcm_substream *substream,
+			    struct snd_pcm_hw_params *params,
+			    struct snd_soc_dai *dai)
+{
+	struct snd_soc_pcm_runtime *rtd = substream->private_data;
+	int ret;
+
+	mutex_lock_nested(&rtd->card->dapm_mutex, SND_SOC_DAPM_CLASS_RUNTIME);
+	ret = dapm_update_dai_unlocked(substream, params, dai);
+	mutex_unlock(&rtd->card->dapm_mutex);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(snd_soc_dapm_update_dai);
+
 /*
  * dapm_update_widget_flags() - Re-compute widget sink and source flags
  * @w: The widget for which to update the flags
@@ -3706,6 +3778,8 @@ static int snd_soc_dai_link_event(struct snd_soc_dapm_widget *w,
 			ret = soc_dai_hw_params(&substream, params, source);
 			if (ret < 0)
 				goto out;
+
+			dapm_update_dai_unlocked(&substream, params, source);
 		}
 
 		substream.stream = SNDRV_PCM_STREAM_PLAYBACK;
@@ -3726,6 +3800,8 @@ static int snd_soc_dai_link_event(struct snd_soc_dapm_widget *w,
 			ret = soc_dai_hw_params(&substream, params, sink);
 			if (ret < 0)
 				goto out;
+
+			dapm_update_dai_unlocked(&substream, params, sink);
 		}
 		break;
 
diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c
index 03f36e534050..a5b40e82dea4 100644
--- a/sound/soc/soc-pcm.c
+++ b/sound/soc/soc-pcm.c
@@ -969,6 +969,8 @@ static int soc_pcm_hw_params(struct snd_pcm_substream *substream,
 		codec_dai->channels = params_channels(&codec_params);
 		codec_dai->sample_bits = snd_pcm_format_physical_width(
 						params_format(&codec_params));
+
+		snd_soc_dapm_update_dai(substream, &codec_params, codec_dai);
 	}
 
 	ret = soc_dai_hw_params(substream, params, cpu_dai);
@@ -998,6 +1000,8 @@ static int soc_pcm_hw_params(struct snd_pcm_substream *substream,
 	cpu_dai->sample_bits =
 		snd_pcm_format_physical_width(params_format(params));
 
+	snd_soc_dapm_update_dai(substream, params, cpu_dai);
+
 	ret = soc_pcm_params_symmetry(substream, params);
         if (ret)
 		goto component_err;
-- 
cgit v1.2.3


From d5d30d5a5c60628de5e77e3f292a8f9012d51350 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Sat, 2 Feb 2019 16:35:26 -0800
Subject: libnvdimm/dimm: Add a no-BLK quirk based on NVDIMM family

As Dexuan reports the NVDIMM_FAMILY_HYPERV platform is incompatible with
the existing Linux namespace implementation because it uses
NSLABEL_FLAG_LOCAL for x1-width PMEM interleave sets. Quirk it as an
platform / DIMM that does not provide BLK-aperture access. Allow the
libnvdimm core to assume no potential for aliasing. In case other
implementations make the same mistake, provide a "noblk" module
parameter to force-enable the quirk.

Link: https://lkml.kernel.org/r/PU1P153MB0169977604493B82B662A01CBF920@PU1P153MB0169.APCP153.PROD.OUTLOOK.COM
Reported-by: Dexuan Cui <decui@microsoft.com>
Tested-by: Dexuan Cui <decui@microsoft.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/acpi/nfit/core.c        | 4 ++++
 drivers/nvdimm/dimm_devs.c      | 7 +++++++
 drivers/nvdimm/label.c          | 3 +++
 drivers/nvdimm/namespace_devs.c | 6 ++++++
 drivers/nvdimm/region_devs.c    | 7 +++++++
 include/linux/libnvdimm.h       | 2 ++
 6 files changed, 29 insertions(+)

(limited to 'include')

diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c
index 4a7e8b1fa43b..811c399a3a76 100644
--- a/drivers/acpi/nfit/core.c
+++ b/drivers/acpi/nfit/core.c
@@ -2016,6 +2016,10 @@ static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc)
 			cmd_mask |= nfit_mem->dsm_mask & NVDIMM_STANDARD_CMDMASK;
 		}
 
+		/* Quirk to ignore LOCAL for labels on HYPERV DIMMs */
+		if (nfit_mem->family == NVDIMM_FAMILY_HYPERV)
+			set_bit(NDD_NOBLK, &flags);
+
 		if (test_bit(NFIT_MEM_LSR, &nfit_mem->flags)) {
 			set_bit(ND_CMD_GET_CONFIG_SIZE, &cmd_mask);
 			set_bit(ND_CMD_GET_CONFIG_DATA, &cmd_mask);
diff --git a/drivers/nvdimm/dimm_devs.c b/drivers/nvdimm/dimm_devs.c
index 4890310df874..553aa78abeee 100644
--- a/drivers/nvdimm/dimm_devs.c
+++ b/drivers/nvdimm/dimm_devs.c
@@ -11,6 +11,7 @@
  * General Public License for more details.
  */
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/moduleparam.h>
 #include <linux/vmalloc.h>
 #include <linux/device.h>
 #include <linux/ndctl.h>
@@ -25,6 +26,10 @@
 
 static DEFINE_IDA(dimm_ida);
 
+static bool noblk;
+module_param(noblk, bool, 0444);
+MODULE_PARM_DESC(noblk, "force disable BLK / local alias support");
+
 /*
  * Retrieve bus and dimm handle and return if this bus supports
  * get_config_data commands
@@ -551,6 +556,8 @@ struct nvdimm *__nvdimm_create(struct nvdimm_bus *nvdimm_bus,
 
 	nvdimm->dimm_id = dimm_id;
 	nvdimm->provider_data = provider_data;
+	if (noblk)
+		flags |= 1 << NDD_NOBLK;
 	nvdimm->flags = flags;
 	nvdimm->cmd_mask = cmd_mask;
 	nvdimm->num_flush = num_flush;
diff --git a/drivers/nvdimm/label.c b/drivers/nvdimm/label.c
index 6d6e9a12150b..f3d753d3169c 100644
--- a/drivers/nvdimm/label.c
+++ b/drivers/nvdimm/label.c
@@ -392,6 +392,7 @@ int nd_label_reserve_dpa(struct nvdimm_drvdata *ndd)
 		return 0; /* no label, nothing to reserve */
 
 	for_each_clear_bit_le(slot, free, nslot) {
+		struct nvdimm *nvdimm = to_nvdimm(ndd->dev);
 		struct nd_namespace_label *nd_label;
 		struct nd_region *nd_region = NULL;
 		u8 label_uuid[NSLABEL_UUID_LEN];
@@ -406,6 +407,8 @@ int nd_label_reserve_dpa(struct nvdimm_drvdata *ndd)
 
 		memcpy(label_uuid, nd_label->uuid, NSLABEL_UUID_LEN);
 		flags = __le32_to_cpu(nd_label->flags);
+		if (test_bit(NDD_NOBLK, &nvdimm->flags))
+			flags &= ~NSLABEL_FLAG_LOCAL;
 		nd_label_gen_id(&label_id, label_uuid, flags);
 		res = nvdimm_allocate_dpa(ndd, &label_id,
 				__le64_to_cpu(nd_label->dpa),
diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c
index 4b077555ac70..3677b0c4a33d 100644
--- a/drivers/nvdimm/namespace_devs.c
+++ b/drivers/nvdimm/namespace_devs.c
@@ -2492,6 +2492,12 @@ static int init_active_labels(struct nd_region *nd_region)
 			if (!label_ent)
 				break;
 			label = nd_label_active(ndd, j);
+			if (test_bit(NDD_NOBLK, &nvdimm->flags)) {
+				u32 flags = __le32_to_cpu(label->flags);
+
+				flags &= ~NSLABEL_FLAG_LOCAL;
+				label->flags = __cpu_to_le32(flags);
+			}
 			label_ent->label = label;
 
 			mutex_lock(&nd_mapping->lock);
diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c
index e2818f94f292..3b58baa44b5c 100644
--- a/drivers/nvdimm/region_devs.c
+++ b/drivers/nvdimm/region_devs.c
@@ -1003,6 +1003,13 @@ static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus,
 
 		if (test_bit(NDD_UNARMED, &nvdimm->flags))
 			ro = 1;
+
+		if (test_bit(NDD_NOBLK, &nvdimm->flags)
+				&& dev_type == &nd_blk_device_type) {
+			dev_err(&nvdimm_bus->dev, "%s: %s mapping%d is not BLK capable\n",
+					caller, dev_name(&nvdimm->dev), i);
+			return NULL;
+		}
 	}
 
 	if (dev_type == &nd_blk_device_type) {
diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h
index 5440f11b0907..7da406ae3a2b 100644
--- a/include/linux/libnvdimm.h
+++ b/include/linux/libnvdimm.h
@@ -42,6 +42,8 @@ enum {
 	NDD_SECURITY_OVERWRITE = 3,
 	/*  tracking whether or not there is a pending device reference */
 	NDD_WORK_PENDING = 4,
+	/* ignore / filter NSLABEL_FLAG_LOCAL for this DIMM, i.e. no aliasing */
+	NDD_NOBLK = 5,
 
 	/* need to set a limit somewhere, but yes, this is likely overkill */
 	ND_IOCTL_MAX_BUFLEN = SZ_4M,
-- 
cgit v1.2.3


From 79a4e91d1bb2a411a4ce2baa93680fa707567003 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Sat, 2 Feb 2019 19:50:17 -0800
Subject: device.h: Add __cold to dev_<level> logging functions

Add __cold to the dev_<level> logging functions similar to
the use of __cold in the generic printk function.

Using __cold moves all the dev_<level> logging functions
out-of-line possibly improving code locality and runtime
performance.

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/device.h | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/linux/device.h b/include/linux/device.h
index 0ab0a3a80ec3..a36830e2d0e5 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -1384,28 +1384,28 @@ void device_link_remove(void *consumer, struct device *supplier);
 
 #ifdef CONFIG_PRINTK
 
-__printf(3, 0)
+__printf(3, 0) __cold
 int dev_vprintk_emit(int level, const struct device *dev,
 		     const char *fmt, va_list args);
-__printf(3, 4)
+__printf(3, 4) __cold
 int dev_printk_emit(int level, const struct device *dev, const char *fmt, ...);
 
-__printf(3, 4)
+__printf(3, 4) __cold
 void dev_printk(const char *level, const struct device *dev,
 		const char *fmt, ...);
-__printf(2, 3)
+__printf(2, 3) __cold
 void _dev_emerg(const struct device *dev, const char *fmt, ...);
-__printf(2, 3)
+__printf(2, 3) __cold
 void _dev_alert(const struct device *dev, const char *fmt, ...);
-__printf(2, 3)
+__printf(2, 3) __cold
 void _dev_crit(const struct device *dev, const char *fmt, ...);
-__printf(2, 3)
+__printf(2, 3) __cold
 void _dev_err(const struct device *dev, const char *fmt, ...);
-__printf(2, 3)
+__printf(2, 3) __cold
 void _dev_warn(const struct device *dev, const char *fmt, ...);
-__printf(2, 3)
+__printf(2, 3) __cold
 void _dev_notice(const struct device *dev, const char *fmt, ...);
-__printf(2, 3)
+__printf(2, 3) __cold
 void _dev_info(const struct device *dev, const char *fmt, ...);
 
 #else
-- 
cgit v1.2.3


From dda7a817f2873a0e0b1c7fde1265758f3623daa4 Mon Sep 17 00:00:00 2001
From: Moni Shoua <monis@mellanox.com>
Date: Tue, 22 Jan 2019 08:48:49 +0200
Subject: net/mlx5: Add XRC transport to ODP device capabilities layout

The device capabilities for ODP structure was missing the field for XRC
transport so add it here.

Signed-off-by: Moni Shoua <monis@mellanox.com>
Reviewed-by: Majd Dibbiny <majd@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 include/linux/mlx5/mlx5_ifc.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 35fe5217b244..5407db8ba8e1 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -831,7 +831,9 @@ struct mlx5_ifc_odp_cap_bits {
 
 	struct mlx5_ifc_odp_per_transport_service_cap_bits ud_odp_caps;
 
-	u8         reserved_at_e0[0x720];
+	struct mlx5_ifc_odp_per_transport_service_cap_bits xrc_odp_caps;
+
+	u8         reserved_at_100[0x700];
 };
 
 struct mlx5_ifc_calc_op {
-- 
cgit v1.2.3


From 46861e3e88be18846971792b763eaf520a91a802 Mon Sep 17 00:00:00 2001
From: Moni Shoua <monis@mellanox.com>
Date: Tue, 22 Jan 2019 08:48:51 +0200
Subject: net/mlx5: Set ODP SRQ support in firmware

To avoid compatibility issue with older kernels the firmware doesn't
allow SRQ to work with ODP unless kernel asks for it.

Signed-off-by: Moni Shoua <monis@mellanox.com>
Reviewed-by: Majd Dibbiny <majd@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/main.c | 53 ++++++++++++++++++++++++++
 include/linux/mlx5/device.h                    |  3 ++
 include/linux/mlx5/mlx5_ifc.h                  |  1 +
 3 files changed, 57 insertions(+)

(limited to 'include')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 085e1133b8d5..e38aa206ab6d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -459,6 +459,53 @@ static int handle_hca_cap_atomic(struct mlx5_core_dev *dev)
 	return err;
 }
 
+static int handle_hca_cap_odp(struct mlx5_core_dev *dev)
+{
+	void *set_ctx;
+	void *set_hca_cap;
+	int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in);
+	int err;
+
+	if (!MLX5_CAP_GEN(dev, pg))
+		return 0;
+
+	err = mlx5_core_get_caps(dev, MLX5_CAP_ODP);
+	if (err)
+		return err;
+
+	/**
+	 * If all bits are cleared we shouldn't try to set it
+	 * or we might fail while trying to access a reserved bit.
+	 */
+	if (!(MLX5_CAP_ODP_MAX(dev, ud_odp_caps.srq_receive) ||
+	      MLX5_CAP_ODP_MAX(dev, rc_odp_caps.srq_receive) ||
+	      MLX5_CAP_ODP_MAX(dev, xrc_odp_caps.srq_receive)))
+		return 0;
+
+	set_ctx = kzalloc(set_sz, GFP_KERNEL);
+	if (!set_ctx)
+		return -ENOMEM;
+
+	set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, capability);
+	memcpy(set_hca_cap, dev->caps.hca_cur[MLX5_CAP_ODP],
+	       MLX5_ST_SZ_BYTES(odp_cap));
+
+	/* set ODP SRQ support for RC/UD and XRC transports */
+	MLX5_SET(odp_cap, set_hca_cap, ud_odp_caps.srq_receive,
+		 (MLX5_CAP_ODP_MAX(dev, ud_odp_caps.srq_receive)));
+
+	MLX5_SET(odp_cap, set_hca_cap, rc_odp_caps.srq_receive,
+		 (MLX5_CAP_ODP_MAX(dev, rc_odp_caps.srq_receive)));
+
+	MLX5_SET(odp_cap, set_hca_cap, xrc_odp_caps.srq_receive,
+		 (MLX5_CAP_ODP_MAX(dev, xrc_odp_caps.srq_receive)));
+
+	err = set_caps(dev, set_ctx, set_sz, MLX5_SET_HCA_CAP_OP_MOD_ODP);
+
+	kfree(set_ctx);
+	return err;
+}
+
 static int handle_hca_cap(struct mlx5_core_dev *dev)
 {
 	void *set_ctx = NULL;
@@ -931,6 +978,12 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
 		goto reclaim_boot_pages;
 	}
 
+	err = handle_hca_cap_odp(dev);
+	if (err) {
+		dev_err(&pdev->dev, "handle_hca_cap_odp failed\n");
+		goto reclaim_boot_pages;
+	}
+
 	err = mlx5_satisfy_startup_pages(dev, 0);
 	if (err) {
 		dev_err(&pdev->dev, "failed to allocate init pages\n");
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 8c4a820bd4c1..0845a227a7b2 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -1201,6 +1201,9 @@ enum mlx5_qcam_feature_groups {
 #define MLX5_CAP_ODP(mdev, cap)\
 	MLX5_GET(odp_cap, mdev->caps.hca_cur[MLX5_CAP_ODP], cap)
 
+#define MLX5_CAP_ODP_MAX(mdev, cap)\
+	MLX5_GET(odp_cap, mdev->caps.hca_max[MLX5_CAP_ODP], cap)
+
 #define MLX5_CAP_VECTOR_CALC(mdev, cap) \
 	MLX5_GET(vector_calc_cap, \
 		 mdev->caps.hca_cur[MLX5_CAP_VECTOR_CALC], cap)
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 5407db8ba8e1..c5c679390fbd 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -72,6 +72,7 @@ enum {
 
 enum {
 	MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE        = 0x0,
+	MLX5_SET_HCA_CAP_OP_MOD_ODP                   = 0x2,
 	MLX5_SET_HCA_CAP_OP_MOD_ATOMIC                = 0x3,
 };
 
-- 
cgit v1.2.3


From 7f1bc6e95d7840d4305595b3e4025cddda88cee5 Mon Sep 17 00:00:00 2001
From: Deepa Dinamani <deepa.kernel@gmail.com>
Date: Sat, 2 Feb 2019 07:34:46 -0800
Subject: sockopt: Rename SO_TIMESTAMP* to SO_TIMESTAMP*_OLD

SO_TIMESTAMP, SO_TIMESTAMPNS and SO_TIMESTAMPING options, the
way they are currently defined, are not y2038 safe.
Subsequent patches in the series add new y2038 safe versions
of these options which provide 64 bit timestamps on all
architectures uniformly.
Hence, rename existing options with OLD tag suffixes.

Also note that kernel will not use the untagged SO_TIMESTAMP*
and SCM_TIMESTAMP* options internally anymore.

Signed-off-by: Deepa Dinamani <deepa.kernel@gmail.com>
Acked-by: Willem de Bruijn <willemb@google.com>
Cc: deller@gmx.de
Cc: dhowells@redhat.com
Cc: jejb@parisc-linux.org
Cc: ralf@linux-mips.org
Cc: rth@twiddle.net
Cc: linux-afs@lists.infradead.org
Cc: linux-alpha@vger.kernel.org
Cc: linux-arch@vger.kernel.org
Cc: linux-mips@linux-mips.org
Cc: linux-parisc@vger.kernel.org
Cc: linux-rdma@vger.kernel.org
Cc: sparclinux@vger.kernel.org
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/alpha/include/uapi/asm/socket.h  | 23 ++++++++++++++++-------
 arch/mips/include/uapi/asm/socket.h   | 23 ++++++++++++++++-------
 arch/parisc/include/uapi/asm/socket.h | 23 ++++++++++++++++-------
 arch/sparc/include/uapi/asm/socket.h  | 24 ++++++++++++++++--------
 include/uapi/asm-generic/socket.h     | 23 ++++++++++++++++-------
 net/compat.c                          |  6 +++---
 net/core/sock.c                       | 16 ++++++++--------
 net/ipv4/tcp.c                        |  6 +++---
 net/rds/af_rds.c                      |  2 +-
 net/rds/recv.c                        |  2 +-
 net/rxrpc/local_object.c              |  2 +-
 net/socket.c                          |  8 ++++----
 12 files changed, 101 insertions(+), 57 deletions(-)

(limited to 'include')

diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h
index b1c9b542c021..992a0a6dcea1 100644
--- a/arch/alpha/include/uapi/asm/socket.h
+++ b/arch/alpha/include/uapi/asm/socket.h
@@ -51,13 +51,9 @@
 #define SO_GET_FILTER		SO_ATTACH_FILTER
 
 #define SO_PEERNAME		28
-#define SO_TIMESTAMP		29
-#define SCM_TIMESTAMP		SO_TIMESTAMP
 
 #define SO_PEERSEC		30
 #define SO_PASSSEC		34
-#define SO_TIMESTAMPNS		35
-#define SCM_TIMESTAMPNS		SO_TIMESTAMPNS
 
 /* Security levels - as per NRL IPv6 - don't actually do anything */
 #define SO_SECURITY_AUTHENTICATION		19
@@ -66,9 +62,6 @@
 
 #define SO_MARK			36
 
-#define SO_TIMESTAMPING		37
-#define SCM_TIMESTAMPING	SO_TIMESTAMPING
-
 #define SO_RXQ_OVFL             40
 
 #define SO_WIFI_STATUS		41
@@ -117,4 +110,20 @@
 
 #define SO_BINDTOIFINDEX	62
 
+#define SO_TIMESTAMP_OLD        29
+#define SO_TIMESTAMPNS_OLD      35
+#define SO_TIMESTAMPING_OLD     37
+
+#if !defined(__KERNEL__)
+
+#define SO_TIMESTAMP            SO_TIMESTAMP_OLD
+#define SO_TIMESTAMPNS          SO_TIMESTAMPNS_OLD
+#define SO_TIMESTAMPING         SO_TIMESTAMPING_OLD
+
+#define SCM_TIMESTAMP           SO_TIMESTAMP
+#define SCM_TIMESTAMPNS         SO_TIMESTAMPNS
+#define SCM_TIMESTAMPING        SO_TIMESTAMPING
+
+#endif
+
 #endif /* _UAPI_ASM_SOCKET_H */
diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h
index 73e25e35d803..0f4516c34df2 100644
--- a/arch/mips/include/uapi/asm/socket.h
+++ b/arch/mips/include/uapi/asm/socket.h
@@ -65,21 +65,14 @@
 #define SO_GET_FILTER		SO_ATTACH_FILTER
 
 #define SO_PEERNAME		28
-#define SO_TIMESTAMP		29
-#define SCM_TIMESTAMP		SO_TIMESTAMP
 
 #define SO_PEERSEC		30
 #define SO_SNDBUFFORCE		31
 #define SO_RCVBUFFORCE		33
 #define SO_PASSSEC		34
-#define SO_TIMESTAMPNS		35
-#define SCM_TIMESTAMPNS		SO_TIMESTAMPNS
 
 #define SO_MARK			36
 
-#define SO_TIMESTAMPING		37
-#define SCM_TIMESTAMPING	SO_TIMESTAMPING
-
 #define SO_RXQ_OVFL		40
 
 #define SO_WIFI_STATUS		41
@@ -128,4 +121,20 @@
 
 #define SO_BINDTOIFINDEX	62
 
+#define SO_TIMESTAMP_OLD        29
+#define SO_TIMESTAMPNS_OLD      35
+#define SO_TIMESTAMPING_OLD     37
+
+#if !defined(__KERNEL__)
+
+#define SO_TIMESTAMP            SO_TIMESTAMP_OLD
+#define SO_TIMESTAMPNS          SO_TIMESTAMPNS_OLD
+#define SO_TIMESTAMPING         SO_TIMESTAMPING_OLD
+
+#define SCM_TIMESTAMP           SO_TIMESTAMP
+#define SCM_TIMESTAMPNS         SO_TIMESTAMPNS
+#define SCM_TIMESTAMPING        SO_TIMESTAMPING
+
+#endif
+
 #endif /* _UAPI_ASM_SOCKET_H */
diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h
index 52bed5976cbe..7c180321ebd6 100644
--- a/arch/parisc/include/uapi/asm/socket.h
+++ b/arch/parisc/include/uapi/asm/socket.h
@@ -34,10 +34,6 @@
 #define SO_BSDCOMPAT	0x400e
 #define SO_PASSCRED	0x4010
 #define SO_PEERCRED	0x4011
-#define SO_TIMESTAMP	0x4012
-#define SCM_TIMESTAMP	SO_TIMESTAMP
-#define SO_TIMESTAMPNS	0x4013
-#define SCM_TIMESTAMPNS	SO_TIMESTAMPNS
 
 /* Security levels - as per NRL IPv6 - don't actually do anything */
 #define SO_SECURITY_AUTHENTICATION		0x4016
@@ -58,9 +54,6 @@
 
 #define SO_MARK			0x401f
 
-#define SO_TIMESTAMPING		0x4020
-#define SCM_TIMESTAMPING	SO_TIMESTAMPING
-
 #define SO_RXQ_OVFL             0x4021
 
 #define SO_WIFI_STATUS		0x4022
@@ -109,4 +102,20 @@
 
 #define SO_BINDTOIFINDEX	0x4037
 
+#define SO_TIMESTAMP_OLD        0x4012
+#define SO_TIMESTAMPNS_OLD      0x4013
+#define SO_TIMESTAMPING_OLD     0x4020
+
+#if !defined(__KERNEL__)
+
+#define SO_TIMESTAMP            SO_TIMESTAMP_OLD
+#define SO_TIMESTAMPNS          SO_TIMESTAMPNS_OLD
+#define SO_TIMESTAMPING         SO_TIMESTAMPING_OLD
+
+#define SCM_TIMESTAMP           SO_TIMESTAMP
+#define SCM_TIMESTAMPNS         SO_TIMESTAMPNS
+#define SCM_TIMESTAMPING        SO_TIMESTAMPING
+
+#endif
+
 #endif /* _UAPI_ASM_SOCKET_H */
diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h
index bbdb81594dd4..d8a1bbc3e6c4 100644
--- a/arch/sparc/include/uapi/asm/socket.h
+++ b/arch/sparc/include/uapi/asm/socket.h
@@ -33,7 +33,6 @@
 #define SO_PROTOCOL	0x1028
 #define SO_DOMAIN	0x1029
 
-
 /* Linux specific, keep the same. */
 #define SO_NO_CHECK	0x000b
 #define SO_PRIORITY	0x000c
@@ -45,19 +44,12 @@
 #define SO_GET_FILTER		SO_ATTACH_FILTER
 
 #define SO_PEERNAME		0x001c
-#define SO_TIMESTAMP		0x001d
-#define SCM_TIMESTAMP		SO_TIMESTAMP
 
 #define SO_PEERSEC		0x001e
 #define SO_PASSSEC		0x001f
-#define SO_TIMESTAMPNS		0x0021
-#define SCM_TIMESTAMPNS		SO_TIMESTAMPNS
 
 #define SO_MARK			0x0022
 
-#define SO_TIMESTAMPING		0x0023
-#define SCM_TIMESTAMPING	SO_TIMESTAMPING
-
 #define SO_RXQ_OVFL             0x0024
 
 #define SO_WIFI_STATUS		0x0025
@@ -111,4 +103,20 @@
 #define SO_SECURITY_ENCRYPTION_TRANSPORT	0x5002
 #define SO_SECURITY_ENCRYPTION_NETWORK		0x5004
 
+#define SO_TIMESTAMP_OLD         0x001d
+#define SO_TIMESTAMPNS_OLD       0x0021
+#define SO_TIMESTAMPING_OLD      0x0023
+
+#if !defined(__KERNEL__)
+
+#define SO_TIMESTAMP           SO_TIMESTAMP_OLD
+#define SO_TIMESTAMPNS         SO_TIMESTAMPNS_OLD
+#define SO_TIMESTAMPING        SO_TIMESTAMPING_OLD
+
+#define SCM_TIMESTAMP          SO_TIMESTAMP
+#define SCM_TIMESTAMPNS        SO_TIMESTAMPNS
+#define SCM_TIMESTAMPING       SO_TIMESTAMPING
+
+#endif
+
 #endif /* _ASM_SOCKET_H */
diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h
index 3066ab3853a8..4ef3aed31fb7 100644
--- a/include/uapi/asm-generic/socket.h
+++ b/include/uapi/asm-generic/socket.h
@@ -46,21 +46,14 @@
 #define SO_GET_FILTER		SO_ATTACH_FILTER
 
 #define SO_PEERNAME		28
-#define SO_TIMESTAMP		29
-#define SCM_TIMESTAMP		SO_TIMESTAMP
 
 #define SO_ACCEPTCONN		30
 
 #define SO_PEERSEC		31
 #define SO_PASSSEC		34
-#define SO_TIMESTAMPNS		35
-#define SCM_TIMESTAMPNS		SO_TIMESTAMPNS
 
 #define SO_MARK			36
 
-#define SO_TIMESTAMPING		37
-#define SCM_TIMESTAMPING	SO_TIMESTAMPING
-
 #define SO_PROTOCOL		38
 #define SO_DOMAIN		39
 
@@ -112,4 +105,20 @@
 
 #define SO_BINDTOIFINDEX	62
 
+#define SO_TIMESTAMP_OLD        29
+#define SO_TIMESTAMPNS_OLD      35
+#define SO_TIMESTAMPING_OLD     37
+
+#if !defined(__KERNEL__)
+
+#define SO_TIMESTAMP            SO_TIMESTAMP_OLD
+#define SO_TIMESTAMPNS          SO_TIMESTAMPNS_OLD
+#define SO_TIMESTAMPING         SO_TIMESTAMPING_OLD
+
+#define SCM_TIMESTAMP           SO_TIMESTAMP
+#define SCM_TIMESTAMPNS         SO_TIMESTAMPNS
+#define SCM_TIMESTAMPING        SO_TIMESTAMPING
+
+#endif
+
 #endif /* __ASM_GENERIC_SOCKET_H */
diff --git a/net/compat.c b/net/compat.c
index ce8f6e8cdcd2..ccf93cd0e49b 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -219,7 +219,7 @@ int put_cmsg_compat(struct msghdr *kmsg, int level, int type, int len, void *dat
 	}
 
 	if (!COMPAT_USE_64BIT_TIME) {
-		if (level == SOL_SOCKET && type == SCM_TIMESTAMP) {
+		if (level == SOL_SOCKET && type == SO_TIMESTAMP_OLD) {
 			struct timeval *tv = (struct timeval *)data;
 			ctv.tv_sec = tv->tv_sec;
 			ctv.tv_usec = tv->tv_usec;
@@ -227,8 +227,8 @@ int put_cmsg_compat(struct msghdr *kmsg, int level, int type, int len, void *dat
 			len = sizeof(ctv);
 		}
 		if (level == SOL_SOCKET &&
-		    (type == SCM_TIMESTAMPNS || type == SCM_TIMESTAMPING)) {
-			int count = type == SCM_TIMESTAMPNS ? 1 : 3;
+		    (type == SO_TIMESTAMPNS_OLD || type == SO_TIMESTAMPING_OLD)) {
+			int count = type == SO_TIMESTAMPNS_OLD ? 1 : 3;
 			int i;
 			struct timespec *ts = (struct timespec *)data;
 			for (i = 0; i < count; i++) {
diff --git a/net/core/sock.c b/net/core/sock.c
index 29c0028df5ae..d5ca8641968f 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -867,10 +867,10 @@ set_rcvbuf:
 			clear_bit(SOCK_PASSCRED, &sock->flags);
 		break;
 
-	case SO_TIMESTAMP:
-	case SO_TIMESTAMPNS:
+	case SO_TIMESTAMP_OLD:
+	case SO_TIMESTAMPNS_OLD:
 		if (valbool)  {
-			if (optname == SO_TIMESTAMP)
+			if (optname == SO_TIMESTAMP_OLD)
 				sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
 			else
 				sock_set_flag(sk, SOCK_RCVTSTAMPNS);
@@ -882,7 +882,7 @@ set_rcvbuf:
 		}
 		break;
 
-	case SO_TIMESTAMPING:
+	case SO_TIMESTAMPING_OLD:
 		if (val & ~SOF_TIMESTAMPING_MASK) {
 			ret = -EINVAL;
 			break;
@@ -1243,16 +1243,16 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
 		sock_warn_obsolete_bsdism("getsockopt");
 		break;
 
-	case SO_TIMESTAMP:
+	case SO_TIMESTAMP_OLD:
 		v.val = sock_flag(sk, SOCK_RCVTSTAMP) &&
 				!sock_flag(sk, SOCK_RCVTSTAMPNS);
 		break;
 
-	case SO_TIMESTAMPNS:
+	case SO_TIMESTAMPNS_OLD:
 		v.val = sock_flag(sk, SOCK_RCVTSTAMPNS);
 		break;
 
-	case SO_TIMESTAMPING:
+	case SO_TIMESTAMPING_OLD:
 		v.val = sk->sk_tsflags;
 		break;
 
@@ -2168,7 +2168,7 @@ int __sock_cmsg_send(struct sock *sk, struct msghdr *msg, struct cmsghdr *cmsg,
 			return -EINVAL;
 		sockc->mark = *(u32 *)CMSG_DATA(cmsg);
 		break;
-	case SO_TIMESTAMPING:
+	case SO_TIMESTAMPING_OLD:
 		if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32)))
 			return -EINVAL;
 
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 6f8d292ad501..e29aec59cad1 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1867,13 +1867,13 @@ static void tcp_recv_timestamp(struct msghdr *msg, const struct sock *sk,
 	if (tss->ts[0].tv_sec || tss->ts[0].tv_nsec) {
 		if (sock_flag(sk, SOCK_RCVTSTAMP)) {
 			if (sock_flag(sk, SOCK_RCVTSTAMPNS)) {
-				put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS,
+				put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_OLD,
 					 sizeof(tss->ts[0]), &tss->ts[0]);
 			} else {
 				tv.tv_sec = tss->ts[0].tv_sec;
 				tv.tv_usec = tss->ts[0].tv_nsec / 1000;
 
-				put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP,
+				put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_OLD,
 					 sizeof(tv), &tv);
 			}
 		}
@@ -1893,7 +1893,7 @@ static void tcp_recv_timestamp(struct msghdr *msg, const struct sock *sk,
 
 	if (has_timestamping) {
 		tss->ts[1] = (struct timespec) {0};
-		put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING,
+		put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPING_OLD,
 			 sizeof(*tss), tss);
 	}
 }
diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c
index 65387e1e6964..eeb4639adbe5 100644
--- a/net/rds/af_rds.c
+++ b/net/rds/af_rds.c
@@ -430,7 +430,7 @@ static int rds_setsockopt(struct socket *sock, int level, int optname,
 		ret = rds_set_transport(rs, optval, optlen);
 		release_sock(sock->sk);
 		break;
-	case SO_TIMESTAMP:
+	case SO_TIMESTAMP_OLD:
 		lock_sock(sock->sk);
 		ret = rds_enable_recvtstamp(sock->sk, optval, optlen);
 		release_sock(sock->sk);
diff --git a/net/rds/recv.c b/net/rds/recv.c
index 727639dac8a7..04e30d63a159 100644
--- a/net/rds/recv.c
+++ b/net/rds/recv.c
@@ -550,7 +550,7 @@ static int rds_cmsg_recv(struct rds_incoming *inc, struct msghdr *msg,
 	if ((inc->i_rx_tstamp != 0) &&
 	    sock_flag(rds_rs_to_sk(rs), SOCK_RCVTSTAMP)) {
 		struct timeval tv = ktime_to_timeval(inc->i_rx_tstamp);
-		ret = put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP,
+		ret = put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_OLD,
 			       sizeof(tv), &tv);
 		if (ret)
 			goto out;
diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c
index 0906e51d3cfb..15cf42d5b53a 100644
--- a/net/rxrpc/local_object.c
+++ b/net/rxrpc/local_object.c
@@ -202,7 +202,7 @@ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net)
 
 		/* We want receive timestamps. */
 		opt = 1;
-		ret = kernel_setsockopt(local->socket, SOL_SOCKET, SO_TIMESTAMPNS,
+		ret = kernel_setsockopt(local->socket, SOL_SOCKET, SO_TIMESTAMPNS_OLD,
 					(char *)&opt, sizeof(opt));
 		if (ret < 0) {
 			_debug("setsockopt failed");
diff --git a/net/socket.c b/net/socket.c
index e89884e2197b..5087f9e40f3a 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -669,7 +669,7 @@ static bool skb_is_err_queue(const struct sk_buff *skb)
  * before the software timestamp is received, a hardware TX timestamp may be
  * returned only if there is no software TX timestamp. Ignore false software
  * timestamps, which may be made in the __sock_recv_timestamp() call when the
- * option SO_TIMESTAMP(NS) is enabled on the socket, even when the skb has a
+ * option SO_TIMESTAMP_OLD(NS) is enabled on the socket, even when the skb has a
  * hardware timestamp.
  */
 static bool skb_is_swtx_tstamp(const struct sk_buff *skb, int false_tstamp)
@@ -721,12 +721,12 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
 		if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
 			struct timeval tv;
 			skb_get_timestamp(skb, &tv);
-			put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP,
+			put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_OLD,
 				 sizeof(tv), &tv);
 		} else {
 			struct timespec ts;
 			skb_get_timestampns(skb, &ts);
-			put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS,
+			put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_OLD,
 				 sizeof(ts), &ts);
 		}
 	}
@@ -746,7 +746,7 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
 	}
 	if (!empty) {
 		put_cmsg(msg, SOL_SOCKET,
-			 SCM_TIMESTAMPING, sizeof(tss), &tss);
+			 SO_TIMESTAMPING_OLD, sizeof(tss), &tss);
 
 		if (skb_is_err_queue(skb) && skb->len &&
 		    SKB_EXT_ERR(skb)->opt_stats)
-- 
cgit v1.2.3


From bcb3fc3247e5a7ceb467ca0cfdaa4c1b830dd8f9 Mon Sep 17 00:00:00 2001
From: Deepa Dinamani <deepa.kernel@gmail.com>
Date: Sat, 2 Feb 2019 07:34:47 -0800
Subject: arch: sparc: Override struct __kernel_old_timeval

struct __kernel_old_timeval is supposed to have the same
layout as struct timeval. But, it was inadvarently missed
that __kernel_suseconds has a different definition for
sparc64.
Provide an asm-specific override that fixes it.

Reported-by: Arnd Bergmann <arnd@arndb.de>
Suggested-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Deepa Dinamani <deepa.kernel@gmail.com>
Acked-by: Willem de Bruijn <willemb@google.com>
Cc: sparclinux@vger.kernel.org
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/sparc/include/uapi/asm/posix_types.h | 10 ++++++++++
 include/uapi/linux/time.h                 |  2 ++
 2 files changed, 12 insertions(+)

(limited to 'include')

diff --git a/arch/sparc/include/uapi/asm/posix_types.h b/arch/sparc/include/uapi/asm/posix_types.h
index fec499d6efb7..f139e0048628 100644
--- a/arch/sparc/include/uapi/asm/posix_types.h
+++ b/arch/sparc/include/uapi/asm/posix_types.h
@@ -19,6 +19,16 @@ typedef unsigned short         __kernel_old_gid_t;
 typedef int		       __kernel_suseconds_t;
 #define __kernel_suseconds_t __kernel_suseconds_t
 
+typedef long		__kernel_long_t;
+typedef unsigned long	__kernel_ulong_t;
+#define __kernel_long_t __kernel_long_t
+
+struct __kernel_old_timeval {
+	__kernel_long_t tv_sec;
+	__kernel_suseconds_t tv_usec;
+};
+#define __kernel_old_timeval __kernel_old_timeval
+
 #else
 /* sparc 32 bit */
 
diff --git a/include/uapi/linux/time.h b/include/uapi/linux/time.h
index 6b56a2208be7..04d5587f30d3 100644
--- a/include/uapi/linux/time.h
+++ b/include/uapi/linux/time.h
@@ -63,10 +63,12 @@ struct __kernel_itimerspec {
  * here, this is probably because it is not y2038 safe and needs to
  * be changed to use another interface.
  */
+#ifndef __kernel_old_timeval
 struct __kernel_old_timeval {
 	__kernel_long_t tv_sec;
 	__kernel_long_t tv_usec;
 };
+#endif
 
 /*
  * The IDs of the various system clocks (for POSIX.1b interval timers):
-- 
cgit v1.2.3


From 13c6ee2a921683bae4bb4ba57b1f5b82f49e6b8a Mon Sep 17 00:00:00 2001
From: Deepa Dinamani <deepa.kernel@gmail.com>
Date: Sat, 2 Feb 2019 07:34:48 -0800
Subject: socket: Use old_timeval types for socket timestamps

As part of y2038 solution, all internal uses of
struct timeval are replaced by struct __kernel_old_timeval
and struct compat_timeval by struct old_timeval32.
Make socket timestamps use these new types.

This is mainly to be able to verify that the kernel build
is y2038 safe when such non y2038 safe types are not
supported anymore.

Signed-off-by: Deepa Dinamani <deepa.kernel@gmail.com>
Acked-by: Willem de Bruijn <willemb@google.com>
Cc: isdn@linux-pingi.de
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/isdn/mISDN/socket.c | 2 +-
 include/linux/skbuff.h      | 6 +++---
 net/bluetooth/hci_sock.c    | 4 ++--
 net/compat.c                | 6 +++---
 net/ipv4/tcp.c              | 2 +-
 net/rds/recv.c              | 2 +-
 net/socket.c                | 2 +-
 7 files changed, 12 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/drivers/isdn/mISDN/socket.c b/drivers/isdn/mISDN/socket.c
index 15d3ca37669a..4ab8b1b6608f 100644
--- a/drivers/isdn/mISDN/socket.c
+++ b/drivers/isdn/mISDN/socket.c
@@ -103,7 +103,7 @@ mISDN_ctrl(struct mISDNchannel *ch, u_int cmd, void *arg)
 static inline void
 mISDN_sock_cmsg(struct sock *sk, struct msghdr *msg, struct sk_buff *skb)
 {
-	struct timeval	tv;
+	struct __kernel_old_timeval	tv;
 
 	if (_pms(sk)->cmask & MISDN_TIME_STAMP) {
 		skb_get_timestamp(skb, &tv);
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index c34595374e93..4001611a4c9f 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -3486,16 +3486,16 @@ static inline ktime_t skb_get_ktime(const struct sk_buff *skb)
 /**
  *	skb_get_timestamp - get timestamp from a skb
  *	@skb: skb to get stamp from
- *	@stamp: pointer to struct timeval to store stamp in
+ *	@stamp: pointer to struct __kernel_old_timeval to store stamp in
  *
  *	Timestamps are stored in the skb as offsets to a base timestamp.
  *	This function converts the offset back to a struct timeval and stores
  *	it in stamp.
  */
 static inline void skb_get_timestamp(const struct sk_buff *skb,
-				     struct timeval *stamp)
+				     struct __kernel_old_timeval *stamp)
 {
-	*stamp = ktime_to_timeval(skb->tstamp);
+	*stamp = ns_to_kernel_old_timeval(skb->tstamp);
 }
 
 static inline void skb_get_timestampns(const struct sk_buff *skb,
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index 1506e1632394..65228bfa4487 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -1383,9 +1383,9 @@ static void hci_sock_cmsg(struct sock *sk, struct msghdr *msg,
 
 	if (mask & HCI_CMSG_TSTAMP) {
 #ifdef CONFIG_COMPAT
-		struct compat_timeval ctv;
+		struct old_timeval32 ctv;
 #endif
-		struct timeval tv;
+		struct __kernel_old_timeval tv;
 		void *data;
 		int len;
 
diff --git a/net/compat.c b/net/compat.c
index ccf93cd0e49b..9629f053d4fa 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -209,8 +209,8 @@ int put_cmsg_compat(struct msghdr *kmsg, int level, int type, int len, void *dat
 {
 	struct compat_cmsghdr __user *cm = (struct compat_cmsghdr __user *) kmsg->msg_control;
 	struct compat_cmsghdr cmhdr;
-	struct compat_timeval ctv;
-	struct compat_timespec cts[3];
+	struct old_timeval32 ctv;
+	struct old_timespec32 cts[3];
 	int cmlen;
 
 	if (cm == NULL || kmsg->msg_controllen < sizeof(*cm)) {
@@ -220,7 +220,7 @@ int put_cmsg_compat(struct msghdr *kmsg, int level, int type, int len, void *dat
 
 	if (!COMPAT_USE_64BIT_TIME) {
 		if (level == SOL_SOCKET && type == SO_TIMESTAMP_OLD) {
-			struct timeval *tv = (struct timeval *)data;
+			struct __kernel_old_timeval *tv = (struct __kernel_old_timeval *)data;
 			ctv.tv_sec = tv->tv_sec;
 			ctv.tv_usec = tv->tv_usec;
 			data = &ctv;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index e29aec59cad1..3ce41b04c0f0 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1861,7 +1861,7 @@ static void tcp_update_recv_tstamps(struct sk_buff *skb,
 static void tcp_recv_timestamp(struct msghdr *msg, const struct sock *sk,
 			       struct scm_timestamping *tss)
 {
-	struct timeval tv;
+	struct __kernel_old_timeval tv;
 	bool has_timestamping = false;
 
 	if (tss->ts[0].tv_sec || tss->ts[0].tv_nsec) {
diff --git a/net/rds/recv.c b/net/rds/recv.c
index 04e30d63a159..435bf2320cd3 100644
--- a/net/rds/recv.c
+++ b/net/rds/recv.c
@@ -549,7 +549,7 @@ static int rds_cmsg_recv(struct rds_incoming *inc, struct msghdr *msg,
 
 	if ((inc->i_rx_tstamp != 0) &&
 	    sock_flag(rds_rs_to_sk(rs), SOCK_RCVTSTAMP)) {
-		struct timeval tv = ktime_to_timeval(inc->i_rx_tstamp);
+		struct __kernel_old_timeval tv = ns_to_kernel_old_timeval(inc->i_rx_tstamp);
 		ret = put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_OLD,
 			       sizeof(tv), &tv);
 		if (ret)
diff --git a/net/socket.c b/net/socket.c
index 5087f9e40f3a..9cc281cdb9d9 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -719,7 +719,7 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
 
 	if (need_software_tstamp) {
 		if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
-			struct timeval tv;
+			struct __kernel_old_timeval tv;
 			skb_get_timestamp(skb, &tv);
 			put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_OLD,
 				 sizeof(tv), &tv);
-- 
cgit v1.2.3


From 98bb03c865d7ddf7a9a2eb80f9a0dd5f261c56ad Mon Sep 17 00:00:00 2001
From: Deepa Dinamani <deepa.kernel@gmail.com>
Date: Sat, 2 Feb 2019 07:34:49 -0800
Subject: socket: Add struct __kernel_sock_timeval

The new type is meant to be used as a y2038 safe structure
to be used as part of cmsg data.
Presently the SO_TIMESTAMP socket option uses struct timeval
for timestamps. This is not y2038 safe.
Subsequent patches in the series add new y2038 safe socket
option to be used in the place of SO_TIMESTAMP_OLD.
struct __kernel_sock_timeval will be used as the timestamp
format at that time.

struct __kernel_sock_timeval also maintains the same layout
across 32 bit and 64 bit ABIs.

Signed-off-by: Deepa Dinamani <deepa.kernel@gmail.com>
Acked-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/time.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/time.h b/include/uapi/linux/time.h
index 04d5587f30d3..b8ad1b86b942 100644
--- a/include/uapi/linux/time.h
+++ b/include/uapi/linux/time.h
@@ -70,6 +70,11 @@ struct __kernel_old_timeval {
 };
 #endif
 
+struct __kernel_sock_timeval {
+	__s64 tv_sec;
+	__s64 tv_usec;
+};
+
 /*
  * The IDs of the various system clocks (for POSIX.1b interval timers):
  */
-- 
cgit v1.2.3


From 887feae36aee6c08e0dafcdaa5ba921abbb2c56b Mon Sep 17 00:00:00 2001
From: Deepa Dinamani <deepa.kernel@gmail.com>
Date: Sat, 2 Feb 2019 07:34:50 -0800
Subject: socket: Add SO_TIMESTAMP[NS]_NEW

Add SO_TIMESTAMP_NEW and SO_TIMESTAMPNS_NEW variants of
socket timestamp options.
These are the y2038 safe versions of the SO_TIMESTAMP_OLD
and SO_TIMESTAMPNS_OLD for all architectures.

Note that the format of scm_timestamping.ts[0] is not changed
in this patch.

Signed-off-by: Deepa Dinamani <deepa.kernel@gmail.com>
Acked-by: Willem de Bruijn <willemb@google.com>
Cc: jejb@parisc-linux.org
Cc: ralf@linux-mips.org
Cc: rth@twiddle.net
Cc: linux-alpha@vger.kernel.org
Cc: linux-mips@linux-mips.org
Cc: linux-parisc@vger.kernel.org
Cc: linux-rdma@vger.kernel.org
Cc: netdev@vger.kernel.org
Cc: sparclinux@vger.kernel.org
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/alpha/include/uapi/asm/socket.h  | 14 ++++++++++++--
 arch/mips/include/uapi/asm/socket.h   | 14 ++++++++++++--
 arch/parisc/include/uapi/asm/socket.h | 14 ++++++++++++--
 arch/sparc/include/uapi/asm/socket.h  | 14 ++++++++++++--
 include/linux/skbuff.h                | 18 ++++++++++++++++++
 include/net/sock.h                    |  1 +
 include/uapi/asm-generic/socket.h     | 15 +++++++++++++--
 net/core/sock.c                       | 21 +++++++++++++++++++--
 net/ipv4/tcp.c                        | 33 +++++++++++++++++++++++++--------
 net/rds/af_rds.c                      |  8 ++++++--
 net/rds/recv.c                        | 16 ++++++++++++++--
 net/socket.c                          | 35 +++++++++++++++++++++++++++--------
 12 files changed, 171 insertions(+), 32 deletions(-)

(limited to 'include')

diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h
index 992a0a6dcea1..aab11eec7c22 100644
--- a/arch/alpha/include/uapi/asm/socket.h
+++ b/arch/alpha/include/uapi/asm/socket.h
@@ -3,6 +3,7 @@
 #define _UAPI_ASM_SOCKET_H
 
 #include <asm/sockios.h>
+#include <asm/bitsperlong.h>
 
 /* For setsockopt(2) */
 /*
@@ -114,10 +115,19 @@
 #define SO_TIMESTAMPNS_OLD      35
 #define SO_TIMESTAMPING_OLD     37
 
+#define SO_TIMESTAMP_NEW        63
+#define SO_TIMESTAMPNS_NEW      64
+
 #if !defined(__KERNEL__)
 
-#define SO_TIMESTAMP            SO_TIMESTAMP_OLD
-#define SO_TIMESTAMPNS          SO_TIMESTAMPNS_OLD
+#if __BITS_PER_LONG == 64
+#define SO_TIMESTAMP		SO_TIMESTAMP_OLD
+#define SO_TIMESTAMPNS		SO_TIMESTAMPNS_OLD
+#else
+#define SO_TIMESTAMP (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_TIMESTAMP_OLD : SO_TIMESTAMP_NEW)
+#define SO_TIMESTAMPNS (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_TIMESTAMPNS_OLD : SO_TIMESTAMPNS_NEW)
+#endif
+
 #define SO_TIMESTAMPING         SO_TIMESTAMPING_OLD
 
 #define SCM_TIMESTAMP           SO_TIMESTAMP
diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h
index 0f4516c34df2..11014f684d9f 100644
--- a/arch/mips/include/uapi/asm/socket.h
+++ b/arch/mips/include/uapi/asm/socket.h
@@ -11,6 +11,7 @@
 #define _UAPI_ASM_SOCKET_H
 
 #include <asm/sockios.h>
+#include <asm/bitsperlong.h>
 
 /*
  * For setsockopt(2)
@@ -125,10 +126,19 @@
 #define SO_TIMESTAMPNS_OLD      35
 #define SO_TIMESTAMPING_OLD     37
 
+#define SO_TIMESTAMP_NEW        63
+#define SO_TIMESTAMPNS_NEW      64
+
 #if !defined(__KERNEL__)
 
-#define SO_TIMESTAMP            SO_TIMESTAMP_OLD
-#define SO_TIMESTAMPNS          SO_TIMESTAMPNS_OLD
+#if __BITS_PER_LONG == 64
+#define SO_TIMESTAMP		SO_TIMESTAMP_OLD
+#define SO_TIMESTAMPNS		SO_TIMESTAMPNS_OLD
+#else
+#define SO_TIMESTAMP (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_TIMESTAMP_OLD : SO_TIMESTAMP_NEW)
+#define SO_TIMESTAMPNS (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_TIMESTAMPNS_OLD : SO_TIMESTAMPNS_NEW)
+#endif
+
 #define SO_TIMESTAMPING         SO_TIMESTAMPING_OLD
 
 #define SCM_TIMESTAMP           SO_TIMESTAMP
diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h
index 7c180321ebd6..cbc4b89c2fe4 100644
--- a/arch/parisc/include/uapi/asm/socket.h
+++ b/arch/parisc/include/uapi/asm/socket.h
@@ -3,6 +3,7 @@
 #define _UAPI_ASM_SOCKET_H
 
 #include <asm/sockios.h>
+#include <asm/bitsperlong.h>
 
 /* For setsockopt(2) */
 #define SOL_SOCKET	0xffff
@@ -106,10 +107,19 @@
 #define SO_TIMESTAMPNS_OLD      0x4013
 #define SO_TIMESTAMPING_OLD     0x4020
 
+#define SO_TIMESTAMP_NEW        0x4038
+#define SO_TIMESTAMPNS_NEW      0x4039
+
 #if !defined(__KERNEL__)
 
-#define SO_TIMESTAMP            SO_TIMESTAMP_OLD
-#define SO_TIMESTAMPNS          SO_TIMESTAMPNS_OLD
+#if __BITS_PER_LONG == 64
+#define SO_TIMESTAMP		SO_TIMESTAMP_OLD
+#define SO_TIMESTAMPNS		SO_TIMESTAMPNS_OLD
+#else
+#define SO_TIMESTAMP (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_TIMESTAMP_OLD : SO_TIMESTAMP_NEW)
+#define SO_TIMESTAMPNS (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_TIMESTAMPNS_OLD : SO_TIMESTAMPNS_NEW)
+#endif
+
 #define SO_TIMESTAMPING         SO_TIMESTAMPING_OLD
 
 #define SCM_TIMESTAMP           SO_TIMESTAMP
diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h
index d8a1bbc3e6c4..85127425b294 100644
--- a/arch/sparc/include/uapi/asm/socket.h
+++ b/arch/sparc/include/uapi/asm/socket.h
@@ -3,6 +3,7 @@
 #define _ASM_SOCKET_H
 
 #include <asm/sockios.h>
+#include <asm/bitsperlong.h>
 
 /* For setsockopt(2) */
 #define SOL_SOCKET	0xffff
@@ -107,10 +108,19 @@
 #define SO_TIMESTAMPNS_OLD       0x0021
 #define SO_TIMESTAMPING_OLD      0x0023
 
+#define SO_TIMESTAMP_NEW         0x0041
+#define SO_TIMESTAMPNS_NEW       0x0042
+
 #if !defined(__KERNEL__)
 
-#define SO_TIMESTAMP           SO_TIMESTAMP_OLD
-#define SO_TIMESTAMPNS         SO_TIMESTAMPNS_OLD
+#if __BITS_PER_LONG == 64
+#define SO_TIMESTAMP		SO_TIMESTAMP_OLD
+#define SO_TIMESTAMPNS		SO_TIMESTAMPNS_OLD
+#else
+#define SO_TIMESTAMP (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_TIMESTAMP_OLD : SO_TIMESTAMP_NEW)
+#define SO_TIMESTAMPNS (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_TIMESTAMPNS_OLD : SO_TIMESTAMPNS_NEW)
+#endif
+
 #define SO_TIMESTAMPING        SO_TIMESTAMPING_OLD
 
 #define SCM_TIMESTAMP          SO_TIMESTAMP
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 4001611a4c9f..831846617d07 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -3498,12 +3498,30 @@ static inline void skb_get_timestamp(const struct sk_buff *skb,
 	*stamp = ns_to_kernel_old_timeval(skb->tstamp);
 }
 
+static inline void skb_get_new_timestamp(const struct sk_buff *skb,
+					 struct __kernel_sock_timeval *stamp)
+{
+	struct timespec64 ts = ktime_to_timespec64(skb->tstamp);
+
+	stamp->tv_sec = ts.tv_sec;
+	stamp->tv_usec = ts.tv_nsec / 1000;
+}
+
 static inline void skb_get_timestampns(const struct sk_buff *skb,
 				       struct timespec *stamp)
 {
 	*stamp = ktime_to_timespec(skb->tstamp);
 }
 
+static inline void skb_get_new_timestampns(const struct sk_buff *skb,
+					   struct __kernel_timespec *stamp)
+{
+	struct timespec64 ts = ktime_to_timespec64(skb->tstamp);
+
+	stamp->tv_sec = ts.tv_sec;
+	stamp->tv_nsec = ts.tv_nsec;
+}
+
 static inline void __net_timestamp(struct sk_buff *skb)
 {
 	skb->tstamp = ktime_get_real();
diff --git a/include/net/sock.h b/include/net/sock.h
index 2b229f7be8eb..6679f3c120b0 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -805,6 +805,7 @@ enum sock_flags {
 	SOCK_RCU_FREE, /* wait rcu grace period in sk_destruct() */
 	SOCK_TXTIME,
 	SOCK_XDP, /* XDP is attached */
+	SOCK_TSTAMP_NEW, /* Indicates 64 bit timestamps always */
 };
 
 #define SK_FLAGS_TIMESTAMP ((1UL << SOCK_TIMESTAMP) | (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE))
diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h
index 4ef3aed31fb7..f22d3f7162f8 100644
--- a/include/uapi/asm-generic/socket.h
+++ b/include/uapi/asm-generic/socket.h
@@ -3,6 +3,7 @@
 #define __ASM_GENERIC_SOCKET_H
 
 #include <asm/sockios.h>
+#include <asm/bitsperlong.h>
 
 /* For setsockopt(2) */
 #define SOL_SOCKET	1
@@ -109,10 +110,20 @@
 #define SO_TIMESTAMPNS_OLD      35
 #define SO_TIMESTAMPING_OLD     37
 
+#define SO_TIMESTAMP_NEW        63
+#define SO_TIMESTAMPNS_NEW      64
+
 #if !defined(__KERNEL__)
 
-#define SO_TIMESTAMP            SO_TIMESTAMP_OLD
-#define SO_TIMESTAMPNS          SO_TIMESTAMPNS_OLD
+#if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__))
+/* on 64-bit and x32, avoid the ?: operator */
+#define SO_TIMESTAMP		SO_TIMESTAMP_OLD
+#define SO_TIMESTAMPNS		SO_TIMESTAMPNS_OLD
+#else
+#define SO_TIMESTAMP (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_TIMESTAMP_OLD : SO_TIMESTAMP_NEW)
+#define SO_TIMESTAMPNS (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_TIMESTAMPNS_OLD : SO_TIMESTAMPNS_NEW)
+#endif
+
 #define SO_TIMESTAMPING         SO_TIMESTAMPING_OLD
 
 #define SCM_TIMESTAMP           SO_TIMESTAMP
diff --git a/net/core/sock.c b/net/core/sock.c
index d5ca8641968f..14b987eab10c 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -868,9 +868,16 @@ set_rcvbuf:
 		break;
 
 	case SO_TIMESTAMP_OLD:
+	case SO_TIMESTAMP_NEW:
 	case SO_TIMESTAMPNS_OLD:
+	case SO_TIMESTAMPNS_NEW:
 		if (valbool)  {
-			if (optname == SO_TIMESTAMP_OLD)
+			if (optname == SO_TIMESTAMP_NEW || optname == SO_TIMESTAMPNS_NEW)
+				sock_set_flag(sk, SOCK_TSTAMP_NEW);
+			else
+				sock_reset_flag(sk, SOCK_TSTAMP_NEW);
+
+			if (optname == SO_TIMESTAMP_OLD || optname == SO_TIMESTAMP_NEW)
 				sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
 			else
 				sock_set_flag(sk, SOCK_RCVTSTAMPNS);
@@ -879,6 +886,7 @@ set_rcvbuf:
 		} else {
 			sock_reset_flag(sk, SOCK_RCVTSTAMP);
 			sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
+			sock_reset_flag(sk, SOCK_TSTAMP_NEW);
 		}
 		break;
 
@@ -1245,11 +1253,20 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
 
 	case SO_TIMESTAMP_OLD:
 		v.val = sock_flag(sk, SOCK_RCVTSTAMP) &&
+				!sock_flag(sk, SOCK_TSTAMP_NEW) &&
 				!sock_flag(sk, SOCK_RCVTSTAMPNS);
 		break;
 
 	case SO_TIMESTAMPNS_OLD:
-		v.val = sock_flag(sk, SOCK_RCVTSTAMPNS);
+		v.val = sock_flag(sk, SOCK_RCVTSTAMPNS) && !sock_flag(sk, SOCK_TSTAMP_NEW);
+		break;
+
+	case SO_TIMESTAMP_NEW:
+		v.val = sock_flag(sk, SOCK_RCVTSTAMP) && sock_flag(sk, SOCK_TSTAMP_NEW);
+		break;
+
+	case SO_TIMESTAMPNS_NEW:
+		v.val = sock_flag(sk, SOCK_RCVTSTAMPNS) && sock_flag(sk, SOCK_TSTAMP_NEW);
 		break;
 
 	case SO_TIMESTAMPING_OLD:
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 3ce41b04c0f0..4e9388bf104a 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1861,20 +1861,37 @@ static void tcp_update_recv_tstamps(struct sk_buff *skb,
 static void tcp_recv_timestamp(struct msghdr *msg, const struct sock *sk,
 			       struct scm_timestamping *tss)
 {
-	struct __kernel_old_timeval tv;
+	int new_tstamp = sock_flag(sk, SOCK_TSTAMP_NEW);
 	bool has_timestamping = false;
 
 	if (tss->ts[0].tv_sec || tss->ts[0].tv_nsec) {
 		if (sock_flag(sk, SOCK_RCVTSTAMP)) {
 			if (sock_flag(sk, SOCK_RCVTSTAMPNS)) {
-				put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_OLD,
-					 sizeof(tss->ts[0]), &tss->ts[0]);
+				if (new_tstamp) {
+					struct __kernel_timespec kts = {tss->ts[0].tv_sec, tss->ts[0].tv_nsec};
+
+					put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_NEW,
+						 sizeof(kts), &kts);
+				} else {
+					put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_OLD,
+						 sizeof(tss->ts[0]), &tss->ts[0]);
+				}
 			} else {
-				tv.tv_sec = tss->ts[0].tv_sec;
-				tv.tv_usec = tss->ts[0].tv_nsec / 1000;
-
-				put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_OLD,
-					 sizeof(tv), &tv);
+				if (new_tstamp) {
+					struct __kernel_sock_timeval stv;
+
+					stv.tv_sec = tss->ts[0].tv_sec;
+					stv.tv_usec = tss->ts[0].tv_nsec / 1000;
+					put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_NEW,
+						 sizeof(stv), &stv);
+				} else {
+					struct __kernel_old_timeval tv;
+
+					tv.tv_sec = tss->ts[0].tv_sec;
+					tv.tv_usec = tss->ts[0].tv_nsec / 1000;
+					put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_OLD,
+						 sizeof(tv), &tv);
+				}
 			}
 		}
 
diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c
index eeb4639adbe5..65571a6273c3 100644
--- a/net/rds/af_rds.c
+++ b/net/rds/af_rds.c
@@ -348,7 +348,7 @@ static int rds_set_transport(struct rds_sock *rs, char __user *optval,
 }
 
 static int rds_enable_recvtstamp(struct sock *sk, char __user *optval,
-				 int optlen)
+				 int optlen, int optname)
 {
 	int val, valbool;
 
@@ -360,6 +360,9 @@ static int rds_enable_recvtstamp(struct sock *sk, char __user *optval,
 
 	valbool = val ? 1 : 0;
 
+	if (optname == SO_TIMESTAMP_NEW)
+		sock_set_flag(sk, SOCK_TSTAMP_NEW);
+
 	if (valbool)
 		sock_set_flag(sk, SOCK_RCVTSTAMP);
 	else
@@ -431,8 +434,9 @@ static int rds_setsockopt(struct socket *sock, int level, int optname,
 		release_sock(sock->sk);
 		break;
 	case SO_TIMESTAMP_OLD:
+	case SO_TIMESTAMP_NEW:
 		lock_sock(sock->sk);
-		ret = rds_enable_recvtstamp(sock->sk, optval, optlen);
+		ret = rds_enable_recvtstamp(sock->sk, optval, optlen, optname);
 		release_sock(sock->sk);
 		break;
 	case SO_RDS_MSG_RXPATH_LATENCY:
diff --git a/net/rds/recv.c b/net/rds/recv.c
index 435bf2320cd3..6bb6b16ca270 100644
--- a/net/rds/recv.c
+++ b/net/rds/recv.c
@@ -550,8 +550,20 @@ static int rds_cmsg_recv(struct rds_incoming *inc, struct msghdr *msg,
 	if ((inc->i_rx_tstamp != 0) &&
 	    sock_flag(rds_rs_to_sk(rs), SOCK_RCVTSTAMP)) {
 		struct __kernel_old_timeval tv = ns_to_kernel_old_timeval(inc->i_rx_tstamp);
-		ret = put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_OLD,
-			       sizeof(tv), &tv);
+
+		if (!sock_flag(rds_rs_to_sk(rs), SOCK_TSTAMP_NEW)) {
+			ret = put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_OLD,
+				       sizeof(tv), &tv);
+		} else {
+			struct __kernel_sock_timeval sk_tv;
+
+			sk_tv.tv_sec = tv.tv_sec;
+			sk_tv.tv_usec = tv.tv_usec;
+
+			ret = put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_NEW,
+				       sizeof(sk_tv), &sk_tv);
+		}
+
 		if (ret)
 			goto out;
 	}
diff --git a/net/socket.c b/net/socket.c
index 9cc281cdb9d9..1de96abd78d3 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -705,6 +705,7 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
 	struct sk_buff *skb)
 {
 	int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
+	int new_tstamp = sock_flag(sk, SOCK_TSTAMP_NEW);
 	struct scm_timestamping tss;
 	int empty = 1, false_tstamp = 0;
 	struct skb_shared_hwtstamps *shhwtstamps =
@@ -719,15 +720,33 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
 
 	if (need_software_tstamp) {
 		if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
-			struct __kernel_old_timeval tv;
-			skb_get_timestamp(skb, &tv);
-			put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_OLD,
-				 sizeof(tv), &tv);
+			if (new_tstamp) {
+				struct __kernel_sock_timeval tv;
+
+				skb_get_new_timestamp(skb, &tv);
+				put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_NEW,
+					 sizeof(tv), &tv);
+			} else {
+				struct __kernel_old_timeval tv;
+
+				skb_get_timestamp(skb, &tv);
+				put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_OLD,
+					 sizeof(tv), &tv);
+			}
 		} else {
-			struct timespec ts;
-			skb_get_timestampns(skb, &ts);
-			put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_OLD,
-				 sizeof(ts), &ts);
+			if (new_tstamp) {
+				struct __kernel_timespec ts;
+
+				skb_get_new_timestampns(skb, &ts);
+				put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_NEW,
+					 sizeof(ts), &ts);
+			} else {
+				struct timespec ts;
+
+				skb_get_timestampns(skb, &ts);
+				put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_OLD,
+					 sizeof(ts), &ts);
+			}
 		}
 	}
 
-- 
cgit v1.2.3


From 9718475e69084de15c3930ce35672a7dc6da866b Mon Sep 17 00:00:00 2001
From: Deepa Dinamani <deepa.kernel@gmail.com>
Date: Sat, 2 Feb 2019 07:34:51 -0800
Subject: socket: Add SO_TIMESTAMPING_NEW

Add SO_TIMESTAMPING_NEW variant of socket timestamp options.
This is the y2038 safe versions of the SO_TIMESTAMPING_OLD
for all architectures.

Signed-off-by: Deepa Dinamani <deepa.kernel@gmail.com>
Acked-by: Willem de Bruijn <willemb@google.com>
Cc: chris@zankel.net
Cc: fenghua.yu@intel.com
Cc: rth@twiddle.net
Cc: tglx@linutronix.de
Cc: ubraun@linux.ibm.com
Cc: linux-alpha@vger.kernel.org
Cc: linux-arch@vger.kernel.org
Cc: linux-ia64@vger.kernel.org
Cc: linux-mips@linux-mips.org
Cc: linux-s390@vger.kernel.org
Cc: linux-xtensa@linux-xtensa.org
Cc: sparclinux@vger.kernel.org
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/alpha/include/uapi/asm/socket.h  |  5 +++--
 arch/mips/include/uapi/asm/socket.h   |  5 +++--
 arch/parisc/include/uapi/asm/socket.h |  5 +++--
 arch/sparc/include/uapi/asm/socket.h  |  5 +++--
 include/linux/socket.h                |  8 ++++++++
 include/uapi/asm-generic/socket.h     |  5 +++--
 include/uapi/linux/errqueue.h         |  4 ++++
 net/core/scm.c                        | 27 +++++++++++++++++++++++++++
 net/core/sock.c                       |  8 +++++++-
 net/ipv4/tcp.c                        | 30 +++++++++++++++++-------------
 net/smc/af_smc.c                      |  3 ++-
 net/socket.c                          | 13 ++++++++-----
 12 files changed, 88 insertions(+), 30 deletions(-)

(limited to 'include')

diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h
index aab11eec7c22..934ea6268f1a 100644
--- a/arch/alpha/include/uapi/asm/socket.h
+++ b/arch/alpha/include/uapi/asm/socket.h
@@ -117,19 +117,20 @@
 
 #define SO_TIMESTAMP_NEW        63
 #define SO_TIMESTAMPNS_NEW      64
+#define SO_TIMESTAMPING_NEW     65
 
 #if !defined(__KERNEL__)
 
 #if __BITS_PER_LONG == 64
 #define SO_TIMESTAMP		SO_TIMESTAMP_OLD
 #define SO_TIMESTAMPNS		SO_TIMESTAMPNS_OLD
+#define SO_TIMESTAMPING         SO_TIMESTAMPING_OLD
 #else
 #define SO_TIMESTAMP (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_TIMESTAMP_OLD : SO_TIMESTAMP_NEW)
 #define SO_TIMESTAMPNS (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_TIMESTAMPNS_OLD : SO_TIMESTAMPNS_NEW)
+#define SO_TIMESTAMPING (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_TIMESTAMPING_OLD : SO_TIMESTAMPING_NEW)
 #endif
 
-#define SO_TIMESTAMPING         SO_TIMESTAMPING_OLD
-
 #define SCM_TIMESTAMP           SO_TIMESTAMP
 #define SCM_TIMESTAMPNS         SO_TIMESTAMPNS
 #define SCM_TIMESTAMPING        SO_TIMESTAMPING
diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h
index 11014f684d9f..110f9506d64f 100644
--- a/arch/mips/include/uapi/asm/socket.h
+++ b/arch/mips/include/uapi/asm/socket.h
@@ -128,19 +128,20 @@
 
 #define SO_TIMESTAMP_NEW        63
 #define SO_TIMESTAMPNS_NEW      64
+#define SO_TIMESTAMPING_NEW     65
 
 #if !defined(__KERNEL__)
 
 #if __BITS_PER_LONG == 64
 #define SO_TIMESTAMP		SO_TIMESTAMP_OLD
 #define SO_TIMESTAMPNS		SO_TIMESTAMPNS_OLD
+#define SO_TIMESTAMPING		SO_TIMESTAMPING_OLD
 #else
 #define SO_TIMESTAMP (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_TIMESTAMP_OLD : SO_TIMESTAMP_NEW)
 #define SO_TIMESTAMPNS (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_TIMESTAMPNS_OLD : SO_TIMESTAMPNS_NEW)
+#define SO_TIMESTAMPING (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_TIMESTAMPING_OLD : SO_TIMESTAMPING_NEW)
 #endif
 
-#define SO_TIMESTAMPING         SO_TIMESTAMPING_OLD
-
 #define SCM_TIMESTAMP           SO_TIMESTAMP
 #define SCM_TIMESTAMPNS         SO_TIMESTAMPNS
 #define SCM_TIMESTAMPING        SO_TIMESTAMPING
diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h
index cbc4b89c2fe4..bee2a9dde656 100644
--- a/arch/parisc/include/uapi/asm/socket.h
+++ b/arch/parisc/include/uapi/asm/socket.h
@@ -109,19 +109,20 @@
 
 #define SO_TIMESTAMP_NEW        0x4038
 #define SO_TIMESTAMPNS_NEW      0x4039
+#define SO_TIMESTAMPING_NEW     0x403A
 
 #if !defined(__KERNEL__)
 
 #if __BITS_PER_LONG == 64
 #define SO_TIMESTAMP		SO_TIMESTAMP_OLD
 #define SO_TIMESTAMPNS		SO_TIMESTAMPNS_OLD
+#define SO_TIMESTAMPING         SO_TIMESTAMPING_OLD
 #else
 #define SO_TIMESTAMP (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_TIMESTAMP_OLD : SO_TIMESTAMP_NEW)
 #define SO_TIMESTAMPNS (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_TIMESTAMPNS_OLD : SO_TIMESTAMPNS_NEW)
+#define SO_TIMESTAMPING (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_TIMESTAMPING_OLD : SO_TIMESTAMPING_NEW)
 #endif
 
-#define SO_TIMESTAMPING         SO_TIMESTAMPING_OLD
-
 #define SCM_TIMESTAMP           SO_TIMESTAMP
 #define SCM_TIMESTAMPNS         SO_TIMESTAMPNS
 #define SCM_TIMESTAMPING        SO_TIMESTAMPING
diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h
index 85127425b294..2b38dda51426 100644
--- a/arch/sparc/include/uapi/asm/socket.h
+++ b/arch/sparc/include/uapi/asm/socket.h
@@ -110,19 +110,20 @@
 
 #define SO_TIMESTAMP_NEW         0x0041
 #define SO_TIMESTAMPNS_NEW       0x0042
+#define SO_TIMESTAMPING_NEW      0x0043
 
 #if !defined(__KERNEL__)
 
 #if __BITS_PER_LONG == 64
 #define SO_TIMESTAMP		SO_TIMESTAMP_OLD
 #define SO_TIMESTAMPNS		SO_TIMESTAMPNS_OLD
+#define SO_TIMESTAMPING		SO_TIMESTAMPING_OLD
 #else
 #define SO_TIMESTAMP (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_TIMESTAMP_OLD : SO_TIMESTAMP_NEW)
 #define SO_TIMESTAMPNS (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_TIMESTAMPNS_OLD : SO_TIMESTAMPNS_NEW)
+#define SO_TIMESTAMPING (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_TIMESTAMPING_OLD : SO_TIMESTAMPING_NEW)
 #endif
 
-#define SO_TIMESTAMPING        SO_TIMESTAMPING_OLD
-
 #define SCM_TIMESTAMP          SO_TIMESTAMP
 #define SCM_TIMESTAMPNS        SO_TIMESTAMPNS
 #define SCM_TIMESTAMPING       SO_TIMESTAMPING
diff --git a/include/linux/socket.h b/include/linux/socket.h
index ab2041a00e01..6016daeecee4 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -349,9 +349,17 @@ struct ucred {
 extern int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr);
 extern int put_cmsg(struct msghdr*, int level, int type, int len, void *data);
 
+struct timespec64;
 struct __kernel_timespec;
 struct old_timespec32;
 
+struct scm_timestamping_internal {
+	struct timespec64 ts[3];
+};
+
+extern void put_cmsg_scm_timestamping64(struct msghdr *msg, struct scm_timestamping_internal *tss);
+extern void put_cmsg_scm_timestamping(struct msghdr *msg, struct scm_timestamping_internal *tss);
+
 /* The __sys_...msg variants allow MSG_CMSG_COMPAT iff
  * forbid_cmsg_compat==false
  */
diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h
index f22d3f7162f8..2713e0fa68ef 100644
--- a/include/uapi/asm-generic/socket.h
+++ b/include/uapi/asm-generic/socket.h
@@ -112,6 +112,7 @@
 
 #define SO_TIMESTAMP_NEW        63
 #define SO_TIMESTAMPNS_NEW      64
+#define SO_TIMESTAMPING_NEW     65
 
 #if !defined(__KERNEL__)
 
@@ -119,13 +120,13 @@
 /* on 64-bit and x32, avoid the ?: operator */
 #define SO_TIMESTAMP		SO_TIMESTAMP_OLD
 #define SO_TIMESTAMPNS		SO_TIMESTAMPNS_OLD
+#define SO_TIMESTAMPING		SO_TIMESTAMPING_OLD
 #else
 #define SO_TIMESTAMP (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_TIMESTAMP_OLD : SO_TIMESTAMP_NEW)
 #define SO_TIMESTAMPNS (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_TIMESTAMPNS_OLD : SO_TIMESTAMPNS_NEW)
+#define SO_TIMESTAMPING (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_TIMESTAMPING_OLD : SO_TIMESTAMPING_NEW)
 #endif
 
-#define SO_TIMESTAMPING         SO_TIMESTAMPING_OLD
-
 #define SCM_TIMESTAMP           SO_TIMESTAMP
 #define SCM_TIMESTAMPNS         SO_TIMESTAMPNS
 #define SCM_TIMESTAMPING        SO_TIMESTAMPING
diff --git a/include/uapi/linux/errqueue.h b/include/uapi/linux/errqueue.h
index c0151200f7d1..d955b9e32288 100644
--- a/include/uapi/linux/errqueue.h
+++ b/include/uapi/linux/errqueue.h
@@ -41,6 +41,10 @@ struct scm_timestamping {
 	struct timespec ts[3];
 };
 
+struct scm_timestamping64 {
+	struct __kernel_timespec ts[3];
+};
+
 /* The type of scm_timestamping, passed in sock_extended_err ee_info.
  * This defines the type of ts[0]. For SCM_TSTAMP_SND only, if ts[0]
  * is zero, then this is a hardware timestamp and recorded in ts[2].
diff --git a/net/core/scm.c b/net/core/scm.c
index b1ff8a441748..52ef219cf6df 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -29,6 +29,7 @@
 #include <linux/pid.h>
 #include <linux/nsproxy.h>
 #include <linux/slab.h>
+#include <linux/errqueue.h>
 
 #include <linux/uaccess.h>
 
@@ -252,6 +253,32 @@ out:
 }
 EXPORT_SYMBOL(put_cmsg);
 
+void put_cmsg_scm_timestamping64(struct msghdr *msg, struct scm_timestamping_internal *tss_internal)
+{
+	struct scm_timestamping64 tss;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(tss.ts); i++) {
+		tss.ts[i].tv_sec = tss_internal->ts[i].tv_sec;
+		tss.ts[i].tv_nsec = tss_internal->ts[i].tv_nsec;
+	}
+
+	put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPING_NEW, sizeof(tss), &tss);
+}
+EXPORT_SYMBOL(put_cmsg_scm_timestamping64);
+
+void put_cmsg_scm_timestamping(struct msghdr *msg, struct scm_timestamping_internal *tss_internal)
+{
+	struct scm_timestamping tss;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(tss.ts); i++)
+		tss.ts[i] = timespec64_to_timespec(tss_internal->ts[i]);
+
+	put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPING_OLD, sizeof(tss), &tss);
+}
+EXPORT_SYMBOL(put_cmsg_scm_timestamping);
+
 void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm)
 {
 	struct cmsghdr __user *cm
diff --git a/net/core/sock.c b/net/core/sock.c
index 14b987eab10c..a9d1ecce96e5 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -890,6 +890,8 @@ set_rcvbuf:
 		}
 		break;
 
+	case SO_TIMESTAMPING_NEW:
+		sock_set_flag(sk, SOCK_TSTAMP_NEW);
 	case SO_TIMESTAMPING_OLD:
 		if (val & ~SOF_TIMESTAMPING_MASK) {
 			ret = -EINVAL;
@@ -921,9 +923,13 @@ set_rcvbuf:
 		if (val & SOF_TIMESTAMPING_RX_SOFTWARE)
 			sock_enable_timestamp(sk,
 					      SOCK_TIMESTAMPING_RX_SOFTWARE);
-		else
+		else {
+			if (optname == SO_TIMESTAMPING_NEW)
+				sock_reset_flag(sk, SOCK_TSTAMP_NEW);
+
 			sock_disable_timestamp(sk,
 					       (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE));
+		}
 		break;
 
 	case SO_RCVLOWAT:
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 4e9388bf104a..cab6b2f2f61d 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1844,22 +1844,22 @@ out:
 #endif
 
 static void tcp_update_recv_tstamps(struct sk_buff *skb,
-				    struct scm_timestamping *tss)
+				    struct scm_timestamping_internal *tss)
 {
 	if (skb->tstamp)
-		tss->ts[0] = ktime_to_timespec(skb->tstamp);
+		tss->ts[0] = ktime_to_timespec64(skb->tstamp);
 	else
-		tss->ts[0] = (struct timespec) {0};
+		tss->ts[0] = (struct timespec64) {0};
 
 	if (skb_hwtstamps(skb)->hwtstamp)
-		tss->ts[2] = ktime_to_timespec(skb_hwtstamps(skb)->hwtstamp);
+		tss->ts[2] = ktime_to_timespec64(skb_hwtstamps(skb)->hwtstamp);
 	else
-		tss->ts[2] = (struct timespec) {0};
+		tss->ts[2] = (struct timespec64) {0};
 }
 
 /* Similar to __sock_recv_timestamp, but does not require an skb */
 static void tcp_recv_timestamp(struct msghdr *msg, const struct sock *sk,
-			       struct scm_timestamping *tss)
+			       struct scm_timestamping_internal *tss)
 {
 	int new_tstamp = sock_flag(sk, SOCK_TSTAMP_NEW);
 	bool has_timestamping = false;
@@ -1873,8 +1873,10 @@ static void tcp_recv_timestamp(struct msghdr *msg, const struct sock *sk,
 					put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_NEW,
 						 sizeof(kts), &kts);
 				} else {
+					struct timespec ts_old = timespec64_to_timespec(tss->ts[0]);
+
 					put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_OLD,
-						 sizeof(tss->ts[0]), &tss->ts[0]);
+						 sizeof(ts_old), &ts_old);
 				}
 			} else {
 				if (new_tstamp) {
@@ -1898,20 +1900,22 @@ static void tcp_recv_timestamp(struct msghdr *msg, const struct sock *sk,
 		if (sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE)
 			has_timestamping = true;
 		else
-			tss->ts[0] = (struct timespec) {0};
+			tss->ts[0] = (struct timespec64) {0};
 	}
 
 	if (tss->ts[2].tv_sec || tss->ts[2].tv_nsec) {
 		if (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE)
 			has_timestamping = true;
 		else
-			tss->ts[2] = (struct timespec) {0};
+			tss->ts[2] = (struct timespec64) {0};
 	}
 
 	if (has_timestamping) {
-		tss->ts[1] = (struct timespec) {0};
-		put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPING_OLD,
-			 sizeof(*tss), tss);
+		tss->ts[1] = (struct timespec64) {0};
+		if (sock_flag(sk, SOCK_TSTAMP_NEW))
+			put_cmsg_scm_timestamping64(msg, tss);
+		else
+			put_cmsg_scm_timestamping(msg, tss);
 	}
 }
 
@@ -1952,7 +1956,7 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
 	long timeo;
 	struct sk_buff *skb, *last;
 	u32 urg_hole = 0;
-	struct scm_timestamping tss;
+	struct scm_timestamping_internal tss;
 	bool has_tss = false;
 	bool has_cmsg;
 
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index c4e56602e0c6..369870b0ef79 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -291,7 +291,8 @@ static void smc_copy_sock_settings(struct sock *nsk, struct sock *osk,
 			     (1UL << SOCK_RXQ_OVFL) | \
 			     (1UL << SOCK_WIFI_STATUS) | \
 			     (1UL << SOCK_NOFCS) | \
-			     (1UL << SOCK_FILTER_LOCKED))
+			     (1UL << SOCK_FILTER_LOCKED) | \
+			     (1UL << SOCK_TSTAMP_NEW))
 /* copy only relevant settings and flags of SOL_SOCKET level from smc to
  * clc socket (since smc is not called for these options from net/core)
  */
diff --git a/net/socket.c b/net/socket.c
index 1de96abd78d3..d51930689b98 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -706,7 +706,8 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
 {
 	int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
 	int new_tstamp = sock_flag(sk, SOCK_TSTAMP_NEW);
-	struct scm_timestamping tss;
+	struct scm_timestamping_internal tss;
+
 	int empty = 1, false_tstamp = 0;
 	struct skb_shared_hwtstamps *shhwtstamps =
 		skb_hwtstamps(skb);
@@ -752,20 +753,22 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
 
 	memset(&tss, 0, sizeof(tss));
 	if ((sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE) &&
-	    ktime_to_timespec_cond(skb->tstamp, tss.ts + 0))
+	    ktime_to_timespec64_cond(skb->tstamp, tss.ts + 0))
 		empty = 0;
 	if (shhwtstamps &&
 	    (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
 	    !skb_is_swtx_tstamp(skb, false_tstamp) &&
-	    ktime_to_timespec_cond(shhwtstamps->hwtstamp, tss.ts + 2)) {
+	    ktime_to_timespec64_cond(shhwtstamps->hwtstamp, tss.ts + 2)) {
 		empty = 0;
 		if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_PKTINFO) &&
 		    !skb_is_err_queue(skb))
 			put_ts_pktinfo(msg, skb);
 	}
 	if (!empty) {
-		put_cmsg(msg, SOL_SOCKET,
-			 SO_TIMESTAMPING_OLD, sizeof(tss), &tss);
+		if (sock_flag(sk, SOCK_TSTAMP_NEW))
+			put_cmsg_scm_timestamping64(msg, &tss);
+		else
+			put_cmsg_scm_timestamping(msg, &tss);
 
 		if (skb_is_err_queue(skb) && skb->len &&
 		    SKB_EXT_ERR(skb)->opt_stats)
-- 
cgit v1.2.3


From 45bdc66159d49bfc7f75fe02d25bc74f5d2660cf Mon Sep 17 00:00:00 2001
From: Deepa Dinamani <deepa.kernel@gmail.com>
Date: Sat, 2 Feb 2019 07:34:53 -0800
Subject: socket: Rename SO_RCVTIMEO/ SO_SNDTIMEO with _OLD suffixes

SO_RCVTIMEO and SO_SNDTIMEO socket options use struct timeval
as the time format. struct timeval is not y2038 safe.
The subsequent patches in the series add support for new socket
timeout options with _NEW suffix that will use y2038 safe
data structures. Although the existing struct timeval layout
is sufficiently wide to represent timeouts, because of the way
libc will interpret time_t based on user defined flag, these
new flags provide a way of having a structure that is the same
for all architectures consistently.
Rename the existing options with _OLD suffix forms so that the
right option is enabled for userspace applications according
to the architecture and time_t definition of libc.

Signed-off-by: Deepa Dinamani <deepa.kernel@gmail.com>
Acked-by: Willem de Bruijn <willemb@google.com>
Cc: ccaulfie@redhat.com
Cc: deller@gmx.de
Cc: paulus@samba.org
Cc: ralf@linux-mips.org
Cc: rth@twiddle.net
Cc: cluster-devel@redhat.com
Cc: linuxppc-dev@lists.ozlabs.org
Cc: linux-alpha@vger.kernel.org
Cc: linux-arch@vger.kernel.org
Cc: linux-mips@vger.kernel.org
Cc: linux-parisc@vger.kernel.org
Cc: sparclinux@vger.kernel.org
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/alpha/include/uapi/asm/socket.h   | 7 +++++--
 arch/mips/include/uapi/asm/socket.h    | 6 ++++--
 arch/parisc/include/uapi/asm/socket.h  | 6 ++++--
 arch/powerpc/include/uapi/asm/socket.h | 4 ++--
 arch/sparc/include/uapi/asm/socket.h   | 7 +++++--
 fs/dlm/lowcomms.c                      | 4 ++--
 include/uapi/asm-generic/socket.h      | 6 ++++--
 net/core/sock.c                        | 4 ++--
 8 files changed, 28 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h
index 934ea6268f1a..9826d1db71d0 100644
--- a/arch/alpha/include/uapi/asm/socket.h
+++ b/arch/alpha/include/uapi/asm/socket.h
@@ -31,8 +31,8 @@
 #define SO_RCVBUFFORCE	0x100b
 #define	SO_RCVLOWAT	0x1010
 #define	SO_SNDLOWAT	0x1011
-#define	SO_RCVTIMEO	0x1012
-#define	SO_SNDTIMEO	0x1013
+#define	SO_RCVTIMEO_OLD	0x1012
+#define	SO_SNDTIMEO_OLD	0x1013
 #define SO_ACCEPTCONN	0x1014
 #define SO_PROTOCOL	0x1028
 #define SO_DOMAIN	0x1029
@@ -121,6 +121,9 @@
 
 #if !defined(__KERNEL__)
 
+#define	SO_RCVTIMEO SO_RCVTIMEO_OLD
+#define	SO_SNDTIMEO SO_SNDTIMEO_OLD
+
 #if __BITS_PER_LONG == 64
 #define SO_TIMESTAMP		SO_TIMESTAMP_OLD
 #define SO_TIMESTAMPNS		SO_TIMESTAMPNS_OLD
diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h
index 110f9506d64f..96cc0e907f12 100644
--- a/arch/mips/include/uapi/asm/socket.h
+++ b/arch/mips/include/uapi/asm/socket.h
@@ -39,8 +39,8 @@
 #define SO_RCVBUF	0x1002	/* Receive buffer. */
 #define SO_SNDLOWAT	0x1003	/* send low-water mark */
 #define SO_RCVLOWAT	0x1004	/* receive low-water mark */
-#define SO_SNDTIMEO	0x1005	/* send timeout */
-#define SO_RCVTIMEO	0x1006	/* receive timeout */
+#define SO_SNDTIMEO_OLD	0x1005	/* send timeout */
+#define SO_RCVTIMEO_OLD	0x1006	/* receive timeout */
 #define SO_ACCEPTCONN	0x1009
 #define SO_PROTOCOL	0x1028	/* protocol type */
 #define SO_DOMAIN	0x1029	/* domain/socket family */
@@ -132,6 +132,8 @@
 
 #if !defined(__KERNEL__)
 
+#define	SO_RCVTIMEO SO_RCVTIMEO_OLD
+#define	SO_SNDTIMEO SO_SNDTIMEO_OLD
 #if __BITS_PER_LONG == 64
 #define SO_TIMESTAMP		SO_TIMESTAMP_OLD
 #define SO_TIMESTAMPNS		SO_TIMESTAMPNS_OLD
diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h
index bee2a9dde656..046f0cd9cce4 100644
--- a/arch/parisc/include/uapi/asm/socket.h
+++ b/arch/parisc/include/uapi/asm/socket.h
@@ -22,8 +22,8 @@
 #define SO_RCVBUFFORCE	0x100b
 #define SO_SNDLOWAT	0x1003
 #define SO_RCVLOWAT	0x1004
-#define SO_SNDTIMEO	0x1005
-#define SO_RCVTIMEO	0x1006
+#define SO_SNDTIMEO_OLD	0x1005
+#define SO_RCVTIMEO_OLD	0x1006
 #define SO_ERROR	0x1007
 #define SO_TYPE		0x1008
 #define SO_PROTOCOL	0x1028
@@ -113,6 +113,8 @@
 
 #if !defined(__KERNEL__)
 
+#define	SO_RCVTIMEO SO_RCVTIMEO_OLD
+#define	SO_SNDTIMEO SO_SNDTIMEO_OLD
 #if __BITS_PER_LONG == 64
 #define SO_TIMESTAMP		SO_TIMESTAMP_OLD
 #define SO_TIMESTAMPNS		SO_TIMESTAMPNS_OLD
diff --git a/arch/powerpc/include/uapi/asm/socket.h b/arch/powerpc/include/uapi/asm/socket.h
index 94de465e0920..12aa0c43e775 100644
--- a/arch/powerpc/include/uapi/asm/socket.h
+++ b/arch/powerpc/include/uapi/asm/socket.h
@@ -11,8 +11,8 @@
 
 #define SO_RCVLOWAT	16
 #define SO_SNDLOWAT	17
-#define SO_RCVTIMEO	18
-#define SO_SNDTIMEO	19
+#define SO_RCVTIMEO_OLD	18
+#define SO_SNDTIMEO_OLD	19
 #define SO_PASSCRED	20
 #define SO_PEERCRED	21
 
diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h
index 2b38dda51426..342ffdc3b424 100644
--- a/arch/sparc/include/uapi/asm/socket.h
+++ b/arch/sparc/include/uapi/asm/socket.h
@@ -21,8 +21,8 @@
 #define SO_BSDCOMPAT    0x0400
 #define SO_RCVLOWAT     0x0800
 #define SO_SNDLOWAT     0x1000
-#define SO_RCVTIMEO     0x2000
-#define SO_SNDTIMEO     0x4000
+#define SO_RCVTIMEO_OLD     0x2000
+#define SO_SNDTIMEO_OLD     0x4000
 #define SO_ACCEPTCONN	0x8000
 
 #define SO_SNDBUF	0x1001
@@ -114,6 +114,9 @@
 
 #if !defined(__KERNEL__)
 
+#define SO_RCVTIMEO              SO_RCVTIMEO_OLD
+#define SO_SNDTIMEO              SO_SNDTIMEO_OLD
+
 #if __BITS_PER_LONG == 64
 #define SO_TIMESTAMP		SO_TIMESTAMP_OLD
 #define SO_TIMESTAMPNS		SO_TIMESTAMPNS_OLD
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 76976d6e50f9..c98ad9777ad9 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -1089,12 +1089,12 @@ static void sctp_connect_to_sock(struct connection *con)
 	 * since O_NONBLOCK argument in connect() function does not work here,
 	 * then, we should restore the default value of this attribute.
 	 */
-	kernel_setsockopt(sock, SOL_SOCKET, SO_SNDTIMEO, (char *)&tv,
+	kernel_setsockopt(sock, SOL_SOCKET, SO_SNDTIMEO_OLD, (char *)&tv,
 			  sizeof(tv));
 	result = sock->ops->connect(sock, (struct sockaddr *)&daddr, addr_len,
 				   0);
 	memset(&tv, 0, sizeof(tv));
-	kernel_setsockopt(sock, SOL_SOCKET, SO_SNDTIMEO, (char *)&tv,
+	kernel_setsockopt(sock, SOL_SOCKET, SO_SNDTIMEO_OLD, (char *)&tv,
 			  sizeof(tv));
 
 	if (result == -EINPROGRESS)
diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h
index 2713e0fa68ef..c56b8b487c12 100644
--- a/include/uapi/asm-generic/socket.h
+++ b/include/uapi/asm-generic/socket.h
@@ -30,8 +30,8 @@
 #define SO_PEERCRED	17
 #define SO_RCVLOWAT	18
 #define SO_SNDLOWAT	19
-#define SO_RCVTIMEO	20
-#define SO_SNDTIMEO	21
+#define SO_RCVTIMEO_OLD	20
+#define SO_SNDTIMEO_OLD	21
 #endif
 
 /* Security levels - as per NRL IPv6 - don't actually do anything */
@@ -116,6 +116,8 @@
 
 #if !defined(__KERNEL__)
 
+#define	SO_RCVTIMEO SO_RCVTIMEO_OLD
+#define	SO_SNDTIMEO SO_SNDTIMEO_OLD
 #if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__))
 /* on 64-bit and x32, avoid the ?: operator */
 #define SO_TIMESTAMP		SO_TIMESTAMP_OLD
diff --git a/net/core/sock.c b/net/core/sock.c
index a9d1ecce96e5..27b40002b780 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -941,11 +941,11 @@ set_rcvbuf:
 			sk->sk_rcvlowat = val ? : 1;
 		break;
 
-	case SO_RCVTIMEO:
+	case SO_RCVTIMEO_OLD:
 		ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, optlen);
 		break;
 
-	case SO_SNDTIMEO:
+	case SO_SNDTIMEO_OLD:
 		ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen);
 		break;
 
-- 
cgit v1.2.3


From a9beb86ae6e55bd92f38453c8623de60b8e5a308 Mon Sep 17 00:00:00 2001
From: Deepa Dinamani <deepa.kernel@gmail.com>
Date: Sat, 2 Feb 2019 07:34:54 -0800
Subject: sock: Add SO_RCVTIMEO_NEW and SO_SNDTIMEO_NEW

Add new socket timeout options that are y2038 safe.

Signed-off-by: Deepa Dinamani <deepa.kernel@gmail.com>
Acked-by: Willem de Bruijn <willemb@google.com>
Cc: ccaulfie@redhat.com
Cc: davem@davemloft.net
Cc: deller@gmx.de
Cc: paulus@samba.org
Cc: ralf@linux-mips.org
Cc: rth@twiddle.net
Cc: cluster-devel@redhat.com
Cc: linuxppc-dev@lists.ozlabs.org
Cc: linux-alpha@vger.kernel.org
Cc: linux-arch@vger.kernel.org
Cc: linux-mips@vger.kernel.org
Cc: linux-parisc@vger.kernel.org
Cc: sparclinux@vger.kernel.org
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/alpha/include/uapi/asm/socket.h  | 12 ++++++--
 arch/mips/include/uapi/asm/socket.h   | 11 ++++++--
 arch/parisc/include/uapi/asm/socket.h | 10 +++++--
 arch/sparc/include/uapi/asm/socket.h  | 11 ++++++--
 include/uapi/asm-generic/socket.h     | 11 ++++++--
 net/core/sock.c                       | 53 ++++++++++++++++++++++++++---------
 6 files changed, 83 insertions(+), 25 deletions(-)

(limited to 'include')

diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h
index 9826d1db71d0..0d0fddb7e738 100644
--- a/arch/alpha/include/uapi/asm/socket.h
+++ b/arch/alpha/include/uapi/asm/socket.h
@@ -119,19 +119,25 @@
 #define SO_TIMESTAMPNS_NEW      64
 #define SO_TIMESTAMPING_NEW     65
 
-#if !defined(__KERNEL__)
+#define SO_RCVTIMEO_NEW         66
+#define SO_SNDTIMEO_NEW         67
 
-#define	SO_RCVTIMEO SO_RCVTIMEO_OLD
-#define	SO_SNDTIMEO SO_SNDTIMEO_OLD
+#if !defined(__KERNEL__)
 
 #if __BITS_PER_LONG == 64
 #define SO_TIMESTAMP		SO_TIMESTAMP_OLD
 #define SO_TIMESTAMPNS		SO_TIMESTAMPNS_OLD
 #define SO_TIMESTAMPING         SO_TIMESTAMPING_OLD
+
+#define SO_RCVTIMEO		SO_RCVTIMEO_OLD
+#define SO_SNDTIMEO		SO_SNDTIMEO_OLD
 #else
 #define SO_TIMESTAMP (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_TIMESTAMP_OLD : SO_TIMESTAMP_NEW)
 #define SO_TIMESTAMPNS (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_TIMESTAMPNS_OLD : SO_TIMESTAMPNS_NEW)
 #define SO_TIMESTAMPING (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_TIMESTAMPING_OLD : SO_TIMESTAMPING_NEW)
+
+#define SO_RCVTIMEO (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_RCVTIMEO_OLD : SO_RCVTIMEO_NEW)
+#define SO_SNDTIMEO (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_SNDTIMEO_OLD : SO_SNDTIMEO_NEW)
 #endif
 
 #define SCM_TIMESTAMP           SO_TIMESTAMP
diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h
index 96cc0e907f12..eb9f33f8a8b3 100644
--- a/arch/mips/include/uapi/asm/socket.h
+++ b/arch/mips/include/uapi/asm/socket.h
@@ -130,18 +130,25 @@
 #define SO_TIMESTAMPNS_NEW      64
 #define SO_TIMESTAMPING_NEW     65
 
+#define SO_RCVTIMEO_NEW         66
+#define SO_SNDTIMEO_NEW         67
+
 #if !defined(__KERNEL__)
 
-#define	SO_RCVTIMEO SO_RCVTIMEO_OLD
-#define	SO_SNDTIMEO SO_SNDTIMEO_OLD
 #if __BITS_PER_LONG == 64
 #define SO_TIMESTAMP		SO_TIMESTAMP_OLD
 #define SO_TIMESTAMPNS		SO_TIMESTAMPNS_OLD
 #define SO_TIMESTAMPING		SO_TIMESTAMPING_OLD
+
+#define SO_RCVTIMEO             SO_RCVTIMEO_OLD
+#define SO_SNDTIMEO             SO_SNDTIMEO_OLD
 #else
 #define SO_TIMESTAMP (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_TIMESTAMP_OLD : SO_TIMESTAMP_NEW)
 #define SO_TIMESTAMPNS (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_TIMESTAMPNS_OLD : SO_TIMESTAMPNS_NEW)
 #define SO_TIMESTAMPING (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_TIMESTAMPING_OLD : SO_TIMESTAMPING_NEW)
+
+#define SO_RCVTIMEO (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_RCVTIMEO_OLD : SO_RCVTIMEO_NEW)
+#define SO_SNDTIMEO (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_SNDTIMEO_OLD : SO_SNDTIMEO_NEW)
 #endif
 
 #define SCM_TIMESTAMP           SO_TIMESTAMP
diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h
index 046f0cd9cce4..16e428f03526 100644
--- a/arch/parisc/include/uapi/asm/socket.h
+++ b/arch/parisc/include/uapi/asm/socket.h
@@ -111,18 +111,24 @@
 #define SO_TIMESTAMPNS_NEW      0x4039
 #define SO_TIMESTAMPING_NEW     0x403A
 
+#define SO_RCVTIMEO_NEW         0x4040
+#define SO_SNDTIMEO_NEW         0x4041
+
 #if !defined(__KERNEL__)
 
-#define	SO_RCVTIMEO SO_RCVTIMEO_OLD
-#define	SO_SNDTIMEO SO_SNDTIMEO_OLD
 #if __BITS_PER_LONG == 64
 #define SO_TIMESTAMP		SO_TIMESTAMP_OLD
 #define SO_TIMESTAMPNS		SO_TIMESTAMPNS_OLD
 #define SO_TIMESTAMPING         SO_TIMESTAMPING_OLD
+#define SO_RCVTIMEO		SO_RCVTIMEO_OLD
+#define SO_SNDTIMEO		SO_SNDTIMEO_OLD
 #else
 #define SO_TIMESTAMP (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_TIMESTAMP_OLD : SO_TIMESTAMP_NEW)
 #define SO_TIMESTAMPNS (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_TIMESTAMPNS_OLD : SO_TIMESTAMPNS_NEW)
 #define SO_TIMESTAMPING (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_TIMESTAMPING_OLD : SO_TIMESTAMPING_NEW)
+
+#define SO_RCVTIMEO (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_RCVTIMEO_OLD : SO_RCVTIMEO_NEW)
+#define SO_SNDTIMEO (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_SNDTIMEO_OLD : SO_SNDTIMEO_NEW)
 #endif
 
 #define SCM_TIMESTAMP           SO_TIMESTAMP
diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h
index 342ffdc3b424..8c9f74a66b55 100644
--- a/arch/sparc/include/uapi/asm/socket.h
+++ b/arch/sparc/include/uapi/asm/socket.h
@@ -112,19 +112,26 @@
 #define SO_TIMESTAMPNS_NEW       0x0042
 #define SO_TIMESTAMPING_NEW      0x0043
 
+#define SO_RCVTIMEO_NEW          0x0044
+#define SO_SNDTIMEO_NEW          0x0045
+
 #if !defined(__KERNEL__)
 
-#define SO_RCVTIMEO              SO_RCVTIMEO_OLD
-#define SO_SNDTIMEO              SO_SNDTIMEO_OLD
 
 #if __BITS_PER_LONG == 64
 #define SO_TIMESTAMP		SO_TIMESTAMP_OLD
 #define SO_TIMESTAMPNS		SO_TIMESTAMPNS_OLD
 #define SO_TIMESTAMPING		SO_TIMESTAMPING_OLD
+
+#define SO_RCVTIMEO		SO_RCVTIMEO_OLD
+#define SO_SNDTIMEO		SO_SNDTIMEO_OLD
 #else
 #define SO_TIMESTAMP (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_TIMESTAMP_OLD : SO_TIMESTAMP_NEW)
 #define SO_TIMESTAMPNS (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_TIMESTAMPNS_OLD : SO_TIMESTAMPNS_NEW)
 #define SO_TIMESTAMPING (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_TIMESTAMPING_OLD : SO_TIMESTAMPING_NEW)
+
+#define SO_RCVTIMEO (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_RCVTIMEO_OLD : SO_RCVTIMEO_NEW)
+#define SO_SNDTIMEO (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_SNDTIMEO_OLD : SO_SNDTIMEO_NEW)
 #endif
 
 #define SCM_TIMESTAMP          SO_TIMESTAMP
diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h
index c56b8b487c12..c8b430cb6dc4 100644
--- a/include/uapi/asm-generic/socket.h
+++ b/include/uapi/asm-generic/socket.h
@@ -114,19 +114,26 @@
 #define SO_TIMESTAMPNS_NEW      64
 #define SO_TIMESTAMPING_NEW     65
 
+#define SO_RCVTIMEO_NEW         66
+#define SO_SNDTIMEO_NEW         67
+
 #if !defined(__KERNEL__)
 
-#define	SO_RCVTIMEO SO_RCVTIMEO_OLD
-#define	SO_SNDTIMEO SO_SNDTIMEO_OLD
 #if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__))
 /* on 64-bit and x32, avoid the ?: operator */
 #define SO_TIMESTAMP		SO_TIMESTAMP_OLD
 #define SO_TIMESTAMPNS		SO_TIMESTAMPNS_OLD
 #define SO_TIMESTAMPING		SO_TIMESTAMPING_OLD
+
+#define SO_RCVTIMEO		SO_RCVTIMEO_OLD
+#define SO_SNDTIMEO		SO_SNDTIMEO_OLD
 #else
 #define SO_TIMESTAMP (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_TIMESTAMP_OLD : SO_TIMESTAMP_NEW)
 #define SO_TIMESTAMPNS (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_TIMESTAMPNS_OLD : SO_TIMESTAMPNS_NEW)
 #define SO_TIMESTAMPING (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_TIMESTAMPING_OLD : SO_TIMESTAMPING_NEW)
+
+#define SO_RCVTIMEO (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_RCVTIMEO_OLD : SO_RCVTIMEO_NEW)
+#define SO_SNDTIMEO (sizeof(time_t) == sizeof(__kernel_long_t) ? SO_SNDTIMEO_OLD : SO_SNDTIMEO_NEW)
 #endif
 
 #define SCM_TIMESTAMP           SO_TIMESTAMP
diff --git a/net/core/sock.c b/net/core/sock.c
index 27b40002b780..a8904ae40713 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -335,9 +335,10 @@ int __sk_backlog_rcv(struct sock *sk, struct sk_buff *skb)
 }
 EXPORT_SYMBOL(__sk_backlog_rcv);
 
-static int sock_get_timeout(long timeo, void *optval)
+static int sock_get_timeout(long timeo, void *optval, bool old_timeval)
 {
-	struct __kernel_old_timeval tv;
+	struct __kernel_sock_timeval tv;
+	int size;
 
 	if (timeo == MAX_SCHEDULE_TIMEOUT) {
 		tv.tv_sec = 0;
@@ -353,13 +354,23 @@ static int sock_get_timeout(long timeo, void *optval)
 		return sizeof(tv32);
 	}
 
-	*(struct __kernel_old_timeval *)optval = tv;
-	return sizeof(tv);
+	if (old_timeval) {
+		struct __kernel_old_timeval old_tv;
+		old_tv.tv_sec = tv.tv_sec;
+		old_tv.tv_usec = tv.tv_usec;
+		*(struct __kernel_old_timeval *)optval = old_tv;
+		size = sizeof(old_tv);
+	} else {
+		*(struct __kernel_sock_timeval *)optval = tv;
+		size = sizeof(tv);
+	}
+
+	return size;
 }
 
-static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
+static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen, bool old_timeval)
 {
-	struct __kernel_old_timeval tv;
+	struct __kernel_sock_timeval tv;
 
 	if (in_compat_syscall() && !COMPAT_USE_64BIT_TIME) {
 		struct old_timeval32 tv32;
@@ -371,6 +382,15 @@ static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
 			return -EFAULT;
 		tv.tv_sec = tv32.tv_sec;
 		tv.tv_usec = tv32.tv_usec;
+	} else if (old_timeval) {
+		struct __kernel_old_timeval old_tv;
+
+		if (optlen < sizeof(old_tv))
+			return -EINVAL;
+		if (copy_from_user(&old_tv, optval, sizeof(old_tv)))
+			return -EFAULT;
+		tv.tv_sec = old_tv.tv_sec;
+		tv.tv_usec = old_tv.tv_usec;
 	} else {
 		if (optlen < sizeof(tv))
 			return -EINVAL;
@@ -394,8 +414,8 @@ static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
 	*timeo_p = MAX_SCHEDULE_TIMEOUT;
 	if (tv.tv_sec == 0 && tv.tv_usec == 0)
 		return 0;
-	if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT/HZ - 1))
-		*timeo_p = tv.tv_sec * HZ + DIV_ROUND_UP(tv.tv_usec, USEC_PER_SEC / HZ);
+	if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT / HZ - 1))
+		*timeo_p = tv.tv_sec * HZ + DIV_ROUND_UP((unsigned long)tv.tv_usec, USEC_PER_SEC / HZ);
 	return 0;
 }
 
@@ -942,11 +962,13 @@ set_rcvbuf:
 		break;
 
 	case SO_RCVTIMEO_OLD:
-		ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, optlen);
+	case SO_RCVTIMEO_NEW:
+		ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, optlen, optname == SO_RCVTIMEO_OLD);
 		break;
 
 	case SO_SNDTIMEO_OLD:
-		ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen);
+	case SO_SNDTIMEO_NEW:
+		ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen, optname == SO_SNDTIMEO_OLD);
 		break;
 
 	case SO_ATTACH_FILTER:
@@ -1171,6 +1193,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
 		struct linger ling;
 		struct old_timeval32 tm32;
 		struct __kernel_old_timeval tm;
+		struct  __kernel_sock_timeval stm;
 		struct sock_txtime txtime;
 	} v;
 
@@ -1279,12 +1302,14 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
 		v.val = sk->sk_tsflags;
 		break;
 
-	case SO_RCVTIMEO:
-		lv = sock_get_timeout(sk->sk_rcvtimeo, &v);
+	case SO_RCVTIMEO_OLD:
+	case SO_RCVTIMEO_NEW:
+		lv = sock_get_timeout(sk->sk_rcvtimeo, &v, SO_RCVTIMEO_OLD == optname);
 		break;
 
-	case SO_SNDTIMEO:
-		lv = sock_get_timeout(sk->sk_sndtimeo, &v);
+	case SO_SNDTIMEO_OLD:
+	case SO_SNDTIMEO_NEW:
+		lv = sock_get_timeout(sk->sk_sndtimeo, &v, SO_SNDTIMEO_OLD == optname);
 		break;
 
 	case SO_RCVLOWAT:
-- 
cgit v1.2.3


From bff5731d43efbdf0bbd2d73cab32fe6435ea1046 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Fri, 1 Feb 2019 17:56:28 -0800
Subject: net: devlink: report cell size of shared buffers

Shared buffer allocation is usually done in cell increments.
Drivers will either round up the allocation or refuse the
configuration if it's not an exact multiple of cell size.
Drivers know exactly the cell size of shared buffer, so help
out users by providing this information in dumps.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Dirk van der Merwe <dirk.vandermerwe@netronome.com>
Reviewed-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c | 1 +
 drivers/net/ethernet/netronome/nfp/nfp_shared_buf.c    | 1 +
 include/net/devlink.h                                  | 1 +
 include/uapi/linux/devlink.h                           | 2 ++
 net/core/devlink.c                                     | 3 +++
 5 files changed, 8 insertions(+)

(limited to 'include')

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c
index 12c61e0cc570..80066f437a65 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c
@@ -713,6 +713,7 @@ int mlxsw_sp_sb_pool_get(struct mlxsw_core *mlxsw_core,
 	pool_info->pool_type = (enum devlink_sb_pool_type) dir;
 	pool_info->size = mlxsw_sp_cells_bytes(mlxsw_sp, pr->size);
 	pool_info->threshold_type = (enum devlink_sb_threshold_type) pr->mode;
+	pool_info->cell_size = mlxsw_sp->sb->cell_size;
 	return 0;
 }
 
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_shared_buf.c b/drivers/net/ethernet/netronome/nfp/nfp_shared_buf.c
index 814360ed3a20..ea2e3f829aba 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_shared_buf.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_shared_buf.c
@@ -48,6 +48,7 @@ int nfp_shared_buf_pool_get(struct nfp_pf *pf, unsigned int sb, u16 pool_index,
 	pool_info->pool_type = le32_to_cpu(get_data.pool_type);
 	pool_info->threshold_type = le32_to_cpu(get_data.threshold_type);
 	pool_info->size = le32_to_cpu(get_data.size) * unit_size;
+	pool_info->cell_size = unit_size;
 
 	return 0;
 }
diff --git a/include/net/devlink.h b/include/net/devlink.h
index 1c8523920f66..74d992a68a06 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -62,6 +62,7 @@ struct devlink_sb_pool_info {
 	enum devlink_sb_pool_type pool_type;
 	u32 size;
 	enum devlink_sb_threshold_type threshold_type;
+	u32 cell_size;
 };
 
 /**
diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index 7fffd879c328..054b2d1a4537 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -300,6 +300,8 @@ enum devlink_attr {
 	DEVLINK_ATTR_INFO_VERSION_NAME,		/* string */
 	DEVLINK_ATTR_INFO_VERSION_VALUE,	/* string */
 
+	DEVLINK_ATTR_SB_POOL_CELL_SIZE,		/* u32 */
+
 	/* add new attributes above here, update the policy in devlink.c */
 
 	__DEVLINK_ATTR_MAX,
diff --git a/net/core/devlink.c b/net/core/devlink.c
index eb839d74bcc0..52bf27491fb8 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -932,6 +932,9 @@ static int devlink_nl_sb_pool_fill(struct sk_buff *msg, struct devlink *devlink,
 	if (nla_put_u8(msg, DEVLINK_ATTR_SB_POOL_THRESHOLD_TYPE,
 		       pool_info.threshold_type))
 		goto nla_put_failure;
+	if (nla_put_u32(msg, DEVLINK_ATTR_SB_POOL_CELL_SIZE,
+			pool_info.cell_size))
+		goto nla_put_failure;
 
 	genlmsg_end(msg, hdr);
 	return 0;
-- 
cgit v1.2.3


From 9fb20801dab46238706267896df1b3938d977129 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Fri, 1 Feb 2019 20:20:52 -0800
Subject: net: Fix ip_mc_{dec,inc}_group allocation context

After 4effd28c1245 ("bridge: join all-snoopers multicast address"), I
started seeing the following sleep in atomic warnings:

[   26.763893] BUG: sleeping function called from invalid context at mm/slab.h:421
[   26.771425] in_atomic(): 1, irqs_disabled(): 0, pid: 1658, name: sh
[   26.777855] INFO: lockdep is turned off.
[   26.781916] CPU: 0 PID: 1658 Comm: sh Not tainted 5.0.0-rc4 #20
[   26.787943] Hardware name: BCM97278SV (DT)
[   26.792118] Call trace:
[   26.794645]  dump_backtrace+0x0/0x170
[   26.798391]  show_stack+0x24/0x30
[   26.801787]  dump_stack+0xa4/0xe4
[   26.805182]  ___might_sleep+0x208/0x218
[   26.809102]  __might_sleep+0x78/0x88
[   26.812762]  kmem_cache_alloc_trace+0x64/0x28c
[   26.817301]  igmp_group_dropped+0x150/0x230
[   26.821573]  ip_mc_dec_group+0x1b0/0x1f8
[   26.825585]  br_ip4_multicast_leave_snoopers.isra.11+0x174/0x190
[   26.831704]  br_multicast_toggle+0x78/0xcc
[   26.835887]  store_bridge_parm+0xc4/0xfc
[   26.839894]  multicast_snooping_store+0x3c/0x4c
[   26.844517]  dev_attr_store+0x44/0x5c
[   26.848262]  sysfs_kf_write+0x50/0x68
[   26.852006]  kernfs_fop_write+0x14c/0x1b4
[   26.856102]  __vfs_write+0x60/0x190
[   26.859668]  vfs_write+0xc8/0x168
[   26.863059]  ksys_write+0x70/0xc8
[   26.866449]  __arm64_sys_write+0x24/0x30
[   26.870458]  el0_svc_common+0xa0/0x11c
[   26.874291]  el0_svc_handler+0x38/0x70
[   26.878120]  el0_svc+0x8/0xc

while toggling the bridge's multicast_snooping attribute dynamically.

Pass a gfp_t down to igmpv3_add_delrec(), introduce
__igmp_group_dropped() and introduce __ip_mc_dec_group() to take a gfp_t
argument.

Similarly introduce ____ip_mc_inc_group() and __ip_mc_inc_group() to
allow caller to specify gfp_t.

IPv6 part of the patch appears fine.

Fixes: 4effd28c1245 ("bridge: join all-snoopers multicast address")
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/igmp.h      |  8 +++++++-
 net/bridge/br_multicast.c |  4 ++--
 net/ipv4/igmp.c           | 35 ++++++++++++++++++++++++-----------
 3 files changed, 33 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/include/linux/igmp.h b/include/linux/igmp.h
index 8b4348f69bc5..cc85f4524dbf 100644
--- a/include/linux/igmp.h
+++ b/include/linux/igmp.h
@@ -137,7 +137,13 @@ extern void ip_mc_up(struct in_device *);
 extern void ip_mc_down(struct in_device *);
 extern void ip_mc_unmap(struct in_device *);
 extern void ip_mc_remap(struct in_device *);
-extern void ip_mc_dec_group(struct in_device *in_dev, __be32 addr);
+extern void __ip_mc_dec_group(struct in_device *in_dev, __be32 addr, gfp_t gfp);
+static inline void ip_mc_dec_group(struct in_device *in_dev, __be32 addr)
+{
+	return __ip_mc_dec_group(in_dev, addr, GFP_KERNEL);
+}
+extern void __ip_mc_inc_group(struct in_device *in_dev, __be32 addr,
+			      gfp_t gfp);
 extern void ip_mc_inc_group(struct in_device *in_dev, __be32 addr);
 int ip_mc_check_igmp(struct sk_buff *skb);
 
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 780757b7a82f..1fb885a33c66 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -1841,7 +1841,7 @@ static void br_ip4_multicast_join_snoopers(struct net_bridge *br)
 	if (!in_dev)
 		return;
 
-	ip_mc_inc_group(in_dev, htonl(INADDR_ALLSNOOPERS_GROUP));
+	__ip_mc_inc_group(in_dev, htonl(INADDR_ALLSNOOPERS_GROUP), GFP_ATOMIC);
 	in_dev_put(in_dev);
 }
 
@@ -1872,7 +1872,7 @@ static void br_ip4_multicast_leave_snoopers(struct net_bridge *br)
 	if (WARN_ON(!in_dev))
 		return;
 
-	ip_mc_dec_group(in_dev, htonl(INADDR_ALLSNOOPERS_GROUP));
+	__ip_mc_dec_group(in_dev, htonl(INADDR_ALLSNOOPERS_GROUP), GFP_ATOMIC);
 	in_dev_put(in_dev);
 }
 
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index a40e48ded10d..b448cf32296c 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -159,7 +159,8 @@ static int unsolicited_report_interval(struct in_device *in_dev)
 	return interval_jiffies;
 }
 
-static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im);
+static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im,
+			      gfp_t gfp);
 static void igmpv3_del_delrec(struct in_device *in_dev, struct ip_mc_list *im);
 static void igmpv3_clear_delrec(struct in_device *in_dev);
 static int sf_setstate(struct ip_mc_list *pmc);
@@ -1145,7 +1146,8 @@ static void ip_mc_filter_del(struct in_device *in_dev, __be32 addr)
 /*
  * deleted ip_mc_list manipulation
  */
-static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im)
+static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im,
+			      gfp_t gfp)
 {
 	struct ip_mc_list *pmc;
 	struct net *net = dev_net(in_dev->dev);
@@ -1156,7 +1158,7 @@ static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im)
 	 * for deleted items allows change reports to use common code with
 	 * non-deleted or query-response MCA's.
 	 */
-	pmc = kzalloc(sizeof(*pmc), GFP_KERNEL);
+	pmc = kzalloc(sizeof(*pmc), gfp);
 	if (!pmc)
 		return;
 	spin_lock_init(&pmc->lock);
@@ -1261,7 +1263,7 @@ static void igmpv3_clear_delrec(struct in_device *in_dev)
 }
 #endif
 
-static void igmp_group_dropped(struct ip_mc_list *im)
+static void __igmp_group_dropped(struct ip_mc_list *im, gfp_t gfp)
 {
 	struct in_device *in_dev = im->interface;
 #ifdef CONFIG_IP_MULTICAST
@@ -1292,13 +1294,18 @@ static void igmp_group_dropped(struct ip_mc_list *im)
 			return;
 		}
 		/* IGMPv3 */
-		igmpv3_add_delrec(in_dev, im);
+		igmpv3_add_delrec(in_dev, im, gfp);
 
 		igmp_ifc_event(in_dev);
 	}
 #endif
 }
 
+static void igmp_group_dropped(struct ip_mc_list *im)
+{
+	__igmp_group_dropped(im, GFP_KERNEL);
+}
+
 static void igmp_group_added(struct ip_mc_list *im)
 {
 	struct in_device *in_dev = im->interface;
@@ -1400,8 +1407,8 @@ static void ip_mc_hash_remove(struct in_device *in_dev,
 /*
  *	A socket has joined a multicast group on device dev.
  */
-static void __ip_mc_inc_group(struct in_device *in_dev, __be32 addr,
-			      unsigned int mode)
+static void ____ip_mc_inc_group(struct in_device *in_dev, __be32 addr,
+				unsigned int mode, gfp_t gfp)
 {
 	struct ip_mc_list *im;
 
@@ -1415,7 +1422,7 @@ static void __ip_mc_inc_group(struct in_device *in_dev, __be32 addr,
 		}
 	}
 
-	im = kzalloc(sizeof(*im), GFP_KERNEL);
+	im = kzalloc(sizeof(*im), gfp);
 	if (!im)
 		goto out;
 
@@ -1448,6 +1455,12 @@ out:
 	return;
 }
 
+void __ip_mc_inc_group(struct in_device *in_dev, __be32 addr, gfp_t gfp)
+{
+	____ip_mc_inc_group(in_dev, addr, MCAST_EXCLUDE, gfp);
+}
+EXPORT_SYMBOL(__ip_mc_inc_group);
+
 void ip_mc_inc_group(struct in_device *in_dev, __be32 addr)
 {
 	__ip_mc_inc_group(in_dev, addr, MCAST_EXCLUDE);
@@ -1634,7 +1647,7 @@ static void ip_mc_rejoin_groups(struct in_device *in_dev)
  *	A socket has left a multicast group on device dev
  */
 
-void ip_mc_dec_group(struct in_device *in_dev, __be32 addr)
+void __ip_mc_dec_group(struct in_device *in_dev, __be32 addr, gfp_t gfp)
 {
 	struct ip_mc_list *i;
 	struct ip_mc_list __rcu **ip;
@@ -1649,7 +1662,7 @@ void ip_mc_dec_group(struct in_device *in_dev, __be32 addr)
 				ip_mc_hash_remove(in_dev, i);
 				*ip = i->next_rcu;
 				in_dev->mc_count--;
-				igmp_group_dropped(i);
+				__igmp_group_dropped(i, gfp);
 				ip_mc_clear_src(i);
 
 				if (!in_dev->dead)
@@ -1662,7 +1675,7 @@ void ip_mc_dec_group(struct in_device *in_dev, __be32 addr)
 		}
 	}
 }
-EXPORT_SYMBOL(ip_mc_dec_group);
+EXPORT_SYMBOL(__ip_mc_dec_group);
 
 /* Device changing type */
 
-- 
cgit v1.2.3


From 5f3d544f1671d214cd26e45bda326f921455256e Mon Sep 17 00:00:00 2001
From: Richard Guy Briggs <rgb@redhat.com>
Date: Fri, 1 Feb 2019 22:45:17 -0500
Subject: audit: remove audit_context when CONFIG_ AUDIT and not AUDITSYSCALL

Remove audit_context from struct task_struct and struct audit_buffer
when CONFIG_AUDIT is enabled but CONFIG_AUDITSYSCALL is not.

Also, audit_log_name() (and supporting inode and fcaps functions) should
have been put back in auditsc.c when soft and hard link logging was
normalized since it is only used by syscall auditing.

See github issue https://github.com/linux-audit/audit-kernel/issues/105

Signed-off-by: Richard Guy Briggs <rgb@redhat.com>
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
 include/linux/sched.h |   4 +-
 kernel/audit.c        | 157 -------------------------------------------------
 kernel/audit.h        |   9 ---
 kernel/auditsc.c      | 158 ++++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 161 insertions(+), 167 deletions(-)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index f9788bb122c5..765119df759a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -885,8 +885,10 @@ struct task_struct {
 
 	struct callback_head		*task_works;
 
-	struct audit_context		*audit_context;
 #ifdef CONFIG_AUDIT
+#ifdef CONFIG_AUDITSYSCALL
+	struct audit_context		*audit_context;
+#endif
 	kuid_t				loginuid;
 	unsigned int			sessionid;
 #endif
diff --git a/kernel/audit.c b/kernel/audit.c
index b7177a8def2e..c89ea48c70a6 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -2067,163 +2067,6 @@ void audit_log_key(struct audit_buffer *ab, char *key)
 		audit_log_format(ab, "(null)");
 }
 
-void audit_log_cap(struct audit_buffer *ab, char *prefix, kernel_cap_t *cap)
-{
-	int i;
-
-	if (cap_isclear(*cap)) {
-		audit_log_format(ab, " %s=0", prefix);
-		return;
-	}
-	audit_log_format(ab, " %s=", prefix);
-	CAP_FOR_EACH_U32(i)
-		audit_log_format(ab, "%08x", cap->cap[CAP_LAST_U32 - i]);
-}
-
-static void audit_log_fcaps(struct audit_buffer *ab, struct audit_names *name)
-{
-	if (name->fcap_ver == -1) {
-		audit_log_format(ab, " cap_fe=? cap_fver=? cap_fp=? cap_fi=?");
-		return;
-	}
-	audit_log_cap(ab, "cap_fp", &name->fcap.permitted);
-	audit_log_cap(ab, "cap_fi", &name->fcap.inheritable);
-	audit_log_format(ab, " cap_fe=%d cap_fver=%x cap_frootid=%d",
-			 name->fcap.fE, name->fcap_ver,
-			 from_kuid(&init_user_ns, name->fcap.rootid));
-}
-
-static inline int audit_copy_fcaps(struct audit_names *name,
-				   const struct dentry *dentry)
-{
-	struct cpu_vfs_cap_data caps;
-	int rc;
-
-	if (!dentry)
-		return 0;
-
-	rc = get_vfs_caps_from_disk(dentry, &caps);
-	if (rc)
-		return rc;
-
-	name->fcap.permitted = caps.permitted;
-	name->fcap.inheritable = caps.inheritable;
-	name->fcap.fE = !!(caps.magic_etc & VFS_CAP_FLAGS_EFFECTIVE);
-	name->fcap.rootid = caps.rootid;
-	name->fcap_ver = (caps.magic_etc & VFS_CAP_REVISION_MASK) >>
-				VFS_CAP_REVISION_SHIFT;
-
-	return 0;
-}
-
-/* Copy inode data into an audit_names. */
-void audit_copy_inode(struct audit_names *name, const struct dentry *dentry,
-		      struct inode *inode, unsigned int flags)
-{
-	name->ino   = inode->i_ino;
-	name->dev   = inode->i_sb->s_dev;
-	name->mode  = inode->i_mode;
-	name->uid   = inode->i_uid;
-	name->gid   = inode->i_gid;
-	name->rdev  = inode->i_rdev;
-	security_inode_getsecid(inode, &name->osid);
-	if (flags & AUDIT_INODE_NOEVAL) {
-		name->fcap_ver = -1;
-		return;
-	}
-	audit_copy_fcaps(name, dentry);
-}
-
-/**
- * audit_log_name - produce AUDIT_PATH record from struct audit_names
- * @context: audit_context for the task
- * @n: audit_names structure with reportable details
- * @path: optional path to report instead of audit_names->name
- * @record_num: record number to report when handling a list of names
- * @call_panic: optional pointer to int that will be updated if secid fails
- */
-void audit_log_name(struct audit_context *context, struct audit_names *n,
-		    const struct path *path, int record_num, int *call_panic)
-{
-	struct audit_buffer *ab;
-	ab = audit_log_start(context, GFP_KERNEL, AUDIT_PATH);
-	if (!ab)
-		return;
-
-	audit_log_format(ab, "item=%d", record_num);
-
-	if (path)
-		audit_log_d_path(ab, " name=", path);
-	else if (n->name) {
-		switch (n->name_len) {
-		case AUDIT_NAME_FULL:
-			/* log the full path */
-			audit_log_format(ab, " name=");
-			audit_log_untrustedstring(ab, n->name->name);
-			break;
-		case 0:
-			/* name was specified as a relative path and the
-			 * directory component is the cwd */
-			audit_log_d_path(ab, " name=", &context->pwd);
-			break;
-		default:
-			/* log the name's directory component */
-			audit_log_format(ab, " name=");
-			audit_log_n_untrustedstring(ab, n->name->name,
-						    n->name_len);
-		}
-	} else
-		audit_log_format(ab, " name=(null)");
-
-	if (n->ino != AUDIT_INO_UNSET)
-		audit_log_format(ab, " inode=%lu"
-				 " dev=%02x:%02x mode=%#ho"
-				 " ouid=%u ogid=%u rdev=%02x:%02x",
-				 n->ino,
-				 MAJOR(n->dev),
-				 MINOR(n->dev),
-				 n->mode,
-				 from_kuid(&init_user_ns, n->uid),
-				 from_kgid(&init_user_ns, n->gid),
-				 MAJOR(n->rdev),
-				 MINOR(n->rdev));
-	if (n->osid != 0) {
-		char *ctx = NULL;
-		u32 len;
-		if (security_secid_to_secctx(
-			n->osid, &ctx, &len)) {
-			audit_log_format(ab, " osid=%u", n->osid);
-			if (call_panic)
-				*call_panic = 2;
-		} else {
-			audit_log_format(ab, " obj=%s", ctx);
-			security_release_secctx(ctx, len);
-		}
-	}
-
-	/* log the audit_names record type */
-	switch(n->type) {
-	case AUDIT_TYPE_NORMAL:
-		audit_log_format(ab, " nametype=NORMAL");
-		break;
-	case AUDIT_TYPE_PARENT:
-		audit_log_format(ab, " nametype=PARENT");
-		break;
-	case AUDIT_TYPE_CHILD_DELETE:
-		audit_log_format(ab, " nametype=DELETE");
-		break;
-	case AUDIT_TYPE_CHILD_CREATE:
-		audit_log_format(ab, " nametype=CREATE");
-		break;
-	default:
-		audit_log_format(ab, " nametype=UNKNOWN");
-		break;
-	}
-
-	audit_log_fcaps(ab, n);
-	audit_log_end(ab);
-}
-
 int audit_log_task_context(struct audit_buffer *ab)
 {
 	char *ctx = NULL;
diff --git a/kernel/audit.h b/kernel/audit.h
index 002f0f7ba732..82734f438ddd 100644
--- a/kernel/audit.h
+++ b/kernel/audit.h
@@ -213,15 +213,6 @@ extern bool audit_ever_enabled;
 
 extern void audit_log_session_info(struct audit_buffer *ab);
 
-extern void audit_copy_inode(struct audit_names *name,
-			     const struct dentry *dentry,
-			     struct inode *inode, unsigned int flags);
-extern void audit_log_cap(struct audit_buffer *ab, char *prefix,
-			  kernel_cap_t *cap);
-extern void audit_log_name(struct audit_context *context,
-			   struct audit_names *n, const struct path *path,
-			   int record_num, int *call_panic);
-
 extern int auditd_test_task(struct task_struct *task);
 
 #define AUDIT_INODE_BUCKETS	32
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 7d37cb1e4aef..d1eab1d4a930 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -1139,6 +1139,32 @@ out:
 	kfree(buf_head);
 }
 
+void audit_log_cap(struct audit_buffer *ab, char *prefix, kernel_cap_t *cap)
+{
+	int i;
+
+	if (cap_isclear(*cap)) {
+		audit_log_format(ab, " %s=0", prefix);
+		return;
+	}
+	audit_log_format(ab, " %s=", prefix);
+	CAP_FOR_EACH_U32(i)
+		audit_log_format(ab, "%08x", cap->cap[CAP_LAST_U32 - i]);
+}
+
+static void audit_log_fcaps(struct audit_buffer *ab, struct audit_names *name)
+{
+	if (name->fcap_ver == -1) {
+		audit_log_format(ab, " cap_fe=? cap_fver=? cap_fp=? cap_fi=?");
+		return;
+	}
+	audit_log_cap(ab, "cap_fp", &name->fcap.permitted);
+	audit_log_cap(ab, "cap_fi", &name->fcap.inheritable);
+	audit_log_format(ab, " cap_fe=%d cap_fver=%x cap_frootid=%d",
+			 name->fcap.fE, name->fcap_ver,
+			 from_kuid(&init_user_ns, name->fcap.rootid));
+}
+
 static void show_special(struct audit_context *context, int *call_panic)
 {
 	struct audit_buffer *ab;
@@ -1261,6 +1287,97 @@ static inline int audit_proctitle_rtrim(char *proctitle, int len)
 	return len;
 }
 
+/*
+ * audit_log_name - produce AUDIT_PATH record from struct audit_names
+ * @context: audit_context for the task
+ * @n: audit_names structure with reportable details
+ * @path: optional path to report instead of audit_names->name
+ * @record_num: record number to report when handling a list of names
+ * @call_panic: optional pointer to int that will be updated if secid fails
+ */
+static void audit_log_name(struct audit_context *context, struct audit_names *n,
+		    const struct path *path, int record_num, int *call_panic)
+{
+	struct audit_buffer *ab;
+
+	ab = audit_log_start(context, GFP_KERNEL, AUDIT_PATH);
+	if (!ab)
+		return;
+
+	audit_log_format(ab, "item=%d", record_num);
+
+	if (path)
+		audit_log_d_path(ab, " name=", path);
+	else if (n->name) {
+		switch (n->name_len) {
+		case AUDIT_NAME_FULL:
+			/* log the full path */
+			audit_log_format(ab, " name=");
+			audit_log_untrustedstring(ab, n->name->name);
+			break;
+		case 0:
+			/* name was specified as a relative path and the
+			 * directory component is the cwd
+			 */
+			audit_log_d_path(ab, " name=", &context->pwd);
+			break;
+		default:
+			/* log the name's directory component */
+			audit_log_format(ab, " name=");
+			audit_log_n_untrustedstring(ab, n->name->name,
+						    n->name_len);
+		}
+	} else
+		audit_log_format(ab, " name=(null)");
+
+	if (n->ino != AUDIT_INO_UNSET)
+		audit_log_format(ab, " inode=%lu dev=%02x:%02x mode=%#ho ouid=%u ogid=%u rdev=%02x:%02x",
+				 n->ino,
+				 MAJOR(n->dev),
+				 MINOR(n->dev),
+				 n->mode,
+				 from_kuid(&init_user_ns, n->uid),
+				 from_kgid(&init_user_ns, n->gid),
+				 MAJOR(n->rdev),
+				 MINOR(n->rdev));
+	if (n->osid != 0) {
+		char *ctx = NULL;
+		u32 len;
+
+		if (security_secid_to_secctx(
+			n->osid, &ctx, &len)) {
+			audit_log_format(ab, " osid=%u", n->osid);
+			if (call_panic)
+				*call_panic = 2;
+		} else {
+			audit_log_format(ab, " obj=%s", ctx);
+			security_release_secctx(ctx, len);
+		}
+	}
+
+	/* log the audit_names record type */
+	switch (n->type) {
+	case AUDIT_TYPE_NORMAL:
+		audit_log_format(ab, " nametype=NORMAL");
+		break;
+	case AUDIT_TYPE_PARENT:
+		audit_log_format(ab, " nametype=PARENT");
+		break;
+	case AUDIT_TYPE_CHILD_DELETE:
+		audit_log_format(ab, " nametype=DELETE");
+		break;
+	case AUDIT_TYPE_CHILD_CREATE:
+		audit_log_format(ab, " nametype=CREATE");
+		break;
+	default:
+		audit_log_format(ab, " nametype=UNKNOWN");
+		break;
+	}
+
+	audit_log_fcaps(ab, n);
+	audit_log_end(ab);
+}
+
 static void audit_log_proctitle(void)
 {
 	int res;
@@ -1756,6 +1873,47 @@ void __audit_getname(struct filename *name)
 		get_fs_pwd(current->fs, &context->pwd);
 }
 
+static inline int audit_copy_fcaps(struct audit_names *name,
+				   const struct dentry *dentry)
+{
+	struct cpu_vfs_cap_data caps;
+	int rc;
+
+	if (!dentry)
+		return 0;
+
+	rc = get_vfs_caps_from_disk(dentry, &caps);
+	if (rc)
+		return rc;
+
+	name->fcap.permitted = caps.permitted;
+	name->fcap.inheritable = caps.inheritable;
+	name->fcap.fE = !!(caps.magic_etc & VFS_CAP_FLAGS_EFFECTIVE);
+	name->fcap.rootid = caps.rootid;
+	name->fcap_ver = (caps.magic_etc & VFS_CAP_REVISION_MASK) >>
+				VFS_CAP_REVISION_SHIFT;
+
+	return 0;
+}
+
+/* Copy inode data into an audit_names. */
+void audit_copy_inode(struct audit_names *name, const struct dentry *dentry,
+		      struct inode *inode, unsigned int flags)
+{
+	name->ino   = inode->i_ino;
+	name->dev   = inode->i_sb->s_dev;
+	name->mode  = inode->i_mode;
+	name->uid   = inode->i_uid;
+	name->gid   = inode->i_gid;
+	name->rdev  = inode->i_rdev;
+	security_inode_getsecid(inode, &name->osid);
+	if (flags & AUDIT_INODE_NOEVAL) {
+		name->fcap_ver = -1;
+		return;
+	}
+	audit_copy_fcaps(name, dentry);
+}
+
 /**
  * __audit_inode - store the inode and device from a lookup
  * @name: name being audited
-- 
cgit v1.2.3


From ce3fdb697f684b0c018cda8af91f953b7936a9c2 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Sat, 2 Feb 2019 19:47:25 -0800
Subject: netdevice.h: Add __cold to netdev_<level> logging functions

Add __cold to the netdev_<level> logging functions similar to
the use of __cold in the generic printk function.

Using __cold moves all the netdev_<level> logging functions
out-of-line possibly improving code locality and runtime
performance.

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index e675ef97a426..ba57d0ba425e 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -4663,22 +4663,22 @@ static inline const char *netdev_reg_state(const struct net_device *dev)
 	return " (unknown)";
 }
 
-__printf(3, 4)
+__printf(3, 4) __cold
 void netdev_printk(const char *level, const struct net_device *dev,
 		   const char *format, ...);
-__printf(2, 3)
+__printf(2, 3) __cold
 void netdev_emerg(const struct net_device *dev, const char *format, ...);
-__printf(2, 3)
+__printf(2, 3) __cold
 void netdev_alert(const struct net_device *dev, const char *format, ...);
-__printf(2, 3)
+__printf(2, 3) __cold
 void netdev_crit(const struct net_device *dev, const char *format, ...);
-__printf(2, 3)
+__printf(2, 3) __cold
 void netdev_err(const struct net_device *dev, const char *format, ...);
-__printf(2, 3)
+__printf(2, 3) __cold
 void netdev_warn(const struct net_device *dev, const char *format, ...);
-__printf(2, 3)
+__printf(2, 3) __cold
 void netdev_notice(const struct net_device *dev, const char *format, ...);
-__printf(2, 3)
+__printf(2, 3) __cold
 void netdev_info(const struct net_device *dev, const char *format, ...);
 
 #define netdev_level_once(level, dev, fmt, ...)			\
-- 
cgit v1.2.3


From 494c704f9af0a0cddf593b381ea44320888733e6 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Sat, 2 Feb 2019 10:41:13 +0100
Subject: efi: Use 32-bit alignment for efi_guid_t

The UEFI spec and EDK2 reference implementation both define EFI_GUID as
struct { u32 a; u16; b; u16 c; u8 d[8]; }; and so the implied alignment
is 32 bits not 8 bits like our guid_t. In some cases (i.e., on 32-bit ARM),
this means that firmware services invoked by the kernel may assume that
efi_guid_t* arguments are 32-bit aligned, and use memory accessors that
do not tolerate misalignment. So let's set the minimum alignment to 32 bits.

Note that the UEFI spec as well as some comments in the EDK2 code base
suggest that EFI_GUID should be 64-bit aligned, but this appears to be
a mistake, given that no code seems to exist that actually enforces that
or relies on it.

Reported-by: Heinrich Schuchardt <xypron.glpk@gmx.de>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Reviewed-by: Leif Lindholm <leif.lindholm@linaro.org>
Cc: AKASHI Takahiro <takahiro.akashi@linaro.org>
Cc: Alexander Graf <agraf@suse.de>
Cc: Bjorn Andersson <bjorn.andersson@linaro.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Jeffrey Hugo <jhugo@codeaurora.org>
Cc: Lee Jones <lee.jones@linaro.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Matt Fleming <matt@codeblueprint.co.uk>
Cc: Peter Jones <pjones@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sai Praneeth Prakhya <sai.praneeth.prakhya@intel.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-efi@vger.kernel.org
Link: http://lkml.kernel.org/r/20190202094119.13230-5-ard.biesheuvel@linaro.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/efi.h | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/efi.h b/include/linux/efi.h
index 45ff763fba76..be08518c2553 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -48,7 +48,20 @@ typedef u16 efi_char16_t;		/* UNICODE character */
 typedef u64 efi_physical_addr_t;
 typedef void *efi_handle_t;
 
-typedef guid_t efi_guid_t;
+/*
+ * The UEFI spec and EDK2 reference implementation both define EFI_GUID as
+ * struct { u32 a; u16; b; u16 c; u8 d[8]; }; and so the implied alignment
+ * is 32 bits not 8 bits like our guid_t. In some cases (i.e., on 32-bit ARM),
+ * this means that firmware services invoked by the kernel may assume that
+ * efi_guid_t* arguments are 32-bit aligned, and use memory accessors that
+ * do not tolerate misalignment. So let's set the minimum alignment to 32 bits.
+ *
+ * Note that the UEFI spec as well as some comments in the EDK2 code base
+ * suggest that EFI_GUID should be 64-bit aligned, but this appears to be
+ * a mistake, given that no code seems to exist that actually enforces that
+ * or relies on it.
+ */
+typedef guid_t efi_guid_t __aligned(__alignof__(u32));
 
 #define EFI_GUID(a,b,c,d0,d1,d2,d3,d4,d5,d6,d7) \
 	GUID_INIT(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7)
-- 
cgit v1.2.3


From 8c94abbbe1ba24961278055434504b7dc3595415 Mon Sep 17 00:00:00 2001
From: Elena Reshetova <elena.reshetova@intel.com>
Date: Mon, 28 Jan 2019 14:27:26 +0200
Subject: perf: Convert perf_event_context.refcount to refcount_t

atomic_t variables are currently used to implement reference
counters with the following properties:

 - counter is initialized to 1 using atomic_set()
 - a resource is freed upon counter reaching zero
 - once counter reaches zero, its further
   increments aren't allowed
 - counter schema uses basic atomic operations
   (set, inc, inc_not_zero, dec_and_test, etc.)

Such atomic variables should be converted to a newly provided
refcount_t type and API that prevents accidental counter overflows
and underflows. This is important since overflows and underflows
can lead to use-after-free situation and be exploitable.

The variable perf_event_context.refcount is used as pure reference counter.
Convert it to refcount_t and fix up the operations.

** Important note for maintainers:

Some functions from refcount_t API defined in lib/refcount.c
have different memory ordering guarantees than their atomic
counterparts. Please check Documentation/core-api/refcount-vs-atomic.rst
for more information.

Normally the differences should not matter since refcount_t provides
enough guarantees to satisfy the refcounting use cases, but in
some rare cases it might matter.
Please double check that you don't have some undocumented
memory guarantees for this variable usage.

For the perf_event_context.refcount it might make a difference
in following places:

 - get_ctx(), perf_event_ctx_lock_nested(), perf_lock_task_context()
   and __perf_event_ctx_lock_double(): increment in
   refcount_inc_not_zero() only guarantees control dependency
   on success vs. fully ordered atomic counterpart
 - put_ctx(): decrement in refcount_dec_and_test() provides
   RELEASE ordering and ACQUIRE ordering + control dependency on success
   vs. fully ordered atomic counterpart

Suggested-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Elena Reshetova <elena.reshetova@intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: David Windsor <dwindsor@gmail.com>
Reviewed-by: Hans Liljestrand <ishkamiel@gmail.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: acme@kernel.org
Cc: namhyung@kernel.org
Link: https://lkml.kernel.org/r/1548678448-24458-2-git-send-email-elena.reshetova@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/perf_event.h |  3 ++-
 kernel/events/core.c       | 12 ++++++------
 2 files changed, 8 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index a79e59fc3b7d..6cb5d483ab34 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -54,6 +54,7 @@ struct perf_guest_info_callbacks {
 #include <linux/sysfs.h>
 #include <linux/perf_regs.h>
 #include <linux/cgroup.h>
+#include <linux/refcount.h>
 #include <asm/local.h>
 
 struct perf_callchain_entry {
@@ -737,7 +738,7 @@ struct perf_event_context {
 	int				nr_stat;
 	int				nr_freq;
 	int				rotate_disable;
-	atomic_t			refcount;
+	refcount_t			refcount;
 	struct task_struct		*task;
 
 	/*
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 5b89de7918d0..677164d54547 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1172,7 +1172,7 @@ static void perf_event_ctx_deactivate(struct perf_event_context *ctx)
 
 static void get_ctx(struct perf_event_context *ctx)
 {
-	WARN_ON(!atomic_inc_not_zero(&ctx->refcount));
+	refcount_inc(&ctx->refcount);
 }
 
 static void free_ctx(struct rcu_head *head)
@@ -1186,7 +1186,7 @@ static void free_ctx(struct rcu_head *head)
 
 static void put_ctx(struct perf_event_context *ctx)
 {
-	if (atomic_dec_and_test(&ctx->refcount)) {
+	if (refcount_dec_and_test(&ctx->refcount)) {
 		if (ctx->parent_ctx)
 			put_ctx(ctx->parent_ctx);
 		if (ctx->task && ctx->task != TASK_TOMBSTONE)
@@ -1268,7 +1268,7 @@ perf_event_ctx_lock_nested(struct perf_event *event, int nesting)
 again:
 	rcu_read_lock();
 	ctx = READ_ONCE(event->ctx);
-	if (!atomic_inc_not_zero(&ctx->refcount)) {
+	if (!refcount_inc_not_zero(&ctx->refcount)) {
 		rcu_read_unlock();
 		goto again;
 	}
@@ -1401,7 +1401,7 @@ retry:
 		}
 
 		if (ctx->task == TASK_TOMBSTONE ||
-		    !atomic_inc_not_zero(&ctx->refcount)) {
+		    !refcount_inc_not_zero(&ctx->refcount)) {
 			raw_spin_unlock(&ctx->lock);
 			ctx = NULL;
 		} else {
@@ -4057,7 +4057,7 @@ static void __perf_event_init_context(struct perf_event_context *ctx)
 	INIT_LIST_HEAD(&ctx->event_list);
 	INIT_LIST_HEAD(&ctx->pinned_active);
 	INIT_LIST_HEAD(&ctx->flexible_active);
-	atomic_set(&ctx->refcount, 1);
+	refcount_set(&ctx->refcount, 1);
 }
 
 static struct perf_event_context *
@@ -10613,7 +10613,7 @@ __perf_event_ctx_lock_double(struct perf_event *group_leader,
 again:
 	rcu_read_lock();
 	gctx = READ_ONCE(group_leader->ctx);
-	if (!atomic_inc_not_zero(&gctx->refcount)) {
+	if (!refcount_inc_not_zero(&gctx->refcount)) {
 		rcu_read_unlock();
 		goto again;
 	}
-- 
cgit v1.2.3


From d036bda7d0e7269c2982eb979acfef855f5d7977 Mon Sep 17 00:00:00 2001
From: Elena Reshetova <elena.reshetova@intel.com>
Date: Fri, 18 Jan 2019 14:27:26 +0200
Subject: sched/core: Convert sighand_struct.count to refcount_t

atomic_t variables are currently used to implement reference
counters with the following properties:

 - counter is initialized to 1 using atomic_set()
 - a resource is freed upon counter reaching zero
 - once counter reaches zero, its further
   increments aren't allowed
 - counter schema uses basic atomic operations
   (set, inc, inc_not_zero, dec_and_test, etc.)

Such atomic variables should be converted to a newly provided
refcount_t type and API that prevents accidental counter overflows
and underflows. This is important since overflows and underflows
can lead to use-after-free situation and be exploitable.

The variable sighand_struct.count is used as pure reference counter.
Convert it to refcount_t and fix up the operations.

** Important note for maintainers:

Some functions from refcount_t API defined in lib/refcount.c
have different memory ordering guarantees than their atomic
counterparts.

The full comparison can be seen in
https://lkml.org/lkml/2017/11/15/57 and it is hopefully soon
in state to be merged to the documentation tree.

Normally the differences should not matter since refcount_t provides
enough guarantees to satisfy the refcounting use cases, but in
some rare cases it might matter.

Please double check that you don't have some undocumented
memory guarantees for this variable usage.

For the sighand_struct.count it might make a difference
in following places:

 - __cleanup_sighand: decrement in refcount_dec_and_test() only
   provides RELEASE ordering and control dependency on success
   vs. fully ordered atomic counterpart

Suggested-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Elena Reshetova <elena.reshetova@intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: David Windsor <dwindsor@gmail.com>
Reviewed-by: Hans Liljestrand <ishkamiel@gmail.com>
Reviewed-by: Andrea Parri <andrea.parri@amarulasolutions.com>
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: akpm@linux-foundation.org
Cc: viro@zeniv.linux.org.uk
Link: https://lkml.kernel.org/r/1547814450-18902-2-git-send-email-elena.reshetova@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 fs/exec.c                    | 4 ++--
 fs/proc/task_nommu.c         | 2 +-
 include/linux/sched/signal.h | 3 ++-
 kernel/fork.c                | 8 ++++----
 4 files changed, 9 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/fs/exec.c b/fs/exec.c
index fb72d36f7823..966cd98a2ce2 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1189,7 +1189,7 @@ no_thread_group:
 	flush_itimer_signals();
 #endif
 
-	if (atomic_read(&oldsighand->count) != 1) {
+	if (refcount_read(&oldsighand->count) != 1) {
 		struct sighand_struct *newsighand;
 		/*
 		 * This ->sighand is shared with the CLONE_SIGHAND
@@ -1199,7 +1199,7 @@ no_thread_group:
 		if (!newsighand)
 			return -ENOMEM;
 
-		atomic_set(&newsighand->count, 1);
+		refcount_set(&newsighand->count, 1);
 		memcpy(newsighand->action, oldsighand->action,
 		       sizeof(newsighand->action));
 
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
index 0b63d68dedb2..f912872fbf91 100644
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -64,7 +64,7 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
 	else
 		bytes += kobjsize(current->files);
 
-	if (current->sighand && atomic_read(&current->sighand->count) > 1)
+	if (current->sighand && refcount_read(&current->sighand->count) > 1)
 		sbytes += kobjsize(current->sighand);
 	else
 		bytes += kobjsize(current->sighand);
diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index 13789d10a50e..37eeb1a28eba 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -8,13 +8,14 @@
 #include <linux/sched/jobctl.h>
 #include <linux/sched/task.h>
 #include <linux/cred.h>
+#include <linux/refcount.h>
 
 /*
  * Types defining task->signal and task->sighand and APIs using them:
  */
 
 struct sighand_struct {
-	atomic_t		count;
+	refcount_t		count;
 	struct k_sigaction	action[_NSIG];
 	spinlock_t		siglock;
 	wait_queue_head_t	signalfd_wqh;
diff --git a/kernel/fork.c b/kernel/fork.c
index b69248e6f0e0..370856d4c0b3 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1463,7 +1463,7 @@ static int copy_sighand(unsigned long clone_flags, struct task_struct *tsk)
 	struct sighand_struct *sig;
 
 	if (clone_flags & CLONE_SIGHAND) {
-		atomic_inc(&current->sighand->count);
+		refcount_inc(&current->sighand->count);
 		return 0;
 	}
 	sig = kmem_cache_alloc(sighand_cachep, GFP_KERNEL);
@@ -1471,7 +1471,7 @@ static int copy_sighand(unsigned long clone_flags, struct task_struct *tsk)
 	if (!sig)
 		return -ENOMEM;
 
-	atomic_set(&sig->count, 1);
+	refcount_set(&sig->count, 1);
 	spin_lock_irq(&current->sighand->siglock);
 	memcpy(sig->action, current->sighand->action, sizeof(sig->action));
 	spin_unlock_irq(&current->sighand->siglock);
@@ -1480,7 +1480,7 @@ static int copy_sighand(unsigned long clone_flags, struct task_struct *tsk)
 
 void __cleanup_sighand(struct sighand_struct *sighand)
 {
-	if (atomic_dec_and_test(&sighand->count)) {
+	if (refcount_dec_and_test(&sighand->count)) {
 		signalfd_cleanup(sighand);
 		/*
 		 * sighand_cachep is SLAB_TYPESAFE_BY_RCU so we can free it
@@ -2439,7 +2439,7 @@ static int check_unshare_flags(unsigned long unshare_flags)
 			return -EINVAL;
 	}
 	if (unshare_flags & (CLONE_SIGHAND | CLONE_VM)) {
-		if (atomic_read(&current->sighand->count) > 1)
+		if (refcount_read(&current->sighand->count) > 1)
 			return -EINVAL;
 	}
 	if (unshare_flags & CLONE_VM) {
-- 
cgit v1.2.3


From 60d4de3ff7f775509deba94b3db3c1abe55bf7a5 Mon Sep 17 00:00:00 2001
From: Elena Reshetova <elena.reshetova@intel.com>
Date: Fri, 18 Jan 2019 14:27:27 +0200
Subject: sched/core: Convert signal_struct.sigcnt to refcount_t

atomic_t variables are currently used to implement reference
counters with the following properties:

 - counter is initialized to 1 using atomic_set()
 - a resource is freed upon counter reaching zero
 - once counter reaches zero, its further
   increments aren't allowed
 - counter schema uses basic atomic operations
   (set, inc, inc_not_zero, dec_and_test, etc.)

Such atomic variables should be converted to a newly provided
refcount_t type and API that prevents accidental counter overflows
and underflows. This is important since overflows and underflows
can lead to use-after-free situation and be exploitable.

The variable signal_struct.sigcnt is used as pure reference counter.
Convert it to refcount_t and fix up the operations.

** Important note for maintainers:

Some functions from refcount_t API defined in lib/refcount.c
have different memory ordering guarantees than their atomic
counterparts.

The full comparison can be seen in
https://lkml.org/lkml/2017/11/15/57 and it is hopefully soon
in state to be merged to the documentation tree.

Normally the differences should not matter since refcount_t provides
enough guarantees to satisfy the refcounting use cases, but in
some rare cases it might matter.

Please double check that you don't have some undocumented
memory guarantees for this variable usage.

For the signal_struct.sigcnt it might make a difference
in following places:

 - put_signal_struct(): decrement in refcount_dec_and_test() only
   provides RELEASE ordering and control dependency on success
   vs. fully ordered atomic counterpart

Suggested-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Elena Reshetova <elena.reshetova@intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: David Windsor <dwindsor@gmail.com>
Reviewed-by: Hans Liljestrand <ishkamiel@gmail.com>
Reviewed-by: Andrea Parri <andrea.parri@amarulasolutions.com>
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: akpm@linux-foundation.org
Cc: viro@zeniv.linux.org.uk
Link: https://lkml.kernel.org/r/1547814450-18902-3-git-send-email-elena.reshetova@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched/signal.h | 2 +-
 init/init_task.c             | 2 +-
 kernel/fork.c                | 6 +++---
 3 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index 37eeb1a28eba..ae5655197698 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -83,7 +83,7 @@ struct multiprocess_signals {
  * the locking of signal_struct.
  */
 struct signal_struct {
-	atomic_t		sigcnt;
+	refcount_t		sigcnt;
 	atomic_t		live;
 	int			nr_threads;
 	struct list_head	thread_head;
diff --git a/init/init_task.c b/init/init_task.c
index 5aebe3be4d7c..9aa3ebc74970 100644
--- a/init/init_task.c
+++ b/init/init_task.c
@@ -44,7 +44,7 @@ static struct signal_struct init_signals = {
 };
 
 static struct sighand_struct init_sighand = {
-	.count		= ATOMIC_INIT(1),
+	.count		= REFCOUNT_INIT(1),
 	.action		= { { { .sa_handler = SIG_DFL, } }, },
 	.siglock	= __SPIN_LOCK_UNLOCKED(init_sighand.siglock),
 	.signalfd_wqh	= __WAIT_QUEUE_HEAD_INITIALIZER(init_sighand.signalfd_wqh),
diff --git a/kernel/fork.c b/kernel/fork.c
index 370856d4c0b3..935a42d5f8ff 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -710,7 +710,7 @@ static inline void free_signal_struct(struct signal_struct *sig)
 
 static inline void put_signal_struct(struct signal_struct *sig)
 {
-	if (atomic_dec_and_test(&sig->sigcnt))
+	if (refcount_dec_and_test(&sig->sigcnt))
 		free_signal_struct(sig);
 }
 
@@ -1527,7 +1527,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
 
 	sig->nr_threads = 1;
 	atomic_set(&sig->live, 1);
-	atomic_set(&sig->sigcnt, 1);
+	refcount_set(&sig->sigcnt, 1);
 
 	/* list_add(thread_node, thread_head) without INIT_LIST_HEAD() */
 	sig->thread_head = (struct list_head)LIST_HEAD_INIT(tsk->thread_node);
@@ -2082,7 +2082,7 @@ static __latent_entropy struct task_struct *copy_process(
 		} else {
 			current->signal->nr_threads++;
 			atomic_inc(&current->signal->live);
-			atomic_inc(&current->signal->sigcnt);
+			refcount_inc(&current->signal->sigcnt);
 			task_join_group_stop(p);
 			list_add_tail_rcu(&p->thread_group,
 					  &p->group_leader->thread_group);
-- 
cgit v1.2.3


From ec1d281923cf81cc660343d0cb8ffc837ffb991d Mon Sep 17 00:00:00 2001
From: Elena Reshetova <elena.reshetova@intel.com>
Date: Fri, 18 Jan 2019 14:27:29 +0200
Subject: sched/core: Convert task_struct.usage to refcount_t

atomic_t variables are currently used to implement reference
counters with the following properties:

 - counter is initialized to 1 using atomic_set()
 - a resource is freed upon counter reaching zero
 - once counter reaches zero, its further
   increments aren't allowed
 - counter schema uses basic atomic operations
   (set, inc, inc_not_zero, dec_and_test, etc.)

Such atomic variables should be converted to a newly provided
refcount_t type and API that prevents accidental counter overflows
and underflows. This is important since overflows and underflows
can lead to use-after-free situation and be exploitable.

The variable task_struct.usage is used as pure reference counter.
Convert it to refcount_t and fix up the operations.

** Important note for maintainers:

Some functions from refcount_t API defined in lib/refcount.c
have different memory ordering guarantees than their atomic
counterparts.

The full comparison can be seen in
https://lkml.org/lkml/2017/11/15/57 and it is hopefully soon
in state to be merged to the documentation tree.

Normally the differences should not matter since refcount_t provides
enough guarantees to satisfy the refcounting use cases, but in
some rare cases it might matter.

Please double check that you don't have some undocumented
memory guarantees for this variable usage.

For the task_struct.usage it might make a difference
in following places:

 - put_task_struct(): decrement in refcount_dec_and_test() only
   provides RELEASE ordering and control dependency on success
   vs. fully ordered atomic counterpart

Suggested-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Elena Reshetova <elena.reshetova@intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: David Windsor <dwindsor@gmail.com>
Reviewed-by: Hans Liljestrand <ishkamiel@gmail.com>
Reviewed-by: Andrea Parri <andrea.parri@amarulasolutions.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: akpm@linux-foundation.org
Cc: viro@zeniv.linux.org.uk
Link: https://lkml.kernel.org/r/1547814450-18902-5-git-send-email-elena.reshetova@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h      | 3 ++-
 include/linux/sched/task.h | 4 ++--
 init/init_task.c           | 2 +-
 kernel/fork.c              | 4 ++--
 4 files changed, 7 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index e2bba022827d..9d14d6864ca6 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -21,6 +21,7 @@
 #include <linux/seccomp.h>
 #include <linux/nodemask.h>
 #include <linux/rcupdate.h>
+#include <linux/refcount.h>
 #include <linux/resource.h>
 #include <linux/latencytop.h>
 #include <linux/sched/prio.h>
@@ -607,7 +608,7 @@ struct task_struct {
 	randomized_struct_fields_start
 
 	void				*stack;
-	atomic_t			usage;
+	refcount_t			usage;
 	/* Per task flags (PF_*), defined further below: */
 	unsigned int			flags;
 	unsigned int			ptrace;
diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h
index 44c6f15800ff..2e97a2227045 100644
--- a/include/linux/sched/task.h
+++ b/include/linux/sched/task.h
@@ -88,13 +88,13 @@ extern void sched_exec(void);
 #define sched_exec()   {}
 #endif
 
-#define get_task_struct(tsk) do { atomic_inc(&(tsk)->usage); } while(0)
+#define get_task_struct(tsk) do { refcount_inc(&(tsk)->usage); } while(0)
 
 extern void __put_task_struct(struct task_struct *t);
 
 static inline void put_task_struct(struct task_struct *t)
 {
-	if (atomic_dec_and_test(&t->usage))
+	if (refcount_dec_and_test(&t->usage))
 		__put_task_struct(t);
 }
 
diff --git a/init/init_task.c b/init/init_task.c
index 9aa3ebc74970..aca34c89529f 100644
--- a/init/init_task.c
+++ b/init/init_task.c
@@ -65,7 +65,7 @@ struct task_struct init_task
 #endif
 	.state		= 0,
 	.stack		= init_stack,
-	.usage		= ATOMIC_INIT(2),
+	.usage		= REFCOUNT_INIT(2),
 	.flags		= PF_KTHREAD,
 	.prio		= MAX_PRIO - 20,
 	.static_prio	= MAX_PRIO - 20,
diff --git a/kernel/fork.c b/kernel/fork.c
index 935a42d5f8ff..3f7e192e29f2 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -717,7 +717,7 @@ static inline void put_signal_struct(struct signal_struct *sig)
 void __put_task_struct(struct task_struct *tsk)
 {
 	WARN_ON(!tsk->exit_state);
-	WARN_ON(atomic_read(&tsk->usage));
+	WARN_ON(refcount_read(&tsk->usage));
 	WARN_ON(tsk == current);
 
 	cgroup_free(tsk);
@@ -896,7 +896,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
 	 * One for us, one for whoever does the "release_task()" (usually
 	 * parent)
 	 */
-	atomic_set(&tsk->usage, 2);
+	refcount_set(&tsk->usage, 2);
 #ifdef CONFIG_BLK_DEV_IO_TRACE
 	tsk->btrace_seq = 0;
 #endif
-- 
cgit v1.2.3


From f0b89d3958d73cd0785ec381f0ddf8efb6f183d8 Mon Sep 17 00:00:00 2001
From: Elena Reshetova <elena.reshetova@intel.com>
Date: Fri, 18 Jan 2019 14:27:30 +0200
Subject: sched/core: Convert task_struct.stack_refcount to refcount_t

atomic_t variables are currently used to implement reference
counters with the following properties:

 - counter is initialized to 1 using atomic_set()
 - a resource is freed upon counter reaching zero
 - once counter reaches zero, its further
   increments aren't allowed
 - counter schema uses basic atomic operations
   (set, inc, inc_not_zero, dec_and_test, etc.)

Such atomic variables should be converted to a newly provided
refcount_t type and API that prevents accidental counter overflows
and underflows. This is important since overflows and underflows
can lead to use-after-free situation and be exploitable.

The variable task_struct.stack_refcount is used as pure reference counter.
Convert it to refcount_t and fix up the operations.

** Important note for maintainers:

Some functions from refcount_t API defined in lib/refcount.c
have different memory ordering guarantees than their atomic
counterparts.

The full comparison can be seen in
https://lkml.org/lkml/2017/11/15/57 and it is hopefully soon
in state to be merged to the documentation tree.

Normally the differences should not matter since refcount_t provides
enough guarantees to satisfy the refcounting use cases, but in
some rare cases it might matter.

Please double check that you don't have some undocumented
memory guarantees for this variable usage.

For the task_struct.stack_refcount it might make a difference
in following places:

 - try_get_task_stack(): increment in refcount_inc_not_zero() only
   guarantees control dependency on success vs. fully ordered
   atomic counterpart
 - put_task_stack(): decrement in refcount_dec_and_test() only
   provides RELEASE ordering and control dependency on success
   vs. fully ordered atomic counterpart

Suggested-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Elena Reshetova <elena.reshetova@intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: David Windsor <dwindsor@gmail.com>
Reviewed-by: Hans Liljestrand <ishkamiel@gmail.com>
Reviewed-by: Andrea Parri <andrea.parri@amarulasolutions.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: akpm@linux-foundation.org
Cc: viro@zeniv.linux.org.uk
Link: https://lkml.kernel.org/r/1547814450-18902-6-git-send-email-elena.reshetova@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/init_task.h        | 1 +
 include/linux/sched.h            | 2 +-
 include/linux/sched/task_stack.h | 2 +-
 init/init_task.c                 | 2 +-
 kernel/fork.c                    | 6 +++---
 5 files changed, 7 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index a7083a45a26c..6049baa5b8bc 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -13,6 +13,7 @@
 #include <linux/securebits.h>
 #include <linux/seqlock.h>
 #include <linux/rbtree.h>
+#include <linux/refcount.h>
 #include <linux/sched/autogroup.h>
 #include <net/net_namespace.h>
 #include <linux/sched/rt.h>
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 9d14d6864ca6..628bf13cb5a5 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1194,7 +1194,7 @@ struct task_struct {
 #endif
 #ifdef CONFIG_THREAD_INFO_IN_TASK
 	/* A live task holds one reference: */
-	atomic_t			stack_refcount;
+	refcount_t			stack_refcount;
 #endif
 #ifdef CONFIG_LIVEPATCH
 	int patch_state;
diff --git a/include/linux/sched/task_stack.h b/include/linux/sched/task_stack.h
index 6a841929073f..2413427e439c 100644
--- a/include/linux/sched/task_stack.h
+++ b/include/linux/sched/task_stack.h
@@ -61,7 +61,7 @@ static inline unsigned long *end_of_stack(struct task_struct *p)
 #ifdef CONFIG_THREAD_INFO_IN_TASK
 static inline void *try_get_task_stack(struct task_struct *tsk)
 {
-	return atomic_inc_not_zero(&tsk->stack_refcount) ?
+	return refcount_inc_not_zero(&tsk->stack_refcount) ?
 		task_stack_page(tsk) : NULL;
 }
 
diff --git a/init/init_task.c b/init/init_task.c
index aca34c89529f..46dbf546264d 100644
--- a/init/init_task.c
+++ b/init/init_task.c
@@ -61,7 +61,7 @@ struct task_struct init_task
 = {
 #ifdef CONFIG_THREAD_INFO_IN_TASK
 	.thread_info	= INIT_THREAD_INFO(init_task),
-	.stack_refcount	= ATOMIC_INIT(1),
+	.stack_refcount	= REFCOUNT_INIT(1),
 #endif
 	.state		= 0,
 	.stack		= init_stack,
diff --git a/kernel/fork.c b/kernel/fork.c
index 3f7e192e29f2..77059b211608 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -429,7 +429,7 @@ static void release_task_stack(struct task_struct *tsk)
 #ifdef CONFIG_THREAD_INFO_IN_TASK
 void put_task_stack(struct task_struct *tsk)
 {
-	if (atomic_dec_and_test(&tsk->stack_refcount))
+	if (refcount_dec_and_test(&tsk->stack_refcount))
 		release_task_stack(tsk);
 }
 #endif
@@ -447,7 +447,7 @@ void free_task(struct task_struct *tsk)
 	 * If the task had a separate stack allocation, it should be gone
 	 * by now.
 	 */
-	WARN_ON_ONCE(atomic_read(&tsk->stack_refcount) != 0);
+	WARN_ON_ONCE(refcount_read(&tsk->stack_refcount) != 0);
 #endif
 	rt_mutex_debug_task_free(tsk);
 	ftrace_graph_exit_task(tsk);
@@ -867,7 +867,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
 	tsk->stack_vm_area = stack_vm_area;
 #endif
 #ifdef CONFIG_THREAD_INFO_IN_TASK
-	atomic_set(&tsk->stack_refcount, 1);
+	refcount_set(&tsk->stack_refcount, 1);
 #endif
 
 	if (err)
-- 
cgit v1.2.3


From 07879c6a3740fbbf3c8891a0ab484c20a12794d8 Mon Sep 17 00:00:00 2001
From: Davidlohr Bueso <dave@stgolabs.net>
Date: Tue, 18 Dec 2018 11:53:52 -0800
Subject: sched/wake_q: Reduce reference counting for special users

Some users, specifically futexes and rwsems, required fixes
that allowed the callers to be safe when wakeups occur before
they are expected by wake_up_q(). Such scenarios also play
games and rely on reference counting, and until now were
pivoting on wake_q doing it. With the wake_q_add() call being
moved down, this can no longer be the case. As such we end up
with a a double task refcounting overhead; and these callers
care enough about this (being rather core-ish).

This patch introduces a wake_q_add_safe() call that serves
for callers that have already done refcounting and therefore the
task is 'safe' from wake_q point of view (int that it requires
reference throughout the entire queue/>wakeup cycle). In the one
case it has internal reference counting, in the other case it
consumes the reference counting.

Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Waiman Long <longman@redhat.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Xie Yongji <xieyongji@baidu.com>
Cc: Yongji Xie <elohimes@gmail.com>
Cc: andrea.parri@amarulasolutions.com
Cc: lilin24@baidu.com
Cc: liuqi16@baidu.com
Cc: nixun@baidu.com
Cc: yuanlinsi01@baidu.com
Cc: zhangyu31@baidu.com
Link: https://lkml.kernel.org/r/20181218195352.7orq3upiwfdbrdne@linux-r8p5
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched/wake_q.h |  4 +--
 kernel/futex.c               |  3 +--
 kernel/locking/rwsem-xadd.c  |  4 +--
 kernel/sched/core.c          | 60 ++++++++++++++++++++++++++++++++------------
 4 files changed, 48 insertions(+), 23 deletions(-)

(limited to 'include')

diff --git a/include/linux/sched/wake_q.h b/include/linux/sched/wake_q.h
index 545f37138057..ad826d2a4557 100644
--- a/include/linux/sched/wake_q.h
+++ b/include/linux/sched/wake_q.h
@@ -51,8 +51,8 @@ static inline void wake_q_init(struct wake_q_head *head)
 	head->lastp = &head->first;
 }
 
-extern void wake_q_add(struct wake_q_head *head,
-		       struct task_struct *task);
+extern void wake_q_add(struct wake_q_head *head, struct task_struct *task);
+extern void wake_q_add_safe(struct wake_q_head *head, struct task_struct *task);
 extern void wake_up_q(struct wake_q_head *head);
 
 #endif /* _LINUX_SCHED_WAKE_Q_H */
diff --git a/kernel/futex.c b/kernel/futex.c
index 69e619baf709..2abe1a0b3062 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1463,8 +1463,7 @@ static void mark_wake_futex(struct wake_q_head *wake_q, struct futex_q *q)
 	 * Queue the task for later wakeup for after we've released
 	 * the hb->lock. wake_q_add() grabs reference to p.
 	 */
-	wake_q_add(wake_q, p);
-	put_task_struct(p);
+	wake_q_add_safe(wake_q, p);
 }
 
 /*
diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c
index 50d9af615dc4..fbe96341beee 100644
--- a/kernel/locking/rwsem-xadd.c
+++ b/kernel/locking/rwsem-xadd.c
@@ -211,9 +211,7 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem,
 		 * Ensure issuing the wakeup (either by us or someone else)
 		 * after setting the reader waiter to nil.
 		 */
-		wake_q_add(wake_q, tsk);
-		/* wake_q_add() already take the task ref */
-		put_task_struct(tsk);
+		wake_q_add_safe(wake_q, tsk);
 	}
 
 	adjustment = woken * RWSEM_ACTIVE_READ_BIAS - adjustment;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 3c8b4dba3d2d..64ceaa5158c5 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -396,19 +396,7 @@ static bool set_nr_if_polling(struct task_struct *p)
 #endif
 #endif
 
-/**
- * wake_q_add() - queue a wakeup for 'later' waking.
- * @head: the wake_q_head to add @task to
- * @task: the task to queue for 'later' wakeup
- *
- * Queue a task for later wakeup, most likely by the wake_up_q() call in the
- * same context, _HOWEVER_ this is not guaranteed, the wakeup can come
- * instantly.
- *
- * This function must be used as-if it were wake_up_process(); IOW the task
- * must be ready to be woken at this location.
- */
-void wake_q_add(struct wake_q_head *head, struct task_struct *task)
+static bool __wake_q_add(struct wake_q_head *head, struct task_struct *task)
 {
 	struct wake_q_node *node = &task->wake_q;
 
@@ -422,15 +410,55 @@ void wake_q_add(struct wake_q_head *head, struct task_struct *task)
 	 */
 	smp_mb__before_atomic();
 	if (unlikely(cmpxchg_relaxed(&node->next, NULL, WAKE_Q_TAIL)))
-		return;
-
-	get_task_struct(task);
+		return false;
 
 	/*
 	 * The head is context local, there can be no concurrency.
 	 */
 	*head->lastp = node;
 	head->lastp = &node->next;
+	return true;
+}
+
+/**
+ * wake_q_add() - queue a wakeup for 'later' waking.
+ * @head: the wake_q_head to add @task to
+ * @task: the task to queue for 'later' wakeup
+ *
+ * Queue a task for later wakeup, most likely by the wake_up_q() call in the
+ * same context, _HOWEVER_ this is not guaranteed, the wakeup can come
+ * instantly.
+ *
+ * This function must be used as-if it were wake_up_process(); IOW the task
+ * must be ready to be woken at this location.
+ */
+void wake_q_add(struct wake_q_head *head, struct task_struct *task)
+{
+	if (__wake_q_add(head, task))
+		get_task_struct(task);
+}
+
+/**
+ * wake_q_add_safe() - safely queue a wakeup for 'later' waking.
+ * @head: the wake_q_head to add @task to
+ * @task: the task to queue for 'later' wakeup
+ *
+ * Queue a task for later wakeup, most likely by the wake_up_q() call in the
+ * same context, _HOWEVER_ this is not guaranteed, the wakeup can come
+ * instantly.
+ *
+ * This function must be used as-if it were wake_up_process(); IOW the task
+ * must be ready to be woken at this location.
+ *
+ * This function is essentially a task-safe equivalent to wake_q_add(). Callers
+ * that already hold reference to @task can call the 'safe' version and trust
+ * wake_q to do the right thing depending whether or not the @task is already
+ * queued for wakeup.
+ */
+void wake_q_add_safe(struct wake_q_head *head, struct task_struct *task)
+{
+	if (!__wake_q_add(head, task))
+		put_task_struct(task);
 }
 
 void wake_up_q(struct wake_q_head *head)
-- 
cgit v1.2.3


From 23127296889fe84b0762b191b5d041e8ba6f2599 Mon Sep 17 00:00:00 2001
From: Vincent Guittot <vincent.guittot@linaro.org>
Date: Wed, 23 Jan 2019 16:26:53 +0100
Subject: sched/fair: Update scale invariance of PELT

The current implementation of load tracking invariance scales the
contribution with current frequency and uarch performance (only for
utilization) of the CPU. One main result of this formula is that the
figures are capped by current capacity of CPU. Another one is that the
load_avg is not invariant because not scaled with uarch.

The util_avg of a periodic task that runs r time slots every p time slots
varies in the range :

    U * (1-y^r)/(1-y^p) * y^i < Utilization < U * (1-y^r)/(1-y^p)

with U is the max util_avg value = SCHED_CAPACITY_SCALE

At a lower capacity, the range becomes:

    U * C * (1-y^r')/(1-y^p) * y^i' < Utilization <  U * C * (1-y^r')/(1-y^p)

with C reflecting the compute capacity ratio between current capacity and
max capacity.

so C tries to compensate changes in (1-y^r') but it can't be accurate.

Instead of scaling the contribution value of PELT algo, we should scale the
running time. The PELT signal aims to track the amount of computation of
tasks and/or rq so it seems more correct to scale the running time to
reflect the effective amount of computation done since the last update.

In order to be fully invariant, we need to apply the same amount of
running time and idle time whatever the current capacity. Because running
at lower capacity implies that the task will run longer, we have to ensure
that the same amount of idle time will be applied when system becomes idle
and no idle time has been "stolen". But reaching the maximum utilization
value (SCHED_CAPACITY_SCALE) means that the task is seen as an
always-running task whatever the capacity of the CPU (even at max compute
capacity). In this case, we can discard this "stolen" idle times which
becomes meaningless.

In order to achieve this time scaling, a new clock_pelt is created per rq.
The increase of this clock scales with current capacity when something
is running on rq and synchronizes with clock_task when rq is idle. With
this mechanism, we ensure the same running and idle time whatever the
current capacity. This also enables to simplify the pelt algorithm by
removing all references of uarch and frequency and applying the same
contribution to utilization and loads. Furthermore, the scaling is done
only once per update of clock (update_rq_clock_task()) instead of during
each update of sched_entities and cfs/rt/dl_rq of the rq like the current
implementation. This is interesting when cgroup are involved as shown in
the results below:

On a hikey (octo Arm64 platform).
Performance cpufreq governor and only shallowest c-state to remove variance
generated by those power features so we only track the impact of pelt algo.

each test runs 16 times:

	./perf bench sched pipe
	(higher is better)
	kernel	tip/sched/core     + patch
	        ops/seconds        ops/seconds         diff
	cgroup
	root    59652(+/- 0.18%)   59876(+/- 0.24%)    +0.38%
	level1  55608(+/- 0.27%)   55923(+/- 0.24%)    +0.57%
	level2  52115(+/- 0.29%)   52564(+/- 0.22%)    +0.86%

	hackbench -l 1000
	(lower is better)
	kernel	tip/sched/core     + patch
	        duration(sec)      duration(sec)        diff
	cgroup
	root    4.453(+/- 2.37%)   4.383(+/- 2.88%)     -1.57%
	level1  4.859(+/- 8.50%)   4.830(+/- 7.07%)     -0.60%
	level2  5.063(+/- 9.83%)   4.928(+/- 9.66%)     -2.66%

Then, the responsiveness of PELT is improved when CPU is not running at max
capacity with this new algorithm. I have put below some examples of
duration to reach some typical load values according to the capacity of the
CPU with current implementation and with this patch. These values has been
computed based on the geometric series and the half period value:

  Util (%)     max capacity  half capacity(mainline)  half capacity(w/ patch)
  972 (95%)    138ms         not reachable            276ms
  486 (47.5%)  30ms          138ms                     60ms
  256 (25%)    13ms           32ms                     26ms

On my hikey (octo Arm64 platform) with schedutil governor, the time to
reach max OPP when starting from a null utilization, decreases from 223ms
with current scale invariance down to 121ms with the new algorithm.

Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Morten.Rasmussen@arm.com
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: bsegall@google.com
Cc: dietmar.eggemann@arm.com
Cc: patrick.bellasi@arm.com
Cc: pjt@google.com
Cc: pkondeti@codeaurora.org
Cc: quentin.perret@arm.com
Cc: rjw@rjwysocki.net
Cc: srinivas.pandruvada@linux.intel.com
Cc: thara.gopinath@linaro.org
Link: https://lkml.kernel.org/r/1548257214-13745-3-git-send-email-vincent.guittot@linaro.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h   |  23 +++-------
 kernel/sched/core.c     |   1 +
 kernel/sched/deadline.c |   6 +--
 kernel/sched/fair.c     |  45 ++++++++++---------
 kernel/sched/pelt.c     |  45 ++++++++++---------
 kernel/sched/pelt.h     | 114 ++++++++++++++++++++++++++++++++++++++++++++++--
 kernel/sched/rt.c       |   6 +--
 kernel/sched/sched.h    |   5 ++-
 8 files changed, 177 insertions(+), 68 deletions(-)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 628bf13cb5a5..351c0fe64c85 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -357,12 +357,6 @@ struct util_est {
  * For cfs_rq, it is the aggregated load_avg of all runnable and
  * blocked sched_entities.
  *
- * load_avg may also take frequency scaling into account:
- *
- *   load_avg = runnable% * scale_load_down(load) * freq%
- *
- * where freq% is the CPU frequency normalized to the highest frequency.
- *
  * [util_avg definition]
  *
  *   util_avg = running% * SCHED_CAPACITY_SCALE
@@ -371,17 +365,14 @@ struct util_est {
  * a CPU. For cfs_rq, it is the aggregated util_avg of all runnable
  * and blocked sched_entities.
  *
- * util_avg may also factor frequency scaling and CPU capacity scaling:
- *
- *   util_avg = running% * SCHED_CAPACITY_SCALE * freq% * capacity%
- *
- * where freq% is the same as above, and capacity% is the CPU capacity
- * normalized to the greatest capacity (due to uarch differences, etc).
+ * load_avg and util_avg don't direcly factor frequency scaling and CPU
+ * capacity scaling. The scaling is done through the rq_clock_pelt that
+ * is used for computing those signals (see update_rq_clock_pelt())
  *
- * N.B., the above ratios (runnable%, running%, freq%, and capacity%)
- * themselves are in the range of [0, 1]. To do fixed point arithmetics,
- * we therefore scale them to as large a range as necessary. This is for
- * example reflected by util_avg's SCHED_CAPACITY_SCALE.
+ * N.B., the above ratios (runnable% and running%) themselves are in the
+ * range of [0, 1]. To do fixed point arithmetics, we therefore scale them
+ * to as large a range as necessary. This is for example reflected by
+ * util_avg's SCHED_CAPACITY_SCALE.
  *
  * [Overflow issue]
  *
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index a674c7db2f29..32e06704565e 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -180,6 +180,7 @@ static void update_rq_clock_task(struct rq *rq, s64 delta)
 	if ((irq_delta + steal) && sched_feat(NONTASK_CAPACITY))
 		update_irq_load_avg(rq, irq_delta + steal);
 #endif
+	update_rq_clock_pelt(rq, delta);
 }
 
 void update_rq_clock(struct rq *rq)
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index fb8b7b5d745d..6a73e41a2016 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -1767,7 +1767,7 @@ pick_next_task_dl(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
 	deadline_queue_push_tasks(rq);
 
 	if (rq->curr->sched_class != &dl_sched_class)
-		update_dl_rq_load_avg(rq_clock_task(rq), rq, 0);
+		update_dl_rq_load_avg(rq_clock_pelt(rq), rq, 0);
 
 	return p;
 }
@@ -1776,7 +1776,7 @@ static void put_prev_task_dl(struct rq *rq, struct task_struct *p)
 {
 	update_curr_dl(rq);
 
-	update_dl_rq_load_avg(rq_clock_task(rq), rq, 1);
+	update_dl_rq_load_avg(rq_clock_pelt(rq), rq, 1);
 	if (on_dl_rq(&p->dl) && p->nr_cpus_allowed > 1)
 		enqueue_pushable_dl_task(rq, p);
 }
@@ -1793,7 +1793,7 @@ static void task_tick_dl(struct rq *rq, struct task_struct *p, int queued)
 {
 	update_curr_dl(rq);
 
-	update_dl_rq_load_avg(rq_clock_task(rq), rq, 1);
+	update_dl_rq_load_avg(rq_clock_pelt(rq), rq, 1);
 	/*
 	 * Even when we have runtime, update_curr_dl() might have resulted in us
 	 * not being the leftmost task anymore. In that case NEED_RESCHED will
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index da13e834e990..f41f2eec6186 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -673,9 +673,8 @@ static u64 sched_vslice(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	return calc_delta_fair(sched_slice(cfs_rq, se), se);
 }
 
-#ifdef CONFIG_SMP
 #include "pelt.h"
-#include "sched-pelt.h"
+#ifdef CONFIG_SMP
 
 static int select_idle_sibling(struct task_struct *p, int prev_cpu, int cpu);
 static unsigned long task_h_load(struct task_struct *p);
@@ -763,7 +762,7 @@ void post_init_entity_util_avg(struct sched_entity *se)
 			 * such that the next switched_to_fair() has the
 			 * expected state.
 			 */
-			se->avg.last_update_time = cfs_rq_clock_task(cfs_rq);
+			se->avg.last_update_time = cfs_rq_clock_pelt(cfs_rq);
 			return;
 		}
 	}
@@ -3109,7 +3108,7 @@ void set_task_rq_fair(struct sched_entity *se,
 	p_last_update_time = prev->avg.last_update_time;
 	n_last_update_time = next->avg.last_update_time;
 #endif
-	__update_load_avg_blocked_se(p_last_update_time, cpu_of(rq_of(prev)), se);
+	__update_load_avg_blocked_se(p_last_update_time, se);
 	se->avg.last_update_time = n_last_update_time;
 }
 
@@ -3244,11 +3243,11 @@ update_tg_cfs_runnable(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cf
 
 	/*
 	 * runnable_sum can't be lower than running_sum
-	 * As running sum is scale with CPU capacity wehreas the runnable sum
-	 * is not we rescale running_sum 1st
+	 * Rescale running sum to be in the same range as runnable sum
+	 * running_sum is in [0 : LOAD_AVG_MAX <<  SCHED_CAPACITY_SHIFT]
+	 * runnable_sum is in [0 : LOAD_AVG_MAX]
 	 */
-	running_sum = se->avg.util_sum /
-		arch_scale_cpu_capacity(NULL, cpu_of(rq_of(cfs_rq)));
+	running_sum = se->avg.util_sum >> SCHED_CAPACITY_SHIFT;
 	runnable_sum = max(runnable_sum, running_sum);
 
 	load_sum = (s64)se_weight(se) * runnable_sum;
@@ -3351,7 +3350,7 @@ static inline void add_tg_cfs_propagate(struct cfs_rq *cfs_rq, long runnable_sum
 
 /**
  * update_cfs_rq_load_avg - update the cfs_rq's load/util averages
- * @now: current time, as per cfs_rq_clock_task()
+ * @now: current time, as per cfs_rq_clock_pelt()
  * @cfs_rq: cfs_rq to update
  *
  * The cfs_rq avg is the direct sum of all its entities (blocked and runnable)
@@ -3396,7 +3395,7 @@ update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq)
 		decayed = 1;
 	}
 
-	decayed |= __update_load_avg_cfs_rq(now, cpu_of(rq_of(cfs_rq)), cfs_rq);
+	decayed |= __update_load_avg_cfs_rq(now, cfs_rq);
 
 #ifndef CONFIG_64BIT
 	smp_wmb();
@@ -3486,9 +3485,7 @@ static void detach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s
 /* Update task and its cfs_rq load average */
 static inline void update_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 {
-	u64 now = cfs_rq_clock_task(cfs_rq);
-	struct rq *rq = rq_of(cfs_rq);
-	int cpu = cpu_of(rq);
+	u64 now = cfs_rq_clock_pelt(cfs_rq);
 	int decayed;
 
 	/*
@@ -3496,7 +3493,7 @@ static inline void update_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s
 	 * track group sched_entity load average for task_h_load calc in migration
 	 */
 	if (se->avg.last_update_time && !(flags & SKIP_AGE_LOAD))
-		__update_load_avg_se(now, cpu, cfs_rq, se);
+		__update_load_avg_se(now, cfs_rq, se);
 
 	decayed  = update_cfs_rq_load_avg(now, cfs_rq);
 	decayed |= propagate_entity_load_avg(se);
@@ -3548,7 +3545,7 @@ void sync_entity_load_avg(struct sched_entity *se)
 	u64 last_update_time;
 
 	last_update_time = cfs_rq_last_update_time(cfs_rq);
-	__update_load_avg_blocked_se(last_update_time, cpu_of(rq_of(cfs_rq)), se);
+	__update_load_avg_blocked_se(last_update_time, se);
 }
 
 /*
@@ -7015,6 +7012,12 @@ idle:
 	if (new_tasks > 0)
 		goto again;
 
+	/*
+	 * rq is about to be idle, check if we need to update the
+	 * lost_idle_time of clock_pelt
+	 */
+	update_idle_rq_clock_pelt(rq);
+
 	return NULL;
 }
 
@@ -7657,7 +7660,7 @@ static void update_blocked_averages(int cpu)
 		if (throttled_hierarchy(cfs_rq))
 			continue;
 
-		if (update_cfs_rq_load_avg(cfs_rq_clock_task(cfs_rq), cfs_rq))
+		if (update_cfs_rq_load_avg(cfs_rq_clock_pelt(cfs_rq), cfs_rq))
 			update_tg_load_avg(cfs_rq, 0);
 
 		/* Propagate pending load changes to the parent, if any: */
@@ -7671,8 +7674,8 @@ static void update_blocked_averages(int cpu)
 	}
 
 	curr_class = rq->curr->sched_class;
-	update_rt_rq_load_avg(rq_clock_task(rq), rq, curr_class == &rt_sched_class);
-	update_dl_rq_load_avg(rq_clock_task(rq), rq, curr_class == &dl_sched_class);
+	update_rt_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &rt_sched_class);
+	update_dl_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &dl_sched_class);
 	update_irq_load_avg(rq, 0);
 	/* Don't need periodic decay once load/util_avg are null */
 	if (others_have_blocked(rq))
@@ -7742,11 +7745,11 @@ static inline void update_blocked_averages(int cpu)
 
 	rq_lock_irqsave(rq, &rf);
 	update_rq_clock(rq);
-	update_cfs_rq_load_avg(cfs_rq_clock_task(cfs_rq), cfs_rq);
+	update_cfs_rq_load_avg(cfs_rq_clock_pelt(cfs_rq), cfs_rq);
 
 	curr_class = rq->curr->sched_class;
-	update_rt_rq_load_avg(rq_clock_task(rq), rq, curr_class == &rt_sched_class);
-	update_dl_rq_load_avg(rq_clock_task(rq), rq, curr_class == &dl_sched_class);
+	update_rt_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &rt_sched_class);
+	update_dl_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &dl_sched_class);
 	update_irq_load_avg(rq, 0);
 #ifdef CONFIG_NO_HZ_COMMON
 	rq->last_blocked_load_update_tick = jiffies;
diff --git a/kernel/sched/pelt.c b/kernel/sched/pelt.c
index 90fb5bc12ad4..befce29bd882 100644
--- a/kernel/sched/pelt.c
+++ b/kernel/sched/pelt.c
@@ -26,7 +26,6 @@
 
 #include <linux/sched.h>
 #include "sched.h"
-#include "sched-pelt.h"
 #include "pelt.h"
 
 /*
@@ -106,16 +105,12 @@ static u32 __accumulate_pelt_segments(u64 periods, u32 d1, u32 d3)
  *                     n=1
  */
 static __always_inline u32
-accumulate_sum(u64 delta, int cpu, struct sched_avg *sa,
+accumulate_sum(u64 delta, struct sched_avg *sa,
 	       unsigned long load, unsigned long runnable, int running)
 {
-	unsigned long scale_freq, scale_cpu;
 	u32 contrib = (u32)delta; /* p == 0 -> delta < 1024 */
 	u64 periods;
 
-	scale_freq = arch_scale_freq_capacity(cpu);
-	scale_cpu = arch_scale_cpu_capacity(NULL, cpu);
-
 	delta += sa->period_contrib;
 	periods = delta / 1024; /* A period is 1024us (~1ms) */
 
@@ -137,13 +132,12 @@ accumulate_sum(u64 delta, int cpu, struct sched_avg *sa,
 	}
 	sa->period_contrib = delta;
 
-	contrib = cap_scale(contrib, scale_freq);
 	if (load)
 		sa->load_sum += load * contrib;
 	if (runnable)
 		sa->runnable_load_sum += runnable * contrib;
 	if (running)
-		sa->util_sum += contrib * scale_cpu;
+		sa->util_sum += contrib << SCHED_CAPACITY_SHIFT;
 
 	return periods;
 }
@@ -177,7 +171,7 @@ accumulate_sum(u64 delta, int cpu, struct sched_avg *sa,
  *            = u_0 + u_1*y + u_2*y^2 + ... [re-labeling u_i --> u_{i+1}]
  */
 static __always_inline int
-___update_load_sum(u64 now, int cpu, struct sched_avg *sa,
+___update_load_sum(u64 now, struct sched_avg *sa,
 		  unsigned long load, unsigned long runnable, int running)
 {
 	u64 delta;
@@ -221,7 +215,7 @@ ___update_load_sum(u64 now, int cpu, struct sched_avg *sa,
 	 * Step 1: accumulate *_sum since last_update_time. If we haven't
 	 * crossed period boundaries, finish.
 	 */
-	if (!accumulate_sum(delta, cpu, sa, load, runnable, running))
+	if (!accumulate_sum(delta, sa, load, runnable, running))
 		return 0;
 
 	return 1;
@@ -267,9 +261,9 @@ ___update_load_avg(struct sched_avg *sa, unsigned long load, unsigned long runna
  *   runnable_load_avg = \Sum se->avg.runable_load_avg
  */
 
-int __update_load_avg_blocked_se(u64 now, int cpu, struct sched_entity *se)
+int __update_load_avg_blocked_se(u64 now, struct sched_entity *se)
 {
-	if (___update_load_sum(now, cpu, &se->avg, 0, 0, 0)) {
+	if (___update_load_sum(now, &se->avg, 0, 0, 0)) {
 		___update_load_avg(&se->avg, se_weight(se), se_runnable(se));
 		return 1;
 	}
@@ -277,9 +271,9 @@ int __update_load_avg_blocked_se(u64 now, int cpu, struct sched_entity *se)
 	return 0;
 }
 
-int __update_load_avg_se(u64 now, int cpu, struct cfs_rq *cfs_rq, struct sched_entity *se)
+int __update_load_avg_se(u64 now, struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
-	if (___update_load_sum(now, cpu, &se->avg, !!se->on_rq, !!se->on_rq,
+	if (___update_load_sum(now, &se->avg, !!se->on_rq, !!se->on_rq,
 				cfs_rq->curr == se)) {
 
 		___update_load_avg(&se->avg, se_weight(se), se_runnable(se));
@@ -290,9 +284,9 @@ int __update_load_avg_se(u64 now, int cpu, struct cfs_rq *cfs_rq, struct sched_e
 	return 0;
 }
 
-int __update_load_avg_cfs_rq(u64 now, int cpu, struct cfs_rq *cfs_rq)
+int __update_load_avg_cfs_rq(u64 now, struct cfs_rq *cfs_rq)
 {
-	if (___update_load_sum(now, cpu, &cfs_rq->avg,
+	if (___update_load_sum(now, &cfs_rq->avg,
 				scale_load_down(cfs_rq->load.weight),
 				scale_load_down(cfs_rq->runnable_weight),
 				cfs_rq->curr != NULL)) {
@@ -317,7 +311,7 @@ int __update_load_avg_cfs_rq(u64 now, int cpu, struct cfs_rq *cfs_rq)
 
 int update_rt_rq_load_avg(u64 now, struct rq *rq, int running)
 {
-	if (___update_load_sum(now, rq->cpu, &rq->avg_rt,
+	if (___update_load_sum(now, &rq->avg_rt,
 				running,
 				running,
 				running)) {
@@ -340,7 +334,7 @@ int update_rt_rq_load_avg(u64 now, struct rq *rq, int running)
 
 int update_dl_rq_load_avg(u64 now, struct rq *rq, int running)
 {
-	if (___update_load_sum(now, rq->cpu, &rq->avg_dl,
+	if (___update_load_sum(now, &rq->avg_dl,
 				running,
 				running,
 				running)) {
@@ -365,22 +359,31 @@ int update_dl_rq_load_avg(u64 now, struct rq *rq, int running)
 int update_irq_load_avg(struct rq *rq, u64 running)
 {
 	int ret = 0;
+
+	/*
+	 * We can't use clock_pelt because irq time is not accounted in
+	 * clock_task. Instead we directly scale the running time to
+	 * reflect the real amount of computation
+	 */
+	running = cap_scale(running, arch_scale_freq_capacity(cpu_of(rq)));
+	running = cap_scale(running, arch_scale_cpu_capacity(NULL, cpu_of(rq)));
+
 	/*
 	 * We know the time that has been used by interrupt since last update
 	 * but we don't when. Let be pessimistic and assume that interrupt has
 	 * happened just before the update. This is not so far from reality
 	 * because interrupt will most probably wake up task and trig an update
-	 * of rq clock during which the metric si updated.
+	 * of rq clock during which the metric is updated.
 	 * We start to decay with normal context time and then we add the
 	 * interrupt context time.
 	 * We can safely remove running from rq->clock because
 	 * rq->clock += delta with delta >= running
 	 */
-	ret = ___update_load_sum(rq->clock - running, rq->cpu, &rq->avg_irq,
+	ret = ___update_load_sum(rq->clock - running, &rq->avg_irq,
 				0,
 				0,
 				0);
-	ret += ___update_load_sum(rq->clock, rq->cpu, &rq->avg_irq,
+	ret += ___update_load_sum(rq->clock, &rq->avg_irq,
 				1,
 				1,
 				1);
diff --git a/kernel/sched/pelt.h b/kernel/sched/pelt.h
index 7e56b489ff32..7489d5f56960 100644
--- a/kernel/sched/pelt.h
+++ b/kernel/sched/pelt.h
@@ -1,8 +1,9 @@
 #ifdef CONFIG_SMP
+#include "sched-pelt.h"
 
-int __update_load_avg_blocked_se(u64 now, int cpu, struct sched_entity *se);
-int __update_load_avg_se(u64 now, int cpu, struct cfs_rq *cfs_rq, struct sched_entity *se);
-int __update_load_avg_cfs_rq(u64 now, int cpu, struct cfs_rq *cfs_rq);
+int __update_load_avg_blocked_se(u64 now, struct sched_entity *se);
+int __update_load_avg_se(u64 now, struct cfs_rq *cfs_rq, struct sched_entity *se);
+int __update_load_avg_cfs_rq(u64 now, struct cfs_rq *cfs_rq);
 int update_rt_rq_load_avg(u64 now, struct rq *rq, int running);
 int update_dl_rq_load_avg(u64 now, struct rq *rq, int running);
 
@@ -42,6 +43,101 @@ static inline void cfs_se_util_change(struct sched_avg *avg)
 	WRITE_ONCE(avg->util_est.enqueued, enqueued);
 }
 
+/*
+ * The clock_pelt scales the time to reflect the effective amount of
+ * computation done during the running delta time but then sync back to
+ * clock_task when rq is idle.
+ *
+ *
+ * absolute time   | 1| 2| 3| 4| 5| 6| 7| 8| 9|10|11|12|13|14|15|16
+ * @ max capacity  ------******---------------******---------------
+ * @ half capacity ------************---------************---------
+ * clock pelt      | 1| 2|    3|    4| 7| 8| 9|   10|   11|14|15|16
+ *
+ */
+static inline void update_rq_clock_pelt(struct rq *rq, s64 delta)
+{
+	if (unlikely(is_idle_task(rq->curr))) {
+		/* The rq is idle, we can sync to clock_task */
+		rq->clock_pelt  = rq_clock_task(rq);
+		return;
+	}
+
+	/*
+	 * When a rq runs at a lower compute capacity, it will need
+	 * more time to do the same amount of work than at max
+	 * capacity. In order to be invariant, we scale the delta to
+	 * reflect how much work has been really done.
+	 * Running longer results in stealing idle time that will
+	 * disturb the load signal compared to max capacity. This
+	 * stolen idle time will be automatically reflected when the
+	 * rq will be idle and the clock will be synced with
+	 * rq_clock_task.
+	 */
+
+	/*
+	 * Scale the elapsed time to reflect the real amount of
+	 * computation
+	 */
+	delta = cap_scale(delta, arch_scale_cpu_capacity(NULL, cpu_of(rq)));
+	delta = cap_scale(delta, arch_scale_freq_capacity(cpu_of(rq)));
+
+	rq->clock_pelt += delta;
+}
+
+/*
+ * When rq becomes idle, we have to check if it has lost idle time
+ * because it was fully busy. A rq is fully used when the /Sum util_sum
+ * is greater or equal to:
+ * (LOAD_AVG_MAX - 1024 + rq->cfs.avg.period_contrib) << SCHED_CAPACITY_SHIFT;
+ * For optimization and computing rounding purpose, we don't take into account
+ * the position in the current window (period_contrib) and we use the higher
+ * bound of util_sum to decide.
+ */
+static inline void update_idle_rq_clock_pelt(struct rq *rq)
+{
+	u32 divider = ((LOAD_AVG_MAX - 1024) << SCHED_CAPACITY_SHIFT) - LOAD_AVG_MAX;
+	u32 util_sum = rq->cfs.avg.util_sum;
+	util_sum += rq->avg_rt.util_sum;
+	util_sum += rq->avg_dl.util_sum;
+
+	/*
+	 * Reflecting stolen time makes sense only if the idle
+	 * phase would be present at max capacity. As soon as the
+	 * utilization of a rq has reached the maximum value, it is
+	 * considered as an always runnig rq without idle time to
+	 * steal. This potential idle time is considered as lost in
+	 * this case. We keep track of this lost idle time compare to
+	 * rq's clock_task.
+	 */
+	if (util_sum >= divider)
+		rq->lost_idle_time += rq_clock_task(rq) - rq->clock_pelt;
+}
+
+static inline u64 rq_clock_pelt(struct rq *rq)
+{
+	lockdep_assert_held(&rq->lock);
+	assert_clock_updated(rq);
+
+	return rq->clock_pelt - rq->lost_idle_time;
+}
+
+#ifdef CONFIG_CFS_BANDWIDTH
+/* rq->task_clock normalized against any time this cfs_rq has spent throttled */
+static inline u64 cfs_rq_clock_pelt(struct cfs_rq *cfs_rq)
+{
+	if (unlikely(cfs_rq->throttle_count))
+		return cfs_rq->throttled_clock_task - cfs_rq->throttled_clock_task_time;
+
+	return rq_clock_pelt(rq_of(cfs_rq)) - cfs_rq->throttled_clock_task_time;
+}
+#else
+static inline u64 cfs_rq_clock_pelt(struct cfs_rq *cfs_rq)
+{
+	return rq_clock_pelt(rq_of(cfs_rq));
+}
+#endif
+
 #else
 
 static inline int
@@ -67,6 +163,18 @@ update_irq_load_avg(struct rq *rq, u64 running)
 {
 	return 0;
 }
+
+static inline u64 rq_clock_pelt(struct rq *rq)
+{
+	return rq_clock_task(rq);
+}
+
+static inline void
+update_rq_clock_pelt(struct rq *rq, s64 delta) { }
+
+static inline void
+update_idle_rq_clock_pelt(struct rq *rq) { }
+
 #endif
 
 
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index e4f398ad9e73..90fa23d36565 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1587,7 +1587,7 @@ pick_next_task_rt(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
 	 * rt task
 	 */
 	if (rq->curr->sched_class != &rt_sched_class)
-		update_rt_rq_load_avg(rq_clock_task(rq), rq, 0);
+		update_rt_rq_load_avg(rq_clock_pelt(rq), rq, 0);
 
 	return p;
 }
@@ -1596,7 +1596,7 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
 {
 	update_curr_rt(rq);
 
-	update_rt_rq_load_avg(rq_clock_task(rq), rq, 1);
+	update_rt_rq_load_avg(rq_clock_pelt(rq), rq, 1);
 
 	/*
 	 * The previous task needs to be made eligible for pushing
@@ -2325,7 +2325,7 @@ static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued)
 	struct sched_rt_entity *rt_se = &p->rt;
 
 	update_curr_rt(rq);
-	update_rt_rq_load_avg(rq_clock_task(rq), rq, 1);
+	update_rt_rq_load_avg(rq_clock_pelt(rq), rq, 1);
 
 	watchdog(rq, p);
 
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 0ed130fae2a9..fe31bc472f3e 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -861,7 +861,10 @@ struct rq {
 
 	unsigned int		clock_update_flags;
 	u64			clock;
-	u64			clock_task;
+	/* Ensure that all clocks are in the same cache line */
+	u64			clock_task ____cacheline_aligned;
+	u64			clock_pelt;
+	unsigned long		lost_idle_time;
 
 	atomic_t		nr_iowait;
 
-- 
cgit v1.2.3


From c546951d9c9300065bad253ecdf1ac59ce9d06c8 Mon Sep 17 00:00:00 2001
From: Andrea Parri <andrea.parri@amarulasolutions.com>
Date: Mon, 21 Jan 2019 16:52:40 +0100
Subject: sched/core: Use READ_ONCE()/WRITE_ONCE() in
 move_queued_task()/task_rq_lock()

move_queued_task() synchronizes with task_rq_lock() as follows:

	move_queued_task()		task_rq_lock()

	[S] ->on_rq = MIGRATING		[L] rq = task_rq()
	WMB (__set_task_cpu())		ACQUIRE (rq->lock);
	[S] ->cpu = new_cpu		[L] ->on_rq

where "[L] rq = task_rq()" is ordered before "ACQUIRE (rq->lock)" by an
address dependency and, in turn, "ACQUIRE (rq->lock)" is ordered before
"[L] ->on_rq" by the ACQUIRE itself.

Use READ_ONCE() to load ->cpu in task_rq() (c.f., task_cpu()) to honor
this address dependency.  Also, mark the accesses to ->cpu and ->on_rq
with READ_ONCE()/WRITE_ONCE() to comply with the LKMM.

Signed-off-by: Andrea Parri <andrea.parri@amarulasolutions.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Alan Stern <stern@rowland.harvard.edu>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul E. McKenney <paulmck@linux.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Deacon <will.deacon@arm.com>
Link: https://lkml.kernel.org/r/20190121155240.27173-1-andrea.parri@amarulasolutions.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h | 4 ++--
 kernel/sched/core.c   | 9 +++++----
 kernel/sched/sched.h  | 6 +++---
 3 files changed, 10 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 351c0fe64c85..4112639c2a85 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1745,9 +1745,9 @@ static __always_inline bool need_resched(void)
 static inline unsigned int task_cpu(const struct task_struct *p)
 {
 #ifdef CONFIG_THREAD_INFO_IN_TASK
-	return p->cpu;
+	return READ_ONCE(p->cpu);
 #else
-	return task_thread_info(p)->cpu;
+	return READ_ONCE(task_thread_info(p)->cpu);
 #endif
 }
 
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 32e06704565e..ec1b67a195cc 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -107,11 +107,12 @@ struct rq *task_rq_lock(struct task_struct *p, struct rq_flags *rf)
 		 *					[L] ->on_rq
 		 *	RELEASE (rq->lock)
 		 *
-		 * If we observe the old CPU in task_rq_lock, the acquire of
+		 * If we observe the old CPU in task_rq_lock(), the acquire of
 		 * the old rq->lock will fully serialize against the stores.
 		 *
-		 * If we observe the new CPU in task_rq_lock, the acquire will
-		 * pair with the WMB to ensure we must then also see migrating.
+		 * If we observe the new CPU in task_rq_lock(), the address
+		 * dependency headed by '[L] rq = task_rq()' and the acquire
+		 * will pair with the WMB to ensure we then also see migrating.
 		 */
 		if (likely(rq == task_rq(p) && !task_on_rq_migrating(p))) {
 			rq_pin_lock(rq, rf);
@@ -916,7 +917,7 @@ static struct rq *move_queued_task(struct rq *rq, struct rq_flags *rf,
 {
 	lockdep_assert_held(&rq->lock);
 
-	p->on_rq = TASK_ON_RQ_MIGRATING;
+	WRITE_ONCE(p->on_rq, TASK_ON_RQ_MIGRATING);
 	dequeue_task(rq, p, DEQUEUE_NOCLOCK);
 	set_task_cpu(p, new_cpu);
 	rq_unlock(rq, rf);
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 99e2a7772d16..c688ef5012e5 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1479,9 +1479,9 @@ static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
 	 */
 	smp_wmb();
 #ifdef CONFIG_THREAD_INFO_IN_TASK
-	p->cpu = cpu;
+	WRITE_ONCE(p->cpu, cpu);
 #else
-	task_thread_info(p)->cpu = cpu;
+	WRITE_ONCE(task_thread_info(p)->cpu, cpu);
 #endif
 	p->wake_cpu = cpu;
 #endif
@@ -1582,7 +1582,7 @@ static inline int task_on_rq_queued(struct task_struct *p)
 
 static inline int task_on_rq_migrating(struct task_struct *p)
 {
-	return p->on_rq == TASK_ON_RQ_MIGRATING;
+	return READ_ONCE(p->on_rq) == TASK_ON_RQ_MIGRATING;
 }
 
 /*
-- 
cgit v1.2.3


From 25db146aa7262bc95c314b2df35db107f5eadd55 Mon Sep 17 00:00:00 2001
From: Jian Hu <jian.hu@amlogic.com>
Date: Fri, 1 Feb 2019 15:53:43 +0100
Subject: dt-bindings: clk: meson: add g12a periph clock controller bindings

Add new clock controller compatible and dt-bindings header for the
Everything-Else domain of the g12a SoC

Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Jian Hu <jian.hu@amlogic.com>
Signed-off-by: Jerome Brunet <jbrunet@baylibre.com>
Reviewed-by: Neil Armstrong <narmstrong@baylibre.com>
Signed-off-by: Neil Armstrong <narmstrong@baylibre.com>
Link: https://lkml.kernel.org/r/20190201145345.6795-3-jbrunet@baylibre.com
---
 .../bindings/clock/amlogic,gxbb-clkc.txt           |   1 +
 include/dt-bindings/clock/g12a-clkc.h              | 135 +++++++++++++++++++++
 2 files changed, 136 insertions(+)
 create mode 100644 include/dt-bindings/clock/g12a-clkc.h

(limited to 'include')

diff --git a/Documentation/devicetree/bindings/clock/amlogic,gxbb-clkc.txt b/Documentation/devicetree/bindings/clock/amlogic,gxbb-clkc.txt
index a6871953bf04..5c8b105be4d6 100644
--- a/Documentation/devicetree/bindings/clock/amlogic,gxbb-clkc.txt
+++ b/Documentation/devicetree/bindings/clock/amlogic,gxbb-clkc.txt
@@ -9,6 +9,7 @@ Required Properties:
 		"amlogic,gxbb-clkc" for GXBB SoC,
 		"amlogic,gxl-clkc" for GXL and GXM SoC,
 		"amlogic,axg-clkc" for AXG SoC.
+		"amlogic,g12a-clkc" for G12A SoC.
 - clocks : list of clock phandle, one for each entry clock-names.
 - clock-names : should contain the following:
   * "xtal": the platform xtal
diff --git a/include/dt-bindings/clock/g12a-clkc.h b/include/dt-bindings/clock/g12a-clkc.h
new file mode 100644
index 000000000000..83b657038d1e
--- /dev/null
+++ b/include/dt-bindings/clock/g12a-clkc.h
@@ -0,0 +1,135 @@
+/* SPDX-License-Identifier: GPL-2.0+ OR MIT */
+/*
+ * Meson-G12A clock tree IDs
+ *
+ * Copyright (c) 2018 Amlogic, Inc. All rights reserved.
+ */
+
+#ifndef __G12A_CLKC_H
+#define __G12A_CLKC_H
+
+#define CLKID_SYS_PLL				0
+#define CLKID_FIXED_PLL				1
+#define CLKID_FCLK_DIV2				2
+#define CLKID_FCLK_DIV3				3
+#define CLKID_FCLK_DIV4				4
+#define CLKID_FCLK_DIV5				5
+#define CLKID_FCLK_DIV7				6
+#define CLKID_GP0_PLL				7
+#define CLKID_CLK81				10
+#define CLKID_MPLL0				11
+#define CLKID_MPLL1				12
+#define CLKID_MPLL2				13
+#define CLKID_MPLL3				14
+#define CLKID_DDR				15
+#define CLKID_DOS				16
+#define CLKID_AUDIO_LOCKER			17
+#define CLKID_MIPI_DSI_HOST			18
+#define CLKID_ETH_PHY				19
+#define CLKID_ISA				20
+#define CLKID_PL301				21
+#define CLKID_PERIPHS				22
+#define CLKID_SPICC0				23
+#define CLKID_I2C				24
+#define CLKID_SANA				25
+#define CLKID_SD				26
+#define CLKID_RNG0				27
+#define CLKID_UART0				28
+#define CLKID_SPICC1				29
+#define CLKID_HIU_IFACE				30
+#define CLKID_MIPI_DSI_PHY			31
+#define CLKID_ASSIST_MISC			32
+#define CLKID_SD_EMMC_A				33
+#define CLKID_SD_EMMC_B				34
+#define CLKID_SD_EMMC_C				35
+#define CLKID_AUDIO_CODEC			36
+#define CLKID_AUDIO				37
+#define CLKID_ETH				38
+#define CLKID_DEMUX				39
+#define CLKID_AUDIO_IFIFO			40
+#define CLKID_ADC				41
+#define CLKID_UART1				42
+#define CLKID_G2D				43
+#define CLKID_RESET				44
+#define CLKID_PCIE_COMB				45
+#define CLKID_PARSER				46
+#define CLKID_USB				47
+#define CLKID_PCIE_PHY				48
+#define CLKID_AHB_ARB0				49
+#define CLKID_AHB_DATA_BUS			50
+#define CLKID_AHB_CTRL_BUS			51
+#define CLKID_HTX_HDCP22			52
+#define CLKID_HTX_PCLK				53
+#define CLKID_BT656				54
+#define CLKID_USB1_DDR_BRIDGE			55
+#define CLKID_MMC_PCLK				56
+#define CLKID_UART2				57
+#define CLKID_VPU_INTR				58
+#define CLKID_GIC				59
+#define CLKID_SD_EMMC_A_CLK0			60
+#define CLKID_SD_EMMC_B_CLK0			61
+#define CLKID_SD_EMMC_C_CLK0			62
+#define CLKID_HIFI_PLL				74
+#define CLKID_VCLK2_VENCI0			80
+#define CLKID_VCLK2_VENCI1			81
+#define CLKID_VCLK2_VENCP0			82
+#define CLKID_VCLK2_VENCP1			83
+#define CLKID_VCLK2_VENCT0			84
+#define CLKID_VCLK2_VENCT1			85
+#define CLKID_VCLK2_OTHER			86
+#define CLKID_VCLK2_ENCI			87
+#define CLKID_VCLK2_ENCP			88
+#define CLKID_DAC_CLK				89
+#define CLKID_AOCLK				90
+#define CLKID_IEC958				91
+#define CLKID_ENC480P				92
+#define CLKID_RNG1				93
+#define CLKID_VCLK2_ENCT			94
+#define CLKID_VCLK2_ENCL			95
+#define CLKID_VCLK2_VENCLMMC			96
+#define CLKID_VCLK2_VENCL			97
+#define CLKID_VCLK2_OTHER1			98
+#define CLKID_FCLK_DIV2P5			99
+#define CLKID_DMA				105
+#define CLKID_EFUSE				106
+#define CLKID_ROM_BOOT				107
+#define CLKID_RESET_SEC				108
+#define CLKID_SEC_AHB_APB3			109
+#define CLKID_VPU_0_SEL				110
+#define CLKID_VPU_0				112
+#define CLKID_VPU_1_SEL				113
+#define CLKID_VPU_1				115
+#define CLKID_VPU				116
+#define CLKID_VAPB_0_SEL			117
+#define CLKID_VAPB_0				119
+#define CLKID_VAPB_1_SEL			120
+#define CLKID_VAPB_1				122
+#define CLKID_VAPB_SEL				123
+#define CLKID_VAPB				124
+#define CLKID_HDMI_PLL				128
+#define CLKID_VID_PLL				129
+#define CLKID_VCLK				138
+#define CLKID_VCLK2				139
+#define CLKID_VCLK_DIV1				148
+#define CLKID_VCLK_DIV2				149
+#define CLKID_VCLK_DIV4				150
+#define CLKID_VCLK_DIV6				151
+#define CLKID_VCLK_DIV12			152
+#define CLKID_VCLK2_DIV1			153
+#define CLKID_VCLK2_DIV2			154
+#define CLKID_VCLK2_DIV4			155
+#define CLKID_VCLK2_DIV6			156
+#define CLKID_VCLK2_DIV12			157
+#define CLKID_CTS_ENCI				162
+#define CLKID_CTS_ENCP				163
+#define CLKID_CTS_VDAC				164
+#define CLKID_HDMI_TX				165
+#define CLKID_HDMI				168
+#define CLKID_MALI_0_SEL			169
+#define CLKID_MALI_0				171
+#define CLKID_MALI_1_SEL			172
+#define CLKID_MALI_1				174
+#define CLKID_MALI				175
+#define CLKID_MPLL_5OM				177
+
+#endif /* __G12A_CLKC_H */
-- 
cgit v1.2.3


From 74cf15cb69c0beb6cdaae3ff43f5231232a1f1b0 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Mon, 3 Dec 2018 15:59:40 +0100
Subject: iwlwifi: mvm: add HE TB PPDU SIG-A BW to radiotap

Expose the trigger-based PPDU SIG-A bandwidth to radiotap in
the newly defined bits thereof.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c | 21 +++++++++++++++------
 include/net/ieee80211_radiotap.h              |  6 ++++++
 2 files changed, 21 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c
index b5ff3325c1d3..d2728fe6a041 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c
@@ -960,6 +960,7 @@ iwl_mvm_decode_he_phy_ru_alloc(struct iwl_mvm_rx_phy_data *phy_data,
 	 * the TSF/timers are not be transmitted in HE-MU.
 	 */
 	u8 ru = le32_get_bits(phy_data->d1, IWL_RX_PHY_DATA1_HE_RU_ALLOC_MASK);
+	u32 he_type = rate_n_flags & RATE_MCS_HE_TYPE_MSK;
 	u8 offs = 0;
 
 	rx_status->bw = RATE_INFO_BW_HE_RU;
@@ -1002,19 +1003,27 @@ iwl_mvm_decode_he_phy_ru_alloc(struct iwl_mvm_rx_phy_data *phy_data,
 		he->data2 |=
 			cpu_to_le16(IEEE80211_RADIOTAP_HE_DATA2_PRISEC_80_SEC);
 
-	if (he_mu) {
 #define CHECK_BW(bw) \
 	BUILD_BUG_ON(IEEE80211_RADIOTAP_HE_MU_FLAGS2_BW_FROM_SIG_A_BW_ ## bw ## MHZ != \
+		     RATE_MCS_CHAN_WIDTH_##bw >> RATE_MCS_CHAN_WIDTH_POS); \
+	BUILD_BUG_ON(IEEE80211_RADIOTAP_HE_DATA6_TB_PPDU_BW_ ## bw ## MHZ != \
 		     RATE_MCS_CHAN_WIDTH_##bw >> RATE_MCS_CHAN_WIDTH_POS)
-		CHECK_BW(20);
-		CHECK_BW(40);
-		CHECK_BW(80);
-		CHECK_BW(160);
+	CHECK_BW(20);
+	CHECK_BW(40);
+	CHECK_BW(80);
+	CHECK_BW(160);
+
+	if (he_mu)
 		he_mu->flags2 |=
 			le16_encode_bits(FIELD_GET(RATE_MCS_CHAN_WIDTH_MSK,
 						   rate_n_flags),
 					 IEEE80211_RADIOTAP_HE_MU_FLAGS2_BW_FROM_SIG_A_BW);
-	}
+	else if (he_type == RATE_MCS_HE_TYPE_TRIG)
+		he->data6 |=
+			cpu_to_le16(IEEE80211_RADIOTAP_HE_DATA6_TB_PPDU_BW_KNOWN) |
+			le16_encode_bits(FIELD_GET(RATE_MCS_CHAN_WIDTH_MSK,
+						   rate_n_flags),
+					 IEEE80211_RADIOTAP_HE_DATA6_TB_PPDU_BW);
 }
 
 static void iwl_mvm_decode_he_phy_data(struct iwl_mvm *mvm,
diff --git a/include/net/ieee80211_radiotap.h b/include/net/ieee80211_radiotap.h
index 8014153bdd49..38c48bb80439 100644
--- a/include/net/ieee80211_radiotap.h
+++ b/include/net/ieee80211_radiotap.h
@@ -291,6 +291,12 @@ enum ieee80211_radiotap_he_bits {
 
 	IEEE80211_RADIOTAP_HE_DATA6_NSTS		= 0x000f,
 	IEEE80211_RADIOTAP_HE_DATA6_DOPPLER		= 0x0010,
+	IEEE80211_RADIOTAP_HE_DATA6_TB_PPDU_BW_KNOWN	= 0x0020,
+	IEEE80211_RADIOTAP_HE_DATA6_TB_PPDU_BW		= 0x00c0,
+		IEEE80211_RADIOTAP_HE_DATA6_TB_PPDU_BW_20MHZ	= 0,
+		IEEE80211_RADIOTAP_HE_DATA6_TB_PPDU_BW_40MHZ	= 1,
+		IEEE80211_RADIOTAP_HE_DATA6_TB_PPDU_BW_80MHZ	= 2,
+		IEEE80211_RADIOTAP_HE_DATA6_TB_PPDU_BW_160MHZ	= 3,
 	IEEE80211_RADIOTAP_HE_DATA6_TXOP		= 0x7f00,
 	IEEE80211_RADIOTAP_HE_DATA6_MIDAMBLE_PDCTY	= 0x8000,
 };
-- 
cgit v1.2.3


From adfebb51e1750c5df9e5d42f505b73c5542a879d Mon Sep 17 00:00:00 2001
From: Bard liao <yung-chuan.liao@linux.intel.com>
Date: Fri, 1 Feb 2019 11:07:40 -0600
Subject: ASoC: topology: unload physical dai link in remove

soc_tplg_link_config() will find the physical dai link and call
soc_tplg_dai_link_load() to load the BE dai link. Currently remove_link()
is only used to remove the FE dai link which is created by the topology.
The BE dai link cannot however be unloaded in snd_soc_tplg_component
_remove(), which is problematic if anything needs to be released or
reinitialized.

This patch aligns the definitions of dynamic types with the existing
UAPI and adds a new remove_backend_link() routine to unload the the BE
dai link when snd_soc_tplg_component_remove() is invoked.

Signed-off-by: Bard liao <yung-chuan.liao@linux.intel.com>
Signed-off-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/soc-topology.h |  1 +
 sound/soc/soc-topology.c     | 32 ++++++++++++++++++++++++++++++++
 2 files changed, 33 insertions(+)

(limited to 'include')

diff --git a/include/sound/soc-topology.h b/include/sound/soc-topology.h
index 8c43cfc240fa..5223896de26f 100644
--- a/include/sound/soc-topology.h
+++ b/include/sound/soc-topology.h
@@ -45,6 +45,7 @@ enum snd_soc_dobj_type {
 	SND_SOC_DOBJ_DAI_LINK,
 	SND_SOC_DOBJ_PCM,
 	SND_SOC_DOBJ_CODEC_LINK,
+	SND_SOC_DOBJ_BACKEND_LINK,
 };
 
 /* dynamic control object */
diff --git a/sound/soc/soc-topology.c b/sound/soc/soc-topology.c
index 23d421370e6c..246d2a2d43c8 100644
--- a/sound/soc/soc-topology.c
+++ b/sound/soc/soc-topology.c
@@ -557,6 +557,25 @@ static void remove_link(struct snd_soc_component *comp,
 	kfree(link);
 }
 
+/* unload dai link */
+static void remove_backend_link(struct snd_soc_component *comp,
+	struct snd_soc_dobj *dobj, int pass)
+{
+	if (pass != SOC_TPLG_PASS_LINK)
+		return;
+
+	if (dobj->ops && dobj->ops->link_unload)
+		dobj->ops->link_unload(comp, dobj);
+
+	/*
+	 * We don't free the link here as what remove_link() do since BE
+	 * links are not allocated by topology.
+	 * We however need to reset the dobj type to its initial values
+	 */
+	dobj->type = SND_SOC_DOBJ_NONE;
+	list_del(&dobj->list);
+}
+
 /* bind a kcontrol to it's IO handlers */
 static int soc_tplg_kcontrol_bind_io(struct snd_soc_tplg_ctl_hdr *hdr,
 	struct snd_kcontrol_new *k,
@@ -2163,6 +2182,12 @@ static int soc_tplg_link_config(struct soc_tplg *tplg,
 		return ret;
 	}
 
+	/* for unloading it in snd_soc_tplg_component_remove */
+	link->dobj.index = tplg->index;
+	link->dobj.ops = tplg->ops;
+	link->dobj.type = SND_SOC_DOBJ_BACKEND_LINK;
+	list_add(&link->dobj.list, &tplg->comp->dobj_list);
+
 	return 0;
 }
 
@@ -2649,6 +2674,13 @@ int snd_soc_tplg_component_remove(struct snd_soc_component *comp, u32 index)
 			case SND_SOC_DOBJ_DAI_LINK:
 				remove_link(comp, dobj, pass);
 				break;
+			case SND_SOC_DOBJ_BACKEND_LINK:
+				/*
+				 * call link_unload ops if extra
+				 * deinitialization is needed.
+				 */
+				remove_backend_link(comp, dobj, pass);
+				break;
 			default:
 				dev_err(comp->dev, "ASoC: invalid component type %d for removal\n",
 					dobj->type);
-- 
cgit v1.2.3


From a46c52d9f2659498f0c0871f7f2333a692c243fe Mon Sep 17 00:00:00 2001
From: wenxu <wenxu@ucloud.cn>
Date: Tue, 29 Jan 2019 15:51:17 +0800
Subject: netfilter: nft_tunnel: Add NFTA_TUNNEL_MODE options

nft "tunnel" expr match both the tun_info of RX and TX. This patch
provide the NFTA_TUNNEL_MODE to individually match the tun_info of
RX or TX.

Signed-off-by: wenxu <wenxu@ucloud.cn>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netfilter/nf_tables.h |  9 +++++++++
 net/netfilter/nft_tunnel.c               | 34 ++++++++++++++++++++++++++++++--
 2 files changed, 41 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index 030302893d96..a66c8de006cc 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -1727,10 +1727,19 @@ enum nft_tunnel_keys {
 };
 #define NFT_TUNNEL_MAX	(__NFT_TUNNEL_MAX - 1)
 
+enum nft_tunnel_mode {
+	NFT_TUNNEL_MODE_NONE,
+	NFT_TUNNEL_MODE_RX,
+	NFT_TUNNEL_MODE_TX,
+	__NFT_TUNNEL_MODE_MAX
+};
+#define NFT_TUNNEL_MODE_MAX	(__NFT_TUNNEL_MODE_MAX - 1)
+
 enum nft_tunnel_attributes {
 	NFTA_TUNNEL_UNSPEC,
 	NFTA_TUNNEL_KEY,
 	NFTA_TUNNEL_DREG,
+	NFTA_TUNNEL_MODE,
 	__NFTA_TUNNEL_MAX
 };
 #define NFTA_TUNNEL_MAX	(__NFTA_TUNNEL_MAX - 1)
diff --git a/net/netfilter/nft_tunnel.c b/net/netfilter/nft_tunnel.c
index 3a15f219e4e7..ea28588c5eed 100644
--- a/net/netfilter/nft_tunnel.c
+++ b/net/netfilter/nft_tunnel.c
@@ -15,6 +15,7 @@
 struct nft_tunnel {
 	enum nft_tunnel_keys	key:8;
 	enum nft_registers	dreg:8;
+	enum nft_tunnel_mode	mode:8;
 };
 
 static void nft_tunnel_get_eval(const struct nft_expr *expr,
@@ -29,14 +30,32 @@ static void nft_tunnel_get_eval(const struct nft_expr *expr,
 
 	switch (priv->key) {
 	case NFT_TUNNEL_PATH:
-		nft_reg_store8(dest, !!tun_info);
+		if (!tun_info) {
+			nft_reg_store8(dest, false);
+			return;
+		}
+		if (priv->mode == NFT_TUNNEL_MODE_NONE ||
+		    (priv->mode == NFT_TUNNEL_MODE_RX &&
+		     !(tun_info->mode & IP_TUNNEL_INFO_TX)) ||
+		    (priv->mode == NFT_TUNNEL_MODE_TX &&
+		     (tun_info->mode & IP_TUNNEL_INFO_TX)))
+			nft_reg_store8(dest, true);
+		else
+			nft_reg_store8(dest, false);
 		break;
 	case NFT_TUNNEL_ID:
 		if (!tun_info) {
 			regs->verdict.code = NFT_BREAK;
 			return;
 		}
-		*dest = ntohl(tunnel_id_to_key32(tun_info->key.tun_id));
+		if (priv->mode == NFT_TUNNEL_MODE_NONE ||
+		    (priv->mode == NFT_TUNNEL_MODE_RX &&
+		     !(tun_info->mode & IP_TUNNEL_INFO_TX)) ||
+		    (priv->mode == NFT_TUNNEL_MODE_TX &&
+		     (tun_info->mode & IP_TUNNEL_INFO_TX)))
+			*dest = ntohl(tunnel_id_to_key32(tun_info->key.tun_id));
+		else
+			regs->verdict.code = NFT_BREAK;
 		break;
 	default:
 		WARN_ON(1);
@@ -47,6 +66,7 @@ static void nft_tunnel_get_eval(const struct nft_expr *expr,
 static const struct nla_policy nft_tunnel_policy[NFTA_TUNNEL_MAX + 1] = {
 	[NFTA_TUNNEL_KEY]	= { .type = NLA_U32 },
 	[NFTA_TUNNEL_DREG]	= { .type = NLA_U32 },
+	[NFTA_TUNNEL_MODE]	= { .type = NLA_U32 },
 };
 
 static int nft_tunnel_get_init(const struct nft_ctx *ctx,
@@ -74,6 +94,14 @@ static int nft_tunnel_get_init(const struct nft_ctx *ctx,
 
 	priv->dreg = nft_parse_register(tb[NFTA_TUNNEL_DREG]);
 
+	if (tb[NFTA_TUNNEL_MODE]) {
+		priv->mode = ntohl(nla_get_be32(tb[NFTA_TUNNEL_MODE]));
+		if (priv->mode > NFT_TUNNEL_MODE_MAX)
+			return -EOPNOTSUPP;
+	} else {
+		priv->mode = NFT_TUNNEL_MODE_NONE;
+	}
+
 	return nft_validate_register_store(ctx, priv->dreg, NULL,
 					   NFT_DATA_VALUE, len);
 }
@@ -87,6 +115,8 @@ static int nft_tunnel_get_dump(struct sk_buff *skb,
 		goto nla_put_failure;
 	if (nft_dump_register(skb, NFTA_TUNNEL_DREG, priv->dreg))
 		goto nla_put_failure;
+	if (nla_put_be32(skb, NFTA_TUNNEL_MODE, htonl(priv->mode)))
+		goto nla_put_failure;
 	return 0;
 
 nla_put_failure:
-- 
cgit v1.2.3


From 77000bc43da17d5d6bc4ebfaf44d52d43bb69492 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 4 Feb 2019 16:31:04 +0100
Subject: uio: remove the unused iov_for_each macro

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 .clang-format       | 1 -
 include/linux/uio.h | 8 --------
 2 files changed, 9 deletions(-)

(limited to 'include')

diff --git a/.clang-format b/.clang-format
index e6080f5834a3..c144d9c24d5d 100644
--- a/.clang-format
+++ b/.clang-format
@@ -259,7 +259,6 @@ ForEachMacros:
   - 'idr_for_each_entry_ul'
   - 'inet_bind_bucket_for_each'
   - 'inet_lhash2_for_each_icsk_rcu'
-  - 'iov_for_each'
   - 'key_for_each'
   - 'key_for_each_safe'
   - 'klp_for_each_func'
diff --git a/include/linux/uio.h b/include/linux/uio.h
index ecf584f6b82d..87477e1640f9 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -110,14 +110,6 @@ static inline struct iovec iov_iter_iovec(const struct iov_iter *iter)
 	};
 }
 
-#define iov_for_each(iov, iter, start)				\
-	if (iov_iter_type(start) == ITER_IOVEC ||		\
-	    iov_iter_type(start) == ITER_KVEC)			\
-	for (iter = (start);					\
-	     (iter).count &&					\
-	     ((iov = iov_iter_iovec(&(iter))), 1);		\
-	     iov_iter_advance(&(iter), (iov).iov_len))
-
 size_t iov_iter_copy_from_user_atomic(struct page *page,
 		struct iov_iter *i, unsigned long offset, size_t bytes);
 void iov_iter_advance(struct iov_iter *i, size_t bytes);
-- 
cgit v1.2.3


From 960587285a56ec3cafb4d1e6b25c19eced4d0bce Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Sat, 2 Feb 2019 10:16:59 +0100
Subject: netfilter: nat: remove module dependency on ipv6 core

nf_nat_ipv6 calls two ipv6 core functions, so add those to v6ops to avoid
the module dependency.

This is a prerequisite for merging ipv4 and ipv6 nat implementations.

Add wrappers to avoid the indirection if ipv6 is builtin.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter_ipv6.h              |  6 ++++++
 net/ipv6/netfilter.c                        |  4 ++++
 net/ipv6/netfilter/nf_nat_l3proto_ipv6.c    | 17 ++++++++++++++++-
 net/ipv6/netfilter/nf_nat_masquerade_ipv6.c | 21 +++++++++++++++++++--
 4 files changed, 45 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h
index c0dc4dd78887..ad4223c10488 100644
--- a/include/linux/netfilter_ipv6.h
+++ b/include/linux/netfilter_ipv6.h
@@ -33,6 +33,12 @@ struct nf_ipv6_ops {
 	int (*route)(struct net *net, struct dst_entry **dst, struct flowi *fl,
 		     bool strict);
 	int (*reroute)(struct sk_buff *skb, const struct nf_queue_entry *entry);
+#if IS_MODULE(CONFIG_IPV6)
+	int (*route_me_harder)(struct net *net, struct sk_buff *skb);
+	int (*dev_get_saddr)(struct net *net, const struct net_device *dev,
+		       const struct in6_addr *daddr, unsigned int srcprefs,
+		       struct in6_addr *saddr);
+#endif
 };
 
 #ifdef CONFIG_NETFILTER
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index 8b075f0bc351..0a5caf263889 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -112,6 +112,10 @@ static const struct nf_ipv6_ops ipv6ops = {
 	.fragment		= ip6_fragment,
 	.route			= nf_ip6_route,
 	.reroute		= nf_ip6_reroute,
+#if IS_MODULE(CONFIG_IPV6)
+	.route_me_harder	= ip6_route_me_harder,
+	.dev_get_saddr		= ipv6_dev_get_saddr,
+#endif
 };
 
 int __init ipv6_netfilter_init(void)
diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
index 9c914db44bec..b52026adb3e7 100644
--- a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
@@ -17,6 +17,7 @@
 #include <net/checksum.h>
 #include <net/ip6_checksum.h>
 #include <net/ip6_route.h>
+#include <net/xfrm.h>
 #include <net/ipv6.h>
 
 #include <net/netfilter/nf_conntrack_core.h>
@@ -317,6 +318,20 @@ nf_nat_ipv6_out(void *priv, struct sk_buff *skb,
 	return ret;
 }
 
+static int nat_route_me_harder(struct net *net, struct sk_buff *skb)
+{
+#ifdef CONFIG_IPV6_MODULE
+	const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops();
+
+	if (!v6_ops)
+		return -EHOSTUNREACH;
+
+	return v6_ops->route_me_harder(net, skb);
+#else
+	return ip6_route_me_harder(net, skb);
+#endif
+}
+
 static unsigned int
 nf_nat_ipv6_local_fn(void *priv, struct sk_buff *skb,
 		     const struct nf_hook_state *state)
@@ -333,7 +348,7 @@ nf_nat_ipv6_local_fn(void *priv, struct sk_buff *skb,
 
 		if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3,
 				      &ct->tuplehash[!dir].tuple.src.u3)) {
-			err = ip6_route_me_harder(state->net, skb);
+			err = nat_route_me_harder(state->net, skb);
 			if (err < 0)
 				ret = NF_DROP_ERR(err);
 		}
diff --git a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c b/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
index 0ad0da5a2600..fd313b726263 100644
--- a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
+++ b/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
@@ -24,6 +24,23 @@
 
 static atomic_t v6_worker_count;
 
+static int
+nat_ipv6_dev_get_saddr(struct net *net, const struct net_device *dev,
+		       const struct in6_addr *daddr, unsigned int srcprefs,
+		       struct in6_addr *saddr)
+{
+#ifdef CONFIG_IPV6_MODULE
+	const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops();
+
+	if (!v6_ops)
+		return -EHOSTUNREACH;
+
+	return v6_ops->dev_get_saddr(net, dev, daddr, srcprefs, saddr);
+#else
+	return ipv6_dev_get_saddr(net, dev, daddr, srcprefs, saddr);
+#endif
+}
+
 unsigned int
 nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range,
 		       const struct net_device *out)
@@ -38,8 +55,8 @@ nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range,
 	WARN_ON(!(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
 			 ctinfo == IP_CT_RELATED_REPLY)));
 
-	if (ipv6_dev_get_saddr(nf_ct_net(ct), out,
-			       &ipv6_hdr(skb)->daddr, 0, &src) < 0)
+	if (nat_ipv6_dev_get_saddr(nf_ct_net(ct), out,
+				   &ipv6_hdr(skb)->daddr, 0, &src) < 0)
 		return NF_DROP;
 
 	nat = nf_ct_nat_ext_add(ct);
-- 
cgit v1.2.3


From ac02bcf9cc1e4aefb0a7156a2ae26e8396b15f24 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Sat, 2 Feb 2019 10:17:00 +0100
Subject: netfilter: ipv6: avoid indirect calls for IPV6=y case

indirect calls are only needed if ipv6 is a module.
Add helpers to abstract the v6ops indirections and use them instead.

fragment, reroute and route_input are kept as indirect calls.
The first two are not not used in hot path and route_input is only
used by bridge netfilter.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter_ipv6.h    | 64 +++++++++++++++++++++++++++++++--------
 net/ipv6/netfilter.c              | 15 ++++-----
 net/ipv6/netfilter/nft_fib_ipv6.c |  9 ++----
 net/netfilter/utils.c             |  6 ++--
 net/netfilter/xt_addrtype.c       | 16 +++-------
 5 files changed, 68 insertions(+), 42 deletions(-)

(limited to 'include')

diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h
index ad4223c10488..471e9467105b 100644
--- a/include/linux/netfilter_ipv6.h
+++ b/include/linux/netfilter_ipv6.h
@@ -25,29 +25,24 @@ struct nf_queue_entry;
  * if IPv6 is a module.
  */
 struct nf_ipv6_ops {
+#if IS_MODULE(CONFIG_IPV6)
 	int (*chk_addr)(struct net *net, const struct in6_addr *addr,
 			const struct net_device *dev, int strict);
-	void (*route_input)(struct sk_buff *skb);
-	int (*fragment)(struct net *net, struct sock *sk, struct sk_buff *skb,
-			int (*output)(struct net *, struct sock *, struct sk_buff *));
-	int (*route)(struct net *net, struct dst_entry **dst, struct flowi *fl,
-		     bool strict);
-	int (*reroute)(struct sk_buff *skb, const struct nf_queue_entry *entry);
-#if IS_MODULE(CONFIG_IPV6)
 	int (*route_me_harder)(struct net *net, struct sk_buff *skb);
 	int (*dev_get_saddr)(struct net *net, const struct net_device *dev,
 		       const struct in6_addr *daddr, unsigned int srcprefs,
 		       struct in6_addr *saddr);
+	int (*route)(struct net *net, struct dst_entry **dst, struct flowi *fl,
+		     bool strict);
 #endif
+	void (*route_input)(struct sk_buff *skb);
+	int (*fragment)(struct net *net, struct sock *sk, struct sk_buff *skb,
+			int (*output)(struct net *, struct sock *, struct sk_buff *));
+	int (*reroute)(struct sk_buff *skb, const struct nf_queue_entry *entry);
 };
 
 #ifdef CONFIG_NETFILTER
-int ip6_route_me_harder(struct net *net, struct sk_buff *skb);
-__sum16 nf_ip6_checksum(struct sk_buff *skb, unsigned int hook,
-			unsigned int dataoff, u_int8_t protocol);
-
-int ipv6_netfilter_init(void);
-void ipv6_netfilter_fini(void);
+#include <net/addrconf.h>
 
 extern const struct nf_ipv6_ops __rcu *nf_ipv6_ops;
 static inline const struct nf_ipv6_ops *nf_get_ipv6_ops(void)
@@ -55,6 +50,49 @@ static inline const struct nf_ipv6_ops *nf_get_ipv6_ops(void)
 	return rcu_dereference(nf_ipv6_ops);
 }
 
+static inline int nf_ipv6_chk_addr(struct net *net, const struct in6_addr *addr,
+				   const struct net_device *dev, int strict)
+{
+#if IS_MODULE(CONFIG_IPV6)
+	const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops();
+
+	if (!v6_ops)
+		return 1;
+
+	return v6_ops->chk_addr(net, addr, dev, strict);
+#else
+	return ipv6_chk_addr(net, addr, dev, strict);
+#endif
+}
+
+int __nf_ip6_route(struct net *net, struct dst_entry **dst,
+			       struct flowi *fl, bool strict);
+
+static inline int nf_ip6_route(struct net *net, struct dst_entry **dst,
+			       struct flowi *fl, bool strict)
+{
+#if IS_MODULE(CONFIG_IPV6)
+	const struct nf_ipv6_ops *v6ops = nf_get_ipv6_ops();
+
+	if (v6ops)
+		return v6ops->route(net, dst, fl, strict);
+
+	return -EHOSTUNREACH;
+#endif
+#if IS_BUILTIN(CONFIG_IPV6)
+	return __nf_ip6_route(net, dst, fl, strict);
+#else
+	return -EHOSTUNREACH;
+#endif
+}
+
+int ip6_route_me_harder(struct net *net, struct sk_buff *skb);
+__sum16 nf_ip6_checksum(struct sk_buff *skb, unsigned int hook,
+			unsigned int dataoff, u_int8_t protocol);
+
+int ipv6_netfilter_init(void);
+void ipv6_netfilter_fini(void);
+
 #else /* CONFIG_NETFILTER */
 static inline int ipv6_netfilter_init(void) { return 0; }
 static inline void ipv6_netfilter_fini(void) { return; }
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index 0a5caf263889..a8263031f3a6 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -84,8 +84,8 @@ static int nf_ip6_reroute(struct sk_buff *skb,
 	return 0;
 }
 
-static int nf_ip6_route(struct net *net, struct dst_entry **dst,
-			struct flowi *fl, bool strict)
+int __nf_ip6_route(struct net *net, struct dst_entry **dst,
+		   struct flowi *fl, bool strict)
 {
 	static const struct ipv6_pinfo fake_pinfo;
 	static const struct inet_sock fake_sk = {
@@ -105,17 +105,18 @@ static int nf_ip6_route(struct net *net, struct dst_entry **dst,
 		*dst = result;
 	return err;
 }
+EXPORT_SYMBOL_GPL(__nf_ip6_route);
 
 static const struct nf_ipv6_ops ipv6ops = {
-	.chk_addr		= ipv6_chk_addr,
-	.route_input    	= ip6_route_input,
-	.fragment		= ip6_fragment,
-	.route			= nf_ip6_route,
-	.reroute		= nf_ip6_reroute,
 #if IS_MODULE(CONFIG_IPV6)
+	.chk_addr		= ipv6_chk_addr,
 	.route_me_harder	= ip6_route_me_harder,
 	.dev_get_saddr		= ipv6_dev_get_saddr,
+	.route			= __nf_ip6_route,
 #endif
+	.route_input		= ip6_route_input,
+	.fragment		= ip6_fragment,
+	.reroute		= nf_ip6_reroute,
 };
 
 int __init ipv6_netfilter_init(void)
diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c
index 36be3cf0adef..73cdc0bc63f7 100644
--- a/net/ipv6/netfilter/nft_fib_ipv6.c
+++ b/net/ipv6/netfilter/nft_fib_ipv6.c
@@ -59,7 +59,6 @@ static u32 __nft_fib6_eval_type(const struct nft_fib *priv,
 				struct ipv6hdr *iph)
 {
 	const struct net_device *dev = NULL;
-	const struct nf_ipv6_ops *v6ops;
 	int route_err, addrtype;
 	struct rt6_info *rt;
 	struct flowi6 fl6 = {
@@ -68,10 +67,6 @@ static u32 __nft_fib6_eval_type(const struct nft_fib *priv,
 	};
 	u32 ret = 0;
 
-	v6ops = nf_get_ipv6_ops();
-	if (!v6ops)
-		return RTN_UNREACHABLE;
-
 	if (priv->flags & NFTA_FIB_F_IIF)
 		dev = nft_in(pkt);
 	else if (priv->flags & NFTA_FIB_F_OIF)
@@ -79,10 +74,10 @@ static u32 __nft_fib6_eval_type(const struct nft_fib *priv,
 
 	nft_fib6_flowi_init(&fl6, priv, pkt, dev, iph);
 
-	if (dev && v6ops->chk_addr(nft_net(pkt), &fl6.daddr, dev, true))
+	if (dev && nf_ipv6_chk_addr(nft_net(pkt), &fl6.daddr, dev, true))
 		ret = RTN_LOCAL;
 
-	route_err = v6ops->route(nft_net(pkt), (struct dst_entry **)&rt,
+	route_err = nf_ip6_route(nft_net(pkt), (struct dst_entry **)&rt,
 				 flowi6_to_flowi(&fl6), false);
 	if (route_err)
 		goto err;
diff --git a/net/netfilter/utils.c b/net/netfilter/utils.c
index 55af9f247993..06dc55590441 100644
--- a/net/netfilter/utils.c
+++ b/net/netfilter/utils.c
@@ -162,7 +162,7 @@ EXPORT_SYMBOL_GPL(nf_checksum_partial);
 int nf_route(struct net *net, struct dst_entry **dst, struct flowi *fl,
 	     bool strict, unsigned short family)
 {
-	const struct nf_ipv6_ops *v6ops;
+	const struct nf_ipv6_ops *v6ops __maybe_unused;
 	int ret = 0;
 
 	switch (family) {
@@ -170,9 +170,7 @@ int nf_route(struct net *net, struct dst_entry **dst, struct flowi *fl,
 		ret = nf_ip_route(net, dst, fl, strict);
 		break;
 	case AF_INET6:
-		v6ops = rcu_dereference(nf_ipv6_ops);
-		if (v6ops)
-			ret = v6ops->route(net, dst, fl, strict);
+		ret = nf_ip6_route(net, dst, fl, strict);
 		break;
 	}
 
diff --git a/net/netfilter/xt_addrtype.c b/net/netfilter/xt_addrtype.c
index 89e281b3bfc2..29987ff03621 100644
--- a/net/netfilter/xt_addrtype.c
+++ b/net/netfilter/xt_addrtype.c
@@ -36,7 +36,6 @@ MODULE_ALIAS("ip6t_addrtype");
 static u32 match_lookup_rt6(struct net *net, const struct net_device *dev,
 			    const struct in6_addr *addr, u16 mask)
 {
-	const struct nf_ipv6_ops *v6ops;
 	struct flowi6 flow;
 	struct rt6_info *rt;
 	u32 ret = 0;
@@ -47,18 +46,13 @@ static u32 match_lookup_rt6(struct net *net, const struct net_device *dev,
 	if (dev)
 		flow.flowi6_oif = dev->ifindex;
 
-	v6ops = nf_get_ipv6_ops();
-	if (v6ops) {
-		if (dev && (mask & XT_ADDRTYPE_LOCAL)) {
-			if (v6ops->chk_addr(net, addr, dev, true))
-				ret = XT_ADDRTYPE_LOCAL;
-		}
-		route_err = v6ops->route(net, (struct dst_entry **)&rt,
-					 flowi6_to_flowi(&flow), false);
-	} else {
-		route_err = 1;
+	if (dev && (mask & XT_ADDRTYPE_LOCAL)) {
+		if (nf_ipv6_chk_addr(net, addr, dev, true))
+			ret = XT_ADDRTYPE_LOCAL;
 	}
 
+	route_err = nf_ip6_route(net, (struct dst_entry **)&rt,
+				 flowi6_to_flowi(&flow), false);
 	if (route_err)
 		return XT_ADDRTYPE_UNREACHABLE;
 
-- 
cgit v1.2.3


From da82334219bc386ef7ea5b4b185a339a973dd513 Mon Sep 17 00:00:00 2001
From: Moni Shoua <monis@mellanox.com>
Date: Tue, 22 Jan 2019 08:48:41 +0200
Subject: IB/core: Allocate a bit for SRQ ODP support

The ODP support matrix is per operation and per transport. The support for
each transport (RC, UD, etc.) is described with a bit field.

ODP for SRQ WQEs is considered a different kind of support from ODP for RQ
WQs and therefore needs a different capability bit.

Signed-off-by: Moni Shoua <monis@mellanox.com>
Reviewed-by: Majd Dibbiny <majd@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 include/rdma/ib_verbs.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 5fc3be884444..5eefdea62831 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -268,6 +268,7 @@ enum ib_odp_transport_cap_bits {
 	IB_ODP_SUPPORT_WRITE	= 1 << 2,
 	IB_ODP_SUPPORT_READ	= 1 << 3,
 	IB_ODP_SUPPORT_ATOMIC	= 1 << 4,
+	IB_ODP_SUPPORT_SRQ_RECV	= 1 << 5,
 };
 
 struct ib_odp_caps {
-- 
cgit v1.2.3


From 52a72e2a395fa3c5ab5df41058a8511e87215730 Mon Sep 17 00:00:00 2001
From: Moni Shoua <monis@mellanox.com>
Date: Tue, 22 Jan 2019 08:48:42 +0200
Subject: IB/uverbs: Expose XRC ODP device capabilities

Expose XRC ODP capabilities as part of the extended device capabilities.

Signed-off-by: Moni Shoua <monis@mellanox.com>
Reviewed-by: Majd Dibbiny <majd@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/core/uverbs_cmd.c | 1 +
 include/rdma/ib_verbs.h              | 1 +
 include/uapi/rdma/ib_user_verbs.h    | 2 ++
 3 files changed, 4 insertions(+)

(limited to 'include')

diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index d4f1a2ef5015..68c4ea514faf 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -3613,6 +3613,7 @@ static int ib_uverbs_ex_query_device(struct uverbs_attr_bundle *attrs)
 		attr.odp_caps.per_transport_caps.uc_odp_caps;
 	resp.odp_caps.per_transport_caps.ud_odp_caps =
 		attr.odp_caps.per_transport_caps.ud_odp_caps;
+	resp.xrc_odp_caps = attr.odp_caps.per_transport_caps.xrc_odp_caps;
 
 	resp.timestamp_mask = attr.timestamp_mask;
 	resp.hca_core_clock = attr.hca_core_clock;
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 5eefdea62831..8219c07340a9 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -277,6 +277,7 @@ struct ib_odp_caps {
 		uint32_t  rc_odp_caps;
 		uint32_t  uc_odp_caps;
 		uint32_t  ud_odp_caps;
+		uint32_t  xrc_odp_caps;
 	} per_transport_caps;
 };
 
diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h
index 480d9a60b68e..0474c7400268 100644
--- a/include/uapi/rdma/ib_user_verbs.h
+++ b/include/uapi/rdma/ib_user_verbs.h
@@ -270,6 +270,8 @@ struct ib_uverbs_ex_query_device_resp {
 	struct ib_uverbs_tm_caps tm_caps;
 	struct ib_uverbs_cq_moderation_caps cq_moderation_caps;
 	__aligned_u64 max_dm_size;
+	__u32 xrc_odp_caps;
+	__u32 reserved;
 };
 
 struct ib_uverbs_query_port {
-- 
cgit v1.2.3


From a163afc88556e099271a7b423295bc5176fcecce Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Thu, 31 Jan 2019 08:30:34 -0800
Subject: IB/core: Remove ib_sg_dma_address() and ib_sg_dma_len()

Keeping single line wrapper functions is not useful. Hence remove the
ib_sg_dma_address() and ib_sg_dma_len() functions. This patch does not
change any functionality.

Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/core/rw.c              | 12 +++++-------
 drivers/infiniband/ulp/iser/iser_memory.c |  9 ++++-----
 drivers/infiniband/ulp/srp/ib_srp.c       | 17 +++++++----------
 include/rdma/ib_verbs.h                   | 27 ---------------------------
 net/rds/ib.h                              | 12 ++++--------
 net/rds/ib_fmr.c                          |  8 ++++----
 net/rds/ib_frmr.c                         |  4 ++--
 net/rds/ib_recv.c                         |  8 +++-----
 net/rds/ib_send.c                         | 15 +++++++--------
 9 files changed, 36 insertions(+), 76 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/rw.c b/drivers/infiniband/core/rw.c
index d22c4a2ebac6..89a5be3a2f97 100644
--- a/drivers/infiniband/core/rw.c
+++ b/drivers/infiniband/core/rw.c
@@ -179,7 +179,6 @@ static int rdma_rw_init_map_wrs(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
 		struct scatterlist *sg, u32 sg_cnt, u32 offset,
 		u64 remote_addr, u32 rkey, enum dma_data_direction dir)
 {
-	struct ib_device *dev = qp->pd->device;
 	u32 max_sge = dir == DMA_TO_DEVICE ? qp->max_write_sge :
 		      qp->max_read_sge;
 	struct ib_sge *sge;
@@ -209,8 +208,8 @@ static int rdma_rw_init_map_wrs(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
 		rdma_wr->wr.sg_list = sge;
 
 		for (j = 0; j < nr_sge; j++, sg = sg_next(sg)) {
-			sge->addr = ib_sg_dma_address(dev, sg) + offset;
-			sge->length = ib_sg_dma_len(dev, sg) - offset;
+			sge->addr = sg_dma_address(sg) + offset;
+			sge->length = sg_dma_len(sg) - offset;
 			sge->lkey = qp->pd->local_dma_lkey;
 
 			total_len += sge->length;
@@ -236,14 +235,13 @@ static int rdma_rw_init_single_wr(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
 		struct scatterlist *sg, u32 offset, u64 remote_addr, u32 rkey,
 		enum dma_data_direction dir)
 {
-	struct ib_device *dev = qp->pd->device;
 	struct ib_rdma_wr *rdma_wr = &ctx->single.wr;
 
 	ctx->nr_ops = 1;
 
 	ctx->single.sge.lkey = qp->pd->local_dma_lkey;
-	ctx->single.sge.addr = ib_sg_dma_address(dev, sg) + offset;
-	ctx->single.sge.length = ib_sg_dma_len(dev, sg) - offset;
+	ctx->single.sge.addr = sg_dma_address(sg) + offset;
+	ctx->single.sge.length = sg_dma_len(sg) - offset;
 
 	memset(rdma_wr, 0, sizeof(*rdma_wr));
 	if (dir == DMA_TO_DEVICE)
@@ -294,7 +292,7 @@ int rdma_rw_ctx_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num,
 	 * Skip to the S/G entry that sg_offset falls into:
 	 */
 	for (;;) {
-		u32 len = ib_sg_dma_len(dev, sg);
+		u32 len = sg_dma_len(sg);
 
 		if (sg_offset < len)
 			break;
diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c
index 394d1b9c2ff7..2ba70729d7b0 100644
--- a/drivers/infiniband/ulp/iser/iser_memory.c
+++ b/drivers/infiniband/ulp/iser/iser_memory.c
@@ -145,9 +145,8 @@ static void iser_data_buf_dump(struct iser_data_buf *data,
 	for_each_sg(data->sg, sg, data->dma_nents, i)
 		iser_dbg("sg[%d] dma_addr:0x%lX page:0x%p "
 			 "off:0x%x sz:0x%x dma_len:0x%x\n",
-			 i, (unsigned long)ib_sg_dma_address(ibdev, sg),
-			 sg_page(sg), sg->offset,
-			 sg->length, ib_sg_dma_len(ibdev, sg));
+			 i, (unsigned long)sg_dma_address(sg),
+			 sg_page(sg), sg->offset, sg->length, sg_dma_len(sg));
 }
 
 static void iser_dump_page_vec(struct iser_page_vec *page_vec)
@@ -204,8 +203,8 @@ iser_reg_dma(struct iser_device *device, struct iser_data_buf *mem,
 		reg->rkey = device->pd->unsafe_global_rkey;
 	else
 		reg->rkey = 0;
-	reg->sge.addr = ib_sg_dma_address(device->ib_device, &sg[0]);
-	reg->sge.length = ib_sg_dma_len(device->ib_device, &sg[0]);
+	reg->sge.addr = sg_dma_address(&sg[0]);
+	reg->sge.length = sg_dma_len(&sg[0]);
 
 	iser_dbg("Single DMA entry: lkey=0x%x, rkey=0x%x, addr=0x%llx,"
 		 " length=0x%x\n", reg->sge.lkey, reg->rkey,
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index 29fe46dbdbf1..84184910f038 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -1600,9 +1600,8 @@ static int srp_map_sg_entry(struct srp_map_state *state,
 {
 	struct srp_target_port *target = ch->target;
 	struct srp_device *dev = target->srp_host->srp_dev;
-	struct ib_device *ibdev = dev->dev;
-	dma_addr_t dma_addr = ib_sg_dma_address(ibdev, sg);
-	unsigned int dma_len = ib_sg_dma_len(ibdev, sg);
+	dma_addr_t dma_addr = sg_dma_address(sg);
+	unsigned int dma_len = sg_dma_len(sg);
 	unsigned int len = 0;
 	int ret;
 
@@ -1696,13 +1695,11 @@ static int srp_map_sg_dma(struct srp_map_state *state, struct srp_rdma_ch *ch,
 			  int count)
 {
 	struct srp_target_port *target = ch->target;
-	struct srp_device *dev = target->srp_host->srp_dev;
 	struct scatterlist *sg;
 	int i;
 
 	for_each_sg(scat, sg, count, i) {
-		srp_map_desc(state, ib_sg_dma_address(dev->dev, sg),
-			     ib_sg_dma_len(dev->dev, sg),
+		srp_map_desc(state, sg_dma_address(sg), sg_dma_len(sg),
 			     target->global_rkey);
 	}
 
@@ -1852,8 +1849,8 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch,
 		buf->len = cpu_to_be32(data_len);
 		WARN_ON_ONCE((void *)(buf + 1) > (void *)cmd + len);
 		for_each_sg(scat, sg, count, i) {
-			sge[i].addr   = ib_sg_dma_address(ibdev, sg);
-			sge[i].length = ib_sg_dma_len(ibdev, sg);
+			sge[i].addr   = sg_dma_address(sg);
+			sge[i].length = sg_dma_len(sg);
 			sge[i].lkey   = target->lkey;
 		}
 		req->cmd->num_sge += count;
@@ -1874,9 +1871,9 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch,
 		struct srp_direct_buf *buf;
 
 		buf = (void *)cmd->add_data + cmd->add_cdb_len;
-		buf->va  = cpu_to_be64(ib_sg_dma_address(ibdev, scat));
+		buf->va  = cpu_to_be64(sg_dma_address(scat));
 		buf->key = cpu_to_be32(target->global_rkey);
-		buf->len = cpu_to_be32(ib_sg_dma_len(ibdev, scat));
+		buf->len = cpu_to_be32(sg_dma_len(scat));
 
 		req->nmdesc = 0;
 		goto map_complete;
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 8219c07340a9..f7e8709e48cd 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -3705,33 +3705,6 @@ static inline void ib_dma_unmap_sg_attrs(struct ib_device *dev,
 {
 	dma_unmap_sg_attrs(dev->dma_device, sg, nents, direction, dma_attrs);
 }
-/**
- * ib_sg_dma_address - Return the DMA address from a scatter/gather entry
- * @dev: The device for which the DMA addresses were created
- * @sg: The scatter/gather entry
- *
- * Note: this function is obsolete. To do: change all occurrences of
- * ib_sg_dma_address() into sg_dma_address().
- */
-static inline u64 ib_sg_dma_address(struct ib_device *dev,
-				    struct scatterlist *sg)
-{
-	return sg_dma_address(sg);
-}
-
-/**
- * ib_sg_dma_len - Return the DMA length from a scatter/gather entry
- * @dev: The device for which the DMA addresses were created
- * @sg: The scatter/gather entry
- *
- * Note: this function is obsolete. To do: change all occurrences of
- * ib_sg_dma_len() into sg_dma_len().
- */
-static inline unsigned int ib_sg_dma_len(struct ib_device *dev,
-					 struct scatterlist *sg)
-{
-	return sg_dma_len(sg);
-}
 
 /**
  * ib_dma_max_seg_size - Return the size limit of a single DMA transfer
diff --git a/net/rds/ib.h b/net/rds/ib.h
index 71ff356ee702..1fd1cac85da2 100644
--- a/net/rds/ib.h
+++ b/net/rds/ib.h
@@ -331,10 +331,8 @@ static inline void rds_ib_dma_sync_sg_for_cpu(struct ib_device *dev,
 	unsigned int i;
 
 	for_each_sg(sglist, sg, sg_dma_len, i) {
-		ib_dma_sync_single_for_cpu(dev,
-				ib_sg_dma_address(dev, sg),
-				ib_sg_dma_len(dev, sg),
-				direction);
+		ib_dma_sync_single_for_cpu(dev, sg_dma_address(sg),
+					   sg_dma_len(sg), direction);
 	}
 }
 #define ib_dma_sync_sg_for_cpu	rds_ib_dma_sync_sg_for_cpu
@@ -348,10 +346,8 @@ static inline void rds_ib_dma_sync_sg_for_device(struct ib_device *dev,
 	unsigned int i;
 
 	for_each_sg(sglist, sg, sg_dma_len, i) {
-		ib_dma_sync_single_for_device(dev,
-				ib_sg_dma_address(dev, sg),
-				ib_sg_dma_len(dev, sg),
-				direction);
+		ib_dma_sync_single_for_device(dev, sg_dma_address(sg),
+					      sg_dma_len(sg), direction);
 	}
 }
 #define ib_dma_sync_sg_for_device	rds_ib_dma_sync_sg_for_device
diff --git a/net/rds/ib_fmr.c b/net/rds/ib_fmr.c
index e0f70c4051b6..31cf37da4510 100644
--- a/net/rds/ib_fmr.c
+++ b/net/rds/ib_fmr.c
@@ -108,8 +108,8 @@ static int rds_ib_map_fmr(struct rds_ib_device *rds_ibdev,
 	page_cnt = 0;
 
 	for (i = 0; i < sg_dma_len; ++i) {
-		unsigned int dma_len = ib_sg_dma_len(dev, &scat[i]);
-		u64 dma_addr = ib_sg_dma_address(dev, &scat[i]);
+		unsigned int dma_len = sg_dma_len(&scat[i]);
+		u64 dma_addr = sg_dma_address(&scat[i]);
 
 		if (dma_addr & ~PAGE_MASK) {
 			if (i > 0) {
@@ -148,8 +148,8 @@ static int rds_ib_map_fmr(struct rds_ib_device *rds_ibdev,
 
 	page_cnt = 0;
 	for (i = 0; i < sg_dma_len; ++i) {
-		unsigned int dma_len = ib_sg_dma_len(dev, &scat[i]);
-		u64 dma_addr = ib_sg_dma_address(dev, &scat[i]);
+		unsigned int dma_len = sg_dma_len(&scat[i]);
+		u64 dma_addr = sg_dma_address(&scat[i]);
 
 		for (j = 0; j < dma_len; j += PAGE_SIZE)
 			dma_pages[page_cnt++] =
diff --git a/net/rds/ib_frmr.c b/net/rds/ib_frmr.c
index 6431a023ac89..688dcd68d4ea 100644
--- a/net/rds/ib_frmr.c
+++ b/net/rds/ib_frmr.c
@@ -181,8 +181,8 @@ static int rds_ib_map_frmr(struct rds_ib_device *rds_ibdev,
 
 	ret = -EINVAL;
 	for (i = 0; i < ibmr->sg_dma_len; ++i) {
-		unsigned int dma_len = ib_sg_dma_len(dev, &ibmr->sg[i]);
-		u64 dma_addr = ib_sg_dma_address(dev, &ibmr->sg[i]);
+		unsigned int dma_len = sg_dma_len(&ibmr->sg[i]);
+		u64 dma_addr = sg_dma_address(&ibmr->sg[i]);
 
 		frmr->sg_byte_len += dma_len;
 		if (dma_addr & ~PAGE_MASK) {
diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c
index 2f16146e4ec9..672b91a9e207 100644
--- a/net/rds/ib_recv.c
+++ b/net/rds/ib_recv.c
@@ -346,8 +346,8 @@ static int rds_ib_recv_refill_one(struct rds_connection *conn,
 	sge->length = sizeof(struct rds_header);
 
 	sge = &recv->r_sge[1];
-	sge->addr = ib_sg_dma_address(ic->i_cm_id->device, &recv->r_frag->f_sg);
-	sge->length = ib_sg_dma_len(ic->i_cm_id->device, &recv->r_frag->f_sg);
+	sge->addr = sg_dma_address(&recv->r_frag->f_sg);
+	sge->length = sg_dma_len(&recv->r_frag->f_sg);
 
 	ret = 0;
 out:
@@ -409,9 +409,7 @@ void rds_ib_recv_refill(struct rds_connection *conn, int prefill, gfp_t gfp)
 
 		rdsdebug("recv %p ibinc %p page %p addr %lu\n", recv,
 			 recv->r_ibinc, sg_page(&recv->r_frag->f_sg),
-			 (long) ib_sg_dma_address(
-				ic->i_cm_id->device,
-				&recv->r_frag->f_sg));
+			 (long)sg_dma_address(&recv->r_frag->f_sg));
 
 		/* XXX when can this fail? */
 		ret = ib_post_recv(ic->i_cm_id->qp, &recv->r_wr, NULL);
diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c
index 2dcb555e6350..dc5897a3a958 100644
--- a/net/rds/ib_send.c
+++ b/net/rds/ib_send.c
@@ -645,16 +645,16 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
 		if (i < work_alloc
 		    && scat != &rm->data.op_sg[rm->data.op_count]) {
 			len = min(RDS_FRAG_SIZE,
-				ib_sg_dma_len(dev, scat) - rm->data.op_dmaoff);
+				  sg_dma_len(scat) - rm->data.op_dmaoff);
 			send->s_wr.num_sge = 2;
 
-			send->s_sge[1].addr = ib_sg_dma_address(dev, scat);
+			send->s_sge[1].addr = sg_dma_address(scat);
 			send->s_sge[1].addr += rm->data.op_dmaoff;
 			send->s_sge[1].length = len;
 
 			bytes_sent += len;
 			rm->data.op_dmaoff += len;
-			if (rm->data.op_dmaoff == ib_sg_dma_len(dev, scat)) {
+			if (rm->data.op_dmaoff == sg_dma_len(scat)) {
 				scat++;
 				rm->data.op_dmasg++;
 				rm->data.op_dmaoff = 0;
@@ -808,8 +808,8 @@ int rds_ib_xmit_atomic(struct rds_connection *conn, struct rm_atomic_op *op)
 	}
 
 	/* Convert our struct scatterlist to struct ib_sge */
-	send->s_sge[0].addr = ib_sg_dma_address(ic->i_cm_id->device, op->op_sg);
-	send->s_sge[0].length = ib_sg_dma_len(ic->i_cm_id->device, op->op_sg);
+	send->s_sge[0].addr = sg_dma_address(op->op_sg);
+	send->s_sge[0].length = sg_dma_len(op->op_sg);
 	send->s_sge[0].lkey = ic->i_pd->local_dma_lkey;
 
 	rdsdebug("rva %Lx rpa %Lx len %u\n", op->op_remote_addr,
@@ -921,9 +921,8 @@ int rds_ib_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
 
 		for (j = 0; j < send->s_rdma_wr.wr.num_sge &&
 		     scat != &op->op_sg[op->op_count]; j++) {
-			len = ib_sg_dma_len(ic->i_cm_id->device, scat);
-			send->s_sge[j].addr =
-				 ib_sg_dma_address(ic->i_cm_id->device, scat);
+			len = sg_dma_len(scat);
+			send->s_sge[j].addr = sg_dma_address(scat);
 			send->s_sge[j].length = len;
 			send->s_sge[j].lkey = ic->i_pd->local_dma_lkey;
 
-- 
cgit v1.2.3


From 219a3e8676f3132d27b530c7d2d6bcab89536b57 Mon Sep 17 00:00:00 2001
From: Kairui Song <kasong@redhat.com>
Date: Mon, 21 Jan 2019 17:59:28 +0800
Subject: integrity, KEYS: add a reference to platform keyring

commit 9dc92c45177a ("integrity: Define a trusted platform keyring")
introduced a .platform keyring for storing preboot keys, used for
verifying kernel image signatures. Currently only IMA-appraisal is able
to use the keyring to verify kernel images that have their signature
stored in xattr.

This patch exposes the .platform keyring, making it accessible for
verifying PE signed kernel images as well.

Suggested-by: Mimi Zohar <zohar@linux.ibm.com>
Signed-off-by: Kairui Song <kasong@redhat.com>
Cc: David Howells <dhowells@redhat.com>
[zohar@linux.ibm.com: fixed checkpatch errors, squashed with patch fix]
Signed-off-by: Mimi Zohar <zohar@linux.ibm.com>
---
 certs/system_keyring.c        | 10 ++++++++++
 include/keys/system_keyring.h |  8 ++++++++
 security/integrity/digsig.c   |  3 +++
 3 files changed, 21 insertions(+)

(limited to 'include')

diff --git a/certs/system_keyring.c b/certs/system_keyring.c
index 81728717523d..da055e901df4 100644
--- a/certs/system_keyring.c
+++ b/certs/system_keyring.c
@@ -24,6 +24,9 @@ static struct key *builtin_trusted_keys;
 #ifdef CONFIG_SECONDARY_TRUSTED_KEYRING
 static struct key *secondary_trusted_keys;
 #endif
+#ifdef CONFIG_INTEGRITY_PLATFORM_KEYRING
+static struct key *platform_trusted_keys;
+#endif
 
 extern __initconst const u8 system_certificate_list[];
 extern __initconst const unsigned long system_certificate_list_size;
@@ -266,3 +269,10 @@ error:
 EXPORT_SYMBOL_GPL(verify_pkcs7_signature);
 
 #endif /* CONFIG_SYSTEM_DATA_VERIFICATION */
+
+#ifdef CONFIG_INTEGRITY_PLATFORM_KEYRING
+void __init set_platform_trusted_keys(struct key *keyring)
+{
+	platform_trusted_keys = keyring;
+}
+#endif
diff --git a/include/keys/system_keyring.h b/include/keys/system_keyring.h
index 359c2f936004..42a93eda331c 100644
--- a/include/keys/system_keyring.h
+++ b/include/keys/system_keyring.h
@@ -61,5 +61,13 @@ static inline struct key *get_ima_blacklist_keyring(void)
 }
 #endif /* CONFIG_IMA_BLACKLIST_KEYRING */
 
+#if defined(CONFIG_INTEGRITY_PLATFORM_KEYRING) && \
+	defined(CONFIG_SYSTEM_TRUSTED_KEYRING)
+extern void __init set_platform_trusted_keys(struct key *keyring);
+#else
+static inline void set_platform_trusted_keys(struct key *keyring)
+{
+}
+#endif
 
 #endif /* _KEYS_SYSTEM_KEYRING_H */
diff --git a/security/integrity/digsig.c b/security/integrity/digsig.c
index f45d6edecf99..e19c2eb72c51 100644
--- a/security/integrity/digsig.c
+++ b/security/integrity/digsig.c
@@ -87,6 +87,9 @@ static int __integrity_init_keyring(const unsigned int id, key_perm_t perm,
 		pr_info("Can't allocate %s keyring (%d)\n",
 			keyring_name[id], err);
 		keyring[id] = NULL;
+	} else {
+		if (id == INTEGRITY_KEYRING_PLATFORM)
+			set_platform_trusted_keys(keyring[id]);
 	}
 
 	return err;
-- 
cgit v1.2.3


From 278311e417be60f7caef6fcb12bda4da2711ceff Mon Sep 17 00:00:00 2001
From: Kairui Song <kasong@redhat.com>
Date: Mon, 21 Jan 2019 17:59:29 +0800
Subject: kexec, KEYS: Make use of platform keyring for signature verify

This patch allows the kexec_file_load syscall to verify the PE signed
kernel image signature based on the preboot keys stored in the .platform
keyring, as fall back, if the signature verification failed due to not
finding the public key in the secondary or builtin keyrings.

This commit adds a VERIFY_USE_PLATFORM_KEYRING similar to previous
VERIFY_USE_SECONDARY_KEYRING indicating that verify_pkcs7_signature
should verify the signature using platform keyring.  Also, decrease
the error message log level when verification failed with -ENOKEY,
so that if called tried multiple time with different keyring it
won't generate extra noises.

Signed-off-by: Kairui Song <kasong@redhat.com>
Cc: David Howells <dhowells@redhat.com>
Acked-by: Dave Young <dyoung@redhat.com> (for kexec_file_load part)
[zohar@linux.ibm.com: tweaked the first paragraph of the patch description,
 and fixed checkpatch warning.]
Signed-off-by: Mimi Zohar <zohar@linux.ibm.com>
---
 arch/x86/kernel/kexec-bzimage64.c | 14 +++++++++++---
 certs/system_keyring.c            | 13 ++++++++++++-
 include/linux/verification.h      |  1 +
 3 files changed, 24 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c
index 278cd07228dd..e1215a600064 100644
--- a/arch/x86/kernel/kexec-bzimage64.c
+++ b/arch/x86/kernel/kexec-bzimage64.c
@@ -531,9 +531,17 @@ static int bzImage64_cleanup(void *loader_data)
 #ifdef CONFIG_KEXEC_BZIMAGE_VERIFY_SIG
 static int bzImage64_verify_sig(const char *kernel, unsigned long kernel_len)
 {
-	return verify_pefile_signature(kernel, kernel_len,
-				       VERIFY_USE_SECONDARY_KEYRING,
-				       VERIFYING_KEXEC_PE_SIGNATURE);
+	int ret;
+
+	ret = verify_pefile_signature(kernel, kernel_len,
+				      VERIFY_USE_SECONDARY_KEYRING,
+				      VERIFYING_KEXEC_PE_SIGNATURE);
+	if (ret == -ENOKEY && IS_ENABLED(CONFIG_INTEGRITY_PLATFORM_KEYRING)) {
+		ret = verify_pefile_signature(kernel, kernel_len,
+					      VERIFY_USE_PLATFORM_KEYRING,
+					      VERIFYING_KEXEC_PE_SIGNATURE);
+	}
+	return ret;
 }
 #endif
 
diff --git a/certs/system_keyring.c b/certs/system_keyring.c
index da055e901df4..c05c29ae4d5d 100644
--- a/certs/system_keyring.c
+++ b/certs/system_keyring.c
@@ -240,11 +240,22 @@ int verify_pkcs7_signature(const void *data, size_t len,
 #else
 		trusted_keys = builtin_trusted_keys;
 #endif
+	} else if (trusted_keys == VERIFY_USE_PLATFORM_KEYRING) {
+#ifdef CONFIG_INTEGRITY_PLATFORM_KEYRING
+		trusted_keys = platform_trusted_keys;
+#else
+		trusted_keys = NULL;
+#endif
+		if (!trusted_keys) {
+			ret = -ENOKEY;
+			pr_devel("PKCS#7 platform keyring is not available\n");
+			goto error;
+		}
 	}
 	ret = pkcs7_validate_trust(pkcs7, trusted_keys);
 	if (ret < 0) {
 		if (ret == -ENOKEY)
-			pr_err("PKCS#7 signature not signed with a trusted key\n");
+			pr_devel("PKCS#7 signature not signed with a trusted key\n");
 		goto error;
 	}
 
diff --git a/include/linux/verification.h b/include/linux/verification.h
index cfa4730d607a..018fb5f13d44 100644
--- a/include/linux/verification.h
+++ b/include/linux/verification.h
@@ -17,6 +17,7 @@
  * should be used.
  */
 #define VERIFY_USE_SECONDARY_KEYRING ((struct key *)1UL)
+#define VERIFY_USE_PLATFORM_KEYRING  ((struct key *)2UL)
 
 /*
  * The use to which an asymmetric key is being put.
-- 
cgit v1.2.3


From fdb2410f7702f25f82804a261f90ad03422bd2c3 Mon Sep 17 00:00:00 2001
From: Mimi Zohar <zohar@linux.ibm.com>
Date: Tue, 22 Jan 2019 14:06:49 -0600
Subject: ima: define ima_post_create_tmpfile() hook and add missing call

If tmpfiles can be made persistent, then newly created tmpfiles need to
be treated like any other new files in policy.

This patch indicates which newly created tmpfiles are in policy, causing
the file hash to be calculated on __fput().

Reported-by: Ignaz Forster <ignaz.forster@gmx.de>
[rgoldwyn@suse.com: Call ima_post_create_tmpfile() in vfs_tmpfile() as
opposed to do_tmpfile(). This will help the case for overlayfs where
copy_up is denied while overwriting a file.]
Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
Signed-off-by: Mimi Zohar <zohar@linux.ibm.com>
---
 fs/namei.c                        |  1 +
 include/linux/ima.h               |  5 +++++
 security/integrity/ima/ima_main.c | 35 +++++++++++++++++++++++++++++++++--
 3 files changed, 39 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/fs/namei.c b/fs/namei.c
index 914178cdbe94..373a7ec4b09d 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3462,6 +3462,7 @@ struct dentry *vfs_tmpfile(struct dentry *dentry, umode_t mode, int open_flag)
 		inode->i_state |= I_LINKABLE;
 		spin_unlock(&inode->i_lock);
 	}
+	ima_post_create_tmpfile(inode);
 	return child;
 
 out_err:
diff --git a/include/linux/ima.h b/include/linux/ima.h
index b5e16b8c50b7..dc12fbcf484c 100644
--- a/include/linux/ima.h
+++ b/include/linux/ima.h
@@ -18,6 +18,7 @@ struct linux_binprm;
 #ifdef CONFIG_IMA
 extern int ima_bprm_check(struct linux_binprm *bprm);
 extern int ima_file_check(struct file *file, int mask);
+extern void ima_post_create_tmpfile(struct inode *inode);
 extern void ima_file_free(struct file *file);
 extern int ima_file_mmap(struct file *file, unsigned long prot);
 extern int ima_load_data(enum kernel_load_data_id id);
@@ -56,6 +57,10 @@ static inline int ima_file_check(struct file *file, int mask)
 	return 0;
 }
 
+static inline void ima_post_create_tmpfile(struct inode *inode)
+{
+}
+
 static inline void ima_file_free(struct file *file)
 {
 	return;
diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c
index 4ffac4f5c647..357edd140c09 100644
--- a/security/integrity/ima/ima_main.c
+++ b/security/integrity/ima/ima_main.c
@@ -396,6 +396,33 @@ int ima_file_check(struct file *file, int mask)
 }
 EXPORT_SYMBOL_GPL(ima_file_check);
 
+/**
+ * ima_post_create_tmpfile - mark newly created tmpfile as new
+ * @file : newly created tmpfile
+ *
+ * No measuring, appraising or auditing of newly created tmpfiles is needed.
+ * Skip calling process_measurement(), but indicate which newly, created
+ * tmpfiles are in policy.
+ */
+void ima_post_create_tmpfile(struct inode *inode)
+{
+	struct integrity_iint_cache *iint;
+	int must_appraise;
+
+	must_appraise = ima_must_appraise(inode, MAY_ACCESS, FILE_CHECK);
+	if (!must_appraise)
+		return;
+
+	/* Nothing to do if we can't allocate memory */
+	iint = integrity_inode_get(inode);
+	if (!iint)
+		return;
+
+	/* needed for writing the security xattrs */
+	set_bit(IMA_UPDATE_XATTR, &iint->atomic_flags);
+	iint->ima_file_status = INTEGRITY_PASS;
+}
+
 /**
  * ima_post_path_mknod - mark as a new inode
  * @dentry: newly created dentry
@@ -413,9 +440,13 @@ void ima_post_path_mknod(struct dentry *dentry)
 	if (!must_appraise)
 		return;
 
+	/* Nothing to do if we can't allocate memory */
 	iint = integrity_inode_get(inode);
-	if (iint)
-		iint->flags |= IMA_NEW_FILE;
+	if (!iint)
+		return;
+
+	/* needed for re-opening empty files */
+	iint->flags |= IMA_NEW_FILE;
 }
 
 /**
-- 
cgit v1.2.3


From 668aa15b5bf87f156ec805cb7348c785c56b82ab Mon Sep 17 00:00:00 2001
From: Kamal Heib <kamalheib1@gmail.com>
Date: Tue, 29 Jan 2019 12:08:50 +0200
Subject: RDMA/rxe: Improve loopback marking

Currently a packet is marked for loopback only if the source and
destination addresses equals. This is not enough when multiple gids are
present in rxe device's gid table and the traffic is from one gid to
another. Fix it by marking the packet for loopback if the destination MAC
address is equal to the source MAC address.

Signed-off-by: Kamal Heib <kamalheib1@gmail.com>
Reviewed-by: Yuval Shaia <yuval.shaia@oracle.com>
Tested-by: Yuval Shaia <yuval.shaia@oracle.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/sw/rxe/rxe_av.c  | 1 +
 drivers/infiniband/sw/rxe/rxe_net.c | 9 +++------
 include/uapi/rdma/rdma_user_rxe.h   | 3 +--
 3 files changed, 5 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/sw/rxe/rxe_av.c b/drivers/infiniband/sw/rxe/rxe_av.c
index 27a7dec18874..81ee756c19b8 100644
--- a/drivers/infiniband/sw/rxe/rxe_av.c
+++ b/drivers/infiniband/sw/rxe/rxe_av.c
@@ -38,6 +38,7 @@ void rxe_init_av(struct rdma_ah_attr *attr, struct rxe_av *av)
 {
 	rxe_av_from_attr(rdma_ah_get_port_num(attr), av, attr);
 	rxe_av_fill_ip_info(av, attr);
+	memcpy(av->dmac, attr->roce.dmac, ETH_ALEN);
 }
 
 int rxe_av_chk_attr(struct rxe_dev *rxe, struct rdma_ah_attr *attr)
diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c
index 19f3c69916b1..3b162e92e8e8 100644
--- a/drivers/infiniband/sw/rxe/rxe_net.c
+++ b/drivers/infiniband/sw/rxe/rxe_net.c
@@ -384,9 +384,6 @@ static int prepare4(struct rxe_pkt_info *pkt, struct sk_buff *skb,
 		return -EHOSTUNREACH;
 	}
 
-	if (!memcmp(saddr, daddr, sizeof(*daddr)))
-		pkt->mask |= RXE_LOOPBACK_MASK;
-
 	prepare_udp_hdr(skb, cpu_to_be16(qp->src_port),
 			cpu_to_be16(ROCE_V2_UDP_DPORT));
 
@@ -411,9 +408,6 @@ static int prepare6(struct rxe_pkt_info *pkt, struct sk_buff *skb,
 		return -EHOSTUNREACH;
 	}
 
-	if (!memcmp(saddr, daddr, sizeof(*daddr)))
-		pkt->mask |= RXE_LOOPBACK_MASK;
-
 	prepare_udp_hdr(skb, cpu_to_be16(qp->src_port),
 			cpu_to_be16(ROCE_V2_UDP_DPORT));
 
@@ -437,6 +431,9 @@ int rxe_prepare(struct rxe_pkt_info *pkt, struct sk_buff *skb, u32 *crc)
 
 	*crc = rxe_icrc_hdr(pkt, skb);
 
+	if (ether_addr_equal(skb->dev->dev_addr, av->dmac))
+		pkt->mask |= RXE_LOOPBACK_MASK;
+
 	return err;
 }
 
diff --git a/include/uapi/rdma/rdma_user_rxe.h b/include/uapi/rdma/rdma_user_rxe.h
index 44ef6a3b7afc..aae2e696bb38 100644
--- a/include/uapi/rdma/rdma_user_rxe.h
+++ b/include/uapi/rdma/rdma_user_rxe.h
@@ -58,8 +58,7 @@ struct rxe_global_route {
 struct rxe_av {
 	__u8			port_num;
 	__u8			network_type;
-	__u16			reserved1;
-	__u32			reserved2;
+	__u8			dmac[6];
 	struct rxe_global_route	grh;
 	union {
 		struct sockaddr_in	_sockaddr_in;
-- 
cgit v1.2.3


From 3eb450367d0823226515ee24712ed08eccb33eb9 Mon Sep 17 00:00:00 2001
From: Santosh Shilimkar <santosh.shilimkar@oracle.com>
Date: Tue, 23 Oct 2018 23:21:14 -0400
Subject: rds: add type of service(tos) infrastructure

RDS Service type (TOS) is user-defined and needs to be configured
via RDS IOCTL interface. It must be set before initiating any
traffic and once set the TOS can not be changed. All out-going
traffic from the socket will be associated with its TOS.

Reviewed-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Signed-off-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
[yanjun.zhu@oracle.com: Adapted original patch with ipv6 changes]
Signed-off-by: Zhu Yanjun <yanjun.zhu@oracle.com>
---
 include/uapi/linux/rds.h | 11 +++++++++++
 net/rds/af_rds.c         | 35 ++++++++++++++++++++++++++++++++++-
 net/rds/connection.c     | 20 +++++++++++---------
 net/rds/ib.c             |  1 +
 net/rds/ib_cm.c          |  2 +-
 net/rds/rdma_transport.c |  1 +
 net/rds/rds.h            |  9 +++++++--
 net/rds/recv.c           |  1 +
 net/rds/send.c           |  6 +++---
 net/rds/tcp.c            |  1 +
 net/rds/tcp_listen.c     |  2 +-
 11 files changed, 72 insertions(+), 17 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/rds.h b/include/uapi/linux/rds.h
index 8b73cb603c5f..5d0f76c780e5 100644
--- a/include/uapi/linux/rds.h
+++ b/include/uapi/linux/rds.h
@@ -69,6 +69,12 @@
 #define RDS_TRANS_COUNT	3
 #define	RDS_TRANS_NONE	(~0)
 
+/* IOCTLS commands for SOL_RDS */
+#define SIOCRDSSETTOS		(SIOCPROTOPRIVATE)
+#define SIOCRDSGETTOS		(SIOCPROTOPRIVATE + 1)
+
+typedef __u8	rds_tos_t;
+
 /*
  * Control message types for SOL_RDS.
  *
@@ -149,6 +155,7 @@ struct rds_info_connection {
 	__be32		faddr;
 	__u8		transport[TRANSNAMSIZ];		/* null term ascii */
 	__u8		flags;
+	__u8		tos;
 } __attribute__((packed));
 
 struct rds6_info_connection {
@@ -171,6 +178,7 @@ struct rds_info_message {
 	__be16		lport;
 	__be16		fport;
 	__u8		flags;
+	__u8		tos;
 } __attribute__((packed));
 
 struct rds6_info_message {
@@ -214,6 +222,7 @@ struct rds_info_tcp_socket {
 	__u32           last_sent_nxt;
 	__u32           last_expected_una;
 	__u32           last_seen_una;
+	__u8		tos;
 } __attribute__((packed));
 
 struct rds6_info_tcp_socket {
@@ -240,6 +249,7 @@ struct rds_info_rdma_connection {
 	__u32		max_send_sge;
 	__u32		rdma_mr_max;
 	__u32		rdma_mr_size;
+	__u8		tos;
 };
 
 struct rds6_info_rdma_connection {
@@ -253,6 +263,7 @@ struct rds6_info_rdma_connection {
 	__u32		max_send_sge;
 	__u32		rdma_mr_max;
 	__u32		rdma_mr_size;
+	__u8		tos;
 };
 
 /* RDS message Receive Path Latency points */
diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c
index 65571a6273c3..904515858037 100644
--- a/net/rds/af_rds.c
+++ b/net/rds/af_rds.c
@@ -254,7 +254,38 @@ static __poll_t rds_poll(struct file *file, struct socket *sock,
 
 static int rds_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 {
-	return -ENOIOCTLCMD;
+	struct rds_sock *rs = rds_sk_to_rs(sock->sk);
+	rds_tos_t tos;
+
+	switch (cmd) {
+	case SIOCRDSSETTOS:
+		if (get_user(tos, (rds_tos_t __user *)arg))
+			return -EFAULT;
+
+		if (rs->rs_transport &&
+		    rs->rs_transport->t_type == RDS_TRANS_TCP)
+			tos = 0;
+
+		spin_lock_bh(&rds_sock_lock);
+		if (rs->rs_tos || rs->rs_conn) {
+			spin_unlock_bh(&rds_sock_lock);
+			return -EINVAL;
+		}
+		rs->rs_tos = tos;
+		spin_unlock_bh(&rds_sock_lock);
+		break;
+	case SIOCRDSGETTOS:
+		spin_lock_bh(&rds_sock_lock);
+		tos = rs->rs_tos;
+		spin_unlock_bh(&rds_sock_lock);
+		if (put_user(tos, (rds_tos_t __user *)arg))
+			return -EFAULT;
+		break;
+	default:
+		return -ENOIOCTLCMD;
+	}
+
+	return 0;
 }
 
 static int rds_cancel_sent_to(struct rds_sock *rs, char __user *optval,
@@ -650,6 +681,8 @@ static int __rds_create(struct socket *sock, struct sock *sk, int protocol)
 	spin_lock_init(&rs->rs_rdma_lock);
 	rs->rs_rdma_keys = RB_ROOT;
 	rs->rs_rx_traces = 0;
+	rs->rs_tos = 0;
+	rs->rs_conn = NULL;
 
 	spin_lock_bh(&rds_sock_lock);
 	list_add_tail(&rs->rs_item, &rds_sock_list);
diff --git a/net/rds/connection.c b/net/rds/connection.c
index 1ab14b68ecc8..7ea134f9a825 100644
--- a/net/rds/connection.c
+++ b/net/rds/connection.c
@@ -84,7 +84,7 @@ static struct rds_connection *rds_conn_lookup(struct net *net,
 					      const struct in6_addr *laddr,
 					      const struct in6_addr *faddr,
 					      struct rds_transport *trans,
-					      int dev_if)
+					      u8 tos, int dev_if)
 {
 	struct rds_connection *conn, *ret = NULL;
 
@@ -92,6 +92,7 @@ static struct rds_connection *rds_conn_lookup(struct net *net,
 		if (ipv6_addr_equal(&conn->c_faddr, faddr) &&
 		    ipv6_addr_equal(&conn->c_laddr, laddr) &&
 		    conn->c_trans == trans &&
+		    conn->c_tos == tos &&
 		    net == rds_conn_net(conn) &&
 		    conn->c_dev_if == dev_if) {
 			ret = conn;
@@ -160,7 +161,7 @@ static struct rds_connection *__rds_conn_create(struct net *net,
 						const struct in6_addr *laddr,
 						const struct in6_addr *faddr,
 						struct rds_transport *trans,
-						gfp_t gfp,
+						gfp_t gfp, u8 tos,
 						int is_outgoing,
 						int dev_if)
 {
@@ -172,7 +173,7 @@ static struct rds_connection *__rds_conn_create(struct net *net,
 	int npaths = (trans->t_mp_capable ? RDS_MPATH_WORKERS : 1);
 
 	rcu_read_lock();
-	conn = rds_conn_lookup(net, head, laddr, faddr, trans, dev_if);
+	conn = rds_conn_lookup(net, head, laddr, faddr, trans, tos, dev_if);
 	if (conn &&
 	    conn->c_loopback &&
 	    conn->c_trans != &rds_loop_transport &&
@@ -206,6 +207,7 @@ static struct rds_connection *__rds_conn_create(struct net *net,
 	conn->c_isv6 = !ipv6_addr_v4mapped(laddr);
 	conn->c_faddr = *faddr;
 	conn->c_dev_if = dev_if;
+	conn->c_tos = tos;
 
 #if IS_ENABLED(CONFIG_IPV6)
 	/* If the local address is link local, set c_bound_if to be the
@@ -298,7 +300,7 @@ static struct rds_connection *__rds_conn_create(struct net *net,
 		struct rds_connection *found;
 
 		found = rds_conn_lookup(net, head, laddr, faddr, trans,
-					dev_if);
+					tos, dev_if);
 		if (found) {
 			struct rds_conn_path *cp;
 			int i;
@@ -333,10 +335,10 @@ out:
 struct rds_connection *rds_conn_create(struct net *net,
 				       const struct in6_addr *laddr,
 				       const struct in6_addr *faddr,
-				       struct rds_transport *trans, gfp_t gfp,
-				       int dev_if)
+				       struct rds_transport *trans, u8 tos,
+				       gfp_t gfp, int dev_if)
 {
-	return __rds_conn_create(net, laddr, faddr, trans, gfp, 0, dev_if);
+	return __rds_conn_create(net, laddr, faddr, trans, gfp, tos, 0, dev_if);
 }
 EXPORT_SYMBOL_GPL(rds_conn_create);
 
@@ -344,9 +346,9 @@ struct rds_connection *rds_conn_create_outgoing(struct net *net,
 						const struct in6_addr *laddr,
 						const struct in6_addr *faddr,
 						struct rds_transport *trans,
-						gfp_t gfp, int dev_if)
+						u8 tos, gfp_t gfp, int dev_if)
 {
-	return __rds_conn_create(net, laddr, faddr, trans, gfp, 1, dev_if);
+	return __rds_conn_create(net, laddr, faddr, trans, gfp, tos, 1, dev_if);
 }
 EXPORT_SYMBOL_GPL(rds_conn_create_outgoing);
 
diff --git a/net/rds/ib.c b/net/rds/ib.c
index 9d7b7586f240..21b6588b71ca 100644
--- a/net/rds/ib.c
+++ b/net/rds/ib.c
@@ -301,6 +301,7 @@ static int rds_ib_conn_info_visitor(struct rds_connection *conn,
 
 	iinfo->src_addr = conn->c_laddr.s6_addr32[3];
 	iinfo->dst_addr = conn->c_faddr.s6_addr32[3];
+	iinfo->tos = conn->c_tos;
 
 	memset(&iinfo->src_gid, 0, sizeof(iinfo->src_gid));
 	memset(&iinfo->dst_gid, 0, sizeof(iinfo->dst_gid));
diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c
index a1c3ad380ec8..70518e329a9e 100644
--- a/net/rds/ib_cm.c
+++ b/net/rds/ib_cm.c
@@ -786,7 +786,7 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
 
 	/* RDS/IB is not currently netns aware, thus init_net */
 	conn = rds_conn_create(&init_net, daddr6, saddr6,
-			       &rds_ib_transport, GFP_KERNEL, ifindex);
+			       &rds_ib_transport, 0, GFP_KERNEL, ifindex);
 	if (IS_ERR(conn)) {
 		rdsdebug("rds_conn_create failed (%ld)\n", PTR_ERR(conn));
 		conn = NULL;
diff --git a/net/rds/rdma_transport.c b/net/rds/rdma_transport.c
index 63cbc6b8560c..e37f91537d29 100644
--- a/net/rds/rdma_transport.c
+++ b/net/rds/rdma_transport.c
@@ -115,6 +115,7 @@ static int rds_rdma_cm_event_handler_cmn(struct rdma_cm_id *cm_id,
 			pr_warn("RDS/RDMA: conn <%pI6c, %pI6c> rejected, dropping connection\n",
 				&conn->c_laddr, &conn->c_faddr);
 			conn->c_proposed_version = RDS_PROTOCOL_COMPAT_VERSION;
+			conn->c_tos = 0;
 			rds_conn_drop(conn);
 		}
 		rdsdebug("Connection rejected: %s\n",
diff --git a/net/rds/rds.h b/net/rds/rds.h
index 660023f08553..7e52b92092d7 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -158,6 +158,9 @@ struct rds_connection {
 	unsigned int		c_version;
 	possible_net_t		c_net;
 
+	/* TOS */
+	u8			c_tos;
+
 	struct list_head	c_map_item;
 	unsigned long		c_map_queued;
 
@@ -652,6 +655,7 @@ struct rds_sock {
 	u8			rs_rx_traces;
 	u8			rs_rx_trace[RDS_MSG_RX_DGRAM_TRACE_MAX];
 	struct rds_msg_zcopy_queue rs_zcookie_queue;
+	u8			rs_tos;
 };
 
 static inline struct rds_sock *rds_sk_to_rs(const struct sock *sk)
@@ -760,13 +764,14 @@ void rds_conn_exit(void);
 struct rds_connection *rds_conn_create(struct net *net,
 				       const struct in6_addr *laddr,
 				       const struct in6_addr *faddr,
-				       struct rds_transport *trans, gfp_t gfp,
+				       struct rds_transport *trans,
+				       u8 tos, gfp_t gfp,
 				       int dev_if);
 struct rds_connection *rds_conn_create_outgoing(struct net *net,
 						const struct in6_addr *laddr,
 						const struct in6_addr *faddr,
 						struct rds_transport *trans,
-						gfp_t gfp, int dev_if);
+						u8 tos, gfp_t gfp, int dev_if);
 void rds_conn_shutdown(struct rds_conn_path *cpath);
 void rds_conn_destroy(struct rds_connection *conn);
 void rds_conn_drop(struct rds_connection *conn);
diff --git a/net/rds/recv.c b/net/rds/recv.c
index 6bb6b16ca270..853de4876088 100644
--- a/net/rds/recv.c
+++ b/net/rds/recv.c
@@ -782,6 +782,7 @@ void rds_inc_info_copy(struct rds_incoming *inc,
 
 	minfo.seq = be64_to_cpu(inc->i_hdr.h_sequence);
 	minfo.len = be32_to_cpu(inc->i_hdr.h_len);
+	minfo.tos = inc->i_conn->c_tos;
 
 	if (flip) {
 		minfo.laddr = daddr;
diff --git a/net/rds/send.c b/net/rds/send.c
index fd8b687d5c05..c555e121b908 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -1277,12 +1277,12 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
 
 	/* rds_conn_create has a spinlock that runs with IRQ off.
 	 * Caching the conn in the socket helps a lot. */
-	if (rs->rs_conn && ipv6_addr_equal(&rs->rs_conn->c_faddr, &daddr))
+	if (rs->rs_conn && ipv6_addr_equal(&rs->rs_conn->c_faddr, &daddr)) {
 		conn = rs->rs_conn;
-	else {
+	} else {
 		conn = rds_conn_create_outgoing(sock_net(sock->sk),
 						&rs->rs_bound_addr, &daddr,
-						rs->rs_transport,
+						rs->rs_transport, 0,
 						sock->sk->sk_allocation,
 						scope_id);
 		if (IS_ERR(conn)) {
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index c16f0a362c32..eb6851952cbf 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -267,6 +267,7 @@ static void rds_tcp_tc_info(struct socket *rds_sock, unsigned int len,
 		tsinfo.last_sent_nxt = tc->t_last_sent_nxt;
 		tsinfo.last_expected_una = tc->t_last_expected_una;
 		tsinfo.last_seen_una = tc->t_last_seen_una;
+		tsinfo.tos = tc->t_cpath->cp_conn->c_tos;
 
 		rds_info_copy(iter, &tsinfo, sizeof(tsinfo));
 	}
diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c
index c12203f646da..810a3a49e947 100644
--- a/net/rds/tcp_listen.c
+++ b/net/rds/tcp_listen.c
@@ -200,7 +200,7 @@ int rds_tcp_accept_one(struct socket *sock)
 
 	conn = rds_conn_create(sock_net(sock->sk),
 			       my_addr, peer_addr,
-			       &rds_tcp_transport, GFP_KERNEL, dev_if);
+			       &rds_tcp_transport, 0, GFP_KERNEL, dev_if);
 
 	if (IS_ERR(conn)) {
 		ret = PTR_ERR(conn);
-- 
cgit v1.2.3


From f76903d574b26bc596951a5c5e757eb02c67abbd Mon Sep 17 00:00:00 2001
From: Steve Wise <swise@opengridcomputing.com>
Date: Tue, 29 Jan 2019 13:33:11 -0800
Subject: RDMA/IWPM: refactor the IWPM message attribute names

In order to add new IWPM_NL attributes, the enums for the IWPM commands
attributes are refactored such that a new attribute can be added without
breaking ABI version 3. Instead of sharing nl attribute enums for both
request and response messages, we create separate enums for each IWPM
message request and reply.  This allows us to extend any given IWPM
message by adding new attributes for just that message.  These new enums
are created, though, in a way to avoid breaking ABI version 3.

Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Reviewed-by: Tatyana Nikolova <Tatyana.E.Nikolova@intel.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/core/iwpm_msg.c | 40 ++++++++++++++++++++++----------------
 include/uapi/rdma/rdma_netlink.h   | 21 +++++++++++++++++---
 2 files changed, 41 insertions(+), 20 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/iwpm_msg.c b/drivers/infiniband/core/iwpm_msg.c
index 8861c052155a..3a8753595c8f 100644
--- a/drivers/infiniband/core/iwpm_msg.c
+++ b/drivers/infiniband/core/iwpm_msg.c
@@ -403,10 +403,12 @@ register_pid_response_exit:
 
 /* netlink attribute policy for the received response to add mapping request */
 static const struct nla_policy resp_add_policy[IWPM_NLA_RMANAGE_MAPPING_MAX] = {
-	[IWPM_NLA_MANAGE_MAPPING_SEQ]     = { .type = NLA_U32 },
-	[IWPM_NLA_MANAGE_ADDR]            = { .len = sizeof(struct sockaddr_storage) },
-	[IWPM_NLA_MANAGE_MAPPED_LOC_ADDR] = { .len = sizeof(struct sockaddr_storage) },
-	[IWPM_NLA_RMANAGE_MAPPING_ERR]	  = { .type = NLA_U16 }
+	[IWPM_NLA_RMANAGE_MAPPING_SEQ]     = { .type = NLA_U32 },
+	[IWPM_NLA_RMANAGE_ADDR]            = {
+				.len = sizeof(struct sockaddr_storage) },
+	[IWPM_NLA_RMANAGE_MAPPED_LOC_ADDR] = {
+				.len = sizeof(struct sockaddr_storage) },
+	[IWPM_NLA_RMANAGE_MAPPING_ERR]	   = { .type = NLA_U16 }
 };
 
 /*
@@ -430,7 +432,7 @@ int iwpm_add_mapping_cb(struct sk_buff *skb, struct netlink_callback *cb)
 
 	atomic_set(&echo_nlmsg_seq, cb->nlh->nlmsg_seq);
 
-	msg_seq = nla_get_u32(nltb[IWPM_NLA_MANAGE_MAPPING_SEQ]);
+	msg_seq = nla_get_u32(nltb[IWPM_NLA_RMANAGE_MAPPING_SEQ]);
 	nlmsg_request = iwpm_find_nlmsg_request(msg_seq);
 	if (!nlmsg_request) {
 		pr_info("%s: Could not find a matching request (seq = %u)\n",
@@ -439,9 +441,9 @@ int iwpm_add_mapping_cb(struct sk_buff *skb, struct netlink_callback *cb)
 	}
 	pm_msg = nlmsg_request->req_buffer;
 	local_sockaddr = (struct sockaddr_storage *)
-			nla_data(nltb[IWPM_NLA_MANAGE_ADDR]);
+			nla_data(nltb[IWPM_NLA_RMANAGE_ADDR]);
 	mapped_sockaddr = (struct sockaddr_storage *)
-			nla_data(nltb[IWPM_NLA_MANAGE_MAPPED_LOC_ADDR]);
+			nla_data(nltb[IWPM_NLA_RMANAGE_MAPPED_LOC_ADDR]);
 
 	if (iwpm_compare_sockaddr(local_sockaddr, &pm_msg->loc_addr)) {
 		nlmsg_request->err_code = IWPM_USER_LIB_INFO_ERR;
@@ -472,11 +474,15 @@ add_mapping_response_exit:
 /* netlink attribute policy for the response to add and query mapping request
  * and response with remote address info */
 static const struct nla_policy resp_query_policy[IWPM_NLA_RQUERY_MAPPING_MAX] = {
-	[IWPM_NLA_QUERY_MAPPING_SEQ]      = { .type = NLA_U32 },
-	[IWPM_NLA_QUERY_LOCAL_ADDR]       = { .len = sizeof(struct sockaddr_storage) },
-	[IWPM_NLA_QUERY_REMOTE_ADDR]      = { .len = sizeof(struct sockaddr_storage) },
-	[IWPM_NLA_RQUERY_MAPPED_LOC_ADDR] = { .len = sizeof(struct sockaddr_storage) },
-	[IWPM_NLA_RQUERY_MAPPED_REM_ADDR] = { .len = sizeof(struct sockaddr_storage) },
+	[IWPM_NLA_RQUERY_MAPPING_SEQ]     = { .type = NLA_U32 },
+	[IWPM_NLA_RQUERY_LOCAL_ADDR]      = {
+				.len = sizeof(struct sockaddr_storage) },
+	[IWPM_NLA_RQUERY_REMOTE_ADDR]     = {
+				.len = sizeof(struct sockaddr_storage) },
+	[IWPM_NLA_RQUERY_MAPPED_LOC_ADDR] = {
+				.len = sizeof(struct sockaddr_storage) },
+	[IWPM_NLA_RQUERY_MAPPED_REM_ADDR] = {
+				.len = sizeof(struct sockaddr_storage) },
 	[IWPM_NLA_RQUERY_MAPPING_ERR]	  = { .type = NLA_U16 }
 };
 
@@ -502,7 +508,7 @@ int iwpm_add_and_query_mapping_cb(struct sk_buff *skb,
 		return -EINVAL;
 	atomic_set(&echo_nlmsg_seq, cb->nlh->nlmsg_seq);
 
-	msg_seq = nla_get_u32(nltb[IWPM_NLA_QUERY_MAPPING_SEQ]);
+	msg_seq = nla_get_u32(nltb[IWPM_NLA_RQUERY_MAPPING_SEQ]);
 	nlmsg_request = iwpm_find_nlmsg_request(msg_seq);
 	if (!nlmsg_request) {
 		pr_info("%s: Could not find a matching request (seq = %u)\n",
@@ -511,9 +517,9 @@ int iwpm_add_and_query_mapping_cb(struct sk_buff *skb,
 	}
 	pm_msg = nlmsg_request->req_buffer;
 	local_sockaddr = (struct sockaddr_storage *)
-			nla_data(nltb[IWPM_NLA_QUERY_LOCAL_ADDR]);
+			nla_data(nltb[IWPM_NLA_RQUERY_LOCAL_ADDR]);
 	remote_sockaddr = (struct sockaddr_storage *)
-			nla_data(nltb[IWPM_NLA_QUERY_REMOTE_ADDR]);
+			nla_data(nltb[IWPM_NLA_RQUERY_REMOTE_ADDR]);
 	mapped_loc_sockaddr = (struct sockaddr_storage *)
 			nla_data(nltb[IWPM_NLA_RQUERY_MAPPED_LOC_ADDR]);
 	mapped_rem_sockaddr = (struct sockaddr_storage *)
@@ -588,9 +594,9 @@ int iwpm_remote_info_cb(struct sk_buff *skb, struct netlink_callback *cb)
 	atomic_set(&echo_nlmsg_seq, cb->nlh->nlmsg_seq);
 
 	local_sockaddr = (struct sockaddr_storage *)
-			nla_data(nltb[IWPM_NLA_QUERY_LOCAL_ADDR]);
+			nla_data(nltb[IWPM_NLA_RQUERY_LOCAL_ADDR]);
 	remote_sockaddr = (struct sockaddr_storage *)
-			nla_data(nltb[IWPM_NLA_QUERY_REMOTE_ADDR]);
+			nla_data(nltb[IWPM_NLA_RQUERY_REMOTE_ADDR]);
 	mapped_loc_sockaddr = (struct sockaddr_storage *)
 			nla_data(nltb[IWPM_NLA_RQUERY_MAPPED_LOC_ADDR]);
 	mapped_rem_sockaddr = (struct sockaddr_storage *)
diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h
index 2e18b77a817f..42d53e182d5f 100644
--- a/include/uapi/rdma/rdma_netlink.h
+++ b/include/uapi/rdma/rdma_netlink.h
@@ -83,13 +83,20 @@ enum {
 	IWPM_NLA_MANAGE_MAPPING_UNSPEC = 0,
 	IWPM_NLA_MANAGE_MAPPING_SEQ,
 	IWPM_NLA_MANAGE_ADDR,
-	IWPM_NLA_MANAGE_MAPPED_LOC_ADDR,
+	IWPM_NLA_MANAGE_MAPPING_MAX
+};
+
+enum {
+	IWPM_NLA_RMANAGE_MAPPING_UNSPEC = 0,
+	IWPM_NLA_RMANAGE_MAPPING_SEQ,
+	IWPM_NLA_RMANAGE_ADDR,
+	IWPM_NLA_RMANAGE_MAPPED_LOC_ADDR,
+	/* The following maintains bisectability of rdma-core */
+	IWPM_NLA_MANAGE_MAPPED_LOC_ADDR = IWPM_NLA_RMANAGE_MAPPED_LOC_ADDR,
 	IWPM_NLA_RMANAGE_MAPPING_ERR,
 	IWPM_NLA_RMANAGE_MAPPING_MAX
 };
 
-#define IWPM_NLA_MANAGE_MAPPING_MAX 3
-#define IWPM_NLA_QUERY_MAPPING_MAX  4
 #define IWPM_NLA_MAPINFO_SEND_MAX   3
 
 enum {
@@ -97,6 +104,14 @@ enum {
 	IWPM_NLA_QUERY_MAPPING_SEQ,
 	IWPM_NLA_QUERY_LOCAL_ADDR,
 	IWPM_NLA_QUERY_REMOTE_ADDR,
+	IWPM_NLA_QUERY_MAPPING_MAX,
+};
+
+enum {
+	IWPM_NLA_RQUERY_MAPPING_UNSPEC = 0,
+	IWPM_NLA_RQUERY_MAPPING_SEQ,
+	IWPM_NLA_RQUERY_LOCAL_ADDR,
+	IWPM_NLA_RQUERY_REMOTE_ADDR,
 	IWPM_NLA_RQUERY_MAPPED_LOC_ADDR,
 	IWPM_NLA_RQUERY_MAPPED_REM_ADDR,
 	IWPM_NLA_RQUERY_MAPPING_ERR,
-- 
cgit v1.2.3


From b0bad9ad514fc1dd8890f1749f5d2425a73270e3 Mon Sep 17 00:00:00 2001
From: Steve Wise <swise@opengridcomputing.com>
Date: Tue, 29 Jan 2019 13:33:16 -0800
Subject: RDMA/IWPM: Support no port mapping requirements

A soft iwarp driver that uses the host TCP stack via a kernel mode socket
does not need port mapping.  In fact, if the port map daemon, iwpmd, is
running, then iwpmd must not try and create/bind a socket to the actual
port for a soft iwarp connection, since the driver already has that socket
bound.

Yet if the soft iwarp driver wants to interoperate with hard iwarp devices
that -are- using port mapping, then the soft iwarp driver's mappings still
need to be maintained and advertised by the iwpm protocol.

This patch enhances the rdma driver<->iwcm interface to allow an iwarp
driver to specify that it does not want port mapping.  The iwpm
kernel<->iwpmd interface is also enhanced to pass up this information on
map requests.

Care is taken to interoperate with the current iwpmd version (ABI version
3) and only use the new NL attributes if iwpmd supports ABI version 4.

The ABI version define has also been created in rdma_netlink.h so both
kernel and user code can share it.  The iwcm and iwpmd negotiate the ABI
version to use with a new HELLO netlink message.

Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Reviewed-by: Tatyana Nikolova <Tatyana.E.Nikolova@intel.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/core/iwcm.c      |  7 +++-
 drivers/infiniband/core/iwpm_msg.c  | 80 +++++++++++++++++++++++++++++++++++--
 drivers/infiniband/core/iwpm_util.c | 48 +++++++++++++++++++++-
 drivers/infiniband/core/iwpm_util.h | 12 ++++++
 include/rdma/iw_cm.h                | 13 ++++++
 include/rdma/iw_portmap.h           | 15 ++++++-
 include/uapi/rdma/rdma_netlink.h    | 24 +++++++++++
 7 files changed, 192 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c
index 476abc74178e..350ea2bab78a 100644
--- a/drivers/infiniband/core/iwcm.c
+++ b/drivers/infiniband/core/iwcm.c
@@ -87,7 +87,8 @@ static struct rdma_nl_cbs iwcm_nl_cb_table[RDMA_NL_IWPM_NUM_OPS] = {
 	[RDMA_NL_IWPM_REMOTE_INFO] = {.dump = iwpm_remote_info_cb},
 	[RDMA_NL_IWPM_HANDLE_ERR] = {.dump = iwpm_mapping_error_cb},
 	[RDMA_NL_IWPM_MAPINFO] = {.dump = iwpm_mapping_info_cb},
-	[RDMA_NL_IWPM_MAPINFO_NUM] = {.dump = iwpm_ack_mapping_info_cb}
+	[RDMA_NL_IWPM_MAPINFO_NUM] = {.dump = iwpm_ack_mapping_info_cb},
+	[RDMA_NL_IWPM_HELLO] = {.dump = iwpm_hello_cb}
 };
 
 static struct workqueue_struct *iwcm_wq;
@@ -525,6 +526,8 @@ static int iw_cm_map(struct iw_cm_id *cm_id, bool active)
 	cm_id->mapped = true;
 	pm_msg.loc_addr = cm_id->local_addr;
 	pm_msg.rem_addr = cm_id->remote_addr;
+	pm_msg.flags = (cm_id->device->iwcm->driver_flags & IW_F_NO_PORT_MAP) ?
+		       IWPM_FLAGS_NO_PORT_MAP : 0;
 	if (active)
 		status = iwpm_add_and_query_mapping(&pm_msg,
 						    RDMA_NL_IWCM);
@@ -543,7 +546,7 @@ static int iw_cm_map(struct iw_cm_id *cm_id, bool active)
 
 	return iwpm_create_mapinfo(&cm_id->local_addr,
 				   &cm_id->m_local_addr,
-				   RDMA_NL_IWCM);
+				   RDMA_NL_IWCM, pm_msg.flags);
 }
 
 /*
diff --git a/drivers/infiniband/core/iwpm_msg.c b/drivers/infiniband/core/iwpm_msg.c
index 3a8753595c8f..2e30e65b0816 100644
--- a/drivers/infiniband/core/iwpm_msg.c
+++ b/drivers/infiniband/core/iwpm_msg.c
@@ -34,7 +34,7 @@
 #include "iwpm_util.h"
 
 static const char iwpm_ulib_name[IWPM_ULIBNAME_SIZE] = "iWarpPortMapperUser";
-static int iwpm_ulib_version = 3;
+u16 iwpm_ulib_version = IWPM_UABI_VERSION_MIN;
 static int iwpm_user_pid = IWPM_PID_UNDEFINED;
 static atomic_t echo_nlmsg_seq;
 
@@ -130,6 +130,7 @@ pid_query_error:
  * nlmsg attributes:
  *	[IWPM_NLA_MANAGE_MAPPING_SEQ]
  *	[IWPM_NLA_MANAGE_ADDR]
+ *	[IWPM_NLA_MANAGE_FLAGS]
  */
 int iwpm_add_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client)
 {
@@ -173,6 +174,18 @@ int iwpm_add_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client)
 	if (ret)
 		goto add_mapping_error;
 
+	/* If flags are required and we're not V4, then return a quiet error */
+	if (pm_msg->flags && iwpm_ulib_version == IWPM_UABI_VERSION_MIN) {
+		ret = -EINVAL;
+		goto add_mapping_error_nowarn;
+	}
+	if (iwpm_ulib_version > IWPM_UABI_VERSION_MIN) {
+		ret = ibnl_put_attr(skb, nlh, sizeof(u32), &pm_msg->flags,
+				IWPM_NLA_MANAGE_FLAGS);
+		if (ret)
+			goto add_mapping_error;
+	}
+
 	nlmsg_end(skb, nlh);
 	nlmsg_request->req_buffer = pm_msg;
 
@@ -187,6 +200,7 @@ int iwpm_add_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client)
 	return ret;
 add_mapping_error:
 	pr_info("%s: %s (client = %d)\n", __func__, err_str, nl_client);
+add_mapping_error_nowarn:
 	if (skb)
 		dev_kfree_skb(skb);
 	if (nlmsg_request)
@@ -201,6 +215,7 @@ add_mapping_error:
  *	[IWPM_NLA_QUERY_MAPPING_SEQ]
  *	[IWPM_NLA_QUERY_LOCAL_ADDR]
  *	[IWPM_NLA_QUERY_REMOTE_ADDR]
+ *	[IWPM_NLA_QUERY_FLAGS]
  */
 int iwpm_add_and_query_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client)
 {
@@ -251,6 +266,18 @@ int iwpm_add_and_query_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client)
 	if (ret)
 		goto query_mapping_error;
 
+	/* If flags are required and we're not V4, then return a quite error */
+	if (pm_msg->flags && iwpm_ulib_version == IWPM_UABI_VERSION_MIN) {
+		ret = -EINVAL;
+		goto query_mapping_error_nowarn;
+	}
+	if (iwpm_ulib_version > IWPM_UABI_VERSION_MIN) {
+		ret = ibnl_put_attr(skb, nlh, sizeof(u32), &pm_msg->flags,
+				IWPM_NLA_QUERY_FLAGS);
+		if (ret)
+			goto query_mapping_error;
+	}
+
 	nlmsg_end(skb, nlh);
 	nlmsg_request->req_buffer = pm_msg;
 
@@ -264,6 +291,7 @@ int iwpm_add_and_query_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client)
 	return ret;
 query_mapping_error:
 	pr_info("%s: %s (client = %d)\n", __func__, err_str, nl_client);
+query_mapping_error_nowarn:
 	if (skb)
 		dev_kfree_skb(skb);
 	if (nlmsg_request)
@@ -379,7 +407,7 @@ int iwpm_register_pid_cb(struct sk_buff *skb, struct netlink_callback *cb)
 	/* check device name, ulib name and version */
 	if (strcmp(pm_msg->dev_name, dev_name) ||
 			strcmp(iwpm_ulib_name, iwpm_name) ||
-			iwpm_version != iwpm_ulib_version) {
+			iwpm_version < IWPM_UABI_VERSION_MIN) {
 
 		pr_info("%s: Incorrect info (dev = %s name = %s version = %d)\n",
 				__func__, dev_name, iwpm_name, iwpm_version);
@@ -387,6 +415,10 @@ int iwpm_register_pid_cb(struct sk_buff *skb, struct netlink_callback *cb)
 		goto register_pid_response_exit;
 	}
 	iwpm_user_pid = cb->nlh->nlmsg_pid;
+	iwpm_ulib_version = iwpm_version;
+	if (iwpm_ulib_version < IWPM_UABI_VERSION)
+		pr_warn_once("%s: Down level iwpmd/pid %u.  Continuing...",
+			__func__, iwpm_user_pid);
 	atomic_set(&echo_nlmsg_seq, cb->nlh->nlmsg_seq);
 	pr_debug("%s: iWarp Port Mapper (pid = %d) is available!\n",
 			__func__, iwpm_user_pid);
@@ -661,7 +693,7 @@ int iwpm_mapping_info_cb(struct sk_buff *skb, struct netlink_callback *cb)
 	iwpm_name = (char *)nla_data(nltb[IWPM_NLA_MAPINFO_ULIB_NAME]);
 	iwpm_version = nla_get_u16(nltb[IWPM_NLA_MAPINFO_ULIB_VER]);
 	if (strcmp(iwpm_ulib_name, iwpm_name) ||
-			iwpm_version != iwpm_ulib_version) {
+			iwpm_version < IWPM_UABI_VERSION_MIN) {
 		pr_info("%s: Invalid port mapper name = %s version = %d\n",
 				__func__, iwpm_name, iwpm_version);
 		return ret;
@@ -675,6 +707,11 @@ int iwpm_mapping_info_cb(struct sk_buff *skb, struct netlink_callback *cb)
 	iwpm_set_registration(nl_client, IWPM_REG_INCOMPL);
 	atomic_set(&echo_nlmsg_seq, cb->nlh->nlmsg_seq);
 	iwpm_user_pid = cb->nlh->nlmsg_pid;
+
+	if (iwpm_ulib_version < IWPM_UABI_VERSION)
+		pr_warn_once("%s: Down level iwpmd/pid %u.  Continuing...",
+			__func__, iwpm_user_pid);
+
 	if (!iwpm_mapinfo_available())
 		return 0;
 	pr_debug("%s: iWarp Port Mapper (pid = %d) is available!\n",
@@ -754,3 +791,40 @@ int iwpm_mapping_error_cb(struct sk_buff *skb, struct netlink_callback *cb)
 	up(&nlmsg_request->sem);
 	return 0;
 }
+
+/* netlink attribute policy for the received hello request */
+static const struct nla_policy hello_policy[IWPM_NLA_HELLO_MAX] = {
+	[IWPM_NLA_HELLO_ABI_VERSION]     = { .type = NLA_U16 }
+};
+
+/*
+ * iwpm_hello_cb - Process a port mapper hello request
+ */
+int iwpm_hello_cb(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct nlattr *nltb[IWPM_NLA_HELLO_MAX];
+	const char *msg_type = "Hello request";
+	u8 nl_client;
+	u16 abi_version;
+	int ret = -EINVAL;
+
+	if (iwpm_parse_nlmsg(cb, IWPM_NLA_HELLO_MAX, hello_policy, nltb,
+			     msg_type)) {
+		pr_info("%s: Unable to parse nlmsg\n", __func__);
+		return ret;
+	}
+	abi_version = nla_get_u16(nltb[IWPM_NLA_HELLO_ABI_VERSION]);
+	nl_client = RDMA_NL_GET_CLIENT(cb->nlh->nlmsg_type);
+	if (!iwpm_valid_client(nl_client)) {
+		pr_info("%s: Invalid port mapper client = %d\n",
+				__func__, nl_client);
+		return ret;
+	}
+	iwpm_set_registration(nl_client, IWPM_REG_INCOMPL);
+	atomic_set(&echo_nlmsg_seq, cb->nlh->nlmsg_seq);
+	iwpm_ulib_version = min_t(u16, IWPM_UABI_VERSION, abi_version);
+	pr_debug("Using ABI version %u\n", iwpm_ulib_version);
+	iwpm_user_pid = cb->nlh->nlmsg_pid;
+	ret = iwpm_send_hello(nl_client, iwpm_user_pid, iwpm_ulib_version);
+	return ret;
+}
diff --git a/drivers/infiniband/core/iwpm_util.c b/drivers/infiniband/core/iwpm_util.c
index cdb63f3f4de7..363938435476 100644
--- a/drivers/infiniband/core/iwpm_util.c
+++ b/drivers/infiniband/core/iwpm_util.c
@@ -114,7 +114,7 @@ static struct hlist_head *get_mapinfo_hash_bucket(struct sockaddr_storage *,
 
 int iwpm_create_mapinfo(struct sockaddr_storage *local_sockaddr,
 			struct sockaddr_storage *mapped_sockaddr,
-			u8 nl_client)
+			u8 nl_client, u32 map_flags)
 {
 	struct hlist_head *hash_bucket_head = NULL;
 	struct iwpm_mapping_info *map_info;
@@ -132,6 +132,7 @@ int iwpm_create_mapinfo(struct sockaddr_storage *local_sockaddr,
 	memcpy(&map_info->mapped_sockaddr, mapped_sockaddr,
 	       sizeof(struct sockaddr_storage));
 	map_info->nl_client = nl_client;
+	map_info->map_flags = map_flags;
 
 	spin_lock_irqsave(&iwpm_mapinfo_lock, flags);
 	if (iwpm_hash_bucket) {
@@ -686,6 +687,14 @@ int iwpm_send_mapinfo(u8 nl_client, int iwpm_pid)
 			if (ret)
 				goto send_mapping_info_unlock;
 
+			if (iwpm_ulib_version > IWPM_UABI_VERSION_MIN) {
+				ret = ibnl_put_attr(skb, nlh, sizeof(u32),
+						&map_info->map_flags,
+						IWPM_NLA_MAPINFO_FLAGS);
+				if (ret)
+					goto send_mapping_info_unlock;
+			}
+
 			nlmsg_end(skb, nlh);
 
 			iwpm_print_sockaddr(&map_info->local_sockaddr,
@@ -754,3 +763,40 @@ int iwpm_mapinfo_available(void)
 	spin_unlock_irqrestore(&iwpm_mapinfo_lock, flags);
 	return full_bucket;
 }
+
+int iwpm_send_hello(u8 nl_client, int iwpm_pid, u16 abi_version)
+{
+	struct sk_buff *skb = NULL;
+	struct nlmsghdr *nlh;
+	u32 msg_seq;
+	const char *err_str = "";
+	int ret = -EINVAL;
+
+	skb = iwpm_create_nlmsg(RDMA_NL_IWPM_HELLO, &nlh, nl_client);
+	if (!skb) {
+		err_str = "Unable to create a nlmsg";
+		goto hello_num_error;
+	}
+	nlh->nlmsg_seq = iwpm_get_nlmsg_seq();
+	msg_seq = 0;
+	err_str = "Unable to put attribute of abi_version into nlmsg";
+	ret = ibnl_put_attr(skb, nlh, sizeof(u16), &abi_version,
+			    IWPM_NLA_HELLO_ABI_VERSION);
+	if (ret)
+		goto hello_num_error;
+	nlmsg_end(skb, nlh);
+
+	ret = rdma_nl_unicast(skb, iwpm_pid);
+	if (ret) {
+		skb = NULL;
+		err_str = "Unable to send a nlmsg";
+		goto hello_num_error;
+	}
+	pr_debug("%s: Sent hello abi_version = %u\n", __func__, abi_version);
+	return 0;
+hello_num_error:
+	pr_info("%s: %s\n", __func__, err_str);
+	if (skb)
+		dev_kfree_skb(skb);
+	return ret;
+}
diff --git a/drivers/infiniband/core/iwpm_util.h b/drivers/infiniband/core/iwpm_util.h
index af1fc14a0d3d..7e2bcc72f66c 100644
--- a/drivers/infiniband/core/iwpm_util.h
+++ b/drivers/infiniband/core/iwpm_util.h
@@ -78,6 +78,7 @@ struct iwpm_mapping_info {
 	struct sockaddr_storage local_sockaddr;
 	struct sockaddr_storage mapped_sockaddr;
 	u8     nl_client;
+	u32    map_flags;
 };
 
 struct iwpm_remote_info {
@@ -266,4 +267,15 @@ int iwpm_parse_nlmsg(struct netlink_callback *cb, int policy_max,
  * @msg: Message to print
  */
 void iwpm_print_sockaddr(struct sockaddr_storage *sockaddr, char *msg);
+
+/**
+ * iwpm_send_hello - Send hello response to iwpmd
+ *
+ * @nl_client: The index of the netlink client
+ * @abi_version: The kernel's abi_version
+ *
+ * Returns 0 on success or a negative error code
+ */
+int iwpm_send_hello(u8 nl_client, int iwpm_pid, u16 abi_version);
+extern u16 iwpm_ulib_version;
 #endif
diff --git a/include/rdma/iw_cm.h b/include/rdma/iw_cm.h
index 5cd7701db148..48512abd3162 100644
--- a/include/rdma/iw_cm.h
+++ b/include/rdma/iw_cm.h
@@ -105,6 +105,18 @@ struct iw_cm_conn_param {
 	u32 qpn;
 };
 
+enum iw_flags {
+
+	/*
+	 * This flag allows the iwcm and iwpmd to still advertise
+	 * mappings but the real and mapped port numbers are the
+	 * same.  Further, iwpmd will not bind any user socket to
+	 * reserve the port.  This is required for soft iwarp
+	 * to play in the port mapped iwarp space.
+	 */
+	IW_F_NO_PORT_MAP = (1 << 0),
+};
+
 struct iw_cm_verbs {
 	void		(*add_ref)(struct ib_qp *qp);
 
@@ -127,6 +139,7 @@ struct iw_cm_verbs {
 
 	int		(*destroy_listen)(struct iw_cm_id *cm_id);
 	char		ifname[IFNAMSIZ];
+	enum iw_flags	driver_flags;
 };
 
 /**
diff --git a/include/rdma/iw_portmap.h b/include/rdma/iw_portmap.h
index fda31673a562..84fac196ef80 100644
--- a/include/rdma/iw_portmap.h
+++ b/include/rdma/iw_portmap.h
@@ -58,6 +58,7 @@ struct iwpm_sa_data {
 	struct sockaddr_storage mapped_loc_addr;
 	struct sockaddr_storage rem_addr;
 	struct sockaddr_storage mapped_rem_addr;
+	u32 flags;
 };
 
 /**
@@ -205,9 +206,11 @@ int iwpm_get_remote_info(struct sockaddr_storage *mapped_loc_addr,
  * @local_addr: Local ip/tcp address
  * @mapped_addr: Mapped local ip/tcp address
  * @nl_client: The index of the netlink client
+ * @map_flags: IWPM mapping flags
  */
 int iwpm_create_mapinfo(struct sockaddr_storage *local_addr,
-			struct sockaddr_storage *mapped_addr, u8 nl_client);
+			struct sockaddr_storage *mapped_addr, u8 nl_client,
+			u32 map_flags);
 
 /**
  * iwpm_remove_mapinfo - Remove local and mapped IPv4/IPv6 address
@@ -221,4 +224,14 @@ int iwpm_create_mapinfo(struct sockaddr_storage *local_addr,
 int iwpm_remove_mapinfo(struct sockaddr_storage *local_addr,
 			struct sockaddr_storage *mapped_addr);
 
+/**
+ * iwpm_hello_cb - Process a hello message from iwpmd
+ *
+ * @skb:
+ * @cb: Contains the received message (payload and netlink header)
+ *
+ * Using the received port mapper pid, send the kernel's abi_version
+ * after adjusting it to support the iwpmd version.
+ */
+int iwpm_hello_cb(struct sk_buff *skb, struct netlink_callback *cb);
 #endif /* _IW_PORTMAP_H */
diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h
index 42d53e182d5f..0f5263767fb4 100644
--- a/include/uapi/rdma/rdma_netlink.h
+++ b/include/uapi/rdma/rdma_netlink.h
@@ -35,6 +35,19 @@ enum {
 	RDMA_NL_RDMA_CM_NUM_ATTR,
 };
 
+/* The minimum version that the iwpm kernel supports */
+#define IWPM_UABI_VERSION_MIN	3
+
+/* The latest version that the iwpm kernel supports */
+#define IWPM_UABI_VERSION	4
+
+/* iwarp port mapper message flags */
+enum {
+
+	/* Do not map the port for this IWPM request */
+	IWPM_FLAGS_NO_PORT_MAP = (1 << 0),
+};
+
 /* iwarp port mapper op-codes */
 enum {
 	RDMA_NL_IWPM_REG_PID = 0,
@@ -45,6 +58,7 @@ enum {
 	RDMA_NL_IWPM_HANDLE_ERR,
 	RDMA_NL_IWPM_MAPINFO,
 	RDMA_NL_IWPM_MAPINFO_NUM,
+	RDMA_NL_IWPM_HELLO,
 	RDMA_NL_IWPM_NUM_OPS
 };
 
@@ -83,6 +97,7 @@ enum {
 	IWPM_NLA_MANAGE_MAPPING_UNSPEC = 0,
 	IWPM_NLA_MANAGE_MAPPING_SEQ,
 	IWPM_NLA_MANAGE_ADDR,
+	IWPM_NLA_MANAGE_FLAGS,
 	IWPM_NLA_MANAGE_MAPPING_MAX
 };
 
@@ -98,12 +113,14 @@ enum {
 };
 
 #define IWPM_NLA_MAPINFO_SEND_MAX   3
+#define IWPM_NLA_REMOVE_MAPPING_MAX 3
 
 enum {
 	IWPM_NLA_QUERY_MAPPING_UNSPEC = 0,
 	IWPM_NLA_QUERY_MAPPING_SEQ,
 	IWPM_NLA_QUERY_LOCAL_ADDR,
 	IWPM_NLA_QUERY_REMOTE_ADDR,
+	IWPM_NLA_QUERY_FLAGS,
 	IWPM_NLA_QUERY_MAPPING_MAX,
 };
 
@@ -129,6 +146,7 @@ enum {
 	IWPM_NLA_MAPINFO_UNSPEC = 0,
 	IWPM_NLA_MAPINFO_LOCAL_ADDR,
 	IWPM_NLA_MAPINFO_MAPPED_ADDR,
+	IWPM_NLA_MAPINFO_FLAGS,
 	IWPM_NLA_MAPINFO_MAX
 };
 
@@ -147,6 +165,12 @@ enum {
 	IWPM_NLA_ERR_MAX
 };
 
+enum {
+	IWPM_NLA_HELLO_UNSPEC = 0,
+	IWPM_NLA_HELLO_ABI_VERSION,
+	IWPM_NLA_HELLO_MAX
+};
+
 /*
  * Local service operations:
  *   RESOLVE - The client requests the local service to resolve a path.
-- 
cgit v1.2.3


From f80d2f0846b7b9ceb1f2a5951229ee4391edaebd Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Fri, 25 Jan 2019 10:34:43 -0800
Subject: scsi: target/core: Remove the write_pending_status() callback
 function

Due to the patch that makes TMF handling synchronous the
write_pending_status() callback function is no longer called.  Hence remove
it.

Acked-by: Felipe Balbi <balbi@ti.com>
Reviewed-by: Sagi Grimberg <sagig@mellanox.com>
Reviewed-by: Andy Grover <agrover@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Reviewed-by: Bryant G. Ly <bryantly@linux.vnet.ibm.com>
Cc: Nicholas Bellinger <nab@linux-iscsi.org>
Cc: Mike Christie <mchristi@redhat.com>
Cc: Himanshu Madhani <himanshu.madhani@qlogic.com>
Cc: Quinn Tran <quinn.tran@qlogic.com>
Cc: Saurav Kashyap <saurav.kashyap@qlogic.com>
Cc: Michael S. Tsirkin <mst@redhat.com>
Cc: Juergen Gross <jgross@suse.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 Documentation/target/tcm_mod_builder.py      |  8 -------
 drivers/infiniband/ulp/srpt/ib_srpt.c        |  9 --------
 drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c     |  6 ------
 drivers/scsi/qla2xxx/tcm_qla2xxx.c           | 31 ----------------------------
 drivers/target/iscsi/iscsi_target_configfs.c | 13 ------------
 drivers/target/loopback/tcm_loop.c           |  6 ------
 drivers/target/sbp/sbp_target.c              |  6 ------
 drivers/target/target_core_configfs.c        |  4 ----
 drivers/target/target_core_xcopy.c           |  6 ------
 drivers/target/tcm_fc/tcm_fc.h               |  1 -
 drivers/target/tcm_fc/tfc_cmd.c              |  7 -------
 drivers/target/tcm_fc/tfc_conf.c             |  1 -
 drivers/usb/gadget/function/f_tcm.c          |  9 --------
 drivers/vhost/scsi.c                         |  6 ------
 drivers/xen/xen-scsiback.c                   |  6 ------
 include/target/target_core_fabric.h          |  1 -
 16 files changed, 120 deletions(-)

(limited to 'include')

diff --git a/Documentation/target/tcm_mod_builder.py b/Documentation/target/tcm_mod_builder.py
index 94bf6944bb1e..95d6e31f1e3a 100755
--- a/Documentation/target/tcm_mod_builder.py
+++ b/Documentation/target/tcm_mod_builder.py
@@ -297,7 +297,6 @@ def tcm_mod_build_configfs(proto_ident, fabric_mod_dir_var, fabric_mod_name):
 	buf += "	.sess_get_index			= " + fabric_mod_name + "_sess_get_index,\n"
 	buf += "	.sess_get_initiator_sid		= NULL,\n"
 	buf += "	.write_pending			= " + fabric_mod_name + "_write_pending,\n"
-	buf += "	.write_pending_status		= " + fabric_mod_name + "_write_pending_status,\n"
 	buf += "	.set_default_node_attributes	= " + fabric_mod_name + "_set_default_node_attrs,\n"
 	buf += "	.get_cmd_state			= " + fabric_mod_name + "_get_cmd_state,\n"
 	buf += "	.queue_data_in			= " + fabric_mod_name + "_queue_data_in,\n"
@@ -479,13 +478,6 @@ def tcm_mod_dump_fabric_ops(proto_ident, fabric_mod_dir_var, fabric_mod_name):
 			buf += "}\n\n"
 			bufi += "int " + fabric_mod_name + "_write_pending(struct se_cmd *);\n"
 
-		if re.search('write_pending_status\)\(', fo):
-			buf += "int " + fabric_mod_name + "_write_pending_status(struct se_cmd *se_cmd)\n"
-			buf += "{\n"
-			buf += "	return 0;\n"
-			buf += "}\n\n"
-			bufi += "int " + fabric_mod_name + "_write_pending_status(struct se_cmd *);\n"
-
 		if re.search('set_default_node_attributes\)\(', fo):
 			buf += "void " + fabric_mod_name + "_set_default_node_attrs(struct se_node_acl *nacl)\n"
 			buf += "{\n"
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c
index e9c336cff8f5..8cee8c6c6be9 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.c
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.c
@@ -2703,14 +2703,6 @@ static int srpt_rdma_cm_handler(struct rdma_cm_id *cm_id,
 	return ret;
 }
 
-static int srpt_write_pending_status(struct se_cmd *se_cmd)
-{
-	struct srpt_send_ioctx *ioctx;
-
-	ioctx = container_of(se_cmd, struct srpt_send_ioctx, cmd);
-	return ioctx->state == SRPT_STATE_NEED_DATA;
-}
-
 /*
  * srpt_write_pending - Start data transfer from initiator to target (write).
  */
@@ -3806,7 +3798,6 @@ static const struct target_core_fabric_ops srpt_template = {
 	.sess_get_index			= srpt_sess_get_index,
 	.sess_get_initiator_sid		= NULL,
 	.write_pending			= srpt_write_pending,
-	.write_pending_status		= srpt_write_pending_status,
 	.set_default_node_attributes	= srpt_set_default_node_attrs,
 	.get_cmd_state			= srpt_get_tcm_cmd_state,
 	.queue_data_in			= srpt_queue_data_in,
diff --git a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c
index cc9cae469c4b..7ca277e28d63 100644
--- a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c
+++ b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c
@@ -3788,11 +3788,6 @@ static int ibmvscsis_write_pending(struct se_cmd *se_cmd)
 	return 0;
 }
 
-static int ibmvscsis_write_pending_status(struct se_cmd *se_cmd)
-{
-	return 0;
-}
-
 static void ibmvscsis_set_default_node_attrs(struct se_node_acl *nacl)
 {
 }
@@ -4053,7 +4048,6 @@ static const struct target_core_fabric_ops ibmvscsis_ops = {
 	.release_cmd			= ibmvscsis_release_cmd,
 	.sess_get_index			= ibmvscsis_sess_get_index,
 	.write_pending			= ibmvscsis_write_pending,
-	.write_pending_status		= ibmvscsis_write_pending_status,
 	.set_default_node_attributes	= ibmvscsis_set_default_node_attrs,
 	.get_cmd_state			= ibmvscsis_get_cmd_state,
 	.queue_data_in			= ibmvscsis_queue_data_in,
diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c
index 283e6b80abb5..8a3075d17c63 100644
--- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c
+++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c
@@ -420,26 +420,6 @@ static int tcm_qla2xxx_write_pending(struct se_cmd *se_cmd)
 	return qlt_rdy_to_xfer(cmd);
 }
 
-static int tcm_qla2xxx_write_pending_status(struct se_cmd *se_cmd)
-{
-	unsigned long flags;
-	/*
-	 * Check for WRITE_PENDING status to determine if we need to wait for
-	 * CTIO aborts to be posted via hardware in tcm_qla2xxx_handle_data().
-	 */
-	spin_lock_irqsave(&se_cmd->t_state_lock, flags);
-	if (se_cmd->t_state == TRANSPORT_WRITE_PENDING ||
-	    se_cmd->t_state == TRANSPORT_COMPLETE_QF_WP) {
-		spin_unlock_irqrestore(&se_cmd->t_state_lock, flags);
-		wait_for_completion_timeout(&se_cmd->t_transport_stop_comp,
-						50);
-		return 0;
-	}
-	spin_unlock_irqrestore(&se_cmd->t_state_lock, flags);
-
-	return 0;
-}
-
 static void tcm_qla2xxx_set_default_node_attrs(struct se_node_acl *nacl)
 {
 	return;
@@ -537,15 +517,6 @@ static void tcm_qla2xxx_handle_data_work(struct work_struct *work)
 
 	cmd->qpair->tgt_counters.qla_core_ret_ctio++;
 	if (!cmd->write_data_transferred) {
-		/*
-		 * Check if se_cmd has already been aborted via LUN_RESET, and
-		 * waiting upon completion in tcm_qla2xxx_write_pending_status()
-		 */
-		if (cmd->se_cmd.transport_state & CMD_T_ABORTED) {
-			complete(&cmd->se_cmd.t_transport_stop_comp);
-			return;
-		}
-
 		switch (cmd->dif_err_code) {
 		case DIF_ERR_GRD:
 			cmd->se_cmd.pi_err =
@@ -1902,7 +1873,6 @@ static const struct target_core_fabric_ops tcm_qla2xxx_ops = {
 	.sess_get_index			= tcm_qla2xxx_sess_get_index,
 	.sess_get_initiator_sid		= NULL,
 	.write_pending			= tcm_qla2xxx_write_pending,
-	.write_pending_status		= tcm_qla2xxx_write_pending_status,
 	.set_default_node_attributes	= tcm_qla2xxx_set_default_node_attrs,
 	.get_cmd_state			= tcm_qla2xxx_get_cmd_state,
 	.queue_data_in			= tcm_qla2xxx_queue_data_in,
@@ -1943,7 +1913,6 @@ static const struct target_core_fabric_ops tcm_qla2xxx_npiv_ops = {
 	.sess_get_index			= tcm_qla2xxx_sess_get_index,
 	.sess_get_initiator_sid		= NULL,
 	.write_pending			= tcm_qla2xxx_write_pending,
-	.write_pending_status		= tcm_qla2xxx_write_pending_status,
 	.set_default_node_attributes	= tcm_qla2xxx_set_default_node_attrs,
 	.get_cmd_state			= tcm_qla2xxx_get_cmd_state,
 	.queue_data_in			= tcm_qla2xxx_queue_data_in,
diff --git a/drivers/target/iscsi/iscsi_target_configfs.c b/drivers/target/iscsi/iscsi_target_configfs.c
index a5481dfeae8d..cac94c94ef5d 100644
--- a/drivers/target/iscsi/iscsi_target_configfs.c
+++ b/drivers/target/iscsi/iscsi_target_configfs.c
@@ -1389,18 +1389,6 @@ static int lio_write_pending(struct se_cmd *se_cmd)
 	return 0;
 }
 
-static int lio_write_pending_status(struct se_cmd *se_cmd)
-{
-	struct iscsi_cmd *cmd = container_of(se_cmd, struct iscsi_cmd, se_cmd);
-	int ret;
-
-	spin_lock_bh(&cmd->istate_lock);
-	ret = !(cmd->cmd_flags & ICF_GOT_LAST_DATAOUT);
-	spin_unlock_bh(&cmd->istate_lock);
-
-	return ret;
-}
-
 static int lio_queue_status(struct se_cmd *se_cmd)
 {
 	struct iscsi_cmd *cmd = container_of(se_cmd, struct iscsi_cmd, se_cmd);
@@ -1564,7 +1552,6 @@ const struct target_core_fabric_ops iscsi_ops = {
 	.sess_get_index			= lio_sess_get_index,
 	.sess_get_initiator_sid		= lio_sess_get_initiator_sid,
 	.write_pending			= lio_write_pending,
-	.write_pending_status		= lio_write_pending_status,
 	.set_default_node_attributes	= lio_set_default_node_attributes,
 	.get_cmd_state			= iscsi_get_cmd_state,
 	.queue_data_in			= lio_queue_data_in,
diff --git a/drivers/target/loopback/tcm_loop.c b/drivers/target/loopback/tcm_loop.c
index 7bd7c0c0db6f..89c6e3719d2b 100644
--- a/drivers/target/loopback/tcm_loop.c
+++ b/drivers/target/loopback/tcm_loop.c
@@ -560,11 +560,6 @@ static int tcm_loop_write_pending(struct se_cmd *se_cmd)
 	return 0;
 }
 
-static int tcm_loop_write_pending_status(struct se_cmd *se_cmd)
-{
-	return 0;
-}
-
 static int tcm_loop_queue_data_in(struct se_cmd *se_cmd)
 {
 	struct tcm_loop_cmd *tl_cmd = container_of(se_cmd,
@@ -1159,7 +1154,6 @@ static const struct target_core_fabric_ops loop_ops = {
 	.release_cmd			= tcm_loop_release_cmd,
 	.sess_get_index			= tcm_loop_sess_get_index,
 	.write_pending			= tcm_loop_write_pending,
-	.write_pending_status		= tcm_loop_write_pending_status,
 	.set_default_node_attributes	= tcm_loop_set_default_node_attributes,
 	.get_cmd_state			= tcm_loop_get_cmd_state,
 	.queue_data_in			= tcm_loop_queue_data_in,
diff --git a/drivers/target/sbp/sbp_target.c b/drivers/target/sbp/sbp_target.c
index 08cee13dfb9a..b0d3583998f0 100644
--- a/drivers/target/sbp/sbp_target.c
+++ b/drivers/target/sbp/sbp_target.c
@@ -1749,11 +1749,6 @@ static int sbp_write_pending(struct se_cmd *se_cmd)
 	return 0;
 }
 
-static int sbp_write_pending_status(struct se_cmd *se_cmd)
-{
-	return 0;
-}
-
 static void sbp_set_default_node_attrs(struct se_node_acl *nacl)
 {
 	return;
@@ -2329,7 +2324,6 @@ static const struct target_core_fabric_ops sbp_ops = {
 	.release_cmd			= sbp_release_cmd,
 	.sess_get_index			= sbp_sess_get_index,
 	.write_pending			= sbp_write_pending,
-	.write_pending_status		= sbp_write_pending_status,
 	.set_default_node_attributes	= sbp_set_default_node_attrs,
 	.get_cmd_state			= sbp_get_cmd_state,
 	.queue_data_in			= sbp_queue_data_in,
diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c
index 72016d0dfca5..1124c84a3b7f 100644
--- a/drivers/target/target_core_configfs.c
+++ b/drivers/target/target_core_configfs.c
@@ -401,10 +401,6 @@ static int target_fabric_tf_ops_check(const struct target_core_fabric_ops *tfo)
 		pr_err("Missing tfo->write_pending()\n");
 		return -EINVAL;
 	}
-	if (!tfo->write_pending_status) {
-		pr_err("Missing tfo->write_pending_status()\n");
-		return -EINVAL;
-	}
 	if (!tfo->set_default_node_attributes) {
 		pr_err("Missing tfo->set_default_node_attributes()\n");
 		return -EINVAL;
diff --git a/drivers/target/target_core_xcopy.c b/drivers/target/target_core_xcopy.c
index c2e1fc927fdf..9be1418e919f 100644
--- a/drivers/target/target_core_xcopy.c
+++ b/drivers/target/target_core_xcopy.c
@@ -442,11 +442,6 @@ static int xcopy_pt_write_pending(struct se_cmd *se_cmd)
 	return 0;
 }
 
-static int xcopy_pt_write_pending_status(struct se_cmd *se_cmd)
-{
-	return 0;
-}
-
 static int xcopy_pt_queue_data_in(struct se_cmd *se_cmd)
 {
 	return 0;
@@ -463,7 +458,6 @@ static const struct target_core_fabric_ops xcopy_pt_tfo = {
 	.release_cmd		= xcopy_pt_release_cmd,
 	.check_stop_free	= xcopy_pt_check_stop_free,
 	.write_pending		= xcopy_pt_write_pending,
-	.write_pending_status	= xcopy_pt_write_pending_status,
 	.queue_data_in		= xcopy_pt_queue_data_in,
 	.queue_status		= xcopy_pt_queue_status,
 };
diff --git a/drivers/target/tcm_fc/tcm_fc.h b/drivers/target/tcm_fc/tcm_fc.h
index 11d27b93b413..b8ced4458118 100644
--- a/drivers/target/tcm_fc/tcm_fc.h
+++ b/drivers/target/tcm_fc/tcm_fc.h
@@ -158,7 +158,6 @@ void ft_release_cmd(struct se_cmd *);
 int ft_queue_status(struct se_cmd *);
 int ft_queue_data_in(struct se_cmd *);
 int ft_write_pending(struct se_cmd *);
-int ft_write_pending_status(struct se_cmd *);
 int ft_get_cmd_state(struct se_cmd *);
 void ft_queue_tm_resp(struct se_cmd *);
 void ft_aborted_task(struct se_cmd *);
diff --git a/drivers/target/tcm_fc/tfc_cmd.c b/drivers/target/tcm_fc/tfc_cmd.c
index a183d4da7db2..f0529ba58f4c 100644
--- a/drivers/target/tcm_fc/tfc_cmd.c
+++ b/drivers/target/tcm_fc/tfc_cmd.c
@@ -184,13 +184,6 @@ int ft_queue_status(struct se_cmd *se_cmd)
 	return 0;
 }
 
-int ft_write_pending_status(struct se_cmd *se_cmd)
-{
-	struct ft_cmd *cmd = container_of(se_cmd, struct ft_cmd, se_cmd);
-
-	return cmd->write_data_len != se_cmd->data_length;
-}
-
 /*
  * Send TX_RDY (transfer ready).
  */
diff --git a/drivers/target/tcm_fc/tfc_conf.c b/drivers/target/tcm_fc/tfc_conf.c
index 1ce49518d440..c873a052fcb0 100644
--- a/drivers/target/tcm_fc/tfc_conf.c
+++ b/drivers/target/tcm_fc/tfc_conf.c
@@ -437,7 +437,6 @@ static const struct target_core_fabric_ops ft_fabric_ops = {
 	.sess_get_index =		ft_sess_get_index,
 	.sess_get_initiator_sid =	NULL,
 	.write_pending =		ft_write_pending,
-	.write_pending_status =		ft_write_pending_status,
 	.set_default_node_attributes =	ft_set_default_node_attr,
 	.get_cmd_state =		ft_get_cmd_state,
 	.queue_data_in =		ft_queue_data_in,
diff --git a/drivers/usb/gadget/function/f_tcm.c b/drivers/usb/gadget/function/f_tcm.c
index 34f5982cab78..7f01f78b1d23 100644
--- a/drivers/usb/gadget/function/f_tcm.c
+++ b/drivers/usb/gadget/function/f_tcm.c
@@ -1292,14 +1292,6 @@ static u32 usbg_sess_get_index(struct se_session *se_sess)
 	return 0;
 }
 
-/*
- * XXX Error recovery: return != 0 if we expect writes. Dunno when that could be
- */
-static int usbg_write_pending_status(struct se_cmd *se_cmd)
-{
-	return 0;
-}
-
 static void usbg_set_default_node_attrs(struct se_node_acl *nacl)
 {
 }
@@ -1725,7 +1717,6 @@ static const struct target_core_fabric_ops usbg_ops = {
 	.sess_get_index			= usbg_sess_get_index,
 	.sess_get_initiator_sid		= NULL,
 	.write_pending			= usbg_send_write_request,
-	.write_pending_status		= usbg_write_pending_status,
 	.set_default_node_attributes	= usbg_set_default_node_attrs,
 	.get_cmd_state			= usbg_get_cmd_state,
 	.queue_data_in			= usbg_send_read_response,
diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c
index 8e10ab436d1f..ad7899f99b97 100644
--- a/drivers/vhost/scsi.c
+++ b/drivers/vhost/scsi.c
@@ -346,11 +346,6 @@ static int vhost_scsi_write_pending(struct se_cmd *se_cmd)
 	return 0;
 }
 
-static int vhost_scsi_write_pending_status(struct se_cmd *se_cmd)
-{
-	return 0;
-}
-
 static void vhost_scsi_set_default_node_attrs(struct se_node_acl *nacl)
 {
 	return;
@@ -2298,7 +2293,6 @@ static const struct target_core_fabric_ops vhost_scsi_ops = {
 	.sess_get_index			= vhost_scsi_sess_get_index,
 	.sess_get_initiator_sid		= NULL,
 	.write_pending			= vhost_scsi_write_pending,
-	.write_pending_status		= vhost_scsi_write_pending_status,
 	.set_default_node_attributes	= vhost_scsi_set_default_node_attrs,
 	.get_cmd_state			= vhost_scsi_get_cmd_state,
 	.queue_data_in			= vhost_scsi_queue_data_in,
diff --git a/drivers/xen/xen-scsiback.c b/drivers/xen/xen-scsiback.c
index c9e23a126218..e59937293a32 100644
--- a/drivers/xen/xen-scsiback.c
+++ b/drivers/xen/xen-scsiback.c
@@ -1404,11 +1404,6 @@ static int scsiback_write_pending(struct se_cmd *se_cmd)
 	return 0;
 }
 
-static int scsiback_write_pending_status(struct se_cmd *se_cmd)
-{
-	return 0;
-}
-
 static void scsiback_set_default_node_attrs(struct se_node_acl *nacl)
 {
 }
@@ -1818,7 +1813,6 @@ static const struct target_core_fabric_ops scsiback_ops = {
 	.sess_get_index			= scsiback_sess_get_index,
 	.sess_get_initiator_sid		= NULL,
 	.write_pending			= scsiback_write_pending,
-	.write_pending_status		= scsiback_write_pending_status,
 	.set_default_node_attributes	= scsiback_set_default_node_attrs,
 	.get_cmd_state			= scsiback_get_cmd_state,
 	.queue_data_in			= scsiback_queue_data_in,
diff --git a/include/target/target_core_fabric.h b/include/target/target_core_fabric.h
index ee5ddd81cd8d..691c6b5e6520 100644
--- a/include/target/target_core_fabric.h
+++ b/include/target/target_core_fabric.h
@@ -74,7 +74,6 @@ struct target_core_fabric_ops {
 	u32 (*sess_get_initiator_sid)(struct se_session *,
 				      unsigned char *, u32);
 	int (*write_pending)(struct se_cmd *);
-	int (*write_pending_status)(struct se_cmd *);
 	void (*set_default_node_attributes)(struct se_node_acl *);
 	int (*get_cmd_state)(struct se_cmd *);
 	int (*queue_data_in)(struct se_cmd *);
-- 
cgit v1.2.3


From 3f0661a492a199b7de2fb63eceba8c79039bff83 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Fri, 25 Jan 2019 10:34:44 -0800
Subject: scsi: target/core: Remove several state tests from the TMF code

Whether or not a session is being torn down does not affect whether or not
SCSI commands are in the task set. Hence remove the "tearing down" checks
from the TMF code. The TRANSPORT_ISTATE_PROCESSING check is left out
because it is now safe to wait for a command that is in that state. The
CMD_T_PRE_EXECUTE is left out because abort processing is postponed until
after commands have left the pre-execute state since the patch that makes
TMF processing synchronous.

See also commit 1c21a48055a6 ("target: Avoid early CMD_T_PRE_EXECUTE
failures during ABORT_TASK").

Cc: Nicholas Bellinger <nab@linux-iscsi.org>
Cc: Mike Christie <mchristi@redhat.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Hannes Reinecke <hare@suse.de>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/target/target_core_tmr.c       | 25 -------------------------
 drivers/target/target_core_transport.c |  2 --
 include/target/target_core_base.h      |  1 -
 3 files changed, 28 deletions(-)

(limited to 'include')

diff --git a/drivers/target/target_core_tmr.c b/drivers/target/target_core_tmr.c
index ad0061e09d4c..df1ca827de68 100644
--- a/drivers/target/target_core_tmr.c
+++ b/drivers/target/target_core_tmr.c
@@ -114,21 +114,6 @@ static bool __target_check_io_state(struct se_cmd *se_cmd,
 		spin_unlock(&se_cmd->t_state_lock);
 		return false;
 	}
-	if (se_cmd->transport_state & CMD_T_PRE_EXECUTE) {
-		if (se_cmd->scsi_status) {
-			pr_debug("Attempted to abort io tag: %llu early failure"
-				 " status: 0x%02x\n", se_cmd->tag,
-				 se_cmd->scsi_status);
-			spin_unlock(&se_cmd->t_state_lock);
-			return false;
-		}
-	}
-	if (sess->sess_tearing_down) {
-		pr_debug("Attempted to abort io tag: %llu already shutdown,"
-			" skipping\n", se_cmd->tag);
-		spin_unlock(&se_cmd->t_state_lock);
-		return false;
-	}
 	se_cmd->transport_state |= CMD_T_ABORTED;
 
 	if ((tmr_sess != se_cmd->se_sess) && tas)
@@ -239,16 +224,6 @@ static void core_tmr_drain_tmr_list(
 			spin_unlock(&sess->sess_cmd_lock);
 			continue;
 		}
-		if (cmd->t_state == TRANSPORT_ISTATE_PROCESSING) {
-			spin_unlock(&cmd->t_state_lock);
-			spin_unlock(&sess->sess_cmd_lock);
-			continue;
-		}
-		if (sess->sess_tearing_down) {
-			spin_unlock(&cmd->t_state_lock);
-			spin_unlock(&sess->sess_cmd_lock);
-			continue;
-		}
 		cmd->transport_state |= CMD_T_ABORTED;
 		spin_unlock(&cmd->t_state_lock);
 
diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index ef9e75b359d4..b25d27e8ca20 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -2056,7 +2056,6 @@ void target_execute_cmd(struct se_cmd *cmd)
 
 	spin_lock_irq(&cmd->t_state_lock);
 	cmd->t_state = TRANSPORT_PROCESSING;
-	cmd->transport_state &= ~CMD_T_PRE_EXECUTE;
 	cmd->transport_state |= CMD_T_ACTIVE | CMD_T_SENT;
 	spin_unlock_irq(&cmd->t_state_lock);
 
@@ -2765,7 +2764,6 @@ int target_get_sess_cmd(struct se_cmd *se_cmd, bool ack_kref)
 		ret = -ESHUTDOWN;
 		goto out;
 	}
-	se_cmd->transport_state |= CMD_T_PRE_EXECUTE;
 	list_add_tail(&se_cmd->se_cmd_list, &se_sess->sess_cmd_list);
 	percpu_ref_get(&se_sess->cmd_count);
 out:
diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h
index 69b7b955902c..19a5bf4214fc 100644
--- a/include/target/target_core_base.h
+++ b/include/target/target_core_base.h
@@ -502,7 +502,6 @@ struct se_cmd {
 #define CMD_T_STOP		(1 << 5)
 #define CMD_T_TAS		(1 << 10)
 #define CMD_T_FABRIC_STOP	(1 << 11)
-#define CMD_T_PRE_EXECUTE	(1 << 12)
 	spinlock_t		t_state_lock;
 	struct kref		cmd_kref;
 	struct completion	t_transport_stop_comp;
-- 
cgit v1.2.3


From 94ebb47160085343ecaaeab90fa7ffbb86030613 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Fri, 25 Jan 2019 10:34:47 -0800
Subject: scsi: target/core: Add target_send_busy()

Introduce a function that sends the SCSI status "BUSY" back to the
initiator. The next patch will add a call to this function in the srpt
target driver.

Reviewed-by: Hannes Reinecke <hare@suse.com>
Cc: Nicholas Bellinger <nab@linux-iscsi.org>
Cc: Mike Christie <mchristi@redhat.com>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/target/target_core_transport.c | 16 ++++++++++++++++
 include/target/target_core_fabric.h    |  1 +
 2 files changed, 17 insertions(+)

(limited to 'include')

diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index 6471ede00336..e3f7e21e6614 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -3246,6 +3246,22 @@ transport_send_check_condition_and_sense(struct se_cmd *cmd,
 }
 EXPORT_SYMBOL(transport_send_check_condition_and_sense);
 
+/**
+ * target_send_busy - Send SCSI BUSY status back to the initiator
+ * @cmd: SCSI command for which to send a BUSY reply.
+ *
+ * Note: Only call this function if target_submit_cmd*() failed.
+ */
+int target_send_busy(struct se_cmd *cmd)
+{
+	WARN_ON_ONCE(cmd->se_cmd_flags & SCF_SCSI_TMR_CDB);
+
+	cmd->scsi_status = SAM_STAT_BUSY;
+	trace_target_cmd_complete(cmd);
+	return cmd->se_tfo->queue_status(cmd);
+}
+EXPORT_SYMBOL(target_send_busy);
+
 static void target_tmr_work(struct work_struct *work)
 {
 	struct se_cmd *cmd = container_of(work, struct se_cmd, work);
diff --git a/include/target/target_core_fabric.h b/include/target/target_core_fabric.h
index 691c6b5e6520..8ed90407f062 100644
--- a/include/target/target_core_fabric.h
+++ b/include/target/target_core_fabric.h
@@ -173,6 +173,7 @@ int	transport_generic_free_cmd(struct se_cmd *, int);
 bool	transport_wait_for_tasks(struct se_cmd *);
 int	transport_send_check_condition_and_sense(struct se_cmd *,
 		sense_reason_t, int);
+int	target_send_busy(struct se_cmd *cmd);
 int	target_get_sess_cmd(struct se_cmd *, bool);
 int	target_put_sess_cmd(struct se_cmd *);
 void	target_sess_cmd_list_set_waiting(struct se_session *);
-- 
cgit v1.2.3


From 5468e82f7034f0ae175a3ce075441356099bdaa3 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Mon, 4 Feb 2019 11:26:18 +0100
Subject: net: phy: fixed-phy: Drop GPIO from fixed_phy_add()

All users of the fixed_phy_add() pass -1 as GPIO number
to the fixed phy driver, and all users of fixed_phy_register()
pass -1 as GPIO number as well, except for the device
tree MDIO bus.

Any new users should create a proper device and pass the
GPIO as a descriptor associated with the device so delete
the GPIO argument from the calls and drop the code looking
requesting a GPIO in fixed_phy_add().

In fixed phy_register(), investigate the "fixed-link"
node and pick the GPIO descriptor from "link-gpios" if
this property exists. Move the corresponding code out
of of_mdio.c as the fixed phy code anyways requires
OF to be in use.

Tested-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../networking/device_drivers/stmicro/stmmac.txt   |  2 +-
 arch/m68k/coldfire/m5272.c                         |  2 +-
 arch/mips/ar7/platform.c                           |  4 +-
 arch/mips/bcm47xx/setup.c                          |  2 +-
 drivers/net/dsa/dsa_loop.c                         |  2 +-
 drivers/net/ethernet/broadcom/bgmac.c              |  2 +-
 drivers/net/ethernet/broadcom/genet/bcmmii.c       |  2 +-
 drivers/net/phy/fixed_phy.c                        | 82 ++++++++++++++++------
 drivers/net/usb/lan78xx.c                          |  3 +-
 drivers/of/of_mdio.c                               |  9 +--
 include/linux/phy_fixed.h                          |  8 +--
 11 files changed, 72 insertions(+), 46 deletions(-)

(limited to 'include')

diff --git a/Documentation/networking/device_drivers/stmicro/stmmac.txt b/Documentation/networking/device_drivers/stmicro/stmmac.txt
index 2bb07078f535..1ae979fd90d2 100644
--- a/Documentation/networking/device_drivers/stmicro/stmmac.txt
+++ b/Documentation/networking/device_drivers/stmicro/stmmac.txt
@@ -267,7 +267,7 @@ static struct fixed_phy_status stmmac0_fixed_phy_status = {
 
 During the board's device_init we can configure the first
 MAC for fixed_link by calling:
-  fixed_phy_add(PHY_POLL, 1, &stmmac0_fixed_phy_status, -1);
+  fixed_phy_add(PHY_POLL, 1, &stmmac0_fixed_phy_status);
 and the second one, with a real PHY device attached to the bus,
 by using the stmmac_mdio_bus_data structure (to provide the id, the
 reset procedure etc).
diff --git a/arch/m68k/coldfire/m5272.c b/arch/m68k/coldfire/m5272.c
index ad1185c68df7..6b3ab583c698 100644
--- a/arch/m68k/coldfire/m5272.c
+++ b/arch/m68k/coldfire/m5272.c
@@ -127,7 +127,7 @@ static struct fixed_phy_status nettel_fixed_phy_status __initdata = {
 static int __init init_BSP(void)
 {
 	m5272_uarts_init();
-	fixed_phy_add(PHY_POLL, 0, &nettel_fixed_phy_status, -1);
+	fixed_phy_add(PHY_POLL, 0, &nettel_fixed_phy_status);
 	return 0;
 }
 
diff --git a/arch/mips/ar7/platform.c b/arch/mips/ar7/platform.c
index f09262e0a72f..10ff07b7721e 100644
--- a/arch/mips/ar7/platform.c
+++ b/arch/mips/ar7/platform.c
@@ -683,7 +683,7 @@ static int __init ar7_register_devices(void)
 
 	if (ar7_has_high_cpmac()) {
 		res = fixed_phy_add(PHY_POLL, cpmac_high.id,
-				    &fixed_phy_status, -1);
+				    &fixed_phy_status);
 		if (!res) {
 			cpmac_get_mac(1, cpmac_high_data.dev_addr);
 
@@ -696,7 +696,7 @@ static int __init ar7_register_devices(void)
 	} else
 		cpmac_low_data.phy_mask = 0xffffffff;
 
-	res = fixed_phy_add(PHY_POLL, cpmac_low.id, &fixed_phy_status, -1);
+	res = fixed_phy_add(PHY_POLL, cpmac_low.id, &fixed_phy_status);
 	if (!res) {
 		cpmac_get_mac(0, cpmac_low_data.dev_addr);
 		res = platform_device_register(&cpmac_low);
diff --git a/arch/mips/bcm47xx/setup.c b/arch/mips/bcm47xx/setup.c
index fe3773539eff..82627c264964 100644
--- a/arch/mips/bcm47xx/setup.c
+++ b/arch/mips/bcm47xx/setup.c
@@ -274,7 +274,7 @@ static int __init bcm47xx_register_bus_complete(void)
 	bcm47xx_leds_register();
 	bcm47xx_workarounds();
 
-	fixed_phy_add(PHY_POLL, 0, &bcm47xx_fixed_phy_status, -1);
+	fixed_phy_add(PHY_POLL, 0, &bcm47xx_fixed_phy_status);
 	return 0;
 }
 device_initcall(bcm47xx_register_bus_complete);
diff --git a/drivers/net/dsa/dsa_loop.c b/drivers/net/dsa/dsa_loop.c
index 816f34d64736..17482ae09aa5 100644
--- a/drivers/net/dsa/dsa_loop.c
+++ b/drivers/net/dsa/dsa_loop.c
@@ -343,7 +343,7 @@ static int __init dsa_loop_init(void)
 	unsigned int i;
 
 	for (i = 0; i < NUM_FIXED_PHYS; i++)
-		phydevs[i] = fixed_phy_register(PHY_POLL, &status, -1, NULL);
+		phydevs[i] = fixed_phy_register(PHY_POLL, &status, NULL);
 
 	return mdio_driver_register(&dsa_loop_drv);
 }
diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c
index 2d3a44c40221..4632dd5dbad1 100644
--- a/drivers/net/ethernet/broadcom/bgmac.c
+++ b/drivers/net/ethernet/broadcom/bgmac.c
@@ -1446,7 +1446,7 @@ int bgmac_phy_connect_direct(struct bgmac *bgmac)
 	struct phy_device *phy_dev;
 	int err;
 
-	phy_dev = fixed_phy_register(PHY_POLL, &fphy_status, -1, NULL);
+	phy_dev = fixed_phy_register(PHY_POLL, &fphy_status, NULL);
 	if (!phy_dev || IS_ERR(phy_dev)) {
 		dev_err(bgmac->dev, "Failed to register fixed PHY device\n");
 		return -ENODEV;
diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c
index aceb9b7b55bd..51880d83131a 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmmii.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c
@@ -525,7 +525,7 @@ static int bcmgenet_mii_pd_init(struct bcmgenet_priv *priv)
 			.asym_pause = 0,
 		};
 
-		phydev = fixed_phy_register(PHY_POLL, &fphy_status, -1, NULL);
+		phydev = fixed_phy_register(PHY_POLL, &fphy_status, NULL);
 		if (!phydev || IS_ERR(phydev)) {
 			dev_err(kdev, "failed to register fixed PHY device\n");
 			return -ENODEV;
diff --git a/drivers/net/phy/fixed_phy.c b/drivers/net/phy/fixed_phy.c
index 47a8cb574c45..f136a23c1a35 100644
--- a/drivers/net/phy/fixed_phy.c
+++ b/drivers/net/phy/fixed_phy.c
@@ -18,7 +18,7 @@
 #include <linux/err.h>
 #include <linux/slab.h>
 #include <linux/of.h>
-#include <linux/gpio.h>
+#include <linux/gpio/consumer.h>
 #include <linux/seqlock.h>
 #include <linux/idr.h>
 #include <linux/netdevice.h>
@@ -38,7 +38,7 @@ struct fixed_phy {
 	bool no_carrier;
 	int (*link_update)(struct net_device *, struct fixed_phy_status *);
 	struct list_head node;
-	int link_gpio;
+	struct gpio_desc *link_gpiod;
 };
 
 static struct platform_device *pdev;
@@ -67,8 +67,8 @@ EXPORT_SYMBOL_GPL(fixed_phy_change_carrier);
 
 static void fixed_phy_update(struct fixed_phy *fp)
 {
-	if (!fp->no_carrier && gpio_is_valid(fp->link_gpio))
-		fp->status.link = !!gpio_get_value_cansleep(fp->link_gpio);
+	if (!fp->no_carrier && fp->link_gpiod)
+		fp->status.link = !!gpiod_get_value_cansleep(fp->link_gpiod);
 }
 
 static int fixed_mdio_read(struct mii_bus *bus, int phy_addr, int reg_num)
@@ -133,9 +133,9 @@ int fixed_phy_set_link_update(struct phy_device *phydev,
 }
 EXPORT_SYMBOL_GPL(fixed_phy_set_link_update);
 
-int fixed_phy_add(unsigned int irq, int phy_addr,
-		  struct fixed_phy_status *status,
-		  int link_gpio)
+static int fixed_phy_add_gpiod(unsigned int irq, int phy_addr,
+			       struct fixed_phy_status *status,
+			       struct gpio_desc *gpiod)
 {
 	int ret;
 	struct fixed_mdio_bus *fmb = &platform_fmb;
@@ -156,24 +156,19 @@ int fixed_phy_add(unsigned int irq, int phy_addr,
 
 	fp->addr = phy_addr;
 	fp->status = *status;
-	fp->link_gpio = link_gpio;
-
-	if (gpio_is_valid(fp->link_gpio)) {
-		ret = gpio_request_one(fp->link_gpio, GPIOF_DIR_IN,
-				       "fixed-link-gpio-link");
-		if (ret)
-			goto err_regs;
-	}
+	fp->link_gpiod = gpiod;
 
 	fixed_phy_update(fp);
 
 	list_add_tail(&fp->node, &fmb->phys);
 
 	return 0;
+}
 
-err_regs:
-	kfree(fp);
-	return ret;
+int fixed_phy_add(unsigned int irq, int phy_addr,
+		  struct fixed_phy_status *status) {
+
+	return fixed_phy_add_gpiod(irq, phy_addr, status, NULL);
 }
 EXPORT_SYMBOL_GPL(fixed_phy_add);
 
@@ -187,8 +182,8 @@ static void fixed_phy_del(int phy_addr)
 	list_for_each_entry_safe(fp, tmp, &fmb->phys, node) {
 		if (fp->addr == phy_addr) {
 			list_del(&fp->node);
-			if (gpio_is_valid(fp->link_gpio))
-				gpio_free(fp->link_gpio);
+			if (fp->link_gpiod)
+				gpiod_put(fp->link_gpiod);
 			kfree(fp);
 			ida_simple_remove(&phy_fixed_ida, phy_addr);
 			return;
@@ -196,12 +191,50 @@ static void fixed_phy_del(int phy_addr)
 	}
 }
 
+#ifdef CONFIG_OF_GPIO
+static struct gpio_desc *fixed_phy_get_gpiod(struct device_node *np)
+{
+	struct device_node *fixed_link_node;
+	struct gpio_desc *gpiod;
+
+	if (!np)
+		return NULL;
+
+	fixed_link_node = of_get_child_by_name(np, "fixed-link");
+	if (!fixed_link_node)
+		return NULL;
+
+	/*
+	 * As the fixed link is just a device tree node without any
+	 * Linux device associated with it, we simply have obtain
+	 * the GPIO descriptor from the device tree like this.
+	 */
+	gpiod = gpiod_get_from_of_node(fixed_link_node, "link-gpios", 0,
+				       GPIOD_IN, "mdio");
+	of_node_put(fixed_link_node);
+	if (IS_ERR(gpiod)) {
+		if (PTR_ERR(gpiod) == -EPROBE_DEFER)
+			return gpiod;
+		pr_err("error getting GPIO for fixed link %pOF, proceed without\n",
+		       fixed_link_node);
+		gpiod = NULL;
+	}
+
+	return gpiod;
+}
+#else
+static struct gpio_desc *fixed_phy_get_gpiod(struct device_node *np)
+{
+	return NULL;
+}
+#endif
+
 struct phy_device *fixed_phy_register(unsigned int irq,
 				      struct fixed_phy_status *status,
-				      int link_gpio,
 				      struct device_node *np)
 {
 	struct fixed_mdio_bus *fmb = &platform_fmb;
+	struct gpio_desc *gpiod = NULL;
 	struct phy_device *phy;
 	int phy_addr;
 	int ret;
@@ -209,12 +242,17 @@ struct phy_device *fixed_phy_register(unsigned int irq,
 	if (!fmb->mii_bus || fmb->mii_bus->state != MDIOBUS_REGISTERED)
 		return ERR_PTR(-EPROBE_DEFER);
 
+	/* Check if we have a GPIO associated with this fixed phy */
+	gpiod = fixed_phy_get_gpiod(np);
+	if (IS_ERR(gpiod))
+		return ERR_CAST(gpiod);
+
 	/* Get the next available PHY address, up to PHY_MAX_ADDR */
 	phy_addr = ida_simple_get(&phy_fixed_ida, 0, PHY_MAX_ADDR, GFP_KERNEL);
 	if (phy_addr < 0)
 		return ERR_PTR(phy_addr);
 
-	ret = fixed_phy_add(irq, phy_addr, status, link_gpio);
+	ret = fixed_phy_add_gpiod(irq, phy_addr, status, gpiod);
 	if (ret < 0) {
 		ida_simple_remove(&phy_fixed_ida, phy_addr);
 		return ERR_PTR(ret);
diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c
index e96bc0c6140f..3d92ea6fcc02 100644
--- a/drivers/net/usb/lan78xx.c
+++ b/drivers/net/usb/lan78xx.c
@@ -2051,8 +2051,7 @@ static struct phy_device *lan7801_phy_init(struct lan78xx_net *dev)
 	phydev = phy_find_first(dev->mdiobus);
 	if (!phydev) {
 		netdev_dbg(dev->net, "PHY Not Found!! Registering Fixed PHY\n");
-		phydev = fixed_phy_register(PHY_POLL, &fphy_status, -1,
-					    NULL);
+		phydev = fixed_phy_register(PHY_POLL, &fphy_status, NULL);
 		if (IS_ERR(phydev)) {
 			netdev_err(dev->net, "No PHY/fixed_PHY found\n");
 			return NULL;
diff --git a/drivers/of/of_mdio.c b/drivers/of/of_mdio.c
index 5ad1342f5682..de6157357e26 100644
--- a/drivers/of/of_mdio.c
+++ b/drivers/of/of_mdio.c
@@ -16,7 +16,6 @@
 #include <linux/phy.h>
 #include <linux/phy_fixed.h>
 #include <linux/of.h>
-#include <linux/of_gpio.h>
 #include <linux/of_irq.h>
 #include <linux/of_mdio.h>
 #include <linux/of_net.h>
@@ -463,7 +462,6 @@ int of_phy_register_fixed_link(struct device_node *np)
 	struct device_node *fixed_link_node;
 	u32 fixed_link_prop[5];
 	const char *managed;
-	int link_gpio = -1;
 
 	if (of_property_read_string(np, "managed", &managed) == 0 &&
 	    strcmp(managed, "in-band-status") == 0) {
@@ -485,11 +483,7 @@ int of_phy_register_fixed_link(struct device_node *np)
 		status.pause = of_property_read_bool(fixed_link_node, "pause");
 		status.asym_pause = of_property_read_bool(fixed_link_node,
 							  "asym-pause");
-		link_gpio = of_get_named_gpio_flags(fixed_link_node,
-						    "link-gpios", 0, NULL);
 		of_node_put(fixed_link_node);
-		if (link_gpio == -EPROBE_DEFER)
-			return -EPROBE_DEFER;
 
 		goto register_phy;
 	}
@@ -508,8 +502,7 @@ int of_phy_register_fixed_link(struct device_node *np)
 	return -ENODEV;
 
 register_phy:
-	return PTR_ERR_OR_ZERO(fixed_phy_register(PHY_POLL, &status, link_gpio,
-						  np));
+	return PTR_ERR_OR_ZERO(fixed_phy_register(PHY_POLL, &status, np));
 }
 EXPORT_SYMBOL(of_phy_register_fixed_link);
 
diff --git a/include/linux/phy_fixed.h b/include/linux/phy_fixed.h
index 9525567b1951..c78fc203db43 100644
--- a/include/linux/phy_fixed.h
+++ b/include/linux/phy_fixed.h
@@ -15,11 +15,9 @@ struct device_node;
 #if IS_ENABLED(CONFIG_FIXED_PHY)
 extern int fixed_phy_change_carrier(struct net_device *dev, bool new_carrier);
 extern int fixed_phy_add(unsigned int irq, int phy_id,
-			 struct fixed_phy_status *status,
-			 int link_gpio);
+			 struct fixed_phy_status *status);
 extern struct phy_device *fixed_phy_register(unsigned int irq,
 					     struct fixed_phy_status *status,
-					     int link_gpio,
 					     struct device_node *np);
 extern void fixed_phy_unregister(struct phy_device *phydev);
 extern int fixed_phy_set_link_update(struct phy_device *phydev,
@@ -27,14 +25,12 @@ extern int fixed_phy_set_link_update(struct phy_device *phydev,
 					   struct fixed_phy_status *));
 #else
 static inline int fixed_phy_add(unsigned int irq, int phy_id,
-				struct fixed_phy_status *status,
-				int link_gpio)
+				struct fixed_phy_status *status)
 {
 	return -ENODEV;
 }
 static inline struct phy_device *fixed_phy_register(unsigned int irq,
 						struct fixed_phy_status *status,
-						int gpio_link,
 						struct device_node *np)
 {
 	return ERR_PTR(-ENODEV);
-- 
cgit v1.2.3


From 0300b1147e528fe6f6b0632b0ef353a87810dabf Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Fri, 25 Jan 2019 10:34:54 -0800
Subject: scsi: target/iscsi: Fix spelling of "unsolicited"

Change "unsoliticed" into "unsolicited".

Cc: Nicholas Bellinger <nab@linux-iscsi.org>
Cc: Mike Christie <mchristi@redhat.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Hannes Reinecke <hare@suse.de>
Cc: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/infiniband/ulp/isert/ib_isert.c     | 2 +-
 drivers/target/iscsi/cxgbit/cxgbit_target.c | 2 +-
 drivers/target/iscsi/iscsi_target.c         | 8 ++++----
 drivers/target/iscsi/iscsi_target.h         | 2 +-
 drivers/target/iscsi/iscsi_target_erl1.c    | 2 +-
 include/target/iscsi/iscsi_transport.h      | 2 +-
 6 files changed, 9 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c
index e3dd13798d79..989f1ac4245c 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.c
+++ b/drivers/infiniband/ulp/isert/ib_isert.c
@@ -1186,7 +1186,7 @@ sequence_cmd:
 	rc = iscsit_sequence_cmd(conn, cmd, buf, hdr->cmdsn);
 
 	if (!rc && dump_payload == false && unsol_data)
-		iscsit_set_unsoliticed_dataout(cmd);
+		iscsit_set_unsolicited_dataout(cmd);
 	else if (dump_payload && imm_data)
 		target_put_sess_cmd(&cmd->se_cmd);
 
diff --git a/drivers/target/iscsi/cxgbit/cxgbit_target.c b/drivers/target/iscsi/cxgbit/cxgbit_target.c
index 25eb3891e34b..29b350a0b58f 100644
--- a/drivers/target/iscsi/cxgbit/cxgbit_target.c
+++ b/drivers/target/iscsi/cxgbit/cxgbit_target.c
@@ -960,7 +960,7 @@ after_immediate_data:
 			target_put_sess_cmd(&cmd->se_cmd);
 			return 0;
 		} else if (cmd->unsolicited_data) {
-			iscsit_set_unsoliticed_dataout(cmd);
+			iscsit_set_unsolicited_dataout(cmd);
 		}
 
 	} else if (immed_ret == IMMEDIATE_DATA_ERL1_CRC_FAILURE) {
diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c
index 98984c3adfba..7059ffdb0c20 100644
--- a/drivers/target/iscsi/iscsi_target.c
+++ b/drivers/target/iscsi/iscsi_target.c
@@ -1194,7 +1194,7 @@ attach_cmd:
 }
 EXPORT_SYMBOL(iscsit_setup_scsi_cmd);
 
-void iscsit_set_unsoliticed_dataout(struct iscsi_cmd *cmd)
+void iscsit_set_unsolicited_dataout(struct iscsi_cmd *cmd)
 {
 	iscsit_set_dataout_sequence_values(cmd);
 
@@ -1202,7 +1202,7 @@ void iscsit_set_unsoliticed_dataout(struct iscsi_cmd *cmd)
 	iscsit_start_dataout_timer(cmd, cmd->conn);
 	spin_unlock_bh(&cmd->dataout_timeout_lock);
 }
-EXPORT_SYMBOL(iscsit_set_unsoliticed_dataout);
+EXPORT_SYMBOL(iscsit_set_unsolicited_dataout);
 
 int iscsit_process_scsi_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
 			    struct iscsi_scsi_req *hdr)
@@ -1236,7 +1236,7 @@ int iscsit_process_scsi_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
 	 */
 	if (!cmd->immediate_data) {
 		if (!cmd->sense_reason && cmd->unsolicited_data)
-			iscsit_set_unsoliticed_dataout(cmd);
+			iscsit_set_unsolicited_dataout(cmd);
 		if (!cmd->sense_reason)
 			return 0;
 
@@ -1308,7 +1308,7 @@ after_immediate_data:
 			target_put_sess_cmd(&cmd->se_cmd);
 			return rc;
 		} else if (cmd->unsolicited_data)
-			iscsit_set_unsoliticed_dataout(cmd);
+			iscsit_set_unsolicited_dataout(cmd);
 
 	} else if (immed_ret == IMMEDIATE_DATA_ERL1_CRC_FAILURE) {
 		/*
diff --git a/drivers/target/iscsi/iscsi_target.h b/drivers/target/iscsi/iscsi_target.h
index 48bac0acf8c7..c95f56a3ce31 100644
--- a/drivers/target/iscsi/iscsi_target.h
+++ b/drivers/target/iscsi/iscsi_target.h
@@ -31,7 +31,7 @@ extern int iscsit_reset_np_thread(struct iscsi_np *, struct iscsi_tpg_np *,
 				struct iscsi_portal_group *, bool);
 extern int iscsit_del_np(struct iscsi_np *);
 extern int iscsit_reject_cmd(struct iscsi_cmd *cmd, u8, unsigned char *);
-extern void iscsit_set_unsoliticed_dataout(struct iscsi_cmd *);
+extern void iscsit_set_unsolicited_dataout(struct iscsi_cmd *);
 extern int iscsit_logout_closesession(struct iscsi_cmd *, struct iscsi_conn *);
 extern int iscsit_logout_closeconnection(struct iscsi_cmd *, struct iscsi_conn *);
 extern int iscsit_logout_removeconnforrecovery(struct iscsi_cmd *, struct iscsi_conn *);
diff --git a/drivers/target/iscsi/iscsi_target_erl1.c b/drivers/target/iscsi/iscsi_target_erl1.c
index 38210ab09887..c27335c29640 100644
--- a/drivers/target/iscsi/iscsi_target_erl1.c
+++ b/drivers/target/iscsi/iscsi_target_erl1.c
@@ -979,7 +979,7 @@ int iscsit_execute_cmd(struct iscsi_cmd *cmd, int ooo)
 			if (cmd->se_cmd.transport_state & CMD_T_ABORTED)
 				return 0;
 
-			iscsit_set_unsoliticed_dataout(cmd);
+			iscsit_set_unsolicited_dataout(cmd);
 		}
 		return transport_handle_cdb_direct(&cmd->se_cmd);
 
diff --git a/include/target/iscsi/iscsi_transport.h b/include/target/iscsi/iscsi_transport.h
index 91948bc5b185..8b4b8ce2c1ae 100644
--- a/include/target/iscsi/iscsi_transport.h
+++ b/include/target/iscsi/iscsi_transport.h
@@ -53,7 +53,7 @@ extern void iscsit_put_transport(struct iscsit_transport *);
  */
 extern int iscsit_setup_scsi_cmd(struct iscsi_conn *, struct iscsi_cmd *,
 				unsigned char *);
-extern void iscsit_set_unsoliticed_dataout(struct iscsi_cmd *);
+extern void iscsit_set_unsolicited_dataout(struct iscsi_cmd *);
 extern int iscsit_process_scsi_cmd(struct iscsi_conn *, struct iscsi_cmd *,
 				struct iscsi_scsi_req *);
 extern int
-- 
cgit v1.2.3


From 1e65cc1631c187c4607e9a2067769791be529ce6 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Fri, 25 Jan 2019 10:34:55 -0800
Subject: scsi: target/iscsi: Rename a function and a function pointer

Having both a function and a function pointer member with the same
name (iscsit_release_cmd) is confusing. Hence rename the function pointer
member.

Cc: Nicholas Bellinger <nab@linux-iscsi.org>
Cc: Mike Christie <mchristi@redhat.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Hannes Reinecke <hare@suse.de>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/target/iscsi/cxgbit/cxgbit.h      | 2 +-
 drivers/target/iscsi/cxgbit/cxgbit_ddp.c  | 2 +-
 drivers/target/iscsi/cxgbit/cxgbit_main.c | 2 +-
 drivers/target/iscsi/iscsi_target_util.c  | 4 ++--
 include/target/iscsi/iscsi_transport.h    | 2 +-
 5 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/drivers/target/iscsi/cxgbit/cxgbit.h b/drivers/target/iscsi/cxgbit/cxgbit.h
index 417b9e66b0cd..3cca22e19964 100644
--- a/drivers/target/iscsi/cxgbit/cxgbit.h
+++ b/drivers/target/iscsi/cxgbit/cxgbit.h
@@ -345,7 +345,7 @@ struct cxgbit_device *cxgbit_find_device(struct net_device *, u8 *);
 int cxgbit_ddp_init(struct cxgbit_device *);
 int cxgbit_setup_conn_pgidx(struct cxgbit_sock *, u32);
 int cxgbit_reserve_ttt(struct cxgbit_sock *, struct iscsi_cmd *);
-void cxgbit_release_cmd(struct iscsi_conn *, struct iscsi_cmd *);
+void cxgbit_unmap_cmd(struct iscsi_conn *, struct iscsi_cmd *);
 
 static inline
 struct cxgbi_ppm *cdev2ppm(struct cxgbit_device *cdev)
diff --git a/drivers/target/iscsi/cxgbit/cxgbit_ddp.c b/drivers/target/iscsi/cxgbit/cxgbit_ddp.c
index 76a262674c8d..d57fd3ed3fa5 100644
--- a/drivers/target/iscsi/cxgbit/cxgbit_ddp.c
+++ b/drivers/target/iscsi/cxgbit/cxgbit_ddp.c
@@ -263,7 +263,7 @@ out:
 	r2t->targ_xfer_tag = ttinfo->tag;
 }
 
-void cxgbit_release_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd)
+void cxgbit_unmap_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd)
 {
 	struct cxgbit_cmd *ccmd = iscsit_priv_cmd(cmd);
 
diff --git a/drivers/target/iscsi/cxgbit/cxgbit_main.c b/drivers/target/iscsi/cxgbit/cxgbit_main.c
index c011c826fc26..4a7bb0b49d17 100644
--- a/drivers/target/iscsi/cxgbit/cxgbit_main.c
+++ b/drivers/target/iscsi/cxgbit/cxgbit_main.c
@@ -678,7 +678,7 @@ static struct iscsit_transport cxgbit_transport = {
 	.iscsit_get_r2t_ttt	= cxgbit_get_r2t_ttt,
 	.iscsit_get_rx_pdu	= cxgbit_get_rx_pdu,
 	.iscsit_validate_params	= cxgbit_validate_params,
-	.iscsit_release_cmd	= cxgbit_release_cmd,
+	.iscsit_unmap_cmd	= cxgbit_unmap_cmd,
 	.iscsit_aborted_task	= iscsit_aborted_task,
 	.iscsit_get_sup_prot_ops = cxgbit_get_sup_prot_ops,
 };
diff --git a/drivers/target/iscsi/iscsi_target_util.c b/drivers/target/iscsi/iscsi_target_util.c
index 58886d4b33cb..3ac494f63a0b 100644
--- a/drivers/target/iscsi/iscsi_target_util.c
+++ b/drivers/target/iscsi/iscsi_target_util.c
@@ -760,8 +760,8 @@ void __iscsit_free_cmd(struct iscsi_cmd *cmd, bool check_queues)
 		iscsit_remove_cmd_from_response_queue(cmd, conn);
 	}
 
-	if (conn && conn->conn_transport->iscsit_release_cmd)
-		conn->conn_transport->iscsit_release_cmd(conn, cmd);
+	if (conn && conn->conn_transport->iscsit_unmap_cmd)
+		conn->conn_transport->iscsit_unmap_cmd(conn, cmd);
 }
 
 void iscsit_free_cmd(struct iscsi_cmd *cmd, bool shutdown)
diff --git a/include/target/iscsi/iscsi_transport.h b/include/target/iscsi/iscsi_transport.h
index 8b4b8ce2c1ae..75bee29fd7dd 100644
--- a/include/target/iscsi/iscsi_transport.h
+++ b/include/target/iscsi/iscsi_transport.h
@@ -26,7 +26,7 @@ struct iscsit_transport {
 	void (*iscsit_aborted_task)(struct iscsi_conn *, struct iscsi_cmd *);
 	int (*iscsit_xmit_pdu)(struct iscsi_conn *, struct iscsi_cmd *,
 			       struct iscsi_datain_req *, const void *, u32);
-	void (*iscsit_release_cmd)(struct iscsi_conn *, struct iscsi_cmd *);
+	void (*iscsit_unmap_cmd)(struct iscsi_conn *, struct iscsi_cmd *);
 	void (*iscsit_get_rx_pdu)(struct iscsi_conn *);
 	int (*iscsit_validate_params)(struct iscsi_conn *);
 	void (*iscsit_get_r2t_ttt)(struct iscsi_conn *, struct iscsi_cmd *,
-- 
cgit v1.2.3


From 809ab9371ca0a96b44d9866ad82849410759a45b Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Sat, 26 Jan 2019 00:52:26 -0500
Subject: XArray: Update xa_erase family descriptions

xa_erase does not allocate memory and doesn't have a gfp parameter.
Update the descriptions of all four variants to be more useful.

Signed-off-by: Matthew Wilcox <willy@infradead.org>
---
 include/linux/xarray.h | 12 ++++++------
 lib/xarray.c           | 17 ++++++++---------
 2 files changed, 14 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/include/linux/xarray.h b/include/linux/xarray.h
index 5d9d318bcf7a..e11841537631 100644
--- a/include/linux/xarray.h
+++ b/include/linux/xarray.h
@@ -526,9 +526,9 @@ static inline void *xa_store_irq(struct xarray *xa, unsigned long index,
  * @xa: XArray.
  * @index: Index of entry.
  *
- * This function is the equivalent of calling xa_store() with %NULL as
- * the third argument.  The XArray does not need to allocate memory, so
- * the user does not need to provide GFP flags.
+ * After this function returns, loading from @index will return %NULL.
+ * If the index is part of a multi-index entry, all indices will be erased
+ * and none of the entries will be part of a multi-index entry.
  *
  * Context: Any context.  Takes and releases the xa_lock while
  * disabling softirqs.
@@ -550,9 +550,9 @@ static inline void *xa_erase_bh(struct xarray *xa, unsigned long index)
  * @xa: XArray.
  * @index: Index of entry.
  *
- * This function is the equivalent of calling xa_store() with %NULL as
- * the third argument.  The XArray does not need to allocate memory, so
- * the user does not need to provide GFP flags.
+ * After this function returns, loading from @index will return %NULL.
+ * If the index is part of a multi-index entry, all indices will be erased
+ * and none of the entries will be part of a multi-index entry.
  *
  * Context: Process context.  Takes and releases the xa_lock while
  * disabling interrupts.
diff --git a/lib/xarray.c b/lib/xarray.c
index 81c3171ddde9..fb783bf2a441 100644
--- a/lib/xarray.c
+++ b/lib/xarray.c
@@ -1294,13 +1294,12 @@ static void *xas_result(struct xa_state *xas, void *curr)
  * @xa: XArray.
  * @index: Index into array.
  *
- * If the entry at this index is a multi-index entry then all indices will
- * be erased, and the entry will no longer be a multi-index entry.
- * This function expects the xa_lock to be held on entry.
+ * After this function returns, loading from @index will return %NULL.
+ * If the index is part of a multi-index entry, all indices will be erased
+ * and none of the entries will be part of a multi-index entry.
  *
- * Context: Any context.  Expects xa_lock to be held on entry.  May
- * release and reacquire xa_lock if @gfp flags permit.
- * Return: The old entry at this index.
+ * Context: Any context.  Expects xa_lock to be held on entry.
+ * Return: The entry which used to be at this index.
  */
 void *__xa_erase(struct xarray *xa, unsigned long index)
 {
@@ -1314,9 +1313,9 @@ EXPORT_SYMBOL(__xa_erase);
  * @xa: XArray.
  * @index: Index of entry.
  *
- * This function is the equivalent of calling xa_store() with %NULL as
- * the third argument.  The XArray does not need to allocate memory, so
- * the user does not need to provide GFP flags.
+ * After this function returns, loading from @index will return %NULL.
+ * If the index is part of a multi-index entry, all indices will be erased
+ * and none of the entries will be part of a multi-index entry.
  *
  * Context: Any context.  Takes and releases the xa_lock.
  * Return: The entry which used to be at this index.
-- 
cgit v1.2.3


From fe6f42cf6eb3183ebd6ab6b0b7dcbee2600c2baa Mon Sep 17 00:00:00 2001
From: Nava kishore Manne <nava.manne@xilinx.com>
Date: Wed, 6 Feb 2019 16:37:19 +0530
Subject: firmware: xilinx: Add zynqmp_pm_get_chipid() API

This patch adds a new API to provide access to the
hardware related data like soc revision, IDCODE... etc.

Signed-off-by: Nava kishore Manne <nava.manne@xilinx.com>
Signed-off-by: Michal Simek <michal.simek@xilinx.com>
---
 drivers/firmware/xilinx/zynqmp.c     | 24 ++++++++++++++++++++++++
 include/linux/firmware/xlnx-zynqmp.h |  2 ++
 2 files changed, 26 insertions(+)

(limited to 'include')

diff --git a/drivers/firmware/xilinx/zynqmp.c b/drivers/firmware/xilinx/zynqmp.c
index 70b50377ae5f..16a23bc4c2c3 100644
--- a/drivers/firmware/xilinx/zynqmp.c
+++ b/drivers/firmware/xilinx/zynqmp.c
@@ -186,6 +186,29 @@ static int zynqmp_pm_get_api_version(u32 *version)
 	return ret;
 }
 
+/**
+ * zynqmp_pm_get_chipid - Get silicon ID registers
+ * @idcode:     IDCODE register
+ * @version:    version register
+ *
+ * Return:      Returns the status of the operation and the idcode and version
+ *              registers in @idcode and @version.
+ */
+static int zynqmp_pm_get_chipid(u32 *idcode, u32 *version)
+{
+	u32 ret_payload[PAYLOAD_ARG_CNT];
+	int ret;
+
+	if (!idcode || !version)
+		return -EINVAL;
+
+	ret = zynqmp_pm_invoke_fn(PM_GET_CHIPID, 0, 0, 0, 0, ret_payload);
+	*idcode = ret_payload[1];
+	*version = ret_payload[2];
+
+	return ret;
+}
+
 /**
  * zynqmp_pm_get_trustzone_version() - Get secure trustzone firmware version
  * @version:	Returned version value
@@ -509,6 +532,7 @@ static int zynqmp_pm_reset_get_status(const enum zynqmp_pm_reset reset,
 
 static const struct zynqmp_eemi_ops eemi_ops = {
 	.get_api_version = zynqmp_pm_get_api_version,
+	.get_chipid = zynqmp_pm_get_chipid,
 	.query_data = zynqmp_pm_query_data,
 	.clock_enable = zynqmp_pm_clock_enable,
 	.clock_disable = zynqmp_pm_clock_disable,
diff --git a/include/linux/firmware/xlnx-zynqmp.h b/include/linux/firmware/xlnx-zynqmp.h
index 07c587a0b06e..5a1f19848100 100644
--- a/include/linux/firmware/xlnx-zynqmp.h
+++ b/include/linux/firmware/xlnx-zynqmp.h
@@ -36,6 +36,7 @@ enum pm_api_id {
 	PM_GET_API_VERSION = 1,
 	PM_RESET_ASSERT = 17,
 	PM_RESET_GET_STATUS,
+	PM_GET_CHIPID = 24,
 	PM_IOCTL = 34,
 	PM_QUERY_DATA,
 	PM_CLOCK_ENABLE,
@@ -224,6 +225,7 @@ struct zynqmp_pm_query_data {
 
 struct zynqmp_eemi_ops {
 	int (*get_api_version)(u32 *version);
+	int (*get_chipid)(u32 *idcode, u32 *version);
 	int (*query_data)(struct zynqmp_pm_query_data qdata, u32 *out);
 	int (*clock_enable)(u32 clock_id);
 	int (*clock_disable)(u32 clock_id);
-- 
cgit v1.2.3


From 2292822e1576c89191a65c3d0da584d75d3c033f Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa+renesas@sang-engineering.com>
Date: Sat, 19 Jan 2019 13:16:53 +0100
Subject: i2c: algo-bit: include main i2c header

We are using symbols from it, so we should include it directly. Found
after sorting includes in a driver.

Signed-off-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
Reviewed-by: Simon Horman <horms+renesas@verge.net.au>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 include/linux/i2c-algo-bit.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/i2c-algo-bit.h b/include/linux/i2c-algo-bit.h
index 63904ba6887e..d64cebc6e65a 100644
--- a/include/linux/i2c-algo-bit.h
+++ b/include/linux/i2c-algo-bit.h
@@ -25,6 +25,8 @@
 #ifndef _LINUX_I2C_ALGO_BIT_H
 #define _LINUX_I2C_ALGO_BIT_H
 
+#include <linux/i2c.h>
+
 /* --- Defines for bit-adapters ---------------------------------------	*/
 /*
  * This struct contains the hw-dependent functions of bit-style adapters to
-- 
cgit v1.2.3


From 738ac0679b969776a638daf2cfb5011049d467da Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa+renesas@sang-engineering.com>
Date: Sat, 19 Jan 2019 13:16:54 +0100
Subject: i2c: algo-bit: convert to SPDX header

And use kernel style for the remaining comments in the header.

Signed-off-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
Reviewed-by: Simon Horman <horms+renesas@verge.net.au>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/algos/i2c-algo-bit.c | 25 ++++++++-----------------
 include/linux/i2c-algo-bit.h     | 31 ++++++++-----------------------
 2 files changed, 16 insertions(+), 40 deletions(-)

(limited to 'include')

diff --git a/drivers/i2c/algos/i2c-algo-bit.c b/drivers/i2c/algos/i2c-algo-bit.c
index c33dcfb87993..5e5990a83da5 100644
--- a/drivers/i2c/algos/i2c-algo-bit.c
+++ b/drivers/i2c/algos/i2c-algo-bit.c
@@ -1,21 +1,12 @@
-/* -------------------------------------------------------------------------
- * i2c-algo-bit.c i2c driver algorithms for bit-shift adapters
- * -------------------------------------------------------------------------
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * i2c-algo-bit.c: i2c driver algorithms for bit-shift adapters
+ *
  *   Copyright (C) 1995-2000 Simon G. Vogl
-
-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 2 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
- * ------------------------------------------------------------------------- */
-
-/* With some changes from Frodo Looijaard <frodol@dds.nl>, Kyösti Mälkki
-   <kmalkki@cc.hut.fi> and Jean Delvare <jdelvare@suse.de> */
+ *
+ * With some changes from Frodo Looijaard <frodol@dds.nl>, Kyösti Mälkki
+ * <kmalkki@cc.hut.fi> and Jean Delvare <jdelvare@suse.de>
+ */
 
 #include <linux/kernel.h>
 #include <linux/module.h>
diff --git a/include/linux/i2c-algo-bit.h b/include/linux/i2c-algo-bit.h
index d64cebc6e65a..69045df78e2d 100644
--- a/include/linux/i2c-algo-bit.h
+++ b/include/linux/i2c-algo-bit.h
@@ -1,26 +1,11 @@
-/* ------------------------------------------------------------------------- */
-/* i2c-algo-bit.h i2c driver algorithms for bit-shift adapters               */
-/* ------------------------------------------------------------------------- */
-/*   Copyright (C) 1995-99 Simon G. Vogl
-
-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 2 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program; if not, write to the Free Software
-    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
-    MA 02110-1301 USA.							     */
-/* ------------------------------------------------------------------------- */
-
-/* With some changes from Kyösti Mälkki <kmalkki@cc.hut.fi> and even
-   Frodo Looijaard <frodol@dds.nl> */
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * i2c-algo-bit.h: i2c driver algorithms for bit-shift adapters
+ *
+ *   Copyright (C) 1995-99 Simon G. Vogl
+ * With some changes from Kyösti Mälkki <kmalkki@cc.hut.fi> and even
+ * Frodo Looijaard <frodol@dds.nl>
+ */
 
 #ifndef _LINUX_I2C_ALGO_BIT_H
 #define _LINUX_I2C_ALGO_BIT_H
-- 
cgit v1.2.3


From b525903c254dab2491410f0f23707691b7c2c317 Mon Sep 17 00:00:00 2001
From: Julien Thierry <julien.thierry@arm.com>
Date: Thu, 31 Jan 2019 14:53:58 +0000
Subject: genirq: Provide basic NMI management for interrupt lines

Add functionality to allocate interrupt lines that will deliver IRQs
as Non-Maskable Interrupts. These allocations are only successful if
the irqchip provides the necessary support and allows NMI delivery for the
interrupt line.

Interrupt lines allocated for NMI delivery must be enabled/disabled through
enable_nmi/disable_nmi_nosync to keep their state consistent.

To treat a PERCPU IRQ as NMI, the interrupt must not be shared nor threaded,
the irqchip directly managing the IRQ must be the root irqchip and the
irqchip cannot be behind a slow bus.

Signed-off-by: Julien Thierry <julien.thierry@arm.com>
Reviewed-by: Marc Zyngier <marc.zyngier@arm.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 include/linux/interrupt.h |   9 ++
 include/linux/irq.h       |   7 ++
 kernel/irq/debugfs.c      |   6 +-
 kernel/irq/internals.h    |   2 +
 kernel/irq/manage.c       | 228 +++++++++++++++++++++++++++++++++++++++++++++-
 5 files changed, 249 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index c672f34235e7..9941d1a8d83c 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -156,6 +156,10 @@ __request_percpu_irq(unsigned int irq, irq_handler_t handler,
 		     unsigned long flags, const char *devname,
 		     void __percpu *percpu_dev_id);
 
+extern int __must_check
+request_nmi(unsigned int irq, irq_handler_t handler, unsigned long flags,
+	    const char *name, void *dev);
+
 static inline int __must_check
 request_percpu_irq(unsigned int irq, irq_handler_t handler,
 		   const char *devname, void __percpu *percpu_dev_id)
@@ -167,6 +171,8 @@ request_percpu_irq(unsigned int irq, irq_handler_t handler,
 extern const void *free_irq(unsigned int, void *);
 extern void free_percpu_irq(unsigned int, void __percpu *);
 
+extern const void *free_nmi(unsigned int irq, void *dev_id);
+
 struct device;
 
 extern int __must_check
@@ -217,6 +223,9 @@ extern void enable_percpu_irq(unsigned int irq, unsigned int type);
 extern bool irq_percpu_is_enabled(unsigned int irq);
 extern void irq_wake_thread(unsigned int irq, void *dev_id);
 
+extern void disable_nmi_nosync(unsigned int irq);
+extern void enable_nmi(unsigned int irq);
+
 /* The following three functions are for the core kernel use only. */
 extern void suspend_device_irqs(void);
 extern void resume_device_irqs(void);
diff --git a/include/linux/irq.h b/include/linux/irq.h
index def2b2aac8b1..a7298e4998c8 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -442,6 +442,8 @@ static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d)
  * @irq_set_vcpu_affinity:	optional to target a vCPU in a virtual machine
  * @ipi_send_single:	send a single IPI to destination cpus
  * @ipi_send_mask:	send an IPI to destination cpus in cpumask
+ * @irq_nmi_setup:	function called from core code before enabling an NMI
+ * @irq_nmi_teardown:	function called from core code after disabling an NMI
  * @flags:		chip specific flags
  */
 struct irq_chip {
@@ -490,6 +492,9 @@ struct irq_chip {
 	void		(*ipi_send_single)(struct irq_data *data, unsigned int cpu);
 	void		(*ipi_send_mask)(struct irq_data *data, const struct cpumask *dest);
 
+	int		(*irq_nmi_setup)(struct irq_data *data);
+	void		(*irq_nmi_teardown)(struct irq_data *data);
+
 	unsigned long	flags;
 };
 
@@ -505,6 +510,7 @@ struct irq_chip {
  * IRQCHIP_ONESHOT_SAFE:	One shot does not require mask/unmask
  * IRQCHIP_EOI_THREADED:	Chip requires eoi() on unmask in threaded mode
  * IRQCHIP_SUPPORTS_LEVEL_MSI	Chip can provide two doorbells for Level MSIs
+ * IRQCHIP_SUPPORTS_NMI:	Chip can deliver NMIs, only for root irqchips
  */
 enum {
 	IRQCHIP_SET_TYPE_MASKED		= (1 <<  0),
@@ -515,6 +521,7 @@ enum {
 	IRQCHIP_ONESHOT_SAFE		= (1 <<  5),
 	IRQCHIP_EOI_THREADED		= (1 <<  6),
 	IRQCHIP_SUPPORTS_LEVEL_MSI	= (1 <<  7),
+	IRQCHIP_SUPPORTS_NMI		= (1 <<  8),
 };
 
 #include <linux/irqdesc.h>
diff --git a/kernel/irq/debugfs.c b/kernel/irq/debugfs.c
index 6f636136cccc..59a04d2a66df 100644
--- a/kernel/irq/debugfs.c
+++ b/kernel/irq/debugfs.c
@@ -56,6 +56,7 @@ static const struct irq_bit_descr irqchip_flags[] = {
 	BIT_MASK_DESCR(IRQCHIP_ONESHOT_SAFE),
 	BIT_MASK_DESCR(IRQCHIP_EOI_THREADED),
 	BIT_MASK_DESCR(IRQCHIP_SUPPORTS_LEVEL_MSI),
+	BIT_MASK_DESCR(IRQCHIP_SUPPORTS_NMI),
 };
 
 static void
@@ -140,6 +141,7 @@ static const struct irq_bit_descr irqdesc_istates[] = {
 	BIT_MASK_DESCR(IRQS_WAITING),
 	BIT_MASK_DESCR(IRQS_PENDING),
 	BIT_MASK_DESCR(IRQS_SUSPENDED),
+	BIT_MASK_DESCR(IRQS_NMI),
 };
 
 
@@ -203,8 +205,8 @@ static ssize_t irq_debug_write(struct file *file, const char __user *user_buf,
 		chip_bus_lock(desc);
 		raw_spin_lock_irqsave(&desc->lock, flags);
 
-		if (irq_settings_is_level(desc)) {
-			/* Can't do level, sorry */
+		if (irq_settings_is_level(desc) || desc->istate & IRQS_NMI) {
+			/* Can't do level nor NMIs, sorry */
 			err = -EINVAL;
 		} else {
 			desc->istate |= IRQS_PENDING;
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index ca6afa267070..2a77cdd27ca9 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -49,6 +49,7 @@ enum {
  * IRQS_WAITING			- irq is waiting
  * IRQS_PENDING			- irq is pending and replayed later
  * IRQS_SUSPENDED		- irq is suspended
+ * IRQS_NMI			- irq line is used to deliver NMIs
  */
 enum {
 	IRQS_AUTODETECT		= 0x00000001,
@@ -60,6 +61,7 @@ enum {
 	IRQS_PENDING		= 0x00000200,
 	IRQS_SUSPENDED		= 0x00000800,
 	IRQS_TIMINGS		= 0x00001000,
+	IRQS_NMI		= 0x00002000,
 };
 
 #include "debug.h"
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index a4888ce4667a..9472ae987946 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -341,7 +341,7 @@ irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify)
 	/* The release function is promised process context */
 	might_sleep();
 
-	if (!desc)
+	if (!desc || desc->istate & IRQS_NMI)
 		return -EINVAL;
 
 	/* Complete initialisation of *notify */
@@ -550,6 +550,21 @@ bool disable_hardirq(unsigned int irq)
 }
 EXPORT_SYMBOL_GPL(disable_hardirq);
 
+/**
+ *	disable_nmi_nosync - disable an nmi without waiting
+ *	@irq: Interrupt to disable
+ *
+ *	Disable the selected interrupt line. Disables and enables are
+ *	nested.
+ *	The interrupt to disable must have been requested through request_nmi.
+ *	Unlike disable_nmi(), this function does not ensure existing
+ *	instances of the IRQ handler have completed before returning.
+ */
+void disable_nmi_nosync(unsigned int irq)
+{
+	disable_irq_nosync(irq);
+}
+
 void __enable_irq(struct irq_desc *desc)
 {
 	switch (desc->depth) {
@@ -606,6 +621,20 @@ out:
 }
 EXPORT_SYMBOL(enable_irq);
 
+/**
+ *	enable_nmi - enable handling of an nmi
+ *	@irq: Interrupt to enable
+ *
+ *	The interrupt to enable must have been requested through request_nmi.
+ *	Undoes the effect of one call to disable_nmi(). If this
+ *	matches the last disable, processing of interrupts on this
+ *	IRQ line is re-enabled.
+ */
+void enable_nmi(unsigned int irq)
+{
+	enable_irq(irq);
+}
+
 static int set_irq_wake_real(unsigned int irq, unsigned int on)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
@@ -641,6 +670,12 @@ int irq_set_irq_wake(unsigned int irq, unsigned int on)
 	if (!desc)
 		return -EINVAL;
 
+	/* Don't use NMIs as wake up interrupts please */
+	if (desc->istate & IRQS_NMI) {
+		ret = -EINVAL;
+		goto out_unlock;
+	}
+
 	/* wakeup-capable irqs can be shared between drivers that
 	 * don't need to have the same sleep mode behaviors.
 	 */
@@ -663,6 +698,8 @@ int irq_set_irq_wake(unsigned int irq, unsigned int on)
 				irqd_clear(&desc->irq_data, IRQD_WAKEUP_STATE);
 		}
 	}
+
+out_unlock:
 	irq_put_desc_busunlock(desc, flags);
 	return ret;
 }
@@ -1125,6 +1162,39 @@ static void irq_release_resources(struct irq_desc *desc)
 		c->irq_release_resources(d);
 }
 
+static bool irq_supports_nmi(struct irq_desc *desc)
+{
+	struct irq_data *d = irq_desc_get_irq_data(desc);
+
+#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY
+	/* Only IRQs directly managed by the root irqchip can be set as NMI */
+	if (d->parent_data)
+		return false;
+#endif
+	/* Don't support NMIs for chips behind a slow bus */
+	if (d->chip->irq_bus_lock || d->chip->irq_bus_sync_unlock)
+		return false;
+
+	return d->chip->flags & IRQCHIP_SUPPORTS_NMI;
+}
+
+static int irq_nmi_setup(struct irq_desc *desc)
+{
+	struct irq_data *d = irq_desc_get_irq_data(desc);
+	struct irq_chip *c = d->chip;
+
+	return c->irq_nmi_setup ? c->irq_nmi_setup(d) : -EINVAL;
+}
+
+static void irq_nmi_teardown(struct irq_desc *desc)
+{
+	struct irq_data *d = irq_desc_get_irq_data(desc);
+	struct irq_chip *c = d->chip;
+
+	if (c->irq_nmi_teardown)
+		c->irq_nmi_teardown(d);
+}
+
 static int
 setup_irq_thread(struct irqaction *new, unsigned int irq, bool secondary)
 {
@@ -1299,9 +1369,17 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
 		 * fields must have IRQF_SHARED set and the bits which
 		 * set the trigger type must match. Also all must
 		 * agree on ONESHOT.
+		 * Interrupt lines used for NMIs cannot be shared.
 		 */
 		unsigned int oldtype;
 
+		if (desc->istate & IRQS_NMI) {
+			pr_err("Invalid attempt to share NMI for %s (irq %d) on irqchip %s.\n",
+				new->name, irq, desc->irq_data.chip->name);
+			ret = -EINVAL;
+			goto out_unlock;
+		}
+
 		/*
 		 * If nobody did set the configuration before, inherit
 		 * the one provided by the requester.
@@ -1753,6 +1831,59 @@ const void *free_irq(unsigned int irq, void *dev_id)
 }
 EXPORT_SYMBOL(free_irq);
 
+/* This function must be called with desc->lock held */
+static const void *__cleanup_nmi(unsigned int irq, struct irq_desc *desc)
+{
+	const char *devname = NULL;
+
+	desc->istate &= ~IRQS_NMI;
+
+	if (!WARN_ON(desc->action == NULL)) {
+		irq_pm_remove_action(desc, desc->action);
+		devname = desc->action->name;
+		unregister_handler_proc(irq, desc->action);
+
+		kfree(desc->action);
+		desc->action = NULL;
+	}
+
+	irq_settings_clr_disable_unlazy(desc);
+	irq_shutdown(desc);
+
+	irq_release_resources(desc);
+
+	irq_chip_pm_put(&desc->irq_data);
+	module_put(desc->owner);
+
+	return devname;
+}
+
+const void *free_nmi(unsigned int irq, void *dev_id)
+{
+	struct irq_desc *desc = irq_to_desc(irq);
+	unsigned long flags;
+	const void *devname;
+
+	if (!desc || WARN_ON(!(desc->istate & IRQS_NMI)))
+		return NULL;
+
+	if (WARN_ON(irq_settings_is_per_cpu_devid(desc)))
+		return NULL;
+
+	/* NMI still enabled */
+	if (WARN_ON(desc->depth == 0))
+		disable_nmi_nosync(irq);
+
+	raw_spin_lock_irqsave(&desc->lock, flags);
+
+	irq_nmi_teardown(desc);
+	devname = __cleanup_nmi(irq, desc);
+
+	raw_spin_unlock_irqrestore(&desc->lock, flags);
+
+	return devname;
+}
+
 /**
  *	request_threaded_irq - allocate an interrupt line
  *	@irq: Interrupt line to allocate
@@ -1922,6 +2053,101 @@ int request_any_context_irq(unsigned int irq, irq_handler_t handler,
 }
 EXPORT_SYMBOL_GPL(request_any_context_irq);
 
+/**
+ *	request_nmi - allocate an interrupt line for NMI delivery
+ *	@irq: Interrupt line to allocate
+ *	@handler: Function to be called when the IRQ occurs.
+ *		  Threaded handler for threaded interrupts.
+ *	@irqflags: Interrupt type flags
+ *	@name: An ascii name for the claiming device
+ *	@dev_id: A cookie passed back to the handler function
+ *
+ *	This call allocates interrupt resources and enables the
+ *	interrupt line and IRQ handling. It sets up the IRQ line
+ *	to be handled as an NMI.
+ *
+ *	An interrupt line delivering NMIs cannot be shared and IRQ handling
+ *	cannot be threaded.
+ *
+ *	Interrupt lines requested for NMI delivering must produce per cpu
+ *	interrupts and have auto enabling setting disabled.
+ *
+ *	Dev_id must be globally unique. Normally the address of the
+ *	device data structure is used as the cookie. Since the handler
+ *	receives this value it makes sense to use it.
+ *
+ *	If the interrupt line cannot be used to deliver NMIs, function
+ *	will fail and return a negative value.
+ */
+int request_nmi(unsigned int irq, irq_handler_t handler,
+		unsigned long irqflags, const char *name, void *dev_id)
+{
+	struct irqaction *action;
+	struct irq_desc *desc;
+	unsigned long flags;
+	int retval;
+
+	if (irq == IRQ_NOTCONNECTED)
+		return -ENOTCONN;
+
+	/* NMI cannot be shared, used for Polling */
+	if (irqflags & (IRQF_SHARED | IRQF_COND_SUSPEND | IRQF_IRQPOLL))
+		return -EINVAL;
+
+	if (!(irqflags & IRQF_PERCPU))
+		return -EINVAL;
+
+	if (!handler)
+		return -EINVAL;
+
+	desc = irq_to_desc(irq);
+
+	if (!desc || irq_settings_can_autoenable(desc) ||
+	    !irq_settings_can_request(desc) ||
+	    WARN_ON(irq_settings_is_per_cpu_devid(desc)) ||
+	    !irq_supports_nmi(desc))
+		return -EINVAL;
+
+	action = kzalloc(sizeof(struct irqaction), GFP_KERNEL);
+	if (!action)
+		return -ENOMEM;
+
+	action->handler = handler;
+	action->flags = irqflags | IRQF_NO_THREAD | IRQF_NOBALANCING;
+	action->name = name;
+	action->dev_id = dev_id;
+
+	retval = irq_chip_pm_get(&desc->irq_data);
+	if (retval < 0)
+		goto err_out;
+
+	retval = __setup_irq(irq, desc, action);
+	if (retval)
+		goto err_irq_setup;
+
+	raw_spin_lock_irqsave(&desc->lock, flags);
+
+	/* Setup NMI state */
+	desc->istate |= IRQS_NMI;
+	retval = irq_nmi_setup(desc);
+	if (retval) {
+		__cleanup_nmi(irq, desc);
+		raw_spin_unlock_irqrestore(&desc->lock, flags);
+		return -EINVAL;
+	}
+
+	raw_spin_unlock_irqrestore(&desc->lock, flags);
+
+	return 0;
+
+err_irq_setup:
+	irq_chip_pm_put(&desc->irq_data);
+err_out:
+	kfree(action);
+
+	return retval;
+}
+
 void enable_percpu_irq(unsigned int irq, unsigned int type)
 {
 	unsigned int cpu = smp_processor_id();
-- 
cgit v1.2.3


From 4b078c3f1a26487c39363089ba0d5c6b09f2a89f Mon Sep 17 00:00:00 2001
From: Julien Thierry <julien.thierry@arm.com>
Date: Thu, 31 Jan 2019 14:53:59 +0000
Subject: genirq: Provide NMI management for percpu_devid interrupts

Add support for percpu_devid interrupts treated as NMIs.

Percpu_devid NMIs need to be setup/torn down on each CPU they target.

The same restrictions as for global NMIs still apply for percpu_devid NMIs.

Signed-off-by: Julien Thierry <julien.thierry@arm.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 include/linux/interrupt.h |   9 +++
 kernel/irq/manage.c       | 177 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 186 insertions(+)

(limited to 'include')

diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 9941d1a8d83c..831ddcdc5597 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -168,10 +168,15 @@ request_percpu_irq(unsigned int irq, irq_handler_t handler,
 				    devname, percpu_dev_id);
 }
 
+extern int __must_check
+request_percpu_nmi(unsigned int irq, irq_handler_t handler,
+		   const char *devname, void __percpu *dev);
+
 extern const void *free_irq(unsigned int, void *);
 extern void free_percpu_irq(unsigned int, void __percpu *);
 
 extern const void *free_nmi(unsigned int irq, void *dev_id);
+extern void free_percpu_nmi(unsigned int irq, void __percpu *percpu_dev_id);
 
 struct device;
 
@@ -224,7 +229,11 @@ extern bool irq_percpu_is_enabled(unsigned int irq);
 extern void irq_wake_thread(unsigned int irq, void *dev_id);
 
 extern void disable_nmi_nosync(unsigned int irq);
+extern void disable_percpu_nmi(unsigned int irq);
 extern void enable_nmi(unsigned int irq);
+extern void enable_percpu_nmi(unsigned int irq, unsigned int type);
+extern int prepare_percpu_nmi(unsigned int irq);
+extern void teardown_percpu_nmi(unsigned int irq);
 
 /* The following three functions are for the core kernel use only. */
 extern void suspend_device_irqs(void);
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 9472ae987946..0a1ebc004a59 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -2182,6 +2182,11 @@ out:
 }
 EXPORT_SYMBOL_GPL(enable_percpu_irq);
 
+void enable_percpu_nmi(unsigned int irq, unsigned int type)
+{
+	enable_percpu_irq(irq, type);
+}
+
 /**
  * irq_percpu_is_enabled - Check whether the per cpu irq is enabled
  * @irq:	Linux irq number to check for
@@ -2221,6 +2226,11 @@ void disable_percpu_irq(unsigned int irq)
 }
 EXPORT_SYMBOL_GPL(disable_percpu_irq);
 
+void disable_percpu_nmi(unsigned int irq)
+{
+	disable_percpu_irq(irq);
+}
+
 /*
  * Internal function to unregister a percpu irqaction.
  */
@@ -2252,6 +2262,8 @@ static struct irqaction *__free_percpu_irq(unsigned int irq, void __percpu *dev_
 	/* Found it - now remove it from the list of entries: */
 	desc->action = NULL;
 
+	desc->istate &= ~IRQS_NMI;
+
 	raw_spin_unlock_irqrestore(&desc->lock, flags);
 
 	unregister_handler_proc(irq, action);
@@ -2305,6 +2317,19 @@ void free_percpu_irq(unsigned int irq, void __percpu *dev_id)
 }
 EXPORT_SYMBOL_GPL(free_percpu_irq);
 
+void free_percpu_nmi(unsigned int irq, void __percpu *dev_id)
+{
+	struct irq_desc *desc = irq_to_desc(irq);
+
+	if (!desc || !irq_settings_is_per_cpu_devid(desc))
+		return;
+
+	if (WARN_ON(!(desc->istate & IRQS_NMI)))
+		return;
+
+	kfree(__free_percpu_irq(irq, dev_id));
+}
+
 /**
  *	setup_percpu_irq - setup a per-cpu interrupt
  *	@irq: Interrupt line to setup
@@ -2394,6 +2419,158 @@ int __request_percpu_irq(unsigned int irq, irq_handler_t handler,
 }
 EXPORT_SYMBOL_GPL(__request_percpu_irq);
 
+/**
+ *	request_percpu_nmi - allocate a percpu interrupt line for NMI delivery
+ *	@irq: Interrupt line to allocate
+ *	@handler: Function to be called when the IRQ occurs.
+ *	@name: An ascii name for the claiming device
+ *	@dev_id: A percpu cookie passed back to the handler function
+ *
+ *	This call allocates interrupt resources for a per CPU NMI. Per CPU NMIs
+ *	have to be setup on each CPU by calling ready_percpu_nmi() before being
+ *	enabled on the same CPU by using enable_percpu_nmi().
+ *
+ *	Dev_id must be globally unique. It is a per-cpu variable, and
+ *	the handler gets called with the interrupted CPU's instance of
+ *	that variable.
+ *
+ *	Interrupt lines requested for NMI delivering should have auto enabling
+ *	setting disabled.
+ *
+ *	If the interrupt line cannot be used to deliver NMIs, function
+ *	will fail returning a negative value.
+ */
+int request_percpu_nmi(unsigned int irq, irq_handler_t handler,
+		       const char *name, void __percpu *dev_id)
+{
+	struct irqaction *action;
+	struct irq_desc *desc;
+	unsigned long flags;
+	int retval;
+
+	if (!handler)
+		return -EINVAL;
+
+	desc = irq_to_desc(irq);
+
+	if (!desc || !irq_settings_can_request(desc) ||
+	    !irq_settings_is_per_cpu_devid(desc) ||
+	    irq_settings_can_autoenable(desc) ||
+	    !irq_supports_nmi(desc))
+		return -EINVAL;
+
+	/* The line cannot already be NMI */
+	if (desc->istate & IRQS_NMI)
+		return -EINVAL;
+
+	action = kzalloc(sizeof(struct irqaction), GFP_KERNEL);
+	if (!action)
+		return -ENOMEM;
+
+	action->handler = handler;
+	action->flags = IRQF_PERCPU | IRQF_NO_SUSPEND | IRQF_NO_THREAD
+		| IRQF_NOBALANCING;
+	action->name = name;
+	action->percpu_dev_id = dev_id;
+
+	retval = irq_chip_pm_get(&desc->irq_data);
+	if (retval < 0)
+		goto err_out;
+
+	retval = __setup_irq(irq, desc, action);
+	if (retval)
+		goto err_irq_setup;
+
+	raw_spin_lock_irqsave(&desc->lock, flags);
+	desc->istate |= IRQS_NMI;
+	raw_spin_unlock_irqrestore(&desc->lock, flags);
+
+	return 0;
+
+err_irq_setup:
+	irq_chip_pm_put(&desc->irq_data);
+err_out:
+	kfree(action);
+
+	return retval;
+}
+
+/**
+ *	prepare_percpu_nmi - performs CPU local setup for NMI delivery
+ *	@irq: Interrupt line to prepare for NMI delivery
+ *
+ *	This call prepares an interrupt line to deliver NMI on the current CPU,
+ *	before that interrupt line gets enabled with enable_percpu_nmi().
+ *
+ *	As a CPU local operation, this should be called from non-preemptible
+ *	context.
+ *
+ *	If the interrupt line cannot be used to deliver NMIs, function
+ *	will fail returning a negative value.
+ */
+int prepare_percpu_nmi(unsigned int irq)
+{
+	unsigned long flags;
+	struct irq_desc *desc;
+	int ret = 0;
+
+	WARN_ON(preemptible());
+
+	desc = irq_get_desc_lock(irq, &flags,
+				 IRQ_GET_DESC_CHECK_PERCPU);
+	if (!desc)
+		return -EINVAL;
+
+	if (WARN(!(desc->istate & IRQS_NMI),
+		 KERN_ERR "prepare_percpu_nmi called for a non-NMI interrupt: irq %u\n",
+		 irq)) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	ret = irq_nmi_setup(desc);
+	if (ret) {
+		pr_err("Failed to setup NMI delivery: irq %u\n", irq);
+		goto out;
+	}
+
+out:
+	irq_put_desc_unlock(desc, flags);
+	return ret;
+}
+
+/**
+ *	teardown_percpu_nmi - undoes NMI setup of IRQ line
+ *	@irq: Interrupt line from which CPU local NMI configuration should be
+ *	      removed
+ *
+ *	This call undoes the setup done by prepare_percpu_nmi().
+ *
+ *	IRQ line should not be enabled for the current CPU.
+ *
+ *	As a CPU local operation, this should be called from non-preemptible
+ *	context.
+ */
+void teardown_percpu_nmi(unsigned int irq)
+{
+	unsigned long flags;
+	struct irq_desc *desc;
+
+	WARN_ON(preemptible());
+
+	desc = irq_get_desc_lock(irq, &flags,
+				 IRQ_GET_DESC_CHECK_PERCPU);
+	if (!desc)
+		return;
+
+	if (WARN_ON(!(desc->istate & IRQS_NMI)))
+		goto out;
+
+	irq_nmi_teardown(desc);
+out:
+	irq_put_desc_unlock(desc, flags);
+}
+
 /**
  *	irq_get_irqchip_state - returns the irqchip state of a interrupt.
  *	@irq: Interrupt line that is forwarded to a VM
-- 
cgit v1.2.3


From 2dcf1fbcad352baaa5f47b17e57c5743c8eedbad Mon Sep 17 00:00:00 2001
From: Julien Thierry <julien.thierry@arm.com>
Date: Thu, 31 Jan 2019 14:54:00 +0000
Subject: genirq: Provide NMI handlers

Provide flow handlers that are NMI safe for interrupts and percpu_devid
interrupts.

Signed-off-by: Julien Thierry <julien.thierry@arm.com>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 include/linux/irq.h |  3 +++
 kernel/irq/chip.c   | 54 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 57 insertions(+)

(limited to 'include')

diff --git a/include/linux/irq.h b/include/linux/irq.h
index a7298e4998c8..5e91f6bcaacd 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -601,6 +601,9 @@ extern void handle_percpu_devid_irq(struct irq_desc *desc);
 extern void handle_bad_irq(struct irq_desc *desc);
 extern void handle_nested_irq(unsigned int irq);
 
+extern void handle_fasteoi_nmi(struct irq_desc *desc);
+extern void handle_percpu_devid_fasteoi_nmi(struct irq_desc *desc);
+
 extern int irq_chip_compose_msi_msg(struct irq_data *data, struct msi_msg *msg);
 extern int irq_chip_pm_get(struct irq_data *data);
 extern int irq_chip_pm_put(struct irq_data *data);
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 34e969069488..c32d5f386f68 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -729,6 +729,37 @@ out:
 }
 EXPORT_SYMBOL_GPL(handle_fasteoi_irq);
 
+/**
+ *	handle_fasteoi_nmi - irq handler for NMI interrupt lines
+ *	@desc:	the interrupt description structure for this irq
+ *
+ *	A simple NMI-safe handler, considering the restrictions
+ *	from request_nmi.
+ *
+ *	Only a single callback will be issued to the chip: an ->eoi()
+ *	call when the interrupt has been serviced. This enables support
+ *	for modern forms of interrupt handlers, which handle the flow
+ *	details in hardware, transparently.
+ */
+void handle_fasteoi_nmi(struct irq_desc *desc)
+{
+	struct irq_chip *chip = irq_desc_get_chip(desc);
+	struct irqaction *action = desc->action;
+	unsigned int irq = irq_desc_get_irq(desc);
+	irqreturn_t res;
+
+	trace_irq_handler_entry(irq, action);
+	/*
+	 * NMIs cannot be shared, there is only one action.
+	 */
+	res = action->handler(irq, action->dev_id);
+	trace_irq_handler_exit(irq, action, res);
+
+	if (chip->irq_eoi)
+		chip->irq_eoi(&desc->irq_data);
+}
+EXPORT_SYMBOL_GPL(handle_fasteoi_nmi);
+
 /**
  *	handle_edge_irq - edge type IRQ handler
  *	@desc:	the interrupt description structure for this irq
@@ -908,6 +939,29 @@ void handle_percpu_devid_irq(struct irq_desc *desc)
 		chip->irq_eoi(&desc->irq_data);
 }
 
+/**
+ * handle_percpu_devid_fasteoi_nmi - Per CPU local NMI handler with per cpu
+ *				     dev ids
+ * @desc:	the interrupt description structure for this irq
+ *
+ * Similar to handle_fasteoi_nmi, but handling the dev_id cookie
+ * as a percpu pointer.
+ */
+void handle_percpu_devid_fasteoi_nmi(struct irq_desc *desc)
+{
+	struct irq_chip *chip = irq_desc_get_chip(desc);
+	struct irqaction *action = desc->action;
+	unsigned int irq = irq_desc_get_irq(desc);
+	irqreturn_t res;
+
+	trace_irq_handler_entry(irq, action);
+	res = action->handler(irq, raw_cpu_ptr(action->percpu_dev_id));
+	trace_irq_handler_exit(irq, action, res);
+
+	if (chip->irq_eoi)
+		chip->irq_eoi(&desc->irq_data);
+}
+
 static void
 __irq_do_set_handler(struct irq_desc *desc, irq_flow_handler_t handle,
 		     int is_chained, const char *name)
-- 
cgit v1.2.3


From 6e4933a006616343f66c4702dc4fc56bb25e7b02 Mon Sep 17 00:00:00 2001
From: Julien Thierry <julien.thierry@arm.com>
Date: Thu, 31 Jan 2019 14:54:01 +0000
Subject: irqdesc: Add domain handler for NMIs

NMI handling code should be executed between calls to nmi_enter and
nmi_exit.

Add a separate domain handler to properly setup NMI context when handling
an interrupt requested as NMI.

Signed-off-by: Julien Thierry <julien.thierry@arm.com>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 include/linux/irqdesc.h |  5 +++++
 kernel/irq/irqdesc.c    | 35 +++++++++++++++++++++++++++++++++++
 2 files changed, 40 insertions(+)

(limited to 'include')

diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
index dd1e40ddac7d..ba05b0d6401a 100644
--- a/include/linux/irqdesc.h
+++ b/include/linux/irqdesc.h
@@ -171,6 +171,11 @@ static inline int handle_domain_irq(struct irq_domain *domain,
 {
 	return __handle_domain_irq(domain, hwirq, true, regs);
 }
+
+#ifdef CONFIG_IRQ_DOMAIN
+int handle_domain_nmi(struct irq_domain *domain, unsigned int hwirq,
+		      struct pt_regs *regs);
+#endif
 #endif
 
 /* Test to see if a driver has successfully requested an irq */
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index ee062b7939d3..a1d7a7d484e0 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -669,6 +669,41 @@ int __handle_domain_irq(struct irq_domain *domain, unsigned int hwirq,
 	set_irq_regs(old_regs);
 	return ret;
 }
+
+#ifdef CONFIG_IRQ_DOMAIN
+/**
+ * handle_domain_nmi - Invoke the handler for a HW irq belonging to a domain
+ * @domain:	The domain where to perform the lookup
+ * @hwirq:	The HW irq number to convert to a logical one
+ * @regs:	Register file coming from the low-level handling code
+ *
+ * Returns:	0 on success, or -EINVAL if conversion has failed
+ */
+int handle_domain_nmi(struct irq_domain *domain, unsigned int hwirq,
+		      struct pt_regs *regs)
+{
+	struct pt_regs *old_regs = set_irq_regs(regs);
+	unsigned int irq;
+	int ret = 0;
+
+	nmi_enter();
+
+	irq = irq_find_mapping(domain, hwirq);
+
+	/*
+	 * ack_bad_irq is not NMI-safe, just report
+	 * an invalid interrupt.
+	 */
+	if (likely(irq))
+		generic_handle_irq(irq);
+	else
+		ret = -EINVAL;
+
+	nmi_exit();
+	set_irq_regs(old_regs);
+	return ret;
+}
+#endif
 #endif
 
 /* Dynamic interrupt handling */
-- 
cgit v1.2.3


From 013e6292aaf5e4b083a50a0f9e17e93628616860 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@bootlin.com>
Date: Tue, 20 Nov 2018 11:57:20 +0100
Subject: mtd: rawnand: Simplify the locking

nand_get_device() was complex for apparently no good reason. Let's
replace this locking scheme with 2 mutexes: one attached to the
controller and another one attached to the chip.

Every time the core calls nand_get_device(), it will first lock the
chip and if the chip is not suspended, will then lock the controller.
nand_release_device() will release both lock in the reverse order.

nand_get_device() can sleep, just like the previous implementation,
which means you should never call that from an atomic context.

We also get rid of

- the chip->state field, since all it was used for was flagging the
  chip as suspended. We replace it by a field called chip->suspended
  and directly set it from nand_suspend/resume()
- the controller->wq and controller->active fields which are no longer
  needed since the new controller->lock (now a mutex) guarantees that
  all operations are serialized at the controller level
- panic_nand_get_device() which would anyway be a no-op. Talking about
  panic write, I keep thinking the rawnand implementation is unsafe
  because there's not negotiation with the controller to know when it's
  actually done with it's previous operation. I don't intend to fix
  that here, but that's probably something we should look at, or maybe
  we should consider dropping the ->_panic_write() implementation

Last important change to mention: we now return -EBUSY when someone
tries to access a device that as been suspended, and propagate this
error to the upper layer.

Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 drivers/mtd/nand/raw/nand_base.c | 111 ++++++++++++++++-----------------------
 include/linux/mtd/rawnand.h      |  24 ++++-----
 2 files changed, 54 insertions(+), 81 deletions(-)

(limited to 'include')

diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c
index cca4b24d2ffa..96cadead262e 100644
--- a/drivers/mtd/nand/raw/nand_base.c
+++ b/drivers/mtd/nand/raw/nand_base.c
@@ -278,11 +278,8 @@ EXPORT_SYMBOL_GPL(nand_deselect_target);
 static void nand_release_device(struct nand_chip *chip)
 {
 	/* Release the controller and the chip */
-	spin_lock(&chip->controller->lock);
-	chip->controller->active = NULL;
-	chip->state = FL_READY;
-	wake_up(&chip->controller->wq);
-	spin_unlock(&chip->controller->lock);
+	mutex_unlock(&chip->controller->lock);
+	mutex_unlock(&chip->lock);
 }
 
 /**
@@ -330,58 +327,24 @@ static int nand_isbad_bbm(struct nand_chip *chip, loff_t ofs)
 	return nand_block_bad(chip, ofs);
 }
 
-/**
- * panic_nand_get_device - [GENERIC] Get chip for selected access
- * @chip: the nand chip descriptor
- * @new_state: the state which is requested
- *
- * Used when in panic, no locks are taken.
- */
-static void panic_nand_get_device(struct nand_chip *chip, int new_state)
-{
-	/* Hardware controller shared among independent devices */
-	chip->controller->active = chip;
-	chip->state = new_state;
-}
-
 /**
  * nand_get_device - [GENERIC] Get chip for selected access
  * @chip: NAND chip structure
- * @new_state: the state which is requested
  *
- * Get the device and lock it for exclusive access
+ * Lock the device and its controller for exclusive access
+ *
+ * Return: -EBUSY if the chip has been suspended, 0 otherwise
  */
-static int
-nand_get_device(struct nand_chip *chip, int new_state)
+static int nand_get_device(struct nand_chip *chip)
 {
-	spinlock_t *lock = &chip->controller->lock;
-	wait_queue_head_t *wq = &chip->controller->wq;
-	DECLARE_WAITQUEUE(wait, current);
-retry:
-	spin_lock(lock);
-
-	/* Hardware controller shared among independent devices */
-	if (!chip->controller->active)
-		chip->controller->active = chip;
-
-	if (chip->controller->active == chip && chip->state == FL_READY) {
-		chip->state = new_state;
-		spin_unlock(lock);
-		return 0;
-	}
-	if (new_state == FL_PM_SUSPENDED) {
-		if (chip->controller->active->state == FL_PM_SUSPENDED) {
-			chip->state = FL_PM_SUSPENDED;
-			spin_unlock(lock);
-			return 0;
-		}
+	mutex_lock(&chip->lock);
+	if (chip->suspended) {
+		mutex_unlock(&chip->lock);
+		return -EBUSY;
 	}
-	set_current_state(TASK_UNINTERRUPTIBLE);
-	add_wait_queue(wq, &wait);
-	spin_unlock(lock);
-	schedule();
-	remove_wait_queue(wq, &wait);
-	goto retry;
+	mutex_lock(&chip->controller->lock);
+
+	return 0;
 }
 
 /**
@@ -602,7 +565,10 @@ static int nand_block_markbad_lowlevel(struct nand_chip *chip, loff_t ofs)
 		nand_erase_nand(chip, &einfo, 0);
 
 		/* Write bad block marker to OOB */
-		nand_get_device(chip, FL_WRITING);
+		ret = nand_get_device(chip);
+		if (ret)
+			return ret;
+
 		ret = nand_markbad_bbm(chip, ofs);
 		nand_release_device(chip);
 	}
@@ -3580,7 +3546,9 @@ static int nand_read_oob(struct mtd_info *mtd, loff_t from,
 	    ops->mode != MTD_OPS_RAW)
 		return -ENOTSUPP;
 
-	nand_get_device(chip, FL_READING);
+	ret = nand_get_device(chip);
+	if (ret)
+		return ret;
 
 	if (!ops->datbuf)
 		ret = nand_do_read_oob(chip, from, ops);
@@ -4099,9 +4067,6 @@ static int panic_nand_write(struct mtd_info *mtd, loff_t to, size_t len,
 	struct mtd_oob_ops ops;
 	int ret;
 
-	/* Grab the device */
-	panic_nand_get_device(chip, FL_WRITING);
-
 	nand_select_target(chip, chipnr);
 
 	/* Wait for the device to get ready */
@@ -4132,7 +4097,9 @@ static int nand_write_oob(struct mtd_info *mtd, loff_t to,
 
 	ops->retlen = 0;
 
-	nand_get_device(chip, FL_WRITING);
+	ret = nand_get_device(chip);
+	if (ret)
+		return ret;
 
 	switch (ops->mode) {
 	case MTD_OPS_PLACE_OOB:
@@ -4205,7 +4172,9 @@ int nand_erase_nand(struct nand_chip *chip, struct erase_info *instr,
 		return -EINVAL;
 
 	/* Grab the lock and see if the device is available */
-	nand_get_device(chip, FL_ERASING);
+	ret = nand_get_device(chip);
+	if (ret)
+		return ret;
 
 	/* Shift to get first page */
 	page = (int)(instr->addr >> chip->page_shift);
@@ -4298,7 +4267,7 @@ static void nand_sync(struct mtd_info *mtd)
 	pr_debug("%s: called\n", __func__);
 
 	/* Grab the lock and see if the device is available */
-	nand_get_device(chip, FL_SYNCING);
+	WARN_ON(nand_get_device(chip));
 	/* Release it and go back */
 	nand_release_device(chip);
 }
@@ -4315,7 +4284,10 @@ static int nand_block_isbad(struct mtd_info *mtd, loff_t offs)
 	int ret;
 
 	/* Select the NAND device */
-	nand_get_device(chip, FL_READING);
+	ret = nand_get_device(chip);
+	if (ret)
+		return ret;
+
 	nand_select_target(chip, chipnr);
 
 	ret = nand_block_checkbad(chip, offs, 0);
@@ -4388,7 +4360,13 @@ static int nand_max_bad_blocks(struct mtd_info *mtd, loff_t ofs, size_t len)
  */
 static int nand_suspend(struct mtd_info *mtd)
 {
-	return nand_get_device(mtd_to_nand(mtd), FL_PM_SUSPENDED);
+	struct nand_chip *chip = mtd_to_nand(mtd);
+
+	mutex_lock(&chip->lock);
+	chip->suspended = 1;
+	mutex_unlock(&chip->lock);
+
+	return 0;
 }
 
 /**
@@ -4399,11 +4377,13 @@ static void nand_resume(struct mtd_info *mtd)
 {
 	struct nand_chip *chip = mtd_to_nand(mtd);
 
-	if (chip->state == FL_PM_SUSPENDED)
-		nand_release_device(chip);
+	mutex_lock(&chip->lock);
+	if (chip->suspended)
+		chip->suspended = 0;
 	else
 		pr_err("%s called for a chip which is not in suspended state\n",
 			__func__);
+	mutex_unlock(&chip->lock);
 }
 
 /**
@@ -4413,7 +4393,7 @@ static void nand_resume(struct mtd_info *mtd)
  */
 static void nand_shutdown(struct mtd_info *mtd)
 {
-	nand_get_device(mtd_to_nand(mtd), FL_PM_SUSPENDED);
+	nand_suspend(mtd);
 }
 
 /* Set default functions */
@@ -5018,6 +4998,8 @@ static int nand_scan_ident(struct nand_chip *chip, unsigned int maxchips,
 	/* Assume all dies are deselected when we enter nand_scan_ident(). */
 	chip->cur_cs = -1;
 
+	mutex_init(&chip->lock);
+
 	/* Enforce the right timings for reset/detection */
 	onfi_fill_data_interface(chip, NAND_SDR_IFACE, 0);
 
@@ -5717,9 +5699,6 @@ static int nand_scan_tail(struct nand_chip *chip)
 	}
 	chip->subpagesize = mtd->writesize >> mtd->subpage_sft;
 
-	/* Initialize state */
-	chip->state = FL_READY;
-
 	/* Invalidate the pagebuffer reference */
 	chip->pagebuf = -1;
 
diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h
index 33e240acdc6d..17d2d9ae33bf 100644
--- a/include/linux/mtd/rawnand.h
+++ b/include/linux/mtd/rawnand.h
@@ -16,13 +16,12 @@
 #ifndef __LINUX_MTD_RAWNAND_H
 #define __LINUX_MTD_RAWNAND_H
 
-#include <linux/wait.h>
-#include <linux/spinlock.h>
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/flashchip.h>
 #include <linux/mtd/bbm.h>
 #include <linux/mtd/jedec.h>
 #include <linux/mtd/onfi.h>
+#include <linux/mutex.h>
 #include <linux/of.h>
 #include <linux/types.h>
 
@@ -897,25 +896,17 @@ struct nand_controller_ops {
 /**
  * struct nand_controller - Structure used to describe a NAND controller
  *
- * @lock:               protection lock
- * @active:		the mtd device which holds the controller currently
- * @wq:			wait queue to sleep on if a NAND operation is in
- *			progress used instead of the per chip wait queue
- *			when a hw controller is available.
+ * @lock:		lock used to serialize accesses to the NAND controller
  * @ops:		NAND controller operations.
  */
 struct nand_controller {
-	spinlock_t lock;
-	struct nand_chip *active;
-	wait_queue_head_t wq;
+	struct mutex lock;
 	const struct nand_controller_ops *ops;
 };
 
 static inline void nand_controller_init(struct nand_controller *nfc)
 {
-	nfc->active = NULL;
-	spin_lock_init(&nfc->lock);
-	init_waitqueue_head(&nfc->wq);
+	mutex_init(&nfc->lock);
 }
 
 /**
@@ -983,7 +974,6 @@ struct nand_legacy {
  *			setting the read-retry mode. Mostly needed for MLC NAND.
  * @ecc:		[BOARDSPECIFIC] ECC control structure
  * @buf_align:		minimum buffer alignment required by a platform
- * @state:		[INTERN] the current state of the NAND device
  * @oob_poi:		"poison value buffer," used for laying out OOB data
  *			before writing
  * @page_shift:		[INTERN] number of address bits in a page (column
@@ -1034,6 +1024,9 @@ struct nand_legacy {
  *			cur_cs < numchips. NAND Controller drivers should not
  *			modify this value, but they're allowed to read it.
  * @read_retries:	[INTERN] the number of read retry modes supported
+ * @lock:		lock protecting the suspended field. Also used to
+ *			serialize accesses to the NAND device.
+ * @suspended:		set to 1 when the device is suspended, 0 when it's not.
  * @bbt:		[INTERN] bad block table pointer
  * @bbt_td:		[REPLACEABLE] bad block table descriptor for flash
  *			lookup.
@@ -1088,7 +1081,8 @@ struct nand_chip {
 
 	int read_retries;
 
-	flstate_t state;
+	struct mutex lock;
+	unsigned int suspended : 1;
 
 	uint8_t *oob_poi;
 	struct nand_controller *controller;
-- 
cgit v1.2.3


From 2d73f3d66b7052c0175f9f33d271ae50826c222e Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Mon, 21 Jan 2019 15:32:07 +0900
Subject: mtd: rawnand: remove ->legacy.erase and single_erase()

Now that the last user of this hook, denali.c, stopped using it,
we can remove the erase hook from nand_legacy.

I squashed single_erase() because only the difference between
single_erase() and nand_erase_op() is the number of bit shifts.

The status/ret conversion in nand_erase_nand() is unneeded since
commit eb94555e9e97 ("mtd: nand: use usual return values for the
->erase() hook"). Cleaned it up now.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Reviewed-by: Boris Brezillon <bbrezillon@kernel.org>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 drivers/mtd/nand/raw/nand_base.c | 31 ++++---------------------------
 include/linux/mtd/rawnand.h      |  2 --
 2 files changed, 4 insertions(+), 29 deletions(-)

(limited to 'include')

diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c
index e05ecf2e4269..cf207d6e7a81 100644
--- a/drivers/mtd/nand/raw/nand_base.c
+++ b/drivers/mtd/nand/raw/nand_base.c
@@ -4121,23 +4121,6 @@ out:
 	return ret;
 }
 
-/**
- * single_erase - [GENERIC] NAND standard block erase command function
- * @chip: NAND chip object
- * @page: the page address of the block which will be erased
- *
- * Standard erase command for NAND chips. Returns NAND status.
- */
-static int single_erase(struct nand_chip *chip, int page)
-{
-	unsigned int eraseblock;
-
-	/* Send commands to erase a block */
-	eraseblock = page >> (chip->phys_erase_shift - chip->page_shift);
-
-	return nand_erase_op(chip, eraseblock);
-}
-
 /**
  * nand_erase - [MTD Interface] erase block(s)
  * @mtd: MTD device structure
@@ -4161,7 +4144,7 @@ static int nand_erase(struct mtd_info *mtd, struct erase_info *instr)
 int nand_erase_nand(struct nand_chip *chip, struct erase_info *instr,
 		    int allowbbt)
 {
-	int page, status, pages_per_block, ret, chipnr;
+	int page, pages_per_block, ret, chipnr;
 	loff_t len;
 
 	pr_debug("%s: start = 0x%012llx, len = %llu\n",
@@ -4215,17 +4198,11 @@ int nand_erase_nand(struct nand_chip *chip, struct erase_info *instr,
 		    (page + pages_per_block))
 			chip->pagebuf = -1;
 
-		if (chip->legacy.erase)
-			status = chip->legacy.erase(chip,
-						    page & chip->pagemask);
-		else
-			status = single_erase(chip, page & chip->pagemask);
-
-		/* See if block erase succeeded */
-		if (status) {
+		ret = nand_erase_op(chip, (page & chip->pagemask) >>
+				    (chip->phys_erase_shift - chip->page_shift));
+		if (ret) {
 			pr_debug("%s: failed erase, page 0x%08x\n",
 					__func__, page);
-			ret = -EIO;
 			instr->fail_addr =
 				((loff_t)page << chip->page_shift);
 			goto erase_exit;
diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h
index 17d2d9ae33bf..b7445a44a814 100644
--- a/include/linux/mtd/rawnand.h
+++ b/include/linux/mtd/rawnand.h
@@ -927,7 +927,6 @@ static inline void nand_controller_init(struct nand_controller *nfc)
  * @waitfunc: hardware specific function for wait on ready.
  * @block_bad: check if a block is bad, using OOB markers
  * @block_markbad: mark a block bad
- * @erase: erase function
  * @set_features: set the NAND chip features
  * @get_features: get the NAND chip features
  * @chip_delay: chip dependent delay for transferring data from array to read
@@ -953,7 +952,6 @@ struct nand_legacy {
 	int (*waitfunc)(struct nand_chip *chip);
 	int (*block_bad)(struct nand_chip *chip, loff_t ofs);
 	int (*block_markbad)(struct nand_chip *chip, loff_t ofs);
-	int (*erase)(struct nand_chip *chip, int page);
 	int (*set_features)(struct nand_chip *chip, int feature_addr,
 			    u8 *subfeature_para);
 	int (*get_features)(struct nand_chip *chip, int feature_addr,
-- 
cgit v1.2.3


From 278bca7f318e6a29f482eabbca52db538dc5d4e6 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Thu, 10 Jan 2019 21:00:27 +0200
Subject: vfio-mdev: Switch to use new generic UUID API

There are new types and helpers that are supposed to be used in new code.

As a preparation to get rid of legacy types and API functions do
the conversion here.

Cc: Kirti Wankhede <kwankhede@nvidia.com>
Cc: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 drivers/vfio/mdev/mdev_core.c    | 16 ++++++++--------
 drivers/vfio/mdev/mdev_private.h |  5 +++--
 drivers/vfio/mdev/mdev_sysfs.c   |  6 +++---
 include/linux/mdev.h             |  2 +-
 samples/vfio-mdev/mtty.c         |  8 ++++----
 5 files changed, 19 insertions(+), 18 deletions(-)

(limited to 'include')

diff --git a/drivers/vfio/mdev/mdev_core.c b/drivers/vfio/mdev/mdev_core.c
index 0212f0ee8aea..b96fedc77ee5 100644
--- a/drivers/vfio/mdev/mdev_core.c
+++ b/drivers/vfio/mdev/mdev_core.c
@@ -60,9 +60,9 @@ struct mdev_device *mdev_from_dev(struct device *dev)
 }
 EXPORT_SYMBOL(mdev_from_dev);
 
-uuid_le mdev_uuid(struct mdev_device *mdev)
+const guid_t *mdev_uuid(struct mdev_device *mdev)
 {
-	return mdev->uuid;
+	return &mdev->uuid;
 }
 EXPORT_SYMBOL(mdev_uuid);
 
@@ -88,8 +88,7 @@ static void mdev_release_parent(struct kref *kref)
 	put_device(dev);
 }
 
-static
-inline struct mdev_parent *mdev_get_parent(struct mdev_parent *parent)
+static inline struct mdev_parent *mdev_get_parent(struct mdev_parent *parent)
 {
 	if (parent)
 		kref_get(&parent->ref);
@@ -276,7 +275,8 @@ static void mdev_device_release(struct device *dev)
 	kfree(mdev);
 }
 
-int mdev_device_create(struct kobject *kobj, struct device *dev, uuid_le uuid)
+int mdev_device_create(struct kobject *kobj,
+		       struct device *dev, const guid_t *uuid)
 {
 	int ret;
 	struct mdev_device *mdev, *tmp;
@@ -291,7 +291,7 @@ int mdev_device_create(struct kobject *kobj, struct device *dev, uuid_le uuid)
 
 	/* Check for duplicate */
 	list_for_each_entry(tmp, &mdev_list, next) {
-		if (!uuid_le_cmp(tmp->uuid, uuid)) {
+		if (guid_equal(&tmp->uuid, uuid)) {
 			mutex_unlock(&mdev_list_lock);
 			ret = -EEXIST;
 			goto mdev_fail;
@@ -305,7 +305,7 @@ int mdev_device_create(struct kobject *kobj, struct device *dev, uuid_le uuid)
 		goto mdev_fail;
 	}
 
-	memcpy(&mdev->uuid, &uuid, sizeof(uuid_le));
+	guid_copy(&mdev->uuid, uuid);
 	list_add(&mdev->next, &mdev_list);
 	mutex_unlock(&mdev_list_lock);
 
@@ -315,7 +315,7 @@ int mdev_device_create(struct kobject *kobj, struct device *dev, uuid_le uuid)
 	mdev->dev.parent  = dev;
 	mdev->dev.bus     = &mdev_bus_type;
 	mdev->dev.release = mdev_device_release;
-	dev_set_name(&mdev->dev, "%pUl", uuid.b);
+	dev_set_name(&mdev->dev, "%pUl", uuid);
 
 	ret = device_register(&mdev->dev);
 	if (ret) {
diff --git a/drivers/vfio/mdev/mdev_private.h b/drivers/vfio/mdev/mdev_private.h
index b5819b7d7ef7..379758c52b1b 100644
--- a/drivers/vfio/mdev/mdev_private.h
+++ b/drivers/vfio/mdev/mdev_private.h
@@ -28,7 +28,7 @@ struct mdev_parent {
 struct mdev_device {
 	struct device dev;
 	struct mdev_parent *parent;
-	uuid_le uuid;
+	guid_t uuid;
 	void *driver_data;
 	struct kref ref;
 	struct list_head next;
@@ -58,7 +58,8 @@ void parent_remove_sysfs_files(struct mdev_parent *parent);
 int  mdev_create_sysfs_files(struct device *dev, struct mdev_type *type);
 void mdev_remove_sysfs_files(struct device *dev, struct mdev_type *type);
 
-int  mdev_device_create(struct kobject *kobj, struct device *dev, uuid_le uuid);
+int  mdev_device_create(struct kobject *kobj,
+			struct device *dev, const guid_t *uuid);
 int  mdev_device_remove(struct device *dev, bool force_remove);
 
 #endif /* MDEV_PRIVATE_H */
diff --git a/drivers/vfio/mdev/mdev_sysfs.c b/drivers/vfio/mdev/mdev_sysfs.c
index ce5dd219f2c8..5193a0e0ce5a 100644
--- a/drivers/vfio/mdev/mdev_sysfs.c
+++ b/drivers/vfio/mdev/mdev_sysfs.c
@@ -55,7 +55,7 @@ static ssize_t create_store(struct kobject *kobj, struct device *dev,
 			    const char *buf, size_t count)
 {
 	char *str;
-	uuid_le uuid;
+	guid_t uuid;
 	int ret;
 
 	if ((count < UUID_STRING_LEN) || (count > UUID_STRING_LEN + 1))
@@ -65,12 +65,12 @@ static ssize_t create_store(struct kobject *kobj, struct device *dev,
 	if (!str)
 		return -ENOMEM;
 
-	ret = uuid_le_to_bin(str, &uuid);
+	ret = guid_parse(str, &uuid);
 	kfree(str);
 	if (ret)
 		return ret;
 
-	ret = mdev_device_create(kobj, dev, uuid);
+	ret = mdev_device_create(kobj, dev, &uuid);
 	if (ret)
 		return ret;
 
diff --git a/include/linux/mdev.h b/include/linux/mdev.h
index b6e048e1045f..d7aee90e5da5 100644
--- a/include/linux/mdev.h
+++ b/include/linux/mdev.h
@@ -120,7 +120,7 @@ struct mdev_driver {
 
 extern void *mdev_get_drvdata(struct mdev_device *mdev);
 extern void mdev_set_drvdata(struct mdev_device *mdev, void *data);
-extern uuid_le mdev_uuid(struct mdev_device *mdev);
+extern const guid_t *mdev_uuid(struct mdev_device *mdev);
 
 extern struct bus_type mdev_bus_type;
 
diff --git a/samples/vfio-mdev/mtty.c b/samples/vfio-mdev/mtty.c
index f6732aa16bb1..19cd29071ab0 100644
--- a/samples/vfio-mdev/mtty.c
+++ b/samples/vfio-mdev/mtty.c
@@ -156,15 +156,15 @@ static const struct file_operations vd_fops = {
 
 /* function prototypes */
 
-static int mtty_trigger_interrupt(uuid_le uuid);
+static int mtty_trigger_interrupt(const guid_t *uuid);
 
 /* Helper functions */
-static struct mdev_state *find_mdev_state_by_uuid(uuid_le uuid)
+static struct mdev_state *find_mdev_state_by_uuid(const guid_t *uuid)
 {
 	struct mdev_state *mds;
 
 	list_for_each_entry(mds, &mdev_devices_list, next) {
-		if (uuid_le_cmp(mdev_uuid(mds->mdev), uuid) == 0)
+		if (guid_equal(mdev_uuid(mds->mdev), uuid))
 			return mds;
 	}
 
@@ -1032,7 +1032,7 @@ static int mtty_set_irqs(struct mdev_device *mdev, uint32_t flags,
 	return ret;
 }
 
-static int mtty_trigger_interrupt(uuid_le uuid)
+static int mtty_trigger_interrupt(const guid_t *uuid)
 {
 	int ret = -1;
 	struct mdev_state *mdev_state;
-- 
cgit v1.2.3


From c3424f59a0cb994a098c3701bf14617580a18290 Mon Sep 17 00:00:00 2001
From: Weiyi Lu <weiyi.lu@mediatek.com>
Date: Fri, 14 Dec 2018 10:04:16 +0800
Subject: dt-bindings: clock: add clock for MT2712

Add new clock according to 3rd ECO design change.
It's the parent clock of audio clock mux.

Signed-off-by: Weiyi Lu <weiyi.lu@mediatek.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Stephen Boyd <sboyd@kernel.org>
---
 include/dt-bindings/clock/mt2712-clk.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/dt-bindings/clock/mt2712-clk.h b/include/dt-bindings/clock/mt2712-clk.h
index 76265836a1e1..c3b29dff9c0e 100644
--- a/include/dt-bindings/clock/mt2712-clk.h
+++ b/include/dt-bindings/clock/mt2712-clk.h
@@ -228,7 +228,8 @@
 #define CLK_TOP_NFI2X_EN		189
 #define CLK_TOP_NFIECC_EN		190
 #define CLK_TOP_NFI1X_CK_EN		191
-#define CLK_TOP_NR_CLK			192
+#define CLK_TOP_APLL2_D3		192
+#define CLK_TOP_NR_CLK			193
 
 /* INFRACFG */
 
-- 
cgit v1.2.3


From a78e8723a50530d15faa25cc0b6f009bcd251c20 Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Wed, 16 Jan 2019 09:55:41 +0200
Subject: RDMA/cma: Remove CM_ID statistics provided by rdma-cm module

Netlink statistics exported by rdma-cm never had any working user space
component published to the mailing list or to any open source
project. Canvassing various proprietary users, and the original requester,
we find that there are no real users of this interface.

This patch simply removes all occurrences of RDMA CM netlink in favour of
modern nldev implementation, which provides the same information and
accompanied by widely used user space component.

Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Reviewed-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/core/cma.c     | 83 ---------------------------------------
 drivers/infiniband/core/netlink.c |  4 +-
 include/uapi/rdma/rdma_netlink.h  | 17 +-------
 3 files changed, 3 insertions(+), 101 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 81bded0d37d1..e15546ae4d0f 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -4616,85 +4616,6 @@ static void cma_remove_one(struct ib_device *device, void *client_data)
 	kfree(cma_dev);
 }
 
-static int cma_get_id_stats(struct sk_buff *skb, struct netlink_callback *cb)
-{
-	struct nlmsghdr *nlh;
-	struct rdma_cm_id_stats *id_stats;
-	struct rdma_id_private *id_priv;
-	struct rdma_cm_id *id = NULL;
-	struct cma_device *cma_dev;
-	int i_dev = 0, i_id = 0;
-
-	/*
-	 * We export all of the IDs as a sequence of messages.  Each
-	 * ID gets its own netlink message.
-	 */
-	mutex_lock(&lock);
-
-	list_for_each_entry(cma_dev, &dev_list, list) {
-		if (i_dev < cb->args[0]) {
-			i_dev++;
-			continue;
-		}
-
-		i_id = 0;
-		list_for_each_entry(id_priv, &cma_dev->id_list, list) {
-			if (i_id < cb->args[1]) {
-				i_id++;
-				continue;
-			}
-
-			id_stats = ibnl_put_msg(skb, &nlh, cb->nlh->nlmsg_seq,
-						sizeof *id_stats, RDMA_NL_RDMA_CM,
-						RDMA_NL_RDMA_CM_ID_STATS,
-						NLM_F_MULTI);
-			if (!id_stats)
-				goto out;
-
-			memset(id_stats, 0, sizeof *id_stats);
-			id = &id_priv->id;
-			id_stats->node_type = id->route.addr.dev_addr.dev_type;
-			id_stats->port_num = id->port_num;
-			id_stats->bound_dev_if =
-				id->route.addr.dev_addr.bound_dev_if;
-
-			if (ibnl_put_attr(skb, nlh,
-					  rdma_addr_size(cma_src_addr(id_priv)),
-					  cma_src_addr(id_priv),
-					  RDMA_NL_RDMA_CM_ATTR_SRC_ADDR))
-				goto out;
-			if (ibnl_put_attr(skb, nlh,
-					  rdma_addr_size(cma_dst_addr(id_priv)),
-					  cma_dst_addr(id_priv),
-					  RDMA_NL_RDMA_CM_ATTR_DST_ADDR))
-				goto out;
-
-			id_stats->pid	= task_pid_vnr(id_priv->res.task);
-			id_stats->port_space	= id->ps;
-			id_stats->cm_state	= id_priv->state;
-			id_stats->qp_num	= id_priv->qp_num;
-			id_stats->qp_type	= id->qp_type;
-
-			i_id++;
-			nlmsg_end(skb, nlh);
-		}
-
-		cb->args[1] = 0;
-		i_dev++;
-	}
-
-out:
-	mutex_unlock(&lock);
-	cb->args[0] = i_dev;
-	cb->args[1] = i_id;
-
-	return skb->len;
-}
-
-static const struct rdma_nl_cbs cma_cb_table[RDMA_NL_RDMA_CM_NUM_OPS] = {
-	[RDMA_NL_RDMA_CM_ID_STATS] = { .dump = cma_get_id_stats},
-};
-
 static int cma_init_net(struct net *net)
 {
 	struct cma_pernet *pernet = cma_pernet(net);
@@ -4743,7 +4664,6 @@ static int __init cma_init(void)
 	if (ret)
 		goto err;
 
-	rdma_nl_register(RDMA_NL_RDMA_CM, cma_cb_table);
 	cma_configfs_init();
 
 	return 0;
@@ -4759,7 +4679,6 @@ err_wq:
 static void __exit cma_cleanup(void)
 {
 	cma_configfs_exit();
-	rdma_nl_unregister(RDMA_NL_RDMA_CM);
 	ib_unregister_client(&cma_client);
 	unregister_netdevice_notifier(&cma_nb);
 	ib_sa_unregister_client(&sa_client);
@@ -4767,7 +4686,5 @@ static void __exit cma_cleanup(void)
 	destroy_workqueue(cma_wq);
 }
 
-MODULE_ALIAS_RDMA_NETLINK(RDMA_NL_RDMA_CM, 1);
-
 module_init(cma_init);
 module_exit(cma_cleanup);
diff --git a/drivers/infiniband/core/netlink.c b/drivers/infiniband/core/netlink.c
index 724f5a62e82f..eecfc0b377c9 100644
--- a/drivers/infiniband/core/netlink.c
+++ b/drivers/infiniband/core/netlink.c
@@ -56,7 +56,6 @@ EXPORT_SYMBOL(rdma_nl_chk_listeners);
 static bool is_nl_msg_valid(unsigned int type, unsigned int op)
 {
 	static const unsigned int max_num_ops[RDMA_NL_NUM_CLIENTS] = {
-		[RDMA_NL_RDMA_CM] = RDMA_NL_RDMA_CM_NUM_OPS,
 		[RDMA_NL_IWCM] = RDMA_NL_IWPM_NUM_OPS,
 		[RDMA_NL_LS] = RDMA_NL_LS_NUM_OPS,
 		[RDMA_NL_NLDEV] = RDMA_NLDEV_NUM_OPS,
@@ -181,8 +180,7 @@ static int rdma_nl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
 		return -EINVAL;
 	}
 	/* FIXME: Convert IWCM to properly handle doit callbacks */
-	if ((nlh->nlmsg_flags & NLM_F_DUMP) || index == RDMA_NL_RDMA_CM ||
-	    index == RDMA_NL_IWCM) {
+	if ((nlh->nlmsg_flags & NLM_F_DUMP) || index == RDMA_NL_IWCM) {
 		struct netlink_dump_control c = {
 			.dump = cb_table[op].dump,
 		};
diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h
index 0f5263767fb4..3a9e681e4257 100644
--- a/include/uapi/rdma/rdma_netlink.h
+++ b/include/uapi/rdma/rdma_netlink.h
@@ -5,8 +5,7 @@
 #include <linux/types.h>
 
 enum {
-	RDMA_NL_RDMA_CM = 1,
-	RDMA_NL_IWCM,
+	RDMA_NL_IWCM = 2,
 	RDMA_NL_RSVD,
 	RDMA_NL_LS,	/* RDMA Local Services */
 	RDMA_NL_NLDEV,	/* RDMA device interface */
@@ -14,8 +13,7 @@ enum {
 };
 
 enum {
-	RDMA_NL_GROUP_CM = 1,
-	RDMA_NL_GROUP_IWPM,
+	RDMA_NL_GROUP_IWPM = 2,
 	RDMA_NL_GROUP_LS,
 	RDMA_NL_NUM_GROUPS
 };
@@ -24,17 +22,6 @@ enum {
 #define RDMA_NL_GET_OP(type) (type & ((1 << 10) - 1))
 #define RDMA_NL_GET_TYPE(client, op) ((client << 10) + op)
 
-enum {
-	RDMA_NL_RDMA_CM_ID_STATS = 0,
-	RDMA_NL_RDMA_CM_NUM_OPS
-};
-
-enum {
-	RDMA_NL_RDMA_CM_ATTR_SRC_ADDR = 1,
-	RDMA_NL_RDMA_CM_ATTR_DST_ADDR,
-	RDMA_NL_RDMA_CM_NUM_ATTR,
-};
-
 /* The minimum version that the iwpm kernel supports */
 #define IWPM_UABI_VERSION_MIN	3
 
-- 
cgit v1.2.3


From a2bfd708b17adb6e597e70d4eca824667f2d4e3c Mon Sep 17 00:00:00 2001
From: Steve Wise <swise@opengridcomputing.com>
Date: Tue, 5 Feb 2019 11:33:22 -0800
Subject: RDMA/iwpm: move kdoc comments to functions

Move the iwpm kdoc comments from the prototype declarations to above
the function bodies.  There are no functional changes in this patch.

Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/core/iwpm_msg.c  | 116 ++++++++++++++++++++--------
 drivers/infiniband/core/iwpm_util.c |  40 ++++++++++
 include/rdma/iw_portmap.h           | 149 ------------------------------------
 3 files changed, 123 insertions(+), 182 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/iwpm_msg.c b/drivers/infiniband/core/iwpm_msg.c
index 2e30e65b0816..2452b0ddcf0d 100644
--- a/drivers/infiniband/core/iwpm_msg.c
+++ b/drivers/infiniband/core/iwpm_msg.c
@@ -38,14 +38,21 @@ u16 iwpm_ulib_version = IWPM_UABI_VERSION_MIN;
 static int iwpm_user_pid = IWPM_PID_UNDEFINED;
 static atomic_t echo_nlmsg_seq;
 
+/**
+ * iwpm_valid_pid - Check if the userspace iwarp port mapper pid is valid
+ *
+ * Returns true if the pid is greater than zero, otherwise returns false
+ */
 int iwpm_valid_pid(void)
 {
 	return iwpm_user_pid > 0;
 }
 
-/*
- * iwpm_register_pid - Send a netlink query to user space
- *                     for the iwarp port mapper pid
+/**
+ * iwpm_register_pid - Send a netlink query to userspace
+ *                     to get the iwarp port mapper pid
+ * @pm_msg: Contains driver info to send to the userspace port mapper
+ * @nl_client: The index of the netlink client
  *
  * nlmsg attributes:
  *	[IWPM_NLA_REG_PID_SEQ]
@@ -124,13 +131,19 @@ pid_query_error:
 	return ret;
 }
 
-/*
- * iwpm_add_mapping - Send a netlink add mapping message
- *                    to the port mapper
+/**
+ * iwpm_add_mapping - Send a netlink add mapping request to
+ *                    the userspace port mapper
+ * @pm_msg: Contains the local ip/tcp address info to send
+ * @nl_client: The index of the netlink client
+ *
  * nlmsg attributes:
  *	[IWPM_NLA_MANAGE_MAPPING_SEQ]
  *	[IWPM_NLA_MANAGE_ADDR]
  *	[IWPM_NLA_MANAGE_FLAGS]
+ *
+ * If the request is successful, the pm_msg stores
+ * the port mapper response (mapped address info)
  */
 int iwpm_add_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client)
 {
@@ -208,9 +221,12 @@ add_mapping_error_nowarn:
 	return ret;
 }
 
-/*
- * iwpm_add_and_query_mapping - Send a netlink add and query
- *                              mapping message to the port mapper
+/**
+ * iwpm_add_and_query_mapping - Process the port mapper response to
+ *                              iwpm_add_and_query_mapping request
+ * @pm_msg: Contains the local ip/tcp address info to send
+ * @nl_client: The index of the netlink client
+ *
  * nlmsg attributes:
  *	[IWPM_NLA_QUERY_MAPPING_SEQ]
  *	[IWPM_NLA_QUERY_LOCAL_ADDR]
@@ -299,9 +315,13 @@ query_mapping_error_nowarn:
 	return ret;
 }
 
-/*
- * iwpm_remove_mapping - Send a netlink remove mapping message
- *                       to the port mapper
+/**
+ * iwpm_remove_mapping - Send a netlink remove mapping request
+ *                       to the userspace port mapper
+ *
+ * @local_addr: Local ip/tcp address to remove
+ * @nl_client: The index of the netlink client
+ *
  * nlmsg attributes:
  *	[IWPM_NLA_MANAGE_MAPPING_SEQ]
  *	[IWPM_NLA_MANAGE_ADDR]
@@ -372,9 +392,14 @@ static const struct nla_policy resp_reg_policy[IWPM_NLA_RREG_PID_MAX] = {
 	[IWPM_NLA_RREG_PID_ERR]     = { .type = NLA_U16 }
 };
 
-/*
- * iwpm_register_pid_cb - Process a port mapper response to
- *                        iwpm_register_pid()
+/**
+ * iwpm_register_pid_cb - Process the port mapper response to
+ *                        iwpm_register_pid query
+ * @skb:
+ * @cb: Contains the received message (payload and netlink header)
+ *
+ * If successful, the function receives the userspace port mapper pid
+ * which is used in future communication with the port mapper
  */
 int iwpm_register_pid_cb(struct sk_buff *skb, struct netlink_callback *cb)
 {
@@ -443,9 +468,11 @@ static const struct nla_policy resp_add_policy[IWPM_NLA_RMANAGE_MAPPING_MAX] = {
 	[IWPM_NLA_RMANAGE_MAPPING_ERR]	   = { .type = NLA_U16 }
 };
 
-/*
- * iwpm_add_mapping_cb - Process a port mapper response to
- *                       iwpm_add_mapping()
+/**
+ * iwpm_add_mapping_cb - Process the port mapper response to
+ *                       iwpm_add_mapping request
+ * @skb:
+ * @cb: Contains the received message (payload and netlink header)
  */
 int iwpm_add_mapping_cb(struct sk_buff *skb, struct netlink_callback *cb)
 {
@@ -518,9 +545,11 @@ static const struct nla_policy resp_query_policy[IWPM_NLA_RQUERY_MAPPING_MAX] =
 	[IWPM_NLA_RQUERY_MAPPING_ERR]	  = { .type = NLA_U16 }
 };
 
-/*
- * iwpm_add_and_query_mapping_cb - Process a port mapper response to
- *                                 iwpm_add_and_query_mapping()
+/**
+ * iwpm_add_and_query_mapping_cb - Process the port mapper response to
+ *                                 iwpm_add_and_query_mapping request
+ * @skb:
+ * @cb: Contains the received message (payload and netlink header)
  */
 int iwpm_add_and_query_mapping_cb(struct sk_buff *skb,
 				struct netlink_callback *cb)
@@ -598,9 +627,13 @@ query_mapping_response_exit:
 	return 0;
 }
 
-/*
- * iwpm_remote_info_cb - Process a port mapper message, containing
- *			  the remote connecting peer address info
+/**
+ * iwpm_remote_info_cb - Process remote connecting peer address info, which
+ *                       the port mapper has received from the connecting peer
+ * @skb:
+ * @cb: Contains the received message (payload and netlink header)
+ *
+ * Stores the IPv4/IPv6 address info in a hash table
  */
 int iwpm_remote_info_cb(struct sk_buff *skb, struct netlink_callback *cb)
 {
@@ -673,8 +706,14 @@ static const struct nla_policy resp_mapinfo_policy[IWPM_NLA_MAPINFO_REQ_MAX] = {
 	[IWPM_NLA_MAPINFO_ULIB_VER]  = { .type = NLA_U16 }
 };
 
-/*
- * iwpm_mapping_info_cb - Process a port mapper request for mapping info
+/**
+ * iwpm_mapping_info_cb - Process a notification that the userspace
+ *                        port mapper daemon is started
+ * @skb:
+ * @cb: Contains the received message (payload and netlink header)
+ *
+ * Using the received port mapper pid, send all the local mapping
+ * info records to the userspace port mapper
  */
 int iwpm_mapping_info_cb(struct sk_buff *skb, struct netlink_callback *cb)
 {
@@ -727,9 +766,11 @@ static const struct nla_policy ack_mapinfo_policy[IWPM_NLA_MAPINFO_NUM_MAX] = {
 	[IWPM_NLA_MAPINFO_ACK_NUM] =  { .type = NLA_U32 }
 };
 
-/*
- * iwpm_ack_mapping_info_cb - Process a port mapper ack for
- *                            the provided mapping info records
+/**
+ * iwpm_ack_mapping_info_cb - Process the port mapper ack for
+ *                            the provided local mapping info records
+ * @skb:
+ * @cb: Contains the received message (payload and netlink header)
  */
 int iwpm_ack_mapping_info_cb(struct sk_buff *skb, struct netlink_callback *cb)
 {
@@ -755,8 +796,11 @@ static const struct nla_policy map_error_policy[IWPM_NLA_ERR_MAX] = {
 	[IWPM_NLA_ERR_CODE]       = { .type = NLA_U16 },
 };
 
-/*
- * iwpm_mapping_error_cb - Process a port mapper error message
+/**
+ * iwpm_mapping_error_cb - Process port mapper notification for error
+ *
+ * @skb:
+ * @cb: Contains the received message (payload and netlink header)
  */
 int iwpm_mapping_error_cb(struct sk_buff *skb, struct netlink_callback *cb)
 {
@@ -797,8 +841,14 @@ static const struct nla_policy hello_policy[IWPM_NLA_HELLO_MAX] = {
 	[IWPM_NLA_HELLO_ABI_VERSION]     = { .type = NLA_U16 }
 };
 
-/*
- * iwpm_hello_cb - Process a port mapper hello request
+/**
+ * iwpm_hello_cb - Process a hello message from iwpmd
+ *
+ * @skb:
+ * @cb: Contains the received message (payload and netlink header)
+ *
+ * Using the received port mapper pid, send the kernel's abi_version
+ * after adjusting it to support the iwpmd version.
  */
 int iwpm_hello_cb(struct sk_buff *skb, struct netlink_callback *cb)
 {
diff --git a/drivers/infiniband/core/iwpm_util.c b/drivers/infiniband/core/iwpm_util.c
index 363938435476..7b97f6e2075f 100644
--- a/drivers/infiniband/core/iwpm_util.c
+++ b/drivers/infiniband/core/iwpm_util.c
@@ -51,6 +51,12 @@ static DEFINE_SPINLOCK(iwpm_reminfo_lock);
 static DEFINE_MUTEX(iwpm_admin_lock);
 static struct iwpm_admin_data iwpm_admin;
 
+/**
+ * iwpm_init - Allocate resources for the iwarp port mapper
+ * @nl_client: The index of the netlink client
+ *
+ * Should be called when network interface goes up.
+ */
 int iwpm_init(u8 nl_client)
 {
 	int ret = 0;
@@ -87,6 +93,12 @@ init_exit:
 static void free_hash_bucket(void);
 static void free_reminfo_bucket(void);
 
+/**
+ * iwpm_exit - Deallocate resources for the iwarp port mapper
+ * @nl_client: The index of the netlink client
+ *
+ * Should be called when network interface goes down.
+ */
 int iwpm_exit(u8 nl_client)
 {
 
@@ -112,6 +124,14 @@ int iwpm_exit(u8 nl_client)
 static struct hlist_head *get_mapinfo_hash_bucket(struct sockaddr_storage *,
 					       struct sockaddr_storage *);
 
+/**
+ * iwpm_create_mapinfo - Store local and mapped IPv4/IPv6 address
+ *                       info in a hash table
+ * @local_addr: Local ip/tcp address
+ * @mapped_addr: Mapped local ip/tcp address
+ * @nl_client: The index of the netlink client
+ * @map_flags: IWPM mapping flags
+ */
 int iwpm_create_mapinfo(struct sockaddr_storage *local_sockaddr,
 			struct sockaddr_storage *mapped_sockaddr,
 			u8 nl_client, u32 map_flags)
@@ -151,6 +171,15 @@ int iwpm_create_mapinfo(struct sockaddr_storage *local_sockaddr,
 	return ret;
 }
 
+/**
+ * iwpm_remove_mapinfo - Remove local and mapped IPv4/IPv6 address
+ *                       info from the hash table
+ * @local_addr: Local ip/tcp address
+ * @mapped_local_addr: Mapped local ip/tcp address
+ *
+ * Returns err code if mapping info is not found in the hash table,
+ * otherwise returns 0
+ */
 int iwpm_remove_mapinfo(struct sockaddr_storage *local_sockaddr,
 			struct sockaddr_storage *mapped_local_addr)
 {
@@ -251,6 +280,17 @@ void iwpm_add_remote_info(struct iwpm_remote_info *rem_info)
 	spin_unlock_irqrestore(&iwpm_reminfo_lock, flags);
 }
 
+/**
+ * iwpm_get_remote_info - Get the remote connecting peer address info
+ *
+ * @mapped_loc_addr: Mapped local address of the listening peer
+ * @mapped_rem_addr: Mapped remote address of the connecting peer
+ * @remote_addr: To store the remote address of the connecting peer
+ * @nl_client: The index of the netlink client
+ *
+ * The remote address info is retrieved and provided to the client in
+ * the remote_addr. After that it is removed from the hash table
+ */
 int iwpm_get_remote_info(struct sockaddr_storage *mapped_loc_addr,
 			 struct sockaddr_storage *mapped_rem_addr,
 			 struct sockaddr_storage *remote_addr,
diff --git a/include/rdma/iw_portmap.h b/include/rdma/iw_portmap.h
index 84fac196ef80..b9fee7feeeb5 100644
--- a/include/rdma/iw_portmap.h
+++ b/include/rdma/iw_portmap.h
@@ -61,177 +61,28 @@ struct iwpm_sa_data {
 	u32 flags;
 };
 
-/**
- * iwpm_init - Allocate resources for the iwarp port mapper
- *
- * Should be called when network interface goes up.
- */
 int iwpm_init(u8);
-
-/**
- * iwpm_exit - Deallocate resources for the iwarp port mapper
- *
- * Should be called when network interface goes down.
- */
 int iwpm_exit(u8);
-
-/**
- * iwpm_valid_pid - Check if the userspace iwarp port mapper pid is valid
- *
- * Returns true if the pid is greater than zero, otherwise returns false
- */
 int iwpm_valid_pid(void);
-
-/**
- * iwpm_register_pid - Send a netlink query to userspace
- *                     to get the iwarp port mapper pid
- * @pm_msg: Contains driver info to send to the userspace port mapper
- * @nl_client: The index of the netlink client
- */
 int iwpm_register_pid(struct iwpm_dev_data *pm_msg, u8 nl_client);
-
-/**
- * iwpm_add_mapping - Send a netlink add mapping request to
- *                    the userspace port mapper
- * @pm_msg: Contains the local ip/tcp address info to send
- * @nl_client: The index of the netlink client
- *
- * If the request is successful, the pm_msg stores
- * the port mapper response (mapped address info)
- */
 int iwpm_add_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client);
-
-/**
- * iwpm_add_and_query_mapping - Send a netlink add and query mapping request
- *				 to the userspace port mapper
- * @pm_msg: Contains the local and remote ip/tcp address info to send
- * @nl_client: The index of the netlink client
- *
- * If the request is successful, the pm_msg stores the
- * port mapper response (mapped local and remote address info)
- */
 int iwpm_add_and_query_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client);
-
-/**
- * iwpm_remove_mapping - Send a netlink remove mapping request
- *                       to the userspace port mapper
- *
- * @local_addr: Local ip/tcp address to remove
- * @nl_client: The index of the netlink client
- */
 int iwpm_remove_mapping(struct sockaddr_storage *local_addr, u8 nl_client);
-
-/**
- * iwpm_register_pid_cb - Process the port mapper response to
- *                        iwpm_register_pid query
- * @skb:
- * @cb: Contains the received message (payload and netlink header)
- *
- * If successful, the function receives the userspace port mapper pid
- * which is used in future communication with the port mapper
- */
 int iwpm_register_pid_cb(struct sk_buff *, struct netlink_callback *);
-
-/**
- * iwpm_add_mapping_cb - Process the port mapper response to
- *                       iwpm_add_mapping request
- * @skb:
- * @cb: Contains the received message (payload and netlink header)
- */
 int iwpm_add_mapping_cb(struct sk_buff *, struct netlink_callback *);
-
-/**
- * iwpm_add_and_query_mapping_cb - Process the port mapper response to
- *                                 iwpm_add_and_query_mapping request
- * @skb:
- * @cb: Contains the received message (payload and netlink header)
- */
 int iwpm_add_and_query_mapping_cb(struct sk_buff *, struct netlink_callback *);
-
-/**
- * iwpm_remote_info_cb - Process remote connecting peer address info, which
- *                       the port mapper has received from the connecting peer
- *
- * @cb: Contains the received message (payload and netlink header)
- *
- * Stores the IPv4/IPv6 address info in a hash table
- */
 int iwpm_remote_info_cb(struct sk_buff *, struct netlink_callback *);
-
-/**
- * iwpm_mapping_error_cb - Process port mapper notification for error
- *
- * @skb:
- * @cb: Contains the received message (payload and netlink header)
- */
 int iwpm_mapping_error_cb(struct sk_buff *, struct netlink_callback *);
-
-/**
- * iwpm_mapping_info_cb - Process a notification that the userspace
- *                        port mapper daemon is started
- * @skb:
- * @cb: Contains the received message (payload and netlink header)
- *
- * Using the received port mapper pid, send all the local mapping
- * info records to the userspace port mapper
- */
 int iwpm_mapping_info_cb(struct sk_buff *, struct netlink_callback *);
-
-/**
- * iwpm_ack_mapping_info_cb - Process the port mapper ack for
- *                            the provided local mapping info records
- * @skb:
- * @cb: Contains the received message (payload and netlink header)
- */
 int iwpm_ack_mapping_info_cb(struct sk_buff *, struct netlink_callback *);
-
-/**
- * iwpm_get_remote_info - Get the remote connecting peer address info
- *
- * @mapped_loc_addr: Mapped local address of the listening peer
- * @mapped_rem_addr: Mapped remote address of the connecting peer
- * @remote_addr: To store the remote address of the connecting peer
- * @nl_client: The index of the netlink client
- *
- * The remote address info is retrieved and provided to the client in
- * the remote_addr. After that it is removed from the hash table
- */
 int iwpm_get_remote_info(struct sockaddr_storage *mapped_loc_addr,
 			struct sockaddr_storage *mapped_rem_addr,
 			struct sockaddr_storage *remote_addr, u8 nl_client);
-
-/**
- * iwpm_create_mapinfo - Store local and mapped IPv4/IPv6 address
- *                       info in a hash table
- * @local_addr: Local ip/tcp address
- * @mapped_addr: Mapped local ip/tcp address
- * @nl_client: The index of the netlink client
- * @map_flags: IWPM mapping flags
- */
 int iwpm_create_mapinfo(struct sockaddr_storage *local_addr,
 			struct sockaddr_storage *mapped_addr, u8 nl_client,
 			u32 map_flags);
-
-/**
- * iwpm_remove_mapinfo - Remove local and mapped IPv4/IPv6 address
- *                       info from the hash table
- * @local_addr: Local ip/tcp address
- * @mapped_addr: Mapped local ip/tcp address
- *
- * Returns err code if mapping info is not found in the hash table,
- * otherwise returns 0
- */
 int iwpm_remove_mapinfo(struct sockaddr_storage *local_addr,
 			struct sockaddr_storage *mapped_addr);
 
-/**
- * iwpm_hello_cb - Process a hello message from iwpmd
- *
- * @skb:
- * @cb: Contains the received message (payload and netlink header)
- *
- * Using the received port mapper pid, send the kernel's abi_version
- * after adjusting it to support the iwpmd version.
- */
 int iwpm_hello_cb(struct sk_buff *skb, struct netlink_callback *cb);
 #endif /* _IW_PORTMAP_H */
-- 
cgit v1.2.3


From 385156c5f2a61834666f079ee66338f177c65c28 Mon Sep 17 00:00:00 2001
From: Kaike Wan <kaike.wan@intel.com>
Date: Wed, 23 Jan 2019 19:29:44 -0800
Subject: IB/hfi: Move RC functions into a header file

This patch moves some RC helper functions into a header file so that
they can be called from both RC and  TID RDMA functions. In addition,
a common function for rewinding a request is created in rdmavt so that
it can be shared between qib and hfi1 driver.

Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Mitko Haralanov <mitko.haralanov@intel.com>
Signed-off-by: Kaike Wan <kaike.wan@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/hfi1/rc.c    | 119 +++++++++++++++----------------------
 drivers/infiniband/hw/hfi1/rc.h    |  50 ++++++++++++++++
 drivers/infiniband/hw/qib/qib_rc.c |   7 +--
 drivers/infiniband/sw/rdmavt/rc.c  |  13 ++++
 include/rdma/rdmavt_qp.h           |  10 ++++
 5 files changed, 123 insertions(+), 76 deletions(-)
 create mode 100644 drivers/infiniband/hw/hfi1/rc.h

(limited to 'include')

diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c
index 092d5eba980f..6e74cd3814b8 100644
--- a/drivers/infiniband/hw/hfi1/rc.c
+++ b/drivers/infiniband/hw/hfi1/rc.c
@@ -51,28 +51,48 @@
 
 #include "hfi.h"
 #include "qp.h"
+#include "rc.h"
 #include "verbs_txreq.h"
 #include "trace.h"
 
-/* cut down ridiculously long IB macro names */
-#define OP(x) RC_OP(x)
-
-static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp,
-					 struct rvt_swqe *wqe,
-					 struct hfi1_ibport *ibp);
-
-static u32 restart_sge(struct rvt_sge_state *ss, struct rvt_swqe *wqe,
-		       u32 psn, u32 pmtu)
+struct rvt_ack_entry *find_prev_entry(struct rvt_qp *qp, u32 psn, u8 *prev,
+				      u8 *prev_ack, bool *scheduled)
+	__must_hold(&qp->s_lock)
 {
-	u32 len;
+	struct rvt_ack_entry *e = NULL;
+	u8 i, p;
+	bool s = true;
 
-	len = delta_psn(psn, wqe->psn) * pmtu;
-	ss->sge = wqe->sg_list[0];
-	ss->sg_list = wqe->sg_list + 1;
-	ss->num_sge = wqe->wr.num_sge;
-	ss->total_len = wqe->length;
-	rvt_skip_sge(ss, len, false);
-	return wqe->length - len;
+	for (i = qp->r_head_ack_queue; ; i = p) {
+		if (i == qp->s_tail_ack_queue)
+			s = false;
+		if (i)
+			p = i - 1;
+		else
+			p = rvt_size_atomic(ib_to_rvt(qp->ibqp.device));
+		if (p == qp->r_head_ack_queue) {
+			e = NULL;
+			break;
+		}
+		e = &qp->s_ack_queue[p];
+		if (!e->opcode) {
+			e = NULL;
+			break;
+		}
+		if (cmp_psn(psn, e->psn) >= 0) {
+			if (p == qp->s_tail_ack_queue &&
+			    cmp_psn(psn, e->lpsn) <= 0)
+				s = false;
+			break;
+		}
+	}
+	if (prev)
+		*prev = p;
+	if (prev_ack)
+		*prev_ack = i;
+	if (scheduled)
+		*scheduled = s;
+	return e;
 }
 
 /**
@@ -1229,9 +1249,9 @@ static inline void update_last_psn(struct rvt_qp *qp, u32 psn)
  * This is similar to hfi1_send_complete but has to check to be sure
  * that the SGEs are not being referenced if the SWQE is being resent.
  */
-static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp,
-					 struct rvt_swqe *wqe,
-					 struct hfi1_ibport *ibp)
+struct rvt_swqe *do_rc_completion(struct rvt_qp *qp,
+				  struct rvt_swqe *wqe,
+				  struct hfi1_ibport *ibp)
 {
 	lockdep_assert_held(&qp->s_lock);
 	/*
@@ -1314,8 +1334,8 @@ static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp,
  * May be called at interrupt level, with the QP s_lock held.
  * Returns 1 if OK, 0 if current operation should be aborted (NAK).
  */
-static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
-		     u64 val, struct hfi1_ctxtdata *rcd)
+int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
+	      u64 val, struct hfi1_ctxtdata *rcd)
 {
 	struct hfi1_ibport *ibp;
 	enum ib_wc_status status;
@@ -1754,16 +1774,6 @@ bail:
 	return;
 }
 
-static inline void rc_defered_ack(struct hfi1_ctxtdata *rcd,
-				  struct rvt_qp *qp)
-{
-	if (list_empty(&qp->rspwait)) {
-		qp->r_flags |= RVT_R_RSP_NAK;
-		rvt_get_qp(qp);
-		list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
-	}
-}
-
 static inline void rc_cancel_ack(struct rvt_qp *qp)
 {
 	qp->r_adefered = 0;
@@ -1796,8 +1806,9 @@ static noinline int rc_rcv_error(struct ib_other_headers *ohdr, void *data,
 	struct hfi1_ibport *ibp = rcd_to_iport(rcd);
 	struct rvt_ack_entry *e;
 	unsigned long flags;
-	u8 i, prev;
-	int old_req;
+	u8 prev;
+	u8 mra; /* most recent ACK */
+	bool old_req;
 
 	trace_hfi1_rcv_error(qp, psn);
 	if (diff > 0) {
@@ -1843,29 +1854,8 @@ static noinline int rc_rcv_error(struct ib_other_headers *ohdr, void *data,
 
 	spin_lock_irqsave(&qp->s_lock, flags);
 
-	for (i = qp->r_head_ack_queue; ; i = prev) {
-		if (i == qp->s_tail_ack_queue)
-			old_req = 0;
-		if (i)
-			prev = i - 1;
-		else
-			prev = rvt_size_atomic(ib_to_rvt(qp->ibqp.device));
-		if (prev == qp->r_head_ack_queue) {
-			e = NULL;
-			break;
-		}
-		e = &qp->s_ack_queue[prev];
-		if (!e->opcode) {
-			e = NULL;
-			break;
-		}
-		if (cmp_psn(psn, e->psn) >= 0) {
-			if (prev == qp->s_tail_ack_queue &&
-			    cmp_psn(psn, e->lpsn) <= 0)
-				old_req = 0;
-			break;
-		}
-	}
+	e = find_prev_entry(qp, psn, &prev, &mra, &old_req);
+
 	switch (opcode) {
 	case OP(RDMA_READ_REQUEST): {
 		struct ib_reth *reth;
@@ -1940,7 +1930,7 @@ static noinline int rc_rcv_error(struct ib_other_headers *ohdr, void *data,
 		 * Resend the most recent ACK if this request is
 		 * after all the previous RDMA reads and atomics.
 		 */
-		if (i == qp->r_head_ack_queue) {
+		if (mra == qp->r_head_ack_queue) {
 			spin_unlock_irqrestore(&qp->s_lock, flags);
 			qp->r_nak_state = 0;
 			qp->r_ack_psn = qp->r_psn - 1;
@@ -1951,7 +1941,7 @@ static noinline int rc_rcv_error(struct ib_other_headers *ohdr, void *data,
 		 * Resend the RDMA read or atomic op which
 		 * ACKs this duplicate request.
 		 */
-		qp->s_tail_ack_queue = i;
+		qp->s_tail_ack_queue = mra;
 		break;
 	}
 	qp->s_ack_state = OP(ACKNOWLEDGE);
@@ -1968,17 +1958,6 @@ send_ack:
 	return 0;
 }
 
-static inline void update_ack_queue(struct rvt_qp *qp, unsigned n)
-{
-	unsigned next;
-
-	next = n + 1;
-	if (next > rvt_size_atomic(ib_to_rvt(qp->ibqp.device)))
-		next = 0;
-	qp->s_tail_ack_queue = next;
-	qp->s_ack_state = OP(ACKNOWLEDGE);
-}
-
 static void log_cca_event(struct hfi1_pportdata *ppd, u8 sl, u32 rlid,
 			  u32 lqpn, u32 rqpn, u8 svc_type)
 {
diff --git a/drivers/infiniband/hw/hfi1/rc.h b/drivers/infiniband/hw/hfi1/rc.h
new file mode 100644
index 000000000000..4329eadcb3df
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/rc.h
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
+/*
+ * Copyright(c) 2018 Intel Corporation.
+ *
+ */
+
+#ifndef HFI1_RC_H
+#define HFI1_RC_H
+
+/* cut down ridiculously long IB macro names */
+#define OP(x) IB_OPCODE_RC_##x
+
+static inline void update_ack_queue(struct rvt_qp *qp, unsigned int n)
+{
+	unsigned int next;
+
+	next = n + 1;
+	if (next > rvt_size_atomic(ib_to_rvt(qp->ibqp.device)))
+		next = 0;
+	qp->s_tail_ack_queue = next;
+	qp->s_ack_state = OP(ACKNOWLEDGE);
+}
+
+static inline void rc_defered_ack(struct hfi1_ctxtdata *rcd,
+				  struct rvt_qp *qp)
+{
+	if (list_empty(&qp->rspwait)) {
+		qp->r_flags |= RVT_R_RSP_NAK;
+		rvt_get_qp(qp);
+		list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
+	}
+}
+
+static inline u32 restart_sge(struct rvt_sge_state *ss, struct rvt_swqe *wqe,
+			      u32 psn, u32 pmtu)
+{
+	u32 len;
+
+	len = delta_psn(psn, wqe->psn) * pmtu;
+	return rvt_restart_sge(ss, wqe, len);
+}
+
+struct rvt_ack_entry *find_prev_entry(struct rvt_qp *qp, u32 psn, u8 *prev,
+				      u8 *prev_ack, bool *scheduled);
+int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode, u64 val,
+	      struct hfi1_ctxtdata *rcd);
+struct rvt_swqe *do_rc_completion(struct rvt_qp *qp, struct rvt_swqe *wqe,
+				  struct hfi1_ibport *ibp);
+
+#endif /* HFI1_RC_H */
diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c
index 6fa002940451..50dd9811b088 100644
--- a/drivers/infiniband/hw/qib/qib_rc.c
+++ b/drivers/infiniband/hw/qib/qib_rc.c
@@ -45,12 +45,7 @@ static u32 restart_sge(struct rvt_sge_state *ss, struct rvt_swqe *wqe,
 	u32 len;
 
 	len = ((psn - wqe->psn) & QIB_PSN_MASK) * pmtu;
-	ss->sge = wqe->sg_list[0];
-	ss->sg_list = wqe->sg_list + 1;
-	ss->num_sge = wqe->wr.num_sge;
-	ss->total_len = wqe->length;
-	rvt_skip_sge(ss, len, false);
-	return wqe->length - len;
+	return rvt_restart_sge(ss, wqe, len);
 }
 
 /**
diff --git a/drivers/infiniband/sw/rdmavt/rc.c b/drivers/infiniband/sw/rdmavt/rc.c
index 6131cc558bdb..8d71647820a8 100644
--- a/drivers/infiniband/sw/rdmavt/rc.c
+++ b/drivers/infiniband/sw/rdmavt/rc.c
@@ -187,3 +187,16 @@ void rvt_get_credit(struct rvt_qp *qp, u32 aeth)
 	}
 }
 EXPORT_SYMBOL(rvt_get_credit);
+
+/* rvt_restart_sge - rewind the sge state for a wqe */
+u32 rvt_restart_sge(struct rvt_sge_state *ss, struct rvt_swqe *wqe, u32 len)
+{
+	ss->sge = wqe->sg_list[0];
+	ss->sg_list = wqe->sg_list + 1;
+	ss->num_sge = wqe->wr.num_sge;
+	ss->total_len = wqe->length;
+	rvt_skip_sge(ss, len, false);
+	return wqe->length - len;
+}
+EXPORT_SYMBOL(rvt_restart_sge);
+
diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h
index cbafb1878669..56a9221378d9 100644
--- a/include/rdma/rdmavt_qp.h
+++ b/include/rdma/rdmavt_qp.h
@@ -628,6 +628,16 @@ __be32 rvt_compute_aeth(struct rvt_qp *qp);
  */
 void rvt_get_credit(struct rvt_qp *qp, u32 aeth);
 
+/**
+ * rvt_restart_sge - rewind the sge state for a wqe
+ * @ss: the sge state pointer
+ * @wqe: the wqe to rewind
+ * @len: the data length from the start of the wqe in bytes
+ *
+ * Returns the remaining data length.
+ */
+u32 rvt_restart_sge(struct rvt_sge_state *ss, struct rvt_swqe *wqe, u32 len);
+
 /**
  * @qp - the qp pair
  * @len - the length
-- 
cgit v1.2.3


From 838b6fd2d9ca29998869e4d1ecf4566efe807666 Mon Sep 17 00:00:00 2001
From: Kaike Wan <kaike.wan@intel.com>
Date: Wed, 23 Jan 2019 19:30:07 -0800
Subject: IB/hfi1: TID RDMA RcvArray programming and TID allocation

TID entries are used by hfi1 hardware to receive data payload from
incoming packets directly into a user buffer and thus avoid data copying
by software. This patch implements the functions for TID allocation,
freeing, and programming TID RcvArray entries in hardware for kernel
clients. TID entries are managed via lists of TID groups similar to PSM.
Furthermore, to track TID resource allocation for each request, software
flows are also allocated and freed as needed. Since software flows
consume large amount of memory for tracking TID allocation and freeing,
it is generally desirable to allocate them dynamically in the send queue
and only for TID RDMA requests, but pre-allocate them for receive queue
because the send queue could have thousands of entries while the receive
queue has only a limited number of entries.

Signed-off-by: Mitko Haralanov <mitko.haralanov@intel.com>
Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
Signed-off-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Kaike Wan <kaike.wan@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/hfi1/hfi.h          |   2 +
 drivers/infiniband/hw/hfi1/init.c         |   3 +-
 drivers/infiniband/hw/hfi1/tid_rdma.c     | 877 +++++++++++++++++++++++++++++-
 drivers/infiniband/hw/hfi1/tid_rdma.h     | 101 ++++
 drivers/infiniband/hw/hfi1/user_exp_rcv.h |   1 -
 drivers/infiniband/hw/hfi1/verbs.c        |  29 +-
 drivers/infiniband/hw/hfi1/verbs.h        |  34 ++
 drivers/infiniband/sw/rdmavt/qp.c         |   2 +-
 include/rdma/rdmavt_qp.h                  |   2 +
 9 files changed, 1033 insertions(+), 18 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h
index 78aa344c7403..1412ed157c98 100644
--- a/drivers/infiniband/hw/hfi1/hfi.h
+++ b/drivers/infiniband/hw/hfi1/hfi.h
@@ -303,6 +303,8 @@ struct hfi1_ctxtdata {
 	spinlock_t exp_lock;
 	/* Queue for QP's waiting for HW TID flows */
 	struct tid_queue flow_queue;
+	/* Queue for QP's waiting for HW receive array entries */
+	struct tid_queue rarr_queue;
 	/* when waiting for rcv or pioavail */
 	wait_queue_head_t wait;
 	/* uuid from PSM */
diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c
index 56830a514b92..d13304f7340d 100644
--- a/drivers/infiniband/hw/hfi1/init.c
+++ b/drivers/infiniband/hw/hfi1/init.c
@@ -372,6 +372,7 @@ int hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, int numa,
 		mutex_init(&rcd->exp_mutex);
 		spin_lock_init(&rcd->exp_lock);
 		INIT_LIST_HEAD(&rcd->flow_queue.queue_head);
+		INIT_LIST_HEAD(&rcd->rarr_queue.queue_head);
 
 		hfi1_cdbg(PROC, "setting up context %u\n", rcd->ctxt);
 
@@ -1596,7 +1597,7 @@ static void cleanup_device_data(struct hfi1_devdata *dd)
 		struct hfi1_ctxtdata *rcd = dd->rcd[ctxt];
 
 		if (rcd) {
-			hfi1_clear_tids(rcd);
+			hfi1_free_ctxt_rcv_groups(rcd);
 			hfi1_free_ctxt(rcd);
 		}
 	}
diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.c b/drivers/infiniband/hw/hfi1/tid_rdma.c
index 70671212808f..1d02b12590f6 100644
--- a/drivers/infiniband/hw/hfi1/tid_rdma.c
+++ b/drivers/infiniband/hw/hfi1/tid_rdma.c
@@ -8,6 +8,7 @@
 #include "qp.h"
 #include "verbs.h"
 #include "tid_rdma.h"
+#include "exp_rcv.h"
 #include "trace.h"
 
 #define RCV_TID_FLOW_TABLE_CTRL_FLOW_VALID_SMASK BIT_ULL(32)
@@ -35,8 +36,14 @@ static u32 mask_generation(u32 a)
 #define HFI1_KERNEL_MIN_JKEY HFI1_ADMIN_JKEY_RANGE
 #define HFI1_KERNEL_MAX_JKEY (2 * HFI1_ADMIN_JKEY_RANGE - 1)
 
+/* Maximum number of segments in flight per QP request. */
 #define TID_RDMA_MAX_READ_SEGS_PER_REQ  6
 #define TID_RDMA_MAX_WRITE_SEGS_PER_REQ 4
+#define MAX_REQ max_t(u16, TID_RDMA_MAX_READ_SEGS_PER_REQ, \
+			TID_RDMA_MAX_WRITE_SEGS_PER_REQ)
+#define MAX_FLOWS roundup_pow_of_two(MAX_REQ + 1)
+
+#define MAX_EXPECTED_PAGES     (MAX_EXPECTED_BUFFER / PAGE_SIZE)
 
 #define TID_OPFN_QP_CTXT_MASK 0xff
 #define TID_OPFN_QP_CTXT_SHIFT 56
@@ -79,6 +86,11 @@ static u32 mask_generation(u32 a)
  */
 
 static void tid_rdma_trigger_resume(struct work_struct *work);
+static void hfi1_kern_exp_rcv_free_flows(struct tid_rdma_request *req);
+static int hfi1_kern_exp_rcv_alloc_flows(struct tid_rdma_request *req,
+					 gfp_t gfp);
+static void hfi1_init_trdma_req(struct rvt_qp *qp,
+				struct tid_rdma_request *req);
 
 static u64 tid_rdma_opfn_encode(struct tid_rdma_params *p)
 {
@@ -230,7 +242,7 @@ int hfi1_kern_exp_rcv_init(struct hfi1_ctxtdata *rcd, int reinit)
 	BUILD_BUG_ON(TID_RDMA_JKEY > HFI1_KERNEL_MAX_JKEY);
 	rcd->jkey = TID_RDMA_JKEY;
 	hfi1_set_ctxt_jkey(rcd->dd, rcd, rcd->jkey);
-	return 0;
+	return hfi1_alloc_ctxt_rcv_groups(rcd);
 }
 
 /**
@@ -266,6 +278,7 @@ int hfi1_qp_priv_init(struct rvt_dev_info *rdi, struct rvt_qp *qp,
 		      struct ib_qp_init_attr *init_attr)
 {
 	struct hfi1_qp_priv *qpriv = qp->priv;
+	int i, ret;
 
 	qpriv->rcd = qp_to_rcd(rdi, qp);
 
@@ -278,15 +291,75 @@ int hfi1_qp_priv_init(struct rvt_dev_info *rdi, struct rvt_qp *qp,
 	qpriv->flow_state.generation = KERN_GENERATION_RESERVED;
 	INIT_LIST_HEAD(&qpriv->tid_wait);
 
+	if (init_attr->qp_type == IB_QPT_RC && HFI1_CAP_IS_KSET(TID_RDMA)) {
+		struct hfi1_devdata *dd = qpriv->rcd->dd;
+
+		qpriv->pages = kzalloc_node(TID_RDMA_MAX_PAGES *
+						sizeof(*qpriv->pages),
+					    GFP_KERNEL, dd->node);
+		if (!qpriv->pages)
+			return -ENOMEM;
+		for (i = 0; i < qp->s_size; i++) {
+			struct hfi1_swqe_priv *priv;
+			struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, i);
+
+			priv = kzalloc_node(sizeof(*priv), GFP_KERNEL,
+					    dd->node);
+			if (!priv)
+				return -ENOMEM;
+
+			hfi1_init_trdma_req(qp, &priv->tid_req);
+			priv->tid_req.e.swqe = wqe;
+			wqe->priv = priv;
+		}
+		for (i = 0; i < rvt_max_atomic(rdi); i++) {
+			struct hfi1_ack_priv *priv;
+
+			priv = kzalloc_node(sizeof(*priv), GFP_KERNEL,
+					    dd->node);
+			if (!priv)
+				return -ENOMEM;
+
+			hfi1_init_trdma_req(qp, &priv->tid_req);
+			priv->tid_req.e.ack = &qp->s_ack_queue[i];
+
+			ret = hfi1_kern_exp_rcv_alloc_flows(&priv->tid_req,
+							    GFP_KERNEL);
+			if (ret) {
+				kfree(priv);
+				return ret;
+			}
+			qp->s_ack_queue[i].priv = priv;
+		}
+	}
+
 	return 0;
 }
 
 void hfi1_qp_priv_tid_free(struct rvt_dev_info *rdi, struct rvt_qp *qp)
 {
-	struct hfi1_qp_priv *priv = qp->priv;
-
-	if (qp->ibqp.qp_type == IB_QPT_RC && HFI1_CAP_IS_KSET(TID_RDMA))
-		cancel_work_sync(&priv->opfn.opfn_work);
+	struct hfi1_qp_priv *qpriv = qp->priv;
+	struct rvt_swqe *wqe;
+	u32 i;
+
+	if (qp->ibqp.qp_type == IB_QPT_RC && HFI1_CAP_IS_KSET(TID_RDMA)) {
+		for (i = 0; i < qp->s_size; i++) {
+			wqe = rvt_get_swqe_ptr(qp, i);
+			kfree(wqe->priv);
+			wqe->priv = NULL;
+		}
+		for (i = 0; i < rvt_max_atomic(rdi); i++) {
+			struct hfi1_ack_priv *priv = qp->s_ack_queue[i].priv;
+
+			if (priv)
+				hfi1_kern_exp_rcv_free_flows(&priv->tid_req);
+			kfree(priv);
+			qp->s_ack_queue[i].priv = NULL;
+		}
+		cancel_work_sync(&qpriv->opfn.opfn_work);
+		kfree(qpriv->pages);
+		qpriv->pages = NULL;
+	}
 }
 
 /* Flow and tid waiter functions */
@@ -540,6 +613,7 @@ void hfi1_tid_rdma_flush_wait(struct rvt_qp *qp)
 	struct hfi1_qp_priv *priv = qp->priv;
 
 	_tid_rdma_flush_wait(qp, &priv->rcd->flow_queue);
+	_tid_rdma_flush_wait(qp, &priv->rcd->rarr_queue);
 }
 
 /* Flow functions */
@@ -702,3 +776,796 @@ void hfi1_kern_init_ctxt_generations(struct hfi1_ctxtdata *rcd)
 		kern_set_hw_flow(rcd, KERN_GENERATION_RESERVED, i);
 	}
 }
+
+/* TID allocation functions */
+static u8 trdma_pset_order(struct tid_rdma_pageset *s)
+{
+	u8 count = s->count;
+
+	return ilog2(count) + 1;
+}
+
+/**
+ * tid_rdma_find_phys_blocks_4k - get groups base on mr info
+ * @npages - number of pages
+ * @pages - pointer to an array of page structs
+ * @list - page set array to return
+ *
+ * This routine returns the number of groups associated with
+ * the current sge information.  This implementation is based
+ * on the expected receive find_phys_blocks() adjusted to
+ * use the MR information vs. the pfn.
+ *
+ * Return:
+ * the number of RcvArray entries
+ */
+static u32 tid_rdma_find_phys_blocks_4k(struct tid_rdma_flow *flow,
+					struct page **pages,
+					u32 npages,
+					struct tid_rdma_pageset *list)
+{
+	u32 pagecount, pageidx, setcount = 0, i;
+	void *vaddr, *this_vaddr;
+
+	if (!npages)
+		return 0;
+
+	/*
+	 * Look for sets of physically contiguous pages in the user buffer.
+	 * This will allow us to optimize Expected RcvArray entry usage by
+	 * using the bigger supported sizes.
+	 */
+	vaddr = page_address(pages[0]);
+	for (pageidx = 0, pagecount = 1, i = 1; i <= npages; i++) {
+		this_vaddr = i < npages ? page_address(pages[i]) : NULL;
+		/*
+		 * If the vaddr's are not sequential, pages are not physically
+		 * contiguous.
+		 */
+		if (this_vaddr != (vaddr + PAGE_SIZE)) {
+			/*
+			 * At this point we have to loop over the set of
+			 * physically contiguous pages and break them down it
+			 * sizes supported by the HW.
+			 * There are two main constraints:
+			 *     1. The max buffer size is MAX_EXPECTED_BUFFER.
+			 *        If the total set size is bigger than that
+			 *        program only a MAX_EXPECTED_BUFFER chunk.
+			 *     2. The buffer size has to be a power of two. If
+			 *        it is not, round down to the closes power of
+			 *        2 and program that size.
+			 */
+			while (pagecount) {
+				int maxpages = pagecount;
+				u32 bufsize = pagecount * PAGE_SIZE;
+
+				if (bufsize > MAX_EXPECTED_BUFFER)
+					maxpages =
+						MAX_EXPECTED_BUFFER >>
+						PAGE_SHIFT;
+				else if (!is_power_of_2(bufsize))
+					maxpages =
+						rounddown_pow_of_two(bufsize) >>
+						PAGE_SHIFT;
+
+				list[setcount].idx = pageidx;
+				list[setcount].count = maxpages;
+				pagecount -= maxpages;
+				pageidx += maxpages;
+				setcount++;
+			}
+			pageidx = i;
+			pagecount = 1;
+			vaddr = this_vaddr;
+		} else {
+			vaddr += PAGE_SIZE;
+			pagecount++;
+		}
+	}
+	/* insure we always return an even number of sets */
+	if (setcount & 1)
+		list[setcount++].count = 0;
+	return setcount;
+}
+
+/**
+ * tid_flush_pages - dump out pages into pagesets
+ * @list - list of pagesets
+ * @idx - pointer to current page index
+ * @pages - number of pages to dump
+ * @sets - current number of pagesset
+ *
+ * This routine flushes out accumuated pages.
+ *
+ * To insure an even number of sets the
+ * code may add a filler.
+ *
+ * This can happen with when pages is not
+ * a power of 2 or pages is a power of 2
+ * less than the maximum pages.
+ *
+ * Return:
+ * The new number of sets
+ */
+
+static u32 tid_flush_pages(struct tid_rdma_pageset *list,
+			   u32 *idx, u32 pages, u32 sets)
+{
+	while (pages) {
+		u32 maxpages = pages;
+
+		if (maxpages > MAX_EXPECTED_PAGES)
+			maxpages = MAX_EXPECTED_PAGES;
+		else if (!is_power_of_2(maxpages))
+			maxpages = rounddown_pow_of_two(maxpages);
+		list[sets].idx = *idx;
+		list[sets++].count = maxpages;
+		*idx += maxpages;
+		pages -= maxpages;
+	}
+	/* might need a filler */
+	if (sets & 1)
+		list[sets++].count = 0;
+	return sets;
+}
+
+/**
+ * tid_rdma_find_phys_blocks_8k - get groups base on mr info
+ * @pages - pointer to an array of page structs
+ * @npages - number of pages
+ * @list - page set array to return
+ *
+ * This routine parses an array of pages to compute pagesets
+ * in an 8k compatible way.
+ *
+ * pages are tested two at a time, i, i + 1 for contiguous
+ * pages and i - 1 and i contiguous pages.
+ *
+ * If any condition is false, any accumlated pages are flushed and
+ * v0,v1 are emitted as separate PAGE_SIZE pagesets
+ *
+ * Otherwise, the current 8k is totaled for a future flush.
+ *
+ * Return:
+ * The number of pagesets
+ * list set with the returned number of pagesets
+ *
+ */
+static u32 tid_rdma_find_phys_blocks_8k(struct tid_rdma_flow *flow,
+					struct page **pages,
+					u32 npages,
+					struct tid_rdma_pageset *list)
+{
+	u32 idx, sets = 0, i;
+	u32 pagecnt = 0;
+	void *v0, *v1, *vm1;
+
+	if (!npages)
+		return 0;
+	for (idx = 0, i = 0, vm1 = NULL; i < npages; i += 2) {
+		/* get a new v0 */
+		v0 = page_address(pages[i]);
+		v1 = i + 1 < npages ?
+				page_address(pages[i + 1]) : NULL;
+		/* compare i, i + 1 vaddr */
+		if (v1 != (v0 + PAGE_SIZE)) {
+			/* flush out pages */
+			sets = tid_flush_pages(list, &idx, pagecnt, sets);
+			/* output v0,v1 as two pagesets */
+			list[sets].idx = idx++;
+			list[sets++].count = 1;
+			if (v1) {
+				list[sets].count = 1;
+				list[sets++].idx = idx++;
+			} else {
+				list[sets++].count = 0;
+			}
+			vm1 = NULL;
+			pagecnt = 0;
+			continue;
+		}
+		/* i,i+1 consecutive, look at i-1,i */
+		if (vm1 && v0 != (vm1 + PAGE_SIZE)) {
+			/* flush out pages */
+			sets = tid_flush_pages(list, &idx, pagecnt, sets);
+			pagecnt = 0;
+		}
+		/* pages will always be a multiple of 8k */
+		pagecnt += 2;
+		/* save i-1 */
+		vm1 = v1;
+		/* move to next pair */
+	}
+	/* dump residual pages at end */
+	sets = tid_flush_pages(list, &idx, npages - idx, sets);
+	/* by design cannot be odd sets */
+	WARN_ON(sets & 1);
+	return sets;
+}
+
+/**
+ * Find pages for one segment of a sge array represented by @ss. The function
+ * does not check the sge, the sge must have been checked for alignment with a
+ * prior call to hfi1_kern_trdma_ok. Other sge checking is done as part of
+ * rvt_lkey_ok and rvt_rkey_ok. Also, the function only modifies the local sge
+ * copy maintained in @ss->sge, the original sge is not modified.
+ *
+ * Unlike IB RDMA WRITE, we can't decrement ss->num_sge here because we are not
+ * releasing the MR reference count at the same time. Otherwise, we'll "leak"
+ * references to the MR. This difference requires that we keep track of progress
+ * into the sg_list. This is done by the cur_seg cursor in the tid_rdma_request
+ * structure.
+ */
+static u32 kern_find_pages(struct tid_rdma_flow *flow,
+			   struct page **pages,
+			   struct rvt_sge_state *ss, bool *last)
+{
+	struct tid_rdma_request *req = flow->req;
+	struct rvt_sge *sge = &ss->sge;
+	u32 length = flow->req->seg_len;
+	u32 len = PAGE_SIZE;
+	u32 i = 0;
+
+	while (length && req->isge < ss->num_sge) {
+		pages[i++] = virt_to_page(sge->vaddr);
+
+		sge->vaddr += len;
+		sge->length -= len;
+		sge->sge_length -= len;
+		if (!sge->sge_length) {
+			if (++req->isge < ss->num_sge)
+				*sge = ss->sg_list[req->isge - 1];
+		} else if (sge->length == 0 && sge->mr->lkey) {
+			if (++sge->n >= RVT_SEGSZ) {
+				++sge->m;
+				sge->n = 0;
+			}
+			sge->vaddr = sge->mr->map[sge->m]->segs[sge->n].vaddr;
+			sge->length = sge->mr->map[sge->m]->segs[sge->n].length;
+		}
+		length -= len;
+	}
+
+	flow->length = flow->req->seg_len - length;
+	*last = req->isge == ss->num_sge ? false : true;
+	return i;
+}
+
+static void dma_unmap_flow(struct tid_rdma_flow *flow)
+{
+	struct hfi1_devdata *dd;
+	int i;
+	struct tid_rdma_pageset *pset;
+
+	dd = flow->req->rcd->dd;
+	for (i = 0, pset = &flow->pagesets[0]; i < flow->npagesets;
+			i++, pset++) {
+		if (pset->count && pset->addr) {
+			dma_unmap_page(&dd->pcidev->dev,
+				       pset->addr,
+				       PAGE_SIZE * pset->count,
+				       DMA_FROM_DEVICE);
+			pset->mapped = 0;
+		}
+	}
+}
+
+static int dma_map_flow(struct tid_rdma_flow *flow, struct page **pages)
+{
+	int i;
+	struct hfi1_devdata *dd = flow->req->rcd->dd;
+	struct tid_rdma_pageset *pset;
+
+	for (i = 0, pset = &flow->pagesets[0]; i < flow->npagesets;
+			i++, pset++) {
+		if (pset->count) {
+			pset->addr = dma_map_page(&dd->pcidev->dev,
+						  pages[pset->idx],
+						  0,
+						  PAGE_SIZE * pset->count,
+						  DMA_FROM_DEVICE);
+
+			if (dma_mapping_error(&dd->pcidev->dev, pset->addr)) {
+				dma_unmap_flow(flow);
+				return -ENOMEM;
+			}
+			pset->mapped = 1;
+		}
+	}
+	return 0;
+}
+
+static inline bool dma_mapped(struct tid_rdma_flow *flow)
+{
+	return !!flow->pagesets[0].mapped;
+}
+
+/*
+ * Get pages pointers and identify contiguous physical memory chunks for a
+ * segment. All segments are of length flow->req->seg_len.
+ */
+static int kern_get_phys_blocks(struct tid_rdma_flow *flow,
+				struct page **pages,
+				struct rvt_sge_state *ss, bool *last)
+{
+	u8 npages;
+
+	/* Reuse previously computed pagesets, if any */
+	if (flow->npagesets) {
+		if (!dma_mapped(flow))
+			return dma_map_flow(flow, pages);
+		return 0;
+	}
+
+	npages = kern_find_pages(flow, pages, ss, last);
+
+	if (flow->req->qp->pmtu == enum_to_mtu(OPA_MTU_4096))
+		flow->npagesets =
+			tid_rdma_find_phys_blocks_4k(flow, pages, npages,
+						     flow->pagesets);
+	else
+		flow->npagesets =
+			tid_rdma_find_phys_blocks_8k(flow, pages, npages,
+						     flow->pagesets);
+
+	return dma_map_flow(flow, pages);
+}
+
+static inline void kern_add_tid_node(struct tid_rdma_flow *flow,
+				     struct hfi1_ctxtdata *rcd, char *s,
+				     struct tid_group *grp, u8 cnt)
+{
+	struct kern_tid_node *node = &flow->tnode[flow->tnode_cnt++];
+
+	WARN_ON_ONCE(flow->tnode_cnt >=
+		     (TID_RDMA_MAX_SEGMENT_SIZE >> PAGE_SHIFT));
+	if (WARN_ON_ONCE(cnt & 1))
+		dd_dev_err(rcd->dd,
+			   "unexpected odd allocation cnt %u map 0x%x used %u",
+			   cnt, grp->map, grp->used);
+
+	node->grp = grp;
+	node->map = grp->map;
+	node->cnt = cnt;
+}
+
+/*
+ * Try to allocate pageset_count TID's from TID groups for a context
+ *
+ * This function allocates TID's without moving groups between lists or
+ * modifying grp->map. This is done as follows, being cogizant of the lists
+ * between which the TID groups will move:
+ * 1. First allocate complete groups of 8 TID's since this is more efficient,
+ *    these groups will move from group->full without affecting used
+ * 2. If more TID's are needed allocate from used (will move from used->full or
+ *    stay in used)
+ * 3. If we still don't have the required number of TID's go back and look again
+ *    at a complete group (will move from group->used)
+ */
+static int kern_alloc_tids(struct tid_rdma_flow *flow)
+{
+	struct hfi1_ctxtdata *rcd = flow->req->rcd;
+	struct hfi1_devdata *dd = rcd->dd;
+	u32 ngroups, pageidx = 0;
+	struct tid_group *group = NULL, *used;
+	u8 use;
+
+	flow->tnode_cnt = 0;
+	ngroups = flow->npagesets / dd->rcv_entries.group_size;
+	if (!ngroups)
+		goto used_list;
+
+	/* First look at complete groups */
+	list_for_each_entry(group,  &rcd->tid_group_list.list, list) {
+		kern_add_tid_node(flow, rcd, "complete groups", group,
+				  group->size);
+
+		pageidx += group->size;
+		if (!--ngroups)
+			break;
+	}
+
+	if (pageidx >= flow->npagesets)
+		goto ok;
+
+used_list:
+	/* Now look at partially used groups */
+	list_for_each_entry(used, &rcd->tid_used_list.list, list) {
+		use = min_t(u32, flow->npagesets - pageidx,
+			    used->size - used->used);
+		kern_add_tid_node(flow, rcd, "used groups", used, use);
+
+		pageidx += use;
+		if (pageidx >= flow->npagesets)
+			goto ok;
+	}
+
+	/*
+	 * Look again at a complete group, continuing from where we left.
+	 * However, if we are at the head, we have reached the end of the
+	 * complete groups list from the first loop above
+	 */
+	if (group && &group->list == &rcd->tid_group_list.list)
+		goto bail_eagain;
+	group = list_prepare_entry(group, &rcd->tid_group_list.list,
+				   list);
+	if (list_is_last(&group->list, &rcd->tid_group_list.list))
+		goto bail_eagain;
+	group = list_next_entry(group, list);
+	use = min_t(u32, flow->npagesets - pageidx, group->size);
+	kern_add_tid_node(flow, rcd, "complete continue", group, use);
+	pageidx += use;
+	if (pageidx >= flow->npagesets)
+		goto ok;
+bail_eagain:
+	return -EAGAIN;
+ok:
+	return 0;
+}
+
+static void kern_program_rcv_group(struct tid_rdma_flow *flow, int grp_num,
+				   u32 *pset_idx)
+{
+	struct hfi1_ctxtdata *rcd = flow->req->rcd;
+	struct hfi1_devdata *dd = rcd->dd;
+	struct kern_tid_node *node = &flow->tnode[grp_num];
+	struct tid_group *grp = node->grp;
+	struct tid_rdma_pageset *pset;
+	u32 pmtu_pg = flow->req->qp->pmtu >> PAGE_SHIFT;
+	u32 rcventry, npages = 0, pair = 0, tidctrl;
+	u8 i, cnt = 0;
+
+	for (i = 0; i < grp->size; i++) {
+		rcventry = grp->base + i;
+
+		if (node->map & BIT(i) || cnt >= node->cnt) {
+			rcv_array_wc_fill(dd, rcventry);
+			continue;
+		}
+		pset = &flow->pagesets[(*pset_idx)++];
+		if (pset->count) {
+			hfi1_put_tid(dd, rcventry, PT_EXPECTED,
+				     pset->addr, trdma_pset_order(pset));
+		} else {
+			hfi1_put_tid(dd, rcventry, PT_INVALID, 0, 0);
+		}
+		npages += pset->count;
+
+		rcventry -= rcd->expected_base;
+		tidctrl = pair ? 0x3 : rcventry & 0x1 ? 0x2 : 0x1;
+		/*
+		 * A single TID entry will be used to use a rcvarr pair (with
+		 * tidctrl 0x3), if ALL these are true (a) the bit pos is even
+		 * (b) the group map shows current and the next bits as free
+		 * indicating two consecutive rcvarry entries are available (c)
+		 * we actually need 2 more entries
+		 */
+		pair = !(i & 0x1) && !((node->map >> i) & 0x3) &&
+			node->cnt >= cnt + 2;
+		if (!pair) {
+			if (!pset->count)
+				tidctrl = 0x1;
+			flow->tid_entry[flow->tidcnt++] =
+				EXP_TID_SET(IDX, rcventry >> 1) |
+				EXP_TID_SET(CTRL, tidctrl) |
+				EXP_TID_SET(LEN, npages);
+			/* Efficient DIV_ROUND_UP(npages, pmtu_pg) */
+			flow->npkts += (npages + pmtu_pg - 1) >> ilog2(pmtu_pg);
+			npages = 0;
+		}
+
+		if (grp->used == grp->size - 1)
+			tid_group_move(grp, &rcd->tid_used_list,
+				       &rcd->tid_full_list);
+		else if (!grp->used)
+			tid_group_move(grp, &rcd->tid_group_list,
+				       &rcd->tid_used_list);
+
+		grp->used++;
+		grp->map |= BIT(i);
+		cnt++;
+	}
+}
+
+static void kern_unprogram_rcv_group(struct tid_rdma_flow *flow, int grp_num)
+{
+	struct hfi1_ctxtdata *rcd = flow->req->rcd;
+	struct hfi1_devdata *dd = rcd->dd;
+	struct kern_tid_node *node = &flow->tnode[grp_num];
+	struct tid_group *grp = node->grp;
+	u32 rcventry;
+	u8 i, cnt = 0;
+
+	for (i = 0; i < grp->size; i++) {
+		rcventry = grp->base + i;
+
+		if (node->map & BIT(i) || cnt >= node->cnt) {
+			rcv_array_wc_fill(dd, rcventry);
+			continue;
+		}
+
+		hfi1_put_tid(dd, rcventry, PT_INVALID, 0, 0);
+
+		grp->used--;
+		grp->map &= ~BIT(i);
+		cnt++;
+
+		if (grp->used == grp->size - 1)
+			tid_group_move(grp, &rcd->tid_full_list,
+				       &rcd->tid_used_list);
+		else if (!grp->used)
+			tid_group_move(grp, &rcd->tid_used_list,
+				       &rcd->tid_group_list);
+	}
+	if (WARN_ON_ONCE(cnt & 1)) {
+		struct hfi1_ctxtdata *rcd = flow->req->rcd;
+		struct hfi1_devdata *dd = rcd->dd;
+
+		dd_dev_err(dd, "unexpected odd free cnt %u map 0x%x used %u",
+			   cnt, grp->map, grp->used);
+	}
+}
+
+static void kern_program_rcvarray(struct tid_rdma_flow *flow)
+{
+	u32 pset_idx = 0;
+	int i;
+
+	flow->npkts = 0;
+	flow->tidcnt = 0;
+	for (i = 0; i < flow->tnode_cnt; i++)
+		kern_program_rcv_group(flow, i, &pset_idx);
+}
+
+/**
+ * hfi1_kern_exp_rcv_setup() - setup TID's and flow for one segment of a
+ * TID RDMA request
+ *
+ * @req: TID RDMA request for which the segment/flow is being set up
+ * @ss: sge state, maintains state across successive segments of a sge
+ * @last: set to true after the last sge segment has been processed
+ *
+ * This function
+ * (1) finds a free flow entry in the flow circular buffer
+ * (2) finds pages and continuous physical chunks constituing one segment
+ *     of an sge
+ * (3) allocates TID group entries for those chunks
+ * (4) programs rcvarray entries in the hardware corresponding to those
+ *     TID's
+ * (5) computes a tidarray with formatted TID entries which can be sent
+ *     to the sender
+ * (6) Reserves and programs HW flows.
+ * (7) It also manages queing the QP when TID/flow resources are not
+ *     available.
+ *
+ * @req points to struct tid_rdma_request of which the segments are a part. The
+ * function uses qp, rcd and seg_len members of @req. In the absence of errors,
+ * req->flow_idx is the index of the flow which has been prepared in this
+ * invocation of function call. With flow = &req->flows[req->flow_idx],
+ * flow->tid_entry contains the TID array which the sender can use for TID RDMA
+ * sends and flow->npkts contains number of packets required to send the
+ * segment.
+ *
+ * hfi1_check_sge_align should be called prior to calling this function and if
+ * it signals error TID RDMA cannot be used for this sge and this function
+ * should not be called.
+ *
+ * For the queuing, caller must hold the flow->req->qp s_lock from the send
+ * engine and the function will procure the exp_lock.
+ *
+ * Return:
+ * The function returns -EAGAIN if sufficient number of TID/flow resources to
+ * map the segment could not be allocated. In this case the function should be
+ * called again with previous arguments to retry the TID allocation. There are
+ * no other error returns. The function returns 0 on success.
+ */
+int hfi1_kern_exp_rcv_setup(struct tid_rdma_request *req,
+			    struct rvt_sge_state *ss, bool *last)
+	__must_hold(&req->qp->s_lock)
+{
+	struct tid_rdma_flow *flow = &req->flows[req->setup_head];
+	struct hfi1_ctxtdata *rcd = req->rcd;
+	struct hfi1_qp_priv *qpriv = req->qp->priv;
+	unsigned long flags;
+	struct rvt_qp *fqp;
+	u16 clear_tail = req->clear_tail;
+
+	lockdep_assert_held(&req->qp->s_lock);
+	/*
+	 * We return error if either (a) we don't have space in the flow
+	 * circular buffer, or (b) we already have max entries in the buffer.
+	 * Max entries depend on the type of request we are processing and the
+	 * negotiated TID RDMA parameters.
+	 */
+	if (!CIRC_SPACE(req->setup_head, clear_tail, MAX_FLOWS) ||
+	    CIRC_CNT(req->setup_head, clear_tail, MAX_FLOWS) >=
+	    req->n_flows)
+		return -EINVAL;
+
+	/*
+	 * Get pages, identify contiguous physical memory chunks for the segment
+	 * If we can not determine a DMA address mapping we will treat it just
+	 * like if we ran out of space above.
+	 */
+	if (kern_get_phys_blocks(flow, qpriv->pages, ss, last)) {
+		hfi1_wait_kmem(flow->req->qp);
+		return -ENOMEM;
+	}
+
+	spin_lock_irqsave(&rcd->exp_lock, flags);
+	if (kernel_tid_waiters(rcd, &rcd->rarr_queue, flow->req->qp))
+		goto queue;
+
+	/*
+	 * At this point we know the number of pagesets and hence the number of
+	 * TID's to map the segment. Allocate the TID's from the TID groups. If
+	 * we cannot allocate the required number we exit and try again later
+	 */
+	if (kern_alloc_tids(flow))
+		goto queue;
+	/*
+	 * Finally program the TID entries with the pagesets, compute the
+	 * tidarray and enable the HW flow
+	 */
+	kern_program_rcvarray(flow);
+
+	/*
+	 * Setup the flow state with relevant information.
+	 * This information is used for tracking the sequence of data packets
+	 * for the segment.
+	 * The flow is setup here as this is the most accurate time and place
+	 * to do so. Doing at a later time runs the risk of the flow data in
+	 * qpriv getting out of sync.
+	 */
+	memset(&flow->flow_state, 0x0, sizeof(flow->flow_state));
+	flow->idx = qpriv->flow_state.index;
+	flow->flow_state.generation = qpriv->flow_state.generation;
+	flow->flow_state.spsn = qpriv->flow_state.psn;
+	flow->flow_state.lpsn = flow->flow_state.spsn + flow->npkts - 1;
+	flow->flow_state.r_next_psn =
+		full_flow_psn(flow, flow->flow_state.spsn);
+	qpriv->flow_state.psn += flow->npkts;
+
+	dequeue_tid_waiter(rcd, &rcd->rarr_queue, flow->req->qp);
+	/* get head before dropping lock */
+	fqp = first_qp(rcd, &rcd->rarr_queue);
+	spin_unlock_irqrestore(&rcd->exp_lock, flags);
+	tid_rdma_schedule_tid_wakeup(fqp);
+
+	req->setup_head = (req->setup_head + 1) & (MAX_FLOWS - 1);
+	return 0;
+queue:
+	queue_qp_for_tid_wait(rcd, &rcd->rarr_queue, flow->req->qp);
+	spin_unlock_irqrestore(&rcd->exp_lock, flags);
+	return -EAGAIN;
+}
+
+static void hfi1_tid_rdma_reset_flow(struct tid_rdma_flow *flow)
+{
+	flow->npagesets = 0;
+}
+
+/*
+ * This function is called after one segment has been successfully sent to
+ * release the flow and TID HW/SW resources for that segment. The segments for a
+ * TID RDMA request are setup and cleared in FIFO order which is managed using a
+ * circular buffer.
+ */
+int hfi1_kern_exp_rcv_clear(struct tid_rdma_request *req)
+	__must_hold(&req->qp->s_lock)
+{
+	struct tid_rdma_flow *flow = &req->flows[req->clear_tail];
+	struct hfi1_ctxtdata *rcd = req->rcd;
+	unsigned long flags;
+	int i;
+	struct rvt_qp *fqp;
+
+	lockdep_assert_held(&req->qp->s_lock);
+	/* Exit if we have nothing in the flow circular buffer */
+	if (!CIRC_CNT(req->setup_head, req->clear_tail, MAX_FLOWS))
+		return -EINVAL;
+
+	spin_lock_irqsave(&rcd->exp_lock, flags);
+
+	for (i = 0; i < flow->tnode_cnt; i++)
+		kern_unprogram_rcv_group(flow, i);
+	/* To prevent double unprogramming */
+	flow->tnode_cnt = 0;
+	/* get head before dropping lock */
+	fqp = first_qp(rcd, &rcd->rarr_queue);
+	spin_unlock_irqrestore(&rcd->exp_lock, flags);
+
+	dma_unmap_flow(flow);
+
+	hfi1_tid_rdma_reset_flow(flow);
+	req->clear_tail = (req->clear_tail + 1) & (MAX_FLOWS - 1);
+
+	if (fqp == req->qp) {
+		__trigger_tid_waiter(fqp);
+		rvt_put_qp(fqp);
+	} else {
+		tid_rdma_schedule_tid_wakeup(fqp);
+	}
+
+	return 0;
+}
+
+/*
+ * This function is called to release all the tid entries for
+ * a request.
+ */
+void hfi1_kern_exp_rcv_clear_all(struct tid_rdma_request *req)
+	__must_hold(&req->qp->s_lock)
+{
+	/* Use memory barrier for proper ordering */
+	while (CIRC_CNT(req->setup_head, req->clear_tail, MAX_FLOWS)) {
+		if (hfi1_kern_exp_rcv_clear(req))
+			break;
+	}
+}
+
+/**
+ * hfi1_kern_exp_rcv_free_flows - free priviously allocated flow information
+ * @req - the tid rdma request to be cleaned
+ */
+static void hfi1_kern_exp_rcv_free_flows(struct tid_rdma_request *req)
+{
+	kfree(req->flows);
+	req->flows = NULL;
+}
+
+/**
+ * __trdma_clean_swqe - clean up for large sized QPs
+ * @qp: the queue patch
+ * @wqe: the send wqe
+ */
+void __trdma_clean_swqe(struct rvt_qp *qp, struct rvt_swqe *wqe)
+{
+	struct hfi1_swqe_priv *p = wqe->priv;
+
+	hfi1_kern_exp_rcv_free_flows(&p->tid_req);
+}
+
+/*
+ * This can be called at QP create time or in the data path.
+ */
+static int hfi1_kern_exp_rcv_alloc_flows(struct tid_rdma_request *req,
+					 gfp_t gfp)
+{
+	struct tid_rdma_flow *flows;
+	int i;
+
+	if (likely(req->flows))
+		return 0;
+	flows = kmalloc_node(MAX_FLOWS * sizeof(*flows), gfp,
+			     req->rcd->numa_id);
+	if (!flows)
+		return -ENOMEM;
+	/* mini init */
+	for (i = 0; i < MAX_FLOWS; i++) {
+		flows[i].req = req;
+		flows[i].npagesets = 0;
+		flows[i].pagesets[0].mapped =  0;
+	}
+	req->flows = flows;
+	return 0;
+}
+
+static void hfi1_init_trdma_req(struct rvt_qp *qp,
+				struct tid_rdma_request *req)
+{
+	struct hfi1_qp_priv *qpriv = qp->priv;
+
+	/*
+	 * Initialize various TID RDMA request variables.
+	 * These variables are "static", which is why they
+	 * can be pre-initialized here before the WRs has
+	 * even been submitted.
+	 * However, non-NULL values for these variables do not
+	 * imply that this WQE has been enabled for TID RDMA.
+	 * Drivers should check the WQE's opcode to determine
+	 * if a request is a TID RDMA one or not.
+	 */
+	req->qp = qp;
+	req->rcd = qpriv->rcd;
+}
diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.h b/drivers/infiniband/hw/hfi1/tid_rdma.h
index 3bc0aaf9568f..524baf8c8fac 100644
--- a/drivers/infiniband/hw/hfi1/tid_rdma.h
+++ b/drivers/infiniband/hw/hfi1/tid_rdma.h
@@ -6,7 +6,16 @@
 #ifndef HFI1_TID_RDMA_H
 #define HFI1_TID_RDMA_H
 
+#include <linux/circ_buf.h>
+#include "common.h"
+
+/* Add a convenience helper */
+#define CIRC_ADD(val, add, size) (((val) + (add)) & ((size) - 1))
+#define CIRC_NEXT(val, size) CIRC_ADD(val, 1, size)
+#define CIRC_PREV(val, size) CIRC_ADD(val, -1, size)
+
 #define TID_RDMA_MAX_SEGMENT_SIZE       BIT(18)   /* 256 KiB (for now) */
+#define TID_RDMA_MAX_PAGES              (BIT(18) >> PAGE_SHIFT)
 
 struct tid_rdma_params {
 	struct rcu_head rcu_head;
@@ -36,6 +45,81 @@ struct tid_flow_state {
 	u8 flags;
 };
 
+struct tid_rdma_request {
+	struct rvt_qp *qp;
+	struct hfi1_ctxtdata *rcd;
+	union {
+		struct rvt_swqe *swqe;
+		struct rvt_ack_entry *ack;
+	} e;
+
+	struct tid_rdma_flow *flows;	/* array of tid flows */
+	u16 n_flows;		/* size of the flow buffer window */
+	u16 setup_head;		/* flow index we are setting up */
+	u16 clear_tail;		/* flow index we are clearing */
+	u16 flow_idx;		/* flow index most recently set up */
+
+	u32 seg_len;
+
+	u32 isge;		/* index of "current" sge */
+};
+
+/*
+ * When header suppression is used, PSNs associated with a "flow" are
+ * relevant (and not the PSNs maintained by verbs). Track per-flow
+ * PSNs here for a TID RDMA segment.
+ *
+ */
+struct flow_state {
+	u32 flags;
+	u32 resp_ib_psn;     /* The IB PSN of the response for this flow */
+	u32 generation;      /* generation of flow */
+	u32 spsn;            /* starting PSN in TID space */
+	u32 lpsn;            /* last PSN in TID space */
+	u32 r_next_psn;      /* next PSN to be received (in TID space) */
+};
+
+struct tid_rdma_pageset {
+	dma_addr_t addr : 48; /* Only needed for the first page */
+	u8 idx: 8;
+	u8 count : 7;
+	u8 mapped: 1;
+};
+
+/**
+ * kern_tid_node - used for managing TID's in TID groups
+ *
+ * @grp_idx: rcd relative index to tid_group
+ * @map: grp->map captured prior to programming this TID group in HW
+ * @cnt: Only @cnt of available group entries are actually programmed
+ */
+struct kern_tid_node {
+	struct tid_group *grp;
+	u8 map;
+	u8 cnt;
+};
+
+/* Overall info for a TID RDMA segment */
+struct tid_rdma_flow {
+	/*
+	 * While a TID RDMA segment is being transferred, it uses a QP number
+	 * from the "KDETH section of QP numbers" (which is different from the
+	 * QP number that originated the request). Bits 11-15 of these QP
+	 * numbers identify the "TID flow" for the segment.
+	 */
+	struct flow_state flow_state;
+	struct tid_rdma_request *req;
+	u32 length;
+	u8 tnode_cnt;
+	u8 tidcnt;
+	u8 idx;
+	u8 npagesets;
+	u8 npkts;
+	struct kern_tid_node tnode[TID_RDMA_MAX_PAGES];
+	struct tid_rdma_pageset pagesets[TID_RDMA_MAX_PAGES];
+	u32 tid_entry[TID_RDMA_MAX_PAGES];
+};
+
 bool tid_rdma_conn_req(struct rvt_qp *qp, u64 *data);
 bool tid_rdma_conn_reply(struct rvt_qp *qp, u64 data);
 bool tid_rdma_conn_resp(struct rvt_qp *qp, u64 *data);
@@ -43,6 +127,23 @@ void tid_rdma_conn_error(struct rvt_qp *qp);
 void tid_rdma_opfn_init(struct rvt_qp *qp, struct tid_rdma_params *p);
 
 int hfi1_kern_exp_rcv_init(struct hfi1_ctxtdata *rcd, int reinit);
+int hfi1_kern_exp_rcv_setup(struct tid_rdma_request *req,
+			    struct rvt_sge_state *ss, bool *last);
+int hfi1_kern_exp_rcv_clear(struct tid_rdma_request *req);
+void hfi1_kern_exp_rcv_clear_all(struct tid_rdma_request *req);
+void __trdma_clean_swqe(struct rvt_qp *qp, struct rvt_swqe *wqe);
+
+/**
+ * trdma_clean_swqe - clean flows for swqe if large send queue
+ * @qp: the qp
+ * @wqe: the send wqe
+ */
+static inline void trdma_clean_swqe(struct rvt_qp *qp, struct rvt_swqe *wqe)
+{
+	if (!wqe->priv)
+		return;
+	__trdma_clean_swqe(qp, wqe);
+}
 
 int hfi1_qp_priv_init(struct rvt_dev_info *rdi, struct rvt_qp *qp,
 		      struct ib_qp_init_attr *init_attr);
diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.h b/drivers/infiniband/hw/hfi1/user_exp_rcv.h
index e383cc01a2bf..43b105de1d54 100644
--- a/drivers/infiniband/hw/hfi1/user_exp_rcv.h
+++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.h
@@ -48,7 +48,6 @@
  */
 
 #include "hfi.h"
-
 #include "exp_rcv.h"
 
 struct tid_pageset {
diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c
index 571bfd549c2a..02c1873a976c 100644
--- a/drivers/infiniband/hw/hfi1/verbs.c
+++ b/drivers/infiniband/hw/hfi1/verbs.c
@@ -504,11 +504,28 @@ static void verbs_sdma_complete(
 	hfi1_put_txreq(tx);
 }
 
+void hfi1_wait_kmem(struct rvt_qp *qp)
+{
+	struct hfi1_qp_priv *priv = qp->priv;
+	struct ib_qp *ibqp = &qp->ibqp;
+	struct ib_device *ibdev = ibqp->device;
+	struct hfi1_ibdev *dev = to_idev(ibdev);
+
+	if (list_empty(&priv->s_iowait.list)) {
+		if (list_empty(&dev->memwait))
+			mod_timer(&dev->mem_timer, jiffies + 1);
+		qp->s_flags |= RVT_S_WAIT_KMEM;
+		list_add_tail(&priv->s_iowait.list, &dev->memwait);
+		priv->s_iowait.lock = &dev->iowait_lock;
+		trace_hfi1_qpsleep(qp, RVT_S_WAIT_KMEM);
+		rvt_get_qp(qp);
+	}
+}
+
 static int wait_kmem(struct hfi1_ibdev *dev,
 		     struct rvt_qp *qp,
 		     struct hfi1_pkt_state *ps)
 {
-	struct hfi1_qp_priv *priv = qp->priv;
 	unsigned long flags;
 	int ret = 0;
 
@@ -517,15 +534,7 @@ static int wait_kmem(struct hfi1_ibdev *dev,
 		write_seqlock(&dev->iowait_lock);
 		list_add_tail(&ps->s_txreq->txreq.list,
 			      &ps->wait->tx_head);
-		if (list_empty(&priv->s_iowait.list)) {
-			if (list_empty(&dev->memwait))
-				mod_timer(&dev->mem_timer, jiffies + 1);
-			qp->s_flags |= RVT_S_WAIT_KMEM;
-			list_add_tail(&priv->s_iowait.list, &dev->memwait);
-			priv->s_iowait.lock = &dev->iowait_lock;
-			trace_hfi1_qpsleep(qp, RVT_S_WAIT_KMEM);
-			rvt_get_qp(qp);
-		}
+		hfi1_wait_kmem(qp);
 		write_sequnlock(&dev->iowait_lock);
 		hfi1_qp_unbusy(qp, ps->wait);
 		ret = -EBUSY;
diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h
index 9065e470bebb..94f198b47239 100644
--- a/drivers/infiniband/hw/hfi1/verbs.h
+++ b/drivers/infiniband/hw/hfi1/verbs.h
@@ -159,6 +159,7 @@ struct hfi1_qp_priv {
 	struct sdma_engine *s_sde;                /* current sde */
 	struct send_context *s_sendcontext;       /* current sendcontext */
 	struct hfi1_ctxtdata *rcd;                /* QP's receive context */
+	struct page **pages;                      /* for TID page scan */
 	u32 tid_enqueue;                          /* saved when tid waited */
 	u8 s_sc;		                  /* SC[0..4] for next packet */
 	struct iowait s_iowait;
@@ -173,6 +174,14 @@ struct hfi1_qp_priv {
 	u8 timeout_shift;       /* account for number of packets per segment */
 };
 
+struct hfi1_swqe_priv {
+	struct tid_rdma_request tid_req;
+};
+
+struct hfi1_ack_priv {
+	struct tid_rdma_request tid_req;
+};
+
 /*
  * This structure is used to hold commonly lookedup and computed values during
  * the send engine progress.
@@ -321,6 +330,21 @@ static inline u32 delta_psn(u32 a, u32 b)
 	return (((int)a - (int)b) << PSN_SHIFT) >> PSN_SHIFT;
 }
 
+/*
+ * Look through all the active flows for a TID RDMA request and find
+ * the one (if it exists) that contains the specified PSN.
+ */
+static inline u32 __full_flow_psn(struct flow_state *state, u32 psn)
+{
+	return mask_psn((state->generation << HFI1_KDETH_BTH_SEQ_SHIFT) |
+			(psn & HFI1_KDETH_BTH_SEQ_MASK));
+}
+
+static inline u32 full_flow_psn(struct tid_rdma_flow *flow, u32 psn)
+{
+	return __full_flow_psn(&flow->flow_state, psn);
+}
+
 struct verbs_txreq;
 void hfi1_put_txreq(struct verbs_txreq *tx);
 
@@ -403,6 +427,16 @@ static inline bool opa_bth_is_migration(struct ib_other_headers *ohdr)
 	return ohdr->bth[1] & cpu_to_be32(OPA_BTH_MIG_REQ);
 }
 
+void hfi1_wait_kmem(struct rvt_qp *qp);
+
+static inline void hfi1_trdma_send_complete(struct rvt_qp *qp,
+					    struct rvt_swqe *wqe,
+					    enum ib_wc_status status)
+{
+	trdma_clean_swqe(qp, wqe);
+	rvt_send_complete(qp, wqe, status);
+}
+
 extern const enum ib_wc_opcode ib_hfi1_wc_opcode[];
 
 extern const u8 hdr_len_by_opcode[];
diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c
index 16247d2a671d..c8e70cf69a8a 100644
--- a/drivers/infiniband/sw/rdmavt/qp.c
+++ b/drivers/infiniband/sw/rdmavt/qp.c
@@ -1642,11 +1642,11 @@ int rvt_destroy_qp(struct ib_qp *ibqp)
 		kref_put(&qp->ip->ref, rvt_release_mmap_info);
 	else
 		vfree(qp->r_rq.wq);
-	vfree(qp->s_wq);
 	rdi->driver_f.qp_priv_free(rdi, qp);
 	kfree(qp->s_ack_queue);
 	rdma_destroy_ah_attr(&qp->remote_ah_attr);
 	rdma_destroy_ah_attr(&qp->alt_ah_attr);
+	vfree(qp->s_wq);
 	kfree(qp);
 	return 0;
 }
diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h
index 56a9221378d9..9095a0b71250 100644
--- a/include/rdma/rdmavt_qp.h
+++ b/include/rdma/rdmavt_qp.h
@@ -174,6 +174,7 @@ struct rvt_swqe {
 	u32 lpsn;               /* last packet sequence number */
 	u32 ssn;                /* send sequence number */
 	u32 length;             /* total length of data in sg_list */
+	void *priv;             /* driver dependent field */
 	struct rvt_sge sg_list[0];
 };
 
@@ -235,6 +236,7 @@ struct rvt_ack_entry {
 	u32 lpsn;
 	u8 opcode;
 	u8 sent;
+	void *priv;
 };
 
 #define	RC_QP_SCALING_INTERVAL	5
-- 
cgit v1.2.3


From 742a3826cf82395e304df99f6494d04b0dd03a84 Mon Sep 17 00:00:00 2001
From: Kaike Wan <kaike.wan@intel.com>
Date: Wed, 23 Jan 2019 19:30:40 -0800
Subject: IB/hfi1: Add functions to build TID RDMA READ request

This patch adds the helper functions to build the TID RDMA READ request
on the requester side. The key is to allocate TID resources (TID flow
and TID entries) and send the resource information to the responder side
along with the read request. Since the TID resources are limited, each
TID RDMA READ request has to be split into segments with a default
segment size of 256K. A software flow is allocated to track the data
transaction for each segment. The work request opcode, packet opcode, and
packet formats for TID RDMA READ protocol are also defined in this patch.

Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Kaike Wan <kaike.wan@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/hfi1/tid_rdma.c | 200 ++++++++++++++++++++++++++++++++++
 drivers/infiniband/hw/hfi1/tid_rdma.h |  32 ++++++
 drivers/infiniband/hw/hfi1/verbs.h    |   9 ++
 include/rdma/ib_hdrs.h                |   9 +-
 include/rdma/tid_rdma_defs.h          |  52 +++++++++
 5 files changed, 301 insertions(+), 1 deletion(-)
 create mode 100644 include/rdma/tid_rdma_defs.h

(limited to 'include')

diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.c b/drivers/infiniband/hw/hfi1/tid_rdma.c
index 506b5a59ded5..56c8c10b5a85 100644
--- a/drivers/infiniband/hw/hfi1/tid_rdma.c
+++ b/drivers/infiniband/hw/hfi1/tid_rdma.c
@@ -6,11 +6,27 @@
 
 #include "hfi.h"
 #include "qp.h"
+#include "rc.h"
 #include "verbs.h"
 #include "tid_rdma.h"
 #include "exp_rcv.h"
 #include "trace.h"
 
+/**
+ * DOC: TID RDMA READ protocol
+ *
+ * This is an end-to-end protocol at the hfi1 level between two nodes that
+ * improves performance by avoiding data copy on the requester side. It
+ * converts a qualified RDMA READ request into a TID RDMA READ request on
+ * the requester side and thereafter handles the request and response
+ * differently. To be qualified, the RDMA READ request should meet the
+ * following:
+ * -- The total data length should be greater than 256K;
+ * -- The total data length should be a multiple of 4K page size;
+ * -- Each local scatter-gather entry should be 4K page aligned;
+ * -- Each local scatter-gather entry should be a multiple of 4K page size;
+ */
+
 #define RCV_TID_FLOW_TABLE_CTRL_FLOW_VALID_SMASK BIT_ULL(32)
 #define RCV_TID_FLOW_TABLE_CTRL_HDR_SUPP_EN_SMASK BIT_ULL(33)
 #define RCV_TID_FLOW_TABLE_CTRL_KEEP_AFTER_SEQ_ERR_SMASK BIT_ULL(34)
@@ -18,6 +34,9 @@
 #define RCV_TID_FLOW_TABLE_STATUS_SEQ_MISMATCH_SMASK BIT_ULL(37)
 #define RCV_TID_FLOW_TABLE_STATUS_GEN_MISMATCH_SMASK BIT_ULL(38)
 
+/* Maximum number of packets within a flow generation. */
+#define MAX_TID_FLOW_PSN BIT(HFI1_KDETH_BTH_SEQ_SHIFT)
+
 #define GENERATION_MASK 0xFFFFF
 
 static u32 mask_generation(u32 a)
@@ -45,6 +64,9 @@ static u32 mask_generation(u32 a)
 
 #define MAX_EXPECTED_PAGES     (MAX_EXPECTED_BUFFER / PAGE_SIZE)
 
+#define TID_RDMA_DESTQP_FLOW_SHIFT      11
+#define TID_RDMA_DESTQP_FLOW_MASK       0x1f
+
 #define TID_OPFN_QP_CTXT_MASK 0xff
 #define TID_OPFN_QP_CTXT_SHIFT 56
 #define TID_OPFN_QP_KDETH_MASK 0xff
@@ -1597,3 +1619,181 @@ u64 hfi1_access_sw_tid_wait(const struct cntr_entry *entry,
 
 	return dd->verbs_dev.n_tidwait;
 }
+
+/* TID RDMA READ functions */
+u32 hfi1_build_tid_rdma_read_packet(struct rvt_swqe *wqe,
+				    struct ib_other_headers *ohdr, u32 *bth1,
+				    u32 *bth2, u32 *len)
+{
+	struct tid_rdma_request *req = wqe_to_tid_req(wqe);
+	struct tid_rdma_flow *flow = &req->flows[req->flow_idx];
+	struct rvt_qp *qp = req->qp;
+	struct hfi1_qp_priv *qpriv = qp->priv;
+	struct hfi1_swqe_priv *wpriv = wqe->priv;
+	struct tid_rdma_read_req *rreq = &ohdr->u.tid_rdma.r_req;
+	struct tid_rdma_params *remote;
+	u32 req_len = 0;
+	void *req_addr = NULL;
+
+	/* This is the IB psn used to send the request */
+	*bth2 = mask_psn(flow->flow_state.ib_spsn + flow->pkt);
+
+	/* TID Entries for TID RDMA READ payload */
+	req_addr = &flow->tid_entry[flow->tid_idx];
+	req_len = sizeof(*flow->tid_entry) *
+			(flow->tidcnt - flow->tid_idx);
+
+	memset(&ohdr->u.tid_rdma.r_req, 0, sizeof(ohdr->u.tid_rdma.r_req));
+	wpriv->ss.sge.vaddr = req_addr;
+	wpriv->ss.sge.sge_length = req_len;
+	wpriv->ss.sge.length = wpriv->ss.sge.sge_length;
+	/*
+	 * We can safely zero these out. Since the first SGE covers the
+	 * entire packet, nothing else should even look at the MR.
+	 */
+	wpriv->ss.sge.mr = NULL;
+	wpriv->ss.sge.m = 0;
+	wpriv->ss.sge.n = 0;
+
+	wpriv->ss.sg_list = NULL;
+	wpriv->ss.total_len = wpriv->ss.sge.sge_length;
+	wpriv->ss.num_sge = 1;
+
+	/* Construct the TID RDMA READ REQ packet header */
+	rcu_read_lock();
+	remote = rcu_dereference(qpriv->tid_rdma.remote);
+
+	KDETH_RESET(rreq->kdeth0, KVER, 0x1);
+	KDETH_RESET(rreq->kdeth1, JKEY, remote->jkey);
+	rreq->reth.vaddr = cpu_to_be64(wqe->rdma_wr.remote_addr +
+			   req->cur_seg * req->seg_len + flow->sent);
+	rreq->reth.rkey = cpu_to_be32(wqe->rdma_wr.rkey);
+	rreq->reth.length = cpu_to_be32(*len);
+	rreq->tid_flow_psn =
+		cpu_to_be32((flow->flow_state.generation <<
+			     HFI1_KDETH_BTH_SEQ_SHIFT) |
+			    ((flow->flow_state.spsn + flow->pkt) &
+			     HFI1_KDETH_BTH_SEQ_MASK));
+	rreq->tid_flow_qp =
+		cpu_to_be32(qpriv->tid_rdma.local.qp |
+			    ((flow->idx & TID_RDMA_DESTQP_FLOW_MASK) <<
+			     TID_RDMA_DESTQP_FLOW_SHIFT) |
+			    qpriv->rcd->ctxt);
+	rreq->verbs_qp = cpu_to_be32(qp->remote_qpn);
+	*bth1 &= ~RVT_QPN_MASK;
+	*bth1 |= remote->qp;
+	*bth2 |= IB_BTH_REQ_ACK;
+	rcu_read_unlock();
+
+	/* We are done with this segment */
+	flow->sent += *len;
+	req->cur_seg++;
+	qp->s_state = TID_OP(READ_REQ);
+	req->ack_pending++;
+	req->flow_idx = (req->flow_idx + 1) & (MAX_FLOWS - 1);
+	qpriv->pending_tid_r_segs++;
+	qp->s_num_rd_atomic++;
+
+	/* Set the TID RDMA READ request payload size */
+	*len = req_len;
+
+	return sizeof(ohdr->u.tid_rdma.r_req) / sizeof(u32);
+}
+
+/*
+ * @len: contains the data length to read upon entry and the read request
+ *       payload length upon exit.
+ */
+u32 hfi1_build_tid_rdma_read_req(struct rvt_qp *qp, struct rvt_swqe *wqe,
+				 struct ib_other_headers *ohdr, u32 *bth1,
+				 u32 *bth2, u32 *len)
+	__must_hold(&qp->s_lock)
+{
+	struct hfi1_qp_priv *qpriv = qp->priv;
+	struct tid_rdma_request *req = wqe_to_tid_req(wqe);
+	struct tid_rdma_flow *flow = NULL;
+	u32 hdwords = 0;
+	bool last;
+	bool retry = true;
+	u32 npkts = rvt_div_round_up_mtu(qp, *len);
+
+	/*
+	 * Check sync conditions. Make sure that there are no pending
+	 * segments before freeing the flow.
+	 */
+sync_check:
+	if (req->state == TID_REQUEST_SYNC) {
+		if (qpriv->pending_tid_r_segs)
+			goto done;
+
+		hfi1_kern_clear_hw_flow(req->rcd, qp);
+		req->state = TID_REQUEST_ACTIVE;
+	}
+
+	/*
+	 * If the request for this segment is resent, the tid resources should
+	 * have been allocated before. In this case, req->flow_idx should
+	 * fall behind req->setup_head.
+	 */
+	if (req->flow_idx == req->setup_head) {
+		retry = false;
+		if (req->state == TID_REQUEST_RESEND) {
+			/*
+			 * This is the first new segment for a request whose
+			 * earlier segments have been re-sent. We need to
+			 * set up the sge pointer correctly.
+			 */
+			restart_sge(&qp->s_sge, wqe, req->s_next_psn,
+				    qp->pmtu);
+			req->isge = 0;
+			req->state = TID_REQUEST_ACTIVE;
+		}
+
+		/*
+		 * Check sync. The last PSN of each generation is reserved for
+		 * RESYNC.
+		 */
+		if ((qpriv->flow_state.psn + npkts) > MAX_TID_FLOW_PSN - 1) {
+			req->state = TID_REQUEST_SYNC;
+			goto sync_check;
+		}
+
+		/* Allocate the flow if not yet */
+		if (hfi1_kern_setup_hw_flow(qpriv->rcd, qp))
+			goto done;
+
+		/*
+		 * The following call will advance req->setup_head after
+		 * allocating the tid entries.
+		 */
+		if (hfi1_kern_exp_rcv_setup(req, &qp->s_sge, &last)) {
+			req->state = TID_REQUEST_QUEUED;
+
+			/*
+			 * We don't have resources for this segment. The QP has
+			 * already been queued.
+			 */
+			goto done;
+		}
+	}
+
+	/* req->flow_idx should only be one slot behind req->setup_head */
+	flow = &req->flows[req->flow_idx];
+	flow->pkt = 0;
+	flow->tid_idx = 0;
+	flow->sent = 0;
+	if (!retry) {
+		/* Set the first and last IB PSN for the flow in use.*/
+		flow->flow_state.ib_spsn = req->s_next_psn;
+		flow->flow_state.ib_lpsn =
+			flow->flow_state.ib_spsn + flow->npkts - 1;
+	}
+
+	/* Calculate the next segment start psn.*/
+	req->s_next_psn += flow->npkts;
+
+	/* Build the packet header */
+	hdwords = hfi1_build_tid_rdma_read_packet(wqe, ohdr, bth1, bth2, len);
+done:
+	return hdwords;
+}
diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.h b/drivers/infiniband/hw/hfi1/tid_rdma.h
index 3dbeaa8cb5b3..f692f3ff9419 100644
--- a/drivers/infiniband/hw/hfi1/tid_rdma.h
+++ b/drivers/infiniband/hw/hfi1/tid_rdma.h
@@ -45,6 +45,19 @@ struct tid_flow_state {
 	u8 flags;
 };
 
+enum tid_rdma_req_state {
+	TID_REQUEST_INACTIVE = 0,
+	TID_REQUEST_INIT,
+	TID_REQUEST_INIT_RESEND,
+	TID_REQUEST_ACTIVE,
+	TID_REQUEST_RESEND,
+	TID_REQUEST_RESEND_ACTIVE,
+	TID_REQUEST_QUEUED,
+	TID_REQUEST_SYNC,
+	TID_REQUEST_RNR_NAK,
+	TID_REQUEST_COMPLETE,
+};
+
 struct tid_rdma_request {
 	struct rvt_qp *qp;
 	struct hfi1_ctxtdata *rcd;
@@ -60,8 +73,13 @@ struct tid_rdma_request {
 	u16 flow_idx;		/* flow index most recently set up */
 
 	u32 seg_len;
+	u32 s_next_psn;		/* IB PSN of next segment start for read */
 
+	u32 cur_seg;		/* index of current segment */
 	u32 isge;		/* index of "current" sge */
+	u32 ack_pending;        /* num acks pending for this request */
+
+	enum tid_rdma_req_state state;
 };
 
 /*
@@ -77,6 +95,10 @@ struct flow_state {
 	u32 spsn;            /* starting PSN in TID space */
 	u32 lpsn;            /* last PSN in TID space */
 	u32 r_next_psn;      /* next PSN to be received (in TID space) */
+
+	/* For tid rdma read */
+	u32 ib_spsn;         /* starting PSN in Verbs space */
+	u32 ib_lpsn;         /* last PSn in Verbs space */
 };
 
 struct tid_rdma_pageset {
@@ -110,11 +132,14 @@ struct tid_rdma_flow {
 	struct flow_state flow_state;
 	struct tid_rdma_request *req;
 	u32 length;
+	u32 sent;
 	u8 tnode_cnt;
 	u8 tidcnt;
+	u8 tid_idx;
 	u8 idx;
 	u8 npagesets;
 	u8 npkts;
+	u8 pkt;
 	struct kern_tid_node tnode[TID_RDMA_MAX_PAGES];
 	struct tid_rdma_pageset pagesets[TID_RDMA_MAX_PAGES];
 	u32 tid_entry[TID_RDMA_MAX_PAGES];
@@ -159,4 +184,11 @@ struct cntr_entry;
 u64 hfi1_access_sw_tid_wait(const struct cntr_entry *entry,
 			    void *context, int vl, int mode, u64 data);
 
+u32 hfi1_build_tid_rdma_read_packet(struct rvt_swqe *wqe,
+				    struct ib_other_headers *ohdr,
+				    u32 *bth1, u32 *bth2, u32 *len);
+u32 hfi1_build_tid_rdma_read_req(struct rvt_qp *qp, struct rvt_swqe *wqe,
+				 struct ib_other_headers *ohdr, u32 *bth1,
+				 u32 *bth2, u32 *len);
+
 #endif /* HFI1_TID_RDMA_H */
diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h
index 20729454f181..2965b0957855 100644
--- a/drivers/infiniband/hw/hfi1/verbs.h
+++ b/drivers/infiniband/hw/hfi1/verbs.h
@@ -170,12 +170,16 @@ struct hfi1_qp_priv {
 	struct rvt_qp *owner;
 	u8 hdr_type; /* 9B or 16B */
 	unsigned long tid_timer_timeout_jiffies;
+
+	/* For TID RDMA READ */
+	u32 pending_tid_r_segs; /* Num of pending tid read segments */
 	u16 pkts_ps;            /* packets per segment */
 	u8 timeout_shift;       /* account for number of packets per segment */
 };
 
 struct hfi1_swqe_priv {
 	struct tid_rdma_request tid_req;
+	struct rvt_sge_state ss;  /* Used for TID RDMA READ Request */
 };
 
 struct hfi1_ack_priv {
@@ -331,6 +335,11 @@ static inline u32 delta_psn(u32 a, u32 b)
 	return (((int)a - (int)b) << PSN_SHIFT) >> PSN_SHIFT;
 }
 
+static inline struct tid_rdma_request *wqe_to_tid_req(struct rvt_swqe *wqe)
+{
+	return &((struct hfi1_swqe_priv *)wqe->priv)->tid_req;
+}
+
 /*
  * Look through all the active flows for a TID RDMA request and find
  * the one (if it exists) that contains the specified PSN.
diff --git a/include/rdma/ib_hdrs.h b/include/rdma/ib_hdrs.h
index 6e35416170a3..58a0a0f99e7f 100644
--- a/include/rdma/ib_hdrs.h
+++ b/include/rdma/ib_hdrs.h
@@ -1,5 +1,5 @@
 /*
- * Copyright(c) 2016 Intel Corporation.
+ * Copyright(c) 2016 - 2018 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
@@ -100,6 +100,8 @@ struct ib_atomic_eth {
 	__be64 compare_data; /* potentially unaligned */
 } __packed;
 
+#include <rdma/tid_rdma_defs.h>
+
 union ib_ehdrs {
 	struct {
 		__be32 deth[2];
@@ -117,6 +119,11 @@ union ib_ehdrs {
 	__be32 aeth;
 	__be32 ieth;
 	struct ib_atomic_eth atomic_eth;
+	/* TID RDMA headers */
+	union {
+		struct tid_rdma_read_req r_req;
+		struct tid_rdma_read_resp r_rsp;
+	} tid_rdma;
 }  __packed;
 
 struct ib_other_headers {
diff --git a/include/rdma/tid_rdma_defs.h b/include/rdma/tid_rdma_defs.h
new file mode 100644
index 000000000000..1c431ea32b52
--- /dev/null
+++ b/include/rdma/tid_rdma_defs.h
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
+/*
+ * Copyright(c) 2018 Intel Corporation.
+ *
+ */
+
+#ifndef TID_RDMA_DEFS_H
+#define TID_RDMA_DEFS_H
+
+#include <rdma/ib_pack.h>
+
+struct tid_rdma_read_req {
+	__le32 kdeth0;
+	__le32 kdeth1;
+	struct ib_reth reth;
+	__be32 tid_flow_psn;
+	__be32 tid_flow_qp;
+	__be32 verbs_qp;
+};
+
+struct tid_rdma_read_resp {
+	__le32 kdeth0;
+	__le32 kdeth1;
+	__be32 aeth;
+	__be32 reserved[4];
+	__be32 verbs_psn;
+	__be32 verbs_qp;
+};
+
+/*
+ * TID RDMA Opcodes
+ */
+#define IB_OPCODE_TID_RDMA 0xe0
+enum {
+	IB_OPCODE_READ_REQ        = 0x4,
+	IB_OPCODE_READ_RESP       = 0x5,
+
+	IB_OPCODE(TID_RDMA, READ_REQ),
+	IB_OPCODE(TID_RDMA, READ_RESP),
+};
+
+#define TID_OP(x) IB_OPCODE_TID_RDMA_##x
+
+/*
+ * Define TID RDMA specific WR opcodes. The ib_wr_opcode
+ * enum already provides some reserved values for use by
+ * low level drivers. Two of those are used but renamed
+ * to be more descriptive.
+ */
+#define IB_WR_TID_RDMA_READ  IB_WR_RESERVED2
+
+#endif /* TID_RDMA_DEFS_H */
-- 
cgit v1.2.3


From 039cd3daf19b9acbf080054d765cbceac842b6a0 Mon Sep 17 00:00:00 2001
From: Kaike Wan <kaike.wan@intel.com>
Date: Wed, 23 Jan 2019 19:31:57 -0800
Subject: IB/hfi1: Increment the retry timeout value for TID RDMA READ request

The RC retry timeout value is based on the estimated time for the
response packet to come back. However, for TID RDMA READ request, due
to the use of header suppression, the driver is normally not notified
for each incoming response packet until the last TID RDMA READ response
packet. Consequently, the retry timeout value should be extended to
cover the transaction time for the entire length of a segment (default
256K) instead of that for a single packet. This patch addresses the
issue by introducing new retry timer functions to account for multiple
packets and wrapper functions for backward compatibility.

Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Kaike Wan <kaike.wan@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/sw/rdmavt/qp.c | 11 ++++++-----
 include/rdma/rdma_vt.h            | 12 +++++++++---
 include/rdma/rdmavt_qp.h          |  6 +++++-
 3 files changed, 20 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c
index c8e70cf69a8a..2769ebdf89fb 100644
--- a/drivers/infiniband/sw/rdmavt/qp.c
+++ b/drivers/infiniband/sw/rdmavt/qp.c
@@ -2393,11 +2393,12 @@ static inline unsigned long rvt_aeth_to_usec(u32 aeth)
 }
 
 /*
- *  rvt_add_retry_timer - add/start a retry timer
+ *  rvt_add_retry_timer_ext - add/start a retry timer
  *  @qp - the QP
+ *  @shift - timeout shift to wait for multiple packets
  *  add a retry timer on the QP
  */
-void rvt_add_retry_timer(struct rvt_qp *qp)
+void rvt_add_retry_timer_ext(struct rvt_qp *qp, u8 shift)
 {
 	struct ib_qp *ibqp = &qp->ibqp;
 	struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
@@ -2405,11 +2406,11 @@ void rvt_add_retry_timer(struct rvt_qp *qp)
 	lockdep_assert_held(&qp->s_lock);
 	qp->s_flags |= RVT_S_TIMER;
        /* 4.096 usec. * (1 << qp->timeout) */
-	qp->s_timer.expires = jiffies + qp->timeout_jiffies +
-			     rdi->busy_jiffies;
+	qp->s_timer.expires = jiffies + rdi->busy_jiffies +
+			      (qp->timeout_jiffies << shift);
 	add_timer(&qp->s_timer);
 }
-EXPORT_SYMBOL(rvt_add_retry_timer);
+EXPORT_SYMBOL(rvt_add_retry_timer_ext);
 
 /**
  * rvt_add_rnr_timer - add/start an rnr timer
diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h
index 168e40be183c..87d66c9630d7 100644
--- a/include/rdma/rdma_vt.h
+++ b/include/rdma/rdma_vt.h
@@ -574,9 +574,10 @@ static inline struct rvt_qp *rvt_lookup_qpn(struct rvt_dev_info *rdi,
 /**
  * rvt_mod_retry_timer - mod a retry timer
  * @qp - the QP
+ * @shift - timeout shift to wait for multiple packets
  * Modify a potentially already running retry timer
  */
-static inline void rvt_mod_retry_timer(struct rvt_qp *qp)
+static inline void rvt_mod_retry_timer_ext(struct rvt_qp *qp, u8 shift)
 {
 	struct ib_qp *ibqp = &qp->ibqp;
 	struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
@@ -584,8 +585,13 @@ static inline void rvt_mod_retry_timer(struct rvt_qp *qp)
 	lockdep_assert_held(&qp->s_lock);
 	qp->s_flags |= RVT_S_TIMER;
 	/* 4.096 usec. * (1 << qp->timeout) */
-	mod_timer(&qp->s_timer, jiffies + qp->timeout_jiffies +
-		  rdi->busy_jiffies);
+	mod_timer(&qp->s_timer, jiffies + rdi->busy_jiffies +
+		  (qp->timeout_jiffies << shift));
+}
+
+static inline void rvt_mod_retry_timer(struct rvt_qp *qp)
+{
+	return rvt_mod_retry_timer_ext(qp, 0);
 }
 
 struct rvt_dev_info *rvt_alloc_device(size_t size, int nports);
diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h
index 9095a0b71250..d8d88d023092 100644
--- a/include/rdma/rdmavt_qp.h
+++ b/include/rdma/rdmavt_qp.h
@@ -688,7 +688,11 @@ enum hrtimer_restart rvt_rc_rnr_retry(struct hrtimer *t);
 void rvt_add_rnr_timer(struct rvt_qp *qp, u32 aeth);
 void rvt_del_timers_sync(struct rvt_qp *qp);
 void rvt_stop_rc_timers(struct rvt_qp *qp);
-void rvt_add_retry_timer(struct rvt_qp *qp);
+void rvt_add_retry_timer_ext(struct rvt_qp *qp, u8 shift);
+static inline void rvt_add_retry_timer(struct rvt_qp *qp)
+{
+	rvt_add_retry_timer_ext(qp, 0);
+}
 
 void rvt_copy_sge(struct rvt_qp *qp, struct rvt_sge_state *ss,
 		  void *data, u32 length,
-- 
cgit v1.2.3


From c098bbb00cd1986cbb58ed1712643f80ed00fcc3 Mon Sep 17 00:00:00 2001
From: Kaike Wan <kaike.wan@intel.com>
Date: Wed, 23 Jan 2019 21:48:28 -0800
Subject: IB/hfi1: Build TID RDMA WRITE request

This patch adds the functions to build TID RDMA WRITE request.
The work request opcode, packet opcode, and packet formats for TID
RDMA WRITE protocol are also defined in this patch.

Signed-off-by: Mitko Haralanov <mitko.haralanov@intel.com>
Signed-off-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
Signed-off-by: Kaike Wan <kaike.wan@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/hfi1/qp.h       |  2 ++
 drivers/infiniband/hw/hfi1/tid_rdma.c | 38 ++++++++++++++++++++++++
 drivers/infiniband/hw/hfi1/tid_rdma.h |  3 ++
 include/rdma/ib_hdrs.h                |  5 ++++
 include/rdma/tid_rdma_defs.h          | 56 +++++++++++++++++++++++++++++++++++
 5 files changed, 104 insertions(+)

(limited to 'include')

diff --git a/drivers/infiniband/hw/hfi1/qp.h b/drivers/infiniband/hw/hfi1/qp.h
index ce25a27aa4a1..f74e2509e8b9 100644
--- a/drivers/infiniband/hw/hfi1/qp.h
+++ b/drivers/infiniband/hw/hfi1/qp.h
@@ -64,12 +64,14 @@ extern const struct rvt_operation_params hfi1_post_parms[];
  * HFI1_S_AHG_CLEAR - have send engine clear ahg state
  * HFI1_S_WAIT_PIO_DRAIN - qp waiting for PIOs to drain
  * HFI1_S_WAIT_TID_SPACE - a QP is waiting for TID resource
+ * HFI1_S_WAIT_TID_RESP - waiting for a TID RDMA WRITE response
  * HFI1_S_MIN_BIT_MASK - the lowest bit that can be used by hfi1
  */
 #define HFI1_S_AHG_VALID         0x80000000
 #define HFI1_S_AHG_CLEAR         0x40000000
 #define HFI1_S_WAIT_PIO_DRAIN    0x20000000
 #define HFI1_S_WAIT_TID_SPACE    0x10000000
+#define HFI1_S_WAIT_TID_RESP     0x08000000
 #define HFI1_S_MIN_BIT_MASK      0x01000000
 
 /*
diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.c b/drivers/infiniband/hw/hfi1/tid_rdma.c
index 0ee79403acaf..089e301d9bcd 100644
--- a/drivers/infiniband/hw/hfi1/tid_rdma.c
+++ b/drivers/infiniband/hw/hfi1/tid_rdma.c
@@ -2975,3 +2975,41 @@ void setup_tid_rdma_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe)
 exit:
 	rcu_read_unlock();
 }
+
+/* TID RDMA WRITE functions */
+
+u32 hfi1_build_tid_rdma_write_req(struct rvt_qp *qp, struct rvt_swqe *wqe,
+				  struct ib_other_headers *ohdr,
+				  u32 *bth1, u32 *bth2, u32 *len)
+{
+	struct hfi1_qp_priv *qpriv = qp->priv;
+	struct tid_rdma_request *req = wqe_to_tid_req(wqe);
+	struct tid_rdma_params *remote;
+
+	rcu_read_lock();
+	remote = rcu_dereference(qpriv->tid_rdma.remote);
+	/*
+	 * Set the number of flow to be used based on negotiated
+	 * parameters.
+	 */
+	req->n_flows = remote->max_write;
+	req->state = TID_REQUEST_ACTIVE;
+
+	KDETH_RESET(ohdr->u.tid_rdma.w_req.kdeth0, KVER, 0x1);
+	KDETH_RESET(ohdr->u.tid_rdma.w_req.kdeth1, JKEY, remote->jkey);
+	ohdr->u.tid_rdma.w_req.reth.vaddr =
+		cpu_to_be64(wqe->rdma_wr.remote_addr + (wqe->length - *len));
+	ohdr->u.tid_rdma.w_req.reth.rkey =
+		cpu_to_be32(wqe->rdma_wr.rkey);
+	ohdr->u.tid_rdma.w_req.reth.length = cpu_to_be32(*len);
+	ohdr->u.tid_rdma.w_req.verbs_qp = cpu_to_be32(qp->remote_qpn);
+	*bth1 &= ~RVT_QPN_MASK;
+	*bth1 |= remote->qp;
+	qp->s_state = TID_OP(WRITE_REQ);
+	qp->s_flags |= HFI1_S_WAIT_TID_RESP;
+	*bth2 |= IB_BTH_REQ_ACK;
+	*len = 0;
+
+	rcu_read_unlock();
+	return sizeof(ohdr->u.tid_rdma.w_req) / sizeof(u32);
+}
diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.h b/drivers/infiniband/hw/hfi1/tid_rdma.h
index a53598ce45b2..baba539b2b80 100644
--- a/drivers/infiniband/hw/hfi1/tid_rdma.h
+++ b/drivers/infiniband/hw/hfi1/tid_rdma.h
@@ -233,4 +233,7 @@ static inline void hfi1_setup_tid_rdma_wqe(struct rvt_qp *qp,
 		setup_tid_rdma_wqe(qp, wqe);
 }
 
+u32 hfi1_build_tid_rdma_write_req(struct rvt_qp *qp, struct rvt_swqe *wqe,
+				  struct ib_other_headers *ohdr,
+				  u32 *bth1, u32 *bth2, u32 *len);
 #endif /* HFI1_TID_RDMA_H */
diff --git a/include/rdma/ib_hdrs.h b/include/rdma/ib_hdrs.h
index 58a0a0f99e7f..9a90bd031e8c 100644
--- a/include/rdma/ib_hdrs.h
+++ b/include/rdma/ib_hdrs.h
@@ -123,6 +123,11 @@ union ib_ehdrs {
 	union {
 		struct tid_rdma_read_req r_req;
 		struct tid_rdma_read_resp r_rsp;
+		struct tid_rdma_write_req w_req;
+		struct tid_rdma_write_resp w_rsp;
+		struct tid_rdma_write_data w_data;
+		struct tid_rdma_resync resync;
+		struct tid_rdma_ack ack;
 	} tid_rdma;
 }  __packed;
 
diff --git a/include/rdma/tid_rdma_defs.h b/include/rdma/tid_rdma_defs.h
index 1c431ea32b52..08fe47c7ad2c 100644
--- a/include/rdma/tid_rdma_defs.h
+++ b/include/rdma/tid_rdma_defs.h
@@ -27,16 +27,71 @@ struct tid_rdma_read_resp {
 	__be32 verbs_qp;
 };
 
+struct tid_rdma_write_req {
+	__le32 kdeth0;
+	__le32 kdeth1;
+	struct ib_reth reth;
+	__be32 reserved[2];
+	__be32 verbs_qp;
+};
+
+struct tid_rdma_write_resp {
+	__le32 kdeth0;
+	__le32 kdeth1;
+	__be32 aeth;
+	__be32 reserved[3];
+	__be32 tid_flow_psn;
+	__be32 tid_flow_qp;
+	__be32 verbs_qp;
+};
+
+struct tid_rdma_write_data {
+	__le32 kdeth0;
+	__le32 kdeth1;
+	__be32 reserved[6];
+	__be32 verbs_qp;
+};
+
+struct tid_rdma_resync {
+	__le32 kdeth0;
+	__le32 kdeth1;
+	__be32 reserved[6];
+	__be32 verbs_qp;
+};
+
+struct tid_rdma_ack {
+	__le32 kdeth0;
+	__le32 kdeth1;
+	__be32 aeth;
+	__be32 reserved[2];
+	__be32 tid_flow_psn;
+	__be32 verbs_psn;
+	__be32 tid_flow_qp;
+	__be32 verbs_qp;
+};
+
 /*
  * TID RDMA Opcodes
  */
 #define IB_OPCODE_TID_RDMA 0xe0
 enum {
+	IB_OPCODE_WRITE_REQ       = 0x0,
+	IB_OPCODE_WRITE_RESP      = 0x1,
+	IB_OPCODE_WRITE_DATA      = 0x2,
+	IB_OPCODE_WRITE_DATA_LAST = 0x3,
 	IB_OPCODE_READ_REQ        = 0x4,
 	IB_OPCODE_READ_RESP       = 0x5,
+	IB_OPCODE_RESYNC          = 0x6,
+	IB_OPCODE_ACK             = 0x7,
 
+	IB_OPCODE(TID_RDMA, WRITE_REQ),
+	IB_OPCODE(TID_RDMA, WRITE_RESP),
+	IB_OPCODE(TID_RDMA, WRITE_DATA),
+	IB_OPCODE(TID_RDMA, WRITE_DATA_LAST),
 	IB_OPCODE(TID_RDMA, READ_REQ),
 	IB_OPCODE(TID_RDMA, READ_RESP),
+	IB_OPCODE(TID_RDMA, RESYNC),
+	IB_OPCODE(TID_RDMA, ACK),
 };
 
 #define TID_OP(x) IB_OPCODE_TID_RDMA_##x
@@ -47,6 +102,7 @@ enum {
  * low level drivers. Two of those are used but renamed
  * to be more descriptive.
  */
+#define IB_WR_TID_RDMA_WRITE IB_WR_RESERVED1
 #define IB_WR_TID_RDMA_READ  IB_WR_RESERVED2
 
 #endif /* TID_RDMA_DEFS_H */
-- 
cgit v1.2.3


From 4f9264d156dc6c154a8a6cfae780730bad45c6f8 Mon Sep 17 00:00:00 2001
From: Kaike Wan <kaike.wan@intel.com>
Date: Wed, 23 Jan 2019 21:48:48 -0800
Subject: IB/hfi1: Add an s_acked_ack_queue pointer

The s_ack_queue is managed by two pointers into the ring:
r_head_ack_queue and s_tail_ack_queue. r_head_ack_queue is the index of
where the next received request is going to be placed and s_tail_ack_queue
is the entry of the request currently being processed. This works
perfectly fine for normal Verbs as the requests are processed one at a
time and the s_tail_ack_queue is not moved until the request that it
points to is fully completed.

In this fashion, s_tail_ack_queue constantly chases r_head_ack_queue and
the two pointers can easily be used to determine "queue full" and "queue
empty" conditions.

The detection of these two conditions are imported in determining when an
old entry can safely be overwritten with a new received request and the
resources associated with the old request be safely released.

When pipelined TID RDMA WRITE is introduced into this mix, things look
very different. r_head_ack_queue is still the point at which a newly
received request will be inserted, s_tail_ack_queue is still the
currently processed request. However, with pipelined TID RDMA WRITE
requests, s_tail_ack_queue moves to the next request once all TID RDMA
WRITE responses for that request have been sent. The rest of the protocol
for a particular request is managed by other pointers specific to TID RDMA
- r_tid_tail and r_tid_ack - which point to the entries for which the next
TID RDMA DATA packets are going to arrive and the request for which
the next TID RDMA ACK packets are to be generated, respectively.

What this means is that entries in the ring, which are "behind"
s_tail_ack_queue (entries which s_tail_ack_queue has gone past) are no
longer considered complete. This is where the problem is - a newly
received request could potentially overwrite a still active TID RDMA WRITE
request.

The reason why the TID RDMA pointers trail s_tail_ack_queue is that the
normal Verbs send engine uses s_tail_ack_queue as the pointer for the next
response. Since TID RDMA WRITE responses are processed by the normal Verbs
send engine, s_tail_ack_queue had to be moved to the next entry once all
TID RDMA WRITE response packets were sent to get the desired pipelining
between requests. Doing otherwise would mean that the normal Verbs send
engine would not be able to send the TID RDMA WRITE responses for the next
TID RDMA request until the current one is fully completed.

This patch introduces the s_acked_ack_queue index to point to the next
request to complete on the responder side. For requests other than TID
RDMA WRITE, s_acked_ack_queue should always be kept in sync with
s_tail_ack_queue. For TID RDMA WRITE request, it may fall behind
s_tail_ack_queue.

Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Mitko Haralanov <mitko.haralanov@intel.com>
Signed-off-by: Kaike Wan <kaike.wan@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/hfi1/rc.c        | 33 ++++++++++++++++++++++++++++-----
 drivers/infiniband/hw/hfi1/rc.h        |  1 +
 drivers/infiniband/hw/hfi1/tid_rdma.c  |  2 ++
 drivers/infiniband/hw/hfi1/trace_tid.h | 10 ++++++++--
 drivers/infiniband/sw/rdmavt/qp.c      |  1 +
 include/rdma/rdmavt_qp.h               |  1 +
 6 files changed, 41 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c
index 6c9ef572fc69..9dc8e524510e 100644
--- a/drivers/infiniband/hw/hfi1/rc.c
+++ b/drivers/infiniband/hw/hfi1/rc.c
@@ -120,6 +120,7 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp,
 	struct hfi1_qp_priv *priv = qp->priv;
 	bool last_pkt;
 	u32 delta;
+	u8 next = qp->s_tail_ack_queue;
 
 	trace_hfi1_rsp_make_rc_ack(qp, 0);
 	lockdep_assert_held(&qp->s_lock);
@@ -149,9 +150,17 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp,
 		 * response has been sent instead of only being
 		 * constructed.
 		 */
-		if (++qp->s_tail_ack_queue >
-		    rvt_size_atomic(ib_to_rvt(qp->ibqp.device)))
-			qp->s_tail_ack_queue = 0;
+		if (++next > rvt_size_atomic(&dev->rdi))
+			next = 0;
+		/*
+		 * Only advance the s_acked_ack_queue pointer if there
+		 * have been no TID RDMA requests.
+		 */
+		e = &qp->s_ack_queue[qp->s_tail_ack_queue];
+		if (e->opcode != TID_OP(WRITE_REQ) &&
+		    qp->s_acked_ack_queue == qp->s_tail_ack_queue)
+			qp->s_acked_ack_queue = next;
+		qp->s_tail_ack_queue = next;
 		/* FALLTHROUGH */
 	case OP(SEND_ONLY):
 	case OP(ACKNOWLEDGE):
@@ -172,6 +181,10 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp,
 			 */
 			len = e->rdma_sge.sge_length;
 			if (len && !e->rdma_sge.mr) {
+				if (qp->s_acked_ack_queue ==
+				    qp->s_tail_ack_queue)
+					qp->s_acked_ack_queue =
+						qp->r_head_ack_queue;
 				qp->s_tail_ack_queue = qp->r_head_ack_queue;
 				goto bail;
 			}
@@ -202,6 +215,10 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp,
 			 */
 			len = e->rdma_sge.sge_length;
 			if (len && !e->rdma_sge.mr) {
+				if (qp->s_acked_ack_queue ==
+				    qp->s_tail_ack_queue)
+					qp->s_acked_ack_queue =
+						qp->r_head_ack_queue;
 				qp->s_tail_ack_queue = qp->r_head_ack_queue;
 				goto bail;
 			}
@@ -2235,6 +2252,8 @@ static noinline int rc_rcv_error(struct ib_other_headers *ohdr, void *data,
 		e->psn = psn;
 		if (old_req)
 			goto unlock_done;
+		if (qp->s_acked_ack_queue == qp->s_tail_ack_queue)
+			qp->s_acked_ack_queue = prev;
 		qp->s_tail_ack_queue = prev;
 		break;
 	}
@@ -2248,6 +2267,8 @@ static noinline int rc_rcv_error(struct ib_other_headers *ohdr, void *data,
 		 */
 		if (!e || e->opcode != (u8)opcode || old_req)
 			goto unlock_done;
+		if (qp->s_tail_ack_queue == qp->s_acked_ack_queue)
+			qp->s_acked_ack_queue = prev;
 		qp->s_tail_ack_queue = prev;
 		break;
 	}
@@ -2274,6 +2295,8 @@ static noinline int rc_rcv_error(struct ib_other_headers *ohdr, void *data,
 		 * Resend the RDMA read or atomic op which
 		 * ACKs this duplicate request.
 		 */
+		if (qp->s_tail_ack_queue == qp->s_acked_ack_queue)
+			qp->s_acked_ack_queue = mra;
 		qp->s_tail_ack_queue = mra;
 		break;
 	}
@@ -2646,7 +2669,7 @@ send_last:
 		if (next > rvt_size_atomic(ib_to_rvt(qp->ibqp.device)))
 			next = 0;
 		spin_lock_irqsave(&qp->s_lock, flags);
-		if (unlikely(next == qp->s_tail_ack_queue)) {
+		if (unlikely(next == qp->s_acked_ack_queue)) {
 			if (!qp->s_ack_queue[next].sent)
 				goto nack_inv_unlck;
 			update_ack_queue(qp, next);
@@ -2723,7 +2746,7 @@ send_last:
 		if (next > rvt_size_atomic(ib_to_rvt(qp->ibqp.device)))
 			next = 0;
 		spin_lock_irqsave(&qp->s_lock, flags);
-		if (unlikely(next == qp->s_tail_ack_queue)) {
+		if (unlikely(next == qp->s_acked_ack_queue)) {
 			if (!qp->s_ack_queue[next].sent)
 				goto nack_inv_unlck;
 			update_ack_queue(qp, next);
diff --git a/drivers/infiniband/hw/hfi1/rc.h b/drivers/infiniband/hw/hfi1/rc.h
index 4329eadcb3df..8e0935b9bf2a 100644
--- a/drivers/infiniband/hw/hfi1/rc.h
+++ b/drivers/infiniband/hw/hfi1/rc.h
@@ -18,6 +18,7 @@ static inline void update_ack_queue(struct rvt_qp *qp, unsigned int n)
 	if (next > rvt_size_atomic(ib_to_rvt(qp->ibqp.device)))
 		next = 0;
 	qp->s_tail_ack_queue = next;
+	qp->s_acked_ack_queue = next;
 	qp->s_ack_state = OP(ACKNOWLEDGE);
 }
 
diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.c b/drivers/infiniband/hw/hfi1/tid_rdma.c
index 089e301d9bcd..c320a99afb35 100644
--- a/drivers/infiniband/hw/hfi1/tid_rdma.c
+++ b/drivers/infiniband/hw/hfi1/tid_rdma.c
@@ -2044,6 +2044,8 @@ static int tid_rdma_rcv_error(struct hfi1_packet *packet,
 			goto unlock;
 	}
 	/* Re-process old requests.*/
+	if (qp->s_acked_ack_queue == qp->s_tail_ack_queue)
+		qp->s_acked_ack_queue = prev;
 	qp->s_tail_ack_queue = prev;
 	/*
 	 * Since the qp->s_tail_ack_queue is modified, the
diff --git a/drivers/infiniband/hw/hfi1/trace_tid.h b/drivers/infiniband/hw/hfi1/trace_tid.h
index b71638c22d4b..51f5b0e8da71 100644
--- a/drivers/infiniband/hw/hfi1/trace_tid.h
+++ b/drivers/infiniband/hw/hfi1/trace_tid.h
@@ -40,7 +40,7 @@ u16 hfi1_trace_get_tid_idx(u32 ent);
 #define RSP_INFO_PRN "[%s] qpn 0x%x state 0x%x s_state 0x%x psn 0x%x " \
 		     "r_psn 0x%x r_state 0x%x r_flags 0x%x " \
 		     "r_head_ack_queue %u s_tail_ack_queue %u " \
-		     "s_ack_state 0x%x " \
+		     "s_acked_ack_queue %u s_ack_state 0x%x " \
 		     "s_nak_state 0x%x s_flags 0x%x ps_flags 0x%x " \
 		     "iow_flags 0x%lx"
 
@@ -62,7 +62,7 @@ u16 hfi1_trace_get_tid_idx(u32 ent);
 		    "s_next_psn 0x%x"
 
 #define RCV_ERR_PRN "[%s] qpn 0x%x s_flags 0x%x state 0x%x " \
-		    "s_tail_ack_queue %u " \
+		    "s_acked_ack_queue %u s_tail_ack_queue %u " \
 		    "r_head_ack_queue %u opcode 0x%x psn 0x%x r_psn 0x%x " \
 		    " diff %d"
 
@@ -671,6 +671,7 @@ DECLARE_EVENT_CLASS(/* rsp_info */
 		__field(u8, r_flags)
 		__field(u8, r_head_ack_queue)
 		__field(u8, s_tail_ack_queue)
+		__field(u8, s_acked_ack_queue)
 		__field(u8, s_ack_state)
 		__field(u8, s_nak_state)
 		__field(u8, r_nak_state)
@@ -691,6 +692,7 @@ DECLARE_EVENT_CLASS(/* rsp_info */
 		__entry->r_flags = qp->r_flags;
 		__entry->r_head_ack_queue = qp->r_head_ack_queue;
 		__entry->s_tail_ack_queue = qp->s_tail_ack_queue;
+		__entry->s_acked_ack_queue = qp->s_acked_ack_queue;
 		__entry->s_ack_state = qp->s_ack_state;
 		__entry->s_nak_state = qp->s_nak_state;
 		__entry->s_flags = qp->s_flags;
@@ -709,6 +711,7 @@ DECLARE_EVENT_CLASS(/* rsp_info */
 		__entry->r_flags,
 		__entry->r_head_ack_queue,
 		__entry->s_tail_ack_queue,
+		__entry->s_acked_ack_queue,
 		__entry->s_ack_state,
 		__entry->s_nak_state,
 		__entry->s_flags,
@@ -1007,6 +1010,7 @@ DECLARE_EVENT_CLASS(/* rc_rcv_err */
 		__field(u32, qpn)
 		__field(u32, s_flags)
 		__field(u8, state)
+		__field(u8, s_acked_ack_queue)
 		__field(u8, s_tail_ack_queue)
 		__field(u8, r_head_ack_queue)
 		__field(u32, opcode)
@@ -1019,6 +1023,7 @@ DECLARE_EVENT_CLASS(/* rc_rcv_err */
 		__entry->qpn = qp->ibqp.qp_num;
 		__entry->s_flags = qp->s_flags;
 		__entry->state = qp->state;
+		__entry->s_acked_ack_queue = qp->s_acked_ack_queue;
 		__entry->s_tail_ack_queue = qp->s_tail_ack_queue;
 		__entry->r_head_ack_queue = qp->r_head_ack_queue;
 		__entry->opcode = opcode;
@@ -1032,6 +1037,7 @@ DECLARE_EVENT_CLASS(/* rc_rcv_err */
 		__entry->qpn,
 		__entry->s_flags,
 		__entry->state,
+		__entry->s_acked_ack_queue,
 		__entry->s_tail_ack_queue,
 		__entry->r_head_ack_queue,
 		__entry->opcode,
diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c
index 2769ebdf89fb..14ec2577bcaa 100644
--- a/drivers/infiniband/sw/rdmavt/qp.c
+++ b/drivers/infiniband/sw/rdmavt/qp.c
@@ -854,6 +854,7 @@ static void rvt_init_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp,
 	qp->s_mig_state = IB_MIG_MIGRATED;
 	qp->r_head_ack_queue = 0;
 	qp->s_tail_ack_queue = 0;
+	qp->s_acked_ack_queue = 0;
 	qp->s_num_rd_atomic = 0;
 	if (qp->r_rq.wq) {
 		qp->r_rq.wq->head = 0;
diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h
index d8d88d023092..4ee612ab6cb4 100644
--- a/include/rdma/rdmavt_qp.h
+++ b/include/rdma/rdmavt_qp.h
@@ -375,6 +375,7 @@ struct rvt_qp {
 	u8 s_rnr_retry;         /* requester RNR retry counter */
 	u8 s_num_rd_atomic;     /* number of RDMA read/atomic pending */
 	u8 s_tail_ack_queue;    /* index into s_ack_queue[] */
+	u8 s_acked_ack_queue;   /* index into s_ack_queue[] */
 
 	struct rvt_sge_state s_ack_rdma_sge;
 	struct timer_list s_timer;
-- 
cgit v1.2.3


From 3c6cb20a0d17d7a75778fb0935d6fa427c8177af Mon Sep 17 00:00:00 2001
From: Kaike Wan <kaike.wan@intel.com>
Date: Wed, 23 Jan 2019 21:51:39 -0800
Subject: IB/hfi1: Add TID RDMA WRITE functionality into RDMA verbs

This patch integrates TID RDMA WRITE protocol into normal RDMA verbs
framework. The TID RDMA WRITE protocol is an end-to-end protocol
between the hfi1 drivers on two OPA nodes that converts a qualified
RDMA WRITE request into a TID RDMA WRITE request to avoid data copying
on the responder side.

Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Mitko Haralanov <mitko.haralanov@intel.com>
Signed-off-by: Kaike Wan <kaike.wan@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/hfi1/qp.c        |   7 +
 drivers/infiniband/hw/hfi1/rc.c        | 487 +++++++++++++++++++++++++++++----
 drivers/infiniband/hw/hfi1/tid_rdma.c  |  14 +
 drivers/infiniband/hw/hfi1/user_sdma.c |   3 +-
 drivers/infiniband/hw/hfi1/verbs.c     |  17 +-
 drivers/infiniband/hw/hfi1/verbs.h     |   1 +
 include/rdma/rdmavt_qp.h               |   1 +
 7 files changed, 481 insertions(+), 49 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c
index 96632c77f36f..cfd598e4b303 100644
--- a/drivers/infiniband/hw/hfi1/qp.c
+++ b/drivers/infiniband/hw/hfi1/qp.c
@@ -138,6 +138,12 @@ const struct rvt_operation_params hfi1_post_parms[RVT_OPERATION_MAX] = {
 	.flags = RVT_OPERATION_USE_RESERVE,
 },
 
+[IB_WR_TID_RDMA_WRITE] = {
+	.length = sizeof(struct ib_rdma_wr),
+	.qpt_support = BIT(IB_QPT_RC),
+	.flags = RVT_OPERATION_IGN_RNR_CNT,
+},
+
 };
 
 static void flush_list_head(struct list_head *l)
@@ -780,6 +786,7 @@ void quiesce_qp(struct rvt_qp *qp)
 	struct hfi1_qp_priv *priv = qp->priv;
 
 	hfi1_del_tid_reap_timer(qp);
+	hfi1_del_tid_retry_timer(qp);
 	iowait_sdma_drain(&priv->s_iowait);
 	qp_pio_drain(qp);
 	flush_tx_list(qp);
diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c
index fcb733ea8dfb..6d2abea896e5 100644
--- a/drivers/infiniband/hw/hfi1/rc.c
+++ b/drivers/infiniband/hw/hfi1/rc.c
@@ -111,16 +111,17 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp,
 		       struct hfi1_pkt_state *ps)
 {
 	struct rvt_ack_entry *e;
-	u32 hwords;
+	u32 hwords, hdrlen;
 	u32 len = 0;
 	u32 bth0 = 0, bth2 = 0;
 	u32 bth1 = qp->remote_qpn | (HFI1_CAP_IS_KSET(OPFN) << IB_BTHE_E_SHIFT);
 	int middle = 0;
 	u32 pmtu = qp->pmtu;
-	struct hfi1_qp_priv *priv = qp->priv;
+	struct hfi1_qp_priv *qpriv = qp->priv;
 	bool last_pkt;
 	u32 delta;
 	u8 next = qp->s_tail_ack_queue;
+	struct tid_rdma_request *req;
 
 	trace_hfi1_rsp_make_rc_ack(qp, 0);
 	lockdep_assert_held(&qp->s_lock);
@@ -128,7 +129,7 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp,
 	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK))
 		goto bail;
 
-	if (priv->hdr_type == HFI1_PKT_TYPE_9B)
+	if (qpriv->hdr_type == HFI1_PKT_TYPE_9B)
 		/* header size in 32-bit words LRH+BTH = (8+12)/4. */
 		hwords = 5;
 	else
@@ -206,6 +207,21 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp,
 			hwords++;
 			qp->s_ack_rdma_psn = e->psn;
 			bth2 = mask_psn(qp->s_ack_rdma_psn++);
+		} else if (e->opcode == TID_OP(WRITE_REQ)) {
+			/*
+			 * If a TID RDMA WRITE RESP is being resent, we have to
+			 * wait for the actual request. All requests that are to
+			 * be resent will have their state set to
+			 * TID_REQUEST_RESEND. When the new request arrives, the
+			 * state will be changed to TID_REQUEST_RESEND_ACTIVE.
+			 */
+			req = ack_to_tid_req(e);
+			if (req->state == TID_REQUEST_RESEND ||
+			    req->state == TID_REQUEST_INIT_RESEND)
+				goto bail;
+			qp->s_ack_state = TID_OP(WRITE_RESP);
+			qp->s_ack_rdma_psn = mask_psn(e->psn + req->cur_seg);
+			goto write_resp;
 		} else if (e->opcode == TID_OP(READ_REQ)) {
 			/*
 			 * If a TID RDMA read response is being resent and
@@ -267,6 +283,59 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp,
 		bth2 = mask_psn(qp->s_ack_rdma_psn++);
 		break;
 
+	case TID_OP(WRITE_RESP):
+write_resp:
+		/*
+		 * 1. Check if RVT_S_ACK_PENDING is set. If yes,
+		 *    goto normal.
+		 * 2. Attempt to allocate TID resources.
+		 * 3. Remove RVT_S_RESP_PENDING flags from s_flags
+		 * 4. If resources not available:
+		 *    4.1 Set RVT_S_WAIT_TID_SPACE
+		 *    4.2 Queue QP on RCD TID queue
+		 *    4.3 Put QP on iowait list.
+		 *    4.4 Build IB RNR NAK with appropriate timeout value
+		 *    4.5 Return indication progress made.
+		 * 5. If resources are available:
+		 *    5.1 Program HW flow CSRs
+		 *    5.2 Build TID RDMA WRITE RESP packet
+		 *    5.3 If more resources needed, do 2.1 - 2.3.
+		 *    5.4 Wake up next QP on RCD TID queue.
+		 *    5.5 Return indication progress made.
+		 */
+
+		e = &qp->s_ack_queue[qp->s_tail_ack_queue];
+		req = ack_to_tid_req(e);
+
+		/*
+		 * Send scheduled RNR NAK's. RNR NAK's need to be sent at
+		 * segment boundaries, not at request boundaries. Don't change
+		 * s_ack_state because we are still in the middle of a request
+		 */
+		if (qpriv->rnr_nak_state == TID_RNR_NAK_SEND &&
+		    qp->s_tail_ack_queue == qpriv->r_tid_alloc &&
+		    req->cur_seg == req->alloc_seg) {
+			qpriv->rnr_nak_state = TID_RNR_NAK_SENT;
+			goto normal_no_state;
+		}
+
+		bth2 = mask_psn(qp->s_ack_rdma_psn);
+		hdrlen = hfi1_build_tid_rdma_write_resp(qp, e, ohdr, &bth1,
+							bth2, &len,
+							&ps->s_txreq->ss);
+		if (!hdrlen)
+			return 0;
+
+		hwords += hdrlen;
+		bth0 = qp->s_ack_state << 24;
+		qp->s_ack_rdma_psn++;
+		if (req->cur_seg != req->total_segs)
+			break;
+
+		e->sent = 1;
+		qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST);
+		break;
+
 	case TID_OP(READ_RESP):
 read_resp:
 		e = &qp->s_ack_queue[qp->s_tail_ack_queue];
@@ -298,8 +367,7 @@ normal:
 		 * (see above).
 		 */
 		qp->s_ack_state = OP(SEND_ONLY);
-		qp->s_flags &= ~RVT_S_ACK_PENDING;
-		ps->s_txreq->ss = NULL;
+normal_no_state:
 		if (qp->s_nak_state)
 			ohdr->u.aeth =
 				cpu_to_be32((qp->r_msn & IB_MSN_MASK) |
@@ -311,9 +379,11 @@ normal:
 		len = 0;
 		bth0 = OP(ACKNOWLEDGE) << 24;
 		bth2 = mask_psn(qp->s_ack_psn);
+		qp->s_flags &= ~RVT_S_ACK_PENDING;
+		ps->s_txreq->ss = NULL;
 	}
 	qp->s_rdma_ack_cnt++;
-	ps->s_txreq->sde = priv->s_sde;
+	ps->s_txreq->sde = qpriv->s_sde;
 	ps->s_txreq->s_cur_size = len;
 	ps->s_txreq->hdr_dwords = hwords;
 	hfi1_make_ruc_header(qp, ohdr, bth0, bth1, bth2, middle, ps);
@@ -366,6 +436,7 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
 	int middle = 0;
 	int delta;
 	struct tid_rdma_flow *flow = NULL;
+	struct tid_rdma_params *remote;
 
 	trace_hfi1_sender_make_rc_req(qp);
 	lockdep_assert_held(&qp->s_lock);
@@ -414,7 +485,7 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
 		goto done_free_tx;
 	}
 
-	if (qp->s_flags & (RVT_S_WAIT_RNR | RVT_S_WAIT_ACK))
+	if (qp->s_flags & (RVT_S_WAIT_RNR | RVT_S_WAIT_ACK | HFI1_S_WAIT_HALT))
 		goto bail;
 
 	if (cmp_psn(qp->s_psn, qp->s_sending_hpsn) <= 0) {
@@ -586,6 +657,108 @@ no_flow_control:
 				qp->s_cur = 0;
 			break;
 
+		case IB_WR_TID_RDMA_WRITE:
+			if (newreq) {
+				/*
+				 * Limit the number of TID RDMA WRITE requests.
+				 */
+				if (atomic_read(&priv->n_tid_requests) >=
+				    HFI1_TID_RDMA_WRITE_CNT)
+					goto bail;
+
+				if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT))
+					qp->s_lsn++;
+			}
+
+			hwords += hfi1_build_tid_rdma_write_req(qp, wqe, ohdr,
+								&bth1, &bth2,
+								&len);
+			ss = NULL;
+			if (priv->s_tid_cur == HFI1_QP_WQE_INVALID) {
+				priv->s_tid_cur = qp->s_cur;
+				if (priv->s_tid_tail == HFI1_QP_WQE_INVALID) {
+					priv->s_tid_tail = qp->s_cur;
+					priv->s_state = TID_OP(WRITE_RESP);
+				}
+			} else if (priv->s_tid_cur == priv->s_tid_head) {
+				struct rvt_swqe *__w;
+				struct tid_rdma_request *__r;
+
+				__w = rvt_get_swqe_ptr(qp, priv->s_tid_cur);
+				__r = wqe_to_tid_req(__w);
+
+				/*
+				 * The s_tid_cur pointer is advanced to s_cur if
+				 * any of the following conditions about the WQE
+				 * to which s_ti_cur currently points to are
+				 * satisfied:
+				 *   1. The request is not a TID RDMA WRITE
+				 *      request,
+				 *   2. The request is in the INACTIVE or
+				 *      COMPLETE states (TID RDMA READ requests
+				 *      stay at INACTIVE and TID RDMA WRITE
+				 *      transition to COMPLETE when done),
+				 *   3. The request is in the ACTIVE or SYNC
+				 *      state and the number of completed
+				 *      segments is equal to the total segment
+				 *      count.
+				 *      (If ACTIVE, the request is waiting for
+				 *       ACKs. If SYNC, the request has not
+				 *       received any responses because it's
+				 *       waiting on a sync point.)
+				 */
+				if (__w->wr.opcode != IB_WR_TID_RDMA_WRITE ||
+				    __r->state == TID_REQUEST_INACTIVE ||
+				    __r->state == TID_REQUEST_COMPLETE ||
+				    ((__r->state == TID_REQUEST_ACTIVE ||
+				      __r->state == TID_REQUEST_SYNC) &&
+				     __r->comp_seg == __r->total_segs)) {
+					if (priv->s_tid_tail ==
+					    priv->s_tid_cur &&
+					    priv->s_state ==
+					    TID_OP(WRITE_DATA_LAST)) {
+						priv->s_tid_tail = qp->s_cur;
+						priv->s_state =
+							TID_OP(WRITE_RESP);
+					}
+					priv->s_tid_cur = qp->s_cur;
+				}
+				/*
+				 * A corner case: when the last TID RDMA WRITE
+				 * request was completed, s_tid_head,
+				 * s_tid_cur, and s_tid_tail all point to the
+				 * same location. Other requests are posted and
+				 * s_cur wraps around to the same location,
+				 * where a new TID RDMA WRITE is posted. In
+				 * this case, none of the indices need to be
+				 * updated. However, the priv->s_state should.
+				 */
+				if (priv->s_tid_tail == qp->s_cur &&
+				    priv->s_state == TID_OP(WRITE_DATA_LAST))
+					priv->s_state = TID_OP(WRITE_RESP);
+			}
+			req = wqe_to_tid_req(wqe);
+			if (newreq) {
+				priv->s_tid_head = qp->s_cur;
+				priv->pending_tid_w_resp += req->total_segs;
+				atomic_inc(&priv->n_tid_requests);
+				atomic_dec(&priv->n_requests);
+			} else {
+				req->state = TID_REQUEST_RESEND;
+				req->comp_seg = delta_psn(bth2, wqe->psn);
+				/*
+				 * Pull back any segments since we are going
+				 * to re-receive them.
+				 */
+				req->setup_head = req->clear_tail;
+				priv->pending_tid_w_resp +=
+					delta_psn(wqe->lpsn, bth2) + 1;
+			}
+
+			if (++qp->s_cur == qp->s_size)
+				qp->s_cur = 0;
+			break;
+
 		case IB_WR_RDMA_READ:
 			/*
 			 * Don't allow more operations to be started
@@ -745,7 +918,8 @@ no_flow_control:
 			if (qp->s_tail >= qp->s_size)
 				qp->s_tail = 0;
 		}
-		if (wqe->wr.opcode == IB_WR_RDMA_READ)
+		if (wqe->wr.opcode == IB_WR_RDMA_READ ||
+		    wqe->wr.opcode == IB_WR_TID_RDMA_WRITE)
 			qp->s_psn = wqe->lpsn + 1;
 		else if (wqe->wr.opcode == IB_WR_TID_RDMA_READ)
 			qp->s_psn = req->s_next_psn;
@@ -865,6 +1039,33 @@ no_flow_control:
 		if (qp->s_cur == qp->s_size)
 			qp->s_cur = 0;
 		break;
+
+	case TID_OP(WRITE_RESP):
+		/*
+		 * This value for s_state is used for restarting a TID RDMA
+		 * WRITE request. See comment in OP(RDMA_READ_RESPONSE_MIDDLE
+		 * for more).
+		 */
+		req = wqe_to_tid_req(wqe);
+		req->state = TID_REQUEST_RESEND;
+		rcu_read_lock();
+		remote = rcu_dereference(priv->tid_rdma.remote);
+		req->comp_seg = delta_psn(qp->s_psn, wqe->psn);
+		len = wqe->length - (req->comp_seg * remote->max_len);
+		rcu_read_unlock();
+
+		bth2 = mask_psn(qp->s_psn);
+		hwords += hfi1_build_tid_rdma_write_req(qp, wqe, ohdr, &bth1,
+							&bth2, &len);
+		qp->s_psn = wqe->lpsn + 1;
+		ss = NULL;
+		qp->s_state = TID_OP(WRITE_REQ);
+		priv->pending_tid_w_resp += delta_psn(wqe->lpsn, bth2) + 1;
+		priv->s_tid_cur = qp->s_cur;
+		if (++qp->s_cur == qp->s_size)
+			qp->s_cur = 0;
+		break;
+
 	case TID_OP(READ_RESP):
 		if (wqe->wr.opcode != IB_WR_TID_RDMA_READ)
 			goto bail;
@@ -965,7 +1166,8 @@ no_flow_control:
 	}
 	qp->s_sending_hpsn = bth2;
 	delta = delta_psn(bth2, wqe->psn);
-	if (delta && delta % HFI1_PSN_CREDIT == 0)
+	if (delta && delta % HFI1_PSN_CREDIT == 0 &&
+	    wqe->wr.opcode != IB_WR_TID_RDMA_WRITE)
 		bth2 |= IB_BTH_REQ_ACK;
 	if (qp->s_flags & RVT_S_SEND_ONE) {
 		qp->s_flags &= ~RVT_S_SEND_ONE;
@@ -998,6 +1200,12 @@ bail:
 bail_no_tx:
 	ps->s_txreq = NULL;
 	qp->s_flags &= ~RVT_S_BUSY;
+	/*
+	 * If we didn't get a txreq, the QP will be woken up later to try
+	 * again. Set the flags to indicate which work item to wake
+	 * up.
+	 */
+	iowait_set_flag(&priv->s_iowait, IOWAIT_PENDING_IB);
 	return 0;
 }
 
@@ -1285,6 +1493,7 @@ static void reset_psn(struct rvt_qp *qp, u32 psn)
 	lockdep_assert_held(&qp->s_lock);
 	qp->s_cur = n;
 	priv->pending_tid_r_segs = 0;
+	priv->pending_tid_w_resp = 0;
 	qp->s_num_rd_atomic = 0;
 
 	/*
@@ -1342,6 +1551,10 @@ static void reset_psn(struct rvt_qp *qp, u32 psn)
 		qp->s_state = OP(RDMA_READ_RESPONSE_LAST);
 		break;
 
+	case IB_WR_TID_RDMA_WRITE:
+		qp->s_state = TID_OP(WRITE_RESP);
+		break;
+
 	case IB_WR_RDMA_READ:
 		qp->s_state = OP(RDMA_READ_RESPONSE_MIDDLE);
 		break;
@@ -1435,7 +1648,7 @@ void hfi1_restart_rc(struct rvt_qp *qp, u32 psn, int wait)
 
 	qp->s_flags &= ~(RVT_S_WAIT_FENCE | RVT_S_WAIT_RDMAR |
 			 RVT_S_WAIT_SSN_CREDIT | RVT_S_WAIT_PSN |
-			 RVT_S_WAIT_ACK);
+			 RVT_S_WAIT_ACK | HFI1_S_WAIT_TID_RESP);
 	if (wait)
 		qp->s_flags |= RVT_S_SEND_ONE;
 	reset_psn(qp, psn);
@@ -1443,7 +1656,8 @@ void hfi1_restart_rc(struct rvt_qp *qp, u32 psn, int wait)
 
 /*
  * Set qp->s_sending_psn to the next PSN after the given one.
- * This would be psn+1 except when RDMA reads are present.
+ * This would be psn+1 except when RDMA reads or TID RDMA ops
+ * are present.
  */
 static void reset_sending_psn(struct rvt_qp *qp, u32 psn)
 {
@@ -1456,7 +1670,8 @@ static void reset_sending_psn(struct rvt_qp *qp, u32 psn)
 		wqe = rvt_get_swqe_ptr(qp, n);
 		if (cmp_psn(psn, wqe->lpsn) <= 0) {
 			if (wqe->wr.opcode == IB_WR_RDMA_READ ||
-			    wqe->wr.opcode == IB_WR_TID_RDMA_READ)
+			    wqe->wr.opcode == IB_WR_TID_RDMA_READ ||
+			    wqe->wr.opcode == IB_WR_TID_RDMA_WRITE)
 				qp->s_sending_psn = wqe->lpsn + 1;
 			else
 				qp->s_sending_psn = psn + 1;
@@ -1479,8 +1694,9 @@ void hfi1_rc_send_complete(struct rvt_qp *qp, struct hfi1_opa_header *opah)
 	struct rvt_swqe *wqe;
 	struct ib_header *hdr = NULL;
 	struct hfi1_16b_header *hdr_16b = NULL;
-	u32 opcode;
+	u32 opcode, head, tail;
 	u32 psn;
+	struct tid_rdma_request *req;
 
 	lockdep_assert_held(&qp->s_lock);
 	if (!(ib_rvt_state_ops[qp->state] & RVT_SEND_OR_FLUSH_OR_RECV_OK))
@@ -1507,29 +1723,84 @@ void hfi1_rc_send_complete(struct rvt_qp *qp, struct hfi1_opa_header *opah)
 	opcode = ib_bth_get_opcode(ohdr);
 	if ((opcode >= OP(RDMA_READ_RESPONSE_FIRST) &&
 	     opcode <= OP(ATOMIC_ACKNOWLEDGE)) ||
-	    opcode == TID_OP(READ_RESP)) {
+	    opcode == TID_OP(READ_RESP) ||
+	    opcode == TID_OP(WRITE_RESP)) {
 		WARN_ON(!qp->s_rdma_ack_cnt);
 		qp->s_rdma_ack_cnt--;
 		return;
 	}
 
 	psn = ib_bth_get_psn(ohdr);
-	reset_sending_psn(qp, psn);
+	/*
+	 * Don't attempt to reset the sending PSN for packets in the
+	 * KDETH PSN space since the PSN does not match anything.
+	 */
+	if (opcode != TID_OP(WRITE_DATA) &&
+	    opcode != TID_OP(WRITE_DATA_LAST) &&
+	    opcode != TID_OP(ACK) && opcode != TID_OP(RESYNC))
+		reset_sending_psn(qp, psn);
+
+	/* Handle TID RDMA WRITE packets differently */
+	if (opcode >= TID_OP(WRITE_REQ) &&
+	    opcode <= TID_OP(WRITE_DATA_LAST)) {
+		head = priv->s_tid_head;
+		tail = priv->s_tid_cur;
+		/*
+		 * s_tid_cur is set to s_tid_head in the case, where
+		 * a new TID RDMA request is being started and all
+		 * previous ones have been completed.
+		 * Therefore, we need to do a secondary check in order
+		 * to properly determine whether we should start the
+		 * RC timer.
+		 */
+		wqe = rvt_get_swqe_ptr(qp, tail);
+		req = wqe_to_tid_req(wqe);
+		if (head == tail && req->comp_seg < req->total_segs) {
+			if (tail == 0)
+				tail = qp->s_size - 1;
+			else
+				tail -= 1;
+		}
+	} else {
+		head = qp->s_tail;
+		tail = qp->s_acked;
+	}
 
 	/*
 	 * Start timer after a packet requesting an ACK has been sent and
 	 * there are still requests that haven't been acked.
 	 */
-	if ((psn & IB_BTH_REQ_ACK) && qp->s_acked != qp->s_tail &&
+	if ((psn & IB_BTH_REQ_ACK) && tail != head &&
+	    opcode != TID_OP(WRITE_DATA) && opcode != TID_OP(WRITE_DATA_LAST) &&
+	    opcode != TID_OP(RESYNC) &&
 	    !(qp->s_flags &
-		(RVT_S_TIMER | RVT_S_WAIT_RNR | RVT_S_WAIT_PSN)) &&
-		(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) {
+	      (RVT_S_TIMER | RVT_S_WAIT_RNR | RVT_S_WAIT_PSN)) &&
+	    (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) {
 		if (opcode == TID_OP(READ_REQ))
 			rvt_add_retry_timer_ext(qp, priv->timeout_shift);
 		else
 			rvt_add_retry_timer(qp);
 	}
 
+	/* Start TID RDMA ACK timer */
+	if ((opcode == TID_OP(WRITE_DATA) ||
+	     opcode == TID_OP(WRITE_DATA_LAST) ||
+	     opcode == TID_OP(RESYNC)) &&
+	    (psn & IB_BTH_REQ_ACK) &&
+	    !(priv->s_flags & HFI1_S_TID_RETRY_TIMER) &&
+	    (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) {
+		/*
+		 * The TID RDMA ACK packet could be received before this
+		 * function is called. Therefore, add the timer only if TID
+		 * RDMA ACK packets are actually pending.
+		 */
+		wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
+		req = wqe_to_tid_req(wqe);
+		if (wqe->wr.opcode == IB_WR_TID_RDMA_WRITE &&
+		    req->ack_seg < req->cur_seg)
+			hfi1_add_tid_retry_timer(qp);
+	}
+
 	while (qp->s_last != qp->s_acked) {
 		u32 s_last;
 
@@ -1628,7 +1899,16 @@ struct rvt_swqe *do_rc_completion(struct rvt_qp *qp,
 	}
 
 	qp->s_retry = qp->s_retry_cnt;
-	update_last_psn(qp, wqe->lpsn);
+	/*
+	 * Don't update the last PSN if the request being completed is
+	 * a TID RDMA WRITE request.
+	 * Completion of the TID RDMA WRITE requests are done by the
+	 * TID RDMA ACKs and as such could be for a request that has
+	 * already been ACKed as far as the IB state machine is
+	 * concerned.
+	 */
+	if (wqe->wr.opcode != IB_WR_TID_RDMA_WRITE)
+		update_last_psn(qp, wqe->lpsn);
 
 	/*
 	 * If we are completing a request which is in the process of
@@ -1658,6 +1938,54 @@ struct rvt_swqe *do_rc_completion(struct rvt_qp *qp,
 	return wqe;
 }
 
+static void set_restart_qp(struct rvt_qp *qp, struct hfi1_ctxtdata *rcd)
+{
+	/* Retry this request. */
+	if (!(qp->r_flags & RVT_R_RDMAR_SEQ)) {
+		qp->r_flags |= RVT_R_RDMAR_SEQ;
+		hfi1_restart_rc(qp, qp->s_last_psn + 1, 0);
+		if (list_empty(&qp->rspwait)) {
+			qp->r_flags |= RVT_R_RSP_SEND;
+			rvt_get_qp(qp);
+			list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
+		}
+	}
+}
+
+/**
+ * update_qp_retry_state - Update qp retry state.
+ * @qp: the QP
+ * @psn: the packet sequence number of the TID RDMA WRITE RESP.
+ * @spsn:  The start psn for the given TID RDMA WRITE swqe.
+ * @lpsn:  The last psn for the given TID RDMA WRITE swqe.
+ *
+ * This function is called to update the qp retry state upon
+ * receiving a TID WRITE RESP after the qp is scheduled to retry
+ * a request.
+ */
+static void update_qp_retry_state(struct rvt_qp *qp, u32 psn, u32 spsn,
+				  u32 lpsn)
+{
+	struct hfi1_qp_priv *qpriv = qp->priv;
+
+	qp->s_psn = psn + 1;
+	/*
+	 * If this is the first TID RDMA WRITE RESP packet for the current
+	 * request, change the s_state so that the retry will be processed
+	 * correctly. Similarly, if this is the last TID RDMA WRITE RESP
+	 * packet, change the s_state and advance the s_cur.
+	 */
+	if (cmp_psn(psn, lpsn) >= 0) {
+		qp->s_cur = qpriv->s_tid_cur + 1;
+		if (qp->s_cur >= qp->s_size)
+			qp->s_cur = 0;
+		qp->s_state = TID_OP(WRITE_REQ);
+	} else  if (!cmp_psn(psn, spsn)) {
+		qp->s_cur = qpriv->s_tid_cur;
+		qp->s_state = TID_OP(WRITE_RESP);
+	}
+}
+
 /**
  * do_rc_ack - process an incoming RC ACK
  * @qp: the QP the ACK came in on
@@ -1679,6 +2007,7 @@ int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
 	int ret = 0;
 	u32 ack_psn;
 	int diff;
+	struct rvt_dev_info *rdi;
 
 	lockdep_assert_held(&qp->s_lock);
 	/*
@@ -1725,18 +2054,10 @@ int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
 		     (opcode != TID_OP(READ_RESP) || diff != 0)) ||
 		    ((wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
 		      wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) &&
-		     (opcode != OP(ATOMIC_ACKNOWLEDGE) || diff != 0))) {
-			/* Retry this request. */
-			if (!(qp->r_flags & RVT_R_RDMAR_SEQ)) {
-				qp->r_flags |= RVT_R_RDMAR_SEQ;
-				hfi1_restart_rc(qp, qp->s_last_psn + 1, 0);
-				if (list_empty(&qp->rspwait)) {
-					qp->r_flags |= RVT_R_RSP_SEND;
-					rvt_get_qp(qp);
-					list_add_tail(&qp->rspwait,
-						      &rcd->qp_wait_list);
-				}
-			}
+		     (opcode != OP(ATOMIC_ACKNOWLEDGE) || diff != 0)) ||
+		    (wqe->wr.opcode == IB_WR_TID_RDMA_WRITE &&
+		     (delta_psn(psn, qp->s_last_psn) != 1))) {
+			set_restart_qp(qp, rcd);
 			/*
 			 * No need to process the ACK/NAK since we are
 			 * restarting an earlier request.
@@ -1768,6 +2089,14 @@ int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
 				hfi1_schedule_send(qp);
 			}
 		}
+
+		/*
+		 * TID RDMA WRITE requests will be completed by the TID RDMA
+		 * ACK packet handler (see tid_rdma.c).
+		 */
+		if (wqe->wr.opcode == IB_WR_TID_RDMA_WRITE)
+			break;
+
 		wqe = do_rc_completion(qp, wqe, ibp);
 		if (qp->s_acked == qp->s_tail)
 			break;
@@ -1785,17 +2114,60 @@ int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
 			else
 				rvt_stop_rc_timers(qp);
 		} else if (qp->s_acked != qp->s_tail) {
+			struct rvt_swqe *__w = NULL;
+
+			if (qpriv->s_tid_cur != HFI1_QP_WQE_INVALID)
+				__w = rvt_get_swqe_ptr(qp, qpriv->s_tid_cur);
+
 			/*
-			 * We are expecting more ACKs so
-			 * mod the retry timer.
-			 */
-			rvt_mod_retry_timer(qp);
-			/*
-			 * We can stop re-sending the earlier packets and
-			 * continue with the next packet the receiver wants.
+			 * Stop timers if we've received all of the TID RDMA
+			 * WRITE * responses.
 			 */
-			if (cmp_psn(qp->s_psn, psn) <= 0)
-				reset_psn(qp, psn + 1);
+			if (__w && __w->wr.opcode == IB_WR_TID_RDMA_WRITE &&
+			    opcode == TID_OP(WRITE_RESP)) {
+				/*
+				 * Normally, the loop above would correctly
+				 * process all WQEs from s_acked onward and
+				 * either complete them or check for correct
+				 * PSN sequencing.
+				 * However, for TID RDMA, due to pipelining,
+				 * the response may not be for the request at
+				 * s_acked so the above look would just be
+				 * skipped. This does not allow for checking
+				 * the PSN sequencing. It has to be done
+				 * separately.
+				 */
+				if (cmp_psn(psn, qp->s_last_psn + 1)) {
+					set_restart_qp(qp, rcd);
+					goto bail_stop;
+				}
+				/*
+				 * If the psn is being resent, stop the
+				 * resending.
+				 */
+				if (qp->s_cur != qp->s_tail &&
+				    cmp_psn(qp->s_psn, psn) <= 0)
+					update_qp_retry_state(qp, psn,
+							      __w->psn,
+							      __w->lpsn);
+				else if (--qpriv->pending_tid_w_resp)
+					rvt_mod_retry_timer(qp);
+				else
+					rvt_stop_rc_timers(qp);
+			} else {
+				/*
+				 * We are expecting more ACKs so
+				 * mod the retry timer.
+				 */
+				rvt_mod_retry_timer(qp);
+				/*
+				 * We can stop re-sending the earlier packets
+				 * and continue with the next packet the
+				 * receiver wants.
+				 */
+				if (cmp_psn(qp->s_psn, psn) <= 0)
+					reset_psn(qp, psn + 1);
+			}
 		} else {
 			/* No more acks - kill all timers */
 			rvt_stop_rc_timers(qp);
@@ -1811,6 +2183,15 @@ int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
 		rvt_get_credit(qp, aeth);
 		qp->s_rnr_retry = qp->s_rnr_retry_cnt;
 		qp->s_retry = qp->s_retry_cnt;
+		/*
+		 * If the current request is a TID RDMA WRITE request and the
+		 * response is not a TID RDMA WRITE RESP packet, s_last_psn
+		 * can't be advanced.
+		 */
+		if (wqe->wr.opcode == IB_WR_TID_RDMA_WRITE &&
+		    opcode != TID_OP(WRITE_RESP) &&
+		    cmp_psn(psn, wqe->psn) >= 0)
+			return 1;
 		update_last_psn(qp, psn);
 		return 1;
 
@@ -1820,20 +2201,31 @@ int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
 			goto bail_stop;
 		if (qp->s_flags & RVT_S_WAIT_RNR)
 			goto bail_stop;
-		if (qp->s_rnr_retry == 0) {
+		rdi = ib_to_rvt(qp->ibqp.device);
+		if (qp->s_rnr_retry == 0 &&
+		    !((rdi->post_parms[wqe->wr.opcode].flags &
+		      RVT_OPERATION_IGN_RNR_CNT) &&
+		      qp->s_rnr_retry_cnt == 0)) {
 			status = IB_WC_RNR_RETRY_EXC_ERR;
 			goto class_b;
 		}
-		if (qp->s_rnr_retry_cnt < 7)
+		if (qp->s_rnr_retry_cnt < 7 && qp->s_rnr_retry_cnt > 0)
 			qp->s_rnr_retry--;
 
-		/* The last valid PSN is the previous PSN. */
-		update_last_psn(qp, psn - 1);
+		/*
+		 * The last valid PSN is the previous PSN. For TID RDMA WRITE
+		 * request, s_last_psn should be incremented only when a TID
+		 * RDMA WRITE RESP is received to avoid skipping lost TID RDMA
+		 * WRITE RESP packets.
+		 */
+		if (wqe->wr.opcode == IB_WR_TID_RDMA_WRITE) {
+			reset_psn(qp, qp->s_last_psn + 1);
+		} else {
+			update_last_psn(qp, psn - 1);
+			reset_psn(qp, psn);
+		}
 
 		ibp->rvp.n_rc_resends += delta_psn(qp->s_psn, psn);
-
-		reset_psn(qp, psn);
-
 		qp->s_flags &= ~(RVT_S_WAIT_SSN_CREDIT | RVT_S_WAIT_ACK);
 		rvt_stop_rc_timers(qp);
 		rvt_add_rnr_timer(qp, aeth);
@@ -1918,6 +2310,7 @@ static void rdma_seq_err(struct rvt_qp *qp, struct hfi1_ibport *ibp, u32 psn,
 	while (cmp_psn(psn, wqe->lpsn) > 0) {
 		if (wqe->wr.opcode == IB_WR_RDMA_READ ||
 		    wqe->wr.opcode == IB_WR_TID_RDMA_READ ||
+		    wqe->wr.opcode == IB_WR_TID_RDMA_WRITE ||
 		    wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
 		    wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
 			break;
diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.c b/drivers/infiniband/hw/hfi1/tid_rdma.c
index 80111dd1d876..490e47a0f68b 100644
--- a/drivers/infiniband/hw/hfi1/tid_rdma.c
+++ b/drivers/infiniband/hw/hfi1/tid_rdma.c
@@ -3205,6 +3205,20 @@ void hfi1_qp_kern_exp_rcv_clear_all(struct rvt_qp *qp)
 		do {
 			struct hfi1_swqe_priv *priv = wqe->priv;
 
+			ret = hfi1_kern_exp_rcv_clear(&priv->tid_req);
+		} while (!ret);
+	}
+	for (i = qp->s_acked_ack_queue; i != qp->r_head_ack_queue;) {
+		struct rvt_ack_entry *e = &qp->s_ack_queue[i];
+
+		if (++i == rvt_max_atomic(ib_to_rvt(qp->ibqp.device)))
+			i = 0;
+		/* Free only locally allocated TID entries */
+		if (e->opcode != TID_OP(WRITE_REQ))
+			continue;
+		do {
+			struct hfi1_ack_priv *priv = e->priv;
+
 			ret = hfi1_kern_exp_rcv_clear(&priv->tid_req);
 		} while (!ret);
 	}
diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c
index e5e7fad09f32..6764114b886c 100644
--- a/drivers/infiniband/hw/hfi1/user_sdma.c
+++ b/drivers/infiniband/hw/hfi1/user_sdma.c
@@ -1126,7 +1126,8 @@ static inline u32 set_pkt_bth_psn(__be32 bthpsn, u8 expct, u32 frags)
 			0xffffffull),
 		psn = val & mask;
 	if (expct)
-		psn = (psn & ~BTH_SEQ_MASK) | ((psn + frags) & BTH_SEQ_MASK);
+		psn = (psn & ~HFI1_KDETH_BTH_SEQ_MASK) |
+			((psn + frags) & HFI1_KDETH_BTH_SEQ_MASK);
 	else
 		psn = psn + frags;
 	return psn & mask;
diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c
index 7b87b77582bd..ab97d71cdd92 100644
--- a/drivers/infiniband/hw/hfi1/verbs.c
+++ b/drivers/infiniband/hw/hfi1/verbs.c
@@ -161,6 +161,7 @@ MODULE_PARM_DESC(wss_clean_period, "Count of verbs copies before an entry in the
  */
 const enum ib_wc_opcode ib_hfi1_wc_opcode[] = {
 	[IB_WR_RDMA_WRITE] = IB_WC_RDMA_WRITE,
+	[IB_WR_TID_RDMA_WRITE] = IB_WC_RDMA_WRITE,
 	[IB_WR_RDMA_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE,
 	[IB_WR_SEND] = IB_WC_SEND,
 	[IB_WR_SEND_WITH_IMM] = IB_WC_SEND,
@@ -203,6 +204,12 @@ const u8 hdr_len_by_opcode[256] = {
 	[IB_OPCODE_RC_SEND_ONLY_WITH_INVALIDATE]      = 12 + 8 + 4,
 	[IB_OPCODE_TID_RDMA_READ_REQ]                 = 12 + 8 + 36,
 	[IB_OPCODE_TID_RDMA_READ_RESP]                = 12 + 8 + 36,
+	[IB_OPCODE_TID_RDMA_WRITE_REQ]                = 12 + 8 + 36,
+	[IB_OPCODE_TID_RDMA_WRITE_RESP]               = 12 + 8 + 36,
+	[IB_OPCODE_TID_RDMA_WRITE_DATA]               = 12 + 8 + 36,
+	[IB_OPCODE_TID_RDMA_WRITE_DATA_LAST]          = 12 + 8 + 36,
+	[IB_OPCODE_TID_RDMA_ACK]                      = 12 + 8 + 36,
+	[IB_OPCODE_TID_RDMA_RESYNC]                   = 12 + 8 + 36,
 	/* UC */
 	[IB_OPCODE_UC_SEND_FIRST]                     = 12 + 8,
 	[IB_OPCODE_UC_SEND_MIDDLE]                    = 12 + 8,
@@ -248,8 +255,14 @@ static const opcode_handler opcode_handler_tbl[256] = {
 	[IB_OPCODE_RC_SEND_ONLY_WITH_INVALIDATE]      = &hfi1_rc_rcv,
 
 	/* TID RDMA has separate handlers for different opcodes.*/
+	[IB_OPCODE_TID_RDMA_WRITE_REQ]       = &hfi1_rc_rcv_tid_rdma_write_req,
+	[IB_OPCODE_TID_RDMA_WRITE_RESP]      = &hfi1_rc_rcv_tid_rdma_write_resp,
+	[IB_OPCODE_TID_RDMA_WRITE_DATA]      = &hfi1_rc_rcv_tid_rdma_write_data,
+	[IB_OPCODE_TID_RDMA_WRITE_DATA_LAST] = &hfi1_rc_rcv_tid_rdma_write_data,
 	[IB_OPCODE_TID_RDMA_READ_REQ]        = &hfi1_rc_rcv_tid_rdma_read_req,
 	[IB_OPCODE_TID_RDMA_READ_RESP]       = &hfi1_rc_rcv_tid_rdma_read_resp,
+	[IB_OPCODE_TID_RDMA_RESYNC]          = &hfi1_rc_rcv_tid_rdma_resync,
+	[IB_OPCODE_TID_RDMA_ACK]             = &hfi1_rc_rcv_tid_rdma_ack,
 
 	/* UC */
 	[IB_OPCODE_UC_SEND_FIRST]                     = &hfi1_uc_rcv,
@@ -1332,7 +1345,9 @@ static void hfi1_fill_device_attr(struct hfi1_devdata *dd)
 	rdi->dparms.props.max_mr_size = U64_MAX;
 	rdi->dparms.props.max_fast_reg_page_list_len = UINT_MAX;
 	rdi->dparms.props.max_qp = hfi1_max_qps;
-	rdi->dparms.props.max_qp_wr = hfi1_max_qp_wrs;
+	rdi->dparms.props.max_qp_wr =
+		(hfi1_max_qp_wrs >= HFI1_QP_WQE_INVALID ?
+		 HFI1_QP_WQE_INVALID - 1 : hfi1_max_qp_wrs);
 	rdi->dparms.props.max_send_sge = hfi1_max_sges;
 	rdi->dparms.props.max_recv_sge = hfi1_max_sges;
 	rdi->dparms.props.max_sge_rd = hfi1_max_sges;
diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h
index bee3d21a548e..62ace0b2d17a 100644
--- a/drivers/infiniband/hw/hfi1/verbs.h
+++ b/drivers/infiniband/hw/hfi1/verbs.h
@@ -193,6 +193,7 @@ struct hfi1_qp_priv {
 	u32 r_tid_ack;      /* the TID RDMA request to be ACK'ed */
 	u32 r_tid_alloc;    /* Request for which we are allocating resources */
 	u32 pending_tid_w_segs; /* Num of pending tid write segments */
+	u32 pending_tid_w_resp; /* Num of pending tid write responses */
 	u32 alloc_w_segs;       /* Number of segments for which write */
 			       /* resources have been allocated for this QP */
 
diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h
index 4ee612ab6cb4..f0fbd4063fef 100644
--- a/include/rdma/rdmavt_qp.h
+++ b/include/rdma/rdmavt_qp.h
@@ -246,6 +246,7 @@ struct rvt_ack_entry {
 #define RVT_OPERATION_ATOMIC_SGE  0x00000004
 #define RVT_OPERATION_LOCAL       0x00000008
 #define RVT_OPERATION_USE_RESERVE 0x00000010
+#define RVT_OPERATION_IGN_RNR_CNT 0x00000020
 
 #define RVT_OPERATION_MAX (IB_WR_RESERVED10 + 1)
 
-- 
cgit v1.2.3


From 972248e9111ee6fe9fb56c24ecfd7434f3d713ac Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 29 Jan 2019 09:32:03 +0100
Subject: scsi: bsg-lib: handle bidi requests without block layer help

We can just stash away the second request in struct bsg_job instead of
using the block layer req->next_rq field, allowing for the eventual removal
of the latter.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 block/bsg-lib.c                   | 44 +++++++++++++++++++++----
 block/bsg.c                       | 68 ++++++++-------------------------------
 drivers/scsi/scsi_transport_sas.c |  1 -
 include/linux/bsg-lib.h           |  4 +++
 4 files changed, 56 insertions(+), 61 deletions(-)

(limited to 'include')

diff --git a/block/bsg-lib.c b/block/bsg-lib.c
index 192129856342..005e2b75d775 100644
--- a/block/bsg-lib.c
+++ b/block/bsg-lib.c
@@ -51,11 +51,40 @@ static int bsg_transport_fill_hdr(struct request *rq, struct sg_io_v4 *hdr,
 		fmode_t mode)
 {
 	struct bsg_job *job = blk_mq_rq_to_pdu(rq);
+	int ret;
 
 	job->request_len = hdr->request_len;
 	job->request = memdup_user(uptr64(hdr->request), hdr->request_len);
+	if (IS_ERR(job->request))
+		return PTR_ERR(job->request);
+
+	if (hdr->dout_xfer_len && hdr->din_xfer_len) {
+		job->bidi_rq = blk_get_request(rq->q, REQ_OP_SCSI_IN, 0);
+		if (IS_ERR(job->bidi_rq)) {
+			ret = PTR_ERR(job->bidi_rq);
+			goto out;
+		}
+
+		ret = blk_rq_map_user(rq->q, job->bidi_rq, NULL,
+				uptr64(hdr->din_xferp), hdr->din_xfer_len,
+				GFP_KERNEL);
+		if (ret)
+			goto out_free_bidi_rq;
+
+		job->bidi_bio = job->bidi_rq->bio;
+	} else {
+		job->bidi_rq = NULL;
+		job->bidi_bio = NULL;
+	}
 
-	return PTR_ERR_OR_ZERO(job->request);
+	return 0;
+
+out_free_bidi_rq:
+	if (job->bidi_rq)
+		blk_put_request(job->bidi_rq);
+out:
+	kfree(job->request);
+	return ret;
 }
 
 static int bsg_transport_complete_rq(struct request *rq, struct sg_io_v4 *hdr)
@@ -93,7 +122,7 @@ static int bsg_transport_complete_rq(struct request *rq, struct sg_io_v4 *hdr)
 	/* we assume all request payload was transferred, residual == 0 */
 	hdr->dout_resid = 0;
 
-	if (rq->next_rq) {
+	if (job->bidi_rq) {
 		unsigned int rsp_len = job->reply_payload.payload_len;
 
 		if (WARN_ON(job->reply_payload_rcv_len > rsp_len))
@@ -111,6 +140,11 @@ static void bsg_transport_free_rq(struct request *rq)
 {
 	struct bsg_job *job = blk_mq_rq_to_pdu(rq);
 
+	if (job->bidi_rq) {
+		blk_rq_unmap_user(job->bidi_bio);
+		blk_put_request(job->bidi_rq);
+	}
+
 	kfree(job->request);
 }
 
@@ -200,7 +234,6 @@ static int bsg_map_buffer(struct bsg_buffer *buf, struct request *req)
  */
 static bool bsg_prepare_job(struct device *dev, struct request *req)
 {
-	struct request *rsp = req->next_rq;
 	struct bsg_job *job = blk_mq_rq_to_pdu(req);
 	int ret;
 
@@ -211,8 +244,8 @@ static bool bsg_prepare_job(struct device *dev, struct request *req)
 		if (ret)
 			goto failjob_rls_job;
 	}
-	if (rsp && rsp->bio) {
-		ret = bsg_map_buffer(&job->reply_payload, rsp);
+	if (job->bidi_rq) {
+		ret = bsg_map_buffer(&job->reply_payload, job->bidi_rq);
 		if (ret)
 			goto failjob_rls_rqst_payload;
 	}
@@ -369,7 +402,6 @@ struct request_queue *bsg_setup_queue(struct device *dev, const char *name,
 	}
 
 	q->queuedata = dev;
-	blk_queue_flag_set(QUEUE_FLAG_BIDI, q);
 	blk_queue_rq_timeout(q, BLK_DEFAULT_SG_TIMEOUT);
 
 	ret = bsg_register_queue(q, dev, name, &bsg_transport_ops);
diff --git a/block/bsg.c b/block/bsg.c
index a799b0ace55c..f306853c6b08 100644
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -74,6 +74,11 @@ static int bsg_scsi_fill_hdr(struct request *rq, struct sg_io_v4 *hdr,
 {
 	struct scsi_request *sreq = scsi_req(rq);
 
+	if (hdr->dout_xfer_len && hdr->din_xfer_len) {
+		pr_warn_once("BIDI support in bsg has been removed.\n");
+		return -EOPNOTSUPP;
+	}
+
 	sreq->cmd_len = hdr->request_len;
 	if (sreq->cmd_len > BLK_MAX_CDB) {
 		sreq->cmd = kzalloc(sreq->cmd_len, GFP_KERNEL);
@@ -114,14 +119,10 @@ static int bsg_scsi_complete_rq(struct request *rq, struct sg_io_v4 *hdr)
 			hdr->response_len = len;
 	}
 
-	if (rq->next_rq) {
-		hdr->dout_resid = sreq->resid_len;
-		hdr->din_resid = scsi_req(rq->next_rq)->resid_len;
-	} else if (rq_data_dir(rq) == READ) {
+	if (rq_data_dir(rq) == READ)
 		hdr->din_resid = sreq->resid_len;
-	} else {
+	else
 		hdr->dout_resid = sreq->resid_len;
-	}
 
 	return ret;
 }
@@ -140,8 +141,8 @@ static const struct bsg_ops bsg_scsi_ops = {
 
 static int bsg_sg_io(struct request_queue *q, fmode_t mode, void __user *uarg)
 {
-	struct request *rq, *next_rq = NULL;
-	struct bio *bio, *bidi_bio = NULL;
+	struct request *rq;
+	struct bio *bio;
 	struct sg_io_v4 hdr;
 	int ret;
 
@@ -164,7 +165,7 @@ static int bsg_sg_io(struct request_queue *q, fmode_t mode, void __user *uarg)
 
 	ret = q->bsg_dev.ops->fill_hdr(rq, &hdr, mode);
 	if (ret)
-		goto out;
+		return ret;
 
 	rq->timeout = msecs_to_jiffies(hdr.timeout);
 	if (!rq->timeout)
@@ -174,29 +175,6 @@ static int bsg_sg_io(struct request_queue *q, fmode_t mode, void __user *uarg)
 	if (rq->timeout < BLK_MIN_SG_TIMEOUT)
 		rq->timeout = BLK_MIN_SG_TIMEOUT;
 
-	if (hdr.dout_xfer_len && hdr.din_xfer_len) {
-		if (!test_bit(QUEUE_FLAG_BIDI, &q->queue_flags)) {
-			ret = -EOPNOTSUPP;
-			goto out;
-		}
-
-		pr_warn_once(
-			"BIDI support in bsg has been deprecated and might be removed. "
-			"Please report your use case to linux-scsi@vger.kernel.org\n");
-
-		next_rq = blk_get_request(q, REQ_OP_SCSI_IN, 0);
-		if (IS_ERR(next_rq)) {
-			ret = PTR_ERR(next_rq);
-			goto out;
-		}
-
-		rq->next_rq = next_rq;
-		ret = blk_rq_map_user(q, next_rq, NULL, uptr64(hdr.din_xferp),
-				       hdr.din_xfer_len, GFP_KERNEL);
-		if (ret)
-			goto out_free_nextrq;
-	}
-
 	if (hdr.dout_xfer_len) {
 		ret = blk_rq_map_user(q, rq, NULL, uptr64(hdr.dout_xferp),
 				hdr.dout_xfer_len, GFP_KERNEL);
@@ -206,38 +184,20 @@ static int bsg_sg_io(struct request_queue *q, fmode_t mode, void __user *uarg)
 	}
 
 	if (ret)
-		goto out_unmap_nextrq;
+		goto out_free_rq;
 
 	bio = rq->bio;
-	if (rq->next_rq)
-		bidi_bio = rq->next_rq->bio;
 
 	blk_execute_rq(q, NULL, rq, !(hdr.flags & BSG_FLAG_Q_AT_TAIL));
 	ret = rq->q->bsg_dev.ops->complete_rq(rq, &hdr);
-
-	if (rq->next_rq) {
-		blk_rq_unmap_user(bidi_bio);
-		blk_put_request(rq->next_rq);
-	}
-
 	blk_rq_unmap_user(bio);
+
+out_free_rq:
 	rq->q->bsg_dev.ops->free_rq(rq);
 	blk_put_request(rq);
-
-	if (copy_to_user(uarg, &hdr, sizeof(hdr)))
+	if (!ret && copy_to_user(uarg, &hdr, sizeof(hdr)))
 		return -EFAULT;
 	return ret;
-
-out_unmap_nextrq:
-	if (rq->next_rq)
-		blk_rq_unmap_user(rq->next_rq->bio);
-out_free_nextrq:
-	if (rq->next_rq)
-		blk_put_request(rq->next_rq);
-out:
-	q->bsg_dev.ops->free_rq(rq);
-	blk_put_request(rq);
-	return ret;
 }
 
 static struct bsg_device *bsg_alloc_device(void)
diff --git a/drivers/scsi/scsi_transport_sas.c b/drivers/scsi/scsi_transport_sas.c
index 692b46937e52..60f1a81d2034 100644
--- a/drivers/scsi/scsi_transport_sas.c
+++ b/drivers/scsi/scsi_transport_sas.c
@@ -213,7 +213,6 @@ static int sas_bsg_initialize(struct Scsi_Host *shost, struct sas_rphy *rphy)
 		to_sas_host_attrs(shost)->q = q;
 	}
 
-	blk_queue_flag_set(QUEUE_FLAG_BIDI, q);
 	return 0;
 }
 
diff --git a/include/linux/bsg-lib.h b/include/linux/bsg-lib.h
index b356e0006731..7f14517a559b 100644
--- a/include/linux/bsg-lib.h
+++ b/include/linux/bsg-lib.h
@@ -69,6 +69,10 @@ struct bsg_job {
 	int result;
 	unsigned int reply_payload_rcv_len;
 
+	/* BIDI support */
+	struct request *bidi_rq;
+	struct bio *bidi_bio;
+
 	void *dd_data;		/* Used for driver-specific storage */
 };
 
-- 
cgit v1.2.3


From 19fcae3d4f2dd513d472055fb694d802e9a21b91 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 8 Nov 2018 09:28:20 +0100
Subject: scsi: remove the SCSI OSD library

Now that all the users are gone the SCSI OSD library can be removed as
well.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 Documentation/scsi/osd.txt       |  192 ----
 MAINTAINERS                      |    6 -
 drivers/scsi/Kconfig             |    2 -
 drivers/scsi/Makefile            |    1 -
 drivers/scsi/osd/Kbuild          |   20 -
 drivers/scsi/osd/Kconfig         |   49 -
 drivers/scsi/osd/osd_debug.h     |   30 -
 drivers/scsi/osd/osd_initiator.c | 2076 --------------------------------------
 drivers/scsi/osd/osd_uld.c       |  571 -----------
 include/scsi/osd_initiator.h     |  511 ----------
 include/scsi/osd_ore.h           |  201 ----
 11 files changed, 3659 deletions(-)
 delete mode 100644 Documentation/scsi/osd.txt
 delete mode 100644 drivers/scsi/osd/Kbuild
 delete mode 100644 drivers/scsi/osd/Kconfig
 delete mode 100644 drivers/scsi/osd/osd_debug.h
 delete mode 100644 drivers/scsi/osd/osd_initiator.c
 delete mode 100644 drivers/scsi/osd/osd_uld.c
 delete mode 100644 include/scsi/osd_initiator.h
 delete mode 100644 include/scsi/osd_ore.h

(limited to 'include')

diff --git a/Documentation/scsi/osd.txt b/Documentation/scsi/osd.txt
deleted file mode 100644
index 2bc2ab06b0c0..000000000000
--- a/Documentation/scsi/osd.txt
+++ /dev/null
@@ -1,192 +0,0 @@
-The OSD Standard
-================
-OSD (Object-Based Storage Device) is a T10 SCSI command set that is designed
-to provide efficient operation of input/output logical units that manage the
-allocation, placement, and accessing of variable-size data-storage containers,
-called objects. Objects are intended to contain operating system and application
-constructs. Each object has associated attributes attached to it, which are
-integral part of the object and provide metadata about the object. The standard
-defines some common obligatory attributes, but user attributes can be added as
-needed.
-
-See: http://www.t10.org/ftp/t10/drafts/osd2/ for the latest draft for OSD 2
-or search the web for "OSD SCSI"
-
-OSD in the Linux Kernel
-=======================
-osd-initiator:
-  The main component of OSD in Kernel is the osd-initiator library. Its main
-user is intended to be the pNFS-over-objects layout driver, which uses objects
-as its back-end data storage. Other clients are the other osd parts listed below.
-
-osd-uld:
-  This is a SCSI ULD that registers for OSD type devices and provides a testing
-platform, both for the in-kernel initiator as well as connected targets. It
-currently has no useful user-mode API, though it could have if need be.
-
-osd target:
-  There are no current plans for an OSD target implementation in kernel. For all
-needs, a user-mode target that is based on the scsi tgt target framework is
-available from Ohio Supercomputer Center (OSC) at:
-http://www.open-osd.org/bin/view/Main/OscOsdProject
-There are several other target implementations. See http://open-osd.org for more
-links.
-
-Files and Folders
-=================
-This is the complete list of files included in this work:
-include/scsi/
-	osd_initiator.h   Main API for the initiator library
-	osd_types.h	  Common OSD types
-	osd_sec.h	  Security Manager API
-	osd_protocol.h	  Wire definitions of the OSD standard protocol
-	osd_attributes.h  Wire definitions of OSD attributes
-
-drivers/scsi/osd/
-	osd_initiator.c   OSD-Initiator library implementation
-	osd_uld.c	  The OSD scsi ULD
-	osd_ktest.{h,c}	  In-kernel test suite (called by osd_uld)
-	osd_debug.h	  Some printk macros
-	Makefile	  For both in-tree and out-of-tree compilation
-	Kconfig		  Enables inclusion of the different pieces
-	osd_test.c	  User-mode application to call the kernel tests
-
-The OSD-Initiator Library
-=========================
-osd_initiator is a low level implementation of an osd initiator encoder.
-But even though, it should be intuitive and easy to use. Perhaps over time an
-higher lever will form that automates some of the more common recipes.
-
-init/fini:
-- osd_dev_init() associates a scsi_device with an osd_dev structure
-  and initializes some global pools. This should be done once per scsi_device
-  (OSD LUN). The osd_dev structure is needed for calling osd_start_request().
-
-- osd_dev_fini() cleans up before a osd_dev/scsi_device destruction.
-
-OSD commands encoding, execution, and decoding of results:
-
-struct osd_request's is used to iteratively encode an OSD command and carry
-its state throughout execution. Each request goes through these stages:
-
-a. osd_start_request() allocates the request.
-
-b. Any of the osd_req_* methods is used to encode a request of the specified
-   type.
-
-c. osd_req_add_{get,set}_attr_* may be called to add get/set attributes to the
-   CDB. "List" or "Page" mode can be used exclusively. The attribute-list API
-   can be called multiple times on the same request. However, only one
-   attribute-page can be read, as mandated by the OSD standard.
-
-d. osd_finalize_request() computes offsets into the data-in and data-out buffers
-   and signs the request using the provided capability key and integrity-
-   check parameters.
-
-e. osd_execute_request() may be called to execute the request via the block
-   layer and wait for its completion.  The request can be executed
-   asynchronously by calling the block layer API directly.
-
-f. After execution, osd_req_decode_sense() can be called to decode the request's
-   sense information.
-
-g. osd_req_decode_get_attr() may be called to retrieve osd_add_get_attr_list()
-   values.
-
-h. osd_end_request() must be called to deallocate the request and any resource
-   associated with it. Note that osd_end_request cleans up the request at any
-   stage and it must always be called after a successful osd_start_request().
-
-osd_request's structure:
-
-The OSD standard defines a complex structure of IO segments pointed to by
-members in the CDB. Up to 3 segments can be deployed in the IN-Buffer and up to
-4 in the OUT-Buffer. The ASCII illustration below depicts a secure-read with
-associated get+set of attributes-lists. Other combinations very on the same
-basic theme. From no-segments-used up to all-segments-used.
-
-|________OSD-CDB__________|
-|                         |
-|read_len (offset=0)     -|---------\
-|                         |         |
-|get_attrs_list_length    |         |
-|get_attrs_list_offset   -|----\    |
-|                         |    |    |
-|retrieved_attrs_alloc_len|    |    |
-|retrieved_attrs_offset  -|----|----|-\
-|                         |    |    | |
-|set_attrs_list_length    |    |    | |
-|set_attrs_list_offset   -|-\  |    | |
-|                         | |  |    | |
-|in_data_integ_offset    -|-|--|----|-|-\
-|out_data_integ_offset   -|-|--|--\ | | |
-\_________________________/ |  |  | | | |
-                            |  |  | | | |
-|_______OUT-BUFFER________| |  |  | | | |
-|      Set attr list      |</  |  | | | |
-|                         |    |  | | | |
-|-------------------------|    |  | | | |
-|   Get attr descriptors  |<---/  | | | |
-|                         |       | | | |
-|-------------------------|       | | | |
-|    Out-data integrity   |<------/ | | |
-|                         |         | | |
-\_________________________/         | | |
-                                    | | |
-|________IN-BUFFER________|         | | |
-|      In-Data read       |<--------/ | |
-|                         |           | |
-|-------------------------|           | |
-|      Get attr list      |<----------/ |
-|                         |             |
-|-------------------------|             |
-|    In-data integrity    |<------------/
-|                         |
-\_________________________/
-
-A block device request can carry bidirectional payload by means of associating
-a bidi_read request with a main write-request. Each in/out request is described
-by a chain of BIOs associated with each request.
-The CDB is of a SCSI VARLEN CDB format, as described by OSD standard.
-The OSD standard also mandates alignment restrictions at start of each segment.
-
-In the code, in struct osd_request, there are two _osd_io_info structures to
-describe the IN/OUT buffers above, two BIOs for the data payload and up to five
-_osd_req_data_segment structures to hold the different segments allocation and
-information.
-
-Important: We have chosen to disregard the assumption that a BIO-chain (and
-the resulting sg-list) describes a linear memory buffer. Meaning only first and
-last scatter chain can be incomplete and all the middle chains are of PAGE_SIZE.
-For us, a scatter-gather-list, as its name implies and as used by the Networking
-layer, is to describe a vector of buffers that will be transferred to/from the
-wire. It works very well with current iSCSI transport. iSCSI is currently the
-only deployed OSD transport. In the future we anticipate SAS and FC attached OSD
-devices as well.
-
-The OSD Testing ULD
-===================
-TODO: More user-mode control on tests.
-
-Authors, Mailing list
-=====================
-Please communicate with us on any deployment of osd, whether using this code
-or not.
-
-Any problems, questions, bug reports, lonely OSD nights, please email:
-   OSD Dev List <osd-dev@open-osd.org>
-
-More up-to-date information can be found on:
-http://open-osd.org
-
-Boaz Harrosh <ooo@electrozaur.com>
-
-References
-==========
-Weber, R., "SCSI Object-Based Storage Device Commands",
-T10/1355-D ANSI/INCITS 400-2004,
-http://www.t10.org/ftp/t10/drafts/osd/osd-r10.pdf
-
-Weber, R., "SCSI Object-Based Storage Device Commands -2 (OSD-2)"
-T10/1729-D, Working Draft, rev. 3
-http://www.t10.org/ftp/t10/drafts/osd2/osd2r03.pdf
diff --git a/MAINTAINERS b/MAINTAINERS
index 108b340ab625..0cd037950bc3 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -11385,12 +11385,6 @@ W:	http://www.nongnu.org/orinoco/
 S:	Orphan
 F:	drivers/net/wireless/intersil/orinoco/
 
-OSD LIBRARY and FILESYSTEM
-M:	Boaz Harrosh <ooo@electrozaur.com>
-S:	Maintained
-F:	drivers/scsi/osd/
-F:	include/scsi/osd_*
-
 OV2659 OMNIVISION SENSOR DRIVER
 M:	"Lad, Prabhakar" <prabhakar.csengg@gmail.com>
 L:	linux-media@vger.kernel.org
diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig
index 85c5c89f2fa5..237c6294738d 100644
--- a/drivers/scsi/Kconfig
+++ b/drivers/scsi/Kconfig
@@ -1515,6 +1515,4 @@ source "drivers/scsi/pcmcia/Kconfig"
 
 source "drivers/scsi/device_handler/Kconfig"
 
-source "drivers/scsi/osd/Kconfig"
-
 endmenu
diff --git a/drivers/scsi/Makefile b/drivers/scsi/Makefile
index fcb41ae329c4..8826111fdf4a 100644
--- a/drivers/scsi/Makefile
+++ b/drivers/scsi/Makefile
@@ -150,7 +150,6 @@ obj-$(CONFIG_CHR_DEV_SG)	+= sg.o
 obj-$(CONFIG_CHR_DEV_SCH)	+= ch.o
 obj-$(CONFIG_SCSI_ENCLOSURE)	+= ses.o
 
-obj-$(CONFIG_SCSI_OSD_INITIATOR) += osd/
 obj-$(CONFIG_SCSI_HISI_SAS) += hisi_sas/
 
 # This goes last, so that "real" scsi devices probe earlier
diff --git a/drivers/scsi/osd/Kbuild b/drivers/scsi/osd/Kbuild
deleted file mode 100644
index 58cecd45b0f5..000000000000
--- a/drivers/scsi/osd/Kbuild
+++ /dev/null
@@ -1,20 +0,0 @@
-#
-# Kbuild for the OSD modules
-#
-# Copyright (C) 2008 Panasas Inc.  All rights reserved.
-#
-# Authors:
-#   Boaz Harrosh <ooo@electrozaur.com>
-#   Benny Halevy <bhalevy@panasas.com>
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License version 2
-#
-
-# libosd.ko - osd-initiator library
-libosd-y := osd_initiator.o
-obj-$(CONFIG_SCSI_OSD_INITIATOR) += libosd.o
-
-# osd.ko - SCSI ULD and char-device
-osd-y := osd_uld.o
-obj-$(CONFIG_SCSI_OSD_ULD) += osd.o
diff --git a/drivers/scsi/osd/Kconfig b/drivers/scsi/osd/Kconfig
deleted file mode 100644
index 347cc5e33749..000000000000
--- a/drivers/scsi/osd/Kconfig
+++ /dev/null
@@ -1,49 +0,0 @@
-#
-# Kernel configuration file for the OSD scsi protocol
-#
-# Copyright (C) 2008 Panasas Inc.  All rights reserved.
-#
-# Authors:
-#   Boaz Harrosh <ooo@electrozaur.com>
-#   Benny Halevy <bhalevy@panasas.com>
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public version 2 License as
-# published by the Free Software Foundation
-#
-config SCSI_OSD_INITIATOR
-	tristate "OSD-Initiator library"
-	depends on SCSI
-	help
-		Enable the OSD-Initiator library (libosd.ko).
-		NOTE: You must also select CRYPTO_SHA1 + CRYPTO_HMAC and their
-		dependencies
-
-config SCSI_OSD_ULD
-	tristate "OSD Upper Level driver"
-	depends on SCSI_OSD_INITIATOR
-	help
-		Build a SCSI upper layer driver that exports /dev/osdX devices
-		to user-mode for testing and controlling OSD devices. It is also
-		needed by exofs, for mounting an OSD based file system.
-
-config SCSI_OSD_DPRINT_SENSE
-    int "(0-2) When sense is returned, DEBUG print all sense descriptors"
-    default 1
-    depends on SCSI_OSD_INITIATOR
-    help
-        When a CHECK_CONDITION status is returned from a target, and a
-        sense-buffer is retrieved, turning this on will dump a full
-        sense-decoding message. Setting to 2 will also print recoverable
-        errors that might be regularly returned for some filesystem
-        operations.
-
-config SCSI_OSD_DEBUG
-	bool "Compile All OSD modules with lots of DEBUG prints"
-	default n
-	depends on SCSI_OSD_INITIATOR
-	help
-		OSD Code is populated with lots of OSD_DEBUG(..) printouts to
-		dmesg. Enable this if you found a bug and you want to help us
-		track the problem (see also MAINTAINERS). Setting this will also
-		force SCSI_OSD_DPRINT_SENSE=2.
diff --git a/drivers/scsi/osd/osd_debug.h b/drivers/scsi/osd/osd_debug.h
deleted file mode 100644
index 26341261bb5c..000000000000
--- a/drivers/scsi/osd/osd_debug.h
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- * osd_debug.h - Some kprintf macros
- *
- * Copyright (C) 2008 Panasas Inc.  All rights reserved.
- *
- * Authors:
- *   Boaz Harrosh <ooo@electrozaur.com>
- *   Benny Halevy <bhalevy@panasas.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2
- *
- */
-#ifndef __OSD_DEBUG_H__
-#define __OSD_DEBUG_H__
-
-#define OSD_ERR(fmt, a...) printk(KERN_ERR "osd: " fmt, ##a)
-#define OSD_INFO(fmt, a...) printk(KERN_NOTICE "osd: " fmt, ##a)
-
-#ifdef CONFIG_SCSI_OSD_DEBUG
-#define OSD_DEBUG(fmt, a...) \
-	printk(KERN_NOTICE "osd @%s:%d: " fmt, __func__, __LINE__, ##a)
-#else
-#define OSD_DEBUG(fmt, a...) do {} while (0)
-#endif
-
-/* u64 has problems with printk this will cast it to unsigned long long */
-#define _LLU(x) (unsigned long long)(x)
-
-#endif /* ndef __OSD_DEBUG_H__ */
diff --git a/drivers/scsi/osd/osd_initiator.c b/drivers/scsi/osd/osd_initiator.c
deleted file mode 100644
index 60cf7c5eb880..000000000000
--- a/drivers/scsi/osd/osd_initiator.c
+++ /dev/null
@@ -1,2076 +0,0 @@
-/*
- * osd_initiator - Main body of the osd initiator library.
- *
- * Note: The file does not contain the advanced security functionality which
- * is only needed by the security_manager's initiators.
- *
- * Copyright (C) 2008 Panasas Inc.  All rights reserved.
- *
- * Authors:
- *   Boaz Harrosh <ooo@electrozaur.com>
- *   Benny Halevy <bhalevy@panasas.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  1. Redistributions of source code must retain the above copyright
- *     notice, this list of conditions and the following disclaimer.
- *  2. Redistributions in binary form must reproduce the above copyright
- *     notice, this list of conditions and the following disclaimer in the
- *     documentation and/or other materials provided with the distribution.
- *  3. Neither the name of the Panasas company nor the names of its
- *     contributors may be used to endorse or promote products derived
- *     from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <linux/slab.h>
-#include <linux/module.h>
-
-#include <scsi/osd_initiator.h>
-#include <scsi/osd_sec.h>
-#include <scsi/osd_attributes.h>
-#include <scsi/osd_sense.h>
-
-#include <scsi/scsi_device.h>
-#include <scsi/scsi_request.h>
-
-#include "osd_debug.h"
-
-#ifndef __unused
-#    define __unused			__attribute__((unused))
-#endif
-
-enum { OSD_REQ_RETRIES = 1 };
-
-MODULE_AUTHOR("Boaz Harrosh <ooo@electrozaur.com>");
-MODULE_DESCRIPTION("open-osd initiator library libosd.ko");
-MODULE_LICENSE("GPL");
-
-static inline void build_test(void)
-{
-	/* structures were not packed */
-	BUILD_BUG_ON(sizeof(struct osd_capability) != OSD_CAP_LEN);
-	BUILD_BUG_ON(sizeof(struct osdv2_cdb) != OSD_TOTAL_CDB_LEN);
-	BUILD_BUG_ON(sizeof(struct osdv1_cdb) != OSDv1_TOTAL_CDB_LEN);
-}
-
-static const char *_osd_ver_desc(struct osd_request *or)
-{
-	return osd_req_is_ver1(or) ? "OSD1" : "OSD2";
-}
-
-#define ATTR_DEF_RI(id, len) ATTR_DEF(OSD_APAGE_ROOT_INFORMATION, id, len)
-
-static int _osd_get_print_system_info(struct osd_dev *od,
-	void *caps, struct osd_dev_info *odi)
-{
-	struct osd_request *or;
-	struct osd_attr get_attrs[] = {
-		ATTR_DEF_RI(OSD_ATTR_RI_VENDOR_IDENTIFICATION, 8),
-		ATTR_DEF_RI(OSD_ATTR_RI_PRODUCT_IDENTIFICATION, 16),
-		ATTR_DEF_RI(OSD_ATTR_RI_PRODUCT_MODEL, 32),
-		ATTR_DEF_RI(OSD_ATTR_RI_PRODUCT_REVISION_LEVEL, 4),
-		ATTR_DEF_RI(OSD_ATTR_RI_PRODUCT_SERIAL_NUMBER, 64 /*variable*/),
-		ATTR_DEF_RI(OSD_ATTR_RI_OSD_NAME, 64 /*variable*/),
-		ATTR_DEF_RI(OSD_ATTR_RI_TOTAL_CAPACITY, 8),
-		ATTR_DEF_RI(OSD_ATTR_RI_USED_CAPACITY, 8),
-		ATTR_DEF_RI(OSD_ATTR_RI_NUMBER_OF_PARTITIONS, 8),
-		ATTR_DEF_RI(OSD_ATTR_RI_CLOCK, 6),
-		/* IBM-OSD-SIM Has a bug with this one put it last */
-		ATTR_DEF_RI(OSD_ATTR_RI_OSD_SYSTEM_ID, 20),
-	};
-	void *iter = NULL, *pFirst;
-	int nelem = ARRAY_SIZE(get_attrs), a = 0;
-	int ret;
-
-	or = osd_start_request(od);
-	if (!or)
-		return -ENOMEM;
-
-	/* get attrs */
-	osd_req_get_attributes(or, &osd_root_object);
-	osd_req_add_get_attr_list(or, get_attrs, ARRAY_SIZE(get_attrs));
-
-	ret = osd_finalize_request(or, 0, caps, NULL);
-	if (ret)
-		goto out;
-
-	ret = osd_execute_request(or);
-	if (ret) {
-		OSD_ERR("Failed to detect %s => %d\n", _osd_ver_desc(or), ret);
-		goto out;
-	}
-
-	osd_req_decode_get_attr_list(or, get_attrs, &nelem, &iter);
-
-	OSD_INFO("Detected %s device\n",
-		_osd_ver_desc(or));
-
-	pFirst = get_attrs[a++].val_ptr;
-	OSD_INFO("VENDOR_IDENTIFICATION  [%s]\n",
-		(char *)pFirst);
-
-	pFirst = get_attrs[a++].val_ptr;
-	OSD_INFO("PRODUCT_IDENTIFICATION [%s]\n",
-		(char *)pFirst);
-
-	pFirst = get_attrs[a++].val_ptr;
-	OSD_INFO("PRODUCT_MODEL          [%s]\n",
-		(char *)pFirst);
-
-	pFirst = get_attrs[a++].val_ptr;
-	OSD_INFO("PRODUCT_REVISION_LEVEL [%u]\n",
-		pFirst ? get_unaligned_be32(pFirst) : ~0U);
-
-	pFirst = get_attrs[a++].val_ptr;
-	OSD_INFO("PRODUCT_SERIAL_NUMBER  [%s]\n",
-		(char *)pFirst);
-
-	odi->osdname_len = get_attrs[a].len;
-	/* Avoid NULL for memcmp optimization 0-length is good enough */
-	odi->osdname = kzalloc(odi->osdname_len + 1, GFP_KERNEL);
-	if (!odi->osdname) {
-		ret = -ENOMEM;
-		goto out;
-	}
-	if (odi->osdname_len)
-		memcpy(odi->osdname, get_attrs[a].val_ptr, odi->osdname_len);
-	OSD_INFO("OSD_NAME               [%s]\n", odi->osdname);
-	a++;
-
-	pFirst = get_attrs[a++].val_ptr;
-	OSD_INFO("TOTAL_CAPACITY         [0x%llx]\n",
-		pFirst ? _LLU(get_unaligned_be64(pFirst)) : ~0ULL);
-
-	pFirst = get_attrs[a++].val_ptr;
-	OSD_INFO("USED_CAPACITY          [0x%llx]\n",
-		pFirst ? _LLU(get_unaligned_be64(pFirst)) : ~0ULL);
-
-	pFirst = get_attrs[a++].val_ptr;
-	OSD_INFO("NUMBER_OF_PARTITIONS   [%llu]\n",
-		pFirst ? _LLU(get_unaligned_be64(pFirst)) : ~0ULL);
-
-	if (a >= nelem)
-		goto out;
-
-	/* FIXME: Where are the time utilities */
-	pFirst = get_attrs[a++].val_ptr;
-	OSD_INFO("CLOCK                  [0x%6phN]\n", pFirst);
-
-	if (a < nelem) { /* IBM-OSD-SIM bug, Might not have it */
-		unsigned len = get_attrs[a].len;
-		char sid_dump[32*4 + 2]; /* 2nibbles+space+ASCII */
-
-		hex_dump_to_buffer(get_attrs[a].val_ptr, len, 32, 1,
-				   sid_dump, sizeof(sid_dump), true);
-		OSD_INFO("OSD_SYSTEM_ID(%d)\n"
-			 "        [%s]\n", len, sid_dump);
-
-		if (unlikely(len > sizeof(odi->systemid))) {
-			OSD_ERR("OSD Target error: OSD_SYSTEM_ID too long(%d). "
-				"device identification might not work\n", len);
-			len = sizeof(odi->systemid);
-		}
-		odi->systemid_len = len;
-		memcpy(odi->systemid, get_attrs[a].val_ptr, len);
-		a++;
-	}
-out:
-	osd_end_request(or);
-	return ret;
-}
-
-int osd_auto_detect_ver(struct osd_dev *od,
-	void *caps, struct osd_dev_info *odi)
-{
-	int ret;
-
-	/* Auto-detect the osd version */
-	ret = _osd_get_print_system_info(od, caps, odi);
-	if (ret) {
-		osd_dev_set_ver(od, OSD_VER1);
-		OSD_DEBUG("converting to OSD1\n");
-		ret = _osd_get_print_system_info(od, caps, odi);
-	}
-
-	return ret;
-}
-EXPORT_SYMBOL(osd_auto_detect_ver);
-
-static unsigned _osd_req_cdb_len(struct osd_request *or)
-{
-	return osd_req_is_ver1(or) ? OSDv1_TOTAL_CDB_LEN : OSD_TOTAL_CDB_LEN;
-}
-
-static unsigned _osd_req_alist_elem_size(struct osd_request *or, unsigned len)
-{
-	return osd_req_is_ver1(or) ?
-		osdv1_attr_list_elem_size(len) :
-		osdv2_attr_list_elem_size(len);
-}
-
-static void _osd_req_alist_elem_encode(struct osd_request *or,
-	void *attr_last, const struct osd_attr *oa)
-{
-	if (osd_req_is_ver1(or)) {
-		struct osdv1_attributes_list_element *attr = attr_last;
-
-		attr->attr_page = cpu_to_be32(oa->attr_page);
-		attr->attr_id = cpu_to_be32(oa->attr_id);
-		attr->attr_bytes = cpu_to_be16(oa->len);
-		memcpy(attr->attr_val, oa->val_ptr, oa->len);
-	} else {
-		struct osdv2_attributes_list_element *attr = attr_last;
-
-		attr->attr_page = cpu_to_be32(oa->attr_page);
-		attr->attr_id = cpu_to_be32(oa->attr_id);
-		attr->attr_bytes = cpu_to_be16(oa->len);
-		memcpy(attr->attr_val, oa->val_ptr, oa->len);
-	}
-}
-
-static int _osd_req_alist_elem_decode(struct osd_request *or,
-	void *cur_p, struct osd_attr *oa, unsigned max_bytes)
-{
-	unsigned inc;
-	if (osd_req_is_ver1(or)) {
-		struct osdv1_attributes_list_element *attr = cur_p;
-
-		if (max_bytes < sizeof(*attr))
-			return -1;
-
-		oa->len = be16_to_cpu(attr->attr_bytes);
-		inc = _osd_req_alist_elem_size(or, oa->len);
-		if (inc > max_bytes)
-			return -1;
-
-		oa->attr_page = be32_to_cpu(attr->attr_page);
-		oa->attr_id = be32_to_cpu(attr->attr_id);
-
-		/* OSD1: On empty attributes we return a pointer to 2 bytes
-		 * of zeros. This keeps similar behaviour with OSD2.
-		 * (See below)
-		 */
-		oa->val_ptr = likely(oa->len) ? attr->attr_val :
-						(u8 *)&attr->attr_bytes;
-	} else {
-		struct osdv2_attributes_list_element *attr = cur_p;
-
-		if (max_bytes < sizeof(*attr))
-			return -1;
-
-		oa->len = be16_to_cpu(attr->attr_bytes);
-		inc = _osd_req_alist_elem_size(or, oa->len);
-		if (inc > max_bytes)
-			return -1;
-
-		oa->attr_page = be32_to_cpu(attr->attr_page);
-		oa->attr_id = be32_to_cpu(attr->attr_id);
-
-		/* OSD2: For convenience, on empty attributes, we return 8 bytes
-		 * of zeros here. This keeps the same behaviour with OSD2r04,
-		 * and is nice with null terminating ASCII fields.
-		 * oa->val_ptr == NULL marks the end-of-list, or error.
-		 */
-		oa->val_ptr = likely(oa->len) ? attr->attr_val : attr->reserved;
-	}
-	return inc;
-}
-
-static unsigned _osd_req_alist_size(struct osd_request *or, void *list_head)
-{
-	return osd_req_is_ver1(or) ?
-		osdv1_list_size(list_head) :
-		osdv2_list_size(list_head);
-}
-
-static unsigned _osd_req_sizeof_alist_header(struct osd_request *or)
-{
-	return osd_req_is_ver1(or) ?
-		sizeof(struct osdv1_attributes_list_header) :
-		sizeof(struct osdv2_attributes_list_header);
-}
-
-static void _osd_req_set_alist_type(struct osd_request *or,
-	void *list, int list_type)
-{
-	if (osd_req_is_ver1(or)) {
-		struct osdv1_attributes_list_header *attr_list = list;
-
-		memset(attr_list, 0, sizeof(*attr_list));
-		attr_list->type = list_type;
-	} else {
-		struct osdv2_attributes_list_header *attr_list = list;
-
-		memset(attr_list, 0, sizeof(*attr_list));
-		attr_list->type = list_type;
-	}
-}
-
-static bool _osd_req_is_alist_type(struct osd_request *or,
-	void *list, int list_type)
-{
-	if (!list)
-		return false;
-
-	if (osd_req_is_ver1(or)) {
-		struct osdv1_attributes_list_header *attr_list = list;
-
-		return attr_list->type == list_type;
-	} else {
-		struct osdv2_attributes_list_header *attr_list = list;
-
-		return attr_list->type == list_type;
-	}
-}
-
-/* This is for List-objects not Attributes-Lists */
-static void _osd_req_encode_olist(struct osd_request *or,
-	struct osd_obj_id_list *list)
-{
-	struct osd_cdb_head *cdbh = osd_cdb_head(&or->cdb);
-
-	if (osd_req_is_ver1(or)) {
-		cdbh->v1.list_identifier = list->list_identifier;
-		cdbh->v1.start_address = list->continuation_id;
-	} else {
-		cdbh->v2.list_identifier = list->list_identifier;
-		cdbh->v2.start_address = list->continuation_id;
-	}
-}
-
-static osd_cdb_offset osd_req_encode_offset(struct osd_request *or,
-	u64 offset, unsigned *padding)
-{
-	return __osd_encode_offset(offset, padding,
-			osd_req_is_ver1(or) ?
-				OSDv1_OFFSET_MIN_SHIFT : OSD_OFFSET_MIN_SHIFT,
-			OSD_OFFSET_MAX_SHIFT);
-}
-
-static struct osd_security_parameters *
-_osd_req_sec_params(struct osd_request *or)
-{
-	struct osd_cdb *ocdb = &or->cdb;
-
-	if (osd_req_is_ver1(or))
-		return (struct osd_security_parameters *)&ocdb->v1.sec_params;
-	else
-		return (struct osd_security_parameters *)&ocdb->v2.sec_params;
-}
-
-void osd_dev_init(struct osd_dev *osdd, struct scsi_device *scsi_device)
-{
-	memset(osdd, 0, sizeof(*osdd));
-	osdd->scsi_device = scsi_device;
-	osdd->def_timeout = BLK_DEFAULT_SG_TIMEOUT;
-#ifdef OSD_VER1_SUPPORT
-	osdd->version = OSD_VER2;
-#endif
-	/* TODO: Allocate pools for osd_request attributes ... */
-}
-EXPORT_SYMBOL(osd_dev_init);
-
-void osd_dev_fini(struct osd_dev *osdd)
-{
-	/* TODO: De-allocate pools */
-
-	osdd->scsi_device = NULL;
-}
-EXPORT_SYMBOL(osd_dev_fini);
-
-static struct osd_request *_osd_request_alloc(gfp_t gfp)
-{
-	struct osd_request *or;
-
-	/* TODO: Use mempool with one saved request */
-	or = kzalloc(sizeof(*or), gfp);
-	return or;
-}
-
-static void _osd_request_free(struct osd_request *or)
-{
-	kfree(or);
-}
-
-struct osd_request *osd_start_request(struct osd_dev *dev)
-{
-	struct osd_request *or;
-
-	or = _osd_request_alloc(GFP_KERNEL);
-	if (!or)
-		return NULL;
-
-	or->osd_dev = dev;
-	or->timeout = dev->def_timeout;
-	or->retries = OSD_REQ_RETRIES;
-
-	return or;
-}
-EXPORT_SYMBOL(osd_start_request);
-
-static void _osd_free_seg(struct osd_request *or __unused,
-	struct _osd_req_data_segment *seg)
-{
-	if (!seg->buff || !seg->alloc_size)
-		return;
-
-	kfree(seg->buff);
-	seg->buff = NULL;
-	seg->alloc_size = 0;
-}
-
-static void _put_request(struct request *rq)
-{
-	/*
-	 * If osd_finalize_request() was called but the request was not
-	 * executed through the block layer, then we must release BIOs.
-	 * TODO: Keep error code in or->async_error. Need to audit all
-	 *       code paths.
-	 */
-	if (unlikely(rq->bio))
-		blk_mq_end_request(rq, BLK_STS_IOERR);
-	else
-		blk_put_request(rq);
-}
-
-void osd_end_request(struct osd_request *or)
-{
-	struct request *rq = or->request;
-
-	if (rq) {
-		if (rq->next_rq) {
-			_put_request(rq->next_rq);
-			rq->next_rq = NULL;
-		}
-
-		_put_request(rq);
-	}
-
-	_osd_free_seg(or, &or->get_attr);
-	_osd_free_seg(or, &or->enc_get_attr);
-	_osd_free_seg(or, &or->set_attr);
-	_osd_free_seg(or, &or->cdb_cont);
-
-	_osd_request_free(or);
-}
-EXPORT_SYMBOL(osd_end_request);
-
-static void _set_error_resid(struct osd_request *or, struct request *req,
-			     blk_status_t error)
-{
-	or->async_error = error;
-	or->req_errors = scsi_req(req)->result;
-	or->sense_len = scsi_req(req)->sense_len;
-	if (or->sense_len)
-		memcpy(or->sense, scsi_req(req)->sense, or->sense_len);
-	if (or->out.req)
-		or->out.residual = scsi_req(or->out.req)->resid_len;
-	if (or->in.req)
-		or->in.residual = scsi_req(or->in.req)->resid_len;
-}
-
-int osd_execute_request(struct osd_request *or)
-{
-	blk_execute_rq(or->request->q, NULL, or->request, 0);
-
-	if (scsi_req(or->request)->result) {
-		_set_error_resid(or, or->request, BLK_STS_IOERR);
-		return -EIO;
-	}
-
-	_set_error_resid(or, or->request, BLK_STS_OK);
-	return 0;
-}
-EXPORT_SYMBOL(osd_execute_request);
-
-static void osd_request_async_done(struct request *req, blk_status_t error)
-{
-	struct osd_request *or = req->end_io_data;
-
-	_set_error_resid(or, req, error);
-	if (req->next_rq) {
-		blk_put_request(req->next_rq);
-		req->next_rq = NULL;
-	}
-
-	blk_put_request(req);
-	or->request = NULL;
-	or->in.req = NULL;
-	or->out.req = NULL;
-
-	if (or->async_done)
-		or->async_done(or, or->async_private);
-	else
-		osd_end_request(or);
-}
-
-int osd_execute_request_async(struct osd_request *or,
-	osd_req_done_fn *done, void *private)
-{
-	or->request->end_io_data = or;
-	or->async_private = private;
-	or->async_done = done;
-
-	blk_execute_rq_nowait(or->request->q, NULL, or->request, 0,
-			      osd_request_async_done);
-	return 0;
-}
-EXPORT_SYMBOL(osd_execute_request_async);
-
-u8 sg_out_pad_buffer[1 << OSDv1_OFFSET_MIN_SHIFT];
-u8 sg_in_pad_buffer[1 << OSDv1_OFFSET_MIN_SHIFT];
-
-static int _osd_realloc_seg(struct osd_request *or,
-	struct _osd_req_data_segment *seg, unsigned max_bytes)
-{
-	void *buff;
-
-	if (seg->alloc_size >= max_bytes)
-		return 0;
-
-	buff = krealloc(seg->buff, max_bytes, GFP_KERNEL);
-	if (!buff) {
-		OSD_ERR("Failed to Realloc %d-bytes was-%d\n", max_bytes,
-			seg->alloc_size);
-		return -ENOMEM;
-	}
-
-	memset(buff + seg->alloc_size, 0, max_bytes - seg->alloc_size);
-	seg->buff = buff;
-	seg->alloc_size = max_bytes;
-	return 0;
-}
-
-static int _alloc_cdb_cont(struct osd_request *or, unsigned total_bytes)
-{
-	OSD_DEBUG("total_bytes=%d\n", total_bytes);
-	return _osd_realloc_seg(or, &or->cdb_cont, total_bytes);
-}
-
-static int _alloc_set_attr_list(struct osd_request *or,
-	const struct osd_attr *oa, unsigned nelem, unsigned add_bytes)
-{
-	unsigned total_bytes = add_bytes;
-
-	for (; nelem; --nelem, ++oa)
-		total_bytes += _osd_req_alist_elem_size(or, oa->len);
-
-	OSD_DEBUG("total_bytes=%d\n", total_bytes);
-	return _osd_realloc_seg(or, &or->set_attr, total_bytes);
-}
-
-static int _alloc_get_attr_desc(struct osd_request *or, unsigned max_bytes)
-{
-	OSD_DEBUG("total_bytes=%d\n", max_bytes);
-	return _osd_realloc_seg(or, &or->enc_get_attr, max_bytes);
-}
-
-static int _alloc_get_attr_list(struct osd_request *or)
-{
-	OSD_DEBUG("total_bytes=%d\n", or->get_attr.total_bytes);
-	return _osd_realloc_seg(or, &or->get_attr, or->get_attr.total_bytes);
-}
-
-/*
- * Common to all OSD commands
- */
-
-static void _osdv1_req_encode_common(struct osd_request *or,
-	__be16 act, const struct osd_obj_id *obj, u64 offset, u64 len)
-{
-	struct osdv1_cdb *ocdb = &or->cdb.v1;
-
-	/*
-	 * For speed, the commands
-	 *	OSD_ACT_PERFORM_SCSI_COMMAND	, V1 0x8F7E, V2 0x8F7C
-	 *	OSD_ACT_SCSI_TASK_MANAGEMENT	, V1 0x8F7F, V2 0x8F7D
-	 * are not supported here. Should pass zero and set after the call
-	 */
-	act &= cpu_to_be16(~0x0080); /* V1 action code */
-
-	OSD_DEBUG("OSDv1 execute opcode 0x%x\n", be16_to_cpu(act));
-
-	ocdb->h.varlen_cdb.opcode = VARIABLE_LENGTH_CMD;
-	ocdb->h.varlen_cdb.additional_cdb_length = OSD_ADDITIONAL_CDB_LENGTH;
-	ocdb->h.varlen_cdb.service_action = act;
-
-	ocdb->h.partition = cpu_to_be64(obj->partition);
-	ocdb->h.object = cpu_to_be64(obj->id);
-	ocdb->h.v1.length = cpu_to_be64(len);
-	ocdb->h.v1.start_address = cpu_to_be64(offset);
-}
-
-static void _osdv2_req_encode_common(struct osd_request *or,
-	 __be16 act, const struct osd_obj_id *obj, u64 offset, u64 len)
-{
-	struct osdv2_cdb *ocdb = &or->cdb.v2;
-
-	OSD_DEBUG("OSDv2 execute opcode 0x%x\n", be16_to_cpu(act));
-
-	ocdb->h.varlen_cdb.opcode = VARIABLE_LENGTH_CMD;
-	ocdb->h.varlen_cdb.additional_cdb_length = OSD_ADDITIONAL_CDB_LENGTH;
-	ocdb->h.varlen_cdb.service_action = act;
-
-	ocdb->h.partition = cpu_to_be64(obj->partition);
-	ocdb->h.object = cpu_to_be64(obj->id);
-	ocdb->h.v2.length = cpu_to_be64(len);
-	ocdb->h.v2.start_address = cpu_to_be64(offset);
-}
-
-static void _osd_req_encode_common(struct osd_request *or,
-	__be16 act, const struct osd_obj_id *obj, u64 offset, u64 len)
-{
-	if (osd_req_is_ver1(or))
-		_osdv1_req_encode_common(or, act, obj, offset, len);
-	else
-		_osdv2_req_encode_common(or, act, obj, offset, len);
-}
-
-/*
- * Device commands
- */
-/*TODO: void osd_req_set_master_seed_xchg(struct osd_request *, ...); */
-/*TODO: void osd_req_set_master_key(struct osd_request *, ...); */
-
-void osd_req_format(struct osd_request *or, u64 tot_capacity)
-{
-	_osd_req_encode_common(or, OSD_ACT_FORMAT_OSD, &osd_root_object, 0,
-				tot_capacity);
-}
-EXPORT_SYMBOL(osd_req_format);
-
-int osd_req_list_dev_partitions(struct osd_request *or,
-	osd_id initial_id, struct osd_obj_id_list *list, unsigned nelem)
-{
-	return osd_req_list_partition_objects(or, 0, initial_id, list, nelem);
-}
-EXPORT_SYMBOL(osd_req_list_dev_partitions);
-
-static void _osd_req_encode_flush(struct osd_request *or,
-	enum osd_options_flush_scope_values op)
-{
-	struct osd_cdb_head *ocdb = osd_cdb_head(&or->cdb);
-
-	ocdb->command_specific_options = op;
-}
-
-void osd_req_flush_obsd(struct osd_request *or,
-	enum osd_options_flush_scope_values op)
-{
-	_osd_req_encode_common(or, OSD_ACT_FLUSH_OSD, &osd_root_object, 0, 0);
-	_osd_req_encode_flush(or, op);
-}
-EXPORT_SYMBOL(osd_req_flush_obsd);
-
-/*TODO: void osd_req_perform_scsi_command(struct osd_request *,
-	const u8 *cdb, ...); */
-/*TODO: void osd_req_task_management(struct osd_request *, ...); */
-
-/*
- * Partition commands
- */
-static void _osd_req_encode_partition(struct osd_request *or,
-	__be16 act, osd_id partition)
-{
-	struct osd_obj_id par = {
-		.partition = partition,
-		.id = 0,
-	};
-
-	_osd_req_encode_common(or, act, &par, 0, 0);
-}
-
-void osd_req_create_partition(struct osd_request *or, osd_id partition)
-{
-	_osd_req_encode_partition(or, OSD_ACT_CREATE_PARTITION, partition);
-}
-EXPORT_SYMBOL(osd_req_create_partition);
-
-void osd_req_remove_partition(struct osd_request *or, osd_id partition)
-{
-	_osd_req_encode_partition(or, OSD_ACT_REMOVE_PARTITION, partition);
-}
-EXPORT_SYMBOL(osd_req_remove_partition);
-
-/*TODO: void osd_req_set_partition_key(struct osd_request *,
-	osd_id partition, u8 new_key_id[OSD_CRYPTO_KEYID_SIZE],
-	u8 seed[OSD_CRYPTO_SEED_SIZE]); */
-
-static int _osd_req_list_objects(struct osd_request *or,
-	__be16 action, const struct osd_obj_id *obj, osd_id initial_id,
-	struct osd_obj_id_list *list, unsigned nelem)
-{
-	struct request_queue *q = osd_request_queue(or->osd_dev);
-	u64 len = nelem * sizeof(osd_id) + sizeof(*list);
-	struct bio *bio;
-
-	_osd_req_encode_common(or, action, obj, (u64)initial_id, len);
-
-	if (list->list_identifier)
-		_osd_req_encode_olist(or, list);
-
-	WARN_ON(or->in.bio);
-	bio = bio_map_kern(q, list, len, GFP_KERNEL);
-	if (IS_ERR(bio)) {
-		OSD_ERR("!!! Failed to allocate list_objects BIO\n");
-		return PTR_ERR(bio);
-	}
-
-	bio_set_op_attrs(bio, REQ_OP_READ, 0);
-	or->in.bio = bio;
-	or->in.total_bytes = bio->bi_iter.bi_size;
-	return 0;
-}
-
-int osd_req_list_partition_collections(struct osd_request *or,
-	osd_id partition, osd_id initial_id, struct osd_obj_id_list *list,
-	unsigned nelem)
-{
-	struct osd_obj_id par = {
-		.partition = partition,
-		.id = 0,
-	};
-
-	return osd_req_list_collection_objects(or, &par, initial_id, list,
-					       nelem);
-}
-EXPORT_SYMBOL(osd_req_list_partition_collections);
-
-int osd_req_list_partition_objects(struct osd_request *or,
-	osd_id partition, osd_id initial_id, struct osd_obj_id_list *list,
-	unsigned nelem)
-{
-	struct osd_obj_id par = {
-		.partition = partition,
-		.id = 0,
-	};
-
-	return _osd_req_list_objects(or, OSD_ACT_LIST, &par, initial_id, list,
-				     nelem);
-}
-EXPORT_SYMBOL(osd_req_list_partition_objects);
-
-void osd_req_flush_partition(struct osd_request *or,
-	osd_id partition, enum osd_options_flush_scope_values op)
-{
-	_osd_req_encode_partition(or, OSD_ACT_FLUSH_PARTITION, partition);
-	_osd_req_encode_flush(or, op);
-}
-EXPORT_SYMBOL(osd_req_flush_partition);
-
-/*
- * Collection commands
- */
-/*TODO: void osd_req_create_collection(struct osd_request *,
-	const struct osd_obj_id *); */
-/*TODO: void osd_req_remove_collection(struct osd_request *,
-	const struct osd_obj_id *); */
-
-int osd_req_list_collection_objects(struct osd_request *or,
-	const struct osd_obj_id *obj, osd_id initial_id,
-	struct osd_obj_id_list *list, unsigned nelem)
-{
-	return _osd_req_list_objects(or, OSD_ACT_LIST_COLLECTION, obj,
-				     initial_id, list, nelem);
-}
-EXPORT_SYMBOL(osd_req_list_collection_objects);
-
-/*TODO: void query(struct osd_request *, ...); V2 */
-
-void osd_req_flush_collection(struct osd_request *or,
-	const struct osd_obj_id *obj, enum osd_options_flush_scope_values op)
-{
-	_osd_req_encode_common(or, OSD_ACT_FLUSH_PARTITION, obj, 0, 0);
-	_osd_req_encode_flush(or, op);
-}
-EXPORT_SYMBOL(osd_req_flush_collection);
-
-/*TODO: void get_member_attrs(struct osd_request *, ...); V2 */
-/*TODO: void set_member_attrs(struct osd_request *, ...); V2 */
-
-/*
- * Object commands
- */
-void osd_req_create_object(struct osd_request *or, struct osd_obj_id *obj)
-{
-	_osd_req_encode_common(or, OSD_ACT_CREATE, obj, 0, 0);
-}
-EXPORT_SYMBOL(osd_req_create_object);
-
-void osd_req_remove_object(struct osd_request *or, struct osd_obj_id *obj)
-{
-	_osd_req_encode_common(or, OSD_ACT_REMOVE, obj, 0, 0);
-}
-EXPORT_SYMBOL(osd_req_remove_object);
-
-
-/*TODO: void osd_req_create_multi(struct osd_request *or,
-	struct osd_obj_id *first, struct osd_obj_id_list *list, unsigned nelem);
-*/
-
-void osd_req_write(struct osd_request *or,
-	const struct osd_obj_id *obj, u64 offset,
-	struct bio *bio, u64 len)
-{
-	_osd_req_encode_common(or, OSD_ACT_WRITE, obj, offset, len);
-	WARN_ON(or->out.bio || or->out.total_bytes);
-	WARN_ON(!op_is_write(bio_op(bio)));
-	or->out.bio = bio;
-	or->out.total_bytes = len;
-}
-EXPORT_SYMBOL(osd_req_write);
-
-int osd_req_write_kern(struct osd_request *or,
-	const struct osd_obj_id *obj, u64 offset, void* buff, u64 len)
-{
-	struct request_queue *req_q = osd_request_queue(or->osd_dev);
-	struct bio *bio = bio_map_kern(req_q, buff, len, GFP_KERNEL);
-
-	if (IS_ERR(bio))
-		return PTR_ERR(bio);
-
-	bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
-	osd_req_write(or, obj, offset, bio, len);
-	return 0;
-}
-EXPORT_SYMBOL(osd_req_write_kern);
-
-/*TODO: void osd_req_append(struct osd_request *,
-	const struct osd_obj_id *, struct bio *data_out); */
-/*TODO: void osd_req_create_write(struct osd_request *,
-	const struct osd_obj_id *, struct bio *data_out, u64 offset); */
-/*TODO: void osd_req_clear(struct osd_request *,
-	const struct osd_obj_id *, u64 offset, u64 len); */
-/*TODO: void osd_req_punch(struct osd_request *,
-	const struct osd_obj_id *, u64 offset, u64 len); V2 */
-
-void osd_req_flush_object(struct osd_request *or,
-	const struct osd_obj_id *obj, enum osd_options_flush_scope_values op,
-	/*V2*/ u64 offset, /*V2*/ u64 len)
-{
-	if (unlikely(osd_req_is_ver1(or) && (offset || len))) {
-		OSD_DEBUG("OSD Ver1 flush on specific range ignored\n");
-		offset = 0;
-		len = 0;
-	}
-
-	_osd_req_encode_common(or, OSD_ACT_FLUSH, obj, offset, len);
-	_osd_req_encode_flush(or, op);
-}
-EXPORT_SYMBOL(osd_req_flush_object);
-
-void osd_req_read(struct osd_request *or,
-	const struct osd_obj_id *obj, u64 offset,
-	struct bio *bio, u64 len)
-{
-	_osd_req_encode_common(or, OSD_ACT_READ, obj, offset, len);
-	WARN_ON(or->in.bio || or->in.total_bytes);
-	WARN_ON(op_is_write(bio_op(bio)));
-	or->in.bio = bio;
-	or->in.total_bytes = len;
-}
-EXPORT_SYMBOL(osd_req_read);
-
-int osd_req_read_kern(struct osd_request *or,
-	const struct osd_obj_id *obj, u64 offset, void* buff, u64 len)
-{
-	struct request_queue *req_q = osd_request_queue(or->osd_dev);
-	struct bio *bio = bio_map_kern(req_q, buff, len, GFP_KERNEL);
-
-	if (IS_ERR(bio))
-		return PTR_ERR(bio);
-
-	osd_req_read(or, obj, offset, bio, len);
-	return 0;
-}
-EXPORT_SYMBOL(osd_req_read_kern);
-
-static int _add_sg_continuation_descriptor(struct osd_request *or,
-	const struct osd_sg_entry *sglist, unsigned numentries, u64 *len)
-{
-	struct osd_sg_continuation_descriptor *oscd;
-	u32 oscd_size;
-	unsigned i;
-	int ret;
-
-	oscd_size = sizeof(*oscd) + numentries * sizeof(oscd->entries[0]);
-
-	if (!or->cdb_cont.total_bytes) {
-		/* First time, jump over the header, we will write to:
-		 *	cdb_cont.buff + cdb_cont.total_bytes
-		 */
-		or->cdb_cont.total_bytes =
-				sizeof(struct osd_continuation_segment_header);
-	}
-
-	ret = _alloc_cdb_cont(or, or->cdb_cont.total_bytes + oscd_size);
-	if (unlikely(ret))
-		return ret;
-
-	oscd = or->cdb_cont.buff + or->cdb_cont.total_bytes;
-	oscd->hdr.type = cpu_to_be16(SCATTER_GATHER_LIST);
-	oscd->hdr.pad_length = 0;
-	oscd->hdr.length = cpu_to_be32(oscd_size - sizeof(*oscd));
-
-	*len = 0;
-	/* copy the sg entries and convert to network byte order */
-	for (i = 0; i < numentries; i++) {
-		oscd->entries[i].offset = cpu_to_be64(sglist[i].offset);
-		oscd->entries[i].len    = cpu_to_be64(sglist[i].len);
-		*len += sglist[i].len;
-	}
-
-	or->cdb_cont.total_bytes += oscd_size;
-	OSD_DEBUG("total_bytes=%d oscd_size=%d numentries=%d\n",
-		  or->cdb_cont.total_bytes, oscd_size, numentries);
-	return 0;
-}
-
-static int _osd_req_finalize_cdb_cont(struct osd_request *or, const u8 *cap_key)
-{
-	struct request_queue *req_q = osd_request_queue(or->osd_dev);
-	struct bio *bio;
-	struct osd_cdb_head *cdbh = osd_cdb_head(&or->cdb);
-	struct osd_continuation_segment_header *cont_seg_hdr;
-
-	if (!or->cdb_cont.total_bytes)
-		return 0;
-
-	cont_seg_hdr = or->cdb_cont.buff;
-	cont_seg_hdr->format = CDB_CONTINUATION_FORMAT_V2;
-	cont_seg_hdr->service_action = cdbh->varlen_cdb.service_action;
-
-	/* create a bio for continuation segment */
-	bio = bio_map_kern(req_q, or->cdb_cont.buff, or->cdb_cont.total_bytes,
-			   GFP_KERNEL);
-	if (IS_ERR(bio))
-		return PTR_ERR(bio);
-
-	bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
-
-	/* integrity check the continuation before the bio is linked
-	 * with the other data segments since the continuation
-	 * integrity is separate from the other data segments.
-	 */
-	osd_sec_sign_data(cont_seg_hdr->integrity_check, bio, cap_key);
-
-	cdbh->v2.cdb_continuation_length = cpu_to_be32(or->cdb_cont.total_bytes);
-
-	/* we can't use _req_append_segment, because we need to link in the
-	 * continuation bio to the head of the bio list - the
-	 * continuation segment (if it exists) is always the first segment in
-	 * the out data buffer.
-	 */
-	bio->bi_next = or->out.bio;
-	or->out.bio = bio;
-	or->out.total_bytes += or->cdb_cont.total_bytes;
-
-	return 0;
-}
-
-/* osd_req_write_sg: Takes a @bio that points to the data out buffer and an
- * @sglist that has the scatter gather entries. Scatter-gather enables a write
- * of multiple none-contiguous areas of an object, in a single call. The extents
- * may overlap and/or be in any order. The only constrain is that:
- *	total_bytes(sglist) >= total_bytes(bio)
- */
-int osd_req_write_sg(struct osd_request *or,
-	const struct osd_obj_id *obj, struct bio *bio,
-	const struct osd_sg_entry *sglist, unsigned numentries)
-{
-	u64 len;
-	int ret = _add_sg_continuation_descriptor(or, sglist, numentries, &len);
-
-	if (ret)
-		return ret;
-	osd_req_write(or, obj, 0, bio, len);
-
-	return 0;
-}
-EXPORT_SYMBOL(osd_req_write_sg);
-
-/* osd_req_read_sg: Read multiple extents of an object into @bio
- * See osd_req_write_sg
- */
-int osd_req_read_sg(struct osd_request *or,
-	const struct osd_obj_id *obj, struct bio *bio,
-	const struct osd_sg_entry *sglist, unsigned numentries)
-{
-	u64 len;
-	u64 off;
-	int ret;
-
-	if (numentries > 1) {
-		off = 0;
-		ret = _add_sg_continuation_descriptor(or, sglist, numentries,
-						      &len);
-		if (ret)
-			return ret;
-	} else {
-		/* Optimize the case of single segment, read_sg is a
-		 * bidi operation.
-		 */
-		len = sglist->len;
-		off = sglist->offset;
-	}
-	osd_req_read(or, obj, off, bio, len);
-
-	return 0;
-}
-EXPORT_SYMBOL(osd_req_read_sg);
-
-/* SG-list write/read Kern API
- *
- * osd_req_{write,read}_sg_kern takes an array of @buff pointers and an array
- * of sg_entries. @numentries indicates how many pointers and sg_entries there
- * are.  By requiring an array of buff pointers. This allows a caller to do a
- * single write/read and scatter into multiple buffers.
- * NOTE: Each buffer + len should not cross a page boundary.
- */
-static struct bio *_create_sg_bios(struct osd_request *or,
-	void **buff, const struct osd_sg_entry *sglist, unsigned numentries)
-{
-	struct request_queue *q = osd_request_queue(or->osd_dev);
-	struct bio *bio;
-	unsigned i;
-
-	bio = bio_kmalloc(GFP_KERNEL, numentries);
-	if (unlikely(!bio)) {
-		OSD_DEBUG("Failed to allocate BIO size=%u\n", numentries);
-		return ERR_PTR(-ENOMEM);
-	}
-
-	for (i = 0; i < numentries; i++) {
-		unsigned offset = offset_in_page(buff[i]);
-		struct page *page = virt_to_page(buff[i]);
-		unsigned len = sglist[i].len;
-		unsigned added_len;
-
-		BUG_ON(offset + len > PAGE_SIZE);
-		added_len = bio_add_pc_page(q, bio, page, len, offset);
-		if (unlikely(len != added_len)) {
-			OSD_DEBUG("bio_add_pc_page len(%d) != added_len(%d)\n",
-				  len, added_len);
-			bio_put(bio);
-			return ERR_PTR(-ENOMEM);
-		}
-	}
-
-	return bio;
-}
-
-int osd_req_write_sg_kern(struct osd_request *or,
-	const struct osd_obj_id *obj, void **buff,
-	const struct osd_sg_entry *sglist, unsigned numentries)
-{
-	struct bio *bio = _create_sg_bios(or, buff, sglist, numentries);
-	if (IS_ERR(bio))
-		return PTR_ERR(bio);
-
-	bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
-	osd_req_write_sg(or, obj, bio, sglist, numentries);
-
-	return 0;
-}
-EXPORT_SYMBOL(osd_req_write_sg_kern);
-
-int osd_req_read_sg_kern(struct osd_request *or,
-	const struct osd_obj_id *obj, void **buff,
-	const struct osd_sg_entry *sglist, unsigned numentries)
-{
-	struct bio *bio = _create_sg_bios(or, buff, sglist, numentries);
-	if (IS_ERR(bio))
-		return PTR_ERR(bio);
-
-	osd_req_read_sg(or, obj, bio, sglist, numentries);
-
-	return 0;
-}
-EXPORT_SYMBOL(osd_req_read_sg_kern);
-
-
-
-void osd_req_get_attributes(struct osd_request *or,
-	const struct osd_obj_id *obj)
-{
-	_osd_req_encode_common(or, OSD_ACT_GET_ATTRIBUTES, obj, 0, 0);
-}
-EXPORT_SYMBOL(osd_req_get_attributes);
-
-void osd_req_set_attributes(struct osd_request *or,
-	const struct osd_obj_id *obj)
-{
-	_osd_req_encode_common(or, OSD_ACT_SET_ATTRIBUTES, obj, 0, 0);
-}
-EXPORT_SYMBOL(osd_req_set_attributes);
-
-/*
- * Attributes List-mode
- */
-
-int osd_req_add_set_attr_list(struct osd_request *or,
-	const struct osd_attr *oa, unsigned nelem)
-{
-	unsigned total_bytes = or->set_attr.total_bytes;
-	void *attr_last;
-	int ret;
-
-	if (or->attributes_mode &&
-	    or->attributes_mode != OSD_CDB_GET_SET_ATTR_LISTS) {
-		WARN_ON(1);
-		return -EINVAL;
-	}
-	or->attributes_mode = OSD_CDB_GET_SET_ATTR_LISTS;
-
-	if (!total_bytes) { /* first-time: allocate and put list header */
-		total_bytes = _osd_req_sizeof_alist_header(or);
-		ret = _alloc_set_attr_list(or, oa, nelem, total_bytes);
-		if (ret)
-			return ret;
-		_osd_req_set_alist_type(or, or->set_attr.buff,
-					OSD_ATTR_LIST_SET_RETRIEVE);
-	}
-	attr_last = or->set_attr.buff + total_bytes;
-
-	for (; nelem; --nelem) {
-		unsigned elem_size = _osd_req_alist_elem_size(or, oa->len);
-
-		total_bytes += elem_size;
-		if (unlikely(or->set_attr.alloc_size < total_bytes)) {
-			or->set_attr.total_bytes = total_bytes - elem_size;
-			ret = _alloc_set_attr_list(or, oa, nelem, total_bytes);
-			if (ret)
-				return ret;
-			attr_last =
-				or->set_attr.buff + or->set_attr.total_bytes;
-		}
-
-		_osd_req_alist_elem_encode(or, attr_last, oa);
-
-		attr_last += elem_size;
-		++oa;
-	}
-
-	or->set_attr.total_bytes = total_bytes;
-	return 0;
-}
-EXPORT_SYMBOL(osd_req_add_set_attr_list);
-
-static int _req_append_segment(struct osd_request *or,
-	unsigned padding, struct _osd_req_data_segment *seg,
-	struct _osd_req_data_segment *last_seg, struct _osd_io_info *io)
-{
-	void *pad_buff;
-	int ret;
-
-	if (padding) {
-		/* check if we can just add it to last buffer */
-		if (last_seg &&
-		    (padding <= last_seg->alloc_size - last_seg->total_bytes))
-			pad_buff = last_seg->buff + last_seg->total_bytes;
-		else
-			pad_buff = io->pad_buff;
-
-		ret = blk_rq_map_kern(io->req->q, io->req, pad_buff, padding,
-				       GFP_KERNEL);
-		if (ret)
-			return ret;
-		io->total_bytes += padding;
-	}
-
-	ret = blk_rq_map_kern(io->req->q, io->req, seg->buff, seg->total_bytes,
-			       GFP_KERNEL);
-	if (ret)
-		return ret;
-
-	io->total_bytes += seg->total_bytes;
-	OSD_DEBUG("padding=%d buff=%p total_bytes=%d\n", padding, seg->buff,
-		  seg->total_bytes);
-	return 0;
-}
-
-static int _osd_req_finalize_set_attr_list(struct osd_request *or)
-{
-	struct osd_cdb_head *cdbh = osd_cdb_head(&or->cdb);
-	unsigned padding;
-	int ret;
-
-	if (!or->set_attr.total_bytes) {
-		cdbh->attrs_list.set_attr_offset = OSD_OFFSET_UNUSED;
-		return 0;
-	}
-
-	cdbh->attrs_list.set_attr_bytes = cpu_to_be32(or->set_attr.total_bytes);
-	cdbh->attrs_list.set_attr_offset =
-		osd_req_encode_offset(or, or->out.total_bytes, &padding);
-
-	ret = _req_append_segment(or, padding, &or->set_attr,
-				  or->out.last_seg, &or->out);
-	if (ret)
-		return ret;
-
-	or->out.last_seg = &or->set_attr;
-	return 0;
-}
-
-int osd_req_add_get_attr_list(struct osd_request *or,
-	const struct osd_attr *oa, unsigned nelem)
-{
-	unsigned total_bytes = or->enc_get_attr.total_bytes;
-	void *attr_last;
-	int ret;
-
-	if (or->attributes_mode &&
-	    or->attributes_mode != OSD_CDB_GET_SET_ATTR_LISTS) {
-		WARN_ON(1);
-		return -EINVAL;
-	}
-	or->attributes_mode = OSD_CDB_GET_SET_ATTR_LISTS;
-
-	/* first time calc data-in list header size */
-	if (!or->get_attr.total_bytes)
-		or->get_attr.total_bytes = _osd_req_sizeof_alist_header(or);
-
-	/* calc data-out info */
-	if (!total_bytes) { /* first-time: allocate and put list header */
-		unsigned max_bytes;
-
-		total_bytes = _osd_req_sizeof_alist_header(or);
-		max_bytes = total_bytes +
-			nelem * sizeof(struct osd_attributes_list_attrid);
-		ret = _alloc_get_attr_desc(or, max_bytes);
-		if (ret)
-			return ret;
-
-		_osd_req_set_alist_type(or, or->enc_get_attr.buff,
-					OSD_ATTR_LIST_GET);
-	}
-	attr_last = or->enc_get_attr.buff + total_bytes;
-
-	for (; nelem; --nelem) {
-		struct osd_attributes_list_attrid *attrid;
-		const unsigned cur_size = sizeof(*attrid);
-
-		total_bytes += cur_size;
-		if (unlikely(or->enc_get_attr.alloc_size < total_bytes)) {
-			or->enc_get_attr.total_bytes = total_bytes - cur_size;
-			ret = _alloc_get_attr_desc(or,
-					total_bytes + nelem * sizeof(*attrid));
-			if (ret)
-				return ret;
-			attr_last = or->enc_get_attr.buff +
-				or->enc_get_attr.total_bytes;
-		}
-
-		attrid = attr_last;
-		attrid->attr_page = cpu_to_be32(oa->attr_page);
-		attrid->attr_id = cpu_to_be32(oa->attr_id);
-
-		attr_last += cur_size;
-
-		/* calc data-in size */
-		or->get_attr.total_bytes +=
-			_osd_req_alist_elem_size(or, oa->len);
-		++oa;
-	}
-
-	or->enc_get_attr.total_bytes = total_bytes;
-
-	OSD_DEBUG(
-	       "get_attr.total_bytes=%u(%u) enc_get_attr.total_bytes=%u(%zu)\n",
-	       or->get_attr.total_bytes,
-	       or->get_attr.total_bytes - _osd_req_sizeof_alist_header(or),
-	       or->enc_get_attr.total_bytes,
-	       (or->enc_get_attr.total_bytes - _osd_req_sizeof_alist_header(or))
-			/ sizeof(struct osd_attributes_list_attrid));
-
-	return 0;
-}
-EXPORT_SYMBOL(osd_req_add_get_attr_list);
-
-static int _osd_req_finalize_get_attr_list(struct osd_request *or)
-{
-	struct osd_cdb_head *cdbh = osd_cdb_head(&or->cdb);
-	unsigned out_padding;
-	unsigned in_padding;
-	int ret;
-
-	if (!or->enc_get_attr.total_bytes) {
-		cdbh->attrs_list.get_attr_desc_offset = OSD_OFFSET_UNUSED;
-		cdbh->attrs_list.get_attr_offset = OSD_OFFSET_UNUSED;
-		return 0;
-	}
-
-	ret = _alloc_get_attr_list(or);
-	if (ret)
-		return ret;
-
-	/* The out-going buffer info update */
-	OSD_DEBUG("out-going\n");
-	cdbh->attrs_list.get_attr_desc_bytes =
-		cpu_to_be32(or->enc_get_attr.total_bytes);
-
-	cdbh->attrs_list.get_attr_desc_offset =
-		osd_req_encode_offset(or, or->out.total_bytes, &out_padding);
-
-	ret = _req_append_segment(or, out_padding, &or->enc_get_attr,
-				  or->out.last_seg, &or->out);
-	if (ret)
-		return ret;
-	or->out.last_seg = &or->enc_get_attr;
-
-	/* The incoming buffer info update */
-	OSD_DEBUG("in-coming\n");
-	cdbh->attrs_list.get_attr_alloc_length =
-		cpu_to_be32(or->get_attr.total_bytes);
-
-	cdbh->attrs_list.get_attr_offset =
-		osd_req_encode_offset(or, or->in.total_bytes, &in_padding);
-
-	ret = _req_append_segment(or, in_padding, &or->get_attr, NULL,
-				  &or->in);
-	if (ret)
-		return ret;
-	or->in.last_seg = &or->get_attr;
-
-	return 0;
-}
-
-int osd_req_decode_get_attr_list(struct osd_request *or,
-	struct osd_attr *oa, int *nelem, void **iterator)
-{
-	unsigned cur_bytes, returned_bytes;
-	int n;
-	const unsigned sizeof_attr_list = _osd_req_sizeof_alist_header(or);
-	void *cur_p;
-
-	if (!_osd_req_is_alist_type(or, or->get_attr.buff,
-				    OSD_ATTR_LIST_SET_RETRIEVE)) {
-		oa->attr_page = 0;
-		oa->attr_id = 0;
-		oa->val_ptr = NULL;
-		oa->len = 0;
-		*iterator = NULL;
-		return 0;
-	}
-
-	if (*iterator) {
-		BUG_ON((*iterator < or->get_attr.buff) ||
-		     (or->get_attr.buff + or->get_attr.alloc_size < *iterator));
-		cur_p = *iterator;
-		cur_bytes = (*iterator - or->get_attr.buff) - sizeof_attr_list;
-		returned_bytes = or->get_attr.total_bytes;
-	} else { /* first time decode the list header */
-		cur_bytes = sizeof_attr_list;
-		returned_bytes = _osd_req_alist_size(or, or->get_attr.buff) +
-					sizeof_attr_list;
-
-		cur_p = or->get_attr.buff + sizeof_attr_list;
-
-		if (returned_bytes > or->get_attr.alloc_size) {
-			OSD_DEBUG("target report: space was not big enough! "
-				  "Allocate=%u Needed=%u\n",
-				  or->get_attr.alloc_size,
-				  returned_bytes + sizeof_attr_list);
-
-			returned_bytes =
-				or->get_attr.alloc_size - sizeof_attr_list;
-		}
-		or->get_attr.total_bytes = returned_bytes;
-	}
-
-	for (n = 0; (n < *nelem) && (cur_bytes < returned_bytes); ++n) {
-		int inc = _osd_req_alist_elem_decode(or, cur_p, oa,
-						 returned_bytes - cur_bytes);
-
-		if (inc < 0) {
-			OSD_ERR("BAD FOOD from target. list not valid!"
-				"c=%d r=%d n=%d\n",
-				cur_bytes, returned_bytes, n);
-			oa->val_ptr = NULL;
-			cur_bytes = returned_bytes; /* break the caller loop */
-			break;
-		}
-
-		cur_bytes += inc;
-		cur_p += inc;
-		++oa;
-	}
-
-	*iterator = (returned_bytes - cur_bytes) ? cur_p : NULL;
-	*nelem = n;
-	return returned_bytes - cur_bytes;
-}
-EXPORT_SYMBOL(osd_req_decode_get_attr_list);
-
-/*
- * Attributes Page-mode
- */
-
-int osd_req_add_get_attr_page(struct osd_request *or,
-	u32 page_id, void *attar_page, unsigned max_page_len,
-	const struct osd_attr *set_one_attr)
-{
-	struct osd_cdb_head *cdbh = osd_cdb_head(&or->cdb);
-
-	if (or->attributes_mode &&
-	    or->attributes_mode != OSD_CDB_GET_ATTR_PAGE_SET_ONE) {
-		WARN_ON(1);
-		return -EINVAL;
-	}
-	or->attributes_mode = OSD_CDB_GET_ATTR_PAGE_SET_ONE;
-
-	or->get_attr.buff = attar_page;
-	or->get_attr.total_bytes = max_page_len;
-
-	cdbh->attrs_page.get_attr_page = cpu_to_be32(page_id);
-	cdbh->attrs_page.get_attr_alloc_length = cpu_to_be32(max_page_len);
-
-	if (!set_one_attr || !set_one_attr->attr_page)
-		return 0; /* The set is optional */
-
-	or->set_attr.buff = set_one_attr->val_ptr;
-	or->set_attr.total_bytes = set_one_attr->len;
-
-	cdbh->attrs_page.set_attr_page = cpu_to_be32(set_one_attr->attr_page);
-	cdbh->attrs_page.set_attr_id = cpu_to_be32(set_one_attr->attr_id);
-	cdbh->attrs_page.set_attr_length = cpu_to_be32(set_one_attr->len);
-	return 0;
-}
-EXPORT_SYMBOL(osd_req_add_get_attr_page);
-
-static int _osd_req_finalize_attr_page(struct osd_request *or)
-{
-	struct osd_cdb_head *cdbh = osd_cdb_head(&or->cdb);
-	unsigned in_padding, out_padding;
-	int ret;
-
-	/* returned page */
-	cdbh->attrs_page.get_attr_offset =
-		osd_req_encode_offset(or, or->in.total_bytes, &in_padding);
-
-	ret = _req_append_segment(or, in_padding, &or->get_attr, NULL,
-				  &or->in);
-	if (ret)
-		return ret;
-
-	if (or->set_attr.total_bytes == 0)
-		return 0;
-
-	/* set one value */
-	cdbh->attrs_page.set_attr_offset =
-		osd_req_encode_offset(or, or->out.total_bytes, &out_padding);
-
-	ret = _req_append_segment(or, out_padding, &or->set_attr, NULL,
-				  &or->out);
-	return ret;
-}
-
-static inline void osd_sec_parms_set_out_offset(bool is_v1,
-	struct osd_security_parameters *sec_parms, osd_cdb_offset offset)
-{
-	if (is_v1)
-		sec_parms->v1.data_out_integrity_check_offset = offset;
-	else
-		sec_parms->v2.data_out_integrity_check_offset = offset;
-}
-
-static inline void osd_sec_parms_set_in_offset(bool is_v1,
-	struct osd_security_parameters *sec_parms, osd_cdb_offset offset)
-{
-	if (is_v1)
-		sec_parms->v1.data_in_integrity_check_offset = offset;
-	else
-		sec_parms->v2.data_in_integrity_check_offset = offset;
-}
-
-static int _osd_req_finalize_data_integrity(struct osd_request *or,
-	bool has_in, bool has_out, struct bio *out_data_bio, u64 out_data_bytes,
-	const u8 *cap_key)
-{
-	struct osd_security_parameters *sec_parms = _osd_req_sec_params(or);
-	int ret;
-
-	if (!osd_is_sec_alldata(sec_parms))
-		return 0;
-
-	if (has_out) {
-		struct _osd_req_data_segment seg = {
-			.buff = &or->out_data_integ,
-			.total_bytes = sizeof(or->out_data_integ),
-		};
-		unsigned pad;
-
-		or->out_data_integ.data_bytes = cpu_to_be64(out_data_bytes);
-		or->out_data_integ.set_attributes_bytes = cpu_to_be64(
-			or->set_attr.total_bytes);
-		or->out_data_integ.get_attributes_bytes = cpu_to_be64(
-			or->enc_get_attr.total_bytes);
-
-		osd_sec_parms_set_out_offset(osd_req_is_ver1(or), sec_parms,
-			osd_req_encode_offset(or, or->out.total_bytes, &pad));
-
-		ret = _req_append_segment(or, pad, &seg, or->out.last_seg,
-					  &or->out);
-		if (ret)
-			return ret;
-		or->out.last_seg = NULL;
-
-		/* they are now all chained to request sign them all together */
-		osd_sec_sign_data(&or->out_data_integ, out_data_bio,
-				  cap_key);
-	}
-
-	if (has_in) {
-		struct _osd_req_data_segment seg = {
-			.buff = &or->in_data_integ,
-			.total_bytes = sizeof(or->in_data_integ),
-		};
-		unsigned pad;
-
-		osd_sec_parms_set_in_offset(osd_req_is_ver1(or), sec_parms,
-			osd_req_encode_offset(or, or->in.total_bytes, &pad));
-
-		ret = _req_append_segment(or, pad, &seg, or->in.last_seg,
-					  &or->in);
-		if (ret)
-			return ret;
-
-		or->in.last_seg = NULL;
-	}
-
-	return 0;
-}
-
-/*
- * osd_finalize_request and helpers
- */
-static struct request *_make_request(struct request_queue *q, bool has_write,
-			      struct _osd_io_info *oii)
-{
-	struct request *req;
-	struct bio *bio = oii->bio;
-	int ret;
-
-	req = blk_get_request(q, has_write ? REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN,
-			0);
-	if (IS_ERR(req))
-		return req;
-
-	for_each_bio(bio) {
-		struct bio *bounce_bio = bio;
-
-		ret = blk_rq_append_bio(req, &bounce_bio);
-		if (ret)
-			return ERR_PTR(ret);
-	}
-
-	return req;
-}
-
-static int _init_blk_request(struct osd_request *or,
-	bool has_in, bool has_out)
-{
-	struct scsi_device *scsi_device = or->osd_dev->scsi_device;
-	struct request_queue *q = scsi_device->request_queue;
-	struct request *req;
-	int ret;
-
-	req = _make_request(q, has_out, has_out ? &or->out : &or->in);
-	if (IS_ERR(req)) {
-		ret = PTR_ERR(req);
-		goto out;
-	}
-
-	or->request = req;
-	req->rq_flags |= RQF_QUIET;
-
-	req->timeout = or->timeout;
-	scsi_req(req)->retries = or->retries;
-
-	if (has_out) {
-		or->out.req = req;
-		if (has_in) {
-			/* allocate bidi request */
-			req = _make_request(q, false, &or->in);
-			if (IS_ERR(req)) {
-				OSD_DEBUG("blk_get_request for bidi failed\n");
-				ret = PTR_ERR(req);
-				goto out;
-			}
-			or->in.req = or->request->next_rq = req;
-		}
-	} else if (has_in)
-		or->in.req = req;
-
-	ret = 0;
-out:
-	OSD_DEBUG("or=%p has_in=%d has_out=%d => %d, %p\n",
-			or, has_in, has_out, ret, or->request);
-	return ret;
-}
-
-int osd_finalize_request(struct osd_request *or,
-	u8 options, const void *cap, const u8 *cap_key)
-{
-	struct osd_cdb_head *cdbh = osd_cdb_head(&or->cdb);
-	bool has_in, has_out;
-	 /* Save for data_integrity without the cdb_continuation */
-	struct bio *out_data_bio = or->out.bio;
-	u64 out_data_bytes = or->out.total_bytes;
-	int ret;
-
-	if (options & OSD_REQ_FUA)
-		cdbh->options |= OSD_CDB_FUA;
-
-	if (options & OSD_REQ_DPO)
-		cdbh->options |= OSD_CDB_DPO;
-
-	if (options & OSD_REQ_BYPASS_TIMESTAMPS)
-		cdbh->timestamp_control = OSD_CDB_BYPASS_TIMESTAMPS;
-
-	osd_set_caps(&or->cdb, cap);
-
-	has_in = or->in.bio || or->get_attr.total_bytes;
-	has_out = or->out.bio || or->cdb_cont.total_bytes ||
-		or->set_attr.total_bytes || or->enc_get_attr.total_bytes;
-
-	ret = _osd_req_finalize_cdb_cont(or, cap_key);
-	if (ret) {
-		OSD_DEBUG("_osd_req_finalize_cdb_cont failed\n");
-		return ret;
-	}
-	ret = _init_blk_request(or, has_in, has_out);
-	if (ret) {
-		OSD_DEBUG("_init_blk_request failed\n");
-		return ret;
-	}
-
-	or->out.pad_buff = sg_out_pad_buffer;
-	or->in.pad_buff = sg_in_pad_buffer;
-
-	if (!or->attributes_mode)
-		or->attributes_mode = OSD_CDB_GET_SET_ATTR_LISTS;
-	cdbh->command_specific_options |= or->attributes_mode;
-	if (or->attributes_mode == OSD_CDB_GET_ATTR_PAGE_SET_ONE) {
-		ret = _osd_req_finalize_attr_page(or);
-		if (ret) {
-			OSD_DEBUG("_osd_req_finalize_attr_page failed\n");
-			return ret;
-		}
-	} else {
-		/* TODO: I think that for the GET_ATTR command these 2 should
-		 * be reversed to keep them in execution order (for embedded
-		 * targets with low memory footprint)
-		 */
-		ret = _osd_req_finalize_set_attr_list(or);
-		if (ret) {
-			OSD_DEBUG("_osd_req_finalize_set_attr_list failed\n");
-			return ret;
-		}
-
-		ret = _osd_req_finalize_get_attr_list(or);
-		if (ret) {
-			OSD_DEBUG("_osd_req_finalize_get_attr_list failed\n");
-			return ret;
-		}
-	}
-
-	ret = _osd_req_finalize_data_integrity(or, has_in, has_out,
-					       out_data_bio, out_data_bytes,
-					       cap_key);
-	if (ret)
-		return ret;
-
-	osd_sec_sign_cdb(&or->cdb, cap_key);
-
-	scsi_req(or->request)->cmd = or->cdb.buff;
-	scsi_req(or->request)->cmd_len = _osd_req_cdb_len(or);
-
-	return 0;
-}
-EXPORT_SYMBOL(osd_finalize_request);
-
-static bool _is_osd_security_code(int code)
-{
-	return	(code == osd_security_audit_value_frozen) ||
-		(code == osd_security_working_key_frozen) ||
-		(code == osd_nonce_not_unique) ||
-		(code == osd_nonce_timestamp_out_of_range) ||
-		(code == osd_invalid_dataout_buffer_integrity_check_value);
-}
-
-#define OSD_SENSE_PRINT1(fmt, a...) \
-	do { \
-		if (__cur_sense_need_output) \
-			OSD_ERR(fmt, ##a); \
-	} while (0)
-
-#define OSD_SENSE_PRINT2(fmt, a...) OSD_SENSE_PRINT1("    " fmt, ##a)
-
-int osd_req_decode_sense_full(struct osd_request *or,
-	struct osd_sense_info *osi, bool silent,
-	struct osd_obj_id *bad_obj_list __unused, int max_obj __unused,
-	struct osd_attr *bad_attr_list, int max_attr)
-{
-	int sense_len, original_sense_len;
-	struct osd_sense_info local_osi;
-	struct scsi_sense_descriptor_based *ssdb;
-	void *cur_descriptor;
-#if (CONFIG_SCSI_OSD_DPRINT_SENSE == 0)
-	const bool __cur_sense_need_output = false;
-#else
-	bool __cur_sense_need_output = !silent;
-#endif
-	int ret;
-
-	if (likely(!or->req_errors))
-		return 0;
-
-	osi = osi ? : &local_osi;
-	memset(osi, 0, sizeof(*osi));
-
-	ssdb = (typeof(ssdb))or->sense;
-	sense_len = or->sense_len;
-	if ((sense_len < (int)sizeof(*ssdb) || !ssdb->sense_key)) {
-		OSD_ERR("Block-layer returned error(0x%x) but "
-			"sense_len(%u) || key(%d) is empty\n",
-			or->req_errors, sense_len, ssdb->sense_key);
-		goto analyze;
-	}
-
-	if ((ssdb->response_code != 0x72) && (ssdb->response_code != 0x73)) {
-		OSD_ERR("Unrecognized scsi sense: rcode=%x length=%d\n",
-			ssdb->response_code, sense_len);
-		goto analyze;
-	}
-
-	osi->key = ssdb->sense_key;
-	osi->additional_code = be16_to_cpu(ssdb->additional_sense_code);
-	original_sense_len = ssdb->additional_sense_length + 8;
-
-#if (CONFIG_SCSI_OSD_DPRINT_SENSE == 1)
-	if (__cur_sense_need_output)
-		__cur_sense_need_output = (osi->key > scsi_sk_recovered_error);
-#endif
-	OSD_SENSE_PRINT1("Main Sense information key=0x%x length(%d, %d) "
-			"additional_code=0x%x async_error=%d errors=0x%x\n",
-			osi->key, original_sense_len, sense_len,
-			osi->additional_code, or->async_error,
-			or->req_errors);
-
-	if (original_sense_len < sense_len)
-		sense_len = original_sense_len;
-
-	cur_descriptor = ssdb->ssd;
-	sense_len -= sizeof(*ssdb);
-	while (sense_len > 0) {
-		struct scsi_sense_descriptor *ssd = cur_descriptor;
-		int cur_len = ssd->additional_length + 2;
-
-		sense_len -= cur_len;
-
-		if (sense_len < 0)
-			break; /* sense was truncated */
-
-		switch (ssd->descriptor_type) {
-		case scsi_sense_information:
-		case scsi_sense_command_specific_information:
-		{
-			struct scsi_sense_command_specific_data_descriptor
-				*sscd = cur_descriptor;
-
-			osi->command_info =
-				get_unaligned_be64(&sscd->information) ;
-			OSD_SENSE_PRINT2(
-				"command_specific_information 0x%llx \n",
-				_LLU(osi->command_info));
-			break;
-		}
-		case scsi_sense_key_specific:
-		{
-			struct scsi_sense_key_specific_data_descriptor
-				*ssks = cur_descriptor;
-
-			osi->sense_info = get_unaligned_be16(&ssks->value);
-			OSD_SENSE_PRINT2(
-				"sense_key_specific_information %u"
-				"sksv_cd_bpv_bp (0x%x)\n",
-				osi->sense_info, ssks->sksv_cd_bpv_bp);
-			break;
-		}
-		case osd_sense_object_identification:
-		{ /*FIXME: Keep first not last, Store in array*/
-			struct osd_sense_identification_data_descriptor
-				*osidd = cur_descriptor;
-
-			osi->not_initiated_command_functions =
-				le32_to_cpu(osidd->not_initiated_functions);
-			osi->completed_command_functions =
-				le32_to_cpu(osidd->completed_functions);
-			osi->obj.partition = be64_to_cpu(osidd->partition_id);
-			osi->obj.id = be64_to_cpu(osidd->object_id);
-			OSD_SENSE_PRINT2(
-				"object_identification pid=0x%llx oid=0x%llx\n",
-				_LLU(osi->obj.partition), _LLU(osi->obj.id));
-			OSD_SENSE_PRINT2(
-				"not_initiated_bits(%x) "
-				"completed_command_bits(%x)\n",
-				osi->not_initiated_command_functions,
-				osi->completed_command_functions);
-			break;
-		}
-		case osd_sense_response_integrity_check:
-		{
-			struct osd_sense_response_integrity_check_descriptor
-				*d = cur_descriptor;
-			/* 2nibbles+space+ASCII */
-			char dump[sizeof(d->integrity_check_value) * 4 + 2];
-
-			hex_dump_to_buffer(d->integrity_check_value,
-					sizeof(d->integrity_check_value),
-					32, 1, dump, sizeof(dump), true);
-			OSD_SENSE_PRINT2("response_integrity [%s]\n", dump);
-		}
-		case osd_sense_attribute_identification:
-		{
-			struct osd_sense_attributes_data_descriptor
-				*osadd = cur_descriptor;
-			unsigned len = min(cur_len, sense_len);
-			struct osd_sense_attr *pattr = osadd->sense_attrs;
-
-			while (len >= sizeof(*pattr)) {
-				u32 attr_page = be32_to_cpu(pattr->attr_page);
-				u32 attr_id = be32_to_cpu(pattr->attr_id);
-
-				if (!osi->attr.attr_page) {
-					osi->attr.attr_page = attr_page;
-					osi->attr.attr_id = attr_id;
-				}
-
-				if (bad_attr_list && max_attr) {
-					bad_attr_list->attr_page = attr_page;
-					bad_attr_list->attr_id = attr_id;
-					bad_attr_list++;
-					max_attr--;
-				}
-
-				len -= sizeof(*pattr);
-				OSD_SENSE_PRINT2(
-					"osd_sense_attribute_identification"
-					"attr_page=0x%x attr_id=0x%x\n",
-					attr_page, attr_id);
-			}
-		}
-		/*These are not legal for OSD*/
-		case scsi_sense_field_replaceable_unit:
-			OSD_SENSE_PRINT2("scsi_sense_field_replaceable_unit\n");
-			break;
-		case scsi_sense_stream_commands:
-			OSD_SENSE_PRINT2("scsi_sense_stream_commands\n");
-			break;
-		case scsi_sense_block_commands:
-			OSD_SENSE_PRINT2("scsi_sense_block_commands\n");
-			break;
-		case scsi_sense_ata_return:
-			OSD_SENSE_PRINT2("scsi_sense_ata_return\n");
-			break;
-		default:
-			if (ssd->descriptor_type <= scsi_sense_Reserved_last)
-				OSD_SENSE_PRINT2(
-					"scsi_sense Reserved descriptor (0x%x)",
-					ssd->descriptor_type);
-			else
-				OSD_SENSE_PRINT2(
-					"scsi_sense Vendor descriptor (0x%x)",
-					ssd->descriptor_type);
-		}
-
-		cur_descriptor += cur_len;
-	}
-
-analyze:
-	if (!osi->key) {
-		/* scsi sense is Empty, the request was never issued to target
-		 * linux return code might tell us what happened.
-		 */
-		if (or->async_error == BLK_STS_RESOURCE)
-			osi->osd_err_pri = OSD_ERR_PRI_RESOURCE;
-		else
-			osi->osd_err_pri = OSD_ERR_PRI_UNREACHABLE;
-		ret = or->async_error;
-	} else if (osi->key <= scsi_sk_recovered_error) {
-		osi->osd_err_pri = 0;
-		ret = 0;
-	} else if (osi->additional_code == scsi_invalid_field_in_cdb) {
-		if (osi->cdb_field_offset == OSD_CFO_STARTING_BYTE) {
-			osi->osd_err_pri = OSD_ERR_PRI_CLEAR_PAGES;
-			ret = -EFAULT; /* caller should recover from this */
-		} else if (osi->cdb_field_offset == OSD_CFO_OBJECT_ID) {
-			osi->osd_err_pri = OSD_ERR_PRI_NOT_FOUND;
-			ret = -ENOENT;
-		} else if (osi->cdb_field_offset == OSD_CFO_PERMISSIONS) {
-			osi->osd_err_pri = OSD_ERR_PRI_NO_ACCESS;
-			ret = -EACCES;
-		} else {
-			osi->osd_err_pri = OSD_ERR_PRI_BAD_CRED;
-			ret = -EINVAL;
-		}
-	} else if (osi->additional_code == osd_quota_error) {
-		osi->osd_err_pri = OSD_ERR_PRI_NO_SPACE;
-		ret = -ENOSPC;
-	} else if (_is_osd_security_code(osi->additional_code)) {
-		osi->osd_err_pri = OSD_ERR_PRI_BAD_CRED;
-		ret = -EINVAL;
-	} else {
-		osi->osd_err_pri = OSD_ERR_PRI_EIO;
-		ret = -EIO;
-	}
-
-	if (!or->out.residual)
-		or->out.residual = or->out.total_bytes;
-	if (!or->in.residual)
-		or->in.residual = or->in.total_bytes;
-
-	return ret;
-}
-EXPORT_SYMBOL(osd_req_decode_sense_full);
-
-/*
- * Implementation of osd_sec.h API
- * TODO: Move to a separate osd_sec.c file at a later stage.
- */
-
-enum { OSD_SEC_CAP_V1_ALL_CAPS =
-	OSD_SEC_CAP_APPEND | OSD_SEC_CAP_OBJ_MGMT | OSD_SEC_CAP_REMOVE   |
-	OSD_SEC_CAP_CREATE | OSD_SEC_CAP_SET_ATTR | OSD_SEC_CAP_GET_ATTR |
-	OSD_SEC_CAP_WRITE  | OSD_SEC_CAP_READ     | OSD_SEC_CAP_POL_SEC  |
-	OSD_SEC_CAP_GLOBAL | OSD_SEC_CAP_DEV_MGMT
-};
-
-enum { OSD_SEC_CAP_V2_ALL_CAPS =
-	OSD_SEC_CAP_V1_ALL_CAPS | OSD_SEC_CAP_QUERY | OSD_SEC_CAP_M_OBJECT
-};
-
-void osd_sec_init_nosec_doall_caps(void *caps,
-	const struct osd_obj_id *obj, bool is_collection, const bool is_v1)
-{
-	struct osd_capability *cap = caps;
-	u8 type;
-	u8 descriptor_type;
-
-	if (likely(obj->id)) {
-		if (unlikely(is_collection)) {
-			type = OSD_SEC_OBJ_COLLECTION;
-			descriptor_type = is_v1 ? OSD_SEC_OBJ_DESC_OBJ :
-						  OSD_SEC_OBJ_DESC_COL;
-		} else {
-			type = OSD_SEC_OBJ_USER;
-			descriptor_type = OSD_SEC_OBJ_DESC_OBJ;
-		}
-		WARN_ON(!obj->partition);
-	} else {
-		type = obj->partition ? OSD_SEC_OBJ_PARTITION :
-					OSD_SEC_OBJ_ROOT;
-		descriptor_type = OSD_SEC_OBJ_DESC_PAR;
-	}
-
-	memset(cap, 0, sizeof(*cap));
-
-	cap->h.format = OSD_SEC_CAP_FORMAT_VER1;
-	cap->h.integrity_algorithm__key_version = 0; /* MAKE_BYTE(0, 0); */
-	cap->h.security_method = OSD_SEC_NOSEC;
-/*	cap->expiration_time;
-	cap->AUDIT[30-10];
-	cap->discriminator[42-30];
-	cap->object_created_time; */
-	cap->h.object_type = type;
-	osd_sec_set_caps(&cap->h, OSD_SEC_CAP_V1_ALL_CAPS);
-	cap->h.object_descriptor_type = descriptor_type;
-	cap->od.obj_desc.policy_access_tag = 0;
-	cap->od.obj_desc.allowed_partition_id = cpu_to_be64(obj->partition);
-	cap->od.obj_desc.allowed_object_id = cpu_to_be64(obj->id);
-}
-EXPORT_SYMBOL(osd_sec_init_nosec_doall_caps);
-
-/* FIXME: Extract version from caps pointer.
- *        Also Pete's target only supports caps from OSDv1 for now
- */
-void osd_set_caps(struct osd_cdb *cdb, const void *caps)
-{
-	/* NOTE: They start at same address */
-	memcpy(&cdb->v1.caps, caps, OSDv1_CAP_LEN);
-}
-
-bool osd_is_sec_alldata(struct osd_security_parameters *sec_parms __unused)
-{
-	return false;
-}
-
-void osd_sec_sign_cdb(struct osd_cdb *ocdb __unused, const u8 *cap_key __unused)
-{
-}
-
-void osd_sec_sign_data(void *data_integ __unused,
-		       struct bio *bio __unused, const u8 *cap_key __unused)
-{
-}
-
-/*
- * Declared in osd_protocol.h
- * 4.12.5 Data-In and Data-Out buffer offsets
- * byte offset = mantissa * (2^(exponent+8))
- * Returns the smallest allowed encoded offset that contains given @offset
- * The actual encoded offset returned is @offset + *@padding.
- */
-osd_cdb_offset __osd_encode_offset(
-	u64 offset, unsigned *padding, int min_shift, int max_shift)
-{
-	u64 try_offset = -1, mod, align;
-	osd_cdb_offset be32_offset;
-	int shift;
-
-	*padding = 0;
-	if (!offset)
-		return 0;
-
-	for (shift = min_shift; shift < max_shift; ++shift) {
-		try_offset = offset >> shift;
-		if (try_offset < (1 << OSD_OFFSET_MAX_BITS))
-			break;
-	}
-
-	BUG_ON(shift == max_shift);
-
-	align = 1 << shift;
-	mod = offset & (align - 1);
-	if (mod) {
-		*padding = align - mod;
-		try_offset += 1;
-	}
-
-	try_offset |= ((shift - 8) & 0xf) << 28;
-	be32_offset = cpu_to_be32((u32)try_offset);
-
-	OSD_DEBUG("offset=%llu mantissa=%llu exp=%d encoded=%x pad=%d\n",
-		 _LLU(offset), _LLU(try_offset & 0x0FFFFFFF), shift,
-		 be32_offset, *padding);
-	return be32_offset;
-}
diff --git a/drivers/scsi/osd/osd_uld.c b/drivers/scsi/osd/osd_uld.c
deleted file mode 100644
index eaf36ccf58db..000000000000
--- a/drivers/scsi/osd/osd_uld.c
+++ /dev/null
@@ -1,571 +0,0 @@
-/*
- * osd_uld.c - OSD Upper Layer Driver
- *
- * A Linux driver module that registers as a SCSI ULD and probes
- * for OSD type SCSI devices.
- * It's main function is to export osd devices to in-kernel users like
- * osdfs and pNFS-objects-LD. It also provides one ioctl for running
- * in Kernel tests.
- *
- * Copyright (C) 2008 Panasas Inc.  All rights reserved.
- *
- * Authors:
- *   Boaz Harrosh <ooo@electrozaur.com>
- *   Benny Halevy <bhalevy@panasas.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *  1. Redistributions of source code must retain the above copyright
- *     notice, this list of conditions and the following disclaimer.
- *  2. Redistributions in binary form must reproduce the above copyright
- *     notice, this list of conditions and the following disclaimer in the
- *     documentation and/or other materials provided with the distribution.
- *  3. Neither the name of the Panasas company nor the names of its
- *     contributors may be used to endorse or promote products derived
- *     from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <linux/namei.h>
-#include <linux/cdev.h>
-#include <linux/fs.h>
-#include <linux/module.h>
-#include <linux/device.h>
-#include <linux/idr.h>
-#include <linux/major.h>
-#include <linux/file.h>
-#include <linux/slab.h>
-
-#include <scsi/scsi.h>
-#include <scsi/scsi_driver.h>
-#include <scsi/scsi_device.h>
-#include <scsi/scsi_ioctl.h>
-
-#include <scsi/osd_initiator.h>
-#include <scsi/osd_sec.h>
-
-#include "osd_debug.h"
-
-#ifndef TYPE_OSD
-#  define TYPE_OSD 0x11
-#endif
-
-#ifndef SCSI_OSD_MAJOR
-#  define SCSI_OSD_MAJOR 260
-#endif
-#define SCSI_OSD_MAX_MINOR MINORMASK
-
-static const char osd_name[] = "osd";
-static const char *osd_version_string = "open-osd 0.2.1";
-
-MODULE_AUTHOR("Boaz Harrosh <ooo@electrozaur.com>");
-MODULE_DESCRIPTION("open-osd Upper-Layer-Driver osd.ko");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS_CHARDEV_MAJOR(SCSI_OSD_MAJOR);
-MODULE_ALIAS_SCSI_DEVICE(TYPE_OSD);
-
-struct osd_uld_device {
-	int minor;
-	struct device class_dev;
-	struct cdev cdev;
-	struct osd_dev od;
-	struct osd_dev_info odi;
-	struct gendisk *disk;
-};
-
-struct osd_dev_handle {
-	struct osd_dev od;
-	struct file *file;
-	struct osd_uld_device *oud;
-} ;
-
-static DEFINE_IDA(osd_minor_ida);
-
-/*
- * scsi sysfs attribute operations
- */
-static ssize_t osdname_show(struct device *dev, struct device_attribute *attr,
-			    char *buf)
-{
-	struct osd_uld_device *ould = container_of(dev, struct osd_uld_device,
-						   class_dev);
-	return sprintf(buf, "%s\n", ould->odi.osdname);
-}
-static DEVICE_ATTR_RO(osdname);
-
-static ssize_t systemid_show(struct device *dev, struct device_attribute *attr,
-			    char *buf)
-{
-	struct osd_uld_device *ould = container_of(dev, struct osd_uld_device,
-						   class_dev);
-
-	memcpy(buf, ould->odi.systemid, ould->odi.systemid_len);
-	return ould->odi.systemid_len;
-}
-static DEVICE_ATTR_RO(systemid);
-
-static struct attribute *osd_uld_attrs[] = {
-	&dev_attr_osdname.attr,
-	&dev_attr_systemid.attr,
-	NULL,
-};
-ATTRIBUTE_GROUPS(osd_uld);
-
-static struct class osd_uld_class = {
-	.owner		= THIS_MODULE,
-	.name		= "scsi_osd",
-	.dev_groups	= osd_uld_groups,
-};
-
-/*
- * Char Device operations
- */
-
-static int osd_uld_open(struct inode *inode, struct file *file)
-{
-	struct osd_uld_device *oud = container_of(inode->i_cdev,
-					struct osd_uld_device, cdev);
-
-	get_device(&oud->class_dev);
-	/* cache osd_uld_device on file handle */
-	file->private_data = oud;
-	OSD_DEBUG("osd_uld_open %p\n", oud);
-	return 0;
-}
-
-static int osd_uld_release(struct inode *inode, struct file *file)
-{
-	struct osd_uld_device *oud = file->private_data;
-
-	OSD_DEBUG("osd_uld_release %p\n", file->private_data);
-	file->private_data = NULL;
-	put_device(&oud->class_dev);
-	return 0;
-}
-
-/* FIXME: Only one vector for now */
-unsigned g_test_ioctl;
-do_test_fn *g_do_test;
-
-int osduld_register_test(unsigned ioctl, do_test_fn *do_test)
-{
-	if (g_test_ioctl)
-		return -EINVAL;
-
-	g_test_ioctl = ioctl;
-	g_do_test = do_test;
-	return 0;
-}
-EXPORT_SYMBOL(osduld_register_test);
-
-void osduld_unregister_test(unsigned ioctl)
-{
-	if (ioctl == g_test_ioctl) {
-		g_test_ioctl = 0;
-		g_do_test = NULL;
-	}
-}
-EXPORT_SYMBOL(osduld_unregister_test);
-
-static do_test_fn *_find_ioctl(unsigned cmd)
-{
-	if (g_test_ioctl == cmd)
-		return g_do_test;
-	else
-		return NULL;
-}
-
-static long osd_uld_ioctl(struct file *file, unsigned int cmd,
-	unsigned long arg)
-{
-	struct osd_uld_device *oud = file->private_data;
-	int ret;
-	do_test_fn *do_test;
-
-	do_test = _find_ioctl(cmd);
-	if (do_test)
-		ret = do_test(&oud->od, cmd, arg);
-	else {
-		OSD_ERR("Unknown ioctl %d: osd_uld_device=%p\n", cmd, oud);
-		ret = -ENOIOCTLCMD;
-	}
-	return ret;
-}
-
-static const struct file_operations osd_fops = {
-	.owner          = THIS_MODULE,
-	.open           = osd_uld_open,
-	.release        = osd_uld_release,
-	.unlocked_ioctl = osd_uld_ioctl,
-	.llseek		= noop_llseek,
-};
-
-struct osd_dev *osduld_path_lookup(const char *name)
-{
-	struct osd_uld_device *oud;
-	struct osd_dev_handle *odh;
-	struct file *file;
-	int error;
-
-	if (!name || !*name) {
-		OSD_ERR("Mount with !path || !*path\n");
-		return ERR_PTR(-EINVAL);
-	}
-
-	odh = kzalloc(sizeof(*odh), GFP_KERNEL);
-	if (unlikely(!odh))
-		return ERR_PTR(-ENOMEM);
-
-	file = filp_open(name, O_RDWR, 0);
-	if (IS_ERR(file)) {
-		error = PTR_ERR(file);
-		goto free_od;
-	}
-
-	if (file->f_op != &osd_fops){
-		error = -EINVAL;
-		goto close_file;
-	}
-
-	oud = file->private_data;
-
-	odh->od = oud->od;
-	odh->file = file;
-	odh->oud = oud;
-
-	return &odh->od;
-
-close_file:
-	fput(file);
-free_od:
-	kfree(odh);
-	return ERR_PTR(error);
-}
-EXPORT_SYMBOL(osduld_path_lookup);
-
-static inline bool _the_same_or_null(const u8 *a1, unsigned a1_len,
-				     const u8 *a2, unsigned a2_len)
-{
-	if (!a2_len) /* User string is Empty means don't care */
-		return true;
-
-	if (a1_len != a2_len)
-		return false;
-
-	return 0 == memcmp(a1, a2, a1_len);
-}
-
-static int _match_odi(struct device *dev, const void *find_data)
-{
-	struct osd_uld_device *oud = container_of(dev, struct osd_uld_device,
-						  class_dev);
-	const struct osd_dev_info *odi = find_data;
-
-	if (_the_same_or_null(oud->odi.systemid, oud->odi.systemid_len,
-			      odi->systemid, odi->systemid_len) &&
-	    _the_same_or_null(oud->odi.osdname, oud->odi.osdname_len,
-			      odi->osdname, odi->osdname_len)) {
-		OSD_DEBUG("found device sysid_len=%d osdname=%d\n",
-			  odi->systemid_len, odi->osdname_len);
-		return 1;
-	} else {
-		return 0;
-	}
-}
-
-/* osduld_info_lookup - Loop through all devices, return the requested osd_dev.
- *
- * if @odi->systemid_len and/or @odi->osdname_len are zero, they act as a don't
- * care. .e.g if they're both zero /dev/osd0 is returned.
- */
-struct osd_dev *osduld_info_lookup(const struct osd_dev_info *odi)
-{
-	struct device *dev = class_find_device(&osd_uld_class, NULL, odi, _match_odi);
-	if (likely(dev)) {
-		struct osd_dev_handle *odh = kzalloc(sizeof(*odh), GFP_KERNEL);
-		struct osd_uld_device *oud = container_of(dev,
-			struct osd_uld_device, class_dev);
-
-		if (unlikely(!odh)) {
-			put_device(dev);
-			return ERR_PTR(-ENOMEM);
-		}
-
-		odh->od = oud->od;
-		odh->oud = oud;
-
-		return &odh->od;
-	}
-
-	return ERR_PTR(-ENODEV);
-}
-EXPORT_SYMBOL(osduld_info_lookup);
-
-void osduld_put_device(struct osd_dev *od)
-{
-	if (od && !IS_ERR(od)) {
-		struct osd_dev_handle *odh =
-				container_of(od, struct osd_dev_handle, od);
-		struct osd_uld_device *oud = odh->oud;
-
-		BUG_ON(od->scsi_device != oud->od.scsi_device);
-
-		/* If scsi has released the device (logout), and exofs has last
-		 * reference on oud it will be freed by above osd_uld_release
-		 * within fput below. But this will oops in cdev_release which
-		 * is called after the fops->release. A get_/put_ pair makes
-		 * sure we have a cdev for the duration of fput
-		 */
-		if (odh->file) {
-			get_device(&oud->class_dev);
-			fput(odh->file);
-		}
-		put_device(&oud->class_dev);
-		kfree(odh);
-	}
-}
-EXPORT_SYMBOL(osduld_put_device);
-
-const struct osd_dev_info *osduld_device_info(struct osd_dev *od)
-{
-	struct osd_dev_handle *odh =
-				container_of(od, struct osd_dev_handle, od);
-	return &odh->oud->odi;
-}
-EXPORT_SYMBOL(osduld_device_info);
-
-bool osduld_device_same(struct osd_dev *od, const struct osd_dev_info *odi)
-{
-	struct osd_dev_handle *odh =
-				container_of(od, struct osd_dev_handle, od);
-	struct osd_uld_device *oud = odh->oud;
-
-	return (oud->odi.systemid_len == odi->systemid_len) &&
-		_the_same_or_null(oud->odi.systemid, oud->odi.systemid_len,
-				 odi->systemid, odi->systemid_len) &&
-		(oud->odi.osdname_len == odi->osdname_len) &&
-		_the_same_or_null(oud->odi.osdname, oud->odi.osdname_len,
-				  odi->osdname, odi->osdname_len);
-}
-EXPORT_SYMBOL(osduld_device_same);
-
-/*
- * Scsi Device operations
- */
-
-static int __detect_osd(struct osd_uld_device *oud)
-{
-	struct scsi_device *scsi_device = oud->od.scsi_device;
-	struct scsi_sense_hdr sense_hdr;
-	char caps[OSD_CAP_LEN];
-	int error;
-
-	/* sending a test_unit_ready as first command seems to be needed
-	 * by some targets
-	 */
-	OSD_DEBUG("start scsi_test_unit_ready %p %p %p\n",
-			oud, scsi_device, scsi_device->request_queue);
-	error = scsi_test_unit_ready(scsi_device, 10*HZ, 5, &sense_hdr);
-	if (error)
-		OSD_ERR("warning: scsi_test_unit_ready failed\n");
-
-	osd_sec_init_nosec_doall_caps(caps, &osd_root_object, false, true);
-	if (osd_auto_detect_ver(&oud->od, caps, &oud->odi))
-		return -ENODEV;
-
-	return 0;
-}
-
-static void __remove(struct device *dev)
-{
-	struct osd_uld_device *oud = container_of(dev, struct osd_uld_device,
-						  class_dev);
-	struct scsi_device *scsi_device = oud->od.scsi_device;
-
-	kfree(oud->odi.osdname);
-
-	osd_dev_fini(&oud->od);
-	scsi_device_put(scsi_device);
-
-	OSD_INFO("osd_remove %s\n",
-		 oud->disk ? oud->disk->disk_name : NULL);
-
-	if (oud->disk)
-		put_disk(oud->disk);
-
-	kfree(oud);
-}
-
-static int osd_probe(struct device *dev)
-{
-	struct scsi_device *scsi_device = to_scsi_device(dev);
-	struct gendisk *disk;
-	struct osd_uld_device *oud;
-	int minor;
-	int error;
-
-	if (scsi_device->type != TYPE_OSD)
-		return -ENODEV;
-
-	minor = ida_alloc_max(&osd_minor_ida, SCSI_OSD_MAX_MINOR, GFP_KERNEL);
-	if (minor == -ENOSPC)
-		return -EBUSY;
-	if (minor < 0)
-		return -ENODEV;
-
-	error = -ENOMEM;
-	oud = kzalloc(sizeof(*oud), GFP_KERNEL);
-	if (NULL == oud)
-		goto err_retract_minor;
-
-	/* class device member */
-	device_initialize(&oud->class_dev);
-	dev_set_drvdata(dev, oud);
-	oud->minor = minor;
-	oud->class_dev.devt = MKDEV(SCSI_OSD_MAJOR, oud->minor);
-	oud->class_dev.class = &osd_uld_class;
-	oud->class_dev.parent = dev;
-	oud->class_dev.release = __remove;
-
-	/* hold one more reference to the scsi_device that will get released
-	 * in __release, in case a logout is happening while fs is mounted
-	 */
-	if (scsi_device_get(scsi_device))
-		goto err_retract_minor;
-	osd_dev_init(&oud->od, scsi_device);
-
-	/* allocate a disk and set it up */
-	/* FIXME: do we need this since sg has already done that */
-	disk = alloc_disk(1);
-	if (!disk) {
-		OSD_ERR("alloc_disk failed\n");
-		goto err_free_osd;
-	}
-	disk->major = SCSI_OSD_MAJOR;
-	disk->first_minor = oud->minor;
-	sprintf(disk->disk_name, "osd%d", oud->minor);
-	oud->disk = disk;
-
-	/* Detect the OSD Version */
-	error = __detect_osd(oud);
-	if (error) {
-		OSD_ERR("osd detection failed, non-compatible OSD device\n");
-		goto err_free_osd;
-	}
-
-	/* init the char-device for communication with user-mode */
-	cdev_init(&oud->cdev, &osd_fops);
-	oud->cdev.owner = THIS_MODULE;
-
-	error = dev_set_name(&oud->class_dev, "%s", disk->disk_name);
-	if (error) {
-		OSD_ERR("dev_set_name failed => %d\n", error);
-		goto err_free_osd;
-	}
-
-	error = cdev_device_add(&oud->cdev, &oud->class_dev);
-	if (error) {
-		OSD_ERR("device_register failed => %d\n", error);
-		goto err_free_osd;
-	}
-
-	OSD_INFO("osd_probe %s\n", disk->disk_name);
-	return 0;
-
-err_free_osd:
-	put_device(&oud->class_dev);
-err_retract_minor:
-	ida_free(&osd_minor_ida, minor);
-	return error;
-}
-
-static int osd_remove(struct device *dev)
-{
-	struct scsi_device *scsi_device = to_scsi_device(dev);
-	struct osd_uld_device *oud = dev_get_drvdata(dev);
-
-	if (oud->od.scsi_device != scsi_device) {
-		OSD_ERR("Half cooked osd-device %p, || %p!=%p",
-			dev, oud->od.scsi_device, scsi_device);
-	}
-
-	cdev_device_del(&oud->cdev, &oud->class_dev);
-	ida_free(&osd_minor_ida, oud->minor);
-	put_device(&oud->class_dev);
-
-	return 0;
-}
-
-/*
- * Global driver and scsi registration
- */
-
-static struct scsi_driver osd_driver = {
-	.gendrv = {
-		.name		= osd_name,
-		.owner		= THIS_MODULE,
-		.probe		= osd_probe,
-		.remove		= osd_remove,
-	}
-};
-
-static int __init osd_uld_init(void)
-{
-	int err;
-
-	err = class_register(&osd_uld_class);
-	if (err) {
-		OSD_ERR("Unable to register sysfs class => %d\n", err);
-		return err;
-	}
-
-	err = register_chrdev_region(MKDEV(SCSI_OSD_MAJOR, 0),
-				     SCSI_OSD_MAX_MINOR, osd_name);
-	if (err) {
-		OSD_ERR("Unable to register major %d for osd ULD => %d\n",
-			SCSI_OSD_MAJOR, err);
-		goto err_out;
-	}
-
-	err = scsi_register_driver(&osd_driver.gendrv);
-	if (err) {
-		OSD_ERR("scsi_register_driver failed => %d\n", err);
-		goto err_out_chrdev;
-	}
-
-	OSD_INFO("LOADED %s\n", osd_version_string);
-	return 0;
-
-err_out_chrdev:
-	unregister_chrdev_region(MKDEV(SCSI_OSD_MAJOR, 0), SCSI_OSD_MAX_MINOR);
-err_out:
-	class_unregister(&osd_uld_class);
-	return err;
-}
-
-static void __exit osd_uld_exit(void)
-{
-	scsi_unregister_driver(&osd_driver.gendrv);
-	unregister_chrdev_region(MKDEV(SCSI_OSD_MAJOR, 0), SCSI_OSD_MAX_MINOR);
-	class_unregister(&osd_uld_class);
-	OSD_INFO("UNLOADED %s\n", osd_version_string);
-}
-
-module_init(osd_uld_init);
-module_exit(osd_uld_exit);
diff --git a/include/scsi/osd_initiator.h b/include/scsi/osd_initiator.h
deleted file mode 100644
index 86a569d008b2..000000000000
--- a/include/scsi/osd_initiator.h
+++ /dev/null
@@ -1,511 +0,0 @@
-/*
- * osd_initiator.h - OSD initiator API definition
- *
- * Copyright (C) 2008 Panasas Inc.  All rights reserved.
- *
- * Authors:
- *   Boaz Harrosh <ooo@electrozaur.com>
- *   Benny Halevy <bhalevy@panasas.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2
- *
- */
-#ifndef __OSD_INITIATOR_H__
-#define __OSD_INITIATOR_H__
-
-#include <scsi/osd_protocol.h>
-#include <scsi/osd_types.h>
-
-#include <linux/blkdev.h>
-#include <scsi/scsi_device.h>
-
-/* Note: "NI" in comments below means "Not Implemented yet" */
-
-/* Configure of code:
- * #undef if you *don't* want OSD v1 support in runtime.
- * If #defined the initiator will dynamically configure to encode OSD v1
- * CDB's if the target is detected to be OSD v1 only.
- * OSD v2 only commands, options, and attributes will be ignored if target
- * is v1 only.
- * If #defined will result in bigger/slower code (OK Slower maybe not)
- * Q: Should this be CONFIG_SCSI_OSD_VER1_SUPPORT and set from Kconfig?
- */
-#define OSD_VER1_SUPPORT y
-
-enum osd_std_version {
-	OSD_VER_NONE = 0,
-	OSD_VER1 = 1,
-	OSD_VER2 = 2,
-};
-
-/*
- * Object-based Storage Device.
- * This object represents an OSD device.
- * It is not a full linux device in any way. It is only
- * a place to hang resources associated with a Linux
- * request Q and some default properties.
- */
-struct osd_dev {
-	struct scsi_device *scsi_device;
-	unsigned def_timeout;
-
-#ifdef OSD_VER1_SUPPORT
-	enum osd_std_version version;
-#endif
-};
-
-/* Unique Identification of an OSD device */
-struct osd_dev_info {
-	unsigned systemid_len;
-	u8 systemid[OSD_SYSTEMID_LEN];
-	unsigned osdname_len;
-	u8 *osdname;
-};
-
-/* Retrieve/return osd_dev(s) for use by Kernel clients
- * Use IS_ERR/ERR_PTR on returned "osd_dev *".
- */
-struct osd_dev *osduld_path_lookup(const char *dev_name);
-struct osd_dev *osduld_info_lookup(const struct osd_dev_info *odi);
-void osduld_put_device(struct osd_dev *od);
-
-const struct osd_dev_info *osduld_device_info(struct osd_dev *od);
-bool osduld_device_same(struct osd_dev *od, const struct osd_dev_info *odi);
-
-/* Add/remove test ioctls from external modules */
-typedef int (do_test_fn)(struct osd_dev *od, unsigned cmd, unsigned long arg);
-int osduld_register_test(unsigned ioctl, do_test_fn *do_test);
-void osduld_unregister_test(unsigned ioctl);
-
-/* These are called by uld at probe time */
-void osd_dev_init(struct osd_dev *od, struct scsi_device *scsi_device);
-void osd_dev_fini(struct osd_dev *od);
-
-/**
- * osd_auto_detect_ver - Detect the OSD version, return Unique Identification
- *
- * @od:     OSD target lun handle
- * @caps:   Capabilities authorizing OSD root read attributes access
- * @odi:    Retrieved information uniquely identifying the osd target lun
- *          Note: odi->osdname must be kfreed by caller.
- *
- * Auto detects the OSD version of the OSD target and sets the @od
- * accordingly. Meanwhile also returns the "system id" and "osd name" root
- * attributes which uniquely identify the OSD target. This member is usually
- * called by the ULD. ULD users should call osduld_device_info().
- * This rutine allocates osd requests and memory at GFP_KERNEL level and might
- * sleep.
- */
-int osd_auto_detect_ver(struct osd_dev *od,
-	void *caps, struct osd_dev_info *odi);
-
-static inline struct request_queue *osd_request_queue(struct osd_dev *od)
-{
-	return od->scsi_device->request_queue;
-}
-
-/* we might want to use function vector in the future */
-static inline void osd_dev_set_ver(struct osd_dev *od, enum osd_std_version v)
-{
-#ifdef OSD_VER1_SUPPORT
-	od->version = v;
-#endif
-}
-
-static inline bool osd_dev_is_ver1(struct osd_dev *od)
-{
-#ifdef OSD_VER1_SUPPORT
-	return od->version == OSD_VER1;
-#else
-	return false;
-#endif
-}
-
-struct osd_request;
-typedef void (osd_req_done_fn)(struct osd_request *or, void *private);
-
-struct osd_request {
-	struct osd_cdb cdb;
-	struct osd_data_out_integrity_info out_data_integ;
-	struct osd_data_in_integrity_info in_data_integ;
-
-	struct osd_dev *osd_dev;
-	struct request *request;
-
-	struct _osd_req_data_segment {
-		void *buff;
-		unsigned alloc_size; /* 0 here means: don't call kfree */
-		unsigned total_bytes;
-	} cdb_cont, set_attr, enc_get_attr, get_attr;
-
-	struct _osd_io_info {
-		struct bio *bio;
-		u64 total_bytes;
-		u64 residual;
-		struct request *req;
-		struct _osd_req_data_segment *last_seg;
-		u8 *pad_buff;
-	} out, in;
-
-	unsigned timeout;
-	unsigned retries;
-	unsigned sense_len;
-	u8 sense[OSD_MAX_SENSE_LEN];
-	enum osd_attributes_mode attributes_mode;
-
-	osd_req_done_fn *async_done;
-	void *async_private;
-	blk_status_t async_error;
-	int req_errors;
-};
-
-static inline bool osd_req_is_ver1(struct osd_request *or)
-{
-	return osd_dev_is_ver1(or->osd_dev);
-}
-
-/*
- * How to use the osd library:
- *
- * osd_start_request
- *	Allocates a request.
- *
- * osd_req_*
- *	Call one of, to encode the desired operation.
- *
- * osd_add_{get,set}_attr
- *	Optionally add attributes to the CDB, list or page mode.
- *
- * osd_finalize_request
- *	Computes final data out/in offsets and signs the request,
- *	making it ready for execution.
- *
- * osd_execute_request
- *	May be called to execute it through the block layer. Other wise submit
- *	the associated block request in some other way.
- *
- * After execution:
- * osd_req_decode_sense
- *	Decodes sense information to verify execution results.
- *
- * osd_req_decode_get_attr
- *	Retrieve osd_add_get_attr_list() values if used.
- *
- * osd_end_request
- *	Must be called to deallocate the request.
- */
-
-/**
- * osd_start_request - Allocate and initialize an osd_request
- *
- * @osd_dev:    OSD device that holds the scsi-device and default values
- *              that the request is associated with.
- *
- * Allocate osd_request and initialize all members to the
- * default/initial state.
- */
-struct osd_request *osd_start_request(struct osd_dev *od);
-
-enum osd_req_options {
-	OSD_REQ_FUA = 0x08,	/* Force Unit Access */
-	OSD_REQ_DPO = 0x10,	/* Disable Page Out */
-
-	OSD_REQ_BYPASS_TIMESTAMPS = 0x80,
-};
-
-/**
- * osd_finalize_request - Sign request and prepare request for execution
- *
- * @or:		osd_request to prepare
- * @options:	combination of osd_req_options bit flags or 0.
- * @cap:	A Pointer to an OSD_CAP_LEN bytes buffer that is received from
- *              The security manager as capabilities for this cdb.
- * @cap_key:	The cryptographic key used to sign the cdb/data. Can be null
- *              if NOSEC is used.
- *
- * The actual request and bios are only allocated here, so are the get_attr
- * buffers that will receive the returned attributes. Copy's @cap to cdb.
- * Sign the cdb/data with @cap_key.
- */
-int osd_finalize_request(struct osd_request *or,
-	u8 options, const void *cap, const u8 *cap_key);
-
-/**
- * osd_execute_request - Execute the request synchronously through block-layer
- *
- * @or:		osd_request to Executed
- *
- * Calls blk_execute_rq to q the command and waits for completion.
- */
-int osd_execute_request(struct osd_request *or);
-
-/**
- * osd_execute_request_async - Execute the request without waitting.
- *
- * @or:                      - osd_request to Executed
- * @done: (Optional)         - Called at end of execution
- * @private:                 - Will be passed to @done function
- *
- * Calls blk_execute_rq_nowait to queue the command. When execution is done
- * optionally calls @done with @private as parameter. @or->async_error will
- * have the return code
- */
-int osd_execute_request_async(struct osd_request *or,
-	osd_req_done_fn *done, void *private);
-
-/**
- * osd_req_decode_sense_full - Decode sense information after execution.
- *
- * @or:           - osd_request to examine
- * @osi           - Receives a more detailed error report information (optional).
- * @silent        - Do not print to dmsg (Even if enabled)
- * @bad_obj_list  - Some commands act on multiple objects. Failed objects will
- *                  be received here (optional)
- * @max_obj       - Size of @bad_obj_list.
- * @bad_attr_list - List of failing attributes (optional)
- * @max_attr      - Size of @bad_attr_list.
- *
- * After execution, osd_request results are analyzed using this function. The
- * return code is the final disposition on the error. So it is possible that a
- * CHECK_CONDITION was returned from target but this will return NO_ERROR, for
- * example on recovered errors. All parameters are optional if caller does
- * not need any returned information.
- * Note: This function will also dump the error to dmsg according to settings
- * of the SCSI_OSD_DPRINT_SENSE Kconfig value. Set @silent if you know the
- * command would routinely fail, to not spam the dmsg file.
- */
-
-/**
- * osd_err_priority - osd categorized return codes in ascending severity.
- *
- * The categories are borrowed from the pnfs_osd_errno enum.
- * See comments for translated Linux codes returned by osd_req_decode_sense.
- */
-enum osd_err_priority {
-	OSD_ERR_PRI_NO_ERROR	= 0,
-	/* Recoverable, caller should clear_highpage() all pages */
-	OSD_ERR_PRI_CLEAR_PAGES = 1, /* -EFAULT */
-	OSD_ERR_PRI_RESOURCE	= 2, /* -ENOMEM */
-	OSD_ERR_PRI_BAD_CRED	= 3, /* -EINVAL */
-	OSD_ERR_PRI_NO_ACCESS	= 4, /* -EACCES */
-	OSD_ERR_PRI_UNREACHABLE	= 5, /* any other */
-	OSD_ERR_PRI_NOT_FOUND	= 6, /* -ENOENT */
-	OSD_ERR_PRI_NO_SPACE	= 7, /* -ENOSPC */
-	OSD_ERR_PRI_EIO		= 8, /* -EIO    */
-};
-
-struct osd_sense_info {
-	enum osd_err_priority osd_err_pri;
-
-	int key;		/* one of enum scsi_sense_keys */
-	int additional_code ;	/* enum osd_additional_sense_codes */
-	union { /* Sense specific information */
-		u16 sense_info;
-		u16 cdb_field_offset; 	/* scsi_invalid_field_in_cdb */
-	};
-	union { /* Command specific information */
-		u64 command_info;
-	};
-
-	u32 not_initiated_command_functions; /* osd_command_functions_bits */
-	u32 completed_command_functions; /* osd_command_functions_bits */
-	struct osd_obj_id obj;
-	struct osd_attr attr;
-};
-
-int osd_req_decode_sense_full(struct osd_request *or,
-	struct osd_sense_info *osi, bool silent,
-	struct osd_obj_id *bad_obj_list, int max_obj,
-	struct osd_attr *bad_attr_list, int max_attr);
-
-static inline int osd_req_decode_sense(struct osd_request *or,
-	struct osd_sense_info *osi)
-{
-	return osd_req_decode_sense_full(or, osi, false, NULL, 0, NULL, 0);
-}
-
-/**
- * osd_end_request - return osd_request to free store
- *
- * @or:		osd_request to free
- *
- * Deallocate all osd_request resources (struct req's, BIOs, buffers, etc.)
- */
-void osd_end_request(struct osd_request *or);
-
-/*
- * CDB Encoding
- *
- * Note: call only one of the following methods.
- */
-
-/*
- * Device commands
- */
-void osd_req_set_master_seed_xchg(struct osd_request *or, ...);/* NI */
-void osd_req_set_master_key(struct osd_request *or, ...);/* NI */
-
-void osd_req_format(struct osd_request *or, u64 tot_capacity);
-
-/* list all partitions
- * @list header must be initialized to zero on first run.
- *
- * Call osd_is_obj_list_done() to find if we got the complete list.
- */
-int osd_req_list_dev_partitions(struct osd_request *or,
-	osd_id initial_id, struct osd_obj_id_list *list, unsigned nelem);
-
-void osd_req_flush_obsd(struct osd_request *or,
-	enum osd_options_flush_scope_values);
-
-void osd_req_perform_scsi_command(struct osd_request *or,
-	const u8 *cdb, ...);/* NI */
-void osd_req_task_management(struct osd_request *or, ...);/* NI */
-
-/*
- * Partition commands
- */
-void osd_req_create_partition(struct osd_request *or, osd_id partition);
-void osd_req_remove_partition(struct osd_request *or, osd_id partition);
-
-void osd_req_set_partition_key(struct osd_request *or,
-	osd_id partition, u8 new_key_id[OSD_CRYPTO_KEYID_SIZE],
-	u8 seed[OSD_CRYPTO_SEED_SIZE]);/* NI */
-
-/* list all collections in the partition
- * @list header must be init to zero on first run.
- *
- * Call osd_is_obj_list_done() to find if we got the complete list.
- */
-int osd_req_list_partition_collections(struct osd_request *or,
-	osd_id partition, osd_id initial_id, struct osd_obj_id_list *list,
-	unsigned nelem);
-
-/* list all objects in the partition
- * @list header must be init to zero on first run.
- *
- * Call osd_is_obj_list_done() to find if we got the complete list.
- */
-int osd_req_list_partition_objects(struct osd_request *or,
-	osd_id partition, osd_id initial_id, struct osd_obj_id_list *list,
-	unsigned nelem);
-
-void osd_req_flush_partition(struct osd_request *or,
-	osd_id partition, enum osd_options_flush_scope_values);
-
-/*
- * Collection commands
- */
-void osd_req_create_collection(struct osd_request *or,
-	const struct osd_obj_id *);/* NI */
-void osd_req_remove_collection(struct osd_request *or,
-	const struct osd_obj_id *);/* NI */
-
-/* list all objects in the collection */
-int osd_req_list_collection_objects(struct osd_request *or,
-	const struct osd_obj_id *, osd_id initial_id,
-	struct osd_obj_id_list *list, unsigned nelem);
-
-/* V2 only filtered list of objects in the collection */
-void osd_req_query(struct osd_request *or, ...);/* NI */
-
-void osd_req_flush_collection(struct osd_request *or,
-	const struct osd_obj_id *, enum osd_options_flush_scope_values);
-
-void osd_req_get_member_attrs(struct osd_request *or, ...);/* V2-only NI */
-void osd_req_set_member_attrs(struct osd_request *or, ...);/* V2-only NI */
-
-/*
- * Object commands
- */
-void osd_req_create_object(struct osd_request *or, struct osd_obj_id *);
-void osd_req_remove_object(struct osd_request *or, struct osd_obj_id *);
-
-void osd_req_write(struct osd_request *or,
-	const struct osd_obj_id *obj, u64 offset, struct bio *bio, u64 len);
-int osd_req_write_kern(struct osd_request *or,
-	const struct osd_obj_id *obj, u64 offset, void *buff, u64 len);
-void osd_req_append(struct osd_request *or,
-	const struct osd_obj_id *, struct bio *data_out);/* NI */
-void osd_req_create_write(struct osd_request *or,
-	const struct osd_obj_id *, struct bio *data_out, u64 offset);/* NI */
-void osd_req_clear(struct osd_request *or,
-	const struct osd_obj_id *, u64 offset, u64 len);/* NI */
-void osd_req_punch(struct osd_request *or,
-	const struct osd_obj_id *, u64 offset, u64 len);/* V2-only NI */
-
-void osd_req_flush_object(struct osd_request *or,
-	const struct osd_obj_id *, enum osd_options_flush_scope_values,
-	/*V2*/ u64 offset, /*V2*/ u64 len);
-
-void osd_req_read(struct osd_request *or,
-	const struct osd_obj_id *obj, u64 offset, struct bio *bio, u64 len);
-int osd_req_read_kern(struct osd_request *or,
-	const struct osd_obj_id *obj, u64 offset, void *buff, u64 len);
-
-/* Scatter/Gather write/read commands */
-int osd_req_write_sg(struct osd_request *or,
-	const struct osd_obj_id *obj, struct bio *bio,
-	const struct osd_sg_entry *sglist, unsigned numentries);
-int osd_req_read_sg(struct osd_request *or,
-	const struct osd_obj_id *obj, struct bio *bio,
-	const struct osd_sg_entry *sglist, unsigned numentries);
-int osd_req_write_sg_kern(struct osd_request *or,
-	const struct osd_obj_id *obj, void **buff,
-	const struct osd_sg_entry *sglist, unsigned numentries);
-int osd_req_read_sg_kern(struct osd_request *or,
-	const struct osd_obj_id *obj, void **buff,
-	const struct osd_sg_entry *sglist, unsigned numentries);
-
-/*
- * Root/Partition/Collection/Object Attributes commands
- */
-
-/* get before set */
-void osd_req_get_attributes(struct osd_request *or, const struct osd_obj_id *);
-
-/* set before get */
-void osd_req_set_attributes(struct osd_request *or, const struct osd_obj_id *);
-
-/*
- * Attributes appended to most commands
- */
-
-/* Attributes List mode (or V2 CDB) */
-  /*
-   * TODO: In ver2 if at finalize time only one attr was set and no gets,
-   * then the Attributes CDB mode is used automatically to save IO.
-   */
-
-/* set a list of attributes. */
-int osd_req_add_set_attr_list(struct osd_request *or,
-	const struct osd_attr *, unsigned nelem);
-
-/* get a list of attributes */
-int osd_req_add_get_attr_list(struct osd_request *or,
-	const struct osd_attr *, unsigned nelem);
-
-/*
- * Attributes list decoding
- * Must be called after osd_request.request was executed
- * It is called in a loop to decode the returned get_attr
- * (see osd_add_get_attr)
- */
-int osd_req_decode_get_attr_list(struct osd_request *or,
-	struct osd_attr *, int *nelem, void **iterator);
-
-/* Attributes Page mode */
-
-/*
- * Read an attribute page and optionally set one attribute
- *
- * Retrieves the attribute page directly to a user buffer.
- * @attr_page_data shall stay valid until end of execution.
- * See osd_attributes.h for common page structures
- */
-int osd_req_add_get_attr_page(struct osd_request *or,
-	u32 page_id, void *attr_page_data, unsigned max_page_len,
-	const struct osd_attr *set_one);
-
-#endif /* __OSD_LIB_H__ */
diff --git a/include/scsi/osd_ore.h b/include/scsi/osd_ore.h
deleted file mode 100644
index 7a8d2cd30328..000000000000
--- a/include/scsi/osd_ore.h
+++ /dev/null
@@ -1,201 +0,0 @@
-/*
- * Copyright (C) 2011
- * Boaz Harrosh <ooo@electrozaur.com>
- *
- * Public Declarations of the ORE API
- *
- * This file is part of the ORE (Object Raid Engine) library.
- *
- * ORE is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation. (GPL v2)
- *
- * ORE is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with the ORE; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __ORE_H__
-#define __ORE_H__
-
-#include <scsi/osd_initiator.h>
-#include <scsi/osd_attributes.h>
-#include <scsi/osd_sec.h>
-#include <linux/pnfs_osd_xdr.h>
-#include <linux/bug.h>
-
-struct ore_comp {
-	struct osd_obj_id	obj;
-	u8			cred[OSD_CAP_LEN];
-};
-
-struct ore_layout {
-	/* Our way of looking at the data_map */
-	enum pnfs_osd_raid_algorithm4
-		 raid_algorithm;
-	unsigned stripe_unit;
-	unsigned mirrors_p1;
-
-	unsigned group_width;
-	unsigned parity;
-	u64	 group_depth;
-	unsigned group_count;
-
-	/* Cached often needed calculations filled in by
-	 * ore_verify_layout
-	 */
-	unsigned long max_io_length;	/* Max length that should be passed to
-					 * ore_get_rw_state
-					 */
-};
-
-struct ore_dev {
-	struct osd_dev *od;
-};
-
-struct ore_components {
-	unsigned	first_dev;		/* First logical device no    */
-	unsigned	numdevs;		/* Num of devices in array    */
-	/* If @single_comp == EC_SINGLE_COMP, @comps points to a single
-	 * component. else there are @numdevs components
-	 */
-	enum EC_COMP_USAGE {
-		EC_SINGLE_COMP = 0, EC_MULTPLE_COMPS = 0xffffffff
-	}		single_comp;
-	struct ore_comp	*comps;
-
-	/* Array of pointers to ore_dev-* . User will usually have these pointed
-	 * too a bigger struct which contain an "ore_dev ored" member and use
-	 * container_of(oc->ods[i], struct foo_dev, ored) to access the bigger
-	 * structure.
-	 */
-	struct ore_dev	**ods;
-};
-
-/* ore_comp_dev Recievies a logical device index */
-static inline struct osd_dev *ore_comp_dev(
-	const struct ore_components *oc, unsigned i)
-{
-	BUG_ON((i < oc->first_dev) || (oc->first_dev + oc->numdevs <= i));
-	return oc->ods[i - oc->first_dev]->od;
-}
-
-static inline void ore_comp_set_dev(
-	struct ore_components *oc, unsigned i, struct osd_dev *od)
-{
-	oc->ods[i - oc->first_dev]->od = od;
-}
-
-struct ore_striping_info {
-	u64 offset;
-	u64 obj_offset;
-	u64 length;
-	u64 first_stripe_start; /* only used in raid writes */
-	u64 M; /* for truncate */
-	unsigned bytes_in_stripe;
-	unsigned dev;
-	unsigned par_dev;
-	unsigned unit_off;
-	unsigned cur_pg;
-	unsigned cur_comp;
-	unsigned maxdevUnits;
-};
-
-struct ore_io_state;
-typedef void (*ore_io_done_fn)(struct ore_io_state *ios, void *private);
-struct _ore_r4w_op {
-	/* @Priv given here is passed ios->private */
-	struct page * (*get_page)(void *priv, u64 page_index, bool *uptodate);
-	void (*put_page)(void *priv, struct page *page);
-};
-
-struct ore_io_state {
-	struct kref		kref;
-	struct ore_striping_info si;
-
-	void			*private;
-	ore_io_done_fn	done;
-
-	struct ore_layout	*layout;
-	struct ore_components	*oc;
-
-	/* Global read/write IO*/
-	loff_t			offset;
-	unsigned long		length;
-	void			*kern_buff;
-
-	struct page		**pages;
-	unsigned		nr_pages;
-	unsigned		pgbase;
-	unsigned		pages_consumed;
-
-	/* Attributes */
-	unsigned		in_attr_len;
-	struct osd_attr		*in_attr;
-	unsigned		out_attr_len;
-	struct osd_attr		*out_attr;
-
-	bool			reading;
-
-	/* House keeping of Parity pages */
-	bool			extra_part_alloc;
-	struct page		**parity_pages;
-	unsigned		max_par_pages;
-	unsigned		cur_par_page;
-	unsigned		sgs_per_dev;
-	struct __stripe_pages_2d *sp2d;
-	struct ore_io_state	 *ios_read_4_write;
-	const struct _ore_r4w_op *r4w;
-
-	/* Variable array of size numdevs */
-	unsigned numdevs;
-	struct ore_per_dev_state {
-		struct osd_request *or;
-		struct bio *bio;
-		loff_t offset;
-		unsigned length;
-		unsigned last_sgs_total;
-		unsigned dev;
-		struct osd_sg_entry *sglist;
-		unsigned cur_sg;
-	} per_dev[];
-};
-
-static inline unsigned ore_io_state_size(unsigned numdevs)
-{
-	return sizeof(struct ore_io_state) +
-		sizeof(struct ore_per_dev_state) * numdevs;
-}
-
-/* ore.c */
-int ore_verify_layout(unsigned total_comps, struct ore_layout *layout);
-void ore_calc_stripe_info(struct ore_layout *layout, u64 file_offset,
-			  u64 length, struct ore_striping_info *si);
-int ore_get_rw_state(struct ore_layout *layout, struct ore_components *comps,
-		     bool is_reading, u64 offset, u64 length,
-		     struct ore_io_state **ios);
-int ore_get_io_state(struct ore_layout *layout, struct ore_components *comps,
-		     struct ore_io_state **ios);
-void ore_put_io_state(struct ore_io_state *ios);
-
-typedef void (*ore_on_dev_error)(struct ore_io_state *ios, struct ore_dev *od,
-	unsigned dev_index, enum osd_err_priority oep,
-	u64 dev_offset, u64  dev_len);
-int ore_check_io(struct ore_io_state *ios, ore_on_dev_error rep);
-
-int ore_create(struct ore_io_state *ios);
-int ore_remove(struct ore_io_state *ios);
-int ore_write(struct ore_io_state *ios);
-int ore_read(struct ore_io_state *ios);
-int ore_truncate(struct ore_layout *layout, struct ore_components *comps,
-		 u64 size);
-
-int extract_attr_from_ios(struct ore_io_state *ios, struct osd_attr *attr);
-
-extern const struct osd_attr g_attr_logical_length;
-
-#endif
-- 
cgit v1.2.3


From ae3d56d81507c33024ba7c1eae2ef433aa9bc0d5 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 29 Jan 2019 09:33:07 +0100
Subject: scsi: remove bidirectional command support

No real need for bidi support once the OSD code is gone.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/cxgbi/libcxgbi.c      | 13 +++---
 drivers/scsi/iscsi_tcp.c           |  9 +---
 drivers/scsi/libiscsi.c            | 64 +++-------------------------
 drivers/scsi/libiscsi_tcp.c        |  8 ++--
 drivers/scsi/scsi_debug.c          | 85 ++++----------------------------------
 drivers/scsi/scsi_error.c          |  3 --
 drivers/scsi/scsi_lib.c            | 60 ++-------------------------
 drivers/scsi/virtio_scsi.c         | 14 ++-----
 drivers/target/loopback/tcm_loop.c | 15 -------
 drivers/usb/storage/uas.c          | 11 +----
 include/scsi/scsi_cmnd.h           | 19 +--------
 include/scsi/scsi_eh.h             |  1 -
 12 files changed, 34 insertions(+), 268 deletions(-)

(limited to 'include')

diff --git a/drivers/scsi/cxgbi/libcxgbi.c b/drivers/scsi/cxgbi/libcxgbi.c
index 75f876409fb9..4466ae5c9a74 100644
--- a/drivers/scsi/cxgbi/libcxgbi.c
+++ b/drivers/scsi/cxgbi/libcxgbi.c
@@ -1211,7 +1211,7 @@ scmd_get_params(struct scsi_cmnd *sc, struct scatterlist **sgl,
 		unsigned int *sgcnt, unsigned int *dlen,
 		unsigned int prot)
 {
-	struct scsi_data_buffer *sdb = prot ? scsi_prot(sc) : scsi_out(sc);
+	struct scsi_data_buffer *sdb = prot ? scsi_prot(sc) : &sc->sdb;
 
 	*sgl = sdb->table.sgl;
 	*sgcnt = sdb->table.nents;
@@ -1427,8 +1427,7 @@ static void task_release_itt(struct iscsi_task *task, itt_t hdr_itt)
 	log_debug(1 << CXGBI_DBG_DDP,
 		  "cdev 0x%p, task 0x%p, release tag 0x%x.\n",
 		  cdev, task, tag);
-	if (sc &&
-	    (scsi_bidi_cmnd(sc) || sc->sc_data_direction == DMA_FROM_DEVICE) &&
+	if (sc && sc->sc_data_direction == DMA_FROM_DEVICE &&
 	    cxgbi_ppm_is_ddp_tag(ppm, tag)) {
 		struct cxgbi_task_data *tdata = iscsi_task_cxgbi_data(task);
 		struct cxgbi_task_tag_info *ttinfo = &tdata->ttinfo;
@@ -1460,9 +1459,7 @@ static int task_reserve_itt(struct iscsi_task *task, itt_t *hdr_itt)
 	u32 tag = 0;
 	int err = -EINVAL;
 
-	if (sc &&
-	    (scsi_bidi_cmnd(sc) || sc->sc_data_direction == DMA_FROM_DEVICE)
-	) {
+	if (sc && sc->sc_data_direction == DMA_FROM_DEVICE) {
 		struct cxgbi_task_data *tdata = iscsi_task_cxgbi_data(task);
 		struct cxgbi_task_tag_info *ttinfo = &tdata->ttinfo;
 
@@ -1896,7 +1893,7 @@ int cxgbi_conn_alloc_pdu(struct iscsi_task *task, u8 opcode)
 	if (SKB_MAX_HEAD(cdev->skb_tx_rsvd) > (512 * MAX_SKB_FRAGS) &&
 	    (opcode == ISCSI_OP_SCSI_DATA_OUT ||
 	     (opcode == ISCSI_OP_SCSI_CMD &&
-	      (scsi_bidi_cmnd(sc) || sc->sc_data_direction == DMA_TO_DEVICE))))
+	      sc->sc_data_direction == DMA_TO_DEVICE)))
 		/* data could goes into skb head */
 		headroom += min_t(unsigned int,
 				SKB_MAX_HEAD(cdev->skb_tx_rsvd),
@@ -1971,7 +1968,7 @@ int cxgbi_conn_init_pdu(struct iscsi_task *task, unsigned int offset,
 		return 0;
 
 	if (task->sc) {
-		struct scsi_data_buffer *sdb = scsi_out(task->sc);
+		struct scsi_data_buffer *sdb = &task->sc->sdb;
 		struct scatterlist *sg = NULL;
 		int err;
 
diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c
index cae6368ebb98..ed3debce2819 100644
--- a/drivers/scsi/iscsi_tcp.c
+++ b/drivers/scsi/iscsi_tcp.c
@@ -518,7 +518,7 @@ static int iscsi_sw_tcp_pdu_init(struct iscsi_task *task,
 	if (!task->sc)
 		iscsi_sw_tcp_send_linear_data_prep(conn, task->data, count);
 	else {
-		struct scsi_data_buffer *sdb = scsi_out(task->sc);
+		struct scsi_data_buffer *sdb = &task->sc->sdb;
 
 		err = iscsi_sw_tcp_send_data_prep(conn, sdb->table.sgl,
 						  sdb->table.nents, offset,
@@ -952,12 +952,6 @@ static umode_t iscsi_sw_tcp_attr_is_visible(int param_type, int param)
 	return 0;
 }
 
-static int iscsi_sw_tcp_slave_alloc(struct scsi_device *sdev)
-{
-	blk_queue_flag_set(QUEUE_FLAG_BIDI, sdev->request_queue);
-	return 0;
-}
-
 static int iscsi_sw_tcp_slave_configure(struct scsi_device *sdev)
 {
 	struct iscsi_sw_tcp_host *tcp_sw_host = iscsi_host_priv(sdev->host);
@@ -985,7 +979,6 @@ static struct scsi_host_template iscsi_sw_tcp_sht = {
 	.eh_device_reset_handler= iscsi_eh_device_reset,
 	.eh_target_reset_handler = iscsi_eh_recover_target,
 	.dma_boundary		= PAGE_SIZE - 1,
-	.slave_alloc            = iscsi_sw_tcp_slave_alloc,
 	.slave_configure        = iscsi_sw_tcp_slave_configure,
 	.target_alloc		= iscsi_target_alloc,
 	.proc_name		= "iscsi_tcp",
diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c
index b8d325ce8754..bca3a8636c27 100644
--- a/drivers/scsi/libiscsi.c
+++ b/drivers/scsi/libiscsi.c
@@ -228,32 +228,6 @@ static int iscsi_prep_ecdb_ahs(struct iscsi_task *task)
 	return 0;
 }
 
-static int iscsi_prep_bidi_ahs(struct iscsi_task *task)
-{
-	struct scsi_cmnd *sc = task->sc;
-	struct iscsi_rlength_ahdr *rlen_ahdr;
-	int rc;
-
-	rlen_ahdr = iscsi_next_hdr(task);
-	rc = iscsi_add_hdr(task, sizeof(*rlen_ahdr));
-	if (rc)
-		return rc;
-
-	rlen_ahdr->ahslength =
-		cpu_to_be16(sizeof(rlen_ahdr->read_length) +
-						  sizeof(rlen_ahdr->reserved));
-	rlen_ahdr->ahstype = ISCSI_AHSTYPE_RLENGTH;
-	rlen_ahdr->reserved = 0;
-	rlen_ahdr->read_length = cpu_to_be32(scsi_in(sc)->length);
-
-	ISCSI_DBG_SESSION(task->conn->session,
-			  "bidi-in rlen_ahdr->read_length(%d) "
-		          "rlen_ahdr->ahslength(%d)\n",
-		          be32_to_cpu(rlen_ahdr->read_length),
-		          be16_to_cpu(rlen_ahdr->ahslength));
-	return 0;
-}
-
 /**
  * iscsi_check_tmf_restrictions - check if a task is affected by TMF
  * @task: iscsi task
@@ -392,13 +366,6 @@ static int iscsi_prep_scsi_cmd_pdu(struct iscsi_task *task)
 	memcpy(hdr->cdb, sc->cmnd, cmd_len);
 
 	task->imm_count = 0;
-	if (scsi_bidi_cmnd(sc)) {
-		hdr->flags |= ISCSI_FLAG_CMD_READ;
-		rc = iscsi_prep_bidi_ahs(task);
-		if (rc)
-			return rc;
-	}
-
 	if (scsi_get_prot_op(sc) != SCSI_PROT_NORMAL)
 		task->protected = true;
 
@@ -473,12 +440,10 @@ static int iscsi_prep_scsi_cmd_pdu(struct iscsi_task *task)
 
 	conn->scsicmd_pdus_cnt++;
 	ISCSI_DBG_SESSION(session, "iscsi prep [%s cid %d sc %p cdb 0x%x "
-			  "itt 0x%x len %d bidi_len %d cmdsn %d win %d]\n",
-			  scsi_bidi_cmnd(sc) ? "bidirectional" :
+			  "itt 0x%x len %d cmdsn %d win %d]\n",
 			  sc->sc_data_direction == DMA_TO_DEVICE ?
 			  "write" : "read", conn->id, sc, sc->cmnd[0],
 			  task->itt, transfer_length,
-			  scsi_bidi_cmnd(sc) ? scsi_in(sc)->length : 0,
 			  session->cmdsn,
 			  session->max_cmdsn - session->exp_cmdsn + 1);
 	return 0;
@@ -647,12 +612,7 @@ static void fail_scsi_task(struct iscsi_task *task, int err)
 		state = ISCSI_TASK_ABRT_TMF;
 
 	sc->result = err << 16;
-	if (!scsi_bidi_cmnd(sc))
-		scsi_set_resid(sc, scsi_bufflen(sc));
-	else {
-		scsi_out(sc)->resid = scsi_out(sc)->length;
-		scsi_in(sc)->resid = scsi_in(sc)->length;
-	}
+	scsi_set_resid(sc, scsi_bufflen(sc));
 
 	/* regular RX path uses back_lock */
 	spin_lock_bh(&conn->session->back_lock);
@@ -907,14 +867,7 @@ invalid_datalen:
 
 	if (rhdr->flags & (ISCSI_FLAG_CMD_BIDI_UNDERFLOW |
 			   ISCSI_FLAG_CMD_BIDI_OVERFLOW)) {
-		int res_count = be32_to_cpu(rhdr->bi_residual_count);
-
-		if (scsi_bidi_cmnd(sc) && res_count > 0 &&
-				(rhdr->flags & ISCSI_FLAG_CMD_BIDI_OVERFLOW ||
-				 res_count <= scsi_in(sc)->length))
-			scsi_in(sc)->resid = res_count;
-		else
-			sc->result = (DID_BAD_TARGET << 16) | rhdr->cmd_status;
+		sc->result = (DID_BAD_TARGET << 16) | rhdr->cmd_status;
 	}
 
 	if (rhdr->flags & (ISCSI_FLAG_CMD_UNDERFLOW |
@@ -961,8 +914,8 @@ iscsi_data_in_rsp(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
 
 		if (res_count > 0 &&
 		    (rhdr->flags & ISCSI_FLAG_CMD_OVERFLOW ||
-		     res_count <= scsi_in(sc)->length))
-			scsi_in(sc)->resid = res_count;
+		     res_count <= sc->sdb.length))
+			sc->sdb.resid = res_count;
 		else
 			sc->result = (DID_BAD_TARGET << 16) | rhdr->cmd_status;
 	}
@@ -1804,12 +1757,7 @@ fault:
 	spin_unlock_bh(&session->frwd_lock);
 	ISCSI_DBG_SESSION(session, "iscsi: cmd 0x%x is not queued (%d)\n",
 			  sc->cmnd[0], reason);
-	if (!scsi_bidi_cmnd(sc))
-		scsi_set_resid(sc, scsi_bufflen(sc));
-	else {
-		scsi_out(sc)->resid = scsi_out(sc)->length;
-		scsi_in(sc)->resid = scsi_in(sc)->length;
-	}
+	scsi_set_resid(sc, scsi_bufflen(sc));
 	sc->scsi_done(sc);
 	return 0;
 }
diff --git a/drivers/scsi/libiscsi_tcp.c b/drivers/scsi/libiscsi_tcp.c
index 8a6b1b3f8277..9923e9e3b884 100644
--- a/drivers/scsi/libiscsi_tcp.c
+++ b/drivers/scsi/libiscsi_tcp.c
@@ -495,7 +495,7 @@ static int iscsi_tcp_data_in(struct iscsi_conn *conn, struct iscsi_task *task)
 	struct iscsi_tcp_task *tcp_task = task->dd_data;
 	struct iscsi_data_rsp *rhdr = (struct iscsi_data_rsp *)tcp_conn->in.hdr;
 	int datasn = be32_to_cpu(rhdr->datasn);
-	unsigned total_in_length = scsi_in(task->sc)->length;
+	unsigned total_in_length = task->sc->sdb.length;
 
 	/*
 	 * lib iscsi will update this in the completion handling if there
@@ -580,11 +580,11 @@ static int iscsi_tcp_r2t_rsp(struct iscsi_conn *conn, struct iscsi_task *task)
 			      data_length, session->max_burst);
 
 	data_offset = be32_to_cpu(rhdr->data_offset);
-	if (data_offset + data_length > scsi_out(task->sc)->length) {
+	if (data_offset + data_length > task->sc->sdb.length) {
 		iscsi_conn_printk(KERN_ERR, conn,
 				  "invalid R2T with data len %u at offset %u "
 				  "and total length %d\n", data_length,
-				  data_offset, scsi_out(task->sc)->length);
+				  data_offset, task->sc->sdb.length);
 		return ISCSI_ERR_DATALEN;
 	}
 
@@ -696,7 +696,7 @@ iscsi_tcp_hdr_dissect(struct iscsi_conn *conn, struct iscsi_hdr *hdr)
 		if (tcp_conn->in.datalen) {
 			struct iscsi_tcp_task *tcp_task = task->dd_data;
 			struct ahash_request *rx_hash = NULL;
-			struct scsi_data_buffer *sdb = scsi_in(task->sc);
+			struct scsi_data_buffer *sdb = &task->sc->sdb;
 
 			/*
 			 * Setup copy of Data-In into the struct scsi_cmnd
diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c
index 661512bec3ac..8044bb08455d 100644
--- a/drivers/scsi/scsi_debug.c
+++ b/drivers/scsi/scsi_debug.c
@@ -430,7 +430,6 @@ static int resp_rsup_opcodes(struct scsi_cmnd *, struct sdebug_dev_info *);
 static int resp_rsup_tmfs(struct scsi_cmnd *, struct sdebug_dev_info *);
 static int resp_write_same_10(struct scsi_cmnd *, struct sdebug_dev_info *);
 static int resp_write_same_16(struct scsi_cmnd *, struct sdebug_dev_info *);
-static int resp_xdwriteread_10(struct scsi_cmnd *, struct sdebug_dev_info *);
 static int resp_comp_write(struct scsi_cmnd *, struct sdebug_dev_info *);
 static int resp_write_buffer(struct scsi_cmnd *, struct sdebug_dev_info *);
 static int resp_sync_cache(struct scsi_cmnd *, struct sdebug_dev_info *);
@@ -600,9 +599,6 @@ static const struct opcode_info_t opcode_info_arr[SDEB_I_LAST_ELEMENT + 1] = {
 	{0, 0x42, 0, F_D_OUT | FF_MEDIA_IO, resp_unmap, NULL, /* UNMAP */
 	    {10,  0x1, 0, 0, 0, 0, 0x3f, 0xff, 0xff, 0xc7, 0, 0, 0, 0, 0, 0} },
 /* 25 */
-	{0, 0x53, 0, F_D_IN | F_D_OUT | FF_MEDIA_IO, resp_xdwriteread_10,
-	    NULL, {10,  0xff, 0xff, 0xff, 0xff, 0xff, 0x3f, 0xff, 0xff, 0xc7,
-		   0, 0, 0, 0, 0, 0} },		/* XDWRITEREAD(10) */
 	{0, 0x3b, 0, F_D_OUT_MAYBE, resp_write_buffer, NULL,
 	    {10,  0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xc7, 0, 0,
 	     0, 0, 0, 0} },			/* WRITE_BUFFER */
@@ -1010,11 +1006,11 @@ static int fill_from_dev_buffer(struct scsi_cmnd *scp, unsigned char *arr,
 				int arr_len)
 {
 	int act_len;
-	struct scsi_data_buffer *sdb = scsi_in(scp);
+	struct scsi_data_buffer *sdb = &scp->sdb;
 
 	if (!sdb->length)
 		return 0;
-	if (!(scsi_bidi_cmnd(scp) || scp->sc_data_direction == DMA_FROM_DEVICE))
+	if (scp->sc_data_direction != DMA_FROM_DEVICE)
 		return DID_ERROR << 16;
 
 	act_len = sg_copy_from_buffer(sdb->table.sgl, sdb->table.nents,
@@ -1033,12 +1029,12 @@ static int p_fill_from_dev_buffer(struct scsi_cmnd *scp, const void *arr,
 				  int arr_len, unsigned int off_dst)
 {
 	int act_len, n;
-	struct scsi_data_buffer *sdb = scsi_in(scp);
+	struct scsi_data_buffer *sdb = &scp->sdb;
 	off_t skip = off_dst;
 
 	if (sdb->length <= off_dst)
 		return 0;
-	if (!(scsi_bidi_cmnd(scp) || scp->sc_data_direction == DMA_FROM_DEVICE))
+	if (scp->sc_data_direction != DMA_FROM_DEVICE)
 		return DID_ERROR << 16;
 
 	act_len = sg_pcopy_from_buffer(sdb->table.sgl, sdb->table.nents,
@@ -1058,7 +1054,7 @@ static int fetch_to_dev_buffer(struct scsi_cmnd *scp, unsigned char *arr,
 {
 	if (!scsi_bufflen(scp))
 		return 0;
-	if (!(scsi_bidi_cmnd(scp) || scp->sc_data_direction == DMA_TO_DEVICE))
+	if (scp->sc_data_direction != DMA_TO_DEVICE)
 		return -1;
 
 	return scsi_sg_copy_to_buffer(scp, arr, arr_len);
@@ -2477,21 +2473,19 @@ static int do_device_access(struct scsi_cmnd *scmd, u32 sg_skip, u64 lba,
 {
 	int ret;
 	u64 block, rest = 0;
-	struct scsi_data_buffer *sdb;
+	struct scsi_data_buffer *sdb = &scmd->sdb;
 	enum dma_data_direction dir;
 
 	if (do_write) {
-		sdb = scsi_out(scmd);
 		dir = DMA_TO_DEVICE;
 		write_since_sync = true;
 	} else {
-		sdb = scsi_in(scmd);
 		dir = DMA_FROM_DEVICE;
 	}
 
 	if (!sdb->length)
 		return 0;
-	if (!(scsi_bidi_cmnd(scmd) || scmd->sc_data_direction == dir))
+	if (scmd->sc_data_direction != dir)
 		return -1;
 
 	block = do_div(lba, sdebug_store_sectors);
@@ -2774,7 +2768,7 @@ static int resp_read_dt0(struct scsi_cmnd *scp, struct sdebug_dev_info *devip)
 	if (unlikely(ret == -1))
 		return DID_ERROR << 16;
 
-	scsi_in(scp)->resid = scsi_bufflen(scp) - ret;
+	scp->sdb.resid = scsi_bufflen(scp) - ret;
 
 	if (unlikely(sqcp)) {
 		if (sqcp->inj_recovered) {
@@ -3718,68 +3712,6 @@ static int resp_report_luns(struct scsi_cmnd *scp,
 	return res;
 }
 
-static int resp_xdwriteread(struct scsi_cmnd *scp, unsigned long long lba,
-			    unsigned int num, struct sdebug_dev_info *devip)
-{
-	int j;
-	unsigned char *kaddr, *buf;
-	unsigned int offset;
-	struct scsi_data_buffer *sdb = scsi_in(scp);
-	struct sg_mapping_iter miter;
-
-	/* better not to use temporary buffer. */
-	buf = kzalloc(scsi_bufflen(scp), GFP_ATOMIC);
-	if (!buf) {
-		mk_sense_buffer(scp, ILLEGAL_REQUEST, INSUFF_RES_ASC,
-				INSUFF_RES_ASCQ);
-		return check_condition_result;
-	}
-
-	scsi_sg_copy_to_buffer(scp, buf, scsi_bufflen(scp));
-
-	offset = 0;
-	sg_miter_start(&miter, sdb->table.sgl, sdb->table.nents,
-			SG_MITER_ATOMIC | SG_MITER_TO_SG);
-
-	while (sg_miter_next(&miter)) {
-		kaddr = miter.addr;
-		for (j = 0; j < miter.length; j++)
-			*(kaddr + j) ^= *(buf + offset + j);
-
-		offset += miter.length;
-	}
-	sg_miter_stop(&miter);
-	kfree(buf);
-
-	return 0;
-}
-
-static int resp_xdwriteread_10(struct scsi_cmnd *scp,
-			       struct sdebug_dev_info *devip)
-{
-	u8 *cmd = scp->cmnd;
-	u64 lba;
-	u32 num;
-	int errsts;
-
-	if (!scsi_bidi_cmnd(scp)) {
-		mk_sense_buffer(scp, ILLEGAL_REQUEST, INSUFF_RES_ASC,
-				INSUFF_RES_ASCQ);
-		return check_condition_result;
-	}
-	errsts = resp_read_dt0(scp, devip);
-	if (errsts)
-		return errsts;
-	if (!(cmd[1] & 0x4)) {		/* DISABLE_WRITE is not set */
-		errsts = resp_write_dt0(scp, devip);
-		if (errsts)
-			return errsts;
-	}
-	lba = get_unaligned_be32(cmd + 2);
-	num = get_unaligned_be16(cmd + 7);
-	return resp_xdwriteread(scp, lba, num, devip);
-}
-
 static struct sdebug_queue *get_queue(struct scsi_cmnd *cmnd)
 {
 	u32 tag = blk_mq_unique_tag(cmnd->request);
@@ -3953,7 +3885,6 @@ static int scsi_debug_slave_alloc(struct scsi_device *sdp)
 	if (sdebug_verbose)
 		pr_info("slave_alloc <%u %u %u %llu>\n",
 		       sdp->host->host_no, sdp->channel, sdp->id, sdp->lun);
-	blk_queue_flag_set(QUEUE_FLAG_BIDI, sdp->request_queue);
 	return 0;
 }
 
diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index 16eef068e9e9..1b8378f36139 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -965,7 +965,6 @@ void scsi_eh_prep_cmnd(struct scsi_cmnd *scmd, struct scsi_eh_save *ses,
 	ses->cmnd = scmd->cmnd;
 	ses->data_direction = scmd->sc_data_direction;
 	ses->sdb = scmd->sdb;
-	ses->next_rq = scmd->request->next_rq;
 	ses->result = scmd->result;
 	ses->underflow = scmd->underflow;
 	ses->prot_op = scmd->prot_op;
@@ -976,7 +975,6 @@ void scsi_eh_prep_cmnd(struct scsi_cmnd *scmd, struct scsi_eh_save *ses,
 	scmd->cmnd = ses->eh_cmnd;
 	memset(scmd->cmnd, 0, BLK_MAX_CDB);
 	memset(&scmd->sdb, 0, sizeof(scmd->sdb));
-	scmd->request->next_rq = NULL;
 	scmd->result = 0;
 
 	if (sense_bytes) {
@@ -1029,7 +1027,6 @@ void scsi_eh_restore_cmnd(struct scsi_cmnd* scmd, struct scsi_eh_save *ses)
 	scmd->cmnd = ses->cmnd;
 	scmd->sc_data_direction = ses->data_direction;
 	scmd->sdb = ses->sdb;
-	scmd->request->next_rq = ses->next_rq;
 	scmd->result = ses->result;
 	scmd->underflow = ses->underflow;
 	scmd->prot_op = ses->prot_op;
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 00cd365fb7d2..2667bb95a441 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -556,15 +556,8 @@ static void scsi_uninit_cmd(struct scsi_cmnd *cmd)
 
 static void scsi_mq_free_sgtables(struct scsi_cmnd *cmd)
 {
-	struct scsi_data_buffer *sdb;
-
 	if (cmd->sdb.table.nents)
 		sg_free_table_chained(&cmd->sdb.table, true);
-	if (cmd->request->next_rq) {
-		sdb = cmd->request->next_rq->special;
-		if (sdb)
-			sg_free_table_chained(&sdb->table, true);
-	}
 	if (scsi_prot_sg_count(cmd))
 		sg_free_table_chained(&cmd->prot_sdb->table, true);
 }
@@ -578,7 +571,7 @@ static void scsi_mq_uninit_cmd(struct scsi_cmnd *cmd)
 
 /* Returns false when no more bytes to process, true if there are more */
 static bool scsi_end_request(struct request *req, blk_status_t error,
-		unsigned int bytes, unsigned int bidi_bytes)
+		unsigned int bytes)
 {
 	struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(req);
 	struct scsi_device *sdev = cmd->device;
@@ -587,11 +580,6 @@ static bool scsi_end_request(struct request *req, blk_status_t error,
 	if (blk_update_request(req, error, bytes))
 		return true;
 
-	/* Bidi request must be completed as a whole */
-	if (unlikely(bidi_bytes) &&
-	    blk_update_request(req->next_rq, error, bidi_bytes))
-		return true;
-
 	if (blk_queue_add_random(q))
 		add_disk_randomness(req->rq_disk);
 
@@ -816,7 +804,7 @@ static void scsi_io_completion_action(struct scsi_cmnd *cmd, int result)
 				scsi_print_command(cmd);
 			}
 		}
-		if (!scsi_end_request(req, blk_stat, blk_rq_err_bytes(req), 0))
+		if (!scsi_end_request(req, blk_stat, blk_rq_err_bytes(req)))
 			return;
 		/*FALLTHRU*/
 	case ACTION_REPREP:
@@ -951,29 +939,6 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
 		 */
 		scsi_req(req)->result = cmd->result;
 		scsi_req(req)->resid_len = scsi_get_resid(cmd);
-
-		if (unlikely(scsi_bidi_cmnd(cmd))) {
-			/*
-			 * Bidi commands Must be complete as a whole,
-			 * both sides at once.
-			 */
-			scsi_req(req->next_rq)->resid_len = scsi_in(cmd)->resid;
-			if (scsi_end_request(req, BLK_STS_OK, blk_rq_bytes(req),
-					blk_rq_bytes(req->next_rq)))
-				WARN_ONCE(true,
-					  "Bidi command with remaining bytes");
-			return;
-		}
-	}
-
-	/* no bidi support yet, other than in pass-through */
-	if (unlikely(blk_bidi_rq(req))) {
-		WARN_ONCE(true, "Only support bidi command in passthrough");
-		scmd_printk(KERN_ERR, cmd, "Killing bidi command\n");
-		if (scsi_end_request(req, BLK_STS_IOERR, blk_rq_bytes(req),
-				     blk_rq_bytes(req->next_rq)))
-			WARN_ONCE(true, "Bidi command with remaining bytes");
-		return;
 	}
 
 	/*
@@ -990,13 +955,13 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
 	 * to retry code. Fast path should return in this block.
 	 */
 	if (likely(blk_rq_bytes(req) > 0 || blk_stat == BLK_STS_OK)) {
-		if (likely(!scsi_end_request(req, blk_stat, good_bytes, 0)))
+		if (likely(!scsi_end_request(req, blk_stat, good_bytes)))
 			return; /* no bytes remaining */
 	}
 
 	/* Kill remainder if no retries. */
 	if (unlikely(blk_stat && scsi_noretry_cmd(cmd))) {
-		if (scsi_end_request(req, blk_stat, blk_rq_bytes(req), 0))
+		if (scsi_end_request(req, blk_stat, blk_rq_bytes(req)))
 			WARN_ONCE(true,
 			    "Bytes remaining after failed, no-retry command");
 		return;
@@ -1058,12 +1023,6 @@ blk_status_t scsi_init_io(struct scsi_cmnd *cmd)
 	if (ret)
 		return ret;
 
-	if (blk_bidi_rq(rq)) {
-		ret = scsi_init_sgtable(rq->next_rq, rq->next_rq->special);
-		if (ret)
-			goto out_free_sgtables;
-	}
-
 	if (blk_integrity_rq(rq)) {
 		struct scsi_data_buffer *prot_sdb = cmd->prot_sdb;
 		int ivecs, count;
@@ -1624,17 +1583,6 @@ static blk_status_t scsi_mq_prep_fn(struct request *req)
 			(struct scatterlist *)(cmd->prot_sdb + 1);
 	}
 
-	if (blk_bidi_rq(req)) {
-		struct request *next_rq = req->next_rq;
-		struct scsi_data_buffer *bidi_sdb = blk_mq_rq_to_pdu(next_rq);
-
-		memset(bidi_sdb, 0, sizeof(struct scsi_data_buffer));
-		bidi_sdb->table.sgl =
-			(struct scatterlist *)(bidi_sdb + 1);
-
-		next_rq->special = bidi_sdb;
-	}
-
 	blk_mq_start_request(req);
 
 	return scsi_setup_cmnd(sdev, req);
diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c
index 772b976e4ee4..1a6f150cd2d8 100644
--- a/drivers/scsi/virtio_scsi.c
+++ b/drivers/scsi/virtio_scsi.c
@@ -100,16 +100,8 @@ static inline struct Scsi_Host *virtio_scsi_host(struct virtio_device *vdev)
 
 static void virtscsi_compute_resid(struct scsi_cmnd *sc, u32 resid)
 {
-	if (!resid)
-		return;
-
-	if (!scsi_bidi_cmnd(sc)) {
+	if (resid)
 		scsi_set_resid(sc, resid);
-		return;
-	}
-
-	scsi_in(sc)->resid = min(resid, scsi_in(sc)->length);
-	scsi_out(sc)->resid = resid - scsi_in(sc)->resid;
 }
 
 /**
@@ -403,9 +395,9 @@ static int virtscsi_add_cmd(struct virtqueue *vq,
 
 	if (sc && sc->sc_data_direction != DMA_NONE) {
 		if (sc->sc_data_direction != DMA_FROM_DEVICE)
-			out = &scsi_out(sc)->table;
+			out = &sc->sdb.table;
 		if (sc->sc_data_direction != DMA_TO_DEVICE)
-			in = &scsi_in(sc)->table;
+			in = &sc->sdb.table;
 	}
 
 	/* Request header.  */
diff --git a/drivers/target/loopback/tcm_loop.c b/drivers/target/loopback/tcm_loop.c
index 89c6e3719d2b..3305b47fdf53 100644
--- a/drivers/target/loopback/tcm_loop.c
+++ b/drivers/target/loopback/tcm_loop.c
@@ -128,14 +128,6 @@ static void tcm_loop_submission_work(struct work_struct *work)
 		set_host_byte(sc, DID_ERROR);
 		goto out_done;
 	}
-	if (scsi_bidi_cmnd(sc)) {
-		struct scsi_data_buffer *sdb = scsi_in(sc);
-
-		sgl_bidi = sdb->table.sgl;
-		sgl_bidi_count = sdb->table.nents;
-		se_cmd->se_cmd_flags |= SCF_BIDI;
-
-	}
 
 	transfer_length = scsi_transfer_length(sc);
 	if (!scsi_prot_sg_count(sc) &&
@@ -304,12 +296,6 @@ static int tcm_loop_target_reset(struct scsi_cmnd *sc)
 	return FAILED;
 }
 
-static int tcm_loop_slave_alloc(struct scsi_device *sd)
-{
-	blk_queue_flag_set(QUEUE_FLAG_BIDI, sd->request_queue);
-	return 0;
-}
-
 static struct scsi_host_template tcm_loop_driver_template = {
 	.show_info		= tcm_loop_show_info,
 	.proc_name		= "tcm_loopback",
@@ -325,7 +311,6 @@ static struct scsi_host_template tcm_loop_driver_template = {
 	.cmd_per_lun		= 1024,
 	.max_sectors		= 0xFFFF,
 	.dma_boundary		= PAGE_SIZE - 1,
-	.slave_alloc		= tcm_loop_slave_alloc,
 	.module			= THIS_MODULE,
 	.track_queue_depth	= 1,
 };
diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c
index 36742e8e7edc..95f2142093d5 100644
--- a/drivers/usb/storage/uas.c
+++ b/drivers/usb/storage/uas.c
@@ -368,25 +368,19 @@ static void uas_data_cmplt(struct urb *urb)
 	struct scsi_cmnd *cmnd = urb->context;
 	struct uas_cmd_info *cmdinfo = (void *)&cmnd->SCp;
 	struct uas_dev_info *devinfo = (void *)cmnd->device->hostdata;
-	struct scsi_data_buffer *sdb = NULL;
+	struct scsi_data_buffer *sdb = &cmnd->sdb;
 	unsigned long flags;
 	int status = urb->status;
 
 	spin_lock_irqsave(&devinfo->lock, flags);
 
 	if (cmdinfo->data_in_urb == urb) {
-		sdb = scsi_in(cmnd);
 		cmdinfo->state &= ~DATA_IN_URB_INFLIGHT;
 		cmdinfo->data_in_urb = NULL;
 	} else if (cmdinfo->data_out_urb == urb) {
-		sdb = scsi_out(cmnd);
 		cmdinfo->state &= ~DATA_OUT_URB_INFLIGHT;
 		cmdinfo->data_out_urb = NULL;
 	}
-	if (sdb == NULL) {
-		WARN_ON_ONCE(1);
-		goto out;
-	}
 
 	if (devinfo->resetting)
 		goto out;
@@ -426,8 +420,7 @@ static struct urb *uas_alloc_data_urb(struct uas_dev_info *devinfo, gfp_t gfp,
 	struct usb_device *udev = devinfo->udev;
 	struct uas_cmd_info *cmdinfo = (void *)&cmnd->SCp;
 	struct urb *urb = usb_alloc_urb(0, gfp);
-	struct scsi_data_buffer *sdb = (dir == DMA_FROM_DEVICE)
-		? scsi_in(cmnd) : scsi_out(cmnd);
+	struct scsi_data_buffer *sdb = &cmnd->sdb;
 	unsigned int pipe = (dir == DMA_FROM_DEVICE)
 		? devinfo->data_in_pipe : devinfo->data_out_pipe;
 
diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h
index d85e6befa26b..af6ae3e1eba6 100644
--- a/include/scsi/scsi_cmnd.h
+++ b/include/scsi/scsi_cmnd.h
@@ -213,23 +213,6 @@ static inline int scsi_get_resid(struct scsi_cmnd *cmd)
 #define scsi_for_each_sg(cmd, sg, nseg, __i)			\
 	for_each_sg(scsi_sglist(cmd), sg, nseg, __i)
 
-static inline int scsi_bidi_cmnd(struct scsi_cmnd *cmd)
-{
-	return blk_bidi_rq(cmd->request) &&
-		(cmd->request->next_rq->special != NULL);
-}
-
-static inline struct scsi_data_buffer *scsi_in(struct scsi_cmnd *cmd)
-{
-	return scsi_bidi_cmnd(cmd) ?
-		cmd->request->next_rq->special : &cmd->sdb;
-}
-
-static inline struct scsi_data_buffer *scsi_out(struct scsi_cmnd *cmd)
-{
-	return &cmd->sdb;
-}
-
 static inline int scsi_sg_copy_from_buffer(struct scsi_cmnd *cmd,
 					   void *buf, int buflen)
 {
@@ -351,7 +334,7 @@ static inline void set_driver_byte(struct scsi_cmnd *cmd, char status)
 
 static inline unsigned scsi_transfer_length(struct scsi_cmnd *scmd)
 {
-	unsigned int xfer_len = scsi_out(scmd)->length;
+	unsigned int xfer_len = scmd->sdb.length;
 	unsigned int prot_interval = scsi_prot_interval(scmd);
 
 	if (scmd->prot_flags & SCSI_PROT_TRANSFER_PI)
diff --git a/include/scsi/scsi_eh.h b/include/scsi/scsi_eh.h
index 2b7e227960e1..3810b340551c 100644
--- a/include/scsi/scsi_eh.h
+++ b/include/scsi/scsi_eh.h
@@ -39,7 +39,6 @@ struct scsi_eh_save {
 	unsigned char prot_op;
 	unsigned char *cmnd;
 	struct scsi_data_buffer sdb;
-	struct request *next_rq;
 	/* new command support */
 	unsigned char eh_cmnd[BLK_MAX_CDB];
 	struct scatterlist sense_sgl;
-- 
cgit v1.2.3


From 69ed175c195595c73901e18366cb0ebeaeb68b8a Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 9 Nov 2018 19:35:11 +0100
Subject: scsi: block: remove req->special

No users left.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 block/blk-mq.c         | 1 -
 drivers/scsi/sd.c      | 2 --
 include/linux/blkdev.h | 2 --
 3 files changed, 5 deletions(-)

(limited to 'include')

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 3ba37b9e15e9..502cbf964a3b 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -331,7 +331,6 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
 #if defined(CONFIG_BLK_DEV_INTEGRITY)
 	rq->nr_integrity_segments = 0;
 #endif
-	rq->special = NULL;
 	/* tag was already set */
 	rq->extra_len = 0;
 	WRITE_ONCE(rq->deadline, 0);
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 3db9b1fe7516..c124459041dc 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -1171,8 +1171,6 @@ static blk_status_t sd_setup_read_write_cmnd(struct scsi_cmnd *cmd)
 	if (ret != BLK_STS_OK)
 		return ret;
 
-	WARN_ON_ONCE(cmd != rq->special);
-
 	if (!scsi_device_online(sdp) || sdp->changed) {
 		scmd_printk(KERN_ERR, cmd, "device offline or changed\n");
 		return BLK_STS_IOERR;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 338604dff7d0..fd1450d53f1c 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -216,8 +216,6 @@ struct request {
 	unsigned short write_hint;
 	unsigned short ioprio;
 
-	void *special;		/* opaque pointer available for LLD use */
-
 	unsigned int extra_len;	/* length of alignment and padding */
 
 	enum mq_rq_state state;
-- 
cgit v1.2.3


From 8b3238cabd50e2715b6544e724e74685209b190a Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 6 Dec 2018 08:01:10 -0800
Subject: scsi: block: remove bidi support

Unused now, and another field in struct request bites the dust.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 block/blk-mq-debugfs.c | 1 -
 block/blk-mq.c         | 3 ---
 include/linux/blkdev.h | 6 ------
 3 files changed, 10 deletions(-)

(limited to 'include')

diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index 90d68760af08..ac832547160a 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -115,7 +115,6 @@ static int queue_pm_only_show(void *data, struct seq_file *m)
 static const char *const blk_queue_flag_name[] = {
 	QUEUE_FLAG_NAME(STOPPED),
 	QUEUE_FLAG_NAME(DYING),
-	QUEUE_FLAG_NAME(BIDI),
 	QUEUE_FLAG_NAME(NOMERGES),
 	QUEUE_FLAG_NAME(SAME_COMP),
 	QUEUE_FLAG_NAME(FAIL_IO),
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 502cbf964a3b..820d131a6893 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -339,7 +339,6 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
 
 	rq->end_io = NULL;
 	rq->end_io_data = NULL;
-	rq->next_rq = NULL;
 
 	data->ctx->rq_dispatched[op_is_sync(op)]++;
 	refcount_set(&rq->ref, 1);
@@ -549,8 +548,6 @@ inline void __blk_mq_end_request(struct request *rq, blk_status_t error)
 		rq_qos_done(rq->q, rq);
 		rq->end_io(rq, error);
 	} else {
-		if (unlikely(blk_bidi_rq(rq)))
-			blk_mq_free_request(rq->next_rq);
 		blk_mq_free_request(rq);
 	}
 }
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index fd1450d53f1c..21beb456b97a 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -234,9 +234,6 @@ struct request {
 	 */
 	rq_end_io_fn *end_io;
 	void *end_io_data;
-
-	/* for bidi */
-	struct request *next_rq;
 };
 
 static inline bool blk_op_is_scsi(unsigned int op)
@@ -572,7 +569,6 @@ struct request_queue {
 
 #define QUEUE_FLAG_STOPPED	1	/* queue is stopped */
 #define QUEUE_FLAG_DYING	2	/* queue being torn down */
-#define QUEUE_FLAG_BIDI		4	/* queue supports bidi requests */
 #define QUEUE_FLAG_NOMERGES     5	/* disable merge attempts */
 #define QUEUE_FLAG_SAME_COMP	6	/* complete on same CPU-group */
 #define QUEUE_FLAG_FAIL_IO	7	/* fake timeout */
@@ -644,8 +640,6 @@ static inline bool blk_account_rq(struct request *rq)
 	return (rq->rq_flags & RQF_STARTED) && !blk_rq_is_passthrough(rq);
 }
 
-#define blk_bidi_rq(rq)		((rq)->next_rq != NULL)
-
 #define list_entry_rq(ptr)	list_entry((ptr), struct request, queuelist)
 
 #define rq_data_dir(rq)		(op_is_write(req_op(rq)) ? WRITE : READ)
-- 
cgit v1.2.3


From 5870970b9a828d8693aa6d15742573289d7dbcd0 Mon Sep 17 00:00:00 2001
From: Julien Thierry <julien.thierry@arm.com>
Date: Thu, 31 Jan 2019 14:58:39 +0000
Subject: arm64: Fix HCR.TGE status for NMI contexts

When using VHE, the host needs to clear HCR_EL2.TGE bit in order
to interact with guest TLBs, switching from EL2&0 translation regime
to EL1&0.

However, some non-maskable asynchronous event could happen while TGE is
cleared like SDEI. Because of this address translation operations
relying on EL2&0 translation regime could fail (tlb invalidation,
userspace access, ...).

Fix this by properly setting HCR_EL2.TGE when entering NMI context and
clear it if necessary when returning to the interrupted context.

Signed-off-by: Julien Thierry <julien.thierry@arm.com>
Suggested-by: Marc Zyngier <marc.zyngier@arm.com>
Reviewed-by: Marc Zyngier <marc.zyngier@arm.com>
Reviewed-by: James Morse <james.morse@arm.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Cc: James Morse <james.morse@arm.com>
Cc: linux-arch@vger.kernel.org
Cc: stable@vger.kernel.org
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/hardirq.h | 31 +++++++++++++++++++++++++++++++
 arch/arm64/kernel/irq.c          |  3 +++
 include/linux/hardirq.h          |  7 +++++++
 3 files changed, 41 insertions(+)

(limited to 'include')

diff --git a/arch/arm64/include/asm/hardirq.h b/arch/arm64/include/asm/hardirq.h
index 1473fc2f7ab7..89691c86640a 100644
--- a/arch/arm64/include/asm/hardirq.h
+++ b/arch/arm64/include/asm/hardirq.h
@@ -17,8 +17,12 @@
 #define __ASM_HARDIRQ_H
 
 #include <linux/cache.h>
+#include <linux/percpu.h>
 #include <linux/threads.h>
+#include <asm/barrier.h>
 #include <asm/irq.h>
+#include <asm/kvm_arm.h>
+#include <asm/sysreg.h>
 
 #define NR_IPI	7
 
@@ -37,6 +41,33 @@ u64 smp_irq_stat_cpu(unsigned int cpu);
 
 #define __ARCH_IRQ_EXIT_IRQS_DISABLED	1
 
+struct nmi_ctx {
+	u64 hcr;
+};
+
+DECLARE_PER_CPU(struct nmi_ctx, nmi_contexts);
+
+#define arch_nmi_enter()							\
+	do {									\
+		if (is_kernel_in_hyp_mode()) {					\
+			struct nmi_ctx *nmi_ctx = this_cpu_ptr(&nmi_contexts);	\
+			nmi_ctx->hcr = read_sysreg(hcr_el2);			\
+			if (!(nmi_ctx->hcr & HCR_TGE)) {			\
+				write_sysreg(nmi_ctx->hcr | HCR_TGE, hcr_el2);	\
+				isb();						\
+			}							\
+		}								\
+	} while (0)
+
+#define arch_nmi_exit()								\
+	do {									\
+		if (is_kernel_in_hyp_mode()) {					\
+			struct nmi_ctx *nmi_ctx = this_cpu_ptr(&nmi_contexts);	\
+			if (!(nmi_ctx->hcr & HCR_TGE))				\
+				write_sysreg(nmi_ctx->hcr, hcr_el2);		\
+		}								\
+	} while (0)
+
 static inline void ack_bad_irq(unsigned int irq)
 {
 	extern unsigned long irq_err_count;
diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c
index 780a12f59a8f..92fa81798fb9 100644
--- a/arch/arm64/kernel/irq.c
+++ b/arch/arm64/kernel/irq.c
@@ -33,6 +33,9 @@
 
 unsigned long irq_err_count;
 
+/* Only access this in an NMI enter/exit */
+DEFINE_PER_CPU(struct nmi_ctx, nmi_contexts);
+
 DEFINE_PER_CPU(unsigned long *, irq_stack_ptr);
 
 int arch_show_interrupts(struct seq_file *p, int prec)
diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index 0fbbcdf0c178..da0af631ded5 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -60,8 +60,14 @@ extern void irq_enter(void);
  */
 extern void irq_exit(void);
 
+#ifndef arch_nmi_enter
+#define arch_nmi_enter()	do { } while (0)
+#define arch_nmi_exit()		do { } while (0)
+#endif
+
 #define nmi_enter()						\
 	do {							\
+		arch_nmi_enter();				\
 		printk_nmi_enter();				\
 		lockdep_off();					\
 		ftrace_nmi_enter();				\
@@ -80,6 +86,7 @@ extern void irq_exit(void);
 		ftrace_nmi_exit();				\
 		lockdep_on();					\
 		printk_nmi_exit();				\
+		arch_nmi_exit();				\
 	} while (0)
 
 #endif /* LINUX_HARDIRQ_H */
-- 
cgit v1.2.3


From 13b210ddf474d9f3368766008a89fe82a6f90b48 Mon Sep 17 00:00:00 2001
From: Julien Thierry <julien.thierry@arm.com>
Date: Thu, 31 Jan 2019 14:58:49 +0000
Subject: efi: Let architectures decide the flags that should be saved/restored

Currently, irqflags are saved before calling runtime services and
checked for mismatch on return.

Provide a pair of overridable macros to save and restore (if needed) the
state that need to be preserved on return from a runtime service.
This allows to check for flags that are not necesarly related to
irqflags.

Signed-off-by: Julien Thierry <julien.thierry@arm.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: linux-efi@vger.kernel.org
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 drivers/firmware/efi/runtime-wrappers.c | 17 +++++++++++++++--
 include/linux/efi.h                     |  5 +++--
 2 files changed, 18 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/drivers/firmware/efi/runtime-wrappers.c b/drivers/firmware/efi/runtime-wrappers.c
index 8903b9ccfc2b..c70df5ae7c4a 100644
--- a/drivers/firmware/efi/runtime-wrappers.c
+++ b/drivers/firmware/efi/runtime-wrappers.c
@@ -89,11 +89,24 @@ exit:									\
 	efi_rts_work.status;						\
 })
 
+#ifndef arch_efi_save_flags
+#define arch_efi_save_flags(state_flags)	local_save_flags(state_flags)
+#define arch_efi_restore_flags(state_flags)	local_irq_restore(state_flags)
+#endif
+
+unsigned long efi_call_virt_save_flags(void)
+{
+	unsigned long flags;
+
+	arch_efi_save_flags(flags);
+	return flags;
+}
+
 void efi_call_virt_check_flags(unsigned long flags, const char *call)
 {
 	unsigned long cur_flags, mismatch;
 
-	local_save_flags(cur_flags);
+	cur_flags = efi_call_virt_save_flags();
 
 	mismatch = flags ^ cur_flags;
 	if (!WARN_ON_ONCE(mismatch & ARCH_EFI_IRQ_FLAGS_MASK))
@@ -102,7 +115,7 @@ void efi_call_virt_check_flags(unsigned long flags, const char *call)
 	add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_NOW_UNRELIABLE);
 	pr_err_ratelimited(FW_BUG "IRQ flags corrupted (0x%08lx=>0x%08lx) by EFI %s\n",
 			   flags, cur_flags, call);
-	local_irq_restore(flags);
+	arch_efi_restore_flags(flags);
 }
 
 /*
diff --git a/include/linux/efi.h b/include/linux/efi.h
index 45ff763fba76..bd80b7ec35db 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -1607,6 +1607,7 @@ efi_status_t efi_setup_gop(efi_system_table_t *sys_table_arg,
 
 bool efi_runtime_disabled(void);
 extern void efi_call_virt_check_flags(unsigned long flags, const char *call);
+extern unsigned long efi_call_virt_save_flags(void);
 
 enum efi_secureboot_mode {
 	efi_secureboot_mode_unset,
@@ -1652,7 +1653,7 @@ void efi_retrieve_tpm2_eventlog(efi_system_table_t *sys_table);
 									\
 	arch_efi_call_virt_setup();					\
 									\
-	local_save_flags(__flags);					\
+	__flags = efi_call_virt_save_flags();				\
 	__s = arch_efi_call_virt(p, f, args);				\
 	efi_call_virt_check_flags(__flags, __stringify(f));		\
 									\
@@ -1667,7 +1668,7 @@ void efi_retrieve_tpm2_eventlog(efi_system_table_t *sys_table);
 									\
 	arch_efi_call_virt_setup();					\
 									\
-	local_save_flags(__flags);					\
+	__flags = efi_call_virt_save_flags();				\
 	arch_efi_call_virt(p, f, args);					\
 	efi_call_virt_check_flags(__flags, __stringify(f));		\
 									\
-- 
cgit v1.2.3


From 840018668ce2d96783356204ff282d6c9b0e5f66 Mon Sep 17 00:00:00 2001
From: Mathieu Poirier <mathieu.poirier@linaro.org>
Date: Thu, 31 Jan 2019 11:47:08 -0700
Subject: perf/aux: Make perf_event accessible to setup_aux()

When pmu::setup_aux() is called the coresight PMU needs to know which
sink to use for the session by looking up the information in the
event's attr::config2 field.

As such simply replace the cpu information by the complete perf_event
structure and change all affected customers.

Signed-off-by: Mathieu Poirier <mathieu.poirier@linaro.org>
Reviewed-by: Suzuki Poulouse <suzuki.poulose@arm.com>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Deacon <will.deacon@arm.com>
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-s390@vger.kernel.org
Link: http://lkml.kernel.org/r/20190131184714.20388-2-mathieu.poirier@linaro.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 arch/s390/kernel/perf_cpum_sf.c                  | 6 +++---
 arch/x86/events/intel/bts.c                      | 4 +++-
 arch/x86/events/intel/pt.c                       | 5 +++--
 drivers/hwtracing/coresight/coresight-etm-perf.c | 6 +++---
 drivers/perf/arm_spe_pmu.c                       | 6 +++---
 include/linux/perf_event.h                       | 2 +-
 kernel/events/ring_buffer.c                      | 2 +-
 7 files changed, 17 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index bfabeb1889cc..1266194afb02 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -1600,7 +1600,7 @@ static void aux_sdb_init(unsigned long sdb)
 
 /*
  * aux_buffer_setup() - Setup AUX buffer for diagnostic mode sampling
- * @cpu:	On which to allocate, -1 means current
+ * @event:	Event the buffer is setup for, event->cpu == -1 means current
  * @pages:	Array of pointers to buffer pages passed from perf core
  * @nr_pages:	Total pages
  * @snapshot:	Flag for snapshot mode
@@ -1612,8 +1612,8 @@ static void aux_sdb_init(unsigned long sdb)
  *
  * Return the private AUX buffer structure if success or NULL if fails.
  */
-static void *aux_buffer_setup(int cpu, void **pages, int nr_pages,
-			      bool snapshot)
+static void *aux_buffer_setup(struct perf_event *event, void **pages,
+			      int nr_pages, bool snapshot)
 {
 	struct sf_buffer *sfb;
 	struct aux_buffer *aux;
diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c
index a01ef1b0f883..7cdd7b13bbda 100644
--- a/arch/x86/events/intel/bts.c
+++ b/arch/x86/events/intel/bts.c
@@ -77,10 +77,12 @@ static size_t buf_size(struct page *page)
 }
 
 static void *
-bts_buffer_setup_aux(int cpu, void **pages, int nr_pages, bool overwrite)
+bts_buffer_setup_aux(struct perf_event *event, void **pages,
+		     int nr_pages, bool overwrite)
 {
 	struct bts_buffer *buf;
 	struct page *page;
+	int cpu = event->cpu;
 	int node = (cpu == -1) ? cpu : cpu_to_node(cpu);
 	unsigned long offset;
 	size_t size = nr_pages << PAGE_SHIFT;
diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c
index 9494ca68fd9d..c0e86ff21f81 100644
--- a/arch/x86/events/intel/pt.c
+++ b/arch/x86/events/intel/pt.c
@@ -1114,10 +1114,11 @@ static int pt_buffer_init_topa(struct pt_buffer *buf, unsigned long nr_pages,
  * Return:	Our private PT buffer structure.
  */
 static void *
-pt_buffer_setup_aux(int cpu, void **pages, int nr_pages, bool snapshot)
+pt_buffer_setup_aux(struct perf_event *event, void **pages,
+		    int nr_pages, bool snapshot)
 {
 	struct pt_buffer *buf;
-	int node, ret;
+	int node, ret, cpu = event->cpu;
 
 	if (!nr_pages)
 		return NULL;
diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c b/drivers/hwtracing/coresight/coresight-etm-perf.c
index abe8249b893b..f21eb28b6782 100644
--- a/drivers/hwtracing/coresight/coresight-etm-perf.c
+++ b/drivers/hwtracing/coresight/coresight-etm-perf.c
@@ -177,15 +177,15 @@ static void etm_free_aux(void *data)
 	schedule_work(&event_data->work);
 }
 
-static void *etm_setup_aux(int event_cpu, void **pages,
+static void *etm_setup_aux(struct perf_event *event, void **pages,
 			   int nr_pages, bool overwrite)
 {
-	int cpu;
+	int cpu = event->cpu;
 	cpumask_t *mask;
 	struct coresight_device *sink;
 	struct etm_event_data *event_data = NULL;
 
-	event_data = alloc_event_data(event_cpu);
+	event_data = alloc_event_data(cpu);
 	if (!event_data)
 		return NULL;
 	INIT_WORK(&event_data->work, free_event_data);
diff --git a/drivers/perf/arm_spe_pmu.c b/drivers/perf/arm_spe_pmu.c
index 8e46a9dad2fa..7cb766dafe85 100644
--- a/drivers/perf/arm_spe_pmu.c
+++ b/drivers/perf/arm_spe_pmu.c
@@ -824,10 +824,10 @@ static void arm_spe_pmu_read(struct perf_event *event)
 {
 }
 
-static void *arm_spe_pmu_setup_aux(int cpu, void **pages, int nr_pages,
-				   bool snapshot)
+static void *arm_spe_pmu_setup_aux(struct perf_event *event, void **pages,
+				   int nr_pages, bool snapshot)
 {
-	int i;
+	int i, cpu = event->cpu;
 	struct page **pglist;
 	struct arm_spe_pmu_buf *buf;
 
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 6cb5d483ab34..d9c3610e0e25 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -410,7 +410,7 @@ struct pmu {
 	/*
 	 * Set up pmu-private data structures for an AUX area
 	 */
-	void *(*setup_aux)		(int cpu, void **pages,
+	void *(*setup_aux)		(struct perf_event *event, void **pages,
 					 int nr_pages, bool overwrite);
 					/* optional */
 
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index 805f0423ee0b..70ae2422cbaf 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -657,7 +657,7 @@ int rb_alloc_aux(struct ring_buffer *rb, struct perf_event *event,
 			goto out;
 	}
 
-	rb->aux_priv = event->pmu->setup_aux(event->cpu, rb->aux_pages, nr_pages,
+	rb->aux_priv = event->pmu->setup_aux(event, rb->aux_pages, nr_pages,
 					     overwrite);
 	if (!rb->aux_priv)
 		goto out;
-- 
cgit v1.2.3


From bb8e370bdc141ddff526e5e5ee74210c91fee0b8 Mon Sep 17 00:00:00 2001
From: Mathieu Poirier <mathieu.poirier@linaro.org>
Date: Thu, 31 Jan 2019 11:47:09 -0700
Subject: coresight: perf: Add "sinks" group to PMU directory

Add a "sinks" directory entry so that users can see all the sinks
available in the system in a single place.  Individual sink are added
as they are registered with the coresight bus.

Committer tests:

Test built on a ubuntu 18.04 container with a cross build environment to
arm64, the new field is there, need to find a machine with this feature
to do further testing in the future.

  root@d15263e5734a:/git/perf# grep CORESIGHT /tmp/build/v5.0-rc2+/.config
  CONFIG_CORESIGHT=y
  CONFIG_CORESIGHT_LINKS_AND_SINKS=y
  CONFIG_CORESIGHT_LINK_AND_SINK_TMC=y
  CONFIG_CORESIGHT_CATU=y
  CONFIG_CORESIGHT_SINK_TPIU=y
  CONFIG_CORESIGHT_SINK_ETBV10=y
  CONFIG_CORESIGHT_SOURCE_ETM4X=y
  CONFIG_CORESIGHT_DYNAMIC_REPLICATOR=y
  CONFIG_CORESIGHT_STM=y
  CONFIG_CORESIGHT_CPU_DEBUG=m
  root@d15263e5734a:/git/perf#
  root@d15263e5734a:/git/perf# file /tmp/build/v5.0-rc2+/drivers/hwtracing/coresight/*.o
  .../coresight/coresight-catu.o:               ELF 64-bit MSB relocatable, ARM aarch64, version 1 (SYSV), not stripped
  .../coresight/coresight-cpu-debug.mod.o:      ELF 64-bit MSB relocatable, ARM aarch64, version 1 (SYSV), not stripped
  .../coresight/coresight-cpu-debug.o:          ELF 64-bit MSB relocatable, ARM aarch64, version 1 (SYSV), not stripped
  .../coresight/coresight-dynamic-replicator.o: ELF 64-bit MSB relocatable, ARM aarch64, version 1 (SYSV), not stripped
  .../coresight/coresight-etb10.o:              ELF 64-bit MSB relocatable, ARM aarch64, version 1 (SYSV), not stripped
  .../coresight/coresight-etm-perf.o:           ELF 64-bit MSB relocatable, ARM aarch64, version 1 (SYSV), not stripped
  .../coresight/coresight-etm4x-sysfs.o:        ELF 64-bit MSB relocatable, ARM aarch64, version 1 (SYSV), not stripped
  .../coresight/coresight-etm4x.o:              ELF 64-bit MSB relocatable, ARM aarch64, version 1 (SYSV), not stripped
  .../coresight/coresight-funnel.o:             ELF 64-bit MSB relocatable, ARM aarch64, version 1 (SYSV), not stripped
  .../coresight/coresight-replicator.o:         ELF 64-bit MSB relocatable, ARM aarch64, version 1 (SYSV), not stripped
  .../coresight/coresight-stm.o:                ELF 64-bit MSB relocatable, ARM aarch64, version 1 (SYSV), not stripped
  .../coresight/coresight-tmc-etf.o:            ELF 64-bit MSB relocatable, ARM aarch64, version 1 (SYSV), not stripped
  .../coresight/coresight-tmc-etr.o:            ELF 64-bit MSB relocatable, ARM aarch64, version 1 (SYSV), not stripped
  .../coresight/coresight-tmc.o:                ELF 64-bit MSB relocatable, ARM aarch64, version 1 (SYSV), not stripped
  .../coresight/coresight-tpiu.o:               ELF 64-bit MSB relocatable, ARM aarch64, version 1 (SYSV), not stripped
  .../coresight/coresight.o:                    ELF 64-bit MSB relocatable, ARM aarch64, version 1 (SYSV), not stripped
  .../coresight/of_coresight.o:                 ELF 64-bit MSB relocatable, ARM aarch64, version 1 (SYSV), not stripped
  root@d15263e5734a:/git/perf#

  root@d15263e5734a:/git/perf# pahole -C coresight_device /tmp/build/v5.0-rc2+/drivers/hwtracing/coresight/coresight.o
  struct coresight_device {
          struct coresight_connection * conns;             /*     0     8 */
          int                        nr_inport;            /*     8     4 */
          int                        nr_outport;           /*    12     4 */
          enum coresight_dev_type    type;                 /*    16     4 */
          union coresight_dev_subtype subtype;             /*    20     8 */

          /* XXX 4 bytes hole, try to pack */

          const struct coresight_ops  * ops;               /*    32     8 */
          struct device              dev;                  /*    40  1408 */

          /* XXX last struct has 7 bytes of padding */

          /* --- cacheline 22 boundary (1408 bytes) was 40 bytes ago --- */
          atomic_t *                 refcnt;               /*  1448     8 */
          bool                       orphan;               /*  1456     1 */
          bool                       enable;               /*  1457     1 */
          bool                       activated;            /*  1458     1 */

          /* XXX 5 bytes hole, try to pack */

          struct dev_ext_attribute * ea;                   /*  1464     8 */

          /* size: 1472, cachelines: 23, members: 12 */
          /* sum members: 1463, holes: 2, sum holes: 9 */
          /* paddings: 1, sum paddings: 7 */
  };
  root@d15263e5734a:/git/perf#

Signed-off-by: Mathieu Poirier <mathieu.poirier@linaro.org>
Reviewed-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Deacon <will.deacon@arm.com>
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-s390@vger.kernel.org
Link: http://lkml.kernel.org/r/20190131184714.20388-3-mathieu.poirier@linaro.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 drivers/hwtracing/coresight/coresight-etm-perf.c | 82 ++++++++++++++++++++++++
 drivers/hwtracing/coresight/coresight-etm-perf.h |  6 +-
 drivers/hwtracing/coresight/coresight.c          | 18 ++++++
 include/linux/coresight.h                        |  7 +-
 4 files changed, 110 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c b/drivers/hwtracing/coresight/coresight-etm-perf.c
index f21eb28b6782..cdbdb28dc175 100644
--- a/drivers/hwtracing/coresight/coresight-etm-perf.c
+++ b/drivers/hwtracing/coresight/coresight-etm-perf.c
@@ -14,6 +14,7 @@
 #include <linux/perf_event.h>
 #include <linux/percpu-defs.h>
 #include <linux/slab.h>
+#include <linux/stringhash.h>
 #include <linux/types.h>
 #include <linux/workqueue.h>
 
@@ -43,8 +44,18 @@ static const struct attribute_group etm_pmu_format_group = {
 	.attrs  = etm_config_formats_attr,
 };
 
+static struct attribute *etm_config_sinks_attr[] = {
+	NULL,
+};
+
+static const struct attribute_group etm_pmu_sinks_group = {
+	.name   = "sinks",
+	.attrs  = etm_config_sinks_attr,
+};
+
 static const struct attribute_group *etm_pmu_attr_groups[] = {
 	&etm_pmu_format_group,
+	&etm_pmu_sinks_group,
 	NULL,
 };
 
@@ -479,6 +490,77 @@ int etm_perf_symlink(struct coresight_device *csdev, bool link)
 	return 0;
 }
 
+static ssize_t etm_perf_sink_name_show(struct device *dev,
+				       struct device_attribute *dattr,
+				       char *buf)
+{
+	struct dev_ext_attribute *ea;
+
+	ea = container_of(dattr, struct dev_ext_attribute, attr);
+	return scnprintf(buf, PAGE_SIZE, "0x%lx\n", (unsigned long)(ea->var));
+}
+
+int etm_perf_add_symlink_sink(struct coresight_device *csdev)
+{
+	int ret;
+	unsigned long hash;
+	const char *name;
+	struct device *pmu_dev = etm_pmu.dev;
+	struct device *pdev = csdev->dev.parent;
+	struct dev_ext_attribute *ea;
+
+	if (csdev->type != CORESIGHT_DEV_TYPE_SINK &&
+	    csdev->type != CORESIGHT_DEV_TYPE_LINKSINK)
+		return -EINVAL;
+
+	if (csdev->ea != NULL)
+		return -EINVAL;
+
+	if (!etm_perf_up)
+		return -EPROBE_DEFER;
+
+	ea = devm_kzalloc(pdev, sizeof(*ea), GFP_KERNEL);
+	if (!ea)
+		return -ENOMEM;
+
+	name = dev_name(pdev);
+	/* See function coresight_get_sink_by_id() to know where this is used */
+	hash = hashlen_hash(hashlen_string(NULL, name));
+
+	ea->attr.attr.name = devm_kstrdup(pdev, name, GFP_KERNEL);
+	if (!ea->attr.attr.name)
+		return -ENOMEM;
+
+	ea->attr.attr.mode = 0444;
+	ea->attr.show = etm_perf_sink_name_show;
+	ea->var = (unsigned long *)hash;
+
+	ret = sysfs_add_file_to_group(&pmu_dev->kobj,
+				      &ea->attr.attr, "sinks");
+
+	if (!ret)
+		csdev->ea = ea;
+
+	return ret;
+}
+
+void etm_perf_del_symlink_sink(struct coresight_device *csdev)
+{
+	struct device *pmu_dev = etm_pmu.dev;
+	struct dev_ext_attribute *ea = csdev->ea;
+
+	if (csdev->type != CORESIGHT_DEV_TYPE_SINK &&
+	    csdev->type != CORESIGHT_DEV_TYPE_LINKSINK)
+		return;
+
+	if (!ea)
+		return;
+
+	sysfs_remove_file_from_group(&pmu_dev->kobj,
+				     &ea->attr.attr, "sinks");
+	csdev->ea = NULL;
+}
+
 static int __init etm_perf_init(void)
 {
 	int ret;
diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.h b/drivers/hwtracing/coresight/coresight-etm-perf.h
index da7d9336a15c..015213abe00a 100644
--- a/drivers/hwtracing/coresight/coresight-etm-perf.h
+++ b/drivers/hwtracing/coresight/coresight-etm-perf.h
@@ -59,6 +59,8 @@ struct etm_event_data {
 
 #ifdef CONFIG_CORESIGHT
 int etm_perf_symlink(struct coresight_device *csdev, bool link);
+int etm_perf_add_symlink_sink(struct coresight_device *csdev);
+void etm_perf_del_symlink_sink(struct coresight_device *csdev);
 static inline void *etm_perf_sink_config(struct perf_output_handle *handle)
 {
 	struct etm_event_data *data = perf_get_aux(handle);
@@ -70,7 +72,9 @@ static inline void *etm_perf_sink_config(struct perf_output_handle *handle)
 #else
 static inline int etm_perf_symlink(struct coresight_device *csdev, bool link)
 { return -EINVAL; }
-
+int etm_perf_add_symlink_sink(struct coresight_device *csdev)
+{ return -EINVAL; }
+void etm_perf_del_symlink_sink(struct coresight_device *csdev) {}
 static inline void *etm_perf_sink_config(struct perf_output_handle *handle)
 {
 	return NULL;
diff --git a/drivers/hwtracing/coresight/coresight.c b/drivers/hwtracing/coresight/coresight.c
index 2b0df1a0a8df..d7fa90be6f42 100644
--- a/drivers/hwtracing/coresight/coresight.c
+++ b/drivers/hwtracing/coresight/coresight.c
@@ -18,6 +18,7 @@
 #include <linux/delay.h>
 #include <linux/pm_runtime.h>
 
+#include "coresight-etm-perf.h"
 #include "coresight-priv.h"
 
 static DEFINE_MUTEX(coresight_mutex);
@@ -1167,6 +1168,22 @@ struct coresight_device *coresight_register(struct coresight_desc *desc)
 		goto err_out;
 	}
 
+	if (csdev->type == CORESIGHT_DEV_TYPE_SINK ||
+	    csdev->type == CORESIGHT_DEV_TYPE_LINKSINK) {
+		ret = etm_perf_add_symlink_sink(csdev);
+
+		if (ret) {
+			device_unregister(&csdev->dev);
+			/*
+			 * As with the above, all resources are free'd
+			 * explicitly via coresight_device_release() triggered
+			 * from put_device(), which is in turn called from
+			 * function device_unregister().
+			 */
+			goto err_out;
+		}
+	}
+
 	mutex_lock(&coresight_mutex);
 
 	coresight_fixup_device_conns(csdev);
@@ -1185,6 +1202,7 @@ EXPORT_SYMBOL_GPL(coresight_register);
 
 void coresight_unregister(struct coresight_device *csdev)
 {
+	etm_perf_del_symlink_sink(csdev);
 	/* Remove references of that device in the topology */
 	coresight_remove_conns(csdev);
 	device_unregister(&csdev->dev);
diff --git a/include/linux/coresight.h b/include/linux/coresight.h
index 46c67a764877..7b87965f7a65 100644
--- a/include/linux/coresight.h
+++ b/include/linux/coresight.h
@@ -154,8 +154,9 @@ struct coresight_connection {
  * @orphan:	true if the component has connections that haven't been linked.
  * @enable:	'true' if component is currently part of an active path.
  * @activated:	'true' only if a _sink_ has been activated.  A sink can be
-		activated but not yet enabled.  Enabling for a _sink_
-		happens when a source has been selected for that it.
+ *		activated but not yet enabled.  Enabling for a _sink_
+ *		appens when a source has been selected for that it.
+ * @ea:		Device attribute for sink representation under PMU directory.
  */
 struct coresight_device {
 	struct coresight_connection *conns;
@@ -168,7 +169,9 @@ struct coresight_device {
 	atomic_t *refcnt;
 	bool orphan;
 	bool enable;	/* true only if configured as part of a path */
+	/* sink specific fields */
 	bool activated;	/* true only if a sink is part of a path */
+	struct dev_ext_attribute *ea;
 };
 
 #define to_coresight_device(d) container_of(d, struct coresight_device, dev)
-- 
cgit v1.2.3


From 5f02a877638472e83cb5e335f9eec27052b1c7c2 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Thu, 10 Jan 2019 19:04:28 +0200
Subject: fsnotify: annotate directory entry modification events

"dirent" events are referring to events that modify directory entries,
such as create,delete,rename. Those events should always be reported
on a watched directory, regardless if FS_EVENT_ON_CHILD is set
on the watch mask.

fsnotify_nameremove() and fsnotify_move() were modified to no longer
set the FS_EVENT_ON_CHILD event bit. This is a semantic change to
align with the "dirent" event definition. It has no effect on any
existing backend, because dnotify, inotify and audit always requets the
child events and fanotify does not get the delete,rename events.

The fsnotify_dirent() helper is used instead of fsnotify_parent() to
report a dirent event to dentry->d_parent without FS_EVENT_ON_CHILD
and regardless if parent has the FS_EVENT_ON_CHILD bit set.

Unlike fsnotify_parent(), fsnotify_dirent() assumes that dentry->d_name
and dentry->d_parent are stable. For fsnotify_create()/fsnotify_mkdir(),
this assumption is abviously correct. For fsnotify_nameremove(), it is
less trivial, so we use dget_parent() and take_dentry_name_snapshot() to
grab stable references.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 include/linux/fsnotify.h | 49 +++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 40 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h
index 2ccb08cb5d6a..39b22e88423d 100644
--- a/include/linux/fsnotify.h
+++ b/include/linux/fsnotify.h
@@ -17,8 +17,22 @@
 #include <linux/slab.h>
 #include <linux/bug.h>
 
+/*
+ * Notify this @dir inode about a change in the directory entry @dentry.
+ *
+ * Unlike fsnotify_parent(), the event will be reported regardless of the
+ * FS_EVENT_ON_CHILD mask on the parent inode.
+ */
+static inline int fsnotify_dirent(struct inode *dir, struct dentry *dentry,
+				  __u32 mask)
+{
+	return fsnotify(dir, mask, d_inode(dentry), FSNOTIFY_EVENT_INODE,
+			dentry->d_name.name, 0);
+}
+
 /* Notify this dentry's parent about a child's events. */
-static inline int fsnotify_parent(const struct path *path, struct dentry *dentry, __u32 mask)
+static inline int fsnotify_parent(const struct path *path,
+				  struct dentry *dentry, __u32 mask)
 {
 	if (!dentry)
 		dentry = path->dentry;
@@ -85,8 +99,8 @@ static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir,
 {
 	struct inode *source = moved->d_inode;
 	u32 fs_cookie = fsnotify_get_cookie();
-	__u32 old_dir_mask = (FS_EVENT_ON_CHILD | FS_MOVED_FROM);
-	__u32 new_dir_mask = (FS_EVENT_ON_CHILD | FS_MOVED_TO);
+	__u32 old_dir_mask = FS_MOVED_FROM;
+	__u32 new_dir_mask = FS_MOVED_TO;
 	const unsigned char *new_name = moved->d_name.name;
 
 	if (old_dir == new_dir)
@@ -128,15 +142,35 @@ static inline void fsnotify_vfsmount_delete(struct vfsmount *mnt)
 
 /*
  * fsnotify_nameremove - a filename was removed from a directory
+ *
+ * This is mostly called under parent vfs inode lock so name and
+ * dentry->d_parent should be stable. However there are some corner cases where
+ * inode lock is not held. So to be on the safe side and be reselient to future
+ * callers and out of tree users of d_delete(), we do not assume that d_parent
+ * and d_name are stable and we use dget_parent() and
+ * take_dentry_name_snapshot() to grab stable references.
  */
 static inline void fsnotify_nameremove(struct dentry *dentry, int isdir)
 {
+	struct dentry *parent;
+	struct name_snapshot name;
 	__u32 mask = FS_DELETE;
 
+	/* d_delete() of pseudo inode? (e.g. __ns_get_path() playing tricks) */
+	if (IS_ROOT(dentry))
+		return;
+
 	if (isdir)
 		mask |= FS_ISDIR;
 
-	fsnotify_parent(NULL, dentry, mask);
+	parent = dget_parent(dentry);
+	take_dentry_name_snapshot(&name, dentry);
+
+	fsnotify(d_inode(parent), mask, d_inode(dentry), FSNOTIFY_EVENT_INODE,
+		 name.name, 0);
+
+	release_dentry_name_snapshot(&name);
+	dput(parent);
 }
 
 /*
@@ -155,7 +189,7 @@ static inline void fsnotify_create(struct inode *inode, struct dentry *dentry)
 {
 	audit_inode_child(inode, dentry, AUDIT_TYPE_CHILD_CREATE);
 
-	fsnotify(inode, FS_CREATE, dentry->d_inode, FSNOTIFY_EVENT_INODE, dentry->d_name.name, 0);
+	fsnotify_dirent(inode, dentry, FS_CREATE);
 }
 
 /*
@@ -176,12 +210,9 @@ static inline void fsnotify_link(struct inode *dir, struct inode *inode, struct
  */
 static inline void fsnotify_mkdir(struct inode *inode, struct dentry *dentry)
 {
-	__u32 mask = (FS_CREATE | FS_ISDIR);
-	struct inode *d_inode = dentry->d_inode;
-
 	audit_inode_child(inode, dentry, AUDIT_TYPE_CHILD_CREATE);
 
-	fsnotify(inode, mask, d_inode, FSNOTIFY_EVENT_INODE, dentry->d_name.name, 0);
+	fsnotify_dirent(inode, dentry, FS_CREATE | FS_ISDIR);
 }
 
 /*
-- 
cgit v1.2.3


From e220140ff6241e180d0c2fc294e61ee6bbc6a18e Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Thu, 10 Jan 2019 19:04:29 +0200
Subject: fsnotify: remove dirent events from FS_EVENTS_POSS_ON_CHILD mask

"dirent" events are referring to events that modify directory entries,
such as create,delete,rename. Those events are always be reported
on a watched directory, regardless if FS_EVENT_ON_CHILD is set
on the watch mask.

ALL_FSNOTIFY_DIRENT_EVENTS defines all the dirent event types and
those event types are removed from FS_EVENTS_POSS_ON_CHILD.

That means for a directory with an inotify watch and only dirent
events in the mask (i.e. create,delete,move), all children dentries
will no longer have the DCACHE_FSNOTIFY_PARENT_WATCHED flag set.
This will allow all events that happen on children to be optimized
away in __fsnotify_parent() without the need to dereference
child->d_parent->d_inode->i_fsnotify_mask.

Since the dirent events are never repoted via __fsnotify_parent(),
this results in no change of logic, but only an optimization.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 include/linux/fsnotify_backend.h | 36 +++++++++++++++++++++---------------
 1 file changed, 21 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index 7639774e7475..7f195d43efaf 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -59,27 +59,33 @@
  * dnotify and inotify. */
 #define FS_EVENT_ON_CHILD	0x08000000
 
-/* This is a list of all events that may get sent to a parernt based on fs event
- * happening to inodes inside that directory */
-#define FS_EVENTS_POSS_ON_CHILD   (FS_ACCESS | FS_MODIFY | FS_ATTRIB |\
-				   FS_CLOSE_WRITE | FS_CLOSE_NOWRITE | FS_OPEN |\
-				   FS_MOVED_FROM | FS_MOVED_TO | FS_CREATE |\
-				   FS_DELETE | FS_OPEN_PERM | FS_ACCESS_PERM | \
-				   FS_OPEN_EXEC | FS_OPEN_EXEC_PERM)
-
 #define FS_MOVE			(FS_MOVED_FROM | FS_MOVED_TO)
 
+/*
+ * Directory entry modification events - reported only to directory
+ * where entry is modified and not to a watching parent.
+ * The watching parent may get an FS_ATTRIB|FS_EVENT_ON_CHILD event
+ * when a directory entry inside a child subdir changes.
+ */
+#define ALL_FSNOTIFY_DIRENT_EVENTS	(FS_CREATE | FS_DELETE | FS_MOVE)
+
 #define ALL_FSNOTIFY_PERM_EVENTS (FS_OPEN_PERM | FS_ACCESS_PERM | \
 				  FS_OPEN_EXEC_PERM)
 
+/*
+ * This is a list of all events that may get sent to a parent based on fs event
+ * happening to inodes inside that directory.
+ */
+#define FS_EVENTS_POSS_ON_CHILD   (ALL_FSNOTIFY_PERM_EVENTS | \
+				   FS_ACCESS | FS_MODIFY | FS_ATTRIB | \
+				   FS_CLOSE_WRITE | FS_CLOSE_NOWRITE | \
+				   FS_OPEN | FS_OPEN_EXEC)
+
 /* Events that can be reported to backends */
-#define ALL_FSNOTIFY_EVENTS (FS_ACCESS | FS_MODIFY | FS_ATTRIB | \
-			     FS_CLOSE_WRITE | FS_CLOSE_NOWRITE | FS_OPEN | \
-			     FS_MOVED_FROM | FS_MOVED_TO | FS_CREATE | \
-			     FS_DELETE | FS_DELETE_SELF | FS_MOVE_SELF | \
-			     FS_UNMOUNT | FS_Q_OVERFLOW | FS_IN_IGNORED | \
-			     FS_OPEN_PERM | FS_ACCESS_PERM | FS_DN_RENAME | \
-			     FS_OPEN_EXEC | FS_OPEN_EXEC_PERM)
+#define ALL_FSNOTIFY_EVENTS (ALL_FSNOTIFY_DIRENT_EVENTS | \
+			     FS_EVENTS_POSS_ON_CHILD | \
+			     FS_DELETE_SELF | FS_MOVE_SELF | FS_DN_RENAME | \
+			     FS_UNMOUNT | FS_Q_OVERFLOW | FS_IN_IGNORED)
 
 /* Extra flags that may be reported with event or control handling of events */
 #define ALL_FSNOTIFY_FLAGS  (FS_EXCL_UNLINK | FS_ISDIR | FS_IN_ONESHOT | \
-- 
cgit v1.2.3


From a0a92d261f2922f4b5d2c0a98d6c41a89c7f5edd Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Thu, 10 Jan 2019 19:04:31 +0200
Subject: fsnotify: move mask out of struct fsnotify_event

Common fsnotify_event helpers have no need for the mask field.
It is only used by backend code, so move the field out of the
abstract fsnotify_event struct and into the concrete backend
event structs.

This change packs struct inotify_event_info better on 64bit
machine and will allow us to cram some more fields into
struct fanotify_event_info.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/notify/fanotify/fanotify.c        | 11 +++++++----
 fs/notify/fanotify/fanotify.h        |  1 +
 fs/notify/fanotify/fanotify_user.c   | 10 +++++-----
 fs/notify/inotify/inotify.h          |  1 +
 fs/notify/inotify/inotify_fsnotify.c |  9 +++++----
 fs/notify/inotify/inotify_user.c     |  5 +++--
 fs/notify/notification.c             | 22 +---------------------
 include/linux/fsnotify_backend.h     | 10 ++++++----
 8 files changed, 29 insertions(+), 40 deletions(-)

(limited to 'include')

diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index 3723f3d18d20..98197802bbfb 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -36,20 +36,22 @@ static bool should_merge(struct fsnotify_event *old_fsn,
 static int fanotify_merge(struct list_head *list, struct fsnotify_event *event)
 {
 	struct fsnotify_event *test_event;
+	struct fanotify_event_info *new;
 
 	pr_debug("%s: list=%p event=%p\n", __func__, list, event);
+	new = FANOTIFY_E(event);
 
 	/*
 	 * Don't merge a permission event with any other event so that we know
 	 * the event structure we have created in fanotify_handle_event() is the
 	 * one we should check for permission response.
 	 */
-	if (fanotify_is_perm_event(event->mask))
+	if (fanotify_is_perm_event(new->mask))
 		return 0;
 
 	list_for_each_entry_reverse(test_event, list, list) {
 		if (should_merge(test_event, event)) {
-			test_event->mask |= event->mask;
+			FANOTIFY_E(test_event)->mask |= new->mask;
 			return 1;
 		}
 	}
@@ -173,7 +175,8 @@ struct fanotify_event_info *fanotify_alloc_event(struct fsnotify_group *group,
 	if (!event)
 		goto out;
 init: __maybe_unused
-	fsnotify_init_event(&event->fse, inode, mask);
+	fsnotify_init_event(&event->fse, inode);
+	event->mask = mask;
 	if (FAN_GROUP_FLAG(group, FAN_REPORT_TID))
 		event->pid = get_pid(task_pid(current));
 	else
@@ -280,7 +283,7 @@ static void fanotify_free_event(struct fsnotify_event *fsn_event)
 	event = FANOTIFY_E(fsn_event);
 	path_put(&event->path);
 	put_pid(event->pid);
-	if (fanotify_is_perm_event(fsn_event->mask)) {
+	if (fanotify_is_perm_event(event->mask)) {
 		kmem_cache_free(fanotify_perm_event_cachep,
 				FANOTIFY_PE(fsn_event));
 		return;
diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h
index ea05b8a401e7..e630d787d4c3 100644
--- a/fs/notify/fanotify/fanotify.h
+++ b/fs/notify/fanotify/fanotify.h
@@ -14,6 +14,7 @@ extern struct kmem_cache *fanotify_perm_event_cachep;
  */
 struct fanotify_event_info {
 	struct fsnotify_event fse;
+	u32 mask;
 	/*
 	 * We hold ref to this path so it may be dereferenced at any point
 	 * during this object's lifetime
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 9c870b0d2b56..dea47d07cc29 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -131,9 +131,9 @@ static int fill_event_metadata(struct fsnotify_group *group,
 	metadata->metadata_len = FAN_EVENT_METADATA_LEN;
 	metadata->vers = FANOTIFY_METADATA_VERSION;
 	metadata->reserved = 0;
-	metadata->mask = fsn_event->mask & FANOTIFY_OUTGOING_EVENTS;
+	metadata->mask = event->mask & FANOTIFY_OUTGOING_EVENTS;
 	metadata->pid = pid_vnr(event->pid);
-	if (unlikely(fsn_event->mask & FAN_Q_OVERFLOW))
+	if (unlikely(event->mask & FAN_Q_OVERFLOW))
 		metadata->fd = FAN_NOFD;
 	else {
 		metadata->fd = create_fd(group, event, file);
@@ -230,7 +230,7 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group,
 			 fanotify_event_metadata.event_len))
 		goto out_close_fd;
 
-	if (fanotify_is_perm_event(event->mask))
+	if (fanotify_is_perm_event(FANOTIFY_E(event)->mask))
 		FANOTIFY_PE(event)->fd = fd;
 
 	if (fd != FAN_NOFD)
@@ -316,7 +316,7 @@ static ssize_t fanotify_read(struct file *file, char __user *buf,
 		 * Permission events get queued to wait for response.  Other
 		 * events can be destroyed now.
 		 */
-		if (!fanotify_is_perm_event(kevent->mask)) {
+		if (!fanotify_is_perm_event(FANOTIFY_E(kevent)->mask)) {
 			fsnotify_destroy_event(group, kevent);
 		} else {
 			if (ret <= 0) {
@@ -401,7 +401,7 @@ static int fanotify_release(struct inode *ignored, struct file *file)
 	 */
 	while (!fsnotify_notify_queue_is_empty(group)) {
 		fsn_event = fsnotify_remove_first_event(group);
-		if (!(fsn_event->mask & FANOTIFY_PERM_EVENTS)) {
+		if (!(FANOTIFY_E(fsn_event)->mask & FANOTIFY_PERM_EVENTS)) {
 			spin_unlock(&group->notification_lock);
 			fsnotify_destroy_event(group, fsn_event);
 			spin_lock(&group->notification_lock);
diff --git a/fs/notify/inotify/inotify.h b/fs/notify/inotify/inotify.h
index 7e4578d35b61..74ae60305189 100644
--- a/fs/notify/inotify/inotify.h
+++ b/fs/notify/inotify/inotify.h
@@ -5,6 +5,7 @@
 
 struct inotify_event_info {
 	struct fsnotify_event fse;
+	u32 mask;
 	int wd;
 	u32 sync_cookie;
 	int name_len;
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
index f4184b4f3815..fe97299975f2 100644
--- a/fs/notify/inotify/inotify_fsnotify.c
+++ b/fs/notify/inotify/inotify_fsnotify.c
@@ -43,11 +43,11 @@ static bool event_compare(struct fsnotify_event *old_fsn,
 {
 	struct inotify_event_info *old, *new;
 
-	if (old_fsn->mask & FS_IN_IGNORED)
-		return false;
 	old = INOTIFY_E(old_fsn);
 	new = INOTIFY_E(new_fsn);
-	if ((old_fsn->mask == new_fsn->mask) &&
+	if (old->mask & FS_IN_IGNORED)
+		return false;
+	if ((old->mask == new->mask) &&
 	    (old_fsn->inode == new_fsn->inode) &&
 	    (old->name_len == new->name_len) &&
 	    (!old->name_len || !strcmp(old->name, new->name)))
@@ -114,7 +114,8 @@ int inotify_handle_event(struct fsnotify_group *group,
 	}
 
 	fsn_event = &event->fse;
-	fsnotify_init_event(fsn_event, inode, mask);
+	fsnotify_init_event(fsn_event, inode);
+	event->mask = mask;
 	event->wd = i_mark->wd;
 	event->sync_cookie = cookie;
 	event->name_len = len;
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 798f1253141a..e2901fbb9f76 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -189,7 +189,7 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group,
 	 */
 	pad_name_len = round_event_name_len(fsn_event);
 	inotify_event.len = pad_name_len;
-	inotify_event.mask = inotify_mask_to_arg(fsn_event->mask);
+	inotify_event.mask = inotify_mask_to_arg(event->mask);
 	inotify_event.wd = event->wd;
 	inotify_event.cookie = event->sync_cookie;
 
@@ -634,7 +634,8 @@ static struct fsnotify_group *inotify_new_group(unsigned int max_events)
 		return ERR_PTR(-ENOMEM);
 	}
 	group->overflow_event = &oevent->fse;
-	fsnotify_init_event(group->overflow_event, NULL, FS_Q_OVERFLOW);
+	fsnotify_init_event(group->overflow_event, NULL);
+	oevent->mask = FS_Q_OVERFLOW;
 	oevent->wd = -1;
 	oevent->sync_cookie = 0;
 	oevent->name_len = 0;
diff --git a/fs/notify/notification.c b/fs/notify/notification.c
index 3c3e36745f59..027d5d5bb90e 100644
--- a/fs/notify/notification.c
+++ b/fs/notify/notification.c
@@ -71,7 +71,7 @@ void fsnotify_destroy_event(struct fsnotify_group *group,
 			    struct fsnotify_event *event)
 {
 	/* Overflow events are per-group and we don't want to free them */
-	if (!event || event->mask == FS_Q_OVERFLOW)
+	if (!event || event == group->overflow_event)
 		return;
 	/*
 	 * If the event is still queued, we have a problem... Do an unreliable
@@ -194,23 +194,3 @@ void fsnotify_flush_notify(struct fsnotify_group *group)
 	}
 	spin_unlock(&group->notification_lock);
 }
-
-/*
- * fsnotify_create_event - Allocate a new event which will be sent to each
- * group's handle_event function if the group was interested in this
- * particular event.
- *
- * @inode the inode which is supposed to receive the event (sometimes a
- *	parent of the inode to which the event happened.
- * @mask what actually happened.
- * @data pointer to the object which was actually affected
- * @data_type flag indication if the data is a file, path, inode, nothing...
- * @name the filename, if available
- */
-void fsnotify_init_event(struct fsnotify_event *event, struct inode *inode,
-			 u32 mask)
-{
-	INIT_LIST_HEAD(&event->list);
-	event->inode = inode;
-	event->mask = mask;
-}
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index 7f195d43efaf..1e4b88bd1443 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -135,7 +135,6 @@ struct fsnotify_event {
 	struct list_head list;
 	/* inode may ONLY be dereferenced during handle_event(). */
 	struct inode *inode;	/* either the inode the event happened to or its parent */
-	u32 mask;		/* the type of access, bitwise OR for FS_* event types */
 };
 
 /*
@@ -485,9 +484,12 @@ extern void fsnotify_put_mark(struct fsnotify_mark *mark);
 extern void fsnotify_finish_user_wait(struct fsnotify_iter_info *iter_info);
 extern bool fsnotify_prepare_user_wait(struct fsnotify_iter_info *iter_info);
 
-/* put here because inotify does some weird stuff when destroying watches */
-extern void fsnotify_init_event(struct fsnotify_event *event,
-				struct inode *to_tell, u32 mask);
+static inline void fsnotify_init_event(struct fsnotify_event *event,
+				       struct inode *inode)
+{
+	INIT_LIST_HEAD(&event->list);
+	event->inode = inode;
+}
 
 #else
 
-- 
cgit v1.2.3


From d6cd33ad71029a3f77ba1686caf55d4dea58d916 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Tue, 29 Jan 2019 11:31:52 +0100
Subject: regulator: gpio: Convert to use descriptors

This converts the GPIO regulator driver to use decriptors only.

We have to let go of the array gpio handling: the fetched descriptors
are handled individually anyway, and the array retrieveal function
does not make it possible to retrieve each GPIO descriptor with
unique flags. Instead get them one by one.

We request the "enable" GPIO separately as before, and make sure
that this line is requested as nonexclusive since enable lines can
be shared and the regulator core expects this.

Most users of the GPIO regulator are using device tree.

There are two boards in the kernel using the gpio regulator from a
non-devicetree path: PXA hx4700 and magician. Make sure to switch
these over to use descriptors as well.

Cc: Philipp Zabel <p.zabel@pengutronix.de> # Magician
Cc: Petr Cvek <petr.cvek@tul.cz> # Magician
Cc: Robert Jarzmik <robert.jarzmik@free.fr> # PXA
Cc: Paul Parsons <lost.distance@yahoo.com> # hx4700
Cc: Kevin Hilman <khilman@baylibre.com> # Meson
Cc: Neil Armstrong <narmstrong@baylibre.com> # Meson
Tested-by: Marek Szyprowski <m.szyprowski@samsung.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 arch/arm/mach-pxa/hx4700.c               |  23 +++--
 arch/arm/mach-pxa/magician.c             |  23 +++--
 drivers/regulator/gpio-regulator.c       | 150 ++++++++++++-------------------
 include/linux/regulator/gpio-regulator.h |  12 +--
 4 files changed, 95 insertions(+), 113 deletions(-)

(limited to 'include')

diff --git a/arch/arm/mach-pxa/hx4700.c b/arch/arm/mach-pxa/hx4700.c
index b79b757fdd41..51d38d5e776a 100644
--- a/arch/arm/mach-pxa/hx4700.c
+++ b/arch/arm/mach-pxa/hx4700.c
@@ -19,6 +19,7 @@
 #include <linux/platform_device.h>
 #include <linux/delay.h>
 #include <linux/fb.h>
+#include <linux/gpio/machine.h>
 #include <linux/gpio.h>
 #include <linux/gpio_keys.h>
 #include <linux/input.h>
@@ -702,9 +703,7 @@ static struct regulator_init_data bq24022_init_data = {
 	.consumer_supplies      = bq24022_consumers,
 };
 
-static struct gpio bq24022_gpios[] = {
-	{ GPIO96_HX4700_BQ24022_ISET2, GPIOF_OUT_INIT_LOW, "bq24022_iset2" },
-};
+static enum gpiod_flags bq24022_gpiod_gflags[] = { GPIOD_OUT_LOW };
 
 static struct gpio_regulator_state bq24022_states[] = {
 	{ .value = 100000, .gpios = (0 << 0) },
@@ -714,12 +713,10 @@ static struct gpio_regulator_state bq24022_states[] = {
 static struct gpio_regulator_config bq24022_info = {
 	.supply_name = "bq24022",
 
-	.enable_gpio = GPIO72_HX4700_BQ24022_nCHARGE_EN,
-	.enable_high = 0,
 	.enabled_at_boot = 0,
 
-	.gpios = bq24022_gpios,
-	.nr_gpios = ARRAY_SIZE(bq24022_gpios),
+	.gflags = bq24022_gpiod_gflags,
+	.ngpios = ARRAY_SIZE(bq24022_gpiod_gflags),
 
 	.states = bq24022_states,
 	.nr_states = ARRAY_SIZE(bq24022_states),
@@ -736,6 +733,17 @@ static struct platform_device bq24022 = {
 	},
 };
 
+static struct gpiod_lookup_table bq24022_gpiod_table = {
+	.dev_id = "gpio-regulator",
+	.table = {
+		GPIO_LOOKUP("gpio-pxa", GPIO96_HX4700_BQ24022_ISET2,
+			    NULL, GPIO_ACTIVE_HIGH),
+		GPIO_LOOKUP("gpio-pxa", GPIO72_HX4700_BQ24022_nCHARGE_EN,
+			    "enable", GPIO_ACTIVE_LOW),
+		{ },
+	},
+};
+
 /*
  * StrataFlash
  */
@@ -878,6 +886,7 @@ static void __init hx4700_init(void)
 	pxa_set_btuart_info(NULL);
 	pxa_set_stuart_info(NULL);
 
+	gpiod_add_lookup_table(&bq24022_gpiod_table);
 	platform_add_devices(devices, ARRAY_SIZE(devices));
 	pwm_add_table(hx4700_pwm_lookup, ARRAY_SIZE(hx4700_pwm_lookup));
 
diff --git a/arch/arm/mach-pxa/magician.c b/arch/arm/mach-pxa/magician.c
index 08b079653c3f..6538a7c0e504 100644
--- a/arch/arm/mach-pxa/magician.c
+++ b/arch/arm/mach-pxa/magician.c
@@ -645,9 +645,8 @@ static struct regulator_init_data bq24022_init_data = {
 	.consumer_supplies	= bq24022_consumers,
 };
 
-static struct gpio bq24022_gpios[] = {
-	{ EGPIO_MAGICIAN_BQ24022_ISET2, GPIOF_OUT_INIT_LOW, "bq24022_iset2" },
-};
+
+static enum gpiod_flags bq24022_gpiod_gflags[] = { GPIOD_OUT_LOW };
 
 static struct gpio_regulator_state bq24022_states[] = {
 	{ .value = 100000, .gpios = (0 << 0) },
@@ -657,12 +656,10 @@ static struct gpio_regulator_state bq24022_states[] = {
 static struct gpio_regulator_config bq24022_info = {
 	.supply_name		= "bq24022",
 
-	.enable_gpio		= GPIO30_MAGICIAN_BQ24022_nCHARGE_EN,
-	.enable_high		= 0,
 	.enabled_at_boot	= 1,
 
-	.gpios			= bq24022_gpios,
-	.nr_gpios		= ARRAY_SIZE(bq24022_gpios),
+	.gflags = bq24022_gpiod_gflags,
+	.ngpios = ARRAY_SIZE(bq24022_gpiod_gflags),
 
 	.states			= bq24022_states,
 	.nr_states		= ARRAY_SIZE(bq24022_states),
@@ -679,6 +676,17 @@ static struct platform_device bq24022 = {
 	},
 };
 
+static struct gpiod_lookup_table bq24022_gpiod_table = {
+	.dev_id = "gpio-regulator",
+	.table = {
+		GPIO_LOOKUP("gpio-pxa", EGPIO_MAGICIAN_BQ24022_ISET2,
+			    NULL, GPIO_ACTIVE_HIGH),
+		GPIO_LOOKUP("gpio-pxa", GPIO30_MAGICIAN_BQ24022_nCHARGE_EN,
+			    "enable", GPIO_ACTIVE_LOW),
+		{ },
+	},
+};
+
 /*
  * fixed regulator for ads7846
  */
@@ -1027,6 +1035,7 @@ static void __init magician_init(void)
 	regulator_register_always_on(0, "power", pwm_backlight_supply,
 		ARRAY_SIZE(pwm_backlight_supply), 5000000);
 
+	gpiod_add_lookup_table(&bq24022_gpiod_table);
 	platform_add_devices(ARRAY_AND_SIZE(devices));
 }
 
diff --git a/drivers/regulator/gpio-regulator.c b/drivers/regulator/gpio-regulator.c
index b2f5ec4f658a..07fb41abd4e8 100644
--- a/drivers/regulator/gpio-regulator.c
+++ b/drivers/regulator/gpio-regulator.c
@@ -30,16 +30,15 @@
 #include <linux/regulator/machine.h>
 #include <linux/regulator/of_regulator.h>
 #include <linux/regulator/gpio-regulator.h>
-#include <linux/gpio.h>
+#include <linux/gpio/consumer.h>
 #include <linux/slab.h>
 #include <linux/of.h>
-#include <linux/of_gpio.h>
 
 struct gpio_regulator_data {
 	struct regulator_desc desc;
 	struct regulator_dev *dev;
 
-	struct gpio *gpios;
+	struct gpio_desc **gpiods;
 	int nr_gpios;
 
 	struct gpio_regulator_state *states;
@@ -82,7 +81,7 @@ static int gpio_regulator_set_voltage(struct regulator_dev *dev,
 
 	for (ptr = 0; ptr < data->nr_gpios; ptr++) {
 		state = (target & (1 << ptr)) >> ptr;
-		gpio_set_value_cansleep(data->gpios[ptr].gpio, state);
+		gpiod_set_value_cansleep(data->gpiods[ptr], state);
 	}
 	data->state = target;
 
@@ -119,7 +118,7 @@ static int gpio_regulator_set_current_limit(struct regulator_dev *dev,
 
 	for (ptr = 0; ptr < data->nr_gpios; ptr++) {
 		state = (target & (1 << ptr)) >> ptr;
-		gpio_set_value_cansleep(data->gpios[ptr].gpio, state);
+		gpiod_set_value_cansleep(data->gpiods[ptr], state);
 	}
 	data->state = target;
 
@@ -138,7 +137,8 @@ of_get_gpio_regulator_config(struct device *dev, struct device_node *np,
 {
 	struct gpio_regulator_config *config;
 	const char *regtype;
-	int proplen, gpio, i;
+	int proplen, i;
+	int ngpios;
 	int ret;
 
 	config = devm_kzalloc(dev,
@@ -153,59 +153,36 @@ of_get_gpio_regulator_config(struct device *dev, struct device_node *np,
 
 	config->supply_name = config->init_data->constraints.name;
 
-	if (of_property_read_bool(np, "enable-active-high"))
-		config->enable_high = true;
-
 	if (of_property_read_bool(np, "enable-at-boot"))
 		config->enabled_at_boot = true;
 
 	of_property_read_u32(np, "startup-delay-us", &config->startup_delay);
 
-	config->enable_gpio = of_get_named_gpio(np, "enable-gpio", 0);
-	if (config->enable_gpio < 0 && config->enable_gpio != -ENOENT)
-		return ERR_PTR(config->enable_gpio);
-
-	/* Fetch GPIOs. - optional property*/
-	ret = of_gpio_count(np);
-	if ((ret < 0) && (ret != -ENOENT))
-		return ERR_PTR(ret);
-
-	if (ret > 0) {
-		config->nr_gpios = ret;
-		config->gpios = devm_kcalloc(dev,
-					config->nr_gpios, sizeof(struct gpio),
-					GFP_KERNEL);
-		if (!config->gpios)
+	/* Fetch GPIO init levels */
+	ngpios = gpiod_count(dev, NULL);
+	if (ngpios > 0) {
+		config->gflags = devm_kzalloc(dev,
+					      sizeof(enum gpiod_flags)
+					      * ngpios,
+					      GFP_KERNEL);
+		if (!config->gflags)
 			return ERR_PTR(-ENOMEM);
 
-		proplen = of_property_count_u32_elems(np, "gpios-states");
-		/* optional property */
-		if (proplen < 0)
-			proplen = 0;
+		for (i = 0; i < ngpios; i++) {
+			u32 val;
 
-		if (proplen > 0 && proplen != config->nr_gpios) {
-			dev_warn(dev, "gpios <-> gpios-states mismatch\n");
-			proplen = 0;
-		}
+			ret = of_property_read_u32_index(np, "gpios-states", i,
+							 &val);
 
-		for (i = 0; i < config->nr_gpios; i++) {
-			gpio = of_get_named_gpio(np, "gpios", i);
-			if (gpio < 0) {
-				if (gpio != -ENOENT)
-					return ERR_PTR(gpio);
-				break;
-			}
-			config->gpios[i].gpio = gpio;
-			config->gpios[i].label = config->supply_name;
-			if (proplen > 0) {
-				of_property_read_u32_index(np, "gpios-states",
-							   i, &ret);
-				if (ret)
-					config->gpios[i].flags =
-							   GPIOF_OUT_INIT_HIGH;
-			}
+			/* Default to high per specification */
+			if (ret)
+				config->gflags[i] = GPIOD_OUT_HIGH;
+			else
+				config->gflags[i] =
+					val ? GPIOD_OUT_HIGH : GPIOD_OUT_LOW;
 		}
 	}
+	config->ngpios = ngpios;
 
 	/* Fetch states. */
 	proplen = of_property_count_u32_elems(np, "states");
@@ -255,7 +232,8 @@ static int gpio_regulator_probe(struct platform_device *pdev)
 	struct device_node *np = pdev->dev.of_node;
 	struct gpio_regulator_data *drvdata;
 	struct regulator_config cfg = { };
-	int ptr, ret, state;
+	enum gpiod_flags gflags;
+	int ptr, ret, state, i;
 
 	drvdata = devm_kzalloc(&pdev->dev, sizeof(struct gpio_regulator_data),
 			       GFP_KERNEL);
@@ -275,26 +253,21 @@ static int gpio_regulator_probe(struct platform_device *pdev)
 		return -ENOMEM;
 	}
 
-	if (config->nr_gpios != 0) {
-		drvdata->gpios = kmemdup(config->gpios,
-					 config->nr_gpios * sizeof(struct gpio),
-					 GFP_KERNEL);
-		if (drvdata->gpios == NULL) {
-			dev_err(&pdev->dev, "Failed to allocate gpio data\n");
-			ret = -ENOMEM;
-			goto err_name;
-		}
-
-		drvdata->nr_gpios = config->nr_gpios;
-		ret = gpio_request_array(drvdata->gpios, drvdata->nr_gpios);
-		if (ret) {
-			if (ret != -EPROBE_DEFER)
-				dev_err(&pdev->dev,
-					"Could not obtain regulator setting GPIOs: %d\n",
-					ret);
-			goto err_memgpio;
-		}
+	drvdata->gpiods = devm_kzalloc(&pdev->dev, sizeof(struct gpio_desc *),
+				       GFP_KERNEL);
+	if (!drvdata->gpiods)
+		return -ENOMEM;
+	for (i = 0; i < config->ngpios; i++) {
+		drvdata->gpiods[i] = devm_gpiod_get_index(&pdev->dev,
+							  NULL,
+							  i,
+							  config->gflags[i]);
+		if (IS_ERR(drvdata->gpiods[i]))
+			return PTR_ERR(drvdata->gpiods[i]);
+		/* This is good to know */
+		gpiod_set_consumer_name(drvdata->gpiods[i], drvdata->desc.name);
 	}
+	drvdata->nr_gpios = config->ngpios;
 
 	drvdata->states = kmemdup(config->states,
 				  config->nr_states *
@@ -303,7 +276,7 @@ static int gpio_regulator_probe(struct platform_device *pdev)
 	if (drvdata->states == NULL) {
 		dev_err(&pdev->dev, "Failed to allocate state data\n");
 		ret = -ENOMEM;
-		goto err_stategpio;
+		goto err_name;
 	}
 	drvdata->nr_states = config->nr_states;
 
@@ -330,7 +303,7 @@ static int gpio_regulator_probe(struct platform_device *pdev)
 	/* build initial state from gpio init data. */
 	state = 0;
 	for (ptr = 0; ptr < drvdata->nr_gpios; ptr++) {
-		if (config->gpios[ptr].flags & GPIOF_OUT_INIT_HIGH)
+		if (config->gflags[ptr] == GPIOD_OUT_HIGH)
 			state |= (1 << ptr);
 	}
 	drvdata->state = state;
@@ -340,21 +313,19 @@ static int gpio_regulator_probe(struct platform_device *pdev)
 	cfg.driver_data = drvdata;
 	cfg.of_node = np;
 
-	if (gpio_is_valid(config->enable_gpio)) {
-		cfg.ena_gpio = config->enable_gpio;
-		cfg.ena_gpio_initialized = true;
-	}
-	cfg.ena_gpio_invert = !config->enable_high;
-	if (config->enabled_at_boot) {
-		if (config->enable_high)
-			cfg.ena_gpio_flags |= GPIOF_OUT_INIT_HIGH;
-		else
-			cfg.ena_gpio_flags |= GPIOF_OUT_INIT_LOW;
-	} else {
-		if (config->enable_high)
-			cfg.ena_gpio_flags |= GPIOF_OUT_INIT_LOW;
-		else
-			cfg.ena_gpio_flags |= GPIOF_OUT_INIT_HIGH;
+	/*
+	 * The signal will be inverted by the GPIO core if flagged so in the
+	 * decriptor.
+	 */
+	if (config->enabled_at_boot)
+		gflags = GPIOD_OUT_HIGH | GPIOD_FLAGS_BIT_NONEXCLUSIVE;
+	else
+		gflags = GPIOD_OUT_LOW | GPIOD_FLAGS_BIT_NONEXCLUSIVE;
+
+	cfg.ena_gpiod = gpiod_get_optional(&pdev->dev, "enable", gflags);
+	if (IS_ERR(cfg.ena_gpiod)) {
+		ret = PTR_ERR(cfg.ena_gpiod);
+		goto err_memstate;
 	}
 
 	drvdata->dev = regulator_register(&drvdata->desc, &cfg);
@@ -370,10 +341,6 @@ static int gpio_regulator_probe(struct platform_device *pdev)
 
 err_memstate:
 	kfree(drvdata->states);
-err_stategpio:
-	gpio_free_array(drvdata->gpios, drvdata->nr_gpios);
-err_memgpio:
-	kfree(drvdata->gpios);
 err_name:
 	kfree(drvdata->desc.name);
 	return ret;
@@ -384,12 +351,7 @@ static int gpio_regulator_remove(struct platform_device *pdev)
 	struct gpio_regulator_data *drvdata = platform_get_drvdata(pdev);
 
 	regulator_unregister(drvdata->dev);
-
-	gpio_free_array(drvdata->gpios, drvdata->nr_gpios);
-
 	kfree(drvdata->states);
-	kfree(drvdata->gpios);
-
 	kfree(drvdata->desc.name);
 
 	return 0;
diff --git a/include/linux/regulator/gpio-regulator.h b/include/linux/regulator/gpio-regulator.h
index 19fbd267406d..49c407afb944 100644
--- a/include/linux/regulator/gpio-regulator.h
+++ b/include/linux/regulator/gpio-regulator.h
@@ -21,6 +21,8 @@
 #ifndef __REGULATOR_GPIO_H
 #define __REGULATOR_GPIO_H
 
+#include <linux/gpio/consumer.h>
+
 struct regulator_init_data;
 
 enum regulator_type;
@@ -53,9 +55,9 @@ struct gpio_regulator_state {
  *			This is used to keep the regulator at
  *			the default state
  * @startup_delay:	Start-up time in microseconds
- * @gpios:		Array containing the gpios needed to control
- *			the setting of the regulator
- * @nr_gpios:		Number of gpios
+ * @gflags:		Array of GPIO configuration flags for initial
+ *			states
+ * @ngpios:		Number of GPIOs and configurations available
  * @states:		Array of gpio_regulator_state entries describing
  *			the gpio state for specific voltages
  * @nr_states:		Number of states available
@@ -74,8 +76,8 @@ struct gpio_regulator_config {
 	unsigned enabled_at_boot:1;
 	unsigned startup_delay;
 
-	struct gpio *gpios;
-	int nr_gpios;
+	enum gpiod_flags *gflags;
+	int ngpios;
 
 	struct gpio_regulator_state *states;
 	int nr_states;
-- 
cgit v1.2.3


From 01dc79cd6fe7d25b0eba84009634f5435cbdb4e6 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Tue, 29 Jan 2019 11:31:53 +0100
Subject: regulator: fixed/gpio: Pull inversion/OD into gpiolib

This pushes the handling of inversion semantics and open drain
settings to the GPIO descriptor and gpiolib. All affected board
files are also augmented.

This is especially nice since we don't have to have any
confusing flags passed around to the left and right littering
the fixed and GPIO regulator drivers and the regulator core.
It is all just very straight-forward: the core asks the GPIO
line to be asserted or deasserted and gpiolib deals with the
rest depending on how the platform is configured: if the line
is active low, it deals with that, if the line is open drain,
it deals with that too.

Cc: Alexander Shiyan <shc_work@mail.ru> # i.MX boards user
Cc: Haojian Zhuang <haojian.zhuang@gmail.com> # MMP2 maintainer
Cc: Aaro Koskinen <aaro.koskinen@iki.fi> # OMAP1 maintainer
Cc: Tony Lindgren <tony@atomide.com> # OMAP1,2,3 maintainer
Cc: Mike Rapoport <rppt@linux.vnet.ibm.com> # EM-X270 maintainer
Cc: Robert Jarzmik <robert.jarzmik@free.fr> # EZX maintainer
Cc: Philipp Zabel <philipp.zabel@gmail.com> # Magician maintainer
Cc: Petr Cvek <petr.cvek@tul.cz> # Magician
Cc: Robert Jarzmik <robert.jarzmik@free.fr> # PXA
Cc: Paul Parsons <lost.distance@yahoo.com> # hx4700
Cc: Daniel Mack <zonque@gmail.com> # Raumfeld maintainer
Cc: Marc Zyngier <marc.zyngier@arm.com> # Zeus maintainer
Cc: Geert Uytterhoeven <geert+renesas@glider.be> # SuperH pinctrl/GPIO maintainer
Cc: Russell King <rmk+kernel@armlinux.org.uk> # SA1100
Tested-by: Marek Szyprowski <m.szyprowski@samsung.com>
Tested-by: Janusz Krzysztofik <jmkrzyszt@gmail.com> #OMAP1 Amstrad Delta
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 arch/arm/mach-imx/mach-mx21ads.c                   |  1 -
 arch/arm/mach-imx/mach-mx27ads.c                   |  2 +-
 arch/arm/mach-mmp/brownstone.c                     |  1 -
 arch/arm/mach-omap1/board-ams-delta.c              |  2 --
 arch/arm/mach-omap2/pdata-quirks.c                 |  1 -
 arch/arm/mach-pxa/em-x270.c                        |  1 -
 arch/arm/mach-pxa/ezx.c                            |  3 +-
 arch/arm/mach-pxa/raumfeld.c                       |  1 -
 arch/arm/mach-pxa/zeus.c                           |  3 +-
 arch/arm/mach-sa1100/assabet.c                     |  1 -
 arch/sh/boards/mach-ecovec24/setup.c               |  2 --
 .../intel-mid/device_libs/platform_bcm43xx.c       |  1 -
 drivers/regulator/core.c                           |  8 ++---
 drivers/regulator/da9055-regulator.c               |  1 -
 drivers/regulator/fixed.c                          | 35 +++++-----------------
 include/linux/regulator/fixed.h                    | 10 -------
 include/linux/regulator/gpio-regulator.h           |  6 ----
 17 files changed, 13 insertions(+), 66 deletions(-)

(limited to 'include')

diff --git a/arch/arm/mach-imx/mach-mx21ads.c b/arch/arm/mach-imx/mach-mx21ads.c
index 2e1e540f2e5a..d278fb672d40 100644
--- a/arch/arm/mach-imx/mach-mx21ads.c
+++ b/arch/arm/mach-imx/mach-mx21ads.c
@@ -205,7 +205,6 @@ static struct regulator_init_data mx21ads_lcd_regulator_init_data = {
 static struct fixed_voltage_config mx21ads_lcd_regulator_pdata = {
 	.supply_name	= "LCD",
 	.microvolts	= 3300000,
-	.enable_high	= 1,
 	.init_data	= &mx21ads_lcd_regulator_init_data,
 };
 
diff --git a/arch/arm/mach-imx/mach-mx27ads.c b/arch/arm/mach-imx/mach-mx27ads.c
index f5e04047ed13..6dd7f57c332f 100644
--- a/arch/arm/mach-imx/mach-mx27ads.c
+++ b/arch/arm/mach-imx/mach-mx27ads.c
@@ -237,7 +237,7 @@ static struct fixed_voltage_config mx27ads_lcd_regulator_pdata = {
 static struct gpiod_lookup_table mx27ads_lcd_regulator_gpiod_table = {
 	.dev_id = "reg-fixed-voltage.0", /* Let's hope ID 0 is what we get */
 	.table = {
-		GPIO_LOOKUP("LCD", 0, NULL, GPIO_ACTIVE_HIGH),
+		GPIO_LOOKUP("LCD", 0, NULL, GPIO_ACTIVE_LOW),
 		{ },
 	},
 };
diff --git a/arch/arm/mach-mmp/brownstone.c b/arch/arm/mach-mmp/brownstone.c
index a04e249c654b..d2560fb1e835 100644
--- a/arch/arm/mach-mmp/brownstone.c
+++ b/arch/arm/mach-mmp/brownstone.c
@@ -149,7 +149,6 @@ static struct regulator_init_data brownstone_v_5vp_data = {
 static struct fixed_voltage_config brownstone_v_5vp = {
 	.supply_name		= "v_5vp",
 	.microvolts		= 5000000,
-	.enable_high		= 1,
 	.enabled_at_boot	= 1,
 	.init_data		= &brownstone_v_5vp_data,
 };
diff --git a/arch/arm/mach-omap1/board-ams-delta.c b/arch/arm/mach-omap1/board-ams-delta.c
index c4c0a8ea11e4..be30c3c061b4 100644
--- a/arch/arm/mach-omap1/board-ams-delta.c
+++ b/arch/arm/mach-omap1/board-ams-delta.c
@@ -267,7 +267,6 @@ static struct fixed_voltage_config modem_nreset_config = {
 	.supply_name		= "modem_nreset",
 	.microvolts		= 3300000,
 	.startup_delay		= 25000,
-	.enable_high		= 1,
 	.enabled_at_boot	= 1,
 	.init_data		= &modem_nreset_data,
 };
@@ -533,7 +532,6 @@ static struct regulator_init_data keybrd_pwr_initdata = {
 static struct fixed_voltage_config keybrd_pwr_config = {
 	.supply_name		= "keybrd_pwr",
 	.microvolts		= 5000000,
-	.enable_high		= 1,
 	.init_data		= &keybrd_pwr_initdata,
 };
 
diff --git a/arch/arm/mach-omap2/pdata-quirks.c b/arch/arm/mach-omap2/pdata-quirks.c
index 8a5b6ed4ec36..a2ecc5e69abb 100644
--- a/arch/arm/mach-omap2/pdata-quirks.c
+++ b/arch/arm/mach-omap2/pdata-quirks.c
@@ -330,7 +330,6 @@ static struct fixed_voltage_config pandora_vwlan = {
 	.supply_name		= "vwlan",
 	.microvolts		= 1800000, /* 1.8V */
 	.startup_delay		= 50000, /* 50ms */
-	.enable_high		= 1,
 	.init_data		= &pandora_vmmc3,
 };
 
diff --git a/arch/arm/mach-pxa/em-x270.c b/arch/arm/mach-pxa/em-x270.c
index 32c1edeb3f14..5ba7bb7f7d51 100644
--- a/arch/arm/mach-pxa/em-x270.c
+++ b/arch/arm/mach-pxa/em-x270.c
@@ -976,7 +976,6 @@ static struct fixed_voltage_config camera_dummy_config = {
 	.supply_name		= "camera_vdd",
 	.input_supply		= "vcc cam",
 	.microvolts		= 2800000,
-	.enable_high		= 0,
 	.init_data		= &camera_dummy_initdata,
 };
 
diff --git a/arch/arm/mach-pxa/ezx.c b/arch/arm/mach-pxa/ezx.c
index 565965e9acc7..5e110e70ce5a 100644
--- a/arch/arm/mach-pxa/ezx.c
+++ b/arch/arm/mach-pxa/ezx.c
@@ -714,7 +714,6 @@ static struct regulator_init_data camera_regulator_initdata = {
 static struct fixed_voltage_config camera_regulator_config = {
 	.supply_name		= "camera_vdd",
 	.microvolts		= 2800000,
-	.enable_high		= 0,
 	.init_data		= &camera_regulator_initdata,
 };
 
@@ -730,7 +729,7 @@ static struct gpiod_lookup_table camera_supply_gpiod_table = {
 	.dev_id = "reg-fixed-voltage.1",
 	.table = {
 		GPIO_LOOKUP("gpio-pxa", GPIO50_nCAM_EN,
-			    NULL, GPIO_ACTIVE_HIGH),
+			    NULL, GPIO_ACTIVE_LOW),
 		{ },
 	},
 };
diff --git a/arch/arm/mach-pxa/raumfeld.c b/arch/arm/mach-pxa/raumfeld.c
index e1db072756f2..e13bfc9b01d2 100644
--- a/arch/arm/mach-pxa/raumfeld.c
+++ b/arch/arm/mach-pxa/raumfeld.c
@@ -883,7 +883,6 @@ static struct regulator_init_data audio_va_initdata = {
 static struct fixed_voltage_config audio_va_config = {
 	.supply_name		= "audio_va",
 	.microvolts		= 5000000,
-	.enable_high		= 1,
 	.enabled_at_boot	= 0,
 	.init_data		= &audio_va_initdata,
 };
diff --git a/arch/arm/mach-pxa/zeus.c b/arch/arm/mach-pxa/zeus.c
index c411f79d4cb5..ebd654302387 100644
--- a/arch/arm/mach-pxa/zeus.c
+++ b/arch/arm/mach-pxa/zeus.c
@@ -426,7 +426,7 @@ static struct gpiod_lookup_table can_regulator_gpiod_table = {
 	.dev_id = "reg-fixed-voltage.0",
 	.table = {
 		GPIO_LOOKUP("gpio-pxa", ZEUS_CAN_SHDN_GPIO,
-			    NULL, GPIO_ACTIVE_HIGH),
+			    NULL, GPIO_ACTIVE_LOW),
 		{ },
 	},
 };
@@ -547,7 +547,6 @@ static struct regulator_init_data zeus_ohci_regulator_data = {
 static struct fixed_voltage_config zeus_ohci_regulator_config = {
 	.supply_name		= "vbus2",
 	.microvolts		= 5000000, /* 5.0V */
-	.enable_high		= 1,
 	.startup_delay		= 0,
 	.init_data		= &zeus_ohci_regulator_data,
 };
diff --git a/arch/arm/mach-sa1100/assabet.c b/arch/arm/mach-sa1100/assabet.c
index dfa42496ec27..d09c3f236186 100644
--- a/arch/arm/mach-sa1100/assabet.c
+++ b/arch/arm/mach-sa1100/assabet.c
@@ -469,7 +469,6 @@ static struct regulator_consumer_supply assabet_cf_vcc_consumers[] = {
 static struct fixed_voltage_config assabet_cf_vcc_pdata __initdata = {
 	.supply_name = "cf-power",
 	.microvolts = 3300000,
-	.enable_high = 1,
 };
 
 static struct gpiod_lookup_table assabet_cf_vcc_gpio_table = {
diff --git a/arch/sh/boards/mach-ecovec24/setup.c b/arch/sh/boards/mach-ecovec24/setup.c
index 22b4106b8084..5495efa07335 100644
--- a/arch/sh/boards/mach-ecovec24/setup.c
+++ b/arch/sh/boards/mach-ecovec24/setup.c
@@ -630,7 +630,6 @@ static struct regulator_init_data cn12_power_init_data = {
 static struct fixed_voltage_config cn12_power_info = {
 	.supply_name = "CN12 SD/MMC Vdd",
 	.microvolts = 3300000,
-	.enable_high = 1,
 	.init_data = &cn12_power_init_data,
 };
 
@@ -671,7 +670,6 @@ static struct regulator_init_data sdhi0_power_init_data = {
 static struct fixed_voltage_config sdhi0_power_info = {
 	.supply_name = "CN11 SD/MMC Vdd",
 	.microvolts = 3300000,
-	.enable_high = 1,
 	.init_data = &sdhi0_power_init_data,
 };
 
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_bcm43xx.c b/arch/x86/platform/intel-mid/device_libs/platform_bcm43xx.c
index 96f438d4b026..1421d5330b2c 100644
--- a/arch/x86/platform/intel-mid/device_libs/platform_bcm43xx.c
+++ b/arch/x86/platform/intel-mid/device_libs/platform_bcm43xx.c
@@ -44,7 +44,6 @@ static struct fixed_voltage_config bcm43xx_vmmc = {
 	 */
 	.microvolts		= 2000000,		/* 1.8V */
 	.startup_delay		= 250 * 1000,		/* 250ms */
-	.enable_high		= 1,			/* active high */
 	.enabled_at_boot	= 0,			/* disabled at boot */
 	.init_data		= &bcm43xx_vmmc_data,
 };
diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c
index 430a73dea487..1778c5d1b2d0 100644
--- a/drivers/regulator/core.c
+++ b/drivers/regulator/core.c
@@ -82,7 +82,6 @@ struct regulator_enable_gpio {
 	struct gpio_desc *gpiod;
 	u32 enable_count;	/* a number of enabled shared GPIO */
 	u32 request_count;	/* a number of requested shared GPIO */
-	unsigned int ena_gpio_invert:1;
 };
 
 /*
@@ -2268,7 +2267,6 @@ static int regulator_ena_gpio_request(struct regulator_dev *rdev,
 	}
 
 	pin->gpiod = gpiod;
-	pin->ena_gpio_invert = config->ena_gpio_invert;
 	list_add(&pin->list, &regulator_ena_gpio_list);
 
 update_ena_gpio_to_rdev:
@@ -2319,8 +2317,7 @@ static int regulator_ena_gpio_ctrl(struct regulator_dev *rdev, bool enable)
 	if (enable) {
 		/* Enable GPIO at initial use */
 		if (pin->enable_count == 0)
-			gpiod_set_value_cansleep(pin->gpiod,
-						 !pin->ena_gpio_invert);
+			gpiod_set_value_cansleep(pin->gpiod, 1);
 
 		pin->enable_count++;
 	} else {
@@ -2331,8 +2328,7 @@ static int regulator_ena_gpio_ctrl(struct regulator_dev *rdev, bool enable)
 
 		/* Disable GPIO if not used */
 		if (pin->enable_count <= 1) {
-			gpiod_set_value_cansleep(pin->gpiod,
-						 pin->ena_gpio_invert);
+			gpiod_set_value_cansleep(pin->gpiod, 0);
 			pin->enable_count = 0;
 		}
 	}
diff --git a/drivers/regulator/da9055-regulator.c b/drivers/regulator/da9055-regulator.c
index 588c3d2445cf..417cafe2aba0 100644
--- a/drivers/regulator/da9055-regulator.c
+++ b/drivers/regulator/da9055-regulator.c
@@ -457,7 +457,6 @@ static int da9055_gpio_init(struct da9055_regulator *regulator,
 		int gpio_mux = pdata->gpio_ren[id];
 
 		config->ena_gpiod = pdata->ena_gpiods[id];
-		config->ena_gpio_invert = 1;
 
 		/*
 		 * GPI pin is muxed with regulator to control the
diff --git a/drivers/regulator/fixed.c b/drivers/regulator/fixed.c
index 9abdb9130766..b5afc9db2c61 100644
--- a/drivers/regulator/fixed.c
+++ b/drivers/regulator/fixed.c
@@ -79,15 +79,6 @@ of_get_fixed_voltage_config(struct device *dev,
 
 	of_property_read_u32(np, "startup-delay-us", &config->startup_delay);
 
-	/*
-	 * FIXME: we pulled active low/high and open drain handling into
-	 * gpiolib so it will be handled there. Delete this in the second
-	 * step when we also remove the custom inversion handling for all
-	 * legacy boardfiles.
-	 */
-	config->enable_high = 1;
-	config->gpio_is_open_drain = 0;
-
 	if (of_find_property(np, "vin-supply", NULL))
 		config->input_supply = "vin";
 
@@ -151,24 +142,14 @@ static int reg_fixed_voltage_probe(struct platform_device *pdev)
 
 	drvdata->desc.fixed_uV = config->microvolts;
 
-	cfg.ena_gpio_invert = !config->enable_high;
-	if (config->enabled_at_boot) {
-		if (config->enable_high)
-			gflags = GPIOD_OUT_HIGH;
-		else
-			gflags = GPIOD_OUT_LOW;
-	} else {
-		if (config->enable_high)
-			gflags = GPIOD_OUT_LOW;
-		else
-			gflags = GPIOD_OUT_HIGH;
-	}
-	if (config->gpio_is_open_drain) {
-		if (gflags == GPIOD_OUT_HIGH)
-			gflags = GPIOD_OUT_HIGH_OPEN_DRAIN;
-		else
-			gflags = GPIOD_OUT_LOW_OPEN_DRAIN;
-	}
+	/*
+	 * The signal will be inverted by the GPIO core if flagged so in the
+	 * decriptor.
+	 */
+	if (config->enabled_at_boot)
+		gflags = GPIOD_OUT_HIGH;
+	else
+		gflags = GPIOD_OUT_LOW;
 
 	/*
 	 * Some fixed regulators share the enable line between two
diff --git a/include/linux/regulator/fixed.h b/include/linux/regulator/fixed.h
index 1a4340ed8e2b..f10140da7145 100644
--- a/include/linux/regulator/fixed.h
+++ b/include/linux/regulator/fixed.h
@@ -25,14 +25,6 @@ struct regulator_init_data;
  * @input_supply:	Name of the input regulator supply
  * @microvolts:		Output voltage of regulator
  * @startup_delay:	Start-up time in microseconds
- * @gpio_is_open_drain: Gpio pin is open drain or normal type.
- *			If it is open drain type then HIGH will be set
- *			through PULL-UP with setting gpio as input
- *			and low will be set as gpio-output with driven
- *			to low. For non-open-drain case, the gpio will
- *			will be in output and drive to low/high accordingly.
- * @enable_high:	Polarity of enable GPIO
- *			1 = Active high, 0 = Active low
  * @enabled_at_boot:	Whether regulator has been enabled at
  * 			boot or not. 1 = Yes, 0 = No
  * 			This is used to keep the regulator at
@@ -48,8 +40,6 @@ struct fixed_voltage_config {
 	const char *input_supply;
 	int microvolts;
 	unsigned startup_delay;
-	unsigned gpio_is_open_drain:1;
-	unsigned enable_high:1;
 	unsigned enabled_at_boot:1;
 	struct regulator_init_data *init_data;
 };
diff --git a/include/linux/regulator/gpio-regulator.h b/include/linux/regulator/gpio-regulator.h
index 49c407afb944..11cd6375215d 100644
--- a/include/linux/regulator/gpio-regulator.h
+++ b/include/linux/regulator/gpio-regulator.h
@@ -46,10 +46,6 @@ struct gpio_regulator_state {
 /**
  * struct gpio_regulator_config - config structure
  * @supply_name:	Name of the regulator supply
- * @enable_gpio:	GPIO to use for enable control
- *			set to -EINVAL if not used
- * @enable_high:	Polarity of enable GPIO
- *			1 = Active high, 0 = Active low
  * @enabled_at_boot:	Whether regulator has been enabled at
  *			boot or not. 1 = Yes, 0 = No
  *			This is used to keep the regulator at
@@ -71,8 +67,6 @@ struct gpio_regulator_state {
 struct gpio_regulator_config {
 	const char *supply_name;
 
-	int enable_gpio;
-	unsigned enable_high:1;
 	unsigned enabled_at_boot:1;
 	unsigned startup_delay;
 
-- 
cgit v1.2.3


From 541d052d721506549774ab780a2709e4ff8ca79b Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Tue, 29 Jan 2019 11:31:56 +0100
Subject: regulator: core: Only support passing enable GPIO descriptors

Now that we changed all providers to pass descriptors into the core
for enable GPIOs instead of a global GPIO number, delete the support
for passing GPIO numbers in, and we get a cleanup and size reduction
in the core, and from a GPIO point of view we use the modern, cleaner
interface.

Tested-by: Marek Szyprowski <m.szyprowski@samsung.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/core.c         | 32 ++++++--------------------------
 include/linux/regulator/driver.h | 12 +-----------
 2 files changed, 7 insertions(+), 37 deletions(-)

(limited to 'include')

diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c
index 1778c5d1b2d0..4fb475a2e4f2 100644
--- a/drivers/regulator/core.c
+++ b/drivers/regulator/core.c
@@ -23,7 +23,6 @@
 #include <linux/mutex.h>
 #include <linux/suspend.h>
 #include <linux/delay.h>
-#include <linux/gpio.h>
 #include <linux/gpio/consumer.h>
 #include <linux/of.h>
 #include <linux/regmap.h>
@@ -2236,35 +2235,19 @@ static int regulator_ena_gpio_request(struct regulator_dev *rdev,
 {
 	struct regulator_enable_gpio *pin;
 	struct gpio_desc *gpiod;
-	int ret;
 
-	if (config->ena_gpiod)
-		gpiod = config->ena_gpiod;
-	else
-		gpiod = gpio_to_desc(config->ena_gpio);
+	gpiod = config->ena_gpiod;
 
 	list_for_each_entry(pin, &regulator_ena_gpio_list, list) {
 		if (pin->gpiod == gpiod) {
-			rdev_dbg(rdev, "GPIO %d is already used\n",
-				config->ena_gpio);
+			rdev_dbg(rdev, "GPIO is already used\n");
 			goto update_ena_gpio_to_rdev;
 		}
 	}
 
-	if (!config->ena_gpiod) {
-		ret = gpio_request_one(config->ena_gpio,
-				       GPIOF_DIR_OUT | config->ena_gpio_flags,
-				       rdev_get_name(rdev));
-		if (ret)
-			return ret;
-	}
-
 	pin = kzalloc(sizeof(struct regulator_enable_gpio), GFP_KERNEL);
-	if (pin == NULL) {
-		if (!config->ena_gpiod)
-			gpio_free(config->ena_gpio);
+	if (pin == NULL)
 		return -ENOMEM;
-	}
 
 	pin->gpiod = gpiod;
 	list_add(&pin->list, &regulator_ena_gpio_list);
@@ -2287,7 +2270,6 @@ static void regulator_ena_gpio_free(struct regulator_dev *rdev)
 		if (pin->gpiod == rdev->ena_pin->gpiod) {
 			if (pin->request_count <= 1) {
 				pin->request_count = 0;
-				gpiod_put(pin->gpiod);
 				list_del(&pin->list);
 				kfree(pin);
 				rdev->ena_pin = NULL;
@@ -4971,15 +4953,13 @@ regulator_register(const struct regulator_desc *regulator_desc,
 			goto clean;
 	}
 
-	if (config->ena_gpiod ||
-	    ((config->ena_gpio || config->ena_gpio_initialized) &&
-	     gpio_is_valid(config->ena_gpio))) {
+	if (config->ena_gpiod) {
 		mutex_lock(&regulator_list_mutex);
 		ret = regulator_ena_gpio_request(rdev, config);
 		mutex_unlock(&regulator_list_mutex);
 		if (ret != 0) {
-			rdev_err(rdev, "Failed to request enable GPIO%d: %d\n",
-				 config->ena_gpio, ret);
+			rdev_err(rdev, "Failed to request enable GPIO: %d\n",
+				 ret);
 			goto clean;
 		}
 		/* The regulator core took over the GPIO descriptor */
diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h
index 795b38a06b6c..7f8345bff4e1 100644
--- a/include/linux/regulator/driver.h
+++ b/include/linux/regulator/driver.h
@@ -401,13 +401,7 @@ struct regulator_desc {
  *           NULL).
  * @regmap: regmap to use for core regmap helpers if dev_get_regmap() is
  *          insufficient.
- * @ena_gpio_initialized: GPIO controlling regulator enable was properly
- *                        initialized, meaning that >= 0 is a valid gpio
- *                        identifier and < 0 is a non existent gpio.
- * @ena_gpio: GPIO controlling regulator enable.
- * @ena_gpiod: GPIO descriptor controlling regulator enable.
- * @ena_gpio_invert: Sense for GPIO enable control.
- * @ena_gpio_flags: Flags to use when calling gpio_request_one()
+ * @ena_gpiod: GPIO controlling regulator enable.
  */
 struct regulator_config {
 	struct device *dev;
@@ -416,11 +410,7 @@ struct regulator_config {
 	struct device_node *of_node;
 	struct regmap *regmap;
 
-	bool ena_gpio_initialized;
-	int ena_gpio;
 	struct gpio_desc *ena_gpiod;
-	unsigned int ena_gpio_invert:1;
-	unsigned int ena_gpio_flags;
 };
 
 /*
-- 
cgit v1.2.3


From d325c402964e7c63db94e9138c530832269a1297 Mon Sep 17 00:00:00 2001
From: Miroslav Benes <mbenes@suse.cz>
Date: Fri, 28 Dec 2018 14:38:47 +0100
Subject: ring-buffer: Remove unused function ring_buffer_page_len()

Commit 6b7e633fe9c2 ("tracing: Remove extra zeroing out of the ring
buffer page") removed the only caller of ring_buffer_page_len(). The
function is now unused and may be removed.

Link: http://lkml.kernel.org/r/20181228133847.106177-1-mbenes@suse.cz

Signed-off-by: Miroslav Benes <mbenes@suse.cz>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/ring_buffer.h |  2 --
 kernel/trace/ring_buffer.c  | 14 --------------
 2 files changed, 16 deletions(-)

(limited to 'include')

diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index 5b9ae62272bb..f1429675f252 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -187,8 +187,6 @@ void ring_buffer_set_clock(struct ring_buffer *buffer,
 void ring_buffer_set_time_stamp_abs(struct ring_buffer *buffer, bool abs);
 bool ring_buffer_time_stamp_abs(struct ring_buffer *buffer);
 
-size_t ring_buffer_page_len(void *page);
-
 size_t ring_buffer_nr_pages(struct ring_buffer *buffer, int cpu);
 size_t ring_buffer_nr_dirty_pages(struct ring_buffer *buffer, int cpu);
 
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 06e864a334bb..9a91479bbbfe 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -353,20 +353,6 @@ static void rb_init_page(struct buffer_data_page *bpage)
 	local_set(&bpage->commit, 0);
 }
 
-/**
- * ring_buffer_page_len - the size of data on the page.
- * @page: The page to read
- *
- * Returns the amount of data on the page, including buffer page header.
- */
-size_t ring_buffer_page_len(void *page)
-{
-	struct buffer_data_page *bpage = page;
-
-	return (local_read(&bpage->commit) & ~RB_MISSED_FLAGS)
-		+ BUF_PAGE_HDR_SIZE;
-}
-
 /*
  * Also stolen from mm/slob.c. Thanks to Mathieu Desnoyers for pointing
  * this issue out.
-- 
cgit v1.2.3


From 7453e1dafdec076f87384c8647d2960affd57ecc Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Mon, 4 Feb 2019 14:53:04 +0100
Subject: ALSA: info: Add standard helpers for card proc file entries

Two new helper functions are added here for cleaning up the existing
lengthy calls.

Reviewed-by: Jaroslav Kysela <perex@perex.cz>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/sound/info.h | 35 +++++++++++++++++++++++++++++++++++
 sound/core/info.c    | 32 ++++++++++++++++++++++++++++++++
 2 files changed, 67 insertions(+)

(limited to 'include')

diff --git a/include/sound/info.h b/include/sound/info.h
index becdf66d2825..96530f7599e1 100644
--- a/include/sound/info.h
+++ b/include/sound/info.h
@@ -160,6 +160,13 @@ static inline void snd_info_set_text_ops(struct snd_info_entry *entry,
 	entry->c.text.read = read;
 }
 
+int snd_card_rw_proc_new(struct snd_card *card, const char *name,
+			 void *private_data,
+			 void (*read)(struct snd_info_entry *,
+				      struct snd_info_buffer *),
+			 void (*write)(struct snd_info_entry *entry,
+				       struct snd_info_buffer *buffer));
+
 int snd_info_check_reserved_words(const char *str);
 
 #else
@@ -189,10 +196,38 @@ static inline int snd_card_proc_new(struct snd_card *card, const char *name,
 static inline void snd_info_set_text_ops(struct snd_info_entry *entry __attribute__((unused)),
 					 void *private_data,
 					 void (*read)(struct snd_info_entry *, struct snd_info_buffer *)) {}
+static inline int snd_card_rw_proc_new(struct snd_card *card, const char *name,
+				       void *private_data,
+				       void (*read)(struct snd_info_entry *,
+						    struct snd_info_buffer *),
+				       void (*write)(struct snd_info_entry *entry,
+						     struct snd_info_buffer *buffer))
+{
+	return 0;
+}
 static inline int snd_info_check_reserved_words(const char *str) { return 1; }
 
 #endif
 
+/**
+ * snd_card_ro_proc_new - Create a read-only text proc file entry for the card
+ * @card: the card instance
+ * @name: the file name
+ * @private_data: the arbitrary private data
+ * @read: the read callback
+ *
+ * This proc file entry will be registered via snd_card_register() call, and
+ * it will be removed automatically at the card removal, too.
+ */
+static inline int
+snd_card_ro_proc_new(struct snd_card *card, const char *name,
+		     void *private_data,
+		     void (*read)(struct snd_info_entry *,
+				  struct snd_info_buffer *))
+{
+	return snd_card_rw_proc_new(card, name, private_data, read, NULL);
+}
+
 /*
  * OSS info part
  */
diff --git a/sound/core/info.c b/sound/core/info.c
index 5cd00629c0f5..6c149fa54d2d 100644
--- a/sound/core/info.c
+++ b/sound/core/info.c
@@ -866,6 +866,38 @@ int snd_info_register(struct snd_info_entry *entry)
 }
 EXPORT_SYMBOL(snd_info_register);
 
+/**
+ * snd_card_rw_proc_new - Create a read/write text proc file entry for the card
+ * @card: the card instance
+ * @name: the file name
+ * @private_data: the arbitrary private data
+ * @read: the read callback
+ * @write: the write callback, NULL for read-only
+ *
+ * This proc file entry will be registered via snd_card_register() call, and
+ * it will be removed automatically at the card removal, too.
+ */
+int snd_card_rw_proc_new(struct snd_card *card, const char *name,
+			 void *private_data,
+			 void (*read)(struct snd_info_entry *,
+				      struct snd_info_buffer *),
+			 void (*write)(struct snd_info_entry *entry,
+				       struct snd_info_buffer *buffer))
+{
+	struct snd_info_entry *entry;
+
+	entry = snd_info_create_card_entry(card, name, card->proc_root);
+	if (!entry)
+		return -ENOMEM;
+	snd_info_set_text_ops(entry, private_data, read);
+	if (write) {
+		entry->mode |= 0200;
+		entry->c.text.write = write;
+	}
+	return 0;
+}
+EXPORT_SYMBOL_GPL(snd_card_rw_proc_new);
+
 /*
 
  */
-- 
cgit v1.2.3


From 9725752867cb158e076bcb6bc4bdb35d9710b1bd Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Tue, 5 Feb 2019 16:10:00 +0100
Subject: ALSA: info: Drop unused snd_info_entry.card field

It's referred only in snd_card_id_read() which can receive the card
object via private_data.

Reviewed-by: Jaroslav Kysela <perex@perex.cz>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/sound/info.h | 1 -
 sound/core/info.c    | 4 +---
 sound/core/init.c    | 6 ++++--
 3 files changed, 5 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/sound/info.h b/include/sound/info.h
index 96530f7599e1..97fdda41e076 100644
--- a/include/sound/info.h
+++ b/include/sound/info.h
@@ -82,7 +82,6 @@ struct snd_info_entry {
 		struct snd_info_entry_ops *ops;
 	} c;
 	struct snd_info_entry *parent;
-	struct snd_card *card;
 	struct module *module;
 	void *private_data;
 	void (*private_free)(struct snd_info_entry *entry);
diff --git a/sound/core/info.c b/sound/core/info.c
index 6c149fa54d2d..4d23069e7928 100644
--- a/sound/core/info.c
+++ b/sound/core/info.c
@@ -750,10 +750,8 @@ struct snd_info_entry *snd_info_create_card_entry(struct snd_card *card,
 	if (!parent)
 		parent = card->proc_root;
 	entry = snd_info_create_entry(name, parent);
-	if (entry) {
+	if (entry)
 		entry->module = card->module;
-		entry->card = card;
-	}
 	return entry;
 }
 EXPORT_SYMBOL(snd_info_create_card_entry);
diff --git a/sound/core/init.c b/sound/core/init.c
index 4849c611c0fe..5252a9ce13dc 100644
--- a/sound/core/init.c
+++ b/sound/core/init.c
@@ -104,7 +104,9 @@ EXPORT_SYMBOL(snd_mixer_oss_notify_callback);
 static void snd_card_id_read(struct snd_info_entry *entry,
 			     struct snd_info_buffer *buffer)
 {
-	snd_iprintf(buffer, "%s\n", entry->card->id);
+	struct snd_card *card = entry->private_data;
+
+	snd_iprintf(buffer, "%s\n", card->id);
 }
 
 static int init_info_for_card(struct snd_card *card)
@@ -116,7 +118,7 @@ static int init_info_for_card(struct snd_card *card)
 		dev_dbg(card->dev, "unable to create card entry\n");
 		return -ENOMEM;
 	}
-	entry->c.text.read = snd_card_id_read;
+	snd_info_set_text_ops(entry, card, snd_card_id_read);
 	card->proc_id = entry;
 
 	return snd_info_card_register(card);
-- 
cgit v1.2.3


From 29b2625ff605394ecd0b078e0cb67a151bb4d80c Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Tue, 5 Feb 2019 16:26:06 +0100
Subject: ALSA: info: Move card id proc creation into info.c

The creation of card's id proc file can be moved gracefully into
info.c.  Also, the assignment of card->proc_id is superfluous and can
be dropped.  So let's do it.

Basically this is no functional change but code refactoring, but one
potential behavior change is that now it returns properly the error
code from snd_info_card_register(), which is a good thing (tm).

Reviewed-by: Jaroslav Kysela <perex@perex.cz>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/sound/core.h |  1 -
 sound/core/info.c    | 11 ++++++++++-
 sound/core/init.c    | 33 ++++-----------------------------
 3 files changed, 14 insertions(+), 31 deletions(-)

(limited to 'include')

diff --git a/include/sound/core.h b/include/sound/core.h
index 36a5934cf4b1..e923c23e05dd 100644
--- a/include/sound/core.h
+++ b/include/sound/core.h
@@ -120,7 +120,6 @@ struct snd_card {
 	struct list_head ctl_files;	/* active control files */
 
 	struct snd_info_entry *proc_root;	/* root for soundcard specific files */
-	struct snd_info_entry *proc_id;	/* the card id */
 	struct proc_dir_entry *proc_root_link;	/* number link to real id */
 
 	struct list_head files_list;	/* all files associated to this card */
diff --git a/sound/core/info.c b/sound/core/info.c
index 7a4e733172ee..96a074019c33 100644
--- a/sound/core/info.c
+++ b/sound/core/info.c
@@ -504,6 +504,14 @@ int __exit snd_info_done(void)
 	return 0;
 }
 
+static void snd_card_id_read(struct snd_info_entry *entry,
+			     struct snd_info_buffer *buffer)
+{
+	struct snd_card *card = entry->private_data;
+
+	snd_iprintf(buffer, "%s\n", card->id);
+}
+
 /*
  * create a card proc file
  * called from init.c
@@ -521,7 +529,8 @@ int snd_info_card_create(struct snd_card *card)
 	if (!entry)
 		return -ENOMEM;
 	card->proc_root = entry;
-	return 0;
+
+	return snd_card_ro_proc_new(card, "id", card, snd_card_id_read);
 }
 
 /*
diff --git a/sound/core/init.c b/sound/core/init.c
index 5252a9ce13dc..0c4dc40376a7 100644
--- a/sound/core/init.c
+++ b/sound/core/init.c
@@ -100,33 +100,6 @@ int (*snd_mixer_oss_notify_callback)(struct snd_card *card, int free_flag);
 EXPORT_SYMBOL(snd_mixer_oss_notify_callback);
 #endif
 
-#ifdef CONFIG_SND_PROC_FS
-static void snd_card_id_read(struct snd_info_entry *entry,
-			     struct snd_info_buffer *buffer)
-{
-	struct snd_card *card = entry->private_data;
-
-	snd_iprintf(buffer, "%s\n", card->id);
-}
-
-static int init_info_for_card(struct snd_card *card)
-{
-	struct snd_info_entry *entry;
-
-	entry = snd_info_create_card_entry(card, "id", card->proc_root);
-	if (!entry) {
-		dev_dbg(card->dev, "unable to create card entry\n");
-		return -ENOMEM;
-	}
-	snd_info_set_text_ops(entry, card, snd_card_id_read);
-	card->proc_id = entry;
-
-	return snd_info_card_register(card);
-}
-#else /* !CONFIG_SND_PROC_FS */
-#define init_info_for_card(card)
-#endif
-
 static int check_empty_slot(struct module *module, int slot)
 {
 	return !slots[slot] || !*slots[slot];
@@ -493,7 +466,6 @@ static int snd_card_do_free(struct snd_card *card)
 	snd_device_free_all(card);
 	if (card->private_free)
 		card->private_free(card);
-	snd_info_free_entry(card->proc_id);
 	if (snd_info_card_free(card) < 0) {
 		dev_warn(card->dev, "unable to free card info\n");
 		/* Not fatal error */
@@ -797,7 +769,10 @@ int snd_card_register(struct snd_card *card)
 	}
 	snd_cards[card->number] = card;
 	mutex_unlock(&snd_card_mutex);
-	init_info_for_card(card);
+	err = snd_info_card_register(card);
+	if (err < 0)
+		return err;
+
 #if IS_ENABLED(CONFIG_SND_MIXER_OSS)
 	if (snd_mixer_oss_notify_callback)
 		snd_mixer_oss_notify_callback(card, SND_MIXER_OSS_NOTIFY_REGISTER);
-- 
cgit v1.2.3


From f13d4b5f85e1c436c9bf21205509266b5a81a320 Mon Sep 17 00:00:00 2001
From: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Date: Tue, 5 Feb 2019 10:22:28 -0600
Subject: ASoC: dapm: harden use of lookup tables

To detect potential errors, let's add:

a) build-time warnings when the table size isn't aligned with the enum
list
b) run-time warnings when the values are not initialized. This
requires an increase by one of all values to avoid the default 0.

Suggested-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/soc-dapm.h |   3 +
 sound/soc/soc-dapm.c     | 158 ++++++++++++++++++++++++-----------------------
 2 files changed, 85 insertions(+), 76 deletions(-)

(limited to 'include')

diff --git a/include/sound/soc-dapm.h b/include/sound/soc-dapm.h
index 79b4ddfb8e9e..c00a0b8ade08 100644
--- a/include/sound/soc-dapm.h
+++ b/include/sound/soc-dapm.h
@@ -523,6 +523,9 @@ enum snd_soc_dapm_type {
 	snd_soc_dapm_asrc,		/* DSP/CODEC ASRC component */
 	snd_soc_dapm_encoder,		/* FW/SW audio encoder component */
 	snd_soc_dapm_decoder,		/* FW/SW audio decoder component */
+
+	/* Don't edit below this line */
+	SND_SOC_DAPM_TYPE_COUNT
 };
 
 enum snd_soc_dapm_subclass {
diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c
index 40e7190f533a..d31d295b540f 100644
--- a/sound/soc/soc-dapm.c
+++ b/sound/soc/soc-dapm.c
@@ -64,85 +64,85 @@ snd_soc_dapm_new_control_unlocked(struct snd_soc_dapm_context *dapm,
 
 /* dapm power sequences - make this per codec in the future */
 static int dapm_up_seq[] = {
-	[snd_soc_dapm_pre] = 0,
-	[snd_soc_dapm_regulator_supply] = 1,
-	[snd_soc_dapm_pinctrl] = 1,
-	[snd_soc_dapm_clock_supply] = 1,
-	[snd_soc_dapm_supply] = 2,
-	[snd_soc_dapm_micbias] = 3,
-	[snd_soc_dapm_vmid] = 3,
-	[snd_soc_dapm_dai_link] = 2,
-	[snd_soc_dapm_dai_in] = 4,
-	[snd_soc_dapm_dai_out] = 4,
-	[snd_soc_dapm_aif_in] = 4,
-	[snd_soc_dapm_aif_out] = 4,
-	[snd_soc_dapm_mic] = 5,
-	[snd_soc_dapm_siggen] = 5,
-	[snd_soc_dapm_input] = 5,
-	[snd_soc_dapm_output] = 5,
-	[snd_soc_dapm_mux] = 6,
-	[snd_soc_dapm_demux] = 6,
-	[snd_soc_dapm_dac] = 7,
-	[snd_soc_dapm_switch] = 8,
-	[snd_soc_dapm_mixer] = 8,
-	[snd_soc_dapm_mixer_named_ctl] = 8,
-	[snd_soc_dapm_pga] = 9,
-	[snd_soc_dapm_buffer] = 9,
-	[snd_soc_dapm_scheduler] = 9,
-	[snd_soc_dapm_effect] = 9,
-	[snd_soc_dapm_src] = 9,
-	[snd_soc_dapm_asrc] = 9,
-	[snd_soc_dapm_encoder] = 9,
-	[snd_soc_dapm_decoder] = 9,
-	[snd_soc_dapm_adc] = 10,
-	[snd_soc_dapm_out_drv] = 11,
-	[snd_soc_dapm_hp] = 11,
-	[snd_soc_dapm_spk] = 11,
-	[snd_soc_dapm_line] = 11,
-	[snd_soc_dapm_sink] = 11,
-	[snd_soc_dapm_kcontrol] = 12,
-	[snd_soc_dapm_post] = 13,
+	[snd_soc_dapm_pre] = 1,
+	[snd_soc_dapm_regulator_supply] = 2,
+	[snd_soc_dapm_pinctrl] = 2,
+	[snd_soc_dapm_clock_supply] = 2,
+	[snd_soc_dapm_supply] = 3,
+	[snd_soc_dapm_micbias] = 4,
+	[snd_soc_dapm_vmid] = 4,
+	[snd_soc_dapm_dai_link] = 3,
+	[snd_soc_dapm_dai_in] = 5,
+	[snd_soc_dapm_dai_out] = 5,
+	[snd_soc_dapm_aif_in] = 5,
+	[snd_soc_dapm_aif_out] = 5,
+	[snd_soc_dapm_mic] = 6,
+	[snd_soc_dapm_siggen] = 6,
+	[snd_soc_dapm_input] = 6,
+	[snd_soc_dapm_output] = 6,
+	[snd_soc_dapm_mux] = 7,
+	[snd_soc_dapm_demux] = 7,
+	[snd_soc_dapm_dac] = 8,
+	[snd_soc_dapm_switch] = 9,
+	[snd_soc_dapm_mixer] = 9,
+	[snd_soc_dapm_mixer_named_ctl] = 9,
+	[snd_soc_dapm_pga] = 10,
+	[snd_soc_dapm_buffer] = 10,
+	[snd_soc_dapm_scheduler] = 10,
+	[snd_soc_dapm_effect] = 10,
+	[snd_soc_dapm_src] = 10,
+	[snd_soc_dapm_asrc] = 10,
+	[snd_soc_dapm_encoder] = 10,
+	[snd_soc_dapm_decoder] = 10,
+	[snd_soc_dapm_adc] = 11,
+	[snd_soc_dapm_out_drv] = 12,
+	[snd_soc_dapm_hp] = 12,
+	[snd_soc_dapm_spk] = 12,
+	[snd_soc_dapm_line] = 12,
+	[snd_soc_dapm_sink] = 12,
+	[snd_soc_dapm_kcontrol] = 13,
+	[snd_soc_dapm_post] = 14,
 };
 
 static int dapm_down_seq[] = {
-	[snd_soc_dapm_pre] = 0,
-	[snd_soc_dapm_kcontrol] = 1,
-	[snd_soc_dapm_adc] = 2,
-	[snd_soc_dapm_hp] = 3,
-	[snd_soc_dapm_spk] = 3,
-	[snd_soc_dapm_line] = 3,
-	[snd_soc_dapm_out_drv] = 3,
-	[snd_soc_dapm_sink] = 3,
-	[snd_soc_dapm_pga] = 4,
-	[snd_soc_dapm_buffer] = 4,
-	[snd_soc_dapm_scheduler] = 4,
-	[snd_soc_dapm_effect] = 4,
-	[snd_soc_dapm_src] = 4,
-	[snd_soc_dapm_asrc] = 4,
-	[snd_soc_dapm_encoder] = 4,
-	[snd_soc_dapm_decoder] = 4,
-	[snd_soc_dapm_switch] = 5,
-	[snd_soc_dapm_mixer_named_ctl] = 5,
-	[snd_soc_dapm_mixer] = 5,
-	[snd_soc_dapm_dac] = 6,
-	[snd_soc_dapm_mic] = 7,
-	[snd_soc_dapm_siggen] = 7,
-	[snd_soc_dapm_input] = 7,
-	[snd_soc_dapm_output] = 7,
-	[snd_soc_dapm_micbias] = 8,
-	[snd_soc_dapm_vmid] = 8,
-	[snd_soc_dapm_mux] = 9,
-	[snd_soc_dapm_demux] = 9,
-	[snd_soc_dapm_aif_in] = 10,
-	[snd_soc_dapm_aif_out] = 10,
-	[snd_soc_dapm_dai_in] = 10,
-	[snd_soc_dapm_dai_out] = 10,
-	[snd_soc_dapm_dai_link] = 11,
-	[snd_soc_dapm_supply] = 12,
-	[snd_soc_dapm_clock_supply] = 13,
-	[snd_soc_dapm_pinctrl] = 13,
-	[snd_soc_dapm_regulator_supply] = 13,
-	[snd_soc_dapm_post] = 14,
+	[snd_soc_dapm_pre] = 1,
+	[snd_soc_dapm_kcontrol] = 2,
+	[snd_soc_dapm_adc] = 3,
+	[snd_soc_dapm_hp] = 4,
+	[snd_soc_dapm_spk] = 4,
+	[snd_soc_dapm_line] = 4,
+	[snd_soc_dapm_out_drv] = 4,
+	[snd_soc_dapm_sink] = 4,
+	[snd_soc_dapm_pga] = 5,
+	[snd_soc_dapm_buffer] = 5,
+	[snd_soc_dapm_scheduler] = 5,
+	[snd_soc_dapm_effect] = 5,
+	[snd_soc_dapm_src] = 5,
+	[snd_soc_dapm_asrc] = 5,
+	[snd_soc_dapm_encoder] = 5,
+	[snd_soc_dapm_decoder] = 5,
+	[snd_soc_dapm_switch] = 6,
+	[snd_soc_dapm_mixer_named_ctl] = 6,
+	[snd_soc_dapm_mixer] = 6,
+	[snd_soc_dapm_dac] = 7,
+	[snd_soc_dapm_mic] = 8,
+	[snd_soc_dapm_siggen] = 8,
+	[snd_soc_dapm_input] = 8,
+	[snd_soc_dapm_output] = 8,
+	[snd_soc_dapm_micbias] = 9,
+	[snd_soc_dapm_vmid] = 9,
+	[snd_soc_dapm_mux] = 10,
+	[snd_soc_dapm_demux] = 10,
+	[snd_soc_dapm_aif_in] = 11,
+	[snd_soc_dapm_aif_out] = 11,
+	[snd_soc_dapm_dai_in] = 11,
+	[snd_soc_dapm_dai_out] = 11,
+	[snd_soc_dapm_dai_link] = 12,
+	[snd_soc_dapm_supply] = 13,
+	[snd_soc_dapm_clock_supply] = 14,
+	[snd_soc_dapm_pinctrl] = 14,
+	[snd_soc_dapm_regulator_supply] = 14,
+	[snd_soc_dapm_post] = 15,
 };
 
 static void dapm_assert_locked(struct snd_soc_dapm_context *dapm)
@@ -1425,11 +1425,17 @@ static int dapm_seq_compare(struct snd_soc_dapm_widget *a,
 {
 	int *sort;
 
+	BUILD_BUG_ON(ARRAY_SIZE(dapm_up_seq) != SND_SOC_DAPM_TYPE_COUNT);
+	BUILD_BUG_ON(ARRAY_SIZE(dapm_down_seq) != SND_SOC_DAPM_TYPE_COUNT);
+
 	if (power_up)
 		sort = dapm_up_seq;
 	else
 		sort = dapm_down_seq;
 
+	WARN_ONCE(sort[a->id] == 0, "offset a->id %d not initialized\n", a->id);
+	WARN_ONCE(sort[b->id] == 0, "offset b->id %d not initialized\n", b->id);
+
 	if (sort[a->id] != sort[b->id])
 		return sort[a->id] - sort[b->id];
 	if (a->subseq != b->subseq) {
-- 
cgit v1.2.3


From 1878f0dcbff0cd07f62602deb160a44d69a8f146 Mon Sep 17 00:00:00 2001
From: Nikita Yushchenko <nikita.yoush@cogentembedded.com>
Date: Wed, 6 Feb 2019 07:36:40 +0100
Subject: net: phy: provide full set of accessor functions to MMD registers

This adds full set of locked and unlocked accessor functions to read and
write PHY MMD registers and/or bitfields.

Set of functions exactly matches what is already available for PHY
legacy registers.

Signed-off-by: Nikita Yushchenko <nikita.yoush@cogentembedded.com>
Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/phy-core.c | 116 ++++++++++++++++++++++++++++++++++-----
 include/linux/phy.h        | 134 +++++++++++++++++++++++++++++++++++++--------
 2 files changed, 214 insertions(+), 36 deletions(-)

(limited to 'include')

diff --git a/drivers/net/phy/phy-core.c b/drivers/net/phy/phy-core.c
index 909b3344babf..7d6aad287f84 100644
--- a/drivers/net/phy/phy-core.c
+++ b/drivers/net/phy/phy-core.c
@@ -414,15 +414,15 @@ static void mmd_phy_indirect(struct mii_bus *bus, int phy_addr, int devad,
 }
 
 /**
- * phy_read_mmd - Convenience function for reading a register
+ * __phy_read_mmd - Convenience function for reading a register
  * from an MMD on a given PHY.
  * @phydev: The phy_device struct
  * @devad: The MMD to read from (0..31)
  * @regnum: The register on the MMD to read (0..65535)
  *
- * Same rules as for phy_read();
+ * Same rules as for __phy_read();
  */
-int phy_read_mmd(struct phy_device *phydev, int devad, u32 regnum)
+int __phy_read_mmd(struct phy_device *phydev, int devad, u32 regnum)
 {
 	int val;
 
@@ -434,33 +434,52 @@ int phy_read_mmd(struct phy_device *phydev, int devad, u32 regnum)
 	} else if (phydev->is_c45) {
 		u32 addr = MII_ADDR_C45 | (devad << 16) | (regnum & 0xffff);
 
-		val = mdiobus_read(phydev->mdio.bus, phydev->mdio.addr, addr);
+		val = __mdiobus_read(phydev->mdio.bus, phydev->mdio.addr, addr);
 	} else {
 		struct mii_bus *bus = phydev->mdio.bus;
 		int phy_addr = phydev->mdio.addr;
 
-		mutex_lock(&bus->mdio_lock);
 		mmd_phy_indirect(bus, phy_addr, devad, regnum);
 
 		/* Read the content of the MMD's selected register */
 		val = __mdiobus_read(bus, phy_addr, MII_MMD_DATA);
-		mutex_unlock(&bus->mdio_lock);
 	}
 	return val;
 }
+EXPORT_SYMBOL(__phy_read_mmd);
+
+/**
+ * phy_read_mmd - Convenience function for reading a register
+ * from an MMD on a given PHY.
+ * @phydev: The phy_device struct
+ * @devad: The MMD to read from
+ * @regnum: The register on the MMD to read
+ *
+ * Same rules as for phy_read();
+ */
+int phy_read_mmd(struct phy_device *phydev, int devad, u32 regnum)
+{
+	int ret;
+
+	mutex_lock(&phydev->mdio.bus->mdio_lock);
+	ret = __phy_read_mmd(phydev, devad, regnum);
+	mutex_unlock(&phydev->mdio.bus->mdio_lock);
+
+	return ret;
+}
 EXPORT_SYMBOL(phy_read_mmd);
 
 /**
- * phy_write_mmd - Convenience function for writing a register
+ * __phy_write_mmd - Convenience function for writing a register
  * on an MMD on a given PHY.
  * @phydev: The phy_device struct
  * @devad: The MMD to read from
  * @regnum: The register on the MMD to read
  * @val: value to write to @regnum
  *
- * Same rules as for phy_write();
+ * Same rules as for __phy_write();
  */
-int phy_write_mmd(struct phy_device *phydev, int devad, u32 regnum, u16 val)
+int __phy_write_mmd(struct phy_device *phydev, int devad, u32 regnum, u16 val)
 {
 	int ret;
 
@@ -472,23 +491,43 @@ int phy_write_mmd(struct phy_device *phydev, int devad, u32 regnum, u16 val)
 	} else if (phydev->is_c45) {
 		u32 addr = MII_ADDR_C45 | (devad << 16) | (regnum & 0xffff);
 
-		ret = mdiobus_write(phydev->mdio.bus, phydev->mdio.addr,
-				    addr, val);
+		ret = __mdiobus_write(phydev->mdio.bus, phydev->mdio.addr,
+				      addr, val);
 	} else {
 		struct mii_bus *bus = phydev->mdio.bus;
 		int phy_addr = phydev->mdio.addr;
 
-		mutex_lock(&bus->mdio_lock);
 		mmd_phy_indirect(bus, phy_addr, devad, regnum);
 
 		/* Write the data into MMD's selected register */
 		__mdiobus_write(bus, phy_addr, MII_MMD_DATA, val);
-		mutex_unlock(&bus->mdio_lock);
 
 		ret = 0;
 	}
 	return ret;
 }
+EXPORT_SYMBOL(__phy_write_mmd);
+
+/**
+ * phy_write_mmd - Convenience function for writing a register
+ * on an MMD on a given PHY.
+ * @phydev: The phy_device struct
+ * @devad: The MMD to read from
+ * @regnum: The register on the MMD to read
+ * @val: value to write to @regnum
+ *
+ * Same rules as for phy_write();
+ */
+int phy_write_mmd(struct phy_device *phydev, int devad, u32 regnum, u16 val)
+{
+	int ret;
+
+	mutex_lock(&phydev->mdio.bus->mdio_lock);
+	ret = __phy_write_mmd(phydev, devad, regnum, val);
+	mutex_unlock(&phydev->mdio.bus->mdio_lock);
+
+	return ret;
+}
 EXPORT_SYMBOL(phy_write_mmd);
 
 /**
@@ -538,6 +577,57 @@ int phy_modify(struct phy_device *phydev, u32 regnum, u16 mask, u16 set)
 }
 EXPORT_SYMBOL_GPL(phy_modify);
 
+/**
+ * __phy_modify_mmd - Convenience function for modifying a register on MMD
+ * @phydev: the phy_device struct
+ * @devad: the MMD containing register to modify
+ * @regnum: register number to modify
+ * @mask: bit mask of bits to clear
+ * @set: new value of bits set in mask to write to @regnum
+ *
+ * Unlocked helper function which allows a MMD register to be modified as
+ * new register value = (old register value & ~mask) | set
+ */
+int __phy_modify_mmd(struct phy_device *phydev, int devad, u32 regnum,
+		     u16 mask, u16 set)
+{
+	int ret;
+
+	ret = __phy_read_mmd(phydev, devad, regnum);
+	if (ret < 0)
+		return ret;
+
+	ret = __phy_write_mmd(phydev, devad, regnum, (ret & ~mask) | set);
+
+	return ret < 0 ? ret : 0;
+}
+EXPORT_SYMBOL_GPL(__phy_modify_mmd);
+
+/**
+ * phy_modify_mmd - Convenience function for modifying a register on MMD
+ * @phydev: the phy_device struct
+ * @devad: the MMD containing register to modify
+ * @regnum: register number to modify
+ * @mask: bit mask of bits to clear
+ * @set: new value of bits set in mask to write to @regnum
+ *
+ * NOTE: MUST NOT be called from interrupt context,
+ * because the bus read/write functions may wait for an interrupt
+ * to conclude the operation.
+ */
+int phy_modify_mmd(struct phy_device *phydev, int devad, u32 regnum,
+		   u16 mask, u16 set)
+{
+	int ret;
+
+	mutex_lock(&phydev->mdio.bus->mdio_lock);
+	ret = __phy_modify_mmd(phydev, devad, regnum, mask, set);
+	mutex_unlock(&phydev->mdio.bus->mdio_lock);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(phy_modify_mmd);
+
 static int __phy_read_page(struct phy_device *phydev)
 {
 	return phydev->drv->read_page(phydev);
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 70f83d0d7469..237dd035858a 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -692,17 +692,6 @@ static inline bool phy_is_started(struct phy_device *phydev)
 
 void phy_resolve_aneg_linkmode(struct phy_device *phydev);
 
-/**
- * phy_read_mmd - Convenience function for reading a register
- * from an MMD on a given PHY.
- * @phydev: The phy_device struct
- * @devad: The MMD to read from
- * @regnum: The register on the MMD to read
- *
- * Same rules as for phy_read();
- */
-int phy_read_mmd(struct phy_device *phydev, int devad, u32 regnum);
-
 /**
  * phy_read - Convenience function for reading a given PHY register
  * @phydev: the phy_device struct
@@ -758,9 +747,60 @@ static inline int __phy_write(struct phy_device *phydev, u32 regnum, u16 val)
 			       val);
 }
 
+/**
+ * phy_read_mmd - Convenience function for reading a register
+ * from an MMD on a given PHY.
+ * @phydev: The phy_device struct
+ * @devad: The MMD to read from
+ * @regnum: The register on the MMD to read
+ *
+ * Same rules as for phy_read();
+ */
+int phy_read_mmd(struct phy_device *phydev, int devad, u32 regnum);
+
+/**
+ * __phy_read_mmd - Convenience function for reading a register
+ * from an MMD on a given PHY.
+ * @phydev: The phy_device struct
+ * @devad: The MMD to read from
+ * @regnum: The register on the MMD to read
+ *
+ * Same rules as for __phy_read();
+ */
+int __phy_read_mmd(struct phy_device *phydev, int devad, u32 regnum);
+
+/**
+ * phy_write_mmd - Convenience function for writing a register
+ * on an MMD on a given PHY.
+ * @phydev: The phy_device struct
+ * @devad: The MMD to write to
+ * @regnum: The register on the MMD to read
+ * @val: value to write to @regnum
+ *
+ * Same rules as for phy_write();
+ */
+int phy_write_mmd(struct phy_device *phydev, int devad, u32 regnum, u16 val);
+
+/**
+ * __phy_write_mmd - Convenience function for writing a register
+ * on an MMD on a given PHY.
+ * @phydev: The phy_device struct
+ * @devad: The MMD to write to
+ * @regnum: The register on the MMD to read
+ * @val: value to write to @regnum
+ *
+ * Same rules as for __phy_write();
+ */
+int __phy_write_mmd(struct phy_device *phydev, int devad, u32 regnum, u16 val);
+
 int __phy_modify(struct phy_device *phydev, u32 regnum, u16 mask, u16 set);
 int phy_modify(struct phy_device *phydev, u32 regnum, u16 mask, u16 set);
 
+int __phy_modify_mmd(struct phy_device *phydev, int devad, u32 regnum,
+		u16 mask, u16 set);
+int phy_modify_mmd(struct phy_device *phydev, int devad, u32 regnum,
+		u16 mask, u16 set);
+
 /**
  * __phy_set_bits - Convenience function for setting bits in a PHY register
  * @phydev: the phy_device struct
@@ -810,6 +850,66 @@ static inline int phy_clear_bits(struct phy_device *phydev, u32 regnum, u16 val)
 	return phy_modify(phydev, regnum, val, 0);
 }
 
+/**
+ * __phy_set_bits_mmd - Convenience function for setting bits in a register
+ * on MMD
+ * @phydev: the phy_device struct
+ * @devad: the MMD containing register to modify
+ * @regnum: register number to modify
+ * @val: bits to set
+ *
+ * The caller must have taken the MDIO bus lock.
+ */
+static inline int __phy_set_bits_mmd(struct phy_device *phydev, int devad,
+		u32 regnum, u16 val)
+{
+	return __phy_modify_mmd(phydev, devad, regnum, 0, val);
+}
+
+/**
+ * __phy_clear_bits_mmd - Convenience function for clearing bits in a register
+ * on MMD
+ * @phydev: the phy_device struct
+ * @devad: the MMD containing register to modify
+ * @regnum: register number to modify
+ * @val: bits to clear
+ *
+ * The caller must have taken the MDIO bus lock.
+ */
+static inline int __phy_clear_bits_mmd(struct phy_device *phydev, int devad,
+		u32 regnum, u16 val)
+{
+	return __phy_modify_mmd(phydev, devad, regnum, val, 0);
+}
+
+/**
+ * phy_set_bits_mmd - Convenience function for setting bits in a register
+ * on MMD
+ * @phydev: the phy_device struct
+ * @devad: the MMD containing register to modify
+ * @regnum: register number to modify
+ * @val: bits to set
+ */
+static inline int phy_set_bits_mmd(struct phy_device *phydev, int devad,
+		u32 regnum, u16 val)
+{
+	return phy_modify_mmd(phydev, devad, regnum, 0, val);
+}
+
+/**
+ * phy_clear_bits_mmd - Convenience function for clearing bits in a register
+ * on MMD
+ * @phydev: the phy_device struct
+ * @devad: the MMD containing register to modify
+ * @regnum: register number to modify
+ * @val: bits to clear
+ */
+static inline int phy_clear_bits_mmd(struct phy_device *phydev, int devad,
+		u32 regnum, u16 val)
+{
+	return phy_modify_mmd(phydev, devad, regnum, val, 0);
+}
+
 /**
  * phy_interrupt_is_valid - Convenience function for testing a given PHY irq
  * @phydev: the phy_device struct
@@ -886,18 +986,6 @@ static inline bool phy_is_pseudo_fixed_link(struct phy_device *phydev)
 	return phydev->is_pseudo_fixed_link;
 }
 
-/**
- * phy_write_mmd - Convenience function for writing a register
- * on an MMD on a given PHY.
- * @phydev: The phy_device struct
- * @devad: The MMD to read from
- * @regnum: The register on the MMD to read
- * @val: value to write to @regnum
- *
- * Same rules as for phy_write();
- */
-int phy_write_mmd(struct phy_device *phydev, int devad, u32 regnum, u16 val);
-
 int phy_save_page(struct phy_device *phydev);
 int phy_select_page(struct phy_device *phydev, int page);
 int phy_restore_page(struct phy_device *phydev, int oldpage, int ret);
-- 
cgit v1.2.3


From fd9dc93e36231fb6d520e0edd467058fad4fd12d Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Wed, 6 Feb 2019 13:07:11 -0500
Subject: XArray: Change xa_insert to return -EBUSY

Userspace translates EEXIST to "File exists" which isn't a very good
error message for the problem.  "Device or resource busy" is a better
indication of what went wrong.

Signed-off-by: Matthew Wilcox <willy@infradead.org>
---
 Documentation/core-api/xarray.rst | 2 +-
 fs/nilfs2/btnode.c                | 2 +-
 include/linux/xarray.h            | 6 +++---
 lib/test_xarray.c                 | 4 ++--
 lib/xarray.c                      | 4 ++--
 5 files changed, 9 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/Documentation/core-api/xarray.rst b/Documentation/core-api/xarray.rst
index 5d54b27c6eba..42bb1a62650f 100644
--- a/Documentation/core-api/xarray.rst
+++ b/Documentation/core-api/xarray.rst
@@ -85,7 +85,7 @@ which was at that index; if it returns the same entry which was passed as
 
 If you want to only store a new entry to an index if the current entry
 at that index is ``NULL``, you can use :c:func:`xa_insert` which
-returns ``-EEXIST`` if the entry is not empty.
+returns ``-EBUSY`` if the entry is not empty.
 
 You can enquire whether a mark is set on an entry by using
 :c:func:`xa_get_mark`.  If the entry is not ``NULL``, you can set a mark
diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c
index f2129a5d9f23..4391fd3abd8f 100644
--- a/fs/nilfs2/btnode.c
+++ b/fs/nilfs2/btnode.c
@@ -189,7 +189,7 @@ retry:
 		 */
 		if (!err)
 			return 0;
-		else if (err != -EEXIST)
+		else if (err != -EBUSY)
 			goto failed_unlock;
 
 		err = invalidate_inode_pages2_range(btnc, newkey, newkey);
diff --git a/include/linux/xarray.h b/include/linux/xarray.h
index e11841537631..57cf35c4d094 100644
--- a/include/linux/xarray.h
+++ b/include/linux/xarray.h
@@ -664,7 +664,7 @@ static inline void *xa_cmpxchg_irq(struct xarray *xa, unsigned long index,
  *
  * Context: Any context.  Takes and releases the xa_lock.  May sleep if
  * the @gfp flags permit.
- * Return: 0 if the store succeeded.  -EEXIST if another entry was present.
+ * Return: 0 if the store succeeded.  -EBUSY if another entry was present.
  * -ENOMEM if memory could not be allocated.
  */
 static inline int xa_insert(struct xarray *xa, unsigned long index,
@@ -693,7 +693,7 @@ static inline int xa_insert(struct xarray *xa, unsigned long index,
  *
  * Context: Any context.  Takes and releases the xa_lock while
  * disabling softirqs.  May sleep if the @gfp flags permit.
- * Return: 0 if the store succeeded.  -EEXIST if another entry was present.
+ * Return: 0 if the store succeeded.  -EBUSY if another entry was present.
  * -ENOMEM if memory could not be allocated.
  */
 static inline int xa_insert_bh(struct xarray *xa, unsigned long index,
@@ -722,7 +722,7 @@ static inline int xa_insert_bh(struct xarray *xa, unsigned long index,
  *
  * Context: Process context.  Takes and releases the xa_lock while
  * disabling interrupts.  May sleep if the @gfp flags permit.
- * Return: 0 if the store succeeded.  -EEXIST if another entry was present.
+ * Return: 0 if the store succeeded.  -EBUSY if another entry was present.
  * -ENOMEM if memory could not be allocated.
  */
 static inline int xa_insert_irq(struct xarray *xa, unsigned long index,
diff --git a/lib/test_xarray.c b/lib/test_xarray.c
index 671a93ee09e6..9d894e93456c 100644
--- a/lib/test_xarray.c
+++ b/lib/test_xarray.c
@@ -346,7 +346,7 @@ static noinline void check_cmpxchg(struct xarray *xa)
 
 	XA_BUG_ON(xa, !xa_empty(xa));
 	XA_BUG_ON(xa, xa_store_index(xa, 12345678, GFP_KERNEL) != NULL);
-	XA_BUG_ON(xa, xa_insert(xa, 12345678, xa, GFP_KERNEL) != -EEXIST);
+	XA_BUG_ON(xa, xa_insert(xa, 12345678, xa, GFP_KERNEL) != -EBUSY);
 	XA_BUG_ON(xa, xa_cmpxchg(xa, 12345678, SIX, FIVE, GFP_KERNEL) != LOTS);
 	XA_BUG_ON(xa, xa_cmpxchg(xa, 12345678, LOTS, FIVE, GFP_KERNEL) != LOTS);
 	XA_BUG_ON(xa, xa_cmpxchg(xa, 12345678, FIVE, LOTS, GFP_KERNEL) != FIVE);
@@ -388,7 +388,7 @@ static noinline void check_reserve(struct xarray *xa)
 	/* But xa_insert does not */
 	xa_reserve(xa, 12345678, GFP_KERNEL);
 	XA_BUG_ON(xa, xa_insert(xa, 12345678, xa_mk_value(12345678), 0) !=
-			-EEXIST);
+			-EBUSY);
 	XA_BUG_ON(xa, xa_empty(xa));
 	XA_BUG_ON(xa, xa_erase(xa, 12345678) != NULL);
 	XA_BUG_ON(xa, !xa_empty(xa));
diff --git a/lib/xarray.c b/lib/xarray.c
index fb783bf2a441..1b97ca58bd15 100644
--- a/lib/xarray.c
+++ b/lib/xarray.c
@@ -1451,7 +1451,7 @@ EXPORT_SYMBOL(__xa_cmpxchg);
  *
  * Context: Any context.  Expects xa_lock to be held on entry.  May
  * release and reacquire xa_lock if @gfp flags permit.
- * Return: 0 if the store succeeded.  -EEXIST if another entry was present.
+ * Return: 0 if the store succeeded.  -EBUSY if another entry was present.
  * -ENOMEM if memory could not be allocated.
  */
 int __xa_insert(struct xarray *xa, unsigned long index, void *entry, gfp_t gfp)
@@ -1471,7 +1471,7 @@ int __xa_insert(struct xarray *xa, unsigned long index, void *entry, gfp_t gfp)
 			if (xa_track_free(xa))
 				xas_clear_mark(&xas, XA_FREE_MARK);
 		} else {
-			xas_set_err(&xas, -EEXIST);
+			xas_set_err(&xas, -EBUSY);
 		}
 	} while (__xas_nomem(&xas, gfp));
 
-- 
cgit v1.2.3


From 3ccaf57a6a63ad171a951dcaddffc453b2414c7b Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Fri, 26 Oct 2018 14:43:22 -0400
Subject: XArray: Add support for 1s-based allocation

A lot of places want to allocate IDs starting at 1 instead of 0.
While the xa_alloc() API supports this, it's not very efficient if lots
of IDs are allocated, due to having to walk down to the bottom of the
tree to see if ID 1 is available, then all the way over to the next
non-allocated ID.  This method marks ID 0 as being occupied which wastes
one slot in the XArray, but preserves xa_empty() as working.

Signed-off-by: Matthew Wilcox <willy@infradead.org>
---
 Documentation/core-api/xarray.rst | 10 +++--
 include/linux/xarray.h            | 14 ++++++-
 lib/test_xarray.c                 | 88 ++++++++++++++++++++++++---------------
 lib/xarray.c                      | 11 +++++
 4 files changed, 86 insertions(+), 37 deletions(-)

(limited to 'include')

diff --git a/Documentation/core-api/xarray.rst b/Documentation/core-api/xarray.rst
index 42bb1a62650f..e90c4925cd37 100644
--- a/Documentation/core-api/xarray.rst
+++ b/Documentation/core-api/xarray.rst
@@ -131,17 +131,21 @@ If you use :c:func:`DEFINE_XARRAY_ALLOC` to define the XArray, or
 initialise it by passing ``XA_FLAGS_ALLOC`` to :c:func:`xa_init_flags`,
 the XArray changes to track whether entries are in use or not.
 
-You can call :c:func:`xa_alloc` to store the entry at any unused index
+You can call :c:func:`xa_alloc` to store the entry at an unused index
 in the XArray.  If you need to modify the array from interrupt context,
 you can use :c:func:`xa_alloc_bh` or :c:func:`xa_alloc_irq` to disable
 interrupts while allocating the ID.
 
-Using :c:func:`xa_store`, :c:func:`xa_cmpxchg` or :c:func:`xa_insert`
-will mark the entry as being allocated.  Unlike a normal XArray, storing
+Using :c:func:`xa_store`, :c:func:`xa_cmpxchg` or :c:func:`xa_insert` will
+also mark the entry as being allocated.  Unlike a normal XArray, storing
 ``NULL`` will mark the entry as being in use, like :c:func:`xa_reserve`.
 To free an entry, use :c:func:`xa_erase` (or :c:func:`xa_release` if
 you only want to free the entry if it's ``NULL``).
 
+By default, the lowest free entry is allocated starting from 0.  If you
+want to allocate entries starting at 1, it is more efficient to use
+:c:func:`DEFINE_XARRAY_ALLOC1` or ``XA_FLAGS_ALLOC1``.
+
 You cannot use ``XA_MARK_0`` with an allocating XArray as this mark
 is used to track whether an entry is free or not.  The other marks are
 available for your use.
diff --git a/include/linux/xarray.h b/include/linux/xarray.h
index 57cf35c4d094..99dd0838b4ba 100644
--- a/include/linux/xarray.h
+++ b/include/linux/xarray.h
@@ -220,10 +220,13 @@ enum xa_lock_type {
 #define XA_FLAGS_LOCK_IRQ	((__force gfp_t)XA_LOCK_IRQ)
 #define XA_FLAGS_LOCK_BH	((__force gfp_t)XA_LOCK_BH)
 #define XA_FLAGS_TRACK_FREE	((__force gfp_t)4U)
+#define XA_FLAGS_ZERO_BUSY	((__force gfp_t)8U)
 #define XA_FLAGS_MARK(mark)	((__force gfp_t)((1U << __GFP_BITS_SHIFT) << \
 						(__force unsigned)(mark)))
 
+/* ALLOC is for a normal 0-based alloc.  ALLOC1 is for an 1-based alloc */
 #define XA_FLAGS_ALLOC	(XA_FLAGS_TRACK_FREE | XA_FLAGS_MARK(XA_FREE_MARK))
+#define XA_FLAGS_ALLOC1	(XA_FLAGS_TRACK_FREE | XA_FLAGS_ZERO_BUSY)
 
 /**
  * struct xarray - The anchor of the XArray.
@@ -279,7 +282,7 @@ struct xarray {
 #define DEFINE_XARRAY(name) DEFINE_XARRAY_FLAGS(name, 0)
 
 /**
- * DEFINE_XARRAY_ALLOC() - Define an XArray which can allocate IDs.
+ * DEFINE_XARRAY_ALLOC() - Define an XArray which allocates IDs starting at 0.
  * @name: A string that names your XArray.
  *
  * This is intended for file scope definitions of allocating XArrays.
@@ -287,6 +290,15 @@ struct xarray {
  */
 #define DEFINE_XARRAY_ALLOC(name) DEFINE_XARRAY_FLAGS(name, XA_FLAGS_ALLOC)
 
+/**
+ * DEFINE_XARRAY_ALLOC1() - Define an XArray which allocates IDs starting at 1.
+ * @name: A string that names your XArray.
+ *
+ * This is intended for file scope definitions of allocating XArrays.
+ * See also DEFINE_XARRAY().
+ */
+#define DEFINE_XARRAY_ALLOC1(name) DEFINE_XARRAY_FLAGS(name, XA_FLAGS_ALLOC1)
+
 void *xa_load(struct xarray *, unsigned long index);
 void *xa_store(struct xarray *, unsigned long index, void *entry, gfp_t);
 void *xa_erase(struct xarray *, unsigned long index);
diff --git a/lib/test_xarray.c b/lib/test_xarray.c
index 9d894e93456c..cd74f8f32abe 100644
--- a/lib/test_xarray.c
+++ b/lib/test_xarray.c
@@ -589,64 +589,86 @@ static noinline void check_multi_store(struct xarray *xa)
 #endif
 }
 
-static DEFINE_XARRAY_ALLOC(xa0);
-
-static noinline void check_xa_alloc(void)
+static noinline void check_xa_alloc_1(struct xarray *xa, unsigned int base)
 {
 	int i;
 	u32 id;
 
-	/* An empty array should assign 0 to the first alloc */
-	xa_alloc_index(&xa0, 0, GFP_KERNEL);
+	XA_BUG_ON(xa, !xa_empty(xa));
+	/* An empty array should assign %base to the first alloc */
+	xa_alloc_index(xa, base, GFP_KERNEL);
 
 	/* Erasing it should make the array empty again */
-	xa_erase_index(&xa0, 0);
-	XA_BUG_ON(&xa0, !xa_empty(&xa0));
+	xa_erase_index(xa, base);
+	XA_BUG_ON(xa, !xa_empty(xa));
+
+	/* And it should assign %base again */
+	xa_alloc_index(xa, base, GFP_KERNEL);
+
+	/* Allocating and then erasing a lot should not lose base */
+	for (i = base + 1; i < 2 * XA_CHUNK_SIZE; i++)
+		xa_alloc_index(xa, i, GFP_KERNEL);
+	for (i = base; i < 2 * XA_CHUNK_SIZE; i++)
+		xa_erase_index(xa, i);
+	xa_alloc_index(xa, base, GFP_KERNEL);
+
+	/* Destroying the array should do the same as erasing */
+	xa_destroy(xa);
 
-	/* And it should assign 0 again */
-	xa_alloc_index(&xa0, 0, GFP_KERNEL);
+	/* And it should assign %base again */
+	xa_alloc_index(xa, base, GFP_KERNEL);
 
-	/* The next assigned ID should be 1 */
-	xa_alloc_index(&xa0, 1, GFP_KERNEL);
-	xa_erase_index(&xa0, 1);
+	/* The next assigned ID should be base+1 */
+	xa_alloc_index(xa, base + 1, GFP_KERNEL);
+	xa_erase_index(xa, base + 1);
 
 	/* Storing a value should mark it used */
-	xa_store_index(&xa0, 1, GFP_KERNEL);
-	xa_alloc_index(&xa0, 2, GFP_KERNEL);
+	xa_store_index(xa, base + 1, GFP_KERNEL);
+	xa_alloc_index(xa, base + 2, GFP_KERNEL);
 
-	/* If we then erase 0, it should be free */
-	xa_erase_index(&xa0, 0);
-	xa_alloc_index(&xa0, 0, GFP_KERNEL);
+	/* If we then erase base, it should be free */
+	xa_erase_index(xa, base);
+	xa_alloc_index(xa, base, GFP_KERNEL);
 
-	xa_erase_index(&xa0, 1);
-	xa_erase_index(&xa0, 2);
+	xa_erase_index(xa, base + 1);
+	xa_erase_index(xa, base + 2);
 
 	for (i = 1; i < 5000; i++) {
-		xa_alloc_index(&xa0, i, GFP_KERNEL);
+		xa_alloc_index(xa, base + i, GFP_KERNEL);
 	}
 
-	xa_destroy(&xa0);
+	xa_destroy(xa);
 
+	/* Check that we fail properly at the limit of allocation */
 	id = 0xfffffffeU;
-	XA_BUG_ON(&xa0, xa_alloc(&xa0, &id, UINT_MAX, xa_mk_index(id),
+	XA_BUG_ON(xa, xa_alloc(xa, &id, UINT_MAX, xa_mk_index(id),
 				GFP_KERNEL) != 0);
-	XA_BUG_ON(&xa0, id != 0xfffffffeU);
-	XA_BUG_ON(&xa0, xa_alloc(&xa0, &id, UINT_MAX, xa_mk_index(id),
+	XA_BUG_ON(xa, id != 0xfffffffeU);
+	XA_BUG_ON(xa, xa_alloc(xa, &id, UINT_MAX, xa_mk_index(id),
 				GFP_KERNEL) != 0);
-	XA_BUG_ON(&xa0, id != 0xffffffffU);
-	XA_BUG_ON(&xa0, xa_alloc(&xa0, &id, UINT_MAX, xa_mk_index(id),
+	XA_BUG_ON(xa, id != 0xffffffffU);
+	XA_BUG_ON(xa, xa_alloc(xa, &id, UINT_MAX, xa_mk_index(id),
 				GFP_KERNEL) != -ENOSPC);
-	XA_BUG_ON(&xa0, id != 0xffffffffU);
-	xa_destroy(&xa0);
+	XA_BUG_ON(xa, id != 0xffffffffU);
+	xa_destroy(xa);
 
 	id = 10;
-	XA_BUG_ON(&xa0, xa_alloc(&xa0, &id, 5, xa_mk_index(id),
+	XA_BUG_ON(xa, xa_alloc(xa, &id, 5, xa_mk_index(id),
 				GFP_KERNEL) != -ENOSPC);
-	XA_BUG_ON(&xa0, xa_store_index(&xa0, 3, GFP_KERNEL) != 0);
-	XA_BUG_ON(&xa0, xa_alloc(&xa0, &id, 5, xa_mk_index(id),
+	XA_BUG_ON(xa, xa_store_index(xa, 3, GFP_KERNEL) != 0);
+	XA_BUG_ON(xa, xa_alloc(xa, &id, 5, xa_mk_index(id),
 				GFP_KERNEL) != -ENOSPC);
-	xa_erase_index(&xa0, 3);
-	XA_BUG_ON(&xa0, !xa_empty(&xa0));
+	xa_erase_index(xa, 3);
+	XA_BUG_ON(xa, !xa_empty(xa));
+}
+
+static DEFINE_XARRAY_ALLOC(xa0);
+static DEFINE_XARRAY_ALLOC1(xa1);
+
+static noinline void check_xa_alloc(void)
+{
+	check_xa_alloc_1(&xa0, 0);
+	check_xa_alloc_1(&xa1, 1);
 }
 
 static noinline void __check_store_iter(struct xarray *xa, unsigned long start,
diff --git a/lib/xarray.c b/lib/xarray.c
index 1b97ca58bd15..468fb7b7963f 100644
--- a/lib/xarray.c
+++ b/lib/xarray.c
@@ -57,6 +57,11 @@ static inline bool xa_track_free(const struct xarray *xa)
 	return xa->xa_flags & XA_FLAGS_TRACK_FREE;
 }
 
+static inline bool xa_zero_busy(const struct xarray *xa)
+{
+	return xa->xa_flags & XA_FLAGS_ZERO_BUSY;
+}
+
 static inline void xa_mark_set(struct xarray *xa, xa_mark_t mark)
 {
 	if (!(xa->xa_flags & XA_FLAGS_MARK(mark)))
@@ -432,6 +437,8 @@ static void xas_shrink(struct xa_state *xas)
 			break;
 		if (!xa_is_node(entry) && node->shift)
 			break;
+		if (xa_is_zero(entry) && xa_zero_busy(xa))
+			entry = NULL;
 		xas->xa_node = XAS_BOUNDS;
 
 		RCU_INIT_POINTER(xa->xa_head, entry);
@@ -628,6 +635,8 @@ static void *xas_create(struct xa_state *xas, bool allow_root)
 	if (xas_top(node)) {
 		entry = xa_head_locked(xa);
 		xas->xa_node = NULL;
+		if (!entry && xa_zero_busy(xa))
+			entry = XA_ZERO_ENTRY;
 		shift = xas_expand(xas, entry);
 		if (shift < 0)
 			return NULL;
@@ -1942,6 +1951,8 @@ void xa_destroy(struct xarray *xa)
 	entry = xa_head_locked(xa);
 	RCU_INIT_POINTER(xa->xa_head, NULL);
 	xas_init_marks(&xas);
+	if (xa_zero_busy(xa))
+		xa_mark_clear(xa, XA_FREE_MARK);
 	/* lockdep checks we're still holding the lock in xas_free_nodes() */
 	if (xa_is_node(entry))
 		xas_free_nodes(&xas, xa_to_node(entry));
-- 
cgit v1.2.3


From a3e4d3f97ec844de005a679585c04c5c03dfbdb6 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Mon, 31 Dec 2018 10:41:01 -0500
Subject: XArray: Redesign xa_alloc API

It was too easy to forget to initialise the start index.  Add an
xa_limit data structure which can be used to pass min & max, and
define a couple of special values for common cases.  Also add some
more tests cribbed from the IDR test suite.  Change the return value
from -ENOSPC to -EBUSY to match xa_insert().

Signed-off-by: Matthew Wilcox <willy@infradead.org>
---
 include/linux/xarray.h | 80 +++++++++++++++++++++++++++++-----------------
 lib/test_xarray.c      | 86 ++++++++++++++++++++++++++++++++++++++++----------
 lib/xarray.c           | 29 ++++++++---------
 3 files changed, 135 insertions(+), 60 deletions(-)

(limited to 'include')

diff --git a/include/linux/xarray.h b/include/linux/xarray.h
index 99dd0838b4ba..883bb958e462 100644
--- a/include/linux/xarray.h
+++ b/include/linux/xarray.h
@@ -200,6 +200,27 @@ static inline int xa_err(void *entry)
 	return 0;
 }
 
+/**
+ * struct xa_limit - Represents a range of IDs.
+ * @min: The lowest ID to allocate (inclusive).
+ * @max: The maximum ID to allocate (inclusive).
+ *
+ * This structure is used either directly or via the XA_LIMIT() macro
+ * to communicate the range of IDs that are valid for allocation.
+ * Two common ranges are predefined for you:
+ *  * xa_limit_32b	- [0 - UINT_MAX]
+ *  * xa_limit_31b	- [0 - INT_MAX]
+ */
+struct xa_limit {
+	u32 max;
+	u32 min;
+};
+
+#define XA_LIMIT(_min, _max) (struct xa_limit) { .min = _min, .max = _max }
+
+#define xa_limit_32b	XA_LIMIT(0, UINT_MAX)
+#define xa_limit_31b	XA_LIMIT(0, INT_MAX)
+
 typedef unsigned __bitwise xa_mark_t;
 #define XA_MARK_0		((__force xa_mark_t)0U)
 #define XA_MARK_1		((__force xa_mark_t)1U)
@@ -476,7 +497,8 @@ void *__xa_store(struct xarray *, unsigned long index, void *entry, gfp_t);
 void *__xa_cmpxchg(struct xarray *, unsigned long index, void *old,
 		void *entry, gfp_t);
 int __xa_insert(struct xarray *, unsigned long index, void *entry, gfp_t);
-int __xa_alloc(struct xarray *, u32 *id, u32 max, void *entry, gfp_t);
+int __must_check __xa_alloc(struct xarray *, u32 *id, void *entry,
+		struct xa_limit, gfp_t);
 int __xa_reserve(struct xarray *, unsigned long index, gfp_t);
 void __xa_set_mark(struct xarray *, unsigned long index, xa_mark_t);
 void __xa_clear_mark(struct xarray *, unsigned long index, xa_mark_t);
@@ -753,26 +775,26 @@ static inline int xa_insert_irq(struct xarray *xa, unsigned long index,
  * xa_alloc() - Find somewhere to store this entry in the XArray.
  * @xa: XArray.
  * @id: Pointer to ID.
- * @max: Maximum ID to allocate (inclusive).
  * @entry: New entry.
+ * @limit: Range of ID to allocate.
  * @gfp: Memory allocation flags.
  *
- * Allocates an unused ID in the range specified by @id and @max.
- * Updates the @id pointer with the index, then stores the entry at that
- * index.  A concurrent lookup will not see an uninitialised @id.
+ * Finds an empty entry in @xa between @limit.min and @limit.max,
+ * stores the index into the @id pointer, then stores the entry at
+ * that index.  A concurrent lookup will not see an uninitialised @id.
  *
- * Context: Process context.  Takes and releases the xa_lock.  May sleep if
+ * Context: Any context.  Takes and releases the xa_lock.  May sleep if
  * the @gfp flags permit.
- * Return: 0 on success, -ENOMEM if memory allocation fails or -ENOSPC if
- * there is no more space in the XArray.
+ * Return: 0 on success, -ENOMEM if memory could not be allocated or
+ * -EBUSY if there are no free entries in @limit.
  */
-static inline int xa_alloc(struct xarray *xa, u32 *id, u32 max, void *entry,
-		gfp_t gfp)
+static inline __must_check int xa_alloc(struct xarray *xa, u32 *id,
+		void *entry, struct xa_limit limit, gfp_t gfp)
 {
 	int err;
 
 	xa_lock(xa);
-	err = __xa_alloc(xa, id, max, entry, gfp);
+	err = __xa_alloc(xa, id, entry, limit, gfp);
 	xa_unlock(xa);
 
 	return err;
@@ -782,26 +804,26 @@ static inline int xa_alloc(struct xarray *xa, u32 *id, u32 max, void *entry,
  * xa_alloc_bh() - Find somewhere to store this entry in the XArray.
  * @xa: XArray.
  * @id: Pointer to ID.
- * @max: Maximum ID to allocate (inclusive).
  * @entry: New entry.
+ * @limit: Range of ID to allocate.
  * @gfp: Memory allocation flags.
  *
- * Allocates an unused ID in the range specified by @id and @max.
- * Updates the @id pointer with the index, then stores the entry at that
- * index.  A concurrent lookup will not see an uninitialised @id.
+ * Finds an empty entry in @xa between @limit.min and @limit.max,
+ * stores the index into the @id pointer, then stores the entry at
+ * that index.  A concurrent lookup will not see an uninitialised @id.
  *
  * Context: Any context.  Takes and releases the xa_lock while
  * disabling softirqs.  May sleep if the @gfp flags permit.
- * Return: 0 on success, -ENOMEM if memory allocation fails or -ENOSPC if
- * there is no more space in the XArray.
+ * Return: 0 on success, -ENOMEM if memory could not be allocated or
+ * -EBUSY if there are no free entries in @limit.
  */
-static inline int xa_alloc_bh(struct xarray *xa, u32 *id, u32 max, void *entry,
-		gfp_t gfp)
+static inline int __must_check xa_alloc_bh(struct xarray *xa, u32 *id,
+		void *entry, struct xa_limit limit, gfp_t gfp)
 {
 	int err;
 
 	xa_lock_bh(xa);
-	err = __xa_alloc(xa, id, max, entry, gfp);
+	err = __xa_alloc(xa, id, entry, limit, gfp);
 	xa_unlock_bh(xa);
 
 	return err;
@@ -811,26 +833,26 @@ static inline int xa_alloc_bh(struct xarray *xa, u32 *id, u32 max, void *entry,
  * xa_alloc_irq() - Find somewhere to store this entry in the XArray.
  * @xa: XArray.
  * @id: Pointer to ID.
- * @max: Maximum ID to allocate (inclusive).
  * @entry: New entry.
+ * @limit: Range of ID to allocate.
  * @gfp: Memory allocation flags.
  *
- * Allocates an unused ID in the range specified by @id and @max.
- * Updates the @id pointer with the index, then stores the entry at that
- * index.  A concurrent lookup will not see an uninitialised @id.
+ * Finds an empty entry in @xa between @limit.min and @limit.max,
+ * stores the index into the @id pointer, then stores the entry at
+ * that index.  A concurrent lookup will not see an uninitialised @id.
  *
  * Context: Process context.  Takes and releases the xa_lock while
  * disabling interrupts.  May sleep if the @gfp flags permit.
- * Return: 0 on success, -ENOMEM if memory allocation fails or -ENOSPC if
- * there is no more space in the XArray.
+ * Return: 0 on success, -ENOMEM if memory could not be allocated or
+ * -EBUSY if there are no free entries in @limit.
  */
-static inline int xa_alloc_irq(struct xarray *xa, u32 *id, u32 max, void *entry,
-		gfp_t gfp)
+static inline int __must_check xa_alloc_irq(struct xarray *xa, u32 *id,
+		void *entry, struct xa_limit limit, gfp_t gfp)
 {
 	int err;
 
 	xa_lock_irq(xa);
-	err = __xa_alloc(xa, id, max, entry, gfp);
+	err = __xa_alloc(xa, id, entry, limit, gfp);
 	xa_unlock_irq(xa);
 
 	return err;
diff --git a/lib/test_xarray.c b/lib/test_xarray.c
index cd74f8f32abe..b5a6b981454d 100644
--- a/lib/test_xarray.c
+++ b/lib/test_xarray.c
@@ -40,9 +40,9 @@ static void *xa_store_index(struct xarray *xa, unsigned long index, gfp_t gfp)
 
 static void xa_alloc_index(struct xarray *xa, unsigned long index, gfp_t gfp)
 {
-	u32 id = 0;
+	u32 id;
 
-	XA_BUG_ON(xa, xa_alloc(xa, &id, UINT_MAX, xa_mk_index(index),
+	XA_BUG_ON(xa, xa_alloc(xa, &id, xa_mk_index(index), xa_limit_32b,
 				gfp) != 0);
 	XA_BUG_ON(xa, id != index);
 }
@@ -640,28 +640,81 @@ static noinline void check_xa_alloc_1(struct xarray *xa, unsigned int base)
 	xa_destroy(xa);
 
 	/* Check that we fail properly at the limit of allocation */
-	id = 0xfffffffeU;
-	XA_BUG_ON(xa, xa_alloc(xa, &id, UINT_MAX, xa_mk_index(id),
+	XA_BUG_ON(xa, xa_alloc(xa, &id, xa_mk_index(UINT_MAX - 1),
+				XA_LIMIT(UINT_MAX - 1, UINT_MAX),
 				GFP_KERNEL) != 0);
 	XA_BUG_ON(xa, id != 0xfffffffeU);
-	XA_BUG_ON(xa, xa_alloc(xa, &id, UINT_MAX, xa_mk_index(id),
+	XA_BUG_ON(xa, xa_alloc(xa, &id, xa_mk_index(UINT_MAX),
+				XA_LIMIT(UINT_MAX - 1, UINT_MAX),
 				GFP_KERNEL) != 0);
 	XA_BUG_ON(xa, id != 0xffffffffU);
-	XA_BUG_ON(xa, xa_alloc(xa, &id, UINT_MAX, xa_mk_index(id),
-				GFP_KERNEL) != -ENOSPC);
-	XA_BUG_ON(xa, id != 0xffffffffU);
+	id = 3;
+	XA_BUG_ON(xa, xa_alloc(xa, &id, xa_mk_index(0),
+				XA_LIMIT(UINT_MAX - 1, UINT_MAX),
+				GFP_KERNEL) != -EBUSY);
+	XA_BUG_ON(xa, id != 3);
 	xa_destroy(xa);
 
-	id = 10;
-	XA_BUG_ON(xa, xa_alloc(xa, &id, 5, xa_mk_index(id),
-				GFP_KERNEL) != -ENOSPC);
+	XA_BUG_ON(xa, xa_alloc(xa, &id, xa_mk_index(10), XA_LIMIT(10, 5),
+				GFP_KERNEL) != -EBUSY);
 	XA_BUG_ON(xa, xa_store_index(xa, 3, GFP_KERNEL) != 0);
-	XA_BUG_ON(xa, xa_alloc(xa, &id, 5, xa_mk_index(id),
-				GFP_KERNEL) != -ENOSPC);
+	XA_BUG_ON(xa, xa_alloc(xa, &id, xa_mk_index(10), XA_LIMIT(10, 5),
+				GFP_KERNEL) != -EBUSY);
 	xa_erase_index(xa, 3);
 	XA_BUG_ON(xa, !xa_empty(xa));
 }
 
+static noinline void check_xa_alloc_2(struct xarray *xa, unsigned int base)
+{
+	unsigned int i, id;
+	unsigned long index;
+	void *entry;
+
+	/* Allocate and free a NULL and check xa_empty() behaves */
+	XA_BUG_ON(xa, !xa_empty(xa));
+	XA_BUG_ON(xa, xa_alloc(xa, &id, NULL, xa_limit_32b, GFP_KERNEL) != 0);
+	XA_BUG_ON(xa, id != base);
+	XA_BUG_ON(xa, xa_empty(xa));
+	XA_BUG_ON(xa, xa_erase(xa, id) != NULL);
+	XA_BUG_ON(xa, !xa_empty(xa));
+
+	/* Ditto, but check destroy instead of erase */
+	XA_BUG_ON(xa, !xa_empty(xa));
+	XA_BUG_ON(xa, xa_alloc(xa, &id, NULL, xa_limit_32b, GFP_KERNEL) != 0);
+	XA_BUG_ON(xa, id != base);
+	XA_BUG_ON(xa, xa_empty(xa));
+	xa_destroy(xa);
+	XA_BUG_ON(xa, !xa_empty(xa));
+
+	for (i = base; i < base + 10; i++) {
+		XA_BUG_ON(xa, xa_alloc(xa, &id, NULL, xa_limit_32b,
+					GFP_KERNEL) != 0);
+		XA_BUG_ON(xa, id != i);
+	}
+
+	XA_BUG_ON(xa, xa_store(xa, 3, xa_mk_index(3), GFP_KERNEL) != NULL);
+	XA_BUG_ON(xa, xa_store(xa, 4, xa_mk_index(4), GFP_KERNEL) != NULL);
+	XA_BUG_ON(xa, xa_store(xa, 4, NULL, GFP_KERNEL) != xa_mk_index(4));
+	XA_BUG_ON(xa, xa_erase(xa, 5) != NULL);
+	XA_BUG_ON(xa, xa_alloc(xa, &id, NULL, xa_limit_32b, GFP_KERNEL) != 0);
+	XA_BUG_ON(xa, id != 5);
+
+	xa_for_each(xa, index, entry) {
+		xa_erase_index(xa, index);
+	}
+
+	for (i = base; i < base + 9; i++) {
+		XA_BUG_ON(xa, xa_erase(xa, i) != NULL);
+		XA_BUG_ON(xa, xa_empty(xa));
+	}
+	XA_BUG_ON(xa, xa_erase(xa, 8) != NULL);
+	XA_BUG_ON(xa, xa_empty(xa));
+	XA_BUG_ON(xa, xa_erase(xa, base + 9) != NULL);
+	XA_BUG_ON(xa, !xa_empty(xa));
+
+	xa_destroy(xa);
+}
+
 static DEFINE_XARRAY_ALLOC(xa0);
 static DEFINE_XARRAY_ALLOC1(xa1);
 
@@ -669,6 +722,8 @@ static noinline void check_xa_alloc(void)
 {
 	check_xa_alloc_1(&xa0, 0);
 	check_xa_alloc_1(&xa1, 1);
+	check_xa_alloc_2(&xa0, 0);
+	check_xa_alloc_2(&xa1, 1);
 }
 
 static noinline void __check_store_iter(struct xarray *xa, unsigned long start,
@@ -1219,9 +1274,8 @@ static void check_align_1(struct xarray *xa, char *name)
 	void *entry;
 
 	for (i = 0; i < 8; i++) {
-		id = 0;
-		XA_BUG_ON(xa, xa_alloc(xa, &id, UINT_MAX, name + i, GFP_KERNEL)
-				!= 0);
+		XA_BUG_ON(xa, xa_alloc(xa, &id, name + i, xa_limit_32b,
+					GFP_KERNEL) != 0);
 		XA_BUG_ON(xa, id != i);
 	}
 	xa_for_each(xa, index, entry)
diff --git a/lib/xarray.c b/lib/xarray.c
index 468fb7b7963f..c707388fb05e 100644
--- a/lib/xarray.c
+++ b/lib/xarray.c
@@ -1615,23 +1615,23 @@ EXPORT_SYMBOL(xa_store_range);
  * __xa_alloc() - Find somewhere to store this entry in the XArray.
  * @xa: XArray.
  * @id: Pointer to ID.
- * @max: Maximum ID to allocate (inclusive).
+ * @limit: Range for allocated ID.
  * @entry: New entry.
  * @gfp: Memory allocation flags.
  *
- * Allocates an unused ID in the range specified by @id and @max.
- * Updates the @id pointer with the index, then stores the entry at that
- * index.  A concurrent lookup will not see an uninitialised @id.
+ * Finds an empty entry in @xa between @limit.min and @limit.max,
+ * stores the index into the @id pointer, then stores the entry at
+ * that index.  A concurrent lookup will not see an uninitialised @id.
  *
  * Context: Any context.  Expects xa_lock to be held on entry.  May
  * release and reacquire xa_lock if @gfp flags permit.
- * Return: 0 on success, -ENOMEM if memory allocation fails or -ENOSPC if
- * there is no more space in the XArray.
+ * Return: 0 on success, -ENOMEM if memory could not be allocated or
+ * -EBUSY if there are no free entries in @limit.
  */
-int __xa_alloc(struct xarray *xa, u32 *id, u32 max, void *entry, gfp_t gfp)
+int __xa_alloc(struct xarray *xa, u32 *id, void *entry,
+		struct xa_limit limit, gfp_t gfp)
 {
 	XA_STATE(xas, xa, 0);
-	int err;
 
 	if (WARN_ON_ONCE(xa_is_advanced(entry)))
 		return -EINVAL;
@@ -1642,18 +1642,17 @@ int __xa_alloc(struct xarray *xa, u32 *id, u32 max, void *entry, gfp_t gfp)
 		entry = XA_ZERO_ENTRY;
 
 	do {
-		xas.xa_index = *id;
-		xas_find_marked(&xas, max, XA_FREE_MARK);
+		xas.xa_index = limit.min;
+		xas_find_marked(&xas, limit.max, XA_FREE_MARK);
 		if (xas.xa_node == XAS_RESTART)
-			xas_set_err(&xas, -ENOSPC);
+			xas_set_err(&xas, -EBUSY);
+		else
+			*id = xas.xa_index;
 		xas_store(&xas, entry);
 		xas_clear_mark(&xas, XA_FREE_MARK);
 	} while (__xas_nomem(&xas, gfp));
 
-	err = xas_error(&xas);
-	if (!err)
-		*id = xas.xa_index;
-	return err;
+	return xas_error(&xas);
 }
 EXPORT_SYMBOL(__xa_alloc);
 
-- 
cgit v1.2.3


From 2fa044e51a1f35d7b04cbde07ec513b0ba195e38 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Tue, 6 Nov 2018 14:13:35 -0500
Subject: XArray: Add cyclic allocation

This differs slightly from the IDR equivalent in five ways.

1. It can allocate up to UINT_MAX instead of being limited to INT_MAX,
   like xa_alloc().  Also like xa_alloc(), it will write to the 'id'
   pointer before placing the entry in the XArray.
2. The 'next' cursor is allocated separately from the XArray instead
   of being part of the IDR.  This saves memory for all the users which
   do not use the cyclic allocation API and suits some users better.
3. It returns -EBUSY instead of -ENOSPC.
4. It will attempt to wrap back to the minimum value on memory allocation
   failure as well as on an -EBUSY error, assuming that a user would
   rather allocate a small ID than suffer an ID allocation failure.
5. It reports whether it has wrapped, which is important to some users.

Signed-off-by: Matthew Wilcox <willy@infradead.org>
---
 Documentation/core-api/xarray.rst |   4 +-
 include/linux/xarray.h            | 102 ++++++++++++++++++++++++++++++++++++++
 lib/test_xarray.c                 |  53 ++++++++++++++++++++
 lib/xarray.c                      |  50 +++++++++++++++++++
 4 files changed, 208 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/Documentation/core-api/xarray.rst b/Documentation/core-api/xarray.rst
index e90c4925cd37..c7436da5c4ad 100644
--- a/Documentation/core-api/xarray.rst
+++ b/Documentation/core-api/xarray.rst
@@ -144,7 +144,9 @@ you only want to free the entry if it's ``NULL``).
 
 By default, the lowest free entry is allocated starting from 0.  If you
 want to allocate entries starting at 1, it is more efficient to use
-:c:func:`DEFINE_XARRAY_ALLOC1` or ``XA_FLAGS_ALLOC1``.
+:c:func:`DEFINE_XARRAY_ALLOC1` or ``XA_FLAGS_ALLOC1``.  If you want to
+allocate IDs up to a maximum, then wrap back around to the lowest free
+ID, you can use :c:func:`xa_alloc_cyclic`.
 
 You cannot use ``XA_MARK_0`` with an allocating XArray as this mark
 is used to track whether an entry is free or not.  The other marks are
diff --git a/include/linux/xarray.h b/include/linux/xarray.h
index 883bb958e462..5ed6b462e754 100644
--- a/include/linux/xarray.h
+++ b/include/linux/xarray.h
@@ -242,6 +242,7 @@ enum xa_lock_type {
 #define XA_FLAGS_LOCK_BH	((__force gfp_t)XA_LOCK_BH)
 #define XA_FLAGS_TRACK_FREE	((__force gfp_t)4U)
 #define XA_FLAGS_ZERO_BUSY	((__force gfp_t)8U)
+#define XA_FLAGS_ALLOC_WRAPPED	((__force gfp_t)16U)
 #define XA_FLAGS_MARK(mark)	((__force gfp_t)((1U << __GFP_BITS_SHIFT) << \
 						(__force unsigned)(mark)))
 
@@ -499,6 +500,8 @@ void *__xa_cmpxchg(struct xarray *, unsigned long index, void *old,
 int __xa_insert(struct xarray *, unsigned long index, void *entry, gfp_t);
 int __must_check __xa_alloc(struct xarray *, u32 *id, void *entry,
 		struct xa_limit, gfp_t);
+int __must_check __xa_alloc_cyclic(struct xarray *, u32 *id, void *entry,
+		struct xa_limit, u32 *next, gfp_t);
 int __xa_reserve(struct xarray *, unsigned long index, gfp_t);
 void __xa_set_mark(struct xarray *, unsigned long index, xa_mark_t);
 void __xa_clear_mark(struct xarray *, unsigned long index, xa_mark_t);
@@ -858,6 +861,105 @@ static inline int __must_check xa_alloc_irq(struct xarray *xa, u32 *id,
 	return err;
 }
 
+/**
+ * xa_alloc_cyclic() - Find somewhere to store this entry in the XArray.
+ * @xa: XArray.
+ * @id: Pointer to ID.
+ * @entry: New entry.
+ * @limit: Range of allocated ID.
+ * @next: Pointer to next ID to allocate.
+ * @gfp: Memory allocation flags.
+ *
+ * Finds an empty entry in @xa between @limit.min and @limit.max,
+ * stores the index into the @id pointer, then stores the entry at
+ * that index.  A concurrent lookup will not see an uninitialised @id.
+ * The search for an empty entry will start at @next and will wrap
+ * around if necessary.
+ *
+ * Context: Any context.  Takes and releases the xa_lock.  May sleep if
+ * the @gfp flags permit.
+ * Return: 0 if the allocation succeeded without wrapping.  1 if the
+ * allocation succeeded after wrapping, -ENOMEM if memory could not be
+ * allocated or -EBUSY if there are no free entries in @limit.
+ */
+static inline int xa_alloc_cyclic(struct xarray *xa, u32 *id, void *entry,
+		struct xa_limit limit, u32 *next, gfp_t gfp)
+{
+	int err;
+
+	xa_lock(xa);
+	err = __xa_alloc_cyclic(xa, id, entry, limit, next, gfp);
+	xa_unlock(xa);
+
+	return err;
+}
+
+/**
+ * xa_alloc_cyclic_bh() - Find somewhere to store this entry in the XArray.
+ * @xa: XArray.
+ * @id: Pointer to ID.
+ * @entry: New entry.
+ * @limit: Range of allocated ID.
+ * @next: Pointer to next ID to allocate.
+ * @gfp: Memory allocation flags.
+ *
+ * Finds an empty entry in @xa between @limit.min and @limit.max,
+ * stores the index into the @id pointer, then stores the entry at
+ * that index.  A concurrent lookup will not see an uninitialised @id.
+ * The search for an empty entry will start at @next and will wrap
+ * around if necessary.
+ *
+ * Context: Any context.  Takes and releases the xa_lock while
+ * disabling softirqs.  May sleep if the @gfp flags permit.
+ * Return: 0 if the allocation succeeded without wrapping.  1 if the
+ * allocation succeeded after wrapping, -ENOMEM if memory could not be
+ * allocated or -EBUSY if there are no free entries in @limit.
+ */
+static inline int xa_alloc_cyclic_bh(struct xarray *xa, u32 *id, void *entry,
+		struct xa_limit limit, u32 *next, gfp_t gfp)
+{
+	int err;
+
+	xa_lock_bh(xa);
+	err = __xa_alloc_cyclic(xa, id, entry, limit, next, gfp);
+	xa_unlock_bh(xa);
+
+	return err;
+}
+
+/**
+ * xa_alloc_cyclic_irq() - Find somewhere to store this entry in the XArray.
+ * @xa: XArray.
+ * @id: Pointer to ID.
+ * @entry: New entry.
+ * @limit: Range of allocated ID.
+ * @next: Pointer to next ID to allocate.
+ * @gfp: Memory allocation flags.
+ *
+ * Finds an empty entry in @xa between @limit.min and @limit.max,
+ * stores the index into the @id pointer, then stores the entry at
+ * that index.  A concurrent lookup will not see an uninitialised @id.
+ * The search for an empty entry will start at @next and will wrap
+ * around if necessary.
+ *
+ * Context: Process context.  Takes and releases the xa_lock while
+ * disabling interrupts.  May sleep if the @gfp flags permit.
+ * Return: 0 if the allocation succeeded without wrapping.  1 if the
+ * allocation succeeded after wrapping, -ENOMEM if memory could not be
+ * allocated or -EBUSY if there are no free entries in @limit.
+ */
+static inline int xa_alloc_cyclic_irq(struct xarray *xa, u32 *id, void *entry,
+		struct xa_limit limit, u32 *next, gfp_t gfp)
+{
+	int err;
+
+	xa_lock_irq(xa);
+	err = __xa_alloc_cyclic(xa, id, entry, limit, next, gfp);
+	xa_unlock_irq(xa);
+
+	return err;
+}
+
 /**
  * xa_reserve() - Reserve this index in the XArray.
  * @xa: XArray.
diff --git a/lib/test_xarray.c b/lib/test_xarray.c
index b5a6b981454d..eaf53f742c72 100644
--- a/lib/test_xarray.c
+++ b/lib/test_xarray.c
@@ -715,6 +715,57 @@ static noinline void check_xa_alloc_2(struct xarray *xa, unsigned int base)
 	xa_destroy(xa);
 }
 
+static noinline void check_xa_alloc_3(struct xarray *xa, unsigned int base)
+{
+	struct xa_limit limit = XA_LIMIT(1, 0x3fff);
+	u32 next = 0;
+	unsigned int i, id;
+	unsigned long index;
+	void *entry;
+
+	XA_BUG_ON(xa, xa_alloc_cyclic(xa, &id, xa_mk_index(1), limit,
+				&next, GFP_KERNEL) != 0);
+	XA_BUG_ON(xa, id != 1);
+
+	next = 0x3ffd;
+	XA_BUG_ON(xa, xa_alloc_cyclic(xa, &id, xa_mk_index(0x3ffd), limit,
+				&next, GFP_KERNEL) != 0);
+	XA_BUG_ON(xa, id != 0x3ffd);
+	xa_erase_index(xa, 0x3ffd);
+	xa_erase_index(xa, 1);
+	XA_BUG_ON(xa, !xa_empty(xa));
+
+	for (i = 0x3ffe; i < 0x4003; i++) {
+		if (i < 0x4000)
+			entry = xa_mk_index(i);
+		else
+			entry = xa_mk_index(i - 0x3fff);
+		XA_BUG_ON(xa, xa_alloc_cyclic(xa, &id, entry, limit,
+					&next, GFP_KERNEL) != (id == 1));
+		XA_BUG_ON(xa, xa_mk_index(id) != entry);
+	}
+
+	/* Check wrap-around is handled correctly */
+	if (base != 0)
+		xa_erase_index(xa, base);
+	xa_erase_index(xa, base + 1);
+	next = UINT_MAX;
+	XA_BUG_ON(xa, xa_alloc_cyclic(xa, &id, xa_mk_index(UINT_MAX),
+				xa_limit_32b, &next, GFP_KERNEL) != 0);
+	XA_BUG_ON(xa, id != UINT_MAX);
+	XA_BUG_ON(xa, xa_alloc_cyclic(xa, &id, xa_mk_index(base),
+				xa_limit_32b, &next, GFP_KERNEL) != 1);
+	XA_BUG_ON(xa, id != base);
+	XA_BUG_ON(xa, xa_alloc_cyclic(xa, &id, xa_mk_index(base + 1),
+				xa_limit_32b, &next, GFP_KERNEL) != 0);
+	XA_BUG_ON(xa, id != base + 1);
+
+	xa_for_each(xa, index, entry)
+		xa_erase_index(xa, index);
+
+	XA_BUG_ON(xa, !xa_empty(xa));
+}
+
 static DEFINE_XARRAY_ALLOC(xa0);
 static DEFINE_XARRAY_ALLOC1(xa1);
 
@@ -724,6 +775,8 @@ static noinline void check_xa_alloc(void)
 	check_xa_alloc_1(&xa1, 1);
 	check_xa_alloc_2(&xa0, 0);
 	check_xa_alloc_2(&xa1, 1);
+	check_xa_alloc_3(&xa0, 0);
+	check_xa_alloc_3(&xa1, 1);
 }
 
 static noinline void __check_store_iter(struct xarray *xa, unsigned long start,
diff --git a/lib/xarray.c b/lib/xarray.c
index c707388fb05e..89e37ac50850 100644
--- a/lib/xarray.c
+++ b/lib/xarray.c
@@ -1656,6 +1656,56 @@ int __xa_alloc(struct xarray *xa, u32 *id, void *entry,
 }
 EXPORT_SYMBOL(__xa_alloc);
 
+/**
+ * __xa_alloc_cyclic() - Find somewhere to store this entry in the XArray.
+ * @xa: XArray.
+ * @id: Pointer to ID.
+ * @entry: New entry.
+ * @limit: Range of allocated ID.
+ * @next: Pointer to next ID to allocate.
+ * @gfp: Memory allocation flags.
+ *
+ * Finds an empty entry in @xa between @limit.min and @limit.max,
+ * stores the index into the @id pointer, then stores the entry at
+ * that index.  A concurrent lookup will not see an uninitialised @id.
+ * The search for an empty entry will start at @next and will wrap
+ * around if necessary.
+ *
+ * Context: Any context.  Expects xa_lock to be held on entry.  May
+ * release and reacquire xa_lock if @gfp flags permit.
+ * Return: 0 if the allocation succeeded without wrapping.  1 if the
+ * allocation succeeded after wrapping, -ENOMEM if memory could not be
+ * allocated or -EBUSY if there are no free entries in @limit.
+ */
+int __xa_alloc_cyclic(struct xarray *xa, u32 *id, void *entry,
+		struct xa_limit limit, u32 *next, gfp_t gfp)
+{
+	u32 min = limit.min;
+	int ret;
+
+	limit.min = max(min, *next);
+	ret = __xa_alloc(xa, id, entry, limit, gfp);
+	if ((xa->xa_flags & XA_FLAGS_ALLOC_WRAPPED) && ret == 0) {
+		xa->xa_flags &= ~XA_FLAGS_ALLOC_WRAPPED;
+		ret = 1;
+	}
+
+	if (ret < 0 && limit.min > min) {
+		limit.min = min;
+		ret = __xa_alloc(xa, id, entry, limit, gfp);
+		if (ret == 0)
+			ret = 1;
+	}
+
+	if (ret >= 0) {
+		*next = *id + 1;
+		if (*next == 0)
+			xa->xa_flags |= XA_FLAGS_ALLOC_WRAPPED;
+	}
+	return ret;
+}
+EXPORT_SYMBOL(__xa_alloc_cyclic);
+
 /**
  * __xa_set_mark() - Set this mark on this entry while locked.
  * @xa: XArray.
-- 
cgit v1.2.3


From 60b8f0ddf1a927ef02141a6610fd52575134f821 Mon Sep 17 00:00:00 2001
From: Phil Edworthy <phil.edworthy@renesas.com>
Date: Mon, 3 Dec 2018 11:13:09 +0000
Subject: clk: Add (devm_)clk_get_optional() functions

This adds clk_get_optional() and devm_clk_get_optional() functions to get
optional clocks.

They behave the same as (devm_)clk_get() except where there is no clock
producer. In this case, instead of returning -ENOENT, the function
returns NULL. This makes error checking simpler and allows
clk_prepare_enable, etc to be called on the returned reference
without additional checks.

Signed-off-by: Phil Edworthy <phil.edworthy@renesas.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Russell King <linux@armlinux.org.uk>
[sboyd@kernel.org: Document in devres.txt]
Signed-off-by: Stephen Boyd <sboyd@kernel.org>
---
 Documentation/driver-model/devres.txt |  1 +
 drivers/clk/clk-devres.c              | 11 +++++++++++
 include/linux/clk.h                   | 36 +++++++++++++++++++++++++++++++++++
 3 files changed, 48 insertions(+)

(limited to 'include')

diff --git a/Documentation/driver-model/devres.txt b/Documentation/driver-model/devres.txt
index b277cafce71e..83f38c0439cd 100644
--- a/Documentation/driver-model/devres.txt
+++ b/Documentation/driver-model/devres.txt
@@ -242,6 +242,7 @@ certainly invest a bit more effort into libata core layer).
 
 CLOCK
   devm_clk_get()
+  devm_clk_get_optional()
   devm_clk_put()
   devm_clk_hw_register()
   devm_of_clk_add_hw_provider()
diff --git a/drivers/clk/clk-devres.c b/drivers/clk/clk-devres.c
index c9a86156ced8..daa1fc8fba53 100644
--- a/drivers/clk/clk-devres.c
+++ b/drivers/clk/clk-devres.c
@@ -29,6 +29,17 @@ struct clk *devm_clk_get(struct device *dev, const char *id)
 }
 EXPORT_SYMBOL(devm_clk_get);
 
+struct clk *devm_clk_get_optional(struct device *dev, const char *id)
+{
+	struct clk *clk = devm_clk_get(dev, id);
+
+	if (clk == ERR_PTR(-ENOENT))
+		return NULL;
+
+	return clk;
+}
+EXPORT_SYMBOL(devm_clk_get_optional);
+
 struct clk_bulk_devres {
 	struct clk_bulk_data *clks;
 	int num_clks;
diff --git a/include/linux/clk.h b/include/linux/clk.h
index a7773b5c0b9f..d8bc1a856b39 100644
--- a/include/linux/clk.h
+++ b/include/linux/clk.h
@@ -383,6 +383,17 @@ int __must_check devm_clk_bulk_get_all(struct device *dev,
  */
 struct clk *devm_clk_get(struct device *dev, const char *id);
 
+/**
+ * devm_clk_get_optional - lookup and obtain a managed reference to an optional
+ *			   clock producer.
+ * @dev: device for clock "consumer"
+ * @id: clock consumer ID
+ *
+ * Behaves the same as devm_clk_get() except where there is no clock producer.
+ * In this case, instead of returning -ENOENT, the function returns NULL.
+ */
+struct clk *devm_clk_get_optional(struct device *dev, const char *id);
+
 /**
  * devm_get_clk_from_child - lookup and obtain a managed reference to a
  *			     clock producer from child node.
@@ -718,6 +729,12 @@ static inline struct clk *devm_clk_get(struct device *dev, const char *id)
 	return NULL;
 }
 
+static inline struct clk *devm_clk_get_optional(struct device *dev,
+						const char *id)
+{
+	return NULL;
+}
+
 static inline int __must_check devm_clk_bulk_get(struct device *dev, int num_clks,
 						 struct clk_bulk_data *clks)
 {
@@ -862,6 +879,25 @@ static inline void clk_bulk_disable_unprepare(int num_clks,
 	clk_bulk_unprepare(num_clks, clks);
 }
 
+/**
+ * clk_get_optional - lookup and obtain a reference to an optional clock
+ *		      producer.
+ * @dev: device for clock "consumer"
+ * @id: clock consumer ID
+ *
+ * Behaves the same as clk_get() except where there is no clock producer. In
+ * this case, instead of returning -ENOENT, the function returns NULL.
+ */
+static inline struct clk *clk_get_optional(struct device *dev, const char *id)
+{
+	struct clk *clk = clk_get(dev, id);
+
+	if (clk == ERR_PTR(-ENOENT))
+		return NULL;
+
+	return clk;
+}
+
 #if defined(CONFIG_OF) && defined(CONFIG_COMMON_CLK)
 struct clk *of_clk_get(struct device_node *np, int index);
 struct clk *of_clk_get_by_name(struct device_node *np, const char *name);
-- 
cgit v1.2.3


From 3eee6c7d119cd8563ad25898f94d6c1b514da548 Mon Sep 17 00:00:00 2001
From: Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
Date: Fri, 7 Dec 2018 13:09:39 +0200
Subject: clkdev: add managed clkdev lookup registration

Clkdev registration lacks of managed registration functions and it
seems few drivers do not drop clkdev lookups at exit. Add
devm_clk_hw_register_clkdev and devm_clk_release_clkdev to ease lookup
releasing at exit.

Signed-off-by: Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
Signed-off-by: Stephen Boyd <sboyd@kernel.org>
---
 Documentation/driver-model/devres.txt |   1 +
 drivers/clk/clkdev.c                  | 111 +++++++++++++++++++++++++++-------
 include/linux/clkdev.h                |   4 ++
 3 files changed, 93 insertions(+), 23 deletions(-)

(limited to 'include')

diff --git a/Documentation/driver-model/devres.txt b/Documentation/driver-model/devres.txt
index b277cafce71e..805b7bf5d98f 100644
--- a/Documentation/driver-model/devres.txt
+++ b/Documentation/driver-model/devres.txt
@@ -245,6 +245,7 @@ CLOCK
   devm_clk_put()
   devm_clk_hw_register()
   devm_of_clk_add_hw_provider()
+  devm_clk_hw_register_clkdev()
 
 DMA
   dmaenginem_async_device_register()
diff --git a/drivers/clk/clkdev.c b/drivers/clk/clkdev.c
index 9ab3db8b3988..4621f8a91fc0 100644
--- a/drivers/clk/clkdev.c
+++ b/drivers/clk/clkdev.c
@@ -401,6 +401,23 @@ static struct clk_lookup *__clk_register_clkdev(struct clk_hw *hw,
 	return cl;
 }
 
+static int do_clk_register_clkdev(struct clk_hw *hw,
+	struct clk_lookup **cl, const char *con_id, const char *dev_id)
+{
+	if (IS_ERR(hw))
+		return PTR_ERR(hw);
+	/*
+	 * Since dev_id can be NULL, and NULL is handled specially, we must
+	 * pass it as either a NULL format string, or with "%s".
+	 */
+	if (dev_id)
+		*cl = __clk_register_clkdev(hw, con_id, "%s", dev_id);
+	else
+		*cl = __clk_register_clkdev(hw, con_id, NULL);
+
+	return *cl ? 0 : -ENOMEM;
+}
+
 /**
  * clk_register_clkdev - register one clock lookup for a struct clk
  * @clk: struct clk to associate with all clk_lookups
@@ -423,17 +440,8 @@ int clk_register_clkdev(struct clk *clk, const char *con_id,
 	if (IS_ERR(clk))
 		return PTR_ERR(clk);
 
-	/*
-	 * Since dev_id can be NULL, and NULL is handled specially, we must
-	 * pass it as either a NULL format string, or with "%s".
-	 */
-	if (dev_id)
-		cl = __clk_register_clkdev(__clk_get_hw(clk), con_id, "%s",
-					   dev_id);
-	else
-		cl = __clk_register_clkdev(__clk_get_hw(clk), con_id, NULL);
-
-	return cl ? 0 : -ENOMEM;
+	return do_clk_register_clkdev(__clk_get_hw(clk), &cl, con_id,
+					      dev_id);
 }
 EXPORT_SYMBOL(clk_register_clkdev);
 
@@ -456,18 +464,75 @@ int clk_hw_register_clkdev(struct clk_hw *hw, const char *con_id,
 {
 	struct clk_lookup *cl;
 
-	if (IS_ERR(hw))
-		return PTR_ERR(hw);
+	return do_clk_register_clkdev(hw, &cl, con_id, dev_id);
+}
+EXPORT_SYMBOL(clk_hw_register_clkdev);
 
-	/*
-	 * Since dev_id can be NULL, and NULL is handled specially, we must
-	 * pass it as either a NULL format string, or with "%s".
-	 */
-	if (dev_id)
-		cl = __clk_register_clkdev(hw, con_id, "%s", dev_id);
-	else
-		cl = __clk_register_clkdev(hw, con_id, NULL);
+static void devm_clkdev_release(struct device *dev, void *res)
+{
+	clkdev_drop(*(struct clk_lookup **)res);
+}
 
-	return cl ? 0 : -ENOMEM;
+static int devm_clk_match_clkdev(struct device *dev, void *res, void *data)
+{
+	struct clk_lookup **l = res;
+
+	return *l == data;
 }
-EXPORT_SYMBOL(clk_hw_register_clkdev);
+
+/**
+ * devm_clk_release_clkdev - Resource managed clkdev lookup release
+ * @dev: device this lookup is bound
+ * @con_id: connection ID string on device
+ * @dev_id: format string describing device name
+ *
+ * Drop the clkdev lookup created with devm_clk_hw_register_clkdev.
+ * Normally this function will not need to be called and the resource
+ * management code will ensure that the resource is freed.
+ */
+void devm_clk_release_clkdev(struct device *dev, const char *con_id,
+			     const char *dev_id)
+{
+	struct clk_lookup *cl;
+	int rval;
+
+	cl = clk_find(dev_id, con_id);
+	WARN_ON(!cl);
+	rval = devres_release(dev, devm_clkdev_release,
+			      devm_clk_match_clkdev, cl);
+	WARN_ON(rval);
+}
+EXPORT_SYMBOL(devm_clk_release_clkdev);
+
+/**
+ * devm_clk_hw_register_clkdev - managed clk lookup registration for clk_hw
+ * @dev: device this lookup is bound
+ * @hw: struct clk_hw to associate with all clk_lookups
+ * @con_id: connection ID string on device
+ * @dev_id: format string describing device name
+ *
+ * con_id or dev_id may be NULL as a wildcard, just as in the rest of
+ * clkdev.
+ *
+ * To make things easier for mass registration, we detect error clk_hws
+ * from a previous clk_hw_register_*() call, and return the error code for
+ * those.  This is to permit this function to be called immediately
+ * after clk_hw_register_*().
+ */
+int devm_clk_hw_register_clkdev(struct device *dev, struct clk_hw *hw,
+				const char *con_id, const char *dev_id)
+{
+	int rval = -ENOMEM;
+	struct clk_lookup **cl;
+
+	cl = devres_alloc(devm_clkdev_release, sizeof(*cl), GFP_KERNEL);
+	if (cl) {
+		rval = do_clk_register_clkdev(hw, cl, con_id, dev_id);
+		if (!rval)
+			devres_add(dev, cl);
+		else
+			devres_free(cl);
+	}
+	return rval;
+}
+EXPORT_SYMBOL(devm_clk_hw_register_clkdev);
diff --git a/include/linux/clkdev.h b/include/linux/clkdev.h
index 4890ff033220..ccb32af5848b 100644
--- a/include/linux/clkdev.h
+++ b/include/linux/clkdev.h
@@ -52,4 +52,8 @@ int clk_add_alias(const char *, const char *, const char *, struct device *);
 int clk_register_clkdev(struct clk *, const char *, const char *);
 int clk_hw_register_clkdev(struct clk_hw *, const char *, const char *);
 
+int devm_clk_hw_register_clkdev(struct device *dev, struct clk_hw *hw,
+				const char *con_id, const char *dev_id);
+void devm_clk_release_clkdev(struct device *dev, const char *con_id,
+			     const char *dev_id);
 #endif
-- 
cgit v1.2.3


From 8f2566225ae2d62d532bb1810ed74fa4bbc5bbdb Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Sat, 2 Feb 2019 12:50:43 +0100
Subject: flow_offload: add flow_rule and flow_match structures and use them

This patch wraps the dissector key and mask - that flower uses to
represent the matching side - around the flow_match structure.

To avoid a follow up patch that would edit the same LoCs in the drivers,
this patch also wraps this new flow match structure around the flow rule
object. This new structure will also contain the flow actions in follow
up patches.

This introduces two new interfaces:

	bool flow_rule_match_key(rule, dissector_id)

that returns true if a given matching key is set on, and:

	flow_rule_match_XYZ(rule, &match);

To fetch the matching side XYZ into the match container structure, to
retrieve the key and the mask with one single call.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c       | 174 ++++-----
 .../net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c   | 194 ++++------
 drivers/net/ethernet/intel/i40e/i40e_main.c        | 178 ++++-----
 drivers/net/ethernet/intel/iavf/iavf_main.c        | 195 ++++------
 drivers/net/ethernet/intel/igb/igb_main.c          |  64 ++--
 .../net/ethernet/mellanox/mlx5/core/en/tc_tun.c    |  68 ++--
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c    | 376 ++++++++-----------
 .../net/ethernet/mellanox/mlxsw/spectrum_flower.c  | 202 +++++-----
 drivers/net/ethernet/netronome/nfp/flower/action.c |  11 +-
 drivers/net/ethernet/netronome/nfp/flower/match.c  | 417 +++++++++++----------
 .../net/ethernet/netronome/nfp/flower/offload.c    | 149 +++-----
 drivers/net/ethernet/qlogic/qede/qede_filter.c     |  85 ++---
 include/net/flow_offload.h                         | 115 ++++++
 include/net/pkt_cls.h                              |  11 +-
 net/core/Makefile                                  |   2 +-
 net/core/flow_offload.c                            | 143 +++++++
 net/sched/cls_flower.c                             |  47 ++-
 17 files changed, 1209 insertions(+), 1222 deletions(-)
 create mode 100644 include/net/flow_offload.h
 create mode 100644 net/core/flow_offload.c

(limited to 'include')

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
index c683b5e96b1d..90a2170c5138 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
@@ -177,18 +177,12 @@ static int bnxt_tc_parse_actions(struct bnxt *bp,
 	return 0;
 }
 
-#define GET_KEY(flow_cmd, key_type)					\
-		skb_flow_dissector_target((flow_cmd)->dissector, key_type,\
-					  (flow_cmd)->key)
-#define GET_MASK(flow_cmd, key_type)					\
-		skb_flow_dissector_target((flow_cmd)->dissector, key_type,\
-					  (flow_cmd)->mask)
-
 static int bnxt_tc_parse_flow(struct bnxt *bp,
 			      struct tc_cls_flower_offload *tc_flow_cmd,
 			      struct bnxt_tc_flow *flow)
 {
-	struct flow_dissector *dissector = tc_flow_cmd->dissector;
+	struct flow_rule *rule = tc_cls_flower_offload_flow_rule(tc_flow_cmd);
+	struct flow_dissector *dissector = rule->match.dissector;
 
 	/* KEY_CONTROL and KEY_BASIC are needed for forming a meaningful key */
 	if ((dissector->used_keys & BIT(FLOW_DISSECTOR_KEY_CONTROL)) == 0 ||
@@ -198,140 +192,120 @@ static int bnxt_tc_parse_flow(struct bnxt *bp,
 		return -EOPNOTSUPP;
 	}
 
-	if (dissector_uses_key(dissector, FLOW_DISSECTOR_KEY_BASIC)) {
-		struct flow_dissector_key_basic *key =
-			GET_KEY(tc_flow_cmd, FLOW_DISSECTOR_KEY_BASIC);
-		struct flow_dissector_key_basic *mask =
-			GET_MASK(tc_flow_cmd, FLOW_DISSECTOR_KEY_BASIC);
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
+		struct flow_match_basic match;
 
-		flow->l2_key.ether_type = key->n_proto;
-		flow->l2_mask.ether_type = mask->n_proto;
+		flow_rule_match_basic(rule, &match);
+		flow->l2_key.ether_type = match.key->n_proto;
+		flow->l2_mask.ether_type = match.mask->n_proto;
 
-		if (key->n_proto == htons(ETH_P_IP) ||
-		    key->n_proto == htons(ETH_P_IPV6)) {
-			flow->l4_key.ip_proto = key->ip_proto;
-			flow->l4_mask.ip_proto = mask->ip_proto;
+		if (match.key->n_proto == htons(ETH_P_IP) ||
+		    match.key->n_proto == htons(ETH_P_IPV6)) {
+			flow->l4_key.ip_proto = match.key->ip_proto;
+			flow->l4_mask.ip_proto = match.mask->ip_proto;
 		}
 	}
 
-	if (dissector_uses_key(dissector, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
-		struct flow_dissector_key_eth_addrs *key =
-			GET_KEY(tc_flow_cmd, FLOW_DISSECTOR_KEY_ETH_ADDRS);
-		struct flow_dissector_key_eth_addrs *mask =
-			GET_MASK(tc_flow_cmd, FLOW_DISSECTOR_KEY_ETH_ADDRS);
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
+		struct flow_match_eth_addrs match;
 
+		flow_rule_match_eth_addrs(rule, &match);
 		flow->flags |= BNXT_TC_FLOW_FLAGS_ETH_ADDRS;
-		ether_addr_copy(flow->l2_key.dmac, key->dst);
-		ether_addr_copy(flow->l2_mask.dmac, mask->dst);
-		ether_addr_copy(flow->l2_key.smac, key->src);
-		ether_addr_copy(flow->l2_mask.smac, mask->src);
+		ether_addr_copy(flow->l2_key.dmac, match.key->dst);
+		ether_addr_copy(flow->l2_mask.dmac, match.mask->dst);
+		ether_addr_copy(flow->l2_key.smac, match.key->src);
+		ether_addr_copy(flow->l2_mask.smac, match.mask->src);
 	}
 
-	if (dissector_uses_key(dissector, FLOW_DISSECTOR_KEY_VLAN)) {
-		struct flow_dissector_key_vlan *key =
-			GET_KEY(tc_flow_cmd, FLOW_DISSECTOR_KEY_VLAN);
-		struct flow_dissector_key_vlan *mask =
-			GET_MASK(tc_flow_cmd, FLOW_DISSECTOR_KEY_VLAN);
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) {
+		struct flow_match_vlan match;
 
+		flow_rule_match_vlan(rule, &match);
 		flow->l2_key.inner_vlan_tci =
-		   cpu_to_be16(VLAN_TCI(key->vlan_id, key->vlan_priority));
+			cpu_to_be16(VLAN_TCI(match.key->vlan_id,
+					     match.key->vlan_priority));
 		flow->l2_mask.inner_vlan_tci =
-		   cpu_to_be16((VLAN_TCI(mask->vlan_id, mask->vlan_priority)));
+			cpu_to_be16((VLAN_TCI(match.mask->vlan_id,
+					      match.mask->vlan_priority)));
 		flow->l2_key.inner_vlan_tpid = htons(ETH_P_8021Q);
 		flow->l2_mask.inner_vlan_tpid = htons(0xffff);
 		flow->l2_key.num_vlans = 1;
 	}
 
-	if (dissector_uses_key(dissector, FLOW_DISSECTOR_KEY_IPV4_ADDRS)) {
-		struct flow_dissector_key_ipv4_addrs *key =
-			GET_KEY(tc_flow_cmd, FLOW_DISSECTOR_KEY_IPV4_ADDRS);
-		struct flow_dissector_key_ipv4_addrs *mask =
-			GET_MASK(tc_flow_cmd, FLOW_DISSECTOR_KEY_IPV4_ADDRS);
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IPV4_ADDRS)) {
+		struct flow_match_ipv4_addrs match;
 
+		flow_rule_match_ipv4_addrs(rule, &match);
 		flow->flags |= BNXT_TC_FLOW_FLAGS_IPV4_ADDRS;
-		flow->l3_key.ipv4.daddr.s_addr = key->dst;
-		flow->l3_mask.ipv4.daddr.s_addr = mask->dst;
-		flow->l3_key.ipv4.saddr.s_addr = key->src;
-		flow->l3_mask.ipv4.saddr.s_addr = mask->src;
-	} else if (dissector_uses_key(dissector,
-				      FLOW_DISSECTOR_KEY_IPV6_ADDRS)) {
-		struct flow_dissector_key_ipv6_addrs *key =
-			GET_KEY(tc_flow_cmd, FLOW_DISSECTOR_KEY_IPV6_ADDRS);
-		struct flow_dissector_key_ipv6_addrs *mask =
-			GET_MASK(tc_flow_cmd, FLOW_DISSECTOR_KEY_IPV6_ADDRS);
-
+		flow->l3_key.ipv4.daddr.s_addr = match.key->dst;
+		flow->l3_mask.ipv4.daddr.s_addr = match.mask->dst;
+		flow->l3_key.ipv4.saddr.s_addr = match.key->src;
+		flow->l3_mask.ipv4.saddr.s_addr = match.mask->src;
+	} else if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IPV6_ADDRS)) {
+		struct flow_match_ipv6_addrs match;
+
+		flow_rule_match_ipv6_addrs(rule, &match);
 		flow->flags |= BNXT_TC_FLOW_FLAGS_IPV6_ADDRS;
-		flow->l3_key.ipv6.daddr = key->dst;
-		flow->l3_mask.ipv6.daddr = mask->dst;
-		flow->l3_key.ipv6.saddr = key->src;
-		flow->l3_mask.ipv6.saddr = mask->src;
+		flow->l3_key.ipv6.daddr = match.key->dst;
+		flow->l3_mask.ipv6.daddr = match.mask->dst;
+		flow->l3_key.ipv6.saddr = match.key->src;
+		flow->l3_mask.ipv6.saddr = match.mask->src;
 	}
 
-	if (dissector_uses_key(dissector, FLOW_DISSECTOR_KEY_PORTS)) {
-		struct flow_dissector_key_ports *key =
-			GET_KEY(tc_flow_cmd, FLOW_DISSECTOR_KEY_PORTS);
-		struct flow_dissector_key_ports *mask =
-			GET_MASK(tc_flow_cmd, FLOW_DISSECTOR_KEY_PORTS);
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
+		struct flow_match_ports match;
 
+		flow_rule_match_ports(rule, &match);
 		flow->flags |= BNXT_TC_FLOW_FLAGS_PORTS;
-		flow->l4_key.ports.dport = key->dst;
-		flow->l4_mask.ports.dport = mask->dst;
-		flow->l4_key.ports.sport = key->src;
-		flow->l4_mask.ports.sport = mask->src;
+		flow->l4_key.ports.dport = match.key->dst;
+		flow->l4_mask.ports.dport = match.mask->dst;
+		flow->l4_key.ports.sport = match.key->src;
+		flow->l4_mask.ports.sport = match.mask->src;
 	}
 
-	if (dissector_uses_key(dissector, FLOW_DISSECTOR_KEY_ICMP)) {
-		struct flow_dissector_key_icmp *key =
-			GET_KEY(tc_flow_cmd, FLOW_DISSECTOR_KEY_ICMP);
-		struct flow_dissector_key_icmp *mask =
-			GET_MASK(tc_flow_cmd, FLOW_DISSECTOR_KEY_ICMP);
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ICMP)) {
+		struct flow_match_icmp match;
 
+		flow_rule_match_icmp(rule, &match);
 		flow->flags |= BNXT_TC_FLOW_FLAGS_ICMP;
-		flow->l4_key.icmp.type = key->type;
-		flow->l4_key.icmp.code = key->code;
-		flow->l4_mask.icmp.type = mask->type;
-		flow->l4_mask.icmp.code = mask->code;
+		flow->l4_key.icmp.type = match.key->type;
+		flow->l4_key.icmp.code = match.key->code;
+		flow->l4_mask.icmp.type = match.mask->type;
+		flow->l4_mask.icmp.code = match.mask->code;
 	}
 
-	if (dissector_uses_key(dissector, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS)) {
-		struct flow_dissector_key_ipv4_addrs *key =
-			GET_KEY(tc_flow_cmd, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS);
-		struct flow_dissector_key_ipv4_addrs *mask =
-				GET_MASK(tc_flow_cmd,
-					 FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS);
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS)) {
+		struct flow_match_ipv4_addrs match;
 
+		flow_rule_match_enc_ipv4_addrs(rule, &match);
 		flow->flags |= BNXT_TC_FLOW_FLAGS_TUNL_IPV4_ADDRS;
-		flow->tun_key.u.ipv4.dst = key->dst;
-		flow->tun_mask.u.ipv4.dst = mask->dst;
-		flow->tun_key.u.ipv4.src = key->src;
-		flow->tun_mask.u.ipv4.src = mask->src;
-	} else if (dissector_uses_key(dissector,
+		flow->tun_key.u.ipv4.dst = match.key->dst;
+		flow->tun_mask.u.ipv4.dst = match.mask->dst;
+		flow->tun_key.u.ipv4.src = match.key->src;
+		flow->tun_mask.u.ipv4.src = match.mask->src;
+	} else if (flow_rule_match_key(rule,
 				      FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS)) {
 		return -EOPNOTSUPP;
 	}
 
-	if (dissector_uses_key(dissector, FLOW_DISSECTOR_KEY_ENC_KEYID)) {
-		struct flow_dissector_key_keyid *key =
-			GET_KEY(tc_flow_cmd, FLOW_DISSECTOR_KEY_ENC_KEYID);
-		struct flow_dissector_key_keyid *mask =
-			GET_MASK(tc_flow_cmd, FLOW_DISSECTOR_KEY_ENC_KEYID);
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) {
+		struct flow_match_enc_keyid match;
 
+		flow_rule_match_enc_keyid(rule, &match);
 		flow->flags |= BNXT_TC_FLOW_FLAGS_TUNL_ID;
-		flow->tun_key.tun_id = key32_to_tunnel_id(key->keyid);
-		flow->tun_mask.tun_id = key32_to_tunnel_id(mask->keyid);
+		flow->tun_key.tun_id = key32_to_tunnel_id(match.key->keyid);
+		flow->tun_mask.tun_id = key32_to_tunnel_id(match.mask->keyid);
 	}
 
-	if (dissector_uses_key(dissector, FLOW_DISSECTOR_KEY_ENC_PORTS)) {
-		struct flow_dissector_key_ports *key =
-			GET_KEY(tc_flow_cmd, FLOW_DISSECTOR_KEY_ENC_PORTS);
-		struct flow_dissector_key_ports *mask =
-			GET_MASK(tc_flow_cmd, FLOW_DISSECTOR_KEY_ENC_PORTS);
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_PORTS)) {
+		struct flow_match_ports match;
 
+		flow_rule_match_enc_ports(rule, &match);
 		flow->flags |= BNXT_TC_FLOW_FLAGS_TUNL_PORTS;
-		flow->tun_key.tp_dst = key->dst;
-		flow->tun_mask.tp_dst = mask->dst;
-		flow->tun_key.tp_src = key->src;
-		flow->tun_mask.tp_src = mask->src;
+		flow->tun_key.tp_dst = match.key->dst;
+		flow->tun_mask.tp_dst = match.mask->dst;
+		flow->tun_key.tp_src = match.key->src;
+		flow->tun_mask.tp_src = match.mask->src;
 	}
 
 	return bnxt_tc_parse_actions(bp, &flow->actions, tc_flow_cmd->exts);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c
index c116f96956fe..39c5af5dad3d 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c
@@ -83,28 +83,23 @@ static void cxgb4_process_flow_match(struct net_device *dev,
 				     struct tc_cls_flower_offload *cls,
 				     struct ch_filter_specification *fs)
 {
+	struct flow_rule *rule = tc_cls_flower_offload_flow_rule(cls);
 	u16 addr_type = 0;
 
-	if (dissector_uses_key(cls->dissector, FLOW_DISSECTOR_KEY_CONTROL)) {
-		struct flow_dissector_key_control *key =
-			skb_flow_dissector_target(cls->dissector,
-						  FLOW_DISSECTOR_KEY_CONTROL,
-						  cls->key);
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) {
+		struct flow_match_control match;
 
-		addr_type = key->addr_type;
+		flow_rule_match_control(rule, &match);
+		addr_type = match.key->addr_type;
 	}
 
-	if (dissector_uses_key(cls->dissector, FLOW_DISSECTOR_KEY_BASIC)) {
-		struct flow_dissector_key_basic *key =
-			skb_flow_dissector_target(cls->dissector,
-						  FLOW_DISSECTOR_KEY_BASIC,
-						  cls->key);
-		struct flow_dissector_key_basic *mask =
-			skb_flow_dissector_target(cls->dissector,
-						  FLOW_DISSECTOR_KEY_BASIC,
-						  cls->mask);
-		u16 ethtype_key = ntohs(key->n_proto);
-		u16 ethtype_mask = ntohs(mask->n_proto);
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
+		struct flow_match_basic match;
+		u16 ethtype_key, ethtype_mask;
+
+		flow_rule_match_basic(rule, &match);
+		ethtype_key = ntohs(match.key->n_proto);
+		ethtype_mask = ntohs(match.mask->n_proto);
 
 		if (ethtype_key == ETH_P_ALL) {
 			ethtype_key = 0;
@@ -116,115 +111,89 @@ static void cxgb4_process_flow_match(struct net_device *dev,
 
 		fs->val.ethtype = ethtype_key;
 		fs->mask.ethtype = ethtype_mask;
-		fs->val.proto = key->ip_proto;
-		fs->mask.proto = mask->ip_proto;
+		fs->val.proto = match.key->ip_proto;
+		fs->mask.proto = match.mask->ip_proto;
 	}
 
 	if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
-		struct flow_dissector_key_ipv4_addrs *key =
-			skb_flow_dissector_target(cls->dissector,
-						  FLOW_DISSECTOR_KEY_IPV4_ADDRS,
-						  cls->key);
-		struct flow_dissector_key_ipv4_addrs *mask =
-			skb_flow_dissector_target(cls->dissector,
-						  FLOW_DISSECTOR_KEY_IPV4_ADDRS,
-						  cls->mask);
+		struct flow_match_ipv4_addrs match;
+
+		flow_rule_match_ipv4_addrs(rule, &match);
 		fs->type = 0;
-		memcpy(&fs->val.lip[0], &key->dst, sizeof(key->dst));
-		memcpy(&fs->val.fip[0], &key->src, sizeof(key->src));
-		memcpy(&fs->mask.lip[0], &mask->dst, sizeof(mask->dst));
-		memcpy(&fs->mask.fip[0], &mask->src, sizeof(mask->src));
+		memcpy(&fs->val.lip[0], &match.key->dst, sizeof(match.key->dst));
+		memcpy(&fs->val.fip[0], &match.key->src, sizeof(match.key->src));
+		memcpy(&fs->mask.lip[0], &match.mask->dst, sizeof(match.mask->dst));
+		memcpy(&fs->mask.fip[0], &match.mask->src, sizeof(match.mask->src));
 
 		/* also initialize nat_lip/fip to same values */
-		memcpy(&fs->nat_lip[0], &key->dst, sizeof(key->dst));
-		memcpy(&fs->nat_fip[0], &key->src, sizeof(key->src));
-
+		memcpy(&fs->nat_lip[0], &match.key->dst, sizeof(match.key->dst));
+		memcpy(&fs->nat_fip[0], &match.key->src, sizeof(match.key->src));
 	}
 
 	if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
-		struct flow_dissector_key_ipv6_addrs *key =
-			skb_flow_dissector_target(cls->dissector,
-						  FLOW_DISSECTOR_KEY_IPV6_ADDRS,
-						  cls->key);
-		struct flow_dissector_key_ipv6_addrs *mask =
-			skb_flow_dissector_target(cls->dissector,
-						  FLOW_DISSECTOR_KEY_IPV6_ADDRS,
-						  cls->mask);
+		struct flow_match_ipv6_addrs match;
 
+		flow_rule_match_ipv6_addrs(rule, &match);
 		fs->type = 1;
-		memcpy(&fs->val.lip[0], key->dst.s6_addr, sizeof(key->dst));
-		memcpy(&fs->val.fip[0], key->src.s6_addr, sizeof(key->src));
-		memcpy(&fs->mask.lip[0], mask->dst.s6_addr, sizeof(mask->dst));
-		memcpy(&fs->mask.fip[0], mask->src.s6_addr, sizeof(mask->src));
+		memcpy(&fs->val.lip[0], match.key->dst.s6_addr,
+		       sizeof(match.key->dst));
+		memcpy(&fs->val.fip[0], match.key->src.s6_addr,
+		       sizeof(match.key->src));
+		memcpy(&fs->mask.lip[0], match.mask->dst.s6_addr,
+		       sizeof(match.mask->dst));
+		memcpy(&fs->mask.fip[0], match.mask->src.s6_addr,
+		       sizeof(match.mask->src));
 
 		/* also initialize nat_lip/fip to same values */
-		memcpy(&fs->nat_lip[0], key->dst.s6_addr, sizeof(key->dst));
-		memcpy(&fs->nat_fip[0], key->src.s6_addr, sizeof(key->src));
+		memcpy(&fs->nat_lip[0], match.key->dst.s6_addr,
+		       sizeof(match.key->dst));
+		memcpy(&fs->nat_fip[0], match.key->src.s6_addr,
+		       sizeof(match.key->src));
 	}
 
-	if (dissector_uses_key(cls->dissector, FLOW_DISSECTOR_KEY_PORTS)) {
-		struct flow_dissector_key_ports *key, *mask;
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
+		struct flow_match_ports match;
 
-		key = skb_flow_dissector_target(cls->dissector,
-						FLOW_DISSECTOR_KEY_PORTS,
-						cls->key);
-		mask = skb_flow_dissector_target(cls->dissector,
-						 FLOW_DISSECTOR_KEY_PORTS,
-						 cls->mask);
-		fs->val.lport = cpu_to_be16(key->dst);
-		fs->mask.lport = cpu_to_be16(mask->dst);
-		fs->val.fport = cpu_to_be16(key->src);
-		fs->mask.fport = cpu_to_be16(mask->src);
+		flow_rule_match_ports(rule, &match);
+		fs->val.lport = cpu_to_be16(match.key->dst);
+		fs->mask.lport = cpu_to_be16(match.mask->dst);
+		fs->val.fport = cpu_to_be16(match.key->src);
+		fs->mask.fport = cpu_to_be16(match.mask->src);
 
 		/* also initialize nat_lport/fport to same values */
-		fs->nat_lport = cpu_to_be16(key->dst);
-		fs->nat_fport = cpu_to_be16(key->src);
+		fs->nat_lport = cpu_to_be16(match.key->dst);
+		fs->nat_fport = cpu_to_be16(match.key->src);
 	}
 
-	if (dissector_uses_key(cls->dissector, FLOW_DISSECTOR_KEY_IP)) {
-		struct flow_dissector_key_ip *key, *mask;
-
-		key = skb_flow_dissector_target(cls->dissector,
-						FLOW_DISSECTOR_KEY_IP,
-						cls->key);
-		mask = skb_flow_dissector_target(cls->dissector,
-						 FLOW_DISSECTOR_KEY_IP,
-						 cls->mask);
-		fs->val.tos = key->tos;
-		fs->mask.tos = mask->tos;
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IP)) {
+		struct flow_match_ip match;
+
+		flow_rule_match_ip(rule, &match);
+		fs->val.tos = match.key->tos;
+		fs->mask.tos = match.mask->tos;
 	}
 
-	if (dissector_uses_key(cls->dissector, FLOW_DISSECTOR_KEY_ENC_KEYID)) {
-		struct flow_dissector_key_keyid *key, *mask;
-
-		key = skb_flow_dissector_target(cls->dissector,
-						FLOW_DISSECTOR_KEY_ENC_KEYID,
-						cls->key);
-		mask = skb_flow_dissector_target(cls->dissector,
-						 FLOW_DISSECTOR_KEY_ENC_KEYID,
-						 cls->mask);
-		fs->val.vni = be32_to_cpu(key->keyid);
-		fs->mask.vni = be32_to_cpu(mask->keyid);
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) {
+		struct flow_match_enc_keyid match;
+
+		flow_rule_match_enc_keyid(rule, &match);
+		fs->val.vni = be32_to_cpu(match.key->keyid);
+		fs->mask.vni = be32_to_cpu(match.mask->keyid);
 		if (fs->mask.vni) {
 			fs->val.encap_vld = 1;
 			fs->mask.encap_vld = 1;
 		}
 	}
 
-	if (dissector_uses_key(cls->dissector, FLOW_DISSECTOR_KEY_VLAN)) {
-		struct flow_dissector_key_vlan *key, *mask;
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) {
+		struct flow_match_vlan match;
 		u16 vlan_tci, vlan_tci_mask;
 
-		key = skb_flow_dissector_target(cls->dissector,
-						FLOW_DISSECTOR_KEY_VLAN,
-						cls->key);
-		mask = skb_flow_dissector_target(cls->dissector,
-						 FLOW_DISSECTOR_KEY_VLAN,
-						 cls->mask);
-		vlan_tci = key->vlan_id | (key->vlan_priority <<
-					   VLAN_PRIO_SHIFT);
-		vlan_tci_mask = mask->vlan_id | (mask->vlan_priority <<
-						 VLAN_PRIO_SHIFT);
+		flow_rule_match_vlan(rule, &match);
+		vlan_tci = match.key->vlan_id | (match.key->vlan_priority <<
+					       VLAN_PRIO_SHIFT);
+		vlan_tci_mask = match.mask->vlan_id | (match.mask->vlan_priority <<
+						     VLAN_PRIO_SHIFT);
 		fs->val.ivlan = vlan_tci;
 		fs->mask.ivlan = vlan_tci_mask;
 
@@ -255,10 +224,12 @@ static void cxgb4_process_flow_match(struct net_device *dev,
 static int cxgb4_validate_flow_match(struct net_device *dev,
 				     struct tc_cls_flower_offload *cls)
 {
+	struct flow_rule *rule = tc_cls_flower_offload_flow_rule(cls);
+	struct flow_dissector *dissector = rule->match.dissector;
 	u16 ethtype_mask = 0;
 	u16 ethtype_key = 0;
 
-	if (cls->dissector->used_keys &
+	if (dissector->used_keys &
 	    ~(BIT(FLOW_DISSECTOR_KEY_CONTROL) |
 	      BIT(FLOW_DISSECTOR_KEY_BASIC) |
 	      BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
@@ -268,36 +239,29 @@ static int cxgb4_validate_flow_match(struct net_device *dev,
 	      BIT(FLOW_DISSECTOR_KEY_VLAN) |
 	      BIT(FLOW_DISSECTOR_KEY_IP))) {
 		netdev_warn(dev, "Unsupported key used: 0x%x\n",
-			    cls->dissector->used_keys);
+			    dissector->used_keys);
 		return -EOPNOTSUPP;
 	}
 
-	if (dissector_uses_key(cls->dissector, FLOW_DISSECTOR_KEY_BASIC)) {
-		struct flow_dissector_key_basic *key =
-			skb_flow_dissector_target(cls->dissector,
-						  FLOW_DISSECTOR_KEY_BASIC,
-						  cls->key);
-		struct flow_dissector_key_basic *mask =
-			skb_flow_dissector_target(cls->dissector,
-						  FLOW_DISSECTOR_KEY_BASIC,
-						  cls->mask);
-		ethtype_key = ntohs(key->n_proto);
-		ethtype_mask = ntohs(mask->n_proto);
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
+		struct flow_match_basic match;
+
+		flow_rule_match_basic(rule, &match);
+		ethtype_key = ntohs(match.key->n_proto);
+		ethtype_mask = ntohs(match.mask->n_proto);
 	}
 
-	if (dissector_uses_key(cls->dissector, FLOW_DISSECTOR_KEY_IP)) {
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IP)) {
 		u16 eth_ip_type = ethtype_key & ethtype_mask;
-		struct flow_dissector_key_ip *mask;
+		struct flow_match_ip match;
 
 		if (eth_ip_type != ETH_P_IP && eth_ip_type != ETH_P_IPV6) {
 			netdev_err(dev, "IP Key supported only with IPv4/v6");
 			return -EINVAL;
 		}
 
-		mask = skb_flow_dissector_target(cls->dissector,
-						 FLOW_DISSECTOR_KEY_IP,
-						 cls->mask);
-		if (mask->ttl) {
+		flow_rule_match_ip(rule, &match);
+		if (match.mask->ttl) {
 			netdev_warn(dev, "ttl match unsupported for offload");
 			return -EOPNOTSUPP;
 		}
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 5c6731b97059..44856a84738d 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -7169,11 +7169,13 @@ static int i40e_parse_cls_flower(struct i40e_vsi *vsi,
 				 struct tc_cls_flower_offload *f,
 				 struct i40e_cloud_filter *filter)
 {
+	struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f);
+	struct flow_dissector *dissector = rule->match.dissector;
 	u16 n_proto_mask = 0, n_proto_key = 0, addr_type = 0;
 	struct i40e_pf *pf = vsi->back;
 	u8 field_flags = 0;
 
-	if (f->dissector->used_keys &
+	if (dissector->used_keys &
 	    ~(BIT(FLOW_DISSECTOR_KEY_CONTROL) |
 	      BIT(FLOW_DISSECTOR_KEY_BASIC) |
 	      BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) |
@@ -7183,143 +7185,109 @@ static int i40e_parse_cls_flower(struct i40e_vsi *vsi,
 	      BIT(FLOW_DISSECTOR_KEY_PORTS) |
 	      BIT(FLOW_DISSECTOR_KEY_ENC_KEYID))) {
 		dev_err(&pf->pdev->dev, "Unsupported key used: 0x%x\n",
-			f->dissector->used_keys);
+			dissector->used_keys);
 		return -EOPNOTSUPP;
 	}
 
-	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_KEYID)) {
-		struct flow_dissector_key_keyid *key =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_ENC_KEYID,
-						  f->key);
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) {
+		struct flow_match_enc_keyid match;
 
-		struct flow_dissector_key_keyid *mask =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_ENC_KEYID,
-						  f->mask);
-
-		if (mask->keyid != 0)
+		flow_rule_match_enc_keyid(rule, &match);
+		if (match.mask->keyid != 0)
 			field_flags |= I40E_CLOUD_FIELD_TEN_ID;
 
-		filter->tenant_id = be32_to_cpu(key->keyid);
+		filter->tenant_id = be32_to_cpu(match.key->keyid);
 	}
 
-	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_BASIC)) {
-		struct flow_dissector_key_basic *key =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_BASIC,
-						  f->key);
-
-		struct flow_dissector_key_basic *mask =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_BASIC,
-						  f->mask);
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
+		struct flow_match_basic match;
 
-		n_proto_key = ntohs(key->n_proto);
-		n_proto_mask = ntohs(mask->n_proto);
+		flow_rule_match_basic(rule, &match);
+		n_proto_key = ntohs(match.key->n_proto);
+		n_proto_mask = ntohs(match.mask->n_proto);
 
 		if (n_proto_key == ETH_P_ALL) {
 			n_proto_key = 0;
 			n_proto_mask = 0;
 		}
 		filter->n_proto = n_proto_key & n_proto_mask;
-		filter->ip_proto = key->ip_proto;
+		filter->ip_proto = match.key->ip_proto;
 	}
 
-	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
-		struct flow_dissector_key_eth_addrs *key =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_ETH_ADDRS,
-						  f->key);
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
+		struct flow_match_eth_addrs match;
 
-		struct flow_dissector_key_eth_addrs *mask =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_ETH_ADDRS,
-						  f->mask);
+		flow_rule_match_eth_addrs(rule, &match);
 
 		/* use is_broadcast and is_zero to check for all 0xf or 0 */
-		if (!is_zero_ether_addr(mask->dst)) {
-			if (is_broadcast_ether_addr(mask->dst)) {
+		if (!is_zero_ether_addr(match.mask->dst)) {
+			if (is_broadcast_ether_addr(match.mask->dst)) {
 				field_flags |= I40E_CLOUD_FIELD_OMAC;
 			} else {
 				dev_err(&pf->pdev->dev, "Bad ether dest mask %pM\n",
-					mask->dst);
+					match.mask->dst);
 				return I40E_ERR_CONFIG;
 			}
 		}
 
-		if (!is_zero_ether_addr(mask->src)) {
-			if (is_broadcast_ether_addr(mask->src)) {
+		if (!is_zero_ether_addr(match.mask->src)) {
+			if (is_broadcast_ether_addr(match.mask->src)) {
 				field_flags |= I40E_CLOUD_FIELD_IMAC;
 			} else {
 				dev_err(&pf->pdev->dev, "Bad ether src mask %pM\n",
-					mask->src);
+					match.mask->src);
 				return I40E_ERR_CONFIG;
 			}
 		}
-		ether_addr_copy(filter->dst_mac, key->dst);
-		ether_addr_copy(filter->src_mac, key->src);
+		ether_addr_copy(filter->dst_mac, match.key->dst);
+		ether_addr_copy(filter->src_mac, match.key->src);
 	}
 
-	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_VLAN)) {
-		struct flow_dissector_key_vlan *key =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_VLAN,
-						  f->key);
-		struct flow_dissector_key_vlan *mask =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_VLAN,
-						  f->mask);
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) {
+		struct flow_match_vlan match;
 
-		if (mask->vlan_id) {
-			if (mask->vlan_id == VLAN_VID_MASK) {
+		flow_rule_match_vlan(rule, &match);
+		if (match.mask->vlan_id) {
+			if (match.mask->vlan_id == VLAN_VID_MASK) {
 				field_flags |= I40E_CLOUD_FIELD_IVLAN;
 
 			} else {
 				dev_err(&pf->pdev->dev, "Bad vlan mask 0x%04x\n",
-					mask->vlan_id);
+					match.mask->vlan_id);
 				return I40E_ERR_CONFIG;
 			}
 		}
 
-		filter->vlan_id = cpu_to_be16(key->vlan_id);
+		filter->vlan_id = cpu_to_be16(match.key->vlan_id);
 	}
 
-	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_CONTROL)) {
-		struct flow_dissector_key_control *key =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_CONTROL,
-						  f->key);
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) {
+		struct flow_match_control match;
 
-		addr_type = key->addr_type;
+		flow_rule_match_control(rule, &match);
+		addr_type = match.key->addr_type;
 	}
 
 	if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
-		struct flow_dissector_key_ipv4_addrs *key =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_IPV4_ADDRS,
-						  f->key);
-		struct flow_dissector_key_ipv4_addrs *mask =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_IPV4_ADDRS,
-						  f->mask);
-
-		if (mask->dst) {
-			if (mask->dst == cpu_to_be32(0xffffffff)) {
+		struct flow_match_ipv4_addrs match;
+
+		flow_rule_match_ipv4_addrs(rule, &match);
+		if (match.mask->dst) {
+			if (match.mask->dst == cpu_to_be32(0xffffffff)) {
 				field_flags |= I40E_CLOUD_FIELD_IIP;
 			} else {
 				dev_err(&pf->pdev->dev, "Bad ip dst mask %pI4b\n",
-					&mask->dst);
+					&match.mask->dst);
 				return I40E_ERR_CONFIG;
 			}
 		}
 
-		if (mask->src) {
-			if (mask->src == cpu_to_be32(0xffffffff)) {
+		if (match.mask->src) {
+			if (match.mask->src == cpu_to_be32(0xffffffff)) {
 				field_flags |= I40E_CLOUD_FIELD_IIP;
 			} else {
 				dev_err(&pf->pdev->dev, "Bad ip src mask %pI4b\n",
-					&mask->src);
+					&match.mask->src);
 				return I40E_ERR_CONFIG;
 			}
 		}
@@ -7328,70 +7296,60 @@ static int i40e_parse_cls_flower(struct i40e_vsi *vsi,
 			dev_err(&pf->pdev->dev, "Tenant id not allowed for ip filter\n");
 			return I40E_ERR_CONFIG;
 		}
-		filter->dst_ipv4 = key->dst;
-		filter->src_ipv4 = key->src;
+		filter->dst_ipv4 = match.key->dst;
+		filter->src_ipv4 = match.key->src;
 	}
 
 	if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
-		struct flow_dissector_key_ipv6_addrs *key =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_IPV6_ADDRS,
-						  f->key);
-		struct flow_dissector_key_ipv6_addrs *mask =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_IPV6_ADDRS,
-						  f->mask);
+		struct flow_match_ipv6_addrs match;
+
+		flow_rule_match_ipv6_addrs(rule, &match);
 
 		/* src and dest IPV6 address should not be LOOPBACK
 		 * (0:0:0:0:0:0:0:1), which can be represented as ::1
 		 */
-		if (ipv6_addr_loopback(&key->dst) ||
-		    ipv6_addr_loopback(&key->src)) {
+		if (ipv6_addr_loopback(&match.key->dst) ||
+		    ipv6_addr_loopback(&match.key->src)) {
 			dev_err(&pf->pdev->dev,
 				"Bad ipv6, addr is LOOPBACK\n");
 			return I40E_ERR_CONFIG;
 		}
-		if (!ipv6_addr_any(&mask->dst) || !ipv6_addr_any(&mask->src))
+		if (!ipv6_addr_any(&match.mask->dst) ||
+		    !ipv6_addr_any(&match.mask->src))
 			field_flags |= I40E_CLOUD_FIELD_IIP;
 
-		memcpy(&filter->src_ipv6, &key->src.s6_addr32,
+		memcpy(&filter->src_ipv6, &match.key->src.s6_addr32,
 		       sizeof(filter->src_ipv6));
-		memcpy(&filter->dst_ipv6, &key->dst.s6_addr32,
+		memcpy(&filter->dst_ipv6, &match.key->dst.s6_addr32,
 		       sizeof(filter->dst_ipv6));
 	}
 
-	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_PORTS)) {
-		struct flow_dissector_key_ports *key =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_PORTS,
-						  f->key);
-		struct flow_dissector_key_ports *mask =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_PORTS,
-						  f->mask);
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
+		struct flow_match_ports match;
 
-		if (mask->src) {
-			if (mask->src == cpu_to_be16(0xffff)) {
+		flow_rule_match_ports(rule, &match);
+		if (match.mask->src) {
+			if (match.mask->src == cpu_to_be16(0xffff)) {
 				field_flags |= I40E_CLOUD_FIELD_IIP;
 			} else {
 				dev_err(&pf->pdev->dev, "Bad src port mask 0x%04x\n",
-					be16_to_cpu(mask->src));
+					be16_to_cpu(match.mask->src));
 				return I40E_ERR_CONFIG;
 			}
 		}
 
-		if (mask->dst) {
-			if (mask->dst == cpu_to_be16(0xffff)) {
+		if (match.mask->dst) {
+			if (match.mask->dst == cpu_to_be16(0xffff)) {
 				field_flags |= I40E_CLOUD_FIELD_IIP;
 			} else {
 				dev_err(&pf->pdev->dev, "Bad dst port mask 0x%04x\n",
-					be16_to_cpu(mask->dst));
+					be16_to_cpu(match.mask->dst));
 				return I40E_ERR_CONFIG;
 			}
 		}
 
-		filter->dst_port = key->dst;
-		filter->src_port = key->src;
+		filter->dst_port = match.key->dst;
+		filter->src_port = match.key->src;
 
 		switch (filter->ip_proto) {
 		case IPPROTO_TCP:
diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c
index 9f2b7b7adf6b..4569d69a2b55 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_main.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_main.c
@@ -2439,6 +2439,8 @@ static int iavf_parse_cls_flower(struct iavf_adapter *adapter,
 				 struct tc_cls_flower_offload *f,
 				 struct iavf_cloud_filter *filter)
 {
+	struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f);
+	struct flow_dissector *dissector = rule->match.dissector;
 	u16 n_proto_mask = 0;
 	u16 n_proto_key = 0;
 	u8 field_flags = 0;
@@ -2447,7 +2449,7 @@ static int iavf_parse_cls_flower(struct iavf_adapter *adapter,
 	int i = 0;
 	struct virtchnl_filter *vf = &filter->f;
 
-	if (f->dissector->used_keys &
+	if (dissector->used_keys &
 	    ~(BIT(FLOW_DISSECTOR_KEY_CONTROL) |
 	      BIT(FLOW_DISSECTOR_KEY_BASIC) |
 	      BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) |
@@ -2457,32 +2459,24 @@ static int iavf_parse_cls_flower(struct iavf_adapter *adapter,
 	      BIT(FLOW_DISSECTOR_KEY_PORTS) |
 	      BIT(FLOW_DISSECTOR_KEY_ENC_KEYID))) {
 		dev_err(&adapter->pdev->dev, "Unsupported key used: 0x%x\n",
-			f->dissector->used_keys);
+			dissector->used_keys);
 		return -EOPNOTSUPP;
 	}
 
-	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_KEYID)) {
-		struct flow_dissector_key_keyid *mask =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_ENC_KEYID,
-						  f->mask);
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) {
+		struct flow_match_enc_keyid match;
 
-		if (mask->keyid != 0)
+		flow_rule_match_enc_keyid(rule, &match);
+		if (match.mask->keyid != 0)
 			field_flags |= IAVF_CLOUD_FIELD_TEN_ID;
 	}
 
-	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_BASIC)) {
-		struct flow_dissector_key_basic *key =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_BASIC,
-						  f->key);
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
+		struct flow_match_basic match;
 
-		struct flow_dissector_key_basic *mask =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_BASIC,
-						  f->mask);
-		n_proto_key = ntohs(key->n_proto);
-		n_proto_mask = ntohs(mask->n_proto);
+		flow_rule_match_basic(rule, &match);
+		n_proto_key = ntohs(match.key->n_proto);
+		n_proto_mask = ntohs(match.mask->n_proto);
 
 		if (n_proto_key == ETH_P_ALL) {
 			n_proto_key = 0;
@@ -2496,122 +2490,103 @@ static int iavf_parse_cls_flower(struct iavf_adapter *adapter,
 			vf->flow_type = VIRTCHNL_TCP_V6_FLOW;
 		}
 
-		if (key->ip_proto != IPPROTO_TCP) {
+		if (match.key->ip_proto != IPPROTO_TCP) {
 			dev_info(&adapter->pdev->dev, "Only TCP transport is supported\n");
 			return -EINVAL;
 		}
 	}
 
-	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
-		struct flow_dissector_key_eth_addrs *key =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_ETH_ADDRS,
-						  f->key);
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
+		struct flow_match_eth_addrs match;
+
+		flow_rule_match_eth_addrs(rule, &match);
 
-		struct flow_dissector_key_eth_addrs *mask =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_ETH_ADDRS,
-						  f->mask);
 		/* use is_broadcast and is_zero to check for all 0xf or 0 */
-		if (!is_zero_ether_addr(mask->dst)) {
-			if (is_broadcast_ether_addr(mask->dst)) {
+		if (!is_zero_ether_addr(match.mask->dst)) {
+			if (is_broadcast_ether_addr(match.mask->dst)) {
 				field_flags |= IAVF_CLOUD_FIELD_OMAC;
 			} else {
 				dev_err(&adapter->pdev->dev, "Bad ether dest mask %pM\n",
-					mask->dst);
+					match.mask->dst);
 				return I40E_ERR_CONFIG;
 			}
 		}
 
-		if (!is_zero_ether_addr(mask->src)) {
-			if (is_broadcast_ether_addr(mask->src)) {
+		if (!is_zero_ether_addr(match.mask->src)) {
+			if (is_broadcast_ether_addr(match.mask->src)) {
 				field_flags |= IAVF_CLOUD_FIELD_IMAC;
 			} else {
 				dev_err(&adapter->pdev->dev, "Bad ether src mask %pM\n",
-					mask->src);
+					match.mask->src);
 				return I40E_ERR_CONFIG;
 			}
 		}
 
-		if (!is_zero_ether_addr(key->dst))
-			if (is_valid_ether_addr(key->dst) ||
-			    is_multicast_ether_addr(key->dst)) {
+		if (!is_zero_ether_addr(match.key->dst))
+			if (is_valid_ether_addr(match.key->dst) ||
+			    is_multicast_ether_addr(match.key->dst)) {
 				/* set the mask if a valid dst_mac address */
 				for (i = 0; i < ETH_ALEN; i++)
 					vf->mask.tcp_spec.dst_mac[i] |= 0xff;
 				ether_addr_copy(vf->data.tcp_spec.dst_mac,
-						key->dst);
+						match.key->dst);
 			}
 
-		if (!is_zero_ether_addr(key->src))
-			if (is_valid_ether_addr(key->src) ||
-			    is_multicast_ether_addr(key->src)) {
+		if (!is_zero_ether_addr(match.key->src))
+			if (is_valid_ether_addr(match.key->src) ||
+			    is_multicast_ether_addr(match.key->src)) {
 				/* set the mask if a valid dst_mac address */
 				for (i = 0; i < ETH_ALEN; i++)
 					vf->mask.tcp_spec.src_mac[i] |= 0xff;
 				ether_addr_copy(vf->data.tcp_spec.src_mac,
-						key->src);
+						match.key->src);
 		}
 	}
 
-	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_VLAN)) {
-		struct flow_dissector_key_vlan *key =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_VLAN,
-						  f->key);
-		struct flow_dissector_key_vlan *mask =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_VLAN,
-						  f->mask);
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) {
+		struct flow_match_vlan match;
 
-		if (mask->vlan_id) {
-			if (mask->vlan_id == VLAN_VID_MASK) {
+		flow_rule_match_vlan(rule, &match);
+		if (match.mask->vlan_id) {
+			if (match.mask->vlan_id == VLAN_VID_MASK) {
 				field_flags |= IAVF_CLOUD_FIELD_IVLAN;
 			} else {
 				dev_err(&adapter->pdev->dev, "Bad vlan mask %u\n",
-					mask->vlan_id);
+					match.mask->vlan_id);
 				return I40E_ERR_CONFIG;
 			}
 		}
 		vf->mask.tcp_spec.vlan_id |= cpu_to_be16(0xffff);
-		vf->data.tcp_spec.vlan_id = cpu_to_be16(key->vlan_id);
+		vf->data.tcp_spec.vlan_id = cpu_to_be16(match.key->vlan_id);
 	}
 
-	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_CONTROL)) {
-		struct flow_dissector_key_control *key =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_CONTROL,
-						  f->key);
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) {
+		struct flow_match_control match;
 
-		addr_type = key->addr_type;
+		flow_rule_match_control(rule, &match);
+		addr_type = match.key->addr_type;
 	}
 
 	if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
-		struct flow_dissector_key_ipv4_addrs *key =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_IPV4_ADDRS,
-						  f->key);
-		struct flow_dissector_key_ipv4_addrs *mask =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_IPV4_ADDRS,
-						  f->mask);
-
-		if (mask->dst) {
-			if (mask->dst == cpu_to_be32(0xffffffff)) {
+		struct flow_match_ipv4_addrs match;
+
+		flow_rule_match_ipv4_addrs(rule, &match);
+		if (match.mask->dst) {
+			if (match.mask->dst == cpu_to_be32(0xffffffff)) {
 				field_flags |= IAVF_CLOUD_FIELD_IIP;
 			} else {
 				dev_err(&adapter->pdev->dev, "Bad ip dst mask 0x%08x\n",
-					be32_to_cpu(mask->dst));
+					be32_to_cpu(match.mask->dst));
 				return I40E_ERR_CONFIG;
 			}
 		}
 
-		if (mask->src) {
-			if (mask->src == cpu_to_be32(0xffffffff)) {
+		if (match.mask->src) {
+			if (match.mask->src == cpu_to_be32(0xffffffff)) {
 				field_flags |= IAVF_CLOUD_FIELD_IIP;
 			} else {
 				dev_err(&adapter->pdev->dev, "Bad ip src mask 0x%08x\n",
-					be32_to_cpu(mask->dst));
+					be32_to_cpu(match.mask->dst));
 				return I40E_ERR_CONFIG;
 			}
 		}
@@ -2620,28 +2595,23 @@ static int iavf_parse_cls_flower(struct iavf_adapter *adapter,
 			dev_info(&adapter->pdev->dev, "Tenant id not allowed for ip filter\n");
 			return I40E_ERR_CONFIG;
 		}
-		if (key->dst) {
+		if (match.key->dst) {
 			vf->mask.tcp_spec.dst_ip[0] |= cpu_to_be32(0xffffffff);
-			vf->data.tcp_spec.dst_ip[0] = key->dst;
+			vf->data.tcp_spec.dst_ip[0] = match.key->dst;
 		}
-		if (key->src) {
+		if (match.key->src) {
 			vf->mask.tcp_spec.src_ip[0] |= cpu_to_be32(0xffffffff);
-			vf->data.tcp_spec.src_ip[0] = key->src;
+			vf->data.tcp_spec.src_ip[0] = match.key->src;
 		}
 	}
 
 	if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
-		struct flow_dissector_key_ipv6_addrs *key =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_IPV6_ADDRS,
-						  f->key);
-		struct flow_dissector_key_ipv6_addrs *mask =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_IPV6_ADDRS,
-						  f->mask);
+		struct flow_match_ipv6_addrs match;
+
+		flow_rule_match_ipv6_addrs(rule, &match);
 
 		/* validate mask, make sure it is not IPV6_ADDR_ANY */
-		if (ipv6_addr_any(&mask->dst)) {
+		if (ipv6_addr_any(&match.mask->dst)) {
 			dev_err(&adapter->pdev->dev, "Bad ipv6 dst mask 0x%02x\n",
 				IPV6_ADDR_ANY);
 			return I40E_ERR_CONFIG;
@@ -2650,61 +2620,56 @@ static int iavf_parse_cls_flower(struct iavf_adapter *adapter,
 		/* src and dest IPv6 address should not be LOOPBACK
 		 * (0:0:0:0:0:0:0:1) which can be represented as ::1
 		 */
-		if (ipv6_addr_loopback(&key->dst) ||
-		    ipv6_addr_loopback(&key->src)) {
+		if (ipv6_addr_loopback(&match.key->dst) ||
+		    ipv6_addr_loopback(&match.key->src)) {
 			dev_err(&adapter->pdev->dev,
 				"ipv6 addr should not be loopback\n");
 			return I40E_ERR_CONFIG;
 		}
-		if (!ipv6_addr_any(&mask->dst) || !ipv6_addr_any(&mask->src))
+		if (!ipv6_addr_any(&match.mask->dst) ||
+		    !ipv6_addr_any(&match.mask->src))
 			field_flags |= IAVF_CLOUD_FIELD_IIP;
 
 		for (i = 0; i < 4; i++)
 			vf->mask.tcp_spec.dst_ip[i] |= cpu_to_be32(0xffffffff);
-		memcpy(&vf->data.tcp_spec.dst_ip, &key->dst.s6_addr32,
+		memcpy(&vf->data.tcp_spec.dst_ip, &match.key->dst.s6_addr32,
 		       sizeof(vf->data.tcp_spec.dst_ip));
 		for (i = 0; i < 4; i++)
 			vf->mask.tcp_spec.src_ip[i] |= cpu_to_be32(0xffffffff);
-		memcpy(&vf->data.tcp_spec.src_ip, &key->src.s6_addr32,
+		memcpy(&vf->data.tcp_spec.src_ip, &match.key->src.s6_addr32,
 		       sizeof(vf->data.tcp_spec.src_ip));
 	}
-	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_PORTS)) {
-		struct flow_dissector_key_ports *key =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_PORTS,
-						  f->key);
-		struct flow_dissector_key_ports *mask =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_PORTS,
-						  f->mask);
-
-		if (mask->src) {
-			if (mask->src == cpu_to_be16(0xffff)) {
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
+		struct flow_match_ports match;
+
+		flow_rule_match_ports(rule, &match);
+		if (match.mask->src) {
+			if (match.mask->src == cpu_to_be16(0xffff)) {
 				field_flags |= IAVF_CLOUD_FIELD_IIP;
 			} else {
 				dev_err(&adapter->pdev->dev, "Bad src port mask %u\n",
-					be16_to_cpu(mask->src));
+					be16_to_cpu(match.mask->src));
 				return I40E_ERR_CONFIG;
 			}
 		}
 
-		if (mask->dst) {
-			if (mask->dst == cpu_to_be16(0xffff)) {
+		if (match.mask->dst) {
+			if (match.mask->dst == cpu_to_be16(0xffff)) {
 				field_flags |= IAVF_CLOUD_FIELD_IIP;
 			} else {
 				dev_err(&adapter->pdev->dev, "Bad dst port mask %u\n",
-					be16_to_cpu(mask->dst));
+					be16_to_cpu(match.mask->dst));
 				return I40E_ERR_CONFIG;
 			}
 		}
-		if (key->dst) {
+		if (match.key->dst) {
 			vf->mask.tcp_spec.dst_port |= cpu_to_be16(0xffff);
-			vf->data.tcp_spec.dst_port = key->dst;
+			vf->data.tcp_spec.dst_port = match.key->dst;
 		}
 
-		if (key->src) {
+		if (match.key->src) {
 			vf->mask.tcp_spec.src_port |= cpu_to_be16(0xffff);
-			vf->data.tcp_spec.src_port = key->src;
+			vf->data.tcp_spec.src_port = match.key->src;
 		}
 	}
 	vf->field_flags = field_flags;
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index d35cc9697e2d..6d812e96572d 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -2581,9 +2581,11 @@ static int igb_parse_cls_flower(struct igb_adapter *adapter,
 				int traffic_class,
 				struct igb_nfc_filter *input)
 {
+	struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f);
+	struct flow_dissector *dissector = rule->match.dissector;
 	struct netlink_ext_ack *extack = f->common.extack;
 
-	if (f->dissector->used_keys &
+	if (dissector->used_keys &
 	    ~(BIT(FLOW_DISSECTOR_KEY_BASIC) |
 	      BIT(FLOW_DISSECTOR_KEY_CONTROL) |
 	      BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) |
@@ -2593,78 +2595,60 @@ static int igb_parse_cls_flower(struct igb_adapter *adapter,
 		return -EOPNOTSUPP;
 	}
 
-	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
-		struct flow_dissector_key_eth_addrs *key, *mask;
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
+		struct flow_match_eth_addrs match;
 
-		key = skb_flow_dissector_target(f->dissector,
-						FLOW_DISSECTOR_KEY_ETH_ADDRS,
-						f->key);
-		mask = skb_flow_dissector_target(f->dissector,
-						 FLOW_DISSECTOR_KEY_ETH_ADDRS,
-						 f->mask);
-
-		if (!is_zero_ether_addr(mask->dst)) {
-			if (!is_broadcast_ether_addr(mask->dst)) {
+		flow_rule_match_eth_addrs(rule, &match);
+		if (!is_zero_ether_addr(match.mask->dst)) {
+			if (!is_broadcast_ether_addr(match.mask->dst)) {
 				NL_SET_ERR_MSG_MOD(extack, "Only full masks are supported for destination MAC address");
 				return -EINVAL;
 			}
 
 			input->filter.match_flags |=
 				IGB_FILTER_FLAG_DST_MAC_ADDR;
-			ether_addr_copy(input->filter.dst_addr, key->dst);
+			ether_addr_copy(input->filter.dst_addr, match.key->dst);
 		}
 
-		if (!is_zero_ether_addr(mask->src)) {
-			if (!is_broadcast_ether_addr(mask->src)) {
+		if (!is_zero_ether_addr(match.mask->src)) {
+			if (!is_broadcast_ether_addr(match.mask->src)) {
 				NL_SET_ERR_MSG_MOD(extack, "Only full masks are supported for source MAC address");
 				return -EINVAL;
 			}
 
 			input->filter.match_flags |=
 				IGB_FILTER_FLAG_SRC_MAC_ADDR;
-			ether_addr_copy(input->filter.src_addr, key->src);
+			ether_addr_copy(input->filter.src_addr, match.key->src);
 		}
 	}
 
-	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_BASIC)) {
-		struct flow_dissector_key_basic *key, *mask;
-
-		key = skb_flow_dissector_target(f->dissector,
-						FLOW_DISSECTOR_KEY_BASIC,
-						f->key);
-		mask = skb_flow_dissector_target(f->dissector,
-						 FLOW_DISSECTOR_KEY_BASIC,
-						 f->mask);
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
+		struct flow_match_basic match;
 
-		if (mask->n_proto) {
-			if (mask->n_proto != ETHER_TYPE_FULL_MASK) {
+		flow_rule_match_basic(rule, &match);
+		if (match.mask->n_proto) {
+			if (match.mask->n_proto != ETHER_TYPE_FULL_MASK) {
 				NL_SET_ERR_MSG_MOD(extack, "Only full mask is supported for EtherType filter");
 				return -EINVAL;
 			}
 
 			input->filter.match_flags |= IGB_FILTER_FLAG_ETHER_TYPE;
-			input->filter.etype = key->n_proto;
+			input->filter.etype = match.key->n_proto;
 		}
 	}
 
-	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_VLAN)) {
-		struct flow_dissector_key_vlan *key, *mask;
-
-		key = skb_flow_dissector_target(f->dissector,
-						FLOW_DISSECTOR_KEY_VLAN,
-						f->key);
-		mask = skb_flow_dissector_target(f->dissector,
-						 FLOW_DISSECTOR_KEY_VLAN,
-						 f->mask);
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) {
+		struct flow_match_vlan match;
 
-		if (mask->vlan_priority) {
-			if (mask->vlan_priority != VLAN_PRIO_FULL_MASK) {
+		flow_rule_match_vlan(rule, &match);
+		if (match.mask->vlan_priority) {
+			if (match.mask->vlan_priority != VLAN_PRIO_FULL_MASK) {
 				NL_SET_ERR_MSG_MOD(extack, "Only full mask is supported for VLAN priority");
 				return -EINVAL;
 			}
 
 			input->filter.match_flags |= IGB_FILTER_FLAG_VLAN_TCI;
-			input->filter.vlan_tci = key->vlan_priority;
+			input->filter.vlan_tci = match.key->vlan_priority;
 		}
 	}
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
index 046948ead152..47bb4eb894c2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
@@ -496,25 +496,21 @@ static int mlx5e_tc_tun_parse_vxlan(struct mlx5e_priv *priv,
 				    void *headers_c,
 				    void *headers_v)
 {
+	struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f);
 	struct netlink_ext_ack *extack = f->common.extack;
-	struct flow_dissector_key_ports *key =
-		skb_flow_dissector_target(f->dissector,
-					  FLOW_DISSECTOR_KEY_ENC_PORTS,
-					  f->key);
-	struct flow_dissector_key_ports *mask =
-		skb_flow_dissector_target(f->dissector,
-					  FLOW_DISSECTOR_KEY_ENC_PORTS,
-					  f->mask);
 	void *misc_c = MLX5_ADDR_OF(fte_match_param,
 				    spec->match_criteria,
 				    misc_parameters);
 	void *misc_v = MLX5_ADDR_OF(fte_match_param,
 				    spec->match_value,
 				    misc_parameters);
+	struct flow_match_ports enc_ports;
+
+	flow_rule_match_enc_ports(rule, &enc_ports);
 
 	/* Full udp dst port must be given */
-	if (!dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_PORTS) ||
-	    memchr_inv(&mask->dst, 0xff, sizeof(mask->dst))) {
+	if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_PORTS) ||
+	    memchr_inv(&enc_ports.mask->dst, 0xff, sizeof(enc_ports.mask->dst))) {
 		NL_SET_ERR_MSG_MOD(extack,
 				   "VXLAN decap filter must include enc_dst_port condition");
 		netdev_warn(priv->netdev,
@@ -523,12 +519,12 @@ static int mlx5e_tc_tun_parse_vxlan(struct mlx5e_priv *priv,
 	}
 
 	/* udp dst port must be knonwn as a VXLAN port */
-	if (!mlx5_vxlan_lookup_port(priv->mdev->vxlan, be16_to_cpu(key->dst))) {
+	if (!mlx5_vxlan_lookup_port(priv->mdev->vxlan, be16_to_cpu(enc_ports.key->dst))) {
 		NL_SET_ERR_MSG_MOD(extack,
 				   "Matched UDP port is not registered as a VXLAN port");
 		netdev_warn(priv->netdev,
 			    "UDP port %d is not registered as a VXLAN port\n",
-			    be16_to_cpu(key->dst));
+			    be16_to_cpu(enc_ports.key->dst));
 		return -EOPNOTSUPP;
 	}
 
@@ -536,26 +532,26 @@ static int mlx5e_tc_tun_parse_vxlan(struct mlx5e_priv *priv,
 	MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_protocol);
 	MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, IPPROTO_UDP);
 
-	MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_dport, ntohs(mask->dst));
-	MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_dport, ntohs(key->dst));
+	MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_dport,
+		 ntohs(enc_ports.mask->dst));
+	MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_dport,
+		 ntohs(enc_ports.key->dst));
 
-	MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_sport, ntohs(mask->src));
-	MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_sport, ntohs(key->src));
+	MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_sport,
+		 ntohs(enc_ports.mask->src));
+	MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_sport,
+		 ntohs(enc_ports.key->src));
 
 	/* match on VNI */
-	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_KEYID)) {
-		struct flow_dissector_key_keyid *key =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_ENC_KEYID,
-						  f->key);
-		struct flow_dissector_key_keyid *mask =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_ENC_KEYID,
-						  f->mask);
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) {
+		struct flow_match_enc_keyid enc_keyid;
+
+		flow_rule_match_enc_keyid(rule, &enc_keyid);
+
 		MLX5_SET(fte_match_set_misc, misc_c, vxlan_vni,
-			 be32_to_cpu(mask->keyid));
+			 be32_to_cpu(enc_keyid.mask->keyid));
 		MLX5_SET(fte_match_set_misc, misc_v, vxlan_vni,
-			 be32_to_cpu(key->keyid));
+			 be32_to_cpu(enc_keyid.key->keyid));
 	}
 	return 0;
 }
@@ -570,6 +566,7 @@ static int mlx5e_tc_tun_parse_gretap(struct mlx5e_priv *priv,
 				    misc_parameters);
 	void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
 				    misc_parameters);
+	struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f);
 
 	if (!MLX5_CAP_ESW(priv->mdev, nvgre_encap_decap)) {
 		NL_SET_ERR_MSG_MOD(f->common.extack,
@@ -587,21 +584,14 @@ static int mlx5e_tc_tun_parse_gretap(struct mlx5e_priv *priv,
 	MLX5_SET(fte_match_set_misc, misc_v, gre_protocol, ETH_P_TEB);
 
 	/* gre key */
-	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_KEYID)) {
-		struct flow_dissector_key_keyid *mask = NULL;
-		struct flow_dissector_key_keyid *key = NULL;
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) {
+		struct flow_match_enc_keyid enc_keyid;
 
-		mask = skb_flow_dissector_target(f->dissector,
-						 FLOW_DISSECTOR_KEY_ENC_KEYID,
-						 f->mask);
+		flow_rule_match_enc_keyid(rule, &enc_keyid);
 		MLX5_SET(fte_match_set_misc, misc_c,
-			 gre_key.key, be32_to_cpu(mask->keyid));
-
-		key = skb_flow_dissector_target(f->dissector,
-						FLOW_DISSECTOR_KEY_ENC_KEYID,
-						f->key);
+			 gre_key.key, be32_to_cpu(enc_keyid.mask->keyid));
 		MLX5_SET(fte_match_set_misc, misc_v,
-			 gre_key.key, be32_to_cpu(key->keyid));
+			 gre_key.key, be32_to_cpu(enc_keyid.key->keyid));
 	}
 
 	return 0;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 74159d39dd66..cd289ce0582d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -1309,12 +1309,9 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv,
 				       outer_headers);
 	void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
 				       outer_headers);
-
-	struct flow_dissector_key_control *enc_control =
-		skb_flow_dissector_target(f->dissector,
-					  FLOW_DISSECTOR_KEY_ENC_CONTROL,
-					  f->key);
-	int err = 0;
+	struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f);
+	struct flow_match_control enc_control;
+	int err;
 
 	err = mlx5e_tc_tun_parse(filter_dev, priv, spec, f,
 				 headers_c, headers_v);
@@ -1324,79 +1321,70 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv,
 		return err;
 	}
 
-	if (enc_control->addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
-		struct flow_dissector_key_ipv4_addrs *key =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
-						  f->key);
-		struct flow_dissector_key_ipv4_addrs *mask =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
-						  f->mask);
+	flow_rule_match_enc_control(rule, &enc_control);
+
+	if (enc_control.key->addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
+		struct flow_match_ipv4_addrs match;
+
+		flow_rule_match_enc_ipv4_addrs(rule, &match);
 		MLX5_SET(fte_match_set_lyr_2_4, headers_c,
 			 src_ipv4_src_ipv6.ipv4_layout.ipv4,
-			 ntohl(mask->src));
+			 ntohl(match.mask->src));
 		MLX5_SET(fte_match_set_lyr_2_4, headers_v,
 			 src_ipv4_src_ipv6.ipv4_layout.ipv4,
-			 ntohl(key->src));
+			 ntohl(match.key->src));
 
 		MLX5_SET(fte_match_set_lyr_2_4, headers_c,
 			 dst_ipv4_dst_ipv6.ipv4_layout.ipv4,
-			 ntohl(mask->dst));
+			 ntohl(match.mask->dst));
 		MLX5_SET(fte_match_set_lyr_2_4, headers_v,
 			 dst_ipv4_dst_ipv6.ipv4_layout.ipv4,
-			 ntohl(key->dst));
+			 ntohl(match.key->dst));
 
 		MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ethertype);
 		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, ETH_P_IP);
-	} else if (enc_control->addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
-		struct flow_dissector_key_ipv6_addrs *key =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS,
-						  f->key);
-		struct flow_dissector_key_ipv6_addrs *mask =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS,
-						  f->mask);
+	} else if (enc_control.key->addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
+		struct flow_match_ipv6_addrs match;
 
+		flow_rule_match_enc_ipv6_addrs(rule, &match);
 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
 				    src_ipv4_src_ipv6.ipv6_layout.ipv6),
-		       &mask->src, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6));
+		       &match.mask->src, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6));
 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
 				    src_ipv4_src_ipv6.ipv6_layout.ipv6),
-		       &key->src, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6));
+		       &match.key->src, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6));
 
 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
 				    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
-		       &mask->dst, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6));
+		       &match.mask->dst, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6));
 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
 				    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
-		       &key->dst, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6));
+		       &match.key->dst, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6));
 
 		MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ethertype);
 		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, ETH_P_IPV6);
 	}
 
-	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_IP)) {
-		struct flow_dissector_key_ip *key =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_ENC_IP,
-						  f->key);
-		struct flow_dissector_key_ip *mask =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_ENC_IP,
-						  f->mask);
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IP)) {
+		struct flow_match_ip match;
 
-		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn, mask->tos & 0x3);
-		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn, key->tos & 0x3);
+		flow_rule_match_enc_ip(rule, &match);
+		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn,
+			 match.mask->tos & 0x3);
+		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn,
+			 match.key->tos & 0x3);
 
-		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp, mask->tos >> 2);
-		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp, key->tos  >> 2);
+		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp,
+			 match.mask->tos >> 2);
+		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp,
+			 match.key->tos  >> 2);
 
-		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ttl_hoplimit, mask->ttl);
-		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ttl_hoplimit, key->ttl);
+		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ttl_hoplimit,
+			 match.mask->ttl);
+		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ttl_hoplimit,
+			 match.key->ttl);
 
-		if (mask->ttl &&
+		if (match.mask->ttl &&
 		    !MLX5_CAP_ESW_FLOWTABLE_FDB
 			(priv->mdev,
 			 ft_field_support.outer_ipv4_ttl)) {
@@ -1437,12 +1425,14 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
 				    misc_parameters);
 	void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
 				    misc_parameters);
+	struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f);
+	struct flow_dissector *dissector = rule->match.dissector;
 	u16 addr_type = 0;
 	u8 ip_proto = 0;
 
 	*match_level = MLX5_MATCH_NONE;
 
-	if (f->dissector->used_keys &
+	if (dissector->used_keys &
 	    ~(BIT(FLOW_DISSECTOR_KEY_CONTROL) |
 	      BIT(FLOW_DISSECTOR_KEY_BASIC) |
 	      BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) |
@@ -1461,20 +1451,18 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
 	      BIT(FLOW_DISSECTOR_KEY_ENC_IP))) {
 		NL_SET_ERR_MSG_MOD(extack, "Unsupported key");
 		netdev_warn(priv->netdev, "Unsupported key used: 0x%x\n",
-			    f->dissector->used_keys);
+			    dissector->used_keys);
 		return -EOPNOTSUPP;
 	}
 
-	if ((dissector_uses_key(f->dissector,
-				FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) ||
-	     dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_KEYID) ||
-	     dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_PORTS)) &&
-	    dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_CONTROL)) {
-		struct flow_dissector_key_control *key =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_ENC_CONTROL,
-						  f->key);
-		switch (key->addr_type) {
+	if ((flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) ||
+	     flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID) ||
+	     flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_PORTS)) &&
+	    flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL)) {
+		struct flow_match_control match;
+
+		flow_rule_match_enc_control(rule, &match);
+		switch (match.key->addr_type) {
 		case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
 		case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
 			if (parse_tunnel_attr(priv, spec, f, filter_dev))
@@ -1493,35 +1481,27 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
 					 inner_headers);
 	}
 
-	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_BASIC)) {
-		struct flow_dissector_key_basic *key =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_BASIC,
-						  f->key);
-		struct flow_dissector_key_basic *mask =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_BASIC,
-						  f->mask);
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
+		struct flow_match_basic match;
+
+		flow_rule_match_basic(rule, &match);
 		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ethertype,
-			 ntohs(mask->n_proto));
+			 ntohs(match.mask->n_proto));
 		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype,
-			 ntohs(key->n_proto));
+			 ntohs(match.key->n_proto));
 
-		if (mask->n_proto)
+		if (match.mask->n_proto)
 			*match_level = MLX5_MATCH_L2;
 	}
 
-	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_VLAN)) {
-		struct flow_dissector_key_vlan *key =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_VLAN,
-						  f->key);
-		struct flow_dissector_key_vlan *mask =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_VLAN,
-						  f->mask);
-		if (mask->vlan_id || mask->vlan_priority || mask->vlan_tpid) {
-			if (key->vlan_tpid == htons(ETH_P_8021AD)) {
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) {
+		struct flow_match_vlan match;
+
+		flow_rule_match_vlan(rule, &match);
+		if (match.mask->vlan_id ||
+		    match.mask->vlan_priority ||
+		    match.mask->vlan_tpid) {
+			if (match.key->vlan_tpid == htons(ETH_P_8021AD)) {
 				MLX5_SET(fte_match_set_lyr_2_4, headers_c,
 					 svlan_tag, 1);
 				MLX5_SET(fte_match_set_lyr_2_4, headers_v,
@@ -1533,11 +1513,15 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
 					 cvlan_tag, 1);
 			}
 
-			MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_vid, mask->vlan_id);
-			MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid, key->vlan_id);
+			MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_vid,
+				 match.mask->vlan_id);
+			MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid,
+				 match.key->vlan_id);
 
-			MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_prio, mask->vlan_priority);
-			MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_prio, key->vlan_priority);
+			MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_prio,
+				 match.mask->vlan_priority);
+			MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_prio,
+				 match.key->vlan_priority);
 
 			*match_level = MLX5_MATCH_L2;
 		}
@@ -1547,17 +1531,14 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
 		*match_level = MLX5_MATCH_L2;
 	}
 
-	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_CVLAN)) {
-		struct flow_dissector_key_vlan *key =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_CVLAN,
-						  f->key);
-		struct flow_dissector_key_vlan *mask =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_CVLAN,
-						  f->mask);
-		if (mask->vlan_id || mask->vlan_priority || mask->vlan_tpid) {
-			if (key->vlan_tpid == htons(ETH_P_8021AD)) {
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CVLAN)) {
+		struct flow_match_vlan match;
+
+		flow_rule_match_vlan(rule, &match);
+		if (match.mask->vlan_id ||
+		    match.mask->vlan_priority ||
+		    match.mask->vlan_tpid) {
+			if (match.key->vlan_tpid == htons(ETH_P_8021AD)) {
 				MLX5_SET(fte_match_set_misc, misc_c,
 					 outer_second_svlan_tag, 1);
 				MLX5_SET(fte_match_set_misc, misc_v,
@@ -1570,69 +1551,58 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
 			}
 
 			MLX5_SET(fte_match_set_misc, misc_c, outer_second_vid,
-				 mask->vlan_id);
+				 match.mask->vlan_id);
 			MLX5_SET(fte_match_set_misc, misc_v, outer_second_vid,
-				 key->vlan_id);
+				 match.key->vlan_id);
 			MLX5_SET(fte_match_set_misc, misc_c, outer_second_prio,
-				 mask->vlan_priority);
+				 match.mask->vlan_priority);
 			MLX5_SET(fte_match_set_misc, misc_v, outer_second_prio,
-				 key->vlan_priority);
+				 match.key->vlan_priority);
 
 			*match_level = MLX5_MATCH_L2;
 		}
 	}
 
-	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
-		struct flow_dissector_key_eth_addrs *key =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_ETH_ADDRS,
-						  f->key);
-		struct flow_dissector_key_eth_addrs *mask =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_ETH_ADDRS,
-						  f->mask);
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
+		struct flow_match_eth_addrs match;
 
+		flow_rule_match_eth_addrs(rule, &match);
 		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
 					     dmac_47_16),
-				mask->dst);
+				match.mask->dst);
 		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
 					     dmac_47_16),
-				key->dst);
+				match.key->dst);
 
 		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
 					     smac_47_16),
-				mask->src);
+				match.mask->src);
 		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
 					     smac_47_16),
-				key->src);
+				match.key->src);
 
-		if (!is_zero_ether_addr(mask->src) || !is_zero_ether_addr(mask->dst))
+		if (!is_zero_ether_addr(match.mask->src) ||
+		    !is_zero_ether_addr(match.mask->dst))
 			*match_level = MLX5_MATCH_L2;
 	}
 
-	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_CONTROL)) {
-		struct flow_dissector_key_control *key =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_CONTROL,
-						  f->key);
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) {
+		struct flow_match_control match;
 
-		struct flow_dissector_key_control *mask =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_CONTROL,
-						  f->mask);
-		addr_type = key->addr_type;
+		flow_rule_match_control(rule, &match);
+		addr_type = match.key->addr_type;
 
 		/* the HW doesn't support frag first/later */
-		if (mask->flags & FLOW_DIS_FIRST_FRAG)
+		if (match.mask->flags & FLOW_DIS_FIRST_FRAG)
 			return -EOPNOTSUPP;
 
-		if (mask->flags & FLOW_DIS_IS_FRAGMENT) {
+		if (match.mask->flags & FLOW_DIS_IS_FRAGMENT) {
 			MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1);
 			MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag,
-				 key->flags & FLOW_DIS_IS_FRAGMENT);
+				 match.key->flags & FLOW_DIS_IS_FRAGMENT);
 
 			/* the HW doesn't need L3 inline to match on frag=no */
-			if (!(key->flags & FLOW_DIS_IS_FRAGMENT))
+			if (!(match.key->flags & FLOW_DIS_IS_FRAGMENT))
 				*match_level = MLX5_MATCH_L2;
 	/* ***  L2 attributes parsing up to here *** */
 			else
@@ -1640,102 +1610,85 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
 		}
 	}
 
-	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_BASIC)) {
-		struct flow_dissector_key_basic *key =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_BASIC,
-						  f->key);
-		struct flow_dissector_key_basic *mask =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_BASIC,
-						  f->mask);
-		ip_proto = key->ip_proto;
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
+		struct flow_match_basic match;
+
+		flow_rule_match_basic(rule, &match);
+		ip_proto = match.key->ip_proto;
 
 		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
-			 mask->ip_proto);
+			 match.mask->ip_proto);
 		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
-			 key->ip_proto);
+			 match.key->ip_proto);
 
-		if (mask->ip_proto)
+		if (match.mask->ip_proto)
 			*match_level = MLX5_MATCH_L3;
 	}
 
 	if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
-		struct flow_dissector_key_ipv4_addrs *key =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_IPV4_ADDRS,
-						  f->key);
-		struct flow_dissector_key_ipv4_addrs *mask =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_IPV4_ADDRS,
-						  f->mask);
+		struct flow_match_ipv4_addrs match;
 
+		flow_rule_match_ipv4_addrs(rule, &match);
 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
 				    src_ipv4_src_ipv6.ipv4_layout.ipv4),
-		       &mask->src, sizeof(mask->src));
+		       &match.mask->src, sizeof(match.mask->src));
 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
 				    src_ipv4_src_ipv6.ipv4_layout.ipv4),
-		       &key->src, sizeof(key->src));
+		       &match.key->src, sizeof(match.key->src));
 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
 				    dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
-		       &mask->dst, sizeof(mask->dst));
+		       &match.mask->dst, sizeof(match.mask->dst));
 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
 				    dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
-		       &key->dst, sizeof(key->dst));
+		       &match.key->dst, sizeof(match.key->dst));
 
-		if (mask->src || mask->dst)
+		if (match.mask->src || match.mask->dst)
 			*match_level = MLX5_MATCH_L3;
 	}
 
 	if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
-		struct flow_dissector_key_ipv6_addrs *key =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_IPV6_ADDRS,
-						  f->key);
-		struct flow_dissector_key_ipv6_addrs *mask =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_IPV6_ADDRS,
-						  f->mask);
+		struct flow_match_ipv6_addrs match;
 
+		flow_rule_match_ipv6_addrs(rule, &match);
 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
 				    src_ipv4_src_ipv6.ipv6_layout.ipv6),
-		       &mask->src, sizeof(mask->src));
+		       &match.mask->src, sizeof(match.mask->src));
 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
 				    src_ipv4_src_ipv6.ipv6_layout.ipv6),
-		       &key->src, sizeof(key->src));
+		       &match.key->src, sizeof(match.key->src));
 
 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
 				    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
-		       &mask->dst, sizeof(mask->dst));
+		       &match.mask->dst, sizeof(match.mask->dst));
 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
 				    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
-		       &key->dst, sizeof(key->dst));
+		       &match.key->dst, sizeof(match.key->dst));
 
-		if (ipv6_addr_type(&mask->src) != IPV6_ADDR_ANY ||
-		    ipv6_addr_type(&mask->dst) != IPV6_ADDR_ANY)
+		if (ipv6_addr_type(&match.mask->src) != IPV6_ADDR_ANY ||
+		    ipv6_addr_type(&match.mask->dst) != IPV6_ADDR_ANY)
 			*match_level = MLX5_MATCH_L3;
 	}
 
-	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_IP)) {
-		struct flow_dissector_key_ip *key =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_IP,
-						  f->key);
-		struct flow_dissector_key_ip *mask =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_IP,
-						  f->mask);
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IP)) {
+		struct flow_match_ip match;
 
-		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn, mask->tos & 0x3);
-		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn, key->tos & 0x3);
+		flow_rule_match_ip(rule, &match);
+		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn,
+			 match.mask->tos & 0x3);
+		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn,
+			 match.key->tos & 0x3);
 
-		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp, mask->tos >> 2);
-		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp, key->tos  >> 2);
+		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp,
+			 match.mask->tos >> 2);
+		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp,
+			 match.key->tos  >> 2);
 
-		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ttl_hoplimit, mask->ttl);
-		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ttl_hoplimit, key->ttl);
+		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ttl_hoplimit,
+			 match.mask->ttl);
+		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ttl_hoplimit,
+			 match.key->ttl);
 
-		if (mask->ttl &&
+		if (match.mask->ttl &&
 		    !MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev,
 						ft_field_support.outer_ipv4_ttl)) {
 			NL_SET_ERR_MSG_MOD(extack,
@@ -1743,44 +1696,39 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
 			return -EOPNOTSUPP;
 		}
 
-		if (mask->tos || mask->ttl)
+		if (match.mask->tos || match.mask->ttl)
 			*match_level = MLX5_MATCH_L3;
 	}
 
 	/* ***  L3 attributes parsing up to here *** */
 
-	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_PORTS)) {
-		struct flow_dissector_key_ports *key =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_PORTS,
-						  f->key);
-		struct flow_dissector_key_ports *mask =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_PORTS,
-						  f->mask);
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
+		struct flow_match_ports match;
+
+		flow_rule_match_ports(rule, &match);
 		switch (ip_proto) {
 		case IPPROTO_TCP:
 			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
-				 tcp_sport, ntohs(mask->src));
+				 tcp_sport, ntohs(match.mask->src));
 			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
-				 tcp_sport, ntohs(key->src));
+				 tcp_sport, ntohs(match.key->src));
 
 			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
-				 tcp_dport, ntohs(mask->dst));
+				 tcp_dport, ntohs(match.mask->dst));
 			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
-				 tcp_dport, ntohs(key->dst));
+				 tcp_dport, ntohs(match.key->dst));
 			break;
 
 		case IPPROTO_UDP:
 			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
-				 udp_sport, ntohs(mask->src));
+				 udp_sport, ntohs(match.mask->src));
 			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
-				 udp_sport, ntohs(key->src));
+				 udp_sport, ntohs(match.key->src));
 
 			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
-				 udp_dport, ntohs(mask->dst));
+				 udp_dport, ntohs(match.mask->dst));
 			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
-				 udp_dport, ntohs(key->dst));
+				 udp_dport, ntohs(match.key->dst));
 			break;
 		default:
 			NL_SET_ERR_MSG_MOD(extack,
@@ -1790,26 +1738,20 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
 			return -EINVAL;
 		}
 
-		if (mask->src || mask->dst)
+		if (match.mask->src || match.mask->dst)
 			*match_level = MLX5_MATCH_L4;
 	}
 
-	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_TCP)) {
-		struct flow_dissector_key_tcp *key =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_TCP,
-						  f->key);
-		struct flow_dissector_key_tcp *mask =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_TCP,
-						  f->mask);
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_TCP)) {
+		struct flow_match_tcp match;
 
+		flow_rule_match_tcp(rule, &match);
 		MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_flags,
-			 ntohs(mask->flags));
+			 ntohs(match.mask->flags));
 		MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_flags,
-			 ntohs(key->flags));
+			 ntohs(match.key->flags));
 
-		if (mask->flags)
+		if (match.mask->flags)
 			*match_level = MLX5_MATCH_L4;
 	}
 
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
index ff072358d950..a20379e29e02 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
@@ -113,59 +113,49 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp,
 static void mlxsw_sp_flower_parse_ipv4(struct mlxsw_sp_acl_rule_info *rulei,
 				       struct tc_cls_flower_offload *f)
 {
-	struct flow_dissector_key_ipv4_addrs *key =
-		skb_flow_dissector_target(f->dissector,
-					  FLOW_DISSECTOR_KEY_IPV4_ADDRS,
-					  f->key);
-	struct flow_dissector_key_ipv4_addrs *mask =
-		skb_flow_dissector_target(f->dissector,
-					  FLOW_DISSECTOR_KEY_IPV4_ADDRS,
-					  f->mask);
+	struct flow_match_ipv4_addrs match;
+
+	flow_rule_match_ipv4_addrs(f->rule, &match);
 
 	mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_SRC_IP_0_31,
-				       (char *) &key->src,
-				       (char *) &mask->src, 4);
+				       (char *) &match.key->src,
+				       (char *) &match.mask->src, 4);
 	mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_DST_IP_0_31,
-				       (char *) &key->dst,
-				       (char *) &mask->dst, 4);
+				       (char *) &match.key->dst,
+				       (char *) &match.mask->dst, 4);
 }
 
 static void mlxsw_sp_flower_parse_ipv6(struct mlxsw_sp_acl_rule_info *rulei,
 				       struct tc_cls_flower_offload *f)
 {
-	struct flow_dissector_key_ipv6_addrs *key =
-		skb_flow_dissector_target(f->dissector,
-					  FLOW_DISSECTOR_KEY_IPV6_ADDRS,
-					  f->key);
-	struct flow_dissector_key_ipv6_addrs *mask =
-		skb_flow_dissector_target(f->dissector,
-					  FLOW_DISSECTOR_KEY_IPV6_ADDRS,
-					  f->mask);
+	struct flow_match_ipv6_addrs match;
+
+	flow_rule_match_ipv6_addrs(f->rule, &match);
 
 	mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_SRC_IP_96_127,
-				       &key->src.s6_addr[0x0],
-				       &mask->src.s6_addr[0x0], 4);
+				       &match.key->src.s6_addr[0x0],
+				       &match.mask->src.s6_addr[0x0], 4);
 	mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_SRC_IP_64_95,
-				       &key->src.s6_addr[0x4],
-				       &mask->src.s6_addr[0x4], 4);
+				       &match.key->src.s6_addr[0x4],
+				       &match.mask->src.s6_addr[0x4], 4);
 	mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_SRC_IP_32_63,
-				       &key->src.s6_addr[0x8],
-				       &mask->src.s6_addr[0x8], 4);
+				       &match.key->src.s6_addr[0x8],
+				       &match.mask->src.s6_addr[0x8], 4);
 	mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_SRC_IP_0_31,
-				       &key->src.s6_addr[0xC],
-				       &mask->src.s6_addr[0xC], 4);
+				       &match.key->src.s6_addr[0xC],
+				       &match.mask->src.s6_addr[0xC], 4);
 	mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_DST_IP_96_127,
-				       &key->dst.s6_addr[0x0],
-				       &mask->dst.s6_addr[0x0], 4);
+				       &match.key->dst.s6_addr[0x0],
+				       &match.mask->dst.s6_addr[0x0], 4);
 	mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_DST_IP_64_95,
-				       &key->dst.s6_addr[0x4],
-				       &mask->dst.s6_addr[0x4], 4);
+				       &match.key->dst.s6_addr[0x4],
+				       &match.mask->dst.s6_addr[0x4], 4);
 	mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_DST_IP_32_63,
-				       &key->dst.s6_addr[0x8],
-				       &mask->dst.s6_addr[0x8], 4);
+				       &match.key->dst.s6_addr[0x8],
+				       &match.mask->dst.s6_addr[0x8], 4);
 	mlxsw_sp_acl_rulei_keymask_buf(rulei, MLXSW_AFK_ELEMENT_DST_IP_0_31,
-				       &key->dst.s6_addr[0xC],
-				       &mask->dst.s6_addr[0xC], 4);
+				       &match.key->dst.s6_addr[0xC],
+				       &match.mask->dst.s6_addr[0xC], 4);
 }
 
 static int mlxsw_sp_flower_parse_ports(struct mlxsw_sp *mlxsw_sp,
@@ -173,9 +163,10 @@ static int mlxsw_sp_flower_parse_ports(struct mlxsw_sp *mlxsw_sp,
 				       struct tc_cls_flower_offload *f,
 				       u8 ip_proto)
 {
-	struct flow_dissector_key_ports *key, *mask;
+	const struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f);
+	struct flow_match_ports match;
 
-	if (!dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_PORTS))
+	if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS))
 		return 0;
 
 	if (ip_proto != IPPROTO_TCP && ip_proto != IPPROTO_UDP) {
@@ -184,16 +175,13 @@ static int mlxsw_sp_flower_parse_ports(struct mlxsw_sp *mlxsw_sp,
 		return -EINVAL;
 	}
 
-	key = skb_flow_dissector_target(f->dissector,
-					FLOW_DISSECTOR_KEY_PORTS,
-					f->key);
-	mask = skb_flow_dissector_target(f->dissector,
-					 FLOW_DISSECTOR_KEY_PORTS,
-					 f->mask);
+	flow_rule_match_ports(rule, &match);
 	mlxsw_sp_acl_rulei_keymask_u32(rulei, MLXSW_AFK_ELEMENT_DST_L4_PORT,
-				       ntohs(key->dst), ntohs(mask->dst));
+				       ntohs(match.key->dst),
+				       ntohs(match.mask->dst));
 	mlxsw_sp_acl_rulei_keymask_u32(rulei, MLXSW_AFK_ELEMENT_SRC_L4_PORT,
-				       ntohs(key->src), ntohs(mask->src));
+				       ntohs(match.key->src),
+				       ntohs(match.mask->src));
 	return 0;
 }
 
@@ -202,9 +190,10 @@ static int mlxsw_sp_flower_parse_tcp(struct mlxsw_sp *mlxsw_sp,
 				     struct tc_cls_flower_offload *f,
 				     u8 ip_proto)
 {
-	struct flow_dissector_key_tcp *key, *mask;
+	const struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f);
+	struct flow_match_tcp match;
 
-	if (!dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_TCP))
+	if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_TCP))
 		return 0;
 
 	if (ip_proto != IPPROTO_TCP) {
@@ -213,14 +202,11 @@ static int mlxsw_sp_flower_parse_tcp(struct mlxsw_sp *mlxsw_sp,
 		return -EINVAL;
 	}
 
-	key = skb_flow_dissector_target(f->dissector,
-					FLOW_DISSECTOR_KEY_TCP,
-					f->key);
-	mask = skb_flow_dissector_target(f->dissector,
-					 FLOW_DISSECTOR_KEY_TCP,
-					 f->mask);
+	flow_rule_match_tcp(rule, &match);
+
 	mlxsw_sp_acl_rulei_keymask_u32(rulei, MLXSW_AFK_ELEMENT_TCP_FLAGS,
-				       ntohs(key->flags), ntohs(mask->flags));
+				       ntohs(match.key->flags),
+				       ntohs(match.mask->flags));
 	return 0;
 }
 
@@ -229,9 +215,10 @@ static int mlxsw_sp_flower_parse_ip(struct mlxsw_sp *mlxsw_sp,
 				    struct tc_cls_flower_offload *f,
 				    u16 n_proto)
 {
-	struct flow_dissector_key_ip *key, *mask;
+	const struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f);
+	struct flow_match_ip match;
 
-	if (!dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_IP))
+	if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IP))
 		return 0;
 
 	if (n_proto != ETH_P_IP && n_proto != ETH_P_IPV6) {
@@ -240,20 +227,18 @@ static int mlxsw_sp_flower_parse_ip(struct mlxsw_sp *mlxsw_sp,
 		return -EINVAL;
 	}
 
-	key = skb_flow_dissector_target(f->dissector,
-					FLOW_DISSECTOR_KEY_IP,
-					f->key);
-	mask = skb_flow_dissector_target(f->dissector,
-					 FLOW_DISSECTOR_KEY_IP,
-					 f->mask);
+	flow_rule_match_ip(rule, &match);
+
 	mlxsw_sp_acl_rulei_keymask_u32(rulei, MLXSW_AFK_ELEMENT_IP_TTL_,
-				       key->ttl, mask->ttl);
+				       match.key->ttl, match.mask->ttl);
 
 	mlxsw_sp_acl_rulei_keymask_u32(rulei, MLXSW_AFK_ELEMENT_IP_ECN,
-				       key->tos & 0x3, mask->tos & 0x3);
+				       match.key->tos & 0x3,
+				       match.mask->tos & 0x3);
 
 	mlxsw_sp_acl_rulei_keymask_u32(rulei, MLXSW_AFK_ELEMENT_IP_DSCP,
-				       key->tos >> 6, mask->tos >> 6);
+				       match.key->tos >> 6,
+				       match.mask->tos >> 6);
 
 	return 0;
 }
@@ -263,13 +248,15 @@ static int mlxsw_sp_flower_parse(struct mlxsw_sp *mlxsw_sp,
 				 struct mlxsw_sp_acl_rule_info *rulei,
 				 struct tc_cls_flower_offload *f)
 {
+	struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f);
+	struct flow_dissector *dissector = rule->match.dissector;
 	u16 n_proto_mask = 0;
 	u16 n_proto_key = 0;
 	u16 addr_type = 0;
 	u8 ip_proto = 0;
 	int err;
 
-	if (f->dissector->used_keys &
+	if (dissector->used_keys &
 	    ~(BIT(FLOW_DISSECTOR_KEY_CONTROL) |
 	      BIT(FLOW_DISSECTOR_KEY_BASIC) |
 	      BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) |
@@ -286,25 +273,19 @@ static int mlxsw_sp_flower_parse(struct mlxsw_sp *mlxsw_sp,
 
 	mlxsw_sp_acl_rulei_priority(rulei, f->common.prio);
 
-	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_CONTROL)) {
-		struct flow_dissector_key_control *key =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_CONTROL,
-						  f->key);
-		addr_type = key->addr_type;
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) {
+		struct flow_match_control match;
+
+		flow_rule_match_control(rule, &match);
+		addr_type = match.key->addr_type;
 	}
 
-	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_BASIC)) {
-		struct flow_dissector_key_basic *key =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_BASIC,
-						  f->key);
-		struct flow_dissector_key_basic *mask =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_BASIC,
-						  f->mask);
-		n_proto_key = ntohs(key->n_proto);
-		n_proto_mask = ntohs(mask->n_proto);
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
+		struct flow_match_basic match;
+
+		flow_rule_match_basic(rule, &match);
+		n_proto_key = ntohs(match.key->n_proto);
+		n_proto_mask = ntohs(match.mask->n_proto);
 
 		if (n_proto_key == ETH_P_ALL) {
 			n_proto_key = 0;
@@ -314,60 +295,53 @@ static int mlxsw_sp_flower_parse(struct mlxsw_sp *mlxsw_sp,
 					       MLXSW_AFK_ELEMENT_ETHERTYPE,
 					       n_proto_key, n_proto_mask);
 
-		ip_proto = key->ip_proto;
+		ip_proto = match.key->ip_proto;
 		mlxsw_sp_acl_rulei_keymask_u32(rulei,
 					       MLXSW_AFK_ELEMENT_IP_PROTO,
-					       key->ip_proto, mask->ip_proto);
+					       match.key->ip_proto,
+					       match.mask->ip_proto);
 	}
 
-	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
-		struct flow_dissector_key_eth_addrs *key =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_ETH_ADDRS,
-						  f->key);
-		struct flow_dissector_key_eth_addrs *mask =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_ETH_ADDRS,
-						  f->mask);
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
+		struct flow_match_eth_addrs match;
 
+		flow_rule_match_eth_addrs(rule, &match);
 		mlxsw_sp_acl_rulei_keymask_buf(rulei,
 					       MLXSW_AFK_ELEMENT_DMAC_32_47,
-					       key->dst, mask->dst, 2);
+					       match.key->dst,
+					       match.mask->dst, 2);
 		mlxsw_sp_acl_rulei_keymask_buf(rulei,
 					       MLXSW_AFK_ELEMENT_DMAC_0_31,
-					       key->dst + 2, mask->dst + 2, 4);
+					       match.key->dst + 2,
+					       match.mask->dst + 2, 4);
 		mlxsw_sp_acl_rulei_keymask_buf(rulei,
 					       MLXSW_AFK_ELEMENT_SMAC_32_47,
-					       key->src, mask->src, 2);
+					       match.key->src,
+					       match.mask->src, 2);
 		mlxsw_sp_acl_rulei_keymask_buf(rulei,
 					       MLXSW_AFK_ELEMENT_SMAC_0_31,
-					       key->src + 2, mask->src + 2, 4);
+					       match.key->src + 2,
+					       match.mask->src + 2, 4);
 	}
 
-	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_VLAN)) {
-		struct flow_dissector_key_vlan *key =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_VLAN,
-						  f->key);
-		struct flow_dissector_key_vlan *mask =
-			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_VLAN,
-						  f->mask);
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) {
+		struct flow_match_vlan match;
 
+		flow_rule_match_vlan(rule, &match);
 		if (mlxsw_sp_acl_block_is_egress_bound(block)) {
 			NL_SET_ERR_MSG_MOD(f->common.extack, "vlan_id key is not supported on egress");
 			return -EOPNOTSUPP;
 		}
-		if (mask->vlan_id != 0)
+		if (match.mask->vlan_id != 0)
 			mlxsw_sp_acl_rulei_keymask_u32(rulei,
 						       MLXSW_AFK_ELEMENT_VID,
-						       key->vlan_id,
-						       mask->vlan_id);
-		if (mask->vlan_priority != 0)
+						       match.key->vlan_id,
+						       match.mask->vlan_id);
+		if (match.mask->vlan_priority != 0)
 			mlxsw_sp_acl_rulei_keymask_u32(rulei,
 						       MLXSW_AFK_ELEMENT_PCP,
-						       key->vlan_priority,
-						       mask->vlan_priority);
+						       match.key->vlan_priority,
+						       match.mask->vlan_priority);
 	}
 
 	if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS)
diff --git a/drivers/net/ethernet/netronome/nfp/flower/action.c b/drivers/net/ethernet/netronome/nfp/flower/action.c
index 8d54b36afee8..43192640bdd1 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/action.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/action.c
@@ -587,6 +587,7 @@ static int
 nfp_fl_pedit(const struct tc_action *action, struct tc_cls_flower_offload *flow,
 	     char *nfp_action, int *a_len, u32 *csum_updated)
 {
+	struct flow_rule *rule = tc_cls_flower_offload_flow_rule(flow);
 	struct nfp_fl_set_ipv6_addr set_ip6_dst, set_ip6_src;
 	struct nfp_fl_set_ipv6_tc_hl_fl set_ip6_tc_hl_fl;
 	struct nfp_fl_set_ip4_ttl_tos set_ip_ttl_tos;
@@ -643,13 +644,11 @@ nfp_fl_pedit(const struct tc_action *action, struct tc_cls_flower_offload *flow,
 			return err;
 	}
 
-	if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_BASIC)) {
-		struct flow_dissector_key_basic *basic;
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
+		struct flow_match_basic match;
 
-		basic = skb_flow_dissector_target(flow->dissector,
-						  FLOW_DISSECTOR_KEY_BASIC,
-						  flow->key);
-		ip_proto = basic->ip_proto;
+		flow_rule_match_basic(rule, &match);
+		ip_proto = match.key->ip_proto;
 	}
 
 	if (set_eth.head.len_lw) {
diff --git a/drivers/net/ethernet/netronome/nfp/flower/match.c b/drivers/net/ethernet/netronome/nfp/flower/match.c
index c04a0d6b0184..1279fa5da9e1 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/match.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/match.c
@@ -8,31 +8,41 @@
 #include "main.h"
 
 static void
-nfp_flower_compile_meta_tci(struct nfp_flower_meta_tci *frame,
-			    struct tc_cls_flower_offload *flow, u8 key_type,
-			    bool mask_version)
+nfp_flower_compile_meta_tci(struct nfp_flower_meta_tci *ext,
+			    struct nfp_flower_meta_tci *msk,
+			    struct tc_cls_flower_offload *flow, u8 key_type)
 {
-	struct fl_flow_key *target = mask_version ? flow->mask : flow->key;
-	struct flow_dissector_key_vlan *flow_vlan;
+	struct flow_rule *rule = tc_cls_flower_offload_flow_rule(flow);
 	u16 tmp_tci;
 
-	memset(frame, 0, sizeof(struct nfp_flower_meta_tci));
+	memset(ext, 0, sizeof(struct nfp_flower_meta_tci));
+	memset(msk, 0, sizeof(struct nfp_flower_meta_tci));
+
 	/* Populate the metadata frame. */
-	frame->nfp_flow_key_layer = key_type;
-	frame->mask_id = ~0;
+	ext->nfp_flow_key_layer = key_type;
+	ext->mask_id = ~0;
+
+	msk->nfp_flow_key_layer = key_type;
+	msk->mask_id = ~0;
 
-	if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_VLAN)) {
-		flow_vlan = skb_flow_dissector_target(flow->dissector,
-						      FLOW_DISSECTOR_KEY_VLAN,
-						      target);
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) {
+		struct flow_match_vlan match;
+
+		flow_rule_match_vlan(rule, &match);
 		/* Populate the tci field. */
-		if (flow_vlan->vlan_id || flow_vlan->vlan_priority) {
+		if (match.key->vlan_id || match.key->vlan_priority) {
+			tmp_tci = FIELD_PREP(NFP_FLOWER_MASK_VLAN_PRIO,
+					     match.key->vlan_priority) |
+				  FIELD_PREP(NFP_FLOWER_MASK_VLAN_VID,
+					     match.key->vlan_id) |
+				  NFP_FLOWER_MASK_VLAN_CFI;
+			ext->tci = cpu_to_be16(tmp_tci);
 			tmp_tci = FIELD_PREP(NFP_FLOWER_MASK_VLAN_PRIO,
-					     flow_vlan->vlan_priority) |
+					     match.mask->vlan_priority) |
 				  FIELD_PREP(NFP_FLOWER_MASK_VLAN_VID,
-					     flow_vlan->vlan_id) |
+					     match.mask->vlan_id) |
 				  NFP_FLOWER_MASK_VLAN_CFI;
-			frame->tci = cpu_to_be16(tmp_tci);
+			msk->tci = cpu_to_be16(tmp_tci);
 		}
 	}
 }
@@ -64,231 +74,244 @@ nfp_flower_compile_port(struct nfp_flower_in_port *frame, u32 cmsg_port,
 }
 
 static void
-nfp_flower_compile_mac(struct nfp_flower_mac_mpls *frame,
-		       struct tc_cls_flower_offload *flow,
-		       bool mask_version)
+nfp_flower_compile_mac(struct nfp_flower_mac_mpls *ext,
+		       struct nfp_flower_mac_mpls *msk,
+		       struct tc_cls_flower_offload *flow)
 {
-	struct fl_flow_key *target = mask_version ? flow->mask : flow->key;
-	struct flow_dissector_key_eth_addrs *addr;
+	struct flow_rule *rule = tc_cls_flower_offload_flow_rule(flow);
 
-	memset(frame, 0, sizeof(struct nfp_flower_mac_mpls));
+	memset(ext, 0, sizeof(struct nfp_flower_mac_mpls));
+	memset(msk, 0, sizeof(struct nfp_flower_mac_mpls));
 
-	if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
-		addr = skb_flow_dissector_target(flow->dissector,
-						 FLOW_DISSECTOR_KEY_ETH_ADDRS,
-						 target);
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
+		struct flow_match_eth_addrs match;
+
+		flow_rule_match_eth_addrs(rule, &match);
 		/* Populate mac frame. */
-		ether_addr_copy(frame->mac_dst, &addr->dst[0]);
-		ether_addr_copy(frame->mac_src, &addr->src[0]);
+		ether_addr_copy(ext->mac_dst, &match.key->dst[0]);
+		ether_addr_copy(ext->mac_src, &match.key->src[0]);
+		ether_addr_copy(msk->mac_dst, &match.mask->dst[0]);
+		ether_addr_copy(msk->mac_src, &match.mask->src[0]);
 	}
 
-	if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_MPLS)) {
-		struct flow_dissector_key_mpls *mpls;
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_MPLS)) {
+		struct flow_match_mpls match;
 		u32 t_mpls;
 
-		mpls = skb_flow_dissector_target(flow->dissector,
-						 FLOW_DISSECTOR_KEY_MPLS,
-						 target);
-
-		t_mpls = FIELD_PREP(NFP_FLOWER_MASK_MPLS_LB, mpls->mpls_label) |
-			 FIELD_PREP(NFP_FLOWER_MASK_MPLS_TC, mpls->mpls_tc) |
-			 FIELD_PREP(NFP_FLOWER_MASK_MPLS_BOS, mpls->mpls_bos) |
+		flow_rule_match_mpls(rule, &match);
+		t_mpls = FIELD_PREP(NFP_FLOWER_MASK_MPLS_LB, match.key->mpls_label) |
+			 FIELD_PREP(NFP_FLOWER_MASK_MPLS_TC, match.key->mpls_tc) |
+			 FIELD_PREP(NFP_FLOWER_MASK_MPLS_BOS, match.key->mpls_bos) |
 			 NFP_FLOWER_MASK_MPLS_Q;
-
-		frame->mpls_lse = cpu_to_be32(t_mpls);
-	} else if (dissector_uses_key(flow->dissector,
-				      FLOW_DISSECTOR_KEY_BASIC)) {
+		ext->mpls_lse = cpu_to_be32(t_mpls);
+		t_mpls = FIELD_PREP(NFP_FLOWER_MASK_MPLS_LB, match.mask->mpls_label) |
+			 FIELD_PREP(NFP_FLOWER_MASK_MPLS_TC, match.mask->mpls_tc) |
+			 FIELD_PREP(NFP_FLOWER_MASK_MPLS_BOS, match.mask->mpls_bos) |
+			 NFP_FLOWER_MASK_MPLS_Q;
+		msk->mpls_lse = cpu_to_be32(t_mpls);
+	} else if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
 		/* Check for mpls ether type and set NFP_FLOWER_MASK_MPLS_Q
 		 * bit, which indicates an mpls ether type but without any
 		 * mpls fields.
 		 */
-		struct flow_dissector_key_basic *key_basic;
-
-		key_basic = skb_flow_dissector_target(flow->dissector,
-						      FLOW_DISSECTOR_KEY_BASIC,
-						      flow->key);
-		if (key_basic->n_proto == cpu_to_be16(ETH_P_MPLS_UC) ||
-		    key_basic->n_proto == cpu_to_be16(ETH_P_MPLS_MC))
-			frame->mpls_lse = cpu_to_be32(NFP_FLOWER_MASK_MPLS_Q);
+		struct flow_match_basic match;
+
+		flow_rule_match_basic(rule, &match);
+		if (match.key->n_proto == cpu_to_be16(ETH_P_MPLS_UC) ||
+		    match.key->n_proto == cpu_to_be16(ETH_P_MPLS_MC)) {
+			ext->mpls_lse = cpu_to_be32(NFP_FLOWER_MASK_MPLS_Q);
+			msk->mpls_lse = cpu_to_be32(NFP_FLOWER_MASK_MPLS_Q);
+		}
 	}
 }
 
 static void
-nfp_flower_compile_tport(struct nfp_flower_tp_ports *frame,
-			 struct tc_cls_flower_offload *flow,
-			 bool mask_version)
+nfp_flower_compile_tport(struct nfp_flower_tp_ports *ext,
+			 struct nfp_flower_tp_ports *msk,
+			 struct tc_cls_flower_offload *flow)
 {
-	struct fl_flow_key *target = mask_version ? flow->mask : flow->key;
-	struct flow_dissector_key_ports *tp;
+	struct flow_rule *rule = tc_cls_flower_offload_flow_rule(flow);
 
-	memset(frame, 0, sizeof(struct nfp_flower_tp_ports));
+	memset(ext, 0, sizeof(struct nfp_flower_tp_ports));
+	memset(msk, 0, sizeof(struct nfp_flower_tp_ports));
 
-	if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_PORTS)) {
-		tp = skb_flow_dissector_target(flow->dissector,
-					       FLOW_DISSECTOR_KEY_PORTS,
-					       target);
-		frame->port_src = tp->src;
-		frame->port_dst = tp->dst;
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
+		struct flow_match_ports match;
+
+		flow_rule_match_ports(rule, &match);
+		ext->port_src = match.key->src;
+		ext->port_dst = match.key->dst;
+		msk->port_src = match.mask->src;
+		msk->port_dst = match.mask->dst;
 	}
 }
 
 static void
-nfp_flower_compile_ip_ext(struct nfp_flower_ip_ext *frame,
-			  struct tc_cls_flower_offload *flow,
-			  bool mask_version)
+nfp_flower_compile_ip_ext(struct nfp_flower_ip_ext *ext,
+			  struct nfp_flower_ip_ext *msk,
+			  struct tc_cls_flower_offload *flow)
 {
-	struct fl_flow_key *target = mask_version ? flow->mask : flow->key;
+	struct flow_rule *rule = tc_cls_flower_offload_flow_rule(flow);
 
-	if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_BASIC)) {
-		struct flow_dissector_key_basic *basic;
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
+		struct flow_match_basic match;
 
-		basic = skb_flow_dissector_target(flow->dissector,
-						  FLOW_DISSECTOR_KEY_BASIC,
-						  target);
-		frame->proto = basic->ip_proto;
+		flow_rule_match_basic(rule, &match);
+		ext->proto = match.key->ip_proto;
+		msk->proto = match.mask->ip_proto;
 	}
 
-	if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_IP)) {
-		struct flow_dissector_key_ip *flow_ip;
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IP)) {
+		struct flow_match_ip match;
 
-		flow_ip = skb_flow_dissector_target(flow->dissector,
-						    FLOW_DISSECTOR_KEY_IP,
-						    target);
-		frame->tos = flow_ip->tos;
-		frame->ttl = flow_ip->ttl;
+		flow_rule_match_ip(rule, &match);
+		ext->tos = match.key->tos;
+		ext->ttl = match.key->ttl;
+		msk->tos = match.mask->tos;
+		msk->ttl = match.mask->ttl;
 	}
 
-	if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_TCP)) {
-		struct flow_dissector_key_tcp *tcp;
-		u32 tcp_flags;
-
-		tcp = skb_flow_dissector_target(flow->dissector,
-						FLOW_DISSECTOR_KEY_TCP, target);
-		tcp_flags = be16_to_cpu(tcp->flags);
-
-		if (tcp_flags & TCPHDR_FIN)
-			frame->flags |= NFP_FL_TCP_FLAG_FIN;
-		if (tcp_flags & TCPHDR_SYN)
-			frame->flags |= NFP_FL_TCP_FLAG_SYN;
-		if (tcp_flags & TCPHDR_RST)
-			frame->flags |= NFP_FL_TCP_FLAG_RST;
-		if (tcp_flags & TCPHDR_PSH)
-			frame->flags |= NFP_FL_TCP_FLAG_PSH;
-		if (tcp_flags & TCPHDR_URG)
-			frame->flags |= NFP_FL_TCP_FLAG_URG;
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_TCP)) {
+		struct flow_match_tcp match;
+		u16 tcp_flags;
+
+		flow_rule_match_tcp(rule, &match);
+		tcp_flags = be16_to_cpu(match.key->flags);
+
+		if (tcp_flags & TCPHDR_FIN) {
+			ext->flags |= NFP_FL_TCP_FLAG_FIN;
+			msk->flags |= NFP_FL_TCP_FLAG_FIN;
+		}
+		if (tcp_flags & TCPHDR_SYN) {
+			ext->flags |= NFP_FL_TCP_FLAG_SYN;
+			msk->flags |= NFP_FL_TCP_FLAG_SYN;
+		}
+		if (tcp_flags & TCPHDR_RST) {
+			ext->flags |= NFP_FL_TCP_FLAG_RST;
+			msk->flags |= NFP_FL_TCP_FLAG_RST;
+		}
+		if (tcp_flags & TCPHDR_PSH) {
+			ext->flags |= NFP_FL_TCP_FLAG_PSH;
+			msk->flags |= NFP_FL_TCP_FLAG_PSH;
+		}
+		if (tcp_flags & TCPHDR_URG) {
+			ext->flags |= NFP_FL_TCP_FLAG_URG;
+			msk->flags |= NFP_FL_TCP_FLAG_URG;
+		}
 	}
 
-	if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_CONTROL)) {
-		struct flow_dissector_key_control *key;
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) {
+		struct flow_match_control match;
 
-		key = skb_flow_dissector_target(flow->dissector,
-						FLOW_DISSECTOR_KEY_CONTROL,
-						target);
-		if (key->flags & FLOW_DIS_IS_FRAGMENT)
-			frame->flags |= NFP_FL_IP_FRAGMENTED;
-		if (key->flags & FLOW_DIS_FIRST_FRAG)
-			frame->flags |= NFP_FL_IP_FRAG_FIRST;
+		flow_rule_match_control(rule, &match);
+		if (match.key->flags & FLOW_DIS_IS_FRAGMENT) {
+			ext->flags |= NFP_FL_IP_FRAGMENTED;
+			msk->flags |= NFP_FL_IP_FRAGMENTED;
+		}
+		if (match.key->flags & FLOW_DIS_FIRST_FRAG) {
+			ext->flags |= NFP_FL_IP_FRAG_FIRST;
+			msk->flags |= NFP_FL_IP_FRAG_FIRST;
+		}
 	}
 }
 
 static void
-nfp_flower_compile_ipv4(struct nfp_flower_ipv4 *frame,
-			struct tc_cls_flower_offload *flow,
-			bool mask_version)
+nfp_flower_compile_ipv4(struct nfp_flower_ipv4 *ext,
+			struct nfp_flower_ipv4 *msk,
+			struct tc_cls_flower_offload *flow)
 {
-	struct fl_flow_key *target = mask_version ? flow->mask : flow->key;
-	struct flow_dissector_key_ipv4_addrs *addr;
-
-	memset(frame, 0, sizeof(struct nfp_flower_ipv4));
-
-	if (dissector_uses_key(flow->dissector,
-			       FLOW_DISSECTOR_KEY_IPV4_ADDRS)) {
-		addr = skb_flow_dissector_target(flow->dissector,
-						 FLOW_DISSECTOR_KEY_IPV4_ADDRS,
-						 target);
-		frame->ipv4_src = addr->src;
-		frame->ipv4_dst = addr->dst;
+	struct flow_rule *rule = tc_cls_flower_offload_flow_rule(flow);
+	struct flow_match_ipv4_addrs match;
+
+	memset(ext, 0, sizeof(struct nfp_flower_ipv4));
+	memset(msk, 0, sizeof(struct nfp_flower_ipv4));
+
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IPV4_ADDRS)) {
+		flow_rule_match_ipv4_addrs(rule, &match);
+		ext->ipv4_src = match.key->src;
+		ext->ipv4_dst = match.key->dst;
+		msk->ipv4_src = match.mask->src;
+		msk->ipv4_dst = match.mask->dst;
 	}
 
-	nfp_flower_compile_ip_ext(&frame->ip_ext, flow, mask_version);
+	nfp_flower_compile_ip_ext(&ext->ip_ext, &msk->ip_ext, flow);
 }
 
 static void
-nfp_flower_compile_ipv6(struct nfp_flower_ipv6 *frame,
-			struct tc_cls_flower_offload *flow,
-			bool mask_version)
+nfp_flower_compile_ipv6(struct nfp_flower_ipv6 *ext,
+			struct nfp_flower_ipv6 *msk,
+			struct tc_cls_flower_offload *flow)
 {
-	struct fl_flow_key *target = mask_version ? flow->mask : flow->key;
-	struct flow_dissector_key_ipv6_addrs *addr;
-
-	memset(frame, 0, sizeof(struct nfp_flower_ipv6));
-
-	if (dissector_uses_key(flow->dissector,
-			       FLOW_DISSECTOR_KEY_IPV6_ADDRS)) {
-		addr = skb_flow_dissector_target(flow->dissector,
-						 FLOW_DISSECTOR_KEY_IPV6_ADDRS,
-						 target);
-		frame->ipv6_src = addr->src;
-		frame->ipv6_dst = addr->dst;
+	struct flow_rule *rule = tc_cls_flower_offload_flow_rule(flow);
+
+	memset(ext, 0, sizeof(struct nfp_flower_ipv6));
+	memset(msk, 0, sizeof(struct nfp_flower_ipv6));
+
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IPV6_ADDRS)) {
+		struct flow_match_ipv6_addrs match;
+
+		flow_rule_match_ipv6_addrs(rule, &match);
+		ext->ipv6_src = match.key->src;
+		ext->ipv6_dst = match.key->dst;
+		msk->ipv6_src = match.mask->src;
+		msk->ipv6_dst = match.mask->dst;
 	}
 
-	nfp_flower_compile_ip_ext(&frame->ip_ext, flow, mask_version);
+	nfp_flower_compile_ip_ext(&ext->ip_ext, &msk->ip_ext, flow);
 }
 
 static int
-nfp_flower_compile_geneve_opt(void *key_buf, struct tc_cls_flower_offload *flow,
-			      bool mask_version)
+nfp_flower_compile_geneve_opt(void *ext, void *msk,
+			      struct tc_cls_flower_offload *flow)
 {
-	struct fl_flow_key *target = mask_version ? flow->mask : flow->key;
-	struct flow_dissector_key_enc_opts *opts;
+	struct flow_match_enc_opts match;
 
-	opts = skb_flow_dissector_target(flow->dissector,
-					 FLOW_DISSECTOR_KEY_ENC_OPTS,
-					 target);
-	memcpy(key_buf, opts->data, opts->len);
+	flow_rule_match_enc_opts(flow->rule, &match);
+	memcpy(ext, match.key->data, match.key->len);
+	memcpy(msk, match.mask->data, match.mask->len);
 
 	return 0;
 }
 
 static void
-nfp_flower_compile_ipv4_udp_tun(struct nfp_flower_ipv4_udp_tun *frame,
-				struct tc_cls_flower_offload *flow,
-				bool mask_version)
+nfp_flower_compile_ipv4_udp_tun(struct nfp_flower_ipv4_udp_tun *ext,
+				struct nfp_flower_ipv4_udp_tun *msk,
+				struct tc_cls_flower_offload *flow)
 {
-	struct fl_flow_key *target = mask_version ? flow->mask : flow->key;
-	struct flow_dissector_key_ipv4_addrs *tun_ips;
-	struct flow_dissector_key_keyid *vni;
-	struct flow_dissector_key_ip *ip;
+	struct flow_rule *rule = tc_cls_flower_offload_flow_rule(flow);
 
-	memset(frame, 0, sizeof(struct nfp_flower_ipv4_udp_tun));
+	memset(ext, 0, sizeof(struct nfp_flower_ipv4_udp_tun));
+	memset(msk, 0, sizeof(struct nfp_flower_ipv4_udp_tun));
 
-	if (dissector_uses_key(flow->dissector,
-			       FLOW_DISSECTOR_KEY_ENC_KEYID)) {
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) {
+		struct flow_match_enc_keyid match;
 		u32 temp_vni;
 
-		vni = skb_flow_dissector_target(flow->dissector,
-						FLOW_DISSECTOR_KEY_ENC_KEYID,
-						target);
-		temp_vni = be32_to_cpu(vni->keyid) << NFP_FL_TUN_VNI_OFFSET;
-		frame->tun_id = cpu_to_be32(temp_vni);
+		flow_rule_match_enc_keyid(rule, &match);
+		temp_vni = be32_to_cpu(match.key->keyid) << NFP_FL_TUN_VNI_OFFSET;
+		ext->tun_id = cpu_to_be32(temp_vni);
+		temp_vni = be32_to_cpu(match.mask->keyid) << NFP_FL_TUN_VNI_OFFSET;
+		msk->tun_id = cpu_to_be32(temp_vni);
 	}
 
-	if (dissector_uses_key(flow->dissector,
-			       FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS)) {
-		tun_ips =
-		   skb_flow_dissector_target(flow->dissector,
-					     FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
-					     target);
-		frame->ip_src = tun_ips->src;
-		frame->ip_dst = tun_ips->dst;
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS)) {
+		struct flow_match_ipv4_addrs match;
+
+		flow_rule_match_enc_ipv4_addrs(rule, &match);
+		ext->ip_src = match.key->src;
+		ext->ip_dst = match.key->dst;
+		msk->ip_src = match.mask->src;
+		msk->ip_dst = match.mask->dst;
 	}
 
-	if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_ENC_IP)) {
-		ip = skb_flow_dissector_target(flow->dissector,
-					       FLOW_DISSECTOR_KEY_ENC_IP,
-					       target);
-		frame->tos = ip->tos;
-		frame->ttl = ip->ttl;
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IP)) {
+		struct flow_match_ip match;
+
+		flow_rule_match_enc_ip(rule, &match);
+		ext->tos = match.key->tos;
+		ext->ttl = match.key->ttl;
+		msk->tos = match.mask->tos;
+		msk->ttl = match.mask->ttl;
 	}
 }
 
@@ -313,12 +336,9 @@ int nfp_flower_compile_flow_match(struct nfp_app *app,
 	ext = nfp_flow->unmasked_data;
 	msk = nfp_flow->mask_data;
 
-	/* Populate Exact Metadata. */
 	nfp_flower_compile_meta_tci((struct nfp_flower_meta_tci *)ext,
-				    flow, key_ls->key_layer, false);
-	/* Populate Mask Metadata. */
-	nfp_flower_compile_meta_tci((struct nfp_flower_meta_tci *)msk,
-				    flow, key_ls->key_layer, true);
+				    (struct nfp_flower_meta_tci *)msk,
+				    flow, key_ls->key_layer);
 	ext += sizeof(struct nfp_flower_meta_tci);
 	msk += sizeof(struct nfp_flower_meta_tci);
 
@@ -348,45 +368,33 @@ int nfp_flower_compile_flow_match(struct nfp_app *app,
 	msk += sizeof(struct nfp_flower_in_port);
 
 	if (NFP_FLOWER_LAYER_MAC & key_ls->key_layer) {
-		/* Populate Exact MAC Data. */
 		nfp_flower_compile_mac((struct nfp_flower_mac_mpls *)ext,
-				       flow, false);
-		/* Populate Mask MAC Data. */
-		nfp_flower_compile_mac((struct nfp_flower_mac_mpls *)msk,
-				       flow, true);
+				       (struct nfp_flower_mac_mpls *)msk,
+				       flow);
 		ext += sizeof(struct nfp_flower_mac_mpls);
 		msk += sizeof(struct nfp_flower_mac_mpls);
 	}
 
 	if (NFP_FLOWER_LAYER_TP & key_ls->key_layer) {
-		/* Populate Exact TP Data. */
 		nfp_flower_compile_tport((struct nfp_flower_tp_ports *)ext,
-					 flow, false);
-		/* Populate Mask TP Data. */
-		nfp_flower_compile_tport((struct nfp_flower_tp_ports *)msk,
-					 flow, true);
+					 (struct nfp_flower_tp_ports *)msk,
+					 flow);
 		ext += sizeof(struct nfp_flower_tp_ports);
 		msk += sizeof(struct nfp_flower_tp_ports);
 	}
 
 	if (NFP_FLOWER_LAYER_IPV4 & key_ls->key_layer) {
-		/* Populate Exact IPv4 Data. */
 		nfp_flower_compile_ipv4((struct nfp_flower_ipv4 *)ext,
-					flow, false);
-		/* Populate Mask IPv4 Data. */
-		nfp_flower_compile_ipv4((struct nfp_flower_ipv4 *)msk,
-					flow, true);
+					(struct nfp_flower_ipv4 *)msk,
+					flow);
 		ext += sizeof(struct nfp_flower_ipv4);
 		msk += sizeof(struct nfp_flower_ipv4);
 	}
 
 	if (NFP_FLOWER_LAYER_IPV6 & key_ls->key_layer) {
-		/* Populate Exact IPv4 Data. */
 		nfp_flower_compile_ipv6((struct nfp_flower_ipv6 *)ext,
-					flow, false);
-		/* Populate Mask IPv4 Data. */
-		nfp_flower_compile_ipv6((struct nfp_flower_ipv6 *)msk,
-					flow, true);
+					(struct nfp_flower_ipv6 *)msk,
+					flow);
 		ext += sizeof(struct nfp_flower_ipv6);
 		msk += sizeof(struct nfp_flower_ipv6);
 	}
@@ -395,10 +403,7 @@ int nfp_flower_compile_flow_match(struct nfp_app *app,
 	    key_ls->key_layer_two & NFP_FLOWER_LAYER2_GENEVE) {
 		__be32 tun_dst;
 
-		/* Populate Exact VXLAN Data. */
-		nfp_flower_compile_ipv4_udp_tun((void *)ext, flow, false);
-		/* Populate Mask VXLAN Data. */
-		nfp_flower_compile_ipv4_udp_tun((void *)msk, flow, true);
+		nfp_flower_compile_ipv4_udp_tun((void *)ext, (void *)msk, flow);
 		tun_dst = ((struct nfp_flower_ipv4_udp_tun *)ext)->ip_dst;
 		ext += sizeof(struct nfp_flower_ipv4_udp_tun);
 		msk += sizeof(struct nfp_flower_ipv4_udp_tun);
@@ -410,11 +415,7 @@ int nfp_flower_compile_flow_match(struct nfp_app *app,
 		nfp_tunnel_add_ipv4_off(app, tun_dst);
 
 		if (key_ls->key_layer_two & NFP_FLOWER_LAYER2_GENEVE_OP) {
-			err = nfp_flower_compile_geneve_opt(ext, flow, false);
-			if (err)
-				return err;
-
-			err = nfp_flower_compile_geneve_opt(msk, flow, true);
+			err = nfp_flower_compile_geneve_opt(ext, msk, flow);
 			if (err)
 				return err;
 		}
diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c
index 2cdbf29ecbe7..74f7ff292052 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/offload.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c
@@ -102,23 +102,22 @@ nfp_flower_xmit_flow(struct nfp_app *app, struct nfp_fl_payload *nfp_flow,
 
 static bool nfp_flower_check_higher_than_mac(struct tc_cls_flower_offload *f)
 {
-	return dissector_uses_key(f->dissector,
-				  FLOW_DISSECTOR_KEY_IPV4_ADDRS) ||
-		dissector_uses_key(f->dissector,
-				   FLOW_DISSECTOR_KEY_IPV6_ADDRS) ||
-		dissector_uses_key(f->dissector,
-				   FLOW_DISSECTOR_KEY_PORTS) ||
-		dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ICMP);
+	struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f);
+
+	return flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IPV4_ADDRS) ||
+	       flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IPV6_ADDRS) ||
+	       flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS) ||
+	       flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ICMP);
 }
 
 static int
-nfp_flower_calc_opt_layer(struct flow_dissector_key_enc_opts *enc_opts,
+nfp_flower_calc_opt_layer(struct flow_match_enc_opts *enc_opts,
 			  u32 *key_layer_two, int *key_size)
 {
-	if (enc_opts->len > NFP_FL_MAX_GENEVE_OPT_KEY)
+	if (enc_opts->key->len > NFP_FL_MAX_GENEVE_OPT_KEY)
 		return -EOPNOTSUPP;
 
-	if (enc_opts->len > 0) {
+	if (enc_opts->key->len > 0) {
 		*key_layer_two |= NFP_FLOWER_LAYER2_GENEVE_OP;
 		*key_size += sizeof(struct nfp_flower_geneve_options);
 	}
@@ -133,20 +132,21 @@ nfp_flower_calculate_key_layers(struct nfp_app *app,
 				struct tc_cls_flower_offload *flow,
 				enum nfp_flower_tun_type *tun_type)
 {
-	struct flow_dissector_key_basic *mask_basic = NULL;
-	struct flow_dissector_key_basic *key_basic = NULL;
+	struct flow_rule *rule = tc_cls_flower_offload_flow_rule(flow);
+	struct flow_dissector *dissector = rule->match.dissector;
+	struct flow_match_basic basic = { NULL, NULL};
 	struct nfp_flower_priv *priv = app->priv;
 	u32 key_layer_two;
 	u8 key_layer;
 	int key_size;
 	int err;
 
-	if (flow->dissector->used_keys & ~NFP_FLOWER_WHITELIST_DISSECTOR)
+	if (dissector->used_keys & ~NFP_FLOWER_WHITELIST_DISSECTOR)
 		return -EOPNOTSUPP;
 
 	/* If any tun dissector is used then the required set must be used. */
-	if (flow->dissector->used_keys & NFP_FLOWER_WHITELIST_TUN_DISSECTOR &&
-	    (flow->dissector->used_keys & NFP_FLOWER_WHITELIST_TUN_DISSECTOR_R)
+	if (dissector->used_keys & NFP_FLOWER_WHITELIST_TUN_DISSECTOR &&
+	    (dissector->used_keys & NFP_FLOWER_WHITELIST_TUN_DISSECTOR_R)
 	    != NFP_FLOWER_WHITELIST_TUN_DISSECTOR_R)
 		return -EOPNOTSUPP;
 
@@ -155,76 +155,53 @@ nfp_flower_calculate_key_layers(struct nfp_app *app,
 	key_size = sizeof(struct nfp_flower_meta_tci) +
 		   sizeof(struct nfp_flower_in_port);
 
-	if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_ETH_ADDRS) ||
-	    dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_MPLS)) {
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS) ||
+	    flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_MPLS)) {
 		key_layer |= NFP_FLOWER_LAYER_MAC;
 		key_size += sizeof(struct nfp_flower_mac_mpls);
 	}
 
-	if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_VLAN)) {
-		struct flow_dissector_key_vlan *flow_vlan;
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) {
+		struct flow_match_vlan vlan;
 
-		flow_vlan = skb_flow_dissector_target(flow->dissector,
-						      FLOW_DISSECTOR_KEY_VLAN,
-						      flow->mask);
+		flow_rule_match_vlan(rule, &vlan);
 		if (!(priv->flower_ext_feats & NFP_FL_FEATS_VLAN_PCP) &&
-		    flow_vlan->vlan_priority)
+		    vlan.key->vlan_priority)
 			return -EOPNOTSUPP;
 	}
 
-	if (dissector_uses_key(flow->dissector,
-			       FLOW_DISSECTOR_KEY_ENC_CONTROL)) {
-		struct flow_dissector_key_ipv4_addrs *mask_ipv4 = NULL;
-		struct flow_dissector_key_ports *mask_enc_ports = NULL;
-		struct flow_dissector_key_enc_opts *enc_op = NULL;
-		struct flow_dissector_key_ports *enc_ports = NULL;
-		struct flow_dissector_key_control *mask_enc_ctl =
-			skb_flow_dissector_target(flow->dissector,
-						  FLOW_DISSECTOR_KEY_ENC_CONTROL,
-						  flow->mask);
-		struct flow_dissector_key_control *enc_ctl =
-			skb_flow_dissector_target(flow->dissector,
-						  FLOW_DISSECTOR_KEY_ENC_CONTROL,
-						  flow->key);
-
-		if (mask_enc_ctl->addr_type != 0xffff ||
-		    enc_ctl->addr_type != FLOW_DISSECTOR_KEY_IPV4_ADDRS)
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL)) {
+		struct flow_match_enc_opts enc_op = { NULL, NULL };
+		struct flow_match_ipv4_addrs ipv4_addrs;
+		struct flow_match_control enc_ctl;
+		struct flow_match_ports enc_ports;
+
+		flow_rule_match_enc_control(rule, &enc_ctl);
+
+		if (enc_ctl.mask->addr_type != 0xffff ||
+		    enc_ctl.key->addr_type != FLOW_DISSECTOR_KEY_IPV4_ADDRS)
 			return -EOPNOTSUPP;
 
 		/* These fields are already verified as used. */
-		mask_ipv4 =
-			skb_flow_dissector_target(flow->dissector,
-						  FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
-						  flow->mask);
-		if (mask_ipv4->dst != cpu_to_be32(~0))
+		flow_rule_match_enc_ipv4_addrs(rule, &ipv4_addrs);
+		if (ipv4_addrs.mask->dst != cpu_to_be32(~0))
 			return -EOPNOTSUPP;
 
-		mask_enc_ports =
-			skb_flow_dissector_target(flow->dissector,
-						  FLOW_DISSECTOR_KEY_ENC_PORTS,
-						  flow->mask);
-		enc_ports =
-			skb_flow_dissector_target(flow->dissector,
-						  FLOW_DISSECTOR_KEY_ENC_PORTS,
-						  flow->key);
 
-		if (mask_enc_ports->dst != cpu_to_be16(~0))
+		flow_rule_match_enc_ports(rule, &enc_ports);
+		if (enc_ports.mask->dst != cpu_to_be16(~0))
 			return -EOPNOTSUPP;
 
-		if (dissector_uses_key(flow->dissector,
-				       FLOW_DISSECTOR_KEY_ENC_OPTS)) {
-			enc_op = skb_flow_dissector_target(flow->dissector,
-							   FLOW_DISSECTOR_KEY_ENC_OPTS,
-							   flow->key);
-		}
+		if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_OPTS))
+			flow_rule_match_enc_opts(rule, &enc_op);
 
-		switch (enc_ports->dst) {
+		switch (enc_ports.key->dst) {
 		case htons(NFP_FL_VXLAN_PORT):
 			*tun_type = NFP_FL_TUNNEL_VXLAN;
 			key_layer |= NFP_FLOWER_LAYER_VXLAN;
 			key_size += sizeof(struct nfp_flower_ipv4_udp_tun);
 
-			if (enc_op)
+			if (enc_op.key)
 				return -EOPNOTSUPP;
 			break;
 		case htons(NFP_FL_GENEVE_PORT):
@@ -236,11 +213,11 @@ nfp_flower_calculate_key_layers(struct nfp_app *app,
 			key_layer_two |= NFP_FLOWER_LAYER2_GENEVE;
 			key_size += sizeof(struct nfp_flower_ipv4_udp_tun);
 
-			if (!enc_op)
+			if (!enc_op.key)
 				break;
 			if (!(priv->flower_ext_feats & NFP_FL_FEATS_GENEVE_OPT))
 				return -EOPNOTSUPP;
-			err = nfp_flower_calc_opt_layer(enc_op, &key_layer_two,
+			err = nfp_flower_calc_opt_layer(&enc_op, &key_layer_two,
 							&key_size);
 			if (err)
 				return err;
@@ -254,19 +231,12 @@ nfp_flower_calculate_key_layers(struct nfp_app *app,
 			return -EOPNOTSUPP;
 	}
 
-	if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_BASIC)) {
-		mask_basic = skb_flow_dissector_target(flow->dissector,
-						       FLOW_DISSECTOR_KEY_BASIC,
-						       flow->mask);
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC))
+		flow_rule_match_basic(rule, &basic);
 
-		key_basic = skb_flow_dissector_target(flow->dissector,
-						      FLOW_DISSECTOR_KEY_BASIC,
-						      flow->key);
-	}
-
-	if (mask_basic && mask_basic->n_proto) {
+	if (basic.mask && basic.mask->n_proto) {
 		/* Ethernet type is present in the key. */
-		switch (key_basic->n_proto) {
+		switch (basic.key->n_proto) {
 		case cpu_to_be16(ETH_P_IP):
 			key_layer |= NFP_FLOWER_LAYER_IPV4;
 			key_size += sizeof(struct nfp_flower_ipv4);
@@ -305,9 +275,9 @@ nfp_flower_calculate_key_layers(struct nfp_app *app,
 		}
 	}
 
-	if (mask_basic && mask_basic->ip_proto) {
+	if (basic.mask && basic.mask->ip_proto) {
 		/* Ethernet type is present in the key. */
-		switch (key_basic->ip_proto) {
+		switch (basic.key->ip_proto) {
 		case IPPROTO_TCP:
 		case IPPROTO_UDP:
 		case IPPROTO_SCTP:
@@ -324,14 +294,12 @@ nfp_flower_calculate_key_layers(struct nfp_app *app,
 		}
 	}
 
-	if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_TCP)) {
-		struct flow_dissector_key_tcp *tcp;
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_TCP)) {
+		struct flow_match_tcp tcp;
 		u32 tcp_flags;
 
-		tcp = skb_flow_dissector_target(flow->dissector,
-						FLOW_DISSECTOR_KEY_TCP,
-						flow->key);
-		tcp_flags = be16_to_cpu(tcp->flags);
+		flow_rule_match_tcp(rule, &tcp);
+		tcp_flags = be16_to_cpu(tcp.key->flags);
 
 		if (tcp_flags & ~NFP_FLOWER_SUPPORTED_TCPFLAGS)
 			return -EOPNOTSUPP;
@@ -347,12 +315,12 @@ nfp_flower_calculate_key_layers(struct nfp_app *app,
 		 * space, thus we need to ensure we include a IPv4/IPv6 key
 		 * layer if we have not done so already.
 		 */
-		if (!key_basic)
+		if (!basic.key)
 			return -EOPNOTSUPP;
 
 		if (!(key_layer & NFP_FLOWER_LAYER_IPV4) &&
 		    !(key_layer & NFP_FLOWER_LAYER_IPV6)) {
-			switch (key_basic->n_proto) {
+			switch (basic.key->n_proto) {
 			case cpu_to_be16(ETH_P_IP):
 				key_layer |= NFP_FLOWER_LAYER_IPV4;
 				key_size += sizeof(struct nfp_flower_ipv4);
@@ -369,14 +337,11 @@ nfp_flower_calculate_key_layers(struct nfp_app *app,
 		}
 	}
 
-	if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_CONTROL)) {
-		struct flow_dissector_key_control *key_ctl;
-
-		key_ctl = skb_flow_dissector_target(flow->dissector,
-						    FLOW_DISSECTOR_KEY_CONTROL,
-						    flow->key);
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) {
+		struct flow_match_control ctl;
 
-		if (key_ctl->flags & ~NFP_FLOWER_SUPPORTED_CTLFLAGS)
+		flow_rule_match_control(rule, &ctl);
+		if (ctl.key->flags & ~NFP_FLOWER_SUPPORTED_CTLFLAGS)
 			return -EOPNOTSUPP;
 	}
 
diff --git a/drivers/net/ethernet/qlogic/qede/qede_filter.c b/drivers/net/ethernet/qlogic/qede/qede_filter.c
index b16ce7d93caf..81d5b9304229 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_filter.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_filter.c
@@ -2033,24 +2033,20 @@ qede_tc_parse_ports(struct qede_dev *edev,
 		    struct tc_cls_flower_offload *f,
 		    struct qede_arfs_tuple *t)
 {
-	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_PORTS)) {
-		struct flow_dissector_key_ports *key, *mask;
-
-		key = skb_flow_dissector_target(f->dissector,
-						FLOW_DISSECTOR_KEY_PORTS,
-						f->key);
-		mask = skb_flow_dissector_target(f->dissector,
-						 FLOW_DISSECTOR_KEY_PORTS,
-						 f->mask);
-
-		if ((key->src && mask->src != U16_MAX) ||
-		    (key->dst && mask->dst != U16_MAX)) {
+	struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f);
+
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
+		struct flow_match_ports match;
+
+		flow_rule_match_ports(rule, &match);
+		if ((match.key->src && match.mask->src != U16_MAX) ||
+		    (match.key->dst && match.mask->dst != U16_MAX)) {
 			DP_NOTICE(edev, "Do not support ports masks\n");
 			return -EINVAL;
 		}
 
-		t->src_port = key->src;
-		t->dst_port = key->dst;
+		t->src_port = match.key->src;
+		t->dst_port = match.key->dst;
 	}
 
 	return 0;
@@ -2061,32 +2057,27 @@ qede_tc_parse_v6_common(struct qede_dev *edev,
 			struct tc_cls_flower_offload *f,
 			struct qede_arfs_tuple *t)
 {
+	struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f);
 	struct in6_addr zero_addr, addr;
 
 	memset(&zero_addr, 0, sizeof(addr));
 	memset(&addr, 0xff, sizeof(addr));
 
-	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_IPV6_ADDRS)) {
-		struct flow_dissector_key_ipv6_addrs *key, *mask;
-
-		key = skb_flow_dissector_target(f->dissector,
-						FLOW_DISSECTOR_KEY_IPV6_ADDRS,
-						f->key);
-		mask = skb_flow_dissector_target(f->dissector,
-						 FLOW_DISSECTOR_KEY_IPV6_ADDRS,
-						 f->mask);
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IPV6_ADDRS)) {
+		struct flow_match_ipv6_addrs match;
 
-		if ((memcmp(&key->src, &zero_addr, sizeof(addr)) &&
-		     memcmp(&mask->src, &addr, sizeof(addr))) ||
-		    (memcmp(&key->dst, &zero_addr, sizeof(addr)) &&
-		     memcmp(&mask->dst, &addr, sizeof(addr)))) {
+		flow_rule_match_ipv6_addrs(rule, &match);
+		if ((memcmp(&match.key->src, &zero_addr, sizeof(addr)) &&
+		     memcmp(&match.mask->src, &addr, sizeof(addr))) ||
+		    (memcmp(&match.key->dst, &zero_addr, sizeof(addr)) &&
+		     memcmp(&match.mask->dst, &addr, sizeof(addr)))) {
 			DP_NOTICE(edev,
 				  "Do not support IPv6 address prefix/mask\n");
 			return -EINVAL;
 		}
 
-		memcpy(&t->src_ipv6, &key->src, sizeof(addr));
-		memcpy(&t->dst_ipv6, &key->dst, sizeof(addr));
+		memcpy(&t->src_ipv6, &match.key->src, sizeof(addr));
+		memcpy(&t->dst_ipv6, &match.key->dst, sizeof(addr));
 	}
 
 	if (qede_tc_parse_ports(edev, f, t))
@@ -2100,24 +2091,20 @@ qede_tc_parse_v4_common(struct qede_dev *edev,
 			struct tc_cls_flower_offload *f,
 			struct qede_arfs_tuple *t)
 {
-	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_IPV4_ADDRS)) {
-		struct flow_dissector_key_ipv4_addrs *key, *mask;
+	struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f);
 
-		key = skb_flow_dissector_target(f->dissector,
-						FLOW_DISSECTOR_KEY_IPV4_ADDRS,
-						f->key);
-		mask = skb_flow_dissector_target(f->dissector,
-						 FLOW_DISSECTOR_KEY_IPV4_ADDRS,
-						 f->mask);
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IPV4_ADDRS)) {
+		struct flow_match_ipv4_addrs match;
 
-		if ((key->src && mask->src != U32_MAX) ||
-		    (key->dst && mask->dst != U32_MAX)) {
+		flow_rule_match_ipv4_addrs(rule, &match);
+		if ((match.key->src && match.mask->src != U32_MAX) ||
+		    (match.key->dst && match.mask->dst != U32_MAX)) {
 			DP_NOTICE(edev, "Do not support ipv4 prefix/masks\n");
 			return -EINVAL;
 		}
 
-		t->src_ipv4 = key->src;
-		t->dst_ipv4 = key->dst;
+		t->src_ipv4 = match.key->src;
+		t->dst_ipv4 = match.key->dst;
 	}
 
 	if (qede_tc_parse_ports(edev, f, t))
@@ -2175,19 +2162,21 @@ qede_parse_flower_attr(struct qede_dev *edev, __be16 proto,
 		       struct tc_cls_flower_offload *f,
 		       struct qede_arfs_tuple *tuple)
 {
+	struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f);
+	struct flow_dissector *dissector = rule->match.dissector;
 	int rc = -EINVAL;
 	u8 ip_proto = 0;
 
 	memset(tuple, 0, sizeof(*tuple));
 
-	if (f->dissector->used_keys &
+	if (dissector->used_keys &
 	    ~(BIT(FLOW_DISSECTOR_KEY_CONTROL) |
 	      BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
 	      BIT(FLOW_DISSECTOR_KEY_BASIC) |
 	      BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) |
 	      BIT(FLOW_DISSECTOR_KEY_PORTS))) {
 		DP_NOTICE(edev, "Unsupported key set:0x%x\n",
-			  f->dissector->used_keys);
+			  dissector->used_keys);
 		return -EOPNOTSUPP;
 	}
 
@@ -2197,13 +2186,11 @@ qede_parse_flower_attr(struct qede_dev *edev, __be16 proto,
 		return -EPROTONOSUPPORT;
 	}
 
-	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_BASIC)) {
-		struct flow_dissector_key_basic *key;
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
+		struct flow_match_basic match;
 
-		key = skb_flow_dissector_target(f->dissector,
-						FLOW_DISSECTOR_KEY_BASIC,
-						f->key);
-		ip_proto = key->ip_proto;
+		flow_rule_match_basic(rule, &match);
+		ip_proto = match.key->ip_proto;
 	}
 
 	if (ip_proto == IPPROTO_TCP && proto == htons(ETH_P_IP))
diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h
new file mode 100644
index 000000000000..461c66595763
--- /dev/null
+++ b/include/net/flow_offload.h
@@ -0,0 +1,115 @@
+#ifndef _NET_FLOW_OFFLOAD_H
+#define _NET_FLOW_OFFLOAD_H
+
+#include <net/flow_dissector.h>
+
+struct flow_match {
+	struct flow_dissector	*dissector;
+	void			*mask;
+	void			*key;
+};
+
+struct flow_match_basic {
+	struct flow_dissector_key_basic *key, *mask;
+};
+
+struct flow_match_control {
+	struct flow_dissector_key_control *key, *mask;
+};
+
+struct flow_match_eth_addrs {
+	struct flow_dissector_key_eth_addrs *key, *mask;
+};
+
+struct flow_match_vlan {
+	struct flow_dissector_key_vlan *key, *mask;
+};
+
+struct flow_match_ipv4_addrs {
+	struct flow_dissector_key_ipv4_addrs *key, *mask;
+};
+
+struct flow_match_ipv6_addrs {
+	struct flow_dissector_key_ipv6_addrs *key, *mask;
+};
+
+struct flow_match_ip {
+	struct flow_dissector_key_ip *key, *mask;
+};
+
+struct flow_match_ports {
+	struct flow_dissector_key_ports *key, *mask;
+};
+
+struct flow_match_icmp {
+	struct flow_dissector_key_icmp *key, *mask;
+};
+
+struct flow_match_tcp {
+	struct flow_dissector_key_tcp *key, *mask;
+};
+
+struct flow_match_mpls {
+	struct flow_dissector_key_mpls *key, *mask;
+};
+
+struct flow_match_enc_keyid {
+	struct flow_dissector_key_keyid *key, *mask;
+};
+
+struct flow_match_enc_opts {
+	struct flow_dissector_key_enc_opts *key, *mask;
+};
+
+struct flow_rule;
+
+void flow_rule_match_basic(const struct flow_rule *rule,
+			   struct flow_match_basic *out);
+void flow_rule_match_control(const struct flow_rule *rule,
+			     struct flow_match_control *out);
+void flow_rule_match_eth_addrs(const struct flow_rule *rule,
+			       struct flow_match_eth_addrs *out);
+void flow_rule_match_vlan(const struct flow_rule *rule,
+			  struct flow_match_vlan *out);
+void flow_rule_match_ipv4_addrs(const struct flow_rule *rule,
+				struct flow_match_ipv4_addrs *out);
+void flow_rule_match_ipv6_addrs(const struct flow_rule *rule,
+				struct flow_match_ipv6_addrs *out);
+void flow_rule_match_ip(const struct flow_rule *rule,
+			struct flow_match_ip *out);
+void flow_rule_match_ports(const struct flow_rule *rule,
+			   struct flow_match_ports *out);
+void flow_rule_match_tcp(const struct flow_rule *rule,
+			 struct flow_match_tcp *out);
+void flow_rule_match_icmp(const struct flow_rule *rule,
+			  struct flow_match_icmp *out);
+void flow_rule_match_mpls(const struct flow_rule *rule,
+			  struct flow_match_mpls *out);
+void flow_rule_match_enc_control(const struct flow_rule *rule,
+				 struct flow_match_control *out);
+void flow_rule_match_enc_ipv4_addrs(const struct flow_rule *rule,
+				    struct flow_match_ipv4_addrs *out);
+void flow_rule_match_enc_ipv6_addrs(const struct flow_rule *rule,
+				    struct flow_match_ipv6_addrs *out);
+void flow_rule_match_enc_ip(const struct flow_rule *rule,
+			    struct flow_match_ip *out);
+void flow_rule_match_enc_ports(const struct flow_rule *rule,
+			       struct flow_match_ports *out);
+void flow_rule_match_enc_keyid(const struct flow_rule *rule,
+			       struct flow_match_enc_keyid *out);
+void flow_rule_match_enc_opts(const struct flow_rule *rule,
+			      struct flow_match_enc_opts *out);
+
+struct flow_rule {
+	struct flow_match	match;
+};
+
+struct flow_rule *flow_rule_alloc(void);
+
+static inline bool flow_rule_match_key(const struct flow_rule *rule,
+				       enum flow_dissector_key_id key)
+{
+	return dissector_uses_key(rule->match.dissector, key);
+}
+
+#endif /* _NET_FLOW_OFFLOAD_H */
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 40965fbbcd31..04b64523cc32 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -6,6 +6,7 @@
 #include <linux/workqueue.h>
 #include <net/sch_generic.h>
 #include <net/act_api.h>
+#include <net/flow_offload.h>
 
 /* TC action not accessible from user space */
 #define TC_ACT_REINSERT		(TC_ACT_VALUE_MAX + 1)
@@ -760,13 +761,17 @@ struct tc_cls_flower_offload {
 	struct tc_cls_common_offload common;
 	enum tc_fl_command command;
 	unsigned long cookie;
-	struct flow_dissector *dissector;
-	struct fl_flow_key *mask;
-	struct fl_flow_key *key;
+	struct flow_rule *rule;
 	struct tcf_exts *exts;
 	u32 classid;
 };
 
+static inline struct flow_rule *
+tc_cls_flower_offload_flow_rule(struct tc_cls_flower_offload *tc_flow_cmd)
+{
+	return tc_flow_cmd->rule;
+}
+
 enum tc_matchall_command {
 	TC_CLSMATCHALL_REPLACE,
 	TC_CLSMATCHALL_DESTROY,
diff --git a/net/core/Makefile b/net/core/Makefile
index fccd31e0e7f7..f97d6254e564 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -11,7 +11,7 @@ obj-$(CONFIG_SYSCTL) += sysctl_net_core.o
 obj-y		     += dev.o ethtool.o dev_addr_lists.o dst.o netevent.o \
 			neighbour.o rtnetlink.o utils.o link_watch.o filter.o \
 			sock_diag.o dev_ioctl.o tso.o sock_reuseport.o \
-			fib_notifier.o xdp.o
+			fib_notifier.o xdp.o flow_offload.o
 
 obj-y += net-sysfs.o
 obj-$(CONFIG_PAGE_POOL) += page_pool.o
diff --git a/net/core/flow_offload.c b/net/core/flow_offload.c
new file mode 100644
index 000000000000..2fbf6903d2f6
--- /dev/null
+++ b/net/core/flow_offload.c
@@ -0,0 +1,143 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <net/flow_offload.h>
+
+struct flow_rule *flow_rule_alloc(void)
+{
+	return kzalloc(sizeof(struct flow_rule), GFP_KERNEL);
+}
+EXPORT_SYMBOL(flow_rule_alloc);
+
+#define FLOW_DISSECTOR_MATCH(__rule, __type, __out)				\
+	const struct flow_match *__m = &(__rule)->match;			\
+	struct flow_dissector *__d = (__m)->dissector;				\
+										\
+	(__out)->key = skb_flow_dissector_target(__d, __type, (__m)->key);	\
+	(__out)->mask = skb_flow_dissector_target(__d, __type, (__m)->mask);	\
+
+void flow_rule_match_basic(const struct flow_rule *rule,
+			   struct flow_match_basic *out)
+{
+	FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_BASIC, out);
+}
+EXPORT_SYMBOL(flow_rule_match_basic);
+
+void flow_rule_match_control(const struct flow_rule *rule,
+			     struct flow_match_control *out)
+{
+	FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_CONTROL, out);
+}
+EXPORT_SYMBOL(flow_rule_match_control);
+
+void flow_rule_match_eth_addrs(const struct flow_rule *rule,
+			       struct flow_match_eth_addrs *out)
+{
+	FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS, out);
+}
+EXPORT_SYMBOL(flow_rule_match_eth_addrs);
+
+void flow_rule_match_vlan(const struct flow_rule *rule,
+			  struct flow_match_vlan *out)
+{
+	FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_VLAN, out);
+}
+EXPORT_SYMBOL(flow_rule_match_vlan);
+
+void flow_rule_match_ipv4_addrs(const struct flow_rule *rule,
+				struct flow_match_ipv4_addrs *out)
+{
+	FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_IPV4_ADDRS, out);
+}
+EXPORT_SYMBOL(flow_rule_match_ipv4_addrs);
+
+void flow_rule_match_ipv6_addrs(const struct flow_rule *rule,
+				struct flow_match_ipv6_addrs *out)
+{
+	FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_IPV6_ADDRS, out);
+}
+EXPORT_SYMBOL(flow_rule_match_ipv6_addrs);
+
+void flow_rule_match_ip(const struct flow_rule *rule,
+			struct flow_match_ip *out)
+{
+	FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_IP, out);
+}
+EXPORT_SYMBOL(flow_rule_match_ip);
+
+void flow_rule_match_ports(const struct flow_rule *rule,
+			   struct flow_match_ports *out)
+{
+	FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_PORTS, out);
+}
+EXPORT_SYMBOL(flow_rule_match_ports);
+
+void flow_rule_match_tcp(const struct flow_rule *rule,
+			 struct flow_match_tcp *out)
+{
+	FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_TCP, out);
+}
+EXPORT_SYMBOL(flow_rule_match_tcp);
+
+void flow_rule_match_icmp(const struct flow_rule *rule,
+			  struct flow_match_icmp *out)
+{
+	FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_ICMP, out);
+}
+EXPORT_SYMBOL(flow_rule_match_icmp);
+
+void flow_rule_match_mpls(const struct flow_rule *rule,
+			  struct flow_match_mpls *out)
+{
+	FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_MPLS, out);
+}
+EXPORT_SYMBOL(flow_rule_match_mpls);
+
+void flow_rule_match_enc_control(const struct flow_rule *rule,
+				 struct flow_match_control *out)
+{
+	FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL, out);
+}
+EXPORT_SYMBOL(flow_rule_match_enc_control);
+
+void flow_rule_match_enc_ipv4_addrs(const struct flow_rule *rule,
+				    struct flow_match_ipv4_addrs *out)
+{
+	FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS, out);
+}
+EXPORT_SYMBOL(flow_rule_match_enc_ipv4_addrs);
+
+void flow_rule_match_enc_ipv6_addrs(const struct flow_rule *rule,
+				    struct flow_match_ipv6_addrs *out)
+{
+	FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS, out);
+}
+EXPORT_SYMBOL(flow_rule_match_enc_ipv6_addrs);
+
+void flow_rule_match_enc_ip(const struct flow_rule *rule,
+			    struct flow_match_ip *out)
+{
+	FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_ENC_IP, out);
+}
+EXPORT_SYMBOL(flow_rule_match_enc_ip);
+
+void flow_rule_match_enc_ports(const struct flow_rule *rule,
+			       struct flow_match_ports *out)
+{
+	FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_ENC_PORTS, out);
+}
+EXPORT_SYMBOL(flow_rule_match_enc_ports);
+
+void flow_rule_match_enc_keyid(const struct flow_rule *rule,
+			       struct flow_match_enc_keyid *out)
+{
+	FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_ENC_KEYID, out);
+}
+EXPORT_SYMBOL(flow_rule_match_enc_keyid);
+
+void flow_rule_match_enc_opts(const struct flow_rule *rule,
+			      struct flow_match_enc_opts *out)
+{
+	FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_ENC_OPTS, out);
+}
+EXPORT_SYMBOL(flow_rule_match_enc_opts);
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index f6aa57fbbbaf..aaffea0b66e9 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -381,16 +381,22 @@ static int fl_hw_replace_filter(struct tcf_proto *tp,
 	bool skip_sw = tc_skip_sw(f->flags);
 	int err;
 
+	cls_flower.rule = flow_rule_alloc();
+	if (!cls_flower.rule)
+		return -ENOMEM;
+
 	tc_cls_common_offload_init(&cls_flower.common, tp, f->flags, extack);
 	cls_flower.command = TC_CLSFLOWER_REPLACE;
 	cls_flower.cookie = (unsigned long) f;
-	cls_flower.dissector = &f->mask->dissector;
-	cls_flower.mask = &f->mask->key;
-	cls_flower.key = &f->mkey;
+	cls_flower.rule->match.dissector = &f->mask->dissector;
+	cls_flower.rule->match.mask = &f->mask->key;
+	cls_flower.rule->match.key = &f->mkey;
 	cls_flower.exts = &f->exts;
 	cls_flower.classid = f->res.classid;
 
 	err = tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, skip_sw);
+	kfree(cls_flower.rule);
+
 	if (err < 0) {
 		fl_hw_destroy_filter(tp, f, NULL);
 		return err;
@@ -1463,18 +1469,24 @@ static int fl_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb,
 			if (tc_skip_hw(f->flags))
 				continue;
 
+			cls_flower.rule = flow_rule_alloc();
+			if (!cls_flower.rule)
+				return -ENOMEM;
+
 			tc_cls_common_offload_init(&cls_flower.common, tp,
 						   f->flags, extack);
 			cls_flower.command = add ?
 				TC_CLSFLOWER_REPLACE : TC_CLSFLOWER_DESTROY;
 			cls_flower.cookie = (unsigned long)f;
-			cls_flower.dissector = &mask->dissector;
-			cls_flower.mask = &mask->key;
-			cls_flower.key = &f->mkey;
+			cls_flower.rule->match.dissector = &mask->dissector;
+			cls_flower.rule->match.mask = &mask->key;
+			cls_flower.rule->match.key = &f->mkey;
 			cls_flower.exts = &f->exts;
 			cls_flower.classid = f->res.classid;
 
 			err = cb(TC_SETUP_CLSFLOWER, &cls_flower, cb_priv);
+			kfree(cls_flower.rule);
+
 			if (err) {
 				if (add && tc_skip_sw(f->flags))
 					return err;
@@ -1489,25 +1501,32 @@ static int fl_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb,
 	return 0;
 }
 
-static void fl_hw_create_tmplt(struct tcf_chain *chain,
-			       struct fl_flow_tmplt *tmplt)
+static int fl_hw_create_tmplt(struct tcf_chain *chain,
+			      struct fl_flow_tmplt *tmplt)
 {
 	struct tc_cls_flower_offload cls_flower = {};
 	struct tcf_block *block = chain->block;
 	struct tcf_exts dummy_exts = { 0, };
 
+	cls_flower.rule = flow_rule_alloc();
+	if (!cls_flower.rule)
+		return -ENOMEM;
+
 	cls_flower.common.chain_index = chain->index;
 	cls_flower.command = TC_CLSFLOWER_TMPLT_CREATE;
 	cls_flower.cookie = (unsigned long) tmplt;
-	cls_flower.dissector = &tmplt->dissector;
-	cls_flower.mask = &tmplt->mask;
-	cls_flower.key = &tmplt->dummy_key;
+	cls_flower.rule->match.dissector = &tmplt->dissector;
+	cls_flower.rule->match.mask = &tmplt->mask;
+	cls_flower.rule->match.key = &tmplt->dummy_key;
 	cls_flower.exts = &dummy_exts;
 
 	/* We don't care if driver (any of them) fails to handle this
 	 * call. It serves just as a hint for it.
 	 */
 	tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false);
+	kfree(cls_flower.rule);
+
+	return 0;
 }
 
 static void fl_hw_destroy_tmplt(struct tcf_chain *chain,
@@ -1551,12 +1570,14 @@ static void *fl_tmplt_create(struct net *net, struct tcf_chain *chain,
 	err = fl_set_key(net, tb, &tmplt->dummy_key, &tmplt->mask, extack);
 	if (err)
 		goto errout_tmplt;
-	kfree(tb);
 
 	fl_init_dissector(&tmplt->dissector, &tmplt->mask);
 
-	fl_hw_create_tmplt(chain, tmplt);
+	err = fl_hw_create_tmplt(chain, tmplt);
+	if (err)
+		goto errout_tmplt;
 
+	kfree(tb);
 	return tmplt;
 
 errout_tmplt:
-- 
cgit v1.2.3


From e3ab786b42535da00c731c3585165e88bf35ab09 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Sat, 2 Feb 2019 12:50:45 +0100
Subject: flow_offload: add flow action infrastructure

This new infrastructure defines the nic actions that you can perform
from existing network drivers. This infrastructure allows us to avoid a
direct dependency with the native software TC action representation.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/flow_offload.h | 69 +++++++++++++++++++++++++++++++++++++++++++++-
 include/net/pkt_cls.h      |  1 +
 net/core/flow_offload.c    | 14 ++++++++--
 net/sched/cls_api.c        | 17 ++++++++++++
 net/sched/cls_flower.c     |  7 +++--
 5 files changed, 102 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h
index 461c66595763..dabc819b6cc9 100644
--- a/include/net/flow_offload.h
+++ b/include/net/flow_offload.h
@@ -100,11 +100,78 @@ void flow_rule_match_enc_keyid(const struct flow_rule *rule,
 void flow_rule_match_enc_opts(const struct flow_rule *rule,
 			      struct flow_match_enc_opts *out);
 
+enum flow_action_id {
+	FLOW_ACTION_ACCEPT		= 0,
+	FLOW_ACTION_DROP,
+	FLOW_ACTION_TRAP,
+	FLOW_ACTION_GOTO,
+	FLOW_ACTION_REDIRECT,
+	FLOW_ACTION_MIRRED,
+	FLOW_ACTION_VLAN_PUSH,
+	FLOW_ACTION_VLAN_POP,
+	FLOW_ACTION_VLAN_MANGLE,
+	FLOW_ACTION_TUNNEL_ENCAP,
+	FLOW_ACTION_TUNNEL_DECAP,
+	FLOW_ACTION_MANGLE,
+	FLOW_ACTION_ADD,
+	FLOW_ACTION_CSUM,
+	FLOW_ACTION_MARK,
+};
+
+/* This is mirroring enum pedit_header_type definition for easy mapping between
+ * tc pedit action. Legacy TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK is mapped to
+ * FLOW_ACT_MANGLE_UNSPEC, which is supported by no driver.
+ */
+enum flow_action_mangle_base {
+	FLOW_ACT_MANGLE_UNSPEC		= 0,
+	FLOW_ACT_MANGLE_HDR_TYPE_ETH,
+	FLOW_ACT_MANGLE_HDR_TYPE_IP4,
+	FLOW_ACT_MANGLE_HDR_TYPE_IP6,
+	FLOW_ACT_MANGLE_HDR_TYPE_TCP,
+	FLOW_ACT_MANGLE_HDR_TYPE_UDP,
+};
+
+struct flow_action_entry {
+	enum flow_action_id		id;
+	union {
+		u32			chain_index;	/* FLOW_ACTION_GOTO */
+		struct net_device	*dev;		/* FLOW_ACTION_REDIRECT */
+		struct {				/* FLOW_ACTION_VLAN */
+			u16		vid;
+			__be16		proto;
+			u8		prio;
+		} vlan;
+		struct {				/* FLOW_ACTION_PACKET_EDIT */
+			enum flow_action_mangle_base htype;
+			u32		offset;
+			u32		mask;
+			u32		val;
+		} mangle;
+		const struct ip_tunnel_info *tunnel;	/* FLOW_ACTION_TUNNEL_ENCAP */
+		u32			csum_flags;	/* FLOW_ACTION_CSUM */
+		u32			mark;		/* FLOW_ACTION_MARK */
+	};
+};
+
+struct flow_action {
+	unsigned int			num_entries;
+	struct flow_action_entry 	entries[0];
+};
+
+static inline bool flow_action_has_entries(const struct flow_action *action)
+{
+	return action->num_entries;
+}
+
+#define flow_action_for_each(__i, __act, __actions)			\
+        for (__i = 0, __act = &(__actions)->entries[0]; __i < (__actions)->num_entries; __act = &(__actions)->entries[__i++])
+
 struct flow_rule {
 	struct flow_match	match;
+	struct flow_action	action;
 };
 
-struct flow_rule *flow_rule_alloc(void);
+struct flow_rule *flow_rule_alloc(unsigned int num_actions);
 
 static inline bool flow_rule_match_key(const struct flow_rule *rule,
 				       enum flow_dissector_key_id key)
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 04b64523cc32..74a7582ad47c 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -622,6 +622,7 @@ tcf_match_indev(struct sk_buff *skb, int ifindex)
 
 int tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type,
 		     void *type_data, bool err_stop);
+unsigned int tcf_exts_num_actions(struct tcf_exts *exts);
 
 enum tc_block_command {
 	TC_BLOCK_BIND,
diff --git a/net/core/flow_offload.c b/net/core/flow_offload.c
index 2fbf6903d2f6..c3a00eac4804 100644
--- a/net/core/flow_offload.c
+++ b/net/core/flow_offload.c
@@ -3,9 +3,19 @@
 #include <linux/slab.h>
 #include <net/flow_offload.h>
 
-struct flow_rule *flow_rule_alloc(void)
+struct flow_rule *flow_rule_alloc(unsigned int num_actions)
 {
-	return kzalloc(sizeof(struct flow_rule), GFP_KERNEL);
+	struct flow_rule *rule;
+
+	rule = kzalloc(sizeof(struct flow_rule) +
+		       sizeof(struct flow_action_entry) * num_actions,
+		       GFP_KERNEL);
+	if (!rule)
+		return NULL;
+
+	rule->action.num_entries = num_actions;
+
+	return rule;
 }
 EXPORT_SYMBOL(flow_rule_alloc);
 
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index e2b5cb2eb34e..57713c63ac56 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -31,6 +31,7 @@
 #include <net/netlink.h>
 #include <net/pkt_sched.h>
 #include <net/pkt_cls.h>
+#include <net/tc_act/tc_pedit.h>
 
 extern const struct nla_policy rtm_tca_policy[TCA_MAX + 1];
 
@@ -2515,6 +2516,22 @@ int tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type,
 }
 EXPORT_SYMBOL(tc_setup_cb_call);
 
+unsigned int tcf_exts_num_actions(struct tcf_exts *exts)
+{
+	unsigned int num_acts = 0;
+	struct tc_action *act;
+	int i;
+
+	tcf_exts_for_each_action(i, act, exts) {
+		if (is_tcf_pedit(act))
+			num_acts += tcf_pedit_nkeys(act);
+		else
+			num_acts++;
+	}
+	return num_acts;
+}
+EXPORT_SYMBOL(tcf_exts_num_actions);
+
 static __net_init int tcf_net_init(struct net *net)
 {
 	struct tcf_net *tn = net_generic(net, tcf_net_id);
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index aaffea0b66e9..0062c9133a22 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -381,7 +381,7 @@ static int fl_hw_replace_filter(struct tcf_proto *tp,
 	bool skip_sw = tc_skip_sw(f->flags);
 	int err;
 
-	cls_flower.rule = flow_rule_alloc();
+	cls_flower.rule = flow_rule_alloc(tcf_exts_num_actions(&f->exts));
 	if (!cls_flower.rule)
 		return -ENOMEM;
 
@@ -1469,7 +1469,8 @@ static int fl_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb,
 			if (tc_skip_hw(f->flags))
 				continue;
 
-			cls_flower.rule = flow_rule_alloc();
+			cls_flower.rule =
+				flow_rule_alloc(tcf_exts_num_actions(&f->exts));
 			if (!cls_flower.rule)
 				return -ENOMEM;
 
@@ -1508,7 +1509,7 @@ static int fl_hw_create_tmplt(struct tcf_chain *chain,
 	struct tcf_block *block = chain->block;
 	struct tcf_exts dummy_exts = { 0, };
 
-	cls_flower.rule = flow_rule_alloc();
+	cls_flower.rule = flow_rule_alloc(0);
 	if (!cls_flower.rule)
 		return -ENOMEM;
 
-- 
cgit v1.2.3


From 3a7b68617de749d930b5503c3c5f30a17524f0bd Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Sat, 2 Feb 2019 12:50:46 +0100
Subject: cls_api: add translator to flow_action representation

This patch implements a new function to translate from native TC action
to the new flow_action representation. Moreover, this patch also updates
cls_flower to use this new function.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/pkt_cls.h  |  2 +
 net/sched/cls_api.c    | 99 ++++++++++++++++++++++++++++++++++++++++++++++++++
 net/sched/cls_flower.c | 14 +++++++
 3 files changed, 115 insertions(+)

(limited to 'include')

diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 74a7582ad47c..c470c100b926 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -620,6 +620,8 @@ tcf_match_indev(struct sk_buff *skb, int ifindex)
 }
 #endif /* CONFIG_NET_CLS_IND */
 
+int tc_setup_flow_action(struct flow_action *flow_action,
+			 const struct tcf_exts *exts);
 int tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type,
 		     void *type_data, bool err_stop);
 unsigned int tcf_exts_num_actions(struct tcf_exts *exts);
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 57713c63ac56..02cf6d2fa0e1 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -32,6 +32,13 @@
 #include <net/pkt_sched.h>
 #include <net/pkt_cls.h>
 #include <net/tc_act/tc_pedit.h>
+#include <net/tc_act/tc_mirred.h>
+#include <net/tc_act/tc_vlan.h>
+#include <net/tc_act/tc_tunnel_key.h>
+#include <net/tc_act/tc_csum.h>
+#include <net/tc_act/tc_gact.h>
+#include <net/tc_act/tc_skbedit.h>
+#include <net/tc_act/tc_mirred.h>
 
 extern const struct nla_policy rtm_tca_policy[TCA_MAX + 1];
 
@@ -2516,6 +2523,98 @@ int tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type,
 }
 EXPORT_SYMBOL(tc_setup_cb_call);
 
+int tc_setup_flow_action(struct flow_action *flow_action,
+			 const struct tcf_exts *exts)
+{
+	const struct tc_action *act;
+	int i, j, k;
+
+	if (!exts)
+		return 0;
+
+	j = 0;
+	tcf_exts_for_each_action(i, act, exts) {
+		struct flow_action_entry *entry;
+
+		entry = &flow_action->entries[j];
+		if (is_tcf_gact_ok(act)) {
+			entry->id = FLOW_ACTION_ACCEPT;
+		} else if (is_tcf_gact_shot(act)) {
+			entry->id = FLOW_ACTION_DROP;
+		} else if (is_tcf_gact_trap(act)) {
+			entry->id = FLOW_ACTION_TRAP;
+		} else if (is_tcf_gact_goto_chain(act)) {
+			entry->id = FLOW_ACTION_GOTO;
+			entry->chain_index = tcf_gact_goto_chain_index(act);
+		} else if (is_tcf_mirred_egress_redirect(act)) {
+			entry->id = FLOW_ACTION_REDIRECT;
+			entry->dev = tcf_mirred_dev(act);
+		} else if (is_tcf_mirred_egress_mirror(act)) {
+			entry->id = FLOW_ACTION_MIRRED;
+			entry->dev = tcf_mirred_dev(act);
+		} else if (is_tcf_vlan(act)) {
+			switch (tcf_vlan_action(act)) {
+			case TCA_VLAN_ACT_PUSH:
+				entry->id = FLOW_ACTION_VLAN_PUSH;
+				entry->vlan.vid = tcf_vlan_push_vid(act);
+				entry->vlan.proto = tcf_vlan_push_proto(act);
+				entry->vlan.prio = tcf_vlan_push_prio(act);
+				break;
+			case TCA_VLAN_ACT_POP:
+				entry->id = FLOW_ACTION_VLAN_POP;
+				break;
+			case TCA_VLAN_ACT_MODIFY:
+				entry->id = FLOW_ACTION_VLAN_MANGLE;
+				entry->vlan.vid = tcf_vlan_push_vid(act);
+				entry->vlan.proto = tcf_vlan_push_proto(act);
+				entry->vlan.prio = tcf_vlan_push_prio(act);
+				break;
+			default:
+				goto err_out;
+			}
+		} else if (is_tcf_tunnel_set(act)) {
+			entry->id = FLOW_ACTION_TUNNEL_ENCAP;
+			entry->tunnel = tcf_tunnel_info(act);
+		} else if (is_tcf_tunnel_release(act)) {
+			entry->id = FLOW_ACTION_TUNNEL_DECAP;
+			entry->tunnel = tcf_tunnel_info(act);
+		} else if (is_tcf_pedit(act)) {
+			for (k = 0; k < tcf_pedit_nkeys(act); k++) {
+				switch (tcf_pedit_cmd(act, k)) {
+				case TCA_PEDIT_KEY_EX_CMD_SET:
+					entry->id = FLOW_ACTION_MANGLE;
+					break;
+				case TCA_PEDIT_KEY_EX_CMD_ADD:
+					entry->id = FLOW_ACTION_ADD;
+					break;
+				default:
+					goto err_out;
+				}
+				entry->mangle.htype = tcf_pedit_htype(act, k);
+				entry->mangle.mask = tcf_pedit_mask(act, k);
+				entry->mangle.val = tcf_pedit_val(act, k);
+				entry->mangle.offset = tcf_pedit_offset(act, k);
+				entry = &flow_action->entries[++j];
+			}
+		} else if (is_tcf_csum(act)) {
+			entry->id = FLOW_ACTION_CSUM;
+			entry->csum_flags = tcf_csum_update_flags(act);
+		} else if (is_tcf_skbedit_mark(act)) {
+			entry->id = FLOW_ACTION_MARK;
+			entry->mark = tcf_skbedit_mark(act);
+		} else {
+			goto err_out;
+		}
+
+		if (!is_tcf_pedit(act))
+			j++;
+	}
+	return 0;
+err_out:
+	return -EOPNOTSUPP;
+}
+EXPORT_SYMBOL(tc_setup_flow_action);
+
 unsigned int tcf_exts_num_actions(struct tcf_exts *exts)
 {
 	unsigned int num_acts = 0;
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 0062c9133a22..48c54ef52a98 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -394,6 +394,12 @@ static int fl_hw_replace_filter(struct tcf_proto *tp,
 	cls_flower.exts = &f->exts;
 	cls_flower.classid = f->res.classid;
 
+	err = tc_setup_flow_action(&cls_flower.rule->action, &f->exts);
+	if (err) {
+		kfree(cls_flower.rule);
+		return err;
+	}
+
 	err = tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, skip_sw);
 	kfree(cls_flower.rule);
 
@@ -1483,6 +1489,14 @@ static int fl_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb,
 			cls_flower.rule->match.mask = &mask->key;
 			cls_flower.rule->match.key = &f->mkey;
 			cls_flower.exts = &f->exts;
+
+			err = tc_setup_flow_action(&cls_flower.rule->action,
+						   &f->exts);
+			if (err) {
+				kfree(cls_flower.rule);
+				return err;
+			}
+
 			cls_flower.classid = f->res.classid;
 
 			err = cb(TC_SETUP_CLSFLOWER, &cls_flower, cb_priv);
-- 
cgit v1.2.3


From 3b1903ef97c080a80ead3a6a2305f55108e08269 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Sat, 2 Feb 2019 12:50:47 +0100
Subject: flow_offload: add statistics retrieval infrastructure and use it

This patch provides the flow_stats structure that acts as container for
tc_cls_flower_offload, then we can use to restore the statistics on the
existing TC actions. Hence, tcf_exts_stats_update() is not used from
drivers anymore.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c          |  4 ++--
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c  |  6 +++---
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c       |  2 +-
 drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c |  2 +-
 drivers/net/ethernet/netronome/nfp/flower/offload.c   |  5 ++---
 include/net/flow_offload.h                            | 14 ++++++++++++++
 include/net/pkt_cls.h                                 |  1 +
 net/sched/cls_flower.c                                |  4 ++++
 8 files changed, 28 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
index 90a2170c5138..6a87434b7173 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
@@ -1396,8 +1396,8 @@ static int bnxt_tc_get_flow_stats(struct bnxt *bp,
 	lastused = flow->lastused;
 	spin_unlock(&flow->stats_lock);
 
-	tcf_exts_stats_update(tc_flow_cmd->exts, stats.bytes, stats.packets,
-			      lastused);
+	flow_stats_update(&tc_flow_cmd->stats, stats.bytes, stats.packets,
+			  lastused);
 	return 0;
 }
 
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c
index 39c5af5dad3d..8a2d66ee1d7b 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c
@@ -807,9 +807,9 @@ int cxgb4_tc_flower_stats(struct net_device *dev,
 	if (ofld_stats->packet_count != packets) {
 		if (ofld_stats->prev_packet_count != packets)
 			ofld_stats->last_used = jiffies;
-		tcf_exts_stats_update(cls->exts, bytes - ofld_stats->byte_count,
-				      packets - ofld_stats->packet_count,
-				      ofld_stats->last_used);
+		flow_stats_update(&cls->stats, bytes - ofld_stats->byte_count,
+				  packets - ofld_stats->packet_count,
+				  ofld_stats->last_used);
 
 		ofld_stats->packet_count = packets;
 		ofld_stats->byte_count = bytes;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 1c8e8da166bd..c4c6bbcafc68 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -3071,7 +3071,7 @@ int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv,
 	mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
 
 out:
-	tcf_exts_stats_update(f->exts, bytes, packets, lastuse);
+	flow_stats_update(&f->stats, bytes, packets, lastuse);
 
 	return 0;
 }
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
index a20379e29e02..c090ecb62041 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
@@ -460,7 +460,7 @@ int mlxsw_sp_flower_stats(struct mlxsw_sp *mlxsw_sp,
 	if (err)
 		goto err_rule_get_stats;
 
-	tcf_exts_stats_update(f->exts, bytes, packets, lastuse);
+	flow_stats_update(&f->stats, bytes, packets, lastuse);
 
 	mlxsw_sp_acl_ruleset_put(mlxsw_sp, ruleset);
 	return 0;
diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c
index 74f7ff292052..fe1469d201af 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/offload.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c
@@ -554,9 +554,8 @@ nfp_flower_get_stats(struct nfp_app *app, struct net_device *netdev,
 	ctx_id = be32_to_cpu(nfp_flow->meta.host_ctx_id);
 
 	spin_lock_bh(&priv->stats_lock);
-	tcf_exts_stats_update(flow->exts, priv->stats[ctx_id].bytes,
-			      priv->stats[ctx_id].pkts,
-			      priv->stats[ctx_id].used);
+	flow_stats_update(&flow->stats, priv->stats[ctx_id].bytes,
+			  priv->stats[ctx_id].pkts, priv->stats[ctx_id].used);
 
 	priv->stats[ctx_id].pkts = 0;
 	priv->stats[ctx_id].bytes = 0;
diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h
index dabc819b6cc9..f9ce39992dbd 100644
--- a/include/net/flow_offload.h
+++ b/include/net/flow_offload.h
@@ -179,4 +179,18 @@ static inline bool flow_rule_match_key(const struct flow_rule *rule,
 	return dissector_uses_key(rule->match.dissector, key);
 }
 
+struct flow_stats {
+	u64	pkts;
+	u64	bytes;
+	u64	lastused;
+};
+
+static inline void flow_stats_update(struct flow_stats *flow_stats,
+				     u64 bytes, u64 pkts, u64 lastused)
+{
+	flow_stats->pkts	= pkts;
+	flow_stats->bytes	= bytes;
+	flow_stats->lastused	= lastused;
+}
+
 #endif /* _NET_FLOW_OFFLOAD_H */
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index c470c100b926..bea1b1c3ea9b 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -765,6 +765,7 @@ struct tc_cls_flower_offload {
 	enum tc_fl_command command;
 	unsigned long cookie;
 	struct flow_rule *rule;
+	struct flow_stats stats;
 	struct tcf_exts *exts;
 	u32 classid;
 };
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 48c54ef52a98..8ec85056aa0d 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -429,6 +429,10 @@ static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f)
 	cls_flower.classid = f->res.classid;
 
 	tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false);
+
+	tcf_exts_stats_update(&f->exts, cls_flower.stats.bytes,
+			      cls_flower.stats.pkts,
+			      cls_flower.stats.lastused);
 }
 
 static bool __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f,
-- 
cgit v1.2.3


From 2cd173e6d542f8e96369608db849b71135d65afb Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Sat, 2 Feb 2019 12:50:49 +0100
Subject: cls_flower: don't expose TC actions to drivers anymore

Now that drivers have been converted to use the flow action
infrastructure, remove this field from the tc_cls_flower_offload
structure.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/pkt_cls.h  | 1 -
 net/sched/cls_flower.c | 5 -----
 2 files changed, 6 deletions(-)

(limited to 'include')

diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index bea1b1c3ea9b..cb8be396a11f 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -766,7 +766,6 @@ struct tc_cls_flower_offload {
 	unsigned long cookie;
 	struct flow_rule *rule;
 	struct flow_stats stats;
-	struct tcf_exts *exts;
 	u32 classid;
 };
 
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 8ec85056aa0d..c5d1db3a3db7 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -391,7 +391,6 @@ static int fl_hw_replace_filter(struct tcf_proto *tp,
 	cls_flower.rule->match.dissector = &f->mask->dissector;
 	cls_flower.rule->match.mask = &f->mask->key;
 	cls_flower.rule->match.key = &f->mkey;
-	cls_flower.exts = &f->exts;
 	cls_flower.classid = f->res.classid;
 
 	err = tc_setup_flow_action(&cls_flower.rule->action, &f->exts);
@@ -425,7 +424,6 @@ static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f)
 	tc_cls_common_offload_init(&cls_flower.common, tp, f->flags, NULL);
 	cls_flower.command = TC_CLSFLOWER_STATS;
 	cls_flower.cookie = (unsigned long) f;
-	cls_flower.exts = &f->exts;
 	cls_flower.classid = f->res.classid;
 
 	tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false);
@@ -1492,7 +1490,6 @@ static int fl_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb,
 			cls_flower.rule->match.dissector = &mask->dissector;
 			cls_flower.rule->match.mask = &mask->key;
 			cls_flower.rule->match.key = &f->mkey;
-			cls_flower.exts = &f->exts;
 
 			err = tc_setup_flow_action(&cls_flower.rule->action,
 						   &f->exts);
@@ -1525,7 +1522,6 @@ static int fl_hw_create_tmplt(struct tcf_chain *chain,
 {
 	struct tc_cls_flower_offload cls_flower = {};
 	struct tcf_block *block = chain->block;
-	struct tcf_exts dummy_exts = { 0, };
 
 	cls_flower.rule = flow_rule_alloc(0);
 	if (!cls_flower.rule)
@@ -1537,7 +1533,6 @@ static int fl_hw_create_tmplt(struct tcf_chain *chain,
 	cls_flower.rule->match.dissector = &tmplt->dissector;
 	cls_flower.rule->match.mask = &tmplt->mask;
 	cls_flower.rule->match.key = &tmplt->dummy_key;
-	cls_flower.exts = &dummy_exts;
 
 	/* We don't care if driver (any of them) fails to handle this
 	 * call. It serves just as a hint for it.
-- 
cgit v1.2.3


From 8bec2833fb28d7c8fcc25127190abbe3eb0ca260 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Sat, 2 Feb 2019 12:50:50 +0100
Subject: flow_offload: add wake-up-on-lan and queue to flow_action

These actions need to be added to support the ethtool_rx_flow interface.
The queue action includes a field to specify the RSS context, that is
set via FLOW_RSS flow type flag and the rss_context field in struct
ethtool_rxnfc, plus the corresponding queue index. FLOW_RSS implies that
rss_context is non-zero, therefore, queue.ctx == 0 means that FLOW_RSS
was not set. Also add a field to store the vf index which is stored in
the ethtool_rxnfc ring_cookie field.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/flow_offload.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include')

diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h
index f9ce39992dbd..23166caa0da5 100644
--- a/include/net/flow_offload.h
+++ b/include/net/flow_offload.h
@@ -116,6 +116,8 @@ enum flow_action_id {
 	FLOW_ACTION_ADD,
 	FLOW_ACTION_CSUM,
 	FLOW_ACTION_MARK,
+	FLOW_ACTION_WAKE,
+	FLOW_ACTION_QUEUE,
 };
 
 /* This is mirroring enum pedit_header_type definition for easy mapping between
@@ -150,6 +152,11 @@ struct flow_action_entry {
 		const struct ip_tunnel_info *tunnel;	/* FLOW_ACTION_TUNNEL_ENCAP */
 		u32			csum_flags;	/* FLOW_ACTION_CSUM */
 		u32			mark;		/* FLOW_ACTION_MARK */
+		struct {				/* FLOW_ACTION_QUEUE */
+			u32		ctx;
+			u32		index;
+			u8		vf;
+		} queue;
 	};
 };
 
-- 
cgit v1.2.3


From eca4205f9ec3bea2d5aad0493c19f5d2675a20fc Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Sat, 2 Feb 2019 12:50:51 +0100
Subject: ethtool: add ethtool_rx_flow_spec to flow_rule structure translator

This patch adds a function to translate the ethtool_rx_flow_spec
structure to the flow_rule representation.

This allows us to reuse code from the driver side given that both flower
and ethtool_rx_flow interfaces use the same representation.

This patch also includes support for the flow type flags FLOW_EXT,
FLOW_MAC_EXT and FLOW_RSS.

The ethtool_rx_flow_spec_input wrapper structure is used to convey the
rss_context field, that is away from the ethtool_rx_flow_spec structure,
and the ethtool_rx_flow_spec structure.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ethtool.h |  15 +++
 net/core/ethtool.c      | 241 ++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 256 insertions(+)

(limited to 'include')

diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index afd9596ce636..19a8de5326fb 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -400,4 +400,19 @@ struct ethtool_ops {
 	void	(*get_ethtool_phy_stats)(struct net_device *,
 					 struct ethtool_stats *, u64 *);
 };
+
+struct ethtool_rx_flow_rule {
+	struct flow_rule	*rule;
+	unsigned long		priv[0];
+};
+
+struct ethtool_rx_flow_spec_input {
+	const struct ethtool_rx_flow_spec	*fs;
+	u32					rss_ctx;
+};
+
+struct ethtool_rx_flow_rule *
+ethtool_rx_flow_rule_create(const struct ethtool_rx_flow_spec_input *input);
+void ethtool_rx_flow_rule_destroy(struct ethtool_rx_flow_rule *rule);
+
 #endif /* _LINUX_ETHTOOL_H */
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 45c0a6e3d6ad..0fbf39239b29 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -29,6 +29,7 @@
 #include <linux/net.h>
 #include <net/devlink.h>
 #include <net/xdp_sock.h>
+#include <net/flow_offload.h>
 
 /*
  * Some useful ethtool_ops methods that're device independent.
@@ -2820,3 +2821,243 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
 
 	return rc;
 }
+
+struct ethtool_rx_flow_key {
+	struct flow_dissector_key_basic			basic;
+	union {
+		struct flow_dissector_key_ipv4_addrs	ipv4;
+		struct flow_dissector_key_ipv6_addrs	ipv6;
+	};
+	struct flow_dissector_key_ports			tp;
+	struct flow_dissector_key_ip			ip;
+	struct flow_dissector_key_vlan			vlan;
+	struct flow_dissector_key_eth_addrs		eth_addrs;
+} __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */
+
+struct ethtool_rx_flow_match {
+	struct flow_dissector		dissector;
+	struct ethtool_rx_flow_key	key;
+	struct ethtool_rx_flow_key	mask;
+};
+
+struct ethtool_rx_flow_rule *
+ethtool_rx_flow_rule_create(const struct ethtool_rx_flow_spec_input *input)
+{
+	const struct ethtool_rx_flow_spec *fs = input->fs;
+	static struct in6_addr zero_addr = {};
+	struct ethtool_rx_flow_match *match;
+	struct ethtool_rx_flow_rule *flow;
+	struct flow_action_entry *act;
+
+	flow = kzalloc(sizeof(struct ethtool_rx_flow_rule) +
+		       sizeof(struct ethtool_rx_flow_match), GFP_KERNEL);
+	if (!flow)
+		return ERR_PTR(-ENOMEM);
+
+	/* ethtool_rx supports only one single action per rule. */
+	flow->rule = flow_rule_alloc(1);
+	if (!flow->rule) {
+		kfree(flow);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	match = (struct ethtool_rx_flow_match *)flow->priv;
+	flow->rule->match.dissector	= &match->dissector;
+	flow->rule->match.mask		= &match->mask;
+	flow->rule->match.key		= &match->key;
+
+	match->mask.basic.n_proto = htons(0xffff);
+
+	switch (fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT | FLOW_RSS)) {
+	case TCP_V4_FLOW:
+	case UDP_V4_FLOW: {
+		const struct ethtool_tcpip4_spec *v4_spec, *v4_m_spec;
+
+		match->key.basic.n_proto = htons(ETH_P_IP);
+
+		v4_spec = &fs->h_u.tcp_ip4_spec;
+		v4_m_spec = &fs->m_u.tcp_ip4_spec;
+
+		if (v4_m_spec->ip4src) {
+			match->key.ipv4.src = v4_spec->ip4src;
+			match->mask.ipv4.src = v4_m_spec->ip4src;
+		}
+		if (v4_m_spec->ip4dst) {
+			match->key.ipv4.dst = v4_spec->ip4dst;
+			match->mask.ipv4.dst = v4_m_spec->ip4dst;
+		}
+		if (v4_m_spec->ip4src ||
+		    v4_m_spec->ip4dst) {
+			match->dissector.used_keys |=
+				BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS);
+			match->dissector.offset[FLOW_DISSECTOR_KEY_IPV4_ADDRS] =
+				offsetof(struct ethtool_rx_flow_key, ipv4);
+		}
+		if (v4_m_spec->psrc) {
+			match->key.tp.src = v4_spec->psrc;
+			match->mask.tp.src = v4_m_spec->psrc;
+		}
+		if (v4_m_spec->pdst) {
+			match->key.tp.dst = v4_spec->pdst;
+			match->mask.tp.dst = v4_m_spec->pdst;
+		}
+		if (v4_m_spec->psrc ||
+		    v4_m_spec->pdst) {
+			match->dissector.used_keys |=
+				BIT(FLOW_DISSECTOR_KEY_PORTS);
+			match->dissector.offset[FLOW_DISSECTOR_KEY_PORTS] =
+				offsetof(struct ethtool_rx_flow_key, tp);
+		}
+		if (v4_m_spec->tos) {
+			match->key.ip.tos = v4_spec->tos;
+			match->mask.ip.tos = v4_m_spec->tos;
+			match->dissector.used_keys |=
+				BIT(FLOW_DISSECTOR_KEY_IP);
+			match->dissector.offset[FLOW_DISSECTOR_KEY_IP] =
+				offsetof(struct ethtool_rx_flow_key, ip);
+		}
+		}
+		break;
+	case TCP_V6_FLOW:
+	case UDP_V6_FLOW: {
+		const struct ethtool_tcpip6_spec *v6_spec, *v6_m_spec;
+
+		match->key.basic.n_proto = htons(ETH_P_IPV6);
+
+		v6_spec = &fs->h_u.tcp_ip6_spec;
+		v6_m_spec = &fs->m_u.tcp_ip6_spec;
+		if (memcmp(v6_m_spec->ip6src, &zero_addr, sizeof(zero_addr))) {
+			memcpy(&match->key.ipv6.src, v6_spec->ip6src,
+			       sizeof(match->key.ipv6.src));
+			memcpy(&match->mask.ipv6.src, v6_m_spec->ip6src,
+			       sizeof(match->mask.ipv6.src));
+		}
+		if (memcmp(v6_m_spec->ip6dst, &zero_addr, sizeof(zero_addr))) {
+			memcpy(&match->key.ipv6.dst, v6_spec->ip6dst,
+			       sizeof(match->key.ipv6.dst));
+			memcpy(&match->mask.ipv6.dst, v6_m_spec->ip6dst,
+			       sizeof(match->mask.ipv6.dst));
+		}
+		if (memcmp(v6_m_spec->ip6src, &zero_addr, sizeof(zero_addr)) ||
+		    memcmp(v6_m_spec->ip6src, &zero_addr, sizeof(zero_addr))) {
+			match->dissector.used_keys |=
+				BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS);
+			match->dissector.offset[FLOW_DISSECTOR_KEY_IPV6_ADDRS] =
+				offsetof(struct ethtool_rx_flow_key, ipv6);
+		}
+		if (v6_m_spec->psrc) {
+			match->key.tp.src = v6_spec->psrc;
+			match->mask.tp.src = v6_m_spec->psrc;
+		}
+		if (v6_m_spec->pdst) {
+			match->key.tp.dst = v6_spec->pdst;
+			match->mask.tp.dst = v6_m_spec->pdst;
+		}
+		if (v6_m_spec->psrc ||
+		    v6_m_spec->pdst) {
+			match->dissector.used_keys |=
+				BIT(FLOW_DISSECTOR_KEY_PORTS);
+			match->dissector.offset[FLOW_DISSECTOR_KEY_PORTS] =
+				offsetof(struct ethtool_rx_flow_key, tp);
+		}
+		if (v6_m_spec->tclass) {
+			match->key.ip.tos = v6_spec->tclass;
+			match->mask.ip.tos = v6_m_spec->tclass;
+			match->dissector.used_keys |=
+				BIT(FLOW_DISSECTOR_KEY_IP);
+			match->dissector.offset[FLOW_DISSECTOR_KEY_IP] =
+				offsetof(struct ethtool_rx_flow_key, ip);
+		}
+		}
+		break;
+	default:
+		ethtool_rx_flow_rule_destroy(flow);
+		return ERR_PTR(-EINVAL);
+	}
+
+	switch (fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT | FLOW_RSS)) {
+	case TCP_V4_FLOW:
+	case TCP_V6_FLOW:
+		match->key.basic.ip_proto = IPPROTO_TCP;
+		break;
+	case UDP_V4_FLOW:
+	case UDP_V6_FLOW:
+		match->key.basic.ip_proto = IPPROTO_UDP;
+		break;
+	}
+	match->mask.basic.ip_proto = 0xff;
+
+	match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_BASIC);
+	match->dissector.offset[FLOW_DISSECTOR_KEY_BASIC] =
+		offsetof(struct ethtool_rx_flow_key, basic);
+
+	if (fs->flow_type & FLOW_EXT) {
+		const struct ethtool_flow_ext *ext_h_spec = &fs->h_ext;
+		const struct ethtool_flow_ext *ext_m_spec = &fs->m_ext;
+
+		if (ext_m_spec->vlan_etype &&
+		    ext_m_spec->vlan_tci) {
+			match->key.vlan.vlan_tpid = ext_h_spec->vlan_etype;
+			match->mask.vlan.vlan_tpid = ext_m_spec->vlan_etype;
+
+			match->key.vlan.vlan_id =
+				ntohs(ext_h_spec->vlan_tci) & 0x0fff;
+			match->mask.vlan.vlan_id =
+				ntohs(ext_m_spec->vlan_tci) & 0x0fff;
+
+			match->key.vlan.vlan_priority =
+				(ntohs(ext_h_spec->vlan_tci) & 0xe000) >> 13;
+			match->mask.vlan.vlan_priority =
+				(ntohs(ext_m_spec->vlan_tci) & 0xe000) >> 13;
+
+			match->dissector.used_keys |=
+				BIT(FLOW_DISSECTOR_KEY_VLAN);
+			match->dissector.offset[FLOW_DISSECTOR_KEY_VLAN] =
+				offsetof(struct ethtool_rx_flow_key, vlan);
+		}
+	}
+	if (fs->flow_type & FLOW_MAC_EXT) {
+		const struct ethtool_flow_ext *ext_h_spec = &fs->h_ext;
+		const struct ethtool_flow_ext *ext_m_spec = &fs->m_ext;
+
+		if (ext_m_spec->h_dest) {
+			memcpy(match->key.eth_addrs.dst, ext_h_spec->h_dest,
+			       ETH_ALEN);
+			memcpy(match->mask.eth_addrs.dst, ext_m_spec->h_dest,
+			       ETH_ALEN);
+
+			match->dissector.used_keys |=
+				BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS);
+			match->dissector.offset[FLOW_DISSECTOR_KEY_ETH_ADDRS] =
+				offsetof(struct ethtool_rx_flow_key, eth_addrs);
+		}
+	}
+
+	act = &flow->rule->action.entries[0];
+	switch (fs->ring_cookie) {
+	case RX_CLS_FLOW_DISC:
+		act->id = FLOW_ACTION_DROP;
+		break;
+	case RX_CLS_FLOW_WAKE:
+		act->id = FLOW_ACTION_WAKE;
+		break;
+	default:
+		act->id = FLOW_ACTION_QUEUE;
+		if (fs->flow_type & FLOW_RSS)
+			act->queue.ctx = input->rss_ctx;
+
+		act->queue.vf = ethtool_get_flow_spec_ring_vf(fs->ring_cookie);
+		act->queue.index = ethtool_get_flow_spec_ring(fs->ring_cookie);
+		break;
+	}
+
+	return flow;
+}
+EXPORT_SYMBOL(ethtool_rx_flow_rule_create);
+
+void ethtool_rx_flow_rule_destroy(struct ethtool_rx_flow_rule *flow)
+{
+	kfree(flow->rule);
+	kfree(flow);
+}
+EXPORT_SYMBOL(ethtool_rx_flow_rule_destroy);
-- 
cgit v1.2.3


From 57186a5f43d093e769076a4f1bf043351d8668a0 Mon Sep 17 00:00:00 2001
From: Nir Dotan <nird@mellanox.com>
Date: Mon, 4 Feb 2019 18:47:45 +0000
Subject: devlink: add hardware errors tracing facility

Define a tracepoint and allow user to trace messages in case of an hardware
error code for hardware associated with devlink instance.

Signed-off-by: Nir Dotan <nird@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/trace/events/devlink.h | 33 +++++++++++++++++++++++++++++++++
 net/core/devlink.c             |  1 +
 2 files changed, 34 insertions(+)

(limited to 'include')

diff --git a/include/trace/events/devlink.h b/include/trace/events/devlink.h
index 44acfbca1266..40705364a50f 100644
--- a/include/trace/events/devlink.h
+++ b/include/trace/events/devlink.h
@@ -46,6 +46,35 @@ TRACE_EVENT(devlink_hwmsg,
 		  (int) __entry->len, __get_dynamic_array(buf), __entry->len)
 );
 
+/*
+ * Tracepoint for devlink hardware error:
+ */
+TRACE_EVENT(devlink_hwerr,
+	TP_PROTO(const struct devlink *devlink, int err, const char *msg),
+
+	TP_ARGS(devlink, err, msg),
+
+	TP_STRUCT__entry(
+		__string(bus_name, devlink->dev->bus->name)
+		__string(dev_name, dev_name(devlink->dev))
+		__string(driver_name, devlink->dev->driver->name)
+		__field(int, err)
+		__string(msg, msg)
+		),
+
+	TP_fast_assign(
+		__assign_str(bus_name, devlink->dev->bus->name);
+		__assign_str(dev_name, dev_name(devlink->dev));
+		__assign_str(driver_name, devlink->dev->driver->name);
+		__entry->err = err;
+		__assign_str(msg, msg);
+		),
+
+	TP_printk("bus_name=%s dev_name=%s driver_name=%s err=%d %s",
+			__get_str(bus_name), __get_str(dev_name),
+			__get_str(driver_name), __entry->err, __get_str(msg))
+);
+
 #endif /* _TRACE_DEVLINK_H */
 
 /* This part must be outside protection */
@@ -64,6 +93,10 @@ static inline void trace_devlink_hwmsg(const struct devlink *devlink,
 {
 }
 
+static inline void trace_devlink_hwerr(const struct devlink *devlink,
+				       int err, const char *msg)
+{
+}
 #endif /* _TRACE_DEVLINK_H */
 
 #endif
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 52bf27491fb8..cd0d393bc62d 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -81,6 +81,7 @@ struct devlink_dpipe_header devlink_dpipe_header_ipv6 = {
 EXPORT_SYMBOL(devlink_dpipe_header_ipv6);
 
 EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_hwmsg);
+EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_hwerr);
 
 static LIST_HEAD(devlink_list);
 
-- 
cgit v1.2.3


From d6abc5969463359c366d459247b90366fcd6f5c5 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Wed, 6 Feb 2019 09:45:35 -0800
Subject: net: Introduce ndo_get_port_parent_id()

In preparation for getting rid of switchdev_ops, create a dedicated NDO
operation for getting the port's parent identifier. There are
essentially two classes of drivers that need to implement getting the
port's parent ID which are VF/PF drivers with a built-in switch, and
pure switchdev drivers such as mlxsw, ocelot, dsa etc.

We introduce a helper function: dev_get_port_parent_id() which supports
recursion into the lower devices to obtain the first port's parent ID.

Convert the bridge, core and ipv4 multicast routing code to check for
such ndo_get_port_parent_id() and call the helper function when valid
before falling back to switchdev_port_attr_get(). This will allow us to
convert all relevant drivers in one go instead of having to implement
both switchdev_port_attr_get() and ndo_get_port_parent_id() operations,
then get rid of switchdev_port_attr_get().

Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  9 ++++++++
 net/bridge/br_switchdev.c |  9 ++++++--
 net/core/dev.c            | 57 +++++++++++++++++++++++++++++++++++++++++++++++
 net/core/net-sysfs.c      |  7 +++++-
 net/core/rtnetlink.c      |  6 ++++-
 net/ipv4/ipmr.c           |  8 ++++++-
 6 files changed, 91 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index ba57d0ba425e..1d95e634f3fe 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1188,6 +1188,10 @@ struct dev_ifalias {
  *	not implement this, it is assumed that the hw is not able to have
  *	multiple net devices on single physical port.
  *
+ * int (*ndo_get_port_parent_id)(struct net_device *dev,
+ *				 struct netdev_phys_item_id *ppid)
+ *	Called to get the parent ID of the physical port of this device.
+ *
  * void (*ndo_udp_tunnel_add)(struct net_device *dev,
  *			      struct udp_tunnel_info *ti);
  *	Called by UDP tunnel to notify a driver about the UDP port and socket
@@ -1412,6 +1416,8 @@ struct net_device_ops {
 						      bool new_carrier);
 	int			(*ndo_get_phys_port_id)(struct net_device *dev,
 							struct netdev_phys_item_id *ppid);
+	int			(*ndo_get_port_parent_id)(struct net_device *dev,
+							  struct netdev_phys_item_id *ppid);
 	int			(*ndo_get_phys_port_name)(struct net_device *dev,
 							  char *name, size_t len);
 	void			(*ndo_udp_tunnel_add)(struct net_device *dev,
@@ -3651,6 +3657,9 @@ int dev_get_phys_port_id(struct net_device *dev,
 			 struct netdev_phys_item_id *ppid);
 int dev_get_phys_port_name(struct net_device *dev,
 			   char *name, size_t len);
+int dev_get_port_parent_id(struct net_device *dev,
+			   struct netdev_phys_item_id *ppid, bool recurse);
+bool netdev_port_same_parent_id(struct net_device *a, struct net_device *b);
 int dev_change_proto_down(struct net_device *dev, bool proto_down);
 struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev, bool *again);
 struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c
index 4d2b9eb7604a..06b0ae44585f 100644
--- a/net/bridge/br_switchdev.c
+++ b/net/bridge/br_switchdev.c
@@ -14,7 +14,8 @@ static int br_switchdev_mark_get(struct net_bridge *br, struct net_device *dev)
 
 	/* dev is yet to be added to the port list. */
 	list_for_each_entry(p, &br->port_list, list) {
-		if (switchdev_port_same_parent_id(dev, p->dev))
+		if (netdev_port_same_parent_id(dev, p->dev) ||
+		    switchdev_port_same_parent_id(dev, p->dev))
 			return p->offload_fwd_mark;
 	}
 
@@ -23,6 +24,7 @@ static int br_switchdev_mark_get(struct net_bridge *br, struct net_device *dev)
 
 int nbp_switchdev_mark_set(struct net_bridge_port *p)
 {
+	const struct net_device_ops *ops = p->dev->netdev_ops;
 	struct switchdev_attr attr = {
 		.orig_dev = p->dev,
 		.id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID,
@@ -31,7 +33,10 @@ int nbp_switchdev_mark_set(struct net_bridge_port *p)
 
 	ASSERT_RTNL();
 
-	err = switchdev_port_attr_get(p->dev, &attr);
+	if (ops->ndo_get_port_parent_id)
+		err = dev_get_port_parent_id(p->dev, &attr.u.ppid, true);
+	else
+		err = switchdev_port_attr_get(p->dev, &attr);
 	if (err) {
 		if (err == -EOPNOTSUPP)
 			return 0;
diff --git a/net/core/dev.c b/net/core/dev.c
index bfa4be42afff..8c6d5cf8a308 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -7877,6 +7877,63 @@ int dev_get_phys_port_name(struct net_device *dev,
 }
 EXPORT_SYMBOL(dev_get_phys_port_name);
 
+/**
+ *	dev_get_port_parent_id - Get the device's port parent identifier
+ *	@dev: network device
+ *	@ppid: pointer to a storage for the port's parent identifier
+ *	@recurse: allow/disallow recursion to lower devices
+ *
+ *	Get the devices's port parent identifier
+ */
+int dev_get_port_parent_id(struct net_device *dev,
+			   struct netdev_phys_item_id *ppid,
+			   bool recurse)
+{
+	const struct net_device_ops *ops = dev->netdev_ops;
+	struct netdev_phys_item_id first = { };
+	struct net_device *lower_dev;
+	struct list_head *iter;
+	int err = -EOPNOTSUPP;
+
+	if (ops->ndo_get_port_parent_id)
+		return ops->ndo_get_port_parent_id(dev, ppid);
+
+	if (!recurse)
+		return err;
+
+	netdev_for_each_lower_dev(dev, lower_dev, iter) {
+		err = dev_get_port_parent_id(lower_dev, ppid, recurse);
+		if (err)
+			break;
+		if (!first.id_len)
+			first = *ppid;
+		else if (memcmp(&first, ppid, sizeof(*ppid)))
+			return -ENODATA;
+	}
+
+	return err;
+}
+EXPORT_SYMBOL(dev_get_port_parent_id);
+
+/**
+ *	netdev_port_same_parent_id - Indicate if two network devices have
+ *	the same port parent identifier
+ *	@a: first network device
+ *	@b: second network device
+ */
+bool netdev_port_same_parent_id(struct net_device *a, struct net_device *b)
+{
+	struct netdev_phys_item_id a_id = { };
+	struct netdev_phys_item_id b_id = { };
+
+	if (dev_get_port_parent_id(a, &a_id, true) ||
+	    dev_get_port_parent_id(b, &b_id, true))
+		return false;
+
+	return netdev_phys_item_id_same(&a_id, &b_id);
+}
+EXPORT_SYMBOL(netdev_port_same_parent_id);
+
 /**
  *	dev_change_proto_down - update protocol port state information
  *	@dev: device
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index ff9fd2bb4ce4..4eace9f1dcf9 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -495,6 +495,7 @@ static ssize_t phys_switch_id_show(struct device *dev,
 				   struct device_attribute *attr, char *buf)
 {
 	struct net_device *netdev = to_net_dev(dev);
+	const struct net_device_ops *ops = netdev->netdev_ops;
 	ssize_t ret = -EINVAL;
 
 	if (!rtnl_trylock())
@@ -507,7 +508,11 @@ static ssize_t phys_switch_id_show(struct device *dev,
 			.flags = SWITCHDEV_F_NO_RECURSE,
 		};
 
-		ret = switchdev_port_attr_get(netdev, &attr);
+		if (ops->ndo_get_port_parent_id)
+			ret = dev_get_port_parent_id(netdev, &attr.u.ppid,
+						     false);
+		else
+			ret = switchdev_port_attr_get(netdev, &attr);
 		if (!ret)
 			ret = sprintf(buf, "%*phN\n", attr.u.ppid.id_len,
 				      attr.u.ppid.id);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index f5a98082ac7a..90dd02c1f561 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1146,6 +1146,7 @@ static int rtnl_phys_port_name_fill(struct sk_buff *skb, struct net_device *dev)
 
 static int rtnl_phys_switch_id_fill(struct sk_buff *skb, struct net_device *dev)
 {
+	const struct net_device_ops *ops = dev->netdev_ops;
 	int err;
 	struct switchdev_attr attr = {
 		.orig_dev = dev,
@@ -1153,7 +1154,10 @@ static int rtnl_phys_switch_id_fill(struct sk_buff *skb, struct net_device *dev)
 		.flags = SWITCHDEV_F_NO_RECURSE,
 	};
 
-	err = switchdev_port_attr_get(dev, &attr);
+	if (ops->ndo_get_port_parent_id)
+		err = dev_get_port_parent_id(dev, &attr.u.ppid, false);
+	else
+		err = switchdev_port_attr_get(dev, &attr);
 	if (err) {
 		if (err == -EOPNOTSUPP)
 			return 0;
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index fb99002c3d4e..c71bcc42d66d 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -837,6 +837,7 @@ static void ipmr_update_thresholds(struct mr_table *mrt, struct mr_mfc *cache,
 static int vif_add(struct net *net, struct mr_table *mrt,
 		   struct vifctl *vifc, int mrtsock)
 {
+	const struct net_device_ops *ops;
 	int vifi = vifc->vifc_vifi;
 	struct switchdev_attr attr = {
 		.id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID,
@@ -920,7 +921,12 @@ static int vif_add(struct net *net, struct mr_table *mrt,
 			(VIFF_TUNNEL | VIFF_REGISTER));
 
 	attr.orig_dev = dev;
-	if (!switchdev_port_attr_get(dev, &attr)) {
+	ops = dev->netdev_ops;
+	if (ops->ndo_get_port_parent_id &&
+	    !dev_get_port_parent_id(dev, &attr.u.ppid, true)) {
+		memcpy(v->dev_parent_id.id, attr.u.ppid.id, attr.u.ppid.id_len);
+		v->dev_parent_id.id_len = attr.u.ppid.id_len;
+	} else if (!switchdev_port_attr_get(dev, &attr)) {
 		memcpy(v->dev_parent_id.id, attr.u.ppid.id, attr.u.ppid.id_len);
 		v->dev_parent_id.id_len = attr.u.ppid.id_len;
 	} else {
-- 
cgit v1.2.3


From bccb30254a4a02ee370dd23b2afbd25d7a78bc34 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Wed, 6 Feb 2019 09:45:46 -0800
Subject: net: Get rid of SWITCHDEV_ATTR_ID_PORT_PARENT_ID

Now that we have a dedicated NDO for getting a port's parent ID, get rid
of SWITCHDEV_ATTR_ID_PORT_PARENT_ID and convert all callers to use the
NDO exclusively. This is a preliminary change to getting rid of
switchdev_ops eventually.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/switchdev.h   | 11 -----------
 net/bridge/br_switchdev.c | 14 +++-----------
 net/core/net-sysfs.c      | 19 ++++---------------
 net/core/rtnetlink.c      | 16 +++-------------
 net/ipv4/ipmr.c           | 19 +++++--------------
 net/switchdev/switchdev.c | 20 --------------------
 6 files changed, 15 insertions(+), 84 deletions(-)

(limited to 'include')

diff --git a/include/net/switchdev.h b/include/net/switchdev.h
index 63843ae5dc81..5e87b54c5dc5 100644
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -43,7 +43,6 @@ static inline bool switchdev_trans_ph_commit(struct switchdev_trans *trans)
 
 enum switchdev_attr_id {
 	SWITCHDEV_ATTR_ID_UNDEFINED,
-	SWITCHDEV_ATTR_ID_PORT_PARENT_ID,
 	SWITCHDEV_ATTR_ID_PORT_STP_STATE,
 	SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS,
 	SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS_SUPPORT,
@@ -61,7 +60,6 @@ struct switchdev_attr {
 	void *complete_priv;
 	void (*complete)(struct net_device *dev, int err, void *priv);
 	union {
-		struct netdev_phys_item_id ppid;	/* PORT_PARENT_ID */
 		u8 stp_state;				/* PORT_STP_STATE */
 		unsigned long brport_flags;		/* PORT_BRIDGE_FLAGS */
 		unsigned long brport_flags_support;	/* PORT_BRIDGE_FLAGS_SUPPORT */
@@ -208,9 +206,6 @@ void switchdev_port_fwd_mark_set(struct net_device *dev,
 				 struct net_device *group_dev,
 				 bool joining);
 
-bool switchdev_port_same_parent_id(struct net_device *a,
-				   struct net_device *b);
-
 int switchdev_handle_port_obj_add(struct net_device *dev,
 			struct switchdev_notifier_port_obj_info *port_obj_info,
 			bool (*check_cb)(const struct net_device *dev),
@@ -295,12 +290,6 @@ call_switchdev_blocking_notifiers(unsigned long val,
 	return NOTIFY_DONE;
 }
 
-static inline bool switchdev_port_same_parent_id(struct net_device *a,
-						 struct net_device *b)
-{
-	return false;
-}
-
 static inline int
 switchdev_handle_port_obj_add(struct net_device *dev,
 			struct switchdev_notifier_port_obj_info *port_obj_info,
diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c
index 06b0ae44585f..db9e8ab96d48 100644
--- a/net/bridge/br_switchdev.c
+++ b/net/bridge/br_switchdev.c
@@ -14,8 +14,7 @@ static int br_switchdev_mark_get(struct net_bridge *br, struct net_device *dev)
 
 	/* dev is yet to be added to the port list. */
 	list_for_each_entry(p, &br->port_list, list) {
-		if (netdev_port_same_parent_id(dev, p->dev) ||
-		    switchdev_port_same_parent_id(dev, p->dev))
+		if (netdev_port_same_parent_id(dev, p->dev))
 			return p->offload_fwd_mark;
 	}
 
@@ -24,19 +23,12 @@ static int br_switchdev_mark_get(struct net_bridge *br, struct net_device *dev)
 
 int nbp_switchdev_mark_set(struct net_bridge_port *p)
 {
-	const struct net_device_ops *ops = p->dev->netdev_ops;
-	struct switchdev_attr attr = {
-		.orig_dev = p->dev,
-		.id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID,
-	};
+	struct netdev_phys_item_id ppid = { };
 	int err;
 
 	ASSERT_RTNL();
 
-	if (ops->ndo_get_port_parent_id)
-		err = dev_get_port_parent_id(p->dev, &attr.u.ppid, true);
-	else
-		err = switchdev_port_attr_get(p->dev, &attr);
+	err = dev_get_port_parent_id(p->dev, &ppid, true);
 	if (err) {
 		if (err == -EOPNOTSUPP)
 			return 0;
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 4eace9f1dcf9..7c5061123ead 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -12,7 +12,6 @@
 #include <linux/capability.h>
 #include <linux/kernel.h>
 #include <linux/netdevice.h>
-#include <net/switchdev.h>
 #include <linux/if_arp.h>
 #include <linux/slab.h>
 #include <linux/sched/signal.h>
@@ -495,27 +494,17 @@ static ssize_t phys_switch_id_show(struct device *dev,
 				   struct device_attribute *attr, char *buf)
 {
 	struct net_device *netdev = to_net_dev(dev);
-	const struct net_device_ops *ops = netdev->netdev_ops;
 	ssize_t ret = -EINVAL;
 
 	if (!rtnl_trylock())
 		return restart_syscall();
 
 	if (dev_isalive(netdev)) {
-		struct switchdev_attr attr = {
-			.orig_dev = netdev,
-			.id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID,
-			.flags = SWITCHDEV_F_NO_RECURSE,
-		};
-
-		if (ops->ndo_get_port_parent_id)
-			ret = dev_get_port_parent_id(netdev, &attr.u.ppid,
-						     false);
-		else
-			ret = switchdev_port_attr_get(netdev, &attr);
+		struct netdev_phys_item_id ppid = { };
+
+		ret = dev_get_port_parent_id(netdev, &ppid, false);
 		if (!ret)
-			ret = sprintf(buf, "%*phN\n", attr.u.ppid.id_len,
-				      attr.u.ppid.id);
+			ret = sprintf(buf, "%*phN\n", ppid.id_len, ppid.id);
 	}
 	rtnl_unlock();
 
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 90dd02c1f561..a51cab95ba64 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -46,7 +46,6 @@
 
 #include <linux/inet.h>
 #include <linux/netdevice.h>
-#include <net/switchdev.h>
 #include <net/ip.h>
 #include <net/protocol.h>
 #include <net/arp.h>
@@ -1146,26 +1145,17 @@ static int rtnl_phys_port_name_fill(struct sk_buff *skb, struct net_device *dev)
 
 static int rtnl_phys_switch_id_fill(struct sk_buff *skb, struct net_device *dev)
 {
-	const struct net_device_ops *ops = dev->netdev_ops;
+	struct netdev_phys_item_id ppid = { };
 	int err;
-	struct switchdev_attr attr = {
-		.orig_dev = dev,
-		.id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID,
-		.flags = SWITCHDEV_F_NO_RECURSE,
-	};
 
-	if (ops->ndo_get_port_parent_id)
-		err = dev_get_port_parent_id(dev, &attr.u.ppid, false);
-	else
-		err = switchdev_port_attr_get(dev, &attr);
+	err = dev_get_port_parent_id(dev, &ppid, false);
 	if (err) {
 		if (err == -EOPNOTSUPP)
 			return 0;
 		return err;
 	}
 
-	if (nla_put(skb, IFLA_PHYS_SWITCH_ID, attr.u.ppid.id_len,
-		    attr.u.ppid.id))
+	if (nla_put(skb, IFLA_PHYS_SWITCH_ID, ppid.id_len, ppid.id))
 		return -EMSGSIZE;
 
 	return 0;
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index c71bcc42d66d..e536970557dd 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -67,7 +67,6 @@
 #include <net/fib_rules.h>
 #include <linux/netconf.h>
 #include <net/nexthop.h>
-#include <net/switchdev.h>
 
 #include <linux/nospec.h>
 
@@ -837,11 +836,8 @@ static void ipmr_update_thresholds(struct mr_table *mrt, struct mr_mfc *cache,
 static int vif_add(struct net *net, struct mr_table *mrt,
 		   struct vifctl *vifc, int mrtsock)
 {
-	const struct net_device_ops *ops;
+	struct netdev_phys_item_id ppid = { };
 	int vifi = vifc->vifc_vifi;
-	struct switchdev_attr attr = {
-		.id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID,
-	};
 	struct vif_device *v = &mrt->vif_table[vifi];
 	struct net_device *dev;
 	struct in_device *in_dev;
@@ -920,15 +916,10 @@ static int vif_add(struct net *net, struct mr_table *mrt,
 			vifc->vifc_flags | (!mrtsock ? VIFF_STATIC : 0),
 			(VIFF_TUNNEL | VIFF_REGISTER));
 
-	attr.orig_dev = dev;
-	ops = dev->netdev_ops;
-	if (ops->ndo_get_port_parent_id &&
-	    !dev_get_port_parent_id(dev, &attr.u.ppid, true)) {
-		memcpy(v->dev_parent_id.id, attr.u.ppid.id, attr.u.ppid.id_len);
-		v->dev_parent_id.id_len = attr.u.ppid.id_len;
-	} else if (!switchdev_port_attr_get(dev, &attr)) {
-		memcpy(v->dev_parent_id.id, attr.u.ppid.id, attr.u.ppid.id_len);
-		v->dev_parent_id.id_len = attr.u.ppid.id_len;
+	err = dev_get_port_parent_id(dev, &ppid, true);
+	if (err == 0) {
+		memcpy(v->dev_parent_id.id, ppid.id, ppid.id_len);
+		v->dev_parent_id.id_len = ppid.id_len;
 	} else {
 		v->dev_parent_id.id_len = 0;
 	}
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index cd78253de31d..7e1357db33d7 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -592,26 +592,6 @@ int call_switchdev_blocking_notifiers(unsigned long val, struct net_device *dev,
 }
 EXPORT_SYMBOL_GPL(call_switchdev_blocking_notifiers);
 
-bool switchdev_port_same_parent_id(struct net_device *a,
-				   struct net_device *b)
-{
-	struct switchdev_attr a_attr = {
-		.orig_dev = a,
-		.id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID,
-	};
-	struct switchdev_attr b_attr = {
-		.orig_dev = b,
-		.id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID,
-	};
-
-	if (switchdev_port_attr_get(a, &a_attr) ||
-	    switchdev_port_attr_get(b, &b_attr))
-		return false;
-
-	return netdev_phys_item_id_same(&a_attr.u.ppid, &b_attr.u.ppid);
-}
-EXPORT_SYMBOL_GPL(switchdev_port_same_parent_id);
-
 static int __switchdev_handle_port_obj_add(struct net_device *dev,
 			struct switchdev_notifier_port_obj_info *port_obj_info,
 			bool (*check_cb)(const struct net_device *dev),
-- 
cgit v1.2.3


From 4d5f007eedb74d71a7bde2bff69b6a31ad8ab427 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 2 Jan 2019 13:28:47 +0100
Subject: time: make adjtime compat handling available for 32 bit

We want to reuse the compat_timex handling on 32-bit architectures the
same way we are using the compat handling for timespec when moving to
64-bit time_t.

Move all definitions related to compat_timex out of the compat code
into the normal timekeeping code, along with a rename to old_timex32,
corresponding to the timespec/timeval structures, and make it controlled
by CONFIG_COMPAT_32BIT_TIME, which 32-bit architectures will then select.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 include/linux/compat.h     | 35 ++---------------------
 include/linux/time32.h     | 32 ++++++++++++++++++++-
 kernel/compat.c            | 64 ------------------------------------------
 kernel/time/posix-timers.c | 14 ++--------
 kernel/time/time.c         | 70 +++++++++++++++++++++++++++++++++++++++++++---
 5 files changed, 102 insertions(+), 113 deletions(-)

(limited to 'include')

diff --git a/include/linux/compat.h b/include/linux/compat.h
index 056be0d03722..657ca6abd855 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -132,37 +132,6 @@ struct compat_tms {
 	compat_clock_t		tms_cstime;
 };
 
-struct compat_timex {
-	compat_uint_t modes;
-	compat_long_t offset;
-	compat_long_t freq;
-	compat_long_t maxerror;
-	compat_long_t esterror;
-	compat_int_t status;
-	compat_long_t constant;
-	compat_long_t precision;
-	compat_long_t tolerance;
-	struct old_timeval32 time;
-	compat_long_t tick;
-	compat_long_t ppsfreq;
-	compat_long_t jitter;
-	compat_int_t shift;
-	compat_long_t stabil;
-	compat_long_t jitcnt;
-	compat_long_t calcnt;
-	compat_long_t errcnt;
-	compat_long_t stbcnt;
-	compat_int_t tai;
-
-	compat_int_t:32; compat_int_t:32; compat_int_t:32; compat_int_t:32;
-	compat_int_t:32; compat_int_t:32; compat_int_t:32; compat_int_t:32;
-	compat_int_t:32; compat_int_t:32; compat_int_t:32;
-};
-
-struct timex;
-int compat_get_timex(struct timex *, const struct compat_timex __user *);
-int compat_put_timex(struct compat_timex __user *, const struct timex *);
-
 #define _COMPAT_NSIG_WORDS	(_COMPAT_NSIG / _COMPAT_NSIG_BPW)
 
 typedef struct {
@@ -808,7 +777,7 @@ asmlinkage long compat_sys_gettimeofday(struct old_timeval32 __user *tv,
 		struct timezone __user *tz);
 asmlinkage long compat_sys_settimeofday(struct old_timeval32 __user *tv,
 		struct timezone __user *tz);
-asmlinkage long compat_sys_adjtimex(struct compat_timex __user *utp);
+asmlinkage long compat_sys_adjtimex(struct old_timex32 __user *utp);
 
 /* kernel/timer.c */
 asmlinkage long compat_sys_sysinfo(struct compat_sysinfo __user *info);
@@ -911,7 +880,7 @@ asmlinkage long compat_sys_open_by_handle_at(int mountdirfd,
 					     struct file_handle __user *handle,
 					     int flags);
 asmlinkage long compat_sys_clock_adjtime(clockid_t which_clock,
-					 struct compat_timex __user *tp);
+					 struct old_timex32 __user *tp);
 asmlinkage long compat_sys_sendmmsg(int fd, struct compat_mmsghdr __user *mmsg,
 				    unsigned vlen, unsigned int flags);
 asmlinkage ssize_t compat_sys_process_vm_readv(compat_pid_t pid,
diff --git a/include/linux/time32.h b/include/linux/time32.h
index 118b9977080c..820a22e2b98b 100644
--- a/include/linux/time32.h
+++ b/include/linux/time32.h
@@ -10,6 +10,7 @@
  */
 
 #include <linux/time64.h>
+#include <linux/timex.h>
 
 #define TIME_T_MAX	(time_t)((1UL << ((sizeof(time_t) << 3) - 1)) - 1)
 
@@ -35,13 +36,42 @@ struct old_utimbuf32 {
 	old_time32_t	modtime;
 };
 
+struct old_timex32 {
+	u32 modes;
+	s32 offset;
+	s32 freq;
+	s32 maxerror;
+	s32 esterror;
+	s32 status;
+	s32 constant;
+	s32 precision;
+	s32 tolerance;
+	struct old_timeval32 time;
+	s32 tick;
+	s32 ppsfreq;
+	s32 jitter;
+	s32 shift;
+	s32 stabil;
+	s32 jitcnt;
+	s32 calcnt;
+	s32 errcnt;
+	s32 stbcnt;
+	s32 tai;
+
+	s32:32; s32:32; s32:32; s32:32;
+	s32:32; s32:32; s32:32; s32:32;
+	s32:32; s32:32; s32:32;
+};
+
 extern int get_old_timespec32(struct timespec64 *, const void __user *);
 extern int put_old_timespec32(const struct timespec64 *, void __user *);
 extern int get_old_itimerspec32(struct itimerspec64 *its,
 			const struct old_itimerspec32 __user *uits);
 extern int put_old_itimerspec32(const struct itimerspec64 *its,
 			struct old_itimerspec32 __user *uits);
-
+struct timex;
+int get_old_timex32(struct timex *, const struct old_timex32 __user *);
+int put_old_timex32(struct old_timex32 __user *, const struct timex *);
 
 #if __BITS_PER_LONG == 64
 
diff --git a/kernel/compat.c b/kernel/compat.c
index f01affa17e22..d8a36c6ad7c9 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -20,7 +20,6 @@
 #include <linux/syscalls.h>
 #include <linux/unistd.h>
 #include <linux/security.h>
-#include <linux/timex.h>
 #include <linux/export.h>
 #include <linux/migrate.h>
 #include <linux/posix-timers.h>
@@ -30,69 +29,6 @@
 
 #include <linux/uaccess.h>
 
-int compat_get_timex(struct timex *txc, const struct compat_timex __user *utp)
-{
-	struct compat_timex tx32;
-
-	memset(txc, 0, sizeof(struct timex));
-	if (copy_from_user(&tx32, utp, sizeof(struct compat_timex)))
-		return -EFAULT;
-
-	txc->modes = tx32.modes;
-	txc->offset = tx32.offset;
-	txc->freq = tx32.freq;
-	txc->maxerror = tx32.maxerror;
-	txc->esterror = tx32.esterror;
-	txc->status = tx32.status;
-	txc->constant = tx32.constant;
-	txc->precision = tx32.precision;
-	txc->tolerance = tx32.tolerance;
-	txc->time.tv_sec = tx32.time.tv_sec;
-	txc->time.tv_usec = tx32.time.tv_usec;
-	txc->tick = tx32.tick;
-	txc->ppsfreq = tx32.ppsfreq;
-	txc->jitter = tx32.jitter;
-	txc->shift = tx32.shift;
-	txc->stabil = tx32.stabil;
-	txc->jitcnt = tx32.jitcnt;
-	txc->calcnt = tx32.calcnt;
-	txc->errcnt = tx32.errcnt;
-	txc->stbcnt = tx32.stbcnt;
-
-	return 0;
-}
-
-int compat_put_timex(struct compat_timex __user *utp, const struct timex *txc)
-{
-	struct compat_timex tx32;
-
-	memset(&tx32, 0, sizeof(struct compat_timex));
-	tx32.modes = txc->modes;
-	tx32.offset = txc->offset;
-	tx32.freq = txc->freq;
-	tx32.maxerror = txc->maxerror;
-	tx32.esterror = txc->esterror;
-	tx32.status = txc->status;
-	tx32.constant = txc->constant;
-	tx32.precision = txc->precision;
-	tx32.tolerance = txc->tolerance;
-	tx32.time.tv_sec = txc->time.tv_sec;
-	tx32.time.tv_usec = txc->time.tv_usec;
-	tx32.tick = txc->tick;
-	tx32.ppsfreq = txc->ppsfreq;
-	tx32.jitter = txc->jitter;
-	tx32.shift = txc->shift;
-	tx32.stabil = txc->stabil;
-	tx32.jitcnt = txc->jitcnt;
-	tx32.calcnt = txc->calcnt;
-	tx32.errcnt = txc->errcnt;
-	tx32.stbcnt = txc->stbcnt;
-	tx32.tai = txc->tai;
-	if (copy_to_user(utp, &tx32, sizeof(struct compat_timex)))
-		return -EFAULT;
-	return 0;
-}
-
 static int __compat_get_timeval(struct timeval *tv, const struct old_timeval32 __user *ctv)
 {
 	return (!access_ok(ctv, sizeof(*ctv)) ||
diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
index 0e84bb72a3da..8955f32f2a36 100644
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -1123,12 +1123,8 @@ COMPAT_SYSCALL_DEFINE2(clock_gettime, clockid_t, which_clock,
 	return err;
 }
 
-#endif
-
-#ifdef CONFIG_COMPAT
-
 COMPAT_SYSCALL_DEFINE2(clock_adjtime, clockid_t, which_clock,
-		       struct compat_timex __user *, utp)
+		       struct old_timex32 __user *, utp)
 {
 	const struct k_clock *kc = clockid_to_kclock(which_clock);
 	struct timex ktx;
@@ -1139,22 +1135,18 @@ COMPAT_SYSCALL_DEFINE2(clock_adjtime, clockid_t, which_clock,
 	if (!kc->clock_adj)
 		return -EOPNOTSUPP;
 
-	err = compat_get_timex(&ktx, utp);
+	err = get_old_timex32(&ktx, utp);
 	if (err)
 		return err;
 
 	err = kc->clock_adj(which_clock, &ktx);
 
 	if (err >= 0)
-		err = compat_put_timex(utp, &ktx);
+		err = put_old_timex32(utp, &ktx);
 
 	return err;
 }
 
-#endif
-
-#ifdef CONFIG_COMPAT_32BIT_TIME
-
 COMPAT_SYSCALL_DEFINE2(clock_getres, clockid_t, which_clock,
 		       struct old_timespec32 __user *, tp)
 {
diff --git a/kernel/time/time.c b/kernel/time/time.c
index 2edb5088a70b..2d013bc2b271 100644
--- a/kernel/time/time.c
+++ b/kernel/time/time.c
@@ -278,20 +278,82 @@ SYSCALL_DEFINE1(adjtimex, struct timex __user *, txc_p)
 	return copy_to_user(txc_p, &txc, sizeof(struct timex)) ? -EFAULT : ret;
 }
 
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_COMPAT_32BIT_TIME
+int get_old_timex32(struct timex *txc, const struct old_timex32 __user *utp)
+{
+	struct old_timex32 tx32;
+
+	memset(txc, 0, sizeof(struct timex));
+	if (copy_from_user(&tx32, utp, sizeof(struct old_timex32)))
+		return -EFAULT;
+
+	txc->modes = tx32.modes;
+	txc->offset = tx32.offset;
+	txc->freq = tx32.freq;
+	txc->maxerror = tx32.maxerror;
+	txc->esterror = tx32.esterror;
+	txc->status = tx32.status;
+	txc->constant = tx32.constant;
+	txc->precision = tx32.precision;
+	txc->tolerance = tx32.tolerance;
+	txc->time.tv_sec = tx32.time.tv_sec;
+	txc->time.tv_usec = tx32.time.tv_usec;
+	txc->tick = tx32.tick;
+	txc->ppsfreq = tx32.ppsfreq;
+	txc->jitter = tx32.jitter;
+	txc->shift = tx32.shift;
+	txc->stabil = tx32.stabil;
+	txc->jitcnt = tx32.jitcnt;
+	txc->calcnt = tx32.calcnt;
+	txc->errcnt = tx32.errcnt;
+	txc->stbcnt = tx32.stbcnt;
+
+	return 0;
+}
+
+int put_old_timex32(struct old_timex32 __user *utp, const struct timex *txc)
+{
+	struct old_timex32 tx32;
+
+	memset(&tx32, 0, sizeof(struct old_timex32));
+	tx32.modes = txc->modes;
+	tx32.offset = txc->offset;
+	tx32.freq = txc->freq;
+	tx32.maxerror = txc->maxerror;
+	tx32.esterror = txc->esterror;
+	tx32.status = txc->status;
+	tx32.constant = txc->constant;
+	tx32.precision = txc->precision;
+	tx32.tolerance = txc->tolerance;
+	tx32.time.tv_sec = txc->time.tv_sec;
+	tx32.time.tv_usec = txc->time.tv_usec;
+	tx32.tick = txc->tick;
+	tx32.ppsfreq = txc->ppsfreq;
+	tx32.jitter = txc->jitter;
+	tx32.shift = txc->shift;
+	tx32.stabil = txc->stabil;
+	tx32.jitcnt = txc->jitcnt;
+	tx32.calcnt = txc->calcnt;
+	tx32.errcnt = txc->errcnt;
+	tx32.stbcnt = txc->stbcnt;
+	tx32.tai = txc->tai;
+	if (copy_to_user(utp, &tx32, sizeof(struct old_timex32)))
+		return -EFAULT;
+	return 0;
+}
 
-COMPAT_SYSCALL_DEFINE1(adjtimex, struct compat_timex __user *, utp)
+COMPAT_SYSCALL_DEFINE1(adjtimex, struct old_timex32 __user *, utp)
 {
 	struct timex txc;
 	int err, ret;
 
-	err = compat_get_timex(&txc, utp);
+	err = get_old_timex32(&txc, utp);
 	if (err)
 		return err;
 
 	ret = do_adjtimex(&txc);
 
-	err = compat_put_timex(utp, &txc);
+	err = put_old_timex32(utp, &txc);
 	if (err)
 		return err;
 
-- 
cgit v1.2.3


From 2c620ff93d9fbd5d644760d4c21d389078ec1080 Mon Sep 17 00:00:00 2001
From: Deepa Dinamani <deepa.kernel@gmail.com>
Date: Mon, 2 Jul 2018 22:44:20 -0700
Subject: time: Add struct __kernel_timex

struct timex uses struct timeval internally.
struct timeval is not y2038 safe.
Introduce a new UAPI type struct __kernel_timex
that is y2038 safe.

struct __kernel_timex uses a timeval type that is
similar to struct __kernel_timespec which preserves the
same structure size across 32 bit and 64 bit ABIs.
struct __kernel_timex also restructures other members of the
structure to make the structure the same on 64 bit and 32 bit
architectures.
Note that struct __kernel_timex is the same as struct timex
on a 64 bit architecture.

The above solution is similar to other new y2038 syscalls
that are being introduced: both 32 bit and 64 bit ABIs
have a common entry, and the compat entry supports the old 32 bit
syscall interface.

Alternatives considered were:
1. Add new time type to struct timex that makes use of padded
   bits. This time type could be based on the struct __kernel_timespec.
   modes will use a flag to notify which time structure should be
   used internally.
   This needs some application level changes on both 64 bit and 32 bit
   architectures. Although 64 bit machines could continue to use the
   older timeval structure without any changes.

2. Add a new u8 type to struct timex that makes use of padded bits. This
   can be used to save higher order tv_sec bits. modes will use a flag to
   notify presence of such a type.
   This will need some application level changes on 32 bit architectures.

3. Add a new compat_timex structure that differs in only the size of the
   time type; keep rest of struct timex the same.
   This requires extra syscalls to manage all 3 cases on 64 bit
   architectures. This will not need any application level changes but will
   add more complexity from kernel side.

Signed-off-by: Deepa Dinamani <deepa.kernel@gmail.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 include/linux/timex.h      |  7 +++++++
 include/uapi/linux/timex.h | 41 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 48 insertions(+)

(limited to 'include')

diff --git a/include/linux/timex.h b/include/linux/timex.h
index 39c25dbebfe8..7f40e9e42ecc 100644
--- a/include/linux/timex.h
+++ b/include/linux/timex.h
@@ -53,6 +53,13 @@
 #ifndef _LINUX_TIMEX_H
 #define _LINUX_TIMEX_H
 
+/* CONFIG_64BIT_TIME enables new 64 bit time_t syscalls in the compat path
+ * and 32-bit emulation.
+ */
+#ifndef CONFIG_64BIT_TIME
+#define __kernel_timex timex
+#endif
+
 #include <uapi/linux/timex.h>
 
 #define ADJ_ADJTIME		0x8000	/* switch between adjtime/adjtimex modes */
diff --git a/include/uapi/linux/timex.h b/include/uapi/linux/timex.h
index 92685d826444..a1c6b73016a5 100644
--- a/include/uapi/linux/timex.h
+++ b/include/uapi/linux/timex.h
@@ -92,6 +92,47 @@ struct timex {
 	int  :32; int  :32; int  :32;
 };
 
+struct __kernel_timex_timeval {
+	__kernel_time64_t       tv_sec;
+	long long		tv_usec;
+};
+
+#ifndef __kernel_timex
+struct __kernel_timex {
+	unsigned int modes;	/* mode selector */
+	int :32;            /* pad */
+	long long offset;	/* time offset (usec) */
+	long long freq;	/* frequency offset (scaled ppm) */
+	long long maxerror;/* maximum error (usec) */
+	long long esterror;/* estimated error (usec) */
+	int status;		/* clock command/status */
+	int :32;            /* pad */
+	long long constant;/* pll time constant */
+	long long precision;/* clock precision (usec) (read only) */
+	long long tolerance;/* clock frequency tolerance (ppm)
+				   * (read only)
+				   */
+	struct __kernel_timex_timeval time;	/* (read only, except for ADJ_SETOFFSET) */
+	long long tick;	/* (modified) usecs between clock ticks */
+
+	long long ppsfreq;/* pps frequency (scaled ppm) (ro) */
+	long long jitter; /* pps jitter (us) (ro) */
+	int shift;              /* interval duration (s) (shift) (ro) */
+	int :32;            /* pad */
+	long long stabil;            /* pps stability (scaled ppm) (ro) */
+	long long jitcnt; /* jitter limit exceeded (ro) */
+	long long calcnt; /* calibration intervals (ro) */
+	long long errcnt; /* calibration errors (ro) */
+	long long stbcnt; /* stability limit exceeded (ro) */
+
+	int tai;		/* TAI offset (ro) */
+
+	int  :32; int  :32; int  :32; int  :32;
+	int  :32; int  :32; int  :32; int  :32;
+	int  :32; int  :32; int  :32;
+};
+#endif
+
 /*
  * Mode codes (timex.mode)
  */
-- 
cgit v1.2.3


From 50b93f30f6d8672f9ec80e90af94d733f11a20e0 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 1 Jan 2019 17:34:39 +0100
Subject: time: fix sys_timer_settime prototype

A small typo has crept into the y2038 conversion of the timer_settime
system call. So far this was completely harmless, but once we start
using the new version, this has to be fixed.

Fixes: 6ff847350702 ("time: Change types to new y2038 safe __kernel_itimerspec")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 include/linux/syscalls.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 938d8908b9e0..baa4b70b02d3 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -591,7 +591,7 @@ asmlinkage long sys_timer_gettime(timer_t timer_id,
 asmlinkage long sys_timer_getoverrun(timer_t timer_id);
 asmlinkage long sys_timer_settime(timer_t timer_id, int flags,
 				const struct __kernel_itimerspec __user *new_setting,
-				struct itimerspec __user *old_setting);
+				struct __kernel_itimerspec __user *old_setting);
 asmlinkage long sys_timer_delete(timer_t timer_id);
 asmlinkage long sys_clock_settime(clockid_t which_clock,
 				const struct __kernel_timespec __user *tp);
-- 
cgit v1.2.3


From 1a596398a3d75f966b75f428e992cf1f242f9a5b Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 3 Jan 2019 21:12:39 +0100
Subject: sparc64: add custom adjtimex/clock_adjtime functions

sparc64 is the only architecture on Linux that has a 'timeval'
definition with a 32-bit tv_usec but a 64-bit tv_sec. This causes
problems for sparc32 compat mode when we convert it to use the
new __kernel_timex type that has the same layout as all other
64-bit architectures.

To avoid adding sparc64 specific code into the generic adjtimex
implementation, this adds a wrapper in the sparc64 system call handling
that converts the sparc64 'timex' into the new '__kernel_timex'.

At this point, the two structures are defined to be identical,
but that will change in the next step once we convert sparc32.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 arch/sparc/kernel/sys_sparc_64.c       | 59 +++++++++++++++++++++++++++++++++-
 arch/sparc/kernel/syscalls/syscall.tbl |  6 ++--
 include/linux/timex.h                  |  2 ++
 kernel/time/posix-timers.c             | 24 +++++++-------
 4 files changed, 76 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c
index 1c079e7bab09..37de18a11207 100644
--- a/arch/sparc/kernel/sys_sparc_64.c
+++ b/arch/sparc/kernel/sys_sparc_64.c
@@ -28,8 +28,9 @@
 #include <linux/random.h>
 #include <linux/export.h>
 #include <linux/context_tracking.h>
-
+#include <linux/timex.h>
 #include <linux/uaccess.h>
+
 #include <asm/utrap.h>
 #include <asm/unistd.h>
 
@@ -544,6 +545,62 @@ out_unlock:
 	return err;
 }
 
+SYSCALL_DEFINE1(sparc_adjtimex, struct timex __user *, txc_p)
+{
+	struct timex txc;		/* Local copy of parameter */
+	struct timex *kt = (void *)&txc;
+	int ret;
+
+	/* Copy the user data space into the kernel copy
+	 * structure. But bear in mind that the structures
+	 * may change
+	 */
+	if (copy_from_user(&txc, txc_p, sizeof(struct timex)))
+		return -EFAULT;
+
+	/*
+	 * override for sparc64 specific timeval type: tv_usec
+	 * is 32 bit wide instead of 64-bit in __kernel_timex
+	 */
+	kt->time.tv_usec = txc.time.tv_usec;
+	ret = do_adjtimex(kt);
+	txc.time.tv_usec = kt->time.tv_usec;
+
+	return copy_to_user(txc_p, &txc, sizeof(struct timex)) ? -EFAULT : ret;
+}
+
+SYSCALL_DEFINE2(sparc_clock_adjtime, const clockid_t, which_clock,struct timex __user *, txc_p)
+{
+	struct timex txc;		/* Local copy of parameter */
+	struct timex *kt = (void *)&txc;
+	int ret;
+
+	if (!IS_ENABLED(CONFIG_POSIX_TIMERS)) {
+		pr_err_once("process %d (%s) attempted a POSIX timer syscall "
+		    "while CONFIG_POSIX_TIMERS is not set\n",
+		    current->pid, current->comm);
+
+		return -ENOSYS;
+	}
+
+	/* Copy the user data space into the kernel copy
+	 * structure. But bear in mind that the structures
+	 * may change
+	 */
+	if (copy_from_user(&txc, txc_p, sizeof(struct timex)))
+		return -EFAULT;
+
+	/*
+	 * override for sparc64 specific timeval type: tv_usec
+	 * is 32 bit wide instead of 64-bit in __kernel_timex
+	 */
+	kt->time.tv_usec = txc.time.tv_usec;
+	ret = do_clock_adjtime(which_clock, kt);
+	txc.time.tv_usec = kt->time.tv_usec;
+
+	return copy_to_user(txc_p, &txc, sizeof(struct timex)) ? -EFAULT : ret;
+}
+
 SYSCALL_DEFINE5(utrap_install, utrap_entry_t, type,
 		utrap_handler_t, new_p, utrap_handler_t, new_d,
 		utrap_handler_t __user *, old_p,
diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl
index 6992d17cce37..e63cd013cc77 100644
--- a/arch/sparc/kernel/syscalls/syscall.tbl
+++ b/arch/sparc/kernel/syscalls/syscall.tbl
@@ -258,7 +258,8 @@
 216	64	sigreturn		sys_nis_syscall
 217	common	clone			sys_clone
 218	common	ioprio_get		sys_ioprio_get
-219	common	adjtimex		sys_adjtimex			compat_sys_adjtimex
+219	32	adjtimex		sys_adjtimex			compat_sys_adjtimex
+219	64	adjtimex		sys_sparc_adjtimex
 220	32	sigprocmask		sys_sigprocmask			compat_sys_sigprocmask
 220	64	sigprocmask		sys_nis_syscall
 221	common	create_module		sys_ni_syscall
@@ -377,7 +378,8 @@
 331	common	prlimit64		sys_prlimit64
 332	common	name_to_handle_at	sys_name_to_handle_at
 333	common	open_by_handle_at	sys_open_by_handle_at		compat_sys_open_by_handle_at
-334	common	clock_adjtime		sys_clock_adjtime		compat_sys_clock_adjtime
+334	32	clock_adjtime		sys_clock_adjtime		compat_sys_clock_adjtime
+334	64	clock_adjtime		sys_sparc_clock_adjtime
 335	common	syncfs			sys_syncfs
 336	common	sendmmsg		sys_sendmmsg			compat_sys_sendmmsg
 337	common	setns			sys_setns
diff --git a/include/linux/timex.h b/include/linux/timex.h
index 7f40e9e42ecc..a15e6aeb8d49 100644
--- a/include/linux/timex.h
+++ b/include/linux/timex.h
@@ -159,6 +159,8 @@ extern unsigned long tick_nsec;		/* SHIFTED_HZ period (nsec) */
 #define NTP_INTERVAL_LENGTH (NSEC_PER_SEC/NTP_INTERVAL_FREQ)
 
 extern int do_adjtimex(struct timex *);
+extern int do_clock_adjtime(const clockid_t which_clock, struct timex * ktx);
+
 extern void hardpps(const struct timespec64 *, const struct timespec64 *);
 
 int read_current_timer(unsigned long *timer_val);
diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
index 8955f32f2a36..8f7f1dd95940 100644
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -1047,22 +1047,28 @@ SYSCALL_DEFINE2(clock_gettime, const clockid_t, which_clock,
 	return error;
 }
 
-SYSCALL_DEFINE2(clock_adjtime, const clockid_t, which_clock,
-		struct timex __user *, utx)
+int do_clock_adjtime(const clockid_t which_clock, struct timex * ktx)
 {
 	const struct k_clock *kc = clockid_to_kclock(which_clock);
-	struct timex ktx;
-	int err;
 
 	if (!kc)
 		return -EINVAL;
 	if (!kc->clock_adj)
 		return -EOPNOTSUPP;
 
+	return kc->clock_adj(which_clock, ktx);
+}
+
+SYSCALL_DEFINE2(clock_adjtime, const clockid_t, which_clock,
+		struct timex __user *, utx)
+{
+	struct timex ktx;
+	int err;
+
 	if (copy_from_user(&ktx, utx, sizeof(ktx)))
 		return -EFAULT;
 
-	err = kc->clock_adj(which_clock, &ktx);
+	err = do_clock_adjtime(which_clock, &ktx);
 
 	if (err >= 0 && copy_to_user(utx, &ktx, sizeof(ktx)))
 		return -EFAULT;
@@ -1126,20 +1132,14 @@ COMPAT_SYSCALL_DEFINE2(clock_gettime, clockid_t, which_clock,
 COMPAT_SYSCALL_DEFINE2(clock_adjtime, clockid_t, which_clock,
 		       struct old_timex32 __user *, utp)
 {
-	const struct k_clock *kc = clockid_to_kclock(which_clock);
 	struct timex ktx;
 	int err;
 
-	if (!kc)
-		return -EINVAL;
-	if (!kc->clock_adj)
-		return -EOPNOTSUPP;
-
 	err = get_old_timex32(&ktx, utp);
 	if (err)
 		return err;
 
-	err = kc->clock_adj(which_clock, &ktx);
+	err = do_clock_adjtime(which_clock, &ktx);
 
 	if (err >= 0)
 		err = put_old_timex32(utp, &ktx);
-- 
cgit v1.2.3


From ead25417f82ed7f8a21da4dcefc768169f7da884 Mon Sep 17 00:00:00 2001
From: Deepa Dinamani <deepa.kernel@gmail.com>
Date: Mon, 2 Jul 2018 22:44:21 -0700
Subject: timex: use __kernel_timex internally

struct timex is not y2038 safe.
Replace all uses of timex with y2038 safe __kernel_timex.

Note that struct __kernel_timex is an ABI interface definition.
We could define a new structure based on __kernel_timex that
is only available internally instead. Right now, there isn't
a strong motivation for this as the structure is isolated to
a few defined struct timex interfaces and such a structure would
be exactly the same as struct timex.

The patch was generated by the following coccinelle script:

virtual patch

@depends on patch forall@
identifier ts;
expression e;
@@
(
- struct timex ts;
+ struct __kernel_timex ts;
|
- struct timex ts = {};
+ struct __kernel_timex ts = {};
|
- struct timex ts = e;
+ struct __kernel_timex ts = e;
|
- struct timex *ts;
+ struct __kernel_timex *ts;
|
(memset \| copy_from_user \| copy_to_user \)(...,
- sizeof(struct timex))
+ sizeof(struct __kernel_timex))
)

@depends on patch forall@
identifier ts;
identifier fn;
@@
fn(...,
- struct timex *ts,
+ struct __kernel_timex *ts,
...) {
...
}

@depends on patch forall@
identifier ts;
identifier fn;
@@
fn(...,
- struct timex *ts) {
+ struct __kernel_timex *ts) {
...
}

Signed-off-by: Deepa Dinamani <deepa.kernel@gmail.com>
Cc: linux-alpha@vger.kernel.org
Cc: netdev@vger.kernel.org
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 arch/alpha/kernel/osf_sys.c      |  5 +++--
 arch/sparc/kernel/sys_sparc_64.c |  4 ++--
 drivers/ptp/ptp_clock.c          |  2 +-
 include/linux/posix-clock.h      |  2 +-
 include/linux/time32.h           |  6 +++---
 include/linux/timex.h            |  4 ++--
 kernel/time/ntp.c                | 18 ++++++++++--------
 kernel/time/ntp_internal.h       |  2 +-
 kernel/time/posix-clock.c        |  2 +-
 kernel/time/posix-timers.c       |  8 ++++----
 kernel/time/posix-timers.h       |  2 +-
 kernel/time/time.c               | 14 +++++++-------
 kernel/time/timekeeping.c        |  4 ++--
 13 files changed, 38 insertions(+), 35 deletions(-)

(limited to 'include')

diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c
index 792586038808..bf497b8b0ec6 100644
--- a/arch/alpha/kernel/osf_sys.c
+++ b/arch/alpha/kernel/osf_sys.c
@@ -1253,7 +1253,7 @@ struct timex32 {
 
 SYSCALL_DEFINE1(old_adjtimex, struct timex32 __user *, txc_p)
 {
-        struct timex txc;
+	struct __kernel_timex txc;
 	int ret;
 
 	/* copy relevant bits of struct timex. */
@@ -1270,7 +1270,8 @@ SYSCALL_DEFINE1(old_adjtimex, struct timex32 __user *, txc_p)
 	if (copy_to_user(txc_p, &txc, offsetof(struct timex32, time)) ||
 	    (copy_to_user(&txc_p->tick, &txc.tick, sizeof(struct timex32) - 
 			  offsetof(struct timex32, tick))) ||
-	    (put_tv_to_tv32(&txc_p->time, &txc.time)))
+	    (put_user(txc.time.tv_sec, &txc_p->time.tv_sec)) ||
+	    (put_user(txc.time.tv_usec, &txc_p->time.tv_usec)))
 	  return -EFAULT;
 
 	return ret;
diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c
index 37de18a11207..9825ca6a6020 100644
--- a/arch/sparc/kernel/sys_sparc_64.c
+++ b/arch/sparc/kernel/sys_sparc_64.c
@@ -548,7 +548,7 @@ out_unlock:
 SYSCALL_DEFINE1(sparc_adjtimex, struct timex __user *, txc_p)
 {
 	struct timex txc;		/* Local copy of parameter */
-	struct timex *kt = (void *)&txc;
+	struct __kernel_timex *kt = (void *)&txc;
 	int ret;
 
 	/* Copy the user data space into the kernel copy
@@ -572,7 +572,7 @@ SYSCALL_DEFINE1(sparc_adjtimex, struct timex __user *, txc_p)
 SYSCALL_DEFINE2(sparc_clock_adjtime, const clockid_t, which_clock,struct timex __user *, txc_p)
 {
 	struct timex txc;		/* Local copy of parameter */
-	struct timex *kt = (void *)&txc;
+	struct __kernel_timex *kt = (void *)&txc;
 	int ret;
 
 	if (!IS_ENABLED(CONFIG_POSIX_TIMERS)) {
diff --git a/drivers/ptp/ptp_clock.c b/drivers/ptp/ptp_clock.c
index 48f3594a7458..79bd102c9bbc 100644
--- a/drivers/ptp/ptp_clock.c
+++ b/drivers/ptp/ptp_clock.c
@@ -124,7 +124,7 @@ static int ptp_clock_gettime(struct posix_clock *pc, struct timespec64 *tp)
 	return err;
 }
 
-static int ptp_clock_adjtime(struct posix_clock *pc, struct timex *tx)
+static int ptp_clock_adjtime(struct posix_clock *pc, struct __kernel_timex *tx)
 {
 	struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock);
 	struct ptp_clock_info *ops;
diff --git a/include/linux/posix-clock.h b/include/linux/posix-clock.h
index 3a3bc71017d5..18674d7d5b1c 100644
--- a/include/linux/posix-clock.h
+++ b/include/linux/posix-clock.h
@@ -51,7 +51,7 @@ struct posix_clock;
 struct posix_clock_operations {
 	struct module *owner;
 
-	int  (*clock_adjtime)(struct posix_clock *pc, struct timex *tx);
+	int  (*clock_adjtime)(struct posix_clock *pc, struct __kernel_timex *tx);
 
 	int  (*clock_gettime)(struct posix_clock *pc, struct timespec64 *ts);
 
diff --git a/include/linux/time32.h b/include/linux/time32.h
index 820a22e2b98b..0a1f302a1753 100644
--- a/include/linux/time32.h
+++ b/include/linux/time32.h
@@ -69,9 +69,9 @@ extern int get_old_itimerspec32(struct itimerspec64 *its,
 			const struct old_itimerspec32 __user *uits);
 extern int put_old_itimerspec32(const struct itimerspec64 *its,
 			struct old_itimerspec32 __user *uits);
-struct timex;
-int get_old_timex32(struct timex *, const struct old_timex32 __user *);
-int put_old_timex32(struct old_timex32 __user *, const struct timex *);
+struct __kernel_timex;
+int get_old_timex32(struct __kernel_timex *, const struct old_timex32 __user *);
+int put_old_timex32(struct old_timex32 __user *, const struct __kernel_timex *);
 
 #if __BITS_PER_LONG == 64
 
diff --git a/include/linux/timex.h b/include/linux/timex.h
index a15e6aeb8d49..4aff9f0d1367 100644
--- a/include/linux/timex.h
+++ b/include/linux/timex.h
@@ -158,8 +158,8 @@ extern unsigned long tick_nsec;		/* SHIFTED_HZ period (nsec) */
 #define NTP_INTERVAL_FREQ  (HZ)
 #define NTP_INTERVAL_LENGTH (NSEC_PER_SEC/NTP_INTERVAL_FREQ)
 
-extern int do_adjtimex(struct timex *);
-extern int do_clock_adjtime(const clockid_t which_clock, struct timex * ktx);
+extern int do_adjtimex(struct __kernel_timex *);
+extern int do_clock_adjtime(const clockid_t which_clock, struct __kernel_timex * ktx);
 
 extern void hardpps(const struct timespec64 *, const struct timespec64 *);
 
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index 36a2bef00125..92a90014a925 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -188,13 +188,13 @@ static inline int is_error_status(int status)
 			&& (status & (STA_PPSWANDER|STA_PPSERROR)));
 }
 
-static inline void pps_fill_timex(struct timex *txc)
+static inline void pps_fill_timex(struct __kernel_timex *txc)
 {
 	txc->ppsfreq	   = shift_right((pps_freq >> PPM_SCALE_INV_SHIFT) *
 					 PPM_SCALE_INV, NTP_SCALE_SHIFT);
 	txc->jitter	   = pps_jitter;
 	if (!(time_status & STA_NANO))
-		txc->jitter /= NSEC_PER_USEC;
+		txc->jitter = pps_jitter / NSEC_PER_USEC;
 	txc->shift	   = pps_shift;
 	txc->stabil	   = pps_stabil;
 	txc->jitcnt	   = pps_jitcnt;
@@ -220,7 +220,7 @@ static inline int is_error_status(int status)
 	return status & (STA_UNSYNC|STA_CLOCKERR);
 }
 
-static inline void pps_fill_timex(struct timex *txc)
+static inline void pps_fill_timex(struct __kernel_timex *txc)
 {
 	/* PPS is not implemented, so these are zero */
 	txc->ppsfreq	   = 0;
@@ -633,7 +633,7 @@ void ntp_notify_cmos_timer(void)
 /*
  * Propagate a new txc->status value into the NTP state:
  */
-static inline void process_adj_status(const struct timex *txc)
+static inline void process_adj_status(const struct __kernel_timex *txc)
 {
 	if ((time_status & STA_PLL) && !(txc->status & STA_PLL)) {
 		time_state = TIME_OK;
@@ -656,7 +656,8 @@ static inline void process_adj_status(const struct timex *txc)
 }
 
 
-static inline void process_adjtimex_modes(const struct timex *txc, s32 *time_tai)
+static inline void process_adjtimex_modes(const struct __kernel_timex *txc,
+					  s32 *time_tai)
 {
 	if (txc->modes & ADJ_STATUS)
 		process_adj_status(txc);
@@ -707,7 +708,8 @@ static inline void process_adjtimex_modes(const struct timex *txc, s32 *time_tai
  * adjtimex mainly allows reading (and writing, if superuser) of
  * kernel time-keeping variables. used by xntpd.
  */
-int __do_adjtimex(struct timex *txc, const struct timespec64 *ts, s32 *time_tai)
+int __do_adjtimex(struct __kernel_timex *txc, const struct timespec64 *ts,
+		  s32 *time_tai)
 {
 	int result;
 
@@ -729,7 +731,7 @@ int __do_adjtimex(struct timex *txc, const struct timespec64 *ts, s32 *time_tai)
 		txc->offset = shift_right(time_offset * NTP_INTERVAL_FREQ,
 				  NTP_SCALE_SHIFT);
 		if (!(time_status & STA_NANO))
-			txc->offset /= NSEC_PER_USEC;
+			txc->offset = (u32)txc->offset / NSEC_PER_USEC;
 	}
 
 	result = time_state;	/* mostly `TIME_OK' */
@@ -754,7 +756,7 @@ int __do_adjtimex(struct timex *txc, const struct timespec64 *ts, s32 *time_tai)
 	txc->time.tv_sec = (time_t)ts->tv_sec;
 	txc->time.tv_usec = ts->tv_nsec;
 	if (!(time_status & STA_NANO))
-		txc->time.tv_usec /= NSEC_PER_USEC;
+		txc->time.tv_usec = ts->tv_nsec / NSEC_PER_USEC;
 
 	/* Handle leapsec adjustments */
 	if (unlikely(ts->tv_sec >= ntp_next_leap_sec)) {
diff --git a/kernel/time/ntp_internal.h b/kernel/time/ntp_internal.h
index c24b0e13f011..40e6122e634e 100644
--- a/kernel/time/ntp_internal.h
+++ b/kernel/time/ntp_internal.h
@@ -8,6 +8,6 @@ extern void ntp_clear(void);
 extern u64 ntp_tick_length(void);
 extern ktime_t ntp_get_next_leap(void);
 extern int second_overflow(time64_t secs);
-extern int __do_adjtimex(struct timex *txc, const struct timespec64 *ts, s32 *time_tai);
+extern int __do_adjtimex(struct __kernel_timex *txc, const struct timespec64 *ts, s32 *time_tai);
 extern void __hardpps(const struct timespec64 *phase_ts, const struct timespec64 *raw_ts);
 #endif /* _LINUX_NTP_INTERNAL_H */
diff --git a/kernel/time/posix-clock.c b/kernel/time/posix-clock.c
index 425bbfce6819..ec960bb939fd 100644
--- a/kernel/time/posix-clock.c
+++ b/kernel/time/posix-clock.c
@@ -228,7 +228,7 @@ static void put_clock_desc(struct posix_clock_desc *cd)
 	fput(cd->fp);
 }
 
-static int pc_clock_adjtime(clockid_t id, struct timex *tx)
+static int pc_clock_adjtime(clockid_t id, struct __kernel_timex *tx)
 {
 	struct posix_clock_desc cd;
 	int err;
diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
index 8f7f1dd95940..2d84b3db1ade 100644
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -179,7 +179,7 @@ static int posix_clock_realtime_set(const clockid_t which_clock,
 }
 
 static int posix_clock_realtime_adj(const clockid_t which_clock,
-				    struct timex *t)
+				    struct __kernel_timex *t)
 {
 	return do_adjtimex(t);
 }
@@ -1047,7 +1047,7 @@ SYSCALL_DEFINE2(clock_gettime, const clockid_t, which_clock,
 	return error;
 }
 
-int do_clock_adjtime(const clockid_t which_clock, struct timex * ktx)
+int do_clock_adjtime(const clockid_t which_clock, struct __kernel_timex * ktx)
 {
 	const struct k_clock *kc = clockid_to_kclock(which_clock);
 
@@ -1062,7 +1062,7 @@ int do_clock_adjtime(const clockid_t which_clock, struct timex * ktx)
 SYSCALL_DEFINE2(clock_adjtime, const clockid_t, which_clock,
 		struct timex __user *, utx)
 {
-	struct timex ktx;
+	struct __kernel_timex ktx;
 	int err;
 
 	if (copy_from_user(&ktx, utx, sizeof(ktx)))
@@ -1132,7 +1132,7 @@ COMPAT_SYSCALL_DEFINE2(clock_gettime, clockid_t, which_clock,
 COMPAT_SYSCALL_DEFINE2(clock_adjtime, clockid_t, which_clock,
 		       struct old_timex32 __user *, utp)
 {
-	struct timex ktx;
+	struct __kernel_timex ktx;
 	int err;
 
 	err = get_old_timex32(&ktx, utp);
diff --git a/kernel/time/posix-timers.h b/kernel/time/posix-timers.h
index ddb21145211a..de5daa6d975a 100644
--- a/kernel/time/posix-timers.h
+++ b/kernel/time/posix-timers.h
@@ -8,7 +8,7 @@ struct k_clock {
 			     const struct timespec64 *tp);
 	int	(*clock_get)(const clockid_t which_clock,
 			     struct timespec64 *tp);
-	int	(*clock_adj)(const clockid_t which_clock, struct timex *tx);
+	int	(*clock_adj)(const clockid_t which_clock, struct __kernel_timex *tx);
 	int	(*timer_create)(struct k_itimer *timer);
 	int	(*nsleep)(const clockid_t which_clock, int flags,
 			  const struct timespec64 *);
diff --git a/kernel/time/time.c b/kernel/time/time.c
index 2d013bc2b271..d179d33f639a 100644
--- a/kernel/time/time.c
+++ b/kernel/time/time.c
@@ -265,25 +265,25 @@ COMPAT_SYSCALL_DEFINE2(settimeofday, struct old_timeval32 __user *, tv,
 
 SYSCALL_DEFINE1(adjtimex, struct timex __user *, txc_p)
 {
-	struct timex txc;		/* Local copy of parameter */
+	struct __kernel_timex txc;		/* Local copy of parameter */
 	int ret;
 
 	/* Copy the user data space into the kernel copy
 	 * structure. But bear in mind that the structures
 	 * may change
 	 */
-	if (copy_from_user(&txc, txc_p, sizeof(struct timex)))
+	if (copy_from_user(&txc, txc_p, sizeof(struct __kernel_timex)))
 		return -EFAULT;
 	ret = do_adjtimex(&txc);
-	return copy_to_user(txc_p, &txc, sizeof(struct timex)) ? -EFAULT : ret;
+	return copy_to_user(txc_p, &txc, sizeof(struct __kernel_timex)) ? -EFAULT : ret;
 }
 
 #ifdef CONFIG_COMPAT_32BIT_TIME
-int get_old_timex32(struct timex *txc, const struct old_timex32 __user *utp)
+int get_old_timex32(struct __kernel_timex *txc, const struct old_timex32 __user *utp)
 {
 	struct old_timex32 tx32;
 
-	memset(txc, 0, sizeof(struct timex));
+	memset(txc, 0, sizeof(struct __kernel_timex));
 	if (copy_from_user(&tx32, utp, sizeof(struct old_timex32)))
 		return -EFAULT;
 
@@ -311,7 +311,7 @@ int get_old_timex32(struct timex *txc, const struct old_timex32 __user *utp)
 	return 0;
 }
 
-int put_old_timex32(struct old_timex32 __user *utp, const struct timex *txc)
+int put_old_timex32(struct old_timex32 __user *utp, const struct __kernel_timex *txc)
 {
 	struct old_timex32 tx32;
 
@@ -344,7 +344,7 @@ int put_old_timex32(struct old_timex32 __user *utp, const struct timex *txc)
 
 COMPAT_SYSCALL_DEFINE1(adjtimex, struct old_timex32 __user *, utp)
 {
-	struct timex txc;
+	struct __kernel_timex txc;
 	int err, ret;
 
 	err = get_old_timex32(&txc, utp);
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index ac5dbf2cd4a2..f986e1918d12 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -2234,7 +2234,7 @@ ktime_t ktime_get_update_offsets_now(unsigned int *cwsseq, ktime_t *offs_real,
 /**
  * timekeeping_validate_timex - Ensures the timex is ok for use in do_adjtimex
  */
-static int timekeeping_validate_timex(const struct timex *txc)
+static int timekeeping_validate_timex(const struct __kernel_timex *txc)
 {
 	if (txc->modes & ADJ_ADJTIME) {
 		/* singleshot must not be used with any other mode bits */
@@ -2300,7 +2300,7 @@ static int timekeeping_validate_timex(const struct timex *txc)
 /**
  * do_adjtimex() - Accessor function to NTP __do_adjtimex function
  */
-int do_adjtimex(struct timex *txc)
+int do_adjtimex(struct __kernel_timex *txc)
 {
 	struct timekeeper *tk = &tk_core.timekeeper;
 	unsigned long flags;
-- 
cgit v1.2.3


From 3876ced476c8ec17265d1739467e726ada88b660 Mon Sep 17 00:00:00 2001
From: Deepa Dinamani <deepa.kernel@gmail.com>
Date: Mon, 2 Jul 2018 22:44:22 -0700
Subject: timex: change syscalls to use struct __kernel_timex

struct timex is not y2038 safe.
Switch all the syscall apis to use y2038 safe __kernel_timex.

Note that sys_adjtimex() does not have a y2038 safe solution.  C libraries
can implement it by calling clock_adjtime(CLOCK_REALTIME, ...).

Signed-off-by: Deepa Dinamani <deepa.kernel@gmail.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 include/linux/syscalls.h   | 6 +++---
 kernel/time/posix-timers.c | 2 +-
 kernel/time/time.c         | 4 +++-
 3 files changed, 7 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index baa4b70b02d3..09330d5bda0c 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -54,7 +54,7 @@ struct __sysctl_args;
 struct sysinfo;
 struct timespec;
 struct timeval;
-struct timex;
+struct __kernel_timex;
 struct timezone;
 struct tms;
 struct utimbuf;
@@ -695,7 +695,7 @@ asmlinkage long sys_gettimeofday(struct timeval __user *tv,
 				struct timezone __user *tz);
 asmlinkage long sys_settimeofday(struct timeval __user *tv,
 				struct timezone __user *tz);
-asmlinkage long sys_adjtimex(struct timex __user *txc_p);
+asmlinkage long sys_adjtimex(struct __kernel_timex __user *txc_p);
 
 /* kernel/timer.c */
 asmlinkage long sys_getpid(void);
@@ -870,7 +870,7 @@ asmlinkage long sys_open_by_handle_at(int mountdirfd,
 				      struct file_handle __user *handle,
 				      int flags);
 asmlinkage long sys_clock_adjtime(clockid_t which_clock,
-				struct timex __user *tx);
+				struct __kernel_timex __user *tx);
 asmlinkage long sys_syncfs(int fd);
 asmlinkage long sys_setns(int fd, int nstype);
 asmlinkage long sys_sendmmsg(int fd, struct mmsghdr __user *msg,
diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
index 2d84b3db1ade..de79f85ae14f 100644
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -1060,7 +1060,7 @@ int do_clock_adjtime(const clockid_t which_clock, struct __kernel_timex * ktx)
 }
 
 SYSCALL_DEFINE2(clock_adjtime, const clockid_t, which_clock,
-		struct timex __user *, utx)
+		struct __kernel_timex __user *, utx)
 {
 	struct __kernel_timex ktx;
 	int err;
diff --git a/kernel/time/time.c b/kernel/time/time.c
index d179d33f639a..78b5c8f1495a 100644
--- a/kernel/time/time.c
+++ b/kernel/time/time.c
@@ -263,7 +263,8 @@ COMPAT_SYSCALL_DEFINE2(settimeofday, struct old_timeval32 __user *, tv,
 }
 #endif
 
-SYSCALL_DEFINE1(adjtimex, struct timex __user *, txc_p)
+#if !defined(CONFIG_64BIT_TIME) || defined(CONFIG_64BIT)
+SYSCALL_DEFINE1(adjtimex, struct __kernel_timex __user *, txc_p)
 {
 	struct __kernel_timex txc;		/* Local copy of parameter */
 	int ret;
@@ -277,6 +278,7 @@ SYSCALL_DEFINE1(adjtimex, struct timex __user *, txc_p)
 	ret = do_adjtimex(&txc);
 	return copy_to_user(txc_p, &txc, sizeof(struct __kernel_timex)) ? -EFAULT : ret;
 }
+#endif
 
 #ifdef CONFIG_COMPAT_32BIT_TIME
 int get_old_timex32(struct __kernel_timex *txc, const struct old_timex32 __user *utp)
-- 
cgit v1.2.3


From 8dabe7245bbc134f2cfcc12cde75c019dab924cc Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 7 Jan 2019 00:33:08 +0100
Subject: y2038: syscalls: rename y2038 compat syscalls

A lot of system calls that pass a time_t somewhere have an implementation
using a COMPAT_SYSCALL_DEFINEx() on 64-bit architectures, and have
been reworked so that this implementation can now be used on 32-bit
architectures as well.

The missing step is to redefine them using the regular SYSCALL_DEFINEx()
to get them out of the compat namespace and make it possible to build them
on 32-bit architectures.

Any system call that ends in 'time' gets a '32' suffix on its name for
that version, while the others get a '_time32' suffix, to distinguish
them from the normal version, which takes a 64-bit time argument in the
future.

In this step, only 64-bit architectures are changed, doing this rename
first lets us avoid touching the 32-bit architectures twice.

Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 arch/arm64/include/asm/unistd32.h         | 48 ++++++++++----------
 arch/mips/kernel/syscalls/syscall_n32.tbl | 50 ++++++++++-----------
 arch/mips/kernel/syscalls/syscall_o32.tbl | 52 +++++++++++-----------
 arch/parisc/kernel/syscalls/syscall.tbl   | 54 +++++++++++------------
 arch/powerpc/kernel/syscalls/syscall.tbl  | 52 +++++++++++-----------
 arch/s390/kernel/syscalls/syscall.tbl     | 52 +++++++++++-----------
 arch/sparc/kernel/syscalls/syscall.tbl    | 52 +++++++++++-----------
 arch/x86/entry/syscalls/syscall_32.tbl    | 52 +++++++++++-----------
 fs/aio.c                                  | 10 ++---
 fs/select.c                               |  4 +-
 fs/timerfd.c                              |  4 +-
 fs/utimes.c                               | 10 ++---
 include/linux/compat.h                    | 73 ++-----------------------------
 include/linux/syscalls.h                  | 57 ++++++++++++++++++++++++
 include/uapi/asm-generic/unistd.h         | 44 +++++++++----------
 ipc/mqueue.c                              | 16 +++----
 ipc/sem.c                                 |  2 +-
 kernel/futex.c                            |  2 +-
 kernel/sched/core.c                       |  5 +--
 kernel/signal.c                           |  2 +-
 kernel/sys_ni.c                           | 18 ++++----
 kernel/time/hrtimer.c                     |  2 +-
 kernel/time/posix-stubs.c                 | 25 ++++++-----
 kernel/time/posix-timers.c                | 32 +++++++-------
 kernel/time/time.c                        |  8 ++--
 net/compat.c                              |  2 +-
 26 files changed, 361 insertions(+), 367 deletions(-)

(limited to 'include')

diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h
index d10cce69a4b0..1ded82857161 100644
--- a/arch/arm64/include/asm/unistd32.h
+++ b/arch/arm64/include/asm/unistd32.h
@@ -270,7 +270,7 @@ __SYSCALL(__NR_uname, sys_newuname)
 			/* 123 was sys_modify_ldt */
 __SYSCALL(123, sys_ni_syscall)
 #define __NR_adjtimex 124
-__SYSCALL(__NR_adjtimex, compat_sys_adjtimex)
+__SYSCALL(__NR_adjtimex, sys_adjtimex_time32)
 #define __NR_mprotect 125
 __SYSCALL(__NR_mprotect, sys_mprotect)
 #define __NR_sigprocmask 126
@@ -344,9 +344,9 @@ __SYSCALL(__NR_sched_get_priority_max, sys_sched_get_priority_max)
 #define __NR_sched_get_priority_min 160
 __SYSCALL(__NR_sched_get_priority_min, sys_sched_get_priority_min)
 #define __NR_sched_rr_get_interval 161
-__SYSCALL(__NR_sched_rr_get_interval, compat_sys_sched_rr_get_interval)
+__SYSCALL(__NR_sched_rr_get_interval, sys_sched_rr_get_interval_time32)
 #define __NR_nanosleep 162
-__SYSCALL(__NR_nanosleep, compat_sys_nanosleep)
+__SYSCALL(__NR_nanosleep, sys_nanosleep_time32)
 #define __NR_mremap 163
 __SYSCALL(__NR_mremap, sys_mremap)
 #define __NR_setresuid 164
@@ -376,7 +376,7 @@ __SYSCALL(__NR_rt_sigprocmask, compat_sys_rt_sigprocmask)
 #define __NR_rt_sigpending 176
 __SYSCALL(__NR_rt_sigpending, compat_sys_rt_sigpending)
 #define __NR_rt_sigtimedwait 177
-__SYSCALL(__NR_rt_sigtimedwait, compat_sys_rt_sigtimedwait)
+__SYSCALL(__NR_rt_sigtimedwait, compat_sys_rt_sigtimedwait_time32)
 #define __NR_rt_sigqueueinfo 178
 __SYSCALL(__NR_rt_sigqueueinfo, compat_sys_rt_sigqueueinfo)
 #define __NR_rt_sigsuspend 179
@@ -502,7 +502,7 @@ __SYSCALL(__NR_tkill, sys_tkill)
 #define __NR_sendfile64 239
 __SYSCALL(__NR_sendfile64, sys_sendfile64)
 #define __NR_futex 240
-__SYSCALL(__NR_futex, compat_sys_futex)
+__SYSCALL(__NR_futex, sys_futex_time32)
 #define __NR_sched_setaffinity 241
 __SYSCALL(__NR_sched_setaffinity, compat_sys_sched_setaffinity)
 #define __NR_sched_getaffinity 242
@@ -512,7 +512,7 @@ __SYSCALL(__NR_io_setup, compat_sys_io_setup)
 #define __NR_io_destroy 244
 __SYSCALL(__NR_io_destroy, sys_io_destroy)
 #define __NR_io_getevents 245
-__SYSCALL(__NR_io_getevents, compat_sys_io_getevents)
+__SYSCALL(__NR_io_getevents, sys_io_getevents_time32)
 #define __NR_io_submit 246
 __SYSCALL(__NR_io_submit, compat_sys_io_submit)
 #define __NR_io_cancel 247
@@ -538,21 +538,21 @@ __SYSCALL(__NR_set_tid_address, sys_set_tid_address)
 #define __NR_timer_create 257
 __SYSCALL(__NR_timer_create, compat_sys_timer_create)
 #define __NR_timer_settime 258
-__SYSCALL(__NR_timer_settime, compat_sys_timer_settime)
+__SYSCALL(__NR_timer_settime, sys_timer_settime32)
 #define __NR_timer_gettime 259
-__SYSCALL(__NR_timer_gettime, compat_sys_timer_gettime)
+__SYSCALL(__NR_timer_gettime, sys_timer_gettime32)
 #define __NR_timer_getoverrun 260
 __SYSCALL(__NR_timer_getoverrun, sys_timer_getoverrun)
 #define __NR_timer_delete 261
 __SYSCALL(__NR_timer_delete, sys_timer_delete)
 #define __NR_clock_settime 262
-__SYSCALL(__NR_clock_settime, compat_sys_clock_settime)
+__SYSCALL(__NR_clock_settime, sys_clock_settime32)
 #define __NR_clock_gettime 263
-__SYSCALL(__NR_clock_gettime, compat_sys_clock_gettime)
+__SYSCALL(__NR_clock_gettime, sys_clock_gettime32)
 #define __NR_clock_getres 264
-__SYSCALL(__NR_clock_getres, compat_sys_clock_getres)
+__SYSCALL(__NR_clock_getres, sys_clock_getres_time32)
 #define __NR_clock_nanosleep 265
-__SYSCALL(__NR_clock_nanosleep, compat_sys_clock_nanosleep)
+__SYSCALL(__NR_clock_nanosleep, sys_clock_nanosleep_time32)
 #define __NR_statfs64 266
 __SYSCALL(__NR_statfs64, compat_sys_aarch32_statfs64)
 #define __NR_fstatfs64 267
@@ -560,7 +560,7 @@ __SYSCALL(__NR_fstatfs64, compat_sys_aarch32_fstatfs64)
 #define __NR_tgkill 268
 __SYSCALL(__NR_tgkill, sys_tgkill)
 #define __NR_utimes 269
-__SYSCALL(__NR_utimes, compat_sys_utimes)
+__SYSCALL(__NR_utimes, sys_utimes_time32)
 #define __NR_arm_fadvise64_64 270
 __SYSCALL(__NR_arm_fadvise64_64, compat_sys_aarch32_fadvise64_64)
 #define __NR_pciconfig_iobase 271
@@ -574,9 +574,9 @@ __SYSCALL(__NR_mq_open, compat_sys_mq_open)
 #define __NR_mq_unlink 275
 __SYSCALL(__NR_mq_unlink, sys_mq_unlink)
 #define __NR_mq_timedsend 276
-__SYSCALL(__NR_mq_timedsend, compat_sys_mq_timedsend)
+__SYSCALL(__NR_mq_timedsend, sys_mq_timedsend_time32)
 #define __NR_mq_timedreceive 277
-__SYSCALL(__NR_mq_timedreceive, compat_sys_mq_timedreceive)
+__SYSCALL(__NR_mq_timedreceive, sys_mq_timedreceive_time32)
 #define __NR_mq_notify 278
 __SYSCALL(__NR_mq_notify, compat_sys_mq_notify)
 #define __NR_mq_getsetattr 279
@@ -646,7 +646,7 @@ __SYSCALL(__NR_request_key, sys_request_key)
 #define __NR_keyctl 311
 __SYSCALL(__NR_keyctl, compat_sys_keyctl)
 #define __NR_semtimedop 312
-__SYSCALL(__NR_semtimedop, compat_sys_semtimedop)
+__SYSCALL(__NR_semtimedop, sys_semtimedop_time32)
 #define __NR_vserver 313
 __SYSCALL(__NR_vserver, sys_ni_syscall)
 #define __NR_ioprio_set 314
@@ -674,7 +674,7 @@ __SYSCALL(__NR_mknodat, sys_mknodat)
 #define __NR_fchownat 325
 __SYSCALL(__NR_fchownat, sys_fchownat)
 #define __NR_futimesat 326
-__SYSCALL(__NR_futimesat, compat_sys_futimesat)
+__SYSCALL(__NR_futimesat, sys_futimesat_time32)
 #define __NR_fstatat64 327
 __SYSCALL(__NR_fstatat64, sys_fstatat64)
 #define __NR_unlinkat 328
@@ -692,9 +692,9 @@ __SYSCALL(__NR_fchmodat, sys_fchmodat)
 #define __NR_faccessat 334
 __SYSCALL(__NR_faccessat, sys_faccessat)
 #define __NR_pselect6 335
-__SYSCALL(__NR_pselect6, compat_sys_pselect6)
+__SYSCALL(__NR_pselect6, compat_sys_pselect6_time32)
 #define __NR_ppoll 336
-__SYSCALL(__NR_ppoll, compat_sys_ppoll)
+__SYSCALL(__NR_ppoll, compat_sys_ppoll_time32)
 #define __NR_unshare 337
 __SYSCALL(__NR_unshare, sys_unshare)
 #define __NR_set_robust_list 338
@@ -718,7 +718,7 @@ __SYSCALL(__NR_epoll_pwait, compat_sys_epoll_pwait)
 #define __NR_kexec_load 347
 __SYSCALL(__NR_kexec_load, compat_sys_kexec_load)
 #define __NR_utimensat 348
-__SYSCALL(__NR_utimensat, compat_sys_utimensat)
+__SYSCALL(__NR_utimensat, sys_utimensat_time32)
 #define __NR_signalfd 349
 __SYSCALL(__NR_signalfd, compat_sys_signalfd)
 #define __NR_timerfd_create 350
@@ -728,9 +728,9 @@ __SYSCALL(__NR_eventfd, sys_eventfd)
 #define __NR_fallocate 352
 __SYSCALL(__NR_fallocate, compat_sys_aarch32_fallocate)
 #define __NR_timerfd_settime 353
-__SYSCALL(__NR_timerfd_settime, compat_sys_timerfd_settime)
+__SYSCALL(__NR_timerfd_settime, sys_timerfd_settime32)
 #define __NR_timerfd_gettime 354
-__SYSCALL(__NR_timerfd_gettime, compat_sys_timerfd_gettime)
+__SYSCALL(__NR_timerfd_gettime, sys_timerfd_gettime32)
 #define __NR_signalfd4 355
 __SYSCALL(__NR_signalfd4, compat_sys_signalfd4)
 #define __NR_eventfd2 356
@@ -752,7 +752,7 @@ __SYSCALL(__NR_rt_tgsigqueueinfo, compat_sys_rt_tgsigqueueinfo)
 #define __NR_perf_event_open 364
 __SYSCALL(__NR_perf_event_open, sys_perf_event_open)
 #define __NR_recvmmsg 365
-__SYSCALL(__NR_recvmmsg, compat_sys_recvmmsg)
+__SYSCALL(__NR_recvmmsg, compat_sys_recvmmsg_time32)
 #define __NR_accept4 366
 __SYSCALL(__NR_accept4, sys_accept4)
 #define __NR_fanotify_init 367
@@ -766,7 +766,7 @@ __SYSCALL(__NR_name_to_handle_at, sys_name_to_handle_at)
 #define __NR_open_by_handle_at 371
 __SYSCALL(__NR_open_by_handle_at, compat_sys_open_by_handle_at)
 #define __NR_clock_adjtime 372
-__SYSCALL(__NR_clock_adjtime, compat_sys_clock_adjtime)
+__SYSCALL(__NR_clock_adjtime, sys_clock_adjtime32)
 #define __NR_syncfs 373
 __SYSCALL(__NR_syncfs, sys_syncfs)
 #define __NR_sendmmsg 374
diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl
index cc134b1211aa..6d1e019817c8 100644
--- a/arch/mips/kernel/syscalls/syscall_n32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n32.tbl
@@ -41,7 +41,7 @@
 31	n32	dup				sys_dup
 32	n32	dup2				sys_dup2
 33	n32	pause				sys_pause
-34	n32	nanosleep			compat_sys_nanosleep
+34	n32	nanosleep			sys_nanosleep_time32
 35	n32	getitimer			compat_sys_getitimer
 36	n32	setitimer			compat_sys_setitimer
 37	n32	alarm				sys_alarm
@@ -133,11 +133,11 @@
 123	n32	capget				sys_capget
 124	n32	capset				sys_capset
 125	n32	rt_sigpending			compat_sys_rt_sigpending
-126	n32	rt_sigtimedwait			compat_sys_rt_sigtimedwait
+126	n32	rt_sigtimedwait			compat_sys_rt_sigtimedwait_time32
 127	n32	rt_sigqueueinfo			compat_sys_rt_sigqueueinfo
 128	n32	rt_sigsuspend			compat_sys_rt_sigsuspend
 129	n32	sigaltstack			compat_sys_sigaltstack
-130	n32	utime				compat_sys_utime
+130	n32	utime				sys_utime32
 131	n32	mknod				sys_mknod
 132	n32	personality			sys_32_personality
 133	n32	ustat				compat_sys_ustat
@@ -152,7 +152,7 @@
 142	n32	sched_getscheduler		sys_sched_getscheduler
 143	n32	sched_get_priority_max		sys_sched_get_priority_max
 144	n32	sched_get_priority_min		sys_sched_get_priority_min
-145	n32	sched_rr_get_interval		compat_sys_sched_rr_get_interval
+145	n32	sched_rr_get_interval		sys_sched_rr_get_interval_time32
 146	n32	mlock				sys_mlock
 147	n32	munlock				sys_munlock
 148	n32	mlockall			sys_mlockall
@@ -161,7 +161,7 @@
 151	n32	pivot_root			sys_pivot_root
 152	n32	_sysctl				compat_sys_sysctl
 153	n32	prctl				sys_prctl
-154	n32	adjtimex			compat_sys_adjtimex
+154	n32	adjtimex			sys_adjtimex_time32
 155	n32	setrlimit			compat_sys_setrlimit
 156	n32	chroot				sys_chroot
 157	n32	sync				sys_sync
@@ -202,7 +202,7 @@
 191	n32	fremovexattr			sys_fremovexattr
 192	n32	tkill				sys_tkill
 193	n32	reserved193			sys_ni_syscall
-194	n32	futex				compat_sys_futex
+194	n32	futex				sys_futex_time32
 195	n32	sched_setaffinity		compat_sys_sched_setaffinity
 196	n32	sched_getaffinity		compat_sys_sched_getaffinity
 197	n32	cacheflush			sys_cacheflush
@@ -210,7 +210,7 @@
 199	n32	sysmips				__sys_sysmips
 200	n32	io_setup			compat_sys_io_setup
 201	n32	io_destroy			sys_io_destroy
-202	n32	io_getevents			compat_sys_io_getevents
+202	n32	io_getevents			sys_io_getevents_time32
 203	n32	io_submit			compat_sys_io_submit
 204	n32	io_cancel			sys_io_cancel
 205	n32	exit_group			sys_exit_group
@@ -223,29 +223,29 @@
 212	n32	fcntl64				compat_sys_fcntl64
 213	n32	set_tid_address			sys_set_tid_address
 214	n32	restart_syscall			sys_restart_syscall
-215	n32	semtimedop			compat_sys_semtimedop
+215	n32	semtimedop			sys_semtimedop_time32
 216	n32	fadvise64			sys_fadvise64_64
 217	n32	statfs64			compat_sys_statfs64
 218	n32	fstatfs64			compat_sys_fstatfs64
 219	n32	sendfile64			sys_sendfile64
 220	n32	timer_create			compat_sys_timer_create
-221	n32	timer_settime			compat_sys_timer_settime
-222	n32	timer_gettime			compat_sys_timer_gettime
+221	n32	timer_settime			sys_timer_settime32
+222	n32	timer_gettime			sys_timer_gettime32
 223	n32	timer_getoverrun		sys_timer_getoverrun
 224	n32	timer_delete			sys_timer_delete
-225	n32	clock_settime			compat_sys_clock_settime
-226	n32	clock_gettime			compat_sys_clock_gettime
-227	n32	clock_getres			compat_sys_clock_getres
-228	n32	clock_nanosleep			compat_sys_clock_nanosleep
+225	n32	clock_settime			sys_clock_settime32
+226	n32	clock_gettime			sys_clock_gettime32
+227	n32	clock_getres			sys_clock_getres_time32
+228	n32	clock_nanosleep			sys_clock_nanosleep_time32
 229	n32	tgkill				sys_tgkill
-230	n32	utimes				compat_sys_utimes
+230	n32	utimes				sys_utimes_time32
 231	n32	mbind				compat_sys_mbind
 232	n32	get_mempolicy			compat_sys_get_mempolicy
 233	n32	set_mempolicy			compat_sys_set_mempolicy
 234	n32	mq_open				compat_sys_mq_open
 235	n32	mq_unlink			sys_mq_unlink
-236	n32	mq_timedsend			compat_sys_mq_timedsend
-237	n32	mq_timedreceive			compat_sys_mq_timedreceive
+236	n32	mq_timedsend			sys_mq_timedsend_time32
+237	n32	mq_timedreceive			sys_mq_timedreceive_time32
 238	n32	mq_notify			compat_sys_mq_notify
 239	n32	mq_getsetattr			compat_sys_mq_getsetattr
 240	n32	vserver				sys_ni_syscall
@@ -263,7 +263,7 @@
 252	n32	mkdirat				sys_mkdirat
 253	n32	mknodat				sys_mknodat
 254	n32	fchownat			sys_fchownat
-255	n32	futimesat			compat_sys_futimesat
+255	n32	futimesat			sys_futimesat_time32
 256	n32	newfstatat			sys_newfstatat
 257	n32	unlinkat			sys_unlinkat
 258	n32	renameat			sys_renameat
@@ -272,8 +272,8 @@
 261	n32	readlinkat			sys_readlinkat
 262	n32	fchmodat			sys_fchmodat
 263	n32	faccessat			sys_faccessat
-264	n32	pselect6			compat_sys_pselect6
-265	n32	ppoll				compat_sys_ppoll
+264	n32	pselect6			compat_sys_pselect6_time32
+265	n32	ppoll				compat_sys_ppoll_time32
 266	n32	unshare				sys_unshare
 267	n32	splice				sys_splice
 268	n32	sync_file_range			sys_sync_file_range
@@ -287,14 +287,14 @@
 276	n32	epoll_pwait			compat_sys_epoll_pwait
 277	n32	ioprio_set			sys_ioprio_set
 278	n32	ioprio_get			sys_ioprio_get
-279	n32	utimensat			compat_sys_utimensat
+279	n32	utimensat			sys_utimensat_time32
 280	n32	signalfd			compat_sys_signalfd
 281	n32	timerfd				sys_ni_syscall
 282	n32	eventfd				sys_eventfd
 283	n32	fallocate			sys_fallocate
 284	n32	timerfd_create			sys_timerfd_create
-285	n32	timerfd_gettime			compat_sys_timerfd_gettime
-286	n32	timerfd_settime			compat_sys_timerfd_settime
+285	n32	timerfd_gettime			sys_timerfd_gettime32
+286	n32	timerfd_settime			sys_timerfd_settime32
 287	n32	signalfd4			compat_sys_signalfd4
 288	n32	eventfd2			sys_eventfd2
 289	n32	epoll_create1			sys_epoll_create1
@@ -306,14 +306,14 @@
 295	n32	rt_tgsigqueueinfo		compat_sys_rt_tgsigqueueinfo
 296	n32	perf_event_open			sys_perf_event_open
 297	n32	accept4				sys_accept4
-298	n32	recvmmsg			compat_sys_recvmmsg
+298	n32	recvmmsg			compat_sys_recvmmsg_time32
 299	n32	getdents64			sys_getdents64
 300	n32	fanotify_init			sys_fanotify_init
 301	n32	fanotify_mark			sys_fanotify_mark
 302	n32	prlimit64			sys_prlimit64
 303	n32	name_to_handle_at		sys_name_to_handle_at
 304	n32	open_by_handle_at		sys_open_by_handle_at
-305	n32	clock_adjtime			compat_sys_clock_adjtime
+305	n32	clock_adjtime			sys_clock_adjtime32
 306	n32	syncfs				sys_syncfs
 307	n32	sendmmsg			compat_sys_sendmmsg
 308	n32	setns				sys_setns
diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl
index fa47ea8cc6ef..e9fec7bac5a9 100644
--- a/arch/mips/kernel/syscalls/syscall_o32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_o32.tbl
@@ -20,7 +20,7 @@
 10	o32	unlink				sys_unlink
 11	o32	execve				sys_execve			compat_sys_execve
 12	o32	chdir				sys_chdir
-13	o32	time				sys_time			compat_sys_time
+13	o32	time				sys_time			sys_time32
 14	o32	mknod				sys_mknod
 15	o32	chmod				sys_chmod
 16	o32	lchown				sys_lchown
@@ -33,13 +33,13 @@
 22	o32	umount				sys_oldumount
 23	o32	setuid				sys_setuid
 24	o32	getuid				sys_getuid
-25	o32	stime				sys_stime			compat_sys_stime
+25	o32	stime				sys_stime			sys_stime32
 26	o32	ptrace				sys_ptrace			compat_sys_ptrace
 27	o32	alarm				sys_alarm
 # 28 was sys_fstat
 28	o32	unused28			sys_ni_syscall
 29	o32	pause				sys_pause
-30	o32	utime				sys_utime			compat_sys_utime
+30	o32	utime				sys_utime			sys_utime32
 31	o32	stty				sys_ni_syscall
 32	o32	gtty				sys_ni_syscall
 33	o32	access				sys_access
@@ -135,7 +135,7 @@
 121	o32	setdomainname			sys_setdomainname
 122	o32	uname				sys_newuname
 123	o32	modify_ldt			sys_ni_syscall
-124	o32	adjtimex			sys_adjtimex			compat_sys_adjtimex
+124	o32	adjtimex			sys_adjtimex			sys_adjtimex_time32
 125	o32	mprotect			sys_mprotect
 126	o32	sigprocmask			sys_sigprocmask			compat_sys_sigprocmask
 127	o32	create_module			sys_ni_syscall
@@ -176,8 +176,8 @@
 162	o32	sched_yield			sys_sched_yield
 163	o32	sched_get_priority_max		sys_sched_get_priority_max
 164	o32	sched_get_priority_min		sys_sched_get_priority_min
-165	o32	sched_rr_get_interval		sys_sched_rr_get_interval	compat_sys_sched_rr_get_interval
-166	o32	nanosleep			sys_nanosleep			compat_sys_nanosleep
+165	o32	sched_rr_get_interval		sys_sched_rr_get_interval	sys_sched_rr_get_interval_time32
+166	o32	nanosleep			sys_nanosleep			sys_nanosleep_time32
 167	o32	mremap				sys_mremap
 168	o32	accept				sys_accept
 169	o32	bind				sys_bind
@@ -208,7 +208,7 @@
 194	o32	rt_sigaction			sys_rt_sigaction		compat_sys_rt_sigaction
 195	o32	rt_sigprocmask			sys_rt_sigprocmask		compat_sys_rt_sigprocmask
 196	o32	rt_sigpending			sys_rt_sigpending		compat_sys_rt_sigpending
-197	o32	rt_sigtimedwait			sys_rt_sigtimedwait		compat_sys_rt_sigtimedwait
+197	o32	rt_sigtimedwait			sys_rt_sigtimedwait		compat_sys_rt_sigtimedwait_time32
 198	o32	rt_sigqueueinfo			sys_rt_sigqueueinfo		compat_sys_rt_sigqueueinfo
 199	o32	rt_sigsuspend			sys_rt_sigsuspend		compat_sys_rt_sigsuspend
 200	o32	pread64				sys_pread64			sys_32_pread
@@ -249,12 +249,12 @@
 235	o32	fremovexattr			sys_fremovexattr
 236	o32	tkill				sys_tkill
 237	o32	sendfile64			sys_sendfile64
-238	o32	futex				sys_futex			compat_sys_futex
+238	o32	futex				sys_futex			sys_futex_time32
 239	o32	sched_setaffinity		sys_sched_setaffinity		compat_sys_sched_setaffinity
 240	o32	sched_getaffinity		sys_sched_getaffinity		compat_sys_sched_getaffinity
 241	o32	io_setup			sys_io_setup			compat_sys_io_setup
 242	o32	io_destroy			sys_io_destroy
-243	o32	io_getevents			sys_io_getevents		compat_sys_io_getevents
+243	o32	io_getevents			sys_io_getevents		sys_io_getevents_time32
 244	o32	io_submit			sys_io_submit			compat_sys_io_submit
 245	o32	io_cancel			sys_io_cancel
 246	o32	exit_group			sys_exit_group
@@ -269,23 +269,23 @@
 255	o32	statfs64			sys_statfs64			compat_sys_statfs64
 256	o32	fstatfs64			sys_fstatfs64			compat_sys_fstatfs64
 257	o32	timer_create			sys_timer_create		compat_sys_timer_create
-258	o32	timer_settime			sys_timer_settime		compat_sys_timer_settime
-259	o32	timer_gettime			sys_timer_gettime		compat_sys_timer_gettime
+258	o32	timer_settime			sys_timer_settime		sys_timer_settime32
+259	o32	timer_gettime			sys_timer_gettime		sys_timer_gettime32
 260	o32	timer_getoverrun		sys_timer_getoverrun
 261	o32	timer_delete			sys_timer_delete
-262	o32	clock_settime			sys_clock_settime		compat_sys_clock_settime
-263	o32	clock_gettime			sys_clock_gettime		compat_sys_clock_gettime
-264	o32	clock_getres			sys_clock_getres		compat_sys_clock_getres
-265	o32	clock_nanosleep			sys_clock_nanosleep		compat_sys_clock_nanosleep
+262	o32	clock_settime			sys_clock_settime		sys_clock_settime32
+263	o32	clock_gettime			sys_clock_gettime		sys_clock_gettime32
+264	o32	clock_getres			sys_clock_getres		sys_clock_getres_time32
+265	o32	clock_nanosleep			sys_clock_nanosleep		sys_clock_nanosleep_time32
 266	o32	tgkill				sys_tgkill
-267	o32	utimes				sys_utimes			compat_sys_utimes
+267	o32	utimes				sys_utimes			sys_utimes_time32
 268	o32	mbind				sys_mbind			compat_sys_mbind
 269	o32	get_mempolicy			sys_get_mempolicy		compat_sys_get_mempolicy
 270	o32	set_mempolicy			sys_set_mempolicy		compat_sys_set_mempolicy
 271	o32	mq_open				sys_mq_open			compat_sys_mq_open
 272	o32	mq_unlink			sys_mq_unlink
-273	o32	mq_timedsend			sys_mq_timedsend		compat_sys_mq_timedsend
-274	o32	mq_timedreceive			sys_mq_timedreceive		compat_sys_mq_timedreceive
+273	o32	mq_timedsend			sys_mq_timedsend		sys_mq_timedsend_time32
+274	o32	mq_timedreceive			sys_mq_timedreceive		sys_mq_timedreceive_time32
 275	o32	mq_notify			sys_mq_notify			compat_sys_mq_notify
 276	o32	mq_getsetattr			sys_mq_getsetattr		compat_sys_mq_getsetattr
 277	o32	vserver				sys_ni_syscall
@@ -303,7 +303,7 @@
 289	o32	mkdirat				sys_mkdirat
 290	o32	mknodat				sys_mknodat
 291	o32	fchownat			sys_fchownat
-292	o32	futimesat			sys_futimesat			compat_sys_futimesat
+292	o32	futimesat			sys_futimesat			sys_futimesat_time32
 293	o32	fstatat64			sys_fstatat64			sys_newfstatat
 294	o32	unlinkat			sys_unlinkat
 295	o32	renameat			sys_renameat
@@ -312,8 +312,8 @@
 298	o32	readlinkat			sys_readlinkat
 299	o32	fchmodat			sys_fchmodat
 300	o32	faccessat			sys_faccessat
-301	o32	pselect6			sys_pselect6			compat_sys_pselect6
-302	o32	ppoll				sys_ppoll			compat_sys_ppoll
+301	o32	pselect6			sys_pselect6			compat_sys_pselect6_time32
+302	o32	ppoll				sys_ppoll			compat_sys_ppoll_time32
 303	o32	unshare				sys_unshare
 304	o32	splice				sys_splice
 305	o32	sync_file_range			sys_sync_file_range		sys32_sync_file_range
@@ -327,14 +327,14 @@
 313	o32	epoll_pwait			sys_epoll_pwait			compat_sys_epoll_pwait
 314	o32	ioprio_set			sys_ioprio_set
 315	o32	ioprio_get			sys_ioprio_get
-316	o32	utimensat			sys_utimensat			compat_sys_utimensat
+316	o32	utimensat			sys_utimensat			sys_utimensat_time32
 317	o32	signalfd			sys_signalfd			compat_sys_signalfd
 318	o32	timerfd				sys_ni_syscall
 319	o32	eventfd				sys_eventfd
 320	o32	fallocate			sys_fallocate			sys32_fallocate
 321	o32	timerfd_create			sys_timerfd_create
-322	o32	timerfd_gettime			sys_timerfd_gettime		compat_sys_timerfd_gettime
-323	o32	timerfd_settime			sys_timerfd_settime		compat_sys_timerfd_settime
+322	o32	timerfd_gettime			sys_timerfd_gettime		sys_timerfd_gettime32
+323	o32	timerfd_settime			sys_timerfd_settime		sys_timerfd_settime32
 324	o32	signalfd4			sys_signalfd4			compat_sys_signalfd4
 325	o32	eventfd2			sys_eventfd2
 326	o32	epoll_create1			sys_epoll_create1
@@ -346,13 +346,13 @@
 332	o32	rt_tgsigqueueinfo		sys_rt_tgsigqueueinfo		compat_sys_rt_tgsigqueueinfo
 333	o32	perf_event_open			sys_perf_event_open
 334	o32	accept4				sys_accept4
-335	o32	recvmmsg			sys_recvmmsg			compat_sys_recvmmsg
+335	o32	recvmmsg			sys_recvmmsg			compat_sys_recvmmsg_time32
 336	o32	fanotify_init			sys_fanotify_init
 337	o32	fanotify_mark			sys_fanotify_mark		compat_sys_fanotify_mark
 338	o32	prlimit64			sys_prlimit64
 339	o32	name_to_handle_at		sys_name_to_handle_at
 340	o32	open_by_handle_at		sys_open_by_handle_at		compat_sys_open_by_handle_at
-341	o32	clock_adjtime			sys_clock_adjtime		compat_sys_clock_adjtime
+341	o32	clock_adjtime			sys_clock_adjtime		sys_clock_adjtime32
 342	o32	syncfs				sys_syncfs
 343	o32	sendmmsg			sys_sendmmsg			compat_sys_sendmmsg
 344	o32	setns				sys_setns
diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl
index 71873bb72782..f7440427d459 100644
--- a/arch/parisc/kernel/syscalls/syscall.tbl
+++ b/arch/parisc/kernel/syscalls/syscall.tbl
@@ -20,7 +20,7 @@
 10	common	unlink			sys_unlink
 11	common	execve			sys_execve			compat_sys_execve
 12	common	chdir			sys_chdir
-13	common	time			sys_time			compat_sys_time
+13	common	time			sys_time			sys_time32
 14	common	mknod			sys_mknod
 15	common	chmod			sys_chmod
 16	common	lchown			sys_lchown
@@ -32,12 +32,12 @@
 22	common	bind			sys_bind
 23	common	setuid			sys_setuid
 24	common	getuid			sys_getuid
-25	common	stime			sys_stime			compat_sys_stime
+25	common	stime			sys_stime			sys_stime32
 26	common	ptrace			sys_ptrace			compat_sys_ptrace
 27	common	alarm			sys_alarm
 28	common	fstat			sys_newfstat			compat_sys_newfstat
 29	common	pause			sys_pause
-30	common	utime			sys_utime			compat_sys_utime
+30	common	utime			sys_utime			sys_utime32
 31	common	connect			sys_connect
 32	common	listen			sys_listen
 33	common	access			sys_access
@@ -133,7 +133,7 @@
 121	common	setdomainname		sys_setdomainname
 122	common	sendfile		sys_sendfile			compat_sys_sendfile
 123	common	recvfrom		sys_recvfrom
-124	common	adjtimex		sys_adjtimex			compat_sys_adjtimex
+124	common	adjtimex		sys_adjtimex			sys_adjtimex_time32
 125	common	mprotect		sys_mprotect
 126	common	sigprocmask		sys_sigprocmask			compat_sys_sigprocmask
 # 127 was create_module
@@ -171,8 +171,8 @@
 158	common	sched_yield		sys_sched_yield
 159	common	sched_get_priority_max	sys_sched_get_priority_max
 160	common	sched_get_priority_min	sys_sched_get_priority_min
-161	common	sched_rr_get_interval	sys_sched_rr_get_interval	compat_sys_sched_rr_get_interval
-162	common	nanosleep		sys_nanosleep			compat_sys_nanosleep
+161	common	sched_rr_get_interval	sys_sched_rr_get_interval	sys_sched_rr_get_interval_time32
+162	common	nanosleep		sys_nanosleep			sys_nanosleep_time32
 163	common	mremap			sys_mremap
 164	common	setresuid		sys_setresuid
 165	common	getresuid		sys_getresuid
@@ -187,7 +187,7 @@
 174	common	rt_sigaction		sys_rt_sigaction		compat_sys_rt_sigaction
 175	common	rt_sigprocmask		sys_rt_sigprocmask		compat_sys_rt_sigprocmask
 176	common	rt_sigpending		sys_rt_sigpending		compat_sys_rt_sigpending
-177	common	rt_sigtimedwait		sys_rt_sigtimedwait		compat_sys_rt_sigtimedwait
+177	common	rt_sigtimedwait		sys_rt_sigtimedwait		compat_sys_rt_sigtimedwait_time32
 178	common	rt_sigqueueinfo		sys_rt_sigqueueinfo		compat_sys_rt_sigqueueinfo
 179	common	rt_sigsuspend		sys_rt_sigsuspend		compat_sys_rt_sigsuspend
 180	common	chown			sys_chown
@@ -223,14 +223,14 @@
 207	64	readahead		sys_readahead
 208	common	tkill			sys_tkill
 209	common	sendfile64		sys_sendfile64			compat_sys_sendfile64
-210	common	futex			sys_futex			compat_sys_futex
+210	common	futex			sys_futex			sys_futex_time32
 211	common	sched_setaffinity	sys_sched_setaffinity		compat_sys_sched_setaffinity
 212	common	sched_getaffinity	sys_sched_getaffinity		compat_sys_sched_getaffinity
 # 213 was set_thread_area
 # 214 was get_thread_area
 215	common	io_setup		sys_io_setup			compat_sys_io_setup
 216	common	io_destroy		sys_io_destroy
-217	common	io_getevents		sys_io_getevents		compat_sys_io_getevents
+217	common	io_getevents		sys_io_getevents		sys_io_getevents_time32
 218	common	io_submit		sys_io_submit			compat_sys_io_submit
 219	common	io_cancel		sys_io_cancel
 # 220 was alloc_hugepages
@@ -241,11 +241,11 @@
 225	common	epoll_ctl		sys_epoll_ctl
 226	common	epoll_wait		sys_epoll_wait
 227	common	remap_file_pages	sys_remap_file_pages
-228	common	semtimedop		sys_semtimedop			compat_sys_semtimedop
+228	common	semtimedop		sys_semtimedop			sys_semtimedop_time32
 229	common	mq_open			sys_mq_open			compat_sys_mq_open
 230	common	mq_unlink		sys_mq_unlink
-231	common	mq_timedsend		sys_mq_timedsend		compat_sys_mq_timedsend
-232	common	mq_timedreceive		sys_mq_timedreceive		compat_sys_mq_timedreceive
+231	common	mq_timedsend		sys_mq_timedsend		sys_mq_timedsend_time32
+232	common	mq_timedreceive		sys_mq_timedreceive		sys_mq_timedreceive_time32
 233	common	mq_notify		sys_mq_notify			compat_sys_mq_notify
 234	common	mq_getsetattr		sys_mq_getsetattr		compat_sys_mq_getsetattr
 235	common	waitid			sys_waitid			compat_sys_waitid
@@ -265,14 +265,14 @@
 248	common	lremovexattr		sys_lremovexattr
 249	common	fremovexattr		sys_fremovexattr
 250	common	timer_create		sys_timer_create		compat_sys_timer_create
-251	common	timer_settime		sys_timer_settime		compat_sys_timer_settime
-252	common	timer_gettime		sys_timer_gettime		compat_sys_timer_gettime
+251	common	timer_settime		sys_timer_settime		sys_timer_settime32
+252	common	timer_gettime		sys_timer_gettime		sys_timer_gettime32
 253	common	timer_getoverrun	sys_timer_getoverrun
 254	common	timer_delete		sys_timer_delete
-255	common	clock_settime		sys_clock_settime		compat_sys_clock_settime
-256	common	clock_gettime		sys_clock_gettime		compat_sys_clock_gettime
-257	common	clock_getres		sys_clock_getres		compat_sys_clock_getres
-258	common	clock_nanosleep		sys_clock_nanosleep		compat_sys_clock_nanosleep
+255	common	clock_settime		sys_clock_settime		sys_clock_settime32
+256	common	clock_gettime		sys_clock_gettime		sys_clock_gettime32
+257	common	clock_getres		sys_clock_getres		sys_clock_getres_time32
+258	common	clock_nanosleep		sys_clock_nanosleep		sys_clock_nanosleep_time32
 259	common	tgkill			sys_tgkill
 260	common	mbind			sys_mbind			compat_sys_mbind
 261	common	get_mempolicy		sys_get_mempolicy		compat_sys_get_mempolicy
@@ -287,13 +287,13 @@
 270	common	inotify_add_watch	sys_inotify_add_watch
 271	common	inotify_rm_watch	sys_inotify_rm_watch
 272	common	migrate_pages		sys_migrate_pages
-273	common	pselect6		sys_pselect6			compat_sys_pselect6
-274	common	ppoll			sys_ppoll			compat_sys_ppoll
+273	common	pselect6		sys_pselect6			compat_sys_pselect6_time32
+274	common	ppoll			sys_ppoll			compat_sys_ppoll_time32
 275	common	openat			sys_openat			compat_sys_openat
 276	common	mkdirat			sys_mkdirat
 277	common	mknodat			sys_mknodat
 278	common	fchownat		sys_fchownat
-279	common	futimesat		sys_futimesat			compat_sys_futimesat
+279	common	futimesat		sys_futimesat			sys_futimesat_time32
 280	common	fstatat64		sys_fstatat64
 281	common	unlinkat		sys_unlinkat
 282	common	renameat		sys_renameat
@@ -316,15 +316,15 @@
 298	common	statfs64		sys_statfs64			compat_sys_statfs64
 299	common	fstatfs64		sys_fstatfs64			compat_sys_fstatfs64
 300	common	kexec_load		sys_kexec_load			compat_sys_kexec_load
-301	common	utimensat		sys_utimensat			compat_sys_utimensat
+301	common	utimensat		sys_utimensat			sys_utimensat_time32
 302	common	signalfd		sys_signalfd			compat_sys_signalfd
 # 303 was timerfd
 304	common	eventfd			sys_eventfd
 305	32	fallocate		parisc_fallocate
 305	64	fallocate		sys_fallocate
 306	common	timerfd_create		sys_timerfd_create
-307	common	timerfd_settime		sys_timerfd_settime		compat_sys_timerfd_settime
-308	common	timerfd_gettime		sys_timerfd_gettime		compat_sys_timerfd_gettime
+307	common	timerfd_settime		sys_timerfd_settime		sys_timerfd_settime32
+308	common	timerfd_gettime		sys_timerfd_gettime		sys_timerfd_gettime32
 309	common	signalfd4		sys_signalfd4			compat_sys_signalfd4
 310	common	eventfd2		sys_eventfd2
 311	common	epoll_create1		sys_epoll_create1
@@ -335,12 +335,12 @@
 316	common	pwritev	sys_pwritev	compat_sys_pwritev
 317	common	rt_tgsigqueueinfo	sys_rt_tgsigqueueinfo		compat_sys_rt_tgsigqueueinfo
 318	common	perf_event_open		sys_perf_event_open
-319	common	recvmmsg		sys_recvmmsg			compat_sys_recvmmsg
+319	common	recvmmsg		sys_recvmmsg			compat_sys_recvmmsg_time32
 320	common	accept4			sys_accept4
 321	common	prlimit64		sys_prlimit64
 322	common	fanotify_init		sys_fanotify_init
 323	common	fanotify_mark		sys_fanotify_mark		sys32_fanotify_mark
-324	common	clock_adjtime		sys_clock_adjtime		compat_sys_clock_adjtime
+324	common	clock_adjtime		sys_clock_adjtime		sys_clock_adjtime32
 325	common	name_to_handle_at	sys_name_to_handle_at
 326	common	open_by_handle_at	sys_open_by_handle_at		compat_sys_open_by_handle_at
 327	common	syncfs			sys_syncfs
@@ -352,7 +352,7 @@
 333	common	finit_module		sys_finit_module
 334	common	sched_setattr		sys_sched_setattr
 335	common	sched_getattr		sys_sched_getattr
-336	common	utimes			sys_utimes			compat_sys_utimes
+336	common	utimes			sys_utimes			sys_utimes_time32
 337	common	renameat2		sys_renameat2
 338	common	seccomp			sys_seccomp
 339	common	getrandom		sys_getrandom
diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl
index 7555874ce39c..86650dcd2185 100644
--- a/arch/powerpc/kernel/syscalls/syscall.tbl
+++ b/arch/powerpc/kernel/syscalls/syscall.tbl
@@ -20,7 +20,7 @@
 10	common	unlink				sys_unlink
 11	nospu	execve				sys_execve			compat_sys_execve
 12	common	chdir				sys_chdir
-13	common	time				sys_time			compat_sys_time
+13	common	time				sys_time			sys_time32
 14	common	mknod				sys_mknod
 15	common	chmod				sys_chmod
 16	common	lchown				sys_lchown
@@ -36,14 +36,14 @@
 22	spu	umount				sys_ni_syscall
 23	common	setuid				sys_setuid
 24	common	getuid				sys_getuid
-25	common	stime				sys_stime			compat_sys_stime
+25	common	stime				sys_stime			sys_stime32
 26	nospu	ptrace				sys_ptrace			compat_sys_ptrace
 27	common	alarm				sys_alarm
 28	32	oldfstat			sys_fstat			sys_ni_syscall
 28	64	oldfstat			sys_ni_syscall
 28	spu	oldfstat			sys_ni_syscall
 29	nospu	pause				sys_pause
-30	nospu	utime				sys_utime			compat_sys_utime
+30	nospu	utime				sys_utime			sys_utime32
 31	common	stty				sys_ni_syscall
 32	common	gtty				sys_ni_syscall
 33	common	access				sys_access
@@ -157,7 +157,7 @@
 121	common	setdomainname			sys_setdomainname
 122	common	uname				sys_newuname
 123	common	modify_ldt			sys_ni_syscall
-124	common	adjtimex			sys_adjtimex			compat_sys_adjtimex
+124	common	adjtimex			sys_adjtimex			sys_adjtimex_time32
 125	common	mprotect			sys_mprotect
 126	32	sigprocmask			sys_sigprocmask			compat_sys_sigprocmask
 126	64	sigprocmask			sys_ni_syscall
@@ -198,8 +198,8 @@
 158	common	sched_yield			sys_sched_yield
 159	common	sched_get_priority_max		sys_sched_get_priority_max
 160	common	sched_get_priority_min		sys_sched_get_priority_min
-161	common	sched_rr_get_interval		sys_sched_rr_get_interval	compat_sys_sched_rr_get_interval
-162	common	nanosleep			sys_nanosleep			compat_sys_nanosleep
+161	common	sched_rr_get_interval		sys_sched_rr_get_interval	sys_sched_rr_get_interval_time32
+162	common	nanosleep			sys_nanosleep			sys_nanosleep_time32
 163	common	mremap				sys_mremap
 164	common	setresuid			sys_setresuid
 165	common	getresuid			sys_getresuid
@@ -213,7 +213,7 @@
 173	nospu	rt_sigaction			sys_rt_sigaction		compat_sys_rt_sigaction
 174	nospu	rt_sigprocmask			sys_rt_sigprocmask		compat_sys_rt_sigprocmask
 175	nospu	rt_sigpending			sys_rt_sigpending		compat_sys_rt_sigpending
-176	nospu	rt_sigtimedwait			sys_rt_sigtimedwait		compat_sys_rt_sigtimedwait
+176	nospu	rt_sigtimedwait			sys_rt_sigtimedwait		compat_sys_rt_sigtimedwait_time32
 177	nospu 	rt_sigqueueinfo			sys_rt_sigqueueinfo		compat_sys_rt_sigqueueinfo
 178	nospu 	rt_sigsuspend			sys_rt_sigsuspend		compat_sys_rt_sigsuspend
 179	common	pread64				sys_pread64			compat_sys_pread64
@@ -260,7 +260,7 @@
 218	common	removexattr			sys_removexattr
 219	common	lremovexattr			sys_lremovexattr
 220	common	fremovexattr			sys_fremovexattr
-221	common	futex				sys_futex			compat_sys_futex
+221	common	futex				sys_futex			sys_futex_time32
 222	common	sched_setaffinity		sys_sched_setaffinity		compat_sys_sched_setaffinity
 223	common	sched_getaffinity		sys_sched_getaffinity		compat_sys_sched_getaffinity
 # 224 unused
@@ -268,7 +268,7 @@
 226	32	sendfile64			sys_sendfile64			compat_sys_sendfile64
 227	common	io_setup			sys_io_setup			compat_sys_io_setup
 228	common	io_destroy			sys_io_destroy
-229	common	io_getevents			sys_io_getevents		compat_sys_io_getevents
+229	common	io_getevents			sys_io_getevents		sys_io_getevents_time32
 230	common	io_submit			sys_io_submit			compat_sys_io_submit
 231	common	io_cancel			sys_io_cancel
 232	nospu	set_tid_address			sys_set_tid_address
@@ -280,19 +280,19 @@
 238	common	epoll_wait			sys_epoll_wait
 239	common	remap_file_pages		sys_remap_file_pages
 240	common	timer_create			sys_timer_create		compat_sys_timer_create
-241	common	timer_settime			sys_timer_settime		compat_sys_timer_settime
-242	common	timer_gettime			sys_timer_gettime		compat_sys_timer_gettime
+241	common	timer_settime			sys_timer_settime		sys_timer_settime32
+242	common	timer_gettime			sys_timer_gettime		sys_timer_gettime32
 243	common	timer_getoverrun		sys_timer_getoverrun
 244	common	timer_delete			sys_timer_delete
-245	common	clock_settime			sys_clock_settime		compat_sys_clock_settime
-246	common	clock_gettime			sys_clock_gettime		compat_sys_clock_gettime
-247	common	clock_getres			sys_clock_getres		compat_sys_clock_getres
-248	common	clock_nanosleep			sys_clock_nanosleep		compat_sys_clock_nanosleep
+245	common	clock_settime			sys_clock_settime		sys_clock_settime32
+246	common	clock_gettime			sys_clock_gettime		sys_clock_gettime32
+247	common	clock_getres			sys_clock_getres		sys_clock_getres_time32
+248	common	clock_nanosleep			sys_clock_nanosleep		sys_clock_nanosleep_time32
 249	32	swapcontext			ppc_swapcontext			ppc32_swapcontext
 249	64	swapcontext			ppc64_swapcontext
 249	spu	swapcontext			sys_ni_syscall
 250	common	tgkill				sys_tgkill
-251	common	utimes				sys_utimes			compat_sys_utimes
+251	common	utimes				sys_utimes			sys_utimes_time32
 252	common	statfs64			sys_statfs64			compat_sys_statfs64
 253	common	fstatfs64			sys_fstatfs64			compat_sys_fstatfs64
 254	32	fadvise64_64			ppc_fadvise64_64
@@ -308,8 +308,8 @@
 261	nospu	set_mempolicy			sys_set_mempolicy		compat_sys_set_mempolicy
 262	nospu	mq_open				sys_mq_open			compat_sys_mq_open
 263	nospu	mq_unlink			sys_mq_unlink
-264	nospu	mq_timedsend			sys_mq_timedsend		compat_sys_mq_timedsend
-265	nospu	mq_timedreceive			sys_mq_timedreceive		compat_sys_mq_timedreceive
+264	nospu	mq_timedsend			sys_mq_timedsend		sys_mq_timedsend_time32
+265	nospu	mq_timedreceive			sys_mq_timedreceive		sys_mq_timedreceive_time32
 266	nospu	mq_notify			sys_mq_notify			compat_sys_mq_notify
 267	nospu	mq_getsetattr			sys_mq_getsetattr		compat_sys_mq_getsetattr
 268	nospu	kexec_load			sys_kexec_load			compat_sys_kexec_load
@@ -324,8 +324,8 @@
 277	nospu	inotify_rm_watch		sys_inotify_rm_watch
 278	nospu	spu_run				sys_spu_run
 279	nospu	spu_create			sys_spu_create
-280	nospu	pselect6			sys_pselect6			compat_sys_pselect6
-281	nospu	ppoll				sys_ppoll			compat_sys_ppoll
+280	nospu	pselect6			sys_pselect6			compat_sys_pselect6_time32
+281	nospu	ppoll				sys_ppoll			compat_sys_ppoll_time32
 282	common	unshare				sys_unshare
 283	common	splice				sys_splice
 284	common	tee				sys_tee
@@ -334,7 +334,7 @@
 287	common	mkdirat				sys_mkdirat
 288	common	mknodat				sys_mknodat
 289	common	fchownat			sys_fchownat
-290	common	futimesat			sys_futimesat			compat_sys_futimesat
+290	common	futimesat			sys_futimesat			sys_futimesat_time32
 291	32	fstatat64			sys_fstatat64
 291	64	newfstatat			sys_newfstatat
 291	spu	newfstatat			sys_newfstatat
@@ -350,15 +350,15 @@
 301	common	move_pages			sys_move_pages			compat_sys_move_pages
 302	common	getcpu				sys_getcpu
 303	nospu	epoll_pwait			sys_epoll_pwait			compat_sys_epoll_pwait
-304	common	utimensat			sys_utimensat			compat_sys_utimensat
+304	common	utimensat			sys_utimensat			sys_utimensat_time32
 305	common	signalfd			sys_signalfd			compat_sys_signalfd
 306	common	timerfd_create			sys_timerfd_create
 307	common	eventfd				sys_eventfd
 308	common	sync_file_range2		sys_sync_file_range2		compat_sys_sync_file_range2
 309	nospu	fallocate			sys_fallocate			compat_sys_fallocate
 310	nospu	subpage_prot			sys_subpage_prot
-311	common	timerfd_settime			sys_timerfd_settime		compat_sys_timerfd_settime
-312	common	timerfd_gettime			sys_timerfd_gettime		compat_sys_timerfd_gettime
+311	common	timerfd_settime			sys_timerfd_settime		sys_timerfd_settime32
+312	common	timerfd_gettime			sys_timerfd_gettime		sys_timerfd_gettime32
 313	common	signalfd4			sys_signalfd4			compat_sys_signalfd4
 314	common	eventfd2			sys_eventfd2
 315	common	epoll_create1			sys_epoll_create1
@@ -389,11 +389,11 @@
 340	common	getsockopt			sys_getsockopt			compat_sys_getsockopt
 341	common	sendmsg				sys_sendmsg			compat_sys_sendmsg
 342	common	recvmsg				sys_recvmsg			compat_sys_recvmsg
-343	common	recvmmsg			sys_recvmmsg			compat_sys_recvmmsg
+343	common	recvmmsg			sys_recvmmsg			compat_sys_recvmmsg_time32
 344	common	accept4				sys_accept4
 345	common	name_to_handle_at		sys_name_to_handle_at
 346	common	open_by_handle_at		sys_open_by_handle_at		compat_sys_open_by_handle_at
-347	common	clock_adjtime			sys_clock_adjtime		compat_sys_clock_adjtime
+347	common	clock_adjtime			sys_clock_adjtime		sys_clock_adjtime32
 348	common	syncfs				sys_syncfs
 349	common	sendmmsg			sys_sendmmsg			compat_sys_sendmmsg
 350	common	setns				sys_setns
diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl
index 620e222003ca..285201cf1f83 100644
--- a/arch/s390/kernel/syscalls/syscall.tbl
+++ b/arch/s390/kernel/syscalls/syscall.tbl
@@ -20,7 +20,7 @@
 10   common	unlink			sys_unlink			sys_unlink
 11   common	execve			sys_execve			compat_sys_execve
 12   common	chdir			sys_chdir			sys_chdir
-13   32		time			-				compat_sys_time
+13   32		time			-				sys_time32
 14   common	mknod			sys_mknod			sys_mknod
 15   common	chmod			sys_chmod			sys_chmod
 16   32		lchown			-				sys_lchown16
@@ -30,11 +30,11 @@
 22   common	umount			sys_oldumount			sys_oldumount
 23   32		setuid			-				sys_setuid16
 24   32		getuid			-				sys_getuid16
-25   32		stime			-				compat_sys_stime
+25   32		stime			-				sys_stime32
 26   common	ptrace			sys_ptrace			compat_sys_ptrace
 27   common	alarm			sys_alarm			sys_alarm
 29   common	pause			sys_pause			sys_pause
-30   common	utime			sys_utime			compat_sys_utime
+30   common	utime			sys_utime			sys_utime32
 33   common	access			sys_access			sys_access
 34   common	nice			sys_nice			sys_nice
 36   common	sync			sys_sync			sys_sync
@@ -112,7 +112,7 @@
 120  common	clone			sys_clone			sys_clone
 121  common	setdomainname		sys_setdomainname		sys_setdomainname
 122  common	uname			sys_newuname			sys_newuname
-124  common	adjtimex		sys_adjtimex			compat_sys_adjtimex
+124  common	adjtimex		sys_adjtimex			sys_adjtimex_time32
 125  common	mprotect		sys_mprotect			sys_mprotect
 126  common	sigprocmask		sys_sigprocmask			compat_sys_sigprocmask
 127  common	create_module		-				-
@@ -150,8 +150,8 @@
 158  common	sched_yield		sys_sched_yield			sys_sched_yield
 159  common	sched_get_priority_max	sys_sched_get_priority_max	sys_sched_get_priority_max
 160  common	sched_get_priority_min	sys_sched_get_priority_min	sys_sched_get_priority_min
-161  common	sched_rr_get_interval	sys_sched_rr_get_interval	compat_sys_sched_rr_get_interval
-162  common	nanosleep		sys_nanosleep			compat_sys_nanosleep
+161  common	sched_rr_get_interval	sys_sched_rr_get_interval	sys_sched_rr_get_interval_time32
+162  common	nanosleep		sys_nanosleep			sys_nanosleep_time32
 163  common	mremap			sys_mremap			sys_mremap
 164  32		setresuid		-				sys_setresuid16
 165  32		getresuid		-				sys_getresuid16
@@ -165,7 +165,7 @@
 174  common	rt_sigaction		sys_rt_sigaction		compat_sys_rt_sigaction
 175  common	rt_sigprocmask		sys_rt_sigprocmask		compat_sys_rt_sigprocmask
 176  common	rt_sigpending		sys_rt_sigpending		compat_sys_rt_sigpending
-177  common	rt_sigtimedwait		sys_rt_sigtimedwait		compat_sys_rt_sigtimedwait
+177  common	rt_sigtimedwait		sys_rt_sigtimedwait		compat_sys_rt_sigtimedwait_time32
 178  common	rt_sigqueueinfo		sys_rt_sigqueueinfo		compat_sys_rt_sigqueueinfo
 179  common	rt_sigsuspend		sys_rt_sigsuspend		compat_sys_rt_sigsuspend
 180  common	pread64			sys_pread64			compat_sys_s390_pread64
@@ -246,13 +246,13 @@
 235  common	fremovexattr		sys_fremovexattr		sys_fremovexattr
 236  common	gettid			sys_gettid			sys_gettid
 237  common	tkill			sys_tkill			sys_tkill
-238  common	futex			sys_futex			compat_sys_futex
+238  common	futex			sys_futex			sys_futex_time32
 239  common	sched_setaffinity	sys_sched_setaffinity		compat_sys_sched_setaffinity
 240  common	sched_getaffinity	sys_sched_getaffinity		compat_sys_sched_getaffinity
 241  common	tgkill			sys_tgkill			sys_tgkill
 243  common	io_setup		sys_io_setup			compat_sys_io_setup
 244  common	io_destroy		sys_io_destroy			sys_io_destroy
-245  common	io_getevents		sys_io_getevents		compat_sys_io_getevents
+245  common	io_getevents		sys_io_getevents		sys_io_getevents_time32
 246  common	io_submit		sys_io_submit			compat_sys_io_submit
 247  common	io_cancel		sys_io_cancel			sys_io_cancel
 248  common	exit_group		sys_exit_group			sys_exit_group
@@ -262,14 +262,14 @@
 252  common	set_tid_address		sys_set_tid_address		sys_set_tid_address
 253  common	fadvise64		sys_fadvise64_64		compat_sys_s390_fadvise64
 254  common	timer_create		sys_timer_create		compat_sys_timer_create
-255  common	timer_settime		sys_timer_settime		compat_sys_timer_settime
-256  common	timer_gettime		sys_timer_gettime		compat_sys_timer_gettime
+255  common	timer_settime		sys_timer_settime		sys_timer_settime32
+256  common	timer_gettime		sys_timer_gettime		sys_timer_gettime32
 257  common	timer_getoverrun	sys_timer_getoverrun		sys_timer_getoverrun
 258  common	timer_delete		sys_timer_delete		sys_timer_delete
-259  common	clock_settime		sys_clock_settime		compat_sys_clock_settime
-260  common	clock_gettime		sys_clock_gettime		compat_sys_clock_gettime
-261  common	clock_getres		sys_clock_getres		compat_sys_clock_getres
-262  common	clock_nanosleep		sys_clock_nanosleep		compat_sys_clock_nanosleep
+259  common	clock_settime		sys_clock_settime		sys_clock_settime32
+260  common	clock_gettime		sys_clock_gettime		sys_clock_gettime32
+261  common	clock_getres		sys_clock_getres		sys_clock_getres_time32
+262  common	clock_nanosleep		sys_clock_nanosleep		sys_clock_nanosleep_time32
 264  32		fadvise64_64		-				compat_sys_s390_fadvise64_64
 265  common	statfs64		sys_statfs64			compat_sys_statfs64
 266  common	fstatfs64		sys_fstatfs64			compat_sys_fstatfs64
@@ -279,8 +279,8 @@
 270  common	set_mempolicy		sys_set_mempolicy		compat_sys_set_mempolicy
 271  common	mq_open			sys_mq_open			compat_sys_mq_open
 272  common	mq_unlink		sys_mq_unlink			sys_mq_unlink
-273  common	mq_timedsend		sys_mq_timedsend		compat_sys_mq_timedsend
-274  common	mq_timedreceive		sys_mq_timedreceive		compat_sys_mq_timedreceive
+273  common	mq_timedsend		sys_mq_timedsend		sys_mq_timedsend_time32
+274  common	mq_timedreceive		sys_mq_timedreceive		sys_mq_timedreceive_time32
 275  common	mq_notify		sys_mq_notify			compat_sys_mq_notify
 276  common	mq_getsetattr		sys_mq_getsetattr		compat_sys_mq_getsetattr
 277  common	kexec_load		sys_kexec_load			compat_sys_kexec_load
@@ -298,7 +298,7 @@
 289  common	mkdirat			sys_mkdirat			sys_mkdirat
 290  common	mknodat			sys_mknodat			sys_mknodat
 291  common	fchownat		sys_fchownat			sys_fchownat
-292  common	futimesat		sys_futimesat			compat_sys_futimesat
+292  common	futimesat		sys_futimesat			sys_futimesat_time32
 293  32		fstatat64		-				compat_sys_s390_fstatat64
 293  64		newfstatat		sys_newfstatat			-
 294  common	unlinkat		sys_unlinkat			sys_unlinkat
@@ -308,8 +308,8 @@
 298  common	readlinkat		sys_readlinkat			sys_readlinkat
 299  common	fchmodat		sys_fchmodat			sys_fchmodat
 300  common	faccessat		sys_faccessat			sys_faccessat
-301  common	pselect6		sys_pselect6			compat_sys_pselect6
-302  common	ppoll			sys_ppoll			compat_sys_ppoll
+301  common	pselect6		sys_pselect6			compat_sys_pselect6_time32
+302  common	ppoll			sys_ppoll			compat_sys_ppoll_time32
 303  common	unshare			sys_unshare			sys_unshare
 304  common	set_robust_list		sys_set_robust_list		compat_sys_set_robust_list
 305  common	get_robust_list		sys_get_robust_list		compat_sys_get_robust_list
@@ -320,15 +320,15 @@
 310  common	move_pages		sys_move_pages			compat_sys_move_pages
 311  common	getcpu			sys_getcpu			sys_getcpu
 312  common	epoll_pwait		sys_epoll_pwait			compat_sys_epoll_pwait
-313  common	utimes			sys_utimes			compat_sys_utimes
+313  common	utimes			sys_utimes			sys_utimes_time32
 314  common	fallocate		sys_fallocate			compat_sys_s390_fallocate
-315  common	utimensat		sys_utimensat			compat_sys_utimensat
+315  common	utimensat		sys_utimensat			sys_utimensat_time32
 316  common	signalfd		sys_signalfd			compat_sys_signalfd
 317  common	timerfd			-				-
 318  common	eventfd			sys_eventfd			sys_eventfd
 319  common	timerfd_create		sys_timerfd_create		sys_timerfd_create
-320  common	timerfd_settime		sys_timerfd_settime		compat_sys_timerfd_settime
-321  common	timerfd_gettime		sys_timerfd_gettime		compat_sys_timerfd_gettime
+320  common	timerfd_settime		sys_timerfd_settime		sys_timerfd_settime32
+321  common	timerfd_gettime		sys_timerfd_gettime		sys_timerfd_gettime32
 322  common	signalfd4		sys_signalfd4			compat_sys_signalfd4
 323  common	eventfd2		sys_eventfd2			sys_eventfd2
 324  common	inotify_init1		sys_inotify_init1		sys_inotify_init1
@@ -344,7 +344,7 @@
 334  common	prlimit64		sys_prlimit64			sys_prlimit64
 335  common	name_to_handle_at	sys_name_to_handle_at		sys_name_to_handle_at
 336  common	open_by_handle_at	sys_open_by_handle_at		compat_sys_open_by_handle_at
-337  common	clock_adjtime		sys_clock_adjtime		compat_sys_clock_adjtime
+337  common	clock_adjtime		sys_clock_adjtime		sys_clock_adjtime32
 338  common	syncfs			sys_syncfs			sys_syncfs
 339  common	setns			sys_setns			sys_setns
 340  common	process_vm_readv	sys_process_vm_readv		compat_sys_process_vm_readv
@@ -364,7 +364,7 @@
 354  common	execveat		sys_execveat			compat_sys_execveat
 355  common	userfaultfd		sys_userfaultfd			sys_userfaultfd
 356  common	membarrier		sys_membarrier			sys_membarrier
-357  common	recvmmsg		sys_recvmmsg			compat_sys_recvmmsg
+357  common	recvmmsg		sys_recvmmsg			compat_sys_recvmmsg_time32
 358  common	sendmmsg		sys_sendmmsg			compat_sys_sendmmsg
 359  common	socket			sys_socket			sys_socket
 360  common	socketpair		sys_socketpair			sys_socketpair
diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl
index e63cd013cc77..7cb05b50aeaa 100644
--- a/arch/sparc/kernel/syscalls/syscall.tbl
+++ b/arch/sparc/kernel/syscalls/syscall.tbl
@@ -44,7 +44,7 @@
 28	common	sigaltstack		sys_sigaltstack			compat_sys_sigaltstack
 29	32    	pause			sys_pause
 29	64    	pause			sys_nis_syscall
-30	common	utime			sys_utime			compat_sys_utime
+30	common	utime			sys_utime			sys_utime32
 31	32    	lchown32		sys_lchown
 32	32    	fchown32		sys_fchown
 33	common	access			sys_access
@@ -128,7 +128,7 @@
 102	common	rt_sigaction		sys_rt_sigaction		compat_sys_rt_sigaction
 103	common	rt_sigprocmask		sys_rt_sigprocmask		compat_sys_rt_sigprocmask
 104	common	rt_sigpending		sys_rt_sigpending		compat_sys_rt_sigpending
-105	common	rt_sigtimedwait		sys_rt_sigtimedwait		compat_sys_rt_sigtimedwait
+105	common	rt_sigtimedwait		sys_rt_sigtimedwait		compat_sys_rt_sigtimedwait_time32
 106	common	rt_sigqueueinfo		sys_rt_sigqueueinfo		compat_sys_rt_sigqueueinfo
 107	common	rt_sigsuspend		sys_rt_sigsuspend		compat_sys_rt_sigsuspend
 108	32	setresuid32		sys_setresuid
@@ -168,11 +168,11 @@
 135	common	socketpair		sys_socketpair
 136	common	mkdir			sys_mkdir
 137	common	rmdir			sys_rmdir
-138	common	utimes			sys_utimes			compat_sys_utimes
+138	common	utimes			sys_utimes			sys_utimes_time32
 139	common	stat64			sys_stat64			compat_sys_stat64
 140	common	sendfile64		sys_sendfile64
 141	common	getpeername		sys_getpeername
-142	common	futex			sys_futex			compat_sys_futex
+142	common	futex			sys_futex			sys_futex_time32
 143	common	gettid			sys_gettid
 144	common	getrlimit		sys_getrlimit			compat_sys_getrlimit
 145	common	setrlimit		sys_setrlimit			compat_sys_setrlimit
@@ -258,7 +258,7 @@
 216	64	sigreturn		sys_nis_syscall
 217	common	clone			sys_clone
 218	common	ioprio_get		sys_ioprio_get
-219	32	adjtimex		sys_adjtimex			compat_sys_adjtimex
+219	32	adjtimex		sys_adjtimex			sys_adjtimex_time32
 219	64	adjtimex		sys_sparc_adjtimex
 220	32	sigprocmask		sys_sigprocmask			compat_sys_sigprocmask
 220	64	sigprocmask		sys_nis_syscall
@@ -272,9 +272,9 @@
 228	common	setfsuid		sys_setfsuid16
 229	common	setfsgid		sys_setfsgid16
 230	common	_newselect		sys_select			compat_sys_select
-231	32	time			sys_time			compat_sys_time
+231	32	time			sys_time			sys_time32
 232	common	splice			sys_splice
-233	common	stime			sys_stime			compat_sys_stime
+233	common	stime			sys_stime			sys_stime32
 234	common	statfs64		sys_statfs64			compat_sys_statfs64
 235	common	fstatfs64		sys_fstatfs64			compat_sys_fstatfs64
 236	common	_llseek			sys_llseek
@@ -289,8 +289,8 @@
 245	common	sched_yield		sys_sched_yield
 246	common	sched_get_priority_max	sys_sched_get_priority_max
 247	common	sched_get_priority_min	sys_sched_get_priority_min
-248	common	sched_rr_get_interval	sys_sched_rr_get_interval	compat_sys_sched_rr_get_interval
-249	common	nanosleep		sys_nanosleep			compat_sys_nanosleep
+248	common	sched_rr_get_interval	sys_sched_rr_get_interval	sys_sched_rr_get_interval_time32
+249	common	nanosleep		sys_nanosleep			sys_nanosleep_time32
 250	32	mremap			sys_mremap
 250	64	mremap			sys_64_mremap
 251	common	_sysctl			sys_sysctl			compat_sys_sysctl
@@ -299,14 +299,14 @@
 254	32	nfsservctl		sys_ni_syscall			sys_nis_syscall
 254	64	nfsservctl		sys_nis_syscall
 255	common	sync_file_range		sys_sync_file_range		compat_sys_sync_file_range
-256	common	clock_settime		sys_clock_settime		compat_sys_clock_settime
-257	common	clock_gettime		sys_clock_gettime		compat_sys_clock_gettime
-258	common	clock_getres		sys_clock_getres		compat_sys_clock_getres
-259	common	clock_nanosleep		sys_clock_nanosleep		compat_sys_clock_nanosleep
+256	common	clock_settime		sys_clock_settime		sys_clock_settime32
+257	common	clock_gettime		sys_clock_gettime		sys_clock_gettime32
+258	common	clock_getres		sys_clock_getres		sys_clock_getres_time32
+259	common	clock_nanosleep		sys_clock_nanosleep		sys_clock_nanosleep_time32
 260	common	sched_getaffinity	sys_sched_getaffinity		compat_sys_sched_getaffinity
 261	common	sched_setaffinity	sys_sched_setaffinity		compat_sys_sched_setaffinity
-262	common	timer_settime		sys_timer_settime		compat_sys_timer_settime
-263	common	timer_gettime		sys_timer_gettime		compat_sys_timer_gettime
+262	common	timer_settime		sys_timer_settime		sys_timer_settime32
+263	common	timer_gettime		sys_timer_gettime		sys_timer_gettime32
 264	common	timer_getoverrun	sys_timer_getoverrun
 265	common	timer_delete		sys_timer_delete
 266	common	timer_create		sys_timer_create		compat_sys_timer_create
@@ -316,11 +316,11 @@
 269	common	io_destroy		sys_io_destroy
 270	common	io_submit		sys_io_submit			compat_sys_io_submit
 271	common	io_cancel		sys_io_cancel
-272	common	io_getevents		sys_io_getevents		compat_sys_io_getevents
+272	common	io_getevents		sys_io_getevents		sys_io_getevents_time32
 273	common	mq_open			sys_mq_open			compat_sys_mq_open
 274	common	mq_unlink		sys_mq_unlink
-275	common	mq_timedsend		sys_mq_timedsend		compat_sys_mq_timedsend
-276	common	mq_timedreceive		sys_mq_timedreceive		compat_sys_mq_timedreceive
+275	common	mq_timedsend		sys_mq_timedsend		sys_mq_timedsend_time32
+276	common	mq_timedreceive		sys_mq_timedreceive		sys_mq_timedreceive_time32
 277	common	mq_notify		sys_mq_notify			compat_sys_mq_notify
 278	common	mq_getsetattr		sys_mq_getsetattr		compat_sys_mq_getsetattr
 279	common	waitid			sys_waitid			compat_sys_waitid
@@ -332,7 +332,7 @@
 285	common	mkdirat			sys_mkdirat
 286	common	mknodat			sys_mknodat
 287	common	fchownat		sys_fchownat
-288	common	futimesat		sys_futimesat			compat_sys_futimesat
+288	common	futimesat		sys_futimesat			sys_futimesat_time32
 289	common	fstatat64		sys_fstatat64			compat_sys_fstatat64
 290	common	unlinkat		sys_unlinkat
 291	common	renameat		sys_renameat
@@ -341,8 +341,8 @@
 294	common	readlinkat		sys_readlinkat
 295	common	fchmodat		sys_fchmodat
 296	common	faccessat		sys_faccessat
-297	common	pselect6		sys_pselect6			compat_sys_pselect6
-298	common	ppoll			sys_ppoll			compat_sys_ppoll
+297	common	pselect6		sys_pselect6			compat_sys_pselect6_time32
+298	common	ppoll			sys_ppoll			compat_sys_ppoll_time32
 299	common	unshare			sys_unshare
 300	common	set_robust_list		sys_set_robust_list		compat_sys_set_robust_list
 301	common	get_robust_list		sys_get_robust_list		compat_sys_get_robust_list
@@ -354,13 +354,13 @@
 307	common	move_pages		sys_move_pages			compat_sys_move_pages
 308	common	getcpu			sys_getcpu
 309	common	epoll_pwait		sys_epoll_pwait			compat_sys_epoll_pwait
-310	common	utimensat		sys_utimensat			compat_sys_utimensat
+310	common	utimensat		sys_utimensat			sys_utimensat_time32
 311	common	signalfd		sys_signalfd			compat_sys_signalfd
 312	common	timerfd_create		sys_timerfd_create
 313	common	eventfd			sys_eventfd
 314	common	fallocate		sys_fallocate			compat_sys_fallocate
-315	common	timerfd_settime		sys_timerfd_settime		compat_sys_timerfd_settime
-316	common	timerfd_gettime		sys_timerfd_gettime		compat_sys_timerfd_gettime
+315	common	timerfd_settime		sys_timerfd_settime		sys_timerfd_settime32
+316	common	timerfd_gettime		sys_timerfd_gettime		sys_timerfd_gettime32
 317	common	signalfd4		sys_signalfd4			compat_sys_signalfd4
 318	common	eventfd2		sys_eventfd2
 319	common	epoll_create1		sys_epoll_create1
@@ -372,13 +372,13 @@
 325	common	pwritev			sys_pwritev			compat_sys_pwritev
 326	common	rt_tgsigqueueinfo	sys_rt_tgsigqueueinfo		compat_sys_rt_tgsigqueueinfo
 327	common	perf_event_open		sys_perf_event_open
-328	common	recvmmsg		sys_recvmmsg			compat_sys_recvmmsg
+328	common	recvmmsg		sys_recvmmsg			compat_sys_recvmmsg_time32
 329	common	fanotify_init		sys_fanotify_init
 330	common	fanotify_mark		sys_fanotify_mark		compat_sys_fanotify_mark
 331	common	prlimit64		sys_prlimit64
 332	common	name_to_handle_at	sys_name_to_handle_at
 333	common	open_by_handle_at	sys_open_by_handle_at		compat_sys_open_by_handle_at
-334	32	clock_adjtime		sys_clock_adjtime		compat_sys_clock_adjtime
+334	32	clock_adjtime		sys_clock_adjtime		sys_clock_adjtime32
 334	64	clock_adjtime		sys_sparc_clock_adjtime
 335	common	syncfs			sys_syncfs
 336	common	sendmmsg		sys_sendmmsg			compat_sys_sendmmsg
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index 2be1d0eb7754..7705d5ecad25 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -24,7 +24,7 @@
 10	i386	unlink			sys_unlink			__ia32_sys_unlink
 11	i386	execve			sys_execve			__ia32_compat_sys_execve
 12	i386	chdir			sys_chdir			__ia32_sys_chdir
-13	i386	time			sys_time			__ia32_compat_sys_time
+13	i386	time			sys_time			__ia32_sys_time32
 14	i386	mknod			sys_mknod			__ia32_sys_mknod
 15	i386	chmod			sys_chmod			__ia32_sys_chmod
 16	i386	lchown			sys_lchown16			__ia32_sys_lchown16
@@ -36,12 +36,12 @@
 22	i386	umount			sys_oldumount			__ia32_sys_oldumount
 23	i386	setuid			sys_setuid16			__ia32_sys_setuid16
 24	i386	getuid			sys_getuid16			__ia32_sys_getuid16
-25	i386	stime			sys_stime			__ia32_compat_sys_stime
+25	i386	stime			sys_stime			__ia32_sys_stime32
 26	i386	ptrace			sys_ptrace			__ia32_compat_sys_ptrace
 27	i386	alarm			sys_alarm			__ia32_sys_alarm
 28	i386	oldfstat		sys_fstat			__ia32_sys_fstat
 29	i386	pause			sys_pause			__ia32_sys_pause
-30	i386	utime			sys_utime			__ia32_compat_sys_utime
+30	i386	utime			sys_utime			__ia32_sys_utime32
 31	i386	stty
 32	i386	gtty
 33	i386	access			sys_access			__ia32_sys_access
@@ -135,7 +135,7 @@
 121	i386	setdomainname		sys_setdomainname		__ia32_sys_setdomainname
 122	i386	uname			sys_newuname			__ia32_sys_newuname
 123	i386	modify_ldt		sys_modify_ldt			__ia32_sys_modify_ldt
-124	i386	adjtimex		sys_adjtimex			__ia32_compat_sys_adjtimex
+124	i386	adjtimex		sys_adjtimex			__ia32_sys_adjtimex_time32
 125	i386	mprotect		sys_mprotect			__ia32_sys_mprotect
 126	i386	sigprocmask		sys_sigprocmask			__ia32_compat_sys_sigprocmask
 127	i386	create_module
@@ -172,8 +172,8 @@
 158	i386	sched_yield		sys_sched_yield			__ia32_sys_sched_yield
 159	i386	sched_get_priority_max	sys_sched_get_priority_max	__ia32_sys_sched_get_priority_max
 160	i386	sched_get_priority_min	sys_sched_get_priority_min	__ia32_sys_sched_get_priority_min
-161	i386	sched_rr_get_interval	sys_sched_rr_get_interval	__ia32_compat_sys_sched_rr_get_interval
-162	i386	nanosleep		sys_nanosleep			__ia32_compat_sys_nanosleep
+161	i386	sched_rr_get_interval	sys_sched_rr_get_interval	__ia32_sys_sched_rr_get_interval_time32
+162	i386	nanosleep		sys_nanosleep			__ia32_sys_nanosleep_time32
 163	i386	mremap			sys_mremap			__ia32_sys_mremap
 164	i386	setresuid		sys_setresuid16			__ia32_sys_setresuid16
 165	i386	getresuid		sys_getresuid16			__ia32_sys_getresuid16
@@ -188,7 +188,7 @@
 174	i386	rt_sigaction		sys_rt_sigaction		__ia32_compat_sys_rt_sigaction
 175	i386	rt_sigprocmask		sys_rt_sigprocmask		__ia32_sys_rt_sigprocmask
 176	i386	rt_sigpending		sys_rt_sigpending		__ia32_compat_sys_rt_sigpending
-177	i386	rt_sigtimedwait		sys_rt_sigtimedwait		__ia32_compat_sys_rt_sigtimedwait
+177	i386	rt_sigtimedwait		sys_rt_sigtimedwait		__ia32_compat_sys_rt_sigtimedwait_time32
 178	i386	rt_sigqueueinfo		sys_rt_sigqueueinfo		__ia32_compat_sys_rt_sigqueueinfo
 179	i386	rt_sigsuspend		sys_rt_sigsuspend		__ia32_sys_rt_sigsuspend
 180	i386	pread64			sys_pread64			__ia32_compat_sys_x86_pread
@@ -251,14 +251,14 @@
 237	i386	fremovexattr		sys_fremovexattr		__ia32_sys_fremovexattr
 238	i386	tkill			sys_tkill			__ia32_sys_tkill
 239	i386	sendfile64		sys_sendfile64			__ia32_sys_sendfile64
-240	i386	futex			sys_futex			__ia32_compat_sys_futex
+240	i386	futex			sys_futex			__ia32_sys_futex_time32
 241	i386	sched_setaffinity	sys_sched_setaffinity		__ia32_compat_sys_sched_setaffinity
 242	i386	sched_getaffinity	sys_sched_getaffinity		__ia32_compat_sys_sched_getaffinity
 243	i386	set_thread_area		sys_set_thread_area		__ia32_sys_set_thread_area
 244	i386	get_thread_area		sys_get_thread_area		__ia32_sys_get_thread_area
 245	i386	io_setup		sys_io_setup			__ia32_compat_sys_io_setup
 246	i386	io_destroy		sys_io_destroy			__ia32_sys_io_destroy
-247	i386	io_getevents		sys_io_getevents		__ia32_compat_sys_io_getevents
+247	i386	io_getevents		sys_io_getevents		__ia32_sys_io_getevents_time32
 248	i386	io_submit		sys_io_submit			__ia32_compat_sys_io_submit
 249	i386	io_cancel		sys_io_cancel			__ia32_sys_io_cancel
 250	i386	fadvise64		sys_fadvise64			__ia32_compat_sys_x86_fadvise64
@@ -271,18 +271,18 @@
 257	i386	remap_file_pages	sys_remap_file_pages		__ia32_sys_remap_file_pages
 258	i386	set_tid_address		sys_set_tid_address		__ia32_sys_set_tid_address
 259	i386	timer_create		sys_timer_create		__ia32_compat_sys_timer_create
-260	i386	timer_settime		sys_timer_settime		__ia32_compat_sys_timer_settime
-261	i386	timer_gettime		sys_timer_gettime		__ia32_compat_sys_timer_gettime
+260	i386	timer_settime		sys_timer_settime		__ia32_sys_timer_settime32
+261	i386	timer_gettime		sys_timer_gettime		__ia32_sys_timer_gettime32
 262	i386	timer_getoverrun	sys_timer_getoverrun		__ia32_sys_timer_getoverrun
 263	i386	timer_delete		sys_timer_delete		__ia32_sys_timer_delete
-264	i386	clock_settime		sys_clock_settime		__ia32_compat_sys_clock_settime
-265	i386	clock_gettime		sys_clock_gettime		__ia32_compat_sys_clock_gettime
-266	i386	clock_getres		sys_clock_getres		__ia32_compat_sys_clock_getres
-267	i386	clock_nanosleep		sys_clock_nanosleep		__ia32_compat_sys_clock_nanosleep
+264	i386	clock_settime		sys_clock_settime		__ia32_sys_clock_settime32
+265	i386	clock_gettime		sys_clock_gettime		__ia32_sys_clock_gettime32
+266	i386	clock_getres		sys_clock_getres		__ia32_sys_clock_getres_time32
+267	i386	clock_nanosleep		sys_clock_nanosleep		__ia32_sys_clock_nanosleep_time32
 268	i386	statfs64		sys_statfs64			__ia32_compat_sys_statfs64
 269	i386	fstatfs64		sys_fstatfs64			__ia32_compat_sys_fstatfs64
 270	i386	tgkill			sys_tgkill			__ia32_sys_tgkill
-271	i386	utimes			sys_utimes			__ia32_compat_sys_utimes
+271	i386	utimes			sys_utimes			__ia32_sys_utimes_time32
 272	i386	fadvise64_64		sys_fadvise64_64		__ia32_compat_sys_x86_fadvise64_64
 273	i386	vserver
 274	i386	mbind			sys_mbind			__ia32_sys_mbind
@@ -290,8 +290,8 @@
 276	i386	set_mempolicy		sys_set_mempolicy		__ia32_sys_set_mempolicy
 277	i386	mq_open			sys_mq_open			__ia32_compat_sys_mq_open
 278	i386	mq_unlink		sys_mq_unlink			__ia32_sys_mq_unlink
-279	i386	mq_timedsend		sys_mq_timedsend		__ia32_compat_sys_mq_timedsend
-280	i386	mq_timedreceive		sys_mq_timedreceive		__ia32_compat_sys_mq_timedreceive
+279	i386	mq_timedsend		sys_mq_timedsend		__ia32_sys_mq_timedsend_time32
+280	i386	mq_timedreceive		sys_mq_timedreceive		__ia32_sys_mq_timedreceive_time32
 281	i386	mq_notify		sys_mq_notify			__ia32_compat_sys_mq_notify
 282	i386	mq_getsetattr		sys_mq_getsetattr		__ia32_compat_sys_mq_getsetattr
 283	i386	kexec_load		sys_kexec_load			__ia32_compat_sys_kexec_load
@@ -310,7 +310,7 @@
 296	i386	mkdirat			sys_mkdirat			__ia32_sys_mkdirat
 297	i386	mknodat			sys_mknodat			__ia32_sys_mknodat
 298	i386	fchownat		sys_fchownat			__ia32_sys_fchownat
-299	i386	futimesat		sys_futimesat			__ia32_compat_sys_futimesat
+299	i386	futimesat		sys_futimesat			__ia32_sys_futimesat_time32
 300	i386	fstatat64		sys_fstatat64			__ia32_compat_sys_x86_fstatat
 301	i386	unlinkat		sys_unlinkat			__ia32_sys_unlinkat
 302	i386	renameat		sys_renameat			__ia32_sys_renameat
@@ -319,8 +319,8 @@
 305	i386	readlinkat		sys_readlinkat			__ia32_sys_readlinkat
 306	i386	fchmodat		sys_fchmodat			__ia32_sys_fchmodat
 307	i386	faccessat		sys_faccessat			__ia32_sys_faccessat
-308	i386	pselect6		sys_pselect6			__ia32_compat_sys_pselect6
-309	i386	ppoll			sys_ppoll			__ia32_compat_sys_ppoll
+308	i386	pselect6		sys_pselect6			__ia32_compat_sys_pselect6_time32
+309	i386	ppoll			sys_ppoll			__ia32_compat_sys_ppoll_time32
 310	i386	unshare			sys_unshare			__ia32_sys_unshare
 311	i386	set_robust_list		sys_set_robust_list		__ia32_compat_sys_set_robust_list
 312	i386	get_robust_list		sys_get_robust_list		__ia32_compat_sys_get_robust_list
@@ -331,13 +331,13 @@
 317	i386	move_pages		sys_move_pages			__ia32_compat_sys_move_pages
 318	i386	getcpu			sys_getcpu			__ia32_sys_getcpu
 319	i386	epoll_pwait		sys_epoll_pwait			__ia32_sys_epoll_pwait
-320	i386	utimensat		sys_utimensat			__ia32_compat_sys_utimensat
+320	i386	utimensat		sys_utimensat			__ia32_sys_utimensat_time32
 321	i386	signalfd		sys_signalfd			__ia32_compat_sys_signalfd
 322	i386	timerfd_create		sys_timerfd_create		__ia32_sys_timerfd_create
 323	i386	eventfd			sys_eventfd			__ia32_sys_eventfd
 324	i386	fallocate		sys_fallocate			__ia32_compat_sys_x86_fallocate
-325	i386	timerfd_settime		sys_timerfd_settime		__ia32_compat_sys_timerfd_settime
-326	i386	timerfd_gettime		sys_timerfd_gettime		__ia32_compat_sys_timerfd_gettime
+325	i386	timerfd_settime		sys_timerfd_settime		__ia32_sys_timerfd_settime32
+326	i386	timerfd_gettime		sys_timerfd_gettime		__ia32_sys_timerfd_gettime32
 327	i386	signalfd4		sys_signalfd4			__ia32_compat_sys_signalfd4
 328	i386	eventfd2		sys_eventfd2			__ia32_sys_eventfd2
 329	i386	epoll_create1		sys_epoll_create1		__ia32_sys_epoll_create1
@@ -348,13 +348,13 @@
 334	i386	pwritev			sys_pwritev			__ia32_compat_sys_pwritev
 335	i386	rt_tgsigqueueinfo	sys_rt_tgsigqueueinfo		__ia32_compat_sys_rt_tgsigqueueinfo
 336	i386	perf_event_open		sys_perf_event_open		__ia32_sys_perf_event_open
-337	i386	recvmmsg		sys_recvmmsg			__ia32_compat_sys_recvmmsg
+337	i386	recvmmsg		sys_recvmmsg			__ia32_compat_sys_recvmmsg_time32
 338	i386	fanotify_init		sys_fanotify_init		__ia32_sys_fanotify_init
 339	i386	fanotify_mark		sys_fanotify_mark		__ia32_compat_sys_fanotify_mark
 340	i386	prlimit64		sys_prlimit64			__ia32_sys_prlimit64
 341	i386	name_to_handle_at	sys_name_to_handle_at		__ia32_sys_name_to_handle_at
 342	i386	open_by_handle_at	sys_open_by_handle_at		__ia32_compat_sys_open_by_handle_at
-343	i386	clock_adjtime		sys_clock_adjtime		__ia32_compat_sys_clock_adjtime
+343	i386	clock_adjtime		sys_clock_adjtime		__ia32_sys_clock_adjtime32
 344	i386	syncfs			sys_syncfs			__ia32_sys_syncfs
 345	i386	sendmmsg		sys_sendmmsg			__ia32_compat_sys_sendmmsg
 346	i386	setns			sys_setns			__ia32_sys_setns
diff --git a/fs/aio.c b/fs/aio.c
index b906ff70c90f..4394d3fe116a 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -2198,11 +2198,11 @@ SYSCALL_DEFINE6(io_pgetevents_time32,
 
 #if defined(CONFIG_COMPAT_32BIT_TIME)
 
-COMPAT_SYSCALL_DEFINE5(io_getevents, compat_aio_context_t, ctx_id,
-		       compat_long_t, min_nr,
-		       compat_long_t, nr,
-		       struct io_event __user *, events,
-		       struct old_timespec32 __user *, timeout)
+SYSCALL_DEFINE5(io_getevents_time32, __u32, ctx_id,
+		__s32, min_nr,
+		__s32, nr,
+		struct io_event __user *, events,
+		struct old_timespec32 __user *, timeout)
 {
 	struct timespec64 t;
 	int ret;
diff --git a/fs/select.c b/fs/select.c
index d0f35dbc0e8f..6cbc9ff56ba0 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -1379,7 +1379,7 @@ COMPAT_SYSCALL_DEFINE6(pselect6_time64, int, n, compat_ulong_t __user *, inp,
 
 #if defined(CONFIG_COMPAT_32BIT_TIME)
 
-COMPAT_SYSCALL_DEFINE6(pselect6, int, n, compat_ulong_t __user *, inp,
+COMPAT_SYSCALL_DEFINE6(pselect6_time32, int, n, compat_ulong_t __user *, inp,
 	compat_ulong_t __user *, outp, compat_ulong_t __user *, exp,
 	struct old_timespec32 __user *, tsp, void __user *, sig)
 {
@@ -1402,7 +1402,7 @@ COMPAT_SYSCALL_DEFINE6(pselect6, int, n, compat_ulong_t __user *, inp,
 #endif
 
 #if defined(CONFIG_COMPAT_32BIT_TIME)
-COMPAT_SYSCALL_DEFINE5(ppoll, struct pollfd __user *, ufds,
+COMPAT_SYSCALL_DEFINE5(ppoll_time32, struct pollfd __user *, ufds,
 	unsigned int,  nfds, struct old_timespec32 __user *, tsp,
 	const compat_sigset_t __user *, sigmask, compat_size_t, sigsetsize)
 {
diff --git a/fs/timerfd.c b/fs/timerfd.c
index 803ca070d42e..6a6fc8aa1de7 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -560,7 +560,7 @@ SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct __kernel_itimerspec __user *,
 }
 
 #ifdef CONFIG_COMPAT_32BIT_TIME
-COMPAT_SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags,
+SYSCALL_DEFINE4(timerfd_settime32, int, ufd, int, flags,
 		const struct old_itimerspec32 __user *, utmr,
 		struct old_itimerspec32 __user *, otmr)
 {
@@ -577,7 +577,7 @@ COMPAT_SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags,
 	return ret;
 }
 
-COMPAT_SYSCALL_DEFINE2(timerfd_gettime, int, ufd,
+SYSCALL_DEFINE2(timerfd_gettime32, int, ufd,
 		struct old_itimerspec32 __user *, otmr)
 {
 	struct itimerspec64 kotmr;
diff --git a/fs/utimes.c b/fs/utimes.c
index bdcf2daf39c1..350c9c16ace1 100644
--- a/fs/utimes.c
+++ b/fs/utimes.c
@@ -224,8 +224,8 @@ SYSCALL_DEFINE2(utime, char __user *, filename, struct utimbuf __user *, times)
  * of sys_utimes.
  */
 #ifdef __ARCH_WANT_SYS_UTIME32
-COMPAT_SYSCALL_DEFINE2(utime, const char __user *, filename,
-		       struct old_utimbuf32 __user *, t)
+SYSCALL_DEFINE2(utime32, const char __user *, filename,
+		struct old_utimbuf32 __user *, t)
 {
 	struct timespec64 tv[2];
 
@@ -240,7 +240,7 @@ COMPAT_SYSCALL_DEFINE2(utime, const char __user *, filename,
 }
 #endif
 
-COMPAT_SYSCALL_DEFINE4(utimensat, unsigned int, dfd, const char __user *, filename, struct old_timespec32 __user *, t, int, flags)
+SYSCALL_DEFINE4(utimensat_time32, unsigned int, dfd, const char __user *, filename, struct old_timespec32 __user *, t, int, flags)
 {
 	struct timespec64 tv[2];
 
@@ -276,14 +276,14 @@ static long do_compat_futimesat(unsigned int dfd, const char __user *filename,
 	return do_utimes(dfd, filename, t ? tv : NULL, 0);
 }
 
-COMPAT_SYSCALL_DEFINE3(futimesat, unsigned int, dfd,
+SYSCALL_DEFINE3(futimesat_time32, unsigned int, dfd,
 		       const char __user *, filename,
 		       struct old_timeval32 __user *, t)
 {
 	return do_compat_futimesat(dfd, filename, t);
 }
 
-COMPAT_SYSCALL_DEFINE2(utimes, const char __user *, filename, struct old_timeval32 __user *, t)
+SYSCALL_DEFINE2(utimes_time32, const char __user *, filename, struct old_timeval32 __user *, t)
 {
 	return do_compat_futimesat(AT_FDCWD, filename, t);
 }
diff --git a/include/linux/compat.h b/include/linux/compat.h
index 657ca6abd855..ebddcb6cfcf8 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -520,11 +520,6 @@ int __compat_save_altstack(compat_stack_t __user *, unsigned long);
 asmlinkage long compat_sys_io_setup(unsigned nr_reqs, u32 __user *ctx32p);
 asmlinkage long compat_sys_io_submit(compat_aio_context_t ctx_id, int nr,
 				     u32 __user *iocb);
-asmlinkage long compat_sys_io_getevents(compat_aio_context_t ctx_id,
-					compat_long_t min_nr,
-					compat_long_t nr,
-					struct io_event __user *events,
-					struct old_timespec32 __user *timeout);
 asmlinkage long compat_sys_io_pgetevents(compat_aio_context_t ctx_id,
 					compat_long_t min_nr,
 					compat_long_t nr,
@@ -617,7 +612,7 @@ asmlinkage long compat_sys_sendfile64(int out_fd, int in_fd,
 				    compat_loff_t __user *offset, compat_size_t count);
 
 /* fs/select.c */
-asmlinkage long compat_sys_pselect6(int n, compat_ulong_t __user *inp,
+asmlinkage long compat_sys_pselect6_time32(int n, compat_ulong_t __user *inp,
 				    compat_ulong_t __user *outp,
 				    compat_ulong_t __user *exp,
 				    struct old_timespec32 __user *tsp,
@@ -627,7 +622,7 @@ asmlinkage long compat_sys_pselect6_time64(int n, compat_ulong_t __user *inp,
 				    compat_ulong_t __user *exp,
 				    struct __kernel_timespec __user *tsp,
 				    void __user *sig);
-asmlinkage long compat_sys_ppoll(struct pollfd __user *ufds,
+asmlinkage long compat_sys_ppoll_time32(struct pollfd __user *ufds,
 				 unsigned int nfds,
 				 struct old_timespec32 __user *tsp,
 				 const compat_sigset_t __user *sigmask,
@@ -657,19 +652,6 @@ asmlinkage long compat_sys_newfstat(unsigned int fd,
 
 /* fs/sync.c: No generic prototype for sync_file_range and sync_file_range2 */
 
-/* fs/timerfd.c */
-asmlinkage long compat_sys_timerfd_gettime(int ufd,
-				   struct old_itimerspec32 __user *otmr);
-asmlinkage long compat_sys_timerfd_settime(int ufd, int flags,
-				   const struct old_itimerspec32 __user *utmr,
-				   struct old_itimerspec32 __user *otmr);
-
-/* fs/utimes.c */
-asmlinkage long compat_sys_utimensat(unsigned int dfd,
-				     const char __user *filename,
-				     struct old_timespec32 __user *t,
-				     int flags);
-
 /* kernel/exit.c */
 asmlinkage long compat_sys_waitid(int, compat_pid_t,
 		struct compat_siginfo __user *, int,
@@ -678,9 +660,6 @@ asmlinkage long compat_sys_waitid(int, compat_pid_t,
 
 
 /* kernel/futex.c */
-asmlinkage long compat_sys_futex(u32 __user *uaddr, int op, u32 val,
-		struct old_timespec32 __user *utime, u32 __user *uaddr2,
-		u32 val3);
 asmlinkage long
 compat_sys_set_robust_list(struct compat_robust_list_head __user *head,
 			   compat_size_t len);
@@ -688,10 +667,6 @@ asmlinkage long
 compat_sys_get_robust_list(int pid, compat_uptr_t __user *head_ptr,
 			   compat_size_t __user *len_ptr);
 
-/* kernel/hrtimer.c */
-asmlinkage long compat_sys_nanosleep(struct old_timespec32 __user *rqtp,
-				     struct old_timespec32 __user *rmtp);
-
 /* kernel/itimer.c */
 asmlinkage long compat_sys_getitimer(int which,
 				     struct compat_itimerval __user *it);
@@ -709,20 +684,6 @@ asmlinkage long compat_sys_kexec_load(compat_ulong_t entry,
 asmlinkage long compat_sys_timer_create(clockid_t which_clock,
 			struct compat_sigevent __user *timer_event_spec,
 			timer_t __user *created_timer_id);
-asmlinkage long compat_sys_timer_gettime(timer_t timer_id,
-				 struct old_itimerspec32 __user *setting);
-asmlinkage long compat_sys_timer_settime(timer_t timer_id, int flags,
-					 struct old_itimerspec32 __user *new,
-					 struct old_itimerspec32 __user *old);
-asmlinkage long compat_sys_clock_settime(clockid_t which_clock,
-					 struct old_timespec32 __user *tp);
-asmlinkage long compat_sys_clock_gettime(clockid_t which_clock,
-					 struct old_timespec32 __user *tp);
-asmlinkage long compat_sys_clock_getres(clockid_t which_clock,
-					struct old_timespec32 __user *tp);
-asmlinkage long compat_sys_clock_nanosleep(clockid_t which_clock, int flags,
-					   struct old_timespec32 __user *rqtp,
-					   struct old_timespec32 __user *rmtp);
 
 /* kernel/ptrace.c */
 asmlinkage long compat_sys_ptrace(compat_long_t request, compat_long_t pid,
@@ -735,8 +696,6 @@ asmlinkage long compat_sys_sched_setaffinity(compat_pid_t pid,
 asmlinkage long compat_sys_sched_getaffinity(compat_pid_t pid,
 				     unsigned int len,
 				     compat_ulong_t __user *user_mask_ptr);
-asmlinkage long compat_sys_sched_rr_get_interval(compat_pid_t pid,
-						 struct old_timespec32 __user *interval);
 
 /* kernel/signal.c */
 asmlinkage long compat_sys_sigaltstack(const compat_stack_t __user *uss_ptr,
@@ -754,7 +713,7 @@ asmlinkage long compat_sys_rt_sigprocmask(int how, compat_sigset_t __user *set,
 					  compat_size_t sigsetsize);
 asmlinkage long compat_sys_rt_sigpending(compat_sigset_t __user *uset,
 					 compat_size_t sigsetsize);
-asmlinkage long compat_sys_rt_sigtimedwait(compat_sigset_t __user *uthese,
+asmlinkage long compat_sys_rt_sigtimedwait_time32(compat_sigset_t __user *uthese,
 		struct compat_siginfo __user *uinfo,
 		struct old_timespec32 __user *uts, compat_size_t sigsetsize);
 asmlinkage long compat_sys_rt_sigtimedwait_time64(compat_sigset_t __user *uthese,
@@ -777,7 +736,6 @@ asmlinkage long compat_sys_gettimeofday(struct old_timeval32 __user *tv,
 		struct timezone __user *tz);
 asmlinkage long compat_sys_settimeofday(struct old_timeval32 __user *tv,
 		struct timezone __user *tz);
-asmlinkage long compat_sys_adjtimex(struct old_timex32 __user *utp);
 
 /* kernel/timer.c */
 asmlinkage long compat_sys_sysinfo(struct compat_sysinfo __user *info);
@@ -786,14 +744,6 @@ asmlinkage long compat_sys_sysinfo(struct compat_sysinfo __user *info);
 asmlinkage long compat_sys_mq_open(const char __user *u_name,
 			int oflag, compat_mode_t mode,
 			struct compat_mq_attr __user *u_attr);
-asmlinkage long compat_sys_mq_timedsend(mqd_t mqdes,
-			const char __user *u_msg_ptr,
-			compat_size_t msg_len, unsigned int msg_prio,
-			const struct old_timespec32 __user *u_abs_timeout);
-asmlinkage ssize_t compat_sys_mq_timedreceive(mqd_t mqdes,
-			char __user *u_msg_ptr,
-			compat_size_t msg_len, unsigned int __user *u_msg_prio,
-			const struct old_timespec32 __user *u_abs_timeout);
 asmlinkage long compat_sys_mq_notify(mqd_t mqdes,
 			const struct compat_sigevent __user *u_notification);
 asmlinkage long compat_sys_mq_getsetattr(mqd_t mqdes,
@@ -809,8 +759,6 @@ asmlinkage long compat_sys_msgsnd(int msqid, compat_uptr_t msgp,
 
 /* ipc/sem.c */
 asmlinkage long compat_sys_semctl(int semid, int semnum, int cmd, int arg);
-asmlinkage long compat_sys_semtimedop(int semid, struct sembuf __user *tsems,
-		unsigned nsems, const struct old_timespec32 __user *timeout);
 
 /* ipc/shm.c */
 asmlinkage long compat_sys_shmctl(int first, int second, void __user *uptr);
@@ -868,7 +816,7 @@ asmlinkage long compat_sys_rt_tgsigqueueinfo(compat_pid_t tgid,
 asmlinkage long compat_sys_recvmmsg_time64(int fd, struct compat_mmsghdr __user *mmsg,
 				    unsigned vlen, unsigned int flags,
 				    struct __kernel_timespec __user *timeout);
-asmlinkage long compat_sys_recvmmsg(int fd, struct compat_mmsghdr __user *mmsg,
+asmlinkage long compat_sys_recvmmsg_time32(int fd, struct compat_mmsghdr __user *mmsg,
 				    unsigned vlen, unsigned int flags,
 				    struct old_timespec32 __user *timeout);
 asmlinkage long compat_sys_wait4(compat_pid_t pid,
@@ -879,8 +827,6 @@ asmlinkage long compat_sys_fanotify_mark(int, unsigned int, __u32, __u32,
 asmlinkage long compat_sys_open_by_handle_at(int mountdirfd,
 					     struct file_handle __user *handle,
 					     int flags);
-asmlinkage long compat_sys_clock_adjtime(clockid_t which_clock,
-					 struct old_timex32 __user *tp);
 asmlinkage long compat_sys_sendmmsg(int fd, struct compat_mmsghdr __user *mmsg,
 				    unsigned vlen, unsigned int flags);
 asmlinkage ssize_t compat_sys_process_vm_readv(compat_pid_t pid,
@@ -921,8 +867,6 @@ asmlinkage long compat_sys_pwritev64v2(unsigned long fd,
 /* __ARCH_WANT_SYSCALL_NO_AT */
 asmlinkage long compat_sys_open(const char __user *filename, int flags,
 				umode_t mode);
-asmlinkage long compat_sys_utimes(const char __user *filename,
-				  struct old_timeval32 __user *t);
 
 /* __ARCH_WANT_SYSCALL_NO_FLAGS */
 asmlinkage long compat_sys_signalfd(int ufd,
@@ -936,12 +880,6 @@ asmlinkage long compat_sys_newlstat(const char __user *filename,
 				    struct compat_stat __user *statbuf);
 
 /* __ARCH_WANT_SYSCALL_DEPRECATED */
-asmlinkage long compat_sys_time(old_time32_t __user *tloc);
-asmlinkage long compat_sys_utime(const char __user *filename,
-				 struct old_utimbuf32 __user *t);
-asmlinkage long compat_sys_futimesat(unsigned int dfd,
-				     const char __user *filename,
-				     struct old_timeval32 __user *t);
 asmlinkage long compat_sys_select(int n, compat_ulong_t __user *inp,
 		compat_ulong_t __user *outp, compat_ulong_t __user *exp,
 		struct old_timeval32 __user *tvp);
@@ -976,9 +914,6 @@ asmlinkage long compat_sys_sigaction(int sig,
                                    struct compat_old_sigaction __user *oact);
 #endif
 
-/* obsolete: kernel/time/time.c */
-asmlinkage long compat_sys_stime(old_time32_t __user *tptr);
-
 /* obsolete: net/socket.c */
 asmlinkage long compat_sys_socketcall(int call, u32 __user *args);
 
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 09330d5bda0c..94369f5bd8e5 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -297,6 +297,11 @@ asmlinkage long sys_io_getevents(aio_context_t ctx_id,
 				long nr,
 				struct io_event __user *events,
 				struct __kernel_timespec __user *timeout);
+asmlinkage long sys_io_getevents_time32(__u32 ctx_id,
+				__s32 min_nr,
+				__s32 nr,
+				struct io_event __user *events,
+				struct old_timespec32 __user *timeout);
 asmlinkage long sys_io_pgetevents(aio_context_t ctx_id,
 				long min_nr,
 				long nr,
@@ -522,11 +527,19 @@ asmlinkage long sys_timerfd_settime(int ufd, int flags,
 				    const struct __kernel_itimerspec __user *utmr,
 				    struct __kernel_itimerspec __user *otmr);
 asmlinkage long sys_timerfd_gettime(int ufd, struct __kernel_itimerspec __user *otmr);
+asmlinkage long sys_timerfd_gettime32(int ufd,
+				   struct old_itimerspec32 __user *otmr);
+asmlinkage long sys_timerfd_settime32(int ufd, int flags,
+				   const struct old_itimerspec32 __user *utmr,
+				   struct old_itimerspec32 __user *otmr);
 
 /* fs/utimes.c */
 asmlinkage long sys_utimensat(int dfd, const char __user *filename,
 				struct __kernel_timespec __user *utimes,
 				int flags);
+asmlinkage long sys_utimensat_time32(unsigned int dfd,
+				const char __user *filename,
+				struct old_timespec32 __user *t, int flags);
 
 /* kernel/acct.c */
 asmlinkage long sys_acct(const char __user *name);
@@ -555,6 +568,9 @@ asmlinkage long sys_unshare(unsigned long unshare_flags);
 asmlinkage long sys_futex(u32 __user *uaddr, int op, u32 val,
 			struct __kernel_timespec __user *utime, u32 __user *uaddr2,
 			u32 val3);
+asmlinkage long sys_futex_time32(u32 __user *uaddr, int op, u32 val,
+			struct old_timespec32 __user *utime, u32 __user *uaddr2,
+			u32 val3);
 asmlinkage long sys_get_robust_list(int pid,
 				    struct robust_list_head __user * __user *head_ptr,
 				    size_t __user *len_ptr);
@@ -564,6 +580,8 @@ asmlinkage long sys_set_robust_list(struct robust_list_head __user *head,
 /* kernel/hrtimer.c */
 asmlinkage long sys_nanosleep(struct __kernel_timespec __user *rqtp,
 			      struct __kernel_timespec __user *rmtp);
+asmlinkage long sys_nanosleep_time32(struct old_timespec32 __user *rqtp,
+				     struct old_timespec32 __user *rmtp);
 
 /* kernel/itimer.c */
 asmlinkage long sys_getitimer(int which, struct itimerval __user *value);
@@ -602,6 +620,20 @@ asmlinkage long sys_clock_getres(clockid_t which_clock,
 asmlinkage long sys_clock_nanosleep(clockid_t which_clock, int flags,
 				const struct __kernel_timespec __user *rqtp,
 				struct __kernel_timespec __user *rmtp);
+asmlinkage long sys_timer_gettime32(timer_t timer_id,
+				 struct old_itimerspec32 __user *setting);
+asmlinkage long sys_timer_settime32(timer_t timer_id, int flags,
+					 struct old_itimerspec32 __user *new,
+					 struct old_itimerspec32 __user *old);
+asmlinkage long sys_clock_settime32(clockid_t which_clock,
+				struct old_timespec32 __user *tp);
+asmlinkage long sys_clock_gettime32(clockid_t which_clock,
+				struct old_timespec32 __user *tp);
+asmlinkage long sys_clock_getres_time32(clockid_t which_clock,
+				struct old_timespec32 __user *tp);
+asmlinkage long sys_clock_nanosleep_time32(clockid_t which_clock, int flags,
+				struct old_timespec32 __user *rqtp,
+				struct old_timespec32 __user *rmtp);
 
 /* kernel/printk.c */
 asmlinkage long sys_syslog(int type, char __user *buf, int len);
@@ -627,6 +659,8 @@ asmlinkage long sys_sched_get_priority_max(int policy);
 asmlinkage long sys_sched_get_priority_min(int policy);
 asmlinkage long sys_sched_rr_get_interval(pid_t pid,
 				struct __kernel_timespec __user *interval);
+asmlinkage long sys_sched_rr_get_interval_time32(pid_t pid,
+						 struct old_timespec32 __user *interval);
 
 /* kernel/signal.c */
 asmlinkage long sys_restart_syscall(void);
@@ -696,6 +730,7 @@ asmlinkage long sys_gettimeofday(struct timeval __user *tv,
 asmlinkage long sys_settimeofday(struct timeval __user *tv,
 				struct timezone __user *tz);
 asmlinkage long sys_adjtimex(struct __kernel_timex __user *txc_p);
+asmlinkage long sys_adjtimex_time32(struct old_timex32 __user *txc_p);
 
 /* kernel/timer.c */
 asmlinkage long sys_getpid(void);
@@ -714,6 +749,14 @@ asmlinkage long sys_mq_timedsend(mqd_t mqdes, const char __user *msg_ptr, size_t
 asmlinkage long sys_mq_timedreceive(mqd_t mqdes, char __user *msg_ptr, size_t msg_len, unsigned int __user *msg_prio, const struct __kernel_timespec __user *abs_timeout);
 asmlinkage long sys_mq_notify(mqd_t mqdes, const struct sigevent __user *notification);
 asmlinkage long sys_mq_getsetattr(mqd_t mqdes, const struct mq_attr __user *mqstat, struct mq_attr __user *omqstat);
+asmlinkage long sys_mq_timedreceive_time32(mqd_t mqdes,
+			char __user *u_msg_ptr,
+			unsigned int msg_len, unsigned int __user *u_msg_prio,
+			const struct old_timespec32 __user *u_abs_timeout);
+asmlinkage long sys_mq_timedsend_time32(mqd_t mqdes,
+			const char __user *u_msg_ptr,
+			unsigned int msg_len, unsigned int msg_prio,
+			const struct old_timespec32 __user *u_abs_timeout);
 
 /* ipc/msg.c */
 asmlinkage long sys_msgget(key_t key, int msgflg);
@@ -731,6 +774,9 @@ asmlinkage long sys_old_semctl(int semid, int semnum, int cmd, unsigned long arg
 asmlinkage long sys_semtimedop(int semid, struct sembuf __user *sops,
 				unsigned nsops,
 				const struct __kernel_timespec __user *timeout);
+asmlinkage long sys_semtimedop_time32(int semid, struct sembuf __user *sops,
+				unsigned nsops,
+				const struct old_timespec32 __user *timeout);
 asmlinkage long sys_semop(int semid, struct sembuf __user *sops,
 				unsigned nsops);
 
@@ -871,6 +917,8 @@ asmlinkage long sys_open_by_handle_at(int mountdirfd,
 				      int flags);
 asmlinkage long sys_clock_adjtime(clockid_t which_clock,
 				struct __kernel_timex __user *tx);
+asmlinkage long sys_clock_adjtime32(clockid_t which_clock,
+				struct old_timex32 __user *tx);
 asmlinkage long sys_syncfs(int fd);
 asmlinkage long sys_setns(int fd, int nstype);
 asmlinkage long sys_sendmmsg(int fd, struct mmsghdr __user *msg,
@@ -1006,6 +1054,7 @@ asmlinkage long sys_alarm(unsigned int seconds);
 asmlinkage long sys_getpgrp(void);
 asmlinkage long sys_pause(void);
 asmlinkage long sys_time(time_t __user *tloc);
+asmlinkage long sys_time32(old_time32_t __user *tloc);
 #ifdef __ARCH_WANT_SYS_UTIME
 asmlinkage long sys_utime(char __user *filename,
 				struct utimbuf __user *times);
@@ -1014,6 +1063,13 @@ asmlinkage long sys_utimes(char __user *filename,
 asmlinkage long sys_futimesat(int dfd, const char __user *filename,
 			      struct timeval __user *utimes);
 #endif
+asmlinkage long sys_futimesat_time32(unsigned int dfd,
+				     const char __user *filename,
+				     struct old_timeval32 __user *t);
+asmlinkage long sys_utime32(const char __user *filename,
+				 struct old_utimbuf32 __user *t);
+asmlinkage long sys_utimes_time32(const char __user *filename,
+				  struct old_timeval32 __user *t);
 asmlinkage long sys_creat(const char __user *pathname, umode_t mode);
 asmlinkage long sys_getdents(unsigned int fd,
 				struct linux_dirent __user *dirent,
@@ -1038,6 +1094,7 @@ asmlinkage long sys_fork(void);
 
 /* obsolete: kernel/time/time.c */
 asmlinkage long sys_stime(time_t __user *tptr);
+asmlinkage long sys_stime32(old_time32_t __user *tptr);
 
 /* obsolete: kernel/signal.c */
 asmlinkage long sys_sigpending(old_sigset_t __user *uset);
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index 509484dbfd5d..153b55b94234 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -39,7 +39,7 @@ __SC_COMP(__NR_io_submit, sys_io_submit, compat_sys_io_submit)
 #define __NR_io_cancel 3
 __SYSCALL(__NR_io_cancel, sys_io_cancel)
 #define __NR_io_getevents 4
-__SC_COMP(__NR_io_getevents, sys_io_getevents, compat_sys_io_getevents)
+__SC_COMP(__NR_io_getevents, sys_io_getevents, sys_io_getevents_time32)
 
 /* fs/xattr.c */
 #define __NR_setxattr 5
@@ -223,9 +223,9 @@ __SYSCALL(__NR3264_sendfile, sys_sendfile64)
 
 /* fs/select.c */
 #define __NR_pselect6 72
-__SC_COMP(__NR_pselect6, sys_pselect6, compat_sys_pselect6)
+__SC_COMP(__NR_pselect6, sys_pselect6, compat_sys_pselect6_time32)
 #define __NR_ppoll 73
-__SC_COMP(__NR_ppoll, sys_ppoll, compat_sys_ppoll)
+__SC_COMP(__NR_ppoll, sys_ppoll, compat_sys_ppoll_time32)
 
 /* fs/signalfd.c */
 #define __NR_signalfd4 74
@@ -271,14 +271,14 @@ __SC_COMP(__NR_sync_file_range, sys_sync_file_range, \
 __SYSCALL(__NR_timerfd_create, sys_timerfd_create)
 #define __NR_timerfd_settime 86
 __SC_COMP(__NR_timerfd_settime, sys_timerfd_settime, \
-	  compat_sys_timerfd_settime)
+	  sys_timerfd_settime32)
 #define __NR_timerfd_gettime 87
 __SC_COMP(__NR_timerfd_gettime, sys_timerfd_gettime, \
-	  compat_sys_timerfd_gettime)
+	  sys_timerfd_gettime32)
 
 /* fs/utimes.c */
 #define __NR_utimensat 88
-__SC_COMP(__NR_utimensat, sys_utimensat, compat_sys_utimensat)
+__SC_COMP(__NR_utimensat, sys_utimensat, sys_utimensat_time32)
 
 /* kernel/acct.c */
 #define __NR_acct 89
@@ -310,7 +310,7 @@ __SYSCALL(__NR_unshare, sys_unshare)
 
 /* kernel/futex.c */
 #define __NR_futex 98
-__SC_COMP(__NR_futex, sys_futex, compat_sys_futex)
+__SC_COMP(__NR_futex, sys_futex, sys_futex_time32)
 #define __NR_set_robust_list 99
 __SC_COMP(__NR_set_robust_list, sys_set_robust_list, \
 	  compat_sys_set_robust_list)
@@ -320,7 +320,7 @@ __SC_COMP(__NR_get_robust_list, sys_get_robust_list, \
 
 /* kernel/hrtimer.c */
 #define __NR_nanosleep 101
-__SC_COMP(__NR_nanosleep, sys_nanosleep, compat_sys_nanosleep)
+__SC_COMP(__NR_nanosleep, sys_nanosleep, sys_nanosleep_time32)
 
 /* kernel/itimer.c */
 #define __NR_getitimer 102
@@ -342,22 +342,22 @@ __SYSCALL(__NR_delete_module, sys_delete_module)
 #define __NR_timer_create 107
 __SC_COMP(__NR_timer_create, sys_timer_create, compat_sys_timer_create)
 #define __NR_timer_gettime 108
-__SC_COMP(__NR_timer_gettime, sys_timer_gettime, compat_sys_timer_gettime)
+__SC_COMP(__NR_timer_gettime, sys_timer_gettime, sys_timer_gettime32)
 #define __NR_timer_getoverrun 109
 __SYSCALL(__NR_timer_getoverrun, sys_timer_getoverrun)
 #define __NR_timer_settime 110
-__SC_COMP(__NR_timer_settime, sys_timer_settime, compat_sys_timer_settime)
+__SC_COMP(__NR_timer_settime, sys_timer_settime, sys_timer_settime32)
 #define __NR_timer_delete 111
 __SYSCALL(__NR_timer_delete, sys_timer_delete)
 #define __NR_clock_settime 112
-__SC_COMP(__NR_clock_settime, sys_clock_settime, compat_sys_clock_settime)
+__SC_COMP(__NR_clock_settime, sys_clock_settime, sys_clock_settime32)
 #define __NR_clock_gettime 113
-__SC_COMP(__NR_clock_gettime, sys_clock_gettime, compat_sys_clock_gettime)
+__SC_COMP(__NR_clock_gettime, sys_clock_gettime, sys_clock_gettime32)
 #define __NR_clock_getres 114
-__SC_COMP(__NR_clock_getres, sys_clock_getres, compat_sys_clock_getres)
+__SC_COMP(__NR_clock_getres, sys_clock_getres, sys_clock_getres_time32)
 #define __NR_clock_nanosleep 115
 __SC_COMP(__NR_clock_nanosleep, sys_clock_nanosleep, \
-	  compat_sys_clock_nanosleep)
+	  sys_clock_nanosleep_time32)
 
 /* kernel/printk.c */
 #define __NR_syslog 116
@@ -390,7 +390,7 @@ __SYSCALL(__NR_sched_get_priority_max, sys_sched_get_priority_max)
 __SYSCALL(__NR_sched_get_priority_min, sys_sched_get_priority_min)
 #define __NR_sched_rr_get_interval 127
 __SC_COMP(__NR_sched_rr_get_interval, sys_sched_rr_get_interval, \
-	  compat_sys_sched_rr_get_interval)
+	  sys_sched_rr_get_interval_time32)
 
 /* kernel/signal.c */
 #define __NR_restart_syscall 128
@@ -413,7 +413,7 @@ __SC_COMP(__NR_rt_sigprocmask, sys_rt_sigprocmask, compat_sys_rt_sigprocmask)
 __SC_COMP(__NR_rt_sigpending, sys_rt_sigpending, compat_sys_rt_sigpending)
 #define __NR_rt_sigtimedwait 137
 __SC_COMP(__NR_rt_sigtimedwait, sys_rt_sigtimedwait, \
-	  compat_sys_rt_sigtimedwait)
+	  compat_sys_rt_sigtimedwait_time32)
 #define __NR_rt_sigqueueinfo 138
 __SC_COMP(__NR_rt_sigqueueinfo, sys_rt_sigqueueinfo, \
 	  compat_sys_rt_sigqueueinfo)
@@ -486,7 +486,7 @@ __SC_COMP(__NR_gettimeofday, sys_gettimeofday, compat_sys_gettimeofday)
 #define __NR_settimeofday 170
 __SC_COMP(__NR_settimeofday, sys_settimeofday, compat_sys_settimeofday)
 #define __NR_adjtimex 171
-__SC_COMP(__NR_adjtimex, sys_adjtimex, compat_sys_adjtimex)
+__SC_COMP(__NR_adjtimex, sys_adjtimex, sys_adjtimex_time32)
 
 /* kernel/timer.c */
 #define __NR_getpid 172
@@ -512,10 +512,10 @@ __SC_COMP(__NR_mq_open, sys_mq_open, compat_sys_mq_open)
 #define __NR_mq_unlink 181
 __SYSCALL(__NR_mq_unlink, sys_mq_unlink)
 #define __NR_mq_timedsend 182
-__SC_COMP(__NR_mq_timedsend, sys_mq_timedsend, compat_sys_mq_timedsend)
+__SC_COMP(__NR_mq_timedsend, sys_mq_timedsend, sys_mq_timedsend_time32)
 #define __NR_mq_timedreceive 183
 __SC_COMP(__NR_mq_timedreceive, sys_mq_timedreceive, \
-	  compat_sys_mq_timedreceive)
+	  sys_mq_timedreceive_time32)
 #define __NR_mq_notify 184
 __SC_COMP(__NR_mq_notify, sys_mq_notify, compat_sys_mq_notify)
 #define __NR_mq_getsetattr 185
@@ -537,7 +537,7 @@ __SYSCALL(__NR_semget, sys_semget)
 #define __NR_semctl 191
 __SC_COMP(__NR_semctl, sys_semctl, compat_sys_semctl)
 #define __NR_semtimedop 192
-__SC_COMP(__NR_semtimedop, sys_semtimedop, compat_sys_semtimedop)
+__SC_COMP(__NR_semtimedop, sys_semtimedop, sys_semtimedop_time32)
 #define __NR_semop 193
 __SYSCALL(__NR_semop, sys_semop)
 
@@ -659,7 +659,7 @@ __SYSCALL(__NR_perf_event_open, sys_perf_event_open)
 #define __NR_accept4 242
 __SYSCALL(__NR_accept4, sys_accept4)
 #define __NR_recvmmsg 243
-__SC_COMP(__NR_recvmmsg, sys_recvmmsg, compat_sys_recvmmsg)
+__SC_COMP(__NR_recvmmsg, sys_recvmmsg, compat_sys_recvmmsg_time32)
 
 /*
  * Architectures may provide up to 16 syscalls of their own
@@ -681,7 +681,7 @@ __SYSCALL(__NR_name_to_handle_at, sys_name_to_handle_at)
 __SC_COMP(__NR_open_by_handle_at, sys_open_by_handle_at, \
 	  compat_sys_open_by_handle_at)
 #define __NR_clock_adjtime 266
-__SC_COMP(__NR_clock_adjtime, sys_clock_adjtime, compat_sys_clock_adjtime)
+__SC_COMP(__NR_clock_adjtime, sys_clock_adjtime, sys_clock_adjtime32)
 #define __NR_syncfs 267
 __SYSCALL(__NR_syncfs, sys_syncfs)
 #define __NR_setns 268
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index c595bed7bfcb..c839bf83231d 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -1471,10 +1471,10 @@ static int compat_prepare_timeout(const struct old_timespec32 __user *p,
 	return 0;
 }
 
-COMPAT_SYSCALL_DEFINE5(mq_timedsend, mqd_t, mqdes,
-		       const char __user *, u_msg_ptr,
-		       compat_size_t, msg_len, unsigned int, msg_prio,
-		       const struct old_timespec32 __user *, u_abs_timeout)
+SYSCALL_DEFINE5(mq_timedsend_time32, mqd_t, mqdes,
+		const char __user *, u_msg_ptr,
+		unsigned int, msg_len, unsigned int, msg_prio,
+		const struct old_timespec32 __user *, u_abs_timeout)
 {
 	struct timespec64 ts, *p = NULL;
 	if (u_abs_timeout) {
@@ -1486,10 +1486,10 @@ COMPAT_SYSCALL_DEFINE5(mq_timedsend, mqd_t, mqdes,
 	return do_mq_timedsend(mqdes, u_msg_ptr, msg_len, msg_prio, p);
 }
 
-COMPAT_SYSCALL_DEFINE5(mq_timedreceive, mqd_t, mqdes,
-		       char __user *, u_msg_ptr,
-		       compat_size_t, msg_len, unsigned int __user *, u_msg_prio,
-		       const struct old_timespec32 __user *, u_abs_timeout)
+SYSCALL_DEFINE5(mq_timedreceive_time32, mqd_t, mqdes,
+		char __user *, u_msg_ptr,
+		unsigned int, msg_len, unsigned int __user *, u_msg_prio,
+		const struct old_timespec32 __user *, u_abs_timeout)
 {
 	struct timespec64 ts, *p = NULL;
 	if (u_abs_timeout) {
diff --git a/ipc/sem.c b/ipc/sem.c
index d1efff3a81bb..80909464acff 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -2250,7 +2250,7 @@ long compat_ksys_semtimedop(int semid, struct sembuf __user *tsems,
 	return do_semtimedop(semid, tsems, nsops, NULL);
 }
 
-COMPAT_SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsems,
+SYSCALL_DEFINE4(semtimedop_time32, int, semid, struct sembuf __user *, tsems,
 		       unsigned int, nsops,
 		       const struct old_timespec32 __user *, timeout)
 {
diff --git a/kernel/futex.c b/kernel/futex.c
index be3bff2315ff..caead6c113d4 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -3812,7 +3812,7 @@ err_unlock:
 #endif /* CONFIG_COMPAT */
 
 #ifdef CONFIG_COMPAT_32BIT_TIME
-COMPAT_SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
+SYSCALL_DEFINE6(futex_time32, u32 __user *, uaddr, int, op, u32, val,
 		struct old_timespec32 __user *, utime, u32 __user *, uaddr2,
 		u32, val3)
 {
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index a674c7db2f29..62862419cd05 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5252,9 +5252,8 @@ SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid,
 }
 
 #ifdef CONFIG_COMPAT_32BIT_TIME
-COMPAT_SYSCALL_DEFINE2(sched_rr_get_interval,
-		       compat_pid_t, pid,
-		       struct old_timespec32 __user *, interval)
+SYSCALL_DEFINE2(sched_rr_get_interval_time32, pid_t, pid,
+		struct old_timespec32 __user *, interval)
 {
 	struct timespec64 t;
 	int retval = sched_rr_get_interval(pid, &t);
diff --git a/kernel/signal.c b/kernel/signal.c
index e1d7ad8e6ab1..af27629918cf 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -3397,7 +3397,7 @@ COMPAT_SYSCALL_DEFINE4(rt_sigtimedwait_time64, compat_sigset_t __user *, uthese,
 }
 
 #ifdef CONFIG_COMPAT_32BIT_TIME
-COMPAT_SYSCALL_DEFINE4(rt_sigtimedwait, compat_sigset_t __user *, uthese,
+COMPAT_SYSCALL_DEFINE4(rt_sigtimedwait_time32, compat_sigset_t __user *, uthese,
 		struct compat_siginfo __user *, uinfo,
 		struct old_timespec32 __user *, uts, compat_size_t, sigsetsize)
 {
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index ce04431a40d1..85e5ccec0955 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -42,9 +42,11 @@ COND_SYSCALL(io_destroy);
 COND_SYSCALL(io_submit);
 COND_SYSCALL_COMPAT(io_submit);
 COND_SYSCALL(io_cancel);
+COND_SYSCALL(io_getevents_time32);
 COND_SYSCALL(io_getevents);
+COND_SYSCALL(io_pgetevents_time32);
 COND_SYSCALL(io_pgetevents);
-COND_SYSCALL_COMPAT(io_getevents);
+COND_SYSCALL_COMPAT(io_pgetevents_time32);
 COND_SYSCALL_COMPAT(io_pgetevents);
 
 /* fs/xattr.c */
@@ -114,9 +116,9 @@ COND_SYSCALL_COMPAT(signalfd4);
 /* fs/timerfd.c */
 COND_SYSCALL(timerfd_create);
 COND_SYSCALL(timerfd_settime);
-COND_SYSCALL_COMPAT(timerfd_settime);
+COND_SYSCALL(timerfd_settime32);
 COND_SYSCALL(timerfd_gettime);
-COND_SYSCALL_COMPAT(timerfd_gettime);
+COND_SYSCALL(timerfd_gettime32);
 
 /* fs/utimes.c */
 
@@ -135,7 +137,7 @@ COND_SYSCALL(capset);
 
 /* kernel/futex.c */
 COND_SYSCALL(futex);
-COND_SYSCALL_COMPAT(futex);
+COND_SYSCALL(futex_time32);
 COND_SYSCALL(set_robust_list);
 COND_SYSCALL_COMPAT(set_robust_list);
 COND_SYSCALL(get_robust_list);
@@ -187,9 +189,9 @@ COND_SYSCALL(mq_open);
 COND_SYSCALL_COMPAT(mq_open);
 COND_SYSCALL(mq_unlink);
 COND_SYSCALL(mq_timedsend);
-COND_SYSCALL_COMPAT(mq_timedsend);
+COND_SYSCALL(mq_timedsend_time32);
 COND_SYSCALL(mq_timedreceive);
-COND_SYSCALL_COMPAT(mq_timedreceive);
+COND_SYSCALL(mq_timedreceive_time32);
 COND_SYSCALL(mq_notify);
 COND_SYSCALL_COMPAT(mq_notify);
 COND_SYSCALL(mq_getsetattr);
@@ -211,7 +213,7 @@ COND_SYSCALL(old_semctl);
 COND_SYSCALL(semctl);
 COND_SYSCALL_COMPAT(semctl);
 COND_SYSCALL(semtimedop);
-COND_SYSCALL_COMPAT(semtimedop);
+COND_SYSCALL(semtimedop_time32);
 COND_SYSCALL(semop);
 
 /* ipc/shm.c */
@@ -288,7 +290,7 @@ COND_SYSCALL(perf_event_open);
 COND_SYSCALL(accept4);
 COND_SYSCALL(recvmmsg);
 COND_SYSCALL(recvmmsg_time32);
-COND_SYSCALL_COMPAT(recvmmsg);
+COND_SYSCALL_COMPAT(recvmmsg_time32);
 COND_SYSCALL_COMPAT(recvmmsg_time64);
 
 /*
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index f5cfa1b73d6f..0f5f96075110 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -1771,7 +1771,7 @@ SYSCALL_DEFINE2(nanosleep, struct __kernel_timespec __user *, rqtp,
 
 #ifdef CONFIG_COMPAT_32BIT_TIME
 
-COMPAT_SYSCALL_DEFINE2(nanosleep, struct old_timespec32 __user *, rqtp,
+SYSCALL_DEFINE2(nanosleep_time32, struct old_timespec32 __user *, rqtp,
 		       struct old_timespec32 __user *, rmtp)
 {
 	struct timespec64 tu;
diff --git a/kernel/time/posix-stubs.c b/kernel/time/posix-stubs.c
index a51895486e5e..67df65f887ac 100644
--- a/kernel/time/posix-stubs.c
+++ b/kernel/time/posix-stubs.c
@@ -45,6 +45,7 @@ SYS_NI(timer_delete);
 SYS_NI(clock_adjtime);
 SYS_NI(getitimer);
 SYS_NI(setitimer);
+SYS_NI(clock_adjtime32);
 #ifdef __ARCH_WANT_SYS_ALARM
 SYS_NI(alarm);
 #endif
@@ -150,16 +151,16 @@ SYSCALL_DEFINE4(clock_nanosleep, const clockid_t, which_clock, int, flags,
 
 #ifdef CONFIG_COMPAT
 COMPAT_SYS_NI(timer_create);
-COMPAT_SYS_NI(clock_adjtime);
-COMPAT_SYS_NI(timer_settime);
-COMPAT_SYS_NI(timer_gettime);
 COMPAT_SYS_NI(getitimer);
 COMPAT_SYS_NI(setitimer);
 #endif
 
 #ifdef CONFIG_COMPAT_32BIT_TIME
-COMPAT_SYSCALL_DEFINE2(clock_settime, const clockid_t, which_clock,
-		       struct old_timespec32 __user *, tp)
+SYS_NI(timer_settime32);
+SYS_NI(timer_gettime32);
+
+SYSCALL_DEFINE2(clock_settime32, const clockid_t, which_clock,
+		struct old_timespec32 __user *, tp)
 {
 	struct timespec64 new_tp;
 
@@ -171,8 +172,8 @@ COMPAT_SYSCALL_DEFINE2(clock_settime, const clockid_t, which_clock,
 	return do_sys_settimeofday64(&new_tp, NULL);
 }
 
-COMPAT_SYSCALL_DEFINE2(clock_gettime, clockid_t, which_clock,
-		       struct old_timespec32 __user *, tp)
+SYSCALL_DEFINE2(clock_gettime32, clockid_t, which_clock,
+		struct old_timespec32 __user *, tp)
 {
 	int ret;
 	struct timespec64 kernel_tp;
@@ -186,8 +187,8 @@ COMPAT_SYSCALL_DEFINE2(clock_gettime, clockid_t, which_clock,
 	return 0;
 }
 
-COMPAT_SYSCALL_DEFINE2(clock_getres, clockid_t, which_clock,
-		       struct old_timespec32 __user *, tp)
+SYSCALL_DEFINE2(clock_getres_time32, clockid_t, which_clock,
+		struct old_timespec32 __user *, tp)
 {
 	struct timespec64 rtn_tp = {
 		.tv_sec = 0,
@@ -206,9 +207,9 @@ COMPAT_SYSCALL_DEFINE2(clock_getres, clockid_t, which_clock,
 	}
 }
 
-COMPAT_SYSCALL_DEFINE4(clock_nanosleep, clockid_t, which_clock, int, flags,
-		       struct old_timespec32 __user *, rqtp,
-		       struct old_timespec32 __user *, rmtp)
+SYSCALL_DEFINE4(clock_nanosleep_time32, clockid_t, which_clock, int, flags,
+		struct old_timespec32 __user *, rqtp,
+		struct old_timespec32 __user *, rmtp)
 {
 	struct timespec64 t;
 
diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
index de79f85ae14f..29176635991f 100644
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -730,8 +730,8 @@ SYSCALL_DEFINE2(timer_gettime, timer_t, timer_id,
 
 #ifdef CONFIG_COMPAT_32BIT_TIME
 
-COMPAT_SYSCALL_DEFINE2(timer_gettime, timer_t, timer_id,
-		       struct old_itimerspec32 __user *, setting)
+SYSCALL_DEFINE2(timer_gettime32, timer_t, timer_id,
+		struct old_itimerspec32 __user *, setting)
 {
 	struct itimerspec64 cur_setting;
 
@@ -903,9 +903,9 @@ SYSCALL_DEFINE4(timer_settime, timer_t, timer_id, int, flags,
 }
 
 #ifdef CONFIG_COMPAT_32BIT_TIME
-COMPAT_SYSCALL_DEFINE4(timer_settime, timer_t, timer_id, int, flags,
-		       struct old_itimerspec32 __user *, new,
-		       struct old_itimerspec32 __user *, old)
+SYSCALL_DEFINE4(timer_settime32, timer_t, timer_id, int, flags,
+		struct old_itimerspec32 __user *, new,
+		struct old_itimerspec32 __user *, old)
 {
 	struct itimerspec64 new_spec, old_spec;
 	struct itimerspec64 *rtn = old ? &old_spec : NULL;
@@ -1096,8 +1096,8 @@ SYSCALL_DEFINE2(clock_getres, const clockid_t, which_clock,
 
 #ifdef CONFIG_COMPAT_32BIT_TIME
 
-COMPAT_SYSCALL_DEFINE2(clock_settime, clockid_t, which_clock,
-		       struct old_timespec32 __user *, tp)
+SYSCALL_DEFINE2(clock_settime32, clockid_t, which_clock,
+		struct old_timespec32 __user *, tp)
 {
 	const struct k_clock *kc = clockid_to_kclock(which_clock);
 	struct timespec64 ts;
@@ -1111,8 +1111,8 @@ COMPAT_SYSCALL_DEFINE2(clock_settime, clockid_t, which_clock,
 	return kc->clock_set(which_clock, &ts);
 }
 
-COMPAT_SYSCALL_DEFINE2(clock_gettime, clockid_t, which_clock,
-		       struct old_timespec32 __user *, tp)
+SYSCALL_DEFINE2(clock_gettime32, clockid_t, which_clock,
+		struct old_timespec32 __user *, tp)
 {
 	const struct k_clock *kc = clockid_to_kclock(which_clock);
 	struct timespec64 ts;
@@ -1129,8 +1129,8 @@ COMPAT_SYSCALL_DEFINE2(clock_gettime, clockid_t, which_clock,
 	return err;
 }
 
-COMPAT_SYSCALL_DEFINE2(clock_adjtime, clockid_t, which_clock,
-		       struct old_timex32 __user *, utp)
+SYSCALL_DEFINE2(clock_adjtime32, clockid_t, which_clock,
+		struct old_timex32 __user *, utp)
 {
 	struct __kernel_timex ktx;
 	int err;
@@ -1147,8 +1147,8 @@ COMPAT_SYSCALL_DEFINE2(clock_adjtime, clockid_t, which_clock,
 	return err;
 }
 
-COMPAT_SYSCALL_DEFINE2(clock_getres, clockid_t, which_clock,
-		       struct old_timespec32 __user *, tp)
+SYSCALL_DEFINE2(clock_getres_time32, clockid_t, which_clock,
+		struct old_timespec32 __user *, tp)
 {
 	const struct k_clock *kc = clockid_to_kclock(which_clock);
 	struct timespec64 ts;
@@ -1204,9 +1204,9 @@ SYSCALL_DEFINE4(clock_nanosleep, const clockid_t, which_clock, int, flags,
 
 #ifdef CONFIG_COMPAT_32BIT_TIME
 
-COMPAT_SYSCALL_DEFINE4(clock_nanosleep, clockid_t, which_clock, int, flags,
-		       struct old_timespec32 __user *, rqtp,
-		       struct old_timespec32 __user *, rmtp)
+SYSCALL_DEFINE4(clock_nanosleep_time32, clockid_t, which_clock, int, flags,
+		struct old_timespec32 __user *, rqtp,
+		struct old_timespec32 __user *, rmtp)
 {
 	const struct k_clock *kc = clockid_to_kclock(which_clock);
 	struct timespec64 t;
diff --git a/kernel/time/time.c b/kernel/time/time.c
index 78b5c8f1495a..6261f969dcb7 100644
--- a/kernel/time/time.c
+++ b/kernel/time/time.c
@@ -98,11 +98,11 @@ SYSCALL_DEFINE1(stime, time_t __user *, tptr)
 
 #endif /* __ARCH_WANT_SYS_TIME */
 
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_COMPAT_32BIT_TIME
 #ifdef __ARCH_WANT_COMPAT_SYS_TIME
 
 /* old_time32_t is a 32 bit "long" and needs to get converted. */
-COMPAT_SYSCALL_DEFINE1(time, old_time32_t __user *, tloc)
+SYSCALL_DEFINE1(time32, old_time32_t __user *, tloc)
 {
 	old_time32_t i;
 
@@ -116,7 +116,7 @@ COMPAT_SYSCALL_DEFINE1(time, old_time32_t __user *, tloc)
 	return i;
 }
 
-COMPAT_SYSCALL_DEFINE1(stime, old_time32_t __user *, tptr)
+SYSCALL_DEFINE1(stime32, old_time32_t __user *, tptr)
 {
 	struct timespec64 tv;
 	int err;
@@ -344,7 +344,7 @@ int put_old_timex32(struct old_timex32 __user *utp, const struct __kernel_timex
 	return 0;
 }
 
-COMPAT_SYSCALL_DEFINE1(adjtimex, struct old_timex32 __user *, utp)
+SYSCALL_DEFINE1(adjtimex_time32, struct old_timex32 __user *, utp)
 {
 	struct __kernel_timex txc;
 	int err, ret;
diff --git a/net/compat.c b/net/compat.c
index 959d1c51826d..2fef7b9db434 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -822,7 +822,7 @@ COMPAT_SYSCALL_DEFINE5(recvmmsg_time64, int, fd, struct compat_mmsghdr __user *,
 }
 
 #ifdef CONFIG_COMPAT_32BIT_TIME
-COMPAT_SYSCALL_DEFINE5(recvmmsg, int, fd, struct compat_mmsghdr __user *, mmsg,
+COMPAT_SYSCALL_DEFINE5(recvmmsg_time32, int, fd, struct compat_mmsghdr __user *, mmsg,
 		       unsigned int, vlen, unsigned int, flags,
 		       struct old_timespec32 __user *, timeout)
 {
-- 
cgit v1.2.3


From 00bf25d693e7f69497cb7f61d46ef99fe295a8a5 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 1 Jan 2019 01:13:32 +0100
Subject: y2038: use time32 syscall names on 32-bit

This is the big flip, where all 32-bit architectures set COMPAT_32BIT_TIME
and use the _time32 system calls from the former compat layer instead
of the system calls that take __kernel_timespec and similar arguments.

The temporary redirects for __kernel_timespec, __kernel_itimerspec
and __kernel_timex can get removed with this.

It would be easy to split this commit by architecture, but with the new
generated system call tables, it's easy enough to do it all at once,
which makes it a little easier to check that the changes are the same
in each table.

Acked-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 arch/Kconfig                                |  2 +-
 arch/arm/kernel/sys_oabi-compat.c           |  8 +--
 arch/arm/tools/syscall.tbl                  | 46 ++++++++--------
 arch/m68k/kernel/syscalls/syscall.tbl       | 42 +++++++--------
 arch/microblaze/kernel/syscalls/syscall.tbl | 46 ++++++++--------
 arch/mips/kernel/syscalls/syscall_o32.tbl   | 44 ++++++++--------
 arch/parisc/kernel/syscalls/syscall.tbl     | 69 ++++++++++++++++--------
 arch/powerpc/kernel/syscalls/syscall.tbl    | 82 +++++++++++++++++++++--------
 arch/sh/kernel/syscalls/syscall.tbl         | 42 +++++++--------
 arch/sparc/kernel/syscalls/syscall.tbl      | 64 ++++++++++++++--------
 arch/x86/entry/syscalls/syscall_32.tbl      | 44 ++++++++--------
 arch/xtensa/kernel/syscalls/syscall.tbl     | 44 ++++++++--------
 include/uapi/asm-generic/unistd.h           | 56 ++++++++++----------
 13 files changed, 335 insertions(+), 254 deletions(-)

(limited to 'include')

diff --git a/arch/Kconfig b/arch/Kconfig
index 4cfb6de48f79..46db715a7f42 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -759,7 +759,7 @@ config 64BIT_TIME
 	  handling.
 
 config COMPAT_32BIT_TIME
-	def_bool (!64BIT && 64BIT_TIME) || COMPAT
+	def_bool !64BIT || COMPAT
 	help
 	  This enables 32 bit time_t support in addition to 64 bit time_t support.
 	  This is relevant on all 32-bit architectures, and 64-bit architectures
diff --git a/arch/arm/kernel/sys_oabi-compat.c b/arch/arm/kernel/sys_oabi-compat.c
index 92ab36f38795..acd054a42ba2 100644
--- a/arch/arm/kernel/sys_oabi-compat.c
+++ b/arch/arm/kernel/sys_oabi-compat.c
@@ -317,10 +317,10 @@ struct oabi_sembuf {
 asmlinkage long sys_oabi_semtimedop(int semid,
 				    struct oabi_sembuf __user *tsops,
 				    unsigned nsops,
-				    const struct timespec __user *timeout)
+				    const struct old_timespec32 __user *timeout)
 {
 	struct sembuf *sops;
-	struct timespec local_timeout;
+	struct old_timespec32 local_timeout;
 	long err;
 	int i;
 
@@ -350,7 +350,7 @@ asmlinkage long sys_oabi_semtimedop(int semid,
 	} else {
 		mm_segment_t fs = get_fs();
 		set_fs(KERNEL_DS);
-		err = sys_semtimedop(semid, sops, nsops, timeout);
+		err = sys_semtimedop_time32(semid, sops, nsops, timeout);
 		set_fs(fs);
 	}
 	kfree(sops);
@@ -375,7 +375,7 @@ asmlinkage int sys_oabi_ipc(uint call, int first, int second, int third,
 		return  sys_oabi_semtimedop(first,
 					    (struct oabi_sembuf __user *)ptr,
 					    second,
-					    (const struct timespec __user *)fifth);
+					    (const struct old_timespec32 __user *)fifth);
 	default:
 		return sys_ipc(call, first, second, third, ptr, fifth);
 	}
diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl
index b54b7f2bc24a..200f4b878a46 100644
--- a/arch/arm/tools/syscall.tbl
+++ b/arch/arm/tools/syscall.tbl
@@ -137,7 +137,7 @@
 121	common	setdomainname		sys_setdomainname
 122	common	uname			sys_newuname
 # 123 was sys_modify_ldt
-124	common	adjtimex		sys_adjtimex
+124	common	adjtimex		sys_adjtimex_time32
 125	common	mprotect		sys_mprotect
 126	common	sigprocmask		sys_sigprocmask
 # 127 was sys_create_module
@@ -174,8 +174,8 @@
 158	common	sched_yield		sys_sched_yield
 159	common	sched_get_priority_max	sys_sched_get_priority_max
 160	common	sched_get_priority_min	sys_sched_get_priority_min
-161	common	sched_rr_get_interval	sys_sched_rr_get_interval
-162	common	nanosleep		sys_nanosleep
+161	common	sched_rr_get_interval	sys_sched_rr_get_interval_time32
+162	common	nanosleep		sys_nanosleep_time32
 163	common	mremap			sys_mremap
 164	common	setresuid		sys_setresuid16
 165	common	getresuid		sys_getresuid16
@@ -190,7 +190,7 @@
 174	common	rt_sigaction		sys_rt_sigaction
 175	common	rt_sigprocmask		sys_rt_sigprocmask
 176	common	rt_sigpending		sys_rt_sigpending
-177	common	rt_sigtimedwait		sys_rt_sigtimedwait
+177	common	rt_sigtimedwait		sys_rt_sigtimedwait_time32
 178	common	rt_sigqueueinfo		sys_rt_sigqueueinfo
 179	common	rt_sigsuspend		sys_rt_sigsuspend
 180	common	pread64			sys_pread64		sys_oabi_pread64
@@ -254,12 +254,12 @@
 237	common	fremovexattr		sys_fremovexattr
 238	common	tkill			sys_tkill
 239	common	sendfile64		sys_sendfile64
-240	common	futex			sys_futex
+240	common	futex			sys_futex_time32
 241	common	sched_setaffinity	sys_sched_setaffinity
 242	common	sched_getaffinity	sys_sched_getaffinity
 243	common	io_setup		sys_io_setup
 244	common	io_destroy		sys_io_destroy
-245	common	io_getevents		sys_io_getevents
+245	common	io_getevents		sys_io_getevents_time32
 246	common	io_submit		sys_io_submit
 247	common	io_cancel		sys_io_cancel
 248	common	exit_group		sys_exit_group
@@ -272,14 +272,14 @@
 # 255 for get_thread_area
 256	common	set_tid_address		sys_set_tid_address
 257	common	timer_create		sys_timer_create
-258	common	timer_settime		sys_timer_settime
-259	common	timer_gettime		sys_timer_gettime
+258	common	timer_settime		sys_timer_settime32
+259	common	timer_gettime		sys_timer_gettime32
 260	common	timer_getoverrun	sys_timer_getoverrun
 261	common	timer_delete		sys_timer_delete
-262	common	clock_settime		sys_clock_settime
-263	common	clock_gettime		sys_clock_gettime
-264	common	clock_getres		sys_clock_getres
-265	common	clock_nanosleep		sys_clock_nanosleep
+262	common	clock_settime		sys_clock_settime32
+263	common	clock_gettime		sys_clock_gettime32
+264	common	clock_getres		sys_clock_getres_time32
+265	common	clock_nanosleep		sys_clock_nanosleep_time32
 266	common	statfs64		sys_statfs64_wrapper
 267	common	fstatfs64		sys_fstatfs64_wrapper
 268	common	tgkill			sys_tgkill
@@ -290,8 +290,8 @@
 273	common	pciconfig_write		sys_pciconfig_write
 274	common	mq_open			sys_mq_open
 275	common	mq_unlink		sys_mq_unlink
-276	common	mq_timedsend		sys_mq_timedsend
-277	common	mq_timedreceive		sys_mq_timedreceive
+276	common	mq_timedsend		sys_mq_timedsend_time32
+277	common	mq_timedreceive		sys_mq_timedreceive_time32
 278	common	mq_notify		sys_mq_notify
 279	common	mq_getsetattr		sys_mq_getsetattr
 280	common	waitid			sys_waitid
@@ -326,7 +326,7 @@
 309	common	add_key			sys_add_key
 310	common	request_key		sys_request_key
 311	common	keyctl			sys_keyctl
-312	common	semtimedop		sys_semtimedop		sys_oabi_semtimedop
+312	common	semtimedop		sys_semtimedop_time32	sys_oabi_semtimedop
 313	common	vserver
 314	common	ioprio_set		sys_ioprio_set
 315	common	ioprio_get		sys_ioprio_get
@@ -349,8 +349,8 @@
 332	common	readlinkat		sys_readlinkat
 333	common	fchmodat		sys_fchmodat
 334	common	faccessat		sys_faccessat
-335	common	pselect6		sys_pselect6
-336	common	ppoll			sys_ppoll
+335	common	pselect6		sys_pselect6_time32
+336	common	ppoll			sys_ppoll_time32
 337	common	unshare			sys_unshare
 338	common	set_robust_list		sys_set_robust_list
 339	common	get_robust_list		sys_get_robust_list
@@ -362,13 +362,13 @@
 345	common	getcpu			sys_getcpu
 346	common	epoll_pwait		sys_epoll_pwait
 347	common	kexec_load		sys_kexec_load
-348	common	utimensat		sys_utimensat
+348	common	utimensat		sys_utimensat_time32
 349	common	signalfd		sys_signalfd
 350	common	timerfd_create		sys_timerfd_create
 351	common	eventfd			sys_eventfd
 352	common	fallocate		sys_fallocate
-353	common	timerfd_settime		sys_timerfd_settime
-354	common	timerfd_gettime		sys_timerfd_gettime
+353	common	timerfd_settime		sys_timerfd_settime32
+354	common	timerfd_gettime		sys_timerfd_gettime32
 355	common	signalfd4		sys_signalfd4
 356	common	eventfd2		sys_eventfd2
 357	common	epoll_create1		sys_epoll_create1
@@ -379,14 +379,14 @@
 362	common	pwritev			sys_pwritev
 363	common	rt_tgsigqueueinfo	sys_rt_tgsigqueueinfo
 364	common	perf_event_open		sys_perf_event_open
-365	common	recvmmsg		sys_recvmmsg
+365	common	recvmmsg		sys_recvmmsg_time32
 366	common	accept4			sys_accept4
 367	common	fanotify_init		sys_fanotify_init
 368	common	fanotify_mark		sys_fanotify_mark
 369	common	prlimit64		sys_prlimit64
 370	common	name_to_handle_at	sys_name_to_handle_at
 371	common	open_by_handle_at	sys_open_by_handle_at
-372	common	clock_adjtime		sys_clock_adjtime
+372	common	clock_adjtime		sys_clock_adjtime32
 373	common	syncfs			sys_syncfs
 374	common	sendmmsg		sys_sendmmsg
 375	common	setns			sys_setns
@@ -413,6 +413,6 @@
 396	common	pkey_free		sys_pkey_free
 397	common	statx			sys_statx
 398	common	rseq			sys_rseq
-399	common	io_pgetevents		sys_io_pgetevents
+399	common	io_pgetevents		sys_io_pgetevents_time32
 400	common	migrate_pages		sys_migrate_pages
 401	common	kexec_file_load		sys_kexec_file_load
diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl
index bffffb202f8a..01529dce1d2e 100644
--- a/arch/m68k/kernel/syscalls/syscall.tbl
+++ b/arch/m68k/kernel/syscalls/syscall.tbl
@@ -131,7 +131,7 @@
 121	common	setdomainname			sys_setdomainname
 122	common	uname				sys_newuname
 123	common	cacheflush			sys_cacheflush
-124	common	adjtimex			sys_adjtimex
+124	common	adjtimex			sys_adjtimex_time32
 125	common	mprotect			sys_mprotect
 126	common	sigprocmask			sys_sigprocmask
 127	common	create_module			sys_ni_syscall
@@ -168,8 +168,8 @@
 158	common	sched_yield			sys_sched_yield
 159	common	sched_get_priority_max		sys_sched_get_priority_max
 160	common	sched_get_priority_min		sys_sched_get_priority_min
-161	common	sched_rr_get_interval		sys_sched_rr_get_interval
-162	common	nanosleep			sys_nanosleep
+161	common	sched_rr_get_interval		sys_sched_rr_get_interval_time32
+162	common	nanosleep			sys_nanosleep_time32
 163	common	mremap				sys_mremap
 164	common	setresuid			sys_setresuid16
 165	common	getresuid			sys_getresuid16
@@ -184,7 +184,7 @@
 174	common	rt_sigaction			sys_rt_sigaction
 175	common	rt_sigprocmask			sys_rt_sigprocmask
 176	common	rt_sigpending			sys_rt_sigpending
-177	common	rt_sigtimedwait			sys_rt_sigtimedwait
+177	common	rt_sigtimedwait			sys_rt_sigtimedwait_time32
 178	common	rt_sigqueueinfo			sys_rt_sigqueueinfo
 179	common	rt_sigsuspend			sys_rt_sigsuspend
 180	common	pread64				sys_pread64
@@ -242,7 +242,7 @@
 232	common	removexattr			sys_removexattr
 233	common	lremovexattr			sys_lremovexattr
 234	common	fremovexattr			sys_fremovexattr
-235	common	futex				sys_futex
+235	common	futex				sys_futex_time32
 236	common	sendfile64			sys_sendfile64
 237	common	mincore				sys_mincore
 238	common	madvise				sys_madvise
@@ -250,7 +250,7 @@
 240	common	readahead			sys_readahead
 241	common	io_setup			sys_io_setup
 242	common	io_destroy			sys_io_destroy
-243	common	io_getevents			sys_io_getevents
+243	common	io_getevents			sys_io_getevents_time32
 244	common	io_submit			sys_io_submit
 245	common	io_cancel			sys_io_cancel
 246	common	fadvise64			sys_fadvise64
@@ -262,14 +262,14 @@
 252	common	remap_file_pages		sys_remap_file_pages
 253	common	set_tid_address			sys_set_tid_address
 254	common	timer_create			sys_timer_create
-255	common	timer_settime			sys_timer_settime
-256	common	timer_gettime			sys_timer_gettime
+255	common	timer_settime			sys_timer_settime32
+256	common	timer_gettime			sys_timer_gettime32
 257	common	timer_getoverrun		sys_timer_getoverrun
 258	common	timer_delete			sys_timer_delete
-259	common	clock_settime			sys_clock_settime
-260	common	clock_gettime			sys_clock_gettime
-261	common	clock_getres			sys_clock_getres
-262	common	clock_nanosleep			sys_clock_nanosleep
+259	common	clock_settime			sys_clock_settime32
+260	common	clock_gettime			sys_clock_gettime32
+261	common	clock_getres			sys_clock_getres_time32
+262	common	clock_nanosleep			sys_clock_nanosleep_time32
 263	common	statfs64			sys_statfs64
 264	common	fstatfs64			sys_fstatfs64
 265	common	tgkill				sys_tgkill
@@ -280,8 +280,8 @@
 270	common	set_mempolicy			sys_set_mempolicy
 271	common	mq_open				sys_mq_open
 272	common	mq_unlink			sys_mq_unlink
-273	common	mq_timedsend			sys_mq_timedsend
-274	common	mq_timedreceive			sys_mq_timedreceive
+273	common	mq_timedsend			sys_mq_timedsend_time32
+274	common	mq_timedreceive			sys_mq_timedreceive_time32
 275	common	mq_notify			sys_mq_notify
 276	common	mq_getsetattr			sys_mq_getsetattr
 277	common	waitid				sys_waitid
@@ -308,8 +308,8 @@
 298	common	readlinkat			sys_readlinkat
 299	common	fchmodat			sys_fchmodat
 300	common	faccessat			sys_faccessat
-301	common	pselect6			sys_pselect6
-302	common	ppoll				sys_ppoll
+301	common	pselect6			sys_pselect6_time32
+302	common	ppoll				sys_ppoll_time32
 303	common	unshare				sys_unshare
 304	common	set_robust_list			sys_set_robust_list
 305	common	get_robust_list			sys_get_robust_list
@@ -323,13 +323,13 @@
 313	common	kexec_load			sys_kexec_load
 314	common	getcpu				sys_getcpu
 315	common	epoll_pwait			sys_epoll_pwait
-316	common	utimensat			sys_utimensat
+316	common	utimensat			sys_utimensat_time32
 317	common	signalfd			sys_signalfd
 318	common	timerfd_create			sys_timerfd_create
 319	common	eventfd				sys_eventfd
 320	common	fallocate			sys_fallocate
-321	common	timerfd_settime			sys_timerfd_settime
-322	common	timerfd_gettime			sys_timerfd_gettime
+321	common	timerfd_settime			sys_timerfd_settime32
+322	common	timerfd_gettime			sys_timerfd_gettime32
 323	common	signalfd4			sys_signalfd4
 324	common	eventfd2			sys_eventfd2
 325	common	epoll_create1			sys_epoll_create1
@@ -349,7 +349,7 @@
 339	common	prlimit64			sys_prlimit64
 340	common	name_to_handle_at		sys_name_to_handle_at
 341	common	open_by_handle_at		sys_open_by_handle_at
-342	common	clock_adjtime			sys_clock_adjtime
+342	common	clock_adjtime			sys_clock_adjtime32
 343	common	syncfs				sys_syncfs
 344	common	setns				sys_setns
 345	common	process_vm_readv		sys_process_vm_readv
@@ -378,7 +378,7 @@
 368	common	recvfrom			sys_recvfrom
 369	common	recvmsg				sys_recvmsg
 370	common	shutdown			sys_shutdown
-371	common	recvmmsg			sys_recvmmsg
+371	common	recvmmsg			sys_recvmmsg_time32
 372	common	sendmmsg			sys_sendmmsg
 373	common	userfaultfd			sys_userfaultfd
 374	common	membarrier			sys_membarrier
diff --git a/arch/microblaze/kernel/syscalls/syscall.tbl b/arch/microblaze/kernel/syscalls/syscall.tbl
index 7cc0f9554da3..492ff5c35b68 100644
--- a/arch/microblaze/kernel/syscalls/syscall.tbl
+++ b/arch/microblaze/kernel/syscalls/syscall.tbl
@@ -131,7 +131,7 @@
 121	common	setdomainname			sys_setdomainname
 122	common	uname				sys_newuname
 123	common	modify_ldt			sys_ni_syscall
-124	common	adjtimex			sys_adjtimex
+124	common	adjtimex			sys_adjtimex_time32
 125	common	mprotect			sys_mprotect
 126	common	sigprocmask			sys_sigprocmask
 127	common	create_module			sys_ni_syscall
@@ -168,8 +168,8 @@
 158	common	sched_yield			sys_sched_yield
 159	common	sched_get_priority_max		sys_sched_get_priority_max
 160	common	sched_get_priority_min		sys_sched_get_priority_min
-161	common	sched_rr_get_interval		sys_sched_rr_get_interval
-162	common	nanosleep			sys_nanosleep
+161	common	sched_rr_get_interval		sys_sched_rr_get_interval_time32
+162	common	nanosleep			sys_nanosleep_time32
 163	common	mremap				sys_mremap
 164	common	setresuid			sys_setresuid
 165	common	getresuid			sys_getresuid
@@ -184,7 +184,7 @@
 174	common	rt_sigaction			sys_rt_sigaction
 175	common	rt_sigprocmask			sys_rt_sigprocmask
 176	common	rt_sigpending			sys_rt_sigpending
-177	common	rt_sigtimedwait			sys_rt_sigtimedwait
+177	common	rt_sigtimedwait			sys_rt_sigtimedwait_time32
 178	common	rt_sigqueueinfo			sys_rt_sigqueueinfo
 179	common	rt_sigsuspend			sys_rt_sigsuspend
 180	common	pread64				sys_pread64
@@ -247,14 +247,14 @@
 237	common	fremovexattr			sys_fremovexattr
 238	common	tkill				sys_tkill
 239	common	sendfile64			sys_sendfile64
-240	common	futex				sys_futex
+240	common	futex				sys_futex_time32
 241	common	sched_setaffinity		sys_sched_setaffinity
 242	common	sched_getaffinity		sys_sched_getaffinity
 243	common	set_thread_area			sys_ni_syscall
 244	common	get_thread_area			sys_ni_syscall
 245	common	io_setup			sys_io_setup
 246	common	io_destroy			sys_io_destroy
-247	common	io_getevents			sys_io_getevents
+247	common	io_getevents			sys_io_getevents_time32
 248	common	io_submit			sys_io_submit
 249	common	io_cancel			sys_io_cancel
 250	common	fadvise64			sys_fadvise64
@@ -267,14 +267,14 @@
 257	common	remap_file_pages		sys_remap_file_pages
 258	common	set_tid_address			sys_set_tid_address
 259	common	timer_create			sys_timer_create
-260	common	timer_settime			sys_timer_settime
-261	common	timer_gettime			sys_timer_gettime
+260	common	timer_settime			sys_timer_settime32
+261	common	timer_gettime			sys_timer_gettime32
 262	common	timer_getoverrun		sys_timer_getoverrun
 263	common	timer_delete			sys_timer_delete
-264	common	clock_settime			sys_clock_settime
-265	common	clock_gettime			sys_clock_gettime
-266	common	clock_getres			sys_clock_getres
-267	common	clock_nanosleep			sys_clock_nanosleep
+264	common	clock_settime			sys_clock_settime32
+265	common	clock_gettime			sys_clock_gettime32
+266	common	clock_getres			sys_clock_getres_time32
+267	common	clock_nanosleep			sys_clock_nanosleep_time32
 268	common	statfs64			sys_statfs64
 269	common	fstatfs64			sys_fstatfs64
 270	common	tgkill				sys_tgkill
@@ -286,8 +286,8 @@
 276	common	set_mempolicy			sys_set_mempolicy
 277	common	mq_open				sys_mq_open
 278	common	mq_unlink			sys_mq_unlink
-279	common	mq_timedsend			sys_mq_timedsend
-280	common	mq_timedreceive			sys_mq_timedreceive
+279	common	mq_timedsend			sys_mq_timedsend_time32
+280	common	mq_timedreceive			sys_mq_timedreceive_time32
 281	common	mq_notify			sys_mq_notify
 282	common	mq_getsetattr			sys_mq_getsetattr
 283	common	kexec_load			sys_kexec_load
@@ -315,8 +315,8 @@
 305	common	readlinkat			sys_readlinkat
 306	common	fchmodat			sys_fchmodat
 307	common	faccessat			sys_faccessat
-308	common	pselect6			sys_pselect6
-309	common	ppoll				sys_ppoll
+308	common	pselect6			sys_pselect6_time32
+309	common	ppoll				sys_ppoll_time32
 310	common	unshare				sys_unshare
 311	common	set_robust_list			sys_set_robust_list
 312	common	get_robust_list			sys_get_robust_list
@@ -327,14 +327,14 @@
 317	common	move_pages			sys_move_pages
 318	common	getcpu				sys_getcpu
 319	common	epoll_pwait			sys_epoll_pwait
-320	common	utimensat			sys_utimensat
+320	common	utimensat			sys_utimensat_time32
 321	common	signalfd			sys_signalfd
 322	common	timerfd_create			sys_timerfd_create
 323	common	eventfd				sys_eventfd
 324	common	fallocate			sys_fallocate
-325	common	semtimedop			sys_semtimedop
-326	common	timerfd_settime			sys_timerfd_settime
-327	common	timerfd_gettime			sys_timerfd_gettime
+325	common	semtimedop			sys_semtimedop_time32
+326	common	timerfd_settime			sys_timerfd_settime32
+327	common	timerfd_gettime			sys_timerfd_gettime32
 328	common	semctl				sys_old_semctl
 329	common	semget				sys_semget
 330	common	semop				sys_semop
@@ -374,13 +374,13 @@
 364	common	pwritev				sys_pwritev
 365	common	rt_tgsigqueueinfo		sys_rt_tgsigqueueinfo
 366	common	perf_event_open			sys_perf_event_open
-367	common	recvmmsg			sys_recvmmsg
+367	common	recvmmsg			sys_recvmmsg_time32
 368	common	fanotify_init			sys_fanotify_init
 369	common	fanotify_mark			sys_fanotify_mark
 370	common	prlimit64			sys_prlimit64
 371	common	name_to_handle_at		sys_name_to_handle_at
 372	common	open_by_handle_at		sys_open_by_handle_at
-373	common	clock_adjtime			sys_clock_adjtime
+373	common	clock_adjtime			sys_clock_adjtime32
 374	common	syncfs				sys_syncfs
 375	common	setns				sys_setns
 376	common	sendmmsg			sys_sendmmsg
@@ -406,5 +406,5 @@
 396	common	pkey_alloc			sys_pkey_alloc
 397	common	pkey_free			sys_pkey_free
 398	common	statx				sys_statx
-399	common	io_pgetevents			sys_io_pgetevents
+399	common	io_pgetevents			sys_io_pgetevents_time32
 400	common	rseq				sys_rseq
diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl
index e9fec7bac5a9..5642d93b64c0 100644
--- a/arch/mips/kernel/syscalls/syscall_o32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_o32.tbl
@@ -135,7 +135,7 @@
 121	o32	setdomainname			sys_setdomainname
 122	o32	uname				sys_newuname
 123	o32	modify_ldt			sys_ni_syscall
-124	o32	adjtimex			sys_adjtimex			sys_adjtimex_time32
+124	o32	adjtimex			sys_adjtimex_time32
 125	o32	mprotect			sys_mprotect
 126	o32	sigprocmask			sys_sigprocmask			compat_sys_sigprocmask
 127	o32	create_module			sys_ni_syscall
@@ -176,8 +176,8 @@
 162	o32	sched_yield			sys_sched_yield
 163	o32	sched_get_priority_max		sys_sched_get_priority_max
 164	o32	sched_get_priority_min		sys_sched_get_priority_min
-165	o32	sched_rr_get_interval		sys_sched_rr_get_interval	sys_sched_rr_get_interval_time32
-166	o32	nanosleep			sys_nanosleep			sys_nanosleep_time32
+165	o32	sched_rr_get_interval		sys_sched_rr_get_interval_time32
+166	o32	nanosleep			sys_nanosleep_time32
 167	o32	mremap				sys_mremap
 168	o32	accept				sys_accept
 169	o32	bind				sys_bind
@@ -208,7 +208,7 @@
 194	o32	rt_sigaction			sys_rt_sigaction		compat_sys_rt_sigaction
 195	o32	rt_sigprocmask			sys_rt_sigprocmask		compat_sys_rt_sigprocmask
 196	o32	rt_sigpending			sys_rt_sigpending		compat_sys_rt_sigpending
-197	o32	rt_sigtimedwait			sys_rt_sigtimedwait		compat_sys_rt_sigtimedwait_time32
+197	o32	rt_sigtimedwait			sys_rt_sigtimedwait_time32	compat_sys_rt_sigtimedwait_time32
 198	o32	rt_sigqueueinfo			sys_rt_sigqueueinfo		compat_sys_rt_sigqueueinfo
 199	o32	rt_sigsuspend			sys_rt_sigsuspend		compat_sys_rt_sigsuspend
 200	o32	pread64				sys_pread64			sys_32_pread
@@ -249,12 +249,12 @@
 235	o32	fremovexattr			sys_fremovexattr
 236	o32	tkill				sys_tkill
 237	o32	sendfile64			sys_sendfile64
-238	o32	futex				sys_futex			sys_futex_time32
+238	o32	futex				sys_futex_time32
 239	o32	sched_setaffinity		sys_sched_setaffinity		compat_sys_sched_setaffinity
 240	o32	sched_getaffinity		sys_sched_getaffinity		compat_sys_sched_getaffinity
 241	o32	io_setup			sys_io_setup			compat_sys_io_setup
 242	o32	io_destroy			sys_io_destroy
-243	o32	io_getevents			sys_io_getevents		sys_io_getevents_time32
+243	o32	io_getevents			sys_io_getevents_time32
 244	o32	io_submit			sys_io_submit			compat_sys_io_submit
 245	o32	io_cancel			sys_io_cancel
 246	o32	exit_group			sys_exit_group
@@ -269,14 +269,14 @@
 255	o32	statfs64			sys_statfs64			compat_sys_statfs64
 256	o32	fstatfs64			sys_fstatfs64			compat_sys_fstatfs64
 257	o32	timer_create			sys_timer_create		compat_sys_timer_create
-258	o32	timer_settime			sys_timer_settime		sys_timer_settime32
-259	o32	timer_gettime			sys_timer_gettime		sys_timer_gettime32
+258	o32	timer_settime			sys_timer_settime32
+259	o32	timer_gettime			sys_timer_gettime32
 260	o32	timer_getoverrun		sys_timer_getoverrun
 261	o32	timer_delete			sys_timer_delete
-262	o32	clock_settime			sys_clock_settime		sys_clock_settime32
-263	o32	clock_gettime			sys_clock_gettime		sys_clock_gettime32
-264	o32	clock_getres			sys_clock_getres		sys_clock_getres_time32
-265	o32	clock_nanosleep			sys_clock_nanosleep		sys_clock_nanosleep_time32
+262	o32	clock_settime			sys_clock_settime32
+263	o32	clock_gettime			sys_clock_gettime32
+264	o32	clock_getres			sys_clock_getres_time32
+265	o32	clock_nanosleep			sys_clock_nanosleep_time32
 266	o32	tgkill				sys_tgkill
 267	o32	utimes				sys_utimes			sys_utimes_time32
 268	o32	mbind				sys_mbind			compat_sys_mbind
@@ -284,8 +284,8 @@
 270	o32	set_mempolicy			sys_set_mempolicy		compat_sys_set_mempolicy
 271	o32	mq_open				sys_mq_open			compat_sys_mq_open
 272	o32	mq_unlink			sys_mq_unlink
-273	o32	mq_timedsend			sys_mq_timedsend		sys_mq_timedsend_time32
-274	o32	mq_timedreceive			sys_mq_timedreceive		sys_mq_timedreceive_time32
+273	o32	mq_timedsend			sys_mq_timedsend_time32
+274	o32	mq_timedreceive			sys_mq_timedreceive_time32
 275	o32	mq_notify			sys_mq_notify			compat_sys_mq_notify
 276	o32	mq_getsetattr			sys_mq_getsetattr		compat_sys_mq_getsetattr
 277	o32	vserver				sys_ni_syscall
@@ -312,8 +312,8 @@
 298	o32	readlinkat			sys_readlinkat
 299	o32	fchmodat			sys_fchmodat
 300	o32	faccessat			sys_faccessat
-301	o32	pselect6			sys_pselect6			compat_sys_pselect6_time32
-302	o32	ppoll				sys_ppoll			compat_sys_ppoll_time32
+301	o32	pselect6			sys_pselect6_time32		compat_sys_pselect6_time32
+302	o32	ppoll				sys_ppoll_time32		compat_sys_ppoll_time32
 303	o32	unshare				sys_unshare
 304	o32	splice				sys_splice
 305	o32	sync_file_range			sys_sync_file_range		sys32_sync_file_range
@@ -327,14 +327,14 @@
 313	o32	epoll_pwait			sys_epoll_pwait			compat_sys_epoll_pwait
 314	o32	ioprio_set			sys_ioprio_set
 315	o32	ioprio_get			sys_ioprio_get
-316	o32	utimensat			sys_utimensat			sys_utimensat_time32
+316	o32	utimensat			sys_utimensat_time32
 317	o32	signalfd			sys_signalfd			compat_sys_signalfd
 318	o32	timerfd				sys_ni_syscall
 319	o32	eventfd				sys_eventfd
 320	o32	fallocate			sys_fallocate			sys32_fallocate
 321	o32	timerfd_create			sys_timerfd_create
-322	o32	timerfd_gettime			sys_timerfd_gettime		sys_timerfd_gettime32
-323	o32	timerfd_settime			sys_timerfd_settime		sys_timerfd_settime32
+322	o32	timerfd_gettime			sys_timerfd_gettime32
+323	o32	timerfd_settime			sys_timerfd_settime32
 324	o32	signalfd4			sys_signalfd4			compat_sys_signalfd4
 325	o32	eventfd2			sys_eventfd2
 326	o32	epoll_create1			sys_epoll_create1
@@ -346,13 +346,13 @@
 332	o32	rt_tgsigqueueinfo		sys_rt_tgsigqueueinfo		compat_sys_rt_tgsigqueueinfo
 333	o32	perf_event_open			sys_perf_event_open
 334	o32	accept4				sys_accept4
-335	o32	recvmmsg			sys_recvmmsg			compat_sys_recvmmsg_time32
+335	o32	recvmmsg			sys_recvmmsg_time32		compat_sys_recvmmsg_time32
 336	o32	fanotify_init			sys_fanotify_init
 337	o32	fanotify_mark			sys_fanotify_mark		compat_sys_fanotify_mark
 338	o32	prlimit64			sys_prlimit64
 339	o32	name_to_handle_at		sys_name_to_handle_at
 340	o32	open_by_handle_at		sys_open_by_handle_at		compat_sys_open_by_handle_at
-341	o32	clock_adjtime			sys_clock_adjtime		sys_clock_adjtime32
+341	o32	clock_adjtime			sys_clock_adjtime32
 342	o32	syncfs				sys_syncfs
 343	o32	sendmmsg			sys_sendmmsg			compat_sys_sendmmsg
 344	o32	setns				sys_setns
@@ -379,7 +379,7 @@
 365	o32	pkey_free			sys_pkey_free
 366	o32	statx				sys_statx
 367	o32	rseq				sys_rseq
-368	o32	io_pgetevents			sys_io_pgetevents		compat_sys_io_pgetevents
+368	o32	io_pgetevents			sys_io_pgetevents_time32	compat_sys_io_pgetevents
 # room for arch specific calls
 393	o32	semget				sys_semget
 394	o32	semctl				sys_semctl			compat_sys_semctl
diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl
index f7440427d459..b110c7371b08 100644
--- a/arch/parisc/kernel/syscalls/syscall.tbl
+++ b/arch/parisc/kernel/syscalls/syscall.tbl
@@ -133,7 +133,8 @@
 121	common	setdomainname		sys_setdomainname
 122	common	sendfile		sys_sendfile			compat_sys_sendfile
 123	common	recvfrom		sys_recvfrom
-124	common	adjtimex		sys_adjtimex			sys_adjtimex_time32
+124	32	adjtimex		sys_adjtimex_time32
+124	64	adjtimex		sys_adjtimex
 125	common	mprotect		sys_mprotect
 126	common	sigprocmask		sys_sigprocmask			compat_sys_sigprocmask
 # 127 was create_module
@@ -171,8 +172,10 @@
 158	common	sched_yield		sys_sched_yield
 159	common	sched_get_priority_max	sys_sched_get_priority_max
 160	common	sched_get_priority_min	sys_sched_get_priority_min
-161	common	sched_rr_get_interval	sys_sched_rr_get_interval	sys_sched_rr_get_interval_time32
-162	common	nanosleep		sys_nanosleep			sys_nanosleep_time32
+161	32	sched_rr_get_interval	sys_sched_rr_get_interval_time32
+161	64	sched_rr_get_interval	sys_sched_rr_get_interval
+162	32	nanosleep		sys_nanosleep_time32
+162	64	nanosleep		sys_nanosleep
 163	common	mremap			sys_mremap
 164	common	setresuid		sys_setresuid
 165	common	getresuid		sys_getresuid
@@ -187,7 +190,8 @@
 174	common	rt_sigaction		sys_rt_sigaction		compat_sys_rt_sigaction
 175	common	rt_sigprocmask		sys_rt_sigprocmask		compat_sys_rt_sigprocmask
 176	common	rt_sigpending		sys_rt_sigpending		compat_sys_rt_sigpending
-177	common	rt_sigtimedwait		sys_rt_sigtimedwait		compat_sys_rt_sigtimedwait_time32
+177	32	rt_sigtimedwait		sys_rt_sigtimedwait_time32	compat_sys_rt_sigtimedwait_time32
+177	64	rt_sigtimedwait		sys_rt_sigtimedwait
 178	common	rt_sigqueueinfo		sys_rt_sigqueueinfo		compat_sys_rt_sigqueueinfo
 179	common	rt_sigsuspend		sys_rt_sigsuspend		compat_sys_rt_sigsuspend
 180	common	chown			sys_chown
@@ -223,14 +227,16 @@
 207	64	readahead		sys_readahead
 208	common	tkill			sys_tkill
 209	common	sendfile64		sys_sendfile64			compat_sys_sendfile64
-210	common	futex			sys_futex			sys_futex_time32
+210	32	futex			sys_futex_time32
+210	64	futex			sys_futex
 211	common	sched_setaffinity	sys_sched_setaffinity		compat_sys_sched_setaffinity
 212	common	sched_getaffinity	sys_sched_getaffinity		compat_sys_sched_getaffinity
 # 213 was set_thread_area
 # 214 was get_thread_area
 215	common	io_setup		sys_io_setup			compat_sys_io_setup
 216	common	io_destroy		sys_io_destroy
-217	common	io_getevents		sys_io_getevents		sys_io_getevents_time32
+217	32	io_getevents		sys_io_getevents_time32
+217	64	io_getevents		sys_io_getevents
 218	common	io_submit		sys_io_submit			compat_sys_io_submit
 219	common	io_cancel		sys_io_cancel
 # 220 was alloc_hugepages
@@ -241,11 +247,14 @@
 225	common	epoll_ctl		sys_epoll_ctl
 226	common	epoll_wait		sys_epoll_wait
 227	common	remap_file_pages	sys_remap_file_pages
-228	common	semtimedop		sys_semtimedop			sys_semtimedop_time32
+228	32	semtimedop		sys_semtimedop_time32
+228	64	semtimedop		sys_semtimedop
 229	common	mq_open			sys_mq_open			compat_sys_mq_open
 230	common	mq_unlink		sys_mq_unlink
-231	common	mq_timedsend		sys_mq_timedsend		sys_mq_timedsend_time32
-232	common	mq_timedreceive		sys_mq_timedreceive		sys_mq_timedreceive_time32
+231	32	mq_timedsend		sys_mq_timedsend_time32
+231	64	mq_timedsend		sys_mq_timedsend
+232	32	mq_timedreceive		sys_mq_timedreceive_time32
+232	64	mq_timedreceive		sys_mq_timedreceive
 233	common	mq_notify		sys_mq_notify			compat_sys_mq_notify
 234	common	mq_getsetattr		sys_mq_getsetattr		compat_sys_mq_getsetattr
 235	common	waitid			sys_waitid			compat_sys_waitid
@@ -265,14 +274,20 @@
 248	common	lremovexattr		sys_lremovexattr
 249	common	fremovexattr		sys_fremovexattr
 250	common	timer_create		sys_timer_create		compat_sys_timer_create
-251	common	timer_settime		sys_timer_settime		sys_timer_settime32
-252	common	timer_gettime		sys_timer_gettime		sys_timer_gettime32
+251	32	timer_settime		sys_timer_settime32
+251	64	timer_settime		sys_timer_settime
+252	32	timer_gettime		sys_timer_gettime32
+252	64	timer_gettime		sys_timer_gettime
 253	common	timer_getoverrun	sys_timer_getoverrun
 254	common	timer_delete		sys_timer_delete
-255	common	clock_settime		sys_clock_settime		sys_clock_settime32
-256	common	clock_gettime		sys_clock_gettime		sys_clock_gettime32
-257	common	clock_getres		sys_clock_getres		sys_clock_getres_time32
-258	common	clock_nanosleep		sys_clock_nanosleep		sys_clock_nanosleep_time32
+255	32	clock_settime		sys_clock_settime32
+255	64	clock_settime		sys_clock_settime
+256	32	clock_gettime		sys_clock_gettime32
+256	64	clock_gettime		sys_clock_gettime
+257	32	clock_getres		sys_clock_getres_time32
+257	64	clock_getres		sys_clock_getres
+258	32	clock_nanosleep		sys_clock_nanosleep_time32
+258	64	clock_nanosleep		sys_clock_nanosleep
 259	common	tgkill			sys_tgkill
 260	common	mbind			sys_mbind			compat_sys_mbind
 261	common	get_mempolicy		sys_get_mempolicy		compat_sys_get_mempolicy
@@ -287,8 +302,10 @@
 270	common	inotify_add_watch	sys_inotify_add_watch
 271	common	inotify_rm_watch	sys_inotify_rm_watch
 272	common	migrate_pages		sys_migrate_pages
-273	common	pselect6		sys_pselect6			compat_sys_pselect6_time32
-274	common	ppoll			sys_ppoll			compat_sys_ppoll_time32
+273	32	pselect6		sys_pselect6_time32		compat_sys_pselect6_time32
+273	64	pselect6		sys_pselect6
+274	32	ppoll			sys_ppoll_time32		compat_sys_ppoll_time32
+274	64	ppoll			sys_ppoll
 275	common	openat			sys_openat			compat_sys_openat
 276	common	mkdirat			sys_mkdirat
 277	common	mknodat			sys_mknodat
@@ -316,15 +333,18 @@
 298	common	statfs64		sys_statfs64			compat_sys_statfs64
 299	common	fstatfs64		sys_fstatfs64			compat_sys_fstatfs64
 300	common	kexec_load		sys_kexec_load			compat_sys_kexec_load
-301	common	utimensat		sys_utimensat			sys_utimensat_time32
+301	32	utimensat		sys_utimensat_time32
+301	64	utimensat		sys_utimensat
 302	common	signalfd		sys_signalfd			compat_sys_signalfd
 # 303 was timerfd
 304	common	eventfd			sys_eventfd
 305	32	fallocate		parisc_fallocate
 305	64	fallocate		sys_fallocate
 306	common	timerfd_create		sys_timerfd_create
-307	common	timerfd_settime		sys_timerfd_settime		sys_timerfd_settime32
-308	common	timerfd_gettime		sys_timerfd_gettime		sys_timerfd_gettime32
+307	32	timerfd_settime		sys_timerfd_settime32
+307	64	timerfd_settime		sys_timerfd_settime
+308	32	timerfd_gettime		sys_timerfd_gettime32
+308	64	timerfd_gettime		sys_timerfd_gettime
 309	common	signalfd4		sys_signalfd4			compat_sys_signalfd4
 310	common	eventfd2		sys_eventfd2
 311	common	epoll_create1		sys_epoll_create1
@@ -335,12 +355,14 @@
 316	common	pwritev	sys_pwritev	compat_sys_pwritev
 317	common	rt_tgsigqueueinfo	sys_rt_tgsigqueueinfo		compat_sys_rt_tgsigqueueinfo
 318	common	perf_event_open		sys_perf_event_open
-319	common	recvmmsg		sys_recvmmsg			compat_sys_recvmmsg_time32
+319	32	recvmmsg		sys_recvmmsg_time32		compat_sys_recvmmsg_time32
+319	64	recvmmsg		sys_recvmmsg
 320	common	accept4			sys_accept4
 321	common	prlimit64		sys_prlimit64
 322	common	fanotify_init		sys_fanotify_init
 323	common	fanotify_mark		sys_fanotify_mark		sys32_fanotify_mark
-324	common	clock_adjtime		sys_clock_adjtime		sys_clock_adjtime32
+324	32	clock_adjtime		sys_clock_adjtime32
+324	64	clock_adjtime		sys_clock_adjtime
 325	common	name_to_handle_at	sys_name_to_handle_at
 326	common	open_by_handle_at	sys_open_by_handle_at		compat_sys_open_by_handle_at
 327	common	syncfs			sys_syncfs
@@ -366,7 +388,8 @@
 347	common	preadv2			sys_preadv2			compat_sys_preadv2
 348	common	pwritev2		sys_pwritev2			compat_sys_pwritev2
 349	common	statx			sys_statx
-350	common	io_pgetevents		sys_io_pgetevents		compat_sys_io_pgetevents
+350	32	io_pgetevents		sys_io_pgetevents_time32	compat_sys_io_pgetevents
+350	64	io_pgetevents		sys_io_pgetevents
 351	common	pkey_mprotect		sys_pkey_mprotect
 352	common	pkey_alloc		sys_pkey_alloc
 353	common	pkey_free		sys_pkey_free
diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl
index 86650dcd2185..2a8b060f73b3 100644
--- a/arch/powerpc/kernel/syscalls/syscall.tbl
+++ b/arch/powerpc/kernel/syscalls/syscall.tbl
@@ -157,7 +157,9 @@
 121	common	setdomainname			sys_setdomainname
 122	common	uname				sys_newuname
 123	common	modify_ldt			sys_ni_syscall
-124	common	adjtimex			sys_adjtimex			sys_adjtimex_time32
+124	32	adjtimex			sys_adjtimex_time32
+124	64	adjtimex			sys_adjtimex
+124	spu	adjtimex			sys_adjtimex
 125	common	mprotect			sys_mprotect
 126	32	sigprocmask			sys_sigprocmask			compat_sys_sigprocmask
 126	64	sigprocmask			sys_ni_syscall
@@ -198,8 +200,12 @@
 158	common	sched_yield			sys_sched_yield
 159	common	sched_get_priority_max		sys_sched_get_priority_max
 160	common	sched_get_priority_min		sys_sched_get_priority_min
-161	common	sched_rr_get_interval		sys_sched_rr_get_interval	sys_sched_rr_get_interval_time32
-162	common	nanosleep			sys_nanosleep			sys_nanosleep_time32
+161	32	sched_rr_get_interval		sys_sched_rr_get_interval_time32
+161	64	sched_rr_get_interval		sys_sched_rr_get_interval
+161	spu	sched_rr_get_interval		sys_sched_rr_get_interval
+162	32	nanosleep			sys_nanosleep_time32
+162	64	nanosleep			sys_nanosleep
+162	spu	nanosleep			sys_nanosleep
 163	common	mremap				sys_mremap
 164	common	setresuid			sys_setresuid
 165	common	getresuid			sys_getresuid
@@ -213,7 +219,8 @@
 173	nospu	rt_sigaction			sys_rt_sigaction		compat_sys_rt_sigaction
 174	nospu	rt_sigprocmask			sys_rt_sigprocmask		compat_sys_rt_sigprocmask
 175	nospu	rt_sigpending			sys_rt_sigpending		compat_sys_rt_sigpending
-176	nospu	rt_sigtimedwait			sys_rt_sigtimedwait		compat_sys_rt_sigtimedwait_time32
+176	32	rt_sigtimedwait			sys_rt_sigtimedwait_time32	compat_sys_rt_sigtimedwait_time32
+176	64	rt_sigtimedwait			sys_rt_sigtimedwait
 177	nospu 	rt_sigqueueinfo			sys_rt_sigqueueinfo		compat_sys_rt_sigqueueinfo
 178	nospu 	rt_sigsuspend			sys_rt_sigsuspend		compat_sys_rt_sigsuspend
 179	common	pread64				sys_pread64			compat_sys_pread64
@@ -260,7 +267,9 @@
 218	common	removexattr			sys_removexattr
 219	common	lremovexattr			sys_lremovexattr
 220	common	fremovexattr			sys_fremovexattr
-221	common	futex				sys_futex			sys_futex_time32
+221	32	futex				sys_futex_time32
+221	64	futex				sys_futex
+221	spu	futex				sys_futex
 222	common	sched_setaffinity		sys_sched_setaffinity		compat_sys_sched_setaffinity
 223	common	sched_getaffinity		sys_sched_getaffinity		compat_sys_sched_getaffinity
 # 224 unused
@@ -268,7 +277,9 @@
 226	32	sendfile64			sys_sendfile64			compat_sys_sendfile64
 227	common	io_setup			sys_io_setup			compat_sys_io_setup
 228	common	io_destroy			sys_io_destroy
-229	common	io_getevents			sys_io_getevents		sys_io_getevents_time32
+229	32	io_getevents			sys_io_getevents_time32
+229	64	io_getevents			sys_io_getevents
+229	spu	io_getevents			sys_io_getevents
 230	common	io_submit			sys_io_submit			compat_sys_io_submit
 231	common	io_cancel			sys_io_cancel
 232	nospu	set_tid_address			sys_set_tid_address
@@ -280,14 +291,26 @@
 238	common	epoll_wait			sys_epoll_wait
 239	common	remap_file_pages		sys_remap_file_pages
 240	common	timer_create			sys_timer_create		compat_sys_timer_create
-241	common	timer_settime			sys_timer_settime		sys_timer_settime32
-242	common	timer_gettime			sys_timer_gettime		sys_timer_gettime32
+241	32	timer_settime			sys_timer_settime32
+241	64	timer_settime			sys_timer_settime
+241	spu	timer_settime			sys_timer_settime
+242	32	timer_gettime			sys_timer_gettime32
+242	64	timer_gettime			sys_timer_gettime
+242	spu	timer_gettime			sys_timer_gettime
 243	common	timer_getoverrun		sys_timer_getoverrun
 244	common	timer_delete			sys_timer_delete
-245	common	clock_settime			sys_clock_settime		sys_clock_settime32
-246	common	clock_gettime			sys_clock_gettime		sys_clock_gettime32
-247	common	clock_getres			sys_clock_getres		sys_clock_getres_time32
-248	common	clock_nanosleep			sys_clock_nanosleep		sys_clock_nanosleep_time32
+245	32	clock_settime			sys_clock_settime32
+245	64	clock_settime			sys_clock_settime
+245	spu	clock_settime			sys_clock_settime
+246	32	clock_gettime			sys_clock_gettime32
+246	64	clock_gettime			sys_clock_gettime
+246	spu	clock_gettime			sys_clock_gettime
+247	32	clock_getres			sys_clock_getres_time32
+247	64	clock_getres			sys_clock_getres
+247	spu	clock_getres			sys_clock_getres
+248	32	clock_nanosleep			sys_clock_nanosleep_time32
+248	64	clock_nanosleep			sys_clock_nanosleep
+248	spu	clock_nanosleep			sys_clock_nanosleep
 249	32	swapcontext			ppc_swapcontext			ppc32_swapcontext
 249	64	swapcontext			ppc64_swapcontext
 249	spu	swapcontext			sys_ni_syscall
@@ -308,8 +331,10 @@
 261	nospu	set_mempolicy			sys_set_mempolicy		compat_sys_set_mempolicy
 262	nospu	mq_open				sys_mq_open			compat_sys_mq_open
 263	nospu	mq_unlink			sys_mq_unlink
-264	nospu	mq_timedsend			sys_mq_timedsend		sys_mq_timedsend_time32
-265	nospu	mq_timedreceive			sys_mq_timedreceive		sys_mq_timedreceive_time32
+264	32	mq_timedsend			sys_mq_timedsend_time32
+264	64	mq_timedsend			sys_mq_timedsend
+265	32	mq_timedreceive			sys_mq_timedreceive_time32
+265	64	mq_timedreceive			sys_mq_timedreceive
 266	nospu	mq_notify			sys_mq_notify			compat_sys_mq_notify
 267	nospu	mq_getsetattr			sys_mq_getsetattr		compat_sys_mq_getsetattr
 268	nospu	kexec_load			sys_kexec_load			compat_sys_kexec_load
@@ -324,8 +349,10 @@
 277	nospu	inotify_rm_watch		sys_inotify_rm_watch
 278	nospu	spu_run				sys_spu_run
 279	nospu	spu_create			sys_spu_create
-280	nospu	pselect6			sys_pselect6			compat_sys_pselect6_time32
-281	nospu	ppoll				sys_ppoll			compat_sys_ppoll_time32
+280	32	pselect6			sys_pselect6_time32		compat_sys_pselect6_time32
+280	64	pselect6			sys_pselect6
+281	32	ppoll				sys_ppoll_time32		compat_sys_ppoll_time32
+281	64	ppoll				sys_ppoll
 282	common	unshare				sys_unshare
 283	common	splice				sys_splice
 284	common	tee				sys_tee
@@ -350,15 +377,21 @@
 301	common	move_pages			sys_move_pages			compat_sys_move_pages
 302	common	getcpu				sys_getcpu
 303	nospu	epoll_pwait			sys_epoll_pwait			compat_sys_epoll_pwait
-304	common	utimensat			sys_utimensat			sys_utimensat_time32
+304	32	utimensat			sys_utimensat_time32
+304	64	utimensat			sys_utimensat
+304	spu	utimensat			sys_utimensat
 305	common	signalfd			sys_signalfd			compat_sys_signalfd
 306	common	timerfd_create			sys_timerfd_create
 307	common	eventfd				sys_eventfd
 308	common	sync_file_range2		sys_sync_file_range2		compat_sys_sync_file_range2
 309	nospu	fallocate			sys_fallocate			compat_sys_fallocate
 310	nospu	subpage_prot			sys_subpage_prot
-311	common	timerfd_settime			sys_timerfd_settime		sys_timerfd_settime32
-312	common	timerfd_gettime			sys_timerfd_gettime		sys_timerfd_gettime32
+311	32	timerfd_settime			sys_timerfd_settime32
+311	64	timerfd_settime			sys_timerfd_settime
+311	spu	timerfd_settime			sys_timerfd_settime
+312	32	timerfd_gettime			sys_timerfd_gettime32
+312	64	timerfd_gettime			sys_timerfd_gettime
+312	spu	timerfd_gettime			sys_timerfd_gettime
 313	common	signalfd4			sys_signalfd4			compat_sys_signalfd4
 314	common	eventfd2			sys_eventfd2
 315	common	epoll_create1			sys_epoll_create1
@@ -389,11 +422,15 @@
 340	common	getsockopt			sys_getsockopt			compat_sys_getsockopt
 341	common	sendmsg				sys_sendmsg			compat_sys_sendmsg
 342	common	recvmsg				sys_recvmsg			compat_sys_recvmsg
-343	common	recvmmsg			sys_recvmmsg			compat_sys_recvmmsg_time32
+343	32	recvmmsg			sys_recvmmsg_time32		compat_sys_recvmmsg_time32
+343	64	recvmmsg			sys_recvmmsg
+343	spu	recvmmsg			sys_recvmmsg
 344	common	accept4				sys_accept4
 345	common	name_to_handle_at		sys_name_to_handle_at
 346	common	open_by_handle_at		sys_open_by_handle_at		compat_sys_open_by_handle_at
-347	common	clock_adjtime			sys_clock_adjtime		sys_clock_adjtime32
+347	32	clock_adjtime			sys_clock_adjtime32
+347	64	clock_adjtime			sys_clock_adjtime
+347	spu	clock_adjtime			sys_clock_adjtime
 348	common	syncfs				sys_syncfs
 349	common	sendmmsg			sys_sendmmsg			compat_sys_sendmmsg
 350	common	setns				sys_setns
@@ -425,7 +462,8 @@
 385	nospu	pkey_free			sys_pkey_free
 386	nospu	pkey_mprotect			sys_pkey_mprotect
 387	nospu	rseq				sys_rseq
-388	nospu	io_pgetevents			sys_io_pgetevents		compat_sys_io_pgetevents
+388	32	io_pgetevents			sys_io_pgetevents_time32	compat_sys_io_pgetevents
+388	64	io_pgetevents			sys_io_pgetevents
 # room for arch specific syscalls
 392	64	semtimedop			sys_semtimedop
 393	common	semget				sys_semget
diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/syscall.tbl
index cafa63c6a932..a08a5312cd12 100644
--- a/arch/sh/kernel/syscalls/syscall.tbl
+++ b/arch/sh/kernel/syscalls/syscall.tbl
@@ -131,7 +131,7 @@
 121	common	setdomainname			sys_setdomainname
 122	common	uname				sys_newuname
 123	common	cacheflush			sys_cacheflush
-124	common	adjtimex			sys_adjtimex
+124	common	adjtimex			sys_adjtimex_time32
 125	common	mprotect			sys_mprotect
 126	common	sigprocmask			sys_sigprocmask
 # 127 was create_module
@@ -168,8 +168,8 @@
 158	common	sched_yield			sys_sched_yield
 159	common	sched_get_priority_max		sys_sched_get_priority_max
 160	common	sched_get_priority_min		sys_sched_get_priority_min
-161	common	sched_rr_get_interval		sys_sched_rr_get_interval
-162	common	nanosleep			sys_nanosleep
+161	common	sched_rr_get_interval		sys_sched_rr_get_interval_time32
+162	common	nanosleep			sys_nanosleep_time32
 163	common	mremap				sys_mremap
 164	common	setresuid			sys_setresuid16
 165	common	getresuid			sys_getresuid16
@@ -184,7 +184,7 @@
 174	common	rt_sigaction			sys_rt_sigaction
 175	common	rt_sigprocmask			sys_rt_sigprocmask
 176	common	rt_sigpending			sys_rt_sigpending
-177	common	rt_sigtimedwait			sys_rt_sigtimedwait
+177	common	rt_sigtimedwait			sys_rt_sigtimedwait_time32
 178	common	rt_sigqueueinfo			sys_rt_sigqueueinfo
 179	common	rt_sigsuspend			sys_rt_sigsuspend
 180	common	pread64				sys_pread_wrapper
@@ -247,14 +247,14 @@
 237	common	fremovexattr			sys_fremovexattr
 238	common	tkill				sys_tkill
 239	common	sendfile64			sys_sendfile64
-240	common	futex				sys_futex
+240	common	futex				sys_futex_time32
 241	common	sched_setaffinity		sys_sched_setaffinity
 242	common	sched_getaffinity		sys_sched_getaffinity
 # 243 is reserved for set_thread_area
 # 244 is reserved for get_thread_area
 245	common	io_setup			sys_io_setup
 246	common	io_destroy			sys_io_destroy
-247	common	io_getevents			sys_io_getevents
+247	common	io_getevents			sys_io_getevents_time32
 248	common	io_submit			sys_io_submit
 249	common	io_cancel			sys_io_cancel
 250	common	fadvise64			sys_fadvise64
@@ -267,14 +267,14 @@
 257	common	remap_file_pages		sys_remap_file_pages
 258	common	set_tid_address			sys_set_tid_address
 259	common	timer_create			sys_timer_create
-260	common	timer_settime			sys_timer_settime
-261	common	timer_gettime			sys_timer_gettime
+260	common	timer_settime			sys_timer_settime32
+261	common	timer_gettime			sys_timer_gettime32
 262	common	timer_getoverrun		sys_timer_getoverrun
 263	common	timer_delete			sys_timer_delete
-264	common	clock_settime			sys_clock_settime
-265	common	clock_gettime			sys_clock_gettime
-266	common	clock_getres			sys_clock_getres
-267	common	clock_nanosleep			sys_clock_nanosleep
+264	common	clock_settime			sys_clock_settime32
+265	common	clock_gettime			sys_clock_gettime32
+266	common	clock_getres			sys_clock_getres_time32
+267	common	clock_nanosleep			sys_clock_nanosleep_time32
 268	common	statfs64			sys_statfs64
 269	common	fstatfs64			sys_fstatfs64
 270	common	tgkill				sys_tgkill
@@ -286,8 +286,8 @@
 276	common	set_mempolicy			sys_set_mempolicy
 277	common	mq_open				sys_mq_open
 278	common	mq_unlink			sys_mq_unlink
-279	common	mq_timedsend			sys_mq_timedsend
-280	common	mq_timedreceive			sys_mq_timedreceive
+279	common	mq_timedsend			sys_mq_timedsend_time32
+280	common	mq_timedreceive			sys_mq_timedreceive_time32
 281	common	mq_notify			sys_mq_notify
 282	common	mq_getsetattr			sys_mq_getsetattr
 283	common	kexec_load			sys_kexec_load
@@ -315,8 +315,8 @@
 305	common	readlinkat			sys_readlinkat
 306	common	fchmodat			sys_fchmodat
 307	common	faccessat			sys_faccessat
-308	common	pselect6			sys_pselect6
-309	common	ppoll				sys_ppoll
+308	common	pselect6			sys_pselect6_time32
+309	common	ppoll				sys_ppoll_time32
 310	common	unshare				sys_unshare
 311	common	set_robust_list			sys_set_robust_list
 312	common	get_robust_list			sys_get_robust_list
@@ -327,13 +327,13 @@
 317	common	move_pages			sys_move_pages
 318	common	getcpu				sys_getcpu
 319	common	epoll_pwait			sys_epoll_pwait
-320	common	utimensat			sys_utimensat
+320	common	utimensat			sys_utimensat_time32
 321	common	signalfd			sys_signalfd
 322	common	timerfd_create			sys_timerfd_create
 323	common	eventfd				sys_eventfd
 324	common	fallocate			sys_fallocate
-325	common	timerfd_settime			sys_timerfd_settime
-326	common	timerfd_gettime			sys_timerfd_gettime
+325	common	timerfd_settime			sys_timerfd_settime32
+326	common	timerfd_gettime			sys_timerfd_gettime32
 327	common	signalfd4			sys_signalfd4
 328	common	eventfd2			sys_eventfd2
 329	common	epoll_create1			sys_epoll_create1
@@ -364,11 +364,11 @@
 354	common	getsockopt			sys_getsockopt
 355	common	sendmsg				sys_sendmsg
 356	common	recvmsg				sys_recvmsg
-357	common	recvmmsg			sys_recvmmsg
+357	common	recvmmsg			sys_recvmmsg_time32
 358	common	accept4				sys_accept4
 359	common	name_to_handle_at		sys_name_to_handle_at
 360	common	open_by_handle_at		sys_open_by_handle_at
-361	common	clock_adjtime			sys_clock_adjtime
+361	common	clock_adjtime			sys_clock_adjtime32
 362	common	syncfs				sys_syncfs
 363	common	sendmmsg			sys_sendmmsg
 364	common	setns				sys_setns
diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl
index 7cb05b50aeaa..dc1e08040b39 100644
--- a/arch/sparc/kernel/syscalls/syscall.tbl
+++ b/arch/sparc/kernel/syscalls/syscall.tbl
@@ -128,7 +128,8 @@
 102	common	rt_sigaction		sys_rt_sigaction		compat_sys_rt_sigaction
 103	common	rt_sigprocmask		sys_rt_sigprocmask		compat_sys_rt_sigprocmask
 104	common	rt_sigpending		sys_rt_sigpending		compat_sys_rt_sigpending
-105	common	rt_sigtimedwait		sys_rt_sigtimedwait		compat_sys_rt_sigtimedwait_time32
+105	32	rt_sigtimedwait		sys_rt_sigtimedwait_time32	compat_sys_rt_sigtimedwait_time32
+105	64	rt_sigtimedwait		sys_rt_sigtimedwait
 106	common	rt_sigqueueinfo		sys_rt_sigqueueinfo		compat_sys_rt_sigqueueinfo
 107	common	rt_sigsuspend		sys_rt_sigsuspend		compat_sys_rt_sigsuspend
 108	32	setresuid32		sys_setresuid
@@ -172,7 +173,8 @@
 139	common	stat64			sys_stat64			compat_sys_stat64
 140	common	sendfile64		sys_sendfile64
 141	common	getpeername		sys_getpeername
-142	common	futex			sys_futex			sys_futex_time32
+142	32	futex			sys_futex_time32
+142	64	futex			sys_futex
 143	common	gettid			sys_gettid
 144	common	getrlimit		sys_getrlimit			compat_sys_getrlimit
 145	common	setrlimit		sys_setrlimit			compat_sys_setrlimit
@@ -258,7 +260,7 @@
 216	64	sigreturn		sys_nis_syscall
 217	common	clone			sys_clone
 218	common	ioprio_get		sys_ioprio_get
-219	32	adjtimex		sys_adjtimex			sys_adjtimex_time32
+219	32	adjtimex		sys_adjtimex_time32
 219	64	adjtimex		sys_sparc_adjtimex
 220	32	sigprocmask		sys_sigprocmask			compat_sys_sigprocmask
 220	64	sigprocmask		sys_nis_syscall
@@ -289,8 +291,10 @@
 245	common	sched_yield		sys_sched_yield
 246	common	sched_get_priority_max	sys_sched_get_priority_max
 247	common	sched_get_priority_min	sys_sched_get_priority_min
-248	common	sched_rr_get_interval	sys_sched_rr_get_interval	sys_sched_rr_get_interval_time32
-249	common	nanosleep		sys_nanosleep			sys_nanosleep_time32
+248	32	sched_rr_get_interval	sys_sched_rr_get_interval_time32
+248	64	sched_rr_get_interval	sys_sched_rr_get_interval
+249	32	nanosleep		sys_nanosleep_time32
+249	64	nanosleep		sys_nanosleep
 250	32	mremap			sys_mremap
 250	64	mremap			sys_64_mremap
 251	common	_sysctl			sys_sysctl			compat_sys_sysctl
@@ -299,14 +303,20 @@
 254	32	nfsservctl		sys_ni_syscall			sys_nis_syscall
 254	64	nfsservctl		sys_nis_syscall
 255	common	sync_file_range		sys_sync_file_range		compat_sys_sync_file_range
-256	common	clock_settime		sys_clock_settime		sys_clock_settime32
-257	common	clock_gettime		sys_clock_gettime		sys_clock_gettime32
-258	common	clock_getres		sys_clock_getres		sys_clock_getres_time32
-259	common	clock_nanosleep		sys_clock_nanosleep		sys_clock_nanosleep_time32
+256	32	clock_settime		sys_clock_settime32
+256	64	clock_settime		sys_clock_settime
+257	32	clock_gettime		sys_clock_gettime32
+257	64	clock_gettime		sys_clock_gettime
+258	32	clock_getres		sys_clock_getres_time32
+258	64	clock_getres		sys_clock_getres
+259	32	clock_nanosleep		sys_clock_nanosleep_time32
+259	64	clock_nanosleep		sys_clock_nanosleep
 260	common	sched_getaffinity	sys_sched_getaffinity		compat_sys_sched_getaffinity
 261	common	sched_setaffinity	sys_sched_setaffinity		compat_sys_sched_setaffinity
-262	common	timer_settime		sys_timer_settime		sys_timer_settime32
-263	common	timer_gettime		sys_timer_gettime		sys_timer_gettime32
+262	32	timer_settime		sys_timer_settime32
+262	64	timer_settime		sys_timer_settime
+263	32	timer_gettime		sys_timer_gettime32
+263	64	timer_gettime		sys_timer_gettime
 264	common	timer_getoverrun	sys_timer_getoverrun
 265	common	timer_delete		sys_timer_delete
 266	common	timer_create		sys_timer_create		compat_sys_timer_create
@@ -316,11 +326,14 @@
 269	common	io_destroy		sys_io_destroy
 270	common	io_submit		sys_io_submit			compat_sys_io_submit
 271	common	io_cancel		sys_io_cancel
-272	common	io_getevents		sys_io_getevents		sys_io_getevents_time32
+272	32	io_getevents		sys_io_getevents_time32
+272	64	io_getevents		sys_io_getevents
 273	common	mq_open			sys_mq_open			compat_sys_mq_open
 274	common	mq_unlink		sys_mq_unlink
-275	common	mq_timedsend		sys_mq_timedsend		sys_mq_timedsend_time32
-276	common	mq_timedreceive		sys_mq_timedreceive		sys_mq_timedreceive_time32
+275	32	mq_timedsend		sys_mq_timedsend_time32
+275	64	mq_timedsend		sys_mq_timedsend
+276	32	mq_timedreceive		sys_mq_timedreceive_time32
+276	64	mq_timedreceive		sys_mq_timedreceive
 277	common	mq_notify		sys_mq_notify			compat_sys_mq_notify
 278	common	mq_getsetattr		sys_mq_getsetattr		compat_sys_mq_getsetattr
 279	common	waitid			sys_waitid			compat_sys_waitid
@@ -341,8 +354,10 @@
 294	common	readlinkat		sys_readlinkat
 295	common	fchmodat		sys_fchmodat
 296	common	faccessat		sys_faccessat
-297	common	pselect6		sys_pselect6			compat_sys_pselect6_time32
-298	common	ppoll			sys_ppoll			compat_sys_ppoll_time32
+297	32	pselect6		sys_pselect6_time32		compat_sys_pselect6_time32
+297	64	pselect6		sys_pselect6
+298	32	ppoll			sys_ppoll_time32		compat_sys_ppoll_time32
+298	64	ppoll			sys_ppoll
 299	common	unshare			sys_unshare
 300	common	set_robust_list		sys_set_robust_list		compat_sys_set_robust_list
 301	common	get_robust_list		sys_get_robust_list		compat_sys_get_robust_list
@@ -354,13 +369,16 @@
 307	common	move_pages		sys_move_pages			compat_sys_move_pages
 308	common	getcpu			sys_getcpu
 309	common	epoll_pwait		sys_epoll_pwait			compat_sys_epoll_pwait
-310	common	utimensat		sys_utimensat			sys_utimensat_time32
+310	32	utimensat		sys_utimensat_time32
+310	64	utimensat		sys_utimensat
 311	common	signalfd		sys_signalfd			compat_sys_signalfd
 312	common	timerfd_create		sys_timerfd_create
 313	common	eventfd			sys_eventfd
 314	common	fallocate		sys_fallocate			compat_sys_fallocate
-315	common	timerfd_settime		sys_timerfd_settime		sys_timerfd_settime32
-316	common	timerfd_gettime		sys_timerfd_gettime		sys_timerfd_gettime32
+315	32	timerfd_settime		sys_timerfd_settime32
+315	64	timerfd_settime		sys_timerfd_settime
+316	32	timerfd_gettime		sys_timerfd_gettime32
+316	64	timerfd_gettime		sys_timerfd_gettime
 317	common	signalfd4		sys_signalfd4			compat_sys_signalfd4
 318	common	eventfd2		sys_eventfd2
 319	common	epoll_create1		sys_epoll_create1
@@ -372,13 +390,14 @@
 325	common	pwritev			sys_pwritev			compat_sys_pwritev
 326	common	rt_tgsigqueueinfo	sys_rt_tgsigqueueinfo		compat_sys_rt_tgsigqueueinfo
 327	common	perf_event_open		sys_perf_event_open
-328	common	recvmmsg		sys_recvmmsg			compat_sys_recvmmsg_time32
+328	32	recvmmsg		sys_recvmmsg_time32		compat_sys_recvmmsg_time32
+328	64	recvmmsg		sys_recvmmsg
 329	common	fanotify_init		sys_fanotify_init
 330	common	fanotify_mark		sys_fanotify_mark		compat_sys_fanotify_mark
 331	common	prlimit64		sys_prlimit64
 332	common	name_to_handle_at	sys_name_to_handle_at
 333	common	open_by_handle_at	sys_open_by_handle_at		compat_sys_open_by_handle_at
-334	32	clock_adjtime		sys_clock_adjtime		sys_clock_adjtime32
+334	32	clock_adjtime		sys_clock_adjtime32
 334	64	clock_adjtime		sys_sparc_clock_adjtime
 335	common	syncfs			sys_syncfs
 336	common	sendmmsg		sys_sendmmsg			compat_sys_sendmmsg
@@ -408,7 +427,8 @@
 358	common	preadv2			sys_preadv2			compat_sys_preadv2
 359	common	pwritev2		sys_pwritev2			compat_sys_pwritev2
 360	common	statx			sys_statx
-361	common	io_pgetevents		sys_io_pgetevents		compat_sys_io_pgetevents
+361	32	io_pgetevents		sys_io_pgetevents_time32	compat_sys_io_pgetevents
+361	64	io_pgetevents		sys_io_pgetevents
 362	common	pkey_mprotect		sys_pkey_mprotect
 363	common	pkey_alloc		sys_pkey_alloc
 364	common	pkey_free		sys_pkey_free
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index 7705d5ecad25..28888d6b7f61 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -135,7 +135,7 @@
 121	i386	setdomainname		sys_setdomainname		__ia32_sys_setdomainname
 122	i386	uname			sys_newuname			__ia32_sys_newuname
 123	i386	modify_ldt		sys_modify_ldt			__ia32_sys_modify_ldt
-124	i386	adjtimex		sys_adjtimex			__ia32_sys_adjtimex_time32
+124	i386	adjtimex		sys_adjtimex_time32			__ia32_sys_adjtimex_time32
 125	i386	mprotect		sys_mprotect			__ia32_sys_mprotect
 126	i386	sigprocmask		sys_sigprocmask			__ia32_compat_sys_sigprocmask
 127	i386	create_module
@@ -172,8 +172,8 @@
 158	i386	sched_yield		sys_sched_yield			__ia32_sys_sched_yield
 159	i386	sched_get_priority_max	sys_sched_get_priority_max	__ia32_sys_sched_get_priority_max
 160	i386	sched_get_priority_min	sys_sched_get_priority_min	__ia32_sys_sched_get_priority_min
-161	i386	sched_rr_get_interval	sys_sched_rr_get_interval	__ia32_sys_sched_rr_get_interval_time32
-162	i386	nanosleep		sys_nanosleep			__ia32_sys_nanosleep_time32
+161	i386	sched_rr_get_interval	sys_sched_rr_get_interval_time32	__ia32_sys_sched_rr_get_interval_time32
+162	i386	nanosleep		sys_nanosleep_time32		__ia32_sys_nanosleep_time32
 163	i386	mremap			sys_mremap			__ia32_sys_mremap
 164	i386	setresuid		sys_setresuid16			__ia32_sys_setresuid16
 165	i386	getresuid		sys_getresuid16			__ia32_sys_getresuid16
@@ -188,7 +188,7 @@
 174	i386	rt_sigaction		sys_rt_sigaction		__ia32_compat_sys_rt_sigaction
 175	i386	rt_sigprocmask		sys_rt_sigprocmask		__ia32_sys_rt_sigprocmask
 176	i386	rt_sigpending		sys_rt_sigpending		__ia32_compat_sys_rt_sigpending
-177	i386	rt_sigtimedwait		sys_rt_sigtimedwait		__ia32_compat_sys_rt_sigtimedwait_time32
+177	i386	rt_sigtimedwait		sys_rt_sigtimedwait_time32	__ia32_compat_sys_rt_sigtimedwait_time32
 178	i386	rt_sigqueueinfo		sys_rt_sigqueueinfo		__ia32_compat_sys_rt_sigqueueinfo
 179	i386	rt_sigsuspend		sys_rt_sigsuspend		__ia32_sys_rt_sigsuspend
 180	i386	pread64			sys_pread64			__ia32_compat_sys_x86_pread
@@ -251,14 +251,14 @@
 237	i386	fremovexattr		sys_fremovexattr		__ia32_sys_fremovexattr
 238	i386	tkill			sys_tkill			__ia32_sys_tkill
 239	i386	sendfile64		sys_sendfile64			__ia32_sys_sendfile64
-240	i386	futex			sys_futex			__ia32_sys_futex_time32
+240	i386	futex			sys_futex_time32		__ia32_sys_futex_time32
 241	i386	sched_setaffinity	sys_sched_setaffinity		__ia32_compat_sys_sched_setaffinity
 242	i386	sched_getaffinity	sys_sched_getaffinity		__ia32_compat_sys_sched_getaffinity
 243	i386	set_thread_area		sys_set_thread_area		__ia32_sys_set_thread_area
 244	i386	get_thread_area		sys_get_thread_area		__ia32_sys_get_thread_area
 245	i386	io_setup		sys_io_setup			__ia32_compat_sys_io_setup
 246	i386	io_destroy		sys_io_destroy			__ia32_sys_io_destroy
-247	i386	io_getevents		sys_io_getevents		__ia32_sys_io_getevents_time32
+247	i386	io_getevents		sys_io_getevents_time32		__ia32_sys_io_getevents_time32
 248	i386	io_submit		sys_io_submit			__ia32_compat_sys_io_submit
 249	i386	io_cancel		sys_io_cancel			__ia32_sys_io_cancel
 250	i386	fadvise64		sys_fadvise64			__ia32_compat_sys_x86_fadvise64
@@ -271,14 +271,14 @@
 257	i386	remap_file_pages	sys_remap_file_pages		__ia32_sys_remap_file_pages
 258	i386	set_tid_address		sys_set_tid_address		__ia32_sys_set_tid_address
 259	i386	timer_create		sys_timer_create		__ia32_compat_sys_timer_create
-260	i386	timer_settime		sys_timer_settime		__ia32_sys_timer_settime32
-261	i386	timer_gettime		sys_timer_gettime		__ia32_sys_timer_gettime32
+260	i386	timer_settime		sys_timer_settime32		__ia32_sys_timer_settime32
+261	i386	timer_gettime		sys_timer_gettime32		__ia32_sys_timer_gettime32
 262	i386	timer_getoverrun	sys_timer_getoverrun		__ia32_sys_timer_getoverrun
 263	i386	timer_delete		sys_timer_delete		__ia32_sys_timer_delete
-264	i386	clock_settime		sys_clock_settime		__ia32_sys_clock_settime32
-265	i386	clock_gettime		sys_clock_gettime		__ia32_sys_clock_gettime32
-266	i386	clock_getres		sys_clock_getres		__ia32_sys_clock_getres_time32
-267	i386	clock_nanosleep		sys_clock_nanosleep		__ia32_sys_clock_nanosleep_time32
+264	i386	clock_settime		sys_clock_settime32		__ia32_sys_clock_settime32
+265	i386	clock_gettime		sys_clock_gettime32		__ia32_sys_clock_gettime32
+266	i386	clock_getres		sys_clock_getres_time32		__ia32_sys_clock_getres_time32
+267	i386	clock_nanosleep		sys_clock_nanosleep_time32	__ia32_sys_clock_nanosleep_time32
 268	i386	statfs64		sys_statfs64			__ia32_compat_sys_statfs64
 269	i386	fstatfs64		sys_fstatfs64			__ia32_compat_sys_fstatfs64
 270	i386	tgkill			sys_tgkill			__ia32_sys_tgkill
@@ -290,8 +290,8 @@
 276	i386	set_mempolicy		sys_set_mempolicy		__ia32_sys_set_mempolicy
 277	i386	mq_open			sys_mq_open			__ia32_compat_sys_mq_open
 278	i386	mq_unlink		sys_mq_unlink			__ia32_sys_mq_unlink
-279	i386	mq_timedsend		sys_mq_timedsend		__ia32_sys_mq_timedsend_time32
-280	i386	mq_timedreceive		sys_mq_timedreceive		__ia32_sys_mq_timedreceive_time32
+279	i386	mq_timedsend		sys_mq_timedsend_time32		__ia32_sys_mq_timedsend_time32
+280	i386	mq_timedreceive		sys_mq_timedreceive_time32	__ia32_sys_mq_timedreceive_time32
 281	i386	mq_notify		sys_mq_notify			__ia32_compat_sys_mq_notify
 282	i386	mq_getsetattr		sys_mq_getsetattr		__ia32_compat_sys_mq_getsetattr
 283	i386	kexec_load		sys_kexec_load			__ia32_compat_sys_kexec_load
@@ -319,8 +319,8 @@
 305	i386	readlinkat		sys_readlinkat			__ia32_sys_readlinkat
 306	i386	fchmodat		sys_fchmodat			__ia32_sys_fchmodat
 307	i386	faccessat		sys_faccessat			__ia32_sys_faccessat
-308	i386	pselect6		sys_pselect6			__ia32_compat_sys_pselect6_time32
-309	i386	ppoll			sys_ppoll			__ia32_compat_sys_ppoll_time32
+308	i386	pselect6		sys_pselect6_time32		__ia32_compat_sys_pselect6_time32
+309	i386	ppoll			sys_ppoll_time32		__ia32_compat_sys_ppoll_time32
 310	i386	unshare			sys_unshare			__ia32_sys_unshare
 311	i386	set_robust_list		sys_set_robust_list		__ia32_compat_sys_set_robust_list
 312	i386	get_robust_list		sys_get_robust_list		__ia32_compat_sys_get_robust_list
@@ -331,13 +331,13 @@
 317	i386	move_pages		sys_move_pages			__ia32_compat_sys_move_pages
 318	i386	getcpu			sys_getcpu			__ia32_sys_getcpu
 319	i386	epoll_pwait		sys_epoll_pwait			__ia32_sys_epoll_pwait
-320	i386	utimensat		sys_utimensat			__ia32_sys_utimensat_time32
+320	i386	utimensat		sys_utimensat_time32		__ia32_sys_utimensat_time32
 321	i386	signalfd		sys_signalfd			__ia32_compat_sys_signalfd
 322	i386	timerfd_create		sys_timerfd_create		__ia32_sys_timerfd_create
 323	i386	eventfd			sys_eventfd			__ia32_sys_eventfd
 324	i386	fallocate		sys_fallocate			__ia32_compat_sys_x86_fallocate
-325	i386	timerfd_settime		sys_timerfd_settime		__ia32_sys_timerfd_settime32
-326	i386	timerfd_gettime		sys_timerfd_gettime		__ia32_sys_timerfd_gettime32
+325	i386	timerfd_settime		sys_timerfd_settime32		__ia32_sys_timerfd_settime32
+326	i386	timerfd_gettime		sys_timerfd_gettime32		__ia32_sys_timerfd_gettime32
 327	i386	signalfd4		sys_signalfd4			__ia32_compat_sys_signalfd4
 328	i386	eventfd2		sys_eventfd2			__ia32_sys_eventfd2
 329	i386	epoll_create1		sys_epoll_create1		__ia32_sys_epoll_create1
@@ -348,13 +348,13 @@
 334	i386	pwritev			sys_pwritev			__ia32_compat_sys_pwritev
 335	i386	rt_tgsigqueueinfo	sys_rt_tgsigqueueinfo		__ia32_compat_sys_rt_tgsigqueueinfo
 336	i386	perf_event_open		sys_perf_event_open		__ia32_sys_perf_event_open
-337	i386	recvmmsg		sys_recvmmsg			__ia32_compat_sys_recvmmsg_time32
+337	i386	recvmmsg		sys_recvmmsg_time32		__ia32_compat_sys_recvmmsg_time32
 338	i386	fanotify_init		sys_fanotify_init		__ia32_sys_fanotify_init
 339	i386	fanotify_mark		sys_fanotify_mark		__ia32_compat_sys_fanotify_mark
 340	i386	prlimit64		sys_prlimit64			__ia32_sys_prlimit64
 341	i386	name_to_handle_at	sys_name_to_handle_at		__ia32_sys_name_to_handle_at
 342	i386	open_by_handle_at	sys_open_by_handle_at		__ia32_compat_sys_open_by_handle_at
-343	i386	clock_adjtime		sys_clock_adjtime		__ia32_sys_clock_adjtime32
+343	i386	clock_adjtime		sys_clock_adjtime32		__ia32_sys_clock_adjtime32
 344	i386	syncfs			sys_syncfs			__ia32_sys_syncfs
 345	i386	sendmmsg		sys_sendmmsg			__ia32_compat_sys_sendmmsg
 346	i386	setns			sys_setns			__ia32_sys_setns
@@ -396,7 +396,7 @@
 382	i386	pkey_free		sys_pkey_free			__ia32_sys_pkey_free
 383	i386	statx			sys_statx			__ia32_sys_statx
 384	i386	arch_prctl		sys_arch_prctl			__ia32_compat_sys_arch_prctl
-385	i386	io_pgetevents		sys_io_pgetevents		__ia32_compat_sys_io_pgetevents
+385	i386	io_pgetevents		sys_io_pgetevents_time32	__ia32_compat_sys_io_pgetevents
 386	i386	rseq			sys_rseq			__ia32_sys_rseq
 # don't use numbers 387 through 392, add new calls at the end
 393	i386	semget			sys_semget    			__ia32_sys_semget
diff --git a/arch/xtensa/kernel/syscalls/syscall.tbl b/arch/xtensa/kernel/syscalls/syscall.tbl
index c699e014e0dd..6f05bc8f015a 100644
--- a/arch/xtensa/kernel/syscalls/syscall.tbl
+++ b/arch/xtensa/kernel/syscalls/syscall.tbl
@@ -174,7 +174,7 @@
 158	common	capget				sys_capget
 159	common	capset				sys_capset
 160	common	ptrace				sys_ptrace
-161	common	semtimedop			sys_semtimedop
+161	common	semtimedop			sys_semtimedop_time32
 162	common	semget				sys_semget
 163	common	semop				sys_semop
 164	common	semctl				sys_old_semctl
@@ -206,11 +206,11 @@
 188	common	setrlimit			sys_setrlimit
 189	common	getrlimit			sys_getrlimit
 190	common	getrusage			sys_getrusage
-191	common	futex				sys_futex
+191	common	futex				sys_futex_time32
 192	common	gettimeofday			sys_gettimeofday
 193	common	settimeofday			sys_settimeofday
-194	common	adjtimex			sys_adjtimex
-195	common	nanosleep			sys_nanosleep
+194	common	adjtimex			sys_adjtimex_time32
+195	common	nanosleep			sys_nanosleep_time32
 196	common	getgroups			sys_getgroups
 197	common	setgroups			sys_setgroups
 198	common	sethostname			sys_sethostname
@@ -234,7 +234,7 @@
 215	common	sched_getscheduler		sys_sched_getscheduler
 216	common	sched_get_priority_max		sys_sched_get_priority_max
 217	common	sched_get_priority_min		sys_sched_get_priority_min
-218	common	sched_rr_get_interval		sys_sched_rr_get_interval
+218	common	sched_rr_get_interval		sys_sched_rr_get_interval_time32
 219	common	sched_yield			sys_sched_yield
 222	common	available222			sys_ni_syscall
 # Signal Handling
@@ -244,14 +244,14 @@
 226	common	rt_sigaction			sys_rt_sigaction
 227	common	rt_sigprocmask			sys_rt_sigprocmask
 228	common	rt_sigpending			sys_rt_sigpending
-229	common	rt_sigtimedwait			sys_rt_sigtimedwait
+229	common	rt_sigtimedwait			sys_rt_sigtimedwait_time32
 230	common	rt_sigqueueinfo			sys_rt_sigqueueinfo
 231	common	rt_sigsuspend			sys_rt_sigsuspend
 # Message
 232	common	mq_open				sys_mq_open
 233	common	mq_unlink			sys_mq_unlink
-234	common	mq_timedsend			sys_mq_timedsend
-235	common	mq_timedreceive			sys_mq_timedreceive
+234	common	mq_timedsend			sys_mq_timedsend_time32
+235	common	mq_timedreceive			sys_mq_timedreceive_time32
 236	common	mq_notify			sys_mq_notify
 237	common	mq_getsetattr			sys_mq_getsetattr
 238	common	available238			sys_ni_syscall
@@ -259,17 +259,17 @@
 # IO
 240	common	io_destroy			sys_io_destroy
 241	common	io_submit			sys_io_submit
-242	common	io_getevents			sys_io_getevents
+242	common	io_getevents			sys_io_getevents_time32
 243	common	io_cancel			sys_io_cancel
-244	common	clock_settime			sys_clock_settime
-245	common	clock_gettime			sys_clock_gettime
-246	common	clock_getres			sys_clock_getres
-247	common	clock_nanosleep			sys_clock_nanosleep
+244	common	clock_settime			sys_clock_settime32
+245	common	clock_gettime			sys_clock_gettime32
+246	common	clock_getres			sys_clock_getres_time32
+247	common	clock_nanosleep			sys_clock_nanosleep_time32
 # Timer
 248	common	timer_create			sys_timer_create
 249	common	timer_delete			sys_timer_delete
-250	common	timer_settime			sys_timer_settime
-251	common	timer_gettime			sys_timer_gettime
+250	common	timer_settime			sys_timer_settime32
+251	common	timer_gettime			sys_timer_gettime32
 252	common	timer_getoverrun		sys_timer_getoverrun
 # System
 253	common	reserved253			sys_ni_syscall
@@ -291,8 +291,8 @@
 269	common	tee				sys_tee
 270	common	vmsplice			sys_vmsplice
 271	common	available271			sys_ni_syscall
-272	common	pselect6			sys_pselect6
-273	common	ppoll				sys_ppoll
+272	common	pselect6			sys_pselect6_time32
+273	common	ppoll				sys_ppoll_time32
 274	common	epoll_pwait			sys_epoll_pwait
 275	common	epoll_create1			sys_epoll_create1
 276	common	inotify_init			sys_inotify_init
@@ -316,7 +316,7 @@
 293	common	linkat				sys_linkat
 294	common	symlinkat			sys_symlinkat
 295	common	readlinkat			sys_readlinkat
-296	common	utimensat			sys_utimensat
+296	common	utimensat			sys_utimensat_time32
 297	common	fchownat			sys_fchownat
 298	common	futimesat			sys_futimesat
 299	common	fstatat64			sys_fstatat64
@@ -327,14 +327,14 @@
 304	common	signalfd			sys_signalfd
 # 305 was timerfd
 306	common	eventfd				sys_eventfd
-307	common	recvmmsg			sys_recvmmsg
+307	common	recvmmsg			sys_recvmmsg_time32
 308	common	setns				sys_setns
 309	common	signalfd4			sys_signalfd4
 310	common	dup3				sys_dup3
 311	common	pipe2				sys_pipe2
 312	common	timerfd_create			sys_timerfd_create
-313	common	timerfd_settime			sys_timerfd_settime
-314	common	timerfd_gettime			sys_timerfd_gettime
+313	common	timerfd_settime			sys_timerfd_settime32
+314	common	timerfd_gettime			sys_timerfd_gettime32
 315	common	available315			sys_ni_syscall
 316	common	eventfd2			sys_eventfd2
 317	common	preadv				sys_preadv
@@ -349,7 +349,7 @@
 326	common	sync_file_range2		sys_sync_file_range2
 327	common	perf_event_open			sys_perf_event_open
 328	common	rt_tgsigqueueinfo		sys_rt_tgsigqueueinfo
-329	common	clock_adjtime			sys_clock_adjtime
+329	common	clock_adjtime			sys_clock_adjtime32
 330	common	prlimit64			sys_prlimit64
 331	common	kcmp				sys_kcmp
 332	common	finit_module			sys_finit_module
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index 153b55b94234..ab1831769030 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -39,7 +39,7 @@ __SC_COMP(__NR_io_submit, sys_io_submit, compat_sys_io_submit)
 #define __NR_io_cancel 3
 __SYSCALL(__NR_io_cancel, sys_io_cancel)
 #define __NR_io_getevents 4
-__SC_COMP(__NR_io_getevents, sys_io_getevents, sys_io_getevents_time32)
+__SC_3264(__NR_io_getevents, sys_io_getevents_time32, sys_io_getevents)
 
 /* fs/xattr.c */
 #define __NR_setxattr 5
@@ -223,9 +223,9 @@ __SYSCALL(__NR3264_sendfile, sys_sendfile64)
 
 /* fs/select.c */
 #define __NR_pselect6 72
-__SC_COMP(__NR_pselect6, sys_pselect6, compat_sys_pselect6_time32)
+__SC_COMP_3264(__NR_pselect6, sys_pselect6_time32, sys_pselect6, compat_sys_pselect6_time32)
 #define __NR_ppoll 73
-__SC_COMP(__NR_ppoll, sys_ppoll, compat_sys_ppoll_time32)
+__SC_COMP_3264(__NR_ppoll, sys_ppoll_time32, sys_ppoll, compat_sys_ppoll_time32)
 
 /* fs/signalfd.c */
 #define __NR_signalfd4 74
@@ -270,15 +270,15 @@ __SC_COMP(__NR_sync_file_range, sys_sync_file_range, \
 #define __NR_timerfd_create 85
 __SYSCALL(__NR_timerfd_create, sys_timerfd_create)
 #define __NR_timerfd_settime 86
-__SC_COMP(__NR_timerfd_settime, sys_timerfd_settime, \
-	  sys_timerfd_settime32)
+__SC_3264(__NR_timerfd_settime, sys_timerfd_settime32, \
+	  sys_timerfd_settime)
 #define __NR_timerfd_gettime 87
-__SC_COMP(__NR_timerfd_gettime, sys_timerfd_gettime, \
-	  sys_timerfd_gettime32)
+__SC_3264(__NR_timerfd_gettime, sys_timerfd_gettime32, \
+	  sys_timerfd_gettime)
 
 /* fs/utimes.c */
 #define __NR_utimensat 88
-__SC_COMP(__NR_utimensat, sys_utimensat, sys_utimensat_time32)
+__SC_3264(__NR_utimensat, sys_utimensat_time32, sys_utimensat)
 
 /* kernel/acct.c */
 #define __NR_acct 89
@@ -310,7 +310,7 @@ __SYSCALL(__NR_unshare, sys_unshare)
 
 /* kernel/futex.c */
 #define __NR_futex 98
-__SC_COMP(__NR_futex, sys_futex, sys_futex_time32)
+__SC_3264(__NR_futex, sys_futex_time32, sys_futex)
 #define __NR_set_robust_list 99
 __SC_COMP(__NR_set_robust_list, sys_set_robust_list, \
 	  compat_sys_set_robust_list)
@@ -320,7 +320,7 @@ __SC_COMP(__NR_get_robust_list, sys_get_robust_list, \
 
 /* kernel/hrtimer.c */
 #define __NR_nanosleep 101
-__SC_COMP(__NR_nanosleep, sys_nanosleep, sys_nanosleep_time32)
+__SC_3264(__NR_nanosleep, sys_nanosleep_time32, sys_nanosleep)
 
 /* kernel/itimer.c */
 #define __NR_getitimer 102
@@ -342,22 +342,22 @@ __SYSCALL(__NR_delete_module, sys_delete_module)
 #define __NR_timer_create 107
 __SC_COMP(__NR_timer_create, sys_timer_create, compat_sys_timer_create)
 #define __NR_timer_gettime 108
-__SC_COMP(__NR_timer_gettime, sys_timer_gettime, sys_timer_gettime32)
+__SC_3264(__NR_timer_gettime, sys_timer_gettime32, sys_timer_gettime)
 #define __NR_timer_getoverrun 109
 __SYSCALL(__NR_timer_getoverrun, sys_timer_getoverrun)
 #define __NR_timer_settime 110
-__SC_COMP(__NR_timer_settime, sys_timer_settime, sys_timer_settime32)
+__SC_3264(__NR_timer_settime, sys_timer_settime32, sys_timer_settime)
 #define __NR_timer_delete 111
 __SYSCALL(__NR_timer_delete, sys_timer_delete)
 #define __NR_clock_settime 112
-__SC_COMP(__NR_clock_settime, sys_clock_settime, sys_clock_settime32)
+__SC_3264(__NR_clock_settime, sys_clock_settime32, sys_clock_settime)
 #define __NR_clock_gettime 113
-__SC_COMP(__NR_clock_gettime, sys_clock_gettime, sys_clock_gettime32)
+__SC_3264(__NR_clock_gettime, sys_clock_gettime32, sys_clock_gettime)
 #define __NR_clock_getres 114
-__SC_COMP(__NR_clock_getres, sys_clock_getres, sys_clock_getres_time32)
+__SC_3264(__NR_clock_getres, sys_clock_getres_time32, sys_clock_getres)
 #define __NR_clock_nanosleep 115
-__SC_COMP(__NR_clock_nanosleep, sys_clock_nanosleep, \
-	  sys_clock_nanosleep_time32)
+__SC_3264(__NR_clock_nanosleep, sys_clock_nanosleep_time32, \
+	  sys_clock_nanosleep)
 
 /* kernel/printk.c */
 #define __NR_syslog 116
@@ -389,8 +389,8 @@ __SYSCALL(__NR_sched_get_priority_max, sys_sched_get_priority_max)
 #define __NR_sched_get_priority_min 126
 __SYSCALL(__NR_sched_get_priority_min, sys_sched_get_priority_min)
 #define __NR_sched_rr_get_interval 127
-__SC_COMP(__NR_sched_rr_get_interval, sys_sched_rr_get_interval, \
-	  sys_sched_rr_get_interval_time32)
+__SC_3264(__NR_sched_rr_get_interval, sys_sched_rr_get_interval_time32, \
+	  sys_sched_rr_get_interval)
 
 /* kernel/signal.c */
 #define __NR_restart_syscall 128
@@ -412,8 +412,8 @@ __SC_COMP(__NR_rt_sigprocmask, sys_rt_sigprocmask, compat_sys_rt_sigprocmask)
 #define __NR_rt_sigpending 136
 __SC_COMP(__NR_rt_sigpending, sys_rt_sigpending, compat_sys_rt_sigpending)
 #define __NR_rt_sigtimedwait 137
-__SC_COMP(__NR_rt_sigtimedwait, sys_rt_sigtimedwait, \
-	  compat_sys_rt_sigtimedwait_time32)
+__SC_COMP_3264(__NR_rt_sigtimedwait, sys_rt_sigtimedwait_time32, \
+	  sys_rt_sigtimedwait, compat_sys_rt_sigtimedwait_time32)
 #define __NR_rt_sigqueueinfo 138
 __SC_COMP(__NR_rt_sigqueueinfo, sys_rt_sigqueueinfo, \
 	  compat_sys_rt_sigqueueinfo)
@@ -486,7 +486,7 @@ __SC_COMP(__NR_gettimeofday, sys_gettimeofday, compat_sys_gettimeofday)
 #define __NR_settimeofday 170
 __SC_COMP(__NR_settimeofday, sys_settimeofday, compat_sys_settimeofday)
 #define __NR_adjtimex 171
-__SC_COMP(__NR_adjtimex, sys_adjtimex, sys_adjtimex_time32)
+__SC_3264(__NR_adjtimex, sys_adjtimex_time32, sys_adjtimex)
 
 /* kernel/timer.c */
 #define __NR_getpid 172
@@ -512,10 +512,10 @@ __SC_COMP(__NR_mq_open, sys_mq_open, compat_sys_mq_open)
 #define __NR_mq_unlink 181
 __SYSCALL(__NR_mq_unlink, sys_mq_unlink)
 #define __NR_mq_timedsend 182
-__SC_COMP(__NR_mq_timedsend, sys_mq_timedsend, sys_mq_timedsend_time32)
+__SC_3264(__NR_mq_timedsend, sys_mq_timedsend_time32, sys_mq_timedsend)
 #define __NR_mq_timedreceive 183
-__SC_COMP(__NR_mq_timedreceive, sys_mq_timedreceive, \
-	  sys_mq_timedreceive_time32)
+__SC_3264(__NR_mq_timedreceive, sys_mq_timedreceive_time32, \
+	  sys_mq_timedreceive)
 #define __NR_mq_notify 184
 __SC_COMP(__NR_mq_notify, sys_mq_notify, compat_sys_mq_notify)
 #define __NR_mq_getsetattr 185
@@ -659,7 +659,7 @@ __SYSCALL(__NR_perf_event_open, sys_perf_event_open)
 #define __NR_accept4 242
 __SYSCALL(__NR_accept4, sys_accept4)
 #define __NR_recvmmsg 243
-__SC_COMP(__NR_recvmmsg, sys_recvmmsg, compat_sys_recvmmsg_time32)
+__SC_COMP_3264(__NR_recvmmsg, sys_recvmmsg_time32, sys_recvmmsg, compat_sys_recvmmsg_time32)
 
 /*
  * Architectures may provide up to 16 syscalls of their own
@@ -681,7 +681,7 @@ __SYSCALL(__NR_name_to_handle_at, sys_name_to_handle_at)
 __SC_COMP(__NR_open_by_handle_at, sys_open_by_handle_at, \
 	  compat_sys_open_by_handle_at)
 #define __NR_clock_adjtime 266
-__SC_COMP(__NR_clock_adjtime, sys_clock_adjtime, sys_clock_adjtime32)
+__SC_3264(__NR_clock_adjtime, sys_clock_adjtime32, sys_clock_adjtime)
 #define __NR_syncfs 267
 __SYSCALL(__NR_syncfs, sys_syncfs)
 #define __NR_setns 268
@@ -735,7 +735,7 @@ __SYSCALL(__NR_pkey_free,     sys_pkey_free)
 #define __NR_statx 291
 __SYSCALL(__NR_statx,     sys_statx)
 #define __NR_io_pgetevents 292
-__SC_COMP(__NR_io_pgetevents, sys_io_pgetevents, compat_sys_io_pgetevents)
+__SC_COMP_3264(__NR_io_pgetevents, sys_io_pgetevents_time32, sys_io_pgetevents, compat_sys_io_pgetevents)
 #define __NR_rseq 293
 __SYSCALL(__NR_rseq, sys_rseq)
 #define __NR_kexec_file_load 294
-- 
cgit v1.2.3


From c70a772fda11570ebddecbce1543a3fda008db4a Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 7 Jan 2019 00:00:34 +0100
Subject: y2038: remove struct definition redirects

We now use 64-bit time_t on all architectures, so the __kernel_timex,
__kernel_timeval and __kernel_timespec redirects can be removed
after having served their purpose.

This makes it all much less confusing, as the __kernel_* types
now always refer to the same layout based on 64-bit time_t across
all 32-bit and 64-bit architectures.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 include/linux/time64.h     | 8 --------
 include/linux/timex.h      | 7 -------
 include/uapi/linux/time.h  | 4 ----
 include/uapi/linux/timex.h | 2 --
 4 files changed, 21 deletions(-)

(limited to 'include')

diff --git a/include/linux/time64.h b/include/linux/time64.h
index 05634afba0db..f38d382ffec1 100644
--- a/include/linux/time64.h
+++ b/include/linux/time64.h
@@ -7,14 +7,6 @@
 typedef __s64 time64_t;
 typedef __u64 timeu64_t;
 
-/* CONFIG_64BIT_TIME enables new 64 bit time_t syscalls in the compat path
- * and 32-bit emulation.
- */
-#ifndef CONFIG_64BIT_TIME
-#define __kernel_timespec timespec
-#define __kernel_itimerspec itimerspec
-#endif
-
 #include <uapi/linux/time.h>
 
 struct timespec64 {
diff --git a/include/linux/timex.h b/include/linux/timex.h
index 4aff9f0d1367..ce0859763670 100644
--- a/include/linux/timex.h
+++ b/include/linux/timex.h
@@ -53,13 +53,6 @@
 #ifndef _LINUX_TIMEX_H
 #define _LINUX_TIMEX_H
 
-/* CONFIG_64BIT_TIME enables new 64 bit time_t syscalls in the compat path
- * and 32-bit emulation.
- */
-#ifndef CONFIG_64BIT_TIME
-#define __kernel_timex timex
-#endif
-
 #include <uapi/linux/timex.h>
 
 #define ADJ_ADJTIME		0x8000	/* switch between adjtime/adjtimex modes */
diff --git a/include/uapi/linux/time.h b/include/uapi/linux/time.h
index 6b56a2208be7..b03f8717c312 100644
--- a/include/uapi/linux/time.h
+++ b/include/uapi/linux/time.h
@@ -42,19 +42,15 @@ struct itimerval {
 	struct timeval it_value;	/* current value */
 };
 
-#ifndef __kernel_timespec
 struct __kernel_timespec {
 	__kernel_time64_t       tv_sec;                 /* seconds */
 	long long               tv_nsec;                /* nanoseconds */
 };
-#endif
 
-#ifndef __kernel_itimerspec
 struct __kernel_itimerspec {
 	struct __kernel_timespec it_interval;    /* timer period */
 	struct __kernel_timespec it_value;       /* timer expiration */
 };
-#endif
 
 /*
  * legacy timeval structure, only embedded in structures that
diff --git a/include/uapi/linux/timex.h b/include/uapi/linux/timex.h
index a1c6b73016a5..9f517f9010bb 100644
--- a/include/uapi/linux/timex.h
+++ b/include/uapi/linux/timex.h
@@ -97,7 +97,6 @@ struct __kernel_timex_timeval {
 	long long		tv_usec;
 };
 
-#ifndef __kernel_timex
 struct __kernel_timex {
 	unsigned int modes;	/* mode selector */
 	int :32;            /* pad */
@@ -131,7 +130,6 @@ struct __kernel_timex {
 	int  :32; int  :32; int  :32; int  :32;
 	int  :32; int  :32; int  :32;
 };
-#endif
 
 /*
  * Mode codes (timex.mode)
-- 
cgit v1.2.3


From 48166e6ea47d23984f0b481ca199250e1ce0730a Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 10 Jan 2019 12:45:11 +0100
Subject: y2038: add 64-bit time_t syscalls to all 32-bit architectures

This adds 21 new system calls on each ABI that has 32-bit time_t
today. All of these have the exact same semantics as their existing
counterparts, and the new ones all have macro names that end in 'time64'
for clarification.

This gets us to the point of being able to safely use a C library
that has 64-bit time_t in user space. There are still a couple of
loose ends to tie up in various areas of the code, but this is the
big one, and should be entirely uncontroversial at this point.

In particular, there are four system calls (getitimer, setitimer,
waitid, and getrusage) that don't have a 64-bit counterpart yet,
but these can all be safely implemented in the C library by wrapping
around the existing system calls because the 32-bit time_t they
pass only counts elapsed time, not time since the epoch. They
will be dealt with later.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Acked-by: Geert Uytterhoeven <geert@linux-m68k.org>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/alpha/kernel/syscalls/syscall.tbl      |  2 ++
 arch/arm/tools/syscall.tbl                  | 21 ++++++++++++++
 arch/arm64/include/asm/unistd.h             |  2 +-
 arch/arm64/include/asm/unistd32.h           | 41 +++++++++++++++++++++++++++
 arch/ia64/kernel/syscalls/syscall.tbl       |  1 +
 arch/m68k/kernel/syscalls/syscall.tbl       | 20 +++++++++++++
 arch/microblaze/kernel/syscalls/syscall.tbl | 21 ++++++++++++++
 arch/mips/kernel/syscalls/syscall_n32.tbl   | 21 ++++++++++++++
 arch/mips/kernel/syscalls/syscall_n64.tbl   |  1 +
 arch/mips/kernel/syscalls/syscall_o32.tbl   | 20 +++++++++++++
 arch/parisc/kernel/syscalls/syscall.tbl     | 21 ++++++++++++++
 arch/powerpc/kernel/syscalls/syscall.tbl    | 20 +++++++++++++
 arch/s390/kernel/syscalls/syscall.tbl       | 20 +++++++++++++
 arch/sh/kernel/syscalls/syscall.tbl         | 20 +++++++++++++
 arch/sparc/kernel/syscalls/syscall.tbl      | 20 +++++++++++++
 arch/x86/entry/syscalls/syscall_32.tbl      | 20 +++++++++++++
 arch/xtensa/kernel/syscalls/syscall.tbl     | 21 ++++++++++++++
 include/uapi/asm-generic/unistd.h           | 44 ++++++++++++++++++++++++++++-
 scripts/checksyscalls.sh                    | 40 ++++++++++++++++++++++++++
 19 files changed, 374 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl
index 340b88dd397e..63ed39cbd3bd 100644
--- a/arch/alpha/kernel/syscalls/syscall.tbl
+++ b/arch/alpha/kernel/syscalls/syscall.tbl
@@ -461,3 +461,5 @@
 530	common	getegid				sys_getegid
 531	common	geteuid				sys_geteuid
 532	common	getppid				sys_getppid
+# all other architectures have common numbers for new syscall, alpha
+# is the exception.
diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl
index a96d9b5ee04e..9016f4081bb9 100644
--- a/arch/arm/tools/syscall.tbl
+++ b/arch/arm/tools/syscall.tbl
@@ -416,3 +416,24 @@
 399	common	io_pgetevents		sys_io_pgetevents_time32
 400	common	migrate_pages		sys_migrate_pages
 401	common	kexec_file_load		sys_kexec_file_load
+# 402 is unused
+403	common	clock_gettime64			sys_clock_gettime
+404	common	clock_settime64			sys_clock_settime
+405	common	clock_adjtime64			sys_clock_adjtime
+406	common	clock_getres_time64		sys_clock_getres
+407	common	clock_nanosleep_time64		sys_clock_nanosleep
+408	common	timer_gettime64			sys_timer_gettime
+409	common	timer_settime64			sys_timer_settime
+410	common	timerfd_gettime64		sys_timerfd_gettime
+411	common	timerfd_settime64		sys_timerfd_settime
+412	common	utimensat_time64		sys_utimensat
+413	common	pselect6_time64			sys_pselect6
+414	common	ppoll_time64			sys_ppoll
+416	common	io_pgetevents_time64		sys_io_pgetevents
+417	common	recvmmsg_time64			sys_recvmmsg
+418	common	mq_timedsend_time64		sys_mq_timedsend
+419	common	mq_timedreceive_time64		sys_mq_timedreceive
+420	common	semtimedop_time64		sys_semtimedop
+421	common	rt_sigtimedwait_time64		sys_rt_sigtimedwait
+422	common	futex_time64			sys_futex
+423	common	sched_rr_get_interval_time64	sys_sched_rr_get_interval
diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h
index 2c30e6f145ff..d1dd93436e1e 100644
--- a/arch/arm64/include/asm/unistd.h
+++ b/arch/arm64/include/asm/unistd.h
@@ -44,7 +44,7 @@
 #define __ARM_NR_compat_set_tls		(__ARM_NR_COMPAT_BASE + 5)
 #define __ARM_NR_COMPAT_END		(__ARM_NR_COMPAT_BASE + 0x800)
 
-#define __NR_compat_syscalls		402
+#define __NR_compat_syscalls		424
 #endif
 
 #define __ARCH_WANT_SYS_CLONE
diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h
index 1ded82857161..5590f2623690 100644
--- a/arch/arm64/include/asm/unistd32.h
+++ b/arch/arm64/include/asm/unistd32.h
@@ -825,6 +825,47 @@ __SYSCALL(__NR_io_pgetevents, compat_sys_io_pgetevents)
 __SYSCALL(__NR_migrate_pages, compat_sys_migrate_pages)
 #define __NR_kexec_file_load 401
 __SYSCALL(__NR_kexec_file_load, sys_kexec_file_load)
+/* 402 is unused */
+#define __NR_clock_gettime64 403
+__SYSCALL(__NR_clock_gettime64, sys_clock_gettime)
+#define __NR_clock_settime64 404
+__SYSCALL(__NR_clock_settime64, sys_clock_settime)
+#define __NR_clock_adjtime64 405
+__SYSCALL(__NR_clock_adjtime64, sys_clock_adjtime)
+#define __NR_clock_getres_time64 406
+__SYSCALL(__NR_clock_getres_time64, sys_clock_getres)
+#define __NR_clock_nanosleep_time64 407
+__SYSCALL(__NR_clock_nanosleep_time64, sys_clock_nanosleep)
+#define __NR_timer_gettime64 408
+__SYSCALL(__NR_timer_gettime64, sys_timer_gettime)
+#define __NR_timer_settime64 409
+__SYSCALL(__NR_timer_settime64, sys_timer_settime)
+#define __NR_timerfd_gettime64 410
+__SYSCALL(__NR_timerfd_gettime64, sys_timerfd_gettime)
+#define __NR_timerfd_settime64 411
+__SYSCALL(__NR_timerfd_settime64, sys_timerfd_settime)
+#define __NR_utimensat_time64 412
+__SYSCALL(__NR_utimensat_time64, sys_utimensat)
+#define __NR_pselect6_time64 413
+__SYSCALL(__NR_pselect6_time64, compat_sys_pselect6_time64)
+#define __NR_ppoll_time64 414
+__SYSCALL(__NR_ppoll_time64, compat_sys_ppoll_time64)
+#define __NR_io_pgetevents_time64 416
+__SYSCALL(__NR_io_pgetevents_time64, sys_io_pgetevents)
+#define __NR_recvmmsg_time64 417
+__SYSCALL(__NR_recvmmsg_time64, compat_sys_recvmmsg_time64)
+#define __NR_mq_timedsend_time64 418
+__SYSCALL(__NR_mq_timedsend_time64, sys_mq_timedsend)
+#define __NR_mq_timedreceive_time64 419
+__SYSCALL(__NR_mq_timedreceive_time64, sys_mq_timedreceive)
+#define __NR_semtimedop_time64 420
+__SYSCALL(__NR_semtimedop_time64, sys_semtimedop)
+#define __NR_rt_sigtimedwait_time64 421
+__SYSCALL(__NR_rt_sigtimedwait_time64, compat_sys_rt_sigtimedwait_time64)
+#define __NR_futex_time64 422
+__SYSCALL(__NR_futex_time64, sys_futex)
+#define __NR_sched_rr_get_interval_time64 423
+__SYSCALL(__NR_sched_rr_get_interval_time64, sys_sched_rr_get_interval)
 
 /*
  * Please add new compat syscalls above this comment and update
diff --git a/arch/ia64/kernel/syscalls/syscall.tbl b/arch/ia64/kernel/syscalls/syscall.tbl
index 52319006de0d..ab9cda5f6136 100644
--- a/arch/ia64/kernel/syscalls/syscall.tbl
+++ b/arch/ia64/kernel/syscalls/syscall.tbl
@@ -343,3 +343,4 @@
 331	common	pkey_alloc			sys_pkey_alloc
 332	common	pkey_free			sys_pkey_free
 333	common	rseq				sys_rseq
+# 334 through 423 are reserved to sync up with other architectures
diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl
index 253bd2a069bd..125c14178979 100644
--- a/arch/m68k/kernel/syscalls/syscall.tbl
+++ b/arch/m68k/kernel/syscalls/syscall.tbl
@@ -403,3 +403,23 @@
 400	common	msgsnd				sys_msgsnd
 401	common	msgrcv				sys_msgrcv
 402	common	msgctl				sys_msgctl
+403	common	clock_gettime64			sys_clock_gettime
+404	common	clock_settime64			sys_clock_settime
+405	common	clock_adjtime64			sys_clock_adjtime
+406	common	clock_getres_time64		sys_clock_getres
+407	common	clock_nanosleep_time64		sys_clock_nanosleep
+408	common	timer_gettime64			sys_timer_gettime
+409	common	timer_settime64			sys_timer_settime
+410	common	timerfd_gettime64		sys_timerfd_gettime
+411	common	timerfd_settime64		sys_timerfd_settime
+412	common	utimensat_time64		sys_utimensat
+413	common	pselect6_time64			sys_pselect6
+414	common	ppoll_time64			sys_ppoll
+416	common	io_pgetevents_time64		sys_io_pgetevents
+417	common	recvmmsg_time64			sys_recvmmsg
+418	common	mq_timedsend_time64		sys_mq_timedsend
+419	common	mq_timedreceive_time64		sys_mq_timedreceive
+420	common	semtimedop_time64		sys_semtimedop
+421	common	rt_sigtimedwait_time64		sys_rt_sigtimedwait
+422	common	futex_time64			sys_futex
+423	common	sched_rr_get_interval_time64	sys_sched_rr_get_interval
diff --git a/arch/microblaze/kernel/syscalls/syscall.tbl b/arch/microblaze/kernel/syscalls/syscall.tbl
index 44a87649d681..8ee3a8c18498 100644
--- a/arch/microblaze/kernel/syscalls/syscall.tbl
+++ b/arch/microblaze/kernel/syscalls/syscall.tbl
@@ -408,3 +408,24 @@
 398	common	statx				sys_statx
 399	common	io_pgetevents			sys_io_pgetevents_time32
 400	common	rseq				sys_rseq
+# 401 and 402 are unused
+403	common	clock_gettime64			sys_clock_gettime
+404	common	clock_settime64			sys_clock_settime
+405	common	clock_adjtime64			sys_clock_adjtime
+406	common	clock_getres_time64		sys_clock_getres
+407	common	clock_nanosleep_time64		sys_clock_nanosleep
+408	common	timer_gettime64			sys_timer_gettime
+409	common	timer_settime64			sys_timer_settime
+410	common	timerfd_gettime64		sys_timerfd_gettime
+411	common	timerfd_settime64		sys_timerfd_settime
+412	common	utimensat_time64		sys_utimensat
+413	common	pselect6_time64			sys_pselect6
+414	common	ppoll_time64			sys_ppoll
+416	common	io_pgetevents_time64		sys_io_pgetevents
+417	common	recvmmsg_time64			sys_recvmmsg
+418	common	mq_timedsend_time64		sys_mq_timedsend
+419	common	mq_timedreceive_time64		sys_mq_timedreceive
+420	common	semtimedop_time64		sys_semtimedop
+421	common	rt_sigtimedwait_time64		sys_rt_sigtimedwait
+422	common	futex_time64			sys_futex
+423	common	sched_rr_get_interval_time64	sys_sched_rr_get_interval
diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl
index 6d1e019817c8..15f4117900ee 100644
--- a/arch/mips/kernel/syscalls/syscall_n32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n32.tbl
@@ -341,3 +341,24 @@
 330	n32	statx				sys_statx
 331	n32	rseq				sys_rseq
 332	n32	io_pgetevents			compat_sys_io_pgetevents
+# 333 through 402 are unassigned to sync up with generic numbers
+403	n32	clock_gettime64			sys_clock_gettime
+404	n32	clock_settime64			sys_clock_settime
+405	n32	clock_adjtime64			sys_clock_adjtime
+406	n32	clock_getres_time64		sys_clock_getres
+407	n32	clock_nanosleep_time64		sys_clock_nanosleep
+408	n32	timer_gettime64			sys_timer_gettime
+409	n32	timer_settime64			sys_timer_settime
+410	n32	timerfd_gettime64		sys_timerfd_gettime
+411	n32	timerfd_settime64		sys_timerfd_settime
+412	n32	utimensat_time64		sys_utimensat
+413	n32	pselect6_time64			compat_sys_pselect6_time64
+414	n32	ppoll_time64			compat_sys_ppoll_time64
+416	n32	io_pgetevents_time64		sys_io_pgetevents
+417	n32	recvmmsg_time64			compat_sys_recvmmsg_time64
+418	n32	mq_timedsend_time64		sys_mq_timedsend
+419	n32	mq_timedreceive_time64		sys_mq_timedreceive
+420	n32	semtimedop_time64		sys_semtimedop
+421	n32	rt_sigtimedwait_time64		compat_sys_rt_sigtimedwait_time64
+422	n32	futex_time64			sys_futex
+423	n32	sched_rr_get_interval_time64	sys_sched_rr_get_interval
diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/syscalls/syscall_n64.tbl
index af0da757a7b2..c85502e67b44 100644
--- a/arch/mips/kernel/syscalls/syscall_n64.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n64.tbl
@@ -337,3 +337,4 @@
 326	n64	statx				sys_statx
 327	n64	rseq				sys_rseq
 328	n64	io_pgetevents			sys_io_pgetevents
+# 329 through 423 are reserved to sync up with other architectures
diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl
index 54312c5b5343..2e063d0f837e 100644
--- a/arch/mips/kernel/syscalls/syscall_o32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_o32.tbl
@@ -391,3 +391,23 @@
 400	o32	msgsnd				sys_msgsnd			compat_sys_msgsnd
 401	o32	msgrcv				sys_msgrcv			compat_sys_msgrcv
 402	o32	msgctl				sys_msgctl			compat_sys_msgctl
+403	o32	clock_gettime64			sys_clock_gettime		sys_clock_gettime
+404	o32	clock_settime64			sys_clock_settime		sys_clock_settime
+405	o32	clock_adjtime64			sys_clock_adjtime		sys_clock_adjtime
+406	o32	clock_getres_time64		sys_clock_getres		sys_clock_getres
+407	o32	clock_nanosleep_time64		sys_clock_nanosleep		sys_clock_nanosleep
+408	o32	timer_gettime64			sys_timer_gettime		sys_timer_gettime
+409	o32	timer_settime64			sys_timer_settime		sys_timer_settime
+410	o32	timerfd_gettime64		sys_timerfd_gettime		sys_timerfd_gettime
+411	o32	timerfd_settime64		sys_timerfd_settime		sys_timerfd_settime
+412	o32	utimensat_time64		sys_utimensat			sys_utimensat
+413	o32	pselect6_time64			sys_pselect6			compat_sys_pselect6_time64
+414	o32	ppoll_time64			sys_ppoll			compat_sys_ppoll_time64
+416	o32	io_pgetevents_time64		sys_io_pgetevents		sys_io_pgetevents
+417	o32	recvmmsg_time64			sys_recvmmsg			compat_sys_recvmmsg_time64
+418	o32	mq_timedsend_time64		sys_mq_timedsend		sys_mq_timedsend
+419	o32	mq_timedreceive_time64		sys_mq_timedreceive		sys_mq_timedreceive
+420	o32	semtimedop_time64		sys_semtimedop			sys_semtimedop
+421	o32	rt_sigtimedwait_time64		sys_rt_sigtimedwait		compat_sys_rt_sigtimedwait_time64
+422	o32	futex_time64			sys_futex			sys_futex
+423	o32	sched_rr_get_interval_time64	sys_sched_rr_get_interval	sys_sched_rr_get_interval
diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl
index 7eff3dc3d613..b26766c6647d 100644
--- a/arch/parisc/kernel/syscalls/syscall.tbl
+++ b/arch/parisc/kernel/syscalls/syscall.tbl
@@ -399,3 +399,24 @@
 352	common	pkey_alloc		sys_pkey_alloc
 353	common	pkey_free		sys_pkey_free
 354	common	rseq			sys_rseq
+# 355 through 402 are unassigned to sync up with generic numbers
+403	32	clock_gettime64			sys_clock_gettime		sys_clock_gettime
+404	32	clock_settime64			sys_clock_settime		sys_clock_settime
+405	32	clock_adjtime64			sys_clock_adjtime		sys_clock_adjtime
+406	32	clock_getres_time64		sys_clock_getres		sys_clock_getres
+407	32	clock_nanosleep_time64		sys_clock_nanosleep		sys_clock_nanosleep
+408	32	timer_gettime64			sys_timer_gettime		sys_timer_gettime
+409	32	timer_settime64			sys_timer_settime		sys_timer_settime
+410	32	timerfd_gettime64		sys_timerfd_gettime		sys_timerfd_gettime
+411	32	timerfd_settime64		sys_timerfd_settime		sys_timerfd_settime
+412	32	utimensat_time64		sys_utimensat			sys_utimensat
+413	32	pselect6_time64			sys_pselect6			compat_sys_pselect6_time64
+414	32	ppoll_time64			sys_ppoll			compat_sys_ppoll_time64
+416	32	io_pgetevents_time64		sys_io_pgetevents		sys_io_pgetevents
+417	32	recvmmsg_time64			sys_recvmmsg			compat_sys_recvmmsg_time64
+418	32	mq_timedsend_time64		sys_mq_timedsend		sys_mq_timedsend
+419	32	mq_timedreceive_time64		sys_mq_timedreceive		sys_mq_timedreceive
+420	32	semtimedop_time64		sys_semtimedop			sys_semtimedop
+421	32	rt_sigtimedwait_time64		sys_rt_sigtimedwait		compat_sys_rt_sigtimedwait_time64
+422	32	futex_time64			sys_futex			sys_futex
+423	32	sched_rr_get_interval_time64	sys_sched_rr_get_interval	sys_sched_rr_get_interval
diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl
index 500edbf9e8a6..b18abb0c3dae 100644
--- a/arch/powerpc/kernel/syscalls/syscall.tbl
+++ b/arch/powerpc/kernel/syscalls/syscall.tbl
@@ -485,3 +485,23 @@
 400	common	msgsnd				sys_msgsnd			compat_sys_msgsnd
 401	common	msgrcv				sys_msgrcv			compat_sys_msgrcv
 402	common	msgctl				sys_msgctl			compat_sys_msgctl
+403	32	clock_gettime64			sys_clock_gettime		sys_clock_gettime
+404	32	clock_settime64			sys_clock_settime		sys_clock_settime
+405	32	clock_adjtime64			sys_clock_adjtime		sys_clock_adjtime
+406	32	clock_getres_time64		sys_clock_getres		sys_clock_getres
+407	32	clock_nanosleep_time64		sys_clock_nanosleep		sys_clock_nanosleep
+408	32	timer_gettime64			sys_timer_gettime		sys_timer_gettime
+409	32	timer_settime64			sys_timer_settime		sys_timer_settime
+410	32	timerfd_gettime64		sys_timerfd_gettime		sys_timerfd_gettime
+411	32	timerfd_settime64		sys_timerfd_settime		sys_timerfd_settime
+412	32	utimensat_time64		sys_utimensat			sys_utimensat
+413	32	pselect6_time64			sys_pselect6			compat_sys_pselect6_time64
+414	32	ppoll_time64			sys_ppoll			compat_sys_ppoll_time64
+416	32	io_pgetevents_time64		sys_io_pgetevents		sys_io_pgetevents
+417	32	recvmmsg_time64			sys_recvmmsg			compat_sys_recvmmsg_time64
+418	32	mq_timedsend_time64		sys_mq_timedsend		sys_mq_timedsend
+419	32	mq_timedreceive_time64		sys_mq_timedreceive		sys_mq_timedreceive
+420	32	semtimedop_time64		sys_semtimedop			sys_semtimedop
+421	32	rt_sigtimedwait_time64		sys_rt_sigtimedwait		compat_sys_rt_sigtimedwait_time64
+422	32	futex_time64			sys_futex			sys_futex
+423	32	sched_rr_get_interval_time64	sys_sched_rr_get_interval	sys_sched_rr_get_interval
diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl
index 285201cf1f83..02579f95f391 100644
--- a/arch/s390/kernel/syscalls/syscall.tbl
+++ b/arch/s390/kernel/syscalls/syscall.tbl
@@ -406,3 +406,23 @@
 400  common	msgsnd			sys_msgsnd			compat_sys_msgsnd
 401  common	msgrcv			sys_msgrcv			compat_sys_msgrcv
 402  common	msgctl			sys_msgctl			compat_sys_msgctl
+403	32	clock_gettime64		-				sys_clock_gettime
+404	32	clock_settime64		-				sys_clock_settime
+405	32	clock_adjtime64		-				sys_clock_adjtime
+406	32	clock_getres_time64	-				sys_clock_getres
+407	32	clock_nanosleep_time64	-				sys_clock_nanosleep
+408	32	timer_gettime64		-				sys_timer_gettime
+409	32	timer_settime64		-				sys_timer_settime
+410	32	timerfd_gettime64	-				sys_timerfd_gettime
+411	32	timerfd_settime64	-				sys_timerfd_settime
+412	32	utimensat_time64	-				sys_utimensat
+413	32	pselect6_time64		-				compat_sys_pselect6_time64
+414	32	ppoll_time64		-				compat_sys_ppoll_time64
+416	32	io_pgetevents_time64	-				sys_io_pgetevents
+417	32	recvmmsg_time64		-				compat_sys_recvmmsg_time64
+418	32	mq_timedsend_time64	-				sys_mq_timedsend
+419	32	mq_timedreceive_time64	-				sys_mq_timedreceive
+420	32	semtimedop_time64	-				sys_semtimedop
+421	32	rt_sigtimedwait_time64	-				compat_sys_rt_sigtimedwait_time64
+422	32	futex_time64		-				sys_futex
+423	32	sched_rr_get_interval_time64	-			sys_sched_rr_get_interval
diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/syscall.tbl
index 06d768c3cc4c..bfda678576e4 100644
--- a/arch/sh/kernel/syscalls/syscall.tbl
+++ b/arch/sh/kernel/syscalls/syscall.tbl
@@ -406,3 +406,23 @@
 400	common	msgsnd				sys_msgsnd
 401	common	msgrcv				sys_msgrcv
 402	common	msgctl				sys_msgctl
+403	common	clock_gettime64			sys_clock_gettime
+404	common	clock_settime64			sys_clock_settime
+405	common	clock_adjtime64			sys_clock_adjtime
+406	common	clock_getres_time64		sys_clock_getres
+407	common	clock_nanosleep_time64		sys_clock_nanosleep
+408	common	timer_gettime64			sys_timer_gettime
+409	common	timer_settime64			sys_timer_settime
+410	common	timerfd_gettime64		sys_timerfd_gettime
+411	common	timerfd_settime64		sys_timerfd_settime
+412	common	utimensat_time64		sys_utimensat
+413	common	pselect6_time64			sys_pselect6
+414	common	ppoll_time64			sys_ppoll
+416	common	io_pgetevents_time64		sys_io_pgetevents
+417	common	recvmmsg_time64			sys_recvmmsg
+418	common	mq_timedsend_time64		sys_mq_timedsend
+419	common	mq_timedreceive_time64		sys_mq_timedreceive
+420	common	semtimedop_time64		sys_semtimedop
+421	common	rt_sigtimedwait_time64		sys_rt_sigtimedwait
+422	common	futex_time64			sys_futex
+423	common	sched_rr_get_interval_time64	sys_sched_rr_get_interval
diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl
index 99c40abd8878..b9a5a04b2d2c 100644
--- a/arch/sparc/kernel/syscalls/syscall.tbl
+++ b/arch/sparc/kernel/syscalls/syscall.tbl
@@ -449,3 +449,23 @@
 400	common	msgsnd			sys_msgsnd			compat_sys_msgsnd
 401	common	msgrcv			sys_msgrcv			compat_sys_msgrcv
 402	common	msgctl			sys_msgctl			compat_sys_msgctl
+403	32	clock_gettime64			sys_clock_gettime		sys_clock_gettime
+404	32	clock_settime64			sys_clock_settime		sys_clock_settime
+405	32	clock_adjtime64			sys_clock_adjtime		sys_clock_adjtime
+406	32	clock_getres_time64		sys_clock_getres		sys_clock_getres
+407	32	clock_nanosleep_time64		sys_clock_nanosleep		sys_clock_nanosleep
+408	32	timer_gettime64			sys_timer_gettime		sys_timer_gettime
+409	32	timer_settime64			sys_timer_settime		sys_timer_settime
+410	32	timerfd_gettime64		sys_timerfd_gettime		sys_timerfd_gettime
+411	32	timerfd_settime64		sys_timerfd_settime		sys_timerfd_settime
+412	32	utimensat_time64		sys_utimensat			sys_utimensat
+413	32	pselect6_time64			sys_pselect6			compat_sys_pselect6_time64
+414	32	ppoll_time64			sys_ppoll			compat_sys_ppoll_time64
+416	32	io_pgetevents_time64		sys_io_pgetevents		sys_io_pgetevents
+417	32	recvmmsg_time64			sys_recvmmsg			compat_sys_recvmmsg_time64
+418	32	mq_timedsend_time64		sys_mq_timedsend		sys_mq_timedsend
+419	32	mq_timedreceive_time64		sys_mq_timedreceive		sys_mq_timedreceive
+420	32	semtimedop_time64		sys_semtimedop			sys_semtimedop
+421	32	rt_sigtimedwait_time64		sys_rt_sigtimedwait		compat_sys_rt_sigtimedwait_time64
+422	32	futex_time64			sys_futex			sys_futex
+423	32	sched_rr_get_interval_time64	sys_sched_rr_get_interval	sys_sched_rr_get_interval
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index 8c47c1191a53..955ab6a3b61f 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -409,3 +409,23 @@
 400	i386	msgsnd			sys_msgsnd    			__ia32_compat_sys_msgsnd
 401	i386	msgrcv			sys_msgrcv    			__ia32_compat_sys_msgrcv
 402	i386	msgctl			sys_msgctl    			__ia32_compat_sys_msgctl
+403	i386	clock_gettime64		sys_clock_gettime		__ia32_sys_clock_gettime
+404	i386	clock_settime64		sys_clock_settime		__ia32_sys_clock_settime
+405	i386	clock_adjtime64		sys_clock_adjtime		__ia32_sys_clock_adjtime
+406	i386	clock_getres_time64	sys_clock_getres		__ia32_sys_clock_getres
+407	i386	clock_nanosleep_time64	sys_clock_nanosleep		__ia32_sys_clock_nanosleep
+408	i386	timer_gettime64		sys_timer_gettime		__ia32_sys_timer_gettime
+409	i386	timer_settime64		sys_timer_settime		__ia32_sys_timer_settime
+410	i386	timerfd_gettime64	sys_timerfd_gettime		__ia32_sys_timerfd_gettime
+411	i386	timerfd_settime64	sys_timerfd_settime		__ia32_sys_timerfd_settime
+412	i386	utimensat_time64	sys_utimensat			__ia32_sys_utimensat
+413	i386	pselect6_time64		sys_pselect6			__ia32_compat_sys_pselect6_time64
+414	i386	ppoll_time64		sys_ppoll			__ia32_compat_sys_ppoll_time64
+416	i386	io_pgetevents_time64	sys_io_pgetevents		__ia32_sys_io_pgetevents
+417	i386	recvmmsg_time64		sys_recvmmsg			__ia32_compat_sys_recvmmsg_time64
+418	i386	mq_timedsend_time64	sys_mq_timedsend		__ia32_sys_mq_timedsend
+419	i386	mq_timedreceive_time64	sys_mq_timedreceive		__ia32_sys_mq_timedreceive
+420	i386	semtimedop_time64	sys_semtimedop			__ia32_sys_semtimedop
+421	i386	rt_sigtimedwait_time64	sys_rt_sigtimedwait		__ia32_compat_sys_rt_sigtimedwait_time64
+422	i386	futex_time64		sys_futex			__ia32_sys_futex
+423	i386	sched_rr_get_interval_time64	sys_sched_rr_get_interval	__ia32_sys_sched_rr_get_interval
diff --git a/arch/xtensa/kernel/syscalls/syscall.tbl b/arch/xtensa/kernel/syscalls/syscall.tbl
index 482673389e21..6af49929de85 100644
--- a/arch/xtensa/kernel/syscalls/syscall.tbl
+++ b/arch/xtensa/kernel/syscalls/syscall.tbl
@@ -373,3 +373,24 @@
 350	common	pkey_free			sys_pkey_free
 351	common	statx				sys_statx
 352	common	rseq				sys_rseq
+# 353 through 402 are unassigned to sync up with generic numbers
+403	common	clock_gettime64			sys_clock_gettime
+404	common	clock_settime64			sys_clock_settime
+405	common	clock_adjtime64			sys_clock_adjtime
+406	common	clock_getres_time64		sys_clock_getres
+407	common	clock_nanosleep_time64		sys_clock_nanosleep
+408	common	timer_gettime64			sys_timer_gettime
+409	common	timer_settime64			sys_timer_settime
+410	common	timerfd_gettime64		sys_timerfd_gettime
+411	common	timerfd_settime64		sys_timerfd_settime
+412	common	utimensat_time64		sys_utimensat
+413	common	pselect6_time64			sys_pselect6
+414	common	ppoll_time64			sys_ppoll
+416	common	io_pgetevents_time64		sys_io_pgetevents
+417	common	recvmmsg_time64			sys_recvmmsg
+418	common	mq_timedsend_time64		sys_mq_timedsend
+419	common	mq_timedreceive_time64		sys_mq_timedreceive
+420	common	semtimedop_time64		sys_semtimedop
+421	common	rt_sigtimedwait_time64		sys_rt_sigtimedwait
+422	common	futex_time64			sys_futex
+423	common	sched_rr_get_interval_time64	sys_sched_rr_get_interval
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index ab1831769030..acf9a07ab2ff 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -741,9 +741,51 @@ __SYSCALL(__NR_rseq, sys_rseq)
 #define __NR_kexec_file_load 294
 __SYSCALL(__NR_kexec_file_load,     sys_kexec_file_load)
 /* 295 through 402 are unassigned to sync up with generic numbers, don't use */
+#if __BITS_PER_LONG == 32
+#define __NR_clock_gettime64 403
+__SYSCALL(__NR_clock_gettime64, sys_clock_gettime)
+#define __NR_clock_settime64 404
+__SYSCALL(__NR_clock_settime64, sys_clock_settime)
+#define __NR_clock_adjtime64 405
+__SYSCALL(__NR_clock_adjtime64, sys_clock_adjtime)
+#define __NR_clock_getres_time64 406
+__SYSCALL(__NR_clock_getres_time64, sys_clock_getres)
+#define __NR_clock_nanosleep_time64 407
+__SYSCALL(__NR_clock_nanosleep_time64, sys_clock_nanosleep)
+#define __NR_timer_gettime64 408
+__SYSCALL(__NR_timer_gettime64, sys_timer_gettime)
+#define __NR_timer_settime64 409
+__SYSCALL(__NR_timer_settime64, sys_timer_settime)
+#define __NR_timerfd_gettime64 410
+__SYSCALL(__NR_timerfd_gettime64, sys_timerfd_gettime)
+#define __NR_timerfd_settime64 411
+__SYSCALL(__NR_timerfd_settime64, sys_timerfd_settime)
+#define __NR_utimensat_time64 412
+__SYSCALL(__NR_utimensat_time64, sys_utimensat)
+#define __NR_pselect6_time64 413
+__SC_COMP(__NR_pselect6_time64, sys_pselect6, compat_sys_pselect6_time64)
+#define __NR_ppoll_time64 414
+__SC_COMP(__NR_ppoll_time64, sys_ppoll, compat_sys_ppoll_time64)
+#define __NR_io_pgetevents_time64 416
+__SYSCALL(__NR_io_pgetevents_time64, sys_io_pgetevents)
+#define __NR_recvmmsg_time64 417
+__SC_COMP(__NR_recvmmsg_time64, sys_recvmmsg, compat_sys_recvmmsg_time64)
+#define __NR_mq_timedsend_time64 418
+__SYSCALL(__NR_mq_timedsend_time64, sys_mq_timedsend)
+#define __NR_mq_timedreceive_time64 419
+__SYSCALL(__NR_mq_timedreceive_time64, sys_mq_timedreceive)
+#define __NR_semtimedop_time64 420
+__SYSCALL(__NR_semtimedop_time64, sys_semtimedop)
+#define __NR_rt_sigtimedwait_time64 421
+__SC_COMP(__NR_rt_sigtimedwait_time64, sys_rt_sigtimedwait, compat_sys_rt_sigtimedwait_time64)
+#define __NR_futex_time64 422
+__SYSCALL(__NR_futex_time64, sys_futex)
+#define __NR_sched_rr_get_interval_time64 423
+__SYSCALL(__NR_sched_rr_get_interval_time64, sys_sched_rr_get_interval)
+#endif
 
 #undef __NR_syscalls
-#define __NR_syscalls 295
+#define __NR_syscalls 424
 
 /*
  * 32 bit systems traditionally used different
diff --git a/scripts/checksyscalls.sh b/scripts/checksyscalls.sh
index cf931003395f..cc70a64fa81f 100755
--- a/scripts/checksyscalls.sh
+++ b/scripts/checksyscalls.sh
@@ -84,6 +84,26 @@ cat << EOF
 #define __IGNORE_statfs64
 #define __IGNORE_llseek
 #define __IGNORE_mmap2
+#define __IGNORE_clock_gettime64
+#define __IGNORE_clock_settime64
+#define __IGNORE_clock_adjtime64
+#define __IGNORE_clock_getres_time64
+#define __IGNORE_clock_nanosleep_time64
+#define __IGNORE_timer_gettime64
+#define __IGNORE_timer_settime64
+#define __IGNORE_timerfd_gettime64
+#define __IGNORE_timerfd_settime64
+#define __IGNORE_utimensat_time64
+#define __IGNORE_pselect6_time64
+#define __IGNORE_ppoll_time64
+#define __IGNORE_io_pgetevents_time64
+#define __IGNORE_recvmmsg_time64
+#define __IGNORE_mq_timedsend_time64
+#define __IGNORE_mq_timedreceive_time64
+#define __IGNORE_semtimedop_time64
+#define __IGNORE_rt_sigtimedwait_time64
+#define __IGNORE_futex_time64
+#define __IGNORE_sched_rr_get_interval_time64
 #else
 #define __IGNORE_sendfile
 #define __IGNORE_ftruncate
@@ -98,6 +118,26 @@ cat << EOF
 #define __IGNORE_statfs
 #define __IGNORE_lseek
 #define __IGNORE_mmap
+#define __IGNORE_clock_gettime
+#define __IGNORE_clock_settime
+#define __IGNORE_clock_adjtime
+#define __IGNORE_clock_getres
+#define __IGNORE_clock_nanosleep
+#define __IGNORE_timer_gettime
+#define __IGNORE_timer_settime
+#define __IGNORE_timerfd_gettime
+#define __IGNORE_timerfd_settime
+#define __IGNORE_utimensat
+#define __IGNORE_pselect6
+#define __IGNORE_ppoll
+#define __IGNORE_io_pgetevents
+#define __IGNORE_recvmmsg
+#define __IGNORE_mq_timedsend
+#define __IGNORE_mq_timedreceiv
+#define __IGNORE_semtimedop
+#define __IGNORE_rt_sigtimedwait
+#define __IGNORE_futex
+#define __IGNORE_sched_rr_get_interval
 #endif
 
 /* i386-specific or historical system calls */
-- 
cgit v1.2.3


From a4f342b9607d8c2034d3135cbbb11b4028be3678 Mon Sep 17 00:00:00 2001
From: Quentin Perret <quentin.perret@arm.com>
Date: Mon, 4 Feb 2019 11:09:48 +0000
Subject: PM / OPP: Introduce a power estimation helper

The Energy Model (EM) framework provides an API to let drivers register
the active power of CPUs. The drivers are expected to provide a callback
method which estimates the power consumed by a CPU at each available
performance levels. How exactly this should be implemented, however,
depends on the platform.

On some systems, PM_OPP knows the voltage and frequency at which CPUs
can run. When coupled with the CPU 'capacitance' (as provided by the
'dynamic-power-coefficient' devicetree binding), it is possible to
estimate the dynamic power consumption of a CPU as P = C * V^2 * f, with
C its capacitance and V and f respectively the voltage and frequency of
the OPP. The Intelligent Power Allocator (IPA) thermal governor already
implements that estimation method, in the thermal framework.

However, this power estimation method can be applied to any platform
where all the parameters are known (C, V and f), and not only those
suffering thermal issues. As such, the code implementing this feature
can be re-used to also populate the EM framework now used by EAS.

As a first step, introduce in PM_OPP a helper function which CPUFreq
drivers can use to register into the EM framework. This duplicates the
power estimation done in IPA until it can be migrated to using the EM
framework. This will be done later, once the EM framework has support
for at least all platforms currently supported by IPA.

Signed-off-by: Quentin Perret <quentin.perret@arm.com>
Tested-by: Matthias Kaehlcke <mka@chromium.org>
Reviewed-by: Matthias Kaehlcke <mka@chromium.org>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 drivers/opp/of.c       | 99 ++++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/pm_opp.h |  6 +++
 2 files changed, 105 insertions(+)

(limited to 'include')

diff --git a/drivers/opp/of.c b/drivers/opp/of.c
index 06f0f632ec47..cd58959e5158 100644
--- a/drivers/opp/of.c
+++ b/drivers/opp/of.c
@@ -20,6 +20,7 @@
 #include <linux/pm_domain.h>
 #include <linux/slab.h>
 #include <linux/export.h>
+#include <linux/energy_model.h>
 
 #include "opp.h"
 
@@ -1047,3 +1048,101 @@ struct device_node *dev_pm_opp_get_of_node(struct dev_pm_opp *opp)
 	return of_node_get(opp->np);
 }
 EXPORT_SYMBOL_GPL(dev_pm_opp_get_of_node);
+
+/*
+ * Callback function provided to the Energy Model framework upon registration.
+ * This computes the power estimated by @CPU at @kHz if it is the frequency
+ * of an existing OPP, or at the frequency of the first OPP above @kHz otherwise
+ * (see dev_pm_opp_find_freq_ceil()). This function updates @kHz to the ceiled
+ * frequency and @mW to the associated power. The power is estimated as
+ * P = C * V^2 * f with C being the CPU's capacitance and V and f respectively
+ * the voltage and frequency of the OPP.
+ *
+ * Returns -ENODEV if the CPU device cannot be found, -EINVAL if the power
+ * calculation failed because of missing parameters, 0 otherwise.
+ */
+static int __maybe_unused _get_cpu_power(unsigned long *mW, unsigned long *kHz,
+					 int cpu)
+{
+	struct device *cpu_dev;
+	struct dev_pm_opp *opp;
+	struct device_node *np;
+	unsigned long mV, Hz;
+	u32 cap;
+	u64 tmp;
+	int ret;
+
+	cpu_dev = get_cpu_device(cpu);
+	if (!cpu_dev)
+		return -ENODEV;
+
+	np = of_node_get(cpu_dev->of_node);
+	if (!np)
+		return -EINVAL;
+
+	ret = of_property_read_u32(np, "dynamic-power-coefficient", &cap);
+	of_node_put(np);
+	if (ret)
+		return -EINVAL;
+
+	Hz = *kHz * 1000;
+	opp = dev_pm_opp_find_freq_ceil(cpu_dev, &Hz);
+	if (IS_ERR(opp))
+		return -EINVAL;
+
+	mV = dev_pm_opp_get_voltage(opp) / 1000;
+	dev_pm_opp_put(opp);
+	if (!mV)
+		return -EINVAL;
+
+	tmp = (u64)cap * mV * mV * (Hz / 1000000);
+	do_div(tmp, 1000000000);
+
+	*mW = (unsigned long)tmp;
+	*kHz = Hz / 1000;
+
+	return 0;
+}
+
+/**
+ * dev_pm_opp_of_register_em() - Attempt to register an Energy Model
+ * @cpus	: CPUs for which an Energy Model has to be registered
+ *
+ * This checks whether the "dynamic-power-coefficient" devicetree property has
+ * been specified, and tries to register an Energy Model with it if it has.
+ */
+void dev_pm_opp_of_register_em(struct cpumask *cpus)
+{
+	struct em_data_callback em_cb = EM_DATA_CB(_get_cpu_power);
+	int ret, nr_opp, cpu = cpumask_first(cpus);
+	struct device *cpu_dev;
+	struct device_node *np;
+	u32 cap;
+
+	cpu_dev = get_cpu_device(cpu);
+	if (!cpu_dev)
+		return;
+
+	nr_opp = dev_pm_opp_get_opp_count(cpu_dev);
+	if (nr_opp <= 0)
+		return;
+
+	np = of_node_get(cpu_dev->of_node);
+	if (!np)
+		return;
+
+	/*
+	 * Register an EM only if the 'dynamic-power-coefficient' property is
+	 * set in devicetree. It is assumed the voltage values are known if that
+	 * property is set since it is useless otherwise. If voltages are not
+	 * known, just let the EM registration fail with an error to alert the
+	 * user about the inconsistent configuration.
+	 */
+	ret = of_property_read_u32(np, "dynamic-power-coefficient", &cap);
+	of_node_put(np);
+	if (ret || !cap)
+		return;
+
+	em_register_perf_domain(cpus, nr_opp, &em_cb);
+}
+EXPORT_SYMBOL_GPL(dev_pm_opp_of_register_em);
diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h
index 0a2a88e5a383..1470c57933cf 100644
--- a/include/linux/pm_opp.h
+++ b/include/linux/pm_opp.h
@@ -322,6 +322,7 @@ int dev_pm_opp_of_get_sharing_cpus(struct device *cpu_dev, struct cpumask *cpuma
 struct device_node *dev_pm_opp_of_get_opp_desc_node(struct device *dev);
 struct device_node *dev_pm_opp_get_of_node(struct dev_pm_opp *opp);
 int of_get_required_opp_performance_state(struct device_node *np, int index);
+void dev_pm_opp_of_register_em(struct cpumask *cpus);
 #else
 static inline int dev_pm_opp_of_add_table(struct device *dev)
 {
@@ -360,6 +361,11 @@ static inline struct device_node *dev_pm_opp_get_of_node(struct dev_pm_opp *opp)
 {
 	return NULL;
 }
+
+static inline void dev_pm_opp_of_register_em(struct cpumask *cpus)
+{
+}
+
 static inline int of_get_required_opp_performance_state(struct device_node *np, int index)
 {
 	return -ENOTSUPP;
-- 
cgit v1.2.3


From 752b5da2359fee342d5264e2c10352daf5b9a199 Mon Sep 17 00:00:00 2001
From: Maxime Ripard <maxime.ripard@bootlin.com>
Date: Mon, 21 Jan 2019 16:45:46 +0100
Subject: phy: dphy: Remove unused header

The videomode.h header inclusion is an artifact from the patches
development, remove it.

Suggested-by: Sakari Ailus <sakari.ailus@iki.fi>
Acked-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Maxime Ripard <maxime.ripard@bootlin.com>
Signed-off-by: Kishon Vijay Abraham I <kishon@ti.com>
---
 include/linux/phy/phy-mipi-dphy.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/phy/phy-mipi-dphy.h b/include/linux/phy/phy-mipi-dphy.h
index c08aacc0ac35..9cf97cd1d303 100644
--- a/include/linux/phy/phy-mipi-dphy.h
+++ b/include/linux/phy/phy-mipi-dphy.h
@@ -6,8 +6,6 @@
 #ifndef __PHY_MIPI_DPHY_H_
 #define __PHY_MIPI_DPHY_H_
 
-#include <video/videomode.h>
-
 /**
  * struct phy_configure_opts_mipi_dphy - MIPI D-PHY configuration set
  *
-- 
cgit v1.2.3


From 2204b2c45f7802f3fd96f7b260fe9d5f67329a8c Mon Sep 17 00:00:00 2001
From: Maxime Ripard <maxime.ripard@bootlin.com>
Date: Mon, 21 Jan 2019 16:45:47 +0100
Subject: phy: dphy: Change units of wakeup and init parameters

The Init and wakeup D-PHY parameters are in the micro/milliseconds range,
putting the values real close to the types limits if they were in
picoseconds.

Move them to microseconds which should be better fit.

Suggested-by: Sakari Ailus <sakari.ailus@iki.fi>
Acked-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Maxime Ripard <maxime.ripard@bootlin.com>
Signed-off-by: Kishon Vijay Abraham I <kishon@ti.com>
---
 drivers/phy/phy-core-mipi-dphy.c  | 8 ++++----
 include/linux/phy/phy-mipi-dphy.h | 8 ++++----
 2 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/drivers/phy/phy-core-mipi-dphy.c b/drivers/phy/phy-core-mipi-dphy.c
index 465fa1b91a5f..14e0551cd319 100644
--- a/drivers/phy/phy-core-mipi-dphy.c
+++ b/drivers/phy/phy-core-mipi-dphy.c
@@ -65,12 +65,12 @@ int phy_mipi_dphy_get_default_config(unsigned long pixel_clock,
 	 */
 	cfg->hs_trail = max(4 * 8 * ui, 60000 + 4 * 4 * ui);
 
-	cfg->init = 100000000;
+	cfg->init = 100;
 	cfg->lpx = 60000;
 	cfg->ta_get = 5 * cfg->lpx;
 	cfg->ta_go = 4 * cfg->lpx;
 	cfg->ta_sure = 2 * cfg->lpx;
-	cfg->wakeup = 1000000000;
+	cfg->wakeup = 1000;
 
 	cfg->hs_clk_rate = hs_clk_rate;
 	cfg->lanes = lanes;
@@ -143,7 +143,7 @@ int phy_mipi_dphy_config_validate(struct phy_configure_opts_mipi_dphy *cfg)
 	if (cfg->hs_trail < max(8 * ui, 60000 + 4 * ui))
 		return -EINVAL;
 
-	if (cfg->init < 100000000)
+	if (cfg->init < 100)
 		return -EINVAL;
 
 	if (cfg->lpx < 50000)
@@ -158,7 +158,7 @@ int phy_mipi_dphy_config_validate(struct phy_configure_opts_mipi_dphy *cfg)
 	if (cfg->ta_sure < cfg->lpx || cfg->ta_sure > (2 * cfg->lpx))
 		return -EINVAL;
 
-	if (cfg->wakeup < 1000000000)
+	if (cfg->wakeup < 1000)
 		return -EINVAL;
 
 	return 0;
diff --git a/include/linux/phy/phy-mipi-dphy.h b/include/linux/phy/phy-mipi-dphy.h
index 9cf97cd1d303..627d28080d3a 100644
--- a/include/linux/phy/phy-mipi-dphy.h
+++ b/include/linux/phy/phy-mipi-dphy.h
@@ -190,10 +190,10 @@ struct phy_configure_opts_mipi_dphy {
 	/**
 	 * @init:
 	 *
-	 * Time, in picoseconds for the initialization period to
+	 * Time, in microseconds for the initialization period to
 	 * complete.
 	 *
-	 * Minimum value: 100000000 ps
+	 * Minimum value: 100 us
 	 */
 	unsigned int		init;
 
@@ -244,11 +244,11 @@ struct phy_configure_opts_mipi_dphy {
 	/**
 	 * @wakeup:
 	 *
-	 * Time, in picoseconds, that a transmitter drives a Mark-1
+	 * Time, in microseconds, that a transmitter drives a Mark-1
 	 * state prior to a Stop state in order to initiate an exit
 	 * from ULPS.
 	 *
-	 * Minimum value: 1000000000 ps
+	 * Minimum value: 1000 us
 	 */
 	unsigned int		wakeup;
 
-- 
cgit v1.2.3


From 1baafbe482e54c020751796d7bfdee669acca58b Mon Sep 17 00:00:00 2001
From: Maxime Ripard <maxime.ripard@bootlin.com>
Date: Mon, 21 Jan 2019 16:45:48 +0100
Subject: phy: dphy: Clarify lanes parameter documentation

The lanes parameter is not solely about the number of lanes, but it also
carries the fact that those are the first lanes in use during the
transmission.

It was implicit so far, so make sure it's explicit now.

Suggested-by: Sakari Ailus <sakari.ailus@iki.fi>
Acked-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Maxime Ripard <maxime.ripard@bootlin.com>
Signed-off-by: Kishon Vijay Abraham I <kishon@ti.com>
---
 include/linux/phy/phy-mipi-dphy.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/phy/phy-mipi-dphy.h b/include/linux/phy/phy-mipi-dphy.h
index 627d28080d3a..a877ffee845d 100644
--- a/include/linux/phy/phy-mipi-dphy.h
+++ b/include/linux/phy/phy-mipi-dphy.h
@@ -269,7 +269,8 @@ struct phy_configure_opts_mipi_dphy {
 	/**
 	 * @lanes:
 	 *
-	 * Number of active data lanes used for the transmissions.
+	 * Number of active, consecutive, data lanes, starting from
+	 * lane 0, used for the transmissions.
 	 */
 	unsigned char		lanes;
 };
-- 
cgit v1.2.3


From 626feb863274da93e44d644a9fd4a59b46851794 Mon Sep 17 00:00:00 2001
From: Marek Vasut <marex@denx.de>
Date: Wed, 6 Feb 2019 21:53:48 -0800
Subject: Input: ili210x - drop platform data support

There is not a single user of the ili210x platform data in the kernel,
just drop it.

Signed-off-by: Marek Vasut <marex@denx.de>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/touchscreen/ili210x.c | 12 ++----------
 include/linux/input/ili210x.h       | 11 -----------
 2 files changed, 2 insertions(+), 21 deletions(-)
 delete mode 100644 include/linux/input/ili210x.h

(limited to 'include')

diff --git a/drivers/input/touchscreen/ili210x.c b/drivers/input/touchscreen/ili210x.c
index 6f76eeedf465..25b0ca6c07d9 100644
--- a/drivers/input/touchscreen/ili210x.c
+++ b/drivers/input/touchscreen/ili210x.c
@@ -6,7 +6,6 @@
 #include <linux/input/mt.h>
 #include <linux/delay.h>
 #include <linux/workqueue.h>
-#include <linux/input/ili210x.h>
 
 #define MAX_TOUCHES		2
 #define DEFAULT_POLL_PERIOD	20
@@ -184,7 +183,6 @@ static int ili210x_i2c_probe(struct i2c_client *client,
 				       const struct i2c_device_id *id)
 {
 	struct device *dev = &client->dev;
-	const struct ili210x_platform_data *pdata = dev_get_platdata(dev);
 	struct ili210x *priv;
 	struct input_dev *input;
 	struct panel_info panel;
@@ -194,11 +192,6 @@ static int ili210x_i2c_probe(struct i2c_client *client,
 
 	dev_dbg(dev, "Probing for ILI210X I2C Touschreen driver");
 
-	if (!pdata) {
-		dev_err(dev, "No platform data!\n");
-		return -EINVAL;
-	}
-
 	if (client->irq <= 0) {
 		dev_err(dev, "No IRQ!\n");
 		return -EINVAL;
@@ -233,8 +226,7 @@ static int ili210x_i2c_probe(struct i2c_client *client,
 
 	priv->client = client;
 	priv->input = input;
-	priv->get_pendown_state = pdata->get_pendown_state;
-	priv->poll_period = pdata->poll_period ? : DEFAULT_POLL_PERIOD;
+	priv->poll_period = DEFAULT_POLL_PERIOD;
 	INIT_DELAYED_WORK(&priv->dwork, ili210x_work);
 
 	/* Setup input device */
@@ -258,7 +250,7 @@ static int ili210x_i2c_probe(struct i2c_client *client,
 
 	i2c_set_clientdata(client, priv);
 
-	error = request_irq(client->irq, ili210x_irq, pdata->irq_flags,
+	error = request_irq(client->irq, ili210x_irq, 0,
 			    client->name, priv);
 	if (error) {
 		dev_err(dev, "Unable to request touchscreen IRQ, err: %d\n",
diff --git a/include/linux/input/ili210x.h b/include/linux/input/ili210x.h
deleted file mode 100644
index b76e7c1404cd..000000000000
--- a/include/linux/input/ili210x.h
+++ /dev/null
@@ -1,11 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ILI210X_H
-#define _ILI210X_H
-
-struct ili210x_platform_data {
-	unsigned long irq_flags;
-	unsigned int poll_period;
-	bool (*get_pendown_state)(void);
-};
-
-#endif
-- 
cgit v1.2.3


From fdc98f070b149b038a66e57dea11f2fabb5fdfbc Mon Sep 17 00:00:00 2001
From: Charles Keepax <ckeepax@opensource.cirrus.com>
Date: Wed, 30 Jan 2019 11:41:23 +0000
Subject: mfd: lochnagar: Add initial binding documentation

Lochnagar is an evaluation and development board for Cirrus
Logic Smart CODEC and Amp devices. It allows the connection of
most Cirrus Logic devices on mini-cards, as well as allowing
connection of various application processor systems to provide a
full evaluation platform. This driver supports the board
controller chip on the Lochnagar board.

Signed-off-by: Charles Keepax <ckeepax@opensource.cirrus.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 .../devicetree/bindings/mfd/cirrus,lochnagar.txt   |  68 +++++++++++
 include/dt-bindings/clk/lochnagar.h                |  26 ++++
 include/dt-bindings/pinctrl/lochnagar.h            | 132 +++++++++++++++++++++
 3 files changed, 226 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/mfd/cirrus,lochnagar.txt
 create mode 100644 include/dt-bindings/clk/lochnagar.h
 create mode 100644 include/dt-bindings/pinctrl/lochnagar.h

(limited to 'include')

diff --git a/Documentation/devicetree/bindings/mfd/cirrus,lochnagar.txt b/Documentation/devicetree/bindings/mfd/cirrus,lochnagar.txt
new file mode 100644
index 000000000000..004b0158cf4d
--- /dev/null
+++ b/Documentation/devicetree/bindings/mfd/cirrus,lochnagar.txt
@@ -0,0 +1,68 @@
+Cirrus Logic Lochnagar Audio Development Board
+
+Lochnagar is an evaluation and development board for Cirrus Logic
+Smart CODEC and Amp devices. It allows the connection of most Cirrus
+Logic devices on mini-cards, as well as allowing connection of
+various application processor systems to provide a full evaluation
+platform.  Audio system topology, clocking and power can all be
+controlled through the Lochnagar, allowing the device under test
+to be used in a variety of possible use cases.
+
+Also see these documents for generic binding information:
+  [1] GPIO : ../gpio/gpio.txt
+
+And these for relevant defines:
+  [2] include/dt-bindings/pinctrl/lochnagar.h
+  [3] include/dt-bindings/clock/lochnagar.h
+
+And these documents for the required sub-node binding details:
+  [4] Clock: ../clock/cirrus,lochnagar.txt
+  [5] Pinctrl: ../pinctrl/cirrus,lochnagar.txt
+  [6] Regulator: ../regulator/cirrus,lochnagar.txt
+
+Required properties:
+
+  - compatible : One of the following strings:
+                 "cirrus,lochnagar1"
+                 "cirrus,lochnagar2"
+
+  - reg : I2C slave address
+
+  - reset-gpios : Reset line to the Lochnagar, see [1].
+
+Required sub-nodes:
+
+  - lochnagar-clk : Binding for the clocking components, see [4].
+
+  - lochnagar-pinctrl : Binding for the pin control components, see [5].
+
+Optional sub-nodes:
+
+  - Bindings for the regulator components, see [6]. Only available on
+    Lochnagar 2.
+
+Optional properties:
+
+  - present-gpios : Host present line, indicating the presence of a
+    host system, see [1]. This can be omitted if the present line is
+    tied in hardware.
+
+Example:
+
+lochnagar: lochnagar@22 {
+	compatible = "cirrus,lochnagar2";
+	reg = <0x22>;
+
+	reset-gpios = <&gpio0 55 0>;
+	present-gpios = <&gpio0 60 0>;
+
+	lochnagar-clk {
+		compatible = "cirrus,lochnagar2-clk";
+		...
+	};
+
+	lochnagar-pinctrl {
+		compatible = "cirrus,lochnagar-pinctrl";
+		...
+	};
+};
diff --git a/include/dt-bindings/clk/lochnagar.h b/include/dt-bindings/clk/lochnagar.h
new file mode 100644
index 000000000000..8fa20551ff17
--- /dev/null
+++ b/include/dt-bindings/clk/lochnagar.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Device Tree defines for Lochnagar clocking
+ *
+ * Copyright (c) 2017-2018 Cirrus Logic, Inc. and
+ *                         Cirrus Logic International Semiconductor Ltd.
+ *
+ * Author: Charles Keepax <ckeepax@opensource.cirrus.com>
+ */
+
+#ifndef DT_BINDINGS_CLK_LOCHNAGAR_H
+#define DT_BINDINGS_CLK_LOCHNAGAR_H
+
+#define LOCHNAGAR_CDC_MCLK1		0
+#define LOCHNAGAR_CDC_MCLK2		1
+#define LOCHNAGAR_DSP_CLKIN		2
+#define LOCHNAGAR_GF_CLKOUT1		3
+#define LOCHNAGAR_GF_CLKOUT2		4
+#define LOCHNAGAR_PSIA1_MCLK		5
+#define LOCHNAGAR_PSIA2_MCLK		6
+#define LOCHNAGAR_SPDIF_MCLK		7
+#define LOCHNAGAR_ADAT_MCLK		8
+#define LOCHNAGAR_SOUNDCARD_MCLK	9
+#define LOCHNAGAR_SPDIF_CLKOUT		10
+
+#endif
diff --git a/include/dt-bindings/pinctrl/lochnagar.h b/include/dt-bindings/pinctrl/lochnagar.h
new file mode 100644
index 000000000000..644760bf5725
--- /dev/null
+++ b/include/dt-bindings/pinctrl/lochnagar.h
@@ -0,0 +1,132 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Device Tree defines for Lochnagar pinctrl
+ *
+ * Copyright (c) 2018 Cirrus Logic, Inc. and
+ *                    Cirrus Logic International Semiconductor Ltd.
+ *
+ * Author: Charles Keepax <ckeepax@opensource.cirrus.com>
+ */
+
+#ifndef DT_BINDINGS_PINCTRL_LOCHNAGAR_H
+#define DT_BINDINGS_PINCTRL_LOCHNAGAR_H
+
+#define LOCHNAGAR1_PIN_CDC_RESET		0
+#define LOCHNAGAR1_PIN_DSP_RESET		1
+#define LOCHNAGAR1_PIN_CDC_CIF1MODE		2
+#define LOCHNAGAR1_PIN_NUM_GPIOS		3
+
+#define LOCHNAGAR2_PIN_CDC_RESET		0
+#define LOCHNAGAR2_PIN_DSP_RESET		1
+#define LOCHNAGAR2_PIN_CDC_CIF1MODE		2
+#define LOCHNAGAR2_PIN_CDC_LDOENA		3
+#define LOCHNAGAR2_PIN_SPDIF_HWMODE		4
+#define LOCHNAGAR2_PIN_SPDIF_RESET		5
+#define LOCHNAGAR2_PIN_FPGA_GPIO1		6
+#define LOCHNAGAR2_PIN_FPGA_GPIO2		7
+#define LOCHNAGAR2_PIN_FPGA_GPIO3		8
+#define LOCHNAGAR2_PIN_FPGA_GPIO4		9
+#define LOCHNAGAR2_PIN_FPGA_GPIO5		10
+#define LOCHNAGAR2_PIN_FPGA_GPIO6		11
+#define LOCHNAGAR2_PIN_CDC_GPIO1		12
+#define LOCHNAGAR2_PIN_CDC_GPIO2		13
+#define LOCHNAGAR2_PIN_CDC_GPIO3		14
+#define LOCHNAGAR2_PIN_CDC_GPIO4		15
+#define LOCHNAGAR2_PIN_CDC_GPIO5		16
+#define LOCHNAGAR2_PIN_CDC_GPIO6		17
+#define LOCHNAGAR2_PIN_CDC_GPIO7		18
+#define LOCHNAGAR2_PIN_CDC_GPIO8		19
+#define LOCHNAGAR2_PIN_DSP_GPIO1		20
+#define LOCHNAGAR2_PIN_DSP_GPIO2		21
+#define LOCHNAGAR2_PIN_DSP_GPIO3		22
+#define LOCHNAGAR2_PIN_DSP_GPIO4		23
+#define LOCHNAGAR2_PIN_DSP_GPIO5		24
+#define LOCHNAGAR2_PIN_DSP_GPIO6		25
+#define LOCHNAGAR2_PIN_GF_GPIO2			26
+#define LOCHNAGAR2_PIN_GF_GPIO3			27
+#define LOCHNAGAR2_PIN_GF_GPIO7			28
+#define LOCHNAGAR2_PIN_CDC_AIF1_BCLK		29
+#define LOCHNAGAR2_PIN_CDC_AIF1_RXDAT		30
+#define LOCHNAGAR2_PIN_CDC_AIF1_LRCLK		31
+#define LOCHNAGAR2_PIN_CDC_AIF1_TXDAT		32
+#define LOCHNAGAR2_PIN_CDC_AIF2_BCLK		33
+#define LOCHNAGAR2_PIN_CDC_AIF2_RXDAT		34
+#define LOCHNAGAR2_PIN_CDC_AIF2_LRCLK		35
+#define LOCHNAGAR2_PIN_CDC_AIF2_TXDAT		36
+#define LOCHNAGAR2_PIN_CDC_AIF3_BCLK		37
+#define LOCHNAGAR2_PIN_CDC_AIF3_RXDAT		38
+#define LOCHNAGAR2_PIN_CDC_AIF3_LRCLK		39
+#define LOCHNAGAR2_PIN_CDC_AIF3_TXDAT		40
+#define LOCHNAGAR2_PIN_DSP_AIF1_BCLK		41
+#define LOCHNAGAR2_PIN_DSP_AIF1_RXDAT		42
+#define LOCHNAGAR2_PIN_DSP_AIF1_LRCLK		43
+#define LOCHNAGAR2_PIN_DSP_AIF1_TXDAT		44
+#define LOCHNAGAR2_PIN_DSP_AIF2_BCLK		45
+#define LOCHNAGAR2_PIN_DSP_AIF2_RXDAT		46
+#define LOCHNAGAR2_PIN_DSP_AIF2_LRCLK		47
+#define LOCHNAGAR2_PIN_DSP_AIF2_TXDAT		48
+#define LOCHNAGAR2_PIN_PSIA1_BCLK		49
+#define LOCHNAGAR2_PIN_PSIA1_RXDAT		50
+#define LOCHNAGAR2_PIN_PSIA1_LRCLK		51
+#define LOCHNAGAR2_PIN_PSIA1_TXDAT		52
+#define LOCHNAGAR2_PIN_PSIA2_BCLK		53
+#define LOCHNAGAR2_PIN_PSIA2_RXDAT		54
+#define LOCHNAGAR2_PIN_PSIA2_LRCLK		55
+#define LOCHNAGAR2_PIN_PSIA2_TXDAT		56
+#define LOCHNAGAR2_PIN_GF_AIF3_BCLK		57
+#define LOCHNAGAR2_PIN_GF_AIF3_RXDAT		58
+#define LOCHNAGAR2_PIN_GF_AIF3_LRCLK		59
+#define LOCHNAGAR2_PIN_GF_AIF3_TXDAT		60
+#define LOCHNAGAR2_PIN_GF_AIF4_BCLK		61
+#define LOCHNAGAR2_PIN_GF_AIF4_RXDAT		62
+#define LOCHNAGAR2_PIN_GF_AIF4_LRCLK		63
+#define LOCHNAGAR2_PIN_GF_AIF4_TXDAT		64
+#define LOCHNAGAR2_PIN_GF_AIF1_BCLK		65
+#define LOCHNAGAR2_PIN_GF_AIF1_RXDAT		66
+#define LOCHNAGAR2_PIN_GF_AIF1_LRCLK		67
+#define LOCHNAGAR2_PIN_GF_AIF1_TXDAT		68
+#define LOCHNAGAR2_PIN_GF_AIF2_BCLK		69
+#define LOCHNAGAR2_PIN_GF_AIF2_RXDAT		70
+#define LOCHNAGAR2_PIN_GF_AIF2_LRCLK		71
+#define LOCHNAGAR2_PIN_GF_AIF2_TXDAT		72
+#define LOCHNAGAR2_PIN_DSP_UART1_RX		73
+#define LOCHNAGAR2_PIN_DSP_UART1_TX		74
+#define LOCHNAGAR2_PIN_DSP_UART2_RX		75
+#define LOCHNAGAR2_PIN_DSP_UART2_TX		76
+#define LOCHNAGAR2_PIN_GF_UART2_RX		77
+#define LOCHNAGAR2_PIN_GF_UART2_TX		78
+#define LOCHNAGAR2_PIN_USB_UART_RX		79
+#define LOCHNAGAR2_PIN_CDC_PDMCLK1		80
+#define LOCHNAGAR2_PIN_CDC_PDMDAT1		81
+#define LOCHNAGAR2_PIN_CDC_PDMCLK2		82
+#define LOCHNAGAR2_PIN_CDC_PDMDAT2		83
+#define LOCHNAGAR2_PIN_CDC_DMICCLK1		84
+#define LOCHNAGAR2_PIN_CDC_DMICDAT1		85
+#define LOCHNAGAR2_PIN_CDC_DMICCLK2		86
+#define LOCHNAGAR2_PIN_CDC_DMICDAT2		87
+#define LOCHNAGAR2_PIN_CDC_DMICCLK3		88
+#define LOCHNAGAR2_PIN_CDC_DMICDAT3		89
+#define LOCHNAGAR2_PIN_CDC_DMICCLK4		90
+#define LOCHNAGAR2_PIN_CDC_DMICDAT4		91
+#define LOCHNAGAR2_PIN_DSP_DMICCLK1		92
+#define LOCHNAGAR2_PIN_DSP_DMICDAT1		93
+#define LOCHNAGAR2_PIN_DSP_DMICCLK2		94
+#define LOCHNAGAR2_PIN_DSP_DMICDAT2		95
+#define LOCHNAGAR2_PIN_I2C2_SCL			96
+#define LOCHNAGAR2_PIN_I2C2_SDA			97
+#define LOCHNAGAR2_PIN_I2C3_SCL			98
+#define LOCHNAGAR2_PIN_I2C3_SDA			99
+#define LOCHNAGAR2_PIN_I2C4_SCL			100
+#define LOCHNAGAR2_PIN_I2C4_SDA			101
+#define LOCHNAGAR2_PIN_DSP_STANDBY		102
+#define LOCHNAGAR2_PIN_CDC_MCLK1		103
+#define LOCHNAGAR2_PIN_CDC_MCLK2		104
+#define LOCHNAGAR2_PIN_DSP_CLKIN		105
+#define LOCHNAGAR2_PIN_PSIA1_MCLK		106
+#define LOCHNAGAR2_PIN_PSIA2_MCLK		107
+#define LOCHNAGAR2_PIN_GF_GPIO1			108
+#define LOCHNAGAR2_PIN_GF_GPIO5			109
+#define LOCHNAGAR2_PIN_DSP_GPIO20		110
+#define LOCHNAGAR2_PIN_NUM_GPIOS		111
+
+#endif
-- 
cgit v1.2.3


From 422dcafe477c7240d03c7b150704c45e0b17be57 Mon Sep 17 00:00:00 2001
From: Charles Keepax <ckeepax@opensource.cirrus.com>
Date: Wed, 30 Jan 2019 11:41:26 +0000
Subject: mfd: lochnagar: Add support for the Cirrus Logic Lochnagar

Lochnagar is an evaluation and development board for Cirrus
Logic Smart CODEC and Amp devices. It allows the connection of
most Cirrus Logic devices on mini-cards, as well as allowing
connection of various application processor systems to provide a
full evaluation platform. This driver supports the board
controller chip on the Lochnagar board. Audio system topology,
clocking and power can all be controlled through the Lochnagar
controller chip, allowing the device under test to be used in
a variety of possible use cases.

As the Lochnagar is a fairly complex device this MFD driver
allows the drivers for the various features to be bound
in. Initially clocking, regulator and pinctrl will be added as
these are necessary to configure the system. But in time at least
audio and voltage/current monitoring will also be added.

Signed-off-by: Charles Keepax <ckeepax@opensource.cirrus.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 MAINTAINERS                         |  17 ++
 drivers/mfd/Kconfig                 |   8 +
 drivers/mfd/Makefile                |   2 +
 drivers/mfd/lochnagar-i2c.c         | 398 ++++++++++++++++++++++++++++++++++++
 include/linux/mfd/lochnagar.h       |  55 +++++
 include/linux/mfd/lochnagar1_regs.h | 157 ++++++++++++++
 include/linux/mfd/lochnagar2_regs.h | 291 ++++++++++++++++++++++++++
 7 files changed, 928 insertions(+)
 create mode 100644 drivers/mfd/lochnagar-i2c.c
 create mode 100644 include/linux/mfd/lochnagar.h
 create mode 100644 include/linux/mfd/lochnagar1_regs.h
 create mode 100644 include/linux/mfd/lochnagar2_regs.h

(limited to 'include')

diff --git a/MAINTAINERS b/MAINTAINERS
index 51029a425dbe..3e3f0384362b 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3700,6 +3700,23 @@ L:	netdev@vger.kernel.org
 S:	Maintained
 F:	drivers/net/ethernet/cirrus/ep93xx_eth.c
 
+CIRRUS LOGIC LOCHNAGAR DRIVER
+M:	Charles Keepax <ckeepax@opensource.cirrus.com>
+M:	Richard Fitzgerald <rf@opensource.cirrus.com>
+L:	patches@opensource.cirrus.com
+S:	Supported
+F:	drivers/clk/clk-lochnagar.c
+F:	drivers/mfd/lochnagar-i2c.c
+F:	drivers/pinctrl/cirrus/pinctrl-lochnagar.c
+F:	drivers/regulator/lochnagar-regulator.c
+F:	include/dt-bindings/clk/lochnagar.h
+F:	include/dt-bindings/pinctrl/lochnagar.h
+F:	include/linux/mfd/lochnagar*
+F:	Documentation/devicetree/bindings/mfd/cirrus,lochnagar.txt
+F:	Documentation/devicetree/bindings/clock/cirrus,lochnagar.txt
+F:	Documentation/devicetree/bindings/pinctrl/cirrus,lochnagar.txt
+F:	Documentation/devicetree/bindings/regulator/cirrus,lochnagar.txt
+
 CISCO FCOE HBA DRIVER
 M:	Satish Kharat <satishkh@cisco.com>
 M:	Sesidhar Baddela <sebaddel@cisco.com>
diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index 6e58221f5c28..f38f8741c68e 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -1686,6 +1686,14 @@ config MFD_VX855
 	  VIA VX855/VX875 south bridge. You will need to enable the vx855_spi
 	  and/or vx855_gpio drivers for this to do anything useful.
 
+config MFD_LOCHNAGAR
+	bool "Cirrus Logic Lochnagar Audio Development Board"
+	select MFD_CORE
+	select REGMAP_I2C
+	depends on I2C=y && OF
+	help
+	  Support for Cirrus Logic Lochnagar audio development board.
+
 config MFD_ARIZONA
 	select REGMAP
 	select REGMAP_IRQ
diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
index a62fb0112d9f..a406fd3b8681 100644
--- a/drivers/mfd/Makefile
+++ b/drivers/mfd/Makefile
@@ -37,6 +37,8 @@ obj-$(CONFIG_MFD_T7L66XB)	+= t7l66xb.o tmio_core.o
 obj-$(CONFIG_MFD_TC6387XB)	+= tc6387xb.o tmio_core.o
 obj-$(CONFIG_MFD_TC6393XB)	+= tc6393xb.o tmio_core.o
 
+obj-$(CONFIG_MFD_LOCHNAGAR)	+= lochnagar-i2c.o
+
 obj-$(CONFIG_MFD_ARIZONA)	+= arizona-core.o
 obj-$(CONFIG_MFD_ARIZONA)	+= arizona-irq.o
 obj-$(CONFIG_MFD_ARIZONA_I2C)	+= arizona-i2c.o
diff --git a/drivers/mfd/lochnagar-i2c.c b/drivers/mfd/lochnagar-i2c.c
new file mode 100644
index 000000000000..3a65d9938902
--- /dev/null
+++ b/drivers/mfd/lochnagar-i2c.c
@@ -0,0 +1,398 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Lochnagar I2C bus interface
+ *
+ * Copyright (c) 2012-2018 Cirrus Logic, Inc. and
+ *                         Cirrus Logic International Semiconductor Ltd.
+ *
+ * Author: Charles Keepax <ckeepax@opensource.cirrus.com>
+ */
+
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/gpio/consumer.h>
+#include <linux/i2c.h>
+#include <linux/lockdep.h>
+#include <linux/mfd/core.h>
+#include <linux/mutex.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/regmap.h>
+
+#include <linux/mfd/lochnagar.h>
+#include <linux/mfd/lochnagar1_regs.h>
+#include <linux/mfd/lochnagar2_regs.h>
+
+#define LOCHNAGAR_BOOT_RETRIES		10
+#define LOCHNAGAR_BOOT_DELAY_MS		350
+
+#define LOCHNAGAR_CONFIG_POLL_US	10000
+
+static bool lochnagar1_readable_register(struct device *dev, unsigned int reg)
+{
+	switch (reg) {
+	case LOCHNAGAR_SOFTWARE_RESET:
+	case LOCHNAGAR_FIRMWARE_ID1...LOCHNAGAR_FIRMWARE_ID2:
+	case LOCHNAGAR1_CDC_AIF1_SEL...LOCHNAGAR1_CDC_AIF3_SEL:
+	case LOCHNAGAR1_CDC_MCLK1_SEL...LOCHNAGAR1_CDC_MCLK2_SEL:
+	case LOCHNAGAR1_CDC_AIF_CTRL1...LOCHNAGAR1_CDC_AIF_CTRL2:
+	case LOCHNAGAR1_EXT_AIF_CTRL:
+	case LOCHNAGAR1_DSP_AIF1_SEL...LOCHNAGAR1_DSP_AIF2_SEL:
+	case LOCHNAGAR1_DSP_CLKIN_SEL:
+	case LOCHNAGAR1_DSP_AIF:
+	case LOCHNAGAR1_GF_AIF1...LOCHNAGAR1_GF_AIF2:
+	case LOCHNAGAR1_PSIA_AIF:
+	case LOCHNAGAR1_PSIA1_SEL...LOCHNAGAR1_PSIA2_SEL:
+	case LOCHNAGAR1_SPDIF_AIF_SEL:
+	case LOCHNAGAR1_GF_AIF3_SEL...LOCHNAGAR1_GF_AIF4_SEL:
+	case LOCHNAGAR1_GF_CLKOUT1_SEL:
+	case LOCHNAGAR1_GF_AIF1_SEL...LOCHNAGAR1_GF_AIF2_SEL:
+	case LOCHNAGAR1_GF_GPIO2...LOCHNAGAR1_GF_GPIO7:
+	case LOCHNAGAR1_RST:
+	case LOCHNAGAR1_LED1...LOCHNAGAR1_LED2:
+	case LOCHNAGAR1_I2C_CTRL:
+		return true;
+	default:
+		return false;
+	}
+}
+
+static const struct regmap_config lochnagar1_i2c_regmap = {
+	.reg_bits = 8,
+	.val_bits = 8,
+	.reg_format_endian = REGMAP_ENDIAN_BIG,
+	.val_format_endian = REGMAP_ENDIAN_BIG,
+
+	.max_register = 0x50,
+	.readable_reg = lochnagar1_readable_register,
+
+	.use_single_read = true,
+	.use_single_write = true,
+
+	.cache_type = REGCACHE_RBTREE,
+};
+
+static const struct reg_sequence lochnagar1_patch[] = {
+	{ 0x40, 0x0083 },
+	{ 0x47, 0x0018 },
+	{ 0x50, 0x0000 },
+};
+
+static bool lochnagar2_readable_register(struct device *dev, unsigned int reg)
+{
+	switch (reg) {
+	case LOCHNAGAR_SOFTWARE_RESET:
+	case LOCHNAGAR_FIRMWARE_ID1...LOCHNAGAR_FIRMWARE_ID2:
+	case LOCHNAGAR2_CDC_AIF1_CTRL...LOCHNAGAR2_CDC_AIF3_CTRL:
+	case LOCHNAGAR2_DSP_AIF1_CTRL...LOCHNAGAR2_DSP_AIF2_CTRL:
+	case LOCHNAGAR2_PSIA1_CTRL...LOCHNAGAR2_PSIA2_CTRL:
+	case LOCHNAGAR2_GF_AIF3_CTRL...LOCHNAGAR2_GF_AIF4_CTRL:
+	case LOCHNAGAR2_GF_AIF1_CTRL...LOCHNAGAR2_GF_AIF2_CTRL:
+	case LOCHNAGAR2_SPDIF_AIF_CTRL:
+	case LOCHNAGAR2_USB_AIF1_CTRL...LOCHNAGAR2_USB_AIF2_CTRL:
+	case LOCHNAGAR2_ADAT_AIF_CTRL:
+	case LOCHNAGAR2_CDC_MCLK1_CTRL...LOCHNAGAR2_CDC_MCLK2_CTRL:
+	case LOCHNAGAR2_DSP_CLKIN_CTRL:
+	case LOCHNAGAR2_PSIA1_MCLK_CTRL...LOCHNAGAR2_PSIA2_MCLK_CTRL:
+	case LOCHNAGAR2_SPDIF_MCLK_CTRL:
+	case LOCHNAGAR2_GF_CLKOUT1_CTRL...LOCHNAGAR2_GF_CLKOUT2_CTRL:
+	case LOCHNAGAR2_ADAT_MCLK_CTRL:
+	case LOCHNAGAR2_SOUNDCARD_MCLK_CTRL:
+	case LOCHNAGAR2_GPIO_FPGA_GPIO1...LOCHNAGAR2_GPIO_FPGA_GPIO6:
+	case LOCHNAGAR2_GPIO_CDC_GPIO1...LOCHNAGAR2_GPIO_CDC_GPIO8:
+	case LOCHNAGAR2_GPIO_DSP_GPIO1...LOCHNAGAR2_GPIO_DSP_GPIO6:
+	case LOCHNAGAR2_GPIO_GF_GPIO2...LOCHNAGAR2_GPIO_GF_GPIO7:
+	case LOCHNAGAR2_GPIO_CDC_AIF1_BCLK...LOCHNAGAR2_GPIO_CDC_AIF3_TXDAT:
+	case LOCHNAGAR2_GPIO_DSP_AIF1_BCLK...LOCHNAGAR2_GPIO_DSP_AIF2_TXDAT:
+	case LOCHNAGAR2_GPIO_PSIA1_BCLK...LOCHNAGAR2_GPIO_PSIA2_TXDAT:
+	case LOCHNAGAR2_GPIO_GF_AIF3_BCLK...LOCHNAGAR2_GPIO_GF_AIF4_TXDAT:
+	case LOCHNAGAR2_GPIO_GF_AIF1_BCLK...LOCHNAGAR2_GPIO_GF_AIF2_TXDAT:
+	case LOCHNAGAR2_GPIO_DSP_UART1_RX...LOCHNAGAR2_GPIO_DSP_UART2_TX:
+	case LOCHNAGAR2_GPIO_GF_UART2_RX...LOCHNAGAR2_GPIO_GF_UART2_TX:
+	case LOCHNAGAR2_GPIO_USB_UART_RX:
+	case LOCHNAGAR2_GPIO_CDC_PDMCLK1...LOCHNAGAR2_GPIO_CDC_PDMDAT2:
+	case LOCHNAGAR2_GPIO_CDC_DMICCLK1...LOCHNAGAR2_GPIO_CDC_DMICDAT4:
+	case LOCHNAGAR2_GPIO_DSP_DMICCLK1...LOCHNAGAR2_GPIO_DSP_DMICDAT2:
+	case LOCHNAGAR2_GPIO_I2C2_SCL...LOCHNAGAR2_GPIO_I2C4_SDA:
+	case LOCHNAGAR2_GPIO_DSP_STANDBY:
+	case LOCHNAGAR2_GPIO_CDC_MCLK1...LOCHNAGAR2_GPIO_CDC_MCLK2:
+	case LOCHNAGAR2_GPIO_DSP_CLKIN:
+	case LOCHNAGAR2_GPIO_PSIA1_MCLK...LOCHNAGAR2_GPIO_PSIA2_MCLK:
+	case LOCHNAGAR2_GPIO_GF_GPIO1...LOCHNAGAR2_GPIO_GF_GPIO5:
+	case LOCHNAGAR2_GPIO_DSP_GPIO20:
+	case LOCHNAGAR2_GPIO_CHANNEL1...LOCHNAGAR2_GPIO_CHANNEL16:
+	case LOCHNAGAR2_MINICARD_RESETS:
+	case LOCHNAGAR2_ANALOGUE_PATH_CTRL1...LOCHNAGAR2_ANALOGUE_PATH_CTRL2:
+	case LOCHNAGAR2_COMMS_CTRL4:
+	case LOCHNAGAR2_SPDIF_CTRL:
+	case LOCHNAGAR2_IMON_CTRL1...LOCHNAGAR2_IMON_CTRL4:
+	case LOCHNAGAR2_IMON_DATA1...LOCHNAGAR2_IMON_DATA2:
+	case LOCHNAGAR2_POWER_CTRL:
+	case LOCHNAGAR2_MICVDD_CTRL1:
+	case LOCHNAGAR2_MICVDD_CTRL2:
+	case LOCHNAGAR2_VDDCORE_CDC_CTRL1:
+	case LOCHNAGAR2_VDDCORE_CDC_CTRL2:
+	case LOCHNAGAR2_SOUNDCARD_AIF_CTRL:
+		return true;
+	default:
+		return false;
+	}
+}
+
+static bool lochnagar2_volatile_register(struct device *dev, unsigned int reg)
+{
+	switch (reg) {
+	case LOCHNAGAR2_GPIO_CHANNEL1...LOCHNAGAR2_GPIO_CHANNEL16:
+	case LOCHNAGAR2_ANALOGUE_PATH_CTRL1:
+	case LOCHNAGAR2_IMON_CTRL3...LOCHNAGAR2_IMON_CTRL4:
+	case LOCHNAGAR2_IMON_DATA1...LOCHNAGAR2_IMON_DATA2:
+		return true;
+	default:
+		return false;
+	}
+}
+
+static const struct regmap_config lochnagar2_i2c_regmap = {
+	.reg_bits = 16,
+	.val_bits = 16,
+	.reg_format_endian = REGMAP_ENDIAN_BIG,
+	.val_format_endian = REGMAP_ENDIAN_BIG,
+
+	.max_register = 0x1F1F,
+	.readable_reg = lochnagar2_readable_register,
+	.volatile_reg = lochnagar2_volatile_register,
+
+	.cache_type = REGCACHE_RBTREE,
+};
+
+static const struct reg_sequence lochnagar2_patch[] = {
+	{ 0x00EE, 0x0000 },
+};
+
+struct lochnagar_config {
+	int id;
+	const char * const name;
+	enum lochnagar_type type;
+	const struct regmap_config *regmap;
+	const struct reg_sequence *patch;
+	int npatch;
+};
+
+static struct lochnagar_config lochnagar_configs[] = {
+	{
+		.id = 0x50,
+		.name = "lochnagar1",
+		.type = LOCHNAGAR1,
+		.regmap = &lochnagar1_i2c_regmap,
+		.patch = lochnagar1_patch,
+		.npatch = ARRAY_SIZE(lochnagar1_patch),
+	},
+	{
+		.id = 0xCB58,
+		.name = "lochnagar2",
+		.type = LOCHNAGAR2,
+		.regmap = &lochnagar2_i2c_regmap,
+		.patch = lochnagar2_patch,
+		.npatch = ARRAY_SIZE(lochnagar2_patch),
+	},
+};
+
+static const struct of_device_id lochnagar_of_match[] = {
+	{ .compatible = "cirrus,lochnagar1", .data = &lochnagar_configs[0] },
+	{ .compatible = "cirrus,lochnagar2", .data = &lochnagar_configs[1] },
+	{},
+};
+
+static int lochnagar_wait_for_boot(struct regmap *regmap, unsigned int *id)
+{
+	int i, ret;
+
+	for (i = 0; i < LOCHNAGAR_BOOT_RETRIES; ++i) {
+		msleep(LOCHNAGAR_BOOT_DELAY_MS);
+
+		/* The reset register will return the device ID when read */
+		ret = regmap_read(regmap, LOCHNAGAR_SOFTWARE_RESET, id);
+		if (!ret)
+			return ret;
+	}
+
+	return -ETIMEDOUT;
+}
+
+/**
+ * lochnagar_update_config - Synchronise the boards analogue configuration to
+ *                           the hardware.
+ *
+ * @lochnagar: A pointer to the primary core data structure.
+ *
+ * Return: Zero on success or an appropriate negative error code on failure.
+ */
+int lochnagar_update_config(struct lochnagar *lochnagar)
+{
+	struct regmap *regmap = lochnagar->regmap;
+	unsigned int done = LOCHNAGAR2_ANALOGUE_PATH_UPDATE_STS_MASK;
+	int timeout_ms = LOCHNAGAR_BOOT_DELAY_MS * LOCHNAGAR_BOOT_RETRIES;
+	unsigned int val = 0;
+	int ret;
+
+	lockdep_assert_held(&lochnagar->analogue_config_lock);
+
+	if (lochnagar->type != LOCHNAGAR2)
+		return 0;
+
+	/*
+	 * Toggle the ANALOGUE_PATH_UPDATE bit and wait for the device to
+	 * acknowledge that any outstanding changes to the analogue
+	 * configuration have been applied.
+	 */
+	ret = regmap_write(regmap, LOCHNAGAR2_ANALOGUE_PATH_CTRL1, 0);
+	if (ret < 0)
+		return ret;
+
+	ret = regmap_write(regmap, LOCHNAGAR2_ANALOGUE_PATH_CTRL1,
+			   LOCHNAGAR2_ANALOGUE_PATH_UPDATE_MASK);
+	if (ret < 0)
+		return ret;
+
+	ret = regmap_read_poll_timeout(regmap,
+				       LOCHNAGAR2_ANALOGUE_PATH_CTRL1, val,
+				       (val & done), LOCHNAGAR_CONFIG_POLL_US,
+				       timeout_ms * 1000);
+	if (ret < 0)
+		return ret;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(lochnagar_update_config);
+
+static int lochnagar_i2c_probe(struct i2c_client *i2c)
+{
+	struct device *dev = &i2c->dev;
+	const struct lochnagar_config *config = NULL;
+	const struct of_device_id *of_id;
+	struct lochnagar *lochnagar;
+	struct gpio_desc *reset, *present;
+	unsigned int val;
+	unsigned int firmwareid;
+	unsigned int devid, rev;
+	int ret;
+
+	lochnagar = devm_kzalloc(dev, sizeof(*lochnagar), GFP_KERNEL);
+	if (!lochnagar)
+		return -ENOMEM;
+
+	of_id = of_match_device(lochnagar_of_match, dev);
+	if (!of_id)
+		return -EINVAL;
+
+	config = of_id->data;
+
+	lochnagar->dev = dev;
+	mutex_init(&lochnagar->analogue_config_lock);
+
+	dev_set_drvdata(dev, lochnagar);
+
+	reset = devm_gpiod_get(dev, "reset", GPIOD_OUT_LOW);
+	if (IS_ERR(reset)) {
+		ret = PTR_ERR(reset);
+		dev_err(dev, "Failed to get reset GPIO: %d\n", ret);
+		return ret;
+	}
+
+	present = devm_gpiod_get_optional(dev, "present", GPIOD_OUT_HIGH);
+	if (IS_ERR(present)) {
+		ret = PTR_ERR(present);
+		dev_err(dev, "Failed to get present GPIO: %d\n", ret);
+		return ret;
+	}
+
+	/* Leave the Lochnagar in reset for a reasonable amount of time */
+	msleep(20);
+
+	/* Bring Lochnagar out of reset */
+	gpiod_set_value_cansleep(reset, 1);
+
+	/* Identify Lochnagar */
+	lochnagar->type = config->type;
+
+	lochnagar->regmap = devm_regmap_init_i2c(i2c, config->regmap);
+	if (IS_ERR(lochnagar->regmap)) {
+		ret = PTR_ERR(lochnagar->regmap);
+		dev_err(dev, "Failed to allocate register map: %d\n", ret);
+		return ret;
+	}
+
+	/* Wait for Lochnagar to boot */
+	ret = lochnagar_wait_for_boot(lochnagar->regmap, &val);
+	if (ret < 0) {
+		dev_err(dev, "Failed to read device ID: %d\n", ret);
+		return ret;
+	}
+
+	devid = val & LOCHNAGAR_DEVICE_ID_MASK;
+	rev = val & LOCHNAGAR_REV_ID_MASK;
+
+	if (devid != config->id) {
+		dev_err(dev,
+			"ID does not match %s (expected 0x%x got 0x%x)\n",
+			config->name, config->id, devid);
+		return -ENODEV;
+	}
+
+	/* Identify firmware */
+	ret = regmap_read(lochnagar->regmap, LOCHNAGAR_FIRMWARE_ID1, &val);
+	if (ret < 0) {
+		dev_err(dev, "Failed to read firmware id 1: %d\n", ret);
+		return ret;
+	}
+
+	firmwareid = val;
+
+	ret = regmap_read(lochnagar->regmap, LOCHNAGAR_FIRMWARE_ID2, &val);
+	if (ret < 0) {
+		dev_err(dev, "Failed to read firmware id 2: %d\n", ret);
+		return ret;
+	}
+
+	firmwareid |= (val << config->regmap->val_bits);
+
+	dev_info(dev, "Found %s (0x%x) revision %u firmware 0x%.6x\n",
+		 config->name, devid, rev + 1, firmwareid);
+
+	ret = regmap_register_patch(lochnagar->regmap, config->patch,
+				    config->npatch);
+	if (ret < 0) {
+		dev_err(dev, "Failed to register patch: %d\n", ret);
+		return ret;
+	}
+
+	ret = devm_of_platform_populate(dev);
+	if (ret < 0) {
+		dev_err(dev, "Failed to populate child nodes: %d\n", ret);
+		return ret;
+	}
+
+	return ret;
+}
+
+static struct i2c_driver lochnagar_i2c_driver = {
+	.driver = {
+		.name = "lochnagar",
+		.of_match_table = of_match_ptr(lochnagar_of_match),
+		.suppress_bind_attrs = true,
+	},
+	.probe_new = lochnagar_i2c_probe,
+};
+
+static int __init lochnagar_i2c_init(void)
+{
+	int ret;
+
+	ret = i2c_add_driver(&lochnagar_i2c_driver);
+	if (ret)
+		pr_err("Failed to register Lochnagar driver: %d\n", ret);
+
+	return ret;
+}
+subsys_initcall(lochnagar_i2c_init);
diff --git a/include/linux/mfd/lochnagar.h b/include/linux/mfd/lochnagar.h
new file mode 100644
index 000000000000..ff9e64cfc9fb
--- /dev/null
+++ b/include/linux/mfd/lochnagar.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Lochnagar internals
+ *
+ * Copyright (c) 2013-2018 Cirrus Logic, Inc. and
+ *                         Cirrus Logic International Semiconductor Ltd.
+ *
+ * Author: Charles Keepax <ckeepax@opensource.cirrus.com>
+ */
+
+#include <linux/device.h>
+#include <linux/mutex.h>
+#include <linux/regmap.h>
+
+#ifndef CIRRUS_LOCHNAGAR_H
+#define CIRRUS_LOCHNAGAR_H
+
+enum lochnagar_type {
+	LOCHNAGAR1,
+	LOCHNAGAR2,
+};
+
+/**
+ * struct lochnagar - Core data for the Lochnagar audio board driver.
+ *
+ * @type: The type of Lochnagar device connected.
+ * @dev: A pointer to the struct device for the main MFD.
+ * @regmap: The devices main register map.
+ * @analogue_config_lock: Lock used to protect updates in the analogue
+ * configuration as these must not be changed whilst the hardware is processing
+ * the last update.
+ */
+struct lochnagar {
+	enum lochnagar_type type;
+	struct device *dev;
+	struct regmap *regmap;
+
+	/* Lock to protect updates to the analogue configuration */
+	struct mutex analogue_config_lock;
+};
+
+/* Register Addresses */
+#define LOCHNAGAR_SOFTWARE_RESET                             0x00
+#define LOCHNAGAR_FIRMWARE_ID1                               0x01
+#define LOCHNAGAR_FIRMWARE_ID2                               0x02
+
+/* (0x0000)  Software Reset */
+#define LOCHNAGAR_DEVICE_ID_MASK                           0xFFFC
+#define LOCHNAGAR_DEVICE_ID_SHIFT                               2
+#define LOCHNAGAR_REV_ID_MASK                              0x0003
+#define LOCHNAGAR_REV_ID_SHIFT                                  0
+
+int lochnagar_update_config(struct lochnagar *lochnagar);
+
+#endif
diff --git a/include/linux/mfd/lochnagar1_regs.h b/include/linux/mfd/lochnagar1_regs.h
new file mode 100644
index 000000000000..114b846245d9
--- /dev/null
+++ b/include/linux/mfd/lochnagar1_regs.h
@@ -0,0 +1,157 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Lochnagar1 register definitions
+ *
+ * Copyright (c) 2017-2018 Cirrus Logic, Inc. and
+ *                         Cirrus Logic International Semiconductor Ltd.
+ *
+ * Author: Charles Keepax <ckeepax@opensource.cirrus.com>
+ */
+
+#ifndef LOCHNAGAR1_REGISTERS_H
+#define LOCHNAGAR1_REGISTERS_H
+
+/* Register Addresses */
+#define LOCHNAGAR1_CDC_AIF1_SEL                       0x0008
+#define LOCHNAGAR1_CDC_AIF2_SEL                       0x0009
+#define LOCHNAGAR1_CDC_AIF3_SEL                       0x000A
+#define LOCHNAGAR1_CDC_MCLK1_SEL                      0x000B
+#define LOCHNAGAR1_CDC_MCLK2_SEL                      0x000C
+#define LOCHNAGAR1_CDC_AIF_CTRL1                      0x000D
+#define LOCHNAGAR1_CDC_AIF_CTRL2                      0x000E
+#define LOCHNAGAR1_EXT_AIF_CTRL                       0x000F
+#define LOCHNAGAR1_DSP_AIF1_SEL                       0x0010
+#define LOCHNAGAR1_DSP_AIF2_SEL                       0x0011
+#define LOCHNAGAR1_DSP_CLKIN_SEL                      0x0012
+#define LOCHNAGAR1_DSP_AIF                            0x0013
+#define LOCHNAGAR1_GF_AIF1                            0x0014
+#define LOCHNAGAR1_GF_AIF2                            0x0015
+#define LOCHNAGAR1_PSIA_AIF                           0x0016
+#define LOCHNAGAR1_PSIA1_SEL                          0x0017
+#define LOCHNAGAR1_PSIA2_SEL                          0x0018
+#define LOCHNAGAR1_SPDIF_AIF_SEL                      0x0019
+#define LOCHNAGAR1_GF_AIF3_SEL                        0x001C
+#define LOCHNAGAR1_GF_AIF4_SEL                        0x001D
+#define LOCHNAGAR1_GF_CLKOUT1_SEL                     0x001E
+#define LOCHNAGAR1_GF_AIF1_SEL                        0x001F
+#define LOCHNAGAR1_GF_AIF2_SEL                        0x0020
+#define LOCHNAGAR1_GF_GPIO2                           0x0026
+#define LOCHNAGAR1_GF_GPIO3                           0x0027
+#define LOCHNAGAR1_GF_GPIO7                           0x0028
+#define LOCHNAGAR1_RST                                0x0029
+#define LOCHNAGAR1_LED1                               0x002A
+#define LOCHNAGAR1_LED2                               0x002B
+#define LOCHNAGAR1_I2C_CTRL                           0x0046
+
+/*
+ * (0x0008 - 0x000C, 0x0010 - 0x0012, 0x0017 - 0x0020)
+ * CDC_AIF1_SEL - GF_AIF2_SEL
+ */
+#define LOCHNAGAR1_SRC_MASK                             0xFF
+#define LOCHNAGAR1_SRC_SHIFT                               0
+
+/* (0x000D)  CDC_AIF_CTRL1 */
+#define LOCHNAGAR1_CDC_AIF2_LRCLK_DIR_MASK              0x40
+#define LOCHNAGAR1_CDC_AIF2_LRCLK_DIR_SHIFT                6
+#define LOCHNAGAR1_CDC_AIF2_BCLK_DIR_MASK               0x20
+#define LOCHNAGAR1_CDC_AIF2_BCLK_DIR_SHIFT                 5
+#define LOCHNAGAR1_CDC_AIF2_ENA_MASK                    0x10
+#define LOCHNAGAR1_CDC_AIF2_ENA_SHIFT                      4
+#define LOCHNAGAR1_CDC_AIF1_LRCLK_DIR_MASK              0x04
+#define LOCHNAGAR1_CDC_AIF1_LRCLK_DIR_SHIFT                2
+#define LOCHNAGAR1_CDC_AIF1_BCLK_DIR_MASK               0x02
+#define LOCHNAGAR1_CDC_AIF1_BCLK_DIR_SHIFT                 1
+#define LOCHNAGAR1_CDC_AIF1_ENA_MASK                    0x01
+#define LOCHNAGAR1_CDC_AIF1_ENA_SHIFT                      0
+
+/* (0x000E)  CDC_AIF_CTRL2 */
+#define LOCHNAGAR1_CDC_AIF3_LRCLK_DIR_MASK              0x40
+#define LOCHNAGAR1_CDC_AIF3_LRCLK_DIR_SHIFT                6
+#define LOCHNAGAR1_CDC_AIF3_BCLK_DIR_MASK               0x20
+#define LOCHNAGAR1_CDC_AIF3_BCLK_DIR_SHIFT                 5
+#define LOCHNAGAR1_CDC_AIF3_ENA_MASK                    0x10
+#define LOCHNAGAR1_CDC_AIF3_ENA_SHIFT                      4
+#define LOCHNAGAR1_CDC_MCLK1_ENA_MASK                   0x02
+#define LOCHNAGAR1_CDC_MCLK1_ENA_SHIFT                     1
+#define LOCHNAGAR1_CDC_MCLK2_ENA_MASK                   0x01
+#define LOCHNAGAR1_CDC_MCLK2_ENA_SHIFT                     0
+
+/* (0x000F)  EXT_AIF_CTRL */
+#define LOCHNAGAR1_SPDIF_AIF_LRCLK_DIR_MASK             0x20
+#define LOCHNAGAR1_SPDIF_AIF_LRCLK_DIR_SHIFT               5
+#define LOCHNAGAR1_SPDIF_AIF_BCLK_DIR_MASK              0x10
+#define LOCHNAGAR1_SPDIF_AIF_BCLK_DIR_SHIFT                4
+#define LOCHNAGAR1_SPDIF_AIF_ENA_MASK                   0x08
+#define LOCHNAGAR1_SPDIF_AIF_ENA_SHIFT                     3
+
+/* (0x0013)  DSP_AIF */
+#define LOCHNAGAR1_DSP_AIF2_LRCLK_DIR_MASK              0x40
+#define LOCHNAGAR1_DSP_AIF2_LRCLK_DIR_SHIFT                6
+#define LOCHNAGAR1_DSP_AIF2_BCLK_DIR_MASK               0x20
+#define LOCHNAGAR1_DSP_AIF2_BCLK_DIR_SHIFT                 5
+#define LOCHNAGAR1_DSP_AIF2_ENA_MASK                    0x10
+#define LOCHNAGAR1_DSP_AIF2_ENA_SHIFT                      4
+#define LOCHNAGAR1_DSP_CLKIN_ENA_MASK                   0x08
+#define LOCHNAGAR1_DSP_CLKIN_ENA_SHIFT                     3
+#define LOCHNAGAR1_DSP_AIF1_LRCLK_DIR_MASK              0x04
+#define LOCHNAGAR1_DSP_AIF1_LRCLK_DIR_SHIFT                2
+#define LOCHNAGAR1_DSP_AIF1_BCLK_DIR_MASK               0x02
+#define LOCHNAGAR1_DSP_AIF1_BCLK_DIR_SHIFT                 1
+#define LOCHNAGAR1_DSP_AIF1_ENA_MASK                    0x01
+#define LOCHNAGAR1_DSP_AIF1_ENA_SHIFT                      0
+
+/* (0x0014)  GF_AIF1 */
+#define LOCHNAGAR1_GF_CLKOUT1_ENA_MASK                  0x40
+#define LOCHNAGAR1_GF_CLKOUT1_ENA_SHIFT                    6
+#define LOCHNAGAR1_GF_AIF3_LRCLK_DIR_MASK               0x20
+#define LOCHNAGAR1_GF_AIF3_LRCLK_DIR_SHIFT                 5
+#define LOCHNAGAR1_GF_AIF3_BCLK_DIR_MASK                0x10
+#define LOCHNAGAR1_GF_AIF3_BCLK_DIR_SHIFT                  4
+#define LOCHNAGAR1_GF_AIF3_ENA_MASK                     0x08
+#define LOCHNAGAR1_GF_AIF3_ENA_SHIFT                       3
+#define LOCHNAGAR1_GF_AIF1_LRCLK_DIR_MASK               0x04
+#define LOCHNAGAR1_GF_AIF1_LRCLK_DIR_SHIFT                 2
+#define LOCHNAGAR1_GF_AIF1_BCLK_DIR_MASK                0x02
+#define LOCHNAGAR1_GF_AIF1_BCLK_DIR_SHIFT                  1
+#define LOCHNAGAR1_GF_AIF1_ENA_MASK                     0x01
+#define LOCHNAGAR1_GF_AIF1_ENA_SHIFT                       0
+
+/* (0x0015)  GF_AIF2 */
+#define LOCHNAGAR1_GF_AIF4_LRCLK_DIR_MASK               0x20
+#define LOCHNAGAR1_GF_AIF4_LRCLK_DIR_SHIFT                 5
+#define LOCHNAGAR1_GF_AIF4_BCLK_DIR_MASK                0x10
+#define LOCHNAGAR1_GF_AIF4_BCLK_DIR_SHIFT                  4
+#define LOCHNAGAR1_GF_AIF4_ENA_MASK                     0x08
+#define LOCHNAGAR1_GF_AIF4_ENA_SHIFT                       3
+#define LOCHNAGAR1_GF_AIF2_LRCLK_DIR_MASK               0x04
+#define LOCHNAGAR1_GF_AIF2_LRCLK_DIR_SHIFT                 2
+#define LOCHNAGAR1_GF_AIF2_BCLK_DIR_MASK                0x02
+#define LOCHNAGAR1_GF_AIF2_BCLK_DIR_SHIFT                  1
+#define LOCHNAGAR1_GF_AIF2_ENA_MASK                     0x01
+#define LOCHNAGAR1_GF_AIF2_ENA_SHIFT                       0
+
+/* (0x0016)  PSIA_AIF */
+#define LOCHNAGAR1_PSIA2_LRCLK_DIR_MASK                 0x40
+#define LOCHNAGAR1_PSIA2_LRCLK_DIR_SHIFT                   6
+#define LOCHNAGAR1_PSIA2_BCLK_DIR_MASK                  0x20
+#define LOCHNAGAR1_PSIA2_BCLK_DIR_SHIFT                    5
+#define LOCHNAGAR1_PSIA2_ENA_MASK                       0x10
+#define LOCHNAGAR1_PSIA2_ENA_SHIFT                         4
+#define LOCHNAGAR1_PSIA1_LRCLK_DIR_MASK                 0x04
+#define LOCHNAGAR1_PSIA1_LRCLK_DIR_SHIFT                   2
+#define LOCHNAGAR1_PSIA1_BCLK_DIR_MASK                  0x02
+#define LOCHNAGAR1_PSIA1_BCLK_DIR_SHIFT                    1
+#define LOCHNAGAR1_PSIA1_ENA_MASK                       0x01
+#define LOCHNAGAR1_PSIA1_ENA_SHIFT                         0
+
+/* (0x0029)  RST */
+#define LOCHNAGAR1_DSP_RESET_MASK                       0x02
+#define LOCHNAGAR1_DSP_RESET_SHIFT                         1
+#define LOCHNAGAR1_CDC_RESET_MASK                       0x01
+#define LOCHNAGAR1_CDC_RESET_SHIFT                         0
+
+/* (0x0046)  I2C_CTRL */
+#define LOCHNAGAR1_CDC_CIF_MODE_MASK                    0x01
+#define LOCHNAGAR1_CDC_CIF_MODE_SHIFT                      0
+
+#endif
diff --git a/include/linux/mfd/lochnagar2_regs.h b/include/linux/mfd/lochnagar2_regs.h
new file mode 100644
index 000000000000..419b25a332fd
--- /dev/null
+++ b/include/linux/mfd/lochnagar2_regs.h
@@ -0,0 +1,291 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Lochnagar2 register definitions
+ *
+ * Copyright (c) 2017-2018 Cirrus Logic, Inc. and
+ *                         Cirrus Logic International Semiconductor Ltd.
+ *
+ * Author: Charles Keepax <ckeepax@opensource.cirrus.com>
+ */
+
+#ifndef LOCHNAGAR2_REGISTERS_H
+#define LOCHNAGAR2_REGISTERS_H
+
+/* Register Addresses */
+#define LOCHNAGAR2_CDC_AIF1_CTRL                      0x000D
+#define LOCHNAGAR2_CDC_AIF2_CTRL                      0x000E
+#define LOCHNAGAR2_CDC_AIF3_CTRL                      0x000F
+#define LOCHNAGAR2_DSP_AIF1_CTRL                      0x0010
+#define LOCHNAGAR2_DSP_AIF2_CTRL                      0x0011
+#define LOCHNAGAR2_PSIA1_CTRL                         0x0012
+#define LOCHNAGAR2_PSIA2_CTRL                         0x0013
+#define LOCHNAGAR2_GF_AIF3_CTRL                       0x0014
+#define LOCHNAGAR2_GF_AIF4_CTRL                       0x0015
+#define LOCHNAGAR2_GF_AIF1_CTRL                       0x0016
+#define LOCHNAGAR2_GF_AIF2_CTRL                       0x0017
+#define LOCHNAGAR2_SPDIF_AIF_CTRL                     0x0018
+#define LOCHNAGAR2_USB_AIF1_CTRL                      0x0019
+#define LOCHNAGAR2_USB_AIF2_CTRL                      0x001A
+#define LOCHNAGAR2_ADAT_AIF_CTRL                      0x001B
+#define LOCHNAGAR2_CDC_MCLK1_CTRL                     0x001E
+#define LOCHNAGAR2_CDC_MCLK2_CTRL                     0x001F
+#define LOCHNAGAR2_DSP_CLKIN_CTRL                     0x0020
+#define LOCHNAGAR2_PSIA1_MCLK_CTRL                    0x0021
+#define LOCHNAGAR2_PSIA2_MCLK_CTRL                    0x0022
+#define LOCHNAGAR2_SPDIF_MCLK_CTRL                    0x0023
+#define LOCHNAGAR2_GF_CLKOUT1_CTRL                    0x0024
+#define LOCHNAGAR2_GF_CLKOUT2_CTRL                    0x0025
+#define LOCHNAGAR2_ADAT_MCLK_CTRL                     0x0026
+#define LOCHNAGAR2_SOUNDCARD_MCLK_CTRL                0x0027
+#define LOCHNAGAR2_GPIO_FPGA_GPIO1                    0x0031
+#define LOCHNAGAR2_GPIO_FPGA_GPIO2                    0x0032
+#define LOCHNAGAR2_GPIO_FPGA_GPIO3                    0x0033
+#define LOCHNAGAR2_GPIO_FPGA_GPIO4                    0x0034
+#define LOCHNAGAR2_GPIO_FPGA_GPIO5                    0x0035
+#define LOCHNAGAR2_GPIO_FPGA_GPIO6                    0x0036
+#define LOCHNAGAR2_GPIO_CDC_GPIO1                     0x0037
+#define LOCHNAGAR2_GPIO_CDC_GPIO2                     0x0038
+#define LOCHNAGAR2_GPIO_CDC_GPIO3                     0x0039
+#define LOCHNAGAR2_GPIO_CDC_GPIO4                     0x003A
+#define LOCHNAGAR2_GPIO_CDC_GPIO5                     0x003B
+#define LOCHNAGAR2_GPIO_CDC_GPIO6                     0x003C
+#define LOCHNAGAR2_GPIO_CDC_GPIO7                     0x003D
+#define LOCHNAGAR2_GPIO_CDC_GPIO8                     0x003E
+#define LOCHNAGAR2_GPIO_DSP_GPIO1                     0x003F
+#define LOCHNAGAR2_GPIO_DSP_GPIO2                     0x0040
+#define LOCHNAGAR2_GPIO_DSP_GPIO3                     0x0041
+#define LOCHNAGAR2_GPIO_DSP_GPIO4                     0x0042
+#define LOCHNAGAR2_GPIO_DSP_GPIO5                     0x0043
+#define LOCHNAGAR2_GPIO_DSP_GPIO6                     0x0044
+#define LOCHNAGAR2_GPIO_GF_GPIO2                      0x0045
+#define LOCHNAGAR2_GPIO_GF_GPIO3                      0x0046
+#define LOCHNAGAR2_GPIO_GF_GPIO7                      0x0047
+#define LOCHNAGAR2_GPIO_CDC_AIF1_BCLK                 0x0048
+#define LOCHNAGAR2_GPIO_CDC_AIF1_RXDAT                0x0049
+#define LOCHNAGAR2_GPIO_CDC_AIF1_LRCLK                0x004A
+#define LOCHNAGAR2_GPIO_CDC_AIF1_TXDAT                0x004B
+#define LOCHNAGAR2_GPIO_CDC_AIF2_BCLK                 0x004C
+#define LOCHNAGAR2_GPIO_CDC_AIF2_RXDAT                0x004D
+#define LOCHNAGAR2_GPIO_CDC_AIF2_LRCLK                0x004E
+#define LOCHNAGAR2_GPIO_CDC_AIF2_TXDAT                0x004F
+#define LOCHNAGAR2_GPIO_CDC_AIF3_BCLK                 0x0050
+#define LOCHNAGAR2_GPIO_CDC_AIF3_RXDAT                0x0051
+#define LOCHNAGAR2_GPIO_CDC_AIF3_LRCLK                0x0052
+#define LOCHNAGAR2_GPIO_CDC_AIF3_TXDAT                0x0053
+#define LOCHNAGAR2_GPIO_DSP_AIF1_BCLK                 0x0054
+#define LOCHNAGAR2_GPIO_DSP_AIF1_RXDAT                0x0055
+#define LOCHNAGAR2_GPIO_DSP_AIF1_LRCLK                0x0056
+#define LOCHNAGAR2_GPIO_DSP_AIF1_TXDAT                0x0057
+#define LOCHNAGAR2_GPIO_DSP_AIF2_BCLK                 0x0058
+#define LOCHNAGAR2_GPIO_DSP_AIF2_RXDAT                0x0059
+#define LOCHNAGAR2_GPIO_DSP_AIF2_LRCLK                0x005A
+#define LOCHNAGAR2_GPIO_DSP_AIF2_TXDAT                0x005B
+#define LOCHNAGAR2_GPIO_PSIA1_BCLK                    0x005C
+#define LOCHNAGAR2_GPIO_PSIA1_RXDAT                   0x005D
+#define LOCHNAGAR2_GPIO_PSIA1_LRCLK                   0x005E
+#define LOCHNAGAR2_GPIO_PSIA1_TXDAT                   0x005F
+#define LOCHNAGAR2_GPIO_PSIA2_BCLK                    0x0060
+#define LOCHNAGAR2_GPIO_PSIA2_RXDAT                   0x0061
+#define LOCHNAGAR2_GPIO_PSIA2_LRCLK                   0x0062
+#define LOCHNAGAR2_GPIO_PSIA2_TXDAT                   0x0063
+#define LOCHNAGAR2_GPIO_GF_AIF3_BCLK                  0x0064
+#define LOCHNAGAR2_GPIO_GF_AIF3_RXDAT                 0x0065
+#define LOCHNAGAR2_GPIO_GF_AIF3_LRCLK                 0x0066
+#define LOCHNAGAR2_GPIO_GF_AIF3_TXDAT                 0x0067
+#define LOCHNAGAR2_GPIO_GF_AIF4_BCLK                  0x0068
+#define LOCHNAGAR2_GPIO_GF_AIF4_RXDAT                 0x0069
+#define LOCHNAGAR2_GPIO_GF_AIF4_LRCLK                 0x006A
+#define LOCHNAGAR2_GPIO_GF_AIF4_TXDAT                 0x006B
+#define LOCHNAGAR2_GPIO_GF_AIF1_BCLK                  0x006C
+#define LOCHNAGAR2_GPIO_GF_AIF1_RXDAT                 0x006D
+#define LOCHNAGAR2_GPIO_GF_AIF1_LRCLK                 0x006E
+#define LOCHNAGAR2_GPIO_GF_AIF1_TXDAT                 0x006F
+#define LOCHNAGAR2_GPIO_GF_AIF2_BCLK                  0x0070
+#define LOCHNAGAR2_GPIO_GF_AIF2_RXDAT                 0x0071
+#define LOCHNAGAR2_GPIO_GF_AIF2_LRCLK                 0x0072
+#define LOCHNAGAR2_GPIO_GF_AIF2_TXDAT                 0x0073
+#define LOCHNAGAR2_GPIO_DSP_UART1_RX                  0x0074
+#define LOCHNAGAR2_GPIO_DSP_UART1_TX                  0x0075
+#define LOCHNAGAR2_GPIO_DSP_UART2_RX                  0x0076
+#define LOCHNAGAR2_GPIO_DSP_UART2_TX                  0x0077
+#define LOCHNAGAR2_GPIO_GF_UART2_RX                   0x0078
+#define LOCHNAGAR2_GPIO_GF_UART2_TX                   0x0079
+#define LOCHNAGAR2_GPIO_USB_UART_RX                   0x007A
+#define LOCHNAGAR2_GPIO_CDC_PDMCLK1                   0x007C
+#define LOCHNAGAR2_GPIO_CDC_PDMDAT1                   0x007D
+#define LOCHNAGAR2_GPIO_CDC_PDMCLK2                   0x007E
+#define LOCHNAGAR2_GPIO_CDC_PDMDAT2                   0x007F
+#define LOCHNAGAR2_GPIO_CDC_DMICCLK1                  0x0080
+#define LOCHNAGAR2_GPIO_CDC_DMICDAT1                  0x0081
+#define LOCHNAGAR2_GPIO_CDC_DMICCLK2                  0x0082
+#define LOCHNAGAR2_GPIO_CDC_DMICDAT2                  0x0083
+#define LOCHNAGAR2_GPIO_CDC_DMICCLK3                  0x0084
+#define LOCHNAGAR2_GPIO_CDC_DMICDAT3                  0x0085
+#define LOCHNAGAR2_GPIO_CDC_DMICCLK4                  0x0086
+#define LOCHNAGAR2_GPIO_CDC_DMICDAT4                  0x0087
+#define LOCHNAGAR2_GPIO_DSP_DMICCLK1                  0x0088
+#define LOCHNAGAR2_GPIO_DSP_DMICDAT1                  0x0089
+#define LOCHNAGAR2_GPIO_DSP_DMICCLK2                  0x008A
+#define LOCHNAGAR2_GPIO_DSP_DMICDAT2                  0x008B
+#define LOCHNAGAR2_GPIO_I2C2_SCL                      0x008C
+#define LOCHNAGAR2_GPIO_I2C2_SDA                      0x008D
+#define LOCHNAGAR2_GPIO_I2C3_SCL                      0x008E
+#define LOCHNAGAR2_GPIO_I2C3_SDA                      0x008F
+#define LOCHNAGAR2_GPIO_I2C4_SCL                      0x0090
+#define LOCHNAGAR2_GPIO_I2C4_SDA                      0x0091
+#define LOCHNAGAR2_GPIO_DSP_STANDBY                   0x0092
+#define LOCHNAGAR2_GPIO_CDC_MCLK1                     0x0093
+#define LOCHNAGAR2_GPIO_CDC_MCLK2                     0x0094
+#define LOCHNAGAR2_GPIO_DSP_CLKIN                     0x0095
+#define LOCHNAGAR2_GPIO_PSIA1_MCLK                    0x0096
+#define LOCHNAGAR2_GPIO_PSIA2_MCLK                    0x0097
+#define LOCHNAGAR2_GPIO_GF_GPIO1                      0x0098
+#define LOCHNAGAR2_GPIO_GF_GPIO5                      0x0099
+#define LOCHNAGAR2_GPIO_DSP_GPIO20                    0x009A
+#define LOCHNAGAR2_GPIO_CHANNEL1                      0x00B9
+#define LOCHNAGAR2_GPIO_CHANNEL2                      0x00BA
+#define LOCHNAGAR2_GPIO_CHANNEL3                      0x00BB
+#define LOCHNAGAR2_GPIO_CHANNEL4                      0x00BC
+#define LOCHNAGAR2_GPIO_CHANNEL5                      0x00BD
+#define LOCHNAGAR2_GPIO_CHANNEL6                      0x00BE
+#define LOCHNAGAR2_GPIO_CHANNEL7                      0x00BF
+#define LOCHNAGAR2_GPIO_CHANNEL8                      0x00C0
+#define LOCHNAGAR2_GPIO_CHANNEL9                      0x00C1
+#define LOCHNAGAR2_GPIO_CHANNEL10                     0x00C2
+#define LOCHNAGAR2_GPIO_CHANNEL11                     0x00C3
+#define LOCHNAGAR2_GPIO_CHANNEL12                     0x00C4
+#define LOCHNAGAR2_GPIO_CHANNEL13                     0x00C5
+#define LOCHNAGAR2_GPIO_CHANNEL14                     0x00C6
+#define LOCHNAGAR2_GPIO_CHANNEL15                     0x00C7
+#define LOCHNAGAR2_GPIO_CHANNEL16                     0x00C8
+#define LOCHNAGAR2_MINICARD_RESETS                    0x00DF
+#define LOCHNAGAR2_ANALOGUE_PATH_CTRL1                0x00E3
+#define LOCHNAGAR2_ANALOGUE_PATH_CTRL2                0x00E4
+#define LOCHNAGAR2_COMMS_CTRL4                        0x00F0
+#define LOCHNAGAR2_SPDIF_CTRL                         0x00FE
+#define LOCHNAGAR2_IMON_CTRL1                         0x0108
+#define LOCHNAGAR2_IMON_CTRL2                         0x0109
+#define LOCHNAGAR2_IMON_CTRL3                         0x010A
+#define LOCHNAGAR2_IMON_CTRL4                         0x010B
+#define LOCHNAGAR2_IMON_DATA1                         0x010C
+#define LOCHNAGAR2_IMON_DATA2                         0x010D
+#define LOCHNAGAR2_POWER_CTRL                         0x0116
+#define LOCHNAGAR2_MICVDD_CTRL1                       0x0119
+#define LOCHNAGAR2_MICVDD_CTRL2                       0x011B
+#define LOCHNAGAR2_VDDCORE_CDC_CTRL1                  0x011E
+#define LOCHNAGAR2_VDDCORE_CDC_CTRL2                  0x0120
+#define LOCHNAGAR2_SOUNDCARD_AIF_CTRL                 0x0180
+
+/* (0x000D-0x001B, 0x0180)  CDC_AIF1_CTRL - SOUNCARD_AIF_CTRL */
+#define LOCHNAGAR2_AIF_ENA_MASK                       0x8000
+#define LOCHNAGAR2_AIF_ENA_SHIFT                          15
+#define LOCHNAGAR2_AIF_LRCLK_DIR_MASK                 0x4000
+#define LOCHNAGAR2_AIF_LRCLK_DIR_SHIFT                    14
+#define LOCHNAGAR2_AIF_BCLK_DIR_MASK                  0x2000
+#define LOCHNAGAR2_AIF_BCLK_DIR_SHIFT                     13
+#define LOCHNAGAR2_AIF_SRC_MASK                       0x00FF
+#define LOCHNAGAR2_AIF_SRC_SHIFT                           0
+
+/* (0x001E - 0x0027)  CDC_MCLK1_CTRL - SOUNDCARD_MCLK_CTRL */
+#define LOCHNAGAR2_CLK_ENA_MASK                       0x8000
+#define LOCHNAGAR2_CLK_ENA_SHIFT                          15
+#define LOCHNAGAR2_CLK_SRC_MASK                       0x00FF
+#define LOCHNAGAR2_CLK_SRC_SHIFT                           0
+
+/* (0x0031 - 0x009A)  GPIO_FPGA_GPIO1 - GPIO_DSP_GPIO20 */
+#define LOCHNAGAR2_GPIO_SRC_MASK                      0x00FF
+#define LOCHNAGAR2_GPIO_SRC_SHIFT                          0
+
+/* (0x00B9 - 0x00C8)  GPIO_CHANNEL1 - GPIO_CHANNEL16 */
+#define LOCHNAGAR2_GPIO_CHANNEL_STS_MASK              0x8000
+#define LOCHNAGAR2_GPIO_CHANNEL_STS_SHIFT                 15
+#define LOCHNAGAR2_GPIO_CHANNEL_SRC_MASK              0x00FF
+#define LOCHNAGAR2_GPIO_CHANNEL_SRC_SHIFT                  0
+
+/* (0x00DF)  MINICARD_RESETS */
+#define LOCHNAGAR2_DSP_RESET_MASK                     0x0002
+#define LOCHNAGAR2_DSP_RESET_SHIFT                         1
+#define LOCHNAGAR2_CDC_RESET_MASK                     0x0001
+#define LOCHNAGAR2_CDC_RESET_SHIFT                         0
+
+/* (0x00E3)  ANALOGUE_PATH_CTRL1 */
+#define LOCHNAGAR2_ANALOGUE_PATH_UPDATE_MASK          0x8000
+#define LOCHNAGAR2_ANALOGUE_PATH_UPDATE_SHIFT             15
+#define LOCHNAGAR2_ANALOGUE_PATH_UPDATE_STS_MASK      0x4000
+#define LOCHNAGAR2_ANALOGUE_PATH_UPDATE_STS_SHIFT         14
+
+/* (0x00E4)  ANALOGUE_PATH_CTRL2 */
+#define LOCHNAGAR2_P2_INPUT_BIAS_ENA_MASK             0x0080
+#define LOCHNAGAR2_P2_INPUT_BIAS_ENA_SHIFT                 7
+#define LOCHNAGAR2_P1_INPUT_BIAS_ENA_MASK             0x0040
+#define LOCHNAGAR2_P1_INPUT_BIAS_ENA_SHIFT                 6
+#define LOCHNAGAR2_P2_MICBIAS_SRC_MASK                0x0038
+#define LOCHNAGAR2_P2_MICBIAS_SRC_SHIFT                    3
+#define LOCHNAGAR2_P1_MICBIAS_SRC_MASK                0x0007
+#define LOCHNAGAR2_P1_MICBIAS_SRC_SHIFT                    0
+
+/* (0x00F0)  COMMS_CTRL4 */
+#define LOCHNAGAR2_CDC_CIF1MODE_MASK                  0x0001
+#define LOCHNAGAR2_CDC_CIF1MODE_SHIFT                      0
+
+/* (0x00FE)  SPDIF_CTRL */
+#define LOCHNAGAR2_SPDIF_HWMODE_MASK                  0x0008
+#define LOCHNAGAR2_SPDIF_HWMODE_SHIFT                      3
+#define LOCHNAGAR2_SPDIF_RESET_MASK                   0x0001
+#define LOCHNAGAR2_SPDIF_RESET_SHIFT                       0
+
+/* (0x0108)  IMON_CTRL1 */
+#define LOCHNAGAR2_IMON_ENA_MASK                      0x8000
+#define LOCHNAGAR2_IMON_ENA_SHIFT                         15
+#define LOCHNAGAR2_IMON_MEASURED_CHANNELS_MASK        0x03FC
+#define LOCHNAGAR2_IMON_MEASURED_CHANNELS_SHIFT            2
+#define LOCHNAGAR2_IMON_MODE_SEL_MASK                 0x0003
+#define LOCHNAGAR2_IMON_MODE_SEL_SHIFT                     0
+
+/* (0x0109)  IMON_CTRL2 */
+#define LOCHNAGAR2_IMON_FSR_MASK                      0x03FF
+#define LOCHNAGAR2_IMON_FSR_SHIFT                          0
+
+/* (0x010A)  IMON_CTRL3 */
+#define LOCHNAGAR2_IMON_DONE_MASK                     0x0004
+#define LOCHNAGAR2_IMON_DONE_SHIFT                         2
+#define LOCHNAGAR2_IMON_CONFIGURE_MASK                0x0002
+#define LOCHNAGAR2_IMON_CONFIGURE_SHIFT                    1
+#define LOCHNAGAR2_IMON_MEASURE_MASK                  0x0001
+#define LOCHNAGAR2_IMON_MEASURE_SHIFT                      0
+
+/* (0x010B)  IMON_CTRL4 */
+#define LOCHNAGAR2_IMON_DATA_REQ_MASK                 0x0080
+#define LOCHNAGAR2_IMON_DATA_REQ_SHIFT                     7
+#define LOCHNAGAR2_IMON_CH_SEL_MASK                   0x0070
+#define LOCHNAGAR2_IMON_CH_SEL_SHIFT                       4
+#define LOCHNAGAR2_IMON_DATA_RDY_MASK                 0x0008
+#define LOCHNAGAR2_IMON_DATA_RDY_SHIFT                     3
+#define LOCHNAGAR2_IMON_CH_SRC_MASK                   0x0007
+#define LOCHNAGAR2_IMON_CH_SRC_SHIFT                       0
+
+/* (0x010C, 0x010D)  IMON_DATA1, IMON_DATA2 */
+#define LOCHNAGAR2_IMON_DATA_MASK                     0xFFFF
+#define LOCHNAGAR2_IMON_DATA_SHIFT                         0
+
+/* (0x0116)  POWER_CTRL */
+#define LOCHNAGAR2_PWR_ENA_MASK                       0x0001
+#define LOCHNAGAR2_PWR_ENA_SHIFT                           0
+
+/* (0x0119)  MICVDD_CTRL1 */
+#define LOCHNAGAR2_MICVDD_REG_ENA_MASK                0x8000
+#define LOCHNAGAR2_MICVDD_REG_ENA_SHIFT                   15
+
+/* (0x011B)  MICVDD_CTRL2 */
+#define LOCHNAGAR2_MICVDD_VSEL_MASK                   0x001F
+#define LOCHNAGAR2_MICVDD_VSEL_SHIFT                       0
+
+/* (0x011E)  VDDCORE_CDC_CTRL1 */
+#define LOCHNAGAR2_VDDCORE_CDC_REG_ENA_MASK           0x8000
+#define LOCHNAGAR2_VDDCORE_CDC_REG_ENA_SHIFT              15
+
+/* (0x0120)  VDDCORE_CDC_CTRL2 */
+#define LOCHNAGAR2_VDDCORE_CDC_VSEL_MASK              0x007F
+#define LOCHNAGAR2_VDDCORE_CDC_VSEL_SHIFT                  0
+
+#endif
-- 
cgit v1.2.3


From f2fee24c5aae13d8170d011f97f41e1968ae5b5f Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 4 Feb 2019 16:32:10 +0100
Subject: ACPICA: Get rid of acpi_sleep_dispatch()

No need for the array of structs of function pointers when we can just
call the handfull of functions directly.

This could be further cleaned up if acpi_gbl_reduced_hardware was defined
true in the ACPI_REDUCED_HARDWARE case, but that's material for the next
round.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/acpica/hwxfsleep.c | 96 ++++++++---------------------------------
 include/acpi/actypes.h          |  9 ----
 2 files changed, 18 insertions(+), 87 deletions(-)

(limited to 'include')

diff --git a/drivers/acpi/acpica/hwxfsleep.c b/drivers/acpi/acpica/hwxfsleep.c
index 669fa73dd794..abbf9702aa7f 100644
--- a/drivers/acpi/acpica/hwxfsleep.c
+++ b/drivers/acpi/acpica/hwxfsleep.c
@@ -23,33 +23,6 @@ acpi_hw_set_firmware_waking_vector(struct acpi_table_facs *facs,
 				   acpi_physical_address physical_address64);
 #endif
 
-static acpi_status acpi_hw_sleep_dispatch(u8 sleep_state, u32 function_id);
-
-/*
- * Dispatch table used to efficiently branch to the various sleep
- * functions.
- */
-#define ACPI_SLEEP_FUNCTION_ID         0
-#define ACPI_WAKE_PREP_FUNCTION_ID     1
-#define ACPI_WAKE_FUNCTION_ID          2
-
-/* Legacy functions are optional, based upon ACPI_REDUCED_HARDWARE */
-
-static struct acpi_sleep_functions acpi_sleep_dispatch[] = {
-	{ACPI_STRUCT_INIT(legacy_function,
-			  ACPI_HW_OPTIONAL_FUNCTION(acpi_hw_legacy_sleep)),
-	 ACPI_STRUCT_INIT(extended_function,
-			  acpi_hw_extended_sleep)},
-	{ACPI_STRUCT_INIT(legacy_function,
-			  ACPI_HW_OPTIONAL_FUNCTION(acpi_hw_legacy_wake_prep)),
-	 ACPI_STRUCT_INIT(extended_function,
-			  acpi_hw_extended_wake_prep)},
-	{ACPI_STRUCT_INIT(legacy_function,
-			  ACPI_HW_OPTIONAL_FUNCTION(acpi_hw_legacy_wake)),
-	 ACPI_STRUCT_INIT(extended_function,
-			  acpi_hw_extended_wake)}
-};
-
 /*
  * These functions are removed for the ACPI_REDUCED_HARDWARE case:
  *      acpi_set_firmware_waking_vector
@@ -209,53 +182,6 @@ acpi_status acpi_enter_sleep_state_s4bios(void)
 
 ACPI_EXPORT_SYMBOL(acpi_enter_sleep_state_s4bios)
 #endif				/* !ACPI_REDUCED_HARDWARE */
-/*******************************************************************************
- *
- * FUNCTION:    acpi_hw_sleep_dispatch
- *
- * PARAMETERS:  sleep_state         - Which sleep state to enter/exit
- *              function_id         - Sleep, wake_prep, or Wake
- *
- * RETURN:      Status from the invoked sleep handling function.
- *
- * DESCRIPTION: Dispatch a sleep/wake request to the appropriate handling
- *              function.
- *
- ******************************************************************************/
-static acpi_status acpi_hw_sleep_dispatch(u8 sleep_state, u32 function_id)
-{
-	acpi_status status;
-	struct acpi_sleep_functions *sleep_functions =
-	    &acpi_sleep_dispatch[function_id];
-
-#if (!ACPI_REDUCED_HARDWARE)
-	/*
-	 * If the Hardware Reduced flag is set (from the FADT), we must
-	 * use the extended sleep registers (FADT). Note: As per the ACPI
-	 * specification, these extended registers are to be used for HW-reduced
-	 * platforms only. They are not general-purpose replacements for the
-	 * legacy PM register sleep support.
-	 */
-	if (acpi_gbl_reduced_hardware) {
-		status = sleep_functions->extended_function(sleep_state);
-	} else {
-		/* Legacy sleep */
-
-		status = sleep_functions->legacy_function(sleep_state);
-	}
-
-	return (status);
-
-#else
-	/*
-	 * For the case where reduced-hardware-only code is being generated,
-	 * we know that only the extended sleep registers are available
-	 */
-	status = sleep_functions->extended_function(sleep_state);
-	return (status);
-
-#endif				/* !ACPI_REDUCED_HARDWARE */
-}
 
 /*******************************************************************************
  *
@@ -362,7 +288,12 @@ acpi_status acpi_enter_sleep_state(u8 sleep_state)
 		return_ACPI_STATUS(AE_AML_OPERAND_VALUE);
 	}
 
-	status = acpi_hw_sleep_dispatch(sleep_state, ACPI_SLEEP_FUNCTION_ID);
+#if !ACPI_REDUCED_HARDWARE
+	if (!acpi_gbl_reduced_hardware)
+		status = acpi_hw_legacy_sleep(sleep_state);
+	else
+#endif
+		status = acpi_hw_extended_sleep(sleep_state);
 	return_ACPI_STATUS(status);
 }
 
@@ -388,8 +319,12 @@ acpi_status acpi_leave_sleep_state_prep(u8 sleep_state)
 
 	ACPI_FUNCTION_TRACE(acpi_leave_sleep_state_prep);
 
-	status =
-	    acpi_hw_sleep_dispatch(sleep_state, ACPI_WAKE_PREP_FUNCTION_ID);
+#if !ACPI_REDUCED_HARDWARE
+	if (!acpi_gbl_reduced_hardware)
+		status = acpi_hw_legacy_wake_prep(sleep_state);
+	else
+#endif
+		status = acpi_hw_extended_wake_prep(sleep_state);
 	return_ACPI_STATUS(status);
 }
 
@@ -413,7 +348,12 @@ acpi_status acpi_leave_sleep_state(u8 sleep_state)
 
 	ACPI_FUNCTION_TRACE(acpi_leave_sleep_state);
 
-	status = acpi_hw_sleep_dispatch(sleep_state, ACPI_WAKE_FUNCTION_ID);
+#if !ACPI_REDUCED_HARDWARE
+	if (!acpi_gbl_reduced_hardware)
+		status = acpi_hw_legacy_wake(sleep_state);
+	else
+#endif
+		status = acpi_hw_extended_wake(sleep_state);
 	return_ACPI_STATUS(status);
 }
 
diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h
index 913a7cfb61e6..4c1395a6d82a 100644
--- a/include/acpi/actypes.h
+++ b/include/acpi/actypes.h
@@ -885,15 +885,6 @@ typedef u8 acpi_adr_space_type;
 #define ACPI_ENABLE_EVENT                       1
 #define ACPI_DISABLE_EVENT                      0
 
-/* Sleep function dispatch */
-
-typedef acpi_status (*acpi_sleep_function) (u8 sleep_state);
-
-struct acpi_sleep_functions {
-	acpi_sleep_function legacy_function;
-	acpi_sleep_function extended_function;
-};
-
 /*
  * External ACPI object definition
  */
-- 
cgit v1.2.3


From cd10b9343d1fd092cbf5fc3c2062942618158891 Mon Sep 17 00:00:00 2001
From: Weiyi Lu <weiyi.lu@mediatek.com>
Date: Fri, 1 Feb 2019 16:30:13 +0800
Subject: dt-bindings: soc: fix typo of MT8173 power dt-bindings

fix incorrect IC name that will affect the MT8183 power dt-bindings

Signed-off-by: Weiyi Lu <weiyi.lu@mediatek.com>
Signed-off-by: Matthias Brugger <matthias.bgg@gmail.com>
---
 include/dt-bindings/power/mt8173-power.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/dt-bindings/power/mt8173-power.h b/include/dt-bindings/power/mt8173-power.h
index 15d531aa6e78..ef4a7f944848 100644
--- a/include/dt-bindings/power/mt8173-power.h
+++ b/include/dt-bindings/power/mt8173-power.h
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _DT_BINDINGS_POWER_MT8183_POWER_H
-#define _DT_BINDINGS_POWER_MT8183_POWER_H
+#ifndef _DT_BINDINGS_POWER_MT8173_POWER_H
+#define _DT_BINDINGS_POWER_MT8173_POWER_H
 
 #define MT8173_POWER_DOMAIN_VDEC	0
 #define MT8173_POWER_DOMAIN_VENC	1
@@ -13,4 +13,4 @@
 #define MT8173_POWER_DOMAIN_MFG_2D	8
 #define MT8173_POWER_DOMAIN_MFG		9
 
-#endif /* _DT_BINDINGS_POWER_MT8183_POWER_H */
+#endif /* _DT_BINDINGS_POWER_MT8173_POWER_H */
-- 
cgit v1.2.3


From e9e0c8903009477b630e37a8b6364b26a00720da Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Thu, 10 Jan 2019 19:04:34 +0200
Subject: fanotify: encode file identifier for FAN_REPORT_FID

When user requests the flag FAN_REPORT_FID in fanotify_init(),
a unique file identifier of the event target object will be reported
with the event.

The file identifier includes the filesystem's fsid (i.e. from statfs(2))
and an NFS file handle of the file (i.e. from name_to_handle_at(2)).

The file identifier makes holding the path reference and passing a file
descriptor to user redundant, so those are disabled in a group with
FAN_REPORT_FID.

Encode fid and store it in event for a group with FAN_REPORT_FID.
Up to 12 bytes of file handle on 32bit arch (16 bytes on 64bit arch)
are stored inline in fanotify_event struct. Larger file handles are
stored in an external allocated buffer.

On failure to encode fid, we print a warning and queue the event
without the fid information.

[JK: Fold part of later patched into this one to use
exportfs_encode_inode_fh() right away]

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/notify/fanotify/fanotify.c      | 84 +++++++++++++++++++++++++++++++++++---
 fs/notify/fanotify/fanotify.h      | 78 +++++++++++++++++++++++++++++++++--
 fs/notify/fanotify/fanotify_user.c |  6 +--
 include/uapi/linux/fanotify.h      |  1 +
 4 files changed, 156 insertions(+), 13 deletions(-)

(limited to 'include')

diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index d8e3b6e50844..dd33227e518a 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -13,6 +13,7 @@
 #include <linux/wait.h>
 #include <linux/audit.h>
 #include <linux/sched/mm.h>
+#include <linux/statfs.h>
 
 #include "fanotify.h"
 
@@ -25,10 +26,18 @@ static bool should_merge(struct fsnotify_event *old_fsn,
 	old = FANOTIFY_E(old_fsn);
 	new = FANOTIFY_E(new_fsn);
 
-	if (old_fsn->inode == new_fsn->inode && old->pid == new->pid &&
-	    old->path.mnt == new->path.mnt &&
-	    old->path.dentry == new->path.dentry)
-		return true;
+	if (old_fsn->inode != new_fsn->inode || old->pid != new->pid ||
+	    old->fh_type != new->fh_type || old->fh_len != new->fh_len)
+		return false;
+
+	if (fanotify_event_has_path(old)) {
+		return old->path.mnt == new->path.mnt &&
+			old->path.dentry == new->path.dentry;
+	} else if (fanotify_event_has_fid(old)) {
+		return fanotify_fid_equal(&old->fid, &new->fid, old->fh_len);
+	}
+
+	/* Do not merge events if we failed to encode fid */
 	return false;
 }
 
@@ -143,6 +152,60 @@ static u32 fanotify_group_event_mask(struct fsnotify_iter_info *iter_info,
 		~marks_ignored_mask;
 }
 
+static int fanotify_encode_fid(struct fanotify_event *event,
+			       const struct path *path, gfp_t gfp)
+{
+	struct fanotify_fid *fid = &event->fid;
+	int dwords, bytes = 0;
+	struct kstatfs stat;
+	int err, type;
+
+	stat.f_fsid.val[0] = stat.f_fsid.val[1] = 0;
+	fid->ext_fh = NULL;
+	dwords = 0;
+	err = -ENOENT;
+	type = exportfs_encode_inode_fh(d_inode(path->dentry), NULL, &dwords,
+					NULL);
+	if (!dwords)
+		goto out_err;
+
+	err = vfs_statfs(path, &stat);
+	if (err)
+		goto out_err;
+
+	bytes = dwords << 2;
+	if (bytes > FANOTIFY_INLINE_FH_LEN) {
+		/* Treat failure to allocate fh as failure to allocate event */
+		err = -ENOMEM;
+		fid->ext_fh = kmalloc(bytes, gfp);
+		if (!fid->ext_fh)
+			goto out_err;
+	}
+
+	type = exportfs_encode_inode_fh(d_inode(path->dentry),
+					fanotify_fid_fh(fid, bytes), &dwords,
+					NULL);
+	err = -EINVAL;
+	if (!type || type == FILEID_INVALID || bytes != dwords << 2)
+		goto out_err;
+
+	fid->fsid = stat.f_fsid;
+	event->fh_len = bytes;
+
+	return type;
+
+out_err:
+	pr_warn_ratelimited("fanotify: failed to encode fid (fsid=%x.%x, "
+			    "type=%d, bytes=%d, err=%i)\n",
+			    stat.f_fsid.val[0], stat.f_fsid.val[1],
+			    type, bytes, err);
+	kfree(fid->ext_fh);
+	fid->ext_fh = NULL;
+	event->fh_len = 0;
+
+	return FILEID_INVALID;
+}
+
 struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group,
 						 struct inode *inode, u32 mask,
 						 const struct path *path)
@@ -181,10 +244,16 @@ init: __maybe_unused
 		event->pid = get_pid(task_pid(current));
 	else
 		event->pid = get_pid(task_tgid(current));
-	if (path) {
+	event->fh_len = 0;
+	if (path && FAN_GROUP_FLAG(group, FAN_REPORT_FID)) {
+		/* Report the event without a file identifier on encode error */
+		event->fh_type = fanotify_encode_fid(event, path, gfp);
+	} else if (path) {
+		event->fh_type = FILEID_ROOT;
 		event->path = *path;
 		path_get(&event->path);
 	} else {
+		event->fh_type = FILEID_INVALID;
 		event->path.mnt = NULL;
 		event->path.dentry = NULL;
 	}
@@ -281,7 +350,10 @@ static void fanotify_free_event(struct fsnotify_event *fsn_event)
 	struct fanotify_event *event;
 
 	event = FANOTIFY_E(fsn_event);
-	path_put(&event->path);
+	if (fanotify_event_has_path(event))
+		path_put(&event->path);
+	else if (fanotify_event_has_ext_fh(event))
+		kfree(event->fid.ext_fh);
 	put_pid(event->pid);
 	if (fanotify_is_perm_event(event->mask)) {
 		kmem_cache_free(fanotify_perm_event_cachep,
diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h
index 898b5b2bc1c7..271482fb9611 100644
--- a/fs/notify/fanotify/fanotify.h
+++ b/fs/notify/fanotify/fanotify.h
@@ -2,11 +2,49 @@
 #include <linux/fsnotify_backend.h>
 #include <linux/path.h>
 #include <linux/slab.h>
+#include <linux/exportfs.h>
 
 extern struct kmem_cache *fanotify_mark_cache;
 extern struct kmem_cache *fanotify_event_cachep;
 extern struct kmem_cache *fanotify_perm_event_cachep;
 
+/*
+ * 3 dwords are sufficient for most local fs (64bit ino, 32bit generation).
+ * For 32bit arch, fid increases the size of fanotify_event by 12 bytes and
+ * fh_* fields increase the size of fanotify_event by another 4 bytes.
+ * For 64bit arch, fid increases the size of fanotify_fid by 8 bytes and
+ * fh_* fields are packed in a hole after mask.
+ */
+#if BITS_PER_LONG == 32
+#define FANOTIFY_INLINE_FH_LEN	(3 << 2)
+#else
+#define FANOTIFY_INLINE_FH_LEN	(4 << 2)
+#endif
+
+struct fanotify_fid {
+	__kernel_fsid_t fsid;
+	union {
+		unsigned char fh[FANOTIFY_INLINE_FH_LEN];
+		unsigned char *ext_fh;
+	};
+};
+
+static inline void *fanotify_fid_fh(struct fanotify_fid *fid,
+				    unsigned int fh_len)
+{
+	return fh_len <= FANOTIFY_INLINE_FH_LEN ? fid->fh : fid->ext_fh;
+}
+
+static inline bool fanotify_fid_equal(struct fanotify_fid *fid1,
+				      struct fanotify_fid *fid2,
+				      unsigned int fh_len)
+{
+	return fid1->fsid.val[0] == fid2->fsid.val[0] &&
+		fid1->fsid.val[1] == fid2->fsid.val[1] &&
+		!memcmp(fanotify_fid_fh(fid1, fh_len),
+			fanotify_fid_fh(fid2, fh_len), fh_len);
+}
+
 /*
  * Structure for normal fanotify events. It gets allocated in
  * fanotify_handle_event() and freed when the information is retrieved by
@@ -16,13 +54,47 @@ struct fanotify_event {
 	struct fsnotify_event fse;
 	u32 mask;
 	/*
-	 * We hold ref to this path so it may be dereferenced at any point
-	 * during this object's lifetime
+	 * Those fields are outside fanotify_fid to pack fanotify_event nicely
+	 * on 64bit arch and to use fh_type as an indication of whether path
+	 * or fid are used in the union:
+	 * FILEID_ROOT (0) for path, > 0 for fid, FILEID_INVALID for neither.
 	 */
-	struct path path;
+	u8 fh_type;
+	u8 fh_len;
+	u16 pad;
+	union {
+		/*
+		 * We hold ref to this path so it may be dereferenced at any
+		 * point during this object's lifetime
+		 */
+		struct path path;
+		/*
+		 * With FAN_REPORT_FID, we do not hold any reference on the
+		 * victim object. Instead we store its NFS file handle and its
+		 * filesystem's fsid as a unique identifier.
+		 */
+		struct fanotify_fid fid;
+	};
 	struct pid *pid;
 };
 
+static inline bool fanotify_event_has_path(struct fanotify_event *event)
+{
+	return event->fh_type == FILEID_ROOT;
+}
+
+static inline bool fanotify_event_has_fid(struct fanotify_event *event)
+{
+	return event->fh_type != FILEID_ROOT &&
+		event->fh_type != FILEID_INVALID;
+}
+
+static inline bool fanotify_event_has_ext_fh(struct fanotify_event *event)
+{
+	return fanotify_event_has_fid(event) &&
+		event->fh_len > FANOTIFY_INLINE_FH_LEN;
+}
+
 /*
  * Structure for permission fanotify events. It gets allocated and freed in
  * fanotify_handle_event() since we wait there for user response. When the
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 096503bd0edb..c965fcf4979e 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -181,7 +181,7 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group,
 	struct fanotify_event_metadata metadata;
 	struct fanotify_event *event;
 	struct file *f = NULL;
-	int fd, ret;
+	int ret, fd = FAN_NOFD;
 
 	pr_debug("%s: group=%p event=%p\n", __func__, group, fsn_event);
 
@@ -193,9 +193,7 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group,
 	metadata.mask = event->mask & FANOTIFY_OUTGOING_EVENTS;
 	metadata.pid = pid_vnr(event->pid);
 
-	if (unlikely(event->mask & FAN_Q_OVERFLOW)) {
-		fd = FAN_NOFD;
-	} else {
+	if (fanotify_event_has_path(event)) {
 		fd = create_fd(group, event, &f);
 		if (fd < 0)
 			return fd;
diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h
index 909c98fcace2..d07f3cbc2786 100644
--- a/include/uapi/linux/fanotify.h
+++ b/include/uapi/linux/fanotify.h
@@ -44,6 +44,7 @@
 
 /* Flags to determine fanotify event format */
 #define FAN_REPORT_TID		0x00000100	/* event->pid is thread id */
+#define FAN_REPORT_FID		0x00000200	/* Report unique file id */
 
 /* Deprecated - do not use this in programs and do not add new flags here! */
 #define FAN_ALL_INIT_FLAGS	(FAN_CLOEXEC | FAN_NONBLOCK | \
-- 
cgit v1.2.3


From 5e469c830fdb5a1ebaa69b375b87f583326fd296 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Thu, 10 Jan 2019 19:04:35 +0200
Subject: fanotify: copy event fid info to user

If group requested FAN_REPORT_FID and event has file identifier,
copy that information to user reading the event after event metadata.

fid information is formatted as struct fanotify_event_info_fid
that includes a generic header struct fanotify_event_info_header,
so that other info types could be defined in the future using the
same header.

metadata->event_len includes the length of the fid information.

The fid information includes the filesystem's fsid (see statfs(2))
followed by an NFS file handle of the file that could be passed as
an argument to open_by_handle_at(2).

Cc: <linux-api@vger.kernel.org>
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/notify/fanotify/fanotify.h      |  5 +++
 fs/notify/fanotify/fanotify_user.c | 82 +++++++++++++++++++++++++++++++++++---
 include/uapi/linux/fanotify.h      | 20 ++++++++++
 3 files changed, 102 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h
index 271482fb9611..4aafc7144c3d 100644
--- a/fs/notify/fanotify/fanotify.h
+++ b/fs/notify/fanotify/fanotify.h
@@ -95,6 +95,11 @@ static inline bool fanotify_event_has_ext_fh(struct fanotify_event *event)
 		event->fh_len > FANOTIFY_INLINE_FH_LEN;
 }
 
+static inline void *fanotify_event_fh(struct fanotify_event *event)
+{
+	return fanotify_fid_fh(&event->fid, event->fh_len);
+}
+
 /*
  * Structure for permission fanotify events. It gets allocated and freed in
  * fanotify_handle_event() since we wait there for user response. When the
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index c965fcf4979e..cd82dd713c91 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -47,6 +47,18 @@ struct kmem_cache *fanotify_mark_cache __read_mostly;
 struct kmem_cache *fanotify_event_cachep __read_mostly;
 struct kmem_cache *fanotify_perm_event_cachep __read_mostly;
 
+#define FANOTIFY_EVENT_ALIGN 4
+
+static int fanotify_event_info_len(struct fanotify_event *event)
+{
+	if (!fanotify_event_has_fid(event))
+		return 0;
+
+	return roundup(sizeof(struct fanotify_event_info_fid) +
+		       sizeof(struct file_handle) + event->fh_len,
+		       FANOTIFY_EVENT_ALIGN);
+}
+
 /*
  * Get an fsnotify notification event if one exists and is small
  * enough to fit in "count". Return an error pointer if the count
@@ -57,6 +69,9 @@ struct kmem_cache *fanotify_perm_event_cachep __read_mostly;
 static struct fsnotify_event *get_one_event(struct fsnotify_group *group,
 					    size_t count)
 {
+	size_t event_size = FAN_EVENT_METADATA_LEN;
+	struct fanotify_event *event;
+
 	assert_spin_locked(&group->notification_lock);
 
 	pr_debug("%s: group=%p count=%zd\n", __func__, group, count);
@@ -64,11 +79,18 @@ static struct fsnotify_event *get_one_event(struct fsnotify_group *group,
 	if (fsnotify_notify_queue_is_empty(group))
 		return NULL;
 
-	if (FAN_EVENT_METADATA_LEN > count)
+	if (FAN_GROUP_FLAG(group, FAN_REPORT_FID)) {
+		event = FANOTIFY_E(fsnotify_peek_first_event(group));
+		event_size += fanotify_event_info_len(event);
+	}
+
+	if (event_size > count)
 		return ERR_PTR(-EINVAL);
 
-	/* held the notification_lock the whole time, so this is the
-	 * same event we peeked above */
+	/*
+	 * Held the notification_lock the whole time, so this is the
+	 * same event we peeked above
+	 */
 	return fsnotify_remove_first_event(group);
 }
 
@@ -174,6 +196,48 @@ static int process_access_response(struct fsnotify_group *group,
 	return 0;
 }
 
+static int copy_fid_to_user(struct fanotify_event *event, char __user *buf)
+{
+	struct fanotify_event_info_fid info = { };
+	struct file_handle handle = { };
+	size_t fh_len = event->fh_len;
+	size_t len = fanotify_event_info_len(event);
+
+	if (!len)
+		return 0;
+
+	if (WARN_ON_ONCE(len < sizeof(info) + sizeof(handle) + fh_len))
+		return -EFAULT;
+
+	/* Copy event info fid header followed by vaiable sized file handle */
+	info.hdr.info_type = FAN_EVENT_INFO_TYPE_FID;
+	info.hdr.len = len;
+	info.fsid = event->fid.fsid;
+	if (copy_to_user(buf, &info, sizeof(info)))
+		return -EFAULT;
+
+	buf += sizeof(info);
+	len -= sizeof(info);
+	handle.handle_type = event->fh_type;
+	handle.handle_bytes = fh_len;
+	if (copy_to_user(buf, &handle, sizeof(handle)))
+		return -EFAULT;
+
+	buf += sizeof(handle);
+	len -= sizeof(handle);
+	if (copy_to_user(buf, fanotify_event_fh(event), fh_len))
+		return -EFAULT;
+
+	/* Pad with 0's */
+	buf += fh_len;
+	len -= fh_len;
+	WARN_ON_ONCE(len < 0 || len >= FANOTIFY_EVENT_ALIGN);
+	if (len > 0 && clear_user(buf, len))
+		return -EFAULT;
+
+	return 0;
+}
+
 static ssize_t copy_event_to_user(struct fsnotify_group *group,
 				  struct fsnotify_event *fsn_event,
 				  char __user *buf, size_t count)
@@ -197,6 +261,8 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group,
 		fd = create_fd(group, event, &f);
 		if (fd < 0)
 			return fd;
+	} else if (fanotify_event_has_fid(event)) {
+		metadata.event_len += fanotify_event_info_len(event);
 	}
 	metadata.fd = fd;
 
@@ -208,14 +274,20 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group,
 	if (WARN_ON_ONCE(metadata.event_len > count))
 		goto out_close_fd;
 
-	if (copy_to_user(buf, &metadata, metadata.event_len))
+	if (copy_to_user(buf, &metadata, FAN_EVENT_METADATA_LEN))
 		goto out_close_fd;
 
 	if (fanotify_is_perm_event(event->mask))
 		FANOTIFY_PE(fsn_event)->fd = fd;
 
-	if (fd != FAN_NOFD)
+	if (fanotify_event_has_path(event)) {
 		fd_install(fd, f);
+	} else if (fanotify_event_has_fid(event)) {
+		ret = copy_fid_to_user(event, buf + FAN_EVENT_METADATA_LEN);
+		if (ret < 0)
+			return ret;
+	}
+
 	return metadata.event_len;
 
 out_close_fd:
diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h
index d07f3cbc2786..959ae2bdc7ca 100644
--- a/include/uapi/linux/fanotify.h
+++ b/include/uapi/linux/fanotify.h
@@ -107,6 +107,26 @@ struct fanotify_event_metadata {
 	__s32 pid;
 };
 
+#define FAN_EVENT_INFO_TYPE_FID		1
+
+/* Variable length info record following event metadata */
+struct fanotify_event_info_header {
+	__u8 info_type;
+	__u8 pad;
+	__u16 len;
+};
+
+/* Unique file identifier info record */
+struct fanotify_event_info_fid {
+	struct fanotify_event_info_header hdr;
+	__kernel_fsid_t fsid;
+	/*
+	 * Following is an opaque struct file_handle that can be passed as
+	 * an argument to open_by_handle_at(2).
+	 */
+	unsigned char handle[0];
+};
+
 struct fanotify_response {
 	__s32 fd;
 	__u32 response;
-- 
cgit v1.2.3


From a8b13aa20afb69161b5123b4f1acc7ea0a03d360 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Thu, 10 Jan 2019 19:04:36 +0200
Subject: fanotify: enable FAN_REPORT_FID init flag

When setting up an fanotify listener, user may request to get fid
information in event instead of an open file descriptor.

The fid obtained with event on a watched object contains the file
handle returned by name_to_handle_at(2) and fsid returned by statfs(2).

Restrict FAN_REPORT_FID to class FAN_CLASS_NOTIF, because we have have
no good reason to support reporting fid on permission events.

When setting a mark, we need to make sure that the filesystem
supports encoding file handles with name_to_handle_at(2) and that
statfs(2) encodes a non-zero fsid.

Cc: <linux-api@vger.kernel.org>
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/notify/fanotify/fanotify_user.c | 61 +++++++++++++++++++++++++++++++++++++-
 include/linux/fanotify.h           |  2 +-
 2 files changed, 61 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index cd82dd713c91..1638c171ca82 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -17,6 +17,8 @@
 #include <linux/compat.h>
 #include <linux/sched/signal.h>
 #include <linux/memcontrol.h>
+#include <linux/statfs.h>
+#include <linux/exportfs.h>
 
 #include <asm/ioctls.h>
 
@@ -768,6 +770,10 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
 		return -EINVAL;
 	}
 
+	if ((flags & FAN_REPORT_FID) &&
+	    (flags & FANOTIFY_CLASS_BITS) != FAN_CLASS_NOTIF)
+		return -EINVAL;
+
 	user = get_current_user();
 	if (atomic_read(&user->fanotify_listeners) > FANOTIFY_DEFAULT_MAX_LISTENERS) {
 		free_uid(user);
@@ -854,6 +860,52 @@ out_destroy_group:
 	return fd;
 }
 
+/* Check if filesystem can encode a unique fid */
+static int fanotify_test_fid(struct path *path)
+{
+	struct kstatfs stat, root_stat;
+	struct path root = {
+		.mnt = path->mnt,
+		.dentry = path->dentry->d_sb->s_root,
+	};
+	int err;
+
+	/*
+	 * Make sure path is not in filesystem with zero fsid (e.g. tmpfs).
+	 */
+	err = vfs_statfs(path, &stat);
+	if (err)
+		return err;
+
+	if (!stat.f_fsid.val[0] && !stat.f_fsid.val[1])
+		return -ENODEV;
+
+	/*
+	 * Make sure path is not inside a filesystem subvolume (e.g. btrfs)
+	 * which uses a different fsid than sb root.
+	 */
+	err = vfs_statfs(&root, &root_stat);
+	if (err)
+		return err;
+
+	if (root_stat.f_fsid.val[0] != stat.f_fsid.val[0] ||
+	    root_stat.f_fsid.val[1] != stat.f_fsid.val[1])
+		return -EXDEV;
+
+	/*
+	 * We need to make sure that the file system supports at least
+	 * encoding a file handle so user can use name_to_handle_at() to
+	 * compare fid returned with event to the file handle of watched
+	 * objects. However, name_to_handle_at() requires that the
+	 * filesystem also supports decoding file handles.
+	 */
+	if (!path->dentry->d_sb->s_export_op ||
+	    !path->dentry->d_sb->s_export_op->fh_to_dentry)
+		return -EOPNOTSUPP;
+
+	return 0;
+}
+
 static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
 			    int dfd, const char  __user *pathname)
 {
@@ -939,6 +991,12 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
 	if (ret)
 		goto fput_and_out;
 
+	if (FAN_GROUP_FLAG(group, FAN_REPORT_FID)) {
+		ret = fanotify_test_fid(&path);
+		if (ret)
+			goto path_put_and_out;
+	}
+
 	/* inode held in place by reference to path; group by fget on fd */
 	if (mark_type == FAN_MARK_INODE)
 		inode = path.dentry->d_inode;
@@ -967,6 +1025,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
 		ret = -EINVAL;
 	}
 
+path_put_and_out:
 	path_put(&path);
 fput_and_out:
 	fdput(f);
@@ -1003,7 +1062,7 @@ COMPAT_SYSCALL_DEFINE6(fanotify_mark,
  */
 static int __init fanotify_user_setup(void)
 {
-	BUILD_BUG_ON(HWEIGHT32(FANOTIFY_INIT_FLAGS) != 7);
+	BUILD_BUG_ON(HWEIGHT32(FANOTIFY_INIT_FLAGS) != 8);
 	BUILD_BUG_ON(HWEIGHT32(FANOTIFY_MARK_FLAGS) != 9);
 
 	fanotify_mark_cache = KMEM_CACHE(fsnotify_mark,
diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h
index 9e2142795335..f59be967f72b 100644
--- a/include/linux/fanotify.h
+++ b/include/linux/fanotify.h
@@ -19,7 +19,7 @@
 				 FAN_CLASS_PRE_CONTENT)
 
 #define FANOTIFY_INIT_FLAGS	(FANOTIFY_CLASS_BITS | \
-				 FAN_REPORT_TID | \
+				 FAN_REPORT_TID | FAN_REPORT_FID | \
 				 FAN_CLOEXEC | FAN_NONBLOCK | \
 				 FAN_UNLIMITED_QUEUE | FAN_UNLIMITED_MARKS)
 
-- 
cgit v1.2.3


From 77115225acc67d9ac4b15f04dd138006b9cd1ef2 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Thu, 10 Jan 2019 19:04:37 +0200
Subject: fanotify: cache fsid in fsnotify_mark_connector

For FAN_REPORT_FID, we need to encode fid with fsid of the filesystem on
every event. To avoid having to call vfs_statfs() on every event to get
fsid, we store the fsid in fsnotify_mark_connector on the first time we
add a mark and on handle event we use the cached fsid.

Subsequent calls to add mark on the same object are expected to pass the
same fsid, so the call will fail on cached fsid mismatch.

If an event is reported on several mark types (inode, mount, filesystem),
all connectors should already have the same fsid, so we use the cached
fsid from the first connector.

[JK: Simplify code flow around fanotify_get_fid()
     make fsid argument of fsnotify_add_mark_locked() unconditional]

Suggested-by: Jan Kara <jack@suse.cz>
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/notify/fanotify/fanotify.c      | 59 ++++++++++++++++++++++++------------
 fs/notify/fanotify/fanotify.h      |  5 +--
 fs/notify/fanotify/fanotify_user.c | 62 +++++++++++++++++++++++---------------
 fs/notify/mark.c                   | 42 +++++++++++++++++++++-----
 include/linux/fsnotify_backend.h   | 18 ++++++++---
 5 files changed, 128 insertions(+), 58 deletions(-)

(limited to 'include')

diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index dd33227e518a..555831603637 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -153,26 +153,20 @@ static u32 fanotify_group_event_mask(struct fsnotify_iter_info *iter_info,
 }
 
 static int fanotify_encode_fid(struct fanotify_event *event,
-			       const struct path *path, gfp_t gfp)
+			       struct inode *inode, gfp_t gfp,
+			       __kernel_fsid_t *fsid)
 {
 	struct fanotify_fid *fid = &event->fid;
 	int dwords, bytes = 0;
-	struct kstatfs stat;
 	int err, type;
 
-	stat.f_fsid.val[0] = stat.f_fsid.val[1] = 0;
 	fid->ext_fh = NULL;
 	dwords = 0;
 	err = -ENOENT;
-	type = exportfs_encode_inode_fh(d_inode(path->dentry), NULL, &dwords,
-					NULL);
+	type = exportfs_encode_inode_fh(inode, NULL, &dwords, NULL);
 	if (!dwords)
 		goto out_err;
 
-	err = vfs_statfs(path, &stat);
-	if (err)
-		goto out_err;
-
 	bytes = dwords << 2;
 	if (bytes > FANOTIFY_INLINE_FH_LEN) {
 		/* Treat failure to allocate fh as failure to allocate event */
@@ -182,14 +176,13 @@ static int fanotify_encode_fid(struct fanotify_event *event,
 			goto out_err;
 	}
 
-	type = exportfs_encode_inode_fh(d_inode(path->dentry),
-					fanotify_fid_fh(fid, bytes), &dwords,
-					NULL);
+	type = exportfs_encode_inode_fh(inode, fanotify_fid_fh(fid, bytes),
+					&dwords, NULL);
 	err = -EINVAL;
 	if (!type || type == FILEID_INVALID || bytes != dwords << 2)
 		goto out_err;
 
-	fid->fsid = stat.f_fsid;
+	fid->fsid = *fsid;
 	event->fh_len = bytes;
 
 	return type;
@@ -197,8 +190,7 @@ static int fanotify_encode_fid(struct fanotify_event *event,
 out_err:
 	pr_warn_ratelimited("fanotify: failed to encode fid (fsid=%x.%x, "
 			    "type=%d, bytes=%d, err=%i)\n",
-			    stat.f_fsid.val[0], stat.f_fsid.val[1],
-			    type, bytes, err);
+			    fsid->val[0], fsid->val[1], type, bytes, err);
 	kfree(fid->ext_fh);
 	fid->ext_fh = NULL;
 	event->fh_len = 0;
@@ -207,8 +199,9 @@ out_err:
 }
 
 struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group,
-						 struct inode *inode, u32 mask,
-						 const struct path *path)
+					    struct inode *inode, u32 mask,
+					    const struct path *path,
+					    __kernel_fsid_t *fsid)
 {
 	struct fanotify_event *event = NULL;
 	gfp_t gfp = GFP_KERNEL_ACCOUNT;
@@ -247,7 +240,8 @@ init: __maybe_unused
 	event->fh_len = 0;
 	if (path && FAN_GROUP_FLAG(group, FAN_REPORT_FID)) {
 		/* Report the event without a file identifier on encode error */
-		event->fh_type = fanotify_encode_fid(event, path, gfp);
+		event->fh_type = fanotify_encode_fid(event,
+					d_inode(path->dentry), gfp, fsid);
 	} else if (path) {
 		event->fh_type = FILEID_ROOT;
 		event->path = *path;
@@ -262,6 +256,29 @@ out:
 	return event;
 }
 
+/*
+ * Get cached fsid of the filesystem containing the object from any connector.
+ * All connectors are supposed to have the same fsid, but we do not verify that
+ * here.
+ */
+static __kernel_fsid_t fanotify_get_fsid(struct fsnotify_iter_info *iter_info)
+{
+	int type;
+	__kernel_fsid_t fsid = {};
+
+	fsnotify_foreach_obj_type(type) {
+		if (!fsnotify_iter_should_report_type(iter_info, type))
+			continue;
+
+		fsid = iter_info->marks[type]->connector->fsid;
+		if (WARN_ON_ONCE(!fsid.val[0] && !fsid.val[1]))
+			continue;
+		return fsid;
+	}
+
+	return fsid;
+}
+
 static int fanotify_handle_event(struct fsnotify_group *group,
 				 struct inode *inode,
 				 u32 mask, const void *data, int data_type,
@@ -271,6 +288,7 @@ static int fanotify_handle_event(struct fsnotify_group *group,
 	int ret = 0;
 	struct fanotify_event *event;
 	struct fsnotify_event *fsn_event;
+	__kernel_fsid_t fsid = {};
 
 	BUILD_BUG_ON(FAN_ACCESS != FS_ACCESS);
 	BUILD_BUG_ON(FAN_MODIFY != FS_MODIFY);
@@ -303,7 +321,10 @@ static int fanotify_handle_event(struct fsnotify_group *group,
 			return 0;
 	}
 
-	event = fanotify_alloc_event(group, inode, mask, data);
+	if (FAN_GROUP_FLAG(group, FAN_REPORT_FID))
+		fsid = fanotify_get_fsid(iter_info);
+
+	event = fanotify_alloc_event(group, inode, mask, data, &fsid);
 	ret = -ENOMEM;
 	if (unlikely(!event)) {
 		/*
diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h
index 4aafc7144c3d..5b072afa4e19 100644
--- a/fs/notify/fanotify/fanotify.h
+++ b/fs/notify/fanotify/fanotify.h
@@ -131,5 +131,6 @@ static inline struct fanotify_event *FANOTIFY_E(struct fsnotify_event *fse)
 }
 
 struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group,
-						 struct inode *inode, u32 mask,
-						 const struct path *path);
+					    struct inode *inode, u32 mask,
+					    const struct path *path,
+					    __kernel_fsid_t *fsid);
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 1638c171ca82..603419ce096f 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -653,7 +653,8 @@ static __u32 fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark,
 
 static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group,
 						   fsnotify_connp_t *connp,
-						   unsigned int type)
+						   unsigned int type,
+						   __kernel_fsid_t *fsid)
 {
 	struct fsnotify_mark *mark;
 	int ret;
@@ -666,7 +667,7 @@ static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group,
 		return ERR_PTR(-ENOMEM);
 
 	fsnotify_init_mark(mark, group);
-	ret = fsnotify_add_mark_locked(mark, connp, type, 0);
+	ret = fsnotify_add_mark_locked(mark, connp, type, 0, fsid);
 	if (ret) {
 		fsnotify_put_mark(mark);
 		return ERR_PTR(ret);
@@ -678,7 +679,8 @@ static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group,
 
 static int fanotify_add_mark(struct fsnotify_group *group,
 			     fsnotify_connp_t *connp, unsigned int type,
-			     __u32 mask, unsigned int flags)
+			     __u32 mask, unsigned int flags,
+			     __kernel_fsid_t *fsid)
 {
 	struct fsnotify_mark *fsn_mark;
 	__u32 added;
@@ -686,7 +688,7 @@ static int fanotify_add_mark(struct fsnotify_group *group,
 	mutex_lock(&group->mark_mutex);
 	fsn_mark = fsnotify_find_mark(connp, group);
 	if (!fsn_mark) {
-		fsn_mark = fanotify_add_new_mark(group, connp, type);
+		fsn_mark = fanotify_add_new_mark(group, connp, type, fsid);
 		if (IS_ERR(fsn_mark)) {
 			mutex_unlock(&group->mark_mutex);
 			return PTR_ERR(fsn_mark);
@@ -703,23 +705,23 @@ static int fanotify_add_mark(struct fsnotify_group *group,
 
 static int fanotify_add_vfsmount_mark(struct fsnotify_group *group,
 				      struct vfsmount *mnt, __u32 mask,
-				      unsigned int flags)
+				      unsigned int flags, __kernel_fsid_t *fsid)
 {
 	return fanotify_add_mark(group, &real_mount(mnt)->mnt_fsnotify_marks,
-				 FSNOTIFY_OBJ_TYPE_VFSMOUNT, mask, flags);
+				 FSNOTIFY_OBJ_TYPE_VFSMOUNT, mask, flags, fsid);
 }
 
 static int fanotify_add_sb_mark(struct fsnotify_group *group,
-				      struct super_block *sb, __u32 mask,
-				      unsigned int flags)
+				struct super_block *sb, __u32 mask,
+				unsigned int flags, __kernel_fsid_t *fsid)
 {
 	return fanotify_add_mark(group, &sb->s_fsnotify_marks,
-				 FSNOTIFY_OBJ_TYPE_SB, mask, flags);
+				 FSNOTIFY_OBJ_TYPE_SB, mask, flags, fsid);
 }
 
 static int fanotify_add_inode_mark(struct fsnotify_group *group,
 				   struct inode *inode, __u32 mask,
-				   unsigned int flags)
+				   unsigned int flags, __kernel_fsid_t *fsid)
 {
 	pr_debug("%s: group=%p inode=%p\n", __func__, group, inode);
 
@@ -734,7 +736,7 @@ static int fanotify_add_inode_mark(struct fsnotify_group *group,
 		return 0;
 
 	return fanotify_add_mark(group, &inode->i_fsnotify_marks,
-				 FSNOTIFY_OBJ_TYPE_INODE, mask, flags);
+				 FSNOTIFY_OBJ_TYPE_INODE, mask, flags, fsid);
 }
 
 /* fanotify syscalls */
@@ -798,7 +800,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
 	atomic_inc(&user->fanotify_listeners);
 	group->memcg = get_mem_cgroup_from_mm(current->mm);
 
-	oevent = fanotify_alloc_event(group, NULL, FS_Q_OVERFLOW, NULL);
+	oevent = fanotify_alloc_event(group, NULL, FS_Q_OVERFLOW, NULL, NULL);
 	if (unlikely(!oevent)) {
 		fd = -ENOMEM;
 		goto out_destroy_group;
@@ -861,9 +863,9 @@ out_destroy_group:
 }
 
 /* Check if filesystem can encode a unique fid */
-static int fanotify_test_fid(struct path *path)
+static int fanotify_test_fid(struct path *path, struct kstatfs *stat)
 {
-	struct kstatfs stat, root_stat;
+	struct kstatfs root_stat;
 	struct path root = {
 		.mnt = path->mnt,
 		.dentry = path->dentry->d_sb->s_root,
@@ -873,11 +875,11 @@ static int fanotify_test_fid(struct path *path)
 	/*
 	 * Make sure path is not in filesystem with zero fsid (e.g. tmpfs).
 	 */
-	err = vfs_statfs(path, &stat);
+	err = vfs_statfs(path, stat);
 	if (err)
 		return err;
 
-	if (!stat.f_fsid.val[0] && !stat.f_fsid.val[1])
+	if (!stat->f_fsid.val[0] && !stat->f_fsid.val[1])
 		return -ENODEV;
 
 	/*
@@ -888,8 +890,8 @@ static int fanotify_test_fid(struct path *path)
 	if (err)
 		return err;
 
-	if (root_stat.f_fsid.val[0] != stat.f_fsid.val[0] ||
-	    root_stat.f_fsid.val[1] != stat.f_fsid.val[1])
+	if (root_stat.f_fsid.val[0] != stat->f_fsid.val[0] ||
+	    root_stat.f_fsid.val[1] != stat->f_fsid.val[1])
 		return -EXDEV;
 
 	/*
@@ -914,6 +916,8 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
 	struct fsnotify_group *group;
 	struct fd f;
 	struct path path;
+	struct kstatfs stat;
+	__kernel_fsid_t *fsid = NULL;
 	u32 valid_mask = FANOTIFY_EVENTS | FANOTIFY_EVENT_FLAGS;
 	unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS;
 	int ret;
@@ -992,9 +996,11 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
 		goto fput_and_out;
 
 	if (FAN_GROUP_FLAG(group, FAN_REPORT_FID)) {
-		ret = fanotify_test_fid(&path);
+		ret = fanotify_test_fid(&path, &stat);
 		if (ret)
 			goto path_put_and_out;
+
+		fsid = &stat.f_fsid;
 	}
 
 	/* inode held in place by reference to path; group by fget on fd */
@@ -1007,19 +1013,25 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
 	switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE)) {
 	case FAN_MARK_ADD:
 		if (mark_type == FAN_MARK_MOUNT)
-			ret = fanotify_add_vfsmount_mark(group, mnt, mask, flags);
+			ret = fanotify_add_vfsmount_mark(group, mnt, mask,
+							 flags, fsid);
 		else if (mark_type == FAN_MARK_FILESYSTEM)
-			ret = fanotify_add_sb_mark(group, mnt->mnt_sb, mask, flags);
+			ret = fanotify_add_sb_mark(group, mnt->mnt_sb, mask,
+						   flags, fsid);
 		else
-			ret = fanotify_add_inode_mark(group, inode, mask, flags);
+			ret = fanotify_add_inode_mark(group, inode, mask,
+						      flags, fsid);
 		break;
 	case FAN_MARK_REMOVE:
 		if (mark_type == FAN_MARK_MOUNT)
-			ret = fanotify_remove_vfsmount_mark(group, mnt, mask, flags);
+			ret = fanotify_remove_vfsmount_mark(group, mnt, mask,
+							    flags);
 		else if (mark_type == FAN_MARK_FILESYSTEM)
-			ret = fanotify_remove_sb_mark(group, mnt->mnt_sb, mask, flags);
+			ret = fanotify_remove_sb_mark(group, mnt->mnt_sb, mask,
+						      flags);
 		else
-			ret = fanotify_remove_inode_mark(group, inode, mask, flags);
+			ret = fanotify_remove_inode_mark(group, inode, mask,
+							 flags);
 		break;
 	default:
 		ret = -EINVAL;
diff --git a/fs/notify/mark.c b/fs/notify/mark.c
index d2dd16cb5989..d593d4269561 100644
--- a/fs/notify/mark.c
+++ b/fs/notify/mark.c
@@ -82,6 +82,7 @@
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <linux/srcu.h>
+#include <linux/ratelimit.h>
 
 #include <linux/atomic.h>
 
@@ -481,7 +482,8 @@ int fsnotify_compare_groups(struct fsnotify_group *a, struct fsnotify_group *b)
 }
 
 static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp,
-					       unsigned int type)
+					       unsigned int type,
+					       __kernel_fsid_t *fsid)
 {
 	struct inode *inode = NULL;
 	struct fsnotify_mark_connector *conn;
@@ -493,6 +495,11 @@ static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp,
 	INIT_HLIST_HEAD(&conn->list);
 	conn->type = type;
 	conn->obj = connp;
+	/* Cache fsid of filesystem containing the object */
+	if (fsid)
+		conn->fsid = *fsid;
+	else
+		conn->fsid.val[0] = conn->fsid.val[1] = 0;
 	if (conn->type == FSNOTIFY_OBJ_TYPE_INODE)
 		inode = igrab(fsnotify_conn_inode(conn));
 	/*
@@ -544,7 +551,7 @@ out:
  */
 static int fsnotify_add_mark_list(struct fsnotify_mark *mark,
 				  fsnotify_connp_t *connp, unsigned int type,
-				  int allow_dups)
+				  int allow_dups, __kernel_fsid_t *fsid)
 {
 	struct fsnotify_mark *lmark, *last = NULL;
 	struct fsnotify_mark_connector *conn;
@@ -553,15 +560,36 @@ static int fsnotify_add_mark_list(struct fsnotify_mark *mark,
 
 	if (WARN_ON(!fsnotify_valid_obj_type(type)))
 		return -EINVAL;
+
+	/* Backend is expected to check for zero fsid (e.g. tmpfs) */
+	if (fsid && WARN_ON_ONCE(!fsid->val[0] && !fsid->val[1]))
+		return -ENODEV;
+
 restart:
 	spin_lock(&mark->lock);
 	conn = fsnotify_grab_connector(connp);
 	if (!conn) {
 		spin_unlock(&mark->lock);
-		err = fsnotify_attach_connector_to_object(connp, type);
+		err = fsnotify_attach_connector_to_object(connp, type, fsid);
 		if (err)
 			return err;
 		goto restart;
+	} else if (fsid && (conn->fsid.val[0] || conn->fsid.val[1]) &&
+		   (fsid->val[0] != conn->fsid.val[0] ||
+		    fsid->val[1] != conn->fsid.val[1])) {
+		/*
+		 * Backend is expected to check for non uniform fsid
+		 * (e.g. btrfs), but maybe we missed something?
+		 * Only allow setting conn->fsid once to non zero fsid.
+		 * inotify and non-fid fanotify groups do not set nor test
+		 * conn->fsid.
+		 */
+		pr_warn_ratelimited("%s: fsid mismatch on object of type %u: "
+				    "%x.%x != %x.%x\n", __func__, conn->type,
+				    fsid->val[0], fsid->val[1],
+				    conn->fsid.val[0], conn->fsid.val[1]);
+		err = -EXDEV;
+		goto out_err;
 	}
 
 	/* is mark the first mark? */
@@ -606,7 +634,7 @@ out_err:
  */
 int fsnotify_add_mark_locked(struct fsnotify_mark *mark,
 			     fsnotify_connp_t *connp, unsigned int type,
-			     int allow_dups)
+			     int allow_dups, __kernel_fsid_t *fsid)
 {
 	struct fsnotify_group *group = mark->group;
 	int ret = 0;
@@ -627,7 +655,7 @@ int fsnotify_add_mark_locked(struct fsnotify_mark *mark,
 	fsnotify_get_mark(mark); /* for g_list */
 	spin_unlock(&mark->lock);
 
-	ret = fsnotify_add_mark_list(mark, connp, type, allow_dups);
+	ret = fsnotify_add_mark_list(mark, connp, type, allow_dups, fsid);
 	if (ret)
 		goto err;
 
@@ -648,13 +676,13 @@ err:
 }
 
 int fsnotify_add_mark(struct fsnotify_mark *mark, fsnotify_connp_t *connp,
-		      unsigned int type, int allow_dups)
+		      unsigned int type, int allow_dups, __kernel_fsid_t *fsid)
 {
 	int ret;
 	struct fsnotify_group *group = mark->group;
 
 	mutex_lock(&group->mark_mutex);
-	ret = fsnotify_add_mark_locked(mark, connp, type, allow_dups);
+	ret = fsnotify_add_mark_locked(mark, connp, type, allow_dups, fsid);
 	mutex_unlock(&group->mark_mutex);
 	return ret;
 }
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index 1e4b88bd1443..7b93f15b4944 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -293,6 +293,7 @@ typedef struct fsnotify_mark_connector __rcu *fsnotify_connp_t;
 struct fsnotify_mark_connector {
 	spinlock_t lock;
 	unsigned int type;	/* Type of object [lock] */
+	__kernel_fsid_t fsid;	/* fsid of filesystem containing object */
 	union {
 		/* Object pointer [lock] */
 		fsnotify_connp_t *obj;
@@ -433,28 +434,35 @@ extern void fsnotify_init_mark(struct fsnotify_mark *mark,
 /* Find mark belonging to given group in the list of marks */
 extern struct fsnotify_mark *fsnotify_find_mark(fsnotify_connp_t *connp,
 						struct fsnotify_group *group);
+/* Get cached fsid of filesystem containing object */
+extern int fsnotify_get_conn_fsid(const struct fsnotify_mark_connector *conn,
+				  __kernel_fsid_t *fsid);
 /* attach the mark to the object */
 extern int fsnotify_add_mark(struct fsnotify_mark *mark,
 			     fsnotify_connp_t *connp, unsigned int type,
-			     int allow_dups);
+			     int allow_dups, __kernel_fsid_t *fsid);
 extern int fsnotify_add_mark_locked(struct fsnotify_mark *mark,
-				    fsnotify_connp_t *connp, unsigned int type,
-				    int allow_dups);
+				    fsnotify_connp_t *connp,
+				    unsigned int type, int allow_dups,
+				    __kernel_fsid_t *fsid);
+
 /* attach the mark to the inode */
 static inline int fsnotify_add_inode_mark(struct fsnotify_mark *mark,
 					  struct inode *inode,
 					  int allow_dups)
 {
 	return fsnotify_add_mark(mark, &inode->i_fsnotify_marks,
-				 FSNOTIFY_OBJ_TYPE_INODE, allow_dups);
+				 FSNOTIFY_OBJ_TYPE_INODE, allow_dups, NULL);
 }
 static inline int fsnotify_add_inode_mark_locked(struct fsnotify_mark *mark,
 						 struct inode *inode,
 						 int allow_dups)
 {
 	return fsnotify_add_mark_locked(mark, &inode->i_fsnotify_marks,
-					FSNOTIFY_OBJ_TYPE_INODE, allow_dups);
+					FSNOTIFY_OBJ_TYPE_INODE, allow_dups,
+					NULL);
 }
+
 /* given a group and a mark, flag mark to be freed when all references are dropped */
 extern void fsnotify_destroy_mark(struct fsnotify_mark *mark,
 				  struct fsnotify_group *group);
-- 
cgit v1.2.3


From ec86ff5689ff9605e2d57e016098764ad9a2fee5 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Thu, 10 Jan 2019 19:04:38 +0200
Subject: vfs: add vfs_get_fsid() helper

Wrapper around statfs() interface.

Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/statfs.c            | 14 ++++++++++++++
 include/linux/statfs.h |  3 +++
 2 files changed, 17 insertions(+)

(limited to 'include')

diff --git a/fs/statfs.c b/fs/statfs.c
index f0216629621d..eea7af6f2f22 100644
--- a/fs/statfs.c
+++ b/fs/statfs.c
@@ -67,6 +67,20 @@ static int statfs_by_dentry(struct dentry *dentry, struct kstatfs *buf)
 	return retval;
 }
 
+int vfs_get_fsid(struct dentry *dentry, __kernel_fsid_t *fsid)
+{
+	struct kstatfs st;
+	int error;
+
+	error = statfs_by_dentry(dentry, &st);
+	if (error)
+		return error;
+
+	*fsid = st.f_fsid;
+	return 0;
+}
+EXPORT_SYMBOL(vfs_get_fsid);
+
 int vfs_statfs(const struct path *path, struct kstatfs *buf)
 {
 	int error;
diff --git a/include/linux/statfs.h b/include/linux/statfs.h
index 3142e98546ac..9bc69edb8f18 100644
--- a/include/linux/statfs.h
+++ b/include/linux/statfs.h
@@ -41,4 +41,7 @@ struct kstatfs {
 #define ST_NODIRATIME	0x0800	/* do not update directory access times */
 #define ST_RELATIME	0x1000	/* update atime relative to mtime/ctime */
 
+struct dentry;
+extern int vfs_get_fsid(struct dentry *dentry, __kernel_fsid_t *fsid);
+
 #endif
-- 
cgit v1.2.3


From 0a20df7ed3349dfa3260ddee2efa919df44d0ad5 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Thu, 10 Jan 2019 19:04:40 +0200
Subject: fsnotify: report FS_ISDIR flag with MOVE_SELF and DELETE_SELF events

We need to report FS_ISDIR flag with MOVE_SELF and DELETE_SELF events
for fanotify, because fanotify API requires the user to explicitly
request events on directories by FAN_ONDIR flag.

inotify never reported IN_ISDIR with those events. It looks like an
oversight, but to avoid the risk of breaking existing inotify programs,
mask the FS_ISDIR flag out when reprting those events to inotify backend.

We also add the FS_ISDIR flag with FS_ATTRIB event in the case of rename
over an empty target directory. inotify did not report IN_ISDIR in this
case, but it normally does report IN_ISDIR along with IN_ATTRIB event,
so in this case, we do not mask out the FS_ISDIR flag.

[JK: Simplify the checks in fsnotify_move()]

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/notify/inotify/inotify_fsnotify.c |  9 +++++++++
 include/linux/fsnotify.h             | 21 +++++++++++++++++----
 2 files changed, 26 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
index fe97299975f2..ff30abd6a49b 100644
--- a/fs/notify/inotify/inotify_fsnotify.c
+++ b/fs/notify/inotify/inotify_fsnotify.c
@@ -113,6 +113,15 @@ int inotify_handle_event(struct fsnotify_group *group,
 		return -ENOMEM;
 	}
 
+	/*
+	 * We now report FS_ISDIR flag with MOVE_SELF and DELETE_SELF events
+	 * for fanotify. inotify never reported IN_ISDIR with those events.
+	 * It looks like an oversight, but to avoid the risk of breaking
+	 * existing inotify programs, mask the flag out from those events.
+	 */
+	if (mask & (IN_MOVE_SELF | IN_DELETE_SELF))
+		mask &= ~IN_ISDIR;
+
 	fsn_event = &event->fse;
 	fsnotify_init_event(fsn_event, inode);
 	event->mask = mask;
diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h
index 39b22e88423d..9becae610022 100644
--- a/include/linux/fsnotify.h
+++ b/include/linux/fsnotify.h
@@ -87,7 +87,12 @@ static inline int fsnotify_perm(struct file *file, int mask)
  */
 static inline void fsnotify_link_count(struct inode *inode)
 {
-	fsnotify(inode, FS_ATTRIB, inode, FSNOTIFY_EVENT_INODE, NULL, 0);
+	__u32 mask = FS_ATTRIB;
+
+	if (S_ISDIR(inode->i_mode))
+		mask |= FS_ISDIR;
+
+	fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE, NULL, 0);
 }
 
 /*
@@ -95,12 +100,14 @@ static inline void fsnotify_link_count(struct inode *inode)
  */
 static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir,
 				 const unsigned char *old_name,
-				 int isdir, struct inode *target, struct dentry *moved)
+				 int isdir, struct inode *target,
+				 struct dentry *moved)
 {
 	struct inode *source = moved->d_inode;
 	u32 fs_cookie = fsnotify_get_cookie();
 	__u32 old_dir_mask = FS_MOVED_FROM;
 	__u32 new_dir_mask = FS_MOVED_TO;
+	__u32 mask = FS_MOVE_SELF;
 	const unsigned char *new_name = moved->d_name.name;
 
 	if (old_dir == new_dir)
@@ -109,6 +116,7 @@ static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir,
 	if (isdir) {
 		old_dir_mask |= FS_ISDIR;
 		new_dir_mask |= FS_ISDIR;
+		mask |= FS_ISDIR;
 	}
 
 	fsnotify(old_dir, old_dir_mask, source, FSNOTIFY_EVENT_INODE, old_name,
@@ -120,7 +128,7 @@ static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir,
 		fsnotify_link_count(target);
 
 	if (source)
-		fsnotify(source, FS_MOVE_SELF, moved->d_inode, FSNOTIFY_EVENT_INODE, NULL, 0);
+		fsnotify(source, mask, source, FSNOTIFY_EVENT_INODE, NULL, 0);
 	audit_inode_child(new_dir, moved, AUDIT_TYPE_CHILD_CREATE);
 }
 
@@ -178,7 +186,12 @@ static inline void fsnotify_nameremove(struct dentry *dentry, int isdir)
  */
 static inline void fsnotify_inoderemove(struct inode *inode)
 {
-	fsnotify(inode, FS_DELETE_SELF, inode, FSNOTIFY_EVENT_INODE, NULL, 0);
+	__u32 mask = FS_DELETE_SELF;
+
+	if (S_ISDIR(inode->i_mode))
+		mask |= FS_ISDIR;
+
+	fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE, NULL, 0);
 	__fsnotify_inode_delete(inode);
 }
 
-- 
cgit v1.2.3


From 0321e03cb4572fb3b56582bcb4927c1fe985b191 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Thu, 10 Jan 2019 19:04:41 +0200
Subject: fanotify: check FS_ISDIR flag instead of d_is_dir()

All fsnotify hooks set the FS_ISDIR flag for events that happen
on directory victim inodes except for fsnotify_perm().

Add the missing FS_ISDIR flag in fsnotify_perm() hook and let
fanotify_group_event_mask() check the FS_ISDIR flag instead of
checking if path argument is a directory.

This is needed for fanotify support for event types that do not
carry path information.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/notify/fanotify/fanotify.c | 2 +-
 include/linux/fsnotify.h      | 3 +++
 2 files changed, 4 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index 555831603637..195fc9fe0150 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -144,7 +144,7 @@ static u32 fanotify_group_event_mask(struct fsnotify_iter_info *iter_info,
 		marks_ignored_mask |= mark->ignored_mask;
 	}
 
-	if (d_is_dir(path->dentry) &&
+	if (event_mask & FS_ISDIR &&
 	    !(marks_mask & FS_ISDIR & ~marks_ignored_mask))
 		return 0;
 
diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h
index 9becae610022..09587e2860b5 100644
--- a/include/linux/fsnotify.h
+++ b/include/linux/fsnotify.h
@@ -79,6 +79,9 @@ static inline int fsnotify_perm(struct file *file, int mask)
 		fsnotify_mask = FS_ACCESS_PERM;
 	}
 
+	if (S_ISDIR(inode->i_mode))
+		fsnotify_mask |= FS_ISDIR;
+
 	return fsnotify_path(inode, path, fsnotify_mask);
 }
 
-- 
cgit v1.2.3


From 235328d1fa4251c6dcb32351219bb553a58838d2 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Thu, 10 Jan 2019 19:04:43 +0200
Subject: fanotify: add support for create/attrib/move/delete events

Add support for events with data type FSNOTIFY_EVENT_INODE
(e.g. create/attrib/move/delete) for inode and filesystem mark types.

The "inode" events do not carry enough information (i.e. path) to
report event->fd, so we do not allow setting a mask for those events
unless group supports reporting fid.

The "inode" events are not supported on a mount mark, because they do
not carry enough information (i.e. path) to be filtered by mount point.

The "dirent" events (create/move/delete) report the fid of the parent
directory where events took place without specifying the filename of the
child. In the future, fanotify may get support for reporting filename
information for those events.

Cc: <linux-api@vger.kernel.org>
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/notify/fanotify/fanotify.c      |  9 ++++++++-
 fs/notify/fanotify/fanotify_user.c | 12 ++++++++++++
 include/linux/fanotify.h           | 22 ++++++++++++++++++++--
 include/uapi/linux/fanotify.h      |  8 ++++++++
 4 files changed, 48 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index 974239b03442..158c69acb04d 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -313,9 +313,16 @@ static int fanotify_handle_event(struct fsnotify_group *group,
 
 	BUILD_BUG_ON(FAN_ACCESS != FS_ACCESS);
 	BUILD_BUG_ON(FAN_MODIFY != FS_MODIFY);
+	BUILD_BUG_ON(FAN_ATTRIB != FS_ATTRIB);
 	BUILD_BUG_ON(FAN_CLOSE_NOWRITE != FS_CLOSE_NOWRITE);
 	BUILD_BUG_ON(FAN_CLOSE_WRITE != FS_CLOSE_WRITE);
 	BUILD_BUG_ON(FAN_OPEN != FS_OPEN);
+	BUILD_BUG_ON(FAN_MOVED_TO != FS_MOVED_TO);
+	BUILD_BUG_ON(FAN_MOVED_FROM != FS_MOVED_FROM);
+	BUILD_BUG_ON(FAN_CREATE != FS_CREATE);
+	BUILD_BUG_ON(FAN_DELETE != FS_DELETE);
+	BUILD_BUG_ON(FAN_DELETE_SELF != FS_DELETE_SELF);
+	BUILD_BUG_ON(FAN_MOVE_SELF != FS_MOVE_SELF);
 	BUILD_BUG_ON(FAN_EVENT_ON_CHILD != FS_EVENT_ON_CHILD);
 	BUILD_BUG_ON(FAN_Q_OVERFLOW != FS_Q_OVERFLOW);
 	BUILD_BUG_ON(FAN_OPEN_PERM != FS_OPEN_PERM);
@@ -324,7 +331,7 @@ static int fanotify_handle_event(struct fsnotify_group *group,
 	BUILD_BUG_ON(FAN_OPEN_EXEC != FS_OPEN_EXEC);
 	BUILD_BUG_ON(FAN_OPEN_EXEC_PERM != FS_OPEN_EXEC_PERM);
 
-	BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 12);
+	BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 19);
 
 	mask = fanotify_group_event_mask(group, iter_info, mask, data,
 					 data_type);
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index bf06fd6ef761..6c61a06d0ef5 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -976,6 +976,18 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
 	    group->priority == FS_PRIO_0)
 		goto fput_and_out;
 
+	/*
+	 * Events with data type inode do not carry enough information to report
+	 * event->fd, so we do not allow setting a mask for inode events unless
+	 * group supports reporting fid.
+	 * inode events are not supported on a mount mark, because they do not
+	 * carry enough information (i.e. path) to be filtered by mount point.
+	 */
+	if (mask & FANOTIFY_INODE_EVENTS &&
+	    (!FAN_GROUP_FLAG(group, FAN_REPORT_FID) ||
+	     mark_type == FAN_MARK_MOUNT))
+		goto fput_and_out;
+
 	if (flags & FAN_MARK_FLUSH) {
 		ret = 0;
 		if (mark_type == FAN_MARK_MOUNT)
diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h
index f59be967f72b..e9d45387089f 100644
--- a/include/linux/fanotify.h
+++ b/include/linux/fanotify.h
@@ -35,10 +35,28 @@
 				 FAN_MARK_IGNORED_SURV_MODIFY | \
 				 FAN_MARK_FLUSH)
 
-/* Events that user can request to be notified on */
-#define FANOTIFY_EVENTS		(FAN_ACCESS | FAN_MODIFY | \
+/*
+ * Events that can be reported with data type FSNOTIFY_EVENT_PATH.
+ * Note that FAN_MODIFY can also be reported with data type
+ * FSNOTIFY_EVENT_INODE.
+ */
+#define FANOTIFY_PATH_EVENTS	(FAN_ACCESS | FAN_MODIFY | \
 				 FAN_CLOSE | FAN_OPEN | FAN_OPEN_EXEC)
 
+/*
+ * Directory entry modification events - reported only to directory
+ * where entry is modified and not to a watching parent.
+ */
+#define FANOTIFY_DIRENT_EVENTS	(FAN_MOVE | FAN_CREATE | FAN_DELETE)
+
+/* Events that can only be reported with data type FSNOTIFY_EVENT_INODE */
+#define FANOTIFY_INODE_EVENTS	(FANOTIFY_DIRENT_EVENTS | \
+				 FAN_ATTRIB | FAN_MOVE_SELF | FAN_DELETE_SELF)
+
+/* Events that user can request to be notified on */
+#define FANOTIFY_EVENTS		(FANOTIFY_PATH_EVENTS | \
+				 FANOTIFY_INODE_EVENTS)
+
 /* Events that require a permission response from user */
 #define FANOTIFY_PERM_EVENTS	(FAN_OPEN_PERM | FAN_ACCESS_PERM | \
 				 FAN_OPEN_EXEC_PERM)
diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h
index 959ae2bdc7ca..b9effa6f8503 100644
--- a/include/uapi/linux/fanotify.h
+++ b/include/uapi/linux/fanotify.h
@@ -7,9 +7,16 @@
 /* the following events that user-space can register for */
 #define FAN_ACCESS		0x00000001	/* File was accessed */
 #define FAN_MODIFY		0x00000002	/* File was modified */
+#define FAN_ATTRIB		0x00000004	/* Metadata changed */
 #define FAN_CLOSE_WRITE		0x00000008	/* Writtable file closed */
 #define FAN_CLOSE_NOWRITE	0x00000010	/* Unwrittable file closed */
 #define FAN_OPEN		0x00000020	/* File was opened */
+#define FAN_MOVED_FROM		0x00000040	/* File was moved from X */
+#define FAN_MOVED_TO		0x00000080	/* File was moved to Y */
+#define FAN_CREATE		0x00000100	/* Subfile was created */
+#define FAN_DELETE		0x00000200	/* Subfile was deleted */
+#define FAN_DELETE_SELF		0x00000400	/* Self was deleted */
+#define FAN_MOVE_SELF		0x00000800	/* Self was moved */
 #define FAN_OPEN_EXEC		0x00001000	/* File was opened for exec */
 
 #define FAN_Q_OVERFLOW		0x00004000	/* Event queued overflowed */
@@ -24,6 +31,7 @@
 
 /* helper events */
 #define FAN_CLOSE		(FAN_CLOSE_WRITE | FAN_CLOSE_NOWRITE) /* close */
+#define FAN_MOVE		(FAN_MOVED_FROM | FAN_MOVED_TO) /* moves */
 
 /* flags used for fanotify_init() */
 #define FAN_CLOEXEC		0x00000001
-- 
cgit v1.2.3


From e7fce6d94cc1f7d7ccb6e79dbf7062baec45e142 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Thu, 10 Jan 2019 19:04:44 +0200
Subject: fanotify: report FAN_ONDIR to listener with FAN_REPORT_FID

dirent modification events (create/delete/move) do not carry the
child entry name/inode information. Instead, we report FAN_ONDIR
for mkdir/rmdir so user can differentiate them from creat/unlink.

This is consistent with inotify reporting IN_ISDIR with dirent events
and is useful for implementing recursive directory tree watcher.

We avoid merging dirent events referring to subdirs with dirent events
referring to non subdirs, otherwise, user won't be able to tell from a
mask FAN_CREATE|FAN_DELETE|FAN_ONDIR if it describes mkdir+unlink pair
or rmdir+create pair of events.

For backward compatibility and consistency, do not report FAN_ONDIR
to user in legacy fanotify mode (reporting fd) and report FAN_ONDIR
to user in FAN_REPORT_FID mode for all event types.

Cc: <linux-api@vger.kernel.org>
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/notify/fanotify/fanotify.c | 34 +++++++++++++++++++++++++++++++---
 include/linux/fanotify.h      |  2 +-
 2 files changed, 32 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index 158c69acb04d..4ff84bc5772e 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -34,7 +34,16 @@ static bool should_merge(struct fsnotify_event *old_fsn,
 		return old->path.mnt == new->path.mnt &&
 			old->path.dentry == new->path.dentry;
 	} else if (fanotify_event_has_fid(old)) {
-		return fanotify_fid_equal(&old->fid, &new->fid, old->fh_len);
+		/*
+		 * We want to merge many dirent events in the same dir (i.e.
+		 * creates/unlinks/renames), but we do not want to merge dirent
+		 * events referring to subdirs with dirent events referring to
+		 * non subdirs, otherwise, user won't be able to tell from a
+		 * mask FAN_CREATE|FAN_DELETE|FAN_ONDIR if it describes mkdir+
+		 * unlink pair or rmdir+create pair of events.
+		 */
+		return (old->mask & FS_ISDIR) == (new->mask & FS_ISDIR) &&
+			fanotify_fid_equal(&old->fid, &new->fid, old->fh_len);
 	}
 
 	/* Do not merge events if we failed to encode fid */
@@ -112,6 +121,7 @@ static u32 fanotify_group_event_mask(struct fsnotify_group *group,
 				     int data_type)
 {
 	__u32 marks_mask = 0, marks_ignored_mask = 0;
+	__u32 test_mask, user_mask = FANOTIFY_OUTGOING_EVENTS;
 	const struct path *path = data;
 	struct fsnotify_mark *mark;
 	int type;
@@ -145,12 +155,30 @@ static u32 fanotify_group_event_mask(struct fsnotify_group *group,
 		marks_ignored_mask |= mark->ignored_mask;
 	}
 
+	test_mask = event_mask & marks_mask & ~marks_ignored_mask;
+
+	/*
+	 * dirent modification events (create/delete/move) do not carry the
+	 * child entry name/inode information. Instead, we report FAN_ONDIR
+	 * for mkdir/rmdir so user can differentiate them from creat/unlink.
+	 *
+	 * For backward compatibility and consistency, do not report FAN_ONDIR
+	 * to user in legacy fanotify mode (reporting fd) and report FAN_ONDIR
+	 * to user in FAN_REPORT_FID mode for all event types.
+	 */
+	if (FAN_GROUP_FLAG(group, FAN_REPORT_FID)) {
+		/* Do not report FAN_ONDIR without any event */
+		if (!(test_mask & ~FAN_ONDIR))
+			return 0;
+	} else {
+		user_mask &= ~FAN_ONDIR;
+	}
+
 	if (event_mask & FS_ISDIR &&
 	    !(marks_mask & FS_ISDIR & ~marks_ignored_mask))
 		return 0;
 
-	return event_mask & FANOTIFY_OUTGOING_EVENTS & marks_mask &
-		~marks_ignored_mask;
+	return test_mask & user_mask;
 }
 
 static int fanotify_encode_fid(struct fanotify_event *event,
diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h
index e9d45387089f..b79fa9bb7359 100644
--- a/include/linux/fanotify.h
+++ b/include/linux/fanotify.h
@@ -67,7 +67,7 @@
 /* Events that may be reported to user */
 #define FANOTIFY_OUTGOING_EVENTS	(FANOTIFY_EVENTS | \
 					 FANOTIFY_PERM_EVENTS | \
-					 FAN_Q_OVERFLOW)
+					 FAN_Q_OVERFLOW | FAN_ONDIR)
 
 #define ALL_FANOTIFY_EVENT_BITS		(FANOTIFY_OUTGOING_EVENTS | \
 					 FANOTIFY_EVENT_FLAGS)
-- 
cgit v1.2.3


From 63635b54e07f646aa1d8e54c96b099162a226f7d Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Date: Mon, 21 Jan 2019 08:32:22 -0500
Subject: media: v4l2-event: keep track of the timestamp in ns

Internally use ktime_get_ns() to get the timestamp of the event.
Only convert to timespec when interfacing with userspace.

Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Acked-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 drivers/media/v4l2-core/v4l2-event.c | 19 +++++++++----------
 include/media/v4l2-event.h           |  2 ++
 2 files changed, 11 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/drivers/media/v4l2-core/v4l2-event.c b/drivers/media/v4l2-core/v4l2-event.c
index 481e3c65cf97..c46d14c996fc 100644
--- a/drivers/media/v4l2-core/v4l2-event.c
+++ b/drivers/media/v4l2-core/v4l2-event.c
@@ -52,6 +52,7 @@ static int __v4l2_event_dequeue(struct v4l2_fh *fh, struct v4l2_event *event)
 
 	kev->event.pending = fh->navailable;
 	*event = kev->event;
+	event->timestamp = ns_to_timespec(kev->ts);
 	kev->sev->first = sev_pos(kev->sev, 1);
 	kev->sev->in_use--;
 
@@ -103,8 +104,8 @@ static struct v4l2_subscribed_event *v4l2_event_subscribed(
 	return NULL;
 }
 
-static void __v4l2_event_queue_fh(struct v4l2_fh *fh, const struct v4l2_event *ev,
-		const struct timespec *ts)
+static void __v4l2_event_queue_fh(struct v4l2_fh *fh,
+				  const struct v4l2_event *ev, u64 ts)
 {
 	struct v4l2_subscribed_event *sev;
 	struct v4l2_kevent *kev;
@@ -144,7 +145,7 @@ static void __v4l2_event_queue_fh(struct v4l2_fh *fh, const struct v4l2_event *e
 	if (copy_payload)
 		kev->event.u = ev->u;
 	kev->event.id = ev->id;
-	kev->event.timestamp = *ts;
+	kev->ts = ts;
 	kev->event.sequence = fh->sequence;
 	sev->in_use++;
 	list_add_tail(&kev->list, &fh->available);
@@ -158,17 +159,17 @@ void v4l2_event_queue(struct video_device *vdev, const struct v4l2_event *ev)
 {
 	struct v4l2_fh *fh;
 	unsigned long flags;
-	struct timespec timestamp;
+	u64 ts;
 
 	if (vdev == NULL)
 		return;
 
-	ktime_get_ts(&timestamp);
+	ts = ktime_get_ns();
 
 	spin_lock_irqsave(&vdev->fh_lock, flags);
 
 	list_for_each_entry(fh, &vdev->fh_list, list)
-		__v4l2_event_queue_fh(fh, ev, &timestamp);
+		__v4l2_event_queue_fh(fh, ev, ts);
 
 	spin_unlock_irqrestore(&vdev->fh_lock, flags);
 }
@@ -177,12 +178,10 @@ EXPORT_SYMBOL_GPL(v4l2_event_queue);
 void v4l2_event_queue_fh(struct v4l2_fh *fh, const struct v4l2_event *ev)
 {
 	unsigned long flags;
-	struct timespec timestamp;
-
-	ktime_get_ts(&timestamp);
+	u64 ts = ktime_get_ns();
 
 	spin_lock_irqsave(&fh->vdev->fh_lock, flags);
-	__v4l2_event_queue_fh(fh, ev, &timestamp);
+	__v4l2_event_queue_fh(fh, ev, ts);
 	spin_unlock_irqrestore(&fh->vdev->fh_lock, flags);
 }
 EXPORT_SYMBOL_GPL(v4l2_event_queue_fh);
diff --git a/include/media/v4l2-event.h b/include/media/v4l2-event.h
index 17833e886e11..c2b6cdc714d2 100644
--- a/include/media/v4l2-event.h
+++ b/include/media/v4l2-event.h
@@ -34,11 +34,13 @@ struct video_device;
  * @list:	List node for the v4l2_fh->available list.
  * @sev:	Pointer to parent v4l2_subscribed_event.
  * @event:	The event itself.
+ * @ts:		The timestamp of the event.
  */
 struct v4l2_kevent {
 	struct list_head	list;
 	struct v4l2_subscribed_event *sev;
 	struct v4l2_event	event;
+	u64			ts;
 };
 
 /**
-- 
cgit v1.2.3


From 15a40b27beb0a85d7f11d747bfc587dbeb69a96c Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Date: Mon, 21 Jan 2019 08:32:23 -0500
Subject: media: videobuf: use u64 for the timestamp internally

Just like vb2 does, use u64 internally to store the timestamps
of the buffers. Only convert to timeval when interfacing with
userspace.

Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Acked-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 drivers/media/common/saa7146/saa7146_fops.c   |  2 +-
 drivers/media/pci/bt8xx/bttv-driver.c         |  8 +++-----
 drivers/media/pci/cx18/cx18-mailbox.c         |  2 +-
 drivers/media/platform/davinci/vpfe_capture.c |  2 +-
 drivers/media/platform/fsl-viu.c              |  2 +-
 drivers/media/platform/omap/omap_vout.c       | 12 ++++++------
 drivers/media/usb/cx231xx/cx231xx-417.c       |  4 ++--
 drivers/media/usb/cx231xx/cx231xx-vbi.c       |  2 +-
 drivers/media/usb/cx231xx/cx231xx-video.c     |  2 +-
 drivers/media/usb/tm6000/tm6000-video.c       |  2 +-
 drivers/media/usb/zr364xx/zr364xx.c           |  4 ++--
 drivers/media/v4l2-core/videobuf-core.c       |  4 ++--
 include/media/videobuf-core.h                 |  2 +-
 13 files changed, 23 insertions(+), 25 deletions(-)

(limited to 'include')

diff --git a/drivers/media/common/saa7146/saa7146_fops.c b/drivers/media/common/saa7146/saa7146_fops.c
index c790ae264464..be4355a4c126 100644
--- a/drivers/media/common/saa7146/saa7146_fops.c
+++ b/drivers/media/common/saa7146/saa7146_fops.c
@@ -105,7 +105,7 @@ void saa7146_buffer_finish(struct saa7146_dev *dev,
 	}
 
 	q->curr->vb.state = state;
-	v4l2_get_timestamp(&q->curr->vb.ts);
+	q->curr->vb.ts = ktime_get_ns();
 	wake_up(&q->curr->vb.done);
 
 	q->curr = NULL;
diff --git a/drivers/media/pci/bt8xx/bttv-driver.c b/drivers/media/pci/bt8xx/bttv-driver.c
index d09785fd37a8..5e769c09dbd0 100644
--- a/drivers/media/pci/bt8xx/bttv-driver.c
+++ b/drivers/media/pci/bt8xx/bttv-driver.c
@@ -3600,9 +3600,7 @@ static void
 bttv_irq_wakeup_video(struct bttv *btv, struct bttv_buffer_set *wakeup,
 		      struct bttv_buffer_set *curr, unsigned int state)
 {
-	struct timeval ts;
-
-	v4l2_get_timestamp(&ts);
+	u64 ts = ktime_get_ns();
 
 	if (wakeup->top == wakeup->bottom) {
 		if (NULL != wakeup->top && curr->top != wakeup->top) {
@@ -3643,7 +3641,7 @@ bttv_irq_wakeup_vbi(struct bttv *btv, struct bttv_buffer *wakeup,
 	if (NULL == wakeup)
 		return;
 
-	v4l2_get_timestamp(&wakeup->vb.ts);
+	wakeup->vb.ts = ktime_get_ns();
 	wakeup->vb.field_count = btv->field_count;
 	wakeup->vb.state = state;
 	wake_up(&wakeup->vb.done);
@@ -3713,7 +3711,7 @@ bttv_irq_wakeup_top(struct bttv *btv)
 	btv->curr.top = NULL;
 	bttv_risc_hook(btv, RISC_SLOT_O_FIELD, NULL, 0);
 
-	v4l2_get_timestamp(&wakeup->vb.ts);
+	wakeup->vb.ts = ktime_get_ns();
 	wakeup->vb.field_count = btv->field_count;
 	wakeup->vb.state = VIDEOBUF_DONE;
 	wake_up(&wakeup->vb.done);
diff --git a/drivers/media/pci/cx18/cx18-mailbox.c b/drivers/media/pci/cx18/cx18-mailbox.c
index f66dd63e1994..0ffd2196a980 100644
--- a/drivers/media/pci/cx18/cx18-mailbox.c
+++ b/drivers/media/pci/cx18/cx18-mailbox.c
@@ -197,7 +197,7 @@ static void cx18_mdl_send_to_videobuf(struct cx18_stream *s,
 	}
 
 	if (dispatch) {
-		v4l2_get_timestamp(&vb_buf->vb.ts);
+		vb_buf->vb.ts = ktime_get_ns();
 		list_del(&vb_buf->vb.queue);
 		vb_buf->vb.state = VIDEOBUF_DONE;
 		wake_up(&vb_buf->vb.done);
diff --git a/drivers/media/platform/davinci/vpfe_capture.c b/drivers/media/platform/davinci/vpfe_capture.c
index 9996bab98fe3..26dadbba930f 100644
--- a/drivers/media/platform/davinci/vpfe_capture.c
+++ b/drivers/media/platform/davinci/vpfe_capture.c
@@ -518,7 +518,7 @@ static void vpfe_schedule_bottom_field(struct vpfe_device *vpfe_dev)
 
 static void vpfe_process_buffer_complete(struct vpfe_device *vpfe_dev)
 {
-	v4l2_get_timestamp(&vpfe_dev->cur_frm->ts);
+	vpfe_dev->cur_frm->ts = ktime_get_ns();
 	vpfe_dev->cur_frm->state = VIDEOBUF_DONE;
 	vpfe_dev->cur_frm->size = vpfe_dev->fmt.fmt.pix.sizeimage;
 	wake_up_interruptible(&vpfe_dev->cur_frm->done);
diff --git a/drivers/media/platform/fsl-viu.c b/drivers/media/platform/fsl-viu.c
index ca6d0317ab42..cffebcaacb90 100644
--- a/drivers/media/platform/fsl-viu.c
+++ b/drivers/media/platform/fsl-viu.c
@@ -1090,7 +1090,7 @@ static void viu_capture_intr(struct viu_dev *dev, u32 status)
 
 		if (waitqueue_active(&buf->vb.done)) {
 			list_del(&buf->vb.queue);
-			v4l2_get_timestamp(&buf->vb.ts);
+			buf->vb.ts = ktime_get_ns();
 			buf->vb.state = VIDEOBUF_DONE;
 			buf->vb.field_count++;
 			wake_up(&buf->vb.done);
diff --git a/drivers/media/platform/omap/omap_vout.c b/drivers/media/platform/omap/omap_vout.c
index f447ae3bb465..ff3de2dce5a2 100644
--- a/drivers/media/platform/omap/omap_vout.c
+++ b/drivers/media/platform/omap/omap_vout.c
@@ -513,7 +513,7 @@ static int omapvid_apply_changes(struct omap_vout_device *vout)
 }
 
 static int omapvid_handle_interlace_display(struct omap_vout_device *vout,
-		unsigned int irqstatus, struct timeval timevalue)
+		unsigned int irqstatus, u64 ts)
 {
 	u32 fid;
 
@@ -537,7 +537,7 @@ static int omapvid_handle_interlace_display(struct omap_vout_device *vout,
 		if (vout->cur_frm == vout->next_frm)
 			goto err;
 
-		vout->cur_frm->ts = timevalue;
+		vout->cur_frm->ts = ts;
 		vout->cur_frm->state = VIDEOBUF_DONE;
 		wake_up_interruptible(&vout->cur_frm->done);
 		vout->cur_frm = vout->next_frm;
@@ -557,7 +557,7 @@ static void omap_vout_isr(void *arg, unsigned int irqstatus)
 	int ret, fid, mgr_id;
 	u32 addr, irq;
 	struct omap_overlay *ovl;
-	struct timeval timevalue;
+	u64 ts;
 	struct omapvideo_info *ovid;
 	struct omap_dss_device *cur_display;
 	struct omap_vout_device *vout = (struct omap_vout_device *)arg;
@@ -577,7 +577,7 @@ static void omap_vout_isr(void *arg, unsigned int irqstatus)
 		return;
 
 	spin_lock(&vout->vbq_lock);
-	v4l2_get_timestamp(&timevalue);
+	ts = ktime_get_ns();
 
 	switch (cur_display->type) {
 	case OMAP_DISPLAY_TYPE_DSI:
@@ -595,7 +595,7 @@ static void omap_vout_isr(void *arg, unsigned int irqstatus)
 		break;
 	case OMAP_DISPLAY_TYPE_VENC:
 		fid = omapvid_handle_interlace_display(vout, irqstatus,
-				timevalue);
+				ts);
 		if (!fid)
 			goto vout_isr_err;
 		break;
@@ -608,7 +608,7 @@ static void omap_vout_isr(void *arg, unsigned int irqstatus)
 	}
 
 	if (!vout->first_int && (vout->cur_frm != vout->next_frm)) {
-		vout->cur_frm->ts = timevalue;
+		vout->cur_frm->ts = ts;
 		vout->cur_frm->state = VIDEOBUF_DONE;
 		wake_up_interruptible(&vout->cur_frm->done);
 		vout->cur_frm = vout->next_frm;
diff --git a/drivers/media/usb/cx231xx/cx231xx-417.c b/drivers/media/usb/cx231xx/cx231xx-417.c
index 1c48c497bd6a..0f8ae81f4820 100644
--- a/drivers/media/usb/cx231xx/cx231xx-417.c
+++ b/drivers/media/usb/cx231xx/cx231xx-417.c
@@ -1316,7 +1316,7 @@ static void buffer_copy(struct cx231xx *dev, char *data, int len, struct urb *ur
 
 		buf->vb.state = VIDEOBUF_DONE;
 		buf->vb.field_count++;
-		v4l2_get_timestamp(&buf->vb.ts);
+		buf->vb.ts = ktime_get_ns();
 		list_del(&buf->vb.queue);
 		wake_up(&buf->vb.done);
 		dma_q->mpeg_buffer_completed = 0;
@@ -1347,7 +1347,7 @@ static void buffer_filled(char *data, int len, struct urb *urb,
 	memcpy(vbuf, data, len);
 	buf->vb.state = VIDEOBUF_DONE;
 	buf->vb.field_count++;
-	v4l2_get_timestamp(&buf->vb.ts);
+	buf->vb.ts = ktime_get_ns();
 	list_del(&buf->vb.queue);
 	wake_up(&buf->vb.done);
 }
diff --git a/drivers/media/usb/cx231xx/cx231xx-vbi.c b/drivers/media/usb/cx231xx/cx231xx-vbi.c
index 10b2eb7338ad..d16b73c04445 100644
--- a/drivers/media/usb/cx231xx/cx231xx-vbi.c
+++ b/drivers/media/usb/cx231xx/cx231xx-vbi.c
@@ -528,7 +528,7 @@ static inline void vbi_buffer_filled(struct cx231xx *dev,
 
 	buf->vb.state = VIDEOBUF_DONE;
 	buf->vb.field_count++;
-	v4l2_get_timestamp(&buf->vb.ts);
+	buf->vb.ts = ktime_get_ns();
 
 	dev->vbi_mode.bulk_ctl.buf = NULL;
 
diff --git a/drivers/media/usb/cx231xx/cx231xx-video.c b/drivers/media/usb/cx231xx/cx231xx-video.c
index 0d451c4ea3b9..aebbaf9d92a6 100644
--- a/drivers/media/usb/cx231xx/cx231xx-video.c
+++ b/drivers/media/usb/cx231xx/cx231xx-video.c
@@ -182,7 +182,7 @@ static inline void buffer_filled(struct cx231xx *dev,
 	cx231xx_isocdbg("[%p/%d] wakeup\n", buf, buf->vb.i);
 	buf->vb.state = VIDEOBUF_DONE;
 	buf->vb.field_count++;
-	v4l2_get_timestamp(&buf->vb.ts);
+	buf->vb.ts = ktime_get_ns();
 
 	if (dev->USE_ISO)
 		dev->video_mode.isoc_ctl.buf = NULL;
diff --git a/drivers/media/usb/tm6000/tm6000-video.c b/drivers/media/usb/tm6000/tm6000-video.c
index ee7b5318b351..5127be71dd03 100644
--- a/drivers/media/usb/tm6000/tm6000-video.c
+++ b/drivers/media/usb/tm6000/tm6000-video.c
@@ -106,7 +106,7 @@ static inline void buffer_filled(struct tm6000_core *dev,
 	dprintk(dev, V4L2_DEBUG_ISOC, "[%p/%d] wakeup\n", buf, buf->vb.i);
 	buf->vb.state = VIDEOBUF_DONE;
 	buf->vb.field_count++;
-	v4l2_get_timestamp(&buf->vb.ts);
+	buf->vb.ts = ktime_get_ns();
 
 	list_del(&buf->vb.queue);
 	wake_up(&buf->vb.done);
diff --git a/drivers/media/usb/zr364xx/zr364xx.c b/drivers/media/usb/zr364xx/zr364xx.c
index ab35554cbffa..51aad2cf742f 100644
--- a/drivers/media/usb/zr364xx/zr364xx.c
+++ b/drivers/media/usb/zr364xx/zr364xx.c
@@ -521,7 +521,7 @@ static void zr364xx_fillbuff(struct zr364xx_camera *cam,
 	/* tell v4l buffer was filled */
 
 	buf->vb.field_count = cam->frame_count * 2;
-	v4l2_get_timestamp(&buf->vb.ts);
+	buf->vb.ts = ktime_get_ns();
 	buf->vb.state = VIDEOBUF_DONE;
 }
 
@@ -549,7 +549,7 @@ static int zr364xx_got_frame(struct zr364xx_camera *cam, int jpgsize)
 		goto unlock;
 	}
 	list_del(&buf->vb.queue);
-	v4l2_get_timestamp(&buf->vb.ts);
+	buf->vb.ts = ktime_get_ns();
 	DBG("[%p/%d] wakeup\n", buf, buf->vb.i);
 	zr364xx_fillbuff(cam, buf, jpgsize);
 	wake_up(&buf->vb.done);
diff --git a/drivers/media/v4l2-core/videobuf-core.c b/drivers/media/v4l2-core/videobuf-core.c
index 7491b337002c..d1bcfa91aaf8 100644
--- a/drivers/media/v4l2-core/videobuf-core.c
+++ b/drivers/media/v4l2-core/videobuf-core.c
@@ -367,7 +367,7 @@ static void videobuf_status(struct videobuf_queue *q, struct v4l2_buffer *b,
 	}
 
 	b->field     = vb->field;
-	b->timestamp = vb->ts;
+	b->timestamp = ns_to_timeval(vb->ts);
 	b->bytesused = vb->size;
 	b->sequence  = vb->field_count >> 1;
 }
@@ -581,7 +581,7 @@ int videobuf_qbuf(struct videobuf_queue *q, struct v4l2_buffer *b)
 		    || q->type == V4L2_BUF_TYPE_SDR_OUTPUT) {
 			buf->size = b->bytesused;
 			buf->field = b->field;
-			buf->ts = b->timestamp;
+			buf->ts = v4l2_timeval_to_ns(&b->timestamp);
 		}
 		break;
 	case V4L2_MEMORY_USERPTR:
diff --git a/include/media/videobuf-core.h b/include/media/videobuf-core.h
index 60a664febba0..5684dc6f0d0d 100644
--- a/include/media/videobuf-core.h
+++ b/include/media/videobuf-core.h
@@ -80,7 +80,7 @@ struct videobuf_buffer {
 	struct list_head        queue;
 	wait_queue_head_t       done;
 	unsigned int            field_count;
-	struct timeval          ts;
+	u64			ts;
 
 	/* Memory type */
 	enum v4l2_memory        memory;
-- 
cgit v1.2.3


From 276c1f066bdaaed6fe82ed231e1eff2f41c34882 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Date: Mon, 21 Jan 2019 08:32:29 -0500
Subject: media: v4l2-common: drop v4l2_get_timestamp

This function is no longer used, so drop it.

Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Acked-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 drivers/media/v4l2-core/v4l2-common.c | 10 ----------
 include/media/v4l2-common.h           |  9 ---------
 2 files changed, 19 deletions(-)

(limited to 'include')

diff --git a/drivers/media/v4l2-core/v4l2-common.c b/drivers/media/v4l2-core/v4l2-common.c
index 50763fb42a1b..663730f088cd 100644
--- a/drivers/media/v4l2-core/v4l2-common.c
+++ b/drivers/media/v4l2-core/v4l2-common.c
@@ -398,16 +398,6 @@ __v4l2_find_nearest_size(const void *array, size_t array_size,
 }
 EXPORT_SYMBOL_GPL(__v4l2_find_nearest_size);
 
-void v4l2_get_timestamp(struct timeval *tv)
-{
-	struct timespec ts;
-
-	ktime_get_ts(&ts);
-	tv->tv_sec = ts.tv_sec;
-	tv->tv_usec = ts.tv_nsec / NSEC_PER_USEC;
-}
-EXPORT_SYMBOL_GPL(v4l2_get_timestamp);
-
 int v4l2_g_parm_cap(struct video_device *vdev,
 		    struct v4l2_subdev *sd, struct v4l2_streamparm *a)
 {
diff --git a/include/media/v4l2-common.h b/include/media/v4l2-common.h
index 0c511ed8ffb0..2b93cb281fa5 100644
--- a/include/media/v4l2-common.h
+++ b/include/media/v4l2-common.h
@@ -361,15 +361,6 @@ __v4l2_find_nearest_size(const void *array, size_t array_size,
 			 size_t entry_size, size_t width_offset,
 			 size_t height_offset, s32 width, s32 height);
 
-/**
- * v4l2_get_timestamp - helper routine to get a timestamp to be used when
- *	filling streaming metadata. Internally, it uses ktime_get_ts(),
- *	which is the recommended way to get it.
- *
- * @tv: pointer to &struct timeval to be filled.
- */
-void v4l2_get_timestamp(struct timeval *tv);
-
 /**
  * v4l2_g_parm_cap - helper routine for vidioc_g_parm to fill this in by
  *      calling the g_frame_interval op of the given subdev. It only works
-- 
cgit v1.2.3


From c2d88e7d66f9d327593e0d0f9d7f94755b637e5c Mon Sep 17 00:00:00 2001
From: Ezequiel Garcia <ezequiel@collabora.com>
Date: Tue, 5 Feb 2019 16:11:15 -0500
Subject: media: vb2: Fix buf_out_validate documentation

The .buf_out_validate callback is mandatory for OUTPUT
queues. Mark it as such in the callback's doc.

Fixes: 28d77c21cb ("media: vb2: add buf_out_validate callback")

Signed-off-by: Ezequiel Garcia <ezequiel@collabora.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 include/media/videobuf2-core.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/media/videobuf2-core.h b/include/media/videobuf2-core.h
index 4849b865b908..06142c1469cc 100644
--- a/include/media/videobuf2-core.h
+++ b/include/media/videobuf2-core.h
@@ -345,7 +345,8 @@ struct vb2_buffer {
  *			waiting for a new buffer to arrive.
  * @buf_out_validate:	called when the output buffer is prepared or queued
  *			to a request; drivers can use this to validate
- *			userspace-provided information; optional.
+ *			userspace-provided information; this is required only
+ *			for OUTPUT queues.
  * @buf_init:		called once after allocating a buffer (in MMAP case)
  *			or after acquiring a new USERPTR buffer; drivers may
  *			perform additional buffer-related initialization;
-- 
cgit v1.2.3


From a4d3d61254d3645d8de738102c3c473b176180a5 Mon Sep 17 00:00:00 2001
From: Ezequiel Garcia <ezequiel@collabora.com>
Date: Tue, 5 Feb 2019 16:20:33 -0500
Subject: media: v4l2-mem2mem: Rename v4l2_m2m_buf_copy_data to
 v4l2_m2m_buf_copy_metadata

The v4l2_m2m_buf_copy_data helper is used to copy the buffer
metadata, such as its timestamp and its flags.

Therefore, the v4l2_m2m_buf_copy_metadata name is more clear
and avoids confusion with a payload data copy.

Signed-off-by: Ezequiel Garcia <ezequiel@collabora.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
[hverkuil-cisco@xs4all.nl: also fix cedrus_dec.c]
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 drivers/media/platform/vicodec/vicodec-core.c   |  4 ++--
 drivers/media/platform/vim2m.c                  |  2 +-
 drivers/media/v4l2-core/v4l2-mem2mem.c          |  8 ++++----
 drivers/staging/media/sunxi/cedrus/cedrus_dec.c |  2 +-
 include/media/v4l2-mem2mem.h                    | 14 +++++++-------
 5 files changed, 15 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/drivers/media/platform/vicodec/vicodec-core.c b/drivers/media/platform/vicodec/vicodec-core.c
index 212da185e4f2..9d739ea5542d 100644
--- a/drivers/media/platform/vicodec/vicodec-core.c
+++ b/drivers/media/platform/vicodec/vicodec-core.c
@@ -199,7 +199,7 @@ static int device_process(struct vicodec_ctx *ctx,
 
 	dst_vb->sequence = q_dst->sequence++;
 	dst_vb->flags &= ~V4L2_BUF_FLAG_LAST;
-	v4l2_m2m_buf_copy_data(src_vb, dst_vb, !ctx->is_enc);
+	v4l2_m2m_buf_copy_metadata(src_vb, dst_vb, !ctx->is_enc);
 
 	return 0;
 }
@@ -414,7 +414,7 @@ static void set_last_buffer(struct vb2_v4l2_buffer *dst_buf,
 	vb2_set_plane_payload(&dst_buf->vb2_buf, 0, 0);
 	dst_buf->sequence = q_dst->sequence++;
 
-	v4l2_m2m_buf_copy_data(src_buf, dst_buf, !ctx->is_enc);
+	v4l2_m2m_buf_copy_metadata(src_buf, dst_buf, !ctx->is_enc);
 	dst_buf->flags |= V4L2_BUF_FLAG_LAST;
 	v4l2_m2m_buf_done(dst_buf, VB2_BUF_STATE_DONE);
 }
diff --git a/drivers/media/platform/vim2m.c b/drivers/media/platform/vim2m.c
index bd125ad34343..98be92a4e0b0 100644
--- a/drivers/media/platform/vim2m.c
+++ b/drivers/media/platform/vim2m.c
@@ -432,7 +432,7 @@ static int device_process(struct vim2m_ctx *ctx,
 	out_vb->sequence = get_q_data(ctx,
 				      V4L2_BUF_TYPE_VIDEO_CAPTURE)->sequence++;
 	in_vb->sequence = q_data_in->sequence++;
-	v4l2_m2m_buf_copy_data(in_vb, out_vb, true);
+	v4l2_m2m_buf_copy_metadata(in_vb, out_vb, true);
 
 	if (ctx->mode & MEM2MEM_VFLIP) {
 		start = height - 1;
diff --git a/drivers/media/v4l2-core/v4l2-mem2mem.c b/drivers/media/v4l2-core/v4l2-mem2mem.c
index 631f4e2aa942..1494d0d5951a 100644
--- a/drivers/media/v4l2-core/v4l2-mem2mem.c
+++ b/drivers/media/v4l2-core/v4l2-mem2mem.c
@@ -975,9 +975,9 @@ void v4l2_m2m_buf_queue(struct v4l2_m2m_ctx *m2m_ctx,
 }
 EXPORT_SYMBOL_GPL(v4l2_m2m_buf_queue);
 
-void v4l2_m2m_buf_copy_data(const struct vb2_v4l2_buffer *out_vb,
-			    struct vb2_v4l2_buffer *cap_vb,
-			    bool copy_frame_flags)
+void v4l2_m2m_buf_copy_metadata(const struct vb2_v4l2_buffer *out_vb,
+				struct vb2_v4l2_buffer *cap_vb,
+				bool copy_frame_flags)
 {
 	u32 mask = V4L2_BUF_FLAG_TIMECODE | V4L2_BUF_FLAG_TSTAMP_SRC_MASK;
 
@@ -993,7 +993,7 @@ void v4l2_m2m_buf_copy_data(const struct vb2_v4l2_buffer *out_vb,
 	cap_vb->flags &= ~mask;
 	cap_vb->flags |= out_vb->flags & mask;
 }
-EXPORT_SYMBOL_GPL(v4l2_m2m_buf_copy_data);
+EXPORT_SYMBOL_GPL(v4l2_m2m_buf_copy_metadata);
 
 void v4l2_m2m_request_queue(struct media_request *req)
 {
diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_dec.c b/drivers/staging/media/sunxi/cedrus/cedrus_dec.c
index 443fb037e1cf..4d6d602cdde6 100644
--- a/drivers/staging/media/sunxi/cedrus/cedrus_dec.c
+++ b/drivers/staging/media/sunxi/cedrus/cedrus_dec.c
@@ -50,7 +50,7 @@ void cedrus_device_run(void *priv)
 		break;
 	}
 
-	v4l2_m2m_buf_copy_data(run.src, run.dst, true);
+	v4l2_m2m_buf_copy_metadata(run.src, run.dst, true);
 
 	dev->dec_ops[ctx->current_codec]->setup(ctx, &run);
 
diff --git a/include/media/v4l2-mem2mem.h b/include/media/v4l2-mem2mem.h
index 43e447dcf69d..47c6d9aa0bf4 100644
--- a/include/media/v4l2-mem2mem.h
+++ b/include/media/v4l2-mem2mem.h
@@ -623,11 +623,11 @@ v4l2_m2m_dst_buf_remove_by_idx(struct v4l2_m2m_ctx *m2m_ctx, unsigned int idx)
 }
 
 /**
- * v4l2_m2m_buf_copy_data() - copy buffer data from the output buffer to the
- * capture buffer
+ * v4l2_m2m_buf_copy_metadata() - copy buffer metadata from
+ * the output buffer to the capture buffer
  *
- * @out_vb: the output buffer that is the source of the data.
- * @cap_vb: the capture buffer that will receive the data.
+ * @out_vb: the output buffer that is the source of the metadata.
+ * @cap_vb: the capture buffer that will receive the metadata.
  * @copy_frame_flags: copy the KEY/B/PFRAME flags as well.
  *
  * This helper function copies the timestamp, timecode (if the TIMECODE
@@ -638,9 +638,9 @@ v4l2_m2m_dst_buf_remove_by_idx(struct v4l2_m2m_ctx *m2m_ctx, unsigned int idx)
  * flags are not copied. This is typically needed for encoders that
  * set this bits explicitly.
  */
-void v4l2_m2m_buf_copy_data(const struct vb2_v4l2_buffer *out_vb,
-			    struct vb2_v4l2_buffer *cap_vb,
-			    bool copy_frame_flags);
+void v4l2_m2m_buf_copy_metadata(const struct vb2_v4l2_buffer *out_vb,
+				struct vb2_v4l2_buffer *cap_vb,
+				bool copy_frame_flags);
 
 /* v4l2 request helper */
 
-- 
cgit v1.2.3


From 1db64e8733f653814f041ffe1428524494ef6123 Mon Sep 17 00:00:00 2001
From: Eran Ben Elisha <eranbe@mellanox.com>
Date: Thu, 7 Feb 2019 11:36:32 +0200
Subject: devlink: Add devlink formatted message (fmsg) API

Devlink fmsg is a mechanism to pass descriptors between drivers and
devlink, in json-like format. The API allows the driver to add nested
attributes such as object, object pair and value array, in addition to
attributes such as name and value.

Driver can use this API to fill the fmsg context in a format which will be
translated by the devlink to the netlink message later.
There is no memory allocation in advance (other than the initial list
head), and it dynamically allocates messages descriptors and add them to
the list on the fly.

When it needs to send the data using SKBs to the netlink layer, it
fragments the data between different SKBs. In order to do this
fragmentation, it uses virtual nests attributes, to avoid actual
nesting use which cannot be divided between different SKBs.

Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
Reviewed-by: Moshe Shemesh <moshe@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/devlink.h        | 149 +++++++++++++
 include/uapi/linux/devlink.h |   8 +
 net/core/devlink.c           | 483 +++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 640 insertions(+)

(limited to 'include')

diff --git a/include/net/devlink.h b/include/net/devlink.h
index 74d992a68a06..7c5722e816aa 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -448,6 +448,8 @@ struct devlink_info_req;
 
 typedef void devlink_snapshot_data_dest_t(const void *data);
 
+struct devlink_fmsg;
+
 struct devlink_ops {
 	int (*reload)(struct devlink *devlink, struct netlink_ext_ack *extack);
 	int (*port_type_set)(struct devlink_port *devlink_port,
@@ -639,6 +641,37 @@ int devlink_info_version_running_put(struct devlink_info_req *req,
 				     const char *version_name,
 				     const char *version_value);
 
+int devlink_fmsg_obj_nest_start(struct devlink_fmsg *fmsg);
+int devlink_fmsg_obj_nest_end(struct devlink_fmsg *fmsg);
+
+int devlink_fmsg_pair_nest_start(struct devlink_fmsg *fmsg, const char *name);
+int devlink_fmsg_pair_nest_end(struct devlink_fmsg *fmsg);
+
+int devlink_fmsg_arr_pair_nest_start(struct devlink_fmsg *fmsg,
+				     const char *name);
+int devlink_fmsg_arr_pair_nest_end(struct devlink_fmsg *fmsg);
+
+int devlink_fmsg_bool_put(struct devlink_fmsg *fmsg, bool value);
+int devlink_fmsg_u8_put(struct devlink_fmsg *fmsg, u8 value);
+int devlink_fmsg_u32_put(struct devlink_fmsg *fmsg, u32 value);
+int devlink_fmsg_u64_put(struct devlink_fmsg *fmsg, u64 value);
+int devlink_fmsg_string_put(struct devlink_fmsg *fmsg, const char *value);
+int devlink_fmsg_binary_put(struct devlink_fmsg *fmsg, const void *value,
+			    u16 value_len);
+
+int devlink_fmsg_bool_pair_put(struct devlink_fmsg *fmsg, const char *name,
+			       bool value);
+int devlink_fmsg_u8_pair_put(struct devlink_fmsg *fmsg, const char *name,
+			     u8 value);
+int devlink_fmsg_u32_pair_put(struct devlink_fmsg *fmsg, const char *name,
+			      u32 value);
+int devlink_fmsg_u64_pair_put(struct devlink_fmsg *fmsg, const char *name,
+			      u64 value);
+int devlink_fmsg_string_pair_put(struct devlink_fmsg *fmsg, const char *name,
+				 const char *value);
+int devlink_fmsg_binary_pair_put(struct devlink_fmsg *fmsg, const char *name,
+				 const void *value, u16 value_len);
+
 #else
 
 static inline struct devlink *devlink_alloc(const struct devlink_ops *ops,
@@ -971,6 +1004,122 @@ devlink_info_version_running_put(struct devlink_info_req *req,
 {
 	return 0;
 }
+
+static inline int
+devlink_fmsg_obj_nest_start(struct devlink_fmsg *fmsg)
+{
+	return 0;
+}
+
+static inline int
+devlink_fmsg_obj_nest_end(struct devlink_fmsg *fmsg)
+{
+	return 0;
+}
+
+static inline int
+devlink_fmsg_pair_nest_start(struct devlink_fmsg *fmsg, const char *name)
+{
+	return 0;
+}
+
+static inline int
+devlink_fmsg_pair_nest_end(struct devlink_fmsg *fmsg)
+{
+	return 0;
+}
+
+static inline int
+devlink_fmsg_arr_pair_nest_start(struct devlink_fmsg *fmsg,
+				 const char *name)
+{
+	return 0;
+}
+
+static inline int
+devlink_fmsg_arr_pair_nest_end(struct devlink_fmsg *fmsg)
+{
+	return 0;
+}
+
+static inline int
+devlink_fmsg_bool_put(struct devlink_fmsg *fmsg, bool value)
+{
+	return 0;
+}
+
+static inline int
+devlink_fmsg_u8_put(struct devlink_fmsg *fmsg, u8 value)
+{
+	return 0;
+}
+
+static inline int
+devlink_fmsg_u32_put(struct devlink_fmsg *fmsg, u32 value)
+{
+	return 0;
+}
+
+static inline int
+devlink_fmsg_u64_put(struct devlink_fmsg *fmsg, u64 value)
+{
+	return 0;
+}
+
+static inline int
+devlink_fmsg_string_put(struct devlink_fmsg *fmsg, const char *value)
+{
+	return 0;
+}
+
+static inline int
+devlink_fmsg_binary_put(struct devlink_fmsg *fmsg, const void *value,
+			u16 value_len)
+{
+	return 0;
+}
+
+static inline int
+devlink_fmsg_bool_pair_put(struct devlink_fmsg *fmsg, const char *name,
+			   bool value)
+{
+	return 0;
+}
+
+static inline int
+devlink_fmsg_u8_pair_put(struct devlink_fmsg *fmsg, const char *name,
+			 u8 value)
+{
+	return 0;
+}
+
+static inline int
+devlink_fmsg_u32_pair_put(struct devlink_fmsg *fmsg, const char *name,
+			  u32 value)
+{
+	return 0;
+}
+
+static inline int
+devlink_fmsg_u64_pair_put(struct devlink_fmsg *fmsg, const char *name,
+			  u64 value)
+{
+	return 0;
+}
+
+static inline int
+devlink_fmsg_string_pair_put(struct devlink_fmsg *fmsg, const char *name,
+			     const char *value)
+{
+	return 0;
+}
+
+static inline int
+devlink_fmsg_binary_pair_put(struct devlink_fmsg *fmsg, const char *name,
+			     const void *value, u16 value_len)
+{
+	return 0;
+}
 #endif
 
 #if IS_REACHABLE(CONFIG_NET_DEVLINK)
diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index 054b2d1a4537..076692209a9b 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -302,6 +302,14 @@ enum devlink_attr {
 
 	DEVLINK_ATTR_SB_POOL_CELL_SIZE,		/* u32 */
 
+	DEVLINK_ATTR_FMSG,			/* nested */
+	DEVLINK_ATTR_FMSG_OBJ_NEST_START,	/* flag */
+	DEVLINK_ATTR_FMSG_PAIR_NEST_START,	/* flag */
+	DEVLINK_ATTR_FMSG_ARR_NEST_START,	/* flag */
+	DEVLINK_ATTR_FMSG_NEST_END,		/* flag */
+	DEVLINK_ATTR_FMSG_OBJ_NAME,		/* string */
+	DEVLINK_ATTR_FMSG_OBJ_VALUE_TYPE,	/* u8 */
+	DEVLINK_ATTR_FMSG_OBJ_VALUE_DATA,	/* dynamic */
 	/* add new attributes above here, update the policy in devlink.c */
 
 	__DEVLINK_ATTR_MAX,
diff --git a/net/core/devlink.c b/net/core/devlink.c
index cd0d393bc62d..03883697fcf0 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -3879,6 +3879,489 @@ static int devlink_nl_cmd_info_get_dumpit(struct sk_buff *msg,
 	return msg->len;
 }
 
+struct devlink_fmsg_item {
+	struct list_head list;
+	int attrtype;
+	u8 nla_type;
+	u16 len;
+	int value[0];
+};
+
+struct devlink_fmsg {
+	struct list_head item_list;
+};
+
+static struct devlink_fmsg *devlink_fmsg_alloc(void)
+{
+	struct devlink_fmsg *fmsg;
+
+	fmsg = kzalloc(sizeof(*fmsg), GFP_KERNEL);
+	if (!fmsg)
+		return NULL;
+
+	INIT_LIST_HEAD(&fmsg->item_list);
+
+	return fmsg;
+}
+
+static void devlink_fmsg_free(struct devlink_fmsg *fmsg)
+{
+	struct devlink_fmsg_item *item, *tmp;
+
+	list_for_each_entry_safe(item, tmp, &fmsg->item_list, list) {
+		list_del(&item->list);
+		kfree(item);
+	}
+	kfree(fmsg);
+}
+
+static int devlink_fmsg_nest_common(struct devlink_fmsg *fmsg,
+				    int attrtype)
+{
+	struct devlink_fmsg_item *item;
+
+	item = kzalloc(sizeof(*item), GFP_KERNEL);
+	if (!item)
+		return -ENOMEM;
+
+	item->attrtype = attrtype;
+	list_add_tail(&item->list, &fmsg->item_list);
+
+	return 0;
+}
+
+int devlink_fmsg_obj_nest_start(struct devlink_fmsg *fmsg)
+{
+	return devlink_fmsg_nest_common(fmsg, DEVLINK_ATTR_FMSG_OBJ_NEST_START);
+}
+EXPORT_SYMBOL_GPL(devlink_fmsg_obj_nest_start);
+
+static int devlink_fmsg_nest_end(struct devlink_fmsg *fmsg)
+{
+	return devlink_fmsg_nest_common(fmsg, DEVLINK_ATTR_FMSG_NEST_END);
+}
+
+int devlink_fmsg_obj_nest_end(struct devlink_fmsg *fmsg)
+{
+	return devlink_fmsg_nest_end(fmsg);
+}
+EXPORT_SYMBOL_GPL(devlink_fmsg_obj_nest_end);
+
+#define DEVLINK_FMSG_MAX_SIZE (GENLMSG_DEFAULT_SIZE - GENL_HDRLEN - NLA_HDRLEN)
+
+static int devlink_fmsg_put_name(struct devlink_fmsg *fmsg, const char *name)
+{
+	struct devlink_fmsg_item *item;
+
+	if (strlen(name) + 1 > DEVLINK_FMSG_MAX_SIZE)
+		return -EMSGSIZE;
+
+	item = kzalloc(sizeof(*item) + strlen(name) + 1, GFP_KERNEL);
+	if (!item)
+		return -ENOMEM;
+
+	item->nla_type = NLA_NUL_STRING;
+	item->len = strlen(name) + 1;
+	item->attrtype = DEVLINK_ATTR_FMSG_OBJ_NAME;
+	memcpy(&item->value, name, item->len);
+	list_add_tail(&item->list, &fmsg->item_list);
+
+	return 0;
+}
+
+int devlink_fmsg_pair_nest_start(struct devlink_fmsg *fmsg, const char *name)
+{
+	int err;
+
+	err = devlink_fmsg_nest_common(fmsg, DEVLINK_ATTR_FMSG_PAIR_NEST_START);
+	if (err)
+		return err;
+
+	err = devlink_fmsg_put_name(fmsg, name);
+	if (err)
+		return err;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(devlink_fmsg_pair_nest_start);
+
+int devlink_fmsg_pair_nest_end(struct devlink_fmsg *fmsg)
+{
+	return devlink_fmsg_nest_end(fmsg);
+}
+EXPORT_SYMBOL_GPL(devlink_fmsg_pair_nest_end);
+
+int devlink_fmsg_arr_pair_nest_start(struct devlink_fmsg *fmsg,
+				     const char *name)
+{
+	int err;
+
+	err = devlink_fmsg_pair_nest_start(fmsg, name);
+	if (err)
+		return err;
+
+	err = devlink_fmsg_nest_common(fmsg, DEVLINK_ATTR_FMSG_ARR_NEST_START);
+	if (err)
+		return err;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(devlink_fmsg_arr_pair_nest_start);
+
+int devlink_fmsg_arr_pair_nest_end(struct devlink_fmsg *fmsg)
+{
+	int err;
+
+	err = devlink_fmsg_nest_end(fmsg);
+	if (err)
+		return err;
+
+	err = devlink_fmsg_nest_end(fmsg);
+	if (err)
+		return err;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(devlink_fmsg_arr_pair_nest_end);
+
+static int devlink_fmsg_put_value(struct devlink_fmsg *fmsg,
+				  const void *value, u16 value_len,
+				  u8 value_nla_type)
+{
+	struct devlink_fmsg_item *item;
+
+	if (value_len > DEVLINK_FMSG_MAX_SIZE)
+		return -EMSGSIZE;
+
+	item = kzalloc(sizeof(*item) + value_len, GFP_KERNEL);
+	if (!item)
+		return -ENOMEM;
+
+	item->nla_type = value_nla_type;
+	item->len = value_len;
+	item->attrtype = DEVLINK_ATTR_FMSG_OBJ_VALUE_DATA;
+	memcpy(&item->value, value, item->len);
+	list_add_tail(&item->list, &fmsg->item_list);
+
+	return 0;
+}
+
+int devlink_fmsg_bool_put(struct devlink_fmsg *fmsg, bool value)
+{
+	return devlink_fmsg_put_value(fmsg, &value, sizeof(value), NLA_FLAG);
+}
+EXPORT_SYMBOL_GPL(devlink_fmsg_bool_put);
+
+int devlink_fmsg_u8_put(struct devlink_fmsg *fmsg, u8 value)
+{
+	return devlink_fmsg_put_value(fmsg, &value, sizeof(value), NLA_U8);
+}
+EXPORT_SYMBOL_GPL(devlink_fmsg_u8_put);
+
+int devlink_fmsg_u32_put(struct devlink_fmsg *fmsg, u32 value)
+{
+	return devlink_fmsg_put_value(fmsg, &value, sizeof(value), NLA_U32);
+}
+EXPORT_SYMBOL_GPL(devlink_fmsg_u32_put);
+
+int devlink_fmsg_u64_put(struct devlink_fmsg *fmsg, u64 value)
+{
+	return devlink_fmsg_put_value(fmsg, &value, sizeof(value), NLA_U64);
+}
+EXPORT_SYMBOL_GPL(devlink_fmsg_u64_put);
+
+int devlink_fmsg_string_put(struct devlink_fmsg *fmsg, const char *value)
+{
+	return devlink_fmsg_put_value(fmsg, value, strlen(value) + 1,
+				      NLA_NUL_STRING);
+}
+EXPORT_SYMBOL_GPL(devlink_fmsg_string_put);
+
+int devlink_fmsg_binary_put(struct devlink_fmsg *fmsg, const void *value,
+			    u16 value_len)
+{
+	return devlink_fmsg_put_value(fmsg, value, value_len, NLA_BINARY);
+}
+EXPORT_SYMBOL_GPL(devlink_fmsg_binary_put);
+
+int devlink_fmsg_bool_pair_put(struct devlink_fmsg *fmsg, const char *name,
+			       bool value)
+{
+	int err;
+
+	err = devlink_fmsg_pair_nest_start(fmsg, name);
+	if (err)
+		return err;
+
+	err = devlink_fmsg_bool_put(fmsg, value);
+	if (err)
+		return err;
+
+	err = devlink_fmsg_pair_nest_end(fmsg);
+	if (err)
+		return err;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(devlink_fmsg_bool_pair_put);
+
+int devlink_fmsg_u8_pair_put(struct devlink_fmsg *fmsg, const char *name,
+			     u8 value)
+{
+	int err;
+
+	err = devlink_fmsg_pair_nest_start(fmsg, name);
+	if (err)
+		return err;
+
+	err = devlink_fmsg_u8_put(fmsg, value);
+	if (err)
+		return err;
+
+	err = devlink_fmsg_pair_nest_end(fmsg);
+	if (err)
+		return err;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(devlink_fmsg_u8_pair_put);
+
+int devlink_fmsg_u32_pair_put(struct devlink_fmsg *fmsg, const char *name,
+			      u32 value)
+{
+	int err;
+
+	err = devlink_fmsg_pair_nest_start(fmsg, name);
+	if (err)
+		return err;
+
+	err = devlink_fmsg_u32_put(fmsg, value);
+	if (err)
+		return err;
+
+	err = devlink_fmsg_pair_nest_end(fmsg);
+	if (err)
+		return err;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(devlink_fmsg_u32_pair_put);
+
+int devlink_fmsg_u64_pair_put(struct devlink_fmsg *fmsg, const char *name,
+			      u64 value)
+{
+	int err;
+
+	err = devlink_fmsg_pair_nest_start(fmsg, name);
+	if (err)
+		return err;
+
+	err = devlink_fmsg_u64_put(fmsg, value);
+	if (err)
+		return err;
+
+	err = devlink_fmsg_pair_nest_end(fmsg);
+	if (err)
+		return err;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(devlink_fmsg_u64_pair_put);
+
+int devlink_fmsg_string_pair_put(struct devlink_fmsg *fmsg, const char *name,
+				 const char *value)
+{
+	int err;
+
+	err = devlink_fmsg_pair_nest_start(fmsg, name);
+	if (err)
+		return err;
+
+	err = devlink_fmsg_string_put(fmsg, value);
+	if (err)
+		return err;
+
+	err = devlink_fmsg_pair_nest_end(fmsg);
+	if (err)
+		return err;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(devlink_fmsg_string_pair_put);
+
+int devlink_fmsg_binary_pair_put(struct devlink_fmsg *fmsg, const char *name,
+				 const void *value, u16 value_len)
+{
+	int err;
+
+	err = devlink_fmsg_pair_nest_start(fmsg, name);
+	if (err)
+		return err;
+
+	err = devlink_fmsg_binary_put(fmsg, value, value_len);
+	if (err)
+		return err;
+
+	err = devlink_fmsg_pair_nest_end(fmsg);
+	if (err)
+		return err;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(devlink_fmsg_binary_pair_put);
+
+static int
+devlink_fmsg_item_fill_type(struct devlink_fmsg_item *msg, struct sk_buff *skb)
+{
+	switch (msg->nla_type) {
+	case NLA_FLAG:
+	case NLA_U8:
+	case NLA_U32:
+	case NLA_U64:
+	case NLA_NUL_STRING:
+	case NLA_BINARY:
+		return nla_put_u8(skb, DEVLINK_ATTR_FMSG_OBJ_VALUE_TYPE,
+				  msg->nla_type);
+	default:
+		return -EINVAL;
+	}
+}
+
+static int
+devlink_fmsg_item_fill_data(struct devlink_fmsg_item *msg, struct sk_buff *skb)
+{
+	int attrtype = DEVLINK_ATTR_FMSG_OBJ_VALUE_DATA;
+	u8 tmp;
+
+	switch (msg->nla_type) {
+	case NLA_FLAG:
+		/* Always provide flag data, regardless of its value */
+		tmp = *(bool *) msg->value;
+
+		return nla_put_u8(skb, attrtype, tmp);
+	case NLA_U8:
+		return nla_put_u8(skb, attrtype, *(u8 *) msg->value);
+	case NLA_U32:
+		return nla_put_u32(skb, attrtype, *(u32 *) msg->value);
+	case NLA_U64:
+		return nla_put_u64_64bit(skb, attrtype, *(u64 *) msg->value,
+					 DEVLINK_ATTR_PAD);
+	case NLA_NUL_STRING:
+		return nla_put_string(skb, attrtype, (char *) &msg->value);
+	case NLA_BINARY:
+		return nla_put(skb, attrtype, msg->len, (void *) &msg->value);
+	default:
+		return -EINVAL;
+	}
+}
+
+static int
+devlink_fmsg_prepare_skb(struct devlink_fmsg *fmsg, struct sk_buff *skb,
+			 int *start)
+{
+	struct devlink_fmsg_item *item;
+	struct nlattr *fmsg_nlattr;
+	int i = 0;
+	int err;
+
+	fmsg_nlattr = nla_nest_start(skb, DEVLINK_ATTR_FMSG);
+	if (!fmsg_nlattr)
+		return -EMSGSIZE;
+
+	list_for_each_entry(item, &fmsg->item_list, list) {
+		if (i < *start) {
+			i++;
+			continue;
+		}
+
+		switch (item->attrtype) {
+		case DEVLINK_ATTR_FMSG_OBJ_NEST_START:
+		case DEVLINK_ATTR_FMSG_PAIR_NEST_START:
+		case DEVLINK_ATTR_FMSG_ARR_NEST_START:
+		case DEVLINK_ATTR_FMSG_NEST_END:
+			err = nla_put_flag(skb, item->attrtype);
+			break;
+		case DEVLINK_ATTR_FMSG_OBJ_VALUE_DATA:
+			err = devlink_fmsg_item_fill_type(item, skb);
+			if (err)
+				break;
+			err = devlink_fmsg_item_fill_data(item, skb);
+			break;
+		case DEVLINK_ATTR_FMSG_OBJ_NAME:
+			err = nla_put_string(skb, item->attrtype,
+					     (char *) &item->value);
+			break;
+		default:
+			err = -EINVAL;
+			break;
+		}
+		if (!err)
+			*start = ++i;
+		else
+			break;
+	}
+
+	nla_nest_end(skb, fmsg_nlattr);
+	return err;
+}
+
+static int devlink_fmsg_snd(struct devlink_fmsg *fmsg,
+			    struct genl_info *info,
+			    enum devlink_command cmd, int flags)
+{
+	struct nlmsghdr *nlh;
+	struct sk_buff *skb;
+	bool last = false;
+	int index = 0;
+	void *hdr;
+	int err;
+
+	while (!last) {
+		int tmp_index = index;
+
+		skb = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
+		if (!skb)
+			return -ENOMEM;
+
+		hdr = genlmsg_put(skb, info->snd_portid, info->snd_seq,
+				  &devlink_nl_family, flags | NLM_F_MULTI, cmd);
+		if (!hdr) {
+			err = -EMSGSIZE;
+			goto nla_put_failure;
+		}
+
+		err = devlink_fmsg_prepare_skb(fmsg, skb, &index);
+		if (!err)
+			last = true;
+		else if (err != -EMSGSIZE || tmp_index == index)
+			goto nla_put_failure;
+
+		genlmsg_end(skb, hdr);
+		err = genlmsg_reply(skb, info);
+		if (err)
+			return err;
+	}
+
+	skb = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!skb)
+		return -ENOMEM;
+	nlh = nlmsg_put(skb, info->snd_portid, info->snd_seq,
+			NLMSG_DONE, 0, flags | NLM_F_MULTI);
+	if (!nlh) {
+		err = -EMSGSIZE;
+		goto nla_put_failure;
+	}
+	err = genlmsg_reply(skb, info);
+	if (err)
+		return err;
+
+	return 0;
+
+nla_put_failure:
+	nlmsg_free(skb);
+	return err;
+}
+
 static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
 	[DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING },
 	[DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING },
-- 
cgit v1.2.3


From a0bdcc59d194d9d2ccb679015e66d8562b0b9e84 Mon Sep 17 00:00:00 2001
From: Eran Ben Elisha <eranbe@mellanox.com>
Date: Thu, 7 Feb 2019 11:36:33 +0200
Subject: devlink: Add health reporter create/destroy functionality

Devlink health reporter is an instance for reporting, diagnosing and
recovering from run time errors discovered by the reporters.
Define it's data structure and supported operations.
In addition, expose devlink API to create and destroy a reporter.
Each devlink instance will hold it's own reporters list.

As part of the allocation, driver shall provide a set of callbacks which
will be used by devlink in order to handle health reports and user
commands related to this reporter. In addition, driver is entitled to
provide some priv pointer, which can be fetched from the reporter by
devlink_health_reporter_priv function.

For each reporter, devlink will hold a metadata of statistics,
dump msg and status.

For passing dumps and diagnose data to the user-space, it will use devlink
fmsg API.

Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
Reviewed-by: Moshe Shemesh <moshe@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/devlink.h | 53 +++++++++++++++++++++++++++++
 net/core/devlink.c    | 92 +++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 145 insertions(+)

(limited to 'include')

diff --git a/include/net/devlink.h b/include/net/devlink.h
index 7c5722e816aa..3dfe30235878 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -30,6 +30,7 @@ struct devlink {
 	struct list_head param_list;
 	struct list_head region_list;
 	u32 snapshot_id;
+	struct list_head reporter_list;
 	struct devlink_dpipe_headers *dpipe_headers;
 	const struct devlink_ops *ops;
 	struct device *dev;
@@ -449,6 +450,27 @@ struct devlink_info_req;
 typedef void devlink_snapshot_data_dest_t(const void *data);
 
 struct devlink_fmsg;
+struct devlink_health_reporter;
+
+/**
+ * struct devlink_health_reporter_ops - Reporter operations
+ * @name: reporter name
+ * @recover: callback to recover from reported error
+ *           if priv_ctx is NULL, run a full recover
+ * @dump: callback to dump an object
+ *        if priv_ctx is NULL, run a full dump
+ * @diagnose: callback to diagnose the current status
+ */
+
+struct devlink_health_reporter_ops {
+	char *name;
+	int (*recover)(struct devlink_health_reporter *reporter,
+		       void *priv_ctx);
+	int (*dump)(struct devlink_health_reporter *reporter,
+		    struct devlink_fmsg *fmsg, void *priv_ctx);
+	int (*diagnose)(struct devlink_health_reporter *reporter,
+			struct devlink_fmsg *fmsg);
+};
 
 struct devlink_ops {
 	int (*reload)(struct devlink *devlink, struct netlink_ext_ack *extack);
@@ -672,6 +694,17 @@ int devlink_fmsg_string_pair_put(struct devlink_fmsg *fmsg, const char *name,
 int devlink_fmsg_binary_pair_put(struct devlink_fmsg *fmsg, const char *name,
 				 const void *value, u16 value_len);
 
+struct devlink_health_reporter *
+devlink_health_reporter_create(struct devlink *devlink,
+			       const struct devlink_health_reporter_ops *ops,
+			       u64 graceful_period, bool auto_recover,
+			       void *priv);
+void
+devlink_health_reporter_destroy(struct devlink_health_reporter *reporter);
+
+void *
+devlink_health_reporter_priv(struct devlink_health_reporter *reporter);
+
 #else
 
 static inline struct devlink *devlink_alloc(const struct devlink_ops *ops,
@@ -1120,6 +1153,26 @@ devlink_fmsg_binary_pair_put(struct devlink_fmsg *fmsg, const char *name,
 {
 	return 0;
 }
+
+static inline struct devlink_health_reporter *
+devlink_health_reporter_create(struct devlink *devlink,
+			       const struct devlink_health_reporter_ops *ops,
+			       u64 graceful_period, bool auto_recover,
+			       void *priv)
+{
+	return NULL;
+}
+
+static inline void
+devlink_health_reporter_destroy(struct devlink_health_reporter *reporter)
+{
+}
+
+static inline void *
+devlink_health_reporter_priv(struct devlink_health_reporter *reporter)
+{
+	return NULL;
+}
 #endif
 
 #if IS_REACHABLE(CONFIG_NET_DEVLINK)
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 03883697fcf0..341548d7f1f1 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -4362,6 +4362,97 @@ nla_put_failure:
 	return err;
 }
 
+struct devlink_health_reporter {
+	struct list_head list;
+	void *priv;
+	const struct devlink_health_reporter_ops *ops;
+	struct devlink *devlink;
+	u64 graceful_period;
+	bool auto_recover;
+	u8 health_state;
+};
+
+void *
+devlink_health_reporter_priv(struct devlink_health_reporter *reporter)
+{
+	return reporter->priv;
+}
+EXPORT_SYMBOL_GPL(devlink_health_reporter_priv);
+
+static struct devlink_health_reporter *
+devlink_health_reporter_find_by_name(struct devlink *devlink,
+				     const char *reporter_name)
+{
+	struct devlink_health_reporter *reporter;
+
+	list_for_each_entry(reporter, &devlink->reporter_list, list)
+		if (!strcmp(reporter->ops->name, reporter_name))
+			return reporter;
+	return NULL;
+}
+
+/**
+ *	devlink_health_reporter_create - create devlink health reporter
+ *
+ *	@devlink: devlink
+ *	@ops: ops
+ *	@graceful_period: to avoid recovery loops, in msecs
+ *	@auto_recover: auto recover when error occurs
+ *	@priv: priv
+ */
+struct devlink_health_reporter *
+devlink_health_reporter_create(struct devlink *devlink,
+			       const struct devlink_health_reporter_ops *ops,
+			       u64 graceful_period, bool auto_recover,
+			       void *priv)
+{
+	struct devlink_health_reporter *reporter;
+
+	mutex_lock(&devlink->lock);
+	if (devlink_health_reporter_find_by_name(devlink, ops->name)) {
+		reporter = ERR_PTR(-EEXIST);
+		goto unlock;
+	}
+
+	if (WARN_ON(auto_recover && !ops->recover) ||
+	    WARN_ON(graceful_period && !ops->recover)) {
+		reporter = ERR_PTR(-EINVAL);
+		goto unlock;
+	}
+
+	reporter = kzalloc(sizeof(*reporter), GFP_KERNEL);
+	if (!reporter) {
+		reporter = ERR_PTR(-ENOMEM);
+		goto unlock;
+	}
+
+	reporter->priv = priv;
+	reporter->ops = ops;
+	reporter->devlink = devlink;
+	reporter->graceful_period = graceful_period;
+	reporter->auto_recover = auto_recover;
+	list_add_tail(&reporter->list, &devlink->reporter_list);
+unlock:
+	mutex_unlock(&devlink->lock);
+	return reporter;
+}
+EXPORT_SYMBOL_GPL(devlink_health_reporter_create);
+
+/**
+ *	devlink_health_reporter_destroy - destroy devlink health reporter
+ *
+ *	@reporter: devlink health reporter to destroy
+ */
+void
+devlink_health_reporter_destroy(struct devlink_health_reporter *reporter)
+{
+	mutex_lock(&reporter->devlink->lock);
+	list_del(&reporter->list);
+	mutex_unlock(&reporter->devlink->lock);
+	kfree(reporter);
+}
+EXPORT_SYMBOL_GPL(devlink_health_reporter_destroy);
+
 static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
 	[DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING },
 	[DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING },
@@ -4670,6 +4761,7 @@ struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size)
 	INIT_LIST_HEAD(&devlink->resource_list);
 	INIT_LIST_HEAD(&devlink->param_list);
 	INIT_LIST_HEAD(&devlink->region_list);
+	INIT_LIST_HEAD(&devlink->reporter_list);
 	mutex_init(&devlink->lock);
 	return devlink;
 }
-- 
cgit v1.2.3


From c8e1da0bf9238e460c73ebc5a07ce93697e5e207 Mon Sep 17 00:00:00 2001
From: Eran Ben Elisha <eranbe@mellanox.com>
Date: Thu, 7 Feb 2019 11:36:34 +0200
Subject: devlink: Add health report functionality

Upon error discover, every driver can report it to the devlink health
mechanism via devlink_health_report function, using the appropriate
reporter registered to it. Driver can pass error specific context which
will be delivered to it as part of the dump / recovery callbacks.

Once an error is reported, devlink health will do the following actions:
* A log is being send to the kernel trace events buffer
* Health status and statistics are being updated for the reporter instance
* Object dump is being taken and stored at the reporter instance (as long
  as there is no other dump which is already stored)
* Auto recovery attempt is being done. Depends on:
  - Auto Recovery configuration
  - Grace period vs. Time since last recover

Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
Reviewed-by: Moshe Shemesh <moshe@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/devlink.h          |   9 ++++
 include/trace/events/devlink.h |  65 ++++++++++++++++++++++
 net/core/devlink.c             | 119 +++++++++++++++++++++++++++++++++++++++++
 3 files changed, 193 insertions(+)

(limited to 'include')

diff --git a/include/net/devlink.h b/include/net/devlink.h
index 3dfe30235878..c12ad6e9095d 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -704,6 +704,8 @@ devlink_health_reporter_destroy(struct devlink_health_reporter *reporter);
 
 void *
 devlink_health_reporter_priv(struct devlink_health_reporter *reporter);
+int devlink_health_report(struct devlink_health_reporter *reporter,
+			  const char *msg, void *priv_ctx);
 
 #else
 
@@ -1173,6 +1175,13 @@ devlink_health_reporter_priv(struct devlink_health_reporter *reporter)
 {
 	return NULL;
 }
+
+static inline int
+devlink_health_report(struct devlink_health_reporter *reporter,
+		      const char *msg, void *priv_ctx)
+{
+	return 0;
+}
 #endif
 
 #if IS_REACHABLE(CONFIG_NET_DEVLINK)
diff --git a/include/trace/events/devlink.h b/include/trace/events/devlink.h
index 40705364a50f..191ddf67d769 100644
--- a/include/trace/events/devlink.h
+++ b/include/trace/events/devlink.h
@@ -75,6 +75,71 @@ TRACE_EVENT(devlink_hwerr,
 			__get_str(driver_name), __entry->err, __get_str(msg))
 );
 
+/*
+ * Tracepoint for devlink health message:
+ */
+TRACE_EVENT(devlink_health_report,
+	TP_PROTO(const struct devlink *devlink, const char *reporter_name,
+		 const char *msg),
+
+	TP_ARGS(devlink, reporter_name, msg),
+
+	TP_STRUCT__entry(
+		__string(bus_name, devlink->dev->bus->name)
+		__string(dev_name, dev_name(devlink->dev))
+		__string(driver_name, devlink->dev->driver->name)
+		__string(reporter_name, msg)
+		__string(msg, msg)
+	),
+
+	TP_fast_assign(
+		__assign_str(bus_name, devlink->dev->bus->name);
+		__assign_str(dev_name, dev_name(devlink->dev));
+		__assign_str(driver_name, devlink->dev->driver->name);
+		__assign_str(reporter_name, reporter_name);
+		__assign_str(msg, msg);
+	),
+
+	TP_printk("bus_name=%s dev_name=%s driver_name=%s reporter_name=%s: %s",
+		  __get_str(bus_name), __get_str(dev_name),
+		  __get_str(driver_name), __get_str(reporter_name),
+		  __get_str(msg))
+);
+
+/*
+ * Tracepoint for devlink health recover aborted message:
+ */
+TRACE_EVENT(devlink_health_recover_aborted,
+	TP_PROTO(const struct devlink *devlink, const char *reporter_name,
+		 bool health_state, u64 time_since_last_recover),
+
+	TP_ARGS(devlink, reporter_name, health_state, time_since_last_recover),
+
+	TP_STRUCT__entry(
+		__string(bus_name, devlink->dev->bus->name)
+		__string(dev_name, dev_name(devlink->dev))
+		__string(driver_name, devlink->dev->driver->name)
+		__string(reporter_name, reporter_name)
+		__field(bool, health_state)
+		__field(u64, time_since_last_recover)
+	),
+
+	TP_fast_assign(
+		__assign_str(bus_name, devlink->dev->bus->name);
+		__assign_str(dev_name, dev_name(devlink->dev));
+		__assign_str(driver_name, devlink->dev->driver->name);
+		__assign_str(reporter_name, reporter_name);
+		__entry->health_state = health_state;
+		__entry->time_since_last_recover = time_since_last_recover;
+	),
+
+	TP_printk("bus_name=%s dev_name=%s driver_name=%s reporter_name=%s: health_state=%d time_since_last_recover=%llu recover aborted",
+		  __get_str(bus_name), __get_str(dev_name),
+		  __get_str(driver_name), __get_str(reporter_name),
+		  __entry->health_state,
+		  __entry->time_since_last_recover)
+);
+
 #endif /* _TRACE_DEVLINK_H */
 
 /* This part must be outside protection */
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 341548d7f1f1..3eaa290831aa 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -4367,9 +4367,20 @@ struct devlink_health_reporter {
 	void *priv;
 	const struct devlink_health_reporter_ops *ops;
 	struct devlink *devlink;
+	struct devlink_fmsg *dump_fmsg;
+	struct mutex dump_lock; /* lock parallel read/write from dump buffers */
 	u64 graceful_period;
 	bool auto_recover;
 	u8 health_state;
+	u64 dump_ts;
+	u64 error_count;
+	u64 recovery_count;
+	u64 last_recovery_ts;
+};
+
+enum devlink_health_reporter_state {
+	DEVLINK_HEALTH_REPORTER_STATE_HEALTHY,
+	DEVLINK_HEALTH_REPORTER_STATE_ERROR,
 };
 
 void *
@@ -4431,6 +4442,7 @@ devlink_health_reporter_create(struct devlink *devlink,
 	reporter->devlink = devlink;
 	reporter->graceful_period = graceful_period;
 	reporter->auto_recover = auto_recover;
+	mutex_init(&reporter->dump_lock);
 	list_add_tail(&reporter->list, &devlink->reporter_list);
 unlock:
 	mutex_unlock(&devlink->lock);
@@ -4449,10 +4461,117 @@ devlink_health_reporter_destroy(struct devlink_health_reporter *reporter)
 	mutex_lock(&reporter->devlink->lock);
 	list_del(&reporter->list);
 	mutex_unlock(&reporter->devlink->lock);
+	if (reporter->dump_fmsg)
+		devlink_fmsg_free(reporter->dump_fmsg);
 	kfree(reporter);
 }
 EXPORT_SYMBOL_GPL(devlink_health_reporter_destroy);
 
+static int
+devlink_health_reporter_recover(struct devlink_health_reporter *reporter,
+				void *priv_ctx)
+{
+	int err;
+
+	if (!reporter->ops->recover)
+		return -EOPNOTSUPP;
+
+	err = reporter->ops->recover(reporter, priv_ctx);
+	if (err)
+		return err;
+
+	reporter->recovery_count++;
+	reporter->health_state = DEVLINK_HEALTH_REPORTER_STATE_HEALTHY;
+	reporter->last_recovery_ts = jiffies;
+
+	return 0;
+}
+
+static void
+devlink_health_dump_clear(struct devlink_health_reporter *reporter)
+{
+	if (!reporter->dump_fmsg)
+		return;
+	devlink_fmsg_free(reporter->dump_fmsg);
+	reporter->dump_fmsg = NULL;
+}
+
+static int devlink_health_do_dump(struct devlink_health_reporter *reporter,
+				  void *priv_ctx)
+{
+	int err;
+
+	if (!reporter->ops->dump)
+		return 0;
+
+	if (reporter->dump_fmsg)
+		return 0;
+
+	reporter->dump_fmsg = devlink_fmsg_alloc();
+	if (!reporter->dump_fmsg) {
+		err = -ENOMEM;
+		return err;
+	}
+
+	err = devlink_fmsg_obj_nest_start(reporter->dump_fmsg);
+	if (err)
+		goto dump_err;
+
+	err = reporter->ops->dump(reporter, reporter->dump_fmsg,
+				  priv_ctx);
+	if (err)
+		goto dump_err;
+
+	err = devlink_fmsg_obj_nest_end(reporter->dump_fmsg);
+	if (err)
+		goto dump_err;
+
+	reporter->dump_ts = jiffies;
+
+	return 0;
+
+dump_err:
+	devlink_health_dump_clear(reporter);
+	return err;
+}
+
+int devlink_health_report(struct devlink_health_reporter *reporter,
+			  const char *msg, void *priv_ctx)
+{
+	struct devlink *devlink = reporter->devlink;
+
+	/* write a log message of the current error */
+	WARN_ON(!msg);
+	trace_devlink_health_report(devlink, reporter->ops->name, msg);
+	reporter->error_count++;
+
+	/* abort if the previous error wasn't recovered */
+	if (reporter->auto_recover &&
+	    (reporter->health_state != DEVLINK_HEALTH_REPORTER_STATE_HEALTHY ||
+	     jiffies - reporter->last_recovery_ts <
+	     msecs_to_jiffies(reporter->graceful_period))) {
+		trace_devlink_health_recover_aborted(devlink,
+						     reporter->ops->name,
+						     reporter->health_state,
+						     jiffies -
+						     reporter->last_recovery_ts);
+		return -ECANCELED;
+	}
+
+	reporter->health_state = DEVLINK_HEALTH_REPORTER_STATE_ERROR;
+
+	mutex_lock(&reporter->dump_lock);
+	/* store current dump of current error, for later analysis */
+	devlink_health_do_dump(reporter, priv_ctx);
+	mutex_unlock(&reporter->dump_lock);
+
+	if (reporter->auto_recover)
+		return devlink_health_reporter_recover(reporter, priv_ctx);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(devlink_health_report);
+
 static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
 	[DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING },
 	[DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING },
-- 
cgit v1.2.3


From 7afe335a8bede4e2839b0e0fa36ef629fe4a0206 Mon Sep 17 00:00:00 2001
From: Eran Ben Elisha <eranbe@mellanox.com>
Date: Thu, 7 Feb 2019 11:36:35 +0200
Subject: devlink: Add health get command

Add devlink health get command to provide reporter/s data for user space.
Add the ability to get data per reporter or dump data from all available
reporters.

Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
Reviewed-by: Moshe Shemesh <moshe@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/devlink.h |  11 ++++
 net/core/devlink.c           | 149 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 160 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index 076692209a9b..d8f20d6ce139 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -96,6 +96,8 @@ enum devlink_command {
 
 	DEVLINK_CMD_INFO_GET,		/* can dump */
 
+	DEVLINK_CMD_HEALTH_REPORTER_GET,
+
 	/* add new commands above here */
 	__DEVLINK_CMD_MAX,
 	DEVLINK_CMD_MAX = __DEVLINK_CMD_MAX - 1
@@ -310,6 +312,15 @@ enum devlink_attr {
 	DEVLINK_ATTR_FMSG_OBJ_NAME,		/* string */
 	DEVLINK_ATTR_FMSG_OBJ_VALUE_TYPE,	/* u8 */
 	DEVLINK_ATTR_FMSG_OBJ_VALUE_DATA,	/* dynamic */
+
+	DEVLINK_ATTR_HEALTH_REPORTER,			/* nested */
+	DEVLINK_ATTR_HEALTH_REPORTER_NAME,		/* string */
+	DEVLINK_ATTR_HEALTH_REPORTER_STATE,		/* u8 */
+	DEVLINK_ATTR_HEALTH_REPORTER_ERR,		/* u64 */
+	DEVLINK_ATTR_HEALTH_REPORTER_RECOVER,		/* u64 */
+	DEVLINK_ATTR_HEALTH_REPORTER_DUMP_TS,		/* u64 */
+	DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD,	/* u64 */
+	DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER,	/* u8 */
 	/* add new attributes above here, update the policy in devlink.c */
 
 	__DEVLINK_ATTR_MAX,
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 3eaa290831aa..86f7c0e5d4bc 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -4572,6 +4572,146 @@ int devlink_health_report(struct devlink_health_reporter *reporter,
 }
 EXPORT_SYMBOL_GPL(devlink_health_report);
 
+static struct devlink_health_reporter *
+devlink_health_reporter_get_from_info(struct devlink *devlink,
+				      struct genl_info *info)
+{
+	char *reporter_name;
+
+	if (!info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_NAME])
+		return NULL;
+
+	reporter_name =
+		nla_data(info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_NAME]);
+	return devlink_health_reporter_find_by_name(devlink, reporter_name);
+}
+
+static int
+devlink_nl_health_reporter_fill(struct sk_buff *msg,
+				struct devlink *devlink,
+				struct devlink_health_reporter *reporter,
+				enum devlink_command cmd, u32 portid,
+				u32 seq, int flags)
+{
+	struct nlattr *reporter_attr;
+	void *hdr;
+
+	hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
+	if (!hdr)
+		return -EMSGSIZE;
+
+	if (devlink_nl_put_handle(msg, devlink))
+		goto genlmsg_cancel;
+
+	reporter_attr = nla_nest_start(msg, DEVLINK_ATTR_HEALTH_REPORTER);
+	if (!reporter_attr)
+		goto genlmsg_cancel;
+	if (nla_put_string(msg, DEVLINK_ATTR_HEALTH_REPORTER_NAME,
+			   reporter->ops->name))
+		goto reporter_nest_cancel;
+	if (nla_put_u8(msg, DEVLINK_ATTR_HEALTH_REPORTER_STATE,
+		       reporter->health_state))
+		goto reporter_nest_cancel;
+	if (nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_ERR,
+			      reporter->error_count, DEVLINK_ATTR_PAD))
+		goto reporter_nest_cancel;
+	if (nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_RECOVER,
+			      reporter->recovery_count, DEVLINK_ATTR_PAD))
+		goto reporter_nest_cancel;
+	if (nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD,
+			      reporter->graceful_period,
+			      DEVLINK_ATTR_PAD))
+		goto reporter_nest_cancel;
+	if (nla_put_u8(msg, DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER,
+		       reporter->auto_recover))
+		goto reporter_nest_cancel;
+	if (reporter->dump_fmsg &&
+	    nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_DUMP_TS,
+			      jiffies_to_msecs(reporter->dump_ts),
+			      DEVLINK_ATTR_PAD))
+		goto reporter_nest_cancel;
+
+	nla_nest_end(msg, reporter_attr);
+	genlmsg_end(msg, hdr);
+	return 0;
+
+reporter_nest_cancel:
+	nla_nest_end(msg, reporter_attr);
+genlmsg_cancel:
+	genlmsg_cancel(msg, hdr);
+	return -EMSGSIZE;
+}
+
+static int devlink_nl_cmd_health_reporter_get_doit(struct sk_buff *skb,
+						   struct genl_info *info)
+{
+	struct devlink *devlink = info->user_ptr[0];
+	struct devlink_health_reporter *reporter;
+	struct sk_buff *msg;
+	int err;
+
+	reporter = devlink_health_reporter_get_from_info(devlink, info);
+	if (!reporter)
+		return -EINVAL;
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+
+	err = devlink_nl_health_reporter_fill(msg, devlink, reporter,
+					      DEVLINK_CMD_HEALTH_REPORTER_GET,
+					      info->snd_portid, info->snd_seq,
+					      0);
+	if (err) {
+		nlmsg_free(msg);
+		return err;
+	}
+
+	return genlmsg_reply(msg, info);
+}
+
+static int
+devlink_nl_cmd_health_reporter_get_dumpit(struct sk_buff *msg,
+					  struct netlink_callback *cb)
+{
+	struct devlink_health_reporter *reporter;
+	struct devlink *devlink;
+	int start = cb->args[0];
+	int idx = 0;
+	int err;
+
+	mutex_lock(&devlink_mutex);
+	list_for_each_entry(devlink, &devlink_list, list) {
+		if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+			continue;
+		mutex_lock(&devlink->lock);
+		list_for_each_entry(reporter, &devlink->reporter_list,
+				    list) {
+			if (idx < start) {
+				idx++;
+				continue;
+			}
+			err = devlink_nl_health_reporter_fill(msg, devlink,
+							      reporter,
+							      DEVLINK_CMD_HEALTH_REPORTER_GET,
+							      NETLINK_CB(cb->skb).portid,
+							      cb->nlh->nlmsg_seq,
+							      NLM_F_MULTI);
+			if (err) {
+				mutex_unlock(&devlink->lock);
+				goto out;
+			}
+			idx++;
+		}
+		mutex_unlock(&devlink->lock);
+	}
+out:
+	mutex_unlock(&devlink_mutex);
+
+	cb->args[0] = idx;
+	return msg->len;
+}
+
 static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
 	[DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING },
 	[DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING },
@@ -4597,6 +4737,7 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
 	[DEVLINK_ATTR_PARAM_VALUE_CMODE] = { .type = NLA_U8 },
 	[DEVLINK_ATTR_REGION_NAME] = { .type = NLA_NUL_STRING },
 	[DEVLINK_ATTR_REGION_SNAPSHOT_ID] = { .type = NLA_U32 },
+	[DEVLINK_ATTR_HEALTH_REPORTER_NAME] = { .type = NLA_NUL_STRING },
 };
 
 static const struct genl_ops devlink_nl_ops[] = {
@@ -4840,6 +4981,14 @@ static const struct genl_ops devlink_nl_ops[] = {
 		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
 		/* can be retrieved by unprivileged users */
 	},
+	{
+		.cmd = DEVLINK_CMD_HEALTH_REPORTER_GET,
+		.doit = devlink_nl_cmd_health_reporter_get_doit,
+		.dumpit = devlink_nl_cmd_health_reporter_get_dumpit,
+		.policy = devlink_nl_policy,
+		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+		/* can be retrieved by unprivileged users */
+	},
 };
 
 static struct genl_family devlink_nl_family __ro_after_init = {
-- 
cgit v1.2.3


From a1e55ec0a0c6969cb7e9d9080a84041bb7b2b6e6 Mon Sep 17 00:00:00 2001
From: Eran Ben Elisha <eranbe@mellanox.com>
Date: Thu, 7 Feb 2019 11:36:36 +0200
Subject: devlink: Add health set command

Add devlink health set command, in order to set configuration parameters
for a specific reporter.
Supported parameters are:
- graceful_period: Time interval between auto recoveries (in msec)
- auto_recover: Determines if the devlink shall execute recover upon
		receiving error for the reporter

Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
Reviewed-by: Moshe Shemesh <moshe@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/devlink.h |  1 +
 net/core/devlink.c           | 36 ++++++++++++++++++++++++++++++++++++
 2 files changed, 37 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index d8f20d6ce139..b03065a99884 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -97,6 +97,7 @@ enum devlink_command {
 	DEVLINK_CMD_INFO_GET,		/* can dump */
 
 	DEVLINK_CMD_HEALTH_REPORTER_GET,
+	DEVLINK_CMD_HEALTH_REPORTER_SET,
 
 	/* add new commands above here */
 	__DEVLINK_CMD_MAX,
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 86f7c0e5d4bc..0b231fb76e59 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -4712,6 +4712,33 @@ out:
 	return msg->len;
 }
 
+static int
+devlink_nl_cmd_health_reporter_set_doit(struct sk_buff *skb,
+					struct genl_info *info)
+{
+	struct devlink *devlink = info->user_ptr[0];
+	struct devlink_health_reporter *reporter;
+
+	reporter = devlink_health_reporter_get_from_info(devlink, info);
+	if (!reporter)
+		return -EINVAL;
+
+	if (!reporter->ops->recover &&
+	    (info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD] ||
+	     info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER]))
+		return -EOPNOTSUPP;
+
+	if (info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD])
+		reporter->graceful_period =
+			nla_get_u64(info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD]);
+
+	if (info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER])
+		reporter->auto_recover =
+			nla_get_u8(info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER]);
+
+	return 0;
+}
+
 static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
 	[DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING },
 	[DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING },
@@ -4738,6 +4765,8 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
 	[DEVLINK_ATTR_REGION_NAME] = { .type = NLA_NUL_STRING },
 	[DEVLINK_ATTR_REGION_SNAPSHOT_ID] = { .type = NLA_U32 },
 	[DEVLINK_ATTR_HEALTH_REPORTER_NAME] = { .type = NLA_NUL_STRING },
+	[DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD] = { .type = NLA_U64 },
+	[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER] = { .type = NLA_U8 },
 };
 
 static const struct genl_ops devlink_nl_ops[] = {
@@ -4989,6 +5018,13 @@ static const struct genl_ops devlink_nl_ops[] = {
 		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
 		/* can be retrieved by unprivileged users */
 	},
+	{
+		.cmd = DEVLINK_CMD_HEALTH_REPORTER_SET,
+		.doit = devlink_nl_cmd_health_reporter_set_doit,
+		.policy = devlink_nl_policy,
+		.flags = GENL_ADMIN_PERM,
+		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+	},
 };
 
 static struct genl_family devlink_nl_family __ro_after_init = {
-- 
cgit v1.2.3


From 20a0943a5b237f7d59dc581e9e3637f5c87f1fde Mon Sep 17 00:00:00 2001
From: Eran Ben Elisha <eranbe@mellanox.com>
Date: Thu, 7 Feb 2019 11:36:37 +0200
Subject: devlink: Add health recover command

Add devlink health recover command to the uapi, in order to allow the user
to execute a recover operation over a specific reporter.

Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
Reviewed-by: Moshe Shemesh <moshe@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/devlink.h |  1 +
 net/core/devlink.c           | 20 ++++++++++++++++++++
 2 files changed, 21 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index b03065a99884..a3a97e6edad8 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -98,6 +98,7 @@ enum devlink_command {
 
 	DEVLINK_CMD_HEALTH_REPORTER_GET,
 	DEVLINK_CMD_HEALTH_REPORTER_SET,
+	DEVLINK_CMD_HEALTH_REPORTER_RECOVER,
 
 	/* add new commands above here */
 	__DEVLINK_CMD_MAX,
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 0b231fb76e59..0e6b0e034863 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -4739,6 +4739,19 @@ devlink_nl_cmd_health_reporter_set_doit(struct sk_buff *skb,
 	return 0;
 }
 
+static int devlink_nl_cmd_health_reporter_recover_doit(struct sk_buff *skb,
+						       struct genl_info *info)
+{
+	struct devlink *devlink = info->user_ptr[0];
+	struct devlink_health_reporter *reporter;
+
+	reporter = devlink_health_reporter_get_from_info(devlink, info);
+	if (!reporter)
+		return -EINVAL;
+
+	return devlink_health_reporter_recover(reporter, NULL);
+}
+
 static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
 	[DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING },
 	[DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING },
@@ -5025,6 +5038,13 @@ static const struct genl_ops devlink_nl_ops[] = {
 		.flags = GENL_ADMIN_PERM,
 		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
 	},
+	{
+		.cmd = DEVLINK_CMD_HEALTH_REPORTER_RECOVER,
+		.doit = devlink_nl_cmd_health_reporter_recover_doit,
+		.policy = devlink_nl_policy,
+		.flags = GENL_ADMIN_PERM,
+		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+	},
 };
 
 static struct genl_family devlink_nl_family __ro_after_init = {
-- 
cgit v1.2.3


From fca42a2794e31379855c7d687055da43a6e05eef Mon Sep 17 00:00:00 2001
From: Eran Ben Elisha <eranbe@mellanox.com>
Date: Thu, 7 Feb 2019 11:36:38 +0200
Subject: devlink: Add health diagnose command

Add devlink health diagnose command, in order to run a diagnose
operation over a specific reporter.

It is expected from driver's callback for diagnose command to fill it
via the devlink fmsg API. Devlink will parse it and convert it to
netlink nla API in order to pass it to the user.

Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
Reviewed-by: Moshe Shemesh <moshe@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/devlink.h |  1 +
 net/core/devlink.c           | 46 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 47 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index a3a97e6edad8..09be37137841 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -99,6 +99,7 @@ enum devlink_command {
 	DEVLINK_CMD_HEALTH_REPORTER_GET,
 	DEVLINK_CMD_HEALTH_REPORTER_SET,
 	DEVLINK_CMD_HEALTH_REPORTER_RECOVER,
+	DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE,
 
 	/* add new commands above here */
 	__DEVLINK_CMD_MAX,
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 0e6b0e034863..1e8613db2bf7 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -4752,6 +4752,45 @@ static int devlink_nl_cmd_health_reporter_recover_doit(struct sk_buff *skb,
 	return devlink_health_reporter_recover(reporter, NULL);
 }
 
+static int devlink_nl_cmd_health_reporter_diagnose_doit(struct sk_buff *skb,
+							struct genl_info *info)
+{
+	struct devlink *devlink = info->user_ptr[0];
+	struct devlink_health_reporter *reporter;
+	struct devlink_fmsg *fmsg;
+	int err;
+
+	reporter = devlink_health_reporter_get_from_info(devlink, info);
+	if (!reporter)
+		return -EINVAL;
+
+	if (!reporter->ops->diagnose)
+		return -EOPNOTSUPP;
+
+	fmsg = devlink_fmsg_alloc();
+	if (!fmsg)
+		return -ENOMEM;
+
+	err = devlink_fmsg_obj_nest_start(fmsg);
+	if (err)
+		goto out;
+
+	err = reporter->ops->diagnose(reporter, fmsg);
+	if (err)
+		goto out;
+
+	err = devlink_fmsg_obj_nest_end(fmsg);
+	if (err)
+		goto out;
+
+	err = devlink_fmsg_snd(fmsg, info,
+			       DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE, 0);
+
+out:
+	devlink_fmsg_free(fmsg);
+	return err;
+}
+
 static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
 	[DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING },
 	[DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING },
@@ -5045,6 +5084,13 @@ static const struct genl_ops devlink_nl_ops[] = {
 		.flags = GENL_ADMIN_PERM,
 		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
 	},
+	{
+		.cmd = DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE,
+		.doit = devlink_nl_cmd_health_reporter_diagnose_doit,
+		.policy = devlink_nl_policy,
+		.flags = GENL_ADMIN_PERM,
+		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+	},
 };
 
 static struct genl_family devlink_nl_family __ro_after_init = {
-- 
cgit v1.2.3


From 35455e23e6f3cffe20e2b948e57597a8dc240b1e Mon Sep 17 00:00:00 2001
From: Eran Ben Elisha <eranbe@mellanox.com>
Date: Thu, 7 Feb 2019 11:36:39 +0200
Subject: devlink: Add health dump {get,clear} commands

Add devlink health dump commands, in order to run an dump operation
over a specific reporter.

The supported operations are dump_get in order to get last saved
dump (if not exist, dump now) and dump_clear to clear last saved
dump.

It is expected from driver's callback for diagnose command to fill it
via the devlink fmsg API. Devlink will parse it and convert it to
netlink nla API in order to pass it to the user.

Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
Reviewed-by: Moshe Shemesh <moshe@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/devlink.h |  2 ++
 net/core/devlink.c           | 63 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 65 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index 09be37137841..72d9f7c89190 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -100,6 +100,8 @@ enum devlink_command {
 	DEVLINK_CMD_HEALTH_REPORTER_SET,
 	DEVLINK_CMD_HEALTH_REPORTER_RECOVER,
 	DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE,
+	DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET,
+	DEVLINK_CMD_HEALTH_REPORTER_DUMP_CLEAR,
 
 	/* add new commands above here */
 	__DEVLINK_CMD_MAX,
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 1e8613db2bf7..7fbdba547d4f 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -4791,6 +4791,53 @@ out:
 	return err;
 }
 
+static int devlink_nl_cmd_health_reporter_dump_get_doit(struct sk_buff *skb,
+							struct genl_info *info)
+{
+	struct devlink *devlink = info->user_ptr[0];
+	struct devlink_health_reporter *reporter;
+	int err;
+
+	reporter = devlink_health_reporter_get_from_info(devlink, info);
+	if (!reporter)
+		return -EINVAL;
+
+	if (!reporter->ops->dump)
+		return -EOPNOTSUPP;
+
+	mutex_lock(&reporter->dump_lock);
+	err = devlink_health_do_dump(reporter, NULL);
+	if (err)
+		goto out;
+
+	err = devlink_fmsg_snd(reporter->dump_fmsg, info,
+			       DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET, 0);
+
+out:
+	mutex_unlock(&reporter->dump_lock);
+	return err;
+}
+
+static int
+devlink_nl_cmd_health_reporter_dump_clear_doit(struct sk_buff *skb,
+					       struct genl_info *info)
+{
+	struct devlink *devlink = info->user_ptr[0];
+	struct devlink_health_reporter *reporter;
+
+	reporter = devlink_health_reporter_get_from_info(devlink, info);
+	if (!reporter)
+		return -EINVAL;
+
+	if (!reporter->ops->dump)
+		return -EOPNOTSUPP;
+
+	mutex_lock(&reporter->dump_lock);
+	devlink_health_dump_clear(reporter);
+	mutex_unlock(&reporter->dump_lock);
+	return 0;
+}
+
 static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
 	[DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING },
 	[DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING },
@@ -5091,6 +5138,22 @@ static const struct genl_ops devlink_nl_ops[] = {
 		.flags = GENL_ADMIN_PERM,
 		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
 	},
+	{
+		.cmd = DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET,
+		.doit = devlink_nl_cmd_health_reporter_dump_get_doit,
+		.policy = devlink_nl_policy,
+		.flags = GENL_ADMIN_PERM,
+		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
+				  DEVLINK_NL_FLAG_NO_LOCK,
+	},
+	{
+		.cmd = DEVLINK_CMD_HEALTH_REPORTER_DUMP_CLEAR,
+		.doit = devlink_nl_cmd_health_reporter_dump_clear_doit,
+		.policy = devlink_nl_policy,
+		.flags = GENL_ADMIN_PERM,
+		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
+				  DEVLINK_NL_FLAG_NO_LOCK,
+	},
 };
 
 static struct genl_family devlink_nl_family __ro_after_init = {
-- 
cgit v1.2.3


From 70f8a3ca68d3e1f3344d959981ca55d5f6ec77f7 Mon Sep 17 00:00:00 2001
From: Davidlohr Bueso <dave@stgolabs.net>
Date: Wed, 6 Feb 2019 09:59:15 -0800
Subject: mm: make mm->pinned_vm an atomic64 counter

Taking a sleeping lock to _only_ increment a variable is quite the
overkill, and pretty much all users do this. Furthermore, some drivers
(ie: infiniband and scif) that need pinned semantics can go to quite
some trouble to actually delay via workqueue (un)accounting for pinned
pages when not possible to acquire it.

By making the counter atomic we no longer need to hold the mmap_sem and
can simply some code around it for pinned_vm users. The counter is 64-bit
such that we need not worry about overflows such as rdma user input
controlled from userspace.

Reviewed-by: Ira Weiny <ira.weiny@intel.com>
Reviewed-by: Christoph Lameter <cl@linux.com>
Reviewed-by: Daniel Jordan <daniel.m.jordan@oracle.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/core/umem.c             | 12 ++++++------
 drivers/infiniband/hw/hfi1/user_pages.c    |  6 +++---
 drivers/infiniband/hw/qib/qib_user_pages.c |  4 ++--
 drivers/infiniband/hw/usnic/usnic_uiom.c   |  8 ++++----
 drivers/misc/mic/scif/scif_rma.c           |  6 +++---
 fs/proc/task_mmu.c                         |  2 +-
 include/linux/mm_types.h                   |  2 +-
 kernel/events/core.c                       |  8 ++++----
 kernel/fork.c                              |  2 +-
 mm/debug.c                                 |  5 +++--
 10 files changed, 28 insertions(+), 27 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index 1efe0a74e06b..678abe1afcba 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -166,13 +166,13 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr,
 	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
 
 	down_write(&mm->mmap_sem);
-	if (check_add_overflow(mm->pinned_vm, npages, &new_pinned) ||
-	    (new_pinned > lock_limit && !capable(CAP_IPC_LOCK))) {
+	new_pinned = atomic64_read(&mm->pinned_vm) + npages;
+	if (new_pinned > lock_limit && !capable(CAP_IPC_LOCK)) {
 		up_write(&mm->mmap_sem);
 		ret = -ENOMEM;
 		goto out;
 	}
-	mm->pinned_vm = new_pinned;
+	atomic64_set(&mm->pinned_vm, new_pinned);
 	up_write(&mm->mmap_sem);
 
 	cur_base = addr & PAGE_MASK;
@@ -234,7 +234,7 @@ umem_release:
 	__ib_umem_release(context->device, umem, 0);
 vma:
 	down_write(&mm->mmap_sem);
-	mm->pinned_vm -= ib_umem_num_pages(umem);
+	atomic64_sub(ib_umem_num_pages(umem), &mm->pinned_vm);
 	up_write(&mm->mmap_sem);
 out:
 	if (vma_list)
@@ -263,7 +263,7 @@ static void ib_umem_release_defer(struct work_struct *work)
 	struct ib_umem *umem = container_of(work, struct ib_umem, work);
 
 	down_write(&umem->owning_mm->mmap_sem);
-	umem->owning_mm->pinned_vm -= ib_umem_num_pages(umem);
+	atomic64_sub(ib_umem_num_pages(umem), &umem->owning_mm->pinned_vm);
 	up_write(&umem->owning_mm->mmap_sem);
 
 	__ib_umem_release_tail(umem);
@@ -302,7 +302,7 @@ void ib_umem_release(struct ib_umem *umem)
 	} else {
 		down_write(&umem->owning_mm->mmap_sem);
 	}
-	umem->owning_mm->pinned_vm -= ib_umem_num_pages(umem);
+	atomic64_sub(ib_umem_num_pages(umem), &umem->owning_mm->pinned_vm);
 	up_write(&umem->owning_mm->mmap_sem);
 
 	__ib_umem_release_tail(umem);
diff --git a/drivers/infiniband/hw/hfi1/user_pages.c b/drivers/infiniband/hw/hfi1/user_pages.c
index e341e6dcc388..40a6e434190f 100644
--- a/drivers/infiniband/hw/hfi1/user_pages.c
+++ b/drivers/infiniband/hw/hfi1/user_pages.c
@@ -92,7 +92,7 @@ bool hfi1_can_pin_pages(struct hfi1_devdata *dd, struct mm_struct *mm,
 	size = DIV_ROUND_UP(size, PAGE_SIZE);
 
 	down_read(&mm->mmap_sem);
-	pinned = mm->pinned_vm;
+	pinned = atomic64_read(&mm->pinned_vm);
 	up_read(&mm->mmap_sem);
 
 	/* First, check the absolute limit against all pinned pages. */
@@ -112,7 +112,7 @@ int hfi1_acquire_user_pages(struct mm_struct *mm, unsigned long vaddr, size_t np
 		return ret;
 
 	down_write(&mm->mmap_sem);
-	mm->pinned_vm += ret;
+	atomic64_add(ret, &mm->pinned_vm);
 	up_write(&mm->mmap_sem);
 
 	return ret;
@@ -131,7 +131,7 @@ void hfi1_release_user_pages(struct mm_struct *mm, struct page **p,
 
 	if (mm) { /* during close after signal, mm can be NULL */
 		down_write(&mm->mmap_sem);
-		mm->pinned_vm -= npages;
+		atomic64_sub(npages, &mm->pinned_vm);
 		up_write(&mm->mmap_sem);
 	}
 }
diff --git a/drivers/infiniband/hw/qib/qib_user_pages.c b/drivers/infiniband/hw/qib/qib_user_pages.c
index 075f09fb7ce3..c6c81022d313 100644
--- a/drivers/infiniband/hw/qib/qib_user_pages.c
+++ b/drivers/infiniband/hw/qib/qib_user_pages.c
@@ -75,7 +75,7 @@ static int __qib_get_user_pages(unsigned long start_page, size_t num_pages,
 			goto bail_release;
 	}
 
-	current->mm->pinned_vm += num_pages;
+	atomic64_add(num_pages, &current->mm->pinned_vm);
 
 	ret = 0;
 	goto bail;
@@ -156,7 +156,7 @@ void qib_release_user_pages(struct page **p, size_t num_pages)
 	__qib_release_user_pages(p, num_pages, 1);
 
 	if (current->mm) {
-		current->mm->pinned_vm -= num_pages;
+		atomic64_sub(num_pages, &current->mm->pinned_vm);
 		up_write(&current->mm->mmap_sem);
 	}
 }
diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.c b/drivers/infiniband/hw/usnic/usnic_uiom.c
index ce01a59fccc4..854436a2b437 100644
--- a/drivers/infiniband/hw/usnic/usnic_uiom.c
+++ b/drivers/infiniband/hw/usnic/usnic_uiom.c
@@ -129,7 +129,7 @@ static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable,
 	uiomr->owning_mm = mm = current->mm;
 	down_write(&mm->mmap_sem);
 
-	locked = npages + current->mm->pinned_vm;
+	locked = npages + atomic64_read(&current->mm->pinned_vm);
 	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
 
 	if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) {
@@ -187,7 +187,7 @@ out:
 	if (ret < 0)
 		usnic_uiom_put_pages(chunk_list, 0);
 	else {
-		mm->pinned_vm = locked;
+		atomic64_set(&mm->pinned_vm, locked);
 		mmgrab(uiomr->owning_mm);
 	}
 
@@ -441,7 +441,7 @@ static void usnic_uiom_release_defer(struct work_struct *work)
 		container_of(work, struct usnic_uiom_reg, work);
 
 	down_write(&uiomr->owning_mm->mmap_sem);
-	uiomr->owning_mm->pinned_vm -= usnic_uiom_num_pages(uiomr);
+	atomic64_sub(usnic_uiom_num_pages(uiomr), &uiomr->owning_mm->pinned_vm);
 	up_write(&uiomr->owning_mm->mmap_sem);
 
 	__usnic_uiom_release_tail(uiomr);
@@ -469,7 +469,7 @@ void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr,
 	} else {
 		down_write(&uiomr->owning_mm->mmap_sem);
 	}
-	uiomr->owning_mm->pinned_vm -= usnic_uiom_num_pages(uiomr);
+	atomic64_sub(usnic_uiom_num_pages(uiomr), &uiomr->owning_mm->pinned_vm);
 	up_write(&uiomr->owning_mm->mmap_sem);
 
 	__usnic_uiom_release_tail(uiomr);
diff --git a/drivers/misc/mic/scif/scif_rma.c b/drivers/misc/mic/scif/scif_rma.c
index 749321eb91ae..2448368f181e 100644
--- a/drivers/misc/mic/scif/scif_rma.c
+++ b/drivers/misc/mic/scif/scif_rma.c
@@ -285,7 +285,7 @@ __scif_dec_pinned_vm_lock(struct mm_struct *mm,
 	} else {
 		down_write(&mm->mmap_sem);
 	}
-	mm->pinned_vm -= nr_pages;
+	atomic64_sub(nr_pages, &mm->pinned_vm);
 	up_write(&mm->mmap_sem);
 	return 0;
 }
@@ -299,7 +299,7 @@ static inline int __scif_check_inc_pinned_vm(struct mm_struct *mm,
 		return 0;
 
 	locked = nr_pages;
-	locked += mm->pinned_vm;
+	locked += atomic64_read(&mm->pinned_vm);
 	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
 	if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) {
 		dev_err(scif_info.mdev.this_device,
@@ -307,7 +307,7 @@ static inline int __scif_check_inc_pinned_vm(struct mm_struct *mm,
 			locked, lock_limit);
 		return -ENOMEM;
 	}
-	mm->pinned_vm = locked;
+	atomic64_set(&mm->pinned_vm, locked);
 	return 0;
 }
 
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index f0ec9edab2f3..d2902962244d 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -59,7 +59,7 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
 	SEQ_PUT_DEC("VmPeak:\t", hiwater_vm);
 	SEQ_PUT_DEC(" kB\nVmSize:\t", total_vm);
 	SEQ_PUT_DEC(" kB\nVmLck:\t", mm->locked_vm);
-	SEQ_PUT_DEC(" kB\nVmPin:\t", mm->pinned_vm);
+	SEQ_PUT_DEC(" kB\nVmPin:\t", atomic64_read(&mm->pinned_vm));
 	SEQ_PUT_DEC(" kB\nVmHWM:\t", hiwater_rss);
 	SEQ_PUT_DEC(" kB\nVmRSS:\t", total_rss);
 	SEQ_PUT_DEC(" kB\nRssAnon:\t", anon);
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 2c471a2c43fa..acea2ea2d6c4 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -405,7 +405,7 @@ struct mm_struct {
 
 		unsigned long total_vm;	   /* Total pages mapped */
 		unsigned long locked_vm;   /* Pages that have PG_mlocked set */
-		unsigned long pinned_vm;   /* Refcount permanently increased */
+		atomic64_t    pinned_vm;   /* Refcount permanently increased */
 		unsigned long data_vm;	   /* VM_WRITE & ~VM_SHARED & ~VM_STACK */
 		unsigned long exec_vm;	   /* VM_EXEC & ~VM_WRITE & ~VM_STACK */
 		unsigned long stack_vm;	   /* VM_STACK */
diff --git a/kernel/events/core.c b/kernel/events/core.c
index e5ede6918050..29e9f2473656 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -5459,7 +5459,7 @@ static void perf_mmap_close(struct vm_area_struct *vma)
 
 		/* now it's safe to free the pages */
 		atomic_long_sub(rb->aux_nr_pages, &mmap_user->locked_vm);
-		vma->vm_mm->pinned_vm -= rb->aux_mmap_locked;
+		atomic64_sub(rb->aux_mmap_locked, &vma->vm_mm->pinned_vm);
 
 		/* this has to be the last one */
 		rb_free_aux(rb);
@@ -5532,7 +5532,7 @@ again:
 	 */
 
 	atomic_long_sub((size >> PAGE_SHIFT) + 1, &mmap_user->locked_vm);
-	vma->vm_mm->pinned_vm -= mmap_locked;
+	atomic64_sub(mmap_locked, &vma->vm_mm->pinned_vm);
 	free_uid(mmap_user);
 
 out_put:
@@ -5680,7 +5680,7 @@ accounting:
 
 	lock_limit = rlimit(RLIMIT_MEMLOCK);
 	lock_limit >>= PAGE_SHIFT;
-	locked = vma->vm_mm->pinned_vm + extra;
+	locked = atomic64_read(&vma->vm_mm->pinned_vm) + extra;
 
 	if ((locked > lock_limit) && perf_paranoid_tracepoint_raw() &&
 		!capable(CAP_IPC_LOCK)) {
@@ -5721,7 +5721,7 @@ accounting:
 unlock:
 	if (!ret) {
 		atomic_long_add(user_extra, &user->locked_vm);
-		vma->vm_mm->pinned_vm += extra;
+		atomic64_add(extra, &vma->vm_mm->pinned_vm);
 
 		atomic_inc(&event->mmap_count);
 	} else if (rb) {
diff --git a/kernel/fork.c b/kernel/fork.c
index b69248e6f0e0..85e08c379a9e 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -981,7 +981,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
 	mm_pgtables_bytes_init(mm);
 	mm->map_count = 0;
 	mm->locked_vm = 0;
-	mm->pinned_vm = 0;
+	atomic64_set(&mm->pinned_vm, 0);
 	memset(&mm->rss_stat, 0, sizeof(mm->rss_stat));
 	spin_lock_init(&mm->page_table_lock);
 	spin_lock_init(&mm->arg_lock);
diff --git a/mm/debug.c b/mm/debug.c
index 0abb987dad9b..7d13941a72f9 100644
--- a/mm/debug.c
+++ b/mm/debug.c
@@ -135,7 +135,7 @@ void dump_mm(const struct mm_struct *mm)
 		"mmap_base %lu mmap_legacy_base %lu highest_vm_end %lu\n"
 		"pgd %px mm_users %d mm_count %d pgtables_bytes %lu map_count %d\n"
 		"hiwater_rss %lx hiwater_vm %lx total_vm %lx locked_vm %lx\n"
-		"pinned_vm %lx data_vm %lx exec_vm %lx stack_vm %lx\n"
+		"pinned_vm %llx data_vm %lx exec_vm %lx stack_vm %lx\n"
 		"start_code %lx end_code %lx start_data %lx end_data %lx\n"
 		"start_brk %lx brk %lx start_stack %lx\n"
 		"arg_start %lx arg_end %lx env_start %lx env_end %lx\n"
@@ -166,7 +166,8 @@ void dump_mm(const struct mm_struct *mm)
 		mm_pgtables_bytes(mm),
 		mm->map_count,
 		mm->hiwater_rss, mm->hiwater_vm, mm->total_vm, mm->locked_vm,
-		mm->pinned_vm, mm->data_vm, mm->exec_vm, mm->stack_vm,
+		atomic64_read(&mm->pinned_vm),
+		mm->data_vm, mm->exec_vm, mm->stack_vm,
 		mm->start_code, mm->end_code, mm->start_data, mm->end_data,
 		mm->start_brk, mm->brk, mm->start_stack,
 		mm->arg_start, mm->arg_end, mm->env_start, mm->env_end,
-- 
cgit v1.2.3


From 95b86d1c91ad3b19f882d9e70aa37c8e99e8dc17 Mon Sep 17 00:00:00 2001
From: Devesh Sharma <devesh.sharma@broadcom.com>
Date: Thu, 7 Feb 2019 01:31:27 -0500
Subject: RDMA/bnxt_re: Update kernel user abi to pass chip context

User space verbs provider library would need chip context.  Changing the
ABI to add chip version details in structure.  Furthermore, changing the
kernel driver ucontext allocation code to initialize the abi structure
with appropriate values.

As suggested by community, appended the new fields at the bottom of the
ABI structure and retaining to older fields as those were in the older
versions.

Keeping the ABI version at 1 and adding a new field in the ucontext
response structure to hold the component mask.  The user space library
should check pre-defined flags to figure out if a certain feature is
supported on not.

Signed-off-by: Devesh Sharma <devesh.sharma@broadcom.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/hw/bnxt_re/ib_verbs.c | 17 ++++++++++++++---
 include/uapi/rdma/bnxt_re-abi.h          | 11 +++++++++++
 2 files changed, 25 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
index 08c1725f371a..1d7469e23cde 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -3692,9 +3692,10 @@ struct ib_ucontext *bnxt_re_alloc_ucontext(struct ib_device *ibdev,
 					   struct ib_udata *udata)
 {
 	struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev);
+	struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr;
 	struct bnxt_re_uctx_resp resp;
 	struct bnxt_re_ucontext *uctx;
-	struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr;
+	u32 chip_met_rev_num = 0;
 	int rc;
 
 	dev_dbg(rdev_to_dev(rdev), "ABI version requested %d",
@@ -3719,14 +3720,24 @@ struct ib_ucontext *bnxt_re_alloc_ucontext(struct ib_device *ibdev,
 	}
 	spin_lock_init(&uctx->sh_lock);
 
-	resp.dev_id = rdev->en_dev->pdev->devfn; /*Temp, Use idr_alloc instead*/
+	resp.comp_mask |= BNXT_RE_UCNTX_CMASK_HAVE_CCTX;
+	chip_met_rev_num = rdev->chip_ctx.chip_num;
+	chip_met_rev_num |= ((u32)rdev->chip_ctx.chip_rev & 0xFF) <<
+			     BNXT_RE_CHIP_ID0_CHIP_REV_SFT;
+	chip_met_rev_num |= ((u32)rdev->chip_ctx.chip_metal & 0xFF) <<
+			     BNXT_RE_CHIP_ID0_CHIP_MET_SFT;
+	resp.chip_id0 = chip_met_rev_num;
+	/* Future extension of chip info */
+	resp.chip_id1 = 0;
+	/*Temp, Use idr_alloc instead */
+	resp.dev_id = rdev->en_dev->pdev->devfn;
 	resp.max_qp = rdev->qplib_ctx.qpc_count;
 	resp.pg_size = PAGE_SIZE;
 	resp.cqe_sz = sizeof(struct cq_base);
 	resp.max_cqd = dev_attr->max_cq_wqes;
 	resp.rsvd    = 0;
 
-	rc = ib_copy_to_udata(udata, &resp, sizeof(resp));
+	rc = ib_copy_to_udata(udata, &resp, min(udata->outlen, sizeof(resp)));
 	if (rc) {
 		dev_err(rdev_to_dev(rdev), "Failed to copy user context");
 		rc = -EFAULT;
diff --git a/include/uapi/rdma/bnxt_re-abi.h b/include/uapi/rdma/bnxt_re-abi.h
index a7a6111e50c7..dc52e3cf574c 100644
--- a/include/uapi/rdma/bnxt_re-abi.h
+++ b/include/uapi/rdma/bnxt_re-abi.h
@@ -44,6 +44,14 @@
 
 #define BNXT_RE_ABI_VERSION	1
 
+#define BNXT_RE_CHIP_ID0_CHIP_NUM_SFT		0x00
+#define BNXT_RE_CHIP_ID0_CHIP_REV_SFT		0x10
+#define BNXT_RE_CHIP_ID0_CHIP_MET_SFT		0x18
+
+enum {
+	BNXT_RE_UCNTX_CMASK_HAVE_CCTX = 0x1ULL
+};
+
 struct bnxt_re_uctx_resp {
 	__u32 dev_id;
 	__u32 max_qp;
@@ -51,6 +59,9 @@ struct bnxt_re_uctx_resp {
 	__u32 cqe_sz;
 	__u32 max_cqd;
 	__u32 rsvd;
+	__aligned_u64 comp_mask;
+	__u32 chip_id0;
+	__u32 chip_id1;
 };
 
 /*
-- 
cgit v1.2.3


From e147133a42cb9df6cbc99503fdf58d0e6388bf2a Mon Sep 17 00:00:00 2001
From: James Morse <james.morse@arm.com>
Date: Tue, 29 Jan 2019 18:48:40 +0000
Subject: ACPI / APEI: Make hest.c manage the estatus memory pool

ghes.c has a memory pool it uses for the estatus cache and the estatus
queue. The cache is initialised when registering the platform driver.
For the queue, an NMI-like notification has to grow/shrink the pool
as it is registered and unregistered.

This is all pretty noisy when adding new NMI-like notifications, it
would be better to replace this with a static pool size based on the
number of users.

As a precursor, move the call that creates the pool from ghes_init(),
into hest.c. Later this will take the number of ghes entries and
consolidate the queue allocations.
Remove ghes_estatus_pool_exit() as hest.c doesn't have anywhere to put
this.

The pool is now initialised as part of ACPI's subsys_initcall():
(acpi_init(), acpi_scan_init(), acpi_pci_root_init(), acpi_hest_init())
Before this patch it happened later as a GHES specific device_initcall().

Signed-off-by: James Morse <james.morse@arm.com>
Reviewed-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/apei/ghes.c | 33 ++++++---------------------------
 drivers/acpi/apei/hest.c | 10 +++++++++-
 include/acpi/ghes.h      |  2 ++
 3 files changed, 17 insertions(+), 28 deletions(-)

(limited to 'include')

diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index ee9206d5e119..4150c72c78cb 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -162,26 +162,16 @@ static void ghes_iounmap_irq(void)
 	clear_fixmap(FIX_APEI_GHES_IRQ);
 }
 
-static int ghes_estatus_pool_init(void)
+static int ghes_estatus_pool_expand(unsigned long len); //temporary
+
+int ghes_estatus_pool_init(void)
 {
 	ghes_estatus_pool = gen_pool_create(GHES_ESTATUS_POOL_MIN_ALLOC_ORDER, -1);
 	if (!ghes_estatus_pool)
 		return -ENOMEM;
-	return 0;
-}
 
-static void ghes_estatus_pool_free_chunk(struct gen_pool *pool,
-					      struct gen_pool_chunk *chunk,
-					      void *data)
-{
-	vfree((void *)chunk->start_addr);
-}
-
-static void ghes_estatus_pool_exit(void)
-{
-	gen_pool_for_each_chunk(ghes_estatus_pool,
-				ghes_estatus_pool_free_chunk, NULL);
-	gen_pool_destroy(ghes_estatus_pool);
+	return ghes_estatus_pool_expand(GHES_ESTATUS_CACHE_AVG_SIZE *
+					GHES_ESTATUS_CACHE_ALLOCED_MAX);
 }
 
 static int ghes_estatus_pool_expand(unsigned long len)
@@ -1227,18 +1217,9 @@ static int __init ghes_init(void)
 
 	ghes_nmi_init_cxt();
 
-	rc = ghes_estatus_pool_init();
-	if (rc)
-		goto err;
-
-	rc = ghes_estatus_pool_expand(GHES_ESTATUS_CACHE_AVG_SIZE *
-				      GHES_ESTATUS_CACHE_ALLOCED_MAX);
-	if (rc)
-		goto err_pool_exit;
-
 	rc = platform_driver_register(&ghes_platform_driver);
 	if (rc)
-		goto err_pool_exit;
+		goto err;
 
 	rc = apei_osc_setup();
 	if (rc == 0 && osc_sb_apei_support_acked)
@@ -1251,8 +1232,6 @@ static int __init ghes_init(void)
 		pr_info(GHES_PFX "Failed to enable APEI firmware first mode.\n");
 
 	return 0;
-err_pool_exit:
-	ghes_estatus_pool_exit();
 err:
 	return rc;
 }
diff --git a/drivers/acpi/apei/hest.c b/drivers/acpi/apei/hest.c
index 0935a0ef37e5..e33bfd9d256c 100644
--- a/drivers/acpi/apei/hest.c
+++ b/drivers/acpi/apei/hest.c
@@ -32,6 +32,7 @@
 #include <linux/io.h>
 #include <linux/platform_device.h>
 #include <acpi/apei.h>
+#include <acpi/ghes.h>
 
 #include "apei-internal.h"
 
@@ -209,6 +210,11 @@ static int __init hest_ghes_dev_register(unsigned int ghes_count)
 	rc = apei_hest_parse(hest_parse_ghes, &ghes_arr);
 	if (rc)
 		goto err;
+
+	rc = ghes_estatus_pool_init();
+	if (rc)
+		goto err;
+
 out:
 	kfree(ghes_arr.ghes_devs);
 	return rc;
@@ -257,7 +263,9 @@ void __init acpi_hest_init(void)
 		rc = apei_hest_parse(hest_parse_ghes_count, &ghes_count);
 		if (rc)
 			goto err;
-		rc = hest_ghes_dev_register(ghes_count);
+
+		if (ghes_count)
+			rc = hest_ghes_dev_register(ghes_count);
 		if (rc)
 			goto err;
 	}
diff --git a/include/acpi/ghes.h b/include/acpi/ghes.h
index 82cb4eb225a4..46ef5566e052 100644
--- a/include/acpi/ghes.h
+++ b/include/acpi/ghes.h
@@ -52,6 +52,8 @@ enum {
 	GHES_SEV_PANIC = 0x3,
 };
 
+int ghes_estatus_pool_init(void);
+
 /* From drivers/edac/ghes_edac.c */
 
 #ifdef CONFIG_EDAC_GHES
-- 
cgit v1.2.3


From fb7be08f1a091ec243780bfdad4bf0c492057808 Mon Sep 17 00:00:00 2001
From: James Morse <james.morse@arm.com>
Date: Tue, 29 Jan 2019 18:48:41 +0000
Subject: ACPI / APEI: Make estatus pool allocation a static size

Adding new NMI-like notifications duplicates the calls that grow
and shrink the estatus pool. This is all pretty pointless, as the
size is capped to 64K. Allocate this for each ghes and drop
the code that grows and shrinks the pool.

Suggested-by: Borislav Petkov <bp@suse.de>
Signed-off-by: James Morse <james.morse@arm.com>
Reviewed-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/apei/ghes.c | 49 ++++++------------------------------------------
 drivers/acpi/apei/hest.c |  2 +-
 include/acpi/ghes.h      |  2 +-
 3 files changed, 8 insertions(+), 45 deletions(-)

(limited to 'include')

diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index 4150c72c78cb..33144ab0661a 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -162,27 +162,18 @@ static void ghes_iounmap_irq(void)
 	clear_fixmap(FIX_APEI_GHES_IRQ);
 }
 
-static int ghes_estatus_pool_expand(unsigned long len); //temporary
-
-int ghes_estatus_pool_init(void)
+int ghes_estatus_pool_init(int num_ghes)
 {
+	unsigned long addr, len;
+
 	ghes_estatus_pool = gen_pool_create(GHES_ESTATUS_POOL_MIN_ALLOC_ORDER, -1);
 	if (!ghes_estatus_pool)
 		return -ENOMEM;
 
-	return ghes_estatus_pool_expand(GHES_ESTATUS_CACHE_AVG_SIZE *
-					GHES_ESTATUS_CACHE_ALLOCED_MAX);
-}
-
-static int ghes_estatus_pool_expand(unsigned long len)
-{
-	unsigned long size, addr;
-
-	ghes_estatus_pool_size_request += PAGE_ALIGN(len);
-	size = gen_pool_size(ghes_estatus_pool);
-	if (size >= ghes_estatus_pool_size_request)
-		return 0;
+	len = GHES_ESTATUS_CACHE_AVG_SIZE * GHES_ESTATUS_CACHE_ALLOCED_MAX;
+	len += (num_ghes * GHES_ESOURCE_PREALLOC_MAX_SIZE);
 
+	ghes_estatus_pool_size_request = PAGE_ALIGN(len);
 	addr = (unsigned long)vmalloc(PAGE_ALIGN(len));
 	if (!addr)
 		return -ENOMEM;
@@ -956,32 +947,8 @@ static int ghes_notify_nmi(unsigned int cmd, struct pt_regs *regs)
 	return ret;
 }
 
-static unsigned long ghes_esource_prealloc_size(
-	const struct acpi_hest_generic *generic)
-{
-	unsigned long block_length, prealloc_records, prealloc_size;
-
-	block_length = min_t(unsigned long, generic->error_block_length,
-			     GHES_ESTATUS_MAX_SIZE);
-	prealloc_records = max_t(unsigned long,
-				 generic->records_to_preallocate, 1);
-	prealloc_size = min_t(unsigned long, block_length * prealloc_records,
-			      GHES_ESOURCE_PREALLOC_MAX_SIZE);
-
-	return prealloc_size;
-}
-
-static void ghes_estatus_pool_shrink(unsigned long len)
-{
-	ghes_estatus_pool_size_request -= PAGE_ALIGN(len);
-}
-
 static void ghes_nmi_add(struct ghes *ghes)
 {
-	unsigned long len;
-
-	len = ghes_esource_prealloc_size(ghes->generic);
-	ghes_estatus_pool_expand(len);
 	mutex_lock(&ghes_list_mutex);
 	if (list_empty(&ghes_nmi))
 		register_nmi_handler(NMI_LOCAL, ghes_notify_nmi, 0, "ghes");
@@ -991,8 +958,6 @@ static void ghes_nmi_add(struct ghes *ghes)
 
 static void ghes_nmi_remove(struct ghes *ghes)
 {
-	unsigned long len;
-
 	mutex_lock(&ghes_list_mutex);
 	list_del_rcu(&ghes->list);
 	if (list_empty(&ghes_nmi))
@@ -1003,8 +968,6 @@ static void ghes_nmi_remove(struct ghes *ghes)
 	 * freed after NMI handler finishes.
 	 */
 	synchronize_rcu();
-	len = ghes_esource_prealloc_size(ghes->generic);
-	ghes_estatus_pool_shrink(len);
 }
 
 static void ghes_nmi_init_cxt(void)
diff --git a/drivers/acpi/apei/hest.c b/drivers/acpi/apei/hest.c
index e33bfd9d256c..8113ddb14d28 100644
--- a/drivers/acpi/apei/hest.c
+++ b/drivers/acpi/apei/hest.c
@@ -211,7 +211,7 @@ static int __init hest_ghes_dev_register(unsigned int ghes_count)
 	if (rc)
 		goto err;
 
-	rc = ghes_estatus_pool_init();
+	rc = ghes_estatus_pool_init(ghes_count);
 	if (rc)
 		goto err;
 
diff --git a/include/acpi/ghes.h b/include/acpi/ghes.h
index 46ef5566e052..cd9ee507d860 100644
--- a/include/acpi/ghes.h
+++ b/include/acpi/ghes.h
@@ -52,7 +52,7 @@ enum {
 	GHES_SEV_PANIC = 0x3,
 };
 
-int ghes_estatus_pool_init(void);
+int ghes_estatus_pool_init(int num_ghes);
 
 /* From drivers/edac/ghes_edac.c */
 
-- 
cgit v1.2.3


From eeb2555779471abdbcc6289a52dc54ce513feaf2 Mon Sep 17 00:00:00 2001
From: James Morse <james.morse@arm.com>
Date: Tue, 29 Jan 2019 18:48:42 +0000
Subject: ACPI / APEI: Don't store CPER records physical address in struct ghes

When CPER records are found the address of the records is stashed
in the struct ghes. Once the records have been processed, this
address is overwritten with zero so that it won't be processed
again without being re-populated by firmware.

This goes wrong if a struct ghes can be processed concurrently,
as can happen at probe time when an NMI occurs. If the NMI arrives
on another CPU, the probing CPU may call ghes_clear_estatus() on the
records before the handler had finished with them.
Even on the same CPU, once the interrupted handler is resumed, it
will call ghes_clear_estatus() on the NMIs records, this memory may
have already been re-used by firmware.

Avoid this stashing by letting the caller hold the address. A
later patch will do away with the use of ghes->flags in the
read/clear code too.

Signed-off-by: James Morse <james.morse@arm.com>
Reviewed-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/apei/ghes.c | 46 +++++++++++++++++++++++++++-------------------
 include/acpi/ghes.h      |  1 -
 2 files changed, 27 insertions(+), 20 deletions(-)

(limited to 'include')

diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index 33144ab0661a..a34f79153b1a 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -305,29 +305,30 @@ static void ghes_copy_tofrom_phys(void *buffer, u64 paddr, u32 len,
 	}
 }
 
-static int ghes_read_estatus(struct ghes *ghes)
+static int ghes_read_estatus(struct ghes *ghes, u64 *buf_paddr)
 {
 	struct acpi_hest_generic *g = ghes->generic;
-	u64 buf_paddr;
 	u32 len;
 	int rc;
 
-	rc = apei_read(&buf_paddr, &g->error_status_address);
+	rc = apei_read(buf_paddr, &g->error_status_address);
 	if (rc) {
+		*buf_paddr = 0;
 		pr_warn_ratelimited(FW_WARN GHES_PFX
 "Failed to read error status block address for hardware error source: %d.\n",
 				   g->header.source_id);
 		return -EIO;
 	}
-	if (!buf_paddr)
+	if (!*buf_paddr)
 		return -ENOENT;
 
-	ghes_copy_tofrom_phys(ghes->estatus, buf_paddr,
+	ghes_copy_tofrom_phys(ghes->estatus, *buf_paddr,
 			      sizeof(*ghes->estatus), 1);
-	if (!ghes->estatus->block_status)
+	if (!ghes->estatus->block_status) {
+		*buf_paddr = 0;
 		return -ENOENT;
+	}
 
-	ghes->buffer_paddr = buf_paddr;
 	ghes->flags |= GHES_TO_CLEAR;
 
 	rc = -EIO;
@@ -339,7 +340,7 @@ static int ghes_read_estatus(struct ghes *ghes)
 	if (cper_estatus_check_header(ghes->estatus))
 		goto err_read_block;
 	ghes_copy_tofrom_phys(ghes->estatus + 1,
-			      buf_paddr + sizeof(*ghes->estatus),
+			      *buf_paddr + sizeof(*ghes->estatus),
 			      len - sizeof(*ghes->estatus), 1);
 	if (cper_estatus_check(ghes->estatus))
 		goto err_read_block;
@@ -349,15 +350,20 @@ err_read_block:
 	if (rc)
 		pr_warn_ratelimited(FW_WARN GHES_PFX
 				    "Failed to read error status block!\n");
+
 	return rc;
 }
 
-static void ghes_clear_estatus(struct ghes *ghes)
+static void ghes_clear_estatus(struct ghes *ghes, u64 buf_paddr)
 {
 	ghes->estatus->block_status = 0;
 	if (!(ghes->flags & GHES_TO_CLEAR))
 		return;
-	ghes_copy_tofrom_phys(ghes->estatus, ghes->buffer_paddr,
+
+	if (!buf_paddr)
+		return;
+
+	ghes_copy_tofrom_phys(ghes->estatus, buf_paddr,
 			      sizeof(ghes->estatus->block_status), 0);
 	ghes->flags &= ~GHES_TO_CLEAR;
 }
@@ -666,11 +672,11 @@ static int ghes_ack_error(struct acpi_hest_generic_v2 *gv2)
 	return apei_write(val, &gv2->read_ack_register);
 }
 
-static void __ghes_panic(struct ghes *ghes)
+static void __ghes_panic(struct ghes *ghes, u64 buf_paddr)
 {
 	__ghes_print_estatus(KERN_EMERG, ghes->generic, ghes->estatus);
 
-	ghes_clear_estatus(ghes);
+	ghes_clear_estatus(ghes, buf_paddr);
 
 	/* reboot to log the error! */
 	if (!panic_timeout)
@@ -680,14 +686,15 @@ static void __ghes_panic(struct ghes *ghes)
 
 static int ghes_proc(struct ghes *ghes)
 {
+	u64 buf_paddr;
 	int rc;
 
-	rc = ghes_read_estatus(ghes);
+	rc = ghes_read_estatus(ghes, &buf_paddr);
 	if (rc)
 		goto out;
 
 	if (ghes_severity(ghes->estatus->error_severity) >= GHES_SEV_PANIC) {
-		__ghes_panic(ghes);
+		__ghes_panic(ghes, buf_paddr);
 	}
 
 	if (!ghes_estatus_cached(ghes->estatus)) {
@@ -697,7 +704,7 @@ static int ghes_proc(struct ghes *ghes)
 	ghes_do_proc(ghes, ghes->estatus);
 
 out:
-	ghes_clear_estatus(ghes);
+	ghes_clear_estatus(ghes, buf_paddr);
 
 	if (rc == -ENOENT)
 		return rc;
@@ -912,6 +919,7 @@ static void __process_error(struct ghes *ghes)
 
 static int ghes_notify_nmi(unsigned int cmd, struct pt_regs *regs)
 {
+	u64 buf_paddr;
 	struct ghes *ghes;
 	int sev, ret = NMI_DONE;
 
@@ -919,8 +927,8 @@ static int ghes_notify_nmi(unsigned int cmd, struct pt_regs *regs)
 		return ret;
 
 	list_for_each_entry_rcu(ghes, &ghes_nmi, list) {
-		if (ghes_read_estatus(ghes)) {
-			ghes_clear_estatus(ghes);
+		if (ghes_read_estatus(ghes, &buf_paddr)) {
+			ghes_clear_estatus(ghes, buf_paddr);
 			continue;
 		} else {
 			ret = NMI_HANDLED;
@@ -929,14 +937,14 @@ static int ghes_notify_nmi(unsigned int cmd, struct pt_regs *regs)
 		sev = ghes_severity(ghes->estatus->error_severity);
 		if (sev >= GHES_SEV_PANIC) {
 			ghes_print_queued_estatus();
-			__ghes_panic(ghes);
+			__ghes_panic(ghes, buf_paddr);
 		}
 
 		if (!(ghes->flags & GHES_TO_CLEAR))
 			continue;
 
 		__process_error(ghes);
-		ghes_clear_estatus(ghes);
+		ghes_clear_estatus(ghes, buf_paddr);
 	}
 
 #ifdef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
diff --git a/include/acpi/ghes.h b/include/acpi/ghes.h
index cd9ee507d860..f82f4a7ddd90 100644
--- a/include/acpi/ghes.h
+++ b/include/acpi/ghes.h
@@ -22,7 +22,6 @@ struct ghes {
 		struct acpi_hest_generic_v2 *generic_v2;
 	};
 	struct acpi_hest_generic_status *estatus;
-	u64 buffer_paddr;
 	unsigned long flags;
 	union {
 		struct list_head list;
-- 
cgit v1.2.3


From 5cc6c68287ae4be22c40b41cf6844746cddebbcc Mon Sep 17 00:00:00 2001
From: James Morse <james.morse@arm.com>
Date: Tue, 29 Jan 2019 18:48:44 +0000
Subject: ACPI / APEI: Don't update struct ghes' flags in read/clear estatus

ghes_read_estatus() sets a flag in struct ghes if the buffer of
CPER records needs to be cleared once the records have been
processed. This flag value is a problem if a struct ghes can be
processed concurrently, as happens at probe time if an NMI arrives
for the same error source. The NMI clears the flag, meaning the
interrupted handler may never do the ghes_estatus_clear() work.

The GHES_TO_CLEAR flags is only set at the same time as
buffer_paddr, which is now owned by the caller and passed to
ghes_clear_estatus(). Use this value as the flag.

A non-zero buf_paddr returned by ghes_read_estatus() means
ghes_clear_estatus() should clear this address. ghes_read_estatus()
already checks for a read of error_status_address being zero,
so CPER records cannot be written here.

Signed-off-by: James Morse <james.morse@arm.com>
Reviewed-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/apei/ghes.c | 5 -----
 include/acpi/ghes.h      | 1 -
 2 files changed, 6 deletions(-)

(limited to 'include')

diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index c20e1d0947b1..af3c10f47f20 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -329,8 +329,6 @@ static int ghes_read_estatus(struct ghes *ghes, u64 *buf_paddr)
 		return -ENOENT;
 	}
 
-	ghes->flags |= GHES_TO_CLEAR;
-
 	rc = -EIO;
 	len = cper_estatus_len(ghes->estatus);
 	if (len < sizeof(*ghes->estatus))
@@ -357,15 +355,12 @@ err_read_block:
 static void ghes_clear_estatus(struct ghes *ghes, u64 buf_paddr)
 {
 	ghes->estatus->block_status = 0;
-	if (!(ghes->flags & GHES_TO_CLEAR))
-		return;
 
 	if (!buf_paddr)
 		return;
 
 	ghes_copy_tofrom_phys(ghes->estatus, buf_paddr,
 			      sizeof(ghes->estatus->block_status), 0);
-	ghes->flags &= ~GHES_TO_CLEAR;
 }
 
 static void ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata, int sev)
diff --git a/include/acpi/ghes.h b/include/acpi/ghes.h
index f82f4a7ddd90..e3f1cddb4ac8 100644
--- a/include/acpi/ghes.h
+++ b/include/acpi/ghes.h
@@ -13,7 +13,6 @@
  * estatus: memory buffer for error status block, allocated during
  * HEST parsing.
  */
-#define GHES_TO_CLEAR		0x0001
 #define GHES_EXITING		0x0002
 
 struct ghes {
-- 
cgit v1.2.3


From 71bd106d2567675668e253cba3960e3c4bf2e80e Mon Sep 17 00:00:00 2001
From: Moritz Fischer <mdf@kernel.org>
Date: Thu, 7 Feb 2019 09:52:10 -0800
Subject: net: fixed-phy: Add fixed_phy_register_with_gpiod() API

Add fixed_phy_register_with_gpiod() API. It lets users create a
fixed_phy instance that uses a GPIO descriptor which was obtained
externally e.g. through platform data.
This enables platform devices (non-DT based) to use GPIOs for link
status.

Signed-off-by: Moritz Fischer <mdf@kernel.org>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/fixed_phy.c | 32 +++++++++++++++++++++++++-------
 include/linux/phy_fixed.h   | 15 +++++++++++++++
 2 files changed, 40 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/drivers/net/phy/fixed_phy.c b/drivers/net/phy/fixed_phy.c
index d810f914aaa4..b0d1368c3400 100644
--- a/drivers/net/phy/fixed_phy.c
+++ b/drivers/net/phy/fixed_phy.c
@@ -229,12 +229,12 @@ static struct gpio_desc *fixed_phy_get_gpiod(struct device_node *np)
 }
 #endif
 
-struct phy_device *fixed_phy_register(unsigned int irq,
-				      struct fixed_phy_status *status,
-				      struct device_node *np)
+static struct phy_device *__fixed_phy_register(unsigned int irq,
+					       struct fixed_phy_status *status,
+					       struct device_node *np,
+					       struct gpio_desc *gpiod)
 {
 	struct fixed_mdio_bus *fmb = &platform_fmb;
-	struct gpio_desc *gpiod = NULL;
 	struct phy_device *phy;
 	int phy_addr;
 	int ret;
@@ -243,9 +243,11 @@ struct phy_device *fixed_phy_register(unsigned int irq,
 		return ERR_PTR(-EPROBE_DEFER);
 
 	/* Check if we have a GPIO associated with this fixed phy */
-	gpiod = fixed_phy_get_gpiod(np);
-	if (IS_ERR(gpiod))
-		return ERR_CAST(gpiod);
+	if (!gpiod) {
+		gpiod = fixed_phy_get_gpiod(np);
+		if (IS_ERR(gpiod))
+			return ERR_CAST(gpiod);
+	}
 
 	/* Get the next available PHY address, up to PHY_MAX_ADDR */
 	phy_addr = ida_simple_get(&phy_fixed_ida, 0, PHY_MAX_ADDR, GFP_KERNEL);
@@ -308,8 +310,24 @@ struct phy_device *fixed_phy_register(unsigned int irq,
 
 	return phy;
 }
+
+struct phy_device *fixed_phy_register(unsigned int irq,
+				      struct fixed_phy_status *status,
+				      struct device_node *np)
+{
+	return __fixed_phy_register(irq, status, np, NULL);
+}
 EXPORT_SYMBOL_GPL(fixed_phy_register);
 
+struct phy_device *
+fixed_phy_register_with_gpiod(unsigned int irq,
+			      struct fixed_phy_status *status,
+			      struct gpio_desc *gpiod)
+{
+	return __fixed_phy_register(irq, status, NULL, gpiod);
+}
+EXPORT_SYMBOL_GPL(fixed_phy_register_with_gpiod);
+
 void fixed_phy_unregister(struct phy_device *phy)
 {
 	phy_device_remove(phy);
diff --git a/include/linux/phy_fixed.h b/include/linux/phy_fixed.h
index c78fc203db43..1e5d86ebdaeb 100644
--- a/include/linux/phy_fixed.h
+++ b/include/linux/phy_fixed.h
@@ -19,6 +19,12 @@ extern int fixed_phy_add(unsigned int irq, int phy_id,
 extern struct phy_device *fixed_phy_register(unsigned int irq,
 					     struct fixed_phy_status *status,
 					     struct device_node *np);
+
+extern struct phy_device *
+fixed_phy_register_with_gpiod(unsigned int irq,
+			      struct fixed_phy_status *status,
+			      struct gpio_desc *gpiod);
+
 extern void fixed_phy_unregister(struct phy_device *phydev);
 extern int fixed_phy_set_link_update(struct phy_device *phydev,
 			int (*link_update)(struct net_device *,
@@ -35,6 +41,15 @@ static inline struct phy_device *fixed_phy_register(unsigned int irq,
 {
 	return ERR_PTR(-ENODEV);
 }
+
+static inline struct phy_device *
+fixed_phy_register_with_gpiod(unsigned int irq,
+			      struct fixed_phy_status *status,
+			      struct gpio_desc *gpiod)
+{
+	return ERR_PTR(-ENODEV);
+}
+
 static inline void fixed_phy_unregister(struct phy_device *phydev)
 {
 }
-- 
cgit v1.2.3


From 998a8a8387ff5f65da456d1fc448dbb926fb5d78 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Thu, 7 Feb 2019 21:41:46 +0100
Subject: net: phy: let genphy_c45_read_link manage the devices to check

Let genphy_c45_read_link manage the devices to check, this removes
overhead from callers. Add C22EXT to the list of excluded devices
because it doesn't implement the status register. According to the
802.3 clause 45 spec registers 29.0 - 29.4 are reserved.

At the moment we have very few clause 45 PHY drivers, so we are
lacking experience whether other drivers will have to exclude further
devices, or may need to check PHY XS. If we should figure out that
list of devices to check needs to be configurable, I think best will
be to add a device list member to struct phy_driver.

v2:
- adjusted commit message
- exclude also device C22EXT from link checking

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/marvell10g.c | 10 +---------
 drivers/net/phy/phy-c45.c    | 18 ++++++++++--------
 include/linux/phy.h          |  2 +-
 include/uapi/linux/mdio.h    |  2 ++
 4 files changed, 14 insertions(+), 18 deletions(-)

(limited to 'include')

diff --git a/drivers/net/phy/marvell10g.c b/drivers/net/phy/marvell10g.c
index 296a537cdfcb..96a79c6c7810 100644
--- a/drivers/net/phy/marvell10g.c
+++ b/drivers/net/phy/marvell10g.c
@@ -428,16 +428,8 @@ static int mv3310_read_10gbr_status(struct phy_device *phydev)
 
 static int mv3310_read_status(struct phy_device *phydev)
 {
-	u32 mmd_mask = phydev->c45_ids.devices_in_package;
 	int val;
 
-	/* The vendor devads do not report link status.  Avoid the PHYXS
-	 * instance as there are three, and its status depends on the MAC
-	 * being appropriately configured for the negotiated speed.
-	 */
-	mmd_mask &= ~(BIT(MDIO_MMD_VEND1) | BIT(MDIO_MMD_VEND2) |
-		      BIT(MDIO_MMD_PHYXS));
-
 	phydev->speed = SPEED_UNKNOWN;
 	phydev->duplex = DUPLEX_UNKNOWN;
 	linkmode_zero(phydev->lp_advertising);
@@ -453,7 +445,7 @@ static int mv3310_read_status(struct phy_device *phydev)
 	if (val & MDIO_STAT1_LSTATUS)
 		return mv3310_read_10gbr_status(phydev);
 
-	val = genphy_c45_read_link(phydev, mmd_mask);
+	val = genphy_c45_read_link(phydev);
 	if (val < 0)
 		return val;
 
diff --git a/drivers/net/phy/phy-c45.c b/drivers/net/phy/phy-c45.c
index c92d0fb7ec4f..6adfe1f6319e 100644
--- a/drivers/net/phy/phy-c45.c
+++ b/drivers/net/phy/phy-c45.c
@@ -118,17 +118,24 @@ EXPORT_SYMBOL_GPL(genphy_c45_aneg_done);
 /**
  * genphy_c45_read_link - read the overall link status from the MMDs
  * @phydev: target phy_device struct
- * @mmd_mask: MMDs to read status from
  *
  * Read the link status from the specified MMDs, and if they all indicate
  * that the link is up, set phydev->link to 1.  If an error is encountered,
  * a negative errno will be returned, otherwise zero.
  */
-int genphy_c45_read_link(struct phy_device *phydev, u32 mmd_mask)
+int genphy_c45_read_link(struct phy_device *phydev)
 {
+	u32 mmd_mask = phydev->c45_ids.devices_in_package;
 	int val, devad;
 	bool link = true;
 
+	/* The vendor devads and C22EXT do not report link status. Avoid the
+	 * PHYXS instance as its status may depend on the MAC being
+	 * appropriately configured for the negotiated speed.
+	 */
+	mmd_mask &= ~(MDIO_DEVS_VEND1 | MDIO_DEVS_VEND2 | MDIO_DEVS_C22EXT |
+		      MDIO_DEVS_PHYXS);
+
 	while (mmd_mask && link) {
 		devad = __ffs(mmd_mask);
 		mmd_mask &= ~BIT(devad);
@@ -266,16 +273,11 @@ EXPORT_SYMBOL_GPL(gen10g_config_aneg);
 
 int gen10g_read_status(struct phy_device *phydev)
 {
-	u32 mmd_mask = phydev->c45_ids.devices_in_package;
-
 	/* For now just lie and say it's 10G all the time */
 	phydev->speed = SPEED_10000;
 	phydev->duplex = DUPLEX_FULL;
 
-	/* Avoid reading the vendor MMDs */
-	mmd_mask &= ~(BIT(MDIO_MMD_VEND1) | BIT(MDIO_MMD_VEND2));
-
-	return genphy_c45_read_link(phydev, mmd_mask);
+	return genphy_c45_read_link(phydev);
 }
 EXPORT_SYMBOL_GPL(gen10g_read_status);
 
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 237dd035858a..f41bf651f6a0 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -1094,7 +1094,7 @@ int genphy_write_mmd_unsupported(struct phy_device *phdev, int devnum,
 /* Clause 45 PHY */
 int genphy_c45_restart_aneg(struct phy_device *phydev);
 int genphy_c45_aneg_done(struct phy_device *phydev);
-int genphy_c45_read_link(struct phy_device *phydev, u32 mmd_mask);
+int genphy_c45_read_link(struct phy_device *phydev);
 int genphy_c45_read_lpa(struct phy_device *phydev);
 int genphy_c45_read_pma(struct phy_device *phydev);
 int genphy_c45_pma_setup_forced(struct phy_device *phydev);
diff --git a/include/uapi/linux/mdio.h b/include/uapi/linux/mdio.h
index d435b00d64ad..2e6e309f0847 100644
--- a/include/uapi/linux/mdio.h
+++ b/include/uapi/linux/mdio.h
@@ -123,6 +123,8 @@
 #define MDIO_DEVS_TC			MDIO_DEVS_PRESENT(MDIO_MMD_TC)
 #define MDIO_DEVS_AN			MDIO_DEVS_PRESENT(MDIO_MMD_AN)
 #define MDIO_DEVS_C22EXT		MDIO_DEVS_PRESENT(MDIO_MMD_C22EXT)
+#define MDIO_DEVS_VEND1			MDIO_DEVS_PRESENT(MDIO_MMD_VEND1)
+#define MDIO_DEVS_VEND2			MDIO_DEVS_PRESENT(MDIO_MMD_VEND2)
 
 /* Control register 2. */
 #define MDIO_PMA_CTRL2_TYPE		0x000f	/* PMA/PMD type selection */
-- 
cgit v1.2.3


From cd108b5c51db30aa01657322bb89e48c98216ff9 Mon Sep 17 00:00:00 2001
From: Richard Guy Briggs <rgb@redhat.com>
Date: Tue, 5 Feb 2019 16:06:30 -0500
Subject: audit: hide auditsc_get_stamp and audit_serial prototypes

auditsc_get_stamp() and audit_serial() are internal audit functions so
move their prototypes from include/linux/audit.h to kernel/audit.h
so they are not visible to the rest of the kernel.

Signed-off-by: Richard Guy Briggs <rgb@redhat.com>
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
 include/linux/audit.h | 9 ---------
 kernel/audit.h        | 5 +++++
 2 files changed, 5 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/linux/audit.h b/include/linux/audit.h
index 29251b18331a..1e69d9fe16da 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -348,10 +348,6 @@ static inline void audit_ptrace(struct task_struct *t)
 }
 
 				/* Private API (for audit.c only) */
-extern unsigned int audit_serial(void);
-extern int auditsc_get_stamp(struct audit_context *ctx,
-			      struct timespec64 *t, unsigned int *serial);
-
 extern void __audit_ipc_obj(struct kern_ipc_perm *ipcp);
 extern void __audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, umode_t mode);
 extern void __audit_bprm(struct linux_binprm *bprm);
@@ -531,11 +527,6 @@ static inline void audit_seccomp(unsigned long syscall, long signr, int code)
 static inline void audit_seccomp_actions_logged(const char *names,
 						const char *old_names, int res)
 { }
-static inline int auditsc_get_stamp(struct audit_context *ctx,
-			      struct timespec64 *t, unsigned int *serial)
-{
-	return 0;
-}
 static inline void audit_ipc_obj(struct kern_ipc_perm *ipcp)
 { }
 static inline void audit_ipc_set_perm(unsigned long qbytes, uid_t uid,
diff --git a/kernel/audit.h b/kernel/audit.h
index 82734f438ddd..958d5b8fc1b3 100644
--- a/kernel/audit.h
+++ b/kernel/audit.h
@@ -261,6 +261,10 @@ extern void audit_put_tty(struct tty_struct *tty);
 
 /* audit watch/mark/tree functions */
 #ifdef CONFIG_AUDITSYSCALL
+extern unsigned int audit_serial(void);
+extern int auditsc_get_stamp(struct audit_context *ctx,
+			      struct timespec64 *t, unsigned int *serial);
+
 extern void audit_put_watch(struct audit_watch *watch);
 extern void audit_get_watch(struct audit_watch *watch);
 extern int audit_to_watch(struct audit_krule *krule, char *path, int len,
@@ -300,6 +304,7 @@ extern void audit_filter_inodes(struct task_struct *tsk,
 				struct audit_context *ctx);
 extern struct list_head *audit_killed_trees(void);
 #else /* CONFIG_AUDITSYSCALL */
+#define auditsc_get_stamp(c, t, s) 0
 #define audit_put_watch(w) {}
 #define audit_get_watch(w) {}
 #define audit_to_watch(k, p, l, o) (-EINVAL)
-- 
cgit v1.2.3


From 382e8fa80da1571271faf1bd2220ac22cee13866 Mon Sep 17 00:00:00 2001
From: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Date: Fri, 1 Feb 2019 13:47:54 +0300
Subject: usb: typec: displayport: Move the Configuration VDO helpers to the
 header

The helpers used for reading and writing the pin assignment
from and to the Configuration VDO will be useful in GPU
drivers, and also UCSI driver after DisplayPort alt mode
support is added to it.

Signed-off-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/typec/altmodes/displayport.c | 4 ----
 include/linux/usb/typec_dp.h             | 4 ++++
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/drivers/usb/typec/altmodes/displayport.c b/drivers/usb/typec/altmodes/displayport.c
index 3f06e94771a7..610d790bc9be 100644
--- a/drivers/usb/typec/altmodes/displayport.c
+++ b/drivers/usb/typec/altmodes/displayport.c
@@ -24,10 +24,6 @@ enum {
 	DP_CONF_DUAL_D,
 };
 
-/* Helper for setting/getting the pin assignement value to the configuration */
-#define DP_CONF_SET_PIN_ASSIGN(_a_)	((_a_) << 8)
-#define DP_CONF_GET_PIN_ASSIGN(_conf_)	(((_conf_) & GENMASK(15, 8)) >> 8)
-
 /* Pin assignments that use USB3.1 Gen2 signaling to carry DP protocol */
 #define DP_PIN_ASSIGN_GEN2_BR_MASK	(BIT(DP_PIN_ASSIGN_A) | \
 					 BIT(DP_PIN_ASSIGN_B))
diff --git a/include/linux/usb/typec_dp.h b/include/linux/usb/typec_dp.h
index 55ae781d60a9..7fa12ef8d09a 100644
--- a/include/linux/usb/typec_dp.h
+++ b/include/linux/usb/typec_dp.h
@@ -92,4 +92,8 @@ enum {
 #define DP_CONF_PIN_ASSIGNEMENT_SHIFT	8
 #define DP_CONF_PIN_ASSIGNEMENT_MASK	GENMASK(15, 8)
 
+/* Helper for setting/getting the pin assignement value to the configuration */
+#define DP_CONF_SET_PIN_ASSIGN(_a_)	((_a_) << 8)
+#define DP_CONF_GET_PIN_ASSIGN(_conf_)	(((_conf_) & GENMASK(15, 8)) >> 8)
+
 #endif /* __USB_TYPEC_DP_H */
-- 
cgit v1.2.3


From e11a5795cb7cd1e25bbd1697baa109943938c0f6 Mon Sep 17 00:00:00 2001
From: Mathieu Poirier <mathieu.poirier@linaro.org>
Date: Tue, 5 Feb 2019 16:24:56 -0700
Subject: perf/aux: Make perf_event accessible to setup_aux()

When pmu::setup_aux() is called the coresight PMU needs to know which
sink to use for the session by looking up the information in the
event's attr::config2 field.

As such simply replace the cpu information by the complete perf_event
structure and change all affected customers.

Signed-off-by: Mathieu Poirier <mathieu.poirier@linaro.org>
Reviewed-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/s390/kernel/perf_cpum_sf.c                  | 6 +++---
 arch/x86/events/intel/bts.c                      | 4 +++-
 arch/x86/events/intel/pt.c                       | 5 +++--
 drivers/hwtracing/coresight/coresight-etm-perf.c | 6 +++---
 drivers/perf/arm_spe_pmu.c                       | 6 +++---
 include/linux/perf_event.h                       | 2 +-
 kernel/events/ring_buffer.c                      | 2 +-
 7 files changed, 17 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index bfabeb1889cc..1266194afb02 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -1600,7 +1600,7 @@ static void aux_sdb_init(unsigned long sdb)
 
 /*
  * aux_buffer_setup() - Setup AUX buffer for diagnostic mode sampling
- * @cpu:	On which to allocate, -1 means current
+ * @event:	Event the buffer is setup for, event->cpu == -1 means current
  * @pages:	Array of pointers to buffer pages passed from perf core
  * @nr_pages:	Total pages
  * @snapshot:	Flag for snapshot mode
@@ -1612,8 +1612,8 @@ static void aux_sdb_init(unsigned long sdb)
  *
  * Return the private AUX buffer structure if success or NULL if fails.
  */
-static void *aux_buffer_setup(int cpu, void **pages, int nr_pages,
-			      bool snapshot)
+static void *aux_buffer_setup(struct perf_event *event, void **pages,
+			      int nr_pages, bool snapshot)
 {
 	struct sf_buffer *sfb;
 	struct aux_buffer *aux;
diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c
index a01ef1b0f883..7cdd7b13bbda 100644
--- a/arch/x86/events/intel/bts.c
+++ b/arch/x86/events/intel/bts.c
@@ -77,10 +77,12 @@ static size_t buf_size(struct page *page)
 }
 
 static void *
-bts_buffer_setup_aux(int cpu, void **pages, int nr_pages, bool overwrite)
+bts_buffer_setup_aux(struct perf_event *event, void **pages,
+		     int nr_pages, bool overwrite)
 {
 	struct bts_buffer *buf;
 	struct page *page;
+	int cpu = event->cpu;
 	int node = (cpu == -1) ? cpu : cpu_to_node(cpu);
 	unsigned long offset;
 	size_t size = nr_pages << PAGE_SHIFT;
diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c
index 9494ca68fd9d..c0e86ff21f81 100644
--- a/arch/x86/events/intel/pt.c
+++ b/arch/x86/events/intel/pt.c
@@ -1114,10 +1114,11 @@ static int pt_buffer_init_topa(struct pt_buffer *buf, unsigned long nr_pages,
  * Return:	Our private PT buffer structure.
  */
 static void *
-pt_buffer_setup_aux(int cpu, void **pages, int nr_pages, bool snapshot)
+pt_buffer_setup_aux(struct perf_event *event, void **pages,
+		    int nr_pages, bool snapshot)
 {
 	struct pt_buffer *buf;
-	int node, ret;
+	int node, ret, cpu = event->cpu;
 
 	if (!nr_pages)
 		return NULL;
diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c b/drivers/hwtracing/coresight/coresight-etm-perf.c
index abe8249b893b..f21eb28b6782 100644
--- a/drivers/hwtracing/coresight/coresight-etm-perf.c
+++ b/drivers/hwtracing/coresight/coresight-etm-perf.c
@@ -177,15 +177,15 @@ static void etm_free_aux(void *data)
 	schedule_work(&event_data->work);
 }
 
-static void *etm_setup_aux(int event_cpu, void **pages,
+static void *etm_setup_aux(struct perf_event *event, void **pages,
 			   int nr_pages, bool overwrite)
 {
-	int cpu;
+	int cpu = event->cpu;
 	cpumask_t *mask;
 	struct coresight_device *sink;
 	struct etm_event_data *event_data = NULL;
 
-	event_data = alloc_event_data(event_cpu);
+	event_data = alloc_event_data(cpu);
 	if (!event_data)
 		return NULL;
 	INIT_WORK(&event_data->work, free_event_data);
diff --git a/drivers/perf/arm_spe_pmu.c b/drivers/perf/arm_spe_pmu.c
index 8e46a9dad2fa..7cb766dafe85 100644
--- a/drivers/perf/arm_spe_pmu.c
+++ b/drivers/perf/arm_spe_pmu.c
@@ -824,10 +824,10 @@ static void arm_spe_pmu_read(struct perf_event *event)
 {
 }
 
-static void *arm_spe_pmu_setup_aux(int cpu, void **pages, int nr_pages,
-				   bool snapshot)
+static void *arm_spe_pmu_setup_aux(struct perf_event *event, void **pages,
+				   int nr_pages, bool snapshot)
 {
-	int i;
+	int i, cpu = event->cpu;
 	struct page **pglist;
 	struct arm_spe_pmu_buf *buf;
 
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 1d5c551a5add..3e49b2144808 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -409,7 +409,7 @@ struct pmu {
 	/*
 	 * Set up pmu-private data structures for an AUX area
 	 */
-	void *(*setup_aux)		(int cpu, void **pages,
+	void *(*setup_aux)		(struct perf_event *event, void **pages,
 					 int nr_pages, bool overwrite);
 					/* optional */
 
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index 4a9937076331..857308295f63 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -658,7 +658,7 @@ int rb_alloc_aux(struct ring_buffer *rb, struct perf_event *event,
 			goto out;
 	}
 
-	rb->aux_priv = event->pmu->setup_aux(event->cpu, rb->aux_pages, nr_pages,
+	rb->aux_priv = event->pmu->setup_aux(event, rb->aux_pages, nr_pages,
 					     overwrite);
 	if (!rb->aux_priv)
 		goto out;
-- 
cgit v1.2.3


From 988036f9d322cbd787d8f6a776dbe903d05bae22 Mon Sep 17 00:00:00 2001
From: Mathieu Poirier <mathieu.poirier@linaro.org>
Date: Tue, 5 Feb 2019 16:24:57 -0700
Subject: coresight: perf: Add "sinks" group to PMU directory

Add a "sinks" directory entry so that users can see all the sinks
available in the system in a single place.  Individual sink are added
as they are registered with the coresight bus.

Signed-off-by: Mathieu Poirier <mathieu.poirier@linaro.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hwtracing/coresight/coresight-etm-perf.c | 82 ++++++++++++++++++++++++
 drivers/hwtracing/coresight/coresight-etm-perf.h |  6 +-
 drivers/hwtracing/coresight/coresight.c          | 18 ++++++
 include/linux/coresight.h                        |  7 +-
 4 files changed, 110 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c b/drivers/hwtracing/coresight/coresight-etm-perf.c
index f21eb28b6782..cdbdb28dc175 100644
--- a/drivers/hwtracing/coresight/coresight-etm-perf.c
+++ b/drivers/hwtracing/coresight/coresight-etm-perf.c
@@ -14,6 +14,7 @@
 #include <linux/perf_event.h>
 #include <linux/percpu-defs.h>
 #include <linux/slab.h>
+#include <linux/stringhash.h>
 #include <linux/types.h>
 #include <linux/workqueue.h>
 
@@ -43,8 +44,18 @@ static const struct attribute_group etm_pmu_format_group = {
 	.attrs  = etm_config_formats_attr,
 };
 
+static struct attribute *etm_config_sinks_attr[] = {
+	NULL,
+};
+
+static const struct attribute_group etm_pmu_sinks_group = {
+	.name   = "sinks",
+	.attrs  = etm_config_sinks_attr,
+};
+
 static const struct attribute_group *etm_pmu_attr_groups[] = {
 	&etm_pmu_format_group,
+	&etm_pmu_sinks_group,
 	NULL,
 };
 
@@ -479,6 +490,77 @@ int etm_perf_symlink(struct coresight_device *csdev, bool link)
 	return 0;
 }
 
+static ssize_t etm_perf_sink_name_show(struct device *dev,
+				       struct device_attribute *dattr,
+				       char *buf)
+{
+	struct dev_ext_attribute *ea;
+
+	ea = container_of(dattr, struct dev_ext_attribute, attr);
+	return scnprintf(buf, PAGE_SIZE, "0x%lx\n", (unsigned long)(ea->var));
+}
+
+int etm_perf_add_symlink_sink(struct coresight_device *csdev)
+{
+	int ret;
+	unsigned long hash;
+	const char *name;
+	struct device *pmu_dev = etm_pmu.dev;
+	struct device *pdev = csdev->dev.parent;
+	struct dev_ext_attribute *ea;
+
+	if (csdev->type != CORESIGHT_DEV_TYPE_SINK &&
+	    csdev->type != CORESIGHT_DEV_TYPE_LINKSINK)
+		return -EINVAL;
+
+	if (csdev->ea != NULL)
+		return -EINVAL;
+
+	if (!etm_perf_up)
+		return -EPROBE_DEFER;
+
+	ea = devm_kzalloc(pdev, sizeof(*ea), GFP_KERNEL);
+	if (!ea)
+		return -ENOMEM;
+
+	name = dev_name(pdev);
+	/* See function coresight_get_sink_by_id() to know where this is used */
+	hash = hashlen_hash(hashlen_string(NULL, name));
+
+	ea->attr.attr.name = devm_kstrdup(pdev, name, GFP_KERNEL);
+	if (!ea->attr.attr.name)
+		return -ENOMEM;
+
+	ea->attr.attr.mode = 0444;
+	ea->attr.show = etm_perf_sink_name_show;
+	ea->var = (unsigned long *)hash;
+
+	ret = sysfs_add_file_to_group(&pmu_dev->kobj,
+				      &ea->attr.attr, "sinks");
+
+	if (!ret)
+		csdev->ea = ea;
+
+	return ret;
+}
+
+void etm_perf_del_symlink_sink(struct coresight_device *csdev)
+{
+	struct device *pmu_dev = etm_pmu.dev;
+	struct dev_ext_attribute *ea = csdev->ea;
+
+	if (csdev->type != CORESIGHT_DEV_TYPE_SINK &&
+	    csdev->type != CORESIGHT_DEV_TYPE_LINKSINK)
+		return;
+
+	if (!ea)
+		return;
+
+	sysfs_remove_file_from_group(&pmu_dev->kobj,
+				     &ea->attr.attr, "sinks");
+	csdev->ea = NULL;
+}
+
 static int __init etm_perf_init(void)
 {
 	int ret;
diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.h b/drivers/hwtracing/coresight/coresight-etm-perf.h
index da7d9336a15c..015213abe00a 100644
--- a/drivers/hwtracing/coresight/coresight-etm-perf.h
+++ b/drivers/hwtracing/coresight/coresight-etm-perf.h
@@ -59,6 +59,8 @@ struct etm_event_data {
 
 #ifdef CONFIG_CORESIGHT
 int etm_perf_symlink(struct coresight_device *csdev, bool link);
+int etm_perf_add_symlink_sink(struct coresight_device *csdev);
+void etm_perf_del_symlink_sink(struct coresight_device *csdev);
 static inline void *etm_perf_sink_config(struct perf_output_handle *handle)
 {
 	struct etm_event_data *data = perf_get_aux(handle);
@@ -70,7 +72,9 @@ static inline void *etm_perf_sink_config(struct perf_output_handle *handle)
 #else
 static inline int etm_perf_symlink(struct coresight_device *csdev, bool link)
 { return -EINVAL; }
-
+int etm_perf_add_symlink_sink(struct coresight_device *csdev)
+{ return -EINVAL; }
+void etm_perf_del_symlink_sink(struct coresight_device *csdev) {}
 static inline void *etm_perf_sink_config(struct perf_output_handle *handle)
 {
 	return NULL;
diff --git a/drivers/hwtracing/coresight/coresight.c b/drivers/hwtracing/coresight/coresight.c
index 2b0df1a0a8df..d7fa90be6f42 100644
--- a/drivers/hwtracing/coresight/coresight.c
+++ b/drivers/hwtracing/coresight/coresight.c
@@ -18,6 +18,7 @@
 #include <linux/delay.h>
 #include <linux/pm_runtime.h>
 
+#include "coresight-etm-perf.h"
 #include "coresight-priv.h"
 
 static DEFINE_MUTEX(coresight_mutex);
@@ -1167,6 +1168,22 @@ struct coresight_device *coresight_register(struct coresight_desc *desc)
 		goto err_out;
 	}
 
+	if (csdev->type == CORESIGHT_DEV_TYPE_SINK ||
+	    csdev->type == CORESIGHT_DEV_TYPE_LINKSINK) {
+		ret = etm_perf_add_symlink_sink(csdev);
+
+		if (ret) {
+			device_unregister(&csdev->dev);
+			/*
+			 * As with the above, all resources are free'd
+			 * explicitly via coresight_device_release() triggered
+			 * from put_device(), which is in turn called from
+			 * function device_unregister().
+			 */
+			goto err_out;
+		}
+	}
+
 	mutex_lock(&coresight_mutex);
 
 	coresight_fixup_device_conns(csdev);
@@ -1185,6 +1202,7 @@ EXPORT_SYMBOL_GPL(coresight_register);
 
 void coresight_unregister(struct coresight_device *csdev)
 {
+	etm_perf_del_symlink_sink(csdev);
 	/* Remove references of that device in the topology */
 	coresight_remove_conns(csdev);
 	device_unregister(&csdev->dev);
diff --git a/include/linux/coresight.h b/include/linux/coresight.h
index 46c67a764877..7b87965f7a65 100644
--- a/include/linux/coresight.h
+++ b/include/linux/coresight.h
@@ -154,8 +154,9 @@ struct coresight_connection {
  * @orphan:	true if the component has connections that haven't been linked.
  * @enable:	'true' if component is currently part of an active path.
  * @activated:	'true' only if a _sink_ has been activated.  A sink can be
-		activated but not yet enabled.  Enabling for a _sink_
-		happens when a source has been selected for that it.
+ *		activated but not yet enabled.  Enabling for a _sink_
+ *		appens when a source has been selected for that it.
+ * @ea:		Device attribute for sink representation under PMU directory.
  */
 struct coresight_device {
 	struct coresight_connection *conns;
@@ -168,7 +169,9 @@ struct coresight_device {
 	atomic_t *refcnt;
 	bool orphan;
 	bool enable;	/* true only if configured as part of a path */
+	/* sink specific fields */
 	bool activated;	/* true only if a sink is part of a path */
+	struct dev_ext_attribute *ea;
 };
 
 #define to_coresight_device(d) container_of(d, struct coresight_device, dev)
-- 
cgit v1.2.3


From 2c6f4fc884a46b17c501e7f276e8a4ab97437b50 Mon Sep 17 00:00:00 2001
From: David Engraf <david.engraf@sysgo.com>
Date: Tue, 5 Feb 2019 13:19:52 +0100
Subject: device: Fix comment for driver_data in struct device

dev_set_drvdata/dev_get_drvdata is used to access driver_data
in struct device.

Signed-off-by: David Engraf <david.engraf@sysgo.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/device.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/device.h b/include/linux/device.h
index a36830e2d0e5..292b720c4bc2 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -988,7 +988,7 @@ struct device {
 	void		*platform_data;	/* Platform specific data, device
 					   core doesn't touch it */
 	void		*driver_data;	/* Driver data, set and get with
-					   dev_set/get_drvdata */
+					   dev_set_drvdata/dev_get_drvdata */
 	struct dev_links_info	links;
 	struct dev_pm_info	power;
 	struct dev_pm_domain	*pm_domain;
-- 
cgit v1.2.3


From 64e21add8cd9717f042b523f35ea831eab14261b Mon Sep 17 00:00:00 2001
From: Claudiu Beznea <claudiu.beznea@microchip.com>
Date: Thu, 7 Feb 2019 09:25:05 +0000
Subject: pinctrl: at91: add slewrate support for SAM9X60

Add slew rate support for SAM9X60 pin controller.

Signed-off-by: Claudiu Beznea <claudiu.beznea@microchip.com>
Acked-by: Ludovic Desroches <ludovic.desroches@microchip.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/pinctrl/pinctrl-at91.c     | 48 ++++++++++++++++++++++++++++++++++++++
 drivers/pinctrl/pinctrl-at91.h     |  1 +
 include/dt-bindings/pinctrl/at91.h |  4 ++++
 3 files changed, 53 insertions(+)

(limited to 'include')

diff --git a/drivers/pinctrl/pinctrl-at91.c b/drivers/pinctrl/pinctrl-at91.c
index 128003597f43..cb7c432769a5 100644
--- a/drivers/pinctrl/pinctrl-at91.c
+++ b/drivers/pinctrl/pinctrl-at91.c
@@ -59,6 +59,9 @@ static int gpio_banks;
 #define OUTPUT		(1 << 7)
 #define OUTPUT_VAL_SHIFT	8
 #define OUTPUT_VAL	(0x1 << OUTPUT_VAL_SHIFT)
+#define SLEWRATE_SHIFT	9
+#define SLEWRATE_MASK	0x1
+#define SLEWRATE	(SLEWRATE_MASK << SLEWRATE_SHIFT)
 #define DEBOUNCE	(1 << 16)
 #define DEBOUNCE_VAL_SHIFT	17
 #define DEBOUNCE_VAL	(0x3fff << DEBOUNCE_VAL_SHIFT)
@@ -82,6 +85,13 @@ enum drive_strength_bit {
 #define DRIVE_STRENGTH_BIT_MSK(name)	(DRIVE_STRENGTH_BIT_##name << \
 					 DRIVE_STRENGTH_SHIFT)
 
+enum slewrate_bit {
+	SLEWRATE_BIT_DIS,
+	SLEWRATE_BIT_ENA,
+};
+
+#define SLEWRATE_BIT_MSK(name)		(SLEWRATE_BIT_##name << SLEWRATE_SHIFT)
+
 /**
  * struct at91_pmx_func - describes AT91 pinmux functions
  * @name: the name of this specific function
@@ -171,6 +181,8 @@ struct at91_pinctrl_mux_ops {
 	unsigned (*get_drivestrength)(void __iomem *pio, unsigned pin);
 	void (*set_drivestrength)(void __iomem *pio, unsigned pin,
 					u32 strength);
+	unsigned (*get_slewrate)(void __iomem *pio, unsigned pin);
+	void (*set_slewrate)(void __iomem *pio, unsigned pin, u32 slewrate);
 	/* irq */
 	int (*irq_type)(struct irq_data *d, unsigned type);
 };
@@ -585,6 +597,16 @@ static unsigned at91_mux_sam9x60_get_drivestrength(void __iomem *pio,
 	return DRIVE_STRENGTH_BIT_LOW;
 }
 
+static unsigned at91_mux_sam9x60_get_slewrate(void __iomem *pio, unsigned pin)
+{
+	unsigned tmp = readl_relaxed(pio + SAM9X60_PIO_SLEWR);
+
+	if ((tmp & BIT(pin)))
+		return SLEWRATE_BIT_ENA;
+
+	return SLEWRATE_BIT_DIS;
+}
+
 static void set_drive_strength(void __iomem *reg, unsigned pin, u32 strength)
 {
 	unsigned tmp = readl_relaxed(reg);
@@ -643,6 +665,24 @@ static void at91_mux_sam9x60_set_drivestrength(void __iomem *pio, unsigned pin,
 	writel_relaxed(tmp, pio + SAM9X60_PIO_DRIVER1);
 }
 
+static void at91_mux_sam9x60_set_slewrate(void __iomem *pio, unsigned pin,
+					  u32 setting)
+{
+	unsigned int tmp;
+
+	if (setting < SLEWRATE_BIT_DIS || setting > SLEWRATE_BIT_ENA)
+		return;
+
+	tmp = readl_relaxed(pio + SAM9X60_PIO_SLEWR);
+
+	if (setting == SLEWRATE_BIT_DIS)
+		tmp &= ~BIT(pin);
+	else
+		tmp |= BIT(pin);
+
+	writel_relaxed(tmp, pio + SAM9X60_PIO_SLEWR);
+}
+
 static struct at91_pinctrl_mux_ops at91rm9200_ops = {
 	.get_periph	= at91_mux_get_periph,
 	.mux_A_periph	= at91_mux_set_A_periph,
@@ -687,6 +727,8 @@ static const struct at91_pinctrl_mux_ops sam9x60_ops = {
 	.disable_schmitt_trig = at91_mux_pio3_disable_schmitt_trig,
 	.get_drivestrength = at91_mux_sam9x60_get_drivestrength,
 	.set_drivestrength = at91_mux_sam9x60_set_drivestrength,
+	.get_slewrate   = at91_mux_sam9x60_get_slewrate,
+	.set_slewrate   = at91_mux_sam9x60_set_slewrate,
 	.irq_type	= alt_gpio_irq_type,
 
 };
@@ -950,6 +992,8 @@ static int at91_pinconf_get(struct pinctrl_dev *pctldev,
 	if (info->ops->get_drivestrength)
 		*config |= (info->ops->get_drivestrength(pio, pin)
 				<< DRIVE_STRENGTH_SHIFT);
+	if (info->ops->get_slewrate)
+		*config |= (info->ops->get_slewrate(pio, pin) << SLEWRATE_SHIFT);
 	if (at91_mux_get_output(pio, pin, &out))
 		*config |= OUTPUT | (out << OUTPUT_VAL_SHIFT);
 
@@ -1001,6 +1045,9 @@ static int at91_pinconf_set(struct pinctrl_dev *pctldev,
 			info->ops->set_drivestrength(pio, pin,
 				(config & DRIVE_STRENGTH)
 					>> DRIVE_STRENGTH_SHIFT);
+		if (info->ops->set_slewrate)
+			info->ops->set_slewrate(pio, pin,
+				(config & SLEWRATE) >> SLEWRATE_SHIFT);
 
 	} /* for each config */
 
@@ -1044,6 +1091,7 @@ static void at91_pinconf_dbg_show(struct pinctrl_dev *pctldev,
 			     DRIVE_STRENGTH_MED);
 	DBG_SHOW_FLAG_MASKED(DRIVE_STRENGTH, DRIVE_STRENGTH_BIT_MSK(HI),
 			     DRIVE_STRENGTH_HI);
+	DBG_SHOW_FLAG(SLEWRATE);
 	DBG_SHOW_FLAG(DEBOUNCE);
 	if (config & DEBOUNCE) {
 		val = config >> DEBOUNCE_VAL_SHIFT;
diff --git a/drivers/pinctrl/pinctrl-at91.h b/drivers/pinctrl/pinctrl-at91.h
index 19fc27e66bfd..223620f14b05 100644
--- a/drivers/pinctrl/pinctrl-at91.h
+++ b/drivers/pinctrl/pinctrl-at91.h
@@ -69,6 +69,7 @@
 #define AT91SAM9X5_PIO_DRIVER1	0x114  /*PIO Driver 1 register offset*/
 #define AT91SAM9X5_PIO_DRIVER2	0x118  /*PIO Driver 2 register offset*/
 
+#define SAM9X60_PIO_SLEWR	0x110	/* PIO Slew Rate Control Register */
 #define SAM9X60_PIO_DRIVER1	0x118	/* PIO Driver 1 register offset */
 
 #endif
diff --git a/include/dt-bindings/pinctrl/at91.h b/include/dt-bindings/pinctrl/at91.h
index eb81867eac77..8dc10e00c627 100644
--- a/include/dt-bindings/pinctrl/at91.h
+++ b/include/dt-bindings/pinctrl/at91.h
@@ -17,6 +17,7 @@
 #define AT91_PINCTRL_DIS_SCHMIT		(1 << 4)
 #define AT91_PINCTRL_OUTPUT		(1 << 7)
 #define AT91_PINCTRL_OUTPUT_VAL(x)	((x & 0x1) << 8)
+#define AT91_PINCTRL_SLEWRATE		(1 << 9)
 #define AT91_PINCTRL_DEBOUNCE		(1 << 16)
 #define AT91_PINCTRL_DEBOUNCE_VAL(x)	(x << 17)
 
@@ -27,6 +28,9 @@
 #define AT91_PINCTRL_DRIVE_STRENGTH_MED			(0x2 << 5)
 #define AT91_PINCTRL_DRIVE_STRENGTH_HI			(0x3 << 5)
 
+#define AT91_PINCTRL_SLEWRATE_DIS	(0x0 << 9)
+#define AT91_PINCTRL_SLEWRATE_ENA	(0x1 << 9)
+
 #define AT91_PIOA	0
 #define AT91_PIOB	1
 #define AT91_PIOC	2
-- 
cgit v1.2.3


From 0f3b07f027f87a38ebe5c436490095df762819be Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 7 Feb 2019 21:44:41 +0100
Subject: cfg80211: add and use strongly typed element iteration macros

Rather than always iterating elements from frames with pure
u8 pointers, add a type "struct element" that encapsulates
the id/datalen/data format of them.

Then, add the element iteration macros
 * for_each_element
 * for_each_element_id
 * for_each_element_extid

which take, as their first 'argument', such a structure and
iterate through a given u8 array interpreting it as elements.

While at it and since we'll need it, also add
 * for_each_subelement
 * for_each_subelement_id
 * for_each_subelement_extid

which instead of taking data/length just take an outer element
and use its data/datalen.

Also add for_each_element_completed() to determine if any of
the loops above completed, i.e. it was able to parse all of
the elements successfully and no data remained.

Use for_each_element_id() in cfg80211_find_ie_match() as the
first user of this.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h | 53 +++++++++++++++++++++++++++++++++++++++++++++++
 net/wireless/scan.c       | 14 ++++++-------
 2 files changed, 59 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 3b04e72315e1..4e3a4e293348 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -3243,4 +3243,57 @@ static inline bool ieee80211_action_contains_tpc(struct sk_buff *skb)
 	return true;
 }
 
+struct element {
+	u8 id;
+	u8 datalen;
+	u8 data[];
+};
+
+/* element iteration helpers */
+#define for_each_element(element, _data, _datalen)			\
+	for (element = (void *)(_data);					\
+	     (u8 *)(_data) + (_datalen) - (u8 *)element >=		\
+		sizeof(*element) &&					\
+	     (u8 *)(_data) + (_datalen) - (u8 *)element >=		\
+		sizeof(*element) + element->datalen;			\
+	     element = (void *)(element->data + element->datalen))
+
+#define for_each_element_id(element, _id, data, datalen)		\
+	for_each_element(element, data, datalen)			\
+		if (element->id == (_id))
+
+#define for_each_element_extid(element, extid, data, datalen)		\
+	for_each_element(element, data, datalen)			\
+		if (element->id == WLAN_EID_EXTENSION &&		\
+		    element->datalen > 0 &&				\
+		    element->data[0] == (extid))
+
+#define for_each_subelement(sub, element)				\
+	for_each_element(sub, (element)->data, (element)->datalen)
+
+#define for_each_subelement_id(sub, id, element)			\
+	for_each_element_id(sub, id, (element)->data, (element)->datalen)
+
+#define for_each_subelement_extid(sub, extid, element)			\
+	for_each_element_extid(sub, extid, (element)->data, (element)->datalen)
+
+/**
+ * for_each_element_completed - determine if element parsing consumed all data
+ * @element: element pointer after for_each_element() or friends
+ * @data: same data pointer as passed to for_each_element() or friends
+ * @datalen: same data length as passed to for_each_element() or friends
+ *
+ * This function returns %true if all the data was parsed or considered
+ * while walking the elements. Only use this if your for_each_element()
+ * loop cannot be broken out of, otherwise it always returns %false.
+ *
+ * If some data was malformed, this returns %false since the last parsed
+ * element will not fill the whole remaining data.
+ */
+static inline bool for_each_element_completed(const struct element *element,
+					      const void *data, size_t datalen)
+{
+	return (u8 *)element == (u8 *)data + datalen;
+}
+
 #endif /* LINUX_IEEE80211_H */
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index 5123667f4569..c7f64bb9c581 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -484,6 +484,8 @@ const u8 *cfg80211_find_ie_match(u8 eid, const u8 *ies, int len,
 				 const u8 *match, int match_len,
 				 int match_offset)
 {
+	const struct element *elem;
+
 	/* match_offset can't be smaller than 2, unless match_len is
 	 * zero, in which case match_offset must be zero as well.
 	 */
@@ -491,14 +493,10 @@ const u8 *cfg80211_find_ie_match(u8 eid, const u8 *ies, int len,
 		    (!match_len && match_offset)))
 		return NULL;
 
-	while (len >= 2 && len >= ies[1] + 2) {
-		if ((ies[0] == eid) &&
-		    (ies[1] + 2 >= match_offset + match_len) &&
-		    !memcmp(ies + match_offset, match, match_len))
-			return ies;
-
-		len -= ies[1] + 2;
-		ies += ies[1] + 2;
+	for_each_element_id(elem, eid, ies, len) {
+		if (elem->datalen >= match_offset - 2 + match_len &&
+		    !memcmp(elem->data + match_offset - 2, match, match_len))
+			return (void *)elem;
 	}
 
 	return NULL;
-- 
cgit v1.2.3


From 49a68e0d88890060a2b9b6c6ad1ec53eb50abccf Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 7 Feb 2019 23:26:38 +0100
Subject: cfg80211: add various struct element finding helpers

We currently have a number of helpers to find elements that just
return a u8 *, change those to return a struct element and add
inlines to deal with the u8 * compatibility.

Note that the match behaviour is changed to start the natch at
the data, so conversion from _ie_match to _elem_match need to
be done carefully.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h | 135 ++++++++++++++++++++++++++++++++++++++++++++++---
 net/wireless/scan.c    |  39 +++++++-------
 net/wireless/util.c    |   6 +--
 3 files changed, 148 insertions(+), 32 deletions(-)

(limited to 'include')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 7f2739a90bdb..c077de7fbd8a 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -2062,16 +2062,30 @@ struct cfg80211_bss {
 	u8 priv[0] __aligned(sizeof(void *));
 };
 
+/**
+ * ieee80211_bss_get_elem - find element with given ID
+ * @bss: the bss to search
+ * @id: the element ID
+ *
+ * Note that the return value is an RCU-protected pointer, so
+ * rcu_read_lock() must be held when calling this function.
+ * Return: %NULL if not found.
+ */
+const struct element *ieee80211_bss_get_elem(struct cfg80211_bss *bss, u8 id);
+
 /**
  * ieee80211_bss_get_ie - find IE with given ID
  * @bss: the bss to search
- * @ie: the IE ID
+ * @id: the element ID
  *
  * Note that the return value is an RCU-protected pointer, so
  * rcu_read_lock() must be held when calling this function.
  * Return: %NULL if not found.
  */
-const u8 *ieee80211_bss_get_ie(struct cfg80211_bss *bss, u8 ie);
+static inline const u8 *ieee80211_bss_get_ie(struct cfg80211_bss *bss, u8 id)
+{
+	return (void *)ieee80211_bss_get_elem(bss, id);
+}
 
 
 /**
@@ -4999,6 +5013,33 @@ void ieee80211_amsdu_to_8023s(struct sk_buff *skb, struct sk_buff_head *list,
 unsigned int cfg80211_classify8021d(struct sk_buff *skb,
 				    struct cfg80211_qos_map *qos_map);
 
+/**
+ * cfg80211_find_elem_match - match information element and byte array in data
+ *
+ * @eid: element ID
+ * @ies: data consisting of IEs
+ * @len: length of data
+ * @match: byte array to match
+ * @match_len: number of bytes in the match array
+ * @match_offset: offset in the IE data where the byte array should match.
+ *	Note the difference to cfg80211_find_ie_match() which considers
+ *	the offset to start from the element ID byte, but here we take
+ *	the data portion instead.
+ *
+ * Return: %NULL if the element ID could not be found or if
+ * the element is invalid (claims to be longer than the given
+ * data) or if the byte array doesn't match; otherwise return the
+ * requested element struct.
+ *
+ * Note: There are no checks on the element length other than
+ * having to fit into the given data and being large enough for the
+ * byte array to match.
+ */
+const struct element *
+cfg80211_find_elem_match(u8 eid, const u8 *ies, unsigned int len,
+			 const u8 *match, unsigned int match_len,
+			 unsigned int match_offset);
+
 /**
  * cfg80211_find_ie_match - match information element and byte array in data
  *
@@ -5023,9 +5064,44 @@ unsigned int cfg80211_classify8021d(struct sk_buff *skb,
  * having to fit into the given data and being large enough for the
  * byte array to match.
  */
-const u8 *cfg80211_find_ie_match(u8 eid, const u8 *ies, int len,
-				 const u8 *match, int match_len,
-				 int match_offset);
+static inline const u8 *
+cfg80211_find_ie_match(u8 eid, const u8 *ies, unsigned int len,
+		       const u8 *match, unsigned int match_len,
+		       unsigned int match_offset)
+{
+	/* match_offset can't be smaller than 2, unless match_len is
+	 * zero, in which case match_offset must be zero as well.
+	 */
+	if (WARN_ON((match_len && match_offset < 2) ||
+		    (!match_len && match_offset)))
+		return NULL;
+
+	return (void *)cfg80211_find_elem_match(eid, ies, len,
+						match, match_len,
+						match_offset ?
+							match_offset - 2 : 0);
+}
+
+/**
+ * cfg80211_find_elem - find information element in data
+ *
+ * @eid: element ID
+ * @ies: data consisting of IEs
+ * @len: length of data
+ *
+ * Return: %NULL if the element ID could not be found or if
+ * the element is invalid (claims to be longer than the given
+ * data) or if the byte array doesn't match; otherwise return the
+ * requested element struct.
+ *
+ * Note: There are no checks on the element length other than
+ * having to fit into the given data.
+ */
+static inline const struct element *
+cfg80211_find_elem(u8 eid, const u8 *ies, int len)
+{
+	return cfg80211_find_elem_match(eid, ies, len, NULL, 0, 0);
+}
 
 /**
  * cfg80211_find_ie - find information element in data
@@ -5047,6 +5123,28 @@ static inline const u8 *cfg80211_find_ie(u8 eid, const u8 *ies, int len)
 	return cfg80211_find_ie_match(eid, ies, len, NULL, 0, 0);
 }
 
+/**
+ * cfg80211_find_ext_elem - find information element with EID Extension in data
+ *
+ * @ext_eid: element ID Extension
+ * @ies: data consisting of IEs
+ * @len: length of data
+ *
+ * Return: %NULL if the etended element could not be found or if
+ * the element is invalid (claims to be longer than the given
+ * data) or if the byte array doesn't match; otherwise return the
+ * requested element struct.
+ *
+ * Note: There are no checks on the element length other than
+ * having to fit into the given data.
+ */
+static inline const struct element *
+cfg80211_find_ext_elem(u8 ext_eid, const u8 *ies, int len)
+{
+	return cfg80211_find_elem_match(WLAN_EID_EXTENSION, ies, len,
+					&ext_eid, 1, 0);
+}
+
 /**
  * cfg80211_find_ext_ie - find information element with EID Extension in data
  *
@@ -5068,6 +5166,25 @@ static inline const u8 *cfg80211_find_ext_ie(u8 ext_eid, const u8 *ies, int len)
 				      &ext_eid, 1, 2);
 }
 
+/**
+ * cfg80211_find_vendor_elem - find vendor specific information element in data
+ *
+ * @oui: vendor OUI
+ * @oui_type: vendor-specific OUI type (must be < 0xff), negative means any
+ * @ies: data consisting of IEs
+ * @len: length of data
+ *
+ * Return: %NULL if the vendor specific element ID could not be found or if the
+ * element is invalid (claims to be longer than the given data); otherwise
+ * return the element structure for the requested element.
+ *
+ * Note: There are no checks on the element length other than having to fit into
+ * the given data.
+ */
+const struct element *cfg80211_find_vendor_elem(unsigned int oui, int oui_type,
+						const u8 *ies,
+						unsigned int len);
+
 /**
  * cfg80211_find_vendor_ie - find vendor specific information element in data
  *
@@ -5084,8 +5201,12 @@ static inline const u8 *cfg80211_find_ext_ie(u8 ext_eid, const u8 *ies, int len)
  * Note: There are no checks on the element length other than having to fit into
  * the given data.
  */
-const u8 *cfg80211_find_vendor_ie(unsigned int oui, int oui_type,
-				  const u8 *ies, int len);
+static inline const u8 *
+cfg80211_find_vendor_ie(unsigned int oui, int oui_type,
+			const u8 *ies, unsigned int len)
+{
+	return (void *)cfg80211_find_vendor_elem(oui, oui_type, ies, len);
+}
 
 /**
  * cfg80211_send_layer2_update - send layer 2 update frame
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index c7f64bb9c581..d2c9ca5f4f57 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -480,48 +480,43 @@ void cfg80211_bss_expire(struct cfg80211_registered_device *rdev)
 	__cfg80211_bss_expire(rdev, jiffies - IEEE80211_SCAN_RESULT_EXPIRE);
 }
 
-const u8 *cfg80211_find_ie_match(u8 eid, const u8 *ies, int len,
-				 const u8 *match, int match_len,
-				 int match_offset)
+const struct element *
+cfg80211_find_elem_match(u8 eid, const u8 *ies, unsigned int len,
+			 const u8 *match, unsigned int match_len,
+			 unsigned int match_offset)
 {
 	const struct element *elem;
 
-	/* match_offset can't be smaller than 2, unless match_len is
-	 * zero, in which case match_offset must be zero as well.
-	 */
-	if (WARN_ON((match_len && match_offset < 2) ||
-		    (!match_len && match_offset)))
-		return NULL;
-
 	for_each_element_id(elem, eid, ies, len) {
-		if (elem->datalen >= match_offset - 2 + match_len &&
-		    !memcmp(elem->data + match_offset - 2, match, match_len))
-			return (void *)elem;
+		if (elem->datalen >= match_offset + match_len &&
+		    !memcmp(elem->data + match_offset, match, match_len))
+			return elem;
 	}
 
 	return NULL;
 }
-EXPORT_SYMBOL(cfg80211_find_ie_match);
+EXPORT_SYMBOL(cfg80211_find_elem_match);
 
-const u8 *cfg80211_find_vendor_ie(unsigned int oui, int oui_type,
-				  const u8 *ies, int len)
+const struct element *cfg80211_find_vendor_elem(unsigned int oui, int oui_type,
+						const u8 *ies,
+						unsigned int len)
 {
-	const u8 *ie;
+	const struct element *elem;
 	u8 match[] = { oui >> 16, oui >> 8, oui, oui_type };
 	int match_len = (oui_type < 0) ? 3 : sizeof(match);
 
 	if (WARN_ON(oui_type > 0xff))
 		return NULL;
 
-	ie = cfg80211_find_ie_match(WLAN_EID_VENDOR_SPECIFIC, ies, len,
-				    match, match_len, 2);
+	elem = cfg80211_find_elem_match(WLAN_EID_VENDOR_SPECIFIC, ies, len,
+					match, match_len, 0);
 
-	if (ie && (ie[1] < 4))
+	if (!elem || elem->datalen < 4)
 		return NULL;
 
-	return ie;
+	return elem;
 }
-EXPORT_SYMBOL(cfg80211_find_vendor_ie);
+EXPORT_SYMBOL(cfg80211_find_vendor_elem);
 
 static bool is_bss(struct cfg80211_bss *a, const u8 *bssid,
 		   const u8 *ssid, size_t ssid_len)
diff --git a/net/wireless/util.c b/net/wireless/util.c
index cd48cdd582c0..61fa33d0019e 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -776,7 +776,7 @@ unsigned int cfg80211_classify8021d(struct sk_buff *skb,
 }
 EXPORT_SYMBOL(cfg80211_classify8021d);
 
-const u8 *ieee80211_bss_get_ie(struct cfg80211_bss *bss, u8 ie)
+const struct element *ieee80211_bss_get_elem(struct cfg80211_bss *bss, u8 id)
 {
 	const struct cfg80211_bss_ies *ies;
 
@@ -784,9 +784,9 @@ const u8 *ieee80211_bss_get_ie(struct cfg80211_bss *bss, u8 ie)
 	if (!ies)
 		return NULL;
 
-	return cfg80211_find_ie(ie, ies->data, ies->len);
+	return cfg80211_find_elem(id, ies->data, ies->len);
 }
-EXPORT_SYMBOL(ieee80211_bss_get_ie);
+EXPORT_SYMBOL(ieee80211_bss_get_elem);
 
 void cfg80211_upload_connect_keys(struct wireless_dev *wdev)
 {
-- 
cgit v1.2.3


From 7011ba583f425780ab1ee52f41161a0171197365 Mon Sep 17 00:00:00 2001
From: Sara Sharon <sara.sharon@intel.com>
Date: Mon, 21 Jan 2019 12:25:59 +0200
Subject: cfg80211: Move Multiple BSS info to struct cfg80211_bss to be visible

Previously the transmitted BSS and the non-trasmitted BSS list were
defined in struct cfg80211_internal_bss. Move them to struct cfg80211_bss
since mac80211 needs this info.

Signed-off-by: Sara Sharon <sara.sharon@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h |  4 ++-
 net/wireless/core.h    | 10 +++----
 net/wireless/scan.c    | 80 ++++++++++++++++++++++++--------------------------
 3 files changed, 46 insertions(+), 48 deletions(-)

(limited to 'include')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index c077de7fbd8a..86de6b62a44b 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -6,7 +6,7 @@
  * Copyright 2006-2010	Johannes Berg <johannes@sipsolutions.net>
  * Copyright 2013-2014 Intel Mobile Communications GmbH
  * Copyright 2015-2017	Intel Deutschland GmbH
- * Copyright (C) 2018 Intel Corporation
+ * Copyright (C) 2018-2019 Intel Corporation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -2049,6 +2049,8 @@ struct cfg80211_bss {
 	const struct cfg80211_bss_ies __rcu *proberesp_ies;
 
 	struct cfg80211_bss *hidden_beacon_bss;
+	struct cfg80211_bss *transmitted_bss;
+	struct list_head nontrans_list;
 
 	s32 signal;
 
diff --git a/net/wireless/core.h b/net/wireless/core.h
index c20c75df60f5..f257a2b32ba4 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -152,8 +152,6 @@ extern int cfg80211_rdev_list_generation;
 struct cfg80211_internal_bss {
 	struct list_head list;
 	struct list_head hidden_list;
-	struct list_head nontrans_list;
-	struct cfg80211_bss *transmitted_bss;
 	struct rb_node rbn;
 	u64 ts_boottime;
 	unsigned long ts;
@@ -184,8 +182,8 @@ static inline struct cfg80211_internal_bss *bss_from_pub(struct cfg80211_bss *pu
 static inline void cfg80211_hold_bss(struct cfg80211_internal_bss *bss)
 {
 	atomic_inc(&bss->hold);
-	if (bss->transmitted_bss) {
-		bss = container_of(bss->transmitted_bss,
+	if (bss->pub.transmitted_bss) {
+		bss = container_of(bss->pub.transmitted_bss,
 				   struct cfg80211_internal_bss, pub);
 		atomic_inc(&bss->hold);
 	}
@@ -195,8 +193,8 @@ static inline void cfg80211_unhold_bss(struct cfg80211_internal_bss *bss)
 {
 	int r = atomic_dec_return(&bss->hold);
 	WARN_ON(r < 0);
-	if (bss->transmitted_bss) {
-		bss = container_of(bss->transmitted_bss,
+	if (bss->pub.transmitted_bss) {
+		bss = container_of(bss->pub.transmitted_bss,
 				   struct cfg80211_internal_bss, pub);
 		r = atomic_dec_return(&bss->hold);
 		WARN_ON(r < 0);
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index d5950a23e619..325323a842d8 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -110,8 +110,8 @@ static inline void bss_ref_get(struct cfg80211_registered_device *rdev,
 				   pub);
 		bss->refcount++;
 	}
-	if (bss->transmitted_bss) {
-		bss = container_of(bss->transmitted_bss,
+	if (bss->pub.transmitted_bss) {
+		bss = container_of(bss->pub.transmitted_bss,
 				   struct cfg80211_internal_bss,
 				   pub);
 		bss->refcount++;
@@ -133,10 +133,10 @@ static inline void bss_ref_put(struct cfg80211_registered_device *rdev,
 			bss_free(hbss);
 	}
 
-	if (bss->transmitted_bss) {
+	if (bss->pub.transmitted_bss) {
 		struct cfg80211_internal_bss *tbss;
 
-		tbss = container_of(bss->transmitted_bss,
+		tbss = container_of(bss->pub.transmitted_bss,
 				    struct cfg80211_internal_bss,
 				    pub);
 		tbss->refcount--;
@@ -169,7 +169,7 @@ static bool __cfg80211_unlink_bss(struct cfg80211_registered_device *rdev,
 	}
 
 	list_del_init(&bss->list);
-	list_del_init(&bss->nontrans_list);
+	list_del_init(&bss->pub.nontrans_list);
 	rb_erase(&bss->rbn, &rdev->bss_tree);
 	rdev->bss_entries--;
 	WARN_ONCE((rdev->bss_entries == 0) ^ list_empty(&rdev->bss_list),
@@ -317,15 +317,15 @@ static bool is_bss(struct cfg80211_bss *a, const u8 *bssid,
 }
 
 static int
-cfg80211_add_nontrans_list(struct cfg80211_internal_bss *trans_bss,
-			   struct cfg80211_internal_bss *nontrans_bss)
+cfg80211_add_nontrans_list(struct cfg80211_bss *trans_bss,
+			   struct cfg80211_bss *nontrans_bss)
 {
 	const u8 *ssid;
 	size_t ssid_len;
-	struct cfg80211_internal_bss *bss = NULL;
+	struct cfg80211_bss *bss = NULL;
 
 	rcu_read_lock();
-	ssid = ieee80211_bss_get_ie(&nontrans_bss->pub, WLAN_EID_SSID);
+	ssid = ieee80211_bss_get_ie(nontrans_bss, WLAN_EID_SSID);
 	if (!ssid) {
 		rcu_read_unlock();
 		return -EINVAL;
@@ -336,7 +336,7 @@ cfg80211_add_nontrans_list(struct cfg80211_internal_bss *trans_bss,
 
 	/* check if nontrans_bss is in the list */
 	list_for_each_entry(bss, &trans_bss->nontrans_list, nontrans_list) {
-		if (is_bss(&bss->pub, nontrans_bss->pub.bssid, ssid, ssid_len))
+		if (is_bss(bss, nontrans_bss->bssid, ssid, ssid_len))
 			return 0;
 	}
 
@@ -1166,7 +1166,7 @@ cfg80211_bss_update(struct cfg80211_registered_device *rdev,
 		memcpy(new, tmp, sizeof(*new));
 		new->refcount = 1;
 		INIT_LIST_HEAD(&new->hidden_list);
-		INIT_LIST_HEAD(&new->nontrans_list);
+		INIT_LIST_HEAD(&new->pub.nontrans_list);
 
 		if (rcu_access_pointer(tmp->pub.proberesp_ies)) {
 			hidden = rb_find_bss(rdev, tmp, BSS_CMP_HIDE_ZLEN);
@@ -1207,7 +1207,7 @@ cfg80211_bss_update(struct cfg80211_registered_device *rdev,
 					     struct cfg80211_internal_bss,
 					     pub);
 
-			new->transmitted_bss = trans_bss;
+			new->pub.transmitted_bss = trans_bss;
 			bss_ref_get(rdev, pbss);
 		}
 
@@ -1311,7 +1311,7 @@ cfg80211_inform_single_bss_data(struct wiphy *wiphy,
 	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
 	struct cfg80211_bss_ies *ies;
 	struct ieee80211_channel *channel;
-	struct cfg80211_internal_bss tmp = {}, *res, *trans_internal;
+	struct cfg80211_internal_bss tmp = {}, *res;
 	int bss_type;
 	bool signal_valid;
 
@@ -1385,10 +1385,7 @@ cfg80211_inform_single_bss_data(struct wiphy *wiphy,
 		/* this is a nontransmitting bss, we need to add it to
 		 * transmitting bss' list if it is not there
 		 */
-		trans_internal = container_of(trans_bss,
-					      struct cfg80211_internal_bss,
-					      pub);
-		if (cfg80211_add_nontrans_list(trans_internal, res)) {
+		if (cfg80211_add_nontrans_list(trans_bss, &res->pub)) {
 			if (__cfg80211_unlink_bss(rdev, res))
 				rdev->bss_generation++;
 		}
@@ -1523,7 +1520,7 @@ cfg80211_parse_mbssid_frame_data(struct wiphy *wiphy,
 
 static void
 cfg80211_update_notlisted_nontrans(struct wiphy *wiphy,
-				   struct cfg80211_internal_bss *nontrans_bss,
+				   struct cfg80211_bss *nontrans_bss,
 				   struct ieee80211_mgmt *mgmt, size_t len,
 				   gfp_t gfp)
 {
@@ -1548,7 +1545,7 @@ cfg80211_update_notlisted_nontrans(struct wiphy *wiphy,
 		return;
 	new_ie_len -= mbssid[1];
 	rcu_read_lock();
-	nontrans_ssid = ieee80211_bss_get_ie(&nontrans_bss->pub, WLAN_EID_SSID);
+	nontrans_ssid = ieee80211_bss_get_ie(nontrans_bss, WLAN_EID_SSID);
 	if (!nontrans_ssid) {
 		rcu_read_unlock();
 		return;
@@ -1589,15 +1586,15 @@ cfg80211_update_notlisted_nontrans(struct wiphy *wiphy,
 	new_ies->from_beacon = ieee80211_is_beacon(mgmt->frame_control);
 	memcpy(new_ies->data, new_ie, new_ie_len);
 	if (ieee80211_is_probe_resp(mgmt->frame_control)) {
-		old = rcu_access_pointer(nontrans_bss->pub.proberesp_ies);
-		rcu_assign_pointer(nontrans_bss->pub.proberesp_ies, new_ies);
-		rcu_assign_pointer(nontrans_bss->pub.ies, new_ies);
+		old = rcu_access_pointer(nontrans_bss->proberesp_ies);
+		rcu_assign_pointer(nontrans_bss->proberesp_ies, new_ies);
+		rcu_assign_pointer(nontrans_bss->ies, new_ies);
 		if (old)
 			kfree_rcu((struct cfg80211_bss_ies *)old, rcu_head);
 	} else {
-		old = rcu_access_pointer(nontrans_bss->pub.beacon_ies);
-		rcu_assign_pointer(nontrans_bss->pub.beacon_ies, new_ies);
-		rcu_assign_pointer(nontrans_bss->pub.ies, new_ies);
+		old = rcu_access_pointer(nontrans_bss->beacon_ies);
+		rcu_assign_pointer(nontrans_bss->beacon_ies, new_ies);
+		rcu_assign_pointer(nontrans_bss->ies, new_ies);
 		if (old)
 			kfree_rcu((struct cfg80211_bss_ies *)old, rcu_head);
 	}
@@ -1696,8 +1693,7 @@ cfg80211_inform_bss_frame_data(struct wiphy *wiphy,
 			       struct ieee80211_mgmt *mgmt, size_t len,
 			       gfp_t gfp)
 {
-	struct cfg80211_bss *res;
-	struct cfg80211_internal_bss *trans_bss, *tmp_bss;
+	struct cfg80211_bss *res, *tmp_bss;
 	const u8 *ie = mgmt->u.probe_resp.variable;
 	const struct cfg80211_bss_ies *ies1, *ies2;
 	size_t ielen = len - offsetof(struct ieee80211_mgmt,
@@ -1715,17 +1711,14 @@ cfg80211_inform_bss_frame_data(struct wiphy *wiphy,
 	 * in MBSSID IE
 	 */
 	ies1 = rcu_access_pointer(res->ies);
-	trans_bss = container_of(res, struct cfg80211_internal_bss, pub);
-	if (!trans_bss)
-		return res;
 
 	/* go through nontrans_list, if the timestamp of the BSS is
 	 * earlier than the timestamp of the transmitting BSS then
 	 * update it
 	 */
-	list_for_each_entry(tmp_bss, &trans_bss->nontrans_list,
+	list_for_each_entry(tmp_bss, &res->nontrans_list,
 			    nontrans_list) {
-		ies2 = rcu_access_pointer(tmp_bss->pub.ies);
+		ies2 = rcu_access_pointer(tmp_bss->ies);
 		if (ies2->tsf < ies1->tsf)
 			cfg80211_update_notlisted_nontrans(wiphy, tmp_bss,
 							   mgmt, len, gfp);
@@ -1770,7 +1763,8 @@ EXPORT_SYMBOL(cfg80211_put_bss);
 void cfg80211_unlink_bss(struct wiphy *wiphy, struct cfg80211_bss *pub)
 {
 	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
-	struct cfg80211_internal_bss *bss, *nontrans_bss, *tmp;
+	struct cfg80211_internal_bss *bss, *tmp1;
+	struct cfg80211_bss *nontrans_bss, *tmp;
 
 	if (WARN_ON(!pub))
 		return;
@@ -1778,17 +1772,21 @@ void cfg80211_unlink_bss(struct wiphy *wiphy, struct cfg80211_bss *pub)
 	bss = container_of(pub, struct cfg80211_internal_bss, pub);
 
 	spin_lock_bh(&rdev->bss_lock);
-	if (!list_empty(&bss->list)) {
-		list_for_each_entry_safe(nontrans_bss, tmp,
-					 &bss->nontrans_list,
-					 nontrans_list) {
-			if (__cfg80211_unlink_bss(rdev, nontrans_bss))
-				rdev->bss_generation++;
-		}
+	if (list_empty(&bss->list))
+		goto out;
 
-		if (__cfg80211_unlink_bss(rdev, bss))
+	list_for_each_entry_safe(nontrans_bss, tmp,
+				 &pub->nontrans_list,
+				 nontrans_list) {
+		tmp1 = container_of(nontrans_bss,
+				    struct cfg80211_internal_bss, pub);
+		if (__cfg80211_unlink_bss(rdev, tmp1))
 			rdev->bss_generation++;
 	}
+
+	if (__cfg80211_unlink_bss(rdev, bss))
+		rdev->bss_generation++;
+out:
 	spin_unlock_bh(&rdev->bss_lock);
 }
 EXPORT_SYMBOL(cfg80211_unlink_bss);
-- 
cgit v1.2.3


From 213ed579d38ef47c55694a4a411926af3dfc6558 Mon Sep 17 00:00:00 2001
From: Sara Sharon <sara.sharon@intel.com>
Date: Wed, 16 Jan 2019 23:02:03 +0200
Subject: cfg80211: parse multi-bssid only if HW supports it

Parsing and exposing nontransmitted APs is problematic
when underlying HW doesn't support it. Do it only if
driver indicated support. Allow HE restriction as well,
since the HE spec defined the exact manner that Multiple
BSSID set should behave. APs that not support the HE
spec will have less predictable Multiple BSSID set
support/behavior

Signed-off-by: Sara Sharon <sara.sharon@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h |  8 ++++++++
 net/wireless/scan.c    | 11 ++++++++++-
 2 files changed, 18 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 86de6b62a44b..67aeb7199617 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -4315,6 +4315,11 @@ struct cfg80211_pmsr_capabilities {
  * @txq_memory_limit: configuration internal TX queue memory limit
  * @txq_quantum: configuration of internal TX queue scheduler quantum
  *
+ * @support_mbssid: can HW support association with nontransmitted AP
+ * @support_only_he_mbssid: don't parse MBSSID elements if it is not
+ *	HE AP, in order to avoid compatibility issues.
+ *	@support_mbssid must be set for this to have any effect.
+ *
  * @pmsr_capa: peer measurement capabilities
  */
 struct wiphy {
@@ -4455,6 +4460,9 @@ struct wiphy {
 	u32 txq_memory_limit;
 	u32 txq_quantum;
 
+	u8 support_mbssid:1,
+	   support_only_he_mbssid:1;
+
 	const struct cfg80211_pmsr_capabilities *pmsr_capa;
 
 	char priv[0] __aligned(NETDEV_ALIGN);
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index 325323a842d8..ab0f0178ee0e 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -1417,6 +1417,11 @@ static void cfg80211_parse_mbssid_data(struct wiphy *wiphy,
 		return;
 	if (!cfg80211_find_ie(WLAN_EID_MULTIPLE_BSSID, ie, ielen))
 		return;
+	if (!wiphy->support_mbssid)
+		return;
+	if (wiphy->support_only_he_mbssid &&
+	    !cfg80211_find_ext_ie(WLAN_EID_EXT_HE_CAPABILITY, ie, ielen))
+		return;
 
 	new_ie = kmalloc(IEEE80211_MAX_DATA_LEN, gfp);
 	if (!new_ie)
@@ -1701,7 +1706,11 @@ cfg80211_inform_bss_frame_data(struct wiphy *wiphy,
 
 	res = cfg80211_inform_single_bss_frame_data(wiphy, data, mgmt,
 						    len, NULL, gfp);
-	if (!res || !cfg80211_find_ie(WLAN_EID_MULTIPLE_BSSID, ie, ielen))
+	if (!res || !wiphy->support_mbssid ||
+	    !cfg80211_find_ie(WLAN_EID_MULTIPLE_BSSID, ie, ielen))
+		return res;
+	if (wiphy->support_only_he_mbssid &&
+	    !cfg80211_find_ext_ie(WLAN_EID_EXT_HE_CAPABILITY, ie, ielen))
 		return res;
 
 	/* process each non-transmitting bss */
-- 
cgit v1.2.3


From 7ece9c372b21635120e7ab5ea3fc41ce9892ead8 Mon Sep 17 00:00:00 2001
From: Sara Sharon <sara.sharon@intel.com>
Date: Wed, 16 Jan 2019 21:26:25 +0200
Subject: cfg80211: make BSSID generation function inline

This will enable reuse by mac80211.

Signed-off-by: Sara Sharon <sara.sharon@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h | 23 +++++++++++++++++++++++
 net/wireless/scan.c    | 16 ----------------
 2 files changed, 23 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 67aeb7199617..c33ebc41d797 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -5461,6 +5461,29 @@ cfg80211_inform_bss_frame(struct wiphy *wiphy,
 	return cfg80211_inform_bss_frame_data(wiphy, &data, mgmt, len, gfp);
 }
 
+/**
+ * cfg80211_gen_new_bssid - generate a nontransmitted BSSID for multi-BSSID
+ * @bssid: transmitter BSSID
+ * @max_bssid: max BSSID indicator, taken from Multiple BSSID element
+ * @mbssid_index: BSSID index, taken from Multiple BSSID index element
+ * @new_bssid_addr: address of the resulting BSSID
+ */
+static inline void cfg80211_gen_new_bssid(const u8 *bssid, u8 max_bssid,
+					  u8 mbssid_index, u8 *new_bssid_addr)
+{
+	u64 bssid_tmp, new_bssid;
+	u64 lsb_n;
+
+	bssid_tmp = ether_addr_to_u64(bssid);
+
+	lsb_n = bssid_tmp & ((1 << max_bssid) - 1);
+	new_bssid = bssid_tmp;
+	new_bssid &= ~((1 << max_bssid) - 1);
+	new_bssid |= (lsb_n + mbssid_index) % (1 << max_bssid);
+
+	u64_to_ether_addr(new_bssid, new_bssid_addr);
+}
+
 /**
  * enum cfg80211_bss_frame_type - frame type that the BSS data came from
  * @CFG80211_BSS_FTYPE_UNKNOWN: driver doesn't know whether the data is
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index ab0f0178ee0e..f1b06e9c1038 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -179,22 +179,6 @@ static bool __cfg80211_unlink_bss(struct cfg80211_registered_device *rdev,
 	return true;
 }
 
-static void cfg80211_gen_new_bssid(const u8 *bssid, u8 max_bssid,
-				   u8 mbssid_index, u8 *new_bssid_addr)
-{
-	u64 bssid_tmp, new_bssid = 0;
-	u64 lsb_n;
-
-	bssid_tmp = ether_addr_to_u64(bssid);
-
-	lsb_n = bssid_tmp & ((1 << max_bssid) - 1);
-	new_bssid = bssid_tmp;
-	new_bssid &= ~((1 << max_bssid) - 1);
-	new_bssid |= (lsb_n + mbssid_index) % (1 << max_bssid);
-
-	u64_to_ether_addr(new_bssid, new_bssid_addr);
-}
-
 static size_t cfg80211_gen_new_ie(const u8 *ie, size_t ielen,
 				  const u8 *subelement, size_t subie_len,
 				  u8 *new_ie, gfp_t gfp)
-- 
cgit v1.2.3


From 0cd01efb03396c5368b1a32eed0ccb2aa453bdc8 Mon Sep 17 00:00:00 2001
From: Sara Sharon <sara.sharon@intel.com>
Date: Tue, 22 Jan 2019 09:50:50 +0200
Subject: cfg80211: save multi-bssid properties

When the new IEs are generated, the multiple BSSID elements
are not saved. Save aside properties that are needed later
for PS.

Signed-off-by: Sara Sharon <sara.sharon@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h |  5 ++++
 net/wireless/scan.c    | 69 +++++++++++++++++++++++++++++++++++---------------
 2 files changed, 53 insertions(+), 21 deletions(-)

(limited to 'include')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index c33ebc41d797..76efdc3a2200 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -2038,6 +2038,8 @@ struct cfg80211_bss_ies {
  * @signal: signal strength value (type depends on the wiphy's signal_type)
  * @chains: bitmask for filled values in @chain_signal.
  * @chain_signal: per-chain signal strength of last received BSS in dBm.
+ * @bssid_index: index in the multiple BSS set
+ * @max_bssid_indicator: max number of members in the BSS set
  * @priv: private area for driver use, has at least wiphy->bss_priv_size bytes
  */
 struct cfg80211_bss {
@@ -2061,6 +2063,9 @@ struct cfg80211_bss {
 	u8 chains;
 	s8 chain_signal[IEEE80211_MAX_CHAINS];
 
+	u8 bssid_index;
+	u8 max_bssid_indicator;
+
 	u8 priv[0] __aligned(sizeof(void *));
 };
 
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index f1b06e9c1038..387e5f868684 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -1022,11 +1022,16 @@ static bool cfg80211_combine_bsses(struct cfg80211_registered_device *rdev,
 	return true;
 }
 
+struct cfg80211_non_tx_bss {
+	struct cfg80211_bss *tx_bss;
+	u8 max_bssid_indicator;
+	u8 bssid_index;
+};
+
 /* Returned bss is reference counted and must be cleaned up appropriately. */
 static struct cfg80211_internal_bss *
 cfg80211_bss_update(struct cfg80211_registered_device *rdev,
 		    struct cfg80211_internal_bss *tmp,
-		    struct cfg80211_bss *trans_bss,
 		    bool signal_valid)
 {
 	struct cfg80211_internal_bss *found = NULL;
@@ -1126,6 +1131,8 @@ cfg80211_bss_update(struct cfg80211_registered_device *rdev,
 		memcpy(found->pub.chain_signal, tmp->pub.chain_signal,
 		       IEEE80211_MAX_CHAINS);
 		ether_addr_copy(found->parent_bssid, tmp->parent_bssid);
+		found->pub.max_bssid_indicator = tmp->pub.max_bssid_indicator;
+		found->pub.bssid_index = tmp->pub.bssid_index;
 	} else {
 		struct cfg80211_internal_bss *new;
 		struct cfg80211_internal_bss *hidden;
@@ -1185,13 +1192,13 @@ cfg80211_bss_update(struct cfg80211_registered_device *rdev,
 		}
 
 		/* This must be before the call to bss_ref_get */
-		if (trans_bss) {
+		if (tmp->pub.transmitted_bss) {
 			struct cfg80211_internal_bss *pbss =
-				container_of(trans_bss,
+				container_of(tmp->pub.transmitted_bss,
 					     struct cfg80211_internal_bss,
 					     pub);
 
-			new->pub.transmitted_bss = trans_bss;
+			new->pub.transmitted_bss = tmp->pub.transmitted_bss;
 			bss_ref_get(rdev, pbss);
 		}
 
@@ -1289,7 +1296,7 @@ cfg80211_inform_single_bss_data(struct wiphy *wiphy,
 				enum cfg80211_bss_frame_type ftype,
 				const u8 *bssid, u64 tsf, u16 capability,
 				u16 beacon_interval, const u8 *ie, size_t ielen,
-				struct cfg80211_bss *trans_bss,
+				struct cfg80211_non_tx_bss *non_tx_data,
 				gfp_t gfp)
 {
 	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
@@ -1318,6 +1325,11 @@ cfg80211_inform_single_bss_data(struct wiphy *wiphy,
 	tmp.pub.beacon_interval = beacon_interval;
 	tmp.pub.capability = capability;
 	tmp.ts_boottime = data->boottime_ns;
+	if (non_tx_data) {
+		tmp.pub.transmitted_bss = non_tx_data->tx_bss;
+		tmp.pub.bssid_index = non_tx_data->bssid_index;
+		tmp.pub.max_bssid_indicator = non_tx_data->max_bssid_indicator;
+	}
 
 	/*
 	 * If we do not know here whether the IEs are from a Beacon or Probe
@@ -1350,8 +1362,7 @@ cfg80211_inform_single_bss_data(struct wiphy *wiphy,
 
 	signal_valid = abs(data->chan->center_freq - channel->center_freq) <=
 		wiphy->max_adj_channel_rssi_comp;
-	res = cfg80211_bss_update(wiphy_to_rdev(wiphy), &tmp, trans_bss,
-				  signal_valid);
+	res = cfg80211_bss_update(wiphy_to_rdev(wiphy), &tmp, signal_valid);
 	if (!res)
 		return NULL;
 
@@ -1365,11 +1376,12 @@ cfg80211_inform_single_bss_data(struct wiphy *wiphy,
 			regulatory_hint_found_beacon(wiphy, channel, gfp);
 	}
 
-	if (trans_bss) {
+	if (non_tx_data && non_tx_data->tx_bss) {
 		/* this is a nontransmitting bss, we need to add it to
 		 * transmitting bss' list if it is not there
 		 */
-		if (cfg80211_add_nontrans_list(trans_bss, &res->pub)) {
+		if (cfg80211_add_nontrans_list(non_tx_data->tx_bss,
+					       &res->pub)) {
 			if (__cfg80211_unlink_bss(rdev, res))
 				rdev->bss_generation++;
 		}
@@ -1386,7 +1398,7 @@ static void cfg80211_parse_mbssid_data(struct wiphy *wiphy,
 				       const u8 *bssid, u64 tsf,
 				       u16 beacon_interval, const u8 *ie,
 				       size_t ielen,
-				       struct cfg80211_bss *trans_bss,
+				       struct cfg80211_non_tx_bss *non_tx_data,
 				       gfp_t gfp)
 {
 	const u8 *mbssid_index_ie;
@@ -1397,7 +1409,7 @@ static void cfg80211_parse_mbssid_data(struct wiphy *wiphy,
 	u16 capability;
 	struct cfg80211_bss *bss;
 
-	if (!trans_bss)
+	if (!non_tx_data)
 		return;
 	if (!cfg80211_find_ie(WLAN_EID_MULTIPLE_BSSID, ie, ielen))
 		return;
@@ -1439,8 +1451,12 @@ static void cfg80211_parse_mbssid_data(struct wiphy *wiphy,
 				continue;
 			}
 
-			cfg80211_gen_new_bssid(bssid, elem->data[0],
-					       mbssid_index_ie[2],
+			non_tx_data->bssid_index = mbssid_index_ie[2];
+			non_tx_data->max_bssid_indicator = elem->data[0];
+
+			cfg80211_gen_new_bssid(bssid,
+					       non_tx_data->max_bssid_indicator,
+					       non_tx_data->bssid_index,
 					       new_bssid);
 			memset(new_ie, 0, IEEE80211_MAX_DATA_LEN);
 			new_ie_len = cfg80211_gen_new_ie(ie, ielen, sub->data,
@@ -1457,7 +1473,8 @@ static void cfg80211_parse_mbssid_data(struct wiphy *wiphy,
 							      beacon_interval,
 							      new_ie,
 							      new_ie_len,
-							      trans_bss, gfp);
+							      non_tx_data,
+							      gfp);
 			if (!bss)
 				break;
 			cfg80211_put_bss(wiphy, bss);
@@ -1476,12 +1493,15 @@ cfg80211_inform_bss_data(struct wiphy *wiphy,
 			 gfp_t gfp)
 {
 	struct cfg80211_bss *res;
+	struct cfg80211_non_tx_bss non_tx_data;
 
 	res = cfg80211_inform_single_bss_data(wiphy, data, ftype, bssid, tsf,
 					      capability, beacon_interval, ie,
 					      ielen, NULL, gfp);
+	non_tx_data.tx_bss = res;
 	cfg80211_parse_mbssid_data(wiphy, data, ftype, bssid, tsf,
-				   beacon_interval, ie, ielen, res, gfp);
+				   beacon_interval, ie, ielen, &non_tx_data,
+				   gfp);
 	return res;
 }
 EXPORT_SYMBOL(cfg80211_inform_bss_data);
@@ -1490,7 +1510,7 @@ static void
 cfg80211_parse_mbssid_frame_data(struct wiphy *wiphy,
 				 struct cfg80211_inform_bss *data,
 				 struct ieee80211_mgmt *mgmt, size_t len,
-				 struct cfg80211_bss *trans_bss,
+				 struct cfg80211_non_tx_bss *non_tx_data,
 				 gfp_t gfp)
 {
 	enum cfg80211_bss_frame_type ftype;
@@ -1504,7 +1524,7 @@ cfg80211_parse_mbssid_frame_data(struct wiphy *wiphy,
 	cfg80211_parse_mbssid_data(wiphy, data, ftype, mgmt->bssid,
 				   le64_to_cpu(mgmt->u.probe_resp.timestamp),
 				   le16_to_cpu(mgmt->u.probe_resp.beacon_int),
-				   ie, ielen, trans_bss, gfp);
+				   ie, ielen, non_tx_data, gfp);
 }
 
 static void
@@ -1594,7 +1614,7 @@ static struct cfg80211_bss *
 cfg80211_inform_single_bss_frame_data(struct wiphy *wiphy,
 				      struct cfg80211_inform_bss *data,
 				      struct ieee80211_mgmt *mgmt, size_t len,
-				      struct cfg80211_bss *trans_bss,
+				      struct cfg80211_non_tx_bss *non_tx_data,
 				      gfp_t gfp)
 {
 	struct cfg80211_internal_bss tmp = {}, *res;
@@ -1653,11 +1673,15 @@ cfg80211_inform_single_bss_frame_data(struct wiphy *wiphy,
 	tmp.pub.chains = data->chains;
 	memcpy(tmp.pub.chain_signal, data->chain_signal, IEEE80211_MAX_CHAINS);
 	ether_addr_copy(tmp.parent_bssid, data->parent_bssid);
+	if (non_tx_data) {
+		tmp.pub.transmitted_bss = non_tx_data->tx_bss;
+		tmp.pub.bssid_index = non_tx_data->bssid_index;
+		tmp.pub.max_bssid_indicator = non_tx_data->max_bssid_indicator;
+	}
 
 	signal_valid = abs(data->chan->center_freq - channel->center_freq) <=
 		wiphy->max_adj_channel_rssi_comp;
-	res = cfg80211_bss_update(wiphy_to_rdev(wiphy), &tmp, trans_bss,
-				  signal_valid);
+	res = cfg80211_bss_update(wiphy_to_rdev(wiphy), &tmp, signal_valid);
 	if (!res)
 		return NULL;
 
@@ -1687,6 +1711,7 @@ cfg80211_inform_bss_frame_data(struct wiphy *wiphy,
 	const struct cfg80211_bss_ies *ies1, *ies2;
 	size_t ielen = len - offsetof(struct ieee80211_mgmt,
 				      u.probe_resp.variable);
+	struct cfg80211_non_tx_bss non_tx_data;
 
 	res = cfg80211_inform_single_bss_frame_data(wiphy, data, mgmt,
 						    len, NULL, gfp);
@@ -1697,8 +1722,10 @@ cfg80211_inform_bss_frame_data(struct wiphy *wiphy,
 	    !cfg80211_find_ext_ie(WLAN_EID_EXT_HE_CAPABILITY, ie, ielen))
 		return res;
 
+	non_tx_data.tx_bss = res;
 	/* process each non-transmitting bss */
-	cfg80211_parse_mbssid_frame_data(wiphy, data, mgmt, len, res, gfp);
+	cfg80211_parse_mbssid_frame_data(wiphy, data, mgmt, len,
+					 &non_tx_data, gfp);
 
 	/* check if the res has other nontransmitting bss which is not
 	 * in MBSSID IE
-- 
cgit v1.2.3


From 78ac51f81532c1e361a31ac112c1fea470ea9036 Mon Sep 17 00:00:00 2001
From: Sara Sharon <sara.sharon@intel.com>
Date: Wed, 16 Jan 2019 18:22:56 +0200
Subject: mac80211: support multi-bssid

Add support for multi-bssid.

This includes:
- Parsing multi-bssid element
- Overriding DTIM values
- Taking into account in various places the inner BSSID instead of
  transmitter BSSID
- Save aside some multi-bssid properties needed by drivers

Signed-off-by: Sara Sharon <sara.sharon@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h  |  34 +++++++++++-
 include/net/mac80211.h     |  15 ++++++
 net/mac80211/ieee80211_i.h |   7 +++
 net/mac80211/mlme.c        | 125 +++++++++++++++++++++++++++++++--------------
 net/mac80211/scan.c        |  11 +++-
 net/mac80211/util.c        | 111 +++++++++++++++++++++++++++++++++++++---
 6 files changed, 255 insertions(+), 48 deletions(-)

(limited to 'include')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 4e3a4e293348..7479f0bd50e1 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -8,7 +8,7 @@
  * Copyright (c) 2006, Michael Wu <flamingice@sourmilk.net>
  * Copyright (c) 2013 - 2014 Intel Mobile Communications GmbH
  * Copyright (c) 2016 - 2017 Intel Deutschland GmbH
- * Copyright (c) 2018        Intel Corporation
+ * Copyright (c) 2018 - 2019 Intel Corporation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -2475,6 +2475,7 @@ enum ieee80211_eid_ext {
 	WLAN_EID_EXT_HE_OPERATION = 36,
 	WLAN_EID_EXT_UORA = 37,
 	WLAN_EID_EXT_HE_MU_EDCA = 38,
+	WLAN_EID_EXT_MULTIPLE_BSSID_CONFIGURATION = 55,
 };
 
 /* Action category code */
@@ -2691,6 +2692,9 @@ enum ieee80211_tdls_actioncode {
 #define WLAN_EXT_CAPA10_TWT_REQUESTER_SUPPORT	BIT(5)
 #define WLAN_EXT_CAPA10_TWT_RESPONDER_SUPPORT	BIT(6)
 
+/* Defines support for enhanced multi-bssid advertisement*/
+#define WLAN_EXT_CAPA11_EMA_SUPPORT	BIT(1)
+
 /* TDLS specific payload type in the LLC/SNAP header */
 #define WLAN_TDLS_SNAP_RFTYPE	0x2
 
@@ -2882,6 +2886,34 @@ enum ieee80211_sa_query_action {
 	WLAN_ACTION_SA_QUERY_RESPONSE = 1,
 };
 
+/**
+ * struct ieee80211_bssid_index
+ *
+ * This structure refers to "Multiple BSSID-index element"
+ *
+ * @bssid_index: BSSID index
+ * @dtim_period: optional, overrides transmitted BSS dtim period
+ * @dtim_count: optional, overrides transmitted BSS dtim count
+ */
+struct ieee80211_bssid_index {
+	u8 bssid_index;
+	u8 dtim_period;
+	u8 dtim_count;
+};
+
+/**
+ * struct ieee80211_multiple_bssid_configuration
+ *
+ * This structure refers to "Multiple BSSID Configuration element"
+ *
+ * @bssid_count: total number of active BSSIDs in the set
+ * @profile_periodicity: the least number of beacon frames need to be received
+ *	in order to discover all the nontransmitted BSSIDs in the set.
+ */
+struct ieee80211_multiple_bssid_configuration {
+	u8 bssid_count;
+	u8 profile_periodicity;
+};
 
 #define SUITE(oui, id)	(((oui) << 8) | (id))
 
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index de866a7253c9..b0e364f50285 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -591,6 +591,14 @@ struct ieee80211_ftm_responder_params {
  * @ftm_responder: whether to enable or disable fine timing measurement FTM
  *	responder functionality.
  * @ftmr_params: configurable lci/civic parameter when enabling FTM responder.
+ * @nontransmitted: this BSS is a nontransmitted BSS profile
+ * @transmitter_bssid: the address of transmitter AP
+ * @bssid_index: index inside the multiple BSSID set
+ * @bssid_indicator: 2^bssid_indicator is the maximum number of APs in set
+ * @ema_ap: AP supports enhancements of discovery and advertisement of
+ *	nontransmitted BSSIDs
+ * @profile_periodicity: the least number of beacon frames need to be received
+ *	in order to discover all the nontransmitted BSSIDs in the set.
  */
 struct ieee80211_bss_conf {
 	const u8 *bssid;
@@ -644,6 +652,13 @@ struct ieee80211_bss_conf {
 	bool protected_keep_alive;
 	bool ftm_responder;
 	struct ieee80211_ftm_responder_params *ftmr_params;
+	/* Multiple BSSID data */
+	bool nontransmitted;
+	u8 transmitter_bssid[ETH_ALEN];
+	u8 bssid_index;
+	u8 bssid_indicator;
+	bool ema_ap;
+	u8 profile_periodicity;
 };
 
 /**
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index cc3f833db022..5795eef98771 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -1495,6 +1495,12 @@ struct ieee802_11_elems {
 	const struct ieee80211_sec_chan_offs_ie *sec_chan_offs;
 	struct ieee80211_mesh_chansw_params_ie *mesh_chansw_params_ie;
 	const struct ieee80211_bss_max_idle_period_ie *max_idle_period_ie;
+	const struct ieee80211_multiple_bssid_configuration *mbssid_config_ie;
+	const struct ieee80211_bssid_index *bssid_index;
+	const u8 *nontransmitted_bssid_profile;
+	u8 max_bssid_indicator;
+	u8 dtim_count;
+	u8 dtim_period;
 
 	/* length of them, respectively */
 	u8 ext_capab_len;
@@ -1513,6 +1519,7 @@ struct ieee802_11_elems {
 	u8 prep_len;
 	u8 perr_len;
 	u8 country_elem_len;
+	u8 bssid_index_len;
 
 	/* whether a parse error occurred while retrieving these elements */
 	bool parse_error;
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 1f41f760bd22..64b6ddb67456 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -3308,6 +3308,14 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
 		/* TODO: OPEN: what happens if BSS color disable is set? */
 	}
 
+	if (cbss->transmitted_bss) {
+		bss_conf->nontransmitted = true;
+		ether_addr_copy(bss_conf->transmitter_bssid,
+				cbss->transmitted_bss->bssid);
+		bss_conf->bssid_indicator = cbss->max_bssid_indicator;
+		bss_conf->bssid_index = cbss->bssid_index;
+	}
+
 	/*
 	 * Some APs, e.g. Netgear WNDR3700, report invalid HT operation data
 	 * in their association response, so ignore that data for our own
@@ -3692,6 +3700,16 @@ static void ieee80211_handle_beacon_sig(struct ieee80211_sub_if_data *sdata,
 	}
 }
 
+static bool ieee80211_rx_our_beacon(const u8 *tx_bssid,
+				    struct cfg80211_bss *bss)
+{
+	if (ether_addr_equal(tx_bssid, bss->bssid))
+		return true;
+	if (!bss->transmitted_bss)
+		return false;
+	return ether_addr_equal(tx_bssid, bss->transmitted_bss->bssid);
+}
+
 static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
 				     struct ieee80211_mgmt *mgmt, size_t len,
 				     struct ieee80211_rx_status *rx_status)
@@ -3733,17 +3751,16 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
 	rcu_read_unlock();
 
 	if (ifmgd->assoc_data && ifmgd->assoc_data->need_beacon &&
-	    ether_addr_equal(mgmt->bssid, ifmgd->assoc_data->bss->bssid)) {
+	    ieee80211_rx_our_beacon(mgmt->bssid, ifmgd->assoc_data->bss)) {
 		ieee802_11_parse_elems(mgmt->u.beacon.variable,
 				       len - baselen, false, &elems,
 				       mgmt->bssid,
 				       ifmgd->assoc_data->bss->bssid);
 
 		ieee80211_rx_bss_info(sdata, mgmt, len, rx_status);
-		if (elems.tim && !elems.parse_error) {
-			const struct ieee80211_tim_ie *tim_ie = elems.tim;
-			ifmgd->dtim_period = tim_ie->dtim_period;
-		}
+
+		if (elems.dtim_period)
+			ifmgd->dtim_period = elems.dtim_period;
 		ifmgd->have_beacon = true;
 		ifmgd->assoc_data->need_beacon = false;
 		if (ieee80211_hw_check(&local->hw, TIMING_BEACON_ONLY)) {
@@ -3751,12 +3768,17 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
 				le64_to_cpu(mgmt->u.beacon.timestamp);
 			sdata->vif.bss_conf.sync_device_ts =
 				rx_status->device_timestamp;
-			if (elems.tim)
-				sdata->vif.bss_conf.sync_dtim_count =
-					elems.tim->dtim_count;
-			else
-				sdata->vif.bss_conf.sync_dtim_count = 0;
+			sdata->vif.bss_conf.sync_dtim_count = elems.dtim_count;
 		}
+
+		if (elems.mbssid_config_ie)
+			bss_conf->profile_periodicity =
+				elems.mbssid_config_ie->profile_periodicity;
+
+		if (elems.ext_capab_len >= 11 &&
+		    (elems.ext_capab[10] & WLAN_EXT_CAPA11_EMA_SUPPORT))
+			bss_conf->ema_ap = true;
+
 		/* continue assoc process */
 		ifmgd->assoc_data->timeout = jiffies;
 		ifmgd->assoc_data->timeout_started = true;
@@ -3765,7 +3787,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
 	}
 
 	if (!ifmgd->associated ||
-	    !ether_addr_equal(mgmt->bssid, ifmgd->associated->bssid))
+	    !ieee80211_rx_our_beacon(mgmt->bssid,  ifmgd->associated))
 		return;
 	bssid = ifmgd->associated->bssid;
 
@@ -3861,11 +3883,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
 			le64_to_cpu(mgmt->u.beacon.timestamp);
 		sdata->vif.bss_conf.sync_device_ts =
 			rx_status->device_timestamp;
-		if (elems.tim)
-			sdata->vif.bss_conf.sync_dtim_count =
-				elems.tim->dtim_count;
-		else
-			sdata->vif.bss_conf.sync_dtim_count = 0;
+		sdata->vif.bss_conf.sync_dtim_count = elems.dtim_count;
 	}
 
 	if (ncrc == ifmgd->beacon_crc && ifmgd->beacon_crc_valid)
@@ -3891,10 +3909,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
 	 */
 	if (!ifmgd->have_beacon) {
 		/* a few bogus AP send dtim_period = 0 or no TIM IE */
-		if (elems.tim)
-			bss_conf->dtim_period = elems.tim->dtim_period ?: 1;
-		else
-			bss_conf->dtim_period = 1;
+		bss_conf->dtim_period = elems.dtim_period ?: 1;
 
 		changed |= BSS_CHANGED_BEACON_INFO;
 		ifmgd->have_beacon = true;
@@ -4761,6 +4776,40 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata,
 	return ret;
 }
 
+static bool ieee80211_get_dtim(const struct cfg80211_bss_ies *ies,
+			       u8 *dtim_count, u8 *dtim_period)
+{
+	const u8 *tim_ie = cfg80211_find_ie(WLAN_EID_TIM, ies->data, ies->len);
+	const u8 *idx_ie = cfg80211_find_ie(WLAN_EID_MULTI_BSSID_IDX, ies->data,
+					 ies->len);
+	const struct ieee80211_tim_ie *tim = NULL;
+	const struct ieee80211_bssid_index *idx;
+	bool valid = tim_ie && tim_ie[1] >= 2;
+
+	if (valid)
+		tim = (void *)(tim_ie + 2);
+
+	if (dtim_count)
+		*dtim_count = valid ? tim->dtim_count : 0;
+
+	if (dtim_period)
+		*dtim_period = valid ? tim->dtim_period : 0;
+
+	/* Check if value is overridden by non-transmitted profile */
+	if (!idx_ie || idx_ie[1] < 3)
+		return valid;
+
+	idx = (void *)(idx_ie + 2);
+
+	if (dtim_count)
+		*dtim_count = idx->dtim_count;
+
+	if (dtim_period)
+		*dtim_period = idx->dtim_period;
+
+	return true;
+}
+
 static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata,
 				     struct cfg80211_bss *cbss, bool assoc,
 				     bool override)
@@ -4852,17 +4901,13 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata,
 		rcu_read_lock();
 		ies = rcu_dereference(cbss->beacon_ies);
 		if (ies) {
-			const u8 *tim_ie;
-
 			sdata->vif.bss_conf.sync_tsf = ies->tsf;
 			sdata->vif.bss_conf.sync_device_ts =
 				bss->device_ts_beacon;
-			tim_ie = cfg80211_find_ie(WLAN_EID_TIM,
-						  ies->data, ies->len);
-			if (tim_ie && tim_ie[1] >= 2)
-				sdata->vif.bss_conf.sync_dtim_count = tim_ie[2];
-			else
-				sdata->vif.bss_conf.sync_dtim_count = 0;
+
+			ieee80211_get_dtim(ies,
+					   &sdata->vif.bss_conf.sync_dtim_count,
+					   NULL);
 		} else if (!ieee80211_hw_check(&sdata->local->hw,
 					       TIMING_BEACON_ONLY)) {
 			ies = rcu_dereference(cbss->proberesp_ies);
@@ -5332,17 +5377,12 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
 		assoc_data->timeout_started = true;
 		assoc_data->need_beacon = true;
 	} else if (beacon_ies) {
-		const u8 *tim_ie = cfg80211_find_ie(WLAN_EID_TIM,
-						    beacon_ies->data,
-						    beacon_ies->len);
+		const u8 *ie;
 		u8 dtim_count = 0;
 
-		if (tim_ie && tim_ie[1] >= sizeof(struct ieee80211_tim_ie)) {
-			const struct ieee80211_tim_ie *tim;
-			tim = (void *)(tim_ie + 2);
-			ifmgd->dtim_period = tim->dtim_period;
-			dtim_count = tim->dtim_count;
-		}
+		ieee80211_get_dtim(beacon_ies, &dtim_count,
+				   &ifmgd->dtim_period);
+
 		ifmgd->have_beacon = true;
 		assoc_data->timeout = jiffies;
 		assoc_data->timeout_started = true;
@@ -5353,6 +5393,17 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
 				bss->device_ts_beacon;
 			sdata->vif.bss_conf.sync_dtim_count = dtim_count;
 		}
+
+		ie = cfg80211_find_ext_ie(WLAN_EID_EXT_MULTIPLE_BSSID_CONFIGURATION,
+					  beacon_ies->data, beacon_ies->len);
+		if (ie && ie[1] >= 3)
+			sdata->vif.bss_conf.profile_periodicity = ie[4];
+
+		ie = cfg80211_find_ie(WLAN_EID_EXT_CAPABILITY,
+				      beacon_ies->data, beacon_ies->len);
+		if (ie && ie[1] >= 11 &&
+		    (ie[10] & WLAN_EXT_CAPA11_EMA_SUPPORT))
+			sdata->vif.bss_conf.ema_ap = true;
 	} else {
 		assoc_data->timeout = jiffies;
 		assoc_data->timeout_started = true;
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index 20211cbc63f4..0cf066700623 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -144,8 +144,8 @@ ieee80211_bss_info_update(struct ieee80211_local *local,
 			  struct ieee80211_channel *channel)
 {
 	bool beacon = ieee80211_is_beacon(mgmt->frame_control);
-	struct cfg80211_bss *cbss;
-	struct ieee80211_bss *bss;
+	struct cfg80211_bss *cbss, *non_tx_cbss;
+	struct ieee80211_bss *bss, *non_tx_bss;
 	struct cfg80211_inform_bss bss_meta = {
 		.boottime_ns = rx_status->boottime_ns,
 	};
@@ -212,6 +212,13 @@ ieee80211_bss_info_update(struct ieee80211_local *local,
 	bss = (void *)cbss->priv;
 	ieee80211_update_bss_from_elems(local, bss, &elems, rx_status, beacon);
 
+	list_for_each_entry(non_tx_cbss, &cbss->nontrans_list, nontrans_list) {
+		non_tx_bss = (void *)non_tx_cbss->priv;
+
+		ieee80211_update_bss_from_elems(local, non_tx_bss, &elems,
+						rx_status, beacon);
+	}
+
 	return bss;
 }
 
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 77882ca327de..8349c91250ef 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -891,20 +891,18 @@ void ieee80211_queue_delayed_work(struct ieee80211_hw *hw,
 }
 EXPORT_SYMBOL(ieee80211_queue_delayed_work);
 
-u32 ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action,
-			       struct ieee802_11_elems *elems,
-			       u64 filter, u32 crc, u8 *transmitter_bssid,
-			       u8 *bss_bssid)
+static u32
+_ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action,
+			    struct ieee802_11_elems *elems,
+			    u64 filter, u32 crc, u8 *transmitter_bssid,
+			    u8 *bss_bssid)
 {
-	struct element *elem;
+	const struct element *elem, *sub;
 	bool calc_crc = filter != 0;
 	DECLARE_BITMAP(seen_elems, 256);
 	const u8 *ie;
 
 	bitmap_zero(seen_elems, 256);
-	memset(elems, 0, sizeof(*elems));
-	elems->ie_start = start;
-	elems->total_len = len;
 
 	for_each_element(elem, start, len) {
 		bool elem_parse_failed;
@@ -1210,6 +1208,57 @@ u32 ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action,
 			if (elen >= sizeof(*elems->max_idle_period_ie))
 				elems->max_idle_period_ie = (void *)pos;
 			break;
+		case WLAN_EID_MULTIPLE_BSSID:
+			if (!bss_bssid || !transmitter_bssid || elen < 4)
+				break;
+
+			elems->max_bssid_indicator = pos[0];
+
+			for_each_element(sub, pos + 1, elen - 1) {
+				u8 sub_len = sub->datalen;
+				u8 new_bssid[ETH_ALEN];
+				const u8 *index;
+
+				/*
+				 * we only expect the "non-transmitted BSSID
+				 * profile" subelement (subelement id 0)
+				 */
+				if (sub->id != 0 || sub->datalen < 4) {
+					/* not a valid BSS profile */
+					continue;
+				}
+
+				if (sub->data[0] != WLAN_EID_NON_TX_BSSID_CAP ||
+				    sub->data[1] != 2) {
+					/* The first element of the
+					 * Nontransmitted BSSID Profile is not
+					 * the Nontransmitted BSSID Capability
+					 * element.
+					 */
+					continue;
+				}
+
+				/* found a Nontransmitted BSSID Profile */
+				index = cfg80211_find_ie(WLAN_EID_MULTI_BSSID_IDX,
+							 sub->data, sub_len);
+				if (!index || index[1] < 1 || index[2] == 0) {
+					/* Invalid MBSSID Index element */
+					continue;
+				}
+
+				cfg80211_gen_new_bssid(transmitter_bssid,
+						       pos[0],
+						       index[2],
+						       new_bssid);
+				if (ether_addr_equal(new_bssid, bss_bssid)) {
+					elems->nontransmitted_bssid_profile =
+						(void *)sub;
+					elems->bssid_index_len = index[1];
+					elems->bssid_index = (void *)&index[2];
+					break;
+				}
+			}
+			break;
 		case WLAN_EID_EXTENSION:
 			if (pos[0] == WLAN_EID_EXT_HE_MU_EDCA &&
 			    elen >= (sizeof(*elems->mu_edca_param_set) + 1)) {
@@ -1225,6 +1274,10 @@ u32 ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action,
 				elems->he_operation = (void *)&pos[1];
 			} else if (pos[0] == WLAN_EID_EXT_UORA && elen >= 1) {
 				elems->uora_element = (void *)&pos[1];
+			} else if (pos[0] ==
+				   WLAN_EID_EXT_MULTIPLE_BSSID_CONFIGURATION &&
+				   elen == 3) {
+				elems->mbssid_config_ie = (void *)&pos[1];
 			}
 			break;
 		default:
@@ -1243,6 +1296,48 @@ u32 ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action,
 	return crc;
 }
 
+u32 ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action,
+			       struct ieee802_11_elems *elems,
+			       u64 filter, u32 crc, u8 *transmitter_bssid,
+			       u8 *bss_bssid)
+{
+	memset(elems, 0, sizeof(*elems));
+	elems->ie_start = start;
+	elems->total_len = len;
+
+	crc = _ieee802_11_parse_elems_crc(start, len, action, elems, filter,
+					  crc, transmitter_bssid, bss_bssid);
+
+	/* Override with nontransmitted profile, if found */
+	if (transmitter_bssid && elems->nontransmitted_bssid_profile) {
+		const u8 *profile = elems->nontransmitted_bssid_profile;
+
+		_ieee802_11_parse_elems_crc(&profile[2], profile[1],
+					    action, elems, 0, 0,
+					    transmitter_bssid, bss_bssid);
+	}
+
+	if (elems->tim && !elems->parse_error) {
+		const struct ieee80211_tim_ie *tim_ie = elems->tim;
+
+		elems->dtim_period = tim_ie->dtim_period;
+		elems->dtim_count = tim_ie->dtim_count;
+	}
+
+	/* Override DTIM period and count if needed */
+	if (elems->bssid_index &&
+	    elems->bssid_index_len >=
+	    offsetofend(struct ieee80211_bssid_index, dtim_period))
+		elems->dtim_period = elems->bssid_index->dtim_period;
+
+	if (elems->bssid_index &&
+	    elems->bssid_index_len >=
+	    offsetofend(struct ieee80211_bssid_index, dtim_count))
+		elems->dtim_count = elems->bssid_index->dtim_count;
+
+	return crc;
+}
+
 void ieee80211_regulatory_limit_wmm_params(struct ieee80211_sub_if_data *sdata,
 					   struct ieee80211_tx_queue_params
 					   *qparam, int ac)
-- 
cgit v1.2.3


From caf56338c22f00098bf2acd646b0ddc691c80c24 Mon Sep 17 00:00:00 2001
From: Sara Sharon <sara.sharon@intel.com>
Date: Wed, 16 Jan 2019 23:03:25 +0200
Subject: mac80211: indicate support for multiple BSSID

Set multi-bssid support flags according to driver support.

Signed-off-by: Sara Sharon <sara.sharon@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h |  5 +++++
 include/net/mac80211.h    |  7 +++++++
 net/mac80211/debugfs.c    |  4 +++-
 net/mac80211/main.c       | 13 ++++++++++++-
 net/mac80211/mlme.c       | 15 +++++++++++++++
 5 files changed, 42 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 7479f0bd50e1..8da5ba97328f 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -2657,6 +2657,11 @@ enum ieee80211_tdls_actioncode {
  */
 #define WLAN_EXT_CAPA1_EXT_CHANNEL_SWITCHING	BIT(2)
 
+/* Multiple BSSID capability is set in the 6th bit of 3rd byte of the
+ * @WLAN_EID_EXT_CAPABILITY information element
+ */
+#define WLAN_EXT_CAPA3_MULTI_BSSID_SUPPORT	BIT(6)
+
 /* TDLS capabilities in the the 4th byte of @WLAN_EID_EXT_CAPABILITY */
 #define WLAN_EXT_CAPA4_TDLS_BUFFER_STA		BIT(4)
 #define WLAN_EXT_CAPA4_TDLS_PEER_PSM		BIT(5)
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index b0e364f50285..97aed7b1ba5d 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -2234,6 +2234,11 @@ struct ieee80211_txq {
  * @IEEE80211_HW_TX_STATUS_NO_AMPDU_LEN: Driver does not report accurate A-MPDU
  *	length in tx status information
  *
+ * @IEEE80211_HW_SUPPORTS_MULTI_BSSID: Hardware supports multi BSSID
+ *
+ * @IEEE80211_HW_SUPPORTS_ONLY_HE_MULTI_BSSID: Hardware supports multi BSSID
+ *	only for HE APs. Applies if @IEEE80211_HW_SUPPORTS_MULTI_BSSID is set.
+ *
  * @NUM_IEEE80211_HW_FLAGS: number of hardware flags, used for sizing arrays
  */
 enum ieee80211_hw_flags {
@@ -2283,6 +2288,8 @@ enum ieee80211_hw_flags {
 	IEEE80211_HW_SUPPORTS_VHT_EXT_NSS_BW,
 	IEEE80211_HW_STA_MMPDU_TXQ,
 	IEEE80211_HW_TX_STATUS_NO_AMPDU_LEN,
+	IEEE80211_HW_SUPPORTS_MULTI_BSSID,
+	IEEE80211_HW_SUPPORTS_ONLY_HE_MULTI_BSSID,
 
 	/* keep last, obviously */
 	NUM_IEEE80211_HW_FLAGS
diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c
index 343ad0a915e4..2d43bc127043 100644
--- a/net/mac80211/debugfs.c
+++ b/net/mac80211/debugfs.c
@@ -3,7 +3,7 @@
  *
  * Copyright 2007	Johannes Berg <johannes@sipsolutions.net>
  * Copyright 2013-2014  Intel Mobile Communications GmbH
- * Copyright (C) 2018 Intel Corporation
+ * Copyright (C) 2018 - 2019 Intel Corporation
  *
  * GPLv2
  *
@@ -219,6 +219,8 @@ static const char *hw_flag_names[] = {
 	FLAG(SUPPORTS_VHT_EXT_NSS_BW),
 	FLAG(STA_MMPDU_TXQ),
 	FLAG(TX_STATUS_NO_AMPDU_LEN),
+	FLAG(SUPPORTS_MULTI_BSSID),
+	FLAG(SUPPORTS_ONLY_HE_MULTI_BSSID),
 #undef FLAG
 };
 
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 71005b6dfcd1..5055aeba5c5a 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -4,7 +4,7 @@
  * Copyright 2006-2007	Jiri Benc <jbenc@suse.cz>
  * Copyright 2013-2014  Intel Mobile Communications GmbH
  * Copyright (C) 2017     Intel Deutschland GmbH
- * Copyright (C) 2018 Intel Corporation
+ * Copyright (C) 2018 - 2019 Intel Corporation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -1112,6 +1112,17 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
 	if (ieee80211_hw_check(&local->hw, CHANCTX_STA_CSA))
 		local->ext_capa[0] |= WLAN_EXT_CAPA1_EXT_CHANNEL_SWITCHING;
 
+	/* mac80211 supports multi BSSID, if the driver supports it */
+	if (ieee80211_hw_check(&local->hw, SUPPORTS_MULTI_BSSID)) {
+		local->hw.wiphy->support_mbssid = true;
+		if (ieee80211_hw_check(&local->hw,
+				       SUPPORTS_ONLY_HE_MULTI_BSSID))
+			local->hw.wiphy->support_only_he_mbssid = true;
+		else
+			local->ext_capa[2] |=
+				WLAN_EXT_CAPA3_MULTI_BSSID_SUPPORT;
+	}
+
 	local->hw.wiphy->max_num_csa_counters = IEEE80211_MAX_CSA_COUNTERS_NUM;
 
 	result = wiphy_register(local->hw.wiphy);
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 64b6ddb67456..a49fbb3f3ed7 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -813,6 +813,21 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
 		}
 	}
 
+	/* Set MBSSID support for HE AP if needed */
+	if (ieee80211_hw_check(&local->hw, SUPPORTS_ONLY_HE_MULTI_BSSID) &&
+	    !(ifmgd->flags & IEEE80211_STA_DISABLE_HE) && assoc_data->ie_len) {
+		struct element *elem;
+
+		/* we know it's writable, cast away the const */
+		elem = (void *)cfg80211_find_elem(WLAN_EID_EXT_CAPABILITY,
+						  assoc_data->ie,
+						  assoc_data->ie_len);
+
+		/* We can probably assume both always true */
+		if (elem && elem->datalen >= 3)
+			elem->data[2] |= WLAN_EXT_CAPA3_MULTI_BSSID_SUPPORT;
+	}
+
 	/* if present, add any custom IEs that go before HT */
 	if (assoc_data->ie_len) {
 		static const u8 before_ht[] = {
-- 
cgit v1.2.3


From 6ba9dd6c893b8e60639cfe34e983786068dba9fa Mon Sep 17 00:00:00 2001
From: James Schulman <james.schulman@cirrus.com>
Date: Thu, 7 Feb 2019 12:12:15 -0600
Subject: ASoC: cs35l36: Add support for Cirrus CS35L36 Amplifier

Add driver support for Cirrus Logic CS35L36 boosted
speaker amplifier

Signed-off-by: James Schulman <james.schulman@cirrus.com>
Reviewed-by: Charles Keepax <ckeepax@opensource.cirrus.com>
Acked-by: Brian Austin <brian.austin@cirrus.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/cs35l36.h    |   43 +
 sound/soc/codecs/Kconfig   |    5 +
 sound/soc/codecs/Makefile  |    2 +
 sound/soc/codecs/cs35l36.c | 1958 ++++++++++++++++++++++++++++++++++++++++++++
 sound/soc/codecs/cs35l36.h |  446 ++++++++++
 5 files changed, 2454 insertions(+)
 create mode 100644 include/sound/cs35l36.h
 create mode 100644 sound/soc/codecs/cs35l36.c
 create mode 100644 sound/soc/codecs/cs35l36.h

(limited to 'include')

diff --git a/include/sound/cs35l36.h b/include/sound/cs35l36.h
new file mode 100644
index 000000000000..8f8049d390f0
--- /dev/null
+++ b/include/sound/cs35l36.h
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * linux/sound/cs35l36.h -- Platform data for CS35L36
+ *
+ * Copyright 2018 Cirrus Logic, Inc.
+ *
+ * Author: James Schulman <james.schulman@cirrus.com>
+ *
+ */
+
+#ifndef __CS35L36_H
+#define __CS35L36_H
+
+struct cs35l36_vpbr_cfg {
+	bool is_present;
+	bool vpbr_en;
+	int vpbr_thld;
+	int vpbr_atk_rate;
+	int vpbr_atk_vol;
+	int vpbr_max_attn;
+	int vpbr_wait;
+	int vpbr_rel_rate;
+	int vpbr_mute_en;
+};
+
+struct cs35l36_platform_data {
+	bool multi_amp_mode;
+	bool dcm_mode;
+	bool amp_pcm_inv;
+	bool imon_pol_inv;
+	bool vmon_pol_inv;
+	int boost_ind;
+	int bst_vctl;
+	int bst_vctl_sel;
+	int bst_ipk;
+	bool extern_boost;
+	int temp_warn_thld;
+	int irq_drv_sel;
+	int irq_gpio_sel;
+	struct cs35l36_vpbr_cfg vpbr_config;
+};
+
+#endif /* __CS35L36_H */
diff --git a/sound/soc/codecs/Kconfig b/sound/soc/codecs/Kconfig
index e6ce18c21b98..419114edfd57 100644
--- a/sound/soc/codecs/Kconfig
+++ b/sound/soc/codecs/Kconfig
@@ -55,6 +55,7 @@ config SND_SOC_ALL_CODECS
 	select SND_SOC_CS35L33 if I2C
 	select SND_SOC_CS35L34 if I2C
 	select SND_SOC_CS35L35 if I2C
+	select SND_SOC_CS35L36 if I2C
 	select SND_SOC_CS42L42 if I2C
 	select SND_SOC_CS42L51_I2C if I2C
 	select SND_SOC_CS42L52 if I2C && INPUT
@@ -484,6 +485,10 @@ config SND_SOC_CS35L35
 	tristate "Cirrus Logic CS35L35 CODEC"
 	depends on I2C
 
+config SND_SOC_CS35L36
+	tristate "Cirrus Logic CS35L36 CODEC"
+	depends on I2C
+
 config SND_SOC_CS42L42
 	tristate "Cirrus Logic CS42L42 CODEC"
 	depends on I2C
diff --git a/sound/soc/codecs/Makefile b/sound/soc/codecs/Makefile
index b07dfb5fa700..aab2ad95a137 100644
--- a/sound/soc/codecs/Makefile
+++ b/sound/soc/codecs/Makefile
@@ -47,6 +47,7 @@ snd-soc-cs35l32-objs := cs35l32.o
 snd-soc-cs35l33-objs := cs35l33.o
 snd-soc-cs35l34-objs := cs35l34.o
 snd-soc-cs35l35-objs := cs35l35.o
+snd-soc-cs35l36-objs := cs35l36.o
 snd-soc-cs42l42-objs := cs42l42.o
 snd-soc-cs42l51-objs := cs42l51.o
 snd-soc-cs42l51-i2c-objs := cs42l51-i2c.o
@@ -319,6 +320,7 @@ obj-$(CONFIG_SND_SOC_CS35L32)	+= snd-soc-cs35l32.o
 obj-$(CONFIG_SND_SOC_CS35L33)	+= snd-soc-cs35l33.o
 obj-$(CONFIG_SND_SOC_CS35L34)	+= snd-soc-cs35l34.o
 obj-$(CONFIG_SND_SOC_CS35L35)	+= snd-soc-cs35l35.o
+obj-$(CONFIG_SND_SOC_CS35L36)	+= snd-soc-cs35l36.o
 obj-$(CONFIG_SND_SOC_CS42L42)	+= snd-soc-cs42l42.o
 obj-$(CONFIG_SND_SOC_CS42L51)	+= snd-soc-cs42l51.o
 obj-$(CONFIG_SND_SOC_CS42L51_I2C)	+= snd-soc-cs42l51-i2c.o
diff --git a/sound/soc/codecs/cs35l36.c b/sound/soc/codecs/cs35l36.c
new file mode 100644
index 000000000000..4f880a678812
--- /dev/null
+++ b/sound/soc/codecs/cs35l36.c
@@ -0,0 +1,1958 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// cs35l36.c -- CS35L36 ALSA SoC audio driver
+//
+// Copyright 2018 Cirrus Logic, Inc.
+//
+// Author: James Schulman <james.schulman@cirrus.com>
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/version.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/i2c.h>
+#include <linux/slab.h>
+#include <linux/workqueue.h>
+#include <linux/platform_device.h>
+#include <linux/regulator/consumer.h>
+#include <linux/gpio/consumer.h>
+#include <linux/of_device.h>
+#include <linux/of_gpio.h>
+#include <linux/regmap.h>
+#include <sound/core.h>
+#include <sound/pcm.h>
+#include <sound/pcm_params.h>
+#include <sound/soc.h>
+#include <sound/soc-dapm.h>
+#include <linux/gpio.h>
+#include <sound/initval.h>
+#include <sound/tlv.h>
+#include <sound/cs35l36.h>
+#include <linux/of_irq.h>
+#include <linux/completion.h>
+
+#include "cs35l36.h"
+
+/*
+ * Some fields take zero as a valid value so use a high bit flag that won't
+ * get written to the device to mark those.
+ */
+#define CS35L36_VALID_PDATA 0x80000000
+
+static const char * const cs35l36_supplies[] = {
+	"VA",
+	"VP",
+};
+
+struct  cs35l36_private {
+	struct device *dev;
+	struct cs35l36_platform_data pdata;
+	struct regmap *regmap;
+	struct regulator_bulk_data supplies[2];
+	int num_supplies;
+	int clksrc;
+	int chip_version;
+	int rev_id;
+	int ldm_mode_sel;
+	struct gpio_desc *reset_gpio;
+};
+
+struct cs35l36_pll_config {
+	int freq;
+	int clk_cfg;
+	int fll_igain;
+};
+
+static const struct cs35l36_pll_config cs35l36_pll_sysclk[] = {
+	{32768,		0x00, 0x05},
+	{8000,		0x01, 0x03},
+	{11025,		0x02, 0x03},
+	{12000,		0x03, 0x03},
+	{16000,		0x04, 0x04},
+	{22050,		0x05, 0x04},
+	{24000,		0x06, 0x04},
+	{32000,		0x07, 0x05},
+	{44100,		0x08, 0x05},
+	{48000,		0x09, 0x05},
+	{88200,		0x0A, 0x06},
+	{96000,		0x0B, 0x06},
+	{128000,	0x0C, 0x07},
+	{176400,	0x0D, 0x07},
+	{192000,	0x0E, 0x07},
+	{256000,	0x0F, 0x08},
+	{352800,	0x10, 0x08},
+	{384000,	0x11, 0x08},
+	{512000,	0x12, 0x09},
+	{705600,	0x13, 0x09},
+	{750000,	0x14, 0x09},
+	{768000,	0x15, 0x09},
+	{1000000,	0x16, 0x0A},
+	{1024000,	0x17, 0x0A},
+	{1200000,	0x18, 0x0A},
+	{1411200,	0x19, 0x0A},
+	{1500000,	0x1A, 0x0A},
+	{1536000,	0x1B, 0x0A},
+	{2000000,	0x1C, 0x0A},
+	{2048000,	0x1D, 0x0A},
+	{2400000,	0x1E, 0x0A},
+	{2822400,	0x1F, 0x0A},
+	{3000000,	0x20, 0x0A},
+	{3072000,	0x21, 0x0A},
+	{3200000,	0x22, 0x0A},
+	{4000000,	0x23, 0x0A},
+	{4096000,	0x24, 0x0A},
+	{4800000,	0x25, 0x0A},
+	{5644800,	0x26, 0x0A},
+	{6000000,	0x27, 0x0A},
+	{6144000,	0x28, 0x0A},
+	{6250000,	0x29, 0x08},
+	{6400000,	0x2A, 0x0A},
+	{6500000,	0x2B, 0x08},
+	{6750000,	0x2C, 0x09},
+	{7526400,	0x2D, 0x0A},
+	{8000000,	0x2E, 0x0A},
+	{8192000,	0x2F, 0x0A},
+	{9600000,	0x30, 0x0A},
+	{11289600,	0x31, 0x0A},
+	{12000000,	0x32, 0x0A},
+	{12288000,	0x33, 0x0A},
+	{12500000,	0x34, 0x08},
+	{12800000,	0x35, 0x0A},
+	{13000000,	0x36, 0x0A},
+	{13500000,	0x37, 0x0A},
+	{19200000,	0x38, 0x0A},
+	{22579200,	0x39, 0x0A},
+	{24000000,	0x3A, 0x0A},
+	{24576000,	0x3B, 0x0A},
+	{25000000,	0x3C, 0x0A},
+	{25600000,	0x3D, 0x0A},
+	{26000000,	0x3E, 0x0A},
+	{27000000,	0x3F, 0x0A},
+};
+
+struct reg_default cs35l36_reg[] = {
+	{CS35L36_TESTKEY_CTRL,			0x00000000},
+	{CS35L36_USERKEY_CTL,			0x00000000},
+	{CS35L36_OTP_CTRL1,			0x00002460},
+	{CS35L36_OTP_CTRL2,			0x00000000},
+	{CS35L36_OTP_CTRL3,			0x00000000},
+	{CS35L36_OTP_CTRL4,			0x00000000},
+	{CS35L36_OTP_CTRL5,			0x00000000},
+	{CS35L36_PAC_CTL1,			0x00000004},
+	{CS35L36_PAC_CTL2,			0x00000000},
+	{CS35L36_PAC_CTL3,			0x00000000},
+	{CS35L36_PWR_CTRL1,			0x00000000},
+	{CS35L36_PWR_CTRL2,			0x00003321},
+	{CS35L36_PWR_CTRL3,			0x01000010},
+	{CS35L36_CTRL_OVRRIDE,			0x00000002},
+	{CS35L36_AMP_OUT_MUTE,			0x00000000},
+	{CS35L36_OTP_TRIM_STATUS,		0x00000000},
+	{CS35L36_DISCH_FILT,			0x00000000},
+	{CS35L36_PROTECT_REL_ERR,		0x00000000},
+	{CS35L36_PAD_INTERFACE,			0x00000038},
+	{CS35L36_PLL_CLK_CTRL,			0x00000010},
+	{CS35L36_GLOBAL_CLK_CTRL,		0x00000003},
+	{CS35L36_ADC_CLK_CTRL,			0x00000000},
+	{CS35L36_SWIRE_CLK_CTRL,		0x00000000},
+	{CS35L36_SP_SCLK_CLK_CTRL,		0x00000000},
+	{CS35L36_MDSYNC_EN,			0x00000000},
+	{CS35L36_MDSYNC_TX_ID,			0x00000000},
+	{CS35L36_MDSYNC_PWR_CTRL,		0x00000000},
+	{CS35L36_MDSYNC_DATA_TX,		0x00000000},
+	{CS35L36_MDSYNC_TX_STATUS,		0x00000002},
+	{CS35L36_MDSYNC_RX_STATUS,		0x00000000},
+	{CS35L36_MDSYNC_ERR_STATUS,		0x00000000},
+	{CS35L36_BSTCVRT_VCTRL1,		0x00000000},
+	{CS35L36_BSTCVRT_VCTRL2,		0x00000001},
+	{CS35L36_BSTCVRT_PEAK_CUR,		0x0000004A},
+	{CS35L36_BSTCVRT_SFT_RAMP,		0x00000003},
+	{CS35L36_BSTCVRT_COEFF,			0x00002424},
+	{CS35L36_BSTCVRT_SLOPE_LBST,		0x00005800},
+	{CS35L36_BSTCVRT_SW_FREQ,		0x00010000},
+	{CS35L36_BSTCVRT_DCM_CTRL,		0x00002001},
+	{CS35L36_BSTCVRT_DCM_MODE_FORCE,	0x00000000},
+	{CS35L36_BSTCVRT_OVERVOLT_CTRL,		0x00000130},
+	{CS35L36_VPI_LIMIT_MODE,		0x00000000},
+	{CS35L36_VPI_LIMIT_MINMAX,		0x00003000},
+	{CS35L36_VPI_VP_THLD,			0x00101010},
+	{CS35L36_VPI_TRACK_CTRL,		0x00000000},
+	{CS35L36_VPI_TRIG_MODE_CTRL,		0x00000000},
+	{CS35L36_VPI_TRIG_STEPS,		0x00000000},
+	{CS35L36_VI_SPKMON_FILT,		0x00000003},
+	{CS35L36_VI_SPKMON_GAIN,		0x00000909},
+	{CS35L36_VI_SPKMON_IP_SEL,		0x00000000},
+	{CS35L36_DTEMP_WARN_THLD,		0x00000002},
+	{CS35L36_DTEMP_STATUS,			0x00000000},
+	{CS35L36_VPVBST_FS_SEL,			0x00000001},
+	{CS35L36_VPVBST_VP_CTRL,		0x000001C0},
+	{CS35L36_VPVBST_VBST_CTRL,		0x000001C0},
+	{CS35L36_ASP_TX_PIN_CTRL,		0x00000028},
+	{CS35L36_ASP_RATE_CTRL,			0x00090000},
+	{CS35L36_ASP_FORMAT,			0x00000002},
+	{CS35L36_ASP_FRAME_CTRL,		0x00180018},
+	{CS35L36_ASP_TX1_TX2_SLOT,		0x00010000},
+	{CS35L36_ASP_TX3_TX4_SLOT,		0x00030002},
+	{CS35L36_ASP_TX5_TX6_SLOT,		0x00050004},
+	{CS35L36_ASP_TX7_TX8_SLOT,		0x00070006},
+	{CS35L36_ASP_RX1_SLOT,			0x00000000},
+	{CS35L36_ASP_RX_TX_EN,			0x00000000},
+	{CS35L36_ASP_RX1_SEL,			0x00000008},
+	{CS35L36_ASP_TX1_SEL,			0x00000018},
+	{CS35L36_ASP_TX2_SEL,			0x00000019},
+	{CS35L36_ASP_TX3_SEL,			0x00000028},
+	{CS35L36_ASP_TX4_SEL,			0x00000029},
+	{CS35L36_ASP_TX5_SEL,			0x00000020},
+	{CS35L36_ASP_TX6_SEL,			0x00000000},
+	{CS35L36_SWIRE_P1_TX1_SEL,		0x00000018},
+	{CS35L36_SWIRE_P1_TX2_SEL,		0x00000019},
+	{CS35L36_SWIRE_P2_TX1_SEL,		0x00000028},
+	{CS35L36_SWIRE_P2_TX2_SEL,		0x00000029},
+	{CS35L36_SWIRE_P2_TX3_SEL,		0x00000020},
+	{CS35L36_SWIRE_DP1_FIFO_CFG,		0x0000001B},
+	{CS35L36_SWIRE_DP2_FIFO_CFG,		0x0000001B},
+	{CS35L36_SWIRE_DP3_FIFO_CFG,		0x0000001B},
+	{CS35L36_SWIRE_PCM_RX_DATA,		0x00000000},
+	{CS35L36_SWIRE_FS_SEL,			0x00000001},
+	{CS35L36_AMP_DIG_VOL_CTRL,		0x00008000},
+	{CS35L36_VPBR_CFG,			0x02AA1905},
+	{CS35L36_VBBR_CFG,			0x02AA1905},
+	{CS35L36_VPBR_STATUS,			0x00000000},
+	{CS35L36_VBBR_STATUS,			0x00000000},
+	{CS35L36_OVERTEMP_CFG,			0x00000001},
+	{CS35L36_AMP_ERR_VOL,			0x00000000},
+	{CS35L36_CLASSH_CFG,			0x000B0405},
+	{CS35L36_CLASSH_FET_DRV_CFG,		0x00000111},
+	{CS35L36_NG_CFG,			0x00000033},
+	{CS35L36_AMP_GAIN_CTRL,			0x00000273},
+	{CS35L36_PWM_MOD_IO_CTRL,		0x00000000},
+	{CS35L36_PWM_MOD_STATUS,		0x00000000},
+	{CS35L36_DAC_MSM_CFG,			0x00000000},
+	{CS35L36_AMP_SLOPE_CTRL,		0x00000B00},
+	{CS35L36_AMP_PDM_VOLUME,		0x00000000},
+	{CS35L36_AMP_PDM_RATE_CTRL,		0x00000000},
+	{CS35L36_PDM_CH_SEL,			0x00000000},
+	{CS35L36_AMP_NG_CTRL,			0x0000212F},
+	{CS35L36_PDM_HIGHFILT_CTRL,		0x00000000},
+	{CS35L36_PAC_INT0_CTRL,			0x00000001},
+	{CS35L36_PAC_INT1_CTRL,			0x00000001},
+	{CS35L36_PAC_INT2_CTRL,			0x00000001},
+	{CS35L36_PAC_INT3_CTRL,			0x00000001},
+	{CS35L36_PAC_INT4_CTRL,			0x00000001},
+	{CS35L36_PAC_INT5_CTRL,			0x00000001},
+	{CS35L36_PAC_INT6_CTRL,			0x00000001},
+	{CS35L36_PAC_INT7_CTRL,			0x00000001},
+};
+
+bool cs35l36_readable_reg(struct device *dev, unsigned int reg)
+{
+	switch (reg) {
+	case CS35L36_SW_RESET:
+	case CS35L36_SW_REV:
+	case CS35L36_HW_REV:
+	case CS35L36_TESTKEY_CTRL:
+	case CS35L36_USERKEY_CTL:
+	case CS35L36_OTP_MEM30:
+	case CS35L36_OTP_CTRL1:
+	case CS35L36_OTP_CTRL2:
+	case CS35L36_OTP_CTRL3:
+	case CS35L36_OTP_CTRL4:
+	case CS35L36_OTP_CTRL5:
+	case CS35L36_PAC_CTL1:
+	case CS35L36_PAC_CTL2:
+	case CS35L36_PAC_CTL3:
+	case CS35L36_DEVICE_ID:
+	case CS35L36_FAB_ID:
+	case CS35L36_REV_ID:
+	case CS35L36_PWR_CTRL1:
+	case CS35L36_PWR_CTRL2:
+	case CS35L36_PWR_CTRL3:
+	case CS35L36_CTRL_OVRRIDE:
+	case CS35L36_AMP_OUT_MUTE:
+	case CS35L36_OTP_TRIM_STATUS:
+	case CS35L36_DISCH_FILT:
+	case CS35L36_PROTECT_REL_ERR:
+	case CS35L36_PAD_INTERFACE:
+	case CS35L36_PLL_CLK_CTRL:
+	case CS35L36_GLOBAL_CLK_CTRL:
+	case CS35L36_ADC_CLK_CTRL:
+	case CS35L36_SWIRE_CLK_CTRL:
+	case CS35L36_SP_SCLK_CLK_CTRL:
+	case CS35L36_TST_FS_MON0:
+	case CS35L36_MDSYNC_EN:
+	case CS35L36_MDSYNC_TX_ID:
+	case CS35L36_MDSYNC_PWR_CTRL:
+	case CS35L36_MDSYNC_DATA_TX:
+	case CS35L36_MDSYNC_TX_STATUS:
+	case CS35L36_MDSYNC_RX_STATUS:
+	case CS35L36_MDSYNC_ERR_STATUS:
+	case CS35L36_BSTCVRT_VCTRL1:
+	case CS35L36_BSTCVRT_VCTRL2:
+	case CS35L36_BSTCVRT_PEAK_CUR:
+	case CS35L36_BSTCVRT_SFT_RAMP:
+	case CS35L36_BSTCVRT_COEFF:
+	case CS35L36_BSTCVRT_SLOPE_LBST:
+	case CS35L36_BSTCVRT_SW_FREQ:
+	case CS35L36_BSTCVRT_DCM_CTRL:
+	case CS35L36_BSTCVRT_DCM_MODE_FORCE:
+	case CS35L36_BSTCVRT_OVERVOLT_CTRL:
+	case CS35L36_BST_TST_MANUAL:
+	case CS35L36_BST_ANA2_TEST:
+	case CS35L36_VPI_LIMIT_MODE:
+	case CS35L36_VPI_LIMIT_MINMAX:
+	case CS35L36_VPI_VP_THLD:
+	case CS35L36_VPI_TRACK_CTRL:
+	case CS35L36_VPI_TRIG_MODE_CTRL:
+	case CS35L36_VPI_TRIG_STEPS:
+	case CS35L36_VI_SPKMON_FILT:
+	case CS35L36_VI_SPKMON_GAIN:
+	case CS35L36_VI_SPKMON_IP_SEL:
+	case CS35L36_DTEMP_WARN_THLD:
+	case CS35L36_DTEMP_STATUS:
+	case CS35L36_VPVBST_FS_SEL:
+	case CS35L36_VPVBST_VP_CTRL:
+	case CS35L36_VPVBST_VBST_CTRL:
+	case CS35L36_ASP_TX_PIN_CTRL:
+	case CS35L36_ASP_RATE_CTRL:
+	case CS35L36_ASP_FORMAT:
+	case CS35L36_ASP_FRAME_CTRL:
+	case CS35L36_ASP_TX1_TX2_SLOT:
+	case CS35L36_ASP_TX3_TX4_SLOT:
+	case CS35L36_ASP_TX5_TX6_SLOT:
+	case CS35L36_ASP_TX7_TX8_SLOT:
+	case CS35L36_ASP_RX1_SLOT:
+	case CS35L36_ASP_RX_TX_EN:
+	case CS35L36_ASP_RX1_SEL:
+	case CS35L36_ASP_TX1_SEL:
+	case CS35L36_ASP_TX2_SEL:
+	case CS35L36_ASP_TX3_SEL:
+	case CS35L36_ASP_TX4_SEL:
+	case CS35L36_ASP_TX5_SEL:
+	case CS35L36_ASP_TX6_SEL:
+	case CS35L36_SWIRE_P1_TX1_SEL:
+	case CS35L36_SWIRE_P1_TX2_SEL:
+	case CS35L36_SWIRE_P2_TX1_SEL:
+	case CS35L36_SWIRE_P2_TX2_SEL:
+	case CS35L36_SWIRE_P2_TX3_SEL:
+	case CS35L36_SWIRE_DP1_FIFO_CFG:
+	case CS35L36_SWIRE_DP2_FIFO_CFG:
+	case CS35L36_SWIRE_DP3_FIFO_CFG:
+	case CS35L36_SWIRE_PCM_RX_DATA:
+	case CS35L36_SWIRE_FS_SEL:
+	case CS35L36_AMP_DIG_VOL_CTRL:
+	case CS35L36_VPBR_CFG:
+	case CS35L36_VBBR_CFG:
+	case CS35L36_VPBR_STATUS:
+	case CS35L36_VBBR_STATUS:
+	case CS35L36_OVERTEMP_CFG:
+	case CS35L36_AMP_ERR_VOL:
+	case CS35L36_CLASSH_CFG:
+	case CS35L36_CLASSH_FET_DRV_CFG:
+	case CS35L36_NG_CFG:
+	case CS35L36_AMP_GAIN_CTRL:
+	case CS35L36_PWM_MOD_IO_CTRL:
+	case CS35L36_PWM_MOD_STATUS:
+	case CS35L36_DAC_MSM_CFG:
+	case CS35L36_AMP_SLOPE_CTRL:
+	case CS35L36_AMP_PDM_VOLUME:
+	case CS35L36_AMP_PDM_RATE_CTRL:
+	case CS35L36_PDM_CH_SEL:
+	case CS35L36_AMP_NG_CTRL:
+	case CS35L36_PDM_HIGHFILT_CTRL:
+	case CS35L36_INT1_STATUS:
+	case CS35L36_INT2_STATUS:
+	case CS35L36_INT3_STATUS:
+	case CS35L36_INT4_STATUS:
+	case CS35L36_INT1_RAW_STATUS:
+	case CS35L36_INT2_RAW_STATUS:
+	case CS35L36_INT3_RAW_STATUS:
+	case CS35L36_INT4_RAW_STATUS:
+	case CS35L36_INT1_MASK:
+	case CS35L36_INT2_MASK:
+	case CS35L36_INT3_MASK:
+	case CS35L36_INT4_MASK:
+	case CS35L36_INT1_EDGE_LVL_CTRL:
+	case CS35L36_INT3_EDGE_LVL_CTRL:
+	case CS35L36_PAC_INT_STATUS:
+	case CS35L36_PAC_INT_RAW_STATUS:
+	case CS35L36_PAC_INT_FLUSH_CTRL:
+	case CS35L36_PAC_INT0_CTRL:
+	case CS35L36_PAC_INT1_CTRL:
+	case CS35L36_PAC_INT2_CTRL:
+	case CS35L36_PAC_INT3_CTRL:
+	case CS35L36_PAC_INT4_CTRL:
+	case CS35L36_PAC_INT5_CTRL:
+	case CS35L36_PAC_INT6_CTRL:
+	case CS35L36_PAC_INT7_CTRL:
+		return true;
+	default:
+		if (reg >= CS35L36_PAC_PMEM_WORD0 &&
+			reg <= CS35L36_PAC_PMEM_WORD1023)
+			return true;
+		else
+			return false;
+	}
+}
+
+bool cs35l36_precious_reg(struct device *dev, unsigned int reg)
+{
+	switch (reg) {
+	case CS35L36_TESTKEY_CTRL:
+	case CS35L36_USERKEY_CTL:
+	case CS35L36_TST_FS_MON0:
+		return true;
+	default:
+		return false;
+	}
+}
+
+bool cs35l36_volatile_reg(struct device *dev, unsigned int reg)
+{
+	switch (reg) {
+	case CS35L36_SW_RESET:
+	case CS35L36_SW_REV:
+	case CS35L36_HW_REV:
+	case CS35L36_TESTKEY_CTRL:
+	case CS35L36_USERKEY_CTL:
+	case CS35L36_DEVICE_ID:
+	case CS35L36_FAB_ID:
+	case CS35L36_REV_ID:
+	case CS35L36_INT1_STATUS:
+	case CS35L36_INT2_STATUS:
+	case CS35L36_INT3_STATUS:
+	case CS35L36_INT4_STATUS:
+	case CS35L36_INT1_RAW_STATUS:
+	case CS35L36_INT2_RAW_STATUS:
+	case CS35L36_INT3_RAW_STATUS:
+	case CS35L36_INT4_RAW_STATUS:
+	case CS35L36_INT1_MASK:
+	case CS35L36_INT2_MASK:
+	case CS35L36_INT3_MASK:
+	case CS35L36_INT4_MASK:
+	case CS35L36_INT1_EDGE_LVL_CTRL:
+	case CS35L36_INT3_EDGE_LVL_CTRL:
+	case CS35L36_PAC_INT_STATUS:
+	case CS35L36_PAC_INT_RAW_STATUS:
+	case CS35L36_PAC_INT_FLUSH_CTRL:
+		return true;
+	default:
+		if (reg >= CS35L36_PAC_PMEM_WORD0 &&
+			reg <= CS35L36_PAC_PMEM_WORD1023)
+			return true;
+		else
+			return false;
+	}
+}
+
+static DECLARE_TLV_DB_SCALE(dig_vol_tlv, -10200, 25, 0);
+static DECLARE_TLV_DB_SCALE(amp_gain_tlv, 0, 1, 1);
+
+static const char * const cs35l36_pcm_sftramp_text[] =  {
+	"Off", ".5ms", "1ms", "2ms", "4ms", "8ms", "15ms", "30ms"};
+
+static SOC_ENUM_SINGLE_DECL(pcm_sft_ramp, CS35L36_AMP_DIG_VOL_CTRL, 0,
+			    cs35l36_pcm_sftramp_text);
+
+static int cs35l36_ldm_sel_get(struct snd_kcontrol *kcontrol,
+			       struct snd_ctl_elem_value *ucontrol)
+{
+	struct snd_soc_component *component =
+			snd_soc_kcontrol_component(kcontrol);
+	struct cs35l36_private *cs35l36 =
+			snd_soc_component_get_drvdata(component);
+
+	ucontrol->value.integer.value[0] = cs35l36->ldm_mode_sel;
+
+	return 0;
+}
+
+static int cs35l36_ldm_sel_put(struct snd_kcontrol *kcontrol,
+			       struct snd_ctl_elem_value *ucontrol)
+{
+	struct snd_soc_component *component =
+			snd_soc_kcontrol_component(kcontrol);
+	struct cs35l36_private *cs35l36 =
+			snd_soc_component_get_drvdata(component);
+	int val = (ucontrol->value.integer.value[0]) ? CS35L36_NG_AMP_EN_MASK :
+						       0;
+
+	cs35l36->ldm_mode_sel = val;
+
+	regmap_update_bits(cs35l36->regmap, CS35L36_NG_CFG,
+			   CS35L36_NG_AMP_EN_MASK, val);
+
+	return 0;
+}
+
+static const struct snd_kcontrol_new cs35l36_aud_controls[] = {
+	SOC_SINGLE_SX_TLV("Digital PCM Volume", CS35L36_AMP_DIG_VOL_CTRL,
+		3, 0x4D0, 0x390, dig_vol_tlv),
+	SOC_SINGLE_TLV("Analog PCM Volume", CS35L36_AMP_GAIN_CTRL, 5, 0x13, 0,
+		amp_gain_tlv),
+	SOC_ENUM("PCM Soft Ramp", pcm_sft_ramp),
+	SOC_SINGLE("Amp Gain Zero-Cross Switch", CS35L36_AMP_GAIN_CTRL,
+		CS35L36_AMP_ZC_SHIFT, 1, 0),
+	SOC_SINGLE("PDM LDM Enter Ramp Switch", CS35L36_DAC_MSM_CFG,
+		CS35L36_PDM_LDM_ENTER_SHIFT, 1, 0),
+	SOC_SINGLE("PDM LDM Exit Ramp Switch", CS35L36_DAC_MSM_CFG,
+		CS35L36_PDM_LDM_EXIT_SHIFT, 1, 0),
+	SOC_SINGLE_BOOL_EXT("LDM Select Switch", 0, cs35l36_ldm_sel_get,
+		cs35l36_ldm_sel_put),
+};
+
+static int cs35l36_main_amp_event(struct snd_soc_dapm_widget *w,
+				  struct snd_kcontrol *kcontrol, int event)
+{
+	struct snd_soc_component *component =
+			snd_soc_dapm_to_component(w->dapm);
+	struct cs35l36_private *cs35l36 =
+			snd_soc_component_get_drvdata(component);
+	u32 reg;
+
+	switch (event) {
+	case SND_SOC_DAPM_POST_PMU:
+		regmap_update_bits(cs35l36->regmap, CS35L36_PWR_CTRL1,
+				   CS35L36_GLOBAL_EN_MASK,
+				   1 << CS35L36_GLOBAL_EN_SHIFT);
+
+		usleep_range(2000, 2100);
+
+		regmap_read(cs35l36->regmap, CS35L36_INT4_RAW_STATUS, &reg);
+
+		if (WARN_ON_ONCE(reg & CS35L36_PLL_UNLOCK_MASK))
+			dev_crit(cs35l36->dev, "PLL Unlocked\n");
+
+		regmap_update_bits(cs35l36->regmap, CS35L36_ASP_RX1_SEL,
+				   CS35L36_PCM_RX_SEL_MASK,
+				   CS35L36_PCM_RX_SEL_PCM);
+		regmap_update_bits(cs35l36->regmap, CS35L36_AMP_OUT_MUTE,
+				   CS35L36_AMP_MUTE_MASK,
+				   0 << CS35L36_AMP_MUTE_SHIFT);
+		break;
+	case SND_SOC_DAPM_PRE_PMD:
+		regmap_update_bits(cs35l36->regmap, CS35L36_ASP_RX1_SEL,
+				   CS35L36_PCM_RX_SEL_MASK,
+				   CS35L36_PCM_RX_SEL_ZERO);
+		regmap_update_bits(cs35l36->regmap, CS35L36_AMP_OUT_MUTE,
+				   CS35L36_AMP_MUTE_MASK,
+				   1 << CS35L36_AMP_MUTE_SHIFT);
+		break;
+	case SND_SOC_DAPM_POST_PMD:
+		regmap_update_bits(cs35l36->regmap, CS35L36_PWR_CTRL1,
+				   CS35L36_GLOBAL_EN_MASK,
+				   0 << CS35L36_GLOBAL_EN_SHIFT);
+
+		usleep_range(2000, 2100);
+		break;
+	default:
+		dev_dbg(component->dev, "Invalid event = 0x%x\n", event);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int cs35l36_boost_event(struct snd_soc_dapm_widget *w,
+			       struct snd_kcontrol *kcontrol, int event)
+{
+	struct snd_soc_component *component =
+			snd_soc_dapm_to_component(w->dapm);
+	struct cs35l36_private *cs35l36 =
+			snd_soc_component_get_drvdata(component);
+
+	switch (event) {
+	case SND_SOC_DAPM_POST_PMU:
+		if (!cs35l36->pdata.extern_boost)
+			regmap_update_bits(cs35l36->regmap, CS35L36_PWR_CTRL2,
+					   CS35L36_BST_EN_MASK,
+					   CS35L36_BST_EN <<
+					   CS35L36_BST_EN_SHIFT);
+		break;
+	case SND_SOC_DAPM_POST_PMD:
+		if (!cs35l36->pdata.extern_boost)
+			regmap_update_bits(cs35l36->regmap, CS35L36_PWR_CTRL2,
+					   CS35L36_BST_EN_MASK,
+					   CS35L36_BST_DIS_VP <<
+					   CS35L36_BST_EN_SHIFT);
+		break;
+	default:
+		dev_dbg(component->dev, "Invalid event = 0x%x\n", event);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static const char * const cs35l36_chan_text[] = {
+	"RX1",
+	"RX2",
+};
+
+static SOC_ENUM_SINGLE_DECL(chansel_enum, CS35L36_ASP_RX1_SLOT, 0,
+			    cs35l36_chan_text);
+
+static const struct snd_kcontrol_new cs35l36_chan_mux =
+		SOC_DAPM_ENUM("Input Mux", chansel_enum);
+
+static const struct snd_kcontrol_new amp_enable_ctrl =
+		SOC_DAPM_SINGLE_AUTODISABLE("Switch", CS35L36_AMP_OUT_MUTE,
+					    CS35L36_AMP_MUTE_SHIFT, 1, 1);
+
+static const struct snd_kcontrol_new boost_ctrl =
+		SOC_DAPM_SINGLE_VIRT("Switch", 1);
+
+static const char * const asp_tx_src_text[] = {
+	"Zero Fill", "ASPRX1", "VMON", "IMON", "ERRVOL", "VPMON", "VBSTMON"
+};
+
+static const unsigned int asp_tx_src_values[] = {
+	0x00, 0x08, 0x18, 0x19, 0x20, 0x28, 0x29
+};
+
+static SOC_VALUE_ENUM_SINGLE_DECL(asp_tx1_src_enum, CS35L36_ASP_TX1_SEL, 0,
+				  CS35L36_APS_TX_SEL_MASK, asp_tx_src_text,
+				  asp_tx_src_values);
+
+static const struct snd_kcontrol_new asp_tx1_src =
+		SOC_DAPM_ENUM("ASPTX1SRC", asp_tx1_src_enum);
+
+static SOC_VALUE_ENUM_SINGLE_DECL(asp_tx2_src_enum, CS35L36_ASP_TX2_SEL, 0,
+				  CS35L36_APS_TX_SEL_MASK, asp_tx_src_text,
+				  asp_tx_src_values);
+
+static const struct snd_kcontrol_new asp_tx2_src =
+		SOC_DAPM_ENUM("ASPTX2SRC", asp_tx2_src_enum);
+
+static SOC_VALUE_ENUM_SINGLE_DECL(asp_tx3_src_enum, CS35L36_ASP_TX3_SEL, 0,
+				  CS35L36_APS_TX_SEL_MASK, asp_tx_src_text,
+				  asp_tx_src_values);
+
+static const struct snd_kcontrol_new asp_tx3_src =
+		SOC_DAPM_ENUM("ASPTX3SRC", asp_tx3_src_enum);
+
+static SOC_VALUE_ENUM_SINGLE_DECL(asp_tx4_src_enum, CS35L36_ASP_TX4_SEL, 0,
+				  CS35L36_APS_TX_SEL_MASK, asp_tx_src_text,
+				  asp_tx_src_values);
+
+static const struct snd_kcontrol_new asp_tx4_src =
+		SOC_DAPM_ENUM("ASPTX4SRC", asp_tx4_src_enum);
+
+static SOC_VALUE_ENUM_SINGLE_DECL(asp_tx5_src_enum, CS35L36_ASP_TX5_SEL, 0,
+				  CS35L36_APS_TX_SEL_MASK, asp_tx_src_text,
+				  asp_tx_src_values);
+
+static const struct snd_kcontrol_new asp_tx5_src =
+		SOC_DAPM_ENUM("ASPTX5SRC", asp_tx5_src_enum);
+
+static SOC_VALUE_ENUM_SINGLE_DECL(asp_tx6_src_enum, CS35L36_ASP_TX6_SEL, 0,
+				  CS35L36_APS_TX_SEL_MASK, asp_tx_src_text,
+				  asp_tx_src_values);
+
+static const struct snd_kcontrol_new asp_tx6_src =
+		SOC_DAPM_ENUM("ASPTX6SRC", asp_tx6_src_enum);
+
+static const struct snd_soc_dapm_widget cs35l36_dapm_widgets[] = {
+	SND_SOC_DAPM_MUX("Channel Mux", SND_SOC_NOPM, 0, 0, &cs35l36_chan_mux),
+	SND_SOC_DAPM_AIF_IN("SDIN", NULL, 0, CS35L36_ASP_RX_TX_EN, 16, 0),
+
+	SND_SOC_DAPM_OUT_DRV_E("Main AMP", CS35L36_PWR_CTRL2, 0, 0, NULL, 0,
+			       cs35l36_main_amp_event, SND_SOC_DAPM_POST_PMD |
+			       SND_SOC_DAPM_POST_PMU | SND_SOC_DAPM_PRE_PMD),
+
+	SND_SOC_DAPM_OUTPUT("SPK"),
+	SND_SOC_DAPM_SWITCH("AMP Enable", SND_SOC_NOPM, 0, 1, &amp_enable_ctrl),
+	SND_SOC_DAPM_MIXER("CLASS H", CS35L36_PWR_CTRL3, 4, 0, NULL, 0),
+	SND_SOC_DAPM_SWITCH_E("BOOST Enable", SND_SOC_NOPM, 0, 0, &boost_ctrl,
+			      cs35l36_boost_event, SND_SOC_DAPM_POST_PMD |
+			      SND_SOC_DAPM_POST_PMU),
+
+	SND_SOC_DAPM_AIF_OUT("ASPTX1", NULL, 0, CS35L36_ASP_RX_TX_EN, 0, 0),
+	SND_SOC_DAPM_AIF_OUT("ASPTX2", NULL, 1, CS35L36_ASP_RX_TX_EN, 1, 0),
+	SND_SOC_DAPM_AIF_OUT("ASPTX3", NULL, 2, CS35L36_ASP_RX_TX_EN, 2, 0),
+	SND_SOC_DAPM_AIF_OUT("ASPTX4", NULL, 3, CS35L36_ASP_RX_TX_EN, 3, 0),
+	SND_SOC_DAPM_AIF_OUT("ASPTX5", NULL, 4, CS35L36_ASP_RX_TX_EN, 4, 0),
+	SND_SOC_DAPM_AIF_OUT("ASPTX6", NULL, 5, CS35L36_ASP_RX_TX_EN, 5, 0),
+
+	SND_SOC_DAPM_MUX("ASPTX1SRC", SND_SOC_NOPM, 0, 0, &asp_tx1_src),
+	SND_SOC_DAPM_MUX("ASPTX2SRC", SND_SOC_NOPM, 0, 0, &asp_tx2_src),
+	SND_SOC_DAPM_MUX("ASPTX3SRC", SND_SOC_NOPM, 0, 0, &asp_tx3_src),
+	SND_SOC_DAPM_MUX("ASPTX4SRC", SND_SOC_NOPM, 0, 0, &asp_tx4_src),
+	SND_SOC_DAPM_MUX("ASPTX5SRC", SND_SOC_NOPM, 0, 0, &asp_tx5_src),
+	SND_SOC_DAPM_MUX("ASPTX6SRC", SND_SOC_NOPM, 0, 0, &asp_tx6_src),
+
+	SND_SOC_DAPM_ADC("VMON ADC", NULL, CS35L36_PWR_CTRL2, 12, 0),
+	SND_SOC_DAPM_ADC("IMON ADC", NULL, CS35L36_PWR_CTRL2, 13, 0),
+	SND_SOC_DAPM_ADC("VPMON ADC", NULL, CS35L36_PWR_CTRL2, 8, 0),
+	SND_SOC_DAPM_ADC("VBSTMON ADC", NULL, CS35L36_PWR_CTRL2, 9, 0),
+
+	SND_SOC_DAPM_INPUT("VP"),
+	SND_SOC_DAPM_INPUT("VBST"),
+	SND_SOC_DAPM_INPUT("VSENSE"),
+};
+
+static const struct snd_soc_dapm_route cs35l36_audio_map[] = {
+	{"VPMON ADC", NULL, "VP"},
+	{"VBSTMON ADC", NULL, "VBST"},
+	{"IMON ADC", NULL, "VSENSE"},
+	{"VMON ADC", NULL, "VSENSE"},
+
+	{"ASPTX1SRC", "IMON", "IMON ADC"},
+	{"ASPTX1SRC", "VMON", "VMON ADC"},
+	{"ASPTX1SRC", "VBSTMON", "VBSTMON ADC"},
+	{"ASPTX1SRC", "VPMON", "VPMON ADC"},
+
+	{"ASPTX2SRC", "IMON", "IMON ADC"},
+	{"ASPTX2SRC", "VMON", "VMON ADC"},
+	{"ASPTX2SRC", "VBSTMON", "VBSTMON ADC"},
+	{"ASPTX2SRC", "VPMON", "VPMON ADC"},
+
+	{"ASPTX3SRC", "IMON", "IMON ADC"},
+	{"ASPTX3SRC", "VMON", "VMON ADC"},
+	{"ASPTX3SRC", "VBSTMON", "VBSTMON ADC"},
+	{"ASPTX3SRC", "VPMON", "VPMON ADC"},
+
+	{"ASPTX4SRC", "IMON", "IMON ADC"},
+	{"ASPTX4SRC", "VMON", "VMON ADC"},
+	{"ASPTX4SRC", "VBSTMON", "VBSTMON ADC"},
+	{"ASPTX4SRC", "VPMON", "VPMON ADC"},
+
+	{"ASPTX5SRC", "IMON", "IMON ADC"},
+	{"ASPTX5SRC", "VMON", "VMON ADC"},
+	{"ASPTX5SRC", "VBSTMON", "VBSTMON ADC"},
+	{"ASPTX5SRC", "VPMON", "VPMON ADC"},
+
+	{"ASPTX6SRC", "IMON", "IMON ADC"},
+	{"ASPTX6SRC", "VMON", "VMON ADC"},
+	{"ASPTX6SRC", "VBSTMON", "VBSTMON ADC"},
+	{"ASPTX6SRC", "VPMON", "VPMON ADC"},
+
+	{"ASPTX1", NULL, "ASPTX1SRC"},
+	{"ASPTX2", NULL, "ASPTX2SRC"},
+	{"ASPTX3", NULL, "ASPTX3SRC"},
+	{"ASPTX4", NULL, "ASPTX4SRC"},
+	{"ASPTX5", NULL, "ASPTX5SRC"},
+	{"ASPTX6", NULL, "ASPTX6SRC"},
+
+	{"AMP Capture", NULL, "ASPTX1"},
+	{"AMP Capture", NULL, "ASPTX2"},
+	{"AMP Capture", NULL, "ASPTX3"},
+	{"AMP Capture", NULL, "ASPTX4"},
+	{"AMP Capture", NULL, "ASPTX5"},
+	{"AMP Capture", NULL, "ASPTX6"},
+
+	{"AMP Enable", "Switch", "AMP Playback"},
+	{"SDIN", NULL, "AMP Enable"},
+	{"Channel Mux", "RX1", "SDIN"},
+	{"Channel Mux", "RX2", "SDIN"},
+	{"BOOST Enable", "Switch", "Channel Mux"},
+	{"CLASS H", NULL, "BOOST Enable"},
+	{"Main AMP", NULL, "Channel Mux"},
+	{"Main AMP", NULL, "CLASS H"},
+	{"SPK", NULL, "Main AMP"},
+};
+
+static int cs35l36_set_dai_fmt(struct snd_soc_dai *component_dai,
+			       unsigned int fmt)
+{
+	struct cs35l36_private *cs35l36 =
+			snd_soc_component_get_drvdata(component_dai->component);
+	unsigned int asp_fmt, lrclk_fmt, sclk_fmt, slave_mode, clk_frc;
+
+	switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) {
+	case SND_SOC_DAIFMT_CBM_CFM:
+		slave_mode = 1;
+		break;
+	case SND_SOC_DAIFMT_CBS_CFS:
+		slave_mode = 0;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	regmap_update_bits(cs35l36->regmap, CS35L36_ASP_TX_PIN_CTRL,
+				CS35L36_SCLK_MSTR_MASK,
+				slave_mode << CS35L36_SCLK_MSTR_SHIFT);
+	regmap_update_bits(cs35l36->regmap, CS35L36_ASP_RATE_CTRL,
+				CS35L36_LRCLK_MSTR_MASK,
+				slave_mode << CS35L36_LRCLK_MSTR_SHIFT);
+
+	switch (fmt & SND_SOC_DAIFMT_CLOCK_MASK) {
+	case SND_SOC_DAIFMT_CONT:
+		clk_frc = 1;
+		break;
+	case SND_SOC_DAIFMT_GATED:
+		clk_frc = 0;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	regmap_update_bits(cs35l36->regmap, CS35L36_ASP_TX_PIN_CTRL,
+			   CS35L36_SCLK_FRC_MASK, clk_frc <<
+			   CS35L36_SCLK_FRC_SHIFT);
+	regmap_update_bits(cs35l36->regmap, CS35L36_ASP_RATE_CTRL,
+			   CS35L36_LRCLK_FRC_MASK, clk_frc <<
+			   CS35L36_LRCLK_FRC_SHIFT);
+
+	switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) {
+	case SND_SOC_DAIFMT_DSP_A:
+		asp_fmt = 0;
+		break;
+	case SND_SOC_DAIFMT_I2S:
+		asp_fmt = 2;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	switch (fmt & SND_SOC_DAIFMT_INV_MASK) {
+	case SND_SOC_DAIFMT_NB_IF:
+		lrclk_fmt = 1;
+		sclk_fmt = 0;
+		break;
+	case SND_SOC_DAIFMT_IB_NF:
+		lrclk_fmt = 0;
+		sclk_fmt = 1;
+		break;
+	case SND_SOC_DAIFMT_IB_IF:
+		lrclk_fmt = 1;
+		sclk_fmt = 1;
+		break;
+	case SND_SOC_DAIFMT_NB_NF:
+		lrclk_fmt = 0;
+		sclk_fmt = 0;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	regmap_update_bits(cs35l36->regmap, CS35L36_ASP_RATE_CTRL,
+			   CS35L36_LRCLK_INV_MASK,
+			   lrclk_fmt << CS35L36_LRCLK_INV_SHIFT);
+	regmap_update_bits(cs35l36->regmap, CS35L36_ASP_TX_PIN_CTRL,
+			   CS35L36_SCLK_INV_MASK,
+			   sclk_fmt << CS35L36_SCLK_INV_SHIFT);
+	regmap_update_bits(cs35l36->regmap, CS35L36_ASP_FORMAT,
+			   CS35L36_ASP_FMT_MASK, asp_fmt);
+
+	return 0;
+}
+
+struct cs35l36_global_fs_config {
+	int rate;
+	int fs_cfg;
+};
+
+static const struct cs35l36_global_fs_config cs35l36_fs_rates[] = {
+	{12000, 0x01},
+	{24000, 0x02},
+	{48000, 0x03},
+	{96000, 0x04},
+	{192000, 0x05},
+	{384000, 0x06},
+	{11025, 0x09},
+	{22050, 0x0A},
+	{44100, 0x0B},
+	{88200, 0x0C},
+	{176400, 0x0D},
+	{8000, 0x11},
+	{16000, 0x12},
+	{32000, 0x13},
+};
+
+static int cs35l36_pcm_hw_params(struct snd_pcm_substream *substream,
+				 struct snd_pcm_hw_params *params,
+				 struct snd_soc_dai *dai)
+{
+	struct cs35l36_private *cs35l36 =
+			snd_soc_component_get_drvdata(dai->component);
+	unsigned int asp_width, global_fs = params_rate(params);
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(cs35l36_fs_rates); i++) {
+		if (global_fs == cs35l36_fs_rates[i].rate)
+			regmap_update_bits(cs35l36->regmap,
+					   CS35L36_GLOBAL_CLK_CTRL,
+					   CS35L36_GLOBAL_FS_MASK,
+					   cs35l36_fs_rates[i].fs_cfg <<
+					   CS35L36_GLOBAL_FS_SHIFT);
+	}
+
+	switch (params_width(params)) {
+	case 16:
+		asp_width = CS35L36_ASP_WIDTH_16;
+		break;
+	case 24:
+		asp_width = CS35L36_ASP_WIDTH_24;
+		break;
+	case 32:
+		asp_width = CS35L36_ASP_WIDTH_32;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) {
+		regmap_update_bits(cs35l36->regmap, CS35L36_ASP_FRAME_CTRL,
+				   CS35L36_ASP_RX_WIDTH_MASK,
+				   asp_width << CS35L36_ASP_RX_WIDTH_SHIFT);
+	} else {
+		regmap_update_bits(cs35l36->regmap, CS35L36_ASP_FRAME_CTRL,
+				   CS35L36_ASP_TX_WIDTH_MASK,
+				   asp_width << CS35L36_ASP_TX_WIDTH_SHIFT);
+	}
+
+	return 0;
+}
+
+static int cs35l36_dai_set_sysclk(struct snd_soc_dai *dai, int clk_id,
+				  unsigned int freq, int dir)
+{
+	struct snd_soc_component *component = dai->component;
+	struct cs35l36_private *cs35l36 =
+			snd_soc_component_get_drvdata(component);
+	int fs1, fs2;
+
+	if (freq > CS35L36_FS_NOM_6MHZ) {
+		fs1 = CS35L36_FS1_DEFAULT_VAL;
+		fs2 = CS35L36_FS2_DEFAULT_VAL;
+	} else {
+		fs1 = 3 * ((CS35L36_FS_NOM_6MHZ * 4 + freq - 1) / freq) + 4;
+		fs2 = 5 * ((CS35L36_FS_NOM_6MHZ * 4 + freq - 1) / freq) + 4;
+	}
+
+	regmap_write(cs35l36->regmap, CS35L36_TESTKEY_CTRL,
+			CS35L36_TEST_UNLOCK1);
+	regmap_write(cs35l36->regmap, CS35L36_TESTKEY_CTRL,
+			CS35L36_TEST_UNLOCK2);
+
+	regmap_update_bits(cs35l36->regmap, CS35L36_TST_FS_MON0,
+			   CS35L36_FS1_WINDOW_MASK | CS35L36_FS2_WINDOW_MASK,
+			   fs1 | (fs2 << CS35L36_FS2_WINDOW_SHIFT));
+
+	regmap_write(cs35l36->regmap, CS35L36_TESTKEY_CTRL,
+			CS35L36_TEST_LOCK1);
+	regmap_write(cs35l36->regmap, CS35L36_TESTKEY_CTRL,
+			CS35L36_TEST_LOCK2);
+	return 0;
+}
+
+static const struct cs35l36_pll_config *cs35l36_get_clk_config(
+		struct cs35l36_private *cs35l36, int freq)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(cs35l36_pll_sysclk); i++) {
+		if (cs35l36_pll_sysclk[i].freq == freq)
+			return &cs35l36_pll_sysclk[i];
+	}
+
+	return NULL;
+}
+
+static const unsigned int cs35l36_src_rates[] = {
+	8000, 12000, 11025, 16000, 22050, 24000, 32000,
+	44100, 48000, 88200, 96000, 176400, 192000, 384000
+};
+
+static const struct snd_pcm_hw_constraint_list cs35l36_constraints = {
+	.count  = ARRAY_SIZE(cs35l36_src_rates),
+	.list   = cs35l36_src_rates,
+};
+
+static int cs35l36_pcm_startup(struct snd_pcm_substream *substream,
+			       struct snd_soc_dai *dai)
+{
+	snd_pcm_hw_constraint_list(substream->runtime, 0,
+				SNDRV_PCM_HW_PARAM_RATE, &cs35l36_constraints);
+
+	return 0;
+}
+
+static const struct snd_soc_dai_ops cs35l36_ops = {
+	.startup = cs35l36_pcm_startup,
+	.set_fmt = cs35l36_set_dai_fmt,
+	.hw_params = cs35l36_pcm_hw_params,
+	.set_sysclk = cs35l36_dai_set_sysclk,
+};
+
+static struct snd_soc_dai_driver cs35l36_dai[] = {
+	{
+		.name = "cs35l36-pcm",
+		.id = 0,
+		.playback = {
+			.stream_name = "AMP Playback",
+			.channels_min = 1,
+			.channels_max = 8,
+			.rates = SNDRV_PCM_RATE_KNOT,
+			.formats = CS35L36_RX_FORMATS,
+		},
+		.capture = {
+			.stream_name = "AMP Capture",
+			.channels_min = 1,
+			.channels_max = 8,
+			.rates = SNDRV_PCM_RATE_KNOT,
+			.formats = CS35L36_TX_FORMATS,
+		},
+		.ops = &cs35l36_ops,
+		.symmetric_rates = 1,
+	},
+};
+
+static int cs35l36_component_set_sysclk(struct snd_soc_component *component,
+				int clk_id, int source, unsigned int freq,
+				int dir)
+{
+	struct cs35l36_private *cs35l36 =
+			snd_soc_component_get_drvdata(component);
+	const struct cs35l36_pll_config *clk_cfg;
+	int prev_clksrc;
+	bool pdm_switch;
+
+	prev_clksrc = cs35l36->clksrc;
+
+	switch (clk_id) {
+	case 0:
+		cs35l36->clksrc = CS35L36_PLLSRC_SCLK;
+		break;
+	case 1:
+		cs35l36->clksrc = CS35L36_PLLSRC_LRCLK;
+		break;
+	case 2:
+		cs35l36->clksrc = CS35L36_PLLSRC_PDMCLK;
+		break;
+	case 3:
+		cs35l36->clksrc = CS35L36_PLLSRC_SELF;
+		break;
+	case 4:
+		cs35l36->clksrc = CS35L36_PLLSRC_MCLK;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	clk_cfg = cs35l36_get_clk_config(cs35l36, freq);
+	if (clk_cfg == NULL) {
+		dev_err(component->dev, "Invalid CLK Config Freq: %d\n", freq);
+		return -EINVAL;
+	}
+
+	regmap_update_bits(cs35l36->regmap, CS35L36_PLL_CLK_CTRL,
+			   CS35L36_PLL_OPENLOOP_MASK,
+			   1 << CS35L36_PLL_OPENLOOP_SHIFT);
+	regmap_update_bits(cs35l36->regmap, CS35L36_PLL_CLK_CTRL,
+			   CS35L36_REFCLK_FREQ_MASK,
+			   clk_cfg->clk_cfg << CS35L36_REFCLK_FREQ_SHIFT);
+	regmap_update_bits(cs35l36->regmap, CS35L36_PLL_CLK_CTRL,
+			   CS35L36_PLL_REFCLK_EN_MASK,
+			   0 << CS35L36_PLL_REFCLK_EN_SHIFT);
+	regmap_update_bits(cs35l36->regmap, CS35L36_PLL_CLK_CTRL,
+			   CS35L36_PLL_CLK_SEL_MASK,
+			   cs35l36->clksrc);
+	regmap_update_bits(cs35l36->regmap, CS35L36_PLL_CLK_CTRL,
+			   CS35L36_PLL_OPENLOOP_MASK,
+			   0 << CS35L36_PLL_OPENLOOP_SHIFT);
+	regmap_update_bits(cs35l36->regmap, CS35L36_PLL_CLK_CTRL,
+			   CS35L36_PLL_REFCLK_EN_MASK,
+			   1 << CS35L36_PLL_REFCLK_EN_SHIFT);
+
+	if (cs35l36->rev_id == CS35L36_REV_A0) {
+		regmap_write(cs35l36->regmap, CS35L36_TESTKEY_CTRL,
+			     CS35L36_TEST_UNLOCK1);
+		regmap_write(cs35l36->regmap, CS35L36_TESTKEY_CTRL,
+			     CS35L36_TEST_UNLOCK2);
+
+		regmap_write(cs35l36->regmap, CS35L36_DCO_CTRL, 0x00036DA8);
+		regmap_write(cs35l36->regmap, CS35L36_MISC_CTRL, 0x0100EE0E);
+
+		regmap_update_bits(cs35l36->regmap, CS35L36_PLL_LOOP_PARAMS,
+				   CS35L36_PLL_IGAIN_MASK,
+				   CS35L36_PLL_IGAIN <<
+				   CS35L36_PLL_IGAIN_SHIFT);
+		regmap_update_bits(cs35l36->regmap, CS35L36_PLL_LOOP_PARAMS,
+				   CS35L36_PLL_FFL_IGAIN_MASK,
+				   clk_cfg->fll_igain);
+
+		regmap_write(cs35l36->regmap, CS35L36_TESTKEY_CTRL,
+			     CS35L36_TEST_LOCK1);
+		regmap_write(cs35l36->regmap, CS35L36_TESTKEY_CTRL,
+			     CS35L36_TEST_LOCK2);
+	}
+
+	if (cs35l36->clksrc == CS35L36_PLLSRC_PDMCLK) {
+		pdm_switch = cs35l36->ldm_mode_sel &&
+			     (prev_clksrc != CS35L36_PLLSRC_PDMCLK);
+
+		if (pdm_switch)
+			regmap_update_bits(cs35l36->regmap, CS35L36_NG_CFG,
+					   CS35L36_NG_DELAY_MASK,
+					   0 << CS35L36_NG_DELAY_SHIFT);
+
+		regmap_update_bits(cs35l36->regmap, CS35L36_DAC_MSM_CFG,
+				   CS35L36_PDM_MODE_MASK,
+				   1 << CS35L36_PDM_MODE_SHIFT);
+
+		if (pdm_switch)
+			regmap_update_bits(cs35l36->regmap, CS35L36_NG_CFG,
+					   CS35L36_NG_DELAY_MASK,
+					   3 << CS35L36_NG_DELAY_SHIFT);
+	} else {
+		pdm_switch = cs35l36->ldm_mode_sel &&
+			     (prev_clksrc == CS35L36_PLLSRC_PDMCLK);
+
+		if (pdm_switch)
+			regmap_update_bits(cs35l36->regmap, CS35L36_NG_CFG,
+					   CS35L36_NG_DELAY_MASK,
+					   0 << CS35L36_NG_DELAY_SHIFT);
+
+		regmap_update_bits(cs35l36->regmap, CS35L36_DAC_MSM_CFG,
+				   CS35L36_PDM_MODE_MASK,
+				   0 << CS35L36_PDM_MODE_SHIFT);
+
+		if (pdm_switch)
+			regmap_update_bits(cs35l36->regmap, CS35L36_NG_CFG,
+					   CS35L36_NG_DELAY_MASK,
+					   3 << CS35L36_NG_DELAY_SHIFT);
+	}
+
+	return 0;
+}
+
+static int cs35l36_boost_inductor(struct cs35l36_private *cs35l36, int inductor)
+{
+	regmap_update_bits(cs35l36->regmap, CS35L36_BSTCVRT_COEFF,
+			   CS35L36_BSTCVRT_K1_MASK, 0x3C);
+	regmap_update_bits(cs35l36->regmap, CS35L36_BSTCVRT_COEFF,
+			   CS35L36_BSTCVRT_K2_MASK,
+			   0x3C << CS35L36_BSTCVRT_K2_SHIFT);
+	regmap_update_bits(cs35l36->regmap, CS35L36_BSTCVRT_SW_FREQ,
+			   CS35L36_BSTCVRT_CCMFREQ_MASK, 0x00);
+
+	switch (inductor) {
+	case 1000: /* 1 uH */
+		regmap_update_bits(cs35l36->regmap, CS35L36_BSTCVRT_SLOPE_LBST,
+				   CS35L36_BSTCVRT_SLOPE_MASK,
+				   0x75 << CS35L36_BSTCVRT_SLOPE_SHIFT);
+		regmap_update_bits(cs35l36->regmap, CS35L36_BSTCVRT_SLOPE_LBST,
+				   CS35L36_BSTCVRT_LBSTVAL_MASK, 0x00);
+		break;
+	case 1200: /* 1.2 uH */
+		regmap_update_bits(cs35l36->regmap, CS35L36_BSTCVRT_SLOPE_LBST,
+				   CS35L36_BSTCVRT_SLOPE_MASK,
+				   0x6B << CS35L36_BSTCVRT_SLOPE_SHIFT);
+		regmap_update_bits(cs35l36->regmap, CS35L36_BSTCVRT_SLOPE_LBST,
+				   CS35L36_BSTCVRT_LBSTVAL_MASK, 0x01);
+		break;
+	default:
+		dev_err(cs35l36->dev, "%s Invalid Inductor Value %d uH\n",
+			__func__, inductor);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int cs35l36_component_probe(struct snd_soc_component *component)
+{
+	struct cs35l36_private *cs35l36 =
+			snd_soc_component_get_drvdata(component);
+	int ret = 0;
+
+	if ((cs35l36->rev_id == CS35L36_REV_A0) && cs35l36->pdata.dcm_mode) {
+		regmap_update_bits(cs35l36->regmap, CS35L36_BSTCVRT_DCM_CTRL,
+				   CS35L36_DCM_AUTO_MASK,
+				   CS35L36_DCM_AUTO_MASK);
+
+		regmap_write(cs35l36->regmap, CS35L36_TESTKEY_CTRL,
+			     CS35L36_TEST_UNLOCK1);
+		regmap_write(cs35l36->regmap, CS35L36_TESTKEY_CTRL,
+			     CS35L36_TEST_UNLOCK2);
+
+		regmap_update_bits(cs35l36->regmap, CS35L36_BST_TST_MANUAL,
+				   CS35L36_BST_MAN_IPKCOMP_MASK,
+				   0 << CS35L36_BST_MAN_IPKCOMP_SHIFT);
+		regmap_update_bits(cs35l36->regmap, CS35L36_BST_TST_MANUAL,
+				   CS35L36_BST_MAN_IPKCOMP_EN_MASK,
+				   CS35L36_BST_MAN_IPKCOMP_EN_MASK);
+
+		regmap_write(cs35l36->regmap, CS35L36_TESTKEY_CTRL,
+				CS35L36_TEST_LOCK1);
+		regmap_write(cs35l36->regmap, CS35L36_TESTKEY_CTRL,
+				CS35L36_TEST_LOCK2);
+	}
+
+	if (cs35l36->pdata.amp_pcm_inv)
+		regmap_update_bits(cs35l36->regmap, CS35L36_AMP_DIG_VOL_CTRL,
+				   CS35L36_AMP_PCM_INV_MASK,
+				   CS35L36_AMP_PCM_INV_MASK);
+
+	if (cs35l36->pdata.multi_amp_mode)
+		regmap_update_bits(cs35l36->regmap, CS35L36_ASP_TX_PIN_CTRL,
+				   CS35L36_ASP_TX_HIZ_MASK,
+				   CS35L36_ASP_TX_HIZ_MASK);
+
+	if (cs35l36->pdata.imon_pol_inv)
+		regmap_update_bits(cs35l36->regmap, CS35L36_VI_SPKMON_FILT,
+				   CS35L36_IMON_POL_MASK, 0);
+
+	if (cs35l36->pdata.vmon_pol_inv)
+		regmap_update_bits(cs35l36->regmap, CS35L36_VI_SPKMON_FILT,
+				   CS35L36_VMON_POL_MASK, 0);
+
+	if (cs35l36->pdata.bst_vctl)
+		regmap_update_bits(cs35l36->regmap, CS35L36_BSTCVRT_VCTRL1,
+				   CS35L35_BSTCVRT_CTL_MASK,
+				   cs35l36->pdata.bst_vctl);
+
+	if (cs35l36->pdata.bst_vctl_sel)
+		regmap_update_bits(cs35l36->regmap, CS35L36_BSTCVRT_VCTRL2,
+				   CS35L35_BSTCVRT_CTL_SEL_MASK,
+				   cs35l36->pdata.bst_vctl_sel);
+
+	if (cs35l36->pdata.bst_ipk)
+		regmap_update_bits(cs35l36->regmap, CS35L36_BSTCVRT_PEAK_CUR,
+				   CS35L36_BST_IPK_MASK,
+				   cs35l36->pdata.bst_ipk);
+
+	if (cs35l36->pdata.boost_ind) {
+		ret = cs35l36_boost_inductor(cs35l36, cs35l36->pdata.boost_ind);
+		if (ret < 0) {
+			dev_err(cs35l36->dev,
+				"Boost inductor config failed(%d)\n", ret);
+			return ret;
+		}
+	}
+
+	if (cs35l36->pdata.temp_warn_thld)
+		regmap_update_bits(cs35l36->regmap, CS35L36_DTEMP_WARN_THLD,
+				   CS35L36_TEMP_THLD_MASK,
+				   cs35l36->pdata.temp_warn_thld);
+
+	if (cs35l36->pdata.irq_drv_sel)
+		regmap_update_bits(cs35l36->regmap, CS35L36_PAD_INTERFACE,
+				   CS35L36_INT_DRV_SEL_MASK,
+				   cs35l36->pdata.irq_drv_sel <<
+				   CS35L36_INT_DRV_SEL_SHIFT);
+
+	if (cs35l36->pdata.irq_gpio_sel)
+		regmap_update_bits(cs35l36->regmap, CS35L36_PAD_INTERFACE,
+				   CS35L36_INT_GPIO_SEL_MASK,
+				   cs35l36->pdata.irq_gpio_sel <<
+				   CS35L36_INT_GPIO_SEL_SHIFT);
+
+	/*
+	 * Rev B0 has 2 versions
+	 * L36 is 10V
+	 * L37 is 12V
+	 * If L36 we need to clamp some values for safety
+	 * after probe has setup dt values. We want to make
+	 * sure we dont miss any values set in probe
+	 */
+	if (cs35l36->chip_version == CS35L36_10V_L36) {
+		regmap_update_bits(cs35l36->regmap,
+				   CS35L36_BSTCVRT_OVERVOLT_CTRL,
+				   CS35L36_BST_OVP_THLD_MASK,
+				   CS35L36_BST_OVP_THLD_11V);
+
+		regmap_write(cs35l36->regmap, CS35L36_TESTKEY_CTRL,
+			     CS35L36_TEST_UNLOCK1);
+		regmap_write(cs35l36->regmap, CS35L36_TESTKEY_CTRL,
+			     CS35L36_TEST_UNLOCK2);
+
+		regmap_update_bits(cs35l36->regmap, CS35L36_BST_ANA2_TEST,
+				   CS35L36_BST_OVP_TRIM_MASK,
+				   CS35L36_BST_OVP_TRIM_11V <<
+				   CS35L36_BST_OVP_TRIM_SHIFT);
+		regmap_update_bits(cs35l36->regmap, CS35L36_BSTCVRT_VCTRL2,
+				   CS35L36_BST_CTRL_LIM_MASK,
+				   1 << CS35L36_BST_CTRL_LIM_SHIFT);
+		regmap_update_bits(cs35l36->regmap, CS35L36_BSTCVRT_VCTRL1,
+				   CS35L35_BSTCVRT_CTL_MASK,
+				   CS35L36_BST_CTRL_10V_CLAMP);
+		regmap_write(cs35l36->regmap, CS35L36_TESTKEY_CTRL,
+			     CS35L36_TEST_LOCK1);
+		regmap_write(cs35l36->regmap, CS35L36_TESTKEY_CTRL,
+			     CS35L36_TEST_LOCK2);
+	}
+
+	/*
+	 * RevA and B require the disabling of
+	 * SYNC_GLOBAL_OVR when GLOBAL_EN = 0.
+	 * Just turn it off from default
+	 */
+	regmap_update_bits(cs35l36->regmap, CS35L36_CTRL_OVRRIDE,
+			   CS35L36_SYNC_GLOBAL_OVR_MASK,
+			   0 << CS35L36_SYNC_GLOBAL_OVR_SHIFT);
+
+	return 0;
+}
+
+static const struct snd_soc_component_driver soc_component_dev_cs35l36 = {
+	.probe			= &cs35l36_component_probe,
+	.set_sysclk		= cs35l36_component_set_sysclk,
+	.dapm_widgets		= cs35l36_dapm_widgets,
+	.num_dapm_widgets	= ARRAY_SIZE(cs35l36_dapm_widgets),
+	.dapm_routes		= cs35l36_audio_map,
+	.num_dapm_routes	= ARRAY_SIZE(cs35l36_audio_map),
+	.controls		= cs35l36_aud_controls,
+	.num_controls		= ARRAY_SIZE(cs35l36_aud_controls),
+	.idle_bias_on		= 1,
+	.use_pmdown_time	= 1,
+	.endianness		= 1,
+	.non_legacy_dai_naming	= 1,
+};
+
+static struct regmap_config cs35l36_regmap = {
+	.reg_bits = 32,
+	.val_bits = 32,
+	.reg_stride = 4,
+	.max_register = CS35L36_PAC_PMEM_WORD1023,
+	.reg_defaults = cs35l36_reg,
+	.num_reg_defaults = ARRAY_SIZE(cs35l36_reg),
+	.precious_reg = cs35l36_precious_reg,
+	.volatile_reg = cs35l36_volatile_reg,
+	.readable_reg = cs35l36_readable_reg,
+	.cache_type = REGCACHE_RBTREE,
+};
+
+static irqreturn_t cs35l36_irq(int irq, void *data)
+{
+	struct cs35l36_private *cs35l36 = data;
+	unsigned int status[4];
+	unsigned int masks[4];
+	int ret = IRQ_NONE;
+
+	/* ack the irq by reading all status registers */
+	regmap_bulk_read(cs35l36->regmap, CS35L36_INT1_STATUS, status,
+			 ARRAY_SIZE(status));
+
+	regmap_bulk_read(cs35l36->regmap, CS35L36_INT1_MASK, masks,
+			 ARRAY_SIZE(masks));
+
+	/* Check to see if unmasked bits are active */
+	if (!(status[0] & ~masks[0]) && !(status[1] & ~masks[1]) &&
+		!(status[2] & ~masks[2]) && !(status[3] & ~masks[3])) {
+		return IRQ_NONE;
+	}
+
+	/*
+	 * The following interrupts require a
+	 * protection release cycle to get the
+	 * speaker out of Safe-Mode.
+	 */
+	if (status[2] & CS35L36_AMP_SHORT_ERR) {
+		dev_crit(cs35l36->dev, "Amp short error\n");
+		regmap_update_bits(cs35l36->regmap, CS35L36_PROTECT_REL_ERR,
+				   CS35L36_AMP_SHORT_ERR_RLS, 0);
+		regmap_update_bits(cs35l36->regmap, CS35L36_PROTECT_REL_ERR,
+				   CS35L36_AMP_SHORT_ERR_RLS,
+				   CS35L36_AMP_SHORT_ERR_RLS);
+		regmap_update_bits(cs35l36->regmap, CS35L36_PROTECT_REL_ERR,
+				   CS35L36_AMP_SHORT_ERR_RLS, 0);
+		regmap_update_bits(cs35l36->regmap, CS35L36_INT3_STATUS,
+				   CS35L36_AMP_SHORT_ERR,
+				   CS35L36_AMP_SHORT_ERR);
+		ret = IRQ_HANDLED;
+	}
+
+	if (status[0] & CS35L36_TEMP_WARN) {
+		dev_crit(cs35l36->dev, "Over temperature warning\n");
+		regmap_update_bits(cs35l36->regmap, CS35L36_PROTECT_REL_ERR,
+				   CS35L36_TEMP_WARN_ERR_RLS, 0);
+		regmap_update_bits(cs35l36->regmap, CS35L36_PROTECT_REL_ERR,
+				   CS35L36_TEMP_WARN_ERR_RLS,
+				   CS35L36_TEMP_WARN_ERR_RLS);
+		regmap_update_bits(cs35l36->regmap, CS35L36_PROTECT_REL_ERR,
+				   CS35L36_TEMP_WARN_ERR_RLS, 0);
+		regmap_update_bits(cs35l36->regmap, CS35L36_INT1_STATUS,
+				   CS35L36_TEMP_WARN, CS35L36_TEMP_WARN);
+		ret = IRQ_HANDLED;
+	}
+
+	if (status[0] & CS35L36_TEMP_ERR) {
+		dev_crit(cs35l36->dev, "Over temperature error\n");
+		regmap_update_bits(cs35l36->regmap, CS35L36_PROTECT_REL_ERR,
+				   CS35L36_TEMP_ERR_RLS, 0);
+		regmap_update_bits(cs35l36->regmap, CS35L36_PROTECT_REL_ERR,
+				   CS35L36_TEMP_ERR_RLS, CS35L36_TEMP_ERR_RLS);
+		regmap_update_bits(cs35l36->regmap, CS35L36_PROTECT_REL_ERR,
+				   CS35L36_TEMP_ERR_RLS, 0);
+		regmap_update_bits(cs35l36->regmap, CS35L36_INT1_STATUS,
+				   CS35L36_TEMP_ERR, CS35L36_TEMP_ERR);
+		ret = IRQ_HANDLED;
+	}
+
+	if (status[0] & CS35L36_BST_OVP_ERR) {
+		dev_crit(cs35l36->dev, "VBST Over Voltage error\n");
+		regmap_update_bits(cs35l36->regmap, CS35L36_PROTECT_REL_ERR,
+				   CS35L36_TEMP_ERR_RLS, 0);
+		regmap_update_bits(cs35l36->regmap, CS35L36_PROTECT_REL_ERR,
+				   CS35L36_TEMP_ERR_RLS, CS35L36_TEMP_ERR_RLS);
+		regmap_update_bits(cs35l36->regmap, CS35L36_PROTECT_REL_ERR,
+				   CS35L36_TEMP_ERR_RLS, 0);
+		regmap_update_bits(cs35l36->regmap, CS35L36_INT1_STATUS,
+				   CS35L36_BST_OVP_ERR, CS35L36_BST_OVP_ERR);
+		ret = IRQ_HANDLED;
+	}
+
+	if (status[0] & CS35L36_BST_DCM_UVP_ERR) {
+		dev_crit(cs35l36->dev, "DCM VBST Under Voltage Error\n");
+		regmap_update_bits(cs35l36->regmap, CS35L36_PROTECT_REL_ERR,
+				   CS35L36_BST_UVP_ERR_RLS, 0);
+		regmap_update_bits(cs35l36->regmap, CS35L36_PROTECT_REL_ERR,
+				   CS35L36_BST_UVP_ERR_RLS,
+				   CS35L36_BST_UVP_ERR_RLS);
+		regmap_update_bits(cs35l36->regmap, CS35L36_PROTECT_REL_ERR,
+				   CS35L36_BST_UVP_ERR_RLS, 0);
+		regmap_update_bits(cs35l36->regmap, CS35L36_INT1_STATUS,
+				   CS35L36_BST_DCM_UVP_ERR,
+				   CS35L36_BST_DCM_UVP_ERR);
+		ret = IRQ_HANDLED;
+	}
+
+	if (status[0] & CS35L36_BST_SHORT_ERR) {
+		dev_crit(cs35l36->dev, "LBST SHORT error!\n");
+		regmap_update_bits(cs35l36->regmap, CS35L36_PROTECT_REL_ERR,
+				   CS35L36_BST_SHORT_ERR_RLS, 0);
+		regmap_update_bits(cs35l36->regmap, CS35L36_PROTECT_REL_ERR,
+				   CS35L36_BST_SHORT_ERR_RLS,
+				   CS35L36_BST_SHORT_ERR_RLS);
+		regmap_update_bits(cs35l36->regmap, CS35L36_PROTECT_REL_ERR,
+				   CS35L36_BST_SHORT_ERR_RLS, 0);
+		regmap_update_bits(cs35l36->regmap, CS35L36_INT1_STATUS,
+				   CS35L36_BST_SHORT_ERR,
+				   CS35L36_BST_SHORT_ERR);
+		ret = IRQ_HANDLED;
+	}
+
+	return ret;
+}
+
+static int cs35l36_handle_of_data(struct i2c_client *i2c_client,
+				struct cs35l36_platform_data *pdata)
+{
+	struct device_node *np = i2c_client->dev.of_node;
+	struct cs35l36_vpbr_cfg *vpbr_config = &pdata->vpbr_config;
+	struct device_node *vpbr_node;
+	unsigned int val;
+	int ret;
+
+	if (!np)
+		return 0;
+
+	ret = of_property_read_u32(np, "cirrus,boost-ctl-millivolt", &val);
+	if (!ret) {
+		if (val < 2550 || val > 12000) {
+			dev_err(&i2c_client->dev,
+				"Invalid Boost Voltage %d mV\n", val);
+			return -EINVAL;
+		}
+		pdata->bst_vctl = (((val - 2550) / 100) + 1) << 1;
+	} else {
+		dev_err(&i2c_client->dev,
+			"Unable to find required parameter 'cirrus,boost-ctl-millivolt'");
+		return -EINVAL;
+	}
+
+	ret = of_property_read_u32(np, "cirrus,boost-ctl-select", &val);
+	if (!ret)
+		pdata->bst_vctl_sel = val | CS35L36_VALID_PDATA;
+
+	ret = of_property_read_u32(np, "cirrus,boost-peak-milliamp", &val);
+	if (!ret) {
+		if (val < 1600 || val > 4500) {
+			dev_err(&i2c_client->dev,
+				"Invalid Boost Peak Current %u mA\n", val);
+			return -EINVAL;
+		}
+
+		pdata->bst_ipk = (val - 1600) / 50;
+	} else {
+		dev_err(&i2c_client->dev,
+			"Unable to find required parameter 'cirrus,boost-peak-milliamp'");
+		return -EINVAL;
+	}
+
+	pdata->multi_amp_mode = of_property_read_bool(np,
+					"cirrus,multi-amp-mode");
+
+	pdata->dcm_mode = of_property_read_bool(np,
+					"cirrus,dcm-mode-enable");
+
+	pdata->amp_pcm_inv = of_property_read_bool(np,
+					"cirrus,amp-pcm-inv");
+
+	pdata->imon_pol_inv = of_property_read_bool(np,
+					"cirrus,imon-pol-inv");
+
+	pdata->vmon_pol_inv = of_property_read_bool(np,
+					"cirrus,vmon-pol-inv");
+
+	if (of_property_read_u32(np, "cirrus,temp-warn-threshold", &val) >= 0)
+		pdata->temp_warn_thld = val | CS35L36_VALID_PDATA;
+
+	if (of_property_read_u32(np, "cirrus,boost-ind-nanohenry", &val) >= 0) {
+		pdata->boost_ind = val;
+	} else {
+		dev_err(&i2c_client->dev, "Inductor not specified.\n");
+		return -EINVAL;
+	}
+
+	if (of_property_read_u32(np, "cirrus,irq-drive-select", &val) >= 0)
+		pdata->irq_drv_sel = val | CS35L36_VALID_PDATA;
+
+	if (of_property_read_u32(np, "cirrus,irq-gpio-select", &val) >= 0)
+		pdata->irq_gpio_sel = val | CS35L36_VALID_PDATA;
+
+	/* VPBR Config */
+	vpbr_node = of_get_child_by_name(np, "cirrus,vpbr-config");
+	vpbr_config->is_present = vpbr_node ? true : false;
+	if (vpbr_config->is_present) {
+		if (of_property_read_u32(vpbr_node, "cirrus,vpbr-en",
+					 &val) >= 0)
+			vpbr_config->vpbr_en = val;
+		if (of_property_read_u32(vpbr_node, "cirrus,vpbr-thld",
+					 &val) >= 0)
+			vpbr_config->vpbr_thld = val;
+		if (of_property_read_u32(vpbr_node, "cirrus,vpbr-atk-rate",
+					 &val) >= 0)
+			vpbr_config->vpbr_atk_rate = val;
+		if (of_property_read_u32(vpbr_node, "cirrus,vpbr-atk-vol",
+					 &val) >= 0)
+			vpbr_config->vpbr_atk_vol = val;
+		if (of_property_read_u32(vpbr_node, "cirrus,vpbr-max-attn",
+					 &val) >= 0)
+			vpbr_config->vpbr_max_attn = val;
+		if (of_property_read_u32(vpbr_node, "cirrus,vpbr-wait",
+					 &val) >= 0)
+			vpbr_config->vpbr_wait = val;
+		if (of_property_read_u32(vpbr_node, "cirrus,vpbr-rel-rate",
+					 &val) >= 0)
+			vpbr_config->vpbr_rel_rate = val;
+		if (of_property_read_u32(vpbr_node, "cirrus,vpbr-mute-en",
+					 &val) >= 0)
+			vpbr_config->vpbr_mute_en = val;
+	}
+	of_node_put(vpbr_node);
+
+	return 0;
+}
+
+static int cs35l36_pac(struct cs35l36_private *cs35l36)
+{
+	int ret, count;
+	unsigned int val;
+
+	if (cs35l36->rev_id != CS35L36_REV_B0)
+		return 0;
+
+	/*
+	 * Magic code for internal PAC
+	 */
+	regmap_write(cs35l36->regmap, CS35L36_TESTKEY_CTRL,
+		     CS35L36_TEST_UNLOCK1);
+	regmap_write(cs35l36->regmap, CS35L36_TESTKEY_CTRL,
+		     CS35L36_TEST_UNLOCK2);
+
+	usleep_range(9500, 10500);
+
+	regmap_write(cs35l36->regmap, CS35L36_PAC_CTL1,
+		     CS35L36_PAC_RESET);
+	regmap_write(cs35l36->regmap, CS35L36_PAC_CTL3,
+		     CS35L36_PAC_MEM_ACCESS);
+	regmap_write(cs35l36->regmap, CS35L36_PAC_PMEM_WORD0,
+		     CS35L36_B0_PAC_PATCH);
+
+	regmap_write(cs35l36->regmap, CS35L36_PAC_CTL3,
+		     CS35L36_PAC_MEM_ACCESS_CLR);
+	regmap_write(cs35l36->regmap, CS35L36_PAC_CTL1,
+		     CS35L36_PAC_ENABLE_MASK);
+
+	usleep_range(9500, 10500);
+
+	ret = regmap_read(cs35l36->regmap, CS35L36_INT4_STATUS, &val);
+	if (ret < 0) {
+		dev_err(cs35l36->dev, "Failed to read int4_status %d\n", ret);
+		return ret;
+	}
+
+	count = 0;
+	while (!(val & CS35L36_MCU_CONFIG_CLR)) {
+		usleep_range(100, 200);
+		count++;
+
+		ret = regmap_read(cs35l36->regmap, CS35L36_INT4_STATUS,
+				  &val);
+		if (ret < 0) {
+			dev_err(cs35l36->dev, "Failed to read int4_status %d\n",
+				ret);
+			return ret;
+		}
+
+		if (count >= 100)
+			return -EINVAL;
+	}
+
+	regmap_write(cs35l36->regmap, CS35L36_INT4_STATUS,
+		     CS35L36_MCU_CONFIG_CLR);
+	regmap_update_bits(cs35l36->regmap, CS35L36_PAC_CTL1,
+			   CS35L36_PAC_ENABLE_MASK, 0);
+
+	regmap_write(cs35l36->regmap, CS35L36_TESTKEY_CTRL,
+		     CS35L36_TEST_LOCK1);
+	regmap_write(cs35l36->regmap, CS35L36_TESTKEY_CTRL,
+		     CS35L36_TEST_LOCK2);
+
+	return 0;
+}
+
+static void cs35l36_apply_vpbr_config(struct cs35l36_private *cs35l36)
+{
+	struct cs35l36_platform_data *pdata = &cs35l36->pdata;
+	struct cs35l36_vpbr_cfg *vpbr_config = &pdata->vpbr_config;
+
+	regmap_update_bits(cs35l36->regmap, CS35L36_PWR_CTRL3,
+			   CS35L36_VPBR_EN_MASK,
+			   vpbr_config->vpbr_en <<
+			   CS35L36_VPBR_EN_SHIFT);
+	regmap_update_bits(cs35l36->regmap, CS35L36_VPBR_CFG,
+			   CS35L36_VPBR_THLD_MASK,
+			   vpbr_config->vpbr_thld <<
+			   CS35L36_VPBR_THLD_SHIFT);
+	regmap_update_bits(cs35l36->regmap, CS35L36_VPBR_CFG,
+			   CS35L36_VPBR_MAX_ATTN_MASK,
+			   vpbr_config->vpbr_max_attn <<
+			   CS35L36_VPBR_MAX_ATTN_SHIFT);
+	regmap_update_bits(cs35l36->regmap, CS35L36_VPBR_CFG,
+			   CS35L36_VPBR_ATK_VOL_MASK,
+			   vpbr_config->vpbr_atk_vol <<
+			   CS35L36_VPBR_ATK_VOL_SHIFT);
+	regmap_update_bits(cs35l36->regmap, CS35L36_VPBR_CFG,
+			   CS35L36_VPBR_ATK_RATE_MASK,
+			   vpbr_config->vpbr_atk_rate <<
+			   CS35L36_VPBR_ATK_RATE_SHIFT);
+	regmap_update_bits(cs35l36->regmap, CS35L36_VPBR_CFG,
+			   CS35L36_VPBR_WAIT_MASK,
+			   vpbr_config->vpbr_wait <<
+			   CS35L36_VPBR_WAIT_SHIFT);
+	regmap_update_bits(cs35l36->regmap, CS35L36_VPBR_CFG,
+			   CS35L36_VPBR_REL_RATE_MASK,
+			   vpbr_config->vpbr_rel_rate <<
+			   CS35L36_VPBR_REL_RATE_SHIFT);
+	regmap_update_bits(cs35l36->regmap, CS35L36_VPBR_CFG,
+			   CS35L36_VPBR_MUTE_EN_MASK,
+			   vpbr_config->vpbr_mute_en <<
+			   CS35L36_VPBR_MUTE_EN_SHIFT);
+}
+
+static const struct reg_sequence cs35l36_reva0_errata_patch[] = {
+	{ CS35L36_TESTKEY_CTRL,		CS35L36_TEST_UNLOCK1 },
+	{ CS35L36_TESTKEY_CTRL,		CS35L36_TEST_UNLOCK2 },
+	/* Errata Writes */
+	{ CS35L36_OTP_CTRL1,		0x00002060 },
+	{ CS35L36_OTP_CTRL2,		0x00000001 },
+	{ CS35L36_OTP_CTRL1,		0x00002460 },
+	{ CS35L36_OTP_CTRL2,		0x00000001 },
+	{ 0x00002088,			0x012A1838 },
+	{ 0x00003014,			0x0100EE0E },
+	{ 0x00003008,			0x0008184A },
+	{ 0x00007418,			0x509001C8 },
+	{ 0x00007064,			0x0929A800 },
+	{ 0x00002D10,			0x0002C01C },
+	{ 0x0000410C,			0x00000A11 },
+	{ 0x00006E08,			0x8B19140C },
+	{ 0x00006454,			0x0300000A },
+	{ CS35L36_AMP_NG_CTRL,		0x000020EF },
+	{ 0x00007E34,			0x0000000E },
+	{ 0x0000410C,			0x00000A11 },
+	{ 0x00007410,			0x20514B00 },
+	/* PAC Config */
+	{ CS35L36_CTRL_OVRRIDE,		0x00000000 },
+	{ CS35L36_PAC_INT0_CTRL,	0x00860001 },
+	{ CS35L36_PAC_INT1_CTRL,	0x00860001 },
+	{ CS35L36_PAC_INT2_CTRL,	0x00860001 },
+	{ CS35L36_PAC_INT3_CTRL,	0x00860001 },
+	{ CS35L36_PAC_INT4_CTRL,	0x00860001 },
+	{ CS35L36_PAC_INT5_CTRL,	0x00860001 },
+	{ CS35L36_PAC_INT6_CTRL,	0x00860001 },
+	{ CS35L36_PAC_INT7_CTRL,	0x00860001 },
+	{ CS35L36_PAC_INT_FLUSH_CTRL,	0x000000FF },
+	{ CS35L36_TESTKEY_CTRL,		CS35L36_TEST_LOCK1 },
+	{ CS35L36_TESTKEY_CTRL,		CS35L36_TEST_LOCK2 },
+};
+
+static const struct reg_sequence cs35l36_revb0_errata_patch[] = {
+	{ CS35L36_TESTKEY_CTRL,	CS35L36_TEST_UNLOCK1 },
+	{ CS35L36_TESTKEY_CTRL, CS35L36_TEST_UNLOCK2 },
+	{ 0x00007064,		0x0929A800 },
+	{ 0x00007850,		0x00002FA9 },
+	{ 0x00007854,		0x0003F1D5 },
+	{ 0x00007858,		0x0003F5E3 },
+	{ 0x0000785C,		0x00001137 },
+	{ 0x00007860,		0x0001A7A5 },
+	{ 0x00007864,		0x0002F16A },
+	{ 0x00007868,		0x00003E21 },
+	{ 0x00007848,		0x00000001 },
+	{ 0x00003854,		0x05180240 },
+	{ 0x00007418,		0x509001C8 },
+	{ 0x0000394C,		0x028764BD },
+	{ CS35L36_TESTKEY_CTRL,	CS35L36_TEST_LOCK1 },
+	{ CS35L36_TESTKEY_CTRL, CS35L36_TEST_LOCK2 },
+};
+
+static int cs35l36_i2c_probe(struct i2c_client *i2c_client,
+			      const struct i2c_device_id *id)
+{
+	struct cs35l36_private *cs35l36;
+	struct device *dev = &i2c_client->dev;
+	struct cs35l36_platform_data *pdata = dev_get_platdata(dev);
+	struct irq_data *irq_d;
+	int ret, irq_pol, chip_irq_pol, i;
+	u32 reg_id, reg_revid, l37_id_reg;
+
+	cs35l36 = devm_kzalloc(dev, sizeof(struct cs35l36_private), GFP_KERNEL);
+	if (!cs35l36)
+		return -ENOMEM;
+
+	cs35l36->dev = dev;
+
+	i2c_set_clientdata(i2c_client, cs35l36);
+	cs35l36->regmap = devm_regmap_init_i2c(i2c_client, &cs35l36_regmap);
+	if (IS_ERR(cs35l36->regmap)) {
+		ret = PTR_ERR(cs35l36->regmap);
+		dev_err(dev, "regmap_init() failed: %d\n", ret);
+		goto err;
+	}
+
+	cs35l36->num_supplies = ARRAY_SIZE(cs35l36_supplies);
+	for (i = 0; i < ARRAY_SIZE(cs35l36_supplies); i++)
+		cs35l36->supplies[i].supply = cs35l36_supplies[i];
+
+	ret = devm_regulator_bulk_get(dev, cs35l36->num_supplies,
+				      cs35l36->supplies);
+	if (ret != 0) {
+		dev_err(dev, "Failed to request core supplies: %d\n", ret);
+		return ret;
+	}
+
+	if (pdata) {
+		cs35l36->pdata = *pdata;
+	} else {
+		pdata = devm_kzalloc(dev, sizeof(struct cs35l36_platform_data),
+				     GFP_KERNEL);
+		if (!pdata)
+			return -ENOMEM;
+
+		if (i2c_client->dev.of_node) {
+			ret = cs35l36_handle_of_data(i2c_client, pdata);
+			if (ret != 0)
+				return ret;
+
+		}
+
+		cs35l36->pdata = *pdata;
+	}
+
+	ret = regulator_bulk_enable(cs35l36->num_supplies, cs35l36->supplies);
+	if (ret != 0) {
+		dev_err(dev, "Failed to enable core supplies: %d\n", ret);
+		return ret;
+	}
+
+	/* returning NULL can be an option if in stereo mode */
+	cs35l36->reset_gpio = devm_gpiod_get_optional(dev, "reset",
+						      GPIOD_OUT_LOW);
+	if (IS_ERR(cs35l36->reset_gpio)) {
+		ret = PTR_ERR(cs35l36->reset_gpio);
+		cs35l36->reset_gpio = NULL;
+		if (ret == -EBUSY) {
+			dev_info(dev, "Reset line busy, assuming shared reset\n");
+		} else {
+			dev_err(dev, "Failed to get reset GPIO: %d\n", ret);
+			goto err_disable_regs;
+		}
+	}
+
+	if (cs35l36->reset_gpio)
+		gpiod_set_value_cansleep(cs35l36->reset_gpio, 1);
+
+	usleep_range(2000, 2100);
+
+	/* initialize amplifier */
+	ret = regmap_read(cs35l36->regmap, CS35L36_SW_RESET, &reg_id);
+	if (ret < 0) {
+		dev_err(dev, "Get Device ID failed %d\n", ret);
+		goto err;
+	}
+
+	if (reg_id != CS35L36_CHIP_ID) {
+		dev_err(dev, "Device ID (%X). Expected ID %X\n", reg_id,
+			CS35L36_CHIP_ID);
+		ret = -ENODEV;
+		goto err;
+	}
+
+	ret = regmap_read(cs35l36->regmap, CS35L36_REV_ID, &reg_revid);
+	if (ret < 0) {
+		dev_err(&i2c_client->dev, "Get Revision ID failed %d\n", ret);
+		goto err;
+	}
+
+	cs35l36->rev_id = reg_revid >> 8;
+
+	ret = regmap_read(cs35l36->regmap, CS35L36_OTP_MEM30, &l37_id_reg);
+	if (ret < 0) {
+		dev_err(&i2c_client->dev, "Failed to read otp_id Register %d\n",
+			ret);
+		return ret;
+	}
+
+	if ((l37_id_reg & CS35L36_OTP_REV_MASK) == CS35L36_OTP_REV_L37)
+		cs35l36->chip_version = CS35L36_12V_L37;
+	else
+		cs35l36->chip_version = CS35L36_10V_L36;
+
+	switch (cs35l36->rev_id) {
+	case CS35L36_REV_A0:
+		ret = regmap_register_patch(cs35l36->regmap,
+				cs35l36_reva0_errata_patch,
+				ARRAY_SIZE(cs35l36_reva0_errata_patch));
+		if (ret < 0) {
+			dev_err(dev, "Failed to apply A0 errata patch %d\n",
+				ret);
+			goto err;
+		}
+		break;
+	case CS35L36_REV_B0:
+		ret = cs35l36_pac(cs35l36);
+		if (ret < 0) {
+			dev_err(dev, "Failed to Trim OTP %d\n", ret);
+			goto err;
+		}
+
+		ret = regmap_register_patch(cs35l36->regmap,
+				cs35l36_revb0_errata_patch,
+				ARRAY_SIZE(cs35l36_revb0_errata_patch));
+		if (ret < 0) {
+			dev_err(dev, "Failed to apply B0 errata patch %d\n",
+				ret);
+			goto err;
+		}
+		break;
+	}
+
+	if (pdata->vpbr_config.is_present)
+		cs35l36_apply_vpbr_config(cs35l36);
+
+	irq_d = irq_get_irq_data(i2c_client->irq);
+	if (IS_ERR(irq_d)) {
+		dev_err(&i2c_client->dev, "Invalid IRQ: %d\n", i2c_client->irq);
+		ret = PTR_ERR(irq_d);
+		goto err;
+	}
+
+	irq_pol = irqd_get_trigger_type(irq_d);
+
+	switch (irq_pol) {
+	case IRQF_TRIGGER_FALLING:
+	case IRQF_TRIGGER_LOW:
+		chip_irq_pol = 0;
+		break;
+	case IRQF_TRIGGER_RISING:
+	case IRQF_TRIGGER_HIGH:
+		chip_irq_pol = 1;
+		break;
+	default:
+		dev_err(cs35l36->dev, "Invalid IRQ polarity: %d\n", irq_pol);
+		ret = -EINVAL;
+		goto err;
+	}
+
+	regmap_update_bits(cs35l36->regmap, CS35L36_PAD_INTERFACE,
+			   CS35L36_INT_POL_SEL_MASK,
+			   chip_irq_pol << CS35L36_INT_POL_SEL_SHIFT);
+
+	ret = devm_request_threaded_irq(dev, i2c_client->irq, NULL, cs35l36_irq,
+					IRQF_ONESHOT | irq_pol, "cs35l36",
+					cs35l36);
+	if (ret != 0) {
+		dev_err(dev, "Failed to request IRQ: %d\n", ret);
+		goto err;
+	}
+
+	regmap_update_bits(cs35l36->regmap, CS35L36_PAD_INTERFACE,
+			   CS35L36_INT_OUTPUT_EN_MASK, 1);
+
+	/* Set interrupt masks for critical errors */
+	regmap_write(cs35l36->regmap, CS35L36_INT1_MASK,
+		     CS35L36_INT1_MASK_DEFAULT);
+	regmap_write(cs35l36->regmap, CS35L36_INT3_MASK,
+		     CS35L36_INT3_MASK_DEFAULT);
+
+	dev_info(&i2c_client->dev, "Cirrus Logic CS35L%d, Revision: %02X\n",
+		 cs35l36->chip_version, reg_revid >> 8);
+
+	ret =  devm_snd_soc_register_component(dev, &soc_component_dev_cs35l36,
+					       cs35l36_dai,
+					       ARRAY_SIZE(cs35l36_dai));
+	if (ret < 0) {
+		dev_err(dev, "%s: Register component failed %d\n", __func__,
+			ret);
+		goto err;
+	}
+
+	return 0;
+
+err:
+	gpiod_set_value_cansleep(cs35l36->reset_gpio, 0);
+
+err_disable_regs:
+	regulator_bulk_disable(cs35l36->num_supplies, cs35l36->supplies);
+	return ret;
+}
+
+static int cs35l36_i2c_remove(struct i2c_client *client)
+{
+	struct cs35l36_private *cs35l36 = i2c_get_clientdata(client);
+
+	/* Reset interrupt masks for device removal */
+	regmap_write(cs35l36->regmap, CS35L36_INT1_MASK,
+		     CS35L36_INT1_MASK_RESET);
+	regmap_write(cs35l36->regmap, CS35L36_INT3_MASK,
+		     CS35L36_INT3_MASK_RESET);
+
+	if (cs35l36->reset_gpio)
+		gpiod_set_value_cansleep(cs35l36->reset_gpio, 0);
+
+	regulator_bulk_disable(cs35l36->num_supplies, cs35l36->supplies);
+
+	return 0;
+}
+static const struct of_device_id cs35l36_of_match[] = {
+	{.compatible = "cirrus,cs35l36"},
+	{},
+};
+MODULE_DEVICE_TABLE(of, cs35l36_of_match);
+
+static const struct i2c_device_id cs35l36_id[] = {
+	{"cs35l36", 0},
+	{}
+};
+
+MODULE_DEVICE_TABLE(i2c, cs35l36_id);
+
+static struct i2c_driver cs35l36_i2c_driver = {
+	.driver = {
+		.name = "cs35l36",
+		.of_match_table = cs35l36_of_match,
+	},
+	.id_table = cs35l36_id,
+	.probe = cs35l36_i2c_probe,
+	.remove = cs35l36_i2c_remove,
+};
+module_i2c_driver(cs35l36_i2c_driver);
+
+MODULE_DESCRIPTION("ASoC CS35L36 driver");
+MODULE_AUTHOR("James Schulman, Cirrus Logic Inc, <james.schulman@cirrus.com>");
+MODULE_LICENSE("GPL");
diff --git a/sound/soc/codecs/cs35l36.h b/sound/soc/codecs/cs35l36.h
new file mode 100644
index 000000000000..f6e38c633b93
--- /dev/null
+++ b/sound/soc/codecs/cs35l36.h
@@ -0,0 +1,446 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * cs35l36.h -- CS35L36 ALSA SoC audio driver
+ *
+ * Copyright 2018 Cirrus Logic, Inc.
+ *
+ * Author: James Schulman <james.schulman@cirrus.com>
+ *
+ */
+
+#ifndef __CS35L36_H__
+#define __CS35L36_H__
+
+#include <linux/regmap.h>
+
+#define CS35L36_FIRSTREG		0x00000000
+#define CS35L36_LASTREG			0x00E037FC
+#define CS35L36_SW_RESET		0x00000000
+#define CS35L36_SW_REV			0x00000004
+#define CS35L36_HW_REV			0x00000008
+#define CS35L36_TESTKEY_CTRL		0x00000020
+#define CS35L36_USERKEY_CTL		0x00000024
+#define CS35L36_OTP_MEM30		0x00000478
+#define CS35L36_OTP_CTRL1		0x00000500
+#define CS35L36_OTP_CTRL2		0x00000504
+#define CS35L36_OTP_CTRL3		0x00000508
+#define CS35L36_OTP_CTRL4		0x0000050C
+#define CS35L36_OTP_CTRL5		0x00000510
+#define CS35L36_PAC_CTL1		0x00000C00
+#define CS35L36_PAC_CTL2		0x00000C04
+#define CS35L36_PAC_CTL3		0x00000C08
+#define CS35L36_DEVICE_ID		0x00002004
+#define CS35L36_FAB_ID			0x00002008
+#define CS35L36_REV_ID			0x0000200C
+#define CS35L36_PWR_CTRL1		0x00002014
+#define CS35L36_PWR_CTRL2		0x00002018
+#define CS35L36_PWR_CTRL3		0x0000201C
+#define CS35L36_CTRL_OVRRIDE		0x00002020
+#define CS35L36_AMP_OUT_MUTE		0x00002024
+#define CS35L36_OTP_TRIM_STATUS		0x00002028
+#define CS35L36_DISCH_FILT		0x0000202C
+#define CS35L36_OSC_TRIM		0x00002030
+#define CS35L36_PROTECT_REL_ERR		0x00002034
+#define CS35L36_PAD_INTERFACE		0x00002400
+#define CS35L36_PLL_CLK_CTRL		0x00002C04
+#define CS35L36_GLOBAL_CLK_CTRL		0x00002C0C
+#define CS35L36_ADC_CLK_CTRL		0x00002C10
+#define CS35L36_SWIRE_CLK_CTRL		0x00002C14
+#define CS35L36_SP_SCLK_CLK_CTRL	0x00002D00
+#define CS35L36_TST_FS_MON0		0x00002D10
+#define CS35L36_PLL_LOOP_PARAMS		0x00003008
+#define CS35L36_DCO_CTRL		0x00003010
+#define CS35L36_MISC_CTRL		0x00003014
+#define CS35L36_MDSYNC_EN		0x00003404
+#define CS35L36_MDSYNC_TX_ID		0x00003408
+#define CS35L36_MDSYNC_PWR_CTRL		0x0000340C
+#define CS35L36_MDSYNC_DATA_TX		0x00003410
+#define CS35L36_MDSYNC_TX_STATUS	0x0000341C
+#define CS35L36_MDSYNC_RX_STATUS	0x00003420
+#define CS35L36_MDSYNC_ERR_STATUS	0x00003424
+#define CS35L36_BSTCVRT_VCTRL1		0x00003800
+#define CS35L36_BSTCVRT_VCTRL2		0x00003804
+#define CS35L36_BSTCVRT_PEAK_CUR	0x00003808
+#define CS35L36_BSTCVRT_SFT_RAMP	0x0000380C
+#define CS35L36_BSTCVRT_COEFF		0x00003810
+#define CS35L36_BSTCVRT_SLOPE_LBST	0x00003814
+#define CS35L36_BSTCVRT_SW_FREQ		0x00003818
+#define CS35L36_BSTCVRT_DCM_CTRL	0x0000381C
+#define CS35L36_BSTCVRT_DCM_MODE_FORCE	0x00003820
+#define CS35L36_BSTCVRT_OVERVOLT_CTRL	0x00003830
+#define CS35L36_BST_TST_MANUAL		0x0000393C
+#define CS35L36_BST_ANA2_TEST		0x0000394C
+#define CS35L36_VPI_LIMIT_MODE		0x00003C04
+#define CS35L36_VPI_LIMIT_MINMAX	0x00003C08
+#define CS35L36_VPI_VP_THLD		0x00003C0C
+#define CS35L36_VPI_TRACK_CTRL		0x00003C10
+#define CS35L36_VPI_TRIG_MODE_CTRL	0x00003C14
+#define CS35L36_VPI_TRIG_STEPS		0x00003C18
+#define CS35L36_VI_SPKMON_FILT		0x00004004
+#define CS35L36_VI_SPKMON_GAIN		0x00004008
+#define CS35L36_VI_SPKMON_IP_SEL	0x00004100
+#define CS35L36_DTEMP_WARN_THLD		0x00004220
+#define CS35L36_DTEMP_STATUS		0x00004300
+#define CS35L36_VPVBST_FS_SEL		0x00004400
+#define CS35L36_VPVBST_VP_CTRL		0x00004440
+#define CS35L36_VPVBST_VBST_CTRL	0x00004444
+#define CS35L36_ASP_TX_PIN_CTRL		0x00004800
+#define CS35L36_ASP_RATE_CTRL		0x00004804
+#define CS35L36_ASP_FORMAT		0x00004808
+#define CS35L36_ASP_FRAME_CTRL		0x00004818
+#define CS35L36_ASP_TX1_TX2_SLOT	0x0000481C
+#define CS35L36_ASP_TX3_TX4_SLOT	0x00004820
+#define CS35L36_ASP_TX5_TX6_SLOT	0x00004824
+#define CS35L36_ASP_TX7_TX8_SLOT	0x00004828
+#define CS35L36_ASP_RX1_SLOT		0x0000482C
+#define CS35L36_ASP_RX_TX_EN		0x0000483C
+#define CS35L36_ASP_RX1_SEL		0x00004C00
+#define CS35L36_ASP_TX1_SEL		0x00004C20
+#define CS35L36_ASP_TX2_SEL		0x00004C24
+#define CS35L36_ASP_TX3_SEL		0x00004C28
+#define CS35L36_ASP_TX4_SEL		0x00004C2C
+#define CS35L36_ASP_TX5_SEL		0x00004C30
+#define CS35L36_ASP_TX6_SEL		0x00004C34
+#define CS35L36_SWIRE_P1_TX1_SEL	0x00004C40
+#define CS35L36_SWIRE_P1_TX2_SEL	0x00004C44
+#define CS35L36_SWIRE_P2_TX1_SEL	0x00004C60
+#define CS35L36_SWIRE_P2_TX2_SEL	0x00004C64
+#define CS35L36_SWIRE_P2_TX3_SEL	0x00004C68
+#define CS35L36_SWIRE_DP1_FIFO_CFG	0x00005000
+#define CS35L36_SWIRE_DP2_FIFO_CFG	0x00005004
+#define CS35L36_SWIRE_DP3_FIFO_CFG	0x00005008
+#define CS35L36_SWIRE_PCM_RX_DATA	0x0000500C
+#define CS35L36_SWIRE_FS_SEL		0x00005010
+#define CS35L36_SPARE_CP_BITS		0x00005C00
+#define CS35L36_AMP_DIG_VOL_CTRL	0x00006000
+#define CS35L36_VPBR_CFG		0x00006404
+#define CS35L36_VBBR_CFG		0x00006408
+#define CS35L36_VPBR_STATUS		0x0000640C
+#define CS35L36_VBBR_STATUS		0x00006410
+#define CS35L36_OVERTEMP_CFG		0x00006414
+#define CS35L36_AMP_ERR_VOL		0x00006418
+#define CS35L36_CLASSH_CFG		0x00006800
+#define CS35L36_CLASSH_FET_DRV_CFG	0x00006804
+#define CS35L36_NG_CFG			0x00006808
+#define CS35L36_AMP_GAIN_CTRL		0x00006C04
+#define CS35L36_PWM_MOD_IO_CTRL		0x0000706C
+#define CS35L36_PWM_MOD_STATUS		0x00007070
+#define CS35L36_DAC_MSM_CFG		0x00007400
+#define CS35L36_AMP_SLOPE_CTRL		0x00007410
+#define CS35L36_AMP_PDM_VOLUME		0x00007E04
+#define CS35L36_AMP_PDM_RATE_CTRL	0x00007E08
+#define CS35L36_PDM_CH_SEL		0x00007E10
+#define CS35L36_AMP_NG_CTRL		0x00007E14
+#define CS35L36_PDM_HIGHFILT_CTRL	0x00007E3C
+#define CS35L36_INT1_STATUS		0x00D00000
+#define CS35L36_INT2_STATUS		0x00D00004
+#define CS35L36_INT3_STATUS		0x00D00008
+#define CS35L36_INT4_STATUS		0x00D0000C
+#define CS35L36_INT1_RAW_STATUS		0x00D00020
+#define CS35L36_INT2_RAW_STATUS		0x00D00024
+#define CS35L36_INT3_RAW_STATUS		0x00D00028
+#define CS35L36_INT4_RAW_STATUS		0x00D0002C
+#define CS35L36_INT1_MASK		0x00D00040
+#define CS35L36_INT2_MASK		0x00D00044
+#define CS35L36_INT3_MASK		0x00D00048
+#define CS35L36_INT4_MASK		0x00D0004C
+#define CS35L36_INT1_EDGE_LVL_CTRL	0x00D00060
+#define CS35L36_INT3_EDGE_LVL_CTRL	0x00D00068
+#define CS35L36_PAC_INT_STATUS		0x00D00200
+#define CS35L36_PAC_INT_RAW_STATUS	0x00D00210
+#define CS35L36_PAC_INT_FLUSH_CTRL	0x00D00218
+#define CS35L36_PAC_INT0_CTRL		0x00D00220
+#define CS35L36_PAC_INT1_CTRL		0x00D00224
+#define CS35L36_PAC_INT2_CTRL		0x00D00228
+#define CS35L36_PAC_INT3_CTRL		0x00D0022C
+#define CS35L36_PAC_INT4_CTRL		0x00D00230
+#define CS35L36_PAC_INT5_CTRL		0x00D00234
+#define CS35L36_PAC_INT6_CTRL		0x00D00238
+#define CS35L36_PAC_INT7_CTRL		0x00D0023C
+#define CS35L36_PAC_PMEM_WORD0		0x00E02800
+#define CS35L36_PAC_PMEM_WORD1		0x00E02804
+#define CS35L36_PAC_PMEM_WORD1023	0x00E037FC
+
+#define CS35L36_INTPAC_REG_COUNT	25
+#define CS35L36_CHIP_ID			0x00035A36
+
+#define CS35L36_INT_OUTPUT_EN_MASK	0x01
+#define CS35L36_INT_GPIO_SEL_MASK	0x02
+#define CS35L36_INT_GPIO_SEL_SHIFT	1
+#define CS35L36_INT_POL_SEL_MASK	0x04
+#define CS35L36_INT_POL_SEL_SHIFT	2
+#define CS35L36_INT_DRV_SEL_MASK	0x20
+#define CS35L36_INT_DRV_SEL_SHIFT	5
+#define CS35L36_IRQ_SRC_MASK		0x08
+#define CS35L36_IRQ_SRC_SHIFT		3
+
+#define CS35L36_SCLK_MSTR_MASK		0x40
+#define CS35L36_SCLK_MSTR_SHIFT		6
+#define CS35L36_LRCLK_MSTR_MASK		0x01
+#define CS35L36_LRCLK_MSTR_SHIFT	0
+#define CS35L36_SCLK_INV_MASK		0x100
+#define CS35L36_SCLK_INV_SHIFT		8
+#define CS35L36_LRCLK_INV_MASK		0x04
+#define CS35L36_LRCLK_INV_SHIFT		2
+#define CS35L36_SCLK_FRC_MASK		0x80
+#define CS35L36_SCLK_FRC_SHIFT		7
+#define CS35L36_LRCLK_FRC_MASK		0x02
+#define CS35L36_LRCLK_FRC_SHIFT		1
+
+#define CS35L36_PDM_MODE_MASK		0x01
+#define CS35L36_PDM_MODE_SHIFT		0
+
+#define CS35L36_ASP_FMT_MASK		0x07
+#define CS35L36_ASP_FMT_SHIFT		0
+
+#define CS35L36_ASP_RX_WIDTH_MASK	0xFF0000
+#define CS35L36_ASP_RX_WIDTH_SHIFT	16
+#define CS35L36_ASP_TX_WIDTH_MASK	0xFF
+#define CS35L36_ASP_TX_WIDTH_SHIFT	0
+#define CS35L36_ASP_WIDTH_16		0x10
+#define CS35L36_ASP_WIDTH_24		0x18
+#define CS35L36_ASP_WIDTH_32		0x20
+
+#define CS35L36_ASP_RX1_SLOT_MASK	0x3F
+#define CS35L36_ASP_RX1_EN_MASK		0x00010000
+#define CS35L36_ASP_RX1_EN_SHIFT	16
+
+#define CS35L36_ASP_TX1_SLOT_MASK	0x3F
+#define CS35L36_ASP_TX2_SLOT_MASK	0x3F0000
+#define CS35L36_ASP_TX2_SLOT_SHIFT	16
+#define CS35L36_ASP_TX3_SLOT_MASK	0x3F
+#define CS35L36_ASP_TX4_SLOT_MASK	0x3F0000
+#define CS35L36_ASP_TX4_SLOT_SHIFT	16
+#define CS35L36_ASP_TX5_SLOT_MASK	0x3F
+#define CS35L36_ASP_TX6_SLOT_MASK	0x3F0000
+#define CS35L36_ASP_TX6_SLOT_SHIFT	16
+#define CS35L36_ASP_TX7_SLOT_MASK	0x3F
+#define CS35L36_ASP_TX8_SLOT_MASK	0x3F0000
+#define CS35L36_ASP_TX8_SLOT_SHIFT	16
+#define CS35L36_ASP_TX_HIZ_MASK		0x200000
+
+#define CS35L36_APS_TX_SEL_MASK		0x7F
+
+#define CS35L36_ASP_TX1_EN_MASK		0x01
+#define CS35L36_ASP_TX2_EN_MASK		0x02
+#define CS35L36_ASP_TX2_EN_SHIFT	1
+#define CS35L36_ASP_TX3_EN_MASK		0x04
+#define CS35L36_ASP_TX3_EN_SHIFT	2
+#define CS35L36_ASP_TX4_EN_MASK		0x08
+#define CS35L36_ASP_TX4_EN_SHIFT	3
+#define CS35L36_ASP_TX5_EN_MASK		0x10
+#define CS35L36_ASP_TX5_EN_SHIFT	4
+#define CS35L36_ASP_TX6_EN_MASK		0x20
+#define CS35L36_ASP_TX6_EN_SHIFT	5
+#define CS35L36_ASP_TX7_EN_MASK		0x40
+#define CS35L36_ASP_TX7_EN_SHIFT	6
+#define CS35L36_ASP_TX8_EN_MASK		0x80
+#define CS35L36_ASP_TX8_EN_SHIFT	7
+
+
+#define CS35L36_PLL_CLK_SEL_MASK	0x07
+#define CS35L36_PLL_CLK_SEL_SHIFT	0
+#define CS35L36_PLLSRC_SCLK		0
+#define CS35L36_PLLSRC_LRCLK		1
+#define CS35L36_PLLSRC_SELF		3
+#define CS35L36_PLLSRC_PDMCLK		4
+#define CS35L36_PLLSRC_MCLK		5
+#define CS35L36_PLLSRC_SWIRE		7
+#define CS35L36_REFCLK_FREQ_MASK	0x7E0
+#define CS35L36_REFCLK_FREQ_SHIFT	5
+#define CS35L36_PLL_OPENLOOP_MASK	0x800
+#define CS35L36_PLL_OPENLOOP_SHIFT	11
+#define CS35L36_PLL_REFCLK_EN_MASK	0x10
+#define CS35L36_PLL_REFCLK_EN_SHIFT	4
+
+
+#define CS35L36_GLOBAL_FS_MASK		0x1F
+#define CS35L36_GLOBAL_FS_SHIFT		0
+
+#define CS35L36_HPF_PCM_EN_MASK		0x800
+#define CS35L36_HPF_PCM_EN_SHIFT	15
+#define CS35L36_PCM_RX_SEL_MASK		0x7F
+#define CS35L36_PCM_RX_SEL_SHIFT	0
+
+#define CS35L36_PCM_RX_SEL_ZERO		0x00
+#define CS35L36_PCM_RX_SEL_PCM		0x08
+#define CS35L36_PCM_RX_SEL_SWIRE	0x10
+#define CS35L36_PCM_RX_SEL_DIAG		0x04
+
+#define CS35L36_GLOBAL_EN_MASK		0x01
+#define CS35L36_GLOBAL_EN_SHIFT		0x00
+
+#define CS35L36_AMP_PCM_INV_MASK	0x4000
+#define CS35L36_AMP_PCM_INV_SHIFT	14
+
+#define CS35L36_AMP_VOL_PCM_MASK	0x3FF8
+#define CS35L36_AMP_VOL_PCM_SHIFT	3
+#define CS35L36_DIGITAL_MUTE		0x04CF
+
+#define CS35L36_AMP_RAMP_MASK		0x0007
+#define CS35L36_AMP_RAMP_SHIFT		0
+
+#define CS35L36_AMP_MUTE_MASK		0x0010
+#define CS35L36_AMP_MUTE_SHIFT		4
+
+#define CS35L36_GLOBAL_RESYNC_FS1_MASK	0x00000200
+#define CS35L36_GLOBAL_RESYNC_FS2_MASK	0x00000400
+#define CS35L36_SYNC_GLOBAL_OVR_MASK	0x00000002
+#define CS35L36_SYNC_GLOBAL_OVR_SHIFT	1
+
+#define CS35L36_REFCLK_IN_MASK		0x00100000
+#define CS35L36_PLL_UNLOCK_MASK		0x00002000
+
+#define CS35L36_ASP_RX_UDF_MASK		0x00000040
+#define CS35L36_ASP_RX_OVF_MASK		0x00000080
+
+#define CS35L36_IMON_POL_MASK		0x02
+#define CS35L36_IMON_POL_SHIFT		1
+
+#define CS35L36_VMON_POL_MASK		0x01
+#define CS35L36_VMON_POL_SHIFT		0
+
+#define CS35L36_PDN_DONE		0x40
+#define CS35L36_PDN_DONE_SHIFT		6
+#define CS35L36_PUP_DONE		0x80
+#define CS35L36_PUP_DONE_SHIFT		7
+#define CS35L36_GLOBAL_EN_ASSRT		0x20
+#define CS35L36_PUP_DONE_IRQ_UNMASK	0x7F
+#define CS35L36_PUP_DONE_IRQ_MASK	0xBF
+
+#define CS35L36_FS1_WINDOW_MASK		0x000007FF
+#define CS35L36_FS2_WINDOW_MASK		0x00FFF800
+#define CS35L36_FS2_WINDOW_SHIFT	12
+
+#define CS35L36_PLL_FFL_IGAIN_MASK	0x0F
+#define CS35L36_PLL_IGAIN_MASK		0x3F0
+#define CS35L36_PLL_IGAIN_SHIFT		4
+#define CS35L36_PLL_IGAIN		0x04
+
+#define CS35L36_BST_EN_MASK		0x30
+#define CS35L36_BST_EN			0x02
+#define CS35L36_BST_DIS_VP		0x01
+#define CS35L36_BST_DIS_EXTN		0x00
+#define CS35L36_BST_EN_SHIFT		4
+#define CS35L36_BST_MAN_IPKCOMP_MASK	0x200
+#define CS35L36_BST_MAN_IPKCOMP_SHIFT	9
+
+#define CS35L36_BST_MAN_IPKCOMP_EN_MASK		0x100
+#define CS35L36_BST_MAN_IPKCOMP_EN_SHIFT	8
+
+#define CS35L36_BST_IPK_MASK		0x7F
+#define CS35L36_BST_OVP_THLD_MASK	0x3F
+#define CS35L36_BST_OVP_THLD_11V	0x10
+#define CS35L36_BST_OVP_TRIM_MASK	0x00078000
+#define CS35L36_BST_OVP_TRIM_SHIFT	15
+#define CS35L36_BST_OVP_TRIM_11V	0x0C
+#define CS35L36_BST_CTRL_LIM_MASK	0x04
+#define CS35L36_BST_CTRL_LIM_SHIFT	2
+#define CS35L36_BST_CTRL_10V_CLAMP	0x96
+
+#define CS35L36_NG_AMP_EN_MASK		0x3F00
+#define CS35L36_NG_DELAY_MASK		0x70
+#define CS35L36_NG_DELAY_SHIFT		4
+#define CS35L36_AMP_ZC_SHIFT		10
+#define CS35L36_PDM_LDM_ENTER_SHIFT	3
+#define CS35L36_PDM_LDM_EXIT_SHIFT	4
+
+#define CS35L36_BSTCVRT_K1_MASK		0xFF
+#define CS35L36_BSTCVRT_K2_MASK		0xFF00
+#define CS35L36_BSTCVRT_K2_SHIFT	8
+#define CS35L36_BSTCVRT_SLOPE_MASK	0xFF00
+#define CS35L36_BSTCVRT_SLOPE_SHIFT	8
+#define CS35L36_BSTCVRT_CCMFREQ_MASK	0x0F
+#define CS35L36_BSTCVRT_LBSTVAL_MASK	0x03
+#define CS35L35_BSTCVRT_CTL_MASK	0xFF
+#define CS35L35_BSTCVRT_CTL_SEL_MASK	0x03
+#define CS35L36_DCM_AUTO_MASK		0x01
+
+#define CS35L36_INT1_MASK_DEFAULT	0xF9BA7FFF
+#define CS35L36_INT1_MASK_RESET		0xFFFFFFFF
+#define CS35L36_INT3_MASK_DEFAULT	0xFFFFEFFF
+#define CS35L36_INT3_MASK_RESET		0xFFFFFFFF
+
+
+#define CS35L36_AMP_SHORT_ERR		0x1000
+#define CS35L36_BST_SHORT_ERR		0x40000
+#define CS35L36_TEMP_WARN		0x2000000
+#define CS35L36_TEMP_ERR		0x4000000
+#define CS35L36_BST_OVP_ERR		0x10000
+#define CS35L36_BST_DCM_UVP_ERR		0x20000
+
+#define CS35L36_AMP_SHORT_ERR_RLS	0x02
+#define CS35L36_BST_SHORT_ERR_RLS	0x04
+#define CS35L36_BST_OVP_ERR_RLS		0x08
+#define CS35L36_BST_UVP_ERR_RLS		0x10
+#define CS35L36_TEMP_WARN_ERR_RLS	0x20
+#define CS35L36_TEMP_ERR_RLS		0x40
+#define CS35L36_TEMP_THLD_MASK		0x03
+
+#define CS35L36_REV_B0			0xb0
+#define CS35L36_REV_A0			0xa0
+#define CS35L36_B0_PAC_PATCH		0x00DD0102
+
+#define CS35L36_OTP_ECC_EN_MASK		0x400
+#define CS35L36_OTP_ECC_EN_SHIFT	10
+#define CS35L36_OTP_RUN_BOOT_MASK	0x01
+#define CS35L36_OTP_BOOT_DONE		0x2000000
+#define CS35L36_PAC_RESET_MASK		0x04
+#define CS35L36_PAC_RESET_SHIFT		2
+#define CS35L36_PAC_STALL_MASK		0x02
+#define CS35L36_PAC_STALL_SHIFT		1
+#define CS35L36_PAC_ENABLE_MASK		0x00000001
+#define CS35L36_PAC_MEM_ACCESS		0x01
+#define CS35L36_PAC_MEM_ACCESS_CLR	0
+#define CS35L36_SOFT_RESET		0x5AAA
+#define CS35L36_MCU_BOOT_COMPLETE	0x02
+#define CS35L36_MCU_CONFIG_UNMASK	0x00FEFFFF
+#define CS35L36_MCU_CONFIG_CLR		0x00010000
+#define CS35L36_MCU_CONFIG_MASK		0x00FFFFFF
+#define CS35L36_GPIO_INT_SEL_MASK	0x0000003B
+#define CS35L36_GPIO_INT_SEL_UNMASK	0x0000003A
+#define CS35L36_PAC_RESET		0x00000000
+#define CS35L36_OTP_REV_MASK		0x00FF0000
+#define CS35L36_OTP_REV_L37		0x00CC0000
+#define CS35L36_12V_L37			37
+#define CS35L36_10V_L36			36
+
+#define CS35L36_VPBR_EN_MASK		0x00001000
+#define CS35L36_VPBR_EN_SHIFT		12
+
+#define CS35L36_VPBR_THLD_MASK		0x0000001F
+#define CS35L36_VPBR_THLD_SHIFT		0
+#define CS35L36_VPBR_MAX_ATTN_MASK	0x00000F00
+#define CS35L36_VPBR_MAX_ATTN_SHIFT	8
+#define CS35L36_VPBR_ATK_VOL_MASK	0x0000F000
+#define CS35L36_VPBR_ATK_VOL_SHIFT	12
+#define CS35L36_VPBR_ATK_RATE_MASK	0x00070000
+#define CS35L36_VPBR_ATK_RATE_SHIFT	16
+#define CS35L36_VPBR_WAIT_MASK		0x00180000
+#define CS35L36_VPBR_WAIT_SHIFT		19
+#define CS35L36_VPBR_REL_RATE_MASK	0x00E00000
+#define CS35L36_VPBR_REL_RATE_SHIFT	21
+#define CS35L36_VPBR_MUTE_EN_MASK	0x01000000
+#define CS35L36_VPBR_MUTE_EN_SHIFT	24
+
+#define CS35L36_OSC_FREQ_TRIM_MASK	0x070
+#define CS35L36_OSC_TRIM_DONE		0x08
+
+#define CS35L36_FS1_DEFAULT_VAL		16
+#define CS35L36_FS2_DEFAULT_VAL		36
+#define CS35L36_FS_NOM_6MHZ		6000000
+
+#define CS35L36_TEST_UNLOCK1		0x00005555
+#define CS35L36_TEST_UNLOCK2		0x0000AAAA
+#define CS35L36_TEST_LOCK1		0x0000CCCC
+#define CS35L36_TEST_LOCK2		0x00003333
+
+#define CS35L36_PAC_PROG_MEM		512
+
+#define CS35L36_RX_FORMATS (SNDRV_PCM_FMTBIT_S16_LE | SNDRV_PCM_FMTBIT_S24_LE)
+#define CS35L36_TX_FORMATS (SNDRV_PCM_FMTBIT_S16_LE | SNDRV_PCM_FMTBIT_S24_LE \
+				| SNDRV_PCM_FMTBIT_S32_LE)
+
+extern const int cs35l36_a0_pac_patch[CS35L36_PAC_PROG_MEM];
+
+#endif
-- 
cgit v1.2.3


From 851ae31d34063deb1eae49f5d797a12a5557e832 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 8 Feb 2019 14:12:25 +0100
Subject: cfg80211: add missing kernel-doc for multi-BSSID fields

Add the missing kernel-doc for the new multi-BSSID fields
in struct cfg80211_bss.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 76efdc3a2200..b61b71f369c7 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -2035,6 +2035,10 @@ struct cfg80211_bss_ies {
  *	a BSS that hides the SSID in its beacon, this points to the BSS struct
  *	that holds the beacon data. @beacon_ies is still valid, of course, and
  *	points to the same data as hidden_beacon_bss->beacon_ies in that case.
+ * @transmitted_bss: pointer to the transmitted BSS, if this is a
+ *	non-transmitted one (multi-BSSID support)
+ * @nontrans_list: list of non-transmitted BSS, if this is a transmitted one
+ *	(multi-BSSID support)
  * @signal: signal strength value (type depends on the wiphy's signal_type)
  * @chains: bitmask for filled values in @chain_signal.
  * @chain_signal: per-chain signal strength of last received BSS in dBm.
-- 
cgit v1.2.3


From bb580602f3924976d8bc36c171266de73e92cbf7 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Mon, 4 Feb 2019 16:42:24 +0100
Subject: ALSA: pcm: Define snd_pcm_lib_preallocate_*() as returning void

Now all callers no longer check the return value from
snd_pcm_lib_preallocate_pages() and co, let's make them to return
void, so that any new code won't fall into the same pitfall.

Reviewed-by: Jaroslav Kysela <perex@perex.cz>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/sound/pcm.h     |  8 ++++----
 sound/core/pcm_memory.c | 29 ++++++++---------------------
 2 files changed, 12 insertions(+), 25 deletions(-)

(limited to 'include')

diff --git a/include/sound/pcm.h b/include/sound/pcm.h
index ca20f80f8976..465d7d033c4c 100644
--- a/include/sound/pcm.h
+++ b/include/sound/pcm.h
@@ -1185,12 +1185,12 @@ static inline void snd_pcm_gettime(struct snd_pcm_runtime *runtime,
  *  Memory
  */
 
-int snd_pcm_lib_preallocate_free(struct snd_pcm_substream *substream);
-int snd_pcm_lib_preallocate_free_for_all(struct snd_pcm *pcm);
-int snd_pcm_lib_preallocate_pages(struct snd_pcm_substream *substream,
+void snd_pcm_lib_preallocate_free(struct snd_pcm_substream *substream);
+void snd_pcm_lib_preallocate_free_for_all(struct snd_pcm *pcm);
+void snd_pcm_lib_preallocate_pages(struct snd_pcm_substream *substream,
 				  int type, struct device *data,
 				  size_t size, size_t max);
-int snd_pcm_lib_preallocate_pages_for_all(struct snd_pcm *pcm,
+void snd_pcm_lib_preallocate_pages_for_all(struct snd_pcm *pcm,
 					  int type, void *data,
 					  size_t size, size_t max);
 int snd_pcm_lib_malloc_pages(struct snd_pcm_substream *substream, size_t size);
diff --git a/sound/core/pcm_memory.c b/sound/core/pcm_memory.c
index 4012a3a01de1..ed73be80bd29 100644
--- a/sound/core/pcm_memory.c
+++ b/sound/core/pcm_memory.c
@@ -87,13 +87,10 @@ static void snd_pcm_lib_preallocate_dma_free(struct snd_pcm_substream *substream
  * @substream: the pcm substream instance
  *
  * Releases the pre-allocated buffer of the given substream.
- *
- * Return: Zero if successful, or a negative error code on failure.
  */
-int snd_pcm_lib_preallocate_free(struct snd_pcm_substream *substream)
+void snd_pcm_lib_preallocate_free(struct snd_pcm_substream *substream)
 {
 	snd_pcm_lib_preallocate_dma_free(substream);
-	return 0;
 }
 
 /**
@@ -101,10 +98,8 @@ int snd_pcm_lib_preallocate_free(struct snd_pcm_substream *substream)
  * @pcm: the pcm instance
  *
  * Releases all the pre-allocated buffers on the given pcm.
- *
- * Return: Zero if successful, or a negative error code on failure.
  */
-int snd_pcm_lib_preallocate_free_for_all(struct snd_pcm *pcm)
+void snd_pcm_lib_preallocate_free_for_all(struct snd_pcm *pcm)
 {
 	struct snd_pcm_substream *substream;
 	int stream;
@@ -112,7 +107,6 @@ int snd_pcm_lib_preallocate_free_for_all(struct snd_pcm *pcm)
 	for (stream = 0; stream < 2; stream++)
 		for (substream = pcm->streams[stream].substream; substream; substream = substream->next)
 			snd_pcm_lib_preallocate_free(substream);
-	return 0;
 }
 EXPORT_SYMBOL(snd_pcm_lib_preallocate_free_for_all);
 
@@ -214,7 +208,7 @@ static inline void preallocate_info_init(struct snd_pcm_substream *substream)
 /*
  * pre-allocate the buffer and create a proc file for the substream
  */
-static int snd_pcm_lib_preallocate_pages1(struct snd_pcm_substream *substream,
+static void snd_pcm_lib_preallocate_pages1(struct snd_pcm_substream *substream,
 					  size_t size, size_t max)
 {
 
@@ -225,7 +219,6 @@ static int snd_pcm_lib_preallocate_pages1(struct snd_pcm_substream *substream,
 		substream->buffer_bytes_max = substream->dma_buffer.bytes;
 	substream->dma_max = max;
 	preallocate_info_init(substream);
-	return 0;
 }
 
 
@@ -238,16 +231,14 @@ static int snd_pcm_lib_preallocate_pages1(struct snd_pcm_substream *substream,
  * @max: the max. allowed pre-allocation size
  *
  * Do pre-allocation for the given DMA buffer type.
- *
- * Return: Zero if successful, or a negative error code on failure.
  */
-int snd_pcm_lib_preallocate_pages(struct snd_pcm_substream *substream,
+void snd_pcm_lib_preallocate_pages(struct snd_pcm_substream *substream,
 				  int type, struct device *data,
 				  size_t size, size_t max)
 {
 	substream->dma_buffer.dev.type = type;
 	substream->dma_buffer.dev.dev = data;
-	return snd_pcm_lib_preallocate_pages1(substream, size, max);
+	snd_pcm_lib_preallocate_pages1(substream, size, max);
 }
 EXPORT_SYMBOL(snd_pcm_lib_preallocate_pages);
 
@@ -261,21 +252,17 @@ EXPORT_SYMBOL(snd_pcm_lib_preallocate_pages);
  *
  * Do pre-allocation to all substreams of the given pcm for the
  * specified DMA type.
- *
- * Return: Zero if successful, or a negative error code on failure.
  */
-int snd_pcm_lib_preallocate_pages_for_all(struct snd_pcm *pcm,
+void snd_pcm_lib_preallocate_pages_for_all(struct snd_pcm *pcm,
 					  int type, void *data,
 					  size_t size, size_t max)
 {
 	struct snd_pcm_substream *substream;
-	int stream, err;
+	int stream;
 
 	for (stream = 0; stream < 2; stream++)
 		for (substream = pcm->streams[stream].substream; substream; substream = substream->next)
-			if ((err = snd_pcm_lib_preallocate_pages(substream, type, data, size, max)) < 0)
-				return err;
-	return 0;
+			snd_pcm_lib_preallocate_pages(substream, type, data, size, max);
 }
 EXPORT_SYMBOL(snd_pcm_lib_preallocate_pages_for_all);
 
-- 
cgit v1.2.3


From f3d5e4f18dba18d7c2303dda68b9dbcf5ccc05cd Mon Sep 17 00:00:00 2001
From: Alexander Shiyan <shc_work@mail.ru>
Date: Sat, 19 Jan 2019 07:52:01 +0300
Subject: ata: pata_of_platform: Allow to use 16-bit wide data transfer

In some cases, the system bus can be configured for 16-bit mode,
in this case using read/write functions for 32-bit values
results in two cycles of 16 bits each, which is wrong.
This patch adds the devicetree flag to switch the driver to
use 16-bit mode for I/O transfers.

Acked-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
Signed-off-by: Alexander Shiyan <shc_work@mail.ru>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/ata/pata_of_platform.c |  6 +++++-
 drivers/ata/pata_platform.c    | 22 ++++++++++++----------
 include/linux/ata_platform.h   |  3 ++-
 3 files changed, 19 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/drivers/ata/pata_of_platform.c b/drivers/ata/pata_of_platform.c
index 01161c1aef4d..7a0b1759e5f0 100644
--- a/drivers/ata/pata_of_platform.c
+++ b/drivers/ata/pata_of_platform.c
@@ -32,6 +32,7 @@ static int pata_of_platform_probe(struct platform_device *ofdev)
 	unsigned int reg_shift = 0;
 	int pio_mode = 0;
 	int pio_mask;
+	bool use16bit;
 
 	ret = of_address_to_resource(dn, 0, &io_res);
 	if (ret) {
@@ -60,11 +61,14 @@ static int pata_of_platform_probe(struct platform_device *ofdev)
 		dev_info(&ofdev->dev, "pio-mode unspecified, assuming PIO0\n");
 	}
 
+	use16bit = of_property_read_bool(dn, "ata-generic,use16bit");
+
 	pio_mask = 1 << pio_mode;
 	pio_mask |= (1 << pio_mode) - 1;
 
 	return __pata_platform_probe(&ofdev->dev, &io_res, &ctl_res, irq_res,
-				     reg_shift, pio_mask, &pata_platform_sht);
+				     reg_shift, pio_mask, &pata_platform_sht,
+				     use16bit);
 }
 
 static const struct of_device_id pata_of_platform_match[] = {
diff --git a/drivers/ata/pata_platform.c b/drivers/ata/pata_platform.c
index d6f8f5406442..31cd0f39b0a7 100644
--- a/drivers/ata/pata_platform.c
+++ b/drivers/ata/pata_platform.c
@@ -47,13 +47,6 @@ static struct scsi_host_template pata_platform_sht = {
 	ATA_PIO_SHT(DRV_NAME),
 };
 
-static struct ata_port_operations pata_platform_port_ops = {
-	.inherits		= &ata_sff_port_ops,
-	.sff_data_xfer		= ata_sff_data_xfer32,
-	.cable_detect		= ata_cable_unknown,
-	.set_mode		= pata_platform_set_mode,
-};
-
 static void pata_platform_setup_port(struct ata_ioports *ioaddr,
 				     unsigned int shift)
 {
@@ -79,6 +72,7 @@ static void pata_platform_setup_port(struct ata_ioports *ioaddr,
  *	@ioport_shift: I/O port shift
  *	@__pio_mask: PIO mask
  *	@sht: scsi_host_template to use when registering
+ *	@use16bit: Flag to indicate 16-bit IO instead of 32-bit
  *
  *	Register a platform bus IDE interface. Such interfaces are PIO and we
  *	assume do not support IRQ sharing.
@@ -101,7 +95,7 @@ static void pata_platform_setup_port(struct ata_ioports *ioaddr,
 int __pata_platform_probe(struct device *dev, struct resource *io_res,
 			  struct resource *ctl_res, struct resource *irq_res,
 			  unsigned int ioport_shift, int __pio_mask,
-			  struct scsi_host_template *sht)
+			  struct scsi_host_template *sht, bool use16bit)
 {
 	struct ata_host *host;
 	struct ata_port *ap;
@@ -131,7 +125,15 @@ int __pata_platform_probe(struct device *dev, struct resource *io_res,
 		return -ENOMEM;
 	ap = host->ports[0];
 
-	ap->ops = &pata_platform_port_ops;
+	ap->ops = devm_kzalloc(dev, sizeof(*ap->ops), GFP_KERNEL);
+	ap->ops->inherits = &ata_sff_port_ops;
+	ap->ops->cable_detect = ata_cable_unknown;
+	ap->ops->set_mode = pata_platform_set_mode;
+	if (use16bit)
+		ap->ops->sff_data_xfer = ata_sff_data_xfer;
+	else
+		ap->ops->sff_data_xfer = ata_sff_data_xfer32;
+
 	ap->pio_mask = __pio_mask;
 	ap->flags |= ATA_FLAG_SLAVE_POSS;
 
@@ -218,7 +220,7 @@ static int pata_platform_probe(struct platform_device *pdev)
 
 	return __pata_platform_probe(&pdev->dev, io_res, ctl_res, irq_res,
 				     pp_info ? pp_info->ioport_shift : 0,
-				     pio_mask, &pata_platform_sht);
+				     pio_mask, &pata_platform_sht, false);
 }
 
 static struct platform_driver pata_platform_driver = {
diff --git a/include/linux/ata_platform.h b/include/linux/ata_platform.h
index ff2120215dec..9cafec92282d 100644
--- a/include/linux/ata_platform.h
+++ b/include/linux/ata_platform.h
@@ -19,7 +19,8 @@ extern int __pata_platform_probe(struct device *dev,
 				 struct resource *irq_res,
 				 unsigned int ioport_shift,
 				 int __pio_mask,
-				 struct scsi_host_template *sht);
+				 struct scsi_host_template *sht,
+				 bool use16bit);
 
 /*
  * Marvell SATA private data
-- 
cgit v1.2.3


From dbfc54534dfcaee004d54137d9b34d000f847e41 Mon Sep 17 00:00:00 2001
From: Jerome Brunet <jbrunet@baylibre.com>
Date: Fri, 1 Feb 2019 13:50:03 +0100
Subject: dt-bindings: reset: meson: add g12a bindings

Add device tree bindings for the reset controller of g12a SoC family.

Acked-by: Neil Armstrong <narmstrong@baylibre.com>
Signed-off-by: Jerome Brunet <jbrunet@baylibre.com>
Acked-by: Kevin Hilman <khilman@baylibre.com>
Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
---
 .../dt-bindings/reset/amlogic,meson-g12a-reset.h   | 134 +++++++++++++++++++++
 1 file changed, 134 insertions(+)
 create mode 100644 include/dt-bindings/reset/amlogic,meson-g12a-reset.h

(limited to 'include')

diff --git a/include/dt-bindings/reset/amlogic,meson-g12a-reset.h b/include/dt-bindings/reset/amlogic,meson-g12a-reset.h
new file mode 100644
index 000000000000..8063e8314eef
--- /dev/null
+++ b/include/dt-bindings/reset/amlogic,meson-g12a-reset.h
@@ -0,0 +1,134 @@
+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
+/*
+ * Copyright (c) 2019 BayLibre, SAS.
+ * Author: Jerome Brunet <jbrunet@baylibre.com>
+ *
+ */
+
+#ifndef _DT_BINDINGS_AMLOGIC_MESON_G12A_RESET_H
+#define _DT_BINDINGS_AMLOGIC_MESON_G12A_RESET_H
+
+/*	RESET0					*/
+#define RESET_HIU			0
+/*					1	*/
+#define RESET_DOS			2
+/*					3-4	*/
+#define RESET_VIU			5
+#define RESET_AFIFO			6
+#define RESET_VID_PLL_DIV		7
+/*					8-9	*/
+#define RESET_VENC			10
+#define RESET_ASSIST			11
+#define RESET_PCIE_CTRL_A		12
+#define RESET_VCBUS			13
+#define RESET_PCIE_PHY			14
+#define RESET_PCIE_APB			15
+#define RESET_GIC			16
+#define RESET_CAPB3_DECODE		17
+/*					18	*/
+#define RESET_HDMITX_CAPB3		19
+#define RESET_DVALIN_CAPB3		20
+#define RESET_DOS_CAPB3			21
+/*					22	*/
+#define RESET_CBUS_CAPB3		23
+#define RESET_AHB_CNTL			24
+#define RESET_AHB_DATA			25
+#define RESET_VCBUS_CLK81		26
+/*					27-31	*/
+/*	RESET1					*/
+/*					32	*/
+#define RESET_DEMUX			33
+#define RESET_USB			34
+#define RESET_DDR			35
+/*					36	*/
+#define RESET_BT656			37
+#define RESET_AHB_SRAM			38
+/*					39	*/
+#define RESET_PARSER			40
+/*					41	*/
+#define RESET_ISA			42
+#define RESET_ETHERNET			43
+#define RESET_SD_EMMC_A			44
+#define RESET_SD_EMMC_B			45
+#define RESET_SD_EMMC_C			46
+/*					47-60 */
+#define RESET_AUDIO_CODEC		61
+/*					62-63	*/
+/*	RESET2					*/
+/*					64	*/
+#define RESET_AUDIO			65
+#define RESET_HDMITX_PHY		66
+/*					67	*/
+#define RESET_MIPI_DSI_HOST		68
+#define RESET_ALOCKER			69
+#define RESET_GE2D			70
+#define RESET_PARSER_REG		71
+#define RESET_PARSER_FETCH		72
+#define RESET_CTL			73
+#define RESET_PARSER_TOP		74
+/*					75-77	*/
+#define RESET_DVALIN			78
+#define RESET_HDMITX			79
+/*					80-95	*/
+/*	RESET3					*/
+/*					96-95	*/
+#define RESET_DEMUX_TOP			105
+#define RESET_DEMUX_DES_PL		106
+#define RESET_DEMUX_S2P_0		107
+#define RESET_DEMUX_S2P_1		108
+#define RESET_DEMUX_0			109
+#define RESET_DEMUX_1			110
+#define RESET_DEMUX_2			111
+/*					112-127	*/
+/*	RESET4					*/
+/*					128-129	*/
+#define RESET_MIPI_DSI_PHY		130
+/*					131-132	*/
+#define RESET_RDMA			133
+#define RESET_VENCI			134
+#define RESET_VENCP			135
+/*					136	*/
+#define RESET_VDAC			137
+/*					138-139 */
+#define RESET_VDI6			140
+#define RESET_VENCL			141
+#define RESET_I2C_M1			142
+#define RESET_I2C_M2			143
+/*					144-159	*/
+/*	RESET5					*/
+/*					160-191	*/
+/*	RESET6					*/
+#define RESET_GEN			192
+#define RESET_SPICC0			193
+#define RESET_SC			194
+#define RESET_SANA_3			195
+#define RESET_I2C_M0			196
+#define RESET_TS_PLL			197
+#define RESET_SPICC1			198
+#define RESET_STREAM			199
+#define RESET_TS_CPU			200
+#define RESET_UART0			201
+#define RESET_UART1_2			202
+#define RESET_ASYNC0			203
+#define RESET_ASYNC1			204
+#define RESET_SPIFC0			205
+#define RESET_I2C_M3			206
+/*					207-223	*/
+/*	RESET7					*/
+#define RESET_USB_DDR_0			224
+#define RESET_USB_DDR_1			225
+#define RESET_USB_DDR_2			226
+#define RESET_USB_DDR_3			227
+#define RESET_TS_GPU			228
+#define RESET_DEVICE_MMC_ARB		229
+#define RESET_DVALIN_DMC_PIPL		230
+#define RESET_VID_LOCK			231
+#define RESET_NIC_DMC_PIPL		232
+#define RESET_DMC_VPU_PIPL		233
+#define RESET_GE2D_DMC_PIPL		234
+#define RESET_HCODEC_DMC_PIPL		235
+#define RESET_WAVE420_DMC_PIPL		236
+#define RESET_HEVCF_DMC_PIPL		237
+/*					238-255	*/
+
+#endif
-- 
cgit v1.2.3


From 61edb116cab9bf7d623e31bf7455a82bc042c087 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 8 Feb 2019 17:56:33 +0100
Subject: ieee80211: fix for_each_element_extid()

The data/datalen argument names cannot be used as those
are also the struct element names, fix that.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 8da5ba97328f..3c9dfcada45f 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -3299,8 +3299,8 @@ struct element {
 	for_each_element(element, data, datalen)			\
 		if (element->id == (_id))
 
-#define for_each_element_extid(element, extid, data, datalen)		\
-	for_each_element(element, data, datalen)			\
+#define for_each_element_extid(element, extid, _data, _datalen)		\
+	for_each_element(element, _data, _datalen)			\
 		if (element->id == WLAN_EID_EXTENSION &&		\
 		    element->datalen > 0 &&				\
 		    element->data[0] == (extid))
-- 
cgit v1.2.3


From 4d1f7a6eabd45639d9de22a8a004f3c208d13c1a Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Wed, 6 Feb 2019 22:49:46 +0200
Subject: gpiolib: acpi: Introduce ACPI_GPIO_QUIRK_ONLY_GPIOIO

New quirk enforces search for GPIO based on its type,
i.e. iterate over GpioIo resources only.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Tested-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/gpio/gpiolib-acpi.c           | 15 +++++--
 include/linux/acpi.h                  |  7 ++++
 sound/soc/intel/boards/bytcr_rt5651.c | 74 +++++------------------------------
 3 files changed, 27 insertions(+), 69 deletions(-)

(limited to 'include')

diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c
index 259cf6ab969b..4d291b75cb9f 100644
--- a/drivers/gpio/gpiolib-acpi.c
+++ b/drivers/gpio/gpiolib-acpi.c
@@ -530,17 +530,24 @@ static int acpi_populate_gpio_lookup(struct acpi_resource *ares, void *data)
 	if (ares->type != ACPI_RESOURCE_TYPE_GPIO)
 		return 1;
 
-	if (lookup->n++ == lookup->index && !lookup->desc) {
+	if (!lookup->desc) {
 		const struct acpi_resource_gpio *agpio = &ares->data.gpio;
-		int pin_index = lookup->pin_index;
+		bool gpioint = agpio->connection_type == ACPI_RESOURCE_GPIO_TYPE_INT;
+		int pin_index;
 
+		if (lookup->info.quirks & ACPI_GPIO_QUIRK_ONLY_GPIOIO && gpioint)
+			lookup->index++;
+
+		if (lookup->n++ != lookup->index)
+			return 1;
+
+		pin_index = lookup->pin_index;
 		if (pin_index >= agpio->pin_table_length)
 			return 1;
 
 		lookup->desc = acpi_get_gpiod(agpio->resource_source.string_ptr,
 					      agpio->pin_table[pin_index]);
-		lookup->info.gpioint =
-			agpio->connection_type == ACPI_RESOURCE_GPIO_TYPE_INT;
+		lookup->info.gpioint = gpioint;
 
 		/*
 		 * Polarity and triggering are only specified for GpioInt
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 87715f20b69a..03b4c4f225d0 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -1014,6 +1014,13 @@ struct acpi_gpio_mapping {
 
 /* Ignore IoRestriction field */
 #define ACPI_GPIO_QUIRK_NO_IO_RESTRICTION	BIT(0)
+/*
+ * When ACPI GPIO mapping table is in use the index parameter inside it
+ * refers to the GPIO resource in _CRS method. That index has no
+ * distinction of actual type of the resource. When consumer wants to
+ * get GpioIo type explicitly, this quirk may be used.
+ */
+#define ACPI_GPIO_QUIRK_ONLY_GPIOIO		BIT(1)
 
 	unsigned int quirks;
 };
diff --git a/sound/soc/intel/boards/bytcr_rt5651.c b/sound/soc/intel/boards/bytcr_rt5651.c
index c3b7732929cc..b0a4d297176e 100644
--- a/sound/soc/intel/boards/bytcr_rt5651.c
+++ b/sound/soc/intel/boards/bytcr_rt5651.c
@@ -844,74 +844,18 @@ static const struct x86_cpu_id cherrytrail_cpu_ids[] = {
 	{}
 };
 
-static const struct acpi_gpio_params first_gpio = { 0, 0, false };
-static const struct acpi_gpio_params second_gpio = { 1, 0, false };
+static const struct acpi_gpio_params ext_amp_enable_gpios = { 0, 0, false };
 
-static const struct acpi_gpio_mapping byt_rt5651_amp_en_first[] = {
-	{ "ext-amp-enable-gpios", &first_gpio, 1 },
-	{ },
-};
-
-static const struct acpi_gpio_mapping byt_rt5651_amp_en_second[] = {
-	{ "ext-amp-enable-gpios", &second_gpio, 1 },
+static const struct acpi_gpio_mapping cht_rt5651_gpios[] = {
+	/*
+	 * Some boards have I2cSerialBusV2, GpioIo, GpioInt as ACPI resources,
+	 * other boards may  have I2cSerialBusV2, GpioInt, GpioIo instead.
+	 * We want the GpioIo one for the ext-amp-enable-gpio.
+	 */
+	{ "ext-amp-enable-gpios", &ext_amp_enable_gpios, 1, ACPI_GPIO_QUIRK_ONLY_GPIOIO },
 	{ },
 };
 
-/*
- * Some boards have I2cSerialBusV2, GpioIo, GpioInt as ACPI resources, other
- * boards may  have I2cSerialBusV2, GpioInt, GpioIo instead. We want the
- * GpioIo one for the ext-amp-enable-gpio and both count for the index in
- * acpi_gpio_params index.  So we have 2 different mappings and the code
- * below figures out which one to use.
- */
-struct byt_rt5651_acpi_resource_data {
-	int gpio_count;
-	int gpio_int_idx;
-};
-
-static int snd_byt_rt5651_acpi_resource(struct acpi_resource *ares, void *arg)
-{
-	struct byt_rt5651_acpi_resource_data *data = arg;
-
-	if (ares->type != ACPI_RESOURCE_TYPE_GPIO)
-		return 0;
-
-	if (ares->data.gpio.connection_type == ACPI_RESOURCE_GPIO_TYPE_INT)
-		data->gpio_int_idx = data->gpio_count;
-
-	data->gpio_count++;
-	return 0;
-}
-
-static void snd_byt_rt5651_mc_pick_amp_en_gpio_mapping(struct device *codec)
-{
-	struct byt_rt5651_acpi_resource_data data = { 0, -1 };
-	LIST_HEAD(resources);
-	int ret;
-
-	ret = acpi_dev_get_resources(ACPI_COMPANION(codec), &resources,
-				     snd_byt_rt5651_acpi_resource, &data);
-	if (ret < 0) {
-		dev_warn(codec, "Failed to get ACPI resources, not adding external amplifier GPIO mapping\n");
-		return;
-	}
-
-	/* All info we need is gathered during the walk */
-	acpi_dev_free_resource_list(&resources);
-
-	switch (data.gpio_int_idx) {
-	case 0:
-		byt_rt5651_gpios = byt_rt5651_amp_en_second;
-		break;
-	case 1:
-		byt_rt5651_gpios = byt_rt5651_amp_en_first;
-		break;
-	default:
-		dev_warn(codec, "Unknown GpioInt index %d, not adding external amplifier GPIO mapping\n",
-			 data.gpio_int_idx);
-	}
-}
-
 struct acpi_chan_package {   /* ACPICA seems to require 64 bit integers */
 	u64 aif_value;       /* 1: AIF1, 2: AIF2 */
 	u64 mclock_value;    /* usually 25MHz (0x17d7940), ignored */
@@ -1038,7 +982,7 @@ static int snd_byt_rt5651_mc_probe(struct platform_device *pdev)
 
 	/* Cherry Trail devices use an external amplifier enable gpio */
 	if (x86_match_cpu(cherrytrail_cpu_ids) && !byt_rt5651_gpios)
-		snd_byt_rt5651_mc_pick_amp_en_gpio_mapping(codec_dev);
+		byt_rt5651_gpios = cht_rt5651_gpios;
 
 	if (byt_rt5651_gpios) {
 		devm_acpi_dev_add_driver_gpios(codec_dev, byt_rt5651_gpios);
-- 
cgit v1.2.3


From b450b87847b157d69dbf9af7aefb4cec29e89cc9 Mon Sep 17 00:00:00 2001
From: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Date: Fri, 1 Feb 2019 11:22:23 -0600
Subject: ASoC: core: don't increase component module refcount unconditionally

The ASoC core has for the longest time increased the module reference
counts, even before the transition to the component model. This is
probably fine on most platforms, but it introduces a deadlock case on
Intel devices with the Skylake and SOF drivers which cannot be removed
due to their reference counts being modified by the core.

In these 2 cases, the PCI or ACPI driver .probe creates a platform
device to let the machine driver .probe register the audio
card. Conversely the PCI or ACPI driver .remove will unregister the
platform device which results in the card being removed by the machine
driver .remove.

With ascii art, this can be represented as

modprobe
snd_soc_skl/
soc-pci-dev/sof-acpci-dev  ----------> pci/acpi probe
       ^                                    |
       |                     ---------------|
       |                    |               |
       |                    V               V
    increase            register        register machine
    refcount            component       platform_device
       ^                                    |
       |                                    |
       |                                    V
    component <----   register card  <---- probe
    probe

The issue is that by playing with the component's module reference
counts during the card registration, it's no longer possible to remove
the module which controls the component. This can be shown, e.g. with
the following error:

root@plb-XPS-13-9350:~# lsmod | grep snd_soc_skl
snd_soc_skl           110592  1

root@plb-XPS-13-9350:~# rmmod snd_soc_skl
rmmod: ERROR: Module snd_soc_skl is in use

Increasing the reference count during the component probe is not
useful. If the PCI/ACPI module is removed, the card will be removed
anyway.

To avoid breaking existing platforms and allowing Intel platforms to
safely deal with module load/unload cases, this patch introduces a
flag which needs to be set during the component initialization. This
is a strictly opt-in capability that should only be used when the
handling of the component module does not require a reference count
increase to prevent removal during use.

Note that this solution is not directly applicable to the legacy
Atom/SST driver, which uses a different device hierarchy. There are
however additional refcount issues which prevent the ACPI driver from
being removed. This is a different issue which would need a different
patch.

Signed-off-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/soc.h  | 3 +++
 sound/soc/soc-core.c | 6 ++++--
 2 files changed, 7 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/sound/soc.h b/include/sound/soc.h
index 95689680336b..eb7db605955b 100644
--- a/include/sound/soc.h
+++ b/include/sound/soc.h
@@ -802,6 +802,9 @@ struct snd_soc_component_driver {
 	int probe_order;
 	int remove_order;
 
+	/* signal if the module handling the component cannot be removed */
+	unsigned int ignore_module_refcount:1;
+
 	/* bits */
 	unsigned int idle_bias_on:1;
 	unsigned int suspend_bias_off:1;
diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c
index 994d21d7ba0f..93d316d5bf8e 100644
--- a/sound/soc/soc-core.c
+++ b/sound/soc/soc-core.c
@@ -947,7 +947,8 @@ static void soc_cleanup_component(struct snd_soc_component *component)
 	snd_soc_dapm_free(snd_soc_component_get_dapm(component));
 	soc_cleanup_component_debugfs(component);
 	component->card = NULL;
-	module_put(component->dev->driver->owner);
+	if (!component->driver->ignore_module_refcount)
+		module_put(component->dev->driver->owner);
 }
 
 static void soc_remove_component(struct snd_soc_component *component)
@@ -1380,7 +1381,8 @@ static int soc_probe_component(struct snd_soc_card *card,
 		return 0;
 	}
 
-	if (!try_module_get(component->dev->driver->owner))
+	if (!component->driver->ignore_module_refcount &&
+	    !try_module_get(component->dev->driver->owner))
 		return -ENODEV;
 
 	component->card = card;
-- 
cgit v1.2.3


From ddd065e423c1bad6c573297455ef0a9755ef12d4 Mon Sep 17 00:00:00 2001
From: Logan Gunthorpe <logang@deltatee.com>
Date: Fri, 8 Feb 2019 09:54:38 -0700
Subject: genirq/msi: Clean up usage of __u8/__u16 types

The double underscore types are meant for compatibility in userspace
headers which does not apply here. Therefore, change to use the standard
no-underscore types.

The origin of the double underscore types dates back to before the git era
so I was not able to find a commit to see the original justification.

Signed-off-by: Logan Gunthorpe <logang@deltatee.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 include/linux/msi.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/msi.h b/include/linux/msi.h
index 784fb52b9900..7e9b81c3b50d 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -83,12 +83,12 @@ struct msi_desc {
 		struct {
 			u32 masked;
 			struct {
-				__u8	is_msix		: 1;
-				__u8	multiple	: 3;
-				__u8	multi_cap	: 3;
-				__u8	maskbit		: 1;
-				__u8	is_64		: 1;
-				__u16	entry_nr;
+				u8	is_msix		: 1;
+				u8	multiple	: 3;
+				u8	multi_cap	: 3;
+				u8	maskbit		: 1;
+				u8	is_64		: 1;
+				u16	entry_nr;
 				unsigned default_irq;
 			} msi_attrib;
 			union {
-- 
cgit v1.2.3


From 2e5a662de36a92a95b5939273468b01785dc41ec Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Wed, 6 Feb 2019 08:16:51 +0100
Subject: i2c: cbus-gpio: Switch to use GPIO descriptors

This augments the CBUS GPIO I2C driver to use GPIO
descriptors for clock, sel and data. We drop the platform
data that was only used for carrying GPIO numbers and
use machine descriptor tables instead.

Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Tested-by: Aaro Koskinen <aaro.koskinen@iki.fi>
Acked-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 arch/arm/mach-omap1/board-nokia770.c        | 18 ++++---
 drivers/i2c/busses/i2c-cbus-gpio.c          | 80 +++++++++++------------------
 include/linux/platform_data/i2c-cbus-gpio.h | 27 ----------
 3 files changed, 40 insertions(+), 85 deletions(-)
 delete mode 100644 include/linux/platform_data/i2c-cbus-gpio.h

(limited to 'include')

diff --git a/arch/arm/mach-omap1/board-nokia770.c b/arch/arm/mach-omap1/board-nokia770.c
index eb41db78cd47..10848f573d37 100644
--- a/arch/arm/mach-omap1/board-nokia770.c
+++ b/arch/arm/mach-omap1/board-nokia770.c
@@ -10,6 +10,7 @@
 #include <linux/clkdev.h>
 #include <linux/irq.h>
 #include <linux/gpio.h>
+#include <linux/gpio/machine.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/mutex.h>
@@ -25,7 +26,6 @@
 #include <linux/platform_data/keypad-omap.h>
 #include <linux/platform_data/lcd-mipid.h>
 #include <linux/platform_data/gpio-omap.h>
-#include <linux/platform_data/i2c-cbus-gpio.h>
 
 #include <asm/mach-types.h>
 #include <asm/mach/arch.h>
@@ -217,18 +217,19 @@ static inline void nokia770_mmc_init(void)
 #endif
 
 #if IS_ENABLED(CONFIG_I2C_CBUS_GPIO)
-static struct i2c_cbus_platform_data nokia770_cbus_data = {
-	.clk_gpio = OMAP_MPUIO(9),
-	.dat_gpio = OMAP_MPUIO(10),
-	.sel_gpio = OMAP_MPUIO(11),
+static struct gpiod_lookup_table nokia770_cbus_gpio_table = {
+	.dev_id = "i2c-cbus-gpio.2",
+	.table = {
+		GPIO_LOOKUP_IDX("mpuio", 9, NULL, 0, 0), /* clk */
+		GPIO_LOOKUP_IDX("mpuio", 10, NULL, 1, 0), /* dat */
+		GPIO_LOOKUP_IDX("mpuio", 11, NULL, 2, 0), /* sel */
+		{ },
+	},
 };
 
 static struct platform_device nokia770_cbus_device = {
 	.name   = "i2c-cbus-gpio",
 	.id     = 2,
-	.dev    = {
-		.platform_data = &nokia770_cbus_data,
-	},
 };
 
 static struct i2c_board_info nokia770_i2c_board_info_2[] __initdata = {
@@ -257,6 +258,7 @@ static void __init nokia770_cbus_init(void)
 	nokia770_i2c_board_info_2[1].irq = gpio_to_irq(tahvo_irq_gpio);
 	i2c_register_board_info(2, nokia770_i2c_board_info_2,
 				ARRAY_SIZE(nokia770_i2c_board_info_2));
+	gpiod_add_lookup_table(&nokia770_cbus_gpio_table);
 	platform_device_register(&nokia770_cbus_device);
 }
 #else /* CONFIG_I2C_CBUS_GPIO */
diff --git a/drivers/i2c/busses/i2c-cbus-gpio.c b/drivers/i2c/busses/i2c-cbus-gpio.c
index b4f91e48948a..72df563477b1 100644
--- a/drivers/i2c/busses/i2c-cbus-gpio.c
+++ b/drivers/i2c/busses/i2c-cbus-gpio.c
@@ -18,16 +18,14 @@
 
 #include <linux/io.h>
 #include <linux/i2c.h>
-#include <linux/gpio.h>
 #include <linux/slab.h>
 #include <linux/delay.h>
 #include <linux/errno.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/of_gpio.h>
+#include <linux/gpio/consumer.h>
 #include <linux/interrupt.h>
 #include <linux/platform_device.h>
-#include <linux/platform_data/i2c-cbus-gpio.h>
 
 /*
  * Bit counts are derived from Nokia implementation. These should be checked
@@ -39,9 +37,9 @@
 struct cbus_host {
 	spinlock_t	lock;		/* host lock */
 	struct device	*dev;
-	int		clk_gpio;
-	int		dat_gpio;
-	int		sel_gpio;
+	struct gpio_desc *clk;
+	struct gpio_desc *dat;
+	struct gpio_desc *sel;
 };
 
 /**
@@ -51,9 +49,9 @@ struct cbus_host {
  */
 static void cbus_send_bit(struct cbus_host *host, unsigned bit)
 {
-	gpio_set_value(host->dat_gpio, bit ? 1 : 0);
-	gpio_set_value(host->clk_gpio, 1);
-	gpio_set_value(host->clk_gpio, 0);
+	gpiod_set_value(host->dat, bit ? 1 : 0);
+	gpiod_set_value(host->clk, 1);
+	gpiod_set_value(host->clk, 0);
 }
 
 /**
@@ -78,9 +76,9 @@ static int cbus_receive_bit(struct cbus_host *host)
 {
 	int ret;
 
-	gpio_set_value(host->clk_gpio, 1);
-	ret = gpio_get_value(host->dat_gpio);
-	gpio_set_value(host->clk_gpio, 0);
+	gpiod_set_value(host->clk, 1);
+	ret = gpiod_get_value(host->dat);
+	gpiod_set_value(host->clk, 0);
 	return ret;
 }
 
@@ -123,10 +121,10 @@ static int cbus_transfer(struct cbus_host *host, char rw, unsigned dev,
 	spin_lock_irqsave(&host->lock, flags);
 
 	/* Reset state and start of transfer, SEL stays down during transfer */
-	gpio_set_value(host->sel_gpio, 0);
+	gpiod_set_value(host->sel, 0);
 
 	/* Set the DAT pin to output */
-	gpio_direction_output(host->dat_gpio, 1);
+	gpiod_direction_output(host->dat, 1);
 
 	/* Send the device address */
 	cbus_send_data(host, dev, CBUS_ADDR_BITS);
@@ -141,12 +139,12 @@ static int cbus_transfer(struct cbus_host *host, char rw, unsigned dev,
 		cbus_send_data(host, data, 16);
 		ret = 0;
 	} else {
-		ret = gpio_direction_input(host->dat_gpio);
+		ret = gpiod_direction_input(host->dat);
 		if (ret) {
 			dev_dbg(host->dev, "failed setting direction\n");
 			goto out;
 		}
-		gpio_set_value(host->clk_gpio, 1);
+		gpiod_set_value(host->clk, 1);
 
 		ret = cbus_receive_word(host);
 		if (ret < 0) {
@@ -156,9 +154,9 @@ static int cbus_transfer(struct cbus_host *host, char rw, unsigned dev,
 	}
 
 	/* Indicate end of transfer, SEL goes up until next transfer */
-	gpio_set_value(host->sel_gpio, 1);
-	gpio_set_value(host->clk_gpio, 1);
-	gpio_set_value(host->clk_gpio, 0);
+	gpiod_set_value(host->sel, 1);
+	gpiod_set_value(host->clk, 1);
+	gpiod_set_value(host->clk, 0);
 
 out:
 	spin_unlock_irqrestore(&host->lock, flags);
@@ -214,7 +212,6 @@ static int cbus_i2c_probe(struct platform_device *pdev)
 {
 	struct i2c_adapter *adapter;
 	struct cbus_host *chost;
-	int ret;
 
 	adapter = devm_kzalloc(&pdev->dev, sizeof(struct i2c_adapter),
 			       GFP_KERNEL);
@@ -225,22 +222,20 @@ static int cbus_i2c_probe(struct platform_device *pdev)
 	if (!chost)
 		return -ENOMEM;
 
-	if (pdev->dev.of_node) {
-		struct device_node *dnode = pdev->dev.of_node;
-		if (of_gpio_count(dnode) != 3)
-			return -ENODEV;
-		chost->clk_gpio = of_get_gpio(dnode, 0);
-		chost->dat_gpio = of_get_gpio(dnode, 1);
-		chost->sel_gpio = of_get_gpio(dnode, 2);
-	} else if (dev_get_platdata(&pdev->dev)) {
-		struct i2c_cbus_platform_data *pdata =
-			dev_get_platdata(&pdev->dev);
-		chost->clk_gpio = pdata->clk_gpio;
-		chost->dat_gpio = pdata->dat_gpio;
-		chost->sel_gpio = pdata->sel_gpio;
-	} else {
+	if (gpiod_count(&pdev->dev, NULL) != 3)
 		return -ENODEV;
-	}
+	chost->clk = devm_gpiod_get_index(&pdev->dev, NULL, 0, GPIOD_OUT_LOW);
+	if (IS_ERR(chost->clk))
+		return PTR_ERR(chost->clk);
+	chost->dat = devm_gpiod_get_index(&pdev->dev, NULL, 1, GPIOD_IN);
+	if (IS_ERR(chost->dat))
+		return PTR_ERR(chost->dat);
+	chost->sel = devm_gpiod_get_index(&pdev->dev, NULL, 2, GPIOD_OUT_HIGH);
+	if (IS_ERR(chost->sel))
+		return PTR_ERR(chost->sel);
+	gpiod_set_consumer_name(chost->clk, "CBUS clk");
+	gpiod_set_consumer_name(chost->dat, "CBUS dat");
+	gpiod_set_consumer_name(chost->sel, "CBUS sel");
 
 	adapter->owner		= THIS_MODULE;
 	adapter->class		= I2C_CLASS_HWMON;
@@ -254,21 +249,6 @@ static int cbus_i2c_probe(struct platform_device *pdev)
 	spin_lock_init(&chost->lock);
 	chost->dev = &pdev->dev;
 
-	ret = devm_gpio_request_one(&pdev->dev, chost->clk_gpio,
-				    GPIOF_OUT_INIT_LOW, "CBUS clk");
-	if (ret)
-		return ret;
-
-	ret = devm_gpio_request_one(&pdev->dev, chost->dat_gpio, GPIOF_IN,
-				    "CBUS data");
-	if (ret)
-		return ret;
-
-	ret = devm_gpio_request_one(&pdev->dev, chost->sel_gpio,
-				    GPIOF_OUT_INIT_HIGH, "CBUS sel");
-	if (ret)
-		return ret;
-
 	i2c_set_adapdata(adapter, chost);
 	platform_set_drvdata(pdev, adapter);
 
diff --git a/include/linux/platform_data/i2c-cbus-gpio.h b/include/linux/platform_data/i2c-cbus-gpio.h
deleted file mode 100644
index 6faa992a9502..000000000000
--- a/include/linux/platform_data/i2c-cbus-gpio.h
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- * i2c-cbus-gpio.h - CBUS I2C platform_data definition
- *
- * Copyright (C) 2004-2009 Nokia Corporation
- *
- * Written by Felipe Balbi and Aaro Koskinen.
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License. See the file "COPYING" in the main directory of this
- * archive for more details.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- */
-
-#ifndef __INCLUDE_LINUX_I2C_CBUS_GPIO_H
-#define __INCLUDE_LINUX_I2C_CBUS_GPIO_H
-
-struct i2c_cbus_platform_data {
-	int dat_gpio;
-	int clk_gpio;
-	int sel_gpio;
-};
-
-#endif /* __INCLUDE_LINUX_I2C_CBUS_GPIO_H */
-- 
cgit v1.2.3


From 6f4e626fb0cc93d50b49b79c2ee33bd769ee57f0 Mon Sep 17 00:00:00 2001
From: Nathan Chancellor <natechancellor@gmail.com>
Date: Thu, 7 Feb 2019 09:07:20 -0700
Subject: scsi: ata: Use unsigned int for cmd's type in ioctls in
 scsi_host_template

Clang warns several times in the scsi subsystem (trimmed for brevity):

drivers/scsi/hpsa.c:6209:7: warning: overflow converting case value to
switch condition type (2147762695 to 18446744071562347015) [-Wswitch]
        case CCISS_GETBUSTYPES:
             ^
drivers/scsi/hpsa.c:6208:7: warning: overflow converting case value to
switch condition type (2147762694 to 18446744071562347014) [-Wswitch]
        case CCISS_GETHEARTBEAT:
             ^

The root cause is that the _IOC macro can generate really large numbers,
which don't fit into type 'int', which is used for the cmd parameter in
the ioctls in scsi_host_template. My research into how GCC and Clang are
handling this at a low level didn't prove fruitful. However, looking at
the rest of the kernel tree, all ioctls use an 'unsigned int' for the
cmd parameter, which will fit all of the _IOC values in the scsi/ata
subsystems.

Make that change because none of the ioctls expect a negative value for
any command, it brings the ioctls inline with the reset of the kernel,
and it removes ambiguity, which is never good when dealing with compilers.

Link: https://github.com/ClangBuiltLinux/linux/issues/85
Link: https://github.com/ClangBuiltLinux/linux/issues/154
Link: https://github.com/ClangBuiltLinux/linux/issues/157
Signed-off-by: Nathan Chancellor <natechancellor@gmail.com>
Acked-by: Bradley Grove <bgrove@attotech.com>
Acked-by: Don Brace <don.brace@microsemi.com>
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Tested-by: Nick Desaulniers <ndesaulniers@google.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/ata/libata-scsi.c             |  5 +++--
 drivers/scsi/aacraid/aachba.c         |  2 +-
 drivers/scsi/aacraid/aacraid.h        |  4 ++--
 drivers/scsi/aacraid/commctrl.c       |  2 +-
 drivers/scsi/aacraid/linit.c          |  6 ++++--
 drivers/scsi/cxlflash/common.h        |  3 ++-
 drivers/scsi/cxlflash/main.c          |  2 +-
 drivers/scsi/cxlflash/superpipe.c     | 12 +++++-------
 drivers/scsi/esas2r/esas2r.h          |  4 ++--
 drivers/scsi/esas2r/esas2r_ioctl.c    | 16 +++++++---------
 drivers/scsi/esas2r/esas2r_main.c     |  2 +-
 drivers/scsi/hpsa.c                   | 15 +++++++++------
 drivers/scsi/ipr.c                    |  3 ++-
 drivers/scsi/libsas/sas_scsi_host.c   |  2 +-
 drivers/scsi/scsi_debug.c             |  3 ++-
 drivers/scsi/smartpqi/smartpqi_init.c |  3 ++-
 include/linux/libata.h                |  5 +++--
 include/scsi/libsas.h                 |  3 ++-
 include/scsi/scsi_host.h              |  6 ++++--
 19 files changed, 54 insertions(+), 44 deletions(-)

(limited to 'include')

diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index 3d4887d0e84a..6291f1dbf342 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -778,7 +778,7 @@ static int ata_ioc32(struct ata_port *ap)
 }
 
 int ata_sas_scsi_ioctl(struct ata_port *ap, struct scsi_device *scsidev,
-		     int cmd, void __user *arg)
+		     unsigned int cmd, void __user *arg)
 {
 	unsigned long val;
 	int rc = -EINVAL;
@@ -829,7 +829,8 @@ int ata_sas_scsi_ioctl(struct ata_port *ap, struct scsi_device *scsidev,
 }
 EXPORT_SYMBOL_GPL(ata_sas_scsi_ioctl);
 
-int ata_scsi_ioctl(struct scsi_device *scsidev, int cmd, void __user *arg)
+int ata_scsi_ioctl(struct scsi_device *scsidev, unsigned int cmd,
+		   void __user *arg)
 {
 	return ata_sas_scsi_ioctl(ata_shost_to_port(scsidev->host),
 				scsidev, cmd, arg);
diff --git a/drivers/scsi/aacraid/aachba.c b/drivers/scsi/aacraid/aachba.c
index 75ab5ff6b78c..6085aa087a2f 100644
--- a/drivers/scsi/aacraid/aachba.c
+++ b/drivers/scsi/aacraid/aachba.c
@@ -3455,7 +3455,7 @@ static int delete_disk(struct aac_dev *dev, void __user *arg)
 	}
 }
 
-int aac_dev_ioctl(struct aac_dev *dev, int cmd, void __user *arg)
+int aac_dev_ioctl(struct aac_dev *dev, unsigned int cmd, void __user *arg)
 {
 	switch (cmd) {
 	case FSACTL_QUERY_DISK:
diff --git a/drivers/scsi/aacraid/aacraid.h b/drivers/scsi/aacraid/aacraid.h
index 3291d1c16864..1df5171594b8 100644
--- a/drivers/scsi/aacraid/aacraid.h
+++ b/drivers/scsi/aacraid/aacraid.h
@@ -2706,12 +2706,12 @@ void aac_set_intx_mode(struct aac_dev *dev);
 int aac_get_config_status(struct aac_dev *dev, int commit_flag);
 int aac_get_containers(struct aac_dev *dev);
 int aac_scsi_cmd(struct scsi_cmnd *cmd);
-int aac_dev_ioctl(struct aac_dev *dev, int cmd, void __user *arg);
+int aac_dev_ioctl(struct aac_dev *dev, unsigned int cmd, void __user *arg);
 #ifndef shost_to_class
 #define shost_to_class(shost) &shost->shost_dev
 #endif
 ssize_t aac_get_serial_number(struct device *dev, char *buf);
-int aac_do_ioctl(struct aac_dev * dev, int cmd, void __user *arg);
+int aac_do_ioctl(struct aac_dev *dev, unsigned int cmd, void __user *arg);
 int aac_rx_init(struct aac_dev *dev);
 int aac_rkt_init(struct aac_dev *dev);
 int aac_nark_init(struct aac_dev *dev);
diff --git a/drivers/scsi/aacraid/commctrl.c b/drivers/scsi/aacraid/commctrl.c
index e2899ff7913e..f0ff40332753 100644
--- a/drivers/scsi/aacraid/commctrl.c
+++ b/drivers/scsi/aacraid/commctrl.c
@@ -1060,7 +1060,7 @@ static int aac_send_reset_adapter(struct aac_dev *dev, void __user *arg)
 	return retval;
 }
 
-int aac_do_ioctl(struct aac_dev * dev, int cmd, void __user *arg)
+int aac_do_ioctl(struct aac_dev *dev, unsigned int cmd, void __user *arg)
 {
 	int status;
 
diff --git a/drivers/scsi/aacraid/linit.c b/drivers/scsi/aacraid/linit.c
index dd701d5243b1..22ecacffeca6 100644
--- a/drivers/scsi/aacraid/linit.c
+++ b/drivers/scsi/aacraid/linit.c
@@ -616,7 +616,8 @@ static struct device_attribute *aac_dev_attrs[] = {
 	NULL,
 };
 
-static int aac_ioctl(struct scsi_device *sdev, int cmd, void __user * arg)
+static int aac_ioctl(struct scsi_device *sdev, unsigned int cmd,
+		     void __user *arg)
 {
 	struct aac_dev *dev = (struct aac_dev *)sdev->host->hostdata;
 	if (!capable(CAP_SYS_RAWIO))
@@ -1205,7 +1206,8 @@ static long aac_compat_do_ioctl(struct aac_dev *dev, unsigned cmd, unsigned long
 	return ret;
 }
 
-static int aac_compat_ioctl(struct scsi_device *sdev, int cmd, void __user *arg)
+static int aac_compat_ioctl(struct scsi_device *sdev, unsigned int cmd,
+			    void __user *arg)
 {
 	struct aac_dev *dev = (struct aac_dev *)sdev->host->hostdata;
 	if (!capable(CAP_SYS_RAWIO))
diff --git a/drivers/scsi/cxlflash/common.h b/drivers/scsi/cxlflash/common.h
index 8908a20065c8..4d90106fcb37 100644
--- a/drivers/scsi/cxlflash/common.h
+++ b/drivers/scsi/cxlflash/common.h
@@ -334,7 +334,8 @@ int cxlflash_afu_sync(struct afu *afu, ctx_hndl_t c, res_hndl_t r, u8 mode);
 void cxlflash_list_init(void);
 void cxlflash_term_global_luns(void);
 void cxlflash_free_errpage(void);
-int cxlflash_ioctl(struct scsi_device *sdev, int cmd, void __user *arg);
+int cxlflash_ioctl(struct scsi_device *sdev, unsigned int cmd,
+		   void __user *arg);
 void cxlflash_stop_term_user_contexts(struct cxlflash_cfg *cfg);
 int cxlflash_mark_contexts_error(struct cxlflash_cfg *cfg);
 void cxlflash_term_local_luns(struct cxlflash_cfg *cfg);
diff --git a/drivers/scsi/cxlflash/main.c b/drivers/scsi/cxlflash/main.c
index bfa13e3b191c..a0ea2dea7518 100644
--- a/drivers/scsi/cxlflash/main.c
+++ b/drivers/scsi/cxlflash/main.c
@@ -3282,7 +3282,7 @@ static int cxlflash_chr_open(struct inode *inode, struct file *file)
  *
  * Return: A string identifying the decoded host ioctl.
  */
-static char *decode_hioctl(int cmd)
+static char *decode_hioctl(unsigned int cmd)
 {
 	switch (cmd) {
 	case HT_CXLFLASH_LUN_PROVISION:
diff --git a/drivers/scsi/cxlflash/superpipe.c b/drivers/scsi/cxlflash/superpipe.c
index acac6152f50b..1a94a469051e 100644
--- a/drivers/scsi/cxlflash/superpipe.c
+++ b/drivers/scsi/cxlflash/superpipe.c
@@ -1924,7 +1924,7 @@ out:
  *
  * Return: A string identifying the decoded ioctl.
  */
-static char *decode_ioctl(int cmd)
+static char *decode_ioctl(unsigned int cmd)
 {
 	switch (cmd) {
 	case DK_CXLFLASH_ATTACH:
@@ -2051,7 +2051,7 @@ err1:
  *
  * Return: 0 on success, -errno on failure
  */
-static int ioctl_common(struct scsi_device *sdev, int cmd)
+static int ioctl_common(struct scsi_device *sdev, unsigned int cmd)
 {
 	struct cxlflash_cfg *cfg = shost_priv(sdev->host);
 	struct device *dev = &cfg->dev->dev;
@@ -2096,7 +2096,7 @@ out:
  *
  * Return: 0 on success, -errno on failure
  */
-int cxlflash_ioctl(struct scsi_device *sdev, int cmd, void __user *arg)
+int cxlflash_ioctl(struct scsi_device *sdev, unsigned int cmd, void __user *arg)
 {
 	typedef int (*sioctl) (struct scsi_device *, void *);
 
@@ -2179,8 +2179,7 @@ int cxlflash_ioctl(struct scsi_device *sdev, int cmd, void __user *arg)
 	}
 
 	if (unlikely(copy_from_user(&buf, arg, size))) {
-		dev_err(dev, "%s: copy_from_user() fail "
-			"size=%lu cmd=%d (%s) arg=%p\n",
+		dev_err(dev, "%s: copy_from_user() fail size=%lu cmd=%u (%s) arg=%p\n",
 			__func__, size, cmd, decode_ioctl(cmd), arg);
 		rc = -EFAULT;
 		goto cxlflash_ioctl_exit;
@@ -2203,8 +2202,7 @@ int cxlflash_ioctl(struct scsi_device *sdev, int cmd, void __user *arg)
 	rc = do_ioctl(sdev, (void *)&buf);
 	if (likely(!rc))
 		if (unlikely(copy_to_user(arg, &buf, size))) {
-			dev_err(dev, "%s: copy_to_user() fail "
-				"size=%lu cmd=%d (%s) arg=%p\n",
+			dev_err(dev, "%s: copy_to_user() fail size=%lu cmd=%u (%s) arg=%p\n",
 				__func__, size, cmd, decode_ioctl(cmd), arg);
 			rc = -EFAULT;
 		}
diff --git a/drivers/scsi/esas2r/esas2r.h b/drivers/scsi/esas2r/esas2r.h
index 858c3b33db78..7f43b95f4e94 100644
--- a/drivers/scsi/esas2r/esas2r.h
+++ b/drivers/scsi/esas2r/esas2r.h
@@ -965,8 +965,8 @@ struct esas2r_adapter {
 const char *esas2r_info(struct Scsi_Host *);
 int esas2r_write_params(struct esas2r_adapter *a, struct esas2r_request *rq,
 			struct esas2r_sas_nvram *data);
-int esas2r_ioctl_handler(void *hostdata, int cmd, void __user *arg);
-int esas2r_ioctl(struct scsi_device *dev, int cmd, void __user *arg);
+int esas2r_ioctl_handler(void *hostdata, unsigned int cmd, void __user *arg);
+int esas2r_ioctl(struct scsi_device *dev, unsigned int cmd, void __user *arg);
 u8 handle_hba_ioctl(struct esas2r_adapter *a,
 		    struct atto_ioctl *ioctl_hba);
 int esas2r_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd);
diff --git a/drivers/scsi/esas2r/esas2r_ioctl.c b/drivers/scsi/esas2r/esas2r_ioctl.c
index 34bcc8c04ff4..3d130523c288 100644
--- a/drivers/scsi/esas2r/esas2r_ioctl.c
+++ b/drivers/scsi/esas2r/esas2r_ioctl.c
@@ -1274,7 +1274,7 @@ int esas2r_write_params(struct esas2r_adapter *a, struct esas2r_request *rq,
 
 
 /* This function only cares about ATTO-specific ioctls (atto_express_ioctl) */
-int esas2r_ioctl_handler(void *hostdata, int cmd, void __user *arg)
+int esas2r_ioctl_handler(void *hostdata, unsigned int cmd, void __user *arg)
 {
 	struct atto_express_ioctl *ioctl = NULL;
 	struct esas2r_adapter *a;
@@ -1292,9 +1292,8 @@ int esas2r_ioctl_handler(void *hostdata, int cmd, void __user *arg)
 	ioctl = memdup_user(arg, sizeof(struct atto_express_ioctl));
 	if (IS_ERR(ioctl)) {
 		esas2r_log(ESAS2R_LOG_WARN,
-			   "ioctl_handler access_ok failed for cmd %d, "
-			   "address %p", cmd,
-			   arg);
+			   "ioctl_handler access_ok failed for cmd %u, address %p",
+			   cmd, arg);
 		return PTR_ERR(ioctl);
 	}
 
@@ -1493,7 +1492,7 @@ int esas2r_ioctl_handler(void *hostdata, int cmd, void __user *arg)
 ioctl_done:
 
 	if (err < 0) {
-		esas2r_log(ESAS2R_LOG_WARN, "err %d on ioctl cmd %d", err,
+		esas2r_log(ESAS2R_LOG_WARN, "err %d on ioctl cmd %u", err,
 			   cmd);
 
 		switch (err) {
@@ -1518,9 +1517,8 @@ ioctl_done:
 	err = __copy_to_user(arg, ioctl, sizeof(struct atto_express_ioctl));
 	if (err != 0) {
 		esas2r_log(ESAS2R_LOG_WARN,
-			   "ioctl_handler copy_to_user didn't copy "
-			   "everything (err %d, cmd %d)", err,
-			   cmd);
+			   "ioctl_handler copy_to_user didn't copy everything (err %d, cmd %u)",
+			   err, cmd);
 		kfree(ioctl);
 
 		return -EFAULT;
@@ -1531,7 +1529,7 @@ ioctl_done:
 	return 0;
 }
 
-int esas2r_ioctl(struct scsi_device *sd, int cmd, void __user *arg)
+int esas2r_ioctl(struct scsi_device *sd, unsigned int cmd, void __user *arg)
 {
 	return esas2r_ioctl_handler(sd->host->hostdata, cmd, arg);
 }
diff --git a/drivers/scsi/esas2r/esas2r_main.c b/drivers/scsi/esas2r/esas2r_main.c
index 64397d441bae..fdbda5c05aa0 100644
--- a/drivers/scsi/esas2r/esas2r_main.c
+++ b/drivers/scsi/esas2r/esas2r_main.c
@@ -623,7 +623,7 @@ static int esas2r_proc_major;
 long esas2r_proc_ioctl(struct file *fp, unsigned int cmd, unsigned long arg)
 {
 	return esas2r_ioctl_handler(esas2r_proc_host->hostdata,
-				    (int)cmd, (void __user *)arg);
+				    cmd, (void __user *)arg);
 }
 
 static void __exit esas2r_exit(void)
diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c
index 5284444fdd10..f044e7d10d63 100644
--- a/drivers/scsi/hpsa.c
+++ b/drivers/scsi/hpsa.c
@@ -251,10 +251,11 @@ static int number_of_controllers;
 
 static irqreturn_t do_hpsa_intr_intx(int irq, void *dev_id);
 static irqreturn_t do_hpsa_intr_msi(int irq, void *dev_id);
-static int hpsa_ioctl(struct scsi_device *dev, int cmd, void __user *arg);
+static int hpsa_ioctl(struct scsi_device *dev, unsigned int cmd,
+		      void __user *arg);
 
 #ifdef CONFIG_COMPAT
-static int hpsa_compat_ioctl(struct scsi_device *dev, int cmd,
+static int hpsa_compat_ioctl(struct scsi_device *dev, unsigned int cmd,
 	void __user *arg);
 #endif
 
@@ -6127,7 +6128,7 @@ static void cmd_free(struct ctlr_info *h, struct CommandList *c)
 
 #ifdef CONFIG_COMPAT
 
-static int hpsa_ioctl32_passthru(struct scsi_device *dev, int cmd,
+static int hpsa_ioctl32_passthru(struct scsi_device *dev, unsigned int cmd,
 	void __user *arg)
 {
 	IOCTL32_Command_struct __user *arg32 =
@@ -6164,7 +6165,7 @@ static int hpsa_ioctl32_passthru(struct scsi_device *dev, int cmd,
 }
 
 static int hpsa_ioctl32_big_passthru(struct scsi_device *dev,
-	int cmd, void __user *arg)
+	unsigned int cmd, void __user *arg)
 {
 	BIG_IOCTL32_Command_struct __user *arg32 =
 	    (BIG_IOCTL32_Command_struct __user *) arg;
@@ -6201,7 +6202,8 @@ static int hpsa_ioctl32_big_passthru(struct scsi_device *dev,
 	return err;
 }
 
-static int hpsa_compat_ioctl(struct scsi_device *dev, int cmd, void __user *arg)
+static int hpsa_compat_ioctl(struct scsi_device *dev, unsigned int cmd,
+			     void __user *arg)
 {
 	switch (cmd) {
 	case CCISS_GETPCIINFO:
@@ -6521,7 +6523,8 @@ static void check_ioctl_unit_attention(struct ctlr_info *h,
 /*
  * ioctl
  */
-static int hpsa_ioctl(struct scsi_device *dev, int cmd, void __user *arg)
+static int hpsa_ioctl(struct scsi_device *dev, unsigned int cmd,
+		      void __user *arg)
 {
 	struct ctlr_info *h;
 	void __user *argp = (void __user *)arg;
diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c
index d1b4025a4503..6d053e220153 100644
--- a/drivers/scsi/ipr.c
+++ b/drivers/scsi/ipr.c
@@ -6696,7 +6696,8 @@ err_nodev:
  * Return value:
  * 	0 on success / other on failure
  **/
-static int ipr_ioctl(struct scsi_device *sdev, int cmd, void __user *arg)
+static int ipr_ioctl(struct scsi_device *sdev, unsigned int cmd,
+		     void __user *arg)
 {
 	struct ipr_resource_entry *res;
 
diff --git a/drivers/scsi/libsas/sas_scsi_host.c b/drivers/scsi/libsas/sas_scsi_host.c
index c43a00a9d819..b775445892af 100644
--- a/drivers/scsi/libsas/sas_scsi_host.c
+++ b/drivers/scsi/libsas/sas_scsi_host.c
@@ -799,7 +799,7 @@ out:
 		  shost->host_failed, tries);
 }
 
-int sas_ioctl(struct scsi_device *sdev, int cmd, void __user *arg)
+int sas_ioctl(struct scsi_device *sdev, unsigned int cmd, void __user *arg)
 {
 	struct domain_device *dev = sdev_to_domain_dev(sdev);
 
diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c
index 8044bb08455d..c5014e9f4a50 100644
--- a/drivers/scsi/scsi_debug.c
+++ b/drivers/scsi/scsi_debug.c
@@ -832,7 +832,8 @@ static void mk_sense_invalid_opcode(struct scsi_cmnd *scp)
 	mk_sense_buffer(scp, ILLEGAL_REQUEST, INVALID_OPCODE, 0);
 }
 
-static int scsi_debug_ioctl(struct scsi_device *dev, int cmd, void __user *arg)
+static int scsi_debug_ioctl(struct scsi_device *dev, unsigned int cmd,
+			    void __user *arg)
 {
 	if (sdebug_verbose) {
 		if (0x1261 == cmd)
diff --git a/drivers/scsi/smartpqi/smartpqi_init.c b/drivers/scsi/smartpqi/smartpqi_init.c
index e2fa3f476227..f6eaea2eadd1 100644
--- a/drivers/scsi/smartpqi/smartpqi_init.c
+++ b/drivers/scsi/smartpqi/smartpqi_init.c
@@ -6043,7 +6043,8 @@ out:
 	return rc;
 }
 
-static int pqi_ioctl(struct scsi_device *sdev, int cmd, void __user *arg)
+static int pqi_ioctl(struct scsi_device *sdev, unsigned int cmd,
+		     void __user *arg)
 {
 	int rc;
 	struct pqi_ctrl_info *ctrl_info;
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 68133842e6d7..c9419c05a90a 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -1122,10 +1122,11 @@ extern int ata_host_activate(struct ata_host *host, int irq,
 extern void ata_host_detach(struct ata_host *host);
 extern void ata_host_init(struct ata_host *, struct device *, struct ata_port_operations *);
 extern int ata_scsi_detect(struct scsi_host_template *sht);
-extern int ata_scsi_ioctl(struct scsi_device *dev, int cmd, void __user *arg);
+extern int ata_scsi_ioctl(struct scsi_device *dev, unsigned int cmd,
+			  void __user *arg);
 extern int ata_scsi_queuecmd(struct Scsi_Host *h, struct scsi_cmnd *cmd);
 extern int ata_sas_scsi_ioctl(struct ata_port *ap, struct scsi_device *dev,
-			    int cmd, void __user *arg);
+			    unsigned int cmd, void __user *arg);
 extern void ata_sas_port_destroy(struct ata_port *);
 extern struct ata_port *ata_sas_port_alloc(struct ata_host *,
 					   struct ata_port_info *, struct Scsi_Host *);
diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h
index 857086cf7ebf..56b2dba7d911 100644
--- a/include/scsi/libsas.h
+++ b/include/scsi/libsas.h
@@ -707,7 +707,8 @@ int sas_eh_target_reset_handler(struct scsi_cmnd *cmd);
 
 extern void sas_target_destroy(struct scsi_target *);
 extern int sas_slave_alloc(struct scsi_device *);
-extern int sas_ioctl(struct scsi_device *sdev, int cmd, void __user *arg);
+extern int sas_ioctl(struct scsi_device *sdev, unsigned int cmd,
+		     void __user *arg);
 extern int sas_drain_work(struct sas_ha_struct *ha);
 
 extern void sas_ssp_task_response(struct device *dev, struct sas_task *task,
diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h
index 6ca954e9f752..4047d68d1b08 100644
--- a/include/scsi/scsi_host.h
+++ b/include/scsi/scsi_host.h
@@ -60,7 +60,8 @@ struct scsi_host_template {
 	 *
 	 * Status: OPTIONAL
 	 */
-	int (* ioctl)(struct scsi_device *dev, int cmd, void __user *arg);
+	int (*ioctl)(struct scsi_device *dev, unsigned int cmd,
+		     void __user *arg);
 
 
 #ifdef CONFIG_COMPAT
@@ -70,7 +71,8 @@ struct scsi_host_template {
 	 *
 	 * Status: OPTIONAL
 	 */
-	int (* compat_ioctl)(struct scsi_device *dev, int cmd, void __user *arg);
+	int (*compat_ioctl)(struct scsi_device *dev, unsigned int cmd,
+			    void __user *arg);
 #endif
 
 	/*
-- 
cgit v1.2.3


From 7c62cfb8c5744b377e9f33806e0db87a00dc6884 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@mellanox.com>
Date: Thu, 7 Feb 2019 11:22:45 +0000
Subject: devlink: publish params only after driver init is done

Currently, user can do dump or get of param values right after the
devlink params are registered. However the driver may not be initialized
which is an issue. The same problem happens during notification
upon param registration. Allow driver to publish devlink params
whenever it is ready to handle get() ops. Note that this cannot
be resolved by init reordering, as the "driverinit" params have
to be available before the driver is initialized (it needs the param
values there).

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Cc: Michael Chan <michael.chan@broadcom.com>
Cc: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c |  3 ++
 drivers/net/ethernet/mellanox/mlx4/main.c         |  1 +
 drivers/net/ethernet/mellanox/mlxsw/core.c        |  5 +++
 include/net/devlink.h                             | 11 ++++++
 net/core/devlink.c                                | 48 ++++++++++++++++++++++-
 5 files changed, 67 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
index a6abfa4fb168..2955e404fd18 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
@@ -258,6 +258,9 @@ int bnxt_dl_register(struct bnxt *bp)
 		netdev_err(bp->dev, "devlink_port_params_register failed");
 		goto err_dl_port_unreg;
 	}
+
+	devlink_params_publish(dl);
+
 	return 0;
 
 err_dl_port_unreg:
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index bdb8dd161923..1f6e16d5ea6b 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -3981,6 +3981,7 @@ static int mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
 	if (ret)
 		goto err_params_unregister;
 
+	devlink_params_publish(devlink);
 	pci_save_state(pdev);
 	return 0;
 
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c
index 4f6fa515394e..b505d3858235 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core.c
@@ -1062,6 +1062,9 @@ __mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info,
 			goto err_driver_init;
 	}
 
+	if (mlxsw_driver->params_register && !reload)
+		devlink_params_publish(devlink);
+
 	return 0;
 
 err_driver_init:
@@ -1131,6 +1134,8 @@ void mlxsw_core_bus_device_unregister(struct mlxsw_core *mlxsw_core,
 			return;
 	}
 
+	if (mlxsw_core->driver->params_unregister && !reload)
+		devlink_params_unpublish(devlink);
 	if (mlxsw_core->driver->fini)
 		mlxsw_core->driver->fini(mlxsw_core);
 	mlxsw_thermal_fini(mlxsw_core->thermal);
diff --git a/include/net/devlink.h b/include/net/devlink.h
index c12ad6e9095d..2b384a38911b 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -358,6 +358,7 @@ struct devlink_param_item {
 	const struct devlink_param *param;
 	union devlink_param_value driverinit_value;
 	bool driverinit_value_valid;
+	bool published;
 };
 
 enum devlink_param_generic_id {
@@ -618,6 +619,8 @@ int devlink_params_register(struct devlink *devlink,
 void devlink_params_unregister(struct devlink *devlink,
 			       const struct devlink_param *params,
 			       size_t params_count);
+void devlink_params_publish(struct devlink *devlink);
+void devlink_params_unpublish(struct devlink *devlink);
 int devlink_port_params_register(struct devlink_port *devlink_port,
 				 const struct devlink_param *params,
 				 size_t params_count);
@@ -724,6 +727,14 @@ static inline void devlink_unregister(struct devlink *devlink)
 {
 }
 
+static inline void devlink_params_publish(struct devlink *devlink)
+{
+}
+
+static inline void devlink_params_unpublish(struct devlink *devlink)
+{
+}
+
 static inline void devlink_free(struct devlink *devlink)
 {
 	kfree(devlink);
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 7fbdba547d4f..e6a015b8ac9b 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -2858,6 +2858,7 @@ static int devlink_nl_param_fill(struct sk_buff *msg, struct devlink *devlink,
 				 u32 portid, u32 seq, int flags)
 {
 	union devlink_param_value param_value[DEVLINK_PARAM_CMODE_MAX + 1];
+	bool param_value_set[DEVLINK_PARAM_CMODE_MAX + 1] = {};
 	const struct devlink_param *param = param_item->param;
 	struct devlink_param_gset_ctx ctx;
 	struct nlattr *param_values_list;
@@ -2876,12 +2877,15 @@ static int devlink_nl_param_fill(struct sk_buff *msg, struct devlink *devlink,
 				return -EOPNOTSUPP;
 			param_value[i] = param_item->driverinit_value;
 		} else {
+			if (!param_item->published)
+				continue;
 			ctx.cmode = i;
 			err = devlink_param_get(devlink, param, &ctx);
 			if (err)
 				return err;
 			param_value[i] = ctx.val;
 		}
+		param_value_set[i] = true;
 	}
 
 	hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
@@ -2916,7 +2920,7 @@ static int devlink_nl_param_fill(struct sk_buff *msg, struct devlink *devlink,
 		goto param_nest_cancel;
 
 	for (i = 0; i <= DEVLINK_PARAM_CMODE_MAX; i++) {
-		if (!devlink_param_cmode_is_supported(param, i))
+		if (!param_value_set[i])
 			continue;
 		err = devlink_nl_param_value_fill_one(msg, param->type,
 						      i, param_value[i]);
@@ -5886,6 +5890,48 @@ void devlink_params_unregister(struct devlink *devlink,
 }
 EXPORT_SYMBOL_GPL(devlink_params_unregister);
 
+/**
+ *	devlink_params_publish - publish configuration parameters
+ *
+ *	@devlink: devlink
+ *
+ *	Publish previously registered configuration parameters.
+ */
+void devlink_params_publish(struct devlink *devlink)
+{
+	struct devlink_param_item *param_item;
+
+	list_for_each_entry(param_item, &devlink->param_list, list) {
+		if (param_item->published)
+			continue;
+		param_item->published = true;
+		devlink_param_notify(devlink, 0, param_item,
+				     DEVLINK_CMD_PARAM_NEW);
+	}
+}
+EXPORT_SYMBOL_GPL(devlink_params_publish);
+
+/**
+ *	devlink_params_unpublish - unpublish configuration parameters
+ *
+ *	@devlink: devlink
+ *
+ *	Unpublish previously registered configuration parameters.
+ */
+void devlink_params_unpublish(struct devlink *devlink)
+{
+	struct devlink_param_item *param_item;
+
+	list_for_each_entry(param_item, &devlink->param_list, list) {
+		if (!param_item->published)
+			continue;
+		param_item->published = false;
+		devlink_param_notify(devlink, 0, param_item,
+				     DEVLINK_CMD_PARAM_DEL);
+	}
+}
+EXPORT_SYMBOL_GPL(devlink_params_unpublish);
+
 /**
  *	devlink_port_params_register - register port configuration parameters
  *
-- 
cgit v1.2.3


From 9069a3817d82b01b3a55da382c774e3575946130 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@mellanox.com>
Date: Thu, 7 Feb 2019 11:22:46 +0000
Subject: lib: objagg: implement optimization hints assembly and use hints for
 object creation

Implement simple greedy algo to find more optimized root-delta tree for
a given objagg instance. This "hints" can be used by a driver to:
1) check if the hints are better (driver's choice) than the original
   objagg tree. Driver does comparison of objagg stats and hints stats.
2) use the hints to create a new objagg instance which will construct
   the root-delta tree according to the passed hints. Currently, only a
   simple greedy algorithm is implemented. Basically it finds the roots
   according to the maximal possible user count including deltas.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/mellanox/mlxsw/spectrum_acl_erp.c |  37 +-
 include/linux/objagg.h                             |  20 +-
 lib/objagg.c                                       | 573 ++++++++++++++++++++-
 lib/test_objagg.c                                  | 194 ++++++-
 4 files changed, 802 insertions(+), 22 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_erp.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_erp.c
index 2941967e1cc5..302070a74f2e 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_erp.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_erp.c
@@ -1200,6 +1200,32 @@ mlxsw_sp_acl_erp_delta_fill(const struct mlxsw_sp_acl_erp_key *parent_key,
 	return 0;
 }
 
+static bool mlxsw_sp_acl_erp_delta_check(void *priv, const void *parent_obj,
+					 const void *obj)
+{
+	const struct mlxsw_sp_acl_erp_key *parent_key = parent_obj;
+	const struct mlxsw_sp_acl_erp_key *key = obj;
+	u16 delta_start;
+	u8 delta_mask;
+	int err;
+
+	err = mlxsw_sp_acl_erp_delta_fill(parent_key, key,
+					  &delta_start, &delta_mask);
+	return err ? false : true;
+}
+
+static int mlxsw_sp_acl_erp_hints_obj_cmp(const void *obj1, const void *obj2)
+{
+	const struct mlxsw_sp_acl_erp_key *key1 = obj1;
+	const struct mlxsw_sp_acl_erp_key *key2 = obj2;
+
+	/* For hints purposes, two objects are considered equal
+	 * in case the masks are the same. Does not matter what
+	 * the "ctcam" value is.
+	 */
+	return memcmp(key1->mask, key2->mask, sizeof(key1->mask));
+}
+
 static void *mlxsw_sp_acl_erp_delta_create(void *priv, void *parent_obj,
 					   void *obj)
 {
@@ -1254,12 +1280,17 @@ static void mlxsw_sp_acl_erp_delta_destroy(void *priv, void *delta_priv)
 	kfree(delta);
 }
 
-static void *mlxsw_sp_acl_erp_root_create(void *priv, void *obj)
+static void *mlxsw_sp_acl_erp_root_create(void *priv, void *obj,
+					  unsigned int root_id)
 {
 	struct mlxsw_sp_acl_atcam_region *aregion = priv;
 	struct mlxsw_sp_acl_erp_table *erp_table = aregion->erp_table;
 	struct mlxsw_sp_acl_erp_key *key = obj;
 
+	if (!key->ctcam &&
+	    root_id != OBJAGG_OBJ_ROOT_ID_INVALID &&
+	    root_id >= MLXSW_SP_ACL_ERP_MAX_PER_REGION)
+		return ERR_PTR(-ENOBUFS);
 	return erp_table->ops->erp_create(erp_table, key);
 }
 
@@ -1273,6 +1304,8 @@ static void mlxsw_sp_acl_erp_root_destroy(void *priv, void *root_priv)
 
 static const struct objagg_ops mlxsw_sp_acl_erp_objagg_ops = {
 	.obj_size = sizeof(struct mlxsw_sp_acl_erp_key),
+	.delta_check = mlxsw_sp_acl_erp_delta_check,
+	.hints_obj_cmp = mlxsw_sp_acl_erp_hints_obj_cmp,
 	.delta_create = mlxsw_sp_acl_erp_delta_create,
 	.delta_destroy = mlxsw_sp_acl_erp_delta_destroy,
 	.root_create = mlxsw_sp_acl_erp_root_create,
@@ -1290,7 +1323,7 @@ mlxsw_sp_acl_erp_table_create(struct mlxsw_sp_acl_atcam_region *aregion)
 		return ERR_PTR(-ENOMEM);
 
 	erp_table->objagg = objagg_create(&mlxsw_sp_acl_erp_objagg_ops,
-					  aregion);
+					  NULL, aregion);
 	if (IS_ERR(erp_table->objagg)) {
 		err = PTR_ERR(erp_table->objagg);
 		goto err_objagg_create;
diff --git a/include/linux/objagg.h b/include/linux/objagg.h
index 34f38c186ea0..a675286df1af 100644
--- a/include/linux/objagg.h
+++ b/include/linux/objagg.h
@@ -6,14 +6,19 @@
 
 struct objagg_ops {
 	size_t obj_size;
+	bool (*delta_check)(void *priv, const void *parent_obj,
+			    const void *obj);
+	int (*hints_obj_cmp)(const void *obj1, const void *obj2);
 	void * (*delta_create)(void *priv, void *parent_obj, void *obj);
 	void (*delta_destroy)(void *priv, void *delta_priv);
-	void * (*root_create)(void *priv, void *obj);
+	void * (*root_create)(void *priv, void *obj, unsigned int root_id);
+#define OBJAGG_OBJ_ROOT_ID_INVALID UINT_MAX
 	void (*root_destroy)(void *priv, void *root_priv);
 };
 
 struct objagg;
 struct objagg_obj;
+struct objagg_hints;
 
 const void *objagg_obj_root_priv(const struct objagg_obj *objagg_obj);
 const void *objagg_obj_delta_priv(const struct objagg_obj *objagg_obj);
@@ -21,7 +26,8 @@ const void *objagg_obj_raw(const struct objagg_obj *objagg_obj);
 
 struct objagg_obj *objagg_obj_get(struct objagg *objagg, void *obj);
 void objagg_obj_put(struct objagg *objagg, struct objagg_obj *objagg_obj);
-struct objagg *objagg_create(const struct objagg_ops *ops, void *priv);
+struct objagg *objagg_create(const struct objagg_ops *ops,
+			     struct objagg_hints *hints, void *priv);
 void objagg_destroy(struct objagg *objagg);
 
 struct objagg_obj_stats {
@@ -43,4 +49,14 @@ struct objagg_stats {
 const struct objagg_stats *objagg_stats_get(struct objagg *objagg);
 void objagg_stats_put(const struct objagg_stats *objagg_stats);
 
+enum objagg_opt_algo_type {
+	OBJAGG_OPT_ALGO_SIMPLE_GREEDY,
+};
+
+struct objagg_hints *objagg_hints_get(struct objagg *objagg,
+				      enum objagg_opt_algo_type opt_algo_type);
+void objagg_hints_put(struct objagg_hints *objagg_hints);
+const struct objagg_stats *
+objagg_hints_stats_get(struct objagg_hints *objagg_hints);
+
 #endif
diff --git a/lib/objagg.c b/lib/objagg.c
index dae390bcef1a..befe8a47d080 100644
--- a/lib/objagg.c
+++ b/lib/objagg.c
@@ -4,6 +4,7 @@
 #include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/rhashtable.h>
+#include <linux/idr.h>
 #include <linux/list.h>
 #include <linux/sort.h>
 #include <linux/objagg.h>
@@ -11,6 +12,34 @@
 #define CREATE_TRACE_POINTS
 #include <trace/events/objagg.h>
 
+struct objagg_hints {
+	struct rhashtable node_ht;
+	struct rhashtable_params ht_params;
+	struct list_head node_list;
+	unsigned int node_count;
+	unsigned int root_count;
+	unsigned int refcount;
+	const struct objagg_ops *ops;
+};
+
+struct objagg_hints_node {
+	struct rhash_head ht_node; /* member of objagg_hints->node_ht */
+	struct list_head list; /* member of objagg_hints->node_list */
+	struct objagg_hints_node *parent;
+	unsigned int root_id;
+	struct objagg_obj_stats_info stats_info;
+	unsigned long obj[0];
+};
+
+static struct objagg_hints_node *
+objagg_hints_lookup(struct objagg_hints *objagg_hints, void *obj)
+{
+	if (!objagg_hints)
+		return NULL;
+	return rhashtable_lookup_fast(&objagg_hints->node_ht, obj,
+				      objagg_hints->ht_params);
+}
+
 struct objagg {
 	const struct objagg_ops *ops;
 	void *priv;
@@ -18,6 +47,8 @@ struct objagg {
 	struct rhashtable_params ht_params;
 	struct list_head obj_list;
 	unsigned int obj_count;
+	struct ida root_ida;
+	struct objagg_hints *hints;
 };
 
 struct objagg_obj {
@@ -30,6 +61,7 @@ struct objagg_obj {
 		void *delta_priv; /* user delta private */
 		void *root_priv; /* user root private */
 	};
+	unsigned int root_id;
 	unsigned int refcount; /* counts number of users of this object
 				* including nested objects
 				*/
@@ -130,7 +162,8 @@ static struct objagg_obj *objagg_obj_lookup(struct objagg *objagg, void *obj)
 
 static int objagg_obj_parent_assign(struct objagg *objagg,
 				    struct objagg_obj *objagg_obj,
-				    struct objagg_obj *parent)
+				    struct objagg_obj *parent,
+				    bool take_parent_ref)
 {
 	void *delta_priv;
 
@@ -144,7 +177,8 @@ static int objagg_obj_parent_assign(struct objagg *objagg,
 	 */
 	objagg_obj->parent = parent;
 	objagg_obj->delta_priv = delta_priv;
-	objagg_obj_ref_inc(objagg_obj->parent);
+	if (take_parent_ref)
+		objagg_obj_ref_inc(objagg_obj->parent);
 	trace_objagg_obj_parent_assign(objagg, objagg_obj,
 				       parent,
 				       parent->refcount);
@@ -164,7 +198,7 @@ static int objagg_obj_parent_lookup_assign(struct objagg *objagg,
 		if (!objagg_obj_is_root(objagg_obj_cur))
 			continue;
 		err = objagg_obj_parent_assign(objagg, objagg_obj,
-					       objagg_obj_cur);
+					       objagg_obj_cur, true);
 		if (!err)
 			return 0;
 	}
@@ -184,16 +218,68 @@ static void objagg_obj_parent_unassign(struct objagg *objagg,
 	__objagg_obj_put(objagg, objagg_obj->parent);
 }
 
+static int objagg_obj_root_id_alloc(struct objagg *objagg,
+				    struct objagg_obj *objagg_obj,
+				    struct objagg_hints_node *hnode)
+{
+	unsigned int min, max;
+	int root_id;
+
+	/* In case there are no hints available, the root id is invalid. */
+	if (!objagg->hints) {
+		objagg_obj->root_id = OBJAGG_OBJ_ROOT_ID_INVALID;
+		return 0;
+	}
+
+	if (hnode) {
+		min = hnode->root_id;
+		max = hnode->root_id;
+	} else {
+		/* For objects with no hint, start after the last
+		 * hinted root_id.
+		 */
+		min = objagg->hints->root_count;
+		max = ~0;
+	}
+
+	root_id = ida_alloc_range(&objagg->root_ida, min, max, GFP_KERNEL);
+
+	if (root_id < 0)
+		return root_id;
+	objagg_obj->root_id = root_id;
+	return 0;
+}
+
+static void objagg_obj_root_id_free(struct objagg *objagg,
+				    struct objagg_obj *objagg_obj)
+{
+	if (!objagg->hints)
+		return;
+	ida_free(&objagg->root_ida, objagg_obj->root_id);
+}
+
 static int objagg_obj_root_create(struct objagg *objagg,
-				  struct objagg_obj *objagg_obj)
+				  struct objagg_obj *objagg_obj,
+				  struct objagg_hints_node *hnode)
 {
-	objagg_obj->root_priv = objagg->ops->root_create(objagg->priv,
-							 objagg_obj->obj);
-	if (IS_ERR(objagg_obj->root_priv))
-		return PTR_ERR(objagg_obj->root_priv);
+	int err;
 
+	err = objagg_obj_root_id_alloc(objagg, objagg_obj, hnode);
+	if (err)
+		return err;
+	objagg_obj->root_priv = objagg->ops->root_create(objagg->priv,
+							 objagg_obj->obj,
+							 objagg_obj->root_id);
+	if (IS_ERR(objagg_obj->root_priv)) {
+		err = PTR_ERR(objagg_obj->root_priv);
+		goto err_root_create;
+	}
 	trace_objagg_obj_root_create(objagg, objagg_obj);
 	return 0;
+
+err_root_create:
+	objagg_obj_root_id_free(objagg, objagg_obj);
+	return err;
 }
 
 static void objagg_obj_root_destroy(struct objagg *objagg,
@@ -201,19 +287,69 @@ static void objagg_obj_root_destroy(struct objagg *objagg,
 {
 	trace_objagg_obj_root_destroy(objagg, objagg_obj);
 	objagg->ops->root_destroy(objagg->priv, objagg_obj->root_priv);
+	objagg_obj_root_id_free(objagg, objagg_obj);
+}
+
+static struct objagg_obj *__objagg_obj_get(struct objagg *objagg, void *obj);
+
+static int objagg_obj_init_with_hints(struct objagg *objagg,
+				      struct objagg_obj *objagg_obj,
+				      bool *hint_found)
+{
+	struct objagg_hints_node *hnode;
+	struct objagg_obj *parent;
+	int err;
+
+	hnode = objagg_hints_lookup(objagg->hints, objagg_obj->obj);
+	if (!hnode) {
+		*hint_found = false;
+		return 0;
+	}
+	*hint_found = true;
+
+	if (!hnode->parent)
+		return objagg_obj_root_create(objagg, objagg_obj, hnode);
+
+	parent = __objagg_obj_get(objagg, hnode->parent->obj);
+	if (IS_ERR(parent))
+		return PTR_ERR(parent);
+
+	err = objagg_obj_parent_assign(objagg, objagg_obj, parent, false);
+	if (err) {
+		*hint_found = false;
+		err = 0;
+		goto err_parent_assign;
+	}
+
+	return 0;
+
+err_parent_assign:
+	objagg_obj_put(objagg, parent);
+	return err;
 }
 
 static int objagg_obj_init(struct objagg *objagg,
 			   struct objagg_obj *objagg_obj)
 {
+	bool hint_found;
 	int err;
 
+	/* First, try to use hints if they are available and
+	 * if they provide result.
+	 */
+	err = objagg_obj_init_with_hints(objagg, objagg_obj, &hint_found);
+	if (err)
+		return err;
+
+	if (hint_found)
+		return 0;
+
 	/* Try to find if the object can be aggregated under an existing one. */
 	err = objagg_obj_parent_lookup_assign(objagg, objagg_obj);
 	if (!err)
 		return 0;
 	/* If aggregation is not possible, make the object a root. */
-	return objagg_obj_root_create(objagg, objagg_obj);
+	return objagg_obj_root_create(objagg, objagg_obj, NULL);
 }
 
 static void objagg_obj_fini(struct objagg *objagg,
@@ -349,8 +485,9 @@ EXPORT_SYMBOL(objagg_obj_put);
 
 /**
  * objagg_create - creates a new objagg instance
- * @ops:	user-specific callbacks
- * @priv:	pointer to a private data passed to the ops
+ * @ops:		user-specific callbacks
+ * @objagg_hints:	hints, can be NULL
+ * @priv:		pointer to a private data passed to the ops
  *
  * Note: all locking must be provided by the caller.
  *
@@ -374,18 +511,25 @@ EXPORT_SYMBOL(objagg_obj_put);
  * Returns a pointer to newly created objagg instance in case of success,
  * otherwise it returns pointer error using ERR_PTR macro.
  */
-struct objagg *objagg_create(const struct objagg_ops *ops, void *priv)
+struct objagg *objagg_create(const struct objagg_ops *ops,
+			     struct objagg_hints *objagg_hints, void *priv)
 {
 	struct objagg *objagg;
 	int err;
 
 	if (WARN_ON(!ops || !ops->root_create || !ops->root_destroy ||
-		    !ops->delta_create || !ops->delta_destroy))
+		    !ops->delta_check || !ops->delta_create ||
+		    !ops->delta_destroy))
 		return ERR_PTR(-EINVAL);
+
 	objagg = kzalloc(sizeof(*objagg), GFP_KERNEL);
 	if (!objagg)
 		return ERR_PTR(-ENOMEM);
 	objagg->ops = ops;
+	if (objagg_hints) {
+		objagg->hints = objagg_hints;
+		objagg_hints->refcount++;
+	}
 	objagg->priv = priv;
 	INIT_LIST_HEAD(&objagg->obj_list);
 
@@ -397,6 +541,8 @@ struct objagg *objagg_create(const struct objagg_ops *ops, void *priv)
 	if (err)
 		goto err_rhashtable_init;
 
+	ida_init(&objagg->root_ida);
+
 	trace_objagg_create(objagg);
 	return objagg;
 
@@ -415,8 +561,11 @@ EXPORT_SYMBOL(objagg_create);
 void objagg_destroy(struct objagg *objagg)
 {
 	trace_objagg_destroy(objagg);
+	ida_destroy(&objagg->root_ida);
 	WARN_ON(!list_empty(&objagg->obj_list));
 	rhashtable_destroy(&objagg->obj_ht);
+	if (objagg->hints)
+		objagg_hints_put(objagg->hints);
 	kfree(objagg);
 }
 EXPORT_SYMBOL(objagg_destroy);
@@ -496,6 +645,404 @@ void objagg_stats_put(const struct objagg_stats *objagg_stats)
 }
 EXPORT_SYMBOL(objagg_stats_put);
 
+static struct objagg_hints_node *
+objagg_hints_node_create(struct objagg_hints *objagg_hints,
+			 struct objagg_obj *objagg_obj, size_t obj_size,
+			 struct objagg_hints_node *parent_hnode)
+{
+	unsigned int user_count = objagg_obj->stats.user_count;
+	struct objagg_hints_node *hnode;
+	int err;
+
+	hnode = kzalloc(sizeof(*hnode) + obj_size, GFP_KERNEL);
+	if (!hnode)
+		return ERR_PTR(-ENOMEM);
+	memcpy(hnode->obj, &objagg_obj->obj, obj_size);
+	hnode->stats_info.stats.user_count = user_count;
+	hnode->stats_info.stats.delta_user_count = user_count;
+	if (parent_hnode) {
+		parent_hnode->stats_info.stats.delta_user_count += user_count;
+	} else {
+		hnode->root_id = objagg_hints->root_count++;
+		hnode->stats_info.is_root = true;
+	}
+	hnode->stats_info.objagg_obj = objagg_obj;
+
+	err = rhashtable_insert_fast(&objagg_hints->node_ht, &hnode->ht_node,
+				     objagg_hints->ht_params);
+	if (err)
+		goto err_ht_insert;
+
+	list_add(&hnode->list, &objagg_hints->node_list);
+	hnode->parent = parent_hnode;
+	objagg_hints->node_count++;
+
+	return hnode;
+
+err_ht_insert:
+	kfree(hnode);
+	return ERR_PTR(err);
+}
+
+static void objagg_hints_flush(struct objagg_hints *objagg_hints)
+{
+	struct objagg_hints_node *hnode, *tmp;
+
+	list_for_each_entry_safe(hnode, tmp, &objagg_hints->node_list, list) {
+		list_del(&hnode->list);
+		rhashtable_remove_fast(&objagg_hints->node_ht, &hnode->ht_node,
+				       objagg_hints->ht_params);
+		kfree(hnode);
+	}
+}
+
+struct objagg_tmp_node {
+	struct objagg_obj *objagg_obj;
+	bool crossed_out;
+};
+
+struct objagg_tmp_graph {
+	struct objagg_tmp_node *nodes;
+	unsigned long nodes_count;
+	unsigned long *edges;
+};
+
+static int objagg_tmp_graph_edge_index(struct objagg_tmp_graph *graph,
+				       int parent_index, int index)
+{
+	return index * graph->nodes_count + parent_index;
+}
+
+static void objagg_tmp_graph_edge_set(struct objagg_tmp_graph *graph,
+				      int parent_index, int index)
+{
+	int edge_index = objagg_tmp_graph_edge_index(graph, index,
+						     parent_index);
+
+	__set_bit(edge_index, graph->edges);
+}
+
+static bool objagg_tmp_graph_is_edge(struct objagg_tmp_graph *graph,
+				     int parent_index, int index)
+{
+	int edge_index = objagg_tmp_graph_edge_index(graph, index,
+						     parent_index);
+
+	return test_bit(edge_index, graph->edges);
+}
+
+static unsigned int objagg_tmp_graph_node_weight(struct objagg_tmp_graph *graph,
+						 unsigned int index)
+{
+	struct objagg_tmp_node *node = &graph->nodes[index];
+	unsigned int weight = node->objagg_obj->stats.user_count;
+	int j;
+
+	/* Node weight is sum of node users and all other nodes users
+	 * that this node can represent with delta.
+	 */
+
+	if (node->crossed_out)
+		return 0;
+	for (j = 0; j < graph->nodes_count; j++) {
+		if (!objagg_tmp_graph_is_edge(graph, index, j))
+			continue;
+		node = &graph->nodes[j];
+		if (node->crossed_out)
+			continue;
+		weight += node->objagg_obj->stats.user_count;
+	}
+	return weight;
+}
+
+static int objagg_tmp_graph_node_max_weight(struct objagg_tmp_graph *graph)
+{
+	unsigned int max_weight = 0;
+	unsigned int weight;
+	int max_index = -1;
+	int i;
+
+	for (i = 0; i < graph->nodes_count; i++) {
+		weight = objagg_tmp_graph_node_weight(graph, i);
+		if (weight > max_weight) {
+			max_weight = weight;
+			max_index = i;
+		}
+	}
+	return max_index;
+}
+
+static struct objagg_tmp_graph *objagg_tmp_graph_create(struct objagg *objagg)
+{
+	unsigned int nodes_count = objagg->obj_count;
+	struct objagg_tmp_graph *graph;
+	struct objagg_tmp_node *node;
+	struct objagg_tmp_node *pnode;
+	struct objagg_obj *objagg_obj;
+	size_t alloc_size;
+	int i, j;
+
+	graph = kzalloc(sizeof(*graph), GFP_KERNEL);
+	if (!graph)
+		return NULL;
+
+	graph->nodes = kcalloc(nodes_count, sizeof(*graph->nodes), GFP_KERNEL);
+	if (!graph->nodes)
+		goto err_nodes_alloc;
+	graph->nodes_count = nodes_count;
+
+	alloc_size = BITS_TO_LONGS(nodes_count * nodes_count) *
+		     sizeof(unsigned long);
+	graph->edges = kzalloc(alloc_size, GFP_KERNEL);
+	if (!graph->edges)
+		goto err_edges_alloc;
+
+	i = 0;
+	list_for_each_entry(objagg_obj, &objagg->obj_list, list) {
+		node = &graph->nodes[i++];
+		node->objagg_obj = objagg_obj;
+	}
+
+	/* Assemble a temporary graph. Insert edge X->Y in case Y can be
+	 * in delta of X.
+	 */
+	for (i = 0; i < nodes_count; i++) {
+		for (j = 0; j < nodes_count; j++) {
+			if (i == j)
+				continue;
+			pnode = &graph->nodes[i];
+			node = &graph->nodes[j];
+			if (objagg->ops->delta_check(objagg->priv,
+						     pnode->objagg_obj->obj,
+						     node->objagg_obj->obj)) {
+				objagg_tmp_graph_edge_set(graph, i, j);
+
+			}
+		}
+	}
+	return graph;
+
+err_edges_alloc:
+	kfree(graph->nodes);
+err_nodes_alloc:
+	kfree(graph);
+	return NULL;
+}
+
+static void objagg_tmp_graph_destroy(struct objagg_tmp_graph *graph)
+{
+	kfree(graph->edges);
+	kfree(graph->nodes);
+	kfree(graph);
+}
+
+static int
+objagg_opt_simple_greedy_fillup_hints(struct objagg_hints *objagg_hints,
+				      struct objagg *objagg)
+{
+	struct objagg_hints_node *hnode, *parent_hnode;
+	struct objagg_tmp_graph *graph;
+	struct objagg_tmp_node *node;
+	int index;
+	int j;
+	int err;
+
+	graph = objagg_tmp_graph_create(objagg);
+	if (!graph)
+		return -ENOMEM;
+
+	/* Find the nodes from the ones that can accommodate most users
+	 * and cross them out of the graph. Save them to the hint list.
+	 */
+	while ((index = objagg_tmp_graph_node_max_weight(graph)) != -1) {
+		node = &graph->nodes[index];
+		node->crossed_out = true;
+		hnode = objagg_hints_node_create(objagg_hints,
+						 node->objagg_obj,
+						 objagg->ops->obj_size,
+						 NULL);
+		if (IS_ERR(hnode)) {
+			err = PTR_ERR(hnode);
+			goto out;
+		}
+		parent_hnode = hnode;
+		for (j = 0; j < graph->nodes_count; j++) {
+			if (!objagg_tmp_graph_is_edge(graph, index, j))
+				continue;
+			node = &graph->nodes[j];
+			if (node->crossed_out)
+				continue;
+			node->crossed_out = true;
+			hnode = objagg_hints_node_create(objagg_hints,
+							 node->objagg_obj,
+							 objagg->ops->obj_size,
+							 parent_hnode);
+			if (IS_ERR(hnode)) {
+				err = PTR_ERR(hnode);
+				goto out;
+			}
+		}
+	}
+
+	err = 0;
+out:
+	objagg_tmp_graph_destroy(graph);
+	return err;
+}
+
+struct objagg_opt_algo {
+	int (*fillup_hints)(struct objagg_hints *objagg_hints,
+			    struct objagg *objagg);
+};
+
+static const struct objagg_opt_algo objagg_opt_simple_greedy = {
+	.fillup_hints = objagg_opt_simple_greedy_fillup_hints,
+};
+
+
+static const struct objagg_opt_algo *objagg_opt_algos[] = {
+	[OBJAGG_OPT_ALGO_SIMPLE_GREEDY] = &objagg_opt_simple_greedy,
+};
+
+static int objagg_hints_obj_cmp(struct rhashtable_compare_arg *arg,
+				const void *obj)
+{
+	struct rhashtable *ht = arg->ht;
+	struct objagg_hints *objagg_hints =
+			container_of(ht, struct objagg_hints, node_ht);
+	const struct objagg_ops *ops = objagg_hints->ops;
+	const char *ptr = obj;
+
+	ptr += ht->p.key_offset;
+	return ops->hints_obj_cmp ? ops->hints_obj_cmp(ptr, arg->key) :
+				    memcmp(ptr, arg->key, ht->p.key_len);
+}
+
+/**
+ * objagg_hints_get - obtains hints instance
+ * @objagg:		objagg instance
+ * @opt_algo_type:	type of hints finding algorithm
+ *
+ * Note: all locking must be provided by the caller.
+ *
+ * According to the algo type, the existing objects of objagg instance
+ * are going to be went-through to assemble an optimal tree. We call this
+ * tree hints. These hints can be later on used for creation of
+ * a new objagg instance. There, the future object creations are going
+ * to be consulted with these hints in order to find out, where exactly
+ * the new object should be put as a root or delta.
+ *
+ * Returns a pointer to hints instance in case of success,
+ * otherwise it returns pointer error using ERR_PTR macro.
+ */
+struct objagg_hints *objagg_hints_get(struct objagg *objagg,
+				      enum objagg_opt_algo_type opt_algo_type)
+{
+	const struct objagg_opt_algo *algo = objagg_opt_algos[opt_algo_type];
+	struct objagg_hints *objagg_hints;
+	int err;
+
+	objagg_hints = kzalloc(sizeof(*objagg_hints), GFP_KERNEL);
+	if (!objagg_hints)
+		return ERR_PTR(-ENOMEM);
+
+	objagg_hints->ops = objagg->ops;
+	objagg_hints->refcount = 1;
+
+	INIT_LIST_HEAD(&objagg_hints->node_list);
+
+	objagg_hints->ht_params.key_len = objagg->ops->obj_size;
+	objagg_hints->ht_params.key_offset =
+				offsetof(struct objagg_hints_node, obj);
+	objagg_hints->ht_params.head_offset =
+				offsetof(struct objagg_hints_node, ht_node);
+	objagg_hints->ht_params.obj_cmpfn = objagg_hints_obj_cmp;
+
+	err = rhashtable_init(&objagg_hints->node_ht, &objagg_hints->ht_params);
+	if (err)
+		goto err_rhashtable_init;
+
+	err = algo->fillup_hints(objagg_hints, objagg);
+	if (err)
+		goto err_fillup_hints;
+
+	if (WARN_ON(objagg_hints->node_count != objagg->obj_count))
+		goto err_node_count_check;
+
+	return objagg_hints;
+
+err_node_count_check:
+err_fillup_hints:
+	objagg_hints_flush(objagg_hints);
+	rhashtable_destroy(&objagg_hints->node_ht);
+err_rhashtable_init:
+	kfree(objagg_hints);
+	return ERR_PTR(err);
+}
+EXPORT_SYMBOL(objagg_hints_get);
+
+/**
+ * objagg_hints_put - puts hints instance
+ * @objagg_hints:	objagg hints instance
+ *
+ * Note: all locking must be provided by the caller.
+ */
+void objagg_hints_put(struct objagg_hints *objagg_hints)
+{
+	if (--objagg_hints->refcount)
+		return;
+	objagg_hints_flush(objagg_hints);
+	rhashtable_destroy(&objagg_hints->node_ht);
+	kfree(objagg_hints);
+}
+EXPORT_SYMBOL(objagg_hints_put);
+
+/**
+ * objagg_hints_stats_get - obtains stats of the hints instance
+ * @objagg_hints:	hints instance
+ *
+ * Note: all locking must be provided by the caller.
+ *
+ * The returned structure contains statistics of all objects
+ * currently in use, ordered by following rules:
+ * 1) Root objects are always on lower indexes than the rest.
+ * 2) Objects with higher delta user count are always on lower
+ *    indexes.
+ * 3) In case multiple objects have the same delta user count,
+ *    the objects are ordered by user count.
+ *
+ * Returns a pointer to stats instance in case of success,
+ * otherwise it returns pointer error using ERR_PTR macro.
+ */
+const struct objagg_stats *
+objagg_hints_stats_get(struct objagg_hints *objagg_hints)
+{
+	struct objagg_stats *objagg_stats;
+	struct objagg_hints_node *hnode;
+	int i;
+
+	objagg_stats = kzalloc(struct_size(objagg_stats, stats_info,
+					   objagg_hints->node_count),
+			       GFP_KERNEL);
+	if (!objagg_stats)
+		return ERR_PTR(-ENOMEM);
+
+	i = 0;
+	list_for_each_entry(hnode, &objagg_hints->node_list, list) {
+		memcpy(&objagg_stats->stats_info[i], &hnode->stats_info,
+		       sizeof(objagg_stats->stats_info[0]));
+		i++;
+	}
+	objagg_stats->stats_info_count = i;
+
+	sort(objagg_stats->stats_info, objagg_stats->stats_info_count,
+	     sizeof(struct objagg_obj_stats_info),
+	     objagg_stats_info_sort_cmp_func, NULL);
+
+	return objagg_stats;
+}
+EXPORT_SYMBOL(objagg_hints_stats_get);
+
 MODULE_LICENSE("Dual BSD/GPL");
 MODULE_AUTHOR("Jiri Pirko <jiri@mellanox.com>");
 MODULE_DESCRIPTION("Object aggregation manager");
diff --git a/lib/test_objagg.c b/lib/test_objagg.c
index ab57144bb0cd..3744573b6365 100644
--- a/lib/test_objagg.c
+++ b/lib/test_objagg.c
@@ -87,6 +87,15 @@ static void world_obj_put(struct world *world, struct objagg *objagg,
 
 #define MAX_KEY_ID_DIFF 5
 
+static bool delta_check(void *priv, const void *parent_obj, const void *obj)
+{
+	const struct tokey *parent_key = parent_obj;
+	const struct tokey *key = obj;
+	int diff = key->id - parent_key->id;
+
+	return diff >= 0 && diff <= MAX_KEY_ID_DIFF;
+}
+
 static void *delta_create(void *priv, void *parent_obj, void *obj)
 {
 	struct tokey *parent_key = parent_obj;
@@ -95,7 +104,7 @@ static void *delta_create(void *priv, void *parent_obj, void *obj)
 	int diff = key->id - parent_key->id;
 	struct delta *delta;
 
-	if (diff < 0 || diff > MAX_KEY_ID_DIFF)
+	if (!delta_check(priv, parent_obj, obj))
 		return ERR_PTR(-EINVAL);
 
 	delta = kzalloc(sizeof(*delta), GFP_KERNEL);
@@ -115,7 +124,7 @@ static void delta_destroy(void *priv, void *delta_priv)
 	kfree(delta);
 }
 
-static void *root_create(void *priv, void *obj)
+static void *root_create(void *priv, void *obj, unsigned int id)
 {
 	struct world *world = priv;
 	struct tokey *key = obj;
@@ -268,6 +277,12 @@ stats_put:
 	return err;
 }
 
+static bool delta_check_dummy(void *priv, const void *parent_obj,
+			      const void *obj)
+{
+	return false;
+}
+
 static void *delta_create_dummy(void *priv, void *parent_obj, void *obj)
 {
 	return ERR_PTR(-EOPNOTSUPP);
@@ -279,6 +294,7 @@ static void delta_destroy_dummy(void *priv, void *delta_priv)
 
 static const struct objagg_ops nodelta_ops = {
 	.obj_size = sizeof(struct tokey),
+	.delta_check = delta_check_dummy,
 	.delta_create = delta_create_dummy,
 	.delta_destroy = delta_destroy_dummy,
 	.root_create = root_create,
@@ -292,7 +308,7 @@ static int test_nodelta(void)
 	int i;
 	int err;
 
-	objagg = objagg_create(&nodelta_ops, &world);
+	objagg = objagg_create(&nodelta_ops, NULL, &world);
 	if (IS_ERR(objagg))
 		return PTR_ERR(objagg);
 
@@ -357,6 +373,7 @@ err_stats_second_zero:
 
 static const struct objagg_ops delta_ops = {
 	.obj_size = sizeof(struct tokey),
+	.delta_check = delta_check,
 	.delta_create = delta_create,
 	.delta_destroy = delta_destroy,
 	.root_create = root_create,
@@ -793,7 +810,7 @@ static int test_delta(void)
 	int i;
 	int err;
 
-	objagg = objagg_create(&delta_ops, &world);
+	objagg = objagg_create(&delta_ops, NULL, &world);
 	if (IS_ERR(objagg))
 		return PTR_ERR(objagg);
 
@@ -815,6 +832,170 @@ err_do_action_item:
 	return err;
 }
 
+struct hints_case {
+	const unsigned int *key_ids;
+	size_t key_ids_count;
+	struct expect_stats expect_stats;
+	struct expect_stats expect_stats_hints;
+};
+
+static const unsigned int hints_case_key_ids[] = {
+	1, 7, 3, 5, 3, 1, 30, 8, 8, 5, 6, 8,
+};
+
+static const struct hints_case hints_case = {
+	.key_ids = hints_case_key_ids,
+	.key_ids_count = ARRAY_SIZE(hints_case_key_ids),
+	.expect_stats =
+		EXPECT_STATS(7, ROOT(1, 2, 7), ROOT(7, 1, 4), ROOT(30, 1, 1),
+				DELTA(8, 3), DELTA(3, 2),
+				DELTA(5, 2), DELTA(6, 1)),
+	.expect_stats_hints =
+		EXPECT_STATS(7, ROOT(3, 2, 9), ROOT(1, 2, 2), ROOT(30, 1, 1),
+				DELTA(8, 3), DELTA(5, 2),
+				DELTA(6, 1), DELTA(7, 1)),
+};
+
+static void __pr_debug_stats(const struct objagg_stats *stats)
+{
+	int i;
+
+	for (i = 0; i < stats->stats_info_count; i++)
+		pr_debug("Stat index %d key %u: u %d, d %d, %s\n", i,
+			 obj_to_key_id(stats->stats_info[i].objagg_obj),
+			 stats->stats_info[i].stats.user_count,
+			 stats->stats_info[i].stats.delta_user_count,
+			 stats->stats_info[i].is_root ? "root" : "noroot");
+}
+
+static void pr_debug_stats(struct objagg *objagg)
+{
+	const struct objagg_stats *stats;
+
+	stats = objagg_stats_get(objagg);
+	if (IS_ERR(stats))
+		return;
+	__pr_debug_stats(stats);
+	objagg_stats_put(stats);
+}
+
+static void pr_debug_hints_stats(struct objagg_hints *objagg_hints)
+{
+	const struct objagg_stats *stats;
+
+	stats = objagg_hints_stats_get(objagg_hints);
+	if (IS_ERR(stats))
+		return;
+	__pr_debug_stats(stats);
+	objagg_stats_put(stats);
+}
+
+static int check_expect_hints_stats(struct objagg_hints *objagg_hints,
+				    const struct expect_stats *expect_stats,
+				    const char **errmsg)
+{
+	const struct objagg_stats *stats;
+	int err;
+
+	stats = objagg_hints_stats_get(objagg_hints);
+	if (IS_ERR(stats))
+		return PTR_ERR(stats);
+	err = __check_expect_stats(stats, expect_stats, errmsg);
+	objagg_stats_put(stats);
+	return err;
+}
+
+static int test_hints_case(const struct hints_case *hints_case)
+{
+	struct objagg_obj *objagg_obj;
+	struct objagg_hints *hints;
+	struct world world2 = {};
+	struct world world = {};
+	struct objagg *objagg2;
+	struct objagg *objagg;
+	const char *errmsg;
+	int i;
+	int err;
+
+	objagg = objagg_create(&delta_ops, NULL, &world);
+	if (IS_ERR(objagg))
+		return PTR_ERR(objagg);
+
+	for (i = 0; i < hints_case->key_ids_count; i++) {
+		objagg_obj = world_obj_get(&world, objagg,
+					   hints_case->key_ids[i]);
+		if (IS_ERR(objagg_obj)) {
+			err = PTR_ERR(objagg_obj);
+			goto err_world_obj_get;
+		}
+	}
+
+	pr_debug_stats(objagg);
+	err = check_expect_stats(objagg, &hints_case->expect_stats, &errmsg);
+	if (err) {
+		pr_err("Stats: %s\n", errmsg);
+		goto err_check_expect_stats;
+	}
+
+	hints = objagg_hints_get(objagg, OBJAGG_OPT_ALGO_SIMPLE_GREEDY);
+	if (IS_ERR(hints)) {
+		err = PTR_ERR(hints);
+		goto err_hints_get;
+	}
+
+	pr_debug_hints_stats(hints);
+	err = check_expect_hints_stats(hints, &hints_case->expect_stats_hints,
+				       &errmsg);
+	if (err) {
+		pr_err("Hints stats: %s\n", errmsg);
+		goto err_check_expect_hints_stats;
+	}
+
+	objagg2 = objagg_create(&delta_ops, hints, &world2);
+	if (IS_ERR(objagg))
+		return PTR_ERR(objagg);
+
+	for (i = 0; i < hints_case->key_ids_count; i++) {
+		objagg_obj = world_obj_get(&world2, objagg2,
+					   hints_case->key_ids[i]);
+		if (IS_ERR(objagg_obj)) {
+			err = PTR_ERR(objagg_obj);
+			goto err_world2_obj_get;
+		}
+	}
+
+	pr_debug_stats(objagg2);
+	err = check_expect_stats(objagg2, &hints_case->expect_stats_hints,
+				 &errmsg);
+	if (err) {
+		pr_err("Stats2: %s\n", errmsg);
+		goto err_check_expect_stats2;
+	}
+
+	err = 0;
+
+err_check_expect_stats2:
+err_world2_obj_get:
+	for (i--; i >= 0; i--)
+		world_obj_put(&world2, objagg, hints_case->key_ids[i]);
+	objagg_hints_put(hints);
+	objagg_destroy(objagg2);
+	i = hints_case->key_ids_count;
+err_check_expect_hints_stats:
+err_hints_get:
+err_check_expect_stats:
+err_world_obj_get:
+	for (i--; i >= 0; i--)
+		world_obj_put(&world, objagg, hints_case->key_ids[i]);
+
+	objagg_destroy(objagg);
+	return err;
+}
+static int test_hints(void)
+{
+	return test_hints_case(&hints_case);
+}
+
 static int __init test_objagg_init(void)
 {
 	int err;
@@ -822,7 +1003,10 @@ static int __init test_objagg_init(void)
 	err = test_nodelta();
 	if (err)
 		return err;
-	return test_delta();
+	err = test_delta();
+	if (err)
+		return err;
+	return test_hints();
 }
 
 static void __exit test_objagg_exit(void)
-- 
cgit v1.2.3


From 204f6a8c413ec41a7ec5e3f0f0590d4318f7a1f2 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@mellanox.com>
Date: Thu, 7 Feb 2019 11:22:47 +0000
Subject: lib: objagg: add root count to stats

Count number of roots and add it to stats. It is handy for the library
user to have this stats available as it can act upon it without
counting roots itself.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/objagg.h | 1 +
 lib/objagg.c           | 4 ++++
 2 files changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/linux/objagg.h b/include/linux/objagg.h
index a675286df1af..78021777df46 100644
--- a/include/linux/objagg.h
+++ b/include/linux/objagg.h
@@ -42,6 +42,7 @@ struct objagg_obj_stats_info {
 };
 
 struct objagg_stats {
+	unsigned int root_count;
 	unsigned int stats_info_count;
 	struct objagg_obj_stats_info stats_info[];
 };
diff --git a/lib/objagg.c b/lib/objagg.c
index befe8a47d080..781f41c3c47d 100644
--- a/lib/objagg.c
+++ b/lib/objagg.c
@@ -621,6 +621,8 @@ const struct objagg_stats *objagg_stats_get(struct objagg *objagg)
 		objagg_stats->stats_info[i].objagg_obj = objagg_obj;
 		objagg_stats->stats_info[i].is_root =
 					objagg_obj_is_root(objagg_obj);
+		if (objagg_stats->stats_info[i].is_root)
+			objagg_stats->root_count++;
 		i++;
 	}
 	objagg_stats->stats_info_count = i;
@@ -1031,6 +1033,8 @@ objagg_hints_stats_get(struct objagg_hints *objagg_hints)
 	list_for_each_entry(hnode, &objagg_hints->node_list, list) {
 		memcpy(&objagg_stats->stats_info[i], &hnode->stats_info,
 		       sizeof(objagg_stats->stats_info[0]));
+		if (objagg_stats->stats_info[i].is_root)
+			objagg_stats->root_count++;
 		i++;
 	}
 	objagg_stats->stats_info_count = i;
-- 
cgit v1.2.3


From 3985de7260a7342a474a0792fcd23737341657ce Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@mellanox.com>
Date: Thu, 7 Feb 2019 11:22:55 +0000
Subject: mlxsw: spectrum_acl: Add couple of vregion rehash tracepoints

As vregion rehash is happening in delayed work, add some visibility to
the process using a few tracepoints.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/mellanox/mlxsw/spectrum_acl_tcam.c    |  9 +++-
 include/trace/events/mlxsw.h                       | 61 ++++++++++++++++++++++
 2 files changed, 69 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c
index f2cb37c0d300..7e225a86e3a8 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c
@@ -8,6 +8,7 @@
 #include <linux/list.h>
 #include <linux/rhashtable.h>
 #include <linux/netdevice.h>
+#include <trace/events/mlxsw.h>
 
 #include "reg.h"
 #include "core.h"
@@ -1175,6 +1176,8 @@ mlxsw_sp_acl_tcam_vregion_migrate(struct mlxsw_sp *mlxsw_sp,
 	struct mlxsw_sp_acl_tcam_region *region2, *unused_region;
 	int err;
 
+	trace_mlxsw_sp_acl_tcam_vregion_migrate(mlxsw_sp, vregion);
+
 	region2 = mlxsw_sp_acl_tcam_region_create(mlxsw_sp, vregion->tcam,
 						  vregion, hints_priv);
 	if (IS_ERR(region2))
@@ -1219,6 +1222,7 @@ mlxsw_sp_acl_tcam_vregion_rehash(struct mlxsw_sp *mlxsw_sp,
 	void *hints_priv;
 	int err;
 
+	trace_mlxsw_sp_acl_tcam_vregion_rehash(mlxsw_sp, vregion);
 	if (vregion->failed_rollback)
 		return -EBUSY;
 
@@ -1233,8 +1237,11 @@ mlxsw_sp_acl_tcam_vregion_rehash(struct mlxsw_sp *mlxsw_sp,
 	err = mlxsw_sp_acl_tcam_vregion_migrate(mlxsw_sp, vregion, hints_priv);
 	if (err) {
 		dev_err(mlxsw_sp->bus_info->dev, "Failed to migrate vregion\n");
-		if (vregion->failed_rollback)
+		if (vregion->failed_rollback) {
+			trace_mlxsw_sp_acl_tcam_vregion_rehash_dis(mlxsw_sp,
+								   vregion);
 			dev_err(mlxsw_sp->bus_info->dev, "Failed to rollback during vregion migration fail\n");
+		}
 	}
 
 	ops->region_rehash_hints_put(hints_priv);
diff --git a/include/trace/events/mlxsw.h b/include/trace/events/mlxsw.h
index 6c2bafcade18..a5ce6df9dc49 100644
--- a/include/trace/events/mlxsw.h
+++ b/include/trace/events/mlxsw.h
@@ -11,6 +11,7 @@
 
 struct mlxsw_sp;
 struct mlxsw_sp_acl_atcam_region;
+struct mlxsw_sp_acl_tcam_vregion;
 
 TRACE_EVENT(mlxsw_sp_acl_atcam_entry_add_ctcam_spill,
 	TP_PROTO(const struct mlxsw_sp *mlxsw_sp,
@@ -32,6 +33,66 @@ TRACE_EVENT(mlxsw_sp_acl_atcam_entry_add_ctcam_spill,
 		  __entry->mlxsw_sp, __entry->aregion)
 );
 
+TRACE_EVENT(mlxsw_sp_acl_tcam_vregion_rehash,
+	TP_PROTO(const struct mlxsw_sp *mlxsw_sp,
+		 const struct mlxsw_sp_acl_tcam_vregion *vregion),
+
+	TP_ARGS(mlxsw_sp, vregion),
+
+	TP_STRUCT__entry(
+		__field(const void *, mlxsw_sp)
+		__field(const void *, vregion)
+	),
+
+	TP_fast_assign(
+		__entry->mlxsw_sp = mlxsw_sp;
+		__entry->vregion = vregion;
+	),
+
+	TP_printk("mlxsw_sp %p, vregion %p",
+		  __entry->mlxsw_sp, __entry->vregion)
+);
+
+TRACE_EVENT(mlxsw_sp_acl_tcam_vregion_migrate,
+	TP_PROTO(const struct mlxsw_sp *mlxsw_sp,
+		 const struct mlxsw_sp_acl_tcam_vregion *vregion),
+
+	TP_ARGS(mlxsw_sp, vregion),
+
+	TP_STRUCT__entry(
+		__field(const void *, mlxsw_sp)
+		__field(const void *, vregion)
+	),
+
+	TP_fast_assign(
+		__entry->mlxsw_sp = mlxsw_sp;
+		__entry->vregion = vregion;
+	),
+
+	TP_printk("mlxsw_sp %p, vregion %p",
+		  __entry->mlxsw_sp, __entry->vregion)
+);
+
+TRACE_EVENT(mlxsw_sp_acl_tcam_vregion_rehash_dis,
+	TP_PROTO(const struct mlxsw_sp *mlxsw_sp,
+		 const struct mlxsw_sp_acl_tcam_vregion *vregion),
+
+	TP_ARGS(mlxsw_sp, vregion),
+
+	TP_STRUCT__entry(
+		__field(const void *, mlxsw_sp)
+		__field(const void *, vregion)
+	),
+
+	TP_fast_assign(
+		__entry->mlxsw_sp = mlxsw_sp;
+		__entry->vregion = vregion;
+	),
+
+	TP_printk("mlxsw_sp %p, vregion %p",
+		  __entry->mlxsw_sp, __entry->vregion)
+);
+
 #endif /* _MLXSW_TRACEPOINT_H */
 
 /* This part must be outside protection */
-- 
cgit v1.2.3


From 2c1619edef61a03cb516efaa81750784c3071d10 Mon Sep 17 00:00:00 2001
From: Danit Goldberg <danitg@mellanox.com>
Date: Thu, 24 Jan 2019 14:18:15 +0200
Subject: IB/cma: Define option to set ack timeout and pack tos_set

Define new option in 'rdma_set_option' to override calculated QP timeout
when requested to provide QP attributes to modify a QP.

At the same time, pack tos_set to be bitfield.

Signed-off-by: Danit Goldberg <danitg@mellanox.com>
Reviewed-by: Moni Shoua <monis@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Reviewed-by: Parav Pandit <parav@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/core/cma.c      | 32 ++++++++++++++++++++++++++++++++
 drivers/infiniband/core/cma_priv.h |  4 +++-
 drivers/infiniband/core/ucma.c     |  7 +++++++
 include/rdma/rdma_cm.h             |  1 +
 include/uapi/rdma/rdma_user_cm.h   |  4 ++++
 5 files changed, 47 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index e15546ae4d0f..83aa2ad0c27e 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -888,6 +888,7 @@ struct rdma_cm_id *__rdma_create_id(struct net *net,
 	id_priv->id.ps = ps;
 	id_priv->id.qp_type = qp_type;
 	id_priv->tos_set = false;
+	id_priv->timeout_set = false;
 	id_priv->gid_type = IB_GID_TYPE_IB;
 	spin_lock_init(&id_priv->lock);
 	mutex_init(&id_priv->qp_mutex);
@@ -1130,6 +1131,9 @@ int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr,
 	} else
 		ret = -ENOSYS;
 
+	if ((*qp_attr_mask & IB_QP_TIMEOUT) && id_priv->timeout_set)
+		qp_attr->timeout = id_priv->timeout;
+
 	return ret;
 }
 EXPORT_SYMBOL(rdma_init_qp_attr);
@@ -2490,6 +2494,34 @@ void rdma_set_service_type(struct rdma_cm_id *id, int tos)
 }
 EXPORT_SYMBOL(rdma_set_service_type);
 
+/**
+ * rdma_set_ack_timeout() - Set the ack timeout of QP associated
+ *                          with a connection identifier.
+ * @id: Communication identifier to associated with service type.
+ * @timeout: Ack timeout to set a QP, expressed as 4.096 * 2^(timeout) usec.
+ *
+ * This function should be called before rdma_connect() on active side,
+ * and on passive side before rdma_accept(). It is applicable to primary
+ * path only. The timeout will affect the local side of the QP, it is not
+ * negotiated with remote side and zero disables the timer.
+ *
+ * Return: 0 for success
+ */
+int rdma_set_ack_timeout(struct rdma_cm_id *id, u8 timeout)
+{
+	struct rdma_id_private *id_priv;
+
+	if (id->qp_type != IB_QPT_RC)
+		return -EINVAL;
+
+	id_priv = container_of(id, struct rdma_id_private, id);
+	id_priv->timeout = timeout;
+	id_priv->timeout_set = true;
+
+	return 0;
+}
+EXPORT_SYMBOL(rdma_set_ack_timeout);
+
 static void cma_query_handler(int status, struct sa_path_rec *path_rec,
 			      void *context)
 {
diff --git a/drivers/infiniband/core/cma_priv.h b/drivers/infiniband/core/cma_priv.h
index cf47c69436a7..ca7307277518 100644
--- a/drivers/infiniband/core/cma_priv.h
+++ b/drivers/infiniband/core/cma_priv.h
@@ -84,9 +84,11 @@ struct rdma_id_private {
 	u32			options;
 	u8			srq;
 	u8			tos;
-	bool			tos_set;
+	u8			tos_set:1;
+	u8                      timeout_set:1;
 	u8			reuseaddr;
 	u8			afonly;
+	u8			timeout;
 	enum ib_gid_type	gid_type;
 
 	/*
diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index 01d68ed46c1b..7468b26b8a01 100644
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -1236,6 +1236,13 @@ static int ucma_set_option_id(struct ucma_context *ctx, int optname,
 		}
 		ret = rdma_set_afonly(ctx->cm_id, *((int *) optval) ? 1 : 0);
 		break;
+	case RDMA_OPTION_ID_ACK_TIMEOUT:
+		if (optlen != sizeof(u8)) {
+			ret = -EINVAL;
+			break;
+		}
+		ret = rdma_set_ack_timeout(ctx->cm_id, *((u8 *)optval));
+		break;
 	default:
 		ret = -ENOSYS;
 	}
diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h
index 60987a5903b7..71f48cfdc24c 100644
--- a/include/rdma/rdma_cm.h
+++ b/include/rdma/rdma_cm.h
@@ -374,6 +374,7 @@ int rdma_set_reuseaddr(struct rdma_cm_id *id, int reuse);
  */
 int rdma_set_afonly(struct rdma_cm_id *id, int afonly);
 
+int rdma_set_ack_timeout(struct rdma_cm_id *id, u8 timeout);
  /**
  * rdma_get_service_id - Return the IB service ID for a specified address.
  * @id: Communication identifier associated with the address.
diff --git a/include/uapi/rdma/rdma_user_cm.h b/include/uapi/rdma/rdma_user_cm.h
index 0d1e78ebad05..e42940a215a3 100644
--- a/include/uapi/rdma/rdma_user_cm.h
+++ b/include/uapi/rdma/rdma_user_cm.h
@@ -300,6 +300,10 @@ enum {
 	RDMA_OPTION_ID_TOS	 = 0,
 	RDMA_OPTION_ID_REUSEADDR = 1,
 	RDMA_OPTION_ID_AFONLY	 = 2,
+	RDMA_OPTION_ID_ACK_TIMEOUT = 3
+};
+
+enum {
 	RDMA_OPTION_IB_PATH	 = 1
 };
 
-- 
cgit v1.2.3


From 926ba19b3574f6a80823a42484877ed65e91da9c Mon Sep 17 00:00:00 2001
From: Steve Wise <swise@opengridcomputing.com>
Date: Fri, 1 Feb 2019 12:44:32 -0800
Subject: RDMA/iwcm: add tos_set bool to iw_cm struct

This allows drivers to know the tos was actively set by the application.

Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/core/cma.c | 2 ++
 include/rdma/iw_cm.h          | 3 ++-
 2 files changed, 4 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index e761ddd09aed..c43512752b8a 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -2414,6 +2414,7 @@ static int cma_iw_listen(struct rdma_id_private *id_priv, int backlog)
 		return PTR_ERR(id);
 
 	id->tos = id_priv->tos;
+	id->tos_set = id_priv->tos_set;
 	id_priv->cm_id.iw = id;
 
 	memcpy(&id_priv->cm_id.iw->local_addr, cma_src_addr(id_priv),
@@ -3843,6 +3844,7 @@ static int cma_connect_iw(struct rdma_id_private *id_priv,
 		return PTR_ERR(cm_id);
 
 	cm_id->tos = id_priv->tos;
+	cm_id->tos_set = id_priv->tos_set;
 	id_priv->cm_id.iw = cm_id;
 
 	memcpy(&cm_id->local_addr, cma_src_addr(id_priv),
diff --git a/include/rdma/iw_cm.h b/include/rdma/iw_cm.h
index 48512abd3162..0e1f02815643 100644
--- a/include/rdma/iw_cm.h
+++ b/include/rdma/iw_cm.h
@@ -94,7 +94,8 @@ struct iw_cm_id {
 	void (*add_ref)(struct iw_cm_id *);
 	void (*rem_ref)(struct iw_cm_id *);
 	u8  tos;
-	bool mapped;
+	bool tos_set:1;
+	bool mapped:1;
 };
 
 struct iw_cm_conn_param {
-- 
cgit v1.2.3


From 805b754d492f6227e1646001bdf85ad4bb819e55 Mon Sep 17 00:00:00 2001
From: Daniel Jurgens <danielj@mellanox.com>
Date: Sat, 2 Feb 2019 11:09:44 +0200
Subject: IB/core: Eliminate a hole in MAD agent struct

Move the security related fields above the u8s to eliminate a hole in the
struct.

pahole before:
struct ib_mad_agent {
...
u32                        hi_tid;               /*    48     4 */
u32                        flags;                /*    52     4 */
u8                         port_num;             /*    56     1 */
u8                         rmpp_version;         /*    57     1 */

/* XXX 6 bytes hole, try to pack */

/* --- cacheline 1 boundary (64 bytes) --- */
void *                     security;             /*    64     8 */
bool                       smp_allowed;          /*    72     1 */
bool                       lsm_nb_reg;           /*    73     1 */

/* XXX 6 bytes hole, try to pack */

struct notifier_block      lsm_nb;               /*    80    24 */

/* XXX last struct has 4 bytes of padding */

/* size: 104, cachelines: 2, members: 14 */
...
};

pahole after:
struct ib_mad_agent {
...
u32                        hi_tid;               /*    48     4 */
u32                        flags;                /*    52     4 */
void *                     security;             /*    56     8 */
/* --- cacheline 1 boundary (64 bytes) --- */
struct notifier_block      lsm_nb;               /*    64    24 */

/* XXX last struct has 4 bytes of padding */

u8                         port_num;             /*    88     1 */
u8                         rmpp_version;         /*    89     1 */
bool                       smp_allowed;          /*    90     1 */
bool                       lsm_nb_reg;           /*    91     1 */

/* size: 96, cachelines: 2, members: 14 */
...
};

Signed-off-by: Daniel Jurgens <danielj@mellanox.com>
Reviewed-by: Parav Pandit <parav@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 include/rdma/ib_mad.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h
index fdef558e3a2d..1c0b914f199d 100644
--- a/include/rdma/ib_mad.h
+++ b/include/rdma/ib_mad.h
@@ -616,12 +616,12 @@ struct ib_mad_agent {
 	void			*context;
 	u32			hi_tid;
 	u32			flags;
+	void			*security;
+	struct notifier_block   lsm_nb;
 	u8			port_num;
 	u8			rmpp_version;
-	void			*security;
 	bool			smp_allowed;
 	bool			lsm_nb_reg;
-	struct notifier_block   lsm_nb;
 };
 
 /**
-- 
cgit v1.2.3


From c66f67414c1f88554485bb2a0abf8b5c0d741de7 Mon Sep 17 00:00:00 2001
From: Daniel Jurgens <danielj@mellanox.com>
Date: Sat, 2 Feb 2019 11:09:45 +0200
Subject: IB/core: Don't register each MAD agent for LSM notifier

When creating many MAD agents in a short period of time, receive packet
processing can be delayed long enough to cause timeouts while new agents
are being added to the atomic notifier chain with IRQs disabled.  Notifier
chain registration and unregstration is an O(n) operation. With large
numbers of MAD agents being created and destroyed simultaneously the CPUs
spend too much time with interrupts disabled.

Instead of each MAD agent registering for it's own LSM notification,
maintain a list of agents internally and register once, this registration
already existed for handling the PKeys. This list is write mostly, so a
normal spin lock is used vs a read/write lock. All MAD agents must be
checked, so a single list is used instead of breaking them down per
device.

Notifier calls are done under rcu_read_lock, so there isn't a risk of
similar packet timeouts while checking the MAD agents security settings
when notified.

Signed-off-by: Daniel Jurgens <danielj@mellanox.com>
Reviewed-by: Parav Pandit <parav@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Acked-by: Paul Moore <paul@paul-moore.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/core/core_priv.h |  5 ++++
 drivers/infiniband/core/device.c    |  1 +
 drivers/infiniband/core/security.c  | 50 +++++++++++++++++++++----------------
 include/rdma/ib_mad.h               |  3 +--
 4 files changed, 35 insertions(+), 24 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h
index bcb3e3029a9b..d053110207eb 100644
--- a/drivers/infiniband/core/core_priv.h
+++ b/drivers/infiniband/core/core_priv.h
@@ -202,6 +202,7 @@ int ib_mad_agent_security_setup(struct ib_mad_agent *agent,
 				enum ib_qp_type qp_type);
 void ib_mad_agent_security_cleanup(struct ib_mad_agent *agent);
 int ib_mad_enforce_security(struct ib_mad_agent_private *map, u16 pkey_index);
+void ib_mad_agent_security_change(void);
 #else
 static inline void ib_security_destroy_port_pkey_list(struct ib_device *device)
 {
@@ -267,6 +268,10 @@ static inline int ib_mad_enforce_security(struct ib_mad_agent_private *map,
 {
 	return 0;
 }
+
+static inline void ib_mad_agent_security_change(void)
+{
+}
 #endif
 
 struct ib_device *ib_device_get_by_index(u32 ifindex);
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 55221990d946..32cd35c9b21e 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -452,6 +452,7 @@ static int ib_security_change(struct notifier_block *nb, unsigned long event,
 		return NOTIFY_DONE;
 
 	schedule_work(&ib_policy_change_work);
+	ib_mad_agent_security_change();
 
 	return NOTIFY_OK;
 }
diff --git a/drivers/infiniband/core/security.c b/drivers/infiniband/core/security.c
index 7662e9347238..a70d2ba312ed 100644
--- a/drivers/infiniband/core/security.c
+++ b/drivers/infiniband/core/security.c
@@ -39,6 +39,10 @@
 #include "core_priv.h"
 #include "mad_priv.h"
 
+static LIST_HEAD(mad_agent_list);
+/* Lock to protect mad_agent_list */
+static DEFINE_SPINLOCK(mad_agent_list_lock);
+
 static struct pkey_index_qp_list *get_pkey_idx_qp_list(struct ib_port_pkey *pp)
 {
 	struct pkey_index_qp_list *pkey = NULL;
@@ -676,19 +680,18 @@ static int ib_security_pkey_access(struct ib_device *dev,
 	return security_ib_pkey_access(sec, subnet_prefix, pkey);
 }
 
-static int ib_mad_agent_security_change(struct notifier_block *nb,
-					unsigned long event,
-					void *data)
+void ib_mad_agent_security_change(void)
 {
-	struct ib_mad_agent *ag = container_of(nb, struct ib_mad_agent, lsm_nb);
-
-	if (event != LSM_POLICY_CHANGE)
-		return NOTIFY_DONE;
-
-	ag->smp_allowed = !security_ib_endport_manage_subnet(
-		ag->security, dev_name(&ag->device->dev), ag->port_num);
-
-	return NOTIFY_OK;
+	struct ib_mad_agent *ag;
+
+	spin_lock(&mad_agent_list_lock);
+	list_for_each_entry(ag,
+			    &mad_agent_list,
+			    mad_agent_sec_list)
+		WRITE_ONCE(ag->smp_allowed,
+			   !security_ib_endport_manage_subnet(ag->security,
+				dev_name(&ag->device->dev), ag->port_num));
+	spin_unlock(&mad_agent_list_lock);
 }
 
 int ib_mad_agent_security_setup(struct ib_mad_agent *agent,
@@ -699,6 +702,8 @@ int ib_mad_agent_security_setup(struct ib_mad_agent *agent,
 	if (!rdma_protocol_ib(agent->device, agent->port_num))
 		return 0;
 
+	INIT_LIST_HEAD(&agent->mad_agent_sec_list);
+
 	ret = security_ib_alloc_security(&agent->security);
 	if (ret)
 		return ret;
@@ -706,22 +711,20 @@ int ib_mad_agent_security_setup(struct ib_mad_agent *agent,
 	if (qp_type != IB_QPT_SMI)
 		return 0;
 
+	spin_lock(&mad_agent_list_lock);
 	ret = security_ib_endport_manage_subnet(agent->security,
 						dev_name(&agent->device->dev),
 						agent->port_num);
 	if (ret)
 		goto free_security;
 
-	agent->lsm_nb.notifier_call = ib_mad_agent_security_change;
-	ret = register_lsm_notifier(&agent->lsm_nb);
-	if (ret)
-		goto free_security;
-
-	agent->smp_allowed = true;
-	agent->lsm_nb_reg = true;
+	WRITE_ONCE(agent->smp_allowed, true);
+	list_add(&agent->mad_agent_sec_list, &mad_agent_list);
+	spin_unlock(&mad_agent_list_lock);
 	return 0;
 
 free_security:
+	spin_unlock(&mad_agent_list_lock);
 	security_ib_free_security(agent->security);
 	return ret;
 }
@@ -731,8 +734,11 @@ void ib_mad_agent_security_cleanup(struct ib_mad_agent *agent)
 	if (!rdma_protocol_ib(agent->device, agent->port_num))
 		return;
 
-	if (agent->lsm_nb_reg)
-		unregister_lsm_notifier(&agent->lsm_nb);
+	if (agent->qp->qp_type == IB_QPT_SMI) {
+		spin_lock(&mad_agent_list_lock);
+		list_del(&agent->mad_agent_sec_list);
+		spin_unlock(&mad_agent_list_lock);
+	}
 
 	security_ib_free_security(agent->security);
 }
@@ -743,7 +749,7 @@ int ib_mad_enforce_security(struct ib_mad_agent_private *map, u16 pkey_index)
 		return 0;
 
 	if (map->agent.qp->qp_type == IB_QPT_SMI) {
-		if (!map->agent.smp_allowed)
+		if (!READ_ONCE(map->agent.smp_allowed))
 			return -EACCES;
 		return 0;
 	}
diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h
index 1c0b914f199d..79ba8219e7dc 100644
--- a/include/rdma/ib_mad.h
+++ b/include/rdma/ib_mad.h
@@ -617,11 +617,10 @@ struct ib_mad_agent {
 	u32			hi_tid;
 	u32			flags;
 	void			*security;
-	struct notifier_block   lsm_nb;
+	struct list_head	mad_agent_sec_list;
 	u8			port_num;
 	u8			rmpp_version;
 	bool			smp_allowed;
-	bool			lsm_nb_reg;
 };
 
 /**
-- 
cgit v1.2.3


From 30471d4b20335d9bd9ae9b2382a1e1e97d18d86d Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Sun, 3 Feb 2019 14:55:50 +0200
Subject: RDMA/core: Share driver structure size with core

Add new macros to be used in drivers while registering ops structure and
IB/core while calling allocation routines, so drivers won't need to
perform kzalloc/kfree in their paths.

The change in allocation stage allows us to initialize common fields prior
to calling to drivers (e.g. restrack).

Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/core/device.c |  2 ++
 include/rdma/ib_verbs.h          | 13 +++++++++++++
 2 files changed, 15 insertions(+)

(limited to 'include')

diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 32cd35c9b21e..d806a5c7b202 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -1228,6 +1228,8 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
 				(ptr)->name = ops->name;                       \
 	} while (0)
 
+#define SET_OBJ_SIZE(ptr, name) SET_DEVICE_OP(ptr, size_##name)
+
 	SET_DEVICE_OP(dev_ops, add_gid);
 	SET_DEVICE_OP(dev_ops, advise_mr);
 	SET_DEVICE_OP(dev_ops, alloc_dm);
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 2e1f1e885ee5..e29eae4aec84 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -2264,6 +2264,19 @@ struct ib_counters_read_attr {
 
 struct uverbs_attr_bundle;
 
+#define INIT_RDMA_OBJ_SIZE(ib_struct, drv_struct, member)                      \
+	.size_##ib_struct =                                                    \
+		(sizeof(struct drv_struct) +                                   \
+		 BUILD_BUG_ON_ZERO(offsetof(struct drv_struct, member)) +      \
+		 BUILD_BUG_ON_ZERO(                                            \
+			 !__same_type(((struct drv_struct *)NULL)->member,     \
+				      struct ib_struct)))
+
+#define rdma_zalloc_drv_obj(ib_dev, ib_type)                                   \
+	((struct ib_type *)kzalloc(ib_dev->ops.size_##ib_type, GFP_KERNEL))
+
+#define DECLARE_RDMA_OBJ_SIZE(ib_struct) size_t size_##ib_struct
+
 /**
  * struct ib_device_ops - InfiniBand device operations
  * This structure defines all the InfiniBand device operations, providers will
-- 
cgit v1.2.3


From 21a428a019c9a6d133e745b529b9bf18c1187e70 Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Sun, 3 Feb 2019 14:55:51 +0200
Subject: RDMA: Handle PD allocations by IB/core

The PD allocations in IB/core allows us to simplify drivers and their
error flows in their .alloc_pd() paths. The changes in .alloc_pd() go hand
in had with relevant update in .dealloc_pd().

We will use this opportunity and convert .dealloc_pd() to don't fail, as
it was suggested a long time ago, failures are not happening as we have
never seen a WARN_ON print.

Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/core/device.c                |  2 +
 drivers/infiniband/core/uverbs_cmd.c            | 15 ++--
 drivers/infiniband/core/uverbs_std_types.c      |  2 +-
 drivers/infiniband/core/verbs.c                 | 27 +++++---
 drivers/infiniband/hw/bnxt_re/ib_verbs.c        | 37 ++++------
 drivers/infiniband/hw/bnxt_re/ib_verbs.h        |  9 ++-
 drivers/infiniband/hw/bnxt_re/main.c            |  1 +
 drivers/infiniband/hw/cxgb3/iwch_provider.c     | 25 +++----
 drivers/infiniband/hw/cxgb4/provider.c          | 25 +++----
 drivers/infiniband/hw/hns/hns_roce_device.h     |  7 +-
 drivers/infiniband/hw/hns/hns_roce_hw_v1.c      | 27 +++++---
 drivers/infiniband/hw/hns/hns_roce_main.c       |  1 +
 drivers/infiniband/hw/hns/hns_roce_pd.c         | 25 +++----
 drivers/infiniband/hw/i40iw/i40iw_utils.c       |  1 -
 drivers/infiniband/hw/i40iw/i40iw_verbs.c       | 32 ++++-----
 drivers/infiniband/hw/mlx4/main.c               | 36 ++++------
 drivers/infiniband/hw/mlx5/main.c               | 48 +++++++------
 drivers/infiniband/hw/mthca/mthca_provider.c    | 29 +++-----
 drivers/infiniband/hw/nes/nes_verbs.c           | 32 +++------
 drivers/infiniband/hw/ocrdma/ocrdma_main.c      |  1 +
 drivers/infiniband/hw/ocrdma/ocrdma_verbs.c     | 92 +++++++++++--------------
 drivers/infiniband/hw/ocrdma/ocrdma_verbs.h     |  6 +-
 drivers/infiniband/hw/qedr/main.c               |  1 +
 drivers/infiniband/hw/qedr/verbs.c              | 34 +++------
 drivers/infiniband/hw/qedr/verbs.h              |  6 +-
 drivers/infiniband/hw/usnic/usnic_ib_main.c     |  1 +
 drivers/infiniband/hw/usnic/usnic_ib_verbs.c    | 26 ++-----
 drivers/infiniband/hw/usnic/usnic_ib_verbs.h    |  7 +-
 drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c  |  1 +
 drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c | 43 ++++--------
 drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h |  7 +-
 drivers/infiniband/sw/rdmavt/pd.c               | 29 +++-----
 drivers/infiniband/sw/rdmavt/pd.h               |  7 +-
 drivers/infiniband/sw/rdmavt/vt.c               |  1 +
 drivers/infiniband/sw/rxe/rxe_pool.c            | 60 +++++++++++++---
 drivers/infiniband/sw/rxe/rxe_pool.h            |  4 ++
 drivers/infiniband/sw/rxe/rxe_verbs.c           | 16 ++---
 drivers/infiniband/sw/rxe/rxe_verbs.h           |  2 +-
 include/rdma/ib_verbs.h                         |  9 +--
 39 files changed, 325 insertions(+), 409 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index d806a5c7b202..57e1e177921e 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -1319,6 +1319,8 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
 	SET_DEVICE_OP(dev_ops, set_vf_guid);
 	SET_DEVICE_OP(dev_ops, set_vf_link_state);
 	SET_DEVICE_OP(dev_ops, unmap_fmr);
+
+	SET_OBJ_SIZE(dev_ops, ib_pd);
 }
 EXPORT_SYMBOL(ib_set_device_ops);
 
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index aa260cafbd85..5ac143f22df0 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -407,9 +407,9 @@ static int ib_uverbs_alloc_pd(struct uverbs_attr_bundle *attrs)
 	if (IS_ERR(uobj))
 		return PTR_ERR(uobj);
 
-	pd = ib_dev->ops.alloc_pd(ib_dev, uobj->context, &attrs->driver_udata);
-	if (IS_ERR(pd)) {
-		ret = PTR_ERR(pd);
+	pd = rdma_zalloc_drv_obj(ib_dev, ib_pd);
+	if (!pd) {
+		ret = -ENOMEM;
 		goto err;
 	}
 
@@ -417,11 +417,15 @@ static int ib_uverbs_alloc_pd(struct uverbs_attr_bundle *attrs)
 	pd->uobject = uobj;
 	pd->__internal_mr = NULL;
 	atomic_set(&pd->usecnt, 0);
+	pd->res.type = RDMA_RESTRACK_PD;
+
+	ret = ib_dev->ops.alloc_pd(pd, uobj->context, &attrs->driver_udata);
+	if (ret)
+		goto err_alloc;
 
 	uobj->object = pd;
 	memset(&resp, 0, sizeof resp);
 	resp.pd_handle = uobj->id;
-	pd->res.type = RDMA_RESTRACK_PD;
 	rdma_restrack_uadd(&pd->res);
 
 	ret = uverbs_response(attrs, &resp, sizeof(resp));
@@ -432,7 +436,8 @@ static int ib_uverbs_alloc_pd(struct uverbs_attr_bundle *attrs)
 
 err_copy:
 	ib_dealloc_pd(pd);
-
+err_alloc:
+	kfree(pd);
 err:
 	uobj_alloc_abort(uobj);
 	return ret;
diff --git a/drivers/infiniband/core/uverbs_std_types.c b/drivers/infiniband/core/uverbs_std_types.c
index cbc72312eb41..f224cb727224 100644
--- a/drivers/infiniband/core/uverbs_std_types.c
+++ b/drivers/infiniband/core/uverbs_std_types.c
@@ -188,7 +188,7 @@ static int uverbs_free_pd(struct ib_uobject *uobject,
 	if (ret)
 		return ret;
 
-	ib_dealloc_pd((struct ib_pd *)uobject->object);
+	ib_dealloc_pd(pd);
 	return 0;
 }
 
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 3220fb42ecce..de5d895a5054 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -254,10 +254,11 @@ struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags,
 {
 	struct ib_pd *pd;
 	int mr_access_flags = 0;
+	int ret;
 
-	pd = device->ops.alloc_pd(device, NULL, NULL);
-	if (IS_ERR(pd))
-		return pd;
+	pd = rdma_zalloc_drv_obj(device, ib_pd);
+	if (!pd)
+		return ERR_PTR(-ENOMEM);
 
 	pd->device = device;
 	pd->uobject = NULL;
@@ -265,6 +266,16 @@ struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags,
 	atomic_set(&pd->usecnt, 0);
 	pd->flags = flags;
 
+	pd->res.type = RDMA_RESTRACK_PD;
+	rdma_restrack_set_task(&pd->res, caller);
+
+	ret = device->ops.alloc_pd(pd, NULL, NULL);
+	if (ret) {
+		kfree(pd);
+		return ERR_PTR(ret);
+	}
+	rdma_restrack_kadd(&pd->res);
+
 	if (device->attrs.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)
 		pd->local_dma_lkey = device->local_dma_lkey;
 	else
@@ -275,10 +286,6 @@ struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags,
 		mr_access_flags |= IB_ACCESS_REMOTE_READ | IB_ACCESS_REMOTE_WRITE;
 	}
 
-	pd->res.type = RDMA_RESTRACK_PD;
-	rdma_restrack_set_task(&pd->res, caller);
-	rdma_restrack_kadd(&pd->res);
-
 	if (mr_access_flags) {
 		struct ib_mr *mr;
 
@@ -329,10 +336,8 @@ void ib_dealloc_pd(struct ib_pd *pd)
 	WARN_ON(atomic_read(&pd->usecnt));
 
 	rdma_restrack_del(&pd->res);
-	/* Making delalloc_pd a void return is a WIP, no driver should return
-	   an error here. */
-	ret = pd->device->ops.dealloc_pd(pd);
-	WARN_ONCE(ret, "Infiniband HW driver failed dealloc_pd");
+	pd->device->ops.dealloc_pd(pd);
+	kfree(pd);
 }
 EXPORT_SYMBOL(ib_dealloc_pd);
 
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
index 1d7469e23cde..1606571af63d 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -563,41 +563,29 @@ fail:
 }
 
 /* Protection Domains */
-int bnxt_re_dealloc_pd(struct ib_pd *ib_pd)
+void bnxt_re_dealloc_pd(struct ib_pd *ib_pd)
 {
 	struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd);
 	struct bnxt_re_dev *rdev = pd->rdev;
-	int rc;
 
 	bnxt_re_destroy_fence_mr(pd);
 
-	if (pd->qplib_pd.id) {
-		rc = bnxt_qplib_dealloc_pd(&rdev->qplib_res,
-					   &rdev->qplib_res.pd_tbl,
-					   &pd->qplib_pd);
-		if (rc)
-			dev_err(rdev_to_dev(rdev), "Failed to deallocate HW PD");
-	}
-
-	kfree(pd);
-	return 0;
+	if (pd->qplib_pd.id)
+		bnxt_qplib_dealloc_pd(&rdev->qplib_res, &rdev->qplib_res.pd_tbl,
+				      &pd->qplib_pd);
 }
 
-struct ib_pd *bnxt_re_alloc_pd(struct ib_device *ibdev,
-			       struct ib_ucontext *ucontext,
-			       struct ib_udata *udata)
+int bnxt_re_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *ucontext,
+		     struct ib_udata *udata)
 {
+	struct ib_device *ibdev = ibpd->device;
 	struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev);
 	struct bnxt_re_ucontext *ucntx = container_of(ucontext,
 						      struct bnxt_re_ucontext,
 						      ib_uctx);
-	struct bnxt_re_pd *pd;
+	struct bnxt_re_pd *pd = container_of(ibpd, struct bnxt_re_pd, ib_pd);
 	int rc;
 
-	pd = kzalloc(sizeof(*pd), GFP_KERNEL);
-	if (!pd)
-		return ERR_PTR(-ENOMEM);
-
 	pd->rdev = rdev;
 	if (bnxt_qplib_alloc_pd(&rdev->qplib_res.pd_tbl, &pd->qplib_pd)) {
 		dev_err(rdev_to_dev(rdev), "Failed to allocate HW PD");
@@ -637,13 +625,12 @@ struct ib_pd *bnxt_re_alloc_pd(struct ib_device *ibdev,
 		if (bnxt_re_create_fence_mr(pd))
 			dev_warn(rdev_to_dev(rdev),
 				 "Failed to create Fence-MR\n");
-	return &pd->ib_pd;
+	return 0;
 dbfail:
-	(void)bnxt_qplib_dealloc_pd(&rdev->qplib_res, &rdev->qplib_res.pd_tbl,
-				    &pd->qplib_pd);
+	bnxt_qplib_dealloc_pd(&rdev->qplib_res, &rdev->qplib_res.pd_tbl,
+			      &pd->qplib_pd);
 fail:
-	kfree(pd);
-	return ERR_PTR(rc);
+	return rc;
 }
 
 /* Address Handles */
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h
index c4af72604b4f..c7cca803cfa3 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.h
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h
@@ -56,8 +56,8 @@ struct bnxt_re_fence_data {
 };
 
 struct bnxt_re_pd {
+	struct ib_pd            ib_pd;
 	struct bnxt_re_dev	*rdev;
-	struct ib_pd		ib_pd;
 	struct bnxt_qplib_pd	qplib_pd;
 	struct bnxt_re_fence_data fence;
 };
@@ -163,10 +163,9 @@ int bnxt_re_query_gid(struct ib_device *ibdev, u8 port_num,
 		      int index, union ib_gid *gid);
 enum rdma_link_layer bnxt_re_get_link_layer(struct ib_device *ibdev,
 					    u8 port_num);
-struct ib_pd *bnxt_re_alloc_pd(struct ib_device *ibdev,
-			       struct ib_ucontext *context,
-			       struct ib_udata *udata);
-int bnxt_re_dealloc_pd(struct ib_pd *pd);
+int bnxt_re_alloc_pd(struct ib_pd *pd, struct ib_ucontext *context,
+		     struct ib_udata *udata);
+void bnxt_re_dealloc_pd(struct ib_pd *pd);
 struct ib_ah *bnxt_re_create_ah(struct ib_pd *pd,
 				struct rdma_ah_attr *ah_attr,
 				u32 flags,
diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c
index 0d40a930c192..0a89ef6e5754 100644
--- a/drivers/infiniband/hw/bnxt_re/main.c
+++ b/drivers/infiniband/hw/bnxt_re/main.c
@@ -637,6 +637,7 @@ static const struct ib_device_ops bnxt_re_dev_ops = {
 	.query_srq = bnxt_re_query_srq,
 	.reg_user_mr = bnxt_re_reg_user_mr,
 	.req_notify_cq = bnxt_re_req_notify_cq,
+	INIT_RDMA_OBJ_SIZE(ib_pd, bnxt_re_pd, ib_pd),
 };
 
 static int bnxt_re_register_ib(struct bnxt_re_dev *rdev)
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c
index 804c1fc7bfc1..4cc9a6ae2139 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c
@@ -370,7 +370,7 @@ static int iwch_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
 	return ret;
 }
 
-static int iwch_deallocate_pd(struct ib_pd *pd)
+static void iwch_deallocate_pd(struct ib_pd *pd)
 {
 	struct iwch_dev *rhp;
 	struct iwch_pd *php;
@@ -379,15 +379,13 @@ static int iwch_deallocate_pd(struct ib_pd *pd)
 	rhp = php->rhp;
 	pr_debug("%s ibpd %p pdid 0x%x\n", __func__, pd, php->pdid);
 	cxio_hal_put_pdid(rhp->rdev.rscp, php->pdid);
-	kfree(php);
-	return 0;
 }
 
-static struct ib_pd *iwch_allocate_pd(struct ib_device *ibdev,
-			       struct ib_ucontext *context,
-			       struct ib_udata *udata)
+static int iwch_allocate_pd(struct ib_pd *pd, struct ib_ucontext *context,
+			    struct ib_udata *udata)
 {
-	struct iwch_pd *php;
+	struct iwch_pd *php = to_iwch_pd(pd);
+	struct ib_device *ibdev = pd->device;
 	u32 pdid;
 	struct iwch_dev *rhp;
 
@@ -395,12 +393,8 @@ static struct ib_pd *iwch_allocate_pd(struct ib_device *ibdev,
 	rhp = (struct iwch_dev *) ibdev;
 	pdid = cxio_hal_get_pdid(rhp->rdev.rscp);
 	if (!pdid)
-		return ERR_PTR(-EINVAL);
-	php = kzalloc(sizeof(*php), GFP_KERNEL);
-	if (!php) {
-		cxio_hal_put_pdid(rhp->rdev.rscp, pdid);
-		return ERR_PTR(-ENOMEM);
-	}
+		return -EINVAL;
+
 	php->pdid = pdid;
 	php->rhp = rhp;
 	if (context) {
@@ -408,11 +402,11 @@ static struct ib_pd *iwch_allocate_pd(struct ib_device *ibdev,
 
 		if (ib_copy_to_udata(udata, &resp, sizeof(resp))) {
 			iwch_deallocate_pd(&php->ibpd);
-			return ERR_PTR(-EFAULT);
+			return -EFAULT;
 		}
 	}
 	pr_debug("%s pdid 0x%0x ptr 0x%p\n", __func__, pdid, php);
-	return &php->ibpd;
+	return 0;
 }
 
 static int iwch_dereg_mr(struct ib_mr *ib_mr)
@@ -1350,6 +1344,7 @@ static const struct ib_device_ops iwch_dev_ops = {
 	.reg_user_mr = iwch_reg_user_mr,
 	.req_notify_cq = iwch_arm_cq,
 	.resize_cq = iwch_resize_cq,
+	INIT_RDMA_OBJ_SIZE(ib_pd, iwch_pd, ibpd),
 };
 
 int iwch_register_device(struct iwch_dev *dev)
diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c
index f59bf7e5a589..680b5e98491d 100644
--- a/drivers/infiniband/hw/cxgb4/provider.c
+++ b/drivers/infiniband/hw/cxgb4/provider.c
@@ -209,7 +209,7 @@ static int c4iw_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
 	return ret;
 }
 
-static int c4iw_deallocate_pd(struct ib_pd *pd)
+static void c4iw_deallocate_pd(struct ib_pd *pd)
 {
 	struct c4iw_dev *rhp;
 	struct c4iw_pd *php;
@@ -221,15 +221,13 @@ static int c4iw_deallocate_pd(struct ib_pd *pd)
 	mutex_lock(&rhp->rdev.stats.lock);
 	rhp->rdev.stats.pd.cur--;
 	mutex_unlock(&rhp->rdev.stats.lock);
-	kfree(php);
-	return 0;
 }
 
-static struct ib_pd *c4iw_allocate_pd(struct ib_device *ibdev,
-				      struct ib_ucontext *context,
-				      struct ib_udata *udata)
+static int c4iw_allocate_pd(struct ib_pd *pd, struct ib_ucontext *context,
+			    struct ib_udata *udata)
 {
-	struct c4iw_pd *php;
+	struct c4iw_pd *php = to_c4iw_pd(pd);
+	struct ib_device *ibdev = pd->device;
 	u32 pdid;
 	struct c4iw_dev *rhp;
 
@@ -237,12 +235,8 @@ static struct ib_pd *c4iw_allocate_pd(struct ib_device *ibdev,
 	rhp = (struct c4iw_dev *) ibdev;
 	pdid =  c4iw_get_resource(&rhp->rdev.resource.pdid_table);
 	if (!pdid)
-		return ERR_PTR(-EINVAL);
-	php = kzalloc(sizeof(*php), GFP_KERNEL);
-	if (!php) {
-		c4iw_put_resource(&rhp->rdev.resource.pdid_table, pdid);
-		return ERR_PTR(-ENOMEM);
-	}
+		return -EINVAL;
+
 	php->pdid = pdid;
 	php->rhp = rhp;
 	if (context) {
@@ -250,7 +244,7 @@ static struct ib_pd *c4iw_allocate_pd(struct ib_device *ibdev,
 
 		if (ib_copy_to_udata(udata, &uresp, sizeof(uresp))) {
 			c4iw_deallocate_pd(&php->ibpd);
-			return ERR_PTR(-EFAULT);
+			return -EFAULT;
 		}
 	}
 	mutex_lock(&rhp->rdev.stats.lock);
@@ -259,7 +253,7 @@ static struct ib_pd *c4iw_allocate_pd(struct ib_device *ibdev,
 		rhp->rdev.stats.pd.max = rhp->rdev.stats.pd.cur;
 	mutex_unlock(&rhp->rdev.stats.lock);
 	pr_debug("pdid 0x%0x ptr 0x%p\n", pdid, php);
-	return &php->ibpd;
+	return 0;
 }
 
 static int c4iw_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
@@ -570,6 +564,7 @@ static const struct ib_device_ops c4iw_dev_ops = {
 	.query_qp = c4iw_ib_query_qp,
 	.reg_user_mr = c4iw_reg_user_mr,
 	.req_notify_cq = c4iw_arm_cq,
+	INIT_RDMA_OBJ_SIZE(ib_pd, c4iw_pd, ibpd),
 };
 
 void c4iw_register_device(struct work_struct *work)
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index 8ca8d74dfb6a..9ee86daf1700 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -1114,10 +1114,9 @@ struct ib_ah *hns_roce_create_ah(struct ib_pd *pd,
 int hns_roce_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr);
 int hns_roce_destroy_ah(struct ib_ah *ah, u32 flags);
 
-struct ib_pd *hns_roce_alloc_pd(struct ib_device *ib_dev,
-				struct ib_ucontext *context,
-				struct ib_udata *udata);
-int hns_roce_dealloc_pd(struct ib_pd *pd);
+int hns_roce_alloc_pd(struct ib_pd *pd, struct ib_ucontext *context,
+		      struct ib_udata *udata);
+void hns_roce_dealloc_pd(struct ib_pd *pd);
 
 struct ib_mr *hns_roce_get_dma_mr(struct ib_pd *pd, int acc);
 struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
index fa08c22aad66..a18b88c95995 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
@@ -711,13 +711,14 @@ static int hns_roce_v1_rsv_lp_qp(struct hns_roce_dev *hr_dev)
 	struct ib_qp_attr attr = { 0 };
 	struct hns_roce_v1_priv *priv;
 	struct hns_roce_qp *hr_qp;
+	struct ib_device *ibdev;
 	struct ib_cq *cq;
 	struct ib_pd *pd;
 	union ib_gid dgid;
 	u64 subnet_prefix;
 	int attr_mask = 0;
+	int ret = -ENOMEM;
 	int i, j;
-	int ret;
 	u8 queue_en[HNS_ROCE_V1_RESV_QP] = { 0 };
 	u8 phy_port;
 	u8 port = 0;
@@ -742,12 +743,16 @@ static int hns_roce_v1_rsv_lp_qp(struct hns_roce_dev *hr_dev)
 	free_mr->mr_free_cq->ib_cq.cq_context		= NULL;
 	atomic_set(&free_mr->mr_free_cq->ib_cq.usecnt, 0);
 
-	pd = hns_roce_alloc_pd(&hr_dev->ib_dev, NULL, NULL);
-	if (IS_ERR(pd)) {
-		dev_err(dev, "Create pd for reserved loop qp failed!");
-		ret = -ENOMEM;
+	ibdev = &hr_dev->ib_dev;
+	pd = rdma_zalloc_drv_obj(ibdev, ib_pd);
+	if (pd)
+		goto alloc_mem_failed;
+
+	pd->device  = ibdev;
+	ret = hns_roce_alloc_pd(pd, NULL, NULL);
+	if (ret)
 		goto alloc_pd_failed;
-	}
+
 	free_mr->mr_free_pd = to_hr_pd(pd);
 	free_mr->mr_free_pd->ibpd.device  = &hr_dev->ib_dev;
 	free_mr->mr_free_pd->ibpd.uobject = NULL;
@@ -854,10 +859,12 @@ create_lp_qp_failed:
 			dev_err(dev, "Destroy qp %d for mr free failed!\n", i);
 	}
 
-	if (hns_roce_dealloc_pd(pd))
-		dev_err(dev, "Destroy pd for create_lp_qp failed!\n");
+	hns_roce_dealloc_pd(pd);
 
 alloc_pd_failed:
+	kfree(pd);
+
+alloc_mem_failed:
 	if (hns_roce_ib_destroy_cq(cq))
 		dev_err(dev, "Destroy cq for create_lp_qp failed!\n");
 
@@ -891,9 +898,7 @@ static void hns_roce_v1_release_lp_qp(struct hns_roce_dev *hr_dev)
 	if (ret)
 		dev_err(dev, "Destroy cq for mr_free failed(%d)!\n", ret);
 
-	ret = hns_roce_dealloc_pd(&free_mr->mr_free_pd->ibpd);
-	if (ret)
-		dev_err(dev, "Destroy pd for mr_free failed(%d)!\n", ret);
+	hns_roce_dealloc_pd(&free_mr->mr_free_pd->ibpd);
 }
 
 static int hns_roce_db_init(struct hns_roce_dev *hr_dev)
diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
index 67a8c4333f4f..ccf10622586c 100644
--- a/drivers/infiniband/hw/hns/hns_roce_main.c
+++ b/drivers/infiniband/hw/hns/hns_roce_main.c
@@ -472,6 +472,7 @@ static const struct ib_device_ops hns_roce_dev_ops = {
 	.query_pkey = hns_roce_query_pkey,
 	.query_port = hns_roce_query_port,
 	.reg_user_mr = hns_roce_reg_user_mr,
+	INIT_RDMA_OBJ_SIZE(ib_pd, hns_roce_pd, ibpd),
 };
 
 static const struct ib_device_ops hns_roce_dev_mr_ops = {
diff --git a/drivers/infiniband/hw/hns/hns_roce_pd.c b/drivers/infiniband/hw/hns/hns_roce_pd.c
index 4a29b2cb9bab..b9b97c5e97e6 100644
--- a/drivers/infiniband/hw/hns/hns_roce_pd.c
+++ b/drivers/infiniband/hw/hns/hns_roce_pd.c
@@ -57,24 +57,19 @@ void hns_roce_cleanup_pd_table(struct hns_roce_dev *hr_dev)
 	hns_roce_bitmap_cleanup(&hr_dev->pd_bitmap);
 }
 
-struct ib_pd *hns_roce_alloc_pd(struct ib_device *ib_dev,
-				struct ib_ucontext *context,
-				struct ib_udata *udata)
+int hns_roce_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *context,
+		      struct ib_udata *udata)
 {
+	struct ib_device *ib_dev = ibpd->device;
 	struct hns_roce_dev *hr_dev = to_hr_dev(ib_dev);
 	struct device *dev = hr_dev->dev;
-	struct hns_roce_pd *pd;
+	struct hns_roce_pd *pd = to_hr_pd(ibpd);
 	int ret;
 
-	pd = kzalloc(sizeof(*pd), GFP_KERNEL);
-	if (!pd)
-		return ERR_PTR(-ENOMEM);
-
 	ret = hns_roce_pd_alloc(to_hr_dev(ib_dev), &pd->pdn);
 	if (ret) {
-		kfree(pd);
 		dev_err(dev, "[alloc_pd]hns_roce_pd_alloc failed!\n");
-		return ERR_PTR(ret);
+		return ret;
 	}
 
 	if (context) {
@@ -83,21 +78,17 @@ struct ib_pd *hns_roce_alloc_pd(struct ib_device *ib_dev,
 		if (ib_copy_to_udata(udata, &uresp, sizeof(uresp))) {
 			hns_roce_pd_free(to_hr_dev(ib_dev), pd->pdn);
 			dev_err(dev, "[alloc_pd]ib_copy_to_udata failed!\n");
-			kfree(pd);
-			return ERR_PTR(-EFAULT);
+			return -EFAULT;
 		}
 	}
 
-	return &pd->ibpd;
+	return 0;
 }
 EXPORT_SYMBOL_GPL(hns_roce_alloc_pd);
 
-int hns_roce_dealloc_pd(struct ib_pd *pd)
+void hns_roce_dealloc_pd(struct ib_pd *pd)
 {
 	hns_roce_pd_free(to_hr_dev(pd->device), to_hr_pd(pd)->pdn);
-	kfree(to_hr_pd(pd));
-
-	return 0;
 }
 EXPORT_SYMBOL_GPL(hns_roce_dealloc_pd);
 
diff --git a/drivers/infiniband/hw/i40iw/i40iw_utils.c b/drivers/infiniband/hw/i40iw/i40iw_utils.c
index 59e978141ad4..c5a881172524 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_utils.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_utils.c
@@ -601,7 +601,6 @@ void i40iw_rem_pdusecount(struct i40iw_pd *iwpd, struct i40iw_device *iwdev)
 	if (!atomic_dec_and_test(&iwpd->usecount))
 		return;
 	i40iw_free_resource(iwdev, iwdev->allocated_pds, iwpd->sc_pd.pd_id);
-	kfree(iwpd);
 }
 
 /**
diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
index d4ab46dd9e6c..28449ad57b37 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
@@ -312,16 +312,15 @@ static void i40iw_dealloc_push_page(struct i40iw_device *iwdev, struct i40iw_sc_
 
 /**
  * i40iw_alloc_pd - allocate protection domain
- * @ibdev: device pointer from stack
+ * @pd: PD pointer
  * @context: user context created during alloc
  * @udata: user data
  */
-static struct ib_pd *i40iw_alloc_pd(struct ib_device *ibdev,
-				    struct ib_ucontext *context,
-				    struct ib_udata *udata)
+static int i40iw_alloc_pd(struct ib_pd *pd, struct ib_ucontext *context,
+			  struct ib_udata *udata)
 {
-	struct i40iw_pd *iwpd;
-	struct i40iw_device *iwdev = to_iwdev(ibdev);
+	struct i40iw_pd *iwpd = to_iwpd(pd);
+	struct i40iw_device *iwdev = to_iwdev(pd->device);
 	struct i40iw_sc_dev *dev = &iwdev->sc_dev;
 	struct i40iw_alloc_pd_resp uresp;
 	struct i40iw_sc_pd *sc_pd;
@@ -330,19 +329,13 @@ static struct ib_pd *i40iw_alloc_pd(struct ib_device *ibdev,
 	int err;
 
 	if (iwdev->closing)
-		return ERR_PTR(-ENODEV);
+		return -ENODEV;
 
 	err = i40iw_alloc_resource(iwdev, iwdev->allocated_pds,
 				   iwdev->max_pd, &pd_id, &iwdev->next_pd);
 	if (err) {
 		i40iw_pr_err("alloc resource failed\n");
-		return ERR_PTR(err);
-	}
-
-	iwpd = kzalloc(sizeof(*iwpd), GFP_KERNEL);
-	if (!iwpd) {
-		err = -ENOMEM;
-		goto free_res;
+		return err;
 	}
 
 	sc_pd = &iwpd->sc_pd;
@@ -361,25 +354,23 @@ static struct ib_pd *i40iw_alloc_pd(struct ib_device *ibdev,
 	}
 
 	i40iw_add_pdusecount(iwpd);
-	return &iwpd->ibpd;
+	return 0;
+
 error:
-	kfree(iwpd);
-free_res:
 	i40iw_free_resource(iwdev, iwdev->allocated_pds, pd_id);
-	return ERR_PTR(err);
+	return err;
 }
 
 /**
  * i40iw_dealloc_pd - deallocate pd
  * @ibpd: ptr of pd to be deallocated
  */
-static int i40iw_dealloc_pd(struct ib_pd *ibpd)
+static void i40iw_dealloc_pd(struct ib_pd *ibpd)
 {
 	struct i40iw_pd *iwpd = to_iwpd(ibpd);
 	struct i40iw_device *iwdev = to_iwdev(ibpd->device);
 
 	i40iw_rem_pdusecount(iwpd, iwdev);
-	return 0;
 }
 
 /**
@@ -2750,6 +2741,7 @@ static const struct ib_device_ops i40iw_dev_ops = {
 	.query_qp = i40iw_query_qp,
 	.reg_user_mr = i40iw_reg_user_mr,
 	.req_notify_cq = i40iw_req_notify_cq,
+	INIT_RDMA_OBJ_SIZE(ib_pd, i40iw_pd, ibpd),
 };
 
 /**
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index d66002a31000..c0f6aea7ed7c 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -1186,38 +1186,27 @@ static int mlx4_ib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
 	}
 }
 
-static struct ib_pd *mlx4_ib_alloc_pd(struct ib_device *ibdev,
-				      struct ib_ucontext *context,
-				      struct ib_udata *udata)
+static int mlx4_ib_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *context,
+			    struct ib_udata *udata)
 {
-	struct mlx4_ib_pd *pd;
+	struct mlx4_ib_pd *pd = to_mpd(ibpd);
+	struct ib_device *ibdev = ibpd->device;
 	int err;
 
-	pd = kzalloc(sizeof(*pd), GFP_KERNEL);
-	if (!pd)
-		return ERR_PTR(-ENOMEM);
-
 	err = mlx4_pd_alloc(to_mdev(ibdev)->dev, &pd->pdn);
-	if (err) {
-		kfree(pd);
-		return ERR_PTR(err);
-	}
+	if (err)
+		return err;
 
-	if (context)
-		if (ib_copy_to_udata(udata, &pd->pdn, sizeof (__u32))) {
-			mlx4_pd_free(to_mdev(ibdev)->dev, pd->pdn);
-			kfree(pd);
-			return ERR_PTR(-EFAULT);
-		}
-	return &pd->ibpd;
+	if (context && ib_copy_to_udata(udata, &pd->pdn, sizeof(__u32))) {
+		mlx4_pd_free(to_mdev(ibdev)->dev, pd->pdn);
+		return -EFAULT;
+	}
+	return 0;
 }
 
-static int mlx4_ib_dealloc_pd(struct ib_pd *pd)
+static void mlx4_ib_dealloc_pd(struct ib_pd *pd)
 {
 	mlx4_pd_free(to_mdev(pd->device)->dev, to_mpd(pd)->pdn);
-	kfree(pd);
-
-	return 0;
 }
 
 static struct ib_xrcd *mlx4_ib_alloc_xrcd(struct ib_device *ibdev,
@@ -2580,6 +2569,7 @@ static const struct ib_device_ops mlx4_ib_dev_ops = {
 	.req_notify_cq = mlx4_ib_arm_cq,
 	.rereg_user_mr = mlx4_ib_rereg_user_mr,
 	.resize_cq = mlx4_ib_resize_cq,
+	INIT_RDMA_OBJ_SIZE(ib_pd, mlx4_ib_pd, ibpd),
 };
 
 static const struct ib_device_ops mlx4_ib_dev_wq_ops = {
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 76d6c2557d0c..f9cddc6f2ab6 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -2280,30 +2280,24 @@ int mlx5_ib_dealloc_dm(struct ib_dm *ibdm)
 	return 0;
 }
 
-static struct ib_pd *mlx5_ib_alloc_pd(struct ib_device *ibdev,
-				      struct ib_ucontext *context,
-				      struct ib_udata *udata)
+static int mlx5_ib_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *context,
+			    struct ib_udata *udata)
 {
+	struct mlx5_ib_pd *pd = to_mpd(ibpd);
+	struct ib_device *ibdev = ibpd->device;
 	struct mlx5_ib_alloc_pd_resp resp;
-	struct mlx5_ib_pd *pd;
 	int err;
 	u32 out[MLX5_ST_SZ_DW(alloc_pd_out)] = {};
 	u32 in[MLX5_ST_SZ_DW(alloc_pd_in)]   = {};
 	u16 uid = 0;
 
-	pd = kzalloc(sizeof(*pd), GFP_KERNEL);
-	if (!pd)
-		return ERR_PTR(-ENOMEM);
-
 	uid = context ? to_mucontext(context)->devx_uid : 0;
 	MLX5_SET(alloc_pd_in, in, opcode, MLX5_CMD_OP_ALLOC_PD);
 	MLX5_SET(alloc_pd_in, in, uid, uid);
 	err = mlx5_cmd_exec(to_mdev(ibdev)->mdev, in, sizeof(in),
 			    out, sizeof(out));
-	if (err) {
-		kfree(pd);
-		return ERR_PTR(err);
-	}
+	if (err)
+		return err;
 
 	pd->pdn = MLX5_GET(alloc_pd_out, out, pd);
 	pd->uid = uid;
@@ -2311,23 +2305,19 @@ static struct ib_pd *mlx5_ib_alloc_pd(struct ib_device *ibdev,
 		resp.pdn = pd->pdn;
 		if (ib_copy_to_udata(udata, &resp, sizeof(resp))) {
 			mlx5_cmd_dealloc_pd(to_mdev(ibdev)->mdev, pd->pdn, uid);
-			kfree(pd);
-			return ERR_PTR(-EFAULT);
+			return -EFAULT;
 		}
 	}
 
-	return &pd->ibpd;
+	return 0;
 }
 
-static int mlx5_ib_dealloc_pd(struct ib_pd *pd)
+static void mlx5_ib_dealloc_pd(struct ib_pd *pd)
 {
 	struct mlx5_ib_dev *mdev = to_mdev(pd->device);
 	struct mlx5_ib_pd *mpd = to_mpd(pd);
 
 	mlx5_cmd_dealloc_pd(mdev->mdev, mpd->pdn, mpd->uid);
-	kfree(mpd);
-
-	return 0;
 }
 
 enum {
@@ -4680,23 +4670,28 @@ static int create_dev_resources(struct mlx5_ib_resources *devr)
 {
 	struct ib_srq_init_attr attr;
 	struct mlx5_ib_dev *dev;
+	struct ib_device *ibdev;
 	struct ib_cq_init_attr cq_attr = {.cqe = 1};
 	int port;
 	int ret = 0;
 
 	dev = container_of(devr, struct mlx5_ib_dev, devr);
+	ibdev = &dev->ib_dev;
 
 	mutex_init(&devr->mutex);
 
-	devr->p0 = mlx5_ib_alloc_pd(&dev->ib_dev, NULL, NULL);
-	if (IS_ERR(devr->p0)) {
-		ret = PTR_ERR(devr->p0);
-		goto error0;
-	}
-	devr->p0->device  = &dev->ib_dev;
+	devr->p0 = rdma_zalloc_drv_obj(ibdev, ib_pd);
+	if (!devr->p0)
+		return -ENOMEM;
+
+	devr->p0->device  = ibdev;
 	devr->p0->uobject = NULL;
 	atomic_set(&devr->p0->usecnt, 0);
 
+	ret = mlx5_ib_alloc_pd(devr->p0, NULL, NULL);
+	if (ret)
+		goto error0;
+
 	devr->c0 = mlx5_ib_create_cq(&dev->ib_dev, &cq_attr, NULL, NULL);
 	if (IS_ERR(devr->c0)) {
 		ret = PTR_ERR(devr->c0);
@@ -4794,6 +4789,7 @@ error2:
 error1:
 	mlx5_ib_dealloc_pd(devr->p0);
 error0:
+	kfree(devr->p0);
 	return ret;
 }
 
@@ -4809,6 +4805,7 @@ static void destroy_dev_resources(struct mlx5_ib_resources *devr)
 	mlx5_ib_dealloc_xrcd(devr->x1);
 	mlx5_ib_destroy_cq(devr->c0);
 	mlx5_ib_dealloc_pd(devr->p0);
+	kfree(devr->p0);
 
 	/* Make sure no change P_Key work items are still executing */
 	for (port = 0; port < dev->num_ports; ++port)
@@ -5938,6 +5935,7 @@ static const struct ib_device_ops mlx5_ib_dev_ops = {
 	.req_notify_cq = mlx5_ib_arm_cq,
 	.rereg_user_mr = mlx5_ib_rereg_user_mr,
 	.resize_cq = mlx5_ib_resize_cq,
+	INIT_RDMA_OBJ_SIZE(ib_pd, mlx5_ib_pd, ibpd),
 };
 
 static const struct ib_device_ops mlx5_ib_dev_flow_ipsec_ops = {
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index 1bb67562c8c8..2c754bc226f3 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -374,40 +374,30 @@ static int mthca_mmap_uar(struct ib_ucontext *context,
 	return 0;
 }
 
-static struct ib_pd *mthca_alloc_pd(struct ib_device *ibdev,
-				    struct ib_ucontext *context,
-				    struct ib_udata *udata)
+static int mthca_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *context,
+			  struct ib_udata *udata)
 {
-	struct mthca_pd *pd;
+	struct ib_device *ibdev = ibpd->device;
+	struct mthca_pd *pd = to_mpd(ibpd);
 	int err;
 
-	pd = kzalloc(sizeof(*pd), GFP_KERNEL);
-	if (!pd)
-		return ERR_PTR(-ENOMEM);
-
 	err = mthca_pd_alloc(to_mdev(ibdev), !context, pd);
-	if (err) {
-		kfree(pd);
-		return ERR_PTR(err);
-	}
+	if (err)
+		return err;
 
 	if (context) {
 		if (ib_copy_to_udata(udata, &pd->pd_num, sizeof (__u32))) {
 			mthca_pd_free(to_mdev(ibdev), pd);
-			kfree(pd);
-			return ERR_PTR(-EFAULT);
+			return -EFAULT;
 		}
 	}
 
-	return &pd->ibpd;
+	return 0;
 }
 
-static int mthca_dealloc_pd(struct ib_pd *pd)
+static void mthca_dealloc_pd(struct ib_pd *pd)
 {
 	mthca_pd_free(to_mdev(pd->device), to_mpd(pd));
-	kfree(pd);
-
-	return 0;
 }
 
 static struct ib_ah *mthca_ah_create(struct ib_pd *pd,
@@ -1228,6 +1218,7 @@ static const struct ib_device_ops mthca_dev_ops = {
 	.query_qp = mthca_query_qp,
 	.reg_user_mr = mthca_reg_user_mr,
 	.resize_cq = mthca_resize_cq,
+	INIT_RDMA_OBJ_SIZE(ib_pd, mthca_pd, ibpd),
 };
 
 static const struct ib_device_ops mthca_dev_arbel_srq_ops = {
diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c
index 6eb991d40035..f18b28ae4bd9 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.c
+++ b/drivers/infiniband/hw/nes/nes_verbs.c
@@ -658,10 +658,11 @@ static int nes_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
 /**
  * nes_alloc_pd
  */
-static struct ib_pd *nes_alloc_pd(struct ib_device *ibdev,
-		struct ib_ucontext *context, struct ib_udata *udata)
+static int nes_alloc_pd(struct ib_pd *pd, struct ib_ucontext *context,
+			struct ib_udata *udata)
 {
-	struct nes_pd *nespd;
+	struct ib_device *ibdev = pd->device;
+	struct nes_pd *nespd = to_nespd(pd);
 	struct nes_vnic *nesvnic = to_nesvnic(ibdev);
 	struct nes_device *nesdev = nesvnic->nesdev;
 	struct nes_adapter *nesadapter = nesdev->nesadapter;
@@ -676,15 +677,8 @@ static struct ib_pd *nes_alloc_pd(struct ib_device *ibdev,
 
 	err = nes_alloc_resource(nesadapter, nesadapter->allocated_pds,
 			nesadapter->max_pd, &pd_num, &nesadapter->next_pd, NES_RESOURCE_PD);
-	if (err) {
-		return ERR_PTR(err);
-	}
-
-	nespd = kzalloc(sizeof (struct nes_pd), GFP_KERNEL);
-	if (!nespd) {
-		nes_free_resource(nesadapter, nesadapter->allocated_pds, pd_num);
-		return ERR_PTR(-ENOMEM);
-	}
+	if (err)
+		return err;
 
 	nes_debug(NES_DBG_PD, "Allocating PD (%p) for ib device %s\n",
 			nespd, dev_name(&nesvnic->nesibdev->ibdev.dev));
@@ -700,16 +694,14 @@ static struct ib_pd *nes_alloc_pd(struct ib_device *ibdev,
 		if (nespd->mmap_db_index >= NES_MAX_USER_DB_REGIONS) {
 			nes_debug(NES_DBG_PD, "mmap_db_index > MAX\n");
 			nes_free_resource(nesadapter, nesadapter->allocated_pds, pd_num);
-			kfree(nespd);
-			return ERR_PTR(-ENOMEM);
+			return -ENOMEM;
 		}
 
 		uresp.pd_id = nespd->pd_id;
 		uresp.mmap_db_index = nespd->mmap_db_index;
 		if (ib_copy_to_udata(udata, &uresp, sizeof (struct nes_alloc_pd_resp))) {
 			nes_free_resource(nesadapter, nesadapter->allocated_pds, pd_num);
-			kfree(nespd);
-			return ERR_PTR(-EFAULT);
+			return -EFAULT;
 		}
 
 		set_bit(nespd->mmap_db_index, nesucontext->allocated_doorbells);
@@ -718,14 +710,14 @@ static struct ib_pd *nes_alloc_pd(struct ib_device *ibdev,
 	}
 
 	nes_debug(NES_DBG_PD, "PD%u structure located @%p.\n", nespd->pd_id, nespd);
-	return &nespd->ibpd;
+	return 0;
 }
 
 
 /**
  * nes_dealloc_pd
  */
-static int nes_dealloc_pd(struct ib_pd *ibpd)
+static void nes_dealloc_pd(struct ib_pd *ibpd)
 {
 	struct nes_ucontext *nesucontext;
 	struct nes_pd *nespd = to_nespd(ibpd);
@@ -748,9 +740,6 @@ static int nes_dealloc_pd(struct ib_pd *ibpd)
 			nespd->pd_id, nespd);
 	nes_free_resource(nesadapter, nesadapter->allocated_pds,
 			(nespd->pd_id-nesadapter->base_pd)>>(PAGE_SHIFT-12));
-	kfree(nespd);
-
-	return 0;
 }
 
 
@@ -3658,6 +3647,7 @@ static const struct ib_device_ops nes_dev_ops = {
 	.query_qp = nes_query_qp,
 	.reg_user_mr = nes_reg_user_mr,
 	.req_notify_cq = nes_req_notify_cq,
+	INIT_RDMA_OBJ_SIZE(ib_pd, nes_pd, ibpd),
 };
 
 /**
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
index 88970a6bb555..0de83c92691f 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
@@ -179,6 +179,7 @@ static const struct ib_device_ops ocrdma_dev_ops = {
 	.reg_user_mr = ocrdma_reg_user_mr,
 	.req_notify_cq = ocrdma_arm_cq,
 	.resize_cq = ocrdma_resize_cq,
+	INIT_RDMA_OBJ_SIZE(ib_pd, ocrdma_pd, ibpd),
 };
 
 static const struct ib_device_ops ocrdma_dev_srq_ops = {
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
index 2a62936bef4d..980ba97188ff 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
@@ -367,17 +367,12 @@ static int ocrdma_get_pd_num(struct ocrdma_dev *dev, struct ocrdma_pd *pd)
 	return status;
 }
 
-static struct ocrdma_pd *_ocrdma_alloc_pd(struct ocrdma_dev *dev,
-					  struct ocrdma_ucontext *uctx,
-					  struct ib_udata *udata)
+static int _ocrdma_alloc_pd(struct ocrdma_dev *dev, struct ocrdma_pd *pd,
+			    struct ocrdma_ucontext *uctx,
+			    struct ib_udata *udata)
 {
-	struct ocrdma_pd *pd = NULL;
 	int status;
 
-	pd = kzalloc(sizeof(*pd), GFP_KERNEL);
-	if (!pd)
-		return ERR_PTR(-ENOMEM);
-
 	if (udata && uctx && dev->attr.max_dpp_pds) {
 		pd->dpp_enabled =
 			ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R;
@@ -386,15 +381,8 @@ static struct ocrdma_pd *_ocrdma_alloc_pd(struct ocrdma_dev *dev,
 					   dev->attr.wqe_size) : 0;
 	}
 
-	if (dev->pd_mgr->pd_prealloc_valid) {
-		status = ocrdma_get_pd_num(dev, pd);
-		if (status == 0) {
-			return pd;
-		} else {
-			kfree(pd);
-			return ERR_PTR(status);
-		}
-	}
+	if (dev->pd_mgr->pd_prealloc_valid)
+		return ocrdma_get_pd_num(dev, pd);
 
 retry:
 	status = ocrdma_mbx_alloc_pd(dev, pd);
@@ -403,13 +391,11 @@ retry:
 			pd->dpp_enabled = false;
 			pd->num_dpp_qp = 0;
 			goto retry;
-		} else {
-			kfree(pd);
-			return ERR_PTR(status);
 		}
+		return status;
 	}
 
-	return pd;
+	return 0;
 }
 
 static inline int is_ucontext_pd(struct ocrdma_ucontext *uctx,
@@ -418,30 +404,33 @@ static inline int is_ucontext_pd(struct ocrdma_ucontext *uctx,
 	return (uctx->cntxt_pd == pd);
 }
 
-static int _ocrdma_dealloc_pd(struct ocrdma_dev *dev,
+static void _ocrdma_dealloc_pd(struct ocrdma_dev *dev,
 			      struct ocrdma_pd *pd)
 {
-	int status;
-
 	if (dev->pd_mgr->pd_prealloc_valid)
-		status = ocrdma_put_pd_num(dev, pd->id, pd->dpp_enabled);
+		ocrdma_put_pd_num(dev, pd->id, pd->dpp_enabled);
 	else
-		status = ocrdma_mbx_dealloc_pd(dev, pd);
-
-	kfree(pd);
-	return status;
+		ocrdma_mbx_dealloc_pd(dev, pd);
 }
 
 static int ocrdma_alloc_ucontext_pd(struct ocrdma_dev *dev,
 				    struct ocrdma_ucontext *uctx,
 				    struct ib_udata *udata)
 {
-	int status = 0;
+	struct ib_device *ibdev = &dev->ibdev;
+	struct ib_pd *pd;
+	int status;
+
+	pd = rdma_zalloc_drv_obj(ibdev, ib_pd);
+	if (!pd)
+		return -ENOMEM;
+
+	pd->device  = ibdev;
+	uctx->cntxt_pd = get_ocrdma_pd(pd);
 
-	uctx->cntxt_pd = _ocrdma_alloc_pd(dev, uctx, udata);
-	if (IS_ERR(uctx->cntxt_pd)) {
-		status = PTR_ERR(uctx->cntxt_pd);
-		uctx->cntxt_pd = NULL;
+	status = _ocrdma_alloc_pd(dev, uctx->cntxt_pd, uctx, udata);
+	if (status) {
+		kfree(uctx->cntxt_pd);
 		goto err;
 	}
 
@@ -460,6 +449,7 @@ static int ocrdma_dealloc_ucontext_pd(struct ocrdma_ucontext *uctx)
 		pr_err("%s(%d) Freeing in use pdid=0x%x.\n",
 		       __func__, dev->id, pd->id);
 	}
+	kfree(uctx->cntxt_pd);
 	uctx->cntxt_pd = NULL;
 	(void)_ocrdma_dealloc_pd(dev, pd);
 	return 0;
@@ -537,6 +527,7 @@ struct ib_ucontext *ocrdma_alloc_ucontext(struct ib_device *ibdev,
 	return &ctx->ibucontext;
 
 cpy_err:
+	ocrdma_dealloc_ucontext_pd(ctx);
 pd_err:
 	ocrdma_del_mmap(ctx, ctx->ah_tbl.pa, ctx->ah_tbl.len);
 map_err:
@@ -658,10 +649,10 @@ dpp_map_err:
 	return status;
 }
 
-struct ib_pd *ocrdma_alloc_pd(struct ib_device *ibdev,
-			      struct ib_ucontext *context,
-			      struct ib_udata *udata)
+int ocrdma_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *context,
+		    struct ib_udata *udata)
 {
+	struct ib_device *ibdev = ibpd->device;
 	struct ocrdma_dev *dev = get_ocrdma_dev(ibdev);
 	struct ocrdma_pd *pd;
 	struct ocrdma_ucontext *uctx = NULL;
@@ -677,11 +668,10 @@ struct ib_pd *ocrdma_alloc_pd(struct ib_device *ibdev,
 		}
 	}
 
-	pd = _ocrdma_alloc_pd(dev, uctx, udata);
-	if (IS_ERR(pd)) {
-		status = PTR_ERR(pd);
+	pd = get_ocrdma_pd(ibpd);
+	status = _ocrdma_alloc_pd(dev, pd, uctx, udata);
+	if (status)
 		goto exit;
-	}
 
 pd_mapping:
 	if (udata && context) {
@@ -689,25 +679,22 @@ pd_mapping:
 		if (status)
 			goto err;
 	}
-	return &pd->ibpd;
+	return 0;
 
 err:
-	if (is_uctx_pd) {
+	if (is_uctx_pd)
 		ocrdma_release_ucontext_pd(uctx);
-	} else {
-		if (_ocrdma_dealloc_pd(dev, pd))
-			pr_err("%s: _ocrdma_dealloc_pd() failed\n", __func__);
-	}
+	else
+		_ocrdma_dealloc_pd(dev, pd);
 exit:
-	return ERR_PTR(status);
+	return status;
 }
 
-int ocrdma_dealloc_pd(struct ib_pd *ibpd)
+void ocrdma_dealloc_pd(struct ib_pd *ibpd)
 {
 	struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
 	struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device);
 	struct ocrdma_ucontext *uctx = NULL;
-	int status = 0;
 	u64 usr_db;
 
 	uctx = pd->uctx;
@@ -721,11 +708,10 @@ int ocrdma_dealloc_pd(struct ib_pd *ibpd)
 
 		if (is_ucontext_pd(uctx, pd)) {
 			ocrdma_release_ucontext_pd(uctx);
-			return status;
+			return;
 		}
 	}
-	status = _ocrdma_dealloc_pd(dev, pd);
-	return status;
+	_ocrdma_dealloc_pd(dev, pd);
 }
 
 static int ocrdma_alloc_lkey(struct ocrdma_dev *dev, struct ocrdma_mr *mr,
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h
index b69cfdce7970..1fd66721c930 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h
@@ -70,9 +70,9 @@ int ocrdma_dealloc_ucontext(struct ib_ucontext *);
 
 int ocrdma_mmap(struct ib_ucontext *, struct vm_area_struct *vma);
 
-struct ib_pd *ocrdma_alloc_pd(struct ib_device *,
-			      struct ib_ucontext *, struct ib_udata *);
-int ocrdma_dealloc_pd(struct ib_pd *pd);
+int ocrdma_alloc_pd(struct ib_pd *pd, struct ib_ucontext *uctx,
+		    struct ib_udata *udata);
+void ocrdma_dealloc_pd(struct ib_pd *pd);
 
 struct ib_cq *ocrdma_create_cq(struct ib_device *ibdev,
 			       const struct ib_cq_init_attr *attr,
diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c
index 878e9e23652b..44ce4989dcef 100644
--- a/drivers/infiniband/hw/qedr/main.c
+++ b/drivers/infiniband/hw/qedr/main.c
@@ -239,6 +239,7 @@ static const struct ib_device_ops qedr_dev_ops = {
 	.reg_user_mr = qedr_reg_user_mr,
 	.req_notify_cq = qedr_arm_cq,
 	.resize_cq = qedr_resize_cq,
+	INIT_RDMA_OBJ_SIZE(ib_pd, qedr_pd, ibpd),
 };
 
 static int qedr_register_device(struct qedr_dev *dev)
diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c
index 989f08633fbe..a06d2258394a 100644
--- a/drivers/infiniband/hw/qedr/verbs.c
+++ b/drivers/infiniband/hw/qedr/verbs.c
@@ -450,11 +450,12 @@ int qedr_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
 				  vma->vm_page_prot);
 }
 
-struct ib_pd *qedr_alloc_pd(struct ib_device *ibdev,
-			    struct ib_ucontext *context, struct ib_udata *udata)
+int qedr_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *context,
+		  struct ib_udata *udata)
 {
+	struct ib_device *ibdev = ibpd->device;
 	struct qedr_dev *dev = get_qedr_dev(ibdev);
-	struct qedr_pd *pd;
+	struct qedr_pd *pd = get_qedr_pd(ibpd);
 	u16 pd_id;
 	int rc;
 
@@ -463,16 +464,12 @@ struct ib_pd *qedr_alloc_pd(struct ib_device *ibdev,
 
 	if (!dev->rdma_ctx) {
 		DP_ERR(dev, "invalid RDMA context\n");
-		return ERR_PTR(-EINVAL);
+		return -EINVAL;
 	}
 
-	pd = kzalloc(sizeof(*pd), GFP_KERNEL);
-	if (!pd)
-		return ERR_PTR(-ENOMEM);
-
 	rc = dev->ops->rdma_alloc_pd(dev->rdma_ctx, &pd_id);
 	if (rc)
-		goto err;
+		return rc;
 
 	pd->pd_id = pd_id;
 
@@ -485,36 +482,23 @@ struct ib_pd *qedr_alloc_pd(struct ib_device *ibdev,
 		if (rc) {
 			DP_ERR(dev, "copy error pd_id=0x%x.\n", pd_id);
 			dev->ops->rdma_dealloc_pd(dev->rdma_ctx, pd_id);
-			goto err;
+			return rc;
 		}
 
 		pd->uctx = get_qedr_ucontext(context);
 		pd->uctx->pd = pd;
 	}
 
-	return &pd->ibpd;
-
-err:
-	kfree(pd);
-	return ERR_PTR(rc);
+	return 0;
 }
 
-int qedr_dealloc_pd(struct ib_pd *ibpd)
+void qedr_dealloc_pd(struct ib_pd *ibpd)
 {
 	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
 	struct qedr_pd *pd = get_qedr_pd(ibpd);
 
-	if (!pd) {
-		pr_err("Invalid PD received in dealloc_pd\n");
-		return -EINVAL;
-	}
-
 	DP_DEBUG(dev, QEDR_MSG_INIT, "Deallocating PD %d\n", pd->pd_id);
 	dev->ops->rdma_dealloc_pd(dev->rdma_ctx, pd->pd_id);
-
-	kfree(pd);
-
-	return 0;
 }
 
 static void qedr_free_pbl(struct qedr_dev *dev,
diff --git a/drivers/infiniband/hw/qedr/verbs.h b/drivers/infiniband/hw/qedr/verbs.h
index 1852b7012bf4..97a6ff3f9afb 100644
--- a/drivers/infiniband/hw/qedr/verbs.h
+++ b/drivers/infiniband/hw/qedr/verbs.h
@@ -47,9 +47,9 @@ struct ib_ucontext *qedr_alloc_ucontext(struct ib_device *, struct ib_udata *);
 int qedr_dealloc_ucontext(struct ib_ucontext *);
 
 int qedr_mmap(struct ib_ucontext *, struct vm_area_struct *vma);
-struct ib_pd *qedr_alloc_pd(struct ib_device *,
-			    struct ib_ucontext *, struct ib_udata *);
-int qedr_dealloc_pd(struct ib_pd *pd);
+int qedr_alloc_pd(struct ib_pd *pd, struct ib_ucontext *uctx,
+		  struct ib_udata *udata);
+void qedr_dealloc_pd(struct ib_pd *pd);
 
 struct ib_cq *qedr_create_cq(struct ib_device *ibdev,
 			     const struct ib_cq_init_attr *attr,
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c b/drivers/infiniband/hw/usnic/usnic_ib_main.c
index 1ec155823716..256ad2f236c8 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_main.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c
@@ -352,6 +352,7 @@ static const struct ib_device_ops usnic_dev_ops = {
 	.query_port = usnic_ib_query_port,
 	.query_qp = usnic_ib_query_qp,
 	.reg_user_mr = usnic_ib_reg_mr,
+	INIT_RDMA_OBJ_SIZE(ib_pd, usnic_ib_pd, ibpd),
 };
 
 /* Start of PF discovery section */
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
index 9dea18106247..0ced89b51448 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
@@ -456,37 +456,23 @@ int usnic_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
 	return 0;
 }
 
-struct ib_pd *usnic_ib_alloc_pd(struct ib_device *ibdev,
-					struct ib_ucontext *context,
-					struct ib_udata *udata)
+int usnic_ib_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *context,
+		      struct ib_udata *udata)
 {
-	struct usnic_ib_pd *pd;
+	struct usnic_ib_pd *pd = to_upd(ibpd);
 	void *umem_pd;
 
-	usnic_dbg("\n");
-
-	pd = kzalloc(sizeof(*pd), GFP_KERNEL);
-	if (!pd)
-		return ERR_PTR(-ENOMEM);
-
 	umem_pd = pd->umem_pd = usnic_uiom_alloc_pd();
 	if (IS_ERR_OR_NULL(umem_pd)) {
-		kfree(pd);
-		return ERR_PTR(umem_pd ? PTR_ERR(umem_pd) : -ENOMEM);
+		return umem_pd ? PTR_ERR(umem_pd) : -ENOMEM;
 	}
 
-	usnic_info("domain 0x%p allocated for context 0x%p and device %s\n",
-		   pd, context, dev_name(&ibdev->dev));
-	return &pd->ibpd;
+	return 0;
 }
 
-int usnic_ib_dealloc_pd(struct ib_pd *pd)
+void usnic_ib_dealloc_pd(struct ib_pd *pd)
 {
-	usnic_info("freeing domain 0x%p\n", pd);
-
 	usnic_uiom_dealloc_pd((to_upd(pd))->umem_pd);
-	kfree(pd);
-	return 0;
 }
 
 struct ib_qp *usnic_ib_create_qp(struct ib_pd *pd,
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.h b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h
index 99a6d81c2bcd..44a9d2f82bf5 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.h
+++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h
@@ -51,10 +51,9 @@ int usnic_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
 struct net_device *usnic_get_netdev(struct ib_device *device, u8 port_num);
 int usnic_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
 				u16 *pkey);
-struct ib_pd *usnic_ib_alloc_pd(struct ib_device *ibdev,
-				struct ib_ucontext *context,
-				struct ib_udata *udata);
-int usnic_ib_dealloc_pd(struct ib_pd *pd);
+int usnic_ib_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *context,
+		      struct ib_udata *udata);
+void usnic_ib_dealloc_pd(struct ib_pd *pd);
 struct ib_qp *usnic_ib_create_qp(struct ib_pd *pd,
 					struct ib_qp_init_attr *init_attr,
 					struct ib_udata *udata);
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
index e582beaf9430..47e653d2495c 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
@@ -195,6 +195,7 @@ static const struct ib_device_ops pvrdma_dev_ops = {
 	.query_qp = pvrdma_query_qp,
 	.reg_user_mr = pvrdma_reg_user_mr,
 	.req_notify_cq = pvrdma_req_notify_cq,
+	INIT_RDMA_OBJ_SIZE(ib_pd, pvrdma_pd, ibpd),
 };
 
 static const struct ib_device_ops pvrdma_dev_srq_ops = {
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c
index fafb2add3b44..f44220f72e05 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c
@@ -438,37 +438,29 @@ int pvrdma_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma)
 
 /**
  * pvrdma_alloc_pd - allocate protection domain
- * @ibdev: the IB device
+ * @ibpd: PD pointer
  * @context: user context
  * @udata: user data
  *
  * @return: the ib_pd protection domain pointer on success, otherwise errno.
  */
-struct ib_pd *pvrdma_alloc_pd(struct ib_device *ibdev,
-			      struct ib_ucontext *context,
-			      struct ib_udata *udata)
+int pvrdma_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *context,
+		    struct ib_udata *udata)
 {
-	struct pvrdma_pd *pd;
+	struct ib_device *ibdev = ibpd->device;
+	struct pvrdma_pd *pd = to_vpd(ibpd);
 	struct pvrdma_dev *dev = to_vdev(ibdev);
-	union pvrdma_cmd_req req;
-	union pvrdma_cmd_resp rsp;
+	union pvrdma_cmd_req req = {};
+	union pvrdma_cmd_resp rsp = {};
 	struct pvrdma_cmd_create_pd *cmd = &req.create_pd;
 	struct pvrdma_cmd_create_pd_resp *resp = &rsp.create_pd_resp;
 	struct pvrdma_alloc_pd_resp pd_resp = {0};
 	int ret;
-	void *ptr;
 
 	/* Check allowed max pds */
 	if (!atomic_add_unless(&dev->num_pds, 1, dev->dsr->caps.max_pd))
-		return ERR_PTR(-ENOMEM);
+		return -ENOMEM;
 
-	pd = kzalloc(sizeof(*pd), GFP_KERNEL);
-	if (!pd) {
-		ptr = ERR_PTR(-ENOMEM);
-		goto err;
-	}
-
-	memset(cmd, 0, sizeof(*cmd));
 	cmd->hdr.cmd = PVRDMA_CMD_CREATE_PD;
 	cmd->ctx_handle = (context) ? to_vucontext(context)->ctx_handle : 0;
 	ret = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_CREATE_PD_RESP);
@@ -476,8 +468,7 @@ struct ib_pd *pvrdma_alloc_pd(struct ib_device *ibdev,
 		dev_warn(&dev->pdev->dev,
 			 "failed to allocate protection domain, error: %d\n",
 			 ret);
-		ptr = ERR_PTR(ret);
-		goto freepd;
+		goto err;
 	}
 
 	pd->privileged = !context;
@@ -490,18 +481,16 @@ struct ib_pd *pvrdma_alloc_pd(struct ib_device *ibdev,
 			dev_warn(&dev->pdev->dev,
 				 "failed to copy back protection domain\n");
 			pvrdma_dealloc_pd(&pd->ibpd);
-			return ERR_PTR(-EFAULT);
+			return -EFAULT;
 		}
 	}
 
 	/* u32 pd handle */
-	return &pd->ibpd;
+	return 0;
 
-freepd:
-	kfree(pd);
 err:
 	atomic_dec(&dev->num_pds);
-	return ptr;
+	return ret;
 }
 
 /**
@@ -510,14 +499,13 @@ err:
  *
  * @return: 0 on success, otherwise errno.
  */
-int pvrdma_dealloc_pd(struct ib_pd *pd)
+void pvrdma_dealloc_pd(struct ib_pd *pd)
 {
 	struct pvrdma_dev *dev = to_vdev(pd->device);
-	union pvrdma_cmd_req req;
+	union pvrdma_cmd_req req = {};
 	struct pvrdma_cmd_destroy_pd *cmd = &req.destroy_pd;
 	int ret;
 
-	memset(cmd, 0, sizeof(*cmd));
 	cmd->hdr.cmd = PVRDMA_CMD_DESTROY_PD;
 	cmd->pd_handle = to_vpd(pd)->pd_handle;
 
@@ -527,10 +515,7 @@ int pvrdma_dealloc_pd(struct ib_pd *pd)
 			 "could not dealloc protection domain, error: %d\n",
 			 ret);
 
-	kfree(to_vpd(pd));
 	atomic_dec(&dev->num_pds);
-
-	return 0;
 }
 
 /**
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h
index f7f758d60110..ed91baad1ffa 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h
@@ -399,10 +399,9 @@ int pvrdma_mmap(struct ib_ucontext *context, struct vm_area_struct *vma);
 struct ib_ucontext *pvrdma_alloc_ucontext(struct ib_device *ibdev,
 					  struct ib_udata *udata);
 int pvrdma_dealloc_ucontext(struct ib_ucontext *context);
-struct ib_pd *pvrdma_alloc_pd(struct ib_device *ibdev,
-			      struct ib_ucontext *context,
-			      struct ib_udata *udata);
-int pvrdma_dealloc_pd(struct ib_pd *ibpd);
+int pvrdma_alloc_pd(struct ib_pd *pd, struct ib_ucontext *context,
+		    struct ib_udata *udata);
+void pvrdma_dealloc_pd(struct ib_pd *ibpd);
 struct ib_mr *pvrdma_get_dma_mr(struct ib_pd *pd, int acc);
 struct ib_mr *pvrdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 				 u64 virt_addr, int access_flags,
diff --git a/drivers/infiniband/sw/rdmavt/pd.c b/drivers/infiniband/sw/rdmavt/pd.c
index dcc1870b8d23..6033054b22fa 100644
--- a/drivers/infiniband/sw/rdmavt/pd.c
+++ b/drivers/infiniband/sw/rdmavt/pd.c
@@ -50,7 +50,7 @@
 
 /**
  * rvt_alloc_pd - allocate a protection domain
- * @ibdev: ib device
+ * @ibpd: PD
  * @context: optional user context
  * @udata: optional user data
  *
@@ -58,19 +58,14 @@
  *
  * Return: 0 on success
  */
-struct ib_pd *rvt_alloc_pd(struct ib_device *ibdev,
-			   struct ib_ucontext *context,
-			   struct ib_udata *udata)
+int rvt_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *context,
+		 struct ib_udata *udata)
 {
+	struct ib_device *ibdev = ibpd->device;
 	struct rvt_dev_info *dev = ib_to_rvt(ibdev);
-	struct rvt_pd *pd;
-	struct ib_pd *ret;
+	struct rvt_pd *pd = ibpd_to_rvtpd(ibpd);
+	int ret = 0;
 
-	pd = kzalloc(sizeof(*pd), GFP_KERNEL);
-	if (!pd) {
-		ret = ERR_PTR(-ENOMEM);
-		goto bail;
-	}
 	/*
 	 * While we could continue allocating protecetion domains, being
 	 * constrained only by system resources. The IBTA spec defines that
@@ -81,8 +76,7 @@ struct ib_pd *rvt_alloc_pd(struct ib_device *ibdev,
 	spin_lock(&dev->n_pds_lock);
 	if (dev->n_pds_allocated == dev->dparms.props.max_pd) {
 		spin_unlock(&dev->n_pds_lock);
-		kfree(pd);
-		ret = ERR_PTR(-ENOMEM);
+		ret = -ENOMEM;
 		goto bail;
 	}
 
@@ -92,8 +86,6 @@ struct ib_pd *rvt_alloc_pd(struct ib_device *ibdev,
 	/* ib_alloc_pd() will initialize pd->ibpd. */
 	pd->user = !!udata;
 
-	ret = &pd->ibpd;
-
 bail:
 	return ret;
 }
@@ -104,16 +96,11 @@ bail:
  *
  * Return: always 0
  */
-int rvt_dealloc_pd(struct ib_pd *ibpd)
+void rvt_dealloc_pd(struct ib_pd *ibpd)
 {
-	struct rvt_pd *pd = ibpd_to_rvtpd(ibpd);
 	struct rvt_dev_info *dev = ib_to_rvt(ibpd->device);
 
 	spin_lock(&dev->n_pds_lock);
 	dev->n_pds_allocated--;
 	spin_unlock(&dev->n_pds_lock);
-
-	kfree(pd);
-
-	return 0;
 }
diff --git a/drivers/infiniband/sw/rdmavt/pd.h b/drivers/infiniband/sw/rdmavt/pd.h
index 1892ca4a9746..7a887e4a45e7 100644
--- a/drivers/infiniband/sw/rdmavt/pd.h
+++ b/drivers/infiniband/sw/rdmavt/pd.h
@@ -50,9 +50,8 @@
 
 #include <rdma/rdma_vt.h>
 
-struct ib_pd *rvt_alloc_pd(struct ib_device *ibdev,
-			   struct ib_ucontext *context,
-			   struct ib_udata *udata);
-int rvt_dealloc_pd(struct ib_pd *ibpd);
+int rvt_alloc_pd(struct ib_pd *pd, struct ib_ucontext *context,
+		 struct ib_udata *udata);
+void rvt_dealloc_pd(struct ib_pd *ibpd);
 
 #endif          /* DEF_RDMAVTPD_H */
diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c
index b3f0c5578925..a19832c73d5a 100644
--- a/drivers/infiniband/sw/rdmavt/vt.c
+++ b/drivers/infiniband/sw/rdmavt/vt.c
@@ -436,6 +436,7 @@ static const struct ib_device_ops rvt_dev_ops = {
 	.req_notify_cq = rvt_req_notify_cq,
 	.resize_cq = rvt_resize_cq,
 	.unmap_fmr = rvt_unmap_fmr,
+	INIT_RDMA_OBJ_SIZE(ib_pd, rvt_pd, ibpd),
 };
 
 static noinline int check_support(struct rvt_dev_info *rdi, int verb)
diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c
index b5c91df22047..cd3f14629ba8 100644
--- a/drivers/infiniband/sw/rxe/rxe_pool.c
+++ b/drivers/infiniband/sw/rxe/rxe_pool.c
@@ -46,6 +46,7 @@ struct rxe_type_info rxe_type_info[RXE_NUM_TYPES] = {
 	[RXE_TYPE_PD] = {
 		.name		= "rxe-pd",
 		.size		= sizeof(struct rxe_pd),
+		.flags		= RXE_POOL_NO_ALLOC,
 	},
 	[RXE_TYPE_AH] = {
 		.name		= "rxe-ah",
@@ -119,8 +120,10 @@ static void rxe_cache_clean(size_t cnt)
 
 	for (i = 0; i < cnt; i++) {
 		type = &rxe_type_info[i];
-		kmem_cache_destroy(type->cache);
-		type->cache = NULL;
+		if (!(type->flags & RXE_POOL_NO_ALLOC)) {
+			kmem_cache_destroy(type->cache);
+			type->cache = NULL;
+		}
 	}
 }
 
@@ -134,14 +137,17 @@ int rxe_cache_init(void)
 	for (i = 0; i < RXE_NUM_TYPES; i++) {
 		type = &rxe_type_info[i];
 		size = ALIGN(type->size, RXE_POOL_ALIGN);
-		type->cache = kmem_cache_create(type->name, size,
-				RXE_POOL_ALIGN,
-				RXE_POOL_CACHE_FLAGS, NULL);
-		if (!type->cache) {
-			pr_err("Unable to init kmem cache for %s\n",
-			       type->name);
-			err = -ENOMEM;
-			goto err1;
+		if (!(type->flags & RXE_POOL_NO_ALLOC)) {
+			type->cache =
+				kmem_cache_create(type->name, size,
+						  RXE_POOL_ALIGN,
+						  RXE_POOL_CACHE_FLAGS, NULL);
+			if (!type->cache) {
+				pr_err("Unable to init kmem cache for %s\n",
+				       type->name);
+				err = -ENOMEM;
+				goto err1;
+			}
 		}
 	}
 
@@ -415,6 +421,37 @@ out_put_pool:
 	return NULL;
 }
 
+int rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_entry *elem)
+{
+	unsigned long flags;
+
+	might_sleep_if(!(pool->flags & RXE_POOL_ATOMIC));
+
+	read_lock_irqsave(&pool->pool_lock, flags);
+	if (pool->state != RXE_POOL_STATE_VALID) {
+		read_unlock_irqrestore(&pool->pool_lock, flags);
+		return -EINVAL;
+	}
+	kref_get(&pool->ref_cnt);
+	read_unlock_irqrestore(&pool->pool_lock, flags);
+
+	kref_get(&pool->rxe->ref_cnt);
+
+	if (atomic_inc_return(&pool->num_elem) > pool->max_elem)
+		goto out_put_pool;
+
+	elem->pool = pool;
+	kref_init(&elem->ref_cnt);
+
+	return 0;
+
+out_put_pool:
+	atomic_dec(&pool->num_elem);
+	rxe_dev_put(pool->rxe);
+	rxe_pool_put(pool);
+	return -EINVAL;
+}
+
 void rxe_elem_release(struct kref *kref)
 {
 	struct rxe_pool_entry *elem =
@@ -424,7 +461,8 @@ void rxe_elem_release(struct kref *kref)
 	if (pool->cleanup)
 		pool->cleanup(elem);
 
-	kmem_cache_free(pool_cache(pool), elem);
+	if (!(pool->flags & RXE_POOL_NO_ALLOC))
+		kmem_cache_free(pool_cache(pool), elem);
 	atomic_dec(&pool->num_elem);
 	rxe_dev_put(pool->rxe);
 	rxe_pool_put(pool);
diff --git a/drivers/infiniband/sw/rxe/rxe_pool.h b/drivers/infiniband/sw/rxe/rxe_pool.h
index 72968c29e01f..2f2cff1cbe43 100644
--- a/drivers/infiniband/sw/rxe/rxe_pool.h
+++ b/drivers/infiniband/sw/rxe/rxe_pool.h
@@ -41,6 +41,7 @@ enum rxe_pool_flags {
 	RXE_POOL_ATOMIC		= BIT(0),
 	RXE_POOL_INDEX		= BIT(1),
 	RXE_POOL_KEY		= BIT(2),
+	RXE_POOL_NO_ALLOC	= BIT(4),
 };
 
 enum rxe_elem_type {
@@ -131,6 +132,9 @@ void rxe_pool_cleanup(struct rxe_pool *pool);
 /* allocate an object from pool */
 void *rxe_alloc(struct rxe_pool *pool);
 
+/* connect already allocated object to pool */
+int rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_entry *elem);
+
 /* assign an index to an indexed object and insert object into
  *  pool's rb tree
  */
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index cc5a05124ece..051c3930e808 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -191,23 +191,20 @@ static int rxe_port_immutable(struct ib_device *dev, u8 port_num,
 	return 0;
 }
 
-static struct ib_pd *rxe_alloc_pd(struct ib_device *dev,
-				  struct ib_ucontext *context,
-				  struct ib_udata *udata)
+static int rxe_alloc_pd(struct ib_pd *ibpd, struct ib_ucontext *context,
+			struct ib_udata *udata)
 {
-	struct rxe_dev *rxe = to_rdev(dev);
-	struct rxe_pd *pd;
+	struct rxe_dev *rxe = to_rdev(ibpd->device);
+	struct rxe_pd *pd = to_rpd(ibpd);
 
-	pd = rxe_alloc(&rxe->pd_pool);
-	return pd ? &pd->ibpd : ERR_PTR(-ENOMEM);
+	return rxe_add_to_pool(&rxe->pd_pool, &pd->pelem);
 }
 
-static int rxe_dealloc_pd(struct ib_pd *ibpd)
+static void rxe_dealloc_pd(struct ib_pd *ibpd)
 {
 	struct rxe_pd *pd = to_rpd(ibpd);
 
 	rxe_drop_ref(pd);
-	return 0;
 }
 
 static struct ib_ah *rxe_create_ah(struct ib_pd *ibpd,
@@ -1183,6 +1180,7 @@ static const struct ib_device_ops rxe_dev_ops = {
 	.reg_user_mr = rxe_reg_user_mr,
 	.req_notify_cq = rxe_req_notify_cq,
 	.resize_cq = rxe_resize_cq,
+	INIT_RDMA_OBJ_SIZE(ib_pd, rxe_pd, ibpd),
 };
 
 int rxe_register_device(struct rxe_dev *rxe)
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h
index 74e04801d34d..70839d3f55d9 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.h
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.h
@@ -66,8 +66,8 @@ struct rxe_ucontext {
 };
 
 struct rxe_pd {
+	struct ib_pd            ibpd;
 	struct rxe_pool_entry	pelem;
-	struct ib_pd		ibpd;
 };
 
 struct rxe_ah {
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index e29eae4aec84..854d7816787c 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -2385,10 +2385,9 @@ struct ib_device_ops {
 	int (*dealloc_ucontext)(struct ib_ucontext *context);
 	int (*mmap)(struct ib_ucontext *context, struct vm_area_struct *vma);
 	void (*disassociate_ucontext)(struct ib_ucontext *ibcontext);
-	struct ib_pd *(*alloc_pd)(struct ib_device *device,
-				  struct ib_ucontext *context,
-				  struct ib_udata *udata);
-	int (*dealloc_pd)(struct ib_pd *pd);
+	int (*alloc_pd)(struct ib_pd *pd, struct ib_ucontext *context,
+			struct ib_udata *udata);
+	void (*dealloc_pd)(struct ib_pd *pd);
 	struct ib_ah *(*create_ah)(struct ib_pd *pd,
 				   struct rdma_ah_attr *ah_attr, u32 flags,
 				   struct ib_udata *udata);
@@ -2530,6 +2529,8 @@ struct ib_device_ops {
 	 */
 	int (*fill_res_entry)(struct sk_buff *msg,
 			      struct rdma_restrack_entry *entry);
+
+	DECLARE_RDMA_OBJ_SIZE(ib_pd);
 };
 
 struct ib_device {
-- 
cgit v1.2.3


From 652432f33c01b2edaa5b2550b423cd894b1c7b9a Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@mellanox.com>
Date: Wed, 6 Feb 2019 22:41:50 -0700
Subject: RDMA/device: Get rid of reg_state

This really has no purpose anymore, refcount can be used to tell if the
device is still registered. Keeping it around just invites mis-use.

Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Reviewed-by: Parav Pandit <parav@mellanox.com>
---
 drivers/infiniband/core/device.c | 8 ++------
 include/rdma/ib_verbs.h          | 6 ------
 2 files changed, 2 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 872662a84b16..1c54ded776d0 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -243,7 +243,7 @@ static void ib_device_release(struct device *device)
 {
 	struct ib_device *dev = container_of(device, struct ib_device, dev);
 
-	WARN_ON(dev->reg_state == IB_DEV_REGISTERED);
+	WARN_ON(refcount_read(&dev->refcount));
 	ib_cache_release_one(dev);
 	ib_security_release_port_pkey_list(dev);
 	kfree(dev->port_pkey_list);
@@ -316,8 +316,7 @@ EXPORT_SYMBOL(_ib_alloc_device);
 void ib_dealloc_device(struct ib_device *device)
 {
 	WARN_ON(!list_empty(&device->client_data_list));
-	WARN_ON(device->reg_state != IB_DEV_UNREGISTERED &&
-		device->reg_state != IB_DEV_UNINITIALIZED);
+	WARN_ON(refcount_read(&device->refcount));
 	rdma_restrack_clean(device);
 	put_device(&device->dev);
 }
@@ -602,7 +601,6 @@ int ib_register_device(struct ib_device *device, const char *name)
 	}
 
 	refcount_set(&device->refcount, 1);
-	device->reg_state = IB_DEV_REGISTERED;
 
 	list_for_each_entry(client, &client_list, list)
 		if (!add_client_context(device, client) && client->add)
@@ -673,8 +671,6 @@ void ib_unregister_device(struct ib_device *device)
 	}
 	write_unlock_irqrestore(&device->client_data_lock, flags);
 	up_write(&lists_rwsem);
-
-	device->reg_state = IB_DEV_UNREGISTERED;
 }
 EXPORT_SYMBOL(ib_unregister_device);
 
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 854d7816787c..d8ba987e8b29 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -2572,12 +2572,6 @@ struct ib_device {
 	struct kobject			*ports_kobj;
 	struct list_head             port_list;
 
-	enum {
-		IB_DEV_UNINITIALIZED,
-		IB_DEV_REGISTERED,
-		IB_DEV_UNREGISTERED
-	}                            reg_state;
-
 	int			     uverbs_abi_ver;
 	u64			     uverbs_cmd_mask;
 	u64			     uverbs_ex_cmd_mask;
-- 
cgit v1.2.3


From e59178d895afa29b671323f8265a1e50afe989e5 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@mellanox.com>
Date: Wed, 6 Feb 2019 22:41:52 -0700
Subject: RDMA/devices: Use xarray to store the clients

This gives each client a unique ID and will let us move client_data to use
xarray, and revise the locking scheme.

clients have to be add/removed in strict FIFO/LIFO order as they
interdepend. To support this the client_ids are assigned to increase in
FIFO order. The existing linked list is kept to support reverse iteration
until xarray can get a reverse iteration API.

Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Reviewed-by: Parav Pandit <parav@mellanox.com>
---
 drivers/infiniband/core/device.c | 50 ++++++++++++++++++++++++++++++++++++----
 include/rdma/ib_verbs.h          |  3 ++-
 2 files changed, 47 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 3a80f96c2919..f87d85659359 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -65,15 +65,17 @@ struct workqueue_struct *ib_comp_unbound_wq;
 struct workqueue_struct *ib_wq;
 EXPORT_SYMBOL_GPL(ib_wq);
 
-/* The device_list and client_list contain devices and clients after their
+/* The device_list and clients contain devices and clients after their
  * registration has completed, and the devices and clients are removed
  * during unregistration. */
 static LIST_HEAD(device_list);
 static LIST_HEAD(client_list);
+#define CLIENT_REGISTERED XA_MARK_1
+static DEFINE_XARRAY_FLAGS(clients, XA_FLAGS_ALLOC);
 
 /*
  * device_mutex and lists_rwsem protect access to both device_list and
- * client_list.  device_mutex protects writer access by device and client
+ * clients.  device_mutex protects writer access by device and client
  * registration / de-registration.  lists_rwsem protects reader access to
  * these lists.  Iterators of these lists must lock it for read, while updates
  * to the lists must be done with a write lock. A special case is when the
@@ -564,6 +566,7 @@ int ib_register_device(struct ib_device *device, const char *name)
 {
 	int ret;
 	struct ib_client *client;
+	unsigned long index;
 
 	setup_dma_device(device);
 
@@ -608,7 +611,7 @@ int ib_register_device(struct ib_device *device, const char *name)
 
 	refcount_set(&device->refcount, 1);
 
-	list_for_each_entry(client, &client_list, list)
+	xa_for_each_marked (&clients, index, client, CLIENT_REGISTERED)
 		if (!add_client_context(device, client) && client->add)
 			client->add(device);
 
@@ -680,6 +683,32 @@ void ib_unregister_device(struct ib_device *device)
 }
 EXPORT_SYMBOL(ib_unregister_device);
 
+static int assign_client_id(struct ib_client *client)
+{
+	int ret;
+
+	/*
+	 * The add/remove callbacks must be called in FIFO/LIFO order. To
+	 * achieve this we assign client_ids so they are sorted in
+	 * registration order, and retain a linked list we can reverse iterate
+	 * to get the LIFO order. The extra linked list can go away if xarray
+	 * learns to reverse iterate.
+	 */
+	if (list_empty(&client_list))
+		client->client_id = 0;
+	else
+		client->client_id =
+			list_last_entry(&client_list, struct ib_client, list)
+				->client_id;
+	ret = xa_alloc(&clients, &client->client_id, INT_MAX, client,
+		       GFP_KERNEL);
+	if (ret)
+		goto out;
+
+out:
+	return ret;
+}
+
 /**
  * ib_register_client - Register an IB client
  * @client:Client to register
@@ -696,15 +725,21 @@ EXPORT_SYMBOL(ib_unregister_device);
 int ib_register_client(struct ib_client *client)
 {
 	struct ib_device *device;
+	int ret;
 
 	mutex_lock(&device_mutex);
+	ret = assign_client_id(client);
+	if (ret) {
+		mutex_unlock(&device_mutex);
+		return ret;
+	}
 
 	list_for_each_entry(device, &device_list, core_list)
 		if (!add_client_context(device, client) && client->add)
 			client->add(device);
 
 	down_write(&lists_rwsem);
-	list_add_tail(&client->list, &client_list);
+	xa_set_mark(&clients, client->client_id, CLIENT_REGISTERED);
 	up_write(&lists_rwsem);
 
 	mutex_unlock(&device_mutex);
@@ -729,7 +764,7 @@ void ib_unregister_client(struct ib_client *client)
 	mutex_lock(&device_mutex);
 
 	down_write(&lists_rwsem);
-	list_del(&client->list);
+	xa_clear_mark(&clients, client->client_id, CLIENT_REGISTERED);
 	up_write(&lists_rwsem);
 
 	list_for_each_entry(device, &device_list, core_list) {
@@ -765,6 +800,10 @@ void ib_unregister_client(struct ib_client *client)
 		kfree(found_context);
 	}
 
+	down_write(&lists_rwsem);
+	list_del(&client->list);
+	xa_erase(&clients, client->client_id);
+	up_write(&lists_rwsem);
 	mutex_unlock(&device_mutex);
 }
 EXPORT_SYMBOL(ib_unregister_client);
@@ -1422,6 +1461,7 @@ static void __exit ib_core_cleanup(void)
 	destroy_workqueue(ib_comp_wq);
 	/* Make sure that any pending umem accounting work is done. */
 	destroy_workqueue(ib_wq);
+	WARN_ON(!xa_empty(&clients));
 }
 
 MODULE_ALIAS_RDMA_NETLINK(RDMA_NL_LS, 4);
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index d8ba987e8b29..cc15820513cd 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -2610,7 +2610,7 @@ struct ib_device {
 };
 
 struct ib_client {
-	char  *name;
+	const char *name;
 	void (*add)   (struct ib_device *);
 	void (*remove)(struct ib_device *, void *client_data);
 
@@ -2637,6 +2637,7 @@ struct ib_client {
 			const struct sockaddr *addr,
 			void *client_data);
 	struct list_head list;
+	u32 client_id;
 
 	/* kverbs are not required by the client */
 	u8 no_kverbs_req:1;
-- 
cgit v1.2.3


From 0df91bb67334eebaf73d4ba32567e16d55f4f116 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@mellanox.com>
Date: Wed, 6 Feb 2019 22:41:53 -0700
Subject: RDMA/devices: Use xarray to store the client_data

Now that we have a small ID for each client we can use xarray instead of
linearly searching linked lists for client data. This will give much
faster and scalable client data lookup, and will lets us revise the
locking scheme.

Since xarray can store 'going_down' using a mark just entirely eliminate
the struct ib_client_data and directly store the client_data value in the
xarray. However this does require a special iterator as we must still
iterate over any NULL client_data values.

Also eliminate the client_data_lock in favour of internal xarray locking.

Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/core/device.c | 348 +++++++++++++++++++--------------------
 include/rdma/ib_verbs.h          |  23 ++-
 2 files changed, 186 insertions(+), 185 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index f87d85659359..5096593b99e9 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -51,30 +51,72 @@ MODULE_AUTHOR("Roland Dreier");
 MODULE_DESCRIPTION("core kernel InfiniBand API");
 MODULE_LICENSE("Dual BSD/GPL");
 
-struct ib_client_data {
-	struct list_head  list;
-	struct ib_client *client;
-	void *            data;
-	/* The device or client is going down. Do not call client or device
-	 * callbacks other than remove(). */
-	bool		  going_down;
-};
-
 struct workqueue_struct *ib_comp_wq;
 struct workqueue_struct *ib_comp_unbound_wq;
 struct workqueue_struct *ib_wq;
 EXPORT_SYMBOL_GPL(ib_wq);
 
-/* The device_list and clients contain devices and clients after their
- * registration has completed, and the devices and clients are removed
- * during unregistration. */
-static LIST_HEAD(device_list);
+/*
+ * devices contains devices that have had their names assigned. The
+ * devices may not be registered. Users that care about the registration
+ * status need to call ib_device_try_get() on the device to ensure it is
+ * registered, and keep it registered, for the required duration.
+ *
+ */
+static DEFINE_XARRAY_FLAGS(devices, XA_FLAGS_ALLOC);
+
+/*
+ * Note that if the *rwsem is held and the *_REGISTERED mark is seen then the
+ * object is guaranteed to be and remain registered for the duration of the
+ * lock.
+ */
+#define DEVICE_REGISTERED XA_MARK_1
+
 static LIST_HEAD(client_list);
 #define CLIENT_REGISTERED XA_MARK_1
 static DEFINE_XARRAY_FLAGS(clients, XA_FLAGS_ALLOC);
 
 /*
- * device_mutex and lists_rwsem protect access to both device_list and
+ * If client_data is registered then the corresponding client must also still
+ * be registered.
+ */
+#define CLIENT_DATA_REGISTERED XA_MARK_1
+/*
+ * xarray has this behavior where it won't iterate over NULL values stored in
+ * allocated arrays.  So we need our own iterator to see all values stored in
+ * the array. This does the same thing as xa_for_each except that it also
+ * returns NULL valued entries if the array is allocating. Simplified to only
+ * work on simple xarrays.
+ */
+static void *xan_find_marked(struct xarray *xa, unsigned long *indexp,
+			     xa_mark_t filter)
+{
+	XA_STATE(xas, xa, *indexp);
+	void *entry;
+
+	rcu_read_lock();
+	do {
+		entry = xas_find_marked(&xas, ULONG_MAX, filter);
+		if (xa_is_zero(entry))
+			break;
+	} while (xas_retry(&xas, entry));
+	rcu_read_unlock();
+
+	if (entry) {
+		*indexp = xas.xa_index;
+		if (xa_is_zero(entry))
+			return NULL;
+		return entry;
+	}
+	return XA_ERROR(-ENOENT);
+}
+#define xan_for_each_marked(xa, index, entry, filter)                          \
+	for (index = 0, entry = xan_find_marked(xa, &(index), filter);         \
+	     !xa_is_err(entry);                                                \
+	     (index)++, entry = xan_find_marked(xa, &(index), filter))
+
+/*
+ * device_mutex and lists_rwsem protect access to both devices and
  * clients.  device_mutex protects writer access by device and client
  * registration / de-registration.  lists_rwsem protects reader access to
  * these lists.  Iterators of these lists must lock it for read, while updates
@@ -135,17 +177,6 @@ static int ib_device_check_mandatory(struct ib_device *device)
 	return 0;
 }
 
-static struct ib_device *__ib_device_get_by_index(u32 index)
-{
-	struct ib_device *device;
-
-	list_for_each_entry(device, &device_list, core_list)
-		if (device->index == index)
-			return device;
-
-	return NULL;
-}
-
 /*
  * Caller must perform ib_device_put() to return the device reference count
  * when ib_device_get_by_index() returns valid device pointer.
@@ -155,7 +186,7 @@ struct ib_device *ib_device_get_by_index(u32 index)
 	struct ib_device *device;
 
 	down_read(&lists_rwsem);
-	device = __ib_device_get_by_index(index);
+	device = xa_load(&devices, index);
 	if (device) {
 		if (!ib_device_try_get(device))
 			device = NULL;
@@ -181,8 +212,9 @@ EXPORT_SYMBOL(ib_device_put);
 static struct ib_device *__ib_device_get_by_name(const char *name)
 {
 	struct ib_device *device;
+	unsigned long index;
 
-	list_for_each_entry(device, &device_list, core_list)
+	xa_for_each (&devices, index, device)
 		if (!strcmp(name, dev_name(&device->dev)))
 			return device;
 
@@ -216,12 +248,13 @@ out:
 static int alloc_name(struct ib_device *ibdev, const char *name)
 {
 	struct ib_device *device;
+	unsigned long index;
 	struct ida inuse;
 	int rc;
 	int i;
 
 	ida_init(&inuse);
-	list_for_each_entry(device, &device_list, core_list) {
+	xa_for_each (&devices, index, device) {
 		char buf[IB_DEVICE_NAME_MAX];
 
 		if (sscanf(dev_name(&device->dev), name, &i) != 1)
@@ -256,6 +289,7 @@ static void ib_device_release(struct device *device)
 	ib_security_release_port_pkey_list(dev);
 	kfree(dev->port_pkey_list);
 	kfree(dev->port_immutable);
+	xa_destroy(&dev->client_data);
 	kfree(dev);
 }
 
@@ -306,8 +340,11 @@ struct ib_device *_ib_alloc_device(size_t size)
 
 	INIT_LIST_HEAD(&device->event_handler_list);
 	spin_lock_init(&device->event_handler_lock);
-	rwlock_init(&device->client_data_lock);
-	INIT_LIST_HEAD(&device->client_data_list);
+	/*
+	 * client_data needs to be alloc because we don't want our mark to be
+	 * destroyed if the user stores NULL in the client data.
+	 */
+	xa_init_flags(&device->client_data, XA_FLAGS_ALLOC);
 	INIT_LIST_HEAD(&device->port_list);
 	init_completion(&device->unreg_completion);
 
@@ -323,7 +360,7 @@ EXPORT_SYMBOL(_ib_alloc_device);
  */
 void ib_dealloc_device(struct ib_device *device)
 {
-	WARN_ON(!list_empty(&device->client_data_list));
+	WARN_ON(!xa_empty(&device->client_data));
 	WARN_ON(refcount_read(&device->refcount));
 	rdma_restrack_clean(device);
 	put_device(&device->dev);
@@ -332,26 +369,20 @@ EXPORT_SYMBOL(ib_dealloc_device);
 
 static int add_client_context(struct ib_device *device, struct ib_client *client)
 {
-	struct ib_client_data *context;
+	void *entry;
 
 	if (!device->kverbs_provider && !client->no_kverbs_req)
 		return -EOPNOTSUPP;
 
-	context = kmalloc(sizeof(*context), GFP_KERNEL);
-	if (!context)
-		return -ENOMEM;
-
-	context->client = client;
-	context->data   = NULL;
-	context->going_down = false;
-
 	down_write(&lists_rwsem);
-	write_lock_irq(&device->client_data_lock);
-	list_add(&context->list, &device->client_data_list);
-	write_unlock_irq(&device->client_data_lock);
+	entry = xa_store(&device->client_data, client->client_id, NULL,
+			 GFP_KERNEL);
+	if (!xa_is_err(entry))
+		xa_set_mark(&device->client_data, client->client_id,
+			    CLIENT_DATA_REGISTERED);
 	up_write(&lists_rwsem);
 
-	return 0;
+	return xa_err(entry);
 }
 
 static int verify_immutable(const struct ib_device *dev, u8 port)
@@ -428,9 +459,10 @@ static int setup_port_pkey_list(struct ib_device *device)
 static void ib_policy_change_task(struct work_struct *work)
 {
 	struct ib_device *dev;
+	unsigned long index;
 
 	down_read(&lists_rwsem);
-	list_for_each_entry(dev, &device_list, core_list) {
+	xa_for_each_marked (&devices, index, dev, DEVICE_REGISTERED) {
 		int i;
 
 		for (i = rdma_start_port(dev); i <= rdma_end_port(dev); i++) {
@@ -461,28 +493,48 @@ static int ib_security_change(struct notifier_block *nb, unsigned long event,
 	return NOTIFY_OK;
 }
 
-/**
- *	__dev_new_index	-	allocate an device index
- *
- *	Returns a suitable unique value for a new device interface
- *	number.  It assumes that there are less than 2^32-1 ib devices
- *	will be present in the system.
+/*
+ * Assign the unique string device name and the unique device index.
  */
-static u32 __dev_new_index(void)
+static int assign_name(struct ib_device *device, const char *name)
 {
-	/*
-	 * The device index to allow stable naming.
-	 * Similar to struct net -> ifindex.
-	 */
-	static u32 index;
+	static u32 last_id;
+	int ret;
 
-	for (;;) {
-		if (!(++index))
-			index = 1;
+	/* Assign a unique name to the device */
+	if (strchr(name, '%'))
+		ret = alloc_name(device, name);
+	else
+		ret = dev_set_name(&device->dev, name);
+	if (ret)
+		goto out;
+
+	if (__ib_device_get_by_name(dev_name(&device->dev))) {
+		ret = -ENFILE;
+		goto out;
+	}
+	strlcpy(device->name, dev_name(&device->dev), IB_DEVICE_NAME_MAX);
 
-		if (!__ib_device_get_by_index(index))
-			return index;
+	/* Cyclically allocate a user visible ID for the device */
+	device->index = last_id;
+	ret = xa_alloc(&devices, &device->index, INT_MAX, device, GFP_KERNEL);
+	if (ret == -ENOSPC) {
+		device->index = 0;
+		ret = xa_alloc(&devices, &device->index, INT_MAX, device,
+			       GFP_KERNEL);
 	}
+	if (ret)
+		goto out;
+	last_id = device->index + 1;
+
+	ret = 0;
+out:
+	return ret;
+}
+
+static void release_name(struct ib_device *device)
+{
+	xa_erase(&devices, device->index);
 }
 
 static void setup_dma_device(struct ib_device *device)
@@ -572,34 +624,21 @@ int ib_register_device(struct ib_device *device, const char *name)
 
 	mutex_lock(&device_mutex);
 
-	if (strchr(name, '%')) {
-		ret = alloc_name(device, name);
-		if (ret)
-			goto out;
-	} else {
-		ret = dev_set_name(&device->dev, name);
-		if (ret)
-			goto out;
-	}
-	if (__ib_device_get_by_name(dev_name(&device->dev))) {
-		ret = -ENFILE;
+	ret = assign_name(device, name);
+	if (ret)
 		goto out;
-	}
-	strlcpy(device->name, dev_name(&device->dev), IB_DEVICE_NAME_MAX);
 
 	ret = setup_device(device);
 	if (ret)
-		goto out;
+		goto out_name;
 
 	ret = ib_cache_setup_one(device);
 	if (ret) {
 		dev_warn(&device->dev,
 			 "Couldn't set up InfiniBand P_Key/GID cache\n");
-		goto out;
+		goto out_name;
 	}
 
-	device->index = __dev_new_index();
-
 	ib_device_register_rdmacg(device);
 
 	ret = ib_device_register_sysfs(device);
@@ -616,7 +655,7 @@ int ib_register_device(struct ib_device *device, const char *name)
 			client->add(device);
 
 	down_write(&lists_rwsem);
-	list_add_tail(&device->core_list, &device_list);
+	xa_set_mark(&devices, device->index, DEVICE_REGISTERED);
 	up_write(&lists_rwsem);
 	mutex_unlock(&device_mutex);
 	return 0;
@@ -624,6 +663,8 @@ int ib_register_device(struct ib_device *device, const char *name)
 cg_cleanup:
 	ib_device_unregister_rdmacg(device);
 	ib_cache_cleanup_one(device);
+out_name:
+	release_name(device);
 out:
 	mutex_unlock(&device_mutex);
 	return ret;
@@ -638,8 +679,8 @@ EXPORT_SYMBOL(ib_register_device);
  */
 void ib_unregister_device(struct ib_device *device)
 {
-	struct ib_client_data *context, *tmp;
-	unsigned long flags;
+	struct ib_client *client;
+	unsigned long index;
 
 	/*
 	 * Wait for all netlink command callers to finish working on the
@@ -651,34 +692,31 @@ void ib_unregister_device(struct ib_device *device)
 	mutex_lock(&device_mutex);
 
 	down_write(&lists_rwsem);
-	list_del(&device->core_list);
-	write_lock_irq(&device->client_data_lock);
-	list_for_each_entry(context, &device->client_data_list, list)
-		context->going_down = true;
-	write_unlock_irq(&device->client_data_lock);
+	xa_clear_mark(&devices, device->index, DEVICE_REGISTERED);
+	xa_for_each (&clients, index, client)
+		xa_clear_mark(&device->client_data, index,
+			      CLIENT_DATA_REGISTERED);
 	downgrade_write(&lists_rwsem);
 
-	list_for_each_entry(context, &device->client_data_list, list) {
-		if (context->client->remove)
-			context->client->remove(device, context->data);
-	}
+	list_for_each_entry_reverse(client, &client_list, list)
+		if (xa_get_mark(&device->client_data, client->client_id,
+				CLIENT_DATA_REGISTERED) &&
+		    client->remove)
+			client->remove(device, xa_load(&device->client_data,
+						       client->client_id));
 	up_read(&lists_rwsem);
 
 	ib_device_unregister_sysfs(device);
 	ib_device_unregister_rdmacg(device);
 
+	release_name(device);
+
 	mutex_unlock(&device_mutex);
 
 	ib_cache_cleanup_one(device);
 
 	down_write(&lists_rwsem);
-	write_lock_irqsave(&device->client_data_lock, flags);
-	list_for_each_entry_safe(context, tmp, &device->client_data_list,
-				 list) {
-		list_del(&context->list);
-		kfree(context);
-	}
-	write_unlock_irqrestore(&device->client_data_lock, flags);
+	xa_destroy(&device->client_data);
 	up_write(&lists_rwsem);
 }
 EXPORT_SYMBOL(ib_unregister_device);
@@ -725,6 +763,7 @@ out:
 int ib_register_client(struct ib_client *client)
 {
 	struct ib_device *device;
+	unsigned long index;
 	int ret;
 
 	mutex_lock(&device_mutex);
@@ -734,7 +773,7 @@ int ib_register_client(struct ib_client *client)
 		return ret;
 	}
 
-	list_for_each_entry(device, &device_list, core_list)
+	xa_for_each_marked (&devices, index, device, DEVICE_REGISTERED)
 		if (!add_client_context(device, client) && client->add)
 			client->add(device);
 
@@ -758,8 +797,8 @@ EXPORT_SYMBOL(ib_register_client);
  */
 void ib_unregister_client(struct ib_client *client)
 {
-	struct ib_client_data *context;
 	struct ib_device *device;
+	unsigned long index;
 
 	mutex_lock(&device_mutex);
 
@@ -767,37 +806,19 @@ void ib_unregister_client(struct ib_client *client)
 	xa_clear_mark(&clients, client->client_id, CLIENT_REGISTERED);
 	up_write(&lists_rwsem);
 
-	list_for_each_entry(device, &device_list, core_list) {
-		struct ib_client_data *found_context = NULL;
-
+	xa_for_each_marked (&devices, index, device, DEVICE_REGISTERED) {
 		down_write(&lists_rwsem);
-		write_lock_irq(&device->client_data_lock);
-		list_for_each_entry(context, &device->client_data_list, list)
-			if (context->client == client) {
-				context->going_down = true;
-				found_context = context;
-				break;
-			}
-		write_unlock_irq(&device->client_data_lock);
+		xa_clear_mark(&device->client_data, client->client_id,
+			      CLIENT_DATA_REGISTERED);
 		up_write(&lists_rwsem);
 
 		if (client->remove)
-			client->remove(device, found_context ?
-					       found_context->data : NULL);
-
-		if (!found_context) {
-			dev_warn(&device->dev,
-				 "No client context found for %s\n",
-				 client->name);
-			continue;
-		}
+			client->remove(device, xa_load(&device->client_data,
+						       client->client_id));
 
 		down_write(&lists_rwsem);
-		write_lock_irq(&device->client_data_lock);
-		list_del(&found_context->list);
-		write_unlock_irq(&device->client_data_lock);
+		xa_erase(&device->client_data, client->client_id);
 		up_write(&lists_rwsem);
-		kfree(found_context);
 	}
 
 	down_write(&lists_rwsem);
@@ -808,59 +829,28 @@ void ib_unregister_client(struct ib_client *client)
 }
 EXPORT_SYMBOL(ib_unregister_client);
 
-/**
- * ib_get_client_data - Get IB client context
- * @device:Device to get context for
- * @client:Client to get context for
- *
- * ib_get_client_data() returns client context set with
- * ib_set_client_data().
- */
-void *ib_get_client_data(struct ib_device *device, struct ib_client *client)
-{
-	struct ib_client_data *context;
-	void *ret = NULL;
-	unsigned long flags;
-
-	read_lock_irqsave(&device->client_data_lock, flags);
-	list_for_each_entry(context, &device->client_data_list, list)
-		if (context->client == client) {
-			ret = context->data;
-			break;
-		}
-	read_unlock_irqrestore(&device->client_data_lock, flags);
-
-	return ret;
-}
-EXPORT_SYMBOL(ib_get_client_data);
-
 /**
  * ib_set_client_data - Set IB client context
  * @device:Device to set context for
  * @client:Client to set context for
  * @data:Context to set
  *
- * ib_set_client_data() sets client context that can be retrieved with
- * ib_get_client_data().
+ * ib_set_client_data() sets client context data that can be retrieved with
+ * ib_get_client_data(). This can only be called while the client is
+ * registered to the device, once the ib_client remove() callback returns this
+ * cannot be called.
  */
 void ib_set_client_data(struct ib_device *device, struct ib_client *client,
 			void *data)
 {
-	struct ib_client_data *context;
-	unsigned long flags;
-
-	write_lock_irqsave(&device->client_data_lock, flags);
-	list_for_each_entry(context, &device->client_data_list, list)
-		if (context->client == client) {
-			context->data = data;
-			goto out;
-		}
+	void *rc;
 
-	dev_warn(&device->dev, "No client context found for %s\n",
-		 client->name);
+	if (WARN_ON(IS_ERR(data)))
+		data = NULL;
 
-out:
-	write_unlock_irqrestore(&device->client_data_lock, flags);
+	rc = xa_store(&device->client_data, client->client_id, data,
+		      GFP_KERNEL);
+	WARN_ON(xa_is_err(rc));
 }
 EXPORT_SYMBOL(ib_set_client_data);
 
@@ -1018,9 +1008,10 @@ void ib_enum_all_roce_netdevs(roce_netdev_filter filter,
 			      void *cookie)
 {
 	struct ib_device *dev;
+	unsigned long index;
 
 	down_read(&lists_rwsem);
-	list_for_each_entry(dev, &device_list, core_list)
+	xa_for_each_marked (&devices, index, dev, DEVICE_REGISTERED)
 		ib_enum_roce_netdev(dev, filter, filter_cookie, cb, cookie);
 	up_read(&lists_rwsem);
 }
@@ -1034,12 +1025,13 @@ void ib_enum_all_roce_netdevs(roce_netdev_filter filter,
 int ib_enum_all_devs(nldev_callback nldev_cb, struct sk_buff *skb,
 		     struct netlink_callback *cb)
 {
+	unsigned long index;
 	struct ib_device *dev;
 	unsigned int idx = 0;
 	int ret = 0;
 
 	down_read(&lists_rwsem);
-	list_for_each_entry(dev, &device_list, core_list) {
+	xa_for_each_marked (&devices, index, dev, DEVICE_REGISTERED) {
 		ret = nldev_cb(dev, skb, cb, idx);
 		if (ret)
 			break;
@@ -1212,26 +1204,25 @@ struct net_device *ib_get_net_dev_by_params(struct ib_device *dev,
 					    const struct sockaddr *addr)
 {
 	struct net_device *net_dev = NULL;
-	struct ib_client_data *context;
+	unsigned long index;
+	void *client_data;
 
 	if (!rdma_protocol_ib(dev, port))
 		return NULL;
 
 	down_read(&lists_rwsem);
 
-	list_for_each_entry(context, &dev->client_data_list, list) {
-		struct ib_client *client = context->client;
+	xan_for_each_marked (&dev->client_data, index, client_data,
+			     CLIENT_DATA_REGISTERED) {
+		struct ib_client *client = xa_load(&clients, index);
 
-		if (context->going_down)
+		if (!client || !client->get_net_dev_by_params)
 			continue;
 
-		if (client->get_net_dev_by_params) {
-			net_dev = client->get_net_dev_by_params(dev, port, pkey,
-								gid, addr,
-								context->data);
-			if (net_dev)
-				break;
-		}
+		net_dev = client->get_net_dev_by_params(dev, port, pkey, gid,
+							addr, client_data);
+		if (net_dev)
+			break;
 	}
 
 	up_read(&lists_rwsem);
@@ -1462,6 +1453,7 @@ static void __exit ib_core_cleanup(void)
 	/* Make sure that any pending umem accounting work is done. */
 	destroy_workqueue(ib_wq);
 	WARN_ON(!xa_empty(&clients));
+	WARN_ON(!xa_empty(&devices));
 }
 
 MODULE_ALIAS_RDMA_NETLINK(RDMA_NL_LS, 4);
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index cc15820513cd..8558f31ca46f 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -2542,12 +2542,7 @@ struct ib_device {
 	struct list_head              event_handler_list;
 	spinlock_t                    event_handler_lock;
 
-	rwlock_t			client_data_lock;
-	struct list_head              core_list;
-	/* Access to the client_data_list is protected by the client_data_lock
-	 * rwlock and the lists_rwsem read-write semaphore
-	 */
-	struct list_head              client_data_list;
+	struct xarray                 client_data;
 
 	struct ib_cache               cache;
 	/**
@@ -2660,7 +2655,21 @@ void ib_unregister_device(struct ib_device *device);
 int ib_register_client   (struct ib_client *client);
 void ib_unregister_client(struct ib_client *client);
 
-void *ib_get_client_data(struct ib_device *device, struct ib_client *client);
+/**
+ * ib_get_client_data - Get IB client context
+ * @device:Device to get context for
+ * @client:Client to get context for
+ *
+ * ib_get_client_data() returns the client context data set with
+ * ib_set_client_data(). This can only be called while the client is
+ * registered to the device, once the ib_client remove() callback returns this
+ * cannot be called.
+ */
+static inline void *ib_get_client_data(struct ib_device *device,
+				       struct ib_client *client)
+{
+	return xa_load(&device->client_data, client->client_id);
+}
 void  ib_set_client_data(struct ib_device *device, struct ib_client *client,
 			 void *data);
 void ib_set_device_ops(struct ib_device *device,
-- 
cgit v1.2.3


From 921eab1143aadf976a42cac4605b4d35159b355d Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@mellanox.com>
Date: Wed, 6 Feb 2019 22:41:54 -0700
Subject: RDMA/devices: Re-organize device.c locking

The locking here started out with a single lock that covered everything
and then has lately veered into crazy town.

The fundamental problem is that several places need to iterate over a
linked list, but also need to drop their locks to avoid deadlock during
client callbacks.

xarray's restartable iteration offers a simple solution to the
problem. Once all the lists are xarrays we can drop locks in the places
that need that and rely on xarray to provide consistency and locking for
the data structure.

The resulting simplification is that each of the three lists has a
dedicated rwsem that must be held when working with the list it
covers. One data structure is no longer covered by multiple locks.

The sleeping semaphore is selected because the read side generally needs
to be held over something sleeping, and using RCU reader locking in those
cases is overkill.

In the process this simplifies the entire registration/unregistration flow
to be the expected list of setups and the reversed list of matching
teardowns, and the registration lock 'refcount' can now be revised to be
released after the ULPs are removed, providing a very sane semantic for
this feature.

Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/core/device.c | 361 ++++++++++++++++++++++++---------------
 include/rdma/ib_verbs.h          |   1 +
 2 files changed, 222 insertions(+), 140 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 5096593b99e9..3325be4f91a5 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -37,7 +37,6 @@
 #include <linux/kernel.h>
 #include <linux/slab.h>
 #include <linux/init.h>
-#include <linux/mutex.h>
 #include <linux/netdevice.h>
 #include <linux/security.h>
 #include <linux/notifier.h>
@@ -56,6 +55,29 @@ struct workqueue_struct *ib_comp_unbound_wq;
 struct workqueue_struct *ib_wq;
 EXPORT_SYMBOL_GPL(ib_wq);
 
+/*
+ * Each of the three rwsem locks (devices, clients, client_data) protects the
+ * xarray of the same name. Specifically it allows the caller to assert that
+ * the MARK will/will not be changing under the lock, and for devices and
+ * clients, that the value in the xarray is still a valid pointer. Change of
+ * the MARK is linked to the object state, so holding the lock and testing the
+ * MARK also asserts that the contained object is in a certain state.
+ *
+ * This is used to build a two stage register/unregister flow where objects
+ * can continue to be in the xarray even though they are still in progress to
+ * register/unregister.
+ *
+ * The xarray itself provides additional locking, and restartable iteration,
+ * which is also relied on.
+ *
+ * Locks should not be nested, with the exception of client_data, which is
+ * allowed to nest under the read side of the other two locks.
+ *
+ * The devices_rwsem also protects the device name list, any change or
+ * assignment of device name must also hold the write side to guarantee unique
+ * names.
+ */
+
 /*
  * devices contains devices that have had their names assigned. The
  * devices may not be registered. Users that care about the registration
@@ -64,17 +86,13 @@ EXPORT_SYMBOL_GPL(ib_wq);
  *
  */
 static DEFINE_XARRAY_FLAGS(devices, XA_FLAGS_ALLOC);
-
-/*
- * Note that if the *rwsem is held and the *_REGISTERED mark is seen then the
- * object is guaranteed to be and remain registered for the duration of the
- * lock.
- */
+static DECLARE_RWSEM(devices_rwsem);
 #define DEVICE_REGISTERED XA_MARK_1
 
 static LIST_HEAD(client_list);
 #define CLIENT_REGISTERED XA_MARK_1
 static DEFINE_XARRAY_FLAGS(clients, XA_FLAGS_ALLOC);
+static DECLARE_RWSEM(clients_rwsem);
 
 /*
  * If client_data is registered then the corresponding client must also still
@@ -115,20 +133,6 @@ static void *xan_find_marked(struct xarray *xa, unsigned long *indexp,
 	     !xa_is_err(entry);                                                \
 	     (index)++, entry = xan_find_marked(xa, &(index), filter))
 
-/*
- * device_mutex and lists_rwsem protect access to both devices and
- * clients.  device_mutex protects writer access by device and client
- * registration / de-registration.  lists_rwsem protects reader access to
- * these lists.  Iterators of these lists must lock it for read, while updates
- * to the lists must be done with a write lock. A special case is when the
- * device_mutex is locked. In this case locking the lists for read access is
- * not necessary as the device_mutex implies it.
- *
- * lists_rwsem also protects access to the client data list.
- */
-static DEFINE_MUTEX(device_mutex);
-static DECLARE_RWSEM(lists_rwsem);
-
 static int ib_security_change(struct notifier_block *nb, unsigned long event,
 			      void *lsm_data);
 static void ib_policy_change_task(struct work_struct *work);
@@ -185,13 +189,13 @@ struct ib_device *ib_device_get_by_index(u32 index)
 {
 	struct ib_device *device;
 
-	down_read(&lists_rwsem);
+	down_read(&devices_rwsem);
 	device = xa_load(&devices, index);
 	if (device) {
 		if (!ib_device_try_get(device))
 			device = NULL;
 	}
-	up_read(&lists_rwsem);
+	up_read(&devices_rwsem);
 	return device;
 }
 
@@ -225,7 +229,7 @@ int ib_device_rename(struct ib_device *ibdev, const char *name)
 {
 	int ret;
 
-	mutex_lock(&device_mutex);
+	down_write(&devices_rwsem);
 	if (!strcmp(name, dev_name(&ibdev->dev))) {
 		ret = 0;
 		goto out;
@@ -241,7 +245,7 @@ int ib_device_rename(struct ib_device *ibdev, const char *name)
 		goto out;
 	strlcpy(ibdev->name, name, IB_DEVICE_NAME_MAX);
 out:
-	mutex_unlock(&device_mutex);
+	up_write(&devices_rwsem);
 	return ret;
 }
 
@@ -253,6 +257,7 @@ static int alloc_name(struct ib_device *ibdev, const char *name)
 	int rc;
 	int i;
 
+	lockdep_assert_held_exclusive(&devices_rwsem);
 	ida_init(&inuse);
 	xa_for_each (&devices, index, device) {
 		char buf[IB_DEVICE_NAME_MAX];
@@ -345,6 +350,7 @@ struct ib_device *_ib_alloc_device(size_t size)
 	 * destroyed if the user stores NULL in the client data.
 	 */
 	xa_init_flags(&device->client_data, XA_FLAGS_ALLOC);
+	init_rwsem(&device->client_data_rwsem);
 	INIT_LIST_HEAD(&device->port_list);
 	init_completion(&device->unreg_completion);
 
@@ -367,22 +373,86 @@ void ib_dealloc_device(struct ib_device *device)
 }
 EXPORT_SYMBOL(ib_dealloc_device);
 
-static int add_client_context(struct ib_device *device, struct ib_client *client)
+/*
+ * add_client_context() and remove_client_context() must be safe against
+ * parallel calls on the same device - registration/unregistration of both the
+ * device and client can be occurring in parallel.
+ *
+ * The routines need to be a fence, any caller must not return until the add
+ * or remove is fully completed.
+ */
+static int add_client_context(struct ib_device *device,
+			      struct ib_client *client)
 {
-	void *entry;
+	int ret = 0;
 
 	if (!device->kverbs_provider && !client->no_kverbs_req)
-		return -EOPNOTSUPP;
+		return 0;
+
+	down_write(&device->client_data_rwsem);
+	/*
+	 * Another caller to add_client_context got here first and has already
+	 * completely initialized context.
+	 */
+	if (xa_get_mark(&device->client_data, client->client_id,
+		    CLIENT_DATA_REGISTERED))
+		goto out;
+
+	ret = xa_err(xa_store(&device->client_data, client->client_id, NULL,
+			      GFP_KERNEL));
+	if (ret)
+		goto out;
+	downgrade_write(&device->client_data_rwsem);
+	if (client->add)
+		client->add(device);
+
+	/* Readers shall not see a client until add has been completed */
+	xa_set_mark(&device->client_data, client->client_id,
+		    CLIENT_DATA_REGISTERED);
+	up_read(&device->client_data_rwsem);
+	return 0;
+
+out:
+	up_write(&device->client_data_rwsem);
+	return ret;
+}
+
+static void remove_client_context(struct ib_device *device,
+				  unsigned int client_id)
+{
+	struct ib_client *client;
+	void *client_data;
 
-	down_write(&lists_rwsem);
-	entry = xa_store(&device->client_data, client->client_id, NULL,
-			 GFP_KERNEL);
-	if (!xa_is_err(entry))
-		xa_set_mark(&device->client_data, client->client_id,
-			    CLIENT_DATA_REGISTERED);
-	up_write(&lists_rwsem);
+	down_write(&device->client_data_rwsem);
+	if (!xa_get_mark(&device->client_data, client_id,
+			 CLIENT_DATA_REGISTERED)) {
+		up_write(&device->client_data_rwsem);
+		return;
+	}
+	client_data = xa_load(&device->client_data, client_id);
+	xa_clear_mark(&device->client_data, client_id, CLIENT_DATA_REGISTERED);
+	client = xa_load(&clients, client_id);
+	downgrade_write(&device->client_data_rwsem);
 
-	return xa_err(entry);
+	/*
+	 * Notice we cannot be holding any exclusive locks when calling the
+	 * remove callback as the remove callback can recurse back into any
+	 * public functions in this module and thus try for any locks those
+	 * functions take.
+	 *
+	 * For this reason clients and drivers should not call the
+	 * unregistration functions will holdling any locks.
+	 *
+	 * It tempting to drop the client_data_rwsem too, but this is required
+	 * to ensure that unregister_client does not return until all clients
+	 * are completely unregistered, which is required to avoid module
+	 * unloading races.
+	 */
+	if (client->remove)
+		client->remove(device, client_data);
+
+	xa_erase(&device->client_data, client_id);
+	up_read(&device->client_data_rwsem);
 }
 
 static int verify_immutable(const struct ib_device *dev, u8 port)
@@ -461,7 +531,7 @@ static void ib_policy_change_task(struct work_struct *work)
 	struct ib_device *dev;
 	unsigned long index;
 
-	down_read(&lists_rwsem);
+	down_read(&devices_rwsem);
 	xa_for_each_marked (&devices, index, dev, DEVICE_REGISTERED) {
 		int i;
 
@@ -478,7 +548,7 @@ static void ib_policy_change_task(struct work_struct *work)
 				ib_security_cache_change(dev, i, sp);
 		}
 	}
-	up_read(&lists_rwsem);
+	up_read(&devices_rwsem);
 }
 
 static int ib_security_change(struct notifier_block *nb, unsigned long event,
@@ -501,6 +571,7 @@ static int assign_name(struct ib_device *device, const char *name)
 	static u32 last_id;
 	int ret;
 
+	down_write(&devices_rwsem);
 	/* Assign a unique name to the device */
 	if (strchr(name, '%'))
 		ret = alloc_name(device, name);
@@ -528,13 +599,17 @@ static int assign_name(struct ib_device *device, const char *name)
 	last_id = device->index + 1;
 
 	ret = 0;
+
 out:
+	up_write(&devices_rwsem);
 	return ret;
 }
 
 static void release_name(struct ib_device *device)
 {
+	down_write(&devices_rwsem);
 	xa_erase(&devices, device->index);
+	up_write(&devices_rwsem);
 }
 
 static void setup_dma_device(struct ib_device *device)
@@ -572,11 +647,18 @@ static void setup_dma_device(struct ib_device *device)
 	}
 }
 
+/*
+ * setup_device() allocates memory and sets up data that requires calling the
+ * device ops, this is the only reason these actions are not done during
+ * ib_alloc_device. It is undone by ib_dealloc_device().
+ */
 static int setup_device(struct ib_device *device)
 {
 	struct ib_udata uhw = {.outlen = 0, .inlen = 0};
 	int ret;
 
+	setup_dma_device(device);
+
 	ret = ib_device_check_mandatory(device);
 	if (ret)
 		return ret;
@@ -605,6 +687,54 @@ static int setup_device(struct ib_device *device)
 	return 0;
 }
 
+static void disable_device(struct ib_device *device)
+{
+	struct ib_client *client;
+
+	WARN_ON(!refcount_read(&device->refcount));
+
+	down_write(&devices_rwsem);
+	xa_clear_mark(&devices, device->index, DEVICE_REGISTERED);
+	up_write(&devices_rwsem);
+
+	down_read(&clients_rwsem);
+	list_for_each_entry_reverse(client, &client_list, list)
+		remove_client_context(device, client->client_id);
+	up_read(&clients_rwsem);
+
+	/* Pairs with refcount_set in enable_device */
+	ib_device_put(device);
+	wait_for_completion(&device->unreg_completion);
+}
+
+/*
+ * An enabled device is visible to all clients and to all the public facing
+ * APIs that return a device pointer.
+ */
+static int enable_device(struct ib_device *device)
+{
+	struct ib_client *client;
+	unsigned long index;
+	int ret;
+
+	refcount_set(&device->refcount, 1);
+	down_write(&devices_rwsem);
+	xa_set_mark(&devices, device->index, DEVICE_REGISTERED);
+	up_write(&devices_rwsem);
+
+	down_read(&clients_rwsem);
+	xa_for_each_marked (&clients, index, client, CLIENT_REGISTERED) {
+		ret = add_client_context(device, client);
+		if (ret) {
+			up_read(&clients_rwsem);
+			disable_device(device);
+			return ret;
+		}
+	}
+	up_read(&clients_rwsem);
+	return 0;
+}
+
 /**
  * ib_register_device - Register an IB device with IB core
  * @device:Device to register
@@ -617,26 +747,20 @@ static int setup_device(struct ib_device *device)
 int ib_register_device(struct ib_device *device, const char *name)
 {
 	int ret;
-	struct ib_client *client;
-	unsigned long index;
-
-	setup_dma_device(device);
-
-	mutex_lock(&device_mutex);
 
 	ret = assign_name(device, name);
 	if (ret)
-		goto out;
+		return ret;
 
 	ret = setup_device(device);
 	if (ret)
-		goto out_name;
+		goto out;
 
 	ret = ib_cache_setup_one(device);
 	if (ret) {
 		dev_warn(&device->dev,
 			 "Couldn't set up InfiniBand P_Key/GID cache\n");
-		goto out_name;
+		goto out;
 	}
 
 	ib_device_register_rdmacg(device);
@@ -648,25 +772,19 @@ int ib_register_device(struct ib_device *device, const char *name)
 		goto cg_cleanup;
 	}
 
-	refcount_set(&device->refcount, 1);
-
-	xa_for_each_marked (&clients, index, client, CLIENT_REGISTERED)
-		if (!add_client_context(device, client) && client->add)
-			client->add(device);
+	ret = enable_device(device);
+	if (ret)
+		goto sysfs_cleanup;
 
-	down_write(&lists_rwsem);
-	xa_set_mark(&devices, device->index, DEVICE_REGISTERED);
-	up_write(&lists_rwsem);
-	mutex_unlock(&device_mutex);
 	return 0;
 
+sysfs_cleanup:
+	ib_device_unregister_sysfs(device);
 cg_cleanup:
 	ib_device_unregister_rdmacg(device);
 	ib_cache_cleanup_one(device);
-out_name:
-	release_name(device);
 out:
-	mutex_unlock(&device_mutex);
+	release_name(device);
 	return ret;
 }
 EXPORT_SYMBOL(ib_register_device);
@@ -679,45 +797,11 @@ EXPORT_SYMBOL(ib_register_device);
  */
 void ib_unregister_device(struct ib_device *device)
 {
-	struct ib_client *client;
-	unsigned long index;
-
-	/*
-	 * Wait for all netlink command callers to finish working on the
-	 * device.
-	 */
-	ib_device_put(device);
-	wait_for_completion(&device->unreg_completion);
-
-	mutex_lock(&device_mutex);
-
-	down_write(&lists_rwsem);
-	xa_clear_mark(&devices, device->index, DEVICE_REGISTERED);
-	xa_for_each (&clients, index, client)
-		xa_clear_mark(&device->client_data, index,
-			      CLIENT_DATA_REGISTERED);
-	downgrade_write(&lists_rwsem);
-
-	list_for_each_entry_reverse(client, &client_list, list)
-		if (xa_get_mark(&device->client_data, client->client_id,
-				CLIENT_DATA_REGISTERED) &&
-		    client->remove)
-			client->remove(device, xa_load(&device->client_data,
-						       client->client_id));
-	up_read(&lists_rwsem);
-
+	disable_device(device);
 	ib_device_unregister_sysfs(device);
 	ib_device_unregister_rdmacg(device);
-
-	release_name(device);
-
-	mutex_unlock(&device_mutex);
-
 	ib_cache_cleanup_one(device);
-
-	down_write(&lists_rwsem);
-	xa_destroy(&device->client_data);
-	up_write(&lists_rwsem);
+	release_name(device);
 }
 EXPORT_SYMBOL(ib_unregister_device);
 
@@ -725,6 +809,7 @@ static int assign_client_id(struct ib_client *client)
 {
 	int ret;
 
+	down_write(&clients_rwsem);
 	/*
 	 * The add/remove callbacks must be called in FIFO/LIFO order. To
 	 * achieve this we assign client_ids so they are sorted in
@@ -743,7 +828,11 @@ static int assign_client_id(struct ib_client *client)
 	if (ret)
 		goto out;
 
+	xa_set_mark(&clients, client->client_id, CLIENT_REGISTERED);
+	list_add_tail(&client->list, &client_list);
+
 out:
+	up_write(&clients_rwsem);
 	return ret;
 }
 
@@ -766,23 +855,20 @@ int ib_register_client(struct ib_client *client)
 	unsigned long index;
 	int ret;
 
-	mutex_lock(&device_mutex);
 	ret = assign_client_id(client);
-	if (ret) {
-		mutex_unlock(&device_mutex);
+	if (ret)
 		return ret;
-	}
-
-	xa_for_each_marked (&devices, index, device, DEVICE_REGISTERED)
-		if (!add_client_context(device, client) && client->add)
-			client->add(device);
-
-	down_write(&lists_rwsem);
-	xa_set_mark(&clients, client->client_id, CLIENT_REGISTERED);
-	up_write(&lists_rwsem);
-
-	mutex_unlock(&device_mutex);
 
+	down_read(&devices_rwsem);
+	xa_for_each_marked (&devices, index, device, DEVICE_REGISTERED) {
+		ret = add_client_context(device, client);
+		if (ret) {
+			up_read(&devices_rwsem);
+			ib_unregister_client(client);
+			return ret;
+		}
+	}
+	up_read(&devices_rwsem);
 	return 0;
 }
 EXPORT_SYMBOL(ib_register_client);
@@ -794,38 +880,31 @@ EXPORT_SYMBOL(ib_register_client);
  * Upper level users use ib_unregister_client() to remove their client
  * registration.  When ib_unregister_client() is called, the client
  * will receive a remove callback for each IB device still registered.
+ *
+ * This is a full fence, once it returns no client callbacks will be called,
+ * or are running in another thread.
  */
 void ib_unregister_client(struct ib_client *client)
 {
 	struct ib_device *device;
 	unsigned long index;
 
-	mutex_lock(&device_mutex);
-
-	down_write(&lists_rwsem);
+	down_write(&clients_rwsem);
 	xa_clear_mark(&clients, client->client_id, CLIENT_REGISTERED);
-	up_write(&lists_rwsem);
-
-	xa_for_each_marked (&devices, index, device, DEVICE_REGISTERED) {
-		down_write(&lists_rwsem);
-		xa_clear_mark(&device->client_data, client->client_id,
-			      CLIENT_DATA_REGISTERED);
-		up_write(&lists_rwsem);
-
-		if (client->remove)
-			client->remove(device, xa_load(&device->client_data,
-						       client->client_id));
-
-		down_write(&lists_rwsem);
-		xa_erase(&device->client_data, client->client_id);
-		up_write(&lists_rwsem);
-	}
+	up_write(&clients_rwsem);
+	/*
+	 * Every device still known must be serialized to make sure we are
+	 * done with the client callbacks before we return.
+	 */
+	down_read(&devices_rwsem);
+	xa_for_each (&devices, index, device)
+		remove_client_context(device, client->client_id);
+	up_read(&devices_rwsem);
 
-	down_write(&lists_rwsem);
+	down_write(&clients_rwsem);
 	list_del(&client->list);
 	xa_erase(&clients, client->client_id);
-	up_write(&lists_rwsem);
-	mutex_unlock(&device_mutex);
+	up_write(&clients_rwsem);
 }
 EXPORT_SYMBOL(ib_unregister_client);
 
@@ -1010,10 +1089,10 @@ void ib_enum_all_roce_netdevs(roce_netdev_filter filter,
 	struct ib_device *dev;
 	unsigned long index;
 
-	down_read(&lists_rwsem);
+	down_read(&devices_rwsem);
 	xa_for_each_marked (&devices, index, dev, DEVICE_REGISTERED)
 		ib_enum_roce_netdev(dev, filter, filter_cookie, cb, cookie);
-	up_read(&lists_rwsem);
+	up_read(&devices_rwsem);
 }
 
 /**
@@ -1030,15 +1109,14 @@ int ib_enum_all_devs(nldev_callback nldev_cb, struct sk_buff *skb,
 	unsigned int idx = 0;
 	int ret = 0;
 
-	down_read(&lists_rwsem);
+	down_read(&devices_rwsem);
 	xa_for_each_marked (&devices, index, dev, DEVICE_REGISTERED) {
 		ret = nldev_cb(dev, skb, cb, idx);
 		if (ret)
 			break;
 		idx++;
 	}
-
-	up_read(&lists_rwsem);
+	up_read(&devices_rwsem);
 	return ret;
 }
 
@@ -1196,6 +1274,7 @@ EXPORT_SYMBOL(ib_find_pkey);
  * @gid:	A GID that the net_dev uses to communicate.
  * @addr:	Contains the IP address that the request specified as its
  *		destination.
+ *
  */
 struct net_device *ib_get_net_dev_by_params(struct ib_device *dev,
 					    u8 port,
@@ -1210,8 +1289,11 @@ struct net_device *ib_get_net_dev_by_params(struct ib_device *dev,
 	if (!rdma_protocol_ib(dev, port))
 		return NULL;
 
-	down_read(&lists_rwsem);
-
+	/*
+	 * Holding the read side guarantees that the client will not become
+	 * unregistered while we are calling get_net_dev_by_params()
+	 */
+	down_read(&dev->client_data_rwsem);
 	xan_for_each_marked (&dev->client_data, index, client_data,
 			     CLIENT_DATA_REGISTERED) {
 		struct ib_client *client = xa_load(&clients, index);
@@ -1224,8 +1306,7 @@ struct net_device *ib_get_net_dev_by_params(struct ib_device *dev,
 		if (net_dev)
 			break;
 	}
-
-	up_read(&lists_rwsem);
+	up_read(&dev->client_data_rwsem);
 
 	return net_dev;
 }
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 8558f31ca46f..135fab2c016c 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -2542,6 +2542,7 @@ struct ib_device {
 	struct list_head              event_handler_list;
 	spinlock_t                    event_handler_lock;
 
+	struct rw_semaphore	      client_data_rwsem;
 	struct xarray                 client_data;
 
 	struct ib_cache               cache;
-- 
cgit v1.2.3


From f818b82b80164014d7ee3df89bb110808778c796 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Fri, 8 Feb 2019 14:02:45 -0500
Subject: XArray: Mark xa_insert and xa_reserve as must_check

If the user doesn't care about the return value from xa_insert(), then
they should be using xa_store() instead.  The point of xa_reserve() is
to get the return value early before taking another lock, so this should
also be __must_check.

Signed-off-by: Matthew Wilcox <willy@infradead.org>
---
 include/linux/xarray.h | 23 ++++++++++++-----------
 lib/test_xarray.c      | 10 +++++-----
 2 files changed, 17 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/include/linux/xarray.h b/include/linux/xarray.h
index 5ed6b462e754..687c150071a5 100644
--- a/include/linux/xarray.h
+++ b/include/linux/xarray.h
@@ -497,12 +497,13 @@ void *__xa_erase(struct xarray *, unsigned long index);
 void *__xa_store(struct xarray *, unsigned long index, void *entry, gfp_t);
 void *__xa_cmpxchg(struct xarray *, unsigned long index, void *old,
 		void *entry, gfp_t);
-int __xa_insert(struct xarray *, unsigned long index, void *entry, gfp_t);
+int __must_check __xa_insert(struct xarray *, unsigned long index,
+		void *entry, gfp_t);
 int __must_check __xa_alloc(struct xarray *, u32 *id, void *entry,
 		struct xa_limit, gfp_t);
 int __must_check __xa_alloc_cyclic(struct xarray *, u32 *id, void *entry,
 		struct xa_limit, u32 *next, gfp_t);
-int __xa_reserve(struct xarray *, unsigned long index, gfp_t);
+int __must_check __xa_reserve(struct xarray *, unsigned long index, gfp_t);
 void __xa_set_mark(struct xarray *, unsigned long index, xa_mark_t);
 void __xa_clear_mark(struct xarray *, unsigned long index, xa_mark_t);
 
@@ -704,8 +705,8 @@ static inline void *xa_cmpxchg_irq(struct xarray *xa, unsigned long index,
  * Return: 0 if the store succeeded.  -EBUSY if another entry was present.
  * -ENOMEM if memory could not be allocated.
  */
-static inline int xa_insert(struct xarray *xa, unsigned long index,
-		void *entry, gfp_t gfp)
+static inline int __must_check xa_insert(struct xarray *xa,
+		unsigned long index, void *entry, gfp_t gfp)
 {
 	int err;
 
@@ -733,8 +734,8 @@ static inline int xa_insert(struct xarray *xa, unsigned long index,
  * Return: 0 if the store succeeded.  -EBUSY if another entry was present.
  * -ENOMEM if memory could not be allocated.
  */
-static inline int xa_insert_bh(struct xarray *xa, unsigned long index,
-		void *entry, gfp_t gfp)
+static inline int __must_check xa_insert_bh(struct xarray *xa,
+		unsigned long index, void *entry, gfp_t gfp)
 {
 	int err;
 
@@ -762,8 +763,8 @@ static inline int xa_insert_bh(struct xarray *xa, unsigned long index,
  * Return: 0 if the store succeeded.  -EBUSY if another entry was present.
  * -ENOMEM if memory could not be allocated.
  */
-static inline int xa_insert_irq(struct xarray *xa, unsigned long index,
-		void *entry, gfp_t gfp)
+static inline int __must_check xa_insert_irq(struct xarray *xa,
+		unsigned long index, void *entry, gfp_t gfp)
 {
 	int err;
 
@@ -978,7 +979,7 @@ static inline int xa_alloc_cyclic_irq(struct xarray *xa, u32 *id, void *entry,
  * May sleep if the @gfp flags permit.
  * Return: 0 if the reservation succeeded or -ENOMEM if it failed.
  */
-static inline
+static inline __must_check
 int xa_reserve(struct xarray *xa, unsigned long index, gfp_t gfp)
 {
 	int ret;
@@ -1002,7 +1003,7 @@ int xa_reserve(struct xarray *xa, unsigned long index, gfp_t gfp)
  * disabling softirqs.
  * Return: 0 if the reservation succeeded or -ENOMEM if it failed.
  */
-static inline
+static inline __must_check
 int xa_reserve_bh(struct xarray *xa, unsigned long index, gfp_t gfp)
 {
 	int ret;
@@ -1026,7 +1027,7 @@ int xa_reserve_bh(struct xarray *xa, unsigned long index, gfp_t gfp)
  * disabling interrupts.
  * Return: 0 if the reservation succeeded or -ENOMEM if it failed.
  */
-static inline
+static inline __must_check
 int xa_reserve_irq(struct xarray *xa, unsigned long index, gfp_t gfp)
 {
 	int ret;
diff --git a/lib/test_xarray.c b/lib/test_xarray.c
index eaf53f742c72..3eaa40ddc390 100644
--- a/lib/test_xarray.c
+++ b/lib/test_xarray.c
@@ -364,21 +364,21 @@ static noinline void check_reserve(struct xarray *xa)
 
 	/* An array with a reserved entry is not empty */
 	XA_BUG_ON(xa, !xa_empty(xa));
-	xa_reserve(xa, 12345678, GFP_KERNEL);
+	XA_BUG_ON(xa, xa_reserve(xa, 12345678, GFP_KERNEL) != 0);
 	XA_BUG_ON(xa, xa_empty(xa));
 	XA_BUG_ON(xa, xa_load(xa, 12345678));
 	xa_release(xa, 12345678);
 	XA_BUG_ON(xa, !xa_empty(xa));
 
 	/* Releasing a used entry does nothing */
-	xa_reserve(xa, 12345678, GFP_KERNEL);
+	XA_BUG_ON(xa, xa_reserve(xa, 12345678, GFP_KERNEL) != 0);
 	XA_BUG_ON(xa, xa_store_index(xa, 12345678, GFP_NOWAIT) != NULL);
 	xa_release(xa, 12345678);
 	xa_erase_index(xa, 12345678);
 	XA_BUG_ON(xa, !xa_empty(xa));
 
 	/* cmpxchg sees a reserved entry as NULL */
-	xa_reserve(xa, 12345678, GFP_KERNEL);
+	XA_BUG_ON(xa, xa_reserve(xa, 12345678, GFP_KERNEL) != 0);
 	XA_BUG_ON(xa, xa_cmpxchg(xa, 12345678, NULL, xa_mk_value(12345678),
 				GFP_NOWAIT) != NULL);
 	xa_release(xa, 12345678);
@@ -386,7 +386,7 @@ static noinline void check_reserve(struct xarray *xa)
 	XA_BUG_ON(xa, !xa_empty(xa));
 
 	/* But xa_insert does not */
-	xa_reserve(xa, 12345678, GFP_KERNEL);
+	XA_BUG_ON(xa, xa_reserve(xa, 12345678, GFP_KERNEL) != 0);
 	XA_BUG_ON(xa, xa_insert(xa, 12345678, xa_mk_value(12345678), 0) !=
 			-EBUSY);
 	XA_BUG_ON(xa, xa_empty(xa));
@@ -395,7 +395,7 @@ static noinline void check_reserve(struct xarray *xa)
 
 	/* Can iterate through a reserved entry */
 	xa_store_index(xa, 5, GFP_KERNEL);
-	xa_reserve(xa, 6, GFP_KERNEL);
+	XA_BUG_ON(xa, xa_reserve(xa, 6, GFP_KERNEL) != 0);
 	xa_store_index(xa, 7, GFP_KERNEL);
 
 	xa_for_each(xa, index, entry) {
-- 
cgit v1.2.3


From df9c716deb76642d0077770bca7107a31568c113 Mon Sep 17 00:00:00 2001
From: Sudarsana Reddy Kalluru <skalluru@marvell.com>
Date: Thu, 7 Feb 2019 06:20:11 -0800
Subject: qed: Add API for SmartAN query.

The patch adds driver interface to read the SmartAN capability from
management firmware.

Signed-off-by: Sudarsana Reddy Kalluru <skalluru@marvell.com>
Signed-off-by: Ariel Elior <aelior@marvell.com>
Signed-off-by: Michal Kalderon <mkalderon@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed_hsi.h  |  1 +
 drivers/net/ethernet/qlogic/qed/qed_main.c |  2 ++
 drivers/net/ethernet/qlogic/qed/qed_mcp.c  |  6 ++++++
 drivers/net/ethernet/qlogic/qed/qed_mcp.h  | 10 ++++++++++
 include/linux/qed/qed_if.h                 |  1 +
 5 files changed, 20 insertions(+)

(limited to 'include')

diff --git a/drivers/net/ethernet/qlogic/qed/qed_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_hsi.h
index 417121e74ee9..37edaa847512 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_hsi.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h
@@ -12796,6 +12796,7 @@ struct public_drv_mb {
 #define FW_MB_PARAM_GET_PF_RDMA_BOTH		0x3
 
 /* get MFW feature support response */
+#define FW_MB_PARAM_FEATURE_SUPPORT_SMARTLINQ	0x00000001
 #define FW_MB_PARAM_FEATURE_SUPPORT_EEE		0x00000002
 #define FW_MB_PARAM_FEATURE_SUPPORT_VLINK	0x00010000
 
diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c
index b47352643fb5..f164d4acebcb 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_main.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_main.c
@@ -281,6 +281,8 @@ int qed_fill_dev_info(struct qed_dev *cdev,
 		if (hw_info->b_wol_support == QED_WOL_SUPPORT_PME)
 			dev_info->wol_support = true;
 
+		dev_info->smart_an = qed_mcp_is_smart_an_supported(p_hwfn);
+
 		dev_info->abs_pf_id = QED_LEADING_HWFN(cdev)->abs_pf_id;
 	} else {
 		qed_vf_get_fw_version(&cdev->hwfns[0], &dev_info->fw_major,
diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
index bb8541847aa5..cc27fd60d689 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
@@ -3654,6 +3654,12 @@ void qed_mcp_resc_lock_default_init(struct qed_resc_lock_params *p_lock,
 	}
 }
 
+bool qed_mcp_is_smart_an_supported(struct qed_hwfn *p_hwfn)
+{
+	return !!(p_hwfn->mcp_info->capabilities &
+		  FW_MB_PARAM_FEATURE_SUPPORT_SMARTLINQ);
+}
+
 int qed_mcp_get_capabilities(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 {
 	u32 mcp_resp;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.h b/drivers/net/ethernet/qlogic/qed/qed_mcp.h
index 6e1d72a669ae..2799e6741765 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_mcp.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.h
@@ -1148,6 +1148,16 @@ void qed_mcp_resc_lock_default_init(struct qed_resc_lock_params *p_lock,
 				    struct qed_resc_unlock_params *p_unlock,
 				    enum qed_resc_lock
 				    resource, bool b_is_permanent);
+
+/**
+ * @brief - Return whether management firmware support smart AN
+ *
+ * @param p_hwfn
+ *
+ * @return bool - true if feature is supported.
+ */
+bool qed_mcp_is_smart_an_supported(struct qed_hwfn *p_hwfn);
+
 /**
  * @brief Learn of supported MFW features; To be done during early init
  *
diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h
index 35170f74ed80..f6165d304b4d 100644
--- a/include/linux/qed/qed_if.h
+++ b/include/linux/qed/qed_if.h
@@ -643,6 +643,7 @@ struct qed_dev_info {
 	u16		mtu;
 
 	bool wol_support;
+	bool smart_an;
 
 	/* MBI version */
 	u32 mbi_version;
-- 
cgit v1.2.3


From 3b5e74e0afe3382f9354b657714ac40673b7c597 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Fri, 8 Feb 2019 19:25:22 +0100
Subject: net: phy: disregard "Clause 22 registers present" bit in
 get_phy_c45_devs_in_pkg

Bit 0 in register 1.5 doesn't represent a device but is a flag that
Clause 22 registers are present. Therefore disregard this bit when
populating the device list. If code needs this information it
should read register 1.5 directly instead of accessing the device
list.
Because this bit doesn't represent a device don't define a
MDIO_MMD_XYZ constant, just define a MDIO_DEVS_XYZ constant for
the flag in the device list bitmap.

v2:
- make masking of bit 0 more explicit
- improve commit message

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/phy_device.c | 3 +++
 include/uapi/linux/mdio.h    | 1 +
 2 files changed, 4 insertions(+)

(limited to 'include')

diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index 9369e1323c39..d4fc1fd8af41 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -684,6 +684,9 @@ static int get_phy_c45_devs_in_pkg(struct mii_bus *bus, int addr, int dev_addr,
 		return -EIO;
 	*devices_in_package |= (phy_reg & 0xffff);
 
+	/* Bit 0 doesn't represent a device, it indicates c22 regs presence */
+	*devices_in_package &= ~BIT(0);
+
 	return 0;
 }
 
diff --git a/include/uapi/linux/mdio.h b/include/uapi/linux/mdio.h
index 2e6e309f0847..0e012b168e4d 100644
--- a/include/uapi/linux/mdio.h
+++ b/include/uapi/linux/mdio.h
@@ -115,6 +115,7 @@
 
 /* Device present registers. */
 #define MDIO_DEVS_PRESENT(devad)	(1 << (devad))
+#define MDIO_DEVS_C22PRESENT		MDIO_DEVS_PRESENT(0)
 #define MDIO_DEVS_PMAPMD		MDIO_DEVS_PRESENT(MDIO_MMD_PMAPMD)
 #define MDIO_DEVS_WIS			MDIO_DEVS_PRESENT(MDIO_MMD_WIS)
 #define MDIO_DEVS_PCS			MDIO_DEVS_PRESENT(MDIO_MMD_PCS)
-- 
cgit v1.2.3


From 180cf62cec0418a64dade18b3575047af46c6335 Mon Sep 17 00:00:00 2001
From: Sven Eckelmann <sven@narfation.org>
Date: Sat, 9 Feb 2019 00:05:36 +0100
Subject: batman-adv: Fix typo "reseved" -> "reserved"

checkpatch.pl complains since commit 45e417022023 ("scripts/spelling.txt:
add more spellings to spelling.txt") about an additional spelling mistake
in batman-adv:`

  CHECK: 'reseved' may be misspelled - perhaps 'reserved'?
  #232: FILE: include/uapi/linux/batadv_packet.h:232:
  + * @flags: reseved for routing relevant flags - currently always 0

Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
---
 include/uapi/linux/batadv_packet.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/linux/batadv_packet.h b/include/uapi/linux/batadv_packet.h
index 7eb2936a8e22..c99336f4eefe 100644
--- a/include/uapi/linux/batadv_packet.h
+++ b/include/uapi/linux/batadv_packet.h
@@ -229,7 +229,7 @@ struct batadv_ogm_packet {
  * @packet_type: batman-adv packet type, part of the general header
  * @version: batman-adv protocol version, part of the general header
  * @ttl: time to live for this packet, part of the general header
- * @flags: reseved for routing relevant flags - currently always 0
+ * @flags: reserved for routing relevant flags - currently always 0
  * @seqno: sequence number
  * @orig: originator mac address
  * @tvlv_len: length of the appended tvlv buffer (in bytes)
-- 
cgit v1.2.3


From 60040513536097584c3d55b39acdfa7080645d80 Mon Sep 17 00:00:00 2001
From: Sven Eckelmann <sven@narfation.org>
Date: Fri, 23 Nov 2018 12:14:56 +0100
Subject: batman-adv: Prepare framework for mesh genl config

The batman-adv configuration interface was implemented solely using sysfs.
This approach was condemned by non-batadv developers as "huge mistake".
Instead a netlink/genl based implementation was suggested.

The main objects for this configuration is the mesh/soft-interface object.
Its actual object in memory already contains most of the available
configuration settings. The genl interface reflects this by allowing to
get/set it using the mesh specific commands.

The BATADV_CMD_GET_MESH_INFO (or short version BATADV_CMD_GET_MESH) is
reused as get command because it already provides the content of other
information from the mesh/soft-interface which are not yet configuration
specific.

The set command BATADV_CMD_SET_MESH will also notify interested userspace
listeners of the "config" mcast group using the BATADV_CMD_SET_MESH command
message type that settings might have been changed and what the current
values are.

Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
---
 include/uapi/linux/batman_adv.h |  16 +++-
 net/batman-adv/netlink.c        | 160 ++++++++++++++++++++++++----------------
 2 files changed, 111 insertions(+), 65 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/batman_adv.h b/include/uapi/linux/batman_adv.h
index a28e76a7e0a2..b3394aac9a88 100644
--- a/include/uapi/linux/batman_adv.h
+++ b/include/uapi/linux/batman_adv.h
@@ -27,6 +27,7 @@
 
 #define BATADV_NL_NAME "batadv"
 
+#define BATADV_NL_MCAST_GROUP_CONFIG	"config"
 #define BATADV_NL_MCAST_GROUP_TPMETER	"tpmeter"
 
 /**
@@ -372,10 +373,14 @@ enum batadv_nl_commands {
 	BATADV_CMD_UNSPEC,
 
 	/**
-	 * @BATADV_CMD_GET_MESH_INFO: Query basic information about batman-adv
-	 * device
+	 * @BATADV_CMD_GET_MESH: Get attributes from softif/mesh
 	 */
-	BATADV_CMD_GET_MESH_INFO,
+	BATADV_CMD_GET_MESH,
+
+	/**
+	 * @BATADV_CMD_GET_MESH_INFO: Alias for @BATADV_CMD_GET_MESH
+	 */
+	BATADV_CMD_GET_MESH_INFO = BATADV_CMD_GET_MESH,
 
 	/**
 	 * @BATADV_CMD_TP_METER: Start a tp meter session
@@ -443,6 +448,11 @@ enum batadv_nl_commands {
 	 */
 	BATADV_CMD_GET_MCAST_FLAGS,
 
+	/**
+	 * @BATADV_CMD_SET_MESH: Set attributes for softif/mesh
+	 */
+	BATADV_CMD_SET_MESH,
+
 	/* add new commands above here */
 
 	/**
diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c
index e56c40d1bfe0..179cc7cfcb70 100644
--- a/net/batman-adv/netlink.c
+++ b/net/batman-adv/netlink.c
@@ -62,6 +62,7 @@ struct genl_family batadv_netlink_family;
 
 /* multicast groups */
 enum batadv_netlink_multicast_groups {
+	BATADV_NL_MCGRP_CONFIG,
 	BATADV_NL_MCGRP_TPMETER,
 };
 
@@ -78,6 +79,7 @@ enum batadv_genl_ops_flags {
 };
 
 static const struct genl_multicast_group batadv_netlink_mcgrps[] = {
+	[BATADV_NL_MCGRP_CONFIG] = { .name = BATADV_NL_MCAST_GROUP_CONFIG },
 	[BATADV_NL_MCGRP_TPMETER] = { .name = BATADV_NL_MCAST_GROUP_TPMETER },
 };
 
@@ -138,20 +140,29 @@ batadv_netlink_get_ifindex(const struct nlmsghdr *nlh, int attrtype)
 }
 
 /**
- * batadv_netlink_mesh_info_put() - fill in generic information about mesh
- *  interface
- * @msg: netlink message to be sent back
- * @soft_iface: interface for which the data should be taken
+ * batadv_netlink_mesh_fill() - Fill message with mesh attributes
+ * @msg: Netlink message to dump into
+ * @bat_priv: the bat priv with all the soft interface information
+ * @cmd: type of message to generate
+ * @portid: Port making netlink request
+ * @seq: sequence number for message
+ * @flags: Additional flags for message
  *
- * Return: 0 on success, < 0 on error
+ * Return: 0 on success or negative error number in case of failure
  */
-static int
-batadv_netlink_mesh_info_put(struct sk_buff *msg, struct net_device *soft_iface)
+static int batadv_netlink_mesh_fill(struct sk_buff *msg,
+				    struct batadv_priv *bat_priv,
+				    enum batadv_nl_commands cmd,
+				    u32 portid, u32 seq, int flags)
 {
-	struct batadv_priv *bat_priv = netdev_priv(soft_iface);
+	struct net_device *soft_iface = bat_priv->soft_iface;
 	struct batadv_hard_iface *primary_if = NULL;
 	struct net_device *hard_iface;
-	int ret = -ENOBUFS;
+	void *hdr;
+
+	hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family, flags, cmd);
+	if (!hdr)
+		return -ENOBUFS;
 
 	if (nla_put_string(msg, BATADV_ATTR_VERSION, BATADV_SOURCE_VERSION) ||
 	    nla_put_string(msg, BATADV_ATTR_ALGO_NAME,
@@ -162,16 +173,16 @@ batadv_netlink_mesh_info_put(struct sk_buff *msg, struct net_device *soft_iface)
 		    soft_iface->dev_addr) ||
 	    nla_put_u8(msg, BATADV_ATTR_TT_TTVN,
 		       (u8)atomic_read(&bat_priv->tt.vn)))
-		goto out;
+		goto nla_put_failure;
 
 #ifdef CONFIG_BATMAN_ADV_BLA
 	if (nla_put_u16(msg, BATADV_ATTR_BLA_CRC,
 			ntohs(bat_priv->bla.claim_dest.group)))
-		goto out;
+		goto nla_put_failure;
 #endif
 
 	if (batadv_mcast_mesh_info_put(msg, bat_priv))
-		goto out;
+		goto nla_put_failure;
 
 	primary_if = batadv_primary_if_get_selected(bat_priv);
 	if (primary_if && primary_if->if_status == BATADV_IF_ACTIVE) {
@@ -183,77 +194,95 @@ batadv_netlink_mesh_info_put(struct sk_buff *msg, struct net_device *soft_iface)
 				   hard_iface->name) ||
 		    nla_put(msg, BATADV_ATTR_HARD_ADDRESS, ETH_ALEN,
 			    hard_iface->dev_addr))
-			goto out;
+			goto nla_put_failure;
 	}
 
-	ret = 0;
+	if (primary_if)
+		batadv_hardif_put(primary_if);
 
- out:
+	genlmsg_end(msg, hdr);
+	return 0;
+
+nla_put_failure:
 	if (primary_if)
 		batadv_hardif_put(primary_if);
 
-	return ret;
+	genlmsg_cancel(msg, hdr);
+	return -EMSGSIZE;
 }
 
 /**
- * batadv_netlink_get_mesh_info() - handle incoming BATADV_CMD_GET_MESH_INFO
- *  netlink request
- * @skb: received netlink message
- * @info: receiver information
+ * batadv_netlink_notify_mesh() - send softif attributes to listener
+ * @bat_priv: the bat priv with all the soft interface information
  *
  * Return: 0 on success, < 0 on error
  */
-static int
-batadv_netlink_get_mesh_info(struct sk_buff *skb, struct genl_info *info)
+static int batadv_netlink_notify_mesh(struct batadv_priv *bat_priv)
 {
-	struct net *net = genl_info_net(info);
-	struct net_device *soft_iface;
-	struct sk_buff *msg = NULL;
-	void *msg_head;
-	int ifindex;
+	struct sk_buff *msg;
 	int ret;
 
-	if (!info->attrs[BATADV_ATTR_MESH_IFINDEX])
-		return -EINVAL;
-
-	ifindex = nla_get_u32(info->attrs[BATADV_ATTR_MESH_IFINDEX]);
-	if (!ifindex)
-		return -EINVAL;
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
 
-	soft_iface = dev_get_by_index(net, ifindex);
-	if (!soft_iface || !batadv_softif_is_valid(soft_iface)) {
-		ret = -ENODEV;
-		goto out;
+	ret = batadv_netlink_mesh_fill(msg, bat_priv, BATADV_CMD_SET_MESH,
+				       0, 0, 0);
+	if (ret < 0) {
+		nlmsg_free(msg);
+		return ret;
 	}
 
-	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
-	if (!msg) {
-		ret = -ENOMEM;
-		goto out;
-	}
+	genlmsg_multicast_netns(&batadv_netlink_family,
+				dev_net(bat_priv->soft_iface), msg, 0,
+				BATADV_NL_MCGRP_CONFIG, GFP_KERNEL);
 
-	msg_head = genlmsg_put(msg, info->snd_portid, info->snd_seq,
-			       &batadv_netlink_family, 0,
-			       BATADV_CMD_GET_MESH_INFO);
-	if (!msg_head) {
-		ret = -ENOBUFS;
-		goto out;
-	}
+	return 0;
+}
 
-	ret = batadv_netlink_mesh_info_put(msg, soft_iface);
+/**
+ * batadv_netlink_get_mesh() - Get softif attributes
+ * @skb: Netlink message with request data
+ * @info: receiver information
+ *
+ * Return: 0 on success or negative error number in case of failure
+ */
+static int batadv_netlink_get_mesh(struct sk_buff *skb, struct genl_info *info)
+{
+	struct batadv_priv *bat_priv = info->user_ptr[0];
+	struct sk_buff *msg;
+	int ret;
 
- out:
-	if (soft_iface)
-		dev_put(soft_iface);
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
 
-	if (ret) {
-		if (msg)
-			nlmsg_free(msg);
+	ret = batadv_netlink_mesh_fill(msg, bat_priv, BATADV_CMD_GET_MESH,
+				       info->snd_portid, info->snd_seq, 0);
+	if (ret < 0) {
+		nlmsg_free(msg);
 		return ret;
 	}
 
-	genlmsg_end(msg, msg_head);
-	return genlmsg_reply(msg, info);
+	ret = genlmsg_reply(msg, info);
+
+	return ret;
+}
+
+/**
+ * batadv_netlink_set_mesh() - Set softif attributes
+ * @skb: Netlink message with request data
+ * @info: receiver information
+ *
+ * Return: 0 on success or negative error number in case of failure
+ */
+static int batadv_netlink_set_mesh(struct sk_buff *skb, struct genl_info *info)
+{
+	struct batadv_priv *bat_priv = info->user_ptr[0];
+
+	batadv_netlink_notify_mesh(bat_priv);
+
+	return 0;
 }
 
 /**
@@ -600,10 +629,11 @@ static void batadv_post_doit(const struct genl_ops *ops, struct sk_buff *skb,
 
 static const struct genl_ops batadv_netlink_ops[] = {
 	{
-		.cmd = BATADV_CMD_GET_MESH_INFO,
-		.flags = GENL_ADMIN_PERM,
+		.cmd = BATADV_CMD_GET_MESH,
+		/* can be retrieved by unprivileged users */
 		.policy = batadv_netlink_policy,
-		.doit = batadv_netlink_get_mesh_info,
+		.doit = batadv_netlink_get_mesh,
+		.internal_flags = BATADV_FLAG_NEED_MESH,
 	},
 	{
 		.cmd = BATADV_CMD_TP_METER,
@@ -685,7 +715,13 @@ static const struct genl_ops batadv_netlink_ops[] = {
 		.policy = batadv_netlink_policy,
 		.dumpit = batadv_mcast_flags_dump,
 	},
-
+	{
+		.cmd = BATADV_CMD_SET_MESH,
+		.flags = GENL_ADMIN_PERM,
+		.policy = batadv_netlink_policy,
+		.doit = batadv_netlink_set_mesh,
+		.internal_flags = BATADV_FLAG_NEED_MESH,
+	},
 };
 
 struct genl_family batadv_netlink_family __ro_after_init = {
-- 
cgit v1.2.3


From 5c55a40fa801df2d807141319c0fdbb3939c3947 Mon Sep 17 00:00:00 2001
From: Sven Eckelmann <sven@narfation.org>
Date: Fri, 23 Nov 2018 12:33:17 +0100
Subject: batman-adv: Prepare framework for hardif genl config

The batman-adv configuration interface was implemented solely using sysfs.
This approach was condemned by non-batadv developers as "huge mistake".
Instead a netlink/genl based implementation was suggested.

Beside the mesh/soft-interface specific configuration, the
slave/hard-interface have B.A.T.M.A.N. V specific configuration settings.
The genl interface reflects this by allowing to get/set it using the
hard-interface specific commands.

The BATADV_CMD_GET_HARDIFS (or short version BATADV_CMD_GET_HARDIF) is
reused as get command because it already allow sto dump the content of
other information from the slave/hard-interface which are not yet
configuration specific.

The set command BATADV_CMD_SET_HARDIF will also notify interested userspace
listeners of the "config" mcast group using the BATADV_CMD_SET_HARDIF
command message type that settings might have been changed and what the
current values are.

Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
---
 include/uapi/linux/batman_adv.h |  16 ++-
 net/batman-adv/netlink.c        | 240 ++++++++++++++++++++++++++++++++++++----
 2 files changed, 233 insertions(+), 23 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/batman_adv.h b/include/uapi/linux/batman_adv.h
index b3394aac9a88..62456a087ef6 100644
--- a/include/uapi/linux/batman_adv.h
+++ b/include/uapi/linux/batman_adv.h
@@ -398,9 +398,15 @@ enum batadv_nl_commands {
 	BATADV_CMD_GET_ROUTING_ALGOS,
 
 	/**
-	 * @BATADV_CMD_GET_HARDIFS: Query list of hard interfaces
+	 * @BATADV_CMD_GET_HARDIF: Get attributes from a hardif of the
+	 *  current softif
 	 */
-	BATADV_CMD_GET_HARDIFS,
+	BATADV_CMD_GET_HARDIF,
+
+	/**
+	 * @BATADV_CMD_GET_HARDIFS: Alias for @BATADV_CMD_GET_HARDIF
+	 */
+	BATADV_CMD_GET_HARDIFS = BATADV_CMD_GET_HARDIF,
 
 	/**
 	 * @BATADV_CMD_GET_TRANSTABLE_LOCAL: Query list of local translations
@@ -453,6 +459,12 @@ enum batadv_nl_commands {
 	 */
 	BATADV_CMD_SET_MESH,
 
+	/**
+	 * @BATADV_CMD_SET_HARDIF: Set attributes for hardif of the
+	 *  current softif
+	 */
+	BATADV_CMD_SET_HARDIF,
+
 	/* add new commands above here */
 
 	/**
diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c
index 179cc7cfcb70..a184e4225fb5 100644
--- a/net/batman-adv/netlink.c
+++ b/net/batman-adv/netlink.c
@@ -21,6 +21,7 @@
 
 #include <linux/atomic.h>
 #include <linux/bitops.h>
+#include <linux/bug.h>
 #include <linux/byteorder/generic.h>
 #include <linux/cache.h>
 #include <linux/err.h>
@@ -76,6 +77,13 @@ enum batadv_genl_ops_flags {
 	 *  saved in info->user_ptr[0]
 	 */
 	BATADV_FLAG_NEED_MESH = BIT(0),
+
+	/**
+	 * @BATADV_FLAG_NEED_HARDIF: request requires valid hard interface in
+	 *  attribute BATADV_ATTR_HARD_IFINDEX and expects a pointer to it to be
+	 *  saved in info->user_ptr[1]
+	 */
+	BATADV_FLAG_NEED_HARDIF = BIT(1),
 };
 
 static const struct genl_multicast_group batadv_netlink_mcgrps[] = {
@@ -446,29 +454,38 @@ batadv_netlink_tp_meter_cancel(struct sk_buff *skb, struct genl_info *info)
 }
 
 /**
- * batadv_netlink_dump_hardif_entry() - Dump one hard interface into a message
+ * batadv_netlink_hardif_fill() - Fill message with hardif attributes
  * @msg: Netlink message to dump into
+ * @bat_priv: the bat priv with all the soft interface information
+ * @hard_iface: hard interface which was modified
+ * @cmd: type of message to generate
  * @portid: Port making netlink request
+ * @seq: sequence number for message
+ * @flags: Additional flags for message
  * @cb: Control block containing additional options
- * @hard_iface: Hard interface to dump
  *
- * Return: error code, or 0 on success
+ * Return: 0 on success or negative error number in case of failure
  */
-static int
-batadv_netlink_dump_hardif_entry(struct sk_buff *msg, u32 portid,
-				 struct netlink_callback *cb,
-				 struct batadv_hard_iface *hard_iface)
+static int batadv_netlink_hardif_fill(struct sk_buff *msg,
+				      struct batadv_priv *bat_priv,
+				      struct batadv_hard_iface *hard_iface,
+				      enum batadv_nl_commands cmd,
+				      u32 portid, u32 seq, int flags,
+				      struct netlink_callback *cb)
 {
 	struct net_device *net_dev = hard_iface->net_dev;
 	void *hdr;
 
-	hdr = genlmsg_put(msg, portid, cb->nlh->nlmsg_seq,
-			  &batadv_netlink_family, NLM_F_MULTI,
-			  BATADV_CMD_GET_HARDIFS);
+	hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family, flags, cmd);
 	if (!hdr)
-		return -EMSGSIZE;
+		return -ENOBUFS;
+
+	if (cb)
+		genl_dump_check_consistent(cb, hdr);
 
-	genl_dump_check_consistent(cb, hdr);
+	if (nla_put_u32(msg, BATADV_ATTR_MESH_IFINDEX,
+			bat_priv->soft_iface->ifindex))
+		goto nla_put_failure;
 
 	if (nla_put_u32(msg, BATADV_ATTR_HARD_IFINDEX,
 			net_dev->ifindex) ||
@@ -486,24 +503,107 @@ batadv_netlink_dump_hardif_entry(struct sk_buff *msg, u32 portid,
 	genlmsg_end(msg, hdr);
 	return 0;
 
- nla_put_failure:
+nla_put_failure:
 	genlmsg_cancel(msg, hdr);
 	return -EMSGSIZE;
 }
 
 /**
- * batadv_netlink_dump_hardifs() - Dump all hard interface into a messages
+ * batadv_netlink_notify_hardif() - send hardif attributes to listener
+ * @bat_priv: the bat priv with all the soft interface information
+ * @hard_iface: hard interface which was modified
+ *
+ * Return: 0 on success, < 0 on error
+ */
+static int batadv_netlink_notify_hardif(struct batadv_priv *bat_priv,
+					struct batadv_hard_iface *hard_iface)
+{
+	struct sk_buff *msg;
+	int ret;
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+
+	ret = batadv_netlink_hardif_fill(msg, bat_priv, hard_iface,
+					 BATADV_CMD_SET_HARDIF, 0, 0, 0, NULL);
+	if (ret < 0) {
+		nlmsg_free(msg);
+		return ret;
+	}
+
+	genlmsg_multicast_netns(&batadv_netlink_family,
+				dev_net(bat_priv->soft_iface), msg, 0,
+				BATADV_NL_MCGRP_CONFIG, GFP_KERNEL);
+
+	return 0;
+}
+
+/**
+ * batadv_netlink_get_hardif() - Get hardif attributes
+ * @skb: Netlink message with request data
+ * @info: receiver information
+ *
+ * Return: 0 on success or negative error number in case of failure
+ */
+static int batadv_netlink_get_hardif(struct sk_buff *skb,
+				     struct genl_info *info)
+{
+	struct batadv_hard_iface *hard_iface = info->user_ptr[1];
+	struct batadv_priv *bat_priv = info->user_ptr[0];
+	struct sk_buff *msg;
+	int ret;
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+
+	ret = batadv_netlink_hardif_fill(msg, bat_priv, hard_iface,
+					 BATADV_CMD_GET_HARDIF,
+					 info->snd_portid, info->snd_seq, 0,
+					 NULL);
+	if (ret < 0) {
+		nlmsg_free(msg);
+		return ret;
+	}
+
+	ret = genlmsg_reply(msg, info);
+
+	return ret;
+}
+
+/**
+ * batadv_netlink_set_hardif() - Set hardif attributes
+ * @skb: Netlink message with request data
+ * @info: receiver information
+ *
+ * Return: 0 on success or negative error number in case of failure
+ */
+static int batadv_netlink_set_hardif(struct sk_buff *skb,
+				     struct genl_info *info)
+{
+	struct batadv_hard_iface *hard_iface = info->user_ptr[1];
+	struct batadv_priv *bat_priv = info->user_ptr[0];
+
+	batadv_netlink_notify_hardif(bat_priv, hard_iface);
+
+	return 0;
+}
+
+/**
+ * batadv_netlink_dump_hardif() - Dump all hard interface into a messages
  * @msg: Netlink message to dump into
  * @cb: Parameters from query
  *
  * Return: error code, or length of reply message on success
  */
 static int
-batadv_netlink_dump_hardifs(struct sk_buff *msg, struct netlink_callback *cb)
+batadv_netlink_dump_hardif(struct sk_buff *msg, struct netlink_callback *cb)
 {
 	struct net *net = sock_net(cb->skb->sk);
 	struct net_device *soft_iface;
 	struct batadv_hard_iface *hard_iface;
+	struct batadv_priv *bat_priv;
 	int ifindex;
 	int portid = NETLINK_CB(cb->skb).portid;
 	int skip = cb->args[0];
@@ -523,6 +623,8 @@ batadv_netlink_dump_hardifs(struct sk_buff *msg, struct netlink_callback *cb)
 		return -ENODEV;
 	}
 
+	bat_priv = netdev_priv(soft_iface);
+
 	rtnl_lock();
 	cb->seq = batadv_hardif_generation << 1 | 1;
 
@@ -533,8 +635,10 @@ batadv_netlink_dump_hardifs(struct sk_buff *msg, struct netlink_callback *cb)
 		if (i++ < skip)
 			continue;
 
-		if (batadv_netlink_dump_hardif_entry(msg, portid, cb,
-						     hard_iface)) {
+		if (batadv_netlink_hardif_fill(msg, bat_priv, hard_iface,
+					       BATADV_CMD_GET_HARDIF,
+					       portid, cb->nlh->nlmsg_seq,
+					       NLM_F_MULTI, cb)) {
 			i--;
 			break;
 		}
@@ -583,6 +687,52 @@ err_put_softif:
 	return ERR_PTR(-EINVAL);
 }
 
+/**
+ * batadv_get_hardif_from_info() - Retrieve hardif from genl attributes
+ * @bat_priv: the bat priv with all the soft interface information
+ * @net: the applicable net namespace
+ * @info: receiver information
+ *
+ * Return: Pointer to hard interface (with increased refcnt) on success, error
+ *  pointer on error
+ */
+static struct batadv_hard_iface *
+batadv_get_hardif_from_info(struct batadv_priv *bat_priv, struct net *net,
+			    struct genl_info *info)
+{
+	struct batadv_hard_iface *hard_iface;
+	struct net_device *hard_dev;
+	unsigned int hardif_index;
+
+	if (!info->attrs[BATADV_ATTR_HARD_IFINDEX])
+		return ERR_PTR(-EINVAL);
+
+	hardif_index = nla_get_u32(info->attrs[BATADV_ATTR_HARD_IFINDEX]);
+
+	hard_dev = dev_get_by_index(net, hardif_index);
+	if (!hard_dev)
+		return ERR_PTR(-ENODEV);
+
+	hard_iface = batadv_hardif_get_by_netdev(hard_dev);
+	if (!hard_iface)
+		goto err_put_harddev;
+
+	if (hard_iface->soft_iface != bat_priv->soft_iface)
+		goto err_put_hardif;
+
+	/* hard_dev is referenced by hard_iface and not needed here */
+	dev_put(hard_dev);
+
+	return hard_iface;
+
+err_put_hardif:
+	batadv_hardif_put(hard_iface);
+err_put_harddev:
+	dev_put(hard_dev);
+
+	return ERR_PTR(-EINVAL);
+}
+
 /**
  * batadv_pre_doit() - Prepare batman-adv genl doit request
  * @ops: requested netlink operation
@@ -595,8 +745,21 @@ static int batadv_pre_doit(const struct genl_ops *ops, struct sk_buff *skb,
 			   struct genl_info *info)
 {
 	struct net *net = genl_info_net(info);
+	struct batadv_hard_iface *hard_iface;
+	struct batadv_priv *bat_priv = NULL;
 	struct net_device *soft_iface;
-	struct batadv_priv *bat_priv;
+	u8 user_ptr1_flags;
+	u8 mesh_dep_flags;
+	int ret;
+
+	user_ptr1_flags = BATADV_FLAG_NEED_HARDIF;
+	if (WARN_ON(hweight8(ops->internal_flags & user_ptr1_flags) > 1))
+		return -EINVAL;
+
+	mesh_dep_flags = BATADV_FLAG_NEED_HARDIF;
+	if (WARN_ON((ops->internal_flags & mesh_dep_flags) &&
+		    (~ops->internal_flags & BATADV_FLAG_NEED_MESH)))
+		return -EINVAL;
 
 	if (ops->internal_flags & BATADV_FLAG_NEED_MESH) {
 		soft_iface = batadv_get_softif_from_info(net, info);
@@ -607,7 +770,23 @@ static int batadv_pre_doit(const struct genl_ops *ops, struct sk_buff *skb,
 		info->user_ptr[0] = bat_priv;
 	}
 
+	if (ops->internal_flags & BATADV_FLAG_NEED_HARDIF) {
+		hard_iface = batadv_get_hardif_from_info(bat_priv, net, info);
+		if (IS_ERR(hard_iface)) {
+			ret = PTR_ERR(hard_iface);
+			goto err_put_softif;
+		}
+
+		info->user_ptr[1] = hard_iface;
+	}
+
 	return 0;
+
+err_put_softif:
+	if (bat_priv)
+		dev_put(bat_priv->soft_iface);
+
+	return ret;
 }
 
 /**
@@ -619,8 +798,16 @@ static int batadv_pre_doit(const struct genl_ops *ops, struct sk_buff *skb,
 static void batadv_post_doit(const struct genl_ops *ops, struct sk_buff *skb,
 			     struct genl_info *info)
 {
+	struct batadv_hard_iface *hard_iface;
 	struct batadv_priv *bat_priv;
 
+	if (ops->internal_flags & BATADV_FLAG_NEED_HARDIF &&
+	    info->user_ptr[1]) {
+		hard_iface = info->user_ptr[1];
+
+		batadv_hardif_put(hard_iface);
+	}
+
 	if (ops->internal_flags & BATADV_FLAG_NEED_MESH && info->user_ptr[0]) {
 		bat_priv = info->user_ptr[0];
 		dev_put(bat_priv->soft_iface);
@@ -656,10 +843,13 @@ static const struct genl_ops batadv_netlink_ops[] = {
 		.dumpit = batadv_algo_dump,
 	},
 	{
-		.cmd = BATADV_CMD_GET_HARDIFS,
-		.flags = GENL_ADMIN_PERM,
+		.cmd = BATADV_CMD_GET_HARDIF,
+		/* can be retrieved by unprivileged users */
 		.policy = batadv_netlink_policy,
-		.dumpit = batadv_netlink_dump_hardifs,
+		.dumpit = batadv_netlink_dump_hardif,
+		.doit = batadv_netlink_get_hardif,
+		.internal_flags = BATADV_FLAG_NEED_MESH |
+				  BATADV_FLAG_NEED_HARDIF,
 	},
 	{
 		.cmd = BATADV_CMD_GET_TRANSTABLE_LOCAL,
@@ -722,6 +912,14 @@ static const struct genl_ops batadv_netlink_ops[] = {
 		.doit = batadv_netlink_set_mesh,
 		.internal_flags = BATADV_FLAG_NEED_MESH,
 	},
+	{
+		.cmd = BATADV_CMD_SET_HARDIF,
+		.flags = GENL_ADMIN_PERM,
+		.policy = batadv_netlink_policy,
+		.doit = batadv_netlink_set_hardif,
+		.internal_flags = BATADV_FLAG_NEED_MESH |
+				  BATADV_FLAG_NEED_HARDIF,
+	},
 };
 
 struct genl_family batadv_netlink_family __ro_after_init = {
-- 
cgit v1.2.3


From 49e7e37cd98122126e8da58df2fe2261c6e83df2 Mon Sep 17 00:00:00 2001
From: Sven Eckelmann <sven@narfation.org>
Date: Fri, 23 Nov 2018 12:41:08 +0100
Subject: batman-adv: Prepare framework for vlan genl config

The batman-adv configuration interface was implemented solely using sysfs.
This approach was condemned by non-batadv developers as "huge mistake".
Instead a netlink/genl based implementation was suggested.

Beside the mesh/soft-interface specific configuration, the VLANs on top of
the mesh/soft-interface have configuration settings. The genl interface
reflects this by allowing to get/set it using the vlan specific commands
BATADV_CMD_GET_VLAN/BATADV_CMD_SET_VLAN.

The set command BATADV_CMD_SET_MESH will also notify interested userspace
listeners of the "config" mcast group using the BATADV_CMD_SET_VLAN command
message type that settings might have been changed and what the current
values are.

Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
---
 include/uapi/linux/batman_adv.h |  17 ++++
 net/batman-adv/netlink.c        | 191 +++++++++++++++++++++++++++++++++++++++-
 2 files changed, 206 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/batman_adv.h b/include/uapi/linux/batman_adv.h
index 62456a087ef6..10f3cc34fe16 100644
--- a/include/uapi/linux/batman_adv.h
+++ b/include/uapi/linux/batman_adv.h
@@ -345,6 +345,11 @@ enum batadv_nl_attrs {
 	 */
 	BATADV_ATTR_MCAST_FLAGS_PRIV,
 
+	/**
+	 * @BATADV_ATTR_VLANID: VLAN id on top of soft interface
+	 */
+	BATADV_ATTR_VLANID,
+
 	/* add attributes above here, update the policy in netlink.c */
 
 	/**
@@ -465,6 +470,18 @@ enum batadv_nl_commands {
 	 */
 	BATADV_CMD_SET_HARDIF,
 
+	/**
+	 * @BATADV_CMD_GET_VLAN: Get attributes from a VLAN of the
+	 *  current softif
+	 */
+	BATADV_CMD_GET_VLAN,
+
+	/**
+	 * @BATADV_CMD_SET_VLAN: Set attributes for VLAN of the
+	 *  current softif
+	 */
+	BATADV_CMD_SET_VLAN,
+
 	/* add new commands above here */
 
 	/**
diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c
index a184e4225fb5..d40913bb9031 100644
--- a/net/batman-adv/netlink.c
+++ b/net/batman-adv/netlink.c
@@ -30,6 +30,7 @@
 #include <linux/genetlink.h>
 #include <linux/gfp.h>
 #include <linux/if_ether.h>
+#include <linux/if_vlan.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/list.h>
@@ -84,6 +85,13 @@ enum batadv_genl_ops_flags {
 	 *  saved in info->user_ptr[1]
 	 */
 	BATADV_FLAG_NEED_HARDIF = BIT(1),
+
+	/**
+	 * @BATADV_FLAG_NEED_VLAN: request requires valid vlan in
+	 *  attribute BATADV_ATTR_VLANID and expects a pointer to it to be
+	 *  saved in info->user_ptr[1]
+	 */
+	BATADV_FLAG_NEED_VLAN = BIT(2),
 };
 
 static const struct genl_multicast_group batadv_netlink_mcgrps[] = {
@@ -130,6 +138,7 @@ static const struct nla_policy batadv_netlink_policy[NUM_BATADV_ATTR] = {
 	[BATADV_ATTR_DAT_CACHE_VID]		= { .type = NLA_U16 },
 	[BATADV_ATTR_MCAST_FLAGS]		= { .type = NLA_U32 },
 	[BATADV_ATTR_MCAST_FLAGS_PRIV]		= { .type = NLA_U32 },
+	[BATADV_ATTR_VLANID]			= { .type = NLA_U16 },
 };
 
 /**
@@ -653,6 +662,123 @@ batadv_netlink_dump_hardif(struct sk_buff *msg, struct netlink_callback *cb)
 	return msg->len;
 }
 
+/**
+ * batadv_netlink_vlan_fill() - Fill message with vlan attributes
+ * @msg: Netlink message to dump into
+ * @bat_priv: the bat priv with all the soft interface information
+ * @vlan: vlan which was modified
+ * @cmd: type of message to generate
+ * @portid: Port making netlink request
+ * @seq: sequence number for message
+ * @flags: Additional flags for message
+ *
+ * Return: 0 on success or negative error number in case of failure
+ */
+static int batadv_netlink_vlan_fill(struct sk_buff *msg,
+				    struct batadv_priv *bat_priv,
+				    struct batadv_softif_vlan *vlan,
+				    enum batadv_nl_commands cmd,
+				    u32 portid, u32 seq, int flags)
+{
+	void *hdr;
+
+	hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family, flags, cmd);
+	if (!hdr)
+		return -ENOBUFS;
+
+	if (nla_put_u32(msg, BATADV_ATTR_MESH_IFINDEX,
+			bat_priv->soft_iface->ifindex))
+		goto nla_put_failure;
+
+	if (nla_put_u32(msg, BATADV_ATTR_VLANID, vlan->vid & VLAN_VID_MASK))
+		goto nla_put_failure;
+
+	genlmsg_end(msg, hdr);
+	return 0;
+
+nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+	return -EMSGSIZE;
+}
+
+/**
+ * batadv_netlink_notify_vlan() - send vlan attributes to listener
+ * @bat_priv: the bat priv with all the soft interface information
+ * @vlan: vlan which was modified
+ *
+ * Return: 0 on success, < 0 on error
+ */
+static int batadv_netlink_notify_vlan(struct batadv_priv *bat_priv,
+				      struct batadv_softif_vlan *vlan)
+{
+	struct sk_buff *msg;
+	int ret;
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+
+	ret = batadv_netlink_vlan_fill(msg, bat_priv, vlan,
+				       BATADV_CMD_SET_VLAN, 0, 0, 0);
+	if (ret < 0) {
+		nlmsg_free(msg);
+		return ret;
+	}
+
+	genlmsg_multicast_netns(&batadv_netlink_family,
+				dev_net(bat_priv->soft_iface), msg, 0,
+				BATADV_NL_MCGRP_CONFIG, GFP_KERNEL);
+
+	return 0;
+}
+
+/**
+ * batadv_netlink_get_vlan() - Get vlan attributes
+ * @skb: Netlink message with request data
+ * @info: receiver information
+ *
+ * Return: 0 on success or negative error number in case of failure
+ */
+static int batadv_netlink_get_vlan(struct sk_buff *skb, struct genl_info *info)
+{
+	struct batadv_softif_vlan *vlan = info->user_ptr[1];
+	struct batadv_priv *bat_priv = info->user_ptr[0];
+	struct sk_buff *msg;
+	int ret;
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+
+	ret = batadv_netlink_vlan_fill(msg, bat_priv, vlan, BATADV_CMD_GET_VLAN,
+				       info->snd_portid, info->snd_seq, 0);
+	if (ret < 0) {
+		nlmsg_free(msg);
+		return ret;
+	}
+
+	ret = genlmsg_reply(msg, info);
+
+	return ret;
+}
+
+/**
+ * batadv_netlink_set_vlan() - Get vlan attributes
+ * @skb: Netlink message with request data
+ * @info: receiver information
+ *
+ * Return: 0 on success or negative error number in case of failure
+ */
+static int batadv_netlink_set_vlan(struct sk_buff *skb, struct genl_info *info)
+{
+	struct batadv_softif_vlan *vlan = info->user_ptr[1];
+	struct batadv_priv *bat_priv = info->user_ptr[0];
+
+	batadv_netlink_notify_vlan(bat_priv, vlan);
+
+	return 0;
+}
+
 /**
  * batadv_get_softif_from_info() - Retrieve soft interface from genl attributes
  * @net: the applicable net namespace
@@ -733,6 +859,34 @@ err_put_harddev:
 	return ERR_PTR(-EINVAL);
 }
 
+/**
+ * batadv_get_vlan_from_info() - Retrieve vlan from genl attributes
+ * @bat_priv: the bat priv with all the soft interface information
+ * @net: the applicable net namespace
+ * @info: receiver information
+ *
+ * Return: Pointer to vlan on success (with increased refcnt), error pointer
+ *  on error
+ */
+static struct batadv_softif_vlan *
+batadv_get_vlan_from_info(struct batadv_priv *bat_priv, struct net *net,
+			  struct genl_info *info)
+{
+	struct batadv_softif_vlan *vlan;
+	u16 vid;
+
+	if (!info->attrs[BATADV_ATTR_VLANID])
+		return ERR_PTR(-EINVAL);
+
+	vid = nla_get_u16(info->attrs[BATADV_ATTR_VLANID]);
+
+	vlan = batadv_softif_vlan_get(bat_priv, vid | BATADV_VLAN_HAS_TAG);
+	if (!vlan)
+		return ERR_PTR(-ENOENT);
+
+	return vlan;
+}
+
 /**
  * batadv_pre_doit() - Prepare batman-adv genl doit request
  * @ops: requested netlink operation
@@ -747,16 +901,17 @@ static int batadv_pre_doit(const struct genl_ops *ops, struct sk_buff *skb,
 	struct net *net = genl_info_net(info);
 	struct batadv_hard_iface *hard_iface;
 	struct batadv_priv *bat_priv = NULL;
+	struct batadv_softif_vlan *vlan;
 	struct net_device *soft_iface;
 	u8 user_ptr1_flags;
 	u8 mesh_dep_flags;
 	int ret;
 
-	user_ptr1_flags = BATADV_FLAG_NEED_HARDIF;
+	user_ptr1_flags = BATADV_FLAG_NEED_HARDIF | BATADV_FLAG_NEED_VLAN;
 	if (WARN_ON(hweight8(ops->internal_flags & user_ptr1_flags) > 1))
 		return -EINVAL;
 
-	mesh_dep_flags = BATADV_FLAG_NEED_HARDIF;
+	mesh_dep_flags = BATADV_FLAG_NEED_HARDIF | BATADV_FLAG_NEED_VLAN;
 	if (WARN_ON((ops->internal_flags & mesh_dep_flags) &&
 		    (~ops->internal_flags & BATADV_FLAG_NEED_MESH)))
 		return -EINVAL;
@@ -780,6 +935,16 @@ static int batadv_pre_doit(const struct genl_ops *ops, struct sk_buff *skb,
 		info->user_ptr[1] = hard_iface;
 	}
 
+	if (ops->internal_flags & BATADV_FLAG_NEED_VLAN) {
+		vlan = batadv_get_vlan_from_info(bat_priv, net, info);
+		if (IS_ERR(vlan)) {
+			ret = PTR_ERR(vlan);
+			goto err_put_softif;
+		}
+
+		info->user_ptr[1] = vlan;
+	}
+
 	return 0;
 
 err_put_softif:
@@ -799,6 +964,7 @@ static void batadv_post_doit(const struct genl_ops *ops, struct sk_buff *skb,
 			     struct genl_info *info)
 {
 	struct batadv_hard_iface *hard_iface;
+	struct batadv_softif_vlan *vlan;
 	struct batadv_priv *bat_priv;
 
 	if (ops->internal_flags & BATADV_FLAG_NEED_HARDIF &&
@@ -808,6 +974,11 @@ static void batadv_post_doit(const struct genl_ops *ops, struct sk_buff *skb,
 		batadv_hardif_put(hard_iface);
 	}
 
+	if (ops->internal_flags & BATADV_FLAG_NEED_VLAN && info->user_ptr[1]) {
+		vlan = info->user_ptr[1];
+		batadv_softif_vlan_put(vlan);
+	}
+
 	if (ops->internal_flags & BATADV_FLAG_NEED_MESH && info->user_ptr[0]) {
 		bat_priv = info->user_ptr[0];
 		dev_put(bat_priv->soft_iface);
@@ -920,6 +1091,22 @@ static const struct genl_ops batadv_netlink_ops[] = {
 		.internal_flags = BATADV_FLAG_NEED_MESH |
 				  BATADV_FLAG_NEED_HARDIF,
 	},
+	{
+		.cmd = BATADV_CMD_GET_VLAN,
+		/* can be retrieved by unprivileged users */
+		.policy = batadv_netlink_policy,
+		.doit = batadv_netlink_get_vlan,
+		.internal_flags = BATADV_FLAG_NEED_MESH |
+				  BATADV_FLAG_NEED_VLAN,
+	},
+	{
+		.cmd = BATADV_CMD_SET_VLAN,
+		.flags = GENL_ADMIN_PERM,
+		.policy = batadv_netlink_policy,
+		.doit = batadv_netlink_set_vlan,
+		.internal_flags = BATADV_FLAG_NEED_MESH |
+				  BATADV_FLAG_NEED_VLAN,
+	},
 };
 
 struct genl_family batadv_netlink_family __ro_after_init = {
-- 
cgit v1.2.3


From 9ab4cee5ced970019a5d0f3a43cf85671ca7b38f Mon Sep 17 00:00:00 2001
From: Sven Eckelmann <sven@narfation.org>
Date: Fri, 23 Nov 2018 12:46:14 +0100
Subject: batman-adv: Add aggregated_ogms mesh genl configuration

The mesh interface can delay OGM messages to aggregate different ogms
together in a single OGM packet.

The BATADV_CMD_SET_MESH/BATADV_CMD_GET_MESH commands allow to set/get the
configuration of this feature using the BATADV_ATTR_AGGREGATED_OGMS_ENABLED
attribute. Setting the u8 to zero will disable this feature and setting it
to something else is enabling this feature.

Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
---
 include/uapi/linux/batman_adv.h |  6 ++++++
 net/batman-adv/netlink.c        | 12 ++++++++++++
 2 files changed, 18 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/batman_adv.h b/include/uapi/linux/batman_adv.h
index 10f3cc34fe16..f8941e80d6b4 100644
--- a/include/uapi/linux/batman_adv.h
+++ b/include/uapi/linux/batman_adv.h
@@ -350,6 +350,12 @@ enum batadv_nl_attrs {
 	 */
 	BATADV_ATTR_VLANID,
 
+	/**
+	 * @BATADV_ATTR_AGGREGATED_OGMS_ENABLED: whether the batman protocol
+	 *  messages of the mesh interface shall be aggregated or not.
+	 */
+	BATADV_ATTR_AGGREGATED_OGMS_ENABLED,
+
 	/* add attributes above here, update the policy in netlink.c */
 
 	/**
diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c
index d40913bb9031..82665dc2eb2b 100644
--- a/net/batman-adv/netlink.c
+++ b/net/batman-adv/netlink.c
@@ -139,6 +139,7 @@ static const struct nla_policy batadv_netlink_policy[NUM_BATADV_ATTR] = {
 	[BATADV_ATTR_MCAST_FLAGS]		= { .type = NLA_U32 },
 	[BATADV_ATTR_MCAST_FLAGS_PRIV]		= { .type = NLA_U32 },
 	[BATADV_ATTR_VLANID]			= { .type = NLA_U16 },
+	[BATADV_ATTR_AGGREGATED_OGMS_ENABLED]	= { .type = NLA_U8 },
 };
 
 /**
@@ -214,6 +215,10 @@ static int batadv_netlink_mesh_fill(struct sk_buff *msg,
 			goto nla_put_failure;
 	}
 
+	if (nla_put_u8(msg, BATADV_ATTR_AGGREGATED_OGMS_ENABLED,
+		       !!atomic_read(&bat_priv->aggregated_ogms)))
+		goto nla_put_failure;
+
 	if (primary_if)
 		batadv_hardif_put(primary_if);
 
@@ -296,6 +301,13 @@ static int batadv_netlink_get_mesh(struct sk_buff *skb, struct genl_info *info)
 static int batadv_netlink_set_mesh(struct sk_buff *skb, struct genl_info *info)
 {
 	struct batadv_priv *bat_priv = info->user_ptr[0];
+	struct nlattr *attr;
+
+	if (info->attrs[BATADV_ATTR_AGGREGATED_OGMS_ENABLED]) {
+		attr = info->attrs[BATADV_ATTR_AGGREGATED_OGMS_ENABLED];
+
+		atomic_set(&bat_priv->aggregated_ogms, !!nla_get_u8(attr));
+	}
 
 	batadv_netlink_notify_mesh(bat_priv);
 
-- 
cgit v1.2.3


From e43d16b87dc2cad18799cfd1142f4acae4135ea4 Mon Sep 17 00:00:00 2001
From: Sven Eckelmann <sven@narfation.org>
Date: Fri, 23 Nov 2018 12:51:55 +0100
Subject: batman-adv: Add ap_isolation mesh/vlan genl configuration

The mesh interface can drop messages between clients to implement a
mesh-wide AP isolation.

The BATADV_CMD_SET_MESH/BATADV_CMD_GET_MESH and
BATADV_CMD_SET_VLAN/BATADV_CMD_GET_VLAN commands allow to set/get the
configuration of this feature using the BATADV_ATTR_AP_ISOLATION_ENABLED
attribute. Setting the u8 to zero will disable this feature and setting it
to something else is enabling this feature.

This feature also requires that skbuff which should be handled as isolated
are marked. The BATADV_CMD_SET_MESH/BATADV_CMD_GET_MESH commands allow to
set/get the mark/mask using the u32 attributes BATADV_ATTR_ISOLATION_MARK
and BATADV_ATTR_ISOLATION_MASK.

Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
---
 include/uapi/linux/batman_adv.h | 19 +++++++++
 net/batman-adv/netlink.c        | 89 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 108 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/batman_adv.h b/include/uapi/linux/batman_adv.h
index f8941e80d6b4..a4dadafe08dd 100644
--- a/include/uapi/linux/batman_adv.h
+++ b/include/uapi/linux/batman_adv.h
@@ -356,6 +356,25 @@ enum batadv_nl_attrs {
 	 */
 	BATADV_ATTR_AGGREGATED_OGMS_ENABLED,
 
+	/**
+	 * @BATADV_ATTR_AP_ISOLATION_ENABLED: whether the data traffic going
+	 *  from a wireless client to another wireless client will be silently
+	 *  dropped.
+	 */
+	BATADV_ATTR_AP_ISOLATION_ENABLED,
+
+	/**
+	 * @BATADV_ATTR_ISOLATION_MARK: the isolation mark which is used to
+	 *  classify clients as "isolated" by the Extended Isolation feature.
+	 */
+	BATADV_ATTR_ISOLATION_MARK,
+
+	/**
+	 * @BATADV_ATTR_ISOLATION_MASK: the isolation (bit)mask which is used to
+	 *  classify clients as "isolated" by the Extended Isolation feature.
+	 */
+	BATADV_ATTR_ISOLATION_MASK,
+
 	/* add attributes above here, update the policy in netlink.c */
 
 	/**
diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c
index 82665dc2eb2b..fc80f003ed6e 100644
--- a/net/batman-adv/netlink.c
+++ b/net/batman-adv/netlink.c
@@ -140,6 +140,9 @@ static const struct nla_policy batadv_netlink_policy[NUM_BATADV_ATTR] = {
 	[BATADV_ATTR_MCAST_FLAGS_PRIV]		= { .type = NLA_U32 },
 	[BATADV_ATTR_VLANID]			= { .type = NLA_U16 },
 	[BATADV_ATTR_AGGREGATED_OGMS_ENABLED]	= { .type = NLA_U8 },
+	[BATADV_ATTR_AP_ISOLATION_ENABLED]	= { .type = NLA_U8 },
+	[BATADV_ATTR_ISOLATION_MARK]		= { .type = NLA_U32 },
+	[BATADV_ATTR_ISOLATION_MASK]		= { .type = NLA_U32 },
 };
 
 /**
@@ -157,6 +160,52 @@ batadv_netlink_get_ifindex(const struct nlmsghdr *nlh, int attrtype)
 	return attr ? nla_get_u32(attr) : 0;
 }
 
+/**
+ * batadv_netlink_mesh_fill_ap_isolation() - Add ap_isolation softif attribute
+ * @msg: Netlink message to dump into
+ * @bat_priv: the bat priv with all the soft interface information
+ *
+ * Return: 0 on success or negative error number in case of failure
+ */
+static int batadv_netlink_mesh_fill_ap_isolation(struct sk_buff *msg,
+						 struct batadv_priv *bat_priv)
+{
+	struct batadv_softif_vlan *vlan;
+	u8 ap_isolation;
+
+	vlan = batadv_softif_vlan_get(bat_priv, BATADV_NO_FLAGS);
+	if (!vlan)
+		return 0;
+
+	ap_isolation = atomic_read(&vlan->ap_isolation);
+	batadv_softif_vlan_put(vlan);
+
+	return nla_put_u8(msg, BATADV_ATTR_AP_ISOLATION_ENABLED,
+			  !!ap_isolation);
+}
+
+/**
+ * batadv_option_set_ap_isolation() - Set ap_isolation from genl msg
+ * @attr: parsed BATADV_ATTR_AP_ISOLATION_ENABLED attribute
+ * @bat_priv: the bat priv with all the soft interface information
+ *
+ * Return: 0 on success or negative error number in case of failure
+ */
+static int batadv_netlink_set_mesh_ap_isolation(struct nlattr *attr,
+						struct batadv_priv *bat_priv)
+{
+	struct batadv_softif_vlan *vlan;
+
+	vlan = batadv_softif_vlan_get(bat_priv, BATADV_NO_FLAGS);
+	if (!vlan)
+		return -ENOENT;
+
+	atomic_set(&vlan->ap_isolation, !!nla_get_u8(attr));
+	batadv_softif_vlan_put(vlan);
+
+	return 0;
+}
+
 /**
  * batadv_netlink_mesh_fill() - Fill message with mesh attributes
  * @msg: Netlink message to dump into
@@ -219,6 +268,17 @@ static int batadv_netlink_mesh_fill(struct sk_buff *msg,
 		       !!atomic_read(&bat_priv->aggregated_ogms)))
 		goto nla_put_failure;
 
+	if (batadv_netlink_mesh_fill_ap_isolation(msg, bat_priv))
+		goto nla_put_failure;
+
+	if (nla_put_u32(msg, BATADV_ATTR_ISOLATION_MARK,
+			bat_priv->isolation_mark))
+		goto nla_put_failure;
+
+	if (nla_put_u32(msg, BATADV_ATTR_ISOLATION_MASK,
+			bat_priv->isolation_mark_mask))
+		goto nla_put_failure;
+
 	if (primary_if)
 		batadv_hardif_put(primary_if);
 
@@ -309,6 +369,24 @@ static int batadv_netlink_set_mesh(struct sk_buff *skb, struct genl_info *info)
 		atomic_set(&bat_priv->aggregated_ogms, !!nla_get_u8(attr));
 	}
 
+	if (info->attrs[BATADV_ATTR_AP_ISOLATION_ENABLED]) {
+		attr = info->attrs[BATADV_ATTR_AP_ISOLATION_ENABLED];
+
+		batadv_netlink_set_mesh_ap_isolation(attr, bat_priv);
+	}
+
+	if (info->attrs[BATADV_ATTR_ISOLATION_MARK]) {
+		attr = info->attrs[BATADV_ATTR_ISOLATION_MARK];
+
+		bat_priv->isolation_mark = nla_get_u32(attr);
+	}
+
+	if (info->attrs[BATADV_ATTR_ISOLATION_MASK]) {
+		attr = info->attrs[BATADV_ATTR_ISOLATION_MASK];
+
+		bat_priv->isolation_mark_mask = nla_get_u32(attr);
+	}
+
 	batadv_netlink_notify_mesh(bat_priv);
 
 	return 0;
@@ -705,6 +783,10 @@ static int batadv_netlink_vlan_fill(struct sk_buff *msg,
 	if (nla_put_u32(msg, BATADV_ATTR_VLANID, vlan->vid & VLAN_VID_MASK))
 		goto nla_put_failure;
 
+	if (nla_put_u8(msg, BATADV_ATTR_AP_ISOLATION_ENABLED,
+		       !!atomic_read(&vlan->ap_isolation)))
+		goto nla_put_failure;
+
 	genlmsg_end(msg, hdr);
 	return 0;
 
@@ -785,6 +867,13 @@ static int batadv_netlink_set_vlan(struct sk_buff *skb, struct genl_info *info)
 {
 	struct batadv_softif_vlan *vlan = info->user_ptr[1];
 	struct batadv_priv *bat_priv = info->user_ptr[0];
+	struct nlattr *attr;
+
+	if (info->attrs[BATADV_ATTR_AP_ISOLATION_ENABLED]) {
+		attr = info->attrs[BATADV_ATTR_AP_ISOLATION_ENABLED];
+
+		atomic_set(&vlan->ap_isolation, !!nla_get_u8(attr));
+	}
 
 	batadv_netlink_notify_vlan(bat_priv, vlan);
 
-- 
cgit v1.2.3


From d7e52506b680826d6ff7ce73e6a90a3b9defc741 Mon Sep 17 00:00:00 2001
From: Sven Eckelmann <sven@narfation.org>
Date: Fri, 23 Nov 2018 12:55:44 +0100
Subject: batman-adv: Add bonding mesh genl configuration

The mesh interface can use multiple slave/hard-interface ports at the same
time to transport the traffic to other nodes.

The BATADV_CMD_SET_MESH/BATADV_CMD_GET_MESH commands allow to set/get the
configuration of this feature using the BATADV_ATTR_BONDING_ENABLED
attribute. Setting the u8 to zero will disable this feature and setting it
to something else is enabling this feature.

Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
---
 include/uapi/linux/batman_adv.h |  6 ++++++
 net/batman-adv/netlink.c        | 11 +++++++++++
 2 files changed, 17 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/batman_adv.h b/include/uapi/linux/batman_adv.h
index a4dadafe08dd..f74ff261ec8f 100644
--- a/include/uapi/linux/batman_adv.h
+++ b/include/uapi/linux/batman_adv.h
@@ -375,6 +375,12 @@ enum batadv_nl_attrs {
 	 */
 	BATADV_ATTR_ISOLATION_MASK,
 
+	/**
+	 * @BATADV_ATTR_BONDING_ENABLED: whether the data traffic going through
+	 *  the mesh will be sent using multiple interfaces at the same time.
+	 */
+	BATADV_ATTR_BONDING_ENABLED,
+
 	/* add attributes above here, update the policy in netlink.c */
 
 	/**
diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c
index fc80f003ed6e..310a2c339fd1 100644
--- a/net/batman-adv/netlink.c
+++ b/net/batman-adv/netlink.c
@@ -143,6 +143,7 @@ static const struct nla_policy batadv_netlink_policy[NUM_BATADV_ATTR] = {
 	[BATADV_ATTR_AP_ISOLATION_ENABLED]	= { .type = NLA_U8 },
 	[BATADV_ATTR_ISOLATION_MARK]		= { .type = NLA_U32 },
 	[BATADV_ATTR_ISOLATION_MASK]		= { .type = NLA_U32 },
+	[BATADV_ATTR_BONDING_ENABLED]		= { .type = NLA_U8 },
 };
 
 /**
@@ -279,6 +280,10 @@ static int batadv_netlink_mesh_fill(struct sk_buff *msg,
 			bat_priv->isolation_mark_mask))
 		goto nla_put_failure;
 
+	if (nla_put_u8(msg, BATADV_ATTR_BONDING_ENABLED,
+		       !!atomic_read(&bat_priv->bonding)))
+		goto nla_put_failure;
+
 	if (primary_if)
 		batadv_hardif_put(primary_if);
 
@@ -387,6 +392,12 @@ static int batadv_netlink_set_mesh(struct sk_buff *skb, struct genl_info *info)
 		bat_priv->isolation_mark_mask = nla_get_u32(attr);
 	}
 
+	if (info->attrs[BATADV_ATTR_BONDING_ENABLED]) {
+		attr = info->attrs[BATADV_ATTR_BONDING_ENABLED];
+
+		atomic_set(&bat_priv->bonding, !!nla_get_u8(attr));
+	}
+
 	batadv_netlink_notify_mesh(bat_priv);
 
 	return 0;
-- 
cgit v1.2.3


From 43ff6105a527aaa1a8e00163b0b0aedbbc0c4522 Mon Sep 17 00:00:00 2001
From: Sven Eckelmann <sven@narfation.org>
Date: Fri, 23 Nov 2018 13:03:39 +0100
Subject: batman-adv: Add bridge_loop_avoidance mesh genl configuration

The mesh interface can try to detect loops in the same mesh caused by
(indirectly) bridged mesh/soft-interfaces of different nodes. Some of the
loops can also be resolved without breaking the mesh.

The BATADV_CMD_SET_MESH/BATADV_CMD_GET_MESH commands allow to set/get the
configuration of this feature using the
BATADV_ATTR_BRIDGE_LOOP_AVOIDANCE_ENABLED attribute. Setting the u8 to zero
will disable this feature and setting it to something else is enabling this
feature.

Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
---
 include/uapi/linux/batman_adv.h |  7 +++++++
 net/batman-adv/netlink.c        | 17 +++++++++++++++++
 2 files changed, 24 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/batman_adv.h b/include/uapi/linux/batman_adv.h
index f74ff261ec8f..3cb35c661056 100644
--- a/include/uapi/linux/batman_adv.h
+++ b/include/uapi/linux/batman_adv.h
@@ -381,6 +381,13 @@ enum batadv_nl_attrs {
 	 */
 	BATADV_ATTR_BONDING_ENABLED,
 
+	/**
+	 * @BATADV_ATTR_BRIDGE_LOOP_AVOIDANCE_ENABLED: whether the bridge loop
+	 *  avoidance feature is enabled. This feature detects and avoids loops
+	 *  between the mesh and devices bridged with the soft interface
+	 */
+	BATADV_ATTR_BRIDGE_LOOP_AVOIDANCE_ENABLED,
+
 	/* add attributes above here, update the policy in netlink.c */
 
 	/**
diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c
index 310a2c339fd1..40c940da9498 100644
--- a/net/batman-adv/netlink.c
+++ b/net/batman-adv/netlink.c
@@ -144,6 +144,7 @@ static const struct nla_policy batadv_netlink_policy[NUM_BATADV_ATTR] = {
 	[BATADV_ATTR_ISOLATION_MARK]		= { .type = NLA_U32 },
 	[BATADV_ATTR_ISOLATION_MASK]		= { .type = NLA_U32 },
 	[BATADV_ATTR_BONDING_ENABLED]		= { .type = NLA_U8 },
+	[BATADV_ATTR_BRIDGE_LOOP_AVOIDANCE_ENABLED]	= { .type = NLA_U8 },
 };
 
 /**
@@ -284,6 +285,12 @@ static int batadv_netlink_mesh_fill(struct sk_buff *msg,
 		       !!atomic_read(&bat_priv->bonding)))
 		goto nla_put_failure;
 
+#ifdef CONFIG_BATMAN_ADV_BLA
+	if (nla_put_u8(msg, BATADV_ATTR_BRIDGE_LOOP_AVOIDANCE_ENABLED,
+		       !!atomic_read(&bat_priv->bridge_loop_avoidance)))
+		goto nla_put_failure;
+#endif /* CONFIG_BATMAN_ADV_BLA */
+
 	if (primary_if)
 		batadv_hardif_put(primary_if);
 
@@ -398,6 +405,16 @@ static int batadv_netlink_set_mesh(struct sk_buff *skb, struct genl_info *info)
 		atomic_set(&bat_priv->bonding, !!nla_get_u8(attr));
 	}
 
+#ifdef CONFIG_BATMAN_ADV_BLA
+	if (info->attrs[BATADV_ATTR_BRIDGE_LOOP_AVOIDANCE_ENABLED]) {
+		attr = info->attrs[BATADV_ATTR_BRIDGE_LOOP_AVOIDANCE_ENABLED];
+
+		atomic_set(&bat_priv->bridge_loop_avoidance,
+			   !!nla_get_u8(attr));
+		batadv_bla_status_update(bat_priv->soft_iface);
+	}
+#endif /* CONFIG_BATMAN_ADV_BLA */
+
 	batadv_netlink_notify_mesh(bat_priv);
 
 	return 0;
-- 
cgit v1.2.3


From a1c8de80329609ba68ff860074070efb1e14ade4 Mon Sep 17 00:00:00 2001
From: Sven Eckelmann <sven@narfation.org>
Date: Fri, 23 Nov 2018 13:06:42 +0100
Subject: batman-adv: Add distributed_arp_table mesh genl configuration

The mesh interface can use a distributed hash table to answer ARP requests
without flooding the request through the whole mesh.

The BATADV_CMD_SET_MESH/BATADV_CMD_GET_MESH commands allow to set/get the
configuration of this feature using the
BATADV_ATTR_DISTRIBUTED_ARP_TABLE_ENABLED attribute. Setting the u8 to zero
will disable this feature and setting it to something else is enabling this
feature.

Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
---
 include/uapi/linux/batman_adv.h |  8 ++++++++
 net/batman-adv/netlink.c        | 17 +++++++++++++++++
 2 files changed, 25 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/batman_adv.h b/include/uapi/linux/batman_adv.h
index 3cb35c661056..f303a1496476 100644
--- a/include/uapi/linux/batman_adv.h
+++ b/include/uapi/linux/batman_adv.h
@@ -388,6 +388,14 @@ enum batadv_nl_attrs {
 	 */
 	BATADV_ATTR_BRIDGE_LOOP_AVOIDANCE_ENABLED,
 
+	/**
+	 * @BATADV_ATTR_DISTRIBUTED_ARP_TABLE_ENABLED: whether the distributed
+	 *  arp table feature is enabled. This feature uses a distributed hash
+	 *  table to answer ARP requests without flooding the request through
+	 *  the whole mesh.
+	 */
+	BATADV_ATTR_DISTRIBUTED_ARP_TABLE_ENABLED,
+
 	/* add attributes above here, update the policy in netlink.c */
 
 	/**
diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c
index 40c940da9498..3028c2a5c782 100644
--- a/net/batman-adv/netlink.c
+++ b/net/batman-adv/netlink.c
@@ -145,6 +145,7 @@ static const struct nla_policy batadv_netlink_policy[NUM_BATADV_ATTR] = {
 	[BATADV_ATTR_ISOLATION_MASK]		= { .type = NLA_U32 },
 	[BATADV_ATTR_BONDING_ENABLED]		= { .type = NLA_U8 },
 	[BATADV_ATTR_BRIDGE_LOOP_AVOIDANCE_ENABLED]	= { .type = NLA_U8 },
+	[BATADV_ATTR_DISTRIBUTED_ARP_TABLE_ENABLED]	= { .type = NLA_U8 },
 };
 
 /**
@@ -291,6 +292,12 @@ static int batadv_netlink_mesh_fill(struct sk_buff *msg,
 		goto nla_put_failure;
 #endif /* CONFIG_BATMAN_ADV_BLA */
 
+#ifdef CONFIG_BATMAN_ADV_DAT
+	if (nla_put_u8(msg, BATADV_ATTR_DISTRIBUTED_ARP_TABLE_ENABLED,
+		       !!atomic_read(&bat_priv->distributed_arp_table)))
+		goto nla_put_failure;
+#endif /* CONFIG_BATMAN_ADV_DAT */
+
 	if (primary_if)
 		batadv_hardif_put(primary_if);
 
@@ -415,6 +422,16 @@ static int batadv_netlink_set_mesh(struct sk_buff *skb, struct genl_info *info)
 	}
 #endif /* CONFIG_BATMAN_ADV_BLA */
 
+#ifdef CONFIG_BATMAN_ADV_DAT
+	if (info->attrs[BATADV_ATTR_DISTRIBUTED_ARP_TABLE_ENABLED]) {
+		attr = info->attrs[BATADV_ATTR_DISTRIBUTED_ARP_TABLE_ENABLED];
+
+		atomic_set(&bat_priv->distributed_arp_table,
+			   !!nla_get_u8(attr));
+		batadv_dat_status_update(bat_priv->soft_iface);
+	}
+#endif /* CONFIG_BATMAN_ADV_DAT */
+
 	batadv_netlink_notify_mesh(bat_priv);
 
 	return 0;
-- 
cgit v1.2.3


From 3e15b06eb7e410ef9f1b9673be094b3e10eacf93 Mon Sep 17 00:00:00 2001
From: Sven Eckelmann <sven@narfation.org>
Date: Fri, 23 Nov 2018 13:09:49 +0100
Subject: batman-adv: Add fragmentation mesh genl configuration

The mesh interface can fragment unicast packets when the packet size
exceeds the outgoing slave/hard-interface MTU.

The BATADV_CMD_SET_MESH/BATADV_CMD_GET_MESH commands allow to set/get the
configuration of this feature using the BATADV_ATTR_FRAGMENTATION_ENABLED
attribute. Setting the u8 to zero will disable this feature and setting it
to something else is enabling this feature.

Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
---
 include/uapi/linux/batman_adv.h |  7 +++++++
 net/batman-adv/netlink.c        | 12 ++++++++++++
 2 files changed, 19 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/batman_adv.h b/include/uapi/linux/batman_adv.h
index f303a1496476..847841b8de5d 100644
--- a/include/uapi/linux/batman_adv.h
+++ b/include/uapi/linux/batman_adv.h
@@ -396,6 +396,13 @@ enum batadv_nl_attrs {
 	 */
 	BATADV_ATTR_DISTRIBUTED_ARP_TABLE_ENABLED,
 
+	/**
+	 * @BATADV_ATTR_FRAGMENTATION_ENABLED: whether the data traffic going
+	 *  through the mesh will be fragmented or silently discarded if the
+	 *  packet size exceeds the outgoing interface MTU.
+	 */
+	BATADV_ATTR_FRAGMENTATION_ENABLED,
+
 	/* add attributes above here, update the policy in netlink.c */
 
 	/**
diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c
index 3028c2a5c782..5b2160d2f482 100644
--- a/net/batman-adv/netlink.c
+++ b/net/batman-adv/netlink.c
@@ -146,6 +146,7 @@ static const struct nla_policy batadv_netlink_policy[NUM_BATADV_ATTR] = {
 	[BATADV_ATTR_BONDING_ENABLED]		= { .type = NLA_U8 },
 	[BATADV_ATTR_BRIDGE_LOOP_AVOIDANCE_ENABLED]	= { .type = NLA_U8 },
 	[BATADV_ATTR_DISTRIBUTED_ARP_TABLE_ENABLED]	= { .type = NLA_U8 },
+	[BATADV_ATTR_FRAGMENTATION_ENABLED]	= { .type = NLA_U8 },
 };
 
 /**
@@ -298,6 +299,10 @@ static int batadv_netlink_mesh_fill(struct sk_buff *msg,
 		goto nla_put_failure;
 #endif /* CONFIG_BATMAN_ADV_DAT */
 
+	if (nla_put_u8(msg, BATADV_ATTR_FRAGMENTATION_ENABLED,
+		       !!atomic_read(&bat_priv->fragmentation)))
+		goto nla_put_failure;
+
 	if (primary_if)
 		batadv_hardif_put(primary_if);
 
@@ -432,6 +437,13 @@ static int batadv_netlink_set_mesh(struct sk_buff *skb, struct genl_info *info)
 	}
 #endif /* CONFIG_BATMAN_ADV_DAT */
 
+	if (info->attrs[BATADV_ATTR_FRAGMENTATION_ENABLED]) {
+		attr = info->attrs[BATADV_ATTR_FRAGMENTATION_ENABLED];
+
+		atomic_set(&bat_priv->fragmentation, !!nla_get_u8(attr));
+		batadv_update_min_mtu(bat_priv->soft_iface);
+	}
+
 	batadv_netlink_notify_mesh(bat_priv);
 
 	return 0;
-- 
cgit v1.2.3


From e2d0d35b5b0ce420505e88255fd5922ed035bb8d Mon Sep 17 00:00:00 2001
From: Sven Eckelmann <sven@narfation.org>
Date: Fri, 23 Nov 2018 13:15:00 +0100
Subject: batman-adv: Add gateway mesh genl configuration

The mesh/soft-interface can optimize the handling of DHCP packets. Instead
of flooding them through the whole mesh, it can be forwarded as unicast to
a specific gateway server. The originator which injects the packets in the
mesh has to select (based on sel_class thresholds) a responsible gateway
server. This is done by switching this originator to the gw_mode client.
The servers announce their forwarding bandwidth (download/upload) when the
gw_mode server was selected.

The BATADV_CMD_SET_MESH/BATADV_CMD_GET_MESH commands allow to set/get the
configuration of this feature using the attributes:

* u8 BATADV_ATTR_GW_MODE (0 == off, 1 == client, 2 == server)
* u32 BATADV_ATTR_GW_BANDWIDTH_DOWN (in 100 kbit/s steps)
* u32 BATADV_ATTR_GW_BANDWIDTH_UP (in 100 kbit/s steps)
* u32 BATADV_ATTR_GW_SEL_CLASS

Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
---
 include/uapi/linux/batman_adv.h | 40 ++++++++++++++++++
 net/batman-adv/gateway_client.c |  1 -
 net/batman-adv/gateway_common.c |  1 +
 net/batman-adv/gateway_common.h |  6 ---
 net/batman-adv/netlink.c        | 92 +++++++++++++++++++++++++++++++++++++++++
 net/batman-adv/soft-interface.c |  2 +-
 net/batman-adv/sysfs.c          |  1 +
 7 files changed, 135 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/batman_adv.h b/include/uapi/linux/batman_adv.h
index 847841b8de5d..165272be6878 100644
--- a/include/uapi/linux/batman_adv.h
+++ b/include/uapi/linux/batman_adv.h
@@ -139,6 +139,20 @@ enum batadv_mcast_flags_priv {
 	BATADV_MCAST_FLAGS_QUERIER_IPV6_SHADOWING	= (1 << 4),
 };
 
+/**
+ * enum batadv_gw_modes - gateway mode of node
+ */
+enum batadv_gw_modes {
+	/** @BATADV_GW_MODE_OFF: gw mode disabled */
+	BATADV_GW_MODE_OFF,
+
+	/** @BATADV_GW_MODE_CLIENT: send DHCP requests to gw servers */
+	BATADV_GW_MODE_CLIENT,
+
+	/** @BATADV_GW_MODE_SERVER: announce itself as gatway server */
+	BATADV_GW_MODE_SERVER,
+};
+
 /**
  * enum batadv_nl_attrs - batman-adv netlink attributes
  */
@@ -403,6 +417,32 @@ enum batadv_nl_attrs {
 	 */
 	BATADV_ATTR_FRAGMENTATION_ENABLED,
 
+	/**
+	 * @BATADV_ATTR_GW_BANDWIDTH_DOWN: defines the download bandwidth which
+	 *  is propagated by this node if %BATADV_ATTR_GW_BANDWIDTH_MODE was set
+	 *  to 'server'.
+	 */
+	BATADV_ATTR_GW_BANDWIDTH_DOWN,
+
+	/**
+	 * @BATADV_ATTR_GW_BANDWIDTH_UP: defines the upload bandwidth which
+	 *  is propagated by this node if %BATADV_ATTR_GW_BANDWIDTH_MODE was set
+	 *  to 'server'.
+	 */
+	BATADV_ATTR_GW_BANDWIDTH_UP,
+
+	/**
+	 * @BATADV_ATTR_GW_MODE: defines the state of the gateway features.
+	 * Possible values are specified in enum batadv_gw_modes
+	 */
+	BATADV_ATTR_GW_MODE,
+
+	/**
+	 * @BATADV_ATTR_GW_SEL_CLASS: defines the selection criteria this node
+	 *  will use to choose a gateway if gw_mode was set to 'client'.
+	 */
+	BATADV_ATTR_GW_SEL_CLASS,
+
 	/* add attributes above here, update the policy in netlink.c */
 
 	/**
diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c
index 0a6b9b30eabd..f5811f61aa92 100644
--- a/net/batman-adv/gateway_client.c
+++ b/net/batman-adv/gateway_client.c
@@ -47,7 +47,6 @@
 #include <uapi/linux/batadv_packet.h>
 #include <uapi/linux/batman_adv.h>
 
-#include "gateway_common.h"
 #include "hard-interface.h"
 #include "log.h"
 #include "netlink.h"
diff --git a/net/batman-adv/gateway_common.c b/net/batman-adv/gateway_common.c
index 53d03adc9bfc..e064de45e22c 100644
--- a/net/batman-adv/gateway_common.c
+++ b/net/batman-adv/gateway_common.c
@@ -28,6 +28,7 @@
 #include <linux/stddef.h>
 #include <linux/string.h>
 #include <uapi/linux/batadv_packet.h>
+#include <uapi/linux/batman_adv.h>
 
 #include "gateway_client.h"
 #include "log.h"
diff --git a/net/batman-adv/gateway_common.h b/net/batman-adv/gateway_common.h
index 89bae100a0b0..128467a0fb89 100644
--- a/net/batman-adv/gateway_common.h
+++ b/net/batman-adv/gateway_common.h
@@ -25,12 +25,6 @@
 
 struct net_device;
 
-enum batadv_gw_modes {
-	BATADV_GW_MODE_OFF,
-	BATADV_GW_MODE_CLIENT,
-	BATADV_GW_MODE_SERVER,
-};
-
 /**
  * enum batadv_bandwidth_units - bandwidth unit types
  */
diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c
index 5b2160d2f482..473467afea91 100644
--- a/net/batman-adv/netlink.c
+++ b/net/batman-adv/netlink.c
@@ -51,6 +51,7 @@
 #include "bridge_loop_avoidance.h"
 #include "distributed-arp-table.h"
 #include "gateway_client.h"
+#include "gateway_common.h"
 #include "hard-interface.h"
 #include "multicast.h"
 #include "originator.h"
@@ -147,6 +148,10 @@ static const struct nla_policy batadv_netlink_policy[NUM_BATADV_ATTR] = {
 	[BATADV_ATTR_BRIDGE_LOOP_AVOIDANCE_ENABLED]	= { .type = NLA_U8 },
 	[BATADV_ATTR_DISTRIBUTED_ARP_TABLE_ENABLED]	= { .type = NLA_U8 },
 	[BATADV_ATTR_FRAGMENTATION_ENABLED]	= { .type = NLA_U8 },
+	[BATADV_ATTR_GW_BANDWIDTH_DOWN]		= { .type = NLA_U32 },
+	[BATADV_ATTR_GW_BANDWIDTH_UP]		= { .type = NLA_U32 },
+	[BATADV_ATTR_GW_MODE]			= { .type = NLA_U8 },
+	[BATADV_ATTR_GW_SEL_CLASS]		= { .type = NLA_U32 },
 };
 
 /**
@@ -303,6 +308,28 @@ static int batadv_netlink_mesh_fill(struct sk_buff *msg,
 		       !!atomic_read(&bat_priv->fragmentation)))
 		goto nla_put_failure;
 
+	if (nla_put_u32(msg, BATADV_ATTR_GW_BANDWIDTH_DOWN,
+			atomic_read(&bat_priv->gw.bandwidth_down)))
+		goto nla_put_failure;
+
+	if (nla_put_u32(msg, BATADV_ATTR_GW_BANDWIDTH_UP,
+			atomic_read(&bat_priv->gw.bandwidth_up)))
+		goto nla_put_failure;
+
+	if (nla_put_u8(msg, BATADV_ATTR_GW_MODE,
+		       atomic_read(&bat_priv->gw.mode)))
+		goto nla_put_failure;
+
+	if (bat_priv->algo_ops->gw.get_best_gw_node &&
+	    bat_priv->algo_ops->gw.is_eligible) {
+		/* GW selection class is not available if the routing algorithm
+		 * in use does not implement the GW API
+		 */
+		if (nla_put_u32(msg, BATADV_ATTR_GW_SEL_CLASS,
+				atomic_read(&bat_priv->gw.sel_class)))
+			goto nla_put_failure;
+	}
+
 	if (primary_if)
 		batadv_hardif_put(primary_if);
 
@@ -444,6 +471,71 @@ static int batadv_netlink_set_mesh(struct sk_buff *skb, struct genl_info *info)
 		batadv_update_min_mtu(bat_priv->soft_iface);
 	}
 
+	if (info->attrs[BATADV_ATTR_GW_BANDWIDTH_DOWN]) {
+		attr = info->attrs[BATADV_ATTR_GW_BANDWIDTH_DOWN];
+
+		atomic_set(&bat_priv->gw.bandwidth_down, nla_get_u32(attr));
+		batadv_gw_tvlv_container_update(bat_priv);
+	}
+
+	if (info->attrs[BATADV_ATTR_GW_BANDWIDTH_UP]) {
+		attr = info->attrs[BATADV_ATTR_GW_BANDWIDTH_UP];
+
+		atomic_set(&bat_priv->gw.bandwidth_up, nla_get_u32(attr));
+		batadv_gw_tvlv_container_update(bat_priv);
+	}
+
+	if (info->attrs[BATADV_ATTR_GW_MODE]) {
+		u8 gw_mode;
+
+		attr = info->attrs[BATADV_ATTR_GW_MODE];
+		gw_mode = nla_get_u8(attr);
+
+		if (gw_mode <= BATADV_GW_MODE_SERVER) {
+			/* Invoking batadv_gw_reselect() is not enough to really
+			 * de-select the current GW. It will only instruct the
+			 * gateway client code to perform a re-election the next
+			 * time that this is needed.
+			 *
+			 * When gw client mode is being switched off the current
+			 * GW must be de-selected explicitly otherwise no GW_ADD
+			 * uevent is thrown on client mode re-activation. This
+			 * is operation is performed in
+			 * batadv_gw_check_client_stop().
+			 */
+			batadv_gw_reselect(bat_priv);
+
+			/* always call batadv_gw_check_client_stop() before
+			 * changing the gateway state
+			 */
+			batadv_gw_check_client_stop(bat_priv);
+			atomic_set(&bat_priv->gw.mode, gw_mode);
+			batadv_gw_tvlv_container_update(bat_priv);
+		}
+	}
+
+	if (info->attrs[BATADV_ATTR_GW_SEL_CLASS] &&
+	    bat_priv->algo_ops->gw.get_best_gw_node &&
+	    bat_priv->algo_ops->gw.is_eligible) {
+		/* setting the GW selection class is allowed only if the routing
+		 * algorithm in use implements the GW API
+		 */
+
+		u32 sel_class_max = 0xffffffffu;
+		u32 sel_class;
+
+		attr = info->attrs[BATADV_ATTR_GW_SEL_CLASS];
+		sel_class = nla_get_u32(attr);
+
+		if (!bat_priv->algo_ops->gw.store_sel_class)
+			sel_class_max = BATADV_TQ_MAX_VALUE;
+
+		if (sel_class >= 1 && sel_class <= sel_class_max) {
+			atomic_set(&bat_priv->gw.sel_class, sel_class);
+			batadv_gw_reselect(bat_priv);
+		}
+	}
+
 	batadv_netlink_notify_mesh(bat_priv);
 
 	return 0;
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index b14fb3462af7..12028c287de5 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -50,13 +50,13 @@
 #include <linux/string.h>
 #include <linux/types.h>
 #include <uapi/linux/batadv_packet.h>
+#include <uapi/linux/batman_adv.h>
 
 #include "bat_algo.h"
 #include "bridge_loop_avoidance.h"
 #include "debugfs.h"
 #include "distributed-arp-table.h"
 #include "gateway_client.h"
-#include "gateway_common.h"
 #include "hard-interface.h"
 #include "multicast.h"
 #include "network-coding.h"
diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c
index e1b816262c53..64fd5932a555 100644
--- a/net/batman-adv/sysfs.c
+++ b/net/batman-adv/sysfs.c
@@ -40,6 +40,7 @@
 #include <linux/stringify.h>
 #include <linux/workqueue.h>
 #include <uapi/linux/batadv_packet.h>
+#include <uapi/linux/batman_adv.h>
 
 #include "bridge_loop_avoidance.h"
 #include "distributed-arp-table.h"
-- 
cgit v1.2.3


From bfc7f1be57b8a5ea738ce5db62b82234e4901abf Mon Sep 17 00:00:00 2001
From: Sven Eckelmann <sven@narfation.org>
Date: Fri, 23 Nov 2018 13:19:38 +0100
Subject: batman-adv: Add hop_penalty mesh genl configuration

The TQ (B.A.T.M.A.N. IV) and throughput values (B.A.T.M.A.N. V) are reduced
when they are forwarded. One of the reductions is the penalty for
traversing an additional hop. This hop_penalty (0-255) defines the
percentage of reduction (0-100%).

The BATADV_CMD_SET_MESH/BATADV_CMD_GET_MESH commands allow to set/get the
configuration of this feature using the u8 BATADV_ATTR_HOP_PENALTY
attribute.

Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
---
 include/uapi/linux/batman_adv.h |  6 ++++++
 net/batman-adv/netlink.c        | 11 +++++++++++
 2 files changed, 17 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/batman_adv.h b/include/uapi/linux/batman_adv.h
index 165272be6878..b37cb923332e 100644
--- a/include/uapi/linux/batman_adv.h
+++ b/include/uapi/linux/batman_adv.h
@@ -443,6 +443,12 @@ enum batadv_nl_attrs {
 	 */
 	BATADV_ATTR_GW_SEL_CLASS,
 
+	/**
+	 * @BATADV_ATTR_HOP_PENALTY: defines the penalty which will be applied
+	 *  to an originator message's tq-field on every hop.
+	 */
+	BATADV_ATTR_HOP_PENALTY,
+
 	/* add attributes above here, update the policy in netlink.c */
 
 	/**
diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c
index 473467afea91..ce6e6f078765 100644
--- a/net/batman-adv/netlink.c
+++ b/net/batman-adv/netlink.c
@@ -152,6 +152,7 @@ static const struct nla_policy batadv_netlink_policy[NUM_BATADV_ATTR] = {
 	[BATADV_ATTR_GW_BANDWIDTH_UP]		= { .type = NLA_U32 },
 	[BATADV_ATTR_GW_MODE]			= { .type = NLA_U8 },
 	[BATADV_ATTR_GW_SEL_CLASS]		= { .type = NLA_U32 },
+	[BATADV_ATTR_HOP_PENALTY]		= { .type = NLA_U8 },
 };
 
 /**
@@ -330,6 +331,10 @@ static int batadv_netlink_mesh_fill(struct sk_buff *msg,
 			goto nla_put_failure;
 	}
 
+	if (nla_put_u8(msg, BATADV_ATTR_HOP_PENALTY,
+		       atomic_read(&bat_priv->hop_penalty)))
+		goto nla_put_failure;
+
 	if (primary_if)
 		batadv_hardif_put(primary_if);
 
@@ -536,6 +541,12 @@ static int batadv_netlink_set_mesh(struct sk_buff *skb, struct genl_info *info)
 		}
 	}
 
+	if (info->attrs[BATADV_ATTR_HOP_PENALTY]) {
+		attr = info->attrs[BATADV_ATTR_HOP_PENALTY];
+
+		atomic_set(&bat_priv->hop_penalty, nla_get_u8(attr));
+	}
+
 	batadv_netlink_notify_mesh(bat_priv);
 
 	return 0;
-- 
cgit v1.2.3


From b85bd091098a52f7bf00d2725b536455f82ba0d0 Mon Sep 17 00:00:00 2001
From: Sven Eckelmann <sven@narfation.org>
Date: Fri, 23 Nov 2018 13:22:33 +0100
Subject: batman-adv: Add log_level mesh genl configuration

In contrast to other modules, batman-adv allows to set the debug message
verbosity per mesh/soft-interface and not per module (via modparam).

The BATADV_CMD_SET_MESH/BATADV_CMD_GET_MESH commands allow to set/get the
configuration of this feature using the u32 (bitmask) BATADV_ATTR_LOG_LEVEL
attribute.

Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
---
 include/uapi/linux/batman_adv.h |  6 ++++++
 net/batman-adv/netlink.c        | 17 +++++++++++++++++
 2 files changed, 23 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/batman_adv.h b/include/uapi/linux/batman_adv.h
index b37cb923332e..6d36e4b47eb4 100644
--- a/include/uapi/linux/batman_adv.h
+++ b/include/uapi/linux/batman_adv.h
@@ -449,6 +449,12 @@ enum batadv_nl_attrs {
 	 */
 	BATADV_ATTR_HOP_PENALTY,
 
+	/**
+	 * @BATADV_ATTR_LOG_LEVEL: bitmask with to define which debug messages
+	 *  should be send to the debug log/trace ring buffer
+	 */
+	BATADV_ATTR_LOG_LEVEL,
+
 	/* add attributes above here, update the policy in netlink.c */
 
 	/**
diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c
index ce6e6f078765..8c019d46815c 100644
--- a/net/batman-adv/netlink.c
+++ b/net/batman-adv/netlink.c
@@ -53,6 +53,7 @@
 #include "gateway_client.h"
 #include "gateway_common.h"
 #include "hard-interface.h"
+#include "log.h"
 #include "multicast.h"
 #include "originator.h"
 #include "soft-interface.h"
@@ -153,6 +154,7 @@ static const struct nla_policy batadv_netlink_policy[NUM_BATADV_ATTR] = {
 	[BATADV_ATTR_GW_MODE]			= { .type = NLA_U8 },
 	[BATADV_ATTR_GW_SEL_CLASS]		= { .type = NLA_U32 },
 	[BATADV_ATTR_HOP_PENALTY]		= { .type = NLA_U8 },
+	[BATADV_ATTR_LOG_LEVEL]			= { .type = NLA_U32 },
 };
 
 /**
@@ -335,6 +337,12 @@ static int batadv_netlink_mesh_fill(struct sk_buff *msg,
 		       atomic_read(&bat_priv->hop_penalty)))
 		goto nla_put_failure;
 
+#ifdef CONFIG_BATMAN_ADV_DEBUG
+	if (nla_put_u32(msg, BATADV_ATTR_LOG_LEVEL,
+			atomic_read(&bat_priv->log_level)))
+		goto nla_put_failure;
+#endif /* CONFIG_BATMAN_ADV_DEBUG */
+
 	if (primary_if)
 		batadv_hardif_put(primary_if);
 
@@ -547,6 +555,15 @@ static int batadv_netlink_set_mesh(struct sk_buff *skb, struct genl_info *info)
 		atomic_set(&bat_priv->hop_penalty, nla_get_u8(attr));
 	}
 
+#ifdef CONFIG_BATMAN_ADV_DEBUG
+	if (info->attrs[BATADV_ATTR_LOG_LEVEL]) {
+		attr = info->attrs[BATADV_ATTR_LOG_LEVEL];
+
+		atomic_set(&bat_priv->log_level,
+			   nla_get_u32(attr) & BATADV_DBG_ALL);
+	}
+#endif /* CONFIG_BATMAN_ADV_DEBUG */
+
 	batadv_netlink_notify_mesh(bat_priv);
 
 	return 0;
-- 
cgit v1.2.3


From f75b56bc91122e2934e2cb458f98727c41d535c7 Mon Sep 17 00:00:00 2001
From: Sven Eckelmann <sven@narfation.org>
Date: Fri, 23 Nov 2018 13:25:05 +0100
Subject: batman-adv: Add multicast forceflood mesh genl configuration

The mesh interface can optimize the flooding of multicast packets based on
the content of the global translation tables. To disable this behavior and
use the broadcast-like flooding of the packets, forceflood has to be
enabled.

The BATADV_CMD_SET_MESH/BATADV_CMD_GET_MESH commands allow to set/get the
configuration of this feature using the
BATADV_ATTR_MULTICAST_FORCEFLOOD_ENABLED attribute. Setting the u8 to zero
will disable this feature (allowing multicast optimizations) and setting it
to something else is enabling this feature (forcing simple flooding).

Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
---
 include/uapi/linux/batman_adv.h |  9 +++++++++
 net/batman-adv/netlink.c        | 15 +++++++++++++++
 2 files changed, 24 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/batman_adv.h b/include/uapi/linux/batman_adv.h
index 6d36e4b47eb4..38caaaae8a05 100644
--- a/include/uapi/linux/batman_adv.h
+++ b/include/uapi/linux/batman_adv.h
@@ -455,6 +455,15 @@ enum batadv_nl_attrs {
 	 */
 	BATADV_ATTR_LOG_LEVEL,
 
+	/**
+	 * @BATADV_ATTR_MULTICAST_FORCEFLOOD_ENABLED: whether multicast
+	 *  optimizations should be replaced by simple broadcast-like flooding
+	 *  of multicast packets. If set to non-zero then all nodes in the mesh
+	 *  are going to use classic flooding for any multicast packet with no
+	 *  optimizations.
+	 */
+	BATADV_ATTR_MULTICAST_FORCEFLOOD_ENABLED,
+
 	/* add attributes above here, update the policy in netlink.c */
 
 	/**
diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c
index 8c019d46815c..475bd15f806c 100644
--- a/net/batman-adv/netlink.c
+++ b/net/batman-adv/netlink.c
@@ -155,6 +155,7 @@ static const struct nla_policy batadv_netlink_policy[NUM_BATADV_ATTR] = {
 	[BATADV_ATTR_GW_SEL_CLASS]		= { .type = NLA_U32 },
 	[BATADV_ATTR_HOP_PENALTY]		= { .type = NLA_U8 },
 	[BATADV_ATTR_LOG_LEVEL]			= { .type = NLA_U32 },
+	[BATADV_ATTR_MULTICAST_FORCEFLOOD_ENABLED]	= { .type = NLA_U8 },
 };
 
 /**
@@ -343,6 +344,12 @@ static int batadv_netlink_mesh_fill(struct sk_buff *msg,
 		goto nla_put_failure;
 #endif /* CONFIG_BATMAN_ADV_DEBUG */
 
+#ifdef CONFIG_BATMAN_ADV_MCAST
+	if (nla_put_u8(msg, BATADV_ATTR_MULTICAST_FORCEFLOOD_ENABLED,
+		       !atomic_read(&bat_priv->multicast_mode)))
+		goto nla_put_failure;
+#endif /* CONFIG_BATMAN_ADV_MCAST */
+
 	if (primary_if)
 		batadv_hardif_put(primary_if);
 
@@ -564,6 +571,14 @@ static int batadv_netlink_set_mesh(struct sk_buff *skb, struct genl_info *info)
 	}
 #endif /* CONFIG_BATMAN_ADV_DEBUG */
 
+#ifdef CONFIG_BATMAN_ADV_MCAST
+	if (info->attrs[BATADV_ATTR_MULTICAST_FORCEFLOOD_ENABLED]) {
+		attr = info->attrs[BATADV_ATTR_MULTICAST_FORCEFLOOD_ENABLED];
+
+		atomic_set(&bat_priv->multicast_mode, !nla_get_u8(attr));
+	}
+#endif /* CONFIG_BATMAN_ADV_MCAST */
+
 	batadv_netlink_notify_mesh(bat_priv);
 
 	return 0;
-- 
cgit v1.2.3


From 6c57cde6800bae2361b8ac14a5924ffc592b3a90 Mon Sep 17 00:00:00 2001
From: Sven Eckelmann <sven@narfation.org>
Date: Fri, 23 Nov 2018 13:26:14 +0100
Subject: batman-adv: Add network_coding mesh genl configuration

The mesh interface can use (in an homogeneous mesh) network coding, a
mechanism that aims to increase the overall network throughput by fusing
multiple packets in one transmission.

The BATADV_CMD_SET_MESH/BATADV_CMD_GET_MESH commands allow to set/get the
configuration of this feature using the BATADV_ATTR_NETWORK_CODING_ENABLED
attribute. Setting the u8 to zero will disable this feature and setting it
to something else is enabling this feature.

Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
---
 include/uapi/linux/batman_adv.h |  7 +++++++
 net/batman-adv/netlink.c        | 17 +++++++++++++++++
 2 files changed, 24 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/batman_adv.h b/include/uapi/linux/batman_adv.h
index 38caaaae8a05..a4239c147bde 100644
--- a/include/uapi/linux/batman_adv.h
+++ b/include/uapi/linux/batman_adv.h
@@ -464,6 +464,13 @@ enum batadv_nl_attrs {
 	 */
 	BATADV_ATTR_MULTICAST_FORCEFLOOD_ENABLED,
 
+	/**
+	 * @BATADV_ATTR_NETWORK_CODING_ENABLED: whether Network Coding (using
+	 *  some magic to send fewer wifi packets but still the same content) is
+	 *  enabled or not.
+	 */
+	BATADV_ATTR_NETWORK_CODING_ENABLED,
+
 	/* add attributes above here, update the policy in netlink.c */
 
 	/**
diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c
index 475bd15f806c..6f01f92e6ab3 100644
--- a/net/batman-adv/netlink.c
+++ b/net/batman-adv/netlink.c
@@ -55,6 +55,7 @@
 #include "hard-interface.h"
 #include "log.h"
 #include "multicast.h"
+#include "network-coding.h"
 #include "originator.h"
 #include "soft-interface.h"
 #include "tp_meter.h"
@@ -156,6 +157,7 @@ static const struct nla_policy batadv_netlink_policy[NUM_BATADV_ATTR] = {
 	[BATADV_ATTR_HOP_PENALTY]		= { .type = NLA_U8 },
 	[BATADV_ATTR_LOG_LEVEL]			= { .type = NLA_U32 },
 	[BATADV_ATTR_MULTICAST_FORCEFLOOD_ENABLED]	= { .type = NLA_U8 },
+	[BATADV_ATTR_NETWORK_CODING_ENABLED]	= { .type = NLA_U8 },
 };
 
 /**
@@ -350,6 +352,12 @@ static int batadv_netlink_mesh_fill(struct sk_buff *msg,
 		goto nla_put_failure;
 #endif /* CONFIG_BATMAN_ADV_MCAST */
 
+#ifdef CONFIG_BATMAN_ADV_NC
+	if (nla_put_u8(msg, BATADV_ATTR_NETWORK_CODING_ENABLED,
+		       !!atomic_read(&bat_priv->network_coding)))
+		goto nla_put_failure;
+#endif /* CONFIG_BATMAN_ADV_NC */
+
 	if (primary_if)
 		batadv_hardif_put(primary_if);
 
@@ -579,6 +587,15 @@ static int batadv_netlink_set_mesh(struct sk_buff *skb, struct genl_info *info)
 	}
 #endif /* CONFIG_BATMAN_ADV_MCAST */
 
+#ifdef CONFIG_BATMAN_ADV_NC
+	if (info->attrs[BATADV_ATTR_NETWORK_CODING_ENABLED]) {
+		attr = info->attrs[BATADV_ATTR_NETWORK_CODING_ENABLED];
+
+		atomic_set(&bat_priv->network_coding, !!nla_get_u8(attr));
+		batadv_nc_status_update(bat_priv->soft_iface);
+	}
+#endif /* CONFIG_BATMAN_ADV_NC */
+
 	batadv_netlink_notify_mesh(bat_priv);
 
 	return 0;
-- 
cgit v1.2.3


From 7b751b39f018a828a04692e199c044087102e96c Mon Sep 17 00:00:00 2001
From: Sven Eckelmann <sven@narfation.org>
Date: Fri, 23 Nov 2018 13:28:02 +0100
Subject: batman-adv: Add orig_interval mesh genl configuration

The OGM packets are transmitted every orig_interval milliseconds. This
value can be changed using the configuration interface.

The BATADV_CMD_SET_MESH/BATADV_CMD_GET_MESH commands allow to set/get the
configuration of this feature using the u32 BATADV_ATTR_ORIG_INTERVAL
attribute.

Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
---
 include/uapi/linux/batman_adv.h |  6 ++++++
 net/batman-adv/netlink.c        | 17 +++++++++++++++++
 2 files changed, 23 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/batman_adv.h b/include/uapi/linux/batman_adv.h
index a4239c147bde..6bedd4889c37 100644
--- a/include/uapi/linux/batman_adv.h
+++ b/include/uapi/linux/batman_adv.h
@@ -471,6 +471,12 @@ enum batadv_nl_attrs {
 	 */
 	BATADV_ATTR_NETWORK_CODING_ENABLED,
 
+	/**
+	 * @BATADV_ATTR_ORIG_INTERVAL: defines the interval in milliseconds in
+	 *  which batman sends its protocol messages.
+	 */
+	BATADV_ATTR_ORIG_INTERVAL,
+
 	/* add attributes above here, update the policy in netlink.c */
 
 	/**
diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c
index 6f01f92e6ab3..e25c139ba0e1 100644
--- a/net/batman-adv/netlink.c
+++ b/net/batman-adv/netlink.c
@@ -158,6 +158,7 @@ static const struct nla_policy batadv_netlink_policy[NUM_BATADV_ATTR] = {
 	[BATADV_ATTR_LOG_LEVEL]			= { .type = NLA_U32 },
 	[BATADV_ATTR_MULTICAST_FORCEFLOOD_ENABLED]	= { .type = NLA_U8 },
 	[BATADV_ATTR_NETWORK_CODING_ENABLED]	= { .type = NLA_U8 },
+	[BATADV_ATTR_ORIG_INTERVAL]		= { .type = NLA_U32 },
 };
 
 /**
@@ -358,6 +359,10 @@ static int batadv_netlink_mesh_fill(struct sk_buff *msg,
 		goto nla_put_failure;
 #endif /* CONFIG_BATMAN_ADV_NC */
 
+	if (nla_put_u32(msg, BATADV_ATTR_ORIG_INTERVAL,
+			atomic_read(&bat_priv->orig_interval)))
+		goto nla_put_failure;
+
 	if (primary_if)
 		batadv_hardif_put(primary_if);
 
@@ -596,6 +601,18 @@ static int batadv_netlink_set_mesh(struct sk_buff *skb, struct genl_info *info)
 	}
 #endif /* CONFIG_BATMAN_ADV_NC */
 
+	if (info->attrs[BATADV_ATTR_ORIG_INTERVAL]) {
+		u32 orig_interval;
+
+		attr = info->attrs[BATADV_ATTR_ORIG_INTERVAL];
+		orig_interval = nla_get_u32(attr);
+
+		orig_interval = min_t(u32, orig_interval, INT_MAX);
+		orig_interval = max_t(u32, orig_interval, 2 * BATADV_JITTER);
+
+		atomic_set(&bat_priv->orig_interval, orig_interval);
+	}
+
 	batadv_netlink_notify_mesh(bat_priv);
 
 	return 0;
-- 
cgit v1.2.3


From a108008290405545b43b9c7975344bc59af2341b Mon Sep 17 00:00:00 2001
From: Sven Eckelmann <sven@narfation.org>
Date: Fri, 23 Nov 2018 13:30:04 +0100
Subject: batman-adv: Add elp_interval hardif genl configuration

The ELP packets are transmitted every elp_interval milliseconds on an
slave/hard-interface. This value can be changed using the configuration
interface.

The BATADV_CMD_SET_HARDIF/BATADV_CMD_GET_HARDIF commands allow to set/get
the configuration of this feature using the u32 BATADV_ATTR_ELP_INTERVAL
attribute.

Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
---
 include/uapi/linux/batman_adv.h |  6 ++++++
 net/batman-adv/netlink.c        | 17 +++++++++++++++++
 2 files changed, 23 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/batman_adv.h b/include/uapi/linux/batman_adv.h
index 6bedd4889c37..f966e497361b 100644
--- a/include/uapi/linux/batman_adv.h
+++ b/include/uapi/linux/batman_adv.h
@@ -477,6 +477,12 @@ enum batadv_nl_attrs {
 	 */
 	BATADV_ATTR_ORIG_INTERVAL,
 
+	/**
+	 * @BATADV_ATTR_ELP_INTERVAL: defines the interval in milliseconds in
+	 *  which batman emits probing packets for neighbor sensing (ELP).
+	 */
+	BATADV_ATTR_ELP_INTERVAL,
+
 	/* add attributes above here, update the policy in netlink.c */
 
 	/**
diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c
index e25c139ba0e1..09187e6e92a3 100644
--- a/net/batman-adv/netlink.c
+++ b/net/batman-adv/netlink.c
@@ -159,6 +159,7 @@ static const struct nla_policy batadv_netlink_policy[NUM_BATADV_ATTR] = {
 	[BATADV_ATTR_MULTICAST_FORCEFLOOD_ENABLED]	= { .type = NLA_U8 },
 	[BATADV_ATTR_NETWORK_CODING_ENABLED]	= { .type = NLA_U8 },
 	[BATADV_ATTR_ORIG_INTERVAL]		= { .type = NLA_U32 },
+	[BATADV_ATTR_ELP_INTERVAL]		= { .type = NLA_U32 },
 };
 
 /**
@@ -825,6 +826,12 @@ static int batadv_netlink_hardif_fill(struct sk_buff *msg,
 			goto nla_put_failure;
 	}
 
+#ifdef CONFIG_BATMAN_ADV_BATMAN_V
+	if (nla_put_u32(msg, BATADV_ATTR_ELP_INTERVAL,
+			atomic_read(&hard_iface->bat_v.elp_interval)))
+		goto nla_put_failure;
+#endif /* CONFIG_BATMAN_ADV_BATMAN_V */
+
 	genlmsg_end(msg, hdr);
 	return 0;
 
@@ -910,6 +917,16 @@ static int batadv_netlink_set_hardif(struct sk_buff *skb,
 	struct batadv_hard_iface *hard_iface = info->user_ptr[1];
 	struct batadv_priv *bat_priv = info->user_ptr[0];
 
+#ifdef CONFIG_BATMAN_ADV_BATMAN_V
+	struct nlattr *attr;
+
+	if (info->attrs[BATADV_ATTR_ELP_INTERVAL]) {
+		attr = info->attrs[BATADV_ATTR_ELP_INTERVAL];
+
+		atomic_set(&hard_iface->bat_v.elp_interval, nla_get_u32(attr));
+	}
+#endif /* CONFIG_BATMAN_ADV_BATMAN_V */
+
 	batadv_netlink_notify_hardif(bat_priv, hard_iface);
 
 	return 0;
-- 
cgit v1.2.3


From 9a182242f17c06fad620663e6fdf992e97661e66 Mon Sep 17 00:00:00 2001
From: Sven Eckelmann <sven@narfation.org>
Date: Fri, 23 Nov 2018 13:31:23 +0100
Subject: batman-adv: Add throughput_override hardif genl configuration

The B.A.T.M.A.N. V implementation tries to estimate the link throughput of
an interface to an originator using different automatic methods. It is
still possible to overwrite it the link throughput for all reachable
originators via this interface.

The BATADV_CMD_SET_HARDIF/BATADV_CMD_GET_HARDIF commands allow to set/get
the configuration of this feature using the u32
BATADV_ATTR_THROUGHPUT_OVERRIDE attribute. The used unit is in 100 Kbit/s.
If the value is set to 0 then batman-adv will try to estimate the
throughput by itself.

Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
---
 include/uapi/linux/batman_adv.h |  8 ++++++++
 net/batman-adv/netlink.c        | 12 ++++++++++++
 2 files changed, 20 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/batman_adv.h b/include/uapi/linux/batman_adv.h
index f966e497361b..305bf316dd03 100644
--- a/include/uapi/linux/batman_adv.h
+++ b/include/uapi/linux/batman_adv.h
@@ -483,6 +483,14 @@ enum batadv_nl_attrs {
 	 */
 	BATADV_ATTR_ELP_INTERVAL,
 
+	/**
+	 * @BATADV_ATTR_THROUGHPUT_OVERRIDE: defines the throughput value to be
+	 *  used by B.A.T.M.A.N. V when estimating the link throughput using
+	 *  this interface. If the value is set to 0 then batman-adv will try to
+	 *  estimate the throughput by itself.
+	 */
+	BATADV_ATTR_THROUGHPUT_OVERRIDE,
+
 	/* add attributes above here, update the policy in netlink.c */
 
 	/**
diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c
index 09187e6e92a3..476b4c6017c9 100644
--- a/net/batman-adv/netlink.c
+++ b/net/batman-adv/netlink.c
@@ -160,6 +160,7 @@ static const struct nla_policy batadv_netlink_policy[NUM_BATADV_ATTR] = {
 	[BATADV_ATTR_NETWORK_CODING_ENABLED]	= { .type = NLA_U8 },
 	[BATADV_ATTR_ORIG_INTERVAL]		= { .type = NLA_U32 },
 	[BATADV_ATTR_ELP_INTERVAL]		= { .type = NLA_U32 },
+	[BATADV_ATTR_THROUGHPUT_OVERRIDE]	= { .type = NLA_U32 },
 };
 
 /**
@@ -830,6 +831,10 @@ static int batadv_netlink_hardif_fill(struct sk_buff *msg,
 	if (nla_put_u32(msg, BATADV_ATTR_ELP_INTERVAL,
 			atomic_read(&hard_iface->bat_v.elp_interval)))
 		goto nla_put_failure;
+
+	if (nla_put_u32(msg, BATADV_ATTR_THROUGHPUT_OVERRIDE,
+			atomic_read(&hard_iface->bat_v.throughput_override)))
+		goto nla_put_failure;
 #endif /* CONFIG_BATMAN_ADV_BATMAN_V */
 
 	genlmsg_end(msg, hdr);
@@ -925,6 +930,13 @@ static int batadv_netlink_set_hardif(struct sk_buff *skb,
 
 		atomic_set(&hard_iface->bat_v.elp_interval, nla_get_u32(attr));
 	}
+
+	if (info->attrs[BATADV_ATTR_THROUGHPUT_OVERRIDE]) {
+		attr = info->attrs[BATADV_ATTR_THROUGHPUT_OVERRIDE];
+
+		atomic_set(&hard_iface->bat_v.throughput_override,
+			   nla_get_u32(attr));
+	}
 #endif /* CONFIG_BATMAN_ADV_BATMAN_V */
 
 	batadv_netlink_notify_hardif(bat_priv, hard_iface);
-- 
cgit v1.2.3


From 7cf74d515b56de0433aebf0c80872d331dd183a2 Mon Sep 17 00:00:00 2001
From: Artur Rojek <contact@artur-rojek.eu>
Date: Mon, 4 Feb 2019 01:15:13 +0100
Subject: dt-bindings: iio/adc: Add bindings for Ingenic JZ47xx SoCs ADC.

Add device tree bindings for the ADC controller on JZ47xx SoCs,
used by the ingenic-adc driver.

Signed-off-by: Artur Rojek <contact@artur-rojek.eu>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 include/dt-bindings/iio/adc/ingenic,adc.h | 10 ++++++++++
 1 file changed, 10 insertions(+)
 create mode 100644 include/dt-bindings/iio/adc/ingenic,adc.h

(limited to 'include')

diff --git a/include/dt-bindings/iio/adc/ingenic,adc.h b/include/dt-bindings/iio/adc/ingenic,adc.h
new file mode 100644
index 000000000000..82706b2706ac
--- /dev/null
+++ b/include/dt-bindings/iio/adc/ingenic,adc.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _DT_BINDINGS_IIO_ADC_INGENIC_ADC_H
+#define _DT_BINDINGS_IIO_ADC_INGENIC_ADC_H
+
+/* ADC channel idx. */
+#define INGENIC_ADC_AUX		0
+#define INGENIC_ADC_BATTERY	1
+
+#endif
-- 
cgit v1.2.3


From 5efd1d94a5a748c492580b50b9bd3a7e42c31411 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.ibm.com>
Date: Thu, 17 Jan 2019 10:26:59 -0800
Subject: linux/rcu_node_tree: Convert to SPDX license identifier

Replace the license boiler plate with a SPDX license identifier.
While in the area, update an email address.

Signed-off-by: Paul E. McKenney <paulmck@linux.ibm.com>
[ paulmck: Update .h SPDX comment format per Joe Perches. ]
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/rcu_node_tree.h | 17 ++---------------
 1 file changed, 2 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/include/linux/rcu_node_tree.h b/include/linux/rcu_node_tree.h
index 426cee67f0e2..b8e094b125ee 100644
--- a/include/linux/rcu_node_tree.h
+++ b/include/linux/rcu_node_tree.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
 /*
  * RCU node combining tree definitions.  These are used to compute
  * global attributes while avoiding common-case global contention.  A key
@@ -11,23 +12,9 @@
  * because the size of the TREE SRCU srcu_struct structure depends
  * on these definitions.
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, you can access it online at
- * http://www.gnu.org/licenses/gpl-2.0.html.
- *
  * Copyright IBM Corporation, 2017
  *
- * Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+ * Author: Paul E. McKenney <paulmck@linux.ibm.com>
  */
 
 #ifndef __LINUX_RCU_NODE_TREE_H
-- 
cgit v1.2.3


From 73604da52167c17c4000a38f7f784f5a2edf0461 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.ibm.com>
Date: Thu, 17 Jan 2019 10:30:40 -0800
Subject: linux/rcupdate: Convert to SPDX license identifier

Replace the license boiler plate with a SPDX license identifier.
While in the area, update an email address.

Signed-off-by: Paul E. McKenney <paulmck@linux.ibm.com>
[ paulmck: Update .h SPDX format per Joe Perches. ]
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/rcupdate.h | 17 ++---------------
 1 file changed, 2 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 0e39e0d2629e..4c82279dd4b7 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -1,25 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
 /*
  * Read-Copy Update mechanism for mutual exclusion
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, you can access it online at
- * http://www.gnu.org/licenses/gpl-2.0.html.
- *
  * Copyright IBM Corporation, 2001
  *
  * Author: Dipankar Sarma <dipankar@in.ibm.com>
  *
- * Based on the original work by Paul McKenney <paulmck@us.ibm.com>
+ * Based on the original work by Paul McKenney <paulmck@vnet.ibm.com>
  * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen.
  * Papers:
  * http://www.rdrop.com/users/paulmck/paper/rclockpdcsproof.pdf
-- 
cgit v1.2.3


From 265b4d4dc16c2a04ca72386d17c93e5901f5212a Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.ibm.com>
Date: Thu, 17 Jan 2019 10:31:34 -0800
Subject: linux/rcu_segcblist: Convert to SPDX license identifier

Replace the license boiler plate with a SPDX license identifier.
While in the area, update an email address.

Signed-off-by: Paul E. McKenney <paulmck@linux.ibm.com>
[ paulmck: Update .h SPDX format per Joe Perches. ]
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/rcu_segcblist.h | 17 ++---------------
 1 file changed, 2 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/include/linux/rcu_segcblist.h b/include/linux/rcu_segcblist.h
index c3ad00e63556..87404cb015f1 100644
--- a/include/linux/rcu_segcblist.h
+++ b/include/linux/rcu_segcblist.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
 /*
  * RCU segmented callback lists
  *
@@ -5,23 +6,9 @@
  * because the size of the TREE SRCU srcu_struct structure depends
  * on these definitions.
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, you can access it online at
- * http://www.gnu.org/licenses/gpl-2.0.html.
- *
  * Copyright IBM Corporation, 2017
  *
- * Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+ * Authors: Paul E. McKenney <paulmck@linux.net.ibm.com>
  */
 
 #ifndef __INCLUDE_LINUX_RCU_SEGCBLIST_H
-- 
cgit v1.2.3


From a66e0092fff1f1d4ac3e3de6090b3f15a5ca784a Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.ibm.com>
Date: Thu, 17 Jan 2019 10:32:48 -0800
Subject: linux/rcu_sync: Convert to SPDX license identifier

Replace the license boiler plate with a SPDX license identifier.

Signed-off-by: Paul E. McKenney <paulmck@linux.ibm.com>
[ paulmck: Update .h SPDX format per Joe Perches. ]
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/rcu_sync.h | 15 +--------------
 1 file changed, 1 insertion(+), 14 deletions(-)

(limited to 'include')

diff --git a/include/linux/rcu_sync.h b/include/linux/rcu_sync.h
index ece7ed9a4a70..6fc53a1345b3 100644
--- a/include/linux/rcu_sync.h
+++ b/include/linux/rcu_sync.h
@@ -1,20 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
 /*
  * RCU-based infrastructure for lightweight reader-writer locking
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, you can access it online at
- * http://www.gnu.org/licenses/gpl-2.0.html.
- *
  * Copyright (c) 2015, Red Hat, Inc.
  *
  * Author: Oleg Nesterov <oleg@redhat.com>
-- 
cgit v1.2.3


From 6c4421273694bd2351e230f491c1033b118734fd Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.ibm.com>
Date: Thu, 17 Jan 2019 10:34:35 -0800
Subject: linux/rcutiny: Convert to SPDX license identifier

Replace the license boiler plate with a SPDX license identifier.
While in the area, update an email address.

Signed-off-by: Paul E. McKenney <paulmck@linux.ibm.com>
[ paulmck: Update .h SPDX format per Joe Perches. ]
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/rcutiny.h | 17 ++---------------
 1 file changed, 2 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index af65d1f36ddb..8e727f57d814 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -1,23 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
 /*
  * Read-Copy Update mechanism for mutual exclusion, the Bloatwatch edition.
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, you can access it online at
- * http://www.gnu.org/licenses/gpl-2.0.html.
- *
  * Copyright IBM Corporation, 2008
  *
- * Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+ * Author: Paul E. McKenney <paulmck@linux.ibm.com>
  *
  * For detailed explanation of Read-Copy Update mechanism see -
  *		Documentation/RCU
-- 
cgit v1.2.3


From a9b7343ec1a2f061967e4a17eb9276d129b679f4 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.ibm.com>
Date: Thu, 17 Jan 2019 10:36:27 -0800
Subject: linux/rcutree: Convert to SPDX license identifier

Replace the license boiler plate with a SPDX license identifier.
While in the area, update an email address.

Signed-off-by: Paul E. McKenney <paulmck@linux.ibm.com>
[ paulmck: Update .h SPDX format per Joe Perches. ]
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/rcutree.h | 19 +++----------------
 1 file changed, 3 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index 7f83179177d1..735601ac27d3 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -1,26 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
 /*
  * Read-Copy Update mechanism for mutual exclusion (tree-based version)
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, you can access it online at
- * http://www.gnu.org/licenses/gpl-2.0.html.
- *
  * Copyright IBM Corporation, 2008
  *
  * Author: Dipankar Sarma <dipankar@in.ibm.com>
- *	   Paul E. McKenney <paulmck@linux.vnet.ibm.com> Hierarchical algorithm
+ *	   Paul E. McKenney <paulmck@linux.ibm.com> Hierarchical algorithm
  *
- * Based on the original work by Paul McKenney <paulmck@us.ibm.com>
+ * Based on the original work by Paul McKenney <paulmck@linux.ibm.com>
  * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen.
  *
  * For detailed explanation of Read-Copy Update mechanism see -
-- 
cgit v1.2.3


From 8c366db05b1f27fac01a7dbf9e4904d499bd5d55 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.ibm.com>
Date: Thu, 17 Jan 2019 10:39:22 -0800
Subject: linux/srcu: Convert to SPDX license identifier

Replace the license boiler plate with a SPDX license identifier.
While in the area, update an email address.

Signed-off-by: Paul E. McKenney <paulmck@linux.ibm.com>
[ paulmck: Update ,h SPDX format per Joe Perches. ]
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/srcu.h     | 17 ++---------------
 include/linux/srcutiny.h | 17 ++---------------
 include/linux/srcutree.h | 17 ++---------------
 3 files changed, 6 insertions(+), 45 deletions(-)

(limited to 'include')

diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index c614375cd264..0d5fed02df16 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -1,24 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
 /*
  * Sleepable Read-Copy Update mechanism for mutual exclusion
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, you can access it online at
- * http://www.gnu.org/licenses/gpl-2.0.html.
- *
  * Copyright (C) IBM Corporation, 2006
  * Copyright (C) Fujitsu, 2012
  *
- * Author: Paul McKenney <paulmck@us.ibm.com>
+ * Author: Paul McKenney <paulmck@linux.ibm.com>
  *	   Lai Jiangshan <laijs@cn.fujitsu.com>
  *
  * For detailed explanation of Read-Copy Update mechanism see -
diff --git a/include/linux/srcutiny.h b/include/linux/srcutiny.h
index b19216aaaef2..5a5a1941ca15 100644
--- a/include/linux/srcutiny.h
+++ b/include/linux/srcutiny.h
@@ -1,24 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
 /*
  * Sleepable Read-Copy Update mechanism for mutual exclusion,
  *	tiny variant.
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, you can access it online at
- * http://www.gnu.org/licenses/gpl-2.0.html.
- *
  * Copyright (C) IBM Corporation, 2017
  *
- * Author: Paul McKenney <paulmck@us.ibm.com>
+ * Author: Paul McKenney <paulmck@linux.ibm.com>
  */
 
 #ifndef _LINUX_SRCU_TINY_H
diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h
index 6f292bd3e7db..de7a42116b2e 100644
--- a/include/linux/srcutree.h
+++ b/include/linux/srcutree.h
@@ -1,24 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
 /*
  * Sleepable Read-Copy Update mechanism for mutual exclusion,
  *	tree variant.
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, you can access it online at
- * http://www.gnu.org/licenses/gpl-2.0.html.
- *
  * Copyright (C) IBM Corporation, 2017
  *
- * Author: Paul McKenney <paulmck@us.ibm.com>
+ * Author: Paul McKenney <paulmck@linux.ibm.com>
  */
 
 #ifndef _LINUX_SRCU_TREE_H
-- 
cgit v1.2.3


From 082dfb3c93d6c0f85025638928c92933f62d234d Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.ibm.com>
Date: Thu, 17 Jan 2019 10:46:34 -0800
Subject: linux/torture: Convert to SPDX license identifier

Replace the license boiler plate with a SPDX license identifier.
While in the area, update an email address.

Signed-off-by: Paul E. McKenney <paulmck@linux.ibm.com>
[ paulmck: Update .h SPDX format per Joe Perches. ]
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/torture.h | 17 ++---------------
 1 file changed, 2 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/include/linux/torture.h b/include/linux/torture.h
index 48fad21109fc..e5167820108a 100644
--- a/include/linux/torture.h
+++ b/include/linux/torture.h
@@ -1,23 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
 /*
  * Common functions for in-kernel torture tests.
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, you can access it online at
- * http://www.gnu.org/licenses/gpl-2.0.html.
- *
  * Copyright IBM Corporation, 2014
  *
- * Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+ * Author: Paul E. McKenney <paulmck@linux.ibm.com>
  */
 
 #ifndef __LINUX_TORTURE_H
-- 
cgit v1.2.3


From efbdfdc29bdd4dbf79ad4bddc8f7a5ac62c66bfe Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Sat, 9 Feb 2019 15:24:47 +0100
Subject: net: phy: Add support for asking the PHY its abilities

Add support for runtime determination of what the PHY supports, by
adding a new function to the phy driver. The get_features call should
set the phydev->supported member with the features the PHY supports.
It is only called if phydrv->features is NULL.

This requires minor changes to pause. The PHY driver should not set
pause abilities, except for when it has odd cause capabilities, e.g.
pause cannot be disabled. With this change, phydev->supported already
contains the drivers abilities, including pause. So rather than
considering phydrv->features, look at the phydev->supported, and
enable pause if neither of the pause bits are already set.

Signed-off-by: Andrew Lunn <andrew@lunn.ch>
[hkallweit1@gmail.com: fixed small checkpatch complaint in one comment]
Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/phy_device.c | 31 +++++++++++++++----------------
 include/linux/phy.h          |  6 ++++++
 2 files changed, 21 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index 92b7a71df0ac..8573d17ece0f 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -2236,7 +2236,14 @@ static int phy_probe(struct device *dev)
 	 * a controller will attach, and may modify one
 	 * or both of these values
 	 */
-	linkmode_copy(phydev->supported, phydrv->features);
+	if (phydrv->features) {
+		linkmode_copy(phydev->supported, phydrv->features);
+	} else {
+		err = phydrv->get_features(phydev);
+		if (err)
+			goto out;
+	}
+
 	of_set_phy_supported(phydev);
 	linkmode_copy(phydev->advertising, phydev->supported);
 
@@ -2256,20 +2263,8 @@ static int phy_probe(struct device *dev)
 	 * (e.g. hardware erratum) where the driver wants to set only one
 	 * of these bits.
 	 */
-	if (test_bit(ETHTOOL_LINK_MODE_Pause_BIT, phydrv->features) ||
-	    test_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, phydrv->features)) {
-		linkmode_clear_bit(ETHTOOL_LINK_MODE_Pause_BIT,
-				   phydev->supported);
-		linkmode_clear_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT,
-				   phydev->supported);
-		if (test_bit(ETHTOOL_LINK_MODE_Pause_BIT, phydrv->features))
-			linkmode_set_bit(ETHTOOL_LINK_MODE_Pause_BIT,
-					 phydev->supported);
-		if (test_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT,
-			     phydrv->features))
-			linkmode_set_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT,
-					 phydev->supported);
-	} else {
+	if (!test_bit(ETHTOOL_LINK_MODE_Pause_BIT, phydev->supported) &&
+	    !test_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, phydev->supported)) {
 		linkmode_set_bit(ETHTOOL_LINK_MODE_Pause_BIT,
 				 phydev->supported);
 		linkmode_set_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT,
@@ -2315,7 +2310,11 @@ int phy_driver_register(struct phy_driver *new_driver, struct module *owner)
 {
 	int retval;
 
-	if (WARN_ON(!new_driver->features)) {
+	/* Either the features are hard coded, or dynamically
+	 * determine. It cannot be both or neither
+	 */
+	if (WARN_ON((!new_driver->features && !new_driver->get_features) ||
+		    (new_driver->features && new_driver->get_features))) {
 		pr_err("%s: Driver features are missing\n", new_driver->name);
 		return -EINVAL;
 	}
diff --git a/include/linux/phy.h b/include/linux/phy.h
index f41bf651f6a0..d2ffae992e4a 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -502,6 +502,12 @@ struct phy_driver {
 	 */
 	int (*probe)(struct phy_device *phydev);
 
+	/*
+	 * Probe the hardware to determine what abilities it has.
+	 * Should only set phydev->supported.
+	 */
+	int (*get_features)(struct phy_device *phydev);
+
 	/* PHY Power Management */
 	int (*suspend)(struct phy_device *phydev);
 	int (*resume)(struct phy_device *phydev);
-- 
cgit v1.2.3


From d11a3998985b351aaab6bbdc23bc884bd5e815c8 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Sat, 9 Feb 2019 15:40:24 -0700
Subject: block: kill QUEUE_FLAG_FLUSH_NQ

We have various helpers for setting/clearing this flag, and also
a helper to check if the queue supports queueable flushes or not.
But nobody uses them anymore, kill it with fire.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq-debugfs.c        | 1 -
 block/blk-settings.c          | 9 ---------
 drivers/ata/libata-scsi.c     | 2 --
 drivers/block/null_blk_main.c | 1 -
 include/linux/blkdev.h        | 7 -------
 5 files changed, 20 deletions(-)

(limited to 'include')

diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index f8120832ca7b..c782e81db627 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -132,7 +132,6 @@ static const char *const blk_queue_flag_name[] = {
 	QUEUE_FLAG_NAME(POLL),
 	QUEUE_FLAG_NAME(WC),
 	QUEUE_FLAG_NAME(FUA),
-	QUEUE_FLAG_NAME(FLUSH_NQ),
 	QUEUE_FLAG_NAME(DAX),
 	QUEUE_FLAG_NAME(STATS),
 	QUEUE_FLAG_NAME(POLL_STATS),
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 3e7038e475ee..6375afaedcec 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -799,15 +799,6 @@ void blk_queue_update_dma_alignment(struct request_queue *q, int mask)
 }
 EXPORT_SYMBOL(blk_queue_update_dma_alignment);
 
-void blk_queue_flush_queueable(struct request_queue *q, bool queueable)
-{
-	if (queueable)
-		blk_queue_flag_clear(QUEUE_FLAG_FLUSH_NQ, q);
-	else
-		blk_queue_flag_set(QUEUE_FLAG_FLUSH_NQ, q);
-}
-EXPORT_SYMBOL_GPL(blk_queue_flush_queueable);
-
 /**
  * blk_set_queue_depth - tell the block layer about the device queue depth
  * @q:		the request queue for the device
diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index 3d4887d0e84a..dfe66d00dd5b 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -1318,8 +1318,6 @@ static int ata_scsi_dev_config(struct scsi_device *sdev,
 		scsi_change_queue_depth(sdev, depth);
 	}
 
-	blk_queue_flush_queueable(q, false);
-
 	if (dev->flags & ATA_DFLAG_TRUSTED)
 		sdev->security_supported = 1;
 
diff --git a/drivers/block/null_blk_main.c b/drivers/block/null_blk_main.c
index 62c9654b9ce8..83c38a6217d7 100644
--- a/drivers/block/null_blk_main.c
+++ b/drivers/block/null_blk_main.c
@@ -1678,7 +1678,6 @@ static int null_add_dev(struct nullb_device *dev)
 	if (dev->cache_size > 0) {
 		set_bit(NULLB_DEV_FL_CACHE, &nullb->dev->flags);
 		blk_queue_write_cache(nullb->q, true, true);
-		blk_queue_flush_queueable(nullb->q, true);
 	}
 
 	if (dev->zoned) {
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 338604dff7d0..24ccab51085f 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -592,7 +592,6 @@ struct request_queue {
 #define QUEUE_FLAG_POLL	       19	/* IO polling enabled if set */
 #define QUEUE_FLAG_WC	       20	/* Write back caching */
 #define QUEUE_FLAG_FUA	       21	/* device supports FUA writes */
-#define QUEUE_FLAG_FLUSH_NQ    22	/* flush not queueuable */
 #define QUEUE_FLAG_DAX         23	/* device supports DAX */
 #define QUEUE_FLAG_STATS       24	/* track IO start and completion times */
 #define QUEUE_FLAG_POLL_STATS  25	/* collecting stats for hybrid polling */
@@ -1069,7 +1068,6 @@ extern void blk_queue_virt_boundary(struct request_queue *, unsigned long);
 extern void blk_queue_dma_alignment(struct request_queue *, int);
 extern void blk_queue_update_dma_alignment(struct request_queue *, int);
 extern void blk_queue_rq_timeout(struct request_queue *, unsigned int);
-extern void blk_queue_flush_queueable(struct request_queue *q, bool queueable);
 extern void blk_queue_write_cache(struct request_queue *q, bool enabled, bool fua);
 
 /*
@@ -1446,11 +1444,6 @@ static inline unsigned int block_size(struct block_device *bdev)
 	return bdev->bd_block_size;
 }
 
-static inline bool queue_flush_queueable(struct request_queue *q)
-{
-	return !test_bit(QUEUE_FLAG_FLUSH_NQ, &q->queue_flags);
-}
-
 typedef struct {struct page *v;} Sector;
 
 unsigned char *read_dev_sector(struct block_device *, sector_t, Sector *);
-- 
cgit v1.2.3


From eca7abf31abba2acac445ec6a1d3f94cf0cab918 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Sat, 9 Feb 2019 15:42:07 -0700
Subject: block: queue flag cleanup

We have QUEUE_FLAG_DEFAULT defined, but it's not used anymore since
the legacy IO stack is gone. Kill it.

Sanitize the queue flags in general, they use spaces (for some
reason), and the space is pretty sparse. With the flags renumbered,
we can more clearly see how many we have available.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 58 +++++++++++++++++++++++---------------------------
 1 file changed, 27 insertions(+), 31 deletions(-)

(limited to 'include')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 24ccab51085f..3603270cb82d 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -572,37 +572,33 @@ struct request_queue {
 	u64			write_hints[BLK_MAX_WRITE_HINTS];
 };
 
-#define QUEUE_FLAG_STOPPED	1	/* queue is stopped */
-#define QUEUE_FLAG_DYING	2	/* queue being torn down */
-#define QUEUE_FLAG_BIDI		4	/* queue supports bidi requests */
-#define QUEUE_FLAG_NOMERGES     5	/* disable merge attempts */
-#define QUEUE_FLAG_SAME_COMP	6	/* complete on same CPU-group */
-#define QUEUE_FLAG_FAIL_IO	7	/* fake timeout */
-#define QUEUE_FLAG_NONROT	9	/* non-rotational device (SSD) */
-#define QUEUE_FLAG_VIRT        QUEUE_FLAG_NONROT /* paravirt device */
-#define QUEUE_FLAG_IO_STAT     10	/* do disk/partitions IO accounting */
-#define QUEUE_FLAG_DISCARD     11	/* supports DISCARD */
-#define QUEUE_FLAG_NOXMERGES   12	/* No extended merges */
-#define QUEUE_FLAG_ADD_RANDOM  13	/* Contributes to random pool */
-#define QUEUE_FLAG_SECERASE    14	/* supports secure erase */
-#define QUEUE_FLAG_SAME_FORCE  15	/* force complete on same CPU */
-#define QUEUE_FLAG_DEAD        16	/* queue tear-down finished */
-#define QUEUE_FLAG_INIT_DONE   17	/* queue is initialized */
-#define QUEUE_FLAG_NO_SG_MERGE 18	/* don't attempt to merge SG segments*/
-#define QUEUE_FLAG_POLL	       19	/* IO polling enabled if set */
-#define QUEUE_FLAG_WC	       20	/* Write back caching */
-#define QUEUE_FLAG_FUA	       21	/* device supports FUA writes */
-#define QUEUE_FLAG_DAX         23	/* device supports DAX */
-#define QUEUE_FLAG_STATS       24	/* track IO start and completion times */
-#define QUEUE_FLAG_POLL_STATS  25	/* collecting stats for hybrid polling */
-#define QUEUE_FLAG_REGISTERED  26	/* queue has been registered to a disk */
-#define QUEUE_FLAG_SCSI_PASSTHROUGH 27	/* queue supports SCSI commands */
-#define QUEUE_FLAG_QUIESCED    28	/* queue has been quiesced */
-#define QUEUE_FLAG_PCI_P2PDMA  29	/* device supports PCI p2p requests */
-
-#define QUEUE_FLAG_DEFAULT	((1 << QUEUE_FLAG_IO_STAT) |		\
-				 (1 << QUEUE_FLAG_SAME_COMP)	|	\
-				 (1 << QUEUE_FLAG_ADD_RANDOM))
+#define QUEUE_FLAG_STOPPED	0	/* queue is stopped */
+#define QUEUE_FLAG_DYING	1	/* queue being torn down */
+#define QUEUE_FLAG_BIDI		2	/* queue supports bidi requests */
+#define QUEUE_FLAG_NOMERGES     3	/* disable merge attempts */
+#define QUEUE_FLAG_SAME_COMP	4	/* complete on same CPU-group */
+#define QUEUE_FLAG_FAIL_IO	5	/* fake timeout */
+#define QUEUE_FLAG_NONROT	6	/* non-rotational device (SSD) */
+#define QUEUE_FLAG_VIRT		QUEUE_FLAG_NONROT /* paravirt device */
+#define QUEUE_FLAG_IO_STAT	7	/* do disk/partitions IO accounting */
+#define QUEUE_FLAG_DISCARD	8	/* supports DISCARD */
+#define QUEUE_FLAG_NOXMERGES	9	/* No extended merges */
+#define QUEUE_FLAG_ADD_RANDOM	10	/* Contributes to random pool */
+#define QUEUE_FLAG_SECERASE	11	/* supports secure erase */
+#define QUEUE_FLAG_SAME_FORCE	12	/* force complete on same CPU */
+#define QUEUE_FLAG_DEAD		13	/* queue tear-down finished */
+#define QUEUE_FLAG_INIT_DONE	14	/* queue is initialized */
+#define QUEUE_FLAG_NO_SG_MERGE	15	/* don't attempt to merge SG segments*/
+#define QUEUE_FLAG_POLL		16	/* IO polling enabled if set */
+#define QUEUE_FLAG_WC		17	/* Write back caching */
+#define QUEUE_FLAG_FUA		18	/* device supports FUA writes */
+#define QUEUE_FLAG_DAX		19	/* device supports DAX */
+#define QUEUE_FLAG_STATS	20	/* track IO start and completion times */
+#define QUEUE_FLAG_POLL_STATS	21	/* collecting stats for hybrid polling */
+#define QUEUE_FLAG_REGISTERED	22	/* queue has been registered to a disk */
+#define QUEUE_FLAG_SCSI_PASSTHROUGH 23	/* queue supports SCSI commands */
+#define QUEUE_FLAG_QUIESCED	24	/* queue has been quiesced */
+#define QUEUE_FLAG_PCI_P2PDMA	25	/* device supports PCI p2p requests */
 
 #define QUEUE_FLAG_MQ_DEFAULT	((1 << QUEUE_FLAG_IO_STAT) |		\
 				 (1 << QUEUE_FLAG_SAME_COMP))
-- 
cgit v1.2.3


From c65ea996595005be470fbfa16711deba414fd33b Mon Sep 17 00:00:00 2001
From: Corey Minyard <cminyard@mvista.com>
Date: Tue, 23 Oct 2018 11:29:02 -0500
Subject: ipmi: Fix how the lower layers are told to watch for messages

The IPMI driver has a mechanism to tell the lower layers it needs
to watch for messages, commands, and watchdogs (so it doesn't
needlessly poll).  However, it needed some extensions, it needed
a way to tell what is being waited for so it could set the timeout
appropriately.

The update to the lower layer was also being done once a second
at best because it was done in the main timeout handler.  However,
if a command is sent and a response message is coming back,
it needed to be started immediately.  So modify the code to
update immediately if it needs to be enabled.  Disable is still
lazy.

Signed-off-by: Corey Minyard <cminyard@mvista.com>
Tested-by: Kamlakant Patel <kamlakant.patel@cavium.com>
---
 drivers/char/ipmi/ipmi_msghandler.c | 119 +++++++++++++++++++++++++-----------
 drivers/char/ipmi/ipmi_si_intf.c    |   5 +-
 drivers/char/ipmi/ipmi_ssif.c       |  26 +++++---
 include/linux/ipmi_smi.h            |  36 ++++++++---
 4 files changed, 134 insertions(+), 52 deletions(-)

(limited to 'include')

diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c
index c518659b4d9f..2e008efa735f 100644
--- a/drivers/char/ipmi/ipmi_msghandler.c
+++ b/drivers/char/ipmi/ipmi_msghandler.c
@@ -529,9 +529,22 @@ struct ipmi_smi {
 	unsigned int     waiting_events_count; /* How many events in queue? */
 	char             delivering_events;
 	char             event_msg_printed;
+
+	/* How many users are waiting for events? */
 	atomic_t         event_waiters;
 	unsigned int     ticks_to_req_ev;
-	int              last_needs_timer;
+
+	/* How many users are waiting for commands? */
+	atomic_t         command_waiters;
+
+	/* How many users are waiting for watchdogs? */
+	atomic_t         watchdog_waiters;
+
+	/*
+	 * Tells what the lower layer has last been asked to watch for,
+	 * messages and/or watchdogs.  Protected by xmit_msgs_lock.
+	 */
+	unsigned int     last_watch_mask;
 
 	/*
 	 * The event receiver for my BMC, only really used at panic
@@ -1078,6 +1091,29 @@ static int intf_err_seq(struct ipmi_smi *intf,
 	return rv;
 }
 
+/* Must be called with xmit_msgs_lock held. */
+static void smi_tell_to_watch(struct ipmi_smi *intf,
+			      unsigned int flags,
+			      struct ipmi_smi_msg *smi_msg)
+{
+	if (flags & IPMI_WATCH_MASK_CHECK_MESSAGES) {
+		if (!smi_msg)
+			return;
+
+		if (!smi_msg->needs_response)
+			return;
+	}
+
+	if (!intf->handlers->set_need_watch)
+		return;
+
+	if ((intf->last_watch_mask & flags) == flags)
+		return;
+
+	intf->last_watch_mask |= flags;
+	intf->handlers->set_need_watch(intf->send_info,
+				       intf->last_watch_mask);
+}
 
 int ipmi_create_user(unsigned int          if_num,
 		     const struct ipmi_user_hndl *handler,
@@ -1141,8 +1177,9 @@ int ipmi_create_user(unsigned int          if_num,
 	spin_unlock_irqrestore(&intf->seq_lock, flags);
 	if (handler->ipmi_watchdog_pretimeout) {
 		/* User wants pretimeouts, so make sure to watch for them. */
-		if (atomic_inc_return(&intf->event_waiters) == 1)
-			need_waiter(intf);
+		if (atomic_inc_return(&intf->watchdog_waiters) == 1)
+			smi_tell_to_watch(intf, IPMI_WATCH_MASK_CHECK_WATCHDOG,
+					  NULL);
 	}
 	srcu_read_unlock(&ipmi_interfaces_srcu, index);
 	*user = new_user;
@@ -1214,7 +1251,7 @@ static void _ipmi_destroy_user(struct ipmi_user *user)
 		user->handler->shutdown(user->handler_data);
 
 	if (user->handler->ipmi_watchdog_pretimeout)
-		atomic_dec(&intf->event_waiters);
+		atomic_dec(&intf->watchdog_waiters);
 
 	if (user->gets_events)
 		atomic_dec(&intf->event_waiters);
@@ -1569,8 +1606,8 @@ int ipmi_register_for_cmd(struct ipmi_user *user,
 		goto out_unlock;
 	}
 
-	if (atomic_inc_return(&intf->event_waiters) == 1)
-		need_waiter(intf);
+	if (atomic_inc_return(&intf->command_waiters) == 1)
+		smi_tell_to_watch(intf, IPMI_WATCH_MASK_CHECK_COMMANDS, NULL);
 
 	list_add_rcu(&rcvr->link, &intf->cmd_rcvrs);
 
@@ -1620,7 +1657,7 @@ int ipmi_unregister_for_cmd(struct ipmi_user *user,
 	synchronize_rcu();
 	release_ipmi_user(user, index);
 	while (rcvrs) {
-		atomic_dec(&intf->event_waiters);
+		atomic_dec(&intf->command_waiters);
 		rcvr = rcvrs;
 		rcvrs = rcvr->next;
 		kfree(rcvr);
@@ -1737,22 +1774,21 @@ static struct ipmi_smi_msg *smi_add_send_msg(struct ipmi_smi *intf,
 	return smi_msg;
 }
 
-
 static void smi_send(struct ipmi_smi *intf,
 		     const struct ipmi_smi_handlers *handlers,
 		     struct ipmi_smi_msg *smi_msg, int priority)
 {
 	int run_to_completion = intf->run_to_completion;
+	unsigned long flags = 0;
 
-	if (run_to_completion) {
-		smi_msg = smi_add_send_msg(intf, smi_msg, priority);
-	} else {
-		unsigned long flags;
-
+	if (!run_to_completion)
 		spin_lock_irqsave(&intf->xmit_msgs_lock, flags);
-		smi_msg = smi_add_send_msg(intf, smi_msg, priority);
+	smi_msg = smi_add_send_msg(intf, smi_msg, priority);
+
+	smi_tell_to_watch(intf, IPMI_WATCH_MASK_CHECK_MESSAGES, smi_msg);
+
+	if (!run_to_completion)
 		spin_unlock_irqrestore(&intf->xmit_msgs_lock, flags);
-	}
 
 	if (smi_msg)
 		handlers->sender(intf->send_info, smi_msg);
@@ -1950,6 +1986,9 @@ static int i_ipmi_req_ipmb(struct ipmi_smi        *intf,
 				ipmb_seq, broadcast,
 				source_address, source_lun);
 
+		/* We will be getting a response in the BMC message queue. */
+		smi_msg->needs_response = true;
+
 		/*
 		 * Copy the message into the recv message data, so we
 		 * can retransmit it later if necessary.
@@ -2137,6 +2176,7 @@ static int i_ipmi_request(struct ipmi_user     *user,
 			goto out;
 		}
 	}
+	smi_msg->needs_response = false;
 
 	rcu_read_lock();
 	if (intf->in_shutdown) {
@@ -3351,6 +3391,8 @@ int ipmi_register_smi(const struct ipmi_smi_handlers *handlers,
 	INIT_LIST_HEAD(&intf->hp_xmit_msgs);
 	spin_lock_init(&intf->events_lock);
 	atomic_set(&intf->event_waiters, 0);
+	atomic_set(&intf->watchdog_waiters, 0);
+	atomic_set(&intf->command_waiters, 0);
 	intf->ticks_to_req_ev = IPMI_REQUEST_EV_TIME;
 	INIT_LIST_HEAD(&intf->waiting_events);
 	intf->waiting_events_count = 0;
@@ -4365,6 +4407,9 @@ static void smi_recv_tasklet(unsigned long val)
 			intf->curr_msg = newmsg;
 		}
 	}
+
+	smi_tell_to_watch(intf, IPMI_WATCH_MASK_CHECK_MESSAGES, newmsg);
+
 	if (!run_to_completion)
 		spin_unlock_irqrestore(&intf->xmit_msgs_lock, flags);
 	if (newmsg)
@@ -4492,7 +4537,7 @@ static void check_msg_timeout(struct ipmi_smi *intf, struct seq_table *ent,
 			      struct list_head *timeouts,
 			      unsigned long timeout_period,
 			      int slot, unsigned long *flags,
-			      unsigned int *waiting_msgs)
+			      unsigned int *watch_mask)
 {
 	struct ipmi_recv_msg *msg;
 
@@ -4504,7 +4549,7 @@ static void check_msg_timeout(struct ipmi_smi *intf, struct seq_table *ent,
 
 	if (timeout_period < ent->timeout) {
 		ent->timeout -= timeout_period;
-		(*waiting_msgs)++;
+		*watch_mask |= IPMI_WATCH_MASK_CHECK_MESSAGES;
 		return;
 	}
 
@@ -4523,7 +4568,7 @@ static void check_msg_timeout(struct ipmi_smi *intf, struct seq_table *ent,
 		struct ipmi_smi_msg *smi_msg;
 		/* More retries, send again. */
 
-		(*waiting_msgs)++;
+		*watch_mask |= IPMI_WATCH_MASK_CHECK_MESSAGES;
 
 		/*
 		 * Start with the max timer, set to normal timer after
@@ -4575,13 +4620,13 @@ static unsigned int ipmi_timeout_handler(struct ipmi_smi *intf,
 	struct ipmi_recv_msg *msg, *msg2;
 	unsigned long        flags;
 	int                  i;
-	unsigned int         waiting_msgs = 0;
+	unsigned int         watch_mask = 0;
 
 	if (!intf->bmc_registered) {
 		kref_get(&intf->refcount);
 		if (!schedule_work(&intf->bmc_reg_work)) {
 			kref_put(&intf->refcount, intf_free);
-			waiting_msgs++;
+			watch_mask |= IPMI_WATCH_MASK_INTERNAL;
 		}
 	}
 
@@ -4601,7 +4646,7 @@ static unsigned int ipmi_timeout_handler(struct ipmi_smi *intf,
 	for (i = 0; i < IPMI_IPMB_NUM_SEQ; i++)
 		check_msg_timeout(intf, &intf->seq_table[i],
 				  &timeouts, timeout_period, i,
-				  &flags, &waiting_msgs);
+				  &flags, &watch_mask);
 	spin_unlock_irqrestore(&intf->seq_lock, flags);
 
 	list_for_each_entry_safe(msg, msg2, &timeouts, link)
@@ -4632,7 +4677,7 @@ static unsigned int ipmi_timeout_handler(struct ipmi_smi *intf,
 
 	tasklet_schedule(&intf->recv_tasklet);
 
-	return waiting_msgs;
+	return watch_mask;
 }
 
 static void ipmi_request_event(struct ipmi_smi *intf)
@@ -4652,37 +4697,43 @@ static atomic_t stop_operation;
 static void ipmi_timeout(struct timer_list *unused)
 {
 	struct ipmi_smi *intf;
-	int nt = 0, index;
+	unsigned int watch_mask = 0;
+	int index;
+	unsigned long flags;
 
 	if (atomic_read(&stop_operation))
 		return;
 
 	index = srcu_read_lock(&ipmi_interfaces_srcu);
 	list_for_each_entry_rcu(intf, &ipmi_interfaces, link) {
-		int lnt = 0;
-
 		if (atomic_read(&intf->event_waiters)) {
 			intf->ticks_to_req_ev--;
 			if (intf->ticks_to_req_ev == 0) {
 				ipmi_request_event(intf);
 				intf->ticks_to_req_ev = IPMI_REQUEST_EV_TIME;
 			}
-			lnt++;
+			watch_mask |= IPMI_WATCH_MASK_INTERNAL;
 		}
 
-		lnt += ipmi_timeout_handler(intf, IPMI_TIMEOUT_TIME);
+		if (atomic_read(&intf->watchdog_waiters))
+			watch_mask |= IPMI_WATCH_MASK_CHECK_WATCHDOG;
 
-		lnt = !!lnt;
-		if (lnt != intf->last_needs_timer &&
-					intf->handlers->set_need_watch)
-			intf->handlers->set_need_watch(intf->send_info, lnt);
-		intf->last_needs_timer = lnt;
+		if (atomic_read(&intf->command_waiters))
+			watch_mask |= IPMI_WATCH_MASK_CHECK_COMMANDS;
+
+		watch_mask |= ipmi_timeout_handler(intf, IPMI_TIMEOUT_TIME);
 
-		nt += lnt;
+		spin_lock_irqsave(&intf->xmit_msgs_lock, flags);
+		if (watch_mask != intf->last_watch_mask &&
+					intf->handlers->set_need_watch)
+			intf->handlers->set_need_watch(intf->send_info,
+						       watch_mask);
+		intf->last_watch_mask = watch_mask;
+		spin_unlock_irqrestore(&intf->xmit_msgs_lock, flags);
 	}
 	srcu_read_unlock(&ipmi_interfaces_srcu, index);
 
-	if (nt)
+	if (watch_mask)
 		mod_timer(&ipmi_timer, jiffies + IPMI_TIMEOUT_JIFFIES);
 }
 
diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c
index f1b9fda6b9df..c81c84a723b6 100644
--- a/drivers/char/ipmi/ipmi_si_intf.c
+++ b/drivers/char/ipmi/ipmi_si_intf.c
@@ -1060,10 +1060,13 @@ static void request_events(void *send_info)
 	atomic_set(&smi_info->req_events, 1);
 }
 
-static void set_need_watch(void *send_info, bool enable)
+static void set_need_watch(void *send_info, unsigned int watch_mask)
 {
 	struct smi_info *smi_info = send_info;
 	unsigned long flags;
+	int enable;
+
+	enable = !!(watch_mask & ~IPMI_WATCH_MASK_INTERNAL);
 
 	atomic_set(&smi_info->need_watch, enable);
 	spin_lock_irqsave(&smi_info->si_lock, flags);
diff --git a/drivers/char/ipmi/ipmi_ssif.c b/drivers/char/ipmi/ipmi_ssif.c
index 1aacc1144d2a..a1219af32105 100644
--- a/drivers/char/ipmi/ipmi_ssif.c
+++ b/drivers/char/ipmi/ipmi_ssif.c
@@ -93,8 +93,8 @@
 /*
  * Timeout for the watch, only used for get flag timer.
  */
-#define SSIF_WATCH_TIMEOUT_MSEC	   100
-#define SSIF_WATCH_TIMEOUT_JIFFIES msecs_to_jiffies(SSIF_WATCH_TIMEOUT_MSEC)
+#define SSIF_WATCH_MSG_TIMEOUT		msecs_to_jiffies(10)
+#define SSIF_WATCH_WATCHDOG_TIMEOUT	msecs_to_jiffies(250)
 
 enum ssif_intf_state {
 	SSIF_NORMAL,
@@ -276,7 +276,7 @@ struct ssif_info {
 	struct timer_list retry_timer;
 	int retries_left;
 
-	bool need_watch;		/* Need to look for flags? */
+	long watch_timeout;		/* Timeout for flags check, 0 if off. */
 	struct timer_list watch_timer;	/* Flag fetch timer. */
 
 	/* Info from SSIF cmd */
@@ -578,9 +578,9 @@ static void watch_timeout(struct timer_list *t)
 		return;
 
 	flags = ipmi_ssif_lock_cond(ssif_info, &oflags);
-	if (ssif_info->need_watch) {
+	if (ssif_info->watch_timeout) {
 		mod_timer(&ssif_info->watch_timer,
-			  jiffies + SSIF_WATCH_TIMEOUT_JIFFIES);
+			  jiffies + ssif_info->watch_timeout);
 		if (SSIF_IDLE(ssif_info)) {
 			start_flag_fetch(ssif_info, flags); /* Releases lock */
 			return;
@@ -1121,17 +1121,23 @@ static void request_events(void *send_info)
  * Upper layer is changing the flag saying whether we need to request
  * flags periodically or not.
  */
-static void ssif_set_need_watch(void *send_info, bool enable)
+static void ssif_set_need_watch(void *send_info, unsigned int watch_mask)
 {
 	struct ssif_info *ssif_info = (struct ssif_info *) send_info;
 	unsigned long oflags, *flags;
+	long timeout = 0;
+
+	if (watch_mask & IPMI_WATCH_MASK_CHECK_MESSAGES)
+		timeout = SSIF_WATCH_MSG_TIMEOUT;
+	else if (watch_mask & ~IPMI_WATCH_MASK_INTERNAL)
+		timeout = SSIF_WATCH_WATCHDOG_TIMEOUT;
 
 	flags = ipmi_ssif_lock_cond(ssif_info, &oflags);
-	if (enable != ssif_info->need_watch) {
-		ssif_info->need_watch = enable;
-		if (ssif_info->need_watch)
+	if (timeout != ssif_info->watch_timeout) {
+		ssif_info->watch_timeout = timeout;
+		if (ssif_info->watch_timeout)
 			mod_timer(&ssif_info->watch_timer,
-				  jiffies + SSIF_WATCH_TIMEOUT_JIFFIES);
+				  jiffies + ssif_info->watch_timeout);
 	}
 	ipmi_ssif_unlock_cond(ssif_info, flags);
 }
diff --git a/include/linux/ipmi_smi.h b/include/linux/ipmi_smi.h
index 8c4e2ab696c3..da6abb06a5dc 100644
--- a/include/linux/ipmi_smi.h
+++ b/include/linux/ipmi_smi.h
@@ -30,6 +30,17 @@ struct device;
 /* Structure for the low-level drivers. */
 struct ipmi_smi;
 
+/*
+ * Flags for set_check_watch() below.  Tells if the SMI should be
+ * waiting for watchdog timeouts, commands and/or messages.  There is
+ * also an internal flag for the message handler, SMIs should ignore
+ * it.
+ */
+#define IPMI_WATCH_MASK_INTERNAL	(1 << 0)
+#define IPMI_WATCH_MASK_CHECK_MESSAGES	(1 << 1)
+#define IPMI_WATCH_MASK_CHECK_WATCHDOG	(1 << 2)
+#define IPMI_WATCH_MASK_CHECK_COMMANDS	(1 << 3)
+
 /*
  * Messages to/from the lower layer.  The smi interface will take one
  * of these to send. After the send has occurred and a response has
@@ -55,8 +66,16 @@ struct ipmi_smi_msg {
 	int           rsp_size;
 	unsigned char rsp[IPMI_MAX_MSG_LENGTH];
 
-	/* Will be called when the system is done with the message
-	   (presumably to free it). */
+	/*
+	 * There should be a response message coming back in the BMC
+	 * message queue.
+	 */
+	bool needs_response;
+
+	/*
+	 * Will be called when the system is done with the message
+	 * (presumably to free it).
+	 */
 	void (*done)(struct ipmi_smi_msg *msg);
 };
 
@@ -105,12 +124,15 @@ struct ipmi_smi_handlers {
 
 	/*
 	 * Called by the upper layer when some user requires that the
-	 * interface watch for events, received messages, watchdog
-	 * pretimeouts, or not.  Used by the SMI to know if it should
-	 * watch for these.  This may be NULL if the SMI does not
-	 * implement it.
+	 * interface watch for received messages and watchdog
+	 * pretimeouts (basically do a "Get Flags", or not.  Used by
+	 * the SMI to know if it should watch for these.  This may be
+	 * NULL if the SMI does not implement it.  watch_mask is from
+	 * IPMI_WATCH_MASK_xxx above.  The interface should run slower
+	 * timeouts for just watchdog checking or faster timeouts when
+	 * waiting for the message queue.
 	 */
-	void (*set_need_watch)(void *send_info, bool enable);
+	void (*set_need_watch)(void *send_info, unsigned int watch_mask);
 
 	/*
 	 * Called when flushing all pending messages.
-- 
cgit v1.2.3


From e1891cffd4c4896a899337a243273f0e23c028df Mon Sep 17 00:00:00 2001
From: Corey Minyard <cminyard@mvista.com>
Date: Wed, 24 Oct 2018 15:17:04 -0500
Subject: ipmi: Make the smi watcher be disabled immediately when not needed

The code to tell the lower layer to enable or disable watching for
certain things was lazy in disabling, it waited until a timer tick
to see if a disable was necessary.  Not a really big deal, but it
could be improved.

Modify the code to enable and disable watching immediately and don't
do it from the background timer any more.

Signed-off-by: Corey Minyard <cminyard@mvista.com>
Tested-by: Kamlakant Patel <kamlakant.patel@cavium.com>
---
 drivers/char/ipmi/ipmi_msghandler.c | 164 ++++++++++++++++++++----------------
 drivers/char/ipmi/ipmi_si_intf.c    |   2 +-
 drivers/char/ipmi/ipmi_ssif.c       |   2 +-
 include/linux/ipmi_smi.h            |  17 +---
 4 files changed, 96 insertions(+), 89 deletions(-)

(limited to 'include')

diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c
index 2e008efa735f..5bc84fbbeee5 100644
--- a/drivers/char/ipmi/ipmi_msghandler.c
+++ b/drivers/char/ipmi/ipmi_msghandler.c
@@ -534,15 +534,20 @@ struct ipmi_smi {
 	atomic_t         event_waiters;
 	unsigned int     ticks_to_req_ev;
 
+	spinlock_t       watch_lock; /* For dealing with watch stuff below. */
+
 	/* How many users are waiting for commands? */
-	atomic_t         command_waiters;
+	unsigned int     command_waiters;
 
 	/* How many users are waiting for watchdogs? */
-	atomic_t         watchdog_waiters;
+	unsigned int     watchdog_waiters;
+
+	/* How many users are waiting for message responses? */
+	unsigned int     response_waiters;
 
 	/*
 	 * Tells what the lower layer has last been asked to watch for,
-	 * messages and/or watchdogs.  Protected by xmit_msgs_lock.
+	 * messages and/or watchdogs.  Protected by watch_lock.
 	 */
 	unsigned int     last_watch_mask;
 
@@ -938,6 +943,64 @@ static void deliver_err_response(struct ipmi_smi *intf,
 	deliver_local_response(intf, msg);
 }
 
+static void smi_add_watch(struct ipmi_smi *intf, unsigned int flags)
+{
+	unsigned long iflags;
+
+	if (!intf->handlers->set_need_watch)
+		return;
+
+	spin_lock_irqsave(&intf->watch_lock, iflags);
+	if (flags & IPMI_WATCH_MASK_CHECK_MESSAGES)
+		intf->response_waiters++;
+
+	if (flags & IPMI_WATCH_MASK_CHECK_WATCHDOG)
+		intf->watchdog_waiters++;
+
+	if (flags & IPMI_WATCH_MASK_CHECK_COMMANDS)
+		intf->command_waiters++;
+
+	if ((intf->last_watch_mask & flags) != flags) {
+		intf->last_watch_mask |= flags;
+		intf->handlers->set_need_watch(intf->send_info,
+					       intf->last_watch_mask);
+	}
+	spin_unlock_irqrestore(&intf->watch_lock, iflags);
+}
+
+static void smi_remove_watch(struct ipmi_smi *intf, unsigned int flags)
+{
+	unsigned long iflags;
+
+	if (!intf->handlers->set_need_watch)
+		return;
+
+	spin_lock_irqsave(&intf->watch_lock, iflags);
+	if (flags & IPMI_WATCH_MASK_CHECK_MESSAGES)
+		intf->response_waiters--;
+
+	if (flags & IPMI_WATCH_MASK_CHECK_WATCHDOG)
+		intf->watchdog_waiters--;
+
+	if (flags & IPMI_WATCH_MASK_CHECK_COMMANDS)
+		intf->command_waiters--;
+
+	flags = 0;
+	if (intf->response_waiters)
+		flags |= IPMI_WATCH_MASK_CHECK_MESSAGES;
+	if (intf->watchdog_waiters)
+		flags |= IPMI_WATCH_MASK_CHECK_WATCHDOG;
+	if (intf->command_waiters)
+		flags |= IPMI_WATCH_MASK_CHECK_COMMANDS;
+
+	if (intf->last_watch_mask != flags) {
+		intf->last_watch_mask = flags;
+		intf->handlers->set_need_watch(intf->send_info,
+					       intf->last_watch_mask);
+	}
+	spin_unlock_irqrestore(&intf->watch_lock, iflags);
+}
+
 /*
  * Find the next sequence number not being used and add the given
  * message with the given timeout to the sequence table.  This must be
@@ -981,6 +1044,7 @@ static int intf_next_seq(struct ipmi_smi      *intf,
 		*seq = i;
 		*seqid = intf->seq_table[i].seqid;
 		intf->curr_seq = (i+1)%IPMI_IPMB_NUM_SEQ;
+		smi_add_watch(intf, IPMI_WATCH_MASK_CHECK_MESSAGES);
 		need_waiter(intf);
 	} else {
 		rv = -EAGAIN;
@@ -1019,6 +1083,7 @@ static int intf_find_seq(struct ipmi_smi      *intf,
 				&& (ipmi_addr_equal(addr, &msg->addr))) {
 			*recv_msg = msg;
 			intf->seq_table[seq].inuse = 0;
+			smi_remove_watch(intf, IPMI_WATCH_MASK_CHECK_MESSAGES);
 			rv = 0;
 		}
 	}
@@ -1080,6 +1145,7 @@ static int intf_err_seq(struct ipmi_smi *intf,
 		struct seq_table *ent = &intf->seq_table[seq];
 
 		ent->inuse = 0;
+		smi_remove_watch(intf, IPMI_WATCH_MASK_CHECK_MESSAGES);
 		msg = ent->recv_msg;
 		rv = 0;
 	}
@@ -1091,30 +1157,6 @@ static int intf_err_seq(struct ipmi_smi *intf,
 	return rv;
 }
 
-/* Must be called with xmit_msgs_lock held. */
-static void smi_tell_to_watch(struct ipmi_smi *intf,
-			      unsigned int flags,
-			      struct ipmi_smi_msg *smi_msg)
-{
-	if (flags & IPMI_WATCH_MASK_CHECK_MESSAGES) {
-		if (!smi_msg)
-			return;
-
-		if (!smi_msg->needs_response)
-			return;
-	}
-
-	if (!intf->handlers->set_need_watch)
-		return;
-
-	if ((intf->last_watch_mask & flags) == flags)
-		return;
-
-	intf->last_watch_mask |= flags;
-	intf->handlers->set_need_watch(intf->send_info,
-				       intf->last_watch_mask);
-}
-
 int ipmi_create_user(unsigned int          if_num,
 		     const struct ipmi_user_hndl *handler,
 		     void                  *handler_data,
@@ -1175,12 +1217,9 @@ int ipmi_create_user(unsigned int          if_num,
 	spin_lock_irqsave(&intf->seq_lock, flags);
 	list_add_rcu(&new_user->link, &intf->users);
 	spin_unlock_irqrestore(&intf->seq_lock, flags);
-	if (handler->ipmi_watchdog_pretimeout) {
+	if (handler->ipmi_watchdog_pretimeout)
 		/* User wants pretimeouts, so make sure to watch for them. */
-		if (atomic_inc_return(&intf->watchdog_waiters) == 1)
-			smi_tell_to_watch(intf, IPMI_WATCH_MASK_CHECK_WATCHDOG,
-					  NULL);
-	}
+		smi_add_watch(intf, IPMI_WATCH_MASK_CHECK_WATCHDOG);
 	srcu_read_unlock(&ipmi_interfaces_srcu, index);
 	*user = new_user;
 	return 0;
@@ -1251,7 +1290,7 @@ static void _ipmi_destroy_user(struct ipmi_user *user)
 		user->handler->shutdown(user->handler_data);
 
 	if (user->handler->ipmi_watchdog_pretimeout)
-		atomic_dec(&intf->watchdog_waiters);
+		smi_remove_watch(intf, IPMI_WATCH_MASK_CHECK_WATCHDOG);
 
 	if (user->gets_events)
 		atomic_dec(&intf->event_waiters);
@@ -1264,6 +1303,7 @@ static void _ipmi_destroy_user(struct ipmi_user *user)
 		if (intf->seq_table[i].inuse
 		    && (intf->seq_table[i].recv_msg->user == user)) {
 			intf->seq_table[i].inuse = 0;
+			smi_remove_watch(intf, IPMI_WATCH_MASK_CHECK_MESSAGES);
 			ipmi_free_recv_msg(intf->seq_table[i].recv_msg);
 		}
 	}
@@ -1606,8 +1646,7 @@ int ipmi_register_for_cmd(struct ipmi_user *user,
 		goto out_unlock;
 	}
 
-	if (atomic_inc_return(&intf->command_waiters) == 1)
-		smi_tell_to_watch(intf, IPMI_WATCH_MASK_CHECK_COMMANDS, NULL);
+	smi_add_watch(intf, IPMI_WATCH_MASK_CHECK_COMMANDS);
 
 	list_add_rcu(&rcvr->link, &intf->cmd_rcvrs);
 
@@ -1657,7 +1696,7 @@ int ipmi_unregister_for_cmd(struct ipmi_user *user,
 	synchronize_rcu();
 	release_ipmi_user(user, index);
 	while (rcvrs) {
-		atomic_dec(&intf->command_waiters);
+		smi_remove_watch(intf, IPMI_WATCH_MASK_CHECK_COMMANDS);
 		rcvr = rcvrs;
 		rcvrs = rcvr->next;
 		kfree(rcvr);
@@ -1785,8 +1824,6 @@ static void smi_send(struct ipmi_smi *intf,
 		spin_lock_irqsave(&intf->xmit_msgs_lock, flags);
 	smi_msg = smi_add_send_msg(intf, smi_msg, priority);
 
-	smi_tell_to_watch(intf, IPMI_WATCH_MASK_CHECK_MESSAGES, smi_msg);
-
 	if (!run_to_completion)
 		spin_unlock_irqrestore(&intf->xmit_msgs_lock, flags);
 
@@ -1986,9 +2023,6 @@ static int i_ipmi_req_ipmb(struct ipmi_smi        *intf,
 				ipmb_seq, broadcast,
 				source_address, source_lun);
 
-		/* We will be getting a response in the BMC message queue. */
-		smi_msg->needs_response = true;
-
 		/*
 		 * Copy the message into the recv message data, so we
 		 * can retransmit it later if necessary.
@@ -2176,7 +2210,6 @@ static int i_ipmi_request(struct ipmi_user     *user,
 			goto out;
 		}
 	}
-	smi_msg->needs_response = false;
 
 	rcu_read_lock();
 	if (intf->in_shutdown) {
@@ -3390,9 +3423,8 @@ int ipmi_register_smi(const struct ipmi_smi_handlers *handlers,
 	INIT_LIST_HEAD(&intf->xmit_msgs);
 	INIT_LIST_HEAD(&intf->hp_xmit_msgs);
 	spin_lock_init(&intf->events_lock);
+	spin_lock_init(&intf->watch_lock);
 	atomic_set(&intf->event_waiters, 0);
-	atomic_set(&intf->watchdog_waiters, 0);
-	atomic_set(&intf->command_waiters, 0);
 	intf->ticks_to_req_ev = IPMI_REQUEST_EV_TIME;
 	INIT_LIST_HEAD(&intf->waiting_events);
 	intf->waiting_events_count = 0;
@@ -4408,8 +4440,6 @@ static void smi_recv_tasklet(unsigned long val)
 		}
 	}
 
-	smi_tell_to_watch(intf, IPMI_WATCH_MASK_CHECK_MESSAGES, newmsg);
-
 	if (!run_to_completion)
 		spin_unlock_irqrestore(&intf->xmit_msgs_lock, flags);
 	if (newmsg)
@@ -4537,7 +4567,7 @@ static void check_msg_timeout(struct ipmi_smi *intf, struct seq_table *ent,
 			      struct list_head *timeouts,
 			      unsigned long timeout_period,
 			      int slot, unsigned long *flags,
-			      unsigned int *watch_mask)
+			      bool *need_timer)
 {
 	struct ipmi_recv_msg *msg;
 
@@ -4549,13 +4579,14 @@ static void check_msg_timeout(struct ipmi_smi *intf, struct seq_table *ent,
 
 	if (timeout_period < ent->timeout) {
 		ent->timeout -= timeout_period;
-		*watch_mask |= IPMI_WATCH_MASK_CHECK_MESSAGES;
+		*need_timer = true;
 		return;
 	}
 
 	if (ent->retries_left == 0) {
 		/* The message has used all its retries. */
 		ent->inuse = 0;
+		smi_remove_watch(intf, IPMI_WATCH_MASK_CHECK_MESSAGES);
 		msg = ent->recv_msg;
 		list_add_tail(&msg->link, timeouts);
 		if (ent->broadcast)
@@ -4568,7 +4599,7 @@ static void check_msg_timeout(struct ipmi_smi *intf, struct seq_table *ent,
 		struct ipmi_smi_msg *smi_msg;
 		/* More retries, send again. */
 
-		*watch_mask |= IPMI_WATCH_MASK_CHECK_MESSAGES;
+		*need_timer = true;
 
 		/*
 		 * Start with the max timer, set to normal timer after
@@ -4613,20 +4644,20 @@ static void check_msg_timeout(struct ipmi_smi *intf, struct seq_table *ent,
 	}
 }
 
-static unsigned int ipmi_timeout_handler(struct ipmi_smi *intf,
-					 unsigned long timeout_period)
+static bool ipmi_timeout_handler(struct ipmi_smi *intf,
+				 unsigned long timeout_period)
 {
 	struct list_head     timeouts;
 	struct ipmi_recv_msg *msg, *msg2;
 	unsigned long        flags;
 	int                  i;
-	unsigned int         watch_mask = 0;
+	bool                 need_timer = false;
 
 	if (!intf->bmc_registered) {
 		kref_get(&intf->refcount);
 		if (!schedule_work(&intf->bmc_reg_work)) {
 			kref_put(&intf->refcount, intf_free);
-			watch_mask |= IPMI_WATCH_MASK_INTERNAL;
+			need_timer = true;
 		}
 	}
 
@@ -4646,7 +4677,7 @@ static unsigned int ipmi_timeout_handler(struct ipmi_smi *intf,
 	for (i = 0; i < IPMI_IPMB_NUM_SEQ; i++)
 		check_msg_timeout(intf, &intf->seq_table[i],
 				  &timeouts, timeout_period, i,
-				  &flags, &watch_mask);
+				  &flags, &need_timer);
 	spin_unlock_irqrestore(&intf->seq_lock, flags);
 
 	list_for_each_entry_safe(msg, msg2, &timeouts, link)
@@ -4677,7 +4708,7 @@ static unsigned int ipmi_timeout_handler(struct ipmi_smi *intf,
 
 	tasklet_schedule(&intf->recv_tasklet);
 
-	return watch_mask;
+	return need_timer;
 }
 
 static void ipmi_request_event(struct ipmi_smi *intf)
@@ -4697,9 +4728,8 @@ static atomic_t stop_operation;
 static void ipmi_timeout(struct timer_list *unused)
 {
 	struct ipmi_smi *intf;
-	unsigned int watch_mask = 0;
+	bool need_timer = false;
 	int index;
-	unsigned long flags;
 
 	if (atomic_read(&stop_operation))
 		return;
@@ -4712,28 +4742,14 @@ static void ipmi_timeout(struct timer_list *unused)
 				ipmi_request_event(intf);
 				intf->ticks_to_req_ev = IPMI_REQUEST_EV_TIME;
 			}
-			watch_mask |= IPMI_WATCH_MASK_INTERNAL;
+			need_timer = true;
 		}
 
-		if (atomic_read(&intf->watchdog_waiters))
-			watch_mask |= IPMI_WATCH_MASK_CHECK_WATCHDOG;
-
-		if (atomic_read(&intf->command_waiters))
-			watch_mask |= IPMI_WATCH_MASK_CHECK_COMMANDS;
-
-		watch_mask |= ipmi_timeout_handler(intf, IPMI_TIMEOUT_TIME);
-
-		spin_lock_irqsave(&intf->xmit_msgs_lock, flags);
-		if (watch_mask != intf->last_watch_mask &&
-					intf->handlers->set_need_watch)
-			intf->handlers->set_need_watch(intf->send_info,
-						       watch_mask);
-		intf->last_watch_mask = watch_mask;
-		spin_unlock_irqrestore(&intf->xmit_msgs_lock, flags);
+		need_timer |= ipmi_timeout_handler(intf, IPMI_TIMEOUT_TIME);
 	}
 	srcu_read_unlock(&ipmi_interfaces_srcu, index);
 
-	if (watch_mask)
+	if (need_timer)
 		mod_timer(&ipmi_timer, jiffies + IPMI_TIMEOUT_JIFFIES);
 }
 
diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c
index c81c84a723b6..ae99d6a14789 100644
--- a/drivers/char/ipmi/ipmi_si_intf.c
+++ b/drivers/char/ipmi/ipmi_si_intf.c
@@ -1066,7 +1066,7 @@ static void set_need_watch(void *send_info, unsigned int watch_mask)
 	unsigned long flags;
 	int enable;
 
-	enable = !!(watch_mask & ~IPMI_WATCH_MASK_INTERNAL);
+	enable = !!watch_mask;
 
 	atomic_set(&smi_info->need_watch, enable);
 	spin_lock_irqsave(&smi_info->si_lock, flags);
diff --git a/drivers/char/ipmi/ipmi_ssif.c b/drivers/char/ipmi/ipmi_ssif.c
index a1219af32105..e4abaa8e22bc 100644
--- a/drivers/char/ipmi/ipmi_ssif.c
+++ b/drivers/char/ipmi/ipmi_ssif.c
@@ -1129,7 +1129,7 @@ static void ssif_set_need_watch(void *send_info, unsigned int watch_mask)
 
 	if (watch_mask & IPMI_WATCH_MASK_CHECK_MESSAGES)
 		timeout = SSIF_WATCH_MSG_TIMEOUT;
-	else if (watch_mask & ~IPMI_WATCH_MASK_INTERNAL)
+	else if (watch_mask)
 		timeout = SSIF_WATCH_WATCHDOG_TIMEOUT;
 
 	flags = ipmi_ssif_lock_cond(ssif_info, &oflags);
diff --git a/include/linux/ipmi_smi.h b/include/linux/ipmi_smi.h
index da6abb06a5dc..4dc66157d872 100644
--- a/include/linux/ipmi_smi.h
+++ b/include/linux/ipmi_smi.h
@@ -32,14 +32,11 @@ struct ipmi_smi;
 
 /*
  * Flags for set_check_watch() below.  Tells if the SMI should be
- * waiting for watchdog timeouts, commands and/or messages.  There is
- * also an internal flag for the message handler, SMIs should ignore
- * it.
+ * waiting for watchdog timeouts, commands and/or messages.
  */
-#define IPMI_WATCH_MASK_INTERNAL	(1 << 0)
-#define IPMI_WATCH_MASK_CHECK_MESSAGES	(1 << 1)
-#define IPMI_WATCH_MASK_CHECK_WATCHDOG	(1 << 2)
-#define IPMI_WATCH_MASK_CHECK_COMMANDS	(1 << 3)
+#define IPMI_WATCH_MASK_CHECK_MESSAGES	(1 << 0)
+#define IPMI_WATCH_MASK_CHECK_WATCHDOG	(1 << 1)
+#define IPMI_WATCH_MASK_CHECK_COMMANDS	(1 << 2)
 
 /*
  * Messages to/from the lower layer.  The smi interface will take one
@@ -66,12 +63,6 @@ struct ipmi_smi_msg {
 	int           rsp_size;
 	unsigned char rsp[IPMI_MAX_MSG_LENGTH];
 
-	/*
-	 * There should be a response message coming back in the BMC
-	 * message queue.
-	 */
-	bool needs_response;
-
 	/*
 	 * Will be called when the system is done with the message
 	 * (presumably to free it).
-- 
cgit v1.2.3


From 257eeded20b34219d5484cfc415b3e39093f37b8 Mon Sep 17 00:00:00 2001
From: Eli Cohen <eli@mellanox.com>
Date: Sun, 10 Feb 2019 14:24:59 +0200
Subject: net: Move all TC actions identifiers to one place

Move all the TC identifiers to one place, to the same enum that defines
the identifier of police action. This makes it easier choose numbers for
new actions since they are now defined in one place. We preserve the
original values for binary compatibility. New IDs should be added inside
the enum.

Signed-off-by: Eli Cohen <eli@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/pkt_cls.h              | 43 ++++++++++++++++++++++++++++---
 include/uapi/linux/tc_act/tc_bpf.h        |  2 --
 include/uapi/linux/tc_act/tc_connmark.h   |  2 --
 include/uapi/linux/tc_act/tc_csum.h       |  2 --
 include/uapi/linux/tc_act/tc_gact.h       |  1 -
 include/uapi/linux/tc_act/tc_ife.h        |  1 -
 include/uapi/linux/tc_act/tc_ipt.h        |  3 ---
 include/uapi/linux/tc_act/tc_mirred.h     |  1 -
 include/uapi/linux/tc_act/tc_nat.h        |  2 --
 include/uapi/linux/tc_act/tc_pedit.h      |  2 --
 include/uapi/linux/tc_act/tc_sample.h     |  2 --
 include/uapi/linux/tc_act/tc_skbedit.h    |  2 --
 include/uapi/linux/tc_act/tc_skbmod.h     |  2 --
 include/uapi/linux/tc_act/tc_tunnel_key.h |  2 --
 include/uapi/linux/tc_act/tc_vlan.h       |  2 --
 net/sched/act_simple.c                    |  2 --
 tools/include/uapi/linux/tc_act/tc_bpf.h  |  2 --
 17 files changed, 40 insertions(+), 33 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h
index 02ac251be8c4..7ab55f97e7c4 100644
--- a/include/uapi/linux/pkt_cls.h
+++ b/include/uapi/linux/pkt_cls.h
@@ -63,12 +63,49 @@ enum {
 #define TC_ACT_GOTO_CHAIN __TC_ACT_EXT(2)
 #define TC_ACT_EXT_OPCODE_MAX	TC_ACT_GOTO_CHAIN
 
+/* These macros are put here for binary compatibility with userspace apps that
+ * make use of them. For kernel code and new userspace apps, use the TCA_ID_*
+ * versions.
+ */
+#define TCA_ACT_GACT 5
+#define TCA_ACT_IPT 6
+#define TCA_ACT_PEDIT 7
+#define TCA_ACT_MIRRED 8
+#define TCA_ACT_NAT 9
+#define TCA_ACT_XT 10
+#define TCA_ACT_SKBEDIT 11
+#define TCA_ACT_VLAN 12
+#define TCA_ACT_BPF 13
+#define TCA_ACT_CONNMARK 14
+#define TCA_ACT_SKBMOD 15
+#define TCA_ACT_CSUM 16
+#define TCA_ACT_TUNNEL_KEY 17
+#define TCA_ACT_SIMP 22
+#define TCA_ACT_IFE 25
+#define TCA_ACT_SAMPLE 26
+
 /* Action type identifiers*/
 enum {
-	TCA_ID_UNSPEC=0,
-	TCA_ID_POLICE=1,
+	TCA_ID_UNSPEC = 0,
+	TCA_ID_POLICE = 1,
+	TCA_ID_GACT = TCA_ACT_GACT,
+	TCA_ID_IPT = TCA_ACT_IPT,
+	TCA_ID_PEDIT = TCA_ACT_PEDIT,
+	TCA_ID_MIRRED = TCA_ACT_MIRRED,
+	TCA_ID_NAT = TCA_ACT_NAT,
+	TCA_ID_XT = TCA_ACT_XT,
+	TCA_ID_SKBEDIT = TCA_ACT_SKBEDIT,
+	TCA_ID_VLAN = TCA_ACT_VLAN,
+	TCA_ID_BPF = TCA_ACT_BPF,
+	TCA_ID_CONNMARK = TCA_ACT_CONNMARK,
+	TCA_ID_SKBMOD = TCA_ACT_SKBMOD,
+	TCA_ID_CSUM = TCA_ACT_CSUM,
+	TCA_ID_TUNNEL_KEY = TCA_ACT_TUNNEL_KEY,
+	TCA_ID_SIMP = TCA_ACT_SIMP,
+	TCA_ID_IFE = TCA_ACT_IFE,
+	TCA_ID_SAMPLE = TCA_ACT_SAMPLE,
 	/* other actions go here */
-	__TCA_ID_MAX=255
+	__TCA_ID_MAX = 255
 };
 
 #define TCA_ID_MAX __TCA_ID_MAX
diff --git a/include/uapi/linux/tc_act/tc_bpf.h b/include/uapi/linux/tc_act/tc_bpf.h
index 6e89a5df49a4..653c4f94f76e 100644
--- a/include/uapi/linux/tc_act/tc_bpf.h
+++ b/include/uapi/linux/tc_act/tc_bpf.h
@@ -13,8 +13,6 @@
 
 #include <linux/pkt_cls.h>
 
-#define TCA_ACT_BPF 13
-
 struct tc_act_bpf {
 	tc_gen;
 };
diff --git a/include/uapi/linux/tc_act/tc_connmark.h b/include/uapi/linux/tc_act/tc_connmark.h
index 80caa47b1933..9f8f6f709feb 100644
--- a/include/uapi/linux/tc_act/tc_connmark.h
+++ b/include/uapi/linux/tc_act/tc_connmark.h
@@ -5,8 +5,6 @@
 #include <linux/types.h>
 #include <linux/pkt_cls.h>
 
-#define TCA_ACT_CONNMARK 14
-
 struct tc_connmark {
 	tc_gen;
 	__u16 zone;
diff --git a/include/uapi/linux/tc_act/tc_csum.h b/include/uapi/linux/tc_act/tc_csum.h
index 0ecf4d29e2f3..94b2044929de 100644
--- a/include/uapi/linux/tc_act/tc_csum.h
+++ b/include/uapi/linux/tc_act/tc_csum.h
@@ -5,8 +5,6 @@
 #include <linux/types.h>
 #include <linux/pkt_cls.h>
 
-#define TCA_ACT_CSUM 16
-
 enum {
 	TCA_CSUM_UNSPEC,
 	TCA_CSUM_PARMS,
diff --git a/include/uapi/linux/tc_act/tc_gact.h b/include/uapi/linux/tc_act/tc_gact.h
index 94273c3b81b0..37e5392e02c7 100644
--- a/include/uapi/linux/tc_act/tc_gact.h
+++ b/include/uapi/linux/tc_act/tc_gact.h
@@ -5,7 +5,6 @@
 #include <linux/types.h>
 #include <linux/pkt_cls.h>
 
-#define TCA_ACT_GACT 5
 struct tc_gact {
 	tc_gen;
 
diff --git a/include/uapi/linux/tc_act/tc_ife.h b/include/uapi/linux/tc_act/tc_ife.h
index 2f48490ef386..8c401f185675 100644
--- a/include/uapi/linux/tc_act/tc_ife.h
+++ b/include/uapi/linux/tc_act/tc_ife.h
@@ -6,7 +6,6 @@
 #include <linux/pkt_cls.h>
 #include <linux/ife.h>
 
-#define TCA_ACT_IFE 25
 /* Flag bits for now just encoding/decoding; mutually exclusive */
 #define IFE_ENCODE 1
 #define IFE_DECODE 0
diff --git a/include/uapi/linux/tc_act/tc_ipt.h b/include/uapi/linux/tc_act/tc_ipt.h
index b743c8bddd13..c48d7da6750d 100644
--- a/include/uapi/linux/tc_act/tc_ipt.h
+++ b/include/uapi/linux/tc_act/tc_ipt.h
@@ -4,9 +4,6 @@
 
 #include <linux/pkt_cls.h>
 
-#define TCA_ACT_IPT 6
-#define TCA_ACT_XT 10
-
 enum {
 	TCA_IPT_UNSPEC,
 	TCA_IPT_TABLE,
diff --git a/include/uapi/linux/tc_act/tc_mirred.h b/include/uapi/linux/tc_act/tc_mirred.h
index 5dd671cf5776..2500a0005d05 100644
--- a/include/uapi/linux/tc_act/tc_mirred.h
+++ b/include/uapi/linux/tc_act/tc_mirred.h
@@ -5,7 +5,6 @@
 #include <linux/types.h>
 #include <linux/pkt_cls.h>
 
-#define TCA_ACT_MIRRED 8
 #define TCA_EGRESS_REDIR 1  /* packet redirect to EGRESS*/
 #define TCA_EGRESS_MIRROR 2 /* mirror packet to EGRESS */
 #define TCA_INGRESS_REDIR 3  /* packet redirect to INGRESS*/
diff --git a/include/uapi/linux/tc_act/tc_nat.h b/include/uapi/linux/tc_act/tc_nat.h
index 086be842587b..21399c2c6130 100644
--- a/include/uapi/linux/tc_act/tc_nat.h
+++ b/include/uapi/linux/tc_act/tc_nat.h
@@ -5,8 +5,6 @@
 #include <linux/pkt_cls.h>
 #include <linux/types.h>
 
-#define TCA_ACT_NAT 9
-
 enum {
 	TCA_NAT_UNSPEC,
 	TCA_NAT_PARMS,
diff --git a/include/uapi/linux/tc_act/tc_pedit.h b/include/uapi/linux/tc_act/tc_pedit.h
index 24ec792dacc1..f3e61b04fa01 100644
--- a/include/uapi/linux/tc_act/tc_pedit.h
+++ b/include/uapi/linux/tc_act/tc_pedit.h
@@ -5,8 +5,6 @@
 #include <linux/types.h>
 #include <linux/pkt_cls.h>
 
-#define TCA_ACT_PEDIT 7
-
 enum {
 	TCA_PEDIT_UNSPEC,
 	TCA_PEDIT_TM,
diff --git a/include/uapi/linux/tc_act/tc_sample.h b/include/uapi/linux/tc_act/tc_sample.h
index bd7e9f03abd2..fee1bcc20793 100644
--- a/include/uapi/linux/tc_act/tc_sample.h
+++ b/include/uapi/linux/tc_act/tc_sample.h
@@ -6,8 +6,6 @@
 #include <linux/pkt_cls.h>
 #include <linux/if_ether.h>
 
-#define TCA_ACT_SAMPLE 26
-
 struct tc_sample {
 	tc_gen;
 };
diff --git a/include/uapi/linux/tc_act/tc_skbedit.h b/include/uapi/linux/tc_act/tc_skbedit.h
index 6de6071ebed6..800e93377218 100644
--- a/include/uapi/linux/tc_act/tc_skbedit.h
+++ b/include/uapi/linux/tc_act/tc_skbedit.h
@@ -23,8 +23,6 @@
 
 #include <linux/pkt_cls.h>
 
-#define TCA_ACT_SKBEDIT 11
-
 #define SKBEDIT_F_PRIORITY		0x1
 #define SKBEDIT_F_QUEUE_MAPPING		0x2
 #define SKBEDIT_F_MARK			0x4
diff --git a/include/uapi/linux/tc_act/tc_skbmod.h b/include/uapi/linux/tc_act/tc_skbmod.h
index 38c072f66f2f..c525b3503797 100644
--- a/include/uapi/linux/tc_act/tc_skbmod.h
+++ b/include/uapi/linux/tc_act/tc_skbmod.h
@@ -13,8 +13,6 @@
 
 #include <linux/pkt_cls.h>
 
-#define TCA_ACT_SKBMOD 15
-
 #define SKBMOD_F_DMAC	0x1
 #define SKBMOD_F_SMAC	0x2
 #define SKBMOD_F_ETYPE	0x4
diff --git a/include/uapi/linux/tc_act/tc_tunnel_key.h b/include/uapi/linux/tc_act/tc_tunnel_key.h
index be384d63e1b5..41c8b462c177 100644
--- a/include/uapi/linux/tc_act/tc_tunnel_key.h
+++ b/include/uapi/linux/tc_act/tc_tunnel_key.h
@@ -14,8 +14,6 @@
 
 #include <linux/pkt_cls.h>
 
-#define TCA_ACT_TUNNEL_KEY 17
-
 #define TCA_TUNNEL_KEY_ACT_SET	    1
 #define TCA_TUNNEL_KEY_ACT_RELEASE  2
 
diff --git a/include/uapi/linux/tc_act/tc_vlan.h b/include/uapi/linux/tc_act/tc_vlan.h
index 0d7b5fd6605b..168995b54a70 100644
--- a/include/uapi/linux/tc_act/tc_vlan.h
+++ b/include/uapi/linux/tc_act/tc_vlan.h
@@ -13,8 +13,6 @@
 
 #include <linux/pkt_cls.h>
 
-#define TCA_ACT_VLAN 12
-
 #define TCA_VLAN_ACT_POP	1
 #define TCA_VLAN_ACT_PUSH	2
 #define TCA_VLAN_ACT_MODIFY	3
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index 902957beceb3..b2b16d440154 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -19,8 +19,6 @@
 #include <net/netlink.h>
 #include <net/pkt_sched.h>
 
-#define TCA_ACT_SIMP 22
-
 #include <linux/tc_act/tc_defact.h>
 #include <net/tc_act/tc_defact.h>
 
diff --git a/tools/include/uapi/linux/tc_act/tc_bpf.h b/tools/include/uapi/linux/tc_act/tc_bpf.h
index 6e89a5df49a4..653c4f94f76e 100644
--- a/tools/include/uapi/linux/tc_act/tc_bpf.h
+++ b/tools/include/uapi/linux/tc_act/tc_bpf.h
@@ -13,8 +13,6 @@
 
 #include <linux/pkt_cls.h>
 
-#define TCA_ACT_BPF 13
-
 struct tc_act_bpf {
 	tc_gen;
 };
-- 
cgit v1.2.3


From eddd2cf195d6fb5e4bbc91a0fe4be55110f559ab Mon Sep 17 00:00:00 2001
From: Eli Cohen <eli@mellanox.com>
Date: Sun, 10 Feb 2019 14:25:00 +0200
Subject: net: Change TCA_ACT_* to TCA_ID_* to match that of TCA_ID_POLICE

Modify the kernel users of the TCA_ACT_* macros to use TCA_ID_*. For
example, use TCA_ID_GACT instead of TCA_ACT_GACT. This will align with
TCA_ID_POLICE and also differentiates these identifier, used in struct
tc_action_ops type field, from other macros starting with TCA_ACT_.

To make things clearer, we name the enum defining the TCA_ID_*
identifiers and also change the "type" field of struct tc_action to
id.

Signed-off-by: Eli Cohen <eli@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/act_api.h              | 2 +-
 include/net/tc_act/tc_csum.h       | 2 +-
 include/net/tc_act/tc_gact.h       | 2 +-
 include/net/tc_act/tc_mirred.h     | 4 ++--
 include/net/tc_act/tc_pedit.h      | 2 +-
 include/net/tc_act/tc_sample.h     | 2 +-
 include/net/tc_act/tc_skbedit.h    | 2 +-
 include/net/tc_act/tc_tunnel_key.h | 4 ++--
 include/net/tc_act/tc_vlan.h       | 2 +-
 include/uapi/linux/pkt_cls.h       | 2 +-
 net/sched/act_api.c                | 2 +-
 net/sched/act_bpf.c                | 2 +-
 net/sched/act_connmark.c           | 2 +-
 net/sched/act_csum.c               | 2 +-
 net/sched/act_gact.c               | 2 +-
 net/sched/act_ife.c                | 2 +-
 net/sched/act_ipt.c                | 4 ++--
 net/sched/act_mirred.c             | 2 +-
 net/sched/act_nat.c                | 2 +-
 net/sched/act_pedit.c              | 2 +-
 net/sched/act_police.c             | 2 +-
 net/sched/act_sample.c             | 2 +-
 net/sched/act_simple.c             | 2 +-
 net/sched/act_skbedit.c            | 2 +-
 net/sched/act_skbmod.c             | 2 +-
 net/sched/act_tunnel_key.c         | 2 +-
 net/sched/act_vlan.c               | 2 +-
 27 files changed, 30 insertions(+), 30 deletions(-)

(limited to 'include')

diff --git a/include/net/act_api.h b/include/net/act_api.h
index dbc795ec659e..c745e9ccfab2 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -80,7 +80,7 @@ static inline void tcf_tm_dump(struct tcf_t *dtm, const struct tcf_t *stm)
 struct tc_action_ops {
 	struct list_head head;
 	char    kind[IFNAMSIZ];
-	__u32   type; /* TBD to match kind */
+	enum tca_id  id; /* identifier should match kind */
 	size_t	size;
 	struct module		*owner;
 	int     (*act)(struct sk_buff *, const struct tc_action *,
diff --git a/include/net/tc_act/tc_csum.h b/include/net/tc_act/tc_csum.h
index 32d2454c0479..68269e4581b7 100644
--- a/include/net/tc_act/tc_csum.h
+++ b/include/net/tc_act/tc_csum.h
@@ -21,7 +21,7 @@ struct tcf_csum {
 static inline bool is_tcf_csum(const struct tc_action *a)
 {
 #ifdef CONFIG_NET_CLS_ACT
-	if (a->ops && a->ops->type == TCA_ACT_CSUM)
+	if (a->ops && a->ops->id == TCA_ID_CSUM)
 		return true;
 #endif
 	return false;
diff --git a/include/net/tc_act/tc_gact.h b/include/net/tc_act/tc_gact.h
index ef8dd0db70ce..ee8d005f56fc 100644
--- a/include/net/tc_act/tc_gact.h
+++ b/include/net/tc_act/tc_gact.h
@@ -22,7 +22,7 @@ static inline bool __is_tcf_gact_act(const struct tc_action *a, int act,
 #ifdef CONFIG_NET_CLS_ACT
 	struct tcf_gact *gact;
 
-	if (a->ops && a->ops->type != TCA_ACT_GACT)
+	if (a->ops && a->ops->id != TCA_ID_GACT)
 		return false;
 
 	gact = to_gact(a);
diff --git a/include/net/tc_act/tc_mirred.h b/include/net/tc_act/tc_mirred.h
index a2e9cbca5c9e..c757585a05b0 100644
--- a/include/net/tc_act/tc_mirred.h
+++ b/include/net/tc_act/tc_mirred.h
@@ -17,7 +17,7 @@ struct tcf_mirred {
 static inline bool is_tcf_mirred_egress_redirect(const struct tc_action *a)
 {
 #ifdef CONFIG_NET_CLS_ACT
-	if (a->ops && a->ops->type == TCA_ACT_MIRRED)
+	if (a->ops && a->ops->id == TCA_ID_MIRRED)
 		return to_mirred(a)->tcfm_eaction == TCA_EGRESS_REDIR;
 #endif
 	return false;
@@ -26,7 +26,7 @@ static inline bool is_tcf_mirred_egress_redirect(const struct tc_action *a)
 static inline bool is_tcf_mirred_egress_mirror(const struct tc_action *a)
 {
 #ifdef CONFIG_NET_CLS_ACT
-	if (a->ops && a->ops->type == TCA_ACT_MIRRED)
+	if (a->ops && a->ops->id == TCA_ID_MIRRED)
 		return to_mirred(a)->tcfm_eaction == TCA_EGRESS_MIRROR;
 #endif
 	return false;
diff --git a/include/net/tc_act/tc_pedit.h b/include/net/tc_act/tc_pedit.h
index fac3ad4a86de..748cf87a4d7e 100644
--- a/include/net/tc_act/tc_pedit.h
+++ b/include/net/tc_act/tc_pedit.h
@@ -23,7 +23,7 @@ struct tcf_pedit {
 static inline bool is_tcf_pedit(const struct tc_action *a)
 {
 #ifdef CONFIG_NET_CLS_ACT
-	if (a->ops && a->ops->type == TCA_ACT_PEDIT)
+	if (a->ops && a->ops->id == TCA_ID_PEDIT)
 		return true;
 #endif
 	return false;
diff --git a/include/net/tc_act/tc_sample.h b/include/net/tc_act/tc_sample.h
index 01dbfea32672..0a559d4b6f0f 100644
--- a/include/net/tc_act/tc_sample.h
+++ b/include/net/tc_act/tc_sample.h
@@ -20,7 +20,7 @@ struct tcf_sample {
 static inline bool is_tcf_sample(const struct tc_action *a)
 {
 #ifdef CONFIG_NET_CLS_ACT
-	return a->ops && a->ops->type == TCA_ACT_SAMPLE;
+	return a->ops && a->ops->id == TCA_ID_SAMPLE;
 #else
 	return false;
 #endif
diff --git a/include/net/tc_act/tc_skbedit.h b/include/net/tc_act/tc_skbedit.h
index 911bbac838a2..85c5c4756d92 100644
--- a/include/net/tc_act/tc_skbedit.h
+++ b/include/net/tc_act/tc_skbedit.h
@@ -44,7 +44,7 @@ static inline bool is_tcf_skbedit_mark(const struct tc_action *a)
 #ifdef CONFIG_NET_CLS_ACT
 	u32 flags;
 
-	if (a->ops && a->ops->type == TCA_ACT_SKBEDIT) {
+	if (a->ops && a->ops->id == TCA_ID_SKBEDIT) {
 		rcu_read_lock();
 		flags = rcu_dereference(to_skbedit(a)->params)->flags;
 		rcu_read_unlock();
diff --git a/include/net/tc_act/tc_tunnel_key.h b/include/net/tc_act/tc_tunnel_key.h
index 46b8c7f1c8d5..23d5b8b19f3e 100644
--- a/include/net/tc_act/tc_tunnel_key.h
+++ b/include/net/tc_act/tc_tunnel_key.h
@@ -34,7 +34,7 @@ static inline bool is_tcf_tunnel_set(const struct tc_action *a)
 	struct tcf_tunnel_key *t = to_tunnel_key(a);
 	struct tcf_tunnel_key_params *params = rtnl_dereference(t->params);
 
-	if (a->ops && a->ops->type == TCA_ACT_TUNNEL_KEY)
+	if (a->ops && a->ops->id == TCA_ID_TUNNEL_KEY)
 		return params->tcft_action == TCA_TUNNEL_KEY_ACT_SET;
 #endif
 	return false;
@@ -46,7 +46,7 @@ static inline bool is_tcf_tunnel_release(const struct tc_action *a)
 	struct tcf_tunnel_key *t = to_tunnel_key(a);
 	struct tcf_tunnel_key_params *params = rtnl_dereference(t->params);
 
-	if (a->ops && a->ops->type == TCA_ACT_TUNNEL_KEY)
+	if (a->ops && a->ops->id == TCA_ID_TUNNEL_KEY)
 		return params->tcft_action == TCA_TUNNEL_KEY_ACT_RELEASE;
 #endif
 	return false;
diff --git a/include/net/tc_act/tc_vlan.h b/include/net/tc_act/tc_vlan.h
index 22ae260d6869..fe39ed502bef 100644
--- a/include/net/tc_act/tc_vlan.h
+++ b/include/net/tc_act/tc_vlan.h
@@ -30,7 +30,7 @@ struct tcf_vlan {
 static inline bool is_tcf_vlan(const struct tc_action *a)
 {
 #ifdef CONFIG_NET_CLS_ACT
-	if (a->ops && a->ops->type == TCA_ACT_VLAN)
+	if (a->ops && a->ops->id == TCA_ID_VLAN)
 		return true;
 #endif
 	return false;
diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h
index 7ab55f97e7c4..51a0496f78ea 100644
--- a/include/uapi/linux/pkt_cls.h
+++ b/include/uapi/linux/pkt_cls.h
@@ -85,7 +85,7 @@ enum {
 #define TCA_ACT_SAMPLE 26
 
 /* Action type identifiers*/
-enum {
+enum tca_id {
 	TCA_ID_UNSPEC = 0,
 	TCA_ID_POLICE = 1,
 	TCA_ID_GACT = TCA_ACT_GACT,
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index d4b8355737d8..aecf1bf233c8 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -543,7 +543,7 @@ int tcf_register_action(struct tc_action_ops *act,
 
 	write_lock(&act_mod_lock);
 	list_for_each_entry(a, &act_base, head) {
-		if (act->type == a->type || (strcmp(act->kind, a->kind) == 0)) {
+		if (act->id == a->id || (strcmp(act->kind, a->kind) == 0)) {
 			write_unlock(&act_mod_lock);
 			unregister_pernet_subsys(ops);
 			return -EEXIST;
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index c7633843e223..aa5c38d11a30 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -396,7 +396,7 @@ static int tcf_bpf_search(struct net *net, struct tc_action **a, u32 index)
 
 static struct tc_action_ops act_bpf_ops __read_mostly = {
 	.kind		=	"bpf",
-	.type		=	TCA_ACT_BPF,
+	.id		=	TCA_ID_BPF,
 	.owner		=	THIS_MODULE,
 	.act		=	tcf_bpf_act,
 	.dump		=	tcf_bpf_dump,
diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c
index 8475913f2070..5d24993cccfe 100644
--- a/net/sched/act_connmark.c
+++ b/net/sched/act_connmark.c
@@ -204,7 +204,7 @@ static int tcf_connmark_search(struct net *net, struct tc_action **a, u32 index)
 
 static struct tc_action_ops act_connmark_ops = {
 	.kind		=	"connmark",
-	.type		=	TCA_ACT_CONNMARK,
+	.id		=	TCA_ID_CONNMARK,
 	.owner		=	THIS_MODULE,
 	.act		=	tcf_connmark_act,
 	.dump		=	tcf_connmark_dump,
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index 3dc25b7806d7..945fb34ae721 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -660,7 +660,7 @@ static size_t tcf_csum_get_fill_size(const struct tc_action *act)
 
 static struct tc_action_ops act_csum_ops = {
 	.kind		= "csum",
-	.type		= TCA_ACT_CSUM,
+	.id		= TCA_ID_CSUM,
 	.owner		= THIS_MODULE,
 	.act		= tcf_csum_act,
 	.dump		= tcf_csum_dump,
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index b61c20ebb314..93da0004e9f4 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -253,7 +253,7 @@ static size_t tcf_gact_get_fill_size(const struct tc_action *act)
 
 static struct tc_action_ops act_gact_ops = {
 	.kind		=	"gact",
-	.type		=	TCA_ACT_GACT,
+	.id		=	TCA_ID_GACT,
 	.owner		=	THIS_MODULE,
 	.act		=	tcf_gact_act,
 	.stats_update	=	tcf_gact_stats_update,
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index 30b63fa23ee2..9b1f2b3990ee 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -864,7 +864,7 @@ static int tcf_ife_search(struct net *net, struct tc_action **a, u32 index)
 
 static struct tc_action_ops act_ife_ops = {
 	.kind = "ife",
-	.type = TCA_ACT_IFE,
+	.id = TCA_ID_IFE,
 	.owner = THIS_MODULE,
 	.act = tcf_ife_act,
 	.dump = tcf_ife_dump,
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 8af6c11d2482..1bad190710ad 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -338,7 +338,7 @@ static int tcf_ipt_search(struct net *net, struct tc_action **a, u32 index)
 
 static struct tc_action_ops act_ipt_ops = {
 	.kind		=	"ipt",
-	.type		=	TCA_ACT_IPT,
+	.id		=	TCA_ID_IPT,
 	.owner		=	THIS_MODULE,
 	.act		=	tcf_ipt_act,
 	.dump		=	tcf_ipt_dump,
@@ -387,7 +387,7 @@ static int tcf_xt_search(struct net *net, struct tc_action **a, u32 index)
 
 static struct tc_action_ops act_xt_ops = {
 	.kind		=	"xt",
-	.type		=	TCA_ACT_XT,
+	.id		=	TCA_ID_XT,
 	.owner		=	THIS_MODULE,
 	.act		=	tcf_ipt_act,
 	.dump		=	tcf_ipt_dump,
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index c8cf4d10c435..6692fd054617 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -400,7 +400,7 @@ static void tcf_mirred_put_dev(struct net_device *dev)
 
 static struct tc_action_ops act_mirred_ops = {
 	.kind		=	"mirred",
-	.type		=	TCA_ACT_MIRRED,
+	.id		=	TCA_ID_MIRRED,
 	.owner		=	THIS_MODULE,
 	.act		=	tcf_mirred_act,
 	.stats_update	=	tcf_stats_update,
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index c5c1e23add77..543eab9193f1 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -304,7 +304,7 @@ static int tcf_nat_search(struct net *net, struct tc_action **a, u32 index)
 
 static struct tc_action_ops act_nat_ops = {
 	.kind		=	"nat",
-	.type		=	TCA_ACT_NAT,
+	.id		=	TCA_ID_NAT,
 	.owner		=	THIS_MODULE,
 	.act		=	tcf_nat_act,
 	.dump		=	tcf_nat_dump,
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index 3663d3b615a4..a80373878df7 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -470,7 +470,7 @@ static int tcf_pedit_search(struct net *net, struct tc_action **a, u32 index)
 
 static struct tc_action_ops act_pedit_ops = {
 	.kind		=	"pedit",
-	.type		=	TCA_ACT_PEDIT,
+	.id		=	TCA_ID_PEDIT,
 	.owner		=	THIS_MODULE,
 	.act		=	tcf_pedit_act,
 	.dump		=	tcf_pedit_dump,
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index ec8ec55e0fe8..8271a6263824 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -366,7 +366,7 @@ MODULE_LICENSE("GPL");
 
 static struct tc_action_ops act_police_ops = {
 	.kind		=	"police",
-	.type		=	TCA_ID_POLICE,
+	.id		=	TCA_ID_POLICE,
 	.owner		=	THIS_MODULE,
 	.act		=	tcf_police_act,
 	.dump		=	tcf_police_dump,
diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c
index 1a0c682fd734..203e399e5c85 100644
--- a/net/sched/act_sample.c
+++ b/net/sched/act_sample.c
@@ -233,7 +233,7 @@ static int tcf_sample_search(struct net *net, struct tc_action **a, u32 index)
 
 static struct tc_action_ops act_sample_ops = {
 	.kind	  = "sample",
-	.type	  = TCA_ACT_SAMPLE,
+	.id	  = TCA_ID_SAMPLE,
 	.owner	  = THIS_MODULE,
 	.act	  = tcf_sample_act,
 	.dump	  = tcf_sample_dump,
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index b2b16d440154..d54cb608dbaf 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -195,7 +195,7 @@ static int tcf_simp_search(struct net *net, struct tc_action **a, u32 index)
 
 static struct tc_action_ops act_simp_ops = {
 	.kind		=	"simple",
-	.type		=	TCA_ACT_SIMP,
+	.id		=	TCA_ID_SIMP,
 	.owner		=	THIS_MODULE,
 	.act		=	tcf_simp_act,
 	.dump		=	tcf_simp_dump,
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index 64dba3708fce..39f8a67ea940 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -305,7 +305,7 @@ static int tcf_skbedit_search(struct net *net, struct tc_action **a, u32 index)
 
 static struct tc_action_ops act_skbedit_ops = {
 	.kind		=	"skbedit",
-	.type		=	TCA_ACT_SKBEDIT,
+	.id		=	TCA_ID_SKBEDIT,
 	.owner		=	THIS_MODULE,
 	.act		=	tcf_skbedit_act,
 	.dump		=	tcf_skbedit_dump,
diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c
index 59710a183bd3..7bac1d78e7a3 100644
--- a/net/sched/act_skbmod.c
+++ b/net/sched/act_skbmod.c
@@ -260,7 +260,7 @@ static int tcf_skbmod_search(struct net *net, struct tc_action **a, u32 index)
 
 static struct tc_action_ops act_skbmod_ops = {
 	.kind		=	"skbmod",
-	.type		=	TCA_ACT_SKBMOD,
+	.id		=	TCA_ACT_SKBMOD,
 	.owner		=	THIS_MODULE,
 	.act		=	tcf_skbmod_act,
 	.dump		=	tcf_skbmod_dump,
diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
index 8b43fe0130f7..9104b8e36482 100644
--- a/net/sched/act_tunnel_key.c
+++ b/net/sched/act_tunnel_key.c
@@ -563,7 +563,7 @@ static int tunnel_key_search(struct net *net, struct tc_action **a, u32 index)
 
 static struct tc_action_ops act_tunnel_key_ops = {
 	.kind		=	"tunnel_key",
-	.type		=	TCA_ACT_TUNNEL_KEY,
+	.id		=	TCA_ID_TUNNEL_KEY,
 	.owner		=	THIS_MODULE,
 	.act		=	tunnel_key_act,
 	.dump		=	tunnel_key_dump,
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index 93fdaf707313..ac0061599225 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -297,7 +297,7 @@ static int tcf_vlan_search(struct net *net, struct tc_action **a, u32 index)
 
 static struct tc_action_ops act_vlan_ops = {
 	.kind		=	"vlan",
-	.type		=	TCA_ACT_VLAN,
+	.id		=	TCA_ID_VLAN,
 	.owner		=	THIS_MODULE,
 	.act		=	tcf_vlan_act,
 	.dump		=	tcf_vlan_dump,
-- 
cgit v1.2.3


From 1136b0728969901a091f0471968b2b76ed14d9ad Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 8 Feb 2019 14:48:03 +0100
Subject: genirq: Avoid summation loops for /proc/stat

Waiman reported that on large systems with a large amount of interrupts the
readout of /proc/stat takes a long time to sum up the interrupt
statistics. In principle this is not a problem. but for unknown reasons
some enterprise quality software reads /proc/stat with a high frequency.

The reason for this is that interrupt statistics are accounted per cpu. So
the /proc/stat logic has to sum up the interrupt stats for each interrupt.

This can be largely avoided for interrupts which are not marked as
'PER_CPU' interrupts by simply adding a per interrupt summation counter
which is incremented along with the per interrupt per cpu counter.

The PER_CPU interrupts need to avoid that and use only per cpu accounting
because they share the interrupt number and the interrupt descriptor and
concurrent updates would conflict or require unwanted synchronization.

Reported-by: Waiman Long <longman@redhat.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Waiman Long <longman@redhat.com>
Reviewed-by: Marc Zyngier <marc.zyngier@arm.com>
Reviewed-by: Davidlohr Bueso <dbueso@suse.de>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: linux-fsdevel@vger.kernel.org
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: Miklos Szeredi <miklos@szeredi.hu>
Cc: Daniel Colascione <dancol@google.com>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Randy Dunlap <rdunlap@infradead.org>
Link: https://lkml.kernel.org/r/20190208135020.925487496@linutronix.de


8<-------------

v2: Undo the unintentional layout change of struct irq_desc.

 include/linux/irqdesc.h |    1 +
 kernel/irq/chip.c       |   12 ++++++++++--
 kernel/irq/internals.h  |    8 +++++++-
 kernel/irq/irqdesc.c    |    7 ++++++-
 4 files changed, 24 insertions(+), 4 deletions(-)
---
 include/linux/irqdesc.h |  1 +
 kernel/irq/chip.c       | 12 ++++++++++--
 kernel/irq/internals.h  |  8 +++++++-
 kernel/irq/irqdesc.c    |  7 ++++++-
 4 files changed, 24 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
index dd1e40ddac7d..875c41b23f20 100644
--- a/include/linux/irqdesc.h
+++ b/include/linux/irqdesc.h
@@ -65,6 +65,7 @@ struct irq_desc {
 	unsigned int		core_internal_state__do_not_mess_with_it;
 	unsigned int		depth;		/* nested irq disables */
 	unsigned int		wake_depth;	/* nested wake enables */
+	unsigned int		tot_count;
 	unsigned int		irq_count;	/* For detecting broken IRQs */
 	unsigned long		last_unhandled;	/* Aging timer for unhandled count */
 	unsigned int		irqs_unhandled;
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 34e969069488..e960c4f46ee0 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -855,7 +855,11 @@ void handle_percpu_irq(struct irq_desc *desc)
 {
 	struct irq_chip *chip = irq_desc_get_chip(desc);
 
-	kstat_incr_irqs_this_cpu(desc);
+	/*
+	 * PER CPU interrupts are not serialized. Do not touch
+	 * desc->tot_count.
+	 */
+	__kstat_incr_irqs_this_cpu(desc);
 
 	if (chip->irq_ack)
 		chip->irq_ack(&desc->irq_data);
@@ -884,7 +888,11 @@ void handle_percpu_devid_irq(struct irq_desc *desc)
 	unsigned int irq = irq_desc_get_irq(desc);
 	irqreturn_t res;
 
-	kstat_incr_irqs_this_cpu(desc);
+	/*
+	 * PER CPU interrupts are not serialized. Do not touch
+	 * desc->tot_count.
+	 */
+	__kstat_incr_irqs_this_cpu(desc);
 
 	if (chip->irq_ack)
 		chip->irq_ack(&desc->irq_data);
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index ca6afa267070..e74e7eea76cf 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -242,12 +242,18 @@ static inline void irq_state_set_masked(struct irq_desc *desc)
 
 #undef __irqd_to_state
 
-static inline void kstat_incr_irqs_this_cpu(struct irq_desc *desc)
+static inline void __kstat_incr_irqs_this_cpu(struct irq_desc *desc)
 {
 	__this_cpu_inc(*desc->kstat_irqs);
 	__this_cpu_inc(kstat.irqs_sum);
 }
 
+static inline void kstat_incr_irqs_this_cpu(struct irq_desc *desc)
+{
+	__kstat_incr_irqs_this_cpu(desc);
+	desc->tot_count++;
+}
+
 static inline int irq_desc_get_node(struct irq_desc *desc)
 {
 	return irq_common_data_get_node(&desc->irq_common_data);
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index ee062b7939d3..f98293d0e173 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -119,6 +119,7 @@ static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node,
 	desc->depth = 1;
 	desc->irq_count = 0;
 	desc->irqs_unhandled = 0;
+	desc->tot_count = 0;
 	desc->name = NULL;
 	desc->owner = owner;
 	for_each_possible_cpu(cpu)
@@ -919,11 +920,15 @@ unsigned int kstat_irqs_cpu(unsigned int irq, int cpu)
 unsigned int kstat_irqs(unsigned int irq)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
-	int cpu;
 	unsigned int sum = 0;
+	int cpu;
 
 	if (!desc || !desc->kstat_irqs)
 		return 0;
+	if (!irq_settings_is_per_cpu_devid(desc) &&
+	    !irq_settings_is_per_cpu(desc))
+	    return desc->tot_count;
+
 	for_each_possible_cpu(cpu)
 		sum += *per_cpu_ptr(desc->kstat_irqs, cpu);
 	return sum;
-- 
cgit v1.2.3


From 0121805d9d2b1fff371e195c28e9b86ae38b5e47 Mon Sep 17 00:00:00 2001
From: Matthias Kaehlcke <mka@chromium.org>
Date: Mon, 28 Jan 2019 15:46:24 -0800
Subject: kthread: Add __kthread_should_park()

kthread_should_park() is used to check if the calling kthread ('current')
should park, but there is no function to check whether an arbitrary kthread
should be parked. The latter is required to plug a CPU hotplug race vs. a
parking ksoftirqd thread.

The new __kthread_should_park() receives a task_struct as parameter to
check if the corresponding kernel thread should be parked.

Call __kthread_should_park() from kthread_should_park() to avoid code
duplication.

Signed-off-by: Matthias Kaehlcke <mka@chromium.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: "Paul E . McKenney" <paulmck@linux.vnet.ibm.com>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Douglas Anderson <dianders@chromium.org>
Cc: Stephen Boyd <swboyd@chromium.org>
Link: https://lkml.kernel.org/r/20190128234625.78241-2-mka@chromium.org
---
 include/linux/kthread.h | 1 +
 kernel/kthread.c        | 8 +++++++-
 2 files changed, 8 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/kthread.h b/include/linux/kthread.h
index c1961761311d..1577a2d56e9d 100644
--- a/include/linux/kthread.h
+++ b/include/linux/kthread.h
@@ -56,6 +56,7 @@ void kthread_bind_mask(struct task_struct *k, const struct cpumask *mask);
 int kthread_stop(struct task_struct *k);
 bool kthread_should_stop(void);
 bool kthread_should_park(void);
+bool __kthread_should_park(struct task_struct *k);
 bool kthread_freezable_should_stop(bool *was_frozen);
 void *kthread_data(struct task_struct *k);
 void *kthread_probe_data(struct task_struct *k);
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 087d18d771b5..65234c89d85b 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -101,6 +101,12 @@ bool kthread_should_stop(void)
 }
 EXPORT_SYMBOL(kthread_should_stop);
 
+bool __kthread_should_park(struct task_struct *k)
+{
+	return test_bit(KTHREAD_SHOULD_PARK, &to_kthread(k)->flags);
+}
+EXPORT_SYMBOL_GPL(__kthread_should_park);
+
 /**
  * kthread_should_park - should this kthread park now?
  *
@@ -114,7 +120,7 @@ EXPORT_SYMBOL(kthread_should_stop);
  */
 bool kthread_should_park(void)
 {
-	return test_bit(KTHREAD_SHOULD_PARK, &to_kthread(current)->flags);
+	return __kthread_should_park(current);
 }
 EXPORT_SYMBOL_GPL(kthread_should_park);
 
-- 
cgit v1.2.3


From b8554d4f7288f86fb278e0bc7b5b19579bf16b69 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Sun, 10 Feb 2019 19:57:56 +0100
Subject: net: phy: add register modifying helpers returning 1 on change

When modifying registers there are scenarios where we need to know
whether the register content actually changed. This patch adds
new helpers to not break users of the current ones, phy_modify() etc.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/phy-core.c | 127 +++++++++++++++++++++++++++++++++++++++++----
 include/linux/phy.h        |  12 ++++-
 2 files changed, 128 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/drivers/net/phy/phy-core.c b/drivers/net/phy/phy-core.c
index 7d6aad287f84..cdea028d1328 100644
--- a/drivers/net/phy/phy-core.c
+++ b/drivers/net/phy/phy-core.c
@@ -531,7 +531,7 @@ int phy_write_mmd(struct phy_device *phydev, int devad, u32 regnum, u16 val)
 EXPORT_SYMBOL(phy_write_mmd);
 
 /**
- * __phy_modify() - Convenience function for modifying a PHY register
+ * __phy_modify_changed() - Convenience function for modifying a PHY register
  * @phydev: a pointer to a &struct phy_device
  * @regnum: register number
  * @mask: bit mask of bits to clear
@@ -539,16 +539,69 @@ EXPORT_SYMBOL(phy_write_mmd);
  *
  * Unlocked helper function which allows a PHY register to be modified as
  * new register value = (old register value & ~mask) | set
+ *
+ * Returns negative errno, 0 if there was no change, and 1 in case of change
  */
-int __phy_modify(struct phy_device *phydev, u32 regnum, u16 mask, u16 set)
+int __phy_modify_changed(struct phy_device *phydev, u32 regnum, u16 mask,
+			 u16 set)
 {
-	int ret;
+	int new, ret;
 
 	ret = __phy_read(phydev, regnum);
 	if (ret < 0)
 		return ret;
 
-	ret = __phy_write(phydev, regnum, (ret & ~mask) | set);
+	new = (ret & ~mask) | set;
+	if (new == ret)
+		return 0;
+
+	ret = __phy_write(phydev, regnum, new);
+
+	return ret < 0 ? ret : 1;
+}
+EXPORT_SYMBOL_GPL(__phy_modify_changed);
+
+/**
+ * phy_modify_changed - Function for modifying a PHY register
+ * @phydev: the phy_device struct
+ * @regnum: register number to modify
+ * @mask: bit mask of bits to clear
+ * @set: new value of bits set in mask to write to @regnum
+ *
+ * NOTE: MUST NOT be called from interrupt context,
+ * because the bus read/write functions may wait for an interrupt
+ * to conclude the operation.
+ *
+ * Returns negative errno, 0 if there was no change, and 1 in case of change
+ */
+int phy_modify_changed(struct phy_device *phydev, u32 regnum, u16 mask, u16 set)
+{
+	int ret;
+
+	mutex_lock(&phydev->mdio.bus->mdio_lock);
+	ret = __phy_modify_changed(phydev, regnum, mask, set);
+	mutex_unlock(&phydev->mdio.bus->mdio_lock);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(phy_modify_changed);
+
+/**
+ * __phy_modify - Convenience function for modifying a PHY register
+ * @phydev: the phy_device struct
+ * @regnum: register number to modify
+ * @mask: bit mask of bits to clear
+ * @set: new value of bits set in mask to write to @regnum
+ *
+ * NOTE: MUST NOT be called from interrupt context,
+ * because the bus read/write functions may wait for an interrupt
+ * to conclude the operation.
+ */
+int __phy_modify(struct phy_device *phydev, u32 regnum, u16 mask, u16 set)
+{
+	int ret;
+
+	ret = __phy_modify_changed(phydev, regnum, mask, set);
 
 	return ret < 0 ? ret : 0;
 }
@@ -578,7 +631,7 @@ int phy_modify(struct phy_device *phydev, u32 regnum, u16 mask, u16 set)
 EXPORT_SYMBOL_GPL(phy_modify);
 
 /**
- * __phy_modify_mmd - Convenience function for modifying a register on MMD
+ * __phy_modify_mmd_changed - Function for modifying a register on MMD
  * @phydev: the phy_device struct
  * @devad: the MMD containing register to modify
  * @regnum: register number to modify
@@ -587,17 +640,73 @@ EXPORT_SYMBOL_GPL(phy_modify);
  *
  * Unlocked helper function which allows a MMD register to be modified as
  * new register value = (old register value & ~mask) | set
+ *
+ * Returns negative errno, 0 if there was no change, and 1 in case of change
  */
-int __phy_modify_mmd(struct phy_device *phydev, int devad, u32 regnum,
-		     u16 mask, u16 set)
+int __phy_modify_mmd_changed(struct phy_device *phydev, int devad, u32 regnum,
+			     u16 mask, u16 set)
 {
-	int ret;
+	int new, ret;
 
 	ret = __phy_read_mmd(phydev, devad, regnum);
 	if (ret < 0)
 		return ret;
 
-	ret = __phy_write_mmd(phydev, devad, regnum, (ret & ~mask) | set);
+	new = (ret & ~mask) | set;
+	if (new == ret)
+		return 0;
+
+	ret = __phy_write_mmd(phydev, devad, regnum, new);
+
+	return ret < 0 ? ret : 1;
+}
+EXPORT_SYMBOL_GPL(__phy_modify_mmd_changed);
+
+/**
+ * phy_modify_mmd_changed - Function for modifying a register on MMD
+ * @phydev: the phy_device struct
+ * @devad: the MMD containing register to modify
+ * @regnum: register number to modify
+ * @mask: bit mask of bits to clear
+ * @set: new value of bits set in mask to write to @regnum
+ *
+ * NOTE: MUST NOT be called from interrupt context,
+ * because the bus read/write functions may wait for an interrupt
+ * to conclude the operation.
+ *
+ * Returns negative errno, 0 if there was no change, and 1 in case of change
+ */
+int phy_modify_mmd_changed(struct phy_device *phydev, int devad, u32 regnum,
+			   u16 mask, u16 set)
+{
+	int ret;
+
+	mutex_lock(&phydev->mdio.bus->mdio_lock);
+	ret = __phy_modify_mmd_changed(phydev, devad, regnum, mask, set);
+	mutex_unlock(&phydev->mdio.bus->mdio_lock);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(phy_modify_mmd_changed);
+
+/**
+ * __phy_modify_mmd - Convenience function for modifying a register on MMD
+ * @phydev: the phy_device struct
+ * @devad: the MMD containing register to modify
+ * @regnum: register number to modify
+ * @mask: bit mask of bits to clear
+ * @set: new value of bits set in mask to write to @regnum
+ *
+ * NOTE: MUST NOT be called from interrupt context,
+ * because the bus read/write functions may wait for an interrupt
+ * to conclude the operation.
+ */
+int __phy_modify_mmd(struct phy_device *phydev, int devad, u32 regnum,
+		     u16 mask, u16 set)
+{
+	int ret;
+
+	ret = __phy_modify_mmd_changed(phydev, devad, regnum, mask, set);
 
 	return ret < 0 ? ret : 0;
 }
diff --git a/include/linux/phy.h b/include/linux/phy.h
index d2ffae992e4a..378da9a6165e 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -799,13 +799,21 @@ int phy_write_mmd(struct phy_device *phydev, int devad, u32 regnum, u16 val);
  */
 int __phy_write_mmd(struct phy_device *phydev, int devad, u32 regnum, u16 val);
 
+int __phy_modify_changed(struct phy_device *phydev, u32 regnum, u16 mask,
+			 u16 set);
+int phy_modify_changed(struct phy_device *phydev, u32 regnum, u16 mask,
+		       u16 set);
 int __phy_modify(struct phy_device *phydev, u32 regnum, u16 mask, u16 set);
 int phy_modify(struct phy_device *phydev, u32 regnum, u16 mask, u16 set);
 
+int __phy_modify_mmd_changed(struct phy_device *phydev, int devad, u32 regnum,
+			     u16 mask, u16 set);
+int phy_modify_mmd_changed(struct phy_device *phydev, int devad, u32 regnum,
+			   u16 mask, u16 set);
 int __phy_modify_mmd(struct phy_device *phydev, int devad, u32 regnum,
-		u16 mask, u16 set);
+		     u16 mask, u16 set);
 int phy_modify_mmd(struct phy_device *phydev, int devad, u32 regnum,
-		u16 mask, u16 set);
+		   u16 mask, u16 set);
 
 /**
  * __phy_set_bits - Convenience function for setting bits in a PHY register
-- 
cgit v1.2.3


From d90bf296ae18f26a18e572965fc0047fa1bd37a8 Mon Sep 17 00:00:00 2001
From: Daniel Baluta <daniel.baluta@nxp.com>
Date: Wed, 30 Jan 2019 13:30:22 +0000
Subject: firmware: imx: Add support to start/stop a CPU

This is done via RPC call to SCU.

Signed-off-by: Daniel Baluta <daniel.baluta@nxp.com>
Reviewed-by: Dong Aisheng <aisheng.dong@nxp.com>
Signed-off-by: Shawn Guo <shawnguo@kernel.org>
---
 drivers/firmware/imx/misc.c           | 38 +++++++++++++++++++++++++++++++++++
 include/linux/firmware/imx/svc/misc.h |  3 +++
 2 files changed, 41 insertions(+)

(limited to 'include')

diff --git a/drivers/firmware/imx/misc.c b/drivers/firmware/imx/misc.c
index 97f5424dbac9..4b56a587dacd 100644
--- a/drivers/firmware/imx/misc.c
+++ b/drivers/firmware/imx/misc.c
@@ -18,6 +18,14 @@ struct imx_sc_msg_req_misc_set_ctrl {
 	u16 resource;
 } __packed;
 
+struct imx_sc_msg_req_cpu_start {
+	struct imx_sc_rpc_msg hdr;
+	u32 address_hi;
+	u32 address_lo;
+	u16 resource;
+	u8 enable;
+} __packed;
+
 struct imx_sc_msg_req_misc_get_ctrl {
 	struct imx_sc_rpc_msg hdr;
 	u32 ctrl;
@@ -97,3 +105,33 @@ int imx_sc_misc_get_control(struct imx_sc_ipc *ipc, u32 resource,
 	return 0;
 }
 EXPORT_SYMBOL(imx_sc_misc_get_control);
+
+/*
+ * This function starts/stops a CPU identified by @resource
+ *
+ * @param[in]     ipc         IPC handle
+ * @param[in]     resource    resource the control is associated with
+ * @param[in]     enable      true for start, false for stop
+ * @param[in]     phys_addr   initial instruction address to be executed
+ *
+ * @return Returns 0 for success and < 0 for errors.
+ */
+int imx_sc_pm_cpu_start(struct imx_sc_ipc *ipc, u32 resource,
+			bool enable, u64 phys_addr)
+{
+	struct imx_sc_msg_req_cpu_start msg;
+	struct imx_sc_rpc_msg *hdr = &msg.hdr;
+
+	hdr->ver = IMX_SC_RPC_VERSION;
+	hdr->svc = IMX_SC_RPC_SVC_PM;
+	hdr->func = IMX_SC_PM_FUNC_CPU_START;
+	hdr->size = 4;
+
+	msg.address_hi = phys_addr >> 32;
+	msg.address_lo = phys_addr;
+	msg.resource = resource;
+	msg.enable = enable;
+
+	return imx_scu_call_rpc(ipc, &msg, true);
+}
+EXPORT_SYMBOL(imx_sc_pm_cpu_start);
diff --git a/include/linux/firmware/imx/svc/misc.h b/include/linux/firmware/imx/svc/misc.h
index e21c49aba92f..031dd4d3c766 100644
--- a/include/linux/firmware/imx/svc/misc.h
+++ b/include/linux/firmware/imx/svc/misc.h
@@ -52,4 +52,7 @@ int imx_sc_misc_set_control(struct imx_sc_ipc *ipc, u32 resource,
 int imx_sc_misc_get_control(struct imx_sc_ipc *ipc, u32 resource,
 			    u8 ctrl, u32 *val);
 
+int imx_sc_pm_cpu_start(struct imx_sc_ipc *ipc, u32 resource,
+			bool enable, u64 phys_addr);
+
 #endif /* _SC_MISC_API_H */
-- 
cgit v1.2.3


From 46f8bc92758c6259bcf945e9216098661c1587cd Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Sat, 9 Feb 2019 23:22:20 -0800
Subject: bpf: Add a bpf_sock pointer to __sk_buff and a bpf_sk_fullsock helper

In kernel, it is common to check "skb->sk && sk_fullsock(skb->sk)"
before accessing the fields in sock.  For example, in __netdev_pick_tx:

static u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb,
			    struct net_device *sb_dev)
{
	/* ... */

	struct sock *sk = skb->sk;

		if (queue_index != new_index && sk &&
		    sk_fullsock(sk) &&
		    rcu_access_pointer(sk->sk_dst_cache))
			sk_tx_queue_set(sk, new_index);

	/* ... */

	return queue_index;
}

This patch adds a "struct bpf_sock *sk" pointer to the "struct __sk_buff"
where a few of the convert_ctx_access() in filter.c has already been
accessing the skb->sk sock_common's fields,
e.g. sock_ops_convert_ctx_access().

"__sk_buff->sk" is a PTR_TO_SOCK_COMMON_OR_NULL in the verifier.
Some of the fileds in "bpf_sock" will not be directly
accessible through the "__sk_buff->sk" pointer.  It is limited
by the new "bpf_sock_common_is_valid_access()".
e.g. The existing "type", "protocol", "mark" and "priority" in bpf_sock
     are not allowed.

The newly added "struct bpf_sock *bpf_sk_fullsock(struct bpf_sock *sk)"
can be used to get a sk with all accessible fields in "bpf_sock".
This helper is added to both cg_skb and sched_(cls|act).

int cg_skb_foo(struct __sk_buff *skb) {
	struct bpf_sock *sk;

	sk = skb->sk;
	if (!sk)
		return 1;

	sk = bpf_sk_fullsock(sk);
	if (!sk)
		return 1;

	if (sk->family != AF_INET6 || sk->protocol != IPPROTO_TCP)
		return 1;

	/* some_traffic_shaping(); */

	return 1;
}

(1) The sk is read only

(2) There is no new "struct bpf_sock_common" introduced.

(3) Future kernel sock's members could be added to bpf_sock only
    instead of repeatedly adding at multiple places like currently
    in bpf_sock_ops_md, bpf_sock_addr_md, sk_reuseport_md...etc.

(4) After "sk = skb->sk", the reg holding sk is in type
    PTR_TO_SOCK_COMMON_OR_NULL.

(5) After bpf_sk_fullsock(), the return type will be in type
    PTR_TO_SOCKET_OR_NULL which is the same as the return type of
    bpf_sk_lookup_xxx().

    However, bpf_sk_fullsock() does not take refcnt.  The
    acquire_reference_state() is only depending on the return type now.
    To avoid it, a new is_acquire_function() is checked before calling
    acquire_reference_state().

(6) The WARN_ON in "release_reference_state()" is no longer an
    internal verifier bug.

    When reg->id is not found in state->refs[], it means the
    bpf_prog does something wrong like
    "bpf_sk_release(bpf_sk_fullsock(skb->sk))" where reference has
    never been acquired by calling "bpf_sk_fullsock(skb->sk)".

    A -EINVAL and a verbose are done instead of WARN_ON.  A test is
    added to the test_verifier in a later patch.

    Since the WARN_ON in "release_reference_state()" is no longer
    needed, "__release_reference_state()" is folded into
    "release_reference_state()" also.

Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf.h      |  12 +++++
 include/uapi/linux/bpf.h |  12 ++++-
 kernel/bpf/verifier.c    | 132 +++++++++++++++++++++++++++++++++--------------
 net/core/filter.c        |  42 +++++++++++++++
 4 files changed, 157 insertions(+), 41 deletions(-)

(limited to 'include')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index bd169a7bcc93..a60463b45b54 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -194,6 +194,7 @@ enum bpf_arg_type {
 	ARG_ANYTHING,		/* any (initialized) argument is ok */
 	ARG_PTR_TO_SOCKET,	/* pointer to bpf_sock */
 	ARG_PTR_TO_SPIN_LOCK,	/* pointer to bpf_spin_lock */
+	ARG_PTR_TO_SOCK_COMMON,	/* pointer to sock_common */
 };
 
 /* type of values returned from helper functions */
@@ -256,6 +257,8 @@ enum bpf_reg_type {
 	PTR_TO_FLOW_KEYS,	 /* reg points to bpf_flow_keys */
 	PTR_TO_SOCKET,		 /* reg points to struct bpf_sock */
 	PTR_TO_SOCKET_OR_NULL,	 /* reg points to struct bpf_sock or NULL */
+	PTR_TO_SOCK_COMMON,	 /* reg points to sock_common */
+	PTR_TO_SOCK_COMMON_OR_NULL, /* reg points to sock_common or NULL */
 };
 
 /* The information passed from prog-specific *_is_valid_access
@@ -920,6 +923,9 @@ void bpf_user_rnd_init_once(void);
 u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
 
 #if defined(CONFIG_NET)
+bool bpf_sock_common_is_valid_access(int off, int size,
+				     enum bpf_access_type type,
+				     struct bpf_insn_access_aux *info);
 bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type,
 			      struct bpf_insn_access_aux *info);
 u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
@@ -928,6 +934,12 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
 				struct bpf_prog *prog,
 				u32 *target_size);
 #else
+static inline bool bpf_sock_common_is_valid_access(int off, int size,
+						   enum bpf_access_type type,
+						   struct bpf_insn_access_aux *info)
+{
+	return false;
+}
 static inline bool bpf_sock_is_valid_access(int off, int size,
 					    enum bpf_access_type type,
 					    struct bpf_insn_access_aux *info)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 1777fa0c61e4..5d79cba74ddc 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -2329,6 +2329,14 @@ union bpf_attr {
  *		"**y**".
  *	Return
  *		0
+ *
+ * struct bpf_sock *bpf_sk_fullsock(struct bpf_sock *sk)
+ *	Description
+ *		This helper gets a **struct bpf_sock** pointer such
+ *		that all the fields in bpf_sock can be accessed.
+ *	Return
+ *		A **struct bpf_sock** pointer on success, or NULL in
+ *		case of failure.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -2425,7 +2433,8 @@ union bpf_attr {
 	FN(msg_pop_data),		\
 	FN(rc_pointer_rel),		\
 	FN(spin_lock),			\
-	FN(spin_unlock),
+	FN(spin_unlock),		\
+	FN(sk_fullsock),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
@@ -2545,6 +2554,7 @@ struct __sk_buff {
 	__u64 tstamp;
 	__u32 wire_len;
 	__u32 gso_segs;
+	__bpf_md_ptr(struct bpf_sock *, sk);
 };
 
 struct bpf_tunnel_key {
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 516dfc6d78de..b755d55a3791 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -331,10 +331,17 @@ static bool type_is_pkt_pointer(enum bpf_reg_type type)
 	       type == PTR_TO_PACKET_META;
 }
 
+static bool type_is_sk_pointer(enum bpf_reg_type type)
+{
+	return type == PTR_TO_SOCKET ||
+		type == PTR_TO_SOCK_COMMON;
+}
+
 static bool reg_type_may_be_null(enum bpf_reg_type type)
 {
 	return type == PTR_TO_MAP_VALUE_OR_NULL ||
-	       type == PTR_TO_SOCKET_OR_NULL;
+	       type == PTR_TO_SOCKET_OR_NULL ||
+	       type == PTR_TO_SOCK_COMMON_OR_NULL;
 }
 
 static bool type_is_refcounted(enum bpf_reg_type type)
@@ -377,6 +384,12 @@ static bool is_release_function(enum bpf_func_id func_id)
 	return func_id == BPF_FUNC_sk_release;
 }
 
+static bool is_acquire_function(enum bpf_func_id func_id)
+{
+	return func_id == BPF_FUNC_sk_lookup_tcp ||
+		func_id == BPF_FUNC_sk_lookup_udp;
+}
+
 /* string representation of 'enum bpf_reg_type' */
 static const char * const reg_type_str[] = {
 	[NOT_INIT]		= "?",
@@ -392,6 +405,8 @@ static const char * const reg_type_str[] = {
 	[PTR_TO_FLOW_KEYS]	= "flow_keys",
 	[PTR_TO_SOCKET]		= "sock",
 	[PTR_TO_SOCKET_OR_NULL] = "sock_or_null",
+	[PTR_TO_SOCK_COMMON]	= "sock_common",
+	[PTR_TO_SOCK_COMMON_OR_NULL] = "sock_common_or_null",
 };
 
 static char slot_type_char[] = {
@@ -618,13 +633,10 @@ static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx)
 }
 
 /* release function corresponding to acquire_reference_state(). Idempotent. */
-static int __release_reference_state(struct bpf_func_state *state, int ptr_id)
+static int release_reference_state(struct bpf_func_state *state, int ptr_id)
 {
 	int i, last_idx;
 
-	if (!ptr_id)
-		return -EFAULT;
-
 	last_idx = state->acquired_refs - 1;
 	for (i = 0; i < state->acquired_refs; i++) {
 		if (state->refs[i].id == ptr_id) {
@@ -636,21 +648,7 @@ static int __release_reference_state(struct bpf_func_state *state, int ptr_id)
 			return 0;
 		}
 	}
-	return -EFAULT;
-}
-
-/* variation on the above for cases where we expect that there must be an
- * outstanding reference for the specified ptr_id.
- */
-static int release_reference_state(struct bpf_verifier_env *env, int ptr_id)
-{
-	struct bpf_func_state *state = cur_func(env);
-	int err;
-
-	err = __release_reference_state(state, ptr_id);
-	if (WARN_ON_ONCE(err != 0))
-		verbose(env, "verifier internal error: can't release reference\n");
-	return err;
+	return -EINVAL;
 }
 
 static int transfer_reference_state(struct bpf_func_state *dst,
@@ -1209,6 +1207,8 @@ static bool is_spillable_regtype(enum bpf_reg_type type)
 	case CONST_PTR_TO_MAP:
 	case PTR_TO_SOCKET:
 	case PTR_TO_SOCKET_OR_NULL:
+	case PTR_TO_SOCK_COMMON:
+	case PTR_TO_SOCK_COMMON_OR_NULL:
 		return true;
 	default:
 		return false;
@@ -1647,6 +1647,7 @@ static int check_sock_access(struct bpf_verifier_env *env, int insn_idx,
 	struct bpf_reg_state *regs = cur_regs(env);
 	struct bpf_reg_state *reg = &regs[regno];
 	struct bpf_insn_access_aux info = {};
+	bool valid;
 
 	if (reg->smin_value < 0) {
 		verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
@@ -1654,15 +1655,28 @@ static int check_sock_access(struct bpf_verifier_env *env, int insn_idx,
 		return -EACCES;
 	}
 
-	if (!bpf_sock_is_valid_access(off, size, t, &info)) {
-		verbose(env, "invalid bpf_sock access off=%d size=%d\n",
-			off, size);
-		return -EACCES;
+	switch (reg->type) {
+	case PTR_TO_SOCK_COMMON:
+		valid = bpf_sock_common_is_valid_access(off, size, t, &info);
+		break;
+	case PTR_TO_SOCKET:
+		valid = bpf_sock_is_valid_access(off, size, t, &info);
+		break;
+	default:
+		valid = false;
 	}
 
-	env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size;
 
-	return 0;
+	if (valid) {
+		env->insn_aux_data[insn_idx].ctx_field_size =
+			info.ctx_field_size;
+		return 0;
+	}
+
+	verbose(env, "R%d invalid %s access off=%d size=%d\n",
+		regno, reg_type_str[reg->type], off, size);
+
+	return -EACCES;
 }
 
 static bool __is_pointer_value(bool allow_ptr_leaks,
@@ -1688,8 +1702,14 @@ static bool is_ctx_reg(struct bpf_verifier_env *env, int regno)
 {
 	const struct bpf_reg_state *reg = reg_state(env, regno);
 
-	return reg->type == PTR_TO_CTX ||
-	       reg->type == PTR_TO_SOCKET;
+	return reg->type == PTR_TO_CTX;
+}
+
+static bool is_sk_reg(struct bpf_verifier_env *env, int regno)
+{
+	const struct bpf_reg_state *reg = reg_state(env, regno);
+
+	return type_is_sk_pointer(reg->type);
 }
 
 static bool is_pkt_reg(struct bpf_verifier_env *env, int regno)
@@ -1800,6 +1820,9 @@ static int check_ptr_alignment(struct bpf_verifier_env *env,
 	case PTR_TO_SOCKET:
 		pointer_desc = "sock ";
 		break;
+	case PTR_TO_SOCK_COMMON:
+		pointer_desc = "sock_common ";
+		break;
 	default:
 		break;
 	}
@@ -2003,11 +2026,14 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
 			 * PTR_TO_PACKET[_META,_END]. In the latter
 			 * case, we know the offset is zero.
 			 */
-			if (reg_type == SCALAR_VALUE)
+			if (reg_type == SCALAR_VALUE) {
 				mark_reg_unknown(env, regs, value_regno);
-			else
+			} else {
 				mark_reg_known_zero(env, regs,
 						    value_regno);
+				if (reg_type_may_be_null(reg_type))
+					regs[value_regno].id = ++env->id_gen;
+			}
 			regs[value_regno].type = reg_type;
 		}
 
@@ -2053,9 +2079,10 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
 		err = check_flow_keys_access(env, off, size);
 		if (!err && t == BPF_READ && value_regno >= 0)
 			mark_reg_unknown(env, regs, value_regno);
-	} else if (reg->type == PTR_TO_SOCKET) {
+	} else if (type_is_sk_pointer(reg->type)) {
 		if (t == BPF_WRITE) {
-			verbose(env, "cannot write into socket\n");
+			verbose(env, "R%d cannot write into %s\n",
+				regno, reg_type_str[reg->type]);
 			return -EACCES;
 		}
 		err = check_sock_access(env, insn_idx, regno, off, size, t);
@@ -2102,7 +2129,8 @@ static int check_xadd(struct bpf_verifier_env *env, int insn_idx, struct bpf_ins
 
 	if (is_ctx_reg(env, insn->dst_reg) ||
 	    is_pkt_reg(env, insn->dst_reg) ||
-	    is_flow_key_reg(env, insn->dst_reg)) {
+	    is_flow_key_reg(env, insn->dst_reg) ||
+	    is_sk_reg(env, insn->dst_reg)) {
 		verbose(env, "BPF_XADD stores into R%d %s is not allowed\n",
 			insn->dst_reg,
 			reg_type_str[reg_state(env, insn->dst_reg)->type]);
@@ -2369,6 +2397,11 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
 		err = check_ctx_reg(env, reg, regno);
 		if (err < 0)
 			return err;
+	} else if (arg_type == ARG_PTR_TO_SOCK_COMMON) {
+		expected_type = PTR_TO_SOCK_COMMON;
+		/* Any sk pointer can be ARG_PTR_TO_SOCK_COMMON */
+		if (!type_is_sk_pointer(type))
+			goto err_type;
 	} else if (arg_type == ARG_PTR_TO_SOCKET) {
 		expected_type = PTR_TO_SOCKET;
 		if (type != expected_type)
@@ -2783,7 +2816,7 @@ static int release_reference(struct bpf_verifier_env *env,
 	for (i = 0; i <= vstate->curframe; i++)
 		release_reg_references(env, vstate->frame[i], meta->ptr_id);
 
-	return release_reference_state(env, meta->ptr_id);
+	return release_reference_state(cur_func(env), meta->ptr_id);
 }
 
 static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
@@ -3049,8 +3082,11 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
 		}
 	} else if (is_release_function(func_id)) {
 		err = release_reference(env, &meta);
-		if (err)
+		if (err) {
+			verbose(env, "func %s#%d reference has not been acquired before\n",
+				func_id_name(func_id), func_id);
 			return err;
+		}
 	}
 
 	regs = cur_regs(env);
@@ -3099,12 +3135,19 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
 			regs[BPF_REG_0].id = ++env->id_gen;
 		}
 	} else if (fn->ret_type == RET_PTR_TO_SOCKET_OR_NULL) {
-		int id = acquire_reference_state(env, insn_idx);
-		if (id < 0)
-			return id;
 		mark_reg_known_zero(env, regs, BPF_REG_0);
 		regs[BPF_REG_0].type = PTR_TO_SOCKET_OR_NULL;
-		regs[BPF_REG_0].id = id;
+		if (is_acquire_function(func_id)) {
+			int id = acquire_reference_state(env, insn_idx);
+
+			if (id < 0)
+				return id;
+			/* For release_reference() */
+			regs[BPF_REG_0].id = id;
+		} else {
+			/* For mark_ptr_or_null_reg() */
+			regs[BPF_REG_0].id = ++env->id_gen;
+		}
 	} else {
 		verbose(env, "unknown return type %d of func %s#%d\n",
 			fn->ret_type, func_id_name(func_id), func_id);
@@ -3364,6 +3407,8 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
 	case PTR_TO_PACKET_END:
 	case PTR_TO_SOCKET:
 	case PTR_TO_SOCKET_OR_NULL:
+	case PTR_TO_SOCK_COMMON:
+	case PTR_TO_SOCK_COMMON_OR_NULL:
 		verbose(env, "R%d pointer arithmetic on %s prohibited\n",
 			dst, reg_type_str[ptr_reg->type]);
 		return -EACCES;
@@ -4597,6 +4642,8 @@ static void mark_ptr_or_null_reg(struct bpf_func_state *state,
 			}
 		} else if (reg->type == PTR_TO_SOCKET_OR_NULL) {
 			reg->type = PTR_TO_SOCKET;
+		} else if (reg->type == PTR_TO_SOCK_COMMON_OR_NULL) {
+			reg->type = PTR_TO_SOCK_COMMON;
 		}
 		if (is_null || !(reg_is_refcounted(reg) ||
 				 reg_may_point_to_spin_lock(reg))) {
@@ -4621,7 +4668,7 @@ static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno,
 	int i, j;
 
 	if (reg_is_refcounted_or_null(&regs[regno]) && is_null)
-		__release_reference_state(state, id);
+		release_reference_state(state, id);
 
 	for (i = 0; i < MAX_BPF_REG; i++)
 		mark_ptr_or_null_reg(state, &regs[i], id, is_null);
@@ -5790,6 +5837,8 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
 	case PTR_TO_FLOW_KEYS:
 	case PTR_TO_SOCKET:
 	case PTR_TO_SOCKET_OR_NULL:
+	case PTR_TO_SOCK_COMMON:
+	case PTR_TO_SOCK_COMMON_OR_NULL:
 		/* Only valid matches are exact, which memcmp() above
 		 * would have accepted
 		 */
@@ -6110,6 +6159,8 @@ static bool reg_type_mismatch_ok(enum bpf_reg_type type)
 	case PTR_TO_CTX:
 	case PTR_TO_SOCKET:
 	case PTR_TO_SOCKET_OR_NULL:
+	case PTR_TO_SOCK_COMMON:
+	case PTR_TO_SOCK_COMMON_OR_NULL:
 		return false;
 	default:
 		return true;
@@ -7112,6 +7163,7 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
 			convert_ctx_access = ops->convert_ctx_access;
 			break;
 		case PTR_TO_SOCKET:
+		case PTR_TO_SOCK_COMMON:
 			convert_ctx_access = bpf_sock_convert_ctx_access;
 			break;
 		default:
diff --git a/net/core/filter.c b/net/core/filter.c
index 3a49f68eda10..401d2e0aebf8 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1793,6 +1793,20 @@ static const struct bpf_func_proto bpf_skb_pull_data_proto = {
 	.arg2_type	= ARG_ANYTHING,
 };
 
+BPF_CALL_1(bpf_sk_fullsock, struct sock *, sk)
+{
+	sk = sk_to_full_sk(sk);
+
+	return sk_fullsock(sk) ? (unsigned long)sk : (unsigned long)NULL;
+}
+
+static const struct bpf_func_proto bpf_sk_fullsock_proto = {
+	.func		= bpf_sk_fullsock,
+	.gpl_only	= false,
+	.ret_type	= RET_PTR_TO_SOCKET_OR_NULL,
+	.arg1_type	= ARG_PTR_TO_SOCK_COMMON,
+};
+
 static inline int sk_skb_try_make_writable(struct sk_buff *skb,
 					   unsigned int write_len)
 {
@@ -5406,6 +5420,8 @@ cg_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 	switch (func_id) {
 	case BPF_FUNC_get_local_storage:
 		return &bpf_get_local_storage_proto;
+	case BPF_FUNC_sk_fullsock:
+		return &bpf_sk_fullsock_proto;
 	default:
 		return sk_filter_func_proto(func_id, prog);
 	}
@@ -5477,6 +5493,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_get_socket_uid_proto;
 	case BPF_FUNC_fib_lookup:
 		return &bpf_skb_fib_lookup_proto;
+	case BPF_FUNC_sk_fullsock:
+		return &bpf_sk_fullsock_proto;
 #ifdef CONFIG_XFRM
 	case BPF_FUNC_skb_get_xfrm_state:
 		return &bpf_skb_get_xfrm_state_proto;
@@ -5764,6 +5782,11 @@ static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type
 		if (size != sizeof(__u64))
 			return false;
 		break;
+	case offsetof(struct __sk_buff, sk):
+		if (type == BPF_WRITE || size != sizeof(__u64))
+			return false;
+		info->reg_type = PTR_TO_SOCK_COMMON_OR_NULL;
+		break;
 	default:
 		/* Only narrow read access allowed for now. */
 		if (type == BPF_WRITE) {
@@ -5950,6 +5973,18 @@ static bool __sock_filter_check_size(int off, int size,
 	return size == size_default;
 }
 
+bool bpf_sock_common_is_valid_access(int off, int size,
+				     enum bpf_access_type type,
+				     struct bpf_insn_access_aux *info)
+{
+	switch (off) {
+	case bpf_ctx_range_till(struct bpf_sock, type, priority):
+		return false;
+	default:
+		return bpf_sock_is_valid_access(off, size, type, info);
+	}
+}
+
 bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type,
 			      struct bpf_insn_access_aux *info)
 {
@@ -6748,6 +6783,13 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
 		off += offsetof(struct qdisc_skb_cb, pkt_len);
 		*target_size = 4;
 		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, off);
+		break;
+
+	case offsetof(struct __sk_buff, sk):
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
+				      si->dst_reg, si->src_reg,
+				      offsetof(struct sk_buff, sk));
+		break;
 	}
 
 	return insn - insn_buf;
-- 
cgit v1.2.3


From aa65d6960a98fc15a96ce361b26e9fd55c9bccc5 Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Sat, 9 Feb 2019 23:22:21 -0800
Subject: bpf: Add state, dst_ip4, dst_ip6 and dst_port to bpf_sock

This patch adds "state", "dst_ip4", "dst_ip6" and "dst_port" to the
bpf_sock.  The userspace has already been using "state",
e.g. inet_diag (ss -t) and getsockopt(TCP_INFO).

This patch also allows narrow load on the following existing fields:
"family", "type", "protocol" and "src_port".  Unlike IP address,
the load offset is resticted to the first byte for them but it
can be relaxed later if there is a use case.

This patch also folds __sock_filter_check_size() into
bpf_sock_is_valid_access() since it is not called
by any where else.  All bpf_sock checking is in
one place.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/uapi/linux/bpf.h | 17 ++++-----
 net/core/filter.c        | 99 +++++++++++++++++++++++++++++++++++++-----------
 2 files changed, 85 insertions(+), 31 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 5d79cba74ddc..d8f91777c5b6 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -2606,15 +2606,14 @@ struct bpf_sock {
 	__u32 protocol;
 	__u32 mark;
 	__u32 priority;
-	__u32 src_ip4;		/* Allows 1,2,4-byte read.
-				 * Stored in network byte order.
-				 */
-	__u32 src_ip6[4];	/* Allows 1,2,4-byte read.
-				 * Stored in network byte order.
-				 */
-	__u32 src_port;		/* Allows 4-byte read.
-				 * Stored in host byte order
-				 */
+	/* IP address also allows 1 and 2 bytes access */
+	__u32 src_ip4;
+	__u32 src_ip6[4];
+	__u32 src_port;		/* host byte order */
+	__u32 dst_port;		/* network byte order */
+	__u32 dst_ip4;
+	__u32 dst_ip6[4];
+	__u32 state;
 };
 
 struct bpf_sock_tuple {
diff --git a/net/core/filter.c b/net/core/filter.c
index 401d2e0aebf8..01bb64bf2b5e 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -5958,21 +5958,6 @@ full_access:
 	return true;
 }
 
-static bool __sock_filter_check_size(int off, int size,
-				     struct bpf_insn_access_aux *info)
-{
-	const int size_default = sizeof(__u32);
-
-	switch (off) {
-	case bpf_ctx_range(struct bpf_sock, src_ip4):
-	case bpf_ctx_range_till(struct bpf_sock, src_ip6[0], src_ip6[3]):
-		bpf_ctx_record_field_size(info, size_default);
-		return bpf_ctx_narrow_access_ok(off, size, size_default);
-	}
-
-	return size == size_default;
-}
-
 bool bpf_sock_common_is_valid_access(int off, int size,
 				     enum bpf_access_type type,
 				     struct bpf_insn_access_aux *info)
@@ -5988,13 +5973,29 @@ bool bpf_sock_common_is_valid_access(int off, int size,
 bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type,
 			      struct bpf_insn_access_aux *info)
 {
+	const int size_default = sizeof(__u32);
+
 	if (off < 0 || off >= sizeof(struct bpf_sock))
 		return false;
 	if (off % size != 0)
 		return false;
-	if (!__sock_filter_check_size(off, size, info))
-		return false;
-	return true;
+
+	switch (off) {
+	case offsetof(struct bpf_sock, state):
+	case offsetof(struct bpf_sock, family):
+	case offsetof(struct bpf_sock, type):
+	case offsetof(struct bpf_sock, protocol):
+	case offsetof(struct bpf_sock, dst_port):
+	case offsetof(struct bpf_sock, src_port):
+	case bpf_ctx_range(struct bpf_sock, src_ip4):
+	case bpf_ctx_range_till(struct bpf_sock, src_ip6[0], src_ip6[3]):
+	case bpf_ctx_range(struct bpf_sock, dst_ip4):
+	case bpf_ctx_range_till(struct bpf_sock, dst_ip6[0], dst_ip6[3]):
+		bpf_ctx_record_field_size(info, size_default);
+		return bpf_ctx_narrow_access_ok(off, size, size_default);
+	}
+
+	return size == size_default;
 }
 
 static bool sock_filter_is_valid_access(int off, int size,
@@ -6838,24 +6839,32 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
 		break;
 
 	case offsetof(struct bpf_sock, family):
-		BUILD_BUG_ON(FIELD_SIZEOF(struct sock, sk_family) != 2);
-
-		*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
-				      offsetof(struct sock, sk_family));
+		*insn++ = BPF_LDX_MEM(
+			BPF_FIELD_SIZEOF(struct sock_common, skc_family),
+			si->dst_reg, si->src_reg,
+			bpf_target_off(struct sock_common,
+				       skc_family,
+				       FIELD_SIZEOF(struct sock_common,
+						    skc_family),
+				       target_size));
 		break;
 
 	case offsetof(struct bpf_sock, type):
+		BUILD_BUG_ON(HWEIGHT32(SK_FL_TYPE_MASK) != BITS_PER_BYTE * 2);
 		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
 				      offsetof(struct sock, __sk_flags_offset));
 		*insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_TYPE_MASK);
 		*insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, SK_FL_TYPE_SHIFT);
+		*target_size = 2;
 		break;
 
 	case offsetof(struct bpf_sock, protocol):
+		BUILD_BUG_ON(HWEIGHT32(SK_FL_PROTO_MASK) != BITS_PER_BYTE);
 		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
 				      offsetof(struct sock, __sk_flags_offset));
 		*insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_PROTO_MASK);
 		*insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, SK_FL_PROTO_SHIFT);
+		*target_size = 1;
 		break;
 
 	case offsetof(struct bpf_sock, src_ip4):
@@ -6867,6 +6876,15 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
 				       target_size));
 		break;
 
+	case offsetof(struct bpf_sock, dst_ip4):
+		*insn++ = BPF_LDX_MEM(
+			BPF_SIZE(si->code), si->dst_reg, si->src_reg,
+			bpf_target_off(struct sock_common, skc_daddr,
+				       FIELD_SIZEOF(struct sock_common,
+						    skc_daddr),
+				       target_size));
+		break;
+
 	case bpf_ctx_range_till(struct bpf_sock, src_ip6[0], src_ip6[3]):
 #if IS_ENABLED(CONFIG_IPV6)
 		off = si->off;
@@ -6885,6 +6903,23 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
 #endif
 		break;
 
+	case bpf_ctx_range_till(struct bpf_sock, dst_ip6[0], dst_ip6[3]):
+#if IS_ENABLED(CONFIG_IPV6)
+		off = si->off;
+		off -= offsetof(struct bpf_sock, dst_ip6[0]);
+		*insn++ = BPF_LDX_MEM(
+			BPF_SIZE(si->code), si->dst_reg, si->src_reg,
+			bpf_target_off(struct sock_common,
+				       skc_v6_daddr.s6_addr32[0],
+				       FIELD_SIZEOF(struct sock_common,
+						    skc_v6_daddr.s6_addr32[0]),
+				       target_size) + off);
+#else
+		*insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
+		*target_size = 4;
+#endif
+		break;
+
 	case offsetof(struct bpf_sock, src_port):
 		*insn++ = BPF_LDX_MEM(
 			BPF_FIELD_SIZEOF(struct sock_common, skc_num),
@@ -6894,6 +6929,26 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
 						    skc_num),
 				       target_size));
 		break;
+
+	case offsetof(struct bpf_sock, dst_port):
+		*insn++ = BPF_LDX_MEM(
+			BPF_FIELD_SIZEOF(struct sock_common, skc_dport),
+			si->dst_reg, si->src_reg,
+			bpf_target_off(struct sock_common, skc_dport,
+				       FIELD_SIZEOF(struct sock_common,
+						    skc_dport),
+				       target_size));
+		break;
+
+	case offsetof(struct bpf_sock, state):
+		*insn++ = BPF_LDX_MEM(
+			BPF_FIELD_SIZEOF(struct sock_common, skc_state),
+			si->dst_reg, si->src_reg,
+			bpf_target_off(struct sock_common, skc_state,
+				       FIELD_SIZEOF(struct sock_common,
+						    skc_state),
+				       target_size));
+		break;
 	}
 
 	return insn - insn_buf;
-- 
cgit v1.2.3


From 655a51e536c09d15ffa3603b1b6fce2b45b85a1f Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Sat, 9 Feb 2019 23:22:24 -0800
Subject: bpf: Add struct bpf_tcp_sock and BPF_FUNC_tcp_sock

This patch adds a helper function BPF_FUNC_tcp_sock and it
is currently available for cg_skb and sched_(cls|act):

struct bpf_tcp_sock *bpf_tcp_sock(struct bpf_sock *sk);

int cg_skb_foo(struct __sk_buff *skb) {
	struct bpf_tcp_sock *tp;
	struct bpf_sock *sk;
	__u32 snd_cwnd;

	sk = skb->sk;
	if (!sk)
		return 1;

	tp = bpf_tcp_sock(sk);
	if (!tp)
		return 1;

	snd_cwnd = tp->snd_cwnd;
	/* ... */

	return 1;
}

A 'struct bpf_tcp_sock' is also added to the uapi bpf.h to provide
read-only access.  bpf_tcp_sock has all the existing tcp_sock's fields
that has already been exposed by the bpf_sock_ops.
i.e. no new tcp_sock's fields are exposed in bpf.h.

This helper returns a pointer to the tcp_sock.  If it is not a tcp_sock
or it cannot be traced back to a tcp_sock by sk_to_full_sk(), it
returns NULL.  Hence, the caller needs to check for NULL before
accessing it.

The current use case is to expose members from tcp_sock
to allow a cg_skb_bpf_prog to provide per cgroup traffic
policing/shaping.

Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf.h      | 30 ++++++++++++++++++
 include/uapi/linux/bpf.h | 51 ++++++++++++++++++++++++++++++-
 kernel/bpf/verifier.c    | 31 +++++++++++++++++--
 net/core/filter.c        | 79 ++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 188 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index a60463b45b54..7f58828755fd 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -204,6 +204,7 @@ enum bpf_return_type {
 	RET_PTR_TO_MAP_VALUE,		/* returns a pointer to map elem value */
 	RET_PTR_TO_MAP_VALUE_OR_NULL,	/* returns a pointer to map elem value or NULL */
 	RET_PTR_TO_SOCKET_OR_NULL,	/* returns a pointer to a socket or NULL */
+	RET_PTR_TO_TCP_SOCK_OR_NULL,	/* returns a pointer to a tcp_sock or NULL */
 };
 
 /* eBPF function prototype used by verifier to allow BPF_CALLs from eBPF programs
@@ -259,6 +260,8 @@ enum bpf_reg_type {
 	PTR_TO_SOCKET_OR_NULL,	 /* reg points to struct bpf_sock or NULL */
 	PTR_TO_SOCK_COMMON,	 /* reg points to sock_common */
 	PTR_TO_SOCK_COMMON_OR_NULL, /* reg points to sock_common or NULL */
+	PTR_TO_TCP_SOCK,	 /* reg points to struct tcp_sock */
+	PTR_TO_TCP_SOCK_OR_NULL, /* reg points to struct tcp_sock or NULL */
 };
 
 /* The information passed from prog-specific *_is_valid_access
@@ -956,4 +959,31 @@ static inline u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
 }
 #endif
 
+#ifdef CONFIG_INET
+bool bpf_tcp_sock_is_valid_access(int off, int size, enum bpf_access_type type,
+				  struct bpf_insn_access_aux *info);
+
+u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type,
+				    const struct bpf_insn *si,
+				    struct bpf_insn *insn_buf,
+				    struct bpf_prog *prog,
+				    u32 *target_size);
+#else
+static inline bool bpf_tcp_sock_is_valid_access(int off, int size,
+						enum bpf_access_type type,
+						struct bpf_insn_access_aux *info)
+{
+	return false;
+}
+
+static inline u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type,
+						  const struct bpf_insn *si,
+						  struct bpf_insn *insn_buf,
+						  struct bpf_prog *prog,
+						  u32 *target_size)
+{
+	return 0;
+}
+#endif /* CONFIG_INET */
+
 #endif /* _LINUX_BPF_H */
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index d8f91777c5b6..25c8c0e62ecf 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -2337,6 +2337,15 @@ union bpf_attr {
  *	Return
  *		A **struct bpf_sock** pointer on success, or NULL in
  *		case of failure.
+ *
+ * struct bpf_tcp_sock *bpf_tcp_sock(struct bpf_sock *sk)
+ *	Description
+ *		This helper gets a **struct bpf_tcp_sock** pointer from a
+ *		**struct bpf_sock** pointer.
+ *
+ *	Return
+ *		A **struct bpf_tcp_sock** pointer on success, or NULL in
+ *		case of failure.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -2434,7 +2443,8 @@ union bpf_attr {
 	FN(rc_pointer_rel),		\
 	FN(spin_lock),			\
 	FN(spin_unlock),		\
-	FN(sk_fullsock),
+	FN(sk_fullsock),		\
+	FN(tcp_sock),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
@@ -2616,6 +2626,45 @@ struct bpf_sock {
 	__u32 state;
 };
 
+struct bpf_tcp_sock {
+	__u32 snd_cwnd;		/* Sending congestion window		*/
+	__u32 srtt_us;		/* smoothed round trip time << 3 in usecs */
+	__u32 rtt_min;
+	__u32 snd_ssthresh;	/* Slow start size threshold		*/
+	__u32 rcv_nxt;		/* What we want to receive next		*/
+	__u32 snd_nxt;		/* Next sequence we send		*/
+	__u32 snd_una;		/* First byte we want an ack for	*/
+	__u32 mss_cache;	/* Cached effective mss, not including SACKS */
+	__u32 ecn_flags;	/* ECN status bits.			*/
+	__u32 rate_delivered;	/* saved rate sample: packets delivered */
+	__u32 rate_interval_us;	/* saved rate sample: time elapsed */
+	__u32 packets_out;	/* Packets which are "in flight"	*/
+	__u32 retrans_out;	/* Retransmitted packets out		*/
+	__u32 total_retrans;	/* Total retransmits for entire connection */
+	__u32 segs_in;		/* RFC4898 tcpEStatsPerfSegsIn
+				 * total number of segments in.
+				 */
+	__u32 data_segs_in;	/* RFC4898 tcpEStatsPerfDataSegsIn
+				 * total number of data segments in.
+				 */
+	__u32 segs_out;		/* RFC4898 tcpEStatsPerfSegsOut
+				 * The total number of segments sent.
+				 */
+	__u32 data_segs_out;	/* RFC4898 tcpEStatsPerfDataSegsOut
+				 * total number of data segments sent.
+				 */
+	__u32 lost_out;		/* Lost packets			*/
+	__u32 sacked_out;	/* SACK'd packets			*/
+	__u64 bytes_received;	/* RFC4898 tcpEStatsAppHCThruOctetsReceived
+				 * sum(delta(rcv_nxt)), or how many bytes
+				 * were acked.
+				 */
+	__u64 bytes_acked;	/* RFC4898 tcpEStatsAppHCThruOctetsAcked
+				 * sum(delta(snd_una)), or how many bytes
+				 * were acked.
+				 */
+};
+
 struct bpf_sock_tuple {
 	union {
 		struct {
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index b755d55a3791..1b9496c41383 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -334,14 +334,16 @@ static bool type_is_pkt_pointer(enum bpf_reg_type type)
 static bool type_is_sk_pointer(enum bpf_reg_type type)
 {
 	return type == PTR_TO_SOCKET ||
-		type == PTR_TO_SOCK_COMMON;
+		type == PTR_TO_SOCK_COMMON ||
+		type == PTR_TO_TCP_SOCK;
 }
 
 static bool reg_type_may_be_null(enum bpf_reg_type type)
 {
 	return type == PTR_TO_MAP_VALUE_OR_NULL ||
 	       type == PTR_TO_SOCKET_OR_NULL ||
-	       type == PTR_TO_SOCK_COMMON_OR_NULL;
+	       type == PTR_TO_SOCK_COMMON_OR_NULL ||
+	       type == PTR_TO_TCP_SOCK_OR_NULL;
 }
 
 static bool type_is_refcounted(enum bpf_reg_type type)
@@ -407,6 +409,8 @@ static const char * const reg_type_str[] = {
 	[PTR_TO_SOCKET_OR_NULL] = "sock_or_null",
 	[PTR_TO_SOCK_COMMON]	= "sock_common",
 	[PTR_TO_SOCK_COMMON_OR_NULL] = "sock_common_or_null",
+	[PTR_TO_TCP_SOCK]	= "tcp_sock",
+	[PTR_TO_TCP_SOCK_OR_NULL] = "tcp_sock_or_null",
 };
 
 static char slot_type_char[] = {
@@ -1209,6 +1213,8 @@ static bool is_spillable_regtype(enum bpf_reg_type type)
 	case PTR_TO_SOCKET_OR_NULL:
 	case PTR_TO_SOCK_COMMON:
 	case PTR_TO_SOCK_COMMON_OR_NULL:
+	case PTR_TO_TCP_SOCK:
+	case PTR_TO_TCP_SOCK_OR_NULL:
 		return true;
 	default:
 		return false;
@@ -1662,6 +1668,9 @@ static int check_sock_access(struct bpf_verifier_env *env, int insn_idx,
 	case PTR_TO_SOCKET:
 		valid = bpf_sock_is_valid_access(off, size, t, &info);
 		break;
+	case PTR_TO_TCP_SOCK:
+		valid = bpf_tcp_sock_is_valid_access(off, size, t, &info);
+		break;
 	default:
 		valid = false;
 	}
@@ -1823,6 +1832,9 @@ static int check_ptr_alignment(struct bpf_verifier_env *env,
 	case PTR_TO_SOCK_COMMON:
 		pointer_desc = "sock_common ";
 		break;
+	case PTR_TO_TCP_SOCK:
+		pointer_desc = "tcp_sock ";
+		break;
 	default:
 		break;
 	}
@@ -3148,6 +3160,10 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
 			/* For mark_ptr_or_null_reg() */
 			regs[BPF_REG_0].id = ++env->id_gen;
 		}
+	} else if (fn->ret_type == RET_PTR_TO_TCP_SOCK_OR_NULL) {
+		mark_reg_known_zero(env, regs, BPF_REG_0);
+		regs[BPF_REG_0].type = PTR_TO_TCP_SOCK_OR_NULL;
+		regs[BPF_REG_0].id = ++env->id_gen;
 	} else {
 		verbose(env, "unknown return type %d of func %s#%d\n",
 			fn->ret_type, func_id_name(func_id), func_id);
@@ -3409,6 +3425,8 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
 	case PTR_TO_SOCKET_OR_NULL:
 	case PTR_TO_SOCK_COMMON:
 	case PTR_TO_SOCK_COMMON_OR_NULL:
+	case PTR_TO_TCP_SOCK:
+	case PTR_TO_TCP_SOCK_OR_NULL:
 		verbose(env, "R%d pointer arithmetic on %s prohibited\n",
 			dst, reg_type_str[ptr_reg->type]);
 		return -EACCES;
@@ -4644,6 +4662,8 @@ static void mark_ptr_or_null_reg(struct bpf_func_state *state,
 			reg->type = PTR_TO_SOCKET;
 		} else if (reg->type == PTR_TO_SOCK_COMMON_OR_NULL) {
 			reg->type = PTR_TO_SOCK_COMMON;
+		} else if (reg->type == PTR_TO_TCP_SOCK_OR_NULL) {
+			reg->type = PTR_TO_TCP_SOCK;
 		}
 		if (is_null || !(reg_is_refcounted(reg) ||
 				 reg_may_point_to_spin_lock(reg))) {
@@ -5839,6 +5859,8 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
 	case PTR_TO_SOCKET_OR_NULL:
 	case PTR_TO_SOCK_COMMON:
 	case PTR_TO_SOCK_COMMON_OR_NULL:
+	case PTR_TO_TCP_SOCK:
+	case PTR_TO_TCP_SOCK_OR_NULL:
 		/* Only valid matches are exact, which memcmp() above
 		 * would have accepted
 		 */
@@ -6161,6 +6183,8 @@ static bool reg_type_mismatch_ok(enum bpf_reg_type type)
 	case PTR_TO_SOCKET_OR_NULL:
 	case PTR_TO_SOCK_COMMON:
 	case PTR_TO_SOCK_COMMON_OR_NULL:
+	case PTR_TO_TCP_SOCK:
+	case PTR_TO_TCP_SOCK_OR_NULL:
 		return false;
 	default:
 		return true;
@@ -7166,6 +7190,9 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
 		case PTR_TO_SOCK_COMMON:
 			convert_ctx_access = bpf_sock_convert_ctx_access;
 			break;
+		case PTR_TO_TCP_SOCK:
+			convert_ctx_access = bpf_tcp_sock_convert_ctx_access;
+			break;
 		default:
 			continue;
 		}
diff --git a/net/core/filter.c b/net/core/filter.c
index c0d7b9ef279f..353735575204 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -5315,6 +5315,79 @@ static const struct bpf_func_proto bpf_sock_addr_sk_lookup_udp_proto = {
 	.arg5_type	= ARG_ANYTHING,
 };
 
+bool bpf_tcp_sock_is_valid_access(int off, int size, enum bpf_access_type type,
+				  struct bpf_insn_access_aux *info)
+{
+	if (off < 0 || off >= offsetofend(struct bpf_tcp_sock, bytes_acked))
+		return false;
+
+	if (off % size != 0)
+		return false;
+
+	switch (off) {
+	case offsetof(struct bpf_tcp_sock, bytes_received):
+	case offsetof(struct bpf_tcp_sock, bytes_acked):
+		return size == sizeof(__u64);
+	default:
+		return size == sizeof(__u32);
+	}
+}
+
+u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type,
+				    const struct bpf_insn *si,
+				    struct bpf_insn *insn_buf,
+				    struct bpf_prog *prog, u32 *target_size)
+{
+	struct bpf_insn *insn = insn_buf;
+
+#define BPF_TCP_SOCK_GET_COMMON(FIELD)					\
+	do {								\
+		BUILD_BUG_ON(FIELD_SIZEOF(struct tcp_sock, FIELD) >	\
+			     FIELD_SIZEOF(struct bpf_tcp_sock, FIELD));	\
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct tcp_sock, FIELD),\
+				      si->dst_reg, si->src_reg,		\
+				      offsetof(struct tcp_sock, FIELD)); \
+	} while (0)
+
+	CONVERT_COMMON_TCP_SOCK_FIELDS(struct bpf_tcp_sock,
+				       BPF_TCP_SOCK_GET_COMMON);
+
+	if (insn > insn_buf)
+		return insn - insn_buf;
+
+	switch (si->off) {
+	case offsetof(struct bpf_tcp_sock, rtt_min):
+		BUILD_BUG_ON(FIELD_SIZEOF(struct tcp_sock, rtt_min) !=
+			     sizeof(struct minmax));
+		BUILD_BUG_ON(sizeof(struct minmax) <
+			     sizeof(struct minmax_sample));
+
+		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
+				      offsetof(struct tcp_sock, rtt_min) +
+				      offsetof(struct minmax_sample, v));
+		break;
+	}
+
+	return insn - insn_buf;
+}
+
+BPF_CALL_1(bpf_tcp_sock, struct sock *, sk)
+{
+	sk = sk_to_full_sk(sk);
+
+	if (sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP)
+		return (unsigned long)sk;
+
+	return (unsigned long)NULL;
+}
+
+static const struct bpf_func_proto bpf_tcp_sock_proto = {
+	.func		= bpf_tcp_sock,
+	.gpl_only	= false,
+	.ret_type	= RET_PTR_TO_TCP_SOCK_OR_NULL,
+	.arg1_type	= ARG_PTR_TO_SOCK_COMMON,
+};
+
 #endif /* CONFIG_INET */
 
 bool bpf_helper_changes_pkt_data(void *func)
@@ -5470,6 +5543,10 @@ cg_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_get_local_storage_proto;
 	case BPF_FUNC_sk_fullsock:
 		return &bpf_sk_fullsock_proto;
+#ifdef CONFIG_INET
+	case BPF_FUNC_tcp_sock:
+		return &bpf_tcp_sock_proto;
+#endif
 	default:
 		return sk_filter_func_proto(func_id, prog);
 	}
@@ -5560,6 +5637,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_sk_lookup_udp_proto;
 	case BPF_FUNC_sk_release:
 		return &bpf_sk_release_proto;
+	case BPF_FUNC_tcp_sock:
+		return &bpf_tcp_sock_proto;
 #endif
 	default:
 		return bpf_base_func_proto(func_id);
-- 
cgit v1.2.3


From 99687cdbb3f6c8e32bcc7f37496e811f30460e48 Mon Sep 17 00:00:00 2001
From: Luc Van Oostenryck <luc.vanoostenryck@gmail.com>
Date: Fri, 18 Jan 2019 15:49:36 +0100
Subject: sched/topology: Fix percpu data types in struct sd_data & struct
 s_data

The percpu members of struct sd_data and s_data are declared as:

	struct ... ** __percpu member;

So their type is:

	__percpu pointer to pointer to struct ...

But looking at how they're used, their type should be:

	pointer to __percpu pointer to struct ...

and they should thus be declared as:

	struct ... * __percpu *member;

So fix the placement of '__percpu' in the definition of these
structures.

This addresses a bunch of Sparse's warnings like:

	warning: incorrect type in initializer (different address spaces)
	  expected void const [noderef] <asn:3> *__vpp_verify
	  got struct sched_domain **

Signed-off-by: Luc Van Oostenryck <luc.vanoostenryck@gmail.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: https://lkml.kernel.org/r/20190118144936.79158-1-luc.vanoostenryck@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched/topology.h | 8 ++++----
 kernel/sched/topology.c        | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h
index c31d3a47a47c..57c7ed3fe465 100644
--- a/include/linux/sched/topology.h
+++ b/include/linux/sched/topology.h
@@ -176,10 +176,10 @@ typedef int (*sched_domain_flags_f)(void);
 #define SDTL_OVERLAP	0x01
 
 struct sd_data {
-	struct sched_domain **__percpu sd;
-	struct sched_domain_shared **__percpu sds;
-	struct sched_group **__percpu sg;
-	struct sched_group_capacity **__percpu sgc;
+	struct sched_domain *__percpu *sd;
+	struct sched_domain_shared *__percpu *sds;
+	struct sched_group *__percpu *sg;
+	struct sched_group_capacity *__percpu *sgc;
 };
 
 struct sched_domain_topology_level {
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index 4ae9403420ed..93ff526e77b0 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -705,7 +705,7 @@ cpu_attach_domain(struct sched_domain *sd, struct root_domain *rd, int cpu)
 }
 
 struct s_data {
-	struct sched_domain ** __percpu sd;
+	struct sched_domain * __percpu *sd;
 	struct root_domain	*rd;
 };
 
-- 
cgit v1.2.3


From 2b9c2a4859ad5ac7b5a28e9db28c3e618760fe8c Mon Sep 17 00:00:00 2001
From: Hugo Lefeuvre <hle@owl.eu.com>
Date: Thu, 7 Feb 2019 21:03:52 +0100
Subject: sched/wait: Use freezable_schedule() when possible

Replace 'schedule(); try_to_freeze();' with a call to freezable_schedule().

Tasks calling freezable_schedule() set the PF_FREEZER_SKIP flag
before calling schedule(). Unlike tasks calling schedule();
try_to_freeze() tasks calling freezable_schedule() are not awaken by
try_to_freeze_tasks(). Instead they call try_to_freeze() when they
wake up if the freeze is still underway.

It is not a problem since sleeping tasks can't do anything which isn't
allowed for a frozen task while sleeping.

The result is a potential performance gain during freeze, since less
tasks have to be awaken.

For instance on a bare Debian vm running a 4.19 stable kernel, the
number of tasks skipped in freeze_task() went up from 12 without the
patch to 32 with the patch (out of 448), an increase of > x2.5.

Signed-off-by: Hugo Lefeuvre <hle@owl.eu.com>
Reviewed-by: Joel Fernandes (Google) <joel@joelfernandes.org>
Cc: Joel Fernandes <joel@joelfernandes.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rafael J. Wysocki <rjw@rjwysocki.net>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20190207200352.GA27859@behemoth.owl.eu.com.local
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/wait.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/wait.h b/include/linux/wait.h
index ed7c122cb31f..5f3efabc36f4 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -308,7 +308,7 @@ do {										\
 
 #define __wait_event_freezable(wq_head, condition)				\
 	___wait_event(wq_head, condition, TASK_INTERRUPTIBLE, 0, 0,		\
-			    schedule(); try_to_freeze())
+			    freezable_schedule())
 
 /**
  * wait_event_freezable - sleep (or freeze) until a condition gets true
@@ -367,7 +367,7 @@ do {										\
 #define __wait_event_freezable_timeout(wq_head, condition, timeout)		\
 	___wait_event(wq_head, ___wait_cond_timeout(condition),			\
 		      TASK_INTERRUPTIBLE, 0, timeout,				\
-		      __ret = schedule_timeout(__ret); try_to_freeze())
+		      __ret = freezable_schedule_timeout(__ret))
 
 /*
  * like wait_event_timeout() -- except it uses TASK_INTERRUPTIBLE to avoid
@@ -588,7 +588,7 @@ do {										\
 
 #define __wait_event_freezable_exclusive(wq, condition)				\
 	___wait_event(wq, condition, TASK_INTERRUPTIBLE, 1, 0,			\
-			schedule(); try_to_freeze())
+			freezable_schedule())
 
 #define wait_event_freezable_exclusive(wq, condition)				\
 ({										\
-- 
cgit v1.2.3


From f96935d3bc38a5f4b5188b6470a10e3fb8c3f0cc Mon Sep 17 00:00:00 2001
From: James Morse <james.morse@arm.com>
Date: Tue, 29 Jan 2019 18:49:01 +0000
Subject: firmware: arm_sdei: Add ACPI GHES registration helper

APEI's Generic Hardware Error Source structures do not describe
whether the SDEI event is shared or private, as this information is
discoverable via the API.

GHES needs to know whether an event is normal or critical to avoid
sharing locks or fixmap entries, but GHES shouldn't have to know about
the SDEI API.

Add a helper to register the GHES using the appropriate normal or
critical callback.

Signed-off-by: James Morse <james.morse@arm.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 arch/arm64/include/asm/fixmap.h |  4 +++
 drivers/firmware/arm_sdei.c     | 68 +++++++++++++++++++++++++++++++++++++++++
 include/linux/arm_sdei.h        |  6 ++++
 3 files changed, 78 insertions(+)

(limited to 'include')

diff --git a/arch/arm64/include/asm/fixmap.h b/arch/arm64/include/asm/fixmap.h
index 966dd4bb23f2..f987b8a8f325 100644
--- a/arch/arm64/include/asm/fixmap.h
+++ b/arch/arm64/include/asm/fixmap.h
@@ -56,6 +56,10 @@ enum fixed_addresses {
 	/* Used for GHES mapping from assorted contexts */
 	FIX_APEI_GHES_IRQ,
 	FIX_APEI_GHES_SEA,
+#ifdef CONFIG_ARM_SDE_INTERFACE
+	FIX_APEI_GHES_SDEI_NORMAL,
+	FIX_APEI_GHES_SDEI_CRITICAL,
+#endif
 #endif /* CONFIG_ACPI_APEI_GHES */
 
 #ifdef CONFIG_UNMAP_KERNEL_AT_EL0
diff --git a/drivers/firmware/arm_sdei.c b/drivers/firmware/arm_sdei.c
index c64c7da73829..e6376f985ef7 100644
--- a/drivers/firmware/arm_sdei.c
+++ b/drivers/firmware/arm_sdei.c
@@ -2,6 +2,7 @@
 // Copyright (C) 2017 Arm Ltd.
 #define pr_fmt(fmt) "sdei: " fmt
 
+#include <acpi/ghes.h>
 #include <linux/acpi.h>
 #include <linux/arm_sdei.h>
 #include <linux/arm-smccc.h>
@@ -887,6 +888,73 @@ static void sdei_smccc_hvc(unsigned long function_id,
 	arm_smccc_hvc(function_id, arg0, arg1, arg2, arg3, arg4, 0, 0, res);
 }
 
+int sdei_register_ghes(struct ghes *ghes, sdei_event_callback *normal_cb,
+		       sdei_event_callback *critical_cb)
+{
+	int err;
+	u64 result;
+	u32 event_num;
+	sdei_event_callback *cb;
+
+	if (!IS_ENABLED(CONFIG_ACPI_APEI_GHES))
+		return -EOPNOTSUPP;
+
+	event_num = ghes->generic->notify.vector;
+	if (event_num == 0) {
+		/*
+		 * Event 0 is reserved by the specification for
+		 * SDEI_EVENT_SIGNAL.
+		 */
+		return -EINVAL;
+	}
+
+	err = sdei_api_event_get_info(event_num, SDEI_EVENT_INFO_EV_PRIORITY,
+				      &result);
+	if (err)
+		return err;
+
+	if (result == SDEI_EVENT_PRIORITY_CRITICAL)
+		cb = critical_cb;
+	else
+		cb = normal_cb;
+
+	err = sdei_event_register(event_num, cb, ghes);
+	if (!err)
+		err = sdei_event_enable(event_num);
+
+	return err;
+}
+
+int sdei_unregister_ghes(struct ghes *ghes)
+{
+	int i;
+	int err;
+	u32 event_num = ghes->generic->notify.vector;
+
+	might_sleep();
+
+	if (!IS_ENABLED(CONFIG_ACPI_APEI_GHES))
+		return -EOPNOTSUPP;
+
+	/*
+	 * The event may be running on another CPU. Disable it
+	 * to stop new events, then try to unregister a few times.
+	 */
+	err = sdei_event_disable(event_num);
+	if (err)
+		return err;
+
+	for (i = 0; i < 3; i++) {
+		err = sdei_event_unregister(event_num);
+		if (err != -EINPROGRESS)
+			break;
+
+		schedule();
+	}
+
+	return err;
+}
+
 static int sdei_get_conduit(struct platform_device *pdev)
 {
 	const char *method;
diff --git a/include/linux/arm_sdei.h b/include/linux/arm_sdei.h
index 942afbd544b7..393899192906 100644
--- a/include/linux/arm_sdei.h
+++ b/include/linux/arm_sdei.h
@@ -11,6 +11,7 @@ enum sdei_conduit_types {
 	CONDUIT_HVC,
 };
 
+#include <acpi/ghes.h>
 #include <asm/sdei.h>
 
 /* Arch code should override this to set the entry point from firmware... */
@@ -39,6 +40,11 @@ int sdei_event_unregister(u32 event_num);
 int sdei_event_enable(u32 event_num);
 int sdei_event_disable(u32 event_num);
 
+/* GHES register/unregister helpers */
+int sdei_register_ghes(struct ghes *ghes, sdei_event_callback *normal_cb,
+		       sdei_event_callback *critical_cb);
+int sdei_unregister_ghes(struct ghes *ghes);
+
 #ifdef CONFIG_ARM_SDE_INTERFACE
 /* For use by arch code when CPU hotplug notifiers are not appropriate. */
 int sdei_mask_local_cpu(void);
-- 
cgit v1.2.3


From f9f05395f384ee858520b6c65d7e3e436af20c53 Mon Sep 17 00:00:00 2001
From: James Morse <james.morse@arm.com>
Date: Tue, 29 Jan 2019 18:49:02 +0000
Subject: ACPI / APEI: Add support for the SDEI GHES Notification type

If the GHES notification type is SDEI, register the provided event
using the SDEI-GHES helper.

SDEI may be one of two types of event, normal and critical. Critical
events can interrupt normal events, so these must have separate
fixmap slots and locks in case both event types are in use.

Signed-off-by: James Morse <james.morse@arm.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/apei/ghes.c | 85 ++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/arm_sdei.h |  3 ++
 2 files changed, 88 insertions(+)

(limited to 'include')

diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index 99707d565dcc..0b5ae91fd0fb 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -25,6 +25,7 @@
  * GNU General Public License for more details.
  */
 
+#include <linux/arm_sdei.h>
 #include <linux/kernel.h>
 #include <linux/moduleparam.h>
 #include <linux/init.h>
@@ -85,6 +86,15 @@
 	((struct acpi_hest_generic_status *)				\
 	 ((struct ghes_estatus_node *)(estatus_node) + 1))
 
+/*
+ *  NMI-like notifications vary by architecture, before the compiler can prune
+ *  unused static functions it needs a value for these enums.
+ */
+#ifndef CONFIG_ARM_SDE_INTERFACE
+#define FIX_APEI_GHES_SDEI_NORMAL	__end_of_fixed_addresses
+#define FIX_APEI_GHES_SDEI_CRITICAL	__end_of_fixed_addresses
+#endif
+
 static inline bool is_hest_type_generic_v2(struct ghes *ghes)
 {
 	return ghes->generic->header.type == ACPI_HEST_TYPE_GENERIC_ERROR_V2;
@@ -1040,6 +1050,63 @@ static void ghes_nmi_init_cxt(void)
 	init_irq_work(&ghes_proc_irq_work, ghes_proc_in_irq);
 }
 
+static int __ghes_sdei_callback(struct ghes *ghes,
+				enum fixed_addresses fixmap_idx)
+{
+	if (!ghes_in_nmi_queue_one_entry(ghes, fixmap_idx)) {
+		irq_work_queue(&ghes_proc_irq_work);
+
+		return 0;
+	}
+
+	return -ENOENT;
+}
+
+static int ghes_sdei_normal_callback(u32 event_num, struct pt_regs *regs,
+				      void *arg)
+{
+	static DEFINE_RAW_SPINLOCK(ghes_notify_lock_sdei_normal);
+	struct ghes *ghes = arg;
+	int err;
+
+	raw_spin_lock(&ghes_notify_lock_sdei_normal);
+	err = __ghes_sdei_callback(ghes, FIX_APEI_GHES_SDEI_NORMAL);
+	raw_spin_unlock(&ghes_notify_lock_sdei_normal);
+
+	return err;
+}
+
+static int ghes_sdei_critical_callback(u32 event_num, struct pt_regs *regs,
+				       void *arg)
+{
+	static DEFINE_RAW_SPINLOCK(ghes_notify_lock_sdei_critical);
+	struct ghes *ghes = arg;
+	int err;
+
+	raw_spin_lock(&ghes_notify_lock_sdei_critical);
+	err = __ghes_sdei_callback(ghes, FIX_APEI_GHES_SDEI_CRITICAL);
+	raw_spin_unlock(&ghes_notify_lock_sdei_critical);
+
+	return err;
+}
+
+static int apei_sdei_register_ghes(struct ghes *ghes)
+{
+	if (!IS_ENABLED(CONFIG_ARM_SDE_INTERFACE))
+		return -EOPNOTSUPP;
+
+	return sdei_register_ghes(ghes, ghes_sdei_normal_callback,
+				 ghes_sdei_critical_callback);
+}
+
+static int apei_sdei_unregister_ghes(struct ghes *ghes)
+{
+	if (!IS_ENABLED(CONFIG_ARM_SDE_INTERFACE))
+		return -EOPNOTSUPP;
+
+	return sdei_unregister_ghes(ghes);
+}
+
 static int ghes_probe(struct platform_device *ghes_dev)
 {
 	struct acpi_hest_generic *generic;
@@ -1075,6 +1142,13 @@ static int ghes_probe(struct platform_device *ghes_dev)
 			goto err;
 		}
 		break;
+	case ACPI_HEST_NOTIFY_SOFTWARE_DELEGATED:
+		if (!IS_ENABLED(CONFIG_ARM_SDE_INTERFACE)) {
+			pr_warn(GHES_PFX "Generic hardware error source: %d notified via SDE Interface is not supported!\n",
+				generic->header.source_id);
+			goto err;
+		}
+		break;
 	case ACPI_HEST_NOTIFY_LOCAL:
 		pr_warning(GHES_PFX "Generic hardware error source: %d notified via local interrupt is not supported!\n",
 			   generic->header.source_id);
@@ -1138,6 +1212,11 @@ static int ghes_probe(struct platform_device *ghes_dev)
 	case ACPI_HEST_NOTIFY_NMI:
 		ghes_nmi_add(ghes);
 		break;
+	case ACPI_HEST_NOTIFY_SOFTWARE_DELEGATED:
+		rc = apei_sdei_register_ghes(ghes);
+		if (rc)
+			goto err;
+		break;
 	default:
 		BUG();
 	}
@@ -1163,6 +1242,7 @@ err:
 
 static int ghes_remove(struct platform_device *ghes_dev)
 {
+	int rc;
 	struct ghes *ghes;
 	struct acpi_hest_generic *generic;
 
@@ -1195,6 +1275,11 @@ static int ghes_remove(struct platform_device *ghes_dev)
 	case ACPI_HEST_NOTIFY_NMI:
 		ghes_nmi_remove(ghes);
 		break;
+	case ACPI_HEST_NOTIFY_SOFTWARE_DELEGATED:
+		rc = apei_sdei_unregister_ghes(ghes);
+		if (rc)
+			return rc;
+		break;
 	default:
 		BUG();
 		break;
diff --git a/include/linux/arm_sdei.h b/include/linux/arm_sdei.h
index 393899192906..3305ea7f9dc7 100644
--- a/include/linux/arm_sdei.h
+++ b/include/linux/arm_sdei.h
@@ -12,7 +12,10 @@ enum sdei_conduit_types {
 };
 
 #include <acpi/ghes.h>
+
+#ifdef CONFIG_ARM_SDE_INTERFACE
 #include <asm/sdei.h>
+#endif
 
 /* Arch code should override this to set the entry point from firmware... */
 #ifndef sdei_arch_get_entry_point
-- 
cgit v1.2.3


From b77cf11f094136a9d7d0ee6a56cf49db1f412871 Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Tue, 5 Feb 2019 10:37:31 -0600
Subject: iommu: Allow io-pgtable to be used outside of drivers/iommu/

Move io-pgtable.h to include/linux/ and export alloc_io_pgtable_ops
and free_io_pgtable_ops. This enables drivers outside drivers/iommu/ to
use the page table library. Specifically, some ARM Mali GPUs use the
ARM page table formats.

Cc: Will Deacon <will.deacon@arm.com>
Cc: Robin Murphy <robin.murphy@arm.com>
Cc: Joerg Roedel <joro@8bytes.org>
Cc: Matthias Brugger <matthias.bgg@gmail.com>
Cc: Rob Clark <robdclark@gmail.com>
Cc: linux-arm-kernel@lists.infradead.org
Cc: iommu@lists.linux-foundation.org
Cc: linux-mediatek@lists.infradead.org
Cc: linux-arm-msm@vger.kernel.org
Signed-off-by: Rob Herring <robh@kernel.org>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/arm-smmu-v3.c        |   3 +-
 drivers/iommu/arm-smmu.c           |   2 +-
 drivers/iommu/io-pgtable-arm-v7s.c |   3 +-
 drivers/iommu/io-pgtable-arm.c     |   3 +-
 drivers/iommu/io-pgtable.c         |   5 +-
 drivers/iommu/io-pgtable.h         | 213 -------------------------------------
 drivers/iommu/ipmmu-vmsa.c         |   3 +-
 drivers/iommu/msm_iommu.c          |   2 +-
 drivers/iommu/mtk_iommu.h          |   3 +-
 drivers/iommu/qcom_iommu.c         |   2 +-
 include/linux/io-pgtable.h         | 213 +++++++++++++++++++++++++++++++++++++
 11 files changed, 224 insertions(+), 228 deletions(-)
 delete mode 100644 drivers/iommu/io-pgtable.h
 create mode 100644 include/linux/io-pgtable.h

(limited to 'include')

diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index 0d284029dc73..d3880010c6cf 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -18,6 +18,7 @@
 #include <linux/dma-iommu.h>
 #include <linux/err.h>
 #include <linux/interrupt.h>
+#include <linux/io-pgtable.h>
 #include <linux/iommu.h>
 #include <linux/iopoll.h>
 #include <linux/init.h>
@@ -32,8 +33,6 @@
 
 #include <linux/amba/bus.h>
 
-#include "io-pgtable.h"
-
 /* MMIO registers */
 #define ARM_SMMU_IDR0			0x0
 #define IDR0_ST_LVL			GENMASK(28, 27)
diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index af18a7e7f917..045d93884164 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -39,6 +39,7 @@
 #include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/io-64-nonatomic-hi-lo.h>
+#include <linux/io-pgtable.h>
 #include <linux/iommu.h>
 #include <linux/iopoll.h>
 #include <linux/init.h>
@@ -56,7 +57,6 @@
 #include <linux/amba/bus.h>
 #include <linux/fsl/mc.h>
 
-#include "io-pgtable.h"
 #include "arm-smmu-regs.h"
 
 #define ARM_MMU500_ACTLR_CPRE		(1 << 1)
diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c
index cec29bf45c9b..75a8273d1ae9 100644
--- a/drivers/iommu/io-pgtable-arm-v7s.c
+++ b/drivers/iommu/io-pgtable-arm-v7s.c
@@ -35,6 +35,7 @@
 #include <linux/atomic.h>
 #include <linux/dma-mapping.h>
 #include <linux/gfp.h>
+#include <linux/io-pgtable.h>
 #include <linux/iommu.h>
 #include <linux/kernel.h>
 #include <linux/kmemleak.h>
@@ -45,8 +46,6 @@
 
 #include <asm/barrier.h>
 
-#include "io-pgtable.h"
-
 /* Struct accessors */
 #define io_pgtable_to_data(x)						\
 	container_of((x), struct arm_v7s_io_pgtable, iop)
diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index 237cacd4a62b..d3700ec15cbd 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -22,6 +22,7 @@
 
 #include <linux/atomic.h>
 #include <linux/bitops.h>
+#include <linux/io-pgtable.h>
 #include <linux/iommu.h>
 #include <linux/kernel.h>
 #include <linux/sizes.h>
@@ -31,8 +32,6 @@
 
 #include <asm/barrier.h>
 
-#include "io-pgtable.h"
-
 #define ARM_LPAE_MAX_ADDR_BITS		52
 #define ARM_LPAE_S2_MAX_CONCAT_PAGES	16
 #define ARM_LPAE_MAX_LEVELS		4
diff --git a/drivers/iommu/io-pgtable.c b/drivers/iommu/io-pgtable.c
index 127558d83667..93f2880be6c6 100644
--- a/drivers/iommu/io-pgtable.c
+++ b/drivers/iommu/io-pgtable.c
@@ -19,11 +19,10 @@
  */
 
 #include <linux/bug.h>
+#include <linux/io-pgtable.h>
 #include <linux/kernel.h>
 #include <linux/types.h>
 
-#include "io-pgtable.h"
-
 static const struct io_pgtable_init_fns *
 io_pgtable_init_table[IO_PGTABLE_NUM_FMTS] = {
 #ifdef CONFIG_IOMMU_IO_PGTABLE_LPAE
@@ -61,6 +60,7 @@ struct io_pgtable_ops *alloc_io_pgtable_ops(enum io_pgtable_fmt fmt,
 
 	return &iop->ops;
 }
+EXPORT_SYMBOL_GPL(alloc_io_pgtable_ops);
 
 /*
  * It is the IOMMU driver's responsibility to ensure that the page table
@@ -77,3 +77,4 @@ void free_io_pgtable_ops(struct io_pgtable_ops *ops)
 	io_pgtable_tlb_flush_all(iop);
 	io_pgtable_init_table[iop->fmt]->free(iop);
 }
+EXPORT_SYMBOL_GPL(free_io_pgtable_ops);
diff --git a/drivers/iommu/io-pgtable.h b/drivers/iommu/io-pgtable.h
deleted file mode 100644
index 47d5ae559329..000000000000
--- a/drivers/iommu/io-pgtable.h
+++ /dev/null
@@ -1,213 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __IO_PGTABLE_H
-#define __IO_PGTABLE_H
-#include <linux/bitops.h>
-
-/*
- * Public API for use by IOMMU drivers
- */
-enum io_pgtable_fmt {
-	ARM_32_LPAE_S1,
-	ARM_32_LPAE_S2,
-	ARM_64_LPAE_S1,
-	ARM_64_LPAE_S2,
-	ARM_V7S,
-	IO_PGTABLE_NUM_FMTS,
-};
-
-/**
- * struct iommu_gather_ops - IOMMU callbacks for TLB and page table management.
- *
- * @tlb_flush_all: Synchronously invalidate the entire TLB context.
- * @tlb_add_flush: Queue up a TLB invalidation for a virtual address range.
- * @tlb_sync:      Ensure any queued TLB invalidation has taken effect, and
- *                 any corresponding page table updates are visible to the
- *                 IOMMU.
- *
- * Note that these can all be called in atomic context and must therefore
- * not block.
- */
-struct iommu_gather_ops {
-	void (*tlb_flush_all)(void *cookie);
-	void (*tlb_add_flush)(unsigned long iova, size_t size, size_t granule,
-			      bool leaf, void *cookie);
-	void (*tlb_sync)(void *cookie);
-};
-
-/**
- * struct io_pgtable_cfg - Configuration data for a set of page tables.
- *
- * @quirks:        A bitmap of hardware quirks that require some special
- *                 action by the low-level page table allocator.
- * @pgsize_bitmap: A bitmap of page sizes supported by this set of page
- *                 tables.
- * @ias:           Input address (iova) size, in bits.
- * @oas:           Output address (paddr) size, in bits.
- * @tlb:           TLB management callbacks for this set of tables.
- * @iommu_dev:     The device representing the DMA configuration for the
- *                 page table walker.
- */
-struct io_pgtable_cfg {
-	/*
-	 * IO_PGTABLE_QUIRK_ARM_NS: (ARM formats) Set NS and NSTABLE bits in
-	 *	stage 1 PTEs, for hardware which insists on validating them
-	 *	even in	non-secure state where they should normally be ignored.
-	 *
-	 * IO_PGTABLE_QUIRK_NO_PERMS: Ignore the IOMMU_READ, IOMMU_WRITE and
-	 *	IOMMU_NOEXEC flags and map everything with full access, for
-	 *	hardware which does not implement the permissions of a given
-	 *	format, and/or requires some format-specific default value.
-	 *
-	 * IO_PGTABLE_QUIRK_TLBI_ON_MAP: If the format forbids caching invalid
-	 *	(unmapped) entries but the hardware might do so anyway, perform
-	 *	TLB maintenance when mapping as well as when unmapping.
-	 *
-	 * IO_PGTABLE_QUIRK_ARM_MTK_4GB: (ARM v7s format) Set bit 9 in all
-	 *	PTEs, for Mediatek IOMMUs which treat it as a 33rd address bit
-	 *	when the SoC is in "4GB mode" and they can only access the high
-	 *	remap of DRAM (0x1_00000000 to 0x1_ffffffff).
-	 *
-	 * IO_PGTABLE_QUIRK_NO_DMA: Guarantees that the tables will only ever
-	 *	be accessed by a fully cache-coherent IOMMU or CPU (e.g. for a
-	 *	software-emulated IOMMU), such that pagetable updates need not
-	 *	be treated as explicit DMA data.
-	 *
-	 * IO_PGTABLE_QUIRK_NON_STRICT: Skip issuing synchronous leaf TLBIs
-	 *	on unmap, for DMA domains using the flush queue mechanism for
-	 *	delayed invalidation.
-	 */
-	#define IO_PGTABLE_QUIRK_ARM_NS		BIT(0)
-	#define IO_PGTABLE_QUIRK_NO_PERMS	BIT(1)
-	#define IO_PGTABLE_QUIRK_TLBI_ON_MAP	BIT(2)
-	#define IO_PGTABLE_QUIRK_ARM_MTK_4GB	BIT(3)
-	#define IO_PGTABLE_QUIRK_NO_DMA		BIT(4)
-	#define IO_PGTABLE_QUIRK_NON_STRICT	BIT(5)
-	unsigned long			quirks;
-	unsigned long			pgsize_bitmap;
-	unsigned int			ias;
-	unsigned int			oas;
-	const struct iommu_gather_ops	*tlb;
-	struct device			*iommu_dev;
-
-	/* Low-level data specific to the table format */
-	union {
-		struct {
-			u64	ttbr[2];
-			u64	tcr;
-			u64	mair[2];
-		} arm_lpae_s1_cfg;
-
-		struct {
-			u64	vttbr;
-			u64	vtcr;
-		} arm_lpae_s2_cfg;
-
-		struct {
-			u32	ttbr[2];
-			u32	tcr;
-			u32	nmrr;
-			u32	prrr;
-		} arm_v7s_cfg;
-	};
-};
-
-/**
- * struct io_pgtable_ops - Page table manipulation API for IOMMU drivers.
- *
- * @map:          Map a physically contiguous memory region.
- * @unmap:        Unmap a physically contiguous memory region.
- * @iova_to_phys: Translate iova to physical address.
- *
- * These functions map directly onto the iommu_ops member functions with
- * the same names.
- */
-struct io_pgtable_ops {
-	int (*map)(struct io_pgtable_ops *ops, unsigned long iova,
-		   phys_addr_t paddr, size_t size, int prot);
-	size_t (*unmap)(struct io_pgtable_ops *ops, unsigned long iova,
-			size_t size);
-	phys_addr_t (*iova_to_phys)(struct io_pgtable_ops *ops,
-				    unsigned long iova);
-};
-
-/**
- * alloc_io_pgtable_ops() - Allocate a page table allocator for use by an IOMMU.
- *
- * @fmt:    The page table format.
- * @cfg:    The page table configuration. This will be modified to represent
- *          the configuration actually provided by the allocator (e.g. the
- *          pgsize_bitmap may be restricted).
- * @cookie: An opaque token provided by the IOMMU driver and passed back to
- *          the callback routines in cfg->tlb.
- */
-struct io_pgtable_ops *alloc_io_pgtable_ops(enum io_pgtable_fmt fmt,
-					    struct io_pgtable_cfg *cfg,
-					    void *cookie);
-
-/**
- * free_io_pgtable_ops() - Free an io_pgtable_ops structure. The caller
- *                         *must* ensure that the page table is no longer
- *                         live, but the TLB can be dirty.
- *
- * @ops: The ops returned from alloc_io_pgtable_ops.
- */
-void free_io_pgtable_ops(struct io_pgtable_ops *ops);
-
-
-/*
- * Internal structures for page table allocator implementations.
- */
-
-/**
- * struct io_pgtable - Internal structure describing a set of page tables.
- *
- * @fmt:    The page table format.
- * @cookie: An opaque token provided by the IOMMU driver and passed back to
- *          any callback routines.
- * @cfg:    A copy of the page table configuration.
- * @ops:    The page table operations in use for this set of page tables.
- */
-struct io_pgtable {
-	enum io_pgtable_fmt	fmt;
-	void			*cookie;
-	struct io_pgtable_cfg	cfg;
-	struct io_pgtable_ops	ops;
-};
-
-#define io_pgtable_ops_to_pgtable(x) container_of((x), struct io_pgtable, ops)
-
-static inline void io_pgtable_tlb_flush_all(struct io_pgtable *iop)
-{
-	iop->cfg.tlb->tlb_flush_all(iop->cookie);
-}
-
-static inline void io_pgtable_tlb_add_flush(struct io_pgtable *iop,
-		unsigned long iova, size_t size, size_t granule, bool leaf)
-{
-	iop->cfg.tlb->tlb_add_flush(iova, size, granule, leaf, iop->cookie);
-}
-
-static inline void io_pgtable_tlb_sync(struct io_pgtable *iop)
-{
-	iop->cfg.tlb->tlb_sync(iop->cookie);
-}
-
-/**
- * struct io_pgtable_init_fns - Alloc/free a set of page tables for a
- *                              particular format.
- *
- * @alloc: Allocate a set of page tables described by cfg.
- * @free:  Free the page tables associated with iop.
- */
-struct io_pgtable_init_fns {
-	struct io_pgtable *(*alloc)(struct io_pgtable_cfg *cfg, void *cookie);
-	void (*free)(struct io_pgtable *iop);
-};
-
-extern struct io_pgtable_init_fns io_pgtable_arm_32_lpae_s1_init_fns;
-extern struct io_pgtable_init_fns io_pgtable_arm_32_lpae_s2_init_fns;
-extern struct io_pgtable_init_fns io_pgtable_arm_64_lpae_s1_init_fns;
-extern struct io_pgtable_init_fns io_pgtable_arm_64_lpae_s2_init_fns;
-extern struct io_pgtable_init_fns io_pgtable_arm_v7s_init_fns;
-
-#endif /* __IO_PGTABLE_H */
diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c
index 7a4529c61c19..9a380c10655e 100644
--- a/drivers/iommu/ipmmu-vmsa.c
+++ b/drivers/iommu/ipmmu-vmsa.c
@@ -15,6 +15,7 @@
 #include <linux/init.h>
 #include <linux/interrupt.h>
 #include <linux/io.h>
+#include <linux/io-pgtable.h>
 #include <linux/iommu.h>
 #include <linux/of.h>
 #include <linux/of_device.h>
@@ -35,8 +36,6 @@
 #define arm_iommu_detach_device(...)	do {} while (0)
 #endif
 
-#include "io-pgtable.h"
-
 #define IPMMU_CTX_MAX 8
 
 struct ipmmu_features {
diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c
index fc4270733f11..ef7d1f995d6b 100644
--- a/drivers/iommu/msm_iommu.c
+++ b/drivers/iommu/msm_iommu.c
@@ -23,6 +23,7 @@
 #include <linux/platform_device.h>
 #include <linux/errno.h>
 #include <linux/io.h>
+#include <linux/io-pgtable.h>
 #include <linux/interrupt.h>
 #include <linux/list.h>
 #include <linux/spinlock.h>
@@ -37,7 +38,6 @@
 
 #include "msm_iommu_hw-8xxx.h"
 #include "msm_iommu.h"
-#include "io-pgtable.h"
 
 #define MRC(reg, processor, op1, crn, crm, op2)				\
 __asm__ __volatile__ (							\
diff --git a/drivers/iommu/mtk_iommu.h b/drivers/iommu/mtk_iommu.h
index 778498b8633f..62c2c3e8c5df 100644
--- a/drivers/iommu/mtk_iommu.h
+++ b/drivers/iommu/mtk_iommu.h
@@ -19,13 +19,12 @@
 #include <linux/component.h>
 #include <linux/device.h>
 #include <linux/io.h>
+#include <linux/io-pgtable.h>
 #include <linux/iommu.h>
 #include <linux/list.h>
 #include <linux/spinlock.h>
 #include <soc/mediatek/smi.h>
 
-#include "io-pgtable.h"
-
 struct mtk_iommu_suspend_reg {
 	u32				standard_axi_mode;
 	u32				dcm_dis;
diff --git a/drivers/iommu/qcom_iommu.c b/drivers/iommu/qcom_iommu.c
index d8595f0a987d..8cdd3f059513 100644
--- a/drivers/iommu/qcom_iommu.c
+++ b/drivers/iommu/qcom_iommu.c
@@ -26,6 +26,7 @@
 #include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/io-64-nonatomic-hi-lo.h>
+#include <linux/io-pgtable.h>
 #include <linux/iommu.h>
 #include <linux/iopoll.h>
 #include <linux/kconfig.h>
@@ -42,7 +43,6 @@
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 
-#include "io-pgtable.h"
 #include "arm-smmu-regs.h"
 
 #define SMMU_INTR_SEL_NS     0x2000
diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h
new file mode 100644
index 000000000000..47d5ae559329
--- /dev/null
+++ b/include/linux/io-pgtable.h
@@ -0,0 +1,213 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __IO_PGTABLE_H
+#define __IO_PGTABLE_H
+#include <linux/bitops.h>
+
+/*
+ * Public API for use by IOMMU drivers
+ */
+enum io_pgtable_fmt {
+	ARM_32_LPAE_S1,
+	ARM_32_LPAE_S2,
+	ARM_64_LPAE_S1,
+	ARM_64_LPAE_S2,
+	ARM_V7S,
+	IO_PGTABLE_NUM_FMTS,
+};
+
+/**
+ * struct iommu_gather_ops - IOMMU callbacks for TLB and page table management.
+ *
+ * @tlb_flush_all: Synchronously invalidate the entire TLB context.
+ * @tlb_add_flush: Queue up a TLB invalidation for a virtual address range.
+ * @tlb_sync:      Ensure any queued TLB invalidation has taken effect, and
+ *                 any corresponding page table updates are visible to the
+ *                 IOMMU.
+ *
+ * Note that these can all be called in atomic context and must therefore
+ * not block.
+ */
+struct iommu_gather_ops {
+	void (*tlb_flush_all)(void *cookie);
+	void (*tlb_add_flush)(unsigned long iova, size_t size, size_t granule,
+			      bool leaf, void *cookie);
+	void (*tlb_sync)(void *cookie);
+};
+
+/**
+ * struct io_pgtable_cfg - Configuration data for a set of page tables.
+ *
+ * @quirks:        A bitmap of hardware quirks that require some special
+ *                 action by the low-level page table allocator.
+ * @pgsize_bitmap: A bitmap of page sizes supported by this set of page
+ *                 tables.
+ * @ias:           Input address (iova) size, in bits.
+ * @oas:           Output address (paddr) size, in bits.
+ * @tlb:           TLB management callbacks for this set of tables.
+ * @iommu_dev:     The device representing the DMA configuration for the
+ *                 page table walker.
+ */
+struct io_pgtable_cfg {
+	/*
+	 * IO_PGTABLE_QUIRK_ARM_NS: (ARM formats) Set NS and NSTABLE bits in
+	 *	stage 1 PTEs, for hardware which insists on validating them
+	 *	even in	non-secure state where they should normally be ignored.
+	 *
+	 * IO_PGTABLE_QUIRK_NO_PERMS: Ignore the IOMMU_READ, IOMMU_WRITE and
+	 *	IOMMU_NOEXEC flags and map everything with full access, for
+	 *	hardware which does not implement the permissions of a given
+	 *	format, and/or requires some format-specific default value.
+	 *
+	 * IO_PGTABLE_QUIRK_TLBI_ON_MAP: If the format forbids caching invalid
+	 *	(unmapped) entries but the hardware might do so anyway, perform
+	 *	TLB maintenance when mapping as well as when unmapping.
+	 *
+	 * IO_PGTABLE_QUIRK_ARM_MTK_4GB: (ARM v7s format) Set bit 9 in all
+	 *	PTEs, for Mediatek IOMMUs which treat it as a 33rd address bit
+	 *	when the SoC is in "4GB mode" and they can only access the high
+	 *	remap of DRAM (0x1_00000000 to 0x1_ffffffff).
+	 *
+	 * IO_PGTABLE_QUIRK_NO_DMA: Guarantees that the tables will only ever
+	 *	be accessed by a fully cache-coherent IOMMU or CPU (e.g. for a
+	 *	software-emulated IOMMU), such that pagetable updates need not
+	 *	be treated as explicit DMA data.
+	 *
+	 * IO_PGTABLE_QUIRK_NON_STRICT: Skip issuing synchronous leaf TLBIs
+	 *	on unmap, for DMA domains using the flush queue mechanism for
+	 *	delayed invalidation.
+	 */
+	#define IO_PGTABLE_QUIRK_ARM_NS		BIT(0)
+	#define IO_PGTABLE_QUIRK_NO_PERMS	BIT(1)
+	#define IO_PGTABLE_QUIRK_TLBI_ON_MAP	BIT(2)
+	#define IO_PGTABLE_QUIRK_ARM_MTK_4GB	BIT(3)
+	#define IO_PGTABLE_QUIRK_NO_DMA		BIT(4)
+	#define IO_PGTABLE_QUIRK_NON_STRICT	BIT(5)
+	unsigned long			quirks;
+	unsigned long			pgsize_bitmap;
+	unsigned int			ias;
+	unsigned int			oas;
+	const struct iommu_gather_ops	*tlb;
+	struct device			*iommu_dev;
+
+	/* Low-level data specific to the table format */
+	union {
+		struct {
+			u64	ttbr[2];
+			u64	tcr;
+			u64	mair[2];
+		} arm_lpae_s1_cfg;
+
+		struct {
+			u64	vttbr;
+			u64	vtcr;
+		} arm_lpae_s2_cfg;
+
+		struct {
+			u32	ttbr[2];
+			u32	tcr;
+			u32	nmrr;
+			u32	prrr;
+		} arm_v7s_cfg;
+	};
+};
+
+/**
+ * struct io_pgtable_ops - Page table manipulation API for IOMMU drivers.
+ *
+ * @map:          Map a physically contiguous memory region.
+ * @unmap:        Unmap a physically contiguous memory region.
+ * @iova_to_phys: Translate iova to physical address.
+ *
+ * These functions map directly onto the iommu_ops member functions with
+ * the same names.
+ */
+struct io_pgtable_ops {
+	int (*map)(struct io_pgtable_ops *ops, unsigned long iova,
+		   phys_addr_t paddr, size_t size, int prot);
+	size_t (*unmap)(struct io_pgtable_ops *ops, unsigned long iova,
+			size_t size);
+	phys_addr_t (*iova_to_phys)(struct io_pgtable_ops *ops,
+				    unsigned long iova);
+};
+
+/**
+ * alloc_io_pgtable_ops() - Allocate a page table allocator for use by an IOMMU.
+ *
+ * @fmt:    The page table format.
+ * @cfg:    The page table configuration. This will be modified to represent
+ *          the configuration actually provided by the allocator (e.g. the
+ *          pgsize_bitmap may be restricted).
+ * @cookie: An opaque token provided by the IOMMU driver and passed back to
+ *          the callback routines in cfg->tlb.
+ */
+struct io_pgtable_ops *alloc_io_pgtable_ops(enum io_pgtable_fmt fmt,
+					    struct io_pgtable_cfg *cfg,
+					    void *cookie);
+
+/**
+ * free_io_pgtable_ops() - Free an io_pgtable_ops structure. The caller
+ *                         *must* ensure that the page table is no longer
+ *                         live, but the TLB can be dirty.
+ *
+ * @ops: The ops returned from alloc_io_pgtable_ops.
+ */
+void free_io_pgtable_ops(struct io_pgtable_ops *ops);
+
+
+/*
+ * Internal structures for page table allocator implementations.
+ */
+
+/**
+ * struct io_pgtable - Internal structure describing a set of page tables.
+ *
+ * @fmt:    The page table format.
+ * @cookie: An opaque token provided by the IOMMU driver and passed back to
+ *          any callback routines.
+ * @cfg:    A copy of the page table configuration.
+ * @ops:    The page table operations in use for this set of page tables.
+ */
+struct io_pgtable {
+	enum io_pgtable_fmt	fmt;
+	void			*cookie;
+	struct io_pgtable_cfg	cfg;
+	struct io_pgtable_ops	ops;
+};
+
+#define io_pgtable_ops_to_pgtable(x) container_of((x), struct io_pgtable, ops)
+
+static inline void io_pgtable_tlb_flush_all(struct io_pgtable *iop)
+{
+	iop->cfg.tlb->tlb_flush_all(iop->cookie);
+}
+
+static inline void io_pgtable_tlb_add_flush(struct io_pgtable *iop,
+		unsigned long iova, size_t size, size_t granule, bool leaf)
+{
+	iop->cfg.tlb->tlb_add_flush(iova, size, granule, leaf, iop->cookie);
+}
+
+static inline void io_pgtable_tlb_sync(struct io_pgtable *iop)
+{
+	iop->cfg.tlb->tlb_sync(iop->cookie);
+}
+
+/**
+ * struct io_pgtable_init_fns - Alloc/free a set of page tables for a
+ *                              particular format.
+ *
+ * @alloc: Allocate a set of page tables described by cfg.
+ * @free:  Free the page tables associated with iop.
+ */
+struct io_pgtable_init_fns {
+	struct io_pgtable *(*alloc)(struct io_pgtable_cfg *cfg, void *cookie);
+	void (*free)(struct io_pgtable *iop);
+};
+
+extern struct io_pgtable_init_fns io_pgtable_arm_32_lpae_s1_init_fns;
+extern struct io_pgtable_init_fns io_pgtable_arm_32_lpae_s2_init_fns;
+extern struct io_pgtable_init_fns io_pgtable_arm_64_lpae_s1_init_fns;
+extern struct io_pgtable_init_fns io_pgtable_arm_64_lpae_s2_init_fns;
+extern struct io_pgtable_init_fns io_pgtable_arm_v7s_init_fns;
+
+#endif /* __IO_PGTABLE_H */
-- 
cgit v1.2.3


From d123fab71f63aae129aebe052664fda73131921a Mon Sep 17 00:00:00 2001
From: Wesley Sheng <wesley.sheng@microchip.com>
Date: Thu, 6 Dec 2018 21:30:51 +0800
Subject: ntb_hw_switchtec: NT req id mapping table register entry number
 should be 512

The number of available NT req id mapping table entries per NTB control
register is 512. The driver mistakenly limits the number to 256.

Fix the array size of NT req id mapping table.

Fixes: c082b04c9d40 ("NTB: switchtec: Add NTB hardware register definitions")
Signed-off-by: Wesley Sheng <wesley.sheng@microchip.com>
Reviewed-by: Logan Gunthorpe <logang@deltatee.com>
Signed-off-by: Jon Mason <jdmason@kudzu.us>
---
 include/linux/switchtec.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/switchtec.h b/include/linux/switchtec.h
index eee0412bdf4b..32b282cd0ead 100644
--- a/include/linux/switchtec.h
+++ b/include/linux/switchtec.h
@@ -249,8 +249,8 @@ struct ntb_ctrl_regs {
 		u64 xlate_addr;
 	} bar_entry[6];
 	u32 reserved2[216];
-	u32 req_id_table[256];
-	u32 reserved3[512];
+	u32 req_id_table[512];
+	u32 reserved3[256];
 	u64 lut_entry[512];
 } __packed;
 
-- 
cgit v1.2.3


From a2585cdc9e4cda6afaea5f5687eaabce3bebbb2c Mon Sep 17 00:00:00 2001
From: Paul Selles <paul.selles@microchip.com>
Date: Thu, 6 Dec 2018 21:30:52 +0800
Subject: ntb_hw_switchtec: Added support of >=4G memory windows

Current Switchtec's BAR setup registers are limited to 32bits,
corresponding to the maximum MW (memory window) size is <4G.

Increase the MW sizes with the addition of the BAR Setup Extension
Register for the upper 32bits of a 64bits MW size. This increases the MW
range to between 4K and 2^63.

Reported-by: Boris Glimcher <boris.glimcher@emc.com>
Signed-off-by: Paul Selles <paul.selles@microchip.com>
Signed-off-by: Wesley Sheng <wesley.sheng@microchip.com>
Reviewed-by: Logan Gunthorpe <logang@deltatee.com>
Signed-off-by: Jon Mason <jdmason@kudzu.us>
---
 drivers/ntb/hw/mscc/ntb_hw_switchtec.c | 9 +++++++--
 include/linux/switchtec.h              | 6 +++++-
 2 files changed, 12 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/drivers/ntb/hw/mscc/ntb_hw_switchtec.c b/drivers/ntb/hw/mscc/ntb_hw_switchtec.c
index 9916bc5b6759..f6f00354047b 100644
--- a/drivers/ntb/hw/mscc/ntb_hw_switchtec.c
+++ b/drivers/ntb/hw/mscc/ntb_hw_switchtec.c
@@ -264,6 +264,7 @@ static void switchtec_ntb_mw_clr_direct(struct switchtec_ntb *sndev, int idx)
 	ctl_val &= ~NTB_CTRL_BAR_DIR_WIN_EN;
 	iowrite32(ctl_val, &ctl->bar_entry[bar].ctl);
 	iowrite32(0, &ctl->bar_entry[bar].win_size);
+	iowrite32(0, &ctl->bar_ext_entry[bar].win_size);
 	iowrite64(sndev->self_partition, &ctl->bar_entry[bar].xlate_addr);
 }
 
@@ -286,7 +287,9 @@ static void switchtec_ntb_mw_set_direct(struct switchtec_ntb *sndev, int idx,
 	ctl_val |= NTB_CTRL_BAR_DIR_WIN_EN;
 
 	iowrite32(ctl_val, &ctl->bar_entry[bar].ctl);
-	iowrite32(xlate_pos | size, &ctl->bar_entry[bar].win_size);
+	iowrite32(xlate_pos | (lower_32_bits(size) & 0xFFFFF000),
+		  &ctl->bar_entry[bar].win_size);
+	iowrite32(upper_32_bits(size), &ctl->bar_ext_entry[bar].win_size);
 	iowrite64(sndev->self_partition | addr,
 		  &ctl->bar_entry[bar].xlate_addr);
 }
@@ -1053,7 +1056,9 @@ static int crosslink_setup_mws(struct switchtec_ntb *sndev, int ntb_lut_idx,
 		ctl_val |= NTB_CTRL_BAR_DIR_WIN_EN;
 
 		iowrite32(ctl_val, &ctl->bar_entry[bar].ctl);
-		iowrite32(xlate_pos | size, &ctl->bar_entry[bar].win_size);
+		iowrite32(xlate_pos | (lower_32_bits(size) & 0xFFFFF000),
+			  &ctl->bar_entry[bar].win_size);
+		iowrite32(upper_32_bits(size), &ctl->bar_ext_entry[bar].win_size);
 		iowrite64(sndev->peer_partition | addr,
 			  &ctl->bar_entry[bar].xlate_addr);
 	}
diff --git a/include/linux/switchtec.h b/include/linux/switchtec.h
index 32b282cd0ead..52a079b3a9a6 100644
--- a/include/linux/switchtec.h
+++ b/include/linux/switchtec.h
@@ -248,7 +248,11 @@ struct ntb_ctrl_regs {
 		u32 win_size;
 		u64 xlate_addr;
 	} bar_entry[6];
-	u32 reserved2[216];
+	struct {
+		u32 win_size;
+		u32 reserved[3];
+	} bar_ext_entry[6];
+	u32 reserved2[192];
 	u32 req_id_table[512];
 	u32 reserved3[256];
 	u64 lut_entry[512];
-- 
cgit v1.2.3


From bf7fbeeae6db644ef5995085de2bc5c6121f8c8d Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 8 Feb 2019 17:02:56 +0100
Subject: module: Cure the MODULE_LICENSE "GPL" vs. "GPL v2" bogosity

The original MODULE_LICENSE string for kernel modules licensed under the
GPL v2 (only / or later) was simply "GPL", which was - and still is -
completely sufficient for the purpose of module loading and checking
whether the module is free software or proprietary.

In January 2003 this was changed with commit 3344ea3ad4b7 ("[PATCH]
MODULE_LICENSE and EXPORT_SYMBOL_GPL support"). This commit can be found in
the history git repository which holds the 1:1 import of Linus' bitkeeper
repository:

  https://git.kernel.org/pub/scm/linux/kernel/git/tglx/history.git/commit/?id=3344ea3ad4b7c302c846a680dbaeedf96ed45c02

The main intention of the patch was to refuse linking proprietary modules
against symbols exported with EXPORT_SYMBOL_GPL() at module load time.

As a completely undocumented side effect it also introduced the distinction
between "GPL" and "GPL v2" MODULE_LICENSE() strings:

 *      "GPL"                           [GNU Public License v2 or later]
 *      "GPL v2"                        [GNU Public License v2]
 *      "GPL and additional rights"     [GNU Public License v2 rights and more]
 *      "Dual BSD/GPL"                  [GNU Public License v2
 *                                       or BSD license choice]
 *      "Dual MPL/GPL"                  [GNU Public License v2
 *                                       or Mozilla license choice]

This distinction was and still is wrong in several aspects:

 1) It broke all modules which were using the "GPL" string in the
    MODULE_LICENSE() already and were licensed under GPL v2 only.

    A quick license scan over the tree at that time shows that at least 480
    out of 1484 modules have been affected by this change back then. The
    number is probably way higher as this was just a quick check for
    clearly identifiable license information.

    There was exactly ONE instance of a "GPL v2" module license string in
    the kernel back then - drivers/net/tulip/xircom_tulip_cb.c which
    otherwise had no license information at all. There is no indication
    that the change above is any way related to this driver. The change
    happend with the 2.4.11 release which was on Oct. 9 2001 - so quite
    some time before the above commit. Unfortunately there is no trace on
    the intertubes to any discussion of this.

 2) The dual licensed strings became ill defined as well because following
    the "GPL" vs. "GPL v2" distinction all dual licensed (or additional
    rights) MODULE_LICENSE strings would either require those dual licensed
    modules to be licensed under GPL v2 or later or just be unspecified for
    the dual licensing case. Neither choice is coherent with the GPL
    distinction.

Due to the lack of a proper changelog and no real discussion on the patch
submission other than a few implementation details, it's completely unclear
why this distinction was introduced at all. Other than the comment in the
module header file exists no documentation for this at all.

From a license compliance and license scanning POV this distinction is a
total nightmare.

As of 5.0-rc2 2873 out of 9200 instances of MODULE_LICENSE() strings are
conflicting with the actual license in the source code (either SPDX or
license boilerplate/reference). A comparison between the scan of the
history tree and a scan of current Linus tree shows to the extent that the
git rename detection over Linus tree grafted with the history tree is
halfways complete that almost none of the files which got broken in 2003
have been cleaned up vs. the MODULE_LICENSE string. So subtracting those
480 known instances from the conflicting 2800 of today more than 25% of the
module authors got it wrong and it's a high propability that a large
portion of the rest just got it right by chance.

There is no value for the module loader to convey the detailed license
information as the only decision to be made is whether the module is free
software or not.

The "and additional rights", "BSD" and "MPL" strings are not conclusive
license information either. So there is no point in trying to make the GPL
part conclusive and exact. As shown above it's already non conclusive for
dual licensing and incoherent with a large portion of the module source.

As an unintended side effect this distinction causes a major headache for
license compliance, license scanners and the ongoing effort to clean up the
license mess of the kernel.

Therefore remove the well meant, but ill defined, distinction between "GPL"
and "GPL v2" and document that:

  - "GPL" and "GPL v2" both express that the module is licensed under GPLv2
    (without a distinction of 'only' and 'or later') and is therefore kernel
    license compliant.

  - None of the MODULE_LICENSE strings can be used for expressing or
    determining the exact license

  - Their sole purpose is to decide whether the module is free software or
    not.

Add a MODULE_LICENSE subsection to the license rule documentation as well.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Acked-by: Philippe Ombredanne <pombredanne@nexb.com>
Acked-by: Joe Perches <joe@perches.com>
[jc: Did s/merily/merely/ ]
Acked-by: Jessica Yu <jeyu@kernel.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/process/license-rules.rst | 62 +++++++++++++++++++++++++++++++++
 include/linux/module.h                  | 18 +++++++++-
 2 files changed, 79 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/Documentation/process/license-rules.rst b/Documentation/process/license-rules.rst
index 2bb8c0fc2238..43b6a1ee0193 100644
--- a/Documentation/process/license-rules.rst
+++ b/Documentation/process/license-rules.rst
@@ -372,3 +372,65 @@ in the LICENSE subdirectories. This is required to allow tool
 verification (e.g. checkpatch.pl) and to have the licenses ready to read
 and extract right from the source, which is recommended by various FOSS
 organizations, e.g. the `FSFE REUSE initiative <https://reuse.software/>`_.
+
+_`MODULE_LICENSE`
+-----------------
+
+   Loadable kernel modules also require a MODULE_LICENSE() tag. This tag is
+   neither a replacement for proper source code license information
+   (SPDX-License-Identifier) nor in any way relevant for expressing or
+   determining the exact license under which the source code of the module
+   is provided.
+
+   The sole purpose of this tag is to provide sufficient information
+   whether the module is free software or proprietary for the kernel
+   module loader and for user space tools.
+
+   The valid license strings for MODULE_LICENSE() are:
+
+    ============================= =============================================
+    "GPL"			  Module is licensed under GPL version 2. This
+				  does not express any distinction between
+				  GPL-2.0-only or GPL-2.0-or-later. The exact
+				  license information can only be determined
+				  via the license information in the
+				  corresponding source files.
+
+    "GPL v2"			  Same as "GPL". It exists for historic
+				  reasons.
+
+    "GPL and additional rights"   Historical variant of expressing that the
+				  module source is dual licensed under a
+				  GPL v2 variant and MIT license. Please do
+				  not use in new code.
+
+    "Dual MIT/GPL"		  The correct way of expressing that the
+				  module is dual licensed under a GPL v2
+				  variant or MIT license choice.
+
+    "Dual BSD/GPL"		  The module is dual licensed under a GPL v2
+				  variant or BSD license choice. The exact
+				  variant of the BSD license can only be
+				  determined via the license information
+				  in the corresponding source files.
+
+    "Dual MPL/GPL"		  The module is dual licensed under a GPL v2
+				  variant or Mozilla Public License (MPL)
+				  choice. The exact variant of the MPL
+				  license can only be determined via the
+				  license information in the corresponding
+				  source files.
+
+    "Proprietary"		  The module is under a proprietary license.
+				  This string is solely for proprietary third
+				  party modules and cannot be used for modules
+				  which have their source code in the kernel
+				  tree. Modules tagged that way are tainting
+				  the kernel with the 'P' flag when loaded and
+				  the kernel module loader refuses to link such
+				  modules against symbols which are exported
+				  with EXPORT_SYMBOL_GPL().
+    ============================= =============================================
+
+
+
diff --git a/include/linux/module.h b/include/linux/module.h
index 9a21fe3509af..3a2402b8d790 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -172,7 +172,7 @@ extern void cleanup_module(void);
  * The following license idents are currently accepted as indicating free
  * software modules
  *
- *	"GPL"				[GNU Public License v2 or later]
+ *	"GPL"				[GNU Public License v2]
  *	"GPL v2"			[GNU Public License v2]
  *	"GPL and additional rights"	[GNU Public License v2 rights and more]
  *	"Dual BSD/GPL"			[GNU Public License v2
@@ -186,6 +186,22 @@ extern void cleanup_module(void);
  *
  *	"Proprietary"			[Non free products]
  *
+ * Both "GPL v2" and "GPL" (the latter also in dual licensed strings) are
+ * merely stating that the module is licensed under the GPL v2, but are not
+ * telling whether "GPL v2 only" or "GPL v2 or later". The reason why there
+ * are two variants is a historic and failed attempt to convey more
+ * information in the MODULE_LICENSE string. For module loading the
+ * "only/or later" distinction is completely irrelevant and does neither
+ * replace the proper license identifiers in the corresponding source file
+ * nor amends them in any way. The sole purpose is to make the
+ * 'Proprietary' flagging work and to refuse to bind symbols which are
+ * exported with EXPORT_SYMBOL_GPL when a non free module is loaded.
+ *
+ * In the same way "BSD" is not a clear license information. It merely
+ * states, that the module is licensed under one of the compatible BSD
+ * license variants. The detailed and correct license information is again
+ * to be found in the corresponding source files.
+ *
  * There are dual licensed components, but when running with Linux it is the
  * GPL that is relevant so this is a non issue. Similarly LGPL linked with GPL
  * is a GPL combined work.
-- 
cgit v1.2.3


From 7388afe09143210f555bdd6c75035e9acc1fab96 Mon Sep 17 00:00:00 2001
From: Jouni Malinen <j@w1.fi>
Date: Mon, 11 Feb 2019 16:29:04 +0200
Subject: cfg80211: Use const more consistently in for_each_element macros

Enforce the first argument to be a correct type of a pointer to struct
element and avoid unnecessary typecasts from const to non-const pointers
(the change in validate_ie_attr() is needed to make this part work). In
addition, avoid signed/unsigned comparison within for_each_element() and
mark struct element packed just in case.

Signed-off-by: Jouni Malinen <j@w1.fi>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h | 18 +++++++++---------
 net/wireless/nl80211.c    |  2 +-
 2 files changed, 10 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 3c9dfcada45f..6cbaed4d7a6b 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -3284,16 +3284,16 @@ struct element {
 	u8 id;
 	u8 datalen;
 	u8 data[];
-};
+} __packed;
 
 /* element iteration helpers */
-#define for_each_element(element, _data, _datalen)			\
-	for (element = (void *)(_data);					\
-	     (u8 *)(_data) + (_datalen) - (u8 *)element >=		\
-		sizeof(*element) &&					\
-	     (u8 *)(_data) + (_datalen) - (u8 *)element >=		\
-		sizeof(*element) + element->datalen;			\
-	     element = (void *)(element->data + element->datalen))
+#define for_each_element(_elem, _data, _datalen)			\
+	for (_elem = (const struct element *)(_data);			\
+	     (const u8 *)(_data) + (_datalen) - (const u8 *)_elem >=	\
+		(int)sizeof(*_elem) &&					\
+	     (const u8 *)(_data) + (_datalen) - (const u8 *)_elem >=	\
+		(int)sizeof(*_elem) + _elem->datalen;			\
+	     _elem = (const struct element *)(_elem->data + _elem->datalen))
 
 #define for_each_element_id(element, _id, data, datalen)		\
 	for_each_element(element, data, datalen)			\
@@ -3330,7 +3330,7 @@ struct element {
 static inline bool for_each_element_completed(const struct element *element,
 					      const void *data, size_t datalen)
 {
-	return (u8 *)element == (u8 *)data + datalen;
+	return (const u8 *)element == (const u8 *)data + datalen;
 }
 
 #endif /* LINUX_IEEE80211_H */
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 5d85f6032f84..80878b431584 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -205,7 +205,7 @@ static int validate_ie_attr(const struct nlattr *attr,
 {
 	const u8 *data = nla_data(attr);
 	unsigned int len = nla_len(attr);
-	struct element *elem;
+	const struct element *elem;
 
 	for_each_element(elem, data, len) {
 		/* nothing */
-- 
cgit v1.2.3


From 5d4071abd9a1e2b417beef31aaf7d45999e4882e Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Mon, 11 Feb 2019 16:03:12 +0100
Subject: cfg80211: fix and clean up cfg80211_gen_new_bssid()

Fix cfg80211_gen_new_bssid() to not rely on u64 modulo arithmetic,
which isn't needed since we really just want to mask there. Also,
clean it up to calculate the mask only once and use GENMASK_ULL()
instead of open-coding the mask calculation.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index b61b71f369c7..f81677f2f051 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -5475,22 +5475,20 @@ cfg80211_inform_bss_frame(struct wiphy *wiphy,
  * @bssid: transmitter BSSID
  * @max_bssid: max BSSID indicator, taken from Multiple BSSID element
  * @mbssid_index: BSSID index, taken from Multiple BSSID index element
- * @new_bssid_addr: address of the resulting BSSID
+ * @new_bssid: calculated nontransmitted BSSID
  */
 static inline void cfg80211_gen_new_bssid(const u8 *bssid, u8 max_bssid,
-					  u8 mbssid_index, u8 *new_bssid_addr)
+					  u8 mbssid_index, u8 *new_bssid)
 {
-	u64 bssid_tmp, new_bssid;
-	u64 lsb_n;
+	u64 bssid_u64 = ether_addr_to_u64(bssid);
+	u64 mask = GENMASK_ULL(max_bssid - 1, 0);
+	u64 new_bssid_u64;
 
-	bssid_tmp = ether_addr_to_u64(bssid);
+	new_bssid_u64 = bssid_u64 & ~mask;
 
-	lsb_n = bssid_tmp & ((1 << max_bssid) - 1);
-	new_bssid = bssid_tmp;
-	new_bssid &= ~((1 << max_bssid) - 1);
-	new_bssid |= (lsb_n + mbssid_index) % (1 << max_bssid);
+	new_bssid_u64 |= ((bssid_u64 & mask) + mbssid_index) & mask;
 
-	u64_to_ether_addr(new_bssid, new_bssid_addr);
+	u64_to_ether_addr(new_bssid_u64, new_bssid);
 }
 
 /**
-- 
cgit v1.2.3


From d901b2760dc6cd5fbbf2eac31d71d94baa6c4aef Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@mellanox.com>
Date: Fri, 4 Jan 2019 11:40:21 -0700
Subject: lib/scatterlist: Provide a DMA page iterator

Commit 2db76d7c3c6d ("lib/scatterlist: sg_page_iter: support sg lists w/o
backing pages") introduced the sg_page_iter_dma_address() function without
providing a way to use it in the general case. If the sg_dma_len() is not
equal to the sg length callers cannot safely use the
for_each_sg_page/sg_page_iter_dma_address combination.

Resolve this API mistake by providing a DMA specific iterator,
for_each_sg_dma_page(), that uses the right length so
sg_page_iter_dma_address() works as expected with all sglists.

A new iterator type is introduced to provide compile-time safety against
wrongly mixing accessors and iterators.

Acked-by: Christoph Hellwig <hch@lst.de> (for scatterlist)
Acked-by: Thomas Hellstrom <thellstrom@vmware.com>
Acked-by: Sakari Ailus <sakari.ailus@linux.intel.com> (ipu3-cio2)
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 .clang-format                              |  1 +
 drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c |  8 ++++-
 drivers/media/pci/intel/ipu3/ipu3-cio2.c   |  4 +--
 include/linux/scatterlist.h                | 49 ++++++++++++++++++++++++------
 lib/scatterlist.c                          | 26 ++++++++++++++++
 5 files changed, 76 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/.clang-format b/.clang-format
index bc2ffb2a0b53..335ce29ab813 100644
--- a/.clang-format
+++ b/.clang-format
@@ -240,6 +240,7 @@ ForEachMacros:
   - 'for_each_set_bit'
   - 'for_each_set_bit_from'
   - 'for_each_sg'
+  - 'for_each_sg_dma_page'
   - 'for_each_sg_page'
   - 'for_each_sibling_event'
   - '__for_each_thread'
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c
index 31786b200afc..a3357ff7540d 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c
@@ -311,7 +311,13 @@ static dma_addr_t __vmw_piter_dma_addr(struct vmw_piter *viter)
 
 static dma_addr_t __vmw_piter_sg_addr(struct vmw_piter *viter)
 {
-	return sg_page_iter_dma_address(&viter->iter);
+	/*
+	 * FIXME: This driver wrongly mixes DMA and CPU SG list iteration and
+	 * needs revision. See
+	 * https://lore.kernel.org/lkml/20190104223531.GA1705@ziepe.ca/
+	 */
+	return sg_page_iter_dma_address(
+		container_of(&viter->iter, struct sg_dma_page_iter, base));
 }
 
 
diff --git a/drivers/media/pci/intel/ipu3/ipu3-cio2.c b/drivers/media/pci/intel/ipu3/ipu3-cio2.c
index cdb79ae2d8dc..9fbfbda74171 100644
--- a/drivers/media/pci/intel/ipu3/ipu3-cio2.c
+++ b/drivers/media/pci/intel/ipu3/ipu3-cio2.c
@@ -846,7 +846,7 @@ static int cio2_vb2_buf_init(struct vb2_buffer *vb)
 	unsigned int pages = DIV_ROUND_UP(vb->planes[0].length, CIO2_PAGE_SIZE);
 	unsigned int lops = DIV_ROUND_UP(pages + 1, entries_per_page);
 	struct sg_table *sg;
-	struct sg_page_iter sg_iter;
+	struct sg_dma_page_iter sg_iter;
 	int i, j;
 
 	if (lops <= 0 || lops > CIO2_MAX_LOPS) {
@@ -873,7 +873,7 @@ static int cio2_vb2_buf_init(struct vb2_buffer *vb)
 		b->offset = sg->sgl->offset;
 
 	i = j = 0;
-	for_each_sg_page(sg->sgl, &sg_iter, sg->nents, 0) {
+	for_each_sg_dma_page (sg->sgl, &sg_iter, sg->nents, 0) {
 		if (!pages--)
 			break;
 		b->lop[i][j] = sg_page_iter_dma_address(&sg_iter) >> PAGE_SHIFT;
diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h
index b96f0d0b5b8f..b4be960c7e5d 100644
--- a/include/linux/scatterlist.h
+++ b/include/linux/scatterlist.h
@@ -339,12 +339,12 @@ int sg_alloc_table_chained(struct sg_table *table, int nents,
 /*
  * sg page iterator
  *
- * Iterates over sg entries page-by-page.  On each successful iteration,
- * you can call sg_page_iter_page(@piter) and sg_page_iter_dma_address(@piter)
- * to get the current page and its dma address. @piter->sg will point to the
- * sg holding this page and @piter->sg_pgoffset to the page's page offset
- * within the sg. The iteration will stop either when a maximum number of sg
- * entries was reached or a terminating sg (sg_last(sg) == true) was reached.
+ * Iterates over sg entries page-by-page.  On each successful iteration, you
+ * can call sg_page_iter_page(@piter) to get the current page and its dma
+ * address. @piter->sg will point to the sg holding this page and
+ * @piter->sg_pgoffset to the page's page offset within the sg. The iteration
+ * will stop either when a maximum number of sg entries was reached or a
+ * terminating sg (sg_last(sg) == true) was reached.
  */
 struct sg_page_iter {
 	struct scatterlist	*sg;		/* sg holding the page */
@@ -356,7 +356,19 @@ struct sg_page_iter {
 						 * next step */
 };
 
+/*
+ * sg page iterator for DMA addresses
+ *
+ * This is the same as sg_page_iter however you can call
+ * sg_page_iter_dma_address(@dma_iter) to get the page's DMA
+ * address. sg_page_iter_page() cannot be called on this iterator.
+ */
+struct sg_dma_page_iter {
+	struct sg_page_iter base;
+};
+
 bool __sg_page_iter_next(struct sg_page_iter *piter);
+bool __sg_page_iter_dma_next(struct sg_dma_page_iter *dma_iter);
 void __sg_page_iter_start(struct sg_page_iter *piter,
 			  struct scatterlist *sglist, unsigned int nents,
 			  unsigned long pgoffset);
@@ -372,11 +384,13 @@ static inline struct page *sg_page_iter_page(struct sg_page_iter *piter)
 /**
  * sg_page_iter_dma_address - get the dma address of the current page held by
  * the page iterator.
- * @piter:	page iterator holding the page
+ * @dma_iter:	page iterator holding the page
  */
-static inline dma_addr_t sg_page_iter_dma_address(struct sg_page_iter *piter)
+static inline dma_addr_t
+sg_page_iter_dma_address(struct sg_dma_page_iter *dma_iter)
 {
-	return sg_dma_address(piter->sg) + (piter->sg_pgoffset << PAGE_SHIFT);
+	return sg_dma_address(dma_iter->base.sg) +
+	       (dma_iter->base.sg_pgoffset << PAGE_SHIFT);
 }
 
 /**
@@ -385,11 +399,28 @@ static inline dma_addr_t sg_page_iter_dma_address(struct sg_page_iter *piter)
  * @piter:	page iterator to hold current page, sg, sg_pgoffset
  * @nents:	maximum number of sg entries to iterate over
  * @pgoffset:	starting page offset
+ *
+ * Callers may use sg_page_iter_page() to get each page pointer.
  */
 #define for_each_sg_page(sglist, piter, nents, pgoffset)		   \
 	for (__sg_page_iter_start((piter), (sglist), (nents), (pgoffset)); \
 	     __sg_page_iter_next(piter);)
 
+/**
+ * for_each_sg_dma_page - iterate over the pages of the given sg list
+ * @sglist:	sglist to iterate over
+ * @dma_iter:	page iterator to hold current page
+ * @dma_nents:	maximum number of sg entries to iterate over, this is the value
+ *              returned from dma_map_sg
+ * @pgoffset:	starting page offset
+ *
+ * Callers may use sg_page_iter_dma_address() to get each page's DMA address.
+ */
+#define for_each_sg_dma_page(sglist, dma_iter, dma_nents, pgoffset)            \
+	for (__sg_page_iter_start(&(dma_iter)->base, sglist, dma_nents,        \
+				  pgoffset);                                   \
+	     __sg_page_iter_dma_next(dma_iter);)
+
 /*
  * Mapping sg iterator
  *
diff --git a/lib/scatterlist.c b/lib/scatterlist.c
index 9ba349e775ef..739dc9fe2c55 100644
--- a/lib/scatterlist.c
+++ b/lib/scatterlist.c
@@ -625,6 +625,32 @@ bool __sg_page_iter_next(struct sg_page_iter *piter)
 }
 EXPORT_SYMBOL(__sg_page_iter_next);
 
+static int sg_dma_page_count(struct scatterlist *sg)
+{
+	return PAGE_ALIGN(sg->offset + sg_dma_len(sg)) >> PAGE_SHIFT;
+}
+
+bool __sg_page_iter_dma_next(struct sg_dma_page_iter *dma_iter)
+{
+	struct sg_page_iter *piter = &dma_iter->base;
+
+	if (!piter->__nents || !piter->sg)
+		return false;
+
+	piter->sg_pgoffset += piter->__pg_advance;
+	piter->__pg_advance = 1;
+
+	while (piter->sg_pgoffset >= sg_dma_page_count(piter->sg)) {
+		piter->sg_pgoffset -= sg_dma_page_count(piter->sg);
+		piter->sg = sg_next(piter->sg);
+		if (!--piter->__nents || !piter->sg)
+			return false;
+	}
+
+	return true;
+}
+EXPORT_SYMBOL(__sg_page_iter_dma_next);
+
 /**
  * sg_miter_start - start mapping iteration over a sg list
  * @miter: sg mapping iter to be started
-- 
cgit v1.2.3


From 14fd1901e718138b22ae7cbd8995bfdeb4df578f Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Sun, 10 Feb 2019 19:35:29 -0800
Subject: devlink: add a generic board.manufacture version name

At Jiri's suggestion add a generic "board.manufacture"
version identifier.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/devlink-info-versions.rst | 5 +++++
 include/net/devlink.h                              | 2 ++
 2 files changed, 7 insertions(+)

(limited to 'include')

diff --git a/Documentation/networking/devlink-info-versions.rst b/Documentation/networking/devlink-info-versions.rst
index 7d4ecf6b6f34..c79ad8593383 100644
--- a/Documentation/networking/devlink-info-versions.rst
+++ b/Documentation/networking/devlink-info-versions.rst
@@ -14,6 +14,11 @@ board.rev
 
 Board design revision.
 
+board.manufacture
+=================
+
+An identifier of the company or the facility which produced the part.
+
 fw.mgmt
 =======
 
diff --git a/include/net/devlink.h b/include/net/devlink.h
index 2b384a38911b..07660fe4c0e3 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -435,6 +435,8 @@ enum devlink_param_wol_types {
 #define DEVLINK_INFO_VERSION_GENERIC_BOARD_ID	"board.id"
 /* Revision of board design */
 #define DEVLINK_INFO_VERSION_GENERIC_BOARD_REV	"board.rev"
+/* Maker of the board */
+#define DEVLINK_INFO_VERSION_GENERIC_BOARD_MANUFACTURE	"board.manufacture"
 
 /* Control processor FW version */
 #define DEVLINK_INFO_VERSION_GENERIC_FW_MGMT	"fw.mgmt"
-- 
cgit v1.2.3


From 6663cf821c133b36dee65fb9eb2d976abc706512 Mon Sep 17 00:00:00 2001
From: Eli Britstein <elibr@mellanox.com>
Date: Mon, 11 Feb 2019 09:52:59 +0200
Subject: flow_offload: Fix flow action infrastructure

Implementation of macro "flow_action_for_each" introduced in
commit e3ab786b42535 ("flow_offload: add flow action infrastructure")
and used in commit 738678817573c ("drivers: net: use flow action
infrastructure") iterated the first item twice and did not reach the
last one. Fix it.

Fixes: e3ab786b42535 ("flow_offload: add flow action infrastructure")
Fixes: 738678817573c ("drivers: net: use flow action infrastructure")
Signed-off-by: Eli Britstein <elibr@mellanox.com>
Reviewed-by: Roi Dayan <roid@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Acked-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/flow_offload.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h
index 23166caa0da5..a307ccb18015 100644
--- a/include/net/flow_offload.h
+++ b/include/net/flow_offload.h
@@ -171,7 +171,7 @@ static inline bool flow_action_has_entries(const struct flow_action *action)
 }
 
 #define flow_action_for_each(__i, __act, __actions)			\
-        for (__i = 0, __act = &(__actions)->entries[0]; __i < (__actions)->num_entries; __act = &(__actions)->entries[__i++])
+        for (__i = 0, __act = &(__actions)->entries[0]; __i < (__actions)->num_entries; __act = &(__actions)->entries[++__i])
 
 struct flow_rule {
 	struct flow_match	match;
-- 
cgit v1.2.3


From 23fa70e40a42e8dfeac654b8cc0e5b463e54af25 Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Date: Mon, 11 Feb 2019 11:37:02 +0100
Subject: usb: ohci-da8xx: remove unused callbacks from platform data

There are no more users of the platform_data callbacks in ohci-da8xx.
Remove them.

Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Acked-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Sekhar Nori <nsekhar@ti.com>
---
 include/linux/platform_data/usb-davinci.h | 14 --------------
 1 file changed, 14 deletions(-)

(limited to 'include')

diff --git a/include/linux/platform_data/usb-davinci.h b/include/linux/platform_data/usb-davinci.h
index 0926e99f2e8f..879f5c78b91a 100644
--- a/include/linux/platform_data/usb-davinci.h
+++ b/include/linux/platform_data/usb-davinci.h
@@ -11,22 +11,8 @@
 #ifndef __ASM_ARCH_USB_H
 #define __ASM_ARCH_USB_H
 
-struct	da8xx_ohci_root_hub;
-
-typedef void (*da8xx_ocic_handler_t)(struct da8xx_ohci_root_hub *hub,
-				     unsigned port);
-
 /* Passed as the platform data to the OHCI driver */
 struct	da8xx_ohci_root_hub {
-	/* Switch the port power on/off */
-	int	(*set_power)(unsigned port, int on);
-	/* Read the port power status */
-	int	(*get_power)(unsigned port);
-	/* Read the port over-current indicator */
-	int	(*get_oci)(unsigned port);
-	/* Over-current indicator change notification (pass NULL to disable) */
-	int	(*ocic_notify)(da8xx_ocic_handler_t handler);
-
 	/* Time from power on to power good (in 2 ms units) */
 	u8	potpgt;
 };
-- 
cgit v1.2.3


From 32ea33a044842ae6c5fc7e33426e0a7bd50f8801 Mon Sep 17 00:00:00 2001
From: Tomas Winkler <tomas.winkler@intel.com>
Date: Sat, 9 Feb 2019 18:42:05 +0200
Subject: mei: bus: export to_mei_cl_device for mei client devices drivers

Export to_mei_cl_device macro, as it is needed also
in the mei client drivers.

Signed-off-by: Tomas Winkler <tomas.winkler@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/mei/bus.c     | 1 -
 include/linux/mei_cl_bus.h | 2 ++
 2 files changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/misc/mei/bus.c b/drivers/misc/mei/bus.c
index fc3872fe7b25..e5456faf00e6 100644
--- a/drivers/misc/mei/bus.c
+++ b/drivers/misc/mei/bus.c
@@ -28,7 +28,6 @@
 #include "client.h"
 
 #define to_mei_cl_driver(d) container_of(d, struct mei_cl_driver, driver)
-#define to_mei_cl_device(d) container_of(d, struct mei_cl_device, dev)
 
 /**
  * __mei_cl_send - internal client send (write)
diff --git a/include/linux/mei_cl_bus.h b/include/linux/mei_cl_bus.h
index 7fde40e17c8b..03b6ba2a63f8 100644
--- a/include/linux/mei_cl_bus.h
+++ b/include/linux/mei_cl_bus.h
@@ -55,6 +55,8 @@ struct mei_cl_device {
 	void *priv_data;
 };
 
+#define to_mei_cl_device(d) container_of(d, struct mei_cl_device, dev)
+
 struct mei_cl_driver {
 	struct device_driver driver;
 	const char *name;
-- 
cgit v1.2.3


From c68cfb718c8f97b7f7a50ed66be5feb42d0c8988 Mon Sep 17 00:00:00 2001
From: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Date: Fri, 8 Feb 2019 17:11:25 +0000
Subject: misc: fastrpc: Add support for context Invoke method

This patch adds support to compute context invoke method on the
remote processor (DSP).
This involves setting up the functions input and output arguments,
input and output handles and mapping the dmabuf fd for the
argument/handle buffers.

The below diagram depicts invocation of a single method where the
client and objects reside on different processors. An object could
expose multiple methods which can be grouped together and referred
to as an interface.

,--------,        ,------,  ,-----------,  ,------,        ,--------,
|        | method |      |  |           |  |      | method |        |
| Client |------->| Stub |->| Transport |->| Skel |------->| Object |
|        |        |      |  |           |  |      |        |        |
`--------`        `------`  `-----------`  `------`        `--------`

Client:    Linux user mode process that initiates the remote invocation
Stub:      Auto generated code linked in with the user mode process that
           takes care of marshaling parameters
Transport: Involved in carrying an invocation from a client to an
           object. This involves two portions: 1) FastRPC Linux
           kernel driver that receives the remote invocation, queues
           them up and then waits for the response after signaling the
           remote side. 2) Service running on the remote side that
           dequeues the messages from the queue and dispatches them for
           processing.
Skel:      Auto generated code that takes care of un-marshaling
           parameters
Object:    Method implementation

Most of the work is derived from various downstream Qualcomm kernels.
Credits to various Qualcomm authors who have contributed to this code.
Specially Tharun Kumar Merugu <mtharu@codeaurora.org>

Co-developed-by: Thierry Escande <thierry.escande@linaro.org>
Signed-off-by: Thierry Escande <thierry.escande@linaro.org>
Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/fastrpc.c      | 730 ++++++++++++++++++++++++++++++++++++++++++++
 include/uapi/misc/fastrpc.h |  23 ++
 2 files changed, 753 insertions(+)
 create mode 100644 include/uapi/misc/fastrpc.h

(limited to 'include')

diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c
index 10b93fd5659a..cd69f8b308f6 100644
--- a/drivers/misc/fastrpc.c
+++ b/drivers/misc/fastrpc.c
@@ -2,7 +2,9 @@
 // Copyright (c) 2011-2018, The Linux Foundation. All rights reserved.
 // Copyright (c) 2018, Linaro Limited
 
+#include <linux/completion.h>
 #include <linux/device.h>
+#include <linux/dma-buf.h>
 #include <linux/dma-mapping.h>
 #include <linux/idr.h>
 #include <linux/list.h>
@@ -14,6 +16,7 @@
 #include <linux/rpmsg.h>
 #include <linux/scatterlist.h>
 #include <linux/slab.h>
+#include <uapi/misc/fastrpc.h>
 
 #define ADSP_DOMAIN_ID (0)
 #define MDSP_DOMAIN_ID (1)
@@ -21,14 +24,118 @@
 #define CDSP_DOMAIN_ID (3)
 #define FASTRPC_DEV_MAX		4 /* adsp, mdsp, slpi, cdsp*/
 #define FASTRPC_MAX_SESSIONS	9 /*8 compute, 1 cpz*/
+#define FASTRPC_ALIGN		128
+#define FASTRPC_MAX_FDLIST	16
+#define FASTRPC_MAX_CRCLIST	64
+#define FASTRPC_PHYS(p)	((p) & 0xffffffff)
 #define FASTRPC_CTX_MAX (256)
 #define FASTRPC_CTXID_MASK (0xFF0)
 #define FASTRPC_DEVICE_NAME	"fastrpc"
 
+/* Retrives number of input buffers from the scalars parameter */
+#define REMOTE_SCALARS_INBUFS(sc)	(((sc) >> 16) & 0x0ff)
+
+/* Retrives number of output buffers from the scalars parameter */
+#define REMOTE_SCALARS_OUTBUFS(sc)	(((sc) >> 8) & 0x0ff)
+
+/* Retrives number of input handles from the scalars parameter */
+#define REMOTE_SCALARS_INHANDLES(sc)	(((sc) >> 4) & 0x0f)
+
+/* Retrives number of output handles from the scalars parameter */
+#define REMOTE_SCALARS_OUTHANDLES(sc)	((sc) & 0x0f)
+
+#define REMOTE_SCALARS_LENGTH(sc)	(REMOTE_SCALARS_INBUFS(sc) +   \
+					 REMOTE_SCALARS_OUTBUFS(sc) +  \
+					 REMOTE_SCALARS_INHANDLES(sc)+ \
+					 REMOTE_SCALARS_OUTHANDLES(sc))
+#define FASTRPC_BUILD_SCALARS(attr, method, in, out, oin, oout)  \
+				(((attr & 0x07) << 29) |		\
+				((method & 0x1f) << 24) |	\
+				((in & 0xff) << 16) |		\
+				((out & 0xff) <<  8) |		\
+				((oin & 0x0f) <<  4) |		\
+				(oout & 0x0f))
+
+#define FASTRPC_SCALARS(method, in, out) \
+		FASTRPC_BUILD_SCALARS(0, method, in, out, 0, 0)
+
 #define miscdev_to_cctx(d) container_of(d, struct fastrpc_channel_ctx, miscdev)
 
 static const char *domains[FASTRPC_DEV_MAX] = { "adsp", "mdsp",
 						"sdsp", "cdsp"};
+struct fastrpc_phy_page {
+	u64 addr;		/* physical address */
+	u64 size;		/* size of contiguous region */
+};
+
+struct fastrpc_invoke_buf {
+	u32 num;		/* number of contiguous regions */
+	u32 pgidx;		/* index to start of contiguous region */
+};
+
+struct fastrpc_remote_arg {
+	u64 pv;
+	u64 len;
+};
+
+struct fastrpc_msg {
+	int pid;		/* process group id */
+	int tid;		/* thread id */
+	u64 ctx;		/* invoke caller context */
+	u32 handle;	/* handle to invoke */
+	u32 sc;		/* scalars structure describing the data */
+	u64 addr;		/* physical address */
+	u64 size;		/* size of contiguous region */
+};
+
+struct fastrpc_invoke_rsp {
+	u64 ctx;		/* invoke caller context */
+	int retval;		/* invoke return value */
+};
+
+struct fastrpc_buf {
+	struct fastrpc_user *fl;
+	struct device *dev;
+	void *virt;
+	u64 phys;
+	u64 size;
+};
+
+struct fastrpc_map {
+	struct list_head node;
+	struct fastrpc_user *fl;
+	int fd;
+	struct dma_buf *buf;
+	struct sg_table *table;
+	struct dma_buf_attachment *attach;
+	u64 phys;
+	u64 size;
+	void *va;
+	u64 len;
+	struct kref refcount;
+};
+
+struct fastrpc_invoke_ctx {
+	int nscalars;
+	int nbufs;
+	int retval;
+	int pid;
+	int tgid;
+	u32 sc;
+	u32 *crc;
+	u64 ctxid;
+	u64 msg_sz;
+	struct kref refcount;
+	struct list_head node; /* list of ctxs */
+	struct completion work;
+	struct fastrpc_msg msg;
+	struct fastrpc_user *fl;
+	struct fastrpc_remote_arg *rpra;
+	struct fastrpc_map **maps;
+	struct fastrpc_buf *buf;
+	struct fastrpc_invoke_args *args;
+	struct fastrpc_channel_ctx *cctx;
+};
 
 struct fastrpc_session_ctx {
 	struct device *dev;
@@ -55,6 +162,7 @@ struct fastrpc_user {
 
 	struct fastrpc_channel_ctx *cctx;
 	struct fastrpc_session_ctx *sctx;
+	struct fastrpc_buf *init_mem;
 
 	int tgid;
 	int pd;
@@ -64,6 +172,522 @@ struct fastrpc_user {
 	struct mutex mutex;
 };
 
+static void fastrpc_free_map(struct kref *ref)
+{
+	struct fastrpc_map *map;
+
+	map = container_of(ref, struct fastrpc_map, refcount);
+
+	if (map->table) {
+		dma_buf_unmap_attachment(map->attach, map->table,
+					 DMA_BIDIRECTIONAL);
+		dma_buf_detach(map->buf, map->attach);
+		dma_buf_put(map->buf);
+	}
+
+	kfree(map);
+}
+
+static void fastrpc_map_put(struct fastrpc_map *map)
+{
+	if (map)
+		kref_put(&map->refcount, fastrpc_free_map);
+}
+
+static void fastrpc_map_get(struct fastrpc_map *map)
+{
+	if (map)
+		kref_get(&map->refcount);
+}
+
+static int fastrpc_map_find(struct fastrpc_user *fl, int fd,
+			    struct fastrpc_map **ppmap)
+{
+	struct fastrpc_map *map = NULL;
+
+	mutex_lock(&fl->mutex);
+	list_for_each_entry(map, &fl->maps, node) {
+		if (map->fd == fd) {
+			fastrpc_map_get(map);
+			*ppmap = map;
+			mutex_unlock(&fl->mutex);
+			return 0;
+		}
+	}
+	mutex_unlock(&fl->mutex);
+
+	return -ENOENT;
+}
+
+static void fastrpc_buf_free(struct fastrpc_buf *buf)
+{
+	dma_free_coherent(buf->dev, buf->size, buf->virt,
+			  FASTRPC_PHYS(buf->phys));
+	kfree(buf);
+}
+
+static int fastrpc_buf_alloc(struct fastrpc_user *fl, struct device *dev,
+			     u64 size, struct fastrpc_buf **obuf)
+{
+	struct fastrpc_buf *buf;
+
+	buf = kzalloc(sizeof(*buf), GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	buf->fl = fl;
+	buf->virt = NULL;
+	buf->phys = 0;
+	buf->size = size;
+	buf->dev = dev;
+
+	buf->virt = dma_alloc_coherent(dev, buf->size, (dma_addr_t *)&buf->phys,
+				       GFP_KERNEL);
+	if (!buf->virt)
+		return -ENOMEM;
+
+	if (fl->sctx && fl->sctx->sid)
+		buf->phys += ((u64)fl->sctx->sid << 32);
+
+	*obuf = buf;
+
+	return 0;
+}
+
+static void fastrpc_context_free(struct kref *ref)
+{
+	struct fastrpc_invoke_ctx *ctx;
+	struct fastrpc_channel_ctx *cctx;
+	int i;
+
+	ctx = container_of(ref, struct fastrpc_invoke_ctx, refcount);
+	cctx = ctx->cctx;
+
+	for (i = 0; i < ctx->nscalars; i++)
+		fastrpc_map_put(ctx->maps[i]);
+
+	if (ctx->buf)
+		fastrpc_buf_free(ctx->buf);
+
+	spin_lock(&cctx->lock);
+	idr_remove(&cctx->ctx_idr, ctx->ctxid >> 4);
+	spin_unlock(&cctx->lock);
+
+	kfree(ctx->maps);
+	kfree(ctx);
+}
+
+static void fastrpc_context_get(struct fastrpc_invoke_ctx *ctx)
+{
+	kref_get(&ctx->refcount);
+}
+
+static void fastrpc_context_put(struct fastrpc_invoke_ctx *ctx)
+{
+	kref_put(&ctx->refcount, fastrpc_context_free);
+}
+
+static struct fastrpc_invoke_ctx *fastrpc_context_alloc(
+			struct fastrpc_user *user, u32 kernel, u32 sc,
+			struct fastrpc_invoke_args *args)
+{
+	struct fastrpc_channel_ctx *cctx = user->cctx;
+	struct fastrpc_invoke_ctx *ctx = NULL;
+	int ret;
+
+	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+	if (!ctx)
+		return ERR_PTR(-ENOMEM);
+
+	INIT_LIST_HEAD(&ctx->node);
+	ctx->fl = user;
+	ctx->nscalars = REMOTE_SCALARS_LENGTH(sc);
+	ctx->nbufs = REMOTE_SCALARS_INBUFS(sc) +
+		     REMOTE_SCALARS_OUTBUFS(sc);
+
+	if (ctx->nscalars) {
+		ctx->maps = kcalloc(ctx->nscalars,
+				    sizeof(*ctx->maps), GFP_KERNEL);
+		if (!ctx->maps) {
+			kfree(ctx);
+			return ERR_PTR(-ENOMEM);
+		}
+		ctx->args = args;
+	}
+
+	ctx->sc = sc;
+	ctx->retval = -1;
+	ctx->pid = current->pid;
+	ctx->tgid = user->tgid;
+	ctx->cctx = cctx;
+	init_completion(&ctx->work);
+
+	spin_lock(&user->lock);
+	list_add_tail(&ctx->node, &user->pending);
+	spin_unlock(&user->lock);
+
+	spin_lock(&cctx->lock);
+	ret = idr_alloc_cyclic(&cctx->ctx_idr, ctx, 1,
+			       FASTRPC_CTX_MAX, GFP_ATOMIC);
+	if (ret < 0) {
+		spin_unlock(&cctx->lock);
+		goto err_idr;
+	}
+	ctx->ctxid = ret << 4;
+	spin_unlock(&cctx->lock);
+
+	kref_init(&ctx->refcount);
+
+	return ctx;
+err_idr:
+	spin_lock(&user->lock);
+	list_del(&ctx->node);
+	spin_unlock(&user->lock);
+	kfree(ctx->maps);
+	kfree(ctx);
+
+	return ERR_PTR(ret);
+}
+
+static int fastrpc_map_create(struct fastrpc_user *fl, int fd,
+			      u64 len, struct fastrpc_map **ppmap)
+{
+	struct fastrpc_session_ctx *sess = fl->sctx;
+	struct fastrpc_map *map = NULL;
+	int err = 0;
+
+	if (!fastrpc_map_find(fl, fd, ppmap))
+		return 0;
+
+	map = kzalloc(sizeof(*map), GFP_KERNEL);
+	if (!map)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&map->node);
+	map->fl = fl;
+	map->fd = fd;
+	map->buf = dma_buf_get(fd);
+	if (!map->buf) {
+		err = -EINVAL;
+		goto get_err;
+	}
+
+	map->attach = dma_buf_attach(map->buf, sess->dev);
+	if (IS_ERR(map->attach)) {
+		dev_err(sess->dev, "Failed to attach dmabuf\n");
+		err = PTR_ERR(map->attach);
+		goto attach_err;
+	}
+
+	map->table = dma_buf_map_attachment(map->attach, DMA_BIDIRECTIONAL);
+	if (IS_ERR(map->table)) {
+		err = PTR_ERR(map->table);
+		goto map_err;
+	}
+
+	map->phys = sg_dma_address(map->table->sgl);
+	map->phys += ((u64)fl->sctx->sid << 32);
+	map->size = len;
+	map->va = sg_virt(map->table->sgl);
+	map->len = len;
+	kref_init(&map->refcount);
+
+	spin_lock(&fl->lock);
+	list_add_tail(&map->node, &fl->maps);
+	spin_unlock(&fl->lock);
+	*ppmap = map;
+
+	return 0;
+
+map_err:
+	dma_buf_detach(map->buf, map->attach);
+attach_err:
+	dma_buf_put(map->buf);
+get_err:
+	kfree(map);
+
+	return err;
+}
+
+/*
+ * Fastrpc payload buffer with metadata looks like:
+ *
+ * >>>>>>  START of METADATA <<<<<<<<<
+ * +---------------------------------+
+ * |           Arguments             |
+ * | type:(struct fastrpc_remote_arg)|
+ * |             (0 - N)             |
+ * +---------------------------------+
+ * |         Invoke Buffer list      |
+ * | type:(struct fastrpc_invoke_buf)|
+ * |           (0 - N)               |
+ * +---------------------------------+
+ * |         Page info list          |
+ * | type:(struct fastrpc_phy_page)  |
+ * |             (0 - N)             |
+ * +---------------------------------+
+ * |         Optional info           |
+ * |(can be specific to SoC/Firmware)|
+ * +---------------------------------+
+ * >>>>>>>>  END of METADATA <<<<<<<<<
+ * +---------------------------------+
+ * |         Inline ARGS             |
+ * |            (0-N)                |
+ * +---------------------------------+
+ */
+
+static int fastrpc_get_meta_size(struct fastrpc_invoke_ctx *ctx)
+{
+	int size = 0;
+
+	size = (sizeof(struct fastrpc_remote_arg) +
+		sizeof(struct fastrpc_invoke_buf) +
+		sizeof(struct fastrpc_phy_page)) * ctx->nscalars +
+		sizeof(u64) * FASTRPC_MAX_FDLIST +
+		sizeof(u32) * FASTRPC_MAX_CRCLIST;
+
+	return size;
+}
+
+static u64 fastrpc_get_payload_size(struct fastrpc_invoke_ctx *ctx, int metalen)
+{
+	u64 size = 0;
+	int i;
+
+	size = ALIGN(metalen, FASTRPC_ALIGN);
+	for (i = 0; i < ctx->nscalars; i++) {
+		if (ctx->args[i].fd == 0 || ctx->args[i].fd == -1) {
+			size = ALIGN(size, FASTRPC_ALIGN);
+			size += ctx->args[i].length;
+		}
+	}
+
+	return size;
+}
+
+static int fastrpc_create_maps(struct fastrpc_invoke_ctx *ctx)
+{
+	struct device *dev = ctx->fl->sctx->dev;
+	int i, err;
+
+	for (i = 0; i < ctx->nscalars; ++i) {
+		/* Make sure reserved field is set to 0 */
+		if (ctx->args[i].reserved)
+			return -EINVAL;
+
+		if (ctx->args[i].fd == 0 || ctx->args[i].fd == -1 ||
+		    ctx->args[i].length == 0)
+			continue;
+
+		err = fastrpc_map_create(ctx->fl, ctx->args[i].fd,
+					 ctx->args[i].length, &ctx->maps[i]);
+		if (err) {
+			dev_err(dev, "Error Creating map %d\n", err);
+			return -EINVAL;
+		}
+
+	}
+	return 0;
+}
+
+static int fastrpc_get_args(u32 kernel, struct fastrpc_invoke_ctx *ctx)
+{
+	struct device *dev = ctx->fl->sctx->dev;
+	struct fastrpc_remote_arg *rpra;
+	struct fastrpc_invoke_buf *list;
+	struct fastrpc_phy_page *pages;
+	int inbufs, i, err = 0;
+	u64 rlen, pkt_size;
+	uintptr_t args;
+	int metalen;
+
+
+	inbufs = REMOTE_SCALARS_INBUFS(ctx->sc);
+	metalen = fastrpc_get_meta_size(ctx);
+	pkt_size = fastrpc_get_payload_size(ctx, metalen);
+
+	err = fastrpc_create_maps(ctx);
+	if (err)
+		return err;
+
+	ctx->msg_sz = pkt_size;
+
+	err = fastrpc_buf_alloc(ctx->fl, dev, pkt_size, &ctx->buf);
+	if (err)
+		return err;
+
+	rpra = ctx->buf->virt;
+	list = ctx->buf->virt + ctx->nscalars * sizeof(*rpra);
+	pages = ctx->buf->virt + ctx->nscalars * (sizeof(*list) +
+		sizeof(*rpra));
+	args = (uintptr_t)ctx->buf->virt + metalen;
+	rlen = pkt_size - metalen;
+	ctx->rpra = rpra;
+
+	for (i = 0; i < ctx->nbufs; ++i) {
+		u64 len = ctx->args[i].length;
+
+		rpra[i].pv = 0;
+		rpra[i].len = len;
+		list[i].num = len ? 1 : 0;
+		list[i].pgidx = i;
+
+		if (!len)
+			continue;
+
+		pages[i].size = roundup(len, PAGE_SIZE);
+
+		if (ctx->maps[i]) {
+			rpra[i].pv = (u64) ctx->args[i].ptr;
+			pages[i].addr = ctx->maps[i]->phys;
+		} else {
+			rlen -= ALIGN(args, FASTRPC_ALIGN) - args;
+			args = ALIGN(args, FASTRPC_ALIGN);
+			if (rlen < len)
+				goto bail;
+
+			rpra[i].pv = args;
+			pages[i].addr = ctx->buf->phys + (pkt_size - rlen);
+			pages[i].addr = pages[i].addr &	PAGE_MASK;
+			args = args + len;
+			rlen -= len;
+		}
+
+		if (i < inbufs && !ctx->maps[i]) {
+			void *dst = (void *)(uintptr_t)rpra[i].pv;
+			void *src = (void *)(uintptr_t)ctx->args[i].ptr;
+
+			if (!kernel) {
+				if (copy_from_user(dst, (void __user *)src,
+						   len)) {
+					err = -EFAULT;
+					goto bail;
+				}
+			} else {
+				memcpy(dst, src, len);
+			}
+		}
+	}
+
+	for (i = ctx->nbufs; i < ctx->nscalars; ++i) {
+		rpra[i].pv = (u64) ctx->args[i].ptr;
+		rpra[i].len = ctx->args[i].length;
+		list[i].num = ctx->args[i].length ? 1 : 0;
+		list[i].pgidx = i;
+		pages[i].addr = ctx->maps[i]->phys;
+		pages[i].size = ctx->maps[i]->size;
+	}
+
+bail:
+	if (err)
+		dev_err(dev, "Error: get invoke args failed:%d\n", err);
+
+	return err;
+}
+
+static int fastrpc_put_args(struct fastrpc_invoke_ctx *ctx,
+			    u32 kernel)
+{
+	struct fastrpc_remote_arg *rpra = ctx->rpra;
+	int i, inbufs;
+
+	inbufs = REMOTE_SCALARS_INBUFS(ctx->sc);
+
+	for (i = inbufs; i < ctx->nbufs; ++i) {
+		void *src = (void *)(uintptr_t)rpra[i].pv;
+		void *dst = (void *)(uintptr_t)ctx->args[i].ptr;
+		u64 len = rpra[i].len;
+
+		if (!kernel) {
+			if (copy_to_user((void __user *)dst, src, len))
+				return -EFAULT;
+		} else {
+			memcpy(dst, src, len);
+		}
+	}
+
+	return 0;
+}
+
+static int fastrpc_invoke_send(struct fastrpc_session_ctx *sctx,
+			       struct fastrpc_invoke_ctx *ctx,
+			       u32 kernel, uint32_t handle)
+{
+	struct fastrpc_channel_ctx *cctx;
+	struct fastrpc_user *fl = ctx->fl;
+	struct fastrpc_msg *msg = &ctx->msg;
+
+	cctx = fl->cctx;
+	msg->pid = fl->tgid;
+	msg->tid = current->pid;
+
+	if (kernel)
+		msg->pid = 0;
+
+	msg->ctx = ctx->ctxid | fl->pd;
+	msg->handle = handle;
+	msg->sc = ctx->sc;
+	msg->addr = ctx->buf ? ctx->buf->phys : 0;
+	msg->size = roundup(ctx->msg_sz, PAGE_SIZE);
+	fastrpc_context_get(ctx);
+
+	return rpmsg_send(cctx->rpdev->ept, (void *)msg, sizeof(*msg));
+}
+
+static int fastrpc_internal_invoke(struct fastrpc_user *fl,  u32 kernel,
+				   u32 handle, u32 sc,
+				   struct fastrpc_invoke_args *args)
+{
+	struct fastrpc_invoke_ctx *ctx = NULL;
+	int err = 0;
+
+	if (!fl->sctx)
+		return -EINVAL;
+
+	ctx = fastrpc_context_alloc(fl, kernel, sc, args);
+	if (IS_ERR(ctx))
+		return PTR_ERR(ctx);
+
+	if (ctx->nscalars) {
+		err = fastrpc_get_args(kernel, ctx);
+		if (err)
+			goto bail;
+	}
+	/* Send invoke buffer to remote dsp */
+	err = fastrpc_invoke_send(fl->sctx, ctx, kernel, handle);
+	if (err)
+		goto bail;
+
+	/* Wait for remote dsp to respond or time out */
+	err = wait_for_completion_interruptible(&ctx->work);
+	if (err)
+		goto bail;
+
+	/* Check the response from remote dsp */
+	err = ctx->retval;
+	if (err)
+		goto bail;
+
+	if (ctx->nscalars) {
+		/* populate all the output buffers with results */
+		err = fastrpc_put_args(ctx, kernel);
+		if (err)
+			goto bail;
+	}
+
+bail:
+	/* We are done with this compute context, remove it from pending list */
+	spin_lock(&fl->lock);
+	list_del(&ctx->node);
+	spin_unlock(&fl->lock);
+	fastrpc_context_put(ctx);
+
+	if (err)
+		dev_dbg(fl->sctx->dev, "Error: Invoke Failed %d\n", err);
+
+	return err;
+}
+
 static struct fastrpc_session_ctx *fastrpc_session_alloc(
 					struct fastrpc_channel_ctx *cctx)
 {
@@ -95,11 +719,26 @@ static int fastrpc_device_release(struct inode *inode, struct file *file)
 {
 	struct fastrpc_user *fl = (struct fastrpc_user *)file->private_data;
 	struct fastrpc_channel_ctx *cctx = fl->cctx;
+	struct fastrpc_invoke_ctx *ctx, *n;
+	struct fastrpc_map *map, *m;
 
 	spin_lock(&cctx->lock);
 	list_del(&fl->user);
 	spin_unlock(&cctx->lock);
 
+	if (fl->init_mem)
+		fastrpc_buf_free(fl->init_mem);
+
+	list_for_each_entry_safe(ctx, n, &fl->pending, node) {
+		list_del(&ctx->node);
+		fastrpc_context_put(ctx);
+	}
+
+	list_for_each_entry_safe(map, m, &fl->maps, node) {
+		list_del(&map->node);
+		fastrpc_map_put(map);
+	}
+
 	fastrpc_session_free(cctx, fl->sctx);
 
 	mutex_destroy(&fl->mutex);
@@ -134,9 +773,60 @@ static int fastrpc_device_open(struct inode *inode, struct file *filp)
 	return 0;
 }
 
+static int fastrpc_invoke(struct fastrpc_user *fl, char __user *argp)
+{
+	struct fastrpc_invoke_args *args = NULL;
+	struct fastrpc_invoke inv;
+	u32 nscalars;
+	int err;
+
+	if (copy_from_user(&inv, argp, sizeof(inv)))
+		return -EFAULT;
+
+	/* nscalars is truncated here to max supported value */
+	nscalars = REMOTE_SCALARS_LENGTH(inv.sc);
+	if (nscalars) {
+		args = kcalloc(nscalars, sizeof(*args), GFP_KERNEL);
+		if (!args)
+			return -ENOMEM;
+
+		if (copy_from_user(args, (void __user *)(uintptr_t)inv.args,
+				   nscalars * sizeof(*args))) {
+			kfree(args);
+			return -EFAULT;
+		}
+	}
+
+	err = fastrpc_internal_invoke(fl, false, inv.handle, inv.sc, args);
+	kfree(args);
+
+	return err;
+}
+
+static long fastrpc_device_ioctl(struct file *file, unsigned int cmd,
+				 unsigned long arg)
+{
+	struct fastrpc_user *fl = (struct fastrpc_user *)file->private_data;
+	char __user *argp = (char __user *)arg;
+	int err;
+
+	switch (cmd) {
+	case FASTRPC_IOCTL_INVOKE:
+		err = fastrpc_invoke(fl, argp);
+		break;
+	default:
+		err = -ENOTTY;
+		break;
+	}
+
+	return err;
+}
+
 static const struct file_operations fastrpc_fops = {
 	.open = fastrpc_device_open,
 	.release = fastrpc_device_release,
+	.unlocked_ioctl = fastrpc_device_ioctl,
+	.compat_ioctl = fastrpc_device_ioctl,
 };
 
 static int fastrpc_cb_probe(struct platform_device *pdev)
@@ -260,9 +950,25 @@ static int fastrpc_rpmsg_probe(struct rpmsg_device *rpdev)
 	return of_platform_populate(rdev->of_node, NULL, NULL, rdev);
 }
 
+static void fastrpc_notify_users(struct fastrpc_user *user)
+{
+	struct fastrpc_invoke_ctx *ctx;
+
+	spin_lock(&user->lock);
+	list_for_each_entry(ctx, &user->pending, node)
+		complete(&ctx->work);
+	spin_unlock(&user->lock);
+}
+
 static void fastrpc_rpmsg_remove(struct rpmsg_device *rpdev)
 {
 	struct fastrpc_channel_ctx *cctx = dev_get_drvdata(&rpdev->dev);
+	struct fastrpc_user *user;
+
+	spin_lock(&cctx->lock);
+	list_for_each_entry(user, &cctx->users, user)
+		fastrpc_notify_users(user);
+	spin_unlock(&cctx->lock);
 
 	misc_deregister(&cctx->miscdev);
 	of_platform_depopulate(&rpdev->dev);
@@ -272,6 +978,30 @@ static void fastrpc_rpmsg_remove(struct rpmsg_device *rpdev)
 static int fastrpc_rpmsg_callback(struct rpmsg_device *rpdev, void *data,
 				  int len, void *priv, u32 addr)
 {
+	struct fastrpc_channel_ctx *cctx = dev_get_drvdata(&rpdev->dev);
+	struct fastrpc_invoke_rsp *rsp = data;
+	struct fastrpc_invoke_ctx *ctx;
+	unsigned long flags;
+	unsigned long ctxid;
+
+	if (len < sizeof(*rsp))
+		return -EINVAL;
+
+	ctxid = ((rsp->ctx & FASTRPC_CTXID_MASK) >> 4);
+
+	spin_lock_irqsave(&cctx->lock, flags);
+	ctx = idr_find(&cctx->ctx_idr, ctxid);
+	spin_unlock_irqrestore(&cctx->lock, flags);
+
+	if (!ctx) {
+		dev_err(&rpdev->dev, "No context ID matches response\n");
+		return -ENOENT;
+	}
+
+	ctx->retval = rsp->retval;
+	complete(&ctx->work);
+	fastrpc_context_put(ctx);
+
 	return 0;
 }
 
diff --git a/include/uapi/misc/fastrpc.h b/include/uapi/misc/fastrpc.h
new file mode 100644
index 000000000000..a69ef33dc37e
--- /dev/null
+++ b/include/uapi/misc/fastrpc.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __QCOM_FASTRPC_H__
+#define __QCOM_FASTRPC_H__
+
+#include <linux/types.h>
+
+#define FASTRPC_IOCTL_INVOKE		_IOWR('R', 3, struct fastrpc_invoke)
+
+struct fastrpc_invoke_args {
+	__u64 ptr;
+	__u64 length;
+	__s32 fd;
+	__u32 reserved;
+};
+
+struct fastrpc_invoke {
+	__u32 handle;
+	__u32 sc;
+	__u64 args;
+};
+
+#endif /* __QCOM_FASTRPC_H__ */
-- 
cgit v1.2.3


From d73f71c7c6ee1583c08c214c8f7b20d841490b36 Mon Sep 17 00:00:00 2001
From: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Date: Fri, 8 Feb 2019 17:11:26 +0000
Subject: misc: fastrpc: Add support for create remote init process

This patch adds support to create or attach remote shell process.
The shell process called fastrpc_shell_0 is usually loaded on the DSP
when a user process is spawned.

Most of the work is derived from various downstream Qualcomm kernels.
Credits to various Qualcomm authors who have contributed to this code.
Specially Tharun Kumar Merugu <mtharu@codeaurora.org>

Co-developed-by: Thierry Escande <thierry.escande@linaro.org>
Signed-off-by: Thierry Escande <thierry.escande@linaro.org>
Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/fastrpc.c      | 156 ++++++++++++++++++++++++++++++++++++++++++++
 include/uapi/misc/fastrpc.h |  10 +++
 2 files changed, 166 insertions(+)

(limited to 'include')

diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c
index cd69f8b308f6..ceb498487569 100644
--- a/drivers/misc/fastrpc.c
+++ b/drivers/misc/fastrpc.c
@@ -29,7 +29,10 @@
 #define FASTRPC_MAX_CRCLIST	64
 #define FASTRPC_PHYS(p)	((p) & 0xffffffff)
 #define FASTRPC_CTX_MAX (256)
+#define FASTRPC_INIT_HANDLE	1
 #define FASTRPC_CTXID_MASK (0xFF0)
+#define INIT_FILELEN_MAX (2 * 1024 * 1024)
+#define INIT_MEMLEN_MAX  (8 * 1024 * 1024)
 #define FASTRPC_DEVICE_NAME	"fastrpc"
 
 /* Retrives number of input buffers from the scalars parameter */
@@ -59,6 +62,14 @@
 #define FASTRPC_SCALARS(method, in, out) \
 		FASTRPC_BUILD_SCALARS(0, method, in, out, 0, 0)
 
+#define FASTRPC_CREATE_PROCESS_NARGS	6
+/* Remote Method id table */
+#define FASTRPC_RMID_INIT_ATTACH	0
+#define FASTRPC_RMID_INIT_RELEASE	1
+#define FASTRPC_RMID_INIT_CREATE	6
+#define FASTRPC_RMID_INIT_CREATE_ATTR	7
+#define FASTRPC_RMID_INIT_CREATE_STATIC	8
+
 #define miscdev_to_cctx(d) container_of(d, struct fastrpc_channel_ctx, miscdev)
 
 static const char *domains[FASTRPC_DEV_MAX] = { "adsp", "mdsp",
@@ -688,6 +699,109 @@ bail:
 	return err;
 }
 
+static int fastrpc_init_create_process(struct fastrpc_user *fl,
+					char __user *argp)
+{
+	struct fastrpc_init_create init;
+	struct fastrpc_invoke_args *args;
+	struct fastrpc_phy_page pages[1];
+	struct fastrpc_map *map = NULL;
+	struct fastrpc_buf *imem = NULL;
+	int memlen;
+	int err;
+	struct {
+		int pgid;
+		u32 namelen;
+		u32 filelen;
+		u32 pageslen;
+		u32 attrs;
+		u32 siglen;
+	} inbuf;
+	u32 sc;
+
+	args = kcalloc(FASTRPC_CREATE_PROCESS_NARGS, sizeof(*args), GFP_KERNEL);
+	if (!args)
+		return -ENOMEM;
+
+	if (copy_from_user(&init, argp, sizeof(init))) {
+		err = -EFAULT;
+		goto bail;
+	}
+
+	if (init.filelen > INIT_FILELEN_MAX) {
+		err = -EINVAL;
+		goto bail;
+	}
+
+	inbuf.pgid = fl->tgid;
+	inbuf.namelen = strlen(current->comm) + 1;
+	inbuf.filelen = init.filelen;
+	inbuf.pageslen = 1;
+	inbuf.attrs = init.attrs;
+	inbuf.siglen = init.siglen;
+	fl->pd = 1;
+
+	if (init.filelen && init.filefd) {
+		err = fastrpc_map_create(fl, init.filefd, init.filelen, &map);
+		if (err)
+			goto bail;
+	}
+
+	memlen = ALIGN(max(INIT_FILELEN_MAX, (int)init.filelen * 4),
+		       1024 * 1024);
+	err = fastrpc_buf_alloc(fl, fl->sctx->dev, memlen,
+				&imem);
+	if (err) {
+		fastrpc_map_put(map);
+		goto bail;
+	}
+
+	fl->init_mem = imem;
+	args[0].ptr = (u64)(uintptr_t)&inbuf;
+	args[0].length = sizeof(inbuf);
+	args[0].fd = -1;
+
+	args[1].ptr = (u64)(uintptr_t)current->comm;
+	args[1].length = inbuf.namelen;
+	args[1].fd = -1;
+
+	args[2].ptr = (u64) init.file;
+	args[2].length = inbuf.filelen;
+	args[2].fd = init.filefd;
+
+	pages[0].addr = imem->phys;
+	pages[0].size = imem->size;
+
+	args[3].ptr = (u64)(uintptr_t) pages;
+	args[3].length = 1 * sizeof(*pages);
+	args[3].fd = -1;
+
+	args[4].ptr = (u64)(uintptr_t)&inbuf.attrs;
+	args[4].length = sizeof(inbuf.attrs);
+	args[4].fd = -1;
+
+	args[5].ptr = (u64)(uintptr_t) &inbuf.siglen;
+	args[5].length = sizeof(inbuf.siglen);
+	args[5].fd = -1;
+
+	sc = FASTRPC_SCALARS(FASTRPC_RMID_INIT_CREATE, 4, 0);
+	if (init.attrs)
+		sc = FASTRPC_SCALARS(FASTRPC_RMID_INIT_CREATE_ATTR, 6, 0);
+
+	err = fastrpc_internal_invoke(fl, true, FASTRPC_INIT_HANDLE,
+				      sc, args);
+
+	if (err) {
+		fastrpc_map_put(map);
+		fastrpc_buf_free(imem);
+	}
+
+bail:
+	kfree(args);
+
+	return err;
+}
+
 static struct fastrpc_session_ctx *fastrpc_session_alloc(
 					struct fastrpc_channel_ctx *cctx)
 {
@@ -715,6 +829,23 @@ static void fastrpc_session_free(struct fastrpc_channel_ctx *cctx,
 	spin_unlock(&cctx->lock);
 }
 
+static int fastrpc_release_current_dsp_process(struct fastrpc_user *fl)
+{
+	struct fastrpc_invoke_args args[1];
+	int tgid = 0;
+	u32 sc;
+
+	tgid = fl->tgid;
+	args[0].ptr = (u64)(uintptr_t) &tgid;
+	args[0].length = sizeof(tgid);
+	args[0].fd = -1;
+	args[0].reserved = 0;
+	sc = FASTRPC_SCALARS(FASTRPC_RMID_INIT_RELEASE, 1, 0);
+
+	return fastrpc_internal_invoke(fl, true, FASTRPC_INIT_HANDLE,
+				       sc, &args[0]);
+}
+
 static int fastrpc_device_release(struct inode *inode, struct file *file)
 {
 	struct fastrpc_user *fl = (struct fastrpc_user *)file->private_data;
@@ -722,6 +853,8 @@ static int fastrpc_device_release(struct inode *inode, struct file *file)
 	struct fastrpc_invoke_ctx *ctx, *n;
 	struct fastrpc_map *map, *m;
 
+	fastrpc_release_current_dsp_process(fl);
+
 	spin_lock(&cctx->lock);
 	list_del(&fl->user);
 	spin_unlock(&cctx->lock);
@@ -773,6 +906,23 @@ static int fastrpc_device_open(struct inode *inode, struct file *filp)
 	return 0;
 }
 
+static int fastrpc_init_attach(struct fastrpc_user *fl)
+{
+	struct fastrpc_invoke_args args[1];
+	int tgid = fl->tgid;
+	u32 sc;
+
+	args[0].ptr = (u64)(uintptr_t) &tgid;
+	args[0].length = sizeof(tgid);
+	args[0].fd = -1;
+	args[0].reserved = 0;
+	sc = FASTRPC_SCALARS(FASTRPC_RMID_INIT_ATTACH, 1, 0);
+	fl->pd = 0;
+
+	return fastrpc_internal_invoke(fl, true, FASTRPC_INIT_HANDLE,
+				       sc, &args[0]);
+}
+
 static int fastrpc_invoke(struct fastrpc_user *fl, char __user *argp)
 {
 	struct fastrpc_invoke_args *args = NULL;
@@ -814,6 +964,12 @@ static long fastrpc_device_ioctl(struct file *file, unsigned int cmd,
 	case FASTRPC_IOCTL_INVOKE:
 		err = fastrpc_invoke(fl, argp);
 		break;
+	case FASTRPC_IOCTL_INIT_ATTACH:
+		err = fastrpc_init_attach(fl);
+		break;
+	case FASTRPC_IOCTL_INIT_CREATE:
+		err = fastrpc_init_create_process(fl, argp);
+		break;
 	default:
 		err = -ENOTTY;
 		break;
diff --git a/include/uapi/misc/fastrpc.h b/include/uapi/misc/fastrpc.h
index a69ef33dc37e..32d191c3b7bc 100644
--- a/include/uapi/misc/fastrpc.h
+++ b/include/uapi/misc/fastrpc.h
@@ -6,6 +6,8 @@
 #include <linux/types.h>
 
 #define FASTRPC_IOCTL_INVOKE		_IOWR('R', 3, struct fastrpc_invoke)
+#define FASTRPC_IOCTL_INIT_ATTACH	_IO('R', 4)
+#define FASTRPC_IOCTL_INIT_CREATE	_IOWR('R', 5, struct fastrpc_init_create)
 
 struct fastrpc_invoke_args {
 	__u64 ptr;
@@ -20,4 +22,12 @@ struct fastrpc_invoke {
 	__u64 args;
 };
 
+struct fastrpc_init_create {
+	__u32 filelen;	/* elf file length */
+	__s32 filefd;	/* fd for the file */
+	__u32 attrs;
+	__u32 siglen;
+	__u64 file;	/* pointer to elf file */
+};
+
 #endif /* __QCOM_FASTRPC_H__ */
-- 
cgit v1.2.3


From 6cffd79504ce040f460831030d3069fa1c99bb71 Mon Sep 17 00:00:00 2001
From: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Date: Fri, 8 Feb 2019 17:11:27 +0000
Subject: misc: fastrpc: Add support for dmabuf exporter

User process can involve dealing with big buffer sizes, and also passing
buffers from one compute context bank to other compute context bank for
complex dsp algorithms.

This patch adds support to fastrpc to make it a proper dmabuf exporter
to avoid making copies of buffers.

Co-developed-by: Thierry Escande <thierry.escande@linaro.org>
Signed-off-by: Thierry Escande <thierry.escande@linaro.org>
Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/fastrpc.c      | 184 ++++++++++++++++++++++++++++++++++++++++++++
 include/uapi/misc/fastrpc.h |   8 ++
 2 files changed, 192 insertions(+)

(limited to 'include')

diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c
index ceb498487569..4b0db33896df 100644
--- a/drivers/misc/fastrpc.c
+++ b/drivers/misc/fastrpc.c
@@ -106,10 +106,20 @@ struct fastrpc_invoke_rsp {
 
 struct fastrpc_buf {
 	struct fastrpc_user *fl;
+	struct dma_buf *dmabuf;
 	struct device *dev;
 	void *virt;
 	u64 phys;
 	u64 size;
+	/* Lock for dma buf attachments */
+	struct mutex lock;
+	struct list_head attachments;
+};
+
+struct fastrpc_dma_buf_attachment {
+	struct device *dev;
+	struct sg_table sgt;
+	struct list_head node;
 };
 
 struct fastrpc_map {
@@ -246,6 +256,9 @@ static int fastrpc_buf_alloc(struct fastrpc_user *fl, struct device *dev,
 	if (!buf)
 		return -ENOMEM;
 
+	INIT_LIST_HEAD(&buf->attachments);
+	mutex_init(&buf->lock);
+
 	buf->fl = fl;
 	buf->virt = NULL;
 	buf->phys = 0;
@@ -360,6 +373,111 @@ err_idr:
 	return ERR_PTR(ret);
 }
 
+static struct sg_table *
+fastrpc_map_dma_buf(struct dma_buf_attachment *attachment,
+		    enum dma_data_direction dir)
+{
+	struct fastrpc_dma_buf_attachment *a = attachment->priv;
+	struct sg_table *table;
+
+	table = &a->sgt;
+
+	if (!dma_map_sg(attachment->dev, table->sgl, table->nents, dir))
+		return ERR_PTR(-ENOMEM);
+
+	return table;
+}
+
+static void fastrpc_unmap_dma_buf(struct dma_buf_attachment *attach,
+				  struct sg_table *table,
+				  enum dma_data_direction dir)
+{
+	dma_unmap_sg(attach->dev, table->sgl, table->nents, dir);
+}
+
+static void fastrpc_release(struct dma_buf *dmabuf)
+{
+	struct fastrpc_buf *buffer = dmabuf->priv;
+
+	fastrpc_buf_free(buffer);
+}
+
+static int fastrpc_dma_buf_attach(struct dma_buf *dmabuf,
+				  struct dma_buf_attachment *attachment)
+{
+	struct fastrpc_dma_buf_attachment *a;
+	struct fastrpc_buf *buffer = dmabuf->priv;
+	int ret;
+
+	a = kzalloc(sizeof(*a), GFP_KERNEL);
+	if (!a)
+		return -ENOMEM;
+
+	ret = dma_get_sgtable(buffer->dev, &a->sgt, buffer->virt,
+			      FASTRPC_PHYS(buffer->phys), buffer->size);
+	if (ret < 0) {
+		dev_err(buffer->dev, "failed to get scatterlist from DMA API\n");
+		return -EINVAL;
+	}
+
+	a->dev = attachment->dev;
+	INIT_LIST_HEAD(&a->node);
+	attachment->priv = a;
+
+	mutex_lock(&buffer->lock);
+	list_add(&a->node, &buffer->attachments);
+	mutex_unlock(&buffer->lock);
+
+	return 0;
+}
+
+static void fastrpc_dma_buf_detatch(struct dma_buf *dmabuf,
+				    struct dma_buf_attachment *attachment)
+{
+	struct fastrpc_dma_buf_attachment *a = attachment->priv;
+	struct fastrpc_buf *buffer = dmabuf->priv;
+
+	mutex_lock(&buffer->lock);
+	list_del(&a->node);
+	mutex_unlock(&buffer->lock);
+	kfree(a);
+}
+
+static void *fastrpc_kmap(struct dma_buf *dmabuf, unsigned long pgnum)
+{
+	struct fastrpc_buf *buf = dmabuf->priv;
+
+	return buf->virt ? buf->virt + pgnum * PAGE_SIZE : NULL;
+}
+
+static void *fastrpc_vmap(struct dma_buf *dmabuf)
+{
+	struct fastrpc_buf *buf = dmabuf->priv;
+
+	return buf->virt;
+}
+
+static int fastrpc_mmap(struct dma_buf *dmabuf,
+			struct vm_area_struct *vma)
+{
+	struct fastrpc_buf *buf = dmabuf->priv;
+	size_t size = vma->vm_end - vma->vm_start;
+
+	return dma_mmap_coherent(buf->dev, vma, buf->virt,
+				 FASTRPC_PHYS(buf->phys), size);
+}
+
+static const struct dma_buf_ops fastrpc_dma_buf_ops = {
+	.attach = fastrpc_dma_buf_attach,
+	.detach = fastrpc_dma_buf_detatch,
+	.map_dma_buf = fastrpc_map_dma_buf,
+	.unmap_dma_buf = fastrpc_unmap_dma_buf,
+	.mmap = fastrpc_mmap,
+	.map = fastrpc_kmap,
+	.vmap = fastrpc_vmap,
+	.release = fastrpc_release,
+};
+
 static int fastrpc_map_create(struct fastrpc_user *fl, int fd,
 			      u64 len, struct fastrpc_map **ppmap)
 {
@@ -906,6 +1024,66 @@ static int fastrpc_device_open(struct inode *inode, struct file *filp)
 	return 0;
 }
 
+static int fastrpc_dmabuf_free(struct fastrpc_user *fl, char __user *argp)
+{
+	struct dma_buf *buf;
+	int info;
+
+	if (copy_from_user(&info, argp, sizeof(info)))
+		return -EFAULT;
+
+	buf = dma_buf_get(info);
+	if (IS_ERR_OR_NULL(buf))
+		return -EINVAL;
+	/*
+	 * one for the last get and other for the ALLOC_DMA_BUFF ioctl
+	 */
+	dma_buf_put(buf);
+	dma_buf_put(buf);
+
+	return 0;
+}
+
+static int fastrpc_dmabuf_alloc(struct fastrpc_user *fl, char __user *argp)
+{
+	struct fastrpc_alloc_dma_buf bp;
+	DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
+	struct fastrpc_buf *buf = NULL;
+	int err;
+
+	if (copy_from_user(&bp, argp, sizeof(bp)))
+		return -EFAULT;
+
+	err = fastrpc_buf_alloc(fl, fl->sctx->dev, bp.size, &buf);
+	if (err)
+		return err;
+	exp_info.ops = &fastrpc_dma_buf_ops;
+	exp_info.size = bp.size;
+	exp_info.flags = O_RDWR;
+	exp_info.priv = buf;
+	buf->dmabuf = dma_buf_export(&exp_info);
+	if (IS_ERR(buf->dmabuf)) {
+		err = PTR_ERR(buf->dmabuf);
+		fastrpc_buf_free(buf);
+		return err;
+	}
+
+	bp.fd = dma_buf_fd(buf->dmabuf, O_ACCMODE);
+	if (bp.fd < 0) {
+		dma_buf_put(buf->dmabuf);
+		return -EINVAL;
+	}
+
+	if (copy_to_user(argp, &bp, sizeof(bp))) {
+		dma_buf_put(buf->dmabuf);
+		return -EFAULT;
+	}
+
+	get_dma_buf(buf->dmabuf);
+
+	return 0;
+}
+
 static int fastrpc_init_attach(struct fastrpc_user *fl)
 {
 	struct fastrpc_invoke_args args[1];
@@ -970,6 +1148,12 @@ static long fastrpc_device_ioctl(struct file *file, unsigned int cmd,
 	case FASTRPC_IOCTL_INIT_CREATE:
 		err = fastrpc_init_create_process(fl, argp);
 		break;
+	case FASTRPC_IOCTL_FREE_DMA_BUFF:
+		err = fastrpc_dmabuf_free(fl, argp);
+		break;
+	case FASTRPC_IOCTL_ALLOC_DMA_BUFF:
+		err = fastrpc_dmabuf_alloc(fl, argp);
+		break;
 	default:
 		err = -ENOTTY;
 		break;
diff --git a/include/uapi/misc/fastrpc.h b/include/uapi/misc/fastrpc.h
index 32d191c3b7bc..6d701af9fc42 100644
--- a/include/uapi/misc/fastrpc.h
+++ b/include/uapi/misc/fastrpc.h
@@ -5,6 +5,8 @@
 
 #include <linux/types.h>
 
+#define FASTRPC_IOCTL_ALLOC_DMA_BUFF	_IOWR('R', 1, struct fastrpc_alloc_dma_buf)
+#define FASTRPC_IOCTL_FREE_DMA_BUFF	_IOWR('R', 2, __u32)
 #define FASTRPC_IOCTL_INVOKE		_IOWR('R', 3, struct fastrpc_invoke)
 #define FASTRPC_IOCTL_INIT_ATTACH	_IO('R', 4)
 #define FASTRPC_IOCTL_INIT_CREATE	_IOWR('R', 5, struct fastrpc_init_create)
@@ -30,4 +32,10 @@ struct fastrpc_init_create {
 	__u64 file;	/* pointer to elf file */
 };
 
+struct fastrpc_alloc_dma_buf {
+	__s32 fd;	/* fd */
+	__u32 flags;	/* flags to map with */
+	__u64 size;	/* size */
+};
+
 #endif /* __QCOM_FASTRPC_H__ */
-- 
cgit v1.2.3


From e178df31cf41ba7cd63f7830bd02fd918d16592d Mon Sep 17 00:00:00 2001
From: Jolly Shah <jolly.shah@xilinx.com>
Date: Tue, 29 Jan 2019 12:38:20 -0800
Subject: firmware: xilinx: Implement ZynqMP power management APIs

Add Xilinx ZynqMP firmware APIs to set suspend mode
and inform firmware that master has initialized its
own power management.

Signed-off-by: Rajan Vaja <rajan.vaja@xilinx.com>
Signed-off-by: Jolly Shah <jolly.shah@xilinx.com>
Signed-off-by: Michal Simek <michal.simek@xilinx.com>
---
 drivers/firmware/xilinx/zynqmp.c     | 29 +++++++++++++++++++++++++++++
 include/linux/firmware/xlnx-zynqmp.h | 20 ++++++++++++++++++++
 2 files changed, 49 insertions(+)

(limited to 'include')

diff --git a/drivers/firmware/xilinx/zynqmp.c b/drivers/firmware/xilinx/zynqmp.c
index 16a23bc4c2c3..765a2ca1b100 100644
--- a/drivers/firmware/xilinx/zynqmp.c
+++ b/drivers/firmware/xilinx/zynqmp.c
@@ -530,6 +530,33 @@ static int zynqmp_pm_reset_get_status(const enum zynqmp_pm_reset reset,
 	return ret;
 }
 
+/**
+ * zynqmp_pm_init_finalize() - PM call to inform firmware that the caller
+ *			       master has initialized its own power management
+ *
+ * This API function is to be used for notify the power management controller
+ * about the completed power management initialization.
+ *
+ * Return: Returns status, either success or error+reason
+ */
+static int zynqmp_pm_init_finalize(void)
+{
+	return zynqmp_pm_invoke_fn(PM_PM_INIT_FINALIZE, 0, 0, 0, 0, NULL);
+}
+
+/**
+ * zynqmp_pm_set_suspend_mode()	- Set system suspend mode
+ * @mode:	Mode to set for system suspend
+ *
+ * This API function is used to set mode of system suspend.
+ *
+ * Return: Returns status, either success or error+reason
+ */
+static int zynqmp_pm_set_suspend_mode(u32 mode)
+{
+	return zynqmp_pm_invoke_fn(PM_SET_SUSPEND_MODE, mode, 0, 0, 0, NULL);
+}
+
 static const struct zynqmp_eemi_ops eemi_ops = {
 	.get_api_version = zynqmp_pm_get_api_version,
 	.get_chipid = zynqmp_pm_get_chipid,
@@ -546,6 +573,8 @@ static const struct zynqmp_eemi_ops eemi_ops = {
 	.ioctl = zynqmp_pm_ioctl,
 	.reset_assert = zynqmp_pm_reset_assert,
 	.reset_get_status = zynqmp_pm_reset_get_status,
+	.init_finalize = zynqmp_pm_init_finalize,
+	.set_suspend_mode = zynqmp_pm_set_suspend_mode,
 };
 
 /**
diff --git a/include/linux/firmware/xlnx-zynqmp.h b/include/linux/firmware/xlnx-zynqmp.h
index 5a1f19848100..56b2108a2148 100644
--- a/include/linux/firmware/xlnx-zynqmp.h
+++ b/include/linux/firmware/xlnx-zynqmp.h
@@ -28,14 +28,23 @@
 /* SMC SIP service Call Function Identifier Prefix */
 #define PM_SIP_SVC			0xC2000000
 #define PM_GET_TRUSTZONE_VERSION	0xa03
+#define PM_SET_SUSPEND_MODE		0xa02
+#define GET_CALLBACK_DATA		0xa01
 
 /* Number of 32bits values in payload */
 #define PAYLOAD_ARG_CNT	4U
 
+/* Number of arguments for a callback */
+#define CB_ARG_CNT     4
+
+/* Payload size (consists of callback API ID + arguments) */
+#define CB_PAYLOAD_SIZE (CB_ARG_CNT + 1)
+
 enum pm_api_id {
 	PM_GET_API_VERSION = 1,
 	PM_RESET_ASSERT = 17,
 	PM_RESET_GET_STATUS,
+	PM_PM_INIT_FINALIZE = 21,
 	PM_GET_CHIPID = 24,
 	PM_IOCTL = 34,
 	PM_QUERY_DATA,
@@ -209,6 +218,12 @@ enum zynqmp_pm_reset {
 	ZYNQMP_PM_RESET_END = ZYNQMP_PM_RESET_PS_PL3
 };
 
+enum zynqmp_pm_suspend_reason {
+	SUSPEND_POWER_REQUEST = 201,
+	SUSPEND_ALERT,
+	SUSPEND_SYSTEM_SHUTDOWN,
+};
+
 /**
  * struct zynqmp_pm_query_data - PM query data
  * @qid:	query ID
@@ -240,8 +255,13 @@ struct zynqmp_eemi_ops {
 	int (*reset_assert)(const enum zynqmp_pm_reset reset,
 			    const enum zynqmp_pm_reset_action assert_flag);
 	int (*reset_get_status)(const enum zynqmp_pm_reset reset, u32 *status);
+	int (*init_finalize)(void);
+	int (*set_suspend_mode)(u32 mode);
 };
 
+int zynqmp_pm_invoke_fn(u32 pm_api_id, u32 arg0, u32 arg1,
+			u32 arg2, u32 arg3, u32 *ret_payload);
+
 #if IS_REACHABLE(CONFIG_ARCH_ZYNQMP)
 const struct zynqmp_eemi_ops *zynqmp_pm_get_eemi_ops(void);
 #else
-- 
cgit v1.2.3


From 8fd27fb4cf76a64d4ea4814b358541477dc7a31e Mon Sep 17 00:00:00 2001
From: Rajan Vaja <rajan.vaja@xilinx.com>
Date: Fri, 1 Feb 2019 14:08:48 -0800
Subject: dt-bindings: power: Add ZynqMP power domain bindings

Add documentation to describe ZynqMP power domain bindings.

Signed-off-by: Rajan Vaja <rajan.vaja@xilinx.com>
Signed-off-by: Jolly Shah <jolly.shah@xilinx.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Michal Simek <michal.simek@xilinx.com>
---
 .../bindings/power/xlnx,zynqmp-genpd.txt           | 34 +++++++++++++++++++
 include/dt-bindings/power/xlnx-zynqmp-power.h      | 39 ++++++++++++++++++++++
 2 files changed, 73 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/power/xlnx,zynqmp-genpd.txt
 create mode 100644 include/dt-bindings/power/xlnx-zynqmp-power.h

(limited to 'include')

diff --git a/Documentation/devicetree/bindings/power/xlnx,zynqmp-genpd.txt b/Documentation/devicetree/bindings/power/xlnx,zynqmp-genpd.txt
new file mode 100644
index 000000000000..8d1b8200ebd0
--- /dev/null
+++ b/Documentation/devicetree/bindings/power/xlnx,zynqmp-genpd.txt
@@ -0,0 +1,34 @@
+-----------------------------------------------------------
+Device Tree Bindings for the Xilinx Zynq MPSoC PM domains
+-----------------------------------------------------------
+The binding for zynqmp-power-controller follow the common
+generic PM domain binding[1].
+
+[1] Documentation/devicetree/bindings/power/power_domain.txt
+
+== Zynq MPSoC Generic PM Domain Node ==
+
+Required property:
+ - Below property should be in zynqmp-firmware node.
+ - #power-domain-cells:	Number of cells in a PM domain specifier. Must be 1.
+
+Power domain ID indexes are mentioned in
+include/dt-bindings/power/xlnx-zynqmp-power.h.
+
+-------
+Example
+-------
+
+firmware {
+	zynqmp_firmware: zynqmp-firmware {
+		...
+		#power-domain-cells = <1>;
+		...
+	};
+};
+
+sata {
+	...
+	power-domains = <&zynqmp_firmware 28>;
+	...
+};
diff --git a/include/dt-bindings/power/xlnx-zynqmp-power.h b/include/dt-bindings/power/xlnx-zynqmp-power.h
new file mode 100644
index 000000000000..0d9a412fd5e0
--- /dev/null
+++ b/include/dt-bindings/power/xlnx-zynqmp-power.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *  Copyright (C) 2018 Xilinx, Inc.
+ */
+
+#ifndef _DT_BINDINGS_ZYNQMP_POWER_H
+#define _DT_BINDINGS_ZYNQMP_POWER_H
+
+#define		PD_USB_0	22
+#define		PD_USB_1	23
+#define		PD_TTC_0	24
+#define		PD_TTC_1	25
+#define		PD_TTC_2	26
+#define		PD_TTC_3	27
+#define		PD_SATA		28
+#define		PD_ETH_0	29
+#define		PD_ETH_1	30
+#define		PD_ETH_2	31
+#define		PD_ETH_3	32
+#define		PD_UART_0	33
+#define		PD_UART_1	34
+#define		PD_SPI_0	35
+#define		PD_SPI_1	36
+#define		PD_I2C_0	37
+#define		PD_I2C_1	38
+#define		PD_SD_0		39
+#define		PD_SD_1		40
+#define		PD_DP		41
+#define		PD_GDMA		42
+#define		PD_ADMA		43
+#define		PD_NAND		44
+#define		PD_QSPI		45
+#define		PD_GPIO		46
+#define		PD_CAN_0	47
+#define		PD_CAN_1	48
+#define		PD_GPU		58
+#define		PD_PCIE		59
+
+#endif
-- 
cgit v1.2.3


From c1986ac3d483b051fc237aea3e9812fd1bb4d239 Mon Sep 17 00:00:00 2001
From: Rajan Vaja <rajan.vaja@xilinx.com>
Date: Fri, 1 Feb 2019 14:08:49 -0800
Subject: firmware: xilinx: Add APIs to control node status/power

Add Xilinx ZynqMP firmware APIs to control node status
and power. These APIs allows turning on/off power domain
and setting capabilities of devices present in power domain.

Signed-off-by: Rajan Vaja <rajan.vaja@xilinx.com>
Signed-off-by: Jolly Shah <jolly.shah@xilinx.com>
Signed-off-by: Michal Simek <michal.simek@xilinx.com>
---
 drivers/firmware/xilinx/zynqmp.c     | 58 ++++++++++++++++++++++++++++++++++++
 include/linux/firmware/xlnx-zynqmp.h | 26 ++++++++++++++++
 2 files changed, 84 insertions(+)

(limited to 'include')

diff --git a/drivers/firmware/xilinx/zynqmp.c b/drivers/firmware/xilinx/zynqmp.c
index 765a2ca1b100..af5cffd3ac43 100644
--- a/drivers/firmware/xilinx/zynqmp.c
+++ b/drivers/firmware/xilinx/zynqmp.c
@@ -557,6 +557,61 @@ static int zynqmp_pm_set_suspend_mode(u32 mode)
 	return zynqmp_pm_invoke_fn(PM_SET_SUSPEND_MODE, mode, 0, 0, 0, NULL);
 }
 
+/**
+ * zynqmp_pm_request_node() - Request a node with specific capabilities
+ * @node:		Node ID of the slave
+ * @capabilities:	Requested capabilities of the slave
+ * @qos:		Quality of service (not supported)
+ * @ack:		Flag to specify whether acknowledge is requested
+ *
+ * This function is used by master to request particular node from firmware.
+ * Every master must request node before using it.
+ *
+ * Return: Returns status, either success or error+reason
+ */
+static int zynqmp_pm_request_node(const u32 node, const u32 capabilities,
+				  const u32 qos,
+				  const enum zynqmp_pm_request_ack ack)
+{
+	return zynqmp_pm_invoke_fn(PM_REQUEST_NODE, node, capabilities,
+				   qos, ack, NULL);
+}
+
+/**
+ * zynqmp_pm_release_node() - Release a node
+ * @node:	Node ID of the slave
+ *
+ * This function is used by master to inform firmware that master
+ * has released node. Once released, master must not use that node
+ * without re-request.
+ *
+ * Return: Returns status, either success or error+reason
+ */
+static int zynqmp_pm_release_node(const u32 node)
+{
+	return zynqmp_pm_invoke_fn(PM_RELEASE_NODE, node, 0, 0, 0, NULL);
+}
+
+/**
+ * zynqmp_pm_set_requirement() - PM call to set requirement for PM slaves
+ * @node:		Node ID of the slave
+ * @capabilities:	Requested capabilities of the slave
+ * @qos:		Quality of service (not supported)
+ * @ack:		Flag to specify whether acknowledge is requested
+ *
+ * This API function is to be used for slaves a PU already has requested
+ * to change its capabilities.
+ *
+ * Return: Returns status, either success or error+reason
+ */
+static int zynqmp_pm_set_requirement(const u32 node, const u32 capabilities,
+				     const u32 qos,
+				     const enum zynqmp_pm_request_ack ack)
+{
+	return zynqmp_pm_invoke_fn(PM_SET_REQUIREMENT, node, capabilities,
+				   qos, ack, NULL);
+}
+
 static const struct zynqmp_eemi_ops eemi_ops = {
 	.get_api_version = zynqmp_pm_get_api_version,
 	.get_chipid = zynqmp_pm_get_chipid,
@@ -575,6 +630,9 @@ static const struct zynqmp_eemi_ops eemi_ops = {
 	.reset_get_status = zynqmp_pm_reset_get_status,
 	.init_finalize = zynqmp_pm_init_finalize,
 	.set_suspend_mode = zynqmp_pm_set_suspend_mode,
+	.request_node = zynqmp_pm_request_node,
+	.release_node = zynqmp_pm_release_node,
+	.set_requirement = zynqmp_pm_set_requirement,
 };
 
 /**
diff --git a/include/linux/firmware/xlnx-zynqmp.h b/include/linux/firmware/xlnx-zynqmp.h
index 56b2108a2148..642dab10f65d 100644
--- a/include/linux/firmware/xlnx-zynqmp.h
+++ b/include/linux/firmware/xlnx-zynqmp.h
@@ -40,8 +40,19 @@
 /* Payload size (consists of callback API ID + arguments) */
 #define CB_PAYLOAD_SIZE (CB_ARG_CNT + 1)
 
+#define ZYNQMP_PM_MAX_QOS		100U
+
+/* Node capabilities */
+#define	ZYNQMP_PM_CAPABILITY_ACCESS	0x1U
+#define	ZYNQMP_PM_CAPABILITY_CONTEXT	0x2U
+#define	ZYNQMP_PM_CAPABILITY_WAKEUP	0x4U
+#define	ZYNQMP_PM_CAPABILITY_POWER	0x8U
+
 enum pm_api_id {
 	PM_GET_API_VERSION = 1,
+	PM_REQUEST_NODE = 13,
+	PM_RELEASE_NODE,
+	PM_SET_REQUIREMENT,
 	PM_RESET_ASSERT = 17,
 	PM_RESET_GET_STATUS,
 	PM_PM_INIT_FINALIZE = 21,
@@ -224,6 +235,12 @@ enum zynqmp_pm_suspend_reason {
 	SUSPEND_SYSTEM_SHUTDOWN,
 };
 
+enum zynqmp_pm_request_ack {
+	ZYNQMP_PM_REQUEST_ACK_NO = 1,
+	ZYNQMP_PM_REQUEST_ACK_BLOCKING,
+	ZYNQMP_PM_REQUEST_ACK_NON_BLOCKING,
+};
+
 /**
  * struct zynqmp_pm_query_data - PM query data
  * @qid:	query ID
@@ -257,6 +274,15 @@ struct zynqmp_eemi_ops {
 	int (*reset_get_status)(const enum zynqmp_pm_reset reset, u32 *status);
 	int (*init_finalize)(void);
 	int (*set_suspend_mode)(u32 mode);
+	int (*request_node)(const u32 node,
+			    const u32 capabilities,
+			    const u32 qos,
+			    const enum zynqmp_pm_request_ack ack);
+	int (*release_node)(const u32 node);
+	int (*set_requirement)(const u32 node,
+			       const u32 capabilities,
+			       const u32 qos,
+			       const enum zynqmp_pm_request_ack ack);
 };
 
 int zynqmp_pm_invoke_fn(u32 pm_api_id, u32 arg0, u32 arg1,
-- 
cgit v1.2.3


From dd27c2e3d0a05c01ff14bb672d1a3f0fdd8f98fc Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Tue, 12 Feb 2019 00:20:39 -0800
Subject: bpf: offload: add priv field for drivers

Currently bpf_offload_dev does not have any priv pointer, forcing
the drivers to work backwards from the netdev in program metadata.
This is not great given programs are conceptually associated with
the offload device, and it means one or two unnecessary deferences.
Add a priv pointer to bpf_offload_dev.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 drivers/net/ethernet/netronome/nfp/bpf/main.c    |  2 +-
 drivers/net/ethernet/netronome/nfp/bpf/offload.c |  4 +---
 drivers/net/netdevsim/bpf.c                      |  5 +++--
 include/linux/bpf.h                              |  3 ++-
 kernel/bpf/offload.c                             | 10 +++++++++-
 5 files changed, 16 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.c b/drivers/net/ethernet/netronome/nfp/bpf/main.c
index dccae0319204..275de9f4c61c 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/main.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/main.c
@@ -465,7 +465,7 @@ static int nfp_bpf_init(struct nfp_app *app)
 		app->ctrl_mtu = nfp_bpf_ctrl_cmsg_mtu(bpf);
 	}
 
-	bpf->bpf_dev = bpf_offload_dev_create(&nfp_bpf_dev_ops);
+	bpf->bpf_dev = bpf_offload_dev_create(&nfp_bpf_dev_ops, bpf);
 	err = PTR_ERR_OR_ZERO(bpf->bpf_dev);
 	if (err)
 		goto err_free_neutral_maps;
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/offload.c b/drivers/net/ethernet/netronome/nfp/bpf/offload.c
index 55c7dbf8b421..15dce97650a5 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/offload.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/offload.c
@@ -185,8 +185,6 @@ static void nfp_prog_free(struct nfp_prog *nfp_prog)
 
 static int nfp_bpf_verifier_prep(struct bpf_prog *prog)
 {
-	struct nfp_net *nn = netdev_priv(prog->aux->offload->netdev);
-	struct nfp_app *app = nn->app;
 	struct nfp_prog *nfp_prog;
 	int ret;
 
@@ -197,7 +195,7 @@ static int nfp_bpf_verifier_prep(struct bpf_prog *prog)
 
 	INIT_LIST_HEAD(&nfp_prog->insns);
 	nfp_prog->type = prog->type;
-	nfp_prog->bpf = app->priv;
+	nfp_prog->bpf = bpf_offload_dev_priv(prog->aux->offload->offdev);
 
 	ret = nfp_prog_prepare(nfp_prog, prog->insnsi, prog->len);
 	if (ret)
diff --git a/drivers/net/netdevsim/bpf.c b/drivers/net/netdevsim/bpf.c
index 172b271c8bd2..f92c43453ec6 100644
--- a/drivers/net/netdevsim/bpf.c
+++ b/drivers/net/netdevsim/bpf.c
@@ -248,7 +248,7 @@ static int nsim_bpf_create_prog(struct netdevsim *ns, struct bpf_prog *prog)
 
 static int nsim_bpf_verifier_prep(struct bpf_prog *prog)
 {
-	struct netdevsim *ns = netdev_priv(prog->aux->offload->netdev);
+	struct netdevsim *ns = bpf_offload_dev_priv(prog->aux->offload->offdev);
 
 	if (!ns->bpf_bind_accept)
 		return -EOPNOTSUPP;
@@ -589,7 +589,8 @@ int nsim_bpf_init(struct netdevsim *ns)
 		if (IS_ERR_OR_NULL(ns->sdev->ddir_bpf_bound_progs))
 			return -ENOMEM;
 
-		ns->sdev->bpf_dev = bpf_offload_dev_create(&nsim_bpf_dev_ops);
+		ns->sdev->bpf_dev = bpf_offload_dev_create(&nsim_bpf_dev_ops,
+							   ns);
 		err = PTR_ERR_OR_ZERO(ns->sdev->bpf_dev);
 		if (err)
 			return err;
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 7f58828755fd..de18227b3d95 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -773,8 +773,9 @@ int bpf_map_offload_get_next_key(struct bpf_map *map,
 bool bpf_offload_prog_map_match(struct bpf_prog *prog, struct bpf_map *map);
 
 struct bpf_offload_dev *
-bpf_offload_dev_create(const struct bpf_prog_offload_ops *ops);
+bpf_offload_dev_create(const struct bpf_prog_offload_ops *ops, void *priv);
 void bpf_offload_dev_destroy(struct bpf_offload_dev *offdev);
+void *bpf_offload_dev_priv(struct bpf_offload_dev *offdev);
 int bpf_offload_dev_netdev_register(struct bpf_offload_dev *offdev,
 				    struct net_device *netdev);
 void bpf_offload_dev_netdev_unregister(struct bpf_offload_dev *offdev,
diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c
index 39dba8c90331..ba635209ae9a 100644
--- a/kernel/bpf/offload.c
+++ b/kernel/bpf/offload.c
@@ -35,6 +35,7 @@ static DECLARE_RWSEM(bpf_devs_lock);
 struct bpf_offload_dev {
 	const struct bpf_prog_offload_ops *ops;
 	struct list_head netdevs;
+	void *priv;
 };
 
 struct bpf_offload_netdev {
@@ -669,7 +670,7 @@ unlock:
 EXPORT_SYMBOL_GPL(bpf_offload_dev_netdev_unregister);
 
 struct bpf_offload_dev *
-bpf_offload_dev_create(const struct bpf_prog_offload_ops *ops)
+bpf_offload_dev_create(const struct bpf_prog_offload_ops *ops, void *priv)
 {
 	struct bpf_offload_dev *offdev;
 	int err;
@@ -688,6 +689,7 @@ bpf_offload_dev_create(const struct bpf_prog_offload_ops *ops)
 		return ERR_PTR(-ENOMEM);
 
 	offdev->ops = ops;
+	offdev->priv = priv;
 	INIT_LIST_HEAD(&offdev->netdevs);
 
 	return offdev;
@@ -700,3 +702,9 @@ void bpf_offload_dev_destroy(struct bpf_offload_dev *offdev)
 	kfree(offdev);
 }
 EXPORT_SYMBOL_GPL(bpf_offload_dev_destroy);
+
+void *bpf_offload_dev_priv(struct bpf_offload_dev *offdev)
+{
+	return offdev->priv;
+}
+EXPORT_SYMBOL_GPL(bpf_offload_dev_priv);
-- 
cgit v1.2.3


From 9fa505adf9f2914637e4e81620f62dcdaa575f71 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Fri, 8 Feb 2019 13:25:03 -0800
Subject: scsi: core: Move resid from scsi_data_buffer to scsi_cmnd

This patch does not change any functionality but reduces the size of
struct scsi_cmnd.

Cc: Douglas Gilbert <dgilbert@interlog.com>
Cc: Hannes Reinecke <hare@suse.com>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/scsi_lib.c  | 1 -
 include/scsi/scsi_cmnd.h | 5 ++---
 2 files changed, 2 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index d25101c5e3b8..c77cb0d31dbc 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -938,7 +938,6 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
 		 * scsi_result_to_blk_status may have reset the host_byte
 		 */
 		scsi_req(req)->result = cmd->result;
-		scsi_req(req)->resid_len = scsi_get_resid(cmd);
 	}
 
 	/*
diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h
index af6ae3e1eba6..6c7793c2ad01 100644
--- a/include/scsi/scsi_cmnd.h
+++ b/include/scsi/scsi_cmnd.h
@@ -35,7 +35,6 @@ struct scsi_driver;
 struct scsi_data_buffer {
 	struct sg_table table;
 	unsigned length;
-	int resid;
 };
 
 /* embedded in scsi_cmnd */
@@ -202,12 +201,12 @@ static inline unsigned scsi_bufflen(struct scsi_cmnd *cmd)
 
 static inline void scsi_set_resid(struct scsi_cmnd *cmd, int resid)
 {
-	cmd->sdb.resid = resid;
+	cmd->req.resid_len = resid;
 }
 
 static inline int scsi_get_resid(struct scsi_cmnd *cmd)
 {
-	return cmd->sdb.resid;
+	return cmd->req.resid_len;
 }
 
 #define scsi_for_each_sg(cmd, sg, nseg, __i)			\
-- 
cgit v1.2.3


From ecefff3e5b9b6a427a1a78c0c3f5eb147fd2d761 Mon Sep 17 00:00:00 2001
From: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Date: Fri, 8 Feb 2019 17:45:56 -0600
Subject: ASoC: soc-acpi: remove asoc_plat_name field

This field was never used, let's remove it

Signed-off-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/soc-acpi.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include')

diff --git a/include/sound/soc-acpi.h b/include/sound/soc-acpi.h
index 6cbbeed9cdd0..655e4e010cc8 100644
--- a/include/sound/soc-acpi.h
+++ b/include/sound/soc-acpi.h
@@ -86,8 +86,6 @@ struct snd_soc_acpi_mach_params {
  *  is not constant since this field may be updated at run-time
  * @sof_fw_filename: Sound Open Firmware file name, if enabled
  * @sof_tplg_filename: Sound Open Firmware topology file name, if enabled
- * @asoc_plat_name: ASoC platform name, used for binding machine drivers
- * if non NULL
  * @new_mach_data: machine driver private data fixup
  */
 /* Descriptor for SST ASoC machine driver */
@@ -102,7 +100,6 @@ struct snd_soc_acpi_mach {
 	struct snd_soc_acpi_mach_params mach_params;
 	const char *sof_fw_filename;
 	const char *sof_tplg_filename;
-	const char *asoc_plat_name;
 	struct platform_device * (*new_mach_data)(void *pdata);
 };
 
-- 
cgit v1.2.3


From b3d8f7cad1b41411de443018cc5323070db06ab2 Mon Sep 17 00:00:00 2001
From: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Date: Fri, 8 Feb 2019 17:45:57 -0600
Subject: ASoC: soc-acpi: remove new_mach_data field

We never used this field (or in older SOF implementations), let's
remove it

Signed-off-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/soc-acpi.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/sound/soc-acpi.h b/include/sound/soc-acpi.h
index 655e4e010cc8..35b38e41e5b2 100644
--- a/include/sound/soc-acpi.h
+++ b/include/sound/soc-acpi.h
@@ -86,7 +86,6 @@ struct snd_soc_acpi_mach_params {
  *  is not constant since this field may be updated at run-time
  * @sof_fw_filename: Sound Open Firmware file name, if enabled
  * @sof_tplg_filename: Sound Open Firmware topology file name, if enabled
- * @new_mach_data: machine driver private data fixup
  */
 /* Descriptor for SST ASoC machine driver */
 struct snd_soc_acpi_mach {
@@ -100,7 +99,6 @@ struct snd_soc_acpi_mach {
 	struct snd_soc_acpi_mach_params mach_params;
 	const char *sof_fw_filename;
 	const char *sof_tplg_filename;
-	struct platform_device * (*new_mach_data)(void *pdata);
 };
 
 #define SND_SOC_ACPI_MAX_CODECS 3
-- 
cgit v1.2.3


From da203dfa89ce83c55b6623f73560ef7ec742aca4 Mon Sep 17 00:00:00 2001
From: Vasundhara Volam <vasundhara-v.volam@broadcom.com>
Date: Mon, 11 Feb 2019 14:46:17 +0530
Subject: Revert "devlink: Add a generic wake_on_lan port parameter"

This reverts commit b639583f9e36d044ac1b13090ae812266992cbac.

As per discussion with Jakub Kicinski and Michal Kubecek,
this will be better addressed by soon-too-come ethtool netlink
API with additional indication that given configuration request
is supposed to be persisted.

Also, remove the parameter support from bnxt_en driver.

Cc: Jiri Pirko <jiri@mellanox.com>
Cc: Michael Chan <michael.chan@broadcom.com>
Cc: Michal Kubecek <mkubecek@suse.cz>
Suggested-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Vasundhara Volam <vasundhara-v.volam@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c | 19 +------------------
 drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.h |  1 -
 include/net/devlink.h                             |  8 --------
 net/core/devlink.c                                |  5 -----
 4 files changed, 1 insertion(+), 32 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
index 2955e404fd18..e1feb97bcd81 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
@@ -37,8 +37,6 @@ static const struct bnxt_dl_nvm_param nvm_params[] = {
 	 NVM_OFF_MSIX_VEC_PER_PF_MIN, BNXT_NVM_SHARED_CFG, 7},
 	{BNXT_DEVLINK_PARAM_ID_GRE_VER_CHECK, NVM_OFF_DIS_GRE_VER_CHECK,
 	 BNXT_NVM_SHARED_CFG, 1},
-
-	{DEVLINK_PARAM_GENERIC_ID_WOL, NVM_OFF_WOL, BNXT_NVM_PORT_CFG, 1},
 };
 
 static int bnxt_hwrm_nvm_req(struct bnxt *bp, u32 param_id, void *msg,
@@ -72,8 +70,7 @@ static int bnxt_hwrm_nvm_req(struct bnxt *bp, u32 param_id, void *msg,
 	bytesize = roundup(nvm_param.num_bits, BITS_PER_BYTE) / BITS_PER_BYTE;
 	switch (bytesize) {
 	case 1:
-		if (nvm_param.num_bits == 1 &&
-		    nvm_param.id != DEVLINK_PARAM_GENERIC_ID_WOL)
+		if (nvm_param.num_bits == 1)
 			buf = &val->vbool;
 		else
 			buf = &val->vu8;
@@ -167,17 +164,6 @@ static int bnxt_dl_msix_validate(struct devlink *dl, u32 id,
 	return 0;
 }
 
-static int bnxt_dl_wol_validate(struct devlink *dl, u32 id,
-				union devlink_param_value val,
-				struct netlink_ext_ack *extack)
-{
-	if (val.vu8 && val.vu8 != DEVLINK_PARAM_WAKE_MAGIC) {
-		NL_SET_ERR_MSG_MOD(extack, "WOL type is not supported");
-		return -EINVAL;
-	}
-	return 0;
-}
-
 static const struct devlink_param bnxt_dl_params[] = {
 	DEVLINK_PARAM_GENERIC(ENABLE_SRIOV,
 			      BIT(DEVLINK_PARAM_CMODE_PERMANENT),
@@ -203,9 +189,6 @@ static const struct devlink_param bnxt_dl_params[] = {
 };
 
 static const struct devlink_param bnxt_dl_port_params[] = {
-	DEVLINK_PARAM_GENERIC(WOL, BIT(DEVLINK_PARAM_CMODE_PERMANENT),
-			      bnxt_dl_nvm_param_get, bnxt_dl_nvm_param_set,
-			      bnxt_dl_wol_validate),
 };
 
 int bnxt_dl_register(struct bnxt *bp)
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.h
index da065ca84cc7..5b6b2c7d97cf 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.h
@@ -35,7 +35,6 @@ static inline void bnxt_link_bp_to_dl(struct bnxt *bp, struct devlink *dl)
 
 #define NVM_OFF_MSIX_VEC_PER_PF_MAX	108
 #define NVM_OFF_MSIX_VEC_PER_PF_MIN	114
-#define NVM_OFF_WOL			152
 #define NVM_OFF_IGNORE_ARI		164
 #define NVM_OFF_DIS_GRE_VER_CHECK	171
 #define NVM_OFF_ENABLE_SRIOV		401
diff --git a/include/net/devlink.h b/include/net/devlink.h
index 07660fe4c0e3..c6d88759b7d5 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -370,17 +370,12 @@ enum devlink_param_generic_id {
 	DEVLINK_PARAM_GENERIC_ID_MSIX_VEC_PER_PF_MAX,
 	DEVLINK_PARAM_GENERIC_ID_MSIX_VEC_PER_PF_MIN,
 	DEVLINK_PARAM_GENERIC_ID_FW_LOAD_POLICY,
-	DEVLINK_PARAM_GENERIC_ID_WOL,
 
 	/* add new param generic ids above here*/
 	__DEVLINK_PARAM_GENERIC_ID_MAX,
 	DEVLINK_PARAM_GENERIC_ID_MAX = __DEVLINK_PARAM_GENERIC_ID_MAX - 1,
 };
 
-enum devlink_param_wol_types {
-	DEVLINK_PARAM_WAKE_MAGIC = (1 << 0),
-};
-
 #define DEVLINK_PARAM_GENERIC_INT_ERR_RESET_NAME "internal_error_reset"
 #define DEVLINK_PARAM_GENERIC_INT_ERR_RESET_TYPE DEVLINK_PARAM_TYPE_BOOL
 
@@ -405,9 +400,6 @@ enum devlink_param_wol_types {
 #define DEVLINK_PARAM_GENERIC_FW_LOAD_POLICY_NAME "fw_load_policy"
 #define DEVLINK_PARAM_GENERIC_FW_LOAD_POLICY_TYPE DEVLINK_PARAM_TYPE_U8
 
-#define DEVLINK_PARAM_GENERIC_WOL_NAME "wake_on_lan"
-#define DEVLINK_PARAM_GENERIC_WOL_TYPE DEVLINK_PARAM_TYPE_U8
-
 #define DEVLINK_PARAM_GENERIC(_id, _cmodes, _get, _set, _validate)	\
 {									\
 	.id = DEVLINK_PARAM_GENERIC_ID_##_id,				\
diff --git a/net/core/devlink.c b/net/core/devlink.c
index ec02459eea94..d3dfdcacf7eb 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -2701,11 +2701,6 @@ static const struct devlink_param devlink_param_generic[] = {
 		.name = DEVLINK_PARAM_GENERIC_FW_LOAD_POLICY_NAME,
 		.type = DEVLINK_PARAM_GENERIC_FW_LOAD_POLICY_TYPE,
 	},
-	{
-		.id = DEVLINK_PARAM_GENERIC_ID_WOL,
-		.name = DEVLINK_PARAM_GENERIC_WOL_NAME,
-		.type = DEVLINK_PARAM_GENERIC_WOL_TYPE,
-	},
 };
 
 static int devlink_param_generic_verify(const struct devlink_param *param)
-- 
cgit v1.2.3


From 86e58135bc4ac68b41be11d82f1b1f87cb6119ba Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@armlinux.org.uk>
Date: Mon, 11 Feb 2019 11:46:06 +0000
Subject: net: phylink: add phylink_init_eee() helper

Provide phylink_init_eee() to allow MAC drivers to initialise PHY EEE
from within the ethtool set_eee() method.

Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/phylink.c | 18 ++++++++++++++++++
 include/linux/phylink.h   |  1 +
 2 files changed, 19 insertions(+)

(limited to 'include')

diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c
index a148866cbb14..33f66dcd369a 100644
--- a/drivers/net/phy/phylink.c
+++ b/drivers/net/phy/phylink.c
@@ -1271,6 +1271,24 @@ int phylink_get_eee_err(struct phylink *pl)
 }
 EXPORT_SYMBOL_GPL(phylink_get_eee_err);
 
+/**
+ * phylink_init_eee() - init and check the EEE features
+ * @pl: a pointer to a &struct phylink returned from phylink_create()
+ * @clk_stop_enable: allow PHY to stop receive clock
+ *
+ * Must be called either with RTNL held or within mac_link_up()
+ */
+int phylink_init_eee(struct phylink *pl, bool clk_stop_enable)
+{
+	int ret = -EOPNOTSUPP;
+
+	if (pl->phydev)
+		ret = phy_init_eee(pl->phydev, clk_stop_enable);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(phylink_init_eee);
+
 /**
  * phylink_ethtool_get_eee() - read the energy efficient ethernet parameters
  * @pl: a pointer to a &struct phylink returned from phylink_create()
diff --git a/include/linux/phylink.h b/include/linux/phylink.h
index 021fc6595856..f57059e4353f 100644
--- a/include/linux/phylink.h
+++ b/include/linux/phylink.h
@@ -220,6 +220,7 @@ void phylink_ethtool_get_pauseparam(struct phylink *,
 int phylink_ethtool_set_pauseparam(struct phylink *,
 				   struct ethtool_pauseparam *);
 int phylink_get_eee_err(struct phylink *);
+int phylink_init_eee(struct phylink *, bool);
 int phylink_ethtool_get_eee(struct phylink *, struct ethtool_eee *);
 int phylink_ethtool_set_eee(struct phylink *, struct ethtool_eee *);
 int phylink_mii_ioctl(struct phylink *, struct ifreq *, int);
-- 
cgit v1.2.3


From 4ea7b0cf0da7494d5c7b8dc328493c50640d0cbc Mon Sep 17 00:00:00 2001
From: Brian Norris <briannorris@chromium.org>
Date: Mon, 11 Feb 2019 13:02:25 -0800
Subject: net/skbuff: fix up kernel-doc placement

There are several skb_* functions where the locked and unlocked
functions are confusingly documented. For several of them, the
kernel-doc for the unlocked version is placed above the locked version,
which to the casual reader makes it seems like the locked version "takes
no locks and you must therefore hold required locks before calling it."

One can see, for example, that this link claims to document
skb_queue_head(), while instead describing __skb_queue_head().

https://www.kernel.org/doc/html/latest/networking/kapi.html#c.skb_queue_head

The correct documentation for skb_queue_head() is also included further
down the page.

This diff tested via:

  $ scripts/kernel-doc -rst include/linux/skbuff.h net/core/skbuff.c

No new warnings were seen, and the output makes a little more sense.

Signed-off-by: Brian Norris <briannorris@chromium.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 831846617d07..a41e84f7730c 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1889,12 +1889,12 @@ static inline void __skb_queue_before(struct sk_buff_head *list,
  *
  *	A buffer cannot be placed on two lists at the same time.
  */
-void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk);
 static inline void __skb_queue_head(struct sk_buff_head *list,
 				    struct sk_buff *newsk)
 {
 	__skb_queue_after(list, (struct sk_buff *)list, newsk);
 }
+void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk);
 
 /**
  *	__skb_queue_tail - queue a buffer at the list tail
@@ -1906,12 +1906,12 @@ static inline void __skb_queue_head(struct sk_buff_head *list,
  *
  *	A buffer cannot be placed on two lists at the same time.
  */
-void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk);
 static inline void __skb_queue_tail(struct sk_buff_head *list,
 				   struct sk_buff *newsk)
 {
 	__skb_queue_before(list, (struct sk_buff *)list, newsk);
 }
+void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk);
 
 /*
  * remove sk_buff from list. _Must_ be called atomically, and with
@@ -1938,7 +1938,6 @@ static inline void __skb_unlink(struct sk_buff *skb, struct sk_buff_head *list)
  *	so must be used with appropriate locks held only. The head item is
  *	returned or %NULL if the list is empty.
  */
-struct sk_buff *skb_dequeue(struct sk_buff_head *list);
 static inline struct sk_buff *__skb_dequeue(struct sk_buff_head *list)
 {
 	struct sk_buff *skb = skb_peek(list);
@@ -1946,6 +1945,7 @@ static inline struct sk_buff *__skb_dequeue(struct sk_buff_head *list)
 		__skb_unlink(skb, list);
 	return skb;
 }
+struct sk_buff *skb_dequeue(struct sk_buff_head *list);
 
 /**
  *	__skb_dequeue_tail - remove from the tail of the queue
@@ -1955,7 +1955,6 @@ static inline struct sk_buff *__skb_dequeue(struct sk_buff_head *list)
  *	so must be used with appropriate locks held only. The tail item is
  *	returned or %NULL if the list is empty.
  */
-struct sk_buff *skb_dequeue_tail(struct sk_buff_head *list);
 static inline struct sk_buff *__skb_dequeue_tail(struct sk_buff_head *list)
 {
 	struct sk_buff *skb = skb_peek_tail(list);
@@ -1963,6 +1962,7 @@ static inline struct sk_buff *__skb_dequeue_tail(struct sk_buff_head *list)
 		__skb_unlink(skb, list);
 	return skb;
 }
+struct sk_buff *skb_dequeue_tail(struct sk_buff_head *list);
 
 
 static inline bool skb_is_nonlinear(const struct sk_buff *skb)
@@ -2653,13 +2653,13 @@ static inline int skb_orphan_frags_rx(struct sk_buff *skb, gfp_t gfp_mask)
  *	the list and one reference dropped. This function does not take the
  *	list lock and the caller must hold the relevant locks to use it.
  */
-void skb_queue_purge(struct sk_buff_head *list);
 static inline void __skb_queue_purge(struct sk_buff_head *list)
 {
 	struct sk_buff *skb;
 	while ((skb = __skb_dequeue(list)) != NULL)
 		kfree_skb(skb);
 }
+void skb_queue_purge(struct sk_buff_head *list);
 
 unsigned int skb_rbtree_purge(struct rb_root *root);
 
@@ -3028,7 +3028,7 @@ static inline int skb_padto(struct sk_buff *skb, unsigned int len)
 }
 
 /**
- *	skb_put_padto - increase size and pad an skbuff up to a minimal size
+ *	__skb_put_padto - increase size and pad an skbuff up to a minimal size
  *	@skb: buffer to pad
  *	@len: minimal length
  *	@free_on_error: free buffer on error
-- 
cgit v1.2.3


From 1e562c815e67185e030bcaa06323e95d85d80987 Mon Sep 17 00:00:00 2001
From: Yangbo Lu <yangbo.lu@nxp.com>
Date: Tue, 12 Feb 2019 12:23:56 +0800
Subject: ptp_qoriq: make structure/function names more consistent

Strings containing "ptp_qoriq" or "qoriq_ptp" which were used for
structure/function names were complained by users. Let's just use
the unique "ptp_qoriq" to make these names more consistent.
This patch is just to unify the names using "ptp_qoriq". It hasn't
changed any functions.

Signed-off-by: Yangbo Lu <yangbo.lu@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c |   2 +-
 drivers/net/ethernet/freescale/gianfar_ethtool.c   |   2 +-
 drivers/ptp/ptp_qoriq.c                            | 288 ++++++++++-----------
 drivers/ptp/ptp_qoriq_debugfs.c                    |  36 +--
 include/linux/fsl/ptp_qoriq.h                      |  14 +-
 5 files changed, 171 insertions(+), 171 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c b/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c
index 62497119c85f..bdee441bc3b7 100644
--- a/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c
+++ b/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c
@@ -501,7 +501,7 @@ static int dpaa_get_ts_info(struct net_device *net_dev,
 	struct device_node *mac_node = dev->of_node;
 	struct device_node *fman_node = NULL, *ptp_node = NULL;
 	struct platform_device *ptp_dev = NULL;
-	struct qoriq_ptp *ptp = NULL;
+	struct ptp_qoriq *ptp = NULL;
 
 	info->phc_index = -1;
 
diff --git a/drivers/net/ethernet/freescale/gianfar_ethtool.c b/drivers/net/ethernet/freescale/gianfar_ethtool.c
index 241325c35cb4..27ed995f439a 100644
--- a/drivers/net/ethernet/freescale/gianfar_ethtool.c
+++ b/drivers/net/ethernet/freescale/gianfar_ethtool.c
@@ -1492,7 +1492,7 @@ static int gfar_get_ts_info(struct net_device *dev,
 	struct gfar_private *priv = netdev_priv(dev);
 	struct platform_device *ptp_dev;
 	struct device_node *ptp_node;
-	struct qoriq_ptp *ptp = NULL;
+	struct ptp_qoriq *ptp = NULL;
 
 	info->phc_index = -1;
 
diff --git a/drivers/ptp/ptp_qoriq.c b/drivers/ptp/ptp_qoriq.c
index 43416b2e8a13..8c10d0f8864f 100644
--- a/drivers/ptp/ptp_qoriq.c
+++ b/drivers/ptp/ptp_qoriq.c
@@ -37,10 +37,10 @@
  * Register access functions
  */
 
-/* Caller must hold qoriq_ptp->lock. */
-static u64 tmr_cnt_read(struct qoriq_ptp *qoriq_ptp)
+/* Caller must hold ptp_qoriq->lock. */
+static u64 tmr_cnt_read(struct ptp_qoriq *ptp_qoriq)
 {
-	struct qoriq_ptp_registers *regs = &qoriq_ptp->regs;
+	struct ptp_qoriq_registers *regs = &ptp_qoriq->regs;
 	u64 ns;
 	u32 lo, hi;
 
@@ -51,10 +51,10 @@ static u64 tmr_cnt_read(struct qoriq_ptp *qoriq_ptp)
 	return ns;
 }
 
-/* Caller must hold qoriq_ptp->lock. */
-static void tmr_cnt_write(struct qoriq_ptp *qoriq_ptp, u64 ns)
+/* Caller must hold ptp_qoriq->lock. */
+static void tmr_cnt_write(struct ptp_qoriq *ptp_qoriq, u64 ns)
 {
-	struct qoriq_ptp_registers *regs = &qoriq_ptp->regs;
+	struct ptp_qoriq_registers *regs = &ptp_qoriq->regs;
 	u32 hi = ns >> 32;
 	u32 lo = ns & 0xffffffff;
 
@@ -62,36 +62,36 @@ static void tmr_cnt_write(struct qoriq_ptp *qoriq_ptp, u64 ns)
 	qoriq_write(&regs->ctrl_regs->tmr_cnt_h, hi);
 }
 
-/* Caller must hold qoriq_ptp->lock. */
-static void set_alarm(struct qoriq_ptp *qoriq_ptp)
+/* Caller must hold ptp_qoriq->lock. */
+static void set_alarm(struct ptp_qoriq *ptp_qoriq)
 {
-	struct qoriq_ptp_registers *regs = &qoriq_ptp->regs;
+	struct ptp_qoriq_registers *regs = &ptp_qoriq->regs;
 	u64 ns;
 	u32 lo, hi;
 
-	ns = tmr_cnt_read(qoriq_ptp) + 1500000000ULL;
+	ns = tmr_cnt_read(ptp_qoriq) + 1500000000ULL;
 	ns = div_u64(ns, 1000000000UL) * 1000000000ULL;
-	ns -= qoriq_ptp->tclk_period;
+	ns -= ptp_qoriq->tclk_period;
 	hi = ns >> 32;
 	lo = ns & 0xffffffff;
 	qoriq_write(&regs->alarm_regs->tmr_alarm1_l, lo);
 	qoriq_write(&regs->alarm_regs->tmr_alarm1_h, hi);
 }
 
-/* Caller must hold qoriq_ptp->lock. */
-static void set_fipers(struct qoriq_ptp *qoriq_ptp)
+/* Caller must hold ptp_qoriq->lock. */
+static void set_fipers(struct ptp_qoriq *ptp_qoriq)
 {
-	struct qoriq_ptp_registers *regs = &qoriq_ptp->regs;
+	struct ptp_qoriq_registers *regs = &ptp_qoriq->regs;
 
-	set_alarm(qoriq_ptp);
-	qoriq_write(&regs->fiper_regs->tmr_fiper1, qoriq_ptp->tmr_fiper1);
-	qoriq_write(&regs->fiper_regs->tmr_fiper2, qoriq_ptp->tmr_fiper2);
+	set_alarm(ptp_qoriq);
+	qoriq_write(&regs->fiper_regs->tmr_fiper1, ptp_qoriq->tmr_fiper1);
+	qoriq_write(&regs->fiper_regs->tmr_fiper2, ptp_qoriq->tmr_fiper2);
 }
 
-static int extts_clean_up(struct qoriq_ptp *qoriq_ptp, int index,
+static int extts_clean_up(struct ptp_qoriq *ptp_qoriq, int index,
 			  bool update_event)
 {
-	struct qoriq_ptp_registers *regs = &qoriq_ptp->regs;
+	struct ptp_qoriq_registers *regs = &ptp_qoriq->regs;
 	struct ptp_clock_event event;
 	void __iomem *reg_etts_l;
 	void __iomem *reg_etts_h;
@@ -122,11 +122,11 @@ static int extts_clean_up(struct qoriq_ptp *qoriq_ptp, int index,
 		if (update_event) {
 			event.timestamp = ((u64) hi) << 32;
 			event.timestamp |= lo;
-			ptp_clock_event(qoriq_ptp->clock, &event);
+			ptp_clock_event(ptp_qoriq->clock, &event);
 		}
 
 		stat = qoriq_read(&regs->ctrl_regs->tmr_stat);
-	} while (qoriq_ptp->extts_fifo_support && (stat & valid));
+	} while (ptp_qoriq->extts_fifo_support && (stat & valid));
 
 	return 0;
 }
@@ -137,61 +137,61 @@ static int extts_clean_up(struct qoriq_ptp *qoriq_ptp, int index,
 
 static irqreturn_t isr(int irq, void *priv)
 {
-	struct qoriq_ptp *qoriq_ptp = priv;
-	struct qoriq_ptp_registers *regs = &qoriq_ptp->regs;
+	struct ptp_qoriq *ptp_qoriq = priv;
+	struct ptp_qoriq_registers *regs = &ptp_qoriq->regs;
 	struct ptp_clock_event event;
 	u64 ns;
 	u32 ack = 0, lo, hi, mask, val, irqs;
 
-	spin_lock(&qoriq_ptp->lock);
+	spin_lock(&ptp_qoriq->lock);
 
 	val = qoriq_read(&regs->ctrl_regs->tmr_tevent);
 	mask = qoriq_read(&regs->ctrl_regs->tmr_temask);
 
-	spin_unlock(&qoriq_ptp->lock);
+	spin_unlock(&ptp_qoriq->lock);
 
 	irqs = val & mask;
 
 	if (irqs & ETS1) {
 		ack |= ETS1;
-		extts_clean_up(qoriq_ptp, 0, true);
+		extts_clean_up(ptp_qoriq, 0, true);
 	}
 
 	if (irqs & ETS2) {
 		ack |= ETS2;
-		extts_clean_up(qoriq_ptp, 1, true);
+		extts_clean_up(ptp_qoriq, 1, true);
 	}
 
 	if (irqs & ALM2) {
 		ack |= ALM2;
-		if (qoriq_ptp->alarm_value) {
+		if (ptp_qoriq->alarm_value) {
 			event.type = PTP_CLOCK_ALARM;
 			event.index = 0;
-			event.timestamp = qoriq_ptp->alarm_value;
-			ptp_clock_event(qoriq_ptp->clock, &event);
+			event.timestamp = ptp_qoriq->alarm_value;
+			ptp_clock_event(ptp_qoriq->clock, &event);
 		}
-		if (qoriq_ptp->alarm_interval) {
-			ns = qoriq_ptp->alarm_value + qoriq_ptp->alarm_interval;
+		if (ptp_qoriq->alarm_interval) {
+			ns = ptp_qoriq->alarm_value + ptp_qoriq->alarm_interval;
 			hi = ns >> 32;
 			lo = ns & 0xffffffff;
 			qoriq_write(&regs->alarm_regs->tmr_alarm2_l, lo);
 			qoriq_write(&regs->alarm_regs->tmr_alarm2_h, hi);
-			qoriq_ptp->alarm_value = ns;
+			ptp_qoriq->alarm_value = ns;
 		} else {
-			spin_lock(&qoriq_ptp->lock);
+			spin_lock(&ptp_qoriq->lock);
 			mask = qoriq_read(&regs->ctrl_regs->tmr_temask);
 			mask &= ~ALM2EN;
 			qoriq_write(&regs->ctrl_regs->tmr_temask, mask);
-			spin_unlock(&qoriq_ptp->lock);
-			qoriq_ptp->alarm_value = 0;
-			qoriq_ptp->alarm_interval = 0;
+			spin_unlock(&ptp_qoriq->lock);
+			ptp_qoriq->alarm_value = 0;
+			ptp_qoriq->alarm_interval = 0;
 		}
 	}
 
 	if (irqs & PP1) {
 		ack |= PP1;
 		event.type = PTP_CLOCK_PPS;
-		ptp_clock_event(qoriq_ptp->clock, &event);
+		ptp_clock_event(ptp_qoriq->clock, &event);
 	}
 
 	if (ack) {
@@ -210,14 +210,14 @@ static int ptp_qoriq_adjfine(struct ptp_clock_info *ptp, long scaled_ppm)
 	u64 adj, diff;
 	u32 tmr_add;
 	int neg_adj = 0;
-	struct qoriq_ptp *qoriq_ptp = container_of(ptp, struct qoriq_ptp, caps);
-	struct qoriq_ptp_registers *regs = &qoriq_ptp->regs;
+	struct ptp_qoriq *ptp_qoriq = container_of(ptp, struct ptp_qoriq, caps);
+	struct ptp_qoriq_registers *regs = &ptp_qoriq->regs;
 
 	if (scaled_ppm < 0) {
 		neg_adj = 1;
 		scaled_ppm = -scaled_ppm;
 	}
-	tmr_add = qoriq_ptp->tmr_add;
+	tmr_add = ptp_qoriq->tmr_add;
 	adj = tmr_add;
 
 	/* calculate diff as adj*(scaled_ppm/65536)/1000000
@@ -238,16 +238,16 @@ static int ptp_qoriq_adjtime(struct ptp_clock_info *ptp, s64 delta)
 {
 	s64 now;
 	unsigned long flags;
-	struct qoriq_ptp *qoriq_ptp = container_of(ptp, struct qoriq_ptp, caps);
+	struct ptp_qoriq *ptp_qoriq = container_of(ptp, struct ptp_qoriq, caps);
 
-	spin_lock_irqsave(&qoriq_ptp->lock, flags);
+	spin_lock_irqsave(&ptp_qoriq->lock, flags);
 
-	now = tmr_cnt_read(qoriq_ptp);
+	now = tmr_cnt_read(ptp_qoriq);
 	now += delta;
-	tmr_cnt_write(qoriq_ptp, now);
-	set_fipers(qoriq_ptp);
+	tmr_cnt_write(ptp_qoriq, now);
+	set_fipers(ptp_qoriq);
 
-	spin_unlock_irqrestore(&qoriq_ptp->lock, flags);
+	spin_unlock_irqrestore(&ptp_qoriq->lock, flags);
 
 	return 0;
 }
@@ -257,13 +257,13 @@ static int ptp_qoriq_gettime(struct ptp_clock_info *ptp,
 {
 	u64 ns;
 	unsigned long flags;
-	struct qoriq_ptp *qoriq_ptp = container_of(ptp, struct qoriq_ptp, caps);
+	struct ptp_qoriq *ptp_qoriq = container_of(ptp, struct ptp_qoriq, caps);
 
-	spin_lock_irqsave(&qoriq_ptp->lock, flags);
+	spin_lock_irqsave(&ptp_qoriq->lock, flags);
 
-	ns = tmr_cnt_read(qoriq_ptp);
+	ns = tmr_cnt_read(ptp_qoriq);
 
-	spin_unlock_irqrestore(&qoriq_ptp->lock, flags);
+	spin_unlock_irqrestore(&ptp_qoriq->lock, flags);
 
 	*ts = ns_to_timespec64(ns);
 
@@ -275,16 +275,16 @@ static int ptp_qoriq_settime(struct ptp_clock_info *ptp,
 {
 	u64 ns;
 	unsigned long flags;
-	struct qoriq_ptp *qoriq_ptp = container_of(ptp, struct qoriq_ptp, caps);
+	struct ptp_qoriq *ptp_qoriq = container_of(ptp, struct ptp_qoriq, caps);
 
 	ns = timespec64_to_ns(ts);
 
-	spin_lock_irqsave(&qoriq_ptp->lock, flags);
+	spin_lock_irqsave(&ptp_qoriq->lock, flags);
 
-	tmr_cnt_write(qoriq_ptp, ns);
-	set_fipers(qoriq_ptp);
+	tmr_cnt_write(ptp_qoriq, ns);
+	set_fipers(ptp_qoriq);
 
-	spin_unlock_irqrestore(&qoriq_ptp->lock, flags);
+	spin_unlock_irqrestore(&ptp_qoriq->lock, flags);
 
 	return 0;
 }
@@ -292,8 +292,8 @@ static int ptp_qoriq_settime(struct ptp_clock_info *ptp,
 static int ptp_qoriq_enable(struct ptp_clock_info *ptp,
 			      struct ptp_clock_request *rq, int on)
 {
-	struct qoriq_ptp *qoriq_ptp = container_of(ptp, struct qoriq_ptp, caps);
-	struct qoriq_ptp_registers *regs = &qoriq_ptp->regs;
+	struct ptp_qoriq *ptp_qoriq = container_of(ptp, struct ptp_qoriq, caps);
+	struct ptp_qoriq_registers *regs = &ptp_qoriq->regs;
 	unsigned long flags;
 	u32 bit, mask = 0;
 
@@ -311,7 +311,7 @@ static int ptp_qoriq_enable(struct ptp_clock_info *ptp,
 		}
 
 		if (on)
-			extts_clean_up(qoriq_ptp, rq->extts.index, false);
+			extts_clean_up(ptp_qoriq, rq->extts.index, false);
 
 		break;
 	case PTP_CLK_REQ_PPS:
@@ -321,7 +321,7 @@ static int ptp_qoriq_enable(struct ptp_clock_info *ptp,
 		return -EOPNOTSUPP;
 	}
 
-	spin_lock_irqsave(&qoriq_ptp->lock, flags);
+	spin_lock_irqsave(&ptp_qoriq->lock, flags);
 
 	mask = qoriq_read(&regs->ctrl_regs->tmr_temask);
 	if (on) {
@@ -333,7 +333,7 @@ static int ptp_qoriq_enable(struct ptp_clock_info *ptp,
 
 	qoriq_write(&regs->ctrl_regs->tmr_temask, mask);
 
-	spin_unlock_irqrestore(&qoriq_ptp->lock, flags);
+	spin_unlock_irqrestore(&ptp_qoriq->lock, flags);
 	return 0;
 }
 
@@ -354,7 +354,7 @@ static const struct ptp_clock_info ptp_qoriq_caps = {
 };
 
 /**
- * qoriq_ptp_nominal_freq - calculate nominal frequency according to
+ * ptp_qoriq_nominal_freq - calculate nominal frequency according to
  *			    reference clock frequency
  *
  * @clk_src: reference clock frequency
@@ -365,7 +365,7 @@ static const struct ptp_clock_info ptp_qoriq_caps = {
  *
  * Return the nominal frequency
  */
-static u32 qoriq_ptp_nominal_freq(u32 clk_src)
+static u32 ptp_qoriq_nominal_freq(u32 clk_src)
 {
 	u32 remainder = 0;
 
@@ -385,9 +385,9 @@ static u32 qoriq_ptp_nominal_freq(u32 clk_src)
 }
 
 /**
- * qoriq_ptp_auto_config - calculate a set of default configurations
+ * ptp_qoriq_auto_config - calculate a set of default configurations
  *
- * @qoriq_ptp: pointer to qoriq_ptp
+ * @ptp_qoriq: pointer to ptp_qoriq
  * @node: pointer to device_node
  *
  * If below dts properties are not provided, this function will be
@@ -401,7 +401,7 @@ static u32 qoriq_ptp_nominal_freq(u32 clk_src)
  *
  * Return 0 if success
  */
-static int qoriq_ptp_auto_config(struct qoriq_ptp *qoriq_ptp,
+static int ptp_qoriq_auto_config(struct ptp_qoriq *ptp_qoriq,
 				 struct device_node *node)
 {
 	struct clk *clk;
@@ -411,7 +411,7 @@ static int qoriq_ptp_auto_config(struct qoriq_ptp *qoriq_ptp,
 	u32 remainder = 0;
 	u32 clk_src = 0;
 
-	qoriq_ptp->cksel = DEFAULT_CKSEL;
+	ptp_qoriq->cksel = DEFAULT_CKSEL;
 
 	clk = of_clk_get(node, 0);
 	if (!IS_ERR(clk)) {
@@ -424,12 +424,12 @@ static int qoriq_ptp_auto_config(struct qoriq_ptp *qoriq_ptp,
 		return -EINVAL;
 	}
 
-	nominal_freq = qoriq_ptp_nominal_freq(clk_src);
+	nominal_freq = ptp_qoriq_nominal_freq(clk_src);
 	if (!nominal_freq)
 		return -EINVAL;
 
-	qoriq_ptp->tclk_period = 1000000000UL / nominal_freq;
-	qoriq_ptp->tmr_prsc = DEFAULT_TMR_PRSC;
+	ptp_qoriq->tclk_period = 1000000000UL / nominal_freq;
+	ptp_qoriq->tmr_prsc = DEFAULT_TMR_PRSC;
 
 	/* Calculate initial frequency compensation value for TMR_ADD register.
 	 * freq_comp = ceil(2^32 / freq_ratio)
@@ -440,171 +440,171 @@ static int qoriq_ptp_auto_config(struct qoriq_ptp *qoriq_ptp,
 	if (remainder)
 		freq_comp++;
 
-	qoriq_ptp->tmr_add = freq_comp;
-	qoriq_ptp->tmr_fiper1 = DEFAULT_FIPER1_PERIOD - qoriq_ptp->tclk_period;
-	qoriq_ptp->tmr_fiper2 = DEFAULT_FIPER2_PERIOD - qoriq_ptp->tclk_period;
+	ptp_qoriq->tmr_add = freq_comp;
+	ptp_qoriq->tmr_fiper1 = DEFAULT_FIPER1_PERIOD - ptp_qoriq->tclk_period;
+	ptp_qoriq->tmr_fiper2 = DEFAULT_FIPER2_PERIOD - ptp_qoriq->tclk_period;
 
 	/* max_adj = 1000000000 * (freq_ratio - 1.0) - 1
 	 * freq_ratio = reference_clock_freq / nominal_freq
 	 */
 	max_adj = 1000000000ULL * (clk_src - nominal_freq);
 	max_adj = div_u64(max_adj, nominal_freq) - 1;
-	qoriq_ptp->caps.max_adj = max_adj;
+	ptp_qoriq->caps.max_adj = max_adj;
 
 	return 0;
 }
 
-static int qoriq_ptp_probe(struct platform_device *dev)
+static int ptp_qoriq_probe(struct platform_device *dev)
 {
 	struct device_node *node = dev->dev.of_node;
-	struct qoriq_ptp *qoriq_ptp;
-	struct qoriq_ptp_registers *regs;
+	struct ptp_qoriq *ptp_qoriq;
+	struct ptp_qoriq_registers *regs;
 	struct timespec64 now;
 	int err = -ENOMEM;
 	u32 tmr_ctrl;
 	unsigned long flags;
 	void __iomem *base;
 
-	qoriq_ptp = kzalloc(sizeof(*qoriq_ptp), GFP_KERNEL);
-	if (!qoriq_ptp)
+	ptp_qoriq = kzalloc(sizeof(*ptp_qoriq), GFP_KERNEL);
+	if (!ptp_qoriq)
 		goto no_memory;
 
 	err = -EINVAL;
 
-	qoriq_ptp->dev = &dev->dev;
-	qoriq_ptp->caps = ptp_qoriq_caps;
+	ptp_qoriq->dev = &dev->dev;
+	ptp_qoriq->caps = ptp_qoriq_caps;
 
-	if (of_property_read_u32(node, "fsl,cksel", &qoriq_ptp->cksel))
-		qoriq_ptp->cksel = DEFAULT_CKSEL;
+	if (of_property_read_u32(node, "fsl,cksel", &ptp_qoriq->cksel))
+		ptp_qoriq->cksel = DEFAULT_CKSEL;
 
 	if (of_property_read_bool(node, "fsl,extts-fifo"))
-		qoriq_ptp->extts_fifo_support = true;
+		ptp_qoriq->extts_fifo_support = true;
 	else
-		qoriq_ptp->extts_fifo_support = false;
+		ptp_qoriq->extts_fifo_support = false;
 
 	if (of_property_read_u32(node,
-				 "fsl,tclk-period", &qoriq_ptp->tclk_period) ||
+				 "fsl,tclk-period", &ptp_qoriq->tclk_period) ||
 	    of_property_read_u32(node,
-				 "fsl,tmr-prsc", &qoriq_ptp->tmr_prsc) ||
+				 "fsl,tmr-prsc", &ptp_qoriq->tmr_prsc) ||
 	    of_property_read_u32(node,
-				 "fsl,tmr-add", &qoriq_ptp->tmr_add) ||
+				 "fsl,tmr-add", &ptp_qoriq->tmr_add) ||
 	    of_property_read_u32(node,
-				 "fsl,tmr-fiper1", &qoriq_ptp->tmr_fiper1) ||
+				 "fsl,tmr-fiper1", &ptp_qoriq->tmr_fiper1) ||
 	    of_property_read_u32(node,
-				 "fsl,tmr-fiper2", &qoriq_ptp->tmr_fiper2) ||
+				 "fsl,tmr-fiper2", &ptp_qoriq->tmr_fiper2) ||
 	    of_property_read_u32(node,
-				 "fsl,max-adj", &qoriq_ptp->caps.max_adj)) {
+				 "fsl,max-adj", &ptp_qoriq->caps.max_adj)) {
 		pr_warn("device tree node missing required elements, try automatic configuration\n");
 
-		if (qoriq_ptp_auto_config(qoriq_ptp, node))
+		if (ptp_qoriq_auto_config(ptp_qoriq, node))
 			goto no_config;
 	}
 
 	err = -ENODEV;
 
-	qoriq_ptp->irq = platform_get_irq(dev, 0);
+	ptp_qoriq->irq = platform_get_irq(dev, 0);
 
-	if (qoriq_ptp->irq < 0) {
+	if (ptp_qoriq->irq < 0) {
 		pr_err("irq not in device tree\n");
 		goto no_node;
 	}
-	if (request_irq(qoriq_ptp->irq, isr, IRQF_SHARED, DRIVER, qoriq_ptp)) {
+	if (request_irq(ptp_qoriq->irq, isr, IRQF_SHARED, DRIVER, ptp_qoriq)) {
 		pr_err("request_irq failed\n");
 		goto no_node;
 	}
 
-	qoriq_ptp->rsrc = platform_get_resource(dev, IORESOURCE_MEM, 0);
-	if (!qoriq_ptp->rsrc) {
+	ptp_qoriq->rsrc = platform_get_resource(dev, IORESOURCE_MEM, 0);
+	if (!ptp_qoriq->rsrc) {
 		pr_err("no resource\n");
 		goto no_resource;
 	}
-	if (request_resource(&iomem_resource, qoriq_ptp->rsrc)) {
+	if (request_resource(&iomem_resource, ptp_qoriq->rsrc)) {
 		pr_err("resource busy\n");
 		goto no_resource;
 	}
 
-	spin_lock_init(&qoriq_ptp->lock);
+	spin_lock_init(&ptp_qoriq->lock);
 
-	base = ioremap(qoriq_ptp->rsrc->start,
-		       resource_size(qoriq_ptp->rsrc));
+	base = ioremap(ptp_qoriq->rsrc->start,
+		       resource_size(ptp_qoriq->rsrc));
 	if (!base) {
 		pr_err("ioremap ptp registers failed\n");
 		goto no_ioremap;
 	}
 
-	qoriq_ptp->base = base;
+	ptp_qoriq->base = base;
 
 	if (of_device_is_compatible(node, "fsl,fman-ptp-timer")) {
-		qoriq_ptp->regs.ctrl_regs = base + FMAN_CTRL_REGS_OFFSET;
-		qoriq_ptp->regs.alarm_regs = base + FMAN_ALARM_REGS_OFFSET;
-		qoriq_ptp->regs.fiper_regs = base + FMAN_FIPER_REGS_OFFSET;
-		qoriq_ptp->regs.etts_regs = base + FMAN_ETTS_REGS_OFFSET;
+		ptp_qoriq->regs.ctrl_regs = base + FMAN_CTRL_REGS_OFFSET;
+		ptp_qoriq->regs.alarm_regs = base + FMAN_ALARM_REGS_OFFSET;
+		ptp_qoriq->regs.fiper_regs = base + FMAN_FIPER_REGS_OFFSET;
+		ptp_qoriq->regs.etts_regs = base + FMAN_ETTS_REGS_OFFSET;
 	} else {
-		qoriq_ptp->regs.ctrl_regs = base + CTRL_REGS_OFFSET;
-		qoriq_ptp->regs.alarm_regs = base + ALARM_REGS_OFFSET;
-		qoriq_ptp->regs.fiper_regs = base + FIPER_REGS_OFFSET;
-		qoriq_ptp->regs.etts_regs = base + ETTS_REGS_OFFSET;
+		ptp_qoriq->regs.ctrl_regs = base + CTRL_REGS_OFFSET;
+		ptp_qoriq->regs.alarm_regs = base + ALARM_REGS_OFFSET;
+		ptp_qoriq->regs.fiper_regs = base + FIPER_REGS_OFFSET;
+		ptp_qoriq->regs.etts_regs = base + ETTS_REGS_OFFSET;
 	}
 
 	ktime_get_real_ts64(&now);
-	ptp_qoriq_settime(&qoriq_ptp->caps, &now);
+	ptp_qoriq_settime(&ptp_qoriq->caps, &now);
 
 	tmr_ctrl =
-	  (qoriq_ptp->tclk_period & TCLK_PERIOD_MASK) << TCLK_PERIOD_SHIFT |
-	  (qoriq_ptp->cksel & CKSEL_MASK) << CKSEL_SHIFT;
+	  (ptp_qoriq->tclk_period & TCLK_PERIOD_MASK) << TCLK_PERIOD_SHIFT |
+	  (ptp_qoriq->cksel & CKSEL_MASK) << CKSEL_SHIFT;
 
-	spin_lock_irqsave(&qoriq_ptp->lock, flags);
+	spin_lock_irqsave(&ptp_qoriq->lock, flags);
 
-	regs = &qoriq_ptp->regs;
+	regs = &ptp_qoriq->regs;
 	qoriq_write(&regs->ctrl_regs->tmr_ctrl,   tmr_ctrl);
-	qoriq_write(&regs->ctrl_regs->tmr_add,    qoriq_ptp->tmr_add);
-	qoriq_write(&regs->ctrl_regs->tmr_prsc,   qoriq_ptp->tmr_prsc);
-	qoriq_write(&regs->fiper_regs->tmr_fiper1, qoriq_ptp->tmr_fiper1);
-	qoriq_write(&regs->fiper_regs->tmr_fiper2, qoriq_ptp->tmr_fiper2);
-	set_alarm(qoriq_ptp);
+	qoriq_write(&regs->ctrl_regs->tmr_add,    ptp_qoriq->tmr_add);
+	qoriq_write(&regs->ctrl_regs->tmr_prsc,   ptp_qoriq->tmr_prsc);
+	qoriq_write(&regs->fiper_regs->tmr_fiper1, ptp_qoriq->tmr_fiper1);
+	qoriq_write(&regs->fiper_regs->tmr_fiper2, ptp_qoriq->tmr_fiper2);
+	set_alarm(ptp_qoriq);
 	qoriq_write(&regs->ctrl_regs->tmr_ctrl,   tmr_ctrl|FIPERST|RTPE|TE|FRD);
 
-	spin_unlock_irqrestore(&qoriq_ptp->lock, flags);
+	spin_unlock_irqrestore(&ptp_qoriq->lock, flags);
 
-	qoriq_ptp->clock = ptp_clock_register(&qoriq_ptp->caps, &dev->dev);
-	if (IS_ERR(qoriq_ptp->clock)) {
-		err = PTR_ERR(qoriq_ptp->clock);
+	ptp_qoriq->clock = ptp_clock_register(&ptp_qoriq->caps, &dev->dev);
+	if (IS_ERR(ptp_qoriq->clock)) {
+		err = PTR_ERR(ptp_qoriq->clock);
 		goto no_clock;
 	}
-	qoriq_ptp->phc_index = ptp_clock_index(qoriq_ptp->clock);
+	ptp_qoriq->phc_index = ptp_clock_index(ptp_qoriq->clock);
 
-	ptp_qoriq_create_debugfs(qoriq_ptp);
-	platform_set_drvdata(dev, qoriq_ptp);
+	ptp_qoriq_create_debugfs(ptp_qoriq);
+	platform_set_drvdata(dev, ptp_qoriq);
 
 	return 0;
 
 no_clock:
-	iounmap(qoriq_ptp->base);
+	iounmap(ptp_qoriq->base);
 no_ioremap:
-	release_resource(qoriq_ptp->rsrc);
+	release_resource(ptp_qoriq->rsrc);
 no_resource:
-	free_irq(qoriq_ptp->irq, qoriq_ptp);
+	free_irq(ptp_qoriq->irq, ptp_qoriq);
 no_config:
 no_node:
-	kfree(qoriq_ptp);
+	kfree(ptp_qoriq);
 no_memory:
 	return err;
 }
 
-static int qoriq_ptp_remove(struct platform_device *dev)
+static int ptp_qoriq_remove(struct platform_device *dev)
 {
-	struct qoriq_ptp *qoriq_ptp = platform_get_drvdata(dev);
-	struct qoriq_ptp_registers *regs = &qoriq_ptp->regs;
+	struct ptp_qoriq *ptp_qoriq = platform_get_drvdata(dev);
+	struct ptp_qoriq_registers *regs = &ptp_qoriq->regs;
 
 	qoriq_write(&regs->ctrl_regs->tmr_temask, 0);
 	qoriq_write(&regs->ctrl_regs->tmr_ctrl,   0);
 
-	ptp_qoriq_remove_debugfs(qoriq_ptp);
-	ptp_clock_unregister(qoriq_ptp->clock);
-	iounmap(qoriq_ptp->base);
-	release_resource(qoriq_ptp->rsrc);
-	free_irq(qoriq_ptp->irq, qoriq_ptp);
-	kfree(qoriq_ptp);
+	ptp_qoriq_remove_debugfs(ptp_qoriq);
+	ptp_clock_unregister(ptp_qoriq->clock);
+	iounmap(ptp_qoriq->base);
+	release_resource(ptp_qoriq->rsrc);
+	free_irq(ptp_qoriq->irq, ptp_qoriq);
+	kfree(ptp_qoriq);
 
 	return 0;
 }
@@ -616,16 +616,16 @@ static const struct of_device_id match_table[] = {
 };
 MODULE_DEVICE_TABLE(of, match_table);
 
-static struct platform_driver qoriq_ptp_driver = {
+static struct platform_driver ptp_qoriq_driver = {
 	.driver = {
 		.name		= "ptp_qoriq",
 		.of_match_table	= match_table,
 	},
-	.probe       = qoriq_ptp_probe,
-	.remove      = qoriq_ptp_remove,
+	.probe       = ptp_qoriq_probe,
+	.remove      = ptp_qoriq_remove,
 };
 
-module_platform_driver(qoriq_ptp_driver);
+module_platform_driver(ptp_qoriq_driver);
 
 MODULE_AUTHOR("Richard Cochran <richardcochran@gmail.com>");
 MODULE_DESCRIPTION("PTP clock for Freescale QorIQ 1588 timer");
diff --git a/drivers/ptp/ptp_qoriq_debugfs.c b/drivers/ptp/ptp_qoriq_debugfs.c
index 970595021088..3a70daf03727 100644
--- a/drivers/ptp/ptp_qoriq_debugfs.c
+++ b/drivers/ptp/ptp_qoriq_debugfs.c
@@ -7,8 +7,8 @@
 
 static int ptp_qoriq_fiper1_lpbk_get(void *data, u64 *val)
 {
-	struct qoriq_ptp *qoriq_ptp = data;
-	struct qoriq_ptp_registers *regs = &qoriq_ptp->regs;
+	struct ptp_qoriq *ptp_qoriq = data;
+	struct ptp_qoriq_registers *regs = &ptp_qoriq->regs;
 	u32 ctrl;
 
 	ctrl = qoriq_read(&regs->ctrl_regs->tmr_ctrl);
@@ -19,8 +19,8 @@ static int ptp_qoriq_fiper1_lpbk_get(void *data, u64 *val)
 
 static int ptp_qoriq_fiper1_lpbk_set(void *data, u64 val)
 {
-	struct qoriq_ptp *qoriq_ptp = data;
-	struct qoriq_ptp_registers *regs = &qoriq_ptp->regs;
+	struct ptp_qoriq *ptp_qoriq = data;
+	struct ptp_qoriq_registers *regs = &ptp_qoriq->regs;
 	u32 ctrl;
 
 	ctrl = qoriq_read(&regs->ctrl_regs->tmr_ctrl);
@@ -38,8 +38,8 @@ DEFINE_DEBUGFS_ATTRIBUTE(ptp_qoriq_fiper1_fops, ptp_qoriq_fiper1_lpbk_get,
 
 static int ptp_qoriq_fiper2_lpbk_get(void *data, u64 *val)
 {
-	struct qoriq_ptp *qoriq_ptp = data;
-	struct qoriq_ptp_registers *regs = &qoriq_ptp->regs;
+	struct ptp_qoriq *ptp_qoriq = data;
+	struct ptp_qoriq_registers *regs = &ptp_qoriq->regs;
 	u32 ctrl;
 
 	ctrl = qoriq_read(&regs->ctrl_regs->tmr_ctrl);
@@ -50,8 +50,8 @@ static int ptp_qoriq_fiper2_lpbk_get(void *data, u64 *val)
 
 static int ptp_qoriq_fiper2_lpbk_set(void *data, u64 val)
 {
-	struct qoriq_ptp *qoriq_ptp = data;
-	struct qoriq_ptp_registers *regs = &qoriq_ptp->regs;
+	struct ptp_qoriq *ptp_qoriq = data;
+	struct ptp_qoriq_registers *regs = &ptp_qoriq->regs;
 	u32 ctrl;
 
 	ctrl = qoriq_read(&regs->ctrl_regs->tmr_ctrl);
@@ -67,35 +67,35 @@ static int ptp_qoriq_fiper2_lpbk_set(void *data, u64 val)
 DEFINE_DEBUGFS_ATTRIBUTE(ptp_qoriq_fiper2_fops, ptp_qoriq_fiper2_lpbk_get,
 			 ptp_qoriq_fiper2_lpbk_set, "%llu\n");
 
-void ptp_qoriq_create_debugfs(struct qoriq_ptp *qoriq_ptp)
+void ptp_qoriq_create_debugfs(struct ptp_qoriq *ptp_qoriq)
 {
 	struct dentry *root;
 
-	root = debugfs_create_dir(dev_name(qoriq_ptp->dev), NULL);
+	root = debugfs_create_dir(dev_name(ptp_qoriq->dev), NULL);
 	if (IS_ERR(root))
 		return;
 	if (!root)
 		goto err_root;
 
-	qoriq_ptp->debugfs_root = root;
+	ptp_qoriq->debugfs_root = root;
 
 	if (!debugfs_create_file_unsafe("fiper1-loopback", 0600, root,
-					qoriq_ptp, &ptp_qoriq_fiper1_fops))
+					ptp_qoriq, &ptp_qoriq_fiper1_fops))
 		goto err_node;
 	if (!debugfs_create_file_unsafe("fiper2-loopback", 0600, root,
-					qoriq_ptp, &ptp_qoriq_fiper2_fops))
+					ptp_qoriq, &ptp_qoriq_fiper2_fops))
 		goto err_node;
 	return;
 
 err_node:
 	debugfs_remove_recursive(root);
-	qoriq_ptp->debugfs_root = NULL;
+	ptp_qoriq->debugfs_root = NULL;
 err_root:
-	dev_err(qoriq_ptp->dev, "failed to initialize debugfs\n");
+	dev_err(ptp_qoriq->dev, "failed to initialize debugfs\n");
 }
 
-void ptp_qoriq_remove_debugfs(struct qoriq_ptp *qoriq_ptp)
+void ptp_qoriq_remove_debugfs(struct ptp_qoriq *ptp_qoriq)
 {
-	debugfs_remove_recursive(qoriq_ptp->debugfs_root);
-	qoriq_ptp->debugfs_root = NULL;
+	debugfs_remove_recursive(ptp_qoriq->debugfs_root);
+	ptp_qoriq->debugfs_root = NULL;
 }
diff --git a/include/linux/fsl/ptp_qoriq.h b/include/linux/fsl/ptp_qoriq.h
index 94e9797e434c..c2a32d9ec6ba 100644
--- a/include/linux/fsl/ptp_qoriq.h
+++ b/include/linux/fsl/ptp_qoriq.h
@@ -49,7 +49,7 @@ struct etts_regs {
 	u32 tmr_etts2_l;  /* Timestamp of general purpose external trigger */
 };
 
-struct qoriq_ptp_registers {
+struct ptp_qoriq_registers {
 	struct ctrl_regs __iomem *ctrl_regs;
 	struct alarm_regs __iomem *alarm_regs;
 	struct fiper_regs __iomem *fiper_regs;
@@ -136,9 +136,9 @@ struct qoriq_ptp_registers {
 #define DEFAULT_FIPER1_PERIOD	1000000000
 #define DEFAULT_FIPER2_PERIOD	100000
 
-struct qoriq_ptp {
+struct ptp_qoriq {
 	void __iomem *base;
-	struct qoriq_ptp_registers regs;
+	struct ptp_qoriq_registers regs;
 	spinlock_t lock; /* protects regs */
 	struct ptp_clock *clock;
 	struct ptp_clock_info caps;
@@ -172,12 +172,12 @@ static inline void qoriq_write(unsigned __iomem *addr, u32 val)
 }
 
 #ifdef CONFIG_DEBUG_FS
-void ptp_qoriq_create_debugfs(struct qoriq_ptp *qoriq_ptp);
-void ptp_qoriq_remove_debugfs(struct qoriq_ptp *qoriq_ptp);
+void ptp_qoriq_create_debugfs(struct ptp_qoriq *ptp_qoriq);
+void ptp_qoriq_remove_debugfs(struct ptp_qoriq *ptp_qoriq);
 #else
-static inline void ptp_qoriq_create_debugfs(struct qoriq_ptp *qoriq_ptp)
+static inline void ptp_qoriq_create_debugfs(struct ptp_qoriq *ptp_qoriq)
 { }
-static inline void ptp_qoriq_remove_debugfs(struct qoriq_ptp *qoriq_ptp)
+static inline void ptp_qoriq_remove_debugfs(struct ptp_qoriq *ptp_qoriq)
 { }
 #endif
 
-- 
cgit v1.2.3


From 73356e4ea895d5d4fb2bed30c32d3293b090f3ce Mon Sep 17 00:00:00 2001
From: Yangbo Lu <yangbo.lu@nxp.com>
Date: Tue, 12 Feb 2019 12:23:57 +0800
Subject: ptp_qoriq: make ptp operations global

This patch is to make functions of ptp operations global,
so that ENETC PTP driver which is a PCI driver for same
1588 timer IP block could reuse them.

Signed-off-by: Yangbo Lu <yangbo.lu@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/ptp/ptp_qoriq.c       | 27 ++++++++++++++++-----------
 include/linux/fsl/ptp_qoriq.h |  9 +++++++++
 2 files changed, 25 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/drivers/ptp/ptp_qoriq.c b/drivers/ptp/ptp_qoriq.c
index 8c10d0f8864f..1f3e73e62de9 100644
--- a/drivers/ptp/ptp_qoriq.c
+++ b/drivers/ptp/ptp_qoriq.c
@@ -22,7 +22,6 @@
 
 #include <linux/device.h>
 #include <linux/hrtimer.h>
-#include <linux/interrupt.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/of.h>
@@ -135,7 +134,7 @@ static int extts_clean_up(struct ptp_qoriq *ptp_qoriq, int index,
  * Interrupt service routine
  */
 
-static irqreturn_t isr(int irq, void *priv)
+irqreturn_t ptp_qoriq_isr(int irq, void *priv)
 {
 	struct ptp_qoriq *ptp_qoriq = priv;
 	struct ptp_qoriq_registers *regs = &ptp_qoriq->regs;
@@ -200,12 +199,13 @@ static irqreturn_t isr(int irq, void *priv)
 	} else
 		return IRQ_NONE;
 }
+EXPORT_SYMBOL_GPL(ptp_qoriq_isr);
 
 /*
  * PTP clock operations
  */
 
-static int ptp_qoriq_adjfine(struct ptp_clock_info *ptp, long scaled_ppm)
+int ptp_qoriq_adjfine(struct ptp_clock_info *ptp, long scaled_ppm)
 {
 	u64 adj, diff;
 	u32 tmr_add;
@@ -233,8 +233,9 @@ static int ptp_qoriq_adjfine(struct ptp_clock_info *ptp, long scaled_ppm)
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(ptp_qoriq_adjfine);
 
-static int ptp_qoriq_adjtime(struct ptp_clock_info *ptp, s64 delta)
+int ptp_qoriq_adjtime(struct ptp_clock_info *ptp, s64 delta)
 {
 	s64 now;
 	unsigned long flags;
@@ -251,9 +252,9 @@ static int ptp_qoriq_adjtime(struct ptp_clock_info *ptp, s64 delta)
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(ptp_qoriq_adjtime);
 
-static int ptp_qoriq_gettime(struct ptp_clock_info *ptp,
-			       struct timespec64 *ts)
+int ptp_qoriq_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts)
 {
 	u64 ns;
 	unsigned long flags;
@@ -269,9 +270,10 @@ static int ptp_qoriq_gettime(struct ptp_clock_info *ptp,
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(ptp_qoriq_gettime);
 
-static int ptp_qoriq_settime(struct ptp_clock_info *ptp,
-			       const struct timespec64 *ts)
+int ptp_qoriq_settime(struct ptp_clock_info *ptp,
+		      const struct timespec64 *ts)
 {
 	u64 ns;
 	unsigned long flags;
@@ -288,9 +290,10 @@ static int ptp_qoriq_settime(struct ptp_clock_info *ptp,
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(ptp_qoriq_settime);
 
-static int ptp_qoriq_enable(struct ptp_clock_info *ptp,
-			      struct ptp_clock_request *rq, int on)
+int ptp_qoriq_enable(struct ptp_clock_info *ptp,
+		     struct ptp_clock_request *rq, int on)
 {
 	struct ptp_qoriq *ptp_qoriq = container_of(ptp, struct ptp_qoriq, caps);
 	struct ptp_qoriq_registers *regs = &ptp_qoriq->regs;
@@ -336,6 +339,7 @@ static int ptp_qoriq_enable(struct ptp_clock_info *ptp,
 	spin_unlock_irqrestore(&ptp_qoriq->lock, flags);
 	return 0;
 }
+EXPORT_SYMBOL_GPL(ptp_qoriq_enable);
 
 static const struct ptp_clock_info ptp_qoriq_caps = {
 	.owner		= THIS_MODULE,
@@ -508,7 +512,8 @@ static int ptp_qoriq_probe(struct platform_device *dev)
 		pr_err("irq not in device tree\n");
 		goto no_node;
 	}
-	if (request_irq(ptp_qoriq->irq, isr, IRQF_SHARED, DRIVER, ptp_qoriq)) {
+	if (request_irq(ptp_qoriq->irq, ptp_qoriq_isr, IRQF_SHARED,
+			DRIVER, ptp_qoriq)) {
 		pr_err("request_irq failed\n");
 		goto no_node;
 	}
diff --git a/include/linux/fsl/ptp_qoriq.h b/include/linux/fsl/ptp_qoriq.h
index c2a32d9ec6ba..75e6f0523cb1 100644
--- a/include/linux/fsl/ptp_qoriq.h
+++ b/include/linux/fsl/ptp_qoriq.h
@@ -7,6 +7,7 @@
 #define __PTP_QORIQ_H__
 
 #include <linux/io.h>
+#include <linux/interrupt.h>
 #include <linux/ptp_clock_kernel.h>
 
 /*
@@ -171,6 +172,14 @@ static inline void qoriq_write(unsigned __iomem *addr, u32 val)
 	iowrite32be(val, addr);
 }
 
+irqreturn_t ptp_qoriq_isr(int irq, void *priv);
+int ptp_qoriq_adjfine(struct ptp_clock_info *ptp, long scaled_ppm);
+int ptp_qoriq_adjtime(struct ptp_clock_info *ptp, s64 delta);
+int ptp_qoriq_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts);
+int ptp_qoriq_settime(struct ptp_clock_info *ptp,
+		      const struct timespec64 *ts);
+int ptp_qoriq_enable(struct ptp_clock_info *ptp,
+		     struct ptp_clock_request *rq, int on);
 #ifdef CONFIG_DEBUG_FS
 void ptp_qoriq_create_debugfs(struct ptp_qoriq *ptp_qoriq);
 void ptp_qoriq_remove_debugfs(struct ptp_qoriq *ptp_qoriq);
-- 
cgit v1.2.3


From ff54571a747bc1fc8e132ef9c512451ec6de3336 Mon Sep 17 00:00:00 2001
From: Yangbo Lu <yangbo.lu@nxp.com>
Date: Tue, 12 Feb 2019 12:23:58 +0800
Subject: ptp_qoriq: convert to use ptp_qoriq_init/free

Moved QorIQ PTP clock initialization/free into new functions
ptp_qoriq_init()/ptp_qoriq_free(). These functions could also
be reused by ENETC PTP drvier which is a PCI driver for same
1588 timer IP block.

Signed-off-by: Yangbo Lu <yangbo.lu@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/ptp/ptp_qoriq.c       | 144 ++++++++++++++++++++++--------------------
 include/linux/fsl/ptp_qoriq.h |   3 +
 2 files changed, 80 insertions(+), 67 deletions(-)

(limited to 'include')

diff --git a/drivers/ptp/ptp_qoriq.c b/drivers/ptp/ptp_qoriq.c
index 1f3e73e62de9..db4f929ea4e9 100644
--- a/drivers/ptp/ptp_qoriq.c
+++ b/drivers/ptp/ptp_qoriq.c
@@ -458,25 +458,17 @@ static int ptp_qoriq_auto_config(struct ptp_qoriq *ptp_qoriq,
 	return 0;
 }
 
-static int ptp_qoriq_probe(struct platform_device *dev)
+int ptp_qoriq_init(struct ptp_qoriq *ptp_qoriq, void __iomem *base,
+		   const struct ptp_clock_info caps)
 {
-	struct device_node *node = dev->dev.of_node;
-	struct ptp_qoriq *ptp_qoriq;
+	struct device_node *node = ptp_qoriq->dev->of_node;
 	struct ptp_qoriq_registers *regs;
 	struct timespec64 now;
-	int err = -ENOMEM;
-	u32 tmr_ctrl;
 	unsigned long flags;
-	void __iomem *base;
-
-	ptp_qoriq = kzalloc(sizeof(*ptp_qoriq), GFP_KERNEL);
-	if (!ptp_qoriq)
-		goto no_memory;
-
-	err = -EINVAL;
+	u32 tmr_ctrl;
 
-	ptp_qoriq->dev = &dev->dev;
-	ptp_qoriq->caps = ptp_qoriq_caps;
+	ptp_qoriq->base = base;
+	ptp_qoriq->caps = caps;
 
 	if (of_property_read_u32(node, "fsl,cksel", &ptp_qoriq->cksel))
 		ptp_qoriq->cksel = DEFAULT_CKSEL;
@@ -501,44 +493,9 @@ static int ptp_qoriq_probe(struct platform_device *dev)
 		pr_warn("device tree node missing required elements, try automatic configuration\n");
 
 		if (ptp_qoriq_auto_config(ptp_qoriq, node))
-			goto no_config;
+			return -ENODEV;
 	}
 
-	err = -ENODEV;
-
-	ptp_qoriq->irq = platform_get_irq(dev, 0);
-
-	if (ptp_qoriq->irq < 0) {
-		pr_err("irq not in device tree\n");
-		goto no_node;
-	}
-	if (request_irq(ptp_qoriq->irq, ptp_qoriq_isr, IRQF_SHARED,
-			DRIVER, ptp_qoriq)) {
-		pr_err("request_irq failed\n");
-		goto no_node;
-	}
-
-	ptp_qoriq->rsrc = platform_get_resource(dev, IORESOURCE_MEM, 0);
-	if (!ptp_qoriq->rsrc) {
-		pr_err("no resource\n");
-		goto no_resource;
-	}
-	if (request_resource(&iomem_resource, ptp_qoriq->rsrc)) {
-		pr_err("resource busy\n");
-		goto no_resource;
-	}
-
-	spin_lock_init(&ptp_qoriq->lock);
-
-	base = ioremap(ptp_qoriq->rsrc->start,
-		       resource_size(ptp_qoriq->rsrc));
-	if (!base) {
-		pr_err("ioremap ptp registers failed\n");
-		goto no_ioremap;
-	}
-
-	ptp_qoriq->base = base;
-
 	if (of_device_is_compatible(node, "fsl,fman-ptp-timer")) {
 		ptp_qoriq->regs.ctrl_regs = base + FMAN_CTRL_REGS_OFFSET;
 		ptp_qoriq->regs.alarm_regs = base + FMAN_ALARM_REGS_OFFSET;
@@ -558,6 +515,7 @@ static int ptp_qoriq_probe(struct platform_device *dev)
 	  (ptp_qoriq->tclk_period & TCLK_PERIOD_MASK) << TCLK_PERIOD_SHIFT |
 	  (ptp_qoriq->cksel & CKSEL_MASK) << CKSEL_SHIFT;
 
+	spin_lock_init(&ptp_qoriq->lock);
 	spin_lock_irqsave(&ptp_qoriq->lock, flags);
 
 	regs = &ptp_qoriq->regs;
@@ -571,16 +529,77 @@ static int ptp_qoriq_probe(struct platform_device *dev)
 
 	spin_unlock_irqrestore(&ptp_qoriq->lock, flags);
 
-	ptp_qoriq->clock = ptp_clock_register(&ptp_qoriq->caps, &dev->dev);
-	if (IS_ERR(ptp_qoriq->clock)) {
-		err = PTR_ERR(ptp_qoriq->clock);
-		goto no_clock;
-	}
-	ptp_qoriq->phc_index = ptp_clock_index(ptp_qoriq->clock);
+	ptp_qoriq->clock = ptp_clock_register(&ptp_qoriq->caps, ptp_qoriq->dev);
+	if (IS_ERR(ptp_qoriq->clock))
+		return PTR_ERR(ptp_qoriq->clock);
 
+	ptp_qoriq->phc_index = ptp_clock_index(ptp_qoriq->clock);
 	ptp_qoriq_create_debugfs(ptp_qoriq);
-	platform_set_drvdata(dev, ptp_qoriq);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ptp_qoriq_init);
+
+void ptp_qoriq_free(struct ptp_qoriq *ptp_qoriq)
+{
+	struct ptp_qoriq_registers *regs = &ptp_qoriq->regs;
+
+	qoriq_write(&regs->ctrl_regs->tmr_temask, 0);
+	qoriq_write(&regs->ctrl_regs->tmr_ctrl,   0);
+
+	ptp_qoriq_remove_debugfs(ptp_qoriq);
+	ptp_clock_unregister(ptp_qoriq->clock);
+	iounmap(ptp_qoriq->base);
+	free_irq(ptp_qoriq->irq, ptp_qoriq);
+}
+EXPORT_SYMBOL_GPL(ptp_qoriq_free);
+
+static int ptp_qoriq_probe(struct platform_device *dev)
+{
+	struct ptp_qoriq *ptp_qoriq;
+	int err = -ENOMEM;
+	void __iomem *base;
 
+	ptp_qoriq = kzalloc(sizeof(*ptp_qoriq), GFP_KERNEL);
+	if (!ptp_qoriq)
+		goto no_memory;
+
+	ptp_qoriq->dev = &dev->dev;
+
+	err = -ENODEV;
+
+	ptp_qoriq->irq = platform_get_irq(dev, 0);
+	if (ptp_qoriq->irq < 0) {
+		pr_err("irq not in device tree\n");
+		goto no_node;
+	}
+	if (request_irq(ptp_qoriq->irq, ptp_qoriq_isr, IRQF_SHARED,
+			DRIVER, ptp_qoriq)) {
+		pr_err("request_irq failed\n");
+		goto no_node;
+	}
+
+	ptp_qoriq->rsrc = platform_get_resource(dev, IORESOURCE_MEM, 0);
+	if (!ptp_qoriq->rsrc) {
+		pr_err("no resource\n");
+		goto no_resource;
+	}
+	if (request_resource(&iomem_resource, ptp_qoriq->rsrc)) {
+		pr_err("resource busy\n");
+		goto no_resource;
+	}
+
+	base = ioremap(ptp_qoriq->rsrc->start,
+		       resource_size(ptp_qoriq->rsrc));
+	if (!base) {
+		pr_err("ioremap ptp registers failed\n");
+		goto no_ioremap;
+	}
+
+	err = ptp_qoriq_init(ptp_qoriq, base, ptp_qoriq_caps);
+	if (err)
+		goto no_clock;
+
+	platform_set_drvdata(dev, ptp_qoriq);
 	return 0;
 
 no_clock:
@@ -589,7 +608,6 @@ no_ioremap:
 	release_resource(ptp_qoriq->rsrc);
 no_resource:
 	free_irq(ptp_qoriq->irq, ptp_qoriq);
-no_config:
 no_node:
 	kfree(ptp_qoriq);
 no_memory:
@@ -599,18 +617,10 @@ no_memory:
 static int ptp_qoriq_remove(struct platform_device *dev)
 {
 	struct ptp_qoriq *ptp_qoriq = platform_get_drvdata(dev);
-	struct ptp_qoriq_registers *regs = &ptp_qoriq->regs;
 
-	qoriq_write(&regs->ctrl_regs->tmr_temask, 0);
-	qoriq_write(&regs->ctrl_regs->tmr_ctrl,   0);
-
-	ptp_qoriq_remove_debugfs(ptp_qoriq);
-	ptp_clock_unregister(ptp_qoriq->clock);
-	iounmap(ptp_qoriq->base);
+	ptp_qoriq_free(ptp_qoriq);
 	release_resource(ptp_qoriq->rsrc);
-	free_irq(ptp_qoriq->irq, ptp_qoriq);
 	kfree(ptp_qoriq);
-
 	return 0;
 }
 
diff --git a/include/linux/fsl/ptp_qoriq.h b/include/linux/fsl/ptp_qoriq.h
index 75e6f0523cb1..757aec385493 100644
--- a/include/linux/fsl/ptp_qoriq.h
+++ b/include/linux/fsl/ptp_qoriq.h
@@ -173,6 +173,9 @@ static inline void qoriq_write(unsigned __iomem *addr, u32 val)
 }
 
 irqreturn_t ptp_qoriq_isr(int irq, void *priv);
+int ptp_qoriq_init(struct ptp_qoriq *ptp_qoriq, void __iomem *base,
+		   const struct ptp_clock_info caps);
+void ptp_qoriq_free(struct ptp_qoriq *ptp_qoriq);
 int ptp_qoriq_adjfine(struct ptp_clock_info *ptp, long scaled_ppm);
 int ptp_qoriq_adjtime(struct ptp_clock_info *ptp, s64 delta);
 int ptp_qoriq_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts);
-- 
cgit v1.2.3


From f038ddf25b80be90e1af9439935bdb66fdbf5e28 Mon Sep 17 00:00:00 2001
From: Yangbo Lu <yangbo.lu@nxp.com>
Date: Tue, 12 Feb 2019 12:23:59 +0800
Subject: ptp_qoriq: add little enadian support

There is QorIQ 1588 timer IP block on the new ENETC Ethernet
controller. However it uses little endian mode which is different
with before. This patch is to add little endian support for the
driver by using "little-endian" dts node property.

Signed-off-by: Yangbo Lu <yangbo.lu@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/ptp/ptp_qoriq.c         | 69 +++++++++++++++++++++++------------------
 drivers/ptp/ptp_qoriq_debugfs.c | 12 +++----
 include/linux/fsl/ptp_qoriq.h   | 21 +++++++++----
 3 files changed, 60 insertions(+), 42 deletions(-)

(limited to 'include')

diff --git a/drivers/ptp/ptp_qoriq.c b/drivers/ptp/ptp_qoriq.c
index db4f929ea4e9..ed4dc398c57b 100644
--- a/drivers/ptp/ptp_qoriq.c
+++ b/drivers/ptp/ptp_qoriq.c
@@ -43,8 +43,8 @@ static u64 tmr_cnt_read(struct ptp_qoriq *ptp_qoriq)
 	u64 ns;
 	u32 lo, hi;
 
-	lo = qoriq_read(&regs->ctrl_regs->tmr_cnt_l);
-	hi = qoriq_read(&regs->ctrl_regs->tmr_cnt_h);
+	lo = ptp_qoriq->read(&regs->ctrl_regs->tmr_cnt_l);
+	hi = ptp_qoriq->read(&regs->ctrl_regs->tmr_cnt_h);
 	ns = ((u64) hi) << 32;
 	ns |= lo;
 	return ns;
@@ -57,8 +57,8 @@ static void tmr_cnt_write(struct ptp_qoriq *ptp_qoriq, u64 ns)
 	u32 hi = ns >> 32;
 	u32 lo = ns & 0xffffffff;
 
-	qoriq_write(&regs->ctrl_regs->tmr_cnt_l, lo);
-	qoriq_write(&regs->ctrl_regs->tmr_cnt_h, hi);
+	ptp_qoriq->write(&regs->ctrl_regs->tmr_cnt_l, lo);
+	ptp_qoriq->write(&regs->ctrl_regs->tmr_cnt_h, hi);
 }
 
 /* Caller must hold ptp_qoriq->lock. */
@@ -73,8 +73,8 @@ static void set_alarm(struct ptp_qoriq *ptp_qoriq)
 	ns -= ptp_qoriq->tclk_period;
 	hi = ns >> 32;
 	lo = ns & 0xffffffff;
-	qoriq_write(&regs->alarm_regs->tmr_alarm1_l, lo);
-	qoriq_write(&regs->alarm_regs->tmr_alarm1_h, hi);
+	ptp_qoriq->write(&regs->alarm_regs->tmr_alarm1_l, lo);
+	ptp_qoriq->write(&regs->alarm_regs->tmr_alarm1_h, hi);
 }
 
 /* Caller must hold ptp_qoriq->lock. */
@@ -83,8 +83,8 @@ static void set_fipers(struct ptp_qoriq *ptp_qoriq)
 	struct ptp_qoriq_registers *regs = &ptp_qoriq->regs;
 
 	set_alarm(ptp_qoriq);
-	qoriq_write(&regs->fiper_regs->tmr_fiper1, ptp_qoriq->tmr_fiper1);
-	qoriq_write(&regs->fiper_regs->tmr_fiper2, ptp_qoriq->tmr_fiper2);
+	ptp_qoriq->write(&regs->fiper_regs->tmr_fiper1, ptp_qoriq->tmr_fiper1);
+	ptp_qoriq->write(&regs->fiper_regs->tmr_fiper2, ptp_qoriq->tmr_fiper2);
 }
 
 static int extts_clean_up(struct ptp_qoriq *ptp_qoriq, int index,
@@ -115,8 +115,8 @@ static int extts_clean_up(struct ptp_qoriq *ptp_qoriq, int index,
 	event.index = index;
 
 	do {
-		lo = qoriq_read(reg_etts_l);
-		hi = qoriq_read(reg_etts_h);
+		lo = ptp_qoriq->read(reg_etts_l);
+		hi = ptp_qoriq->read(reg_etts_h);
 
 		if (update_event) {
 			event.timestamp = ((u64) hi) << 32;
@@ -124,7 +124,7 @@ static int extts_clean_up(struct ptp_qoriq *ptp_qoriq, int index,
 			ptp_clock_event(ptp_qoriq->clock, &event);
 		}
 
-		stat = qoriq_read(&regs->ctrl_regs->tmr_stat);
+		stat = ptp_qoriq->read(&regs->ctrl_regs->tmr_stat);
 	} while (ptp_qoriq->extts_fifo_support && (stat & valid));
 
 	return 0;
@@ -144,8 +144,8 @@ irqreturn_t ptp_qoriq_isr(int irq, void *priv)
 
 	spin_lock(&ptp_qoriq->lock);
 
-	val = qoriq_read(&regs->ctrl_regs->tmr_tevent);
-	mask = qoriq_read(&regs->ctrl_regs->tmr_temask);
+	val = ptp_qoriq->read(&regs->ctrl_regs->tmr_tevent);
+	mask = ptp_qoriq->read(&regs->ctrl_regs->tmr_temask);
 
 	spin_unlock(&ptp_qoriq->lock);
 
@@ -173,14 +173,14 @@ irqreturn_t ptp_qoriq_isr(int irq, void *priv)
 			ns = ptp_qoriq->alarm_value + ptp_qoriq->alarm_interval;
 			hi = ns >> 32;
 			lo = ns & 0xffffffff;
-			qoriq_write(&regs->alarm_regs->tmr_alarm2_l, lo);
-			qoriq_write(&regs->alarm_regs->tmr_alarm2_h, hi);
+			ptp_qoriq->write(&regs->alarm_regs->tmr_alarm2_l, lo);
+			ptp_qoriq->write(&regs->alarm_regs->tmr_alarm2_h, hi);
 			ptp_qoriq->alarm_value = ns;
 		} else {
 			spin_lock(&ptp_qoriq->lock);
-			mask = qoriq_read(&regs->ctrl_regs->tmr_temask);
+			mask = ptp_qoriq->read(&regs->ctrl_regs->tmr_temask);
 			mask &= ~ALM2EN;
-			qoriq_write(&regs->ctrl_regs->tmr_temask, mask);
+			ptp_qoriq->write(&regs->ctrl_regs->tmr_temask, mask);
 			spin_unlock(&ptp_qoriq->lock);
 			ptp_qoriq->alarm_value = 0;
 			ptp_qoriq->alarm_interval = 0;
@@ -194,7 +194,7 @@ irqreturn_t ptp_qoriq_isr(int irq, void *priv)
 	}
 
 	if (ack) {
-		qoriq_write(&regs->ctrl_regs->tmr_tevent, ack);
+		ptp_qoriq->write(&regs->ctrl_regs->tmr_tevent, ack);
 		return IRQ_HANDLED;
 	} else
 		return IRQ_NONE;
@@ -229,7 +229,7 @@ int ptp_qoriq_adjfine(struct ptp_clock_info *ptp, long scaled_ppm)
 
 	tmr_add = neg_adj ? tmr_add - diff : tmr_add + diff;
 
-	qoriq_write(&regs->ctrl_regs->tmr_add, tmr_add);
+	ptp_qoriq->write(&regs->ctrl_regs->tmr_add, tmr_add);
 
 	return 0;
 }
@@ -326,15 +326,15 @@ int ptp_qoriq_enable(struct ptp_clock_info *ptp,
 
 	spin_lock_irqsave(&ptp_qoriq->lock, flags);
 
-	mask = qoriq_read(&regs->ctrl_regs->tmr_temask);
+	mask = ptp_qoriq->read(&regs->ctrl_regs->tmr_temask);
 	if (on) {
 		mask |= bit;
-		qoriq_write(&regs->ctrl_regs->tmr_tevent, bit);
+		ptp_qoriq->write(&regs->ctrl_regs->tmr_tevent, bit);
 	} else {
 		mask &= ~bit;
 	}
 
-	qoriq_write(&regs->ctrl_regs->tmr_temask, mask);
+	ptp_qoriq->write(&regs->ctrl_regs->tmr_temask, mask);
 
 	spin_unlock_irqrestore(&ptp_qoriq->lock, flags);
 	return 0;
@@ -496,6 +496,14 @@ int ptp_qoriq_init(struct ptp_qoriq *ptp_qoriq, void __iomem *base,
 			return -ENODEV;
 	}
 
+	if (of_property_read_bool(node, "little-endian")) {
+		ptp_qoriq->read = qoriq_read_le;
+		ptp_qoriq->write = qoriq_write_le;
+	} else {
+		ptp_qoriq->read = qoriq_read_be;
+		ptp_qoriq->write = qoriq_write_be;
+	}
+
 	if (of_device_is_compatible(node, "fsl,fman-ptp-timer")) {
 		ptp_qoriq->regs.ctrl_regs = base + FMAN_CTRL_REGS_OFFSET;
 		ptp_qoriq->regs.alarm_regs = base + FMAN_ALARM_REGS_OFFSET;
@@ -519,13 +527,14 @@ int ptp_qoriq_init(struct ptp_qoriq *ptp_qoriq, void __iomem *base,
 	spin_lock_irqsave(&ptp_qoriq->lock, flags);
 
 	regs = &ptp_qoriq->regs;
-	qoriq_write(&regs->ctrl_regs->tmr_ctrl,   tmr_ctrl);
-	qoriq_write(&regs->ctrl_regs->tmr_add,    ptp_qoriq->tmr_add);
-	qoriq_write(&regs->ctrl_regs->tmr_prsc,   ptp_qoriq->tmr_prsc);
-	qoriq_write(&regs->fiper_regs->tmr_fiper1, ptp_qoriq->tmr_fiper1);
-	qoriq_write(&regs->fiper_regs->tmr_fiper2, ptp_qoriq->tmr_fiper2);
+	ptp_qoriq->write(&regs->ctrl_regs->tmr_ctrl, tmr_ctrl);
+	ptp_qoriq->write(&regs->ctrl_regs->tmr_add, ptp_qoriq->tmr_add);
+	ptp_qoriq->write(&regs->ctrl_regs->tmr_prsc, ptp_qoriq->tmr_prsc);
+	ptp_qoriq->write(&regs->fiper_regs->tmr_fiper1, ptp_qoriq->tmr_fiper1);
+	ptp_qoriq->write(&regs->fiper_regs->tmr_fiper2, ptp_qoriq->tmr_fiper2);
 	set_alarm(ptp_qoriq);
-	qoriq_write(&regs->ctrl_regs->tmr_ctrl,   tmr_ctrl|FIPERST|RTPE|TE|FRD);
+	ptp_qoriq->write(&regs->ctrl_regs->tmr_ctrl,
+			 tmr_ctrl|FIPERST|RTPE|TE|FRD);
 
 	spin_unlock_irqrestore(&ptp_qoriq->lock, flags);
 
@@ -543,8 +552,8 @@ void ptp_qoriq_free(struct ptp_qoriq *ptp_qoriq)
 {
 	struct ptp_qoriq_registers *regs = &ptp_qoriq->regs;
 
-	qoriq_write(&regs->ctrl_regs->tmr_temask, 0);
-	qoriq_write(&regs->ctrl_regs->tmr_ctrl,   0);
+	ptp_qoriq->write(&regs->ctrl_regs->tmr_temask, 0);
+	ptp_qoriq->write(&regs->ctrl_regs->tmr_ctrl,   0);
 
 	ptp_qoriq_remove_debugfs(ptp_qoriq);
 	ptp_clock_unregister(ptp_qoriq->clock);
diff --git a/drivers/ptp/ptp_qoriq_debugfs.c b/drivers/ptp/ptp_qoriq_debugfs.c
index 3a70daf03727..e8dddcedf288 100644
--- a/drivers/ptp/ptp_qoriq_debugfs.c
+++ b/drivers/ptp/ptp_qoriq_debugfs.c
@@ -11,7 +11,7 @@ static int ptp_qoriq_fiper1_lpbk_get(void *data, u64 *val)
 	struct ptp_qoriq_registers *regs = &ptp_qoriq->regs;
 	u32 ctrl;
 
-	ctrl = qoriq_read(&regs->ctrl_regs->tmr_ctrl);
+	ctrl = ptp_qoriq->read(&regs->ctrl_regs->tmr_ctrl);
 	*val = ctrl & PP1L ? 1 : 0;
 
 	return 0;
@@ -23,13 +23,13 @@ static int ptp_qoriq_fiper1_lpbk_set(void *data, u64 val)
 	struct ptp_qoriq_registers *regs = &ptp_qoriq->regs;
 	u32 ctrl;
 
-	ctrl = qoriq_read(&regs->ctrl_regs->tmr_ctrl);
+	ctrl = ptp_qoriq->read(&regs->ctrl_regs->tmr_ctrl);
 	if (val == 0)
 		ctrl &= ~PP1L;
 	else
 		ctrl |= PP1L;
 
-	qoriq_write(&regs->ctrl_regs->tmr_ctrl, ctrl);
+	ptp_qoriq->write(&regs->ctrl_regs->tmr_ctrl, ctrl);
 	return 0;
 }
 
@@ -42,7 +42,7 @@ static int ptp_qoriq_fiper2_lpbk_get(void *data, u64 *val)
 	struct ptp_qoriq_registers *regs = &ptp_qoriq->regs;
 	u32 ctrl;
 
-	ctrl = qoriq_read(&regs->ctrl_regs->tmr_ctrl);
+	ctrl = ptp_qoriq->read(&regs->ctrl_regs->tmr_ctrl);
 	*val = ctrl & PP2L ? 1 : 0;
 
 	return 0;
@@ -54,13 +54,13 @@ static int ptp_qoriq_fiper2_lpbk_set(void *data, u64 val)
 	struct ptp_qoriq_registers *regs = &ptp_qoriq->regs;
 	u32 ctrl;
 
-	ctrl = qoriq_read(&regs->ctrl_regs->tmr_ctrl);
+	ctrl = ptp_qoriq->read(&regs->ctrl_regs->tmr_ctrl);
 	if (val == 0)
 		ctrl &= ~PP2L;
 	else
 		ctrl |= PP2L;
 
-	qoriq_write(&regs->ctrl_regs->tmr_ctrl, ctrl);
+	ptp_qoriq->write(&regs->ctrl_regs->tmr_ctrl, ctrl);
 	return 0;
 }
 
diff --git a/include/linux/fsl/ptp_qoriq.h b/include/linux/fsl/ptp_qoriq.h
index 757aec385493..1f8bb6a6a121 100644
--- a/include/linux/fsl/ptp_qoriq.h
+++ b/include/linux/fsl/ptp_qoriq.h
@@ -157,21 +157,30 @@ struct ptp_qoriq {
 	u32 cksel;
 	u32 tmr_fiper1;
 	u32 tmr_fiper2;
+	u32 (*read)(unsigned __iomem *addr);
+	void (*write)(unsigned __iomem *addr, u32 val);
 };
 
-static inline u32 qoriq_read(unsigned __iomem *addr)
+static inline u32 qoriq_read_be(unsigned __iomem *addr)
 {
-	u32 val;
-
-	val = ioread32be(addr);
-	return val;
+	return ioread32be(addr);
 }
 
-static inline void qoriq_write(unsigned __iomem *addr, u32 val)
+static inline void qoriq_write_be(unsigned __iomem *addr, u32 val)
 {
 	iowrite32be(val, addr);
 }
 
+static inline u32 qoriq_read_le(unsigned __iomem *addr)
+{
+	return ioread32(addr);
+}
+
+static inline void qoriq_write_le(unsigned __iomem *addr, u32 val)
+{
+	iowrite32(val, addr);
+}
+
 irqreturn_t ptp_qoriq_isr(int irq, void *priv);
 int ptp_qoriq_init(struct ptp_qoriq *ptp_qoriq, void __iomem *base,
 		   const struct ptp_clock_info caps);
-- 
cgit v1.2.3


From d4e176870bffde373d9688c54aad8f92b3394ba6 Mon Sep 17 00:00:00 2001
From: Yangbo Lu <yangbo.lu@nxp.com>
Date: Tue, 12 Feb 2019 12:24:01 +0800
Subject: ptp_qoriq: fix register memory map

The 1588 timer on eTSEC Ethernet controller uses different
register memory map with DPAA Ethernet controller.
Now the new ENETC Ethernet controller uses same reigster
memory map with DPAA. To support ENETC, let's use register
memory map of DPAA/ENETC in default.

Signed-off-by: Yangbo Lu <yangbo.lu@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/ptp/ptp_qoriq.c       | 11 ++++++-----
 include/linux/fsl/ptp_qoriq.h | 18 +++++++++---------
 2 files changed, 15 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/drivers/ptp/ptp_qoriq.c b/drivers/ptp/ptp_qoriq.c
index ed4dc398c57b..42d3654f77f0 100644
--- a/drivers/ptp/ptp_qoriq.c
+++ b/drivers/ptp/ptp_qoriq.c
@@ -504,11 +504,12 @@ int ptp_qoriq_init(struct ptp_qoriq *ptp_qoriq, void __iomem *base,
 		ptp_qoriq->write = qoriq_write_be;
 	}
 
-	if (of_device_is_compatible(node, "fsl,fman-ptp-timer")) {
-		ptp_qoriq->regs.ctrl_regs = base + FMAN_CTRL_REGS_OFFSET;
-		ptp_qoriq->regs.alarm_regs = base + FMAN_ALARM_REGS_OFFSET;
-		ptp_qoriq->regs.fiper_regs = base + FMAN_FIPER_REGS_OFFSET;
-		ptp_qoriq->regs.etts_regs = base + FMAN_ETTS_REGS_OFFSET;
+	/* The eTSEC uses differnt memory map with DPAA/ENETC */
+	if (of_device_is_compatible(node, "fsl,etsec-ptp")) {
+		ptp_qoriq->regs.ctrl_regs = base + ETSEC_CTRL_REGS_OFFSET;
+		ptp_qoriq->regs.alarm_regs = base + ETSEC_ALARM_REGS_OFFSET;
+		ptp_qoriq->regs.fiper_regs = base + ETSEC_FIPER_REGS_OFFSET;
+		ptp_qoriq->regs.etts_regs = base + ETSEC_ETTS_REGS_OFFSET;
 	} else {
 		ptp_qoriq->regs.ctrl_regs = base + CTRL_REGS_OFFSET;
 		ptp_qoriq->regs.alarm_regs = base + ALARM_REGS_OFFSET;
diff --git a/include/linux/fsl/ptp_qoriq.h b/include/linux/fsl/ptp_qoriq.h
index 1f8bb6a6a121..f127adb71041 100644
--- a/include/linux/fsl/ptp_qoriq.h
+++ b/include/linux/fsl/ptp_qoriq.h
@@ -58,15 +58,15 @@ struct ptp_qoriq_registers {
 };
 
 /* Offset definitions for the four register groups */
-#define CTRL_REGS_OFFSET	0x0
-#define ALARM_REGS_OFFSET	0x40
-#define FIPER_REGS_OFFSET	0x80
-#define ETTS_REGS_OFFSET	0xa0
-
-#define FMAN_CTRL_REGS_OFFSET	0x80
-#define FMAN_ALARM_REGS_OFFSET	0xb8
-#define FMAN_FIPER_REGS_OFFSET	0xd0
-#define FMAN_ETTS_REGS_OFFSET	0xe0
+#define ETSEC_CTRL_REGS_OFFSET	0x0
+#define ETSEC_ALARM_REGS_OFFSET	0x40
+#define ETSEC_FIPER_REGS_OFFSET	0x80
+#define ETSEC_ETTS_REGS_OFFSET	0xa0
+
+#define CTRL_REGS_OFFSET	0x80
+#define ALARM_REGS_OFFSET	0xb8
+#define FIPER_REGS_OFFSET	0xd0
+#define ETTS_REGS_OFFSET	0xe0
 
 
 /* Bit definitions for the TMR_CTRL register */
-- 
cgit v1.2.3


From c266f64dbfa2a970a13b0574246c0ddfec492365 Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@mellanox.com>
Date: Mon, 11 Feb 2019 10:55:32 +0200
Subject: net: sched: protect block state with mutex

Currently, tcf_block doesn't use any synchronization mechanisms to protect
critical sections that manage lifetime of its chains. block->chain_list and
multiple variables in tcf_chain that control its lifetime assume external
synchronization provided by global rtnl lock. Converting chain reference
counting to atomic reference counters is not possible because cls API uses
multiple counters and flags to control chain lifetime, so all of them must
be synchronized in chain get/put code.

Use single per-block lock to protect block data and manage lifetime of all
chains on the block. Always take block->lock when accessing chain_list.
Chain get and put modify chain lifetime-management data and parent block's
chain_list, so take the lock in these functions. Verify block->lock state
with assertions in functions that expect to be called with the lock taken
and are called from multiple places. Take block->lock when accessing
filter_chain_list.

In order to allow parallel update of rules on single block, move all calls
to classifiers outside of critical sections protected by new block->lock.
Rearrange chain get and put functions code to only access protected chain
data while holding block lock:
- Rearrange code to only access chain reference counter and chain action
  reference counter while holding block lock.
- Extract code that requires block->lock from tcf_chain_destroy() into
  standalone tcf_chain_destroy() function that is called by
  __tcf_chain_put() in same critical section that changes chain reference
  counters.

Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sch_generic.h |  5 +++
 net/sched/cls_api.c       | 84 +++++++++++++++++++++++++++++++++++++++--------
 2 files changed, 76 insertions(+), 13 deletions(-)

(limited to 'include')

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 7a4957599874..31b8ea66a47d 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -12,6 +12,7 @@
 #include <linux/list.h>
 #include <linux/refcount.h>
 #include <linux/workqueue.h>
+#include <linux/mutex.h>
 #include <net/gen_stats.h>
 #include <net/rtnetlink.h>
 
@@ -352,6 +353,10 @@ struct tcf_chain {
 };
 
 struct tcf_block {
+	/* Lock protects tcf_block and lifetime-management data of chains
+	 * attached to the block (refcnt, action_refcnt, explicitly_created).
+	 */
+	struct mutex lock;
 	struct list_head chain_list;
 	u32 index; /* block index for shared blocks */
 	refcount_t refcnt;
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 02cf6d2fa0e1..806e7158a7e8 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -201,6 +201,9 @@ static void tcf_proto_destroy(struct tcf_proto *tp,
 	kfree_rcu(tp, rcu);
 }
 
+#define ASSERT_BLOCK_LOCKED(block)					\
+	lockdep_assert_held(&(block)->lock)
+
 struct tcf_filter_chain_list_item {
 	struct list_head list;
 	tcf_chain_head_change_t *chain_head_change;
@@ -212,6 +215,8 @@ static struct tcf_chain *tcf_chain_create(struct tcf_block *block,
 {
 	struct tcf_chain *chain;
 
+	ASSERT_BLOCK_LOCKED(block);
+
 	chain = kzalloc(sizeof(*chain), GFP_KERNEL);
 	if (!chain)
 		return NULL;
@@ -243,25 +248,51 @@ static void tcf_chain0_head_change(struct tcf_chain *chain,
 		tcf_chain_head_change_item(item, tp_head);
 }
 
-static void tcf_chain_destroy(struct tcf_chain *chain)
+/* Returns true if block can be safely freed. */
+
+static bool tcf_chain_detach(struct tcf_chain *chain)
 {
 	struct tcf_block *block = chain->block;
 
+	ASSERT_BLOCK_LOCKED(block);
+
 	list_del(&chain->list);
 	if (!chain->index)
 		block->chain0.chain = NULL;
+
+	if (list_empty(&block->chain_list) &&
+	    refcount_read(&block->refcnt) == 0)
+		return true;
+
+	return false;
+}
+
+static void tcf_block_destroy(struct tcf_block *block)
+{
+	mutex_destroy(&block->lock);
+	kfree_rcu(block, rcu);
+}
+
+static void tcf_chain_destroy(struct tcf_chain *chain, bool free_block)
+{
+	struct tcf_block *block = chain->block;
+
 	kfree(chain);
-	if (list_empty(&block->chain_list) && !refcount_read(&block->refcnt))
-		kfree_rcu(block, rcu);
+	if (free_block)
+		tcf_block_destroy(block);
 }
 
 static void tcf_chain_hold(struct tcf_chain *chain)
 {
+	ASSERT_BLOCK_LOCKED(chain->block);
+
 	++chain->refcnt;
 }
 
 static bool tcf_chain_held_by_acts_only(struct tcf_chain *chain)
 {
+	ASSERT_BLOCK_LOCKED(chain->block);
+
 	/* In case all the references are action references, this
 	 * chain should not be shown to the user.
 	 */
@@ -273,6 +304,8 @@ static struct tcf_chain *tcf_chain_lookup(struct tcf_block *block,
 {
 	struct tcf_chain *chain;
 
+	ASSERT_BLOCK_LOCKED(block);
+
 	list_for_each_entry(chain, &block->chain_list, list) {
 		if (chain->index == chain_index)
 			return chain;
@@ -287,31 +320,40 @@ static struct tcf_chain *__tcf_chain_get(struct tcf_block *block,
 					 u32 chain_index, bool create,
 					 bool by_act)
 {
-	struct tcf_chain *chain = tcf_chain_lookup(block, chain_index);
+	struct tcf_chain *chain = NULL;
+	bool is_first_reference;
 
+	mutex_lock(&block->lock);
+	chain = tcf_chain_lookup(block, chain_index);
 	if (chain) {
 		tcf_chain_hold(chain);
 	} else {
 		if (!create)
-			return NULL;
+			goto errout;
 		chain = tcf_chain_create(block, chain_index);
 		if (!chain)
-			return NULL;
+			goto errout;
 	}
 
 	if (by_act)
 		++chain->action_refcnt;
+	is_first_reference = chain->refcnt - chain->action_refcnt == 1;
+	mutex_unlock(&block->lock);
 
 	/* Send notification only in case we got the first
 	 * non-action reference. Until then, the chain acts only as
 	 * a placeholder for actions pointing to it and user ought
 	 * not know about them.
 	 */
-	if (chain->refcnt - chain->action_refcnt == 1 && !by_act)
+	if (is_first_reference && !by_act)
 		tc_chain_notify(chain, NULL, 0, NLM_F_CREATE | NLM_F_EXCL,
 				RTM_NEWCHAIN, false);
 
 	return chain;
+
+errout:
+	mutex_unlock(&block->lock);
+	return chain;
 }
 
 static struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index,
@@ -330,17 +372,31 @@ static void tc_chain_tmplt_del(struct tcf_chain *chain);
 
 static void __tcf_chain_put(struct tcf_chain *chain, bool by_act)
 {
+	struct tcf_block *block = chain->block;
+	bool is_last, free_block = false;
+	unsigned int refcnt;
+
+	mutex_lock(&block->lock);
 	if (by_act)
 		chain->action_refcnt--;
-	chain->refcnt--;
+
+	/* tc_chain_notify_delete can't be called while holding block lock.
+	 * However, when block is unlocked chain can be changed concurrently, so
+	 * save these to temporary variables.
+	 */
+	refcnt = --chain->refcnt;
+	is_last = refcnt - chain->action_refcnt == 0;
+	if (refcnt == 0)
+		free_block = tcf_chain_detach(chain);
+	mutex_unlock(&block->lock);
 
 	/* The last dropped non-action reference will trigger notification. */
-	if (chain->refcnt - chain->action_refcnt == 0 && !by_act)
+	if (is_last && !by_act)
 		tc_chain_notify(chain, NULL, 0, 0, RTM_DELCHAIN, false);
 
-	if (chain->refcnt == 0) {
+	if (refcnt == 0) {
 		tc_chain_tmplt_del(chain);
-		tcf_chain_destroy(chain);
+		tcf_chain_destroy(chain, free_block);
 	}
 }
 
@@ -772,6 +828,7 @@ static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q,
 		NL_SET_ERR_MSG(extack, "Memory allocation for block failed");
 		return ERR_PTR(-ENOMEM);
 	}
+	mutex_init(&block->lock);
 	INIT_LIST_HEAD(&block->chain_list);
 	INIT_LIST_HEAD(&block->cb_list);
 	INIT_LIST_HEAD(&block->owner_list);
@@ -835,7 +892,7 @@ static void tcf_block_put_all_chains(struct tcf_block *block)
 static void __tcf_block_put(struct tcf_block *block, struct Qdisc *q,
 			    struct tcf_block_ext_info *ei)
 {
-	if (refcount_dec_and_test(&block->refcnt)) {
+	if (refcount_dec_and_mutex_lock(&block->refcnt, &block->lock)) {
 		/* Flushing/putting all chains will cause the block to be
 		 * deallocated when last chain is freed. However, if chain_list
 		 * is empty, block has to be manually deallocated. After block
@@ -844,6 +901,7 @@ static void __tcf_block_put(struct tcf_block *block, struct Qdisc *q,
 		 */
 		bool free_block = list_empty(&block->chain_list);
 
+		mutex_unlock(&block->lock);
 		if (tcf_block_shared(block))
 			tcf_block_remove(block, block->net);
 		if (!free_block)
@@ -853,7 +911,7 @@ static void __tcf_block_put(struct tcf_block *block, struct Qdisc *q,
 			tcf_block_offload_unbind(block, q, ei);
 
 		if (free_block)
-			kfree_rcu(block, rcu);
+			tcf_block_destroy(block);
 		else
 			tcf_block_put_all_chains(block);
 	} else if (q) {
-- 
cgit v1.2.3


From bbf73830cd48cff1599811d4f69c7cfd49c7b869 Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@mellanox.com>
Date: Mon, 11 Feb 2019 10:55:36 +0200
Subject: net: sched: traverse chains in block with tcf_get_next_chain()

All users of block->chain_list rely on rtnl lock and assume that no new
chains are added when traversing the list. Use tcf_get_next_chain() to
traverse chain list without relying on rtnl mutex. This function iterates
over chains by taking reference to current iterator chain only and doesn't
assume external synchronization of chain list.

Don't take reference to all chains in block when flushing and use
tcf_get_next_chain() to safely iterate over chain list instead. Remove
tcf_block_put_all_chains() that is no longer used.

Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/pkt_cls.h |  2 ++
 net/sched/cls_api.c   | 96 +++++++++++++++++++++++++++++++++++++--------------
 net/sched/sch_api.c   |  4 ++-
 3 files changed, 76 insertions(+), 26 deletions(-)

(limited to 'include')

diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index cb8be396a11f..38bee7dd21d1 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -44,6 +44,8 @@ bool tcf_queue_work(struct rcu_work *rwork, work_func_t func);
 struct tcf_chain *tcf_chain_get_by_act(struct tcf_block *block,
 				       u32 chain_index);
 void tcf_chain_put_by_act(struct tcf_chain *chain);
+struct tcf_chain *tcf_get_next_chain(struct tcf_block *block,
+				     struct tcf_chain *chain);
 void tcf_block_netif_keep_dst(struct tcf_block *block);
 int tcf_block_get(struct tcf_block **p_block,
 		  struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q,
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 869ae44d7631..8e2ac785f6fd 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -883,28 +883,62 @@ static struct tcf_block *tcf_block_refcnt_get(struct net *net, u32 block_index)
 	return block;
 }
 
-static void tcf_block_flush_all_chains(struct tcf_block *block)
+static struct tcf_chain *
+__tcf_get_next_chain(struct tcf_block *block, struct tcf_chain *chain)
 {
-	struct tcf_chain *chain;
+	mutex_lock(&block->lock);
+	if (chain)
+		chain = list_is_last(&chain->list, &block->chain_list) ?
+			NULL : list_next_entry(chain, list);
+	else
+		chain = list_first_entry_or_null(&block->chain_list,
+						 struct tcf_chain, list);
 
-	/* Hold a refcnt for all chains, so that they don't disappear
-	 * while we are iterating.
-	 */
-	list_for_each_entry(chain, &block->chain_list, list)
+	/* skip all action-only chains */
+	while (chain && tcf_chain_held_by_acts_only(chain))
+		chain = list_is_last(&chain->list, &block->chain_list) ?
+			NULL : list_next_entry(chain, list);
+
+	if (chain)
 		tcf_chain_hold(chain);
+	mutex_unlock(&block->lock);
 
-	list_for_each_entry(chain, &block->chain_list, list)
-		tcf_chain_flush(chain);
+	return chain;
 }
 
-static void tcf_block_put_all_chains(struct tcf_block *block)
+/* Function to be used by all clients that want to iterate over all chains on
+ * block. It properly obtains block->lock and takes reference to chain before
+ * returning it. Users of this function must be tolerant to concurrent chain
+ * insertion/deletion or ensure that no concurrent chain modification is
+ * possible. Note that all netlink dump callbacks cannot guarantee to provide
+ * consistent dump because rtnl lock is released each time skb is filled with
+ * data and sent to user-space.
+ */
+
+struct tcf_chain *
+tcf_get_next_chain(struct tcf_block *block, struct tcf_chain *chain)
 {
-	struct tcf_chain *chain, *tmp;
+	struct tcf_chain *chain_next = __tcf_get_next_chain(block, chain);
 
-	/* At this point, all the chains should have refcnt >= 1. */
-	list_for_each_entry_safe(chain, tmp, &block->chain_list, list) {
-		tcf_chain_put_explicitly_created(chain);
+	if (chain)
 		tcf_chain_put(chain);
+
+	return chain_next;
+}
+EXPORT_SYMBOL(tcf_get_next_chain);
+
+static void tcf_block_flush_all_chains(struct tcf_block *block)
+{
+	struct tcf_chain *chain;
+
+	/* Last reference to block. At this point chains cannot be added or
+	 * removed concurrently.
+	 */
+	for (chain = tcf_get_next_chain(block, NULL);
+	     chain;
+	     chain = tcf_get_next_chain(block, chain)) {
+		tcf_chain_put_explicitly_created(chain);
+		tcf_chain_flush(chain);
 	}
 }
 
@@ -923,8 +957,6 @@ static void __tcf_block_put(struct tcf_block *block, struct Qdisc *q,
 		mutex_unlock(&block->lock);
 		if (tcf_block_shared(block))
 			tcf_block_remove(block, block->net);
-		if (!free_block)
-			tcf_block_flush_all_chains(block);
 
 		if (q)
 			tcf_block_offload_unbind(block, q, ei);
@@ -932,7 +964,7 @@ static void __tcf_block_put(struct tcf_block *block, struct Qdisc *q,
 		if (free_block)
 			tcf_block_destroy(block);
 		else
-			tcf_block_put_all_chains(block);
+			tcf_block_flush_all_chains(block);
 	} else if (q) {
 		tcf_block_offload_unbind(block, q, ei);
 	}
@@ -1266,11 +1298,15 @@ tcf_block_playback_offloads(struct tcf_block *block, tc_setup_cb_t *cb,
 			    void *cb_priv, bool add, bool offload_in_use,
 			    struct netlink_ext_ack *extack)
 {
-	struct tcf_chain *chain;
+	struct tcf_chain *chain, *chain_prev;
 	struct tcf_proto *tp;
 	int err;
 
-	list_for_each_entry(chain, &block->chain_list, list) {
+	for (chain = __tcf_get_next_chain(block, NULL);
+	     chain;
+	     chain_prev = chain,
+		     chain = __tcf_get_next_chain(block, chain),
+		     tcf_chain_put(chain_prev)) {
 		for (tp = rtnl_dereference(chain->filter_chain); tp;
 		     tp = rtnl_dereference(tp->next)) {
 			if (tp->ops->reoffload) {
@@ -1289,6 +1325,7 @@ tcf_block_playback_offloads(struct tcf_block *block, tc_setup_cb_t *cb,
 	return 0;
 
 err_playback_remove:
+	tcf_chain_put(chain);
 	tcf_block_playback_offloads(block, cb, cb_priv, false, offload_in_use,
 				    extack);
 	return err;
@@ -2023,11 +2060,11 @@ static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent,
 /* called with RTNL */
 static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
 {
+	struct tcf_chain *chain, *chain_prev;
 	struct net *net = sock_net(skb->sk);
 	struct nlattr *tca[TCA_MAX + 1];
 	struct Qdisc *q = NULL;
 	struct tcf_block *block;
-	struct tcf_chain *chain;
 	struct tcmsg *tcm = nlmsg_data(cb->nlh);
 	long index_start;
 	long index;
@@ -2091,12 +2128,17 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
 	index_start = cb->args[0];
 	index = 0;
 
-	list_for_each_entry(chain, &block->chain_list, list) {
+	for (chain = __tcf_get_next_chain(block, NULL);
+	     chain;
+	     chain_prev = chain,
+		     chain = __tcf_get_next_chain(block, chain),
+		     tcf_chain_put(chain_prev)) {
 		if (tca[TCA_CHAIN] &&
 		    nla_get_u32(tca[TCA_CHAIN]) != chain->index)
 			continue;
 		if (!tcf_chain_dump(chain, q, parent, skb, cb,
 				    index_start, &index)) {
+			tcf_chain_put(chain);
 			err = -EMSGSIZE;
 			break;
 		}
@@ -2364,11 +2406,11 @@ errout_block_locked:
 /* called with RTNL */
 static int tc_dump_chain(struct sk_buff *skb, struct netlink_callback *cb)
 {
+	struct tcf_chain *chain, *chain_prev;
 	struct net *net = sock_net(skb->sk);
 	struct nlattr *tca[TCA_MAX + 1];
 	struct Qdisc *q = NULL;
 	struct tcf_block *block;
-	struct tcf_chain *chain;
 	struct tcmsg *tcm = nlmsg_data(cb->nlh);
 	long index_start;
 	long index;
@@ -2432,7 +2474,11 @@ static int tc_dump_chain(struct sk_buff *skb, struct netlink_callback *cb)
 	index_start = cb->args[0];
 	index = 0;
 
-	list_for_each_entry(chain, &block->chain_list, list) {
+	for (chain = __tcf_get_next_chain(block, NULL);
+	     chain;
+	     chain_prev = chain,
+		     chain = __tcf_get_next_chain(block, chain),
+		     tcf_chain_put(chain_prev)) {
 		if ((tca[TCA_CHAIN] &&
 		     nla_get_u32(tca[TCA_CHAIN]) != chain->index))
 			continue;
@@ -2440,14 +2486,14 @@ static int tc_dump_chain(struct sk_buff *skb, struct netlink_callback *cb)
 			index++;
 			continue;
 		}
-		if (tcf_chain_held_by_acts_only(chain))
-			continue;
 		err = tc_chain_fill_node(chain, net, skb, block,
 					 NETLINK_CB(cb->skb).portid,
 					 cb->nlh->nlmsg_seq, NLM_F_MULTI,
 					 RTM_NEWCHAIN);
-		if (err <= 0)
+		if (err <= 0) {
+			tcf_chain_put(chain);
 			break;
+		}
 		index++;
 	}
 
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 03e26e8d0ec9..80058abc729f 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1909,7 +1909,9 @@ static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
 	block = cops->tcf_block(q, cl, NULL);
 	if (!block)
 		return;
-	list_for_each_entry(chain, &block->chain_list, list) {
+	for (chain = tcf_get_next_chain(block, NULL);
+	     chain;
+	     chain = tcf_get_next_chain(block, chain)) {
 		struct tcf_proto *tp;
 
 		for (tp = rtnl_dereference(chain->filter_chain);
-- 
cgit v1.2.3


From ed76f5edccc98fa66f2337f0b3b255d6e1a568b7 Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@mellanox.com>
Date: Mon, 11 Feb 2019 10:55:38 +0200
Subject: net: sched: protect filter_chain list with filter_chain_lock mutex

Extend tcf_chain with new filter_chain_lock mutex. Always lock the chain
when accessing filter_chain list, instead of relying on rtnl lock.
Dereference filter_chain with tcf_chain_dereference() lockdep macro to
verify that all users of chain_list have the lock taken.

Rearrange tp insert/remove code in tc_new_tfilter/tc_del_tfilter to execute
all necessary code while holding chain lock in order to prevent
invalidation of chain_info structure by potential concurrent change. This
also serializes calls to tcf_chain0_head_change(), which allows head change
callbacks to rely on filter_chain_lock for synchronization instead of rtnl
mutex.

Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sch_generic.h |  17 +++++++
 net/sched/cls_api.c       | 111 +++++++++++++++++++++++++++++++++-------------
 net/sched/sch_generic.c   |   6 ++-
 3 files changed, 101 insertions(+), 33 deletions(-)

(limited to 'include')

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 31b8ea66a47d..85993d7efee6 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -341,6 +341,8 @@ struct qdisc_skb_cb {
 typedef void tcf_chain_head_change_t(struct tcf_proto *tp_head, void *priv);
 
 struct tcf_chain {
+	/* Protects filter_chain. */
+	struct mutex filter_chain_lock;
 	struct tcf_proto __rcu *filter_chain;
 	struct list_head list;
 	struct tcf_block *block;
@@ -374,6 +376,21 @@ struct tcf_block {
 	struct rcu_head rcu;
 };
 
+#ifdef CONFIG_PROVE_LOCKING
+static inline bool lockdep_tcf_chain_is_locked(struct tcf_chain *chain)
+{
+	return lockdep_is_held(&chain->filter_chain_lock);
+}
+#else
+static inline bool lockdep_tcf_chain_is_locked(struct tcf_block *chain)
+{
+	return true;
+}
+#endif /* #ifdef CONFIG_PROVE_LOCKING */
+
+#define tcf_chain_dereference(p, chain)					\
+	rcu_dereference_protected(p, lockdep_tcf_chain_is_locked(chain))
+
 static inline void tcf_block_offload_inc(struct tcf_block *block, u32 *flags)
 {
 	if (*flags & TCA_CLS_FLAGS_IN_HW)
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 0dcce8b0c7b4..3fce30ae9a9b 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -221,6 +221,7 @@ static struct tcf_chain *tcf_chain_create(struct tcf_block *block,
 	if (!chain)
 		return NULL;
 	list_add_tail(&chain->list, &block->chain_list);
+	mutex_init(&chain->filter_chain_lock);
 	chain->block = block;
 	chain->index = chain_index;
 	chain->refcnt = 1;
@@ -280,6 +281,7 @@ static void tcf_chain_destroy(struct tcf_chain *chain, bool free_block)
 {
 	struct tcf_block *block = chain->block;
 
+	mutex_destroy(&chain->filter_chain_lock);
 	kfree(chain);
 	if (free_block)
 		tcf_block_destroy(block);
@@ -443,9 +445,13 @@ static void tcf_chain_put_explicitly_created(struct tcf_chain *chain)
 
 static void tcf_chain_flush(struct tcf_chain *chain)
 {
-	struct tcf_proto *tp = rtnl_dereference(chain->filter_chain);
+	struct tcf_proto *tp;
 
+	mutex_lock(&chain->filter_chain_lock);
+	tp = tcf_chain_dereference(chain->filter_chain, chain);
 	tcf_chain0_head_change(chain, NULL);
+	mutex_unlock(&chain->filter_chain_lock);
+
 	while (tp) {
 		RCU_INIT_POINTER(chain->filter_chain, tp->next);
 		tcf_proto_destroy(tp, NULL);
@@ -785,11 +791,29 @@ tcf_chain0_head_change_cb_add(struct tcf_block *block,
 
 	mutex_lock(&block->lock);
 	chain0 = block->chain0.chain;
-	if (chain0 && chain0->filter_chain)
-		tcf_chain_head_change_item(item, chain0->filter_chain);
-	list_add(&item->list, &block->chain0.filter_chain_list);
+	if (chain0)
+		tcf_chain_hold(chain0);
+	else
+		list_add(&item->list, &block->chain0.filter_chain_list);
 	mutex_unlock(&block->lock);
 
+	if (chain0) {
+		struct tcf_proto *tp_head;
+
+		mutex_lock(&chain0->filter_chain_lock);
+
+		tp_head = tcf_chain_dereference(chain0->filter_chain, chain0);
+		if (tp_head)
+			tcf_chain_head_change_item(item, tp_head);
+
+		mutex_lock(&block->lock);
+		list_add(&item->list, &block->chain0.filter_chain_list);
+		mutex_unlock(&block->lock);
+
+		mutex_unlock(&chain0->filter_chain_lock);
+		tcf_chain_put(chain0);
+	}
+
 	return 0;
 }
 
@@ -1464,9 +1488,10 @@ struct tcf_chain_info {
 	struct tcf_proto __rcu *next;
 };
 
-static struct tcf_proto *tcf_chain_tp_prev(struct tcf_chain_info *chain_info)
+static struct tcf_proto *tcf_chain_tp_prev(struct tcf_chain *chain,
+					   struct tcf_chain_info *chain_info)
 {
-	return rtnl_dereference(*chain_info->pprev);
+	return tcf_chain_dereference(*chain_info->pprev, chain);
 }
 
 static void tcf_chain_tp_insert(struct tcf_chain *chain,
@@ -1475,7 +1500,7 @@ static void tcf_chain_tp_insert(struct tcf_chain *chain,
 {
 	if (*chain_info->pprev == chain->filter_chain)
 		tcf_chain0_head_change(chain, tp);
-	RCU_INIT_POINTER(tp->next, tcf_chain_tp_prev(chain_info));
+	RCU_INIT_POINTER(tp->next, tcf_chain_tp_prev(chain, chain_info));
 	rcu_assign_pointer(*chain_info->pprev, tp);
 	tcf_chain_hold(chain);
 }
@@ -1484,7 +1509,7 @@ static void tcf_chain_tp_remove(struct tcf_chain *chain,
 				struct tcf_chain_info *chain_info,
 				struct tcf_proto *tp)
 {
-	struct tcf_proto *next = rtnl_dereference(chain_info->next);
+	struct tcf_proto *next = tcf_chain_dereference(chain_info->next, chain);
 
 	if (tp == chain->filter_chain)
 		tcf_chain0_head_change(chain, next);
@@ -1502,7 +1527,8 @@ static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain,
 
 	/* Check the chain for existence of proto-tcf with this priority */
 	for (pprev = &chain->filter_chain;
-	     (tp = rtnl_dereference(*pprev)); pprev = &tp->next) {
+	     (tp = tcf_chain_dereference(*pprev, chain));
+	     pprev = &tp->next) {
 		if (tp->prio >= prio) {
 			if (tp->prio == prio) {
 				if (prio_allocate ||
@@ -1710,12 +1736,13 @@ replay:
 		goto errout;
 	}
 
+	mutex_lock(&chain->filter_chain_lock);
 	tp = tcf_chain_tp_find(chain, &chain_info, protocol,
 			       prio, prio_allocate);
 	if (IS_ERR(tp)) {
 		NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found");
 		err = PTR_ERR(tp);
-		goto errout;
+		goto errout_locked;
 	}
 
 	if (tp == NULL) {
@@ -1724,29 +1751,37 @@ replay:
 		if (tca[TCA_KIND] == NULL || !protocol) {
 			NL_SET_ERR_MSG(extack, "Filter kind and protocol must be specified");
 			err = -EINVAL;
-			goto errout;
+			goto errout_locked;
 		}
 
 		if (!(n->nlmsg_flags & NLM_F_CREATE)) {
 			NL_SET_ERR_MSG(extack, "Need both RTM_NEWTFILTER and NLM_F_CREATE to create a new filter");
 			err = -ENOENT;
-			goto errout;
+			goto errout_locked;
 		}
 
 		if (prio_allocate)
-			prio = tcf_auto_prio(tcf_chain_tp_prev(&chain_info));
+			prio = tcf_auto_prio(tcf_chain_tp_prev(chain,
+							       &chain_info));
 
+		mutex_unlock(&chain->filter_chain_lock);
 		tp = tcf_proto_create(nla_data(tca[TCA_KIND]),
 				      protocol, prio, chain, extack);
 		if (IS_ERR(tp)) {
 			err = PTR_ERR(tp);
 			goto errout;
 		}
+
+		mutex_lock(&chain->filter_chain_lock);
+		tcf_chain_tp_insert(chain, &chain_info, tp);
+		mutex_unlock(&chain->filter_chain_lock);
 		tp_created = 1;
 	} else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) {
 		NL_SET_ERR_MSG(extack, "Specified filter kind does not match existing one");
 		err = -EINVAL;
-		goto errout;
+		goto errout_locked;
+	} else {
+		mutex_unlock(&chain->filter_chain_lock);
 	}
 
 	fh = tp->ops->get(tp, t->tcm_handle);
@@ -1772,15 +1807,11 @@ replay:
 	err = tp->ops->change(net, skb, tp, cl, t->tcm_handle, tca, &fh,
 			      n->nlmsg_flags & NLM_F_CREATE ? TCA_ACT_NOREPLACE : TCA_ACT_REPLACE,
 			      extack);
-	if (err == 0) {
-		if (tp_created)
-			tcf_chain_tp_insert(chain, &chain_info, tp);
+	if (err == 0)
 		tfilter_notify(net, skb, n, tp, block, q, parent, fh,
 			       RTM_NEWTFILTER, false);
-	} else {
-		if (tp_created)
-			tcf_proto_destroy(tp, NULL);
-	}
+	else if (tp_created)
+		tcf_proto_destroy(tp, NULL);
 
 errout:
 	if (chain)
@@ -1790,6 +1821,10 @@ errout:
 		/* Replay the request. */
 		goto replay;
 	return err;
+
+errout_locked:
+	mutex_unlock(&chain->filter_chain_lock);
+	goto errout;
 }
 
 static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
@@ -1865,31 +1900,34 @@ static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
 		goto errout;
 	}
 
+	mutex_lock(&chain->filter_chain_lock);
 	tp = tcf_chain_tp_find(chain, &chain_info, protocol,
 			       prio, false);
 	if (!tp || IS_ERR(tp)) {
 		NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found");
 		err = tp ? PTR_ERR(tp) : -ENOENT;
-		goto errout;
+		goto errout_locked;
 	} else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) {
 		NL_SET_ERR_MSG(extack, "Specified filter kind does not match existing one");
 		err = -EINVAL;
+		goto errout_locked;
+	} else if (t->tcm_handle == 0) {
+		tcf_chain_tp_remove(chain, &chain_info, tp);
+		mutex_unlock(&chain->filter_chain_lock);
+
+		tfilter_notify(net, skb, n, tp, block, q, parent, fh,
+			       RTM_DELTFILTER, false);
+		tcf_proto_destroy(tp, extack);
+		err = 0;
 		goto errout;
 	}
+	mutex_unlock(&chain->filter_chain_lock);
 
 	fh = tp->ops->get(tp, t->tcm_handle);
 
 	if (!fh) {
-		if (t->tcm_handle == 0) {
-			tcf_chain_tp_remove(chain, &chain_info, tp);
-			tfilter_notify(net, skb, n, tp, block, q, parent, fh,
-				       RTM_DELTFILTER, false);
-			tcf_proto_destroy(tp, extack);
-			err = 0;
-		} else {
-			NL_SET_ERR_MSG(extack, "Specified filter handle not found");
-			err = -ENOENT;
-		}
+		NL_SET_ERR_MSG(extack, "Specified filter handle not found");
+		err = -ENOENT;
 	} else {
 		bool last;
 
@@ -1899,7 +1937,10 @@ static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
 		if (err)
 			goto errout;
 		if (last) {
+			mutex_lock(&chain->filter_chain_lock);
 			tcf_chain_tp_remove(chain, &chain_info, tp);
+			mutex_unlock(&chain->filter_chain_lock);
+
 			tcf_proto_destroy(tp, extack);
 		}
 	}
@@ -1909,6 +1950,10 @@ errout:
 		tcf_chain_put(chain);
 	tcf_block_release(q, block);
 	return err;
+
+errout_locked:
+	mutex_unlock(&chain->filter_chain_lock);
+	goto errout;
 }
 
 static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
@@ -1966,8 +2011,10 @@ static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
 		goto errout;
 	}
 
+	mutex_lock(&chain->filter_chain_lock);
 	tp = tcf_chain_tp_find(chain, &chain_info, protocol,
 			       prio, false);
+	mutex_unlock(&chain->filter_chain_lock);
 	if (!tp || IS_ERR(tp)) {
 		NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found");
 		err = tp ? PTR_ERR(tp) : -ENOENT;
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 66ba2ce2320f..e24568f9246c 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -1366,7 +1366,11 @@ static void mini_qdisc_rcu_func(struct rcu_head *head)
 void mini_qdisc_pair_swap(struct mini_Qdisc_pair *miniqp,
 			  struct tcf_proto *tp_head)
 {
-	struct mini_Qdisc *miniq_old = rtnl_dereference(*miniqp->p_miniq);
+	/* Protected with chain0->filter_chain_lock.
+	 * Can't access chain directly because tp_head can be NULL.
+	 */
+	struct mini_Qdisc *miniq_old =
+		rcu_dereference_protected(*miniqp->p_miniq, 1);
 	struct mini_Qdisc *miniq;
 
 	if (!tp_head) {
-- 
cgit v1.2.3


From 4dbfa766440c6dfe3d10f077cde966a7d11b58f1 Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@mellanox.com>
Date: Mon, 11 Feb 2019 10:55:39 +0200
Subject: net: sched: introduce reference counting for tcf_proto

In order to remove dependency on rtnl lock and allow concurrent tcf_proto
modification, extend tcf_proto with reference counter. Implement helper
get/put functions for tcf proto and use them to modify cls API to always
take reference to tcf_proto while using it. Only release reference to
parent chain after releasing last reference to tp.

Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sch_generic.h |  1 +
 net/sched/cls_api.c       | 53 ++++++++++++++++++++++++++++++++++++++---------
 2 files changed, 44 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 85993d7efee6..4372c08fc4d9 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -322,6 +322,7 @@ struct tcf_proto {
 	void			*data;
 	const struct tcf_proto_ops	*ops;
 	struct tcf_chain	*chain;
+	refcount_t		refcnt;
 	struct rcu_head		rcu;
 };
 
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 3fce30ae9a9b..37c05b96898f 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -180,6 +180,7 @@ static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol,
 	tp->protocol = protocol;
 	tp->prio = prio;
 	tp->chain = chain;
+	refcount_set(&tp->refcnt, 1);
 
 	err = tp->ops->init(tp);
 	if (err) {
@@ -193,14 +194,29 @@ errout:
 	return ERR_PTR(err);
 }
 
+static void tcf_proto_get(struct tcf_proto *tp)
+{
+	refcount_inc(&tp->refcnt);
+}
+
+static void tcf_chain_put(struct tcf_chain *chain);
+
 static void tcf_proto_destroy(struct tcf_proto *tp,
 			      struct netlink_ext_ack *extack)
 {
 	tp->ops->destroy(tp, extack);
+	tcf_chain_put(tp->chain);
 	module_put(tp->ops->owner);
 	kfree_rcu(tp, rcu);
 }
 
+static void tcf_proto_put(struct tcf_proto *tp,
+			  struct netlink_ext_ack *extack)
+{
+	if (refcount_dec_and_test(&tp->refcnt))
+		tcf_proto_destroy(tp, extack);
+}
+
 #define ASSERT_BLOCK_LOCKED(block)					\
 	lockdep_assert_held(&(block)->lock)
 
@@ -445,18 +461,18 @@ static void tcf_chain_put_explicitly_created(struct tcf_chain *chain)
 
 static void tcf_chain_flush(struct tcf_chain *chain)
 {
-	struct tcf_proto *tp;
+	struct tcf_proto *tp, *tp_next;
 
 	mutex_lock(&chain->filter_chain_lock);
 	tp = tcf_chain_dereference(chain->filter_chain, chain);
+	RCU_INIT_POINTER(chain->filter_chain, NULL);
 	tcf_chain0_head_change(chain, NULL);
 	mutex_unlock(&chain->filter_chain_lock);
 
 	while (tp) {
-		RCU_INIT_POINTER(chain->filter_chain, tp->next);
-		tcf_proto_destroy(tp, NULL);
-		tp = rtnl_dereference(chain->filter_chain);
-		tcf_chain_put(chain);
+		tp_next = rcu_dereference_protected(tp->next, 1);
+		tcf_proto_put(tp, NULL);
+		tp = tp_next;
 	}
 }
 
@@ -1500,9 +1516,9 @@ static void tcf_chain_tp_insert(struct tcf_chain *chain,
 {
 	if (*chain_info->pprev == chain->filter_chain)
 		tcf_chain0_head_change(chain, tp);
+	tcf_proto_get(tp);
 	RCU_INIT_POINTER(tp->next, tcf_chain_tp_prev(chain, chain_info));
 	rcu_assign_pointer(*chain_info->pprev, tp);
-	tcf_chain_hold(chain);
 }
 
 static void tcf_chain_tp_remove(struct tcf_chain *chain,
@@ -1514,7 +1530,6 @@ static void tcf_chain_tp_remove(struct tcf_chain *chain,
 	if (tp == chain->filter_chain)
 		tcf_chain0_head_change(chain, next);
 	RCU_INIT_POINTER(*chain_info->pprev, next);
-	tcf_chain_put(chain);
 }
 
 static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain,
@@ -1541,7 +1556,12 @@ static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain,
 		}
 	}
 	chain_info->pprev = pprev;
-	chain_info->next = tp ? tp->next : NULL;
+	if (tp) {
+		chain_info->next = tp->next;
+		tcf_proto_get(tp);
+	} else {
+		chain_info->next = NULL;
+	}
 	return tp;
 }
 
@@ -1699,6 +1719,7 @@ replay:
 	prio = TC_H_MAJ(t->tcm_info);
 	prio_allocate = false;
 	parent = t->tcm_parent;
+	tp = NULL;
 	cl = 0;
 
 	if (prio == 0) {
@@ -1816,6 +1837,12 @@ replay:
 errout:
 	if (chain)
 		tcf_chain_put(chain);
+	if (chain) {
+		if (tp && !IS_ERR(tp))
+			tcf_proto_put(tp, NULL);
+		if (!tp_created)
+			tcf_chain_put(chain);
+	}
 	tcf_block_release(q, block);
 	if (err == -EAGAIN)
 		/* Replay the request. */
@@ -1946,8 +1973,11 @@ static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
 	}
 
 errout:
-	if (chain)
+	if (chain) {
+		if (tp && !IS_ERR(tp))
+			tcf_proto_put(tp, NULL);
 		tcf_chain_put(chain);
+	}
 	tcf_block_release(q, block);
 	return err;
 
@@ -2038,8 +2068,11 @@ static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
 	}
 
 errout:
-	if (chain)
+	if (chain) {
+		if (tp && !IS_ERR(tp))
+			tcf_proto_put(tp, NULL);
 		tcf_chain_put(chain);
+	}
 	tcf_block_release(q, block);
 	return err;
 }
-- 
cgit v1.2.3


From fe2923afc12490e92237d23fc0b29f31da72a4f9 Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@mellanox.com>
Date: Mon, 11 Feb 2019 10:55:40 +0200
Subject: net: sched: traverse classifiers in chain with tcf_get_next_proto()

All users of chain->filters_chain rely on rtnl lock and assume that no new
classifier instances are added when traversing the list. Use
tcf_get_next_proto() to traverse filters list without relying on rtnl
mutex. This function iterates over classifiers by taking reference to
current iterator classifier only and doesn't assume external
synchronization of filters list.

Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/pkt_cls.h |  2 ++
 net/sched/cls_api.c   | 70 +++++++++++++++++++++++++++++++++++++++++++--------
 net/sched/sch_api.c   |  4 +--
 3 files changed, 64 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 38bee7dd21d1..e5dafa5ee1b2 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -46,6 +46,8 @@ struct tcf_chain *tcf_chain_get_by_act(struct tcf_block *block,
 void tcf_chain_put_by_act(struct tcf_chain *chain);
 struct tcf_chain *tcf_get_next_chain(struct tcf_block *block,
 				     struct tcf_chain *chain);
+struct tcf_proto *tcf_get_next_proto(struct tcf_chain *chain,
+				     struct tcf_proto *tp);
 void tcf_block_netif_keep_dst(struct tcf_block *block);
 int tcf_block_get(struct tcf_block **p_block,
 		  struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q,
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 37c05b96898f..dca8a3bee9c2 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -980,6 +980,45 @@ tcf_get_next_chain(struct tcf_block *block, struct tcf_chain *chain)
 }
 EXPORT_SYMBOL(tcf_get_next_chain);
 
+static struct tcf_proto *
+__tcf_get_next_proto(struct tcf_chain *chain, struct tcf_proto *tp)
+{
+	ASSERT_RTNL();
+	mutex_lock(&chain->filter_chain_lock);
+
+	if (!tp)
+		tp = tcf_chain_dereference(chain->filter_chain, chain);
+	else
+		tp = tcf_chain_dereference(tp->next, chain);
+
+	if (tp)
+		tcf_proto_get(tp);
+
+	mutex_unlock(&chain->filter_chain_lock);
+
+	return tp;
+}
+
+/* Function to be used by all clients that want to iterate over all tp's on
+ * chain. Users of this function must be tolerant to concurrent tp
+ * insertion/deletion or ensure that no concurrent chain modification is
+ * possible. Note that all netlink dump callbacks cannot guarantee to provide
+ * consistent dump because rtnl lock is released each time skb is filled with
+ * data and sent to user-space.
+ */
+
+struct tcf_proto *
+tcf_get_next_proto(struct tcf_chain *chain, struct tcf_proto *tp)
+{
+	struct tcf_proto *tp_next = __tcf_get_next_proto(chain, tp);
+
+	if (tp)
+		tcf_proto_put(tp, NULL);
+
+	return tp_next;
+}
+EXPORT_SYMBOL(tcf_get_next_proto);
+
 static void tcf_block_flush_all_chains(struct tcf_block *block)
 {
 	struct tcf_chain *chain;
@@ -1352,7 +1391,7 @@ tcf_block_playback_offloads(struct tcf_block *block, tc_setup_cb_t *cb,
 			    struct netlink_ext_ack *extack)
 {
 	struct tcf_chain *chain, *chain_prev;
-	struct tcf_proto *tp;
+	struct tcf_proto *tp, *tp_prev;
 	int err;
 
 	for (chain = __tcf_get_next_chain(block, NULL);
@@ -1360,8 +1399,10 @@ tcf_block_playback_offloads(struct tcf_block *block, tc_setup_cb_t *cb,
 	     chain_prev = chain,
 		     chain = __tcf_get_next_chain(block, chain),
 		     tcf_chain_put(chain_prev)) {
-		for (tp = rtnl_dereference(chain->filter_chain); tp;
-		     tp = rtnl_dereference(tp->next)) {
+		for (tp = __tcf_get_next_proto(chain, NULL); tp;
+		     tp_prev = tp,
+			     tp = __tcf_get_next_proto(chain, tp),
+			     tcf_proto_put(tp_prev, NULL)) {
 			if (tp->ops->reoffload) {
 				err = tp->ops->reoffload(tp, add, cb, cb_priv,
 							 extack);
@@ -1378,6 +1419,7 @@ tcf_block_playback_offloads(struct tcf_block *block, tc_setup_cb_t *cb,
 	return 0;
 
 err_playback_remove:
+	tcf_proto_put(tp, NULL);
 	tcf_chain_put(chain);
 	tcf_block_playback_offloads(block, cb, cb_priv, false, offload_in_use,
 				    extack);
@@ -1677,8 +1719,8 @@ static void tfilter_notify_chain(struct net *net, struct sk_buff *oskb,
 {
 	struct tcf_proto *tp;
 
-	for (tp = rtnl_dereference(chain->filter_chain);
-	     tp; tp = rtnl_dereference(tp->next))
+	for (tp = tcf_get_next_proto(chain, NULL);
+	     tp; tp = tcf_get_next_proto(chain, tp))
 		tfilter_notify(net, oskb, n, tp, block,
 			       q, parent, NULL, event, false);
 }
@@ -2104,11 +2146,15 @@ static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent,
 	struct net *net = sock_net(skb->sk);
 	struct tcf_block *block = chain->block;
 	struct tcmsg *tcm = nlmsg_data(cb->nlh);
+	struct tcf_proto *tp, *tp_prev;
 	struct tcf_dump_args arg;
-	struct tcf_proto *tp;
 
-	for (tp = rtnl_dereference(chain->filter_chain);
-	     tp; tp = rtnl_dereference(tp->next), (*p_index)++) {
+	for (tp = __tcf_get_next_proto(chain, NULL);
+	     tp;
+	     tp_prev = tp,
+		     tp = __tcf_get_next_proto(chain, tp),
+		     tcf_proto_put(tp_prev, NULL),
+		     (*p_index)++) {
 		if (*p_index < index_start)
 			continue;
 		if (TC_H_MAJ(tcm->tcm_info) &&
@@ -2125,7 +2171,7 @@ static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent,
 					  NETLINK_CB(cb->skb).portid,
 					  cb->nlh->nlmsg_seq, NLM_F_MULTI,
 					  RTM_NEWTFILTER) <= 0)
-				return false;
+				goto errout;
 
 			cb->args[1] = 1;
 		}
@@ -2145,9 +2191,13 @@ static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent,
 		cb->args[2] = arg.w.cookie;
 		cb->args[1] = arg.w.count + 1;
 		if (arg.w.stop)
-			return false;
+			goto errout;
 	}
 	return true;
+
+errout:
+	tcf_proto_put(tp, NULL);
+	return false;
 }
 
 /* called with RTNL */
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 80058abc729f..9a530cad2759 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1914,8 +1914,8 @@ static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
 	     chain = tcf_get_next_chain(block, chain)) {
 		struct tcf_proto *tp;
 
-		for (tp = rtnl_dereference(chain->filter_chain);
-		     tp; tp = rtnl_dereference(tp->next)) {
+		for (tp = tcf_get_next_proto(chain, NULL);
+		     tp; tp = tcf_get_next_proto(chain, tp)) {
 			struct tcf_bind_args arg = {};
 
 			arg.w.fn = tcf_node_bind;
-- 
cgit v1.2.3


From 8b64678e0af8f4d62a40149baedebe78503a5255 Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@mellanox.com>
Date: Mon, 11 Feb 2019 10:55:41 +0200
Subject: net: sched: refactor tp insert/delete for concurrent execution

Implement unique insertion function to atomically attach tcf_proto to chain
after verifying that no other tcf proto with specified priority exists.
Implement delete function that verifies that tp is actually empty before
deleting it. Use these functions to refactor cls API to account for
concurrent tp and rule update instead of relying on rtnl lock. Add new
'deleting' flag to tcf proto. Use it to restart search when iterating over
tp's on chain to prevent accessing potentially inval tp->next pointer.

Extend tcf proto with spinlock that is intended to be used to protect its
data from concurrent modification instead of relying on rtnl mutex. Use it
to protect 'deleting' flag. Add lockdep macros to validate that lock is
held when accessing protected fields.

Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sch_generic.h |  18 +++++
 net/sched/cls_api.c       | 177 +++++++++++++++++++++++++++++++++++++++-------
 2 files changed, 170 insertions(+), 25 deletions(-)

(limited to 'include')

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 4372c08fc4d9..083e566fc380 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -322,6 +322,11 @@ struct tcf_proto {
 	void			*data;
 	const struct tcf_proto_ops	*ops;
 	struct tcf_chain	*chain;
+	/* Lock protects tcf_proto shared state and can be used by unlocked
+	 * classifiers to protect their private data.
+	 */
+	spinlock_t		lock;
+	bool			deleting;
 	refcount_t		refcnt;
 	struct rcu_head		rcu;
 };
@@ -382,16 +387,29 @@ static inline bool lockdep_tcf_chain_is_locked(struct tcf_chain *chain)
 {
 	return lockdep_is_held(&chain->filter_chain_lock);
 }
+
+static inline bool lockdep_tcf_proto_is_locked(struct tcf_proto *tp)
+{
+	return lockdep_is_held(&tp->lock);
+}
 #else
 static inline bool lockdep_tcf_chain_is_locked(struct tcf_block *chain)
 {
 	return true;
 }
+
+static inline bool lockdep_tcf_proto_is_locked(struct tcf_proto *tp)
+{
+	return true;
+}
 #endif /* #ifdef CONFIG_PROVE_LOCKING */
 
 #define tcf_chain_dereference(p, chain)					\
 	rcu_dereference_protected(p, lockdep_tcf_chain_is_locked(chain))
 
+#define tcf_proto_dereference(p, tp)					\
+	rcu_dereference_protected(p, lockdep_tcf_proto_is_locked(tp))
+
 static inline void tcf_block_offload_inc(struct tcf_block *block, u32 *flags)
 {
 	if (*flags & TCA_CLS_FLAGS_IN_HW)
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index dca8a3bee9c2..c6452e3bfc6a 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -180,6 +180,7 @@ static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol,
 	tp->protocol = protocol;
 	tp->prio = prio;
 	tp->chain = chain;
+	spin_lock_init(&tp->lock);
 	refcount_set(&tp->refcnt, 1);
 
 	err = tp->ops->init(tp);
@@ -217,6 +218,49 @@ static void tcf_proto_put(struct tcf_proto *tp,
 		tcf_proto_destroy(tp, extack);
 }
 
+static int walker_noop(struct tcf_proto *tp, void *d, struct tcf_walker *arg)
+{
+	return -1;
+}
+
+static bool tcf_proto_is_empty(struct tcf_proto *tp)
+{
+	struct tcf_walker walker = { .fn = walker_noop, };
+
+	if (tp->ops->walk) {
+		tp->ops->walk(tp, &walker);
+		return !walker.stop;
+	}
+	return true;
+}
+
+static bool tcf_proto_check_delete(struct tcf_proto *tp)
+{
+	spin_lock(&tp->lock);
+	if (tcf_proto_is_empty(tp))
+		tp->deleting = true;
+	spin_unlock(&tp->lock);
+	return tp->deleting;
+}
+
+static void tcf_proto_mark_delete(struct tcf_proto *tp)
+{
+	spin_lock(&tp->lock);
+	tp->deleting = true;
+	spin_unlock(&tp->lock);
+}
+
+static bool tcf_proto_is_deleting(struct tcf_proto *tp)
+{
+	bool deleting;
+
+	spin_lock(&tp->lock);
+	deleting = tp->deleting;
+	spin_unlock(&tp->lock);
+
+	return deleting;
+}
+
 #define ASSERT_BLOCK_LOCKED(block)					\
 	lockdep_assert_held(&(block)->lock)
 
@@ -983,13 +1027,27 @@ EXPORT_SYMBOL(tcf_get_next_chain);
 static struct tcf_proto *
 __tcf_get_next_proto(struct tcf_chain *chain, struct tcf_proto *tp)
 {
+	u32 prio = 0;
+
 	ASSERT_RTNL();
 	mutex_lock(&chain->filter_chain_lock);
 
-	if (!tp)
+	if (!tp) {
 		tp = tcf_chain_dereference(chain->filter_chain, chain);
-	else
+	} else if (tcf_proto_is_deleting(tp)) {
+		/* 'deleting' flag is set and chain->filter_chain_lock was
+		 * unlocked, which means next pointer could be invalid. Restart
+		 * search.
+		 */
+		prio = tp->prio + 1;
+		tp = tcf_chain_dereference(chain->filter_chain, chain);
+
+		for (; tp; tp = tcf_chain_dereference(tp->next, chain))
+			if (!tp->deleting && tp->prio >= prio)
+				break;
+	} else {
 		tp = tcf_chain_dereference(tp->next, chain);
+	}
 
 	if (tp)
 		tcf_proto_get(tp);
@@ -1569,11 +1627,85 @@ static void tcf_chain_tp_remove(struct tcf_chain *chain,
 {
 	struct tcf_proto *next = tcf_chain_dereference(chain_info->next, chain);
 
+	tcf_proto_mark_delete(tp);
 	if (tp == chain->filter_chain)
 		tcf_chain0_head_change(chain, next);
 	RCU_INIT_POINTER(*chain_info->pprev, next);
 }
 
+static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain,
+					   struct tcf_chain_info *chain_info,
+					   u32 protocol, u32 prio,
+					   bool prio_allocate);
+
+/* Try to insert new proto.
+ * If proto with specified priority already exists, free new proto
+ * and return existing one.
+ */
+
+static struct tcf_proto *tcf_chain_tp_insert_unique(struct tcf_chain *chain,
+						    struct tcf_proto *tp_new,
+						    u32 protocol, u32 prio)
+{
+	struct tcf_chain_info chain_info;
+	struct tcf_proto *tp;
+
+	mutex_lock(&chain->filter_chain_lock);
+
+	tp = tcf_chain_tp_find(chain, &chain_info,
+			       protocol, prio, false);
+	if (!tp)
+		tcf_chain_tp_insert(chain, &chain_info, tp_new);
+	mutex_unlock(&chain->filter_chain_lock);
+
+	if (tp) {
+		tcf_proto_destroy(tp_new, NULL);
+		tp_new = tp;
+	}
+
+	return tp_new;
+}
+
+static void tcf_chain_tp_delete_empty(struct tcf_chain *chain,
+				      struct tcf_proto *tp,
+				      struct netlink_ext_ack *extack)
+{
+	struct tcf_chain_info chain_info;
+	struct tcf_proto *tp_iter;
+	struct tcf_proto **pprev;
+	struct tcf_proto *next;
+
+	mutex_lock(&chain->filter_chain_lock);
+
+	/* Atomically find and remove tp from chain. */
+	for (pprev = &chain->filter_chain;
+	     (tp_iter = tcf_chain_dereference(*pprev, chain));
+	     pprev = &tp_iter->next) {
+		if (tp_iter == tp) {
+			chain_info.pprev = pprev;
+			chain_info.next = tp_iter->next;
+			WARN_ON(tp_iter->deleting);
+			break;
+		}
+	}
+	/* Verify that tp still exists and no new filters were inserted
+	 * concurrently.
+	 * Mark tp for deletion if it is empty.
+	 */
+	if (!tp_iter || !tcf_proto_check_delete(tp)) {
+		mutex_unlock(&chain->filter_chain_lock);
+		return;
+	}
+
+	next = tcf_chain_dereference(chain_info.next, chain);
+	if (tp == chain->filter_chain)
+		tcf_chain0_head_change(chain, next);
+	RCU_INIT_POINTER(*chain_info.pprev, next);
+	mutex_unlock(&chain->filter_chain_lock);
+
+	tcf_proto_put(tp, extack);
+}
+
 static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain,
 					   struct tcf_chain_info *chain_info,
 					   u32 protocol, u32 prio,
@@ -1809,6 +1941,8 @@ replay:
 	}
 
 	if (tp == NULL) {
+		struct tcf_proto *tp_new = NULL;
+
 		/* Proto-tcf does not exist, create new one */
 
 		if (tca[TCA_KIND] == NULL || !protocol) {
@@ -1828,25 +1962,25 @@ replay:
 							       &chain_info));
 
 		mutex_unlock(&chain->filter_chain_lock);
-		tp = tcf_proto_create(nla_data(tca[TCA_KIND]),
-				      protocol, prio, chain, extack);
-		if (IS_ERR(tp)) {
-			err = PTR_ERR(tp);
+		tp_new = tcf_proto_create(nla_data(tca[TCA_KIND]),
+					  protocol, prio, chain, extack);
+		if (IS_ERR(tp_new)) {
+			err = PTR_ERR(tp_new);
 			goto errout;
 		}
 
-		mutex_lock(&chain->filter_chain_lock);
-		tcf_chain_tp_insert(chain, &chain_info, tp);
-		mutex_unlock(&chain->filter_chain_lock);
 		tp_created = 1;
-	} else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) {
-		NL_SET_ERR_MSG(extack, "Specified filter kind does not match existing one");
-		err = -EINVAL;
-		goto errout_locked;
+		tp = tcf_chain_tp_insert_unique(chain, tp_new, protocol, prio);
 	} else {
 		mutex_unlock(&chain->filter_chain_lock);
 	}
 
+	if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) {
+		NL_SET_ERR_MSG(extack, "Specified filter kind does not match existing one");
+		err = -EINVAL;
+		goto errout;
+	}
+
 	fh = tp->ops->get(tp, t->tcm_handle);
 
 	if (!fh) {
@@ -1873,12 +2007,10 @@ replay:
 	if (err == 0)
 		tfilter_notify(net, skb, n, tp, block, q, parent, fh,
 			       RTM_NEWTFILTER, false);
-	else if (tp_created)
-		tcf_proto_destroy(tp, NULL);
 
 errout:
-	if (chain)
-		tcf_chain_put(chain);
+	if (err && tp_created)
+		tcf_chain_tp_delete_empty(chain, tp, NULL);
 	if (chain) {
 		if (tp && !IS_ERR(tp))
 			tcf_proto_put(tp, NULL);
@@ -1984,9 +2116,9 @@ static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
 		tcf_chain_tp_remove(chain, &chain_info, tp);
 		mutex_unlock(&chain->filter_chain_lock);
 
+		tcf_proto_put(tp, NULL);
 		tfilter_notify(net, skb, n, tp, block, q, parent, fh,
 			       RTM_DELTFILTER, false);
-		tcf_proto_destroy(tp, extack);
 		err = 0;
 		goto errout;
 	}
@@ -2005,13 +2137,8 @@ static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
 					 extack);
 		if (err)
 			goto errout;
-		if (last) {
-			mutex_lock(&chain->filter_chain_lock);
-			tcf_chain_tp_remove(chain, &chain_info, tp);
-			mutex_unlock(&chain->filter_chain_lock);
-
-			tcf_proto_destroy(tp, extack);
-		}
+		if (last)
+			tcf_chain_tp_delete_empty(chain, tp, extack);
 	}
 
 errout:
-- 
cgit v1.2.3


From 726d061286ceeeabda54ba6f080d0cb8f187a9d7 Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@mellanox.com>
Date: Mon, 11 Feb 2019 10:55:42 +0200
Subject: net: sched: prevent insertion of new classifiers during chain flush

Extend tcf_chain with 'flushing' flag. Use the flag to prevent insertion of
new classifier instances when chain flushing is in progress in order to
prevent resource leak when tcf_proto is created by unlocked users
concurrently.

Return EAGAIN error from tcf_chain_tp_insert_unique() to restart
tc_new_tfilter() and lookup the chain/proto again.

Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sch_generic.h |  1 +
 net/sched/cls_api.c       | 35 +++++++++++++++++++++++++++++------
 2 files changed, 30 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 083e566fc380..e8cf36ed3e87 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -356,6 +356,7 @@ struct tcf_chain {
 	unsigned int refcnt;
 	unsigned int action_refcnt;
 	bool explicitly_created;
+	bool flushing;
 	const struct tcf_proto_ops *tmplt_ops;
 	void *tmplt_priv;
 };
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index c6452e3bfc6a..3038a82f6591 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -477,9 +477,12 @@ static void __tcf_chain_put(struct tcf_chain *chain, bool by_act,
 	mutex_unlock(&block->lock);
 
 	/* The last dropped non-action reference will trigger notification. */
-	if (is_last && !by_act)
+	if (is_last && !by_act) {
 		tc_chain_notify_delete(tmplt_ops, tmplt_priv, chain_index,
 				       block, NULL, 0, 0, false);
+		/* Last reference to chain, no need to lock. */
+		chain->flushing = false;
+	}
 
 	if (refcnt == 0) {
 		tc_chain_tmplt_del(tmplt_ops, tmplt_priv);
@@ -511,6 +514,7 @@ static void tcf_chain_flush(struct tcf_chain *chain)
 	tp = tcf_chain_dereference(chain->filter_chain, chain);
 	RCU_INIT_POINTER(chain->filter_chain, NULL);
 	tcf_chain0_head_change(chain, NULL);
+	chain->flushing = true;
 	mutex_unlock(&chain->filter_chain_lock);
 
 	while (tp) {
@@ -1610,15 +1614,20 @@ static struct tcf_proto *tcf_chain_tp_prev(struct tcf_chain *chain,
 	return tcf_chain_dereference(*chain_info->pprev, chain);
 }
 
-static void tcf_chain_tp_insert(struct tcf_chain *chain,
-				struct tcf_chain_info *chain_info,
-				struct tcf_proto *tp)
+static int tcf_chain_tp_insert(struct tcf_chain *chain,
+			       struct tcf_chain_info *chain_info,
+			       struct tcf_proto *tp)
 {
+	if (chain->flushing)
+		return -EAGAIN;
+
 	if (*chain_info->pprev == chain->filter_chain)
 		tcf_chain0_head_change(chain, tp);
 	tcf_proto_get(tp);
 	RCU_INIT_POINTER(tp->next, tcf_chain_tp_prev(chain, chain_info));
 	rcu_assign_pointer(*chain_info->pprev, tp);
+
+	return 0;
 }
 
 static void tcf_chain_tp_remove(struct tcf_chain *chain,
@@ -1649,18 +1658,22 @@ static struct tcf_proto *tcf_chain_tp_insert_unique(struct tcf_chain *chain,
 {
 	struct tcf_chain_info chain_info;
 	struct tcf_proto *tp;
+	int err = 0;
 
 	mutex_lock(&chain->filter_chain_lock);
 
 	tp = tcf_chain_tp_find(chain, &chain_info,
 			       protocol, prio, false);
 	if (!tp)
-		tcf_chain_tp_insert(chain, &chain_info, tp_new);
+		err = tcf_chain_tp_insert(chain, &chain_info, tp_new);
 	mutex_unlock(&chain->filter_chain_lock);
 
 	if (tp) {
 		tcf_proto_destroy(tp_new, NULL);
 		tp_new = tp;
+	} else if (err) {
+		tcf_proto_destroy(tp_new, NULL);
+		tp_new = ERR_PTR(err);
 	}
 
 	return tp_new;
@@ -1943,6 +1956,11 @@ replay:
 	if (tp == NULL) {
 		struct tcf_proto *tp_new = NULL;
 
+		if (chain->flushing) {
+			err = -EAGAIN;
+			goto errout_locked;
+		}
+
 		/* Proto-tcf does not exist, create new one */
 
 		if (tca[TCA_KIND] == NULL || !protocol) {
@@ -1966,11 +1984,15 @@ replay:
 					  protocol, prio, chain, extack);
 		if (IS_ERR(tp_new)) {
 			err = PTR_ERR(tp_new);
-			goto errout;
+			goto errout_tp;
 		}
 
 		tp_created = 1;
 		tp = tcf_chain_tp_insert_unique(chain, tp_new, protocol, prio);
+		if (IS_ERR(tp)) {
+			err = PTR_ERR(tp);
+			goto errout_tp;
+		}
 	} else {
 		mutex_unlock(&chain->filter_chain_lock);
 	}
@@ -2011,6 +2033,7 @@ replay:
 errout:
 	if (err && tp_created)
 		tcf_chain_tp_delete_empty(chain, tp, NULL);
+errout_tp:
 	if (chain) {
 		if (tp && !IS_ERR(tp))
 			tcf_proto_put(tp, NULL);
-- 
cgit v1.2.3


From ec6743a10996d38e0438e5f45f2347ff2f42df0a Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@mellanox.com>
Date: Mon, 11 Feb 2019 10:55:43 +0200
Subject: net: sched: track rtnl lock status when validating extensions

Actions API is already updated to not rely on rtnl lock for
synchronization. However, it need to be provided with rtnl status when
called from classifiers API in order to be able to correctly release the
lock when loading kernel module.

Extend extension validation function with 'rtnl_held' flag which is passed
to actions API. Add new 'rtnl_held' parameter to tcf_exts_validate() in cls
API. No classifier is currently updated to support unlocked execution, so
pass hardcoded 'true' flag parameter value.

Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/pkt_cls.h    | 2 +-
 net/sched/cls_api.c      | 9 +++++----
 net/sched/cls_basic.c    | 2 +-
 net/sched/cls_bpf.c      | 3 ++-
 net/sched/cls_cgroup.c   | 2 +-
 net/sched/cls_flow.c     | 2 +-
 net/sched/cls_flower.c   | 3 ++-
 net/sched/cls_fw.c       | 2 +-
 net/sched/cls_matchall.c | 3 ++-
 net/sched/cls_route.c    | 2 +-
 net/sched/cls_rsvp.h     | 3 ++-
 net/sched/cls_tcindex.c  | 2 +-
 net/sched/cls_u32.c      | 2 +-
 13 files changed, 21 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index e5dafa5ee1b2..0e3b61016931 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -416,7 +416,7 @@ tcf_exts_exec(struct sk_buff *skb, struct tcf_exts *exts,
 
 int tcf_exts_validate(struct net *net, struct tcf_proto *tp,
 		      struct nlattr **tb, struct nlattr *rate_tlv,
-		      struct tcf_exts *exts, bool ovr,
+		      struct tcf_exts *exts, bool ovr, bool rtnl_held,
 		      struct netlink_ext_ack *extack);
 void tcf_exts_destroy(struct tcf_exts *exts);
 void tcf_exts_change(struct tcf_exts *dst, struct tcf_exts *src);
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 3038a82f6591..a3e715d34efb 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -2841,7 +2841,7 @@ EXPORT_SYMBOL(tcf_exts_destroy);
 
 int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
 		      struct nlattr *rate_tlv, struct tcf_exts *exts, bool ovr,
-		      struct netlink_ext_ack *extack)
+		      bool rtnl_held, struct netlink_ext_ack *extack)
 {
 #ifdef CONFIG_NET_CLS_ACT
 	{
@@ -2851,7 +2851,8 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
 		if (exts->police && tb[exts->police]) {
 			act = tcf_action_init_1(net, tp, tb[exts->police],
 						rate_tlv, "police", ovr,
-						TCA_ACT_BIND, true, extack);
+						TCA_ACT_BIND, rtnl_held,
+						extack);
 			if (IS_ERR(act))
 				return PTR_ERR(act);
 
@@ -2863,8 +2864,8 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
 
 			err = tcf_action_init(net, tp, tb[exts->action],
 					      rate_tlv, NULL, ovr, TCA_ACT_BIND,
-					      exts->actions, &attr_size, true,
-					      extack);
+					      exts->actions, &attr_size,
+					      rtnl_held, extack);
 			if (err < 0)
 				return err;
 			exts->nr_actions = err;
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index 4a57fec6f306..eaf9c02fe792 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -153,7 +153,7 @@ static int basic_set_parms(struct net *net, struct tcf_proto *tp,
 {
 	int err;
 
-	err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, extack);
+	err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, true, extack);
 	if (err < 0)
 		return err;
 
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index a95cb240a606..656b3423ad35 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -417,7 +417,8 @@ static int cls_bpf_set_parms(struct net *net, struct tcf_proto *tp,
 	if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf))
 		return -EINVAL;
 
-	ret = tcf_exts_validate(net, tp, tb, est, &prog->exts, ovr, extack);
+	ret = tcf_exts_validate(net, tp, tb, est, &prog->exts, ovr, true,
+				extack);
 	if (ret < 0)
 		return ret;
 
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index 3bc01bdde165..663ee1c6d606 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -110,7 +110,7 @@ static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb,
 		goto errout;
 
 	err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &new->exts, ovr,
-				extack);
+				true, extack);
 	if (err < 0)
 		goto errout;
 
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index 2bb043cd436b..39a6407d4832 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -445,7 +445,7 @@ static int flow_change(struct net *net, struct sk_buff *in_skb,
 		goto err2;
 
 	err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &fnew->exts, ovr,
-				extack);
+				true, extack);
 	if (err < 0)
 		goto err2;
 
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 6a341287a527..5e3f74ab68ca 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -1272,7 +1272,8 @@ static int fl_set_parms(struct net *net, struct tcf_proto *tp,
 {
 	int err;
 
-	err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, extack);
+	err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, true,
+				extack);
 	if (err < 0)
 		return err;
 
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index 29eeeaf3ea44..c8173ebb69f2 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -217,7 +217,7 @@ static int fw_set_parms(struct net *net, struct tcf_proto *tp,
 	int err;
 
 	err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &f->exts, ovr,
-				extack);
+				true, extack);
 	if (err < 0)
 		return err;
 
diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c
index a1b803fd372e..8848a147c4bf 100644
--- a/net/sched/cls_matchall.c
+++ b/net/sched/cls_matchall.c
@@ -145,7 +145,8 @@ static int mall_set_parms(struct net *net, struct tcf_proto *tp,
 {
 	int err;
 
-	err = tcf_exts_validate(net, tp, tb, est, &head->exts, ovr, extack);
+	err = tcf_exts_validate(net, tp, tb, est, &head->exts, ovr, true,
+				extack);
 	if (err < 0)
 		return err;
 
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index 0404aa5fa7cb..44b26038c4c4 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -393,7 +393,7 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp,
 	struct route4_bucket *b;
 	int err;
 
-	err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, extack);
+	err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, true, extack);
 	if (err < 0)
 		return err;
 
diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h
index e9ccf7daea7d..9dd9530e6a52 100644
--- a/net/sched/cls_rsvp.h
+++ b/net/sched/cls_rsvp.h
@@ -502,7 +502,8 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb,
 	err = tcf_exts_init(&e, TCA_RSVP_ACT, TCA_RSVP_POLICE);
 	if (err < 0)
 		return err;
-	err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr, extack);
+	err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr, true,
+				extack);
 	if (err < 0)
 		goto errout2;
 
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index 9ccc93f257db..b7dc667b6ec0 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -314,7 +314,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
 	err = tcf_exts_init(&e, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE);
 	if (err < 0)
 		return err;
-	err = tcf_exts_validate(net, tp, tb, est, &e, ovr, extack);
+	err = tcf_exts_validate(net, tp, tb, est, &e, ovr, true, extack);
 	if (err < 0)
 		goto errout;
 
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index dcea21004604..e891f30d42e9 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -726,7 +726,7 @@ static int u32_set_parms(struct net *net, struct tcf_proto *tp,
 {
 	int err;
 
-	err = tcf_exts_validate(net, tp, tb, est, &n->exts, ovr, extack);
+	err = tcf_exts_validate(net, tp, tb, est, &n->exts, ovr, true, extack);
 	if (err < 0)
 		return err;
 
-- 
cgit v1.2.3


From 7d5509fa0d3ddfe252b4418513e493ac98de3317 Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@mellanox.com>
Date: Mon, 11 Feb 2019 10:55:44 +0200
Subject: net: sched: extend proto ops with 'put' callback

Add optional tp->ops->put() API to be implemented for filter reference
counting. This new function is called by cls API to release filter
reference for filters returned by tp->ops->change() or tp->ops->get()
functions. Implement tfilter_put() helper to call tp->ops->put() only for
classifiers that implement it.

Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sch_generic.h |  1 +
 net/sched/cls_api.c       | 12 +++++++++++-
 2 files changed, 12 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index e8cf36ed3e87..410dda80ca62 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -277,6 +277,7 @@ struct tcf_proto_ops {
 					   struct netlink_ext_ack *extack);
 
 	void*			(*get)(struct tcf_proto*, u32 handle);
+	void			(*put)(struct tcf_proto *tp, void *f);
 	int			(*change)(struct net *net, struct sk_buff *,
 					struct tcf_proto*, unsigned long,
 					u32 handle, struct nlattr **,
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index a3e715d34efb..8fe38aa180cf 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -1870,6 +1870,12 @@ static void tfilter_notify_chain(struct net *net, struct sk_buff *oskb,
 			       q, parent, NULL, event, false);
 }
 
+static void tfilter_put(struct tcf_proto *tp, void *fh)
+{
+	if (tp->ops->put && fh)
+		tp->ops->put(tp, fh);
+}
+
 static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
 			  struct netlink_ext_ack *extack)
 {
@@ -2012,6 +2018,7 @@ replay:
 			goto errout;
 		}
 	} else if (n->nlmsg_flags & NLM_F_EXCL) {
+		tfilter_put(tp, fh);
 		NL_SET_ERR_MSG(extack, "Filter already exists");
 		err = -EEXIST;
 		goto errout;
@@ -2026,9 +2033,11 @@ replay:
 	err = tp->ops->change(net, skb, tp, cl, t->tcm_handle, tca, &fh,
 			      n->nlmsg_flags & NLM_F_CREATE ? TCA_ACT_NOREPLACE : TCA_ACT_REPLACE,
 			      extack);
-	if (err == 0)
+	if (err == 0) {
 		tfilter_notify(net, skb, n, tp, block, q, parent, fh,
 			       RTM_NEWTFILTER, false);
+		tfilter_put(tp, fh);
+	}
 
 errout:
 	if (err && tp_created)
@@ -2259,6 +2268,7 @@ static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
 			NL_SET_ERR_MSG(extack, "Failed to send filter notify message");
 	}
 
+	tfilter_put(tp, fh);
 errout:
 	if (chain) {
 		if (tp && !IS_ERR(tp))
-- 
cgit v1.2.3


From 12db03b65c2b90752e4c37666977fd4a1b5f5824 Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@mellanox.com>
Date: Mon, 11 Feb 2019 10:55:45 +0200
Subject: net: sched: extend proto ops to support unlocked classifiers

Add 'rtnl_held' flag to tcf proto change, delete, destroy, dump, walk
functions to track rtnl lock status. Extend users of these function in cls
API to propagate rtnl lock status to them. This allows classifiers to
obtain rtnl lock when necessary and to pass rtnl lock status to extensions
and driver offload callbacks.

Add flags field to tcf proto ops. Add flag value to indicate that
classifier doesn't require rtnl lock.

Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/pkt_cls.h     |   2 +-
 include/net/sch_generic.h |  17 +++--
 net/sched/cls_api.c       | 168 +++++++++++++++++++++++++---------------------
 net/sched/cls_basic.c     |  12 ++--
 net/sched/cls_bpf.c       |  12 ++--
 net/sched/cls_cgroup.c    |  11 +--
 net/sched/cls_flow.c      |  13 ++--
 net/sched/cls_flower.c    |  13 ++--
 net/sched/cls_fw.c        |  13 ++--
 net/sched/cls_matchall.c  |  13 ++--
 net/sched/cls_route.c     |  12 ++--
 net/sched/cls_rsvp.h      |  13 ++--
 net/sched/cls_tcindex.c   |  15 +++--
 net/sched/cls_u32.c       |  12 ++--
 net/sched/sch_api.c       |   6 +-
 15 files changed, 191 insertions(+), 141 deletions(-)

(limited to 'include')

diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 0e3b61016931..6a530bef9253 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -47,7 +47,7 @@ void tcf_chain_put_by_act(struct tcf_chain *chain);
 struct tcf_chain *tcf_get_next_chain(struct tcf_block *block,
 				     struct tcf_chain *chain);
 struct tcf_proto *tcf_get_next_proto(struct tcf_chain *chain,
-				     struct tcf_proto *tp);
+				     struct tcf_proto *tp, bool rtnl_held);
 void tcf_block_netif_keep_dst(struct tcf_block *block);
 int tcf_block_get(struct tcf_block **p_block,
 		  struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q,
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 410dda80ca62..365801c2a4f5 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -273,7 +273,7 @@ struct tcf_proto_ops {
 					    const struct tcf_proto *,
 					    struct tcf_result *);
 	int			(*init)(struct tcf_proto*);
-	void			(*destroy)(struct tcf_proto *tp,
+	void			(*destroy)(struct tcf_proto *tp, bool rtnl_held,
 					   struct netlink_ext_ack *extack);
 
 	void*			(*get)(struct tcf_proto*, u32 handle);
@@ -281,12 +281,13 @@ struct tcf_proto_ops {
 	int			(*change)(struct net *net, struct sk_buff *,
 					struct tcf_proto*, unsigned long,
 					u32 handle, struct nlattr **,
-					void **, bool,
+					void **, bool, bool,
 					struct netlink_ext_ack *);
 	int			(*delete)(struct tcf_proto *tp, void *arg,
-					  bool *last,
+					  bool *last, bool rtnl_held,
 					  struct netlink_ext_ack *);
-	void			(*walk)(struct tcf_proto*, struct tcf_walker *arg);
+	void			(*walk)(struct tcf_proto *tp,
+					struct tcf_walker *arg, bool rtnl_held);
 	int			(*reoffload)(struct tcf_proto *tp, bool add,
 					     tc_setup_cb_t *cb, void *cb_priv,
 					     struct netlink_ext_ack *extack);
@@ -299,12 +300,18 @@ struct tcf_proto_ops {
 
 	/* rtnetlink specific */
 	int			(*dump)(struct net*, struct tcf_proto*, void *,
-					struct sk_buff *skb, struct tcmsg*);
+					struct sk_buff *skb, struct tcmsg*,
+					bool);
 	int			(*tmplt_dump)(struct sk_buff *skb,
 					      struct net *net,
 					      void *tmplt_priv);
 
 	struct module		*owner;
+	int			flags;
+};
+
+enum tcf_proto_ops_flags {
+	TCF_PROTO_OPS_DOIT_UNLOCKED = 1,
 };
 
 struct tcf_proto {
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 8fe38aa180cf..e8ed461e94af 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -69,7 +69,8 @@ static const struct tcf_proto_ops *__tcf_proto_lookup_ops(const char *kind)
 }
 
 static const struct tcf_proto_ops *
-tcf_proto_lookup_ops(const char *kind, struct netlink_ext_ack *extack)
+tcf_proto_lookup_ops(const char *kind, bool rtnl_held,
+		     struct netlink_ext_ack *extack)
 {
 	const struct tcf_proto_ops *ops;
 
@@ -77,9 +78,11 @@ tcf_proto_lookup_ops(const char *kind, struct netlink_ext_ack *extack)
 	if (ops)
 		return ops;
 #ifdef CONFIG_MODULES
-	rtnl_unlock();
+	if (rtnl_held)
+		rtnl_unlock();
 	request_module("cls_%s", kind);
-	rtnl_lock();
+	if (rtnl_held)
+		rtnl_lock();
 	ops = __tcf_proto_lookup_ops(kind);
 	/* We dropped the RTNL semaphore in order to perform
 	 * the module load. So, even if we succeeded in loading
@@ -162,6 +165,7 @@ static inline u32 tcf_auto_prio(struct tcf_proto *tp)
 
 static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol,
 					  u32 prio, struct tcf_chain *chain,
+					  bool rtnl_held,
 					  struct netlink_ext_ack *extack)
 {
 	struct tcf_proto *tp;
@@ -171,7 +175,7 @@ static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol,
 	if (!tp)
 		return ERR_PTR(-ENOBUFS);
 
-	tp->ops = tcf_proto_lookup_ops(kind, extack);
+	tp->ops = tcf_proto_lookup_ops(kind, rtnl_held, extack);
 	if (IS_ERR(tp->ops)) {
 		err = PTR_ERR(tp->ops);
 		goto errout;
@@ -202,20 +206,20 @@ static void tcf_proto_get(struct tcf_proto *tp)
 
 static void tcf_chain_put(struct tcf_chain *chain);
 
-static void tcf_proto_destroy(struct tcf_proto *tp,
+static void tcf_proto_destroy(struct tcf_proto *tp, bool rtnl_held,
 			      struct netlink_ext_ack *extack)
 {
-	tp->ops->destroy(tp, extack);
+	tp->ops->destroy(tp, rtnl_held, extack);
 	tcf_chain_put(tp->chain);
 	module_put(tp->ops->owner);
 	kfree_rcu(tp, rcu);
 }
 
-static void tcf_proto_put(struct tcf_proto *tp,
+static void tcf_proto_put(struct tcf_proto *tp, bool rtnl_held,
 			  struct netlink_ext_ack *extack)
 {
 	if (refcount_dec_and_test(&tp->refcnt))
-		tcf_proto_destroy(tp, extack);
+		tcf_proto_destroy(tp, rtnl_held, extack);
 }
 
 static int walker_noop(struct tcf_proto *tp, void *d, struct tcf_walker *arg)
@@ -223,21 +227,21 @@ static int walker_noop(struct tcf_proto *tp, void *d, struct tcf_walker *arg)
 	return -1;
 }
 
-static bool tcf_proto_is_empty(struct tcf_proto *tp)
+static bool tcf_proto_is_empty(struct tcf_proto *tp, bool rtnl_held)
 {
 	struct tcf_walker walker = { .fn = walker_noop, };
 
 	if (tp->ops->walk) {
-		tp->ops->walk(tp, &walker);
+		tp->ops->walk(tp, &walker, rtnl_held);
 		return !walker.stop;
 	}
 	return true;
 }
 
-static bool tcf_proto_check_delete(struct tcf_proto *tp)
+static bool tcf_proto_check_delete(struct tcf_proto *tp, bool rtnl_held)
 {
 	spin_lock(&tp->lock);
-	if (tcf_proto_is_empty(tp))
+	if (tcf_proto_is_empty(tp, rtnl_held))
 		tp->deleting = true;
 	spin_unlock(&tp->lock);
 	return tp->deleting;
@@ -506,7 +510,7 @@ static void tcf_chain_put_explicitly_created(struct tcf_chain *chain)
 	__tcf_chain_put(chain, false, true);
 }
 
-static void tcf_chain_flush(struct tcf_chain *chain)
+static void tcf_chain_flush(struct tcf_chain *chain, bool rtnl_held)
 {
 	struct tcf_proto *tp, *tp_next;
 
@@ -519,7 +523,7 @@ static void tcf_chain_flush(struct tcf_chain *chain)
 
 	while (tp) {
 		tp_next = rcu_dereference_protected(tp->next, 1);
-		tcf_proto_put(tp, NULL);
+		tcf_proto_put(tp, rtnl_held, NULL);
 		tp = tp_next;
 	}
 }
@@ -1070,18 +1074,19 @@ __tcf_get_next_proto(struct tcf_chain *chain, struct tcf_proto *tp)
  */
 
 struct tcf_proto *
-tcf_get_next_proto(struct tcf_chain *chain, struct tcf_proto *tp)
+tcf_get_next_proto(struct tcf_chain *chain, struct tcf_proto *tp,
+		   bool rtnl_held)
 {
 	struct tcf_proto *tp_next = __tcf_get_next_proto(chain, tp);
 
 	if (tp)
-		tcf_proto_put(tp, NULL);
+		tcf_proto_put(tp, rtnl_held, NULL);
 
 	return tp_next;
 }
 EXPORT_SYMBOL(tcf_get_next_proto);
 
-static void tcf_block_flush_all_chains(struct tcf_block *block)
+static void tcf_block_flush_all_chains(struct tcf_block *block, bool rtnl_held)
 {
 	struct tcf_chain *chain;
 
@@ -1092,12 +1097,12 @@ static void tcf_block_flush_all_chains(struct tcf_block *block)
 	     chain;
 	     chain = tcf_get_next_chain(block, chain)) {
 		tcf_chain_put_explicitly_created(chain);
-		tcf_chain_flush(chain);
+		tcf_chain_flush(chain, rtnl_held);
 	}
 }
 
 static void __tcf_block_put(struct tcf_block *block, struct Qdisc *q,
-			    struct tcf_block_ext_info *ei)
+			    struct tcf_block_ext_info *ei, bool rtnl_held)
 {
 	if (refcount_dec_and_mutex_lock(&block->refcnt, &block->lock)) {
 		/* Flushing/putting all chains will cause the block to be
@@ -1118,15 +1123,15 @@ static void __tcf_block_put(struct tcf_block *block, struct Qdisc *q,
 		if (free_block)
 			tcf_block_destroy(block);
 		else
-			tcf_block_flush_all_chains(block);
+			tcf_block_flush_all_chains(block, rtnl_held);
 	} else if (q) {
 		tcf_block_offload_unbind(block, q, ei);
 	}
 }
 
-static void tcf_block_refcnt_put(struct tcf_block *block)
+static void tcf_block_refcnt_put(struct tcf_block *block, bool rtnl_held)
 {
-	__tcf_block_put(block, NULL, NULL);
+	__tcf_block_put(block, NULL, NULL, rtnl_held);
 }
 
 /* Find tcf block.
@@ -1244,10 +1249,11 @@ errout_qdisc:
 	return ERR_PTR(err);
 }
 
-static void tcf_block_release(struct Qdisc *q, struct tcf_block *block)
+static void tcf_block_release(struct Qdisc *q, struct tcf_block *block,
+			      bool rtnl_held)
 {
 	if (!IS_ERR_OR_NULL(block))
-		tcf_block_refcnt_put(block);
+		tcf_block_refcnt_put(block, rtnl_held);
 
 	if (q)
 		qdisc_put(q);
@@ -1358,7 +1364,7 @@ err_chain0_head_change_cb_add:
 	tcf_block_owner_del(block, q, ei->binder_type);
 err_block_owner_add:
 err_block_insert:
-	tcf_block_refcnt_put(block);
+	tcf_block_refcnt_put(block, true);
 	return err;
 }
 EXPORT_SYMBOL(tcf_block_get_ext);
@@ -1395,7 +1401,7 @@ void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q,
 	tcf_chain0_head_change_cb_del(block, ei);
 	tcf_block_owner_del(block, q, ei->binder_type);
 
-	__tcf_block_put(block, q, ei);
+	__tcf_block_put(block, q, ei, true);
 }
 EXPORT_SYMBOL(tcf_block_put_ext);
 
@@ -1464,7 +1470,7 @@ tcf_block_playback_offloads(struct tcf_block *block, tc_setup_cb_t *cb,
 		for (tp = __tcf_get_next_proto(chain, NULL); tp;
 		     tp_prev = tp,
 			     tp = __tcf_get_next_proto(chain, tp),
-			     tcf_proto_put(tp_prev, NULL)) {
+			     tcf_proto_put(tp_prev, true, NULL)) {
 			if (tp->ops->reoffload) {
 				err = tp->ops->reoffload(tp, add, cb, cb_priv,
 							 extack);
@@ -1481,7 +1487,7 @@ tcf_block_playback_offloads(struct tcf_block *block, tc_setup_cb_t *cb,
 	return 0;
 
 err_playback_remove:
-	tcf_proto_put(tp, NULL);
+	tcf_proto_put(tp, true, NULL);
 	tcf_chain_put(chain);
 	tcf_block_playback_offloads(block, cb, cb_priv, false, offload_in_use,
 				    extack);
@@ -1654,7 +1660,8 @@ static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain,
 
 static struct tcf_proto *tcf_chain_tp_insert_unique(struct tcf_chain *chain,
 						    struct tcf_proto *tp_new,
-						    u32 protocol, u32 prio)
+						    u32 protocol, u32 prio,
+						    bool rtnl_held)
 {
 	struct tcf_chain_info chain_info;
 	struct tcf_proto *tp;
@@ -1669,10 +1676,10 @@ static struct tcf_proto *tcf_chain_tp_insert_unique(struct tcf_chain *chain,
 	mutex_unlock(&chain->filter_chain_lock);
 
 	if (tp) {
-		tcf_proto_destroy(tp_new, NULL);
+		tcf_proto_destroy(tp_new, rtnl_held, NULL);
 		tp_new = tp;
 	} else if (err) {
-		tcf_proto_destroy(tp_new, NULL);
+		tcf_proto_destroy(tp_new, rtnl_held, NULL);
 		tp_new = ERR_PTR(err);
 	}
 
@@ -1680,7 +1687,7 @@ static struct tcf_proto *tcf_chain_tp_insert_unique(struct tcf_chain *chain,
 }
 
 static void tcf_chain_tp_delete_empty(struct tcf_chain *chain,
-				      struct tcf_proto *tp,
+				      struct tcf_proto *tp, bool rtnl_held,
 				      struct netlink_ext_ack *extack)
 {
 	struct tcf_chain_info chain_info;
@@ -1705,7 +1712,7 @@ static void tcf_chain_tp_delete_empty(struct tcf_chain *chain,
 	 * concurrently.
 	 * Mark tp for deletion if it is empty.
 	 */
-	if (!tp_iter || !tcf_proto_check_delete(tp)) {
+	if (!tp_iter || !tcf_proto_check_delete(tp, rtnl_held)) {
 		mutex_unlock(&chain->filter_chain_lock);
 		return;
 	}
@@ -1716,7 +1723,7 @@ static void tcf_chain_tp_delete_empty(struct tcf_chain *chain,
 	RCU_INIT_POINTER(*chain_info.pprev, next);
 	mutex_unlock(&chain->filter_chain_lock);
 
-	tcf_proto_put(tp, extack);
+	tcf_proto_put(tp, rtnl_held, extack);
 }
 
 static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain,
@@ -1755,7 +1762,8 @@ static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain,
 static int tcf_fill_node(struct net *net, struct sk_buff *skb,
 			 struct tcf_proto *tp, struct tcf_block *block,
 			 struct Qdisc *q, u32 parent, void *fh,
-			 u32 portid, u32 seq, u16 flags, int event)
+			 u32 portid, u32 seq, u16 flags, int event,
+			 bool rtnl_held)
 {
 	struct tcmsg *tcm;
 	struct nlmsghdr  *nlh;
@@ -1783,7 +1791,8 @@ static int tcf_fill_node(struct net *net, struct sk_buff *skb,
 	if (!fh) {
 		tcm->tcm_handle = 0;
 	} else {
-		if (tp->ops->dump && tp->ops->dump(net, tp, fh, skb, tcm) < 0)
+		if (tp->ops->dump &&
+		    tp->ops->dump(net, tp, fh, skb, tcm, rtnl_held) < 0)
 			goto nla_put_failure;
 	}
 	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
@@ -1798,7 +1807,8 @@ nla_put_failure:
 static int tfilter_notify(struct net *net, struct sk_buff *oskb,
 			  struct nlmsghdr *n, struct tcf_proto *tp,
 			  struct tcf_block *block, struct Qdisc *q,
-			  u32 parent, void *fh, int event, bool unicast)
+			  u32 parent, void *fh, int event, bool unicast,
+			  bool rtnl_held)
 {
 	struct sk_buff *skb;
 	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
@@ -1808,7 +1818,8 @@ static int tfilter_notify(struct net *net, struct sk_buff *oskb,
 		return -ENOBUFS;
 
 	if (tcf_fill_node(net, skb, tp, block, q, parent, fh, portid,
-			  n->nlmsg_seq, n->nlmsg_flags, event) <= 0) {
+			  n->nlmsg_seq, n->nlmsg_flags, event,
+			  rtnl_held) <= 0) {
 		kfree_skb(skb);
 		return -EINVAL;
 	}
@@ -1824,7 +1835,7 @@ static int tfilter_del_notify(struct net *net, struct sk_buff *oskb,
 			      struct nlmsghdr *n, struct tcf_proto *tp,
 			      struct tcf_block *block, struct Qdisc *q,
 			      u32 parent, void *fh, bool unicast, bool *last,
-			      struct netlink_ext_ack *extack)
+			      bool rtnl_held, struct netlink_ext_ack *extack)
 {
 	struct sk_buff *skb;
 	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
@@ -1835,13 +1846,14 @@ static int tfilter_del_notify(struct net *net, struct sk_buff *oskb,
 		return -ENOBUFS;
 
 	if (tcf_fill_node(net, skb, tp, block, q, parent, fh, portid,
-			  n->nlmsg_seq, n->nlmsg_flags, RTM_DELTFILTER) <= 0) {
+			  n->nlmsg_seq, n->nlmsg_flags, RTM_DELTFILTER,
+			  rtnl_held) <= 0) {
 		NL_SET_ERR_MSG(extack, "Failed to build del event notification");
 		kfree_skb(skb);
 		return -EINVAL;
 	}
 
-	err = tp->ops->delete(tp, fh, last, extack);
+	err = tp->ops->delete(tp, fh, last, rtnl_held, extack);
 	if (err) {
 		kfree_skb(skb);
 		return err;
@@ -1860,14 +1872,15 @@ static int tfilter_del_notify(struct net *net, struct sk_buff *oskb,
 static void tfilter_notify_chain(struct net *net, struct sk_buff *oskb,
 				 struct tcf_block *block, struct Qdisc *q,
 				 u32 parent, struct nlmsghdr *n,
-				 struct tcf_chain *chain, int event)
+				 struct tcf_chain *chain, int event,
+				 bool rtnl_held)
 {
 	struct tcf_proto *tp;
 
-	for (tp = tcf_get_next_proto(chain, NULL);
-	     tp; tp = tcf_get_next_proto(chain, tp))
+	for (tp = tcf_get_next_proto(chain, NULL, rtnl_held);
+	     tp; tp = tcf_get_next_proto(chain, tp, rtnl_held))
 		tfilter_notify(net, oskb, n, tp, block,
-			       q, parent, NULL, event, false);
+			       q, parent, NULL, event, false, rtnl_held);
 }
 
 static void tfilter_put(struct tcf_proto *tp, void *fh)
@@ -1896,6 +1909,7 @@ static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
 	void *fh;
 	int err;
 	int tp_created;
+	bool rtnl_held = true;
 
 	if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
 		return -EPERM;
@@ -1987,14 +2001,16 @@ replay:
 
 		mutex_unlock(&chain->filter_chain_lock);
 		tp_new = tcf_proto_create(nla_data(tca[TCA_KIND]),
-					  protocol, prio, chain, extack);
+					  protocol, prio, chain, rtnl_held,
+					  extack);
 		if (IS_ERR(tp_new)) {
 			err = PTR_ERR(tp_new);
 			goto errout_tp;
 		}
 
 		tp_created = 1;
-		tp = tcf_chain_tp_insert_unique(chain, tp_new, protocol, prio);
+		tp = tcf_chain_tp_insert_unique(chain, tp_new, protocol, prio,
+						rtnl_held);
 		if (IS_ERR(tp)) {
 			err = PTR_ERR(tp);
 			goto errout_tp;
@@ -2032,24 +2048,24 @@ replay:
 
 	err = tp->ops->change(net, skb, tp, cl, t->tcm_handle, tca, &fh,
 			      n->nlmsg_flags & NLM_F_CREATE ? TCA_ACT_NOREPLACE : TCA_ACT_REPLACE,
-			      extack);
+			      rtnl_held, extack);
 	if (err == 0) {
 		tfilter_notify(net, skb, n, tp, block, q, parent, fh,
-			       RTM_NEWTFILTER, false);
+			       RTM_NEWTFILTER, false, rtnl_held);
 		tfilter_put(tp, fh);
 	}
 
 errout:
 	if (err && tp_created)
-		tcf_chain_tp_delete_empty(chain, tp, NULL);
+		tcf_chain_tp_delete_empty(chain, tp, rtnl_held, NULL);
 errout_tp:
 	if (chain) {
 		if (tp && !IS_ERR(tp))
-			tcf_proto_put(tp, NULL);
+			tcf_proto_put(tp, rtnl_held, NULL);
 		if (!tp_created)
 			tcf_chain_put(chain);
 	}
-	tcf_block_release(q, block);
+	tcf_block_release(q, block, rtnl_held);
 	if (err == -EAGAIN)
 		/* Replay the request. */
 		goto replay;
@@ -2078,6 +2094,7 @@ static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
 	unsigned long cl = 0;
 	void *fh = NULL;
 	int err;
+	bool rtnl_held = true;
 
 	if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
 		return -EPERM;
@@ -2127,8 +2144,8 @@ static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
 
 	if (prio == 0) {
 		tfilter_notify_chain(net, skb, block, q, parent, n,
-				     chain, RTM_DELTFILTER);
-		tcf_chain_flush(chain);
+				     chain, RTM_DELTFILTER, rtnl_held);
+		tcf_chain_flush(chain, rtnl_held);
 		err = 0;
 		goto errout;
 	}
@@ -2148,9 +2165,9 @@ static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
 		tcf_chain_tp_remove(chain, &chain_info, tp);
 		mutex_unlock(&chain->filter_chain_lock);
 
-		tcf_proto_put(tp, NULL);
+		tcf_proto_put(tp, rtnl_held, NULL);
 		tfilter_notify(net, skb, n, tp, block, q, parent, fh,
-			       RTM_DELTFILTER, false);
+			       RTM_DELTFILTER, false, rtnl_held);
 		err = 0;
 		goto errout;
 	}
@@ -2166,20 +2183,21 @@ static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
 
 		err = tfilter_del_notify(net, skb, n, tp, block,
 					 q, parent, fh, false, &last,
-					 extack);
+					 rtnl_held, extack);
+
 		if (err)
 			goto errout;
 		if (last)
-			tcf_chain_tp_delete_empty(chain, tp, extack);
+			tcf_chain_tp_delete_empty(chain, tp, rtnl_held, extack);
 	}
 
 errout:
 	if (chain) {
 		if (tp && !IS_ERR(tp))
-			tcf_proto_put(tp, NULL);
+			tcf_proto_put(tp, rtnl_held, NULL);
 		tcf_chain_put(chain);
 	}
-	tcf_block_release(q, block);
+	tcf_block_release(q, block, rtnl_held);
 	return err;
 
 errout_locked:
@@ -2205,6 +2223,7 @@ static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
 	unsigned long cl = 0;
 	void *fh = NULL;
 	int err;
+	bool rtnl_held = true;
 
 	err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, rtm_tca_policy, extack);
 	if (err < 0)
@@ -2263,7 +2282,7 @@ static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
 		err = -ENOENT;
 	} else {
 		err = tfilter_notify(net, skb, n, tp, block, q, parent,
-				     fh, RTM_NEWTFILTER, true);
+				     fh, RTM_NEWTFILTER, true, rtnl_held);
 		if (err < 0)
 			NL_SET_ERR_MSG(extack, "Failed to send filter notify message");
 	}
@@ -2272,10 +2291,10 @@ static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
 errout:
 	if (chain) {
 		if (tp && !IS_ERR(tp))
-			tcf_proto_put(tp, NULL);
+			tcf_proto_put(tp, rtnl_held, NULL);
 		tcf_chain_put(chain);
 	}
-	tcf_block_release(q, block);
+	tcf_block_release(q, block, rtnl_held);
 	return err;
 }
 
@@ -2296,7 +2315,7 @@ static int tcf_node_dump(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
 	return tcf_fill_node(net, a->skb, tp, a->block, a->q, a->parent,
 			     n, NETLINK_CB(a->cb->skb).portid,
 			     a->cb->nlh->nlmsg_seq, NLM_F_MULTI,
-			     RTM_NEWTFILTER);
+			     RTM_NEWTFILTER, true);
 }
 
 static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent,
@@ -2313,7 +2332,7 @@ static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent,
 	     tp;
 	     tp_prev = tp,
 		     tp = __tcf_get_next_proto(chain, tp),
-		     tcf_proto_put(tp_prev, NULL),
+		     tcf_proto_put(tp_prev, true, NULL),
 		     (*p_index)++) {
 		if (*p_index < index_start)
 			continue;
@@ -2330,9 +2349,8 @@ static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent,
 			if (tcf_fill_node(net, skb, tp, block, q, parent, NULL,
 					  NETLINK_CB(cb->skb).portid,
 					  cb->nlh->nlmsg_seq, NLM_F_MULTI,
-					  RTM_NEWTFILTER) <= 0)
+					  RTM_NEWTFILTER, true) <= 0)
 				goto errout;
-
 			cb->args[1] = 1;
 		}
 		if (!tp->ops->walk)
@@ -2347,7 +2365,7 @@ static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent,
 		arg.w.skip = cb->args[1] - 1;
 		arg.w.count = 0;
 		arg.w.cookie = cb->args[2];
-		tp->ops->walk(tp, &arg.w);
+		tp->ops->walk(tp, &arg.w, true);
 		cb->args[2] = arg.w.cookie;
 		cb->args[1] = arg.w.count + 1;
 		if (arg.w.stop)
@@ -2356,7 +2374,7 @@ static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent,
 	return true;
 
 errout:
-	tcf_proto_put(tp, NULL);
+	tcf_proto_put(tp, true, NULL);
 	return false;
 }
 
@@ -2448,7 +2466,7 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
 	}
 
 	if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK)
-		tcf_block_refcnt_put(block);
+		tcf_block_refcnt_put(block, true);
 	cb->args[0] = index;
 
 out:
@@ -2569,7 +2587,7 @@ static int tc_chain_tmplt_add(struct tcf_chain *chain, struct net *net,
 	if (!tca[TCA_KIND])
 		return 0;
 
-	ops = tcf_proto_lookup_ops(nla_data(tca[TCA_KIND]), extack);
+	ops = tcf_proto_lookup_ops(nla_data(tca[TCA_KIND]), true, extack);
 	if (IS_ERR(ops))
 		return PTR_ERR(ops);
 	if (!ops->tmplt_create || !ops->tmplt_destroy || !ops->tmplt_dump) {
@@ -2699,9 +2717,9 @@ replay:
 		break;
 	case RTM_DELCHAIN:
 		tfilter_notify_chain(net, skb, block, q, parent, n,
-				     chain, RTM_DELTFILTER);
+				     chain, RTM_DELTFILTER, true);
 		/* Flush the chain first as the user requested chain removal. */
-		tcf_chain_flush(chain);
+		tcf_chain_flush(chain, true);
 		/* In case the chain was successfully deleted, put a reference
 		 * to the chain previously taken during addition.
 		 */
@@ -2722,7 +2740,7 @@ replay:
 errout:
 	tcf_chain_put(chain);
 errout_block:
-	tcf_block_release(q, block);
+	tcf_block_release(q, block, true);
 	if (err == -EAGAIN)
 		/* Replay the request. */
 		goto replay;
@@ -2829,7 +2847,7 @@ static int tc_dump_chain(struct sk_buff *skb, struct netlink_callback *cb)
 	}
 
 	if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK)
-		tcf_block_refcnt_put(block);
+		tcf_block_refcnt_put(block, true);
 	cb->args[0] = index;
 
 out:
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index eaf9c02fe792..2383f449d2bc 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -107,7 +107,8 @@ static void basic_delete_filter_work(struct work_struct *work)
 	rtnl_unlock();
 }
 
-static void basic_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
+static void basic_destroy(struct tcf_proto *tp, bool rtnl_held,
+			  struct netlink_ext_ack *extack)
 {
 	struct basic_head *head = rtnl_dereference(tp->root);
 	struct basic_filter *f, *n;
@@ -126,7 +127,7 @@ static void basic_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
 }
 
 static int basic_delete(struct tcf_proto *tp, void *arg, bool *last,
-			struct netlink_ext_ack *extack)
+			bool rtnl_held, struct netlink_ext_ack *extack)
 {
 	struct basic_head *head = rtnl_dereference(tp->root);
 	struct basic_filter *f = arg;
@@ -173,7 +174,7 @@ static int basic_set_parms(struct net *net, struct tcf_proto *tp,
 static int basic_change(struct net *net, struct sk_buff *in_skb,
 			struct tcf_proto *tp, unsigned long base, u32 handle,
 			struct nlattr **tca, void **arg, bool ovr,
-			struct netlink_ext_ack *extack)
+			bool rtnl_held, struct netlink_ext_ack *extack)
 {
 	int err;
 	struct basic_head *head = rtnl_dereference(tp->root);
@@ -247,7 +248,8 @@ errout:
 	return err;
 }
 
-static void basic_walk(struct tcf_proto *tp, struct tcf_walker *arg)
+static void basic_walk(struct tcf_proto *tp, struct tcf_walker *arg,
+		       bool rtnl_held)
 {
 	struct basic_head *head = rtnl_dereference(tp->root);
 	struct basic_filter *f;
@@ -274,7 +276,7 @@ static void basic_bind_class(void *fh, u32 classid, unsigned long cl)
 }
 
 static int basic_dump(struct net *net, struct tcf_proto *tp, void *fh,
-		      struct sk_buff *skb, struct tcmsg *t)
+		      struct sk_buff *skb, struct tcmsg *t, bool rtnl_held)
 {
 	struct tc_basic_pcnt gpf = {};
 	struct basic_filter *f = fh;
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index 656b3423ad35..062350c6621c 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -298,7 +298,7 @@ static void __cls_bpf_delete(struct tcf_proto *tp, struct cls_bpf_prog *prog,
 }
 
 static int cls_bpf_delete(struct tcf_proto *tp, void *arg, bool *last,
-			  struct netlink_ext_ack *extack)
+			  bool rtnl_held, struct netlink_ext_ack *extack)
 {
 	struct cls_bpf_head *head = rtnl_dereference(tp->root);
 
@@ -307,7 +307,7 @@ static int cls_bpf_delete(struct tcf_proto *tp, void *arg, bool *last,
 	return 0;
 }
 
-static void cls_bpf_destroy(struct tcf_proto *tp,
+static void cls_bpf_destroy(struct tcf_proto *tp, bool rtnl_held,
 			    struct netlink_ext_ack *extack)
 {
 	struct cls_bpf_head *head = rtnl_dereference(tp->root);
@@ -456,7 +456,8 @@ static int cls_bpf_set_parms(struct net *net, struct tcf_proto *tp,
 static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
 			  struct tcf_proto *tp, unsigned long base,
 			  u32 handle, struct nlattr **tca,
-			  void **arg, bool ovr, struct netlink_ext_ack *extack)
+			  void **arg, bool ovr, bool rtnl_held,
+			  struct netlink_ext_ack *extack)
 {
 	struct cls_bpf_head *head = rtnl_dereference(tp->root);
 	struct cls_bpf_prog *oldprog = *arg;
@@ -576,7 +577,7 @@ static int cls_bpf_dump_ebpf_info(const struct cls_bpf_prog *prog,
 }
 
 static int cls_bpf_dump(struct net *net, struct tcf_proto *tp, void *fh,
-			struct sk_buff *skb, struct tcmsg *tm)
+			struct sk_buff *skb, struct tcmsg *tm, bool rtnl_held)
 {
 	struct cls_bpf_prog *prog = fh;
 	struct nlattr *nest;
@@ -636,7 +637,8 @@ static void cls_bpf_bind_class(void *fh, u32 classid, unsigned long cl)
 		prog->res.class = cl;
 }
 
-static void cls_bpf_walk(struct tcf_proto *tp, struct tcf_walker *arg)
+static void cls_bpf_walk(struct tcf_proto *tp, struct tcf_walker *arg,
+			 bool rtnl_held)
 {
 	struct cls_bpf_head *head = rtnl_dereference(tp->root);
 	struct cls_bpf_prog *prog;
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index 663ee1c6d606..1cef3b416094 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -78,7 +78,7 @@ static void cls_cgroup_destroy_work(struct work_struct *work)
 static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb,
 			     struct tcf_proto *tp, unsigned long base,
 			     u32 handle, struct nlattr **tca,
-			     void **arg, bool ovr,
+			     void **arg, bool ovr, bool rtnl_held,
 			     struct netlink_ext_ack *extack)
 {
 	struct nlattr *tb[TCA_CGROUP_MAX + 1];
@@ -130,7 +130,7 @@ errout:
 	return err;
 }
 
-static void cls_cgroup_destroy(struct tcf_proto *tp,
+static void cls_cgroup_destroy(struct tcf_proto *tp, bool rtnl_held,
 			       struct netlink_ext_ack *extack)
 {
 	struct cls_cgroup_head *head = rtnl_dereference(tp->root);
@@ -145,12 +145,13 @@ static void cls_cgroup_destroy(struct tcf_proto *tp,
 }
 
 static int cls_cgroup_delete(struct tcf_proto *tp, void *arg, bool *last,
-			     struct netlink_ext_ack *extack)
+			     bool rtnl_held, struct netlink_ext_ack *extack)
 {
 	return -EOPNOTSUPP;
 }
 
-static void cls_cgroup_walk(struct tcf_proto *tp, struct tcf_walker *arg)
+static void cls_cgroup_walk(struct tcf_proto *tp, struct tcf_walker *arg,
+			    bool rtnl_held)
 {
 	struct cls_cgroup_head *head = rtnl_dereference(tp->root);
 
@@ -166,7 +167,7 @@ skip:
 }
 
 static int cls_cgroup_dump(struct net *net, struct tcf_proto *tp, void *fh,
-			   struct sk_buff *skb, struct tcmsg *t)
+			   struct sk_buff *skb, struct tcmsg *t, bool rtnl_held)
 {
 	struct cls_cgroup_head *head = rtnl_dereference(tp->root);
 	struct nlattr *nest;
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index 39a6407d4832..204e2edae8d5 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -391,7 +391,8 @@ static void flow_destroy_filter_work(struct work_struct *work)
 static int flow_change(struct net *net, struct sk_buff *in_skb,
 		       struct tcf_proto *tp, unsigned long base,
 		       u32 handle, struct nlattr **tca,
-		       void **arg, bool ovr, struct netlink_ext_ack *extack)
+		       void **arg, bool ovr, bool rtnl_held,
+		       struct netlink_ext_ack *extack)
 {
 	struct flow_head *head = rtnl_dereference(tp->root);
 	struct flow_filter *fold, *fnew;
@@ -566,7 +567,7 @@ err1:
 }
 
 static int flow_delete(struct tcf_proto *tp, void *arg, bool *last,
-		       struct netlink_ext_ack *extack)
+		       bool rtnl_held, struct netlink_ext_ack *extack)
 {
 	struct flow_head *head = rtnl_dereference(tp->root);
 	struct flow_filter *f = arg;
@@ -590,7 +591,8 @@ static int flow_init(struct tcf_proto *tp)
 	return 0;
 }
 
-static void flow_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
+static void flow_destroy(struct tcf_proto *tp, bool rtnl_held,
+			 struct netlink_ext_ack *extack)
 {
 	struct flow_head *head = rtnl_dereference(tp->root);
 	struct flow_filter *f, *next;
@@ -617,7 +619,7 @@ static void *flow_get(struct tcf_proto *tp, u32 handle)
 }
 
 static int flow_dump(struct net *net, struct tcf_proto *tp, void *fh,
-		     struct sk_buff *skb, struct tcmsg *t)
+		     struct sk_buff *skb, struct tcmsg *t, bool rtnl_held)
 {
 	struct flow_filter *f = fh;
 	struct nlattr *nest;
@@ -677,7 +679,8 @@ nla_put_failure:
 	return -1;
 }
 
-static void flow_walk(struct tcf_proto *tp, struct tcf_walker *arg)
+static void flow_walk(struct tcf_proto *tp, struct tcf_walker *arg,
+		      bool rtnl_held)
 {
 	struct flow_head *head = rtnl_dereference(tp->root);
 	struct flow_filter *f;
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 5e3f74ab68ca..32fa3e20adc5 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -465,7 +465,8 @@ static void fl_destroy_sleepable(struct work_struct *work)
 	module_put(THIS_MODULE);
 }
 
-static void fl_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
+static void fl_destroy(struct tcf_proto *tp, bool rtnl_held,
+		       struct netlink_ext_ack *extack)
 {
 	struct cls_fl_head *head = rtnl_dereference(tp->root);
 	struct fl_flow_mask *mask, *next_mask;
@@ -1300,7 +1301,8 @@ static int fl_set_parms(struct net *net, struct tcf_proto *tp,
 static int fl_change(struct net *net, struct sk_buff *in_skb,
 		     struct tcf_proto *tp, unsigned long base,
 		     u32 handle, struct nlattr **tca,
-		     void **arg, bool ovr, struct netlink_ext_ack *extack)
+		     void **arg, bool ovr, bool rtnl_held,
+		     struct netlink_ext_ack *extack)
 {
 	struct cls_fl_head *head = rtnl_dereference(tp->root);
 	struct cls_fl_filter *fold = *arg;
@@ -1437,7 +1439,7 @@ errout_mask_alloc:
 }
 
 static int fl_delete(struct tcf_proto *tp, void *arg, bool *last,
-		     struct netlink_ext_ack *extack)
+		     bool rtnl_held, struct netlink_ext_ack *extack)
 {
 	struct cls_fl_head *head = rtnl_dereference(tp->root);
 	struct cls_fl_filter *f = arg;
@@ -1449,7 +1451,8 @@ static int fl_delete(struct tcf_proto *tp, void *arg, bool *last,
 	return 0;
 }
 
-static void fl_walk(struct tcf_proto *tp, struct tcf_walker *arg)
+static void fl_walk(struct tcf_proto *tp, struct tcf_walker *arg,
+		    bool rtnl_held)
 {
 	struct cls_fl_head *head = rtnl_dereference(tp->root);
 	struct cls_fl_filter *f;
@@ -2044,7 +2047,7 @@ nla_put_failure:
 }
 
 static int fl_dump(struct net *net, struct tcf_proto *tp, void *fh,
-		   struct sk_buff *skb, struct tcmsg *t)
+		   struct sk_buff *skb, struct tcmsg *t, bool rtnl_held)
 {
 	struct cls_fl_filter *f = fh;
 	struct nlattr *nest;
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index c8173ebb69f2..317151bae73b 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -139,7 +139,8 @@ static void fw_delete_filter_work(struct work_struct *work)
 	rtnl_unlock();
 }
 
-static void fw_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
+static void fw_destroy(struct tcf_proto *tp, bool rtnl_held,
+		       struct netlink_ext_ack *extack)
 {
 	struct fw_head *head = rtnl_dereference(tp->root);
 	struct fw_filter *f;
@@ -163,7 +164,7 @@ static void fw_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
 }
 
 static int fw_delete(struct tcf_proto *tp, void *arg, bool *last,
-		     struct netlink_ext_ack *extack)
+		     bool rtnl_held, struct netlink_ext_ack *extack)
 {
 	struct fw_head *head = rtnl_dereference(tp->root);
 	struct fw_filter *f = arg;
@@ -250,7 +251,8 @@ static int fw_set_parms(struct net *net, struct tcf_proto *tp,
 static int fw_change(struct net *net, struct sk_buff *in_skb,
 		     struct tcf_proto *tp, unsigned long base,
 		     u32 handle, struct nlattr **tca, void **arg,
-		     bool ovr, struct netlink_ext_ack *extack)
+		     bool ovr, bool rtnl_held,
+		     struct netlink_ext_ack *extack)
 {
 	struct fw_head *head = rtnl_dereference(tp->root);
 	struct fw_filter *f = *arg;
@@ -354,7 +356,8 @@ errout:
 	return err;
 }
 
-static void fw_walk(struct tcf_proto *tp, struct tcf_walker *arg)
+static void fw_walk(struct tcf_proto *tp, struct tcf_walker *arg,
+		    bool rtnl_held)
 {
 	struct fw_head *head = rtnl_dereference(tp->root);
 	int h;
@@ -384,7 +387,7 @@ static void fw_walk(struct tcf_proto *tp, struct tcf_walker *arg)
 }
 
 static int fw_dump(struct net *net, struct tcf_proto *tp, void *fh,
-		   struct sk_buff *skb, struct tcmsg *t)
+		   struct sk_buff *skb, struct tcmsg *t, bool rtnl_held)
 {
 	struct fw_head *head = rtnl_dereference(tp->root);
 	struct fw_filter *f = fh;
diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c
index 8848a147c4bf..a37137430e61 100644
--- a/net/sched/cls_matchall.c
+++ b/net/sched/cls_matchall.c
@@ -109,7 +109,8 @@ static int mall_replace_hw_filter(struct tcf_proto *tp,
 	return 0;
 }
 
-static void mall_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
+static void mall_destroy(struct tcf_proto *tp, bool rtnl_held,
+			 struct netlink_ext_ack *extack)
 {
 	struct cls_mall_head *head = rtnl_dereference(tp->root);
 
@@ -160,7 +161,8 @@ static int mall_set_parms(struct net *net, struct tcf_proto *tp,
 static int mall_change(struct net *net, struct sk_buff *in_skb,
 		       struct tcf_proto *tp, unsigned long base,
 		       u32 handle, struct nlattr **tca,
-		       void **arg, bool ovr, struct netlink_ext_ack *extack)
+		       void **arg, bool ovr, bool rtnl_held,
+		       struct netlink_ext_ack *extack)
 {
 	struct cls_mall_head *head = rtnl_dereference(tp->root);
 	struct nlattr *tb[TCA_MATCHALL_MAX + 1];
@@ -233,12 +235,13 @@ err_exts_init:
 }
 
 static int mall_delete(struct tcf_proto *tp, void *arg, bool *last,
-		       struct netlink_ext_ack *extack)
+		       bool rtnl_held, struct netlink_ext_ack *extack)
 {
 	return -EOPNOTSUPP;
 }
 
-static void mall_walk(struct tcf_proto *tp, struct tcf_walker *arg)
+static void mall_walk(struct tcf_proto *tp, struct tcf_walker *arg,
+		      bool rtnl_held)
 {
 	struct cls_mall_head *head = rtnl_dereference(tp->root);
 
@@ -280,7 +283,7 @@ static int mall_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb,
 }
 
 static int mall_dump(struct net *net, struct tcf_proto *tp, void *fh,
-		     struct sk_buff *skb, struct tcmsg *t)
+		     struct sk_buff *skb, struct tcmsg *t, bool rtnl_held)
 {
 	struct tc_matchall_pcnt gpf = {};
 	struct cls_mall_head *head = fh;
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index 44b26038c4c4..e590c3a2999d 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -276,7 +276,8 @@ static void route4_queue_work(struct route4_filter *f)
 	tcf_queue_work(&f->rwork, route4_delete_filter_work);
 }
 
-static void route4_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
+static void route4_destroy(struct tcf_proto *tp, bool rtnl_held,
+			   struct netlink_ext_ack *extack)
 {
 	struct route4_head *head = rtnl_dereference(tp->root);
 	int h1, h2;
@@ -312,7 +313,7 @@ static void route4_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
 }
 
 static int route4_delete(struct tcf_proto *tp, void *arg, bool *last,
-			 struct netlink_ext_ack *extack)
+			 bool rtnl_held, struct netlink_ext_ack *extack)
 {
 	struct route4_head *head = rtnl_dereference(tp->root);
 	struct route4_filter *f = arg;
@@ -468,7 +469,7 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp,
 static int route4_change(struct net *net, struct sk_buff *in_skb,
 			 struct tcf_proto *tp, unsigned long base, u32 handle,
 			 struct nlattr **tca, void **arg, bool ovr,
-			 struct netlink_ext_ack *extack)
+			 bool rtnl_held, struct netlink_ext_ack *extack)
 {
 	struct route4_head *head = rtnl_dereference(tp->root);
 	struct route4_filter __rcu **fp;
@@ -560,7 +561,8 @@ errout:
 	return err;
 }
 
-static void route4_walk(struct tcf_proto *tp, struct tcf_walker *arg)
+static void route4_walk(struct tcf_proto *tp, struct tcf_walker *arg,
+			bool rtnl_held)
 {
 	struct route4_head *head = rtnl_dereference(tp->root);
 	unsigned int h, h1;
@@ -597,7 +599,7 @@ static void route4_walk(struct tcf_proto *tp, struct tcf_walker *arg)
 }
 
 static int route4_dump(struct net *net, struct tcf_proto *tp, void *fh,
-		       struct sk_buff *skb, struct tcmsg *t)
+		       struct sk_buff *skb, struct tcmsg *t, bool rtnl_held)
 {
 	struct route4_filter *f = fh;
 	struct nlattr *nest;
diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h
index 9dd9530e6a52..4d3836178fa5 100644
--- a/net/sched/cls_rsvp.h
+++ b/net/sched/cls_rsvp.h
@@ -312,7 +312,8 @@ static void rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f)
 		__rsvp_delete_filter(f);
 }
 
-static void rsvp_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
+static void rsvp_destroy(struct tcf_proto *tp, bool rtnl_held,
+			 struct netlink_ext_ack *extack)
 {
 	struct rsvp_head *data = rtnl_dereference(tp->root);
 	int h1, h2;
@@ -341,7 +342,7 @@ static void rsvp_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
 }
 
 static int rsvp_delete(struct tcf_proto *tp, void *arg, bool *last,
-		       struct netlink_ext_ack *extack)
+		       bool rtnl_held, struct netlink_ext_ack *extack)
 {
 	struct rsvp_head *head = rtnl_dereference(tp->root);
 	struct rsvp_filter *nfp, *f = arg;
@@ -477,7 +478,8 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb,
 		       struct tcf_proto *tp, unsigned long base,
 		       u32 handle,
 		       struct nlattr **tca,
-		       void **arg, bool ovr, struct netlink_ext_ack *extack)
+		       void **arg, bool ovr, bool rtnl_held,
+		       struct netlink_ext_ack *extack)
 {
 	struct rsvp_head *data = rtnl_dereference(tp->root);
 	struct rsvp_filter *f, *nfp;
@@ -655,7 +657,8 @@ errout2:
 	return err;
 }
 
-static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg)
+static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg,
+		      bool rtnl_held)
 {
 	struct rsvp_head *head = rtnl_dereference(tp->root);
 	unsigned int h, h1;
@@ -689,7 +692,7 @@ static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg)
 }
 
 static int rsvp_dump(struct net *net, struct tcf_proto *tp, void *fh,
-		     struct sk_buff *skb, struct tcmsg *t)
+		     struct sk_buff *skb, struct tcmsg *t, bool rtnl_held)
 {
 	struct rsvp_filter *f = fh;
 	struct rsvp_session *s;
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index b7dc667b6ec0..14d6b4058045 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -173,7 +173,7 @@ static void tcindex_destroy_fexts_work(struct work_struct *work)
 }
 
 static int tcindex_delete(struct tcf_proto *tp, void *arg, bool *last,
-			  struct netlink_ext_ack *extack)
+			  bool rtnl_held, struct netlink_ext_ack *extack)
 {
 	struct tcindex_data *p = rtnl_dereference(tp->root);
 	struct tcindex_filter_result *r = arg;
@@ -226,7 +226,7 @@ static int tcindex_destroy_element(struct tcf_proto *tp,
 {
 	bool last;
 
-	return tcindex_delete(tp, arg, &last, NULL);
+	return tcindex_delete(tp, arg, &last, false, NULL);
 }
 
 static void __tcindex_destroy(struct rcu_head *head)
@@ -499,7 +499,7 @@ static int
 tcindex_change(struct net *net, struct sk_buff *in_skb,
 	       struct tcf_proto *tp, unsigned long base, u32 handle,
 	       struct nlattr **tca, void **arg, bool ovr,
-	       struct netlink_ext_ack *extack)
+	       bool rtnl_held, struct netlink_ext_ack *extack)
 {
 	struct nlattr *opt = tca[TCA_OPTIONS];
 	struct nlattr *tb[TCA_TCINDEX_MAX + 1];
@@ -522,7 +522,8 @@ tcindex_change(struct net *net, struct sk_buff *in_skb,
 				 tca[TCA_RATE], ovr, extack);
 }
 
-static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker)
+static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker,
+			 bool rtnl_held)
 {
 	struct tcindex_data *p = rtnl_dereference(tp->root);
 	struct tcindex_filter *f, *next;
@@ -558,7 +559,7 @@ static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker)
 	}
 }
 
-static void tcindex_destroy(struct tcf_proto *tp,
+static void tcindex_destroy(struct tcf_proto *tp, bool rtnl_held,
 			    struct netlink_ext_ack *extack)
 {
 	struct tcindex_data *p = rtnl_dereference(tp->root);
@@ -568,14 +569,14 @@ static void tcindex_destroy(struct tcf_proto *tp,
 	walker.count = 0;
 	walker.skip = 0;
 	walker.fn = tcindex_destroy_element;
-	tcindex_walk(tp, &walker);
+	tcindex_walk(tp, &walker, true);
 
 	call_rcu(&p->rcu, __tcindex_destroy);
 }
 
 
 static int tcindex_dump(struct net *net, struct tcf_proto *tp, void *fh,
-			struct sk_buff *skb, struct tcmsg *t)
+			struct sk_buff *skb, struct tcmsg *t, bool rtnl_held)
 {
 	struct tcindex_data *p = rtnl_dereference(tp->root);
 	struct tcindex_filter_result *r = fh;
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index e891f30d42e9..27d29c04dcc9 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -629,7 +629,8 @@ static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht,
 	return -ENOENT;
 }
 
-static void u32_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
+static void u32_destroy(struct tcf_proto *tp, bool rtnl_held,
+			struct netlink_ext_ack *extack)
 {
 	struct tc_u_common *tp_c = tp->data;
 	struct tc_u_hnode *root_ht = rtnl_dereference(tp->root);
@@ -663,7 +664,7 @@ static void u32_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
 }
 
 static int u32_delete(struct tcf_proto *tp, void *arg, bool *last,
-		      struct netlink_ext_ack *extack)
+		      bool rtnl_held, struct netlink_ext_ack *extack)
 {
 	struct tc_u_hnode *ht = arg;
 	struct tc_u_common *tp_c = tp->data;
@@ -858,7 +859,7 @@ static struct tc_u_knode *u32_init_knode(struct tcf_proto *tp,
 
 static int u32_change(struct net *net, struct sk_buff *in_skb,
 		      struct tcf_proto *tp, unsigned long base, u32 handle,
-		      struct nlattr **tca, void **arg, bool ovr,
+		      struct nlattr **tca, void **arg, bool ovr, bool rtnl_held,
 		      struct netlink_ext_ack *extack)
 {
 	struct tc_u_common *tp_c = tp->data;
@@ -1123,7 +1124,8 @@ erridr:
 	return err;
 }
 
-static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg)
+static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg,
+		     bool rtnl_held)
 {
 	struct tc_u_common *tp_c = tp->data;
 	struct tc_u_hnode *ht;
@@ -1281,7 +1283,7 @@ static void u32_bind_class(void *fh, u32 classid, unsigned long cl)
 }
 
 static int u32_dump(struct net *net, struct tcf_proto *tp, void *fh,
-		    struct sk_buff *skb, struct tcmsg *t)
+		    struct sk_buff *skb, struct tcmsg *t, bool rtnl_held)
 {
 	struct tc_u_knode *n = fh;
 	struct tc_u_hnode *ht_up, *ht_down;
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 9a530cad2759..2283924fb56d 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1914,14 +1914,14 @@ static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
 	     chain = tcf_get_next_chain(block, chain)) {
 		struct tcf_proto *tp;
 
-		for (tp = tcf_get_next_proto(chain, NULL);
-		     tp; tp = tcf_get_next_proto(chain, tp)) {
+		for (tp = tcf_get_next_proto(chain, NULL, true);
+		     tp; tp = tcf_get_next_proto(chain, tp, true)) {
 			struct tcf_bind_args arg = {};
 
 			arg.w.fn = tcf_node_bind;
 			arg.classid = clid;
 			arg.cl = new_cl;
-			tp->ops->walk(tp, &arg.w);
+			tp->ops->walk(tp, &arg.w, true);
 		}
 	}
 }
-- 
cgit v1.2.3


From dfcd2a2b2274ec029699be70ea0fe41a7a03f728 Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@mellanox.com>
Date: Mon, 11 Feb 2019 10:55:46 +0200
Subject: net: sched: add flags to Qdisc class ops struct

Extend Qdisc_class_ops with flags. Create enum to hold possible class ops
flag values. Add first class ops flags value QDISC_CLASS_OPS_DOIT_UNLOCKED
to indicate that class ops functions can be called without taking rtnl
lock.

Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sch_generic.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include')

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 365801c2a4f5..e50b729f8691 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -179,6 +179,7 @@ static inline int qdisc_avail_bulklimit(const struct netdev_queue *txq)
 }
 
 struct Qdisc_class_ops {
+	unsigned int		flags;
 	/* Child qdisc manipulation */
 	struct netdev_queue *	(*select_queue)(struct Qdisc *, struct tcmsg *);
 	int			(*graft)(struct Qdisc *, unsigned long cl,
@@ -210,6 +211,13 @@ struct Qdisc_class_ops {
 					struct gnet_dump *);
 };
 
+/* Qdisc_class_ops flag values */
+
+/* Implements API that doesn't require rtnl lock */
+enum qdisc_class_ops_flags {
+	QDISC_CLASS_OPS_DOIT_UNLOCKED = 1,
+};
+
 struct Qdisc_ops {
 	struct Qdisc_ops	*next;
 	const struct Qdisc_class_ops	*cl_ops;
-- 
cgit v1.2.3


From 72d1cd033154f50e77cd4feb4e16c227b598632e Mon Sep 17 00:00:00 2001
From: Jordan Crouse <jcrouse@codeaurora.org>
Date: Tue, 11 Dec 2018 13:07:45 -0700
Subject: qcom: soc: llcc-slice: Clear the global drv_data pointer on error

Currently the data structure for llc-slice is devm allocated and
stored as a global but never cleared if the probe function fails.
This is a problem because devm managed memory gets freed on probe
failure the API functions could access the pointer after it has been
freed.

Initialize the drv_data pointer to an error and reset it to an error
on probe failure or device destroy and add protection to the API
functions to make sure the memory doesn't get accessed.

Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org>
Signed-off-by: Andy Gross <andy.gross@linaro.org>
---
 drivers/soc/qcom/llcc-sdm845.c     |  6 ++++
 drivers/soc/qcom/llcc-slice.c      | 71 +++++++++++++++++++++++++++++---------
 include/linux/soc/qcom/llcc-qcom.h |  6 ++++
 3 files changed, 66 insertions(+), 17 deletions(-)

(limited to 'include')

diff --git a/drivers/soc/qcom/llcc-sdm845.c b/drivers/soc/qcom/llcc-sdm845.c
index 2e1e4f0a5db8..86600d97c36d 100644
--- a/drivers/soc/qcom/llcc-sdm845.c
+++ b/drivers/soc/qcom/llcc-sdm845.c
@@ -71,6 +71,11 @@ static struct llcc_slice_config sdm845_data[] =  {
 	SCT_ENTRY(LLCC_AUDHW,    22, 1024, 1, 1, 0xffc, 0x2,   0, 0, 1, 1, 0),
 };
 
+static int sdm845_qcom_llcc_remove(struct platform_device *pdev)
+{
+	return qcom_llcc_remove(pdev);
+}
+
 static int sdm845_qcom_llcc_probe(struct platform_device *pdev)
 {
 	return qcom_llcc_probe(pdev, sdm845_data, ARRAY_SIZE(sdm845_data));
@@ -87,6 +92,7 @@ static struct platform_driver sdm845_qcom_llcc_driver = {
 		.of_match_table = sdm845_qcom_llcc_of_match,
 	},
 	.probe = sdm845_qcom_llcc_probe,
+	.remove = sdm845_qcom_llcc_remove,
 };
 module_platform_driver(sdm845_qcom_llcc_driver);
 
diff --git a/drivers/soc/qcom/llcc-slice.c b/drivers/soc/qcom/llcc-slice.c
index 80667f7be52c..8390bc006a31 100644
--- a/drivers/soc/qcom/llcc-slice.c
+++ b/drivers/soc/qcom/llcc-slice.c
@@ -46,7 +46,7 @@
 
 #define BANK_OFFSET_STRIDE	      0x80000
 
-static struct llcc_drv_data *drv_data;
+static struct llcc_drv_data *drv_data = (void *) -EPROBE_DEFER;
 
 static const struct regmap_config llcc_regmap_config = {
 	.reg_bits = 32,
@@ -68,6 +68,9 @@ struct llcc_slice_desc *llcc_slice_getd(u32 uid)
 	struct llcc_slice_desc *desc;
 	u32 sz, count;
 
+	if (IS_ERR(drv_data))
+		return ERR_CAST(drv_data);
+
 	cfg = drv_data->cfg;
 	sz = drv_data->cfg_size;
 
@@ -108,6 +111,9 @@ static int llcc_update_act_ctrl(u32 sid,
 	u32 slice_status;
 	int ret;
 
+	if (IS_ERR(drv_data))
+		return PTR_ERR(drv_data);
+
 	act_ctrl_reg = LLCC_TRP_ACT_CTRLn(sid);
 	status_reg = LLCC_TRP_STATUSn(sid);
 
@@ -143,6 +149,9 @@ int llcc_slice_activate(struct llcc_slice_desc *desc)
 	int ret;
 	u32 act_ctrl_val;
 
+	If (IS_ERR(drv_data))
+		return PTR_ERR(drv_data);
+
 	if (IS_ERR_OR_NULL(desc))
 		return -EINVAL;
 
@@ -180,6 +189,9 @@ int llcc_slice_deactivate(struct llcc_slice_desc *desc)
 	u32 act_ctrl_val;
 	int ret;
 
+	If (IS_ERR(drv_data))
+		return PTR_ERR(drv_data);
+
 	if (IS_ERR_OR_NULL(desc))
 		return -EINVAL;
 
@@ -289,6 +301,14 @@ static int qcom_llcc_cfg_program(struct platform_device *pdev)
 	return ret;
 }
 
+int qcom_llcc_remove(struct platform_device *pdev)
+{
+	/* Set the global pointer to a error code to avoid referencing it */
+	drv_data = ERR_PTR(-ENODEV);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(qcom_llcc_remove);
+
 int qcom_llcc_probe(struct platform_device *pdev,
 		      const struct llcc_slice_config *llcc_cfg, u32 sz)
 {
@@ -300,35 +320,45 @@ int qcom_llcc_probe(struct platform_device *pdev,
 	struct platform_device *llcc_edac;
 
 	drv_data = devm_kzalloc(dev, sizeof(*drv_data), GFP_KERNEL);
-	if (!drv_data)
-		return -ENOMEM;
+	if (!drv_data) {
+		ret = -ENOMEM;
+		goto err;
+	}
 
 	llcc_banks_res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
 							"llcc_base");
 	llcc_banks_base = devm_ioremap_resource(&pdev->dev, llcc_banks_res);
-	if (IS_ERR(llcc_banks_base))
-		return PTR_ERR(llcc_banks_base);
+	if (IS_ERR(llcc_banks_base)) {
+		ret = PTR_ERR(llcc_banks_base);
+		goto err;
+	}
 
 	drv_data->regmap = devm_regmap_init_mmio(dev, llcc_banks_base,
 						&llcc_regmap_config);
-	if (IS_ERR(drv_data->regmap))
-		return PTR_ERR(drv_data->regmap);
+	if (IS_ERR(drv_data->regmap)) {
+		ret = PTR_ERR(drv_data->regmap);
+		goto err;
+	}
 
 	llcc_bcast_res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
 							"llcc_broadcast_base");
 	llcc_bcast_base = devm_ioremap_resource(&pdev->dev, llcc_bcast_res);
-	if (IS_ERR(llcc_bcast_base))
-		return PTR_ERR(llcc_bcast_base);
+	if (IS_ERR(llcc_bcast_base)) {
+		ret = PTR_ERR(llcc_bcast_base);
+		goto err;
+	}
 
 	drv_data->bcast_regmap = devm_regmap_init_mmio(dev, llcc_bcast_base,
 							&llcc_regmap_config);
-	if (IS_ERR(drv_data->bcast_regmap))
-		return PTR_ERR(drv_data->bcast_regmap);
+	if (IS_ERR(drv_data->bcast_regmap)) {
+		ret = PTR_ERR(drv_data->bcast_regmap);
+		goto err;
+	}
 
 	ret = regmap_read(drv_data->regmap, LLCC_COMMON_STATUS0,
 						&num_banks);
 	if (ret)
-		return ret;
+		goto err;
 
 	num_banks &= LLCC_LB_CNT_MASK;
 	num_banks >>= LLCC_LB_CNT_SHIFT;
@@ -340,8 +370,10 @@ int qcom_llcc_probe(struct platform_device *pdev,
 
 	drv_data->offsets = devm_kcalloc(dev, num_banks, sizeof(u32),
 							GFP_KERNEL);
-	if (!drv_data->offsets)
-		return -ENOMEM;
+	if (!drv_data->offsets) {
+		ret = -ENOMEM;
+		goto err;
+	}
 
 	for (i = 0; i < num_banks; i++)
 		drv_data->offsets[i] = i * BANK_OFFSET_STRIDE;
@@ -349,8 +381,10 @@ int qcom_llcc_probe(struct platform_device *pdev,
 	drv_data->bitmap = devm_kcalloc(dev,
 	BITS_TO_LONGS(drv_data->max_slices), sizeof(unsigned long),
 						GFP_KERNEL);
-	if (!drv_data->bitmap)
-		return -ENOMEM;
+	if (!drv_data->bitmap) {
+		ret = -ENOMEM;
+		goto err;
+	}
 
 	drv_data->cfg = llcc_cfg;
 	drv_data->cfg_size = sz;
@@ -359,7 +393,7 @@ int qcom_llcc_probe(struct platform_device *pdev,
 
 	ret = qcom_llcc_cfg_program(pdev);
 	if (ret)
-		return ret;
+		goto err;
 
 	drv_data->ecc_irq = platform_get_irq(pdev, 0);
 	if (drv_data->ecc_irq >= 0) {
@@ -370,6 +404,9 @@ int qcom_llcc_probe(struct platform_device *pdev,
 			dev_err(dev, "Failed to register llcc edac driver\n");
 	}
 
+	return 0;
+err:
+	drv_data = ERR_PTR(-ENODEV);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(qcom_llcc_probe);
diff --git a/include/linux/soc/qcom/llcc-qcom.h b/include/linux/soc/qcom/llcc-qcom.h
index 69c285b1c990..eb71a50b8afc 100644
--- a/include/linux/soc/qcom/llcc-qcom.h
+++ b/include/linux/soc/qcom/llcc-qcom.h
@@ -162,6 +162,12 @@ int llcc_slice_deactivate(struct llcc_slice_desc *desc);
  */
 int qcom_llcc_probe(struct platform_device *pdev,
 		      const struct llcc_slice_config *table, u32 sz);
+
+/**
+ * qcom_llcc_remove - remove the sct table
+ * @pdev: Platform device pointer
+ */
+int qcom_llcc_remove(struct platform_device *pdev);
 #else
 static inline struct llcc_slice_desc *llcc_slice_getd(u32 uid)
 {
-- 
cgit v1.2.3


From 91a12e91dc39137906d929a4ff6f9c32c59697fa Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Tue, 12 Feb 2019 16:36:04 +0530
Subject: cpufreq: Allow light-weight tear down and bring up of CPUs

The cpufreq core doesn't remove the cpufreq policy anymore on CPU
offline operation, rather that happens when the CPU device gets
unregistered from the kernel. This allows faster recovery when the CPU
comes back online. This is also very useful during system wide
suspend/resume where we offline all non-boot CPUs during suspend and
then bring them back on resume.

This commit takes the same idea a step ahead to allow drivers to do
light weight tear-down and bring-up during CPU offline and online
operations.

A new set of callbacks is introduced, online/offline(). online() gets
called when the first CPU of an inactive policy is brought up and
offline() gets called when all the CPUs of a policy are offlined.

The existing init/exit() callback get called on policy
creation/destruction. They also get called instead of online/offline()
callbacks if the online/offline() callbacks aren't provided.

This also moves around some code to get executed only for the new-policy
case going forward.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/cpufreq.c | 58 +++++++++++++++++++++++++++++++----------------
 include/linux/cpufreq.h   |  2 ++
 2 files changed, 40 insertions(+), 20 deletions(-)

(limited to 'include')

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 96a69c67a545..55e9795801a4 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -1201,28 +1201,39 @@ static int cpufreq_online(unsigned int cpu)
 			return -ENOMEM;
 	}
 
-	cpumask_copy(policy->cpus, cpumask_of(cpu));
+	if (!new_policy && cpufreq_driver->online) {
+		ret = cpufreq_driver->online(policy);
+		if (ret) {
+			pr_debug("%s: %d: initialization failed\n", __func__,
+				 __LINE__);
+			goto out_exit_policy;
+		}
 
-	/* call driver. From then on the cpufreq must be able
-	 * to accept all calls to ->verify and ->setpolicy for this CPU
-	 */
-	ret = cpufreq_driver->init(policy);
-	if (ret) {
-		pr_debug("initialization failed\n");
-		goto out_free_policy;
-	}
+		/* Recover policy->cpus using related_cpus */
+		cpumask_copy(policy->cpus, policy->related_cpus);
+	} else {
+		cpumask_copy(policy->cpus, cpumask_of(cpu));
 
-	ret = cpufreq_table_validate_and_sort(policy);
-	if (ret)
-		goto out_exit_policy;
+		/*
+		 * Call driver. From then on the cpufreq must be able
+		 * to accept all calls to ->verify and ->setpolicy for this CPU.
+		 */
+		ret = cpufreq_driver->init(policy);
+		if (ret) {
+			pr_debug("%s: %d: initialization failed\n", __func__,
+				 __LINE__);
+			goto out_free_policy;
+		}
 
-	down_write(&policy->rwsem);
+		ret = cpufreq_table_validate_and_sort(policy);
+		if (ret)
+			goto out_exit_policy;
 
-	if (new_policy) {
 		/* related_cpus should at least include policy->cpus. */
 		cpumask_copy(policy->related_cpus, policy->cpus);
 	}
 
+	down_write(&policy->rwsem);
 	/*
 	 * affected cpus must always be the one, which are online. We aren't
 	 * managing offline cpus here.
@@ -1421,11 +1432,12 @@ static int cpufreq_offline(unsigned int cpu)
 		cpufreq_exit_governor(policy);
 
 	/*
-	 * Perform the ->exit() even during light-weight tear-down,
-	 * since this is a core component, and is essential for the
-	 * subsequent light-weight ->init() to succeed.
+	 * Perform the ->offline() during light-weight tear-down, as
+	 * that allows fast recovery when the CPU comes back.
 	 */
-	if (cpufreq_driver->exit) {
+	if (cpufreq_driver->offline) {
+		cpufreq_driver->offline(policy);
+	} else if (cpufreq_driver->exit) {
 		cpufreq_driver->exit(policy);
 		policy->freq_table = NULL;
 	}
@@ -1454,8 +1466,13 @@ static void cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif)
 	cpumask_clear_cpu(cpu, policy->real_cpus);
 	remove_cpu_dev_symlink(policy, dev);
 
-	if (cpumask_empty(policy->real_cpus))
+	if (cpumask_empty(policy->real_cpus)) {
+		/* We did light-weight exit earlier, do full tear down now */
+		if (cpufreq_driver->offline)
+			cpufreq_driver->exit(policy);
+
 		cpufreq_policy_free(policy);
+	}
 }
 
 /**
@@ -2488,7 +2505,8 @@ int cpufreq_register_driver(struct cpufreq_driver *driver_data)
 		    driver_data->target) ||
 	     (driver_data->setpolicy && (driver_data->target_index ||
 		    driver_data->target)) ||
-	     (!!driver_data->get_intermediate != !!driver_data->target_intermediate))
+	     (!!driver_data->get_intermediate != !!driver_data->target_intermediate) ||
+	     (!driver_data->online != !driver_data->offline))
 		return -EINVAL;
 
 	pr_debug("trying to register driver %s\n", driver_data->name);
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 9db074ecbbd7..b160e98076e3 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -325,6 +325,8 @@ struct cpufreq_driver {
 	/* optional */
 	int		(*bios_limit)(int cpu, unsigned int *limit);
 
+	int		(*online)(struct cpufreq_policy *policy);
+	int		(*offline)(struct cpufreq_policy *policy);
 	int		(*exit)(struct cpufreq_policy *policy);
 	void		(*stop_cpu)(struct cpufreq_policy *policy);
 	int		(*suspend)(struct cpufreq_policy *policy);
-- 
cgit v1.2.3


From 0cf264b3133dce56a60ca8b4335d1f76fe26870a Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 11 Feb 2019 13:20:35 +0000
Subject: locking/atomics: Check atomic headers with sha1sum

We currently check the atomic headers at build-time to ensure they
haven't been modified directly, and these checks require regenerating
the headers in full. As this takes a few seconds, even when
parallelized, this is too slow to run for every kernel build.

Instead, we can generate a hash of each header as we generate them,
which we can cheaply check at build time (~0.16s for all headers).

This patch does so, updating headers with their hashes using the new
gen-atomics.sh script. As some users apparently build the kernel wihout
coreutils, lacking sha1sum, the checks are skipped in this case.
Presumably, most developers have a working coreutils installation.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Will Deacon <will.deacon@arm.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: anders.roxell@linaro.org
Cc: linux-kernel@vger.kernel.rg
Cc: naresh.kamboju@linaro.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/asm-generic/atomic-instrumented.h |  1 +
 include/asm-generic/atomic-long.h         |  1 +
 include/linux/atomic-fallback.h           |  1 +
 scripts/atomic/check-atomics.sh           | 26 ++++++++++++++++++++------
 scripts/atomic/gen-atomics.sh             | 20 ++++++++++++++++++++
 5 files changed, 43 insertions(+), 6 deletions(-)
 create mode 100644 scripts/atomic/gen-atomics.sh

(limited to 'include')

diff --git a/include/asm-generic/atomic-instrumented.h b/include/asm-generic/atomic-instrumented.h
index b8f5b35216e1..e8730c6b9fe2 100644
--- a/include/asm-generic/atomic-instrumented.h
+++ b/include/asm-generic/atomic-instrumented.h
@@ -1785,3 +1785,4 @@ atomic64_dec_if_positive(atomic64_t *v)
 })
 
 #endif /* _ASM_GENERIC_ATOMIC_INSTRUMENTED_H */
+// b29b625d5de9280f680e42c7be859b55b15e5f6a
diff --git a/include/asm-generic/atomic-long.h b/include/asm-generic/atomic-long.h
index a833d385a70b..881c7e27af28 100644
--- a/include/asm-generic/atomic-long.h
+++ b/include/asm-generic/atomic-long.h
@@ -1010,3 +1010,4 @@ atomic_long_dec_if_positive(atomic_long_t *v)
 
 #endif /* CONFIG_64BIT */
 #endif /* _ASM_GENERIC_ATOMIC_LONG_H */
+// 77558968132ce4f911ad53f6f52ce423006f6268
diff --git a/include/linux/atomic-fallback.h b/include/linux/atomic-fallback.h
index 1c02c0112fbb..a7d240e465c0 100644
--- a/include/linux/atomic-fallback.h
+++ b/include/linux/atomic-fallback.h
@@ -2292,3 +2292,4 @@ atomic64_dec_if_positive(atomic64_t *v)
 #define atomic64_cond_read_relaxed(v, c) smp_cond_load_relaxed(&(v)->counter, (c))
 
 #endif /* _LINUX_ATOMIC_FALLBACK_H */
+// 25de4a2804d70f57e994fe3b419148658bb5378a
diff --git a/scripts/atomic/check-atomics.sh b/scripts/atomic/check-atomics.sh
index c30101cddf2d..cfa0c2f71c84 100755
--- a/scripts/atomic/check-atomics.sh
+++ b/scripts/atomic/check-atomics.sh
@@ -7,13 +7,27 @@ ATOMICDIR=$(dirname $0)
 ATOMICTBL=${ATOMICDIR}/atomics.tbl
 LINUXDIR=${ATOMICDIR}/../..
 
+echo '' | sha1sum - > /dev/null 2>&1
+if [ $? -ne 0 ]; then
+	printf "sha1sum not available, skipping atomic header checks.\n"
+	exit 0
+fi
+
 cat <<EOF |
-gen-atomic-instrumented.sh      asm-generic/atomic-instrumented.h
-gen-atomic-long.sh              asm-generic/atomic-long.h
-gen-atomic-fallback.sh          linux/atomic-fallback.h
+asm-generic/atomic-instrumented.h
+asm-generic/atomic-long.h
+linux/atomic-fallback.h
 EOF
-while read script header; do
-	if ! (${ATOMICDIR}/${script} ${ATOMICTBL} | diff - ${LINUXDIR}/include/${header} > /dev/null); then
-		printf "warning: include/${header} is out-of-date.\n"
+while read header; do
+	OLDSUM="$(tail -n 1 ${LINUXDIR}/include/${header})"
+	OLDSUM="${OLDSUM#// }"
+
+	NEWSUM="$(head -n -1 ${LINUXDIR}/include/${header} | sha1sum)"
+	NEWSUM="${NEWSUM%% *}"
+
+	if [ "${OLDSUM}" != "${NEWSUM}" ]; then
+		printf "warning: generated include/${header} has been modified.\n"
 	fi
 done
+
+exit 0
diff --git a/scripts/atomic/gen-atomics.sh b/scripts/atomic/gen-atomics.sh
new file mode 100644
index 000000000000..27400b0cd732
--- /dev/null
+++ b/scripts/atomic/gen-atomics.sh
@@ -0,0 +1,20 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# Generate atomic headers
+
+ATOMICDIR=$(dirname $0)
+ATOMICTBL=${ATOMICDIR}/atomics.tbl
+LINUXDIR=${ATOMICDIR}/../..
+
+cat <<EOF |
+gen-atomic-instrumented.sh      asm-generic/atomic-instrumented.h
+gen-atomic-long.sh              asm-generic/atomic-long.h
+gen-atomic-fallback.sh          linux/atomic-fallback.h
+EOF
+while read script header; do
+	${ATOMICDIR}/${script} ${ATOMICTBL} > ${LINUXDIR}/include/${header}
+	HASH="$(sha1sum ${LINUXDIR}/include/${header})"
+	HASH="${HASH%% *}"
+	printf "// %s\n" "${HASH}" >> ${LINUXDIR}/include/${header}
+done
-- 
cgit v1.2.3


From 030fc443aef663df71cd834331fd8f1ec10c30c0 Mon Sep 17 00:00:00 2001
From: Waiman Long <longman@redhat.com>
Date: Tue, 12 Feb 2019 09:54:13 -0500
Subject: genirq: Add missing documentation for tot_count

Commit:

  1136b0728969 ("genirq: Avoid summation loops for /proc/stat")

adds a new irq_desc::tot_count field, without documenting it.
Add the missing piece of documentation.

Signed-off-by: Waiman Long <longman@redhat.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Daniel Colascione <dancol@google.com>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: Kees Cook <keescook@chromium.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Miklos Szeredi <miklos@szeredi.hu>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1549983253-19107-1-git-send-email-longman@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/irqdesc.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
index 875c41b23f20..1d679feff3f6 100644
--- a/include/linux/irqdesc.h
+++ b/include/linux/irqdesc.h
@@ -28,6 +28,7 @@ struct pt_regs;
  * @core_internal_state__do_not_mess_with_it: core internal status information
  * @depth:		disable-depth, for nested irq_disable() calls
  * @wake_depth:		enable depth, for multiple irq_set_irq_wake() callers
+ * @tot_count:		stats field for non-percpu irqs
  * @irq_count:		stats field to detect stalled irqs
  * @last_unhandled:	aging timer for unhandled count
  * @irqs_unhandled:	stats field for spurious unhandled interrupts
-- 
cgit v1.2.3


From c8faabfc6f48009fb0d9ad4203aecfa569e5ff8d Mon Sep 17 00:00:00 2001
From: Roberto Sassu <roberto.sassu@huawei.com>
Date: Thu, 24 Jan 2019 16:49:05 +0100
Subject: tpm: add _head suffix to tcg_efi_specid_event and tcg_pcr_event2

TCG defines two structures, TCG_EfiSpecIDEventStruct and TCG_PCR_EVENT2,
which contain variable-sized arrays in the middle of the definition.

Since these structures are not suitable for type casting, this patch
removes structure members after the variable-sized arrays and adds the
_head suffix to the structure name, to indicate that the renamed structures
do not contain all fields defined by TCG.

Lastly, given that variable-sized arrays are now in the last position, and
given that the size of the arrays cannot be determined in advance, this
patch also sets the size of those arrays to zero and removes the definition
of TPM2_ACTIVE_PCR_BANKS.

Signed-off-by: Roberto Sassu <roberto.sassu@huawei.com>
Reviewed-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
Tested-by: Nayna Jain <nayna@linux.ibm.com>
Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
---
 drivers/char/tpm/eventlog/tpm2.c | 12 ++++++------
 include/linux/tpm_eventlog.h     | 12 ++++--------
 2 files changed, 10 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/drivers/char/tpm/eventlog/tpm2.c b/drivers/char/tpm/eventlog/tpm2.c
index 1b8fa9de2cac..d8b77133a83a 100644
--- a/drivers/char/tpm/eventlog/tpm2.c
+++ b/drivers/char/tpm/eventlog/tpm2.c
@@ -37,10 +37,10 @@
  *
  * Returns size of the event. If it is an invalid event, returns 0.
  */
-static int calc_tpm2_event_size(struct tcg_pcr_event2 *event,
+static int calc_tpm2_event_size(struct tcg_pcr_event2_head *event,
 				struct tcg_pcr_event *event_header)
 {
-	struct tcg_efi_specid_event *efispecid;
+	struct tcg_efi_specid_event_head *efispecid;
 	struct tcg_event_field *event_field;
 	void *marker;
 	void *marker_start;
@@ -55,7 +55,7 @@ static int calc_tpm2_event_size(struct tcg_pcr_event2 *event,
 	marker = marker + sizeof(event->pcr_idx) + sizeof(event->event_type)
 		+ sizeof(event->count);
 
-	efispecid = (struct tcg_efi_specid_event *)event_header->event;
+	efispecid = (struct tcg_efi_specid_event_head *)event_header->event;
 
 	/* Check if event is malformed. */
 	if (event->count > efispecid->num_algs)
@@ -95,7 +95,7 @@ static void *tpm2_bios_measurements_start(struct seq_file *m, loff_t *pos)
 	void *addr = log->bios_event_log;
 	void *limit = log->bios_event_log_end;
 	struct tcg_pcr_event *event_header;
-	struct tcg_pcr_event2 *event;
+	struct tcg_pcr_event2_head *event;
 	size_t size;
 	int i;
 
@@ -136,7 +136,7 @@ static void *tpm2_bios_measurements_next(struct seq_file *m, void *v,
 					 loff_t *pos)
 {
 	struct tcg_pcr_event *event_header;
-	struct tcg_pcr_event2 *event;
+	struct tcg_pcr_event2_head *event;
 	struct tpm_chip *chip = m->private;
 	struct tpm_bios_log *log = &chip->log;
 	void *limit = log->bios_event_log_end;
@@ -180,7 +180,7 @@ static int tpm2_binary_bios_measurements_show(struct seq_file *m, void *v)
 	struct tpm_chip *chip = m->private;
 	struct tpm_bios_log *log = &chip->log;
 	struct tcg_pcr_event *event_header = log->bios_event_log;
-	struct tcg_pcr_event2 *event = v;
+	struct tcg_pcr_event2_head *event = v;
 	void *temp_ptr;
 	size_t size;
 
diff --git a/include/linux/tpm_eventlog.h b/include/linux/tpm_eventlog.h
index 20d9da77fc11..f47342361e87 100644
--- a/include/linux/tpm_eventlog.h
+++ b/include/linux/tpm_eventlog.h
@@ -8,7 +8,6 @@
 #define TCG_EVENT_NAME_LEN_MAX	255
 #define MAX_TEXT_EVENT		1000	/* Max event string length */
 #define ACPI_TCPA_SIG		"TCPA"	/* 0x41504354 /'TCPA' */
-#define TPM2_ACTIVE_PCR_BANKS	3
 
 #define EFI_TCG2_EVENT_LOG_FORMAT_TCG_1_2 0x1
 #define EFI_TCG2_EVENT_LOG_FORMAT_TCG_2   0x2
@@ -82,7 +81,7 @@ struct tcg_efi_specid_event_algs {
 	u16 digest_size;
 } __packed;
 
-struct tcg_efi_specid_event {
+struct tcg_efi_specid_event_head {
 	u8 signature[16];
 	u32 platform_class;
 	u8 spec_version_minor;
@@ -90,9 +89,7 @@ struct tcg_efi_specid_event {
 	u8 spec_errata;
 	u8 uintnsize;
 	u32 num_algs;
-	struct tcg_efi_specid_event_algs digest_sizes[TPM2_ACTIVE_PCR_BANKS];
-	u8 vendor_info_size;
-	u8 vendor_info[0];
+	struct tcg_efi_specid_event_algs digest_sizes[];
 } __packed;
 
 struct tcg_pcr_event {
@@ -113,12 +110,11 @@ struct tpm2_digest {
 	u8 digest[SHA512_DIGEST_SIZE];
 } __packed;
 
-struct tcg_pcr_event2 {
+struct tcg_pcr_event2_head {
 	u32 pcr_idx;
 	u32 event_type;
 	u32 count;
-	struct tpm2_digest digests[TPM2_ACTIVE_PCR_BANKS];
-	struct tcg_event_field event;
+	struct tpm2_digest digests[];
 } __packed;
 
 #endif
-- 
cgit v1.2.3


From 36ce089758b1b55df5854d6b6d74713f609e125d Mon Sep 17 00:00:00 2001
From: Jerry Snitselaar <jsnitsel@redhat.com>
Date: Wed, 30 Jan 2019 15:06:58 -0700
Subject: tpm: don't return bool from update_timeouts

Set tpm_chip->timeouts_adjusted directly in the update_timeouts
code instead of returning bool. In case of tpm read failing
print warning that the read failed and continue on.

Signed-off-by: Jerry Snitselaar <jsnitsel@redhat.com>
Reviewed-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
---
 drivers/char/tpm/tpm1-cmd.c     |  3 +--
 drivers/char/tpm/tpm_tis_core.c | 15 +++++++++------
 include/linux/tpm.h             |  2 +-
 3 files changed, 11 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/drivers/char/tpm/tpm1-cmd.c b/drivers/char/tpm/tpm1-cmd.c
index 6f306338953b..bda9a16b44f6 100644
--- a/drivers/char/tpm/tpm1-cmd.c
+++ b/drivers/char/tpm/tpm1-cmd.c
@@ -380,8 +380,7 @@ int tpm1_get_timeouts(struct tpm_chip *chip)
 	 * of misreporting.
 	 */
 	if (chip->ops->update_timeouts)
-		chip->timeout_adjusted =
-			chip->ops->update_timeouts(chip, timeout_eff);
+		chip->ops->update_timeouts(chip, timeout_eff);
 
 	if (!chip->timeout_adjusted) {
 		/* Restore default if chip reported 0 */
diff --git a/drivers/char/tpm/tpm_tis_core.c b/drivers/char/tpm/tpm_tis_core.c
index bb0c2e160562..c6b0c6d541a5 100644
--- a/drivers/char/tpm/tpm_tis_core.c
+++ b/drivers/char/tpm/tpm_tis_core.c
@@ -521,35 +521,38 @@ static const struct tis_vendor_timeout_override vendor_timeout_overrides[] = {
 			(TIS_SHORT_TIMEOUT*1000), (TIS_SHORT_TIMEOUT*1000) } },
 };
 
-static bool tpm_tis_update_timeouts(struct tpm_chip *chip,
+static void tpm_tis_update_timeouts(struct tpm_chip *chip,
 				    unsigned long *timeout_cap)
 {
 	struct tpm_tis_data *priv = dev_get_drvdata(&chip->dev);
 	int i, rc;
 	u32 did_vid;
 
+	chip->timeout_adjusted = false;
+
 	if (chip->ops->clk_enable != NULL)
 		chip->ops->clk_enable(chip, true);
 
 	rc = tpm_tis_read32(priv, TPM_DID_VID(0), &did_vid);
-	if (rc < 0)
+	if (rc < 0) {
+		dev_warn(&chip->dev, "%s: failed to read did_vid: %d\n",
+			 __func__, rc);
 		goto out;
+	}
 
 	for (i = 0; i != ARRAY_SIZE(vendor_timeout_overrides); i++) {
 		if (vendor_timeout_overrides[i].did_vid != did_vid)
 			continue;
 		memcpy(timeout_cap, vendor_timeout_overrides[i].timeout_us,
 		       sizeof(vendor_timeout_overrides[i].timeout_us));
-		rc = true;
+		chip->timeout_adjusted = true;
 	}
 
-	rc = false;
-
 out:
 	if (chip->ops->clk_enable != NULL)
 		chip->ops->clk_enable(chip, false);
 
-	return rc;
+	return;
 }
 
 /*
diff --git a/include/linux/tpm.h b/include/linux/tpm.h
index b49a55cf775f..13563b8c0c3a 100644
--- a/include/linux/tpm.h
+++ b/include/linux/tpm.h
@@ -41,7 +41,7 @@ struct tpm_class_ops {
 	int (*send) (struct tpm_chip *chip, u8 *buf, size_t len);
 	void (*cancel) (struct tpm_chip *chip);
 	u8 (*status) (struct tpm_chip *chip);
-	bool (*update_timeouts)(struct tpm_chip *chip,
+	void (*update_timeouts)(struct tpm_chip *chip,
 				unsigned long *timeout_cap);
 	int (*go_idle)(struct tpm_chip *chip);
 	int (*cmd_ready)(struct tpm_chip *chip);
-- 
cgit v1.2.3


From aa042475938f5818b0c1b6203061e85ad2535dbc Mon Sep 17 00:00:00 2001
From: Roberto Sassu <roberto.sassu@huawei.com>
Date: Wed, 6 Feb 2019 17:24:48 +0100
Subject: tpm: rename and export tpm2_digest and tpm2_algorithms

Rename tpm2_* to tpm_* and move the definitions to include/linux/tpm.h so
that these can be used by other kernel subsystems (e.g. IMA).

Also, set the length of the digest array in tpm_digest to a new constant
named TPM_MAX_DIGEST_SIZE, equal to SHA512_DIGEST_SIZE.

Signed-off-by: Roberto Sassu <roberto.sassu@huawei.com>
Reviewed-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
Tested-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
Acked-by: Mimi Zohar <zohar@linux.ibm.com>
Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
---
 drivers/char/tpm/tpm-interface.c |  2 +-
 drivers/char/tpm/tpm.h           | 13 +------------
 drivers/char/tpm/tpm1-cmd.c      |  2 +-
 drivers/char/tpm/tpm2-cmd.c      | 18 +++++++++---------
 include/linux/tpm.h              | 19 +++++++++++++++++++
 include/linux/tpm_eventlog.h     |  9 ++-------
 6 files changed, 33 insertions(+), 30 deletions(-)

(limited to 'include')

diff --git a/drivers/char/tpm/tpm-interface.c b/drivers/char/tpm/tpm-interface.c
index 2b31eff06b0e..9c6aa77b5dee 100644
--- a/drivers/char/tpm/tpm-interface.c
+++ b/drivers/char/tpm/tpm-interface.c
@@ -318,7 +318,7 @@ EXPORT_SYMBOL_GPL(tpm_pcr_read);
 int tpm_pcr_extend(struct tpm_chip *chip, u32 pcr_idx, const u8 *hash)
 {
 	int rc;
-	struct tpm2_digest *digest_list;
+	struct tpm_digest *digest_list;
 	int i;
 
 	chip = tpm_find_get_ops(chip);
diff --git a/drivers/char/tpm/tpm.h b/drivers/char/tpm/tpm.h
index cd330ace6248..0e54061d3fd1 100644
--- a/drivers/char/tpm/tpm.h
+++ b/drivers/char/tpm/tpm.h
@@ -122,17 +122,6 @@ enum tpm2_return_codes {
 	TPM2_RC_RETRY		= 0x0922,
 };
 
-enum tpm2_algorithms {
-	TPM2_ALG_ERROR		= 0x0000,
-	TPM2_ALG_SHA1		= 0x0004,
-	TPM2_ALG_KEYEDHASH	= 0x0008,
-	TPM2_ALG_SHA256		= 0x000B,
-	TPM2_ALG_SHA384		= 0x000C,
-	TPM2_ALG_SHA512		= 0x000D,
-	TPM2_ALG_NULL		= 0x0010,
-	TPM2_ALG_SM3_256	= 0x0012,
-};
-
 enum tpm2_command_codes {
 	TPM2_CC_FIRST		        = 0x011F,
 	TPM2_CC_HIERARCHY_CONTROL       = 0x0121,
@@ -545,7 +534,7 @@ static inline u32 tpm2_rc_value(u32 rc)
 int tpm2_get_timeouts(struct tpm_chip *chip);
 int tpm2_pcr_read(struct tpm_chip *chip, u32 pcr_idx, u8 *res_buf);
 int tpm2_pcr_extend(struct tpm_chip *chip, u32 pcr_idx, u32 count,
-		    struct tpm2_digest *digests);
+		    struct tpm_digest *digests);
 int tpm2_get_random(struct tpm_chip *chip, u8 *dest, size_t max);
 void tpm2_flush_context(struct tpm_chip *chip, u32 handle);
 int tpm2_seal_trusted(struct tpm_chip *chip,
diff --git a/drivers/char/tpm/tpm1-cmd.c b/drivers/char/tpm/tpm1-cmd.c
index e7d3228a0f37..3eb7e03889a0 100644
--- a/drivers/char/tpm/tpm1-cmd.c
+++ b/drivers/char/tpm/tpm1-cmd.c
@@ -703,7 +703,7 @@ int tpm1_auto_startup(struct tpm_chip *chip)
 		goto out;
 	}
 
-	chip->allocated_banks[0] = TPM2_ALG_SHA1;
+	chip->allocated_banks[0] = TPM_ALG_SHA1;
 	chip->nr_allocated_banks = 1;
 
 	return rc;
diff --git a/drivers/char/tpm/tpm2-cmd.c b/drivers/char/tpm/tpm2-cmd.c
index bd20b9a61fc0..440ae6ee29e4 100644
--- a/drivers/char/tpm/tpm2-cmd.c
+++ b/drivers/char/tpm/tpm2-cmd.c
@@ -33,11 +33,11 @@ struct tpm2_hash {
 };
 
 static struct tpm2_hash tpm2_hash_map[] = {
-	{HASH_ALGO_SHA1, TPM2_ALG_SHA1},
-	{HASH_ALGO_SHA256, TPM2_ALG_SHA256},
-	{HASH_ALGO_SHA384, TPM2_ALG_SHA384},
-	{HASH_ALGO_SHA512, TPM2_ALG_SHA512},
-	{HASH_ALGO_SM3_256, TPM2_ALG_SM3_256},
+	{HASH_ALGO_SHA1, TPM_ALG_SHA1},
+	{HASH_ALGO_SHA256, TPM_ALG_SHA256},
+	{HASH_ALGO_SHA384, TPM_ALG_SHA384},
+	{HASH_ALGO_SHA512, TPM_ALG_SHA512},
+	{HASH_ALGO_SM3_256, TPM_ALG_SM3_256},
 };
 
 int tpm2_get_timeouts(struct tpm_chip *chip)
@@ -192,7 +192,7 @@ int tpm2_pcr_read(struct tpm_chip *chip, u32 pcr_idx, u8 *res_buf)
 	pcr_select[pcr_idx >> 3] = 1 << (pcr_idx & 0x7);
 
 	tpm_buf_append_u32(&buf, 1);
-	tpm_buf_append_u16(&buf, TPM2_ALG_SHA1);
+	tpm_buf_append_u16(&buf, TPM_ALG_SHA1);
 	tpm_buf_append_u8(&buf, TPM2_PCR_SELECT_MIN);
 	tpm_buf_append(&buf, (const unsigned char *)pcr_select,
 		       sizeof(pcr_select));
@@ -226,7 +226,7 @@ struct tpm2_null_auth_area {
  * Return: Same as with tpm_transmit_cmd.
  */
 int tpm2_pcr_extend(struct tpm_chip *chip, u32 pcr_idx, u32 count,
-		    struct tpm2_digest *digests)
+		    struct tpm_digest *digests)
 {
 	struct tpm_buf buf;
 	struct tpm2_null_auth_area auth_area;
@@ -443,7 +443,7 @@ int tpm2_seal_trusted(struct tpm_chip *chip,
 
 	/* public */
 	tpm_buf_append_u16(&buf, 14 + options->policydigest_len);
-	tpm_buf_append_u16(&buf, TPM2_ALG_KEYEDHASH);
+	tpm_buf_append_u16(&buf, TPM_ALG_KEYEDHASH);
 	tpm_buf_append_u16(&buf, hash);
 
 	/* policy */
@@ -458,7 +458,7 @@ int tpm2_seal_trusted(struct tpm_chip *chip,
 	}
 
 	/* public parameters */
-	tpm_buf_append_u16(&buf, TPM2_ALG_NULL);
+	tpm_buf_append_u16(&buf, TPM_ALG_NULL);
 	tpm_buf_append_u16(&buf, 0);
 
 	/* outside info */
diff --git a/include/linux/tpm.h b/include/linux/tpm.h
index 13563b8c0c3a..9fe8c9816cf0 100644
--- a/include/linux/tpm.h
+++ b/include/linux/tpm.h
@@ -22,12 +22,31 @@
 #ifndef __LINUX_TPM_H__
 #define __LINUX_TPM_H__
 
+#include <crypto/hash_info.h>
+
 #define TPM_DIGEST_SIZE 20	/* Max TPM v1.2 PCR size */
+#define TPM_MAX_DIGEST_SIZE SHA512_DIGEST_SIZE
 
 struct tpm_chip;
 struct trusted_key_payload;
 struct trusted_key_options;
 
+enum tpm_algorithms {
+	TPM_ALG_ERROR		= 0x0000,
+	TPM_ALG_SHA1		= 0x0004,
+	TPM_ALG_KEYEDHASH	= 0x0008,
+	TPM_ALG_SHA256		= 0x000B,
+	TPM_ALG_SHA384		= 0x000C,
+	TPM_ALG_SHA512		= 0x000D,
+	TPM_ALG_NULL		= 0x0010,
+	TPM_ALG_SM3_256		= 0x0012,
+};
+
+struct tpm_digest {
+	u16 alg_id;
+	u8 digest[TPM_MAX_DIGEST_SIZE];
+} __packed;
+
 enum TPM_OPS_FLAGS {
 	TPM_OPS_AUTO_STARTUP = BIT(0),
 };
diff --git a/include/linux/tpm_eventlog.h b/include/linux/tpm_eventlog.h
index f47342361e87..81519f163211 100644
--- a/include/linux/tpm_eventlog.h
+++ b/include/linux/tpm_eventlog.h
@@ -3,7 +3,7 @@
 #ifndef __LINUX_TPM_EVENTLOG_H__
 #define __LINUX_TPM_EVENTLOG_H__
 
-#include <crypto/hash_info.h>
+#include <linux/tpm.h>
 
 #define TCG_EVENT_NAME_LEN_MAX	255
 #define MAX_TEXT_EVENT		1000	/* Max event string length */
@@ -105,16 +105,11 @@ struct tcg_event_field {
 	u8 event[0];
 } __packed;
 
-struct tpm2_digest {
-	u16 alg_id;
-	u8 digest[SHA512_DIGEST_SIZE];
-} __packed;
-
 struct tcg_pcr_event2_head {
 	u32 pcr_idx;
 	u32 event_type;
 	u32 count;
-	struct tpm2_digest digests[];
+	struct tpm_digest digests[];
 } __packed;
 
 #endif
-- 
cgit v1.2.3


From 879b589210a9a0c9f77d301aaf0ddee20f2c5052 Mon Sep 17 00:00:00 2001
From: Roberto Sassu <roberto.sassu@huawei.com>
Date: Wed, 6 Feb 2019 17:24:49 +0100
Subject: tpm: retrieve digest size of unknown algorithms with PCR read

Currently, the TPM driver retrieves the digest size from a table mapping
TPM algorithms identifiers to identifiers defined by the crypto subsystem.
If the algorithm is not defined by the latter, the digest size can be
retrieved from the output of the PCR read command.

The patch modifies the definition of tpm_pcr_read() and tpm2_pcr_read() to
pass the desired hash algorithm and obtain the digest size at TPM startup.
Algorithms and corresponding digest sizes are stored in the new structure
tpm_bank_info, member of tpm_chip, so that the information can be used by
other kernel subsystems.

tpm_bank_info contains: the TPM algorithm identifier, necessary to generate
the event log as defined by Trusted Computing Group (TCG); the digest size,
to pad/truncate a digest calculated with a different algorithm; the crypto
subsystem identifier, to calculate the digest of event data.

This patch also protects against data corruption that could happen in the
bus, by checking that the digest size returned by the TPM during a PCR read
matches the size of the algorithm passed to tpm2_pcr_read().

For the initial PCR read, when digest sizes are not yet available, this
patch ensures that the amount of data copied from the output returned by
the TPM does not exceed the size of the array data are copied to.

Signed-off-by: Roberto Sassu <roberto.sassu@huawei.com>
Reviewed-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
Tested-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
Acked-by: Mimi Zohar <zohar@linux.ibm.com>
Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
---
 drivers/char/tpm/tpm-interface.c    | 16 +++----
 drivers/char/tpm/tpm.h              |  5 ++-
 drivers/char/tpm/tpm1-cmd.c         |  4 +-
 drivers/char/tpm/tpm2-cmd.c         | 85 +++++++++++++++++++++++++++++--------
 include/linux/tpm.h                 | 12 +++++-
 security/integrity/ima/ima_crypto.c | 10 ++---
 6 files changed, 96 insertions(+), 36 deletions(-)

(limited to 'include')

diff --git a/drivers/char/tpm/tpm-interface.c b/drivers/char/tpm/tpm-interface.c
index 9c6aa77b5dee..1c92dbeef736 100644
--- a/drivers/char/tpm/tpm-interface.c
+++ b/drivers/char/tpm/tpm-interface.c
@@ -281,11 +281,12 @@ EXPORT_SYMBOL_GPL(tpm_is_tpm2);
  * tpm_pcr_read - read a PCR value from SHA1 bank
  * @chip:	a &struct tpm_chip instance, %NULL for the default chip
  * @pcr_idx:	the PCR to be retrieved
- * @res_buf:	the value of the PCR
+ * @digest:	the PCR bank and buffer current PCR value is written to
  *
  * Return: same as with tpm_transmit_cmd()
  */
-int tpm_pcr_read(struct tpm_chip *chip, u32 pcr_idx, u8 *res_buf)
+int tpm_pcr_read(struct tpm_chip *chip, u32 pcr_idx,
+		 struct tpm_digest *digest)
 {
 	int rc;
 
@@ -294,9 +295,9 @@ int tpm_pcr_read(struct tpm_chip *chip, u32 pcr_idx, u8 *res_buf)
 		return -ENODEV;
 
 	if (chip->flags & TPM_CHIP_FLAG_TPM2)
-		rc = tpm2_pcr_read(chip, pcr_idx, res_buf);
+		rc = tpm2_pcr_read(chip, pcr_idx, digest, NULL);
 	else
-		rc = tpm1_pcr_read(chip, pcr_idx, res_buf);
+		rc = tpm1_pcr_read(chip, pcr_idx, digest->digest);
 
 	tpm_put_ops(chip);
 	return rc;
@@ -309,9 +310,8 @@ EXPORT_SYMBOL_GPL(tpm_pcr_read);
  * @pcr_idx:	the PCR to be retrieved
  * @hash:	the hash value used to extend the PCR value
  *
- * Note: with TPM 2.0 extends also those banks with a known digest size to the
- * cryto subsystem in order to prevent malicious use of those PCR banks. In the
- * future we should dynamically determine digest sizes.
+ * Note: with TPM 2.0 extends also those banks for which no digest was
+ * specified in order to prevent malicious use of those PCR banks.
  *
  * Return: same as with tpm_transmit_cmd()
  */
@@ -332,7 +332,7 @@ int tpm_pcr_extend(struct tpm_chip *chip, u32 pcr_idx, const u8 *hash)
 			return -ENOMEM;
 
 		for (i = 0; i < chip->nr_allocated_banks; i++) {
-			digest_list[i].alg_id = chip->allocated_banks[i];
+			digest_list[i].alg_id = chip->allocated_banks[i].alg_id;
 			memcpy(digest_list[i].digest, hash, TPM_DIGEST_SIZE);
 		}
 
diff --git a/drivers/char/tpm/tpm.h b/drivers/char/tpm/tpm.h
index 0e54061d3fd1..4efa304e9ece 100644
--- a/drivers/char/tpm/tpm.h
+++ b/drivers/char/tpm/tpm.h
@@ -247,7 +247,7 @@ struct tpm_chip {
 	unsigned int groups_cnt;
 
 	u32 nr_allocated_banks;
-	u16 *allocated_banks;
+	struct tpm_bank_info *allocated_banks;
 #ifdef CONFIG_ACPI
 	acpi_handle acpi_dev_handle;
 	char ppi_version[TPM_PPI_VERSION_LEN + 1];
@@ -532,7 +532,8 @@ static inline u32 tpm2_rc_value(u32 rc)
 }
 
 int tpm2_get_timeouts(struct tpm_chip *chip);
-int tpm2_pcr_read(struct tpm_chip *chip, u32 pcr_idx, u8 *res_buf);
+int tpm2_pcr_read(struct tpm_chip *chip, u32 pcr_idx,
+		  struct tpm_digest *digest, u16 *digest_size_ptr);
 int tpm2_pcr_extend(struct tpm_chip *chip, u32 pcr_idx, u32 count,
 		    struct tpm_digest *digests);
 int tpm2_get_random(struct tpm_chip *chip, u8 *dest, size_t max);
diff --git a/drivers/char/tpm/tpm1-cmd.c b/drivers/char/tpm/tpm1-cmd.c
index 3eb7e03889a0..85dcf2654d11 100644
--- a/drivers/char/tpm/tpm1-cmd.c
+++ b/drivers/char/tpm/tpm1-cmd.c
@@ -703,7 +703,9 @@ int tpm1_auto_startup(struct tpm_chip *chip)
 		goto out;
 	}
 
-	chip->allocated_banks[0] = TPM_ALG_SHA1;
+	chip->allocated_banks[0].alg_id = TPM_ALG_SHA1;
+	chip->allocated_banks[0].digest_size = hash_digest_size[HASH_ALGO_SHA1];
+	chip->allocated_banks[0].crypto_id = HASH_ALGO_SHA1;
 	chip->nr_allocated_banks = 1;
 
 	return rc;
diff --git a/drivers/char/tpm/tpm2-cmd.c b/drivers/char/tpm/tpm2-cmd.c
index 440ae6ee29e4..6967f15a6585 100644
--- a/drivers/char/tpm/tpm2-cmd.c
+++ b/drivers/char/tpm/tpm2-cmd.c
@@ -171,20 +171,36 @@ struct tpm2_pcr_read_out {
  * tpm2_pcr_read() - read a PCR value
  * @chip:	TPM chip to use.
  * @pcr_idx:	index of the PCR to read.
- * @res_buf:	buffer to store the resulting hash.
+ * @digest:	PCR bank and buffer current PCR value is written to.
+ * @digest_size_ptr:	pointer to variable that stores the digest size.
  *
  * Return: Same as with tpm_transmit_cmd.
  */
-int tpm2_pcr_read(struct tpm_chip *chip, u32 pcr_idx, u8 *res_buf)
+int tpm2_pcr_read(struct tpm_chip *chip, u32 pcr_idx,
+		  struct tpm_digest *digest, u16 *digest_size_ptr)
 {
+	int i;
 	int rc;
 	struct tpm_buf buf;
 	struct tpm2_pcr_read_out *out;
 	u8 pcr_select[TPM2_PCR_SELECT_MIN] = {0};
+	u16 digest_size;
+	u16 expected_digest_size = 0;
 
 	if (pcr_idx >= TPM2_PLATFORM_PCR)
 		return -EINVAL;
 
+	if (!digest_size_ptr) {
+		for (i = 0; i < chip->nr_allocated_banks &&
+		     chip->allocated_banks[i].alg_id != digest->alg_id; i++)
+			;
+
+		if (i == chip->nr_allocated_banks)
+			return -EINVAL;
+
+		expected_digest_size = chip->allocated_banks[i].digest_size;
+	}
+
 	rc = tpm_buf_init(&buf, TPM2_ST_NO_SESSIONS, TPM2_CC_PCR_READ);
 	if (rc)
 		return rc;
@@ -192,18 +208,28 @@ int tpm2_pcr_read(struct tpm_chip *chip, u32 pcr_idx, u8 *res_buf)
 	pcr_select[pcr_idx >> 3] = 1 << (pcr_idx & 0x7);
 
 	tpm_buf_append_u32(&buf, 1);
-	tpm_buf_append_u16(&buf, TPM_ALG_SHA1);
+	tpm_buf_append_u16(&buf, digest->alg_id);
 	tpm_buf_append_u8(&buf, TPM2_PCR_SELECT_MIN);
 	tpm_buf_append(&buf, (const unsigned char *)pcr_select,
 		       sizeof(pcr_select));
 
-	rc = tpm_transmit_cmd(chip, &buf, 0, res_buf ?
-			      "attempting to read a pcr value" : NULL);
-	if (rc == 0 && res_buf) {
-		out = (struct tpm2_pcr_read_out *)&buf.data[TPM_HEADER_SIZE];
-		memcpy(res_buf, out->digest, SHA1_DIGEST_SIZE);
+	rc = tpm_transmit_cmd(chip, &buf, 0, "attempting to read a pcr value");
+	if (rc)
+		goto out;
+
+	out = (struct tpm2_pcr_read_out *)&buf.data[TPM_HEADER_SIZE];
+	digest_size = be16_to_cpu(out->digest_size);
+	if (digest_size > sizeof(digest->digest) ||
+	    (!digest_size_ptr && digest_size != expected_digest_size)) {
+		rc = -EINVAL;
+		goto out;
 	}
 
+	if (digest_size_ptr)
+		*digest_size_ptr = digest_size;
+
+	memcpy(digest->digest, out->digest, digest_size);
+out:
 	tpm_buf_destroy(&buf);
 	return rc;
 }
@@ -232,7 +258,6 @@ int tpm2_pcr_extend(struct tpm_chip *chip, u32 pcr_idx, u32 count,
 	struct tpm2_null_auth_area auth_area;
 	int rc;
 	int i;
-	int j;
 
 	if (count > chip->nr_allocated_banks)
 		return -EINVAL;
@@ -254,14 +279,9 @@ int tpm2_pcr_extend(struct tpm_chip *chip, u32 pcr_idx, u32 count,
 	tpm_buf_append_u32(&buf, count);
 
 	for (i = 0; i < count; i++) {
-		for (j = 0; j < ARRAY_SIZE(tpm2_hash_map); j++) {
-			if (digests[i].alg_id != tpm2_hash_map[j].tpm_id)
-				continue;
-			tpm_buf_append_u16(&buf, digests[i].alg_id);
-			tpm_buf_append(&buf, (const unsigned char
-					      *)&digests[i].digest,
-			       hash_digest_size[tpm2_hash_map[j].crypto_id]);
-		}
+		tpm_buf_append_u16(&buf, digests[i].alg_id);
+		tpm_buf_append(&buf, (const unsigned char *)&digests[i].digest,
+			       chip->allocated_banks[i].digest_size);
 	}
 
 	rc = tpm_transmit_cmd(chip, &buf, 0, "attempting extend a PCR value");
@@ -795,6 +815,30 @@ int tpm2_probe(struct tpm_chip *chip)
 }
 EXPORT_SYMBOL_GPL(tpm2_probe);
 
+static int tpm2_init_bank_info(struct tpm_chip *chip, u32 bank_index)
+{
+	struct tpm_bank_info *bank = chip->allocated_banks + bank_index;
+	struct tpm_digest digest = { .alg_id = bank->alg_id };
+	int i;
+
+	/*
+	 * Avoid unnecessary PCR read operations to reduce overhead
+	 * and obtain identifiers of the crypto subsystem.
+	 */
+	for (i = 0; i < ARRAY_SIZE(tpm2_hash_map); i++) {
+		enum hash_algo crypto_algo = tpm2_hash_map[i].crypto_id;
+
+		if (bank->alg_id != tpm2_hash_map[i].tpm_id)
+			continue;
+
+		bank->digest_size = hash_digest_size[crypto_algo];
+		bank->crypto_id = crypto_algo;
+		return 0;
+	}
+
+	return tpm2_pcr_read(chip, 0, &digest, &bank->digest_size);
+}
+
 struct tpm2_pcr_selection {
 	__be16  hash_alg;
 	u8  size_of_select;
@@ -858,7 +902,12 @@ static ssize_t tpm2_get_pcr_allocation(struct tpm_chip *chip)
 		pcr_select_offset = memchr_inv(pcr_selection.pcr_select, 0,
 					       pcr_selection.size_of_select);
 		if (pcr_select_offset) {
-			chip->allocated_banks[nr_alloc_banks] = hash_alg;
+			chip->allocated_banks[nr_alloc_banks].alg_id = hash_alg;
+
+			rc = tpm2_init_bank_info(chip, nr_alloc_banks);
+			if (rc < 0)
+				break;
+
 			nr_alloc_banks++;
 		}
 
diff --git a/include/linux/tpm.h b/include/linux/tpm.h
index 9fe8c9816cf0..afd022fc9d3d 100644
--- a/include/linux/tpm.h
+++ b/include/linux/tpm.h
@@ -47,6 +47,12 @@ struct tpm_digest {
 	u8 digest[TPM_MAX_DIGEST_SIZE];
 } __packed;
 
+struct tpm_bank_info {
+	u16 alg_id;
+	u16 digest_size;
+	u16 crypto_id;
+};
+
 enum TPM_OPS_FLAGS {
 	TPM_OPS_AUTO_STARTUP = BIT(0),
 };
@@ -72,7 +78,8 @@ struct tpm_class_ops {
 #if defined(CONFIG_TCG_TPM) || defined(CONFIG_TCG_TPM_MODULE)
 
 extern int tpm_is_tpm2(struct tpm_chip *chip);
-extern int tpm_pcr_read(struct tpm_chip *chip, u32 pcr_idx, u8 *res_buf);
+extern int tpm_pcr_read(struct tpm_chip *chip, u32 pcr_idx,
+			struct tpm_digest *digest);
 extern int tpm_pcr_extend(struct tpm_chip *chip, u32 pcr_idx, const u8 *hash);
 extern int tpm_send(struct tpm_chip *chip, void *cmd, size_t buflen);
 extern int tpm_get_random(struct tpm_chip *chip, u8 *data, size_t max);
@@ -89,7 +96,8 @@ static inline int tpm_is_tpm2(struct tpm_chip *chip)
 	return -ENODEV;
 }
 
-static inline int tpm_pcr_read(struct tpm_chip *chip, u32 pcr_idx, u8 *res_buf)
+static inline int tpm_pcr_read(struct tpm_chip *chip, int pcr_idx,
+			       struct tpm_digest *digest)
 {
 	return -ENODEV;
 }
diff --git a/security/integrity/ima/ima_crypto.c b/security/integrity/ima/ima_crypto.c
index acf2c7df7145..16a4f45863b1 100644
--- a/security/integrity/ima/ima_crypto.c
+++ b/security/integrity/ima/ima_crypto.c
@@ -643,12 +643,12 @@ int ima_calc_buffer_hash(const void *buf, loff_t len,
 	return calc_buffer_shash(buf, len, hash);
 }
 
-static void __init ima_pcrread(u32 idx, u8 *pcr)
+static void __init ima_pcrread(u32 idx, struct tpm_digest *d)
 {
 	if (!ima_tpm_chip)
 		return;
 
-	if (tpm_pcr_read(ima_tpm_chip, idx, pcr) != 0)
+	if (tpm_pcr_read(ima_tpm_chip, idx, d) != 0)
 		pr_err("Error Communicating to TPM chip\n");
 }
 
@@ -658,7 +658,7 @@ static void __init ima_pcrread(u32 idx, u8 *pcr)
 static int __init ima_calc_boot_aggregate_tfm(char *digest,
 					      struct crypto_shash *tfm)
 {
-	u8 pcr_i[TPM_DIGEST_SIZE];
+	struct tpm_digest d = { .alg_id = TPM_ALG_SHA1, .digest = {0} };
 	int rc;
 	u32 i;
 	SHASH_DESC_ON_STACK(shash, tfm);
@@ -672,9 +672,9 @@ static int __init ima_calc_boot_aggregate_tfm(char *digest,
 
 	/* cumulative sha1 over tpm registers 0-7 */
 	for (i = TPM_PCR0; i < TPM_PCR8; i++) {
-		ima_pcrread(i, pcr_i);
+		ima_pcrread(i, &d);
 		/* now accumulate with current aggregate */
-		rc = crypto_shash_update(shash, pcr_i, TPM_DIGEST_SIZE);
+		rc = crypto_shash_update(shash, d.digest, TPM_DIGEST_SIZE);
 	}
 	if (!rc)
 		crypto_shash_final(shash, digest);
-- 
cgit v1.2.3


From 901615cb916dc955fb7bda4e34402bf263532e4a Mon Sep 17 00:00:00 2001
From: Roberto Sassu <roberto.sassu@huawei.com>
Date: Wed, 6 Feb 2019 17:24:50 +0100
Subject: tpm: move tpm_chip definition to include/linux/tpm.h

The tpm_chip structure contains the list of PCR banks currently allocated
in the TPM. When support for crypto agility will be added to the TPM
driver, users of the driver have to provide a digest for each allocated
bank to tpm_pcr_extend(). With this patch, they can obtain the PCR bank
algorithms directly from chip->allocated_banks.

Signed-off-by: Roberto Sassu <roberto.sassu@huawei.com>
Reviewed-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
Tested-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
---
 drivers/char/tpm/tpm.h | 101 ++-----------------------------------------------
 include/linux/tpm.h    |  91 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 95 insertions(+), 97 deletions(-)

(limited to 'include')

diff --git a/drivers/char/tpm/tpm.h b/drivers/char/tpm/tpm.h
index 4efa304e9ece..4f85ce909122 100644
--- a/drivers/char/tpm/tpm.h
+++ b/drivers/char/tpm/tpm.h
@@ -25,30 +25,22 @@
 
 #include <linux/module.h>
 #include <linux/delay.h>
-#include <linux/fs.h>
-#include <linux/hw_random.h>
 #include <linux/mutex.h>
 #include <linux/sched.h>
 #include <linux/platform_device.h>
 #include <linux/io.h>
 #include <linux/tpm.h>
-#include <linux/acpi.h>
-#include <linux/cdev.h>
 #include <linux/highmem.h>
 #include <linux/tpm_eventlog.h>
-#include <crypto/hash_info.h>
 
 #ifdef CONFIG_X86
 #include <asm/intel-family.h>
 #endif
 
-enum tpm_const {
-	TPM_MINOR = 224,	/* officially assigned */
-	TPM_BUFSIZE = 4096,
-	TPM_NUM_DEVICES = 65536,
-	TPM_RETRY = 50,		/* 5 seconds */
-	TPM_NUM_EVENT_LOG_FILES = 3,
-};
+#define TPM_MINOR		224	/* officially assigned */
+#define TPM_BUFSIZE		4096
+#define TPM_NUM_DEVICES		65536
+#define TPM_RETRY		50
 
 enum tpm_timeout {
 	TPM_TIMEOUT = 5,	/* msecs */
@@ -65,16 +57,6 @@ enum tpm_addr {
 	TPM_ADDR = 0x4E,
 };
 
-/* Indexes the duration array */
-enum tpm_duration {
-	TPM_SHORT = 0,
-	TPM_MEDIUM = 1,
-	TPM_LONG = 2,
-	TPM_LONG_LONG = 3,
-	TPM_UNDEFINED,
-	TPM_NUM_DURATIONS = TPM_UNDEFINED,
-};
-
 #define TPM_WARN_RETRY          0x800
 #define TPM_WARN_DOING_SELFTEST 0x802
 #define TPM_ERR_DEACTIVATED     0x6
@@ -179,15 +161,6 @@ enum tpm2_cc_attrs {
 #define TPM_VID_WINBOND  0x1050
 #define TPM_VID_STM      0x104A
 
-#define TPM_PPI_VERSION_LEN		3
-
-struct tpm_space {
-	u32 context_tbl[3];
-	u8 *context_buf;
-	u32 session_tbl[3];
-	u8 *session_buf;
-};
-
 enum tpm_chip_flags {
 	TPM_CHIP_FLAG_TPM2		= BIT(1),
 	TPM_CHIP_FLAG_IRQ		= BIT(2),
@@ -196,72 +169,6 @@ enum tpm_chip_flags {
 	TPM_CHIP_FLAG_ALWAYS_POWERED	= BIT(5),
 };
 
-struct tpm_bios_log {
-	void *bios_event_log;
-	void *bios_event_log_end;
-};
-
-struct tpm_chip_seqops {
-	struct tpm_chip *chip;
-	const struct seq_operations *seqops;
-};
-
-struct tpm_chip {
-	struct device dev;
-	struct device devs;
-	struct cdev cdev;
-	struct cdev cdevs;
-
-	/* A driver callback under ops cannot be run unless ops_sem is held
-	 * (sometimes implicitly, eg for the sysfs code). ops becomes null
-	 * when the driver is unregistered, see tpm_try_get_ops.
-	 */
-	struct rw_semaphore ops_sem;
-	const struct tpm_class_ops *ops;
-
-	struct tpm_bios_log log;
-	struct tpm_chip_seqops bin_log_seqops;
-	struct tpm_chip_seqops ascii_log_seqops;
-
-	unsigned int flags;
-
-	int dev_num;		/* /dev/tpm# */
-	unsigned long is_open;	/* only one allowed */
-
-	char hwrng_name[64];
-	struct hwrng hwrng;
-
-	struct mutex tpm_mutex;	/* tpm is processing */
-
-	unsigned long timeout_a; /* jiffies */
-	unsigned long timeout_b; /* jiffies */
-	unsigned long timeout_c; /* jiffies */
-	unsigned long timeout_d; /* jiffies */
-	bool timeout_adjusted;
-	unsigned long duration[TPM_NUM_DURATIONS]; /* jiffies */
-	bool duration_adjusted;
-
-	struct dentry *bios_dir[TPM_NUM_EVENT_LOG_FILES];
-
-	const struct attribute_group *groups[3];
-	unsigned int groups_cnt;
-
-	u32 nr_allocated_banks;
-	struct tpm_bank_info *allocated_banks;
-#ifdef CONFIG_ACPI
-	acpi_handle acpi_dev_handle;
-	char ppi_version[TPM_PPI_VERSION_LEN + 1];
-#endif /* CONFIG_ACPI */
-
-	struct tpm_space work_space;
-	u32 last_cc;
-	u32 nr_commands;
-	u32 *cc_attrs_tbl;
-
-	/* active locality */
-	int locality;
-};
-
 #define to_tpm_chip(d) container_of(d, struct tpm_chip, dev)
 
 struct tpm_header {
diff --git a/include/linux/tpm.h b/include/linux/tpm.h
index afd022fc9d3d..816e686a73ac 100644
--- a/include/linux/tpm.h
+++ b/include/linux/tpm.h
@@ -22,6 +22,10 @@
 #ifndef __LINUX_TPM_H__
 #define __LINUX_TPM_H__
 
+#include <linux/hw_random.h>
+#include <linux/acpi.h>
+#include <linux/cdev.h>
+#include <linux/fs.h>
 #include <crypto/hash_info.h>
 
 #define TPM_DIGEST_SIZE 20	/* Max TPM v1.2 PCR size */
@@ -75,6 +79,93 @@ struct tpm_class_ops {
 	void (*clk_enable)(struct tpm_chip *chip, bool value);
 };
 
+#define TPM_NUM_EVENT_LOG_FILES		3
+
+/* Indexes the duration array */
+enum tpm_duration {
+	TPM_SHORT = 0,
+	TPM_MEDIUM = 1,
+	TPM_LONG = 2,
+	TPM_LONG_LONG = 3,
+	TPM_UNDEFINED,
+	TPM_NUM_DURATIONS = TPM_UNDEFINED,
+};
+
+#define TPM_PPI_VERSION_LEN		3
+
+struct tpm_space {
+	u32 context_tbl[3];
+	u8 *context_buf;
+	u32 session_tbl[3];
+	u8 *session_buf;
+};
+
+struct tpm_bios_log {
+	void *bios_event_log;
+	void *bios_event_log_end;
+};
+
+struct tpm_chip_seqops {
+	struct tpm_chip *chip;
+	const struct seq_operations *seqops;
+};
+
+struct tpm_chip {
+	struct device dev;
+	struct device devs;
+	struct cdev cdev;
+	struct cdev cdevs;
+
+	/* A driver callback under ops cannot be run unless ops_sem is held
+	 * (sometimes implicitly, eg for the sysfs code). ops becomes null
+	 * when the driver is unregistered, see tpm_try_get_ops.
+	 */
+	struct rw_semaphore ops_sem;
+	const struct tpm_class_ops *ops;
+
+	struct tpm_bios_log log;
+	struct tpm_chip_seqops bin_log_seqops;
+	struct tpm_chip_seqops ascii_log_seqops;
+
+	unsigned int flags;
+
+	int dev_num;		/* /dev/tpm# */
+	unsigned long is_open;	/* only one allowed */
+
+	char hwrng_name[64];
+	struct hwrng hwrng;
+
+	struct mutex tpm_mutex;	/* tpm is processing */
+
+	unsigned long timeout_a; /* jiffies */
+	unsigned long timeout_b; /* jiffies */
+	unsigned long timeout_c; /* jiffies */
+	unsigned long timeout_d; /* jiffies */
+	bool timeout_adjusted;
+	unsigned long duration[TPM_NUM_DURATIONS]; /* jiffies */
+	bool duration_adjusted;
+
+	struct dentry *bios_dir[TPM_NUM_EVENT_LOG_FILES];
+
+	const struct attribute_group *groups[3];
+	unsigned int groups_cnt;
+
+	u32 nr_allocated_banks;
+	struct tpm_bank_info *allocated_banks;
+#ifdef CONFIG_ACPI
+	acpi_handle acpi_dev_handle;
+	char ppi_version[TPM_PPI_VERSION_LEN + 1];
+#endif /* CONFIG_ACPI */
+
+	struct tpm_space work_space;
+	u32 last_cc;
+	u32 nr_commands;
+	u32 *cc_attrs_tbl;
+
+	/* active locality */
+	int locality;
+};
+
 #if defined(CONFIG_TCG_TPM) || defined(CONFIG_TCG_TPM_MODULE)
 
 extern int tpm_is_tpm2(struct tpm_chip *chip);
-- 
cgit v1.2.3


From 0b6cf6b97b7ef1fa3c7fefab0cac897a1c4a3400 Mon Sep 17 00:00:00 2001
From: Roberto Sassu <roberto.sassu@huawei.com>
Date: Wed, 6 Feb 2019 17:24:52 +0100
Subject: tpm: pass an array of tpm_extend_digest structures to
 tpm_pcr_extend()

Currently, tpm_pcr_extend() accepts as an input only a SHA1 digest.

This patch replaces the hash parameter of tpm_pcr_extend() with an array of
tpm_digest structures, so that the caller can provide a digest for each PCR
bank currently allocated in the TPM.

tpm_pcr_extend() will not extend banks for which no digest was provided,
as it happened before this patch, but instead it requires that callers
provide the full set of digests. Since the number of digests will always be
chip->nr_allocated_banks, the count parameter has been removed.

Due to the API change, ima_pcr_extend() and pcrlock() have been modified.
Since the number of allocated banks is not known in advance, the memory for
the digests must be dynamically allocated. To avoid performance degradation
and to avoid that a PCR extend is not done due to lack of memory, the array
of tpm_digest structures is allocated by the users of the TPM driver at
initialization time.

Signed-off-by: Roberto Sassu <roberto.sassu@huawei.com>
Reviewed-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
Tested-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
Tested-by: Mimi Zohar <zohar@linux.ibm.com> (on x86 for TPM 1.2 & PTT TPM 2.0)
Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
---
 drivers/char/tpm/tpm-interface.c   | 30 ++++++++++------------------
 drivers/char/tpm/tpm.h             |  2 +-
 drivers/char/tpm/tpm2-cmd.c        | 10 +++-------
 include/linux/tpm.h                |  5 +++--
 security/integrity/ima/ima.h       |  1 +
 security/integrity/ima/ima_init.c  |  4 ++++
 security/integrity/ima/ima_queue.c | 27 ++++++++++++++++++++++++-
 security/keys/trusted.c            | 41 ++++++++++++++++++++++++++++++--------
 8 files changed, 82 insertions(+), 38 deletions(-)

(limited to 'include')

diff --git a/drivers/char/tpm/tpm-interface.c b/drivers/char/tpm/tpm-interface.c
index 1c92dbeef736..83ece5639f86 100644
--- a/drivers/char/tpm/tpm-interface.c
+++ b/drivers/char/tpm/tpm-interface.c
@@ -308,42 +308,34 @@ EXPORT_SYMBOL_GPL(tpm_pcr_read);
  * tpm_pcr_extend - extend a PCR value in SHA1 bank.
  * @chip:	a &struct tpm_chip instance, %NULL for the default chip
  * @pcr_idx:	the PCR to be retrieved
- * @hash:	the hash value used to extend the PCR value
+ * @digests:	array of tpm_digest structures used to extend PCRs
  *
- * Note: with TPM 2.0 extends also those banks for which no digest was
- * specified in order to prevent malicious use of those PCR banks.
+ * Note: callers must pass a digest for every allocated PCR bank, in the same
+ * order of the banks in chip->allocated_banks.
  *
  * Return: same as with tpm_transmit_cmd()
  */
-int tpm_pcr_extend(struct tpm_chip *chip, u32 pcr_idx, const u8 *hash)
+int tpm_pcr_extend(struct tpm_chip *chip, u32 pcr_idx,
+		   struct tpm_digest *digests)
 {
 	int rc;
-	struct tpm_digest *digest_list;
 	int i;
 
 	chip = tpm_find_get_ops(chip);
 	if (!chip)
 		return -ENODEV;
 
-	if (chip->flags & TPM_CHIP_FLAG_TPM2) {
-		digest_list = kcalloc(chip->nr_allocated_banks,
-				      sizeof(*digest_list), GFP_KERNEL);
-		if (!digest_list)
-			return -ENOMEM;
-
-		for (i = 0; i < chip->nr_allocated_banks; i++) {
-			digest_list[i].alg_id = chip->allocated_banks[i].alg_id;
-			memcpy(digest_list[i].digest, hash, TPM_DIGEST_SIZE);
-		}
+	for (i = 0; i < chip->nr_allocated_banks; i++)
+		if (digests[i].alg_id != chip->allocated_banks[i].alg_id)
+			return -EINVAL;
 
-		rc = tpm2_pcr_extend(chip, pcr_idx, chip->nr_allocated_banks,
-				     digest_list);
-		kfree(digest_list);
+	if (chip->flags & TPM_CHIP_FLAG_TPM2) {
+		rc = tpm2_pcr_extend(chip, pcr_idx, digests);
 		tpm_put_ops(chip);
 		return rc;
 	}
 
-	rc = tpm1_pcr_extend(chip, pcr_idx, hash,
+	rc = tpm1_pcr_extend(chip, pcr_idx, digests[0].digest,
 			     "attempting extend a PCR value");
 	tpm_put_ops(chip);
 	return rc;
diff --git a/drivers/char/tpm/tpm.h b/drivers/char/tpm/tpm.h
index 4f85ce909122..2cce072f25b5 100644
--- a/drivers/char/tpm/tpm.h
+++ b/drivers/char/tpm/tpm.h
@@ -441,7 +441,7 @@ static inline u32 tpm2_rc_value(u32 rc)
 int tpm2_get_timeouts(struct tpm_chip *chip);
 int tpm2_pcr_read(struct tpm_chip *chip, u32 pcr_idx,
 		  struct tpm_digest *digest, u16 *digest_size_ptr);
-int tpm2_pcr_extend(struct tpm_chip *chip, u32 pcr_idx, u32 count,
+int tpm2_pcr_extend(struct tpm_chip *chip, u32 pcr_idx,
 		    struct tpm_digest *digests);
 int tpm2_get_random(struct tpm_chip *chip, u8 *dest, size_t max);
 void tpm2_flush_context(struct tpm_chip *chip, u32 handle);
diff --git a/drivers/char/tpm/tpm2-cmd.c b/drivers/char/tpm/tpm2-cmd.c
index 6967f15a6585..e74c5b7b64bf 100644
--- a/drivers/char/tpm/tpm2-cmd.c
+++ b/drivers/char/tpm/tpm2-cmd.c
@@ -246,12 +246,11 @@ struct tpm2_null_auth_area {
  *
  * @chip:	TPM chip to use.
  * @pcr_idx:	index of the PCR.
- * @count:	number of digests passed.
  * @digests:	list of pcr banks and corresponding digest values to extend.
  *
  * Return: Same as with tpm_transmit_cmd.
  */
-int tpm2_pcr_extend(struct tpm_chip *chip, u32 pcr_idx, u32 count,
+int tpm2_pcr_extend(struct tpm_chip *chip, u32 pcr_idx,
 		    struct tpm_digest *digests)
 {
 	struct tpm_buf buf;
@@ -259,9 +258,6 @@ int tpm2_pcr_extend(struct tpm_chip *chip, u32 pcr_idx, u32 count,
 	int rc;
 	int i;
 
-	if (count > chip->nr_allocated_banks)
-		return -EINVAL;
-
 	rc = tpm_buf_init(&buf, TPM2_ST_SESSIONS, TPM2_CC_PCR_EXTEND);
 	if (rc)
 		return rc;
@@ -276,9 +272,9 @@ int tpm2_pcr_extend(struct tpm_chip *chip, u32 pcr_idx, u32 count,
 	tpm_buf_append_u32(&buf, sizeof(struct tpm2_null_auth_area));
 	tpm_buf_append(&buf, (const unsigned char *)&auth_area,
 		       sizeof(auth_area));
-	tpm_buf_append_u32(&buf, count);
+	tpm_buf_append_u32(&buf, chip->nr_allocated_banks);
 
-	for (i = 0; i < count; i++) {
+	for (i = 0; i < chip->nr_allocated_banks; i++) {
 		tpm_buf_append_u16(&buf, digests[i].alg_id);
 		tpm_buf_append(&buf, (const unsigned char *)&digests[i].digest,
 			       chip->allocated_banks[i].digest_size);
diff --git a/include/linux/tpm.h b/include/linux/tpm.h
index 816e686a73ac..1b5436b213a2 100644
--- a/include/linux/tpm.h
+++ b/include/linux/tpm.h
@@ -171,7 +171,8 @@ struct tpm_chip {
 extern int tpm_is_tpm2(struct tpm_chip *chip);
 extern int tpm_pcr_read(struct tpm_chip *chip, u32 pcr_idx,
 			struct tpm_digest *digest);
-extern int tpm_pcr_extend(struct tpm_chip *chip, u32 pcr_idx, const u8 *hash);
+extern int tpm_pcr_extend(struct tpm_chip *chip, u32 pcr_idx,
+			  struct tpm_digest *digests);
 extern int tpm_send(struct tpm_chip *chip, void *cmd, size_t buflen);
 extern int tpm_get_random(struct tpm_chip *chip, u8 *data, size_t max);
 extern int tpm_seal_trusted(struct tpm_chip *chip,
@@ -194,7 +195,7 @@ static inline int tpm_pcr_read(struct tpm_chip *chip, int pcr_idx,
 }
 
 static inline int tpm_pcr_extend(struct tpm_chip *chip, u32 pcr_idx,
-				 const u8 *hash)
+				 struct tpm_digest *digests)
 {
 	return -ENODEV;
 }
diff --git a/security/integrity/ima/ima.h b/security/integrity/ima/ima.h
index cc12f3449a72..89d65cf8053d 100644
--- a/security/integrity/ima/ima.h
+++ b/security/integrity/ima/ima.h
@@ -153,6 +153,7 @@ int ima_measurements_show(struct seq_file *m, void *v);
 unsigned long ima_get_binary_runtime_size(void);
 int ima_init_template(void);
 void ima_init_template_list(void);
+int __init ima_init_digests(void);
 
 /*
  * used to protect h_table and sha_table
diff --git a/security/integrity/ima/ima_init.c b/security/integrity/ima/ima_init.c
index 6bb42a9c5e47..6c9295449751 100644
--- a/security/integrity/ima/ima_init.c
+++ b/security/integrity/ima/ima_init.c
@@ -123,8 +123,12 @@ int __init ima_init(void)
 	if (rc != 0)
 		return rc;
 
+	/* It can be called before ima_init_digests(), it does not use TPM. */
 	ima_load_kexec_buffer();
 
+	rc = ima_init_digests();
+	if (rc != 0)
+		return rc;
 	rc = ima_add_boot_aggregate();	/* boot aggregate must be first entry */
 	if (rc != 0)
 		return rc;
diff --git a/security/integrity/ima/ima_queue.c b/security/integrity/ima/ima_queue.c
index 0e41dc1df1d4..6b6d044e0440 100644
--- a/security/integrity/ima/ima_queue.c
+++ b/security/integrity/ima/ima_queue.c
@@ -27,6 +27,9 @@
 
 #define AUDIT_CAUSE_LEN_MAX 32
 
+/* pre-allocated array of tpm_digest structures to extend a PCR */
+static struct tpm_digest *digests;
+
 LIST_HEAD(ima_measurements);	/* list of all measurements */
 #ifdef CONFIG_IMA_KEXEC
 static unsigned long binary_runtime_size;
@@ -140,11 +143,15 @@ unsigned long ima_get_binary_runtime_size(void)
 static int ima_pcr_extend(const u8 *hash, int pcr)
 {
 	int result = 0;
+	int i;
 
 	if (!ima_tpm_chip)
 		return result;
 
-	result = tpm_pcr_extend(ima_tpm_chip, pcr, hash);
+	for (i = 0; i < ima_tpm_chip->nr_allocated_banks; i++)
+		memcpy(digests[i].digest, hash, TPM_DIGEST_SIZE);
+
+	result = tpm_pcr_extend(ima_tpm_chip, pcr, digests);
 	if (result != 0)
 		pr_err("Error Communicating to TPM chip, result: %d\n", result);
 	return result;
@@ -211,3 +218,21 @@ int ima_restore_measurement_entry(struct ima_template_entry *entry)
 	mutex_unlock(&ima_extend_list_mutex);
 	return result;
 }
+
+int __init ima_init_digests(void)
+{
+	int i;
+
+	if (!ima_tpm_chip)
+		return 0;
+
+	digests = kcalloc(ima_tpm_chip->nr_allocated_banks, sizeof(*digests),
+			  GFP_NOFS);
+	if (!digests)
+		return -ENOMEM;
+
+	for (i = 0; i < ima_tpm_chip->nr_allocated_banks; i++)
+		digests[i].alg_id = ima_tpm_chip->allocated_banks[i].alg_id;
+
+	return 0;
+}
diff --git a/security/keys/trusted.c b/security/keys/trusted.c
index 5b852263eae1..bcc9c6ead7fd 100644
--- a/security/keys/trusted.c
+++ b/security/keys/trusted.c
@@ -35,6 +35,7 @@
 static const char hmac_alg[] = "hmac(sha1)";
 static const char hash_alg[] = "sha1";
 static struct tpm_chip *chip;
+static struct tpm_digest *digests;
 
 struct sdesc {
 	struct shash_desc shash;
@@ -380,15 +381,10 @@ EXPORT_SYMBOL_GPL(trusted_tpm_send);
  */
 static int pcrlock(const int pcrnum)
 {
-	unsigned char hash[SHA1_DIGEST_SIZE];
-	int ret;
-
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
-	ret = tpm_get_random(chip, hash, SHA1_DIGEST_SIZE);
-	if (ret != SHA1_DIGEST_SIZE)
-		return ret;
-	return tpm_pcr_extend(chip, pcrnum, hash) ? -EINVAL : 0;
+
+	return tpm_pcr_extend(chip, pcrnum, digests) ? -EINVAL : 0;
 }
 
 /*
@@ -1222,6 +1218,29 @@ hashalg_fail:
 	return ret;
 }
 
+static int __init init_digests(void)
+{
+	u8 digest[TPM_MAX_DIGEST_SIZE];
+	int ret;
+	int i;
+
+	ret = tpm_get_random(chip, digest, TPM_MAX_DIGEST_SIZE);
+	if (ret < 0)
+		return ret;
+	if (ret < TPM_MAX_DIGEST_SIZE)
+		return -EFAULT;
+
+	digests = kcalloc(chip->nr_allocated_banks, sizeof(*digests),
+			  GFP_KERNEL);
+	if (!digests)
+		return -ENOMEM;
+
+	for (i = 0; i < chip->nr_allocated_banks; i++)
+		memcpy(digests[i].digest, digest, TPM_MAX_DIGEST_SIZE);
+
+	return 0;
+}
+
 static int __init init_trusted(void)
 {
 	int ret;
@@ -1229,15 +1248,20 @@ static int __init init_trusted(void)
 	chip = tpm_default_chip();
 	if (!chip)
 		return -ENOENT;
-	ret = trusted_shash_alloc();
+	ret = init_digests();
 	if (ret < 0)
 		goto err_put;
+	ret = trusted_shash_alloc();
+	if (ret < 0)
+		goto err_free;
 	ret = register_key_type(&key_type_trusted);
 	if (ret < 0)
 		goto err_release;
 	return 0;
 err_release:
 	trusted_shash_release();
+err_free:
+	kfree(digests);
 err_put:
 	put_device(&chip->dev);
 	return ret;
@@ -1246,6 +1270,7 @@ err_put:
 static void __exit cleanup_trusted(void)
 {
 	put_device(&chip->dev);
+	kfree(digests);
 	trusted_shash_release();
 	unregister_key_type(&key_type_trusted);
 }
-- 
cgit v1.2.3


From ede033e1e863c36729de25b57145fff287415830 Mon Sep 17 00:00:00 2001
From: Thomas Petazzoni <thomas.petazzoni@bootlin.com>
Date: Thu, 7 Feb 2019 17:28:55 +0100
Subject: dt-bindings: gpio: document the new pull-up/pull-down flags

This commit extends the flags that can be used in GPIO specifiers to
indicate if a pull-up resistor or pull-down resistor should be
enabled.

While some pinctrl DT bindings already offer the capability of
configuring pull-up/pull-down resistors at the pin level, a number of
simple GPIO controllers don't have any pinmuxing capability, and
therefore do not rely on the pinctrl DT bindings.

Such simple GPIO controllers however sometimes allow to configure
pull-up and pull-down resistors on a per-pin basis, and whether such
resistors should be enabled or not is a highly board-specific HW
characteristic.

By using two additional bits of the GPIO flag specifier, we can easily
allow the Device Tree to describe which GPIOs should have their
pull-up or pull-down resistors enabled. Even though the two options
are mutually exclusive, we still need two bits to encode at least
three states: no pull-up/pull-down, pull-up, pull-down.

Signed-off-by: Thomas Petazzoni <thomas.petazzoni@bootlin.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 Documentation/devicetree/bindings/gpio/gpio.txt | 12 ++++++++++++
 include/dt-bindings/gpio/gpio.h                 |  6 ++++++
 2 files changed, 18 insertions(+)

(limited to 'include')

diff --git a/Documentation/devicetree/bindings/gpio/gpio.txt b/Documentation/devicetree/bindings/gpio/gpio.txt
index f0ba154b5723..a8895d339bfe 100644
--- a/Documentation/devicetree/bindings/gpio/gpio.txt
+++ b/Documentation/devicetree/bindings/gpio/gpio.txt
@@ -67,6 +67,18 @@ Optional standard bitfield specifiers for the last cell:
            https://en.wikipedia.org/wiki/Open_collector
 - Bit 3: 0 means the output should be maintained during sleep/low-power mode
          1 means the output state can be lost during sleep/low-power mode
+- Bit 4: 0 means no pull-up resistor should be enabled
+         1 means a pull-up resistor should be enabled
+         This setting only applies to hardware with a simple on/off
+         control for pull-up configuration. If the hardware has more
+         elaborate pull-up configuration, it should be represented
+         using a pin control binding.
+- Bit 5: 0 means no pull-down resistor should be enabled
+         1 means a pull-down resistor should be enabled
+         This setting only applies to hardware with a simple on/off
+         control for pull-down configuration. If the hardware has more
+         elaborate pull-down configuration, it should be represented
+         using a pin control binding.
 
 1.1) GPIO specifier best practices
 ----------------------------------
diff --git a/include/dt-bindings/gpio/gpio.h b/include/dt-bindings/gpio/gpio.h
index 2cc10ae4bbb7..c029467e828b 100644
--- a/include/dt-bindings/gpio/gpio.h
+++ b/include/dt-bindings/gpio/gpio.h
@@ -33,4 +33,10 @@
 #define GPIO_PERSISTENT 0
 #define GPIO_TRANSITORY 8
 
+/* Bit 4 express pull up */
+#define GPIO_PULL_UP 16
+
+/* Bit 5 express pull down */
+#define GPIO_PULL_DOWN 32
+
 #endif
-- 
cgit v1.2.3


From 4c06c4e6cf63d7f3d5dfe62593a073253d750a59 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Tue, 12 Feb 2019 13:08:10 +0100
Subject: driver core: Fix possible supplier PM-usage counter imbalance

If a stateless device link to a certain supplier with
DL_FLAG_PM_RUNTIME set in the flags is added and then removed by the
consumer driver's probe callback, the supplier's PM-runtime usage
counter will be nonzero after that which effectively causes the
supplier to remain "always on" going forward.

Namely, device_link_add() called to add the link invokes
device_link_rpm_prepare() which notices that the consumer driver is
probing, so it increments the supplier's PM-runtime usage counter
with the assumption that the link will stay around until
pm_runtime_put_suppliers() is called by driver_probe_device(),
but if the link goes away before that point, the supplier's
PM-runtime usage counter will remain nonzero.

To prevent that from happening, first rework pm_runtime_get_suppliers()
and pm_runtime_put_suppliers() to use the rpm_active refounts of device
links and make the latter only drop rpm_active and the supplier's
PM-runtime usage counter for each link by one, unless rpm_active is
one already for it.  Next, modify device_link_add() to bump up the
new link's rpm_active refcount and the suppliers PM-runtime usage
counter by two, to prevent pm_runtime_put_suppliers(), if it is
called subsequently, from suspending the supplier prematurely (in
case its PM-runtime usage counter goes down to 0 in there).

Due to the way rpm_put_suppliers() works, this change does not
affect runtime suspend of the consumer ends of new device links (or,
generally, device links for which DL_FLAG_PM_RUNTIME has just been
set).

Fixes: e2f3cd831a28 ("driver core: Fix handling of runtime PM flags in device_link_add()")
Reported-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
Tested-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/core.c          | 21 ++++-----------------
 drivers/base/power/runtime.c | 27 +++++++++++++++++++++++++--
 include/linux/pm_runtime.h   |  4 ++++
 3 files changed, 33 insertions(+), 19 deletions(-)

(limited to 'include')

diff --git a/drivers/base/core.c b/drivers/base/core.c
index abfce4f613f8..787190238753 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -165,19 +165,6 @@ void device_pm_move_to_tail(struct device *dev)
 	device_links_read_unlock(idx);
 }
 
-static void device_link_rpm_prepare(struct device *consumer,
-				    struct device *supplier)
-{
-	pm_runtime_new_link(consumer);
-	/*
-	 * If the link is being added by the consumer driver at probe time,
-	 * balance the decrementation of the supplier's runtime PM usage counter
-	 * after consumer probe in driver_probe_device().
-	 */
-	if (consumer->links.status == DL_DEV_PROBING)
-		pm_runtime_get_noresume(supplier);
-}
-
 /**
  * device_link_add - Create a link between two devices.
  * @consumer: Consumer end of the link.
@@ -286,11 +273,11 @@ struct device_link *device_link_add(struct device *consumer,
 
 		if (flags & DL_FLAG_PM_RUNTIME) {
 			if (!(link->flags & DL_FLAG_PM_RUNTIME)) {
-				device_link_rpm_prepare(consumer, supplier);
+				pm_runtime_new_link(consumer);
 				link->flags |= DL_FLAG_PM_RUNTIME;
 			}
 			if (flags & DL_FLAG_RPM_ACTIVE)
-				refcount_inc(&link->rpm_active);
+				pm_runtime_active_link(link, supplier);
 		}
 
 		if (flags & DL_FLAG_STATELESS) {
@@ -323,9 +310,9 @@ struct device_link *device_link_add(struct device *consumer,
 
 	if (flags & DL_FLAG_PM_RUNTIME) {
 		if (flags & DL_FLAG_RPM_ACTIVE)
-			refcount_inc(&link->rpm_active);
+			pm_runtime_active_link(link, supplier);
 
-		device_link_rpm_prepare(consumer, supplier);
+		pm_runtime_new_link(consumer);
 	}
 
 	get_device(supplier);
diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
index af23eb327f57..6b8aa6bed064 100644
--- a/drivers/base/power/runtime.c
+++ b/drivers/base/power/runtime.c
@@ -1625,8 +1625,10 @@ void pm_runtime_get_suppliers(struct device *dev)
 	idx = device_links_read_lock();
 
 	list_for_each_entry_rcu(link, &dev->links.suppliers, c_node)
-		if (link->flags & DL_FLAG_PM_RUNTIME)
+		if (link->flags & DL_FLAG_PM_RUNTIME) {
+			refcount_inc(&link->rpm_active);
 			pm_runtime_get_sync(link->supplier);
+		}
 
 	device_links_read_unlock(idx);
 }
@@ -1643,7 +1645,8 @@ void pm_runtime_put_suppliers(struct device *dev)
 	idx = device_links_read_lock();
 
 	list_for_each_entry_rcu(link, &dev->links.suppliers, c_node)
-		if (link->flags & DL_FLAG_PM_RUNTIME)
+		if (link->flags & DL_FLAG_PM_RUNTIME &&
+		    refcount_dec_not_one(&link->rpm_active))
 			pm_runtime_put(link->supplier);
 
 	device_links_read_unlock(idx);
@@ -1656,6 +1659,26 @@ void pm_runtime_new_link(struct device *dev)
 	spin_unlock_irq(&dev->power.lock);
 }
 
+/**
+ * pm_runtime_active_link - Set up new device link as active for PM-runtime.
+ * @link: Device link to be set up as active.
+ * @supplier: Supplier end of the link.
+ *
+ * Add 2 to the rpm_active refcount of @link and increment the PM-runtime
+ * usage counter of @supplier once more in case the link is being added while
+ * the consumer driver is probing and pm_runtime_put_suppliers() will be called
+ * subsequently.
+ *
+ * Note that this doesn't prevent rpm_put_suppliers() from decreasing the link's
+ * rpm_active refcount down to one, so runtime suspend of the consumer end of
+ * @link is not affected.
+ */
+void pm_runtime_active_link(struct device_link *link, struct device *supplier)
+{
+	refcount_add(2, &link->rpm_active);
+	pm_runtime_get_noresume(supplier);
+}
+
 void pm_runtime_drop_link(struct device *dev)
 {
 	spin_lock_irq(&dev->power.lock);
diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h
index fed5be706bc9..a27bbb5937b8 100644
--- a/include/linux/pm_runtime.h
+++ b/include/linux/pm_runtime.h
@@ -59,6 +59,8 @@ extern void pm_runtime_clean_up_links(struct device *dev);
 extern void pm_runtime_get_suppliers(struct device *dev);
 extern void pm_runtime_put_suppliers(struct device *dev);
 extern void pm_runtime_new_link(struct device *dev);
+extern void pm_runtime_active_link(struct device_link *link,
+				   struct device *supplier);
 extern void pm_runtime_drop_link(struct device *dev);
 
 static inline void pm_suspend_ignore_children(struct device *dev, bool enable)
@@ -176,6 +178,8 @@ static inline void pm_runtime_clean_up_links(struct device *dev) {}
 static inline void pm_runtime_get_suppliers(struct device *dev) {}
 static inline void pm_runtime_put_suppliers(struct device *dev) {}
 static inline void pm_runtime_new_link(struct device *dev) {}
+static inline void pm_runtime_active_link(struct device_link *link,
+					  struct device *supplier) {}
 static inline void pm_runtime_drop_link(struct device *dev) {}
 
 #endif /* !CONFIG_PM */
-- 
cgit v1.2.3


From d449991c4d1d0663b42db7648510a9911de21298 Mon Sep 17 00:00:00 2001
From: Thomas Petazzoni <thomas.petazzoni@bootlin.com>
Date: Thu, 7 Feb 2019 17:28:58 +0100
Subject: gpio: add core support for pull-up/pull-down configuration

This commit adds support for configuring the pull-up and pull-down
resistors available in some GPIO controllers. While configuring
pull-up/pull-down is already possible through the pinctrl subsystem,
some GPIO controllers, especially simple ones such as GPIO expanders
on I2C, don't have any pinmuxing capability and therefore do not use
the pinctrl subsystem.

This commit implements the GPIO_PULL_UP and GPIO_PULL_DOWN flags,
which can be used from the Device Tree, to enable a pull-up or
pull-down resistor on a given GPIO.

The flag is simply propagated all the way to the core GPIO subsystem,
where it is used to call the gpio_chip ->set_config callback with the
appropriate existing PIN_CONFIG_BIAS_* values.

Signed-off-by: Thomas Petazzoni <thomas.petazzoni@bootlin.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/gpio/gpiolib-of.c    |  5 +++++
 drivers/gpio/gpiolib.c       | 18 ++++++++++++++++++
 drivers/gpio/gpiolib.h       |  2 ++
 include/linux/gpio/machine.h |  2 ++
 include/linux/of_gpio.h      |  2 ++
 5 files changed, 29 insertions(+)

(limited to 'include')

diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c
index a6e1891217e2..9a8b78477f79 100644
--- a/drivers/gpio/gpiolib-of.c
+++ b/drivers/gpio/gpiolib-of.c
@@ -345,6 +345,11 @@ struct gpio_desc *of_find_gpio(struct device *dev, const char *con_id,
 	if (of_flags & OF_GPIO_TRANSITORY)
 		*flags |= GPIO_TRANSITORY;
 
+	if (of_flags & OF_GPIO_PULL_UP)
+		*flags |= GPIO_PULL_UP;
+	if (of_flags & OF_GPIO_PULL_DOWN)
+		*flags |= GPIO_PULL_DOWN;
+
 	return desc;
 }
 
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index 1f239aac43df..22d8b37f5319 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -2573,6 +2573,13 @@ int gpiod_direction_input(struct gpio_desc *desc)
 	if (status == 0)
 		clear_bit(FLAG_IS_OUT, &desc->flags);
 
+	if (test_bit(FLAG_PULL_UP, &desc->flags))
+		gpio_set_config(chip, gpio_chip_hwgpio(desc),
+				PIN_CONFIG_BIAS_PULL_UP);
+	else if (test_bit(FLAG_PULL_DOWN, &desc->flags))
+		gpio_set_config(chip, gpio_chip_hwgpio(desc),
+				PIN_CONFIG_BIAS_PULL_DOWN);
+
 	trace_gpio_direction(desc_to_gpio(desc), 1, status);
 
 	return status;
@@ -4050,6 +4057,17 @@ int gpiod_configure_flags(struct gpio_desc *desc, const char *con_id,
 	if (lflags & GPIO_OPEN_SOURCE)
 		set_bit(FLAG_OPEN_SOURCE, &desc->flags);
 
+	if ((lflags & GPIO_PULL_UP) && (lflags & GPIO_PULL_DOWN)) {
+		gpiod_err(desc,
+			  "both pull-up and pull-down enabled, invalid configuration\n");
+		return -EINVAL;
+	}
+
+	if (lflags & GPIO_PULL_UP)
+		set_bit(FLAG_PULL_UP, &desc->flags);
+	else if (lflags & GPIO_PULL_DOWN)
+		set_bit(FLAG_PULL_DOWN, &desc->flags);
+
 	status = gpiod_set_transitory(desc, (lflags & GPIO_TRANSITORY));
 	if (status < 0)
 		return status;
diff --git a/drivers/gpio/gpiolib.h b/drivers/gpio/gpiolib.h
index bc57f0dc5953..078ab17b96bf 100644
--- a/drivers/gpio/gpiolib.h
+++ b/drivers/gpio/gpiolib.h
@@ -219,6 +219,8 @@ struct gpio_desc {
 #define FLAG_IRQ_IS_ENABLED 10	/* GPIO is connected to an enabled IRQ */
 #define FLAG_IS_HOGGED	11	/* GPIO is hogged */
 #define FLAG_TRANSITORY 12	/* GPIO may lose value in sleep or reset */
+#define FLAG_PULL_UP    13	/* GPIO has pull up enabled */
+#define FLAG_PULL_DOWN  14	/* GPIO has pull down enabled */
 
 	/* Connection label */
 	const char		*label;
diff --git a/include/linux/gpio/machine.h b/include/linux/gpio/machine.h
index daa44eac9241..69673be10213 100644
--- a/include/linux/gpio/machine.h
+++ b/include/linux/gpio/machine.h
@@ -12,6 +12,8 @@ enum gpio_lookup_flags {
 	GPIO_OPEN_SOURCE = (1 << 2),
 	GPIO_PERSISTENT = (0 << 3),
 	GPIO_TRANSITORY = (1 << 3),
+	GPIO_PULL_UP = (1 << 4),
+	GPIO_PULL_DOWN = (1 << 5),
 };
 
 /**
diff --git a/include/linux/of_gpio.h b/include/linux/of_gpio.h
index 163b79ecd01a..f9737dea9d1f 100644
--- a/include/linux/of_gpio.h
+++ b/include/linux/of_gpio.h
@@ -28,6 +28,8 @@ enum of_gpio_flags {
 	OF_GPIO_SINGLE_ENDED = 0x2,
 	OF_GPIO_OPEN_DRAIN = 0x4,
 	OF_GPIO_TRANSITORY = 0x8,
+	OF_GPIO_PULL_UP = 0x10,
+	OF_GPIO_PULL_DOWN = 0x20,
 };
 
 #ifdef CONFIG_OF_GPIO
-- 
cgit v1.2.3


From b5c231d8c8037f63d34199ea1667bbe1cd9f940f Mon Sep 17 00:00:00 2001
From: Brian Masney <masneyb@onstation.org>
Date: Thu, 7 Feb 2019 21:16:22 -0500
Subject: genirq: introduce irq_domain_translate_twocell

Add a new function irq_domain_translate_twocell() that is to be used as
the translate function in struct irq_domain_ops for the v2 IRQ API.

This patch also changes irq_domain_xlate_twocell() from the v1 IRQ API
to call irq_domain_translate_twocell() in the v2 IRQ API. This required
changes to of_phandle_args_to_fwspec()'s arguments so that it can be
called from multiple places.

Cc: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Brian Masney <masneyb@onstation.org>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 include/linux/irqdomain.h |  5 +++++
 kernel/irq/irqdomain.c    | 45 ++++++++++++++++++++++++++++++++++-----------
 2 files changed, 39 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index 35965f41d7be..fcefe0c7263f 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -419,6 +419,11 @@ int irq_domain_xlate_onetwocell(struct irq_domain *d, struct device_node *ctrlr,
 			const u32 *intspec, unsigned int intsize,
 			irq_hw_number_t *out_hwirq, unsigned int *out_type);
 
+int irq_domain_translate_twocell(struct irq_domain *d,
+				 struct irq_fwspec *fwspec,
+				 unsigned long *out_hwirq,
+				 unsigned int *out_type);
+
 /* IPI functions */
 int irq_reserve_ipi(struct irq_domain *domain, const struct cpumask *dest);
 int irq_destroy_ipi(unsigned int irq, const struct cpumask *dest);
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index 8b0be4bd6565..56a30d542b8e 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -729,16 +729,17 @@ static int irq_domain_translate(struct irq_domain *d,
 	return 0;
 }
 
-static void of_phandle_args_to_fwspec(struct of_phandle_args *irq_data,
+static void of_phandle_args_to_fwspec(struct device_node *np, const u32 *args,
+				      unsigned int count,
 				      struct irq_fwspec *fwspec)
 {
 	int i;
 
-	fwspec->fwnode = irq_data->np ? &irq_data->np->fwnode : NULL;
-	fwspec->param_count = irq_data->args_count;
+	fwspec->fwnode = np ? &np->fwnode : NULL;
+	fwspec->param_count = count;
 
-	for (i = 0; i < irq_data->args_count; i++)
-		fwspec->param[i] = irq_data->args[i];
+	for (i = 0; i < count; i++)
+		fwspec->param[i] = args[i];
 }
 
 unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec)
@@ -836,7 +837,9 @@ unsigned int irq_create_of_mapping(struct of_phandle_args *irq_data)
 {
 	struct irq_fwspec fwspec;
 
-	of_phandle_args_to_fwspec(irq_data, &fwspec);
+	of_phandle_args_to_fwspec(irq_data->np, irq_data->args,
+				  irq_data->args_count, &fwspec);
+
 	return irq_create_fwspec_mapping(&fwspec);
 }
 EXPORT_SYMBOL_GPL(irq_create_of_mapping);
@@ -928,11 +931,10 @@ int irq_domain_xlate_twocell(struct irq_domain *d, struct device_node *ctrlr,
 			const u32 *intspec, unsigned int intsize,
 			irq_hw_number_t *out_hwirq, unsigned int *out_type)
 {
-	if (WARN_ON(intsize < 2))
-		return -EINVAL;
-	*out_hwirq = intspec[0];
-	*out_type = intspec[1] & IRQ_TYPE_SENSE_MASK;
-	return 0;
+	struct irq_fwspec fwspec;
+
+	of_phandle_args_to_fwspec(ctrlr, intspec, intsize, &fwspec);
+	return irq_domain_translate_twocell(d, &fwspec, out_hwirq, out_type);
 }
 EXPORT_SYMBOL_GPL(irq_domain_xlate_twocell);
 
@@ -968,6 +970,27 @@ const struct irq_domain_ops irq_domain_simple_ops = {
 };
 EXPORT_SYMBOL_GPL(irq_domain_simple_ops);
 
+/**
+ * irq_domain_translate_twocell() - Generic translate for direct two cell
+ * bindings
+ *
+ * Device Tree IRQ specifier translation function which works with two cell
+ * bindings where the cell values map directly to the hwirq number
+ * and linux irq flags.
+ */
+int irq_domain_translate_twocell(struct irq_domain *d,
+				 struct irq_fwspec *fwspec,
+				 unsigned long *out_hwirq,
+				 unsigned int *out_type)
+{
+	if (WARN_ON(fwspec->param_count < 2))
+		return -EINVAL;
+	*out_hwirq = fwspec->param[0];
+	*out_type = fwspec->param[1] & IRQ_TYPE_SENSE_MASK;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(irq_domain_translate_twocell);
+
 int irq_domain_alloc_descs(int virq, unsigned int cnt, irq_hw_number_t hwirq,
 			   int node, const struct irq_affinity_desc *affinity)
 {
-- 
cgit v1.2.3


From 5aa5bd563ce041d931c0dc1fc436dd18c27c60a7 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Thu, 7 Feb 2019 21:16:23 -0500
Subject: genirq: introduce irq_chip_mask_ack_parent()

The hierarchical irqchip never before ran into a situation
where the parent is not "simple", i.e. does not implement
.irq_ack() and .irq_mask() like most, but the qcom-pm8xxx.c
happens to implement only .irq_mask_ack().

Since we want to make ssbi-gpio a hierarchical child of this
irqchip, it must *also* only implement .irq_mask_ack()
and call down to the parent, and for this we of course
need irq_chip_mask_ack_parent().

Cc: Marc Zyngier <marc.zyngier@arm.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Brian Masney <masneyb@onstation.org>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 include/linux/irq.h |  1 +
 kernel/irq/chip.c   | 11 +++++++++++
 2 files changed, 12 insertions(+)

(limited to 'include')

diff --git a/include/linux/irq.h b/include/linux/irq.h
index def2b2aac8b1..9a1a67d2e07d 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -605,6 +605,7 @@ extern void irq_chip_disable_parent(struct irq_data *data);
 extern void irq_chip_ack_parent(struct irq_data *data);
 extern int irq_chip_retrigger_hierarchy(struct irq_data *data);
 extern void irq_chip_mask_parent(struct irq_data *data);
+extern void irq_chip_mask_ack_parent(struct irq_data *data);
 extern void irq_chip_unmask_parent(struct irq_data *data);
 extern void irq_chip_eoi_parent(struct irq_data *data);
 extern int irq_chip_set_affinity_parent(struct irq_data *data,
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 34e969069488..982b75e127c5 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -1277,6 +1277,17 @@ void irq_chip_mask_parent(struct irq_data *data)
 }
 EXPORT_SYMBOL_GPL(irq_chip_mask_parent);
 
+/**
+ * irq_chip_mask_ack_parent - Mask and acknowledge the parent interrupt
+ * @data:	Pointer to interrupt specific data
+ */
+void irq_chip_mask_ack_parent(struct irq_data *data)
+{
+	data = data->parent_data;
+	data->chip->irq_mask_ack(data);
+}
+EXPORT_SYMBOL_GPL(irq_chip_mask_ack_parent);
+
 /**
  * irq_chip_unmask_parent - Unmask the parent interrupt
  * @data:	Pointer to interrupt specific data
-- 
cgit v1.2.3


From be3d960b0aeb97438a044d8d3ce75393d198c020 Mon Sep 17 00:00:00 2001
From: Neil Armstrong <narmstrong@baylibre.com>
Date: Tue, 12 Feb 2019 17:28:58 +0100
Subject: dt-bindings: clk: add G12A AO Clock and Reset Bindings

Add bindings for the Amlogic G12A AO Clock and Reset controllers.

Signed-off-by: Neil Armstrong <narmstrong@baylibre.com>
Acked-by: Jerome Brunet <jbrunet@baylibre.com>
Link: https://lkml.kernel.org/r/20190212162859.20743-2-narmstrong@baylibre.com
---
 .../bindings/clock/amlogic,gxbb-aoclkc.txt         |  1 +
 include/dt-bindings/clock/g12a-aoclkc.h            | 34 ++++++++++++++++++++++
 include/dt-bindings/reset/g12a-aoclkc.h            | 18 ++++++++++++
 3 files changed, 53 insertions(+)
 create mode 100644 include/dt-bindings/clock/g12a-aoclkc.h
 create mode 100644 include/dt-bindings/reset/g12a-aoclkc.h

(limited to 'include')

diff --git a/Documentation/devicetree/bindings/clock/amlogic,gxbb-aoclkc.txt b/Documentation/devicetree/bindings/clock/amlogic,gxbb-aoclkc.txt
index 79511d7bb321..c41f0be5d438 100644
--- a/Documentation/devicetree/bindings/clock/amlogic,gxbb-aoclkc.txt
+++ b/Documentation/devicetree/bindings/clock/amlogic,gxbb-aoclkc.txt
@@ -10,6 +10,7 @@ Required Properties:
 	- GXL (S905X, S905D) : "amlogic,meson-gxl-aoclkc"
 	- GXM (S912) : "amlogic,meson-gxm-aoclkc"
 	- AXG (A113D, A113X) : "amlogic,meson-axg-aoclkc"
+	- G12A (S905X2, S905D2, S905Y2) : "amlogic,meson-g12a-aoclkc"
 	followed by the common "amlogic,meson-gx-aoclkc"
 - clocks: list of clock phandle, one for each entry clock-names.
 - clock-names: should contain the following:
diff --git a/include/dt-bindings/clock/g12a-aoclkc.h b/include/dt-bindings/clock/g12a-aoclkc.h
new file mode 100644
index 000000000000..8db01ffbeb06
--- /dev/null
+++ b/include/dt-bindings/clock/g12a-aoclkc.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */
+/*
+ * Copyright (c) 2016 BayLibre, SAS
+ * Author: Neil Armstrong <narmstrong@baylibre.com>
+ *
+ * Copyright (c) 2018 Amlogic, inc.
+ * Author: Qiufang Dai <qiufang.dai@amlogic.com>
+ */
+
+#ifndef DT_BINDINGS_CLOCK_AMLOGIC_MESON_G12A_AOCLK
+#define DT_BINDINGS_CLOCK_AMLOGIC_MESON_G12A_AOCLK
+
+#define CLKID_AO_AHB		0
+#define CLKID_AO_IR_IN		1
+#define CLKID_AO_I2C_M0		2
+#define CLKID_AO_I2C_S0		3
+#define CLKID_AO_UART		4
+#define CLKID_AO_PROD_I2C	5
+#define CLKID_AO_UART2		6
+#define CLKID_AO_IR_OUT		7
+#define CLKID_AO_SAR_ADC	8
+#define CLKID_AO_MAILBOX	9
+#define CLKID_AO_M3		10
+#define CLKID_AO_AHB_SRAM	11
+#define CLKID_AO_RTI		12
+#define CLKID_AO_M4_FCLK	13
+#define CLKID_AO_M4_HCLK	14
+#define CLKID_AO_CLK81		15
+#define CLKID_AO_SAR_ADC_CLK	18
+#define CLKID_AO_32K		23
+#define CLKID_AO_CEC		27
+#define CLKID_AO_CTS_RTC_OSCIN	28
+
+#endif
diff --git a/include/dt-bindings/reset/g12a-aoclkc.h b/include/dt-bindings/reset/g12a-aoclkc.h
new file mode 100644
index 000000000000..bd2e2337135c
--- /dev/null
+++ b/include/dt-bindings/reset/g12a-aoclkc.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */
+/*
+ * Copyright (c) 2016 BayLibre, SAS
+ * Author: Neil Armstrong <narmstrong@baylibre.com>
+ */
+
+#ifndef DT_BINDINGS_RESET_AMLOGIC_MESON_G12A_AOCLK
+#define DT_BINDINGS_RESET_AMLOGIC_MESON_G12A_AOCLK
+
+#define RESET_AO_IR_IN		0
+#define RESET_AO_UART		1
+#define RESET_AO_I2C_M		2
+#define RESET_AO_I2C_S		3
+#define RESET_AO_SAR_ADC	4
+#define RESET_AO_UART2		5
+#define RESET_AO_IR_OUT		6
+
+#endif
-- 
cgit v1.2.3


From 40d08f774c17add49c1db0083390b6a672030b7d Mon Sep 17 00:00:00 2001
From: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Date: Sun, 10 Feb 2019 23:26:02 +0100
Subject: dt-bindings: clock: meson8b: add APB clock definition

Commit 8e1dd17c8b0e3f ("dt-bindings: clock: meson8b: export the CPU
post dividers") added a clock with the name "ABP". The actual name of
this clock is "APB".

Add a new #define with the same ID but the correct name. The old #define
will be dropped in a follow-up patch because each commit in the tree
must compile on it's own (the old #define is still used by the clock
controller driver).

Fixes: 8e1dd17c8b0e3f ("dt-bindings: clock: meson8b: export the CPU post dividers")
Signed-off-by: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Signed-off-by: Neil Armstrong <narmstrong@baylibre.com>
Acked-by: Jerome Brunet <jbrunet@baylibre.com>
Link: https://lkml.kernel.org/r/20190210222603.6404-2-martin.blumenstingl@googlemail.com
---
 include/dt-bindings/clock/meson8b-clkc.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/dt-bindings/clock/meson8b-clkc.h b/include/dt-bindings/clock/meson8b-clkc.h
index 5fe2923382d0..8067077a62ca 100644
--- a/include/dt-bindings/clock/meson8b-clkc.h
+++ b/include/dt-bindings/clock/meson8b-clkc.h
@@ -104,6 +104,7 @@
 #define CLKID_MPLL2		95
 #define CLKID_NAND_CLK		112
 #define CLKID_ABP		124
+#define CLKID_APB		124
 #define CLKID_PERIPH		126
 #define CLKID_AXI		128
 #define CLKID_L2_DRAM		130
-- 
cgit v1.2.3


From 7fc38225363dd8f19e667ad7c77b63bc4a5c065d Mon Sep 17 00:00:00 2001
From: Alin Nastac <alin.nastac@gmail.com>
Date: Wed, 13 Feb 2019 09:14:53 +0100
Subject: netfilter: reject: skip csum verification for protocols that don't
 support it

Some protocols have other means to verify the payload integrity
(AH, ESP, SCTP) while others are incompatible with nf_ip(6)_checksum
implementation because checksum is either optional or might be
partial (UDPLITE, DCCP, GRE). Because nf_ip(6)_checksum was used
to validate the packets, ip(6)tables REJECT rules were not capable
to generate ICMP(v6) errors for the protocols mentioned above.

This commit also fixes the incorrect pseudo-header protocol used
for IPv4 packets that carry other transport protocols than TCP or
UDP (pseudo-header used protocol 0 iso the proper value).

Signed-off-by: Alin Nastac <alin.nastac@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/ipv4/nf_reject.h   |  1 +
 include/net/netfilter/ipv6/nf_reject.h   |  1 +
 include/net/netfilter/nf_reject.h        | 27 +++++++++++++++++++++++++++
 net/bridge/netfilter/nft_reject_bridge.c | 10 +++++-----
 net/ipv4/netfilter/nf_reject_ipv4.c      |  9 ++-------
 net/ipv6/netfilter/nf_reject_ipv6.c      |  3 +++
 6 files changed, 39 insertions(+), 12 deletions(-)
 create mode 100644 include/net/netfilter/nf_reject.h

(limited to 'include')

diff --git a/include/net/netfilter/ipv4/nf_reject.h b/include/net/netfilter/ipv4/nf_reject.h
index 2eb43fcefc50..40e0e0623f46 100644
--- a/include/net/netfilter/ipv4/nf_reject.h
+++ b/include/net/netfilter/ipv4/nf_reject.h
@@ -5,6 +5,7 @@
 #include <linux/skbuff.h>
 #include <net/ip.h>
 #include <net/icmp.h>
+#include <net/netfilter/nf_reject.h>
 
 void nf_send_unreach(struct sk_buff *skb_in, int code, int hook);
 void nf_send_reset(struct net *net, struct sk_buff *oldskb, int hook);
diff --git a/include/net/netfilter/ipv6/nf_reject.h b/include/net/netfilter/ipv6/nf_reject.h
index 3a5a9a36a0b2..4a3ef9ebdf6f 100644
--- a/include/net/netfilter/ipv6/nf_reject.h
+++ b/include/net/netfilter/ipv6/nf_reject.h
@@ -3,6 +3,7 @@
 #define _IPV6_NF_REJECT_H
 
 #include <linux/icmpv6.h>
+#include <net/netfilter/nf_reject.h>
 
 void nf_send_unreach6(struct net *net, struct sk_buff *skb_in, unsigned char code,
 		      unsigned int hooknum);
diff --git a/include/net/netfilter/nf_reject.h b/include/net/netfilter/nf_reject.h
new file mode 100644
index 000000000000..221f877f29d1
--- /dev/null
+++ b/include/net/netfilter/nf_reject.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _NF_REJECT_H
+#define _NF_REJECT_H
+
+static inline bool nf_reject_verify_csum(__u8 proto)
+{
+	/* Skip protocols that don't use 16-bit one's complement checksum
+	 * of the entire payload.
+	 */
+	switch (proto) {
+		/* Protocols with other integrity checks. */
+		case IPPROTO_AH:
+		case IPPROTO_ESP:
+		case IPPROTO_SCTP:
+
+		/* Protocols with partial checksums. */
+		case IPPROTO_UDPLITE:
+		case IPPROTO_DCCP:
+
+		/* Protocols with optional checksums. */
+		case IPPROTO_GRE:
+			return false;
+	}
+	return true;
+}
+
+#endif /* _NF_REJECT_H */
diff --git a/net/bridge/netfilter/nft_reject_bridge.c b/net/bridge/netfilter/nft_reject_bridge.c
index 419e8edf23ba..1b1856744c80 100644
--- a/net/bridge/netfilter/nft_reject_bridge.c
+++ b/net/bridge/netfilter/nft_reject_bridge.c
@@ -125,13 +125,10 @@ static void nft_reject_br_send_v4_unreach(struct net *net,
 	if (pskb_trim_rcsum(oldskb, ntohs(ip_hdr(oldskb)->tot_len)))
 		return;
 
-	if (ip_hdr(oldskb)->protocol == IPPROTO_TCP ||
-	    ip_hdr(oldskb)->protocol == IPPROTO_UDP)
-		proto = ip_hdr(oldskb)->protocol;
-	else
-		proto = 0;
+	proto = ip_hdr(oldskb)->protocol;
 
 	if (!skb_csum_unnecessary(oldskb) &&
+	    nf_reject_verify_csum(proto) &&
 	    nf_ip_checksum(oldskb, hook, ip_hdrlen(oldskb), proto))
 		return;
 
@@ -234,6 +231,9 @@ static bool reject6_br_csum_ok(struct sk_buff *skb, int hook)
 	if (thoff < 0 || thoff >= skb->len || (fo & htons(~0x7)) != 0)
 		return false;
 
+	if (!nf_reject_verify_csum(proto))
+		return true;
+
 	return nf_ip6_checksum(skb, hook, thoff, proto) == 0;
 }
 
diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c
index aa8304c618b8..7dc3c324b911 100644
--- a/net/ipv4/netfilter/nf_reject_ipv4.c
+++ b/net/ipv4/netfilter/nf_reject_ipv4.c
@@ -173,21 +173,16 @@ EXPORT_SYMBOL_GPL(nf_send_reset);
 void nf_send_unreach(struct sk_buff *skb_in, int code, int hook)
 {
 	struct iphdr *iph = ip_hdr(skb_in);
-	u8 proto;
+	u8 proto = iph->protocol;
 
 	if (iph->frag_off & htons(IP_OFFSET))
 		return;
 
-	if (skb_csum_unnecessary(skb_in)) {
+	if (skb_csum_unnecessary(skb_in) || !nf_reject_verify_csum(proto)) {
 		icmp_send(skb_in, ICMP_DEST_UNREACH, code, 0);
 		return;
 	}
 
-	if (iph->protocol == IPPROTO_TCP || iph->protocol == IPPROTO_UDP)
-		proto = iph->protocol;
-	else
-		proto = 0;
-
 	if (nf_ip_checksum(skb_in, hook, ip_hdrlen(skb_in), proto) == 0)
 		icmp_send(skb_in, ICMP_DEST_UNREACH, code, 0);
 }
diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c
index b9c8a763c863..02e9228641e0 100644
--- a/net/ipv6/netfilter/nf_reject_ipv6.c
+++ b/net/ipv6/netfilter/nf_reject_ipv6.c
@@ -233,6 +233,9 @@ static bool reject6_csum_ok(struct sk_buff *skb, int hook)
 	if (thoff < 0 || thoff >= skb->len || (fo & htons(~0x7)) != 0)
 		return false;
 
+	if (!nf_reject_verify_csum(proto))
+		return true;
+
 	return nf_ip6_checksum(skb, hook, thoff, proto) == 0;
 }
 
-- 
cgit v1.2.3


From d9a9ea94f748f47b1d75c6c5e33edcf74476c445 Mon Sep 17 00:00:00 2001
From: Chad Austin <chadaustin@fb.com>
Date: Mon, 7 Jan 2019 16:53:17 -0800
Subject: fuse: support clients that don't implement 'opendir'

Allow filesystems to return ENOSYS from opendir, preventing the kernel from
sending opendir and releasedir messages in the future. This avoids
userspace transitions when filesystems don't need to keep track of state
per directory handle.

A new capability flag, FUSE_NO_OPENDIR_SUPPORT, parallels
FUSE_NO_OPEN_SUPPORT, indicating the new semantics for returning ENOSYS
from opendir.

Signed-off-by: Chad Austin <chadaustin@fb.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/fuse/file.c            | 11 +++++++----
 fs/fuse/fuse_i.h          |  3 +++
 fs/fuse/inode.c           |  3 ++-
 include/uapi/linux/fuse.h |  7 ++++++-
 4 files changed, 18 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 8ee0446a8322..cc6ffd23b80f 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -90,7 +90,7 @@ static void fuse_file_put(struct fuse_file *ff, bool sync, bool isdir)
 	if (refcount_dec_and_test(&ff->count)) {
 		struct fuse_req *req = ff->reserved_req;
 
-		if (ff->fc->no_open && !isdir) {
+		if (isdir ? ff->fc->no_opendir : ff->fc->no_open) {
 			/*
 			 * Drop the release request when client does not
 			 * implement 'open'
@@ -125,7 +125,7 @@ int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
 
 	ff->fh = 0;
 	ff->open_flags = FOPEN_KEEP_CACHE; /* Default for no-open */
-	if (!fc->no_open || isdir) {
+	if (isdir ? !fc->no_opendir : !fc->no_open) {
 		struct fuse_open_out outarg;
 		int err;
 
@@ -134,11 +134,14 @@ int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
 			ff->fh = outarg.fh;
 			ff->open_flags = outarg.open_flags;
 
-		} else if (err != -ENOSYS || isdir) {
+		} else if (err != -ENOSYS) {
 			fuse_file_free(ff);
 			return err;
 		} else {
-			fc->no_open = 1;
+			if (isdir)
+				fc->no_opendir = 1;
+			else
+				fc->no_open = 1;
 		}
 	}
 
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 033e30af519f..0920c0c032a0 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -630,6 +630,9 @@ struct fuse_conn {
 	/** Is open/release not implemented by fs? */
 	unsigned no_open:1;
 
+	/** Is opendir/releasedir not implemented by fs? */
+	unsigned no_opendir:1;
+
 	/** Is fsync not implemented by fs? */
 	unsigned no_fsync:1;
 
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 2bbb7c59d6da..1b3f3b67d9f0 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -972,7 +972,8 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req)
 		FUSE_DO_READDIRPLUS | FUSE_READDIRPLUS_AUTO | FUSE_ASYNC_DIO |
 		FUSE_WRITEBACK_CACHE | FUSE_NO_OPEN_SUPPORT |
 		FUSE_PARALLEL_DIROPS | FUSE_HANDLE_KILLPRIV | FUSE_POSIX_ACL |
-		FUSE_ABORT_ERROR | FUSE_MAX_PAGES | FUSE_CACHE_SYMLINKS;
+		FUSE_ABORT_ERROR | FUSE_MAX_PAGES | FUSE_CACHE_SYMLINKS |
+		FUSE_NO_OPENDIR_SUPPORT;
 	req->in.h.opcode = FUSE_INIT;
 	req->in.numargs = 1;
 	req->in.args[0].size = sizeof(*arg);
diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
index b4967d48bfda..2ac598614a8f 100644
--- a/include/uapi/linux/fuse.h
+++ b/include/uapi/linux/fuse.h
@@ -122,6 +122,9 @@
  *  - add FOPEN_CACHE_DIR
  *  - add FUSE_MAX_PAGES, add max_pages to init_out
  *  - add FUSE_CACHE_SYMLINKS
+ *
+ *  7.29
+ *  - add FUSE_NO_OPENDIR_SUPPORT flag
  */
 
 #ifndef _LINUX_FUSE_H
@@ -157,7 +160,7 @@
 #define FUSE_KERNEL_VERSION 7
 
 /** Minor version number of this interface */
-#define FUSE_KERNEL_MINOR_VERSION 28
+#define FUSE_KERNEL_MINOR_VERSION 29
 
 /** The node ID of the root inode */
 #define FUSE_ROOT_ID 1
@@ -259,6 +262,7 @@ struct fuse_file_lock {
  * FUSE_ABORT_ERROR: reading the device after abort returns ECONNABORTED
  * FUSE_MAX_PAGES: init_out.max_pages contains the max number of req pages
  * FUSE_CACHE_SYMLINKS: cache READLINK responses
+ * FUSE_NO_OPENDIR_SUPPORT: kernel supports zero-message opendir
  */
 #define FUSE_ASYNC_READ		(1 << 0)
 #define FUSE_POSIX_LOCKS	(1 << 1)
@@ -284,6 +288,7 @@ struct fuse_file_lock {
 #define FUSE_ABORT_ERROR	(1 << 21)
 #define FUSE_MAX_PAGES		(1 << 22)
 #define FUSE_CACHE_SYMLINKS	(1 << 23)
+#define FUSE_NO_OPENDIR_SUPPORT (1 << 24)
 
 /**
  * CUSE INIT request/reply flags
-- 
cgit v1.2.3


From 6f701383368d54e8d43fad2a0bac30f877899568 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 11 Feb 2019 11:24:00 -0500
Subject: SUNRPC: Display symbolic flag names in RPC trace events

Human-readable flags make it easier to observe RPC scheduling
decisions and other operational details.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 include/trace/events/sunrpc.h | 56 ++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 50 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
index 0d5d0d91f861..f88b0f52aa7e 100644
--- a/include/trace/events/sunrpc.h
+++ b/include/trace/events/sunrpc.h
@@ -77,6 +77,50 @@ TRACE_EVENT(rpc_request,
 		)
 );
 
+TRACE_DEFINE_ENUM(RPC_TASK_ASYNC);
+TRACE_DEFINE_ENUM(RPC_TASK_SWAPPER);
+TRACE_DEFINE_ENUM(RPC_CALL_MAJORSEEN);
+TRACE_DEFINE_ENUM(RPC_TASK_ROOTCREDS);
+TRACE_DEFINE_ENUM(RPC_TASK_DYNAMIC);
+TRACE_DEFINE_ENUM(RPC_TASK_KILLED);
+TRACE_DEFINE_ENUM(RPC_TASK_SOFT);
+TRACE_DEFINE_ENUM(RPC_TASK_SOFTCONN);
+TRACE_DEFINE_ENUM(RPC_TASK_SENT);
+TRACE_DEFINE_ENUM(RPC_TASK_TIMEOUT);
+TRACE_DEFINE_ENUM(RPC_TASK_NOCONNECT);
+TRACE_DEFINE_ENUM(RPC_TASK_NO_RETRANS_TIMEOUT);
+
+#define rpc_show_task_flags(flags)					\
+	__print_flags(flags, "|",					\
+		{ RPC_TASK_ASYNC, "ASYNC" },				\
+		{ RPC_TASK_SWAPPER, "SWAPPER" },			\
+		{ RPC_CALL_MAJORSEEN, "MAJORSEEN" },			\
+		{ RPC_TASK_ROOTCREDS, "ROOTCREDS" },			\
+		{ RPC_TASK_DYNAMIC, "DYNAMIC" },			\
+		{ RPC_TASK_KILLED, "KILLED" },				\
+		{ RPC_TASK_SOFT, "SOFT" },				\
+		{ RPC_TASK_SOFTCONN, "SOFTCONN" },			\
+		{ RPC_TASK_SENT, "SENT" },				\
+		{ RPC_TASK_TIMEOUT, "TIMEOUT" },			\
+		{ RPC_TASK_NOCONNECT, "NOCONNECT" },			\
+		{ RPC_TASK_NO_RETRANS_TIMEOUT, "NORTO" })
+
+TRACE_DEFINE_ENUM(RPC_TASK_RUNNING);
+TRACE_DEFINE_ENUM(RPC_TASK_QUEUED);
+TRACE_DEFINE_ENUM(RPC_TASK_ACTIVE);
+TRACE_DEFINE_ENUM(RPC_TASK_NEED_XMIT);
+TRACE_DEFINE_ENUM(RPC_TASK_NEED_RECV);
+TRACE_DEFINE_ENUM(RPC_TASK_MSG_PIN_WAIT);
+
+#define rpc_show_runstate(flags)					\
+	__print_flags(flags, "|",					\
+		{ (1UL << RPC_TASK_RUNNING), "RUNNING" },		\
+		{ (1UL << RPC_TASK_QUEUED), "QUEUED" },			\
+		{ (1UL << RPC_TASK_ACTIVE), "ACTIVE" },			\
+		{ (1UL << RPC_TASK_NEED_XMIT), "NEED_XMIT" },		\
+		{ (1UL << RPC_TASK_NEED_RECV), "NEED_RECV" },		\
+		{ (1UL << RPC_TASK_MSG_PIN_WAIT), "MSG_PIN_WAIT" })
+
 DECLARE_EVENT_CLASS(rpc_task_running,
 
 	TP_PROTO(const struct rpc_task *task, const void *action),
@@ -102,10 +146,10 @@ DECLARE_EVENT_CLASS(rpc_task_running,
 		__entry->flags = task->tk_flags;
 		),
 
-	TP_printk("task:%u@%d flags=%4.4x state=%4.4lx status=%d action=%pf",
+	TP_printk("task:%u@%d flags=%s runstate=%s status=%d action=%pf",
 		__entry->task_id, __entry->client_id,
-		__entry->flags,
-		__entry->runstate,
+		rpc_show_task_flags(__entry->flags),
+		rpc_show_runstate(__entry->runstate),
 		__entry->status,
 		__entry->action
 		)
@@ -149,10 +193,10 @@ DECLARE_EVENT_CLASS(rpc_task_queued,
 		__assign_str(q_name, rpc_qname(q));
 		),
 
-	TP_printk("task:%u@%d flags=%4.4x state=%4.4lx status=%d timeout=%lu queue=%s",
+	TP_printk("task:%u@%d flags=%s runstate=%s status=%d timeout=%lu queue=%s",
 		__entry->task_id, __entry->client_id,
-		__entry->flags,
-		__entry->runstate,
+		rpc_show_task_flags(__entry->flags),
+		rpc_show_runstate(__entry->runstate),
 		__entry->status,
 		__entry->timeout,
 		__get_str(q_name)
-- 
cgit v1.2.3


From ebb09b33c60c46fd4f7ffa0af9e693eebe765d1b Mon Sep 17 00:00:00 2001
From: Leonid Ravich <lravich@gmail.com>
Date: Tue, 12 Feb 2019 22:09:28 +0200
Subject: NTB: add new parameter to peer_db_addr() db_bit and db_data

NTB door bell usage depends on NTB hardware.

ex: intel NTB gen1 has one peer door bell register which can be controlled
by the bitmap writen to it, while Intel NTB gen3 has a registers
per door bell and the data trigering the each door bell is always 1.

therefore exposing only peer door bell address forcing the user
to be aware of such low level details

Signed-off-by: Leonid Ravich <Leonid.Ravich@emc.com>
Acked-by: Logan Gunthorpe <logang@deltatee.com>
Acked-by: Dave Jiang <dave.jiang@intel.com>
Acked-by: Allen Hubbe <allenbh@gmail.com>
Signed-off-by: Jon Mason <jdmason@kudzu.us>
---
 drivers/ntb/hw/intel/ntb_hw_gen1.c     | 25 +++++++++++++++++++------
 drivers/ntb/hw/intel/ntb_hw_gen1.h     |  5 +++--
 drivers/ntb/hw/intel/ntb_hw_gen3.c     | 33 ++++++++++++++++++++++++++++++++-
 drivers/ntb/hw/mscc/ntb_hw_switchtec.c |  9 ++++++++-
 include/linux/ntb.h                    | 10 +++++++---
 5 files changed, 69 insertions(+), 13 deletions(-)

(limited to 'include')

diff --git a/drivers/ntb/hw/intel/ntb_hw_gen1.c b/drivers/ntb/hw/intel/ntb_hw_gen1.c
index 2ad263f708da..bb57ec239029 100644
--- a/drivers/ntb/hw/intel/ntb_hw_gen1.c
+++ b/drivers/ntb/hw/intel/ntb_hw_gen1.c
@@ -180,7 +180,7 @@ int ndev_mw_to_bar(struct intel_ntb_dev *ndev, int idx)
 	return ndev->reg->mw_bar[idx];
 }
 
-static inline int ndev_db_addr(struct intel_ntb_dev *ndev,
+void ndev_db_addr(struct intel_ntb_dev *ndev,
 			       phys_addr_t *db_addr, resource_size_t *db_size,
 			       phys_addr_t reg_addr, unsigned long reg)
 {
@@ -196,8 +196,6 @@ static inline int ndev_db_addr(struct intel_ntb_dev *ndev,
 		*db_size = ndev->reg->db_size;
 		dev_dbg(&ndev->ntb.pdev->dev, "Peer db size %llx\n", *db_size);
 	}
-
-	return 0;
 }
 
 u64 ndev_db_read(struct intel_ntb_dev *ndev,
@@ -1111,13 +1109,28 @@ int intel_ntb_db_clear_mask(struct ntb_dev *ntb, u64 db_bits)
 				  ndev->self_reg->db_mask);
 }
 
-int intel_ntb_peer_db_addr(struct ntb_dev *ntb, phys_addr_t *db_addr,
-			   resource_size_t *db_size)
+static int intel_ntb_peer_db_addr(struct ntb_dev *ntb, phys_addr_t *db_addr,
+			   resource_size_t *db_size, u64 *db_data, int db_bit)
 {
+	u64 db_bits;
 	struct intel_ntb_dev *ndev = ntb_ndev(ntb);
 
-	return ndev_db_addr(ndev, db_addr, db_size, ndev->peer_addr,
+	if (unlikely(db_bit >= BITS_PER_LONG_LONG))
+		return -EINVAL;
+
+	db_bits = BIT_ULL(db_bit);
+
+	if (unlikely(db_bits & ~ntb_ndev(ntb)->db_valid_mask))
+		return -EINVAL;
+
+	ndev_db_addr(ndev, db_addr, db_size, ndev->peer_addr,
 			    ndev->peer_reg->db_bell);
+
+	if (db_data)
+		*db_data = db_bits;
+
+
+	return 0;
 }
 
 static int intel_ntb_peer_db_set(struct ntb_dev *ntb, u64 db_bits)
diff --git a/drivers/ntb/hw/intel/ntb_hw_gen1.h b/drivers/ntb/hw/intel/ntb_hw_gen1.h
index ad8ec1444436..544cf5c06f4d 100644
--- a/drivers/ntb/hw/intel/ntb_hw_gen1.h
+++ b/drivers/ntb/hw/intel/ntb_hw_gen1.h
@@ -147,6 +147,9 @@ extern struct intel_b2b_addr xeon_b2b_dsd_addr;
 int ndev_init_isr(struct intel_ntb_dev *ndev, int msix_min, int msix_max,
 		int msix_shift, int total_shift);
 enum ntb_topo xeon_ppd_topo(struct intel_ntb_dev *ndev, u8 ppd);
+void ndev_db_addr(struct intel_ntb_dev *ndev,
+				phys_addr_t *db_addr, resource_size_t *db_size,
+				phys_addr_t reg_addr, unsigned long reg);
 u64 ndev_db_read(struct intel_ntb_dev *ndev, void __iomem *mmio);
 int ndev_db_write(struct intel_ntb_dev *ndev, u64 db_bits,
 				void __iomem *mmio);
@@ -166,8 +169,6 @@ int intel_ntb_db_vector_count(struct ntb_dev *ntb);
 u64 intel_ntb_db_vector_mask(struct ntb_dev *ntb, int db_vector);
 int intel_ntb_db_set_mask(struct ntb_dev *ntb, u64 db_bits);
 int intel_ntb_db_clear_mask(struct ntb_dev *ntb, u64 db_bits);
-int intel_ntb_peer_db_addr(struct ntb_dev *ntb, phys_addr_t *db_addr,
-		resource_size_t *db_size);
 int intel_ntb_spad_is_unsafe(struct ntb_dev *ntb);
 int intel_ntb_spad_count(struct ntb_dev *ntb);
 u32 intel_ntb_spad_read(struct ntb_dev *ntb, int idx);
diff --git a/drivers/ntb/hw/intel/ntb_hw_gen3.c b/drivers/ntb/hw/intel/ntb_hw_gen3.c
index b3fa24778f94..f475b56a3f49 100644
--- a/drivers/ntb/hw/intel/ntb_hw_gen3.c
+++ b/drivers/ntb/hw/intel/ntb_hw_gen3.c
@@ -532,6 +532,37 @@ static int intel_ntb3_mw_set_trans(struct ntb_dev *ntb, int pidx, int idx,
 	return 0;
 }
 
+int intel_ntb3_peer_db_addr(struct ntb_dev *ntb, phys_addr_t *db_addr,
+				resource_size_t *db_size,
+				u64 *db_data, int db_bit)
+{
+	phys_addr_t db_addr_base;
+	struct intel_ntb_dev *ndev = ntb_ndev(ntb);
+
+	if (unlikely(db_bit >= BITS_PER_LONG_LONG))
+		return -EINVAL;
+
+	if (unlikely(BIT_ULL(db_bit) & ~ntb_ndev(ntb)->db_valid_mask))
+		return -EINVAL;
+
+	ndev_db_addr(ndev, &db_addr_base, db_size, ndev->peer_addr,
+				ndev->peer_reg->db_bell);
+
+	if (db_addr) {
+		*db_addr = db_addr_base + (db_bit * 4);
+		dev_dbg(&ndev->ntb.pdev->dev, "Peer db addr %llx db bit %d\n",
+				*db_addr, db_bit);
+	}
+
+	if (db_data) {
+		*db_data = 1;
+		dev_dbg(&ndev->ntb.pdev->dev, "Peer db data %llx db bit %d\n",
+				*db_data, db_bit);
+	}
+
+	return 0;
+}
+
 static int intel_ntb3_peer_db_set(struct ntb_dev *ntb, u64 db_bits)
 {
 	struct intel_ntb_dev *ndev = ntb_ndev(ntb);
@@ -584,7 +615,7 @@ const struct ntb_dev_ops intel_ntb3_ops = {
 	.db_clear		= intel_ntb3_db_clear,
 	.db_set_mask		= intel_ntb_db_set_mask,
 	.db_clear_mask		= intel_ntb_db_clear_mask,
-	.peer_db_addr		= intel_ntb_peer_db_addr,
+	.peer_db_addr		= intel_ntb3_peer_db_addr,
 	.peer_db_set		= intel_ntb3_peer_db_set,
 	.spad_is_unsafe		= intel_ntb_spad_is_unsafe,
 	.spad_count		= intel_ntb_spad_count,
diff --git a/drivers/ntb/hw/mscc/ntb_hw_switchtec.c b/drivers/ntb/hw/mscc/ntb_hw_switchtec.c
index f6f00354047b..9ae944597708 100644
--- a/drivers/ntb/hw/mscc/ntb_hw_switchtec.c
+++ b/drivers/ntb/hw/mscc/ntb_hw_switchtec.c
@@ -710,11 +710,16 @@ static u64 switchtec_ntb_db_read_mask(struct ntb_dev *ntb)
 
 static int switchtec_ntb_peer_db_addr(struct ntb_dev *ntb,
 				      phys_addr_t *db_addr,
-				      resource_size_t *db_size)
+				      resource_size_t *db_size,
+				      u64 *db_data,
+				      int db_bit)
 {
 	struct switchtec_ntb *sndev = ntb_sndev(ntb);
 	unsigned long offset;
 
+	if (unlikely(db_bit >= BITS_PER_LONG_LONG))
+		return -EINVAL;
+
 	offset = (unsigned long)sndev->mmio_peer_dbmsg->odb -
 		(unsigned long)sndev->stdev->mmio;
 
@@ -724,6 +729,8 @@ static int switchtec_ntb_peer_db_addr(struct ntb_dev *ntb,
 		*db_addr = pci_resource_start(ntb->pdev, 0) + offset;
 	if (db_size)
 		*db_size = sizeof(u32);
+	if (db_data)
+		*db_data = BIT_ULL(db_bit) << sndev->db_peer_shift;
 
 	return 0;
 }
diff --git a/include/linux/ntb.h b/include/linux/ntb.h
index 181d16601dd9..56a92e3ae3ae 100644
--- a/include/linux/ntb.h
+++ b/include/linux/ntb.h
@@ -296,7 +296,8 @@ struct ntb_dev_ops {
 	int (*db_clear_mask)(struct ntb_dev *ntb, u64 db_bits);
 
 	int (*peer_db_addr)(struct ntb_dev *ntb,
-			    phys_addr_t *db_addr, resource_size_t *db_size);
+			    phys_addr_t *db_addr, resource_size_t *db_size,
+				u64 *db_data, int db_bit);
 	u64 (*peer_db_read)(struct ntb_dev *ntb);
 	int (*peer_db_set)(struct ntb_dev *ntb, u64 db_bits);
 	int (*peer_db_clear)(struct ntb_dev *ntb, u64 db_bits);
@@ -1078,6 +1079,8 @@ static inline int ntb_db_clear_mask(struct ntb_dev *ntb, u64 db_bits)
  * @ntb:	NTB device context.
  * @db_addr:	OUT - The address of the peer doorbell register.
  * @db_size:	OUT - The number of bytes to write the peer doorbell register.
+ * @db_data:	OUT - The data of peer doorbell register
+ * @db_bit:		door bell bit number
  *
  * Return the address of the peer doorbell register.  This may be used, for
  * example, by drivers that offload memory copy operations to a dma engine.
@@ -1091,12 +1094,13 @@ static inline int ntb_db_clear_mask(struct ntb_dev *ntb, u64 db_bits)
  */
 static inline int ntb_peer_db_addr(struct ntb_dev *ntb,
 				   phys_addr_t *db_addr,
-				   resource_size_t *db_size)
+				   resource_size_t *db_size,
+				   u64 *db_data, int db_bit)
 {
 	if (!ntb->ops->peer_db_addr)
 		return -EINVAL;
 
-	return ntb->ops->peer_db_addr(ntb, db_addr, db_size);
+	return ntb->ops->peer_db_addr(ntb, db_addr, db_size, db_data, db_bit);
 }
 
 /**
-- 
cgit v1.2.3


From 0ccc61b1c76e5163c6fea6cf83bd18e7ea244c5b Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 11 Feb 2019 11:24:05 -0500
Subject: SUNRPC: Add xdr_stream::rqst field

Having access to the controlling rpc_rqst means a trace point in the
XDR code can report:

 - the XID
 - the task ID and client ID
 - the p_name of RPC being processed

Subsequent patches will introduce such trace points.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 fs/nfs/callback_xdr.c                  |  5 +++--
 fs/nfs/flexfilelayout/flexfilelayout.c |  2 +-
 include/linux/sunrpc/xdr.h             |  8 ++++++--
 net/sunrpc/auth.c                      |  4 ++--
 net/sunrpc/auth_gss/auth_gss.c         |  4 ++--
 net/sunrpc/xdr.c                       | 12 +++++++++---
 net/sunrpc/xprtrdma/backchannel.c      |  2 +-
 net/sunrpc/xprtrdma/rpc_rdma.c         |  4 ++--
 8 files changed, 26 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index a87a56273407..bc7c1766a2be 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -943,10 +943,11 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp)
 	};
 	unsigned int nops = 0;
 
-	xdr_init_decode(&xdr_in, &rqstp->rq_arg, rqstp->rq_arg.head[0].iov_base);
+	xdr_init_decode(&xdr_in, &rqstp->rq_arg,
+			rqstp->rq_arg.head[0].iov_base, NULL);
 
 	p = (__be32*)((char *)rqstp->rq_res.head[0].iov_base + rqstp->rq_res.head[0].iov_len);
-	xdr_init_encode(&xdr_out, &rqstp->rq_res, p);
+	xdr_init_encode(&xdr_out, &rqstp->rq_res, p, NULL);
 
 	status = decode_compound_hdr_arg(&xdr_in, &hdr_arg);
 	if (status == htonl(NFS4ERR_RESOURCE))
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index 63abe705f4ca..32701b6a9566 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -2036,7 +2036,7 @@ ff_layout_encode_layoutreturn(struct xdr_stream *xdr,
 
 	dprintk("%s: Begin\n", __func__);
 
-	xdr_init_encode(&tmp_xdr, &tmp_buf, NULL);
+	xdr_init_encode(&tmp_xdr, &tmp_buf, NULL, NULL);
 
 	ff_layout_encode_ioerr(&tmp_xdr, args, ff_args);
 	ff_layout_encode_iostats_array(&tmp_xdr, args, ff_args);
diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index 2ec128060239..787939d13643 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -217,6 +217,8 @@ struct xdr_stream {
 	struct kvec scratch;	/* Scratch buffer */
 	struct page **page_ptr;	/* pointer to the current page */
 	unsigned int nwords;	/* Remaining decode buffer length */
+
+	struct rpc_rqst *rqst;	/* For debugging */
 };
 
 /*
@@ -227,7 +229,8 @@ typedef void	(*kxdreproc_t)(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
 typedef int	(*kxdrdproc_t)(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
 		void *obj);
 
-extern void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p);
+extern void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf,
+			    __be32 *p, struct rpc_rqst *rqst);
 extern __be32 *xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes);
 extern void xdr_commit_encode(struct xdr_stream *xdr);
 extern void xdr_truncate_encode(struct xdr_stream *xdr, size_t len);
@@ -235,7 +238,8 @@ extern int xdr_restrict_buflen(struct xdr_stream *xdr, int newbuflen);
 extern void xdr_write_pages(struct xdr_stream *xdr, struct page **pages,
 		unsigned int base, unsigned int len);
 extern unsigned int xdr_stream_pos(const struct xdr_stream *xdr);
-extern void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p);
+extern void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf,
+			    __be32 *p, struct rpc_rqst *rqst);
 extern void xdr_init_decode_pages(struct xdr_stream *xdr, struct xdr_buf *buf,
 		struct page **pages, unsigned int len);
 extern void xdr_set_scratch_buffer(struct xdr_stream *xdr, void *buf, size_t buflen);
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index f3023bbc0b7f..8dfab6119e6a 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -798,7 +798,7 @@ static void rpcauth_wrap_req_encode(kxdreproc_t encode, struct rpc_rqst *rqstp,
 {
 	struct xdr_stream xdr;
 
-	xdr_init_encode(&xdr, &rqstp->rq_snd_buf, data);
+	xdr_init_encode(&xdr, &rqstp->rq_snd_buf, data, rqstp);
 	encode(rqstp, &xdr, obj);
 }
 
@@ -823,7 +823,7 @@ rpcauth_unwrap_req_decode(kxdrdproc_t decode, struct rpc_rqst *rqstp,
 {
 	struct xdr_stream xdr;
 
-	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, data);
+	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, data, rqstp);
 	return decode(rqstp, &xdr, obj);
 }
 
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 1531b0219344..a42672e81792 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -1722,7 +1722,7 @@ static void gss_wrap_req_encode(kxdreproc_t encode, struct rpc_rqst *rqstp,
 {
 	struct xdr_stream xdr;
 
-	xdr_init_encode(&xdr, &rqstp->rq_snd_buf, p);
+	xdr_init_encode(&xdr, &rqstp->rq_snd_buf, p, rqstp);
 	encode(rqstp, &xdr, obj);
 }
 
@@ -1998,7 +1998,7 @@ gss_unwrap_req_decode(kxdrdproc_t decode, struct rpc_rqst *rqstp,
 {
 	struct xdr_stream xdr;
 
-	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
+	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p, rqstp);
 	return decode(rqstp, &xdr, obj);
 }
 
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index f302c6eb8779..345f08b634ee 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -483,6 +483,7 @@ EXPORT_SYMBOL_GPL(xdr_stream_pos);
  * @xdr: pointer to xdr_stream struct
  * @buf: pointer to XDR buffer in which to encode data
  * @p: current pointer inside XDR buffer
+ * @rqst: pointer to controlling rpc_rqst, for debugging
  *
  * Note: at the moment the RPC client only passes the length of our
  *	 scratch buffer in the xdr_buf's header kvec. Previously this
@@ -491,7 +492,8 @@ EXPORT_SYMBOL_GPL(xdr_stream_pos);
  *	 of the buffer length, and takes care of adjusting the kvec
  *	 length for us.
  */
-void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p)
+void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p,
+		     struct rpc_rqst *rqst)
 {
 	struct kvec *iov = buf->head;
 	int scratch_len = buf->buflen - buf->page_len - buf->tail[0].iov_len;
@@ -513,6 +515,7 @@ void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p)
 		buf->len += len;
 		iov->iov_len += len;
 	}
+	xdr->rqst = rqst;
 }
 EXPORT_SYMBOL_GPL(xdr_init_encode);
 
@@ -819,8 +822,10 @@ static bool xdr_set_next_buffer(struct xdr_stream *xdr)
  * @xdr: pointer to xdr_stream struct
  * @buf: pointer to XDR buffer from which to decode data
  * @p: current pointer inside XDR buffer
+ * @rqst: pointer to controlling rpc_rqst, for debugging
  */
-void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p)
+void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p,
+		     struct rpc_rqst *rqst)
 {
 	xdr->buf = buf;
 	xdr->scratch.iov_base = NULL;
@@ -836,6 +841,7 @@ void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p)
 		xdr->nwords -= p - xdr->p;
 		xdr->p = p;
 	}
+	xdr->rqst = rqst;
 }
 EXPORT_SYMBOL_GPL(xdr_init_decode);
 
@@ -854,7 +860,7 @@ void xdr_init_decode_pages(struct xdr_stream *xdr, struct xdr_buf *buf,
 	buf->page_len =  len;
 	buf->buflen =  len;
 	buf->len = len;
-	xdr_init_decode(xdr, buf, NULL);
+	xdr_init_decode(xdr, buf, NULL, NULL);
 }
 EXPORT_SYMBOL_GPL(xdr_init_decode_pages);
 
diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c
index 0de9b3e63770..98c1e43eb7b1 100644
--- a/net/sunrpc/xprtrdma/backchannel.c
+++ b/net/sunrpc/xprtrdma/backchannel.c
@@ -123,7 +123,7 @@ static int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst)
 
 	rpcrdma_set_xdrlen(&req->rl_hdrbuf, 0);
 	xdr_init_encode(&req->rl_stream, &req->rl_hdrbuf,
-			req->rl_rdmabuf->rg_base);
+			req->rl_rdmabuf->rg_base, rqst);
 
 	p = xdr_reserve_space(&req->rl_stream, 28);
 	if (unlikely(!p))
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index 7774aee7c013..6c1fb270f127 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -748,7 +748,7 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst)
 
 	rpcrdma_set_xdrlen(&req->rl_hdrbuf, 0);
 	xdr_init_encode(xdr, &req->rl_hdrbuf,
-			req->rl_rdmabuf->rg_base);
+			req->rl_rdmabuf->rg_base, rqst);
 
 	/* Fixed header fields */
 	ret = -EMSGSIZE;
@@ -1329,7 +1329,7 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
 
 	/* Fixed transport header fields */
 	xdr_init_decode(&rep->rr_stream, &rep->rr_hdrbuf,
-			rep->rr_hdrbuf.head[0].iov_base);
+			rep->rr_hdrbuf.head[0].iov_base, NULL);
 	p = xdr_inline_decode(&rep->rr_stream, 4 * sizeof(*p));
 	if (unlikely(!p))
 		goto out_shortreply;
-- 
cgit v1.2.3


From 5582863f450ca44c472dbf4812a62f4e0e68b0c8 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 11 Feb 2019 11:24:10 -0500
Subject: SUNRPC: Add XDR overflow trace event

This can help field troubleshooting without needing the overhead of
a full network capture (ie, tcpdump).

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 include/trace/events/sunrpc.h | 67 +++++++++++++++++++++++++++++++++++++++++++
 net/sunrpc/xdr.c              | 24 +++++++++++-----
 2 files changed, 84 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
index f88b0f52aa7e..fbc41b8142d3 100644
--- a/include/trace/events/sunrpc.h
+++ b/include/trace/events/sunrpc.h
@@ -254,6 +254,73 @@ TRACE_EVENT(rpc_stats_latency,
 		__entry->backlog, __entry->rtt, __entry->execute)
 );
 
+TRACE_EVENT(rpc_xdr_overflow,
+	TP_PROTO(
+		const struct xdr_stream *xdr,
+		size_t requested
+	),
+
+	TP_ARGS(xdr, requested),
+
+	TP_STRUCT__entry(
+		__field(unsigned int, task_id)
+		__field(unsigned int, client_id)
+		__field(int, version)
+		__field(size_t, requested)
+		__field(const void *, end)
+		__field(const void *, p)
+		__field(const void *, head_base)
+		__field(size_t, head_len)
+		__field(const void *, tail_base)
+		__field(size_t, tail_len)
+		__field(unsigned int, page_len)
+		__field(unsigned int, len)
+		__string(progname,
+			 xdr->rqst->rq_task->tk_client->cl_program->name)
+		__string(procedure,
+			 xdr->rqst->rq_task->tk_msg.rpc_proc->p_name)
+	),
+
+	TP_fast_assign(
+		if (xdr->rqst) {
+			const struct rpc_task *task = xdr->rqst->rq_task;
+
+			__entry->task_id = task->tk_pid;
+			__entry->client_id = task->tk_client->cl_clid;
+			__assign_str(progname,
+				     task->tk_client->cl_program->name)
+			__entry->version = task->tk_client->cl_vers;
+			__assign_str(procedure, task->tk_msg.rpc_proc->p_name)
+		} else {
+			__entry->task_id = 0;
+			__entry->client_id = 0;
+			__assign_str(progname, "unknown")
+			__entry->version = 0;
+			__assign_str(procedure, "unknown")
+		}
+		__entry->requested = requested;
+		__entry->end = xdr->end;
+		__entry->p = xdr->p;
+		__entry->head_base = xdr->buf->head[0].iov_base,
+		__entry->head_len = xdr->buf->head[0].iov_len,
+		__entry->page_len = xdr->buf->page_len,
+		__entry->tail_base = xdr->buf->tail[0].iov_base,
+		__entry->tail_len = xdr->buf->tail[0].iov_len,
+		__entry->len = xdr->buf->len;
+	),
+
+	TP_printk(
+		"task:%u@%u %sv%d %s requested=%zu p=%p end=%p xdr=[%p,%zu]/%u/[%p,%zu]/%u\n",
+		__entry->task_id, __entry->client_id,
+		__get_str(progname), __entry->version, __get_str(procedure),
+		__entry->requested, __entry->p, __entry->end,
+		__entry->head_base, __entry->head_len,
+		__entry->page_len,
+		__entry->tail_base, __entry->tail_len,
+		__entry->len
+	)
+);
+
 /*
  * First define the enums in the below macros to be exported to userspace
  * via TRACE_DEFINE_ENUM().
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index 345f08b634ee..6d0b615a02ae 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -16,6 +16,7 @@
 #include <linux/sunrpc/xdr.h>
 #include <linux/sunrpc/msg_prot.h>
 #include <linux/bvec.h>
+#include <trace/events/sunrpc.h>
 
 /*
  * XDR functions for basic NFS types
@@ -554,9 +555,9 @@ static __be32 *xdr_get_next_encode_buffer(struct xdr_stream *xdr,
 	int frag1bytes, frag2bytes;
 
 	if (nbytes > PAGE_SIZE)
-		return NULL; /* Bigger buffers require special handling */
+		goto out_overflow; /* Bigger buffers require special handling */
 	if (xdr->buf->len + nbytes > xdr->buf->buflen)
-		return NULL; /* Sorry, we're totally out of space */
+		goto out_overflow; /* Sorry, we're totally out of space */
 	frag1bytes = (xdr->end - xdr->p) << 2;
 	frag2bytes = nbytes - frag1bytes;
 	if (xdr->iov)
@@ -585,6 +586,9 @@ static __be32 *xdr_get_next_encode_buffer(struct xdr_stream *xdr,
 	xdr->buf->page_len += frag2bytes;
 	xdr->buf->len += nbytes;
 	return p;
+out_overflow:
+	trace_rpc_xdr_overflow(xdr, nbytes);
+	return NULL;
 }
 
 /**
@@ -902,20 +906,23 @@ static __be32 *xdr_copy_to_scratch(struct xdr_stream *xdr, size_t nbytes)
 	size_t cplen = (char *)xdr->end - (char *)xdr->p;
 
 	if (nbytes > xdr->scratch.iov_len)
-		return NULL;
+		goto out_overflow;
 	p = __xdr_inline_decode(xdr, cplen);
 	if (p == NULL)
 		return NULL;
 	memcpy(cpdest, p, cplen);
+	if (!xdr_set_next_buffer(xdr))
+		goto out_overflow;
 	cpdest += cplen;
 	nbytes -= cplen;
-	if (!xdr_set_next_buffer(xdr))
-		return NULL;
 	p = __xdr_inline_decode(xdr, nbytes);
 	if (p == NULL)
 		return NULL;
 	memcpy(cpdest, p, nbytes);
 	return xdr->scratch.iov_base;
+out_overflow:
+	trace_rpc_xdr_overflow(xdr, nbytes);
+	return NULL;
 }
 
 /**
@@ -932,14 +939,17 @@ __be32 * xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes)
 {
 	__be32 *p;
 
-	if (nbytes == 0)
+	if (unlikely(nbytes == 0))
 		return xdr->p;
 	if (xdr->p == xdr->end && !xdr_set_next_buffer(xdr))
-		return NULL;
+		goto out_overflow;
 	p = __xdr_inline_decode(xdr, nbytes);
 	if (p != NULL)
 		return p;
 	return xdr_copy_to_scratch(xdr, nbytes);
+out_overflow:
+	trace_rpc_xdr_overflow(xdr, nbytes);
+	return NULL;
 }
 EXPORT_SYMBOL_GPL(xdr_inline_decode);
 
-- 
cgit v1.2.3


From 7be9cea3600ba24f45733f823cb4ecb5a40f5db7 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 11 Feb 2019 11:24:16 -0500
Subject: SUNRPC: Add trace event that reports reply page vector alignment

We don't want READ payloads that are partially in the head iovec and
in the page buffer because this requires pull-up, which can be
expensive.

The NFS/RPC client tries hard to predict the size of the head iovec
so that the incoming READ data payload lands only in the page
vector, but it doesn't always get it right. To help diagnose such
problems, add a trace point in the logic that decodes READ-like
operations that reports whether pull-up is being done.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 include/trace/events/sunrpc.h | 59 +++++++++++++++++++++++++++++++++++++++++++
 net/sunrpc/xdr.c              | 33 +++++++++++++++++++-----
 2 files changed, 86 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
index fbc41b8142d3..627650800676 100644
--- a/include/trace/events/sunrpc.h
+++ b/include/trace/events/sunrpc.h
@@ -321,6 +321,65 @@ TRACE_EVENT(rpc_xdr_overflow,
 	)
 );
 
+TRACE_EVENT(rpc_xdr_alignment,
+	TP_PROTO(
+		const struct xdr_stream *xdr,
+		size_t offset,
+		unsigned int copied
+	),
+
+	TP_ARGS(xdr, offset, copied),
+
+	TP_STRUCT__entry(
+		__field(unsigned int, task_id)
+		__field(unsigned int, client_id)
+		__field(int, version)
+		__field(size_t, offset)
+		__field(unsigned int, copied)
+		__field(const void *, head_base)
+		__field(size_t, head_len)
+		__field(const void *, tail_base)
+		__field(size_t, tail_len)
+		__field(unsigned int, page_len)
+		__field(unsigned int, len)
+		__string(progname,
+			 xdr->rqst->rq_task->tk_client->cl_program->name)
+		__string(procedure,
+			 xdr->rqst->rq_task->tk_msg.rpc_proc->p_name)
+	),
+
+	TP_fast_assign(
+		const struct rpc_task *task = xdr->rqst->rq_task;
+
+		__entry->task_id = task->tk_pid;
+		__entry->client_id = task->tk_client->cl_clid;
+		__assign_str(progname,
+			     task->tk_client->cl_program->name)
+		__entry->version = task->tk_client->cl_vers;
+		__assign_str(procedure, task->tk_msg.rpc_proc->p_name)
+
+		__entry->offset = offset;
+		__entry->copied = copied;
+		__entry->head_base = xdr->buf->head[0].iov_base,
+		__entry->head_len = xdr->buf->head[0].iov_len,
+		__entry->page_len = xdr->buf->page_len,
+		__entry->tail_base = xdr->buf->tail[0].iov_base,
+		__entry->tail_len = xdr->buf->tail[0].iov_len,
+		__entry->len = xdr->buf->len;
+	),
+
+	TP_printk(
+		"task:%u@%u %sv%d %s offset=%zu copied=%u xdr=[%p,%zu]/%u/[%p,%zu]/%u\n",
+		__entry->task_id, __entry->client_id,
+		__get_str(progname), __entry->version, __get_str(procedure),
+		__entry->offset, __entry->copied,
+		__entry->head_base, __entry->head_len,
+		__entry->page_len,
+		__entry->tail_base, __entry->tail_len,
+		__entry->len
+	)
+);
+
 /*
  * First define the enums in the below macros to be exported to userspace
  * via TRACE_DEFINE_ENUM().
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index 6d0b615a02ae..5f0aa53fa4ae 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -347,13 +347,15 @@ EXPORT_SYMBOL_GPL(_copy_from_pages);
  * 'len' bytes. The extra data is not lost, but is instead
  * moved into the inlined pages and/or the tail.
  */
-static void
+static unsigned int
 xdr_shrink_bufhead(struct xdr_buf *buf, size_t len)
 {
 	struct kvec *head, *tail;
 	size_t copy, offs;
 	unsigned int pglen = buf->page_len;
+	unsigned int result;
 
+	result = 0;
 	tail = buf->tail;
 	head = buf->head;
 
@@ -367,6 +369,7 @@ xdr_shrink_bufhead(struct xdr_buf *buf, size_t len)
 			copy = tail->iov_len - len;
 			memmove((char *)tail->iov_base + len,
 					tail->iov_base, copy);
+			result += copy;
 		}
 		/* Copy from the inlined pages into the tail */
 		copy = len;
@@ -377,11 +380,13 @@ xdr_shrink_bufhead(struct xdr_buf *buf, size_t len)
 			copy = 0;
 		else if (copy > tail->iov_len - offs)
 			copy = tail->iov_len - offs;
-		if (copy != 0)
+		if (copy != 0) {
 			_copy_from_pages((char *)tail->iov_base + offs,
 					buf->pages,
 					buf->page_base + pglen + offs - len,
 					copy);
+			result += copy;
+		}
 		/* Do we also need to copy data from the head into the tail ? */
 		if (len > pglen) {
 			offs = copy = len - pglen;
@@ -391,6 +396,7 @@ xdr_shrink_bufhead(struct xdr_buf *buf, size_t len)
 					(char *)head->iov_base +
 					head->iov_len - offs,
 					copy);
+			result += copy;
 		}
 	}
 	/* Now handle pages */
@@ -406,12 +412,15 @@ xdr_shrink_bufhead(struct xdr_buf *buf, size_t len)
 		_copy_to_pages(buf->pages, buf->page_base,
 				(char *)head->iov_base + head->iov_len - len,
 				copy);
+		result += copy;
 	}
 	head->iov_len -= len;
 	buf->buflen -= len;
 	/* Have we truncated the message? */
 	if (buf->len > buf->buflen)
 		buf->len = buf->buflen;
+
+	return result;
 }
 
 /**
@@ -423,14 +432,16 @@ xdr_shrink_bufhead(struct xdr_buf *buf, size_t len)
  * 'len' bytes. The extra data is not lost, but is instead
  * moved into the tail.
  */
-static void
+static unsigned int
 xdr_shrink_pagelen(struct xdr_buf *buf, size_t len)
 {
 	struct kvec *tail;
 	size_t copy;
 	unsigned int pglen = buf->page_len;
 	unsigned int tailbuf_len;
+	unsigned int result;
 
+	result = 0;
 	tail = buf->tail;
 	BUG_ON (len > pglen);
 
@@ -448,18 +459,22 @@ xdr_shrink_pagelen(struct xdr_buf *buf, size_t len)
 		if (tail->iov_len > len) {
 			char *p = (char *)tail->iov_base + len;
 			memmove(p, tail->iov_base, tail->iov_len - len);
+			result += tail->iov_len - len;
 		} else
 			copy = tail->iov_len;
 		/* Copy from the inlined pages into the tail */
 		_copy_from_pages((char *)tail->iov_base,
 				buf->pages, buf->page_base + pglen - len,
 				copy);
+		result += copy;
 	}
 	buf->page_len -= len;
 	buf->buflen -= len;
 	/* Have we truncated the message? */
 	if (buf->len > buf->buflen)
 		buf->len = buf->buflen;
+
+	return result;
 }
 
 void
@@ -959,13 +974,17 @@ static unsigned int xdr_align_pages(struct xdr_stream *xdr, unsigned int len)
 	struct kvec *iov;
 	unsigned int nwords = XDR_QUADLEN(len);
 	unsigned int cur = xdr_stream_pos(xdr);
+	unsigned int copied, offset;
 
 	if (xdr->nwords == 0)
 		return 0;
+
 	/* Realign pages to current pointer position */
-	iov  = buf->head;
+	iov = buf->head;
 	if (iov->iov_len > cur) {
-		xdr_shrink_bufhead(buf, iov->iov_len - cur);
+		offset = iov->iov_len - cur;
+		copied = xdr_shrink_bufhead(buf, offset);
+		trace_rpc_xdr_alignment(xdr, offset, copied);
 		xdr->nwords = XDR_QUADLEN(buf->len - cur);
 	}
 
@@ -977,7 +996,9 @@ static unsigned int xdr_align_pages(struct xdr_stream *xdr, unsigned int len)
 		len = buf->page_len;
 	else if (nwords < xdr->nwords) {
 		/* Truncate page data and move it into the tail */
-		xdr_shrink_pagelen(buf, buf->page_len - len);
+		offset = buf->page_len - len;
+		copied = xdr_shrink_pagelen(buf, offset);
+		trace_rpc_xdr_alignment(xdr, offset, copied);
 		xdr->nwords = XDR_QUADLEN(buf->len - cur);
 	}
 	return len;
-- 
cgit v1.2.3


From 347cb6af8710b72cf9685fdc09d07873cf42d51f Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 7 Jan 2019 13:36:20 -0500
Subject: dma-mapping: add a kconfig symbol for arch_setup_dma_ops availability

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Paul Burton <paul.burton@mips.com> # MIPS
Acked-by: Catalin Marinas <catalin.marinas@arm.com> # arm64
---
 arch/arc/Kconfig                     |  1 +
 arch/arc/include/asm/Kbuild          |  1 +
 arch/arc/include/asm/dma-mapping.h   | 13 -------------
 arch/arm/Kconfig                     |  1 +
 arch/arm/include/asm/dma-mapping.h   |  4 ----
 arch/arm64/Kconfig                   |  1 +
 arch/arm64/include/asm/dma-mapping.h |  4 ----
 arch/mips/Kconfig                    |  1 +
 arch/mips/include/asm/dma-mapping.h  | 10 ----------
 arch/mips/mm/dma-noncoherent.c       |  8 ++++++++
 include/linux/dma-mapping.h          | 12 ++++++++----
 kernel/dma/Kconfig                   |  3 +++
 12 files changed, 24 insertions(+), 35 deletions(-)
 delete mode 100644 arch/arc/include/asm/dma-mapping.h

(limited to 'include')

diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index 376366a7db81..2ab27d88eb1c 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -11,6 +11,7 @@ config ARC
 	select ARC_TIMERS
 	select ARCH_HAS_DMA_COHERENT_TO_PFN
 	select ARCH_HAS_PTE_SPECIAL
+	select ARCH_HAS_SETUP_DMA_OPS
 	select ARCH_HAS_SYNC_DMA_FOR_CPU
 	select ARCH_HAS_SYNC_DMA_FOR_DEVICE
 	select ARCH_SUPPORTS_ATOMIC_RMW if ARC_HAS_LLSC
diff --git a/arch/arc/include/asm/Kbuild b/arch/arc/include/asm/Kbuild
index caa270261521..b41f8881ecc8 100644
--- a/arch/arc/include/asm/Kbuild
+++ b/arch/arc/include/asm/Kbuild
@@ -3,6 +3,7 @@ generic-y += bugs.h
 generic-y += compat.h
 generic-y += device.h
 generic-y += div64.h
+generic-y += dma-mapping.h
 generic-y += emergency-restart.h
 generic-y += extable.h
 generic-y += ftrace.h
diff --git a/arch/arc/include/asm/dma-mapping.h b/arch/arc/include/asm/dma-mapping.h
deleted file mode 100644
index c946c0a83e76..000000000000
--- a/arch/arc/include/asm/dma-mapping.h
+++ /dev/null
@@ -1,13 +0,0 @@
-// SPDX-License-Identifier:  GPL-2.0
-// (C) 2018 Synopsys, Inc. (www.synopsys.com)
-
-#ifndef ASM_ARC_DMA_MAPPING_H
-#define ASM_ARC_DMA_MAPPING_H
-
-#include <asm-generic/dma-mapping.h>
-
-void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
-			const struct iommu_ops *iommu, bool coherent);
-#define arch_setup_dma_ops arch_setup_dma_ops
-
-#endif
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 664e918e2624..c1cf44f00870 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -12,6 +12,7 @@ config ARM
 	select ARCH_HAS_MEMBARRIER_SYNC_CORE
 	select ARCH_HAS_PTE_SPECIAL if ARM_LPAE
 	select ARCH_HAS_PHYS_TO_DMA
+	select ARCH_HAS_SETUP_DMA_OPS
 	select ARCH_HAS_SET_MEMORY
 	select ARCH_HAS_STRICT_KERNEL_RWX if MMU && !XIP_KERNEL
 	select ARCH_HAS_STRICT_MODULE_RWX if MMU
diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h
index 31d3b96f0f4b..a224b6e39e58 100644
--- a/arch/arm/include/asm/dma-mapping.h
+++ b/arch/arm/include/asm/dma-mapping.h
@@ -96,10 +96,6 @@ static inline unsigned long dma_max_pfn(struct device *dev)
 }
 #define dma_max_pfn(dev) dma_max_pfn(dev)
 
-#define arch_setup_dma_ops arch_setup_dma_ops
-extern void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
-			       const struct iommu_ops *iommu, bool coherent);
-
 #ifdef CONFIG_MMU
 #define arch_teardown_dma_ops arch_teardown_dma_ops
 extern void arch_teardown_dma_ops(struct device *dev);
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index a4168d366127..63909f318d56 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -22,6 +22,7 @@ config ARM64
 	select ARCH_HAS_KCOV
 	select ARCH_HAS_MEMBARRIER_SYNC_CORE
 	select ARCH_HAS_PTE_SPECIAL
+	select ARCH_HAS_SETUP_DMA_OPS
 	select ARCH_HAS_SET_MEMORY
 	select ARCH_HAS_STRICT_KERNEL_RWX
 	select ARCH_HAS_STRICT_MODULE_RWX
diff --git a/arch/arm64/include/asm/dma-mapping.h b/arch/arm64/include/asm/dma-mapping.h
index 95dbf3ef735a..de96507ee2c1 100644
--- a/arch/arm64/include/asm/dma-mapping.h
+++ b/arch/arm64/include/asm/dma-mapping.h
@@ -29,10 +29,6 @@ static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
 	return NULL;
 }
 
-void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
-			const struct iommu_ops *iommu, bool coherent);
-#define arch_setup_dma_ops	arch_setup_dma_ops
-
 #ifdef CONFIG_IOMMU_DMA
 void arch_teardown_dma_ops(struct device *dev);
 #define arch_teardown_dma_ops	arch_teardown_dma_ops
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 0d14f51d0002..dc5d70f674e0 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -1118,6 +1118,7 @@ config DMA_MAYBE_COHERENT
 
 config DMA_PERDEV_COHERENT
 	bool
+	select ARCH_HAS_SETUP_DMA_OPS
 	select DMA_NONCOHERENT
 
 config DMA_NONCOHERENT
diff --git a/arch/mips/include/asm/dma-mapping.h b/arch/mips/include/asm/dma-mapping.h
index 20dfaad3a55d..34de7b17b41b 100644
--- a/arch/mips/include/asm/dma-mapping.h
+++ b/arch/mips/include/asm/dma-mapping.h
@@ -15,14 +15,4 @@ static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
 #endif
 }
 
-#define arch_setup_dma_ops arch_setup_dma_ops
-static inline void arch_setup_dma_ops(struct device *dev, u64 dma_base,
-				      u64 size, const struct iommu_ops *iommu,
-				      bool coherent)
-{
-#ifdef CONFIG_DMA_PERDEV_COHERENT
-	dev->dma_coherent = coherent;
-#endif
-}
-
 #endif /* _ASM_DMA_MAPPING_H */
diff --git a/arch/mips/mm/dma-noncoherent.c b/arch/mips/mm/dma-noncoherent.c
index cb38461391cb..0606fc87b294 100644
--- a/arch/mips/mm/dma-noncoherent.c
+++ b/arch/mips/mm/dma-noncoherent.c
@@ -159,3 +159,11 @@ void arch_dma_cache_sync(struct device *dev, void *vaddr, size_t size,
 
 	dma_sync_virt(vaddr, size, direction);
 }
+
+#ifdef CONFIG_DMA_PERDEV_COHERENT
+void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
+		const struct iommu_ops *iommu, bool coherent)
+{
+	dev->dma_coherent = coherent;
+}
+#endif
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index b904d55247ab..2b20d60e6158 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -671,11 +671,15 @@ static inline int dma_coerce_mask_and_coherent(struct device *dev, u64 mask)
 	return dma_set_mask_and_coherent(dev, mask);
 }
 
-#ifndef arch_setup_dma_ops
+#ifdef CONFIG_ARCH_HAS_SETUP_DMA_OPS
+void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
+		const struct iommu_ops *iommu, bool coherent);
+#else
 static inline void arch_setup_dma_ops(struct device *dev, u64 dma_base,
-				      u64 size, const struct iommu_ops *iommu,
-				      bool coherent) { }
-#endif
+		u64 size, const struct iommu_ops *iommu, bool coherent)
+{
+}
+#endif /* CONFIG_ARCH_HAS_SETUP_DMA_OPS */
 
 #ifndef arch_teardown_dma_ops
 static inline void arch_teardown_dma_ops(struct device *dev) { }
diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig
index 61cebea36d89..6014cad35e58 100644
--- a/kernel/dma/Kconfig
+++ b/kernel/dma/Kconfig
@@ -19,6 +19,9 @@ config ARCH_HAS_DMA_COHERENCE_H
 config HAVE_GENERIC_DMA_COHERENT
 	bool
 
+config ARCH_HAS_SETUP_DMA_OPS
+	bool
+
 config ARCH_HAS_SYNC_DMA_FOR_DEVICE
 	bool
 
-- 
cgit v1.2.3


From dc2acded38957dfa6b7b7e0203b4b8cb8d818ce6 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 21 Dec 2018 22:14:44 +0100
Subject: dma-mapping: add a kconfig symbol for arch_teardown_dma_ops
 availability

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Catalin Marinas <catalin.marinas@arm.com> # arm64
---
 arch/arm/Kconfig                     |  1 +
 arch/arm/include/asm/dma-mapping.h   |  5 -----
 arch/arm64/Kconfig                   |  1 +
 arch/arm64/include/asm/dma-mapping.h |  5 -----
 include/linux/dma-mapping.h          | 10 +++++++---
 kernel/dma/Kconfig                   |  3 +++
 6 files changed, 12 insertions(+), 13 deletions(-)

(limited to 'include')

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index c1cf44f00870..4bb36ae71b14 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -16,6 +16,7 @@ config ARM
 	select ARCH_HAS_SET_MEMORY
 	select ARCH_HAS_STRICT_KERNEL_RWX if MMU && !XIP_KERNEL
 	select ARCH_HAS_STRICT_MODULE_RWX if MMU
+	select ARCH_HAS_TEARDOWN_DMA_OPS if MMU
 	select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
 	select ARCH_HAVE_CUSTOM_GPIO_H
 	select ARCH_HAS_GCOV_PROFILE_ALL
diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h
index a224b6e39e58..03ba90ffc0f8 100644
--- a/arch/arm/include/asm/dma-mapping.h
+++ b/arch/arm/include/asm/dma-mapping.h
@@ -96,11 +96,6 @@ static inline unsigned long dma_max_pfn(struct device *dev)
 }
 #define dma_max_pfn(dev) dma_max_pfn(dev)
 
-#ifdef CONFIG_MMU
-#define arch_teardown_dma_ops arch_teardown_dma_ops
-extern void arch_teardown_dma_ops(struct device *dev);
-#endif
-
 /* do not use this function in a driver */
 static inline bool is_device_dma_coherent(struct device *dev)
 {
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 63909f318d56..87ec7be25e97 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -29,6 +29,7 @@ config ARM64
 	select ARCH_HAS_SYNC_DMA_FOR_DEVICE
 	select ARCH_HAS_SYNC_DMA_FOR_CPU
 	select ARCH_HAS_SYSCALL_WRAPPER
+	select ARCH_HAS_TEARDOWN_DMA_OPS if IOMMU_SUPPORT
 	select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
 	select ARCH_HAVE_NMI_SAFE_CMPXCHG
 	select ARCH_INLINE_READ_LOCK if !PREEMPT
diff --git a/arch/arm64/include/asm/dma-mapping.h b/arch/arm64/include/asm/dma-mapping.h
index de96507ee2c1..de98191e4c7d 100644
--- a/arch/arm64/include/asm/dma-mapping.h
+++ b/arch/arm64/include/asm/dma-mapping.h
@@ -29,11 +29,6 @@ static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
 	return NULL;
 }
 
-#ifdef CONFIG_IOMMU_DMA
-void arch_teardown_dma_ops(struct device *dev);
-#define arch_teardown_dma_ops	arch_teardown_dma_ops
-#endif
-
 /*
  * Do not use this function in a driver, it is only provided for
  * arch/arm/mm/xen.c, which is used by arm64 as well.
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index 2b20d60e6158..4210c5c1dd21 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -681,9 +681,13 @@ static inline void arch_setup_dma_ops(struct device *dev, u64 dma_base,
 }
 #endif /* CONFIG_ARCH_HAS_SETUP_DMA_OPS */
 
-#ifndef arch_teardown_dma_ops
-static inline void arch_teardown_dma_ops(struct device *dev) { }
-#endif
+#ifdef CONFIG_ARCH_HAS_TEARDOWN_DMA_OPS
+void arch_teardown_dma_ops(struct device *dev);
+#else
+static inline void arch_teardown_dma_ops(struct device *dev)
+{
+}
+#endif /* CONFIG_ARCH_HAS_TEARDOWN_DMA_OPS */
 
 static inline unsigned int dma_get_max_seg_size(struct device *dev)
 {
diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig
index 6014cad35e58..bde9179c6ed7 100644
--- a/kernel/dma/Kconfig
+++ b/kernel/dma/Kconfig
@@ -22,6 +22,9 @@ config HAVE_GENERIC_DMA_COHERENT
 config ARCH_HAS_SETUP_DMA_OPS
 	bool
 
+config ARCH_HAS_TEARDOWN_DMA_OPS
+	bool
+
 config ARCH_HAS_SYNC_DMA_FOR_DEVICE
 	bool
 
-- 
cgit v1.2.3


From 067fb11b12af1448f7bbcacca41e470cb775e9fa Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 11 Feb 2019 11:24:37 -0500
Subject: SUNRPC: Remove rpc_xprt::tsh_size

tsh_size was added to accommodate transports that send a pre-amble
before each RPC message. However, this assumes the pre-amble is
fixed in size, which isn't true for some transports. That makes
tsh_size not very generic.

Also I'd like to make the estimation of RPC send and receive
buffer sizes more precise. tsh_size doesn't currently appear to be
accounted for at all by call_allocate.

Therefore let's just remove the tsh_size concept, and make the only
transports that have a non-zero tsh_size employ a direct approach.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 include/linux/sunrpc/xprt.h                |  7 ---
 net/sunrpc/auth_gss/auth_gss.c             |  3 +-
 net/sunrpc/clnt.c                          |  1 -
 net/sunrpc/svc.c                           | 19 ++-----
 net/sunrpc/xprtrdma/svc_rdma_backchannel.c |  1 -
 net/sunrpc/xprtrdma/transport.c            |  1 -
 net/sunrpc/xprtsock.c                      | 91 ++++++++++++++++++++----------
 7 files changed, 65 insertions(+), 58 deletions(-)

(limited to 'include')

diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index ad7e910b119d..3a391544299e 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -196,8 +196,6 @@ struct rpc_xprt {
 
 	size_t			max_payload;	/* largest RPC payload size,
 						   in bytes */
-	unsigned int		tsh_size;	/* size of transport specific
-						   header */
 
 	struct rpc_wait_queue	binding;	/* requests waiting on rpcbind */
 	struct rpc_wait_queue	sending;	/* requests waiting to send */
@@ -362,11 +360,6 @@ struct rpc_xprt *	xprt_alloc(struct net *net, size_t size,
 				unsigned int max_req);
 void			xprt_free(struct rpc_xprt *);
 
-static inline __be32 *xprt_skip_transport_header(struct rpc_xprt *xprt, __be32 *p)
-{
-	return p + xprt->tsh_size;
-}
-
 static inline int
 xprt_enable_swap(struct rpc_xprt *xprt)
 {
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index a42672e81792..4b52e2b11c58 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -1563,8 +1563,7 @@ gss_marshal(struct rpc_task *task, __be32 *p)
 
 	/* We compute the checksum for the verifier over the xdr-encoded bytes
 	 * starting with the xid and ending at the end of the credential: */
-	iov.iov_base = xprt_skip_transport_header(req->rq_xprt,
-					req->rq_snd_buf.head[0].iov_base);
+	iov.iov_base = req->rq_snd_buf.head[0].iov_base;
 	iov.iov_len = (u8 *)p - (u8 *)iov.iov_base;
 	xdr_buf_from_iov(&iov, &verf_buf);
 
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index d7ec6132c046..c4203f6138ef 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -2331,7 +2331,6 @@ rpc_encode_header(struct rpc_task *task)
 
 	/* FIXME: check buffer size? */
 
-	p = xprt_skip_transport_header(req->rq_xprt, p);
 	*p++ = req->rq_xid;		/* XID */
 	*p++ = htonl(RPC_CALL);		/* CALL */
 	*p++ = htonl(RPC_VERSION);	/* RPC version */
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index e87ddb9f7feb..dbd19697ee38 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1144,17 +1144,6 @@ void svc_printk(struct svc_rqst *rqstp, const char *fmt, ...)
 static __printf(2,3) void svc_printk(struct svc_rqst *rqstp, const char *fmt, ...) {}
 #endif
 
-/*
- * Setup response header for TCP, it has a 4B record length field.
- */
-static void svc_tcp_prep_reply_hdr(struct svc_rqst *rqstp)
-{
-	struct kvec *resv = &rqstp->rq_res.head[0];
-
-	/* tcp needs a space for the record length... */
-	svc_putnl(resv, 0);
-}
-
 /*
  * Common routine for processing the RPC request.
  */
@@ -1182,10 +1171,6 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
 	set_bit(RQ_USEDEFERRAL, &rqstp->rq_flags);
 	clear_bit(RQ_DROPME, &rqstp->rq_flags);
 
-	/* Setup reply header */
-	if (rqstp->rq_prot == IPPROTO_TCP)
-		svc_tcp_prep_reply_hdr(rqstp);
-
 	svc_putu32(resv, rqstp->rq_xid);
 
 	vers = svc_getnl(argv);
@@ -1443,6 +1428,10 @@ svc_process(struct svc_rqst *rqstp)
 		goto out_drop;
 	}
 
+	/* Reserve space for the record marker */
+	if (rqstp->rq_prot == IPPROTO_TCP)
+		svc_putnl(resv, 0);
+
 	/* Returns 1 for send, 0 for drop */
 	if (likely(svc_process_common(rqstp, argv, resv)))
 		return svc_send(rqstp);
diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
index b908f2ca08fd..907464c2a9f0 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
@@ -304,7 +304,6 @@ xprt_setup_rdma_bc(struct xprt_create *args)
 	xprt->idle_timeout = RPCRDMA_IDLE_DISC_TO;
 
 	xprt->prot = XPRT_TRANSPORT_BC_RDMA;
-	xprt->tsh_size = 0;
 	xprt->ops = &xprt_rdma_bc_procs;
 
 	memcpy(&xprt->addr, args->dstaddr, args->addrlen);
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index fbc171ebfe91..e7274dc10120 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -332,7 +332,6 @@ xprt_setup_rdma(struct xprt_create *args)
 	xprt->idle_timeout = RPCRDMA_IDLE_DISC_TO;
 
 	xprt->resvport = 0;		/* privileged port not needed */
-	xprt->tsh_size = 0;		/* RPC-RDMA handles framing */
 	xprt->ops = &xprt_rdma_procs;
 
 	/*
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 7754aa3e434f..ae09d850cd11 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -696,6 +696,40 @@ xs_stream_reset_connect(struct sock_xprt *transport)
 
 #define XS_SENDMSG_FLAGS	(MSG_DONTWAIT | MSG_NOSIGNAL)
 
+/* Common case:
+ *  - stream transport
+ *  - sending from byte 0 of the message
+ *  - the message is wholly contained in @xdr's head iovec
+ */
+static int xs_send_rm_and_kvec(struct socket *sock, struct xdr_buf *xdr,
+			       unsigned int remainder)
+{
+	struct msghdr msg = {
+		.msg_flags	= XS_SENDMSG_FLAGS | (remainder ? MSG_MORE : 0)
+	};
+	rpc_fraghdr marker = cpu_to_be32(RPC_LAST_STREAM_FRAGMENT |
+					 (u32)xdr->len);
+	struct kvec iov[2] = {
+		{
+			.iov_base	= &marker,
+			.iov_len	= sizeof(marker)
+		},
+		{
+			.iov_base	= xdr->head[0].iov_base,
+			.iov_len	= xdr->head[0].iov_len
+		},
+	};
+	int ret;
+
+	ret = kernel_sendmsg(sock, &msg, iov, 2,
+			     iov[0].iov_len + iov[1].iov_len);
+	if (ret < 0)
+		return ret;
+	if (ret < iov[0].iov_len)
+		return -EPIPE;
+	return ret - iov[0].iov_len;
+}
+
 static int xs_send_kvec(struct socket *sock, struct sockaddr *addr, int addrlen, struct kvec *vec, unsigned int base, int more)
 {
 	struct msghdr msg = {
@@ -779,7 +813,11 @@ static int xs_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen,
 	if (base < xdr->head[0].iov_len || addr != NULL) {
 		unsigned int len = xdr->head[0].iov_len - base;
 		remainder -= len;
-		err = xs_send_kvec(sock, addr, addrlen, &xdr->head[0], base, remainder != 0);
+		if (!base && !addr)
+			err = xs_send_rm_and_kvec(sock, xdr, remainder);
+		else
+			err = xs_send_kvec(sock, addr, addrlen, &xdr->head[0],
+					   base, remainder != 0);
 		if (remainder == 0 || err != len)
 			goto out;
 		*sent_p += err;
@@ -869,16 +907,6 @@ xs_send_request_was_aborted(struct sock_xprt *transport, struct rpc_rqst *req)
 	return transport->xmit.offset != 0 && req->rq_bytes_sent == 0;
 }
 
-/*
- * Construct a stream transport record marker in @buf.
- */
-static inline void xs_encode_stream_record_marker(struct xdr_buf *buf)
-{
-	u32 reclen = buf->len - sizeof(rpc_fraghdr);
-	rpc_fraghdr *base = buf->head[0].iov_base;
-	*base = cpu_to_be32(RPC_LAST_STREAM_FRAGMENT | reclen);
-}
-
 /**
  * xs_local_send_request - write an RPC request to an AF_LOCAL socket
  * @req: pointer to RPC request
@@ -905,8 +933,6 @@ static int xs_local_send_request(struct rpc_rqst *req)
 		return -ENOTCONN;
 	}
 
-	xs_encode_stream_record_marker(&req->rq_snd_buf);
-
 	xs_pktdump("packet data:",
 			req->rq_svec->iov_base, req->rq_svec->iov_len);
 
@@ -1057,8 +1083,6 @@ static int xs_tcp_send_request(struct rpc_rqst *req)
 		return -ENOTCONN;
 	}
 
-	xs_encode_stream_record_marker(&req->rq_snd_buf);
-
 	xs_pktdump("packet data:",
 				req->rq_svec->iov_base,
 				req->rq_svec->iov_len);
@@ -2534,26 +2558,35 @@ static int bc_sendto(struct rpc_rqst *req)
 {
 	int len;
 	struct xdr_buf *xbufp = &req->rq_snd_buf;
-	struct rpc_xprt *xprt = req->rq_xprt;
 	struct sock_xprt *transport =
-				container_of(xprt, struct sock_xprt, xprt);
-	struct socket *sock = transport->sock;
+			container_of(req->rq_xprt, struct sock_xprt, xprt);
 	unsigned long headoff;
 	unsigned long tailoff;
+	struct page *tailpage;
+	struct msghdr msg = {
+		.msg_flags	= MSG_MORE
+	};
+	rpc_fraghdr marker = cpu_to_be32(RPC_LAST_STREAM_FRAGMENT |
+					 (u32)xbufp->len);
+	struct kvec iov = {
+		.iov_base	= &marker,
+		.iov_len	= sizeof(marker),
+	};
 
-	xs_encode_stream_record_marker(xbufp);
+	len = kernel_sendmsg(transport->sock, &msg, &iov, 1, iov.iov_len);
+	if (len != iov.iov_len)
+		return -EAGAIN;
 
+	tailpage = NULL;
+	if (xbufp->tail[0].iov_len)
+		tailpage = virt_to_page(xbufp->tail[0].iov_base);
 	tailoff = (unsigned long)xbufp->tail[0].iov_base & ~PAGE_MASK;
 	headoff = (unsigned long)xbufp->head[0].iov_base & ~PAGE_MASK;
-	len = svc_send_common(sock, xbufp,
+	len = svc_send_common(transport->sock, xbufp,
 			      virt_to_page(xbufp->head[0].iov_base), headoff,
-			      xbufp->tail[0].iov_base, tailoff);
-
-	if (len != xbufp->len) {
-		printk(KERN_NOTICE "Error sending entire callback!\n");
-		len = -EAGAIN;
-	}
-
+			      tailpage, tailoff);
+	if (len != xbufp->len)
+		return -EAGAIN;
 	return len;
 }
 
@@ -2793,7 +2826,6 @@ static struct rpc_xprt *xs_setup_local(struct xprt_create *args)
 	transport = container_of(xprt, struct sock_xprt, xprt);
 
 	xprt->prot = 0;
-	xprt->tsh_size = sizeof(rpc_fraghdr) / sizeof(u32);
 	xprt->max_payload = RPC_MAX_FRAGMENT_SIZE;
 
 	xprt->bind_timeout = XS_BIND_TO;
@@ -2862,7 +2894,6 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args)
 	transport = container_of(xprt, struct sock_xprt, xprt);
 
 	xprt->prot = IPPROTO_UDP;
-	xprt->tsh_size = 0;
 	/* XXX: header size can vary due to auth type, IPv6, etc. */
 	xprt->max_payload = (1U << 16) - (MAX_HEADER << 3);
 
@@ -2942,7 +2973,6 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args)
 	transport = container_of(xprt, struct sock_xprt, xprt);
 
 	xprt->prot = IPPROTO_TCP;
-	xprt->tsh_size = sizeof(rpc_fraghdr) / sizeof(u32);
 	xprt->max_payload = RPC_MAX_FRAGMENT_SIZE;
 
 	xprt->bind_timeout = XS_BIND_TO;
@@ -3015,7 +3045,6 @@ static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args)
 	transport = container_of(xprt, struct sock_xprt, xprt);
 
 	xprt->prot = IPPROTO_TCP;
-	xprt->tsh_size = sizeof(rpc_fraghdr) / sizeof(u32);
 	xprt->max_payload = RPC_MAX_FRAGMENT_SIZE;
 	xprt->timeout = &xs_tcp_default_timeout;
 
-- 
cgit v1.2.3


From 2b2812961302c38500c1027778e371c895f1cac4 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 25 Dec 2018 14:03:32 +0100
Subject: device.h: dma_mem is only needed for HAVE_GENERIC_DMA_COHERENT

No need to carry an unused field around.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/device.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/device.h b/include/linux/device.h
index 6cb4640b6160..be544400acdd 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -1017,8 +1017,10 @@ struct device {
 
 	struct list_head	dma_pools;	/* dma pools (if dma'ble) */
 
+#ifdef CONFIG_HAVE_GENERIC_DMA_COHERENT
 	struct dma_coherent_mem	*dma_mem; /* internal for coherent mem
 					     override */
+#endif
 #ifdef CONFIG_DMA_CMA
 	struct cma *cma_area;		/* contiguous memory area for dma
 					   allocations */
-- 
cgit v1.2.3


From fe9a270519c72bccb3af524db7ea6c7b67700d50 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 11 Feb 2019 11:24:43 -0500
Subject: SUNRPC: Add build option to disable support for insecure enctypes

Enable distributions to enforce the rejection of ancient and
insecure Kerberos enctypes in the kernel's RPCSEC_GSS
implementation. These are the single-DES encryption types that
were deprecated in 2012 by RFC 6649.

Enctypes that were deprecated more recently (by RFC 8429) remain
fully supported for now because they are still likely to be widely
used.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Acked-by: Simo Sorce <simo@redhat.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 include/linux/sunrpc/gss_krb5_enctypes.h | 42 +++++++++++++++++++++++++++++++-
 net/sunrpc/Kconfig                       | 16 ++++++++++++
 net/sunrpc/auth_gss/gss_krb5_mech.c      |  2 ++
 3 files changed, 59 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/sunrpc/gss_krb5_enctypes.h b/include/linux/sunrpc/gss_krb5_enctypes.h
index ec6234eee89c..981c89cef19d 100644
--- a/include/linux/sunrpc/gss_krb5_enctypes.h
+++ b/include/linux/sunrpc/gss_krb5_enctypes.h
@@ -1,4 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
- * Dumb way to share this static piece of information with nfsd
+ * Define the string that exports the set of kernel-supported
+ * Kerberos enctypes. This list is sent via upcall to gssd, and
+ * is also exposed via the nfsd /proc API. The consumers generally
+ * treat this as an ordered list, where the first item in the list
+ * is the most preferred.
+ */
+
+#ifndef _LINUX_SUNRPC_GSS_KRB5_ENCTYPES_H
+#define _LINUX_SUNRPC_GSS_KRB5_ENCTYPES_H
+
+#ifdef CONFIG_SUNRPC_DISABLE_INSECURE_ENCTYPES
+
+/*
+ * NB: This list includes encryption types that were deprecated
+ * by RFC 8429 (DES3_CBC_SHA1 and ARCFOUR_HMAC).
+ *
+ * ENCTYPE_AES256_CTS_HMAC_SHA1_96
+ * ENCTYPE_AES128_CTS_HMAC_SHA1_96
+ * ENCTYPE_DES3_CBC_SHA1
+ * ENCTYPE_ARCFOUR_HMAC
+ */
+#define KRB5_SUPPORTED_ENCTYPES "18,17,16,23"
+
+#else	/* CONFIG_SUNRPC_DISABLE_INSECURE_ENCTYPES */
+
+/*
+ * NB: This list includes encryption types that were deprecated
+ * by RFC 8429 and RFC 6649.
+ *
+ * ENCTYPE_AES256_CTS_HMAC_SHA1_96
+ * ENCTYPE_AES128_CTS_HMAC_SHA1_96
+ * ENCTYPE_DES3_CBC_SHA1
+ * ENCTYPE_ARCFOUR_HMAC
+ * ENCTYPE_DES_CBC_MD5
+ * ENCTYPE_DES_CBC_CRC
+ * ENCTYPE_DES_CBC_MD4
  */
 #define KRB5_SUPPORTED_ENCTYPES "18,17,16,23,3,1,2"
+
+#endif	/* CONFIG_SUNRPC_DISABLE_INSECURE_ENCTYPES */
+
+#endif	/* _LINUX_SUNRPC_GSS_KRB5_ENCTYPES_H */
diff --git a/net/sunrpc/Kconfig b/net/sunrpc/Kconfig
index ac09ca803296..83f5617bae07 100644
--- a/net/sunrpc/Kconfig
+++ b/net/sunrpc/Kconfig
@@ -34,6 +34,22 @@ config RPCSEC_GSS_KRB5
 
 	  If unsure, say Y.
 
+config CONFIG_SUNRPC_DISABLE_INSECURE_ENCTYPES
+	bool "Secure RPC: Disable insecure Kerberos encryption types"
+	depends on RPCSEC_GSS_KRB5
+	default n
+	help
+	  Choose Y here to disable the use of deprecated encryption types
+	  with the Kerberos version 5 GSS-API mechanism (RFC 1964). The
+	  deprecated encryption types include DES-CBC-MD5, DES-CBC-CRC,
+	  and DES-CBC-MD4. These types were deprecated by RFC 6649 because
+	  they were found to be insecure.
+
+	  N is the default because many sites have deployed KDCs and
+	  keytabs that contain only these deprecated encryption types.
+	  Choosing Y prevents the use of known-insecure encryption types
+	  but might result in compatibility problems.
+
 config SUNRPC_DEBUG
 	bool "RPC: Enable dprintk debugging"
 	depends on SUNRPC && SYSCTL
diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c
index eab71fc7af3e..be31a58d54e0 100644
--- a/net/sunrpc/auth_gss/gss_krb5_mech.c
+++ b/net/sunrpc/auth_gss/gss_krb5_mech.c
@@ -53,6 +53,7 @@
 static struct gss_api_mech gss_kerberos_mech;	/* forward declaration */
 
 static const struct gss_krb5_enctype supported_gss_krb5_enctypes[] = {
+#ifndef CONFIG_SUNRPC_DISABLE_INSECURE_ENCTYPES
 	/*
 	 * DES (All DES enctypes are mapped to the same gss functionality)
 	 */
@@ -74,6 +75,7 @@ static const struct gss_krb5_enctype supported_gss_krb5_enctypes[] = {
 	  .cksumlength = 8,
 	  .keyed_cksum = 0,
 	},
+#endif	/* CONFIG_SUNRPC_DISABLE_INSECURE_ENCTYPES */
 	/*
 	 * RC4-HMAC
 	 */
-- 
cgit v1.2.3


From c17c7cf147ac56312156eaaaf8b2e19c9a59a71a Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Tue, 12 Feb 2019 07:58:17 -0800
Subject: usb: typec: tcpm: Remove unused functions

tcpm_update_source_capabilities() and tcpm_update_sink_capabilities()
are not used anywhere, and I don't recall why I introduced those functions
in the first place. Effectively that means that we don't know if they even
work, or ever did. Lets remove them.

Reported-by: Kyle Tso <kyletso@google.com>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Acked-by: Kyle Tso <kyletso@google.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/typec/tcpm/tcpm.c | 60 -------------------------------------------
 include/linux/usb/tcpm.h      |  6 -----
 2 files changed, 66 deletions(-)

(limited to 'include')

diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c
index 8f2af348bda5..0f62db091d8d 100644
--- a/drivers/usb/typec/tcpm/tcpm.c
+++ b/drivers/usb/typec/tcpm/tcpm.c
@@ -4435,66 +4435,6 @@ sink:
 	return 0;
 }
 
-int tcpm_update_source_capabilities(struct tcpm_port *port, const u32 *pdo,
-				    unsigned int nr_pdo)
-{
-	if (tcpm_validate_caps(port, pdo, nr_pdo))
-		return -EINVAL;
-
-	mutex_lock(&port->lock);
-	port->nr_src_pdo = tcpm_copy_pdos(port->src_pdo, pdo, nr_pdo);
-	switch (port->state) {
-	case SRC_UNATTACHED:
-	case SRC_ATTACH_WAIT:
-	case SRC_TRYWAIT:
-		tcpm_set_cc(port, tcpm_rp_cc(port));
-		break;
-	case SRC_SEND_CAPABILITIES:
-	case SRC_NEGOTIATE_CAPABILITIES:
-	case SRC_READY:
-	case SRC_WAIT_NEW_CAPABILITIES:
-		tcpm_set_cc(port, tcpm_rp_cc(port));
-		tcpm_set_state(port, SRC_SEND_CAPABILITIES, 0);
-		break;
-	default:
-		break;
-	}
-	mutex_unlock(&port->lock);
-	return 0;
-}
-EXPORT_SYMBOL_GPL(tcpm_update_source_capabilities);
-
-int tcpm_update_sink_capabilities(struct tcpm_port *port, const u32 *pdo,
-				  unsigned int nr_pdo,
-				  unsigned int operating_snk_mw)
-{
-	if (tcpm_validate_caps(port, pdo, nr_pdo))
-		return -EINVAL;
-
-	mutex_lock(&port->lock);
-	port->nr_snk_pdo = tcpm_copy_pdos(port->snk_pdo, pdo, nr_pdo);
-	port->operating_snk_mw = operating_snk_mw;
-	port->update_sink_caps = true;
-
-	switch (port->state) {
-	case SNK_NEGOTIATE_CAPABILITIES:
-	case SNK_NEGOTIATE_PPS_CAPABILITIES:
-	case SNK_READY:
-	case SNK_TRANSITION_SINK:
-	case SNK_TRANSITION_SINK_VBUS:
-		if (port->pps_data.active)
-			tcpm_set_state(port, SNK_NEGOTIATE_PPS_CAPABILITIES, 0);
-		else
-			tcpm_set_state(port, SNK_NEGOTIATE_CAPABILITIES, 0);
-		break;
-	default:
-		break;
-	}
-	mutex_unlock(&port->lock);
-	return 0;
-}
-EXPORT_SYMBOL_GPL(tcpm_update_sink_capabilities);
-
 /* Power Supply access to expose source power information */
 enum tcpm_psy_online_states {
 	TCPM_PSY_OFFLINE = 0,
diff --git a/include/linux/usb/tcpm.h b/include/linux/usb/tcpm.h
index 50c74a77db55..0c532ca3f079 100644
--- a/include/linux/usb/tcpm.h
+++ b/include/linux/usb/tcpm.h
@@ -159,12 +159,6 @@ struct tcpm_port;
 struct tcpm_port *tcpm_register_port(struct device *dev, struct tcpc_dev *tcpc);
 void tcpm_unregister_port(struct tcpm_port *port);
 
-int tcpm_update_source_capabilities(struct tcpm_port *port, const u32 *pdo,
-				    unsigned int nr_pdo);
-int tcpm_update_sink_capabilities(struct tcpm_port *port, const u32 *pdo,
-				  unsigned int nr_pdo,
-				  unsigned int operating_snk_mw);
-
 void tcpm_vbus_change(struct tcpm_port *port);
 void tcpm_cc_change(struct tcpm_port *port);
 void tcpm_pd_receive(struct tcpm_port *port,
-- 
cgit v1.2.3


From e8680a24a269bd6dcb533f4e4a5faba9ae58925c Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 11 Feb 2019 11:24:48 -0500
Subject: SUNRPC: Use struct xdr_stream when constructing RPC Call header

Modernize and harden the code path that constructs each RPC Call
message.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 include/linux/sunrpc/auth.h    |  15 ++--
 include/linux/sunrpc/xdr.h     |   6 ++
 include/trace/events/sunrpc.h  |  29 +++++++
 net/sunrpc/auth.c              |  56 ++++++++----
 net/sunrpc/auth_gss/auth_gss.c | 191 ++++++++++++++++++++---------------------
 net/sunrpc/auth_null.c         |  23 +++--
 net/sunrpc/auth_unix.c         |  61 ++++++++-----
 net/sunrpc/clnt.c              |  66 +++++++-------
 8 files changed, 266 insertions(+), 181 deletions(-)

(limited to 'include')

diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h
index eed3cb16ccf1..96e237f8e60b 100644
--- a/include/linux/sunrpc/auth.h
+++ b/include/linux/sunrpc/auth.h
@@ -131,11 +131,12 @@ struct rpc_credops {
 	void			(*crdestroy)(struct rpc_cred *);
 
 	int			(*crmatch)(struct auth_cred *, struct rpc_cred *, int);
-	__be32 *		(*crmarshal)(struct rpc_task *, __be32 *);
+	int			(*crmarshal)(struct rpc_task *task,
+					     struct xdr_stream *xdr);
 	int			(*crrefresh)(struct rpc_task *);
 	__be32 *		(*crvalidate)(struct rpc_task *, __be32 *);
-	int			(*crwrap_req)(struct rpc_task *, kxdreproc_t,
-						void *, __be32 *, void *);
+	int			(*crwrap_req)(struct rpc_task *task,
+					      struct xdr_stream *xdr);
 	int			(*crunwrap_resp)(struct rpc_task *, kxdrdproc_t,
 						void *, __be32 *, void *);
 	int			(*crkey_timeout)(struct rpc_cred *);
@@ -165,9 +166,13 @@ struct rpc_cred *	rpcauth_lookup_credcache(struct rpc_auth *, struct auth_cred *
 void			rpcauth_init_cred(struct rpc_cred *, const struct auth_cred *, struct rpc_auth *, const struct rpc_credops *);
 struct rpc_cred *	rpcauth_lookupcred(struct rpc_auth *, int);
 void			put_rpccred(struct rpc_cred *);
-__be32 *		rpcauth_marshcred(struct rpc_task *, __be32 *);
+int			rpcauth_marshcred(struct rpc_task *task,
+					  struct xdr_stream *xdr);
 __be32 *		rpcauth_checkverf(struct rpc_task *, __be32 *);
-int			rpcauth_wrap_req(struct rpc_task *task, kxdreproc_t encode, void *rqstp, __be32 *data, void *obj);
+int			rpcauth_wrap_req_encode(struct rpc_task *task,
+						struct xdr_stream *xdr);
+int			rpcauth_wrap_req(struct rpc_task *task,
+					 struct xdr_stream *xdr);
 int			rpcauth_unwrap_resp(struct rpc_task *task, kxdrdproc_t decode, void *rqstp, __be32 *data, void *obj);
 bool			rpcauth_xmit_need_reencode(struct rpc_task *task);
 int			rpcauth_refreshcred(struct rpc_task *);
diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index 787939d13643..6df9ac1ca471 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -87,6 +87,12 @@ xdr_buf_init(struct xdr_buf *buf, void *start, size_t len)
 #define	xdr_one		cpu_to_be32(1)
 #define	xdr_two		cpu_to_be32(2)
 
+#define	rpc_auth_null	cpu_to_be32(RPC_AUTH_NULL)
+#define	rpc_auth_unix	cpu_to_be32(RPC_AUTH_UNIX)
+#define	rpc_auth_gss	cpu_to_be32(RPC_AUTH_GSS)
+
+#define	rpc_call	cpu_to_be32(RPC_CALL)
+
 #define	rpc_success		cpu_to_be32(RPC_SUCCESS)
 #define	rpc_prog_unavail	cpu_to_be32(RPC_PROG_UNAVAIL)
 #define	rpc_prog_mismatch	cpu_to_be32(RPC_PROG_MISMATCH)
diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
index 627650800676..2b3f9d139e75 100644
--- a/include/trace/events/sunrpc.h
+++ b/include/trace/events/sunrpc.h
@@ -213,6 +213,35 @@ DECLARE_EVENT_CLASS(rpc_task_queued,
 DEFINE_RPC_QUEUED_EVENT(sleep);
 DEFINE_RPC_QUEUED_EVENT(wakeup);
 
+DECLARE_EVENT_CLASS(rpc_failure,
+
+	TP_PROTO(const struct rpc_task *task),
+
+	TP_ARGS(task),
+
+	TP_STRUCT__entry(
+		__field(unsigned int, task_id)
+		__field(unsigned int, client_id)
+	),
+
+	TP_fast_assign(
+		__entry->task_id = task->tk_pid;
+		__entry->client_id = task->tk_client->cl_clid;
+	),
+
+	TP_printk("task:%u@%u",
+		__entry->task_id, __entry->client_id)
+);
+
+#define DEFINE_RPC_FAILURE(name)					\
+	DEFINE_EVENT(rpc_failure, rpc_bad_##name,			\
+			TP_PROTO(					\
+				const struct rpc_task *task		\
+			),						\
+			TP_ARGS(task))
+
+DEFINE_RPC_FAILURE(callhdr);
+
 TRACE_EVENT(rpc_stats_latency,
 
 	TP_PROTO(
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index 275e84e817b7..add2135d9b01 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -756,12 +756,21 @@ destroy:
 }
 EXPORT_SYMBOL_GPL(put_rpccred);
 
-__be32 *
-rpcauth_marshcred(struct rpc_task *task, __be32 *p)
+/**
+ * rpcauth_marshcred - Append RPC credential to end of @xdr
+ * @task: controlling RPC task
+ * @xdr: xdr_stream containing initial portion of RPC Call header
+ *
+ * On success, an appropriate verifier is added to @xdr, @xdr is
+ * updated to point past the verifier, and zero is returned.
+ * Otherwise, @xdr is in an undefined state and a negative errno
+ * is returned.
+ */
+int rpcauth_marshcred(struct rpc_task *task, struct xdr_stream *xdr)
 {
-	struct rpc_cred	*cred = task->tk_rqstp->rq_cred;
+	const struct rpc_credops *ops = task->tk_rqstp->rq_cred->cr_ops;
 
-	return cred->cr_ops->crmarshal(task, p);
+	return ops->crmarshal(task, xdr);
 }
 
 __be32 *
@@ -772,26 +781,37 @@ rpcauth_checkverf(struct rpc_task *task, __be32 *p)
 	return cred->cr_ops->crvalidate(task, p);
 }
 
-static void rpcauth_wrap_req_encode(kxdreproc_t encode, struct rpc_rqst *rqstp,
-				   __be32 *data, void *obj)
+/**
+ * rpcauth_wrap_req_encode - XDR encode the RPC procedure
+ * @task: controlling RPC task
+ * @xdr: stream where on-the-wire bytes are to be marshalled
+ *
+ * On success, @xdr contains the encoded and wrapped message.
+ * Otherwise, @xdr is in an undefined state.
+ */
+int rpcauth_wrap_req_encode(struct rpc_task *task, struct xdr_stream *xdr)
 {
-	struct xdr_stream xdr;
+	kxdreproc_t encode = task->tk_msg.rpc_proc->p_encode;
 
-	xdr_init_encode(&xdr, &rqstp->rq_snd_buf, data, rqstp);
-	encode(rqstp, &xdr, obj);
+	encode(task->tk_rqstp, xdr, task->tk_msg.rpc_argp);
+	return 0;
 }
+EXPORT_SYMBOL_GPL(rpcauth_wrap_req_encode);
 
-int
-rpcauth_wrap_req(struct rpc_task *task, kxdreproc_t encode, void *rqstp,
-		__be32 *data, void *obj)
+/**
+ * rpcauth_wrap_req - XDR encode and wrap the RPC procedure
+ * @task: controlling RPC task
+ * @xdr: stream where on-the-wire bytes are to be marshalled
+ *
+ * On success, @xdr contains the encoded and wrapped message,
+ * and zero is returned. Otherwise, @xdr is in an undefined
+ * state and a negative errno is returned.
+ */
+int rpcauth_wrap_req(struct rpc_task *task, struct xdr_stream *xdr)
 {
-	struct rpc_cred *cred = task->tk_rqstp->rq_cred;
+	const struct rpc_credops *ops = task->tk_rqstp->rq_cred->cr_ops;
 
-	if (cred->cr_ops->crwrap_req)
-		return cred->cr_ops->crwrap_req(task, encode, rqstp, data, obj);
-	/* By default, we encode the arguments normally. */
-	rpcauth_wrap_req_encode(encode, rqstp, data, obj);
-	return 0;
+	return ops->crwrap_req(task, xdr);
 }
 
 static int
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 4b52e2b11c58..b333b1bdad45 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -1526,18 +1526,20 @@ out:
 }
 
 /*
-* Marshal credentials.
-* Maybe we should keep a cached credential for performance reasons.
-*/
-static __be32 *
-gss_marshal(struct rpc_task *task, __be32 *p)
+ * Marshal credentials.
+ *
+ * The expensive part is computing the verifier. We can't cache a
+ * pre-computed version of the verifier because the seqno, which
+ * is different every time, is included in the MIC.
+ */
+static int gss_marshal(struct rpc_task *task, struct xdr_stream *xdr)
 {
 	struct rpc_rqst *req = task->tk_rqstp;
 	struct rpc_cred *cred = req->rq_cred;
 	struct gss_cred	*gss_cred = container_of(cred, struct gss_cred,
 						 gc_base);
 	struct gss_cl_ctx	*ctx = gss_cred_get_ctx(cred);
-	__be32		*cred_len;
+	__be32		*p, *cred_len;
 	u32             maj_stat = 0;
 	struct xdr_netobj mic;
 	struct kvec	iov;
@@ -1545,7 +1547,13 @@ gss_marshal(struct rpc_task *task, __be32 *p)
 
 	dprintk("RPC: %5u %s\n", task->tk_pid, __func__);
 
-	*p++ = htonl(RPC_AUTH_GSS);
+	/* Credential */
+
+	p = xdr_reserve_space(xdr, 7 * sizeof(*p) +
+			      ctx->gc_wire_ctx.len);
+	if (!p)
+		goto out_put_ctx;
+	*p++ = rpc_auth_gss;
 	cred_len = p++;
 
 	spin_lock(&ctx->gc_seq_lock);
@@ -1554,12 +1562,14 @@ gss_marshal(struct rpc_task *task, __be32 *p)
 	if (req->rq_seqno == MAXSEQ)
 		goto out_expired;
 
-	*p++ = htonl((u32) RPC_GSS_VERSION);
-	*p++ = htonl((u32) ctx->gc_proc);
-	*p++ = htonl((u32) req->rq_seqno);
-	*p++ = htonl((u32) gss_cred->gc_service);
+	*p++ = cpu_to_be32(RPC_GSS_VERSION);
+	*p++ = cpu_to_be32(ctx->gc_proc);
+	*p++ = cpu_to_be32(req->rq_seqno);
+	*p++ = cpu_to_be32(gss_cred->gc_service);
 	p = xdr_encode_netobj(p, &ctx->gc_wire_ctx);
-	*cred_len = htonl((p - (cred_len + 1)) << 2);
+	*cred_len = cpu_to_be32((p - (cred_len + 1)) << 2);
+
+	/* Verifier */
 
 	/* We compute the checksum for the verifier over the xdr-encoded bytes
 	 * starting with the xid and ending at the end of the credential: */
@@ -1567,27 +1577,27 @@ gss_marshal(struct rpc_task *task, __be32 *p)
 	iov.iov_len = (u8 *)p - (u8 *)iov.iov_base;
 	xdr_buf_from_iov(&iov, &verf_buf);
 
-	/* set verifier flavor*/
-	*p++ = htonl(RPC_AUTH_GSS);
-
+	p = xdr_reserve_space(xdr, sizeof(*p));
+	if (!p)
+		goto out_put_ctx;
+	*p++ = rpc_auth_gss;
 	mic.data = (u8 *)(p + 1);
 	maj_stat = gss_get_mic(ctx->gc_gss_ctx, &verf_buf, &mic);
-	if (maj_stat == GSS_S_CONTEXT_EXPIRED) {
+	if (maj_stat == GSS_S_CONTEXT_EXPIRED)
 		goto out_expired;
-	} else if (maj_stat != 0) {
-		pr_warn("gss_marshal: gss_get_mic FAILED (%d)\n", maj_stat);
-		task->tk_status = -EIO;
+	else if (maj_stat != 0)
+		goto out_put_ctx;
+	if (xdr_stream_encode_opaque_inline(xdr, (void **)&p, mic.len) < 0)
 		goto out_put_ctx;
-	}
-	p = xdr_encode_opaque(p, NULL, mic.len);
 	gss_put_ctx(ctx);
-	return p;
+	return 0;
 out_expired:
+	gss_put_ctx(ctx);
 	clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
-	task->tk_status = -EKEYEXPIRED;
+	return -EKEYEXPIRED;
 out_put_ctx:
 	gss_put_ctx(ctx);
-	return NULL;
+	return -EMSGSIZE;
 }
 
 static int gss_renew_cred(struct rpc_task *task)
@@ -1716,61 +1726,45 @@ out_bad:
 	return ret;
 }
 
-static void gss_wrap_req_encode(kxdreproc_t encode, struct rpc_rqst *rqstp,
-				__be32 *p, void *obj)
-{
-	struct xdr_stream xdr;
-
-	xdr_init_encode(&xdr, &rqstp->rq_snd_buf, p, rqstp);
-	encode(rqstp, &xdr, obj);
-}
-
-static inline int
-gss_wrap_req_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
-		   kxdreproc_t encode, struct rpc_rqst *rqstp,
-		   __be32 *p, void *obj)
+static int gss_wrap_req_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
+			      struct rpc_task *task, struct xdr_stream *xdr)
 {
-	struct xdr_buf	*snd_buf = &rqstp->rq_snd_buf;
-	struct xdr_buf	integ_buf;
-	__be32          *integ_len = NULL;
+	struct rpc_rqst *rqstp = task->tk_rqstp;
+	struct xdr_buf integ_buf, *snd_buf = &rqstp->rq_snd_buf;
 	struct xdr_netobj mic;
-	u32		offset;
-	__be32		*q;
-	struct kvec	*iov;
-	u32             maj_stat = 0;
-	int		status = -EIO;
+	__be32 *p, *integ_len;
+	u32 offset, maj_stat;
 
+	p = xdr_reserve_space(xdr, 2 * sizeof(*p));
+	if (!p)
+		goto wrap_failed;
 	integ_len = p++;
-	offset = (u8 *)p - (u8 *)snd_buf->head[0].iov_base;
-	*p++ = htonl(rqstp->rq_seqno);
+	*p = cpu_to_be32(rqstp->rq_seqno);
 
-	gss_wrap_req_encode(encode, rqstp, p, obj);
+	if (rpcauth_wrap_req_encode(task, xdr))
+		goto wrap_failed;
 
+	offset = (u8 *)p - (u8 *)snd_buf->head[0].iov_base;
 	if (xdr_buf_subsegment(snd_buf, &integ_buf,
 				offset, snd_buf->len - offset))
-		return status;
-	*integ_len = htonl(integ_buf.len);
+		goto wrap_failed;
+	*integ_len = cpu_to_be32(integ_buf.len);
 
-	/* guess whether we're in the head or the tail: */
-	if (snd_buf->page_len || snd_buf->tail[0].iov_len)
-		iov = snd_buf->tail;
-	else
-		iov = snd_buf->head;
-	p = iov->iov_base + iov->iov_len;
+	p = xdr_reserve_space(xdr, 0);
+	if (!p)
+		goto wrap_failed;
 	mic.data = (u8 *)(p + 1);
-
 	maj_stat = gss_get_mic(ctx->gc_gss_ctx, &integ_buf, &mic);
-	status = -EIO; /* XXX? */
 	if (maj_stat == GSS_S_CONTEXT_EXPIRED)
 		clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
 	else if (maj_stat)
-		return status;
-	q = xdr_encode_opaque(p, NULL, mic.len);
-
-	offset = (u8 *)q - (u8 *)p;
-	iov->iov_len += offset;
-	snd_buf->len += offset;
+		goto wrap_failed;
+	/* Check that the trailing MIC fit in the buffer, after the fact */
+	if (xdr_stream_encode_opaque_inline(xdr, (void **)&p, mic.len) < 0)
+		goto wrap_failed;
 	return 0;
+wrap_failed:
+	return -EMSGSIZE;
 }
 
 static void
@@ -1821,61 +1815,63 @@ out:
 	return -EAGAIN;
 }
 
-static inline int
-gss_wrap_req_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
-		  kxdreproc_t encode, struct rpc_rqst *rqstp,
-		  __be32 *p, void *obj)
+static int gss_wrap_req_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
+			     struct rpc_task *task, struct xdr_stream *xdr)
 {
+	struct rpc_rqst *rqstp = task->tk_rqstp;
 	struct xdr_buf	*snd_buf = &rqstp->rq_snd_buf;
-	u32		offset;
-	u32             maj_stat;
+	u32		pad, offset, maj_stat;
 	int		status;
-	__be32		*opaque_len;
+	__be32		*p, *opaque_len;
 	struct page	**inpages;
 	int		first;
-	int		pad;
 	struct kvec	*iov;
-	char		*tmp;
 
+	status = -EIO;
+	p = xdr_reserve_space(xdr, 2 * sizeof(*p));
+	if (!p)
+		goto wrap_failed;
 	opaque_len = p++;
-	offset = (u8 *)p - (u8 *)snd_buf->head[0].iov_base;
-	*p++ = htonl(rqstp->rq_seqno);
+	*p = cpu_to_be32(rqstp->rq_seqno);
 
-	gss_wrap_req_encode(encode, rqstp, p, obj);
+	if (rpcauth_wrap_req_encode(task, xdr))
+		goto wrap_failed;
 
 	status = alloc_enc_pages(rqstp);
-	if (status)
-		return status;
+	if (unlikely(status))
+		goto wrap_failed;
 	first = snd_buf->page_base >> PAGE_SHIFT;
 	inpages = snd_buf->pages + first;
 	snd_buf->pages = rqstp->rq_enc_pages;
 	snd_buf->page_base -= first << PAGE_SHIFT;
 	/*
-	 * Give the tail its own page, in case we need extra space in the
-	 * head when wrapping:
+	 * Move the tail into its own page, in case gss_wrap needs
+	 * more space in the head when wrapping.
 	 *
-	 * call_allocate() allocates twice the slack space required
-	 * by the authentication flavor to rq_callsize.
-	 * For GSS, slack is GSS_CRED_SLACK.
+	 * Still... Why can't gss_wrap just slide the tail down?
 	 */
 	if (snd_buf->page_len || snd_buf->tail[0].iov_len) {
+		char *tmp;
+
 		tmp = page_address(rqstp->rq_enc_pages[rqstp->rq_enc_pages_num - 1]);
 		memcpy(tmp, snd_buf->tail[0].iov_base, snd_buf->tail[0].iov_len);
 		snd_buf->tail[0].iov_base = tmp;
 	}
+	status = -EIO;
+	offset = (u8 *)p - (u8 *)snd_buf->head[0].iov_base;
 	maj_stat = gss_wrap(ctx->gc_gss_ctx, offset, snd_buf, inpages);
 	/* slack space should prevent this ever happening: */
-	BUG_ON(snd_buf->len > snd_buf->buflen);
-	status = -EIO;
+	if (unlikely(snd_buf->len > snd_buf->buflen))
+		goto wrap_failed;
 	/* We're assuming that when GSS_S_CONTEXT_EXPIRED, the encryption was
 	 * done anyway, so it's safe to put the request on the wire: */
 	if (maj_stat == GSS_S_CONTEXT_EXPIRED)
 		clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
 	else if (maj_stat)
-		return status;
+		goto wrap_failed;
 
-	*opaque_len = htonl(snd_buf->len - offset);
-	/* guess whether we're in the head or the tail: */
+	*opaque_len = cpu_to_be32(snd_buf->len - offset);
+	/* guess whether the pad goes into the head or the tail: */
 	if (snd_buf->page_len || snd_buf->tail[0].iov_len)
 		iov = snd_buf->tail;
 	else
@@ -1887,37 +1883,36 @@ gss_wrap_req_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
 	snd_buf->len += pad;
 
 	return 0;
+wrap_failed:
+	return status;
 }
 
-static int
-gss_wrap_req(struct rpc_task *task,
-	     kxdreproc_t encode, void *rqstp, __be32 *p, void *obj)
+static int gss_wrap_req(struct rpc_task *task, struct xdr_stream *xdr)
 {
 	struct rpc_cred *cred = task->tk_rqstp->rq_cred;
 	struct gss_cred	*gss_cred = container_of(cred, struct gss_cred,
 			gc_base);
 	struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred);
-	int             status = -EIO;
+	int status;
 
 	dprintk("RPC: %5u %s\n", task->tk_pid, __func__);
+	status = -EIO;
 	if (ctx->gc_proc != RPC_GSS_PROC_DATA) {
 		/* The spec seems a little ambiguous here, but I think that not
 		 * wrapping context destruction requests makes the most sense.
 		 */
-		gss_wrap_req_encode(encode, rqstp, p, obj);
-		status = 0;
+		status = rpcauth_wrap_req_encode(task, xdr);
 		goto out;
 	}
 	switch (gss_cred->gc_service) {
 	case RPC_GSS_SVC_NONE:
-		gss_wrap_req_encode(encode, rqstp, p, obj);
-		status = 0;
+		status = rpcauth_wrap_req_encode(task, xdr);
 		break;
 	case RPC_GSS_SVC_INTEGRITY:
-		status = gss_wrap_req_integ(cred, ctx, encode, rqstp, p, obj);
+		status = gss_wrap_req_integ(cred, ctx, task, xdr);
 		break;
 	case RPC_GSS_SVC_PRIVACY:
-		status = gss_wrap_req_priv(cred, ctx, encode, rqstp, p, obj);
+		status = gss_wrap_req_priv(cred, ctx, task, xdr);
 		break;
 	}
 out:
diff --git a/net/sunrpc/auth_null.c b/net/sunrpc/auth_null.c
index d0ceac57c06e..797f8472c21b 100644
--- a/net/sunrpc/auth_null.c
+++ b/net/sunrpc/auth_null.c
@@ -59,15 +59,21 @@ nul_match(struct auth_cred *acred, struct rpc_cred *cred, int taskflags)
 /*
  * Marshal credential.
  */
-static __be32 *
-nul_marshal(struct rpc_task *task, __be32 *p)
+static int
+nul_marshal(struct rpc_task *task, struct xdr_stream *xdr)
 {
-	*p++ = htonl(RPC_AUTH_NULL);
-	*p++ = 0;
-	*p++ = htonl(RPC_AUTH_NULL);
-	*p++ = 0;
-
-	return p;
+	__be32 *p;
+
+	p = xdr_reserve_space(xdr, 4 * sizeof(*p));
+	if (!p)
+		return -EMSGSIZE;
+	/* Credential */
+	*p++ = rpc_auth_null;
+	*p++ = xdr_zero;
+	/* Verifier */
+	*p++ = rpc_auth_null;
+	*p   = xdr_zero;
+	return 0;
 }
 
 /*
@@ -125,6 +131,7 @@ const struct rpc_credops null_credops = {
 	.crdestroy	= nul_destroy_cred,
 	.crmatch	= nul_match,
 	.crmarshal	= nul_marshal,
+	.crwrap_req	= rpcauth_wrap_req_encode,
 	.crrefresh	= nul_refresh,
 	.crvalidate	= nul_validate,
 };
diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c
index fc8a59134640..1d5b7ed9c6f7 100644
--- a/net/sunrpc/auth_unix.c
+++ b/net/sunrpc/auth_unix.c
@@ -99,37 +99,55 @@ unx_match(struct auth_cred *acred, struct rpc_cred *cred, int flags)
  * Marshal credentials.
  * Maybe we should keep a cached credential for performance reasons.
  */
-static __be32 *
-unx_marshal(struct rpc_task *task, __be32 *p)
+static int
+unx_marshal(struct rpc_task *task, struct xdr_stream *xdr)
 {
 	struct rpc_clnt	*clnt = task->tk_client;
 	struct rpc_cred	*cred = task->tk_rqstp->rq_cred;
-	__be32		*base, *hold;
+	__be32		*p, *cred_len, *gidarr_len;
 	int		i;
 	struct group_info *gi = cred->cr_cred->group_info;
 
-	*p++ = htonl(RPC_AUTH_UNIX);
-	base = p++;
-	*p++ = htonl(jiffies/HZ);
-
-	/*
-	 * Copy the UTS nodename captured when the client was created.
-	 */
-	p = xdr_encode_array(p, clnt->cl_nodename, clnt->cl_nodelen);
-
-	*p++ = htonl((u32) from_kuid(&init_user_ns, cred->cr_cred->fsuid));
-	*p++ = htonl((u32) from_kgid(&init_user_ns, cred->cr_cred->fsgid));
-	hold = p++;
+	/* Credential */
+
+	p = xdr_reserve_space(xdr, 3 * sizeof(*p));
+	if (!p)
+		goto marshal_failed;
+	*p++ = rpc_auth_unix;
+	cred_len = p++;
+	*p++ = xdr_zero;	/* stamp */
+	if (xdr_stream_encode_opaque(xdr, clnt->cl_nodename,
+				     clnt->cl_nodelen) < 0)
+		goto marshal_failed;
+	p = xdr_reserve_space(xdr, 3 * sizeof(*p));
+	if (!p)
+		goto marshal_failed;
+	*p++ = cpu_to_be32(from_kuid(&init_user_ns, cred->cr_cred->fsuid));
+	*p++ = cpu_to_be32(from_kgid(&init_user_ns, cred->cr_cred->fsgid));
+
+	gidarr_len = p++;
 	if (gi)
 		for (i = 0; i < UNX_NGROUPS && i < gi->ngroups; i++)
-			*p++ = htonl((u32) from_kgid(&init_user_ns, gi->gid[i]));
-	*hold = htonl(p - hold - 1);		/* gid array length */
-	*base = htonl((p - base - 1) << 2);	/* cred length */
+			*p++ = cpu_to_be32(from_kgid(&init_user_ns,
+						     gi->gid[i]));
+	*gidarr_len = cpu_to_be32(p - gidarr_len - 1);
+	*cred_len = cpu_to_be32((p - cred_len - 1) << 2);
+	p = xdr_reserve_space(xdr, (p - gidarr_len - 1) << 2);
+	if (!p)
+		goto marshal_failed;
+
+	/* Verifier */
+
+	p = xdr_reserve_space(xdr, 2 * sizeof(*p));
+	if (!p)
+		goto marshal_failed;
+	*p++ = rpc_auth_null;
+	*p   = xdr_zero;
 
-	*p++ = htonl(RPC_AUTH_NULL);
-	*p++ = htonl(0);
+	return 0;
 
-	return p;
+marshal_failed:
+	return -EMSGSIZE;
 }
 
 /*
@@ -202,6 +220,7 @@ const struct rpc_credops unix_credops = {
 	.crdestroy	= unx_destroy_cred,
 	.crmatch	= unx_match,
 	.crmarshal	= unx_marshal,
+	.crwrap_req	= rpcauth_wrap_req_encode,
 	.crrefresh	= unx_refresh,
 	.crvalidate	= unx_validate,
 };
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index c4203f6138ef..d6750b7f169a 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -77,7 +77,8 @@ static void	call_timeout(struct rpc_task *task);
 static void	call_connect(struct rpc_task *task);
 static void	call_connect_status(struct rpc_task *task);
 
-static __be32	*rpc_encode_header(struct rpc_task *task);
+static int	rpc_encode_header(struct rpc_task *task,
+				  struct xdr_stream *xdr);
 static __be32	*rpc_verify_header(struct rpc_task *task);
 static int	rpc_ping(struct rpc_clnt *clnt);
 
@@ -1728,10 +1729,7 @@ static void
 rpc_xdr_encode(struct rpc_task *task)
 {
 	struct rpc_rqst	*req = task->tk_rqstp;
-	kxdreproc_t	encode;
-	__be32		*p;
-
-	dprint_status(task);
+	struct xdr_stream xdr;
 
 	xdr_buf_init(&req->rq_snd_buf,
 		     req->rq_buffer,
@@ -1740,18 +1738,13 @@ rpc_xdr_encode(struct rpc_task *task)
 		     req->rq_rbuffer,
 		     req->rq_rcvsize);
 
-	p = rpc_encode_header(task);
-	if (p == NULL)
+	req->rq_snd_buf.head[0].iov_len = 0;
+	xdr_init_encode(&xdr, &req->rq_snd_buf,
+			req->rq_snd_buf.head[0].iov_base, req);
+	if (rpc_encode_header(task, &xdr))
 		return;
 
-	encode = task->tk_msg.rpc_proc->p_encode;
-	if (encode == NULL)
-		return;
-
-	task->tk_status = rpcauth_wrap_req(task, encode, req, p,
-			task->tk_msg.rpc_argp);
-	if (task->tk_status == 0)
-		xprt_request_prepare(req);
+	task->tk_status = rpcauth_wrap_req(task, &xdr);
 }
 
 /*
@@ -1762,6 +1755,7 @@ call_encode(struct rpc_task *task)
 {
 	if (!rpc_task_need_encode(task))
 		goto out;
+	dprint_status(task);
 	/* Encode here so that rpcsec_gss can use correct sequence number. */
 	rpc_xdr_encode(task);
 	/* Did the encode result in an error condition? */
@@ -1779,6 +1773,8 @@ call_encode(struct rpc_task *task)
 			rpc_exit(task, task->tk_status);
 		}
 		return;
+	} else {
+		xprt_request_prepare(task->tk_rqstp);
 	}
 
 	/* Add task to reply queue before transmission to avoid races */
@@ -2322,25 +2318,33 @@ out_retry:
 	}
 }
 
-static __be32 *
-rpc_encode_header(struct rpc_task *task)
+static int
+rpc_encode_header(struct rpc_task *task, struct xdr_stream *xdr)
 {
 	struct rpc_clnt *clnt = task->tk_client;
 	struct rpc_rqst	*req = task->tk_rqstp;
-	__be32		*p = req->rq_svec[0].iov_base;
-
-	/* FIXME: check buffer size? */
-
-	*p++ = req->rq_xid;		/* XID */
-	*p++ = htonl(RPC_CALL);		/* CALL */
-	*p++ = htonl(RPC_VERSION);	/* RPC version */
-	*p++ = htonl(clnt->cl_prog);	/* program number */
-	*p++ = htonl(clnt->cl_vers);	/* program version */
-	*p++ = htonl(task->tk_msg.rpc_proc->p_proc);	/* procedure */
-	p = rpcauth_marshcred(task, p);
-	if (p)
-		req->rq_slen = xdr_adjust_iovec(&req->rq_svec[0], p);
-	return p;
+	__be32 *p;
+	int error;
+
+	error = -EMSGSIZE;
+	p = xdr_reserve_space(xdr, RPC_CALLHDRSIZE << 2);
+	if (!p)
+		goto out_fail;
+	*p++ = req->rq_xid;
+	*p++ = rpc_call;
+	*p++ = cpu_to_be32(RPC_VERSION);
+	*p++ = cpu_to_be32(clnt->cl_prog);
+	*p++ = cpu_to_be32(clnt->cl_vers);
+	*p   = cpu_to_be32(task->tk_msg.rpc_proc->p_proc);
+
+	error = rpcauth_marshcred(task, xdr);
+	if (error < 0)
+		goto out_fail;
+	return 0;
+out_fail:
+	trace_rpc_bad_callhdr(task);
+	rpc_exit(task, error);
+	return error;
 }
 
 static __be32 *
-- 
cgit v1.2.3


From 1aec4211204d9463d1fd209eb50453de16254599 Mon Sep 17 00:00:00 2001
From: Sudip Mukherjee <sudipm.mukherjee@gmail.com>
Date: Wed, 13 Feb 2019 08:47:06 +0000
Subject: parport: daisy: use new parport device model

Modify parport daisy driver to use the new parallel port device model.

Signed-off-by: Sudip Mukherjee <sudipm.mukherjee@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/parport/daisy.c | 32 +++++++++++++++++++++++++++++++-
 drivers/parport/probe.c |  2 +-
 drivers/parport/share.c | 10 +++++++++-
 include/linux/parport.h | 13 +++++++++++++
 4 files changed, 54 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/drivers/parport/daisy.c b/drivers/parport/daisy.c
index 5484a46dafda..56dd83a45e55 100644
--- a/drivers/parport/daisy.c
+++ b/drivers/parport/daisy.c
@@ -213,10 +213,12 @@ void parport_daisy_fini(struct parport *port)
 struct pardevice *parport_open(int devnum, const char *name)
 {
 	struct daisydev *p = topology;
+	struct pardev_cb par_cb;
 	struct parport *port;
 	struct pardevice *dev;
 	int daisy;
 
+	memset(&par_cb, 0, sizeof(par_cb));
 	spin_lock(&topology_lock);
 	while (p && p->devnum != devnum)
 		p = p->next;
@@ -230,7 +232,7 @@ struct pardevice *parport_open(int devnum, const char *name)
 	port = parport_get_port(p->port);
 	spin_unlock(&topology_lock);
 
-	dev = parport_register_device(port, name, NULL, NULL, NULL, 0, NULL);
+	dev = parport_register_dev_model(port, name, &par_cb, devnum);
 	parport_put_port(port);
 	if (!dev)
 		return NULL;
@@ -480,3 +482,31 @@ static int assign_addrs(struct parport *port)
 	kfree(deviceid);
 	return detected;
 }
+
+static int daisy_drv_probe(struct pardevice *par_dev)
+{
+	struct device_driver *drv = par_dev->dev.driver;
+
+	if (strcmp(drv->name, "daisy_drv"))
+		return -ENODEV;
+	if (strcmp(par_dev->name, daisy_dev_name))
+		return -ENODEV;
+
+	return 0;
+}
+
+static struct parport_driver daisy_driver = {
+	.name = "daisy_drv",
+	.probe = daisy_drv_probe,
+	.devmodel = true,
+};
+
+int daisy_drv_init(void)
+{
+	return parport_register_driver(&daisy_driver);
+}
+
+void daisy_drv_exit(void)
+{
+	parport_unregister_driver(&daisy_driver);
+}
diff --git a/drivers/parport/probe.c b/drivers/parport/probe.c
index e035174ba205..e5e6a463a941 100644
--- a/drivers/parport/probe.c
+++ b/drivers/parport/probe.c
@@ -257,7 +257,7 @@ static ssize_t parport_read_device_id (struct parport *port, char *buffer,
 ssize_t parport_device_id (int devnum, char *buffer, size_t count)
 {
 	ssize_t retval = -ENXIO;
-	struct pardevice *dev = parport_open (devnum, "Device ID probe");
+	struct pardevice *dev = parport_open(devnum, daisy_dev_name);
 	if (!dev)
 		return -ENXIO;
 
diff --git a/drivers/parport/share.c b/drivers/parport/share.c
index 5dc53d420ca8..0171b8dbcdcd 100644
--- a/drivers/parport/share.c
+++ b/drivers/parport/share.c
@@ -137,11 +137,19 @@ static struct bus_type parport_bus_type = {
 
 int parport_bus_init(void)
 {
-	return bus_register(&parport_bus_type);
+	int retval;
+
+	retval = bus_register(&parport_bus_type);
+	if (retval)
+		return retval;
+	daisy_drv_init();
+
+	return 0;
 }
 
 void parport_bus_exit(void)
 {
+	daisy_drv_exit();
 	bus_unregister(&parport_bus_type);
 }
 
diff --git a/include/linux/parport.h b/include/linux/parport.h
index 397607a0c0eb..f41f1d041e2c 100644
--- a/include/linux/parport.h
+++ b/include/linux/parport.h
@@ -460,6 +460,7 @@ extern size_t parport_ieee1284_epp_read_addr (struct parport *,
 					      void *, size_t, int);
 
 /* IEEE1284.3 functions */
+#define daisy_dev_name "Device ID probe"
 extern int parport_daisy_init (struct parport *port);
 extern void parport_daisy_fini (struct parport *port);
 extern struct pardevice *parport_open (int devnum, const char *name);
@@ -468,6 +469,18 @@ extern ssize_t parport_device_id (int devnum, char *buffer, size_t len);
 extern void parport_daisy_deselect_all (struct parport *port);
 extern int parport_daisy_select (struct parport *port, int daisy, int mode);
 
+#ifdef CONFIG_PARPORT_1284
+extern int daisy_drv_init(void);
+extern void daisy_drv_exit(void);
+#else
+static inline int daisy_drv_init(void)
+{
+	return 0;
+}
+
+static inline void daisy_drv_exit(void) {}
+#endif
+
 /* Lowlevel drivers _can_ call this support function to handle irqs.  */
 static inline void parport_generic_irq(struct parport *port)
 {
-- 
cgit v1.2.3


From 7f5667a5f8c4ff85b14ccce9d41f9244bd30ab68 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 11 Feb 2019 11:24:53 -0500
Subject: SUNRPC: Clean up rpc_verify_header()

- Recover some instruction count because I'm about to introduce a
  few xdr_inline_decode call sites
- Replace dprintk() call sites with trace points
- Reduce the hot path so it fits in fewer cachelines

I've also renamed it rpc_decode_header() to match everything else
in the RPC client.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 include/linux/sunrpc/xdr.h    |   7 +-
 include/trace/events/sunrpc.h |  52 ++++++++++
 net/sunrpc/clnt.c             | 223 ++++++++++++++++++------------------------
 3 files changed, 154 insertions(+), 128 deletions(-)

(limited to 'include')

diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index 6df9ac1ca471..c54041950cc0 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -92,6 +92,9 @@ xdr_buf_init(struct xdr_buf *buf, void *start, size_t len)
 #define	rpc_auth_gss	cpu_to_be32(RPC_AUTH_GSS)
 
 #define	rpc_call	cpu_to_be32(RPC_CALL)
+#define	rpc_reply	cpu_to_be32(RPC_REPLY)
+
+#define	rpc_msg_accepted	cpu_to_be32(RPC_MSG_ACCEPTED)
 
 #define	rpc_success		cpu_to_be32(RPC_SUCCESS)
 #define	rpc_prog_unavail	cpu_to_be32(RPC_PROG_UNAVAIL)
@@ -101,6 +104,9 @@ xdr_buf_init(struct xdr_buf *buf, void *start, size_t len)
 #define	rpc_system_err		cpu_to_be32(RPC_SYSTEM_ERR)
 #define	rpc_drop_reply		cpu_to_be32(RPC_DROP_REPLY)
 
+#define	rpc_mismatch		cpu_to_be32(RPC_MISMATCH)
+#define	rpc_auth_error		cpu_to_be32(RPC_AUTH_ERROR)
+
 #define	rpc_auth_ok		cpu_to_be32(RPC_AUTH_OK)
 #define	rpc_autherr_badcred	cpu_to_be32(RPC_AUTH_BADCRED)
 #define	rpc_autherr_rejectedcred cpu_to_be32(RPC_AUTH_REJECTEDCRED)
@@ -109,7 +115,6 @@ xdr_buf_init(struct xdr_buf *buf, void *start, size_t len)
 #define	rpc_autherr_tooweak	cpu_to_be32(RPC_AUTH_TOOWEAK)
 #define	rpcsec_gsserr_credproblem	cpu_to_be32(RPCSEC_GSS_CREDPROBLEM)
 #define	rpcsec_gsserr_ctxproblem	cpu_to_be32(RPCSEC_GSS_CTXPROBLEM)
-#define	rpc_autherr_oldseqnum	cpu_to_be32(101)
 
 /*
  * Miscellaneous XDR helper functions
diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
index 2b3f9d139e75..0654e9c50371 100644
--- a/include/trace/events/sunrpc.h
+++ b/include/trace/events/sunrpc.h
@@ -241,6 +241,58 @@ DECLARE_EVENT_CLASS(rpc_failure,
 			TP_ARGS(task))
 
 DEFINE_RPC_FAILURE(callhdr);
+DEFINE_RPC_FAILURE(verifier);
+
+DECLARE_EVENT_CLASS(rpc_reply_event,
+
+	TP_PROTO(
+		const struct rpc_task *task
+	),
+
+	TP_ARGS(task),
+
+	TP_STRUCT__entry(
+		__field(unsigned int, task_id)
+		__field(unsigned int, client_id)
+		__field(u32, xid)
+		__string(progname, task->tk_client->cl_program->name)
+		__field(u32, version)
+		__string(procname, rpc_proc_name(task))
+		__string(servername, task->tk_xprt->servername)
+	),
+
+	TP_fast_assign(
+		__entry->task_id = task->tk_pid;
+		__entry->client_id = task->tk_client->cl_clid;
+		__entry->xid = be32_to_cpu(task->tk_rqstp->rq_xid);
+		__assign_str(progname, task->tk_client->cl_program->name)
+		__entry->version = task->tk_client->cl_vers;
+		__assign_str(procname, rpc_proc_name(task))
+		__assign_str(servername, task->tk_xprt->servername)
+	),
+
+	TP_printk("task:%u@%d server=%s xid=0x%08x %sv%d %s",
+		__entry->task_id, __entry->client_id, __get_str(servername),
+		__entry->xid, __get_str(progname), __entry->version,
+		__get_str(procname))
+)
+
+#define DEFINE_RPC_REPLY_EVENT(name)					\
+	DEFINE_EVENT(rpc_reply_event, rpc__##name,			\
+			TP_PROTO(					\
+				const struct rpc_task *task		\
+			),						\
+			TP_ARGS(task))
+
+DEFINE_RPC_REPLY_EVENT(prog_unavail);
+DEFINE_RPC_REPLY_EVENT(prog_mismatch);
+DEFINE_RPC_REPLY_EVENT(proc_unavail);
+DEFINE_RPC_REPLY_EVENT(garbage_args);
+DEFINE_RPC_REPLY_EVENT(unparsable);
+DEFINE_RPC_REPLY_EVENT(mismatch);
+DEFINE_RPC_REPLY_EVENT(stale_creds);
+DEFINE_RPC_REPLY_EVENT(bad_creds);
+DEFINE_RPC_REPLY_EVENT(auth_tooweak);
 
 TRACE_EVENT(rpc_stats_latency,
 
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index d6750b7f169a..e9735089bd66 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -79,7 +79,7 @@ static void	call_connect_status(struct rpc_task *task);
 
 static int	rpc_encode_header(struct rpc_task *task,
 				  struct xdr_stream *xdr);
-static __be32	*rpc_verify_header(struct rpc_task *task);
+static __be32	*rpc_decode_header(struct rpc_task *task);
 static int	rpc_ping(struct rpc_clnt *clnt);
 
 static void rpc_register_client(struct rpc_clnt *clnt)
@@ -2292,7 +2292,7 @@ call_decode(struct rpc_task *task)
 		goto out_retry;
 	}
 
-	p = rpc_verify_header(task);
+	p = rpc_decode_header(task);
 	if (IS_ERR(p)) {
 		if (p == ERR_PTR(-EAGAIN))
 			goto out_retry;
@@ -2308,7 +2308,7 @@ call_decode(struct rpc_task *task)
 	return;
 out_retry:
 	task->tk_status = 0;
-	/* Note: rpc_verify_header() may have freed the RPC slot */
+	/* Note: rpc_decode_header() may have freed the RPC slot */
 	if (task->tk_rqstp == req) {
 		xdr_free_bvec(&req->rq_rcv_buf);
 		req->rq_reply_bytes_recvd = req->rq_rcv_buf.len = 0;
@@ -2347,164 +2347,133 @@ out_fail:
 	return error;
 }
 
-static __be32 *
-rpc_verify_header(struct rpc_task *task)
+static noinline __be32 *
+rpc_decode_header(struct rpc_task *task)
 {
 	struct rpc_clnt *clnt = task->tk_client;
 	struct kvec *iov = &task->tk_rqstp->rq_rcv_buf.head[0];
 	int len = task->tk_rqstp->rq_rcv_buf.len >> 2;
 	__be32	*p = iov->iov_base;
-	u32 n;
 	int error = -EACCES;
 
-	if ((task->tk_rqstp->rq_rcv_buf.len & 3) != 0) {
-		/* RFC-1014 says that the representation of XDR data must be a
-		 * multiple of four bytes
-		 * - if it isn't pointer subtraction in the NFS client may give
-		 *   undefined results
-		 */
-		dprintk("RPC: %5u %s: XDR representation not a multiple of"
-		       " 4 bytes: 0x%x\n", task->tk_pid, __func__,
-		       task->tk_rqstp->rq_rcv_buf.len);
-		error = -EIO;
-		goto out_err;
-	}
+	/* RFC-1014 says that the representation of XDR data must be a
+	 * multiple of four bytes
+	 * - if it isn't pointer subtraction in the NFS client may give
+	 *   undefined results
+	 */
+	if (task->tk_rqstp->rq_rcv_buf.len & 3)
+		goto out_badlen;
 	if ((len -= 3) < 0)
-		goto out_overflow;
+		goto out_unparsable;
 
-	p += 1; /* skip XID */
-	if ((n = ntohl(*p++)) != RPC_REPLY) {
-		dprintk("RPC: %5u %s: not an RPC reply: %x\n",
-			task->tk_pid, __func__, n);
-		error = -EIO;
-		goto out_garbage;
-	}
+	p++;	/* skip XID */
+	if (*p++ != rpc_reply)
+		goto out_unparsable;
+	if (*p++ != rpc_msg_accepted)
+		goto out_msg_denied;
 
-	if ((n = ntohl(*p++)) != RPC_MSG_ACCEPTED) {
-		if (--len < 0)
-			goto out_overflow;
-		switch ((n = ntohl(*p++))) {
-		case RPC_AUTH_ERROR:
-			break;
-		case RPC_MISMATCH:
-			dprintk("RPC: %5u %s: RPC call version mismatch!\n",
-				task->tk_pid, __func__);
-			error = -EPROTONOSUPPORT;
-			goto out_err;
-		default:
-			dprintk("RPC: %5u %s: RPC call rejected, "
-				"unknown error: %x\n",
-				task->tk_pid, __func__, n);
-			error = -EIO;
-			goto out_err;
-		}
-		if (--len < 0)
-			goto out_overflow;
-		switch ((n = ntohl(*p++))) {
-		case RPC_AUTH_REJECTEDCRED:
-		case RPC_AUTH_REJECTEDVERF:
-		case RPCSEC_GSS_CREDPROBLEM:
-		case RPCSEC_GSS_CTXPROBLEM:
-			if (!task->tk_cred_retry)
-				break;
-			task->tk_cred_retry--;
-			dprintk("RPC: %5u %s: retry stale creds\n",
-					task->tk_pid, __func__);
-			rpcauth_invalcred(task);
-			/* Ensure we obtain a new XID! */
-			xprt_release(task);
-			task->tk_action = call_reserve;
-			goto out_retry;
-		case RPC_AUTH_BADCRED:
-		case RPC_AUTH_BADVERF:
-			/* possibly garbled cred/verf? */
-			if (!task->tk_garb_retry)
-				break;
-			task->tk_garb_retry--;
-			dprintk("RPC: %5u %s: retry garbled creds\n",
-					task->tk_pid, __func__);
-			task->tk_action = call_encode;
-			goto out_retry;
-		case RPC_AUTH_TOOWEAK:
-			printk(KERN_NOTICE "RPC: server %s requires stronger "
-			       "authentication.\n",
-			       task->tk_xprt->servername);
-			break;
-		default:
-			dprintk("RPC: %5u %s: unknown auth error: %x\n",
-					task->tk_pid, __func__, n);
-			error = -EIO;
-		}
-		dprintk("RPC: %5u %s: call rejected %d\n",
-				task->tk_pid, __func__, n);
-		goto out_err;
-	}
 	p = rpcauth_checkverf(task, p);
-	if (IS_ERR(p)) {
-		error = PTR_ERR(p);
-		dprintk("RPC: %5u %s: auth check failed with %d\n",
-				task->tk_pid, __func__, error);
-		goto out_garbage;		/* bad verifier, retry */
-	}
+	if (IS_ERR(p))
+		goto out_verifier;
+
 	len = p - (__be32 *)iov->iov_base - 1;
 	if (len < 0)
-		goto out_overflow;
-	switch ((n = ntohl(*p++))) {
-	case RPC_SUCCESS:
+		goto out_unparsable;
+	switch (*p++) {
+	case rpc_success:
 		return p;
-	case RPC_PROG_UNAVAIL:
-		dprintk("RPC: %5u %s: program %u is unsupported "
-				"by server %s\n", task->tk_pid, __func__,
-				(unsigned int)clnt->cl_prog,
-				task->tk_xprt->servername);
+	case rpc_prog_unavail:
+		trace_rpc__prog_unavail(task);
 		error = -EPFNOSUPPORT;
 		goto out_err;
-	case RPC_PROG_MISMATCH:
-		dprintk("RPC: %5u %s: program %u, version %u unsupported "
-				"by server %s\n", task->tk_pid, __func__,
-				(unsigned int)clnt->cl_prog,
-				(unsigned int)clnt->cl_vers,
-				task->tk_xprt->servername);
+	case rpc_prog_mismatch:
+		trace_rpc__prog_mismatch(task);
 		error = -EPROTONOSUPPORT;
 		goto out_err;
-	case RPC_PROC_UNAVAIL:
-		dprintk("RPC: %5u %s: proc %s unsupported by program %u, "
-				"version %u on server %s\n",
-				task->tk_pid, __func__,
-				rpc_proc_name(task),
-				clnt->cl_prog, clnt->cl_vers,
-				task->tk_xprt->servername);
+	case rpc_proc_unavail:
+		trace_rpc__proc_unavail(task);
 		error = -EOPNOTSUPP;
 		goto out_err;
-	case RPC_GARBAGE_ARGS:
-		dprintk("RPC: %5u %s: server saw garbage\n",
-				task->tk_pid, __func__);
-		break;			/* retry */
+	case rpc_garbage_args:
+		trace_rpc__garbage_args(task);
+		break;
 	default:
-		dprintk("RPC: %5u %s: server accept status: %x\n",
-				task->tk_pid, __func__, n);
-		/* Also retry */
+		trace_rpc__unparsable(task);
 	}
 
 out_garbage:
 	clnt->cl_stats->rpcgarbage++;
 	if (task->tk_garb_retry) {
 		task->tk_garb_retry--;
-		dprintk("RPC: %5u %s: retrying\n",
-				task->tk_pid, __func__);
 		task->tk_action = call_encode;
-out_retry:
 		return ERR_PTR(-EAGAIN);
 	}
 out_err:
 	rpc_exit(task, error);
-	dprintk("RPC: %5u %s: call failed with error %d\n", task->tk_pid,
-			__func__, error);
 	return ERR_PTR(error);
-out_overflow:
-	dprintk("RPC: %5u %s: server reply was truncated.\n", task->tk_pid,
-			__func__);
+
+out_badlen:
+	trace_rpc__unparsable(task);
+	error = -EIO;
+	goto out_err;
+
+out_unparsable:
+	trace_rpc__unparsable(task);
+	error = -EIO;
 	goto out_garbage;
+
+out_verifier:
+	trace_rpc_bad_verifier(task);
+	error = PTR_ERR(p);
+	goto out_garbage;
+
+out_msg_denied:
+	switch (*p++) {
+	case rpc_auth_error:
+		break;
+	case rpc_mismatch:
+		trace_rpc__mismatch(task);
+		error = -EPROTONOSUPPORT;
+		goto out_err;
+	default:
+		trace_rpc__unparsable(task);
+		error = -EIO;
+		goto out_err;
+	}
+
+	switch (*p++) {
+	case rpc_autherr_rejectedcred:
+	case rpc_autherr_rejectedverf:
+	case rpcsec_gsserr_credproblem:
+	case rpcsec_gsserr_ctxproblem:
+		if (!task->tk_cred_retry)
+			break;
+		task->tk_cred_retry--;
+		trace_rpc__stale_creds(task);
+		rpcauth_invalcred(task);
+		/* Ensure we obtain a new XID! */
+		xprt_release(task);
+		task->tk_action = call_reserve;
+		return ERR_PTR(-EAGAIN);
+	case rpc_autherr_badcred:
+	case rpc_autherr_badverf:
+		/* possibly garbled cred/verf? */
+		if (!task->tk_garb_retry)
+			break;
+		task->tk_garb_retry--;
+		trace_rpc__bad_creds(task);
+		task->tk_action = call_encode;
+		return ERR_PTR(-EAGAIN);
+	case rpc_autherr_tooweak:
+		trace_rpc__auth_tooweak(task);
+		pr_warn("RPC: server %s requires stronger authentication.\n",
+			task->tk_xprt->servername);
+		break;
+	default:
+		trace_rpc__unparsable(task);
+		error = -EIO;
+	}
+	goto out_err;
 }
 
 static void rpcproc_encode_null(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
-- 
cgit v1.2.3


From a4eaed9f9a895b16bb2c54e0ff6b3c99404fec92 Mon Sep 17 00:00:00 2001
From: Maxime Chevallier <maxime.chevallier@bootlin.com>
Date: Mon, 11 Feb 2019 15:25:26 +0100
Subject: net: phy: Mask-out non-compatible modes when setting the max-speed

When setting a PHY's max speed using either the max-speed DT property
or ethtool, we should mask-out all non-compatible modes according to the
settings table, instead of just the 10/100BASET modes.

Signed-off-by: Maxime Chevallier <maxime.chevallier@bootlin.com>
Suggested-by: Russell King <rmk+kernel@armlinux.org.uk>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/phy-core.c   | 45 +++++++++++++++++++++++++++++++++++++
 drivers/net/phy/phy_device.c | 53 --------------------------------------------
 include/linux/phy.h          |  1 +
 3 files changed, 46 insertions(+), 53 deletions(-)

(limited to 'include')

diff --git a/drivers/net/phy/phy-core.c b/drivers/net/phy/phy-core.c
index cdea028d1328..855abf487279 100644
--- a/drivers/net/phy/phy-core.c
+++ b/drivers/net/phy/phy-core.c
@@ -4,6 +4,7 @@
  */
 #include <linux/export.h>
 #include <linux/phy.h>
+#include <linux/of.h>
 
 const char *phy_speed_to_str(int speed)
 {
@@ -338,6 +339,50 @@ size_t phy_speeds(unsigned int *speeds, size_t size,
 	return count;
 }
 
+static int __set_phy_supported(struct phy_device *phydev, u32 max_speed)
+{
+	const struct phy_setting *p;
+	int i;
+
+	for (i = 0, p = settings; i < ARRAY_SIZE(settings); i++, p++) {
+		if (p->speed > max_speed)
+			linkmode_clear_bit(p->bit, phydev->supported);
+		else
+			break;
+	}
+
+	return 0;
+}
+
+int phy_set_max_speed(struct phy_device *phydev, u32 max_speed)
+{
+	int err;
+
+	err = __set_phy_supported(phydev, max_speed);
+	if (err)
+		return err;
+
+	linkmode_copy(phydev->advertising, phydev->supported);
+
+	return 0;
+}
+EXPORT_SYMBOL(phy_set_max_speed);
+
+void of_set_phy_supported(struct phy_device *phydev)
+{
+	struct device_node *node = phydev->mdio.dev.of_node;
+	u32 max_speed;
+
+	if (!IS_ENABLED(CONFIG_OF_MDIO))
+		return;
+
+	if (!node)
+		return;
+
+	if (!of_property_read_u32(node, "max-speed", &max_speed))
+		__set_phy_supported(phydev, max_speed);
+}
+
 /**
  * phy_resolve_aneg_linkmode - resolve the advertisements into phy settings
  * @phydev: The phy_device struct
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index 2c61282a2726..64497ec293e1 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -1949,44 +1949,6 @@ int genphy_loopback(struct phy_device *phydev, bool enable)
 }
 EXPORT_SYMBOL(genphy_loopback);
 
-static int __set_phy_supported(struct phy_device *phydev, u32 max_speed)
-{
-	switch (max_speed) {
-	case SPEED_10:
-		linkmode_clear_bit(ETHTOOL_LINK_MODE_100baseT_Half_BIT,
-				   phydev->supported);
-		linkmode_clear_bit(ETHTOOL_LINK_MODE_100baseT_Full_BIT,
-				   phydev->supported);
-		/* fall through */
-	case SPEED_100:
-		linkmode_clear_bit(ETHTOOL_LINK_MODE_1000baseT_Half_BIT,
-				   phydev->supported);
-		linkmode_clear_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT,
-				   phydev->supported);
-		break;
-	case SPEED_1000:
-		break;
-	default:
-		return -ENOTSUPP;
-	}
-
-	return 0;
-}
-
-int phy_set_max_speed(struct phy_device *phydev, u32 max_speed)
-{
-	int err;
-
-	err = __set_phy_supported(phydev, max_speed);
-	if (err)
-		return err;
-
-	linkmode_copy(phydev->advertising, phydev->supported);
-
-	return 0;
-}
-EXPORT_SYMBOL(phy_set_max_speed);
-
 /**
  * phy_remove_link_mode - Remove a supported link mode
  * @phydev: phy_device structure to remove link mode from
@@ -2117,21 +2079,6 @@ bool phy_validate_pause(struct phy_device *phydev,
 }
 EXPORT_SYMBOL(phy_validate_pause);
 
-static void of_set_phy_supported(struct phy_device *phydev)
-{
-	struct device_node *node = phydev->mdio.dev.of_node;
-	u32 max_speed;
-
-	if (!IS_ENABLED(CONFIG_OF_MDIO))
-		return;
-
-	if (!node)
-		return;
-
-	if (!of_property_read_u32(node, "max-speed", &max_speed))
-		__set_phy_supported(phydev, max_speed);
-}
-
 static void of_set_phy_eee_broken(struct phy_device *phydev)
 {
 	struct device_node *node = phydev->mdio.dev.of_node;
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 378da9a6165e..20344c7744d8 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -673,6 +673,7 @@ phy_lookup_setting(int speed, int duplex, const unsigned long *mask,
 		   bool exact);
 size_t phy_speeds(unsigned int *speeds, size_t size,
 		  unsigned long *mask);
+void of_set_phy_supported(struct phy_device *phydev);
 
 static inline bool __phy_is_started(struct phy_device *phydev)
 {
-- 
cgit v1.2.3


From 3feb9b23bf4cbf9f34568035170c6f1c25416523 Mon Sep 17 00:00:00 2001
From: Maxime Chevallier <maxime.chevallier@bootlin.com>
Date: Mon, 11 Feb 2019 15:25:27 +0100
Subject: net: phy: Move of_set_phy_eee_broken to phy-core.c

Since of_set_phy_supported was moved to phy-core.c, we can also move
of_set_phy_eee_broken to the same location, so that we have all OF
functions in the same place.

This patch doesn't intend to introduce any change in behaviour.

Signed-off-by: Maxime Chevallier <maxime.chevallier@bootlin.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/phy-core.c   | 27 +++++++++++++++++++++++++++
 drivers/net/phy/phy_device.c | 28 ----------------------------
 include/linux/phy.h          |  1 +
 3 files changed, 28 insertions(+), 28 deletions(-)

(limited to 'include')

diff --git a/drivers/net/phy/phy-core.c b/drivers/net/phy/phy-core.c
index 855abf487279..de58a59815d5 100644
--- a/drivers/net/phy/phy-core.c
+++ b/drivers/net/phy/phy-core.c
@@ -383,6 +383,33 @@ void of_set_phy_supported(struct phy_device *phydev)
 		__set_phy_supported(phydev, max_speed);
 }
 
+void of_set_phy_eee_broken(struct phy_device *phydev)
+{
+	struct device_node *node = phydev->mdio.dev.of_node;
+	u32 broken = 0;
+
+	if (!IS_ENABLED(CONFIG_OF_MDIO))
+		return;
+
+	if (!node)
+		return;
+
+	if (of_property_read_bool(node, "eee-broken-100tx"))
+		broken |= MDIO_EEE_100TX;
+	if (of_property_read_bool(node, "eee-broken-1000t"))
+		broken |= MDIO_EEE_1000T;
+	if (of_property_read_bool(node, "eee-broken-10gt"))
+		broken |= MDIO_EEE_10GT;
+	if (of_property_read_bool(node, "eee-broken-1000kx"))
+		broken |= MDIO_EEE_1000KX;
+	if (of_property_read_bool(node, "eee-broken-10gkx4"))
+		broken |= MDIO_EEE_10GKX4;
+	if (of_property_read_bool(node, "eee-broken-10gkr"))
+		broken |= MDIO_EEE_10GKR;
+
+	phydev->eee_broken_modes = broken;
+}
+
 /**
  * phy_resolve_aneg_linkmode - resolve the advertisements into phy settings
  * @phydev: The phy_device struct
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index 64497ec293e1..a752de2fff5e 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -30,7 +30,6 @@
 #include <linux/mdio.h>
 #include <linux/io.h>
 #include <linux/uaccess.h>
-#include <linux/of.h>
 
 MODULE_DESCRIPTION("PHY library");
 MODULE_AUTHOR("Andy Fleming");
@@ -2079,33 +2078,6 @@ bool phy_validate_pause(struct phy_device *phydev,
 }
 EXPORT_SYMBOL(phy_validate_pause);
 
-static void of_set_phy_eee_broken(struct phy_device *phydev)
-{
-	struct device_node *node = phydev->mdio.dev.of_node;
-	u32 broken = 0;
-
-	if (!IS_ENABLED(CONFIG_OF_MDIO))
-		return;
-
-	if (!node)
-		return;
-
-	if (of_property_read_bool(node, "eee-broken-100tx"))
-		broken |= MDIO_EEE_100TX;
-	if (of_property_read_bool(node, "eee-broken-1000t"))
-		broken |= MDIO_EEE_1000T;
-	if (of_property_read_bool(node, "eee-broken-10gt"))
-		broken |= MDIO_EEE_10GT;
-	if (of_property_read_bool(node, "eee-broken-1000kx"))
-		broken |= MDIO_EEE_1000KX;
-	if (of_property_read_bool(node, "eee-broken-10gkx4"))
-		broken |= MDIO_EEE_10GKX4;
-	if (of_property_read_bool(node, "eee-broken-10gkr"))
-		broken |= MDIO_EEE_10GKR;
-
-	phydev->eee_broken_modes = broken;
-}
-
 static bool phy_drv_supports_irq(struct phy_driver *phydrv)
 {
 	return phydrv->config_intr && phydrv->ack_interrupt;
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 20344c7744d8..1a1d93a2a906 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -674,6 +674,7 @@ phy_lookup_setting(int speed, int duplex, const unsigned long *mask,
 size_t phy_speeds(unsigned int *speeds, size_t size,
 		  unsigned long *mask);
 void of_set_phy_supported(struct phy_device *phydev);
+void of_set_phy_eee_broken(struct phy_device *phydev);
 
 static inline bool __phy_is_started(struct phy_device *phydev)
 {
-- 
cgit v1.2.3


From ac3f5533343f6ec7fa24a27f0ae22bbfd27e0b23 Mon Sep 17 00:00:00 2001
From: Maxime Chevallier <maxime.chevallier@bootlin.com>
Date: Mon, 11 Feb 2019 15:25:28 +0100
Subject: net: phy: Extract genphy_c45_pma_read_abilities from marvell10g

Marvell 10G PHY driver has a generic way of initializing the supported
link modes by reading the PHY's C45 PMA abilities. This can be made
generic, since these registers are part of the 802.3 specifications.

This commit extracts the config_init link_mode initialization code from
marvell10g and uses it to introduce the genphy_c45_pma_read_abilities
function.

Only PMA modes are read, it's still up to the caller to set the Pause
parameters.

Signed-off-by: Maxime Chevallier <maxime.chevallier@bootlin.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/marvell10g.c | 78 +++++---------------------------------------
 drivers/net/phy/phy-c45.c    | 74 +++++++++++++++++++++++++++++++++++++++++
 include/linux/phy.h          |  1 +
 3 files changed, 83 insertions(+), 70 deletions(-)

(limited to 'include')

diff --git a/drivers/net/phy/marvell10g.c b/drivers/net/phy/marvell10g.c
index 08362dc657cd..496805c0ddfe 100644
--- a/drivers/net/phy/marvell10g.c
+++ b/drivers/net/phy/marvell10g.c
@@ -233,8 +233,7 @@ static int mv3310_resume(struct phy_device *phydev)
 
 static int mv3310_config_init(struct phy_device *phydev)
 {
-	__ETHTOOL_DECLARE_LINK_MODE_MASK(supported) = { 0, };
-	int val;
+	int ret, val;
 
 	/* Check that the PHY interface type is compatible */
 	if (phydev->interface != PHY_INTERFACE_MODE_SGMII &&
@@ -243,8 +242,8 @@ static int mv3310_config_init(struct phy_device *phydev)
 	    phydev->interface != PHY_INTERFACE_MODE_10GKR)
 		return -ENODEV;
 
-	__set_bit(ETHTOOL_LINK_MODE_Pause_BIT, supported);
-	__set_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, supported);
+	__set_bit(ETHTOOL_LINK_MODE_Pause_BIT, phydev->supported);
+	__set_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, phydev->supported);
 
 	if (phydev->c45_ids.devices_in_package & MDIO_DEVS_AN) {
 		val = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_STAT1);
@@ -252,74 +251,13 @@ static int mv3310_config_init(struct phy_device *phydev)
 			return val;
 
 		if (val & MDIO_AN_STAT1_ABLE)
-			__set_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, supported);
+			__set_bit(ETHTOOL_LINK_MODE_Autoneg_BIT,
+				  phydev->supported);
 	}
 
-	val = phy_read_mmd(phydev, MDIO_MMD_PMAPMD, MDIO_STAT2);
-	if (val < 0)
-		return val;
-
-	/* Ethtool does not support the WAN mode bits */
-	if (val & (MDIO_PMA_STAT2_10GBSR | MDIO_PMA_STAT2_10GBLR |
-		   MDIO_PMA_STAT2_10GBER | MDIO_PMA_STAT2_10GBLX4 |
-		   MDIO_PMA_STAT2_10GBSW | MDIO_PMA_STAT2_10GBLW |
-		   MDIO_PMA_STAT2_10GBEW))
-		__set_bit(ETHTOOL_LINK_MODE_FIBRE_BIT, supported);
-	if (val & MDIO_PMA_STAT2_10GBSR)
-		__set_bit(ETHTOOL_LINK_MODE_10000baseSR_Full_BIT, supported);
-	if (val & MDIO_PMA_STAT2_10GBLR)
-		__set_bit(ETHTOOL_LINK_MODE_10000baseLR_Full_BIT, supported);
-	if (val & MDIO_PMA_STAT2_10GBER)
-		__set_bit(ETHTOOL_LINK_MODE_10000baseER_Full_BIT, supported);
-
-	if (val & MDIO_PMA_STAT2_EXTABLE) {
-		val = phy_read_mmd(phydev, MDIO_MMD_PMAPMD, MDIO_PMA_EXTABLE);
-		if (val < 0)
-			return val;
-
-		if (val & (MDIO_PMA_EXTABLE_10GBT | MDIO_PMA_EXTABLE_1000BT |
-			   MDIO_PMA_EXTABLE_100BTX | MDIO_PMA_EXTABLE_10BT))
-			__set_bit(ETHTOOL_LINK_MODE_TP_BIT, supported);
-		if (val & MDIO_PMA_EXTABLE_10GBLRM)
-			__set_bit(ETHTOOL_LINK_MODE_FIBRE_BIT, supported);
-		if (val & (MDIO_PMA_EXTABLE_10GBKX4 | MDIO_PMA_EXTABLE_10GBKR |
-			   MDIO_PMA_EXTABLE_1000BKX))
-			__set_bit(ETHTOOL_LINK_MODE_Backplane_BIT, supported);
-		if (val & MDIO_PMA_EXTABLE_10GBLRM)
-			__set_bit(ETHTOOL_LINK_MODE_10000baseLRM_Full_BIT,
-				  supported);
-		if (val & MDIO_PMA_EXTABLE_10GBT)
-			__set_bit(ETHTOOL_LINK_MODE_10000baseT_Full_BIT,
-				  supported);
-		if (val & MDIO_PMA_EXTABLE_10GBKX4)
-			__set_bit(ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT,
-				  supported);
-		if (val & MDIO_PMA_EXTABLE_10GBKR)
-			__set_bit(ETHTOOL_LINK_MODE_10000baseKR_Full_BIT,
-				  supported);
-		if (val & MDIO_PMA_EXTABLE_1000BT)
-			__set_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT,
-				  supported);
-		if (val & MDIO_PMA_EXTABLE_1000BKX)
-			__set_bit(ETHTOOL_LINK_MODE_1000baseKX_Full_BIT,
-				  supported);
-		if (val & MDIO_PMA_EXTABLE_100BTX) {
-			__set_bit(ETHTOOL_LINK_MODE_100baseT_Full_BIT,
-				  supported);
-			__set_bit(ETHTOOL_LINK_MODE_100baseT_Half_BIT,
-				  supported);
-		}
-		if (val & MDIO_PMA_EXTABLE_10BT) {
-			__set_bit(ETHTOOL_LINK_MODE_10baseT_Full_BIT,
-				  supported);
-			__set_bit(ETHTOOL_LINK_MODE_10baseT_Half_BIT,
-				  supported);
-		}
-	}
-
-	linkmode_copy(phydev->supported, supported);
-	linkmode_and(phydev->advertising, phydev->advertising,
-		     phydev->supported);
+	ret = genphy_c45_pma_read_abilities(phydev);
+	if (ret)
+		return ret;
 
 	return 0;
 }
diff --git a/drivers/net/phy/phy-c45.c b/drivers/net/phy/phy-c45.c
index eff9e5a4d831..6f028de4dae1 100644
--- a/drivers/net/phy/phy-c45.c
+++ b/drivers/net/phy/phy-c45.c
@@ -271,6 +271,80 @@ int genphy_c45_read_mdix(struct phy_device *phydev)
 }
 EXPORT_SYMBOL_GPL(genphy_c45_read_mdix);
 
+/**
+ * genphy_c45_pma_read_abilities - read supported link modes from PMA
+ * @phydev: target phy_device struct
+ *
+ * Read the supported link modes from the PMA Status 2 (1.8) register. If bit
+ * 1.8.9 is set, the list of supported modes is build using the values in the
+ * PMA Extended Abilities (1.11) register, indicating 1000BASET an 10G related
+ * modes. If bit 1.11.14 is set, then the list is also extended with the modes
+ * in the 2.5G/5G PMA Extended register (1.21), indicating if 2.5GBASET and
+ * 5GBASET are supported.
+ */
+int genphy_c45_pma_read_abilities(struct phy_device *phydev)
+{
+	int val;
+
+	val = phy_read_mmd(phydev, MDIO_MMD_PMAPMD, MDIO_STAT2);
+	if (val < 0)
+		return val;
+
+	linkmode_mod_bit(ETHTOOL_LINK_MODE_10000baseSR_Full_BIT,
+			 phydev->supported,
+			 val & MDIO_PMA_STAT2_10GBSR);
+
+	linkmode_mod_bit(ETHTOOL_LINK_MODE_10000baseLR_Full_BIT,
+			 phydev->supported,
+			 val & MDIO_PMA_STAT2_10GBLR);
+
+	linkmode_mod_bit(ETHTOOL_LINK_MODE_10000baseER_Full_BIT,
+			 phydev->supported,
+			 val & MDIO_PMA_STAT2_10GBER);
+
+	if (val & MDIO_PMA_STAT2_EXTABLE) {
+		val = phy_read_mmd(phydev, MDIO_MMD_PMAPMD, MDIO_PMA_EXTABLE);
+		if (val < 0)
+			return val;
+
+		linkmode_mod_bit(ETHTOOL_LINK_MODE_10000baseLRM_Full_BIT,
+				 phydev->supported,
+				 val & MDIO_PMA_EXTABLE_10GBLRM);
+		linkmode_mod_bit(ETHTOOL_LINK_MODE_10000baseT_Full_BIT,
+				 phydev->supported,
+				 val & MDIO_PMA_EXTABLE_10GBT);
+		linkmode_mod_bit(ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT,
+				 phydev->supported,
+				 val & MDIO_PMA_EXTABLE_10GBKX4);
+		linkmode_mod_bit(ETHTOOL_LINK_MODE_10000baseKR_Full_BIT,
+				 phydev->supported,
+				 val & MDIO_PMA_EXTABLE_10GBKR);
+		linkmode_mod_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT,
+				 phydev->supported,
+				 val & MDIO_PMA_EXTABLE_1000BT);
+		linkmode_mod_bit(ETHTOOL_LINK_MODE_1000baseKX_Full_BIT,
+				 phydev->supported,
+				 val & MDIO_PMA_EXTABLE_1000BKX);
+
+		linkmode_mod_bit(ETHTOOL_LINK_MODE_100baseT_Full_BIT,
+				 phydev->supported,
+				 val & MDIO_PMA_EXTABLE_100BTX);
+		linkmode_mod_bit(ETHTOOL_LINK_MODE_100baseT_Half_BIT,
+				 phydev->supported,
+				 val & MDIO_PMA_EXTABLE_100BTX);
+
+		linkmode_mod_bit(ETHTOOL_LINK_MODE_10baseT_Full_BIT,
+				 phydev->supported,
+				 val & MDIO_PMA_EXTABLE_10BT);
+		linkmode_mod_bit(ETHTOOL_LINK_MODE_10baseT_Half_BIT,
+				 phydev->supported,
+				 val & MDIO_PMA_EXTABLE_10BT);
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(genphy_c45_pma_read_abilities);
+
 /* The gen10g_* functions are the old Clause 45 stub */
 
 int gen10g_config_aneg(struct phy_device *phydev)
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 1a1d93a2a906..177a330d84e5 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -1116,6 +1116,7 @@ int genphy_c45_read_pma(struct phy_device *phydev);
 int genphy_c45_pma_setup_forced(struct phy_device *phydev);
 int genphy_c45_an_disable_aneg(struct phy_device *phydev);
 int genphy_c45_read_mdix(struct phy_device *phydev);
+int genphy_c45_pma_read_abilities(struct phy_device *phydev);
 
 /* The gen10g_* functions are the old Clause 45 stub */
 int gen10g_config_aneg(struct phy_device *phydev);
-- 
cgit v1.2.3


From 7fd8afa8933a095a97995885740999f174e61b60 Mon Sep 17 00:00:00 2001
From: Maxime Chevallier <maxime.chevallier@bootlin.com>
Date: Mon, 11 Feb 2019 15:25:29 +0100
Subject: net: phy: Add generic support for 2.5GBaseT and 5GBaseT

The 802.3bz specification, based on previous by the NBASET alliance,
defines the 2.5GBaseT and 5GBaseT link modes for ethernet traffic on
cat5e, cat6 and cat7 cables.

These mode integrate with the already defined C45 MDIO PMA/PMD registers
set that added 10G support, by defining some previously reserved bits,
and adding a new register (2.5G/5G Extended abilities).

This commit adds the required definitions in include/uapi/linux/mdio.h
to support these modes, and detect when a link-partner advertises them.

It also adds support for these mode in the generic C45 PHY
infrastructure.

Signed-off-by: Maxime Chevallier <maxime.chevallier@bootlin.com>
Reviewed-by: Heiner Kallweit <hkallweit1@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/phy-c45.c | 37 +++++++++++++++++++++++++++++++++++++
 include/uapi/linux/mdio.h | 16 ++++++++++++++++
 2 files changed, 53 insertions(+)

(limited to 'include')

diff --git a/drivers/net/phy/phy-c45.c b/drivers/net/phy/phy-c45.c
index 6f028de4dae1..7af5fa81daf6 100644
--- a/drivers/net/phy/phy-c45.c
+++ b/drivers/net/phy/phy-c45.c
@@ -47,6 +47,16 @@ int genphy_c45_pma_setup_forced(struct phy_device *phydev)
 		/* Assume 1000base-T */
 		ctrl2 |= MDIO_PMA_CTRL2_1000BT;
 		break;
+	case SPEED_2500:
+		ctrl1 |= MDIO_CTRL1_SPEED2_5G;
+		/* Assume 2.5Gbase-T */
+		ctrl2 |= MDIO_PMA_CTRL2_2_5GBT;
+		break;
+	case SPEED_5000:
+		ctrl1 |= MDIO_CTRL1_SPEED5G;
+		/* Assume 5Gbase-T */
+		ctrl2 |= MDIO_PMA_CTRL2_5GBT;
+		break;
 	case SPEED_10000:
 		ctrl1 |= MDIO_CTRL1_SPEED10G;
 		/* Assume 10Gbase-T */
@@ -194,6 +204,12 @@ int genphy_c45_read_lpa(struct phy_device *phydev)
 	if (val < 0)
 		return val;
 
+	if (val & MDIO_AN_10GBT_STAT_LP2_5G)
+		linkmode_set_bit(ETHTOOL_LINK_MODE_2500baseT_Full_BIT,
+				 phydev->lp_advertising);
+	if (val & MDIO_AN_10GBT_STAT_LP5G)
+		linkmode_set_bit(ETHTOOL_LINK_MODE_5000baseT_Full_BIT,
+				 phydev->lp_advertising);
 	if (val & MDIO_AN_10GBT_STAT_LP10G)
 		linkmode_set_bit(ETHTOOL_LINK_MODE_10000baseT_Full_BIT,
 				 phydev->lp_advertising);
@@ -224,6 +240,12 @@ int genphy_c45_read_pma(struct phy_device *phydev)
 	case MDIO_PMA_CTRL1_SPEED1000:
 		phydev->speed = SPEED_1000;
 		break;
+	case MDIO_CTRL1_SPEED2_5G:
+		phydev->speed = SPEED_2500;
+		break;
+	case MDIO_CTRL1_SPEED5G:
+		phydev->speed = SPEED_5000;
+		break;
 	case MDIO_CTRL1_SPEED10G:
 		phydev->speed = SPEED_10000;
 		break;
@@ -339,6 +361,21 @@ int genphy_c45_pma_read_abilities(struct phy_device *phydev)
 		linkmode_mod_bit(ETHTOOL_LINK_MODE_10baseT_Half_BIT,
 				 phydev->supported,
 				 val & MDIO_PMA_EXTABLE_10BT);
+
+		if (val & MDIO_PMA_EXTABLE_NBT) {
+			val = phy_read_mmd(phydev, MDIO_MMD_PMAPMD,
+					   MDIO_PMA_NG_EXTABLE);
+			if (val < 0)
+				return val;
+
+			linkmode_mod_bit(ETHTOOL_LINK_MODE_2500baseT_Full_BIT,
+					 phydev->supported,
+					 val & MDIO_PMA_NG_EXTABLE_2_5GBT);
+
+			linkmode_mod_bit(ETHTOOL_LINK_MODE_5000baseT_Full_BIT,
+					 phydev->supported,
+					 val & MDIO_PMA_NG_EXTABLE_5GBT);
+		}
 	}
 
 	return 0;
diff --git a/include/uapi/linux/mdio.h b/include/uapi/linux/mdio.h
index 0e012b168e4d..0a552061ff1c 100644
--- a/include/uapi/linux/mdio.h
+++ b/include/uapi/linux/mdio.h
@@ -45,6 +45,7 @@
 #define MDIO_AN_ADVERTISE	16	/* AN advertising (base page) */
 #define MDIO_AN_LPA		19	/* AN LP abilities (base page) */
 #define MDIO_PCS_EEE_ABLE	20	/* EEE Capability register */
+#define MDIO_PMA_NG_EXTABLE	21	/* 2.5G/5G PMA/PMD extended ability */
 #define MDIO_PCS_EEE_WK_ERR	22	/* EEE wake error counter */
 #define MDIO_PHYXS_LNSTAT	24	/* PHY XGXS lane state */
 #define MDIO_AN_EEE_ADV		60	/* EEE advertisement */
@@ -92,6 +93,10 @@
 #define MDIO_CTRL1_SPEED10G		(MDIO_CTRL1_SPEEDSELEXT | 0x00)
 /* 10PASS-TS/2BASE-TL */
 #define MDIO_CTRL1_SPEED10P2B		(MDIO_CTRL1_SPEEDSELEXT | 0x04)
+/* 2.5 Gb/s */
+#define MDIO_CTRL1_SPEED2_5G		(MDIO_CTRL1_SPEEDSELEXT | 0x18)
+/* 5 Gb/s */
+#define MDIO_CTRL1_SPEED5G		(MDIO_CTRL1_SPEEDSELEXT | 0x1c)
 
 /* Status register 1. */
 #define MDIO_STAT1_LPOWERABLE		0x0002	/* Low-power ability */
@@ -145,6 +150,8 @@
 #define MDIO_PMA_CTRL2_1000BKX		0x000d	/* 1000BASE-KX type */
 #define MDIO_PMA_CTRL2_100BTX		0x000e	/* 100BASE-TX type */
 #define MDIO_PMA_CTRL2_10BT		0x000f	/* 10BASE-T type */
+#define MDIO_PMA_CTRL2_2_5GBT		0x0030  /* 2.5GBaseT type */
+#define MDIO_PMA_CTRL2_5GBT		0x0031  /* 5GBaseT type */
 #define MDIO_PCS_CTRL2_TYPE		0x0003	/* PCS type selection */
 #define MDIO_PCS_CTRL2_10GBR		0x0000	/* 10GBASE-R type */
 #define MDIO_PCS_CTRL2_10GBX		0x0001	/* 10GBASE-X type */
@@ -198,6 +205,7 @@
 #define MDIO_PMA_EXTABLE_1000BKX	0x0040	/* 1000BASE-KX ability */
 #define MDIO_PMA_EXTABLE_100BTX		0x0080	/* 100BASE-TX ability */
 #define MDIO_PMA_EXTABLE_10BT		0x0100	/* 10BASE-T ability */
+#define MDIO_PMA_EXTABLE_NBT		0x4000  /* 2.5/5GBASE-T ability */
 
 /* PHY XGXS lane state register. */
 #define MDIO_PHYXS_LNSTAT_SYNC0		0x0001
@@ -234,9 +242,13 @@
 #define MDIO_PCS_10GBRT_STAT2_BER	0x3f00
 
 /* AN 10GBASE-T control register. */
+#define MDIO_AN_10GBT_CTRL_ADV2_5G	0x0080	/* Advertise 2.5GBASE-T */
+#define MDIO_AN_10GBT_CTRL_ADV5G	0x0100	/* Advertise 5GBASE-T */
 #define MDIO_AN_10GBT_CTRL_ADV10G	0x1000	/* Advertise 10GBASE-T */
 
 /* AN 10GBASE-T status register. */
+#define MDIO_AN_10GBT_STAT_LP2_5G	0x0020  /* LP is 2.5GBT capable */
+#define MDIO_AN_10GBT_STAT_LP5G		0x0040  /* LP is 5GBT capable */
 #define MDIO_AN_10GBT_STAT_LPTRR	0x0200	/* LP training reset req. */
 #define MDIO_AN_10GBT_STAT_LPLTABLE	0x0400	/* LP loop timing ability */
 #define MDIO_AN_10GBT_STAT_LP10G	0x0800	/* LP is 10GBT capable */
@@ -265,6 +277,10 @@
 #define MDIO_EEE_10GKX4		0x0020	/* 10G KX4 EEE cap */
 #define MDIO_EEE_10GKR		0x0040	/* 10G KR EEE cap */
 
+/* 2.5G/5G Extended abilities register. */
+#define MDIO_PMA_NG_EXTABLE_2_5GBT	0x0001	/* 2.5GBASET ability */
+#define MDIO_PMA_NG_EXTABLE_5GBT	0x0002	/* 5GBASET ability */
+
 /* LASI RX_ALARM control/status registers. */
 #define MDIO_PMA_LASI_RX_PHYXSLFLT	0x0001	/* PHY XS RX local fault */
 #define MDIO_PMA_LASI_RX_PCSLFLT	0x0008	/* PCS RX local fault */
-- 
cgit v1.2.3


From 3e0bd37ce0e4a574df6d87a901e13bcb46e10301 Mon Sep 17 00:00:00 2001
From: Peter Oskolkov <posk@google.com>
Date: Wed, 13 Feb 2019 11:53:35 -0800
Subject: bpf: add plumbing for BPF_LWT_ENCAP_IP in bpf_lwt_push_encap

This patch adds all needed plumbing in preparation to allowing
bpf programs to do IP encapping via bpf_lwt_push_encap. Actual
implementation is added in the next patch in the patchset.

Of note:
- bpf_lwt_push_encap can now be called from BPF_PROG_TYPE_LWT_XMIT
  prog types in addition to BPF_PROG_TYPE_LWT_IN;
- if the skb being encapped has GSO set, encapsulation is limited
  to IPIP/IP+GRE/IP+GUE (both IPv4 and IPv6);
- as route lookups are different for ingress vs egress, the single
  external bpf_lwt_push_encap BPF helper is routed internally to
  either bpf_lwt_in_push_encap or bpf_lwt_xmit_push_encap BPF_CALLs,
  depending on prog type.

v8 changes: fixed a typo.

Signed-off-by: Peter Oskolkov <posk@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/uapi/linux/bpf.h | 26 ++++++++++++++++++++++++--
 net/core/filter.c        | 48 +++++++++++++++++++++++++++++++++++++++++++-----
 2 files changed, 67 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 25c8c0e62ecf..bcdd2474eee7 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -2016,6 +2016,19 @@ union bpf_attr {
  *			Only works if *skb* contains an IPv6 packet. Insert a
  *			Segment Routing Header (**struct ipv6_sr_hdr**) inside
  *			the IPv6 header.
+ *		**BPF_LWT_ENCAP_IP**
+ *			IP encapsulation (GRE/GUE/IPIP/etc). The outer header
+ *			must be IPv4 or IPv6, followed by zero or more
+ *			additional headers, up to LWT_BPF_MAX_HEADROOM total
+ *			bytes in all prepended headers. Please note that
+ *			if skb_is_gso(skb) is true, no more than two headers
+ *			can be prepended, and the inner header, if present,
+ *			should be either GRE or UDP/GUE.
+ *
+ *		BPF_LWT_ENCAP_SEG6*** types can be called by bpf programs of
+ *		type BPF_PROG_TYPE_LWT_IN; BPF_LWT_ENCAP_IP type can be called
+ *		by bpf programs of types BPF_PROG_TYPE_LWT_IN and
+ *		BPF_PROG_TYPE_LWT_XMIT.
  *
  * 		A call to this helper is susceptible to change the underlaying
  * 		packet buffer. Therefore, at load time, all checks on pointers
@@ -2517,7 +2530,8 @@ enum bpf_hdr_start_off {
 /* Encapsulation type for BPF_FUNC_lwt_push_encap helper. */
 enum bpf_lwt_encap_mode {
 	BPF_LWT_ENCAP_SEG6,
-	BPF_LWT_ENCAP_SEG6_INLINE
+	BPF_LWT_ENCAP_SEG6_INLINE,
+	BPF_LWT_ENCAP_IP,
 };
 
 #define __bpf_md_ptr(type, name)	\
@@ -2606,7 +2620,15 @@ enum bpf_ret_code {
 	BPF_DROP = 2,
 	/* 3-6 reserved */
 	BPF_REDIRECT = 7,
-	/* >127 are reserved for prog type specific return codes */
+	/* >127 are reserved for prog type specific return codes.
+	 *
+	 * BPF_LWT_REROUTE: used by BPF_PROG_TYPE_LWT_IN and
+	 *    BPF_PROG_TYPE_LWT_XMIT to indicate that skb had been
+	 *    changed and should be routed based on its new L3 header.
+	 *    (This is an L3 redirect, as opposed to L2 redirect
+	 *    represented by BPF_REDIRECT above).
+	 */
+	BPF_LWT_REROUTE = 128,
 };
 
 struct bpf_sock {
diff --git a/net/core/filter.c b/net/core/filter.c
index 353735575204..12c88c21b6b8 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -4815,7 +4815,15 @@ static int bpf_push_seg6_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len
 }
 #endif /* CONFIG_IPV6_SEG6_BPF */
 
-BPF_CALL_4(bpf_lwt_push_encap, struct sk_buff *, skb, u32, type, void *, hdr,
+#if IS_ENABLED(CONFIG_LWTUNNEL_BPF)
+static int bpf_push_ip_encap(struct sk_buff *skb, void *hdr, u32 len,
+			     bool ingress)
+{
+	return -EINVAL;  /* Implemented in the next patch. */
+}
+#endif
+
+BPF_CALL_4(bpf_lwt_in_push_encap, struct sk_buff *, skb, u32, type, void *, hdr,
 	   u32, len)
 {
 	switch (type) {
@@ -4823,14 +4831,41 @@ BPF_CALL_4(bpf_lwt_push_encap, struct sk_buff *, skb, u32, type, void *, hdr,
 	case BPF_LWT_ENCAP_SEG6:
 	case BPF_LWT_ENCAP_SEG6_INLINE:
 		return bpf_push_seg6_encap(skb, type, hdr, len);
+#endif
+#if IS_ENABLED(CONFIG_LWTUNNEL_BPF)
+	case BPF_LWT_ENCAP_IP:
+		return bpf_push_ip_encap(skb, hdr, len, true /* ingress */);
+#endif
+	default:
+		return -EINVAL;
+	}
+}
+
+BPF_CALL_4(bpf_lwt_xmit_push_encap, struct sk_buff *, skb, u32, type,
+	   void *, hdr, u32, len)
+{
+	switch (type) {
+#if IS_ENABLED(CONFIG_LWTUNNEL_BPF)
+	case BPF_LWT_ENCAP_IP:
+		return bpf_push_ip_encap(skb, hdr, len, false /* egress */);
 #endif
 	default:
 		return -EINVAL;
 	}
 }
 
-static const struct bpf_func_proto bpf_lwt_push_encap_proto = {
-	.func		= bpf_lwt_push_encap,
+static const struct bpf_func_proto bpf_lwt_in_push_encap_proto = {
+	.func		= bpf_lwt_in_push_encap,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_ANYTHING,
+	.arg3_type	= ARG_PTR_TO_MEM,
+	.arg4_type	= ARG_CONST_SIZE
+};
+
+static const struct bpf_func_proto bpf_lwt_xmit_push_encap_proto = {
+	.func		= bpf_lwt_xmit_push_encap,
 	.gpl_only	= false,
 	.ret_type	= RET_INTEGER,
 	.arg1_type	= ARG_PTR_TO_CTX,
@@ -5417,7 +5452,8 @@ bool bpf_helper_changes_pkt_data(void *func)
 	    func == bpf_lwt_seg6_adjust_srh ||
 	    func == bpf_lwt_seg6_action ||
 #endif
-	    func == bpf_lwt_push_encap)
+	    func == bpf_lwt_in_push_encap ||
+	    func == bpf_lwt_xmit_push_encap)
 		return true;
 
 	return false;
@@ -5815,7 +5851,7 @@ lwt_in_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 {
 	switch (func_id) {
 	case BPF_FUNC_lwt_push_encap:
-		return &bpf_lwt_push_encap_proto;
+		return &bpf_lwt_in_push_encap_proto;
 	default:
 		return lwt_out_func_proto(func_id, prog);
 	}
@@ -5851,6 +5887,8 @@ lwt_xmit_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_l4_csum_replace_proto;
 	case BPF_FUNC_set_hash_invalid:
 		return &bpf_set_hash_invalid_proto;
+	case BPF_FUNC_lwt_push_encap:
+		return &bpf_lwt_xmit_push_encap_proto;
 	default:
 		return lwt_out_func_proto(func_id, prog);
 	}
-- 
cgit v1.2.3


From 52f278774e796a553be0c869dcaaee6f259ca795 Mon Sep 17 00:00:00 2001
From: Peter Oskolkov <posk@google.com>
Date: Wed, 13 Feb 2019 11:53:36 -0800
Subject: bpf: implement BPF_LWT_ENCAP_IP mode in bpf_lwt_push_encap

Implement BPF_LWT_ENCAP_IP mode in bpf_lwt_push_encap BPF helper.
It enables BPF programs (specifically, BPF_PROG_TYPE_LWT_IN and
BPF_PROG_TYPE_LWT_XMIT prog types) to add IP encapsulation headers
to packets (e.g. IP/GRE, GUE, IPIP).

This is useful when thousands of different short-lived flows should be
encapped, each with different and dynamically determined destination.
Although lwtunnels can be used in some of these scenarios, the ability
to dynamically generate encap headers adds more flexibility, e.g.
when routing depends on the state of the host (reflected in global bpf
maps).

v7 changes:
 - added a call skb_clear_hash();
 - removed calls to skb_set_transport_header();
 - refuse to encap GSO-enabled packets.

v8 changes:
 - fix build errors when LWT is not enabled.

Note: the next patch in the patchset with deal with GSO-enabled packets,
which are currently rejected at encapping attempt.

Signed-off-by: Peter Oskolkov <posk@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/net/lwtunnel.h |  2 ++
 net/core/filter.c      |  3 ++-
 net/core/lwt_bpf.c     | 65 ++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 69 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/lwtunnel.h b/include/net/lwtunnel.h
index 33fd9ba7e0e5..671113bcb2cc 100644
--- a/include/net/lwtunnel.h
+++ b/include/net/lwtunnel.h
@@ -126,6 +126,8 @@ int lwtunnel_cmp_encap(struct lwtunnel_state *a, struct lwtunnel_state *b);
 int lwtunnel_output(struct net *net, struct sock *sk, struct sk_buff *skb);
 int lwtunnel_input(struct sk_buff *skb);
 int lwtunnel_xmit(struct sk_buff *skb);
+int bpf_lwt_push_ip_encap(struct sk_buff *skb, void *hdr, u32 len,
+			  bool ingress);
 
 static inline void lwtunnel_set_redirect(struct dst_entry *dst)
 {
diff --git a/net/core/filter.c b/net/core/filter.c
index 12c88c21b6b8..a78deb2656e1 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -73,6 +73,7 @@
 #include <linux/seg6_local.h>
 #include <net/seg6.h>
 #include <net/seg6_local.h>
+#include <net/lwtunnel.h>
 
 /**
  *	sk_filter_trim_cap - run a packet through a socket filter
@@ -4819,7 +4820,7 @@ static int bpf_push_seg6_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len
 static int bpf_push_ip_encap(struct sk_buff *skb, void *hdr, u32 len,
 			     bool ingress)
 {
-	return -EINVAL;  /* Implemented in the next patch. */
+	return bpf_lwt_push_ip_encap(skb, hdr, len, ingress);
 }
 #endif
 
diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c
index a648568c5e8f..e5a9850d9f48 100644
--- a/net/core/lwt_bpf.c
+++ b/net/core/lwt_bpf.c
@@ -390,6 +390,71 @@ static const struct lwtunnel_encap_ops bpf_encap_ops = {
 	.owner		= THIS_MODULE,
 };
 
+static int handle_gso_encap(struct sk_buff *skb, bool ipv4, int encap_len)
+{
+	/* Handling of GSO-enabled packets is added in the next patch. */
+	return -EOPNOTSUPP;
+}
+
+int bpf_lwt_push_ip_encap(struct sk_buff *skb, void *hdr, u32 len, bool ingress)
+{
+	struct iphdr *iph;
+	bool ipv4;
+	int err;
+
+	if (unlikely(len < sizeof(struct iphdr) || len > LWT_BPF_MAX_HEADROOM))
+		return -EINVAL;
+
+	/* validate protocol and length */
+	iph = (struct iphdr *)hdr;
+	if (iph->version == 4) {
+		ipv4 = true;
+		if (unlikely(len < iph->ihl * 4))
+			return -EINVAL;
+	} else if (iph->version == 6) {
+		ipv4 = false;
+		if (unlikely(len < sizeof(struct ipv6hdr)))
+			return -EINVAL;
+	} else {
+		return -EINVAL;
+	}
+
+	if (ingress)
+		err = skb_cow_head(skb, len + skb->mac_len);
+	else
+		err = skb_cow_head(skb,
+				   len + LL_RESERVED_SPACE(skb_dst(skb)->dev));
+	if (unlikely(err))
+		return err;
+
+	/* push the encap headers and fix pointers */
+	skb_reset_inner_headers(skb);
+	skb->encapsulation = 1;
+	skb_push(skb, len);
+	if (ingress)
+		skb_postpush_rcsum(skb, iph, len);
+	skb_reset_network_header(skb);
+	memcpy(skb_network_header(skb), hdr, len);
+	bpf_compute_data_pointers(skb);
+	skb_clear_hash(skb);
+
+	if (ipv4) {
+		skb->protocol = htons(ETH_P_IP);
+		iph = ip_hdr(skb);
+
+		if (!iph->check)
+			iph->check = ip_fast_csum((unsigned char *)iph,
+						  iph->ihl);
+	} else {
+		skb->protocol = htons(ETH_P_IPV6);
+	}
+
+	if (skb_is_gso(skb))
+		return handle_gso_encap(skb, ipv4, len);
+
+	return 0;
+}
+
 static int __init bpf_lwt_init(void)
 {
 	return lwtunnel_encap_add_ops(&bpf_encap_ops, LWTUNNEL_ENCAP_BPF);
-- 
cgit v1.2.3


From 9b0a6a9dbab0ae092d033e67dc2701e8a7b09cdb Mon Sep 17 00:00:00 2001
From: Peter Oskolkov <posk@google.com>
Date: Wed, 13 Feb 2019 11:53:38 -0800
Subject: ipv6_stub: add ipv6_route_input stub/proxy.

Proxy ip6_route_input via ipv6_stub, for later use by lwt bpf ip encap
(see the next patch in the patchset).

Signed-off-by: Peter Oskolkov <posk@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/net/addrconf.h   | 1 +
 net/ipv6/addrconf_core.c | 6 ++++++
 net/ipv6/af_inet6.c      | 7 +++++++
 3 files changed, 14 insertions(+)

(limited to 'include')

diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index 20d523ee2fec..269ec27385e9 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -248,6 +248,7 @@ struct ipv6_stub {
 				 const struct in6_addr *addr);
 	int (*ipv6_dst_lookup)(struct net *net, struct sock *sk,
 			       struct dst_entry **dst, struct flowi6 *fl6);
+	int (*ipv6_route_input)(struct sk_buff *skb);
 
 	struct fib6_table *(*fib6_get_table)(struct net *net, u32 id);
 	struct fib6_info *(*fib6_lookup)(struct net *net, int oif,
diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c
index 5cd0029d930e..6c79af056d9b 100644
--- a/net/ipv6/addrconf_core.c
+++ b/net/ipv6/addrconf_core.c
@@ -134,6 +134,11 @@ static int eafnosupport_ipv6_dst_lookup(struct net *net, struct sock *u1,
 	return -EAFNOSUPPORT;
 }
 
+static int eafnosupport_ipv6_route_input(struct sk_buff *skb)
+{
+	return -EAFNOSUPPORT;
+}
+
 static struct fib6_table *eafnosupport_fib6_get_table(struct net *net, u32 id)
 {
 	return NULL;
@@ -170,6 +175,7 @@ eafnosupport_ip6_mtu_from_fib6(struct fib6_info *f6i, struct in6_addr *daddr,
 
 const struct ipv6_stub *ipv6_stub __read_mostly = &(struct ipv6_stub) {
 	.ipv6_dst_lookup   = eafnosupport_ipv6_dst_lookup,
+	.ipv6_route_input  = eafnosupport_ipv6_route_input,
 	.fib6_get_table    = eafnosupport_fib6_get_table,
 	.fib6_table_lookup = eafnosupport_fib6_table_lookup,
 	.fib6_lookup       = eafnosupport_fib6_lookup,
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index d99753b5e39b..2f45d2a3e3a3 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -900,10 +900,17 @@ static struct pernet_operations inet6_net_ops = {
 	.exit = inet6_net_exit,
 };
 
+static int ipv6_route_input(struct sk_buff *skb)
+{
+	ip6_route_input(skb);
+	return skb_dst(skb)->error;
+}
+
 static const struct ipv6_stub ipv6_stub_impl = {
 	.ipv6_sock_mc_join = ipv6_sock_mc_join,
 	.ipv6_sock_mc_drop = ipv6_sock_mc_drop,
 	.ipv6_dst_lookup   = ip6_dst_lookup,
+	.ipv6_route_input  = ipv6_route_input,
 	.fib6_get_table	   = fib6_get_table,
 	.fib6_table_lookup = fib6_table_lookup,
 	.fib6_lookup       = fib6_lookup,
-- 
cgit v1.2.3


From 9f9dc493f724b311c84d88b8e77dad14abd06bcc Mon Sep 17 00:00:00 2001
From: John Hurley <john.hurley@netronome.com>
Date: Wed, 13 Feb 2019 00:23:52 +0000
Subject: flow_offload: fix block stats

With the introduction of flow_stats_update(), drivers now update the stats
fields of the passed tc_cls_flower_offload struct, rather than call
tcf_exts_stats_update() directly to update the stats of offloaded TC
flower rules. However, if multiple qdiscs are registered to a TC shared
block and a flower rule is applied, then, when getting stats for the rule,
multiple callbacks may be made.

Take this into consideration by modifying flow_stats_update to gather the
stats from all callbacks. Currently, the values in tc_cls_flower_offload
only account for the last stats callback in the list.

Fixes: 3b1903ef97c0 ("flow_offload: add statistics retrieval infrastructure and use it")
Signed-off-by: John Hurley <john.hurley@netronome.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Acked-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/flow_offload.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h
index a307ccb18015..d035183c8d03 100644
--- a/include/net/flow_offload.h
+++ b/include/net/flow_offload.h
@@ -195,9 +195,9 @@ struct flow_stats {
 static inline void flow_stats_update(struct flow_stats *flow_stats,
 				     u64 bytes, u64 pkts, u64 lastused)
 {
-	flow_stats->pkts	= pkts;
-	flow_stats->bytes	= bytes;
-	flow_stats->lastused	= lastused;
+	flow_stats->pkts	+= pkts;
+	flow_stats->bytes	+= bytes;
+	flow_stats->lastused	= max_t(u64, flow_stats->lastused, lastused);
 }
 
 #endif /* _NET_FLOW_OFFLOAD_H */
-- 
cgit v1.2.3


From c25fff7171bebb76243ccc77f0f04aafa3db87be Mon Sep 17 00:00:00 2001
From: Jesper Dangaard Brouer <brouer@redhat.com>
Date: Wed, 13 Feb 2019 02:55:40 +0100
Subject: mm: add dma_addr_t to struct page

The page_pool API is using page->private to store DMA addresses.
As pointed out by David Miller we can't use that on 32-bit architectures
with 64-bit DMA

This patch adds a new dma_addr_t struct to allow storing DMA addresses

Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Ilias Apalodimas <ilias.apalodimas@linaro.org>
Acked-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mm_types.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include')

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 2c471a2c43fa..0a36a22228e7 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -95,6 +95,13 @@ struct page {
 			 */
 			unsigned long private;
 		};
+		struct {	/* page_pool used by netstack */
+			/**
+			 * @dma_addr: might require a 64-bit value even on
+			 * 32-bit architectures.
+			 */
+			dma_addr_t dma_addr;
+		};
 		struct {	/* slab, slob and slub */
 			union {
 				struct list_head slab_list;	/* uses lru */
-- 
cgit v1.2.3


From 4fa882c9f628b312d697cfcefaa6e973ce8ece3e Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Date: Tue, 7 Aug 2018 12:07:43 +0200
Subject: eeprom: at24: remove at24_platform_data

There are no more users of at24_platform_data. Remove the relevant
header and modify the driver code to not use it anymore.

Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
---
 MAINTAINERS                        |   1 -
 drivers/misc/eeprom/at24.c         | 162 +++++++++++++++++--------------------
 include/linux/platform_data/at24.h |  60 --------------
 3 files changed, 75 insertions(+), 148 deletions(-)
 delete mode 100644 include/linux/platform_data/at24.h

(limited to 'include')

diff --git a/MAINTAINERS b/MAINTAINERS
index 9919840d54cd..d901919dd475 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2503,7 +2503,6 @@ T:	git git://git.kernel.org/pub/scm/linux/kernel/git/brgl/linux.git
 S:	Maintained
 F:	Documentation/devicetree/bindings/eeprom/at24.txt
 F:	drivers/misc/eeprom/at24.c
-F:	include/linux/platform_data/at24.h
 
 ATA OVER ETHERNET (AOE) DRIVER
 M:	"Ed L. Cashin" <ed.cashin@acm.org>
diff --git a/drivers/misc/eeprom/at24.c b/drivers/misc/eeprom/at24.c
index ddfcf4ade7bf..b806a403ca46 100644
--- a/drivers/misc/eeprom/at24.c
+++ b/drivers/misc/eeprom/at24.c
@@ -22,10 +22,24 @@
 #include <linux/i2c.h>
 #include <linux/nvmem-provider.h>
 #include <linux/regmap.h>
-#include <linux/platform_data/at24.h>
 #include <linux/pm_runtime.h>
 #include <linux/gpio/consumer.h>
 
+/* Address pointer is 16 bit. */
+#define AT24_FLAG_ADDR16	BIT(7)
+/* sysfs-entry will be read-only. */
+#define AT24_FLAG_READONLY	BIT(6)
+/* sysfs-entry will be world-readable. */
+#define AT24_FLAG_IRUGO		BIT(5)
+/* Take always 8 addresses (24c00). */
+#define AT24_FLAG_TAKE8ADDR	BIT(4)
+/* Factory-programmed serial number. */
+#define AT24_FLAG_SERIAL	BIT(3)
+/* Factory-programmed mac address. */
+#define AT24_FLAG_MAC		BIT(2)
+/* Does not auto-rollover reads to the next slave address. */
+#define AT24_FLAG_NO_RDROL	BIT(1)
+
 /*
  * I2C EEPROMs from most vendors are inexpensive and mostly interchangeable.
  * Differences between different vendor product lines (like Atmel AT24C or
@@ -107,10 +121,6 @@ module_param_named(write_timeout, at24_write_timeout, uint, 0);
 MODULE_PARM_DESC(at24_write_timeout, "Time (in ms) to try writes (default 25)");
 
 struct at24_chip_data {
-	/*
-	 * these fields mirror their equivalents in
-	 * struct at24_platform_data
-	 */
 	u32 byte_len;
 	u8 flags;
 };
@@ -471,63 +481,11 @@ static int at24_write(void *priv, unsigned int off, void *val, size_t count)
 	return 0;
 }
 
-static void at24_properties_to_pdata(struct device *dev,
-				     struct at24_platform_data *chip)
-{
-	int err;
-	u32 val;
-
-	if (device_property_present(dev, "read-only"))
-		chip->flags |= AT24_FLAG_READONLY;
-	if (device_property_present(dev, "no-read-rollover"))
-		chip->flags |= AT24_FLAG_NO_RDROL;
-
-	err = device_property_read_u32(dev, "address-width", &val);
-	if (!err) {
-		switch (val) {
-		case 8:
-			if (chip->flags & AT24_FLAG_ADDR16)
-				dev_warn(dev, "Override address width to be 8, while default is 16\n");
-			chip->flags &= ~AT24_FLAG_ADDR16;
-			break;
-		case 16:
-			chip->flags |= AT24_FLAG_ADDR16;
-			break;
-		default:
-			dev_warn(dev, "Bad \"address-width\" property: %u\n",
-				 val);
-		}
-	}
-
-	err = device_property_read_u32(dev, "size", &val);
-	if (!err)
-		chip->byte_len = val;
-
-	err = device_property_read_u32(dev, "pagesize", &val);
-	if (!err) {
-		chip->page_size = val;
-	} else {
-		/*
-		 * This is slow, but we can't know all eeproms, so we better
-		 * play safe. Specifying custom eeprom-types via platform_data
-		 * is recommended anyhow.
-		 */
-		chip->page_size = 1;
-	}
-}
-
-static int at24_get_pdata(struct device *dev, struct at24_platform_data *pdata)
+static const struct at24_chip_data *at24_get_chip_data(struct device *dev)
 {
 	struct device_node *of_node = dev->of_node;
 	const struct at24_chip_data *cdata;
 	const struct i2c_device_id *id;
-	struct at24_platform_data *pd;
-
-	pd = dev_get_platdata(dev);
-	if (pd) {
-		memcpy(pdata, pd, sizeof(*pdata));
-		return 0;
-	}
 
 	id = i2c_match_id(at24_ids, to_i2c_client(dev));
 
@@ -544,13 +502,9 @@ static int at24_get_pdata(struct device *dev, struct at24_platform_data *pdata)
 		cdata = acpi_device_get_match_data(dev);
 
 	if (!cdata)
-		return -ENODEV;
+		return ERR_PTR(-ENODEV);
 
-	pdata->byte_len = cdata->byte_len;
-	pdata->flags = cdata->flags;
-	at24_properties_to_pdata(dev, pdata);
-
-	return 0;
+	return cdata;
 }
 
 static void at24_remove_dummy_clients(struct at24_data *at24)
@@ -619,7 +573,8 @@ static int at24_probe(struct i2c_client *client)
 {
 	struct regmap_config regmap_config = { };
 	struct nvmem_config nvmem_config = { };
-	struct at24_platform_data pdata = { };
+	u32 byte_len, page_size, flags, addrw;
+	const struct at24_chip_data *cdata;
 	struct device *dev = &client->dev;
 	bool i2c_fn_i2c, i2c_fn_block;
 	unsigned int i, num_addresses;
@@ -634,35 +589,72 @@ static int at24_probe(struct i2c_client *client)
 	i2c_fn_block = i2c_check_functionality(client->adapter,
 					       I2C_FUNC_SMBUS_WRITE_I2C_BLOCK);
 
-	err = at24_get_pdata(dev, &pdata);
+	cdata = at24_get_chip_data(dev);
+	if (IS_ERR(cdata))
+		return PTR_ERR(cdata);
+
+	err = device_property_read_u32(dev, "pagesize", &page_size);
 	if (err)
-		return err;
+		/*
+		 * This is slow, but we can't know all eeproms, so we better
+		 * play safe. Specifying custom eeprom-types via platform_data
+		 * is recommended anyhow.
+		 */
+		page_size = 1;
+
+	flags = cdata->flags;
+	if (device_property_present(dev, "read-only"))
+		flags |= AT24_FLAG_READONLY;
+	if (device_property_present(dev, "no-read-rollover"))
+		flags |= AT24_FLAG_NO_RDROL;
+
+	err = device_property_read_u32(dev, "address-width", &addrw);
+	if (!err) {
+		switch (addrw) {
+		case 8:
+			if (flags & AT24_FLAG_ADDR16)
+				dev_warn(dev,
+					 "Override address width to be 8, while default is 16\n");
+			flags &= ~AT24_FLAG_ADDR16;
+			break;
+		case 16:
+			flags |= AT24_FLAG_ADDR16;
+			break;
+		default:
+			dev_warn(dev, "Bad \"address-width\" property: %u\n",
+				 addrw);
+		}
+	}
+
+	err = device_property_read_u32(dev, "size", &byte_len);
+	if (err)
+		byte_len = cdata->byte_len;
 
 	if (!i2c_fn_i2c && !i2c_fn_block)
-		pdata.page_size = 1;
+		page_size = 1;
 
-	if (!pdata.page_size) {
+	if (!page_size) {
 		dev_err(dev, "page_size must not be 0!\n");
 		return -EINVAL;
 	}
 
-	if (!is_power_of_2(pdata.page_size))
+	if (!is_power_of_2(page_size))
 		dev_warn(dev, "page_size looks suspicious (no power of 2)!\n");
 
-	if (pdata.flags & AT24_FLAG_TAKE8ADDR)
+	if (flags & AT24_FLAG_TAKE8ADDR)
 		num_addresses = 8;
 	else
-		num_addresses =	DIV_ROUND_UP(pdata.byte_len,
-			(pdata.flags & AT24_FLAG_ADDR16) ? 65536 : 256);
+		num_addresses =	DIV_ROUND_UP(byte_len,
+			(flags & AT24_FLAG_ADDR16) ? 65536 : 256);
 
-	if ((pdata.flags & AT24_FLAG_SERIAL) && (pdata.flags & AT24_FLAG_MAC)) {
+	if ((flags & AT24_FLAG_SERIAL) && (flags & AT24_FLAG_MAC)) {
 		dev_err(dev,
 			"invalid device data - cannot have both AT24_FLAG_SERIAL & AT24_FLAG_MAC.");
 		return -EINVAL;
 	}
 
 	regmap_config.val_bits = 8;
-	regmap_config.reg_bits = (pdata.flags & AT24_FLAG_ADDR16) ? 16 : 8;
+	regmap_config.reg_bits = (flags & AT24_FLAG_ADDR16) ? 16 : 8;
 	regmap_config.disable_locking = true;
 
 	regmap = devm_regmap_init_i2c(client, &regmap_config);
@@ -675,11 +667,11 @@ static int at24_probe(struct i2c_client *client)
 		return -ENOMEM;
 
 	mutex_init(&at24->lock);
-	at24->byte_len = pdata.byte_len;
-	at24->page_size = pdata.page_size;
-	at24->flags = pdata.flags;
+	at24->byte_len = byte_len;
+	at24->page_size = page_size;
+	at24->flags = flags;
 	at24->num_addresses = num_addresses;
-	at24->offset_adj = at24_get_offset_adj(pdata.flags, pdata.byte_len);
+	at24->offset_adj = at24_get_offset_adj(flags, byte_len);
 	at24->client[0].client = client;
 	at24->client[0].regmap = regmap;
 
@@ -687,10 +679,10 @@ static int at24_probe(struct i2c_client *client)
 	if (IS_ERR(at24->wp_gpio))
 		return PTR_ERR(at24->wp_gpio);
 
-	writable = !(pdata.flags & AT24_FLAG_READONLY);
+	writable = !(flags & AT24_FLAG_READONLY);
 	if (writable) {
 		at24->write_max = min_t(unsigned int,
-					pdata.page_size, at24_io_limit);
+					page_size, at24_io_limit);
 		if (!i2c_fn_i2c && at24->write_max > I2C_SMBUS_BLOCK_MAX)
 			at24->write_max = I2C_SMBUS_BLOCK_MAX;
 	}
@@ -733,7 +725,7 @@ static int at24_probe(struct i2c_client *client)
 	nvmem_config.priv = at24;
 	nvmem_config.stride = 1;
 	nvmem_config.word_size = 1;
-	nvmem_config.size = pdata.byte_len;
+	nvmem_config.size = byte_len;
 
 	at24->nvmem = devm_nvmem_register(dev, &nvmem_config);
 	if (IS_ERR(at24->nvmem)) {
@@ -742,13 +734,9 @@ static int at24_probe(struct i2c_client *client)
 	}
 
 	dev_info(dev, "%u byte %s EEPROM, %s, %u bytes/write\n",
-		 pdata.byte_len, client->name,
+		 byte_len, client->name,
 		 writable ? "writable" : "read-only", at24->write_max);
 
-	/* export data to kernel code */
-	if (pdata.setup)
-		pdata.setup(at24->nvmem, pdata.context);
-
 	return 0;
 
 err_clients:
diff --git a/include/linux/platform_data/at24.h b/include/linux/platform_data/at24.h
deleted file mode 100644
index 63507ff464ee..000000000000
--- a/include/linux/platform_data/at24.h
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * at24.h - platform_data for the at24 (generic eeprom) driver
- * (C) Copyright 2008 by Pengutronix
- * (C) Copyright 2012 by Wolfram Sang
- * same license as the driver
- */
-
-#ifndef _LINUX_AT24_H
-#define _LINUX_AT24_H
-
-#include <linux/types.h>
-#include <linux/nvmem-consumer.h>
-#include <linux/bitops.h>
-
-/**
- * struct at24_platform_data - data to set up at24 (generic eeprom) driver
- * @byte_len: size of eeprom in byte
- * @page_size: number of byte which can be written in one go
- * @flags: tunable options, check AT24_FLAG_* defines
- * @setup: an optional callback invoked after eeprom is probed; enables kernel
-	code to access eeprom via nvmem, see example
- * @context: optional parameter passed to setup()
- *
- * If you set up a custom eeprom type, please double-check the parameters.
- * Especially page_size needs extra care, as you risk data loss if your value
- * is bigger than what the chip actually supports!
- *
- * An example in pseudo code for a setup() callback:
- *
- * void get_mac_addr(struct nvmem_device *nvmem, void *context)
- * {
- *	u8 *mac_addr = ethernet_pdata->mac_addr;
- *	off_t offset = context;
- *
- *	// Read MAC addr from EEPROM
- *	if (nvmem_device_read(nvmem, offset, ETH_ALEN, mac_addr) == ETH_ALEN)
- *		pr_info("Read MAC addr from EEPROM: %pM\n", mac_addr);
- * }
- *
- * This function pointer and context can now be set up in at24_platform_data.
- */
-
-struct at24_platform_data {
-	u32		byte_len;		/* size (sum of all addr) */
-	u16		page_size;		/* for writes */
-	u8		flags;
-#define AT24_FLAG_ADDR16	BIT(7)	/* address pointer is 16 bit */
-#define AT24_FLAG_READONLY	BIT(6)	/* sysfs-entry will be read-only */
-#define AT24_FLAG_IRUGO		BIT(5)	/* sysfs-entry will be world-readable */
-#define AT24_FLAG_TAKE8ADDR	BIT(4)	/* take always 8 addresses (24c00) */
-#define AT24_FLAG_SERIAL	BIT(3)	/* factory-programmed serial number */
-#define AT24_FLAG_MAC		BIT(2)	/* factory-programmed mac address */
-#define AT24_FLAG_NO_RDROL	BIT(1)	/* does not auto-rollover reads to */
-					/* the next slave address */
-
-	void		(*setup)(struct nvmem_device *nvmem, void *context);
-	void		*context;
-};
-
-#endif /* _LINUX_AT24_H */
-- 
cgit v1.2.3


From fc36ffda326706b21f70a4aff0c77d9bc94c4f0a Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Wed, 5 Dec 2018 11:33:34 +0100
Subject: iwlwifi: mvm: support FTM initiator

Add support for FTM initiator, i.e. peer measurements with FTM
if the firmware supports FTM.

Additionally, add two defines we depend on in
include/linux/ieee80211.h.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Avraham Stern <avraham.stern@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
---
 .../net/wireless/intel/iwlwifi/fw/api/location.h   |  10 +-
 drivers/net/wireless/intel/iwlwifi/mvm/Makefile    |   3 +-
 drivers/net/wireless/intel/iwlwifi/mvm/constants.h |   3 +
 .../net/wireless/intel/iwlwifi/mvm/ftm-initiator.c | 459 +++++++++++++++++++++
 drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c  |  56 ++-
 drivers/net/wireless/intel/iwlwifi/mvm/mvm.h       |  16 +
 drivers/net/wireless/intel/iwlwifi/mvm/ops.c       |   7 +
 include/linux/ieee80211.h                          |   2 +
 8 files changed, 552 insertions(+), 4 deletions(-)
 create mode 100644 drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c

(limited to 'include')

diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/location.h b/drivers/net/wireless/intel/iwlwifi/fw/api/location.h
index 6da91ec0df55..10cac5f987e7 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/api/location.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/location.h
@@ -7,6 +7,7 @@
  *
  * Copyright(c) 2015 - 2017 Intel Deutschland GmbH
  * Copyright (C) 2018 Intel Corporation
+ * Copyright (C) 2019 Intel Corporation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
@@ -28,6 +29,7 @@
  *
  * Copyright(c) 2015 - 2017 Intel Deutschland GmbH
  * Copyright (C) 2018 Intel Corporation
+ * Copyright (C) 2019 Intel Corporation
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -403,7 +405,10 @@ enum iwl_tof_response_mode {
  * @IWL_TOF_INITIATOR_FLAGS_TX_CHAIN_SEL_A: use antenna A fo TX ACKs during FTM
  * @IWL_TOF_INITIATOR_FLAGS_TX_CHAIN_SEL_B: use antenna B fo TX ACKs during FTM
  * @IWL_TOF_INITIATOR_FLAGS_TX_CHAIN_SEL_C: use antenna C fo TX ACKs during FTM
- * @IWL_TOF_INITIATOR_FLAGS_MINDELTA_NO_PREF: no preference for minDeltaFTM
+ * @IWL_TOF_INITIATOR_FLAGS_SPECIFIC_CALIB: use the specific calib value from
+ *	the range request command
+ * @IWL_TOF_INITIATOR_FLAGS_COMMON_CALIB: use the common calib value from the
+ *	ragne request command
  */
 enum iwl_tof_initiator_flags {
 	IWL_TOF_INITIATOR_FLAGS_FAST_ALGO_DISABLED = BIT(0),
@@ -413,7 +418,8 @@ enum iwl_tof_initiator_flags {
 	IWL_TOF_INITIATOR_FLAGS_TX_CHAIN_SEL_A = BIT(4),
 	IWL_TOF_INITIATOR_FLAGS_TX_CHAIN_SEL_B = BIT(5),
 	IWL_TOF_INITIATOR_FLAGS_TX_CHAIN_SEL_C = BIT(6),
-	IWL_TOF_INITIATOR_FLAGS_MINDELTA_NO_PREF = BIT(7),
+	IWL_TOF_INITIATOR_FLAGS_SPECIFIC_CALIB = BIT(15),
+	IWL_TOF_INITIATOR_FLAGS_COMMON_CALIB   = BIT(16),
 }; /* LOCATION_RANGE_REQ_CMD_API_S_VER_5 */
 
 #define IWL_MVM_TOF_MAX_APS 5
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/Makefile b/drivers/net/wireless/intel/iwlwifi/mvm/Makefile
index 56e8d073f5aa..dd268c4bd371 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/Makefile
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/Makefile
@@ -4,7 +4,8 @@ iwlmvm-y += fw.o mac80211.o nvm.o ops.o phy-ctxt.o mac-ctxt.o
 iwlmvm-y += utils.o rx.o rxmq.o tx.o binding.o quota.o sta.o sf.o
 iwlmvm-y += scan.o time-event.o rs.o rs-fw.o
 iwlmvm-y += power.o coex.o
-iwlmvm-y += tt.o offloading.o tdls.o ftm-responder.o
+iwlmvm-y += tt.o offloading.o tdls.o
+iwlmvm-y += ftm-responder.o ftm-initiator.o
 iwlmvm-$(CONFIG_IWLWIFI_DEBUGFS) += debugfs.o debugfs-vif.o
 iwlmvm-$(CONFIG_IWLWIFI_LEDS) += led.o
 iwlmvm-$(CONFIG_PM) += d3.o
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/constants.h b/drivers/net/wireless/intel/iwlwifi/mvm/constants.h
index d96ada3c06fc..58e29af12a14 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/constants.h
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/constants.h
@@ -63,6 +63,7 @@
 #define __MVM_CONSTANTS_H
 
 #include <linux/ieee80211.h>
+#include "fw-api.h"
 
 #define IWL_MVM_UAPSD_NOAGG_BSSIDS_NUM		20
 
@@ -145,5 +146,7 @@
 #define IWL_MVM_RS_TPC_SR_NO_INCREASE		85	/* percent */
 #define IWL_MVM_RS_TPC_TX_POWER_STEP		3
 #define IWL_MVM_ENABLE_EBS			1
+#define IWL_MVM_FTM_INITIATOR_ALGO		IWL_TOF_ALGO_TYPE_MAX_LIKE
+#define IWL_MVM_FTM_INITIATOR_DYNACK		true
 
 #endif /* __MVM_CONSTANTS_H */
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c b/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c
new file mode 100644
index 000000000000..eb6f084a0f8a
--- /dev/null
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c
@@ -0,0 +1,459 @@
+/******************************************************************************
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2015 - 2017 Intel Deutschland GmbH
+ * Copyright (C) 2018 Intel Corporation
+ * Copyright (C) 2019 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution
+ * in the file called COPYING.
+ *
+ * Contact Information:
+ * Intel Linux Wireless <linuxwifi@intel.com>
+ * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2015 - 2017 Intel Deutschland GmbH
+ * Copyright (C) 2018 Intel Corporation
+ * Copyright (C) 2019 Intel Corporation
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  * Neither the name Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *****************************************************************************/
+#include <linux/etherdevice.h>
+#include <linux/math64.h>
+#include <net/cfg80211.h>
+#include "mvm.h"
+#include "iwl-io.h"
+#include "iwl-prph.h"
+#include "constants.h"
+
+struct iwl_mvm_loc_entry {
+	struct list_head list;
+	u8 addr[ETH_ALEN];
+	u8 lci_len, civic_len;
+	u8 buf[];
+};
+
+static void iwl_mvm_ftm_reset(struct iwl_mvm *mvm)
+{
+	struct iwl_mvm_loc_entry *e, *t;
+
+	mvm->ftm_initiator.req = NULL;
+	mvm->ftm_initiator.req_wdev = NULL;
+	memset(mvm->ftm_initiator.responses, 0,
+	       sizeof(mvm->ftm_initiator.responses));
+	list_for_each_entry_safe(e, t, &mvm->ftm_initiator.loc_list, list) {
+		list_del(&e->list);
+		kfree(e);
+	}
+}
+
+void iwl_mvm_ftm_restart(struct iwl_mvm *mvm)
+{
+	struct cfg80211_pmsr_result result = {
+		.status = NL80211_PMSR_STATUS_FAILURE,
+		.final = 1,
+		.host_time = ktime_get_boot_ns(),
+		.type = NL80211_PMSR_TYPE_FTM,
+	};
+	int i;
+
+	lockdep_assert_held(&mvm->mutex);
+
+	if (!mvm->ftm_initiator.req)
+		return;
+
+	for (i = 0; i < mvm->ftm_initiator.req->n_peers; i++) {
+		memcpy(result.addr, mvm->ftm_initiator.req->peers[i].addr,
+		       ETH_ALEN);
+		result.ftm.burst_index = mvm->ftm_initiator.responses[i];
+
+		cfg80211_pmsr_report(mvm->ftm_initiator.req_wdev,
+				     mvm->ftm_initiator.req,
+				     &result, GFP_KERNEL);
+	}
+
+	cfg80211_pmsr_complete(mvm->ftm_initiator.req_wdev,
+			       mvm->ftm_initiator.req, GFP_KERNEL);
+	iwl_mvm_ftm_reset(mvm);
+}
+
+static int
+iwl_ftm_range_request_status_to_err(enum iwl_tof_range_request_status s)
+{
+	switch (s) {
+	case IWL_TOF_RANGE_REQUEST_STATUS_SUCCESS:
+		return 0;
+	case IWL_TOF_RANGE_REQUEST_STATUS_BUSY:
+		return -EBUSY;
+	default:
+		WARN_ON_ONCE(1);
+		return -EIO;
+	}
+}
+
+int iwl_mvm_ftm_start(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
+		      struct cfg80211_pmsr_request *req)
+{
+	struct iwl_tof_range_req_cmd cmd = {
+		.request_id = req->cookie,
+		.req_timeout = DIV_ROUND_UP(req->timeout, 100),
+		.num_of_ap = req->n_peers,
+		/*
+		 * We treat it always as random, since if not we'll
+		 * have filled our local address there instead.
+		 */
+		.macaddr_random = 1,
+	};
+	struct iwl_host_cmd hcmd = {
+		.id = iwl_cmd_id(TOF_RANGE_REQ_CMD, LOCATION_GROUP, 0),
+		.data[0] = &cmd,
+		.len[0] = sizeof(cmd),
+		.dataflags[0] = IWL_HCMD_DFL_DUP,
+	};
+	u32 status = 0;
+	int err, i;
+
+	/* use maximum for "no timeout" or bigger than what we can do */
+	if (!req->timeout || req->timeout > 255 * 100)
+		cmd.req_timeout = 255;
+
+	lockdep_assert_held(&mvm->mutex);
+
+	if (mvm->ftm_initiator.req)
+		return -EBUSY;
+
+	memcpy(cmd.macaddr_template, req->mac_addr, ETH_ALEN);
+	for (i = 0; i < ETH_ALEN; i++)
+		cmd.macaddr_mask[i] = ~req->mac_addr_mask[i];
+
+	for (i = 0; i < cmd.num_of_ap; i++) {
+		struct cfg80211_pmsr_request_peer *peer = &req->peers[i];
+		struct iwl_tof_range_req_ap_entry *cmd_target = &cmd.ap[i];
+		u32 freq = peer->chandef.chan->center_freq;
+
+		cmd_target->channel_num = ieee80211_frequency_to_channel(freq);
+		switch (peer->chandef.width) {
+		case NL80211_CHAN_WIDTH_20_NOHT:
+			cmd_target->bandwidth = IWL_TOF_BW_20_LEGACY;
+			break;
+		case NL80211_CHAN_WIDTH_20:
+			cmd_target->bandwidth = IWL_TOF_BW_20_HT;
+			break;
+		case NL80211_CHAN_WIDTH_40:
+			cmd_target->bandwidth = IWL_TOF_BW_40;
+			break;
+		case NL80211_CHAN_WIDTH_80:
+			cmd_target->bandwidth = IWL_TOF_BW_80;
+			break;
+		default:
+			IWL_ERR(mvm, "Unsupported BW in FTM request (%d)\n",
+				peer->chandef.width);
+			return -EINVAL;
+		}
+		cmd_target->ctrl_ch_position =
+			(peer->chandef.width > NL80211_CHAN_WIDTH_20) ?
+			iwl_mvm_get_ctrl_pos(&peer->chandef) : 0;
+
+		memcpy(cmd_target->bssid, peer->addr, ETH_ALEN);
+		cmd_target->measure_type = 0; /* regular two-sided FTM */
+		cmd_target->num_of_bursts = peer->ftm.num_bursts_exp;
+		cmd_target->burst_period =
+			cpu_to_le16(peer->ftm.burst_period);
+		cmd_target->samples_per_burst = peer->ftm.ftms_per_burst;
+		cmd_target->retries_per_sample = peer->ftm.ftmr_retries;
+		cmd_target->asap_mode = peer->ftm.asap;
+		cmd_target->enable_dyn_ack = IWL_MVM_FTM_INITIATOR_DYNACK;
+
+		if (peer->ftm.request_lci)
+			cmd_target->location_req |= IWL_TOF_LOC_LCI;
+		if (peer->ftm.request_civicloc)
+			cmd_target->location_req |= IWL_TOF_LOC_CIVIC;
+
+		cmd_target->algo_type = IWL_MVM_FTM_INITIATOR_ALGO;
+	}
+
+	if (vif->bss_conf.assoc)
+		memcpy(cmd.range_req_bssid, vif->bss_conf.bssid, ETH_ALEN);
+	else
+		eth_broadcast_addr(cmd.range_req_bssid);
+
+	err = iwl_mvm_send_cmd_status(mvm, &hcmd, &status);
+	if (!err && status) {
+		IWL_ERR(mvm, "FTM range request command failure, status: %u\n",
+			status);
+		err = iwl_ftm_range_request_status_to_err(status);
+	}
+
+	if (!err) {
+		mvm->ftm_initiator.req = req;
+		mvm->ftm_initiator.req_wdev = ieee80211_vif_to_wdev(vif);
+	}
+
+	return err;
+}
+
+void iwl_mvm_ftm_abort(struct iwl_mvm *mvm, struct cfg80211_pmsr_request *req)
+{
+	struct iwl_tof_range_abort_cmd cmd = {
+		.request_id = req->cookie,
+	};
+
+	lockdep_assert_held(&mvm->mutex);
+
+	if (req != mvm->ftm_initiator.req)
+		return;
+
+	if (iwl_mvm_send_cmd_pdu(mvm, iwl_cmd_id(TOF_RANGE_ABORT_CMD,
+						 LOCATION_GROUP, 0),
+				 0, sizeof(cmd), &cmd))
+		IWL_ERR(mvm, "failed to abort FTM process\n");
+}
+
+static int iwl_mvm_ftm_find_peer(struct cfg80211_pmsr_request *req,
+				 const u8 *addr)
+{
+	int i;
+
+	for (i = 0; i < req->n_peers; i++) {
+		struct cfg80211_pmsr_request_peer *peer = &req->peers[i];
+
+		if (ether_addr_equal_unaligned(peer->addr, addr))
+			return i;
+	}
+
+	return -ENOENT;
+}
+
+static u64 iwl_mvm_ftm_get_host_time(struct iwl_mvm *mvm, __le32 fw_gp2_ts)
+{
+	u32 gp2_ts = le32_to_cpu(fw_gp2_ts);
+	u32 curr_gp2, diff;
+	u64 now_from_boot_ns;
+
+	iwl_mvm_get_sync_time(mvm, &curr_gp2, &now_from_boot_ns);
+
+	if (curr_gp2 >= gp2_ts)
+		diff = curr_gp2 - gp2_ts;
+	else
+		diff = curr_gp2 + (U32_MAX - gp2_ts + 1);
+
+	return now_from_boot_ns - (u64)diff * 1000;
+}
+
+static void iwl_mvm_ftm_get_lci_civic(struct iwl_mvm *mvm,
+				      struct cfg80211_pmsr_result *res)
+{
+	struct iwl_mvm_loc_entry *entry;
+
+	list_for_each_entry(entry, &mvm->ftm_initiator.loc_list, list) {
+		if (!ether_addr_equal_unaligned(res->addr, entry->addr))
+			continue;
+
+		if (entry->lci_len) {
+			res->ftm.lci_len = entry->lci_len;
+			res->ftm.lci = entry->buf;
+		}
+
+		if (entry->civic_len) {
+			res->ftm.civicloc_len = entry->civic_len;
+			res->ftm.civicloc = entry->buf + entry->lci_len;
+		}
+
+		/* we found the entry we needed */
+		break;
+	}
+}
+
+void iwl_mvm_ftm_range_resp(struct iwl_mvm *mvm, struct iwl_rx_cmd_buffer *rxb)
+{
+	struct iwl_rx_packet *pkt = rxb_addr(rxb);
+	struct iwl_tof_range_rsp_ntfy *fw_resp = (void *)pkt->data;
+	int i;
+
+	lockdep_assert_held(&mvm->mutex);
+
+	if (!mvm->ftm_initiator.req) {
+		IWL_ERR(mvm, "Got FTM response but have no request?\n");
+		return;
+	}
+
+	if (fw_resp->request_id != (u8)mvm->ftm_initiator.req->cookie) {
+		IWL_ERR(mvm, "Request ID mismatch, got %u, active %u\n",
+			fw_resp->request_id,
+			(u8)mvm->ftm_initiator.req->cookie);
+		return;
+	}
+
+	if (fw_resp->num_of_aps > mvm->ftm_initiator.req->n_peers) {
+		IWL_ERR(mvm, "FTM range response invalid\n");
+		return;
+	}
+
+	for (i = 0; i < fw_resp->num_of_aps && i < IWL_MVM_TOF_MAX_APS; i++) {
+		struct iwl_tof_range_rsp_ap_entry_ntfy *fw_ap = &fw_resp->ap[i];
+		struct cfg80211_pmsr_result result = {};
+		int peer_idx;
+
+		peer_idx = iwl_mvm_ftm_find_peer(mvm->ftm_initiator.req,
+						 fw_ap->bssid);
+		if (peer_idx < 0) {
+			IWL_WARN(mvm,
+				 "Unknown address (%pM, target #%d) in FTM response.\n",
+				 fw_ap->bssid, i);
+			continue;
+		}
+
+		switch (fw_ap->measure_status) {
+		case IWL_TOF_ENTRY_SUCCESS:
+			result.status = NL80211_PMSR_STATUS_SUCCESS;
+			break;
+		case IWL_TOF_ENTRY_TIMING_MEASURE_TIMEOUT:
+			result.status = NL80211_PMSR_STATUS_TIMEOUT;
+			break;
+		case IWL_TOF_ENTRY_NO_RESPONSE:
+			result.status = NL80211_PMSR_STATUS_FAILURE;
+			result.ftm.failure_reason =
+				NL80211_PMSR_FTM_FAILURE_NO_RESPONSE;
+			break;
+		case IWL_TOF_ENTRY_REQUEST_REJECTED:
+			result.status = NL80211_PMSR_STATUS_FAILURE;
+			result.ftm.failure_reason =
+				NL80211_PMSR_FTM_FAILURE_PEER_BUSY;
+			result.ftm.busy_retry_time = fw_ap->refusal_period;
+			break;
+		default:
+			result.status = NL80211_PMSR_STATUS_FAILURE;
+			result.ftm.failure_reason =
+				NL80211_PMSR_FTM_FAILURE_UNSPECIFIED;
+			break;
+		}
+		memcpy(result.addr, fw_ap->bssid, ETH_ALEN);
+		result.host_time = iwl_mvm_ftm_get_host_time(mvm,
+							     fw_ap->timestamp);
+		result.type = NL80211_PMSR_TYPE_FTM;
+		result.ftm.burst_index = mvm->ftm_initiator.responses[peer_idx];
+		mvm->ftm_initiator.responses[peer_idx]++;
+		/*
+		 * FIXME: the firmware needs to report this, we don't even know
+		 *        the number of bursts the responder picked (if we asked
+		 *        it to)
+		 */
+		result.final = 0;
+		result.ftm.rssi_avg = fw_ap->rssi;
+		result.ftm.rssi_avg_valid = 1;
+		result.ftm.rssi_spread = fw_ap->rssi_spread;
+		result.ftm.rssi_spread_valid = 1;
+		result.ftm.rtt_avg = (s32)le32_to_cpu(fw_ap->rtt);
+		result.ftm.rtt_avg_valid = 1;
+		result.ftm.rtt_variance = le32_to_cpu(fw_ap->rtt_variance);
+		result.ftm.rtt_variance_valid = 1;
+		result.ftm.rtt_spread = le32_to_cpu(fw_ap->rtt_spread);
+		result.ftm.rtt_spread_valid = 1;
+
+		iwl_mvm_ftm_get_lci_civic(mvm, &result);
+
+		cfg80211_pmsr_report(mvm->ftm_initiator.req_wdev,
+				     mvm->ftm_initiator.req,
+				     &result, GFP_KERNEL);
+	}
+
+	if (fw_resp->last_in_batch) {
+		cfg80211_pmsr_complete(mvm->ftm_initiator.req_wdev,
+				       mvm->ftm_initiator.req,
+				       GFP_KERNEL);
+		iwl_mvm_ftm_reset(mvm);
+	}
+}
+
+void iwl_mvm_ftm_lc_notif(struct iwl_mvm *mvm, struct iwl_rx_cmd_buffer *rxb)
+{
+	struct iwl_rx_packet *pkt = rxb_addr(rxb);
+	const struct ieee80211_mgmt *mgmt = (void *)pkt->data;
+	size_t len = iwl_rx_packet_payload_len(pkt);
+	struct iwl_mvm_loc_entry *entry;
+	const u8 *ies, *lci, *civic, *msr_ie;
+	size_t ies_len, lci_len = 0, civic_len = 0;
+	size_t baselen = IEEE80211_MIN_ACTION_SIZE +
+			 sizeof(mgmt->u.action.u.ftm);
+	static const u8 rprt_type_lci = IEEE80211_SPCT_MSR_RPRT_TYPE_LCI;
+	static const u8 rprt_type_civic = IEEE80211_SPCT_MSR_RPRT_TYPE_CIVIC;
+
+	if (len <= baselen)
+		return;
+
+	lockdep_assert_held(&mvm->mutex);
+
+	ies = mgmt->u.action.u.ftm.variable;
+	ies_len = len - baselen;
+
+	msr_ie = cfg80211_find_ie_match(WLAN_EID_MEASURE_REPORT, ies, ies_len,
+					&rprt_type_lci, 1, 4);
+	if (msr_ie) {
+		lci = msr_ie + 2;
+		lci_len = msr_ie[1];
+	}
+
+	msr_ie = cfg80211_find_ie_match(WLAN_EID_MEASURE_REPORT, ies, ies_len,
+					&rprt_type_civic, 1, 4);
+	if (msr_ie) {
+		civic = msr_ie + 2;
+		civic_len = msr_ie[1];
+	}
+
+	entry = kmalloc(sizeof(*entry) + lci_len + civic_len, GFP_KERNEL);
+	if (!entry)
+		return;
+
+	memcpy(entry->addr, mgmt->bssid, ETH_ALEN);
+
+	entry->lci_len = lci_len;
+	if (lci_len)
+		memcpy(entry->buf, lci, lci_len);
+
+	entry->civic_len = civic_len;
+	if (civic_len)
+		memcpy(entry->buf + lci_len, civic, civic_len);
+
+	list_add_tail(&entry->list, &mvm->ftm_initiator.loc_list);
+}
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
index cba1a0fe33ca..9377fca39edf 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
@@ -184,6 +184,29 @@ static const struct iwl_fw_bcast_filter iwl_mvm_default_bcast_filters[] = {
 };
 #endif
 
+static const struct cfg80211_pmsr_capabilities iwl_mvm_pmsr_capa = {
+	.max_peers = IWL_MVM_TOF_MAX_APS,
+	.report_ap_tsf = 1,
+	.randomize_mac_addr = 1,
+
+	.ftm = {
+		.supported = 1,
+		.asap = 1,
+		.non_asap = 1,
+		.request_lci = 1,
+		.request_civicloc = 1,
+		.max_bursts_exponent = -1, /* all supported */
+		.max_ftms_per_burst = 0, /* no limits */
+		.bandwidths = BIT(NL80211_CHAN_WIDTH_20_NOHT) |
+			      BIT(NL80211_CHAN_WIDTH_20) |
+			      BIT(NL80211_CHAN_WIDTH_40) |
+			      BIT(NL80211_CHAN_WIDTH_80),
+		.preambles = BIT(NL80211_PREAMBLE_LEGACY) |
+			     BIT(NL80211_PREAMBLE_HT) |
+			     BIT(NL80211_PREAMBLE_VHT),
+	},
+};
+
 void iwl_mvm_ref(struct iwl_mvm *mvm, enum iwl_mvm_ref_type ref_type)
 {
 	if (!iwl_mvm_is_d0i3_supported(mvm))
@@ -549,9 +572,11 @@ int iwl_mvm_mac_setup_register(struct iwl_mvm *mvm)
 	}
 
 	if (fw_has_capa(&mvm->fw->ucode_capa,
-			IWL_UCODE_TLV_CAPA_FTM_CALIBRATED))
+			IWL_UCODE_TLV_CAPA_FTM_CALIBRATED)) {
 		wiphy_ext_feature_set(hw->wiphy,
 				      NL80211_EXT_FEATURE_ENABLE_FTM_RESPONDER);
+		hw->wiphy->pmsr_capa = &iwl_mvm_pmsr_capa;
+	}
 
 	ieee80211_hw_set(hw, SINGLE_SCAN_ON_ALL_BANDS);
 	hw->wiphy->features |=
@@ -1186,6 +1211,8 @@ static void iwl_mvm_restart_cleanup(struct iwl_mvm *mvm)
 	iwl_mvm_cleanup_roc_te(mvm);
 	ieee80211_remain_on_channel_expired(mvm->hw);
 
+	iwl_mvm_ftm_restart(mvm);
+
 	/*
 	 * cleanup all interfaces, even inactive ones, as some might have
 	 * gone down during the HW restart
@@ -4895,6 +4922,31 @@ iwl_mvm_mac_get_ftm_responder_stats(struct ieee80211_hw *hw,
 	return 0;
 }
 
+static int iwl_mvm_start_pmsr(struct ieee80211_hw *hw,
+			      struct ieee80211_vif *vif,
+			      struct cfg80211_pmsr_request *request)
+{
+	struct iwl_mvm *mvm = IWL_MAC80211_GET_MVM(hw);
+	int ret;
+
+	mutex_lock(&mvm->mutex);
+	ret = iwl_mvm_ftm_start(mvm, vif, request);
+	mutex_unlock(&mvm->mutex);
+
+	return ret;
+}
+
+static void iwl_mvm_abort_pmsr(struct ieee80211_hw *hw,
+			       struct ieee80211_vif *vif,
+			       struct cfg80211_pmsr_request *request)
+{
+	struct iwl_mvm *mvm = IWL_MAC80211_GET_MVM(hw);
+
+	mutex_lock(&mvm->mutex);
+	iwl_mvm_ftm_abort(mvm, request);
+	mutex_unlock(&mvm->mutex);
+}
+
 static bool iwl_mvm_can_hw_csum(struct sk_buff *skb)
 {
 	u8 protocol = ip_hdr(skb)->protocol;
@@ -4998,6 +5050,8 @@ const struct ieee80211_ops iwl_mvm_hw_ops = {
 	.get_survey = iwl_mvm_mac_get_survey,
 	.sta_statistics = iwl_mvm_mac_sta_statistics,
 	.get_ftm_responder_stats = iwl_mvm_mac_get_ftm_responder_stats,
+	.start_pmsr = iwl_mvm_start_pmsr,
+	.abort_pmsr = iwl_mvm_abort_pmsr,
 
 	.can_aggregate_in_amsdu = iwl_mvm_mac_can_aggregate,
 #ifdef CONFIG_IWLWIFI_DEBUGFS
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
index 2bd330a093fb..e9873fc7bd2b 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
@@ -1151,6 +1151,12 @@ struct iwl_mvm {
 	struct ieee80211_cipher_scheme cs[IWL_UCODE_MAX_CS];
 
 	struct cfg80211_ftm_responder_stats ftm_resp_stats;
+	struct {
+		struct cfg80211_pmsr_request *req;
+		struct wireless_dev *req_wdev;
+		struct list_head loc_list;
+		int responses[IWL_MVM_TOF_MAX_APS];
+	} ftm_initiator;
 
 	struct ieee80211_vif *nan_vif;
 #define IWL_MAX_BAID	32
@@ -2069,6 +2075,16 @@ void iwl_mvm_ftm_restart_responder(struct iwl_mvm *mvm,
 void iwl_mvm_ftm_responder_stats(struct iwl_mvm *mvm,
 				 struct iwl_rx_cmd_buffer *rxb);
 
+/* FTM initiator */
+void iwl_mvm_ftm_restart(struct iwl_mvm *mvm);
+void iwl_mvm_ftm_range_resp(struct iwl_mvm *mvm,
+			    struct iwl_rx_cmd_buffer *rxb);
+void iwl_mvm_ftm_lc_notif(struct iwl_mvm *mvm,
+			  struct iwl_rx_cmd_buffer *rxb);
+int iwl_mvm_ftm_start(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
+		      struct cfg80211_pmsr_request *request);
+void iwl_mvm_ftm_abort(struct iwl_mvm *mvm, struct cfg80211_pmsr_request *req);
+
 /* TDLS */
 
 /*
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c
index d5644d252fe0..0c276124bf0f 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c
@@ -302,6 +302,12 @@ static const struct iwl_rx_handlers iwl_mvm_rx_handlers[] = {
 		   RX_HANDLER_SYNC),
 	RX_HANDLER_GRP(LOCATION_GROUP, TOF_RESPONDER_STATS,
 		       iwl_mvm_ftm_responder_stats, RX_HANDLER_ASYNC_LOCKED),
+
+	RX_HANDLER_GRP(LOCATION_GROUP, TOF_RANGE_RESPONSE_NOTIF,
+		       iwl_mvm_ftm_range_resp, RX_HANDLER_ASYNC_LOCKED),
+	RX_HANDLER_GRP(LOCATION_GROUP, TOF_LC_NOTIF,
+		       iwl_mvm_ftm_lc_notif, RX_HANDLER_ASYNC_LOCKED),
+
 	RX_HANDLER_GRP(DEBUG_GROUP, MFU_ASSERT_DUMP_NTF,
 		       iwl_mvm_mfu_assert_dump_notif, RX_HANDLER_SYNC),
 	RX_HANDLER_GRP(PROT_OFFLOAD_GROUP, STORED_BEACON_NTF,
@@ -693,6 +699,7 @@ iwl_op_mode_mvm_start(struct iwl_trans *trans, const struct iwl_cfg *cfg,
 	INIT_LIST_HEAD(&mvm->aux_roc_te_list);
 	INIT_LIST_HEAD(&mvm->async_handlers_list);
 	spin_lock_init(&mvm->time_event_lock);
+	INIT_LIST_HEAD(&mvm->ftm_initiator.loc_list);
 
 	INIT_WORK(&mvm->async_handlers_wk, iwl_mvm_async_handlers_wk);
 	INIT_WORK(&mvm->roc_done_wk, iwl_mvm_roc_done_wk);
diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 3b04e72315e1..f1f66e675ca1 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -2118,6 +2118,8 @@ ieee80211_he_oper_size(const u8 *he_oper_ie)
 #define IEEE80211_SPCT_MSR_RPRT_TYPE_BASIC	0
 #define IEEE80211_SPCT_MSR_RPRT_TYPE_CCA	1
 #define IEEE80211_SPCT_MSR_RPRT_TYPE_RPI	2
+#define IEEE80211_SPCT_MSR_RPRT_TYPE_LCI	8
+#define IEEE80211_SPCT_MSR_RPRT_TYPE_CIVIC	11
 
 /* 802.11g ERP information element */
 #define WLAN_ERP_NON_ERP_PRESENT (1<<0)
-- 
cgit v1.2.3


From 540bfab7fbff6ab9092bb28aaf804af0b4d576ae Mon Sep 17 00:00:00 2001
From: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Date: Wed, 13 Feb 2019 10:45:50 +0300
Subject: usb: typec: Rationalize the API for the muxes

Since with accessory modes there is no need for additional
identification when requesting a handle to the mux, we can
replace the second parameter that is passed to the
typec_mux_get() function with a pointer to alternate mode
description structure, and simply passing NULL with
accessory modes.

This change means the naming of the mux device connections
can be updated. Alternate and Accessory Modes will both be
handled with muxes named "mode-switch", and the orientation
switches will be named "orientation-switch".

Future identification of the alternate modes will be later
done using device property "svid" of the mux.

Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Reviewed-by: Jun Li <jun.li@nxp.com>
Signed-off-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/typec/class.c     |  7 ++-----
 drivers/usb/typec/mux.c       | 10 ++++++----
 include/linux/usb/typec_mux.h |  3 ++-
 3 files changed, 10 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/drivers/usb/typec/class.c b/drivers/usb/typec/class.c
index 41c0d790a50f..45abe2c7e9f3 100644
--- a/drivers/usb/typec/class.c
+++ b/drivers/usb/typec/class.c
@@ -1496,11 +1496,8 @@ typec_port_register_altmode(struct typec_port *port,
 {
 	struct typec_altmode *adev;
 	struct typec_mux *mux;
-	char id[10];
 
-	sprintf(id, "id%04xm%02x", desc->svid, desc->mode);
-
-	mux = typec_mux_get(&port->dev, id);
+	mux = typec_mux_get(&port->dev, desc);
 	if (IS_ERR(mux))
 		return ERR_CAST(mux);
 
@@ -1593,7 +1590,7 @@ struct typec_port *typec_register_port(struct device *parent,
 		return ERR_CAST(port->sw);
 	}
 
-	port->mux = typec_mux_get(&port->dev, "typec-mux");
+	port->mux = typec_mux_get(&port->dev, NULL);
 	if (IS_ERR(port->mux)) {
 		put_device(&port->dev);
 		return ERR_CAST(port->mux);
diff --git a/drivers/usb/typec/mux.c b/drivers/usb/typec/mux.c
index d990aa510fab..8975f58e1d60 100644
--- a/drivers/usb/typec/mux.c
+++ b/drivers/usb/typec/mux.c
@@ -48,7 +48,7 @@ struct typec_switch *typec_switch_get(struct device *dev)
 	struct typec_switch *sw;
 
 	mutex_lock(&switch_lock);
-	sw = device_connection_find_match(dev, "typec-switch", NULL,
+	sw = device_connection_find_match(dev, "orientation-switch", NULL,
 					  typec_switch_match);
 	if (!IS_ERR_OR_NULL(sw)) {
 		WARN_ON(!try_module_get(sw->dev->driver->owner));
@@ -128,19 +128,21 @@ static void *typec_mux_match(struct device_connection *con, int ep, void *data)
 /**
  * typec_mux_get - Find USB Type-C Multiplexer
  * @dev: The caller device
- * @name: Mux identifier
+ * @desc: Alt Mode description
  *
  * Finds a mux linked to the caller. This function is primarily meant for the
  * Type-C drivers. Returns a reference to the mux on success, NULL if no
  * matching connection was found, or ERR_PTR(-EPROBE_DEFER) when a connection
  * was found but the mux has not been enumerated yet.
  */
-struct typec_mux *typec_mux_get(struct device *dev, const char *name)
+struct typec_mux *typec_mux_get(struct device *dev,
+				const struct typec_altmode_desc *desc)
 {
 	struct typec_mux *mux;
 
 	mutex_lock(&mux_lock);
-	mux = device_connection_find_match(dev, name, NULL, typec_mux_match);
+	mux = device_connection_find_match(dev, "mode-switch", (void *)desc,
+					   typec_mux_match);
 	if (!IS_ERR_OR_NULL(mux)) {
 		WARN_ON(!try_module_get(mux->dev->driver->owner));
 		get_device(mux->dev);
diff --git a/include/linux/usb/typec_mux.h b/include/linux/usb/typec_mux.h
index 79293f630ee1..43f40685e53c 100644
--- a/include/linux/usb/typec_mux.h
+++ b/include/linux/usb/typec_mux.h
@@ -47,7 +47,8 @@ void typec_switch_put(struct typec_switch *sw);
 int typec_switch_register(struct typec_switch *sw);
 void typec_switch_unregister(struct typec_switch *sw);
 
-struct typec_mux *typec_mux_get(struct device *dev, const char *name);
+struct typec_mux *
+typec_mux_get(struct device *dev, const struct typec_altmode_desc *desc);
 void typec_mux_put(struct typec_mux *mux);
 int typec_mux_register(struct typec_mux *mux);
 void typec_mux_unregister(struct typec_mux *mux);
-- 
cgit v1.2.3


From ec69e9533c4879c81eb7122771792864eb49af35 Mon Sep 17 00:00:00 2001
From: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Date: Wed, 13 Feb 2019 10:45:54 +0300
Subject: usb: roles: Find the muxes by also matching against the device node

When the connections are defined in firmware, struct
device_connection will have the fwnode member pointing to
the device node (struct fwnode_handle) of the requested
device, and the endpoint will not be used at all in that
case.

Acked-by: Hans de Goede <hdegoede@redhat.com>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Reviewed-by: Jun Li <jun.li@nxp.com>
Tested-by: Jun Li <jun.li@nxp.com>
Signed-off-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/roles/class.c | 21 ++++++++++++++++++---
 include/linux/usb/role.h  |  2 ++
 2 files changed, 20 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/drivers/usb/roles/class.c b/drivers/usb/roles/class.c
index 99116af07f1d..f45d8df5cfb8 100644
--- a/drivers/usb/roles/class.c
+++ b/drivers/usb/roles/class.c
@@ -8,6 +8,7 @@
  */
 
 #include <linux/usb/role.h>
+#include <linux/property.h>
 #include <linux/device.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
@@ -84,7 +85,12 @@ enum usb_role usb_role_switch_get_role(struct usb_role_switch *sw)
 }
 EXPORT_SYMBOL_GPL(usb_role_switch_get_role);
 
-static int __switch_match(struct device *dev, const void *name)
+static int switch_fwnode_match(struct device *dev, const void *fwnode)
+{
+	return dev_fwnode(dev) == fwnode;
+}
+
+static int switch_name_match(struct device *dev, const void *name)
 {
 	return !strcmp((const char *)name, dev_name(dev));
 }
@@ -94,8 +100,16 @@ static void *usb_role_switch_match(struct device_connection *con, int ep,
 {
 	struct device *dev;
 
-	dev = class_find_device(role_class, NULL, con->endpoint[ep],
-				__switch_match);
+	if (con->fwnode) {
+		if (!fwnode_property_present(con->fwnode, con->id))
+			return NULL;
+
+		dev = class_find_device(role_class, NULL, con->fwnode,
+					switch_fwnode_match);
+	} else {
+		dev = class_find_device(role_class, NULL, con->endpoint[ep],
+					switch_name_match);
+	}
 
 	return dev ? to_role_switch(dev) : ERR_PTR(-EPROBE_DEFER);
 }
@@ -266,6 +280,7 @@ usb_role_switch_register(struct device *parent,
 	sw->get = desc->get;
 
 	sw->dev.parent = parent;
+	sw->dev.fwnode = desc->fwnode;
 	sw->dev.class = role_class;
 	sw->dev.type = &usb_role_dev_type;
 	dev_set_name(&sw->dev, "%s-role-switch", dev_name(parent));
diff --git a/include/linux/usb/role.h b/include/linux/usb/role.h
index edc51be4a77c..c05ffa6abda9 100644
--- a/include/linux/usb/role.h
+++ b/include/linux/usb/role.h
@@ -18,6 +18,7 @@ typedef enum usb_role (*usb_role_switch_get_t)(struct device *dev);
 
 /**
  * struct usb_role_switch_desc - USB Role Switch Descriptor
+ * @fwnode: The device node to be associated with the role switch
  * @usb2_port: Optional reference to the host controller port device (USB2)
  * @usb3_port: Optional reference to the host controller port device (USB3)
  * @udc: Optional reference to the peripheral controller device
@@ -32,6 +33,7 @@ typedef enum usb_role (*usb_role_switch_get_t)(struct device *dev);
  * usb_role_switch_register() before registering the switch.
  */
 struct usb_role_switch_desc {
+	struct fwnode_handle *fwnode;
 	struct device *usb2_port;
 	struct device *usb3_port;
 	struct device *udc;
-- 
cgit v1.2.3


From 09aa11cfda9d8186046bcd1adcd6498b688114f4 Mon Sep 17 00:00:00 2001
From: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Date: Wed, 13 Feb 2019 10:45:52 +0300
Subject: device connection: Add fwnode member to struct device_connection

This will prepare the device connection API for connections
described in firmware.

Acked-by: Hans de Goede <hdegoede@redhat.com>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Reviewed-by: Jun Li <jun.li@nxp.com>
Signed-off-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/device.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/linux/device.h b/include/linux/device.h
index 6cb4640b6160..7a9ff5f83664 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -757,11 +757,17 @@ struct device_dma_parameters {
 
 /**
  * struct device_connection - Device Connection Descriptor
+ * @fwnode: The device node of the connected device
  * @endpoint: The names of the two devices connected together
  * @id: Unique identifier for the connection
  * @list: List head, private, for internal use only
+ *
+ * NOTE: @fwnode is not used together with @endpoint. @fwnode is used when
+ * platform firmware defines the connection. When the connection is registered
+ * with device_connection_add() @endpoint is used instead.
  */
 struct device_connection {
+	struct fwnode_handle	*fwnode;
 	const char		*endpoint[2];
 	const char		*id;
 	struct list_head	list;
-- 
cgit v1.2.3


From a0584ee9aed805446b044ce855e67264f0dc619e Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 11 Feb 2019 11:24:58 -0500
Subject: SUNRPC: Use struct xdr_stream when decoding RPC Reply header

Modernize and harden the code path that parses an RPC Reply
message.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 include/linux/sunrpc/auth.h    |  15 ++-
 include/linux/sunrpc/xdr.h     |   1 +
 net/sunrpc/auth.c              |  63 ++++++++-----
 net/sunrpc/auth_gss/auth_gss.c | 204 ++++++++++++++++++++++-------------------
 net/sunrpc/auth_null.c         |  31 +++----
 net/sunrpc/auth_unix.c         |  42 +++++----
 net/sunrpc/clnt.c              |  88 +++++++++---------
 7 files changed, 243 insertions(+), 201 deletions(-)

(limited to 'include')

diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h
index 96e237f8e60b..c51e1893f77e 100644
--- a/include/linux/sunrpc/auth.h
+++ b/include/linux/sunrpc/auth.h
@@ -134,11 +134,12 @@ struct rpc_credops {
 	int			(*crmarshal)(struct rpc_task *task,
 					     struct xdr_stream *xdr);
 	int			(*crrefresh)(struct rpc_task *);
-	__be32 *		(*crvalidate)(struct rpc_task *, __be32 *);
+	int			(*crvalidate)(struct rpc_task *task,
+					      struct xdr_stream *xdr);
 	int			(*crwrap_req)(struct rpc_task *task,
 					      struct xdr_stream *xdr);
-	int			(*crunwrap_resp)(struct rpc_task *, kxdrdproc_t,
-						void *, __be32 *, void *);
+	int			(*crunwrap_resp)(struct rpc_task *task,
+						 struct xdr_stream *xdr);
 	int			(*crkey_timeout)(struct rpc_cred *);
 	char *			(*crstringify_acceptor)(struct rpc_cred *);
 	bool			(*crneed_reencode)(struct rpc_task *);
@@ -168,12 +169,16 @@ struct rpc_cred *	rpcauth_lookupcred(struct rpc_auth *, int);
 void			put_rpccred(struct rpc_cred *);
 int			rpcauth_marshcred(struct rpc_task *task,
 					  struct xdr_stream *xdr);
-__be32 *		rpcauth_checkverf(struct rpc_task *, __be32 *);
+int			rpcauth_checkverf(struct rpc_task *task,
+					  struct xdr_stream *xdr);
 int			rpcauth_wrap_req_encode(struct rpc_task *task,
 						struct xdr_stream *xdr);
 int			rpcauth_wrap_req(struct rpc_task *task,
 					 struct xdr_stream *xdr);
-int			rpcauth_unwrap_resp(struct rpc_task *task, kxdrdproc_t decode, void *rqstp, __be32 *data, void *obj);
+int			rpcauth_unwrap_resp_decode(struct rpc_task *task,
+						   struct xdr_stream *xdr);
+int			rpcauth_unwrap_resp(struct rpc_task *task,
+					    struct xdr_stream *xdr);
 bool			rpcauth_xmit_need_reencode(struct rpc_task *task);
 int			rpcauth_refreshcred(struct rpc_task *);
 void			rpcauth_invalcred(struct rpc_task *);
diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index c54041950cc0..65af6a204b75 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -89,6 +89,7 @@ xdr_buf_init(struct xdr_buf *buf, void *start, size_t len)
 
 #define	rpc_auth_null	cpu_to_be32(RPC_AUTH_NULL)
 #define	rpc_auth_unix	cpu_to_be32(RPC_AUTH_UNIX)
+#define	rpc_auth_short	cpu_to_be32(RPC_AUTH_SHORT)
 #define	rpc_auth_gss	cpu_to_be32(RPC_AUTH_GSS)
 
 #define	rpc_call	cpu_to_be32(RPC_CALL)
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index add2135d9b01..e7861026b9e5 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -17,6 +17,8 @@
 #include <linux/sunrpc/gss_api.h>
 #include <linux/spinlock.h>
 
+#include <trace/events/sunrpc.h>
+
 #define RPC_CREDCACHE_DEFAULT_HASHBITS	(4)
 struct rpc_cred_cache {
 	struct hlist_head	*hashtable;
@@ -773,14 +775,6 @@ int rpcauth_marshcred(struct rpc_task *task, struct xdr_stream *xdr)
 	return ops->crmarshal(task, xdr);
 }
 
-__be32 *
-rpcauth_checkverf(struct rpc_task *task, __be32 *p)
-{
-	struct rpc_cred	*cred = task->tk_rqstp->rq_cred;
-
-	return cred->cr_ops->crvalidate(task, p);
-}
-
 /**
  * rpcauth_wrap_req_encode - XDR encode the RPC procedure
  * @task: controlling RPC task
@@ -814,27 +808,52 @@ int rpcauth_wrap_req(struct rpc_task *task, struct xdr_stream *xdr)
 	return ops->crwrap_req(task, xdr);
 }
 
-static int
-rpcauth_unwrap_req_decode(kxdrdproc_t decode, struct rpc_rqst *rqstp,
-			  __be32 *data, void *obj)
+/**
+ * rpcauth_checkverf - Validate verifier in RPC Reply header
+ * @task: controlling RPC task
+ * @xdr: xdr_stream containing RPC Reply header
+ *
+ * On success, @xdr is updated to point past the verifier and
+ * zero is returned. Otherwise, @xdr is in an undefined state
+ * and a negative errno is returned.
+ */
+int
+rpcauth_checkverf(struct rpc_task *task, struct xdr_stream *xdr)
 {
-	struct xdr_stream xdr;
+	const struct rpc_credops *ops = task->tk_rqstp->rq_cred->cr_ops;
 
-	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, data, rqstp);
-	return decode(rqstp, &xdr, obj);
+	return ops->crvalidate(task, xdr);
 }
 
+/**
+ * rpcauth_unwrap_resp_decode - Invoke XDR decode function
+ * @task: controlling RPC task
+ * @xdr: stream where the Reply message resides
+ *
+ * Returns zero on success; otherwise a negative errno is returned.
+ */
 int
-rpcauth_unwrap_resp(struct rpc_task *task, kxdrdproc_t decode, void *rqstp,
-		__be32 *data, void *obj)
+rpcauth_unwrap_resp_decode(struct rpc_task *task, struct xdr_stream *xdr)
 {
-	struct rpc_cred *cred = task->tk_rqstp->rq_cred;
+	kxdrdproc_t decode = task->tk_msg.rpc_proc->p_decode;
+
+	return decode(task->tk_rqstp, xdr, task->tk_msg.rpc_resp);
+}
+EXPORT_SYMBOL_GPL(rpcauth_unwrap_resp_decode);
+
+/**
+ * rpcauth_unwrap_resp - Invoke unwrap and decode function for the cred
+ * @task: controlling RPC task
+ * @xdr: stream where the Reply message resides
+ *
+ * Returns zero on success; otherwise a negative errno is returned.
+ */
+int
+rpcauth_unwrap_resp(struct rpc_task *task, struct xdr_stream *xdr)
+{
+	const struct rpc_credops *ops = task->tk_rqstp->rq_cred->cr_ops;
 
-	if (cred->cr_ops->crunwrap_resp)
-		return cred->cr_ops->crunwrap_resp(task, decode, rqstp,
-						   data, obj);
-	/* By default, we decode the arguments normally. */
-	return rpcauth_unwrap_req_decode(decode, rqstp, data, obj);
+	return ops->crunwrap_resp(task, xdr);
 }
 
 bool
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index b333b1bdad45..206788e8b787 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -1671,59 +1671,62 @@ gss_refresh_null(struct rpc_task *task)
 	return 0;
 }
 
-static __be32 *
-gss_validate(struct rpc_task *task, __be32 *p)
+static int
+gss_validate(struct rpc_task *task, struct xdr_stream *xdr)
 {
 	struct rpc_cred *cred = task->tk_rqstp->rq_cred;
 	struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred);
-	__be32		*seq = NULL;
+	__be32		*p, *seq = NULL;
 	struct kvec	iov;
 	struct xdr_buf	verf_buf;
 	struct xdr_netobj mic;
-	u32		flav,len;
-	u32		maj_stat;
-	__be32		*ret = ERR_PTR(-EIO);
+	u32		len, maj_stat;
+	int		status;
 
-	dprintk("RPC: %5u %s\n", task->tk_pid, __func__);
+	p = xdr_inline_decode(xdr, 2 * sizeof(*p));
+	if (!p)
+		goto validate_failed;
+	if (*p++ != rpc_auth_gss)
+		goto validate_failed;
+	len = be32_to_cpup(p);
+	if (len > RPC_MAX_AUTH_SIZE)
+		goto validate_failed;
+	p = xdr_inline_decode(xdr, len);
+	if (!p)
+		goto validate_failed;
 
-	flav = ntohl(*p++);
-	if ((len = ntohl(*p++)) > RPC_MAX_AUTH_SIZE)
-		goto out_bad;
-	if (flav != RPC_AUTH_GSS)
-		goto out_bad;
 	seq = kmalloc(4, GFP_NOFS);
 	if (!seq)
-		goto out_bad;
-	*seq = htonl(task->tk_rqstp->rq_seqno);
+		goto validate_failed;
+	*seq = cpu_to_be32(task->tk_rqstp->rq_seqno);
 	iov.iov_base = seq;
 	iov.iov_len = 4;
 	xdr_buf_from_iov(&iov, &verf_buf);
 	mic.data = (u8 *)p;
 	mic.len = len;
-
-	ret = ERR_PTR(-EACCES);
 	maj_stat = gss_verify_mic(ctx->gc_gss_ctx, &verf_buf, &mic);
 	if (maj_stat == GSS_S_CONTEXT_EXPIRED)
 		clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
-	if (maj_stat) {
-		dprintk("RPC: %5u %s: gss_verify_mic returned error 0x%08x\n",
-			task->tk_pid, __func__, maj_stat);
-		goto out_bad;
-	}
+	if (maj_stat)
+		goto bad_mic;
+
 	/* We leave it to unwrap to calculate au_rslack. For now we just
 	 * calculate the length of the verifier: */
 	cred->cr_auth->au_verfsize = XDR_QUADLEN(len) + 2;
+	status = 0;
+out:
 	gss_put_ctx(ctx);
-	dprintk("RPC: %5u %s: gss_verify_mic succeeded.\n",
-			task->tk_pid, __func__);
-	kfree(seq);
-	return p + XDR_QUADLEN(len);
-out_bad:
-	gss_put_ctx(ctx);
-	dprintk("RPC: %5u %s failed ret %ld.\n", task->tk_pid, __func__,
-		PTR_ERR(ret));
 	kfree(seq);
-	return ret;
+	return status;
+
+validate_failed:
+	status = -EIO;
+	goto out;
+bad_mic:
+	dprintk("RPC: %5u %s: gss_verify_mic returned error 0x%08x\n",
+		task->tk_pid, __func__, maj_stat);
+	status = -EACCES;
+	goto out;
 }
 
 static int gss_wrap_req_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
@@ -1921,79 +1924,98 @@ out:
 	return status;
 }
 
-static inline int
+static int
+gss_unwrap_resp_auth(struct rpc_cred *cred)
+{
+	cred->cr_auth->au_rslack = cred->cr_auth->au_verfsize;
+	return 0;
+}
+
+static int
 gss_unwrap_resp_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
-		struct rpc_rqst *rqstp, __be32 **p)
+		      struct rpc_rqst *rqstp, struct xdr_stream *xdr)
 {
-	struct xdr_buf	*rcv_buf = &rqstp->rq_rcv_buf;
-	struct xdr_buf integ_buf;
+	struct xdr_buf integ_buf, *rcv_buf = &rqstp->rq_rcv_buf;
+	u32 data_offset, mic_offset, integ_len, maj_stat;
 	struct xdr_netobj mic;
-	u32 data_offset, mic_offset;
-	u32 integ_len;
-	u32 maj_stat;
-	int status = -EIO;
+	__be32 *p;
 
-	integ_len = ntohl(*(*p)++);
+	p = xdr_inline_decode(xdr, 2 * sizeof(*p));
+	if (unlikely(!p))
+		goto unwrap_failed;
+	integ_len = be32_to_cpup(p++);
 	if (integ_len & 3)
-		return status;
-	data_offset = (u8 *)(*p) - (u8 *)rcv_buf->head[0].iov_base;
+		goto unwrap_failed;
+	data_offset = (u8 *)(p) - (u8 *)rcv_buf->head[0].iov_base;
 	mic_offset = integ_len + data_offset;
 	if (mic_offset > rcv_buf->len)
-		return status;
-	if (ntohl(*(*p)++) != rqstp->rq_seqno)
-		return status;
-
-	if (xdr_buf_subsegment(rcv_buf, &integ_buf, data_offset,
-				mic_offset - data_offset))
-		return status;
+		goto unwrap_failed;
+	if (be32_to_cpup(p) != rqstp->rq_seqno)
+		goto unwrap_failed;
 
+	if (xdr_buf_subsegment(rcv_buf, &integ_buf, data_offset, integ_len))
+		goto unwrap_failed;
 	if (xdr_buf_read_netobj(rcv_buf, &mic, mic_offset))
-		return status;
-
+		goto unwrap_failed;
 	maj_stat = gss_verify_mic(ctx->gc_gss_ctx, &integ_buf, &mic);
 	if (maj_stat == GSS_S_CONTEXT_EXPIRED)
 		clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
 	if (maj_stat != GSS_S_COMPLETE)
-		return status;
+		goto bad_mic;
+
+	cred->cr_auth->au_rslack = cred->cr_auth->au_verfsize + 2 +
+				   1 + XDR_QUADLEN(mic.len);
 	return 0;
+unwrap_failed:
+	return -EIO;
+bad_mic:
+	dprintk("RPC:       %s: gss_verify_mic returned error 0x%08x\n",
+		__func__, maj_stat);
+	return -EIO;
 }
 
-static inline int
+static int
 gss_unwrap_resp_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
-		struct rpc_rqst *rqstp, __be32 **p)
-{
-	struct xdr_buf  *rcv_buf = &rqstp->rq_rcv_buf;
-	u32 offset;
-	u32 opaque_len;
-	u32 maj_stat;
-	int status = -EIO;
-
-	opaque_len = ntohl(*(*p)++);
-	offset = (u8 *)(*p) - (u8 *)rcv_buf->head[0].iov_base;
+		     struct rpc_rqst *rqstp, struct xdr_stream *xdr)
+{
+	struct xdr_buf *rcv_buf = &rqstp->rq_rcv_buf;
+	struct kvec *head = rqstp->rq_rcv_buf.head;
+	unsigned int savedlen = rcv_buf->len;
+	u32 offset, opaque_len, maj_stat;
+	__be32 *p;
+
+	p = xdr_inline_decode(xdr, 2 * sizeof(*p));
+	if (unlikely(!p))
+		goto unwrap_failed;
+	opaque_len = be32_to_cpup(p++);
+	offset = (u8 *)(p) - (u8 *)head->iov_base;
 	if (offset + opaque_len > rcv_buf->len)
-		return status;
-	/* remove padding: */
+		goto unwrap_failed;
 	rcv_buf->len = offset + opaque_len;
 
 	maj_stat = gss_unwrap(ctx->gc_gss_ctx, offset, rcv_buf);
 	if (maj_stat == GSS_S_CONTEXT_EXPIRED)
 		clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
 	if (maj_stat != GSS_S_COMPLETE)
-		return status;
-	if (ntohl(*(*p)++) != rqstp->rq_seqno)
-		return status;
-
-	return 0;
-}
+		goto bad_unwrap;
+	/* gss_unwrap decrypted the sequence number */
+	if (be32_to_cpup(p++) != rqstp->rq_seqno)
+		goto unwrap_failed;
 
-static int
-gss_unwrap_req_decode(kxdrdproc_t decode, struct rpc_rqst *rqstp,
-		      __be32 *p, void *obj)
-{
-	struct xdr_stream xdr;
+	/* gss_unwrap redacts the opaque blob from the head iovec.
+	 * rcv_buf has changed, thus the stream needs to be reset.
+	 */
+	xdr_init_decode(xdr, rcv_buf, p, rqstp);
 
-	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p, rqstp);
-	return decode(rqstp, &xdr, obj);
+	cred->cr_auth->au_rslack = cred->cr_auth->au_verfsize + 2 +
+				   XDR_QUADLEN(savedlen - rcv_buf->len);
+	return 0;
+unwrap_failed:
+	return -EIO;
+bad_unwrap:
+	dprintk("RPC:       %s: gss_unwrap returned error 0x%08x\n",
+		__func__, maj_stat);
+	return -EIO;
 }
 
 static bool
@@ -2037,39 +2059,33 @@ out:
 }
 
 static int
-gss_unwrap_resp(struct rpc_task *task,
-		kxdrdproc_t decode, void *rqstp, __be32 *p, void *obj)
+gss_unwrap_resp(struct rpc_task *task, struct xdr_stream *xdr)
 {
-	struct rpc_cred *cred = task->tk_rqstp->rq_cred;
+	struct rpc_rqst *rqstp = task->tk_rqstp;
+	struct rpc_cred *cred = rqstp->rq_cred;
 	struct gss_cred *gss_cred = container_of(cred, struct gss_cred,
 			gc_base);
 	struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred);
-	__be32		*savedp = p;
-	struct kvec	*head = ((struct rpc_rqst *)rqstp)->rq_rcv_buf.head;
-	int		savedlen = head->iov_len;
-	int             status = -EIO;
+	int status = -EIO;
 
 	if (ctx->gc_proc != RPC_GSS_PROC_DATA)
 		goto out_decode;
 	switch (gss_cred->gc_service) {
 	case RPC_GSS_SVC_NONE:
+		status = gss_unwrap_resp_auth(cred);
 		break;
 	case RPC_GSS_SVC_INTEGRITY:
-		status = gss_unwrap_resp_integ(cred, ctx, rqstp, &p);
-		if (status)
-			goto out;
+		status = gss_unwrap_resp_integ(cred, ctx, rqstp, xdr);
 		break;
 	case RPC_GSS_SVC_PRIVACY:
-		status = gss_unwrap_resp_priv(cred, ctx, rqstp, &p);
-		if (status)
-			goto out;
+		status = gss_unwrap_resp_priv(cred, ctx, rqstp, xdr);
 		break;
 	}
-	/* take into account extra slack for integrity and privacy cases: */
-	cred->cr_auth->au_rslack = cred->cr_auth->au_verfsize + (p - savedp)
-						+ (savedlen - head->iov_len);
+	if (status)
+		goto out;
+
 out_decode:
-	status = gss_unwrap_req_decode(decode, rqstp, p, obj);
+	status = rpcauth_unwrap_resp_decode(task, xdr);
 out:
 	gss_put_ctx(ctx);
 	dprintk("RPC: %5u %s returning %d\n",
diff --git a/net/sunrpc/auth_null.c b/net/sunrpc/auth_null.c
index 797f8472c21b..bf96975ffc4b 100644
--- a/net/sunrpc/auth_null.c
+++ b/net/sunrpc/auth_null.c
@@ -86,25 +86,19 @@ nul_refresh(struct rpc_task *task)
 	return 0;
 }
 
-static __be32 *
-nul_validate(struct rpc_task *task, __be32 *p)
+static int
+nul_validate(struct rpc_task *task, struct xdr_stream *xdr)
 {
-	rpc_authflavor_t	flavor;
-	u32			size;
-
-	flavor = ntohl(*p++);
-	if (flavor != RPC_AUTH_NULL) {
-		printk("RPC: bad verf flavor: %u\n", flavor);
-		return ERR_PTR(-EIO);
-	}
-
-	size = ntohl(*p++);
-	if (size != 0) {
-		printk("RPC: bad verf size: %u\n", size);
-		return ERR_PTR(-EIO);
-	}
-
-	return p;
+	__be32 *p;
+
+	p = xdr_inline_decode(xdr, 2 * sizeof(*p));
+	if (!p)
+		return -EIO;
+	if (*p++ != rpc_auth_null)
+		return -EIO;
+	if (*p != xdr_zero)
+		return -EIO;
+	return 0;
 }
 
 const struct rpc_authops authnull_ops = {
@@ -134,6 +128,7 @@ const struct rpc_credops null_credops = {
 	.crwrap_req	= rpcauth_wrap_req_encode,
 	.crrefresh	= nul_refresh,
 	.crvalidate	= nul_validate,
+	.crunwrap_resp	= rpcauth_unwrap_resp_decode,
 };
 
 static
diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c
index 1d5b7ed9c6f7..5ea84a96f96e 100644
--- a/net/sunrpc/auth_unix.c
+++ b/net/sunrpc/auth_unix.c
@@ -160,29 +160,32 @@ unx_refresh(struct rpc_task *task)
 	return 0;
 }
 
-static __be32 *
-unx_validate(struct rpc_task *task, __be32 *p)
+static int
+unx_validate(struct rpc_task *task, struct xdr_stream *xdr)
 {
-	rpc_authflavor_t	flavor;
-	u32			size;
-
-	flavor = ntohl(*p++);
-	if (flavor != RPC_AUTH_NULL &&
-	    flavor != RPC_AUTH_UNIX &&
-	    flavor != RPC_AUTH_SHORT) {
-		printk("RPC: bad verf flavor: %u\n", flavor);
-		return ERR_PTR(-EIO);
-	}
+	__be32 *p;
+	u32 size;
 
-	size = ntohl(*p++);
-	if (size > RPC_MAX_AUTH_SIZE) {
-		printk("RPC: giant verf size: %u\n", size);
-		return ERR_PTR(-EIO);
+	p = xdr_inline_decode(xdr, 2 * sizeof(*p));
+	if (!p)
+		return -EIO;
+	switch (*p++) {
+	case rpc_auth_null:
+	case rpc_auth_unix:
+	case rpc_auth_short:
+		break;
+	default:
+		return -EIO;
 	}
-	task->tk_rqstp->rq_cred->cr_auth->au_rslack = (size >> 2) + 2;
-	p += (size >> 2);
+	size = be32_to_cpup(p);
+	if (size > RPC_MAX_AUTH_SIZE)
+		return -EIO;
+	p = xdr_inline_decode(xdr, size);
+	if (!p)
+		return -EIO;
 
-	return p;
+	task->tk_rqstp->rq_cred->cr_auth->au_rslack = (size >> 2) + 2;
+	return 0;
 }
 
 int __init rpc_init_authunix(void)
@@ -223,4 +226,5 @@ const struct rpc_credops unix_credops = {
 	.crwrap_req	= rpcauth_wrap_req_encode,
 	.crrefresh	= unx_refresh,
 	.crvalidate	= unx_validate,
+	.crunwrap_resp	= rpcauth_unwrap_resp_decode,
 };
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index e9735089bd66..803e93105af1 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -79,7 +79,8 @@ static void	call_connect_status(struct rpc_task *task);
 
 static int	rpc_encode_header(struct rpc_task *task,
 				  struct xdr_stream *xdr);
-static __be32	*rpc_decode_header(struct rpc_task *task);
+static int	rpc_decode_header(struct rpc_task *task,
+				  struct xdr_stream *xdr);
 static int	rpc_ping(struct rpc_clnt *clnt);
 
 static void rpc_register_client(struct rpc_clnt *clnt)
@@ -2251,12 +2252,11 @@ call_decode(struct rpc_task *task)
 {
 	struct rpc_clnt	*clnt = task->tk_client;
 	struct rpc_rqst	*req = task->tk_rqstp;
-	kxdrdproc_t	decode = task->tk_msg.rpc_proc->p_decode;
-	__be32		*p;
+	struct xdr_stream xdr;
 
 	dprint_status(task);
 
-	if (!decode) {
+	if (!task->tk_msg.rpc_proc->p_decode) {
 		task->tk_action = rpc_exit_task;
 		return;
 	}
@@ -2292,29 +2292,27 @@ call_decode(struct rpc_task *task)
 		goto out_retry;
 	}
 
-	p = rpc_decode_header(task);
-	if (IS_ERR(p)) {
-		if (p == ERR_PTR(-EAGAIN))
-			goto out_retry;
+	xdr_init_decode(&xdr, &req->rq_rcv_buf,
+			req->rq_rcv_buf.head[0].iov_base, req);
+	switch (rpc_decode_header(task, &xdr)) {
+	case 0:
+		task->tk_action = rpc_exit_task;
+		task->tk_status = rpcauth_unwrap_resp(task, &xdr);
+		dprintk("RPC: %5u %s result %d\n",
+			task->tk_pid, __func__, task->tk_status);
 		return;
-	}
-	task->tk_action = rpc_exit_task;
-
-	task->tk_status = rpcauth_unwrap_resp(task, decode, req, p,
-					      task->tk_msg.rpc_resp);
-
-	dprintk("RPC: %5u call_decode result %d\n", task->tk_pid,
-			task->tk_status);
-	return;
+	case -EAGAIN:
 out_retry:
-	task->tk_status = 0;
-	/* Note: rpc_decode_header() may have freed the RPC slot */
-	if (task->tk_rqstp == req) {
-		xdr_free_bvec(&req->rq_rcv_buf);
-		req->rq_reply_bytes_recvd = req->rq_rcv_buf.len = 0;
-		if (task->tk_client->cl_discrtry)
-			xprt_conditional_disconnect(req->rq_xprt,
-					req->rq_connect_cookie);
+		task->tk_status = 0;
+		/* Note: rpc_decode_header() may have freed the RPC slot */
+		if (task->tk_rqstp == req) {
+			xdr_free_bvec(&req->rq_rcv_buf);
+			req->rq_reply_bytes_recvd = 0;
+			req->rq_rcv_buf.len = 0;
+			if (task->tk_client->cl_discrtry)
+				xprt_conditional_disconnect(req->rq_xprt,
+							    req->rq_connect_cookie);
+		}
 	}
 }
 
@@ -2347,14 +2345,12 @@ out_fail:
 	return error;
 }
 
-static noinline __be32 *
-rpc_decode_header(struct rpc_task *task)
+static noinline int
+rpc_decode_header(struct rpc_task *task, struct xdr_stream *xdr)
 {
 	struct rpc_clnt *clnt = task->tk_client;
-	struct kvec *iov = &task->tk_rqstp->rq_rcv_buf.head[0];
-	int len = task->tk_rqstp->rq_rcv_buf.len >> 2;
-	__be32	*p = iov->iov_base;
 	int error = -EACCES;
+	__be32 *p;
 
 	/* RFC-1014 says that the representation of XDR data must be a
 	 * multiple of four bytes
@@ -2363,25 +2359,26 @@ rpc_decode_header(struct rpc_task *task)
 	 */
 	if (task->tk_rqstp->rq_rcv_buf.len & 3)
 		goto out_badlen;
-	if ((len -= 3) < 0)
-		goto out_unparsable;
 
+	p = xdr_inline_decode(xdr, 3 * sizeof(*p));
+	if (!p)
+		goto out_unparsable;
 	p++;	/* skip XID */
 	if (*p++ != rpc_reply)
 		goto out_unparsable;
 	if (*p++ != rpc_msg_accepted)
 		goto out_msg_denied;
 
-	p = rpcauth_checkverf(task, p);
-	if (IS_ERR(p))
+	error = rpcauth_checkverf(task, xdr);
+	if (error)
 		goto out_verifier;
 
-	len = p - (__be32 *)iov->iov_base - 1;
-	if (len < 0)
+	p = xdr_inline_decode(xdr, sizeof(*p));
+	if (!p)
 		goto out_unparsable;
-	switch (*p++) {
+	switch (*p) {
 	case rpc_success:
-		return p;
+		return 0;
 	case rpc_prog_unavail:
 		trace_rpc__prog_unavail(task);
 		error = -EPFNOSUPPORT;
@@ -2406,11 +2403,11 @@ out_garbage:
 	if (task->tk_garb_retry) {
 		task->tk_garb_retry--;
 		task->tk_action = call_encode;
-		return ERR_PTR(-EAGAIN);
+		return -EAGAIN;
 	}
 out_err:
 	rpc_exit(task, error);
-	return ERR_PTR(error);
+	return error;
 
 out_badlen:
 	trace_rpc__unparsable(task);
@@ -2424,10 +2421,12 @@ out_unparsable:
 
 out_verifier:
 	trace_rpc_bad_verifier(task);
-	error = PTR_ERR(p);
 	goto out_garbage;
 
 out_msg_denied:
+	p = xdr_inline_decode(xdr, sizeof(*p));
+	if (!p)
+		goto out_unparsable;
 	switch (*p++) {
 	case rpc_auth_error:
 		break;
@@ -2441,6 +2440,9 @@ out_msg_denied:
 		goto out_err;
 	}
 
+	p = xdr_inline_decode(xdr, sizeof(*p));
+	if (!p)
+		goto out_unparsable;
 	switch (*p++) {
 	case rpc_autherr_rejectedcred:
 	case rpc_autherr_rejectedverf:
@@ -2454,7 +2456,7 @@ out_msg_denied:
 		/* Ensure we obtain a new XID! */
 		xprt_release(task);
 		task->tk_action = call_reserve;
-		return ERR_PTR(-EAGAIN);
+		return -EAGAIN;
 	case rpc_autherr_badcred:
 	case rpc_autherr_badverf:
 		/* possibly garbled cred/verf? */
@@ -2463,7 +2465,7 @@ out_msg_denied:
 		task->tk_garb_retry--;
 		trace_rpc__bad_creds(task);
 		task->tk_action = call_encode;
-		return ERR_PTR(-EAGAIN);
+		return -EAGAIN;
 	case rpc_autherr_tooweak:
 		trace_rpc__auth_tooweak(task);
 		pr_warn("RPC: server %s requires stronger authentication.\n",
-- 
cgit v1.2.3


From 0c77668ddb4e7bdfbca462c6185d154d0b8889ae Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 11 Feb 2019 11:25:04 -0500
Subject: SUNRPC: Introduce trace points in rpc_auth_gss.ko

Add infrastructure for trace points in the RPC_AUTH_GSS kernel
module, and add a few sample trace points. These report exceptional
or unexpected events, and observe the assignment of GSS sequence
numbers.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 include/trace/events/rpcgss.h  | 361 +++++++++++++++++++++++++++++++++++++++++
 include/trace/events/rpcrdma.h |  12 +-
 include/trace/events/sunrpc.h  |  61 ++++++-
 net/sunrpc/auth_gss/Makefile   |   2 +-
 net/sunrpc/auth_gss/auth_gss.c | 165 +++++++++----------
 net/sunrpc/auth_gss/trace.c    |  11 ++
 net/sunrpc/xprt.c              |  10 +-
 7 files changed, 530 insertions(+), 92 deletions(-)
 create mode 100644 include/trace/events/rpcgss.h
 create mode 100644 net/sunrpc/auth_gss/trace.c

(limited to 'include')

diff --git a/include/trace/events/rpcgss.h b/include/trace/events/rpcgss.h
new file mode 100644
index 000000000000..d1f7fe1b6fe4
--- /dev/null
+++ b/include/trace/events/rpcgss.h
@@ -0,0 +1,361 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2018 Oracle.  All rights reserved.
+ *
+ * Trace point definitions for the "rpcgss" subsystem.
+ */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM rpcgss
+
+#if !defined(_TRACE_RPCRDMA_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_RPCGSS_H
+
+#include <linux/tracepoint.h>
+
+/**
+ ** GSS-API related trace events
+ **/
+
+TRACE_DEFINE_ENUM(GSS_S_BAD_MECH);
+TRACE_DEFINE_ENUM(GSS_S_BAD_NAME);
+TRACE_DEFINE_ENUM(GSS_S_BAD_NAMETYPE);
+TRACE_DEFINE_ENUM(GSS_S_BAD_BINDINGS);
+TRACE_DEFINE_ENUM(GSS_S_BAD_STATUS);
+TRACE_DEFINE_ENUM(GSS_S_BAD_SIG);
+TRACE_DEFINE_ENUM(GSS_S_NO_CRED);
+TRACE_DEFINE_ENUM(GSS_S_NO_CONTEXT);
+TRACE_DEFINE_ENUM(GSS_S_DEFECTIVE_TOKEN);
+TRACE_DEFINE_ENUM(GSS_S_DEFECTIVE_CREDENTIAL);
+TRACE_DEFINE_ENUM(GSS_S_CREDENTIALS_EXPIRED);
+TRACE_DEFINE_ENUM(GSS_S_CONTEXT_EXPIRED);
+TRACE_DEFINE_ENUM(GSS_S_FAILURE);
+TRACE_DEFINE_ENUM(GSS_S_BAD_QOP);
+TRACE_DEFINE_ENUM(GSS_S_UNAUTHORIZED);
+TRACE_DEFINE_ENUM(GSS_S_UNAVAILABLE);
+TRACE_DEFINE_ENUM(GSS_S_DUPLICATE_ELEMENT);
+TRACE_DEFINE_ENUM(GSS_S_NAME_NOT_MN);
+TRACE_DEFINE_ENUM(GSS_S_CONTINUE_NEEDED);
+TRACE_DEFINE_ENUM(GSS_S_DUPLICATE_TOKEN);
+TRACE_DEFINE_ENUM(GSS_S_OLD_TOKEN);
+TRACE_DEFINE_ENUM(GSS_S_UNSEQ_TOKEN);
+TRACE_DEFINE_ENUM(GSS_S_GAP_TOKEN);
+
+#define show_gss_status(x)						\
+	__print_flags(x, "|",						\
+		{ GSS_S_BAD_MECH, "GSS_S_BAD_MECH" },			\
+		{ GSS_S_BAD_NAME, "GSS_S_BAD_NAME" },			\
+		{ GSS_S_BAD_NAMETYPE, "GSS_S_BAD_NAMETYPE" },		\
+		{ GSS_S_BAD_BINDINGS, "GSS_S_BAD_BINDINGS" },		\
+		{ GSS_S_BAD_STATUS, "GSS_S_BAD_STATUS" },		\
+		{ GSS_S_BAD_SIG, "GSS_S_BAD_SIG" },			\
+		{ GSS_S_NO_CRED, "GSS_S_NO_CRED" },			\
+		{ GSS_S_NO_CONTEXT, "GSS_S_NO_CONTEXT" },		\
+		{ GSS_S_DEFECTIVE_TOKEN, "GSS_S_DEFECTIVE_TOKEN" },	\
+		{ GSS_S_DEFECTIVE_CREDENTIAL, "GSS_S_DEFECTIVE_CREDENTIAL" }, \
+		{ GSS_S_CREDENTIALS_EXPIRED, "GSS_S_CREDENTIALS_EXPIRED" }, \
+		{ GSS_S_CONTEXT_EXPIRED, "GSS_S_CONTEXT_EXPIRED" },	\
+		{ GSS_S_FAILURE, "GSS_S_FAILURE" },			\
+		{ GSS_S_BAD_QOP, "GSS_S_BAD_QOP" },			\
+		{ GSS_S_UNAUTHORIZED, "GSS_S_UNAUTHORIZED" },		\
+		{ GSS_S_UNAVAILABLE, "GSS_S_UNAVAILABLE" },		\
+		{ GSS_S_DUPLICATE_ELEMENT, "GSS_S_DUPLICATE_ELEMENT" },	\
+		{ GSS_S_NAME_NOT_MN, "GSS_S_NAME_NOT_MN" },		\
+		{ GSS_S_CONTINUE_NEEDED, "GSS_S_CONTINUE_NEEDED" },	\
+		{ GSS_S_DUPLICATE_TOKEN, "GSS_S_DUPLICATE_TOKEN" },	\
+		{ GSS_S_OLD_TOKEN, "GSS_S_OLD_TOKEN" },			\
+		{ GSS_S_UNSEQ_TOKEN, "GSS_S_UNSEQ_TOKEN" },		\
+		{ GSS_S_GAP_TOKEN, "GSS_S_GAP_TOKEN" })
+
+
+DECLARE_EVENT_CLASS(rpcgss_gssapi_event,
+	TP_PROTO(
+		const struct rpc_task *task,
+		u32 maj_stat
+	),
+
+	TP_ARGS(task, maj_stat),
+
+	TP_STRUCT__entry(
+		__field(unsigned int, task_id)
+		__field(unsigned int, client_id)
+		__field(u32, maj_stat)
+
+	),
+
+	TP_fast_assign(
+		__entry->task_id = task->tk_pid;
+		__entry->client_id = task->tk_client->cl_clid;
+		__entry->maj_stat = maj_stat;
+	),
+
+	TP_printk("task:%u@%u maj_stat=%s",
+		__entry->task_id, __entry->client_id,
+		__entry->maj_stat == 0 ?
+		"GSS_S_COMPLETE" : show_gss_status(__entry->maj_stat))
+);
+
+#define DEFINE_GSSAPI_EVENT(name)					\
+	DEFINE_EVENT(rpcgss_gssapi_event, rpcgss_##name,		\
+			TP_PROTO(					\
+				const struct rpc_task *task,		\
+				u32 maj_stat				\
+			),						\
+			TP_ARGS(task, maj_stat))
+
+TRACE_EVENT(rpcgss_import_ctx,
+	TP_PROTO(
+		int status
+	),
+
+	TP_ARGS(status),
+
+	TP_STRUCT__entry(
+		__field(int, status)
+	),
+
+	TP_fast_assign(
+		__entry->status = status;
+	),
+
+	TP_printk("status=%d", __entry->status)
+);
+
+DEFINE_GSSAPI_EVENT(get_mic);
+DEFINE_GSSAPI_EVENT(verify_mic);
+DEFINE_GSSAPI_EVENT(wrap);
+DEFINE_GSSAPI_EVENT(unwrap);
+
+
+/**
+ ** GSS auth unwrap failures
+ **/
+
+TRACE_EVENT(rpcgss_unwrap_failed,
+	TP_PROTO(
+		const struct rpc_task *task
+	),
+
+	TP_ARGS(task),
+
+	TP_STRUCT__entry(
+		__field(unsigned int, task_id)
+		__field(unsigned int, client_id)
+	),
+
+	TP_fast_assign(
+		__entry->task_id = task->tk_pid;
+		__entry->client_id = task->tk_client->cl_clid;
+	),
+
+	TP_printk("task:%u@%u", __entry->task_id, __entry->client_id)
+);
+
+TRACE_EVENT(rpcgss_bad_seqno,
+	TP_PROTO(
+		const struct rpc_task *task,
+		u32 expected,
+		u32 received
+	),
+
+	TP_ARGS(task, expected, received),
+
+	TP_STRUCT__entry(
+		__field(unsigned int, task_id)
+		__field(unsigned int, client_id)
+		__field(u32, expected)
+		__field(u32, received)
+	),
+
+	TP_fast_assign(
+		__entry->task_id = task->tk_pid;
+		__entry->client_id = task->tk_client->cl_clid;
+		__entry->expected = expected;
+		__entry->received = received;
+	),
+
+	TP_printk("task:%u@%u expected seqno %u, received seqno %u",
+		__entry->task_id, __entry->client_id,
+		__entry->expected, __entry->received)
+);
+
+TRACE_EVENT(rpcgss_seqno,
+	TP_PROTO(
+		const struct rpc_task *task
+	),
+
+	TP_ARGS(task),
+
+	TP_STRUCT__entry(
+		__field(unsigned int, task_id)
+		__field(unsigned int, client_id)
+		__field(u32, xid)
+		__field(u32, seqno)
+	),
+
+	TP_fast_assign(
+		const struct rpc_rqst *rqst = task->tk_rqstp;
+
+		__entry->task_id = task->tk_pid;
+		__entry->client_id = task->tk_client->cl_clid;
+		__entry->xid = be32_to_cpu(rqst->rq_xid);
+		__entry->seqno = rqst->rq_seqno;
+	),
+
+	TP_printk("task:%u@%u xid=0x%08x seqno=%u",
+		__entry->task_id, __entry->client_id,
+		__entry->xid, __entry->seqno)
+);
+
+TRACE_EVENT(rpcgss_need_reencode,
+	TP_PROTO(
+		const struct rpc_task *task,
+		u32 seq_xmit,
+		bool ret
+	),
+
+	TP_ARGS(task, seq_xmit, ret),
+
+	TP_STRUCT__entry(
+		__field(unsigned int, task_id)
+		__field(unsigned int, client_id)
+		__field(u32, xid)
+		__field(u32, seq_xmit)
+		__field(u32, seqno)
+		__field(bool, ret)
+	),
+
+	TP_fast_assign(
+		__entry->task_id = task->tk_pid;
+		__entry->client_id = task->tk_client->cl_clid;
+		__entry->xid = be32_to_cpu(task->tk_rqstp->rq_xid);
+		__entry->seq_xmit = seq_xmit;
+		__entry->seqno = task->tk_rqstp->rq_seqno;
+		__entry->ret = ret;
+	),
+
+	TP_printk("task:%u@%u xid=0x%08x rq_seqno=%u seq_xmit=%u reencode %sneeded",
+		__entry->task_id, __entry->client_id,
+		__entry->xid, __entry->seqno, __entry->seq_xmit,
+		__entry->ret ? "" : "un")
+);
+
+/**
+ ** gssd upcall related trace events
+ **/
+
+TRACE_EVENT(rpcgss_upcall_msg,
+	TP_PROTO(
+		const char *buf
+	),
+
+	TP_ARGS(buf),
+
+	TP_STRUCT__entry(
+		__string(msg, buf)
+	),
+
+	TP_fast_assign(
+		__assign_str(msg, buf)
+	),
+
+	TP_printk("msg='%s'", __get_str(msg))
+);
+
+TRACE_EVENT(rpcgss_upcall_result,
+	TP_PROTO(
+		u32 uid,
+		int result
+	),
+
+	TP_ARGS(uid, result),
+
+	TP_STRUCT__entry(
+		__field(u32, uid)
+		__field(int, result)
+
+	),
+
+	TP_fast_assign(
+		__entry->uid = uid;
+		__entry->result = result;
+	),
+
+	TP_printk("for uid %u, result=%d", __entry->uid, __entry->result)
+);
+
+TRACE_EVENT(rpcgss_context,
+	TP_PROTO(
+		unsigned long expiry,
+		unsigned long now,
+		unsigned int timeout,
+		unsigned int len,
+		const u8 *data
+	),
+
+	TP_ARGS(expiry, now, timeout, len, data),
+
+	TP_STRUCT__entry(
+		__field(unsigned long, expiry)
+		__field(unsigned long, now)
+		__field(unsigned int, timeout)
+		__field(int, len)
+		__string(acceptor, data)
+	),
+
+	TP_fast_assign(
+		__entry->expiry = expiry;
+		__entry->now = now;
+		__entry->timeout = timeout;
+		__entry->len = len;
+		strncpy(__get_str(acceptor), data, len);
+	),
+
+	TP_printk("gc_expiry=%lu now=%lu timeout=%u acceptor=%.*s",
+		__entry->expiry, __entry->now, __entry->timeout,
+		__entry->len, __get_str(acceptor))
+);
+
+
+/**
+ ** Miscellaneous events
+ */
+
+TRACE_DEFINE_ENUM(RPC_AUTH_GSS_KRB5);
+TRACE_DEFINE_ENUM(RPC_AUTH_GSS_KRB5I);
+TRACE_DEFINE_ENUM(RPC_AUTH_GSS_KRB5P);
+
+#define show_pseudoflavor(x)						\
+	__print_symbolic(x,						\
+		{ RPC_AUTH_GSS_KRB5, "RPC_AUTH_GSS_KRB5" },		\
+		{ RPC_AUTH_GSS_KRB5I, "RPC_AUTH_GSS_KRB5I" },		\
+		{ RPC_AUTH_GSS_KRB5P, "RPC_AUTH_GSS_KRB5P" })
+
+
+TRACE_EVENT(rpcgss_createauth,
+	TP_PROTO(
+		unsigned int flavor,
+		int error
+	),
+
+	TP_ARGS(flavor, error),
+
+	TP_STRUCT__entry(
+		__field(unsigned int, flavor)
+		__field(int, error)
+
+	),
+
+	TP_fast_assign(
+		__entry->flavor = flavor;
+		__entry->error = error;
+	),
+
+	TP_printk("flavor=%s error=%d",
+		show_pseudoflavor(__entry->flavor), __entry->error)
+);
+
+
+#endif	/* _TRACE_RPCGSS_H */
+
+#include <trace/define_trace.h>
diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h
index 399b1aedc927..962975b4313f 100644
--- a/include/trace/events/rpcrdma.h
+++ b/include/trace/events/rpcrdma.h
@@ -521,12 +521,18 @@ TRACE_EVENT(xprtrdma_post_send,
 
 	TP_STRUCT__entry(
 		__field(const void *, req)
+		__field(unsigned int, task_id)
+		__field(unsigned int, client_id)
 		__field(int, num_sge)
 		__field(int, signaled)
 		__field(int, status)
 	),
 
 	TP_fast_assign(
+		const struct rpc_rqst *rqst = &req->rl_slot;
+
+		__entry->task_id = rqst->rq_task->tk_pid;
+		__entry->client_id = rqst->rq_task->tk_client->cl_clid;
 		__entry->req = req;
 		__entry->num_sge = req->rl_sendctx->sc_wr.num_sge;
 		__entry->signaled = req->rl_sendctx->sc_wr.send_flags &
@@ -534,9 +540,11 @@ TRACE_EVENT(xprtrdma_post_send,
 		__entry->status = status;
 	),
 
-	TP_printk("req=%p, %d SGEs%s, status=%d",
+	TP_printk("task:%u@%u req=%p (%d SGE%s) %sstatus=%d",
+		__entry->task_id, __entry->client_id,
 		__entry->req, __entry->num_sge,
-		(__entry->signaled ? ", signaled" : ""),
+		(__entry->num_sge == 1 ? "" : "s"),
+		(__entry->signaled ? "signaled " : ""),
 		__entry->status
 	)
 );
diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
index 0654e9c50371..e58dda8e038c 100644
--- a/include/trace/events/sunrpc.h
+++ b/include/trace/events/sunrpc.h
@@ -655,9 +655,68 @@ DECLARE_EVENT_CLASS(rpc_xprt_event,
 
 DEFINE_RPC_XPRT_EVENT(timer);
 DEFINE_RPC_XPRT_EVENT(lookup_rqst);
-DEFINE_RPC_XPRT_EVENT(transmit);
 DEFINE_RPC_XPRT_EVENT(complete_rqst);
 
+TRACE_EVENT(xprt_transmit,
+	TP_PROTO(
+		const struct rpc_rqst *rqst,
+		int status
+	),
+
+	TP_ARGS(rqst, status),
+
+	TP_STRUCT__entry(
+		__field(unsigned int, task_id)
+		__field(unsigned int, client_id)
+		__field(u32, xid)
+		__field(u32, seqno)
+		__field(int, status)
+	),
+
+	TP_fast_assign(
+		__entry->task_id = rqst->rq_task->tk_pid;
+		__entry->client_id = rqst->rq_task->tk_client->cl_clid;
+		__entry->xid = be32_to_cpu(rqst->rq_xid);
+		__entry->seqno = rqst->rq_seqno;
+		__entry->status = status;
+	),
+
+	TP_printk(
+		"task:%u@%u xid=0x%08x seqno=%u status=%d",
+		__entry->task_id, __entry->client_id, __entry->xid,
+		__entry->seqno, __entry->status)
+);
+
+TRACE_EVENT(xprt_enq_xmit,
+	TP_PROTO(
+		const struct rpc_task *task,
+		int stage
+	),
+
+	TP_ARGS(task, stage),
+
+	TP_STRUCT__entry(
+		__field(unsigned int, task_id)
+		__field(unsigned int, client_id)
+		__field(u32, xid)
+		__field(u32, seqno)
+		__field(int, stage)
+	),
+
+	TP_fast_assign(
+		__entry->task_id = task->tk_pid;
+		__entry->client_id = task->tk_client->cl_clid;
+		__entry->xid = be32_to_cpu(task->tk_rqstp->rq_xid);
+		__entry->seqno = task->tk_rqstp->rq_seqno;
+		__entry->stage = stage;
+	),
+
+	TP_printk(
+		"task:%u@%u xid=0x%08x seqno=%u stage=%d",
+		__entry->task_id, __entry->client_id, __entry->xid,
+		__entry->seqno, __entry->stage)
+);
+
 TRACE_EVENT(xprt_ping,
 	TP_PROTO(const struct rpc_xprt *xprt, int status),
 
diff --git a/net/sunrpc/auth_gss/Makefile b/net/sunrpc/auth_gss/Makefile
index c374268b008f..4a29f4c5dac4 100644
--- a/net/sunrpc/auth_gss/Makefile
+++ b/net/sunrpc/auth_gss/Makefile
@@ -7,7 +7,7 @@ obj-$(CONFIG_SUNRPC_GSS) += auth_rpcgss.o
 
 auth_rpcgss-y := auth_gss.o gss_generic_token.o \
 	gss_mech_switch.o svcauth_gss.o \
-	gss_rpc_upcall.o gss_rpc_xdr.o
+	gss_rpc_upcall.o gss_rpc_xdr.o trace.o
 
 obj-$(CONFIG_RPCSEC_GSS_KRB5) += rpcsec_gss_krb5.o
 
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 206788e8b787..3d1fbd6d3370 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -55,6 +55,8 @@
 
 #include "../netns.h"
 
+#include <trace/events/rpcgss.h>
+
 static const struct rpc_authops authgss_ops;
 
 static const struct rpc_credops gss_credops;
@@ -260,6 +262,7 @@ gss_fill_context(const void *p, const void *end, struct gss_cl_ctx *ctx, struct
 	}
 	ret = gss_import_sec_context(p, seclen, gm, &ctx->gc_gss_ctx, NULL, GFP_NOFS);
 	if (ret < 0) {
+		trace_rpcgss_import_ctx(ret);
 		p = ERR_PTR(ret);
 		goto err;
 	}
@@ -275,12 +278,9 @@ gss_fill_context(const void *p, const void *end, struct gss_cl_ctx *ctx, struct
 	if (IS_ERR(p))
 		goto err;
 done:
-	dprintk("RPC:       %s Success. gc_expiry %lu now %lu timeout %u acceptor %.*s\n",
-		__func__, ctx->gc_expiry, now, timeout, ctx->gc_acceptor.len,
-		ctx->gc_acceptor.data);
-	return p;
+	trace_rpcgss_context(ctx->gc_expiry, now, timeout,
+			     ctx->gc_acceptor.len, ctx->gc_acceptor.data);
 err:
-	dprintk("RPC:       %s returns error %ld\n", __func__, -PTR_ERR(p));
 	return p;
 }
 
@@ -354,10 +354,8 @@ __gss_find_upcall(struct rpc_pipe *pipe, kuid_t uid, const struct gss_auth *auth
 		if (auth && pos->auth->service != auth->service)
 			continue;
 		refcount_inc(&pos->count);
-		dprintk("RPC:       %s found msg %p\n", __func__, pos);
 		return pos;
 	}
-	dprintk("RPC:       %s found nothing\n", __func__);
 	return NULL;
 }
 
@@ -456,7 +454,7 @@ static int gss_encode_v1_msg(struct gss_upcall_msg *gss_msg,
 	size_t buflen = sizeof(gss_msg->databuf);
 	int len;
 
-	len = scnprintf(p, buflen, "mech=%s uid=%d ", mech->gm_name,
+	len = scnprintf(p, buflen, "mech=%s uid=%d", mech->gm_name,
 			from_kuid(&init_user_ns, gss_msg->uid));
 	buflen -= len;
 	p += len;
@@ -467,7 +465,7 @@ static int gss_encode_v1_msg(struct gss_upcall_msg *gss_msg,
 	 * identity that we are authenticating to.
 	 */
 	if (target_name) {
-		len = scnprintf(p, buflen, "target=%s ", target_name);
+		len = scnprintf(p, buflen, " target=%s", target_name);
 		buflen -= len;
 		p += len;
 		gss_msg->msg.len += len;
@@ -487,11 +485,11 @@ static int gss_encode_v1_msg(struct gss_upcall_msg *gss_msg,
 		char *c = strchr(service_name, '@');
 
 		if (!c)
-			len = scnprintf(p, buflen, "service=%s ",
+			len = scnprintf(p, buflen, " service=%s",
 					service_name);
 		else
 			len = scnprintf(p, buflen,
-					"service=%.*s srchost=%s ",
+					" service=%.*s srchost=%s",
 					(int)(c - service_name),
 					service_name, c + 1);
 		buflen -= len;
@@ -500,17 +498,17 @@ static int gss_encode_v1_msg(struct gss_upcall_msg *gss_msg,
 	}
 
 	if (mech->gm_upcall_enctypes) {
-		len = scnprintf(p, buflen, "enctypes=%s ",
+		len = scnprintf(p, buflen, " enctypes=%s",
 				mech->gm_upcall_enctypes);
 		buflen -= len;
 		p += len;
 		gss_msg->msg.len += len;
 	}
+	trace_rpcgss_upcall_msg(gss_msg->databuf);
 	len = scnprintf(p, buflen, "\n");
 	if (len == 0)
 		goto out_overflow;
 	gss_msg->msg.len += len;
-
 	gss_msg->msg.data = gss_msg->databuf;
 	return 0;
 out_overflow:
@@ -603,8 +601,6 @@ gss_refresh_upcall(struct rpc_task *task)
 	struct rpc_pipe *pipe;
 	int err = 0;
 
-	dprintk("RPC: %5u %s for uid %u\n",
-		task->tk_pid, __func__, from_kuid(&init_user_ns, cred->cr_cred->fsuid));
 	gss_msg = gss_setup_upcall(gss_auth, cred);
 	if (PTR_ERR(gss_msg) == -EAGAIN) {
 		/* XXX: warning on the first, under the assumption we
@@ -612,7 +608,8 @@ gss_refresh_upcall(struct rpc_task *task)
 		warn_gssd();
 		task->tk_timeout = 15*HZ;
 		rpc_sleep_on(&pipe_version_rpc_waitqueue, task, NULL);
-		return -EAGAIN;
+		err = -EAGAIN;
+		goto out;
 	}
 	if (IS_ERR(gss_msg)) {
 		err = PTR_ERR(gss_msg);
@@ -635,9 +632,8 @@ gss_refresh_upcall(struct rpc_task *task)
 	spin_unlock(&pipe->lock);
 	gss_release_msg(gss_msg);
 out:
-	dprintk("RPC: %5u %s for uid %u result %d\n",
-		task->tk_pid, __func__,
-		from_kuid(&init_user_ns, cred->cr_cred->fsuid),	err);
+	trace_rpcgss_upcall_result(from_kuid(&init_user_ns,
+					     cred->cr_cred->fsuid), err);
 	return err;
 }
 
@@ -652,14 +648,13 @@ gss_create_upcall(struct gss_auth *gss_auth, struct gss_cred *gss_cred)
 	DEFINE_WAIT(wait);
 	int err;
 
-	dprintk("RPC:       %s for uid %u\n",
-		__func__, from_kuid(&init_user_ns, cred->cr_cred->fsuid));
 retry:
 	err = 0;
 	/* if gssd is down, just skip upcalling altogether */
 	if (!gssd_running(net)) {
 		warn_gssd();
-		return -EACCES;
+		err = -EACCES;
+		goto out;
 	}
 	gss_msg = gss_setup_upcall(gss_auth, cred);
 	if (PTR_ERR(gss_msg) == -EAGAIN) {
@@ -700,8 +695,8 @@ out_intr:
 	finish_wait(&gss_msg->waitqueue, &wait);
 	gss_release_msg(gss_msg);
 out:
-	dprintk("RPC:       %s for uid %u result %d\n",
-		__func__, from_kuid(&init_user_ns, cred->cr_cred->fsuid), err);
+	trace_rpcgss_upcall_result(from_kuid(&init_user_ns,
+					     cred->cr_cred->fsuid), err);
 	return err;
 }
 
@@ -794,7 +789,6 @@ err_put_ctx:
 err:
 	kfree(buf);
 out:
-	dprintk("RPC:       %s returning %zd\n", __func__, err);
 	return err;
 }
 
@@ -863,8 +857,6 @@ gss_pipe_destroy_msg(struct rpc_pipe_msg *msg)
 	struct gss_upcall_msg *gss_msg = container_of(msg, struct gss_upcall_msg, msg);
 
 	if (msg->errno < 0) {
-		dprintk("RPC:       %s releasing msg %p\n",
-			__func__, gss_msg);
 		refcount_inc(&gss_msg->count);
 		gss_unhash_msg(gss_msg);
 		if (msg->errno == -ETIMEDOUT)
@@ -1024,8 +1016,6 @@ gss_create_new(const struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
 	struct rpc_auth * auth;
 	int err = -ENOMEM; /* XXX? */
 
-	dprintk("RPC:       creating GSS authenticator for client %p\n", clnt);
-
 	if (!try_module_get(THIS_MODULE))
 		return ERR_PTR(err);
 	if (!(gss_auth = kmalloc(sizeof(*gss_auth), GFP_KERNEL)))
@@ -1041,10 +1031,8 @@ gss_create_new(const struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
 	gss_auth->net = get_net(rpc_net_ns(clnt));
 	err = -EINVAL;
 	gss_auth->mech = gss_mech_get_by_pseudoflavor(flavor);
-	if (!gss_auth->mech) {
-		dprintk("RPC:       Pseudoflavor %d not found!\n", flavor);
+	if (!gss_auth->mech)
 		goto err_put_net;
-	}
 	gss_auth->service = gss_pseudoflavor_to_service(gss_auth->mech, flavor);
 	if (gss_auth->service == 0)
 		goto err_put_mech;
@@ -1099,6 +1087,7 @@ err_free:
 	kfree(gss_auth);
 out_dec:
 	module_put(THIS_MODULE);
+	trace_rpcgss_createauth(flavor, err);
 	return ERR_PTR(err);
 }
 
@@ -1135,9 +1124,6 @@ gss_destroy(struct rpc_auth *auth)
 	struct gss_auth *gss_auth = container_of(auth,
 			struct gss_auth, rpc_auth);
 
-	dprintk("RPC:       destroying GSS authenticator %p flavor %d\n",
-			auth, auth->au_flavor);
-
 	if (hash_hashed(&gss_auth->hash)) {
 		spin_lock(&gss_auth_hash_lock);
 		hash_del(&gss_auth->hash);
@@ -1300,8 +1286,6 @@ gss_send_destroy_context(struct rpc_cred *cred)
 static void
 gss_do_free_ctx(struct gss_cl_ctx *ctx)
 {
-	dprintk("RPC:       %s\n", __func__);
-
 	gss_delete_sec_context(&ctx->gc_gss_ctx);
 	kfree(ctx->gc_wire_ctx.data);
 	kfree(ctx->gc_acceptor.data);
@@ -1324,7 +1308,6 @@ gss_free_ctx(struct gss_cl_ctx *ctx)
 static void
 gss_free_cred(struct gss_cred *gss_cred)
 {
-	dprintk("RPC:       %s cred=%p\n", __func__, gss_cred);
 	kfree(gss_cred);
 }
 
@@ -1381,10 +1364,6 @@ gss_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags, gfp_t
 	struct gss_cred	*cred = NULL;
 	int err = -ENOMEM;
 
-	dprintk("RPC:       %s for uid %d, flavor %d\n",
-		__func__, from_kuid(&init_user_ns, acred->cred->fsuid),
-		auth->au_flavor);
-
 	if (!(cred = kzalloc(sizeof(*cred), gfp)))
 		goto out_err;
 
@@ -1400,7 +1379,6 @@ gss_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags, gfp_t
 	return &cred->gc_base;
 
 out_err:
-	dprintk("RPC:       %s failed with error %d\n", __func__, err);
 	return ERR_PTR(err);
 }
 
@@ -1544,15 +1522,14 @@ static int gss_marshal(struct rpc_task *task, struct xdr_stream *xdr)
 	struct xdr_netobj mic;
 	struct kvec	iov;
 	struct xdr_buf	verf_buf;
-
-	dprintk("RPC: %5u %s\n", task->tk_pid, __func__);
+	int status;
 
 	/* Credential */
 
 	p = xdr_reserve_space(xdr, 7 * sizeof(*p) +
 			      ctx->gc_wire_ctx.len);
 	if (!p)
-		goto out_put_ctx;
+		goto marshal_failed;
 	*p++ = rpc_auth_gss;
 	cred_len = p++;
 
@@ -1560,7 +1537,8 @@ static int gss_marshal(struct rpc_task *task, struct xdr_stream *xdr)
 	req->rq_seqno = (ctx->gc_seq < MAXSEQ) ? ctx->gc_seq++ : MAXSEQ;
 	spin_unlock(&ctx->gc_seq_lock);
 	if (req->rq_seqno == MAXSEQ)
-		goto out_expired;
+		goto expired;
+	trace_rpcgss_seqno(task);
 
 	*p++ = cpu_to_be32(RPC_GSS_VERSION);
 	*p++ = cpu_to_be32(ctx->gc_proc);
@@ -1579,25 +1557,31 @@ static int gss_marshal(struct rpc_task *task, struct xdr_stream *xdr)
 
 	p = xdr_reserve_space(xdr, sizeof(*p));
 	if (!p)
-		goto out_put_ctx;
+		goto marshal_failed;
 	*p++ = rpc_auth_gss;
 	mic.data = (u8 *)(p + 1);
 	maj_stat = gss_get_mic(ctx->gc_gss_ctx, &verf_buf, &mic);
 	if (maj_stat == GSS_S_CONTEXT_EXPIRED)
-		goto out_expired;
+		goto expired;
 	else if (maj_stat != 0)
-		goto out_put_ctx;
+		goto bad_mic;
 	if (xdr_stream_encode_opaque_inline(xdr, (void **)&p, mic.len) < 0)
-		goto out_put_ctx;
-	gss_put_ctx(ctx);
-	return 0;
-out_expired:
+		goto marshal_failed;
+	status = 0;
+out:
 	gss_put_ctx(ctx);
+	return status;
+expired:
 	clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
-	return -EKEYEXPIRED;
-out_put_ctx:
-	gss_put_ctx(ctx);
-	return -EMSGSIZE;
+	status = -EKEYEXPIRED;
+	goto out;
+marshal_failed:
+	status = -EMSGSIZE;
+	goto out;
+bad_mic:
+	trace_rpcgss_get_mic(task, maj_stat);
+	status = -EIO;
+	goto out;
 }
 
 static int gss_renew_cred(struct rpc_task *task)
@@ -1723,8 +1707,7 @@ validate_failed:
 	status = -EIO;
 	goto out;
 bad_mic:
-	dprintk("RPC: %5u %s: gss_verify_mic returned error 0x%08x\n",
-		task->tk_pid, __func__, maj_stat);
+	trace_rpcgss_verify_mic(task, maj_stat);
 	status = -EACCES;
 	goto out;
 }
@@ -1761,13 +1744,16 @@ static int gss_wrap_req_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
 	if (maj_stat == GSS_S_CONTEXT_EXPIRED)
 		clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
 	else if (maj_stat)
-		goto wrap_failed;
+		goto bad_mic;
 	/* Check that the trailing MIC fit in the buffer, after the fact */
 	if (xdr_stream_encode_opaque_inline(xdr, (void **)&p, mic.len) < 0)
 		goto wrap_failed;
 	return 0;
 wrap_failed:
 	return -EMSGSIZE;
+bad_mic:
+	trace_rpcgss_get_mic(task, maj_stat);
+	return -EIO;
 }
 
 static void
@@ -1860,7 +1846,6 @@ static int gss_wrap_req_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
 		memcpy(tmp, snd_buf->tail[0].iov_base, snd_buf->tail[0].iov_len);
 		snd_buf->tail[0].iov_base = tmp;
 	}
-	status = -EIO;
 	offset = (u8 *)p - (u8 *)snd_buf->head[0].iov_base;
 	maj_stat = gss_wrap(ctx->gc_gss_ctx, offset, snd_buf, inpages);
 	/* slack space should prevent this ever happening: */
@@ -1871,7 +1856,7 @@ static int gss_wrap_req_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
 	if (maj_stat == GSS_S_CONTEXT_EXPIRED)
 		clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
 	else if (maj_stat)
-		goto wrap_failed;
+		goto bad_wrap;
 
 	*opaque_len = cpu_to_be32(snd_buf->len - offset);
 	/* guess whether the pad goes into the head or the tail: */
@@ -1888,6 +1873,9 @@ static int gss_wrap_req_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
 	return 0;
 wrap_failed:
 	return status;
+bad_wrap:
+	trace_rpcgss_wrap(task, maj_stat);
+	return -EIO;
 }
 
 static int gss_wrap_req(struct rpc_task *task, struct xdr_stream *xdr)
@@ -1898,7 +1886,6 @@ static int gss_wrap_req(struct rpc_task *task, struct xdr_stream *xdr)
 	struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred);
 	int status;
 
-	dprintk("RPC: %5u %s\n", task->tk_pid, __func__);
 	status = -EIO;
 	if (ctx->gc_proc != RPC_GSS_PROC_DATA) {
 		/* The spec seems a little ambiguous here, but I think that not
@@ -1917,10 +1904,11 @@ static int gss_wrap_req(struct rpc_task *task, struct xdr_stream *xdr)
 	case RPC_GSS_SVC_PRIVACY:
 		status = gss_wrap_req_priv(cred, ctx, task, xdr);
 		break;
+	default:
+		status = -EIO;
 	}
 out:
 	gss_put_ctx(ctx);
-	dprintk("RPC: %5u %s returning %d\n", task->tk_pid, __func__, status);
 	return status;
 }
 
@@ -1932,8 +1920,9 @@ gss_unwrap_resp_auth(struct rpc_cred *cred)
 }
 
 static int
-gss_unwrap_resp_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
-		      struct rpc_rqst *rqstp, struct xdr_stream *xdr)
+gss_unwrap_resp_integ(struct rpc_task *task, struct rpc_cred *cred,
+		      struct gss_cl_ctx *ctx, struct rpc_rqst *rqstp,
+		      struct xdr_stream *xdr)
 {
 	struct xdr_buf integ_buf, *rcv_buf = &rqstp->rq_rcv_buf;
 	u32 data_offset, mic_offset, integ_len, maj_stat;
@@ -1951,7 +1940,7 @@ gss_unwrap_resp_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
 	if (mic_offset > rcv_buf->len)
 		goto unwrap_failed;
 	if (be32_to_cpup(p) != rqstp->rq_seqno)
-		goto unwrap_failed;
+		goto bad_seqno;
 
 	if (xdr_buf_subsegment(rcv_buf, &integ_buf, data_offset, integ_len))
 		goto unwrap_failed;
@@ -1967,16 +1956,20 @@ gss_unwrap_resp_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
 				   1 + XDR_QUADLEN(mic.len);
 	return 0;
 unwrap_failed:
+	trace_rpcgss_unwrap_failed(task);
+	return -EIO;
+bad_seqno:
+	trace_rpcgss_bad_seqno(task, rqstp->rq_seqno, be32_to_cpup(p));
 	return -EIO;
 bad_mic:
-	dprintk("RPC:       %s: gss_verify_mic returned error 0x%08x\n",
-		__func__, maj_stat);
+	trace_rpcgss_verify_mic(task, maj_stat);
 	return -EIO;
 }
 
 static int
-gss_unwrap_resp_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
-		     struct rpc_rqst *rqstp, struct xdr_stream *xdr)
+gss_unwrap_resp_priv(struct rpc_task *task, struct rpc_cred *cred,
+		     struct gss_cl_ctx *ctx, struct rpc_rqst *rqstp,
+		     struct xdr_stream *xdr)
 {
 	struct xdr_buf *rcv_buf = &rqstp->rq_rcv_buf;
 	struct kvec *head = rqstp->rq_rcv_buf.head;
@@ -2000,7 +1993,7 @@ gss_unwrap_resp_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
 		goto bad_unwrap;
 	/* gss_unwrap decrypted the sequence number */
 	if (be32_to_cpup(p++) != rqstp->rq_seqno)
-		goto unwrap_failed;
+		goto bad_seqno;
 
 	/* gss_unwrap redacts the opaque blob from the head iovec.
 	 * rcv_buf has changed, thus the stream needs to be reset.
@@ -2011,10 +2004,13 @@ gss_unwrap_resp_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
 				   XDR_QUADLEN(savedlen - rcv_buf->len);
 	return 0;
 unwrap_failed:
+	trace_rpcgss_unwrap_failed(task);
+	return -EIO;
+bad_seqno:
+	trace_rpcgss_bad_seqno(task, rqstp->rq_seqno, be32_to_cpup(--p));
 	return -EIO;
 bad_unwrap:
-	dprintk("RPC:       %s: gss_unwrap returned error 0x%08x\n",
-		__func__, maj_stat);
+	trace_rpcgss_unwrap(task, maj_stat);
 	return -EIO;
 }
 
@@ -2030,14 +2026,14 @@ gss_xmit_need_reencode(struct rpc_task *task)
 	struct rpc_rqst *req = task->tk_rqstp;
 	struct rpc_cred *cred = req->rq_cred;
 	struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred);
-	u32 win, seq_xmit;
+	u32 win, seq_xmit = 0;
 	bool ret = true;
 
 	if (!ctx)
-		return true;
+		goto out;
 
 	if (gss_seq_is_newer(req->rq_seqno, READ_ONCE(ctx->gc_seq)))
-		goto out;
+		goto out_ctx;
 
 	seq_xmit = READ_ONCE(ctx->gc_seq_xmit);
 	while (gss_seq_is_newer(req->rq_seqno, seq_xmit)) {
@@ -2046,15 +2042,18 @@ gss_xmit_need_reencode(struct rpc_task *task)
 		seq_xmit = cmpxchg(&ctx->gc_seq_xmit, tmp, req->rq_seqno);
 		if (seq_xmit == tmp) {
 			ret = false;
-			goto out;
+			goto out_ctx;
 		}
 	}
 
 	win = ctx->gc_win;
 	if (win > 0)
 		ret = !gss_seq_is_newer(req->rq_seqno, seq_xmit - win);
-out:
+
+out_ctx:
 	gss_put_ctx(ctx);
+out:
+	trace_rpcgss_need_reencode(task, seq_xmit, ret);
 	return ret;
 }
 
@@ -2075,10 +2074,10 @@ gss_unwrap_resp(struct rpc_task *task, struct xdr_stream *xdr)
 		status = gss_unwrap_resp_auth(cred);
 		break;
 	case RPC_GSS_SVC_INTEGRITY:
-		status = gss_unwrap_resp_integ(cred, ctx, rqstp, xdr);
+		status = gss_unwrap_resp_integ(task, cred, ctx, rqstp, xdr);
 		break;
 	case RPC_GSS_SVC_PRIVACY:
-		status = gss_unwrap_resp_priv(cred, ctx, rqstp, xdr);
+		status = gss_unwrap_resp_priv(task, cred, ctx, rqstp, xdr);
 		break;
 	}
 	if (status)
@@ -2088,8 +2087,6 @@ out_decode:
 	status = rpcauth_unwrap_resp_decode(task, xdr);
 out:
 	gss_put_ctx(ctx);
-	dprintk("RPC: %5u %s returning %d\n",
-		task->tk_pid, __func__, status);
 	return status;
 }
 
diff --git a/net/sunrpc/auth_gss/trace.c b/net/sunrpc/auth_gss/trace.c
new file mode 100644
index 000000000000..5576f1e66de9
--- /dev/null
+++ b/net/sunrpc/auth_gss/trace.c
@@ -0,0 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2018, 2019 Oracle. All rights reserved.
+ */
+
+#include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/sched.h>
+#include <linux/sunrpc/gss_err.h>
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/rpcgss.h>
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index f1ec2110efeb..bc7489f1fe55 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -1165,6 +1165,7 @@ xprt_request_enqueue_transmit(struct rpc_task *task)
 				/* Note: req is added _before_ pos */
 				list_add_tail(&req->rq_xmit, &pos->rq_xmit);
 				INIT_LIST_HEAD(&req->rq_xmit2);
+				trace_xprt_enq_xmit(task, 1);
 				goto out;
 			}
 		} else if (RPC_IS_SWAPPER(task)) {
@@ -1176,6 +1177,7 @@ xprt_request_enqueue_transmit(struct rpc_task *task)
 				/* Note: req is added _before_ pos */
 				list_add_tail(&req->rq_xmit, &pos->rq_xmit);
 				INIT_LIST_HEAD(&req->rq_xmit2);
+				trace_xprt_enq_xmit(task, 2);
 				goto out;
 			}
 		} else if (!req->rq_seqno) {
@@ -1184,11 +1186,13 @@ xprt_request_enqueue_transmit(struct rpc_task *task)
 					continue;
 				list_add_tail(&req->rq_xmit2, &pos->rq_xmit2);
 				INIT_LIST_HEAD(&req->rq_xmit);
+				trace_xprt_enq_xmit(task, 3);
 				goto out;
 			}
 		}
 		list_add_tail(&req->rq_xmit, &xprt->xmit_queue);
 		INIT_LIST_HEAD(&req->rq_xmit2);
+		trace_xprt_enq_xmit(task, 4);
 out:
 		set_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate);
 		spin_unlock(&xprt->queue_lock);
@@ -1313,8 +1317,6 @@ xprt_request_transmit(struct rpc_rqst *req, struct rpc_task *snd_task)
 	int is_retrans = RPC_WAS_SENT(task);
 	int status;
 
-	dprintk("RPC: %5u xprt_transmit(%u)\n", task->tk_pid, req->rq_slen);
-
 	if (!req->rq_bytes_sent) {
 		if (xprt_request_data_received(task)) {
 			status = 0;
@@ -1336,9 +1338,9 @@ xprt_request_transmit(struct rpc_rqst *req, struct rpc_task *snd_task)
 
 	connect_cookie = xprt->connect_cookie;
 	status = xprt->ops->send_request(req);
-	trace_xprt_transmit(xprt, req->rq_xid, status);
 	if (status != 0) {
 		req->rq_ntrans--;
+		trace_xprt_transmit(req, status);
 		return status;
 	}
 
@@ -1347,7 +1349,6 @@ xprt_request_transmit(struct rpc_rqst *req, struct rpc_task *snd_task)
 
 	xprt_inject_disconnect(xprt);
 
-	dprintk("RPC: %5u xmit complete\n", task->tk_pid);
 	task->tk_flags |= RPC_TASK_SENT;
 	spin_lock_bh(&xprt->transport_lock);
 
@@ -1360,6 +1361,7 @@ xprt_request_transmit(struct rpc_rqst *req, struct rpc_task *snd_task)
 
 	req->rq_connect_cookie = connect_cookie;
 out_dequeue:
+	trace_xprt_transmit(req, status);
 	xprt_request_dequeue_transmit(task);
 	rpc_wake_up_queued_task_set_status(&xprt->sending, task, status);
 	return status;
-- 
cgit v1.2.3


From 241b1f419f0ea9966d574d7cc67377c74982a125 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 11 Feb 2019 11:25:09 -0500
Subject: SUNRPC: Remove xdr_buf_trim()

The key action of xdr_buf_trim() is that it shortens buf->len, the
length of the xdr_buf's content. The other actions -- shortening the
head, pages, and tail components -- are actually not necessary. In
particular, changing the size of those components can corrupt the
RPC message contained in the buffer. This is an accident waiting to
happen rather than a current bug, as far as we know.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Acked-by: Bruce Fields <bfields@redhat.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 include/linux/sunrpc/xdr.h          |  1 -
 net/sunrpc/auth_gss/gss_krb5_wrap.c |  8 +++++---
 net/sunrpc/auth_gss/svcauth_gss.c   |  2 +-
 net/sunrpc/xdr.c                    | 41 -------------------------------------
 4 files changed, 6 insertions(+), 46 deletions(-)

(limited to 'include')

diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index 65af6a204b75..9ee3970ba59c 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -179,7 +179,6 @@ xdr_adjust_iovec(struct kvec *iov, __be32 *p)
 extern void xdr_shift_buf(struct xdr_buf *, size_t);
 extern void xdr_buf_from_iov(struct kvec *, struct xdr_buf *);
 extern int xdr_buf_subsegment(struct xdr_buf *, struct xdr_buf *, unsigned int, unsigned int);
-extern void xdr_buf_trim(struct xdr_buf *, unsigned int);
 extern int xdr_buf_read_netobj(struct xdr_buf *, struct xdr_netobj *, unsigned int);
 extern int read_bytes_from_xdr_buf(struct xdr_buf *, unsigned int, void *, unsigned int);
 extern int write_bytes_to_xdr_buf(struct xdr_buf *, unsigned int, void *, unsigned int);
diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c
index 5cdde6cb703a..14a0aff0cd84 100644
--- a/net/sunrpc/auth_gss/gss_krb5_wrap.c
+++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c
@@ -570,14 +570,16 @@ gss_unwrap_kerberos_v2(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf)
 	 */
 	movelen = min_t(unsigned int, buf->head[0].iov_len, buf->len);
 	movelen -= offset + GSS_KRB5_TOK_HDR_LEN + headskip;
-	BUG_ON(offset + GSS_KRB5_TOK_HDR_LEN + headskip + movelen >
-							buf->head[0].iov_len);
+	if (offset + GSS_KRB5_TOK_HDR_LEN + headskip + movelen >
+	    buf->head[0].iov_len)
+		return GSS_S_FAILURE;
 	memmove(ptr, ptr + GSS_KRB5_TOK_HDR_LEN + headskip, movelen);
 	buf->head[0].iov_len -= GSS_KRB5_TOK_HDR_LEN + headskip;
 	buf->len -= GSS_KRB5_TOK_HDR_LEN + headskip;
 
 	/* Trim off the trailing "extra count" and checksum blob */
-	xdr_buf_trim(buf, ec + GSS_KRB5_TOK_HDR_LEN + tailskip);
+	buf->len -= ec + GSS_KRB5_TOK_HDR_LEN + tailskip;
+
 	return GSS_S_COMPLETE;
 }
 
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index 152790ed309c..f1aabab4a4c2 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -896,7 +896,7 @@ unwrap_integ_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct g
 	if (svc_getnl(&buf->head[0]) != seq)
 		goto out;
 	/* trim off the mic and padding at the end before returning */
-	xdr_buf_trim(buf, round_up_to_quad(mic.len) + 4);
+	buf->len -= 4 + round_up_to_quad(mic.len);
 	stat = 0;
 out:
 	kfree(mic.data);
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index 5f0aa53fa4ae..4bce61978062 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -1139,47 +1139,6 @@ xdr_buf_subsegment(struct xdr_buf *buf, struct xdr_buf *subbuf,
 }
 EXPORT_SYMBOL_GPL(xdr_buf_subsegment);
 
-/**
- * xdr_buf_trim - lop at most "len" bytes off the end of "buf"
- * @buf: buf to be trimmed
- * @len: number of bytes to reduce "buf" by
- *
- * Trim an xdr_buf by the given number of bytes by fixing up the lengths. Note
- * that it's possible that we'll trim less than that amount if the xdr_buf is
- * too small, or if (for instance) it's all in the head and the parser has
- * already read too far into it.
- */
-void xdr_buf_trim(struct xdr_buf *buf, unsigned int len)
-{
-	size_t cur;
-	unsigned int trim = len;
-
-	if (buf->tail[0].iov_len) {
-		cur = min_t(size_t, buf->tail[0].iov_len, trim);
-		buf->tail[0].iov_len -= cur;
-		trim -= cur;
-		if (!trim)
-			goto fix_len;
-	}
-
-	if (buf->page_len) {
-		cur = min_t(unsigned int, buf->page_len, trim);
-		buf->page_len -= cur;
-		trim -= cur;
-		if (!trim)
-			goto fix_len;
-	}
-
-	if (buf->head[0].iov_len) {
-		cur = min_t(size_t, buf->head[0].iov_len, trim);
-		buf->head[0].iov_len -= cur;
-		trim -= cur;
-	}
-fix_len:
-	buf->len -= (len - trim);
-}
-EXPORT_SYMBOL_GPL(xdr_buf_trim);
-
 static void __read_bytes_from_xdr_buf(struct xdr_buf *subbuf, void *obj, unsigned int len)
 {
 	unsigned int this_len;
-- 
cgit v1.2.3


From cf500bac8fd48b57f38ece890235923d4ed5ee91 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 11 Feb 2019 11:25:20 -0500
Subject: SUNRPC: Introduce rpc_prepare_reply_pages()

prepare_reply_buffer() and its NFSv4 equivalents expose the details
of the RPC header and the auth slack values to upper layer
consumers, creating a layering violation, and duplicating code.

Remedy these issues by adding a new RPC client API that hides those
details from upper layers in a common helper function.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 fs/nfs/nfs2xdr.c              | 27 +++++------------------
 fs/nfs/nfs3xdr.c              | 29 ++++++------------------
 fs/nfs/nfs4xdr.c              | 51 ++++++++++++++++++-------------------------
 include/linux/sunrpc/clnt.h   |  3 +++
 include/trace/events/sunrpc.h | 37 +++++++++++++++++++++++++++++++
 net/sunrpc/clnt.c             | 19 ++++++++++++++++
 net/sunrpc/xdr.c              |  9 ++++++++
 7 files changed, 102 insertions(+), 73 deletions(-)

(limited to 'include')

diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index bac3a4e2cb5d..1dcd0feda32d 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -65,21 +65,6 @@
 
 static int nfs_stat_to_errno(enum nfs_stat);
 
-/*
- * While encoding arguments, set up the reply buffer in advance to
- * receive reply data directly into the page cache.
- */
-static void prepare_reply_buffer(struct rpc_rqst *req, struct page **pages,
-				 unsigned int base, unsigned int len,
-				 unsigned int bufsize)
-{
-	struct rpc_auth	*auth = req->rq_cred->cr_auth;
-	unsigned int replen;
-
-	replen = RPC_REPHDRSIZE + auth->au_rslack + bufsize;
-	xdr_inline_pages(&req->rq_rcv_buf, replen << 2, pages, base, len);
-}
-
 /*
  * Encode/decode NFSv2 basic data types
  *
@@ -593,8 +578,8 @@ static void nfs2_xdr_enc_readlinkargs(struct rpc_rqst *req,
 	const struct nfs_readlinkargs *args = data;
 
 	encode_fhandle(xdr, args->fh);
-	prepare_reply_buffer(req, args->pages, args->pgbase,
-					args->pglen, NFS_readlinkres_sz);
+	rpc_prepare_reply_pages(req, args->pages, args->pgbase,
+				args->pglen, NFS_readlinkres_sz);
 }
 
 /*
@@ -629,8 +614,8 @@ static void nfs2_xdr_enc_readargs(struct rpc_rqst *req,
 	const struct nfs_pgio_args *args = data;
 
 	encode_readargs(xdr, args);
-	prepare_reply_buffer(req, args->pages, args->pgbase,
-					args->count, NFS_readres_sz);
+	rpc_prepare_reply_pages(req, args->pages, args->pgbase,
+				args->count, NFS_readres_sz);
 	req->rq_rcv_buf.flags |= XDRBUF_READ;
 }
 
@@ -787,8 +772,8 @@ static void nfs2_xdr_enc_readdirargs(struct rpc_rqst *req,
 	const struct nfs_readdirargs *args = data;
 
 	encode_readdirargs(xdr, args);
-	prepare_reply_buffer(req, args->pages, 0,
-					args->count, NFS_readdirres_sz);
+	rpc_prepare_reply_pages(req, args->pages, 0,
+				args->count, NFS_readdirres_sz);
 }
 
 /*
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index 4aa3ffe1800e..a54dcf4bfb1d 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -104,21 +104,6 @@ static const umode_t nfs_type2fmt[] = {
 	[NF3FIFO] = S_IFIFO,
 };
 
-/*
- * While encoding arguments, set up the reply buffer in advance to
- * receive reply data directly into the page cache.
- */
-static void prepare_reply_buffer(struct rpc_rqst *req, struct page **pages,
-				 unsigned int base, unsigned int len,
-				 unsigned int bufsize)
-{
-	struct rpc_auth	*auth = req->rq_cred->cr_auth;
-	unsigned int replen;
-
-	replen = RPC_REPHDRSIZE + auth->au_rslack + bufsize;
-	xdr_inline_pages(&req->rq_rcv_buf, replen << 2, pages, base, len);
-}
-
 /*
  * Encode/decode NFSv3 basic data types
  *
@@ -910,8 +895,8 @@ static void nfs3_xdr_enc_readlink3args(struct rpc_rqst *req,
 	const struct nfs3_readlinkargs *args = data;
 
 	encode_nfs_fh3(xdr, args->fh);
-	prepare_reply_buffer(req, args->pages, args->pgbase,
-					args->pglen, NFS3_readlinkres_sz);
+	rpc_prepare_reply_pages(req, args->pages, args->pgbase,
+				args->pglen, NFS3_readlinkres_sz);
 }
 
 /*
@@ -943,8 +928,8 @@ static void nfs3_xdr_enc_read3args(struct rpc_rqst *req,
 	unsigned int replen = args->replen ? args->replen : NFS3_readres_sz;
 
 	encode_read3args(xdr, args);
-	prepare_reply_buffer(req, args->pages, args->pgbase,
-					args->count, replen);
+	rpc_prepare_reply_pages(req, args->pages, args->pgbase,
+				args->count, replen);
 	req->rq_rcv_buf.flags |= XDRBUF_READ;
 }
 
@@ -1236,7 +1221,7 @@ static void nfs3_xdr_enc_readdir3args(struct rpc_rqst *req,
 	const struct nfs3_readdirargs *args = data;
 
 	encode_readdir3args(xdr, args);
-	prepare_reply_buffer(req, args->pages, 0,
+	rpc_prepare_reply_pages(req, args->pages, 0,
 				args->count, NFS3_readdirres_sz);
 }
 
@@ -1278,7 +1263,7 @@ static void nfs3_xdr_enc_readdirplus3args(struct rpc_rqst *req,
 	const struct nfs3_readdirargs *args = data;
 
 	encode_readdirplus3args(xdr, args);
-	prepare_reply_buffer(req, args->pages, 0,
+	rpc_prepare_reply_pages(req, args->pages, 0,
 				args->count, NFS3_readdirres_sz);
 }
 
@@ -1323,7 +1308,7 @@ static void nfs3_xdr_enc_getacl3args(struct rpc_rqst *req,
 	encode_nfs_fh3(xdr, args->fh);
 	encode_uint32(xdr, args->mask);
 	if (args->mask & (NFS_ACL | NFS_DFACL)) {
-		prepare_reply_buffer(req, args->pages, 0,
+		rpc_prepare_reply_pages(req, args->pages, 0,
 					NFSACL_MAXPAGES << PAGE_SHIFT,
 					ACL3_getaclres_sz);
 		req->rq_rcv_buf.flags |= XDRBUF_SPARSE_PAGES;
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 38a4cbc18657..d0fa18df32ea 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -1016,12 +1016,11 @@ static void encode_compound_hdr(struct xdr_stream *xdr,
 				struct compound_hdr *hdr)
 {
 	__be32 *p;
-	struct rpc_auth *auth = req->rq_cred->cr_auth;
 
 	/* initialize running count of expected bytes in reply.
 	 * NOTE: the replied tag SHOULD be the same is the one sent,
 	 * but this is not required as a MUST for the server to do so. */
-	hdr->replen = RPC_REPHDRSIZE + auth->au_rslack + 3 + hdr->taglen;
+	hdr->replen = 3 + hdr->taglen;
 
 	WARN_ON_ONCE(hdr->taglen > NFS4_MAXTAGLEN);
 	encode_string(xdr, hdr->taglen, hdr->tag);
@@ -2341,9 +2340,9 @@ static void nfs4_xdr_enc_open(struct rpc_rqst *req, struct xdr_stream *xdr,
 	encode_getfattr_open(xdr, args->bitmask, args->open_bitmap, &hdr);
 	if (args->lg_args) {
 		encode_layoutget(xdr, args->lg_args, &hdr);
-		xdr_inline_pages(&req->rq_rcv_buf, hdr.replen << 2,
-				 args->lg_args->layout.pages,
-				 0, args->lg_args->layout.pglen);
+		rpc_prepare_reply_pages(req, args->lg_args->layout.pages, 0,
+					args->lg_args->layout.pglen,
+					hdr.replen);
 	}
 	encode_nops(&hdr);
 }
@@ -2387,9 +2386,9 @@ static void nfs4_xdr_enc_open_noattr(struct rpc_rqst *req,
 	encode_getfattr_open(xdr, args->bitmask, args->open_bitmap, &hdr);
 	if (args->lg_args) {
 		encode_layoutget(xdr, args->lg_args, &hdr);
-		xdr_inline_pages(&req->rq_rcv_buf, hdr.replen << 2,
-				 args->lg_args->layout.pages,
-				 0, args->lg_args->layout.pglen);
+		rpc_prepare_reply_pages(req, args->lg_args->layout.pages, 0,
+					args->lg_args->layout.pglen,
+					hdr.replen);
 	}
 	encode_nops(&hdr);
 }
@@ -2499,8 +2498,8 @@ static void nfs4_xdr_enc_readlink(struct rpc_rqst *req, struct xdr_stream *xdr,
 	encode_putfh(xdr, args->fh, &hdr);
 	encode_readlink(xdr, args, req, &hdr);
 
-	xdr_inline_pages(&req->rq_rcv_buf, hdr.replen << 2, args->pages,
-			args->pgbase, args->pglen);
+	rpc_prepare_reply_pages(req, args->pages, args->pgbase,
+				args->pglen, hdr.replen);
 	encode_nops(&hdr);
 }
 
@@ -2520,11 +2519,8 @@ static void nfs4_xdr_enc_readdir(struct rpc_rqst *req, struct xdr_stream *xdr,
 	encode_putfh(xdr, args->fh, &hdr);
 	encode_readdir(xdr, args, req, &hdr);
 
-	xdr_inline_pages(&req->rq_rcv_buf, hdr.replen << 2, args->pages,
-			 args->pgbase, args->count);
-	dprintk("%s: inlined page args = (%u, %p, %u, %u)\n",
-			__func__, hdr.replen << 2, args->pages,
-			args->pgbase, args->count);
+	rpc_prepare_reply_pages(req, args->pages, args->pgbase,
+				args->count, hdr.replen);
 	encode_nops(&hdr);
 }
 
@@ -2544,8 +2540,8 @@ static void nfs4_xdr_enc_read(struct rpc_rqst *req, struct xdr_stream *xdr,
 	encode_putfh(xdr, args->fh, &hdr);
 	encode_read(xdr, args, &hdr);
 
-	xdr_inline_pages(&req->rq_rcv_buf, hdr.replen << 2,
-			 args->pages, args->pgbase, args->count);
+	rpc_prepare_reply_pages(req, args->pages, args->pgbase,
+				args->count, hdr.replen);
 	req->rq_rcv_buf.flags |= XDRBUF_READ;
 	encode_nops(&hdr);
 }
@@ -2591,9 +2587,8 @@ static void nfs4_xdr_enc_getacl(struct rpc_rqst *req, struct xdr_stream *xdr,
 	encode_getattr(xdr, nfs4_acl_bitmap, NULL,
 			ARRAY_SIZE(nfs4_acl_bitmap), &hdr);
 
-	xdr_inline_pages(&req->rq_rcv_buf, replen << 2,
-		args->acl_pages, 0, args->acl_len);
-
+	rpc_prepare_reply_pages(req, args->acl_pages, 0,
+				args->acl_len, replen);
 	encode_nops(&hdr);
 }
 
@@ -2814,9 +2809,8 @@ static void nfs4_xdr_enc_fs_locations(struct rpc_rqst *req,
 		encode_fs_locations(xdr, args->bitmask, &hdr);
 	}
 
-	/* Set up reply kvec to capture returned fs_locations array. */
-	xdr_inline_pages(&req->rq_rcv_buf, replen << 2,
-			 (struct page **)&args->page, 0, PAGE_SIZE);
+	rpc_prepare_reply_pages(req, (struct page **)&args->page, 0,
+				PAGE_SIZE, replen);
 	encode_nops(&hdr);
 }
 
@@ -3018,10 +3012,8 @@ static void nfs4_xdr_enc_getdeviceinfo(struct rpc_rqst *req,
 
 	/* set up reply kvec. Subtract notification bitmap max size (2)
 	 * so that notification bitmap is put in xdr_buf tail */
-	xdr_inline_pages(&req->rq_rcv_buf, (hdr.replen - 2) << 2,
-			 args->pdev->pages, args->pdev->pgbase,
-			 args->pdev->pglen);
-
+	rpc_prepare_reply_pages(req, args->pdev->pages, args->pdev->pgbase,
+				args->pdev->pglen, hdr.replen - 2);
 	encode_nops(&hdr);
 }
 
@@ -3042,9 +3034,8 @@ static void nfs4_xdr_enc_layoutget(struct rpc_rqst *req,
 	encode_putfh(xdr, NFS_FH(args->inode), &hdr);
 	encode_layoutget(xdr, args, &hdr);
 
-	xdr_inline_pages(&req->rq_rcv_buf, hdr.replen << 2,
-	    args->layout.pages, 0, args->layout.pglen);
-
+	rpc_prepare_reply_pages(req, args->layout.pages, 0,
+				args->layout.pglen, hdr.replen);
 	encode_nops(&hdr);
 }
 
diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index 1c441714d569..98bc9883b230 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -169,6 +169,9 @@ int		rpcb_v4_register(struct net *net, const u32 program,
 				 const char *netid);
 void		rpcb_getport_async(struct rpc_task *);
 
+void rpc_prepare_reply_pages(struct rpc_rqst *req, struct page **pages,
+			     unsigned int base, unsigned int len,
+			     unsigned int hdrsize);
 void		rpc_call_start(struct rpc_task *);
 int		rpc_call_async(struct rpc_clnt *clnt,
 			       const struct rpc_message *msg, int flags,
diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
index e58dda8e038c..8451f30c6a0f 100644
--- a/include/trace/events/sunrpc.h
+++ b/include/trace/events/sunrpc.h
@@ -461,6 +461,43 @@ TRACE_EVENT(rpc_xdr_alignment,
 	)
 );
 
+TRACE_EVENT(rpc_reply_pages,
+	TP_PROTO(
+		const struct rpc_rqst *req
+	),
+
+	TP_ARGS(req),
+
+	TP_STRUCT__entry(
+		__field(unsigned int, task_id)
+		__field(unsigned int, client_id)
+		__field(const void *, head_base)
+		__field(size_t, head_len)
+		__field(const void *, tail_base)
+		__field(size_t, tail_len)
+		__field(unsigned int, page_len)
+	),
+
+	TP_fast_assign(
+		__entry->task_id = req->rq_task->tk_pid;
+		__entry->client_id = req->rq_task->tk_client->cl_clid;
+
+		__entry->head_base = req->rq_rcv_buf.head[0].iov_base;
+		__entry->head_len = req->rq_rcv_buf.head[0].iov_len;
+		__entry->page_len = req->rq_rcv_buf.page_len;
+		__entry->tail_base = req->rq_rcv_buf.tail[0].iov_base;
+		__entry->tail_len = req->rq_rcv_buf.tail[0].iov_len;
+	),
+
+	TP_printk(
+		"task:%u@%u xdr=[%p,%zu]/%u/[%p,%zu]\n",
+		__entry->task_id, __entry->client_id,
+		__entry->head_base, __entry->head_len,
+		__entry->page_len,
+		__entry->tail_base, __entry->tail_len
+	)
+);
+
 /*
  * First define the enums in the below macros to be exported to userspace
  * via TRACE_DEFINE_ENUM().
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 803e93105af1..f780605fffe0 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1164,6 +1164,25 @@ struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req)
 }
 #endif /* CONFIG_SUNRPC_BACKCHANNEL */
 
+/**
+ * rpc_prepare_reply_pages - Prepare to receive a reply data payload into pages
+ * @req: RPC request to prepare
+ * @pages: vector of struct page pointers
+ * @base: offset in first page where receive should start, in bytes
+ * @len: expected size of the upper layer data payload, in bytes
+ * @hdrsize: expected size of upper layer reply header, in XDR words
+ *
+ */
+void rpc_prepare_reply_pages(struct rpc_rqst *req, struct page **pages,
+			     unsigned int base, unsigned int len,
+			     unsigned int hdrsize)
+{
+	hdrsize += RPC_REPHDRSIZE + req->rq_cred->cr_auth->au_rslack;
+	xdr_inline_pages(&req->rq_rcv_buf, hdrsize << 2, pages, base, len);
+	trace_rpc_reply_pages(req);
+}
+EXPORT_SYMBOL_GPL(rpc_prepare_reply_pages);
+
 void
 rpc_call_start(struct rpc_task *task)
 {
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index 4bce61978062..7cca51560442 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -163,6 +163,15 @@ xdr_free_bvec(struct xdr_buf *buf)
 	buf->bvec = NULL;
 }
 
+/**
+ * xdr_inline_pages - Prepare receive buffer for a large reply
+ * @xdr: xdr_buf into which reply will be placed
+ * @offset: expected offset where data payload will start, in bytes
+ * @pages: vector of struct page pointers
+ * @base: offset in first page where receive should start, in bytes
+ * @len: expected size of the upper layer data payload, in bytes
+ *
+ */
 void
 xdr_inline_pages(struct xdr_buf *xdr, unsigned int offset,
 		 struct page **pages, unsigned int base, unsigned int len)
-- 
cgit v1.2.3


From 6a47b4da551a762217215aeeda22e46469c5868a Mon Sep 17 00:00:00 2001
From: Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
Date: Thu, 14 Feb 2019 11:38:05 +0200
Subject: regulator: add regulator_desc_list_voltage_linear_range

Add regulator_desc_list_voltage_linear_range which can be used
by drivers for getting the voltages before regulator is registered.
This may be useful for drivers which need to fetch the voltage
selectors at device-tree parsing callback.

Signed-off-by: Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
Acked-by: Mark Brown <broonie@kernel.org>
Tested-by: Angus Ainslie <angus@akkea.ca>
Reviewed-by: Angus Ainslie <angus@akkea.ca>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/helpers.c      | 39 +++++++++++++++++++++++++++++----------
 include/linux/regulator/driver.h |  6 ++++++
 2 files changed, 35 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/drivers/regulator/helpers.c b/drivers/regulator/helpers.c
index 5686a1335bd3..68ac6017ef28 100644
--- a/drivers/regulator/helpers.c
+++ b/drivers/regulator/helpers.c
@@ -594,28 +594,30 @@ int regulator_list_voltage_pickable_linear_range(struct regulator_dev *rdev,
 EXPORT_SYMBOL_GPL(regulator_list_voltage_pickable_linear_range);
 
 /**
- * regulator_list_voltage_linear_range - List voltages for linear ranges
+ * regulator_desc_list_voltage_linear_range - List voltages for linear ranges
  *
- * @rdev: Regulator device
+ * @desc: Regulator desc for regulator which volatges are to be listed
  * @selector: Selector to convert into a voltage
  *
  * Regulators with a series of simple linear mappings between voltages
- * and selectors can set linear_ranges in the regulator descriptor and
- * then use this function as their list_voltage() operation,
+ * and selectors who have set linear_ranges in the regulator descriptor
+ * can use this function prior regulator registration to list voltages.
+ * This is useful when voltages need to be listed during device-tree
+ * parsing.
  */
-int regulator_list_voltage_linear_range(struct regulator_dev *rdev,
-					unsigned int selector)
+int regulator_desc_list_voltage_linear_range(const struct regulator_desc *desc,
+					     unsigned int selector)
 {
 	const struct regulator_linear_range *range;
 	int i;
 
-	if (!rdev->desc->n_linear_ranges) {
-		BUG_ON(!rdev->desc->n_linear_ranges);
+	if (!desc->n_linear_ranges) {
+		BUG_ON(!desc->n_linear_ranges);
 		return -EINVAL;
 	}
 
-	for (i = 0; i < rdev->desc->n_linear_ranges; i++) {
-		range = &rdev->desc->linear_ranges[i];
+	for (i = 0; i < desc->n_linear_ranges; i++) {
+		range = &desc->linear_ranges[i];
 
 		if (!(selector >= range->min_sel &&
 		      selector <= range->max_sel))
@@ -628,6 +630,23 @@ int regulator_list_voltage_linear_range(struct regulator_dev *rdev,
 
 	return -EINVAL;
 }
+EXPORT_SYMBOL_GPL(regulator_desc_list_voltage_linear_range);
+
+/**
+ * regulator_list_voltage_linear_range - List voltages for linear ranges
+ *
+ * @rdev: Regulator device
+ * @selector: Selector to convert into a voltage
+ *
+ * Regulators with a series of simple linear mappings between voltages
+ * and selectors can set linear_ranges in the regulator descriptor and
+ * then use this function as their list_voltage() operation,
+ */
+int regulator_list_voltage_linear_range(struct regulator_dev *rdev,
+					unsigned int selector)
+{
+	return regulator_desc_list_voltage_linear_range(rdev->desc, selector);
+}
 EXPORT_SYMBOL_GPL(regulator_list_voltage_linear_range);
 
 /**
diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h
index 7f8345bff4e1..05efe2b057c1 100644
--- a/include/linux/regulator/driver.h
+++ b/include/linux/regulator/driver.h
@@ -539,4 +539,10 @@ void *regulator_get_init_drvdata(struct regulator_init_data *reg_init_data);
 void regulator_lock(struct regulator_dev *rdev);
 void regulator_unlock(struct regulator_dev *rdev);
 
+/*
+ * Helper functions intended to be used by regulator drivers prior registering
+ * their regulators.
+ */
+int regulator_desc_list_voltage_linear_range(const struct regulator_desc *desc,
+					     unsigned int selector);
 #endif
-- 
cgit v1.2.3


From 41cb8d189c9d4964df52a6f497cab7b301ae831b Mon Sep 17 00:00:00 2001
From: Kishon Vijay Abraham I <kishon@ti.com>
Date: Mon, 14 Jan 2019 16:44:59 +0530
Subject: PCI: endpoint: Add new pci_epc_ops to get EPC features

Add a new pci_epc_ops ->get_features() to get the features
supported by the EPC. Since EPC can provide different features to
different functions, the ->get_features() ops takes _func_no_ as
an argument.

Tested-by: Gustavo Pimentel <gustavo.pimentel@synopsys.com>
Signed-off-by: Kishon Vijay Abraham I <kishon@ti.com>
Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
---
 drivers/pci/endpoint/pci-epc-core.c | 30 ++++++++++++++++++++++++++++++
 include/linux/pci-epc.h             | 22 ++++++++++++++++++++++
 2 files changed, 52 insertions(+)

(limited to 'include')

diff --git a/drivers/pci/endpoint/pci-epc-core.c b/drivers/pci/endpoint/pci-epc-core.c
index 094dcc3203b8..5a099479d9ab 100644
--- a/drivers/pci/endpoint/pci-epc-core.c
+++ b/drivers/pci/endpoint/pci-epc-core.c
@@ -83,6 +83,36 @@ err:
 }
 EXPORT_SYMBOL_GPL(pci_epc_get);
 
+/**
+ * pci_epc_get_features() - get the features supported by EPC
+ * @epc: the features supported by *this* EPC device will be returned
+ * @func_no: the features supported by the EPC device specific to the
+ *	     endpoint function with func_no will be returned
+ *
+ * Invoke to get the features provided by the EPC which may be
+ * specific to an endpoint function. Returns pci_epc_features on success
+ * and NULL for any failures.
+ */
+const struct pci_epc_features *pci_epc_get_features(struct pci_epc *epc,
+						    u8 func_no)
+{
+	const struct pci_epc_features *epc_features;
+	unsigned long flags;
+
+	if (IS_ERR_OR_NULL(epc) || func_no >= epc->max_functions)
+		return NULL;
+
+	if (!epc->ops->get_features)
+		return NULL;
+
+	spin_lock_irqsave(&epc->lock, flags);
+	epc_features = epc->ops->get_features(epc, func_no);
+	spin_unlock_irqrestore(&epc->lock, flags);
+
+	return epc_features;
+}
+EXPORT_SYMBOL_GPL(pci_epc_get_features);
+
 /**
  * pci_epc_stop() - stop the PCI link
  * @epc: the link of the EPC device that has to be stopped
diff --git a/include/linux/pci-epc.h b/include/linux/pci-epc.h
index 37dab8116901..79fbcf94e14d 100644
--- a/include/linux/pci-epc.h
+++ b/include/linux/pci-epc.h
@@ -59,6 +59,8 @@ struct pci_epc_ops {
 			     enum pci_epc_irq_type type, u16 interrupt_num);
 	int	(*start)(struct pci_epc *epc);
 	void	(*stop)(struct pci_epc *epc);
+	const struct pci_epc_features* (*get_features)(struct pci_epc *epc,
+						       u8 func_no);
 	struct module *owner;
 };
 
@@ -100,6 +102,24 @@ struct pci_epc {
 	unsigned int			features;
 };
 
+/**
+ * struct pci_epc_features - features supported by a EPC device per function
+ * @linkup_notifier: indicate if the EPC device can notify EPF driver on link up
+ * @msi_capable: indicate if the endpoint function has MSI capability
+ * @msix_capable: indicate if the endpoint function has MSI-X capability
+ * @reserved_bar: bitmap to indicate reserved BAR unavailable to function driver
+ * @bar_fixed_64bit: bitmap to indicate fixed 64bit BARs
+ * @bar_fixed_size: Array specifying the size supported by each BAR
+ */
+struct pci_epc_features {
+	unsigned int	linkup_notifier : 1;
+	unsigned int	msi_capable : 1;
+	unsigned int	msix_capable : 1;
+	u8	reserved_bar;
+	u8	bar_fixed_64bit;
+	u64	bar_fixed_size[BAR_5 + 1];
+};
+
 #define EPC_FEATURE_NO_LINKUP_NOTIFIER		BIT(0)
 #define EPC_FEATURE_BAR_MASK			(BIT(1) | BIT(2) | BIT(3))
 #define EPC_FEATURE_MSIX_AVAILABLE		BIT(4)
@@ -158,6 +178,8 @@ int pci_epc_raise_irq(struct pci_epc *epc, u8 func_no,
 		      enum pci_epc_irq_type type, u16 interrupt_num);
 int pci_epc_start(struct pci_epc *epc);
 void pci_epc_stop(struct pci_epc *epc);
+const struct pci_epc_features *pci_epc_get_features(struct pci_epc *epc,
+						    u8 func_no);
 struct pci_epc *pci_epc_get(const char *epc_name);
 void pci_epc_put(struct pci_epc *epc);
 
-- 
cgit v1.2.3


From 76d9c68b360f852e784170f10cb431e4713c7d0b Mon Sep 17 00:00:00 2001
From: Sylwester Nawrocki <s.nawrocki@samsung.com>
Date: Thu, 14 Feb 2019 16:45:55 +0100
Subject: ASoC: dmaengine: Remove unused
 SND_DMAENGINE_PCM_FLAG_CUSTOM_CHANNEL_NAME flag

There is now no users of this flag so remove it together with
related code.  The chan_name field of snd_dmaengine_dai_dma_data
data structure is not removed as it is still in use by the PXA
platform.

Signed-off-by: Sylwester Nawrocki <s.nawrocki@samsung.com>
Acked-by: Krzysztof Kozlowski <krzk@kernel.org>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/dmaengine_pcm.h         |  4 ----
 sound/soc/soc-generic-dmaengine-pcm.c | 21 ++++-----------------
 2 files changed, 4 insertions(+), 21 deletions(-)

(limited to 'include')

diff --git a/include/sound/dmaengine_pcm.h b/include/sound/dmaengine_pcm.h
index 2c4cfaa135a6..c679f6116580 100644
--- a/include/sound/dmaengine_pcm.h
+++ b/include/sound/dmaengine_pcm.h
@@ -99,10 +99,6 @@ void snd_dmaengine_pcm_set_config_from_dai_data(
  * playback.
  */
 #define SND_DMAENGINE_PCM_FLAG_HALF_DUPLEX BIT(3)
-/*
- * The PCM streams have custom channel names specified.
- */
-#define SND_DMAENGINE_PCM_FLAG_CUSTOM_CHANNEL_NAME BIT(4)
 
 /**
  * struct snd_dmaengine_pcm_config - Configuration data for dmaengine based PCM
diff --git a/sound/soc/soc-generic-dmaengine-pcm.c b/sound/soc/soc-generic-dmaengine-pcm.c
index 1b44e363c50c..f1ab6285a085 100644
--- a/sound/soc/soc-generic-dmaengine-pcm.c
+++ b/sound/soc/soc-generic-dmaengine-pcm.c
@@ -265,7 +265,6 @@ static int dmaengine_pcm_new(struct snd_soc_pcm_runtime *rtd)
 	struct dmaengine_pcm *pcm = soc_component_to_pcm(component);
 	const struct snd_dmaengine_pcm_config *config = pcm->config;
 	struct device *dev = component->dev;
-	struct snd_dmaengine_dai_dma_data *dma_data;
 	struct snd_pcm_substream *substream;
 	size_t prealloc_buffer_size;
 	size_t max_buffer_size;
@@ -285,19 +284,9 @@ static int dmaengine_pcm_new(struct snd_soc_pcm_runtime *rtd)
 		if (!substream)
 			continue;
 
-		dma_data = snd_soc_dai_get_dma_data(rtd->cpu_dai, substream);
-
-		if (!pcm->chan[i] &&
-		    ((pcm->flags & SND_DMAENGINE_PCM_FLAG_CUSTOM_CHANNEL_NAME) ||
-		     (config && config->chan_names[i]))) {
-			const char *chan_name = dma_data->chan_name;
-
-			if (config && config->chan_names[i])
-				chan_name = config->chan_names[i];
-
+		if (!pcm->chan[i] && config && config->chan_names[i])
 			pcm->chan[i] = dma_request_slave_channel(dev,
-				chan_name);
-		}
+				config->chan_names[i]);
 
 		if (!pcm->chan[i] && (pcm->flags & SND_DMAENGINE_PCM_FLAG_COMPAT)) {
 			pcm->chan[i] = dmaengine_pcm_compat_request_channel(rtd,
@@ -420,10 +409,8 @@ static int dmaengine_pcm_request_chan_of(struct dmaengine_pcm *pcm,
 	const char *name;
 	struct dma_chan *chan;
 
-	if ((pcm->flags & (SND_DMAENGINE_PCM_FLAG_NO_DT |
-			   SND_DMAENGINE_PCM_FLAG_CUSTOM_CHANNEL_NAME)) ||
-	     (!dev->of_node && !(config && config->dma_dev &&
-				config->dma_dev->of_node)))
+	if ((pcm->flags & SND_DMAENGINE_PCM_FLAG_NO_DT) || (!dev->of_node &&
+	    !(config && config->dma_dev && config->dma_dev->of_node)))
 		return 0;
 
 	if (config && config->dma_dev) {
-- 
cgit v1.2.3


From a00275baa68e1ee226cc659f54dc3a571f3ad600 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 11 Feb 2019 11:25:31 -0500
Subject: SUNRPC: Make AUTH_SYS and AUTH_NULL set au_verfsize

au_verfsize will be needed for a non-flavor-specific computation
in a subsequent patch.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 include/linux/sunrpc/auth.h    | 3 +--
 net/sunrpc/auth_gss/auth_gss.c | 1 +
 net/sunrpc/auth_null.c         | 1 +
 net/sunrpc/auth_unix.c         | 5 ++++-
 4 files changed, 7 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h
index c51e1893f77e..359dfdd04e77 100644
--- a/include/linux/sunrpc/auth.h
+++ b/include/linux/sunrpc/auth.h
@@ -77,8 +77,7 @@ struct rpc_auth {
 				/* guess at number of u32's auth adds before
 				 * reply data; normally the verifier size: */
 	unsigned int		au_rslack;
-				/* for gss, used to calculate au_rslack: */
-	unsigned int		au_verfsize;
+	unsigned int		au_verfsize;	/* size of reply verifier */
 
 	unsigned int		au_flags;	/* various flags */
 	const struct rpc_authops *au_ops;		/* operations */
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index fda454c9b594..731e7a482e18 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -1016,6 +1016,7 @@ gss_create_new(const struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
 	auth = &gss_auth->rpc_auth;
 	auth->au_cslack = GSS_CRED_SLACK >> 2;
 	auth->au_rslack = GSS_VERF_SLACK >> 2;
+	auth->au_verfsize = GSS_VERF_SLACK >> 2;
 	auth->au_flags = 0;
 	auth->au_ops = &authgss_ops;
 	auth->au_flavor = flavor;
diff --git a/net/sunrpc/auth_null.c b/net/sunrpc/auth_null.c
index bf96975ffc4b..9ae08248a9e1 100644
--- a/net/sunrpc/auth_null.c
+++ b/net/sunrpc/auth_null.c
@@ -114,6 +114,7 @@ static
 struct rpc_auth null_auth = {
 	.au_cslack	= NUL_CALLSLACK,
 	.au_rslack	= NUL_REPLYSLACK,
+	.au_verfsize	= NUL_REPLYSLACK,
 	.au_ops		= &authnull_ops,
 	.au_flavor	= RPC_AUTH_NULL,
 	.au_count	= REFCOUNT_INIT(1),
diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c
index 5ea84a96f96e..a93c56442487 100644
--- a/net/sunrpc/auth_unix.c
+++ b/net/sunrpc/auth_unix.c
@@ -163,6 +163,7 @@ unx_refresh(struct rpc_task *task)
 static int
 unx_validate(struct rpc_task *task, struct xdr_stream *xdr)
 {
+	struct rpc_auth *auth = task->tk_rqstp->rq_cred->cr_auth;
 	__be32 *p;
 	u32 size;
 
@@ -184,7 +185,8 @@ unx_validate(struct rpc_task *task, struct xdr_stream *xdr)
 	if (!p)
 		return -EIO;
 
-	task->tk_rqstp->rq_cred->cr_auth->au_rslack = (size >> 2) + 2;
+	auth->au_verfsize = XDR_QUADLEN(size) + 2;
+	auth->au_rslack = XDR_QUADLEN(size) + 2;
 	return 0;
 }
 
@@ -212,6 +214,7 @@ static
 struct rpc_auth		unix_auth = {
 	.au_cslack	= UNX_CALLSLACK,
 	.au_rslack	= NUL_REPLYSLACK,
+	.au_verfsize	= NUL_REPLYSLACK,
 	.au_ops		= &authunix_ops,
 	.au_flavor	= RPC_AUTH_UNIX,
 	.au_count	= REFCOUNT_INIT(1),
-- 
cgit v1.2.3


From 35e77d21baa04b554bf3dc9a08dfa7e569286e51 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 11 Feb 2019 11:25:36 -0500
Subject: SUNRPC: Add rpc_auth::au_ralign field

Currently rpc_inline_rcv_pages() uses au_rslack to estimate the
size of the upper layer reply header. This is fine for auth flavors
where au_verfsize == au_rslack.

However, some auth flavors have more going on. krb5i for example has
two more words after the verifier, and another blob following the
RPC message. The calculation involving au_rslack pushes the upper
layer reply header too far into the rcv_buf.

au_rslack is still valuable: it's the amount of buffer space needed
for the reply, and is used when allocating the reply buffer. We'll
keep that.

But, add a new field that can be used to properly estimate the
location of the upper layer header in each RPC reply, based on the
auth flavor in use.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 include/linux/sunrpc/auth.h    |  9 ++++-----
 net/sunrpc/auth_gss/auth_gss.c | 18 +++++++++++++-----
 net/sunrpc/auth_null.c         |  1 +
 net/sunrpc/auth_unix.c         |  1 +
 net/sunrpc/clnt.c              |  2 +-
 5 files changed, 20 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h
index 359dfdd04e77..5f9076fdb090 100644
--- a/include/linux/sunrpc/auth.h
+++ b/include/linux/sunrpc/auth.h
@@ -74,13 +74,12 @@ struct rpc_cred_cache;
 struct rpc_authops;
 struct rpc_auth {
 	unsigned int		au_cslack;	/* call cred size estimate */
-				/* guess at number of u32's auth adds before
-				 * reply data; normally the verifier size: */
-	unsigned int		au_rslack;
+	unsigned int		au_rslack;	/* reply cred size estimate */
 	unsigned int		au_verfsize;	/* size of reply verifier */
+	unsigned int		au_ralign;	/* words before UL header */
 
-	unsigned int		au_flags;	/* various flags */
-	const struct rpc_authops *au_ops;		/* operations */
+	unsigned int		au_flags;
+	const struct rpc_authops *au_ops;
 	rpc_authflavor_t	au_flavor;	/* pseudoflavor (note may
 						 * differ from the flavor in
 						 * au_ops->au_flavor in gss
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 731e7a482e18..c67e2ad151ae 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -1017,6 +1017,7 @@ gss_create_new(const struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
 	auth->au_cslack = GSS_CRED_SLACK >> 2;
 	auth->au_rslack = GSS_VERF_SLACK >> 2;
 	auth->au_verfsize = GSS_VERF_SLACK >> 2;
+	auth->au_ralign = GSS_VERF_SLACK >> 2;
 	auth->au_flags = 0;
 	auth->au_ops = &authgss_ops;
 	auth->au_flavor = flavor;
@@ -1891,7 +1892,10 @@ out:
 static int
 gss_unwrap_resp_auth(struct rpc_cred *cred)
 {
-	cred->cr_auth->au_rslack = cred->cr_auth->au_verfsize;
+	struct rpc_auth *auth = cred->cr_auth;
+
+	auth->au_rslack = auth->au_verfsize;
+	auth->au_ralign = auth->au_verfsize;
 	return 0;
 }
 
@@ -1902,6 +1906,7 @@ gss_unwrap_resp_integ(struct rpc_task *task, struct rpc_cred *cred,
 {
 	struct xdr_buf integ_buf, *rcv_buf = &rqstp->rq_rcv_buf;
 	u32 data_offset, mic_offset, integ_len, maj_stat;
+	struct rpc_auth *auth = cred->cr_auth;
 	struct xdr_netobj mic;
 	__be32 *p;
 
@@ -1928,8 +1933,8 @@ gss_unwrap_resp_integ(struct rpc_task *task, struct rpc_cred *cred,
 	if (maj_stat != GSS_S_COMPLETE)
 		goto bad_mic;
 
-	cred->cr_auth->au_rslack = cred->cr_auth->au_verfsize + 2 +
-				   1 + XDR_QUADLEN(mic.len);
+	auth->au_rslack = auth->au_verfsize + 2 + 1 + XDR_QUADLEN(mic.len);
+	auth->au_ralign = auth->au_verfsize + 2;
 	return 0;
 unwrap_failed:
 	trace_rpcgss_unwrap_failed(task);
@@ -1949,6 +1954,7 @@ gss_unwrap_resp_priv(struct rpc_task *task, struct rpc_cred *cred,
 {
 	struct xdr_buf *rcv_buf = &rqstp->rq_rcv_buf;
 	struct kvec *head = rqstp->rq_rcv_buf.head;
+	struct rpc_auth *auth = cred->cr_auth;
 	unsigned int savedlen = rcv_buf->len;
 	u32 offset, opaque_len, maj_stat;
 	__be32 *p;
@@ -1976,8 +1982,10 @@ gss_unwrap_resp_priv(struct rpc_task *task, struct rpc_cred *cred,
 	 */
 	xdr_init_decode(xdr, rcv_buf, p, rqstp);
 
-	cred->cr_auth->au_rslack = cred->cr_auth->au_verfsize + 2 +
-				   XDR_QUADLEN(savedlen - rcv_buf->len);
+	auth->au_rslack = auth->au_verfsize + 2 +
+			  XDR_QUADLEN(savedlen - rcv_buf->len);
+	auth->au_ralign = auth->au_verfsize + 2 +
+			  XDR_QUADLEN(savedlen - rcv_buf->len);
 	return 0;
 unwrap_failed:
 	trace_rpcgss_unwrap_failed(task);
diff --git a/net/sunrpc/auth_null.c b/net/sunrpc/auth_null.c
index 9ae08248a9e1..41a633a4049e 100644
--- a/net/sunrpc/auth_null.c
+++ b/net/sunrpc/auth_null.c
@@ -115,6 +115,7 @@ struct rpc_auth null_auth = {
 	.au_cslack	= NUL_CALLSLACK,
 	.au_rslack	= NUL_REPLYSLACK,
 	.au_verfsize	= NUL_REPLYSLACK,
+	.au_ralign	= NUL_REPLYSLACK,
 	.au_ops		= &authnull_ops,
 	.au_flavor	= RPC_AUTH_NULL,
 	.au_count	= REFCOUNT_INIT(1),
diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c
index a93c56442487..c048eb6deaaf 100644
--- a/net/sunrpc/auth_unix.c
+++ b/net/sunrpc/auth_unix.c
@@ -187,6 +187,7 @@ unx_validate(struct rpc_task *task, struct xdr_stream *xdr)
 
 	auth->au_verfsize = XDR_QUADLEN(size) + 2;
 	auth->au_rslack = XDR_QUADLEN(size) + 2;
+	auth->au_ralign = XDR_QUADLEN(size) + 2;
 	return 0;
 }
 
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 4ea38b029e2f..99bfeb17367c 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1180,7 +1180,7 @@ void rpc_prepare_reply_pages(struct rpc_rqst *req, struct page **pages,
 	/* Subtract one to force an extra word of buffer space for the
 	 * payload's XDR pad to fall into the rcv_buf's tail iovec.
 	 */
-	hdrsize += RPC_REPHDRSIZE + req->rq_cred->cr_auth->au_rslack - 1;
+	hdrsize += RPC_REPHDRSIZE + req->rq_cred->cr_auth->au_ralign - 1;
 
 	xdr_inline_pages(&req->rq_rcv_buf, hdrsize << 2, pages, base, len);
 	trace_rpc_reply_pages(req);
-- 
cgit v1.2.3


From ca5e9aba753ed15d173c7a7b88e4d402b7ca8121 Mon Sep 17 00:00:00 2001
From: Deepa Dinamani <deepa.kernel@gmail.com>
Date: Tue, 12 Feb 2019 19:26:03 -0800
Subject: time: Add time_types.h

sys/time.h is the mandated include for many time related
defines. However, linux/time.h overlaps sys/time.h
significantly and this makes including both from userspace
or one from the other impossible.

This also means that userspace can get away with including
sys/time.h whenever it needs linux/time.h and this is what's
been happening in the user world usually.

But, we have new data types that we plan to use in the uapi time
interfaces also defined in the linux/time.h. But, we are unable
to use these types when sys/time.h is included.

Hence, move the new types to a new header, time_types.h.
We intend to eventually have all the uapi defines that the kernel
uses defined in this header.
Note that the plan is to replace uapi interfaces with timeval to
use __kernel_old_timeval, timespec to use __kernel_old_timespec etc.

Reported-by: Ran Rozenstein <ranro@mellanox.com>
Fixes: 9718475e6908 ("socket: Add SO_TIMESTAMPING_NEW")
Signed-off-by: Deepa Dinamani <deepa.kernel@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/time.h       | 36 +-----------------------------------
 include/uapi/linux/time_types.h | 40 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 41 insertions(+), 35 deletions(-)
 create mode 100644 include/uapi/linux/time_types.h

(limited to 'include')

diff --git a/include/uapi/linux/time.h b/include/uapi/linux/time.h
index b8ad1b86b942..958932effc5e 100644
--- a/include/uapi/linux/time.h
+++ b/include/uapi/linux/time.h
@@ -3,7 +3,7 @@
 #define _UAPI_LINUX_TIME_H
 
 #include <linux/types.h>
-
+#include <linux/time_types.h>
 
 #ifndef _STRUCT_TIMESPEC
 #define _STRUCT_TIMESPEC
@@ -23,7 +23,6 @@ struct timezone {
 	int	tz_dsttime;	/* type of dst correction */
 };
 
-
 /*
  * Names of the interval timers, and structure
  * defining a timer setting:
@@ -42,39 +41,6 @@ struct itimerval {
 	struct timeval it_value;	/* current value */
 };
 
-#ifndef __kernel_timespec
-struct __kernel_timespec {
-	__kernel_time64_t       tv_sec;                 /* seconds */
-	long long               tv_nsec;                /* nanoseconds */
-};
-#endif
-
-#ifndef __kernel_itimerspec
-struct __kernel_itimerspec {
-	struct __kernel_timespec it_interval;    /* timer period */
-	struct __kernel_timespec it_value;       /* timer expiration */
-};
-#endif
-
-/*
- * legacy timeval structure, only embedded in structures that
- * traditionally used 'timeval' to pass time intervals (not absolute
- * times). Do not add new users. If user space fails to compile
- * here, this is probably because it is not y2038 safe and needs to
- * be changed to use another interface.
- */
-#ifndef __kernel_old_timeval
-struct __kernel_old_timeval {
-	__kernel_long_t tv_sec;
-	__kernel_long_t tv_usec;
-};
-#endif
-
-struct __kernel_sock_timeval {
-	__s64 tv_sec;
-	__s64 tv_usec;
-};
-
 /*
  * The IDs of the various system clocks (for POSIX.1b interval timers):
  */
diff --git a/include/uapi/linux/time_types.h b/include/uapi/linux/time_types.h
new file mode 100644
index 000000000000..459070c61d47
--- /dev/null
+++ b/include/uapi/linux/time_types.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_LINUX_TIME_TYPES_H
+#define _UAPI_LINUX_TIME_TYPES_H
+
+#include <linux/types.h>
+
+#ifndef __kernel_timespec
+struct __kernel_timespec {
+	__kernel_time64_t       tv_sec;                 /* seconds */
+	long long               tv_nsec;                /* nanoseconds */
+};
+#endif
+
+#ifndef __kernel_itimerspec
+struct __kernel_itimerspec {
+	struct __kernel_timespec it_interval;    /* timer period */
+	struct __kernel_timespec it_value;       /* timer expiration */
+};
+#endif
+
+/*
+ * legacy timeval structure, only embedded in structures that
+ * traditionally used 'timeval' to pass time intervals (not absolute
+ * times). Do not add new users. If user space fails to compile
+ * here, this is probably because it is not y2038 safe and needs to
+ * be changed to use another interface.
+ */
+#ifndef __kernel_old_timeval
+struct __kernel_old_timeval {
+	__kernel_long_t tv_sec;
+	__kernel_long_t tv_usec;
+};
+#endif
+
+struct __kernel_sock_timeval {
+	__s64 tv_sec;
+	__s64 tv_usec;
+};
+
+#endif /* _UAPI_LINUX_TIME_TYPES_H */
-- 
cgit v1.2.3


From 460a2db0273efe15488a3e4b88da2678eb403178 Mon Sep 17 00:00:00 2001
From: Deepa Dinamani <deepa.kernel@gmail.com>
Date: Tue, 12 Feb 2019 19:26:04 -0800
Subject: errqueue.h: Include time_types.h

Now that we have a separate header for struct __kernel_timespec,
include it directly without relying on userspace to do it.

Reported-by: Ran Rozenstein <ranro@mellanox.com>
Signed-off-by: Deepa Dinamani <deepa.kernel@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/errqueue.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/uapi/linux/errqueue.h b/include/uapi/linux/errqueue.h
index d955b9e32288..28491dac074b 100644
--- a/include/uapi/linux/errqueue.h
+++ b/include/uapi/linux/errqueue.h
@@ -3,6 +3,7 @@
 #define _UAPI_LINUX_ERRQUEUE_H
 
 #include <linux/types.h>
+#include <linux/time_types.h>
 
 struct sock_extended_err {
 	__u32	ee_errno;	
-- 
cgit v1.2.3


From 2c7e4928b35660e2147d14d5e42849c22f44b55f Mon Sep 17 00:00:00 2001
From: Federico Vaga <federico.vaga@cern.ch>
Date: Thu, 14 Feb 2019 09:51:33 +0100
Subject: i2c: ocores: add SPDX tag

It adds the SPDX tag and it removes the old text about the GPLv2.

Signed-off-by: Federico Vaga <federico.vaga@cern.ch>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/busses/i2c-ocores.c          | 5 +----
 include/linux/platform_data/i2c-ocores.h | 5 +----
 2 files changed, 2 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/drivers/i2c/busses/i2c-ocores.c b/drivers/i2c/busses/i2c-ocores.c
index 5dea7b9ab7e5..78085a88d866 100644
--- a/drivers/i2c/busses/i2c-ocores.c
+++ b/drivers/i2c/busses/i2c-ocores.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * i2c-ocores.c: I2C bus driver for OpenCores I2C controller
  * (https://opencores.org/project/i2c/overview)
@@ -6,10 +7,6 @@
  *
  * Support for the GRLIB port of the controller by
  * Andreas Larsson <andreas@gaisler.com>
- *
- * This file is licensed under the terms of the GNU General Public License
- * version 2.  This program is licensed "as is" without any warranty of any
- * kind, whether express or implied.
  */
 
 #include <linux/clk.h>
diff --git a/include/linux/platform_data/i2c-ocores.h b/include/linux/platform_data/i2c-ocores.h
index 113d6b12f650..8c416ff8affd 100644
--- a/include/linux/platform_data/i2c-ocores.h
+++ b/include/linux/platform_data/i2c-ocores.h
@@ -1,11 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * i2c-ocores.h - definitions for the i2c-ocores interface
  *
  * Peter Korsgaard <peter@korsgaard.com>
- *
- * This file is licensed under the terms of the GNU General Public License
- * version 2.  This program is licensed "as is" without any warranty of any
- * kind, whether express or implied.
  */
 
 #ifndef _LINUX_I2C_OCORES_H
-- 
cgit v1.2.3


From 237b5f66e1ed8a58662f29bcd04442953cdb8b55 Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Thu, 14 Feb 2019 04:24:50 +0100
Subject: i2c: ocores: Add support for bus clock via platform data

Add the I2C bus clock speed to the platform data structure.
If not set, default to 100KHz as before.

Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/busses/i2c-ocores.c          | 5 ++++-
 include/linux/platform_data/i2c-ocores.h | 1 +
 2 files changed, 5 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/i2c/busses/i2c-ocores.c b/drivers/i2c/busses/i2c-ocores.c
index 0d90a82a2c03..4eea18689e99 100644
--- a/drivers/i2c/busses/i2c-ocores.c
+++ b/drivers/i2c/busses/i2c-ocores.c
@@ -628,7 +628,10 @@ static int ocores_i2c_probe(struct platform_device *pdev)
 		i2c->reg_shift = pdata->reg_shift;
 		i2c->reg_io_width = pdata->reg_io_width;
 		i2c->ip_clock_khz = pdata->clock_khz;
-		i2c->bus_clock_khz = 100;
+		if (pdata->bus_khz)
+			i2c->bus_clock_khz = pdata->bus_khz;
+		else
+			i2c->bus_clock_khz = 100;
 	} else {
 		ret = ocores_i2c_of_probe(pdev, i2c);
 		if (ret)
diff --git a/include/linux/platform_data/i2c-ocores.h b/include/linux/platform_data/i2c-ocores.h
index 8c416ff8affd..e6326cbafe59 100644
--- a/include/linux/platform_data/i2c-ocores.h
+++ b/include/linux/platform_data/i2c-ocores.h
@@ -12,6 +12,7 @@ struct ocores_i2c_platform_data {
 	u32 reg_shift; /* register offset shift value */
 	u32 reg_io_width; /* register io read/write width */
 	u32 clock_khz; /* input clock in kHz */
+	u32 bus_khz; /* bus clock in kHz */
 	bool big_endian; /* registers are big endian */
 	u8 num_devices; /* number of devices in the devices list */
 	struct i2c_board_info const *devices; /* devices connected to the bus */
-- 
cgit v1.2.3


From d8434c31378d5f2009741fa171bd1143c77a6e4a Mon Sep 17 00:00:00 2001
From: John Sperbeck <jsperbeck@google.com>
Date: Tue, 12 Feb 2019 19:40:57 -0800
Subject: i2c: core-smbus: don't trace smbus_reply data on errors

If an smbus transfer fails, there's no guarantee that the output
buffer was written.  So, avoid trying to show the output buffer when
tracing after an error.  This was 'mostly harmless', but would trip
up kasan checking if left-over cruft in byte 0 is a large length,
causing us to read from unwritten memory.

Signed-off-by: John Sperbeck <jsperbeck@google.com>
Reviewed-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/i2c-core-smbus.c | 2 +-
 include/trace/events/smbus.h | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/drivers/i2c/i2c-core-smbus.c b/drivers/i2c/i2c-core-smbus.c
index 9cd66cabb84f..132119112596 100644
--- a/drivers/i2c/i2c-core-smbus.c
+++ b/drivers/i2c/i2c-core-smbus.c
@@ -585,7 +585,7 @@ s32 __i2c_smbus_xfer(struct i2c_adapter *adapter, u16 addr,
 trace:
 	/* If enabled, the reply tracepoint is conditional on read_write. */
 	trace_smbus_reply(adapter, addr, flags, read_write,
-			  command, protocol, data);
+			  command, protocol, data, res);
 	trace_smbus_result(adapter, addr, flags, read_write,
 			   command, protocol, res);
 
diff --git a/include/trace/events/smbus.h b/include/trace/events/smbus.h
index d2fb6e1d3e10..a4892a187842 100644
--- a/include/trace/events/smbus.h
+++ b/include/trace/events/smbus.h
@@ -138,9 +138,9 @@ TRACE_EVENT_CONDITION(smbus_reply,
 	TP_PROTO(const struct i2c_adapter *adap,
 		 u16 addr, unsigned short flags,
 		 char read_write, u8 command, int protocol,
-		 const union i2c_smbus_data *data),
-	TP_ARGS(adap, addr, flags, read_write, command, protocol, data),
-	TP_CONDITION(read_write == I2C_SMBUS_READ),
+		 const union i2c_smbus_data *data, int res),
+	TP_ARGS(adap, addr, flags, read_write, command, protocol, data, res),
+	TP_CONDITION(res >= 0 && read_write == I2C_SMBUS_READ),
 	TP_STRUCT__entry(
 		__field(int,	adapter_nr		)
 		__field(__u16,	addr			)
-- 
cgit v1.2.3


From 20bbf22a622178db71fb8bea5f9000d6f346185a Mon Sep 17 00:00:00 2001
From: Bodong Wang <bodong@mellanox.com>
Date: Tue, 12 Feb 2019 22:55:32 -0800
Subject: net/mlx5: Use void pointer as the type in address_of macro

Better to use void * and avoid unnecessary casts.

This patch doesn't change any functionality.

Signed-off-by: Bodong Wang <bodong@mellanox.com>
Signed-off-by: Eli Cohen <eli@mellanox.com>
Reviewed-by: Or Gerlitz <ogerlitz@mellanox.com>
Reviewed-by: Daniel Jurgens <danielj@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 include/linux/mlx5/device.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 0845a227a7b2..46223efa1877 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -67,7 +67,7 @@
 #define MLX5_UN_SZ_BYTES(typ) (sizeof(union mlx5_ifc_##typ##_bits) / 8)
 #define MLX5_UN_SZ_DW(typ) (sizeof(union mlx5_ifc_##typ##_bits) / 32)
 #define MLX5_BYTE_OFF(typ, fld) (__mlx5_bit_off(typ, fld) / 8)
-#define MLX5_ADDR_OF(typ, p, fld) ((char *)(p) + MLX5_BYTE_OFF(typ, fld))
+#define MLX5_ADDR_OF(typ, p, fld) ((void *)((uint8_t *)(p) + MLX5_BYTE_OFF(typ, fld)))
 
 /* insert a value to a struct */
 #define MLX5_SET(typ, p, fld, v) do { \
-- 
cgit v1.2.3


From 7e4c4330a3bc7f34f82b5bc370821e1e16d425fb Mon Sep 17 00:00:00 2001
From: Bodong Wang <bodong@mellanox.com>
Date: Tue, 12 Feb 2019 22:55:33 -0800
Subject: net/mlx5: Use consistent vport num argument type

Use u16 for vport number, which matches how hardware refers to this
argument throughout commands.

This patch doesn't change any functionality.

Signed-off-by: Bodong Wang <bodong@mellanox.com>
Signed-off-by: Eli Cohen <eli@mellanox.com>
Reviewed-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 32 +++++++++++------------
 drivers/net/ethernet/mellanox/mlx5/core/vport.c   |  8 +++---
 include/linux/mlx5/vport.h                        |  8 +++---
 3 files changed, 24 insertions(+), 24 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index a44ea7b85614..d7382892e81c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -52,7 +52,7 @@ enum {
 struct vport_addr {
 	struct l2addr_node     node;
 	u8                     action;
-	u32                    vport;
+	u16                    vport;
 	struct mlx5_flow_handle *flow_rule;
 	bool mpfs; /* UC MAC was added to MPFs */
 	/* A flag indicating that mac was added due to mc promiscuous vport */
@@ -115,7 +115,7 @@ static int modify_esw_vport_context_cmd(struct mlx5_core_dev *dev, u16 vport,
 	return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
 }
 
-static int modify_esw_vport_cvlan(struct mlx5_core_dev *dev, u32 vport,
+static int modify_esw_vport_cvlan(struct mlx5_core_dev *dev, u16 vport,
 				  u16 vlan, u8 qos, u8 set_flags)
 {
 	u32 in[MLX5_ST_SZ_DW(modify_esw_vport_context_in)] = {0};
@@ -152,7 +152,7 @@ static int modify_esw_vport_cvlan(struct mlx5_core_dev *dev, u32 vport,
 
 /* E-Switch FDB */
 static struct mlx5_flow_handle *
-__esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u32 vport, bool rx_rule,
+__esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u16 vport, bool rx_rule,
 			 u8 mac_c[ETH_ALEN], u8 mac_v[ETH_ALEN])
 {
 	int match_header = (is_zero_ether_addr(mac_c) ? 0 :
@@ -215,7 +215,7 @@ __esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u32 vport, bool rx_rule,
 }
 
 static struct mlx5_flow_handle *
-esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u8 mac[ETH_ALEN], u32 vport)
+esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u8 mac[ETH_ALEN], u16 vport)
 {
 	u8 mac_c[ETH_ALEN];
 
@@ -224,7 +224,7 @@ esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u8 mac[ETH_ALEN], u32 vport)
 }
 
 static struct mlx5_flow_handle *
-esw_fdb_set_vport_allmulti_rule(struct mlx5_eswitch *esw, u32 vport)
+esw_fdb_set_vport_allmulti_rule(struct mlx5_eswitch *esw, u16 vport)
 {
 	u8 mac_c[ETH_ALEN];
 	u8 mac_v[ETH_ALEN];
@@ -237,7 +237,7 @@ esw_fdb_set_vport_allmulti_rule(struct mlx5_eswitch *esw, u32 vport)
 }
 
 static struct mlx5_flow_handle *
-esw_fdb_set_vport_promisc_rule(struct mlx5_eswitch *esw, u32 vport)
+esw_fdb_set_vport_promisc_rule(struct mlx5_eswitch *esw, u16 vport)
 {
 	u8 mac_c[ETH_ALEN];
 	u8 mac_v[ETH_ALEN];
@@ -377,7 +377,7 @@ typedef int (*vport_addr_action)(struct mlx5_eswitch *esw,
 static int esw_add_uc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr)
 {
 	u8 *mac = vaddr->node.addr;
-	u32 vport = vaddr->vport;
+	u16 vport = vaddr->vport;
 	int err;
 
 	/* Skip mlx5_mpfs_add_mac for PFs,
@@ -409,7 +409,7 @@ fdb_add:
 static int esw_del_uc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr)
 {
 	u8 *mac = vaddr->node.addr;
-	u32 vport = vaddr->vport;
+	u16 vport = vaddr->vport;
 	int err = 0;
 
 	/* Skip mlx5_mpfs_del_mac for PFs,
@@ -438,7 +438,7 @@ static void update_allmulti_vports(struct mlx5_eswitch *esw,
 				   struct esw_mc_addr *esw_mc)
 {
 	u8 *mac = vaddr->node.addr;
-	u32 vport_idx = 0;
+	u16 vport_idx = 0;
 
 	for (vport_idx = 0; vport_idx < esw->total_vports; vport_idx++) {
 		struct mlx5_vport *vport = &esw->vports[vport_idx];
@@ -485,7 +485,7 @@ static int esw_add_mc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr)
 	struct hlist_head *hash = esw->mc_table;
 	struct esw_mc_addr *esw_mc;
 	u8 *mac = vaddr->node.addr;
-	u32 vport = vaddr->vport;
+	u16 vport = vaddr->vport;
 
 	if (!esw->fdb_table.legacy.fdb)
 		return 0;
@@ -525,7 +525,7 @@ static int esw_del_mc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr)
 	struct hlist_head *hash = esw->mc_table;
 	struct esw_mc_addr *esw_mc;
 	u8 *mac = vaddr->node.addr;
-	u32 vport = vaddr->vport;
+	u16 vport = vaddr->vport;
 
 	if (!esw->fdb_table.legacy.fdb)
 		return 0;
@@ -564,7 +564,7 @@ static int esw_del_mc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr)
 
 /* Apply vport UC/MC list to HW l2 table and FDB table */
 static void esw_apply_vport_addr_list(struct mlx5_eswitch *esw,
-				      u32 vport_num, int list_type)
+				      u16 vport_num, int list_type)
 {
 	struct mlx5_vport *vport = &esw->vports[vport_num];
 	bool is_uc = list_type == MLX5_NVPRT_LIST_TYPE_UC;
@@ -599,7 +599,7 @@ static void esw_apply_vport_addr_list(struct mlx5_eswitch *esw,
 
 /* Sync vport UC/MC list from vport context */
 static void esw_update_vport_addr_list(struct mlx5_eswitch *esw,
-				       u32 vport_num, int list_type)
+				       u16 vport_num, int list_type)
 {
 	struct mlx5_vport *vport = &esw->vports[vport_num];
 	bool is_uc = list_type == MLX5_NVPRT_LIST_TYPE_UC;
@@ -686,7 +686,7 @@ out:
 /* Sync vport UC/MC list from vport context
  * Must be called after esw_update_vport_addr_list
  */
-static void esw_update_vport_mc_promisc(struct mlx5_eswitch *esw, u32 vport_num)
+static void esw_update_vport_mc_promisc(struct mlx5_eswitch *esw, u16 vport_num)
 {
 	struct mlx5_vport *vport = &esw->vports[vport_num];
 	struct l2addr_node *node;
@@ -721,7 +721,7 @@ static void esw_update_vport_mc_promisc(struct mlx5_eswitch *esw, u32 vport_num)
 }
 
 /* Apply vport rx mode to HW FDB table */
-static void esw_apply_vport_rx_mode(struct mlx5_eswitch *esw, u32 vport_num,
+static void esw_apply_vport_rx_mode(struct mlx5_eswitch *esw, u16 vport_num,
 				    bool promisc, bool mc_promisc)
 {
 	struct esw_mc_addr *allmulti_addr = &esw->mc_promisc;
@@ -764,7 +764,7 @@ promisc:
 }
 
 /* Sync vport rx mode from vport context */
-static void esw_update_vport_rx_mode(struct mlx5_eswitch *esw, u32 vport_num)
+static void esw_update_vport_rx_mode(struct mlx5_eswitch *esw, u16 vport_num)
 {
 	struct mlx5_vport *vport = &esw->vports[vport_num];
 	int promisc_all = 0;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
index 9b150ce9d315..9a928eb48522 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
@@ -255,7 +255,7 @@ int mlx5_modify_nic_vport_mtu(struct mlx5_core_dev *mdev, u16 mtu)
 EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_mtu);
 
 int mlx5_query_nic_vport_mac_list(struct mlx5_core_dev *dev,
-				  u32 vport,
+				  u16 vport,
 				  enum mlx5_list_type list_type,
 				  u8 addr_list[][ETH_ALEN],
 				  int *list_size)
@@ -373,7 +373,7 @@ int mlx5_modify_nic_vport_mac_list(struct mlx5_core_dev *dev,
 EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_mac_list);
 
 int mlx5_query_nic_vport_vlans(struct mlx5_core_dev *dev,
-			       u32 vport,
+			       u16 vport,
 			       u16 vlans[],
 			       int *size)
 {
@@ -526,7 +526,7 @@ int mlx5_query_nic_vport_node_guid(struct mlx5_core_dev *mdev, u64 *node_guid)
 EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_node_guid);
 
 int mlx5_modify_nic_vport_node_guid(struct mlx5_core_dev *mdev,
-				    u32 vport, u64 node_guid)
+				    u16 vport, u64 node_guid)
 {
 	int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in);
 	void *nic_vport_context;
@@ -827,7 +827,7 @@ int mlx5_query_hca_vport_node_guid(struct mlx5_core_dev *dev,
 EXPORT_SYMBOL_GPL(mlx5_query_hca_vport_node_guid);
 
 int mlx5_query_nic_vport_promisc(struct mlx5_core_dev *mdev,
-				 u32 vport,
+				 u16 vport,
 				 int *promisc_uc,
 				 int *promisc_mc,
 				 int *promisc_all)
diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h
index 9c694808c212..1654b911cdb2 100644
--- a/include/linux/mlx5/vport.h
+++ b/include/linux/mlx5/vport.h
@@ -60,7 +60,7 @@ int mlx5_query_nic_vport_system_image_guid(struct mlx5_core_dev *mdev,
 					   u64 *system_image_guid);
 int mlx5_query_nic_vport_node_guid(struct mlx5_core_dev *mdev, u64 *node_guid);
 int mlx5_modify_nic_vport_node_guid(struct mlx5_core_dev *mdev,
-				    u32 vport, u64 node_guid);
+				    u16 vport, u64 node_guid);
 int mlx5_query_nic_vport_qkey_viol_cntr(struct mlx5_core_dev *mdev,
 					u16 *qkey_viol_cntr);
 int mlx5_query_hca_vport_gid(struct mlx5_core_dev *dev, u8 other_vport,
@@ -78,7 +78,7 @@ int mlx5_query_hca_vport_system_image_guid(struct mlx5_core_dev *dev,
 int mlx5_query_hca_vport_node_guid(struct mlx5_core_dev *dev,
 				   u64 *node_guid);
 int mlx5_query_nic_vport_mac_list(struct mlx5_core_dev *dev,
-				  u32 vport,
+				  u16 vport,
 				  enum mlx5_list_type list_type,
 				  u8 addr_list[][ETH_ALEN],
 				  int *list_size);
@@ -87,7 +87,7 @@ int mlx5_modify_nic_vport_mac_list(struct mlx5_core_dev *dev,
 				   u8 addr_list[][ETH_ALEN],
 				   int list_size);
 int mlx5_query_nic_vport_promisc(struct mlx5_core_dev *mdev,
-				 u32 vport,
+				 u16 vport,
 				 int *promisc_uc,
 				 int *promisc_mc,
 				 int *promisc_all);
@@ -96,7 +96,7 @@ int mlx5_modify_nic_vport_promisc(struct mlx5_core_dev *mdev,
 				  int promisc_mc,
 				  int promisc_all);
 int mlx5_query_nic_vport_vlans(struct mlx5_core_dev *dev,
-			       u32 vport,
+			       u16 vport,
 			       u16 vlans[],
 			       int *size);
 int mlx5_modify_nic_vport_vlans(struct mlx5_core_dev *dev,
-- 
cgit v1.2.3


From 591905ba96796e3b677b14fa79f27127bfaab4ab Mon Sep 17 00:00:00 2001
From: Bodong Wang <bodong@mellanox.com>
Date: Tue, 12 Feb 2019 22:55:35 -0800
Subject: net/mlx5: Introduce Mellanox SmartNIC and modify page management
 logic

Mellanox's SmartNIC combines embedded CPU(e.g, ARM) processing power
with advanced network offloads to accelerate a multitude of security,
networking and storage applications.

With the introduction of the SmartNIC, there is a new PCI function
called Embedded CPU Physical Function(ECPF). And it's possible for a
PF to get its ICM pages from the ECPF PCI function. Driver shall
identify if it is running on such a function by reading a bit in
the initialization segment.

When firmware asks for pages, it would issue a page request event
specifying how many pages it requests and for which function. That
driver responds with a manage_pages command providing the requested
pages along with an indication for which function it is providing these
pages.

The encoding before this patch was as follows:
    function_id == 0: pages are requested for the function receiving
                      the EQE.
    function_id != 0: pages are requested for VF identified by the
                      function_id value

A new one bit field in the EQE identifies that pages are requested for
the ECPF.

The notion of page_supplier can be introduced here and to support that,
manage pages and query pages were modified so firmware can distinguish
the following cases:

1. Function provides pages for itself
2. PF provides pages for its VF
3. ECPF provides pages to itself
4. ECPF provides pages for another function

This distinction is possible through the introduction of the bit
"embedded_cpu_function" in query_pages, manage_pages and page request
EQE.

Signed-off-by: Bodong Wang <bodong@mellanox.com>
Signed-off-by: Eli Cohen <eli@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/Makefile   |  2 +-
 drivers/net/ethernet/mellanox/mlx5/core/ecpf.c     |  9 ++++
 drivers/net/ethernet/mellanox/mlx5/core/ecpf.h     | 25 ++++++++++
 drivers/net/ethernet/mellanox/mlx5/core/main.c     |  2 +
 .../net/ethernet/mellanox/mlx5/core/mlx5_core.h    |  2 +-
 .../net/ethernet/mellanox/mlx5/core/pagealloc.c    | 54 +++++++++++++++-------
 drivers/net/ethernet/mellanox/mlx5/core/sriov.c    |  2 +-
 include/linux/mlx5/device.h                        |  2 +-
 include/linux/mlx5/driver.h                        |  9 +++-
 include/linux/mlx5/mlx5_ifc.h                      | 12 +++--
 10 files changed, 94 insertions(+), 25 deletions(-)
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/ecpf.c
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/ecpf.h

(limited to 'include')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index 0257731e6d42..07965350b903 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -35,7 +35,7 @@ mlx5_core-$(CONFIG_MLX5_ESWITCH)     += en_rep.o en_tc.o en/tc_tun.o
 #
 # Core extra
 #
-mlx5_core-$(CONFIG_MLX5_ESWITCH)   += eswitch.o eswitch_offloads.o
+mlx5_core-$(CONFIG_MLX5_ESWITCH)   += eswitch.o eswitch_offloads.o ecpf.o
 mlx5_core-$(CONFIG_MLX5_MPFS)      += lib/mpfs.o
 mlx5_core-$(CONFIG_VXLAN)          += lib/vxlan.o
 mlx5_core-$(CONFIG_PTP_1588_CLOCK) += lib/clock.o
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c
new file mode 100644
index 000000000000..28b8c5c5c8c7
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#include "ecpf.h"
+
+bool mlx5_read_embedded_cpu(struct mlx5_core_dev *dev)
+{
+	return (ioread32be(&dev->iseg->initializing) >> MLX5_ECPU_BIT_NUM) & 1;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h
new file mode 100644
index 000000000000..8b684f0ab48f
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#ifndef __MLX5_ECPF_H__
+#define __MLX5_ECPF_H__
+
+#include <linux/mlx5/driver.h>
+#include "mlx5_core.h"
+
+#ifdef CONFIG_MLX5_ESWITCH
+
+enum {
+	MLX5_ECPU_BIT_NUM = 23,
+};
+
+bool mlx5_read_embedded_cpu(struct mlx5_core_dev *dev);
+
+#else  /* CONFIG_MLX5_ESWITCH */
+
+static inline bool
+mlx5_read_embedded_cpu(struct mlx5_core_dev *dev) { return false; }
+
+#endif /* CONFIG_MLX5_ESWITCH */
+
+#endif /* __MLX5_ECPF_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 6d45518edbdc..08a3da2a8358 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -65,6 +65,7 @@
 #include "lib/vxlan.h"
 #include "lib/devcom.h"
 #include "diag/fw_tracer.h"
+#include "ecpf.h"
 
 MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
 MODULE_DESCRIPTION("Mellanox 5th generation network adapters (ConnectX series) core driver");
@@ -898,6 +899,7 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
 	struct pci_dev *pdev = dev->pdev;
 	int err;
 
+	dev->caps.embedded_cpu = mlx5_read_embedded_cpu(dev);
 	mutex_lock(&dev->intf_state_mutex);
 	if (test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) {
 		dev_warn(&dev->pdev->dev, "%s: interface is up, NOP\n",
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
index c68dcea5985b..b127044293b1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
@@ -121,7 +121,7 @@ int mlx5_modify_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy,
 				       u32 modify_bitmask);
 int mlx5_destroy_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy,
 					u32 element_id);
-int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev);
+int mlx5_wait_for_pages(struct mlx5_core_dev *dev, int *pages);
 u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev,
 			     struct ptp_system_timestamp *sts);
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
index a83b517b0714..41025387ff2c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
@@ -48,6 +48,7 @@ enum {
 struct mlx5_pages_req {
 	struct mlx5_core_dev *dev;
 	u16	func_id;
+	u8	ec_function;
 	s32	npages;
 	struct work_struct work;
 };
@@ -143,6 +144,7 @@ static int mlx5_cmd_query_pages(struct mlx5_core_dev *dev, u16 *func_id,
 	MLX5_SET(query_pages_in, in, op_mod, boot ?
 		 MLX5_QUERY_PAGES_IN_OP_MOD_BOOT_PAGES :
 		 MLX5_QUERY_PAGES_IN_OP_MOD_INIT_PAGES);
+	MLX5_SET(query_pages_in, in, embedded_cpu_function, mlx5_core_is_ecpf(dev));
 
 	err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 	if (err)
@@ -253,7 +255,8 @@ err_mapping:
 	return err;
 }
 
-static void page_notify_fail(struct mlx5_core_dev *dev, u16 func_id)
+static void page_notify_fail(struct mlx5_core_dev *dev, u16 func_id,
+			     bool ec_function)
 {
 	u32 out[MLX5_ST_SZ_DW(manage_pages_out)] = {0};
 	u32 in[MLX5_ST_SZ_DW(manage_pages_in)]   = {0};
@@ -262,6 +265,7 @@ static void page_notify_fail(struct mlx5_core_dev *dev, u16 func_id)
 	MLX5_SET(manage_pages_in, in, opcode, MLX5_CMD_OP_MANAGE_PAGES);
 	MLX5_SET(manage_pages_in, in, op_mod, MLX5_PAGES_CANT_GIVE);
 	MLX5_SET(manage_pages_in, in, function_id, func_id);
+	MLX5_SET(manage_pages_in, in, embedded_cpu_function, ec_function);
 
 	err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 	if (err)
@@ -270,7 +274,7 @@ static void page_notify_fail(struct mlx5_core_dev *dev, u16 func_id)
 }
 
 static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages,
-		      int notify_fail)
+		      int notify_fail, bool ec_function)
 {
 	u32 out[MLX5_ST_SZ_DW(manage_pages_out)] = {0};
 	int inlen = MLX5_ST_SZ_BYTES(manage_pages_in);
@@ -305,6 +309,7 @@ retry:
 	MLX5_SET(manage_pages_in, in, op_mod, MLX5_PAGES_GIVE);
 	MLX5_SET(manage_pages_in, in, function_id, func_id);
 	MLX5_SET(manage_pages_in, in, input_num_entries, npages);
+	MLX5_SET(manage_pages_in, in, embedded_cpu_function, ec_function);
 
 	err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
 	if (err) {
@@ -316,8 +321,11 @@ retry:
 	dev->priv.fw_pages += npages;
 	if (func_id)
 		dev->priv.vfs_pages += npages;
+	else if (mlx5_core_is_ecpf(dev) && !ec_function)
+		dev->priv.peer_pf_pages += npages;
 
-	mlx5_core_dbg(dev, "err %d\n", err);
+	mlx5_core_dbg(dev, "npages %d, ec_function %d, func_id 0x%x, err %d\n",
+		      npages, ec_function, func_id, err);
 
 	kvfree(in);
 	return 0;
@@ -328,7 +336,7 @@ out_4k:
 out_free:
 	kvfree(in);
 	if (notify_fail)
-		page_notify_fail(dev, func_id);
+		page_notify_fail(dev, func_id, ec_function);
 	return err;
 }
 
@@ -364,7 +372,7 @@ static int reclaim_pages_cmd(struct mlx5_core_dev *dev,
 }
 
 static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages,
-			 int *nclaimed)
+			 int *nclaimed, bool ec_function)
 {
 	int outlen = MLX5_ST_SZ_BYTES(manage_pages_out);
 	u32 in[MLX5_ST_SZ_DW(manage_pages_in)] = {0};
@@ -385,6 +393,7 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages,
 	MLX5_SET(manage_pages_in, in, op_mod, MLX5_PAGES_TAKE);
 	MLX5_SET(manage_pages_in, in, function_id, func_id);
 	MLX5_SET(manage_pages_in, in, input_num_entries, npages);
+	MLX5_SET(manage_pages_in, in, embedded_cpu_function, ec_function);
 
 	mlx5_core_dbg(dev, "npages %d, outlen %d\n", npages, outlen);
 	err = reclaim_pages_cmd(dev, in, sizeof(in), out, outlen);
@@ -410,6 +419,8 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages,
 	dev->priv.fw_pages -= num_claimed;
 	if (func_id)
 		dev->priv.vfs_pages -= num_claimed;
+	else if (mlx5_core_is_ecpf(dev) && !ec_function)
+		dev->priv.peer_pf_pages -= num_claimed;
 
 out_free:
 	kvfree(out);
@@ -423,9 +434,10 @@ static void pages_work_handler(struct work_struct *work)
 	int err = 0;
 
 	if (req->npages < 0)
-		err = reclaim_pages(dev, req->func_id, -1 * req->npages, NULL);
+		err = reclaim_pages(dev, req->func_id, -1 * req->npages, NULL,
+				    req->ec_function);
 	else if (req->npages > 0)
-		err = give_pages(dev, req->func_id, req->npages, 1);
+		err = give_pages(dev, req->func_id, req->npages, 1, req->ec_function);
 
 	if (err)
 		mlx5_core_warn(dev, "%s fail %d\n",
@@ -434,6 +446,10 @@ static void pages_work_handler(struct work_struct *work)
 	kfree(req);
 }
 
+enum {
+	EC_FUNCTION_MASK = 0x8000,
+};
+
 static int req_pages_handler(struct notifier_block *nb,
 			     unsigned long type, void *data)
 {
@@ -441,6 +457,7 @@ static int req_pages_handler(struct notifier_block *nb,
 	struct mlx5_core_dev *dev;
 	struct mlx5_priv *priv;
 	struct mlx5_eqe *eqe;
+	bool ec_function;
 	u16 func_id;
 	s32 npages;
 
@@ -450,6 +467,7 @@ static int req_pages_handler(struct notifier_block *nb,
 
 	func_id = be16_to_cpu(eqe->data.req_pages.func_id);
 	npages  = be32_to_cpu(eqe->data.req_pages.num_pages);
+	ec_function = be16_to_cpu(eqe->data.req_pages.ec_function) & EC_FUNCTION_MASK;
 	mlx5_core_dbg(dev, "page request for func 0x%x, npages %d\n",
 		      func_id, npages);
 	req = kzalloc(sizeof(*req), GFP_ATOMIC);
@@ -461,6 +479,7 @@ static int req_pages_handler(struct notifier_block *nb,
 	req->dev = dev;
 	req->func_id = func_id;
 	req->npages = npages;
+	req->ec_function = ec_function;
 	INIT_WORK(&req->work, pages_work_handler);
 	queue_work(dev->priv.pg_wq, &req->work);
 	return NOTIFY_OK;
@@ -479,7 +498,7 @@ int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev, int boot)
 	mlx5_core_dbg(dev, "requested %d %s pages for func_id 0x%x\n",
 		      npages, boot ? "boot" : "init", func_id);
 
-	return give_pages(dev, func_id, npages, 0);
+	return give_pages(dev, func_id, npages, 0, mlx5_core_is_ecpf(dev));
 }
 
 enum {
@@ -513,7 +532,7 @@ int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev)
 			fwp = rb_entry(p, struct fw_page, rb_node);
 			err = reclaim_pages(dev, fwp->func_id,
 					    optimal_reclaimed_pages(),
-					    &nclaimed);
+					    &nclaimed, mlx5_core_is_ecpf(dev));
 
 			if (err) {
 				mlx5_core_warn(dev, "failed reclaiming pages (%d)\n",
@@ -535,6 +554,9 @@ int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev)
 	WARN(dev->priv.vfs_pages,
 	     "VFs FW pages counter is %d after reclaiming all pages\n",
 	     dev->priv.vfs_pages);
+	WARN(dev->priv.peer_pf_pages,
+	     "Peer PF FW pages counter is %d after reclaiming all pages\n",
+	     dev->priv.peer_pf_pages);
 
 	return 0;
 }
@@ -567,10 +589,10 @@ void mlx5_pagealloc_stop(struct mlx5_core_dev *dev)
 	flush_workqueue(dev->priv.pg_wq);
 }
 
-int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev)
+int mlx5_wait_for_pages(struct mlx5_core_dev *dev, int *pages)
 {
 	unsigned long end = jiffies + msecs_to_jiffies(MAX_RECLAIM_VFS_PAGES_TIME_MSECS);
-	int prev_vfs_pages = dev->priv.vfs_pages;
+	int prev_pages = *pages;
 
 	/* In case of internal error we will free the pages manually later */
 	if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
@@ -578,16 +600,16 @@ int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev)
 		return 0;
 	}
 
-	mlx5_core_dbg(dev, "Waiting for %d pages from %s\n", prev_vfs_pages,
+	mlx5_core_dbg(dev, "Waiting for %d pages from %s\n", prev_pages,
 		      dev->priv.name);
-	while (dev->priv.vfs_pages) {
+	while (*pages) {
 		if (time_after(jiffies, end)) {
-			mlx5_core_warn(dev, "aborting while there are %d pending pages\n", dev->priv.vfs_pages);
+			mlx5_core_warn(dev, "aborting while there are %d pending pages\n", *pages);
 			return -ETIMEDOUT;
 		}
-		if (dev->priv.vfs_pages < prev_vfs_pages) {
+		if (*pages < prev_pages) {
 			end = jiffies + msecs_to_jiffies(MAX_RECLAIM_VFS_PAGES_TIME_MSECS);
-			prev_vfs_pages = dev->priv.vfs_pages;
+			prev_pages = *pages;
 		}
 		msleep(50);
 	}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
index 6e178030d8fb..7b23fa8d2d60 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
@@ -147,7 +147,7 @@ out:
 	if (MLX5_ESWITCH_MANAGER(dev))
 		mlx5_eswitch_disable_sriov(dev->priv.eswitch);
 
-	if (mlx5_wait_for_vf_pages(dev))
+	if (mlx5_wait_for_pages(dev, &dev->priv.vfs_pages))
 		mlx5_core_warn(dev, "timeout reclaiming VFs pages\n");
 }
 
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 46223efa1877..f2070350f60a 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -591,7 +591,7 @@ struct mlx5_eqe_cmd {
 };
 
 struct mlx5_eqe_page_req {
-	u8		rsvd0[2];
+	__be16		ec_function;
 	__be16		func_id;
 	__be32		num_pages;
 	__be32		rsvd1[5];
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 039c9398614c..cce4e8293384 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -522,6 +522,7 @@ struct mlx5_priv {
 	atomic_t		reg_pages;
 	struct list_head	free_list;
 	int			vfs_pages;
+	int			peer_pf_pages;
 
 	struct mlx5_core_health health;
 
@@ -652,6 +653,7 @@ struct mlx5_core_dev {
 		u32 mcam[MLX5_ST_SZ_DW(mcam_reg)];
 		u32 fpga[MLX5_ST_SZ_DW(fpga_cap)];
 		u32 qcam[MLX5_ST_SZ_DW(qcam_reg)];
+		u8  embedded_cpu;
 	} caps;
 	u64			sys_image_guid;
 	phys_addr_t		iseg_base;
@@ -922,7 +924,7 @@ void mlx5_pagealloc_cleanup(struct mlx5_core_dev *dev);
 void mlx5_pagealloc_start(struct mlx5_core_dev *dev);
 void mlx5_pagealloc_stop(struct mlx5_core_dev *dev);
 void mlx5_core_req_pages_handler(struct mlx5_core_dev *dev, u16 func_id,
-				 s32 npages);
+				 s32 npages, bool ec_function);
 int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev, int boot);
 int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev);
 void mlx5_register_debugfs(void);
@@ -1076,6 +1078,11 @@ static inline int mlx5_core_is_pf(struct mlx5_core_dev *dev)
 	return !(dev->priv.pci_dev_data & MLX5_PCI_DEV_IS_VF);
 }
 
+static inline bool mlx5_core_is_ecpf(struct mlx5_core_dev *dev)
+{
+	return dev->caps.embedded_cpu;
+}
+
 #define MLX5_TOTAL_VPORTS(mdev) (1 + pci_sriov_get_totalvfs((mdev)->pdev))
 #define MLX5_VPORT_MANAGER(mdev) \
 	(MLX5_CAP_GEN(mdev, vport_group_manager) && \
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index c5c679390fbd..46799b4c8859 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -4441,7 +4441,8 @@ struct mlx5_ifc_query_pages_out_bits {
 
 	u8         syndrome[0x20];
 
-	u8         reserved_at_40[0x10];
+	u8         embedded_cpu_function[0x1];
+	u8         reserved_at_41[0xf];
 	u8         function_id[0x10];
 
 	u8         num_pages[0x20];
@@ -4460,7 +4461,8 @@ struct mlx5_ifc_query_pages_in_bits {
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_at_40[0x10];
+	u8         embedded_cpu_function[0x1];
+	u8         reserved_at_41[0xf];
 	u8         function_id[0x10];
 
 	u8         reserved_at_60[0x20];
@@ -5880,7 +5882,8 @@ struct mlx5_ifc_manage_pages_in_bits {
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_at_40[0x10];
+	u8         embedded_cpu_function[0x1];
+	u8         reserved_at_41[0xf];
 	u8         function_id[0x10];
 
 	u8         input_num_entries[0x20];
@@ -8749,7 +8752,8 @@ struct mlx5_ifc_initial_seg_bits {
 	u8         initializing[0x1];
 	u8         reserved_at_fe1[0x4];
 	u8         nic_interface_supported[0x3];
-	u8         reserved_at_fe8[0x18];
+	u8         embedded_cpu[0x1];
+	u8         reserved_at_fe9[0x17];
 
 	struct mlx5_ifc_health_buffer_bits health_buffer;
 
-- 
cgit v1.2.3


From 22e939a91dcb9bd4b773f2a0c0cb4eb016679b49 Mon Sep 17 00:00:00 2001
From: Bodong Wang <bodong@mellanox.com>
Date: Tue, 12 Feb 2019 22:55:36 -0800
Subject: net/mlx5: Update enable HCA dependency

With the introduction of ECPF, we require that the ECPF driver will
aways call enable/disable HCA for that PF in the same way a PF does
this for its VFs. The PF is still responsible for calling enable and
disable HCA for its VFs.

To distinguish between the ECPF executing enable/disable HCA for
itself or for the PF, it sets the embedded CPU function bit in the
input params struct of these commands. When the bit is cleared and
function ID is zero, it refers to the peer PF.

Signed-off-by: Bodong Wang <bodong@mellanox.com>
Signed-off-by: Eli Cohen <eli@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/ecpf.c | 76 ++++++++++++++++++++++++++
 drivers/net/ethernet/mellanox/mlx5/core/ecpf.h |  4 ++
 drivers/net/ethernet/mellanox/mlx5/core/main.c | 14 +++++
 include/linux/mlx5/mlx5_ifc.h                  |  6 +-
 4 files changed, 98 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c
index 28b8c5c5c8c7..1bcf8b8f9713 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c
@@ -7,3 +7,79 @@ bool mlx5_read_embedded_cpu(struct mlx5_core_dev *dev)
 {
 	return (ioread32be(&dev->iseg->initializing) >> MLX5_ECPU_BIT_NUM) & 1;
 }
+
+static int mlx5_peer_pf_enable_hca(struct mlx5_core_dev *dev)
+{
+	u32 out[MLX5_ST_SZ_DW(enable_hca_out)] = {};
+	u32 in[MLX5_ST_SZ_DW(enable_hca_in)]   = {};
+
+	MLX5_SET(enable_hca_in, in, opcode, MLX5_CMD_OP_ENABLE_HCA);
+	MLX5_SET(enable_hca_in, in, function_id, 0);
+	MLX5_SET(enable_hca_in, in, embedded_cpu_function, 0);
+	return mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
+}
+
+static int mlx5_peer_pf_disable_hca(struct mlx5_core_dev *dev)
+{
+	u32 out[MLX5_ST_SZ_DW(disable_hca_out)] = {};
+	u32 in[MLX5_ST_SZ_DW(disable_hca_in)]   = {};
+
+	MLX5_SET(disable_hca_in, in, opcode, MLX5_CMD_OP_DISABLE_HCA);
+	MLX5_SET(disable_hca_in, in, function_id, 0);
+	MLX5_SET(enable_hca_in, in, embedded_cpu_function, 0);
+	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+static int mlx5_peer_pf_init(struct mlx5_core_dev *dev)
+{
+	int err;
+
+	err = mlx5_peer_pf_enable_hca(dev);
+	if (err)
+		mlx5_core_err(dev, "Failed to enable peer PF HCA err(%d)\n",
+			      err);
+
+	return err;
+}
+
+static void mlx5_peer_pf_cleanup(struct mlx5_core_dev *dev)
+{
+	int err;
+
+	err = mlx5_peer_pf_disable_hca(dev);
+	if (err) {
+		mlx5_core_err(dev, "Failed to disable peer PF HCA err(%d)\n",
+			      err);
+		return;
+	}
+
+	err = mlx5_wait_for_pages(dev, &dev->priv.peer_pf_pages);
+	if (err)
+		mlx5_core_warn(dev, "Timeout reclaiming peer PF pages err(%d)\n",
+			       err);
+}
+
+int mlx5_ec_init(struct mlx5_core_dev *dev)
+{
+	int err = 0;
+
+	if (!mlx5_core_is_ecpf(dev))
+		return 0;
+
+	/* ECPF shall enable HCA for peer PF in the same way a PF
+	 * does this for its VFs.
+	 */
+	err = mlx5_peer_pf_init(dev);
+	if (err)
+		return err;
+
+	return 0;
+}
+
+void mlx5_ec_cleanup(struct mlx5_core_dev *dev)
+{
+	if (!mlx5_core_is_ecpf(dev))
+		return;
+
+	mlx5_peer_pf_cleanup(dev);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h
index 8b684f0ab48f..d3d7a00a02ac 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h
@@ -14,11 +14,15 @@ enum {
 };
 
 bool mlx5_read_embedded_cpu(struct mlx5_core_dev *dev);
+int mlx5_ec_init(struct mlx5_core_dev *dev);
+void mlx5_ec_cleanup(struct mlx5_core_dev *dev);
 
 #else  /* CONFIG_MLX5_ESWITCH */
 
 static inline bool
 mlx5_read_embedded_cpu(struct mlx5_core_dev *dev) { return false; }
+static inline int mlx5_ec_init(struct mlx5_core_dev *dev) { return 0; }
+static inline void mlx5_ec_cleanup(struct mlx5_core_dev *dev) {}
 
 #endif /* CONFIG_MLX5_ESWITCH */
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 08a3da2a8358..40d591c8e76c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -612,6 +612,8 @@ int mlx5_core_enable_hca(struct mlx5_core_dev *dev, u16 func_id)
 
 	MLX5_SET(enable_hca_in, in, opcode, MLX5_CMD_OP_ENABLE_HCA);
 	MLX5_SET(enable_hca_in, in, function_id, func_id);
+	MLX5_SET(enable_hca_in, in, embedded_cpu_function,
+		 dev->caps.embedded_cpu);
 	return mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
 }
 
@@ -622,6 +624,8 @@ int mlx5_core_disable_hca(struct mlx5_core_dev *dev, u16 func_id)
 
 	MLX5_SET(disable_hca_in, in, opcode, MLX5_CMD_OP_DISABLE_HCA);
 	MLX5_SET(disable_hca_in, in, function_id, func_id);
+	MLX5_SET(enable_hca_in, in, embedded_cpu_function,
+		 dev->caps.embedded_cpu);
 	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
 
@@ -1071,6 +1075,12 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
 		goto err_sriov;
 	}
 
+	err = mlx5_ec_init(dev);
+	if (err) {
+		dev_err(&pdev->dev, "Failed to init embedded CPU\n");
+		goto err_ec;
+	}
+
 	if (mlx5_device_registered(dev)) {
 		mlx5_attach_device(dev);
 	} else {
@@ -1088,6 +1098,9 @@ out:
 	return 0;
 
 err_reg_dev:
+	mlx5_ec_cleanup(dev);
+
+err_ec:
 	mlx5_sriov_detach(dev);
 
 err_sriov:
@@ -1162,6 +1175,7 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
 	if (mlx5_device_registered(dev))
 		mlx5_detach_device(dev);
 
+	mlx5_ec_cleanup(dev);
 	mlx5_sriov_detach(dev);
 	mlx5_cleanup_fs(dev);
 	mlx5_accel_ipsec_cleanup(dev);
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 46799b4c8859..1b6d5a563a3a 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -6061,7 +6061,8 @@ struct mlx5_ifc_enable_hca_in_bits {
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_at_40[0x10];
+	u8         embedded_cpu_function[0x1];
+	u8         reserved_at_41[0xf];
 	u8         function_id[0x10];
 
 	u8         reserved_at_60[0x20];
@@ -6105,7 +6106,8 @@ struct mlx5_ifc_disable_hca_in_bits {
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_at_40[0x10];
+	u8         embedded_cpu_function[0x1];
+	u8         reserved_at_41[0xf];
 	u8         function_id[0x10];
 
 	u8         reserved_at_60[0x20];
-- 
cgit v1.2.3


From c3a4e9f10714911486d09e7729195cc8fbedecd3 Mon Sep 17 00:00:00 2001
From: Bodong Wang <bodong@mellanox.com>
Date: Tue, 12 Feb 2019 22:55:37 -0800
Subject: net/mlx5: Add query host params command

The QUERY_HOST_PARAMS command is used by an Embedded CPU Physical
Function (ECPF) driver to identify and retrieve information about the
PF on the host side. E.g, number of virtual functions and PCI BDF.

The number of VFs can be changed on the fly, a function is added to
query current number of VFs and will be used in downstream patches.

Signed-off-by: Bodong Wang <bodong@mellanox.com>
Signed-off-by: Eli Cohen <eli@mellanox.com>
Reviewed-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/cmd.c  |  2 ++
 drivers/net/ethernet/mellanox/mlx5/core/ecpf.c | 27 +++++++++++++++++
 drivers/net/ethernet/mellanox/mlx5/core/ecpf.h |  4 +++
 include/linux/mlx5/mlx5_ifc.h                  | 41 ++++++++++++++++++++++++++
 4 files changed, 74 insertions(+)

(limited to 'include')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index a25a8c6f938e..46d70eb2d2f7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -316,6 +316,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
 	case MLX5_CMD_OP_DESTROY_GENERAL_OBJECT:
 	case MLX5_CMD_OP_DEALLOC_MEMIC:
 	case MLX5_CMD_OP_PAGE_FAULT_RESUME:
+	case MLX5_CMD_OP_QUERY_HOST_PARAMS:
 		return MLX5_CMD_STAT_OK;
 
 	case MLX5_CMD_OP_QUERY_HCA_CAP:
@@ -627,6 +628,7 @@ const char *mlx5_command_str(int command)
 	MLX5_COMMAND_STR_CASE(QUERY_MODIFY_HEADER_CONTEXT);
 	MLX5_COMMAND_STR_CASE(ALLOC_MEMIC);
 	MLX5_COMMAND_STR_CASE(DEALLOC_MEMIC);
+	MLX5_COMMAND_STR_CASE(QUERY_HOST_PARAMS);
 	default: return "unknown command opcode";
 	}
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c
index 1bcf8b8f9713..4746f2d28fb6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c
@@ -83,3 +83,30 @@ void mlx5_ec_cleanup(struct mlx5_core_dev *dev)
 
 	mlx5_peer_pf_cleanup(dev);
 }
+
+static int mlx5_query_host_params_context(struct mlx5_core_dev *dev,
+					  u32 *out, int outlen)
+{
+	u32 in[MLX5_ST_SZ_DW(query_host_params_in)] = {};
+
+	MLX5_SET(query_host_params_in, in, opcode,
+		 MLX5_CMD_OP_QUERY_HOST_PARAMS);
+
+	return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
+}
+
+int mlx5_query_host_params_num_vfs(struct mlx5_core_dev *dev, int *num_vf)
+{
+	u32 out[MLX5_ST_SZ_DW(query_host_params_out)] = {};
+	int err;
+
+	err = mlx5_query_host_params_context(dev, out, sizeof(out));
+	if (err)
+		return err;
+
+	*num_vf = MLX5_GET(query_host_params_out, out,
+			   host_params_context.host_num_of_vfs);
+	mlx5_core_dbg(dev, "host_num_of_vfs %d\n", *num_vf);
+
+	return 0;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h
index d3d7a00a02ac..346372df218f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h
@@ -16,6 +16,7 @@ enum {
 bool mlx5_read_embedded_cpu(struct mlx5_core_dev *dev);
 int mlx5_ec_init(struct mlx5_core_dev *dev);
 void mlx5_ec_cleanup(struct mlx5_core_dev *dev);
+int mlx5_query_host_params_num_vfs(struct mlx5_core_dev *dev, int *num_vf);
 
 #else  /* CONFIG_MLX5_ESWITCH */
 
@@ -23,6 +24,9 @@ static inline bool
 mlx5_read_embedded_cpu(struct mlx5_core_dev *dev) { return false; }
 static inline int mlx5_ec_init(struct mlx5_core_dev *dev) { return 0; }
 static inline void mlx5_ec_cleanup(struct mlx5_core_dev *dev) {}
+static inline int
+mlx5_query_host_params_num_vfs(struct mlx5_core_dev *dev, int *num_vf)
+{ return -EOPNOTSUPP; }
 
 #endif /* CONFIG_MLX5_ESWITCH */
 
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 1b6d5a563a3a..565046830559 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -142,6 +142,7 @@ enum {
 	MLX5_CMD_OP_QUERY_XRQ_DC_PARAMS_ENTRY     = 0x725,
 	MLX5_CMD_OP_SET_XRQ_DC_PARAMS_ENTRY       = 0x726,
 	MLX5_CMD_OP_QUERY_XRQ_ERROR_PARAMS        = 0x727,
+	MLX5_CMD_OP_QUERY_HOST_PARAMS             = 0x740,
 	MLX5_CMD_OP_QUERY_VPORT_STATE             = 0x750,
 	MLX5_CMD_OP_MODIFY_VPORT_STATE            = 0x751,
 	MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT       = 0x752,
@@ -9522,4 +9523,44 @@ struct mlx5_ifc_mtrc_ctrl_bits {
 	u8         reserved_at_80[0x180];
 };
 
+struct mlx5_ifc_host_params_context_bits {
+	u8         host_number[0x8];
+	u8         reserved_at_8[0x8];
+	u8         host_num_of_vfs[0x10];
+
+	u8         reserved_at_20[0x10];
+	u8         host_pci_bus[0x10];
+
+	u8         reserved_at_40[0x10];
+	u8         host_pci_device[0x10];
+
+	u8         reserved_at_60[0x10];
+	u8         host_pci_function[0x10];
+
+	u8         reserved_at_80[0x180];
+};
+
+struct mlx5_ifc_query_host_params_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_at_10[0x10];
+
+	u8         reserved_at_20[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_at_40[0x40];
+};
+
+struct mlx5_ifc_query_host_params_out_bits {
+	u8         status[0x8];
+	u8         reserved_at_8[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_at_40[0x40];
+
+	struct mlx5_ifc_host_params_context_bits host_params_context;
+
+	u8         reserved_at_280[0x180];
+};
+
 #endif /* MLX5_IFC_H */
-- 
cgit v1.2.3


From 7f0d11c7e0d08304de55b6a571a69166f3d54160 Mon Sep 17 00:00:00 2001
From: Bodong Wang <bodong@mellanox.com>
Date: Tue, 12 Feb 2019 22:55:38 -0800
Subject: net/mlx5: Add host params change event

In Embedded CPU (EC) configurations, the EC driver needs to know when
the number of virtual functions change on the corresponding PF at the
host side. This is required so the EC driver can create or destroy
representor net devices that represent the VFs ports.

Whenever a change in the number of VFs occurs, firmware will generate an
event towards the EC which will trigger a work to complete the rest of
the handling. The specifics of the handling will be introduced in a
downstream patch.

Signed-off-by: Bodong Wang <bodong@mellanox.com>
Signed-off-by: Eli Cohen <eli@mellanox.com>
Reviewed-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/eq.c     | 3 +++
 drivers/net/ethernet/mellanox/mlx5/core/events.c | 2 ++
 include/linux/mlx5/device.h                      | 2 ++
 include/linux/mlx5/driver.h                      | 5 +++++
 4 files changed, 12 insertions(+)

(limited to 'include')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index 7092457705a2..5c02f9291799 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -530,6 +530,9 @@ static u64 gather_async_events_mask(struct mlx5_core_dev *dev)
 	if (MLX5_CAP_GEN(dev, max_num_of_monitor_counters))
 		async_event_mask |= (1ull << MLX5_EVENT_TYPE_MONITOR_COUNTER);
 
+	if (mlx5_core_is_ecpf_esw_manager(dev))
+		async_event_mask |= (1ull << MLX5_EVENT_TYPE_HOST_PARAMS_CHANGE);
+
 	return async_event_mask;
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/events.c b/drivers/net/ethernet/mellanox/mlx5/core/events.c
index fbc42b7252a9..4f7f776d6332 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/events.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/events.c
@@ -103,6 +103,8 @@ static const char *eqe_type_str(u8 type)
 		return "MLX5_EVENT_TYPE_STALL_EVENT";
 	case MLX5_EVENT_TYPE_CMD:
 		return "MLX5_EVENT_TYPE_CMD";
+	case MLX5_EVENT_TYPE_HOST_PARAMS_CHANGE:
+		return "MLX5_EVENT_TYPE_HOST_PARAMS_CHANGE";
 	case MLX5_EVENT_TYPE_PAGE_REQUEST:
 		return "MLX5_EVENT_TYPE_PAGE_REQUEST";
 	case MLX5_EVENT_TYPE_PAGE_FAULT:
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index f2070350f60a..f93a5598b942 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -342,6 +342,8 @@ enum mlx5_event {
 	MLX5_EVENT_TYPE_PAGE_FAULT	   = 0xc,
 	MLX5_EVENT_TYPE_NIC_VPORT_CHANGE   = 0xd,
 
+	MLX5_EVENT_TYPE_HOST_PARAMS_CHANGE = 0xe,
+
 	MLX5_EVENT_TYPE_DCT_DRAINED        = 0x1c,
 
 	MLX5_EVENT_TYPE_FPGA_ERROR         = 0x20,
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index cce4e8293384..151563a12fc2 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -1083,6 +1083,11 @@ static inline bool mlx5_core_is_ecpf(struct mlx5_core_dev *dev)
 	return dev->caps.embedded_cpu;
 }
 
+static inline bool mlx5_core_is_ecpf_esw_manager(struct mlx5_core_dev *dev)
+{
+	return dev->caps.embedded_cpu && MLX5_CAP_GEN(dev, eswitch_manager);
+}
+
 #define MLX5_TOTAL_VPORTS(mdev) (1 + pci_sriov_get_totalvfs((mdev)->pdev))
 #define MLX5_VPORT_MANAGER(mdev) \
 	(MLX5_CAP_GEN(mdev, vport_group_manager) && \
-- 
cgit v1.2.3


From feb393693316bd5de2c88a020f6ded51e3a4120b Mon Sep 17 00:00:00 2001
From: Bodong Wang <bodong@mellanox.com>
Date: Tue, 12 Feb 2019 22:55:39 -0800
Subject: net/mlx5: Provide an alternative VF upper bound for ECPF

ECPF doesn't support SR-IOV, but an ECPF E-Switch manager shall know
the max VFs supported by its peer host PF in order to control those
VF vports.

The current driver implementation uses the total vfs quantity as
provided by the pci sub-system for an upper bound of the VF vports
the e-switch code needs to deal with. This obviously can't work as
is on ECPF e-switch manager. For now, we use a hard coded value of
128 on such systems.

Signed-off-by: Bodong Wang <bodong@mellanox.com>
Reviewed-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 include/linux/mlx5/driver.h | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 151563a12fc2..46e0aa52a58a 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -1088,7 +1088,16 @@ static inline bool mlx5_core_is_ecpf_esw_manager(struct mlx5_core_dev *dev)
 	return dev->caps.embedded_cpu && MLX5_CAP_GEN(dev, eswitch_manager);
 }
 
-#define MLX5_TOTAL_VPORTS(mdev) (1 + pci_sriov_get_totalvfs((mdev)->pdev))
+#define MLX5_HOST_PF_MAX_VFS	(127u)
+static inline u16 mlx5_core_max_vfs(struct mlx5_core_dev *dev)
+{
+	if (mlx5_core_is_ecpf_esw_manager(dev))
+		return MLX5_HOST_PF_MAX_VFS;
+	else
+		return pci_sriov_get_totalvfs(dev->pdev);
+}
+
+#define MLX5_TOTAL_VPORTS(mdev) (1 + mlx5_core_max_vfs(mdev))
 #define MLX5_VPORT_MANAGER(mdev) \
 	(MLX5_CAP_GEN(mdev, vport_group_manager) && \
 	 (MLX5_CAP_GEN(mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH) && \
-- 
cgit v1.2.3


From b05af6aacdb920dc3bfd27d53ade7f680d43265c Mon Sep 17 00:00:00 2001
From: Bodong Wang <bodong@mellanox.com>
Date: Tue, 12 Feb 2019 22:55:40 -0800
Subject: net/mlx5: E-Switch, Normalize the name of uplink vport number

Driver used to name uplink vport as FDB_UPLINK_VPORT, it's hard to
comply with the same naming convention along with the introduction of
other vports. Use MLX5_VPORT as the prefix for such vports and
relocate the uplink vport definition to public header file for the
benefits of both net and IB drivers.

This patch doesn't change any functionality.

Signed-off-by: Bodong Wang <bodong@mellanox.com>
Reviewed-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/infiniband/hw/mlx5/ib_rep.c                |  4 ++--
 drivers/net/ethernet/mellanox/mlx5/core/en_rep.c   | 22 +++++++++++-----------
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c    |  6 +++---
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.c  |  8 +++-----
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.h  |  2 --
 .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 14 +++++++-------
 include/linux/mlx5/vport.h                         |  4 ++++
 7 files changed, 30 insertions(+), 30 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/hw/mlx5/ib_rep.c b/drivers/infiniband/hw/mlx5/ib_rep.c
index 3fb22967c098..99cae9a10195 100644
--- a/drivers/infiniband/hw/mlx5/ib_rep.c
+++ b/drivers/infiniband/hw/mlx5/ib_rep.c
@@ -3,6 +3,7 @@
  * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
  */
 
+#include <linux/mlx5/vport.h>
 #include "ib_rep.h"
 #include "srq.h"
 
@@ -48,11 +49,10 @@ static const struct mlx5_ib_profile vf_rep_profile = {
 static int
 mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
 {
-#define FDB_UPLINK_VPORT 0xffff
 	const struct mlx5_ib_profile *profile;
 	struct mlx5_ib_dev *ibdev;
 
-	if (rep->vport == FDB_UPLINK_VPORT)
+	if (rep->vport == MLX5_VPORT_UPLINK)
 		profile = &uplink_rep_profile;
 	else
 		profile = &vf_rep_profile;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 96cc0c6a4014..c78d21b2501e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -152,7 +152,7 @@ static void mlx5e_rep_update_hw_counters(struct mlx5e_priv *priv)
 	struct mlx5e_rep_priv *rpriv = priv->ppriv;
 	struct mlx5_eswitch_rep *rep = rpriv->rep;
 
-	if (rep->vport == FDB_UPLINK_VPORT)
+	if (rep->vport == MLX5_VPORT_UPLINK)
 		mlx5e_uplink_rep_update_hw_counters(priv);
 	else
 		mlx5e_vf_rep_update_hw_counters(priv);
@@ -1207,7 +1207,7 @@ bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv)
 		return false;
 
 	rep = rpriv->rep;
-	return (rep->vport == FDB_UPLINK_VPORT);
+	return (rep->vport == MLX5_VPORT_UPLINK);
 }
 
 static bool mlx5e_rep_has_offload_stats(const struct net_device *dev, int attr_id)
@@ -1343,7 +1343,7 @@ static void mlx5e_build_rep_params(struct net_device *netdev)
 	params->sw_mtu      = netdev->mtu;
 
 	/* SQ */
-	if (rep->vport == FDB_UPLINK_VPORT)
+	if (rep->vport == MLX5_VPORT_UPLINK)
 		params->log_sq_size = MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE;
 	else
 		params->log_sq_size = MLX5E_REP_PARAMS_DEF_LOG_SQ_SIZE;
@@ -1370,7 +1370,7 @@ static void mlx5e_build_rep_netdev(struct net_device *netdev)
 	struct mlx5_eswitch_rep *rep = rpriv->rep;
 	struct mlx5_core_dev *mdev = priv->mdev;
 
-	if (rep->vport == FDB_UPLINK_VPORT) {
+	if (rep->vport == MLX5_VPORT_UPLINK) {
 		SET_NETDEV_DEV(netdev, &priv->mdev->pdev->dev);
 		netdev->netdev_ops = &mlx5e_netdev_ops_uplink_rep;
 		/* we want a persistent mac for the uplink rep */
@@ -1402,7 +1402,7 @@ static void mlx5e_build_rep_netdev(struct net_device *netdev)
 	netdev->hw_features    |= NETIF_F_TSO6;
 	netdev->hw_features    |= NETIF_F_RXCSUM;
 
-	if (rep->vport != FDB_UPLINK_VPORT)
+	if (rep->vport != MLX5_VPORT_UPLINK)
 		netdev->features |= NETIF_F_VLAN_CHALLENGED;
 
 	netdev->features |= netdev->hw_features;
@@ -1555,7 +1555,7 @@ static int mlx5e_init_rep_tx(struct mlx5e_priv *priv)
 		return err;
 	}
 
-	if (rpriv->rep->vport == FDB_UPLINK_VPORT) {
+	if (rpriv->rep->vport == MLX5_VPORT_UPLINK) {
 		uplink_priv = &rpriv->uplink_priv;
 
 		/* init shared tc flow table */
@@ -1591,7 +1591,7 @@ static void mlx5e_cleanup_rep_tx(struct mlx5e_priv *priv)
 	for (tc = 0; tc < priv->profile->max_tc; tc++)
 		mlx5e_destroy_tis(priv->mdev, priv->tisn[tc]);
 
-	if (rpriv->rep->vport == FDB_UPLINK_VPORT) {
+	if (rpriv->rep->vport == MLX5_VPORT_UPLINK) {
 		/* clean indirect TC block notifications */
 		unregister_netdevice_notifier(&rpriv->uplink_priv.netdevice_nb);
 		mlx5e_rep_indr_clean_block_privs(rpriv);
@@ -1710,7 +1710,7 @@ mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
 	rpriv->rep = rep;
 
 	nch = mlx5e_get_max_num_channels(dev);
-	profile = (rep->vport == FDB_UPLINK_VPORT) ? &mlx5e_uplink_rep_profile : &mlx5e_vf_rep_profile;
+	profile = (rep->vport == MLX5_VPORT_UPLINK) ? &mlx5e_uplink_rep_profile : &mlx5e_vf_rep_profile;
 	netdev = mlx5e_create_netdev(dev, profile, nch, rpriv);
 	if (!netdev) {
 		pr_warn("Failed to create representor netdev for vport %d\n",
@@ -1723,7 +1723,7 @@ mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
 	rep->rep_if[REP_ETH].priv = rpriv;
 	INIT_LIST_HEAD(&rpriv->vport_sqs_list);
 
-	if (rep->vport == FDB_UPLINK_VPORT) {
+	if (rep->vport == MLX5_VPORT_UPLINK) {
 		err = mlx5e_create_mdev_resources(dev);
 		if (err)
 			goto err_destroy_netdev;
@@ -1759,7 +1759,7 @@ err_detach_netdev:
 	mlx5e_detach_netdev(netdev_priv(netdev));
 
 err_destroy_mdev_resources:
-	if (rep->vport == FDB_UPLINK_VPORT)
+	if (rep->vport == MLX5_VPORT_UPLINK)
 		mlx5e_destroy_mdev_resources(dev);
 
 err_destroy_netdev:
@@ -1779,7 +1779,7 @@ mlx5e_vport_rep_unload(struct mlx5_eswitch_rep *rep)
 	unregister_netdev(netdev);
 	mlx5e_rep_neigh_cleanup(rpriv);
 	mlx5e_detach_netdev(priv);
-	if (rep->vport == FDB_UPLINK_VPORT)
+	if (rep->vport == MLX5_VPORT_UPLINK)
 		mlx5e_destroy_mdev_resources(priv->mdev);
 	mlx5e_destroy_netdev(priv);
 	kfree(ppriv); /* mlx5e_rep_priv */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index cae6c6d48984..1a73e661056a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -1834,7 +1834,7 @@ static int parse_cls_flower(struct mlx5e_priv *priv,
 
 	if (!err && (flow->flags & MLX5E_TC_FLOW_ESWITCH)) {
 		rep = rpriv->rep;
-		if (rep->vport != FDB_UPLINK_VPORT &&
+		if (rep->vport != MLX5_VPORT_UPLINK &&
 		    (esw->offloads.inline_mode != MLX5_INLINE_MODE_NONE &&
 		    esw->offloads.inline_mode < match_level)) {
 			NL_SET_ERR_MSG_MOD(extack,
@@ -2724,7 +2724,7 @@ static struct rhashtable *get_tc_ht(struct mlx5e_priv *priv, int flags)
 static bool is_peer_flow_needed(struct mlx5e_tc_flow *flow)
 {
 	struct mlx5_esw_flow_attr *attr = flow->esw_attr;
-	bool is_rep_ingress = attr->in_rep->vport != FDB_UPLINK_VPORT &&
+	bool is_rep_ingress = attr->in_rep->vport != MLX5_VPORT_UPLINK &&
 			      flow->flags & MLX5E_TC_FLOW_INGRESS;
 	bool act_is_encap = !!(attr->action &
 			       MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT);
@@ -2849,7 +2849,7 @@ static int mlx5e_tc_add_fdb_peer_flow(struct tc_cls_flower_offload *f,
 	 * original flow and packets redirected from uplink use the
 	 * peer mdev.
 	 */
-	if (flow->esw_attr->in_rep->vport == FDB_UPLINK_VPORT)
+	if (flow->esw_attr->in_rep->vport == MLX5_VPORT_UPLINK)
 		in_mdev = peer_priv->mdev;
 	else
 		in_mdev = priv->mdev;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index d7382892e81c..0db56b4b7009 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -40,8 +40,6 @@
 #include "eswitch.h"
 #include "fs_core.h"
 
-#define UPLINK_VPORT 0xFFFF
-
 enum {
 	MLX5_ACTION_NONE = 0,
 	MLX5_ACTION_ADD  = 1,
@@ -188,7 +186,7 @@ __esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u16 vport, bool rx_rule,
 					misc_parameters);
 		mc_misc  = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
 					misc_parameters);
-		MLX5_SET(fte_match_set_misc, mv_misc, source_port, UPLINK_VPORT);
+		MLX5_SET(fte_match_set_misc, mv_misc, source_port, MLX5_VPORT_UPLINK);
 		MLX5_SET_TO_ONES(fte_match_set_misc, mc_misc, source_port);
 	}
 
@@ -499,7 +497,7 @@ static int esw_add_mc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr)
 		return -ENOMEM;
 
 	esw_mc->uplink_rule = /* Forward MC MAC to Uplink */
-		esw_fdb_set_vport_rule(esw, mac, UPLINK_VPORT);
+		esw_fdb_set_vport_rule(esw, mac, MLX5_VPORT_UPLINK);
 
 	/* Add this multicast mac to all the mc promiscuous vports */
 	update_allmulti_vports(esw, vaddr, esw_mc);
@@ -736,7 +734,7 @@ static void esw_apply_vport_rx_mode(struct mlx5_eswitch *esw, u16 vport_num,
 		if (!allmulti_addr->uplink_rule)
 			allmulti_addr->uplink_rule =
 				esw_fdb_set_vport_allmulti_rule(esw,
-								UPLINK_VPORT);
+								MLX5_VPORT_UPLINK);
 		allmulti_addr->refcnt++;
 	} else if (vport->allmulti_rule) {
 		mlx5_del_flow_rules(vport->allmulti_rule);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index 9c89eea9b2c3..94da74b1e6ea 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -49,8 +49,6 @@
 #define MLX5_MAX_MC_PER_VPORT(dev) \
 	(1 << MLX5_CAP_GEN(dev, log_max_current_mc_list))
 
-#define FDB_UPLINK_VPORT 0xffff
-
 #define MLX5_MIN_BW_SHARE 1
 
 #define MLX5_RATE_TO_BW_SHARE(rate, divider, limit) \
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 53065b6ae593..b0b1267eab07 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -359,15 +359,15 @@ static int esw_add_vlan_action_check(struct mlx5_esw_flow_attr *attr,
 	in_rep  = attr->in_rep;
 	out_rep = attr->dests[0].rep;
 
-	if (push && in_rep->vport == FDB_UPLINK_VPORT)
+	if (push && in_rep->vport == MLX5_VPORT_UPLINK)
 		goto out_notsupp;
 
-	if (pop && out_rep->vport == FDB_UPLINK_VPORT)
+	if (pop && out_rep->vport == MLX5_VPORT_UPLINK)
 		goto out_notsupp;
 
 	/* vport has vlan push configured, can't offload VF --> wire rules w.o it */
 	if (!push && !pop && fwd)
-		if (in_rep->vlan && out_rep->vport == FDB_UPLINK_VPORT)
+		if (in_rep->vlan && out_rep->vport == MLX5_VPORT_UPLINK)
 			goto out_notsupp;
 
 	/* protects against (1) setting rules with different vlans to push and
@@ -409,7 +409,7 @@ int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw,
 
 	if (!push && !pop && fwd) {
 		/* tracks VF --> wire rules without vlan push action */
-		if (attr->dests[0].rep->vport == FDB_UPLINK_VPORT) {
+		if (attr->dests[0].rep->vport == MLX5_VPORT_UPLINK) {
 			vport->vlan_refcount++;
 			attr->vlan_handled = true;
 		}
@@ -469,7 +469,7 @@ int mlx5_eswitch_del_vlan_action(struct mlx5_eswitch *esw,
 
 	if (!push && !pop && fwd) {
 		/* tracks VF --> wire rules without vlan push action */
-		if (attr->dests[0].rep->vport == FDB_UPLINK_VPORT)
+		if (attr->dests[0].rep->vport == MLX5_VPORT_UPLINK)
 			vport->vlan_refcount--;
 
 		return 0;
@@ -1227,7 +1227,7 @@ int esw_offloads_init_reps(struct mlx5_eswitch *esw)
 		ether_addr_copy(rep->hw_id, hw_id);
 	}
 
-	offloads->vport_reps[0].vport = FDB_UPLINK_VPORT;
+	offloads->vport_reps[0].vport = MLX5_VPORT_UPLINK;
 
 	return 0;
 }
@@ -1811,7 +1811,7 @@ void *mlx5_eswitch_get_proto_dev(struct mlx5_eswitch *esw,
 	struct mlx5_esw_offload *offloads = &esw->offloads;
 	struct mlx5_eswitch_rep *rep;
 
-	if (vport == FDB_UPLINK_VPORT)
+	if (vport == MLX5_VPORT_UPLINK)
 		vport = UPLINK_REP_INDEX;
 
 	rep = &offloads->vport_reps[vport];
diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h
index 1654b911cdb2..2e2928eacd97 100644
--- a/include/linux/mlx5/vport.h
+++ b/include/linux/mlx5/vport.h
@@ -42,6 +42,10 @@ enum {
 	MLX5_CAP_INLINE_MODE_NOT_REQUIRED,
 };
 
+enum {
+	MLX5_VPORT_UPLINK		= 0xffff
+};
+
 u8 mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport);
 int mlx5_modify_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod,
 				  u16 vport, u8 state);
-- 
cgit v1.2.3


From bf3e4d387daed36aad2cfd4f493b07714ac0cd5e Mon Sep 17 00:00:00 2001
From: Bodong Wang <bodong@mellanox.com>
Date: Tue, 12 Feb 2019 22:55:41 -0800
Subject: net/mlx5: Relocate vport macros to the vport header file

These are two macros in the driver general header which deal with the
number of total vports and if a vport is vport manager. Such macros
are vport entities, better to place them at the vport header file.

This patch doesn't change any functionality.

Signed-off-by: Bodong Wang <bodong@mellanox.com>
Reviewed-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/eq.c      | 1 +
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 1 +
 include/linux/mlx5/driver.h                       | 6 ------
 include/linux/mlx5/vport.h                        | 7 +++++++
 4 files changed, 9 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index 5c02f9291799..bb6e5b5d9681 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -34,6 +34,7 @@
 #include <linux/notifier.h>
 #include <linux/module.h>
 #include <linux/mlx5/driver.h>
+#include <linux/mlx5/vport.h>
 #include <linux/mlx5/eq.h>
 #include <linux/mlx5/cmd.h>
 #ifdef CONFIG_RFS_ACCEL
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index 79f122b45def..b6a7bc8f667c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -32,6 +32,7 @@
 
 #include <linux/mutex.h>
 #include <linux/mlx5/driver.h>
+#include <linux/mlx5/vport.h>
 #include <linux/mlx5/eswitch.h>
 
 #include "mlx5_core.h"
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 46e0aa52a58a..c5454f985e1d 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -1097,12 +1097,6 @@ static inline u16 mlx5_core_max_vfs(struct mlx5_core_dev *dev)
 		return pci_sriov_get_totalvfs(dev->pdev);
 }
 
-#define MLX5_TOTAL_VPORTS(mdev) (1 + mlx5_core_max_vfs(mdev))
-#define MLX5_VPORT_MANAGER(mdev) \
-	(MLX5_CAP_GEN(mdev, vport_group_manager) && \
-	 (MLX5_CAP_GEN(mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH) && \
-	 mlx5_core_is_pf(mdev))
-
 static inline int mlx5_get_gid_table_len(u16 param)
 {
 	if (param > 4) {
diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h
index 2e2928eacd97..28f47e868fbc 100644
--- a/include/linux/mlx5/vport.h
+++ b/include/linux/mlx5/vport.h
@@ -36,6 +36,13 @@
 #include <linux/mlx5/driver.h>
 #include <linux/mlx5/device.h>
 
+#define MLX5_TOTAL_VPORTS(mdev)	(1 + mlx5_core_max_vfs(mdev))
+
+#define MLX5_VPORT_MANAGER(mdev)					\
+	(MLX5_CAP_GEN(mdev, vport_group_manager) &&			\
+	 (MLX5_CAP_GEN(mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH) &&	\
+	 mlx5_core_is_pf(mdev))
+
 enum {
 	MLX5_CAP_INLINE_MODE_L2,
 	MLX5_CAP_INLINE_MODE_VPORT_CONTEXT,
-- 
cgit v1.2.3


From cd7e4186af9d968559852b4eeb1039b3419cc590 Mon Sep 17 00:00:00 2001
From: Bodong Wang <bodong@mellanox.com>
Date: Tue, 12 Feb 2019 22:55:42 -0800
Subject: net/mlx5: E-Switch, Avoid magic numbers when initializing offloads
 mode

When dealing with the offloads mode initialization, driver refers to
the number of VFs and add magic number one (1) to take account of the
uplink. This is not clear and will make the code less readable after
adding other vports (e.g. host PF). As these are special vports
compared to VF vports, add a helper macro to denote such special
vports and eliminate the use of magic number.

Moreover, when creating offloads flow table and groups, the driver
reserves two more slots for UC and MC miss rules. Replace this magic
number with a helper macro as well.

This patch doesn't change any functionality.

Signed-off-by: Bodong Wang <bodong@mellanox.com>
Reviewed-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.c  |  2 +-
 .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 25 +++++++++++++---------
 include/linux/mlx5/vport.h                         |  4 +++-
 3 files changed, 19 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index 0db56b4b7009..49a9e3877d2c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -1638,7 +1638,7 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode)
 	} else {
 		mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_ETH);
 		mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
-		err = esw_offloads_init(esw, nvfs + 1);
+		err = esw_offloads_init(esw, nvfs + MLX5_SPECIAL_VPORTS);
 	}
 
 	if (err)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index b0b1267eab07..1496e82b5108 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -46,6 +46,11 @@ enum {
 	FDB_SLOW_PATH
 };
 
+/* There are two match-all miss flows, one for unicast dst mac and
+ * one for multicast.
+ */
+#define MLX5_ESW_MISS_FLOWS (2)
+
 #define fdb_prio_table(esw, chain, prio, level) \
 	(esw)->fdb_table.offloads.fdb_prio[(chain)][(prio)][(level)]
 
@@ -904,8 +909,8 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports)
 		esw->fdb_table.offloads.fdb_left[i] =
 			ESW_POOLS[i] <= fdb_max ? ESW_SIZE / ESW_POOLS[i] : 0;
 
-	table_size = nvports * MAX_SQ_NVPORTS + MAX_PF_SQ + 2 +
-		esw->total_vports;
+	table_size = nvports * MAX_SQ_NVPORTS + MAX_PF_SQ +
+		MLX5_ESW_MISS_FLOWS + esw->total_vports;
 
 	/* create the slow path fdb with encap set, so further table instances
 	 * can be created at run time while VFs are probed if the FW allows that.
@@ -999,7 +1004,8 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports)
 	dmac[0] = 0x01;
 
 	MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ix);
-	MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ix + 2);
+	MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index,
+		 ix + MLX5_ESW_MISS_FLOWS);
 
 	g = mlx5_create_flow_group(fdb, flow_group_in);
 	if (IS_ERR(g)) {
@@ -1048,7 +1054,7 @@ static void esw_destroy_offloads_fdb_tables(struct mlx5_eswitch *esw)
 	esw_destroy_offloads_fast_fdb_tables(esw);
 }
 
-static int esw_create_offloads_table(struct mlx5_eswitch *esw)
+static int esw_create_offloads_table(struct mlx5_eswitch *esw, int nvports)
 {
 	struct mlx5_flow_table_attr ft_attr = {};
 	struct mlx5_core_dev *dev = esw->dev;
@@ -1062,7 +1068,7 @@ static int esw_create_offloads_table(struct mlx5_eswitch *esw)
 		return -EOPNOTSUPP;
 	}
 
-	ft_attr.max_fte = dev->priv.sriov.num_vfs + 2;
+	ft_attr.max_fte = nvports + MLX5_ESW_MISS_FLOWS;
 
 	ft_offloads = mlx5_create_flow_table(ns, &ft_attr);
 	if (IS_ERR(ft_offloads)) {
@@ -1082,16 +1088,15 @@ static void esw_destroy_offloads_table(struct mlx5_eswitch *esw)
 	mlx5_destroy_flow_table(offloads->ft_offloads);
 }
 
-static int esw_create_vport_rx_group(struct mlx5_eswitch *esw)
+static int esw_create_vport_rx_group(struct mlx5_eswitch *esw, int nvports)
 {
 	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
 	struct mlx5_flow_group *g;
-	struct mlx5_priv *priv = &esw->dev->priv;
 	u32 *flow_group_in;
 	void *match_criteria, *misc;
 	int err = 0;
-	int nvports = priv->sriov.num_vfs + 2;
 
+	nvports = nvports + MLX5_ESW_MISS_FLOWS;
 	flow_group_in = kvzalloc(inlen, GFP_KERNEL);
 	if (!flow_group_in)
 		return -ENOMEM;
@@ -1407,11 +1412,11 @@ int esw_offloads_init(struct mlx5_eswitch *esw, int nvports)
 	if (err)
 		return err;
 
-	err = esw_create_offloads_table(esw);
+	err = esw_create_offloads_table(esw, nvports);
 	if (err)
 		goto create_ft_err;
 
-	err = esw_create_vport_rx_group(esw);
+	err = esw_create_vport_rx_group(esw, nvports);
 	if (err)
 		goto create_fg_err;
 
diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h
index 28f47e868fbc..3bc05449ac39 100644
--- a/include/linux/mlx5/vport.h
+++ b/include/linux/mlx5/vport.h
@@ -36,7 +36,9 @@
 #include <linux/mlx5/driver.h>
 #include <linux/mlx5/device.h>
 
-#define MLX5_TOTAL_VPORTS(mdev)	(1 + mlx5_core_max_vfs(mdev))
+#define MLX5_VPORT_PF_PLACEHOLDER (1u)
+#define MLX5_SPECIAL_VPORTS (MLX5_VPORT_PF_PLACEHOLDER)
+#define MLX5_TOTAL_VPORTS(mdev) (MLX5_SPECIAL_VPORTS +	mlx5_core_max_vfs(mdev))
 
 #define MLX5_VPORT_MANAGER(mdev)					\
 	(MLX5_CAP_GEN(mdev, vport_group_manager) &&			\
-- 
cgit v1.2.3


From bc4e12ffefdd886057eabe38135515690d0756a6 Mon Sep 17 00:00:00 2001
From: Aya Levin <ayal@mellanox.com>
Date: Tue, 12 Feb 2019 22:55:43 -0800
Subject: net/mlx5: Refactor queries to speed fields in Port Type and Speed
 register

This patch fascicles queries to speed related fields in Port Type and
Speed register (PTYS) into a single API. I addition, this patch
refactors functions which serves only Ethernet driver: remove the
protocol type as an input parameter, move code from 'core' directory
into 'en' directory and add 'eth' prefix to the function's name. The
patch also encapsulates functions that are not used outside the Ethernet
driver removes redundant include files.

Signed-off-by: Aya Levin <ayal@mellanox.com>
Reviewed-by: Eran Ben Elisha <eranbe@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/infiniband/hw/mlx5/main.c                  |   6 +-
 drivers/net/ethernet/mellanox/mlx5/core/en/port.c  |  75 +++++++++++++--
 drivers/net/ethernet/mellanox/mlx5/core/en/port.h  |  12 +++
 .../net/ethernet/mellanox/mlx5/core/en_ethtool.c   |  23 ++---
 drivers/net/ethernet/mellanox/mlx5/core/port.c     | 106 ---------------------
 include/linux/mlx5/port.h                          |  11 ---
 6 files changed, 91 insertions(+), 142 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 87ce62e44898..efd08b41126c 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -393,6 +393,7 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num,
 				struct ib_port_attr *props)
 {
 	struct mlx5_ib_dev *dev = to_mdev(device);
+	u32 out[MLX5_ST_SZ_DW(ptys_reg)] = {0};
 	struct mlx5_core_dev *mdev;
 	struct net_device *ndev, *upper;
 	enum ib_mtu ndev_ib_mtu;
@@ -416,10 +417,11 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num,
 	/* Possible bad flows are checked before filling out props so in case
 	 * of an error it will still be zeroed out.
 	 */
-	err = mlx5_query_port_eth_proto_oper(mdev, &eth_prot_oper,
-					     mdev_port_num);
+	err = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN,
+				   mdev_port_num);
 	if (err)
 		goto out;
+	eth_prot_oper = MLX5_GET(ptys_reg, out, eth_proto_oper);
 
 	props->active_width     = IB_WIDTH_4X;
 	props->active_speed     = IB_SPEED_QDR;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c
index 4a37713023be..9a1c2b2f87d8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c
@@ -63,6 +63,67 @@ static const u32 mlx5e_link_speed[MLX5E_LINK_MODES_NUMBER] = {
 	[MLX5E_50GBASE_KR2]       = 50000,
 };
 
+int mlx5_port_query_eth_proto(struct mlx5_core_dev *dev, u8 port,
+			      struct mlx5e_port_eth_proto *eproto)
+{
+	u32 out[MLX5_ST_SZ_DW(ptys_reg)];
+	int err;
+
+	if (!eproto)
+		return -EINVAL;
+
+	err = mlx5_query_port_ptys(dev, out, sizeof(out), MLX5_PTYS_EN, port);
+	if (err)
+		return err;
+
+	eproto->cap   = MLX5_GET(ptys_reg, out, eth_proto_capability);
+	eproto->admin = MLX5_GET(ptys_reg, out, eth_proto_admin);
+	eproto->oper  = MLX5_GET(ptys_reg, out, eth_proto_oper);
+	return 0;
+}
+
+void mlx5_port_query_eth_autoneg(struct mlx5_core_dev *dev, u8 *an_status,
+				 u8 *an_disable_cap, u8 *an_disable_admin)
+{
+	u32 out[MLX5_ST_SZ_DW(ptys_reg)];
+
+	*an_status = 0;
+	*an_disable_cap = 0;
+	*an_disable_admin = 0;
+
+	if (mlx5_query_port_ptys(dev, out, sizeof(out), MLX5_PTYS_EN, 1))
+		return;
+
+	*an_status = MLX5_GET(ptys_reg, out, an_status);
+	*an_disable_cap = MLX5_GET(ptys_reg, out, an_disable_cap);
+	*an_disable_admin = MLX5_GET(ptys_reg, out, an_disable_admin);
+}
+
+int mlx5_port_set_eth_ptys(struct mlx5_core_dev *dev, bool an_disable,
+			   u32 proto_admin)
+{
+	u32 out[MLX5_ST_SZ_DW(ptys_reg)];
+	u32 in[MLX5_ST_SZ_DW(ptys_reg)];
+	u8 an_disable_admin;
+	u8 an_disable_cap;
+	u8 an_status;
+
+	mlx5_port_query_eth_autoneg(dev, &an_status, &an_disable_cap,
+				    &an_disable_admin);
+	if (!an_disable_cap && an_disable)
+		return -EPERM;
+
+	memset(in, 0, sizeof(in));
+
+	MLX5_SET(ptys_reg, in, local_port, 1);
+	MLX5_SET(ptys_reg, in, an_disable_admin, an_disable);
+	MLX5_SET(ptys_reg, in, proto_mask, MLX5_PTYS_EN);
+	MLX5_SET(ptys_reg, in, eth_proto_admin, proto_admin);
+
+	return mlx5_core_access_reg(dev, in, sizeof(in), out,
+			    sizeof(out), MLX5_REG_PTYS, 0, 1);
+}
+
 u32 mlx5e_port_ptys2speed(u32 eth_proto_oper)
 {
 	unsigned long temp = eth_proto_oper;
@@ -78,16 +139,14 @@ u32 mlx5e_port_ptys2speed(u32 eth_proto_oper)
 
 int mlx5e_port_linkspeed(struct mlx5_core_dev *mdev, u32 *speed)
 {
-	u32 out[MLX5_ST_SZ_DW(ptys_reg)] = {};
-	u32 eth_proto_oper;
+	struct mlx5e_port_eth_proto eproto;
 	int err;
 
-	err = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN, 1);
+	err = mlx5_port_query_eth_proto(mdev, 1, &eproto);
 	if (err)
 		return err;
 
-	eth_proto_oper = MLX5_GET(ptys_reg, out, eth_proto_oper);
-	*speed = mlx5e_port_ptys2speed(eth_proto_oper);
+	*speed = mlx5e_port_ptys2speed(eproto.oper);
 	if (!(*speed))
 		err = -EINVAL;
 
@@ -96,17 +155,17 @@ int mlx5e_port_linkspeed(struct mlx5_core_dev *mdev, u32 *speed)
 
 int mlx5e_port_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed)
 {
+	struct mlx5e_port_eth_proto eproto;
 	u32 max_speed = 0;
-	u32 proto_cap;
 	int err;
 	int i;
 
-	err = mlx5_query_port_proto_cap(mdev, &proto_cap, MLX5_PTYS_EN);
+	err = mlx5_port_query_eth_proto(mdev, 1, &eproto);
 	if (err)
 		return err;
 
 	for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i)
-		if (proto_cap & MLX5E_PROT_MASK(i))
+		if (eproto.cap & MLX5E_PROT_MASK(i))
 			max_speed = max(max_speed, mlx5e_link_speed[i]);
 
 	*speed = max_speed;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.h b/drivers/net/ethernet/mellanox/mlx5/core/en/port.h
index cd2160b8c9bf..4bdab8be10af 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/port.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.h
@@ -36,6 +36,18 @@
 #include <linux/mlx5/driver.h>
 #include "en.h"
 
+struct mlx5e_port_eth_proto {
+	u32 cap;
+	u32 admin;
+	u32 oper;
+};
+
+int mlx5_port_query_eth_proto(struct mlx5_core_dev *dev, u8 port,
+			      struct mlx5e_port_eth_proto *eproto);
+void mlx5_port_query_eth_autoneg(struct mlx5_core_dev *dev, u8 *an_status,
+				 u8 *an_disable_cap, u8 *an_disable_admin);
+int mlx5_port_set_eth_ptys(struct mlx5_core_dev *dev, bool an_disable,
+			   u32 proto_admin);
 u32 mlx5e_port_ptys2speed(u32 eth_proto_oper);
 int mlx5e_port_linkspeed(struct mlx5_core_dev *mdev, u32 *speed);
 int mlx5e_port_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index c9df08133718..c29e141d72fb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -882,7 +882,7 @@ int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv,
 				     const struct ethtool_link_ksettings *link_ksettings)
 {
 	struct mlx5_core_dev *mdev = priv->mdev;
-	u32 eth_proto_cap, eth_proto_admin;
+	struct mlx5e_port_eth_proto eproto;
 	bool an_changes = false;
 	u8 an_disable_admin;
 	u8 an_disable_cap;
@@ -898,14 +898,14 @@ int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv,
 		mlx5e_ethtool2ptys_adver_link(link_ksettings->link_modes.advertising) :
 		mlx5e_port_speed2linkmodes(speed);
 
-	err = mlx5_query_port_proto_cap(mdev, &eth_proto_cap, MLX5_PTYS_EN);
+	err = mlx5_port_query_eth_proto(mdev, 1, &eproto);
 	if (err) {
-		netdev_err(priv->netdev, "%s: query port eth proto cap failed: %d\n",
+		netdev_err(priv->netdev, "%s: query port eth proto failed: %d\n",
 			   __func__, err);
 		goto out;
 	}
 
-	link_modes = link_modes & eth_proto_cap;
+	link_modes = link_modes & eproto.cap;
 	if (!link_modes) {
 		netdev_err(priv->netdev, "%s: Not supported link mode(s) requested",
 			   __func__);
@@ -913,24 +913,17 @@ int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv,
 		goto out;
 	}
 
-	err = mlx5_query_port_proto_admin(mdev, &eth_proto_admin, MLX5_PTYS_EN);
-	if (err) {
-		netdev_err(priv->netdev, "%s: query port eth proto admin failed: %d\n",
-			   __func__, err);
-		goto out;
-	}
-
-	mlx5_query_port_autoneg(mdev, MLX5_PTYS_EN, &an_status,
-				&an_disable_cap, &an_disable_admin);
+	mlx5_port_query_eth_autoneg(mdev, &an_status, &an_disable_cap,
+				    &an_disable_admin);
 
 	an_disable = link_ksettings->base.autoneg == AUTONEG_DISABLE;
 	an_changes = ((!an_disable && an_disable_admin) ||
 		      (an_disable && !an_disable_admin));
 
-	if (!an_changes && link_modes == eth_proto_admin)
+	if (!an_changes && link_modes == eproto.admin)
 		goto out;
 
-	mlx5_set_port_ptys(mdev, an_disable, link_modes, MLX5_PTYS_EN);
+	mlx5_port_set_eth_ptys(mdev, an_disable, link_modes);
 	mlx5_toggle_port_link(mdev);
 
 out:
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c
index 2b82f35f4c35..b81542820528 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/port.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c
@@ -30,10 +30,7 @@
  * SOFTWARE.
  */
 
-#include <linux/module.h>
-#include <linux/mlx5/driver.h>
 #include <linux/mlx5/port.h>
-#include <linux/mlx5/cmd.h>
 #include "mlx5_core.h"
 
 int mlx5_core_access_reg(struct mlx5_core_dev *dev, void *data_in,
@@ -157,44 +154,6 @@ int mlx5_set_port_beacon(struct mlx5_core_dev *dev, u16 beacon_duration)
 				    sizeof(out), MLX5_REG_MLCR, 0, 1);
 }
 
-int mlx5_query_port_proto_cap(struct mlx5_core_dev *dev,
-			      u32 *proto_cap, int proto_mask)
-{
-	u32 out[MLX5_ST_SZ_DW(ptys_reg)];
-	int err;
-
-	err = mlx5_query_port_ptys(dev, out, sizeof(out), proto_mask, 1);
-	if (err)
-		return err;
-
-	if (proto_mask == MLX5_PTYS_EN)
-		*proto_cap = MLX5_GET(ptys_reg, out, eth_proto_capability);
-	else
-		*proto_cap = MLX5_GET(ptys_reg, out, ib_proto_capability);
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(mlx5_query_port_proto_cap);
-
-int mlx5_query_port_proto_admin(struct mlx5_core_dev *dev,
-				u32 *proto_admin, int proto_mask)
-{
-	u32 out[MLX5_ST_SZ_DW(ptys_reg)];
-	int err;
-
-	err = mlx5_query_port_ptys(dev, out, sizeof(out), proto_mask, 1);
-	if (err)
-		return err;
-
-	if (proto_mask == MLX5_PTYS_EN)
-		*proto_admin = MLX5_GET(ptys_reg, out, eth_proto_admin);
-	else
-		*proto_admin = MLX5_GET(ptys_reg, out, ib_proto_admin);
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(mlx5_query_port_proto_admin);
-
 int mlx5_query_port_link_width_oper(struct mlx5_core_dev *dev,
 				    u8 *link_width_oper, u8 local_port)
 {
@@ -211,23 +170,6 @@ int mlx5_query_port_link_width_oper(struct mlx5_core_dev *dev,
 }
 EXPORT_SYMBOL_GPL(mlx5_query_port_link_width_oper);
 
-int mlx5_query_port_eth_proto_oper(struct mlx5_core_dev *dev,
-				   u32 *proto_oper, u8 local_port)
-{
-	u32 out[MLX5_ST_SZ_DW(ptys_reg)];
-	int err;
-
-	err = mlx5_query_port_ptys(dev, out, sizeof(out), MLX5_PTYS_EN,
-				   local_port);
-	if (err)
-		return err;
-
-	*proto_oper = MLX5_GET(ptys_reg, out, eth_proto_oper);
-
-	return 0;
-}
-EXPORT_SYMBOL(mlx5_query_port_eth_proto_oper);
-
 int mlx5_query_port_ib_proto_oper(struct mlx5_core_dev *dev,
 				  u8 *proto_oper, u8 local_port)
 {
@@ -245,35 +187,6 @@ int mlx5_query_port_ib_proto_oper(struct mlx5_core_dev *dev,
 }
 EXPORT_SYMBOL(mlx5_query_port_ib_proto_oper);
 
-int mlx5_set_port_ptys(struct mlx5_core_dev *dev, bool an_disable,
-		       u32 proto_admin, int proto_mask)
-{
-	u32 out[MLX5_ST_SZ_DW(ptys_reg)];
-	u32 in[MLX5_ST_SZ_DW(ptys_reg)];
-	u8 an_disable_admin;
-	u8 an_disable_cap;
-	u8 an_status;
-
-	mlx5_query_port_autoneg(dev, proto_mask, &an_status,
-				&an_disable_cap, &an_disable_admin);
-	if (!an_disable_cap && an_disable)
-		return -EPERM;
-
-	memset(in, 0, sizeof(in));
-
-	MLX5_SET(ptys_reg, in, local_port, 1);
-	MLX5_SET(ptys_reg, in, an_disable_admin, an_disable);
-	MLX5_SET(ptys_reg, in, proto_mask, proto_mask);
-	if (proto_mask == MLX5_PTYS_EN)
-		MLX5_SET(ptys_reg, in, eth_proto_admin, proto_admin);
-	else
-		MLX5_SET(ptys_reg, in, ib_proto_admin, proto_admin);
-
-	return mlx5_core_access_reg(dev, in, sizeof(in), out,
-				    sizeof(out), MLX5_REG_PTYS, 0, 1);
-}
-EXPORT_SYMBOL_GPL(mlx5_set_port_ptys);
-
 /* This function should be used after setting a port register only */
 void mlx5_toggle_port_link(struct mlx5_core_dev *dev)
 {
@@ -606,25 +519,6 @@ int mlx5_query_port_pfc(struct mlx5_core_dev *dev, u8 *pfc_en_tx, u8 *pfc_en_rx)
 }
 EXPORT_SYMBOL_GPL(mlx5_query_port_pfc);
 
-void mlx5_query_port_autoneg(struct mlx5_core_dev *dev, int proto_mask,
-			     u8 *an_status,
-			     u8 *an_disable_cap, u8 *an_disable_admin)
-{
-	u32 out[MLX5_ST_SZ_DW(ptys_reg)];
-
-	*an_status = 0;
-	*an_disable_cap = 0;
-	*an_disable_admin = 0;
-
-	if (mlx5_query_port_ptys(dev, out, sizeof(out), proto_mask, 1))
-		return;
-
-	*an_status = MLX5_GET(ptys_reg, out, an_status);
-	*an_disable_cap = MLX5_GET(ptys_reg, out, an_disable_cap);
-	*an_disable_admin = MLX5_GET(ptys_reg, out, an_disable_admin);
-}
-EXPORT_SYMBOL_GPL(mlx5_query_port_autoneg);
-
 int mlx5_max_tc(struct mlx5_core_dev *mdev)
 {
 	u8 num_tc = MLX5_CAP_GEN(mdev, max_tc) ? : 8;
diff --git a/include/linux/mlx5/port.h b/include/linux/mlx5/port.h
index bf4bc01ffb0c..5be7eefa6d75 100644
--- a/include/linux/mlx5/port.h
+++ b/include/linux/mlx5/port.h
@@ -110,27 +110,16 @@ enum mlx5e_connector_type {
 int mlx5_set_port_caps(struct mlx5_core_dev *dev, u8 port_num, u32 caps);
 int mlx5_query_port_ptys(struct mlx5_core_dev *dev, u32 *ptys,
 			 int ptys_size, int proto_mask, u8 local_port);
-int mlx5_query_port_proto_cap(struct mlx5_core_dev *dev,
-			      u32 *proto_cap, int proto_mask);
-int mlx5_query_port_proto_admin(struct mlx5_core_dev *dev,
-				u32 *proto_admin, int proto_mask);
 int mlx5_query_port_link_width_oper(struct mlx5_core_dev *dev,
 				    u8 *link_width_oper, u8 local_port);
 int mlx5_query_port_ib_proto_oper(struct mlx5_core_dev *dev,
 				  u8 *proto_oper, u8 local_port);
-int mlx5_query_port_eth_proto_oper(struct mlx5_core_dev *dev,
-				   u32 *proto_oper, u8 local_port);
-int mlx5_set_port_ptys(struct mlx5_core_dev *dev, bool an_disable,
-		       u32 proto_admin, int proto_mask);
 void mlx5_toggle_port_link(struct mlx5_core_dev *dev);
 int mlx5_set_port_admin_status(struct mlx5_core_dev *dev,
 			       enum mlx5_port_status status);
 int mlx5_query_port_admin_status(struct mlx5_core_dev *dev,
 				 enum mlx5_port_status *status);
 int mlx5_set_port_beacon(struct mlx5_core_dev *dev, u16 beacon_duration);
-void mlx5_query_port_autoneg(struct mlx5_core_dev *dev, int proto_mask,
-			     u8 *an_status,
-			     u8 *an_disable_cap, u8 *an_disable_admin);
 
 int mlx5_set_port_mtu(struct mlx5_core_dev *dev, u16 mtu, u8 port);
 void mlx5_query_port_max_mtu(struct mlx5_core_dev *dev, u16 *max_mtu, u8 port);
-- 
cgit v1.2.3


From a0a899895692d4227cc55b087f5f47e185af7f23 Mon Sep 17 00:00:00 2001
From: Aya Levin <ayal@mellanox.com>
Date: Tue, 12 Feb 2019 22:55:44 -0800
Subject: net/mlx5: Add new fields to Port Type and Speed register

Register Port Type and Speed (PTYS) introduces three new fields
extending the speed/protocols the can be reported and configured.

Signed-off-by: Aya Levin <ayal@mellanox.com>
Reviewed-by: Eran Ben Elisha <eranbe@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 include/linux/mlx5/mlx5_ifc.h | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 565046830559..5decffe565fb 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -7826,21 +7826,23 @@ struct mlx5_ifc_ptys_reg_bits {
 	u8         proto_mask[0x3];
 
 	u8         an_status[0x4];
-	u8         reserved_at_24[0x3c];
+	u8         reserved_at_24[0x1c];
+
+	u8         ext_eth_proto_capability[0x20];
 
 	u8         eth_proto_capability[0x20];
 
 	u8         ib_link_width_capability[0x10];
 	u8         ib_proto_capability[0x10];
 
-	u8         reserved_at_a0[0x20];
+	u8         ext_eth_proto_admin[0x20];
 
 	u8         eth_proto_admin[0x20];
 
 	u8         ib_link_width_admin[0x10];
 	u8         ib_proto_admin[0x10];
 
-	u8         reserved_at_100[0x20];
+	u8         ext_eth_proto_oper[0x20];
 
 	u8         eth_proto_oper[0x20];
 
@@ -8289,7 +8291,9 @@ struct mlx5_ifc_mpegc_reg_bits {
 struct mlx5_ifc_pcam_enhanced_features_bits {
 	u8         reserved_at_0[0x6d];
 	u8         rx_icrc_encapsulated_counter[0x1];
-	u8	   reserved_at_6e[0x8];
+	u8	   reserved_at_6e[0x4];
+	u8         ptys_extended_ethernet[0x1];
+	u8	   reserved_at_73[0x3];
 	u8         pfcc_mask[0x1];
 	u8         reserved_at_77[0x3];
 	u8         per_lane_error_counters[0x1];
-- 
cgit v1.2.3


From a08b4ed1373dc59e3e15029bc6f135ba0f53c9a7 Mon Sep 17 00:00:00 2001
From: Aya Levin <ayal@mellanox.com>
Date: Tue, 12 Feb 2019 22:55:45 -0800
Subject: net/mlx5: Add support to ext_* fields introduced in Port Type and
 Speed register

This patch exposes new link modes (including 50Gbps per lane), and ext_*
fields which describes the new link modes in Port Type and Speed
register (PTYS).
Access functions, translation functions (speed <-> HW bits) and
link max speed function were modified.

Signed-off-by: Aya Levin <ayal@mellanox.com>
Reviewed-by: Eran Ben Elisha <eranbe@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/infiniband/hw/mlx5/main.c                  |  3 +-
 drivers/net/ethernet/mellanox/mlx5/core/en/port.c  | 85 ++++++++++++++++------
 drivers/net/ethernet/mellanox/mlx5/core/en/port.h  |  8 +-
 .../net/ethernet/mellanox/mlx5/core/en_ethtool.c   |  9 ++-
 include/linux/mlx5/port.h                          | 19 +++++
 5 files changed, 94 insertions(+), 30 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index efd08b41126c..3677c00fa3bb 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -421,7 +421,8 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num,
 				   mdev_port_num);
 	if (err)
 		goto out;
-	eth_prot_oper = MLX5_GET(ptys_reg, out, eth_proto_oper);
+	eth_prot_oper = MLX5_GET_ETH_PROTO(ptys_reg, out, false,
+					   eth_proto_oper);
 
 	props->active_width     = IB_WIDTH_4X;
 	props->active_speed     = IB_SPEED_QDR;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c
index 9a1c2b2f87d8..122927f3a600 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c
@@ -63,7 +63,31 @@ static const u32 mlx5e_link_speed[MLX5E_LINK_MODES_NUMBER] = {
 	[MLX5E_50GBASE_KR2]       = 50000,
 };
 
-int mlx5_port_query_eth_proto(struct mlx5_core_dev *dev, u8 port,
+static const u32 mlx5e_ext_link_speed[MLX5E_EXT_LINK_MODES_NUMBER] = {
+	[MLX5E_SGMII_100M]			= 100,
+	[MLX5E_1000BASE_X_SGMII]		= 1000,
+	[MLX5E_5GBASE_R]			= 5000,
+	[MLX5E_10GBASE_XFI_XAUI_1]		= 10000,
+	[MLX5E_40GBASE_XLAUI_4_XLPPI_4]		= 40000,
+	[MLX5E_25GAUI_1_25GBASE_CR_KR]		= 25000,
+	[MLX5E_50GAUI_2_LAUI_2_50GBASE_CR2_KR2]	= 50000,
+	[MLX5E_50GAUI_1_LAUI_1_50GBASE_CR_KR]	= 50000,
+	[MLX5E_CAUI_4_100GBASE_CR4_KR4]		= 100000,
+	[MLX5E_200GAUI_4_200GBASE_CR4_KR4]	= 200000,
+	[MLX5E_400GAUI_8]			= 400000,
+};
+
+static void mlx5e_port_get_speed_arr(struct mlx5_core_dev *mdev,
+				     const u32 **arr, u32 *size)
+{
+	bool ext = MLX5_CAP_PCAM_FEATURE(mdev, ptys_extended_ethernet);
+
+	*size = ext ? ARRAY_SIZE(mlx5e_ext_link_speed) :
+		      ARRAY_SIZE(mlx5e_link_speed);
+	*arr  = ext ? mlx5e_ext_link_speed : mlx5e_link_speed;
+}
+
+int mlx5_port_query_eth_proto(struct mlx5_core_dev *dev, u8 port, bool ext,
 			      struct mlx5e_port_eth_proto *eproto)
 {
 	u32 out[MLX5_ST_SZ_DW(ptys_reg)];
@@ -72,13 +96,17 @@ int mlx5_port_query_eth_proto(struct mlx5_core_dev *dev, u8 port,
 	if (!eproto)
 		return -EINVAL;
 
+	if (ext !=  MLX5_CAP_PCAM_FEATURE(dev, ptys_extended_ethernet))
+		return -EOPNOTSUPP;
+
 	err = mlx5_query_port_ptys(dev, out, sizeof(out), MLX5_PTYS_EN, port);
 	if (err)
 		return err;
 
-	eproto->cap   = MLX5_GET(ptys_reg, out, eth_proto_capability);
-	eproto->admin = MLX5_GET(ptys_reg, out, eth_proto_admin);
-	eproto->oper  = MLX5_GET(ptys_reg, out, eth_proto_oper);
+	eproto->cap   = MLX5_GET_ETH_PROTO(ptys_reg, out, ext,
+					   eth_proto_capability);
+	eproto->admin = MLX5_GET_ETH_PROTO(ptys_reg, out, ext, eth_proto_admin);
+	eproto->oper  = MLX5_GET_ETH_PROTO(ptys_reg, out, ext, eth_proto_oper);
 	return 0;
 }
 
@@ -100,7 +128,7 @@ void mlx5_port_query_eth_autoneg(struct mlx5_core_dev *dev, u8 *an_status,
 }
 
 int mlx5_port_set_eth_ptys(struct mlx5_core_dev *dev, bool an_disable,
-			   u32 proto_admin)
+			   u32 proto_admin, bool ext)
 {
 	u32 out[MLX5_ST_SZ_DW(ptys_reg)];
 	u32 in[MLX5_ST_SZ_DW(ptys_reg)];
@@ -118,38 +146,46 @@ int mlx5_port_set_eth_ptys(struct mlx5_core_dev *dev, bool an_disable,
 	MLX5_SET(ptys_reg, in, local_port, 1);
 	MLX5_SET(ptys_reg, in, an_disable_admin, an_disable);
 	MLX5_SET(ptys_reg, in, proto_mask, MLX5_PTYS_EN);
-	MLX5_SET(ptys_reg, in, eth_proto_admin, proto_admin);
+	if (ext)
+		MLX5_SET(ptys_reg, in, ext_eth_proto_admin, proto_admin);
+	else
+		MLX5_SET(ptys_reg, in, eth_proto_admin, proto_admin);
 
 	return mlx5_core_access_reg(dev, in, sizeof(in), out,
 			    sizeof(out), MLX5_REG_PTYS, 0, 1);
 }
 
-u32 mlx5e_port_ptys2speed(u32 eth_proto_oper)
+u32 mlx5e_port_ptys2speed(struct mlx5_core_dev *mdev, u32 eth_proto_oper)
 {
 	unsigned long temp = eth_proto_oper;
+	const u32 *table;
 	u32 speed = 0;
+	u32 max_size;
 	int i;
 
-	i = find_first_bit(&temp, MLX5E_LINK_MODES_NUMBER);
-	if (i < MLX5E_LINK_MODES_NUMBER)
-		speed = mlx5e_link_speed[i];
-
+	mlx5e_port_get_speed_arr(mdev, &table, &max_size);
+	i = find_first_bit(&temp, max_size);
+	if (i < max_size)
+		speed = table[i];
 	return speed;
 }
 
 int mlx5e_port_linkspeed(struct mlx5_core_dev *mdev, u32 *speed)
 {
 	struct mlx5e_port_eth_proto eproto;
+	bool ext;
 	int err;
 
-	err = mlx5_port_query_eth_proto(mdev, 1, &eproto);
+	ext = MLX5_CAP_PCAM_FEATURE(mdev, ptys_extended_ethernet);
+	err = mlx5_port_query_eth_proto(mdev, 1, ext, &eproto);
 	if (err)
-		return err;
+		goto out;
 
-	*speed = mlx5e_port_ptys2speed(eproto.oper);
+	*speed = mlx5e_port_ptys2speed(mdev, eproto.oper);
 	if (!(*speed))
 		err = -EINVAL;
 
+out:
 	return err;
 }
 
@@ -157,31 +193,38 @@ int mlx5e_port_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed)
 {
 	struct mlx5e_port_eth_proto eproto;
 	u32 max_speed = 0;
+	const u32 *table;
+	u32 max_size;
+	bool ext;
 	int err;
 	int i;
 
-	err = mlx5_port_query_eth_proto(mdev, 1, &eproto);
+	ext = MLX5_CAP_PCAM_FEATURE(mdev, ptys_extended_ethernet);
+	err = mlx5_port_query_eth_proto(mdev, 1, ext, &eproto);
 	if (err)
 		return err;
 
-	for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i)
+	mlx5e_port_get_speed_arr(mdev, &table, &max_size);
+	for (i = 0; i < max_size; ++i)
 		if (eproto.cap & MLX5E_PROT_MASK(i))
-			max_speed = max(max_speed, mlx5e_link_speed[i]);
+			max_speed = max(max_speed, table[i]);
 
 	*speed = max_speed;
 	return 0;
 }
 
-u32 mlx5e_port_speed2linkmodes(u32 speed)
+u32 mlx5e_port_speed2linkmodes(struct mlx5_core_dev *mdev, u32 speed)
 {
 	u32 link_modes = 0;
+	const u32 *table;
+	u32 max_size;
 	int i;
 
-	for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) {
-		if (mlx5e_link_speed[i] == speed)
+	mlx5e_port_get_speed_arr(mdev, &table, &max_size);
+	for (i = 0; i < max_size; ++i) {
+		if (table[i] == speed)
 			link_modes |= MLX5E_PROT_MASK(i);
 	}
-
 	return link_modes;
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.h b/drivers/net/ethernet/mellanox/mlx5/core/en/port.h
index 4bdab8be10af..70f536ec51c4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/port.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.h
@@ -42,16 +42,16 @@ struct mlx5e_port_eth_proto {
 	u32 oper;
 };
 
-int mlx5_port_query_eth_proto(struct mlx5_core_dev *dev, u8 port,
+int mlx5_port_query_eth_proto(struct mlx5_core_dev *dev, u8 port, bool ext,
 			      struct mlx5e_port_eth_proto *eproto);
 void mlx5_port_query_eth_autoneg(struct mlx5_core_dev *dev, u8 *an_status,
 				 u8 *an_disable_cap, u8 *an_disable_admin);
 int mlx5_port_set_eth_ptys(struct mlx5_core_dev *dev, bool an_disable,
-			   u32 proto_admin);
-u32 mlx5e_port_ptys2speed(u32 eth_proto_oper);
+			   u32 proto_admin, bool ext);
+u32 mlx5e_port_ptys2speed(struct mlx5_core_dev *mdev, u32 eth_proto_oper);
 int mlx5e_port_linkspeed(struct mlx5_core_dev *mdev, u32 *speed);
 int mlx5e_port_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed);
-u32 mlx5e_port_speed2linkmodes(u32 speed);
+u32 mlx5e_port_speed2linkmodes(struct mlx5_core_dev *mdev, u32 speed);
 
 int mlx5e_port_query_pbmc(struct mlx5_core_dev *mdev, void *out);
 int mlx5e_port_set_pbmc(struct mlx5_core_dev *mdev, void *in);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index c29e141d72fb..8343cf7d292c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -695,13 +695,14 @@ static void get_speed_duplex(struct net_device *netdev,
 			     u32 eth_proto_oper,
 			     struct ethtool_link_ksettings *link_ksettings)
 {
+	struct mlx5e_priv *priv = netdev_priv(netdev);
 	u32 speed = SPEED_UNKNOWN;
 	u8 duplex = DUPLEX_UNKNOWN;
 
 	if (!netif_carrier_ok(netdev))
 		goto out;
 
-	speed = mlx5e_port_ptys2speed(eth_proto_oper);
+	speed = mlx5e_port_ptys2speed(priv->mdev, eth_proto_oper);
 	if (!speed) {
 		speed = SPEED_UNKNOWN;
 		goto out;
@@ -896,9 +897,9 @@ int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv,
 
 	link_modes = link_ksettings->base.autoneg == AUTONEG_ENABLE ?
 		mlx5e_ethtool2ptys_adver_link(link_ksettings->link_modes.advertising) :
-		mlx5e_port_speed2linkmodes(speed);
+		mlx5e_port_speed2linkmodes(mdev, speed);
 
-	err = mlx5_port_query_eth_proto(mdev, 1, &eproto);
+	err = mlx5_port_query_eth_proto(mdev, 1, false, &eproto);
 	if (err) {
 		netdev_err(priv->netdev, "%s: query port eth proto failed: %d\n",
 			   __func__, err);
@@ -923,7 +924,7 @@ int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv,
 	if (!an_changes && link_modes == eproto.admin)
 		goto out;
 
-	mlx5_port_set_eth_ptys(mdev, an_disable, link_modes);
+	mlx5_port_set_eth_ptys(mdev, an_disable, link_modes, false);
 	mlx5_toggle_port_link(mdev);
 
 out:
diff --git a/include/linux/mlx5/port.h b/include/linux/mlx5/port.h
index 5be7eefa6d75..814fa194663b 100644
--- a/include/linux/mlx5/port.h
+++ b/include/linux/mlx5/port.h
@@ -92,6 +92,22 @@ enum mlx5e_link_mode {
 	MLX5E_LINK_MODES_NUMBER,
 };
 
+enum mlx5e_ext_link_mode {
+	MLX5E_SGMII_100M			= 0,
+	MLX5E_1000BASE_X_SGMII			= 1,
+	MLX5E_5GBASE_R				= 3,
+	MLX5E_10GBASE_XFI_XAUI_1		= 4,
+	MLX5E_40GBASE_XLAUI_4_XLPPI_4		= 5,
+	MLX5E_25GAUI_1_25GBASE_CR_KR		= 6,
+	MLX5E_50GAUI_2_LAUI_2_50GBASE_CR2_KR2	= 7,
+	MLX5E_50GAUI_1_LAUI_1_50GBASE_CR_KR	= 8,
+	MLX5E_CAUI_4_100GBASE_CR4_KR4		= 9,
+	MLX5E_100GAUI_2_100GBASE_CR2_KR2	= 10,
+	MLX5E_200GAUI_4_200GBASE_CR4_KR4	= 12,
+	MLX5E_400GAUI_8				= 15,
+	MLX5E_EXT_LINK_MODES_NUMBER,
+};
+
 enum mlx5e_connector_type {
 	MLX5E_PORT_UNKNOWN	= 0,
 	MLX5E_PORT_NONE			= 1,
@@ -106,6 +122,9 @@ enum mlx5e_connector_type {
 };
 
 #define MLX5E_PROT_MASK(link_mode) (1 << link_mode)
+#define MLX5_GET_ETH_PROTO(reg, out, ext, field)	\
+	(ext ? MLX5_GET(reg, out, ext_##field) :	\
+	MLX5_GET(reg, out, field))
 
 int mlx5_set_port_caps(struct mlx5_core_dev *dev, u8 port_num, u32 caps);
 int mlx5_query_port_ptys(struct mlx5_core_dev *dev, u32 *ptys,
-- 
cgit v1.2.3


From d31c85fc864269ab8e6fb6ed36c87f2fc33a9430 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Wed, 13 Feb 2019 15:21:09 +0000
Subject: snd/hda, drm/i915: Track the display_power_status using a cookie

drm/i915 is tracking all wakeref owners with a cookie in order to
identify leaks. To that end, each rpm acquisition ops->get_power is
assigned a cookie which should be passed to ops->put_power to signify
its release (and removal from the list of wakeref owners). As snd/hda is
already using a bool to track current status of display_power extending
that to an unsigned long to hold the boolean cookie is a trivial
extension, and will quell all doubt that snd/hda is the cause of the
device runtime pm leaks.

v2: Keep using the power abstraction for local wakeref tracking.
v3: BUILD_BUG_ON impedance mismatch

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Takashi Iwai <tiwai@suse.de>
Cc: Jani Nikula <jani.nikula@intel.com>
Acked-by: Takashi Iwai <tiwai@suse.de>
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190213152109.16997-1-chris@chris-wilson.co.uk
---
 drivers/gpu/drm/i915/intel_audio.c | 23 ++++++++++++++---------
 include/drm/drm_audio_component.h  |  7 +++++--
 include/sound/hdaudio.h            |  2 +-
 sound/hda/hdac_component.c         | 18 ++++++++++++------
 4 files changed, 32 insertions(+), 18 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/i915/intel_audio.c b/drivers/gpu/drm/i915/intel_audio.c
index de26cd0a5497..32dfd41a93ed 100644
--- a/drivers/gpu/drm/i915/intel_audio.c
+++ b/drivers/gpu/drm/i915/intel_audio.c
@@ -741,27 +741,31 @@ void intel_init_audio_hooks(struct drm_i915_private *dev_priv)
 	}
 }
 
-static void i915_audio_component_get_power(struct device *kdev)
+static unsigned long i915_audio_component_get_power(struct device *kdev)
 {
-	intel_display_power_get(kdev_to_i915(kdev), POWER_DOMAIN_AUDIO);
+	/* Catch potential impedance mismatches before they occur! */
+	BUILD_BUG_ON(sizeof(intel_wakeref_t) > sizeof(unsigned long));
+
+	return intel_display_power_get(kdev_to_i915(kdev), POWER_DOMAIN_AUDIO);
 }
 
-static void i915_audio_component_put_power(struct device *kdev)
+static void i915_audio_component_put_power(struct device *kdev,
+					   unsigned long cookie)
 {
-	intel_display_power_put_unchecked(kdev_to_i915(kdev),
-					  POWER_DOMAIN_AUDIO);
+	intel_display_power_put(kdev_to_i915(kdev), POWER_DOMAIN_AUDIO, cookie);
 }
 
 static void i915_audio_component_codec_wake_override(struct device *kdev,
 						     bool enable)
 {
 	struct drm_i915_private *dev_priv = kdev_to_i915(kdev);
+	unsigned long cookie;
 	u32 tmp;
 
 	if (!IS_GEN(dev_priv, 9))
 		return;
 
-	i915_audio_component_get_power(kdev);
+	cookie = i915_audio_component_get_power(kdev);
 
 	/*
 	 * Enable/disable generating the codec wake signal, overriding the
@@ -779,7 +783,7 @@ static void i915_audio_component_codec_wake_override(struct device *kdev,
 		usleep_range(1000, 1500);
 	}
 
-	i915_audio_component_put_power(kdev);
+	i915_audio_component_put_power(kdev, cookie);
 }
 
 /* Get CDCLK in kHz  */
@@ -850,12 +854,13 @@ static int i915_audio_component_sync_audio_rate(struct device *kdev, int port,
 	struct i915_audio_component *acomp = dev_priv->audio_component;
 	struct intel_encoder *encoder;
 	struct intel_crtc *crtc;
+	unsigned long cookie;
 	int err = 0;
 
 	if (!HAS_DDI(dev_priv))
 		return 0;
 
-	i915_audio_component_get_power(kdev);
+	cookie = i915_audio_component_get_power(kdev);
 	mutex_lock(&dev_priv->av_mutex);
 
 	/* 1. get the pipe */
@@ -875,7 +880,7 @@ static int i915_audio_component_sync_audio_rate(struct device *kdev, int port,
 
  unlock:
 	mutex_unlock(&dev_priv->av_mutex);
-	i915_audio_component_put_power(kdev);
+	i915_audio_component_put_power(kdev, cookie);
 	return err;
 }
 
diff --git a/include/drm/drm_audio_component.h b/include/drm/drm_audio_component.h
index 4923b00328c1..d0c7444319f5 100644
--- a/include/drm/drm_audio_component.h
+++ b/include/drm/drm_audio_component.h
@@ -18,14 +18,17 @@ struct drm_audio_component_ops {
 	 * @get_power: get the POWER_DOMAIN_AUDIO power well
 	 *
 	 * Request the power well to be turned on.
+	 *
+	 * Returns a wakeref cookie to be passed back to the corresponding
+	 * call to @put_power.
 	 */
-	void (*get_power)(struct device *);
+	unsigned long (*get_power)(struct device *);
 	/**
 	 * @put_power: put the POWER_DOMAIN_AUDIO power well
 	 *
 	 * Allow the power well to be turned off.
 	 */
-	void (*put_power)(struct device *);
+	void (*put_power)(struct device *, unsigned long);
 	/**
 	 * @codec_wake_override: Enable/disable codec wake signal
 	 */
diff --git a/include/sound/hdaudio.h b/include/sound/hdaudio.h
index b4fa1c775251..a438ec8e535b 100644
--- a/include/sound/hdaudio.h
+++ b/include/sound/hdaudio.h
@@ -367,7 +367,7 @@ struct hdac_bus {
 	/* DRM component interface */
 	struct drm_audio_component *audio_component;
 	long display_power_status;
-	bool display_power_active;
+	unsigned long display_power_active;
 
 	/* parameters required for enhanced capabilities */
 	int num_streams;
diff --git a/sound/hda/hdac_component.c b/sound/hda/hdac_component.c
index a6d37b9d6413..2702548b788a 100644
--- a/sound/hda/hdac_component.c
+++ b/sound/hda/hdac_component.c
@@ -79,17 +79,23 @@ void snd_hdac_display_power(struct hdac_bus *bus, unsigned int idx, bool enable)
 
 	if (bus->display_power_status) {
 		if (!bus->display_power_active) {
+			unsigned long cookie = -1;
+
 			if (acomp->ops->get_power)
-				acomp->ops->get_power(acomp->dev);
+				cookie = acomp->ops->get_power(acomp->dev);
+
 			snd_hdac_set_codec_wakeup(bus, true);
 			snd_hdac_set_codec_wakeup(bus, false);
-			bus->display_power_active = true;
+			bus->display_power_active = cookie;
 		}
 	} else {
 		if (bus->display_power_active) {
+			unsigned long cookie = bus->display_power_active;
+
 			if (acomp->ops->put_power)
-				acomp->ops->put_power(acomp->dev);
-			bus->display_power_active = false;
+				acomp->ops->put_power(acomp->dev, cookie);
+
+			bus->display_power_active = 0;
 		}
 	}
 }
@@ -325,9 +331,9 @@ int snd_hdac_acomp_exit(struct hdac_bus *bus)
 		return 0;
 
 	if (WARN_ON(bus->display_power_active) && acomp->ops)
-		acomp->ops->put_power(acomp->dev);
+		acomp->ops->put_power(acomp->dev, bus->display_power_active);
 
-	bus->display_power_active = false;
+	bus->display_power_active = 0;
 	bus->display_power_status = 0;
 
 	component_master_del(dev, &hdac_component_master_ops);
-- 
cgit v1.2.3


From 593db80390cf40f1b9dcc790020d2edae87183fb Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Thu, 10 Jan 2019 16:25:32 +0200
Subject: vmbus: Switch to use new generic UUID API

There are new types and helpers that are supposed to be used in new code.

As a preparation to get rid of legacy types and API functions do
the conversion here.

Cc: "K. Y. Srinivasan" <kys@microsoft.com>
Cc: Haiyang Zhang <haiyangz@microsoft.com>
Cc: Stephen Hemminger <sthemmin@microsoft.com>
Cc: devel@linuxdriverproject.org
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by:  Michael Kelley <mikelley@microsoft.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 drivers/hv/channel.c      |  4 +-
 drivers/hv/channel_mgmt.c | 18 ++++-----
 drivers/hv/hyperv_vmbus.h |  4 +-
 drivers/hv/vmbus_drv.c    | 48 ++++++++---------------
 include/linux/hyperv.h    | 98 +++++++++++++++++++++++------------------------
 5 files changed, 79 insertions(+), 93 deletions(-)

(limited to 'include')

diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
index bea4c9850247..23381c41d087 100644
--- a/drivers/hv/channel.c
+++ b/drivers/hv/channel.c
@@ -282,8 +282,8 @@ int vmbus_open(struct vmbus_channel *newchannel,
 EXPORT_SYMBOL_GPL(vmbus_open);
 
 /* Used for Hyper-V Socket: a guest client's connect() to the host */
-int vmbus_send_tl_connect_request(const uuid_le *shv_guest_servie_id,
-				  const uuid_le *shv_host_servie_id)
+int vmbus_send_tl_connect_request(const guid_t *shv_guest_servie_id,
+				  const guid_t *shv_host_servie_id)
 {
 	struct vmbus_channel_tl_connect_request conn_msg;
 	int ret;
diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index d01689079e9b..62703b354d6d 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -141,7 +141,7 @@ static const struct vmbus_device vmbus_devs[] = {
 };
 
 static const struct {
-	uuid_le guid;
+	guid_t guid;
 } vmbus_unsupported_devs[] = {
 	{ HV_AVMA1_GUID },
 	{ HV_AVMA2_GUID },
@@ -171,26 +171,26 @@ static void vmbus_rescind_cleanup(struct vmbus_channel *channel)
 	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
 }
 
-static bool is_unsupported_vmbus_devs(const uuid_le *guid)
+static bool is_unsupported_vmbus_devs(const guid_t *guid)
 {
 	int i;
 
 	for (i = 0; i < ARRAY_SIZE(vmbus_unsupported_devs); i++)
-		if (!uuid_le_cmp(*guid, vmbus_unsupported_devs[i].guid))
+		if (guid_equal(guid, &vmbus_unsupported_devs[i].guid))
 			return true;
 	return false;
 }
 
 static u16 hv_get_dev_type(const struct vmbus_channel *channel)
 {
-	const uuid_le *guid = &channel->offermsg.offer.if_type;
+	const guid_t *guid = &channel->offermsg.offer.if_type;
 	u16 i;
 
 	if (is_hvsock_channel(channel) || is_unsupported_vmbus_devs(guid))
 		return HV_UNKNOWN;
 
 	for (i = HV_IDE; i < HV_UNKNOWN; i++) {
-		if (!uuid_le_cmp(*guid, vmbus_devs[i].guid))
+		if (guid_equal(guid, &vmbus_devs[i].guid))
 			return i;
 	}
 	pr_info("Unknown GUID: %pUl\n", guid);
@@ -561,10 +561,10 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel)
 	atomic_dec(&vmbus_connection.offer_in_progress);
 
 	list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
-		if (!uuid_le_cmp(channel->offermsg.offer.if_type,
-				 newchannel->offermsg.offer.if_type) &&
-		    !uuid_le_cmp(channel->offermsg.offer.if_instance,
-				 newchannel->offermsg.offer.if_instance)) {
+		if (guid_equal(&channel->offermsg.offer.if_type,
+			       &newchannel->offermsg.offer.if_type) &&
+		    guid_equal(&channel->offermsg.offer.if_instance,
+			       &newchannel->offermsg.offer.if_instance)) {
 			fnew = false;
 			break;
 		}
diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h
index a1f6ce6e5974..cb86b133eb4d 100644
--- a/drivers/hv/hyperv_vmbus.h
+++ b/drivers/hv/hyperv_vmbus.h
@@ -312,8 +312,8 @@ extern const struct vmbus_channel_message_table_entry
 
 /* General vmbus interface */
 
-struct hv_device *vmbus_device_create(const uuid_le *type,
-				      const uuid_le *instance,
+struct hv_device *vmbus_device_create(const guid_t *type,
+				      const guid_t *instance,
 				      struct vmbus_channel *channel);
 
 int vmbus_device_register(struct hv_device *child_device_obj);
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index 403fee01572c..126c2de39e35 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -654,38 +654,28 @@ static int vmbus_uevent(struct device *device, struct kobj_uevent_env *env)
 	return ret;
 }
 
-static const uuid_le null_guid;
-
-static inline bool is_null_guid(const uuid_le *guid)
-{
-	if (uuid_le_cmp(*guid, null_guid))
-		return false;
-	return true;
-}
-
 static const struct hv_vmbus_device_id *
-hv_vmbus_dev_match(const struct hv_vmbus_device_id *id, const uuid_le *guid)
-
+hv_vmbus_dev_match(const struct hv_vmbus_device_id *id, const guid_t *guid)
 {
 	if (id == NULL)
 		return NULL; /* empty device table */
 
-	for (; !is_null_guid(&id->guid); id++)
-		if (!uuid_le_cmp(id->guid, *guid))
+	for (; !guid_is_null(&id->guid); id++)
+		if (guid_equal(&id->guid, guid))
 			return id;
 
 	return NULL;
 }
 
 static const struct hv_vmbus_device_id *
-hv_vmbus_dynid_match(struct hv_driver *drv, const uuid_le *guid)
+hv_vmbus_dynid_match(struct hv_driver *drv, const guid_t *guid)
 {
 	const struct hv_vmbus_device_id *id = NULL;
 	struct vmbus_dynid *dynid;
 
 	spin_lock(&drv->dynids.lock);
 	list_for_each_entry(dynid, &drv->dynids.list, node) {
-		if (!uuid_le_cmp(dynid->id.guid, *guid)) {
+		if (guid_equal(&dynid->id.guid, guid)) {
 			id = &dynid->id;
 			break;
 		}
@@ -695,9 +685,7 @@ hv_vmbus_dynid_match(struct hv_driver *drv, const uuid_le *guid)
 	return id;
 }
 
-static const struct hv_vmbus_device_id vmbus_device_null = {
-	.guid = NULL_UUID_LE,
-};
+static const struct hv_vmbus_device_id vmbus_device_null;
 
 /*
  * Return a matching hv_vmbus_device_id pointer.
@@ -706,7 +694,7 @@ static const struct hv_vmbus_device_id vmbus_device_null = {
 static const struct hv_vmbus_device_id *hv_vmbus_get_id(struct hv_driver *drv,
 							struct hv_device *dev)
 {
-	const uuid_le *guid = &dev->dev_type;
+	const guid_t *guid = &dev->dev_type;
 	const struct hv_vmbus_device_id *id;
 
 	/* When driver_override is set, only bind to the matching driver */
@@ -726,7 +714,7 @@ static const struct hv_vmbus_device_id *hv_vmbus_get_id(struct hv_driver *drv,
 }
 
 /* vmbus_add_dynid - add a new device ID to this driver and re-probe devices */
-static int vmbus_add_dynid(struct hv_driver *drv, uuid_le *guid)
+static int vmbus_add_dynid(struct hv_driver *drv, guid_t *guid)
 {
 	struct vmbus_dynid *dynid;
 
@@ -764,10 +752,10 @@ static ssize_t new_id_store(struct device_driver *driver, const char *buf,
 			    size_t count)
 {
 	struct hv_driver *drv = drv_to_hv_drv(driver);
-	uuid_le guid;
+	guid_t guid;
 	ssize_t retval;
 
-	retval = uuid_le_to_bin(buf, &guid);
+	retval = guid_parse(buf, &guid);
 	if (retval)
 		return retval;
 
@@ -791,10 +779,10 @@ static ssize_t remove_id_store(struct device_driver *driver, const char *buf,
 {
 	struct hv_driver *drv = drv_to_hv_drv(driver);
 	struct vmbus_dynid *dynid, *n;
-	uuid_le guid;
+	guid_t guid;
 	ssize_t retval;
 
-	retval = uuid_le_to_bin(buf, &guid);
+	retval = guid_parse(buf, &guid);
 	if (retval)
 		return retval;
 
@@ -803,7 +791,7 @@ static ssize_t remove_id_store(struct device_driver *driver, const char *buf,
 	list_for_each_entry_safe(dynid, n, &drv->dynids.list, node) {
 		struct hv_vmbus_device_id *id = &dynid->id;
 
-		if (!uuid_le_cmp(id->guid, guid)) {
+		if (guid_equal(&id->guid, &guid)) {
 			list_del(&dynid->node);
 			kfree(dynid);
 			retval = count;
@@ -1556,8 +1544,8 @@ int vmbus_add_channel_kobj(struct hv_device *dev, struct vmbus_channel *channel)
  * vmbus_device_create - Creates and registers a new child device
  * on the vmbus.
  */
-struct hv_device *vmbus_device_create(const uuid_le *type,
-				      const uuid_le *instance,
+struct hv_device *vmbus_device_create(const guid_t *type,
+				      const guid_t *instance,
 				      struct vmbus_channel *channel)
 {
 	struct hv_device *child_device_obj;
@@ -1569,12 +1557,10 @@ struct hv_device *vmbus_device_create(const uuid_le *type,
 	}
 
 	child_device_obj->channel = channel;
-	memcpy(&child_device_obj->dev_type, type, sizeof(uuid_le));
-	memcpy(&child_device_obj->dev_instance, instance,
-	       sizeof(uuid_le));
+	guid_copy(&child_device_obj->dev_type, type);
+	guid_copy(&child_device_obj->dev_instance, instance);
 	child_device_obj->vendor_id = 0x1414; /* MSFT vendor ID */
 
-
 	return child_device_obj;
 }
 
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index dcb6977afce9..d5678a0fe598 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -222,8 +222,8 @@ static inline u32 hv_get_avail_to_write_percent(
  * struct contains the fundamental information about an offer.
  */
 struct vmbus_channel_offer {
-	uuid_le if_type;
-	uuid_le if_instance;
+	guid_t if_type;
+	guid_t if_instance;
 
 	/*
 	 * These two fields are not currently used.
@@ -614,8 +614,8 @@ struct vmbus_channel_initiate_contact {
 /* Hyper-V socket: guest's connect()-ing to host */
 struct vmbus_channel_tl_connect_request {
 	struct vmbus_channel_message_header header;
-	uuid_le guest_endpoint_id;
-	uuid_le host_service_id;
+	guid_t guest_endpoint_id;
+	guid_t host_service_id;
 } __packed;
 
 struct vmbus_channel_version_response {
@@ -714,7 +714,7 @@ enum vmbus_device_type {
 
 struct vmbus_device {
 	u16  dev_type;
-	uuid_le guid;
+	guid_t guid;
 	bool perf_device;
 };
 
@@ -1096,7 +1096,7 @@ struct hv_driver {
 	bool hvsock;
 
 	/* the device type supported by this driver */
-	uuid_le dev_type;
+	guid_t dev_type;
 	const struct hv_vmbus_device_id *id_table;
 
 	struct device_driver driver;
@@ -1116,10 +1116,10 @@ struct hv_driver {
 /* Base device object */
 struct hv_device {
 	/* the device type id of this device */
-	uuid_le dev_type;
+	guid_t dev_type;
 
 	/* the device instance id of this device */
-	uuid_le dev_instance;
+	guid_t dev_instance;
 	u16 vendor_id;
 	u16 device_id;
 
@@ -1188,102 +1188,102 @@ void vmbus_free_mmio(resource_size_t start, resource_size_t size);
  * {f8615163-df3e-46c5-913f-f2d2f965ed0e}
  */
 #define HV_NIC_GUID \
-	.guid = UUID_LE(0xf8615163, 0xdf3e, 0x46c5, 0x91, 0x3f, \
-			0xf2, 0xd2, 0xf9, 0x65, 0xed, 0x0e)
+	.guid = GUID_INIT(0xf8615163, 0xdf3e, 0x46c5, 0x91, 0x3f, \
+			  0xf2, 0xd2, 0xf9, 0x65, 0xed, 0x0e)
 
 /*
  * IDE GUID
  * {32412632-86cb-44a2-9b5c-50d1417354f5}
  */
 #define HV_IDE_GUID \
-	.guid = UUID_LE(0x32412632, 0x86cb, 0x44a2, 0x9b, 0x5c, \
-			0x50, 0xd1, 0x41, 0x73, 0x54, 0xf5)
+	.guid = GUID_INIT(0x32412632, 0x86cb, 0x44a2, 0x9b, 0x5c, \
+			  0x50, 0xd1, 0x41, 0x73, 0x54, 0xf5)
 
 /*
  * SCSI GUID
  * {ba6163d9-04a1-4d29-b605-72e2ffb1dc7f}
  */
 #define HV_SCSI_GUID \
-	.guid = UUID_LE(0xba6163d9, 0x04a1, 0x4d29, 0xb6, 0x05, \
-			0x72, 0xe2, 0xff, 0xb1, 0xdc, 0x7f)
+	.guid = GUID_INIT(0xba6163d9, 0x04a1, 0x4d29, 0xb6, 0x05, \
+			  0x72, 0xe2, 0xff, 0xb1, 0xdc, 0x7f)
 
 /*
  * Shutdown GUID
  * {0e0b6031-5213-4934-818b-38d90ced39db}
  */
 #define HV_SHUTDOWN_GUID \
-	.guid = UUID_LE(0x0e0b6031, 0x5213, 0x4934, 0x81, 0x8b, \
-			0x38, 0xd9, 0x0c, 0xed, 0x39, 0xdb)
+	.guid = GUID_INIT(0x0e0b6031, 0x5213, 0x4934, 0x81, 0x8b, \
+			  0x38, 0xd9, 0x0c, 0xed, 0x39, 0xdb)
 
 /*
  * Time Synch GUID
  * {9527E630-D0AE-497b-ADCE-E80AB0175CAF}
  */
 #define HV_TS_GUID \
-	.guid = UUID_LE(0x9527e630, 0xd0ae, 0x497b, 0xad, 0xce, \
-			0xe8, 0x0a, 0xb0, 0x17, 0x5c, 0xaf)
+	.guid = GUID_INIT(0x9527e630, 0xd0ae, 0x497b, 0xad, 0xce, \
+			  0xe8, 0x0a, 0xb0, 0x17, 0x5c, 0xaf)
 
 /*
  * Heartbeat GUID
  * {57164f39-9115-4e78-ab55-382f3bd5422d}
  */
 #define HV_HEART_BEAT_GUID \
-	.guid = UUID_LE(0x57164f39, 0x9115, 0x4e78, 0xab, 0x55, \
-			0x38, 0x2f, 0x3b, 0xd5, 0x42, 0x2d)
+	.guid = GUID_INIT(0x57164f39, 0x9115, 0x4e78, 0xab, 0x55, \
+			  0x38, 0x2f, 0x3b, 0xd5, 0x42, 0x2d)
 
 /*
  * KVP GUID
  * {a9a0f4e7-5a45-4d96-b827-8a841e8c03e6}
  */
 #define HV_KVP_GUID \
-	.guid = UUID_LE(0xa9a0f4e7, 0x5a45, 0x4d96, 0xb8, 0x27, \
-			0x8a, 0x84, 0x1e, 0x8c, 0x03, 0xe6)
+	.guid = GUID_INIT(0xa9a0f4e7, 0x5a45, 0x4d96, 0xb8, 0x27, \
+			  0x8a, 0x84, 0x1e, 0x8c, 0x03, 0xe6)
 
 /*
  * Dynamic memory GUID
  * {525074dc-8985-46e2-8057-a307dc18a502}
  */
 #define HV_DM_GUID \
-	.guid = UUID_LE(0x525074dc, 0x8985, 0x46e2, 0x80, 0x57, \
-			0xa3, 0x07, 0xdc, 0x18, 0xa5, 0x02)
+	.guid = GUID_INIT(0x525074dc, 0x8985, 0x46e2, 0x80, 0x57, \
+			  0xa3, 0x07, 0xdc, 0x18, 0xa5, 0x02)
 
 /*
  * Mouse GUID
  * {cfa8b69e-5b4a-4cc0-b98b-8ba1a1f3f95a}
  */
 #define HV_MOUSE_GUID \
-	.guid = UUID_LE(0xcfa8b69e, 0x5b4a, 0x4cc0, 0xb9, 0x8b, \
-			0x8b, 0xa1, 0xa1, 0xf3, 0xf9, 0x5a)
+	.guid = GUID_INIT(0xcfa8b69e, 0x5b4a, 0x4cc0, 0xb9, 0x8b, \
+			  0x8b, 0xa1, 0xa1, 0xf3, 0xf9, 0x5a)
 
 /*
  * Keyboard GUID
  * {f912ad6d-2b17-48ea-bd65-f927a61c7684}
  */
 #define HV_KBD_GUID \
-	.guid = UUID_LE(0xf912ad6d, 0x2b17, 0x48ea, 0xbd, 0x65, \
-			0xf9, 0x27, 0xa6, 0x1c, 0x76, 0x84)
+	.guid = GUID_INIT(0xf912ad6d, 0x2b17, 0x48ea, 0xbd, 0x65, \
+			  0xf9, 0x27, 0xa6, 0x1c, 0x76, 0x84)
 
 /*
  * VSS (Backup/Restore) GUID
  */
 #define HV_VSS_GUID \
-	.guid = UUID_LE(0x35fa2e29, 0xea23, 0x4236, 0x96, 0xae, \
-			0x3a, 0x6e, 0xba, 0xcb, 0xa4, 0x40)
+	.guid = GUID_INIT(0x35fa2e29, 0xea23, 0x4236, 0x96, 0xae, \
+			  0x3a, 0x6e, 0xba, 0xcb, 0xa4, 0x40)
 /*
  * Synthetic Video GUID
  * {DA0A7802-E377-4aac-8E77-0558EB1073F8}
  */
 #define HV_SYNTHVID_GUID \
-	.guid = UUID_LE(0xda0a7802, 0xe377, 0x4aac, 0x8e, 0x77, \
-			0x05, 0x58, 0xeb, 0x10, 0x73, 0xf8)
+	.guid = GUID_INIT(0xda0a7802, 0xe377, 0x4aac, 0x8e, 0x77, \
+			  0x05, 0x58, 0xeb, 0x10, 0x73, 0xf8)
 
 /*
  * Synthetic FC GUID
  * {2f9bcc4a-0069-4af3-b76b-6fd0be528cda}
  */
 #define HV_SYNTHFC_GUID \
-	.guid = UUID_LE(0x2f9bcc4a, 0x0069, 0x4af3, 0xb7, 0x6b, \
-			0x6f, 0xd0, 0xbe, 0x52, 0x8c, 0xda)
+	.guid = GUID_INIT(0x2f9bcc4a, 0x0069, 0x4af3, 0xb7, 0x6b, \
+			  0x6f, 0xd0, 0xbe, 0x52, 0x8c, 0xda)
 
 /*
  * Guest File Copy Service
@@ -1291,16 +1291,16 @@ void vmbus_free_mmio(resource_size_t start, resource_size_t size);
  */
 
 #define HV_FCOPY_GUID \
-	.guid = UUID_LE(0x34d14be3, 0xdee4, 0x41c8, 0x9a, 0xe7, \
-			0x6b, 0x17, 0x49, 0x77, 0xc1, 0x92)
+	.guid = GUID_INIT(0x34d14be3, 0xdee4, 0x41c8, 0x9a, 0xe7, \
+			  0x6b, 0x17, 0x49, 0x77, 0xc1, 0x92)
 
 /*
  * NetworkDirect. This is the guest RDMA service.
  * {8c2eaf3d-32a7-4b09-ab99-bd1f1c86b501}
  */
 #define HV_ND_GUID \
-	.guid = UUID_LE(0x8c2eaf3d, 0x32a7, 0x4b09, 0xab, 0x99, \
-			0xbd, 0x1f, 0x1c, 0x86, 0xb5, 0x01)
+	.guid = GUID_INIT(0x8c2eaf3d, 0x32a7, 0x4b09, 0xab, 0x99, \
+			  0xbd, 0x1f, 0x1c, 0x86, 0xb5, 0x01)
 
 /*
  * PCI Express Pass Through
@@ -1308,8 +1308,8 @@ void vmbus_free_mmio(resource_size_t start, resource_size_t size);
  */
 
 #define HV_PCIE_GUID \
-	.guid = UUID_LE(0x44c4f61d, 0x4444, 0x4400, 0x9d, 0x52, \
-			0x80, 0x2e, 0x27, 0xed, 0xe1, 0x9f)
+	.guid = GUID_INIT(0x44c4f61d, 0x4444, 0x4400, 0x9d, 0x52, \
+			  0x80, 0x2e, 0x27, 0xed, 0xe1, 0x9f)
 
 /*
  * Linux doesn't support the 3 devices: the first two are for
@@ -1321,16 +1321,16 @@ void vmbus_free_mmio(resource_size_t start, resource_size_t size);
  */
 
 #define HV_AVMA1_GUID \
-	.guid = UUID_LE(0xf8e65716, 0x3cb3, 0x4a06, 0x9a, 0x60, \
-			0x18, 0x89, 0xc5, 0xcc, 0xca, 0xb5)
+	.guid = GUID_INIT(0xf8e65716, 0x3cb3, 0x4a06, 0x9a, 0x60, \
+			  0x18, 0x89, 0xc5, 0xcc, 0xca, 0xb5)
 
 #define HV_AVMA2_GUID \
-	.guid = UUID_LE(0x3375baf4, 0x9e15, 0x4b30, 0xb7, 0x65, \
-			0x67, 0xac, 0xb1, 0x0d, 0x60, 0x7b)
+	.guid = GUID_INIT(0x3375baf4, 0x9e15, 0x4b30, 0xb7, 0x65, \
+			  0x67, 0xac, 0xb1, 0x0d, 0x60, 0x7b)
 
 #define HV_RDV_GUID \
-	.guid = UUID_LE(0x276aacf4, 0xac15, 0x426c, 0x98, 0xdd, \
-			0x75, 0x21, 0xad, 0x3f, 0x01, 0xfe)
+	.guid = GUID_INIT(0x276aacf4, 0xac15, 0x426c, 0x98, 0xdd, \
+			  0x75, 0x21, 0xad, 0x3f, 0x01, 0xfe)
 
 /*
  * Common header for Hyper-V ICs
@@ -1432,7 +1432,7 @@ struct ictimesync_ref_data {
 struct hyperv_service_callback {
 	u8 msg_type;
 	char *log_msg;
-	uuid_le data;
+	guid_t data;
 	struct vmbus_channel *channel;
 	void (*callback)(void *context);
 };
@@ -1452,8 +1452,8 @@ void vmbus_setevent(struct vmbus_channel *channel);
 
 extern __u32 vmbus_proto_version;
 
-int vmbus_send_tl_connect_request(const uuid_le *shv_guest_servie_id,
-				  const uuid_le *shv_host_servie_id);
+int vmbus_send_tl_connect_request(const guid_t *shv_guest_servie_id,
+				  const guid_t *shv_host_servie_id);
 void vmbus_set_event(struct vmbus_channel *channel);
 
 /* Get the start of the ring buffer. */
-- 
cgit v1.2.3


From 396ae57ef1ef978d1d21cdb7586ba184a3f22453 Mon Sep 17 00:00:00 2001
From: Kimberly Brown <kimbrownkd@gmail.com>
Date: Mon, 4 Feb 2019 02:13:09 -0500
Subject: Drivers: hv: vmbus: Expose counters for interrupts and full
 conditions

Counter values for per-channel interrupts and ring buffer full
conditions are useful for investigating performance.

Expose counters in sysfs for 2 types of guest to host interrupts:
1) Interrupts caused by the channel's outbound ring buffer transitioning
from empty to not empty
2) Interrupts caused by the channel's inbound ring buffer transitioning
from full to not full while a packet is waiting for enough buffer space to
become available

Expose 2 counters in sysfs for the number of times that write operations
encountered a full outbound ring buffer:
1) The total number of write operations that encountered a full
condition
2) The number of write operations that were the first to encounter a
full condition

Increment the outbound full condition counters in the
hv_ringbuffer_write() function because, for most drivers, a full
outbound ring buffer is detected in that function. Also increment the
outbound full condition counters in the set_channel_pending_send_size()
function. In the hv_sock driver, a full outbound ring buffer is detected
and set_channel_pending_send_size() is called before
hv_ringbuffer_write() is called.

I tested this patch by confirming that the sysfs files were created and
observing the counter values. The values seemed to increase by a
reasonable amount when the Hyper-v related drivers were in use.

Signed-off-by: Kimberly Brown <kimbrownkd@gmail.com>
Reviewed-by:  Michael Kelley <mikelley@microsoft.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 Documentation/ABI/stable/sysfs-bus-vmbus | 33 +++++++++++++++++++++++
 drivers/hv/ring_buffer.c                 | 14 +++++++++-
 drivers/hv/vmbus_drv.c                   | 36 +++++++++++++++++++++++++
 include/linux/hyperv.h                   | 46 ++++++++++++++++++++++++++++++++
 4 files changed, 128 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/Documentation/ABI/stable/sysfs-bus-vmbus b/Documentation/ABI/stable/sysfs-bus-vmbus
index 3fed8fdb873d..826689dcc2e6 100644
--- a/Documentation/ABI/stable/sysfs-bus-vmbus
+++ b/Documentation/ABI/stable/sysfs-bus-vmbus
@@ -146,3 +146,36 @@ KernelVersion:	4.16
 Contact:	Stephen Hemminger <sthemmin@microsoft.com>
 Description:	Binary file created by uio_hv_generic for ring buffer
 Users:		Userspace drivers
+
+What:           /sys/bus/vmbus/devices/<UUID>/channels/<N>/intr_in_full
+Date:           February 2019
+KernelVersion:  5.0
+Contact:        Michael Kelley <mikelley@microsoft.com>
+Description:    Number of guest to host interrupts caused by the inbound ring
+		buffer transitioning from full to not full while a packet is
+		waiting for buffer space to become available
+Users:          Debugging tools
+
+What:           /sys/bus/vmbus/devices/<UUID>/channels/<N>/intr_out_empty
+Date:           February 2019
+KernelVersion:  5.0
+Contact:        Michael Kelley <mikelley@microsoft.com>
+Description:    Number of guest to host interrupts caused by the outbound ring
+		buffer transitioning from empty to not empty
+Users:          Debugging tools
+
+What:           /sys/bus/vmbus/devices/<UUID>/channels/<N>/out_full_first
+Date:           February 2019
+KernelVersion:  5.0
+Contact:        Michael Kelley <mikelley@microsoft.com>
+Description:    Number of write operations that were the first to encounter an
+		outbound ring buffer full condition
+Users:          Debugging tools
+
+What:           /sys/bus/vmbus/devices/<UUID>/channels/<N>/out_full_total
+Date:           February 2019
+KernelVersion:  5.0
+Contact:        Michael Kelley <mikelley@microsoft.com>
+Description:    Total number of write operations that encountered an outbound
+		ring buffer full condition
+Users:          Debugging tools
diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c
index 1f1a55e07733..9e8b31ccc142 100644
--- a/drivers/hv/ring_buffer.c
+++ b/drivers/hv/ring_buffer.c
@@ -74,8 +74,10 @@ static void hv_signal_on_write(u32 old_write, struct vmbus_channel *channel)
 	 * This is the only case we need to signal when the
 	 * ring transitions from being empty to non-empty.
 	 */
-	if (old_write == READ_ONCE(rbi->ring_buffer->read_index))
+	if (old_write == READ_ONCE(rbi->ring_buffer->read_index)) {
+		++channel->intr_out_empty;
 		vmbus_setevent(channel);
+	}
 }
 
 /* Get the next write location for the specified ring buffer. */
@@ -272,10 +274,19 @@ int hv_ringbuffer_write(struct vmbus_channel *channel,
 	 * is empty since the read index == write index.
 	 */
 	if (bytes_avail_towrite <= totalbytes_towrite) {
+		++channel->out_full_total;
+
+		if (!channel->out_full_flag) {
+			++channel->out_full_first;
+			channel->out_full_flag = true;
+		}
+
 		spin_unlock_irqrestore(&outring_info->ring_lock, flags);
 		return -EAGAIN;
 	}
 
+	channel->out_full_flag = false;
+
 	/* Write to the ring buffer */
 	next_write_location = hv_get_next_write_location(outring_info);
 
@@ -530,6 +541,7 @@ void hv_pkt_iter_close(struct vmbus_channel *channel)
 	if (curr_write_sz <= pending_sz)
 		return;
 
+	++channel->intr_in_full;
 	vmbus_setevent(channel);
 }
 EXPORT_SYMBOL_GPL(hv_pkt_iter_close);
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index 126c2de39e35..1264b17e7e9d 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -1484,6 +1484,38 @@ static ssize_t channel_events_show(const struct vmbus_channel *channel, char *bu
 }
 static VMBUS_CHAN_ATTR(events, S_IRUGO, channel_events_show, NULL);
 
+static ssize_t channel_intr_in_full_show(const struct vmbus_channel *channel,
+					 char *buf)
+{
+	return sprintf(buf, "%llu\n",
+		       (unsigned long long)channel->intr_in_full);
+}
+static VMBUS_CHAN_ATTR(intr_in_full, 0444, channel_intr_in_full_show, NULL);
+
+static ssize_t channel_intr_out_empty_show(const struct vmbus_channel *channel,
+					   char *buf)
+{
+	return sprintf(buf, "%llu\n",
+		       (unsigned long long)channel->intr_out_empty);
+}
+static VMBUS_CHAN_ATTR(intr_out_empty, 0444, channel_intr_out_empty_show, NULL);
+
+static ssize_t channel_out_full_first_show(const struct vmbus_channel *channel,
+					   char *buf)
+{
+	return sprintf(buf, "%llu\n",
+		       (unsigned long long)channel->out_full_first);
+}
+static VMBUS_CHAN_ATTR(out_full_first, 0444, channel_out_full_first_show, NULL);
+
+static ssize_t channel_out_full_total_show(const struct vmbus_channel *channel,
+					   char *buf)
+{
+	return sprintf(buf, "%llu\n",
+		       (unsigned long long)channel->out_full_total);
+}
+static VMBUS_CHAN_ATTR(out_full_total, 0444, channel_out_full_total_show, NULL);
+
 static ssize_t subchannel_monitor_id_show(const struct vmbus_channel *channel,
 					  char *buf)
 {
@@ -1509,6 +1541,10 @@ static struct attribute *vmbus_chan_attrs[] = {
 	&chan_attr_latency.attr,
 	&chan_attr_interrupts.attr,
 	&chan_attr_events.attr,
+	&chan_attr_intr_in_full.attr,
+	&chan_attr_intr_out_empty.attr,
+	&chan_attr_out_full_first.attr,
+	&chan_attr_out_full_total.attr,
 	&chan_attr_monitor_id.attr,
 	&chan_attr_subchannel_id.attr,
 	NULL
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index d5678a0fe598..64698ec8f2ac 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -751,6 +751,19 @@ struct vmbus_channel {
 	u64	interrupts;	/* Host to Guest interrupts */
 	u64	sig_events;	/* Guest to Host events */
 
+	/*
+	 * Guest to host interrupts caused by the outbound ring buffer changing
+	 * from empty to not empty.
+	 */
+	u64 intr_out_empty;
+
+	/*
+	 * Indicates that a full outbound ring buffer was encountered. The flag
+	 * is set to true when a full outbound ring buffer is encountered and
+	 * set to false when a write to the outbound ring buffer is completed.
+	 */
+	bool out_full_flag;
+
 	/* Channel callback's invoked in softirq context */
 	struct tasklet_struct callback_event;
 	void (*onchannel_callback)(void *context);
@@ -903,6 +916,24 @@ struct vmbus_channel {
 	 * vmbus_connection.work_queue and hang: see vmbus_process_offer().
 	 */
 	struct work_struct add_channel_work;
+
+	/*
+	 * Guest to host interrupts caused by the inbound ring buffer changing
+	 * from full to not full while a packet is waiting.
+	 */
+	u64 intr_in_full;
+
+	/*
+	 * The total number of write operations that encountered a full
+	 * outbound ring buffer.
+	 */
+	u64 out_full_total;
+
+	/*
+	 * The number of write operations that were the first to encounter a
+	 * full outbound ring buffer.
+	 */
+	u64 out_full_first;
 };
 
 static inline bool is_hvsock_channel(const struct vmbus_channel *c)
@@ -936,6 +967,21 @@ static inline void *get_per_channel_state(struct vmbus_channel *c)
 static inline void set_channel_pending_send_size(struct vmbus_channel *c,
 						 u32 size)
 {
+	unsigned long flags;
+
+	if (size) {
+		spin_lock_irqsave(&c->outbound.ring_lock, flags);
+		++c->out_full_total;
+
+		if (!c->out_full_flag) {
+			++c->out_full_first;
+			c->out_full_flag = true;
+		}
+		spin_unlock_irqrestore(&c->outbound.ring_lock, flags);
+	} else {
+		c->out_full_flag = false;
+	}
+
 	c->outbound.ring_buffer->pending_send_sz = size;
 }
 
-- 
cgit v1.2.3


From bd30cf533b77420b7c504c09cef5ba26b0c9dcb4 Mon Sep 17 00:00:00 2001
From: Iuliana Prodan <iuliana.prodan@nxp.com>
Date: Fri, 8 Feb 2019 15:50:08 +0200
Subject: crypto: export arc4 defines
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Some arc4 cipher algorithm defines show up in two places:
crypto/arc4.c and drivers/crypto/bcm/cipher.h.
Let's export them in a common header and update their users.

Signed-off-by: Iuliana Prodan <iuliana.prodan@nxp.com>
Reviewed-by: Horia Geantă <horia.geanta@nxp.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/arc4.c               |  5 +----
 drivers/crypto/bcm/cipher.h |  4 +---
 include/crypto/arc4.h       | 13 +++++++++++++
 3 files changed, 15 insertions(+), 7 deletions(-)
 create mode 100644 include/crypto/arc4.h

(limited to 'include')

diff --git a/crypto/arc4.c b/crypto/arc4.c
index 652d24399afa..6c93342e3405 100644
--- a/crypto/arc4.c
+++ b/crypto/arc4.c
@@ -13,14 +13,11 @@
  */
 
 #include <crypto/algapi.h>
+#include <crypto/arc4.h>
 #include <crypto/internal/skcipher.h>
 #include <linux/init.h>
 #include <linux/module.h>
 
-#define ARC4_MIN_KEY_SIZE	1
-#define ARC4_MAX_KEY_SIZE	256
-#define ARC4_BLOCK_SIZE		1
-
 struct arc4_ctx {
 	u32 S[256];
 	u32 x, y;
diff --git a/drivers/crypto/bcm/cipher.h b/drivers/crypto/bcm/cipher.h
index 763c425c41ca..f6da49758954 100644
--- a/drivers/crypto/bcm/cipher.h
+++ b/drivers/crypto/bcm/cipher.h
@@ -23,6 +23,7 @@
 #include <crypto/aes.h>
 #include <crypto/internal/hash.h>
 #include <crypto/aead.h>
+#include <crypto/arc4.h>
 #include <crypto/gcm.h>
 #include <crypto/sha.h>
 #include <crypto/sha3.h>
@@ -34,9 +35,6 @@
 /* Driver supports up to MAX_SPUS SPU blocks */
 #define MAX_SPUS 16
 
-#define ARC4_MIN_KEY_SIZE   1
-#define ARC4_MAX_KEY_SIZE   256
-#define ARC4_BLOCK_SIZE     1
 #define ARC4_STATE_SIZE     4
 
 #define CCM_AES_IV_SIZE    16
diff --git a/include/crypto/arc4.h b/include/crypto/arc4.h
new file mode 100644
index 000000000000..5b2c24ab0139
--- /dev/null
+++ b/include/crypto/arc4.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Common values for ARC4 Cipher Algorithm
+ */
+
+#ifndef _CRYPTO_ARC4_H
+#define _CRYPTO_ARC4_H
+
+#define ARC4_MIN_KEY_SIZE	1
+#define ARC4_MAX_KEY_SIZE	256
+#define ARC4_BLOCK_SIZE		1
+
+#endif /* _CRYPTO_ARC4_H */
-- 
cgit v1.2.3


From 1e9efe6c9976552e88c6e6feaca3a78b8cf5aaf6 Mon Sep 17 00:00:00 2001
From: Kishon Vijay Abraham I <kishon@ti.com>
Date: Mon, 14 Jan 2019 16:45:05 +0530
Subject: PCI: endpoint: Add helper to get first unreserved BAR

Add a helper function pci_epc_get_first_free_bar() to get the first
unreserved BAR that can be used for endpoint function.

Tested-by: Gustavo Pimentel <gustavo.pimentel@synopsys.com>
Signed-off-by: Kishon Vijay Abraham I <kishon@ti.com>
Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
---
 drivers/pci/endpoint/pci-epc-core.c | 23 +++++++++++++++++++++++
 include/linux/pci-epc.h             |  2 ++
 2 files changed, 25 insertions(+)

(limited to 'include')

diff --git a/drivers/pci/endpoint/pci-epc-core.c b/drivers/pci/endpoint/pci-epc-core.c
index 5a099479d9ab..e4712a0f249c 100644
--- a/drivers/pci/endpoint/pci-epc-core.c
+++ b/drivers/pci/endpoint/pci-epc-core.c
@@ -83,6 +83,29 @@ err:
 }
 EXPORT_SYMBOL_GPL(pci_epc_get);
 
+/**
+ * pci_epc_get_first_free_bar() - helper to get first unreserved BAR
+ * @epc_features: pci_epc_features structure that holds the reserved bar bitmap
+ *
+ * Invoke to get the first unreserved BAR that can be used for endpoint
+ * function. For any incorrect value in reserved_bar return '0'.
+ */
+unsigned int pci_epc_get_first_free_bar(const struct pci_epc_features
+					*epc_features)
+{
+	int free_bar;
+
+	if (!epc_features)
+		return 0;
+
+	free_bar = ffz(epc_features->reserved_bar);
+	if (free_bar > 5)
+		return 0;
+
+	return free_bar;
+}
+EXPORT_SYMBOL_GPL(pci_epc_get_first_free_bar);
+
 /**
  * pci_epc_get_features() - get the features supported by EPC
  * @epc: the features supported by *this* EPC device will be returned
diff --git a/include/linux/pci-epc.h b/include/linux/pci-epc.h
index 79fbcf94e14d..94e1ecff98ce 100644
--- a/include/linux/pci-epc.h
+++ b/include/linux/pci-epc.h
@@ -180,6 +180,8 @@ int pci_epc_start(struct pci_epc *epc);
 void pci_epc_stop(struct pci_epc *epc);
 const struct pci_epc_features *pci_epc_get_features(struct pci_epc *epc,
 						    u8 func_no);
+unsigned int pci_epc_get_first_free_bar(const struct pci_epc_features
+					*epc_features);
 struct pci_epc *pci_epc_get(const char *epc_name);
 void pci_epc_put(struct pci_epc *epc);
 
-- 
cgit v1.2.3


From 35ce0d7922d68021062a955407740d262f9ac811 Mon Sep 17 00:00:00 2001
From: Kishon Vijay Abraham I <kishon@ti.com>
Date: Mon, 14 Jan 2019 16:45:13 +0530
Subject: PCI: endpoint: Remove features member in struct pci_epc

Since EPC features are now implemented using pci_epc_features and
all the EPC drivers are moved to using pci_epc_features, remove
features member in struct pci_epc and all the helper macros for
configuring the features.

Tested-by: Gustavo Pimentel <gustavo.pimentel@synopsys.com>
Signed-off-by: Kishon Vijay Abraham I <kishon@ti.com>
Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
---
 include/linux/pci-epc.h | 9 ---------
 1 file changed, 9 deletions(-)

(limited to 'include')

diff --git a/include/linux/pci-epc.h b/include/linux/pci-epc.h
index 94e1ecff98ce..c3ffa3917f88 100644
--- a/include/linux/pci-epc.h
+++ b/include/linux/pci-epc.h
@@ -99,7 +99,6 @@ struct pci_epc {
 	struct config_group		*group;
 	/* spinlock to protect against concurrent access of EP controller */
 	spinlock_t			lock;
-	unsigned int			features;
 };
 
 /**
@@ -120,14 +119,6 @@ struct pci_epc_features {
 	u64	bar_fixed_size[BAR_5 + 1];
 };
 
-#define EPC_FEATURE_NO_LINKUP_NOTIFIER		BIT(0)
-#define EPC_FEATURE_BAR_MASK			(BIT(1) | BIT(2) | BIT(3))
-#define EPC_FEATURE_MSIX_AVAILABLE		BIT(4)
-#define EPC_FEATURE_SET_BAR(features, bar)	\
-		(features |= (EPC_FEATURE_BAR_MASK & (bar << 1)))
-#define EPC_FEATURE_GET_BAR(features)		\
-		((features & EPC_FEATURE_BAR_MASK) >> 1)
-
 #define to_pci_epc(device) container_of((device), struct pci_epc, dev)
 
 #define pci_epc_create(dev, ops)    \
-- 
cgit v1.2.3


From 7416f1f206877fa2f61ada3dadbefdb4817b541f Mon Sep 17 00:00:00 2001
From: Douglas Anderson <dianders@chromium.org>
Date: Thu, 14 Feb 2019 10:12:48 -0800
Subject: PM / Domains: Mark "name" const in genpd_dev_pm_attach_by_name()

The genpd_dev_pm_attach_by_name() simply takes the name and passes it
to of_property_match_string() where the argument is "const char *".
Adding a const here allows a later patch to add a const to
dev_pm_domain_attach_by_name() which allows drivers to pass in a name
that was declared "const" in a driver.

Fixes: 5d6be70add65 ("PM / Domains: Introduce option to attach a device by name to genpd")
Signed-off-by: Douglas Anderson <dianders@chromium.org>
Reviewed-by: Stephen Boyd <swboyd@chromium.org>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/power/domain.c | 2 +-
 include/linux/pm_domain.h   | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index 45eafe8cf7dd..2c334c01fc43 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -2483,7 +2483,7 @@ EXPORT_SYMBOL_GPL(genpd_dev_pm_attach_by_id);
  * power-domain-names DT property. For further description see
  * genpd_dev_pm_attach_by_id().
  */
-struct device *genpd_dev_pm_attach_by_name(struct device *dev, char *name)
+struct device *genpd_dev_pm_attach_by_name(struct device *dev, const char *name)
 {
 	int index;
 
diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h
index dd364abb649a..203be5082f33 100644
--- a/include/linux/pm_domain.h
+++ b/include/linux/pm_domain.h
@@ -271,7 +271,7 @@ int genpd_dev_pm_attach(struct device *dev);
 struct device *genpd_dev_pm_attach_by_id(struct device *dev,
 					 unsigned int index);
 struct device *genpd_dev_pm_attach_by_name(struct device *dev,
-					   char *name);
+					   const char *name);
 #else /* !CONFIG_PM_GENERIC_DOMAINS_OF */
 static inline int of_genpd_add_provider_simple(struct device_node *np,
 					struct generic_pm_domain *genpd)
@@ -324,7 +324,7 @@ static inline struct device *genpd_dev_pm_attach_by_id(struct device *dev,
 }
 
 static inline struct device *genpd_dev_pm_attach_by_name(struct device *dev,
-							 char *name)
+							 const char *name)
 {
 	return NULL;
 }
-- 
cgit v1.2.3


From eeb35df05244c268cd69b425edf6dc6a49ee7ab4 Mon Sep 17 00:00:00 2001
From: Douglas Anderson <dianders@chromium.org>
Date: Thu, 14 Feb 2019 10:12:49 -0800
Subject: PM / Domains: Mark "name" const in dev_pm_domain_attach_by_name()

As of the patch ("PM / Domains: Mark "name" const in
genpd_dev_pm_attach_by_name()") it's clear that the name in
dev_pm_domain_attach_by_name() can be const.  Mark it as so.  This
allows drivers to pass in a name that was declared "const" in a
driver.

Fixes: 27dceb81f445 ("PM / Domains: Introduce dev_pm_domain_attach_by_name()")
Signed-off-by: Douglas Anderson <dianders@chromium.org>
Reviewed-by: Stephen Boyd <swboyd@chromium.org>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/power/common.c | 2 +-
 include/linux/pm_domain.h   | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/drivers/base/power/common.c b/drivers/base/power/common.c
index b413951c6abc..22aedb28aad7 100644
--- a/drivers/base/power/common.c
+++ b/drivers/base/power/common.c
@@ -160,7 +160,7 @@ EXPORT_SYMBOL_GPL(dev_pm_domain_attach_by_id);
  * For a detailed function description, see dev_pm_domain_attach_by_id().
  */
 struct device *dev_pm_domain_attach_by_name(struct device *dev,
-					    char *name)
+					    const char *name)
 {
 	if (dev->pm_domain)
 		return ERR_PTR(-EEXIST);
diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h
index 203be5082f33..1ed5874bcee0 100644
--- a/include/linux/pm_domain.h
+++ b/include/linux/pm_domain.h
@@ -341,7 +341,7 @@ int dev_pm_domain_attach(struct device *dev, bool power_on);
 struct device *dev_pm_domain_attach_by_id(struct device *dev,
 					  unsigned int index);
 struct device *dev_pm_domain_attach_by_name(struct device *dev,
-					    char *name);
+					    const char *name);
 void dev_pm_domain_detach(struct device *dev, bool power_off);
 void dev_pm_domain_set(struct device *dev, struct dev_pm_domain *pd);
 #else
@@ -355,7 +355,7 @@ static inline struct device *dev_pm_domain_attach_by_id(struct device *dev,
 	return NULL;
 }
 static inline struct device *dev_pm_domain_attach_by_name(struct device *dev,
-							  char *name)
+							  const char *name)
 {
 	return NULL;
 }
-- 
cgit v1.2.3


From f8ebfaf6684b03084858d8c55f81867e5171af08 Mon Sep 17 00:00:00 2001
From: Jan Sokolowski <jan.sokolowski@intel.com>
Date: Wed, 13 Feb 2019 18:07:29 +0100
Subject: net: bpf: remove XDP_QUERY_XSK_UMEM enumerator
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Commit c9b47cc1fabc ("xsk: fix bug when trying to use both copy and
zero-copy on one queue id") moved the umem query code to the AF_XDP
core, and therefore removed the need to query the netdevice for a
umem.

This patch removes XDP_QUERY_XSK_UMEM and all code that implement that
behavior, which is just dead code.

Signed-off-by: Jan Sokolowski <jan.sokolowski@intel.com>
Acked-by: Björn Töpel <bjorn.topel@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 drivers/net/ethernet/intel/i40e/i40e_main.c        |  3 ---
 drivers/net/ethernet/intel/i40e/i40e_xsk.c         | 28 ----------------------
 drivers/net/ethernet/intel/i40e/i40e_xsk.h         |  2 --
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c      |  3 ---
 .../net/ethernet/intel/ixgbe/ixgbe_txrx_common.h   |  2 --
 drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c       | 17 -------------
 include/linux/netdevice.h                          |  7 +++---
 7 files changed, 3 insertions(+), 59 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 44856a84738d..5e74a5127849 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -12128,9 +12128,6 @@ static int i40e_xdp(struct net_device *dev,
 	case XDP_QUERY_PROG:
 		xdp->prog_id = vsi->xdp_prog ? vsi->xdp_prog->aux->id : 0;
 		return 0;
-	case XDP_QUERY_XSK_UMEM:
-		return i40e_xsk_umem_query(vsi, &xdp->xsk.umem,
-					   xdp->xsk.queue_id);
 	case XDP_SETUP_XSK_UMEM:
 		return i40e_xsk_umem_setup(vsi, xdp->xsk.umem,
 					   xdp->xsk.queue_id);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
index 96d849460d9b..e190a2c2b9ff 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
@@ -154,34 +154,6 @@ static int i40e_xsk_umem_disable(struct i40e_vsi *vsi, u16 qid)
 	return 0;
 }
 
-/**
- * i40e_xsk_umem_query - Queries a certain ring/qid for its UMEM
- * @vsi: Current VSI
- * @umem: UMEM associated to the ring, if any
- * @qid: Rx ring to associate UMEM to
- *
- * This function will store, if any, the UMEM associated to certain ring.
- *
- * Returns 0 on success, <0 on failure
- **/
-int i40e_xsk_umem_query(struct i40e_vsi *vsi, struct xdp_umem **umem,
-			u16 qid)
-{
-	struct net_device *netdev = vsi->netdev;
-	struct xdp_umem *queried_umem;
-
-	if (vsi->type != I40E_VSI_MAIN)
-		return -EINVAL;
-
-	queried_umem = xdp_get_umem_from_qid(netdev, qid);
-
-	if (!queried_umem)
-		return -EINVAL;
-
-	*umem = queried_umem;
-	return 0;
-}
-
 /**
  * i40e_xsk_umem_setup - Enable/disassociate a UMEM to/from a ring/qid
  * @vsi: Current VSI
diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.h b/drivers/net/ethernet/intel/i40e/i40e_xsk.h
index 9038c5d5cf08..8cc0a2e7d9a2 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_xsk.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.h
@@ -10,8 +10,6 @@ struct zero_copy_allocator;
 
 int i40e_queue_pair_disable(struct i40e_vsi *vsi, int queue_pair);
 int i40e_queue_pair_enable(struct i40e_vsi *vsi, int queue_pair);
-int i40e_xsk_umem_query(struct i40e_vsi *vsi, struct xdp_umem **umem,
-			u16 qid);
 int i40e_xsk_umem_setup(struct i40e_vsi *vsi, struct xdp_umem *umem,
 			u16 qid);
 void i40e_zca_free(struct zero_copy_allocator *alloc, unsigned long handle);
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index b53087a980ef..38c430b94ae3 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -10280,9 +10280,6 @@ static int ixgbe_xdp(struct net_device *dev, struct netdev_bpf *xdp)
 		xdp->prog_id = adapter->xdp_prog ?
 			adapter->xdp_prog->aux->id : 0;
 		return 0;
-	case XDP_QUERY_XSK_UMEM:
-		return ixgbe_xsk_umem_query(adapter, &xdp->xsk.umem,
-					    xdp->xsk.queue_id);
 	case XDP_SETUP_XSK_UMEM:
 		return ixgbe_xsk_umem_setup(adapter, xdp->xsk.umem,
 					    xdp->xsk.queue_id);
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_txrx_common.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_txrx_common.h
index 53d4089f5644..d93a690aff74 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_txrx_common.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_txrx_common.h
@@ -30,8 +30,6 @@ void ixgbe_txrx_ring_enable(struct ixgbe_adapter *adapter, int ring);
 
 struct xdp_umem *ixgbe_xsk_umem(struct ixgbe_adapter *adapter,
 				struct ixgbe_ring *ring);
-int ixgbe_xsk_umem_query(struct ixgbe_adapter *adapter, struct xdp_umem **umem,
-			 u16 qid);
 int ixgbe_xsk_umem_setup(struct ixgbe_adapter *adapter, struct xdp_umem *umem,
 			 u16 qid);
 
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
index 65c3e2c979d4..98870707b51a 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
@@ -174,23 +174,6 @@ static int ixgbe_xsk_umem_disable(struct ixgbe_adapter *adapter, u16 qid)
 	return 0;
 }
 
-int ixgbe_xsk_umem_query(struct ixgbe_adapter *adapter, struct xdp_umem **umem,
-			 u16 qid)
-{
-	if (qid >= adapter->num_rx_queues)
-		return -EINVAL;
-
-	if (adapter->xsk_umems) {
-		if (qid >= adapter->num_xsk_umems)
-			return -EINVAL;
-		*umem = adapter->xsk_umems[qid];
-		return 0;
-	}
-
-	*umem = NULL;
-	return 0;
-}
-
 int ixgbe_xsk_umem_setup(struct ixgbe_adapter *adapter, struct xdp_umem *umem,
 			 u16 qid)
 {
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 1d95e634f3fe..6aedaf1e9a25 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -868,7 +868,6 @@ enum bpf_netdev_command {
 	/* BPF program for offload callbacks, invoked at program load time. */
 	BPF_OFFLOAD_MAP_ALLOC,
 	BPF_OFFLOAD_MAP_FREE,
-	XDP_QUERY_XSK_UMEM,
 	XDP_SETUP_XSK_UMEM,
 };
 
@@ -895,10 +894,10 @@ struct netdev_bpf {
 		struct {
 			struct bpf_offloaded_map *offmap;
 		};
-		/* XDP_QUERY_XSK_UMEM, XDP_SETUP_XSK_UMEM */
+		/* XDP_SETUP_XSK_UMEM */
 		struct {
-			struct xdp_umem *umem; /* out for query*/
-			u16 queue_id; /* in for query */
+			struct xdp_umem *umem;
+			u16 queue_id;
 		} xsk;
 	};
 };
-- 
cgit v1.2.3


From d277ce2d3a75c6c116a6119c3745694f5941eff5 Mon Sep 17 00:00:00 2001
From: Andreas Kemnade <andreas@kemnade.info>
Date: Wed, 16 Jan 2019 23:04:27 +0100
Subject: clk: ti: add a usecount for autoidle

Multiple users might deny autoidle on a clock. So we should have some
counting here, also according to the comment in  _setup_iclk_autoidle().
Also setting autoidle regs is not atomic, so there is another reason
for locking.

Signed-off-by: Andreas Kemnade <andreas@kemnade.info>
Acked-by: Tony Lindgren <tony@atomide.com>
Tested-by: Keerthy <j-keerthy@ti.com>
Signed-off-by: Tero Kristo <t-kristo@ti.com>
---
 drivers/clk/ti/autoidle.c | 32 ++++++++++++++++++++++++++++----
 include/linux/clk/ti.h    |  1 +
 2 files changed, 29 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/drivers/clk/ti/autoidle.c b/drivers/clk/ti/autoidle.c
index a129b4b36ea3..964e97b5478a 100644
--- a/drivers/clk/ti/autoidle.c
+++ b/drivers/clk/ti/autoidle.c
@@ -36,17 +36,41 @@ struct clk_ti_autoidle {
 
 static LIST_HEAD(autoidle_clks);
 
+/*
+ * we have some non-atomic read/write
+ * operations behind it, so lets
+ * take one lock for handling autoidle
+ * of all clocks
+ */
+static DEFINE_SPINLOCK(autoidle_spinlock);
+
 static int _omap2_clk_deny_idle(struct clk_hw_omap *clk)
 {
-	if (clk->ops && clk->ops->deny_idle)
-		clk->ops->deny_idle(clk);
+	if (clk->ops && clk->ops->deny_idle) {
+		unsigned long irqflags;
+
+		spin_lock_irqsave(&autoidle_spinlock, irqflags);
+		clk->autoidle_count++;
+		if (clk->autoidle_count == 1)
+			clk->ops->deny_idle(clk);
+
+		spin_unlock_irqrestore(&autoidle_spinlock, irqflags);
+	}
 	return 0;
 }
 
 static int _omap2_clk_allow_idle(struct clk_hw_omap *clk)
 {
-	if (clk->ops && clk->ops->allow_idle)
-		clk->ops->allow_idle(clk);
+	if (clk->ops && clk->ops->allow_idle) {
+		unsigned long irqflags;
+
+		spin_lock_irqsave(&autoidle_spinlock, irqflags);
+		clk->autoidle_count--;
+		if (clk->autoidle_count == 0)
+			clk->ops->allow_idle(clk);
+
+		spin_unlock_irqrestore(&autoidle_spinlock, irqflags);
+	}
 	return 0;
 }
 
diff --git a/include/linux/clk/ti.h b/include/linux/clk/ti.h
index eacc5df57b99..78872efc7be0 100644
--- a/include/linux/clk/ti.h
+++ b/include/linux/clk/ti.h
@@ -160,6 +160,7 @@ struct clk_hw_omap {
 	struct clockdomain	*clkdm;
 	const struct clk_hw_omap_ops	*ops;
 	u32			context;
+	int			autoidle_count;
 };
 
 /*
-- 
cgit v1.2.3


From 8a2ee44a371c8cbef587ea609908c3cbf1645231 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 15 Feb 2019 19:13:07 +0800
Subject: btrfs: look at bi_size for repair decisions

bio_readpage_error currently uses bi_vcnt to decide if it is worth
retrying an I/O.  But the vector count is mostly an implementation
artifact - it really should figure out if there is more than a
single sector worth retrying.  Use bi_size for that and shift by
PAGE_SHIFT.  This really should be blocks/sectors, but given that
btrfs doesn't support a sector size different from the PAGE_SIZE
using the page size keeps the changes to a minimum.

Reviewed-by: Omar Sandoval <osandov@fb.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/btrfs/extent_io.c | 2 +-
 include/linux/bio.h  | 6 ------
 2 files changed, 1 insertion(+), 7 deletions(-)

(limited to 'include')

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 52abe4082680..dc8ba3ee515d 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2350,7 +2350,7 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
 	int read_mode = 0;
 	blk_status_t status;
 	int ret;
-	unsigned failed_bio_pages = bio_pages_all(failed_bio);
+	unsigned failed_bio_pages = failed_bio->bi_iter.bi_size >> PAGE_SHIFT;
 
 	BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE);
 
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 7380b094dcca..72b4f7be2106 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -263,12 +263,6 @@ static inline void bio_get_last_bvec(struct bio *bio, struct bio_vec *bv)
 		bv->bv_len = iter.bi_bvec_done;
 }
 
-static inline unsigned bio_pages_all(struct bio *bio)
-{
-	WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED));
-	return bio->bi_vcnt;
-}
-
 static inline struct bio_vec *bio_first_bvec_all(struct bio *bio)
 {
 	WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED));
-- 
cgit v1.2.3


From 19d62f6d00972f957c94aba0975c14490cfed385 Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Fri, 15 Feb 2019 19:13:09 +0800
Subject: block: remove bvec_iter_rewind()

Commit 7759eb23fd980 ("block: remove bio_rewind_iter()") removes
bio_rewind_iter(), then no one uses bvec_iter_rewind() any more,
so remove it.

Reviewed-by: Omar Sandoval <osandov@fb.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/bvec.h | 24 ------------------------
 1 file changed, 24 deletions(-)

(limited to 'include')

diff --git a/include/linux/bvec.h b/include/linux/bvec.h
index 02c73c6aa805..ba0ae40e77c9 100644
--- a/include/linux/bvec.h
+++ b/include/linux/bvec.h
@@ -92,30 +92,6 @@ static inline bool bvec_iter_advance(const struct bio_vec *bv,
 	return true;
 }
 
-static inline bool bvec_iter_rewind(const struct bio_vec *bv,
-				     struct bvec_iter *iter,
-				     unsigned int bytes)
-{
-	while (bytes) {
-		unsigned len = min(bytes, iter->bi_bvec_done);
-
-		if (iter->bi_bvec_done == 0) {
-			if (WARN_ONCE(iter->bi_idx == 0,
-				      "Attempted to rewind iter beyond "
-				      "bvec's boundaries\n")) {
-				return false;
-			}
-			iter->bi_idx--;
-			iter->bi_bvec_done = __bvec_iter_bvec(bv, *iter)->bv_len;
-			continue;
-		}
-		bytes -= len;
-		iter->bi_size += len;
-		iter->bi_bvec_done -= len;
-	}
-	return true;
-}
-
 #define for_each_bvec(bvl, bio_vec, iter, start)			\
 	for (iter = (start);						\
 	     (iter).bi_size &&						\
-- 
cgit v1.2.3


From 3d75ca0adef4280650c6690a0c4702a74a6f3c95 Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Fri, 15 Feb 2019 19:13:10 +0800
Subject: block: introduce multi-page bvec helpers

This patch introduces helpers of 'mp_bvec_iter_*' for multi-page bvec
support.

The introduced helpers treate one bvec as real multi-page segment,
which may include more than one pages.

The existed helpers of bvec_iter_* are interfaces for supporting current
bvec iterator which is thought as single-page by drivers, fs, dm and
etc. These introduced helpers will build single-page bvec in flight, so
this way won't break current bio/bvec users, which needn't any change.

Follows some multi-page bvec background:

- bvecs stored in bio->bi_io_vec is always multi-page style

- bvec(struct bio_vec) represents one physically contiguous I/O
  buffer, now the buffer may include more than one page after
  multi-page bvec is supported, and all these pages represented
  by one bvec is physically contiguous. Before multi-page bvec
  support, at most one page is included in one bvec, we call it
  single-page bvec.

- .bv_page of the bvec points to the 1st page in the multi-page bvec

- .bv_offset of the bvec is the offset of the buffer in the bvec

The effect on the current drivers/filesystem/dm/bcache/...:

- almost everyone supposes that one bvec only includes one single
  page, so we keep the sp interface not changed, for example,
  bio_for_each_segment() still returns single-page bvec

- bio_for_each_segment_all() will return single-page bvec too

- during iterating, iterator variable(struct bvec_iter) is always
  updated in multi-page bvec style, and bvec_iter_advance() is kept
  not changed

- returned(copied) single-page bvec is built in flight by bvec
  helpers from the stored multi-page bvec

Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Omar Sandoval <osandov@fb.com>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/bvec.h | 30 +++++++++++++++++++++++++++---
 1 file changed, 27 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/bvec.h b/include/linux/bvec.h
index ba0ae40e77c9..0ae729b1c9fe 100644
--- a/include/linux/bvec.h
+++ b/include/linux/bvec.h
@@ -23,6 +23,7 @@
 #include <linux/kernel.h>
 #include <linux/bug.h>
 #include <linux/errno.h>
+#include <linux/mm.h>
 
 /*
  * was unsigned short, but we might as well be ready for > 64kB I/O pages
@@ -50,16 +51,39 @@ struct bvec_iter {
  */
 #define __bvec_iter_bvec(bvec, iter)	(&(bvec)[(iter).bi_idx])
 
-#define bvec_iter_page(bvec, iter)				\
+/* multi-page (mp_bvec) helpers */
+#define mp_bvec_iter_page(bvec, iter)				\
 	(__bvec_iter_bvec((bvec), (iter))->bv_page)
 
-#define bvec_iter_len(bvec, iter)				\
+#define mp_bvec_iter_len(bvec, iter)				\
 	min((iter).bi_size,					\
 	    __bvec_iter_bvec((bvec), (iter))->bv_len - (iter).bi_bvec_done)
 
-#define bvec_iter_offset(bvec, iter)				\
+#define mp_bvec_iter_offset(bvec, iter)				\
 	(__bvec_iter_bvec((bvec), (iter))->bv_offset + (iter).bi_bvec_done)
 
+#define mp_bvec_iter_page_idx(bvec, iter)			\
+	(mp_bvec_iter_offset((bvec), (iter)) / PAGE_SIZE)
+
+#define mp_bvec_iter_bvec(bvec, iter)				\
+((struct bio_vec) {						\
+	.bv_page	= mp_bvec_iter_page((bvec), (iter)),	\
+	.bv_len		= mp_bvec_iter_len((bvec), (iter)),	\
+	.bv_offset	= mp_bvec_iter_offset((bvec), (iter)),	\
+})
+
+/* For building single-page bvec in flight */
+ #define bvec_iter_offset(bvec, iter)				\
+	(mp_bvec_iter_offset((bvec), (iter)) % PAGE_SIZE)
+
+#define bvec_iter_len(bvec, iter)				\
+	min_t(unsigned, mp_bvec_iter_len((bvec), (iter)),		\
+	      PAGE_SIZE - bvec_iter_offset((bvec), (iter)))
+
+#define bvec_iter_page(bvec, iter)				\
+	nth_page(mp_bvec_iter_page((bvec), (iter)),		\
+		 mp_bvec_iter_page_idx((bvec), (iter)))
+
 #define bvec_iter_bvec(bvec, iter)				\
 ((struct bio_vec) {						\
 	.bv_page	= bvec_iter_page((bvec), (iter)),	\
-- 
cgit v1.2.3


From d18d91740ad22e9d7998884c4d80523d0ba95ddf Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Fri, 15 Feb 2019 19:13:11 +0800
Subject: block: introduce bio_for_each_bvec() and rq_for_each_bvec()

bio_for_each_bvec() is used for iterating over multi-page bvec for bio
split & merge code.

rq_for_each_bvec() can be used for drivers which may handle the
multi-page bvec directly, so far loop is one perfect use case.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Omar Sandoval <osandov@fb.com>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/bio.h    | 10 ++++++++++
 include/linux/blkdev.h |  4 ++++
 2 files changed, 14 insertions(+)

(limited to 'include')

diff --git a/include/linux/bio.h b/include/linux/bio.h
index 72b4f7be2106..7ef8a7505c0a 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -156,6 +156,16 @@ static inline void bio_advance_iter(struct bio *bio, struct bvec_iter *iter,
 #define bio_for_each_segment(bvl, bio, iter)				\
 	__bio_for_each_segment(bvl, bio, iter, (bio)->bi_iter)
 
+#define __bio_for_each_bvec(bvl, bio, iter, start)		\
+	for (iter = (start);						\
+	     (iter).bi_size &&						\
+		((bvl = mp_bvec_iter_bvec((bio)->bi_io_vec, (iter))), 1); \
+	     bio_advance_iter((bio), &(iter), (bvl).bv_len))
+
+/* iterate over multi-page bvec */
+#define bio_for_each_bvec(bvl, bio, iter)			\
+	__bio_for_each_bvec(bvl, bio, iter, (bio)->bi_iter)
+
 #define bio_iter_last(bvec, iter) ((iter).bi_size == (bvec).bv_len)
 
 static inline unsigned bio_segments(struct bio *bio)
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 3603270cb82d..b6292d469ea4 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -792,6 +792,10 @@ struct req_iterator {
 	__rq_for_each_bio(_iter.bio, _rq)			\
 		bio_for_each_segment(bvl, _iter.bio, _iter.iter)
 
+#define rq_for_each_bvec(bvl, _rq, _iter)			\
+	__rq_for_each_bio(_iter.bio, _rq)			\
+		bio_for_each_bvec(bvl, _iter.bio, _iter.iter)
+
 #define rq_iter_last(bvec, _iter)				\
 		(_iter.bio->bi_next == NULL &&			\
 		 bio_iter_last(bvec, _iter.iter))
-- 
cgit v1.2.3


From 45a3fb95298b326ab8175f2bd97bd8666017b692 Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Fri, 15 Feb 2019 19:13:14 +0800
Subject: block: introduce mp_bvec_last_segment()

BTRFS and guard_bio_eod() need to get the last singlepage segment
from one multipage bvec, so introduce this helper to make them happy.

Reviewed-by: Omar Sandoval <osandov@fb.com>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/bvec.h | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

(limited to 'include')

diff --git a/include/linux/bvec.h b/include/linux/bvec.h
index 0ae729b1c9fe..21f76bad7be2 100644
--- a/include/linux/bvec.h
+++ b/include/linux/bvec.h
@@ -131,4 +131,26 @@ static inline bool bvec_iter_advance(const struct bio_vec *bv,
 	.bi_bvec_done	= 0,						\
 }
 
+/*
+ * Get the last single-page segment from the multi-page bvec and store it
+ * in @seg
+ */
+static inline void mp_bvec_last_segment(const struct bio_vec *bvec,
+					struct bio_vec *seg)
+{
+	unsigned total = bvec->bv_offset + bvec->bv_len;
+	unsigned last_page = (total - 1) / PAGE_SIZE;
+
+	seg->bv_page = nth_page(bvec->bv_page, last_page);
+
+	/* the whole segment is inside the last page */
+	if (bvec->bv_offset >= last_page * PAGE_SIZE) {
+		seg->bv_offset = bvec->bv_offset % PAGE_SIZE;
+		seg->bv_len = bvec->bv_len;
+	} else {
+		seg->bv_offset = 0;
+		seg->bv_len = total - last_page * PAGE_SIZE;
+	}
+}
+
 #endif /* __LINUX_BVEC_ITER_H */
-- 
cgit v1.2.3


From 6dc4f100c175dd0511ae8674786e7c9006cdfbfa Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Fri, 15 Feb 2019 19:13:19 +0800
Subject: block: allow bio_for_each_segment_all() to iterate over multi-page
 bvec

This patch introduces one extra iterator variable to bio_for_each_segment_all(),
then we can allow bio_for_each_segment_all() to iterate over multi-page bvec.

Given it is just one mechannical & simple change on all bio_for_each_segment_all()
users, this patch does tree-wide change in one single patch, so that we can
avoid to use a temporary helper for this conversion.

Reviewed-by: Omar Sandoval <osandov@fb.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bio.c                       | 27 ++++++++++++++++++---------
 block/bounce.c                    |  6 ++++--
 drivers/md/bcache/btree.c         |  3 ++-
 drivers/md/dm-crypt.c             |  3 ++-
 drivers/md/raid1.c                |  3 ++-
 drivers/staging/erofs/data.c      |  3 ++-
 drivers/staging/erofs/unzip_vle.c |  3 ++-
 fs/block_dev.c                    |  6 ++++--
 fs/btrfs/compression.c            |  3 ++-
 fs/btrfs/disk-io.c                |  3 ++-
 fs/btrfs/extent_io.c              |  9 ++++++---
 fs/btrfs/inode.c                  |  6 ++++--
 fs/btrfs/raid56.c                 |  3 ++-
 fs/crypto/bio.c                   |  3 ++-
 fs/direct-io.c                    |  4 +++-
 fs/exofs/ore.c                    |  3 ++-
 fs/exofs/ore_raid.c               |  3 ++-
 fs/ext4/page-io.c                 |  3 ++-
 fs/ext4/readpage.c                |  3 ++-
 fs/f2fs/data.c                    |  9 ++++++---
 fs/gfs2/lops.c                    |  9 ++++++---
 fs/gfs2/meta_io.c                 |  3 ++-
 fs/iomap.c                        |  6 ++++--
 fs/mpage.c                        |  3 ++-
 fs/xfs/xfs_aops.c                 |  5 +++--
 include/linux/bio.h               | 11 +++++++++--
 include/linux/bvec.h              | 30 ++++++++++++++++++++++++++++++
 27 files changed, 127 insertions(+), 46 deletions(-)

(limited to 'include')

diff --git a/block/bio.c b/block/bio.c
index 4db1008309ed..968b12fea564 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -1072,8 +1072,9 @@ static int bio_copy_from_iter(struct bio *bio, struct iov_iter *iter)
 {
 	int i;
 	struct bio_vec *bvec;
+	struct bvec_iter_all iter_all;
 
-	bio_for_each_segment_all(bvec, bio, i) {
+	bio_for_each_segment_all(bvec, bio, i, iter_all) {
 		ssize_t ret;
 
 		ret = copy_page_from_iter(bvec->bv_page,
@@ -1103,8 +1104,9 @@ static int bio_copy_to_iter(struct bio *bio, struct iov_iter iter)
 {
 	int i;
 	struct bio_vec *bvec;
+	struct bvec_iter_all iter_all;
 
-	bio_for_each_segment_all(bvec, bio, i) {
+	bio_for_each_segment_all(bvec, bio, i, iter_all) {
 		ssize_t ret;
 
 		ret = copy_page_to_iter(bvec->bv_page,
@@ -1126,8 +1128,9 @@ void bio_free_pages(struct bio *bio)
 {
 	struct bio_vec *bvec;
 	int i;
+	struct bvec_iter_all iter_all;
 
-	bio_for_each_segment_all(bvec, bio, i)
+	bio_for_each_segment_all(bvec, bio, i, iter_all)
 		__free_page(bvec->bv_page);
 }
 EXPORT_SYMBOL(bio_free_pages);
@@ -1295,6 +1298,7 @@ struct bio *bio_map_user_iov(struct request_queue *q,
 	struct bio *bio;
 	int ret;
 	struct bio_vec *bvec;
+	struct bvec_iter_all iter_all;
 
 	if (!iov_iter_count(iter))
 		return ERR_PTR(-EINVAL);
@@ -1368,7 +1372,7 @@ struct bio *bio_map_user_iov(struct request_queue *q,
 	return bio;
 
  out_unmap:
-	bio_for_each_segment_all(bvec, bio, j) {
+	bio_for_each_segment_all(bvec, bio, j, iter_all) {
 		put_page(bvec->bv_page);
 	}
 	bio_put(bio);
@@ -1379,11 +1383,12 @@ static void __bio_unmap_user(struct bio *bio)
 {
 	struct bio_vec *bvec;
 	int i;
+	struct bvec_iter_all iter_all;
 
 	/*
 	 * make sure we dirty pages we wrote to
 	 */
-	bio_for_each_segment_all(bvec, bio, i) {
+	bio_for_each_segment_all(bvec, bio, i, iter_all) {
 		if (bio_data_dir(bio) == READ)
 			set_page_dirty_lock(bvec->bv_page);
 
@@ -1475,8 +1480,9 @@ static void bio_copy_kern_endio_read(struct bio *bio)
 	char *p = bio->bi_private;
 	struct bio_vec *bvec;
 	int i;
+	struct bvec_iter_all iter_all;
 
-	bio_for_each_segment_all(bvec, bio, i) {
+	bio_for_each_segment_all(bvec, bio, i, iter_all) {
 		memcpy(p, page_address(bvec->bv_page), bvec->bv_len);
 		p += bvec->bv_len;
 	}
@@ -1585,8 +1591,9 @@ void bio_set_pages_dirty(struct bio *bio)
 {
 	struct bio_vec *bvec;
 	int i;
+	struct bvec_iter_all iter_all;
 
-	bio_for_each_segment_all(bvec, bio, i) {
+	bio_for_each_segment_all(bvec, bio, i, iter_all) {
 		if (!PageCompound(bvec->bv_page))
 			set_page_dirty_lock(bvec->bv_page);
 	}
@@ -1596,8 +1603,9 @@ static void bio_release_pages(struct bio *bio)
 {
 	struct bio_vec *bvec;
 	int i;
+	struct bvec_iter_all iter_all;
 
-	bio_for_each_segment_all(bvec, bio, i)
+	bio_for_each_segment_all(bvec, bio, i, iter_all)
 		put_page(bvec->bv_page);
 }
 
@@ -1644,8 +1652,9 @@ void bio_check_pages_dirty(struct bio *bio)
 	struct bio_vec *bvec;
 	unsigned long flags;
 	int i;
+	struct bvec_iter_all iter_all;
 
-	bio_for_each_segment_all(bvec, bio, i) {
+	bio_for_each_segment_all(bvec, bio, i, iter_all) {
 		if (!PageDirty(bvec->bv_page) && !PageCompound(bvec->bv_page))
 			goto defer;
 	}
diff --git a/block/bounce.c b/block/bounce.c
index ffb9e9ecfa7e..add085e28b1d 100644
--- a/block/bounce.c
+++ b/block/bounce.c
@@ -165,11 +165,12 @@ static void bounce_end_io(struct bio *bio, mempool_t *pool)
 	struct bio_vec *bvec, orig_vec;
 	int i;
 	struct bvec_iter orig_iter = bio_orig->bi_iter;
+	struct bvec_iter_all iter_all;
 
 	/*
 	 * free up bounce indirect pages used
 	 */
-	bio_for_each_segment_all(bvec, bio, i) {
+	bio_for_each_segment_all(bvec, bio, i, iter_all) {
 		orig_vec = bio_iter_iovec(bio_orig, orig_iter);
 		if (bvec->bv_page != orig_vec.bv_page) {
 			dec_zone_page_state(bvec->bv_page, NR_BOUNCE);
@@ -294,6 +295,7 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig,
 	bool bounce = false;
 	int sectors = 0;
 	bool passthrough = bio_is_passthrough(*bio_orig);
+	struct bvec_iter_all iter_all;
 
 	bio_for_each_segment(from, *bio_orig, iter) {
 		if (i++ < BIO_MAX_PAGES)
@@ -313,7 +315,7 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig,
 	bio = bounce_clone_bio(*bio_orig, GFP_NOIO, passthrough ? NULL :
 			&bounce_bio_set);
 
-	bio_for_each_segment_all(to, bio, i) {
+	bio_for_each_segment_all(to, bio, i, iter_all) {
 		struct page *page = to->bv_page;
 
 		if (page_to_pfn(page) <= q->limits.bounce_pfn)
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index 23cb1dc7296b..64def336f053 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -432,8 +432,9 @@ static void do_btree_node_write(struct btree *b)
 		int j;
 		struct bio_vec *bv;
 		void *base = (void *) ((unsigned long) i & ~(PAGE_SIZE - 1));
+		struct bvec_iter_all iter_all;
 
-		bio_for_each_segment_all(bv, b->bio, j)
+		bio_for_each_segment_all(bv, b->bio, j, iter_all)
 			memcpy(page_address(bv->bv_page),
 			       base + j * PAGE_SIZE, PAGE_SIZE);
 
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 47d4e0d30bf0..9a29037f5615 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -1447,8 +1447,9 @@ static void crypt_free_buffer_pages(struct crypt_config *cc, struct bio *clone)
 {
 	unsigned int i;
 	struct bio_vec *bv;
+	struct bvec_iter_all iter_all;
 
-	bio_for_each_segment_all(bv, clone, i) {
+	bio_for_each_segment_all(bv, clone, i, iter_all) {
 		BUG_ON(!bv->bv_page);
 		mempool_free(bv->bv_page, &cc->page_pool);
 	}
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 7e63ccc4ae7b..88c61d3090b0 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -2112,13 +2112,14 @@ static void process_checks(struct r1bio *r1_bio)
 		struct page **spages = get_resync_pages(sbio)->pages;
 		struct bio_vec *bi;
 		int page_len[RESYNC_PAGES] = { 0 };
+		struct bvec_iter_all iter_all;
 
 		if (sbio->bi_end_io != end_sync_read)
 			continue;
 		/* Now we can 'fixup' the error value */
 		sbio->bi_status = 0;
 
-		bio_for_each_segment_all(bi, sbio, j)
+		bio_for_each_segment_all(bi, sbio, j, iter_all)
 			page_len[j] = bi->bv_len;
 
 		if (!status) {
diff --git a/drivers/staging/erofs/data.c b/drivers/staging/erofs/data.c
index 5a55f0bfdfbb..4871ba7b7d9a 100644
--- a/drivers/staging/erofs/data.c
+++ b/drivers/staging/erofs/data.c
@@ -20,8 +20,9 @@ static inline void read_endio(struct bio *bio)
 	int i;
 	struct bio_vec *bvec;
 	const blk_status_t err = bio->bi_status;
+	struct bvec_iter_all iter_all;
 
-	bio_for_each_segment_all(bvec, bio, i) {
+	bio_for_each_segment_all(bvec, bio, i, iter_all) {
 		struct page *page = bvec->bv_page;
 
 		/* page is already locked */
diff --git a/drivers/staging/erofs/unzip_vle.c b/drivers/staging/erofs/unzip_vle.c
index 4ac1099a39c6..c057c5616b1d 100644
--- a/drivers/staging/erofs/unzip_vle.c
+++ b/drivers/staging/erofs/unzip_vle.c
@@ -830,8 +830,9 @@ static inline void z_erofs_vle_read_endio(struct bio *bio)
 #ifdef EROFS_FS_HAS_MANAGED_CACHE
 	struct address_space *mc = NULL;
 #endif
+	struct bvec_iter_all iter_all;
 
-	bio_for_each_segment_all(bvec, bio, i) {
+	bio_for_each_segment_all(bvec, bio, i, iter_all) {
 		struct page *page = bvec->bv_page;
 		bool cachemngd = false;
 
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 58a4c1217fa8..7758adee6efe 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -211,6 +211,7 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter,
 	ssize_t ret;
 	blk_qc_t qc;
 	int i;
+	struct bvec_iter_all iter_all;
 
 	if ((pos | iov_iter_alignment(iter)) &
 	    (bdev_logical_block_size(bdev) - 1))
@@ -260,7 +261,7 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter,
 	}
 	__set_current_state(TASK_RUNNING);
 
-	bio_for_each_segment_all(bvec, &bio, i) {
+	bio_for_each_segment_all(bvec, &bio, i, iter_all) {
 		if (should_dirty && !PageCompound(bvec->bv_page))
 			set_page_dirty_lock(bvec->bv_page);
 		put_page(bvec->bv_page);
@@ -329,8 +330,9 @@ static void blkdev_bio_end_io(struct bio *bio)
 	} else {
 		struct bio_vec *bvec;
 		int i;
+		struct bvec_iter_all iter_all;
 
-		bio_for_each_segment_all(bvec, bio, i)
+		bio_for_each_segment_all(bvec, bio, i, iter_all)
 			put_page(bvec->bv_page);
 		bio_put(bio);
 	}
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 548057630b69..6896ea60c843 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -162,13 +162,14 @@ csum_failed:
 	} else {
 		int i;
 		struct bio_vec *bvec;
+		struct bvec_iter_all iter_all;
 
 		/*
 		 * we have verified the checksum already, set page
 		 * checked so the end_io handlers know about it
 		 */
 		ASSERT(!bio_flagged(bio, BIO_CLONED));
-		bio_for_each_segment_all(bvec, cb->orig_bio, i)
+		bio_for_each_segment_all(bvec, cb->orig_bio, i, iter_all)
 			SetPageChecked(bvec->bv_page);
 
 		bio_endio(cb->orig_bio);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 6a2a2a951705..ca1b7da6dd1b 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -832,9 +832,10 @@ static blk_status_t btree_csum_one_bio(struct bio *bio)
 	struct bio_vec *bvec;
 	struct btrfs_root *root;
 	int i, ret = 0;
+	struct bvec_iter_all iter_all;
 
 	ASSERT(!bio_flagged(bio, BIO_CLONED));
-	bio_for_each_segment_all(bvec, bio, i) {
+	bio_for_each_segment_all(bvec, bio, i, iter_all) {
 		root = BTRFS_I(bvec->bv_page->mapping->host)->root;
 		ret = csum_dirty_buffer(root->fs_info, bvec->bv_page);
 		if (ret)
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 986ef49b0269..4ed58c9a94a9 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2422,9 +2422,10 @@ static void end_bio_extent_writepage(struct bio *bio)
 	u64 start;
 	u64 end;
 	int i;
+	struct bvec_iter_all iter_all;
 
 	ASSERT(!bio_flagged(bio, BIO_CLONED));
-	bio_for_each_segment_all(bvec, bio, i) {
+	bio_for_each_segment_all(bvec, bio, i, iter_all) {
 		struct page *page = bvec->bv_page;
 		struct inode *inode = page->mapping->host;
 		struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
@@ -2493,9 +2494,10 @@ static void end_bio_extent_readpage(struct bio *bio)
 	int mirror;
 	int ret;
 	int i;
+	struct bvec_iter_all iter_all;
 
 	ASSERT(!bio_flagged(bio, BIO_CLONED));
-	bio_for_each_segment_all(bvec, bio, i) {
+	bio_for_each_segment_all(bvec, bio, i, iter_all) {
 		struct page *page = bvec->bv_page;
 		struct inode *inode = page->mapping->host;
 		struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
@@ -3635,9 +3637,10 @@ static void end_bio_extent_buffer_writepage(struct bio *bio)
 	struct bio_vec *bvec;
 	struct extent_buffer *eb;
 	int i, done;
+	struct bvec_iter_all iter_all;
 
 	ASSERT(!bio_flagged(bio, BIO_CLONED));
-	bio_for_each_segment_all(bvec, bio, i) {
+	bio_for_each_segment_all(bvec, bio, i, iter_all) {
 		struct page *page = bvec->bv_page;
 
 		eb = (struct extent_buffer *)page->private;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 5c349667c761..7ade5769f691 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -7777,6 +7777,7 @@ static void btrfs_retry_endio_nocsum(struct bio *bio)
 	struct bio_vec *bvec;
 	struct extent_io_tree *io_tree, *failure_tree;
 	int i;
+	struct bvec_iter_all iter_all;
 
 	if (bio->bi_status)
 		goto end;
@@ -7788,7 +7789,7 @@ static void btrfs_retry_endio_nocsum(struct bio *bio)
 
 	done->uptodate = 1;
 	ASSERT(!bio_flagged(bio, BIO_CLONED));
-	bio_for_each_segment_all(bvec, bio, i)
+	bio_for_each_segment_all(bvec, bio, i, iter_all)
 		clean_io_failure(BTRFS_I(inode)->root->fs_info, failure_tree,
 				 io_tree, done->start, bvec->bv_page,
 				 btrfs_ino(BTRFS_I(inode)), 0);
@@ -7867,6 +7868,7 @@ static void btrfs_retry_endio(struct bio *bio)
 	int uptodate;
 	int ret;
 	int i;
+	struct bvec_iter_all iter_all;
 
 	if (bio->bi_status)
 		goto end;
@@ -7880,7 +7882,7 @@ static void btrfs_retry_endio(struct bio *bio)
 	failure_tree = &BTRFS_I(inode)->io_failure_tree;
 
 	ASSERT(!bio_flagged(bio, BIO_CLONED));
-	bio_for_each_segment_all(bvec, bio, i) {
+	bio_for_each_segment_all(bvec, bio, i, iter_all) {
 		ret = __readpage_endio_check(inode, io_bio, i, bvec->bv_page,
 					     bvec->bv_offset, done->start,
 					     bvec->bv_len);
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index e74455eb42f9..1869ba8e5981 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -1443,10 +1443,11 @@ static void set_bio_pages_uptodate(struct bio *bio)
 {
 	struct bio_vec *bvec;
 	int i;
+	struct bvec_iter_all iter_all;
 
 	ASSERT(!bio_flagged(bio, BIO_CLONED));
 
-	bio_for_each_segment_all(bvec, bio, i)
+	bio_for_each_segment_all(bvec, bio, i, iter_all)
 		SetPageUptodate(bvec->bv_page);
 }
 
diff --git a/fs/crypto/bio.c b/fs/crypto/bio.c
index 0959044c5cee..5759bcd018cd 100644
--- a/fs/crypto/bio.c
+++ b/fs/crypto/bio.c
@@ -30,8 +30,9 @@ static void __fscrypt_decrypt_bio(struct bio *bio, bool done)
 {
 	struct bio_vec *bv;
 	int i;
+	struct bvec_iter_all iter_all;
 
-	bio_for_each_segment_all(bv, bio, i) {
+	bio_for_each_segment_all(bv, bio, i, iter_all) {
 		struct page *page = bv->bv_page;
 		int ret = fscrypt_decrypt_page(page->mapping->host, page,
 				PAGE_SIZE, 0, page->index);
diff --git a/fs/direct-io.c b/fs/direct-io.c
index ec2fb6fe6d37..9bb015bc4a83 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -551,7 +551,9 @@ static blk_status_t dio_bio_complete(struct dio *dio, struct bio *bio)
 	if (dio->is_async && dio->op == REQ_OP_READ && dio->should_dirty) {
 		bio_check_pages_dirty(bio);	/* transfers ownership */
 	} else {
-		bio_for_each_segment_all(bvec, bio, i) {
+		struct bvec_iter_all iter_all;
+
+		bio_for_each_segment_all(bvec, bio, i, iter_all) {
 			struct page *page = bvec->bv_page;
 
 			if (dio->op == REQ_OP_READ && !PageCompound(page) &&
diff --git a/fs/exofs/ore.c b/fs/exofs/ore.c
index 5331a15a61f1..24a8e34882e9 100644
--- a/fs/exofs/ore.c
+++ b/fs/exofs/ore.c
@@ -420,8 +420,9 @@ static void _clear_bio(struct bio *bio)
 {
 	struct bio_vec *bv;
 	unsigned i;
+	struct bvec_iter_all iter_all;
 
-	bio_for_each_segment_all(bv, bio, i) {
+	bio_for_each_segment_all(bv, bio, i, iter_all) {
 		unsigned this_count = bv->bv_len;
 
 		if (likely(PAGE_SIZE == this_count))
diff --git a/fs/exofs/ore_raid.c b/fs/exofs/ore_raid.c
index 199590f36203..e83bab54b03e 100644
--- a/fs/exofs/ore_raid.c
+++ b/fs/exofs/ore_raid.c
@@ -468,11 +468,12 @@ static void _mark_read4write_pages_uptodate(struct ore_io_state *ios, int ret)
 	/* loop on all devices all pages */
 	for (d = 0; d < ios->numdevs; d++) {
 		struct bio *bio = ios->per_dev[d].bio;
+		struct bvec_iter_all iter_all;
 
 		if (!bio)
 			continue;
 
-		bio_for_each_segment_all(bv, bio, i) {
+		bio_for_each_segment_all(bv, bio, i, iter_all) {
 			struct page *page = bv->bv_page;
 
 			SetPageUptodate(page);
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 2aa62d58d8dd..cff4c4aa7a9c 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -63,8 +63,9 @@ static void ext4_finish_bio(struct bio *bio)
 {
 	int i;
 	struct bio_vec *bvec;
+	struct bvec_iter_all iter_all;
 
-	bio_for_each_segment_all(bvec, bio, i) {
+	bio_for_each_segment_all(bvec, bio, i, iter_all) {
 		struct page *page = bvec->bv_page;
 #ifdef CONFIG_EXT4_FS_ENCRYPTION
 		struct page *data_page = NULL;
diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c
index 6aa282ee455a..e53639784892 100644
--- a/fs/ext4/readpage.c
+++ b/fs/ext4/readpage.c
@@ -72,6 +72,7 @@ static void mpage_end_io(struct bio *bio)
 {
 	struct bio_vec *bv;
 	int i;
+	struct bvec_iter_all iter_all;
 
 	if (ext4_bio_encrypted(bio)) {
 		if (bio->bi_status) {
@@ -81,7 +82,7 @@ static void mpage_end_io(struct bio *bio)
 			return;
 		}
 	}
-	bio_for_each_segment_all(bv, bio, i) {
+	bio_for_each_segment_all(bv, bio, i, iter_all) {
 		struct page *page = bv->bv_page;
 
 		if (!bio->bi_status) {
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index f91d8630c9a2..da060b77f64d 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -87,8 +87,9 @@ static void __read_end_io(struct bio *bio)
 	struct page *page;
 	struct bio_vec *bv;
 	int i;
+	struct bvec_iter_all iter_all;
 
-	bio_for_each_segment_all(bv, bio, i) {
+	bio_for_each_segment_all(bv, bio, i, iter_all) {
 		page = bv->bv_page;
 
 		/* PG_error was set if any post_read step failed */
@@ -164,13 +165,14 @@ static void f2fs_write_end_io(struct bio *bio)
 	struct f2fs_sb_info *sbi = bio->bi_private;
 	struct bio_vec *bvec;
 	int i;
+	struct bvec_iter_all iter_all;
 
 	if (time_to_inject(sbi, FAULT_WRITE_IO)) {
 		f2fs_show_injection_info(FAULT_WRITE_IO);
 		bio->bi_status = BLK_STS_IOERR;
 	}
 
-	bio_for_each_segment_all(bvec, bio, i) {
+	bio_for_each_segment_all(bvec, bio, i, iter_all) {
 		struct page *page = bvec->bv_page;
 		enum count_type type = WB_DATA_TYPE(page);
 
@@ -347,6 +349,7 @@ static bool __has_merged_page(struct f2fs_bio_info *io, struct inode *inode,
 	struct bio_vec *bvec;
 	struct page *target;
 	int i;
+	struct bvec_iter_all iter_all;
 
 	if (!io->bio)
 		return false;
@@ -354,7 +357,7 @@ static bool __has_merged_page(struct f2fs_bio_info *io, struct inode *inode,
 	if (!inode && !page && !ino)
 		return true;
 
-	bio_for_each_segment_all(bvec, io->bio, i) {
+	bio_for_each_segment_all(bvec, io->bio, i, iter_all) {
 
 		if (bvec->bv_page->mapping)
 			target = bvec->bv_page;
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 94dcab655bc0..15deefeaafd0 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -170,7 +170,8 @@ u64 gfs2_log_bmap(struct gfs2_sbd *sdp)
  * that is pinned in the pagecache.
  */
 
-static void gfs2_end_log_write_bh(struct gfs2_sbd *sdp, struct bio_vec *bvec,
+static void gfs2_end_log_write_bh(struct gfs2_sbd *sdp,
+				  struct bio_vec *bvec,
 				  blk_status_t error)
 {
 	struct buffer_head *bh, *next;
@@ -208,6 +209,7 @@ static void gfs2_end_log_write(struct bio *bio)
 	struct bio_vec *bvec;
 	struct page *page;
 	int i;
+	struct bvec_iter_all iter_all;
 
 	if (bio->bi_status) {
 		fs_err(sdp, "Error %d writing to journal, jid=%u\n",
@@ -215,7 +217,7 @@ static void gfs2_end_log_write(struct bio *bio)
 		wake_up(&sdp->sd_logd_waitq);
 	}
 
-	bio_for_each_segment_all(bvec, bio, i) {
+	bio_for_each_segment_all(bvec, bio, i, iter_all) {
 		page = bvec->bv_page;
 		if (page_has_buffers(page))
 			gfs2_end_log_write_bh(sdp, bvec, bio->bi_status);
@@ -388,8 +390,9 @@ static void gfs2_end_log_read(struct bio *bio)
 	struct page *page;
 	struct bio_vec *bvec;
 	int i;
+	struct bvec_iter_all iter_all;
 
-	bio_for_each_segment_all(bvec, bio, i) {
+	bio_for_each_segment_all(bvec, bio, i, iter_all) {
 		page = bvec->bv_page;
 		if (bio->bi_status) {
 			int err = blk_status_to_errno(bio->bi_status);
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index be9c0bf697fe..3201342404a7 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -190,8 +190,9 @@ static void gfs2_meta_read_endio(struct bio *bio)
 {
 	struct bio_vec *bvec;
 	int i;
+	struct bvec_iter_all iter_all;
 
-	bio_for_each_segment_all(bvec, bio, i) {
+	bio_for_each_segment_all(bvec, bio, i, iter_all) {
 		struct page *page = bvec->bv_page;
 		struct buffer_head *bh = page_buffers(page);
 		unsigned int len = bvec->bv_len;
diff --git a/fs/iomap.c b/fs/iomap.c
index a3088fae567b..af736acd9006 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -267,8 +267,9 @@ iomap_read_end_io(struct bio *bio)
 	int error = blk_status_to_errno(bio->bi_status);
 	struct bio_vec *bvec;
 	int i;
+	struct bvec_iter_all iter_all;
 
-	bio_for_each_segment_all(bvec, bio, i)
+	bio_for_each_segment_all(bvec, bio, i, iter_all)
 		iomap_read_page_end_io(bvec, error);
 	bio_put(bio);
 }
@@ -1559,8 +1560,9 @@ static void iomap_dio_bio_end_io(struct bio *bio)
 	} else {
 		struct bio_vec *bvec;
 		int i;
+		struct bvec_iter_all iter_all;
 
-		bio_for_each_segment_all(bvec, bio, i)
+		bio_for_each_segment_all(bvec, bio, i, iter_all)
 			put_page(bvec->bv_page);
 		bio_put(bio);
 	}
diff --git a/fs/mpage.c b/fs/mpage.c
index c820dc9bebab..3f19da75178b 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -48,8 +48,9 @@ static void mpage_end_io(struct bio *bio)
 {
 	struct bio_vec *bv;
 	int i;
+	struct bvec_iter_all iter_all;
 
-	bio_for_each_segment_all(bv, bio, i) {
+	bio_for_each_segment_all(bv, bio, i, iter_all) {
 		struct page *page = bv->bv_page;
 		page_endio(page, bio_op(bio),
 			   blk_status_to_errno(bio->bi_status));
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 338b9d9984e0..1f1829e506e8 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -62,7 +62,7 @@ xfs_find_daxdev_for_inode(
 static void
 xfs_finish_page_writeback(
 	struct inode		*inode,
-	struct bio_vec		*bvec,
+	struct bio_vec	*bvec,
 	int			error)
 {
 	struct iomap_page	*iop = to_iomap_page(bvec->bv_page);
@@ -98,6 +98,7 @@ xfs_destroy_ioend(
 	for (bio = &ioend->io_inline_bio; bio; bio = next) {
 		struct bio_vec	*bvec;
 		int		i;
+		struct bvec_iter_all iter_all;
 
 		/*
 		 * For the last bio, bi_private points to the ioend, so we
@@ -109,7 +110,7 @@ xfs_destroy_ioend(
 			next = bio->bi_private;
 
 		/* walk each page on bio, ending page IO on them */
-		bio_for_each_segment_all(bvec, bio, i)
+		bio_for_each_segment_all(bvec, bio, i, iter_all)
 			xfs_finish_page_writeback(inode, bvec, error);
 		bio_put(bio);
 	}
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 7ef8a7505c0a..089370eb84d9 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -128,12 +128,19 @@ static inline bool bio_full(struct bio *bio)
 	return bio->bi_vcnt >= bio->bi_max_vecs;
 }
 
+#define mp_bvec_for_each_segment(bv, bvl, i, iter_all)			\
+	for (bv = bvec_init_iter_all(&iter_all);			\
+		(iter_all.done < (bvl)->bv_len) &&			\
+		(mp_bvec_next_segment((bvl), &iter_all), 1);		\
+		iter_all.done += bv->bv_len, i += 1)
+
 /*
  * drivers should _never_ use the all version - the bio may have been split
  * before it got to the driver and the driver won't own all of it
  */
-#define bio_for_each_segment_all(bvl, bio, i)				\
-	for (i = 0, bvl = (bio)->bi_io_vec; i < (bio)->bi_vcnt; i++, bvl++)
+#define bio_for_each_segment_all(bvl, bio, i, iter_all)		\
+	for (i = 0, iter_all.idx = 0; iter_all.idx < (bio)->bi_vcnt; iter_all.idx++)	\
+		mp_bvec_for_each_segment(bvl, &((bio)->bi_io_vec[iter_all.idx]), i, iter_all)
 
 static inline void bio_advance_iter(struct bio *bio, struct bvec_iter *iter,
 				    unsigned bytes)
diff --git a/include/linux/bvec.h b/include/linux/bvec.h
index 21f76bad7be2..30a57b68d017 100644
--- a/include/linux/bvec.h
+++ b/include/linux/bvec.h
@@ -45,6 +45,12 @@ struct bvec_iter {
 						   current bvec */
 };
 
+struct bvec_iter_all {
+	struct bio_vec	bv;
+	int		idx;
+	unsigned	done;
+};
+
 /*
  * various member access, note that bio_data should of course not be used
  * on highmem page vectors
@@ -131,6 +137,30 @@ static inline bool bvec_iter_advance(const struct bio_vec *bv,
 	.bi_bvec_done	= 0,						\
 }
 
+static inline struct bio_vec *bvec_init_iter_all(struct bvec_iter_all *iter_all)
+{
+	iter_all->bv.bv_page = NULL;
+	iter_all->done = 0;
+
+	return &iter_all->bv;
+}
+
+static inline void mp_bvec_next_segment(const struct bio_vec *bvec,
+					struct bvec_iter_all *iter_all)
+{
+	struct bio_vec *bv = &iter_all->bv;
+
+	if (bv->bv_page) {
+		bv->bv_page = nth_page(bv->bv_page, 1);
+		bv->bv_offset = 0;
+	} else {
+		bv->bv_page = bvec->bv_page;
+		bv->bv_offset = bvec->bv_offset;
+	}
+	bv->bv_len = min_t(unsigned int, PAGE_SIZE - bv->bv_offset,
+			   bvec->bv_len - iter_all->done);
+}
+
 /*
  * Get the last single-page segment from the multi-page bvec and store it
  * in @seg
-- 
cgit v1.2.3


From 07173c3ec276cbb18dc0e0687d37d310e98a1480 Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Fri, 15 Feb 2019 19:13:20 +0800
Subject: block: enable multipage bvecs

This patch pulls the trigger for multi-page bvecs.

Reviewed-by: Omar Sandoval <osandov@fb.com>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bio.c         | 22 +++++++++++++++-------
 fs/iomap.c          |  4 ++--
 fs/xfs/xfs_aops.c   |  4 ++--
 include/linux/bio.h |  2 +-
 4 files changed, 20 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/block/bio.c b/block/bio.c
index 968b12fea564..83a2dfa417ca 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -753,6 +753,8 @@ EXPORT_SYMBOL(bio_add_pc_page);
  * @page: page to add
  * @len: length of the data to add
  * @off: offset of the data in @page
+ * @same_page: if %true only merge if the new data is in the same physical
+ *		page as the last segment of the bio.
  *
  * Try to add the data at @page + @off to the last bvec of @bio.  This is a
  * a useful optimisation for file systems with a block size smaller than the
@@ -761,19 +763,25 @@ EXPORT_SYMBOL(bio_add_pc_page);
  * Return %true on success or %false on failure.
  */
 bool __bio_try_merge_page(struct bio *bio, struct page *page,
-		unsigned int len, unsigned int off)
+		unsigned int len, unsigned int off, bool same_page)
 {
 	if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)))
 		return false;
 
 	if (bio->bi_vcnt > 0) {
 		struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
+		phys_addr_t vec_end_addr = page_to_phys(bv->bv_page) +
+			bv->bv_offset + bv->bv_len - 1;
+		phys_addr_t page_addr = page_to_phys(page);
 
-		if (page == bv->bv_page && off == bv->bv_offset + bv->bv_len) {
-			bv->bv_len += len;
-			bio->bi_iter.bi_size += len;
-			return true;
-		}
+		if (vec_end_addr + 1 != page_addr + off)
+			return false;
+		if (same_page && (vec_end_addr & PAGE_MASK) != page_addr)
+			return false;
+
+		bv->bv_len += len;
+		bio->bi_iter.bi_size += len;
+		return true;
 	}
 	return false;
 }
@@ -819,7 +827,7 @@ EXPORT_SYMBOL_GPL(__bio_add_page);
 int bio_add_page(struct bio *bio, struct page *page,
 		 unsigned int len, unsigned int offset)
 {
-	if (!__bio_try_merge_page(bio, page, len, offset)) {
+	if (!__bio_try_merge_page(bio, page, len, offset, false)) {
 		if (bio_full(bio))
 			return 0;
 		__bio_add_page(bio, page, len, offset);
diff --git a/fs/iomap.c b/fs/iomap.c
index af736acd9006..0c350e658b7f 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -318,7 +318,7 @@ iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
 	 */
 	sector = iomap_sector(iomap, pos);
 	if (ctx->bio && bio_end_sector(ctx->bio) == sector) {
-		if (__bio_try_merge_page(ctx->bio, page, plen, poff))
+		if (__bio_try_merge_page(ctx->bio, page, plen, poff, true))
 			goto done;
 		is_contig = true;
 	}
@@ -349,7 +349,7 @@ iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
 		ctx->bio->bi_end_io = iomap_read_end_io;
 	}
 
-	__bio_add_page(ctx->bio, page, plen, poff);
+	bio_add_page(ctx->bio, page, plen, poff);
 done:
 	/*
 	 * Move the caller beyond our range so that it keeps making progress.
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 1f1829e506e8..b9fd44168f61 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -616,12 +616,12 @@ xfs_add_to_ioend(
 				bdev, sector);
 	}
 
-	if (!__bio_try_merge_page(wpc->ioend->io_bio, page, len, poff)) {
+	if (!__bio_try_merge_page(wpc->ioend->io_bio, page, len, poff, true)) {
 		if (iop)
 			atomic_inc(&iop->write_count);
 		if (bio_full(wpc->ioend->io_bio))
 			xfs_chain_bio(wpc->ioend, wbc, bdev, sector);
-		__bio_add_page(wpc->ioend->io_bio, page, len, poff);
+		bio_add_page(wpc->ioend->io_bio, page, len, poff);
 	}
 
 	wpc->ioend->io_size += len;
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 089370eb84d9..9f77adcfde82 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -441,7 +441,7 @@ extern int bio_add_page(struct bio *, struct page *, unsigned int,unsigned int);
 extern int bio_add_pc_page(struct request_queue *, struct bio *, struct page *,
 			   unsigned int, unsigned int);
 bool __bio_try_merge_page(struct bio *bio, struct page *page,
-		unsigned int len, unsigned int off);
+		unsigned int len, unsigned int off, bool same_page);
 void __bio_add_page(struct bio *bio, struct page *page,
 		unsigned int len, unsigned int off);
 int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter);
-- 
cgit v1.2.3


From 6861428921b51113520cd47897be6c2774e4fc58 Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Fri, 15 Feb 2019 19:13:21 +0800
Subject: block: always define BIO_MAX_PAGES as 256

Now multi-page bvec can cover CONFIG_THP_SWAP, so we don't need to
increase BIO_MAX_PAGES for it.

CONFIG_THP_SWAP needs to split one THP into normal pages and adds
them all to one bio. With multipage-bvec, it just takes one bvec to
hold them all.

Reviewed-by: Omar Sandoval <osandov@fb.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/bio.h | 8 --------
 1 file changed, 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/bio.h b/include/linux/bio.h
index 9f77adcfde82..bdd11d4c2f05 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -34,15 +34,7 @@
 #define BIO_BUG_ON
 #endif
 
-#ifdef CONFIG_THP_SWAP
-#if HPAGE_PMD_NR > 256
-#define BIO_MAX_PAGES		HPAGE_PMD_NR
-#else
 #define BIO_MAX_PAGES		256
-#endif
-#else
-#define BIO_MAX_PAGES		256
-#endif
 
 #define bio_prio(bio)			(bio)->bi_ioprio
 #define bio_set_prio(bio, prio)		((bio)->bi_ioprio = prio)
-- 
cgit v1.2.3


From 2705c93742e91730d335838025d75d8043861174 Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Fri, 15 Feb 2019 19:13:23 +0800
Subject: block: kill QUEUE_FLAG_NO_SG_MERGE

Since bdced438acd83ad83a6c ("block: setup bi_phys_segments after splitting"),
physical segment number is mainly figured out in blk_queue_split() for
fast path, and the flag of BIO_SEG_VALID is set there too.

Now only blk_recount_segments() and blk_recalc_rq_segments() use this
flag.

Basically blk_recount_segments() is bypassed in fast path given BIO_SEG_VALID
is set in blk_queue_split().

For another user of blk_recalc_rq_segments():

- run in partial completion branch of blk_update_request, which is an unusual case

- run in blk_cloned_rq_check_limits(), still not a big problem if the flag is killed
since dm-rq is the only user.

Multi-page bvec is enabled now, not doing S/G merging is rather pointless with the
current setup of the I/O path, as it isn't going to save you a significant amount
of cycles.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Omar Sandoval <osandov@fb.com>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-merge.c      | 31 ++++++-------------------------
 block/blk-mq-debugfs.c |  1 -
 block/blk-mq.c         |  3 ---
 drivers/md/dm-table.c  | 13 -------------
 include/linux/blkdev.h |  1 -
 5 files changed, 6 insertions(+), 43 deletions(-)

(limited to 'include')

diff --git a/block/blk-merge.c b/block/blk-merge.c
index 1912499b08b7..bed065904677 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -358,8 +358,7 @@ void blk_queue_split(struct request_queue *q, struct bio **bio)
 EXPORT_SYMBOL(blk_queue_split);
 
 static unsigned int __blk_recalc_rq_segments(struct request_queue *q,
-					     struct bio *bio,
-					     bool no_sg_merge)
+					     struct bio *bio)
 {
 	struct bio_vec bv, bvprv = { NULL };
 	int prev = 0;
@@ -385,13 +384,6 @@ static unsigned int __blk_recalc_rq_segments(struct request_queue *q,
 	nr_phys_segs = 0;
 	for_each_bio(bio) {
 		bio_for_each_bvec(bv, bio, iter) {
-			/*
-			 * If SG merging is disabled, each bio vector is
-			 * a segment
-			 */
-			if (no_sg_merge)
-				goto new_segment;
-
 			if (prev) {
 				if (seg_size + bv.bv_len
 				    > queue_max_segment_size(q))
@@ -421,27 +413,16 @@ new_segment:
 
 void blk_recalc_rq_segments(struct request *rq)
 {
-	bool no_sg_merge = !!test_bit(QUEUE_FLAG_NO_SG_MERGE,
-			&rq->q->queue_flags);
-
-	rq->nr_phys_segments = __blk_recalc_rq_segments(rq->q, rq->bio,
-			no_sg_merge);
+	rq->nr_phys_segments = __blk_recalc_rq_segments(rq->q, rq->bio);
 }
 
 void blk_recount_segments(struct request_queue *q, struct bio *bio)
 {
-	unsigned short seg_cnt = bio_segments(bio);
-
-	if (test_bit(QUEUE_FLAG_NO_SG_MERGE, &q->queue_flags) &&
-			(seg_cnt < queue_max_segments(q)))
-		bio->bi_phys_segments = seg_cnt;
-	else {
-		struct bio *nxt = bio->bi_next;
+	struct bio *nxt = bio->bi_next;
 
-		bio->bi_next = NULL;
-		bio->bi_phys_segments = __blk_recalc_rq_segments(q, bio, false);
-		bio->bi_next = nxt;
-	}
+	bio->bi_next = NULL;
+	bio->bi_phys_segments = __blk_recalc_rq_segments(q, bio);
+	bio->bi_next = nxt;
 
 	bio_set_flag(bio, BIO_SEG_VALID);
 }
diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index c782e81db627..697d6213c82b 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -128,7 +128,6 @@ static const char *const blk_queue_flag_name[] = {
 	QUEUE_FLAG_NAME(SAME_FORCE),
 	QUEUE_FLAG_NAME(DEAD),
 	QUEUE_FLAG_NAME(INIT_DONE),
-	QUEUE_FLAG_NAME(NO_SG_MERGE),
 	QUEUE_FLAG_NAME(POLL),
 	QUEUE_FLAG_NAME(WC),
 	QUEUE_FLAG_NAME(FUA),
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 44d471ff8754..fa508ee31742 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2837,9 +2837,6 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
 	    set->map[HCTX_TYPE_POLL].nr_queues)
 		blk_queue_flag_set(QUEUE_FLAG_POLL, q);
 
-	if (!(set->flags & BLK_MQ_F_SG_MERGE))
-		blk_queue_flag_set(QUEUE_FLAG_NO_SG_MERGE, q);
-
 	q->sg_reserved_size = INT_MAX;
 
 	INIT_DELAYED_WORK(&q->requeue_work, blk_mq_requeue_work);
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 4b1be754cc41..ba9481f1bf3c 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -1698,14 +1698,6 @@ static int device_is_not_random(struct dm_target *ti, struct dm_dev *dev,
 	return q && !blk_queue_add_random(q);
 }
 
-static int queue_supports_sg_merge(struct dm_target *ti, struct dm_dev *dev,
-				   sector_t start, sector_t len, void *data)
-{
-	struct request_queue *q = bdev_get_queue(dev->bdev);
-
-	return q && !test_bit(QUEUE_FLAG_NO_SG_MERGE, &q->queue_flags);
-}
-
 static bool dm_table_all_devices_attribute(struct dm_table *t,
 					   iterate_devices_callout_fn func)
 {
@@ -1902,11 +1894,6 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
 	if (!dm_table_supports_write_zeroes(t))
 		q->limits.max_write_zeroes_sectors = 0;
 
-	if (dm_table_all_devices_attribute(t, queue_supports_sg_merge))
-		blk_queue_flag_clear(QUEUE_FLAG_NO_SG_MERGE, q);
-	else
-		blk_queue_flag_set(QUEUE_FLAG_NO_SG_MERGE, q);
-
 	dm_table_verify_integrity(t);
 
 	/*
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index b6292d469ea4..faed9d9eb84c 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -588,7 +588,6 @@ struct request_queue {
 #define QUEUE_FLAG_SAME_FORCE	12	/* force complete on same CPU */
 #define QUEUE_FLAG_DEAD		13	/* queue tear-down finished */
 #define QUEUE_FLAG_INIT_DONE	14	/* queue is initialized */
-#define QUEUE_FLAG_NO_SG_MERGE	15	/* don't attempt to merge SG segments*/
 #define QUEUE_FLAG_POLL		16	/* IO polling enabled if set */
 #define QUEUE_FLAG_WC		17	/* Write back caching */
 #define QUEUE_FLAG_FUA		18	/* device supports FUA writes */
-- 
cgit v1.2.3


From 56d18f62f556b80105e38e7975975cf7465aae3e Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Fri, 15 Feb 2019 19:13:24 +0800
Subject: block: kill BLK_MQ_F_SG_MERGE

QUEUE_FLAG_NO_SG_MERGE has been killed, so kill BLK_MQ_F_SG_MERGE too.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Omar Sandoval <osandov@fb.com>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq-debugfs.c       | 1 -
 drivers/block/loop.c         | 2 +-
 drivers/block/nbd.c          | 2 +-
 drivers/block/rbd.c          | 2 +-
 drivers/block/skd_main.c     | 1 -
 drivers/block/xen-blkfront.c | 2 +-
 drivers/md/dm-rq.c           | 2 +-
 drivers/mmc/core/queue.c     | 3 +--
 drivers/scsi/scsi_lib.c      | 2 +-
 include/linux/blk-mq.h       | 1 -
 10 files changed, 7 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index 697d6213c82b..c39247c5ddb6 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -249,7 +249,6 @@ static const char *const alloc_policy_name[] = {
 static const char *const hctx_flag_name[] = {
 	HCTX_FLAG_NAME(SHOULD_MERGE),
 	HCTX_FLAG_NAME(TAG_SHARED),
-	HCTX_FLAG_NAME(SG_MERGE),
 	HCTX_FLAG_NAME(BLOCKING),
 	HCTX_FLAG_NAME(NO_SCHED),
 };
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 8ef583197414..3d63ad036398 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -1937,7 +1937,7 @@ static int loop_add(struct loop_device **l, int i)
 	lo->tag_set.queue_depth = 128;
 	lo->tag_set.numa_node = NUMA_NO_NODE;
 	lo->tag_set.cmd_size = sizeof(struct loop_cmd);
-	lo->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE;
+	lo->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
 	lo->tag_set.driver_data = lo;
 
 	err = blk_mq_alloc_tag_set(&lo->tag_set);
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 7c9a949e876b..32a7ba1674b7 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -1571,7 +1571,7 @@ static int nbd_dev_add(int index)
 	nbd->tag_set.numa_node = NUMA_NO_NODE;
 	nbd->tag_set.cmd_size = sizeof(struct nbd_cmd);
 	nbd->tag_set.flags = BLK_MQ_F_SHOULD_MERGE |
-		BLK_MQ_F_SG_MERGE | BLK_MQ_F_BLOCKING;
+		BLK_MQ_F_BLOCKING;
 	nbd->tag_set.driver_data = nbd;
 
 	err = blk_mq_alloc_tag_set(&nbd->tag_set);
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 1e92b61d0bd5..abe9e1c89227 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -3988,7 +3988,7 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
 	rbd_dev->tag_set.ops = &rbd_mq_ops;
 	rbd_dev->tag_set.queue_depth = rbd_dev->opts->queue_depth;
 	rbd_dev->tag_set.numa_node = NUMA_NO_NODE;
-	rbd_dev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE;
+	rbd_dev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
 	rbd_dev->tag_set.nr_hw_queues = 1;
 	rbd_dev->tag_set.cmd_size = sizeof(struct work_struct);
 
diff --git a/drivers/block/skd_main.c b/drivers/block/skd_main.c
index ab893a7571a2..7d3ad6c22ee5 100644
--- a/drivers/block/skd_main.c
+++ b/drivers/block/skd_main.c
@@ -2843,7 +2843,6 @@ static int skd_cons_disk(struct skd_device *skdev)
 		skdev->sgs_per_request * sizeof(struct scatterlist);
 	skdev->tag_set.numa_node = NUMA_NO_NODE;
 	skdev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE |
-		BLK_MQ_F_SG_MERGE |
 		BLK_ALLOC_POLICY_TO_MQ_FLAG(BLK_TAG_ALLOC_FIFO);
 	skdev->tag_set.driver_data = skdev;
 	rc = blk_mq_alloc_tag_set(&skdev->tag_set);
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 0ed4b200fa58..d43a5677ccbc 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -977,7 +977,7 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size,
 	} else
 		info->tag_set.queue_depth = BLK_RING_SIZE(info);
 	info->tag_set.numa_node = NUMA_NO_NODE;
-	info->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE;
+	info->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
 	info->tag_set.cmd_size = sizeof(struct blkif_req);
 	info->tag_set.driver_data = info;
 
diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c
index 4eb5f8c56535..b2f8eb2365ee 100644
--- a/drivers/md/dm-rq.c
+++ b/drivers/md/dm-rq.c
@@ -527,7 +527,7 @@ int dm_mq_init_request_queue(struct mapped_device *md, struct dm_table *t)
 	md->tag_set->ops = &dm_mq_ops;
 	md->tag_set->queue_depth = dm_get_blk_mq_queue_depth();
 	md->tag_set->numa_node = md->numa_node_id;
-	md->tag_set->flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE;
+	md->tag_set->flags = BLK_MQ_F_SHOULD_MERGE;
 	md->tag_set->nr_hw_queues = dm_get_blk_mq_nr_hw_queues();
 	md->tag_set->driver_data = md;
 
diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c
index 35cc138b096d..cc19e71c71d4 100644
--- a/drivers/mmc/core/queue.c
+++ b/drivers/mmc/core/queue.c
@@ -410,8 +410,7 @@ int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card)
 	else
 		mq->tag_set.queue_depth = MMC_QUEUE_DEPTH;
 	mq->tag_set.numa_node = NUMA_NO_NODE;
-	mq->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE |
-			    BLK_MQ_F_BLOCKING;
+	mq->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_BLOCKING;
 	mq->tag_set.nr_hw_queues = 1;
 	mq->tag_set.cmd_size = sizeof(struct mmc_queue_req);
 	mq->tag_set.driver_data = mq;
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 6d65ac584eba..6cadbe945bdb 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1899,7 +1899,7 @@ int scsi_mq_setup_tags(struct Scsi_Host *shost)
 	shost->tag_set.queue_depth = shost->can_queue;
 	shost->tag_set.cmd_size = cmd_size;
 	shost->tag_set.numa_node = NUMA_NO_NODE;
-	shost->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE;
+	shost->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
 	shost->tag_set.flags |=
 		BLK_ALLOC_POLICY_TO_MQ_FLAG(shost->hostt->tag_alloc_policy);
 	shost->tag_set.driver_data = shost;
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 0e030f5f76b6..b0c814bcc7e3 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -218,7 +218,6 @@ struct blk_mq_ops {
 enum {
 	BLK_MQ_F_SHOULD_MERGE	= 1 << 0,
 	BLK_MQ_F_TAG_SHARED	= 1 << 1,
-	BLK_MQ_F_SG_MERGE	= 1 << 2,
 	BLK_MQ_F_BLOCKING	= 1 << 5,
 	BLK_MQ_F_NO_SCHED	= 1 << 6,
 	BLK_MQ_F_ALLOC_POLICY_START_BIT = 8,
-- 
cgit v1.2.3


From 625239d4ad43590f6639737ee900884f7d801411 Mon Sep 17 00:00:00 2001
From: Loys Ollivier <lollivier@baylibre.com>
Date: Wed, 13 Feb 2019 16:09:28 +0100
Subject: gnss: add mtk receiver type support

Add an MTK (Mediatek) type to the "GNSS_TYPE" attribute.

Note that MTK receivers support a subset of NMEA 0183 with vendor
extensions.

Signed-off-by: Loys Ollivier <lollivier@baylibre.com>
Signed-off-by: Johan Hovold <johan@kernel.org>
---
 drivers/gnss/core.c  | 1 +
 include/linux/gnss.h | 1 +
 2 files changed, 2 insertions(+)

(limited to 'include')

diff --git a/drivers/gnss/core.c b/drivers/gnss/core.c
index 4291a0dd22aa..320cfca80d5f 100644
--- a/drivers/gnss/core.c
+++ b/drivers/gnss/core.c
@@ -334,6 +334,7 @@ static const char * const gnss_type_names[GNSS_TYPE_COUNT] = {
 	[GNSS_TYPE_NMEA]	= "NMEA",
 	[GNSS_TYPE_SIRF]	= "SiRF",
 	[GNSS_TYPE_UBX]		= "UBX",
+	[GNSS_TYPE_MTK]		= "MTK",
 };
 
 static const char *gnss_type_name(struct gnss_device *gdev)
diff --git a/include/linux/gnss.h b/include/linux/gnss.h
index 43546977098c..36968a0f33e8 100644
--- a/include/linux/gnss.h
+++ b/include/linux/gnss.h
@@ -22,6 +22,7 @@ enum gnss_type {
 	GNSS_TYPE_NMEA = 0,
 	GNSS_TYPE_SIRF,
 	GNSS_TYPE_UBX,
+	GNSS_TYPE_MTK,
 
 	GNSS_TYPE_COUNT
 };
-- 
cgit v1.2.3


From 8605a1366015afb0ec7dd486262d0b4088e3c75f Mon Sep 17 00:00:00 2001
From: Ramalingam C <ramalingam.c@intel.com>
Date: Fri, 15 Feb 2019 14:04:57 +0530
Subject: drm/i915: enum port definition is moved into i915_drm.h

For the reusability of the enum port in other driver modules
(like mei_hdcp), enum port definition is moved from I915 local header
intel_display.h to drm/i915_drm.h

Signed-off-by: Ramalingam C <ramalingam.c@intel.com>
Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
[danvet: Fix subject prefix.]
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: https://patchwork.freedesktop.org/patch/msgid/1550219730-17734-3-git-send-email-ramalingam.c@intel.com
---
 drivers/gpu/drm/i915/intel_display.h | 16 +---------------
 include/drm/i915_drm.h               | 15 +++++++++++++++
 2 files changed, 16 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/i915/intel_display.h b/drivers/gpu/drm/i915/intel_display.h
index 4262452963b3..79203666fc62 100644
--- a/drivers/gpu/drm/i915/intel_display.h
+++ b/drivers/gpu/drm/i915/intel_display.h
@@ -26,6 +26,7 @@
 #define _INTEL_DISPLAY_H_
 
 #include <drm/drm_util.h>
+#include <drm/i915_drm.h>
 
 enum i915_gpio {
 	GPIOA,
@@ -150,21 +151,6 @@ enum plane_id {
 	for ((__p) = PLANE_PRIMARY; (__p) < I915_MAX_PLANES; (__p)++) \
 		for_each_if((__crtc)->plane_ids_mask & BIT(__p))
 
-enum port {
-	PORT_NONE = -1,
-
-	PORT_A = 0,
-	PORT_B,
-	PORT_C,
-	PORT_D,
-	PORT_E,
-	PORT_F,
-
-	I915_MAX_PORTS
-};
-
-#define port_name(p) ((p) + 'A')
-
 /*
  * Ports identifier referenced from other drivers.
  * Expected to remain stable over time
diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h
index c44703f471b3..7523e9a7b6e2 100644
--- a/include/drm/i915_drm.h
+++ b/include/drm/i915_drm.h
@@ -100,4 +100,19 @@ extern struct resource intel_graphics_stolen_res;
 #define INTEL_GEN11_BSM_DW1	0xc4
 #define   INTEL_BSM_MASK	(-(1u << 20))
 
+enum port {
+	PORT_NONE = -1,
+
+	PORT_A = 0,
+	PORT_B,
+	PORT_C,
+	PORT_D,
+	PORT_E,
+	PORT_F,
+
+	I915_MAX_PORTS
+};
+
+#define port_name(p) ((p) + 'A')
+
 #endif				/* _I915_DRM_H_ */
-- 
cgit v1.2.3


From 1626eab70ebc61d015e69a4bc3479d9228539343 Mon Sep 17 00:00:00 2001
From: Ramalingam C <ramalingam.c@intel.com>
Date: Fri, 15 Feb 2019 14:04:58 +0530
Subject: drm/i915: header for i915 - MEI_HDCP interface

Header defines the interface for the I915 and MEI_HDCP drivers.
This interface is specific to the usage of mei_hdcp from gen9+
platforms for ME FW based HDCP2.2 services.

And Generic HDCP2.2 protocol specific definitions
are added at drm/drm_hdcp.h.

v2:
  Commit msg is enhanced [Daniel]
v3:
  i915_hdcp_comp_master is defined.

Signed-off-by: Ramalingam C <ramalingam.c@intel.com>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch> [v2]
Reviewed-by: Uma Shankar <uma.shankar@intel.com> [v2]
[danvet: Fix subject to drm/i915.]
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: https://patchwork.freedesktop.org/patch/msgid/1550219730-17734-4-git-send-email-ramalingam.c@intel.com
---
 include/drm/i915_mei_hdcp_interface.h | 149 ++++++++++++++++++++++++++++++++++
 1 file changed, 149 insertions(+)
 create mode 100644 include/drm/i915_mei_hdcp_interface.h

(limited to 'include')

diff --git a/include/drm/i915_mei_hdcp_interface.h b/include/drm/i915_mei_hdcp_interface.h
new file mode 100644
index 000000000000..8c344255146a
--- /dev/null
+++ b/include/drm/i915_mei_hdcp_interface.h
@@ -0,0 +1,149 @@
+/* SPDX-License-Identifier: (GPL-2.0+) */
+/*
+ * Copyright © 2017-2018 Intel Corporation
+ *
+ * Authors:
+ * Ramalingam C <ramalingam.c@intel.com>
+ */
+
+#ifndef _I915_MEI_HDCP_INTERFACE_H_
+#define _I915_MEI_HDCP_INTERFACE_H_
+
+#include <linux/mutex.h>
+#include <linux/device.h>
+#include <drm/drm_hdcp.h>
+#include <drm/i915_drm.h>
+
+/**
+ * enum hdcp_port_type - HDCP port implementation type defined by ME FW
+ * @HDCP_PORT_TYPE_INVALID: Invalid hdcp port type
+ * @HDCP_PORT_TYPE_INTEGRATED: In-Host HDCP2.x port
+ * @HDCP_PORT_TYPE_LSPCON: HDCP2.2 discrete wired Tx port with LSPCON
+ *			   (HDMI 2.0) solution
+ * @HDCP_PORT_TYPE_CPDP: HDCP2.2 discrete wired Tx port using the CPDP (DP 1.3)
+ *			 solution
+ */
+enum hdcp_port_type {
+	HDCP_PORT_TYPE_INVALID,
+	HDCP_PORT_TYPE_INTEGRATED,
+	HDCP_PORT_TYPE_LSPCON,
+	HDCP_PORT_TYPE_CPDP
+};
+
+/**
+ * enum hdcp_wired_protocol - HDCP adaptation used on the port
+ * @HDCP_PROTOCOL_INVALID: Invalid HDCP adaptation protocol
+ * @HDCP_PROTOCOL_HDMI: HDMI adaptation of HDCP used on the port
+ * @HDCP_PROTOCOL_DP: DP adaptation of HDCP used on the port
+ */
+enum hdcp_wired_protocol {
+	HDCP_PROTOCOL_INVALID,
+	HDCP_PROTOCOL_HDMI,
+	HDCP_PROTOCOL_DP
+};
+
+/**
+ * struct hdcp_port_data - intel specific HDCP port data
+ * @port: port index as per I915
+ * @port_type: HDCP port type as per ME FW classification
+ * @protocol: HDCP adaptation as per ME FW
+ * @k: No of streams transmitted on a port. Only on DP MST this is != 1
+ * @seq_num_m: Count of RepeaterAuth_Stream_Manage msg propagated.
+ *	       Initialized to 0 on AKE_INIT. Incremented after every successful
+ *	       transmission of RepeaterAuth_Stream_Manage message. When it rolls
+ *	       over re-Auth has to be triggered.
+ * @streams: struct hdcp2_streamid_type[k]. Defines the type and id for the
+ *	     streams
+ */
+struct hdcp_port_data {
+	enum port port;
+	u8 port_type;
+	u8 protocol;
+	u16 k;
+	u32 seq_num_m;
+	struct hdcp2_streamid_type *streams;
+};
+
+/**
+ * struct i915_hdcp_component_ops- ops for HDCP2.2 services.
+ * @owner: Module providing the ops
+ * @initiate_hdcp2_session: Initiate a Wired HDCP2.2 Tx Session.
+ *			    And Prepare AKE_Init.
+ * @verify_receiver_cert_prepare_km: Verify the Receiver Certificate
+ *				     AKE_Send_Cert and prepare
+				     AKE_Stored_Km/AKE_No_Stored_Km
+ * @verify_hprime: Verify AKE_Send_H_prime
+ * @store_pairing_info: Store pairing info received
+ * @initiate_locality_check: Prepare LC_Init
+ * @verify_lprime: Verify lprime
+ * @get_session_key: Prepare SKE_Send_Eks
+ * @repeater_check_flow_prepare_ack: Validate the Downstream topology
+ *				     and prepare rep_ack
+ * @verify_mprime: Verify mprime
+ * @enable_hdcp_authentication:  Mark a port as authenticated.
+ * @close_hdcp_session: Close the Wired HDCP Tx session per port.
+ *			This also disables the authenticated state of the port.
+ */
+struct i915_hdcp_component_ops {
+	/**
+	 * @owner: mei_hdcp module
+	 */
+	struct module *owner;
+
+	int (*initiate_hdcp2_session)(struct device *dev,
+				      struct hdcp_port_data *data,
+				      struct hdcp2_ake_init *ake_data);
+	int (*verify_receiver_cert_prepare_km)(struct device *dev,
+					       struct hdcp_port_data *data,
+					       struct hdcp2_ake_send_cert
+								*rx_cert,
+					       bool *km_stored,
+					       struct hdcp2_ake_no_stored_km
+								*ek_pub_km,
+					       size_t *msg_sz);
+	int (*verify_hprime)(struct device *dev,
+			     struct hdcp_port_data *data,
+			     struct hdcp2_ake_send_hprime *rx_hprime);
+	int (*store_pairing_info)(struct device *dev,
+				  struct hdcp_port_data *data,
+				  struct hdcp2_ake_send_pairing_info
+								*pairing_info);
+	int (*initiate_locality_check)(struct device *dev,
+				       struct hdcp_port_data *data,
+				       struct hdcp2_lc_init *lc_init_data);
+	int (*verify_lprime)(struct device *dev,
+			     struct hdcp_port_data *data,
+			     struct hdcp2_lc_send_lprime *rx_lprime);
+	int (*get_session_key)(struct device *dev,
+			       struct hdcp_port_data *data,
+			       struct hdcp2_ske_send_eks *ske_data);
+	int (*repeater_check_flow_prepare_ack)(struct device *dev,
+					       struct hdcp_port_data *data,
+					       struct hdcp2_rep_send_receiverid_list
+								*rep_topology,
+					       struct hdcp2_rep_send_ack
+								*rep_send_ack);
+	int (*verify_mprime)(struct device *dev,
+			     struct hdcp_port_data *data,
+			     struct hdcp2_rep_stream_ready *stream_ready);
+	int (*enable_hdcp_authentication)(struct device *dev,
+					  struct hdcp_port_data *data);
+	int (*close_hdcp_session)(struct device *dev,
+				  struct hdcp_port_data *data);
+};
+
+/**
+ * struct i915_hdcp_component_master - Used for communication between i915
+ * and mei_hdcp drivers for the HDCP2.2 services
+ * @mei_dev: device that provide the HDCP2.2 service from MEI Bus.
+ * @hdcp_ops: Ops implemented by mei_hdcp driver, used by i915 driver.
+ */
+struct i915_hdcp_comp_master {
+	struct device *mei_dev;
+	const struct i915_hdcp_component_ops *ops;
+
+	/* To protect the above members. */
+	struct mutex mutex;
+};
+
+#endif /* _I915_MEI_HDCP_INTERFACE_H_ */
-- 
cgit v1.2.3


From 1bf7cb4d8f3d1f7388b230903cae35dfb43901d3 Mon Sep 17 00:00:00 2001
From: Ramalingam C <ramalingam.c@intel.com>
Date: Fri, 15 Feb 2019 14:05:00 +0530
Subject: drm/i915: MEI interface definition

Defining the mei-i915 interface functions and initialization of
the interface.

v2:
  Adjust to the new interface changes. [Tomas]
  Added further debug logs for the failures at MEI i/f.
  port in hdcp_port data is equipped to handle -ve values.
v3:
  mei comp is matched for global i915 comp master. [Daniel]
  In hdcp_shim hdcp_protocol() is replaced with const variable. [Daniel]
  mei wrappers are adjusted as per the i/f change [Daniel]
v4:
  port initialization is done only at hdcp2_init only [Danvet]
v5:
  I915 registers a subcomponent to be matched with mei_hdcp [Daniel]
v6:
  HDCP_disable for all connectors incase of comp_unbind.
  Tear down HDCP comp interface at i915_unload [Daniel]
v7:
  Component init and fini are moved out of connector ops [Daniel]
  hdcp_disable is not called from unbind. [Daniel]

Signed-off-by: Ramalingam C <ramalingam.c@intel.com>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch> [v11]
[danvet: For the topic/mei-hdcp shared branch drop everything but the
header change needed by both drm/i915 and mei-hdcp. Also drop the no
longer needed device.h include.]
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: https://patchwork.freedesktop.org/patch/msgid/1550219730-17734-6-git-send-email-ramalingam.c@intel.com
---
 include/drm/i915_component.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/drm/i915_component.h b/include/drm/i915_component.h
index 72fbb037f9b3..dcb95bd9dee6 100644
--- a/include/drm/i915_component.h
+++ b/include/drm/i915_component.h
@@ -28,6 +28,7 @@
 
 enum i915_component_type {
 	I915_COMPONENT_AUDIO = 1,
+	I915_COMPONENT_HDCP,
 };
 
 /* MAX_PORT is the number of port
-- 
cgit v1.2.3


From 32097060189bf215439e719c5df514399235c52e Mon Sep 17 00:00:00 2001
From: Ramalingam C <ramalingam.c@intel.com>
Date: Fri, 15 Feb 2019 14:05:04 +0530
Subject: drm: helper functions for hdcp2 seq_num to from u32

Library functions for endianness are aligned for 16/32/64 bits.
But hdcp sequence numbers are 24bits(big endian).
So for their conversion to and from u32 helper functions are developed.

v2:
  Comment is updated. [Daniel]
  Reviewed-by Uma.

Signed-off-by: Ramalingam C <ramalingam.c@intel.com>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Reviewed-by: Uma Shankar <uma.shankar@intel.com>
Acked-by: Dave Airlie <airlied@gmail.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: https://patchwork.freedesktop.org/patch/msgid/1550219730-17734-10-git-send-email-ramalingam.c@intel.com
---
 include/drm/drm_hdcp.h | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

(limited to 'include')

diff --git a/include/drm/drm_hdcp.h b/include/drm/drm_hdcp.h
index a6de09c5e47f..c21682f76cd3 100644
--- a/include/drm/drm_hdcp.h
+++ b/include/drm/drm_hdcp.h
@@ -250,4 +250,22 @@ struct hdcp2_dp_errata_stream_type {
 #define HDCP_2_2_HDMI_RXSTATUS_READY(x)		((x) & BIT(2))
 #define HDCP_2_2_HDMI_RXSTATUS_REAUTH_REQ(x)	((x) & BIT(3))
 
+/*
+ * Helper functions to convert 24bit big endian hdcp sequence number to
+ * host format and back
+ */
+static inline
+u32 drm_hdcp2_seq_num_to_u32(u8 seq_num[HDCP_2_2_SEQ_NUM_LEN])
+{
+	return (u32)(seq_num[2] | seq_num[1] << 8 | seq_num[0] << 16);
+}
+
+static inline
+void drm_hdcp2_u32_to_seq_num(u8 seq_num[HDCP_2_2_SEQ_NUM_LEN], u32 val)
+{
+	seq_num[0] = val >> 16;
+	seq_num[1] = val >> 8;
+	seq_num[2] = val;
+}
+
 #endif
-- 
cgit v1.2.3


From 3d9dfd060391928bd615db62ecddea5e1255edfd Mon Sep 17 00:00:00 2001
From: Shamir Rabinovitch <shamir.rabinovitch@oracle.com>
Date: Thu, 7 Feb 2019 18:44:47 +0200
Subject: IB/uverbs: Add ib_ucontext to uverbs_attr_bundle sent from ioctl and
 cmd flows

Add ib_ucontext to the uverbs_attr_bundle sent down the iocl and cmd flows
as soon as the flow has ib_uobject.

In addition, remove rdma_get_ucontext helper function that is only used by
ib_umem_get.

Signed-off-by: Shamir Rabinovitch <shamir.rabinovitch@oracle.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/core/rdma_core.c    | 32 ++++++++++++++++++++++++++++++++
 drivers/infiniband/core/umem.c         | 10 +++++++---
 drivers/infiniband/core/uverbs_cmd.c   |  2 +-
 drivers/infiniband/core/uverbs_ioctl.c |  3 +++
 drivers/infiniband/core/uverbs_main.c  | 25 +------------------------
 include/rdma/ib_verbs.h                |  2 --
 include/rdma/uverbs_ioctl.h            |  1 +
 include/rdma/uverbs_std_types.h        | 18 +++++++++++++-----
 8 files changed, 58 insertions(+), 35 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c
index a260d2f8e0b7..96f919fe86e7 100644
--- a/drivers/infiniband/core/rdma_core.c
+++ b/drivers/infiniband/core/rdma_core.c
@@ -438,6 +438,38 @@ free:
 	uverbs_uobject_put(uobj);
 	return ERR_PTR(ret);
 }
+struct ib_uobject *_uobj_get_read(enum uverbs_default_objects type,
+				  u32 object_id,
+				  struct uverbs_attr_bundle *attrs)
+{
+	struct ib_uobject *uobj;
+
+	uobj = rdma_lookup_get_uobject(uobj_get_type(attrs, type), attrs->ufile,
+				       object_id, UVERBS_LOOKUP_READ);
+	if (IS_ERR(uobj))
+		return uobj;
+
+	attrs->context = uobj->context;
+
+	return uobj;
+}
+
+struct ib_uobject *_uobj_get_write(enum uverbs_default_objects type,
+				   u32 object_id,
+				   struct uverbs_attr_bundle *attrs)
+{
+	struct ib_uobject *uobj;
+
+	uobj = rdma_lookup_get_uobject(uobj_get_type(attrs, type), attrs->ufile,
+				       object_id, UVERBS_LOOKUP_WRITE);
+
+	if (IS_ERR(uobj))
+		return uobj;
+
+	attrs->context = uobj->context;
+
+	return uobj;
+}
 
 static struct ib_uobject *
 alloc_begin_idr_uobject(const struct uverbs_api_object *obj,
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index b69d3efa8712..fe5551562dbc 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -96,9 +96,13 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr,
 	struct scatterlist *sg, *sg_list_start;
 	unsigned int gup_flags = FOLL_WRITE;
 
-	context = rdma_get_ucontext(udata);
-	if (IS_ERR(context))
-		return ERR_CAST(context);
+	if (!udata)
+		return ERR_PTR(-EIO);
+
+	context = container_of(udata, struct uverbs_attr_bundle, driver_udata)
+			  ->context;
+	if (!context)
+		return ERR_PTR(-EIO);
 
 	if (dmasync)
 		dma_attrs |= DMA_ATTR_WRITE_BARRIER;
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 5ac143f22df0..c2e7733a7fe0 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -2634,7 +2634,7 @@ void flow_resources_add(struct ib_uflow_resources *uflow_res,
 }
 EXPORT_SYMBOL(flow_resources_add);
 
-static int kern_spec_to_ib_spec_action(const struct uverbs_attr_bundle *attrs,
+static int kern_spec_to_ib_spec_action(struct uverbs_attr_bundle *attrs,
 				       struct ib_uverbs_flow_spec *kern_spec,
 				       union ib_flow_spec *ib_spec,
 				       struct ib_uflow_resources *uflow_res)
diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c
index 0ca04d224015..e1379949e663 100644
--- a/drivers/infiniband/core/uverbs_ioctl.c
+++ b/drivers/infiniband/core/uverbs_ioctl.c
@@ -213,6 +213,7 @@ static int uverbs_process_idrs_array(struct bundle_priv *pbundle,
 			ret = PTR_ERR(attr->uobjects[i]);
 			break;
 		}
+		pbundle->bundle.context = attr->uobjects[i]->context;
 	}
 
 	attr->len = i;
@@ -330,6 +331,7 @@ static int uverbs_process_attr(struct bundle_priv *pbundle,
 					uattr->data_s64);
 		if (IS_ERR(o_attr->uobject))
 			return PTR_ERR(o_attr->uobject);
+		pbundle->bundle.context = o_attr->uobject->context;
 		__set_bit(attr_bkey, pbundle->uobj_finalize);
 
 		if (spec->u.obj.access == UVERBS_ACCESS_NEW) {
@@ -592,6 +594,7 @@ static int ib_uverbs_cmd_verbs(struct ib_uverbs_file *ufile,
 	pbundle->method_elm = method_elm;
 	pbundle->method_key = attrs_iter.index;
 	pbundle->bundle.ufile = ufile;
+	pbundle->bundle.context = NULL; /* only valid if bundle has uobject */
 	pbundle->radix = &uapi->radix;
 	pbundle->radix_slots = slot;
 	pbundle->radix_slots_len = radix_tree_chunk_size(&attrs_iter);
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index accc61cc93ac..70b7d80431a9 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -101,30 +101,6 @@ struct ib_ucontext *ib_uverbs_get_ucontext_file(struct ib_uverbs_file *ufile)
 }
 EXPORT_SYMBOL(ib_uverbs_get_ucontext_file);
 
-/* rdma_get_ucontext - Return the ucontext from a udata
- * @udata: The udata to get the context from
- *
- * This can only be called from within a uapi method that was passed ib_udata
- * as a parameter. It returns the ucontext associated with the udata, or ERR_PTR
- * if the udata is NULL or the ucontext has been disassociated.
- */
-struct ib_ucontext *rdma_get_ucontext(struct ib_udata *udata)
-{
-	if (!udata)
-		return ERR_PTR(-EIO);
-
-	/*
-	 * FIXME: Really all cases that get here with a udata will have
-	 * already called ib_uverbs_get_ucontext_file, or located a uobject
-	 * that points to a ucontext. We could store that result in the udata
-	 * so this function can't fail.
-	 */
-	return ib_uverbs_get_ucontext_file(
-		container_of(udata, struct uverbs_attr_bundle, driver_udata)
-			->ufile);
-}
-EXPORT_SYMBOL(rdma_get_ucontext);
-
 int uverbs_dealloc_mw(struct ib_mw *mw)
 {
 	struct ib_pd *pd = mw->pd;
@@ -719,6 +695,7 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
 
 	memset(bundle.attr_present, 0, sizeof(bundle.attr_present));
 	bundle.ufile = file;
+	bundle.context = NULL; /* only valid if bundle has uobject */
 	if (!method_elm->is_ex) {
 		size_t in_len = hdr.in_words * 4 - sizeof(hdr);
 		size_t out_len = hdr.out_words * 4;
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 135fab2c016c..64ee7c08be22 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -4247,8 +4247,6 @@ void rdma_roce_rescan_device(struct ib_device *ibdev);
 
 struct ib_ucontext *ib_uverbs_get_ucontext_file(struct ib_uverbs_file *ufile);
 
-struct ib_ucontext *rdma_get_ucontext(struct ib_udata *udata);
-
 int uverbs_destroy_def_handler(struct uverbs_attr_bundle *attrs);
 
 struct net_device *rdma_alloc_netdev(struct ib_device *device, u8 port_num,
diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h
index 27da906beea7..b14a9ee786e9 100644
--- a/include/rdma/uverbs_ioctl.h
+++ b/include/rdma/uverbs_ioctl.h
@@ -652,6 +652,7 @@ struct uverbs_attr_bundle {
 	struct ib_udata driver_udata;
 	struct ib_udata ucore;
 	struct ib_uverbs_file *ufile;
+	struct ib_ucontext *context;
 	DECLARE_BITMAP(attr_present, UVERBS_API_ATTR_BKEY_LEN);
 	struct uverbs_attr attrs[];
 };
diff --git a/include/rdma/uverbs_std_types.h b/include/rdma/uverbs_std_types.h
index 883abcf6d36e..794c47565971 100644
--- a/include/rdma/uverbs_std_types.h
+++ b/include/rdma/uverbs_std_types.h
@@ -48,9 +48,12 @@
 #define uobj_get_type(_attrs, _object)                                         \
 	uapi_get_object((_attrs)->ufile->device->uapi, _object)
 
+struct ib_uobject *_uobj_get_read(enum uverbs_default_objects type,
+				  u32 object_id,
+				  struct uverbs_attr_bundle *attrs);
+
 #define uobj_get_read(_type, _id, _attrs)                                      \
-	rdma_lookup_get_uobject(uobj_get_type(_attrs, _type), (_attrs)->ufile, \
-				_uobj_check_id(_id), UVERBS_LOOKUP_READ)
+	_uobj_get_read(_type, _uobj_check_id(_id), _attrs)
 
 #define ufd_get_read(_type, _fdnum, _attrs)                                    \
 	rdma_lookup_get_uobject(uobj_get_type(_attrs, _type), (_attrs)->ufile, \
@@ -67,9 +70,12 @@ static inline void *_uobj_get_obj_read(struct ib_uobject *uobj)
 	((struct ib_##_object *)_uobj_get_obj_read(                            \
 		uobj_get_read(_type, _id, _attrs)))
 
+struct ib_uobject *_uobj_get_write(enum uverbs_default_objects type,
+				   u32 object_id,
+				   struct uverbs_attr_bundle *attrs);
+
 #define uobj_get_write(_type, _id, _attrs)                                     \
-	rdma_lookup_get_uobject(uobj_get_type(_attrs, _type), (_attrs)->ufile, \
-				_uobj_check_id(_id), UVERBS_LOOKUP_WRITE)
+	_uobj_get_write(_type, _uobj_check_id(_id), _attrs)
 
 int __uobj_perform_destroy(const struct uverbs_api_object *obj, u32 id,
 			   const struct uverbs_attr_bundle *attrs);
@@ -123,8 +129,10 @@ __uobj_alloc(const struct uverbs_api_object *obj,
 {
 	struct ib_uobject *uobj = rdma_alloc_begin_uobject(obj, attrs->ufile);
 
-	if (!IS_ERR(uobj))
+	if (!IS_ERR(uobj)) {
 		*ib_dev = uobj->context->device;
+		attrs->context = uobj->context;
+	}
 	return uobj;
 }
 
-- 
cgit v1.2.3


From 730623f4a56fa42d4559715ff2f4a5c32b3ae8bf Mon Sep 17 00:00:00 2001
From: Shamir Rabinovitch <shamir.rabinovitch@oracle.com>
Date: Thu, 7 Feb 2019 18:44:48 +0200
Subject: IB/verbs: Add helper function rdma_udata_to_drv_context

Helper function to get driver's context out of ib_udata wrapped in
uverbs_attr_bundle for user objects or NULL for kernel objects.

Signed-off-by: Shamir Rabinovitch <shamir.rabinovitch@oracle.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 include/rdma/uverbs_ioctl.h | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

(limited to 'include')

diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h
index b14a9ee786e9..28570ac2b6a0 100644
--- a/include/rdma/uverbs_ioctl.h
+++ b/include/rdma/uverbs_ioctl.h
@@ -664,6 +664,23 @@ static inline bool uverbs_attr_is_valid(const struct uverbs_attr_bundle *attrs_b
 			attrs_bundle->attr_present);
 }
 
+/**
+ * rdma_udata_to_drv_context - Helper macro to get the driver's context out of
+ *                             ib_udata which is embedded in uverbs_attr_bundle.
+ *
+ * If udata is not NULL this cannot fail. Otherwise a NULL udata will result
+ * in a NULL ucontext pointer, as a safety precaution. Callers should be using
+ * 'udata' to determine if the driver call is in user or kernel mode, not
+ * 'ucontext'.
+ *
+ */
+#define rdma_udata_to_drv_context(udata, drv_dev_struct, member)               \
+	(udata ? container_of(container_of(udata, struct uverbs_attr_bundle,   \
+					   driver_udata)                       \
+				      ->context,                               \
+			      drv_dev_struct, member) :                        \
+		 (drv_dev_struct *)NULL)
+
 #define IS_UVERBS_COPY_ERR(_ret)		((_ret) && (_ret) != -ENOENT)
 
 static inline const struct uverbs_attr *uverbs_attr_get(const struct uverbs_attr_bundle *attrs_bundle,
-- 
cgit v1.2.3


From 89944450547334aa6655e0cd4aec8df1897a205a Mon Sep 17 00:00:00 2001
From: Shamir Rabinovitch <shamir.rabinovitch@oracle.com>
Date: Thu, 7 Feb 2019 18:44:49 +0200
Subject: IB/{hw,sw}: Remove 'uobject->context' dependency in object creation
 APIs

Now when we have the udata passed to all the ib_xxx object creation APIs
and the additional macro 'rdma_udata_to_drv_context' to get the
ib_ucontext from ib_udata stored in uverbs_attr_bundle, we can finally
start to remove the dependency of the drivers in the
ib_xxx->uobject->context.

Signed-off-by: Shamir Rabinovitch <shamir.rabinovitch@oracle.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/hw/bnxt_re/ib_verbs.c     | 20 ++++----
 drivers/infiniband/hw/cxgb3/iwch_provider.c  |  4 +-
 drivers/infiniband/hw/cxgb4/qp.c             |  9 ++--
 drivers/infiniband/hw/hns/hns_roce_qp.c      | 21 ++++----
 drivers/infiniband/hw/i40iw/i40iw_verbs.c    |  9 ++--
 drivers/infiniband/hw/mlx4/mr.c              | 12 ++---
 drivers/infiniband/hw/mlx4/qp.c              | 72 +++++++++++++++-------------
 drivers/infiniband/hw/mlx4/srq.c             |  9 ++--
 drivers/infiniband/hw/mlx5/devx.c            | 21 ++++----
 drivers/infiniband/hw/mlx5/qp.c              | 58 ++++++++++++----------
 drivers/infiniband/hw/mlx5/srq.c             |  8 ++--
 drivers/infiniband/hw/mthca/mthca_provider.c | 20 ++++----
 drivers/infiniband/hw/mthca/mthca_qp.c       | 13 +++--
 drivers/infiniband/hw/mthca/mthca_srq.c      | 21 ++++----
 drivers/infiniband/hw/nes/nes_verbs.c        | 12 ++---
 drivers/infiniband/hw/qedr/verbs.c           |  2 +-
 drivers/infiniband/hw/usnic/usnic_ib_verbs.c |  5 +-
 drivers/infiniband/sw/rdmavt/qp.c            |  5 +-
 drivers/infiniband/sw/rdmavt/srq.c           |  5 +-
 drivers/infiniband/sw/rdmavt/vt.c            |  4 --
 drivers/infiniband/sw/rxe/rxe_qp.c           |  8 ++--
 drivers/infiniband/sw/rxe/rxe_verbs.c        |  6 ++-
 include/rdma/rdma_vt.h                       |  5 ++
 23 files changed, 194 insertions(+), 155 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
index 2ed778683c6b..83bf6f5d67c0 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -48,6 +48,7 @@
 #include <rdma/ib_addr.h>
 #include <rdma/ib_mad.h>
 #include <rdma/ib_cache.h>
+#include <rdma/uverbs_ioctl.h>
 
 #include "bnxt_ulp.h"
 
@@ -722,12 +723,11 @@ struct ib_ah *bnxt_re_create_ah(struct ib_pd *ib_pd,
 
 	/* Write AVID to shared page. */
 	if (udata) {
-		struct ib_ucontext *ib_uctx = ib_pd->uobject->context;
-		struct bnxt_re_ucontext *uctx;
+		struct bnxt_re_ucontext *uctx = rdma_udata_to_drv_context(
+			udata, struct bnxt_re_ucontext, ib_uctx);
 		unsigned long flag;
 		u32 *wrptr;
 
-		uctx = container_of(ib_uctx, struct bnxt_re_ucontext, ib_uctx);
 		spin_lock_irqsave(&uctx->sh_lock, flag);
 		wrptr = (u32 *)(uctx->shpg + BNXT_RE_AVID_OFFT);
 		*wrptr = ah->qplib_ah.id;
@@ -872,10 +872,9 @@ static int bnxt_re_init_user_qp(struct bnxt_re_dev *rdev, struct bnxt_re_pd *pd,
 	struct bnxt_qplib_qp *qplib_qp = &qp->qplib_qp;
 	struct ib_umem *umem;
 	int bytes = 0, psn_sz;
-	struct ib_ucontext *context = pd->ib_pd.uobject->context;
-	struct bnxt_re_ucontext *cntx = container_of(context,
-						     struct bnxt_re_ucontext,
-						     ib_uctx);
+	struct bnxt_re_ucontext *cntx = rdma_udata_to_drv_context(
+		udata, struct bnxt_re_ucontext, ib_uctx);
+
 	if (ib_copy_from_udata(&ureq, udata, sizeof(ureq)))
 		return -EFAULT;
 
@@ -1359,10 +1358,9 @@ static int bnxt_re_init_user_srq(struct bnxt_re_dev *rdev,
 	struct bnxt_qplib_srq *qplib_srq = &srq->qplib_srq;
 	struct ib_umem *umem;
 	int bytes = 0;
-	struct ib_ucontext *context = pd->ib_pd.uobject->context;
-	struct bnxt_re_ucontext *cntx = container_of(context,
-						     struct bnxt_re_ucontext,
-						     ib_uctx);
+	struct bnxt_re_ucontext *cntx = rdma_udata_to_drv_context(
+		udata, struct bnxt_re_ucontext, ib_uctx);
+
 	if (ib_copy_from_udata(&ureq, udata, sizeof(ureq)))
 		return -EFAULT;
 
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c
index 80dff6804e48..b74fd90a22dc 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c
@@ -53,6 +53,7 @@
 #include <rdma/ib_smi.h>
 #include <rdma/ib_umem.h>
 #include <rdma/ib_user_verbs.h>
+#include <rdma/uverbs_ioctl.h>
 
 #include "cxio_hal.h"
 #include "iwch.h"
@@ -825,7 +826,8 @@ static struct ib_qp *iwch_create_qp(struct ib_pd *pd,
 	 * Kernel users need more wq space for fastreg WRs which can take
 	 * 2 WR fragments.
 	 */
-	ucontext = udata ? to_iwch_ucontext(pd->uobject->context) : NULL;
+	ucontext = rdma_udata_to_drv_context(udata, struct iwch_ucontext,
+					     ibucontext);
 	if (!ucontext && wqsize < (rqsize + (2 * sqsize)))
 		wqsize = roundup_pow_of_two(rqsize +
 				roundup_pow_of_two(attrs->cap.max_send_wr * 2));
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index 0fe87b9c1e10..2509f65f4420 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -31,6 +31,7 @@
  */
 
 #include <linux/module.h>
+#include <rdma/uverbs_ioctl.h>
 
 #include "iw_cxgb4.h"
 
@@ -2136,7 +2137,8 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
 	struct c4iw_cq *rchp;
 	struct c4iw_create_qp_resp uresp;
 	unsigned int sqsize, rqsize = 0;
-	struct c4iw_ucontext *ucontext;
+	struct c4iw_ucontext *ucontext = rdma_udata_to_drv_context(
+		udata, struct c4iw_ucontext, ibucontext);
 	int ret;
 	struct c4iw_mm_entry *sq_key_mm, *rq_key_mm = NULL, *sq_db_key_mm;
 	struct c4iw_mm_entry *rq_db_key_mm = NULL, *ma_sync_key_mm = NULL;
@@ -2170,8 +2172,6 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
 	if (sqsize < 8)
 		sqsize = 8;
 
-	ucontext = udata ? to_c4iw_ucontext(pd->uobject->context) : NULL;
-
 	qhp = kzalloc(sizeof(*qhp), GFP_KERNEL);
 	if (!qhp)
 		return ERR_PTR(-ENOMEM);
@@ -2718,7 +2718,8 @@ struct ib_srq *c4iw_create_srq(struct ib_pd *pd, struct ib_srq_init_attr *attrs,
 	rqsize = attrs->attr.max_wr + 1;
 	rqsize = roundup_pow_of_two(max_t(u16, rqsize, 16));
 
-	ucontext = udata ? to_c4iw_ucontext(pd->uobject->context) : NULL;
+	ucontext = rdma_udata_to_drv_context(udata, struct c4iw_ucontext,
+					     ibucontext);
 
 	srq = kzalloc(sizeof(*srq), GFP_KERNEL);
 	if (!srq)
diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c
index 728ad8b6de2f..57c76eafef2f 100644
--- a/drivers/infiniband/hw/hns/hns_roce_qp.c
+++ b/drivers/infiniband/hw/hns/hns_roce_qp.c
@@ -35,6 +35,7 @@
 #include <linux/platform_device.h>
 #include <rdma/ib_addr.h>
 #include <rdma/ib_umem.h>
+#include <rdma/uverbs_ioctl.h>
 #include "hns_roce_common.h"
 #include "hns_roce_device.h"
 #include "hns_roce_hem.h"
@@ -560,6 +561,8 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
 	struct device *dev = hr_dev->dev;
 	struct hns_roce_ib_create_qp ucmd;
 	struct hns_roce_ib_create_qp_resp resp = {};
+	struct hns_roce_ucontext *uctx = rdma_udata_to_drv_context(
+		udata, struct hns_roce_ucontext, ibucontext);
 	unsigned long qpn = 0;
 	int ret = 0;
 	u32 page_shift;
@@ -670,9 +673,8 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
 		    (udata->inlen >= sizeof(ucmd)) &&
 		    (udata->outlen >= sizeof(resp)) &&
 		    hns_roce_qp_has_sq(init_attr)) {
-			ret = hns_roce_db_map_user(
-				to_hr_ucontext(ib_pd->uobject->context), udata,
-				ucmd.sdb_addr, &hr_qp->sdb);
+			ret = hns_roce_db_map_user(uctx, udata, ucmd.sdb_addr,
+						   &hr_qp->sdb);
 			if (ret) {
 				dev_err(dev, "sq record doorbell map failed!\n");
 				goto err_mtt;
@@ -686,9 +688,8 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
 		if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) &&
 		    (udata->outlen >= sizeof(resp)) &&
 		    hns_roce_qp_has_rq(init_attr)) {
-			ret = hns_roce_db_map_user(
-				to_hr_ucontext(ib_pd->uobject->context), udata,
-				ucmd.db_addr, &hr_qp->rdb);
+			ret = hns_roce_db_map_user(uctx, udata, ucmd.db_addr,
+						   &hr_qp->rdb);
 			if (ret) {
 				dev_err(dev, "rq record doorbell map failed!\n");
 				goto err_sq_dbmap;
@@ -838,9 +839,7 @@ err_wrid:
 		if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) &&
 		    (udata->outlen >= sizeof(resp)) &&
 		    hns_roce_qp_has_rq(init_attr))
-			hns_roce_db_unmap_user(
-					to_hr_ucontext(ib_pd->uobject->context),
-					&hr_qp->rdb);
+			hns_roce_db_unmap_user(uctx, &hr_qp->rdb);
 	} else {
 		kfree(hr_qp->sq.wrid);
 		kfree(hr_qp->rq.wrid);
@@ -852,9 +851,7 @@ err_sq_dbmap:
 		    (udata->inlen >= sizeof(ucmd)) &&
 		    (udata->outlen >= sizeof(resp)) &&
 		    hns_roce_qp_has_sq(init_attr))
-			hns_roce_db_unmap_user(
-					to_hr_ucontext(ib_pd->uobject->context),
-					&hr_qp->sdb);
+			hns_roce_db_unmap_user(uctx, &hr_qp->sdb);
 
 err_mtt:
 	hns_roce_mtt_cleanup(hr_dev, &hr_qp->mtt);
diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
index d5fb2b927587..76b4d1218696 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
@@ -45,6 +45,7 @@
 #include <rdma/iw_cm.h>
 #include <rdma/ib_user_verbs.h>
 #include <rdma/ib_umem.h>
+#include <rdma/uverbs_ioctl.h>
 #include "i40iw.h"
 
 /**
@@ -556,7 +557,8 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd,
 	struct i40iw_device *iwdev = to_iwdev(ibpd->device);
 	struct i40iw_cqp *iwcqp = &iwdev->cqp;
 	struct i40iw_qp *iwqp;
-	struct i40iw_ucontext *ucontext;
+	struct i40iw_ucontext *ucontext = rdma_udata_to_drv_context(
+		udata, struct i40iw_ucontext, ibucontext);
 	struct i40iw_create_qp_req req;
 	struct i40iw_create_qp_resp uresp;
 	u32 qp_num = 0;
@@ -665,7 +667,6 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd,
 		}
 		iwqp->ctx_info.qp_compl_ctx = req.user_compl_ctx;
 		iwqp->user_mode = 1;
-		ucontext = to_ucontext(ibpd->uobject->context);
 
 		if (req.user_wqe_buffers) {
 			struct i40iw_pbl *iwpbl;
@@ -1819,7 +1820,8 @@ static struct ib_mr *i40iw_reg_user_mr(struct ib_pd *pd,
 {
 	struct i40iw_pd *iwpd = to_iwpd(pd);
 	struct i40iw_device *iwdev = to_iwdev(pd->device);
-	struct i40iw_ucontext *ucontext;
+	struct i40iw_ucontext *ucontext = rdma_udata_to_drv_context(
+		udata, struct i40iw_ucontext, ibucontext);
 	struct i40iw_pble_alloc *palloc;
 	struct i40iw_pbl *iwpbl;
 	struct i40iw_mr *iwmr;
@@ -1860,7 +1862,6 @@ static struct ib_mr *i40iw_reg_user_mr(struct ib_pd *pd,
 	iwmr->region = region;
 	iwmr->ibmr.pd = pd;
 	iwmr->ibmr.device = pd->device;
-	ucontext = to_ucontext(pd->uobject->context);
 
 	iwmr->page_size = PAGE_SIZE;
 	iwmr->page_msk = PAGE_MASK;
diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c
index 56639ecd53ad..395379a480cb 100644
--- a/drivers/infiniband/hw/mlx4/mr.c
+++ b/drivers/infiniband/hw/mlx4/mr.c
@@ -367,8 +367,7 @@ end:
 	return block_shift;
 }
 
-static struct ib_umem *mlx4_get_umem_mr(struct ib_ucontext *context,
-					struct ib_udata *udata, u64 start,
+static struct ib_umem *mlx4_get_umem_mr(struct ib_udata *udata, u64 start,
 					u64 length, u64 virt_addr,
 					int access_flags)
 {
@@ -416,8 +415,8 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 	if (!mr)
 		return ERR_PTR(-ENOMEM);
 
-	mr->umem = mlx4_get_umem_mr(pd->uobject->context, udata, start, length,
-				    virt_addr, access_flags);
+	mr->umem =
+		mlx4_get_umem_mr(udata, start, length, virt_addr, access_flags);
 	if (IS_ERR(mr->umem)) {
 		err = PTR_ERR(mr->umem);
 		goto err_free;
@@ -506,9 +505,8 @@ int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags,
 
 		mlx4_mr_rereg_mem_cleanup(dev->dev, &mmr->mmr);
 		ib_umem_release(mmr->umem);
-		mmr->umem =
-			mlx4_get_umem_mr(mr->uobject->context, udata, start,
-					 length, virt_addr, mr_access_flags);
+		mmr->umem = mlx4_get_umem_mr(udata, start, length, virt_addr,
+					     mr_access_flags);
 		if (IS_ERR(mmr->umem)) {
 			err = PTR_ERR(mmr->umem);
 			/* Prevent mlx4_ib_dereg_mr from free'ing invalid pointer */
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index e38bab50cecf..429a59c5801c 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -41,6 +41,7 @@
 #include <rdma/ib_pack.h>
 #include <rdma/ib_addr.h>
 #include <rdma/ib_mad.h>
+#include <rdma/uverbs_ioctl.h>
 
 #include <linux/mlx4/driver.h>
 #include <linux/mlx4/qp.h>
@@ -52,7 +53,8 @@ static void mlx4_ib_lock_cqs(struct mlx4_ib_cq *send_cq,
 			     struct mlx4_ib_cq *recv_cq);
 static void mlx4_ib_unlock_cqs(struct mlx4_ib_cq *send_cq,
 			       struct mlx4_ib_cq *recv_cq);
-static int _mlx4_ib_modify_wq(struct ib_wq *ibwq, enum ib_wq_state new_state);
+static int _mlx4_ib_modify_wq(struct ib_wq *ibwq, enum ib_wq_state new_state,
+			      struct ib_udata *udata);
 
 enum {
 	MLX4_IB_ACK_REQ_FREQ	= 8,
@@ -863,6 +865,8 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
 	int err;
 	struct mlx4_ib_sqp *sqp = NULL;
 	struct mlx4_ib_qp *qp;
+	struct mlx4_ib_ucontext *context = rdma_udata_to_drv_context(
+		udata, struct mlx4_ib_ucontext, ibucontext);
 	enum mlx4_ib_qp_type qp_type = (enum mlx4_ib_qp_type) init_attr->qp_type;
 	struct mlx4_ib_cq *mcq;
 	unsigned long flags;
@@ -1038,7 +1042,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
 
 		if (qp_has_rq(init_attr)) {
 			err = mlx4_ib_db_map_user(
-				to_mucontext(pd->uobject->context), udata,
+				context, udata,
 				(src == MLX4_IB_QP_SRC) ? ucmd.qp.db_addr :
 							  ucmd.wq.db_addr,
 				&qp->db);
@@ -1112,8 +1116,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
 			}
 		}
 	} else if (src == MLX4_IB_RWQ_SRC) {
-		err = mlx4_ib_alloc_wqn(to_mucontext(pd->uobject->context), qp,
-					range_size, &qpn);
+		err = mlx4_ib_alloc_wqn(context, qp, range_size, &qpn);
 		if (err)
 			goto err_wrid;
 	} else {
@@ -1184,8 +1187,7 @@ err_qpn:
 		if (qp->flags & MLX4_IB_QP_NETIF)
 			mlx4_ib_steer_qp_free(dev, qpn, 1);
 		else if (src == MLX4_IB_RWQ_SRC)
-			mlx4_ib_release_wqn(to_mucontext(pd->uobject->context),
-					    qp, 0);
+			mlx4_ib_release_wqn(context, qp, 0);
 		else
 			mlx4_qp_release_range(dev->dev, qpn, 1);
 	}
@@ -1195,7 +1197,7 @@ err_proxy:
 err_wrid:
 	if (udata) {
 		if (qp_has_rq(init_attr))
-			mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context), &qp->db);
+			mlx4_ib_db_unmap_user(context, &qp->db);
 	} else {
 		kvfree(qp->sq.wrid);
 		kvfree(qp->rq.wrid);
@@ -1942,7 +1944,8 @@ static u8 gid_type_to_qpc(enum ib_gid_type gid_type)
  * Go over all RSS QP's childes (WQs) and apply their HW state according to
  * their logic state if the RSS QP is the first RSS QP associated for the WQ.
  */
-static int bringup_rss_rwqs(struct ib_rwq_ind_table *ind_tbl, u8 port_num)
+static int bringup_rss_rwqs(struct ib_rwq_ind_table *ind_tbl, u8 port_num,
+			    struct ib_udata *udata)
 {
 	int err = 0;
 	int i;
@@ -1966,7 +1969,7 @@ static int bringup_rss_rwqs(struct ib_rwq_ind_table *ind_tbl, u8 port_num)
 		}
 		wq->port = port_num;
 		if ((wq->rss_usecnt == 0) && (ibwq->state == IB_WQS_RDY)) {
-			err = _mlx4_ib_modify_wq(ibwq, IB_WQS_RDY);
+			err = _mlx4_ib_modify_wq(ibwq, IB_WQS_RDY, udata);
 			if (err) {
 				mutex_unlock(&wq->mutex);
 				break;
@@ -1988,7 +1991,8 @@ static int bringup_rss_rwqs(struct ib_rwq_ind_table *ind_tbl, u8 port_num)
 
 			if ((wq->rss_usecnt == 1) &&
 			    (ibwq->state == IB_WQS_RDY))
-				if (_mlx4_ib_modify_wq(ibwq, IB_WQS_RESET))
+				if (_mlx4_ib_modify_wq(ibwq, IB_WQS_RESET,
+						       udata))
 					pr_warn("failed to reverse WQN=0x%06x\n",
 						ibwq->wq_num);
 			wq->rss_usecnt--;
@@ -2000,7 +2004,8 @@ static int bringup_rss_rwqs(struct ib_rwq_ind_table *ind_tbl, u8 port_num)
 	return err;
 }
 
-static void bring_down_rss_rwqs(struct ib_rwq_ind_table *ind_tbl)
+static void bring_down_rss_rwqs(struct ib_rwq_ind_table *ind_tbl,
+				struct ib_udata *udata)
 {
 	int i;
 
@@ -2011,7 +2016,7 @@ static void bring_down_rss_rwqs(struct ib_rwq_ind_table *ind_tbl)
 		mutex_lock(&wq->mutex);
 
 		if ((wq->rss_usecnt == 1) && (ibwq->state == IB_WQS_RDY))
-			if (_mlx4_ib_modify_wq(ibwq, IB_WQS_RESET))
+			if (_mlx4_ib_modify_wq(ibwq, IB_WQS_RESET, udata))
 				pr_warn("failed to reverse WQN=%x\n",
 					ibwq->wq_num);
 		wq->rss_usecnt--;
@@ -2043,9 +2048,10 @@ static void fill_qp_rss_context(struct mlx4_qp_context *context,
 
 static int __mlx4_ib_modify_qp(void *src, enum mlx4_ib_source_type src_type,
 			       const struct ib_qp_attr *attr, int attr_mask,
-			       enum ib_qp_state cur_state, enum ib_qp_state new_state)
+			       enum ib_qp_state cur_state,
+			       enum ib_qp_state new_state,
+			       struct ib_udata *udata)
 {
-	struct ib_uobject *ibuobject;
 	struct ib_srq  *ibsrq;
 	const struct ib_gid_attr *gid_attr = NULL;
 	struct ib_rwq_ind_table *rwq_ind_tbl;
@@ -2054,6 +2060,8 @@ static int __mlx4_ib_modify_qp(void *src, enum mlx4_ib_source_type src_type,
 	struct mlx4_ib_qp *qp;
 	struct mlx4_ib_pd *pd;
 	struct mlx4_ib_cq *send_cq, *recv_cq;
+	struct mlx4_ib_ucontext *ucontext = rdma_udata_to_drv_context(
+		udata, struct mlx4_ib_ucontext, ibucontext);
 	struct mlx4_qp_context *context;
 	enum mlx4_qp_optpar optpar = 0;
 	int sqd_event;
@@ -2065,7 +2073,6 @@ static int __mlx4_ib_modify_qp(void *src, enum mlx4_ib_source_type src_type,
 		struct ib_wq *ibwq;
 
 		ibwq	    = (struct ib_wq *)src;
-		ibuobject   = ibwq->uobject;
 		ibsrq	    = NULL;
 		rwq_ind_tbl = NULL;
 		qp_type     = IB_QPT_RAW_PACKET;
@@ -2076,7 +2083,6 @@ static int __mlx4_ib_modify_qp(void *src, enum mlx4_ib_source_type src_type,
 		struct ib_qp *ibqp;
 
 		ibqp	    = (struct ib_qp *)src;
-		ibuobject   = ibqp->uobject;
 		ibsrq	    = ibqp->srq;
 		rwq_ind_tbl = ibqp->rwq_ind_tbl;
 		qp_type     = ibqp->qp_type;
@@ -2161,11 +2167,9 @@ static int __mlx4_ib_modify_qp(void *src, enum mlx4_ib_source_type src_type,
 			context->param3 |= cpu_to_be32(1 << 30);
 	}
 
-	if (ibuobject)
+	if (ucontext)
 		context->usr_page = cpu_to_be32(
-			mlx4_to_hw_uar_index(dev->dev,
-					     to_mucontext(ibuobject->context)
-					     ->uar.index));
+			mlx4_to_hw_uar_index(dev->dev, ucontext->uar.index));
 	else
 		context->usr_page = cpu_to_be32(
 			mlx4_to_hw_uar_index(dev->dev, dev->priv_uar.index));
@@ -2297,7 +2301,7 @@ static int __mlx4_ib_modify_qp(void *src, enum mlx4_ib_source_type src_type,
 	context->cqn_recv = cpu_to_be32(recv_cq->mcq.cqn);
 
 	/* Set "fast registration enabled" for all kernel QPs */
-	if (!ibuobject)
+	if (!ucontext)
 		context->params1 |= cpu_to_be32(1 << 11);
 
 	if (attr_mask & IB_QP_RNR_RETRY) {
@@ -2434,7 +2438,7 @@ static int __mlx4_ib_modify_qp(void *src, enum mlx4_ib_source_type src_type,
 	else
 		sqd_event = 0;
 
-	if (!ibuobject &&
+	if (!ucontext &&
 	    cur_state == IB_QPS_RESET &&
 	    new_state == IB_QPS_INIT)
 		context->rlkey_roce_mode |= (1 << 4);
@@ -2445,7 +2449,7 @@ static int __mlx4_ib_modify_qp(void *src, enum mlx4_ib_source_type src_type,
 	 * headroom is stamped so that the hardware doesn't start
 	 * processing stale work requests.
 	 */
-	if (!ibuobject &&
+	if (!ucontext &&
 	    cur_state == IB_QPS_RESET &&
 	    new_state == IB_QPS_INIT) {
 		struct mlx4_wqe_ctrl_seg *ctrl;
@@ -2509,7 +2513,7 @@ static int __mlx4_ib_modify_qp(void *src, enum mlx4_ib_source_type src_type,
 	 * entries and reinitialize the QP.
 	 */
 	if (new_state == IB_QPS_RESET) {
-		if (!ibuobject) {
+		if (!ucontext) {
 			mlx4_ib_cq_clean(recv_cq, qp->mqp.qpn,
 					 ibsrq ? to_msrq(ibsrq) : NULL);
 			if (send_cq != recv_cq)
@@ -2735,16 +2739,17 @@ static int _mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 	}
 
 	if (ibqp->rwq_ind_tbl && (new_state == IB_QPS_INIT)) {
-		err = bringup_rss_rwqs(ibqp->rwq_ind_tbl, attr->port_num);
+		err = bringup_rss_rwqs(ibqp->rwq_ind_tbl, attr->port_num,
+				       udata);
 		if (err)
 			goto out;
 	}
 
 	err = __mlx4_ib_modify_qp(ibqp, MLX4_IB_QP_SRC, attr, attr_mask,
-				  cur_state, new_state);
+				  cur_state, new_state, udata);
 
 	if (ibqp->rwq_ind_tbl && err)
-		bring_down_rss_rwqs(ibqp->rwq_ind_tbl);
+		bring_down_rss_rwqs(ibqp->rwq_ind_tbl, udata);
 
 	if (mlx4_is_bonded(dev->dev) && (attr_mask & IB_QP_PORT))
 		attr->port_num = 1;
@@ -4122,7 +4127,8 @@ static int ib_wq2qp_state(enum ib_wq_state state)
 	}
 }
 
-static int _mlx4_ib_modify_wq(struct ib_wq *ibwq, enum ib_wq_state new_state)
+static int _mlx4_ib_modify_wq(struct ib_wq *ibwq, enum ib_wq_state new_state,
+			      struct ib_udata *udata)
 {
 	struct mlx4_ib_qp *qp = to_mqp((struct ib_qp *)ibwq);
 	enum ib_qp_state qp_cur_state;
@@ -4146,7 +4152,8 @@ static int _mlx4_ib_modify_wq(struct ib_wq *ibwq, enum ib_wq_state new_state)
 		attr_mask = IB_QP_PORT;
 
 		err = __mlx4_ib_modify_qp(ibwq, MLX4_IB_RWQ_SRC, &attr,
-					  attr_mask, IB_QPS_RESET, IB_QPS_INIT);
+					  attr_mask, IB_QPS_RESET, IB_QPS_INIT,
+					  udata);
 		if (err) {
 			pr_debug("WQN=0x%06x failed to apply RST->INIT on the HW QP\n",
 				 ibwq->wq_num);
@@ -4158,12 +4165,13 @@ static int _mlx4_ib_modify_wq(struct ib_wq *ibwq, enum ib_wq_state new_state)
 
 	attr_mask = 0;
 	err = __mlx4_ib_modify_qp(ibwq, MLX4_IB_RWQ_SRC, NULL, attr_mask,
-				  qp_cur_state,  qp_new_state);
+				  qp_cur_state,  qp_new_state, udata);
 
 	if (err && (qp_cur_state == IB_QPS_INIT)) {
 		qp_new_state = IB_QPS_RESET;
 		if (__mlx4_ib_modify_qp(ibwq, MLX4_IB_RWQ_SRC, NULL,
-					attr_mask, IB_QPS_INIT, IB_QPS_RESET)) {
+					attr_mask, IB_QPS_INIT, IB_QPS_RESET,
+					udata)) {
 			pr_warn("WQN=0x%06x failed with reverting HW's resources failure\n",
 				ibwq->wq_num);
 			qp_new_state = IB_QPS_INIT;
@@ -4226,7 +4234,7 @@ int mlx4_ib_modify_wq(struct ib_wq *ibwq, struct ib_wq_attr *wq_attr,
 	 * WQ, so we can apply its port on the WQ.
 	 */
 	if (qp->rss_usecnt)
-		err = _mlx4_ib_modify_wq(ibwq, new_state);
+		err = _mlx4_ib_modify_wq(ibwq, new_state, udata);
 
 	if (!err)
 		ibwq->state = new_state;
diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c
index 498588eac051..381cf899bcef 100644
--- a/drivers/infiniband/hw/mlx4/srq.c
+++ b/drivers/infiniband/hw/mlx4/srq.c
@@ -37,6 +37,7 @@
 
 #include "mlx4_ib.h"
 #include <rdma/mlx4-abi.h>
+#include <rdma/uverbs_ioctl.h>
 
 static void *get_wqe(struct mlx4_ib_srq *srq, int n)
 {
@@ -73,6 +74,8 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
 				  struct ib_udata *udata)
 {
 	struct mlx4_ib_dev *dev = to_mdev(pd->device);
+	struct mlx4_ib_ucontext *ucontext = rdma_udata_to_drv_context(
+		udata, struct mlx4_ib_ucontext, ibucontext);
 	struct mlx4_ib_srq *srq;
 	struct mlx4_wqe_srq_next_seg *next;
 	struct mlx4_wqe_data_seg *scatter;
@@ -128,8 +131,8 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
 		if (err)
 			goto err_mtt;
 
-		err = mlx4_ib_db_map_user(to_mucontext(pd->uobject->context),
-					  udata, ucmd.db_addr, &srq->db);
+		err = mlx4_ib_db_map_user(ucontext, udata, ucmd.db_addr,
+					  &srq->db);
 		if (err)
 			goto err_mtt;
 	} else {
@@ -202,7 +205,7 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
 
 err_wrid:
 	if (udata)
-		mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context), &srq->db);
+		mlx4_ib_db_unmap_user(ucontext, &srq->db);
 	else
 		kvfree(srq->wrid);
 
diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c
index 8e6d23d6859f..eaa055007f28 100644
--- a/drivers/infiniband/hw/mlx5/devx.c
+++ b/drivers/infiniband/hw/mlx5/devx.c
@@ -1151,7 +1151,8 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)(
 	void *cmd_out;
 	struct ib_uobject *uobj = uverbs_attr_get_uobject(
 		attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_HANDLE);
-	struct mlx5_ib_ucontext *c = to_mucontext(uobj->context);
+	struct mlx5_ib_ucontext *c = rdma_udata_to_drv_context(
+		&attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext);
 	struct mlx5_ib_dev *dev = to_mdev(c->ibucontext.device);
 	u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
 	struct devx_obj *obj;
@@ -1227,8 +1228,9 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_MODIFY)(
 					MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_OUT);
 	struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs,
 							  MLX5_IB_ATTR_DEVX_OBJ_MODIFY_HANDLE);
-	struct mlx5_ib_ucontext *c = to_mucontext(uobj->context);
-	struct mlx5_ib_dev *mdev = to_mdev(uobj->context->device);
+	struct mlx5_ib_ucontext *c = rdma_udata_to_drv_context(
+		&attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext);
+	struct mlx5_ib_dev *mdev = to_mdev(c->ibucontext.device);
 	void *cmd_out;
 	int err;
 	int uid;
@@ -1268,11 +1270,12 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_QUERY)(
 					      MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_OUT);
 	struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs,
 							  MLX5_IB_ATTR_DEVX_OBJ_QUERY_HANDLE);
-	struct mlx5_ib_ucontext *c = to_mucontext(uobj->context);
+	struct mlx5_ib_ucontext *c = rdma_udata_to_drv_context(
+		&attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext);
 	void *cmd_out;
 	int err;
 	int uid;
-	struct mlx5_ib_dev *mdev = to_mdev(uobj->context->device);
+	struct mlx5_ib_dev *mdev = to_mdev(c->ibucontext.device);
 
 	uid = devx_get_uid(c, cmd_in);
 	if (uid < 0)
@@ -1370,11 +1373,12 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_ASYNC_QUERY)(
 				attrs,
 				MLX5_IB_ATTR_DEVX_OBJ_QUERY_ASYNC_HANDLE);
 	u16 cmd_out_len;
-	struct mlx5_ib_ucontext *c = to_mucontext(uobj->context);
+	struct mlx5_ib_ucontext *c = rdma_udata_to_drv_context(
+		&attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext);
 	struct ib_uobject *fd_uobj;
 	int err;
 	int uid;
-	struct mlx5_ib_dev *mdev = to_mdev(uobj->context->device);
+	struct mlx5_ib_dev *mdev = to_mdev(c->ibucontext.device);
 	struct devx_async_cmd_event_file *ev_file;
 	struct devx_async_data *async_data;
 
@@ -1530,7 +1534,8 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_UMEM_REG)(
 	struct ib_uobject *uobj = uverbs_attr_get_uobject(
 		attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_HANDLE);
 	u32 obj_id;
-	struct mlx5_ib_ucontext *c = to_mucontext(uobj->context);
+	struct mlx5_ib_ucontext *c = rdma_udata_to_drv_context(
+		&attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext);
 	struct mlx5_ib_dev *dev = to_mdev(c->ibucontext.device);
 	int err;
 
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 4f2bc101b061..6b1f0e76900b 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -794,6 +794,8 @@ static int create_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd,
 			  struct ib_udata *udata, struct mlx5_ib_rwq *rwq,
 			  struct mlx5_ib_create_wq *ucmd)
 {
+	struct mlx5_ib_ucontext *ucontext = rdma_udata_to_drv_context(
+		udata, struct mlx5_ib_ucontext, ibucontext);
 	int page_shift = 0;
 	int npages;
 	u32 offset = 0;
@@ -828,8 +830,7 @@ static int create_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd,
 		    (unsigned long long)ucmd->buf_addr, rwq->buf_size,
 		    npages, page_shift, ncont, offset);
 
-	err = mlx5_ib_db_map_user(to_mucontext(pd->uobject->context), udata,
-				  ucmd->db_addr, &rwq->db);
+	err = mlx5_ib_db_map_user(ucontext, udata, ucmd->db_addr, &rwq->db);
 	if (err) {
 		mlx5_ib_dbg(dev, "map failed\n");
 		goto err_umem;
@@ -877,7 +878,8 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
 		return err;
 	}
 
-	context = to_mucontext(pd->uobject->context);
+	context = rdma_udata_to_drv_context(udata, struct mlx5_ib_ucontext,
+					    ibucontext);
 	if (ucmd.flags & MLX5_QP_FLAG_BFREG_INDEX) {
 		uar_index = bfregn_to_uar_index(dev, &context->bfregi,
 						ucmd.bfreg_index, true);
@@ -1456,9 +1458,8 @@ static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
 	struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp;
 	struct mlx5_ib_sq *sq = &raw_packet_qp->sq;
 	struct mlx5_ib_rq *rq = &raw_packet_qp->rq;
-	struct ib_uobject *uobj = pd->uobject;
-	struct ib_ucontext *ucontext = uobj->context;
-	struct mlx5_ib_ucontext *mucontext = to_mucontext(ucontext);
+	struct mlx5_ib_ucontext *mucontext = rdma_udata_to_drv_context(
+		udata, struct mlx5_ib_ucontext, ibucontext);
 	int err;
 	u32 tdn = mucontext->tdn;
 	u16 uid = to_mpd(pd)->uid;
@@ -1572,9 +1573,8 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
 				 struct ib_qp_init_attr *init_attr,
 				 struct ib_udata *udata)
 {
-	struct ib_uobject *uobj = pd->uobject;
-	struct ib_ucontext *ucontext = uobj->context;
-	struct mlx5_ib_ucontext *mucontext = to_mucontext(ucontext);
+	struct mlx5_ib_ucontext *mucontext = rdma_udata_to_drv_context(
+		udata, struct mlx5_ib_ucontext, ibucontext);
 	struct mlx5_ib_create_qp_resp resp = {};
 	int inlen;
 	int err;
@@ -1916,6 +1916,8 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
 	int inlen = MLX5_ST_SZ_BYTES(create_qp_in);
 	struct mlx5_core_dev *mdev = dev->mdev;
 	struct mlx5_ib_create_qp_resp resp = {};
+	struct mlx5_ib_ucontext *ucontext = rdma_udata_to_drv_context(
+		udata, struct mlx5_ib_ucontext, ibucontext);
 	struct mlx5_ib_cq *send_cq;
 	struct mlx5_ib_cq *recv_cq;
 	unsigned long flags;
@@ -2018,8 +2020,7 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
 				      MLX5_QP_FLAG_TYPE_DCT))
 			return -EINVAL;
 
-		err = get_qp_user_index(to_mucontext(pd->uobject->context),
-					&ucmd, udata->inlen, &uidx);
+		err = get_qp_user_index(ucontext, &ucmd, udata->inlen, &uidx);
 		if (err)
 			return err;
 
@@ -2503,8 +2504,11 @@ static const char *ib_qp_type_str(enum ib_qp_type type)
 
 static struct ib_qp *mlx5_ib_create_dct(struct ib_pd *pd,
 					struct ib_qp_init_attr *attr,
-					struct mlx5_ib_create_qp *ucmd)
+					struct mlx5_ib_create_qp *ucmd,
+					struct ib_udata *udata)
 {
+	struct mlx5_ib_ucontext *ucontext = rdma_udata_to_drv_context(
+		udata, struct mlx5_ib_ucontext, ibucontext);
 	struct mlx5_ib_qp *qp;
 	int err = 0;
 	u32 uidx = MLX5_IB_DEFAULT_UIDX;
@@ -2513,8 +2517,7 @@ static struct ib_qp *mlx5_ib_create_dct(struct ib_pd *pd,
 	if (!attr->srq || !attr->recv_cq)
 		return ERR_PTR(-EINVAL);
 
-	err = get_qp_user_index(to_mucontext(pd->uobject->context),
-				ucmd, sizeof(*ucmd), &uidx);
+	err = get_qp_user_index(ucontext, ucmd, sizeof(*ucmd), &uidx);
 	if (err)
 		return ERR_PTR(err);
 
@@ -2596,15 +2599,17 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd,
 	int err;
 	struct ib_qp_init_attr mlx_init_attr;
 	struct ib_qp_init_attr *init_attr = verbs_init_attr;
+	struct mlx5_ib_ucontext *ucontext = rdma_udata_to_drv_context(
+		udata, struct mlx5_ib_ucontext, ibucontext);
 
 	if (pd) {
 		dev = to_mdev(pd->device);
 
 		if (init_attr->qp_type == IB_QPT_RAW_PACKET) {
-			if (!udata) {
+			if (!ucontext) {
 				mlx5_ib_dbg(dev, "Raw Packet QP is not supported for kernel consumers\n");
 				return ERR_PTR(-EINVAL);
-			} else if (!to_mucontext(pd->uobject->context)->cqe_version) {
+			} else if (!ucontext->cqe_version) {
 				mlx5_ib_dbg(dev, "Raw Packet QP is only supported for CQE version > 0\n");
 				return ERR_PTR(-EINVAL);
 			}
@@ -2636,7 +2641,7 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd,
 				return ERR_PTR(-EINVAL);
 			}
 		} else {
-			return mlx5_ib_create_dct(pd, init_attr, &ucmd);
+			return mlx5_ib_create_dct(pd, init_attr, &ucmd, udata);
 		}
 	}
 
@@ -3274,14 +3279,12 @@ static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
 static unsigned int get_tx_affinity(struct mlx5_ib_dev *dev,
 				    struct mlx5_ib_pd *pd,
 				    struct mlx5_ib_qp_base *qp_base,
-				    u8 port_num)
+				    u8 port_num, struct ib_udata *udata)
 {
-	struct mlx5_ib_ucontext *ucontext = NULL;
+	struct mlx5_ib_ucontext *ucontext = rdma_udata_to_drv_context(
+		udata, struct mlx5_ib_ucontext, ibucontext);
 	unsigned int tx_port_affinity;
 
-	if (pd && pd->ibpd.uobject && pd->ibpd.uobject->context)
-		ucontext = to_mucontext(pd->ibpd.uobject->context);
-
 	if (ucontext) {
 		tx_port_affinity = (unsigned int)atomic_add_return(
 					   1, &ucontext->tx_port_affinity) %
@@ -3304,8 +3307,10 @@ static unsigned int get_tx_affinity(struct mlx5_ib_dev *dev,
 
 static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
 			       const struct ib_qp_attr *attr, int attr_mask,
-			       enum ib_qp_state cur_state, enum ib_qp_state new_state,
-			       const struct mlx5_ib_modify_qp *ucmd)
+			       enum ib_qp_state cur_state,
+			       enum ib_qp_state new_state,
+			       const struct mlx5_ib_modify_qp *ucmd,
+			       struct ib_udata *udata)
 {
 	static const u16 optab[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE] = {
 		[MLX5_QP_STATE_RST] = {
@@ -3396,7 +3401,8 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
 		    (ibqp->qp_type == IB_QPT_XRC_TGT)) {
 			if (dev->lag_active) {
 				u8 p = mlx5_core_native_port_num(dev->mdev);
-				tx_affinity = get_tx_affinity(dev, pd, base, p);
+				tx_affinity = get_tx_affinity(dev, pd, base, p,
+							      udata);
 				context->flags |= cpu_to_be32(tx_affinity << 24);
 			}
 		}
@@ -3879,7 +3885,7 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 	}
 
 	err = __mlx5_ib_modify_qp(ibqp, attr, attr_mask, cur_state,
-				  new_state, &ucmd);
+				  new_state, &ucmd, udata);
 
 out:
 	mutex_unlock(&qp->mutex);
diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c
index 22bd774e0b4e..1ec1beb1296b 100644
--- a/drivers/infiniband/hw/mlx5/srq.c
+++ b/drivers/infiniband/hw/mlx5/srq.c
@@ -47,6 +47,8 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
 {
 	struct mlx5_ib_dev *dev = to_mdev(pd->device);
 	struct mlx5_ib_create_srq ucmd = {};
+	struct mlx5_ib_ucontext *ucontext = rdma_udata_to_drv_context(
+		udata, struct mlx5_ib_ucontext, ibucontext);
 	size_t ucmdlen;
 	int err;
 	int npages;
@@ -71,8 +73,7 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
 		return -EINVAL;
 
 	if (in->type != IB_SRQT_BASIC) {
-		err = get_srq_user_index(to_mucontext(pd->uobject->context),
-					 &ucmd, udata->inlen, &uidx);
+		err = get_srq_user_index(ucontext, &ucmd, udata->inlen, &uidx);
 		if (err)
 			return err;
 	}
@@ -103,8 +104,7 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
 
 	mlx5_ib_populate_pas(dev, srq->umem, page_shift, in->pas, 0);
 
-	err = mlx5_ib_db_map_user(to_mucontext(pd->uobject->context), udata,
-				  ucmd.db_addr, &srq->db);
+	err = mlx5_ib_db_map_user(ucontext, udata, ucmd.db_addr, &srq->db);
 	if (err) {
 		mlx5_ib_dbg(dev, "map doorbell failed\n");
 		goto err_in;
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index 516c8cf9c0fd..80c3af217d96 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -37,6 +37,7 @@
 #include <rdma/ib_smi.h>
 #include <rdma/ib_umem.h>
 #include <rdma/ib_user_verbs.h>
+#include <rdma/uverbs_ioctl.h>
 
 #include <linux/sched.h>
 #include <linux/slab.h>
@@ -435,7 +436,8 @@ static struct ib_srq *mthca_create_srq(struct ib_pd *pd,
 				       struct ib_udata *udata)
 {
 	struct mthca_create_srq ucmd;
-	struct mthca_ucontext *context = NULL;
+	struct mthca_ucontext *context = rdma_udata_to_drv_context(
+		udata, struct mthca_ucontext, ibucontext);
 	struct mthca_srq *srq;
 	int err;
 
@@ -447,8 +449,6 @@ static struct ib_srq *mthca_create_srq(struct ib_pd *pd,
 		return ERR_PTR(-ENOMEM);
 
 	if (udata) {
-		context = to_mucontext(pd->uobject->context);
-
 		if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) {
 			err = -EFAULT;
 			goto err_free;
@@ -510,6 +510,8 @@ static struct ib_qp *mthca_create_qp(struct ib_pd *pd,
 				     struct ib_qp_init_attr *init_attr,
 				     struct ib_udata *udata)
 {
+	struct mthca_ucontext *context = rdma_udata_to_drv_context(
+		udata, struct mthca_ucontext, ibucontext);
 	struct mthca_create_qp ucmd;
 	struct mthca_qp *qp;
 	int err;
@@ -522,15 +524,11 @@ static struct ib_qp *mthca_create_qp(struct ib_pd *pd,
 	case IB_QPT_UC:
 	case IB_QPT_UD:
 	{
-		struct mthca_ucontext *context;
-
 		qp = kzalloc(sizeof(*qp), GFP_KERNEL);
 		if (!qp)
 			return ERR_PTR(-ENOMEM);
 
 		if (udata) {
-			context = to_mucontext(pd->uobject->context);
-
 			if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) {
 				kfree(qp);
 				return ERR_PTR(-EFAULT);
@@ -568,8 +566,6 @@ static struct ib_qp *mthca_create_qp(struct ib_pd *pd,
 				     &init_attr->cap, qp, udata);
 
 		if (err && udata) {
-			context = to_mucontext(pd->uobject->context);
-
 			mthca_unmap_user_db(to_mdev(pd->device),
 					    &context->uar,
 					    context->db_tab,
@@ -898,6 +894,8 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 {
 	struct mthca_dev *dev = to_mdev(pd->device);
 	struct sg_dma_page_iter sg_iter;
+	struct mthca_ucontext *context = rdma_udata_to_drv_context(
+		udata, struct mthca_ucontext, ibucontext);
 	struct mthca_mr *mr;
 	struct mthca_reg_mr ucmd;
 	u64 *pages;
@@ -906,12 +904,12 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 	int write_mtt_size;
 
 	if (udata->inlen < sizeof ucmd) {
-		if (!to_mucontext(pd->uobject->context)->reg_mr_warned) {
+		if (!context->reg_mr_warned) {
 			mthca_warn(dev, "Process '%s' did not pass in MR attrs.\n",
 				   current->comm);
 			mthca_warn(dev, "  Update libmthca to fix this.\n");
 		}
-		++to_mucontext(pd->uobject->context)->reg_mr_warned;
+		++context->reg_mr_warned;
 		ucmd.mr_attrs = 0;
 	} else if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd))
 		return ERR_PTR(-EFAULT);
diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c
index 4e5b5cc17f1d..7a5b25d13faa 100644
--- a/drivers/infiniband/hw/mthca/mthca_qp.c
+++ b/drivers/infiniband/hw/mthca/mthca_qp.c
@@ -42,6 +42,7 @@
 #include <rdma/ib_verbs.h>
 #include <rdma/ib_cache.h>
 #include <rdma/ib_pack.h>
+#include <rdma/uverbs_ioctl.h>
 
 #include "mthca_dev.h"
 #include "mthca_cmd.h"
@@ -554,10 +555,14 @@ static int mthca_path_set(struct mthca_dev *dev, const struct rdma_ah_attr *ah,
 
 static int __mthca_modify_qp(struct ib_qp *ibqp,
 			     const struct ib_qp_attr *attr, int attr_mask,
-			     enum ib_qp_state cur_state, enum ib_qp_state new_state)
+			     enum ib_qp_state cur_state,
+			     enum ib_qp_state new_state,
+			     struct ib_udata *udata)
 {
 	struct mthca_dev *dev = to_mdev(ibqp->device);
 	struct mthca_qp *qp = to_mqp(ibqp);
+	struct mthca_ucontext *context = rdma_udata_to_drv_context(
+		udata, struct mthca_ucontext, ibucontext);
 	struct mthca_mailbox *mailbox;
 	struct mthca_qp_param *qp_param;
 	struct mthca_qp_context *qp_context;
@@ -619,8 +624,7 @@ static int __mthca_modify_qp(struct ib_qp *ibqp,
 	/* leave arbel_sched_queue as 0 */
 
 	if (qp->ibqp.uobject)
-		qp_context->usr_page =
-			cpu_to_be32(to_mucontext(qp->ibqp.uobject->context)->uar.index);
+		qp_context->usr_page = cpu_to_be32(context->uar.index);
 	else
 		qp_context->usr_page = cpu_to_be32(dev->driver_uar.index);
 	qp_context->local_qpn  = cpu_to_be32(qp->qpn);
@@ -913,7 +917,8 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
 		goto out;
 	}
 
-	err = __mthca_modify_qp(ibqp, attr, attr_mask, cur_state, new_state);
+	err = __mthca_modify_qp(ibqp, attr, attr_mask, cur_state, new_state,
+				udata);
 
 out:
 	mutex_unlock(&qp->mutex);
diff --git a/drivers/infiniband/hw/mthca/mthca_srq.c b/drivers/infiniband/hw/mthca/mthca_srq.c
index b8333c79e3fa..06b920385512 100644
--- a/drivers/infiniband/hw/mthca/mthca_srq.c
+++ b/drivers/infiniband/hw/mthca/mthca_srq.c
@@ -36,6 +36,8 @@
 
 #include <asm/io.h>
 
+#include <rdma/uverbs_ioctl.h>
+
 #include "mthca_dev.h"
 #include "mthca_cmd.h"
 #include "mthca_memfree.h"
@@ -96,17 +98,19 @@ static void mthca_tavor_init_srq_context(struct mthca_dev *dev,
 					 struct mthca_pd *pd,
 					 struct mthca_srq *srq,
 					 struct mthca_tavor_srq_context *context,
-					 bool is_user)
+					 struct ib_udata *udata)
 {
+	struct mthca_ucontext *ucontext = rdma_udata_to_drv_context(
+		udata, struct mthca_ucontext, ibucontext);
+
 	memset(context, 0, sizeof *context);
 
 	context->wqe_base_ds = cpu_to_be64(1 << (srq->wqe_shift - 4));
 	context->state_pd    = cpu_to_be32(pd->pd_num);
 	context->lkey        = cpu_to_be32(srq->mr.ibmr.lkey);
 
-	if (is_user)
-		context->uar =
-			cpu_to_be32(to_mucontext(pd->ibpd.uobject->context)->uar.index);
+	if (udata)
+		context->uar = cpu_to_be32(ucontext->uar.index);
 	else
 		context->uar = cpu_to_be32(dev->driver_uar.index);
 }
@@ -115,8 +119,10 @@ static void mthca_arbel_init_srq_context(struct mthca_dev *dev,
 					 struct mthca_pd *pd,
 					 struct mthca_srq *srq,
 					 struct mthca_arbel_srq_context *context,
-					 bool is_user)
+					 struct ib_udata *udata)
 {
+	struct mthca_ucontext *ucontext = rdma_udata_to_drv_context(
+		udata, struct mthca_ucontext, ibucontext);
 	int logsize, max;
 
 	memset(context, 0, sizeof *context);
@@ -131,9 +137,8 @@ static void mthca_arbel_init_srq_context(struct mthca_dev *dev,
 	context->lkey = cpu_to_be32(srq->mr.ibmr.lkey);
 	context->db_index = cpu_to_be32(srq->db_index);
 	context->logstride_usrpage = cpu_to_be32((srq->wqe_shift - 4) << 29);
-	if (is_user)
-		context->logstride_usrpage |=
-			cpu_to_be32(to_mucontext(pd->ibpd.uobject->context)->uar.index);
+	if (udata)
+		context->logstride_usrpage |= cpu_to_be32(ucontext->uar.index);
 	else
 		context->logstride_usrpage |= cpu_to_be32(dev->driver_uar.index);
 	context->eq_pd = cpu_to_be32(MTHCA_EQ_ASYNC << 24 | pd->pd_num);
diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c
index 138844299d73..38480b7708eb 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.c
+++ b/drivers/infiniband/hw/nes/nes_verbs.c
@@ -41,6 +41,7 @@
 #include <rdma/ib_verbs.h>
 #include <rdma/iw_cm.h>
 #include <rdma/ib_user_verbs.h>
+#include <rdma/uverbs_ioctl.h>
 
 #include "nes.h"
 
@@ -974,7 +975,8 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd,
 	struct nes_adapter *nesadapter = nesdev->nesadapter;
 	struct nes_qp *nesqp;
 	struct nes_cq *nescq;
-	struct nes_ucontext *nes_ucontext;
+	struct nes_ucontext *nes_ucontext = rdma_udata_to_drv_context(
+		udata, struct nes_ucontext, ibucontext);
 	struct nes_hw_cqp_wqe *cqp_wqe;
 	struct nes_cqp_request *cqp_request;
 	struct nes_create_qp_req req;
@@ -1055,9 +1057,8 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd,
 				}
 				if (req.user_qp_buffer)
 					nesqp->nesuqp_addr = req.user_qp_buffer;
-				if (udata && (ibpd->uobject->context)) {
+				if (udata) {
 					nesqp->user_mode = 1;
-					nes_ucontext = to_nesucontext(ibpd->uobject->context);
 					if (virt_wqs) {
 						err = 1;
 						list_for_each_entry(nespbl, &nes_ucontext->qp_reg_mem_list, list) {
@@ -1078,7 +1079,6 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd,
 						}
 					}
 
-					nes_ucontext = to_nesucontext(ibpd->uobject->context);
 					nesqp->mmap_sq_db_index =
 						find_next_zero_bit(nes_ucontext->allocated_wqs,
 								   NES_MAX_USER_WQ_REGIONS, nes_ucontext->first_free_wq);
@@ -2099,7 +2099,8 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 	struct nes_adapter *nesadapter = nesdev->nesadapter;
 	struct ib_mr *ibmr = ERR_PTR(-EINVAL);
 	struct sg_dma_page_iter dma_iter;
-	struct nes_ucontext *nes_ucontext;
+	struct nes_ucontext *nes_ucontext = rdma_udata_to_drv_context(
+		udata, struct nes_ucontext, ibucontext);
 	struct nes_pbl *nespbl;
 	struct nes_mr *nesmr;
 	struct ib_umem *region;
@@ -2342,7 +2343,6 @@ reg_user_mr_err:
 				return ERR_PTR(-ENOMEM);
 			}
 			nesmr->region = region;
-			nes_ucontext = to_nesucontext(pd->uobject->context);
 			pbl_depth = region->length >> 12;
 			pbl_depth += (region->length & (4096-1)) ? 1 : 0;
 			nespbl->pbl_size = pbl_depth*sizeof(u64);
diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c
index a613ebde322f..d51bc3ede9d1 100644
--- a/drivers/infiniband/hw/qedr/verbs.c
+++ b/drivers/infiniband/hw/qedr/verbs.c
@@ -1434,7 +1434,7 @@ struct ib_srq *qedr_create_srq(struct ib_pd *ibpd,
 	hw_srq->max_wr = init_attr->attr.max_wr;
 	hw_srq->max_sges = init_attr->attr.max_sge;
 
-	if (udata && ibpd->uobject && ibpd->uobject->context) {
+	if (udata) {
 		if (ib_copy_from_udata(&ureq, udata, sizeof(ureq))) {
 			DP_ERR(dev,
 			       "create srq: problem copying data from user space\n");
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
index 0ced89b51448..b4575b1c2a62 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
@@ -37,6 +37,7 @@
 
 #include <rdma/ib_user_verbs.h>
 #include <rdma/ib_addr.h>
+#include <rdma/uverbs_ioctl.h>
 
 #include "usnic_abi.h"
 #include "usnic_ib.h"
@@ -482,7 +483,8 @@ struct ib_qp *usnic_ib_create_qp(struct ib_pd *pd,
 	int err;
 	struct usnic_ib_dev *us_ibdev;
 	struct usnic_ib_qp_grp *qp_grp;
-	struct usnic_ib_ucontext *ucontext;
+	struct usnic_ib_ucontext *ucontext = rdma_udata_to_drv_context(
+		udata, struct usnic_ib_ucontext, ibucontext);
 	int cq_cnt;
 	struct usnic_vnic_res_spec res_spec;
 	struct usnic_ib_create_qp_cmd cmd;
@@ -490,7 +492,6 @@ struct ib_qp *usnic_ib_create_qp(struct ib_pd *pd,
 
 	usnic_dbg("\n");
 
-	ucontext = to_uucontext(pd->uobject->context);
 	us_ibdev = to_usdev(pd->device);
 
 	if (init_attr->create_flags)
diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c
index 6b1b2b75ef60..72664f0a52ee 100644
--- a/drivers/infiniband/sw/rdmavt/qp.c
+++ b/drivers/infiniband/sw/rdmavt/qp.c
@@ -53,6 +53,7 @@
 #include <rdma/ib_verbs.h>
 #include <rdma/ib_hdrs.h>
 #include <rdma/opa_addr.h>
+#include <rdma/uverbs_ioctl.h>
 #include "qp.h"
 #include "vt.h"
 #include "trace.h"
@@ -956,6 +957,8 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd,
 	size_t sg_list_sz;
 	struct ib_qp *ret = ERR_PTR(-ENOMEM);
 	struct rvt_dev_info *rdi = ib_to_rvt(ibpd->device);
+	struct rvt_ucontext *ucontext = rdma_udata_to_drv_context(
+		udata, struct rvt_ucontext, ibucontext);
 	void *priv = NULL;
 	size_t sqsize;
 
@@ -1129,7 +1132,7 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd,
 			u32 s = sizeof(struct rvt_rwq) + qp->r_rq.size * sz;
 
 			qp->ip = rvt_create_mmap_info(rdi, s,
-						      ibpd->uobject->context,
+						      &ucontext->ibucontext,
 						      qp->r_rq.wq);
 			if (!qp->ip) {
 				ret = ERR_PTR(-ENOMEM);
diff --git a/drivers/infiniband/sw/rdmavt/srq.c b/drivers/infiniband/sw/rdmavt/srq.c
index 78e06fc456c5..895b3fabd0bf 100644
--- a/drivers/infiniband/sw/rdmavt/srq.c
+++ b/drivers/infiniband/sw/rdmavt/srq.c
@@ -48,6 +48,7 @@
 #include <linux/err.h>
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
+#include <rdma/uverbs_ioctl.h>
 
 #include "srq.h"
 #include "vt.h"
@@ -77,6 +78,8 @@ struct ib_srq *rvt_create_srq(struct ib_pd *ibpd,
 			      struct ib_udata *udata)
 {
 	struct rvt_dev_info *dev = ib_to_rvt(ibpd->device);
+	struct rvt_ucontext *ucontext = rdma_udata_to_drv_context(
+		udata, struct rvt_ucontext, ibucontext);
 	struct rvt_srq *srq;
 	u32 sz;
 	struct ib_srq *ret;
@@ -119,7 +122,7 @@ struct ib_srq *rvt_create_srq(struct ib_pd *ibpd,
 		u32 s = sizeof(struct rvt_rwq) + srq->rq.size * sz;
 
 		srq->ip =
-		    rvt_create_mmap_info(dev, s, ibpd->uobject->context,
+		    rvt_create_mmap_info(dev, s, &ucontext->ibucontext,
 					 srq->rq.wq);
 		if (!srq->ip) {
 			ret = ERR_PTR(-ENOMEM);
diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c
index a19832c73d5a..ee9c82cb3b6b 100644
--- a/drivers/infiniband/sw/rdmavt/vt.c
+++ b/drivers/infiniband/sw/rdmavt/vt.c
@@ -284,10 +284,6 @@ static int rvt_query_gid(struct ib_device *ibdev, u8 port_num,
 					 &gid->global.interface_id);
 }
 
-struct rvt_ucontext {
-	struct ib_ucontext ibucontext;
-};
-
 static inline struct rvt_ucontext *to_iucontext(struct ib_ucontext
 						*ibucontext)
 {
diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c
index be5d76b2bcca..09ede70dc1e8 100644
--- a/drivers/infiniband/sw/rxe/rxe_qp.c
+++ b/drivers/infiniband/sw/rxe/rxe_qp.c
@@ -35,6 +35,7 @@
 #include <linux/delay.h>
 #include <linux/sched.h>
 #include <linux/vmalloc.h>
+#include <rdma/uverbs_ioctl.h>
 
 #include "rxe.h"
 #include "rxe_loc.h"
@@ -343,7 +344,8 @@ int rxe_qp_from_init(struct rxe_dev *rxe, struct rxe_qp *qp, struct rxe_pd *pd,
 	struct rxe_cq *rcq = to_rcq(init->recv_cq);
 	struct rxe_cq *scq = to_rcq(init->send_cq);
 	struct rxe_srq *srq = init->srq ? to_rsrq(init->srq) : NULL;
-	struct ib_ucontext *context = udata ? ibpd->uobject->context : NULL;
+	struct rxe_ucontext *ucontext =
+		rdma_udata_to_drv_context(udata, struct rxe_ucontext, ibuc);
 
 	rxe_add_ref(pd);
 	rxe_add_ref(rcq);
@@ -358,11 +360,11 @@ int rxe_qp_from_init(struct rxe_dev *rxe, struct rxe_qp *qp, struct rxe_pd *pd,
 
 	rxe_qp_init_misc(rxe, qp, init);
 
-	err = rxe_qp_init_req(rxe, qp, init, context, uresp);
+	err = rxe_qp_init_req(rxe, qp, init, &ucontext->ibuc, uresp);
 	if (err)
 		goto err1;
 
-	err = rxe_qp_init_resp(rxe, qp, init, context, uresp);
+	err = rxe_qp_init_resp(rxe, qp, init, &ucontext->ibuc, uresp);
 	if (err)
 		goto err2;
 
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index 051c3930e808..ffca654c8697 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -33,6 +33,7 @@
 
 #include <linux/dma-mapping.h>
 #include <net/addrconf.h>
+#include <rdma/uverbs_ioctl.h>
 #include "rxe.h"
 #include "rxe_loc.h"
 #include "rxe_queue.h"
@@ -320,8 +321,9 @@ static struct ib_srq *rxe_create_srq(struct ib_pd *ibpd,
 	int err;
 	struct rxe_dev *rxe = to_rdev(ibpd->device);
 	struct rxe_pd *pd = to_rpd(ibpd);
+	struct rxe_ucontext *ucontext =
+		rdma_udata_to_drv_context(udata, struct rxe_ucontext, ibuc);
 	struct rxe_srq *srq;
-	struct ib_ucontext *context = udata ? ibpd->uobject->context : NULL;
 	struct rxe_create_srq_resp __user *uresp = NULL;
 
 	if (udata) {
@@ -344,7 +346,7 @@ static struct ib_srq *rxe_create_srq(struct ib_pd *ibpd,
 	rxe_add_ref(pd);
 	srq->pd = pd;
 
-	err = rxe_srq_from_init(rxe, srq, init, context, uresp);
+	err = rxe_srq_from_init(rxe, srq, init, &ucontext->ibuc, uresp);
 	if (err)
 		goto err2;
 
diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h
index 87d66c9630d7..4c257aff7d32 100644
--- a/include/rdma/rdma_vt.h
+++ b/include/rdma/rdma_vt.h
@@ -186,6 +186,11 @@ struct rvt_driver_params {
 	u8 reserved_operations;
 };
 
+/* User context */
+struct rvt_ucontext {
+	struct ib_ucontext ibucontext;
+};
+
 /* Protection domain */
 struct rvt_pd {
 	struct ib_pd ibpd;
-- 
cgit v1.2.3


From a1b3839ac4a4933c7c5167efd7b6b091130d11aa Mon Sep 17 00:00:00 2001
From: Bodong Wang <bodong@mellanox.com>
Date: Thu, 8 Nov 2018 22:37:04 +0200
Subject: net/mlx5: E-Switch, Properly refer to the esw manager vport

In SmartNIC mode, the eswitch manager is not necessarily the PF
(vport 0). Use a helper function to get the correct eswitch manager
vport number and cache on the eswitch instance for fast reference.

Signed-off-by: Bodong Wang <bodong@mellanox.com>
Signed-off-by: Eli Cohen <eli@mellanox.com>
Reviewed-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.c  | 35 ++++++++++++++--------
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.h  | 10 +++++++
 .../ethernet/mellanox/mlx5/core/eswitch_offloads.c |  7 +++--
 include/linux/mlx5/vport.h                         |  2 ++
 4 files changed, 39 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index 05830696abd8..9c622749dbde 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -378,16 +378,16 @@ static int esw_add_uc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr)
 	u16 vport = vaddr->vport;
 	int err;
 
-	/* Skip mlx5_mpfs_add_mac for PFs,
-	 * it is already done by the PF netdev in mlx5e_execute_l2_action
+	/* Skip mlx5_mpfs_add_mac for eswitch_managers,
+	 * it is already done by its netdev in mlx5e_execute_l2_action
 	 */
-	if (!vport)
+	if (esw->manager_vport == vport)
 		goto fdb_add;
 
 	err = mlx5_mpfs_add_mac(esw->dev, mac);
 	if (err) {
 		esw_warn(esw->dev,
-			 "Failed to add L2 table mac(%pM) for vport(%d), err(%d)\n",
+			 "Failed to add L2 table mac(%pM) for vport(0x%x), err(%d)\n",
 			 mac, vport, err);
 		return err;
 	}
@@ -410,10 +410,10 @@ static int esw_del_uc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr)
 	u16 vport = vaddr->vport;
 	int err = 0;
 
-	/* Skip mlx5_mpfs_del_mac for PFs,
-	 * it is already done by the PF netdev in mlx5e_execute_l2_action
+	/* Skip mlx5_mpfs_del_mac for eswitch managerss,
+	 * it is already done by its netdev in mlx5e_execute_l2_action
 	 */
-	if (!vport || !vaddr->mpfs)
+	if (!vaddr->mpfs || esw->manager_vport == vport)
 		goto fdb_del;
 
 	err = mlx5_mpfs_del_mac(esw->dev, mac);
@@ -1457,15 +1457,22 @@ static void esw_apply_vport_conf(struct mlx5_eswitch *esw,
 {
 	int vport_num = vport->vport;
 
-	if (!vport_num)
+	if (esw->manager_vport == vport_num)
 		return;
 
 	mlx5_modify_vport_admin_state(esw->dev,
 				      MLX5_VPORT_STATE_OP_MOD_ESW_VPORT,
 				      vport_num,
 				      vport->info.link_state);
-	mlx5_modify_nic_vport_mac_address(esw->dev, vport_num, vport->info.mac);
-	mlx5_modify_nic_vport_node_guid(esw->dev, vport_num, vport->info.node_guid);
+
+	/* Host PF has its own mac/guid. */
+	if (vport_num) {
+		mlx5_modify_nic_vport_mac_address(esw->dev, vport_num,
+						  vport->info.mac);
+		mlx5_modify_nic_vport_node_guid(esw->dev, vport_num,
+						vport->info.node_guid);
+	}
+
 	modify_esw_vport_cvlan(esw->dev, vport_num, vport->info.vlan, vport->info.qos,
 			       (vport->info.vlan || vport->info.qos));
 
@@ -1537,8 +1544,11 @@ static void esw_enable_vport(struct mlx5_eswitch *esw, int vport_num,
 	vport->enabled_events = enable_events;
 	vport->enabled = true;
 
-	/* only PF is trusted by default */
-	if (!vport_num)
+	/* Esw manager is trusted by default. Host PF (vport 0) is trusted as well
+	 * in smartNIC as it's a vport group manager.
+	 */
+	if (esw->manager_vport == vport_num ||
+	    (!vport_num && mlx5_core_is_ecpf(esw->dev)))
 		vport->info.trusted = true;
 
 	esw_vport_change_handle_locked(vport);
@@ -1733,6 +1743,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev)
 		return -ENOMEM;
 
 	esw->dev = dev;
+	esw->manager_vport = mlx5_eswitch_manager_vport(dev);
 
 	esw->work_queue = create_singlethread_workqueue("mlx5_esw_wq");
 	if (!esw->work_queue) {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index 0a3eee8746c1..959a9e28d08f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -38,6 +38,7 @@
 #include <net/devlink.h>
 #include <linux/mlx5/device.h>
 #include <linux/mlx5/eswitch.h>
+#include <linux/mlx5/vport.h>
 #include <linux/mlx5/fs.h>
 #include "lib/mpfs.h"
 
@@ -204,6 +205,7 @@ struct mlx5_eswitch {
 	struct mlx5_esw_offload offloads;
 	int                     mode;
 	int                     nvports;
+	u16                     manager_vport;
 };
 
 void esw_offloads_cleanup(struct mlx5_eswitch *esw, int nvports);
@@ -363,6 +365,14 @@ bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0,
 
 #define esw_debug(dev, format, ...)				\
 	mlx5_core_dbg_mask(dev, MLX5_DEBUG_ESWITCH_MASK, format, ##__VA_ARGS__)
+
+/* The returned number is valid only when the dev is eswitch manager. */
+static inline u16 mlx5_eswitch_manager_vport(struct mlx5_core_dev *dev)
+{
+	return mlx5_core_is_ecpf_esw_manager(dev) ?
+		MLX5_VPORT_ECPF : MLX5_VPORT_PF;
+}
+
 #else  /* CONFIG_MLX5_ESWITCH */
 /* eswitch API stubs */
 static inline int  mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 9128b45f3f37..af2c44d31357 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -522,7 +522,8 @@ mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, int vport, u32 sqn
 
 	misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
 	MLX5_SET(fte_match_set_misc, misc, source_sqn, sqn);
-	MLX5_SET(fte_match_set_misc, misc, source_port, 0x0); /* source vport is 0 */
+	/* source vport is the esw manager */
+	MLX5_SET(fte_match_set_misc, misc, source_port, esw->manager_vport);
 
 	misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
 	MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_sqn);
@@ -567,7 +568,7 @@ static void peer_miss_rules_setup(struct mlx5_core_dev *peer_dev,
 			 source_eswitch_owner_vhca_id);
 
 	dest->type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
-	dest->vport.num = 0;
+	dest->vport.num = peer_dev->priv.eswitch->manager_vport;
 	dest->vport.vhca_id = MLX5_CAP_GEN(peer_dev, vhca_id);
 	dest->vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID;
 }
@@ -666,7 +667,7 @@ static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw)
 	dmac_c[0] = 0x01;
 
 	dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
-	dest.vport.num = 0;
+	dest.vport.num = esw->manager_vport;
 	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
 
 	flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, spec,
diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h
index 3bc05449ac39..b67bcc95ab5d 100644
--- a/include/linux/mlx5/vport.h
+++ b/include/linux/mlx5/vport.h
@@ -52,6 +52,8 @@ enum {
 };
 
 enum {
+	MLX5_VPORT_PF			= 0x0,
+	MLX5_VPORT_ECPF			= 0xfffe,
 	MLX5_VPORT_UPLINK		= 0xffff
 };
 
-- 
cgit v1.2.3


From cbc44e76bfcdcaccd079487367593ee3f94d006d Mon Sep 17 00:00:00 2001
From: Bodong Wang <bodong@mellanox.com>
Date: Fri, 1 Feb 2019 17:34:55 -0600
Subject: net/mlx5: E-Switch, Properly refer to host PF vport as other vport

Commands referring to vports use the following scheme:

1. When referring to my own vport, put 0 in vport and 0 in other_vport.
2. When referring to another vport, put the vport number of the
   referred vport and put 1 in other_vport. It was assumed that driver
   is accessing other vport when vport number is greater than 0.

With the above scheme, the case that ECPF eswitch manager is trying
to access host PF vport will fall over with scheme 1 as the vport
number is 0. This is apparently wrong as driver is trying to refer
other vport.

As such usage can only happen in the eswitch context, change relevant
functions to provide other vport input properly.

Signed-off-by: Bodong Wang <bodong@mellanox.com>
Signed-off-by: Eli Cohen <eli@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_rep.c  |  6 ++++--
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 13 ++++++-------
 drivers/net/ethernet/mellanox/mlx5/core/vport.c   | 10 ++++------
 include/linux/mlx5/vport.h                        |  4 ++--
 4 files changed, 16 insertions(+), 17 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 685f1975be58..f84889bbe2a0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -1083,7 +1083,8 @@ static int mlx5e_vf_rep_open(struct net_device *dev)
 
 	if (!mlx5_modify_vport_admin_state(priv->mdev,
 					   MLX5_VPORT_STATE_OP_MOD_ESW_VPORT,
-					   rep->vport, MLX5_VPORT_ADMIN_STATE_UP))
+					   rep->vport, 1,
+					   MLX5_VPORT_ADMIN_STATE_UP))
 		netif_carrier_on(dev);
 
 unlock:
@@ -1101,7 +1102,8 @@ static int mlx5e_vf_rep_close(struct net_device *dev)
 	mutex_lock(&priv->state_lock);
 	mlx5_modify_vport_admin_state(priv->mdev,
 				      MLX5_VPORT_STATE_OP_MOD_ESW_VPORT,
-				      rep->vport, MLX5_VPORT_ADMIN_STATE_DOWN);
+				      rep->vport, 1,
+				      MLX5_VPORT_ADMIN_STATE_DOWN);
 	ret = mlx5e_close_locked(dev);
 	mutex_unlock(&priv->state_lock);
 	return ret;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index 9c622749dbde..648c743cc947 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -1462,7 +1462,7 @@ static void esw_apply_vport_conf(struct mlx5_eswitch *esw,
 
 	mlx5_modify_vport_admin_state(esw->dev,
 				      MLX5_VPORT_STATE_OP_MOD_ESW_VPORT,
-				      vport_num,
+				      vport_num, 1,
 				      vport->info.link_state);
 
 	/* Host PF has its own mac/guid. */
@@ -1581,10 +1581,10 @@ static void esw_disable_vport(struct mlx5_eswitch *esw, int vport_num)
 	esw_vport_change_handle_locked(vport);
 	vport->enabled_events = 0;
 	esw_vport_disable_qos(esw, vport_num);
-	if (vport_num && esw->mode == SRIOV_LEGACY) {
+	if (esw->mode == SRIOV_LEGACY) {
 		mlx5_modify_vport_admin_state(esw->dev,
 					      MLX5_VPORT_STATE_OP_MOD_ESW_VPORT,
-					      vport_num,
+					      vport_num, 1,
 					      MLX5_VPORT_ADMIN_STATE_DOWN);
 		esw_vport_disable_egress_acl(esw, vport);
 		esw_vport_disable_ingress_acl(esw, vport);
@@ -1875,7 +1875,7 @@ int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw,
 
 	err = mlx5_modify_vport_admin_state(esw->dev,
 					    MLX5_VPORT_STATE_OP_MOD_ESW_VPORT,
-					    vport, link_state);
+					    vport, 1, link_state);
 	if (err) {
 		mlx5_core_warn(esw->dev,
 			       "Failed to set vport %d link state, err = %d",
@@ -2137,7 +2137,7 @@ static int mlx5_eswitch_query_vport_drop_stats(struct mlx5_core_dev *dev,
 	    !MLX5_CAP_GEN(dev, transmit_discard_vport_down))
 		return 0;
 
-	err = mlx5_query_vport_down_stats(dev, vport_idx,
+	err = mlx5_query_vport_down_stats(dev, vport_idx, 1,
 					  &rx_discard_vport_down,
 					  &tx_discard_vport_down);
 	if (err)
@@ -2174,8 +2174,7 @@ int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw,
 		 MLX5_CMD_OP_QUERY_VPORT_COUNTER);
 	MLX5_SET(query_vport_counter_in, in, op_mod, 0);
 	MLX5_SET(query_vport_counter_in, in, vport_number, vport);
-	if (vport)
-		MLX5_SET(query_vport_counter_in, in, other_vport, 1);
+	MLX5_SET(query_vport_counter_in, in, other_vport, 1);
 
 	memset(out, 0, outlen);
 	err = mlx5_cmd_exec(esw->dev, in, sizeof(in), out, outlen);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
index 9a928eb48522..ef95feca9961 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
@@ -64,7 +64,7 @@ u8 mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport)
 }
 
 int mlx5_modify_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod,
-				  u16 vport, u8 state)
+				  u16 vport, u8 other_vport, u8 state)
 {
 	u32 in[MLX5_ST_SZ_DW(modify_vport_state_in)]   = {0};
 	u32 out[MLX5_ST_SZ_DW(modify_vport_state_out)] = {0};
@@ -73,8 +73,7 @@ int mlx5_modify_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod,
 		 MLX5_CMD_OP_MODIFY_VPORT_STATE);
 	MLX5_SET(modify_vport_state_in, in, op_mod, opmod);
 	MLX5_SET(modify_vport_state_in, in, vport_number, vport);
-	if (vport)
-		MLX5_SET(modify_vport_state_in, in, other_vport, 1);
+	MLX5_SET(modify_vport_state_in, in, other_vport, other_vport);
 	MLX5_SET(modify_vport_state_in, in, admin_state, state);
 
 	return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
@@ -1057,7 +1056,7 @@ free:
 EXPORT_SYMBOL_GPL(mlx5_core_query_vport_counter);
 
 int mlx5_query_vport_down_stats(struct mlx5_core_dev *mdev, u16 vport,
-				u64 *rx_discard_vport_down,
+				u8 other_vport, u64 *rx_discard_vport_down,
 				u64 *tx_discard_vport_down)
 {
 	u32 out[MLX5_ST_SZ_DW(query_vnic_env_out)] = {0};
@@ -1068,8 +1067,7 @@ int mlx5_query_vport_down_stats(struct mlx5_core_dev *mdev, u16 vport,
 		 MLX5_CMD_OP_QUERY_VNIC_ENV);
 	MLX5_SET(query_vnic_env_in, in, op_mod, 0);
 	MLX5_SET(query_vnic_env_in, in, vport_number, vport);
-	if (vport)
-		MLX5_SET(query_vnic_env_in, in, other_vport, 1);
+	MLX5_SET(query_vnic_env_in, in, other_vport, other_vport);
 
 	err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
 	if (err)
diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h
index b67bcc95ab5d..b7edcb1dadd8 100644
--- a/include/linux/mlx5/vport.h
+++ b/include/linux/mlx5/vport.h
@@ -59,7 +59,7 @@ enum {
 
 u8 mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport);
 int mlx5_modify_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod,
-				  u16 vport, u8 state);
+				  u16 vport, u8 other_vport, u8 state);
 int mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev,
 				     u16 vport, u8 *addr);
 int mlx5_query_nic_vport_min_inline(struct mlx5_core_dev *mdev,
@@ -121,7 +121,7 @@ int mlx5_modify_nic_vport_vlans(struct mlx5_core_dev *dev,
 int mlx5_nic_vport_enable_roce(struct mlx5_core_dev *mdev);
 int mlx5_nic_vport_disable_roce(struct mlx5_core_dev *mdev);
 int mlx5_query_vport_down_stats(struct mlx5_core_dev *mdev, u16 vport,
-				u64 *rx_discard_vport_down,
+				u8 other_vport, u64 *rx_discard_vport_down,
 				u64 *tx_discard_vport_down);
 int mlx5_core_query_vport_counter(struct mlx5_core_dev *dev, u8 other_vport,
 				  int vf, u8 port_num, void *out,
-- 
cgit v1.2.3


From c9b99abcf232f69ddff158b1f313fd7d2654414b Mon Sep 17 00:00:00 2001
From: Bodong Wang <bodong@mellanox.com>
Date: Thu, 31 Jan 2019 14:40:53 -0600
Subject: net/mlx5: E-Switch, Split VF and special vports for offloads mode

When driver is entering offloads mode, there are two major tasks to
do: initialize flow steering and create representors. Flow steering
should make sure enough flow table/group spaces are reserved for all
reps. Representors will be created in a group, all or none.

With the introduction of ECPF, flow steering should still reserve the
same spaces. But, the representors are not always loaded/unloaded in a
single piece. Once ECPF is in offloads mode, it will get the number
of VF changing event from host PF. In such scenario, only the VF reps
should be loaded/unloaded, not the reps for special vports (such as
the uplink vport).

Thus, when entering offloads mode, driver should specify the total
number of reps, and the number of VF reps separately. When leaving
offloads mode, the cleanup should use the information self-contained
in eswitch such as number of VFs.

This patch doesn't change any functionality.

Signed-off-by: Bodong Wang <bodong@mellanox.com>
Reviewed-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.c  |  7 ++-
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.h  |  5 +-
 .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 57 ++++++++++++++++------
 include/linux/mlx5/vport.h                         |  1 +
 4 files changed, 48 insertions(+), 22 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index 648c743cc947..be6c2931d2a0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -1641,7 +1641,8 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode)
 	} else {
 		mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_ETH);
 		mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
-		err = esw_offloads_init(esw, nvfs + MLX5_SPECIAL_VPORTS);
+		err = esw_offloads_init(esw, nvfs,
+					nvfs + MLX5_SPECIAL_VPORTS);
 	}
 
 	if (err)
@@ -1683,7 +1684,6 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw)
 {
 	struct esw_mc_addr *mc_promisc;
 	int old_mode;
-	int nvports;
 	int i;
 
 	if (!ESW_ALLOWED(esw) || esw->mode == SRIOV_NONE)
@@ -1693,7 +1693,6 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw)
 		 esw->enabled_vports, esw->mode);
 
 	mc_promisc = &esw->mc_promisc;
-	nvports = esw->enabled_vports;
 
 	if (esw->mode == SRIOV_LEGACY)
 		mlx5_eq_notifier_unregister(esw->dev, &esw->nb);
@@ -1709,7 +1708,7 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw)
 	if (esw->mode == SRIOV_LEGACY)
 		esw_destroy_legacy_fdb_table(esw);
 	else if (esw->mode == SRIOV_OFFLOADS)
-		esw_offloads_cleanup(esw, nvports);
+		esw_offloads_cleanup(esw);
 
 	old_mode = esw->mode;
 	esw->mode = SRIOV_NONE;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index 959a9e28d08f..fd845e6c44d5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -208,8 +208,9 @@ struct mlx5_eswitch {
 	u16                     manager_vport;
 };
 
-void esw_offloads_cleanup(struct mlx5_eswitch *esw, int nvports);
-int esw_offloads_init(struct mlx5_eswitch *esw, int nvports);
+void esw_offloads_cleanup(struct mlx5_eswitch *esw);
+int esw_offloads_init(struct mlx5_eswitch *esw, int vf_nvports,
+		      int total_nvports);
 void esw_offloads_cleanup_reps(struct mlx5_eswitch *esw);
 int esw_offloads_init_reps(struct mlx5_eswitch *esw);
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 19969d487a01..14f7ad67cfe4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -54,6 +54,8 @@ enum {
 #define fdb_prio_table(esw, chain, prio, level) \
 	(esw)->fdb_table.offloads.fdb_prio[(chain)][(prio)][(level)]
 
+#define UPLINK_REP_INDEX 0
+
 static struct mlx5_flow_table *
 esw_get_prio_table(struct mlx5_eswitch *esw, u32 chain, u16 prio, int level);
 static void
@@ -1239,19 +1241,28 @@ int esw_offloads_init_reps(struct mlx5_eswitch *esw)
 	return 0;
 }
 
+static void __esw_offloads_unload_rep(struct mlx5_eswitch *esw,
+				      struct mlx5_eswitch_rep *rep, u8 rep_type)
+{
+	if (!rep->rep_if[rep_type].valid)
+		return;
+
+	rep->rep_if[rep_type].unload(rep);
+}
+
 static void esw_offloads_unload_reps_type(struct mlx5_eswitch *esw, int nvports,
 					  u8 rep_type)
 {
 	struct mlx5_eswitch_rep *rep;
 	int vport;
 
-	for (vport = nvports - 1; vport >= 0; vport--) {
+	for (vport = nvports; vport >= MLX5_VPORT_FIRST_VF; vport--) {
 		rep = &esw->offloads.vport_reps[vport];
-		if (!rep->rep_if[rep_type].valid)
-			continue;
-
-		rep->rep_if[rep_type].unload(rep);
+		__esw_offloads_unload_rep(esw, rep, rep_type);
 	}
+
+	rep = &esw->offloads.vport_reps[UPLINK_REP_INDEX];
+	__esw_offloads_unload_rep(esw, rep, rep_type);
 }
 
 static void esw_offloads_unload_reps(struct mlx5_eswitch *esw, int nvports)
@@ -1262,6 +1273,15 @@ static void esw_offloads_unload_reps(struct mlx5_eswitch *esw, int nvports)
 		esw_offloads_unload_reps_type(esw, nvports, rep_type);
 }
 
+static int __esw_offloads_load_rep(struct mlx5_eswitch *esw,
+				   struct mlx5_eswitch_rep *rep, u8 rep_type)
+{
+	if (!rep->rep_if[rep_type].valid)
+		return 0;
+
+	return rep->rep_if[rep_type].load(esw->dev, rep);
+}
+
 static int esw_offloads_load_reps_type(struct mlx5_eswitch *esw, int nvports,
 				       u8 rep_type)
 {
@@ -1269,12 +1289,14 @@ static int esw_offloads_load_reps_type(struct mlx5_eswitch *esw, int nvports,
 	int vport;
 	int err;
 
-	for (vport = 0; vport < nvports; vport++) {
-		rep = &esw->offloads.vport_reps[vport];
-		if (!rep->rep_if[rep_type].valid)
-			continue;
+	rep = &esw->offloads.vport_reps[UPLINK_REP_INDEX];
+	err = __esw_offloads_load_rep(esw, rep, rep_type);
+	if (err)
+		goto out;
 
-		err = rep->rep_if[rep_type].load(esw->dev, rep);
+	for (vport = MLX5_VPORT_FIRST_VF; vport <= nvports; vport++) {
+		rep = &esw->offloads.vport_reps[vport];
+		err = __esw_offloads_load_rep(esw, rep, rep_type);
 		if (err)
 			goto err_reps;
 	}
@@ -1283,6 +1305,7 @@ static int esw_offloads_load_reps_type(struct mlx5_eswitch *esw, int nvports,
 
 err_reps:
 	esw_offloads_unload_reps_type(esw, vport, rep_type);
+out:
 	return err;
 }
 
@@ -1440,17 +1463,18 @@ static void esw_offloads_steering_cleanup(struct mlx5_eswitch *esw)
 	esw_destroy_offloads_fdb_tables(esw);
 }
 
-int esw_offloads_init(struct mlx5_eswitch *esw, int nvports)
+int esw_offloads_init(struct mlx5_eswitch *esw, int vf_nvports,
+		      int total_nvports)
 {
 	int err;
 
 	mutex_init(&esw->fdb_table.offloads.fdb_prio_lock);
 
-	err = esw_offloads_steering_init(esw, nvports);
+	err = esw_offloads_steering_init(esw, total_nvports);
 	if (err)
 		return err;
 
-	err = esw_offloads_load_reps(esw, nvports);
+	err = esw_offloads_load_reps(esw, vf_nvports);
 	if (err)
 		goto err_reps;
 
@@ -1481,10 +1505,12 @@ static int esw_offloads_stop(struct mlx5_eswitch *esw,
 	return err;
 }
 
-void esw_offloads_cleanup(struct mlx5_eswitch *esw, int nvports)
+void esw_offloads_cleanup(struct mlx5_eswitch *esw)
 {
+	u16 num_vfs = esw->dev->priv.sriov.num_vfs;
+
 	esw_offloads_devcom_cleanup(esw);
-	esw_offloads_unload_reps(esw, nvports);
+	esw_offloads_unload_reps(esw, num_vfs);
 	esw_offloads_steering_cleanup(esw);
 }
 
@@ -1822,7 +1848,6 @@ EXPORT_SYMBOL(mlx5_eswitch_unregister_vport_rep);
 
 void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type)
 {
-#define UPLINK_REP_INDEX 0
 	struct mlx5_esw_offload *offloads = &esw->offloads;
 	struct mlx5_eswitch_rep *rep;
 
diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h
index b7edcb1dadd8..755aeea19e1c 100644
--- a/include/linux/mlx5/vport.h
+++ b/include/linux/mlx5/vport.h
@@ -53,6 +53,7 @@ enum {
 
 enum {
 	MLX5_VPORT_PF			= 0x0,
+	MLX5_VPORT_FIRST_VF		= 0x1,
 	MLX5_VPORT_ECPF			= 0xfffe,
 	MLX5_VPORT_UPLINK		= 0xffff
 };
-- 
cgit v1.2.3


From f121e0ea9586b2c937bf1ff9a0b682dc6424ce1d Mon Sep 17 00:00:00 2001
From: Bodong Wang <bodong@mellanox.com>
Date: Tue, 29 Jan 2019 21:48:31 -0600
Subject: net/mlx5: E-Switch, Add state to eswitch vport representors

Currently the eswitch vport reps have a valid indicator, which is
set on register and unset on unregister. However, a rep can be loaded
or not loaded when doing unregister, current driver checks if the
vport of that rep is enabled as a flag to imply the rep is loaded.
However, for ECPF, this is not valid as the host PF will enable the
vports for its VFs instead.

Add three states: {unregistered, registered, loaded}, with the
following state changes across different operations:

	create: (none)       -> unregistered
	reg:    unregistered -> registered
	load:   registered   -> loaded
	unload: loaded       -> registered
	unreg:  registered   -> unregistered

Note that the state shall only be updated inside eswitch driver rather
than individual drivers such as ETH or IB.

Signed-off-by: Bodong Wang <bodong@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Suggested-by: Mark Bloch <markb@mellanox.com>
Reviewed-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 31 +++++++++++++++-------
 include/linux/mlx5/eswitch.h                       |  8 +++++-
 2 files changed, 29 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 4979c7ee0ad7..c6c9dad69ba8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -364,7 +364,7 @@ static int esw_set_global_vlan_pop(struct mlx5_eswitch *esw, u8 val)
 	esw_debug(esw->dev, "%s applying global %s policy\n", __func__, val ? "pop" : "none");
 	for (vf_vport = 1; vf_vport < esw->enabled_vports; vf_vport++) {
 		rep = &esw->offloads.vport_reps[vf_vport];
-		if (!rep->rep_if[REP_ETH].valid)
+		if (rep->rep_if[REP_ETH].state != REP_LOADED)
 			continue;
 
 		err = __mlx5_eswitch_set_vport_vlan(esw, rep->vport, 0, 0, val);
@@ -1256,7 +1256,7 @@ int esw_offloads_init_reps(struct mlx5_eswitch *esw)
 	struct mlx5_core_dev *dev = esw->dev;
 	struct mlx5_esw_offload *offloads;
 	struct mlx5_eswitch_rep *rep;
-	u8 hw_id[ETH_ALEN];
+	u8 hw_id[ETH_ALEN], rep_type;
 	int vport;
 
 	esw->offloads.vport_reps = kcalloc(total_vfs,
@@ -1271,6 +1271,9 @@ int esw_offloads_init_reps(struct mlx5_eswitch *esw)
 	mlx5_esw_for_all_reps(esw, vport, rep) {
 		rep->vport = vport;
 		ether_addr_copy(rep->hw_id, hw_id);
+
+		for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++)
+			rep->rep_if[rep_type].state = REP_UNREGISTERED;
 	}
 
 	offloads->vport_reps[0].vport = MLX5_VPORT_UPLINK;
@@ -1281,10 +1284,11 @@ int esw_offloads_init_reps(struct mlx5_eswitch *esw)
 static void __esw_offloads_unload_rep(struct mlx5_eswitch *esw,
 				      struct mlx5_eswitch_rep *rep, u8 rep_type)
 {
-	if (!rep->rep_if[rep_type].valid)
+	if (rep->rep_if[rep_type].state != REP_LOADED)
 		return;
 
 	rep->rep_if[rep_type].unload(rep);
+	rep->rep_if[rep_type].state = REP_REGISTERED;
 }
 
 static void esw_offloads_unload_reps_type(struct mlx5_eswitch *esw, int nvports,
@@ -1311,10 +1315,18 @@ static void esw_offloads_unload_reps(struct mlx5_eswitch *esw, int nvports)
 static int __esw_offloads_load_rep(struct mlx5_eswitch *esw,
 				   struct mlx5_eswitch_rep *rep, u8 rep_type)
 {
-	if (!rep->rep_if[rep_type].valid)
+	int err = 0;
+
+	if (rep->rep_if[rep_type].state != REP_REGISTERED)
 		return 0;
 
-	return rep->rep_if[rep_type].load(esw->dev, rep);
+	err = rep->rep_if[rep_type].load(esw->dev, rep);
+	if (err)
+		return err;
+
+	rep->rep_if[rep_type].state = REP_LOADED;
+
+	return 0;
 }
 
 static int esw_offloads_load_reps_type(struct mlx5_eswitch *esw, int nvports,
@@ -1861,7 +1873,7 @@ void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw,
 	rep_if->get_proto_dev = __rep_if->get_proto_dev;
 	rep_if->priv = __rep_if->priv;
 
-	rep_if->valid = true;
+	rep_if->state = REP_REGISTERED;
 }
 EXPORT_SYMBOL(mlx5_eswitch_register_vport_rep);
 
@@ -1873,10 +1885,11 @@ void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw,
 
 	rep = &offloads->vport_reps[vport_index];
 
-	if (esw->mode == SRIOV_OFFLOADS && esw->vports[vport_index].enabled)
+	if (esw->mode == SRIOV_OFFLOADS &&
+	    rep->rep_if[rep_type].state == REP_LOADED)
 		rep->rep_if[rep_type].unload(rep);
 
-	rep->rep_if[rep_type].valid = false;
+	rep->rep_if[rep_type].state = REP_UNREGISTERED;
 }
 EXPORT_SYMBOL(mlx5_eswitch_unregister_vport_rep);
 
@@ -1896,7 +1909,7 @@ void *mlx5_eswitch_get_proto_dev(struct mlx5_eswitch *esw,
 
 	rep = mlx5_eswitch_get_rep(esw, vport);
 
-	if (rep->rep_if[rep_type].valid &&
+	if (rep->rep_if[rep_type].state == REP_LOADED &&
 	    rep->rep_if[rep_type].get_proto_dev)
 		return rep->rep_if[rep_type].get_proto_dev(rep);
 	return NULL;
diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h
index fab5121ffb8f..e3dbc1bc0917 100644
--- a/include/linux/mlx5/eswitch.h
+++ b/include/linux/mlx5/eswitch.h
@@ -22,6 +22,12 @@ enum {
 	NUM_REP_TYPES,
 };
 
+enum {
+	REP_UNREGISTERED,
+	REP_REGISTERED,
+	REP_LOADED,
+};
+
 struct mlx5_eswitch_rep;
 struct mlx5_eswitch_rep_if {
 	int		       (*load)(struct mlx5_core_dev *dev,
@@ -29,7 +35,7 @@ struct mlx5_eswitch_rep_if {
 	void		       (*unload)(struct mlx5_eswitch_rep *rep);
 	void		       *(*get_proto_dev)(struct mlx5_eswitch_rep *rep);
 	void			*priv;
-	bool		       valid;
+	u8			state;
 };
 
 struct mlx5_eswitch_rep {
-- 
cgit v1.2.3


From f8e8fa0262eaf544490a11746c524333158ef0b6 Mon Sep 17 00:00:00 2001
From: Bodong Wang <bodong@mellanox.com>
Date: Thu, 31 Jan 2019 17:42:57 -0600
Subject: net/mlx5: E-Switch, Centralize repersentor reg/unreg to eswitch
 driver

Eswitch has two users: IB and ETH. They both register repersentors
when mlx5 interface is added, and unregister the repersentors when
mlx5 interface is removed. Ideally, each driver should only deal with
the entities which are unique to itself. However, current IB and ETH
drivers have to perform the following eswitch operations:

1. When registering, specify how many vports to register. This number
   is the same for both drivers which is the total available vport
   numbers.
2. When unregistering, specify the number of registered vports to do
   unregister. Also, unload the repersentors which are already loaded.

It's unnecessary for eswitch driver to hands out the control of above
operations to individual driver users, as they're not unique to each
driver. Instead, such operations should be centralized to eswitch
driver. This consolidates eswitch control flow, and simplified IB and
ETH driver.

This patch doesn't change any functionality.

Signed-off-by: Bodong Wang <bodong@mellanox.com>
Reviewed-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/infiniband/hw/mlx5/ib_rep.c                | 20 ++++------
 drivers/net/ethernet/mellanox/mlx5/core/en_rep.c   | 19 +++------
 .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 45 +++++++++++-----------
 include/linux/mlx5/eswitch.h                       | 11 ++----
 4 files changed, 39 insertions(+), 56 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/hw/mlx5/ib_rep.c b/drivers/infiniband/hw/mlx5/ib_rep.c
index 99cae9a10195..4700cffb5a00 100644
--- a/drivers/infiniband/hw/mlx5/ib_rep.c
+++ b/drivers/infiniband/hw/mlx5/ib_rep.c
@@ -95,26 +95,20 @@ static void *mlx5_ib_vport_get_proto_dev(struct mlx5_eswitch_rep *rep)
 void mlx5_ib_register_vport_reps(struct mlx5_core_dev *mdev)
 {
 	struct mlx5_eswitch *esw = mdev->priv.eswitch;
-	int total_vports = MLX5_TOTAL_VPORTS(mdev);
 	struct mlx5_eswitch_rep_if rep_if = {};
-	int vport;
-
-	for (vport = 0; vport < total_vports; vport++) {
-		rep_if.load = mlx5_ib_vport_rep_load;
-		rep_if.unload = mlx5_ib_vport_rep_unload;
-		rep_if.get_proto_dev = mlx5_ib_vport_get_proto_dev;
-		mlx5_eswitch_register_vport_rep(esw, vport, &rep_if, REP_IB);
-	}
+
+	rep_if.load = mlx5_ib_vport_rep_load;
+	rep_if.unload = mlx5_ib_vport_rep_unload;
+	rep_if.get_proto_dev = mlx5_ib_vport_get_proto_dev;
+
+	mlx5_eswitch_register_vport_reps(esw, &rep_if, REP_IB);
 }
 
 void mlx5_ib_unregister_vport_reps(struct mlx5_core_dev *mdev)
 {
 	struct mlx5_eswitch *esw = mdev->priv.eswitch;
-	int total_vports = MLX5_TOTAL_VPORTS(mdev);
-	int vport;
 
-	for (vport = total_vports - 1; vport >= 0; vport--)
-		mlx5_eswitch_unregister_vport_rep(esw, vport, REP_IB);
+	mlx5_eswitch_unregister_vport_reps(esw, REP_IB);
 }
 
 u8 mlx5_ib_eswitch_mode(struct mlx5_eswitch *esw)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index f84889bbe2a0..287d48e5b073 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -1798,25 +1798,18 @@ static void *mlx5e_vport_rep_get_proto_dev(struct mlx5_eswitch_rep *rep)
 void mlx5e_rep_register_vport_reps(struct mlx5_core_dev *mdev)
 {
 	struct mlx5_eswitch *esw = mdev->priv.eswitch;
-	int total_vfs = MLX5_TOTAL_VPORTS(mdev);
-	int vport;
+	struct mlx5_eswitch_rep_if rep_if = {};
 
-	for (vport = 0; vport < total_vfs; vport++) {
-		struct mlx5_eswitch_rep_if rep_if = {};
+	rep_if.load = mlx5e_vport_rep_load;
+	rep_if.unload = mlx5e_vport_rep_unload;
+	rep_if.get_proto_dev = mlx5e_vport_rep_get_proto_dev;
 
-		rep_if.load = mlx5e_vport_rep_load;
-		rep_if.unload = mlx5e_vport_rep_unload;
-		rep_if.get_proto_dev = mlx5e_vport_rep_get_proto_dev;
-		mlx5_eswitch_register_vport_rep(esw, vport, &rep_if, REP_ETH);
-	}
+	mlx5_eswitch_register_vport_reps(esw, &rep_if, REP_ETH);
 }
 
 void mlx5e_rep_unregister_vport_reps(struct mlx5_core_dev *mdev)
 {
 	struct mlx5_eswitch *esw = mdev->priv.eswitch;
-	int total_vfs = MLX5_TOTAL_VPORTS(mdev);
-	int vport;
 
-	for (vport = total_vfs - 1; vport >= 0; vport--)
-		mlx5_eswitch_unregister_vport_rep(esw, vport, REP_ETH);
+	mlx5_eswitch_unregister_vport_reps(esw, REP_ETH);
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 7131d41796fb..b702b56c457e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -1923,40 +1923,39 @@ int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, u8 *encap)
 	return 0;
 }
 
-void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw,
-				     int vport_index,
-				     struct mlx5_eswitch_rep_if *__rep_if,
-				     u8 rep_type)
+void mlx5_eswitch_register_vport_reps(struct mlx5_eswitch *esw,
+				      struct mlx5_eswitch_rep_if *__rep_if,
+				      u8 rep_type)
 {
-	struct mlx5_esw_offload *offloads = &esw->offloads;
 	struct mlx5_eswitch_rep_if *rep_if;
+	struct mlx5_eswitch_rep *rep;
+	int i;
 
-	rep_if = &offloads->vport_reps[vport_index].rep_if[rep_type];
-
-	rep_if->load   = __rep_if->load;
-	rep_if->unload = __rep_if->unload;
-	rep_if->get_proto_dev = __rep_if->get_proto_dev;
-	rep_if->priv = __rep_if->priv;
+	mlx5_esw_for_all_reps(esw, i, rep) {
+		rep_if = &rep->rep_if[rep_type];
+		rep_if->load   = __rep_if->load;
+		rep_if->unload = __rep_if->unload;
+		rep_if->get_proto_dev = __rep_if->get_proto_dev;
+		rep_if->priv = __rep_if->priv;
 
-	rep_if->state = REP_REGISTERED;
+		rep_if->state = REP_REGISTERED;
+	}
 }
-EXPORT_SYMBOL(mlx5_eswitch_register_vport_rep);
+EXPORT_SYMBOL(mlx5_eswitch_register_vport_reps);
 
-void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw,
-				       int vport_index, u8 rep_type)
+void mlx5_eswitch_unregister_vport_reps(struct mlx5_eswitch *esw, u8 rep_type)
 {
-	struct mlx5_esw_offload *offloads = &esw->offloads;
+	u16 max_vf = mlx5_core_max_vfs(esw->dev);
 	struct mlx5_eswitch_rep *rep;
+	int i;
 
-	rep = &offloads->vport_reps[vport_index];
-
-	if (esw->mode == SRIOV_OFFLOADS &&
-	    rep->rep_if[rep_type].state == REP_LOADED)
-		rep->rep_if[rep_type].unload(rep);
+	if (esw->mode == SRIOV_OFFLOADS)
+		__unload_reps_all_vport(esw, max_vf, rep_type);
 
-	rep->rep_if[rep_type].state = REP_UNREGISTERED;
+	mlx5_esw_for_all_reps(esw, i, rep)
+		rep->rep_if[rep_type].state = REP_UNREGISTERED;
 }
-EXPORT_SYMBOL(mlx5_eswitch_unregister_vport_rep);
+EXPORT_SYMBOL(mlx5_eswitch_unregister_vport_reps);
 
 void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type)
 {
diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h
index e3dbc1bc0917..96d8435421de 100644
--- a/include/linux/mlx5/eswitch.h
+++ b/include/linux/mlx5/eswitch.h
@@ -46,13 +46,10 @@ struct mlx5_eswitch_rep {
 	u32		       vlan_refcount;
 };
 
-void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw,
-				     int vport_index,
-				     struct mlx5_eswitch_rep_if *rep_if,
-				     u8 rep_type);
-void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw,
-				       int vport_index,
-				       u8 rep_type);
+void mlx5_eswitch_register_vport_reps(struct mlx5_eswitch *esw,
+				      struct mlx5_eswitch_rep_if *rep_if,
+				      u8 rep_type);
+void mlx5_eswitch_unregister_vport_reps(struct mlx5_eswitch *esw, u8 rep_type);
 void *mlx5_eswitch_get_proto_dev(struct mlx5_eswitch *esw,
 				 int vport,
 				 u8 rep_type);
-- 
cgit v1.2.3


From 5ae5162066d8e59e365678a9e76fc4d8f6b78d40 Mon Sep 17 00:00:00 2001
From: Bodong Wang <bodong@mellanox.com>
Date: Fri, 14 Dec 2018 09:33:22 -0600
Subject: net/mlx5: E-Switch, Assign a different position for uplink rep and
 vport

In offloads mode, the current implementation puts the uplink
representor at index zero of the vport reps array. It is not "natural"
to place it at index 0 since we want to put the representor for vport
0 at index 0 with the introduction of SmartNIC. A separate patch will
handle the case whether a rep is needed for vport 0 (PF vport).

So, we want to have a different placeholder for uplink vport and
representor. It was placed at the end of vport and rep array. Since
vport number can no longer act as an index into the vport or
representors arrays, use functions to map vport numbers to indices
when accessing the vports or representors arrays, and vice versa.

Signed-off-by: Bodong Wang <bodong@mellanox.com>
Signed-off-by: Eli Cohen <eli@mellanox.com>
Reviewed-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.c  | 11 +++++-----
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.h  | 24 ++++++++++++++++++++++
 .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 11 ++--------
 include/linux/mlx5/vport.h                         | 11 +++++++---
 4 files changed, 40 insertions(+), 17 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index d1454f18c0a7..bb7f72467df9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -84,8 +84,10 @@ enum {
 static struct mlx5_vport *mlx5_eswitch_get_vport(struct mlx5_eswitch *esw,
 						 u16 vport_num)
 {
+	u16 idx = mlx5_eswitch_vport_num_to_index(esw, vport_num);
+
 	WARN_ON(vport_num > esw->total_vports - 1);
-	return &esw->vports[vport_num];
+	return &esw->vports[idx];
 }
 
 static int arm_vport_context_events_cmd(struct mlx5_core_dev *dev, u16 vport,
@@ -1756,8 +1758,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev)
 	int total_vports = MLX5_TOTAL_VPORTS(dev);
 	struct mlx5_eswitch *esw;
 	struct mlx5_vport *vport;
-	int vport_num;
-	int err;
+	int err, i;
 
 	if (!MLX5_VPORT_MANAGER(dev))
 		return 0;
@@ -1798,8 +1799,8 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev)
 	hash_init(esw->offloads.mod_hdr_tbl);
 	mutex_init(&esw->state_lock);
 
-	mlx5_esw_for_all_vports(esw, vport_num, vport) {
-		vport->vport = vport_num;
+	mlx5_esw_for_all_vports(esw, i, vport) {
+		vport->vport = mlx5_eswitch_index_to_vport_num(esw, i);
 		vport->info.link_state = MLX5_VPORT_ADMIN_STATE_AUTO;
 		vport->dev = dev;
 		INIT_WORK(&vport->vport_change_handler,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index fd845e6c44d5..2951c1296c3e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -374,6 +374,30 @@ static inline u16 mlx5_eswitch_manager_vport(struct mlx5_core_dev *dev)
 		MLX5_VPORT_ECPF : MLX5_VPORT_PF;
 }
 
+static inline int mlx5_eswitch_uplink_idx(struct mlx5_eswitch *esw)
+{
+	/* Uplink always locate at the last element of the array.*/
+	return esw->total_vports - 1;
+}
+
+static inline int mlx5_eswitch_vport_num_to_index(struct mlx5_eswitch *esw,
+						  u16 vport_num)
+{
+	if (vport_num == MLX5_VPORT_UPLINK)
+		return mlx5_eswitch_uplink_idx(esw);
+
+	return vport_num;
+}
+
+static inline int mlx5_eswitch_index_to_vport_num(struct mlx5_eswitch *esw,
+						  int index)
+{
+	if (index == mlx5_eswitch_uplink_idx(esw))
+		return MLX5_VPORT_UPLINK;
+
+	return index;
+}
+
 #else  /* CONFIG_MLX5_ESWITCH */
 /* eswitch API stubs */
 static inline int  mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index b702b56c457e..e787e9212174 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -85,10 +85,7 @@ enum {
 static struct mlx5_eswitch_rep *mlx5_eswitch_get_rep(struct mlx5_eswitch *esw,
 						     u16 vport_num)
 {
-	u16 idx = vport_num;
-
-	if (vport_num == MLX5_VPORT_UPLINK)
-		idx = UPLINK_REP_INDEX;
+	u16 idx = mlx5_eswitch_vport_num_to_index(esw, vport_num);
 
 	WARN_ON(idx > esw->total_vports - 1);
 	return &esw->offloads.vport_reps[idx];
@@ -1254,7 +1251,6 @@ int esw_offloads_init_reps(struct mlx5_eswitch *esw)
 {
 	int total_vfs = MLX5_TOTAL_VPORTS(esw->dev);
 	struct mlx5_core_dev *dev = esw->dev;
-	struct mlx5_esw_offload *offloads;
 	struct mlx5_eswitch_rep *rep;
 	u8 hw_id[ETH_ALEN], rep_type;
 	int vport;
@@ -1265,19 +1261,16 @@ int esw_offloads_init_reps(struct mlx5_eswitch *esw)
 	if (!esw->offloads.vport_reps)
 		return -ENOMEM;
 
-	offloads = &esw->offloads;
 	mlx5_query_nic_vport_mac_address(dev, 0, hw_id);
 
 	mlx5_esw_for_all_reps(esw, vport, rep) {
-		rep->vport = vport;
+		rep->vport = mlx5_eswitch_index_to_vport_num(esw, vport);
 		ether_addr_copy(rep->hw_id, hw_id);
 
 		for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++)
 			rep->rep_if[rep_type].state = REP_UNREGISTERED;
 	}
 
-	offloads->vport_reps[0].vport = MLX5_VPORT_UPLINK;
-
 	return 0;
 }
 
diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h
index 755aeea19e1c..134248c02786 100644
--- a/include/linux/mlx5/vport.h
+++ b/include/linux/mlx5/vport.h
@@ -36,9 +36,14 @@
 #include <linux/mlx5/driver.h>
 #include <linux/mlx5/device.h>
 
-#define MLX5_VPORT_PF_PLACEHOLDER (1u)
-#define MLX5_SPECIAL_VPORTS (MLX5_VPORT_PF_PLACEHOLDER)
-#define MLX5_TOTAL_VPORTS(mdev) (MLX5_SPECIAL_VPORTS +	mlx5_core_max_vfs(mdev))
+#define MLX5_VPORT_PF_PLACEHOLDER		(1u)
+#define MLX5_VPORT_UPLINK_PLACEHOLDER		(1u)
+
+#define MLX5_SPECIAL_VPORTS	(MLX5_VPORT_PF_PLACEHOLDER +		\
+				 MLX5_VPORT_UPLINK_PLACEHOLDER)
+
+#define MLX5_TOTAL_VPORTS(mdev)	(MLX5_SPECIAL_VPORTS +			\
+				 mlx5_core_max_vfs(mdev))
 
 #define MLX5_VPORT_MANAGER(mdev)					\
 	(MLX5_CAP_GEN(mdev, vport_group_manager) &&			\
-- 
cgit v1.2.3


From 81cd229c294e2e416e9161d9286d34f3aaf19348 Mon Sep 17 00:00:00 2001
From: Bodong Wang <bodong@mellanox.com>
Date: Mon, 10 Dec 2018 11:59:33 -0600
Subject: net/mlx5: E-Switch, Consider ECPF vport depends on eswitch ownership

ECPF connects to the eswitch through vport 0xfffe. ECPF may or may
not be the eswitch manager depending on firmware configuration.

1. If ECPF is eswitch manager: ECPF will take over the eswitch manager
   responsibility. A rep of the host PF shall be created at the ECPF
   side for the eswitch manager to control.

2. If ECPF is not eswitch manager: host PF will be the eswitch manager,
   ECPF acts similar as a VF to the host PF. Host PF will be aware
   of the ECPF vport presence and control it's rep.

Signed-off-by: Bodong Wang <bodong@mellanox.com>
Reviewed-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.c  |  8 ++-
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.h  | 15 ++++
 .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 79 +++++++++++++++++++++-
 include/linux/mlx5/driver.h                        |  5 ++
 include/linux/mlx5/mlx5_ifc.h                      |  3 +-
 include/linux/mlx5/vport.h                         |  8 ++-
 6 files changed, 110 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index bb7f72467df9..d2ab1ee19b2a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -1667,7 +1667,7 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode)
 		mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_ETH);
 		mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB);
 		err = esw_offloads_init(esw, nvfs,
-					nvfs + MLX5_SPECIAL_VPORTS);
+					nvfs + MLX5_SPECIAL_VPORTS(esw->dev));
 	}
 
 	if (err)
@@ -1687,6 +1687,12 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode)
 	vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_PF);
 	esw_enable_vport(esw, vport, enabled_events);
 
+	/* Enable ECPF vports */
+	if (mlx5_ecpf_vport_exists(esw->dev)) {
+		vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_ECPF);
+		esw_enable_vport(esw, vport, enabled_events);
+	}
+
 	/* Enable VF vports */
 	mlx5_esw_for_each_vf_vport(esw, i, vport, nvfs)
 		esw_enable_vport(esw, vport, enabled_events);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index 2951c1296c3e..2baa0d71380c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -380,9 +380,20 @@ static inline int mlx5_eswitch_uplink_idx(struct mlx5_eswitch *esw)
 	return esw->total_vports - 1;
 }
 
+static inline int mlx5_eswitch_ecpf_idx(struct mlx5_eswitch *esw)
+{
+	return esw->total_vports - 2;
+}
+
 static inline int mlx5_eswitch_vport_num_to_index(struct mlx5_eswitch *esw,
 						  u16 vport_num)
 {
+	if (vport_num == MLX5_VPORT_ECPF) {
+		if (!mlx5_ecpf_vport_exists(esw->dev))
+			esw_warn(esw->dev, "ECPF vport doesn't exist!\n");
+		return mlx5_eswitch_ecpf_idx(esw);
+	}
+
 	if (vport_num == MLX5_VPORT_UPLINK)
 		return mlx5_eswitch_uplink_idx(esw);
 
@@ -392,6 +403,10 @@ static inline int mlx5_eswitch_vport_num_to_index(struct mlx5_eswitch *esw,
 static inline int mlx5_eswitch_index_to_vport_num(struct mlx5_eswitch *esw,
 						  int index)
 {
+	if (index == mlx5_eswitch_ecpf_idx(esw) &&
+	    mlx5_ecpf_vport_exists(esw->dev))
+		return MLX5_VPORT_ECPF;
+
 	if (index == mlx5_eswitch_uplink_idx(esw))
 		return MLX5_VPORT_UPLINK;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index e787e9212174..84a33f8e3350 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -639,14 +639,35 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw,
 	misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
 			    misc_parameters);
 
+	if (mlx5_core_is_ecpf_esw_manager(esw->dev)) {
+		MLX5_SET(fte_match_set_misc, misc, source_port, MLX5_VPORT_PF);
+		flow = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb,
+					   spec, &flow_act, &dest, 1);
+		if (IS_ERR(flow)) {
+			err = PTR_ERR(flow);
+			goto add_pf_flow_err;
+		}
+		flows[MLX5_VPORT_PF] = flow;
+	}
+
+	if (mlx5_ecpf_vport_exists(esw->dev)) {
+		MLX5_SET(fte_match_set_misc, misc, source_port, MLX5_VPORT_ECPF);
+		flow = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb,
+					   spec, &flow_act, &dest, 1);
+		if (IS_ERR(flow)) {
+			err = PTR_ERR(flow);
+			goto add_ecpf_flow_err;
+		}
+		flows[mlx5_eswitch_ecpf_idx(esw)] = flow;
+	}
+
 	mlx5_esw_for_each_vf_vport(esw, i, mlx5_core_max_vfs(esw->dev)) {
 		MLX5_SET(fte_match_set_misc, misc, source_port, i);
 		flow = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb,
 					   spec, &flow_act, &dest, 1);
 		if (IS_ERR(flow)) {
 			err = PTR_ERR(flow);
-			esw_warn(esw->dev, "FDB: Failed to add peer miss flow rule err %d\n", err);
-			goto add_flow_err;
+			goto add_vf_flow_err;
 		}
 		flows[i] = flow;
 	}
@@ -656,10 +677,18 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw,
 	kvfree(spec);
 	return 0;
 
-add_flow_err:
+add_vf_flow_err:
 	nvports = --i;
 	mlx5_esw_for_each_vf_vport_reverse(esw, i, nvports)
 		mlx5_del_flow_rules(flows[i]);
+
+	if (mlx5_ecpf_vport_exists(esw->dev))
+		mlx5_del_flow_rules(flows[mlx5_eswitch_ecpf_idx(esw)]);
+add_ecpf_flow_err:
+	if (mlx5_core_is_ecpf_esw_manager(esw->dev))
+		mlx5_del_flow_rules(flows[MLX5_VPORT_PF]);
+add_pf_flow_err:
+	esw_warn(esw->dev, "FDB: Failed to add peer miss flow rule err %d\n", err);
 	kvfree(flows);
 alloc_flows_err:
 	kvfree(spec);
@@ -676,6 +705,12 @@ static void esw_del_fdb_peer_miss_rules(struct mlx5_eswitch *esw)
 	mlx5_esw_for_each_vf_vport_reverse(esw, i, mlx5_core_max_vfs(esw->dev))
 		mlx5_del_flow_rules(flows[i]);
 
+	if (mlx5_ecpf_vport_exists(esw->dev))
+		mlx5_del_flow_rules(flows[mlx5_eswitch_ecpf_idx(esw)]);
+
+	if (mlx5_core_is_ecpf_esw_manager(esw->dev))
+		mlx5_del_flow_rules(flows[MLX5_VPORT_PF]);
+
 	kvfree(flows);
 }
 
@@ -1288,6 +1323,16 @@ static void __unload_reps_special_vport(struct mlx5_eswitch *esw, u8 rep_type)
 {
 	struct mlx5_eswitch_rep *rep;
 
+	if (mlx5_ecpf_vport_exists(esw->dev)) {
+		rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_ECPF);
+		__esw_offloads_unload_rep(esw, rep, rep_type);
+	}
+
+	if (mlx5_core_is_ecpf_esw_manager(esw->dev)) {
+		rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_PF);
+		__esw_offloads_unload_rep(esw, rep, rep_type);
+	}
+
 	rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_UPLINK);
 	__esw_offloads_unload_rep(esw, rep, rep_type);
 }
@@ -1351,6 +1396,34 @@ static int __load_reps_special_vport(struct mlx5_eswitch *esw, u8 rep_type)
 
 	rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_UPLINK);
 	err = __esw_offloads_load_rep(esw, rep, rep_type);
+	if (err)
+		return err;
+
+	if (mlx5_core_is_ecpf_esw_manager(esw->dev)) {
+		rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_PF);
+		err = __esw_offloads_load_rep(esw, rep, rep_type);
+		if (err)
+			goto err_pf;
+	}
+
+	if (mlx5_ecpf_vport_exists(esw->dev)) {
+		rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_ECPF);
+		err = __esw_offloads_load_rep(esw, rep, rep_type);
+		if (err)
+			goto err_ecpf;
+	}
+
+	return 0;
+
+err_ecpf:
+	if (mlx5_core_is_ecpf_esw_manager(esw->dev)) {
+		rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_PF);
+		__esw_offloads_unload_rep(esw, rep, rep_type);
+	}
+
+err_pf:
+	rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_UPLINK);
+	__esw_offloads_unload_rep(esw, rep, rep_type);
 	return err;
 }
 
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index c5454f985e1d..c2de50f02b33 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -1088,6 +1088,11 @@ static inline bool mlx5_core_is_ecpf_esw_manager(struct mlx5_core_dev *dev)
 	return dev->caps.embedded_cpu && MLX5_CAP_GEN(dev, eswitch_manager);
 }
 
+static inline bool mlx5_ecpf_vport_exists(struct mlx5_core_dev *dev)
+{
+	return mlx5_core_is_pf(dev) && MLX5_CAP_ESW(dev, ecpf_vport_exists);
+}
+
 #define MLX5_HOST_PF_MAX_VFS	(127u)
 static inline u16 mlx5_core_max_vfs(struct mlx5_core_dev *dev)
 {
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 5decffe565fb..b7bb774b57b0 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -631,7 +631,8 @@ struct mlx5_ifc_e_switch_cap_bits {
 	u8         vport_svlan_insert[0x1];
 	u8         vport_cvlan_insert_if_not_exist[0x1];
 	u8         vport_cvlan_insert_overwrite[0x1];
-	u8         reserved_at_5[0x17];
+	u8         reserved_at_5[0x16];
+	u8         ecpf_vport_exists[0x1];
 	u8         counter_eswitch_affinity[0x1];
 	u8         merged_eswitch[0x1];
 	u8         nic_vport_node_guid_modify[0x1];
diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h
index 134248c02786..0eef548b9946 100644
--- a/include/linux/mlx5/vport.h
+++ b/include/linux/mlx5/vport.h
@@ -38,11 +38,13 @@
 
 #define MLX5_VPORT_PF_PLACEHOLDER		(1u)
 #define MLX5_VPORT_UPLINK_PLACEHOLDER		(1u)
+#define MLX5_VPORT_ECPF_PLACEHOLDER(mdev)	(mlx5_ecpf_vport_exists(mdev))
 
-#define MLX5_SPECIAL_VPORTS	(MLX5_VPORT_PF_PLACEHOLDER +		\
-				 MLX5_VPORT_UPLINK_PLACEHOLDER)
+#define MLX5_SPECIAL_VPORTS(mdev) (MLX5_VPORT_PF_PLACEHOLDER +		\
+				   MLX5_VPORT_UPLINK_PLACEHOLDER +	\
+				   MLX5_VPORT_ECPF_PLACEHOLDER(mdev))
 
-#define MLX5_TOTAL_VPORTS(mdev)	(MLX5_SPECIAL_VPORTS +			\
+#define MLX5_TOTAL_VPORTS(mdev)	(MLX5_SPECIAL_VPORTS(mdev) +		\
 				 mlx5_core_max_vfs(mdev))
 
 #define MLX5_VPORT_MANAGER(mdev)					\
-- 
cgit v1.2.3


From db610a640eeeb268c36a4558414f28e1c269433e Mon Sep 17 00:00:00 2001
From: Jaegeuk Kim <jaegeuk@kernel.org>
Date: Thu, 24 Jan 2019 17:48:38 -0800
Subject: f2fs: add quick mode of checkpoint=disable for QA

This mode returns mount() quickly with EAGAIN. We can trigger this by
shutdown(F2FS_GOING_DOWN_NEED_FSCK).

Reviewed-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/checkpoint.c    | 5 +++++
 fs/f2fs/f2fs.h          | 2 ++
 fs/f2fs/file.c          | 6 +++---
 fs/f2fs/segment.c       | 3 +++
 fs/f2fs/super.c         | 5 +++++
 include/linux/f2fs_fs.h | 1 +
 6 files changed, 19 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index f955cd3e0677..622dca707752 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -1259,6 +1259,11 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 	else
 		__clear_ckpt_flags(ckpt, CP_DISABLED_FLAG);
 
+	if (is_sbi_flag_set(sbi, SBI_CP_DISABLED_QUICK))
+		__set_ckpt_flags(ckpt, CP_DISABLED_QUICK_FLAG);
+	else
+		__clear_ckpt_flags(ckpt, CP_DISABLED_QUICK_FLAG);
+
 	if (is_sbi_flag_set(sbi, SBI_QUOTA_SKIP_FLUSH))
 		__set_ckpt_flags(ckpt, CP_QUOTA_NEED_FSCK_FLAG);
 	else
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 6b6ec5600089..fe95abb05d40 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -191,6 +191,7 @@ enum {
 #define DEF_CP_INTERVAL			60	/* 60 secs */
 #define DEF_IDLE_INTERVAL		5	/* 5 secs */
 #define DEF_DISABLE_INTERVAL		5	/* 5 secs */
+#define DEF_DISABLE_QUICK_INTERVAL	1	/* 1 secs */
 #define DEF_UMOUNT_DISCARD_TIMEOUT	5	/* 5 secs */
 
 struct cp_control {
@@ -1101,6 +1102,7 @@ enum {
 	SBI_IS_SHUTDOWN,			/* shutdown by ioctl */
 	SBI_IS_RECOVERED,			/* recovered orphan/data */
 	SBI_CP_DISABLED,			/* CP was disabled last mount */
+	SBI_CP_DISABLED_QUICK,			/* CP was disabled quickly */
 	SBI_QUOTA_NEED_FLUSH,			/* need to flush quota info in CP */
 	SBI_QUOTA_SKIP_FLUSH,			/* skip flushing quota in current CP */
 	SBI_QUOTA_NEED_REPAIR,			/* quota file may be corrupted */
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 0d461321edfc..fe6f92fbba38 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -1972,11 +1972,11 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
 		break;
 	case F2FS_GOING_DOWN_NEED_FSCK:
 		set_sbi_flag(sbi, SBI_NEED_FSCK);
+		set_sbi_flag(sbi, SBI_CP_DISABLED_QUICK);
+		set_sbi_flag(sbi, SBI_IS_DIRTY);
 		/* do checkpoint only */
 		ret = f2fs_sync_fs(sb, 1);
-		if (ret)
-			goto out;
-		break;
+		goto out;
 	default:
 		ret = -EINVAL;
 		goto out;
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 5b2b9be6f28d..342b720fb4db 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -868,6 +868,9 @@ int f2fs_disable_cp_again(struct f2fs_sb_info *sbi)
 
 	if (holes[DATA] > ovp || holes[NODE] > ovp)
 		return -EAGAIN;
+	if (is_sbi_flag_set(sbi, SBI_CP_DISABLED_QUICK) &&
+		dirty_segments(sbi) > overprovision_segments(sbi))
+		return -EAGAIN;
 	return 0;
 }
 
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 24efd76ca151..5e1f8573a17f 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -3205,6 +3205,10 @@ try_onemore:
 
 	if (__is_set_ckpt_flags(F2FS_CKPT(sbi), CP_QUOTA_NEED_FSCK_FLAG))
 		set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
+	if (__is_set_ckpt_flags(F2FS_CKPT(sbi), CP_DISABLED_QUICK_FLAG)) {
+		set_sbi_flag(sbi, SBI_CP_DISABLED_QUICK);
+		sbi->interval_time[DISABLE_TIME] = DEF_DISABLE_QUICK_INTERVAL;
+	}
 
 	/* Initialize device list */
 	err = f2fs_scan_devices(sbi);
@@ -3392,6 +3396,7 @@ skip_recovery:
 				cur_cp_version(F2FS_CKPT(sbi)));
 	f2fs_update_time(sbi, CP_TIME);
 	f2fs_update_time(sbi, REQ_TIME);
+	clear_sbi_flag(sbi, SBI_CP_DISABLED_QUICK);
 	return 0;
 
 free_meta:
diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
index d7711048ef93..d6befe1f9dc7 100644
--- a/include/linux/f2fs_fs.h
+++ b/include/linux/f2fs_fs.h
@@ -116,6 +116,7 @@ struct f2fs_super_block {
 /*
  * For checkpoint
  */
+#define CP_DISABLED_QUICK_FLAG		0x00002000
 #define CP_DISABLED_FLAG		0x00001000
 #define CP_QUOTA_NEED_FSCK_FLAG		0x00000800
 #define CP_LARGE_NAT_BITMAP_FLAG	0x00000400
-- 
cgit v1.2.3


From 1ffdc3807589779626924eb600db784d3d943a08 Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliangtang@gmail.com>
Date: Fri, 25 Jan 2019 15:35:01 +0800
Subject: f2fs: fix typos in code comments

lengh -> length

Signed-off-by: Geliang Tang <geliangtang@gmail.com>
Reviewed-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 include/linux/f2fs_fs.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
index d6befe1f9dc7..8d57aaee8166 100644
--- a/include/linux/f2fs_fs.h
+++ b/include/linux/f2fs_fs.h
@@ -187,7 +187,7 @@ struct f2fs_orphan_block {
 struct f2fs_extent {
 	__le32 fofs;		/* start file offset of the extent */
 	__le32 blk;		/* start block address of the extent */
-	__le32 len;		/* lengh of the extent */
+	__le32 len;		/* length of the extent */
 } __packed;
 
 #define F2FS_NAME_LEN		255
@@ -512,7 +512,7 @@ typedef __le32	f2fs_hash_t;
 struct f2fs_dir_entry {
 	__le32 hash_code;	/* hash code of file name */
 	__le32 ino;		/* inode number */
-	__le16 name_len;	/* lengh of file name */
+	__le16 name_len;	/* length of file name */
 	__u8 file_type;		/* file type */
 } __packed;
 
-- 
cgit v1.2.3


From 5c418dc789a3898717ebf2caa5716ba91a7150b2 Mon Sep 17 00:00:00 2001
From: Anders Roxell <anders.roxell@linaro.org>
Date: Fri, 15 Feb 2019 17:55:51 +0100
Subject: efi: Fix build error due to enum collision between efi.h and ima.h

The following commit:

  a893ea15d764 ("tpm: move tpm_chip definition to include/linux/tpm.h")

introduced a build error when both IMA and EFI are enabled:

    In file included from ../security/integrity/ima/ima_fs.c:30:
    ../security/integrity/ima/ima.h:176:7: error: redeclaration of enumerator "NONE"

What happens is that both headers (ima.h and efi.h) defines the same
'NONE' constant, and it broke when they started getting included from
the same file:

Rework to prefix the EFI enum with 'EFI_*'.

Signed-off-by: Anders Roxell <anders.roxell@linaro.org>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-efi@vger.kernel.org
Link: http://lkml.kernel.org/r/20190215165551.12220-2-ard.biesheuvel@linaro.org
[ Cleaned up the changelog a bit. ]
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/platform/efi/quirks.c          |  4 +--
 drivers/firmware/efi/runtime-wrappers.c | 48 ++++++++++++++++-----------------
 include/linux/efi.h                     | 26 +++++++++---------
 3 files changed, 39 insertions(+), 39 deletions(-)

(limited to 'include')

diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c
index 9ce85e605052..458a0e2bcc57 100644
--- a/arch/x86/platform/efi/quirks.c
+++ b/arch/x86/platform/efi/quirks.c
@@ -717,7 +717,7 @@ void efi_recover_from_page_fault(unsigned long phys_addr)
 	 * "efi_mm" cannot be used to check if the page fault had occurred
 	 * in the firmware context because efi=old_map doesn't use efi_pgd.
 	 */
-	if (efi_rts_work.efi_rts_id == NONE)
+	if (efi_rts_work.efi_rts_id == EFI_NONE)
 		return;
 
 	/*
@@ -742,7 +742,7 @@ void efi_recover_from_page_fault(unsigned long phys_addr)
 	 * because this case occurs *very* rarely and hence could be improved
 	 * on a need by basis.
 	 */
-	if (efi_rts_work.efi_rts_id == RESET_SYSTEM) {
+	if (efi_rts_work.efi_rts_id == EFI_RESET_SYSTEM) {
 		pr_info("efi_reset_system() buggy! Reboot through BIOS\n");
 		machine_real_restart(MRR_BIOS);
 		return;
diff --git a/drivers/firmware/efi/runtime-wrappers.c b/drivers/firmware/efi/runtime-wrappers.c
index 8903b9ccfc2b..8bbbbf160d05 100644
--- a/drivers/firmware/efi/runtime-wrappers.c
+++ b/drivers/firmware/efi/runtime-wrappers.c
@@ -85,7 +85,7 @@ struct efi_runtime_work efi_rts_work;
 		pr_err("Failed to queue work to efi_rts_wq.\n");	\
 									\
 exit:									\
-	efi_rts_work.efi_rts_id = NONE;					\
+	efi_rts_work.efi_rts_id = EFI_NONE;				\
 	efi_rts_work.status;						\
 })
 
@@ -168,50 +168,50 @@ static void efi_call_rts(struct work_struct *work)
 	arg5 = efi_rts_work.arg5;
 
 	switch (efi_rts_work.efi_rts_id) {
-	case GET_TIME:
+	case EFI_GET_TIME:
 		status = efi_call_virt(get_time, (efi_time_t *)arg1,
 				       (efi_time_cap_t *)arg2);
 		break;
-	case SET_TIME:
+	case EFI_SET_TIME:
 		status = efi_call_virt(set_time, (efi_time_t *)arg1);
 		break;
-	case GET_WAKEUP_TIME:
+	case EFI_GET_WAKEUP_TIME:
 		status = efi_call_virt(get_wakeup_time, (efi_bool_t *)arg1,
 				       (efi_bool_t *)arg2, (efi_time_t *)arg3);
 		break;
-	case SET_WAKEUP_TIME:
+	case EFI_SET_WAKEUP_TIME:
 		status = efi_call_virt(set_wakeup_time, *(efi_bool_t *)arg1,
 				       (efi_time_t *)arg2);
 		break;
-	case GET_VARIABLE:
+	case EFI_GET_VARIABLE:
 		status = efi_call_virt(get_variable, (efi_char16_t *)arg1,
 				       (efi_guid_t *)arg2, (u32 *)arg3,
 				       (unsigned long *)arg4, (void *)arg5);
 		break;
-	case GET_NEXT_VARIABLE:
+	case EFI_GET_NEXT_VARIABLE:
 		status = efi_call_virt(get_next_variable, (unsigned long *)arg1,
 				       (efi_char16_t *)arg2,
 				       (efi_guid_t *)arg3);
 		break;
-	case SET_VARIABLE:
+	case EFI_SET_VARIABLE:
 		status = efi_call_virt(set_variable, (efi_char16_t *)arg1,
 				       (efi_guid_t *)arg2, *(u32 *)arg3,
 				       *(unsigned long *)arg4, (void *)arg5);
 		break;
-	case QUERY_VARIABLE_INFO:
+	case EFI_QUERY_VARIABLE_INFO:
 		status = efi_call_virt(query_variable_info, *(u32 *)arg1,
 				       (u64 *)arg2, (u64 *)arg3, (u64 *)arg4);
 		break;
-	case GET_NEXT_HIGH_MONO_COUNT:
+	case EFI_GET_NEXT_HIGH_MONO_COUNT:
 		status = efi_call_virt(get_next_high_mono_count, (u32 *)arg1);
 		break;
-	case UPDATE_CAPSULE:
+	case EFI_UPDATE_CAPSULE:
 		status = efi_call_virt(update_capsule,
 				       (efi_capsule_header_t **)arg1,
 				       *(unsigned long *)arg2,
 				       *(unsigned long *)arg3);
 		break;
-	case QUERY_CAPSULE_CAPS:
+	case EFI_QUERY_CAPSULE_CAPS:
 		status = efi_call_virt(query_capsule_caps,
 				       (efi_capsule_header_t **)arg1,
 				       *(unsigned long *)arg2, (u64 *)arg3,
@@ -235,7 +235,7 @@ static efi_status_t virt_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc)
 
 	if (down_interruptible(&efi_runtime_lock))
 		return EFI_ABORTED;
-	status = efi_queue_work(GET_TIME, tm, tc, NULL, NULL, NULL);
+	status = efi_queue_work(EFI_GET_TIME, tm, tc, NULL, NULL, NULL);
 	up(&efi_runtime_lock);
 	return status;
 }
@@ -246,7 +246,7 @@ static efi_status_t virt_efi_set_time(efi_time_t *tm)
 
 	if (down_interruptible(&efi_runtime_lock))
 		return EFI_ABORTED;
-	status = efi_queue_work(SET_TIME, tm, NULL, NULL, NULL, NULL);
+	status = efi_queue_work(EFI_SET_TIME, tm, NULL, NULL, NULL, NULL);
 	up(&efi_runtime_lock);
 	return status;
 }
@@ -259,7 +259,7 @@ static efi_status_t virt_efi_get_wakeup_time(efi_bool_t *enabled,
 
 	if (down_interruptible(&efi_runtime_lock))
 		return EFI_ABORTED;
-	status = efi_queue_work(GET_WAKEUP_TIME, enabled, pending, tm, NULL,
+	status = efi_queue_work(EFI_GET_WAKEUP_TIME, enabled, pending, tm, NULL,
 				NULL);
 	up(&efi_runtime_lock);
 	return status;
@@ -271,7 +271,7 @@ static efi_status_t virt_efi_set_wakeup_time(efi_bool_t enabled, efi_time_t *tm)
 
 	if (down_interruptible(&efi_runtime_lock))
 		return EFI_ABORTED;
-	status = efi_queue_work(SET_WAKEUP_TIME, &enabled, tm, NULL, NULL,
+	status = efi_queue_work(EFI_SET_WAKEUP_TIME, &enabled, tm, NULL, NULL,
 				NULL);
 	up(&efi_runtime_lock);
 	return status;
@@ -287,7 +287,7 @@ static efi_status_t virt_efi_get_variable(efi_char16_t *name,
 
 	if (down_interruptible(&efi_runtime_lock))
 		return EFI_ABORTED;
-	status = efi_queue_work(GET_VARIABLE, name, vendor, attr, data_size,
+	status = efi_queue_work(EFI_GET_VARIABLE, name, vendor, attr, data_size,
 				data);
 	up(&efi_runtime_lock);
 	return status;
@@ -301,7 +301,7 @@ static efi_status_t virt_efi_get_next_variable(unsigned long *name_size,
 
 	if (down_interruptible(&efi_runtime_lock))
 		return EFI_ABORTED;
-	status = efi_queue_work(GET_NEXT_VARIABLE, name_size, name, vendor,
+	status = efi_queue_work(EFI_GET_NEXT_VARIABLE, name_size, name, vendor,
 				NULL, NULL);
 	up(&efi_runtime_lock);
 	return status;
@@ -317,7 +317,7 @@ static efi_status_t virt_efi_set_variable(efi_char16_t *name,
 
 	if (down_interruptible(&efi_runtime_lock))
 		return EFI_ABORTED;
-	status = efi_queue_work(SET_VARIABLE, name, vendor, &attr, &data_size,
+	status = efi_queue_work(EFI_SET_VARIABLE, name, vendor, &attr, &data_size,
 				data);
 	up(&efi_runtime_lock);
 	return status;
@@ -352,7 +352,7 @@ static efi_status_t virt_efi_query_variable_info(u32 attr,
 
 	if (down_interruptible(&efi_runtime_lock))
 		return EFI_ABORTED;
-	status = efi_queue_work(QUERY_VARIABLE_INFO, &attr, storage_space,
+	status = efi_queue_work(EFI_QUERY_VARIABLE_INFO, &attr, storage_space,
 				remaining_space, max_variable_size, NULL);
 	up(&efi_runtime_lock);
 	return status;
@@ -384,7 +384,7 @@ static efi_status_t virt_efi_get_next_high_mono_count(u32 *count)
 
 	if (down_interruptible(&efi_runtime_lock))
 		return EFI_ABORTED;
-	status = efi_queue_work(GET_NEXT_HIGH_MONO_COUNT, count, NULL, NULL,
+	status = efi_queue_work(EFI_GET_NEXT_HIGH_MONO_COUNT, count, NULL, NULL,
 				NULL, NULL);
 	up(&efi_runtime_lock);
 	return status;
@@ -400,7 +400,7 @@ static void virt_efi_reset_system(int reset_type,
 			"could not get exclusive access to the firmware\n");
 		return;
 	}
-	efi_rts_work.efi_rts_id = RESET_SYSTEM;
+	efi_rts_work.efi_rts_id = EFI_RESET_SYSTEM;
 	__efi_call_virt(reset_system, reset_type, status, data_size, data);
 	up(&efi_runtime_lock);
 }
@@ -416,7 +416,7 @@ static efi_status_t virt_efi_update_capsule(efi_capsule_header_t **capsules,
 
 	if (down_interruptible(&efi_runtime_lock))
 		return EFI_ABORTED;
-	status = efi_queue_work(UPDATE_CAPSULE, capsules, &count, &sg_list,
+	status = efi_queue_work(EFI_UPDATE_CAPSULE, capsules, &count, &sg_list,
 				NULL, NULL);
 	up(&efi_runtime_lock);
 	return status;
@@ -434,7 +434,7 @@ static efi_status_t virt_efi_query_capsule_caps(efi_capsule_header_t **capsules,
 
 	if (down_interruptible(&efi_runtime_lock))
 		return EFI_ABORTED;
-	status = efi_queue_work(QUERY_CAPSULE_CAPS, capsules, &count,
+	status = efi_queue_work(EFI_QUERY_CAPSULE_CAPS, capsules, &count,
 				max_size, reset_type, NULL);
 	up(&efi_runtime_lock);
 	return status;
diff --git a/include/linux/efi.h b/include/linux/efi.h
index be08518c2553..eecd1079617a 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -1719,19 +1719,19 @@ extern int efi_tpm_eventlog_init(void);
  * fault happened while executing an efi runtime service.
  */
 enum efi_rts_ids {
-	NONE,
-	GET_TIME,
-	SET_TIME,
-	GET_WAKEUP_TIME,
-	SET_WAKEUP_TIME,
-	GET_VARIABLE,
-	GET_NEXT_VARIABLE,
-	SET_VARIABLE,
-	QUERY_VARIABLE_INFO,
-	GET_NEXT_HIGH_MONO_COUNT,
-	RESET_SYSTEM,
-	UPDATE_CAPSULE,
-	QUERY_CAPSULE_CAPS,
+	EFI_NONE,
+	EFI_GET_TIME,
+	EFI_SET_TIME,
+	EFI_GET_WAKEUP_TIME,
+	EFI_SET_WAKEUP_TIME,
+	EFI_GET_VARIABLE,
+	EFI_GET_NEXT_VARIABLE,
+	EFI_SET_VARIABLE,
+	EFI_QUERY_VARIABLE_INFO,
+	EFI_GET_NEXT_HIGH_MONO_COUNT,
+	EFI_RESET_SYSTEM,
+	EFI_UPDATE_CAPSULE,
+	EFI_QUERY_CAPSULE_CAPS,
 };
 
 /*
-- 
cgit v1.2.3


From 744e458aebf8dc7f33eee9af61aeb0145de921a6 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Sun, 17 Feb 2019 10:28:33 +0100
Subject: net: phy: add helper linkmode_adv_to_mii_10gbt_adv_t

Add a helper linkmode_adv_to_mii_10gbt_adv_t(), similar to
linkmode_adv_to_mii_adv_t.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mdio.h | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

(limited to 'include')

diff --git a/include/linux/mdio.h b/include/linux/mdio.h
index bfa7114167d7..dd46828b4c47 100644
--- a/include/linux/mdio.h
+++ b/include/linux/mdio.h
@@ -261,6 +261,31 @@ static inline u16 ethtool_adv_to_mmd_eee_adv_t(u32 adv)
 	return reg;
 }
 
+/**
+ * linkmode_adv_to_mii_10gbt_adv_t
+ * @advertising: the linkmode advertisement settings
+ *
+ * A small helper function that translates linkmode advertisement
+ * settings to phy autonegotiation advertisements for the C45
+ * 10GBASE-T AN CONTROL (7.32) register.
+ */
+static inline u32 linkmode_adv_to_mii_10gbt_adv_t(unsigned long *advertising)
+{
+	u32 result = 0;
+
+	if (linkmode_test_bit(ETHTOOL_LINK_MODE_2500baseT_Full_BIT,
+			      advertising))
+		result |= MDIO_AN_10GBT_CTRL_ADV2_5G;
+	if (linkmode_test_bit(ETHTOOL_LINK_MODE_5000baseT_Full_BIT,
+			      advertising))
+		result |= MDIO_AN_10GBT_CTRL_ADV5G;
+	if (linkmode_test_bit(ETHTOOL_LINK_MODE_10000baseT_Full_BIT,
+			      advertising))
+		result |= MDIO_AN_10GBT_CTRL_ADV10G;
+
+	return result;
+}
+
 int __mdiobus_read(struct mii_bus *bus, int addr, u32 regnum);
 int __mdiobus_write(struct mii_bus *bus, int addr, u32 regnum, u16 val);
 
-- 
cgit v1.2.3


From 9a5dc8af441668a3db7fdcd927cb288be62c0a2e Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Sun, 17 Feb 2019 10:29:19 +0100
Subject: net: phy: add genphy_c45_an_config_aneg

C45 configuration of 10/100 and multi-giga bit auto negotiation
advertisement is standardized. Configuration of 1000Base-T however
appears to be vendor specific. Move the generic code out of the
Marvell driver into the common phy-c45.c file.

v2:
- change function name to genphy_c45_an_config_aneg

Signed-off-by: Andrew Lunn <andrew@lunn.ch>
[hkallweit1@gmail.com: use new helper linkmode_adv_to_mii_10gbt_adv_t and split patch]
Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/phy-c45.c | 44 ++++++++++++++++++++++++++++++++++++++++++++
 include/linux/phy.h       |  1 +
 2 files changed, 45 insertions(+)

(limited to 'include')

diff --git a/drivers/net/phy/phy-c45.c b/drivers/net/phy/phy-c45.c
index 7af5fa81daf6..e17672bd180e 100644
--- a/drivers/net/phy/phy-c45.c
+++ b/drivers/net/phy/phy-c45.c
@@ -74,6 +74,50 @@ int genphy_c45_pma_setup_forced(struct phy_device *phydev)
 }
 EXPORT_SYMBOL_GPL(genphy_c45_pma_setup_forced);
 
+/**
+ * genphy_c45_an_config_aneg - configure advertisement registers
+ * @phydev: target phy_device struct
+ *
+ * Configure advertisement registers based on modes set in phydev->advertising
+ *
+ * Returns negative errno code on failure, 0 if advertisement didn't change,
+ * or 1 if advertised modes changed.
+ */
+int genphy_c45_an_config_aneg(struct phy_device *phydev)
+{
+	int changed = 0, ret;
+	u32 adv;
+
+	linkmode_and(phydev->advertising, phydev->advertising,
+		     phydev->supported);
+
+	adv = linkmode_adv_to_mii_adv_t(phydev->advertising);
+
+	ret = phy_modify_mmd(phydev, MDIO_MMD_AN, MDIO_AN_ADVERTISE,
+			     ADVERTISE_ALL | ADVERTISE_100BASE4 |
+			     ADVERTISE_PAUSE_CAP | ADVERTISE_PAUSE_ASYM,
+			     adv);
+	if (ret < 0)
+		return ret;
+	if (ret > 0)
+		changed = 1;
+
+	adv = linkmode_adv_to_mii_10gbt_adv_t(phydev->advertising);
+
+	ret = phy_modify_mmd(phydev, MDIO_MMD_AN, MDIO_AN_10GBT_CTRL,
+			     MDIO_AN_10GBT_CTRL_ADV10G |
+			     MDIO_AN_10GBT_CTRL_ADV5G |
+			     MDIO_AN_10GBT_CTRL_ADV2_5G,
+			     adv);
+	if (ret < 0)
+		return ret;
+	if (ret > 0)
+		changed = 1;
+
+	return changed;
+}
+EXPORT_SYMBOL_GPL(genphy_c45_an_config_aneg);
+
 /**
  * genphy_c45_an_disable_aneg - disable auto-negotiation
  * @phydev: target phy_device struct
diff --git a/include/linux/phy.h b/include/linux/phy.h
index bf1070c2a53b..3db507e68191 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -1101,6 +1101,7 @@ int genphy_c45_read_link(struct phy_device *phydev);
 int genphy_c45_read_lpa(struct phy_device *phydev);
 int genphy_c45_read_pma(struct phy_device *phydev);
 int genphy_c45_pma_setup_forced(struct phy_device *phydev);
+int genphy_c45_an_config_aneg(struct phy_device *phydev);
 int genphy_c45_an_disable_aneg(struct phy_device *phydev);
 int genphy_c45_read_mdix(struct phy_device *phydev);
 int genphy_c45_pma_read_abilities(struct phy_device *phydev);
-- 
cgit v1.2.3


From 9c03b282badb7600058321c80e367f93b4013bd0 Mon Sep 17 00:00:00 2001
From: Roopa Prabhu <roopa@cumulusnetworks.com>
Date: Thu, 14 Feb 2019 09:15:10 -0800
Subject: trace: events: add a few neigh tracepoints

The goal here is to trace neigh state changes covering all possible
neigh update paths. Plus have a specific trace point in neigh_update
to cover flags sent to neigh_update.

Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/trace/events/neigh.h | 204 +++++++++++++++++++++++++++++++++++++++++++
 net/core/net-traces.c        |   8 ++
 2 files changed, 212 insertions(+)
 create mode 100644 include/trace/events/neigh.h

(limited to 'include')

diff --git a/include/trace/events/neigh.h b/include/trace/events/neigh.h
new file mode 100644
index 000000000000..ed10353eee73
--- /dev/null
+++ b/include/trace/events/neigh.h
@@ -0,0 +1,204 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM neigh
+
+#if !defined(_TRACE_NEIGH_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_NEIGH_H
+
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/tracepoint.h>
+#include <net/neighbour.h>
+
+#define neigh_state_str(state)				\
+	__print_symbolic(state,				\
+		{ NUD_INCOMPLETE, "incomplete" },	\
+		{ NUD_REACHABLE, "reachable" },		\
+		{ NUD_STALE, "stale" },			\
+		{ NUD_DELAY, "delay" },			\
+		{ NUD_PROBE, "probe" },			\
+		{ NUD_FAILED, "failed" })
+
+TRACE_EVENT(neigh_update,
+
+	TP_PROTO(struct neighbour *n, const u8 *lladdr, u8 new,
+		 u32 flags, u32 nlmsg_pid),
+
+	TP_ARGS(n, lladdr, new, flags, nlmsg_pid),
+
+	TP_STRUCT__entry(
+		__field(u32, family)
+		__string(dev, (n->dev ? n->dev->name : "NULL"))
+		__array(u8, lladdr, MAX_ADDR_LEN)
+		__field(u8, lladdr_len)
+		__field(u8, flags)
+		__field(u8, nud_state)
+		__field(u8, type)
+		__field(u8, dead)
+		__field(int, refcnt)
+		__array(__u8, primary_key4, 4)
+		__array(__u8, primary_key6, 16)
+		__field(unsigned long, confirmed)
+		__field(unsigned long, updated)
+		__field(unsigned long, used)
+		__array(u8, new_lladdr, MAX_ADDR_LEN)
+		__field(u8, new_state)
+		__field(u32, update_flags)
+		__field(u32, pid)
+	),
+
+	TP_fast_assign(
+		int lladdr_len = (n->dev ? n->dev->addr_len : MAX_ADDR_LEN);
+		struct in6_addr *pin6;
+		__be32 *p32;
+
+		__entry->family = n->tbl->family;
+		__assign_str(dev, (n->dev ? n->dev->name : "NULL"));
+		__entry->lladdr_len = lladdr_len;
+		memcpy(__entry->lladdr, n->ha, lladdr_len);
+		__entry->flags = n->flags;
+		__entry->nud_state = n->nud_state;
+		__entry->type = n->type;
+		__entry->dead = n->dead;
+		__entry->refcnt = refcount_read(&n->refcnt);
+		pin6 = (struct in6_addr *)__entry->primary_key6;
+		p32 = (__be32 *)__entry->primary_key4;
+
+		if (n->tbl->family == AF_INET)
+			*p32 = *(__be32 *)n->primary_key;
+		else
+			*p32 = 0;
+
+#if IS_ENABLED(CONFIG_IPV6)
+		if (n->tbl->family == AF_INET6) {
+			pin6 = (struct in6_addr *)__entry->primary_key6;
+			*pin6 = *(struct in6_addr *)n->primary_key;
+		} else
+#endif
+		{
+			ipv6_addr_set_v4mapped(*p32, pin6);
+		}
+		__entry->confirmed = n->confirmed;
+		__entry->updated = n->updated;
+		__entry->used = n->used;
+		if (lladdr)
+			memcpy(__entry->new_lladdr, lladdr, lladdr_len);
+		__entry->new_state = new;
+		__entry->update_flags = flags;
+		__entry->pid = nlmsg_pid;
+	),
+
+	TP_printk("family %d dev %s lladdr %s flags %02x nud_state %s type %02x "
+		  "dead %d refcnt %d primary_key4 %pI4 primary_key6 %pI6c "
+		  "confirmed %lu updated %lu used %lu new_lladdr %s "
+		  "new_state %02x update_flags %02x pid %d",
+		  __entry->family, __get_str(dev),
+		  __print_hex_str(__entry->lladdr, __entry->lladdr_len),
+		  __entry->flags, neigh_state_str(__entry->nud_state),
+		  __entry->type, __entry->dead, __entry->refcnt,
+		  __entry->primary_key4, __entry->primary_key6,
+		  __entry->confirmed, __entry->updated, __entry->used,
+		  __print_hex_str(__entry->new_lladdr, __entry->lladdr_len),
+		  __entry->new_state,
+		  __entry->update_flags, __entry->pid)
+);
+
+DECLARE_EVENT_CLASS(neigh__update,
+	TP_PROTO(struct neighbour *n, int err),
+	TP_ARGS(n, err),
+	TP_STRUCT__entry(
+		__field(u32, family)
+		__string(dev, (n->dev ? n->dev->name : "NULL"))
+		__array(u8, lladdr, MAX_ADDR_LEN)
+		__field(u8, lladdr_len)
+		__field(u8, flags)
+		__field(u8, nud_state)
+		__field(u8, type)
+		__field(u8, dead)
+		__field(int, refcnt)
+		__array(__u8, primary_key4, 4)
+		__array(__u8, primary_key6, 16)
+		__field(unsigned long, confirmed)
+		__field(unsigned long, updated)
+		__field(unsigned long, used)
+		__field(u32, err)
+	),
+
+	TP_fast_assign(
+		int lladdr_len = (n->dev ? n->dev->addr_len : MAX_ADDR_LEN);
+		struct in6_addr *pin6;
+		__be32 *p32;
+
+		__entry->family = n->tbl->family;
+		__assign_str(dev, (n->dev ? n->dev->name : "NULL"));
+		__entry->lladdr_len = lladdr_len;
+		memcpy(__entry->lladdr, n->ha, lladdr_len);
+		__entry->flags = n->flags;
+		__entry->nud_state = n->nud_state;
+		__entry->type = n->type;
+		__entry->dead = n->dead;
+		__entry->refcnt = refcount_read(&n->refcnt);
+		pin6 = (struct in6_addr *)__entry->primary_key6;
+		p32 = (__be32 *)__entry->primary_key4;
+
+		if (n->tbl->family == AF_INET)
+			*p32 = *(__be32 *)n->primary_key;
+		else
+			*p32 = 0;
+
+#if IS_ENABLED(CONFIG_IPV6)
+		if (n->tbl->family == AF_INET6) {
+			pin6 = (struct in6_addr *)__entry->primary_key6;
+			*pin6 = *(struct in6_addr *)n->primary_key;
+		} else
+#endif
+		{
+			ipv6_addr_set_v4mapped(*p32, pin6);
+		}
+
+		__entry->confirmed = n->confirmed;
+		__entry->updated = n->updated;
+		__entry->used = n->used;
+		__entry->err = err;
+	),
+
+	TP_printk("family %d dev %s lladdr %s flags %02x nud_state %s type %02x "
+		  "dead %d refcnt %d primary_key4 %pI4 primary_key6 %pI6c "
+		  "confirmed %lu updated %lu used %lu err %d",
+		  __entry->family, __get_str(dev),
+		  __print_hex_str(__entry->lladdr, __entry->lladdr_len),
+		  __entry->flags, neigh_state_str(__entry->nud_state),
+		  __entry->type, __entry->dead, __entry->refcnt,
+		  __entry->primary_key4, __entry->primary_key6,
+		  __entry->confirmed, __entry->updated, __entry->used,
+		  __entry->err)
+);
+
+DEFINE_EVENT(neigh__update, neigh_update_done,
+	TP_PROTO(struct neighbour *neigh, int err),
+	TP_ARGS(neigh, err)
+);
+
+DEFINE_EVENT(neigh__update, neigh_timer_handler,
+	TP_PROTO(struct neighbour *neigh, int err),
+	TP_ARGS(neigh, err)
+);
+
+DEFINE_EVENT(neigh__update, neigh_event_send_done,
+	TP_PROTO(struct neighbour *neigh, int err),
+	TP_ARGS(neigh, err)
+);
+
+DEFINE_EVENT(neigh__update, neigh_event_send_dead,
+	TP_PROTO(struct neighbour *neigh, int err),
+	TP_ARGS(neigh, err)
+);
+
+DEFINE_EVENT(neigh__update, neigh_cleanup_and_release,
+	TP_PROTO(struct neighbour *neigh, int rc),
+	TP_ARGS(neigh, rc)
+);
+
+#endif /* _TRACE_NEIGH_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/net/core/net-traces.c b/net/core/net-traces.c
index 419af6dfe29f..470b179d599e 100644
--- a/net/core/net-traces.c
+++ b/net/core/net-traces.c
@@ -43,6 +43,14 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(fdb_delete);
 EXPORT_TRACEPOINT_SYMBOL_GPL(br_fdb_update);
 #endif
 
+#include <trace/events/neigh.h>
+EXPORT_TRACEPOINT_SYMBOL_GPL(neigh_update);
+EXPORT_TRACEPOINT_SYMBOL_GPL(neigh_update_done);
+EXPORT_TRACEPOINT_SYMBOL_GPL(neigh_timer_handler);
+EXPORT_TRACEPOINT_SYMBOL_GPL(neigh_event_send_done);
+EXPORT_TRACEPOINT_SYMBOL_GPL(neigh_event_send_dead);
+EXPORT_TRACEPOINT_SYMBOL_GPL(neigh_cleanup_and_release);
+
 EXPORT_TRACEPOINT_SYMBOL_GPL(kfree_skb);
 
 EXPORT_TRACEPOINT_SYMBOL_GPL(napi_poll);
-- 
cgit v1.2.3


From 76726ccb7f461c83040e7082cf95fe1dea2afd1f Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Thu, 14 Feb 2019 13:40:44 -0800
Subject: devlink: add flash update command

Add devlink flash update command. Advanced NICs have firmware
stored in flash and often cryptographically secured. Updating
that flash is handled by management firmware. Ethtool has a
flash update command which served us well, however, it has two
shortcomings:
 - it takes rtnl_lock unnecessarily - really flash update has
   nothing to do with networking, so using a networking device
   as a handle is suboptimal, which leads us to the second one:
 - it requires a functioning netdev - in case device enters an
   error state and can't spawn a netdev (e.g. communication
   with the device fails) there is no netdev to use as a handle
   for flashing.

Devlink already has the ability to report the firmware versions,
now with the ability to update the firmware/flash we will be
able to recover devices in bad state.

To enable updates of sub-components of the FW allow passing
component name.  This name should correspond to one of the
versions reported in devlink info.

v1: - replace target id with component name (Jiri).

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/devlink.h        |  3 +++
 include/uapi/linux/devlink.h |  6 ++++++
 net/core/devlink.c           | 30 ++++++++++++++++++++++++++++++
 3 files changed, 39 insertions(+)

(limited to 'include')

diff --git a/include/net/devlink.h b/include/net/devlink.h
index c6d88759b7d5..18d7a051f412 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -521,6 +521,9 @@ struct devlink_ops {
 				      struct netlink_ext_ack *extack);
 	int (*info_get)(struct devlink *devlink, struct devlink_info_req *req,
 			struct netlink_ext_ack *extack);
+	int (*flash_update)(struct devlink *devlink, const char *file_name,
+			    const char *component,
+			    struct netlink_ext_ack *extack);
 };
 
 static inline void *devlink_priv(struct devlink *devlink)
diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index 72d9f7c89190..53de8802a000 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -103,6 +103,8 @@ enum devlink_command {
 	DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET,
 	DEVLINK_CMD_HEALTH_REPORTER_DUMP_CLEAR,
 
+	DEVLINK_CMD_FLASH_UPDATE,
+
 	/* add new commands above here */
 	__DEVLINK_CMD_MAX,
 	DEVLINK_CMD_MAX = __DEVLINK_CMD_MAX - 1
@@ -326,6 +328,10 @@ enum devlink_attr {
 	DEVLINK_ATTR_HEALTH_REPORTER_DUMP_TS,		/* u64 */
 	DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD,	/* u64 */
 	DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER,	/* u8 */
+
+	DEVLINK_ATTR_FLASH_UPDATE_FILE_NAME,	/* string */
+	DEVLINK_ATTR_FLASH_UPDATE_COMPONENT,	/* string */
+
 	/* add new attributes above here, update the policy in devlink.c */
 
 	__DEVLINK_ATTR_MAX,
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 1d7502a5a651..4a1ad0b13e52 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -2662,6 +2662,27 @@ static int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info)
 	return devlink->ops->reload(devlink, info->extack);
 }
 
+static int devlink_nl_cmd_flash_update(struct sk_buff *skb,
+				       struct genl_info *info)
+{
+	struct devlink *devlink = info->user_ptr[0];
+	const char *file_name, *component;
+	struct nlattr *nla_component;
+
+	if (!devlink->ops->flash_update)
+		return -EOPNOTSUPP;
+
+	if (!info->attrs[DEVLINK_ATTR_FLASH_UPDATE_FILE_NAME])
+		return -EINVAL;
+	file_name = nla_data(info->attrs[DEVLINK_ATTR_FLASH_UPDATE_FILE_NAME]);
+
+	nla_component = info->attrs[DEVLINK_ATTR_FLASH_UPDATE_COMPONENT];
+	component = nla_component ? nla_data(nla_component) : NULL;
+
+	return devlink->ops->flash_update(devlink, file_name, component,
+					  info->extack);
+}
+
 static const struct devlink_param devlink_param_generic[] = {
 	{
 		.id = DEVLINK_PARAM_GENERIC_ID_INT_ERR_RESET,
@@ -4883,6 +4904,8 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
 	[DEVLINK_ATTR_HEALTH_REPORTER_NAME] = { .type = NLA_NUL_STRING },
 	[DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD] = { .type = NLA_U64 },
 	[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER] = { .type = NLA_U8 },
+	[DEVLINK_ATTR_FLASH_UPDATE_FILE_NAME] = { .type = NLA_NUL_STRING },
+	[DEVLINK_ATTR_FLASH_UPDATE_COMPONENT] = { .type = NLA_NUL_STRING },
 };
 
 static const struct genl_ops devlink_nl_ops[] = {
@@ -5171,6 +5194,13 @@ static const struct genl_ops devlink_nl_ops[] = {
 		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK |
 				  DEVLINK_NL_FLAG_NO_LOCK,
 	},
+	{
+		.cmd = DEVLINK_CMD_FLASH_UPDATE,
+		.doit = devlink_nl_cmd_flash_update,
+		.policy = devlink_nl_policy,
+		.flags = GENL_ADMIN_PERM,
+		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+	},
 };
 
 static struct genl_family devlink_nl_family __ro_after_init = {
-- 
cgit v1.2.3


From 4eceba17200c03a2678edbdcff5d800aded607be Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Thu, 14 Feb 2019 13:40:45 -0800
Subject: ethtool: add compat for flash update

If driver does not support ethtool flash update operation
call into devlink.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/devlink.h |  7 +++++++
 net/core/devlink.c    | 30 ++++++++++++++++++++++++++++++
 net/core/ethtool.c    | 12 +++++++++---
 3 files changed, 46 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/net/devlink.h b/include/net/devlink.h
index 18d7a051f412..a2da49dd9147 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -1195,11 +1195,18 @@ devlink_health_report(struct devlink_health_reporter *reporter,
 #if IS_REACHABLE(CONFIG_NET_DEVLINK)
 void devlink_compat_running_version(struct net_device *dev,
 				    char *buf, size_t len);
+int devlink_compat_flash_update(struct net_device *dev, const char *file_name);
 #else
 static inline void
 devlink_compat_running_version(struct net_device *dev, char *buf, size_t len)
 {
 }
+
+static inline int
+devlink_compat_flash_update(struct net_device *dev, const char *file_name)
+{
+	return -EOPNOTSUPP;
+}
 #endif
 
 #endif /* _NET_DEVLINK_H_ */
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 4a1ad0b13e52..04d98550c78c 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -6450,6 +6450,36 @@ out:
 	mutex_unlock(&devlink_mutex);
 }
 
+int devlink_compat_flash_update(struct net_device *dev, const char *file_name)
+{
+	struct devlink_port *devlink_port;
+	struct devlink *devlink;
+
+	mutex_lock(&devlink_mutex);
+	list_for_each_entry(devlink, &devlink_list, list) {
+		mutex_lock(&devlink->lock);
+		list_for_each_entry(devlink_port, &devlink->port_list, list) {
+			int ret = -EOPNOTSUPP;
+
+			if (devlink_port->type != DEVLINK_PORT_TYPE_ETH ||
+			    devlink_port->type_dev != dev)
+				continue;
+
+			mutex_unlock(&devlink_mutex);
+			if (devlink->ops->flash_update)
+				ret = devlink->ops->flash_update(devlink,
+								 file_name,
+								 NULL, NULL);
+			mutex_unlock(&devlink->lock);
+			return ret;
+		}
+		mutex_unlock(&devlink->lock);
+	}
+	mutex_unlock(&devlink_mutex);
+
+	return -EOPNOTSUPP;
+}
+
 static int __init devlink_module_init(void)
 {
 	return genl_register_family(&devlink_nl_family);
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index d2c47cdf25da..1320e8dce559 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -2038,11 +2038,17 @@ static noinline_for_stack int ethtool_flash_device(struct net_device *dev,
 
 	if (copy_from_user(&efl, useraddr, sizeof(efl)))
 		return -EFAULT;
+	efl.data[ETHTOOL_FLASH_MAX_FILENAME - 1] = 0;
 
-	if (!dev->ethtool_ops->flash_device)
-		return -EOPNOTSUPP;
+	if (!dev->ethtool_ops->flash_device) {
+		int ret;
 
-	efl.data[ETHTOOL_FLASH_MAX_FILENAME - 1] = 0;
+		rtnl_unlock();
+		ret = devlink_compat_flash_update(dev, efl.data);
+		rtnl_lock();
+
+		return ret;
+	}
 
 	return dev->ethtool_ops->flash_device(dev, &efl);
 }
-- 
cgit v1.2.3


From 58ecf2688cc9b44d2e8f830c16212edbeaef4dce Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Sat, 16 Feb 2019 10:37:56 +0800
Subject: ptr_ring: remove duplicated include from ptr_ring.h

Remove duplicated include.

Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ptr_ring.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/ptr_ring.h b/include/linux/ptr_ring.h
index 186cd8e970c7..8da46ac44a2e 100644
--- a/include/linux/ptr_ring.h
+++ b/include/linux/ptr_ring.h
@@ -26,7 +26,6 @@
 #include <linux/cache.h>
 #include <linux/types.h>
 #include <linux/compiler.h>
-#include <linux/cache.h>
 #include <linux/slab.h>
 #include <asm/errno.h>
 #endif
-- 
cgit v1.2.3


From 357b4da50a62e2fd70eacee21cdbd22d4c7a7b60 Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgross@suse.com>
Date: Thu, 14 Feb 2019 11:42:39 +0100
Subject: x86: respect memory size limiting via mem= parameter

When limiting memory size via kernel parameter "mem=" this should be
respected even in case of memory made accessible via a PCI card.

Today this kind of memory won't be made usable in initial memory
setup as the memory won't be visible in E820 map, but it might be
added when adding PCI devices due to corresponding ACPI table entries.

Not respecting "mem=" can be corrected by adding a global max_mem_size
variable set by parse_memopt() which will result in rejecting adding
memory areas resulting in a memory size above the allowed limit.

Signed-off-by: Juergen Gross <jgross@suse.com>
Acked-by: Ingo Molnar <mingo@kernel.org>
Reviewed-by: William Kucharski <william.kucharski@oracle.com>
Signed-off-by: Juergen Gross <jgross@suse.com>
---
 arch/x86/kernel/e820.c         | 5 +++++
 include/linux/memory_hotplug.h | 2 ++
 mm/memory_hotplug.c            | 6 ++++++
 3 files changed, 13 insertions(+)

(limited to 'include')

diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 50895c2f937d..e67513e2cbbb 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -14,6 +14,7 @@
 #include <linux/acpi.h>
 #include <linux/firmware-map.h>
 #include <linux/sort.h>
+#include <linux/memory_hotplug.h>
 
 #include <asm/e820/api.h>
 #include <asm/setup.h>
@@ -881,6 +882,10 @@ static int __init parse_memopt(char *p)
 
 	e820__range_remove(mem_size, ULLONG_MAX - mem_size, E820_TYPE_RAM, 1);
 
+#ifdef CONFIG_MEMORY_HOTPLUG
+	max_mem_size = mem_size;
+#endif
+
 	return 0;
 }
 early_param("mem", parse_memopt);
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index 368267c1b71b..cfd12078172a 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -100,6 +100,8 @@ extern void __online_page_free(struct page *page);
 
 extern int try_online_node(int nid);
 
+extern u64 max_mem_size;
+
 extern bool memhp_auto_online;
 /* If movable_node boot option specified */
 extern bool movable_node_enabled;
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 124e794867c5..519f9db063ff 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -96,10 +96,16 @@ void mem_hotplug_done(void)
 	cpus_read_unlock();
 }
 
+u64 max_mem_size = U64_MAX;
+
 /* add this memory to iomem resource */
 static struct resource *register_memory_resource(u64 start, u64 size)
 {
 	struct resource *res, *conflict;
+
+	if (start + size > max_mem_size)
+		return ERR_PTR(-E2BIG);
+
 	res = kzalloc(sizeof(struct resource), GFP_KERNEL);
 	if (!res)
 		return ERR_PTR(-ENOMEM);
-- 
cgit v1.2.3


From 1d988ed46543ca36c010634c97ac32114362ddb1 Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgross@suse.com>
Date: Thu, 14 Feb 2019 11:42:40 +0100
Subject: x86/xen: dont add memory above max allowed allocation

Don't allow memory to be added above the allowed maximum allocation
limit set by Xen.

Trying to do so would result in cases like the following:

[  584.559652] ------------[ cut here ]------------
[  584.564897] WARNING: CPU: 2 PID: 1 at ../arch/x86/xen/multicalls.c:129 xen_alloc_pte+0x1c7/0x390()
[  584.575151] Modules linked in:
[  584.578643] Supported: Yes
[  584.581750] CPU: 2 PID: 1 Comm: swapper/0 Not tainted 4.4.120-92.70-default #1
[  584.590000] Hardware name: Cisco Systems Inc UCSC-C460-M4/UCSC-C460-M4, BIOS C460M4.4.0.1b.0.0629181419 06/29/2018
[  584.601862]  0000000000000000 ffffffff813175a0 0000000000000000 ffffffff8184777c
[  584.610200]  ffffffff8107f4e1 ffff880487eb7000 ffff8801862b79c0 ffff88048608d290
[  584.618537]  0000000000487eb7 ffffea0000000201 ffffffff81009de7 ffffffff81068561
[  584.626876] Call Trace:
[  584.629699]  [<ffffffff81019ad9>] dump_trace+0x59/0x340
[  584.635645]  [<ffffffff81019eaa>] show_stack_log_lvl+0xea/0x170
[  584.642391]  [<ffffffff8101ac51>] show_stack+0x21/0x40
[  584.648238]  [<ffffffff813175a0>] dump_stack+0x5c/0x7c
[  584.654085]  [<ffffffff8107f4e1>] warn_slowpath_common+0x81/0xb0
[  584.660932]  [<ffffffff81009de7>] xen_alloc_pte+0x1c7/0x390
[  584.667289]  [<ffffffff810647f0>] pmd_populate_kernel.constprop.6+0x40/0x80
[  584.675241]  [<ffffffff815ecfe8>] phys_pmd_init+0x210/0x255
[  584.681587]  [<ffffffff815ed207>] phys_pud_init+0x1da/0x247
[  584.687931]  [<ffffffff815edb3b>] kernel_physical_mapping_init+0xf5/0x1d4
[  584.695682]  [<ffffffff815e9bdd>] init_memory_mapping+0x18d/0x380
[  584.702631]  [<ffffffff81064699>] arch_add_memory+0x59/0xf0

Signed-off-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Signed-off-by: Juergen Gross <jgross@suse.com>
---
 arch/x86/xen/setup.c      | 13 +++++++++++++
 drivers/xen/xen-balloon.c | 11 +++++++++++
 include/xen/xen.h         |  4 ++++
 3 files changed, 28 insertions(+)

(limited to 'include')

diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index d5f303c0e656..548d1e0a5ba1 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -12,6 +12,7 @@
 #include <linux/memblock.h>
 #include <linux/cpuidle.h>
 #include <linux/cpufreq.h>
+#include <linux/memory_hotplug.h>
 
 #include <asm/elf.h>
 #include <asm/vdso.h>
@@ -589,6 +590,14 @@ static void __init xen_align_and_add_e820_region(phys_addr_t start,
 	if (type == E820_TYPE_RAM) {
 		start = PAGE_ALIGN(start);
 		end &= ~((phys_addr_t)PAGE_SIZE - 1);
+#ifdef CONFIG_MEMORY_HOTPLUG
+		/*
+		 * Don't allow adding memory not in E820 map while booting the
+		 * system. Once the balloon driver is up it will remove that
+		 * restriction again.
+		 */
+		max_mem_size = end;
+#endif
 	}
 
 	e820__range_add(start, end - start, type);
@@ -748,6 +757,10 @@ char * __init xen_memory_setup(void)
 	memmap.nr_entries = ARRAY_SIZE(xen_e820_table.entries);
 	set_xen_guest_handle(memmap.buffer, xen_e820_table.entries);
 
+#if defined(CONFIG_MEMORY_HOTPLUG) && defined(CONFIG_XEN_BALLOON)
+	xen_saved_max_mem_size = max_mem_size;
+#endif
+
 	op = xen_initial_domain() ?
 		XENMEM_machine_memory_map :
 		XENMEM_memory_map;
diff --git a/drivers/xen/xen-balloon.c b/drivers/xen/xen-balloon.c
index 2acbfe104e46..a67236b02452 100644
--- a/drivers/xen/xen-balloon.c
+++ b/drivers/xen/xen-balloon.c
@@ -37,6 +37,7 @@
 #include <linux/mm_types.h>
 #include <linux/init.h>
 #include <linux/capability.h>
+#include <linux/memory_hotplug.h>
 
 #include <xen/xen.h>
 #include <xen/interface/xen.h>
@@ -50,6 +51,10 @@
 
 #define BALLOON_CLASS_NAME "xen_memory"
 
+#ifdef CONFIG_MEMORY_HOTPLUG
+u64 xen_saved_max_mem_size = 0;
+#endif
+
 static struct device balloon_dev;
 
 static int register_balloon(struct device *dev);
@@ -63,6 +68,12 @@ static void watch_target(struct xenbus_watch *watch,
 	static bool watch_fired;
 	static long target_diff;
 
+#ifdef CONFIG_MEMORY_HOTPLUG
+	/* The balloon driver will take care of adding memory now. */
+	if (xen_saved_max_mem_size)
+		max_mem_size = xen_saved_max_mem_size;
+#endif
+
 	err = xenbus_scanf(XBT_NIL, "memory", "target", "%llu", &new_target);
 	if (err != 1) {
 		/* This is ok (for domain0 at least) - so just return */
diff --git a/include/xen/xen.h b/include/xen/xen.h
index 0e2156786ad2..19d032373de5 100644
--- a/include/xen/xen.h
+++ b/include/xen/xen.h
@@ -46,4 +46,8 @@ struct bio_vec;
 bool xen_biovec_phys_mergeable(const struct bio_vec *vec1,
 		const struct bio_vec *vec2);
 
+#if defined(CONFIG_MEMORY_HOTPLUG) && defined(CONFIG_XEN_BALLOON)
+extern u64 xen_saved_max_mem_size;
+#endif
+
 #endif	/* _XEN_XEN_H */
-- 
cgit v1.2.3


From 40b46b3b2f098e3740f65024099a7c55ff4b9866 Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Date: Thu, 14 Feb 2019 18:05:05 +0100
Subject: cpufreq: davinci: move configuration to include/linux/platform_data

The header containing the configuration structure for davinci cpufreq
driver lives in mach-davinci/include/mach/. This is fine for now but
if we want to make davinci part of the multi_v5 build, no code external
to mach-davinci should include machine-specific headers.

Move the configuration structure to include/linux/platform_data.

While we're at it: convert the GPL-2.0 boilerplate to a proper SPDX
license identifier.

Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Acked-by: Sekhar Nori <nsekhar@ti.com>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 arch/arm/mach-davinci/da850.c                 |  2 +-
 arch/arm/mach-davinci/include/mach/cpufreq.h  | 26 --------------------------
 drivers/cpufreq/davinci-cpufreq.c             |  5 +----
 include/linux/platform_data/davinci-cpufreq.h | 19 +++++++++++++++++++
 4 files changed, 21 insertions(+), 31 deletions(-)
 delete mode 100644 arch/arm/mach-davinci/include/mach/cpufreq.h
 create mode 100644 include/linux/platform_data/davinci-cpufreq.h

(limited to 'include')

diff --git a/arch/arm/mach-davinci/da850.c b/arch/arm/mach-davinci/da850.c
index e7b78df2bfef..a02ff431ba47 100644
--- a/arch/arm/mach-davinci/da850.c
+++ b/arch/arm/mach-davinci/da850.c
@@ -21,6 +21,7 @@
 #include <linux/mfd/da8xx-cfgchip.h>
 #include <linux/platform_data/clk-da8xx-cfgchip.h>
 #include <linux/platform_data/clk-davinci-pll.h>
+#include <linux/platform_data/davinci-cpufreq.h>
 #include <linux/platform_data/gpio-davinci.h>
 #include <linux/platform_device.h>
 #include <linux/regmap.h>
@@ -29,7 +30,6 @@
 #include <asm/mach/map.h>
 
 #include <mach/common.h>
-#include <mach/cpufreq.h>
 #include <mach/cputype.h>
 #include <mach/da8xx.h>
 #include <mach/irqs.h>
diff --git a/arch/arm/mach-davinci/include/mach/cpufreq.h b/arch/arm/mach-davinci/include/mach/cpufreq.h
deleted file mode 100644
index 3c089cfb6cd6..000000000000
--- a/arch/arm/mach-davinci/include/mach/cpufreq.h
+++ /dev/null
@@ -1,26 +0,0 @@
-/*
- * TI DaVinci CPUFreq platform support.
- *
- * Copyright (C) 2009 Texas Instruments, Inc. http://www.ti.com/
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation version 2.
- *
- * This program is distributed "as is" WITHOUT ANY WARRANTY of any
- * kind, whether express or implied; without even the implied warranty
- * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- */
-#ifndef _MACH_DAVINCI_CPUFREQ_H
-#define _MACH_DAVINCI_CPUFREQ_H
-
-#include <linux/cpufreq.h>
-
-struct davinci_cpufreq_config {
-	struct cpufreq_frequency_table *freq_table;
-	int (*set_voltage) (unsigned int index);
-	int (*init) (void);
-};
-
-#endif
diff --git a/drivers/cpufreq/davinci-cpufreq.c b/drivers/cpufreq/davinci-cpufreq.c
index d54a27c99121..940fe85db97a 100644
--- a/drivers/cpufreq/davinci-cpufreq.c
+++ b/drivers/cpufreq/davinci-cpufreq.c
@@ -23,13 +23,10 @@
 #include <linux/init.h>
 #include <linux/err.h>
 #include <linux/clk.h>
+#include <linux/platform_data/davinci-cpufreq.h>
 #include <linux/platform_device.h>
 #include <linux/export.h>
 
-#include <mach/hardware.h>
-#include <mach/cpufreq.h>
-#include <mach/common.h>
-
 struct davinci_cpufreq {
 	struct device *dev;
 	struct clk *armclk;
diff --git a/include/linux/platform_data/davinci-cpufreq.h b/include/linux/platform_data/davinci-cpufreq.h
new file mode 100644
index 000000000000..3fbf9f2793b5
--- /dev/null
+++ b/include/linux/platform_data/davinci-cpufreq.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * TI DaVinci CPUFreq platform support.
+ *
+ * Copyright (C) 2009 Texas Instruments, Inc. http://www.ti.com/
+ */
+
+#ifndef _MACH_DAVINCI_CPUFREQ_H
+#define _MACH_DAVINCI_CPUFREQ_H
+
+#include <linux/cpufreq.h>
+
+struct davinci_cpufreq_config {
+	struct cpufreq_frequency_table *freq_table;
+	int (*set_voltage)(unsigned int index);
+	int (*init)(void);
+};
+
+#endif /* _MACH_DAVINCI_CPUFREQ_H */
-- 
cgit v1.2.3


From 99b9d7b4970cf131fd17a8f4ad4870049bd7a365 Mon Sep 17 00:00:00 2001
From: Oded Gabbay <oded.gabbay@gmail.com>
Date: Sat, 16 Feb 2019 00:39:13 +0200
Subject: habanalabs: add basic Goya support

This patch adds a basic support for the Goya device. The code initializes
the device's PCI controller and PCI bars. It also initializes various S/W
structures and adds some basic helper functions.

Reviewed-by: Mike Rapoport <rppt@linux.ibm.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/habanalabs/Makefile            |   3 +
 drivers/misc/habanalabs/device.c            |  71 ++++
 drivers/misc/habanalabs/goya/Makefile       |   3 +
 drivers/misc/habanalabs/goya/goya.c         | 632 ++++++++++++++++++++++++++++
 drivers/misc/habanalabs/goya/goyaP.h        | 152 +++++++
 drivers/misc/habanalabs/habanalabs.h        | 137 ++++++
 drivers/misc/habanalabs/habanalabs_drv.c    |   3 +
 drivers/misc/habanalabs/include/goya/goya.h |  45 ++
 include/uapi/misc/habanalabs.h              |  20 +
 9 files changed, 1066 insertions(+)
 create mode 100644 drivers/misc/habanalabs/goya/Makefile
 create mode 100644 drivers/misc/habanalabs/goya/goya.c
 create mode 100644 drivers/misc/habanalabs/goya/goyaP.h
 create mode 100644 drivers/misc/habanalabs/include/goya/goya.h
 create mode 100644 include/uapi/misc/habanalabs.h

(limited to 'include')

diff --git a/drivers/misc/habanalabs/Makefile b/drivers/misc/habanalabs/Makefile
index 0910f7fa34ec..6f1ead69bd77 100644
--- a/drivers/misc/habanalabs/Makefile
+++ b/drivers/misc/habanalabs/Makefile
@@ -5,3 +5,6 @@
 obj-m	:= habanalabs.o
 
 habanalabs-y := habanalabs_drv.o device.o
+
+include $(src)/goya/Makefile
+habanalabs-y += $(HL_GOYA_FILES)
diff --git a/drivers/misc/habanalabs/device.c b/drivers/misc/habanalabs/device.c
index 6189fd0e2ccd..a4feaa784db3 100644
--- a/drivers/misc/habanalabs/device.c
+++ b/drivers/misc/habanalabs/device.c
@@ -120,8 +120,11 @@ err_cdev_add:
  */
 static int device_early_init(struct hl_device *hdev)
 {
+	int rc;
+
 	switch (hdev->asic_type) {
 	case ASIC_GOYA:
+		goya_set_asic_funcs(hdev);
 		strlcpy(hdev->asic_name, "GOYA", sizeof(hdev->asic_name));
 		break;
 	default:
@@ -130,6 +133,10 @@ static int device_early_init(struct hl_device *hdev)
 		return -EINVAL;
 	}
 
+	rc = hdev->asic_funcs->early_init(hdev);
+	if (rc)
+		return rc;
+
 	return 0;
 }
 
@@ -141,6 +148,10 @@ static int device_early_init(struct hl_device *hdev)
  */
 static void device_early_fini(struct hl_device *hdev)
 {
+
+	if (hdev->asic_funcs->early_fini)
+		hdev->asic_funcs->early_fini(hdev);
+
 }
 
 /*
@@ -154,8 +165,15 @@ static void device_early_fini(struct hl_device *hdev)
  */
 int hl_device_suspend(struct hl_device *hdev)
 {
+	int rc;
+
 	pci_save_state(hdev->pdev);
 
+	rc = hdev->asic_funcs->suspend(hdev);
+	if (rc)
+		dev_err(hdev->dev,
+			"Failed to disable PCI access of device CPU\n");
+
 	/* Shut down the device */
 	pci_disable_device(hdev->pdev);
 	pci_set_power_state(hdev->pdev, PCI_D3hot);
@@ -185,6 +203,13 @@ int hl_device_resume(struct hl_device *hdev)
 		return rc;
 	}
 
+	rc = hdev->asic_funcs->resume(hdev);
+	if (rc) {
+		dev_err(hdev->dev,
+			"Failed to enable PCI access from device CPU\n");
+		return rc;
+	}
+
 	return 0;
 }
 
@@ -212,11 +237,21 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
 	if (rc)
 		goto release_device;
 
+	/*
+	 * Start calling ASIC initialization. First S/W then H/W and finally
+	 * late init
+	 */
+	rc = hdev->asic_funcs->sw_init(hdev);
+	if (rc)
+		goto early_fini;
+
 	dev_notice(hdev->dev,
 		"Successfully added device to habanalabs driver\n");
 
 	return 0;
 
+early_fini:
+	device_early_fini(hdev);
 release_device:
 	device_destroy(hclass, hdev->dev->devt);
 	cdev_del(&hdev->cdev);
@@ -247,6 +282,9 @@ void hl_device_fini(struct hl_device *hdev)
 	/* Mark device as disabled */
 	hdev->disabled = true;
 
+	/* Call ASIC S/W finalize function */
+	hdev->asic_funcs->sw_fini(hdev);
+
 	device_early_fini(hdev);
 
 	/* Hide device from user */
@@ -338,3 +376,36 @@ int hl_poll_timeout_device_memory(struct hl_device *hdev, void __iomem *addr,
 
 	return *val ? 0 : -ETIMEDOUT;
 }
+
+/*
+ * MMIO register access helper functions.
+ */
+
+/*
+ * hl_rreg - Read an MMIO register
+ *
+ * @hdev: pointer to habanalabs device structure
+ * @reg: MMIO register offset (in bytes)
+ *
+ * Returns the value of the MMIO register we are asked to read
+ *
+ */
+inline u32 hl_rreg(struct hl_device *hdev, u32 reg)
+{
+	return readl(hdev->rmmio + reg);
+}
+
+/*
+ * hl_wreg - Write to an MMIO register
+ *
+ * @hdev: pointer to habanalabs device structure
+ * @reg: MMIO register offset (in bytes)
+ * @val: 32-bit value
+ *
+ * Writes the 32-bit value into the MMIO register
+ *
+ */
+inline void hl_wreg(struct hl_device *hdev, u32 reg, u32 val)
+{
+	writel(val, hdev->rmmio + reg);
+}
diff --git a/drivers/misc/habanalabs/goya/Makefile b/drivers/misc/habanalabs/goya/Makefile
new file mode 100644
index 000000000000..38d43006386d
--- /dev/null
+++ b/drivers/misc/habanalabs/goya/Makefile
@@ -0,0 +1,3 @@
+subdir-ccflags-y += -I$(src)
+
+HL_GOYA_FILES :=  goya/goya.o
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
new file mode 100644
index 000000000000..ceaffa9afd83
--- /dev/null
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -0,0 +1,632 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2016-2019 HabanaLabs, Ltd.
+ * All Rights Reserved.
+ */
+
+#include "goyaP.h"
+#include "include/goya/asic_reg/goya_masks.h"
+
+#include <linux/pci.h>
+#include <linux/delay.h>
+#include <linux/genalloc.h>
+
+/*
+ * GOYA security scheme:
+ *
+ * 1. Host is protected by:
+ *        - Range registers (When MMU is enabled, DMA RR does NOT protect host)
+ *        - MMU
+ *
+ * 2. DRAM is protected by:
+ *        - Range registers (protect the first 512MB)
+ *        - MMU (isolation between users)
+ *
+ * 3. Configuration is protected by:
+ *        - Range registers
+ *        - Protection bits
+ *
+ * When MMU is disabled:
+ *
+ * QMAN DMA: PQ, CQ, CP, DMA are secured.
+ * PQ, CB and the data are on the host.
+ *
+ * QMAN TPC/MME:
+ * PQ, CQ and CP are not secured.
+ * PQ, CB and the data are on the SRAM/DRAM.
+ *
+ * Since QMAN DMA is secured, KMD is parsing the DMA CB:
+ *     - KMD checks DMA pointer
+ *     - WREG, MSG_PROT are not allowed.
+ *     - MSG_LONG/SHORT are allowed.
+ *
+ * A read/write transaction by the QMAN to a protected area will succeed if
+ * and only if the QMAN's CP is secured and MSG_PROT is used
+ *
+ *
+ * When MMU is enabled:
+ *
+ * QMAN DMA: PQ, CQ and CP are secured.
+ * MMU is set to bypass on the Secure props register of the QMAN.
+ * The reasons we don't enable MMU for PQ, CQ and CP are:
+ *     - PQ entry is in kernel address space and KMD doesn't map it.
+ *     - CP writes to MSIX register and to kernel address space (completion
+ *       queue).
+ *
+ * DMA is not secured but because CP is secured, KMD still needs to parse the
+ * CB, but doesn't need to check the DMA addresses.
+ *
+ * For QMAN DMA 0, DMA is also secured because only KMD uses this DMA and KMD
+ * doesn't map memory in MMU.
+ *
+ * QMAN TPC/MME: PQ, CQ and CP aren't secured (no change from MMU disabled mode)
+ *
+ * DMA RR does NOT protect host because DMA is not secured
+ *
+ */
+
+#define GOYA_MMU_REGS_NUM		61
+
+#define GOYA_DMA_POOL_BLK_SIZE		0x100		/* 256 bytes */
+
+#define GOYA_RESET_TIMEOUT_MSEC		500		/* 500ms */
+#define GOYA_PLDM_RESET_TIMEOUT_MSEC	20000		/* 20s */
+#define GOYA_RESET_WAIT_MSEC		1		/* 1ms */
+#define GOYA_CPU_RESET_WAIT_MSEC	100		/* 100ms */
+#define GOYA_PLDM_RESET_WAIT_MSEC	1000		/* 1s */
+#define GOYA_CPU_TIMEOUT_USEC		10000000	/* 10s */
+#define GOYA_TEST_QUEUE_WAIT_USEC	100000		/* 100ms */
+
+#define GOYA_QMAN0_FENCE_VAL		0xD169B243
+
+#define GOYA_MAX_INITIATORS		20
+
+static void goya_get_fixed_properties(struct hl_device *hdev)
+{
+	struct asic_fixed_properties *prop = &hdev->asic_prop;
+
+	prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
+
+	prop->dram_base_address = DRAM_PHYS_BASE;
+	prop->dram_size = DRAM_PHYS_DEFAULT_SIZE;
+	prop->dram_end_address = prop->dram_base_address + prop->dram_size;
+	prop->dram_user_base_address = DRAM_BASE_ADDR_USER;
+
+	prop->sram_base_address = SRAM_BASE_ADDR;
+	prop->sram_size = SRAM_SIZE;
+	prop->sram_end_address = prop->sram_base_address + prop->sram_size;
+	prop->sram_user_base_address = prop->sram_base_address +
+						SRAM_USER_BASE_OFFSET;
+
+	prop->host_phys_base_address = HOST_PHYS_BASE;
+	prop->va_space_host_start_address = VA_HOST_SPACE_START;
+	prop->va_space_host_end_address = VA_HOST_SPACE_END;
+	prop->va_space_dram_start_address = VA_DDR_SPACE_START;
+	prop->va_space_dram_end_address = VA_DDR_SPACE_END;
+	prop->cfg_size = CFG_SIZE;
+	prop->max_asid = MAX_ASID;
+	prop->tpc_enabled_mask = TPC_ENABLED_MASK;
+
+	prop->high_pll = PLL_HIGH_DEFAULT;
+}
+
+/*
+ * goya_pci_bars_map - Map PCI BARS of Goya device
+ *
+ * @hdev: pointer to hl_device structure
+ *
+ * Request PCI regions and map them to kernel virtual addresses.
+ * Returns 0 on success
+ *
+ */
+int goya_pci_bars_map(struct hl_device *hdev)
+{
+	struct pci_dev *pdev = hdev->pdev;
+	int rc;
+
+	rc = pci_request_regions(pdev, HL_NAME);
+	if (rc) {
+		dev_err(hdev->dev, "Cannot obtain PCI resources\n");
+		return rc;
+	}
+
+	hdev->pcie_bar[SRAM_CFG_BAR_ID] =
+			pci_ioremap_bar(pdev, SRAM_CFG_BAR_ID);
+	if (!hdev->pcie_bar[SRAM_CFG_BAR_ID]) {
+		dev_err(hdev->dev, "pci_ioremap_bar failed for CFG\n");
+		rc = -ENODEV;
+		goto err_release_regions;
+	}
+
+	hdev->pcie_bar[MSIX_BAR_ID] = pci_ioremap_bar(pdev, MSIX_BAR_ID);
+	if (!hdev->pcie_bar[MSIX_BAR_ID]) {
+		dev_err(hdev->dev, "pci_ioremap_bar failed for MSIX\n");
+		rc = -ENODEV;
+		goto err_unmap_sram_cfg;
+	}
+
+	hdev->pcie_bar[DDR_BAR_ID] = pci_ioremap_wc_bar(pdev, DDR_BAR_ID);
+	if (!hdev->pcie_bar[DDR_BAR_ID]) {
+		dev_err(hdev->dev, "pci_ioremap_bar failed for DDR\n");
+		rc = -ENODEV;
+		goto err_unmap_msix;
+	}
+
+	hdev->rmmio = hdev->pcie_bar[SRAM_CFG_BAR_ID] +
+				(CFG_BASE - SRAM_BASE_ADDR);
+
+	return 0;
+
+err_unmap_msix:
+	iounmap(hdev->pcie_bar[MSIX_BAR_ID]);
+err_unmap_sram_cfg:
+	iounmap(hdev->pcie_bar[SRAM_CFG_BAR_ID]);
+err_release_regions:
+	pci_release_regions(pdev);
+
+	return rc;
+}
+
+/*
+ * goya_pci_bars_unmap - Unmap PCI BARS of Goya device
+ *
+ * @hdev: pointer to hl_device structure
+ *
+ * Release all PCI BARS and unmap their virtual addresses
+ *
+ */
+static void goya_pci_bars_unmap(struct hl_device *hdev)
+{
+	struct pci_dev *pdev = hdev->pdev;
+
+	iounmap(hdev->pcie_bar[DDR_BAR_ID]);
+	iounmap(hdev->pcie_bar[MSIX_BAR_ID]);
+	iounmap(hdev->pcie_bar[SRAM_CFG_BAR_ID]);
+	pci_release_regions(pdev);
+}
+
+/*
+ * goya_elbi_write - Write through the ELBI interface
+ *
+ * @hdev: pointer to hl_device structure
+ *
+ * return 0 on success, -1 on failure
+ *
+ */
+static int goya_elbi_write(struct hl_device *hdev, u64 addr, u32 data)
+{
+	struct pci_dev *pdev = hdev->pdev;
+	ktime_t timeout;
+	u32 val;
+
+	/* Clear previous status */
+	pci_write_config_dword(pdev, mmPCI_CONFIG_ELBI_STS, 0);
+
+	pci_write_config_dword(pdev, mmPCI_CONFIG_ELBI_ADDR, (u32) addr);
+	pci_write_config_dword(pdev, mmPCI_CONFIG_ELBI_DATA, data);
+	pci_write_config_dword(pdev, mmPCI_CONFIG_ELBI_CTRL,
+				PCI_CONFIG_ELBI_CTRL_WRITE);
+
+	timeout = ktime_add_ms(ktime_get(), 10);
+	for (;;) {
+		pci_read_config_dword(pdev, mmPCI_CONFIG_ELBI_STS, &val);
+		if (val & PCI_CONFIG_ELBI_STS_MASK)
+			break;
+		if (ktime_compare(ktime_get(), timeout) > 0) {
+			pci_read_config_dword(pdev, mmPCI_CONFIG_ELBI_STS,
+						&val);
+			break;
+		}
+		usleep_range(300, 500);
+	}
+
+	if ((val & PCI_CONFIG_ELBI_STS_MASK) == PCI_CONFIG_ELBI_STS_DONE)
+		return 0;
+
+	if (val & PCI_CONFIG_ELBI_STS_ERR) {
+		dev_err(hdev->dev, "Error writing to ELBI\n");
+		return -EIO;
+	}
+
+	if (!(val & PCI_CONFIG_ELBI_STS_MASK)) {
+		dev_err(hdev->dev, "ELBI write didn't finish in time\n");
+		return -EIO;
+	}
+
+	dev_err(hdev->dev, "ELBI write has undefined bits in status\n");
+	return -EIO;
+}
+
+/*
+ * goya_iatu_write - iatu write routine
+ *
+ * @hdev: pointer to hl_device structure
+ *
+ */
+static int goya_iatu_write(struct hl_device *hdev, u32 addr, u32 data)
+{
+	u32 dbi_offset;
+	int rc;
+
+	dbi_offset = addr & 0xFFF;
+
+	rc = goya_elbi_write(hdev, CFG_BASE + mmPCIE_AUX_DBI, 0x00300000);
+	rc |= goya_elbi_write(hdev, mmPCIE_DBI_BASE + dbi_offset, data);
+
+	if (rc)
+		return -EIO;
+
+	return 0;
+}
+
+void goya_reset_link_through_bridge(struct hl_device *hdev)
+{
+	struct pci_dev *pdev = hdev->pdev;
+	struct pci_dev *parent_port;
+	u16 val;
+
+	parent_port = pdev->bus->self;
+	pci_read_config_word(parent_port, PCI_BRIDGE_CONTROL, &val);
+	val |= PCI_BRIDGE_CTL_BUS_RESET;
+	pci_write_config_word(parent_port, PCI_BRIDGE_CONTROL, val);
+	ssleep(1);
+
+	val &= ~(PCI_BRIDGE_CTL_BUS_RESET);
+	pci_write_config_word(parent_port, PCI_BRIDGE_CONTROL, val);
+	ssleep(3);
+}
+
+/*
+ * goya_set_ddr_bar_base - set DDR bar to map specific device address
+ *
+ * @hdev: pointer to hl_device structure
+ * @addr: address in DDR. Must be aligned to DDR bar size
+ *
+ * This function configures the iATU so that the DDR bar will start at the
+ * specified addr.
+ *
+ */
+static int goya_set_ddr_bar_base(struct hl_device *hdev, u64 addr)
+{
+	struct goya_device *goya = hdev->asic_specific;
+	int rc;
+
+	if ((goya) && (goya->ddr_bar_cur_addr == addr))
+		return 0;
+
+	/* Inbound Region 1 - Bar 4 - Point to DDR */
+	rc = goya_iatu_write(hdev, 0x314, lower_32_bits(addr));
+	rc |= goya_iatu_write(hdev, 0x318, upper_32_bits(addr));
+	rc |= goya_iatu_write(hdev, 0x300, 0);
+	/* Enable + Bar match + match enable + Bar 4 */
+	rc |= goya_iatu_write(hdev, 0x304, 0xC0080400);
+
+	/* Return the DBI window to the default location */
+	rc |= goya_elbi_write(hdev, CFG_BASE + mmPCIE_AUX_DBI, 0);
+	rc |= goya_elbi_write(hdev, CFG_BASE + mmPCIE_AUX_DBI_32, 0);
+
+	if (rc) {
+		dev_err(hdev->dev, "failed to map DDR bar to 0x%08llx\n", addr);
+		return -EIO;
+	}
+
+	if (goya)
+		goya->ddr_bar_cur_addr = addr;
+
+	return 0;
+}
+
+/*
+ * goya_init_iatu - Initialize the iATU unit inside the PCI controller
+ *
+ * @hdev: pointer to hl_device structure
+ *
+ * This is needed in case the firmware doesn't initialize the iATU
+ *
+ */
+static int goya_init_iatu(struct hl_device *hdev)
+{
+	int rc;
+
+	/* Inbound Region 0 - Bar 0 - Point to SRAM_BASE_ADDR */
+	rc  = goya_iatu_write(hdev, 0x114, lower_32_bits(SRAM_BASE_ADDR));
+	rc |= goya_iatu_write(hdev, 0x118, upper_32_bits(SRAM_BASE_ADDR));
+	rc |= goya_iatu_write(hdev, 0x100, 0);
+	/* Enable + Bar match + match enable */
+	rc |= goya_iatu_write(hdev, 0x104, 0xC0080000);
+
+	/* Inbound Region 1 - Bar 4 - Point to DDR */
+	rc |= goya_set_ddr_bar_base(hdev, DRAM_PHYS_BASE);
+
+	/* Outbound Region 0 - Point to Host */
+	rc |= goya_iatu_write(hdev, 0x008, lower_32_bits(HOST_PHYS_BASE));
+	rc |= goya_iatu_write(hdev, 0x00C, upper_32_bits(HOST_PHYS_BASE));
+	rc |= goya_iatu_write(hdev, 0x010,
+		lower_32_bits(HOST_PHYS_BASE + HOST_PHYS_SIZE - 1));
+	rc |= goya_iatu_write(hdev, 0x014, 0);
+	rc |= goya_iatu_write(hdev, 0x018, 0);
+	rc |= goya_iatu_write(hdev, 0x020,
+		upper_32_bits(HOST_PHYS_BASE + HOST_PHYS_SIZE - 1));
+	/* Increase region size */
+	rc |= goya_iatu_write(hdev, 0x000, 0x00002000);
+	/* Enable */
+	rc |= goya_iatu_write(hdev, 0x004, 0x80000000);
+
+	/* Return the DBI window to the default location */
+	rc |= goya_elbi_write(hdev, CFG_BASE + mmPCIE_AUX_DBI, 0);
+	rc |= goya_elbi_write(hdev, CFG_BASE + mmPCIE_AUX_DBI_32, 0);
+
+	if (rc)
+		return -EIO;
+
+	return 0;
+}
+
+/*
+ * goya_early_init - GOYA early initialization code
+ *
+ * @hdev: pointer to hl_device structure
+ *
+ * Verify PCI bars
+ * Set DMA masks
+ * PCI controller initialization
+ * Map PCI bars
+ *
+ */
+static int goya_early_init(struct hl_device *hdev)
+{
+	struct asic_fixed_properties *prop = &hdev->asic_prop;
+	struct pci_dev *pdev = hdev->pdev;
+	u32 val;
+	int rc;
+
+	goya_get_fixed_properties(hdev);
+
+	/* Check BAR sizes */
+	if (pci_resource_len(pdev, SRAM_CFG_BAR_ID) != CFG_BAR_SIZE) {
+		dev_err(hdev->dev,
+			"Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
+			SRAM_CFG_BAR_ID,
+			(unsigned long long) pci_resource_len(pdev,
+							SRAM_CFG_BAR_ID),
+			CFG_BAR_SIZE);
+		return -ENODEV;
+	}
+
+	if (pci_resource_len(pdev, MSIX_BAR_ID) != MSIX_BAR_SIZE) {
+		dev_err(hdev->dev,
+			"Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
+			MSIX_BAR_ID,
+			(unsigned long long) pci_resource_len(pdev,
+								MSIX_BAR_ID),
+			MSIX_BAR_SIZE);
+		return -ENODEV;
+	}
+
+	prop->dram_pci_bar_size = pci_resource_len(pdev, DDR_BAR_ID);
+
+	/* set DMA mask for GOYA */
+	rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(39));
+	if (rc) {
+		dev_warn(hdev->dev, "Unable to set pci dma mask to 39 bits\n");
+		rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+		if (rc) {
+			dev_err(hdev->dev,
+				"Unable to set pci dma mask to 32 bits\n");
+			return rc;
+		}
+	}
+
+	rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(39));
+	if (rc) {
+		dev_warn(hdev->dev,
+			"Unable to set pci consistent dma mask to 39 bits\n");
+		rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
+		if (rc) {
+			dev_err(hdev->dev,
+				"Unable to set pci consistent dma mask to 32 bits\n");
+			return rc;
+		}
+	}
+
+	if (hdev->reset_pcilink)
+		goya_reset_link_through_bridge(hdev);
+
+	rc = pci_enable_device_mem(pdev);
+	if (rc) {
+		dev_err(hdev->dev, "can't enable PCI device\n");
+		return rc;
+	}
+
+	pci_set_master(pdev);
+
+	rc = goya_init_iatu(hdev);
+	if (rc) {
+		dev_err(hdev->dev, "Failed to initialize iATU\n");
+		goto disable_device;
+	}
+
+	rc = goya_pci_bars_map(hdev);
+	if (rc) {
+		dev_err(hdev->dev, "Failed to initialize PCI BARS\n");
+		goto disable_device;
+	}
+
+	val = RREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS);
+	if (val & PSOC_GLOBAL_CONF_BOOT_STRAP_PINS_SRIOV_EN_MASK)
+		dev_warn(hdev->dev,
+			"PCI strap is not configured correctly, PCI bus errors may occur\n");
+
+	return 0;
+
+disable_device:
+	pci_clear_master(pdev);
+	pci_disable_device(pdev);
+
+	return rc;
+}
+
+/*
+ * goya_early_fini - GOYA early finalization code
+ *
+ * @hdev: pointer to hl_device structure
+ *
+ * Unmap PCI bars
+ *
+ */
+int goya_early_fini(struct hl_device *hdev)
+{
+	goya_pci_bars_unmap(hdev);
+
+	pci_clear_master(hdev->pdev);
+	pci_disable_device(hdev->pdev);
+
+	return 0;
+}
+
+/*
+ * goya_sw_init - Goya software initialization code
+ *
+ * @hdev: pointer to hl_device structure
+ *
+ */
+static int goya_sw_init(struct hl_device *hdev)
+{
+	struct goya_device *goya;
+	int rc;
+
+	/* Allocate device structure */
+	goya = kzalloc(sizeof(*goya), GFP_KERNEL);
+	if (!goya)
+		return -ENOMEM;
+
+	/* according to goya_init_iatu */
+	goya->ddr_bar_cur_addr = DRAM_PHYS_BASE;
+	hdev->asic_specific = goya;
+
+	/* Create DMA pool for small allocations */
+	hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
+			&hdev->pdev->dev, GOYA_DMA_POOL_BLK_SIZE, 8, 0);
+	if (!hdev->dma_pool) {
+		dev_err(hdev->dev, "failed to create DMA pool\n");
+		rc = -ENOMEM;
+		goto free_goya_device;
+	}
+
+	hdev->cpu_accessible_dma_mem =
+			hdev->asic_funcs->dma_alloc_coherent(hdev,
+					CPU_ACCESSIBLE_MEM_SIZE,
+					&hdev->cpu_accessible_dma_address,
+					GFP_KERNEL | __GFP_ZERO);
+
+	if (!hdev->cpu_accessible_dma_mem) {
+		dev_err(hdev->dev,
+			"failed to allocate %d of dma memory for CPU accessible memory space\n",
+			CPU_ACCESSIBLE_MEM_SIZE);
+		rc = -ENOMEM;
+		goto free_dma_pool;
+	}
+
+	hdev->cpu_accessible_dma_pool = gen_pool_create(CPU_PKT_SHIFT, -1);
+	if (!hdev->cpu_accessible_dma_pool) {
+		dev_err(hdev->dev,
+			"Failed to create CPU accessible DMA pool\n");
+		rc = -ENOMEM;
+		goto free_cpu_pq_dma_mem;
+	}
+
+	rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
+				(uintptr_t) hdev->cpu_accessible_dma_mem,
+				CPU_ACCESSIBLE_MEM_SIZE, -1);
+	if (rc) {
+		dev_err(hdev->dev,
+			"Failed to add memory to CPU accessible DMA pool\n");
+		rc = -EFAULT;
+		goto free_cpu_pq_pool;
+	}
+
+	spin_lock_init(&goya->hw_queues_lock);
+
+	return 0;
+
+free_cpu_pq_pool:
+	gen_pool_destroy(hdev->cpu_accessible_dma_pool);
+free_cpu_pq_dma_mem:
+	hdev->asic_funcs->dma_free_coherent(hdev, CPU_ACCESSIBLE_MEM_SIZE,
+			hdev->cpu_accessible_dma_mem,
+			hdev->cpu_accessible_dma_address);
+free_dma_pool:
+	dma_pool_destroy(hdev->dma_pool);
+free_goya_device:
+	kfree(goya);
+
+	return rc;
+}
+
+/*
+ * goya_sw_fini - Goya software tear-down code
+ *
+ * @hdev: pointer to hl_device structure
+ *
+ */
+int goya_sw_fini(struct hl_device *hdev)
+{
+	struct goya_device *goya = hdev->asic_specific;
+
+	gen_pool_destroy(hdev->cpu_accessible_dma_pool);
+
+	hdev->asic_funcs->dma_free_coherent(hdev, CPU_ACCESSIBLE_MEM_SIZE,
+			hdev->cpu_accessible_dma_mem,
+			hdev->cpu_accessible_dma_address);
+
+	dma_pool_destroy(hdev->dma_pool);
+
+	kfree(goya);
+
+	return 0;
+}
+
+int goya_suspend(struct hl_device *hdev)
+{
+	return 0;
+}
+
+int goya_resume(struct hl_device *hdev)
+{
+	return 0;
+}
+
+void *goya_dma_alloc_coherent(struct hl_device *hdev, size_t size,
+					dma_addr_t *dma_handle, gfp_t flags)
+{
+	return dma_alloc_coherent(&hdev->pdev->dev, size, dma_handle, flags);
+}
+
+void goya_dma_free_coherent(struct hl_device *hdev, size_t size, void *cpu_addr,
+				dma_addr_t dma_handle)
+{
+	dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, dma_handle);
+}
+
+static const struct hl_asic_funcs goya_funcs = {
+	.early_init = goya_early_init,
+	.early_fini = goya_early_fini,
+	.sw_init = goya_sw_init,
+	.sw_fini = goya_sw_fini,
+	.suspend = goya_suspend,
+	.resume = goya_resume,
+	.dma_alloc_coherent = goya_dma_alloc_coherent,
+	.dma_free_coherent = goya_dma_free_coherent,
+};
+
+/*
+ * goya_set_asic_funcs - set Goya function pointers
+ *
+ * @*hdev: pointer to hl_device structure
+ *
+ */
+void goya_set_asic_funcs(struct hl_device *hdev)
+{
+	hdev->asic_funcs = &goya_funcs;
+}
diff --git a/drivers/misc/habanalabs/goya/goyaP.h b/drivers/misc/habanalabs/goya/goyaP.h
new file mode 100644
index 000000000000..6a78976e2098
--- /dev/null
+++ b/drivers/misc/habanalabs/goya/goyaP.h
@@ -0,0 +1,152 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright 2016-2019 HabanaLabs, Ltd.
+ * All Rights Reserved.
+ *
+ */
+
+#ifndef GOYAP_H_
+#define GOYAP_H_
+
+#include <uapi/misc/habanalabs.h>
+#include "habanalabs.h"
+#include "include/goya/goya.h"
+
+#define NUMBER_OF_CMPLT_QUEUES		5
+#define NUMBER_OF_EXT_HW_QUEUES		5
+#define NUMBER_OF_CPU_HW_QUEUES		1
+#define NUMBER_OF_INT_HW_QUEUES		9
+#define NUMBER_OF_HW_QUEUES		(NUMBER_OF_EXT_HW_QUEUES + \
+					NUMBER_OF_CPU_HW_QUEUES + \
+					NUMBER_OF_INT_HW_QUEUES)
+
+/*
+ * Number of MSIX interrupts IDS:
+ * Each completion queue has 1 ID
+ * The event queue has 1 ID
+ */
+#define NUMBER_OF_INTERRUPTS		(NUMBER_OF_CMPLT_QUEUES + 1)
+
+#if (NUMBER_OF_HW_QUEUES >= HL_MAX_QUEUES)
+#error "Number of H/W queues must be smaller than HL_MAX_QUEUES"
+#endif
+
+#if (NUMBER_OF_INTERRUPTS > GOYA_MSIX_ENTRIES)
+#error "Number of MSIX interrupts must be smaller or equal to GOYA_MSIX_ENTRIES"
+#endif
+
+#define QMAN_FENCE_TIMEOUT_USEC		10000	/* 10 ms */
+
+#define QMAN_STOP_TIMEOUT_USEC		100000	/* 100 ms */
+
+#define TPC_ENABLED_MASK		0xFF
+
+#define PLL_HIGH_DEFAULT		1575000000	/* 1.575 GHz */
+
+#define GOYA_ARMCP_INFO_TIMEOUT		10000000	/* 10s */
+
+#define DRAM_PHYS_DEFAULT_SIZE		0x100000000ull	/* 4GB */
+
+/* DRAM Memory Map */
+
+#define CPU_FW_IMAGE_SIZE	0x10000000	/* 256MB */
+#define MMU_PAGE_TABLES_SIZE	0x0E000000	/* 224MB */
+#define MMU_CACHE_MNG_SIZE	0x00001000	/* 4KB */
+#define CPU_PQ_PKT_SIZE		0x00001000	/* 4KB */
+#define CPU_PQ_DATA_SIZE	0x01FFE000	/* 32MB - 8KB  */
+
+#define CPU_FW_IMAGE_ADDR	DRAM_PHYS_BASE
+#define MMU_PAGE_TABLES_ADDR	(CPU_FW_IMAGE_ADDR + CPU_FW_IMAGE_SIZE)
+#define MMU_CACHE_MNG_ADDR	(MMU_PAGE_TABLES_ADDR + MMU_PAGE_TABLES_SIZE)
+#define CPU_PQ_PKT_ADDR		(MMU_CACHE_MNG_ADDR + MMU_CACHE_MNG_SIZE)
+#define CPU_PQ_DATA_ADDR	(CPU_PQ_PKT_ADDR + CPU_PQ_PKT_SIZE)
+#define DRAM_BASE_ADDR_USER	(CPU_PQ_DATA_ADDR + CPU_PQ_DATA_SIZE)
+
+#if (DRAM_BASE_ADDR_USER != 0x20000000)
+#error "KMD must reserve 512MB"
+#endif
+
+/*
+ * SRAM Memory Map for KMD
+ *
+ * KMD occupies KMD_SRAM_SIZE bytes from the start of SRAM. It is used for
+ * MME/TPC QMANs
+ *
+ */
+
+#define MME_QMAN_BASE_OFFSET	0x000000	/* Must be 0 */
+#define MME_QMAN_LENGTH		64
+#define TPC_QMAN_LENGTH		64
+
+#define TPC0_QMAN_BASE_OFFSET	(MME_QMAN_BASE_OFFSET + \
+				(MME_QMAN_LENGTH * QMAN_PQ_ENTRY_SIZE))
+#define TPC1_QMAN_BASE_OFFSET	(TPC0_QMAN_BASE_OFFSET + \
+				(TPC_QMAN_LENGTH * QMAN_PQ_ENTRY_SIZE))
+#define TPC2_QMAN_BASE_OFFSET	(TPC1_QMAN_BASE_OFFSET + \
+				(TPC_QMAN_LENGTH * QMAN_PQ_ENTRY_SIZE))
+#define TPC3_QMAN_BASE_OFFSET	(TPC2_QMAN_BASE_OFFSET + \
+				(TPC_QMAN_LENGTH * QMAN_PQ_ENTRY_SIZE))
+#define TPC4_QMAN_BASE_OFFSET	(TPC3_QMAN_BASE_OFFSET + \
+				(TPC_QMAN_LENGTH * QMAN_PQ_ENTRY_SIZE))
+#define TPC5_QMAN_BASE_OFFSET	(TPC4_QMAN_BASE_OFFSET + \
+				(TPC_QMAN_LENGTH * QMAN_PQ_ENTRY_SIZE))
+#define TPC6_QMAN_BASE_OFFSET	(TPC5_QMAN_BASE_OFFSET + \
+				(TPC_QMAN_LENGTH * QMAN_PQ_ENTRY_SIZE))
+#define TPC7_QMAN_BASE_OFFSET	(TPC6_QMAN_BASE_OFFSET + \
+				(TPC_QMAN_LENGTH * QMAN_PQ_ENTRY_SIZE))
+
+#define SRAM_KMD_RES_OFFSET	(TPC7_QMAN_BASE_OFFSET + \
+				(TPC_QMAN_LENGTH * QMAN_PQ_ENTRY_SIZE))
+
+#if (SRAM_KMD_RES_OFFSET >= GOYA_KMD_SRAM_RESERVED_SIZE_FROM_START)
+#error "MME/TPC QMANs SRAM space exceeds limit"
+#endif
+
+#define SRAM_USER_BASE_OFFSET	GOYA_KMD_SRAM_RESERVED_SIZE_FROM_START
+
+/* Virtual address space */
+#define VA_HOST_SPACE_START	0x1000000000000ull	/* 256TB */
+#define VA_HOST_SPACE_END	0x3FF8000000000ull	/* 1PB - 1TB */
+#define VA_HOST_SPACE_SIZE	(VA_HOST_SPACE_END - \
+					VA_HOST_SPACE_START) /* 767TB */
+
+#define VA_DDR_SPACE_START	0x800000000ull		/* 32GB */
+#define VA_DDR_SPACE_END	0x2000000000ull		/* 128GB */
+#define VA_DDR_SPACE_SIZE	(VA_DDR_SPACE_END - \
+					VA_DDR_SPACE_START)	/* 128GB */
+
+#define DMA_MAX_TRANSFER_SIZE	0xFFFFFFFF
+
+#define HW_CAP_PLL		0x00000001
+#define HW_CAP_DDR_0		0x00000002
+#define HW_CAP_DDR_1		0x00000004
+#define HW_CAP_MME		0x00000008
+#define HW_CAP_CPU		0x00000010
+#define HW_CAP_DMA		0x00000020
+#define HW_CAP_MSIX		0x00000040
+#define HW_CAP_CPU_Q		0x00000080
+#define HW_CAP_MMU		0x00000100
+#define HW_CAP_TPC_MBIST	0x00000200
+#define HW_CAP_GOLDEN		0x00000400
+#define HW_CAP_TPC		0x00000800
+
+#define CPU_PKT_SHIFT		5
+#define CPU_PKT_SIZE		(1 << CPU_PKT_SHIFT)
+#define CPU_PKT_MASK		(~((1 << CPU_PKT_SHIFT) - 1))
+#define CPU_MAX_PKTS_IN_CB	32
+#define CPU_CB_SIZE		(CPU_PKT_SIZE * CPU_MAX_PKTS_IN_CB)
+#define CPU_ACCESSIBLE_MEM_SIZE	(HL_QUEUE_LENGTH * CPU_CB_SIZE)
+
+enum goya_fw_component {
+	FW_COMP_UBOOT,
+	FW_COMP_PREBOOT
+};
+
+struct goya_device {
+	/* TODO: remove hw_queues_lock after moving to scheduler code */
+	spinlock_t	hw_queues_lock;
+	u64		ddr_bar_cur_addr;
+	u32		hw_cap_initialized;
+};
+
+#endif /* GOYAP_H_ */
diff --git a/drivers/misc/habanalabs/habanalabs.h b/drivers/misc/habanalabs/habanalabs.h
index fa628b05db13..5076e680a73c 100644
--- a/drivers/misc/habanalabs/habanalabs.h
+++ b/drivers/misc/habanalabs/habanalabs.h
@@ -14,9 +14,62 @@
 
 #define HL_NAME				"habanalabs"
 
+#define HL_MAX_QUEUES			128
+
 struct hl_device;
 
 
+/**
+ * struct asic_fixed_properties - ASIC specific immutable properties.
+ * @sram_base_address: SRAM physical start address.
+ * @sram_end_address: SRAM physical end address.
+ * @sram_user_base_address - SRAM physical start address for user access.
+ * @dram_base_address: DRAM physical start address.
+ * @dram_end_address: DRAM physical end address.
+ * @dram_user_base_address: DRAM physical start address for user access.
+ * @dram_size: DRAM total size.
+ * @dram_pci_bar_size: size of PCI bar towards DRAM.
+ * @host_phys_base_address: base physical address of host memory for
+ *				transactions that the device generates.
+ * @va_space_host_start_address: base address of virtual memory range for
+ *                               mapping host memory.
+ * @va_space_host_end_address: end address of virtual memory range for
+ *                             mapping host memory.
+ * @va_space_dram_start_address: base address of virtual memory range for
+ *                               mapping DRAM memory.
+ * @va_space_dram_end_address: end address of virtual memory range for
+ *                             mapping DRAM memory.
+ * @cfg_size: configuration space size on SRAM.
+ * @sram_size: total size of SRAM.
+ * @max_asid: maximum number of open contexts (ASIDs).
+ * @completion_queues_count: number of completion queues.
+ * @high_pll: high PLL frequency used by the device.
+ * @tpc_enabled_mask: which TPCs are enabled.
+ */
+struct asic_fixed_properties {
+	u64			sram_base_address;
+	u64			sram_end_address;
+	u64			sram_user_base_address;
+	u64			dram_base_address;
+	u64			dram_end_address;
+	u64			dram_user_base_address;
+	u64			dram_size;
+	u64			dram_pci_bar_size;
+	u64			host_phys_base_address;
+	u64			va_space_host_start_address;
+	u64			va_space_host_end_address;
+	u64			va_space_dram_start_address;
+	u64			va_space_dram_end_address;
+	u32			cfg_size;
+	u32			sram_size;
+	u32			max_asid;
+	u32			high_pll;
+	u8			completion_queues_count;
+	u8			tpc_enabled_mask;
+};
+
+
+#define HL_QUEUE_LENGTH			256
 /*
  * ASICs
  */
@@ -33,6 +86,36 @@ enum hl_asic_type {
 	ASIC_INVALID
 };
 
+/**
+ * struct hl_asic_funcs - ASIC specific functions that are can be called from
+ *                        common code.
+ * @early_init: sets up early driver state (pre sw_init), doesn't configure H/W.
+ * @early_fini: tears down what was done in early_init.
+ * @sw_init: sets up driver state, does not configure H/W.
+ * @sw_fini: tears down driver state, does not configure H/W.
+ * @suspend: handles IP specific H/W or SW changes for suspend.
+ * @resume: handles IP specific H/W or SW changes for resume.
+ * @dma_alloc_coherent: Allocate coherent DMA memory by calling
+ *                      dma_alloc_coherent(). This is ASIC function because its
+ *                      implementation is not trivial when the driver is loaded
+ *                      in simulation mode (not upstreamed).
+ * @dma_free_coherent: Free coherent DMA memory by calling dma_free_coherent().
+ *                     This is ASIC function because its implementation is not
+ *                     trivial when the driver is loaded in simulation mode
+ *                     (not upstreamed).
+ */
+struct hl_asic_funcs {
+	int (*early_init)(struct hl_device *hdev);
+	int (*early_fini)(struct hl_device *hdev);
+	int (*sw_init)(struct hl_device *hdev);
+	int (*sw_fini)(struct hl_device *hdev);
+	int (*suspend)(struct hl_device *hdev);
+	int (*resume)(struct hl_device *hdev);
+	void* (*dma_alloc_coherent)(struct hl_device *hdev, size_t size,
+					dma_addr_t *dma_handle, gfp_t flag);
+	void (*dma_free_coherent)(struct hl_device *hdev, size_t size,
+					void *cpu_addr, dma_addr_t dma_handle);
+};
 
 /*
  * FILE PRIVATE STRUCTURE
@@ -62,26 +145,78 @@ struct hl_fpriv {
  */
 #define HL_MAX_MINORS	256
 
+/*
+ * Registers read & write functions.
+ */
+
+u32 hl_rreg(struct hl_device *hdev, u32 reg);
+void hl_wreg(struct hl_device *hdev, u32 reg, u32 val);
+
+#define hl_poll_timeout(hdev, addr, val, cond, sleep_us, timeout_us) \
+	readl_poll_timeout(hdev->rmmio + addr, val, cond, sleep_us, timeout_us)
+
+#define RREG32(reg) hl_rreg(hdev, (reg))
+#define WREG32(reg, v) hl_wreg(hdev, (reg), (v))
+#define DREG32(reg) pr_info("REGISTER: " #reg " : 0x%08X\n",	\
+				hl_rreg(hdev, (reg)))
+
+#define WREG32_P(reg, val, mask)				\
+	do {							\
+		u32 tmp_ = RREG32(reg);				\
+		tmp_ &= (mask);					\
+		tmp_ |= ((val) & ~(mask));			\
+		WREG32(reg, tmp_);				\
+	} while (0)
+#define WREG32_AND(reg, and) WREG32_P(reg, 0, and)
+#define WREG32_OR(reg, or) WREG32_P(reg, or, ~(or))
+
+#define REG_FIELD_SHIFT(reg, field) reg##_##field##_SHIFT
+#define REG_FIELD_MASK(reg, field) reg##_##field##_MASK
+#define WREG32_FIELD(reg, field, val)	\
+	WREG32(mm##reg, (RREG32(mm##reg) & ~REG_FIELD_MASK(reg, field)) | \
+			(val) << REG_FIELD_SHIFT(reg, field))
+
 /**
  * struct hl_device - habanalabs device structure.
  * @pdev: pointer to PCI device, can be NULL in case of simulator device.
+ * @pcie_bar: array of available PCIe bars.
+ * @rmmio: configuration area address on SRAM.
  * @cdev: related char device.
  * @dev: realted kernel basic device structure.
  * @asic_name: ASIC specific nmae.
  * @asic_type: ASIC specific type.
+ * @dma_pool: DMA pool for small allocations.
+ * @cpu_accessible_dma_mem: KMD <-> ArmCP shared memory CPU address.
+ * @cpu_accessible_dma_address: KMD <-> ArmCP shared memory DMA address.
+ * @cpu_accessible_dma_pool: KMD <-> ArmCP shared memory pool.
+ * @asic_prop: ASIC specific immutable properties.
+ * @asic_funcs: ASIC specific functions.
+ * @asic_specific: ASIC specific information to use only from ASIC files.
  * @major: habanalabs KMD major.
  * @id: device minor.
  * @disabled: is device disabled.
  */
 struct hl_device {
 	struct pci_dev			*pdev;
+	void __iomem			*pcie_bar[6];
+	void __iomem			*rmmio;
 	struct cdev			cdev;
 	struct device			*dev;
 	char				asic_name[16];
 	enum hl_asic_type		asic_type;
+	struct dma_pool			*dma_pool;
+	void				*cpu_accessible_dma_mem;
+	dma_addr_t			cpu_accessible_dma_address;
+	struct gen_pool			*cpu_accessible_dma_pool;
+	struct asic_fixed_properties	asic_prop;
+	const struct hl_asic_funcs	*asic_funcs;
+	void				*asic_specific;
 	u32				major;
 	u16				id;
 	u8				disabled;
+
+	/* Parameters for bring-up */
+	u8				reset_pcilink;
 };
 
 
@@ -128,4 +263,6 @@ void hl_device_fini(struct hl_device *hdev);
 int hl_device_suspend(struct hl_device *hdev);
 int hl_device_resume(struct hl_device *hdev);
 
+void goya_set_asic_funcs(struct hl_device *hdev);
+
 #endif /* HABANALABSP_H_ */
diff --git a/drivers/misc/habanalabs/habanalabs_drv.c b/drivers/misc/habanalabs/habanalabs_drv.c
index a5251ed277d1..edf626b2b7b1 100644
--- a/drivers/misc/habanalabs/habanalabs_drv.c
+++ b/drivers/misc/habanalabs/habanalabs_drv.c
@@ -122,6 +122,9 @@ int create_hdev(struct hl_device **dev, struct pci_dev *pdev,
 
 	hdev->major = hl_major;
 
+	/* Parameters for bring-up - set them to defaults */
+	hdev->reset_pcilink = 0;
+
 	hdev->disabled = true;
 	hdev->pdev = pdev; /* can be NULL in case of simulator device */
 
diff --git a/drivers/misc/habanalabs/include/goya/goya.h b/drivers/misc/habanalabs/include/goya/goya.h
new file mode 100644
index 000000000000..614149efa412
--- /dev/null
+++ b/drivers/misc/habanalabs/include/goya/goya.h
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright 2016-2019 HabanaLabs, Ltd.
+ * All Rights Reserved.
+ *
+ */
+
+#ifndef GOYA_H
+#define GOYA_H
+
+#include "asic_reg/goya_regs.h"
+
+#include <linux/types.h>
+
+#define SRAM_CFG_BAR_ID		0
+#define MSIX_BAR_ID		2
+#define DDR_BAR_ID		4
+
+#define CFG_BAR_SIZE		0x10000000ull		/* 256MB */
+#define MSIX_BAR_SIZE		0x1000ull		/* 4KB */
+
+#define CFG_BASE		0x7FFC000000ull
+#define CFG_SIZE		0x4000000		/* 32MB CFG + 32MB DBG*/
+
+#define SRAM_BASE_ADDR		0x7FF0000000ull
+#define SRAM_SIZE		0x32A0000		/* 50.625MB */
+
+#define DRAM_PHYS_BASE		0x0ull
+
+#define HOST_PHYS_BASE		0x8000000000ull		/* 0.5TB */
+#define HOST_PHYS_SIZE		0x1000000000000ull	/* 0.25PB (48 bits) */
+
+#define GOYA_MSIX_ENTRIES	8
+
+#define QMAN_PQ_ENTRY_SIZE	16			/* Bytes */
+
+#define MAX_ASID		1024
+
+#define PROT_BITS_OFFS		0xF80
+
+#define DMA_MAX_NUM		5
+
+#define TPC_MAX_NUM		8
+
+#endif /* GOYA_H */
diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h
new file mode 100644
index 000000000000..a0ec23adf8f5
--- /dev/null
+++ b/include/uapi/misc/habanalabs.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+ *
+ * Copyright 2016-2018 HabanaLabs, Ltd.
+ * All Rights Reserved.
+ *
+ */
+
+#ifndef HABANALABS_H_
+#define HABANALABS_H_
+
+#include <linux/types.h>
+#include <linux/ioctl.h>
+
+/*
+ * Defines that are asic-specific but constitutes as ABI between kernel driver
+ * and userspace
+ */
+#define GOYA_KMD_SRAM_RESERVED_SIZE_FROM_START	0x8000	/* 32KB */
+
+#endif /* HABANALABS_H_ */
-- 
cgit v1.2.3


From be5d926b5c10430671ae975b80efb7a5652e3f9a Mon Sep 17 00:00:00 2001
From: Oded Gabbay <oded.gabbay@gmail.com>
Date: Sat, 16 Feb 2019 00:39:15 +0200
Subject: habanalabs: add command buffer module

This patch adds the command buffer (CB) module, which allows the user to
create and destroy CBs and to map them to the user's process
address-space.

A command buffer is a memory blocks that reside in DMA-able address-space
and is physically contiguous so it can be accessed by the device without
MMU translation. The command buffer memory is allocated using the
coherent DMA API.

When creating a new CB, the IOCTL returns a handle of it, and the
user-space process needs to use that handle to mmap the buffer to get a VA
in the user's address-space.

Before destroying (freeing) a CB, the user must unmap the CB's VA using the
CB handle.

Each CB has a reference counter, which tracks its usage in command
submissions and also its mmaps (only a single mmap is allowed).

The driver maintains a pool of pre-allocated CBs in order to reduce
latency during command submissions. In case the pool is empty, the driver
will go to the slow-path of allocating a new CB, i.e. calling
dma_alloc_coherent.

Reviewed-by: Mike Rapoport <rppt@linux.ibm.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/habanalabs/Makefile           |   3 +-
 drivers/misc/habanalabs/command_buffer.c   | 433 +++++++++++++++++++++++++++++
 drivers/misc/habanalabs/device.c           |  43 ++-
 drivers/misc/habanalabs/goya/goya.c        |  28 ++
 drivers/misc/habanalabs/habanalabs.h       |  86 ++++++
 drivers/misc/habanalabs/habanalabs_drv.c   |   2 +
 drivers/misc/habanalabs/habanalabs_ioctl.c |  99 +++++++
 include/uapi/misc/habanalabs.h             |  46 +++
 8 files changed, 738 insertions(+), 2 deletions(-)
 create mode 100644 drivers/misc/habanalabs/command_buffer.c
 create mode 100644 drivers/misc/habanalabs/habanalabs_ioctl.c

(limited to 'include')

diff --git a/drivers/misc/habanalabs/Makefile b/drivers/misc/habanalabs/Makefile
index 3ffbadc2ca01..2530c9b78ca4 100644
--- a/drivers/misc/habanalabs/Makefile
+++ b/drivers/misc/habanalabs/Makefile
@@ -4,7 +4,8 @@
 
 obj-m	:= habanalabs.o
 
-habanalabs-y := habanalabs_drv.o device.o context.o asid.o
+habanalabs-y := habanalabs_drv.o device.o context.o asid.o habanalabs_ioctl.o \
+		command_buffer.o
 
 include $(src)/goya/Makefile
 habanalabs-y += $(HL_GOYA_FILES)
diff --git a/drivers/misc/habanalabs/command_buffer.c b/drivers/misc/habanalabs/command_buffer.c
new file mode 100644
index 000000000000..e659ca3035e4
--- /dev/null
+++ b/drivers/misc/habanalabs/command_buffer.c
@@ -0,0 +1,433 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2016-2019 HabanaLabs, Ltd.
+ * All Rights Reserved.
+ */
+
+#include <uapi/misc/habanalabs.h>
+#include "habanalabs.h"
+
+#include <linux/mm.h>
+#include <linux/slab.h>
+
+static void cb_fini(struct hl_device *hdev, struct hl_cb *cb)
+{
+	hdev->asic_funcs->dma_free_coherent(hdev, cb->size,
+			(void *) (uintptr_t) cb->kernel_address,
+			cb->bus_address);
+	kfree(cb);
+}
+
+static void cb_do_release(struct hl_device *hdev, struct hl_cb *cb)
+{
+	if (cb->is_pool) {
+		spin_lock(&hdev->cb_pool_lock);
+		list_add(&cb->pool_list, &hdev->cb_pool);
+		spin_unlock(&hdev->cb_pool_lock);
+	} else {
+		cb_fini(hdev, cb);
+	}
+}
+
+static void cb_release(struct kref *ref)
+{
+	struct hl_device *hdev;
+	struct hl_cb *cb;
+
+	cb = container_of(ref, struct hl_cb, refcount);
+	hdev = cb->hdev;
+
+	cb_do_release(hdev, cb);
+}
+
+static struct hl_cb *hl_cb_alloc(struct hl_device *hdev, u32 cb_size,
+					int ctx_id)
+{
+	struct hl_cb *cb;
+	void *p;
+
+	/*
+	 * We use of GFP_ATOMIC here because this function can be called from
+	 * the latency-sensitive code path for command submission. Due to H/W
+	 * limitations in some of the ASICs, the kernel must copy the user CB
+	 * that is designated for an external queue and actually enqueue
+	 * the kernel's copy. Hence, we must never sleep in this code section
+	 * and must use GFP_ATOMIC for all memory allocations.
+	 */
+	if (ctx_id == HL_KERNEL_ASID_ID)
+		cb = kzalloc(sizeof(*cb), GFP_ATOMIC);
+	else
+		cb = kzalloc(sizeof(*cb), GFP_KERNEL);
+
+	if (!cb)
+		return NULL;
+
+	if (ctx_id == HL_KERNEL_ASID_ID)
+		p = hdev->asic_funcs->dma_alloc_coherent(hdev, cb_size,
+						&cb->bus_address, GFP_ATOMIC);
+	else
+		p = hdev->asic_funcs->dma_alloc_coherent(hdev, cb_size,
+						&cb->bus_address,
+						GFP_USER | __GFP_ZERO);
+	if (!p) {
+		dev_err(hdev->dev,
+			"failed to allocate %d of dma memory for CB\n",
+			cb_size);
+		kfree(cb);
+		return NULL;
+	}
+
+	cb->kernel_address = (u64) (uintptr_t) p;
+	cb->size = cb_size;
+
+	return cb;
+}
+
+int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr,
+			u32 cb_size, u64 *handle, int ctx_id)
+{
+	struct hl_cb *cb;
+	bool alloc_new_cb = true;
+	int rc;
+
+	if (hdev->disabled) {
+		dev_warn_ratelimited(hdev->dev,
+			"Device is disabled. Can't create new CBs\n");
+		rc = -EBUSY;
+		goto out_err;
+	}
+
+	if (cb_size > HL_MAX_CB_SIZE) {
+		dev_err(hdev->dev,
+			"CB size %d must be less then %d\n",
+			cb_size, HL_MAX_CB_SIZE);
+		rc = -EINVAL;
+		goto out_err;
+	}
+
+	/* Minimum allocation must be PAGE SIZE */
+	if (cb_size < PAGE_SIZE)
+		cb_size = PAGE_SIZE;
+
+	if (ctx_id == HL_KERNEL_ASID_ID &&
+			cb_size <= hdev->asic_prop.cb_pool_cb_size) {
+
+		spin_lock(&hdev->cb_pool_lock);
+		if (!list_empty(&hdev->cb_pool)) {
+			cb = list_first_entry(&hdev->cb_pool, typeof(*cb),
+					pool_list);
+			list_del(&cb->pool_list);
+			spin_unlock(&hdev->cb_pool_lock);
+			alloc_new_cb = false;
+		} else {
+			spin_unlock(&hdev->cb_pool_lock);
+			dev_dbg(hdev->dev, "CB pool is empty\n");
+		}
+	}
+
+	if (alloc_new_cb) {
+		cb = hl_cb_alloc(hdev, cb_size, ctx_id);
+		if (!cb) {
+			rc = -ENOMEM;
+			goto out_err;
+		}
+	}
+
+	cb->hdev = hdev;
+	cb->ctx_id = ctx_id;
+
+	spin_lock(&mgr->cb_lock);
+	rc = idr_alloc(&mgr->cb_handles, cb, 1, 0, GFP_ATOMIC);
+	spin_unlock(&mgr->cb_lock);
+
+	if (rc < 0) {
+		dev_err(hdev->dev, "Failed to allocate IDR for a new CB\n");
+		goto release_cb;
+	}
+
+	cb->id = rc;
+
+	kref_init(&cb->refcount);
+	spin_lock_init(&cb->lock);
+
+	/*
+	 * idr is 32-bit so we can safely OR it with a mask that is above
+	 * 32 bit
+	 */
+	*handle = cb->id | HL_MMAP_CB_MASK;
+	*handle <<= PAGE_SHIFT;
+
+	return 0;
+
+release_cb:
+	cb_do_release(hdev, cb);
+out_err:
+	*handle = 0;
+
+	return rc;
+}
+
+int hl_cb_destroy(struct hl_device *hdev, struct hl_cb_mgr *mgr, u64 cb_handle)
+{
+	struct hl_cb *cb;
+	u32 handle;
+	int rc = 0;
+
+	/*
+	 * handle was given to user to do mmap, I need to shift it back to
+	 * how the idr module gave it to me
+	 */
+	cb_handle >>= PAGE_SHIFT;
+	handle = (u32) cb_handle;
+
+	spin_lock(&mgr->cb_lock);
+
+	cb = idr_find(&mgr->cb_handles, handle);
+	if (cb) {
+		idr_remove(&mgr->cb_handles, handle);
+		spin_unlock(&mgr->cb_lock);
+		kref_put(&cb->refcount, cb_release);
+	} else {
+		spin_unlock(&mgr->cb_lock);
+		dev_err(hdev->dev,
+			"CB destroy failed, no match to handle 0x%x\n", handle);
+		rc = -EINVAL;
+	}
+
+	return rc;
+}
+
+int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data)
+{
+	union hl_cb_args *args = data;
+	struct hl_device *hdev = hpriv->hdev;
+	u64 handle;
+	int rc;
+
+	switch (args->in.op) {
+	case HL_CB_OP_CREATE:
+		rc = hl_cb_create(hdev, &hpriv->cb_mgr, args->in.cb_size,
+					&handle, hpriv->ctx->asid);
+		memset(args, 0, sizeof(*args));
+		args->out.cb_handle = handle;
+		break;
+	case HL_CB_OP_DESTROY:
+		rc = hl_cb_destroy(hdev, &hpriv->cb_mgr,
+					args->in.cb_handle);
+		break;
+	default:
+		rc = -ENOTTY;
+		break;
+	}
+
+	return rc;
+}
+
+static void cb_vm_close(struct vm_area_struct *vma)
+{
+	struct hl_cb *cb = (struct hl_cb *) vma->vm_private_data;
+
+	cb->mmap_size -= vma->vm_end - vma->vm_start;
+
+	if (cb->mmap_size)
+		return;
+
+	spin_lock(&cb->lock);
+	cb->mmap = false;
+	spin_unlock(&cb->lock);
+
+	hl_cb_put(cb);
+	vma->vm_private_data = NULL;
+}
+
+static const struct vm_operations_struct cb_vm_ops = {
+	.close = cb_vm_close
+};
+
+int hl_cb_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma)
+{
+	struct hl_device *hdev = hpriv->hdev;
+	struct hl_cb *cb;
+	phys_addr_t address;
+	u32 handle;
+	int rc;
+
+	handle = vma->vm_pgoff;
+
+	/* reference was taken here */
+	cb = hl_cb_get(hdev, &hpriv->cb_mgr, handle);
+	if (!cb) {
+		dev_err(hdev->dev,
+			"CB mmap failed, no match to handle %d\n", handle);
+		return -EINVAL;
+	}
+
+	/* Validation check */
+	if ((vma->vm_end - vma->vm_start) != cb->size) {
+		dev_err(hdev->dev,
+			"CB mmap failed, mmap size 0x%lx != 0x%x cb size\n",
+			vma->vm_end - vma->vm_start, cb->size);
+		rc = -EINVAL;
+		goto put_cb;
+	}
+
+	spin_lock(&cb->lock);
+
+	if (cb->mmap) {
+		dev_err(hdev->dev,
+			"CB mmap failed, CB already mmaped to user\n");
+		rc = -EINVAL;
+		goto release_lock;
+	}
+
+	cb->mmap = true;
+
+	spin_unlock(&cb->lock);
+
+	vma->vm_ops = &cb_vm_ops;
+
+	/*
+	 * Note: We're transferring the cb reference to
+	 * vma->vm_private_data here.
+	 */
+
+	vma->vm_private_data = cb;
+
+	/* Calculate address for CB */
+	address = virt_to_phys((void *) (uintptr_t) cb->kernel_address);
+
+	rc = hdev->asic_funcs->cb_mmap(hdev, vma, cb->kernel_address,
+					address, cb->size);
+
+	if (rc) {
+		spin_lock(&cb->lock);
+		cb->mmap = false;
+		goto release_lock;
+	}
+
+	cb->mmap_size = cb->size;
+
+	return 0;
+
+release_lock:
+	spin_unlock(&cb->lock);
+put_cb:
+	hl_cb_put(cb);
+	return rc;
+}
+
+struct hl_cb *hl_cb_get(struct hl_device *hdev, struct hl_cb_mgr *mgr,
+			u32 handle)
+{
+	struct hl_cb *cb;
+
+	spin_lock(&mgr->cb_lock);
+	cb = idr_find(&mgr->cb_handles, handle);
+
+	if (!cb) {
+		spin_unlock(&mgr->cb_lock);
+		dev_warn(hdev->dev,
+			"CB get failed, no match to handle %d\n", handle);
+		return NULL;
+	}
+
+	kref_get(&cb->refcount);
+
+	spin_unlock(&mgr->cb_lock);
+
+	return cb;
+
+}
+
+void hl_cb_put(struct hl_cb *cb)
+{
+	kref_put(&cb->refcount, cb_release);
+}
+
+void hl_cb_mgr_init(struct hl_cb_mgr *mgr)
+{
+	spin_lock_init(&mgr->cb_lock);
+	idr_init(&mgr->cb_handles);
+}
+
+void hl_cb_mgr_fini(struct hl_device *hdev, struct hl_cb_mgr *mgr)
+{
+	struct hl_cb *cb;
+	struct idr *idp;
+	u32 id;
+
+	idp = &mgr->cb_handles;
+
+	idr_for_each_entry(idp, cb, id) {
+		if (kref_put(&cb->refcount, cb_release) != 1)
+			dev_err(hdev->dev,
+				"CB %d for CTX ID %d is still alive\n",
+				id, cb->ctx_id);
+	}
+
+	idr_destroy(&mgr->cb_handles);
+}
+
+struct hl_cb *hl_cb_kernel_create(struct hl_device *hdev, u32 cb_size)
+{
+	u64 cb_handle;
+	struct hl_cb *cb;
+	int rc;
+
+	rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, cb_size, &cb_handle,
+			HL_KERNEL_ASID_ID);
+	if (rc) {
+		dev_err(hdev->dev, "Failed to allocate CB for KMD %d\n", rc);
+		return NULL;
+	}
+
+	cb_handle >>= PAGE_SHIFT;
+	cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr, (u32) cb_handle);
+	/* hl_cb_get should never fail here so use kernel WARN */
+	WARN(!cb, "Kernel CB handle invalid 0x%x\n", (u32) cb_handle);
+	if (!cb)
+		goto destroy_cb;
+
+	return cb;
+
+destroy_cb:
+	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb_handle << PAGE_SHIFT);
+
+	return NULL;
+}
+
+int hl_cb_pool_init(struct hl_device *hdev)
+{
+	struct hl_cb *cb;
+	int i;
+
+	INIT_LIST_HEAD(&hdev->cb_pool);
+	spin_lock_init(&hdev->cb_pool_lock);
+
+	for (i = 0 ; i < hdev->asic_prop.cb_pool_cb_cnt ; i++) {
+		cb = hl_cb_alloc(hdev, hdev->asic_prop.cb_pool_cb_size,
+				HL_KERNEL_ASID_ID);
+		if (cb) {
+			cb->is_pool = true;
+			list_add(&cb->pool_list, &hdev->cb_pool);
+		} else {
+			hl_cb_pool_fini(hdev);
+			return -ENOMEM;
+		}
+	}
+
+	return 0;
+}
+
+int hl_cb_pool_fini(struct hl_device *hdev)
+{
+	struct hl_cb *cb, *tmp;
+
+	list_for_each_entry_safe(cb, tmp, &hdev->cb_pool, pool_list) {
+		list_del(&cb->pool_list);
+		cb_fini(hdev, cb);
+	}
+
+	return 0;
+}
diff --git a/drivers/misc/habanalabs/device.c b/drivers/misc/habanalabs/device.c
index 2423588ecf22..77f0018e7aa9 100644
--- a/drivers/misc/habanalabs/device.c
+++ b/drivers/misc/habanalabs/device.c
@@ -52,6 +52,7 @@ static int hl_device_release(struct inode *inode, struct file *filp)
 {
 	struct hl_fpriv *hpriv = filp->private_data;
 
+	hl_cb_mgr_fini(hpriv->hdev, &hpriv->cb_mgr);
 	hl_ctx_mgr_fini(hpriv->hdev, &hpriv->ctx_mgr);
 
 	filp->private_data = NULL;
@@ -61,10 +62,34 @@ static int hl_device_release(struct inode *inode, struct file *filp)
 	return 0;
 }
 
+/*
+ * hl_mmap - mmap function for habanalabs device
+ *
+ * @*filp: pointer to file structure
+ * @*vma: pointer to vm_area_struct of the process
+ *
+ * Called when process does an mmap on habanalabs device. Call the device's mmap
+ * function at the end of the common code.
+ */
+static int hl_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+	struct hl_fpriv *hpriv = filp->private_data;
+
+	if ((vma->vm_pgoff & HL_MMAP_CB_MASK) == HL_MMAP_CB_MASK) {
+		vma->vm_pgoff ^= HL_MMAP_CB_MASK;
+		return hl_cb_mmap(hpriv, vma);
+	}
+
+	return hpriv->hdev->asic_funcs->mmap(hpriv, vma);
+}
+
 static const struct file_operations hl_ops = {
 	.owner = THIS_MODULE,
 	.open = hl_device_open,
-	.release = hl_device_release
+	.release = hl_device_release,
+	.mmap = hl_mmap,
+	.unlocked_ioctl = hl_ioctl,
+	.compat_ioctl = hl_ioctl
 };
 
 /*
@@ -149,6 +174,8 @@ static int device_early_init(struct hl_device *hdev)
 	if (rc)
 		goto early_fini;
 
+	hl_cb_mgr_init(&hdev->kernel_cb_mgr);
+
 	mutex_init(&hdev->fd_open_cnt_lock);
 	atomic_set(&hdev->fd_open_cnt, 0);
 
@@ -170,6 +197,8 @@ early_fini:
 static void device_early_fini(struct hl_device *hdev)
 {
 
+	hl_cb_mgr_fini(hdev, &hdev->kernel_cb_mgr);
+
 	hl_asid_fini(hdev);
 
 	if (hdev->asic_funcs->early_fini)
@@ -284,11 +313,21 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
 		goto free_ctx;
 	}
 
+	rc = hl_cb_pool_init(hdev);
+	if (rc) {
+		dev_err(hdev->dev, "failed to initialize CB pool\n");
+		goto release_ctx;
+	}
+
 	dev_notice(hdev->dev,
 		"Successfully added device to habanalabs driver\n");
 
 	return 0;
 
+release_ctx:
+	if (hl_ctx_put(hdev->kernel_ctx) != 1)
+		dev_err(hdev->dev,
+			"kernel ctx is still alive on initialization failure\n");
 free_ctx:
 	kfree(hdev->kernel_ctx);
 sw_fini:
@@ -325,6 +364,8 @@ void hl_device_fini(struct hl_device *hdev)
 	/* Mark device as disabled */
 	hdev->disabled = true;
 
+	hl_cb_pool_fini(hdev);
+
 	/* Release kernel context */
 	if ((hdev->kernel_ctx) && (hl_ctx_put(hdev->kernel_ctx) != 1))
 		dev_err(hdev->dev, "kernel ctx is still alive\n");
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index ceaffa9afd83..88b047f7d85d 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -82,6 +82,9 @@
 
 #define GOYA_MAX_INITIATORS		20
 
+#define GOYA_CB_POOL_CB_CNT		512
+#define GOYA_CB_POOL_CB_SIZE		0x20000		/* 128KB */
+
 static void goya_get_fixed_properties(struct hl_device *hdev)
 {
 	struct asic_fixed_properties *prop = &hdev->asic_prop;
@@ -109,6 +112,8 @@ static void goya_get_fixed_properties(struct hl_device *hdev)
 	prop->tpc_enabled_mask = TPC_ENABLED_MASK;
 
 	prop->high_pll = PLL_HIGH_DEFAULT;
+	prop->cb_pool_cb_cnt = GOYA_CB_POOL_CB_CNT;
+	prop->cb_pool_cb_size = GOYA_CB_POOL_CB_SIZE;
 }
 
 /*
@@ -597,6 +602,27 @@ int goya_resume(struct hl_device *hdev)
 	return 0;
 }
 
+int goya_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma)
+{
+	return -EINVAL;
+}
+
+int goya_cb_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
+		u64 kaddress, phys_addr_t paddress, u32 size)
+{
+	int rc;
+
+	vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
+			VM_DONTCOPY | VM_NORESERVE;
+
+	rc = remap_pfn_range(vma, vma->vm_start, paddress >> PAGE_SHIFT,
+				size, vma->vm_page_prot);
+	if (rc)
+		dev_err(hdev->dev, "remap_pfn_range error %d", rc);
+
+	return rc;
+}
+
 void *goya_dma_alloc_coherent(struct hl_device *hdev, size_t size,
 					dma_addr_t *dma_handle, gfp_t flags)
 {
@@ -616,6 +642,8 @@ static const struct hl_asic_funcs goya_funcs = {
 	.sw_fini = goya_sw_fini,
 	.suspend = goya_suspend,
 	.resume = goya_resume,
+	.mmap = goya_mmap,
+	.cb_mmap = goya_cb_mmap,
 	.dma_alloc_coherent = goya_dma_alloc_coherent,
 	.dma_free_coherent = goya_dma_free_coherent,
 };
diff --git a/drivers/misc/habanalabs/habanalabs.h b/drivers/misc/habanalabs/habanalabs.h
index ca8171ca3a04..63741c7224b6 100644
--- a/drivers/misc/habanalabs/habanalabs.h
+++ b/drivers/misc/habanalabs/habanalabs.h
@@ -14,9 +14,12 @@
 
 #define HL_NAME				"habanalabs"
 
+#define HL_MMAP_CB_MASK			(0x8000000000000000ull >> PAGE_SHIFT)
+
 #define HL_MAX_QUEUES			128
 
 struct hl_device;
+struct hl_fpriv;
 
 
 /**
@@ -44,6 +47,8 @@ struct hl_device;
  * @max_asid: maximum number of open contexts (ASIDs).
  * @completion_queues_count: number of completion queues.
  * @high_pll: high PLL frequency used by the device.
+ * @cb_pool_cb_cnt: number of CBs in the CB pool.
+ * @cb_pool_cb_size: size of each CB in the CB pool.
  * @tpc_enabled_mask: which TPCs are enabled.
  */
 struct asic_fixed_properties {
@@ -64,11 +69,60 @@ struct asic_fixed_properties {
 	u32			sram_size;
 	u32			max_asid;
 	u32			high_pll;
+	u32			cb_pool_cb_cnt;
+	u32			cb_pool_cb_size;
 	u8			completion_queues_count;
 	u8			tpc_enabled_mask;
 };
 
 
+/*
+ * Command Buffers
+ */
+
+#define HL_MAX_CB_SIZE		0x200000	/* 2MB */
+
+/**
+ * struct hl_cb_mgr - describes a Command Buffer Manager.
+ * @cb_lock: protects cb_handles.
+ * @cb_handles: an idr to hold all command buffer handles.
+ */
+struct hl_cb_mgr {
+	spinlock_t		cb_lock;
+	struct idr		cb_handles; /* protected by cb_lock */
+};
+
+/**
+ * struct hl_cb - describes a Command Buffer.
+ * @refcount: reference counter for usage of the CB.
+ * @hdev: pointer to device this CB belongs to.
+ * @lock: spinlock to protect mmap/cs flows.
+ * @pool_list: node in pool list of command buffers.
+ * @kernel_address: Holds the CB's kernel virtual address.
+ * @bus_address: Holds the CB's DMA address.
+ * @mmap_size: Holds the CB's size that was mmaped.
+ * @size: holds the CB's size.
+ * @id: the CB's ID.
+ * @ctx_id: holds the ID of the owner's context.
+ * @mmap: true if the CB is currently mmaped to user.
+ * @is_pool: true if CB was acquired from the pool, false otherwise.
+ */
+struct hl_cb {
+	struct kref		refcount;
+	struct hl_device	*hdev;
+	spinlock_t		lock;
+	struct list_head	pool_list;
+	u64			kernel_address;
+	dma_addr_t		bus_address;
+	u32			mmap_size;
+	u32			size;
+	u32			id;
+	u32			ctx_id;
+	u8			mmap;
+	u8			is_pool;
+};
+
+
 #define HL_QUEUE_LENGTH			256
 
 
@@ -97,6 +151,8 @@ enum hl_asic_type {
  * @sw_fini: tears down driver state, does not configure H/W.
  * @suspend: handles IP specific H/W or SW changes for suspend.
  * @resume: handles IP specific H/W or SW changes for resume.
+ * @mmap: mmap function, does nothing.
+ * @cb_mmap: maps a CB.
  * @dma_alloc_coherent: Allocate coherent DMA memory by calling
  *                      dma_alloc_coherent(). This is ASIC function because its
  *                      implementation is not trivial when the driver is loaded
@@ -113,6 +169,9 @@ struct hl_asic_funcs {
 	int (*sw_fini)(struct hl_device *hdev);
 	int (*suspend)(struct hl_device *hdev);
 	int (*resume)(struct hl_device *hdev);
+	int (*mmap)(struct hl_fpriv *hpriv, struct vm_area_struct *vma);
+	int (*cb_mmap)(struct hl_device *hdev, struct vm_area_struct *vma,
+			u64 kaddress, phys_addr_t paddress, u32 size);
 	void* (*dma_alloc_coherent)(struct hl_device *hdev, size_t size,
 					dma_addr_t *dma_handle, gfp_t flag);
 	void (*dma_free_coherent)(struct hl_device *hdev, size_t size,
@@ -163,6 +222,7 @@ struct hl_ctx_mgr {
  * @taskpid: current process ID.
  * @ctx: current executing context.
  * @ctx_mgr: context manager to handle multiple context for this FD.
+ * @cb_mgr: command buffer manager to handle multiple buffers for this FD.
  * @refcount: number of related contexts.
  */
 struct hl_fpriv {
@@ -171,6 +231,7 @@ struct hl_fpriv {
 	struct pid		*taskpid;
 	struct hl_ctx		*ctx; /* TODO: remove for multiple ctx */
 	struct hl_ctx_mgr	ctx_mgr;
+	struct hl_cb_mgr	cb_mgr;
 	struct kref		refcount;
 };
 
@@ -225,6 +286,7 @@ void hl_wreg(struct hl_device *hdev, u32 reg, u32 val);
  * @asic_name: ASIC specific nmae.
  * @asic_type: ASIC specific type.
  * @kernel_ctx: KMD context structure.
+ * @kernel_cb_mgr: command buffer manager for creating/destroying/handling CGs.
  * @dma_pool: DMA pool for small allocations.
  * @cpu_accessible_dma_mem: KMD <-> ArmCP shared memory CPU address.
  * @cpu_accessible_dma_address: KMD <-> ArmCP shared memory DMA address.
@@ -240,6 +302,8 @@ void hl_wreg(struct hl_device *hdev, u32 reg, u32 val);
  * @asic_prop: ASIC specific immutable properties.
  * @asic_funcs: ASIC specific functions.
  * @asic_specific: ASIC specific information to use only from ASIC files.
+ * @cb_pool: list of preallocated CBs.
+ * @cb_pool_lock: protects the CB pool.
  * @user_ctx: current user context executing.
  * @fd_open_cnt: number of open user processes.
  * @major: habanalabs KMD major.
@@ -255,6 +319,7 @@ struct hl_device {
 	char				asic_name[16];
 	enum hl_asic_type		asic_type;
 	struct hl_ctx			*kernel_ctx;
+	struct hl_cb_mgr		kernel_cb_mgr;
 	struct dma_pool			*dma_pool;
 	void				*cpu_accessible_dma_mem;
 	dma_addr_t			cpu_accessible_dma_address;
@@ -266,6 +331,10 @@ struct hl_device {
 	struct asic_fixed_properties	asic_prop;
 	const struct hl_asic_funcs	*asic_funcs;
 	void				*asic_specific;
+
+	struct list_head		cb_pool;
+	spinlock_t			cb_pool_lock;
+
 	/* TODO: remove user_ctx for multiple process support */
 	struct hl_ctx			*user_ctx;
 	atomic_t			fd_open_cnt;
@@ -334,6 +403,23 @@ int hl_device_resume(struct hl_device *hdev);
 void hl_hpriv_get(struct hl_fpriv *hpriv);
 void hl_hpriv_put(struct hl_fpriv *hpriv);
 
+int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr, u32 cb_size,
+		u64 *handle, int ctx_id);
+int hl_cb_destroy(struct hl_device *hdev, struct hl_cb_mgr *mgr, u64 cb_handle);
+int hl_cb_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma);
+struct hl_cb *hl_cb_get(struct hl_device *hdev,	struct hl_cb_mgr *mgr,
+			u32 handle);
+void hl_cb_put(struct hl_cb *cb);
+void hl_cb_mgr_init(struct hl_cb_mgr *mgr);
+void hl_cb_mgr_fini(struct hl_device *hdev, struct hl_cb_mgr *mgr);
+struct hl_cb *hl_cb_kernel_create(struct hl_device *hdev, u32 cb_size);
+int hl_cb_pool_init(struct hl_device *hdev);
+int hl_cb_pool_fini(struct hl_device *hdev);
+
 void goya_set_asic_funcs(struct hl_device *hdev);
 
+/* IOCTLs */
+long hl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
+int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data);
+
 #endif /* HABANALABSP_H_ */
diff --git a/drivers/misc/habanalabs/habanalabs_drv.c b/drivers/misc/habanalabs/habanalabs_drv.c
index 6fddd801aca3..8628d1d8f037 100644
--- a/drivers/misc/habanalabs/habanalabs_drv.c
+++ b/drivers/misc/habanalabs/habanalabs_drv.c
@@ -116,6 +116,7 @@ int hl_device_open(struct inode *inode, struct file *filp)
 	kref_init(&hpriv->refcount);
 	nonseekable_open(inode, filp);
 
+	hl_cb_mgr_init(&hpriv->cb_mgr);
 	hl_ctx_mgr_init(&hpriv->ctx_mgr);
 
 	rc = hl_ctx_create(hdev, hpriv);
@@ -131,6 +132,7 @@ int hl_device_open(struct inode *inode, struct file *filp)
 out_err:
 	filp->private_data = NULL;
 	hl_ctx_mgr_fini(hpriv->hdev, &hpriv->ctx_mgr);
+	hl_cb_mgr_fini(hpriv->hdev, &hpriv->cb_mgr);
 	kfree(hpriv);
 
 close_device:
diff --git a/drivers/misc/habanalabs/habanalabs_ioctl.c b/drivers/misc/habanalabs/habanalabs_ioctl.c
new file mode 100644
index 000000000000..e53265fe9543
--- /dev/null
+++ b/drivers/misc/habanalabs/habanalabs_ioctl.c
@@ -0,0 +1,99 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2016-2019 HabanaLabs, Ltd.
+ * All Rights Reserved.
+ */
+
+#include <uapi/misc/habanalabs.h>
+#include "habanalabs.h"
+
+#include <linux/fs.h>
+#include <linux/uaccess.h>
+#include <linux/slab.h>
+
+#define HL_IOCTL_DEF(ioctl, _func) \
+	[_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func}
+
+static const struct hl_ioctl_desc hl_ioctls[] = {
+	HL_IOCTL_DEF(HL_IOCTL_CB, hl_cb_ioctl)
+};
+
+#define HL_CORE_IOCTL_COUNT	ARRAY_SIZE(hl_ioctls)
+
+long hl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
+{
+	struct hl_fpriv *hpriv = filep->private_data;
+	struct hl_device *hdev = hpriv->hdev;
+	hl_ioctl_t *func;
+	const struct hl_ioctl_desc *ioctl = NULL;
+	unsigned int nr = _IOC_NR(cmd);
+	char stack_kdata[128] = {0};
+	char *kdata = NULL;
+	unsigned int usize, asize;
+	int retcode;
+
+	if ((nr >= HL_COMMAND_START) && (nr < HL_COMMAND_END)) {
+		u32 hl_size;
+
+		ioctl = &hl_ioctls[nr];
+
+		hl_size = _IOC_SIZE(ioctl->cmd);
+		usize = asize = _IOC_SIZE(cmd);
+		if (hl_size > asize)
+			asize = hl_size;
+
+		cmd = ioctl->cmd;
+	} else {
+		dev_err(hdev->dev, "invalid ioctl: pid=%d, nr=0x%02x\n",
+			  task_pid_nr(current), nr);
+		return -ENOTTY;
+	}
+
+	/* Do not trust userspace, use our own definition */
+	func = ioctl->func;
+
+	if (unlikely(!func)) {
+		dev_dbg(hdev->dev, "no function\n");
+		retcode = -ENOTTY;
+		goto out_err;
+	}
+
+	if (cmd & (IOC_IN | IOC_OUT)) {
+		if (asize <= sizeof(stack_kdata)) {
+			kdata = stack_kdata;
+		} else {
+			kdata = kzalloc(asize, GFP_KERNEL);
+			if (!kdata) {
+				retcode = -ENOMEM;
+				goto out_err;
+			}
+		}
+	}
+
+	if (cmd & IOC_IN) {
+		if (copy_from_user(kdata, (void __user *)arg, usize)) {
+			retcode = -EFAULT;
+			goto out_err;
+		}
+	} else if (cmd & IOC_OUT) {
+		memset(kdata, 0, usize);
+	}
+
+	retcode = func(hpriv, kdata);
+
+	if (cmd & IOC_OUT)
+		if (copy_to_user((void __user *)arg, kdata, usize))
+			retcode = -EFAULT;
+
+out_err:
+	if (retcode)
+		dev_dbg(hdev->dev,
+			"error in ioctl: pid=%d, cmd=0x%02x, nr=0x%02x\n",
+			  task_pid_nr(current), cmd, nr);
+
+	if (kdata != stack_kdata)
+		kfree(kdata);
+
+	return retcode;
+}
diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h
index a0ec23adf8f5..a8edfd3e9c95 100644
--- a/include/uapi/misc/habanalabs.h
+++ b/include/uapi/misc/habanalabs.h
@@ -17,4 +17,50 @@
  */
 #define GOYA_KMD_SRAM_RESERVED_SIZE_FROM_START	0x8000	/* 32KB */
 
+/* Opcode to create a new command buffer */
+#define HL_CB_OP_CREATE		0
+/* Opcode to destroy previously created command buffer */
+#define HL_CB_OP_DESTROY	1
+
+struct hl_cb_in {
+	/* Handle of CB or 0 if we want to create one */
+	__u64 cb_handle;
+	/* HL_CB_OP_* */
+	__u32 op;
+	/* Size of CB. Minimum requested size must be PAGE_SIZE */
+	__u32 cb_size;
+	/* Context ID - Currently not in use */
+	__u32 ctx_id;
+	__u32 pad;
+};
+
+struct hl_cb_out {
+	/* Handle of CB */
+	__u64 cb_handle;
+};
+
+union hl_cb_args {
+	struct hl_cb_in in;
+	struct hl_cb_out out;
+};
+
+/*
+ * Command Buffer
+ * - Request a Command Buffer
+ * - Destroy a Command Buffer
+ *
+ * The command buffers are memory blocks that reside in DMA-able address
+ * space and are physically contiguous so they can be accessed by the device
+ * directly. They are allocated using the coherent DMA API.
+ *
+ * When creating a new CB, the IOCTL returns a handle of it, and the user-space
+ * process needs to use that handle to mmap the buffer so it can access them.
+ *
+ */
+#define HL_IOCTL_CB		\
+		_IOWR('H', 0x02, union hl_cb_args)
+
+#define HL_COMMAND_START	0x02
+#define HL_COMMAND_END		0x03
+
 #endif /* HABANALABS_H_ */
-- 
cgit v1.2.3


From 9494a8dd8d22cbff8ce358aaa223fffe1b070cb0 Mon Sep 17 00:00:00 2001
From: Oded Gabbay <oded.gabbay@gmail.com>
Date: Sat, 16 Feb 2019 00:39:17 +0200
Subject: habanalabs: add h/w queues module

This patch adds the H/W queues module and the code to initialize Goya's
various compute and DMA engines and their queues.

Goya has 5 DMA channels, 8 TPC engines and a single MME engine. For each
channel/engine, there is a H/W queue logic which is used to pass commands
from the user to the H/W. That logic is called QMAN.

There are two types of QMANs: external and internal. The DMA QMANs are
considered external while the TPC and MME QMANs are considered internal.
For each external queue there is a completion queue, which is located on
the Host memory.

The differences between external and internal QMANs are:

1. The location of the queue's memory. External QMANs are located on the
   Host memory while internal QMANs are located on the on-chip memory.

2. The external QMAN write an entry to a completion queue and sends an
   MSI-X interrupt upon completion of a command buffer that was given to
   it. The internal QMAN doesn't do that.

Reviewed-by: Mike Rapoport <rppt@linux.ibm.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/habanalabs/Makefile                   |    2 +-
 drivers/misc/habanalabs/device.c                   |   75 +-
 drivers/misc/habanalabs/goya/goya.c                | 1527 ++++++++++++++++++--
 drivers/misc/habanalabs/goya/goyaP.h               |    7 +
 drivers/misc/habanalabs/habanalabs.h               |  174 ++-
 drivers/misc/habanalabs/habanalabs_drv.c           |    5 +
 drivers/misc/habanalabs/hw_queue.c                 |  400 +++++
 drivers/misc/habanalabs/include/armcp_if.h         |  292 ++++
 .../habanalabs/include/goya/goya_async_events.h    |  186 +++
 .../misc/habanalabs/include/goya/goya_packets.h    |  129 ++
 drivers/misc/habanalabs/include/qman_if.h          |   56 +
 drivers/misc/habanalabs/irq.c                      |  149 ++
 include/uapi/misc/habanalabs.h                     |   29 +
 13 files changed, 2915 insertions(+), 116 deletions(-)
 create mode 100644 drivers/misc/habanalabs/hw_queue.c
 create mode 100644 drivers/misc/habanalabs/include/goya/goya_async_events.h
 create mode 100644 drivers/misc/habanalabs/include/goya/goya_packets.h
 create mode 100644 drivers/misc/habanalabs/include/qman_if.h
 create mode 100644 drivers/misc/habanalabs/irq.c

(limited to 'include')

diff --git a/drivers/misc/habanalabs/Makefile b/drivers/misc/habanalabs/Makefile
index 2530c9b78ca4..c07f3ccb57dc 100644
--- a/drivers/misc/habanalabs/Makefile
+++ b/drivers/misc/habanalabs/Makefile
@@ -5,7 +5,7 @@
 obj-m	:= habanalabs.o
 
 habanalabs-y := habanalabs_drv.o device.o context.o asid.o habanalabs_ioctl.o \
-		command_buffer.o
+		command_buffer.o hw_queue.o irq.o
 
 include $(src)/goya/Makefile
 habanalabs-y += $(HL_GOYA_FILES)
diff --git a/drivers/misc/habanalabs/device.c b/drivers/misc/habanalabs/device.c
index 5eefc2952be5..06e2b7f32499 100644
--- a/drivers/misc/habanalabs/device.c
+++ b/drivers/misc/habanalabs/device.c
@@ -174,13 +174,23 @@ static int device_early_init(struct hl_device *hdev)
 	if (rc)
 		goto early_fini;
 
+	hdev->cq_wq = alloc_workqueue("hl-free-jobs", WQ_UNBOUND, 0);
+	if (hdev->cq_wq == NULL) {
+		dev_err(hdev->dev, "Failed to allocate CQ workqueue\n");
+		rc = -ENOMEM;
+		goto asid_fini;
+	}
+
 	hl_cb_mgr_init(&hdev->kernel_cb_mgr);
 
 	mutex_init(&hdev->fd_open_cnt_lock);
+	mutex_init(&hdev->send_cpu_message_lock);
 	atomic_set(&hdev->fd_open_cnt, 0);
 
 	return 0;
 
+asid_fini:
+	hl_asid_fini(hdev);
 early_fini:
 	if (hdev->asic_funcs->early_fini)
 		hdev->asic_funcs->early_fini(hdev);
@@ -196,9 +206,12 @@ early_fini:
  */
 static void device_early_fini(struct hl_device *hdev)
 {
+	mutex_destroy(&hdev->send_cpu_message_lock);
 
 	hl_cb_mgr_fini(hdev, &hdev->kernel_cb_mgr);
 
+	destroy_workqueue(hdev->cq_wq);
+
 	hl_asid_fini(hdev);
 
 	if (hdev->asic_funcs->early_fini)
@@ -277,7 +290,7 @@ int hl_device_resume(struct hl_device *hdev)
  */
 int hl_device_init(struct hl_device *hdev, struct class *hclass)
 {
-	int rc;
+	int i, rc, cq_ready_cnt;
 
 	/* Create device */
 	rc = device_setup_cdev(hdev, hclass, hdev->id, &hl_ops);
@@ -298,11 +311,48 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
 	if (rc)
 		goto early_fini;
 
+	/*
+	 * Initialize the H/W queues. Must be done before hw_init, because
+	 * there the addresses of the kernel queue are being written to the
+	 * registers of the device
+	 */
+	rc = hl_hw_queues_create(hdev);
+	if (rc) {
+		dev_err(hdev->dev, "failed to initialize kernel queues\n");
+		goto sw_fini;
+	}
+
+	/*
+	 * Initialize the completion queues. Must be done before hw_init,
+	 * because there the addresses of the completion queues are being
+	 * passed as arguments to request_irq
+	 */
+	hdev->completion_queue =
+			kcalloc(hdev->asic_prop.completion_queues_count,
+				sizeof(*hdev->completion_queue), GFP_KERNEL);
+
+	if (!hdev->completion_queue) {
+		dev_err(hdev->dev, "failed to allocate completion queues\n");
+		rc = -ENOMEM;
+		goto hw_queues_destroy;
+	}
+
+	for (i = 0, cq_ready_cnt = 0;
+			i < hdev->asic_prop.completion_queues_count;
+			i++, cq_ready_cnt++) {
+		rc = hl_cq_init(hdev, &hdev->completion_queue[i], i);
+		if (rc) {
+			dev_err(hdev->dev,
+				"failed to initialize completion queue\n");
+			goto cq_fini;
+		}
+	}
+
 	/* Allocate the kernel context */
 	hdev->kernel_ctx = kzalloc(sizeof(*hdev->kernel_ctx), GFP_KERNEL);
 	if (!hdev->kernel_ctx) {
 		rc = -ENOMEM;
-		goto sw_fini;
+		goto cq_fini;
 	}
 
 	hdev->user_ctx = NULL;
@@ -328,6 +378,14 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
 
 	hdev->disabled = false;
 
+	/* Check that the communication with the device is working */
+	rc = hdev->asic_funcs->test_queues(hdev);
+	if (rc) {
+		dev_err(hdev->dev, "Failed to detect if device is alive\n");
+		rc = 0;
+		goto out_disabled;
+	}
+
 	dev_notice(hdev->dev,
 		"Successfully added device to habanalabs driver\n");
 
@@ -339,6 +397,12 @@ release_ctx:
 			"kernel ctx is still alive on initialization failure\n");
 free_ctx:
 	kfree(hdev->kernel_ctx);
+cq_fini:
+	for (i = 0 ; i < cq_ready_cnt ; i++)
+		hl_cq_fini(hdev, &hdev->completion_queue[i]);
+	kfree(hdev->completion_queue);
+hw_queues_destroy:
+	hl_hw_queues_destroy(hdev);
 sw_fini:
 	hdev->asic_funcs->sw_fini(hdev);
 early_fini:
@@ -368,6 +432,7 @@ out_disabled:
  */
 void hl_device_fini(struct hl_device *hdev)
 {
+	int i;
 	dev_info(hdev->dev, "Removing device\n");
 
 	/* Mark device as disabled */
@@ -382,6 +447,12 @@ void hl_device_fini(struct hl_device *hdev)
 	/* Reset the H/W. It will be in idle state after this returns */
 	hdev->asic_funcs->hw_fini(hdev, true);
 
+	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
+		hl_cq_fini(hdev, &hdev->completion_queue[i]);
+	kfree(hdev->completion_queue);
+
+	hl_hw_queues_destroy(hdev);
+
 	/* Call ASIC S/W finalize function */
 	hdev->asic_funcs->sw_fini(hdev);
 
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index 1fa5b91fd703..a45a183d4e5c 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -90,6 +90,26 @@
 static void goya_get_fixed_properties(struct hl_device *hdev)
 {
 	struct asic_fixed_properties *prop = &hdev->asic_prop;
+	int i;
+
+	for (i = 0 ; i < NUMBER_OF_EXT_HW_QUEUES ; i++) {
+		prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
+		prop->hw_queues_props[i].kmd_only = 0;
+	}
+
+	for (; i < NUMBER_OF_EXT_HW_QUEUES + NUMBER_OF_CPU_HW_QUEUES ; i++) {
+		prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
+		prop->hw_queues_props[i].kmd_only = 1;
+	}
+
+	for (; i < NUMBER_OF_EXT_HW_QUEUES + NUMBER_OF_CPU_HW_QUEUES +
+			NUMBER_OF_INT_HW_QUEUES; i++) {
+		prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
+		prop->hw_queues_props[i].kmd_only = 0;
+	}
+
+	for (; i < HL_MAX_QUEUES; i++)
+		prop->hw_queues_props[i].type = QUEUE_TYPE_NA;
 
 	prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
 
@@ -118,6 +138,18 @@ static void goya_get_fixed_properties(struct hl_device *hdev)
 	prop->high_pll = PLL_HIGH_DEFAULT;
 }
 
+int goya_send_pci_access_msg(struct hl_device *hdev, u32 opcode)
+{
+	struct armcp_packet pkt;
+
+	memset(&pkt, 0, sizeof(pkt));
+
+	pkt.ctl = opcode << ARMCP_PKT_CTL_OPCODE_SHIFT;
+
+	return hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt,
+			sizeof(pkt), HL_DEVICE_TIMEOUT_USEC, NULL);
+}
+
 /*
  * goya_pci_bars_map - Map PCI BARS of Goya device
  *
@@ -510,6 +542,8 @@ static int goya_sw_init(struct hl_device *hdev)
 	if (!goya)
 		return -ENOMEM;
 
+	goya->test_cpu_queue = goya_test_cpu_queue;
+
 	/* according to goya_init_iatu */
 	goya->ddr_bar_cur_addr = DRAM_PHYS_BASE;
 	hdev->asic_specific = goya;
@@ -596,6 +630,311 @@ int goya_sw_fini(struct hl_device *hdev)
 	return 0;
 }
 
+static void goya_init_dma_qman(struct hl_device *hdev, int dma_id,
+		dma_addr_t bus_address)
+{
+	struct goya_device *goya = hdev->asic_specific;
+	u32 mtr_base_lo, mtr_base_hi;
+	u32 so_base_lo, so_base_hi;
+	u32 gic_base_lo, gic_base_hi;
+	u32 reg_off = dma_id * (mmDMA_QM_1_PQ_PI - mmDMA_QM_0_PQ_PI);
+
+	mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
+	mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
+	so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
+	so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
+
+	gic_base_lo =
+		lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
+	gic_base_hi =
+		upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
+
+	WREG32(mmDMA_QM_0_PQ_BASE_LO + reg_off, lower_32_bits(bus_address));
+	WREG32(mmDMA_QM_0_PQ_BASE_HI + reg_off, upper_32_bits(bus_address));
+
+	WREG32(mmDMA_QM_0_PQ_SIZE + reg_off, ilog2(HL_QUEUE_LENGTH));
+	WREG32(mmDMA_QM_0_PQ_PI + reg_off, 0);
+	WREG32(mmDMA_QM_0_PQ_CI + reg_off, 0);
+
+	WREG32(mmDMA_QM_0_CP_MSG_BASE0_ADDR_LO + reg_off, mtr_base_lo);
+	WREG32(mmDMA_QM_0_CP_MSG_BASE0_ADDR_HI + reg_off, mtr_base_hi);
+	WREG32(mmDMA_QM_0_CP_MSG_BASE1_ADDR_LO + reg_off, so_base_lo);
+	WREG32(mmDMA_QM_0_CP_MSG_BASE1_ADDR_HI + reg_off, so_base_hi);
+	WREG32(mmDMA_QM_0_GLBL_ERR_ADDR_LO + reg_off, gic_base_lo);
+	WREG32(mmDMA_QM_0_GLBL_ERR_ADDR_HI + reg_off, gic_base_hi);
+	WREG32(mmDMA_QM_0_GLBL_ERR_WDATA + reg_off,
+			GOYA_ASYNC_EVENT_ID_DMA0_QM + dma_id);
+
+	/* PQ has buffer of 2 cache lines, while CQ has 8 lines */
+	WREG32(mmDMA_QM_0_PQ_CFG1 + reg_off, 0x00020002);
+	WREG32(mmDMA_QM_0_CQ_CFG1 + reg_off, 0x00080008);
+
+	if (dma_id == 0)
+		WREG32(mmDMA_QM_0_GLBL_PROT + reg_off, QMAN_DMA_FULLY_TRUSTED);
+	else
+		if (goya->hw_cap_initialized & HW_CAP_MMU)
+			WREG32(mmDMA_QM_0_GLBL_PROT + reg_off,
+					QMAN_DMA_PARTLY_TRUSTED);
+		else
+			WREG32(mmDMA_QM_0_GLBL_PROT + reg_off,
+					QMAN_DMA_FULLY_TRUSTED);
+
+	WREG32(mmDMA_QM_0_GLBL_ERR_CFG + reg_off, QMAN_DMA_ERR_MSG_EN);
+	WREG32(mmDMA_QM_0_GLBL_CFG0 + reg_off, QMAN_DMA_ENABLE);
+}
+
+static void goya_init_dma_ch(struct hl_device *hdev, int dma_id)
+{
+	u32 gic_base_lo, gic_base_hi;
+	u64 sob_addr;
+	u32 reg_off = dma_id * (mmDMA_CH_1_CFG1 - mmDMA_CH_0_CFG1);
+
+	gic_base_lo =
+		lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
+	gic_base_hi =
+		upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
+
+	WREG32(mmDMA_CH_0_ERRMSG_ADDR_LO + reg_off, gic_base_lo);
+	WREG32(mmDMA_CH_0_ERRMSG_ADDR_HI + reg_off, gic_base_hi);
+	WREG32(mmDMA_CH_0_ERRMSG_WDATA + reg_off,
+			GOYA_ASYNC_EVENT_ID_DMA0_CH + dma_id);
+
+	if (dma_id) {
+		sob_addr = CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1000 +
+				(dma_id - 1) * 4;
+		WREG32(mmDMA_CH_0_WR_COMP_ADDR_LO + reg_off,
+				lower_32_bits(sob_addr));
+		WREG32(mmDMA_CH_0_WR_COMP_ADDR_HI + reg_off,
+				upper_32_bits(sob_addr));
+		WREG32(mmDMA_CH_0_WR_COMP_WDATA + reg_off, 0x80000001);
+	}
+}
+
+/*
+ * goya_init_dma_qmans - Initialize QMAN DMA registers
+ *
+ * @hdev: pointer to hl_device structure
+ *
+ * Initialize the H/W registers of the QMAN DMA channels
+ *
+ */
+static void goya_init_dma_qmans(struct hl_device *hdev)
+{
+	struct goya_device *goya = hdev->asic_specific;
+	struct hl_hw_queue *q;
+	dma_addr_t bus_address;
+	int i;
+
+	if (goya->hw_cap_initialized & HW_CAP_DMA)
+		return;
+
+	q = &hdev->kernel_queues[0];
+
+	for (i = 0 ; i < NUMBER_OF_EXT_HW_QUEUES ; i++, q++) {
+		bus_address = q->bus_address +
+				hdev->asic_prop.host_phys_base_address;
+
+		goya_init_dma_qman(hdev, i, bus_address);
+		goya_init_dma_ch(hdev, i);
+	}
+
+	goya->hw_cap_initialized |= HW_CAP_DMA;
+}
+
+/*
+ * goya_disable_external_queues - Disable external queues
+ *
+ * @hdev: pointer to hl_device structure
+ *
+ */
+static void goya_disable_external_queues(struct hl_device *hdev)
+{
+	WREG32(mmDMA_QM_0_GLBL_CFG0, 0);
+	WREG32(mmDMA_QM_1_GLBL_CFG0, 0);
+	WREG32(mmDMA_QM_2_GLBL_CFG0, 0);
+	WREG32(mmDMA_QM_3_GLBL_CFG0, 0);
+	WREG32(mmDMA_QM_4_GLBL_CFG0, 0);
+}
+
+static int goya_stop_queue(struct hl_device *hdev, u32 cfg_reg,
+				u32 cp_sts_reg, u32 glbl_sts0_reg)
+{
+	int rc;
+	u32 status;
+
+	/* use the values of TPC0 as they are all the same*/
+
+	WREG32(cfg_reg, 1 << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
+
+	status = RREG32(cp_sts_reg);
+	if (status & TPC0_QM_CP_STS_FENCE_IN_PROGRESS_MASK) {
+		rc = hl_poll_timeout(
+			hdev,
+			cp_sts_reg,
+			status,
+			!(status & TPC0_QM_CP_STS_FENCE_IN_PROGRESS_MASK),
+			1000,
+			QMAN_FENCE_TIMEOUT_USEC);
+
+		/* if QMAN is stuck in fence no need to check for stop */
+		if (rc)
+			return 0;
+	}
+
+	rc = hl_poll_timeout(
+		hdev,
+		glbl_sts0_reg,
+		status,
+		(status & TPC0_QM_GLBL_STS0_CP_IS_STOP_MASK),
+		1000,
+		QMAN_STOP_TIMEOUT_USEC);
+
+	if (rc) {
+		dev_err(hdev->dev,
+			"Timeout while waiting for QMAN to stop\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/*
+ * goya_stop_external_queues - Stop external queues
+ *
+ * @hdev: pointer to hl_device structure
+ *
+ * Returns 0 on success
+ *
+ */
+static int goya_stop_external_queues(struct hl_device *hdev)
+{
+	int rc, retval = 0;
+
+	rc = goya_stop_queue(hdev,
+			mmDMA_QM_0_GLBL_CFG1,
+			mmDMA_QM_0_CP_STS,
+			mmDMA_QM_0_GLBL_STS0);
+
+	if (rc) {
+		dev_err(hdev->dev, "failed to stop DMA QMAN 0\n");
+		retval = -EIO;
+	}
+
+	rc = goya_stop_queue(hdev,
+			mmDMA_QM_1_GLBL_CFG1,
+			mmDMA_QM_1_CP_STS,
+			mmDMA_QM_1_GLBL_STS0);
+
+	if (rc) {
+		dev_err(hdev->dev, "failed to stop DMA QMAN 1\n");
+		retval = -EIO;
+	}
+
+	rc = goya_stop_queue(hdev,
+			mmDMA_QM_2_GLBL_CFG1,
+			mmDMA_QM_2_CP_STS,
+			mmDMA_QM_2_GLBL_STS0);
+
+	if (rc) {
+		dev_err(hdev->dev, "failed to stop DMA QMAN 2\n");
+		retval = -EIO;
+	}
+
+	rc = goya_stop_queue(hdev,
+			mmDMA_QM_3_GLBL_CFG1,
+			mmDMA_QM_3_CP_STS,
+			mmDMA_QM_3_GLBL_STS0);
+
+	if (rc) {
+		dev_err(hdev->dev, "failed to stop DMA QMAN 3\n");
+		retval = -EIO;
+	}
+
+	rc = goya_stop_queue(hdev,
+			mmDMA_QM_4_GLBL_CFG1,
+			mmDMA_QM_4_CP_STS,
+			mmDMA_QM_4_GLBL_STS0);
+
+	if (rc) {
+		dev_err(hdev->dev, "failed to stop DMA QMAN 4\n");
+		retval = -EIO;
+	}
+
+	return retval;
+}
+
+static void goya_resume_external_queues(struct hl_device *hdev)
+{
+	WREG32(mmDMA_QM_0_GLBL_CFG1, 0);
+	WREG32(mmDMA_QM_1_GLBL_CFG1, 0);
+	WREG32(mmDMA_QM_2_GLBL_CFG1, 0);
+	WREG32(mmDMA_QM_3_GLBL_CFG1, 0);
+	WREG32(mmDMA_QM_4_GLBL_CFG1, 0);
+}
+
+/*
+ * goya_init_cpu_queues - Initialize PQ/CQ/EQ of CPU
+ *
+ * @hdev: pointer to hl_device structure
+ *
+ * Returns 0 on success
+ *
+ */
+int goya_init_cpu_queues(struct hl_device *hdev)
+{
+	struct goya_device *goya = hdev->asic_specific;
+	dma_addr_t bus_address;
+	u32 status;
+	struct hl_hw_queue *cpu_pq = &hdev->kernel_queues[GOYA_QUEUE_ID_CPU_PQ];
+	int err;
+
+	if (!hdev->cpu_queues_enable)
+		return 0;
+
+	if (goya->hw_cap_initialized & HW_CAP_CPU_Q)
+		return 0;
+
+	bus_address = cpu_pq->bus_address +
+			hdev->asic_prop.host_phys_base_address;
+	WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_0, lower_32_bits(bus_address));
+	WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_1, upper_32_bits(bus_address));
+
+	bus_address = hdev->cpu_accessible_dma_address +
+			hdev->asic_prop.host_phys_base_address;
+	WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_8, lower_32_bits(bus_address));
+	WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_9, upper_32_bits(bus_address));
+
+	WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_5, HL_QUEUE_SIZE_IN_BYTES);
+	WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_10, CPU_ACCESSIBLE_MEM_SIZE);
+
+	/* Used for EQ CI */
+	WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_6, 0);
+
+	WREG32(mmCPU_IF_PF_PQ_PI, 0);
+
+	WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_7, PQ_INIT_STATUS_READY_FOR_CP);
+
+	WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
+			GOYA_ASYNC_EVENT_ID_PI_UPDATE);
+
+	err = hl_poll_timeout(
+		hdev,
+		mmPSOC_GLOBAL_CONF_SCRATCHPAD_7,
+		status,
+		(status == PQ_INIT_STATUS_READY_FOR_HOST),
+		1000,
+		GOYA_CPU_TIMEOUT_USEC);
+
+	if (err) {
+		dev_err(hdev->dev,
+			"Failed to communicate with ARM CPU (ArmCP timeout)\n");
+		return -EIO;
+	}
+
+	goya->hw_cap_initialized |= HW_CAP_CPU_Q;
+	return 0;
+}
+
 static void goya_set_pll_refclk(struct hl_device *hdev)
 {
 	WREG32(mmCPU_PLL_DIV_SEL_0, 0x0);
@@ -1023,144 +1362,644 @@ static void goya_init_golden_registers(struct hl_device *hdev)
 	goya->hw_cap_initialized |= HW_CAP_GOLDEN;
 }
 
-
-/*
- * goya_push_fw_to_device - Push FW code to device
- *
- * @hdev: pointer to hl_device structure
- *
- * Copy fw code from firmware file to device memory.
- * Returns 0 on success
- *
- */
-static int goya_push_fw_to_device(struct hl_device *hdev, const char *fw_name,
-					void __iomem *dst)
+static void goya_init_mme_qman(struct hl_device *hdev)
 {
-	const struct firmware *fw;
-	const u64 *fw_data;
-	size_t fw_size, i;
-	int rc;
+	u32 mtr_base_lo, mtr_base_hi;
+	u32 so_base_lo, so_base_hi;
+	u32 gic_base_lo, gic_base_hi;
+	u64 qman_base_addr;
 
-	rc = request_firmware(&fw, fw_name, hdev->dev);
+	mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
+	mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
+	so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
+	so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
 
-	if (rc) {
-		dev_err(hdev->dev, "Failed to request %s\n", fw_name);
-		goto out;
-	}
+	gic_base_lo =
+		lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
+	gic_base_hi =
+		upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
 
-	fw_size = fw->size;
-	if ((fw_size % 4) != 0) {
-		dev_err(hdev->dev, "illegal %s firmware size %zu\n",
-			fw_name, fw_size);
-		rc = -EINVAL;
-		goto out;
-	}
+	qman_base_addr = hdev->asic_prop.sram_base_address +
+				MME_QMAN_BASE_OFFSET;
 
-	dev_dbg(hdev->dev, "%s firmware size == %zu\n", fw_name, fw_size);
+	WREG32(mmMME_QM_PQ_BASE_LO, lower_32_bits(qman_base_addr));
+	WREG32(mmMME_QM_PQ_BASE_HI, upper_32_bits(qman_base_addr));
+	WREG32(mmMME_QM_PQ_SIZE, ilog2(MME_QMAN_LENGTH));
+	WREG32(mmMME_QM_PQ_PI, 0);
+	WREG32(mmMME_QM_PQ_CI, 0);
+	WREG32(mmMME_QM_CP_LDMA_SRC_BASE_LO_OFFSET, 0x10C0);
+	WREG32(mmMME_QM_CP_LDMA_SRC_BASE_HI_OFFSET, 0x10C4);
+	WREG32(mmMME_QM_CP_LDMA_TSIZE_OFFSET, 0x10C8);
+	WREG32(mmMME_QM_CP_LDMA_COMMIT_OFFSET, 0x10CC);
 
-	fw_data = (const u64 *) fw->data;
+	WREG32(mmMME_QM_CP_MSG_BASE0_ADDR_LO, mtr_base_lo);
+	WREG32(mmMME_QM_CP_MSG_BASE0_ADDR_HI, mtr_base_hi);
+	WREG32(mmMME_QM_CP_MSG_BASE1_ADDR_LO, so_base_lo);
+	WREG32(mmMME_QM_CP_MSG_BASE1_ADDR_HI, so_base_hi);
 
-	if ((fw->size % 8) != 0)
-		fw_size -= 8;
+	/* QMAN CQ has 8 cache lines */
+	WREG32(mmMME_QM_CQ_CFG1, 0x00080008);
 
-	for (i = 0 ; i < fw_size ; i += 8, fw_data++, dst += 8) {
-		if (!(i & (0x80000 - 1))) {
-			dev_dbg(hdev->dev,
-				"copied so far %zu out of %zu for %s firmware",
-				i, fw_size, fw_name);
-			usleep_range(20, 100);
-		}
+	WREG32(mmMME_QM_GLBL_ERR_ADDR_LO, gic_base_lo);
+	WREG32(mmMME_QM_GLBL_ERR_ADDR_HI, gic_base_hi);
 
-		writeq(*fw_data, dst);
-	}
+	WREG32(mmMME_QM_GLBL_ERR_WDATA, GOYA_ASYNC_EVENT_ID_MME_QM);
 
-	if ((fw->size % 8) != 0)
-		writel(*(const u32 *) fw_data, dst);
+	WREG32(mmMME_QM_GLBL_ERR_CFG, QMAN_MME_ERR_MSG_EN);
 
-out:
-	release_firmware(fw);
-	return rc;
+	WREG32(mmMME_QM_GLBL_PROT, QMAN_MME_ERR_PROT);
+
+	WREG32(mmMME_QM_GLBL_CFG0, QMAN_MME_ENABLE);
 }
 
-static int goya_pldm_init_cpu(struct hl_device *hdev)
+static void goya_init_mme_cmdq(struct hl_device *hdev)
 {
-	char fw_name[200];
-	void __iomem *dst;
-	u32 val, unit_rst_val;
-	int rc;
+	u32 mtr_base_lo, mtr_base_hi;
+	u32 so_base_lo, so_base_hi;
+	u32 gic_base_lo, gic_base_hi;
+	u64 qman_base_addr;
 
-	/* Must initialize SRAM scrambler before pushing u-boot to SRAM */
-	goya_init_golden_registers(hdev);
+	mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
+	mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
+	so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
+	so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
 
-	/* Put ARM cores into reset */
-	WREG32(mmCPU_CA53_CFG_ARM_RST_CONTROL, CPU_RESET_ASSERT);
-	val = RREG32(mmCPU_CA53_CFG_ARM_RST_CONTROL);
+	gic_base_lo =
+		lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
+	gic_base_hi =
+		upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
 
-	/* Reset the CA53 MACRO */
-	unit_rst_val = RREG32(mmPSOC_GLOBAL_CONF_UNIT_RST_N);
-	WREG32(mmPSOC_GLOBAL_CONF_UNIT_RST_N, CA53_RESET);
-	val = RREG32(mmPSOC_GLOBAL_CONF_UNIT_RST_N);
-	WREG32(mmPSOC_GLOBAL_CONF_UNIT_RST_N, unit_rst_val);
-	val = RREG32(mmPSOC_GLOBAL_CONF_UNIT_RST_N);
+	qman_base_addr = hdev->asic_prop.sram_base_address +
+				MME_QMAN_BASE_OFFSET;
 
-	snprintf(fw_name, sizeof(fw_name), "habanalabs/goya/goya-u-boot.bin");
-	dst = hdev->pcie_bar[SRAM_CFG_BAR_ID] + UBOOT_FW_OFFSET;
-	rc = goya_push_fw_to_device(hdev, fw_name, dst);
-	if (rc)
-		return rc;
+	WREG32(mmMME_CMDQ_CP_MSG_BASE0_ADDR_LO, mtr_base_lo);
+	WREG32(mmMME_CMDQ_CP_MSG_BASE0_ADDR_HI, mtr_base_hi);
+	WREG32(mmMME_CMDQ_CP_MSG_BASE1_ADDR_LO,	so_base_lo);
+	WREG32(mmMME_CMDQ_CP_MSG_BASE1_ADDR_HI, so_base_hi);
 
-	snprintf(fw_name, sizeof(fw_name), "habanalabs/goya/goya-fit.itb");
-	dst = hdev->pcie_bar[DDR_BAR_ID] + LINUX_FW_OFFSET;
-	rc = goya_push_fw_to_device(hdev, fw_name, dst);
-	if (rc)
-		return rc;
+	/* CMDQ CQ has 20 cache lines */
+	WREG32(mmMME_CMDQ_CQ_CFG1, 0x00140014);
 
-	WREG32(mmPSOC_GLOBAL_CONF_UBOOT_MAGIC, KMD_MSG_FIT_RDY);
-	WREG32(mmPSOC_GLOBAL_CONF_WARM_REBOOT, CPU_BOOT_STATUS_NA);
+	WREG32(mmMME_CMDQ_GLBL_ERR_ADDR_LO, gic_base_lo);
+	WREG32(mmMME_CMDQ_GLBL_ERR_ADDR_HI, gic_base_hi);
 
-	WREG32(mmCPU_CA53_CFG_RST_ADDR_LSB_0,
-		lower_32_bits(SRAM_BASE_ADDR + UBOOT_FW_OFFSET));
-	WREG32(mmCPU_CA53_CFG_RST_ADDR_MSB_0,
-		upper_32_bits(SRAM_BASE_ADDR + UBOOT_FW_OFFSET));
+	WREG32(mmMME_CMDQ_GLBL_ERR_WDATA, GOYA_ASYNC_EVENT_ID_MME_CMDQ);
 
-	/* Release ARM core 0 from reset */
-	WREG32(mmCPU_CA53_CFG_ARM_RST_CONTROL,
-					CPU_RESET_CORE0_DEASSERT);
-	val = RREG32(mmCPU_CA53_CFG_ARM_RST_CONTROL);
+	WREG32(mmMME_CMDQ_GLBL_ERR_CFG, CMDQ_MME_ERR_MSG_EN);
 
-	return 0;
+	WREG32(mmMME_CMDQ_GLBL_PROT, CMDQ_MME_ERR_PROT);
+
+	WREG32(mmMME_CMDQ_GLBL_CFG0, CMDQ_MME_ENABLE);
 }
 
-/*
- * FW component passes an offset from SRAM_BASE_ADDR in SCRATCHPAD_xx.
- * The version string should be located by that offset.
- */
-static void goya_read_device_fw_version(struct hl_device *hdev,
-					enum goya_fw_component fwc)
+static void goya_init_mme_qmans(struct hl_device *hdev)
 {
-	const char *name;
-	u32 ver_off;
-	char *dest;
+	struct goya_device *goya = hdev->asic_specific;
+	u32 so_base_lo, so_base_hi;
 
-	switch (fwc) {
-	case FW_COMP_UBOOT:
-		ver_off = RREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_29);
-		dest = hdev->asic_prop.uboot_ver;
-		name = "U-Boot";
-		break;
-	case FW_COMP_PREBOOT:
-		ver_off = RREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_28);
-		dest = hdev->asic_prop.preboot_ver;
-		name = "Preboot";
-		break;
-	default:
-		dev_warn(hdev->dev, "Undefined FW component: %d\n", fwc);
+	if (goya->hw_cap_initialized & HW_CAP_MME)
 		return;
-	}
 
-	ver_off &= ~((u32)SRAM_BASE_ADDR);
+	so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
+	so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
 
-	if (ver_off < SRAM_SIZE - VERSION_MAX_LEN) {
+	WREG32(mmMME_SM_BASE_ADDRESS_LOW, so_base_lo);
+	WREG32(mmMME_SM_BASE_ADDRESS_HIGH, so_base_hi);
+
+	goya_init_mme_qman(hdev);
+	goya_init_mme_cmdq(hdev);
+
+	goya->hw_cap_initialized |= HW_CAP_MME;
+}
+
+static void goya_init_tpc_qman(struct hl_device *hdev, u32 base_off, int tpc_id)
+{
+	u32 mtr_base_lo, mtr_base_hi;
+	u32 so_base_lo, so_base_hi;
+	u32 gic_base_lo, gic_base_hi;
+	u64 qman_base_addr;
+	u32 reg_off = tpc_id * (mmTPC1_QM_PQ_PI - mmTPC0_QM_PQ_PI);
+
+	mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
+	mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
+	so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
+	so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
+
+	gic_base_lo =
+		lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
+	gic_base_hi =
+		upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
+
+	qman_base_addr = hdev->asic_prop.sram_base_address + base_off;
+
+	WREG32(mmTPC0_QM_PQ_BASE_LO + reg_off, lower_32_bits(qman_base_addr));
+	WREG32(mmTPC0_QM_PQ_BASE_HI + reg_off, upper_32_bits(qman_base_addr));
+	WREG32(mmTPC0_QM_PQ_SIZE + reg_off, ilog2(TPC_QMAN_LENGTH));
+	WREG32(mmTPC0_QM_PQ_PI + reg_off, 0);
+	WREG32(mmTPC0_QM_PQ_CI + reg_off, 0);
+	WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET + reg_off, 0x10C0);
+	WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_HI_OFFSET + reg_off, 0x10C4);
+	WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET + reg_off, 0x10C8);
+	WREG32(mmTPC0_QM_CP_LDMA_COMMIT_OFFSET + reg_off, 0x10CC);
+
+	WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO + reg_off, mtr_base_lo);
+	WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI + reg_off, mtr_base_hi);
+	WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO + reg_off, so_base_lo);
+	WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI + reg_off, so_base_hi);
+
+	WREG32(mmTPC0_QM_CQ_CFG1 + reg_off, 0x00080008);
+
+	WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + reg_off, gic_base_lo);
+	WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + reg_off, gic_base_hi);
+
+	WREG32(mmTPC0_QM_GLBL_ERR_WDATA + reg_off,
+			GOYA_ASYNC_EVENT_ID_TPC0_QM + tpc_id);
+
+	WREG32(mmTPC0_QM_GLBL_ERR_CFG + reg_off, QMAN_TPC_ERR_MSG_EN);
+
+	WREG32(mmTPC0_QM_GLBL_PROT + reg_off, QMAN_TPC_ERR_PROT);
+
+	WREG32(mmTPC0_QM_GLBL_CFG0 + reg_off, QMAN_TPC_ENABLE);
+}
+
+static void goya_init_tpc_cmdq(struct hl_device *hdev, int tpc_id)
+{
+	u32 mtr_base_lo, mtr_base_hi;
+	u32 so_base_lo, so_base_hi;
+	u32 gic_base_lo, gic_base_hi;
+	u32 reg_off = tpc_id * (mmTPC1_CMDQ_CQ_CFG1 - mmTPC0_CMDQ_CQ_CFG1);
+
+	mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
+	mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
+	so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
+	so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
+
+	gic_base_lo =
+		lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
+	gic_base_hi =
+		upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
+
+	WREG32(mmTPC0_CMDQ_CP_MSG_BASE0_ADDR_LO + reg_off, mtr_base_lo);
+	WREG32(mmTPC0_CMDQ_CP_MSG_BASE0_ADDR_HI + reg_off, mtr_base_hi);
+	WREG32(mmTPC0_CMDQ_CP_MSG_BASE1_ADDR_LO + reg_off, so_base_lo);
+	WREG32(mmTPC0_CMDQ_CP_MSG_BASE1_ADDR_HI + reg_off, so_base_hi);
+
+	WREG32(mmTPC0_CMDQ_CQ_CFG1 + reg_off, 0x00140014);
+
+	WREG32(mmTPC0_CMDQ_GLBL_ERR_ADDR_LO + reg_off, gic_base_lo);
+	WREG32(mmTPC0_CMDQ_GLBL_ERR_ADDR_HI + reg_off, gic_base_hi);
+
+	WREG32(mmTPC0_CMDQ_GLBL_ERR_WDATA + reg_off,
+			GOYA_ASYNC_EVENT_ID_TPC0_CMDQ + tpc_id);
+
+	WREG32(mmTPC0_CMDQ_GLBL_ERR_CFG + reg_off, CMDQ_TPC_ERR_MSG_EN);
+
+	WREG32(mmTPC0_CMDQ_GLBL_PROT + reg_off, CMDQ_TPC_ERR_PROT);
+
+	WREG32(mmTPC0_CMDQ_GLBL_CFG0 + reg_off, CMDQ_TPC_ENABLE);
+}
+
+static void goya_init_tpc_qmans(struct hl_device *hdev)
+{
+	struct goya_device *goya = hdev->asic_specific;
+	u32 so_base_lo, so_base_hi;
+	u32 cfg_off = mmTPC1_CFG_SM_BASE_ADDRESS_LOW -
+			mmTPC0_CFG_SM_BASE_ADDRESS_LOW;
+	int i;
+
+	if (goya->hw_cap_initialized & HW_CAP_TPC)
+		return;
+
+	so_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
+	so_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
+
+	for (i = 0 ; i < TPC_MAX_NUM ; i++) {
+		WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_LOW + i * cfg_off,
+				so_base_lo);
+		WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + i * cfg_off,
+				so_base_hi);
+	}
+
+	goya_init_tpc_qman(hdev, TPC0_QMAN_BASE_OFFSET, 0);
+	goya_init_tpc_qman(hdev, TPC1_QMAN_BASE_OFFSET, 1);
+	goya_init_tpc_qman(hdev, TPC2_QMAN_BASE_OFFSET, 2);
+	goya_init_tpc_qman(hdev, TPC3_QMAN_BASE_OFFSET, 3);
+	goya_init_tpc_qman(hdev, TPC4_QMAN_BASE_OFFSET, 4);
+	goya_init_tpc_qman(hdev, TPC5_QMAN_BASE_OFFSET, 5);
+	goya_init_tpc_qman(hdev, TPC6_QMAN_BASE_OFFSET, 6);
+	goya_init_tpc_qman(hdev, TPC7_QMAN_BASE_OFFSET, 7);
+
+	for (i = 0 ; i < TPC_MAX_NUM ; i++)
+		goya_init_tpc_cmdq(hdev, i);
+
+	goya->hw_cap_initialized |= HW_CAP_TPC;
+}
+
+/*
+ * goya_disable_internal_queues - Disable internal queues
+ *
+ * @hdev: pointer to hl_device structure
+ *
+ */
+static void goya_disable_internal_queues(struct hl_device *hdev)
+{
+	WREG32(mmMME_QM_GLBL_CFG0, 0);
+	WREG32(mmMME_CMDQ_GLBL_CFG0, 0);
+
+	WREG32(mmTPC0_QM_GLBL_CFG0, 0);
+	WREG32(mmTPC0_CMDQ_GLBL_CFG0, 0);
+
+	WREG32(mmTPC1_QM_GLBL_CFG0, 0);
+	WREG32(mmTPC1_CMDQ_GLBL_CFG0, 0);
+
+	WREG32(mmTPC2_QM_GLBL_CFG0, 0);
+	WREG32(mmTPC2_CMDQ_GLBL_CFG0, 0);
+
+	WREG32(mmTPC3_QM_GLBL_CFG0, 0);
+	WREG32(mmTPC3_CMDQ_GLBL_CFG0, 0);
+
+	WREG32(mmTPC4_QM_GLBL_CFG0, 0);
+	WREG32(mmTPC4_CMDQ_GLBL_CFG0, 0);
+
+	WREG32(mmTPC5_QM_GLBL_CFG0, 0);
+	WREG32(mmTPC5_CMDQ_GLBL_CFG0, 0);
+
+	WREG32(mmTPC6_QM_GLBL_CFG0, 0);
+	WREG32(mmTPC6_CMDQ_GLBL_CFG0, 0);
+
+	WREG32(mmTPC7_QM_GLBL_CFG0, 0);
+	WREG32(mmTPC7_CMDQ_GLBL_CFG0, 0);
+}
+
+/*
+ * goya_stop_internal_queues - Stop internal queues
+ *
+ * @hdev: pointer to hl_device structure
+ *
+ * Returns 0 on success
+ *
+ */
+static int goya_stop_internal_queues(struct hl_device *hdev)
+{
+	int rc, retval = 0;
+
+	/*
+	 * Each queue (QMAN) is a separate H/W logic. That means that each
+	 * QMAN can be stopped independently and failure to stop one does NOT
+	 * mandate we should not try to stop other QMANs
+	 */
+
+	rc = goya_stop_queue(hdev,
+			mmMME_QM_GLBL_CFG1,
+			mmMME_QM_CP_STS,
+			mmMME_QM_GLBL_STS0);
+
+	if (rc) {
+		dev_err(hdev->dev, "failed to stop MME QMAN\n");
+		retval = -EIO;
+	}
+
+	rc = goya_stop_queue(hdev,
+			mmMME_CMDQ_GLBL_CFG1,
+			mmMME_CMDQ_CP_STS,
+			mmMME_CMDQ_GLBL_STS0);
+
+	if (rc) {
+		dev_err(hdev->dev, "failed to stop MME CMDQ\n");
+		retval = -EIO;
+	}
+
+	rc = goya_stop_queue(hdev,
+			mmTPC0_QM_GLBL_CFG1,
+			mmTPC0_QM_CP_STS,
+			mmTPC0_QM_GLBL_STS0);
+
+	if (rc) {
+		dev_err(hdev->dev, "failed to stop TPC 0 QMAN\n");
+		retval = -EIO;
+	}
+
+	rc = goya_stop_queue(hdev,
+			mmTPC0_CMDQ_GLBL_CFG1,
+			mmTPC0_CMDQ_CP_STS,
+			mmTPC0_CMDQ_GLBL_STS0);
+
+	if (rc) {
+		dev_err(hdev->dev, "failed to stop TPC 0 CMDQ\n");
+		retval = -EIO;
+	}
+
+	rc = goya_stop_queue(hdev,
+			mmTPC1_QM_GLBL_CFG1,
+			mmTPC1_QM_CP_STS,
+			mmTPC1_QM_GLBL_STS0);
+
+	if (rc) {
+		dev_err(hdev->dev, "failed to stop TPC 1 QMAN\n");
+		retval = -EIO;
+	}
+
+	rc = goya_stop_queue(hdev,
+			mmTPC1_CMDQ_GLBL_CFG1,
+			mmTPC1_CMDQ_CP_STS,
+			mmTPC1_CMDQ_GLBL_STS0);
+
+	if (rc) {
+		dev_err(hdev->dev, "failed to stop TPC 1 CMDQ\n");
+		retval = -EIO;
+	}
+
+	rc = goya_stop_queue(hdev,
+			mmTPC2_QM_GLBL_CFG1,
+			mmTPC2_QM_CP_STS,
+			mmTPC2_QM_GLBL_STS0);
+
+	if (rc) {
+		dev_err(hdev->dev, "failed to stop TPC 2 QMAN\n");
+		retval = -EIO;
+	}
+
+	rc = goya_stop_queue(hdev,
+			mmTPC2_CMDQ_GLBL_CFG1,
+			mmTPC2_CMDQ_CP_STS,
+			mmTPC2_CMDQ_GLBL_STS0);
+
+	if (rc) {
+		dev_err(hdev->dev, "failed to stop TPC 2 CMDQ\n");
+		retval = -EIO;
+	}
+
+	rc = goya_stop_queue(hdev,
+			mmTPC3_QM_GLBL_CFG1,
+			mmTPC3_QM_CP_STS,
+			mmTPC3_QM_GLBL_STS0);
+
+	if (rc) {
+		dev_err(hdev->dev, "failed to stop TPC 3 QMAN\n");
+		retval = -EIO;
+	}
+
+	rc = goya_stop_queue(hdev,
+			mmTPC3_CMDQ_GLBL_CFG1,
+			mmTPC3_CMDQ_CP_STS,
+			mmTPC3_CMDQ_GLBL_STS0);
+
+	if (rc) {
+		dev_err(hdev->dev, "failed to stop TPC 3 CMDQ\n");
+		retval = -EIO;
+	}
+
+	rc = goya_stop_queue(hdev,
+			mmTPC4_QM_GLBL_CFG1,
+			mmTPC4_QM_CP_STS,
+			mmTPC4_QM_GLBL_STS0);
+
+	if (rc) {
+		dev_err(hdev->dev, "failed to stop TPC 4 QMAN\n");
+		retval = -EIO;
+	}
+
+	rc = goya_stop_queue(hdev,
+			mmTPC4_CMDQ_GLBL_CFG1,
+			mmTPC4_CMDQ_CP_STS,
+			mmTPC4_CMDQ_GLBL_STS0);
+
+	if (rc) {
+		dev_err(hdev->dev, "failed to stop TPC 4 CMDQ\n");
+		retval = -EIO;
+	}
+
+	rc = goya_stop_queue(hdev,
+			mmTPC5_QM_GLBL_CFG1,
+			mmTPC5_QM_CP_STS,
+			mmTPC5_QM_GLBL_STS0);
+
+	if (rc) {
+		dev_err(hdev->dev, "failed to stop TPC 5 QMAN\n");
+		retval = -EIO;
+	}
+
+	rc = goya_stop_queue(hdev,
+			mmTPC5_CMDQ_GLBL_CFG1,
+			mmTPC5_CMDQ_CP_STS,
+			mmTPC5_CMDQ_GLBL_STS0);
+
+	if (rc) {
+		dev_err(hdev->dev, "failed to stop TPC 5 CMDQ\n");
+		retval = -EIO;
+	}
+
+	rc = goya_stop_queue(hdev,
+			mmTPC6_QM_GLBL_CFG1,
+			mmTPC6_QM_CP_STS,
+			mmTPC6_QM_GLBL_STS0);
+
+	if (rc) {
+		dev_err(hdev->dev, "failed to stop TPC 6 QMAN\n");
+		retval = -EIO;
+	}
+
+	rc = goya_stop_queue(hdev,
+			mmTPC6_CMDQ_GLBL_CFG1,
+			mmTPC6_CMDQ_CP_STS,
+			mmTPC6_CMDQ_GLBL_STS0);
+
+	if (rc) {
+		dev_err(hdev->dev, "failed to stop TPC 6 CMDQ\n");
+		retval = -EIO;
+	}
+
+	rc = goya_stop_queue(hdev,
+			mmTPC7_QM_GLBL_CFG1,
+			mmTPC7_QM_CP_STS,
+			mmTPC7_QM_GLBL_STS0);
+
+	if (rc) {
+		dev_err(hdev->dev, "failed to stop TPC 7 QMAN\n");
+		retval = -EIO;
+	}
+
+	rc = goya_stop_queue(hdev,
+			mmTPC7_CMDQ_GLBL_CFG1,
+			mmTPC7_CMDQ_CP_STS,
+			mmTPC7_CMDQ_GLBL_STS0);
+
+	if (rc) {
+		dev_err(hdev->dev, "failed to stop TPC 7 CMDQ\n");
+		retval = -EIO;
+	}
+
+	return retval;
+}
+
+static void goya_resume_internal_queues(struct hl_device *hdev)
+{
+	WREG32(mmMME_QM_GLBL_CFG1, 0);
+	WREG32(mmMME_CMDQ_GLBL_CFG1, 0);
+
+	WREG32(mmTPC0_QM_GLBL_CFG1, 0);
+	WREG32(mmTPC0_CMDQ_GLBL_CFG1, 0);
+
+	WREG32(mmTPC1_QM_GLBL_CFG1, 0);
+	WREG32(mmTPC1_CMDQ_GLBL_CFG1, 0);
+
+	WREG32(mmTPC2_QM_GLBL_CFG1, 0);
+	WREG32(mmTPC2_CMDQ_GLBL_CFG1, 0);
+
+	WREG32(mmTPC3_QM_GLBL_CFG1, 0);
+	WREG32(mmTPC3_CMDQ_GLBL_CFG1, 0);
+
+	WREG32(mmTPC4_QM_GLBL_CFG1, 0);
+	WREG32(mmTPC4_CMDQ_GLBL_CFG1, 0);
+
+	WREG32(mmTPC5_QM_GLBL_CFG1, 0);
+	WREG32(mmTPC5_CMDQ_GLBL_CFG1, 0);
+
+	WREG32(mmTPC6_QM_GLBL_CFG1, 0);
+	WREG32(mmTPC6_CMDQ_GLBL_CFG1, 0);
+
+	WREG32(mmTPC7_QM_GLBL_CFG1, 0);
+	WREG32(mmTPC7_CMDQ_GLBL_CFG1, 0);
+}
+
+
+/*
+ * goya_push_fw_to_device - Push FW code to device
+ *
+ * @hdev: pointer to hl_device structure
+ *
+ * Copy fw code from firmware file to device memory.
+ * Returns 0 on success
+ *
+ */
+static int goya_push_fw_to_device(struct hl_device *hdev, const char *fw_name,
+					void __iomem *dst)
+{
+	const struct firmware *fw;
+	const u64 *fw_data;
+	size_t fw_size, i;
+	int rc;
+
+	rc = request_firmware(&fw, fw_name, hdev->dev);
+
+	if (rc) {
+		dev_err(hdev->dev, "Failed to request %s\n", fw_name);
+		goto out;
+	}
+
+	fw_size = fw->size;
+	if ((fw_size % 4) != 0) {
+		dev_err(hdev->dev, "illegal %s firmware size %zu\n",
+			fw_name, fw_size);
+		rc = -EINVAL;
+		goto out;
+	}
+
+	dev_dbg(hdev->dev, "%s firmware size == %zu\n", fw_name, fw_size);
+
+	fw_data = (const u64 *) fw->data;
+
+	if ((fw->size % 8) != 0)
+		fw_size -= 8;
+
+	for (i = 0 ; i < fw_size ; i += 8, fw_data++, dst += 8) {
+		if (!(i & (0x80000 - 1))) {
+			dev_dbg(hdev->dev,
+				"copied so far %zu out of %zu for %s firmware",
+				i, fw_size, fw_name);
+			usleep_range(20, 100);
+		}
+
+		writeq(*fw_data, dst);
+	}
+
+	if ((fw->size % 8) != 0)
+		writel(*(const u32 *) fw_data, dst);
+
+out:
+	release_firmware(fw);
+	return rc;
+}
+
+static int goya_pldm_init_cpu(struct hl_device *hdev)
+{
+	char fw_name[200];
+	void __iomem *dst;
+	u32 val, unit_rst_val;
+	int rc;
+
+	/* Must initialize SRAM scrambler before pushing u-boot to SRAM */
+	goya_init_golden_registers(hdev);
+
+	/* Put ARM cores into reset */
+	WREG32(mmCPU_CA53_CFG_ARM_RST_CONTROL, CPU_RESET_ASSERT);
+	val = RREG32(mmCPU_CA53_CFG_ARM_RST_CONTROL);
+
+	/* Reset the CA53 MACRO */
+	unit_rst_val = RREG32(mmPSOC_GLOBAL_CONF_UNIT_RST_N);
+	WREG32(mmPSOC_GLOBAL_CONF_UNIT_RST_N, CA53_RESET);
+	val = RREG32(mmPSOC_GLOBAL_CONF_UNIT_RST_N);
+	WREG32(mmPSOC_GLOBAL_CONF_UNIT_RST_N, unit_rst_val);
+	val = RREG32(mmPSOC_GLOBAL_CONF_UNIT_RST_N);
+
+	snprintf(fw_name, sizeof(fw_name), "habanalabs/goya/goya-u-boot.bin");
+	dst = hdev->pcie_bar[SRAM_CFG_BAR_ID] + UBOOT_FW_OFFSET;
+	rc = goya_push_fw_to_device(hdev, fw_name, dst);
+	if (rc)
+		return rc;
+
+	snprintf(fw_name, sizeof(fw_name), "habanalabs/goya/goya-fit.itb");
+	dst = hdev->pcie_bar[DDR_BAR_ID] + LINUX_FW_OFFSET;
+	rc = goya_push_fw_to_device(hdev, fw_name, dst);
+	if (rc)
+		return rc;
+
+	WREG32(mmPSOC_GLOBAL_CONF_UBOOT_MAGIC, KMD_MSG_FIT_RDY);
+	WREG32(mmPSOC_GLOBAL_CONF_WARM_REBOOT, CPU_BOOT_STATUS_NA);
+
+	WREG32(mmCPU_CA53_CFG_RST_ADDR_LSB_0,
+		lower_32_bits(SRAM_BASE_ADDR + UBOOT_FW_OFFSET));
+	WREG32(mmCPU_CA53_CFG_RST_ADDR_MSB_0,
+		upper_32_bits(SRAM_BASE_ADDR + UBOOT_FW_OFFSET));
+
+	/* Release ARM core 0 from reset */
+	WREG32(mmCPU_CA53_CFG_ARM_RST_CONTROL,
+					CPU_RESET_CORE0_DEASSERT);
+	val = RREG32(mmCPU_CA53_CFG_ARM_RST_CONTROL);
+
+	return 0;
+}
+
+/*
+ * FW component passes an offset from SRAM_BASE_ADDR in SCRATCHPAD_xx.
+ * The version string should be located by that offset.
+ */
+static void goya_read_device_fw_version(struct hl_device *hdev,
+					enum goya_fw_component fwc)
+{
+	const char *name;
+	u32 ver_off;
+	char *dest;
+
+	switch (fwc) {
+	case FW_COMP_UBOOT:
+		ver_off = RREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_29);
+		dest = hdev->asic_prop.uboot_ver;
+		name = "U-Boot";
+		break;
+	case FW_COMP_PREBOOT:
+		ver_off = RREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_28);
+		dest = hdev->asic_prop.preboot_ver;
+		name = "Preboot";
+		break;
+	default:
+		dev_warn(hdev->dev, "Undefined FW component: %d\n", fwc);
+		return;
+	}
+
+	ver_off &= ~((u32)SRAM_BASE_ADDR);
+
+	if (ver_off < SRAM_SIZE - VERSION_MAX_LEN) {
 		memcpy_fromio(dest, hdev->pcie_bar[SRAM_CFG_BAR_ID] + ver_off,
 							VERSION_MAX_LEN);
 	} else {
@@ -1344,6 +2183,19 @@ static int goya_hw_init(struct hl_device *hdev)
 
 	goya_init_security(hdev);
 
+	goya_init_dma_qmans(hdev);
+
+	goya_init_mme_qmans(hdev);
+
+	goya_init_tpc_qmans(hdev);
+
+	rc = goya_init_cpu_queues(hdev);
+	if (rc) {
+		dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n",
+			rc);
+		goto disable_queues;
+	}
+
 	/* CPU initialization is finished, we can now move to 48 bit DMA mask */
 	rc = pci_set_dma_mask(hdev->pdev, DMA_BIT_MASK(48));
 	if (rc) {
@@ -1352,7 +2204,7 @@ static int goya_hw_init(struct hl_device *hdev)
 		if (rc) {
 			dev_err(hdev->dev,
 				"Unable to set pci dma mask to 32 bits\n");
-			return rc;
+			goto disable_pci_access;
 		}
 	}
 
@@ -1364,7 +2216,7 @@ static int goya_hw_init(struct hl_device *hdev)
 		if (rc) {
 			dev_err(hdev->dev,
 				"Unable to set pci consistent dma mask to 32 bits\n");
-			return rc;
+			goto disable_pci_access;
 		}
 	}
 
@@ -1372,6 +2224,14 @@ static int goya_hw_init(struct hl_device *hdev)
 	val = RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG);
 
 	return 0;
+
+disable_pci_access:
+	goya_send_pci_access_msg(hdev, ARMCP_PACKET_DISABLE_PCI_ACCESS);
+disable_queues:
+	goya_disable_internal_queues(hdev);
+	goya_disable_external_queues(hdev);
+
+	return rc;
 }
 
 /*
@@ -1460,12 +2320,40 @@ static void goya_hw_fini(struct hl_device *hdev, bool hard_reset)
 
 int goya_suspend(struct hl_device *hdev)
 {
-	return 0;
+	int rc;
+
+	rc = goya_stop_internal_queues(hdev);
+
+	if (rc) {
+		dev_err(hdev->dev, "failed to stop internal queues\n");
+		return rc;
+	}
+
+	rc = goya_stop_external_queues(hdev);
+
+	if (rc) {
+		dev_err(hdev->dev, "failed to stop external queues\n");
+		return rc;
+	}
+
+	rc = goya_send_pci_access_msg(hdev, ARMCP_PACKET_DISABLE_PCI_ACCESS);
+	if (rc)
+		dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
+
+	return rc;
 }
 
 int goya_resume(struct hl_device *hdev)
 {
-	return 0;
+	int rc;
+
+	goya_resume_external_queues(hdev);
+	goya_resume_internal_queues(hdev);
+
+	rc = goya_send_pci_access_msg(hdev, ARMCP_PACKET_ENABLE_PCI_ACCESS);
+	if (rc)
+		dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
+	return rc;
 }
 
 int goya_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma)
@@ -1489,6 +2377,101 @@ int goya_cb_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
 	return rc;
 }
 
+void goya_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
+{
+	u32 db_reg_offset, db_value;
+	bool invalid_queue = false;
+
+	switch (hw_queue_id) {
+	case GOYA_QUEUE_ID_DMA_0:
+		db_reg_offset = mmDMA_QM_0_PQ_PI;
+		break;
+
+	case GOYA_QUEUE_ID_DMA_1:
+		db_reg_offset = mmDMA_QM_1_PQ_PI;
+		break;
+
+	case GOYA_QUEUE_ID_DMA_2:
+		db_reg_offset = mmDMA_QM_2_PQ_PI;
+		break;
+
+	case GOYA_QUEUE_ID_DMA_3:
+		db_reg_offset = mmDMA_QM_3_PQ_PI;
+		break;
+
+	case GOYA_QUEUE_ID_DMA_4:
+		db_reg_offset = mmDMA_QM_4_PQ_PI;
+		break;
+
+	case GOYA_QUEUE_ID_CPU_PQ:
+		if (hdev->cpu_queues_enable)
+			db_reg_offset = mmCPU_IF_PF_PQ_PI;
+		else
+			invalid_queue = true;
+		break;
+
+	case GOYA_QUEUE_ID_MME:
+		db_reg_offset = mmMME_QM_PQ_PI;
+		break;
+
+	case GOYA_QUEUE_ID_TPC0:
+		db_reg_offset = mmTPC0_QM_PQ_PI;
+		break;
+
+	case GOYA_QUEUE_ID_TPC1:
+		db_reg_offset = mmTPC1_QM_PQ_PI;
+		break;
+
+	case GOYA_QUEUE_ID_TPC2:
+		db_reg_offset = mmTPC2_QM_PQ_PI;
+		break;
+
+	case GOYA_QUEUE_ID_TPC3:
+		db_reg_offset = mmTPC3_QM_PQ_PI;
+		break;
+
+	case GOYA_QUEUE_ID_TPC4:
+		db_reg_offset = mmTPC4_QM_PQ_PI;
+		break;
+
+	case GOYA_QUEUE_ID_TPC5:
+		db_reg_offset = mmTPC5_QM_PQ_PI;
+		break;
+
+	case GOYA_QUEUE_ID_TPC6:
+		db_reg_offset = mmTPC6_QM_PQ_PI;
+		break;
+
+	case GOYA_QUEUE_ID_TPC7:
+		db_reg_offset = mmTPC7_QM_PQ_PI;
+		break;
+
+	default:
+		invalid_queue = true;
+	}
+
+	if (invalid_queue) {
+		/* Should never get here */
+		dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n",
+			hw_queue_id);
+		return;
+	}
+
+	db_value = pi;
+
+	/* ring the doorbell */
+	WREG32(db_reg_offset, db_value);
+
+	if (hw_queue_id == GOYA_QUEUE_ID_CPU_PQ)
+		WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
+				GOYA_ASYNC_EVENT_ID_PI_UPDATE);
+}
+
+void goya_flush_pq_write(struct hl_device *hdev, u64 *pq, u64 exp_val)
+{
+	/* Not needed in Goya */
+}
+
 void *goya_dma_alloc_coherent(struct hl_device *hdev, size_t size,
 					dma_addr_t *dma_handle, gfp_t flags)
 {
@@ -1501,6 +2484,315 @@ void goya_dma_free_coherent(struct hl_device *hdev, size_t size, void *cpu_addr,
 	dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, dma_handle);
 }
 
+void *goya_get_int_queue_base(struct hl_device *hdev, u32 queue_id,
+				dma_addr_t *dma_handle,	u16 *queue_len)
+{
+	void *base;
+	u32 offset;
+
+	*dma_handle = hdev->asic_prop.sram_base_address;
+
+	base = hdev->pcie_bar[SRAM_CFG_BAR_ID];
+
+	switch (queue_id) {
+	case GOYA_QUEUE_ID_MME:
+		offset = MME_QMAN_BASE_OFFSET;
+		*queue_len = MME_QMAN_LENGTH;
+		break;
+	case GOYA_QUEUE_ID_TPC0:
+		offset = TPC0_QMAN_BASE_OFFSET;
+		*queue_len = TPC_QMAN_LENGTH;
+		break;
+	case GOYA_QUEUE_ID_TPC1:
+		offset = TPC1_QMAN_BASE_OFFSET;
+		*queue_len = TPC_QMAN_LENGTH;
+		break;
+	case GOYA_QUEUE_ID_TPC2:
+		offset = TPC2_QMAN_BASE_OFFSET;
+		*queue_len = TPC_QMAN_LENGTH;
+		break;
+	case GOYA_QUEUE_ID_TPC3:
+		offset = TPC3_QMAN_BASE_OFFSET;
+		*queue_len = TPC_QMAN_LENGTH;
+		break;
+	case GOYA_QUEUE_ID_TPC4:
+		offset = TPC4_QMAN_BASE_OFFSET;
+		*queue_len = TPC_QMAN_LENGTH;
+		break;
+	case GOYA_QUEUE_ID_TPC5:
+		offset = TPC5_QMAN_BASE_OFFSET;
+		*queue_len = TPC_QMAN_LENGTH;
+		break;
+	case GOYA_QUEUE_ID_TPC6:
+		offset = TPC6_QMAN_BASE_OFFSET;
+		*queue_len = TPC_QMAN_LENGTH;
+		break;
+	case GOYA_QUEUE_ID_TPC7:
+		offset = TPC7_QMAN_BASE_OFFSET;
+		*queue_len = TPC_QMAN_LENGTH;
+		break;
+	default:
+		dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
+		return NULL;
+	}
+
+	base += offset;
+	*dma_handle += offset;
+
+	return base;
+}
+
+int goya_send_cpu_message(struct hl_device *hdev, u32 *msg, u16 len,
+				u32 timeout, long *result)
+{
+	struct goya_device *goya = hdev->asic_specific;
+	struct armcp_packet *pkt;
+	dma_addr_t pkt_dma_addr;
+	u32 tmp;
+	int rc = 0;
+
+	if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q)) {
+		if (result)
+			*result = 0;
+		return 0;
+	}
+
+	if (len > CPU_CB_SIZE) {
+		dev_err(hdev->dev, "Invalid CPU message size of %d bytes\n",
+			len);
+		return -ENOMEM;
+	}
+
+	pkt = hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev, len,
+								&pkt_dma_addr);
+	if (!pkt) {
+		dev_err(hdev->dev,
+			"Failed to allocate DMA memory for packet to CPU\n");
+		return -ENOMEM;
+	}
+
+	memcpy(pkt, msg, len);
+
+	mutex_lock(&hdev->send_cpu_message_lock);
+
+	if (hdev->disabled)
+		goto out;
+
+	rc = hl_hw_queue_send_cb_no_cmpl(hdev, GOYA_QUEUE_ID_CPU_PQ, len,
+			pkt_dma_addr);
+	if (rc) {
+		dev_err(hdev->dev, "Failed to send CB on CPU PQ (%d)\n", rc);
+		goto out;
+	}
+
+	rc = hl_poll_timeout_memory(hdev, (u64) (uintptr_t) &pkt->fence,
+					timeout, &tmp);
+
+	hl_hw_queue_inc_ci_kernel(hdev, GOYA_QUEUE_ID_CPU_PQ);
+
+	if (rc == -ETIMEDOUT) {
+		dev_err(hdev->dev,
+			"Timeout while waiting for CPU packet fence\n");
+		goto out;
+	}
+
+	if (tmp == ARMCP_PACKET_FENCE_VAL) {
+		rc = (pkt->ctl & ARMCP_PKT_CTL_RC_MASK) >>
+						ARMCP_PKT_CTL_RC_SHIFT;
+		if (rc) {
+			dev_err(hdev->dev,
+				"F/W ERROR %d for CPU packet %d\n",
+				rc, (pkt->ctl & ARMCP_PKT_CTL_OPCODE_MASK)
+						>> ARMCP_PKT_CTL_OPCODE_SHIFT);
+			rc = -EINVAL;
+		} else if (result) {
+			*result = pkt->result;
+		}
+	} else {
+		dev_err(hdev->dev, "CPU packet wrong fence value\n");
+		rc = -EINVAL;
+	}
+
+out:
+	mutex_unlock(&hdev->send_cpu_message_lock);
+
+	hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev, len, pkt);
+
+	return rc;
+}
+
+int goya_test_queue(struct hl_device *hdev, u32 hw_queue_id)
+{
+	struct packet_msg_prot *fence_pkt;
+	dma_addr_t pkt_dma_addr;
+	u32 fence_val, tmp;
+	dma_addr_t fence_dma_addr;
+	u32 *fence_ptr;
+	int rc;
+
+	fence_val = GOYA_QMAN0_FENCE_VAL;
+
+	fence_ptr = hdev->asic_funcs->dma_pool_zalloc(hdev, 4, GFP_KERNEL,
+							&fence_dma_addr);
+	if (!fence_ptr) {
+		dev_err(hdev->dev,
+			"Failed to allocate memory for queue testing\n");
+		return -ENOMEM;
+	}
+
+	*fence_ptr = 0;
+
+	fence_pkt = hdev->asic_funcs->dma_pool_zalloc(hdev,
+					sizeof(struct packet_msg_prot),
+					GFP_KERNEL, &pkt_dma_addr);
+	if (!fence_pkt) {
+		dev_err(hdev->dev,
+			"Failed to allocate packet for queue testing\n");
+		rc = -ENOMEM;
+		goto free_fence_ptr;
+	}
+
+	fence_pkt->ctl = (PACKET_MSG_PROT << GOYA_PKT_CTL_OPCODE_SHIFT) |
+			(1 << GOYA_PKT_CTL_EB_SHIFT) |
+			(1 << GOYA_PKT_CTL_MB_SHIFT);
+	fence_pkt->value = fence_val;
+	fence_pkt->addr = fence_dma_addr +
+				hdev->asic_prop.host_phys_base_address;
+
+	rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
+					sizeof(struct packet_msg_prot),
+					pkt_dma_addr);
+	if (rc) {
+		dev_err(hdev->dev,
+			"Failed to send fence packet\n");
+		goto free_pkt;
+	}
+
+	rc = hl_poll_timeout_memory(hdev, (u64) (uintptr_t) fence_ptr,
+					GOYA_TEST_QUEUE_WAIT_USEC, &tmp);
+
+	hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
+
+	if ((!rc) && (tmp == fence_val)) {
+		dev_info(hdev->dev,
+			"queue test on H/W queue %d succeeded\n",
+			hw_queue_id);
+	} else {
+		dev_err(hdev->dev,
+			"H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
+			hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
+		rc = -EINVAL;
+	}
+
+free_pkt:
+	hdev->asic_funcs->dma_pool_free(hdev, (void *) fence_pkt,
+					pkt_dma_addr);
+free_fence_ptr:
+	hdev->asic_funcs->dma_pool_free(hdev, (void *) fence_ptr,
+					fence_dma_addr);
+	return rc;
+}
+
+int goya_test_cpu_queue(struct hl_device *hdev)
+{
+	struct armcp_packet test_pkt;
+	long result;
+	int rc;
+
+	/* cpu_queues_enable flag is always checked in send cpu message */
+
+	memset(&test_pkt, 0, sizeof(test_pkt));
+
+	test_pkt.ctl = ARMCP_PACKET_TEST << ARMCP_PKT_CTL_OPCODE_SHIFT;
+	test_pkt.value = ARMCP_PACKET_FENCE_VAL;
+
+	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &test_pkt,
+			sizeof(test_pkt), HL_DEVICE_TIMEOUT_USEC, &result);
+
+	if (!rc)
+		dev_info(hdev->dev, "queue test on CPU queue succeeded\n");
+	else
+		dev_err(hdev->dev, "CPU queue test failed (0x%08lX)\n", result);
+
+	return rc;
+}
+
+static int goya_test_queues(struct hl_device *hdev)
+{
+	struct goya_device *goya = hdev->asic_specific;
+	int i, rc, ret_val = 0;
+
+	for (i = 0 ; i < NUMBER_OF_EXT_HW_QUEUES ; i++) {
+		rc = goya_test_queue(hdev, i);
+		if (rc)
+			ret_val = -EINVAL;
+	}
+
+	if (hdev->cpu_queues_enable) {
+		rc = goya->test_cpu_queue(hdev);
+		if (rc)
+			ret_val = -EINVAL;
+	}
+
+	return ret_val;
+}
+
+void *goya_dma_pool_zalloc(struct hl_device *hdev, size_t size, gfp_t mem_flags,
+				dma_addr_t *dma_handle)
+{
+	if (size > GOYA_DMA_POOL_BLK_SIZE)
+		return NULL;
+
+	return dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
+}
+
+void goya_dma_pool_free(struct hl_device *hdev, void *vaddr,
+			dma_addr_t dma_addr)
+{
+	dma_pool_free(hdev->dma_pool, vaddr, dma_addr);
+}
+
+void *goya_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size,
+			dma_addr_t *dma_handle)
+{
+	u64 kernel_addr;
+
+	/* roundup to CPU_PKT_SIZE */
+	size = (size + (CPU_PKT_SIZE - 1)) & CPU_PKT_MASK;
+
+	kernel_addr = gen_pool_alloc(hdev->cpu_accessible_dma_pool, size);
+
+	*dma_handle = hdev->cpu_accessible_dma_address +
+		(kernel_addr - (u64) (uintptr_t) hdev->cpu_accessible_dma_mem);
+
+	return (void *) (uintptr_t) kernel_addr;
+}
+
+void goya_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size,
+			void *vaddr)
+{
+	/* roundup to CPU_PKT_SIZE */
+	size = (size + (CPU_PKT_SIZE - 1)) & CPU_PKT_MASK;
+
+	gen_pool_free(hdev->cpu_accessible_dma_pool, (u64) (uintptr_t) vaddr,
+			size);
+}
+
+
+static void goya_hw_queues_lock(struct hl_device *hdev)
+{
+	struct goya_device *goya = hdev->asic_specific;
+
+	spin_lock(&goya->hw_queues_lock);
+}
+
+static void goya_hw_queues_unlock(struct hl_device *hdev)
+{
+	struct goya_device *goya = hdev->asic_specific;
+
+	spin_unlock(&goya->hw_queues_lock);
+}
+
 static const struct hl_asic_funcs goya_funcs = {
 	.early_init = goya_early_init,
 	.early_fini = goya_early_fini,
@@ -1512,8 +2804,19 @@ static const struct hl_asic_funcs goya_funcs = {
 	.resume = goya_resume,
 	.mmap = goya_mmap,
 	.cb_mmap = goya_cb_mmap,
+	.ring_doorbell = goya_ring_doorbell,
+	.flush_pq_write = goya_flush_pq_write,
 	.dma_alloc_coherent = goya_dma_alloc_coherent,
 	.dma_free_coherent = goya_dma_free_coherent,
+	.get_int_queue_base = goya_get_int_queue_base,
+	.test_queues = goya_test_queues,
+	.dma_pool_zalloc = goya_dma_pool_zalloc,
+	.dma_pool_free = goya_dma_pool_free,
+	.cpu_accessible_dma_pool_alloc = goya_cpu_accessible_dma_pool_alloc,
+	.cpu_accessible_dma_pool_free = goya_cpu_accessible_dma_pool_free,
+	.hw_queues_lock = goya_hw_queues_lock,
+	.hw_queues_unlock = goya_hw_queues_unlock,
+	.send_cpu_message = goya_send_cpu_message
 };
 
 /*
diff --git a/drivers/misc/habanalabs/goya/goyaP.h b/drivers/misc/habanalabs/goya/goyaP.h
index 65cbb45d7083..791605cbecfe 100644
--- a/drivers/misc/habanalabs/goya/goyaP.h
+++ b/drivers/misc/habanalabs/goya/goyaP.h
@@ -11,7 +11,9 @@
 #include <uapi/misc/habanalabs.h>
 #include "habanalabs.h"
 #include "include/hl_boot_if.h"
+#include "include/goya/goya_packets.h"
 #include "include/goya/goya.h"
+#include "include/goya/goya_async_events.h"
 #include "include/goya/goya_fw_if.h"
 
 #define NUMBER_OF_CMPLT_QUEUES		5
@@ -145,12 +147,17 @@ enum goya_fw_component {
 };
 
 struct goya_device {
+	int (*test_cpu_queue)(struct hl_device *hdev);
+
 	/* TODO: remove hw_queues_lock after moving to scheduler code */
 	spinlock_t	hw_queues_lock;
 	u64		ddr_bar_cur_addr;
 	u32		hw_cap_initialized;
 };
 
+int goya_test_cpu_queue(struct hl_device *hdev);
+int goya_send_cpu_message(struct hl_device *hdev, u32 *msg, u16 len,
+				u32 timeout, long *result);
 void goya_init_security(struct hl_device *hdev);
 
 #endif /* GOYAP_H_ */
diff --git a/drivers/misc/habanalabs/habanalabs.h b/drivers/misc/habanalabs/habanalabs.h
index e099f7a9dac2..2121babbebdc 100644
--- a/drivers/misc/habanalabs/habanalabs.h
+++ b/drivers/misc/habanalabs/habanalabs.h
@@ -9,6 +9,7 @@
 #define HABANALABSP_H_
 
 #include "include/armcp_if.h"
+#include "include/qman_if.h"
 
 #define pr_fmt(fmt)			"habanalabs: " fmt
 
@@ -26,9 +27,36 @@
 struct hl_device;
 struct hl_fpriv;
 
+/**
+ * enum hl_queue_type - Supported QUEUE types.
+ * @QUEUE_TYPE_NA: queue is not available.
+ * @QUEUE_TYPE_EXT: external queue which is a DMA channel that may access the
+ *                  host.
+ * @QUEUE_TYPE_INT: internal queue that performs DMA inside the device's
+ *			memories and/or operates the compute engines.
+ * @QUEUE_TYPE_CPU: S/W queue for communication with the device's CPU.
+ */
+enum hl_queue_type {
+	QUEUE_TYPE_NA,
+	QUEUE_TYPE_EXT,
+	QUEUE_TYPE_INT,
+	QUEUE_TYPE_CPU
+};
+
+/**
+ * struct hw_queue_properties - queue information.
+ * @type: queue type.
+ * @kmd_only: true if only KMD is allowed to send a job to this queue, false
+ *            otherwise.
+ */
+struct hw_queue_properties {
+	enum hl_queue_type	type;
+	u8			kmd_only;
+};
 
 /**
  * struct asic_fixed_properties - ASIC specific immutable properties.
+ * @hw_queues_props: H/W queues properties.
  * @uboot_ver: F/W U-boot version.
  * @preboot_ver: F/W Preboot version.
  * @sram_base_address: SRAM physical start address.
@@ -59,6 +87,7 @@ struct hl_fpriv;
  * @tpc_enabled_mask: which TPCs are enabled.
  */
 struct asic_fixed_properties {
+	struct hw_queue_properties	hw_queues_props[HL_MAX_QUEUES];
 	char			uboot_ver[VERSION_MAX_LEN];
 	char			preboot_ver[VERSION_MAX_LEN];
 	u64			sram_base_address;
@@ -132,7 +161,89 @@ struct hl_cb {
 };
 
 
+/*
+ * QUEUES
+ */
+
+struct hl_cs_job;
+
+/*
+ * Currently, there are two limitations on the maximum length of a queue:
+ *
+ * 1. The memory footprint of the queue. The current allocated space for the
+ *    queue is PAGE_SIZE. Because each entry in the queue is HL_BD_SIZE,
+ *    the maximum length of the queue can be PAGE_SIZE / HL_BD_SIZE,
+ *    which currently is 4096/16 = 256 entries.
+ *
+ *    To increase that, we need either to decrease the size of the
+ *    BD (difficult), or allocate more than a single page (easier).
+ *
+ * 2. Because the size of the JOB handle field in the BD CTL / completion queue
+ *    is 10-bit, we can have up to 1024 open jobs per hardware queue.
+ *    Therefore, each queue can hold up to 1024 entries.
+ *
+ * HL_QUEUE_LENGTH is in units of struct hl_bd.
+ * HL_QUEUE_LENGTH * sizeof(struct hl_bd) should be <= HL_PAGE_SIZE
+ */
+
+#define HL_PAGE_SIZE			4096 /* minimum page size */
+/* Must be power of 2 (HL_PAGE_SIZE / HL_BD_SIZE) */
 #define HL_QUEUE_LENGTH			256
+#define HL_QUEUE_SIZE_IN_BYTES		(HL_QUEUE_LENGTH * HL_BD_SIZE)
+
+/*
+ * HL_CQ_LENGTH is in units of struct hl_cq_entry.
+ * HL_CQ_LENGTH should be <= HL_PAGE_SIZE
+ */
+#define HL_CQ_LENGTH			HL_QUEUE_LENGTH
+#define HL_CQ_SIZE_IN_BYTES		(HL_CQ_LENGTH * HL_CQ_ENTRY_SIZE)
+
+
+
+/**
+ * struct hl_hw_queue - describes a H/W transport queue.
+ * @shadow_queue: pointer to a shadow queue that holds pointers to jobs.
+ * @queue_type: type of queue.
+ * @kernel_address: holds the queue's kernel virtual address.
+ * @bus_address: holds the queue's DMA address.
+ * @pi: holds the queue's pi value.
+ * @ci: holds the queue's ci value, AS CALCULATED BY THE DRIVER (not real ci).
+ * @hw_queue_id: the id of the H/W queue.
+ * @int_queue_len: length of internal queue (number of entries).
+ * @valid: is the queue valid (we have array of 32 queues, not all of them
+ *		exists).
+ */
+struct hl_hw_queue {
+	struct hl_cs_job	**shadow_queue;
+	enum hl_queue_type	queue_type;
+	u64			kernel_address;
+	dma_addr_t		bus_address;
+	u32			pi;
+	u32			ci;
+	u32			hw_queue_id;
+	u16			int_queue_len;
+	u8			valid;
+};
+
+/**
+ * struct hl_cq - describes a completion queue
+ * @hdev: pointer to the device structure
+ * @kernel_address: holds the queue's kernel virtual address
+ * @bus_address: holds the queue's DMA address
+ * @hw_queue_id: the id of the matching H/W queue
+ * @ci: ci inside the queue
+ * @pi: pi inside the queue
+ * @free_slots_cnt: counter of free slots in queue
+ */
+struct hl_cq {
+	struct hl_device	*hdev;
+	u64			kernel_address;
+	dma_addr_t		bus_address;
+	u32			hw_queue_id;
+	u32			ci;
+	u32			pi;
+	atomic_t		free_slots_cnt;
+};
 
 
 /*
@@ -164,6 +275,8 @@ enum hl_asic_type {
  * @resume: handles IP specific H/W or SW changes for resume.
  * @mmap: mmap function, does nothing.
  * @cb_mmap: maps a CB.
+ * @ring_doorbell: increment PI on a given QMAN.
+ * @flush_pq_write: flush PQ entry write if necessary, WARN if flushing failed.
  * @dma_alloc_coherent: Allocate coherent DMA memory by calling
  *                      dma_alloc_coherent(). This is ASIC function because its
  *                      implementation is not trivial when the driver is loaded
@@ -172,6 +285,16 @@ enum hl_asic_type {
  *                     This is ASIC function because its implementation is not
  *                     trivial when the driver is loaded in simulation mode
  *                     (not upstreamed).
+ * @get_int_queue_base: get the internal queue base address.
+ * @test_queues: run simple test on all queues for sanity check.
+ * @dma_pool_zalloc: small DMA allocation of coherent memory from DMA pool.
+ *                   size of allocation is HL_DMA_POOL_BLK_SIZE.
+ * @dma_pool_free: free small DMA allocation from pool.
+ * @cpu_accessible_dma_pool_alloc: allocate CPU PQ packet from DMA pool.
+ * @cpu_accessible_dma_pool_free: free CPU PQ packet from DMA pool.
+ * @hw_queues_lock: acquire H/W queues lock.
+ * @hw_queues_unlock: release H/W queues lock.
+ * @send_cpu_message: send buffer to ArmCP.
  */
 struct hl_asic_funcs {
 	int (*early_init)(struct hl_device *hdev);
@@ -185,10 +308,27 @@ struct hl_asic_funcs {
 	int (*mmap)(struct hl_fpriv *hpriv, struct vm_area_struct *vma);
 	int (*cb_mmap)(struct hl_device *hdev, struct vm_area_struct *vma,
 			u64 kaddress, phys_addr_t paddress, u32 size);
+	void (*ring_doorbell)(struct hl_device *hdev, u32 hw_queue_id, u32 pi);
+	void (*flush_pq_write)(struct hl_device *hdev, u64 *pq, u64 exp_val);
 	void* (*dma_alloc_coherent)(struct hl_device *hdev, size_t size,
 					dma_addr_t *dma_handle, gfp_t flag);
 	void (*dma_free_coherent)(struct hl_device *hdev, size_t size,
 					void *cpu_addr, dma_addr_t dma_handle);
+	void* (*get_int_queue_base)(struct hl_device *hdev, u32 queue_id,
+				dma_addr_t *dma_handle, u16 *queue_len);
+	int (*test_queues)(struct hl_device *hdev);
+	void* (*dma_pool_zalloc)(struct hl_device *hdev, size_t size,
+				gfp_t mem_flags, dma_addr_t *dma_handle);
+	void (*dma_pool_free)(struct hl_device *hdev, void *vaddr,
+				dma_addr_t dma_addr);
+	void* (*cpu_accessible_dma_pool_alloc)(struct hl_device *hdev,
+				size_t size, dma_addr_t *dma_handle);
+	void (*cpu_accessible_dma_pool_free)(struct hl_device *hdev,
+				size_t size, void *vaddr);
+	void (*hw_queues_lock)(struct hl_device *hdev);
+	void (*hw_queues_unlock)(struct hl_device *hdev);
+	int (*send_cpu_message)(struct hl_device *hdev, u32 *msg,
+				u16 len, u32 timeout, long *result);
 };
 
 
@@ -224,6 +364,17 @@ struct hl_ctx_mgr {
 };
 
 
+
+
+/**
+ * struct hl_cs_job - command submission job.
+ * @finish_work: workqueue object to run when job is completed.
+ * @id: the id of this job inside a CS.
+ */
+struct hl_cs_job {
+	struct work_struct	finish_work;
+	u32			id;
+};
 /*
  * FILE PRIVATE STRUCTURE
  */
@@ -298,7 +449,11 @@ void hl_wreg(struct hl_device *hdev, u32 reg, u32 val);
  * @dev: realted kernel basic device structure.
  * @asic_name: ASIC specific nmae.
  * @asic_type: ASIC specific type.
+ * @completion_queue: array of hl_cq.
+ * @cq_wq: work queue of completion queues for executing work in process context
+ * @eq_wq: work queue of event queue for executing work in process context.
  * @kernel_ctx: KMD context structure.
+ * @kernel_queues: array of hl_hw_queue.
  * @kernel_cb_mgr: command buffer manager for creating/destroying/handling CGs.
  * @dma_pool: DMA pool for small allocations.
  * @cpu_accessible_dma_mem: KMD <-> ArmCP shared memory CPU address.
@@ -312,6 +467,7 @@ void hl_wreg(struct hl_device *hdev, u32 reg, u32 val);
  *                    only a single process at a time. In addition, we need a
  *                    lock here so we can flush user processes which are opening
  *                    the device while we are trying to hard reset it
+ * @send_cpu_message_lock: enforces only one message in KMD <-> ArmCP queue.
  * @asic_prop: ASIC specific immutable properties.
  * @asic_funcs: ASIC specific functions.
  * @asic_specific: ASIC specific information to use only from ASIC files.
@@ -331,7 +487,10 @@ struct hl_device {
 	struct device			*dev;
 	char				asic_name[16];
 	enum hl_asic_type		asic_type;
+	struct hl_cq			*completion_queue;
+	struct workqueue_struct		*cq_wq;
 	struct hl_ctx			*kernel_ctx;
+	struct hl_hw_queue		*kernel_queues;
 	struct hl_cb_mgr		kernel_cb_mgr;
 	struct dma_pool			*dma_pool;
 	void				*cpu_accessible_dma_mem;
@@ -341,6 +500,7 @@ struct hl_device {
 	struct mutex			asid_mutex;
 	/* TODO: remove fd_open_cnt_lock for multiple process support */
 	struct mutex			fd_open_cnt_lock;
+	struct mutex			send_cpu_message_lock;
 	struct asic_fixed_properties	asic_prop;
 	const struct hl_asic_funcs	*asic_funcs;
 	void				*asic_specific;
@@ -358,6 +518,7 @@ struct hl_device {
 	/* Parameters for bring-up */
 	u8				cpu_enable;
 	u8				reset_pcilink;
+	u8				cpu_queues_enable;
 	u8				fw_loading;
 	u8				pldm;
 };
@@ -400,7 +561,18 @@ int hl_poll_timeout_memory(struct hl_device *hdev, u64 addr, u32 timeout_us,
 				u32 *val);
 int hl_poll_timeout_device_memory(struct hl_device *hdev, void __iomem *addr,
 				u32 timeout_us, u32 *val);
-
+int hl_hw_queues_create(struct hl_device *hdev);
+void hl_hw_queues_destroy(struct hl_device *hdev);
+int hl_hw_queue_send_cb_no_cmpl(struct hl_device *hdev, u32 hw_queue_id,
+				u32 cb_size, u64 cb_ptr);
+u32 hl_hw_queue_add_ptr(u32 ptr, u16 val);
+void hl_hw_queue_inc_ci_kernel(struct hl_device *hdev, u32 hw_queue_id);
+
+#define hl_queue_inc_ptr(p)		hl_hw_queue_add_ptr(p, 1)
+#define hl_pi_2_offset(pi)		((pi) & (HL_QUEUE_LENGTH - 1))
+
+int hl_cq_init(struct hl_device *hdev, struct hl_cq *q, u32 hw_queue_id);
+void hl_cq_fini(struct hl_device *hdev, struct hl_cq *q);
 int hl_asid_init(struct hl_device *hdev);
 void hl_asid_fini(struct hl_device *hdev);
 unsigned long hl_asid_alloc(struct hl_device *hdev);
diff --git a/drivers/misc/habanalabs/habanalabs_drv.c b/drivers/misc/habanalabs/habanalabs_drv.c
index 59c2fd196659..93576249307b 100644
--- a/drivers/misc/habanalabs/habanalabs_drv.c
+++ b/drivers/misc/habanalabs/habanalabs_drv.c
@@ -169,6 +169,7 @@ int create_hdev(struct hl_device **dev, struct pci_dev *pdev,
 	/* Parameters for bring-up - set them to defaults */
 	hdev->cpu_enable = 1;
 	hdev->reset_pcilink = 0;
+	hdev->cpu_queues_enable = 1;
 	hdev->fw_loading = 1;
 	hdev->pldm = 0;
 
@@ -176,6 +177,10 @@ int create_hdev(struct hl_device **dev, struct pci_dev *pdev,
 	if (!hdev->cpu_enable)
 		hdev->fw_loading = 0;
 
+	/* If we don't load FW, no need to initialize CPU queues */
+	if (!hdev->fw_loading)
+		hdev->cpu_queues_enable = 0;
+
 	hdev->disabled = true;
 	hdev->pdev = pdev; /* can be NULL in case of simulator device */
 
diff --git a/drivers/misc/habanalabs/hw_queue.c b/drivers/misc/habanalabs/hw_queue.c
new file mode 100644
index 000000000000..2ec43f36cdb8
--- /dev/null
+++ b/drivers/misc/habanalabs/hw_queue.c
@@ -0,0 +1,400 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2016-2019 HabanaLabs, Ltd.
+ * All Rights Reserved.
+ */
+
+#include "habanalabs.h"
+
+#include <linux/slab.h>
+
+/*
+ * hl_queue_add_ptr - add to pi or ci and checks if it wraps around
+ *
+ * @ptr: the current pi/ci value
+ * @val: the amount to add
+ *
+ * Add val to ptr. It can go until twice the queue length.
+ */
+inline u32 hl_hw_queue_add_ptr(u32 ptr, u16 val)
+{
+	ptr += val;
+	ptr &= ((HL_QUEUE_LENGTH << 1) - 1);
+	return ptr;
+}
+
+static inline int queue_free_slots(struct hl_hw_queue *q, u32 queue_len)
+{
+	int delta = (q->pi - q->ci);
+
+	if (delta >= 0)
+		return (queue_len - delta);
+	else
+		return (abs(delta) - queue_len);
+}
+
+/*
+ * ext_queue_submit_bd - Submit a buffer descriptor to an external queue
+ *
+ * @hdev: pointer to habanalabs device structure
+ * @q: pointer to habanalabs queue structure
+ * @ctl: BD's control word
+ * @len: BD's length
+ * @ptr: BD's pointer
+ *
+ * This function assumes there is enough space on the queue to submit a new
+ * BD to it. It initializes the next BD and calls the device specific
+ * function to set the pi (and doorbell)
+ *
+ * This function must be called when the scheduler mutex is taken
+ *
+ */
+static void ext_queue_submit_bd(struct hl_device *hdev, struct hl_hw_queue *q,
+				u32 ctl, u32 len, u64 ptr)
+{
+	struct hl_bd *bd;
+
+	bd = (struct hl_bd *) (uintptr_t) q->kernel_address;
+	bd += hl_pi_2_offset(q->pi);
+	bd->ctl = ctl;
+	bd->len = len;
+	bd->ptr = ptr + hdev->asic_prop.host_phys_base_address;
+
+	q->pi = hl_queue_inc_ptr(q->pi);
+	hdev->asic_funcs->ring_doorbell(hdev, q->hw_queue_id, q->pi);
+}
+
+/*
+ * ext_queue_sanity_checks - perform some sanity checks on external queue
+ *
+ * @hdev              : pointer to hl_device structure
+ * @q                 :	pointer to hl_hw_queue structure
+ * @num_of_entries    : how many entries to check for space
+ * @reserve_cq_entry  :	whether to reserve an entry in the cq
+ *
+ * H/W queues spinlock should be taken before calling this function
+ *
+ * Perform the following:
+ * - Make sure we have enough space in the h/w queue
+ * - Make sure we have enough space in the completion queue
+ * - Reserve space in the completion queue (needs to be reversed if there
+ *   is a failure down the road before the actual submission of work). Only
+ *   do this action if reserve_cq_entry is true
+ *
+ */
+static int ext_queue_sanity_checks(struct hl_device *hdev,
+				struct hl_hw_queue *q, int num_of_entries,
+				bool reserve_cq_entry)
+{
+	atomic_t *free_slots =
+			&hdev->completion_queue[q->hw_queue_id].free_slots_cnt;
+	int free_slots_cnt;
+
+	/* Check we have enough space in the queue */
+	free_slots_cnt = queue_free_slots(q, HL_QUEUE_LENGTH);
+
+	if (free_slots_cnt < num_of_entries) {
+		dev_dbg(hdev->dev, "Queue %d doesn't have room for %d CBs\n",
+			q->hw_queue_id, num_of_entries);
+		return -EAGAIN;
+	}
+
+	if (reserve_cq_entry) {
+		/*
+		 * Check we have enough space in the completion queue
+		 * Add -1 to counter (decrement) unless counter was already 0
+		 * In that case, CQ is full so we can't submit a new CB because
+		 * we won't get ack on its completion
+		 * atomic_add_unless will return 0 if counter was already 0
+		 */
+		if (atomic_add_negative(num_of_entries * -1, free_slots)) {
+			dev_dbg(hdev->dev, "No space for %d on CQ %d\n",
+				num_of_entries, q->hw_queue_id);
+			atomic_add(num_of_entries, free_slots);
+			return -EAGAIN;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * hl_hw_queue_send_cb_no_cmpl - send a single CB (not a JOB) without completion
+ *
+ * @hdev: pointer to hl_device structure
+ * @hw_queue_id: Queue's type
+ * @cb_size: size of CB
+ * @cb_ptr: pointer to CB location
+ *
+ * This function sends a single CB, that must NOT generate a completion entry
+ *
+ */
+int hl_hw_queue_send_cb_no_cmpl(struct hl_device *hdev, u32 hw_queue_id,
+				u32 cb_size, u64 cb_ptr)
+{
+	struct hl_hw_queue *q = &hdev->kernel_queues[hw_queue_id];
+	int rc;
+
+	/*
+	 * The CPU queue is a synchronous queue with an effective depth of
+	 * a single entry (although it is allocated with room for multiple
+	 * entries). Therefore, there is a different lock, called
+	 * send_cpu_message_lock, that serializes accesses to the CPU queue.
+	 * As a result, we don't need to lock the access to the entire H/W
+	 * queues module when submitting a JOB to the CPU queue
+	 */
+	if (q->queue_type != QUEUE_TYPE_CPU)
+		hdev->asic_funcs->hw_queues_lock(hdev);
+
+	if (hdev->disabled) {
+		rc = -EPERM;
+		goto out;
+	}
+
+	rc = ext_queue_sanity_checks(hdev, q, 1, false);
+	if (rc)
+		goto out;
+
+	ext_queue_submit_bd(hdev, q, 0, cb_size, cb_ptr);
+
+out:
+	if (q->queue_type != QUEUE_TYPE_CPU)
+		hdev->asic_funcs->hw_queues_unlock(hdev);
+
+	return rc;
+}
+
+/*
+ * hl_hw_queue_inc_ci_kernel - increment ci for kernel's queue
+ *
+ * @hdev: pointer to hl_device structure
+ * @hw_queue_id: which queue to increment its ci
+ */
+void hl_hw_queue_inc_ci_kernel(struct hl_device *hdev, u32 hw_queue_id)
+{
+	struct hl_hw_queue *q = &hdev->kernel_queues[hw_queue_id];
+
+	q->ci = hl_queue_inc_ptr(q->ci);
+}
+
+static int ext_and_cpu_hw_queue_init(struct hl_device *hdev,
+					struct hl_hw_queue *q)
+{
+	void *p;
+	int rc;
+
+	p = hdev->asic_funcs->dma_alloc_coherent(hdev,
+				HL_QUEUE_SIZE_IN_BYTES,
+				&q->bus_address, GFP_KERNEL | __GFP_ZERO);
+	if (!p)
+		return -ENOMEM;
+
+	q->kernel_address = (u64) (uintptr_t) p;
+
+	q->shadow_queue = kmalloc_array(HL_QUEUE_LENGTH,
+					sizeof(*q->shadow_queue),
+					GFP_KERNEL);
+	if (!q->shadow_queue) {
+		dev_err(hdev->dev,
+			"Failed to allocate shadow queue for H/W queue %d\n",
+			q->hw_queue_id);
+		rc = -ENOMEM;
+		goto free_queue;
+	}
+
+	/* Make sure read/write pointers are initialized to start of queue */
+	q->ci = 0;
+	q->pi = 0;
+
+	return 0;
+
+free_queue:
+	hdev->asic_funcs->dma_free_coherent(hdev, HL_QUEUE_SIZE_IN_BYTES,
+			(void *) (uintptr_t) q->kernel_address, q->bus_address);
+
+	return rc;
+}
+
+static int int_hw_queue_init(struct hl_device *hdev, struct hl_hw_queue *q)
+{
+	void *p;
+
+	p = hdev->asic_funcs->get_int_queue_base(hdev, q->hw_queue_id,
+					&q->bus_address, &q->int_queue_len);
+	if (!p) {
+		dev_err(hdev->dev,
+			"Failed to get base address for internal queue %d\n",
+			q->hw_queue_id);
+		return -EFAULT;
+	}
+
+	q->kernel_address = (u64) (uintptr_t) p;
+	q->pi = 0;
+	q->ci = 0;
+
+	return 0;
+}
+
+static int cpu_hw_queue_init(struct hl_device *hdev, struct hl_hw_queue *q)
+{
+	return ext_and_cpu_hw_queue_init(hdev, q);
+}
+
+static int ext_hw_queue_init(struct hl_device *hdev, struct hl_hw_queue *q)
+{
+	return ext_and_cpu_hw_queue_init(hdev, q);
+}
+
+/*
+ * hw_queue_init - main initialization function for H/W queue object
+ *
+ * @hdev: pointer to hl_device device structure
+ * @q: pointer to hl_hw_queue queue structure
+ * @hw_queue_id: The id of the H/W queue
+ *
+ * Allocate dma-able memory for the queue and initialize fields
+ * Returns 0 on success
+ */
+static int hw_queue_init(struct hl_device *hdev, struct hl_hw_queue *q,
+			u32 hw_queue_id)
+{
+	int rc;
+
+	BUILD_BUG_ON(HL_QUEUE_SIZE_IN_BYTES > HL_PAGE_SIZE);
+
+	q->hw_queue_id = hw_queue_id;
+
+	switch (q->queue_type) {
+	case QUEUE_TYPE_EXT:
+		rc = ext_hw_queue_init(hdev, q);
+		break;
+
+	case QUEUE_TYPE_INT:
+		rc = int_hw_queue_init(hdev, q);
+		break;
+
+	case QUEUE_TYPE_CPU:
+		rc = cpu_hw_queue_init(hdev, q);
+		break;
+
+	case QUEUE_TYPE_NA:
+		q->valid = 0;
+		return 0;
+
+	default:
+		dev_crit(hdev->dev, "wrong queue type %d during init\n",
+			q->queue_type);
+		rc = -EINVAL;
+		break;
+	}
+
+	if (rc)
+		return rc;
+
+	q->valid = 1;
+
+	return 0;
+}
+
+/*
+ * hw_queue_fini - destroy queue
+ *
+ * @hdev: pointer to hl_device device structure
+ * @q: pointer to hl_hw_queue queue structure
+ *
+ * Free the queue memory
+ */
+static void hw_queue_fini(struct hl_device *hdev, struct hl_hw_queue *q)
+{
+	if (!q->valid)
+		return;
+
+	/*
+	 * If we arrived here, there are no jobs waiting on this queue
+	 * so we can safely remove it.
+	 * This is because this function can only called when:
+	 * 1. Either a context is deleted, which only can occur if all its
+	 *    jobs were finished
+	 * 2. A context wasn't able to be created due to failure or timeout,
+	 *    which means there are no jobs on the queue yet
+	 *
+	 * The only exception are the queues of the kernel context, but
+	 * if they are being destroyed, it means that the entire module is
+	 * being removed. If the module is removed, it means there is no open
+	 * user context. It also means that if a job was submitted by
+	 * the kernel driver (e.g. context creation), the job itself was
+	 * released by the kernel driver when a timeout occurred on its
+	 * Completion. Thus, we don't need to release it again.
+	 */
+
+	if (q->queue_type == QUEUE_TYPE_INT)
+		return;
+
+	kfree(q->shadow_queue);
+
+	hdev->asic_funcs->dma_free_coherent(hdev, HL_QUEUE_SIZE_IN_BYTES,
+			(void *) (uintptr_t) q->kernel_address, q->bus_address);
+}
+
+int hl_hw_queues_create(struct hl_device *hdev)
+{
+	struct asic_fixed_properties *asic = &hdev->asic_prop;
+	struct hl_hw_queue *q;
+	int i, rc, q_ready_cnt;
+
+	hdev->kernel_queues = kcalloc(HL_MAX_QUEUES,
+				sizeof(*hdev->kernel_queues), GFP_KERNEL);
+
+	if (!hdev->kernel_queues) {
+		dev_err(hdev->dev, "Not enough memory for H/W queues\n");
+		return -ENOMEM;
+	}
+
+	/* Initialize the H/W queues */
+	for (i = 0, q_ready_cnt = 0, q = hdev->kernel_queues;
+			i < HL_MAX_QUEUES ; i++, q_ready_cnt++, q++) {
+
+		q->queue_type = asic->hw_queues_props[i].type;
+		rc = hw_queue_init(hdev, q, i);
+		if (rc) {
+			dev_err(hdev->dev,
+				"failed to initialize queue %d\n", i);
+			goto release_queues;
+		}
+	}
+
+	return 0;
+
+release_queues:
+	for (i = 0, q = hdev->kernel_queues ; i < q_ready_cnt ; i++, q++)
+		hw_queue_fini(hdev, q);
+
+	kfree(hdev->kernel_queues);
+
+	return rc;
+}
+
+void hl_hw_queues_destroy(struct hl_device *hdev)
+{
+	struct hl_hw_queue *q;
+	int i;
+
+	for (i = 0, q = hdev->kernel_queues ; i < HL_MAX_QUEUES ; i++, q++)
+		hw_queue_fini(hdev, q);
+
+	kfree(hdev->kernel_queues);
+}
+
+void hl_hw_queue_reset(struct hl_device *hdev, bool hard_reset)
+{
+	struct hl_hw_queue *q;
+	int i;
+
+	for (i = 0, q = hdev->kernel_queues ; i < HL_MAX_QUEUES ; i++, q++) {
+		if ((!q->valid) ||
+			((!hard_reset) && (q->queue_type == QUEUE_TYPE_CPU)))
+			continue;
+		q->pi = q->ci = 0;
+	}
+}
diff --git a/drivers/misc/habanalabs/include/armcp_if.h b/drivers/misc/habanalabs/include/armcp_if.h
index 85fc2efe144b..cc37003aa6b7 100644
--- a/drivers/misc/habanalabs/include/armcp_if.h
+++ b/drivers/misc/habanalabs/include/armcp_if.h
@@ -10,10 +10,302 @@
 
 #include <linux/types.h>
 
+enum pq_init_status {
+	PQ_INIT_STATUS_NA = 0,
+	PQ_INIT_STATUS_READY_FOR_CP,
+	PQ_INIT_STATUS_READY_FOR_HOST
+};
+
+/*
+ * ArmCP Primary Queue Packets
+ *
+ * During normal operation, KMD needs to send various messages to ArmCP,
+ * usually either to SET some value into a H/W periphery or to GET the current
+ * value of some H/W periphery. For example, SET the frequency of MME/TPC and
+ * GET the value of the thermal sensor.
+ *
+ * These messages can be initiated either by the User application or by KMD
+ * itself, e.g. power management code. In either case, the communication from
+ * KMD to ArmCP will *always* be in synchronous mode, meaning that KMD will
+ * send a single message and poll until the message was acknowledged and the
+ * results are ready (if results are needed).
+ *
+ * This means that only a single message can be sent at a time and KMD must
+ * wait for its result before sending the next message. Having said that,
+ * because these are control messages which are sent in a relatively low
+ * frequency, this limitation seems acceptable. It's important to note that
+ * in case of multiple devices, messages to different devices *can* be sent
+ * at the same time.
+ *
+ * The message, inputs/outputs (if relevant) and fence object will be located
+ * on the device DDR at an address that will be determined by KMD. During
+ * device initialization phase, KMD will pass to ArmCP that address.  Most of
+ * the message types will contain inputs/outputs inside the message itself.
+ * The common part of each message will contain the opcode of the message (its
+ * type) and a field representing a fence object.
+ *
+ * When KMD wishes to send a message to ArmCP, it will write the message
+ * contents to the device DDR, clear the fence object and then write the
+ * value 484 to the mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR register to issue
+ * the 484 interrupt-id to the ARM core.
+ *
+ * Upon receiving the 484 interrupt-id, ArmCP will read the message from the
+ * DDR. In case the message is a SET operation, ArmCP will first perform the
+ * operation and then write to the fence object on the device DDR. In case the
+ * message is a GET operation, ArmCP will first fill the results section on the
+ * device DDR and then write to the fence object. If an error occurred, ArmCP
+ * will fill the rc field with the right error code.
+ *
+ * In the meantime, KMD will poll on the fence object. Once KMD sees that the
+ * fence object is signaled, it will read the results from the device DDR
+ * (if relevant) and resume the code execution in KMD.
+ *
+ * To use QMAN packets, the opcode must be the QMAN opcode, shifted by 8
+ * so the value being put by the KMD matches the value read by ArmCP
+ *
+ * Non-QMAN packets should be limited to values 1 through (2^8 - 1)
+ *
+ * Detailed description:
+ *
+ * ARMCP_PACKET_DISABLE_PCI_ACCESS -
+ *       After receiving this packet the embedded CPU must NOT issue PCI
+ *       transactions (read/write) towards the Host CPU. This also include
+ *       sending MSI-X interrupts.
+ *       This packet is usually sent before the device is moved to D3Hot state.
+ *
+ * ARMCP_PACKET_ENABLE_PCI_ACCESS -
+ *       After receiving this packet the embedded CPU is allowed to issue PCI
+ *       transactions towards the Host CPU, including sending MSI-X interrupts.
+ *       This packet is usually send after the device is moved to D0 state.
+ *
+ * ARMCP_PACKET_TEMPERATURE_GET -
+ *       Fetch the current temperature / Max / Max Hyst / Critical /
+ *       Critical Hyst of a specified thermal sensor. The packet's
+ *       arguments specify the desired sensor and the field to get.
+ *
+ * ARMCP_PACKET_VOLTAGE_GET -
+ *       Fetch the voltage / Max / Min of a specified sensor. The packet's
+ *       arguments specify the sensor and type.
+ *
+ * ARMCP_PACKET_CURRENT_GET -
+ *       Fetch the current / Max / Min of a specified sensor. The packet's
+ *       arguments specify the sensor and type.
+ *
+ * ARMCP_PACKET_FAN_SPEED_GET -
+ *       Fetch the speed / Max / Min of a specified fan. The packet's
+ *       arguments specify the sensor and type.
+ *
+ * ARMCP_PACKET_PWM_GET -
+ *       Fetch the pwm value / mode of a specified pwm. The packet's
+ *       arguments specify the sensor and type.
+ *
+ * ARMCP_PACKET_PWM_SET -
+ *       Set the pwm value / mode of a specified pwm. The packet's
+ *       arguments specify the sensor, type and value.
+ *
+ * ARMCP_PACKET_FREQUENCY_SET -
+ *       Set the frequency of a specified PLL. The packet's arguments specify
+ *       the PLL and the desired frequency. The actual frequency in the device
+ *       might differ from the requested frequency.
+ *
+ * ARMCP_PACKET_FREQUENCY_GET -
+ *       Fetch the frequency of a specified PLL. The packet's arguments specify
+ *       the PLL.
+ *
+ * ARMCP_PACKET_LED_SET -
+ *       Set the state of a specified led. The packet's arguments
+ *       specify the led and the desired state.
+ *
+ * ARMCP_PACKET_I2C_WR -
+ *       Write 32-bit value to I2C device. The packet's arguments specify the
+ *       I2C bus, address and value.
+ *
+ * ARMCP_PACKET_I2C_RD -
+ *       Read 32-bit value from I2C device. The packet's arguments specify the
+ *       I2C bus and address.
+ *
+ * ARMCP_PACKET_INFO_GET -
+ *       Fetch information from the device as specified in the packet's
+ *       structure. KMD passes the max size it allows the ArmCP to write to
+ *       the structure, to prevent data corruption in case of mismatched
+ *       KMD/FW versions.
+ *
+ * ARMCP_PACKET_FLASH_PROGRAM_REMOVED - this packet was removed
+ *
+ * ARMCP_PACKET_UNMASK_RAZWI_IRQ -
+ *       Unmask the given IRQ. The IRQ number is specified in the value field.
+ *       The packet is sent after receiving an interrupt and printing its
+ *       relevant information.
+ *
+ * ARMCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY -
+ *       Unmask the given IRQs. The IRQs numbers are specified in an array right
+ *       after the armcp_packet structure, where its first element is the array
+ *       length. The packet is sent after a soft reset was done in order to
+ *       handle any interrupts that were sent during the reset process.
+ *
+ * ARMCP_PACKET_TEST -
+ *       Test packet for ArmCP connectivity. The CPU will put the fence value
+ *       in the result field.
+ *
+ * ARMCP_PACKET_FREQUENCY_CURR_GET -
+ *       Fetch the current frequency of a specified PLL. The packet's arguments
+ *       specify the PLL.
+ *
+ * ARMCP_PACKET_MAX_POWER_GET -
+ *       Fetch the maximal power of the device.
+ *
+ * ARMCP_PACKET_MAX_POWER_SET -
+ *       Set the maximal power of the device. The packet's arguments specify
+ *       the power.
+ *
+ * ARMCP_PACKET_EEPROM_DATA_GET -
+ *       Get EEPROM data from the ArmCP kernel. The buffer is specified in the
+ *       addr field. The CPU will put the returned data size in the result
+ *       field. In addition, KMD passes the max size it allows the ArmCP to
+ *       write to the structure, to prevent data corruption in case of
+ *       mismatched KMD/FW versions.
+ *
+ */
+
+enum armcp_packet_id {
+	ARMCP_PACKET_DISABLE_PCI_ACCESS = 1,	/* internal */
+	ARMCP_PACKET_ENABLE_PCI_ACCESS,		/* internal */
+	ARMCP_PACKET_TEMPERATURE_GET,		/* sysfs */
+	ARMCP_PACKET_VOLTAGE_GET,		/* sysfs */
+	ARMCP_PACKET_CURRENT_GET,		/* sysfs */
+	ARMCP_PACKET_FAN_SPEED_GET,		/* sysfs */
+	ARMCP_PACKET_PWM_GET,			/* sysfs */
+	ARMCP_PACKET_PWM_SET,			/* sysfs */
+	ARMCP_PACKET_FREQUENCY_SET,		/* sysfs */
+	ARMCP_PACKET_FREQUENCY_GET,		/* sysfs */
+	ARMCP_PACKET_LED_SET,			/* debugfs */
+	ARMCP_PACKET_I2C_WR,			/* debugfs */
+	ARMCP_PACKET_I2C_RD,			/* debugfs */
+	ARMCP_PACKET_INFO_GET,			/* IOCTL */
+	ARMCP_PACKET_FLASH_PROGRAM_REMOVED,
+	ARMCP_PACKET_UNMASK_RAZWI_IRQ,		/* internal */
+	ARMCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY,	/* internal */
+	ARMCP_PACKET_TEST,			/* internal */
+	ARMCP_PACKET_FREQUENCY_CURR_GET,	/* sysfs */
+	ARMCP_PACKET_MAX_POWER_GET,		/* sysfs */
+	ARMCP_PACKET_MAX_POWER_SET,		/* sysfs */
+	ARMCP_PACKET_EEPROM_DATA_GET,		/* sysfs */
+};
+
+#define ARMCP_PACKET_FENCE_VAL	0xFE8CE7A5
+
+#define ARMCP_PKT_CTL_RC_SHIFT		12
+#define ARMCP_PKT_CTL_RC_MASK		0x0000F000
+
+#define ARMCP_PKT_CTL_OPCODE_SHIFT	16
+#define ARMCP_PKT_CTL_OPCODE_MASK	0x1FFF0000
+
+struct armcp_packet {
+	union {
+		__le64 value;	/* For SET packets */
+		__le64 result;	/* For GET packets */
+		__le64 addr;	/* For PQ */
+	};
+
+	__le32 ctl;
+
+	__le32 fence;		/* Signal to KMD that message is completed */
+
+	union {
+		struct {/* For temperature/current/voltage/fan/pwm get/set */
+			__le16 sensor_index;
+			__le16 type;
+		};
+
+		struct {	/* For I2C read/write */
+			__u8 i2c_bus;
+			__u8 i2c_addr;
+			__u8 i2c_reg;
+			__u8 pad; /* unused */
+		};
+
+		/* For frequency get/set */
+		__le32 pll_index;
+
+		/* For led set */
+		__le32 led_index;
+
+		/* For get Armcp info/EEPROM data */
+		__le32 data_max_size;
+	};
+};
+
+struct armcp_unmask_irq_arr_packet {
+	struct armcp_packet armcp_pkt;
+	__le32 length;
+	__le32 irqs[0];
+};
+
+enum armcp_packet_rc {
+	armcp_packet_success,
+	armcp_packet_invalid,
+	armcp_packet_fault
+};
+
+enum armcp_temp_type {
+	armcp_temp_input,
+	armcp_temp_max = 6,
+	armcp_temp_max_hyst,
+	armcp_temp_crit,
+	armcp_temp_crit_hyst
+};
+
+enum armcp_in_attributes {
+	armcp_in_input,
+	armcp_in_min,
+	armcp_in_max
+};
+
+enum armcp_curr_attributes {
+	armcp_curr_input,
+	armcp_curr_min,
+	armcp_curr_max
+};
+
+enum armcp_fan_attributes {
+	armcp_fan_input,
+	armcp_fan_min = 2,
+	armcp_fan_max
+};
+
+enum armcp_pwm_attributes {
+	armcp_pwm_input,
+	armcp_pwm_enable
+};
+
+/* Event Queue Packets */
+
+struct eq_generic_event {
+	__le64 data[7];
+};
+
 /*
  * ArmCP info
  */
 
 #define VERSION_MAX_LEN			128
+#define ARMCP_MAX_SENSORS		128
+
+struct armcp_sensor {
+	__le32 type;
+	__le32 flags;
+};
+
+struct armcp_info {
+	struct armcp_sensor sensors[ARMCP_MAX_SENSORS];
+	__u8 kernel_version[VERSION_MAX_LEN];
+	__le32 reserved[3];
+	__le32 cpld_version;
+	__le32 infineon_version;
+	__u8 fuse_version[VERSION_MAX_LEN];
+	__u8 thermal_version[VERSION_MAX_LEN];
+	__u8 armcp_version[VERSION_MAX_LEN];
+	__le64 dram_size;
+};
 
 #endif /* ARMCP_IF_H */
diff --git a/drivers/misc/habanalabs/include/goya/goya_async_events.h b/drivers/misc/habanalabs/include/goya/goya_async_events.h
new file mode 100644
index 000000000000..497937a17ee9
--- /dev/null
+++ b/drivers/misc/habanalabs/include/goya/goya_async_events.h
@@ -0,0 +1,186 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright 2018 HabanaLabs, Ltd.
+ * All Rights Reserved.
+ *
+ */
+
+#ifndef __GOYA_ASYNC_EVENTS_H_
+#define __GOYA_ASYNC_EVENTS_H_
+
+enum goya_async_event_id {
+	GOYA_ASYNC_EVENT_ID_PCIE_IF = 33,
+	GOYA_ASYNC_EVENT_ID_TPC0_ECC = 36,
+	GOYA_ASYNC_EVENT_ID_TPC1_ECC = 39,
+	GOYA_ASYNC_EVENT_ID_TPC2_ECC = 42,
+	GOYA_ASYNC_EVENT_ID_TPC3_ECC = 45,
+	GOYA_ASYNC_EVENT_ID_TPC4_ECC = 48,
+	GOYA_ASYNC_EVENT_ID_TPC5_ECC = 51,
+	GOYA_ASYNC_EVENT_ID_TPC6_ECC = 54,
+	GOYA_ASYNC_EVENT_ID_TPC7_ECC = 57,
+	GOYA_ASYNC_EVENT_ID_MME_ECC = 60,
+	GOYA_ASYNC_EVENT_ID_MME_ECC_EXT = 61,
+	GOYA_ASYNC_EVENT_ID_MMU_ECC = 63,
+	GOYA_ASYNC_EVENT_ID_DMA_MACRO = 64,
+	GOYA_ASYNC_EVENT_ID_DMA_ECC = 66,
+	GOYA_ASYNC_EVENT_ID_CPU_IF_ECC = 75,
+	GOYA_ASYNC_EVENT_ID_PSOC_MEM = 78,
+	GOYA_ASYNC_EVENT_ID_PSOC_CORESIGHT = 79,
+	GOYA_ASYNC_EVENT_ID_SRAM0 = 81,
+	GOYA_ASYNC_EVENT_ID_SRAM1 = 82,
+	GOYA_ASYNC_EVENT_ID_SRAM2 = 83,
+	GOYA_ASYNC_EVENT_ID_SRAM3 = 84,
+	GOYA_ASYNC_EVENT_ID_SRAM4 = 85,
+	GOYA_ASYNC_EVENT_ID_SRAM5 = 86,
+	GOYA_ASYNC_EVENT_ID_SRAM6 = 87,
+	GOYA_ASYNC_EVENT_ID_SRAM7 = 88,
+	GOYA_ASYNC_EVENT_ID_SRAM8 = 89,
+	GOYA_ASYNC_EVENT_ID_SRAM9 = 90,
+	GOYA_ASYNC_EVENT_ID_SRAM10 = 91,
+	GOYA_ASYNC_EVENT_ID_SRAM11 = 92,
+	GOYA_ASYNC_EVENT_ID_SRAM12 = 93,
+	GOYA_ASYNC_EVENT_ID_SRAM13 = 94,
+	GOYA_ASYNC_EVENT_ID_SRAM14 = 95,
+	GOYA_ASYNC_EVENT_ID_SRAM15 = 96,
+	GOYA_ASYNC_EVENT_ID_SRAM16 = 97,
+	GOYA_ASYNC_EVENT_ID_SRAM17 = 98,
+	GOYA_ASYNC_EVENT_ID_SRAM18 = 99,
+	GOYA_ASYNC_EVENT_ID_SRAM19 = 100,
+	GOYA_ASYNC_EVENT_ID_SRAM20 = 101,
+	GOYA_ASYNC_EVENT_ID_SRAM21 = 102,
+	GOYA_ASYNC_EVENT_ID_SRAM22 = 103,
+	GOYA_ASYNC_EVENT_ID_SRAM23 = 104,
+	GOYA_ASYNC_EVENT_ID_SRAM24 = 105,
+	GOYA_ASYNC_EVENT_ID_SRAM25 = 106,
+	GOYA_ASYNC_EVENT_ID_SRAM26 = 107,
+	GOYA_ASYNC_EVENT_ID_SRAM27 = 108,
+	GOYA_ASYNC_EVENT_ID_SRAM28 = 109,
+	GOYA_ASYNC_EVENT_ID_SRAM29 = 110,
+	GOYA_ASYNC_EVENT_ID_GIC500 = 112,
+	GOYA_ASYNC_EVENT_ID_PCIE_DEC = 115,
+	GOYA_ASYNC_EVENT_ID_TPC0_DEC = 117,
+	GOYA_ASYNC_EVENT_ID_TPC1_DEC = 120,
+	GOYA_ASYNC_EVENT_ID_TPC2_DEC = 123,
+	GOYA_ASYNC_EVENT_ID_TPC3_DEC = 126,
+	GOYA_ASYNC_EVENT_ID_TPC4_DEC = 129,
+	GOYA_ASYNC_EVENT_ID_TPC5_DEC = 132,
+	GOYA_ASYNC_EVENT_ID_TPC6_DEC = 135,
+	GOYA_ASYNC_EVENT_ID_TPC7_DEC = 138,
+	GOYA_ASYNC_EVENT_ID_AXI_ECC = 139,
+	GOYA_ASYNC_EVENT_ID_L2_RAM_ECC = 140,
+	GOYA_ASYNC_EVENT_ID_MME_WACS = 141,
+	GOYA_ASYNC_EVENT_ID_MME_WACSD = 142,
+	GOYA_ASYNC_EVENT_ID_PLL0 = 143,
+	GOYA_ASYNC_EVENT_ID_PLL1 = 144,
+	GOYA_ASYNC_EVENT_ID_PLL3 = 146,
+	GOYA_ASYNC_EVENT_ID_PLL4 = 147,
+	GOYA_ASYNC_EVENT_ID_PLL5 = 148,
+	GOYA_ASYNC_EVENT_ID_PLL6 = 149,
+	GOYA_ASYNC_EVENT_ID_CPU_AXI_SPLITTER = 155,
+	GOYA_ASYNC_EVENT_ID_PSOC_AXI_DEC = 159,
+	GOYA_ASYNC_EVENT_ID_PSOC = 160,
+	GOYA_ASYNC_EVENT_ID_PCIE_FLR = 171,
+	GOYA_ASYNC_EVENT_ID_PCIE_HOT_RESET = 172,
+	GOYA_ASYNC_EVENT_ID_PCIE_QID0_ENG0 = 174,
+	GOYA_ASYNC_EVENT_ID_PCIE_QID0_ENG1 = 175,
+	GOYA_ASYNC_EVENT_ID_PCIE_QID0_ENG2 = 176,
+	GOYA_ASYNC_EVENT_ID_PCIE_QID0_ENG3 = 177,
+	GOYA_ASYNC_EVENT_ID_PCIE_QID1_ENG0 = 178,
+	GOYA_ASYNC_EVENT_ID_PCIE_QID1_ENG1 = 179,
+	GOYA_ASYNC_EVENT_ID_PCIE_QID1_ENG2 = 180,
+	GOYA_ASYNC_EVENT_ID_PCIE_QID1_ENG3 = 181,
+	GOYA_ASYNC_EVENT_ID_PCIE_APB = 182,
+	GOYA_ASYNC_EVENT_ID_PCIE_QDB = 183,
+	GOYA_ASYNC_EVENT_ID_PCIE_BM_D_P_WR = 184,
+	GOYA_ASYNC_EVENT_ID_PCIE_BM_D_RD = 185,
+	GOYA_ASYNC_EVENT_ID_PCIE_BM_U_P_WR = 186,
+	GOYA_ASYNC_EVENT_ID_PCIE_BM_U_RD = 187,
+	GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU = 190,
+	GOYA_ASYNC_EVENT_ID_TPC0_KRN_ERR = 191,
+	GOYA_ASYNC_EVENT_ID_TPC1_BMON_SPMU = 200,
+	GOYA_ASYNC_EVENT_ID_TPC1_KRN_ERR = 201,
+	GOYA_ASYNC_EVENT_ID_TPC2_BMON_SPMU = 210,
+	GOYA_ASYNC_EVENT_ID_TPC2_KRN_ERR = 211,
+	GOYA_ASYNC_EVENT_ID_TPC3_BMON_SPMU = 220,
+	GOYA_ASYNC_EVENT_ID_TPC3_KRN_ERR = 221,
+	GOYA_ASYNC_EVENT_ID_TPC4_BMON_SPMU = 230,
+	GOYA_ASYNC_EVENT_ID_TPC4_KRN_ERR = 231,
+	GOYA_ASYNC_EVENT_ID_TPC5_BMON_SPMU = 240,
+	GOYA_ASYNC_EVENT_ID_TPC5_KRN_ERR = 241,
+	GOYA_ASYNC_EVENT_ID_TPC6_BMON_SPMU = 250,
+	GOYA_ASYNC_EVENT_ID_TPC6_KRN_ERR = 251,
+	GOYA_ASYNC_EVENT_ID_TPC7_BMON_SPMU = 260,
+	GOYA_ASYNC_EVENT_ID_TPC7_KRN_ERR = 261,
+	GOYA_ASYNC_EVENT_ID_MMU_SBA_SPMU0 = 270,
+	GOYA_ASYNC_EVENT_ID_MMU_SBA_SPMU1 = 271,
+	GOYA_ASYNC_EVENT_ID_MME_WACS_UP = 272,
+	GOYA_ASYNC_EVENT_ID_MME_WACS_DOWN = 273,
+	GOYA_ASYNC_EVENT_ID_MMU_PAGE_FAULT = 280,
+	GOYA_ASYNC_EVENT_ID_MMU_WR_PERM = 281,
+	GOYA_ASYNC_EVENT_ID_MMU_DBG_BM = 282,
+	GOYA_ASYNC_EVENT_ID_DMA_BM_CH0 = 290,
+	GOYA_ASYNC_EVENT_ID_DMA_BM_CH1 = 291,
+	GOYA_ASYNC_EVENT_ID_DMA_BM_CH2 = 292,
+	GOYA_ASYNC_EVENT_ID_DMA_BM_CH3 = 293,
+	GOYA_ASYNC_EVENT_ID_DMA_BM_CH4 = 294,
+	GOYA_ASYNC_EVENT_ID_DDR0_PHY_DFI = 300,
+	GOYA_ASYNC_EVENT_ID_DDR0_ECC_SCRUB = 301,
+	GOYA_ASYNC_EVENT_ID_DDR0_DB_ECC = 302,
+	GOYA_ASYNC_EVENT_ID_DDR0_SB_ECC = 303,
+	GOYA_ASYNC_EVENT_ID_DDR0_SB_ECC_MC = 304,
+	GOYA_ASYNC_EVENT_ID_DDR0_AXI_RD = 305,
+	GOYA_ASYNC_EVENT_ID_DDR0_AXI_WR = 306,
+	GOYA_ASYNC_EVENT_ID_DDR1_PHY_DFI = 310,
+	GOYA_ASYNC_EVENT_ID_DDR1_ECC_SCRUB = 311,
+	GOYA_ASYNC_EVENT_ID_DDR1_DB_ECC = 312,
+	GOYA_ASYNC_EVENT_ID_DDR1_SB_ECC = 313,
+	GOYA_ASYNC_EVENT_ID_DDR1_SB_ECC_MC = 314,
+	GOYA_ASYNC_EVENT_ID_DDR1_AXI_RD = 315,
+	GOYA_ASYNC_EVENT_ID_DDR1_AXI_WR = 316,
+	GOYA_ASYNC_EVENT_ID_CPU_BMON = 320,
+	GOYA_ASYNC_EVENT_ID_TS_EAST = 322,
+	GOYA_ASYNC_EVENT_ID_TS_WEST = 323,
+	GOYA_ASYNC_EVENT_ID_TS_NORTH = 324,
+	GOYA_ASYNC_EVENT_ID_PSOC_GPIO_U16_0 = 330,
+	GOYA_ASYNC_EVENT_ID_PSOC_GPIO_U16_1 = 331,
+	GOYA_ASYNC_EVENT_ID_PSOC_GPIO_U16_2 = 332,
+	GOYA_ASYNC_EVENT_ID_PSOC_GPIO_05_SW_RESET = 356,
+	GOYA_ASYNC_EVENT_ID_PSOC_GPIO_10_VRHOT_ICRIT = 361,
+	GOYA_ASYNC_EVENT_ID_TPC0_CMDQ = 430,
+	GOYA_ASYNC_EVENT_ID_TPC1_CMDQ = 431,
+	GOYA_ASYNC_EVENT_ID_TPC2_CMDQ = 432,
+	GOYA_ASYNC_EVENT_ID_TPC3_CMDQ = 433,
+	GOYA_ASYNC_EVENT_ID_TPC4_CMDQ = 434,
+	GOYA_ASYNC_EVENT_ID_TPC5_CMDQ = 435,
+	GOYA_ASYNC_EVENT_ID_TPC6_CMDQ = 436,
+	GOYA_ASYNC_EVENT_ID_TPC7_CMDQ = 437,
+	GOYA_ASYNC_EVENT_ID_TPC0_QM = 438,
+	GOYA_ASYNC_EVENT_ID_TPC1_QM = 439,
+	GOYA_ASYNC_EVENT_ID_TPC2_QM = 440,
+	GOYA_ASYNC_EVENT_ID_TPC3_QM = 441,
+	GOYA_ASYNC_EVENT_ID_TPC4_QM = 442,
+	GOYA_ASYNC_EVENT_ID_TPC5_QM = 443,
+	GOYA_ASYNC_EVENT_ID_TPC6_QM = 444,
+	GOYA_ASYNC_EVENT_ID_TPC7_QM = 445,
+	GOYA_ASYNC_EVENT_ID_MME_QM = 447,
+	GOYA_ASYNC_EVENT_ID_MME_CMDQ = 448,
+	GOYA_ASYNC_EVENT_ID_DMA0_QM = 449,
+	GOYA_ASYNC_EVENT_ID_DMA1_QM = 450,
+	GOYA_ASYNC_EVENT_ID_DMA2_QM = 451,
+	GOYA_ASYNC_EVENT_ID_DMA3_QM = 452,
+	GOYA_ASYNC_EVENT_ID_DMA4_QM = 453,
+	GOYA_ASYNC_EVENT_ID_DMA_ON_HBW = 454,
+	GOYA_ASYNC_EVENT_ID_DMA0_CH = 455,
+	GOYA_ASYNC_EVENT_ID_DMA1_CH = 456,
+	GOYA_ASYNC_EVENT_ID_DMA2_CH = 457,
+	GOYA_ASYNC_EVENT_ID_DMA3_CH = 458,
+	GOYA_ASYNC_EVENT_ID_DMA4_CH = 459,
+	GOYA_ASYNC_EVENT_ID_PI_UPDATE = 484,
+	GOYA_ASYNC_EVENT_ID_HALT_MACHINE = 485,
+	GOYA_ASYNC_EVENT_ID_INTS_REGISTER = 486,
+	GOYA_ASYNC_EVENT_ID_SOFT_RESET = 487,
+	GOYA_ASYNC_EVENT_ID_LAST_VALID_ID = 1023,
+	GOYA_ASYNC_EVENT_ID_SIZE
+};
+
+#endif /* __GOYA_ASYNC_EVENTS_H_ */
diff --git a/drivers/misc/habanalabs/include/goya/goya_packets.h b/drivers/misc/habanalabs/include/goya/goya_packets.h
new file mode 100644
index 000000000000..a14407b975e4
--- /dev/null
+++ b/drivers/misc/habanalabs/include/goya/goya_packets.h
@@ -0,0 +1,129 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright 2017-2018 HabanaLabs, Ltd.
+ * All Rights Reserved.
+ *
+ */
+
+#ifndef GOYA_PACKETS_H
+#define GOYA_PACKETS_H
+
+#include <linux/types.h>
+
+#define PACKET_HEADER_PACKET_ID_SHIFT		56
+#define PACKET_HEADER_PACKET_ID_MASK		0x1F00000000000000ull
+
+enum packet_id {
+	PACKET_WREG_32 = 0x1,
+	PACKET_WREG_BULK = 0x2,
+	PACKET_MSG_LONG = 0x3,
+	PACKET_MSG_SHORT = 0x4,
+	PACKET_CP_DMA = 0x5,
+	PACKET_MSG_PROT = 0x7,
+	PACKET_FENCE = 0x8,
+	PACKET_LIN_DMA = 0x9,
+	PACKET_NOP = 0xA,
+	PACKET_STOP = 0xB,
+	MAX_PACKET_ID = (PACKET_HEADER_PACKET_ID_MASK >>
+				PACKET_HEADER_PACKET_ID_SHIFT) + 1
+};
+
+enum goya_dma_direction {
+	DMA_HOST_TO_DRAM,
+	DMA_HOST_TO_SRAM,
+	DMA_DRAM_TO_SRAM,
+	DMA_SRAM_TO_DRAM,
+	DMA_SRAM_TO_HOST,
+	DMA_DRAM_TO_HOST,
+	DMA_DRAM_TO_DRAM,
+	DMA_SRAM_TO_SRAM,
+	DMA_ENUM_MAX
+};
+
+#define GOYA_PKT_CTL_OPCODE_SHIFT	24
+#define GOYA_PKT_CTL_OPCODE_MASK	0x1F000000
+
+#define GOYA_PKT_CTL_EB_SHIFT		29
+#define GOYA_PKT_CTL_EB_MASK		0x20000000
+
+#define GOYA_PKT_CTL_RB_SHIFT		30
+#define GOYA_PKT_CTL_RB_MASK		0x40000000
+
+#define GOYA_PKT_CTL_MB_SHIFT		31
+#define GOYA_PKT_CTL_MB_MASK		0x80000000
+
+struct packet_nop {
+	__le32 reserved;
+	__le32 ctl;
+};
+
+struct packet_stop {
+	__le32 reserved;
+	__le32 ctl;
+};
+
+#define GOYA_PKT_WREG32_CTL_REG_OFFSET_SHIFT	0
+#define GOYA_PKT_WREG32_CTL_REG_OFFSET_MASK	0x0000FFFF
+
+struct packet_wreg32 {
+	__le32 value;
+	__le32 ctl;
+};
+
+struct packet_wreg_bulk {
+	__le32 size64;
+	__le32 ctl;
+	__le64 values[0]; /* data starts here */
+};
+
+struct packet_msg_long {
+	__le32 value;
+	__le32 ctl;
+	__le64 addr;
+};
+
+struct packet_msg_short {
+	__le32 value;
+	__le32 ctl;
+};
+
+struct packet_msg_prot {
+	__le32 value;
+	__le32 ctl;
+	__le64 addr;
+};
+
+struct packet_fence {
+	__le32 cfg;
+	__le32 ctl;
+};
+
+#define GOYA_PKT_LIN_DMA_CTL_WO_SHIFT		0
+#define GOYA_PKT_LIN_DMA_CTL_WO_MASK		0x00000001
+
+#define GOYA_PKT_LIN_DMA_CTL_RDCOMP_SHIFT	1
+#define GOYA_PKT_LIN_DMA_CTL_RDCOMP_MASK	0x00000002
+
+#define GOYA_PKT_LIN_DMA_CTL_WRCOMP_SHIFT	2
+#define GOYA_PKT_LIN_DMA_CTL_WRCOMP_MASK	0x00000004
+
+#define GOYA_PKT_LIN_DMA_CTL_MEMSET_SHIFT	6
+#define GOYA_PKT_LIN_DMA_CTL_MEMSET_MASK	0x00000040
+
+#define GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT	20
+#define GOYA_PKT_LIN_DMA_CTL_DMA_DIR_MASK	0x00700000
+
+struct packet_lin_dma {
+	__le32 tsize;
+	__le32 ctl;
+	__le64 src_addr;
+	__le64 dst_addr;
+};
+
+struct packet_cp_dma {
+	__le32 tsize;
+	__le32 ctl;
+	__le64 src_addr;
+};
+
+#endif /* GOYA_PACKETS_H */
diff --git a/drivers/misc/habanalabs/include/qman_if.h b/drivers/misc/habanalabs/include/qman_if.h
new file mode 100644
index 000000000000..bf59bbe27fdc
--- /dev/null
+++ b/drivers/misc/habanalabs/include/qman_if.h
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright 2016-2018 HabanaLabs, Ltd.
+ * All Rights Reserved.
+ *
+ */
+
+#ifndef QMAN_IF_H
+#define QMAN_IF_H
+
+#include <linux/types.h>
+
+/*
+ * PRIMARY QUEUE
+ */
+
+struct hl_bd {
+	__le64	ptr;
+	__le32	len;
+	__le32	ctl;
+};
+
+#define HL_BD_SIZE			sizeof(struct hl_bd)
+
+/*
+ * BD_CTL_REPEAT_VALID tells the CP whether the repeat field in the BD CTL is
+ * valid. 1 means the repeat field is valid, 0 means not-valid,
+ * i.e. repeat == 1
+ */
+#define BD_CTL_REPEAT_VALID_SHIFT	24
+#define BD_CTL_REPEAT_VALID_MASK	0x01000000
+
+#define BD_CTL_SHADOW_INDEX_SHIFT	0
+#define BD_CTL_SHADOW_INDEX_MASK	0x00000FFF
+
+/*
+ * COMPLETION QUEUE
+ */
+
+struct hl_cq_entry {
+	__le32	data;
+};
+
+#define HL_CQ_ENTRY_SIZE		sizeof(struct hl_cq_entry)
+
+#define CQ_ENTRY_READY_SHIFT			31
+#define CQ_ENTRY_READY_MASK			0x80000000
+
+#define CQ_ENTRY_SHADOW_INDEX_VALID_SHIFT	30
+#define CQ_ENTRY_SHADOW_INDEX_VALID_MASK	0x40000000
+
+#define CQ_ENTRY_SHADOW_INDEX_SHIFT		BD_CTL_SHADOW_INDEX_SHIFT
+#define CQ_ENTRY_SHADOW_INDEX_MASK		BD_CTL_SHADOW_INDEX_MASK
+
+
+#endif /* QMAN_IF_H */
diff --git a/drivers/misc/habanalabs/irq.c b/drivers/misc/habanalabs/irq.c
new file mode 100644
index 000000000000..6b7d35f6af08
--- /dev/null
+++ b/drivers/misc/habanalabs/irq.c
@@ -0,0 +1,149 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2016-2019 HabanaLabs, Ltd.
+ * All Rights Reserved.
+ */
+
+#include "habanalabs.h"
+
+#include <linux/irqreturn.h>
+
+/*
+ * hl_cq_inc_ptr - increment ci or pi of cq
+ *
+ * @ptr: the current ci or pi value of the completion queue
+ *
+ * Increment ptr by 1. If it reaches the number of completion queue
+ * entries, set it to 0
+ */
+inline u32 hl_cq_inc_ptr(u32 ptr)
+{
+	ptr++;
+	if (unlikely(ptr == HL_CQ_LENGTH))
+		ptr = 0;
+	return ptr;
+}
+
+/*
+ * hl_irq_handler_cq - irq handler for completion queue
+ *
+ * @irq: irq number
+ * @arg: pointer to completion queue structure
+ *
+ */
+irqreturn_t hl_irq_handler_cq(int irq, void *arg)
+{
+	struct hl_cq *cq = arg;
+	struct hl_device *hdev = cq->hdev;
+	struct hl_hw_queue *queue;
+	struct hl_cs_job *job;
+	bool shadow_index_valid;
+	u16 shadow_index;
+	u32 *cq_entry;
+	u32 *cq_base;
+
+	if (hdev->disabled) {
+		dev_dbg(hdev->dev,
+			"Device disabled but received IRQ %d for CQ %d\n",
+			irq, cq->hw_queue_id);
+		return IRQ_HANDLED;
+	}
+
+	cq_base = (u32 *) (uintptr_t) cq->kernel_address;
+
+	while (1) {
+		bool entry_ready = ((cq_base[cq->ci] & CQ_ENTRY_READY_MASK)
+						>> CQ_ENTRY_READY_SHIFT);
+
+		if (!entry_ready)
+			break;
+
+		cq_entry = (u32 *) &cq_base[cq->ci];
+
+		/*
+		 * Make sure we read CQ entry contents after we've
+		 * checked the ownership bit.
+		 */
+		dma_rmb();
+
+		shadow_index_valid =
+			((*cq_entry & CQ_ENTRY_SHADOW_INDEX_VALID_MASK)
+					>> CQ_ENTRY_SHADOW_INDEX_VALID_SHIFT);
+
+		shadow_index = (u16)
+			((*cq_entry & CQ_ENTRY_SHADOW_INDEX_MASK)
+					>> CQ_ENTRY_SHADOW_INDEX_SHIFT);
+
+		queue = &hdev->kernel_queues[cq->hw_queue_id];
+
+		if ((shadow_index_valid) && (!hdev->disabled)) {
+			job = queue->shadow_queue[hl_pi_2_offset(shadow_index)];
+			queue_work(hdev->cq_wq, &job->finish_work);
+		}
+
+		/*
+		 * Update ci of the context's queue. There is no
+		 * need to protect it with spinlock because this update is
+		 * done only inside IRQ and there is a different IRQ per
+		 * queue
+		 */
+		queue->ci = hl_queue_inc_ptr(queue->ci);
+
+		/* Clear CQ entry ready bit */
+		cq_base[cq->ci] &= ~CQ_ENTRY_READY_MASK;
+
+		cq->ci = hl_cq_inc_ptr(cq->ci);
+
+		/* Increment free slots */
+		atomic_inc(&cq->free_slots_cnt);
+	}
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * hl_cq_init - main initialization function for an cq object
+ *
+ * @hdev: pointer to device structure
+ * @q: pointer to cq structure
+ * @hw_queue_id: The H/W queue ID this completion queue belongs to
+ *
+ * Allocate dma-able memory for the completion queue and initialize fields
+ * Returns 0 on success
+ */
+int hl_cq_init(struct hl_device *hdev, struct hl_cq *q, u32 hw_queue_id)
+{
+	void *p;
+
+	BUILD_BUG_ON(HL_CQ_SIZE_IN_BYTES > HL_PAGE_SIZE);
+
+	p = hdev->asic_funcs->dma_alloc_coherent(hdev, HL_CQ_SIZE_IN_BYTES,
+				&q->bus_address, GFP_KERNEL | __GFP_ZERO);
+	if (!p)
+		return -ENOMEM;
+
+	q->hdev = hdev;
+	q->kernel_address = (u64) (uintptr_t) p;
+	q->hw_queue_id = hw_queue_id;
+	q->ci = 0;
+	q->pi = 0;
+
+	atomic_set(&q->free_slots_cnt, HL_CQ_LENGTH);
+
+	return 0;
+}
+
+/*
+ * hl_cq_fini - destroy completion queue
+ *
+ * @hdev: pointer to device structure
+ * @q: pointer to cq structure
+ *
+ * Free the completion queue memory
+ */
+void hl_cq_fini(struct hl_device *hdev, struct hl_cq *q)
+{
+	hdev->asic_funcs->dma_free_coherent(hdev, HL_CQ_SIZE_IN_BYTES,
+			(void *) (uintptr_t) q->kernel_address, q->bus_address);
+}
diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h
index a8edfd3e9c95..756266cf0416 100644
--- a/include/uapi/misc/habanalabs.h
+++ b/include/uapi/misc/habanalabs.h
@@ -17,6 +17,35 @@
  */
 #define GOYA_KMD_SRAM_RESERVED_SIZE_FROM_START	0x8000	/* 32KB */
 
+/*
+ * Queue Numbering
+ *
+ * The external queues (DMA channels + CPU) MUST be before the internal queues
+ * and each group (DMA channels + CPU and internal) must be contiguous inside
+ * itself but there can be a gap between the two groups (although not
+ * recommended)
+ */
+
+enum goya_queue_id {
+	GOYA_QUEUE_ID_DMA_0 = 0,
+	GOYA_QUEUE_ID_DMA_1,
+	GOYA_QUEUE_ID_DMA_2,
+	GOYA_QUEUE_ID_DMA_3,
+	GOYA_QUEUE_ID_DMA_4,
+	GOYA_QUEUE_ID_CPU_PQ,
+	GOYA_QUEUE_ID_MME,
+	GOYA_QUEUE_ID_TPC0,
+	GOYA_QUEUE_ID_TPC1,
+	GOYA_QUEUE_ID_TPC2,
+	GOYA_QUEUE_ID_TPC3,
+	GOYA_QUEUE_ID_TPC4,
+	GOYA_QUEUE_ID_TPC5,
+	GOYA_QUEUE_ID_TPC6,
+	GOYA_QUEUE_ID_TPC7,
+	GOYA_QUEUE_ID_SIZE
+};
+
+
 /* Opcode to create a new command buffer */
 #define HL_CB_OP_CREATE		0
 /* Opcode to destroy previously created command buffer */
-- 
cgit v1.2.3


From eff6f4a0e70b7bcf4674f471a768860a74e638a6 Mon Sep 17 00:00:00 2001
From: Oded Gabbay <oded.gabbay@gmail.com>
Date: Sat, 16 Feb 2019 00:39:21 +0200
Subject: habanalabs: add command submission module

This patch adds the main flow for the user to submit work to the device.

Each work is described by a command submission object (CS). The CS contains
3 arrays of command buffers: One for execution, and two for context-switch
(store and restore).

For each CB, the user specifies on which queue to put that CB. In case of
an internal queue, the entry doesn't contain a pointer to the CB but the
address in the on-chip memory that the CB resides at.

The driver parses some of the CBs to enforce security restrictions.

The user receives a sequence number that represents the CS object. The user
can then query the driver regarding the status of the CS, using that
sequence number.

In case the CS doesn't finish before the timeout expires, the driver will
perform a soft-reset of the device.

Reviewed-by: Mike Rapoport <rppt@linux.ibm.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/habanalabs/Makefile             |    3 +-
 drivers/misc/habanalabs/command_submission.c |  766 ++++++++++++++++++
 drivers/misc/habanalabs/context.c            |   52 +-
 drivers/misc/habanalabs/device.c             |   16 +
 drivers/misc/habanalabs/goya/goya.c          | 1126 ++++++++++++++++++++++++++
 drivers/misc/habanalabs/habanalabs.h         |  275 +++++++
 drivers/misc/habanalabs/habanalabs_drv.c     |   20 +
 drivers/misc/habanalabs/habanalabs_ioctl.c   |    4 +-
 drivers/misc/habanalabs/hw_queue.c           |  232 ++++++
 drivers/misc/habanalabs/memory.c             |  198 +++++
 include/uapi/misc/habanalabs.h               |  158 +++-
 11 files changed, 2843 insertions(+), 7 deletions(-)
 create mode 100644 drivers/misc/habanalabs/command_submission.c
 create mode 100644 drivers/misc/habanalabs/memory.c

(limited to 'include')

diff --git a/drivers/misc/habanalabs/Makefile b/drivers/misc/habanalabs/Makefile
index b5607233d216..d2fd0e18b1eb 100644
--- a/drivers/misc/habanalabs/Makefile
+++ b/drivers/misc/habanalabs/Makefile
@@ -5,7 +5,8 @@
 obj-m	:= habanalabs.o
 
 habanalabs-y := habanalabs_drv.o device.o context.o asid.o habanalabs_ioctl.o \
-		command_buffer.o hw_queue.o irq.o sysfs.o hwmon.o
+		command_buffer.o hw_queue.o irq.o sysfs.o hwmon.o memory.o \
+		command_submission.o
 
 include $(src)/goya/Makefile
 habanalabs-y += $(HL_GOYA_FILES)
diff --git a/drivers/misc/habanalabs/command_submission.c b/drivers/misc/habanalabs/command_submission.c
new file mode 100644
index 000000000000..ae68b97e428d
--- /dev/null
+++ b/drivers/misc/habanalabs/command_submission.c
@@ -0,0 +1,766 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2016-2019 HabanaLabs, Ltd.
+ * All Rights Reserved.
+ */
+
+#include <uapi/misc/habanalabs.h>
+#include "habanalabs.h"
+
+#include <linux/uaccess.h>
+#include <linux/slab.h>
+
+static void job_wq_completion(struct work_struct *work);
+static long _hl_cs_wait_ioctl(struct hl_device *hdev,
+		struct hl_ctx *ctx, u64 timeout_us, u64 seq);
+static void cs_do_release(struct kref *ref);
+
+static const char *hl_fence_get_driver_name(struct dma_fence *fence)
+{
+	return "HabanaLabs";
+}
+
+static const char *hl_fence_get_timeline_name(struct dma_fence *fence)
+{
+	struct hl_dma_fence *hl_fence =
+		container_of(fence, struct hl_dma_fence, base_fence);
+
+	return dev_name(hl_fence->hdev->dev);
+}
+
+static bool hl_fence_enable_signaling(struct dma_fence *fence)
+{
+	return true;
+}
+
+static void hl_fence_release(struct dma_fence *fence)
+{
+	struct hl_dma_fence *hl_fence =
+		container_of(fence, struct hl_dma_fence, base_fence);
+
+	kfree_rcu(hl_fence, base_fence.rcu);
+}
+
+static const struct dma_fence_ops hl_fence_ops = {
+	.get_driver_name = hl_fence_get_driver_name,
+	.get_timeline_name = hl_fence_get_timeline_name,
+	.enable_signaling = hl_fence_enable_signaling,
+	.wait = dma_fence_default_wait,
+	.release = hl_fence_release
+};
+
+static void cs_get(struct hl_cs *cs)
+{
+	kref_get(&cs->refcount);
+}
+
+static int cs_get_unless_zero(struct hl_cs *cs)
+{
+	return kref_get_unless_zero(&cs->refcount);
+}
+
+static void cs_put(struct hl_cs *cs)
+{
+	kref_put(&cs->refcount, cs_do_release);
+}
+
+/*
+ * cs_parser - parse the user command submission
+ *
+ * @hpriv	: pointer to the private data of the fd
+ * @job        : pointer to the job that holds the command submission info
+ *
+ * The function parses the command submission of the user. It calls the
+ * ASIC specific parser, which returns a list of memory blocks to send
+ * to the device as different command buffers
+ *
+ */
+static int cs_parser(struct hl_fpriv *hpriv, struct hl_cs_job *job)
+{
+	struct hl_device *hdev = hpriv->hdev;
+	struct hl_cs_parser parser;
+	int rc;
+
+	parser.ctx_id = job->cs->ctx->asid;
+	parser.cs_sequence = job->cs->sequence;
+	parser.job_id = job->id;
+
+	parser.hw_queue_id = job->hw_queue_id;
+	parser.job_userptr_list = &job->userptr_list;
+	parser.patched_cb = NULL;
+	parser.user_cb = job->user_cb;
+	parser.user_cb_size = job->user_cb_size;
+	parser.ext_queue = job->ext_queue;
+	job->patched_cb = NULL;
+	parser.use_virt_addr = hdev->mmu_enable;
+
+	rc = hdev->asic_funcs->cs_parser(hdev, &parser);
+	if (job->ext_queue) {
+		if (!rc) {
+			job->patched_cb = parser.patched_cb;
+			job->job_cb_size = parser.patched_cb_size;
+
+			spin_lock(&job->patched_cb->lock);
+			job->patched_cb->cs_cnt++;
+			spin_unlock(&job->patched_cb->lock);
+		}
+
+		/*
+		 * Whether the parsing worked or not, we don't need the
+		 * original CB anymore because it was already parsed and
+		 * won't be accessed again for this CS
+		 */
+		spin_lock(&job->user_cb->lock);
+		job->user_cb->cs_cnt--;
+		spin_unlock(&job->user_cb->lock);
+		hl_cb_put(job->user_cb);
+		job->user_cb = NULL;
+	}
+
+	return rc;
+}
+
+static void free_job(struct hl_device *hdev, struct hl_cs_job *job)
+{
+	struct hl_cs *cs = job->cs;
+
+	if (job->ext_queue) {
+		hl_userptr_delete_list(hdev, &job->userptr_list);
+
+		/*
+		 * We might arrive here from rollback and patched CB wasn't
+		 * created, so we need to check it's not NULL
+		 */
+		if (job->patched_cb) {
+			spin_lock(&job->patched_cb->lock);
+			job->patched_cb->cs_cnt--;
+			spin_unlock(&job->patched_cb->lock);
+
+			hl_cb_put(job->patched_cb);
+		}
+	}
+
+	/*
+	 * This is the only place where there can be multiple threads
+	 * modifying the list at the same time
+	 */
+	spin_lock(&cs->job_lock);
+	list_del(&job->cs_node);
+	spin_unlock(&cs->job_lock);
+
+	if (job->ext_queue)
+		cs_put(cs);
+
+	kfree(job);
+}
+
+static void cs_do_release(struct kref *ref)
+{
+	struct hl_cs *cs = container_of(ref, struct hl_cs,
+						refcount);
+	struct hl_device *hdev = cs->ctx->hdev;
+	struct hl_cs_job *job, *tmp;
+
+	cs->completed = true;
+
+	/*
+	 * Although if we reached here it means that all external jobs have
+	 * finished, because each one of them took refcnt to CS, we still
+	 * need to go over the internal jobs and free them. Otherwise, we
+	 * will have leaked memory and what's worse, the CS object (and
+	 * potentially the CTX object) could be released, while the JOB
+	 * still holds a pointer to them (but no reference).
+	 */
+	list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node)
+		free_job(hdev, job);
+
+	/* We also need to update CI for internal queues */
+	if (cs->submitted) {
+		hl_int_hw_queue_update_ci(cs);
+
+		spin_lock(&hdev->hw_queues_mirror_lock);
+		/* remove CS from hw_queues mirror list */
+		list_del_init(&cs->mirror_node);
+		spin_unlock(&hdev->hw_queues_mirror_lock);
+
+		/*
+		 * Don't cancel TDR in case this CS was timedout because we
+		 * might be running from the TDR context
+		 */
+		if ((!cs->timedout) &&
+			(hdev->timeout_jiffies != MAX_SCHEDULE_TIMEOUT)) {
+			struct hl_cs *next;
+
+			if (cs->tdr_active)
+				cancel_delayed_work_sync(&cs->work_tdr);
+
+			spin_lock(&hdev->hw_queues_mirror_lock);
+
+			/* queue TDR for next CS */
+			next = list_first_entry_or_null(
+					&hdev->hw_queues_mirror_list,
+					struct hl_cs, mirror_node);
+
+			if ((next) && (!next->tdr_active)) {
+				next->tdr_active = true;
+				schedule_delayed_work(&next->work_tdr,
+							hdev->timeout_jiffies);
+			}
+
+			spin_unlock(&hdev->hw_queues_mirror_lock);
+		}
+	}
+
+	hl_ctx_put(cs->ctx);
+
+	if (cs->timedout)
+		dma_fence_set_error(cs->fence, -ETIMEDOUT);
+	else if (cs->aborted)
+		dma_fence_set_error(cs->fence, -EIO);
+
+	dma_fence_signal(cs->fence);
+	dma_fence_put(cs->fence);
+
+	kfree(cs);
+}
+
+static void cs_timedout(struct work_struct *work)
+{
+	struct hl_device *hdev;
+	int ctx_asid, rc;
+	struct hl_cs *cs = container_of(work, struct hl_cs,
+						 work_tdr.work);
+	rc = cs_get_unless_zero(cs);
+	if (!rc)
+		return;
+
+	if ((!cs->submitted) || (cs->completed)) {
+		cs_put(cs);
+		return;
+	}
+
+	/* Mark the CS is timed out so we won't try to cancel its TDR */
+	cs->timedout = true;
+
+	hdev = cs->ctx->hdev;
+	ctx_asid = cs->ctx->asid;
+
+	/* TODO: add information about last signaled seq and last emitted seq */
+	dev_err(hdev->dev, "CS %d.%llu got stuck!\n", ctx_asid, cs->sequence);
+
+	cs_put(cs);
+
+	if (hdev->reset_on_lockup)
+		hl_device_reset(hdev, false, false);
+}
+
+static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
+			struct hl_cs **cs_new)
+{
+	struct hl_dma_fence *fence;
+	struct dma_fence *other = NULL;
+	struct hl_cs *cs;
+	int rc;
+
+	cs = kzalloc(sizeof(*cs), GFP_ATOMIC);
+	if (!cs)
+		return -ENOMEM;
+
+	cs->ctx = ctx;
+	cs->submitted = false;
+	cs->completed = false;
+	INIT_LIST_HEAD(&cs->job_list);
+	INIT_DELAYED_WORK(&cs->work_tdr, cs_timedout);
+	kref_init(&cs->refcount);
+	spin_lock_init(&cs->job_lock);
+
+	fence = kmalloc(sizeof(*fence), GFP_ATOMIC);
+	if (!fence) {
+		rc = -ENOMEM;
+		goto free_cs;
+	}
+
+	fence->hdev = hdev;
+	spin_lock_init(&fence->lock);
+	cs->fence = &fence->base_fence;
+
+	spin_lock(&ctx->cs_lock);
+
+	fence->cs_seq = ctx->cs_sequence;
+	other = ctx->cs_pending[fence->cs_seq & (HL_MAX_PENDING_CS - 1)];
+	if ((other) && (!dma_fence_is_signaled(other))) {
+		spin_unlock(&ctx->cs_lock);
+		rc = -EAGAIN;
+		goto free_fence;
+	}
+
+	dma_fence_init(&fence->base_fence, &hl_fence_ops, &fence->lock,
+			ctx->asid, ctx->cs_sequence);
+
+	cs->sequence = fence->cs_seq;
+
+	ctx->cs_pending[fence->cs_seq & (HL_MAX_PENDING_CS - 1)] =
+							&fence->base_fence;
+	ctx->cs_sequence++;
+
+	dma_fence_get(&fence->base_fence);
+
+	dma_fence_put(other);
+
+	spin_unlock(&ctx->cs_lock);
+
+	*cs_new = cs;
+
+	return 0;
+
+free_fence:
+	kfree(fence);
+free_cs:
+	kfree(cs);
+	return rc;
+}
+
+static void cs_rollback(struct hl_device *hdev, struct hl_cs *cs)
+{
+	struct hl_cs_job *job, *tmp;
+
+	list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node)
+		free_job(hdev, job);
+}
+
+void hl_cs_rollback_all(struct hl_device *hdev)
+{
+	struct hl_cs *cs, *tmp;
+
+	/* flush all completions */
+	flush_workqueue(hdev->cq_wq);
+
+	/* Make sure we don't have leftovers in the H/W queues mirror list */
+	list_for_each_entry_safe(cs, tmp, &hdev->hw_queues_mirror_list,
+				mirror_node) {
+		cs_get(cs);
+		cs->aborted = true;
+		dev_warn_ratelimited(hdev->dev, "Killing CS %d.%llu\n",
+					cs->ctx->asid, cs->sequence);
+		cs_rollback(hdev, cs);
+		cs_put(cs);
+	}
+}
+
+static void job_wq_completion(struct work_struct *work)
+{
+	struct hl_cs_job *job = container_of(work, struct hl_cs_job,
+						finish_work);
+	struct hl_cs *cs = job->cs;
+	struct hl_device *hdev = cs->ctx->hdev;
+
+	/* job is no longer needed */
+	free_job(hdev, job);
+}
+
+static struct hl_cb *validate_queue_index(struct hl_device *hdev,
+					struct hl_cb_mgr *cb_mgr,
+					struct hl_cs_chunk *chunk,
+					bool *ext_queue)
+{
+	struct asic_fixed_properties *asic = &hdev->asic_prop;
+	struct hw_queue_properties *hw_queue_prop;
+	u32 cb_handle;
+	struct hl_cb *cb;
+
+	/* Assume external queue */
+	*ext_queue = true;
+
+	hw_queue_prop = &asic->hw_queues_props[chunk->queue_index];
+
+	if ((chunk->queue_index >= HL_MAX_QUEUES) ||
+			(hw_queue_prop->type == QUEUE_TYPE_NA)) {
+		dev_err(hdev->dev, "Queue index %d is invalid\n",
+			chunk->queue_index);
+		return NULL;
+	}
+
+	if (hw_queue_prop->kmd_only) {
+		dev_err(hdev->dev, "Queue index %d is restricted for KMD\n",
+			chunk->queue_index);
+		return NULL;
+	} else if (hw_queue_prop->type == QUEUE_TYPE_INT) {
+		*ext_queue = false;
+		return (struct hl_cb *) (uintptr_t) chunk->cb_handle;
+	}
+
+	/* Retrieve CB object */
+	cb_handle = (u32) (chunk->cb_handle >> PAGE_SHIFT);
+
+	cb = hl_cb_get(hdev, cb_mgr, cb_handle);
+	if (!cb) {
+		dev_err(hdev->dev, "CB handle 0x%x invalid\n", cb_handle);
+		return NULL;
+	}
+
+	if ((chunk->cb_size < 8) || (chunk->cb_size > cb->size)) {
+		dev_err(hdev->dev, "CB size %u invalid\n", chunk->cb_size);
+		goto release_cb;
+	}
+
+	spin_lock(&cb->lock);
+	cb->cs_cnt++;
+	spin_unlock(&cb->lock);
+
+	return cb;
+
+release_cb:
+	hl_cb_put(cb);
+	return NULL;
+}
+
+struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev, bool ext_queue)
+{
+	struct hl_cs_job *job;
+
+	job = kzalloc(sizeof(*job), GFP_ATOMIC);
+	if (!job)
+		return NULL;
+
+	job->ext_queue = ext_queue;
+
+	if (job->ext_queue) {
+		INIT_LIST_HEAD(&job->userptr_list);
+		INIT_WORK(&job->finish_work, job_wq_completion);
+	}
+
+	return job;
+}
+
+static int _hl_cs_ioctl(struct hl_fpriv *hpriv, void __user *chunks,
+			u32 num_chunks, u64 *cs_seq)
+{
+	struct hl_device *hdev = hpriv->hdev;
+	struct hl_cs_chunk *cs_chunk_array;
+	struct hl_cs_job *job;
+	struct hl_cs *cs;
+	struct hl_cb *cb;
+	bool ext_queue_present = false;
+	u32 size_to_copy;
+	int rc, i, parse_cnt;
+
+	*cs_seq = ULLONG_MAX;
+
+	if (num_chunks > HL_MAX_JOBS_PER_CS) {
+		dev_err(hdev->dev,
+			"Number of chunks can NOT be larger than %d\n",
+			HL_MAX_JOBS_PER_CS);
+		rc = -EINVAL;
+		goto out;
+	}
+
+	cs_chunk_array = kmalloc_array(num_chunks, sizeof(*cs_chunk_array),
+					GFP_ATOMIC);
+	if (!cs_chunk_array) {
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	size_to_copy = num_chunks * sizeof(struct hl_cs_chunk);
+	if (copy_from_user(cs_chunk_array, chunks, size_to_copy)) {
+		dev_err(hdev->dev, "Failed to copy cs chunk array from user\n");
+		rc = -EFAULT;
+		goto free_cs_chunk_array;
+	}
+
+	/* increment refcnt for context */
+	hl_ctx_get(hdev, hpriv->ctx);
+
+	rc = allocate_cs(hdev, hpriv->ctx, &cs);
+	if (rc) {
+		hl_ctx_put(hpriv->ctx);
+		goto free_cs_chunk_array;
+	}
+
+	*cs_seq = cs->sequence;
+
+	/* Validate ALL the CS chunks before submitting the CS */
+	for (i = 0, parse_cnt = 0 ; i < num_chunks ; i++, parse_cnt++) {
+		struct hl_cs_chunk *chunk = &cs_chunk_array[i];
+		bool ext_queue;
+
+		cb = validate_queue_index(hdev, &hpriv->cb_mgr, chunk,
+					&ext_queue);
+		if (ext_queue) {
+			ext_queue_present = true;
+			if (!cb) {
+				rc = -EINVAL;
+				goto free_cs_object;
+			}
+		}
+
+		job = hl_cs_allocate_job(hdev, ext_queue);
+		if (!job) {
+			dev_err(hdev->dev, "Failed to allocate a new job\n");
+			rc = -ENOMEM;
+			if (ext_queue)
+				goto release_cb;
+			else
+				goto free_cs_object;
+		}
+
+		job->id = i + 1;
+		job->cs = cs;
+		job->user_cb = cb;
+		job->user_cb_size = chunk->cb_size;
+		if (job->ext_queue)
+			job->job_cb_size = cb->size;
+		else
+			job->job_cb_size = chunk->cb_size;
+		job->hw_queue_id = chunk->queue_index;
+
+		cs->jobs_in_queue_cnt[job->hw_queue_id]++;
+
+		list_add_tail(&job->cs_node, &cs->job_list);
+
+		/*
+		 * Increment CS reference. When CS reference is 0, CS is
+		 * done and can be signaled to user and free all its resources
+		 * Only increment for JOB on external queues, because only
+		 * for those JOBs we get completion
+		 */
+		if (job->ext_queue)
+			cs_get(cs);
+
+		rc = cs_parser(hpriv, job);
+		if (rc) {
+			dev_err(hdev->dev,
+				"Failed to parse JOB %d.%llu.%d, err %d, rejecting the CS\n",
+				cs->ctx->asid, cs->sequence, job->id, rc);
+			goto free_cs_object;
+		}
+	}
+
+	if (!ext_queue_present) {
+		dev_err(hdev->dev,
+			"Reject CS %d.%llu because no external queues jobs\n",
+			cs->ctx->asid, cs->sequence);
+		rc = -EINVAL;
+		goto free_cs_object;
+	}
+
+	rc = hl_hw_queue_schedule_cs(cs);
+	if (rc) {
+		dev_err(hdev->dev,
+			"Failed to submit CS %d.%llu to H/W queues, error %d\n",
+			cs->ctx->asid, cs->sequence, rc);
+		goto free_cs_object;
+	}
+
+	rc = HL_CS_STATUS_SUCCESS;
+	goto put_cs;
+
+release_cb:
+	spin_lock(&cb->lock);
+	cb->cs_cnt--;
+	spin_unlock(&cb->lock);
+	hl_cb_put(cb);
+free_cs_object:
+	cs_rollback(hdev, cs);
+	*cs_seq = ULLONG_MAX;
+	/* The path below is both for good and erroneous exits */
+put_cs:
+	/* We finished with the CS in this function, so put the ref */
+	cs_put(cs);
+free_cs_chunk_array:
+	kfree(cs_chunk_array);
+out:
+	return rc;
+}
+
+int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
+{
+	struct hl_device *hdev = hpriv->hdev;
+	union hl_cs_args *args = data;
+	struct hl_ctx *ctx = hpriv->ctx;
+	void __user *chunks;
+	u32 num_chunks;
+	u64 cs_seq = ULONG_MAX;
+	int rc, do_restore;
+	bool need_soft_reset = false;
+
+	if (hl_device_disabled_or_in_reset(hdev)) {
+		dev_warn(hdev->dev,
+			"Device is %s. Can't submit new CS\n",
+			atomic_read(&hdev->in_reset) ? "in_reset" : "disabled");
+		rc = -EBUSY;
+		goto out;
+	}
+
+	do_restore = atomic_cmpxchg(&ctx->thread_restore_token, 1, 0);
+
+	if (do_restore || (args->in.cs_flags & HL_CS_FLAGS_FORCE_RESTORE)) {
+		long ret;
+
+		chunks = (void __user *)(uintptr_t)args->in.chunks_restore;
+		num_chunks = args->in.num_chunks_restore;
+
+		mutex_lock(&hpriv->restore_phase_mutex);
+
+		if (do_restore) {
+			rc = hdev->asic_funcs->context_switch(hdev, ctx->asid);
+			if (rc) {
+				dev_err_ratelimited(hdev->dev,
+					"Failed to switch to context %d, rejecting CS! %d\n",
+					ctx->asid, rc);
+				/*
+				 * If we timedout, we need to soft-reset because
+				 * QMAN is probably stuck. However, we can't
+				 * call to reset here directly because of
+				 * deadlock, so need to do it at the very end
+				 * of this function
+				 */
+				if (rc == -ETIMEDOUT)
+					need_soft_reset = true;
+				mutex_unlock(&hpriv->restore_phase_mutex);
+				goto out;
+			}
+		}
+
+		hdev->asic_funcs->restore_phase_topology(hdev);
+
+		if (num_chunks == 0) {
+			dev_dbg(hdev->dev,
+			"Need to run restore phase but restore CS is empty\n");
+			rc = 0;
+		} else {
+			rc = _hl_cs_ioctl(hpriv, chunks, num_chunks,
+						&cs_seq);
+		}
+
+		mutex_unlock(&hpriv->restore_phase_mutex);
+
+		if (rc) {
+			dev_err(hdev->dev,
+				"Failed to submit restore CS for context %d (%d)\n",
+				ctx->asid, rc);
+			goto out;
+		}
+
+		/* Need to wait for restore completion before execution phase */
+		if (num_chunks > 0) {
+			ret = _hl_cs_wait_ioctl(hdev, ctx,
+					jiffies_to_usecs(hdev->timeout_jiffies),
+					cs_seq);
+			if (ret <= 0) {
+				dev_err(hdev->dev,
+					"Restore CS for context %d failed to complete %ld\n",
+					ctx->asid, ret);
+				rc = -ENOEXEC;
+				goto out;
+			}
+		}
+
+		ctx->thread_restore_wait_token = 1;
+	} else if (!ctx->thread_restore_wait_token) {
+		u32 tmp;
+
+		rc = hl_poll_timeout_memory(hdev,
+			(u64) (uintptr_t) &ctx->thread_restore_wait_token,
+			jiffies_to_usecs(hdev->timeout_jiffies),
+			&tmp);
+
+		if (rc || !tmp) {
+			dev_err(hdev->dev,
+				"restore phase hasn't finished in time\n");
+			rc = -ETIMEDOUT;
+			goto out;
+		}
+	}
+
+	chunks = (void __user *)(uintptr_t)args->in.chunks_execute;
+	num_chunks = args->in.num_chunks_execute;
+
+	if (num_chunks == 0) {
+		dev_err(hdev->dev,
+			"Got execute CS with 0 chunks, context %d\n",
+			ctx->asid);
+		rc = -EINVAL;
+		goto out;
+	}
+
+	rc = _hl_cs_ioctl(hpriv, chunks, num_chunks, &cs_seq);
+
+out:
+	if (rc != -EAGAIN) {
+		memset(args, 0, sizeof(*args));
+		args->out.status = rc;
+		args->out.seq = cs_seq;
+	}
+
+	if ((rc == -ETIMEDOUT) && (need_soft_reset))
+		hl_device_reset(hdev, false, false);
+
+	return rc;
+}
+
+static long _hl_cs_wait_ioctl(struct hl_device *hdev,
+		struct hl_ctx *ctx, u64 timeout_us, u64 seq)
+{
+	struct dma_fence *fence;
+	unsigned long timeout;
+	long rc;
+
+	if (timeout_us == MAX_SCHEDULE_TIMEOUT)
+		timeout = timeout_us;
+	else
+		timeout = usecs_to_jiffies(timeout_us);
+
+	hl_ctx_get(hdev, ctx);
+
+	fence = hl_ctx_get_fence(ctx, seq);
+	if (IS_ERR(fence)) {
+		rc = PTR_ERR(fence);
+	} else if (fence) {
+		rc = dma_fence_wait_timeout(fence, true, timeout);
+		if (fence->error == -ETIMEDOUT)
+			rc = -ETIMEDOUT;
+		else if (fence->error == -EIO)
+			rc = -EIO;
+		dma_fence_put(fence);
+	} else
+		rc = 1;
+
+	hl_ctx_put(ctx);
+
+	return rc;
+}
+
+int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
+{
+	struct hl_device *hdev = hpriv->hdev;
+	union hl_wait_cs_args *args = data;
+	u64 seq = args->in.seq;
+	long rc;
+
+	rc = _hl_cs_wait_ioctl(hdev, hpriv->ctx, args->in.timeout_us, seq);
+
+	memset(args, 0, sizeof(*args));
+
+	if (rc < 0) {
+		dev_err(hdev->dev, "Error %ld on waiting for CS handle %llu\n",
+			rc, seq);
+		if (rc == -ERESTARTSYS) {
+			args->out.status = HL_WAIT_CS_STATUS_INTERRUPTED;
+			rc = -EINTR;
+		} else if (rc == -ETIMEDOUT) {
+			args->out.status = HL_WAIT_CS_STATUS_TIMEDOUT;
+		} else if (rc == -EIO) {
+			args->out.status = HL_WAIT_CS_STATUS_ABORTED;
+		}
+		return rc;
+	}
+
+	if (rc == 0)
+		args->out.status = HL_WAIT_CS_STATUS_BUSY;
+	else
+		args->out.status = HL_WAIT_CS_STATUS_COMPLETED;
+
+	return 0;
+}
diff --git a/drivers/misc/habanalabs/context.c b/drivers/misc/habanalabs/context.c
index de1258e7a6e6..c3854714b46c 100644
--- a/drivers/misc/habanalabs/context.c
+++ b/drivers/misc/habanalabs/context.c
@@ -12,6 +12,18 @@
 static void hl_ctx_fini(struct hl_ctx *ctx)
 {
 	struct hl_device *hdev = ctx->hdev;
+	int i;
+
+	/*
+	 * If we arrived here, there are no jobs waiting for this context
+	 * on its queues so we can safely remove it.
+	 * This is because for each CS, we increment the ref count and for
+	 * every CS that was finished we decrement it and we won't arrive
+	 * to this function unless the ref count is 0
+	 */
+
+	for (i = 0 ; i < HL_MAX_PENDING_CS ; i++)
+		dma_fence_put(ctx->cs_pending[i]);
 
 	if (ctx->asid != HL_KERNEL_ASID_ID)
 		hl_asid_free(hdev, ctx->asid);
@@ -23,8 +35,6 @@ void hl_ctx_do_release(struct kref *ref)
 
 	ctx = container_of(ref, struct hl_ctx, refcount);
 
-	dev_dbg(ctx->hdev->dev, "Now really releasing context %d\n", ctx->asid);
-
 	hl_ctx_fini(ctx);
 
 	if (ctx->hpriv)
@@ -90,6 +100,11 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
 
 	kref_init(&ctx->refcount);
 
+	ctx->cs_sequence = 1;
+	spin_lock_init(&ctx->cs_lock);
+	atomic_set(&ctx->thread_restore_token, 1);
+	ctx->thread_restore_wait_token = 0;
+
 	if (is_kernel_ctx) {
 		ctx->asid = HL_KERNEL_ASID_ID; /* KMD gets ASID 0 */
 	} else {
@@ -100,8 +115,6 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
 		}
 	}
 
-	dev_dbg(hdev->dev, "Created context with ASID %u\n", ctx->asid);
-
 	return 0;
 }
 
@@ -115,6 +128,37 @@ int hl_ctx_put(struct hl_ctx *ctx)
 	return kref_put(&ctx->refcount, hl_ctx_do_release);
 }
 
+struct dma_fence *hl_ctx_get_fence(struct hl_ctx *ctx, u64 seq)
+{
+	struct hl_device *hdev = ctx->hdev;
+	struct dma_fence *fence;
+
+	spin_lock(&ctx->cs_lock);
+
+	if (seq >= ctx->cs_sequence) {
+		dev_notice(hdev->dev,
+			"Can't wait on seq %llu because current CS is at seq %llu\n",
+			seq, ctx->cs_sequence);
+		spin_unlock(&ctx->cs_lock);
+		return ERR_PTR(-EINVAL);
+	}
+
+
+	if (seq + HL_MAX_PENDING_CS < ctx->cs_sequence) {
+		dev_dbg(hdev->dev,
+			"Can't wait on seq %llu because current CS is at seq %llu (Fence is gone)\n",
+			seq, ctx->cs_sequence);
+		spin_unlock(&ctx->cs_lock);
+		return NULL;
+	}
+
+	fence = dma_fence_get(
+			ctx->cs_pending[seq & (HL_MAX_PENDING_CS - 1)]);
+	spin_unlock(&ctx->cs_lock);
+
+	return fence;
+}
+
 /*
  * hl_ctx_mgr_init - initialize the context manager
  *
diff --git a/drivers/misc/habanalabs/device.c b/drivers/misc/habanalabs/device.c
index 2aa8a68cdf76..cc5f068df597 100644
--- a/drivers/misc/habanalabs/device.c
+++ b/drivers/misc/habanalabs/device.c
@@ -30,6 +30,8 @@ static void hpriv_release(struct kref *ref)
 
 	put_pid(hpriv->taskpid);
 
+	mutex_destroy(&hpriv->restore_phase_mutex);
+
 	kfree(hpriv);
 
 	/* Now the FD is really closed */
@@ -208,6 +210,8 @@ static int device_early_init(struct hl_device *hdev)
 
 	mutex_init(&hdev->fd_open_cnt_lock);
 	mutex_init(&hdev->send_cpu_message_lock);
+	INIT_LIST_HEAD(&hdev->hw_queues_mirror_list);
+	spin_lock_init(&hdev->hw_queues_mirror_lock);
 	atomic_set(&hdev->in_reset, 0);
 	atomic_set(&hdev->fd_open_cnt, 0);
 
@@ -593,6 +597,9 @@ again:
 	 */
 	hdev->asic_funcs->halt_engines(hdev, hard_reset);
 
+	/* Go over all the queues, release all CS and their jobs */
+	hl_cs_rollback_all(hdev);
+
 	if (hard_reset) {
 		/* Release kernel context */
 		if (hl_ctx_put(hdev->kernel_ctx) != 1) {
@@ -616,6 +623,12 @@ again:
 	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
 		hl_cq_reset(hdev, &hdev->completion_queue[i]);
 
+	/* Make sure the setup phase for the user context will run again */
+	if (hdev->user_ctx) {
+		atomic_set(&hdev->user_ctx->thread_restore_token, 1);
+		hdev->user_ctx->thread_restore_wait_token = 0;
+	}
+
 	/* Finished tear-down, starting to re-initialize */
 
 	if (hard_reset) {
@@ -952,6 +965,9 @@ void hl_device_fini(struct hl_device *hdev)
 	 */
 	hdev->asic_funcs->halt_engines(hdev, true);
 
+	/* Go over all the queues, release all CS and their jobs */
+	hl_cs_rollback_all(hdev);
+
 	hl_cb_pool_fini(hdev);
 
 	/* Release kernel context */
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index 1fe1d6a1ff9e..e3878fd7dc94 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -95,6 +95,19 @@ static const char goya_irq_name[GOYA_MSIX_ENTRIES][GOYA_MAX_STRING_LEN] = {
 		"goya cq 4", "goya cpu eq"
 };
 
+static u16 goya_packet_sizes[MAX_PACKET_ID] = {
+	[PACKET_WREG_32]	= sizeof(struct packet_wreg32),
+	[PACKET_WREG_BULK]	= sizeof(struct packet_wreg_bulk),
+	[PACKET_MSG_LONG]	= sizeof(struct packet_msg_long),
+	[PACKET_MSG_SHORT]	= sizeof(struct packet_msg_short),
+	[PACKET_CP_DMA]		= sizeof(struct packet_cp_dma),
+	[PACKET_MSG_PROT]	= sizeof(struct packet_msg_prot),
+	[PACKET_FENCE]		= sizeof(struct packet_fence),
+	[PACKET_LIN_DMA]	= sizeof(struct packet_lin_dma),
+	[PACKET_NOP]		= sizeof(struct packet_nop),
+	[PACKET_STOP]		= sizeof(struct packet_stop)
+};
+
 static const char *goya_axi_name[GOYA_MAX_INITIATORS] = {
 	"MME0",
 	"MME1",
@@ -2978,6 +2991,84 @@ void *goya_get_int_queue_base(struct hl_device *hdev, u32 queue_id,
 	return base;
 }
 
+int goya_send_job_on_qman0(struct hl_device *hdev, struct hl_cs_job *job)
+{
+	struct goya_device *goya = hdev->asic_specific;
+	struct packet_msg_prot *fence_pkt;
+	u32 *fence_ptr;
+	dma_addr_t fence_dma_addr;
+	struct hl_cb *cb;
+	u32 tmp;
+	int rc;
+
+	if (!hdev->asic_funcs->is_device_idle(hdev)) {
+		dev_err_ratelimited(hdev->dev,
+			"Can't send KMD job on QMAN0 if device is not idle\n");
+		return -EFAULT;
+	}
+
+	fence_ptr = hdev->asic_funcs->dma_pool_zalloc(hdev, 4, GFP_KERNEL,
+							&fence_dma_addr);
+	if (!fence_ptr) {
+		dev_err(hdev->dev,
+			"Failed to allocate fence memory for QMAN0\n");
+		return -ENOMEM;
+	}
+
+	*fence_ptr = 0;
+
+	if (goya->hw_cap_initialized & HW_CAP_MMU) {
+		WREG32(mmDMA_QM_0_GLBL_PROT, QMAN_DMA_FULLY_TRUSTED);
+		RREG32(mmDMA_QM_0_GLBL_PROT);
+	}
+
+	/*
+	 * goya cs parser saves space for 2xpacket_msg_prot at end of CB. For
+	 * synchronized kernel jobs we only need space for 1 packet_msg_prot
+	 */
+	job->job_cb_size -= sizeof(struct packet_msg_prot);
+
+	cb = job->patched_cb;
+
+	fence_pkt = (struct packet_msg_prot *) (uintptr_t) (cb->kernel_address +
+			job->job_cb_size - sizeof(struct packet_msg_prot));
+
+	fence_pkt->ctl = (PACKET_MSG_PROT << GOYA_PKT_CTL_OPCODE_SHIFT) |
+			(1 << GOYA_PKT_CTL_EB_SHIFT) |
+			(1 << GOYA_PKT_CTL_MB_SHIFT);
+	fence_pkt->value = GOYA_QMAN0_FENCE_VAL;
+	fence_pkt->addr = fence_dma_addr +
+			hdev->asic_prop.host_phys_base_address;
+
+	rc = hl_hw_queue_send_cb_no_cmpl(hdev, GOYA_QUEUE_ID_DMA_0,
+					job->job_cb_size, cb->bus_address);
+	if (rc) {
+		dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
+		goto free_fence_ptr;
+	}
+
+	rc = hl_poll_timeout_memory(hdev, (u64) (uintptr_t) fence_ptr,
+					HL_DEVICE_TIMEOUT_USEC, &tmp);
+
+	hl_hw_queue_inc_ci_kernel(hdev, GOYA_QUEUE_ID_DMA_0);
+
+	if ((rc) || (tmp != GOYA_QMAN0_FENCE_VAL)) {
+		dev_err(hdev->dev, "QMAN0 Job hasn't finished in time\n");
+		rc = -ETIMEDOUT;
+	}
+
+free_fence_ptr:
+	hdev->asic_funcs->dma_pool_free(hdev, (void *) fence_ptr,
+					fence_dma_addr);
+
+	if (goya->hw_cap_initialized & HW_CAP_MMU) {
+		WREG32(mmDMA_QM_0_GLBL_PROT, QMAN_DMA_PARTLY_TRUSTED);
+		RREG32(mmDMA_QM_0_GLBL_PROT);
+	}
+
+	return rc;
+}
+
 int goya_send_cpu_message(struct hl_device *hdev, u32 *msg, u16 len,
 				u32 timeout, long *result)
 {
@@ -3214,11 +3305,985 @@ void goya_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size,
 			size);
 }
 
+int goya_dma_map_sg(struct hl_device *hdev, struct scatterlist *sg, int nents,
+			enum dma_data_direction dir)
+{
+	if (!dma_map_sg(&hdev->pdev->dev, sg, nents, dir))
+		return -ENOMEM;
+
+	return 0;
+}
+
+void goya_dma_unmap_sg(struct hl_device *hdev, struct scatterlist *sg,
+			int nents, enum dma_data_direction dir)
+{
+	dma_unmap_sg(&hdev->pdev->dev, sg, nents, dir);
+}
+
+u32 goya_get_dma_desc_list_size(struct hl_device *hdev,
+					struct sg_table *sgt)
+{
+	struct scatterlist *sg, *sg_next_iter;
+	u32 count, len, dma_desc_cnt, len_next;
+	dma_addr_t addr, addr_next;
+
+	dma_desc_cnt = 0;
+
+	for_each_sg(sgt->sgl, sg, sgt->nents, count) {
+
+		len = sg_dma_len(sg);
+		addr = sg_dma_address(sg);
+
+		if (len == 0)
+			break;
+
+		while ((count + 1) < sgt->nents) {
+			sg_next_iter = sg_next(sg);
+			len_next = sg_dma_len(sg_next_iter);
+			addr_next = sg_dma_address(sg_next_iter);
+
+			if (len_next == 0)
+				break;
+
+			if ((addr + len == addr_next) &&
+				(len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
+				len += len_next;
+				count++;
+				sg = sg_next_iter;
+			} else {
+				break;
+			}
+		}
+
+		dma_desc_cnt++;
+	}
+
+	return dma_desc_cnt * sizeof(struct packet_lin_dma);
+}
+
+static int goya_pin_memory_before_cs(struct hl_device *hdev,
+				struct hl_cs_parser *parser,
+				struct packet_lin_dma *user_dma_pkt,
+				u64 addr, enum dma_data_direction dir)
+{
+	struct hl_userptr *userptr;
+	int rc;
+
+	if (hl_userptr_is_pinned(hdev, addr, user_dma_pkt->tsize,
+			parser->job_userptr_list, &userptr))
+		goto already_pinned;
+
+	userptr = kzalloc(sizeof(*userptr), GFP_ATOMIC);
+	if (!userptr)
+		return -ENOMEM;
+
+	rc = hl_pin_host_memory(hdev, addr, user_dma_pkt->tsize, userptr);
+	if (rc)
+		goto free_userptr;
+
+	list_add_tail(&userptr->job_node, parser->job_userptr_list);
+
+	rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl,
+					userptr->sgt->nents, dir);
+	if (rc) {
+		dev_err(hdev->dev, "failed to map sgt with DMA region\n");
+		goto unpin_memory;
+	}
+
+	userptr->dma_mapped = true;
+	userptr->dir = dir;
+
+already_pinned:
+	parser->patched_cb_size +=
+			goya_get_dma_desc_list_size(hdev, userptr->sgt);
+
+	return 0;
+
+unpin_memory:
+	hl_unpin_host_memory(hdev, userptr);
+free_userptr:
+	kfree(userptr);
+	return rc;
+}
+
+static int goya_validate_dma_pkt_host(struct hl_device *hdev,
+				struct hl_cs_parser *parser,
+				struct packet_lin_dma *user_dma_pkt)
+{
+	u64 device_memory_addr, addr;
+	enum dma_data_direction dir;
+	enum goya_dma_direction user_dir;
+	bool sram_addr = true;
+	bool skip_host_mem_pin = false;
+	bool user_memset;
+	int rc = 0;
+
+	user_dir = (user_dma_pkt->ctl & GOYA_PKT_LIN_DMA_CTL_DMA_DIR_MASK) >>
+			GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;
+
+	user_memset = (user_dma_pkt->ctl & GOYA_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
+			GOYA_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
+
+	switch (user_dir) {
+	case DMA_HOST_TO_DRAM:
+		dev_dbg(hdev->dev, "DMA direction is HOST --> DRAM\n");
+		dir = DMA_TO_DEVICE;
+		sram_addr = false;
+		addr = user_dma_pkt->src_addr;
+		device_memory_addr = user_dma_pkt->dst_addr;
+		if (user_memset)
+			skip_host_mem_pin = true;
+		break;
+
+	case DMA_DRAM_TO_HOST:
+		dev_dbg(hdev->dev, "DMA direction is DRAM --> HOST\n");
+		dir = DMA_FROM_DEVICE;
+		sram_addr = false;
+		addr = user_dma_pkt->dst_addr;
+		device_memory_addr = user_dma_pkt->src_addr;
+		break;
+
+	case DMA_HOST_TO_SRAM:
+		dev_dbg(hdev->dev, "DMA direction is HOST --> SRAM\n");
+		dir = DMA_TO_DEVICE;
+		addr = user_dma_pkt->src_addr;
+		device_memory_addr = user_dma_pkt->dst_addr;
+		if (user_memset)
+			skip_host_mem_pin = true;
+		break;
+
+	case DMA_SRAM_TO_HOST:
+		dev_dbg(hdev->dev, "DMA direction is SRAM --> HOST\n");
+		dir = DMA_FROM_DEVICE;
+		addr = user_dma_pkt->dst_addr;
+		device_memory_addr = user_dma_pkt->src_addr;
+		break;
+	default:
+		dev_err(hdev->dev, "DMA direction is undefined\n");
+		return -EFAULT;
+	}
+
+	if (parser->ctx_id != HL_KERNEL_ASID_ID) {
+		if (sram_addr) {
+			if (!hl_mem_area_inside_range(device_memory_addr,
+					user_dma_pkt->tsize,
+					hdev->asic_prop.sram_user_base_address,
+					hdev->asic_prop.sram_end_address)) {
+
+				dev_err(hdev->dev,
+					"SRAM address 0x%llx + 0x%x is invalid\n",
+					device_memory_addr,
+					user_dma_pkt->tsize);
+				return -EFAULT;
+			}
+		} else {
+			if (!hl_mem_area_inside_range(device_memory_addr,
+					user_dma_pkt->tsize,
+					hdev->asic_prop.dram_user_base_address,
+					hdev->asic_prop.dram_end_address)) {
+
+				dev_err(hdev->dev,
+					"DRAM address 0x%llx + 0x%x is invalid\n",
+					device_memory_addr,
+					user_dma_pkt->tsize);
+				return -EFAULT;
+			}
+		}
+	}
+
+	if (skip_host_mem_pin)
+		parser->patched_cb_size += sizeof(*user_dma_pkt);
+	else {
+		if ((dir == DMA_TO_DEVICE) &&
+				(parser->hw_queue_id > GOYA_QUEUE_ID_DMA_1)) {
+			dev_err(hdev->dev,
+				"Can't DMA from host on queue other then 1\n");
+			return -EFAULT;
+		}
+
+		rc = goya_pin_memory_before_cs(hdev, parser, user_dma_pkt,
+						addr, dir);
+	}
+
+	return rc;
+}
+
+static int goya_validate_dma_pkt_no_host(struct hl_device *hdev,
+				struct hl_cs_parser *parser,
+				struct packet_lin_dma *user_dma_pkt)
+{
+	u64 sram_memory_addr, dram_memory_addr;
+	enum goya_dma_direction user_dir;
+
+	user_dir = (user_dma_pkt->ctl & GOYA_PKT_LIN_DMA_CTL_DMA_DIR_MASK) >>
+			GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;
+
+	if (user_dir == DMA_DRAM_TO_SRAM) {
+		dev_dbg(hdev->dev, "DMA direction is DRAM --> SRAM\n");
+		dram_memory_addr = user_dma_pkt->src_addr;
+		sram_memory_addr = user_dma_pkt->dst_addr;
+	} else {
+		dev_dbg(hdev->dev, "DMA direction is SRAM --> DRAM\n");
+		sram_memory_addr = user_dma_pkt->src_addr;
+		dram_memory_addr = user_dma_pkt->dst_addr;
+	}
+
+	if (!hl_mem_area_inside_range(sram_memory_addr, user_dma_pkt->tsize,
+				hdev->asic_prop.sram_user_base_address,
+				hdev->asic_prop.sram_end_address)) {
+		dev_err(hdev->dev, "SRAM address 0x%llx + 0x%x is invalid\n",
+			sram_memory_addr, user_dma_pkt->tsize);
+		return -EFAULT;
+	}
+
+	if (!hl_mem_area_inside_range(dram_memory_addr, user_dma_pkt->tsize,
+				hdev->asic_prop.dram_user_base_address,
+				hdev->asic_prop.dram_end_address)) {
+		dev_err(hdev->dev, "DRAM address 0x%llx + 0x%x is invalid\n",
+			dram_memory_addr, user_dma_pkt->tsize);
+		return -EFAULT;
+	}
+
+	parser->patched_cb_size += sizeof(*user_dma_pkt);
+
+	return 0;
+}
+
+static int goya_validate_dma_pkt_no_mmu(struct hl_device *hdev,
+				struct hl_cs_parser *parser,
+				struct packet_lin_dma *user_dma_pkt)
+{
+	enum goya_dma_direction user_dir;
+	int rc;
+
+	dev_dbg(hdev->dev, "DMA packet details:\n");
+	dev_dbg(hdev->dev, "source == 0x%llx\n", user_dma_pkt->src_addr);
+	dev_dbg(hdev->dev, "destination == 0x%llx\n", user_dma_pkt->dst_addr);
+	dev_dbg(hdev->dev, "size == %u\n", user_dma_pkt->tsize);
+
+	user_dir = (user_dma_pkt->ctl & GOYA_PKT_LIN_DMA_CTL_DMA_DIR_MASK) >>
+			GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;
+
+	/*
+	 * Special handling for DMA with size 0. The H/W has a bug where
+	 * this can cause the QMAN DMA to get stuck, so block it here.
+	 */
+	if (user_dma_pkt->tsize == 0) {
+		dev_err(hdev->dev,
+			"Got DMA with size 0, might reset the device\n");
+		return -EINVAL;
+	}
+
+	if ((user_dir == DMA_DRAM_TO_SRAM) || (user_dir == DMA_SRAM_TO_DRAM))
+		rc = goya_validate_dma_pkt_no_host(hdev, parser, user_dma_pkt);
+	else
+		rc = goya_validate_dma_pkt_host(hdev, parser, user_dma_pkt);
+
+	return rc;
+}
+
+static int goya_validate_dma_pkt_mmu(struct hl_device *hdev,
+				struct hl_cs_parser *parser,
+				struct packet_lin_dma *user_dma_pkt)
+{
+	dev_dbg(hdev->dev, "DMA packet details:\n");
+	dev_dbg(hdev->dev, "source == 0x%llx\n", user_dma_pkt->src_addr);
+	dev_dbg(hdev->dev, "destination == 0x%llx\n", user_dma_pkt->dst_addr);
+	dev_dbg(hdev->dev, "size == %u\n", user_dma_pkt->tsize);
+
+	/*
+	 * WA for HW-23.
+	 * We can't allow user to read from Host using QMANs other than 1.
+	 */
+	if (parser->hw_queue_id > GOYA_QUEUE_ID_DMA_1 &&
+		hl_mem_area_inside_range(user_dma_pkt->src_addr,
+				user_dma_pkt->tsize,
+				hdev->asic_prop.va_space_host_start_address,
+				hdev->asic_prop.va_space_host_end_address)) {
+		dev_err(hdev->dev,
+			"Can't DMA from host on queue other then 1\n");
+		return -EFAULT;
+	}
+
+	if (user_dma_pkt->tsize == 0) {
+		dev_err(hdev->dev,
+			"Got DMA with size 0, might reset the device\n");
+		return -EINVAL;
+	}
+
+	parser->patched_cb_size += sizeof(*user_dma_pkt);
+
+	return 0;
+}
+
+static int goya_validate_wreg32(struct hl_device *hdev,
+				struct hl_cs_parser *parser,
+				struct packet_wreg32 *wreg_pkt)
+{
+	struct goya_device *goya = hdev->asic_specific;
+	u32 sob_start_addr, sob_end_addr;
+	u16 reg_offset;
+
+	reg_offset = wreg_pkt->ctl & GOYA_PKT_WREG32_CTL_REG_OFFSET_MASK;
+
+	dev_dbg(hdev->dev, "WREG32 packet details:\n");
+	dev_dbg(hdev->dev, "reg_offset == 0x%x\n", reg_offset);
+	dev_dbg(hdev->dev, "value      == 0x%x\n", wreg_pkt->value);
+
+	if (reg_offset != (mmDMA_CH_1_WR_COMP_ADDR_LO & 0xFFFF)) {
+		dev_err(hdev->dev, "WREG32 packet with illegal address 0x%x\n",
+			reg_offset);
+		return -EPERM;
+	}
+
+	/*
+	 * With MMU, DMA channels are not secured, so it doesn't matter where
+	 * the WR COMP will be written to because it will go out with
+	 * non-secured property
+	 */
+	if (goya->hw_cap_initialized & HW_CAP_MMU)
+		return 0;
+
+	sob_start_addr = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_0);
+	sob_end_addr = lower_32_bits(CFG_BASE + mmSYNC_MNGR_SOB_OBJ_1023);
+
+	if ((wreg_pkt->value < sob_start_addr) ||
+			(wreg_pkt->value > sob_end_addr)) {
+
+		dev_err(hdev->dev, "WREG32 packet with illegal value 0x%x\n",
+			wreg_pkt->value);
+		return -EPERM;
+	}
+
+	return 0;
+}
+
+static int goya_validate_cb(struct hl_device *hdev,
+			struct hl_cs_parser *parser, bool is_mmu)
+{
+	u32 cb_parsed_length = 0;
+	int rc = 0;
+
+	parser->patched_cb_size = 0;
+
+	/* cb_user_size is more than 0 so loop will always be executed */
+	while (cb_parsed_length < parser->user_cb_size) {
+		enum packet_id pkt_id;
+		u16 pkt_size;
+		void *user_pkt;
+
+		user_pkt = (void *) (uintptr_t)
+			(parser->user_cb->kernel_address + cb_parsed_length);
+
+		pkt_id = (enum packet_id) (((*(u64 *) user_pkt) &
+				PACKET_HEADER_PACKET_ID_MASK) >>
+					PACKET_HEADER_PACKET_ID_SHIFT);
+
+		pkt_size = goya_packet_sizes[pkt_id];
+		cb_parsed_length += pkt_size;
+		if (cb_parsed_length > parser->user_cb_size) {
+			dev_err(hdev->dev,
+				"packet 0x%x is out of CB boundary\n", pkt_id);
+			rc = -EINVAL;
+			break;
+		}
+
+		switch (pkt_id) {
+		case PACKET_WREG_32:
+			/*
+			 * Although it is validated after copy in patch_cb(),
+			 * need to validate here as well because patch_cb() is
+			 * not called in MMU path while this function is called
+			 */
+			rc = goya_validate_wreg32(hdev, parser, user_pkt);
+			break;
+
+		case PACKET_WREG_BULK:
+			dev_err(hdev->dev,
+				"User not allowed to use WREG_BULK\n");
+			rc = -EPERM;
+			break;
+
+		case PACKET_MSG_PROT:
+			dev_err(hdev->dev,
+				"User not allowed to use MSG_PROT\n");
+			rc = -EPERM;
+			break;
+
+		case PACKET_CP_DMA:
+			dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
+			rc = -EPERM;
+			break;
+
+		case PACKET_STOP:
+			dev_err(hdev->dev, "User not allowed to use STOP\n");
+			rc = -EPERM;
+			break;
+
+		case PACKET_LIN_DMA:
+			if (is_mmu)
+				rc = goya_validate_dma_pkt_mmu(hdev, parser,
+						user_pkt);
+			else
+				rc = goya_validate_dma_pkt_no_mmu(hdev, parser,
+						user_pkt);
+			break;
+
+		case PACKET_MSG_LONG:
+		case PACKET_MSG_SHORT:
+		case PACKET_FENCE:
+		case PACKET_NOP:
+			parser->patched_cb_size += pkt_size;
+			break;
+
+		default:
+			dev_err(hdev->dev, "Invalid packet header 0x%x\n",
+				pkt_id);
+			rc = -EINVAL;
+			break;
+		}
+
+		if (rc)
+			break;
+	}
+
+	/*
+	 * The new CB should have space at the end for two MSG_PROT packets:
+	 * 1. A packet that will act as a completion packet
+	 * 2. A packet that will generate MSI-X interrupt
+	 */
+	parser->patched_cb_size += sizeof(struct packet_msg_prot) * 2;
+
+	return rc;
+}
+
+static int goya_patch_dma_packet(struct hl_device *hdev,
+				struct hl_cs_parser *parser,
+				struct packet_lin_dma *user_dma_pkt,
+				struct packet_lin_dma *new_dma_pkt,
+				u32 *new_dma_pkt_size)
+{
+	struct hl_userptr *userptr;
+	struct scatterlist *sg, *sg_next_iter;
+	u32 count, len, dma_desc_cnt, len_next;
+	dma_addr_t dma_addr, dma_addr_next;
+	enum goya_dma_direction user_dir;
+	u64 device_memory_addr, addr;
+	enum dma_data_direction dir;
+	struct sg_table *sgt;
+	bool skip_host_mem_pin = false;
+	bool user_memset;
+	u32 user_rdcomp_mask, user_wrcomp_mask;
+
+	user_dir = (user_dma_pkt->ctl & GOYA_PKT_LIN_DMA_CTL_DMA_DIR_MASK) >>
+			GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;
+
+	user_memset = (user_dma_pkt->ctl & GOYA_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
+			GOYA_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
+
+	if ((user_dir == DMA_DRAM_TO_SRAM) || (user_dir == DMA_SRAM_TO_DRAM) ||
+			(user_dma_pkt->tsize == 0)) {
+		memcpy(new_dma_pkt, user_dma_pkt, sizeof(*new_dma_pkt));
+		*new_dma_pkt_size = sizeof(*new_dma_pkt);
+		return 0;
+	}
+
+	if ((user_dir == DMA_HOST_TO_DRAM) || (user_dir == DMA_HOST_TO_SRAM)) {
+		addr = user_dma_pkt->src_addr;
+		device_memory_addr = user_dma_pkt->dst_addr;
+		dir = DMA_TO_DEVICE;
+		if (user_memset)
+			skip_host_mem_pin = true;
+	} else {
+		addr = user_dma_pkt->dst_addr;
+		device_memory_addr = user_dma_pkt->src_addr;
+		dir = DMA_FROM_DEVICE;
+	}
+
+	if ((!skip_host_mem_pin) &&
+		(hl_userptr_is_pinned(hdev, addr, user_dma_pkt->tsize,
+			parser->job_userptr_list, &userptr) == false)) {
+		dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
+				addr, user_dma_pkt->tsize);
+		return -EFAULT;
+	}
+
+	if ((user_memset) && (dir == DMA_TO_DEVICE)) {
+		memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
+		*new_dma_pkt_size = sizeof(*user_dma_pkt);
+		return 0;
+	}
+
+	user_rdcomp_mask =
+			(user_dma_pkt->ctl & GOYA_PKT_LIN_DMA_CTL_RDCOMP_MASK);
+
+	user_wrcomp_mask =
+			(user_dma_pkt->ctl & GOYA_PKT_LIN_DMA_CTL_WRCOMP_MASK);
+
+	sgt = userptr->sgt;
+	dma_desc_cnt = 0;
+
+	for_each_sg(sgt->sgl, sg, sgt->nents, count) {
+		len = sg_dma_len(sg);
+		dma_addr = sg_dma_address(sg);
+
+		if (len == 0)
+			break;
+
+		while ((count + 1) < sgt->nents) {
+			sg_next_iter = sg_next(sg);
+			len_next = sg_dma_len(sg_next_iter);
+			dma_addr_next = sg_dma_address(sg_next_iter);
+
+			if (len_next == 0)
+				break;
+
+			if ((dma_addr + len == dma_addr_next) &&
+				(len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
+				len += len_next;
+				count++;
+				sg = sg_next_iter;
+			} else {
+				break;
+			}
+		}
+
+		new_dma_pkt->ctl = user_dma_pkt->ctl;
+		if (likely(dma_desc_cnt))
+			new_dma_pkt->ctl &= ~GOYA_PKT_CTL_EB_MASK;
+		new_dma_pkt->ctl &= ~(GOYA_PKT_LIN_DMA_CTL_RDCOMP_MASK |
+					GOYA_PKT_LIN_DMA_CTL_WRCOMP_MASK);
+		new_dma_pkt->tsize = len;
+
+		dma_addr += hdev->asic_prop.host_phys_base_address;
+
+		if (dir == DMA_TO_DEVICE) {
+			new_dma_pkt->src_addr = dma_addr;
+			new_dma_pkt->dst_addr = device_memory_addr;
+		} else {
+			new_dma_pkt->src_addr = device_memory_addr;
+			new_dma_pkt->dst_addr = dma_addr;
+		}
+
+		if (!user_memset)
+			device_memory_addr += len;
+		dma_desc_cnt++;
+		new_dma_pkt++;
+	}
+
+	if (!dma_desc_cnt) {
+		dev_err(hdev->dev,
+			"Error of 0 SG entries when patching DMA packet\n");
+		return -EFAULT;
+	}
+
+	/* Fix the last dma packet - rdcomp/wrcomp must be as user set them */
+	new_dma_pkt--;
+	new_dma_pkt->ctl |= (user_rdcomp_mask | user_wrcomp_mask);
+
+	*new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);
+
+	return 0;
+}
+
+static int goya_patch_cb(struct hl_device *hdev,
+				struct hl_cs_parser *parser)
+{
+	u32 cb_parsed_length = 0;
+	u32 cb_patched_cur_length = 0;
+	int rc = 0;
+
+	/* cb_user_size is more than 0 so loop will always be executed */
+	while (cb_parsed_length < parser->user_cb_size) {
+		enum packet_id pkt_id;
+		u16 pkt_size;
+		u32 new_pkt_size = 0;
+		void *user_pkt, *kernel_pkt;
+
+		user_pkt = (void *) (uintptr_t)
+			(parser->user_cb->kernel_address + cb_parsed_length);
+		kernel_pkt = (void *) (uintptr_t)
+			(parser->patched_cb->kernel_address +
+					cb_patched_cur_length);
+
+		pkt_id = (enum packet_id) (((*(u64 *) user_pkt) &
+				PACKET_HEADER_PACKET_ID_MASK) >>
+					PACKET_HEADER_PACKET_ID_SHIFT);
+
+		pkt_size = goya_packet_sizes[pkt_id];
+		cb_parsed_length += pkt_size;
+		if (cb_parsed_length > parser->user_cb_size) {
+			dev_err(hdev->dev,
+				"packet 0x%x is out of CB boundary\n", pkt_id);
+			rc = -EINVAL;
+			break;
+		}
+
+		switch (pkt_id) {
+		case PACKET_LIN_DMA:
+			rc = goya_patch_dma_packet(hdev, parser, user_pkt,
+						kernel_pkt, &new_pkt_size);
+			cb_patched_cur_length += new_pkt_size;
+			break;
+
+		case PACKET_WREG_32:
+			memcpy(kernel_pkt, user_pkt, pkt_size);
+			cb_patched_cur_length += pkt_size;
+			rc = goya_validate_wreg32(hdev, parser, kernel_pkt);
+			break;
+
+		case PACKET_WREG_BULK:
+			dev_err(hdev->dev,
+				"User not allowed to use WREG_BULK\n");
+			rc = -EPERM;
+			break;
+
+		case PACKET_MSG_PROT:
+			dev_err(hdev->dev,
+				"User not allowed to use MSG_PROT\n");
+			rc = -EPERM;
+			break;
+
+		case PACKET_CP_DMA:
+			dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
+			rc = -EPERM;
+			break;
+
+		case PACKET_STOP:
+			dev_err(hdev->dev, "User not allowed to use STOP\n");
+			rc = -EPERM;
+			break;
+
+		case PACKET_MSG_LONG:
+		case PACKET_MSG_SHORT:
+		case PACKET_FENCE:
+		case PACKET_NOP:
+			memcpy(kernel_pkt, user_pkt, pkt_size);
+			cb_patched_cur_length += pkt_size;
+			break;
+
+		default:
+			dev_err(hdev->dev, "Invalid packet header 0x%x\n",
+				pkt_id);
+			rc = -EINVAL;
+			break;
+		}
+
+		if (rc)
+			break;
+	}
+
+	return rc;
+}
+
+static int goya_parse_cb_mmu(struct hl_device *hdev,
+		struct hl_cs_parser *parser)
+{
+	u64 patched_cb_handle;
+	u32 patched_cb_size;
+	struct hl_cb *user_cb;
+	int rc;
+
+	/*
+	 * The new CB should have space at the end for two MSG_PROT pkt:
+	 * 1. A packet that will act as a completion packet
+	 * 2. A packet that will generate MSI-X interrupt
+	 */
+	parser->patched_cb_size = parser->user_cb_size +
+			sizeof(struct packet_msg_prot) * 2;
+
+	rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr,
+				parser->patched_cb_size,
+				&patched_cb_handle, HL_KERNEL_ASID_ID);
+
+	if (rc) {
+		dev_err(hdev->dev,
+			"Failed to allocate patched CB for DMA CS %d\n",
+			rc);
+		return rc;
+	}
+
+	patched_cb_handle >>= PAGE_SHIFT;
+	parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
+				(u32) patched_cb_handle);
+	/* hl_cb_get should never fail here so use kernel WARN */
+	WARN(!parser->patched_cb, "DMA CB handle invalid 0x%x\n",
+			(u32) patched_cb_handle);
+	if (!parser->patched_cb) {
+		rc = -EFAULT;
+		goto out;
+	}
+
+	/*
+	 * The check that parser->user_cb_size <= parser->user_cb->size was done
+	 * in validate_queue_index().
+	 */
+	memcpy((void *) (uintptr_t) parser->patched_cb->kernel_address,
+		(void *) (uintptr_t) parser->user_cb->kernel_address,
+		parser->user_cb_size);
+
+	patched_cb_size = parser->patched_cb_size;
+
+	/* validate patched CB instead of user CB */
+	user_cb = parser->user_cb;
+	parser->user_cb = parser->patched_cb;
+	rc = goya_validate_cb(hdev, parser, true);
+	parser->user_cb = user_cb;
+
+	if (rc) {
+		hl_cb_put(parser->patched_cb);
+		goto out;
+	}
+
+	if (patched_cb_size != parser->patched_cb_size) {
+		dev_err(hdev->dev, "user CB size mismatch\n");
+		hl_cb_put(parser->patched_cb);
+		rc = -EINVAL;
+		goto out;
+	}
+
+out:
+	/*
+	 * Always call cb destroy here because we still have 1 reference
+	 * to it by calling cb_get earlier. After the job will be completed,
+	 * cb_put will release it, but here we want to remove it from the
+	 * idr
+	 */
+	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
+					patched_cb_handle << PAGE_SHIFT);
+
+	return rc;
+}
+
+int goya_parse_cb_no_mmu(struct hl_device *hdev, struct hl_cs_parser *parser)
+{
+	u64 patched_cb_handle;
+	int rc;
+
+	rc = goya_validate_cb(hdev, parser, false);
+
+	if (rc)
+		goto free_userptr;
+
+	rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr,
+				parser->patched_cb_size,
+				&patched_cb_handle, HL_KERNEL_ASID_ID);
+	if (rc) {
+		dev_err(hdev->dev,
+			"Failed to allocate patched CB for DMA CS %d\n", rc);
+		goto free_userptr;
+	}
+
+	patched_cb_handle >>= PAGE_SHIFT;
+	parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
+				(u32) patched_cb_handle);
+	/* hl_cb_get should never fail here so use kernel WARN */
+	WARN(!parser->patched_cb, "DMA CB handle invalid 0x%x\n",
+			(u32) patched_cb_handle);
+	if (!parser->patched_cb) {
+		rc = -EFAULT;
+		goto out;
+	}
+
+	rc = goya_patch_cb(hdev, parser);
+
+	if (rc)
+		hl_cb_put(parser->patched_cb);
+
+out:
+	/*
+	 * Always call cb destroy here because we still have 1 reference
+	 * to it by calling cb_get earlier. After the job will be completed,
+	 * cb_put will release it, but here we want to remove it from the
+	 * idr
+	 */
+	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
+				patched_cb_handle << PAGE_SHIFT);
+
+free_userptr:
+	if (rc)
+		hl_userptr_delete_list(hdev, parser->job_userptr_list);
+	return rc;
+}
+
+int goya_parse_cb_no_ext_quque(struct hl_device *hdev,
+		struct hl_cs_parser *parser)
+{
+	struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
+	struct goya_device *goya = hdev->asic_specific;
+
+	if (!(goya->hw_cap_initialized & HW_CAP_MMU)) {
+		/* For internal queue jobs, just check if cb address is valid */
+		if (hl_mem_area_inside_range(
+				(u64) (uintptr_t) parser->user_cb,
+				parser->user_cb_size,
+				asic_prop->sram_user_base_address,
+				asic_prop->sram_end_address))
+			return 0;
+
+		if (hl_mem_area_inside_range(
+				(u64) (uintptr_t) parser->user_cb,
+				parser->user_cb_size,
+				asic_prop->dram_user_base_address,
+				asic_prop->dram_end_address))
+			return 0;
+
+		dev_err(hdev->dev,
+			"Internal CB address 0x%llx + 0x%x is not in SRAM nor in DRAM\n",
+			(u64) (uintptr_t) parser->user_cb,
+			parser->user_cb_size);
+
+		return -EFAULT;
+	}
+
+	return 0;
+}
+
+int goya_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
+{
+	struct goya_device *goya = hdev->asic_specific;
+
+	if (!parser->ext_queue)
+		return goya_parse_cb_no_ext_quque(hdev, parser);
+
+	if ((goya->hw_cap_initialized & HW_CAP_MMU) && parser->use_virt_addr)
+		return goya_parse_cb_mmu(hdev, parser);
+	else
+		return goya_parse_cb_no_mmu(hdev, parser);
+}
+
+void goya_add_end_of_cb_packets(u64 kernel_address, u32 len, u64 cq_addr,
+				u32 cq_val, u32 msix_vec)
+{
+	struct packet_msg_prot *cq_pkt;
+
+	cq_pkt = (struct packet_msg_prot *) (uintptr_t)
+		(kernel_address + len - (sizeof(struct packet_msg_prot) * 2));
+
+	cq_pkt->ctl = (PACKET_MSG_PROT << GOYA_PKT_CTL_OPCODE_SHIFT) |
+			(1 << GOYA_PKT_CTL_EB_SHIFT) |
+			(1 << GOYA_PKT_CTL_MB_SHIFT);
+	cq_pkt->value = cq_val;
+	cq_pkt->addr = cq_addr;
+
+	cq_pkt++;
+
+	cq_pkt->ctl = (PACKET_MSG_PROT << GOYA_PKT_CTL_OPCODE_SHIFT) |
+			(1 << GOYA_PKT_CTL_MB_SHIFT);
+	cq_pkt->value = msix_vec & 0x7FF;
+	cq_pkt->addr = CFG_BASE + mmPCIE_DBI_MSIX_DOORBELL_OFF;
+}
+
 static void goya_update_eq_ci(struct hl_device *hdev, u32 val)
 {
 	WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_6, val);
 }
 
+int goya_context_switch(struct hl_device *hdev, u32 asid)
+{
+	struct asic_fixed_properties *prop = &hdev->asic_prop;
+	struct packet_lin_dma *clear_sram_pkt;
+	struct hl_cs_parser parser;
+	struct hl_cs_job *job;
+	u32 cb_size;
+	struct hl_cb *cb;
+	int rc;
+
+	cb = hl_cb_kernel_create(hdev, PAGE_SIZE);
+	if (!cb)
+		return -EFAULT;
+
+	clear_sram_pkt = (struct packet_lin_dma *)
+					(uintptr_t) cb->kernel_address;
+
+	memset(clear_sram_pkt, 0, sizeof(*clear_sram_pkt));
+	cb_size = sizeof(*clear_sram_pkt);
+
+	clear_sram_pkt->ctl = ((PACKET_LIN_DMA << GOYA_PKT_CTL_OPCODE_SHIFT) |
+		(DMA_HOST_TO_SRAM << GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT) |
+		(1 << GOYA_PKT_LIN_DMA_CTL_MEMSET_SHIFT) |
+		(1 << GOYA_PKT_LIN_DMA_CTL_WO_SHIFT) |
+		(1 << GOYA_PKT_CTL_RB_SHIFT) |
+		(1 << GOYA_PKT_CTL_MB_SHIFT));
+
+	clear_sram_pkt->src_addr = 0x7777777777777777ull;
+	clear_sram_pkt->dst_addr = prop->sram_base_address;
+	if (hdev->pldm)
+		clear_sram_pkt->tsize = 0x10000;
+	else
+		clear_sram_pkt->tsize = prop->sram_size;
+
+	job = hl_cs_allocate_job(hdev, true);
+	if (!job) {
+		dev_err(hdev->dev, "Failed to allocate a new job\n");
+		rc = -ENOMEM;
+		goto release_cb;
+	}
+
+	job->id = 0;
+	job->user_cb = cb;
+	job->user_cb->cs_cnt++;
+	job->user_cb_size = cb_size;
+	job->hw_queue_id = GOYA_QUEUE_ID_DMA_0;
+
+	parser.ctx_id = HL_KERNEL_ASID_ID;
+	parser.cs_sequence = 0;
+	parser.job_id = job->id;
+	parser.hw_queue_id = job->hw_queue_id;
+	parser.job_userptr_list = &job->userptr_list;
+	parser.user_cb = job->user_cb;
+	parser.user_cb_size = job->user_cb_size;
+	parser.ext_queue = job->ext_queue;
+	parser.use_virt_addr = hdev->mmu_enable;
+
+	rc = hdev->asic_funcs->cs_parser(hdev, &parser);
+	if (rc) {
+		dev_err(hdev->dev,
+			"Failed to parse kernel CB during context switch\n");
+		goto free_job;
+	}
+
+	job->patched_cb = parser.patched_cb;
+	job->job_cb_size = parser.patched_cb_size;
+	job->patched_cb->cs_cnt++;
+
+	rc = goya_send_job_on_qman0(hdev, job);
+
+	job->patched_cb->cs_cnt--;
+	hl_cb_put(job->patched_cb);
+
+free_job:
+	hl_userptr_delete_list(hdev, &job->userptr_list);
+	kfree(job);
+	cb->cs_cnt--;
+
+release_cb:
+	hl_cb_put(cb);
+	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
+
+	return rc;
+}
+
+void goya_restore_phase_topology(struct hl_device *hdev)
+{
+	int i, num_of_sob_in_longs, num_of_mon_in_longs;
+
+	num_of_sob_in_longs =
+		((mmSYNC_MNGR_SOB_OBJ_1023 - mmSYNC_MNGR_SOB_OBJ_0) + 4);
+
+	num_of_mon_in_longs =
+		((mmSYNC_MNGR_MON_STATUS_255 - mmSYNC_MNGR_MON_STATUS_0) + 4);
+
+	for (i = 0 ; i < num_of_sob_in_longs ; i += 4)
+		WREG32(mmSYNC_MNGR_SOB_OBJ_0 + i, 0);
+
+	for (i = 0 ; i < num_of_mon_in_longs ; i += 4)
+		WREG32(mmSYNC_MNGR_MON_STATUS_0 + i, 0);
+
+	/* Flush all WREG to prevent race */
+	i = RREG32(mmSYNC_MNGR_SOB_OBJ_0);
+}
+
 static void goya_get_axi_name(struct hl_device *hdev, u32 agent_id,
 		u16 event_type, char *axi_name, int len)
 {
@@ -3608,6 +4673,59 @@ static void goya_disable_clock_gating(struct hl_device *hdev)
 
 }
 
+static bool goya_is_device_idle(struct hl_device *hdev)
+{
+	u64 offset, dma_qm_reg, tpc_qm_reg, tpc_cmdq_reg, tpc_cfg_reg;
+	int i;
+
+	offset = mmDMA_QM_1_GLBL_STS0 - mmDMA_QM_0_GLBL_STS0;
+
+	for (i = 0 ; i < DMA_MAX_NUM ; i++) {
+		dma_qm_reg = mmDMA_QM_0_GLBL_STS0 + i * offset;
+
+		if ((RREG32(dma_qm_reg) & DMA_QM_IDLE_MASK) !=
+				DMA_QM_IDLE_MASK)
+			return false;
+	}
+
+	offset = mmTPC1_QM_GLBL_STS0 - mmTPC0_QM_GLBL_STS0;
+
+	for (i = 0 ; i < TPC_MAX_NUM ; i++) {
+		tpc_qm_reg = mmTPC0_QM_GLBL_STS0 + i * offset;
+		tpc_cmdq_reg = mmTPC0_CMDQ_GLBL_STS0 + i * offset;
+		tpc_cfg_reg = mmTPC0_CFG_STATUS + i * offset;
+
+		if ((RREG32(tpc_qm_reg) & TPC_QM_IDLE_MASK) !=
+				TPC_QM_IDLE_MASK)
+			return false;
+
+		if ((RREG32(tpc_cmdq_reg) & TPC_CMDQ_IDLE_MASK) !=
+				TPC_CMDQ_IDLE_MASK)
+			return false;
+
+		if ((RREG32(tpc_cfg_reg) & TPC_CFG_IDLE_MASK) !=
+				TPC_CFG_IDLE_MASK)
+			return false;
+	}
+
+	if ((RREG32(mmMME_QM_GLBL_STS0) & MME_QM_IDLE_MASK) !=
+			MME_QM_IDLE_MASK)
+		return false;
+
+	if ((RREG32(mmMME_CMDQ_GLBL_STS0) & MME_CMDQ_IDLE_MASK) !=
+			MME_CMDQ_IDLE_MASK)
+		return false;
+
+	if ((RREG32(mmMME_ARCH_STATUS) & MME_ARCH_IDLE_MASK) !=
+			MME_ARCH_IDLE_MASK)
+		return false;
+
+	if (RREG32(mmMME_SHADOW_0_STATUS) & MME_SHADOW_IDLE_MASK)
+		return false;
+
+	return true;
+}
+
 static void goya_hw_queues_lock(struct hl_device *hdev)
 {
 	struct goya_device *goya = hdev->asic_specific;
@@ -3700,7 +4818,14 @@ static const struct hl_asic_funcs goya_funcs = {
 	.dma_pool_free = goya_dma_pool_free,
 	.cpu_accessible_dma_pool_alloc = goya_cpu_accessible_dma_pool_alloc,
 	.cpu_accessible_dma_pool_free = goya_cpu_accessible_dma_pool_free,
+	.hl_dma_unmap_sg = goya_dma_unmap_sg,
+	.cs_parser = goya_cs_parser,
+	.asic_dma_map_sg = goya_dma_map_sg,
+	.get_dma_desc_list_size = goya_get_dma_desc_list_size,
+	.add_end_of_cb_packets = goya_add_end_of_cb_packets,
 	.update_eq_ci = goya_update_eq_ci,
+	.context_switch = goya_context_switch,
+	.restore_phase_topology = goya_restore_phase_topology,
 	.add_device_attr = goya_add_device_attr,
 	.handle_eqe = goya_handle_eqe,
 	.set_pll_profile = goya_set_pll_profile,
@@ -3708,6 +4833,7 @@ static const struct hl_asic_funcs goya_funcs = {
 	.send_heartbeat = goya_send_heartbeat,
 	.enable_clock_gating = goya_init_clock_gating,
 	.disable_clock_gating = goya_disable_clock_gating,
+	.is_device_idle = goya_is_device_idle,
 	.soft_reset_late_init = goya_soft_reset_late_init,
 	.hw_queues_lock = goya_hw_queues_lock,
 	.hw_queues_unlock = goya_hw_queues_unlock,
diff --git a/drivers/misc/habanalabs/habanalabs.h b/drivers/misc/habanalabs/habanalabs.h
index 744e37bbc2a6..9adc7c6ec08b 100644
--- a/drivers/misc/habanalabs/habanalabs.h
+++ b/drivers/misc/habanalabs/habanalabs.h
@@ -16,6 +16,9 @@
 #include <linux/cdev.h>
 #include <linux/iopoll.h>
 #include <linux/irqreturn.h>
+#include <linux/dma-fence.h>
+#include <linux/dma-direction.h>
+#include <linux/scatterlist.h>
 
 #define HL_NAME				"habanalabs"
 
@@ -31,6 +34,11 @@
 
 #define HL_MAX_QUEUES			128
 
+#define HL_MAX_JOBS_PER_CS		64
+
+/* MUST BE POWER OF 2 and larger than 1 */
+#define HL_MAX_PENDING_CS		64
+
 struct hl_device;
 struct hl_fpriv;
 
@@ -61,6 +69,16 @@ struct hw_queue_properties {
 	u8			kmd_only;
 };
 
+/**
+ * enum vm_type_t - virtual memory mapping request information.
+ * @VM_TYPE_USERPTR: mapping of user memory to device virtual address.
+ * @VM_TYPE_PHYS_LIST: mapping of DRAM memory to device virtual address.
+ */
+enum vm_type_t {
+	VM_TYPE_USERPTR,
+	VM_TYPE_PHYS_LIST
+};
+
 /**
  * enum hl_device_hw_state - H/W device state. use this to understand whether
  *                           to do reset before hw_init or not
@@ -147,6 +165,19 @@ struct asic_fixed_properties {
 	u8			tpc_enabled_mask;
 };
 
+/**
+ * struct hl_dma_fence - wrapper for fence object used by command submissions.
+ * @base_fence: kernel fence object.
+ * @lock: spinlock to protect fence.
+ * @hdev: habanalabs device structure.
+ * @cs_seq: command submission sequence number.
+ */
+struct hl_dma_fence {
+	struct dma_fence	base_fence;
+	spinlock_t		lock;
+	struct hl_device	*hdev;
+	u64			cs_seq;
+};
 
 /*
  * Command Buffers
@@ -175,6 +206,7 @@ struct hl_cb_mgr {
  * @mmap_size: Holds the CB's size that was mmaped.
  * @size: holds the CB's size.
  * @id: the CB's ID.
+ * @cs_cnt: holds number of CS that this CB participates in.
  * @ctx_id: holds the ID of the owner's context.
  * @mmap: true if the CB is currently mmaped to user.
  * @is_pool: true if CB was acquired from the pool, false otherwise.
@@ -189,6 +221,7 @@ struct hl_cb {
 	u32			mmap_size;
 	u32			size;
 	u32			id;
+	u32			cs_cnt;
 	u32			ctx_id;
 	u8			mmap;
 	u8			is_pool;
@@ -313,6 +346,8 @@ enum hl_asic_type {
 	ASIC_INVALID
 };
 
+struct hl_cs_parser;
+
 /**
  * enum hl_pm_mng_profile - power management profile.
  * @PM_AUTO: internal clock is set by KMD.
@@ -372,7 +407,14 @@ enum hl_pll_frequency {
  * @dma_pool_free: free small DMA allocation from pool.
  * @cpu_accessible_dma_pool_alloc: allocate CPU PQ packet from DMA pool.
  * @cpu_accessible_dma_pool_free: free CPU PQ packet from DMA pool.
+ * @hl_dma_unmap_sg: DMA unmap scatter-gather list.
+ * @cs_parser: parse Command Submission.
+ * @asic_dma_map_sg: DMA map scatter-gather list.
+ * @get_dma_desc_list_size: get number of LIN_DMA packets required for CB.
+ * @add_end_of_cb_packets: Add packets to the end of CB, if device requires it.
  * @update_eq_ci: update event queue CI.
+ * @context_switch: called upon ASID context switch.
+ * @restore_phase_topology: clear all SOBs amd MONs.
  * @add_device_attr: add ASIC specific device attributes.
  * @handle_eqe: handle event queue entry (IRQ) from ArmCP.
  * @set_pll_profile: change PLL profile (manual/automatic).
@@ -380,6 +422,7 @@ enum hl_pll_frequency {
  * @send_heartbeat: send is-alive packet to ArmCP and verify response.
  * @enable_clock_gating: enable clock gating for reducing power consumption.
  * @disable_clock_gating: disable clock for accessing registers on HBW.
+ * @is_device_idle: return true if device is idle, false otherwise.
  * @soft_reset_late_init: perform certain actions needed after soft reset.
  * @hw_queues_lock: acquire H/W queues lock.
  * @hw_queues_unlock: release H/W queues lock.
@@ -419,7 +462,20 @@ struct hl_asic_funcs {
 				size_t size, dma_addr_t *dma_handle);
 	void (*cpu_accessible_dma_pool_free)(struct hl_device *hdev,
 				size_t size, void *vaddr);
+	void (*hl_dma_unmap_sg)(struct hl_device *hdev,
+				struct scatterlist *sg, int nents,
+				enum dma_data_direction dir);
+	int (*cs_parser)(struct hl_device *hdev, struct hl_cs_parser *parser);
+	int (*asic_dma_map_sg)(struct hl_device *hdev,
+				struct scatterlist *sg, int nents,
+				enum dma_data_direction dir);
+	u32 (*get_dma_desc_list_size)(struct hl_device *hdev,
+					struct sg_table *sgt);
+	void (*add_end_of_cb_packets)(u64 kernel_address, u32 len, u64 cq_addr,
+					u32 cq_val, u32 msix_num);
 	void (*update_eq_ci)(struct hl_device *hdev, u32 val);
+	int (*context_switch)(struct hl_device *hdev, u32 asid);
+	void (*restore_phase_topology)(struct hl_device *hdev);
 	void (*add_device_attr)(struct hl_device *hdev,
 				struct attribute_group *dev_attr_grp);
 	void (*handle_eqe)(struct hl_device *hdev,
@@ -430,6 +486,7 @@ struct hl_asic_funcs {
 	int (*send_heartbeat)(struct hl_device *hdev);
 	void (*enable_clock_gating)(struct hl_device *hdev);
 	void (*disable_clock_gating)(struct hl_device *hdev);
+	bool (*is_device_idle)(struct hl_device *hdev);
 	int (*soft_reset_late_init)(struct hl_device *hdev);
 	void (*hw_queues_lock)(struct hl_device *hdev);
 	void (*hw_queues_unlock)(struct hl_device *hdev);
@@ -453,12 +510,28 @@ struct hl_asic_funcs {
  * @hdev: pointer to the device structure.
  * @refcount: reference counter for the context. Context is released only when
  *		this hits 0l. It is incremented on CS and CS_WAIT.
+ * @cs_pending: array of DMA fence objects representing pending CS.
+ * @cs_sequence: sequence number for CS. Value is assigned to a CS and passed
+ *			to user so user could inquire about CS. It is used as
+ *			index to cs_pending array.
+ * @cs_lock: spinlock to protect cs_sequence.
+ * @thread_restore_token: token to prevent multiple threads of the same context
+ *				from running the restore phase. Only one thread
+ *				should run it.
+ * @thread_restore_wait_token: token to prevent the threads that didn't run
+ *				the restore phase from moving to their execution
+ *				phase before the restore phase has finished.
  * @asid: context's unique address space ID in the device's MMU.
  */
 struct hl_ctx {
 	struct hl_fpriv		*hpriv;
 	struct hl_device	*hdev;
 	struct kref		refcount;
+	struct dma_fence	*cs_pending[HL_MAX_PENDING_CS];
+	u64			cs_sequence;
+	spinlock_t		cs_lock;
+	atomic_t		thread_restore_token;
+	u32			thread_restore_wait_token;
 	u32			asid;
 };
 
@@ -473,14 +546,129 @@ struct hl_ctx_mgr {
 };
 
 
+
+/*
+ * COMMAND SUBMISSIONS
+ */
+
+/**
+ * struct hl_userptr - memory mapping chunk information
+ * @vm_type: type of the VM.
+ * @job_node: linked-list node for hanging the object on the Job's list.
+ * @vec: pointer to the frame vector.
+ * @sgt: pointer to the scatter-gather table that holds the pages.
+ * @dir: for DMA unmapping, the direction must be supplied, so save it.
+ * @debugfs_list: node in debugfs list of command submissions.
+ * @addr: user-space virtual pointer to the start of the memory area.
+ * @size: size of the memory area to pin & map.
+ * @dma_mapped: true if the SG was mapped to DMA addresses, false otherwise.
+ */
+struct hl_userptr {
+	enum vm_type_t		vm_type; /* must be first */
+	struct list_head	job_node;
+	struct frame_vector	*vec;
+	struct sg_table		*sgt;
+	enum dma_data_direction dir;
+	struct list_head	debugfs_list;
+	u64			addr;
+	u32			size;
+	u8			dma_mapped;
+};
+
+/**
+ * struct hl_cs - command submission.
+ * @jobs_in_queue_cnt: per each queue, maintain counter of submitted jobs.
+ * @ctx: the context this CS belongs to.
+ * @job_list: list of the CS's jobs in the various queues.
+ * @job_lock: spinlock for the CS's jobs list. Needed for free_job.
+ * @refcount: reference counter for usage of the CS.
+ * @fence: pointer to the fence object of this CS.
+ * @work_tdr: delayed work node for TDR.
+ * @mirror_node : node in device mirror list of command submissions.
+ * @sequence: the sequence number of this CS.
+ * @submitted: true if CS was submitted to H/W.
+ * @completed: true if CS was completed by device.
+ * @timedout : true if CS was timedout.
+ * @tdr_active: true if TDR was activated for this CS (to prevent
+ *		double TDR activation).
+ * @aborted: true if CS was aborted due to some device error.
+ */
+struct hl_cs {
+	u8			jobs_in_queue_cnt[HL_MAX_QUEUES];
+	struct hl_ctx		*ctx;
+	struct list_head	job_list;
+	spinlock_t		job_lock;
+	struct kref		refcount;
+	struct dma_fence	*fence;
+	struct delayed_work	work_tdr;
+	struct list_head	mirror_node;
+	u64			sequence;
+	u8			submitted;
+	u8			completed;
+	u8			timedout;
+	u8			tdr_active;
+	u8			aborted;
+};
+
 /**
  * struct hl_cs_job - command submission job.
+ * @cs_node: the node to hang on the CS jobs list.
+ * @cs: the CS this job belongs to.
+ * @user_cb: the CB we got from the user.
+ * @patched_cb: in case of patching, this is internal CB which is submitted on
+ *		the queue instead of the CB we got from the IOCTL.
  * @finish_work: workqueue object to run when job is completed.
+ * @userptr_list: linked-list of userptr mappings that belong to this job and
+ *			wait for completion.
  * @id: the id of this job inside a CS.
+ * @hw_queue_id: the id of the H/W queue this job is submitted to.
+ * @user_cb_size: the actual size of the CB we got from the user.
+ * @job_cb_size: the actual size of the CB that we put on the queue.
+ * @ext_queue: whether the job is for external queue or internal queue.
  */
 struct hl_cs_job {
+	struct list_head	cs_node;
+	struct hl_cs		*cs;
+	struct hl_cb		*user_cb;
+	struct hl_cb		*patched_cb;
 	struct work_struct	finish_work;
+	struct list_head	userptr_list;
 	u32			id;
+	u32			hw_queue_id;
+	u32			user_cb_size;
+	u32			job_cb_size;
+	u8			ext_queue;
+};
+
+/**
+ * struct hl_cs_parser - command submission paerser properties.
+ * @user_cb: the CB we got from the user.
+ * @patched_cb: in case of patching, this is internal CB which is submitted on
+ *		the queue instead of the CB we got from the IOCTL.
+ * @job_userptr_list: linked-list of userptr mappings that belong to the related
+ *			job and wait for completion.
+ * @cs_sequence: the sequence number of the related CS.
+ * @ctx_id: the ID of the context the related CS belongs to.
+ * @hw_queue_id: the id of the H/W queue this job is submitted to.
+ * @user_cb_size: the actual size of the CB we got from the user.
+ * @patched_cb_size: the size of the CB after parsing.
+ * @ext_queue: whether the job is for external queue or internal queue.
+ * @job_id: the id of the related job inside the related CS.
+ * @use_virt_addr: whether to treat the addresses in the CB as virtual during
+ *			parsing.
+ */
+struct hl_cs_parser {
+	struct hl_cb		*user_cb;
+	struct hl_cb		*patched_cb;
+	struct list_head	*job_userptr_list;
+	u64			cs_sequence;
+	u32			ctx_id;
+	u32			hw_queue_id;
+	u32			user_cb_size;
+	u32			patched_cb_size;
+	u8			ext_queue;
+	u8			job_id;
+	u8			use_virt_addr;
 };
 
 
@@ -497,6 +685,7 @@ struct hl_cs_job {
  * @ctx_mgr: context manager to handle multiple context for this FD.
  * @cb_mgr: command buffer manager to handle multiple buffers for this FD.
  * @refcount: number of related contexts.
+ * @restore_phase_mutex: lock for context switch and restore phase.
  */
 struct hl_fpriv {
 	struct hl_device	*hdev;
@@ -506,6 +695,7 @@ struct hl_fpriv {
 	struct hl_ctx_mgr	ctx_mgr;
 	struct hl_cb_mgr	cb_mgr;
 	struct kref		refcount;
+	struct mutex		restore_phase_mutex;
 };
 
 
@@ -577,6 +767,8 @@ struct hl_device_reset_work {
  * @eq_wq: work queue of event queue for executing work in process context.
  * @kernel_ctx: KMD context structure.
  * @kernel_queues: array of hl_hw_queue.
+ * @hw_queues_mirror_list: CS mirror list for TDR.
+ * @hw_queues_mirror_lock: protects hw_queues_mirror_list.
  * @kernel_cb_mgr: command buffer manager for creating/destroying/handling CGs.
  * @event_queue: event queue for IRQ from ArmCP.
  * @dma_pool: DMA pool for small allocations.
@@ -604,6 +796,7 @@ struct hl_device_reset_work {
  * @in_reset: is device in reset flow.
  * @curr_pll_profile: current PLL profile.
  * @fd_open_cnt: number of open user processes.
+ * @timeout_jiffies: device CS timeout value.
  * @max_power: the max power of the device, as configured by the sysadmin. This
  *             value is saved so in case of hard-reset, KMD will restore this
  *             value and update the F/W after the re-initialization
@@ -617,7 +810,10 @@ struct hl_device_reset_work {
  * @hwmon_initialized: is H/W monitor sensors was initialized.
  * @hard_reset_pending: is there a hard reset work pending.
  * @heartbeat: is heartbeat sanity check towards ArmCP enabled.
+ * @reset_on_lockup: true if a reset should be done in case of stuck CS, false
+ *                   otherwise.
  * @init_done: is the initialization of the device done.
+ * @mmu_enable: is MMU enabled.
  */
 struct hl_device {
 	struct pci_dev			*pdev;
@@ -634,6 +830,8 @@ struct hl_device {
 	struct workqueue_struct		*eq_wq;
 	struct hl_ctx			*kernel_ctx;
 	struct hl_hw_queue		*kernel_queues;
+	struct list_head		hw_queues_mirror_list;
+	spinlock_t			hw_queues_mirror_lock;
 	struct hl_cb_mgr		kernel_cb_mgr;
 	struct hl_eq			event_queue;
 	struct dma_pool			*dma_pool;
@@ -661,6 +859,7 @@ struct hl_device {
 	atomic_t			in_reset;
 	atomic_t			curr_pll_profile;
 	atomic_t			fd_open_cnt;
+	u64				timeout_jiffies;
 	u64				max_power;
 	u32				major;
 	u32				high_pll;
@@ -672,9 +871,11 @@ struct hl_device {
 	u8				hwmon_initialized;
 	u8				hard_reset_pending;
 	u8				heartbeat;
+	u8				reset_on_lockup;
 	u8				init_done;
 
 	/* Parameters for bring-up */
+	u8				mmu_enable;
 	u8				cpu_enable;
 	u8				reset_pcilink;
 	u8				cpu_queues_enable;
@@ -712,6 +913,58 @@ struct hl_ioctl_desc {
  * Kernel module functions that can be accessed by entire module
  */
 
+/**
+ * hl_mem_area_inside_range() - Checks whether address+size are inside a range.
+ * @address: The start address of the area we want to validate.
+ * @size: The size in bytes of the area we want to validate.
+ * @range_start_address: The start address of the valid range.
+ * @range_end_address: The end address of the valid range.
+ *
+ * Return: true if the area is inside the valid range, false otherwise.
+ */
+static inline bool hl_mem_area_inside_range(u64 address, u32 size,
+				u64 range_start_address, u64 range_end_address)
+{
+	u64 end_address = address + size;
+
+	if ((address >= range_start_address) &&
+			(end_address <= range_end_address) &&
+			(end_address > address))
+		return true;
+
+	return false;
+}
+
+/**
+ * hl_mem_area_crosses_range() - Checks whether address+size crossing a range.
+ * @address: The start address of the area we want to validate.
+ * @size: The size in bytes of the area we want to validate.
+ * @range_start_address: The start address of the valid range.
+ * @range_end_address: The end address of the valid range.
+ *
+ * Return: true if the area overlaps part or all of the valid range,
+ *		false otherwise.
+ */
+static inline bool hl_mem_area_crosses_range(u64 address, u32 size,
+				u64 range_start_address, u64 range_end_address)
+{
+	u64 end_address = address + size;
+
+	if ((address >= range_start_address) &&
+			(address < range_end_address))
+		return true;
+
+	if ((end_address >= range_start_address) &&
+			(end_address < range_end_address))
+		return true;
+
+	if ((address < range_start_address) &&
+			(end_address >= range_end_address))
+		return true;
+
+	return false;
+}
+
 int hl_device_open(struct inode *inode, struct file *filp);
 bool hl_device_disabled_or_in_reset(struct hl_device *hdev);
 int create_hdev(struct hl_device **dev, struct pci_dev *pdev,
@@ -725,8 +978,10 @@ int hl_hw_queues_create(struct hl_device *hdev);
 void hl_hw_queues_destroy(struct hl_device *hdev);
 int hl_hw_queue_send_cb_no_cmpl(struct hl_device *hdev, u32 hw_queue_id,
 				u32 cb_size, u64 cb_ptr);
+int hl_hw_queue_schedule_cs(struct hl_cs *cs);
 u32 hl_hw_queue_add_ptr(u32 ptr, u16 val);
 void hl_hw_queue_inc_ci_kernel(struct hl_device *hdev, u32 hw_queue_id);
+void hl_int_hw_queue_update_ci(struct hl_cs *cs);
 void hl_hw_queue_reset(struct hl_device *hdev, bool hard_reset);
 
 #define hl_queue_inc_ptr(p)		hl_hw_queue_add_ptr(p, 1)
@@ -740,6 +995,8 @@ void hl_cq_reset(struct hl_device *hdev, struct hl_cq *q);
 void hl_eq_reset(struct hl_device *hdev, struct hl_eq *q);
 irqreturn_t hl_irq_handler_cq(int irq, void *arg);
 irqreturn_t hl_irq_handler_eq(int irq, void *arg);
+u32 hl_cq_inc_ptr(u32 ptr);
+
 int hl_asid_init(struct hl_device *hdev);
 void hl_asid_fini(struct hl_device *hdev);
 unsigned long hl_asid_alloc(struct hl_device *hdev);
@@ -748,9 +1005,13 @@ void hl_asid_free(struct hl_device *hdev, unsigned long asid);
 int hl_ctx_create(struct hl_device *hdev, struct hl_fpriv *hpriv);
 void hl_ctx_free(struct hl_device *hdev, struct hl_ctx *ctx);
 int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx);
+void hl_ctx_do_release(struct kref *ref);
+void hl_ctx_get(struct hl_device *hdev,	struct hl_ctx *ctx);
 int hl_ctx_put(struct hl_ctx *ctx);
+struct dma_fence *hl_ctx_get_fence(struct hl_ctx *ctx, u64 seq);
 void hl_ctx_mgr_init(struct hl_ctx_mgr *mgr);
 void hl_ctx_mgr_fini(struct hl_device *hdev, struct hl_ctx_mgr *mgr);
+
 int hl_device_init(struct hl_device *hdev, struct class *hclass);
 void hl_device_fini(struct hl_device *hdev);
 int hl_device_suspend(struct hl_device *hdev);
@@ -782,8 +1043,20 @@ struct hl_cb *hl_cb_kernel_create(struct hl_device *hdev, u32 cb_size);
 int hl_cb_pool_init(struct hl_device *hdev);
 int hl_cb_pool_fini(struct hl_device *hdev);
 
+void hl_cs_rollback_all(struct hl_device *hdev);
+struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev, bool ext_queue);
+
 void goya_set_asic_funcs(struct hl_device *hdev);
 
+int hl_pin_host_memory(struct hl_device *hdev, u64 addr, u32 size,
+			struct hl_userptr *userptr);
+int hl_unpin_host_memory(struct hl_device *hdev, struct hl_userptr *userptr);
+void hl_userptr_delete_list(struct hl_device *hdev,
+				struct list_head *userptr_list);
+bool hl_userptr_is_pinned(struct hl_device *hdev, u64 addr, u32 size,
+				struct list_head *userptr_list,
+				struct hl_userptr **userptr);
+
 long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr);
 void hl_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq);
 long hl_get_temperature(struct hl_device *hdev, int sensor_index, u32 attr);
@@ -799,5 +1072,7 @@ void hl_set_max_power(struct hl_device *hdev, u64 value);
 /* IOCTLs */
 long hl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
 int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data);
+int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data);
+int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data);
 
 #endif /* HABANALABSP_H_ */
diff --git a/drivers/misc/habanalabs/habanalabs_drv.c b/drivers/misc/habanalabs/habanalabs_drv.c
index b0bf77af1e40..77a1cc85e530 100644
--- a/drivers/misc/habanalabs/habanalabs_drv.c
+++ b/drivers/misc/habanalabs/habanalabs_drv.c
@@ -24,6 +24,17 @@ static struct class *hl_class;
 DEFINE_IDR(hl_devs_idr);
 DEFINE_MUTEX(hl_devs_idr_lock);
 
+static int timeout_locked = 5;
+static int reset_on_lockup = 1;
+
+module_param(timeout_locked, int, 0444);
+MODULE_PARM_DESC(timeout_locked,
+	"Device lockup timeout in seconds (0 = disabled, default 5s)");
+
+module_param(reset_on_lockup, int, 0444);
+MODULE_PARM_DESC(reset_on_lockup,
+	"Do device reset on lockup (0 = no, 1 = yes, default yes)");
+
 #define PCI_VENDOR_ID_HABANALABS	0x1da3
 
 #define PCI_IDS_GOYA			0x0001
@@ -113,6 +124,7 @@ int hl_device_open(struct inode *inode, struct file *filp)
 	hpriv->hdev = hdev;
 	filp->private_data = hpriv;
 	hpriv->filp = filp;
+	mutex_init(&hpriv->restore_phase_mutex);
 	kref_init(&hpriv->refcount);
 	nonseekable_open(inode, filp);
 
@@ -140,6 +152,7 @@ out_err:
 	filp->private_data = NULL;
 	hl_ctx_mgr_fini(hpriv->hdev, &hpriv->ctx_mgr);
 	hl_cb_mgr_fini(hpriv->hdev, &hpriv->cb_mgr);
+	mutex_destroy(&hpriv->restore_phase_mutex);
 	kfree(hpriv);
 
 close_device:
@@ -172,8 +185,10 @@ int create_hdev(struct hl_device **dev, struct pci_dev *pdev,
 		return -ENOMEM;
 
 	hdev->major = hl_major;
+	hdev->reset_on_lockup = reset_on_lockup;
 
 	/* Parameters for bring-up - set them to defaults */
+	hdev->mmu_enable = 0;
 	hdev->cpu_enable = 1;
 	hdev->reset_pcilink = 0;
 	hdev->cpu_queues_enable = 1;
@@ -193,6 +208,11 @@ int create_hdev(struct hl_device **dev, struct pci_dev *pdev,
 	if (!hdev->cpu_queues_enable)
 		hdev->heartbeat = 0;
 
+	if (timeout_locked)
+		hdev->timeout_jiffies = msecs_to_jiffies(timeout_locked * 1000);
+	else
+		hdev->timeout_jiffies = MAX_SCHEDULE_TIMEOUT;
+
 	hdev->disabled = true;
 	hdev->pdev = pdev; /* can be NULL in case of simulator device */
 
diff --git a/drivers/misc/habanalabs/habanalabs_ioctl.c b/drivers/misc/habanalabs/habanalabs_ioctl.c
index e56a51f6bab6..481db1a5e97e 100644
--- a/drivers/misc/habanalabs/habanalabs_ioctl.c
+++ b/drivers/misc/habanalabs/habanalabs_ioctl.c
@@ -16,7 +16,9 @@
 	[_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func}
 
 static const struct hl_ioctl_desc hl_ioctls[] = {
-	HL_IOCTL_DEF(HL_IOCTL_CB, hl_cb_ioctl)
+	HL_IOCTL_DEF(HL_IOCTL_CB, hl_cb_ioctl),
+	HL_IOCTL_DEF(HL_IOCTL_CS, hl_cs_ioctl),
+	HL_IOCTL_DEF(HL_IOCTL_WAIT_CS, hl_cs_wait_ioctl)
 };
 
 #define HL_CORE_IOCTL_COUNT	ARRAY_SIZE(hl_ioctls)
diff --git a/drivers/misc/habanalabs/hw_queue.c b/drivers/misc/habanalabs/hw_queue.c
index 2ec43f36cdb8..68dfda59a875 100644
--- a/drivers/misc/habanalabs/hw_queue.c
+++ b/drivers/misc/habanalabs/hw_queue.c
@@ -34,6 +34,29 @@ static inline int queue_free_slots(struct hl_hw_queue *q, u32 queue_len)
 		return (abs(delta) - queue_len);
 }
 
+void hl_int_hw_queue_update_ci(struct hl_cs *cs)
+{
+	struct hl_device *hdev = cs->ctx->hdev;
+	struct hl_hw_queue *q;
+	int i;
+
+	hdev->asic_funcs->hw_queues_lock(hdev);
+
+	if (hdev->disabled)
+		goto out;
+
+	q = &hdev->kernel_queues[0];
+	for (i = 0 ; i < HL_MAX_QUEUES ; i++, q++) {
+		if (q->queue_type == QUEUE_TYPE_INT) {
+			q->ci += cs->jobs_in_queue_cnt[i];
+			q->ci &= ((q->int_queue_len << 1) - 1);
+		}
+	}
+
+out:
+	hdev->asic_funcs->hw_queues_unlock(hdev);
+}
+
 /*
  * ext_queue_submit_bd - Submit a buffer descriptor to an external queue
  *
@@ -119,6 +142,37 @@ static int ext_queue_sanity_checks(struct hl_device *hdev,
 	return 0;
 }
 
+/*
+ * int_queue_sanity_checks - perform some sanity checks on internal queue
+ *
+ * @hdev              : pointer to hl_device structure
+ * @q                 :	pointer to hl_hw_queue structure
+ * @num_of_entries    : how many entries to check for space
+ *
+ * H/W queues spinlock should be taken before calling this function
+ *
+ * Perform the following:
+ * - Make sure we have enough space in the h/w queue
+ *
+ */
+static int int_queue_sanity_checks(struct hl_device *hdev,
+					struct hl_hw_queue *q,
+					int num_of_entries)
+{
+	int free_slots_cnt;
+
+	/* Check we have enough space in the queue */
+	free_slots_cnt = queue_free_slots(q, q->int_queue_len);
+
+	if (free_slots_cnt < num_of_entries) {
+		dev_dbg(hdev->dev, "Queue %d doesn't have room for %d CBs\n",
+			q->hw_queue_id, num_of_entries);
+		return -EAGAIN;
+	}
+
+	return 0;
+}
+
 /*
  * hl_hw_queue_send_cb_no_cmpl - send a single CB (not a JOB) without completion
  *
@@ -165,6 +219,184 @@ out:
 	return rc;
 }
 
+/*
+ * ext_hw_queue_schedule_job - submit an JOB to an external queue
+ *
+ * @job: pointer to the job that needs to be submitted to the queue
+ *
+ * This function must be called when the scheduler mutex is taken
+ *
+ */
+static void ext_hw_queue_schedule_job(struct hl_cs_job *job)
+{
+	struct hl_device *hdev = job->cs->ctx->hdev;
+	struct hl_hw_queue *q = &hdev->kernel_queues[job->hw_queue_id];
+	struct hl_cq_entry cq_pkt;
+	struct hl_cq *cq;
+	u64 cq_addr;
+	struct hl_cb *cb;
+	u32 ctl;
+	u32 len;
+	u64 ptr;
+
+	/*
+	 * Update the JOB ID inside the BD CTL so the device would know what
+	 * to write in the completion queue
+	 */
+	ctl = ((q->pi << BD_CTL_SHADOW_INDEX_SHIFT) & BD_CTL_SHADOW_INDEX_MASK);
+
+	cb = job->patched_cb;
+	len = job->job_cb_size;
+	ptr = cb->bus_address;
+
+	cq_pkt.data = (q->pi << CQ_ENTRY_SHADOW_INDEX_SHIFT)
+					& CQ_ENTRY_SHADOW_INDEX_MASK;
+	cq_pkt.data |= 1 << CQ_ENTRY_SHADOW_INDEX_VALID_SHIFT;
+	cq_pkt.data |= 1 << CQ_ENTRY_READY_SHIFT;
+
+	/*
+	 * No need to protect pi_offset because scheduling to the
+	 * H/W queues is done under the scheduler mutex
+	 *
+	 * No need to check if CQ is full because it was already
+	 * checked in hl_queue_sanity_checks
+	 */
+	cq = &hdev->completion_queue[q->hw_queue_id];
+	cq_addr = cq->bus_address +
+			hdev->asic_prop.host_phys_base_address;
+	cq_addr += cq->pi * sizeof(struct hl_cq_entry);
+
+	hdev->asic_funcs->add_end_of_cb_packets(cb->kernel_address, len,
+				cq_addr, cq_pkt.data, q->hw_queue_id);
+
+	q->shadow_queue[hl_pi_2_offset(q->pi)] = job;
+
+	cq->pi = hl_cq_inc_ptr(cq->pi);
+
+	ext_queue_submit_bd(hdev, q, ctl, len, ptr);
+}
+
+/*
+ * int_hw_queue_schedule_job - submit an JOB to an internal queue
+ *
+ * @job: pointer to the job that needs to be submitted to the queue
+ *
+ * This function must be called when the scheduler mutex is taken
+ *
+ */
+static void int_hw_queue_schedule_job(struct hl_cs_job *job)
+{
+	struct hl_device *hdev = job->cs->ctx->hdev;
+	struct hl_hw_queue *q = &hdev->kernel_queues[job->hw_queue_id];
+	struct hl_bd bd;
+	u64 *pi, *pbd = (u64 *) &bd;
+
+	bd.ctl = 0;
+	bd.len = job->job_cb_size;
+	bd.ptr = (u64) (uintptr_t) job->user_cb;
+
+	pi = (u64 *) (uintptr_t) (q->kernel_address +
+		((q->pi & (q->int_queue_len - 1)) * sizeof(bd)));
+
+	pi[0] = pbd[0];
+	pi[1] = pbd[1];
+
+	q->pi++;
+	q->pi &= ((q->int_queue_len << 1) - 1);
+
+	/* Flush PQ entry write. Relevant only for specific ASICs */
+	hdev->asic_funcs->flush_pq_write(hdev, pi, pbd[0]);
+
+	hdev->asic_funcs->ring_doorbell(hdev, q->hw_queue_id, q->pi);
+}
+
+/*
+ * hl_hw_queue_schedule_cs - schedule a command submission
+ *
+ * @job        : pointer to the CS
+ *
+ */
+int hl_hw_queue_schedule_cs(struct hl_cs *cs)
+{
+	struct hl_device *hdev = cs->ctx->hdev;
+	struct hl_cs_job *job, *tmp;
+	struct hl_hw_queue *q;
+	int rc = 0, i, cq_cnt;
+
+	hdev->asic_funcs->hw_queues_lock(hdev);
+
+	if (hl_device_disabled_or_in_reset(hdev)) {
+		dev_err(hdev->dev,
+			"device is disabled or in reset, CS rejected!\n");
+		rc = -EPERM;
+		goto out;
+	}
+
+	q = &hdev->kernel_queues[0];
+	/* This loop assumes all external queues are consecutive */
+	for (i = 0, cq_cnt = 0 ; i < HL_MAX_QUEUES ; i++, q++) {
+		if (q->queue_type == QUEUE_TYPE_EXT) {
+			if (cs->jobs_in_queue_cnt[i]) {
+				rc = ext_queue_sanity_checks(hdev, q,
+					cs->jobs_in_queue_cnt[i], true);
+				if (rc)
+					goto unroll_cq_resv;
+				cq_cnt++;
+			}
+		} else if (q->queue_type == QUEUE_TYPE_INT) {
+			if (cs->jobs_in_queue_cnt[i]) {
+				rc = int_queue_sanity_checks(hdev, q,
+					cs->jobs_in_queue_cnt[i]);
+				if (rc)
+					goto unroll_cq_resv;
+			}
+		}
+	}
+
+	spin_lock(&hdev->hw_queues_mirror_lock);
+	list_add_tail(&cs->mirror_node, &hdev->hw_queues_mirror_list);
+
+	/* Queue TDR if the CS is the first entry and if timeout is wanted */
+	if ((hdev->timeout_jiffies != MAX_SCHEDULE_TIMEOUT) &&
+			(list_first_entry(&hdev->hw_queues_mirror_list,
+					struct hl_cs, mirror_node) == cs)) {
+		cs->tdr_active = true;
+		schedule_delayed_work(&cs->work_tdr, hdev->timeout_jiffies);
+		spin_unlock(&hdev->hw_queues_mirror_lock);
+	} else {
+		spin_unlock(&hdev->hw_queues_mirror_lock);
+	}
+
+	list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node) {
+		if (job->ext_queue)
+			ext_hw_queue_schedule_job(job);
+		else
+			int_hw_queue_schedule_job(job);
+	}
+
+	cs->submitted = true;
+
+	goto out;
+
+unroll_cq_resv:
+	/* This loop assumes all external queues are consecutive */
+	q = &hdev->kernel_queues[0];
+	for (i = 0 ; (i < HL_MAX_QUEUES) && (cq_cnt > 0) ; i++, q++) {
+		if ((q->queue_type == QUEUE_TYPE_EXT) &&
+				(cs->jobs_in_queue_cnt[i])) {
+			atomic_t *free_slots =
+				&hdev->completion_queue[i].free_slots_cnt;
+			atomic_add(cs->jobs_in_queue_cnt[i], free_slots);
+			cq_cnt--;
+		}
+	}
+
+out:
+	hdev->asic_funcs->hw_queues_unlock(hdev);
+
+	return rc;
+}
+
 /*
  * hl_hw_queue_inc_ci_kernel - increment ci for kernel's queue
  *
diff --git a/drivers/misc/habanalabs/memory.c b/drivers/misc/habanalabs/memory.c
new file mode 100644
index 000000000000..ad14376a1c25
--- /dev/null
+++ b/drivers/misc/habanalabs/memory.c
@@ -0,0 +1,198 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2016-2019 HabanaLabs, Ltd.
+ * All Rights Reserved.
+ */
+
+#include "habanalabs.h"
+
+#include <linux/uaccess.h>
+#include <linux/slab.h>
+
+/*
+ * hl_pin_host_memory - pins a chunk of host memory
+ *
+ * @hdev                : pointer to the habanalabs device structure
+ * @addr                : the user-space virtual address of the memory area
+ * @size                : the size of the memory area
+ * @userptr	        : pointer to hl_userptr structure
+ *
+ * This function does the following:
+ * - Pins the physical pages
+ * - Create a SG list from those pages
+ */
+int hl_pin_host_memory(struct hl_device *hdev, u64 addr, u32 size,
+			struct hl_userptr *userptr)
+{
+	u64 start, end;
+	u32 npages, offset;
+	int rc;
+
+	if (!size) {
+		dev_err(hdev->dev, "size to pin is invalid - %d\n",
+			size);
+		return -EINVAL;
+	}
+
+	if (!access_ok((void __user *) (uintptr_t) addr, size)) {
+		dev_err(hdev->dev, "user pointer is invalid - 0x%llx\n",
+			addr);
+		return -EFAULT;
+	}
+
+	/*
+	 * If the combination of the address and size requested for this memory
+	 * region causes an integer overflow, return error.
+	 */
+	if (((addr + size) < addr) ||
+			PAGE_ALIGN(addr + size) < (addr + size)) {
+		dev_err(hdev->dev,
+			"user pointer 0x%llx + %u causes integer overflow\n",
+			addr, size);
+		return -EINVAL;
+	}
+
+	start = addr & PAGE_MASK;
+	offset = addr & ~PAGE_MASK;
+	end = PAGE_ALIGN(addr + size);
+	npages = (end - start) >> PAGE_SHIFT;
+
+	userptr->size = size;
+	userptr->addr = addr;
+	userptr->dma_mapped = false;
+	INIT_LIST_HEAD(&userptr->job_node);
+
+	userptr->vec = frame_vector_create(npages);
+	if (!userptr->vec) {
+		dev_err(hdev->dev, "Failed to create frame vector\n");
+		return -ENOMEM;
+	}
+
+	rc = get_vaddr_frames(start, npages, FOLL_FORCE | FOLL_WRITE,
+				userptr->vec);
+
+	if (rc != npages) {
+		dev_err(hdev->dev,
+			"Failed to map host memory, user ptr probably wrong\n");
+		if (rc < 0)
+			goto destroy_framevec;
+		rc = -EFAULT;
+		goto put_framevec;
+	}
+
+	if (frame_vector_to_pages(userptr->vec) < 0) {
+		dev_err(hdev->dev,
+			"Failed to translate frame vector to pages\n");
+		rc = -EFAULT;
+		goto put_framevec;
+	}
+
+	userptr->sgt = kzalloc(sizeof(*userptr->sgt), GFP_ATOMIC);
+	if (!userptr->sgt) {
+		rc = -ENOMEM;
+		goto put_framevec;
+	}
+
+	rc = sg_alloc_table_from_pages(userptr->sgt,
+					frame_vector_pages(userptr->vec),
+					npages, offset, size, GFP_ATOMIC);
+	if (rc < 0) {
+		dev_err(hdev->dev, "failed to create SG table from pages\n");
+		goto free_sgt;
+	}
+
+	return 0;
+
+free_sgt:
+	kfree(userptr->sgt);
+put_framevec:
+	put_vaddr_frames(userptr->vec);
+destroy_framevec:
+	frame_vector_destroy(userptr->vec);
+	return rc;
+}
+
+/*
+ * hl_unpin_host_memory - unpins a chunk of host memory
+ *
+ * @hdev                : pointer to the habanalabs device structure
+ * @userptr             : pointer to hl_userptr structure
+ *
+ * This function does the following:
+ * - Unpins the physical pages related to the host memory
+ * - Free the SG list
+ */
+int hl_unpin_host_memory(struct hl_device *hdev, struct hl_userptr *userptr)
+{
+	struct page **pages;
+
+	if (userptr->dma_mapped)
+		hdev->asic_funcs->hl_dma_unmap_sg(hdev,
+				userptr->sgt->sgl,
+				userptr->sgt->nents,
+				userptr->dir);
+
+	pages = frame_vector_pages(userptr->vec);
+	if (!IS_ERR(pages)) {
+		int i;
+
+		for (i = 0; i < frame_vector_count(userptr->vec); i++)
+			set_page_dirty_lock(pages[i]);
+	}
+	put_vaddr_frames(userptr->vec);
+	frame_vector_destroy(userptr->vec);
+
+	list_del(&userptr->job_node);
+
+	sg_free_table(userptr->sgt);
+	kfree(userptr->sgt);
+
+	return 0;
+}
+
+/*
+ * hl_userptr_delete_list - clear userptr list
+ *
+ * @hdev                : pointer to the habanalabs device structure
+ * @userptr_list        : pointer to the list to clear
+ *
+ * This function does the following:
+ * - Iterates over the list and unpins the host memory and frees the userptr
+ *   structure.
+ */
+void hl_userptr_delete_list(struct hl_device *hdev,
+				struct list_head *userptr_list)
+{
+	struct hl_userptr *userptr, *tmp;
+
+	list_for_each_entry_safe(userptr, tmp, userptr_list, job_node) {
+		hl_unpin_host_memory(hdev, userptr);
+		kfree(userptr);
+	}
+
+	INIT_LIST_HEAD(userptr_list);
+}
+
+/*
+ * hl_userptr_is_pinned - returns whether the given userptr is pinned
+ *
+ * @hdev                : pointer to the habanalabs device structure
+ * @userptr_list        : pointer to the list to clear
+ * @userptr             : pointer to userptr to check
+ *
+ * This function does the following:
+ * - Iterates over the list and checks if the given userptr is in it, means is
+ *   pinned. If so, returns true, otherwise returns false.
+ */
+bool hl_userptr_is_pinned(struct hl_device *hdev, u64 addr,
+				u32 size, struct list_head *userptr_list,
+				struct hl_userptr **userptr)
+{
+	list_for_each_entry((*userptr), userptr_list, job_node) {
+		if ((addr == (*userptr)->addr) && (size == (*userptr)->size))
+			return true;
+	}
+
+	return false;
+}
diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h
index 756266cf0416..fba49417f607 100644
--- a/include/uapi/misc/habanalabs.h
+++ b/include/uapi/misc/habanalabs.h
@@ -73,6 +73,95 @@ union hl_cb_args {
 	struct hl_cb_out out;
 };
 
+/*
+ * This structure size must always be fixed to 64-bytes for backward
+ * compatibility
+ */
+struct hl_cs_chunk {
+	/*
+	 * For external queue, this represents a Handle of CB on the Host
+	 * For internal queue, this represents an SRAM or DRAM address of the
+	 * internal CB
+	 */
+	__u64 cb_handle;
+	/* Index of queue to put the CB on */
+	__u32 queue_index;
+	/*
+	 * Size of command buffer with valid packets
+	 * Can be smaller then actual CB size
+	 */
+	__u32 cb_size;
+	/* HL_CS_CHUNK_FLAGS_* */
+	__u32 cs_chunk_flags;
+	/* Align structure to 64 bytes */
+	__u32 pad[11];
+};
+
+#define HL_CS_FLAGS_FORCE_RESTORE	0x1
+
+#define HL_CS_STATUS_SUCCESS		0
+
+struct hl_cs_in {
+	/* this holds address of array of hl_cs_chunk for restore phase */
+	__u64 chunks_restore;
+	/* this holds address of array of hl_cs_chunk for execution phase */
+	__u64 chunks_execute;
+	/* this holds address of array of hl_cs_chunk for store phase -
+	 * Currently not in use
+	 */
+	__u64 chunks_store;
+	/* Number of chunks in restore phase array */
+	__u32 num_chunks_restore;
+	/* Number of chunks in execution array */
+	__u32 num_chunks_execute;
+	/* Number of chunks in restore phase array - Currently not in use */
+	__u32 num_chunks_store;
+	/* HL_CS_FLAGS_* */
+	__u32 cs_flags;
+	/* Context ID - Currently not in use */
+	__u32 ctx_id;
+};
+
+struct hl_cs_out {
+	/* this holds the sequence number of the CS to pass to wait ioctl */
+	__u64 seq;
+	/* HL_CS_STATUS_* */
+	__u32 status;
+	__u32 pad;
+};
+
+union hl_cs_args {
+	struct hl_cs_in in;
+	struct hl_cs_out out;
+};
+
+struct hl_wait_cs_in {
+	/* Command submission sequence number */
+	__u64 seq;
+	/* Absolute timeout to wait in microseconds */
+	__u64 timeout_us;
+	/* Context ID - Currently not in use */
+	__u32 ctx_id;
+	__u32 pad;
+};
+
+#define HL_WAIT_CS_STATUS_COMPLETED	0
+#define HL_WAIT_CS_STATUS_BUSY		1
+#define HL_WAIT_CS_STATUS_TIMEDOUT	2
+#define HL_WAIT_CS_STATUS_ABORTED	3
+#define HL_WAIT_CS_STATUS_INTERRUPTED	4
+
+struct hl_wait_cs_out {
+	/* HL_WAIT_CS_STATUS_* */
+	__u32 status;
+	__u32 pad;
+};
+
+union hl_wait_cs_args {
+	struct hl_wait_cs_in in;
+	struct hl_wait_cs_out out;
+};
+
 /*
  * Command Buffer
  * - Request a Command Buffer
@@ -89,7 +178,74 @@ union hl_cb_args {
 #define HL_IOCTL_CB		\
 		_IOWR('H', 0x02, union hl_cb_args)
 
+/*
+ * Command Submission
+ *
+ * To submit work to the device, the user need to call this IOCTL with a set
+ * of JOBS. That set of JOBS constitutes a CS object.
+ * Each JOB will be enqueued on a specific queue, according to the user's input.
+ * There can be more then one JOB per queue.
+ *
+ * There are two types of queues - external and internal. External queues
+ * are DMA queues which transfer data from/to the Host. All other queues are
+ * internal. The driver will get completion notifications from the device only
+ * on JOBS which are enqueued in the external queues.
+ *
+ * This IOCTL is asynchronous in regard to the actual execution of the CS. This
+ * means it returns immediately after ALL the JOBS were enqueued on their
+ * relevant queues. Therefore, the user mustn't assume the CS has been completed
+ * or has even started to execute.
+ *
+ * Upon successful enqueue, the IOCTL returns an opaque handle which the user
+ * can use with the "Wait for CS" IOCTL to check whether the handle's CS
+ * external JOBS have been completed. Note that if the CS has internal JOBS
+ * which can execute AFTER the external JOBS have finished, the driver might
+ * report that the CS has finished executing BEFORE the internal JOBS have
+ * actually finish executing.
+ *
+ * The CS IOCTL will receive three sets of JOBS. One set is for "restore" phase,
+ * a second set is for "execution" phase and a third set is for "store" phase.
+ * The JOBS on the "restore" phase are enqueued only after context-switch
+ * (or if its the first CS for this context). The user can also order the
+ * driver to run the "restore" phase explicitly
+ *
+ */
+#define HL_IOCTL_CS			\
+		_IOWR('H', 0x03, union hl_cs_args)
+
+/*
+ * Wait for Command Submission
+ *
+ * The user can call this IOCTL with a handle it received from the CS IOCTL
+ * to wait until the handle's CS has finished executing. The user will wait
+ * inside the kernel until the CS has finished or until the user-requeusted
+ * timeout has expired.
+ *
+ * The return value of the IOCTL is a standard Linux error code. The possible
+ * values are:
+ *
+ * EINTR     - Kernel waiting has been interrupted, e.g. due to OS signal
+ *             that the user process received
+ * ETIMEDOUT - The CS has caused a timeout on the device
+ * EIO       - The CS was aborted (usually because the device was reset)
+ * ENODEV    - The device wants to do hard-reset (so user need to close FD)
+ *
+ * The driver also returns a custom define inside the IOCTL which can be:
+ *
+ * HL_WAIT_CS_STATUS_COMPLETED   - The CS has been completed successfully (0)
+ * HL_WAIT_CS_STATUS_BUSY        - The CS is still executing (0)
+ * HL_WAIT_CS_STATUS_TIMEDOUT    - The CS has caused a timeout on the device
+ *                                 (ETIMEDOUT)
+ * HL_WAIT_CS_STATUS_ABORTED     - The CS was aborted, usually because the
+ *                                 device was reset (EIO)
+ * HL_WAIT_CS_STATUS_INTERRUPTED - Waiting for the CS was interrupted (EINTR)
+ *
+ */
+
+#define HL_IOCTL_WAIT_CS			\
+		_IOWR('H', 0x04, union hl_wait_cs_args)
+
 #define HL_COMMAND_START	0x02
-#define HL_COMMAND_END		0x03
+#define HL_COMMAND_END		0x05
 
 #endif /* HABANALABS_H_ */
-- 
cgit v1.2.3


From 0feaf86d4e69507ab9b2af7dcc63a6886352d5db Mon Sep 17 00:00:00 2001
From: Omer Shpigelman <oshpigelman@habana.ai>
Date: Sat, 16 Feb 2019 00:39:22 +0200
Subject: habanalabs: add virtual memory and MMU modules

This patch adds the Virtual Memory and MMU modules.

Goya has an internal MMU which provides process isolation on the internal
DDR. The internal MMU also performs translations for transactions that go
from Goya to the Host.

The driver is responsible for allocating and freeing memory on the DDR
upon user request. It also provides an interface to map and unmap DDR and
Host memory to the device address space.

The MMU in Goya supports 3-level and 4-level page tables. With 3-level, the
size of each page is 2MB, while with 4-level the size of each page is 4KB.

In the DDR, the physical pages are always 2MB.

Reviewed-by: Mike Rapoport <rppt@linux.ibm.com>
Signed-off-by: Omer Shpigelman <oshpigelman@habana.ai>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/habanalabs/Makefile                   |    2 +-
 drivers/misc/habanalabs/context.c                  |   19 +-
 drivers/misc/habanalabs/device.c                   |   20 +-
 drivers/misc/habanalabs/goya/goya.c                |  395 +++++
 drivers/misc/habanalabs/habanalabs.h               |  189 ++-
 drivers/misc/habanalabs/habanalabs_drv.c           |    2 +-
 drivers/misc/habanalabs/habanalabs_ioctl.c         |    3 +-
 .../habanalabs/include/hw_ip/mmu/mmu_general.h     |   46 +
 .../misc/habanalabs/include/hw_ip/mmu/mmu_v1_0.h   |   15 +
 drivers/misc/habanalabs/memory.c                   | 1517 ++++++++++++++++++++
 drivers/misc/habanalabs/mmu.c                      |  691 +++++++++
 include/uapi/misc/habanalabs.h                     |  122 +-
 12 files changed, 3013 insertions(+), 8 deletions(-)
 create mode 100644 drivers/misc/habanalabs/include/hw_ip/mmu/mmu_general.h
 create mode 100644 drivers/misc/habanalabs/include/hw_ip/mmu/mmu_v1_0.h
 create mode 100644 drivers/misc/habanalabs/mmu.c

(limited to 'include')

diff --git a/drivers/misc/habanalabs/Makefile b/drivers/misc/habanalabs/Makefile
index d2fd0e18b1eb..fd46f8b48bab 100644
--- a/drivers/misc/habanalabs/Makefile
+++ b/drivers/misc/habanalabs/Makefile
@@ -6,7 +6,7 @@ obj-m	:= habanalabs.o
 
 habanalabs-y := habanalabs_drv.o device.o context.o asid.o habanalabs_ioctl.o \
 		command_buffer.o hw_queue.o irq.o sysfs.o hwmon.o memory.o \
-		command_submission.o
+		command_submission.o mmu.o
 
 include $(src)/goya/Makefile
 habanalabs-y += $(HL_GOYA_FILES)
diff --git a/drivers/misc/habanalabs/context.c b/drivers/misc/habanalabs/context.c
index c3854714b46c..619ace1c4ef7 100644
--- a/drivers/misc/habanalabs/context.c
+++ b/drivers/misc/habanalabs/context.c
@@ -25,8 +25,10 @@ static void hl_ctx_fini(struct hl_ctx *ctx)
 	for (i = 0 ; i < HL_MAX_PENDING_CS ; i++)
 		dma_fence_put(ctx->cs_pending[i]);
 
-	if (ctx->asid != HL_KERNEL_ASID_ID)
+	if (ctx->asid != HL_KERNEL_ASID_ID) {
+		hl_vm_ctx_fini(ctx);
 		hl_asid_free(hdev, ctx->asid);
+	}
 }
 
 void hl_ctx_do_release(struct kref *ref)
@@ -96,6 +98,8 @@ void hl_ctx_free(struct hl_device *hdev, struct hl_ctx *ctx)
 
 int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
 {
+	int rc = 0;
+
 	ctx->hdev = hdev;
 
 	kref_init(&ctx->refcount);
@@ -113,9 +117,22 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
 			dev_err(hdev->dev, "No free ASID, failed to create context\n");
 			return -ENOMEM;
 		}
+
+		rc = hl_vm_ctx_init(ctx);
+		if (rc) {
+			dev_err(hdev->dev, "Failed to init mem ctx module\n");
+			rc = -ENOMEM;
+			goto mem_ctx_err;
+		}
 	}
 
 	return 0;
+
+mem_ctx_err:
+	if (ctx->asid != HL_KERNEL_ASID_ID)
+		hl_asid_free(hdev, ctx->asid);
+
+	return rc;
 }
 
 void hl_ctx_get(struct hl_device *hdev, struct hl_ctx *ctx)
diff --git a/drivers/misc/habanalabs/device.c b/drivers/misc/habanalabs/device.c
index cc5f068df597..d0929022655b 100644
--- a/drivers/misc/habanalabs/device.c
+++ b/drivers/misc/habanalabs/device.c
@@ -615,8 +615,10 @@ again:
 	/* Reset the H/W. It will be in idle state after this returns */
 	hdev->asic_funcs->hw_fini(hdev, hard_reset);
 
-	if (hard_reset)
+	if (hard_reset) {
+		hl_vm_fini(hdev);
 		hl_eq_reset(hdev, &hdev->event_queue);
+	}
 
 	/* Re-initialize PI,CI to 0 in all queues (hw queue, cq) */
 	hl_hw_queue_reset(hdev, hard_reset);
@@ -677,6 +679,13 @@ again:
 			goto out_err;
 		}
 
+		rc = hl_vm_init(hdev);
+		if (rc) {
+			dev_err(hdev->dev,
+				"Failed to init memory module after hard reset\n");
+			goto out_err;
+		}
+
 		hl_set_max_power(hdev, hdev->max_power);
 
 		hdev->hard_reset_pending = false;
@@ -861,6 +870,13 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
 		hdev->asic_name,
 		hdev->asic_prop.dram_size / 1024 / 1024 / 1024);
 
+	rc = hl_vm_init(hdev);
+	if (rc) {
+		dev_err(hdev->dev, "Failed to initialize memory module\n");
+		rc = 0;
+		goto out_disabled;
+	}
+
 	/*
 	 * hl_hwmon_init must be called after device_late_init, because only
 	 * there we get the information from the device about which
@@ -977,6 +993,8 @@ void hl_device_fini(struct hl_device *hdev)
 	/* Reset the H/W. It will be in idle state after this returns */
 	hdev->asic_funcs->hw_fini(hdev, true);
 
+	hl_vm_fini(hdev);
+
 	hl_eq_fini(hdev, &hdev->event_queue);
 
 	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index e3878fd7dc94..89b82b989966 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -6,6 +6,8 @@
  */
 
 #include "goyaP.h"
+#include "include/hw_ip/mmu/mmu_general.h"
+#include "include/hw_ip/mmu/mmu_v1_0.h"
 #include "include/goya/asic_reg/goya_masks.h"
 
 #include <linux/pci.h>
@@ -80,6 +82,7 @@
 #define GOYA_PLDM_RESET_WAIT_MSEC	1000		/* 1s */
 #define GOYA_CPU_TIMEOUT_USEC		10000000	/* 10s */
 #define GOYA_TEST_QUEUE_WAIT_USEC	100000		/* 100ms */
+#define GOYA_PLDM_MMU_TIMEOUT_USEC	(MMU_CONFIG_TIMEOUT_USEC * 100)
 
 #define GOYA_QMAN0_FENCE_VAL		0xD169B243
 
@@ -131,6 +134,70 @@ static const char *goya_axi_name[GOYA_MAX_INITIATORS] = {
 	"MMU"
 };
 
+static u64 goya_mmu_regs[GOYA_MMU_REGS_NUM] = {
+	mmDMA_QM_0_GLBL_NON_SECURE_PROPS,
+	mmDMA_QM_1_GLBL_NON_SECURE_PROPS,
+	mmDMA_QM_2_GLBL_NON_SECURE_PROPS,
+	mmDMA_QM_3_GLBL_NON_SECURE_PROPS,
+	mmDMA_QM_4_GLBL_NON_SECURE_PROPS,
+	mmTPC0_QM_GLBL_SECURE_PROPS,
+	mmTPC0_QM_GLBL_NON_SECURE_PROPS,
+	mmTPC0_CMDQ_GLBL_SECURE_PROPS,
+	mmTPC0_CMDQ_GLBL_NON_SECURE_PROPS,
+	mmTPC0_CFG_ARUSER,
+	mmTPC0_CFG_AWUSER,
+	mmTPC1_QM_GLBL_SECURE_PROPS,
+	mmTPC1_QM_GLBL_NON_SECURE_PROPS,
+	mmTPC1_CMDQ_GLBL_SECURE_PROPS,
+	mmTPC1_CMDQ_GLBL_NON_SECURE_PROPS,
+	mmTPC1_CFG_ARUSER,
+	mmTPC1_CFG_AWUSER,
+	mmTPC2_QM_GLBL_SECURE_PROPS,
+	mmTPC2_QM_GLBL_NON_SECURE_PROPS,
+	mmTPC2_CMDQ_GLBL_SECURE_PROPS,
+	mmTPC2_CMDQ_GLBL_NON_SECURE_PROPS,
+	mmTPC2_CFG_ARUSER,
+	mmTPC2_CFG_AWUSER,
+	mmTPC3_QM_GLBL_SECURE_PROPS,
+	mmTPC3_QM_GLBL_NON_SECURE_PROPS,
+	mmTPC3_CMDQ_GLBL_SECURE_PROPS,
+	mmTPC3_CMDQ_GLBL_NON_SECURE_PROPS,
+	mmTPC3_CFG_ARUSER,
+	mmTPC3_CFG_AWUSER,
+	mmTPC4_QM_GLBL_SECURE_PROPS,
+	mmTPC4_QM_GLBL_NON_SECURE_PROPS,
+	mmTPC4_CMDQ_GLBL_SECURE_PROPS,
+	mmTPC4_CMDQ_GLBL_NON_SECURE_PROPS,
+	mmTPC4_CFG_ARUSER,
+	mmTPC4_CFG_AWUSER,
+	mmTPC5_QM_GLBL_SECURE_PROPS,
+	mmTPC5_QM_GLBL_NON_SECURE_PROPS,
+	mmTPC5_CMDQ_GLBL_SECURE_PROPS,
+	mmTPC5_CMDQ_GLBL_NON_SECURE_PROPS,
+	mmTPC5_CFG_ARUSER,
+	mmTPC5_CFG_AWUSER,
+	mmTPC6_QM_GLBL_SECURE_PROPS,
+	mmTPC6_QM_GLBL_NON_SECURE_PROPS,
+	mmTPC6_CMDQ_GLBL_SECURE_PROPS,
+	mmTPC6_CMDQ_GLBL_NON_SECURE_PROPS,
+	mmTPC6_CFG_ARUSER,
+	mmTPC6_CFG_AWUSER,
+	mmTPC7_QM_GLBL_SECURE_PROPS,
+	mmTPC7_QM_GLBL_NON_SECURE_PROPS,
+	mmTPC7_CMDQ_GLBL_SECURE_PROPS,
+	mmTPC7_CMDQ_GLBL_NON_SECURE_PROPS,
+	mmTPC7_CFG_ARUSER,
+	mmTPC7_CFG_AWUSER,
+	mmMME_QM_GLBL_SECURE_PROPS,
+	mmMME_QM_GLBL_NON_SECURE_PROPS,
+	mmMME_CMDQ_GLBL_SECURE_PROPS,
+	mmMME_CMDQ_GLBL_NON_SECURE_PROPS,
+	mmMME_SBA_CONTROL_DATA,
+	mmMME_SBB_CONTROL_DATA,
+	mmMME_SBC_CONTROL_DATA,
+	mmMME_WBC_CONTROL_DATA
+};
+
 #define GOYA_ASYC_EVENT_GROUP_NON_FATAL_SIZE 121
 
 static u32 goya_non_fatal_events[GOYA_ASYC_EVENT_GROUP_NON_FATAL_SIZE] = {
@@ -258,6 +325,10 @@ static u32 goya_non_fatal_events[GOYA_ASYC_EVENT_GROUP_NON_FATAL_SIZE] = {
 };
 
 static int goya_armcp_info_get(struct hl_device *hdev);
+static void goya_mmu_prepare(struct hl_device *hdev, u32 asid);
+static int goya_mmu_clear_pgt_range(struct hl_device *hdev);
+static int goya_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
+					u64 phys_addr);
 
 static void goya_get_fixed_properties(struct hl_device *hdev)
 {
@@ -296,6 +367,16 @@ static void goya_get_fixed_properties(struct hl_device *hdev)
 	prop->sram_user_base_address = prop->sram_base_address +
 						SRAM_USER_BASE_OFFSET;
 
+	prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
+	if (hdev->pldm)
+		prop->mmu_pgt_size = 0x800000; /* 8MB */
+	else
+		prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
+	prop->mmu_pte_size = HL_PTE_SIZE;
+	prop->mmu_hop_table_size = HOP_TABLE_SIZE;
+	prop->mmu_hop0_tables_total_size = HOP0_TABLES_TOTAL_SIZE;
+	prop->dram_page_size = PAGE_SIZE_2MB;
+
 	prop->host_phys_base_address = HOST_PHYS_BASE;
 	prop->va_space_host_start_address = VA_HOST_SPACE_START;
 	prop->va_space_host_end_address = VA_HOST_SPACE_END;
@@ -752,7 +833,18 @@ static int goya_late_init(struct hl_device *hdev)
 
 	goya_fetch_psoc_frequency(hdev);
 
+	rc = goya_mmu_clear_pgt_range(hdev);
+	if (rc) {
+		dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
+		goto disable_pci_access;
+	}
+
 	return 0;
+
+disable_pci_access:
+	goya_send_pci_access_msg(hdev, ARMCP_PACKET_DISABLE_PCI_ACCESS);
+
+	return rc;
 }
 
 /*
@@ -2565,6 +2657,54 @@ out:
 	return 0;
 }
 
+static int goya_mmu_init(struct hl_device *hdev)
+{
+	struct asic_fixed_properties *prop = &hdev->asic_prop;
+	struct goya_device *goya = hdev->asic_specific;
+	u64 hop0_addr;
+	int rc, i;
+
+	if (!hdev->mmu_enable)
+		return 0;
+
+	if (goya->hw_cap_initialized & HW_CAP_MMU)
+		return 0;
+
+	hdev->dram_supports_virtual_memory = true;
+
+	for (i = 0 ; i < prop->max_asid ; i++) {
+		hop0_addr = prop->mmu_pgt_addr +
+				(i * prop->mmu_hop_table_size);
+
+		rc = goya_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
+		if (rc) {
+			dev_err(hdev->dev,
+				"failed to set hop0 addr for asid %d\n", i);
+			goto err;
+		}
+	}
+
+	goya->hw_cap_initialized |= HW_CAP_MMU;
+
+	/* init MMU cache manage page */
+	WREG32(mmSTLB_CACHE_INV_BASE_39_8, MMU_CACHE_MNG_ADDR >> 8);
+	WREG32(mmSTLB_CACHE_INV_BASE_49_40, MMU_CACHE_MNG_ADDR << 40);
+
+	/* Remove follower feature due to performance bug */
+	WREG32_AND(mmSTLB_STLB_FEATURE_EN,
+			(~STLB_STLB_FEATURE_EN_FOLLOWER_EN_MASK));
+
+	hdev->asic_funcs->mmu_invalidate_cache(hdev, true);
+
+	WREG32(mmMMU_MMU_ENABLE, 1);
+	WREG32(mmMMU_SPI_MASK, 0xF);
+
+	return 0;
+
+err:
+	return rc;
+}
+
 /*
  * goya_hw_init - Goya hardware initialization code
  *
@@ -2614,6 +2754,10 @@ static int goya_hw_init(struct hl_device *hdev)
 		return rc;
 	}
 
+	rc = goya_mmu_init(hdev);
+	if (rc)
+		return rc;
+
 	goya_init_security(hdev);
 
 	goya_init_dma_qmans(hdev);
@@ -4249,6 +4393,10 @@ int goya_context_switch(struct hl_device *hdev, u32 asid)
 
 	rc = goya_send_job_on_qman0(hdev, job);
 
+	/* no point in setting the asid in case of failure */
+	if (!rc)
+		goya_mmu_prepare(hdev, asid);
+
 	job->patched_cb->cs_cnt--;
 	hl_cb_put(job->patched_cb);
 
@@ -4284,6 +4432,22 @@ void goya_restore_phase_topology(struct hl_device *hdev)
 	i = RREG32(mmSYNC_MNGR_SOB_OBJ_0);
 }
 
+static u64 goya_read_pte(struct hl_device *hdev, u64 addr)
+{
+	struct goya_device *goya = hdev->asic_specific;
+
+	return readq(hdev->pcie_bar[DDR_BAR_ID] +
+			(addr - goya->ddr_bar_cur_addr));
+}
+
+static void goya_write_pte(struct hl_device *hdev, u64 addr, u64 val)
+{
+	struct goya_device *goya = hdev->asic_specific;
+
+	writeq(val, hdev->pcie_bar[DDR_BAR_ID] +
+			(addr - goya->ddr_bar_cur_addr));
+}
+
 static void goya_get_axi_name(struct hl_device *hdev, u32 agent_id,
 		u16 event_type, char *axi_name, int len)
 {
@@ -4567,6 +4731,233 @@ void *goya_get_events_stat(struct hl_device *hdev, u32 *size)
 	return goya->events_stat;
 }
 
+static int goya_mmu_clear_pgt_range(struct hl_device *hdev)
+{
+	struct asic_fixed_properties *prop = &hdev->asic_prop;
+	struct goya_device *goya = hdev->asic_specific;
+	struct packet_lin_dma *clear_pgt_range_pkt;
+	struct hl_cs_parser parser;
+	struct hl_cs_job *job;
+	u32 cb_size;
+	struct hl_cb *cb;
+	int rc;
+
+	if (!(goya->hw_cap_initialized & HW_CAP_MMU))
+		return 0;
+
+	cb = hl_cb_kernel_create(hdev, PAGE_SIZE);
+	if (!cb)
+		return -EFAULT;
+
+	clear_pgt_range_pkt = (struct packet_lin_dma *)
+					(uintptr_t) cb->kernel_address;
+
+	memset(clear_pgt_range_pkt, 0, sizeof(*clear_pgt_range_pkt));
+	cb_size = sizeof(*clear_pgt_range_pkt);
+
+	clear_pgt_range_pkt->ctl =
+		((PACKET_LIN_DMA << GOYA_PKT_CTL_OPCODE_SHIFT) |
+		(DMA_HOST_TO_DRAM << GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT) |
+		(1 << GOYA_PKT_LIN_DMA_CTL_MEMSET_SHIFT) |
+		(1 << GOYA_PKT_LIN_DMA_CTL_WO_SHIFT) |
+		(1 << GOYA_PKT_CTL_RB_SHIFT) |
+		(1 << GOYA_PKT_CTL_MB_SHIFT));
+
+	clear_pgt_range_pkt->src_addr = 0;
+	clear_pgt_range_pkt->dst_addr = prop->mmu_pgt_addr;
+	clear_pgt_range_pkt->tsize = prop->mmu_pgt_size + MMU_CACHE_MNG_SIZE;
+
+	job = hl_cs_allocate_job(hdev, true);
+	if (!job) {
+		dev_err(hdev->dev, "Failed to allocate a new job\n");
+		rc = -ENOMEM;
+		goto release_cb;
+	}
+
+	job->id = 0;
+	job->user_cb = cb;
+	job->user_cb->cs_cnt++;
+	job->user_cb_size = cb_size;
+	job->hw_queue_id = GOYA_QUEUE_ID_DMA_0;
+
+	parser.ctx_id = HL_KERNEL_ASID_ID;
+	parser.cs_sequence = 0;
+	parser.job_id = job->id;
+	parser.hw_queue_id = job->hw_queue_id;
+	parser.job_userptr_list = &job->userptr_list;
+	parser.user_cb = job->user_cb;
+	parser.user_cb_size = job->user_cb_size;
+	parser.ext_queue = job->ext_queue;
+	parser.use_virt_addr = hdev->mmu_enable;
+
+	rc = hdev->asic_funcs->cs_parser(hdev, &parser);
+	if (rc) {
+		dev_err(hdev->dev,
+			"Failed to parse kernel CB when clearing pgt\n");
+		goto free_job;
+	}
+
+	job->patched_cb = parser.patched_cb;
+	job->job_cb_size = parser.patched_cb_size;
+	job->patched_cb->cs_cnt++;
+
+	rc = goya_send_job_on_qman0(hdev, job);
+
+	job->patched_cb->cs_cnt--;
+	hl_cb_put(job->patched_cb);
+
+free_job:
+	hl_userptr_delete_list(hdev, &job->userptr_list);
+	kfree(job);
+	cb->cs_cnt--;
+
+release_cb:
+	hl_cb_put(cb);
+	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
+
+	return rc;
+}
+
+static void goya_mmu_prepare(struct hl_device *hdev, u32 asid)
+{
+	struct goya_device *goya = hdev->asic_specific;
+	int i;
+
+	if (!(goya->hw_cap_initialized & HW_CAP_MMU))
+		return;
+
+	if (asid & ~MME_QM_GLBL_SECURE_PROPS_ASID_MASK) {
+		WARN(1, "asid %u is too big\n", asid);
+		return;
+	}
+
+	/* zero the MMBP and ASID bits and then set the ASID */
+	for (i = 0 ; i < GOYA_MMU_REGS_NUM ; i++) {
+		WREG32_AND(goya_mmu_regs[i], ~0x7FF);
+		WREG32_OR(goya_mmu_regs[i], asid);
+	}
+}
+
+static void goya_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard)
+{
+	struct goya_device *goya = hdev->asic_specific;
+	u32 status, timeout_usec;
+	int rc;
+
+	if (!(goya->hw_cap_initialized & HW_CAP_MMU))
+		return;
+
+	/* no need in L1 only invalidation in Goya */
+	if (!is_hard)
+		return;
+
+	if (hdev->pldm)
+		timeout_usec = GOYA_PLDM_MMU_TIMEOUT_USEC;
+	else
+		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
+
+	mutex_lock(&hdev->mmu_cache_lock);
+
+	/* L0 & L1 invalidation */
+	WREG32(mmSTLB_INV_ALL_START, 1);
+
+	rc = hl_poll_timeout(
+		hdev,
+		mmSTLB_INV_ALL_START,
+		status,
+		!status,
+		1000,
+		timeout_usec);
+
+	mutex_unlock(&hdev->mmu_cache_lock);
+
+	if (rc)
+		dev_notice_ratelimited(hdev->dev,
+			"Timeout when waiting for MMU cache invalidation\n");
+}
+
+static void goya_mmu_invalidate_cache_range(struct hl_device *hdev,
+		bool is_hard, u32 asid, u64 va, u64 size)
+{
+	struct goya_device *goya = hdev->asic_specific;
+	u32 status, timeout_usec, inv_data, pi;
+	int rc;
+
+	if (!(goya->hw_cap_initialized & HW_CAP_MMU))
+		return;
+
+	/* no need in L1 only invalidation in Goya */
+	if (!is_hard)
+		return;
+
+	if (hdev->pldm)
+		timeout_usec = GOYA_PLDM_MMU_TIMEOUT_USEC;
+	else
+		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
+
+	mutex_lock(&hdev->mmu_cache_lock);
+
+	/*
+	 * TODO: currently invalidate entire L0 & L1 as in regular hard
+	 * invalidation. Need to apply invalidation of specific cache lines with
+	 * mask of ASID & VA & size.
+	 * Note that L1 with be flushed entirely in any case.
+	 */
+
+	/* L0 & L1 invalidation */
+	inv_data = RREG32(mmSTLB_CACHE_INV);
+	/* PI is 8 bit */
+	pi = ((inv_data & STLB_CACHE_INV_PRODUCER_INDEX_MASK) + 1) & 0xFF;
+	WREG32(mmSTLB_CACHE_INV,
+			(inv_data & STLB_CACHE_INV_INDEX_MASK_MASK) | pi);
+
+	rc = hl_poll_timeout(
+		hdev,
+		mmSTLB_INV_CONSUMER_INDEX,
+		status,
+		status == pi,
+		1000,
+		timeout_usec);
+
+	mutex_unlock(&hdev->mmu_cache_lock);
+
+	if (rc)
+		dev_notice_ratelimited(hdev->dev,
+			"Timeout when waiting for MMU cache invalidation\n");
+}
+
+static int goya_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
+						u64 phys_addr)
+{
+	u32 status, timeout_usec;
+	int rc;
+
+	if (hdev->pldm)
+		timeout_usec = GOYA_PLDM_MMU_TIMEOUT_USEC;
+	else
+		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
+
+	WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
+	WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
+	WREG32(MMU_ASID_BUSY, 0x80000000 | asid);
+
+	rc = hl_poll_timeout(
+		hdev,
+		MMU_ASID_BUSY,
+		status,
+		!(status & 0x80000000),
+		1000,
+		timeout_usec);
+
+	if (rc) {
+		dev_err(hdev->dev,
+			"Timeout during MMU hop0 config of asid %d\n", asid);
+		return rc;
+	}
+
+	return 0;
+}
+
 int goya_send_heartbeat(struct hl_device *hdev)
 {
 	struct goya_device *goya = hdev->asic_specific;
@@ -4830,6 +5221,10 @@ static const struct hl_asic_funcs goya_funcs = {
 	.handle_eqe = goya_handle_eqe,
 	.set_pll_profile = goya_set_pll_profile,
 	.get_events_stat = goya_get_events_stat,
+	.read_pte = goya_read_pte,
+	.write_pte = goya_write_pte,
+	.mmu_invalidate_cache = goya_mmu_invalidate_cache,
+	.mmu_invalidate_cache_range = goya_mmu_invalidate_cache_range,
 	.send_heartbeat = goya_send_heartbeat,
 	.enable_clock_gating = goya_init_clock_gating,
 	.disable_clock_gating = goya_disable_clock_gating,
diff --git a/drivers/misc/habanalabs/habanalabs.h b/drivers/misc/habanalabs/habanalabs.h
index 9adc7c6ec08b..03085e7a12dd 100644
--- a/drivers/misc/habanalabs/habanalabs.h
+++ b/drivers/misc/habanalabs/habanalabs.h
@@ -19,6 +19,7 @@
 #include <linux/dma-fence.h>
 #include <linux/dma-direction.h>
 #include <linux/scatterlist.h>
+#include <linux/hashtable.h>
 
 #define HL_NAME				"habanalabs"
 
@@ -39,6 +40,31 @@
 /* MUST BE POWER OF 2 and larger than 1 */
 #define HL_MAX_PENDING_CS		64
 
+/* Memory */
+#define MEM_HASH_TABLE_BITS		7 /* 1 << 7 buckets */
+
+/* MMU */
+#define MMU_HASH_TABLE_BITS		7 /* 1 << 7 buckets */
+
+/**
+ * struct pgt_info - MMU hop page info.
+ * @node: hash linked-list node for the pgts hash of pgts.
+ * @addr: physical address of the pgt.
+ * @ctx: pointer to the owner ctx.
+ * @num_of_ptes: indicates how many ptes are used in the pgt.
+ *
+ * The MMU page tables hierarchy is placed on the DRAM. When a new level (hop)
+ * is needed during mapping, a new page is allocated and this structure holds
+ * its essential information. During unmapping, if no valid PTEs remained in the
+ * page, it is freed with its pgt_info structure.
+ */
+struct pgt_info {
+	struct hlist_node node;
+	u64 addr;
+	struct hl_ctx *ctx;
+	int num_of_ptes;
+};
+
 struct hl_device;
 struct hl_fpriv;
 
@@ -72,11 +98,11 @@ struct hw_queue_properties {
 /**
  * enum vm_type_t - virtual memory mapping request information.
  * @VM_TYPE_USERPTR: mapping of user memory to device virtual address.
- * @VM_TYPE_PHYS_LIST: mapping of DRAM memory to device virtual address.
+ * @VM_TYPE_PHYS_PACK: mapping of DRAM memory to device virtual address.
  */
 enum vm_type_t {
 	VM_TYPE_USERPTR,
-	VM_TYPE_PHYS_LIST
+	VM_TYPE_PHYS_PACK
 };
 
 /**
@@ -117,6 +143,12 @@ enum hl_device_hw_state {
  *                               mapping DRAM memory.
  * @va_space_dram_end_address: end address of virtual memory range for
  *                             mapping DRAM memory.
+ * @mmu_pgt_addr: base physical address in DRAM of MMU page tables.
+ * @mmu_pgt_size: MMU page tables total size.
+ * @mmu_pte_size: PTE size in MMU page tables.
+ * @mmu_hop_table_size: MMU hop table size.
+ * @mmu_hop0_tables_total_size: total size of MMU hop0 tables.
+ * @dram_page_size: page size for MMU DRAM allocation.
  * @cfg_size: configuration space size on SRAM.
  * @sram_size: total size of SRAM.
  * @max_asid: maximum number of open contexts (ASIDs).
@@ -150,6 +182,12 @@ struct asic_fixed_properties {
 	u64			va_space_host_end_address;
 	u64			va_space_dram_start_address;
 	u64			va_space_dram_end_address;
+	u64			mmu_pgt_addr;
+	u32			mmu_pgt_size;
+	u32			mmu_pte_size;
+	u32			mmu_hop_table_size;
+	u32			mmu_hop0_tables_total_size;
+	u32			dram_page_size;
 	u32			cfg_size;
 	u32			sram_size;
 	u32			max_asid;
@@ -419,6 +457,12 @@ enum hl_pll_frequency {
  * @handle_eqe: handle event queue entry (IRQ) from ArmCP.
  * @set_pll_profile: change PLL profile (manual/automatic).
  * @get_events_stat: retrieve event queue entries histogram.
+ * @read_pte: read MMU page table entry from DRAM.
+ * @write_pte: write MMU page table entry to DRAM.
+ * @mmu_invalidate_cache: flush MMU STLB cache, either with soft (L1 only) or
+ *                        hard (L0 & L1) flush.
+ * @mmu_invalidate_cache_range: flush specific MMU STLB cache lines with
+ *                              ASID-VA-size mask.
  * @send_heartbeat: send is-alive packet to ArmCP and verify response.
  * @enable_clock_gating: enable clock gating for reducing power consumption.
  * @disable_clock_gating: disable clock for accessing registers on HBW.
@@ -483,6 +527,11 @@ struct hl_asic_funcs {
 	void (*set_pll_profile)(struct hl_device *hdev,
 			enum hl_pll_frequency freq);
 	void* (*get_events_stat)(struct hl_device *hdev, u32 *size);
+	u64 (*read_pte)(struct hl_device *hdev, u64 addr);
+	void (*write_pte)(struct hl_device *hdev, u64 addr, u64 val);
+	void (*mmu_invalidate_cache)(struct hl_device *hdev, bool is_hard);
+	void (*mmu_invalidate_cache_range)(struct hl_device *hdev, bool is_hard,
+			u32 asid, u64 va, u64 size);
 	int (*send_heartbeat)(struct hl_device *hdev);
 	void (*enable_clock_gating)(struct hl_device *hdev);
 	void (*disable_clock_gating)(struct hl_device *hdev);
@@ -504,17 +553,40 @@ struct hl_asic_funcs {
 
 #define HL_KERNEL_ASID_ID	0
 
+/**
+ * struct hl_va_range - virtual addresses range.
+ * @lock: protects the virtual addresses list.
+ * @list: list of virtual addresses blocks available for mappings.
+ * @start_addr: range start address.
+ * @end_addr: range end address.
+ */
+struct hl_va_range {
+	struct mutex		lock;
+	struct list_head	list;
+	u64			start_addr;
+	u64			end_addr;
+};
+
 /**
  * struct hl_ctx - user/kernel context.
+ * @mem_hash: holds mapping from virtual address to virtual memory area
+ *		descriptor (hl_vm_phys_pg_list or hl_userptr).
+ * @mmu_hash: holds a mapping from virtual address to pgt_info structure.
  * @hpriv: pointer to the private (KMD) data of the process (fd).
  * @hdev: pointer to the device structure.
  * @refcount: reference counter for the context. Context is released only when
  *		this hits 0l. It is incremented on CS and CS_WAIT.
  * @cs_pending: array of DMA fence objects representing pending CS.
+ * @host_va_range: holds available virtual addresses for host mappings.
+ * @dram_va_range: holds available virtual addresses for DRAM mappings.
+ * @mem_hash_lock: protects the mem_hash.
+ * @mmu_lock: protects the MMU page tables. Any change to the PGT, modifing the
+ *            MMU hash or walking the PGT requires talking this lock
  * @cs_sequence: sequence number for CS. Value is assigned to a CS and passed
  *			to user so user could inquire about CS. It is used as
  *			index to cs_pending array.
  * @cs_lock: spinlock to protect cs_sequence.
+ * @dram_phys_mem: amount of used physical DRAM memory by this context.
  * @thread_restore_token: token to prevent multiple threads of the same context
  *				from running the restore phase. Only one thread
  *				should run it.
@@ -524,12 +596,19 @@ struct hl_asic_funcs {
  * @asid: context's unique address space ID in the device's MMU.
  */
 struct hl_ctx {
+	DECLARE_HASHTABLE(mem_hash, MEM_HASH_TABLE_BITS);
+	DECLARE_HASHTABLE(mmu_hash, MMU_HASH_TABLE_BITS);
 	struct hl_fpriv		*hpriv;
 	struct hl_device	*hdev;
 	struct kref		refcount;
 	struct dma_fence	*cs_pending[HL_MAX_PENDING_CS];
+	struct hl_va_range	host_va_range;
+	struct hl_va_range	dram_va_range;
+	struct mutex		mem_hash_lock;
+	struct mutex		mmu_lock;
 	u64			cs_sequence;
 	spinlock_t		cs_lock;
+	atomic64_t		dram_phys_mem;
 	atomic_t		thread_restore_token;
 	u32			thread_restore_wait_token;
 	u32			asid;
@@ -672,6 +751,85 @@ struct hl_cs_parser {
 };
 
 
+/*
+ * MEMORY STRUCTURE
+ */
+
+/**
+ * struct hl_vm_hash_node - hash element from virtual address to virtual
+ *				memory area descriptor (hl_vm_phys_pg_list or
+ *				hl_userptr).
+ * @node: node to hang on the hash table in context object.
+ * @vaddr: key virtual address.
+ * @ptr: value pointer (hl_vm_phys_pg_list or hl_userptr).
+ */
+struct hl_vm_hash_node {
+	struct hlist_node	node;
+	u64			vaddr;
+	void			*ptr;
+};
+
+/**
+ * struct hl_vm_phys_pg_pack - physical page pack.
+ * @vm_type: describes the type of the virtual area descriptor.
+ * @pages: the physical page array.
+ * @mapping_cnt: number of shared mappings.
+ * @asid: the context related to this list.
+ * @npages: num physical pages in the pack.
+ * @page_size: size of each page in the pack.
+ * @total_size: total size of all the pages in this list.
+ * @flags: HL_MEM_* flags related to this list.
+ * @handle: the provided handle related to this list.
+ * @offset: offset from the first page.
+ * @contiguous: is contiguous physical memory.
+ * @created_from_userptr: is product of host virtual address.
+ */
+struct hl_vm_phys_pg_pack {
+	enum vm_type_t		vm_type; /* must be first */
+	u64			*pages;
+	atomic_t		mapping_cnt;
+	u32			asid;
+	u32			npages;
+	u32			page_size;
+	u32			total_size;
+	u32			flags;
+	u32			handle;
+	u32			offset;
+	u8			contiguous;
+	u8			created_from_userptr;
+};
+
+/**
+ * struct hl_vm_va_block - virtual range block information.
+ * @node: node to hang on the virtual range list in context object.
+ * @start: virtual range start address.
+ * @end: virtual range end address.
+ * @size: virtual range size.
+ */
+struct hl_vm_va_block {
+	struct list_head	node;
+	u64			start;
+	u64			end;
+	u64			size;
+};
+
+/**
+ * struct hl_vm - virtual memory manager for MMU.
+ * @dram_pg_pool: pool for DRAM physical pages of 2MB.
+ * @dram_pg_pool_refcount: reference counter for the pool usage.
+ * @idr_lock: protects the phys_pg_list_handles.
+ * @phys_pg_pack_handles: idr to hold all device allocations handles.
+ * @init_done: whether initialization was done. We need this because VM
+ *		initialization might be skipped during device initialization.
+ */
+struct hl_vm {
+	struct gen_pool		*dram_pg_pool;
+	struct kref		dram_pg_pool_refcount;
+	spinlock_t		idr_lock;
+	struct idr		phys_pg_pack_handles;
+	u8			init_done;
+};
+
 /*
  * FILE PRIVATE STRUCTURE
  */
@@ -787,12 +945,16 @@ struct hl_device_reset_work {
  * @asic_prop: ASIC specific immutable properties.
  * @asic_funcs: ASIC specific functions.
  * @asic_specific: ASIC specific information to use only from ASIC files.
+ * @mmu_pgt_pool: pool of available MMU hops.
+ * @vm: virtual memory manager for MMU.
+ * @mmu_cache_lock: protects MMU cache invalidation as it can serve one context
  * @hwmon_dev: H/W monitor device.
  * @pm_mng_profile: current power management profile.
  * @hl_chip_info: ASIC's sensors information.
  * @cb_pool: list of preallocated CBs.
  * @cb_pool_lock: protects the CB pool.
  * @user_ctx: current user context executing.
+ * @dram_used_mem: current DRAM memory consumption.
  * @in_reset: is device in reset flow.
  * @curr_pll_profile: current PLL profile.
  * @fd_open_cnt: number of open user processes.
@@ -812,6 +974,7 @@ struct hl_device_reset_work {
  * @heartbeat: is heartbeat sanity check towards ArmCP enabled.
  * @reset_on_lockup: true if a reset should be done in case of stuck CS, false
  *                   otherwise.
+ * @dram_supports_virtual_memory: is MMU enabled towards DRAM.
  * @init_done: is the initialization of the device done.
  * @mmu_enable: is MMU enabled.
  */
@@ -846,6 +1009,9 @@ struct hl_device {
 	struct asic_fixed_properties	asic_prop;
 	const struct hl_asic_funcs	*asic_funcs;
 	void				*asic_specific;
+	struct gen_pool			*mmu_pgt_pool;
+	struct hl_vm			vm;
+	struct mutex			mmu_cache_lock;
 	struct device			*hwmon_dev;
 	enum hl_pm_mng_profile		pm_mng_profile;
 	struct hwmon_chip_info		*hl_chip_info;
@@ -856,6 +1022,7 @@ struct hl_device {
 	/* TODO: remove user_ctx for multiple process support */
 	struct hl_ctx			*user_ctx;
 
+	atomic64_t			dram_used_mem;
 	atomic_t			in_reset;
 	atomic_t			curr_pll_profile;
 	atomic_t			fd_open_cnt;
@@ -872,6 +1039,7 @@ struct hl_device {
 	u8				hard_reset_pending;
 	u8				heartbeat;
 	u8				reset_on_lockup;
+	u8				dram_supports_virtual_memory;
 	u8				init_done;
 
 	/* Parameters for bring-up */
@@ -1021,6 +1189,7 @@ int hl_device_reset(struct hl_device *hdev, bool hard_reset,
 void hl_hpriv_get(struct hl_fpriv *hpriv);
 void hl_hpriv_put(struct hl_fpriv *hpriv);
 int hl_device_set_frequency(struct hl_device *hdev, enum hl_pll_frequency freq);
+
 int hl_build_hwmon_channel_info(struct hl_device *hdev,
 		struct armcp_sensor *sensors_arr);
 
@@ -1048,6 +1217,12 @@ struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev, bool ext_queue);
 
 void goya_set_asic_funcs(struct hl_device *hdev);
 
+int hl_vm_ctx_init(struct hl_ctx *ctx);
+void hl_vm_ctx_fini(struct hl_ctx *ctx);
+
+int hl_vm_init(struct hl_device *hdev);
+void hl_vm_fini(struct hl_device *hdev);
+
 int hl_pin_host_memory(struct hl_device *hdev, u64 addr, u32 size,
 			struct hl_userptr *userptr);
 int hl_unpin_host_memory(struct hl_device *hdev, struct hl_userptr *userptr);
@@ -1057,6 +1232,15 @@ bool hl_userptr_is_pinned(struct hl_device *hdev, u64 addr, u32 size,
 				struct list_head *userptr_list,
 				struct hl_userptr **userptr);
 
+int hl_mmu_init(struct hl_device *hdev);
+void hl_mmu_fini(struct hl_device *hdev);
+void hl_mmu_ctx_init(struct hl_ctx *ctx);
+void hl_mmu_ctx_fini(struct hl_ctx *ctx);
+int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size);
+int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size);
+void hl_mmu_swap_out(struct hl_ctx *ctx);
+void hl_mmu_swap_in(struct hl_ctx *ctx);
+
 long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr);
 void hl_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq);
 long hl_get_temperature(struct hl_device *hdev, int sensor_index, u32 attr);
@@ -1074,5 +1258,6 @@ long hl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
 int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data);
 int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data);
 int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data);
+int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data);
 
 #endif /* HABANALABSP_H_ */
diff --git a/drivers/misc/habanalabs/habanalabs_drv.c b/drivers/misc/habanalabs/habanalabs_drv.c
index 77a1cc85e530..436ccae0989d 100644
--- a/drivers/misc/habanalabs/habanalabs_drv.c
+++ b/drivers/misc/habanalabs/habanalabs_drv.c
@@ -188,7 +188,7 @@ int create_hdev(struct hl_device **dev, struct pci_dev *pdev,
 	hdev->reset_on_lockup = reset_on_lockup;
 
 	/* Parameters for bring-up - set them to defaults */
-	hdev->mmu_enable = 0;
+	hdev->mmu_enable = 1;
 	hdev->cpu_enable = 1;
 	hdev->reset_pcilink = 0;
 	hdev->cpu_queues_enable = 1;
diff --git a/drivers/misc/habanalabs/habanalabs_ioctl.c b/drivers/misc/habanalabs/habanalabs_ioctl.c
index 481db1a5e97e..6e4dc5b5e696 100644
--- a/drivers/misc/habanalabs/habanalabs_ioctl.c
+++ b/drivers/misc/habanalabs/habanalabs_ioctl.c
@@ -18,7 +18,8 @@
 static const struct hl_ioctl_desc hl_ioctls[] = {
 	HL_IOCTL_DEF(HL_IOCTL_CB, hl_cb_ioctl),
 	HL_IOCTL_DEF(HL_IOCTL_CS, hl_cs_ioctl),
-	HL_IOCTL_DEF(HL_IOCTL_WAIT_CS, hl_cs_wait_ioctl)
+	HL_IOCTL_DEF(HL_IOCTL_WAIT_CS, hl_cs_wait_ioctl),
+	HL_IOCTL_DEF(HL_IOCTL_MEMORY, hl_mem_ioctl)
 };
 
 #define HL_CORE_IOCTL_COUNT	ARRAY_SIZE(hl_ioctls)
diff --git a/drivers/misc/habanalabs/include/hw_ip/mmu/mmu_general.h b/drivers/misc/habanalabs/include/hw_ip/mmu/mmu_general.h
new file mode 100644
index 000000000000..1bc36aba1426
--- /dev/null
+++ b/drivers/misc/habanalabs/include/hw_ip/mmu/mmu_general.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright 2016-2018 HabanaLabs, Ltd.
+ * All Rights Reserved.
+ *
+ */
+
+#ifndef INCLUDE_MMU_GENERAL_H_
+#define INCLUDE_MMU_GENERAL_H_
+
+#define PAGE_SHIFT_4KB			12
+#define PAGE_SHIFT_2MB			21
+#define PAGE_SIZE_2MB			(_AC(1, UL) << PAGE_SHIFT_2MB)
+#define PAGE_SIZE_4KB			(_AC(1, UL) << PAGE_SHIFT_4KB)
+#define PAGE_MASK_2MB			(~(PAGE_SIZE_2MB - 1))
+
+#define PAGE_PRESENT_MASK		0x0000000000001
+#define SWAP_OUT_MASK			0x0000000000004
+#define LAST_MASK			0x0000000000800
+#define PHYS_ADDR_MASK			0x3FFFFFFFFF000ull
+#define HOP0_MASK			0x3000000000000ull
+#define HOP1_MASK			0x0FF8000000000ull
+#define HOP2_MASK			0x0007FC0000000ull
+#define HOP3_MASK			0x000003FE00000
+#define HOP4_MASK			0x00000001FF000
+#define OFFSET_MASK			0x0000000000FFF
+
+#define HOP0_SHIFT			48
+#define HOP1_SHIFT			39
+#define HOP2_SHIFT			30
+#define HOP3_SHIFT			21
+#define HOP4_SHIFT			12
+
+#define PTE_PHYS_ADDR_SHIFT		12
+#define PTE_PHYS_ADDR_MASK		~0xFFF
+
+#define HL_PTE_SIZE			sizeof(u64)
+#define HOP_TABLE_SIZE			PAGE_SIZE_4KB
+#define HOP0_TABLES_TOTAL_SIZE		(HOP_TABLE_SIZE * MAX_ASID)
+
+#define MMU_HOP0_PA43_12_SHIFT		12
+#define MMU_HOP0_PA49_44_SHIFT		(12 + 32)
+
+#define MMU_CONFIG_TIMEOUT_USEC		2000 /* 2 ms */
+
+#endif /* INCLUDE_MMU_GENERAL_H_ */
diff --git a/drivers/misc/habanalabs/include/hw_ip/mmu/mmu_v1_0.h b/drivers/misc/habanalabs/include/hw_ip/mmu/mmu_v1_0.h
new file mode 100644
index 000000000000..8539dd041f2c
--- /dev/null
+++ b/drivers/misc/habanalabs/include/hw_ip/mmu/mmu_v1_0.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright 2016-2018 HabanaLabs, Ltd.
+ * All Rights Reserved.
+ *
+ */
+
+#ifndef INCLUDE_MMU_V1_0_H_
+#define INCLUDE_MMU_V1_0_H_
+
+#define MMU_HOP0_PA43_12	0x490004
+#define MMU_HOP0_PA49_44	0x490008
+#define MMU_ASID_BUSY		0x490000
+
+#endif /* INCLUDE_MMU_V1_0_H_ */
diff --git a/drivers/misc/habanalabs/memory.c b/drivers/misc/habanalabs/memory.c
index ad14376a1c25..6650c8085fc6 100644
--- a/drivers/misc/habanalabs/memory.c
+++ b/drivers/misc/habanalabs/memory.c
@@ -5,10 +5,1198 @@
  * All Rights Reserved.
  */
 
+#include <uapi/misc/habanalabs.h>
 #include "habanalabs.h"
+#include "include/hw_ip/mmu/mmu_general.h"
 
 #include <linux/uaccess.h>
 #include <linux/slab.h>
+#include <linux/genalloc.h>
+
+#define PGS_IN_2MB_PAGE	(PAGE_SIZE_2MB >> PAGE_SHIFT)
+#define HL_MMU_DEBUG	0
+
+/*
+ * The va ranges in context object contain a list with the available chunks of
+ * device virtual memory.
+ * There is one range for host allocations and one for DRAM allocations.
+ *
+ * On initialization each range contains one chunk of all of its available
+ * virtual range which is a half of the total device virtual range.
+ *
+ * On each mapping of physical pages, a suitable virtual range chunk (with a
+ * minimum size) is selected from the list. If the chunk size equals the
+ * requested size, the chunk is returned. Otherwise, the chunk is split into
+ * two chunks - one to return as result and a remainder to stay in the list.
+ *
+ * On each Unmapping of a virtual address, the relevant virtual chunk is
+ * returned to the list. The chunk is added to the list and if its edges match
+ * the edges of the adjacent chunks (means a contiguous chunk can be created),
+ * the chunks are merged.
+ *
+ * On finish, the list is checked to have only one chunk of all the relevant
+ * virtual range (which is a half of the device total virtual range).
+ * If not (means not all mappings were unmapped), a warning is printed.
+ */
+
+/*
+ * alloc_device_memory - allocate device memory
+ *
+ * @ctx                 : current context
+ * @args                : host parameters containing the requested size
+ * @ret_handle          : result handle
+ *
+ * This function does the following:
+ * - Allocate the requested size rounded up to 2MB pages
+ * - Return unique handle
+ */
+static int alloc_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args,
+				u32 *ret_handle)
+{
+	struct hl_device *hdev = ctx->hdev;
+	struct hl_vm *vm = &hdev->vm;
+	struct hl_vm_phys_pg_pack *phys_pg_pack;
+	u64 paddr = 0;
+	u32 total_size, num_pgs, num_curr_pgs, page_size, page_shift;
+	int handle, rc, i;
+	bool contiguous;
+
+	num_curr_pgs = 0;
+	page_size = hdev->asic_prop.dram_page_size;
+	page_shift = __ffs(page_size);
+	num_pgs = (args->alloc.mem_size + (page_size - 1)) >> page_shift;
+	total_size = num_pgs << page_shift;
+
+	contiguous = args->flags & HL_MEM_CONTIGUOUS;
+
+	if (contiguous) {
+		paddr = (u64) gen_pool_alloc(vm->dram_pg_pool, total_size);
+		if (!paddr) {
+			dev_err(hdev->dev,
+				"failed to allocate %u huge contiguous pages\n",
+				num_pgs);
+			return -ENOMEM;
+		}
+	}
+
+	phys_pg_pack = kzalloc(sizeof(*phys_pg_pack), GFP_KERNEL);
+	if (!phys_pg_pack) {
+		rc = -ENOMEM;
+		goto pages_pack_err;
+	}
+
+	phys_pg_pack->vm_type = VM_TYPE_PHYS_PACK;
+	phys_pg_pack->asid = ctx->asid;
+	phys_pg_pack->npages = num_pgs;
+	phys_pg_pack->page_size = page_size;
+	phys_pg_pack->total_size = total_size;
+	phys_pg_pack->flags = args->flags;
+	phys_pg_pack->contiguous = contiguous;
+
+	phys_pg_pack->pages = kcalloc(num_pgs, sizeof(u64), GFP_KERNEL);
+	if (!phys_pg_pack->pages) {
+		rc = -ENOMEM;
+		goto pages_arr_err;
+	}
+
+	if (phys_pg_pack->contiguous) {
+		for (i = 0 ; i < num_pgs ; i++)
+			phys_pg_pack->pages[i] = paddr + i * page_size;
+	} else {
+		for (i = 0 ; i < num_pgs ; i++) {
+			phys_pg_pack->pages[i] = (u64) gen_pool_alloc(
+							vm->dram_pg_pool,
+							page_size);
+			if (!phys_pg_pack->pages[i]) {
+				dev_err(hdev->dev,
+					"ioctl failed to allocate page\n");
+				rc = -ENOMEM;
+				goto page_err;
+			}
+
+			num_curr_pgs++;
+		}
+	}
+
+	spin_lock(&vm->idr_lock);
+	handle = idr_alloc(&vm->phys_pg_pack_handles, phys_pg_pack, 1, 0,
+				GFP_KERNEL);
+	spin_unlock(&vm->idr_lock);
+
+	if (handle < 0) {
+		dev_err(hdev->dev, "Failed to get handle for page\n");
+		rc = -EFAULT;
+		goto idr_err;
+	}
+
+	for (i = 0 ; i < num_pgs ; i++)
+		kref_get(&vm->dram_pg_pool_refcount);
+
+	phys_pg_pack->handle = handle;
+
+	atomic64_add(phys_pg_pack->total_size, &ctx->dram_phys_mem);
+	atomic64_add(phys_pg_pack->total_size, &hdev->dram_used_mem);
+
+	*ret_handle = handle;
+
+	return 0;
+
+idr_err:
+page_err:
+	if (!phys_pg_pack->contiguous)
+		for (i = 0 ; i < num_curr_pgs ; i++)
+			gen_pool_free(vm->dram_pg_pool, phys_pg_pack->pages[i],
+					page_size);
+
+	kfree(phys_pg_pack->pages);
+pages_arr_err:
+	kfree(phys_pg_pack);
+pages_pack_err:
+	if (contiguous)
+		gen_pool_free(vm->dram_pg_pool, paddr, total_size);
+
+	return rc;
+}
+
+/*
+ * get_userptr_from_host_va - initialize userptr structure from given host
+ *                            virtual address
+ *
+ * @hdev                : habanalabs device structure
+ * @args                : parameters containing the virtual address and size
+ * @p_userptr           : pointer to result userptr structure
+ *
+ * This function does the following:
+ * - Allocate userptr structure
+ * - Pin the given host memory using the userptr structure
+ * - Perform DMA mapping to have the DMA addresses of the pages
+ */
+static int get_userptr_from_host_va(struct hl_device *hdev,
+		struct hl_mem_in *args, struct hl_userptr **p_userptr)
+{
+	struct hl_userptr *userptr;
+	int rc;
+
+	userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
+	if (!userptr) {
+		rc = -ENOMEM;
+		goto userptr_err;
+	}
+
+	rc = hl_pin_host_memory(hdev, args->map_host.host_virt_addr,
+			args->map_host.mem_size, userptr);
+	if (rc) {
+		dev_err(hdev->dev, "Failed to pin host memory\n");
+		goto pin_err;
+	}
+
+	rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl,
+					userptr->sgt->nents, DMA_BIDIRECTIONAL);
+	if (rc) {
+		dev_err(hdev->dev, "failed to map sgt with DMA region\n");
+		goto dma_map_err;
+	}
+
+	userptr->dma_mapped = true;
+	userptr->dir = DMA_BIDIRECTIONAL;
+	userptr->vm_type = VM_TYPE_USERPTR;
+
+	*p_userptr = userptr;
+
+	return 0;
+
+dma_map_err:
+	hl_unpin_host_memory(hdev, userptr);
+pin_err:
+	kfree(userptr);
+userptr_err:
+
+	return rc;
+}
+
+/*
+ * free_userptr - free userptr structure
+ *
+ * @hdev                : habanalabs device structure
+ * @userptr             : userptr to free
+ *
+ * This function does the following:
+ * - Unpins the physical pages
+ * - Frees the userptr structure
+ */
+static void free_userptr(struct hl_device *hdev, struct hl_userptr *userptr)
+{
+	hl_unpin_host_memory(hdev, userptr);
+	kfree(userptr);
+}
+
+/*
+ * dram_pg_pool_do_release - free DRAM pages pool
+ *
+ * @ref                 : pointer to reference object
+ *
+ * This function does the following:
+ * - Frees the idr structure of physical pages handles
+ * - Frees the generic pool of DRAM physical pages
+ */
+static void dram_pg_pool_do_release(struct kref *ref)
+{
+	struct hl_vm *vm = container_of(ref, struct hl_vm,
+			dram_pg_pool_refcount);
+
+	/*
+	 * free the idr here as only here we know for sure that there are no
+	 * allocated physical pages and hence there are no handles in use
+	 */
+	idr_destroy(&vm->phys_pg_pack_handles);
+	gen_pool_destroy(vm->dram_pg_pool);
+}
+
+/*
+ * free_phys_pg_pack   - free physical page pack
+ *
+ * @hdev               : habanalabs device structure
+ * @phys_pg_pack       : physical page pack to free
+ *
+ * This function does the following:
+ * - For DRAM memory only, iterate over the pack and free each physical block
+ *   structure by returning it to the general pool
+ * - Free the hl_vm_phys_pg_pack structure
+ */
+static void free_phys_pg_pack(struct hl_device *hdev,
+		struct hl_vm_phys_pg_pack *phys_pg_pack)
+{
+	struct hl_vm *vm = &hdev->vm;
+	int i;
+
+	if (!phys_pg_pack->created_from_userptr) {
+		if (phys_pg_pack->contiguous) {
+			gen_pool_free(vm->dram_pg_pool, phys_pg_pack->pages[0],
+					phys_pg_pack->total_size);
+
+			for (i = 0; i < phys_pg_pack->npages ; i++)
+				kref_put(&vm->dram_pg_pool_refcount,
+					dram_pg_pool_do_release);
+		} else {
+			for (i = 0 ; i < phys_pg_pack->npages ; i++) {
+				gen_pool_free(vm->dram_pg_pool,
+						phys_pg_pack->pages[i],
+						phys_pg_pack->page_size);
+				kref_put(&vm->dram_pg_pool_refcount,
+					dram_pg_pool_do_release);
+			}
+		}
+	}
+
+	kfree(phys_pg_pack->pages);
+	kfree(phys_pg_pack);
+}
+
+/*
+ * free_device_memory - free device memory
+ *
+ * @ctx                  : current context
+ * @handle              : handle of the memory chunk to free
+ *
+ * This function does the following:
+ * - Free the device memory related to the given handle
+ */
+static int free_device_memory(struct hl_ctx *ctx, u32 handle)
+{
+	struct hl_device *hdev = ctx->hdev;
+	struct hl_vm *vm = &hdev->vm;
+	struct hl_vm_phys_pg_pack *phys_pg_pack;
+
+	spin_lock(&vm->idr_lock);
+	phys_pg_pack = idr_find(&vm->phys_pg_pack_handles, handle);
+	if (phys_pg_pack) {
+		if (atomic_read(&phys_pg_pack->mapping_cnt) > 0) {
+			dev_err(hdev->dev, "handle %u is mapped, cannot free\n",
+				handle);
+			spin_unlock(&vm->idr_lock);
+			return -EINVAL;
+		}
+
+		/*
+		 * must remove from idr before the freeing of the physical
+		 * pages as the refcount of the pool is also the trigger of the
+		 * idr destroy
+		 */
+		idr_remove(&vm->phys_pg_pack_handles, handle);
+		spin_unlock(&vm->idr_lock);
+
+		atomic64_sub(phys_pg_pack->total_size, &ctx->dram_phys_mem);
+		atomic64_sub(phys_pg_pack->total_size, &hdev->dram_used_mem);
+
+		free_phys_pg_pack(hdev, phys_pg_pack);
+	} else {
+		spin_unlock(&vm->idr_lock);
+		dev_err(hdev->dev,
+			"free device memory failed, no match for handle %u\n",
+			handle);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/*
+ * clear_va_list_locked - free virtual addresses list
+ *
+ * @hdev                : habanalabs device structure
+ * @va_list             : list of virtual addresses to free
+ *
+ * This function does the following:
+ * - Iterate over the list and free each virtual addresses block
+ *
+ * This function should be called only when va_list lock is taken
+ */
+static void clear_va_list_locked(struct hl_device *hdev,
+		struct list_head *va_list)
+{
+	struct hl_vm_va_block *va_block, *tmp;
+
+	list_for_each_entry_safe(va_block, tmp, va_list, node) {
+		list_del(&va_block->node);
+		kfree(va_block);
+	}
+}
+
+/*
+ * print_va_list_locked    - print virtual addresses list
+ *
+ * @hdev                : habanalabs device structure
+ * @va_list             : list of virtual addresses to print
+ *
+ * This function does the following:
+ * - Iterate over the list and print each virtual addresses block
+ *
+ * This function should be called only when va_list lock is taken
+ */
+static void print_va_list_locked(struct hl_device *hdev,
+		struct list_head *va_list)
+{
+#if HL_MMU_DEBUG
+	struct hl_vm_va_block *va_block;
+
+	dev_dbg(hdev->dev, "print va list:\n");
+
+	list_for_each_entry(va_block, va_list, node)
+		dev_dbg(hdev->dev,
+			"va block, start: 0x%llx, end: 0x%llx, size: %llu\n",
+			va_block->start, va_block->end, va_block->size);
+#endif
+}
+
+/*
+ * merge_va_blocks_locked - merge a virtual block if possible
+ *
+ * @hdev                : pointer to the habanalabs device structure
+ * @va_list             : pointer to the virtual addresses block list
+ * @va_block            : virtual block to merge with adjacent blocks
+ *
+ * This function does the following:
+ * - Merge the given blocks with the adjacent blocks if their virtual ranges
+ *   create a contiguous virtual range
+ *
+ * This Function should be called only when va_list lock is taken
+ */
+static void merge_va_blocks_locked(struct hl_device *hdev,
+		struct list_head *va_list, struct hl_vm_va_block *va_block)
+{
+	struct hl_vm_va_block *prev, *next;
+
+	prev = list_prev_entry(va_block, node);
+	if (&prev->node != va_list && prev->end + 1 == va_block->start) {
+		prev->end = va_block->end;
+		prev->size = prev->end - prev->start;
+		list_del(&va_block->node);
+		kfree(va_block);
+		va_block = prev;
+	}
+
+	next = list_next_entry(va_block, node);
+	if (&next->node != va_list && va_block->end + 1 == next->start) {
+		next->start = va_block->start;
+		next->size = next->end - next->start;
+		list_del(&va_block->node);
+		kfree(va_block);
+	}
+}
+
+/*
+ * add_va_block_locked - add a virtual block to the virtual addresses list
+ *
+ * @hdev                : pointer to the habanalabs device structure
+ * @va_list             : pointer to the virtual addresses block list
+ * @start               : start virtual address
+ * @end                 : end virtual address
+ *
+ * This function does the following:
+ * - Add the given block to the virtual blocks list and merge with other
+ * blocks if a contiguous virtual block can be created
+ *
+ * This Function should be called only when va_list lock is taken
+ */
+static int add_va_block_locked(struct hl_device *hdev,
+		struct list_head *va_list, u64 start, u64 end)
+{
+	struct hl_vm_va_block *va_block, *res = NULL;
+	u64 size = end - start;
+
+	print_va_list_locked(hdev, va_list);
+
+	list_for_each_entry(va_block, va_list, node) {
+		/* TODO: remove upon matureness */
+		if (hl_mem_area_crosses_range(start, size, va_block->start,
+				va_block->end)) {
+			dev_err(hdev->dev,
+				"block crossing ranges at start 0x%llx, end 0x%llx\n",
+				va_block->start, va_block->end);
+			return -EINVAL;
+		}
+
+		if (va_block->end < start)
+			res = va_block;
+	}
+
+	va_block = kmalloc(sizeof(*va_block), GFP_KERNEL);
+	if (!va_block)
+		return -ENOMEM;
+
+	va_block->start = start;
+	va_block->end = end;
+	va_block->size = size;
+
+	if (!res)
+		list_add(&va_block->node, va_list);
+	else
+		list_add(&va_block->node, &res->node);
+
+	merge_va_blocks_locked(hdev, va_list, va_block);
+
+	print_va_list_locked(hdev, va_list);
+
+	return 0;
+}
+
+/*
+ * add_va_block - wrapper for add_va_block_locked
+ *
+ * @hdev                : pointer to the habanalabs device structure
+ * @va_list             : pointer to the virtual addresses block list
+ * @start               : start virtual address
+ * @end                 : end virtual address
+ *
+ * This function does the following:
+ * - Takes the list lock and calls add_va_block_locked
+ */
+static inline int add_va_block(struct hl_device *hdev,
+		struct hl_va_range *va_range, u64 start, u64 end)
+{
+	int rc;
+
+	mutex_lock(&va_range->lock);
+	rc = add_va_block_locked(hdev, &va_range->list, start, end);
+	mutex_unlock(&va_range->lock);
+
+	return rc;
+}
+
+/*
+ * get_va_block - get a virtual block with the requested size
+ *
+ * @hdev            : pointer to the habanalabs device structure
+ * @va_range        : pointer to the virtual addresses range
+ * @size            : requested block size
+ * @hint_addr       : hint for request address by the user
+ * @is_userptr      : is host or DRAM memory
+ *
+ * This function does the following:
+ * - Iterate on the virtual block list to find a suitable virtual block for the
+ *   requested size
+ * - Reserve the requested block and update the list
+ * - Return the start address of the virtual block
+ */
+static u64 get_va_block(struct hl_device *hdev,
+		struct hl_va_range *va_range, u32 size, u64 hint_addr,
+		bool is_userptr)
+{
+	struct hl_vm_va_block *va_block, *new_va_block = NULL;
+	u64 valid_start, valid_size, prev_start, prev_end, page_mask,
+		res_valid_start = 0, res_valid_size = 0;
+	u32 page_size;
+	bool add_prev = false;
+
+	if (is_userptr) {
+		/*
+		 * We cannot know if the user allocated memory with huge pages
+		 * or not, hence we continue with the biggest possible
+		 * granularity.
+		 */
+		page_size = PAGE_SIZE_2MB;
+		page_mask = PAGE_MASK_2MB;
+	} else {
+		page_size = hdev->asic_prop.dram_page_size;
+		page_mask = ~((u64)page_size - 1);
+	}
+
+	mutex_lock(&va_range->lock);
+
+	print_va_list_locked(hdev, &va_range->list);
+
+	list_for_each_entry(va_block, &va_range->list, node) {
+		/* calc the first possible aligned addr */
+		valid_start = va_block->start;
+
+
+		if (valid_start & (page_size - 1)) {
+			valid_start &= page_mask;
+			valid_start += page_size;
+			if (valid_start > va_block->end)
+				continue;
+		}
+
+		valid_size = va_block->end - valid_start;
+
+		if (valid_size >= size &&
+			(!new_va_block || valid_size < res_valid_size)) {
+
+			new_va_block = va_block;
+			res_valid_start = valid_start;
+			res_valid_size = valid_size;
+		}
+
+		if (hint_addr && hint_addr >= valid_start &&
+				((hint_addr + size) <= va_block->end)) {
+			new_va_block = va_block;
+			res_valid_start = hint_addr;
+			res_valid_size = valid_size;
+			break;
+		}
+	}
+
+	if (!new_va_block) {
+		dev_err(hdev->dev, "no available va block for size %u\n", size);
+		goto out;
+	}
+
+	if (res_valid_start > new_va_block->start) {
+		prev_start = new_va_block->start;
+		prev_end = res_valid_start - 1;
+
+		new_va_block->start = res_valid_start;
+		new_va_block->size = res_valid_size;
+
+		add_prev = true;
+	}
+
+	if (new_va_block->size > size) {
+		new_va_block->start += size;
+		new_va_block->size = new_va_block->end - new_va_block->start;
+	} else {
+		list_del(&new_va_block->node);
+		kfree(new_va_block);
+	}
+
+	if (add_prev)
+		add_va_block_locked(hdev, &va_range->list, prev_start,
+				prev_end);
+
+	print_va_list_locked(hdev, &va_range->list);
+out:
+	mutex_unlock(&va_range->lock);
+
+	return res_valid_start;
+}
+
+/*
+ * get_sg_info - get number of pages and the DMA address from SG list
+ *
+ * @sg                 : the SG list
+ * @dma_addr           : pointer to DMA address to return
+ *
+ * Calculate the number of consecutive pages described by the SG list. Take the
+ * offset of the address in the first page, add to it the length and round it up
+ * to the number of needed pages.
+ */
+static u32 get_sg_info(struct scatterlist *sg, dma_addr_t *dma_addr)
+{
+	*dma_addr = sg_dma_address(sg);
+
+	return ((((*dma_addr) & (PAGE_SIZE - 1)) + sg_dma_len(sg)) +
+			(PAGE_SIZE - 1)) >> PAGE_SHIFT;
+}
+
+/*
+ * init_phys_pg_pack_from_userptr - initialize physical page pack from host
+ *                                   memory
+ *
+ * @ctx                : current context
+ * @userptr            : userptr to initialize from
+ * @pphys_pg_pack      : res pointer
+ *
+ * This function does the following:
+ * - Pin the physical pages related to the given virtual block
+ * - Create a physical page pack from the physical pages related to the given
+ *   virtual block
+ */
+static int init_phys_pg_pack_from_userptr(struct hl_ctx *ctx,
+		struct hl_userptr *userptr,
+		struct hl_vm_phys_pg_pack **pphys_pg_pack)
+{
+	struct hl_vm_phys_pg_pack *phys_pg_pack;
+	struct scatterlist *sg;
+	dma_addr_t dma_addr;
+	u64 page_mask;
+	u32 npages, total_npages, page_size = PAGE_SIZE;
+	bool first = true, is_huge_page_opt = true;
+	int rc, i, j;
+
+	phys_pg_pack = kzalloc(sizeof(*phys_pg_pack), GFP_KERNEL);
+	if (!phys_pg_pack)
+		return -ENOMEM;
+
+	phys_pg_pack->vm_type = userptr->vm_type;
+	phys_pg_pack->created_from_userptr = true;
+	phys_pg_pack->asid = ctx->asid;
+	atomic_set(&phys_pg_pack->mapping_cnt, 1);
+
+	/* Only if all dma_addrs are aligned to 2MB and their
+	 * sizes is at least 2MB, we can use huge page mapping.
+	 * We limit the 2MB optimization to this condition,
+	 * since later on we acquire the related VA range as one
+	 * consecutive block.
+	 */
+	total_npages = 0;
+	for_each_sg(userptr->sgt->sgl, sg, userptr->sgt->nents, i) {
+		npages = get_sg_info(sg, &dma_addr);
+
+		total_npages += npages;
+
+		if (first) {
+			first = false;
+			dma_addr &= PAGE_MASK_2MB;
+		}
+
+		if ((npages % PGS_IN_2MB_PAGE) ||
+					(dma_addr & (PAGE_SIZE_2MB - 1)))
+			is_huge_page_opt = false;
+	}
+
+	if (is_huge_page_opt) {
+		page_size = PAGE_SIZE_2MB;
+		total_npages /= PGS_IN_2MB_PAGE;
+	}
+
+	page_mask = ~(((u64) page_size) - 1);
+
+	phys_pg_pack->pages = kcalloc(total_npages, sizeof(u64), GFP_KERNEL);
+	if (!phys_pg_pack->pages) {
+		rc = -ENOMEM;
+		goto page_pack_arr_mem_err;
+	}
+
+	phys_pg_pack->npages = total_npages;
+	phys_pg_pack->page_size = page_size;
+	phys_pg_pack->total_size = total_npages * page_size;
+
+	j = 0;
+	first = true;
+	for_each_sg(userptr->sgt->sgl, sg, userptr->sgt->nents, i) {
+		npages = get_sg_info(sg, &dma_addr);
+
+		/* align down to physical page size and save the offset */
+		if (first) {
+			first = false;
+			phys_pg_pack->offset = dma_addr & (page_size - 1);
+			dma_addr &= page_mask;
+		}
+
+		while (npages) {
+			phys_pg_pack->pages[j++] = dma_addr;
+			dma_addr += page_size;
+
+			if (is_huge_page_opt)
+				npages -= PGS_IN_2MB_PAGE;
+			else
+				npages--;
+		}
+	}
+
+	*pphys_pg_pack = phys_pg_pack;
+
+	return 0;
+
+page_pack_arr_mem_err:
+	kfree(phys_pg_pack);
+
+	return rc;
+}
+
+/*
+ * map_phys_page_pack - maps the physical page pack
+ *
+ * @ctx                : current context
+ * @vaddr              : start address of the virtual area to map from
+ * @phys_pg_pack       : the pack of physical pages to map to
+ *
+ * This function does the following:
+ * - Maps each chunk of virtual memory to matching physical chunk
+ * - Stores number of successful mappings in the given argument
+ * - Returns 0 on success, error code otherwise.
+ */
+static int map_phys_page_pack(struct hl_ctx *ctx, u64 vaddr,
+		struct hl_vm_phys_pg_pack *phys_pg_pack)
+{
+	struct hl_device *hdev = ctx->hdev;
+	u64 next_vaddr = vaddr, paddr;
+	u32 page_size = phys_pg_pack->page_size;
+	int i, rc = 0, mapped_pg_cnt = 0;
+
+	for (i = 0 ; i < phys_pg_pack->npages ; i++) {
+		paddr = phys_pg_pack->pages[i];
+
+		/* For accessing the host we need to turn on bit 39 */
+		if (phys_pg_pack->created_from_userptr)
+			paddr += hdev->asic_prop.host_phys_base_address;
+
+		rc = hl_mmu_map(ctx, next_vaddr, paddr, page_size);
+		if (rc) {
+			dev_err(hdev->dev,
+				"map failed for handle %u, npages: %d, mapped: %d",
+				phys_pg_pack->handle, phys_pg_pack->npages,
+				mapped_pg_cnt);
+			goto err;
+		}
+
+		mapped_pg_cnt++;
+		next_vaddr += page_size;
+	}
+
+	return 0;
+
+err:
+	next_vaddr = vaddr;
+	for (i = 0 ; i < mapped_pg_cnt ; i++) {
+		if (hl_mmu_unmap(ctx, next_vaddr, page_size))
+			dev_warn_ratelimited(hdev->dev,
+				"failed to unmap handle %u, va: 0x%llx, pa: 0x%llx, page size: %u\n",
+					phys_pg_pack->handle, next_vaddr,
+					phys_pg_pack->pages[i], page_size);
+
+		next_vaddr += page_size;
+	}
+
+	return rc;
+}
+
+static int get_paddr_from_handle(struct hl_ctx *ctx, struct hl_mem_in *args,
+				u64 *paddr)
+{
+	struct hl_device *hdev = ctx->hdev;
+	struct hl_vm *vm = &hdev->vm;
+	struct hl_vm_phys_pg_pack *phys_pg_pack;
+	u32 handle;
+
+	handle = lower_32_bits(args->map_device.handle);
+	spin_lock(&vm->idr_lock);
+	phys_pg_pack = idr_find(&vm->phys_pg_pack_handles, handle);
+	if (!phys_pg_pack) {
+		spin_unlock(&vm->idr_lock);
+		dev_err(hdev->dev, "no match for handle %u\n", handle);
+		return -EINVAL;
+	}
+
+	*paddr = phys_pg_pack->pages[0];
+
+	spin_unlock(&vm->idr_lock);
+
+	return 0;
+}
+
+/*
+ * map_device_va - map the given memory
+ *
+ * @ctx	         : current context
+ * @args         : host parameters with handle/host virtual address
+ * @device_addr	 : pointer to result device virtual address
+ *
+ * This function does the following:
+ * - If given a physical device memory handle, map to a device virtual block
+ *   and return the start address of this block
+ * - If given a host virtual address and size, find the related physical pages,
+ *   map a device virtual block to this pages and return the start address of
+ *   this block
+ */
+static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
+		u64 *device_addr)
+{
+	struct hl_device *hdev = ctx->hdev;
+	struct hl_vm *vm = &hdev->vm;
+	struct hl_vm_phys_pg_pack *phys_pg_pack;
+	struct hl_userptr *userptr = NULL;
+	struct hl_vm_hash_node *hnode;
+	enum vm_type_t *vm_type;
+	u64 ret_vaddr, hint_addr;
+	u32 handle = 0;
+	int rc;
+	bool is_userptr = args->flags & HL_MEM_USERPTR;
+
+	/* Assume failure */
+	*device_addr = 0;
+
+	if (is_userptr) {
+		rc = get_userptr_from_host_va(hdev, args, &userptr);
+		if (rc) {
+			dev_err(hdev->dev, "failed to get userptr from va\n");
+			return rc;
+		}
+
+		rc = init_phys_pg_pack_from_userptr(ctx, userptr,
+				&phys_pg_pack);
+		if (rc) {
+			dev_err(hdev->dev,
+				"unable to init page pack for vaddr 0x%llx\n",
+				args->map_host.host_virt_addr);
+			goto init_page_pack_err;
+		}
+
+		vm_type = (enum vm_type_t *) userptr;
+		hint_addr = args->map_host.hint_addr;
+	} else {
+		handle = lower_32_bits(args->map_device.handle);
+
+		spin_lock(&vm->idr_lock);
+		phys_pg_pack = idr_find(&vm->phys_pg_pack_handles, handle);
+		if (!phys_pg_pack) {
+			spin_unlock(&vm->idr_lock);
+			dev_err(hdev->dev,
+				"no match for handle %u\n", handle);
+			return -EINVAL;
+		}
+
+		/* increment now to avoid freeing device memory while mapping */
+		atomic_inc(&phys_pg_pack->mapping_cnt);
+
+		spin_unlock(&vm->idr_lock);
+
+		vm_type = (enum vm_type_t *) phys_pg_pack;
+
+		hint_addr = args->map_device.hint_addr;
+	}
+
+	/*
+	 * relevant for mapping device physical memory only, as host memory is
+	 * implicitly shared
+	 */
+	if (!is_userptr && !(phys_pg_pack->flags & HL_MEM_SHARED) &&
+			phys_pg_pack->asid != ctx->asid) {
+		dev_err(hdev->dev,
+			"Failed to map memory, handle %u is not shared\n",
+			handle);
+		rc = -EPERM;
+		goto shared_err;
+	}
+
+	hnode = kzalloc(sizeof(*hnode), GFP_KERNEL);
+	if (!hnode) {
+		rc = -ENOMEM;
+		goto hnode_err;
+	}
+
+	ret_vaddr = get_va_block(hdev,
+			is_userptr ? &ctx->host_va_range : &ctx->dram_va_range,
+			phys_pg_pack->total_size, hint_addr, is_userptr);
+	if (!ret_vaddr) {
+		dev_err(hdev->dev, "no available va block for handle %u\n",
+				handle);
+		rc = -ENOMEM;
+		goto va_block_err;
+	}
+
+	mutex_lock(&ctx->mmu_lock);
+
+	rc = map_phys_page_pack(ctx, ret_vaddr, phys_pg_pack);
+	if (rc) {
+		mutex_unlock(&ctx->mmu_lock);
+		dev_err(hdev->dev, "mapping page pack failed for handle %u\n",
+				handle);
+		goto map_err;
+	}
+
+	hdev->asic_funcs->mmu_invalidate_cache_range(hdev, false, ctx->asid,
+			ret_vaddr, phys_pg_pack->total_size);
+
+	mutex_unlock(&ctx->mmu_lock);
+
+	ret_vaddr += phys_pg_pack->offset;
+
+	hnode->ptr = vm_type;
+	hnode->vaddr = ret_vaddr;
+
+	mutex_lock(&ctx->mem_hash_lock);
+	hash_add(ctx->mem_hash, &hnode->node, ret_vaddr);
+	mutex_unlock(&ctx->mem_hash_lock);
+
+	*device_addr = ret_vaddr;
+
+	if (is_userptr)
+		free_phys_pg_pack(hdev, phys_pg_pack);
+
+	return 0;
+
+map_err:
+	if (add_va_block(hdev,
+			is_userptr ? &ctx->host_va_range : &ctx->dram_va_range,
+			ret_vaddr,
+			ret_vaddr + phys_pg_pack->total_size - 1))
+		dev_warn(hdev->dev,
+			"release va block failed for handle 0x%x, vaddr: 0x%llx\n",
+				handle, ret_vaddr);
+
+va_block_err:
+	kfree(hnode);
+hnode_err:
+shared_err:
+	atomic_dec(&phys_pg_pack->mapping_cnt);
+	if (is_userptr)
+		free_phys_pg_pack(hdev, phys_pg_pack);
+init_page_pack_err:
+	if (is_userptr)
+		free_userptr(hdev, userptr);
+
+	return rc;
+}
+
+/*
+ * unmap_device_va      - unmap the given device virtual address
+ *
+ * @ctx                 : current context
+ * @vaddr               : device virtual address to unmap
+ *
+ * This function does the following:
+ * - Unmap the physical pages related to the given virtual address
+ * - return the device virtual block to the virtual block list
+ */
+static int unmap_device_va(struct hl_ctx *ctx, u64 vaddr)
+{
+	struct hl_device *hdev = ctx->hdev;
+	struct hl_vm_phys_pg_pack *phys_pg_pack = NULL;
+	struct hl_vm_hash_node *hnode = NULL;
+	struct hl_userptr *userptr = NULL;
+	enum vm_type_t *vm_type;
+	u64 next_vaddr;
+	u32 page_size;
+	bool is_userptr;
+	int i, rc;
+
+	/* protect from double entrance */
+	mutex_lock(&ctx->mem_hash_lock);
+	hash_for_each_possible(ctx->mem_hash, hnode, node, (unsigned long)vaddr)
+		if (vaddr == hnode->vaddr)
+			break;
+
+	if (!hnode) {
+		mutex_unlock(&ctx->mem_hash_lock);
+		dev_err(hdev->dev,
+			"unmap failed, no mem hnode for vaddr 0x%llx\n",
+			vaddr);
+		return -EINVAL;
+	}
+
+	hash_del(&hnode->node);
+	mutex_unlock(&ctx->mem_hash_lock);
+
+	vm_type = hnode->ptr;
+
+	if (*vm_type == VM_TYPE_USERPTR) {
+		is_userptr = true;
+		userptr = hnode->ptr;
+		rc = init_phys_pg_pack_from_userptr(ctx, userptr,
+				&phys_pg_pack);
+		if (rc) {
+			dev_err(hdev->dev,
+				"unable to init page pack for vaddr 0x%llx\n",
+				vaddr);
+			goto vm_type_err;
+		}
+	} else if (*vm_type == VM_TYPE_PHYS_PACK) {
+		is_userptr = false;
+		phys_pg_pack = hnode->ptr;
+	} else {
+		dev_warn(hdev->dev,
+			"unmap failed, unknown vm desc for vaddr 0x%llx\n",
+				vaddr);
+		rc = -EFAULT;
+		goto vm_type_err;
+	}
+
+	if (atomic_read(&phys_pg_pack->mapping_cnt) == 0) {
+		dev_err(hdev->dev, "vaddr 0x%llx is not mapped\n", vaddr);
+		rc = -EINVAL;
+		goto mapping_cnt_err;
+	}
+
+	page_size = phys_pg_pack->page_size;
+	vaddr &= ~(((u64) page_size) - 1);
+
+	next_vaddr = vaddr;
+
+	mutex_lock(&ctx->mmu_lock);
+
+	for (i = 0 ; i < phys_pg_pack->npages ; i++, next_vaddr += page_size)
+		if (hl_mmu_unmap(ctx, next_vaddr, page_size))
+			dev_warn_ratelimited(hdev->dev,
+				"unmap failed for vaddr: 0x%llx\n", next_vaddr);
+
+	hdev->asic_funcs->mmu_invalidate_cache_range(hdev, true, ctx->asid,
+			vaddr, phys_pg_pack->total_size);
+
+	mutex_unlock(&ctx->mmu_lock);
+
+	if (add_va_block(hdev,
+			is_userptr ? &ctx->host_va_range : &ctx->dram_va_range,
+			vaddr,
+			vaddr + phys_pg_pack->total_size - 1))
+		dev_warn(hdev->dev, "add va block failed for vaddr: 0x%llx\n",
+				vaddr);
+
+	atomic_dec(&phys_pg_pack->mapping_cnt);
+	kfree(hnode);
+
+	if (is_userptr) {
+		free_phys_pg_pack(hdev, phys_pg_pack);
+		free_userptr(hdev, userptr);
+	}
+
+	return 0;
+
+mapping_cnt_err:
+	if (is_userptr)
+		free_phys_pg_pack(hdev, phys_pg_pack);
+vm_type_err:
+	mutex_lock(&ctx->mem_hash_lock);
+	hash_add(ctx->mem_hash, &hnode->node, vaddr);
+	mutex_unlock(&ctx->mem_hash_lock);
+
+	return rc;
+}
+
+int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data)
+{
+	union hl_mem_args *args = data;
+	struct hl_device *hdev = hpriv->hdev;
+	struct hl_ctx *ctx = hpriv->ctx;
+	u64 device_addr = 0;
+	u32 handle = 0;
+	int rc;
+
+	if (hl_device_disabled_or_in_reset(hdev)) {
+		dev_warn_ratelimited(hdev->dev,
+			"Device is disabled or in reset. Can't execute memory IOCTL\n");
+		return -EBUSY;
+	}
+
+	if (hdev->mmu_enable) {
+		switch (args->in.op) {
+		case HL_MEM_OP_ALLOC:
+			if (!hdev->dram_supports_virtual_memory) {
+				dev_err(hdev->dev,
+					"DRAM alloc is not supported\n");
+				rc = -EINVAL;
+				goto out;
+			}
+			if (args->in.alloc.mem_size == 0) {
+				dev_err(hdev->dev,
+					"alloc size must be larger than 0\n");
+				rc = -EINVAL;
+				goto out;
+			}
+			rc = alloc_device_memory(ctx, &args->in, &handle);
+
+			memset(args, 0, sizeof(*args));
+			args->out.handle = (__u64) handle;
+			break;
+
+		case HL_MEM_OP_FREE:
+			if (!hdev->dram_supports_virtual_memory) {
+				dev_err(hdev->dev,
+					"DRAM free is not supported\n");
+				rc = -EINVAL;
+				goto out;
+			}
+			rc = free_device_memory(ctx, args->in.free.handle);
+			break;
+
+		case HL_MEM_OP_MAP:
+			rc = map_device_va(ctx, &args->in, &device_addr);
+
+			memset(args, 0, sizeof(*args));
+			args->out.device_virt_addr = device_addr;
+			break;
+
+		case HL_MEM_OP_UNMAP:
+			rc = unmap_device_va(ctx,
+					args->in.unmap.device_virt_addr);
+			break;
+
+		default:
+			dev_err(hdev->dev, "Unknown opcode for memory IOCTL\n");
+			rc = -ENOTTY;
+			break;
+		}
+	} else {
+		switch (args->in.op) {
+		case HL_MEM_OP_ALLOC:
+			if (args->in.alloc.mem_size == 0) {
+				dev_err(hdev->dev,
+					"alloc size must be larger than 0\n");
+				rc = -EINVAL;
+				goto out;
+			}
+
+			/* Force contiguous as there are no real MMU
+			 * translations to overcome physical memory gaps
+			 */
+			args->in.flags |= HL_MEM_CONTIGUOUS;
+			rc = alloc_device_memory(ctx, &args->in, &handle);
+
+			memset(args, 0, sizeof(*args));
+			args->out.handle = (__u64) handle;
+			break;
+
+		case HL_MEM_OP_FREE:
+			rc = free_device_memory(ctx, args->in.free.handle);
+			break;
+
+		case HL_MEM_OP_MAP:
+			if (args->in.flags & HL_MEM_USERPTR) {
+				device_addr = args->in.map_host.host_virt_addr;
+				rc = 0;
+			} else {
+				rc = get_paddr_from_handle(ctx, &args->in,
+						&device_addr);
+			}
+
+			memset(args, 0, sizeof(*args));
+			args->out.device_virt_addr = device_addr;
+			break;
+
+		case HL_MEM_OP_UNMAP:
+			rc = 0;
+			break;
+
+		default:
+			dev_err(hdev->dev, "Unknown opcode for memory IOCTL\n");
+			rc = -ENOTTY;
+			break;
+		}
+	}
+
+out:
+	return rc;
+}
 
 /*
  * hl_pin_host_memory - pins a chunk of host memory
@@ -196,3 +1384,332 @@ bool hl_userptr_is_pinned(struct hl_device *hdev, u64 addr,
 
 	return false;
 }
+
+/*
+ * hl_va_range_init - initialize virtual addresses range
+ *
+ * @hdev                : pointer to the habanalabs device structure
+ * @va_range            : pointer to the range to initialize
+ * @start               : range start address
+ * @end                 : range end address
+ *
+ * This function does the following:
+ * - Initializes the virtual addresses list of the given range with the given
+ *   addresses.
+ */
+static int hl_va_range_init(struct hl_device *hdev,
+		struct hl_va_range *va_range, u64 start, u64 end)
+{
+	int rc;
+
+	INIT_LIST_HEAD(&va_range->list);
+
+	/* PAGE_SIZE alignment */
+
+	if (start & (PAGE_SIZE - 1)) {
+		start &= PAGE_MASK;
+		start += PAGE_SIZE;
+	}
+
+	if (end & (PAGE_SIZE - 1))
+		end &= PAGE_MASK;
+
+	if (start >= end) {
+		dev_err(hdev->dev, "too small vm range for va list\n");
+		return -EFAULT;
+	}
+
+	rc = add_va_block(hdev, va_range, start, end);
+
+	if (rc) {
+		dev_err(hdev->dev, "Failed to init host va list\n");
+		return rc;
+	}
+
+	va_range->start_addr = start;
+	va_range->end_addr = end;
+
+	return 0;
+}
+
+/*
+ * hl_vm_ctx_init_with_ranges - initialize virtual memory for context
+ *
+ * @ctx                 : pointer to the habanalabs context structure
+ * @host_range_start    : host virtual addresses range start
+ * @host_range_end      : host virtual addresses range end
+ * @dram_range_start    : dram virtual addresses range start
+ * @dram_range_end      : dram virtual addresses range end
+ *
+ * This function initializes the following:
+ * - MMU for context
+ * - Virtual address to area descriptor hashtable
+ * - Virtual block list of available virtual memory
+ */
+int hl_vm_ctx_init_with_ranges(struct hl_ctx *ctx, u64 host_range_start,
+				u64 host_range_end, u64 dram_range_start,
+				u64 dram_range_end)
+{
+	struct hl_device *hdev = ctx->hdev;
+	int rc;
+
+	hl_mmu_ctx_init(ctx);
+
+	mutex_init(&ctx->mem_hash_lock);
+	hash_init(ctx->mem_hash);
+
+	mutex_init(&ctx->host_va_range.lock);
+
+	rc = hl_va_range_init(hdev, &ctx->host_va_range, host_range_start,
+			host_range_end);
+	if (rc) {
+		dev_err(hdev->dev, "failed to init host vm range\n");
+		goto host_vm_err;
+	}
+
+	mutex_init(&ctx->dram_va_range.lock);
+
+	rc = hl_va_range_init(hdev, &ctx->dram_va_range, dram_range_start,
+			dram_range_end);
+	if (rc) {
+		dev_err(hdev->dev, "failed to init dram vm range\n");
+		goto dram_vm_err;
+	}
+
+	return 0;
+
+dram_vm_err:
+	mutex_destroy(&ctx->dram_va_range.lock);
+
+	mutex_lock(&ctx->host_va_range.lock);
+	clear_va_list_locked(hdev, &ctx->host_va_range.list);
+	mutex_unlock(&ctx->host_va_range.lock);
+host_vm_err:
+	mutex_destroy(&ctx->host_va_range.lock);
+	mutex_destroy(&ctx->mem_hash_lock);
+	hl_mmu_ctx_fini(ctx);
+
+	return rc;
+}
+
+int hl_vm_ctx_init(struct hl_ctx *ctx)
+{
+	struct asic_fixed_properties *prop = &ctx->hdev->asic_prop;
+	u64 host_range_start, host_range_end, dram_range_start,
+		dram_range_end;
+
+	atomic64_set(&ctx->dram_phys_mem, 0);
+
+	/*
+	 * - If MMU is enabled, init the ranges as usual.
+	 * - If MMU is disabled, in case of host mapping, the returned address
+	 *   is the given one.
+	 *   In case of DRAM mapping, the returned address is the physical
+	 *   address of the memory related to the given handle.
+	 */
+	if (ctx->hdev->mmu_enable) {
+		dram_range_start = prop->va_space_dram_start_address;
+		dram_range_end = prop->va_space_dram_end_address;
+		host_range_start = prop->va_space_host_start_address;
+		host_range_end = prop->va_space_host_end_address;
+	} else {
+		dram_range_start = prop->dram_user_base_address;
+		dram_range_end = prop->dram_end_address;
+		host_range_start = prop->dram_user_base_address;
+		host_range_end = prop->dram_end_address;
+	}
+
+	return hl_vm_ctx_init_with_ranges(ctx, host_range_start, host_range_end,
+			dram_range_start, dram_range_end);
+}
+
+/*
+ * hl_va_range_fini     - clear a virtual addresses range
+ *
+ * @hdev                : pointer to the habanalabs structure
+ * va_range             : pointer to virtual addresses range
+ *
+ * This function initializes the following:
+ * - Checks that the given range contains the whole initial range
+ * - Frees the virtual addresses block list and its lock
+ */
+static void hl_va_range_fini(struct hl_device *hdev,
+		struct hl_va_range *va_range)
+{
+	struct hl_vm_va_block *va_block;
+
+	if (list_empty(&va_range->list)) {
+		dev_warn(hdev->dev,
+				"va list should not be empty on cleanup!\n");
+		goto out;
+	}
+
+	if (!list_is_singular(&va_range->list)) {
+		dev_warn(hdev->dev,
+			"va list should not contain multiple blocks on cleanup!\n");
+		goto free_va_list;
+	}
+
+	va_block = list_first_entry(&va_range->list, typeof(*va_block), node);
+
+	if (va_block->start != va_range->start_addr ||
+		va_block->end != va_range->end_addr) {
+		dev_warn(hdev->dev,
+			"wrong va block on cleanup, from 0x%llx to 0x%llx\n",
+				va_block->start, va_block->end);
+		goto free_va_list;
+	}
+
+free_va_list:
+	mutex_lock(&va_range->lock);
+	clear_va_list_locked(hdev, &va_range->list);
+	mutex_unlock(&va_range->lock);
+
+out:
+	mutex_destroy(&va_range->lock);
+}
+
+/*
+ * hl_vm_ctx_fini       - virtual memory teardown of context
+ *
+ * @ctx                 : pointer to the habanalabs context structure
+ *
+ * This function perform teardown the following:
+ * - Virtual block list of available virtual memory
+ * - Virtual address to area descriptor hashtable
+ * - MMU for context
+ *
+ * In addition this function does the following:
+ * - Unmaps the existing hashtable nodes if the hashtable is not empty. The
+ *   hashtable should be empty as no valid mappings should exist at this
+ *   point.
+ * - Frees any existing physical page list from the idr which relates to the
+ *   current context asid.
+ * - This function checks the virtual block list for correctness. At this point
+ *   the list should contain one element which describes the whole virtual
+ *   memory range of the context. Otherwise, a warning is printed.
+ */
+void hl_vm_ctx_fini(struct hl_ctx *ctx)
+{
+	struct hl_device *hdev = ctx->hdev;
+	struct hl_vm *vm = &hdev->vm;
+	struct hl_vm_phys_pg_pack *phys_pg_list;
+	struct hl_vm_hash_node *hnode;
+	struct hlist_node *tmp_node;
+	int i;
+
+	if (!hash_empty(ctx->mem_hash))
+		dev_notice(hdev->dev, "ctx is freed while it has va in use\n");
+
+	hash_for_each_safe(ctx->mem_hash, i, tmp_node, hnode, node) {
+		dev_dbg(hdev->dev,
+			"hl_mem_hash_node of vaddr 0x%llx of asid %d is still alive\n",
+			hnode->vaddr, ctx->asid);
+		unmap_device_va(ctx, hnode->vaddr);
+	}
+
+	spin_lock(&vm->idr_lock);
+	idr_for_each_entry(&vm->phys_pg_pack_handles, phys_pg_list, i)
+		if (phys_pg_list->asid == ctx->asid) {
+			dev_dbg(hdev->dev,
+				"page list 0x%p of asid %d is still alive\n",
+				phys_pg_list, ctx->asid);
+			free_phys_pg_pack(hdev, phys_pg_list);
+			idr_remove(&vm->phys_pg_pack_handles, i);
+		}
+	spin_unlock(&vm->idr_lock);
+
+	hl_va_range_fini(hdev, &ctx->dram_va_range);
+	hl_va_range_fini(hdev, &ctx->host_va_range);
+
+	mutex_destroy(&ctx->mem_hash_lock);
+	hl_mmu_ctx_fini(ctx);
+}
+
+/*
+ * hl_vm_init           - initialize virtual memory module
+ *
+ * @hdev                : pointer to the habanalabs device structure
+ *
+ * This function initializes the following:
+ * - MMU module
+ * - DRAM physical pages pool of 2MB
+ * - Idr for device memory allocation handles
+ */
+int hl_vm_init(struct hl_device *hdev)
+{
+	struct asic_fixed_properties *prop = &hdev->asic_prop;
+	struct hl_vm *vm = &hdev->vm;
+	int rc;
+
+	rc = hl_mmu_init(hdev);
+	if (rc) {
+		dev_err(hdev->dev, "Failed to init MMU\n");
+		return rc;
+	}
+
+	vm->dram_pg_pool = gen_pool_create(__ffs(prop->dram_page_size), -1);
+	if (!vm->dram_pg_pool) {
+		dev_err(hdev->dev, "Failed to create dram page pool\n");
+		rc = -ENOMEM;
+		goto pool_create_err;
+	}
+
+	kref_init(&vm->dram_pg_pool_refcount);
+
+	rc = gen_pool_add(vm->dram_pg_pool, prop->dram_user_base_address,
+			prop->dram_end_address - prop->dram_user_base_address,
+			-1);
+
+	if (rc) {
+		dev_err(hdev->dev,
+			"Failed to add memory to dram page pool %d\n", rc);
+		goto pool_add_err;
+	}
+
+	spin_lock_init(&vm->idr_lock);
+	idr_init(&vm->phys_pg_pack_handles);
+
+	atomic64_set(&hdev->dram_used_mem, 0);
+
+	vm->init_done = true;
+
+	return 0;
+
+pool_add_err:
+	gen_pool_destroy(vm->dram_pg_pool);
+pool_create_err:
+	hl_mmu_fini(hdev);
+
+	return rc;
+}
+
+/*
+ * hl_vm_fini           - virtual memory module teardown
+ *
+ * @hdev                : pointer to the habanalabs device structure
+ *
+ * This function perform teardown to the following:
+ * - Idr for device memory allocation handles
+ * - DRAM physical pages pool of 2MB
+ * - MMU module
+ */
+void hl_vm_fini(struct hl_device *hdev)
+{
+	struct hl_vm *vm = &hdev->vm;
+
+	if (!vm->init_done)
+		return;
+
+	/*
+	 * At this point all the contexts should be freed and hence no DRAM
+	 * memory should be in use. Hence the DRAM pool should be freed here.
+	 */
+	if (kref_put(&vm->dram_pg_pool_refcount, dram_pg_pool_do_release) != 1)
+		dev_warn(hdev->dev, "dram_pg_pool was not destroyed on %s\n",
+				__func__);
+
+	hl_mmu_fini(hdev);
+
+	vm->init_done = false;
+}
diff --git a/drivers/misc/habanalabs/mmu.c b/drivers/misc/habanalabs/mmu.c
new file mode 100644
index 000000000000..79c70d92e74b
--- /dev/null
+++ b/drivers/misc/habanalabs/mmu.c
@@ -0,0 +1,691 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2016-2019 HabanaLabs, Ltd.
+ * All Rights Reserved.
+ */
+
+#include "habanalabs.h"
+#include "include/hw_ip/mmu/mmu_general.h"
+
+#include <linux/genalloc.h>
+#include <linux/slab.h>
+
+static struct pgt_info *get_pgt_info(struct hl_ctx *ctx, u64 addr)
+{
+	struct pgt_info *pgt_info = NULL;
+
+	hash_for_each_possible(ctx->mmu_hash, pgt_info, node,
+				(unsigned long) addr)
+		if (addr == pgt_info->addr)
+			break;
+
+	return pgt_info;
+}
+
+static void free_hop(struct hl_ctx *ctx, u64 hop_addr)
+{
+	struct pgt_info *pgt_info = get_pgt_info(ctx, hop_addr);
+
+	gen_pool_free(pgt_info->ctx->hdev->mmu_pgt_pool, pgt_info->addr,
+			ctx->hdev->asic_prop.mmu_hop_table_size);
+	hash_del(&pgt_info->node);
+
+	kfree(pgt_info);
+}
+
+static u64 alloc_hop(struct hl_ctx *ctx)
+{
+	struct hl_device *hdev = ctx->hdev;
+	struct pgt_info *pgt_info;
+	u64 addr;
+
+	pgt_info = kmalloc(sizeof(*pgt_info), GFP_KERNEL);
+	if (!pgt_info)
+		return ULLONG_MAX;
+
+	addr = (u64) gen_pool_alloc(hdev->mmu_pgt_pool,
+			hdev->asic_prop.mmu_hop_table_size);
+	if (!addr) {
+		dev_err(hdev->dev, "failed to allocate page\n");
+		kfree(pgt_info);
+		return ULLONG_MAX;
+	}
+
+	pgt_info->addr = addr;
+	pgt_info->ctx = ctx;
+	pgt_info->num_of_ptes = 0;
+	hash_add(ctx->mmu_hash, &pgt_info->node, addr);
+
+	return addr;
+}
+
+static inline void clear_pte(struct hl_device *hdev, u64 pte_addr)
+{
+	/* clear the last and present bits */
+	hdev->asic_funcs->write_pte(hdev, pte_addr, 0);
+}
+
+static inline void get_pte(struct hl_ctx *ctx, u64 hop_addr)
+{
+	get_pgt_info(ctx, hop_addr)->num_of_ptes++;
+}
+
+/*
+ * put_pte - decrement the num of ptes and free the hop if possible
+ *
+ * @ctx: pointer to the context structure
+ * @hop_addr: addr of the hop
+ *
+ * This function returns the number of ptes left on this hop. If the number is
+ * 0, it means the pte was freed.
+ */
+static inline int put_pte(struct hl_ctx *ctx, u64 hop_addr)
+{
+	struct pgt_info *pgt_info = get_pgt_info(ctx, hop_addr);
+	int num_of_ptes_left;
+
+	pgt_info->num_of_ptes--;
+
+	/*
+	 * Need to save the number of ptes left because free_hop might free
+	 * the pgt_info
+	 */
+	num_of_ptes_left = pgt_info->num_of_ptes;
+	if (!num_of_ptes_left)
+		free_hop(ctx, hop_addr);
+
+	return num_of_ptes_left;
+}
+
+static inline u64 get_hop0_addr(struct hl_ctx *ctx)
+{
+	return ctx->hdev->asic_prop.mmu_pgt_addr +
+			(ctx->asid * ctx->hdev->asic_prop.mmu_hop_table_size);
+}
+
+static inline u64 get_hopN_pte_addr(struct hl_ctx *ctx, u64 hop_addr,
+					u64 virt_addr, u64 mask, u64 shift)
+{
+	return hop_addr + ctx->hdev->asic_prop.mmu_pte_size *
+			((virt_addr & mask) >> shift);
+}
+
+static inline u64 get_hop0_pte_addr(struct hl_ctx *ctx, u64 hop_addr, u64 vaddr)
+{
+	return get_hopN_pte_addr(ctx, hop_addr, vaddr, HOP0_MASK, HOP0_SHIFT);
+}
+
+static inline u64 get_hop1_pte_addr(struct hl_ctx *ctx, u64 hop_addr, u64 vaddr)
+{
+	return get_hopN_pte_addr(ctx, hop_addr, vaddr, HOP1_MASK, HOP1_SHIFT);
+}
+
+static inline u64 get_hop2_pte_addr(struct hl_ctx *ctx, u64 hop_addr, u64 vaddr)
+{
+	return get_hopN_pte_addr(ctx, hop_addr, vaddr, HOP2_MASK, HOP2_SHIFT);
+}
+
+static inline u64 get_hop3_pte_addr(struct hl_ctx *ctx, u64 hop_addr, u64 vaddr)
+{
+	return get_hopN_pte_addr(ctx, hop_addr, vaddr, HOP3_MASK, HOP3_SHIFT);
+}
+
+static inline u64 get_hop4_pte_addr(struct hl_ctx *ctx, u64 hop_addr, u64 vaddr)
+{
+	return get_hopN_pte_addr(ctx, hop_addr, vaddr, HOP4_MASK, HOP4_SHIFT);
+}
+
+static inline u64 get_next_hop_addr(u64 curr_pte)
+{
+	if (curr_pte & PAGE_PRESENT_MASK)
+		return curr_pte & PHYS_ADDR_MASK;
+	else
+		return ULLONG_MAX;
+}
+
+static inline u64 get_alloc_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte,
+						bool *is_new_hop)
+{
+	u64 hop_addr = get_next_hop_addr(curr_pte);
+
+	if (hop_addr == ULLONG_MAX) {
+		hop_addr = alloc_hop(ctx);
+		*is_new_hop = true;
+	}
+
+	return hop_addr;
+}
+
+/*
+ * hl_mmu_init - init the mmu module
+ *
+ * @hdev: pointer to the habanalabs device structure
+ *
+ * This function does the following:
+ * - Allocate max_asid zeroed hop0 pgts so no mapping is available
+ * - Enable mmu in hw
+ * - Invalidate the mmu cache
+ * - Create a pool of pages for pgts
+ * - Returns 0 on success
+ *
+ * This function depends on DMA QMAN to be working!
+ */
+int hl_mmu_init(struct hl_device *hdev)
+{
+	struct asic_fixed_properties *prop = &hdev->asic_prop;
+	int rc;
+
+	if (!hdev->mmu_enable)
+		return 0;
+
+	/* MMU HW init was already done in device hw_init() */
+
+	mutex_init(&hdev->mmu_cache_lock);
+
+	hdev->mmu_pgt_pool =
+			gen_pool_create(__ffs(prop->mmu_hop_table_size), -1);
+
+	if (!hdev->mmu_pgt_pool) {
+		dev_err(hdev->dev, "Failed to create page gen pool\n");
+		rc = -ENOMEM;
+		goto err_pool_create;
+	}
+
+	rc = gen_pool_add(hdev->mmu_pgt_pool, prop->mmu_pgt_addr +
+			prop->mmu_hop0_tables_total_size,
+			prop->mmu_pgt_size - prop->mmu_hop0_tables_total_size,
+			-1);
+	if (rc) {
+		dev_err(hdev->dev, "Failed to add memory to page gen pool\n");
+		goto err_pool_add;
+	}
+
+	return 0;
+
+err_pool_add:
+	gen_pool_destroy(hdev->mmu_pgt_pool);
+err_pool_create:
+	mutex_destroy(&hdev->mmu_cache_lock);
+
+	return rc;
+}
+
+/*
+ * hl_mmu_fini - release the mmu module.
+ *
+ * @hdev: pointer to the habanalabs device structure
+ *
+ * This function does the following:
+ * - Disable mmu in hw
+ * - free the pgts pool
+ *
+ * All ctxs should be freed before calling this func
+ */
+void hl_mmu_fini(struct hl_device *hdev)
+{
+	if (!hdev->mmu_enable)
+		return;
+
+	gen_pool_destroy(hdev->mmu_pgt_pool);
+
+	mutex_destroy(&hdev->mmu_cache_lock);
+
+	/* MMU HW fini will be done in device hw_fini() */
+}
+
+/*
+ * hl_mmu_ctx_init - init a ctx for using the mmu module
+ *
+ * @ctx: pointer to the context structure
+ *
+ * This function does the following:
+ * - Init a mutex to protect the concurrent mapping flow
+ * - Init a hash to hold all pgts related to this ctx
+ */
+void hl_mmu_ctx_init(struct hl_ctx *ctx)
+{
+	if (!ctx->hdev->mmu_enable)
+		return;
+
+	mutex_init(&ctx->mmu_lock);
+	hash_init(ctx->mmu_hash);
+}
+
+/*
+ * hl_mmu_ctx_fini - disable a ctx from using the mmu module
+ *
+ * @ctx: pointer to the context structure
+ *
+ * This function does the following:
+ * - Free any pgts which were not freed yet
+ * - Free the mutex
+ */
+void hl_mmu_ctx_fini(struct hl_ctx *ctx)
+{
+	struct pgt_info *pgt_info;
+	struct hlist_node *tmp;
+	int i;
+
+	if (!ctx->hdev->mmu_enable)
+		return;
+
+	if (!hash_empty(ctx->mmu_hash))
+		dev_err(ctx->hdev->dev,
+				"ctx is freed while it has pgts in use\n");
+
+	hash_for_each_safe(ctx->mmu_hash, i, tmp, pgt_info, node) {
+		dev_err(ctx->hdev->dev,
+			"pgt_info of addr 0x%llx of asid %d was not destroyed, num_ptes: %d\n",
+			pgt_info->addr, ctx->asid, pgt_info->num_of_ptes);
+		free_hop(ctx, pgt_info->addr);
+	}
+
+	mutex_destroy(&ctx->mmu_lock);
+}
+
+static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr)
+{
+	struct hl_device *hdev = ctx->hdev;
+	u64 hop0_addr = 0, hop0_pte_addr = 0,
+		hop1_addr = 0, hop1_pte_addr = 0,
+		hop2_addr = 0, hop2_pte_addr = 0,
+		hop3_addr = 0, hop3_pte_addr = 0,
+		hop4_addr = 0, hop4_pte_addr = 0,
+		curr_pte;
+	int clear_hop3 = 1;
+
+	hop0_addr = get_hop0_addr(ctx);
+
+	hop0_pte_addr = get_hop0_pte_addr(ctx, hop0_addr, virt_addr);
+
+	curr_pte = hdev->asic_funcs->read_pte(hdev, hop0_pte_addr);
+
+	hop1_addr = get_next_hop_addr(curr_pte);
+
+	if (hop1_addr == ULLONG_MAX)
+		goto not_mapped;
+
+	hop1_pte_addr = get_hop1_pte_addr(ctx, hop1_addr, virt_addr);
+
+	curr_pte = hdev->asic_funcs->read_pte(hdev, hop1_pte_addr);
+
+	hop2_addr = get_next_hop_addr(curr_pte);
+
+	if (hop2_addr == ULLONG_MAX)
+		goto not_mapped;
+
+	hop2_pte_addr = get_hop2_pte_addr(ctx, hop2_addr, virt_addr);
+
+	curr_pte = hdev->asic_funcs->read_pte(hdev, hop2_pte_addr);
+
+	hop3_addr = get_next_hop_addr(curr_pte);
+
+	if (hop3_addr == ULLONG_MAX)
+		goto not_mapped;
+
+	hop3_pte_addr = get_hop3_pte_addr(ctx, hop3_addr, virt_addr);
+
+	curr_pte = hdev->asic_funcs->read_pte(hdev, hop3_pte_addr);
+
+	if (!(curr_pte & LAST_MASK)) {
+		hop4_addr = get_next_hop_addr(curr_pte);
+
+		if (hop4_addr == ULLONG_MAX)
+			goto not_mapped;
+
+		hop4_pte_addr = get_hop4_pte_addr(ctx, hop4_addr, virt_addr);
+
+		curr_pte = hdev->asic_funcs->read_pte(hdev, hop4_pte_addr);
+
+		clear_hop3 = 0;
+	}
+
+	if (!(curr_pte & PAGE_PRESENT_MASK))
+		goto not_mapped;
+
+	clear_pte(hdev, hop4_addr ? hop4_pte_addr : hop3_pte_addr);
+
+	if (hop4_addr && !put_pte(ctx, hop4_addr))
+		clear_hop3 = 1;
+
+	if (!clear_hop3)
+		goto flush;
+	clear_pte(hdev, hop3_pte_addr);
+
+	if (put_pte(ctx, hop3_addr))
+		goto flush;
+	clear_pte(hdev, hop2_pte_addr);
+
+	if (put_pte(ctx, hop2_addr))
+		goto flush;
+	clear_pte(hdev, hop1_pte_addr);
+
+	if (put_pte(ctx, hop1_addr))
+		goto flush;
+	clear_pte(hdev, hop0_pte_addr);
+
+flush:
+	/* flush all writes from all cores to reach PCI */
+	mb();
+
+	hdev->asic_funcs->read_pte(hdev,
+				hop4_addr ? hop4_pte_addr : hop3_pte_addr);
+
+	return 0;
+
+not_mapped:
+	dev_err(hdev->dev, "virt addr 0x%llx is not mapped to phys addr\n",
+		virt_addr);
+
+	return -EINVAL;
+}
+
+/*
+ * hl_mmu_unmap - unmaps a virtual addr
+ *
+ * @ctx: pointer to the context structure
+ * @virt_addr: virt addr to map from
+ * @page_size: size of the page to unmap
+ *
+ * This function does the following:
+ * - Check that the virt addr is mapped
+ * - Unmap the virt addr and frees pgts if possible
+ * - Returns 0 on success, -EINVAL if the given addr is not mapped
+ *
+ * Because this function changes the page tables in the device and because it
+ * changes the MMU hash, it must be protected by a lock.
+ * However, because it maps only a single page, the lock should be implemented
+ * in a higher level in order to protect the entire mapping of the memory area
+ */
+int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size)
+{
+	struct hl_device *hdev = ctx->hdev;
+	u64 real_virt_addr;
+	u32 real_page_size, npages;
+	int i, rc;
+
+	if (!hdev->mmu_enable)
+		return 0;
+
+	/*
+	 * The H/W handles mapping of 4KB/2MB page. Hence if the host page size
+	 * is bigger, we break it to sub-pages and unmap them separately.
+	 */
+	if ((page_size % PAGE_SIZE_2MB) == 0) {
+		real_page_size = PAGE_SIZE_2MB;
+	} else if ((page_size % PAGE_SIZE_4KB) == 0) {
+		real_page_size = PAGE_SIZE_4KB;
+	} else {
+		dev_err(hdev->dev,
+			"page size of %u is not 4KB nor 2MB aligned, can't unmap\n",
+				page_size);
+
+		return -EFAULT;
+	}
+
+	npages = page_size / real_page_size;
+	real_virt_addr = virt_addr;
+
+	for (i = 0 ; i < npages ; i++) {
+		rc = _hl_mmu_unmap(ctx, real_virt_addr);
+		if (rc)
+			return rc;
+
+		real_virt_addr += real_page_size;
+	}
+
+	return 0;
+}
+
+static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
+		u32 page_size)
+{
+	struct hl_device *hdev = ctx->hdev;
+	u64 hop0_addr = 0, hop0_pte_addr = 0,
+		hop1_addr = 0, hop1_pte_addr = 0,
+		hop2_addr = 0, hop2_pte_addr = 0,
+		hop3_addr = 0, hop3_pte_addr = 0,
+		hop4_addr = 0, hop4_pte_addr = 0,
+		curr_pte = 0;
+	bool hop1_new = false, hop2_new = false, hop3_new = false,
+		hop4_new = false, is_huge;
+	int rc = -ENOMEM;
+
+	/*
+	 * This mapping function can map a 4KB/2MB page. For 2MB page there are
+	 * only 3 hops rather than 4. Currently the DRAM allocation uses 2MB
+	 * pages only but user memory could have been allocated with one of the
+	 * two page sizes. Since this is a common code for all the three cases,
+	 * we need this hugs page check.
+	 */
+	is_huge = page_size == PAGE_SIZE_2MB;
+
+	hop0_addr = get_hop0_addr(ctx);
+
+	hop0_pte_addr = get_hop0_pte_addr(ctx, hop0_addr, virt_addr);
+
+	curr_pte = hdev->asic_funcs->read_pte(hdev, hop0_pte_addr);
+
+	hop1_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop1_new);
+
+	if (hop1_addr == ULLONG_MAX)
+		goto err;
+
+	hop1_pte_addr = get_hop1_pte_addr(ctx, hop1_addr, virt_addr);
+
+	curr_pte = hdev->asic_funcs->read_pte(hdev, hop1_pte_addr);
+
+	hop2_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop2_new);
+
+	if (hop2_addr == ULLONG_MAX)
+		goto err;
+
+	hop2_pte_addr = get_hop2_pte_addr(ctx, hop2_addr, virt_addr);
+
+	curr_pte = hdev->asic_funcs->read_pte(hdev, hop2_pte_addr);
+
+	hop3_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop3_new);
+
+	if (hop3_addr == ULLONG_MAX)
+		goto err;
+
+	hop3_pte_addr = get_hop3_pte_addr(ctx, hop3_addr, virt_addr);
+
+	curr_pte = hdev->asic_funcs->read_pte(hdev, hop3_pte_addr);
+
+	if (!is_huge) {
+		hop4_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop4_new);
+
+		if (hop4_addr == ULLONG_MAX)
+			goto err;
+
+		hop4_pte_addr = get_hop4_pte_addr(ctx, hop4_addr, virt_addr);
+
+		curr_pte = hdev->asic_funcs->read_pte(hdev, hop4_pte_addr);
+	}
+
+	if (curr_pte & PAGE_PRESENT_MASK) {
+		dev_err(hdev->dev,
+				"mapping already exists for virt_addr 0x%llx\n",
+					virt_addr);
+
+		dev_dbg(hdev->dev, "hop0 pte: 0x%llx (0x%llx)\n",
+				hdev->asic_funcs->read_pte(hdev, hop0_pte_addr),
+				hop0_pte_addr);
+		dev_dbg(hdev->dev, "hop1 pte: 0x%llx (0x%llx)\n",
+				hdev->asic_funcs->read_pte(hdev, hop1_pte_addr),
+				hop1_pte_addr);
+		dev_dbg(hdev->dev, "hop2 pte: 0x%llx (0x%llx)\n",
+				hdev->asic_funcs->read_pte(hdev, hop2_pte_addr),
+				hop2_pte_addr);
+		dev_dbg(hdev->dev, "hop3 pte: 0x%llx (0x%llx)\n",
+				hdev->asic_funcs->read_pte(hdev, hop3_pte_addr),
+				hop3_pte_addr);
+
+		if (!is_huge)
+			dev_dbg(hdev->dev, "hop4 pte: 0x%llx (0x%llx)\n",
+				hdev->asic_funcs->read_pte(hdev,
+							hop4_pte_addr),
+							hop4_pte_addr);
+
+		rc = EINVAL;
+		goto err;
+	}
+
+	curr_pte = (phys_addr & PTE_PHYS_ADDR_MASK) | LAST_MASK
+			| PAGE_PRESENT_MASK;
+
+	hdev->asic_funcs->write_pte(hdev,
+				is_huge ? hop3_pte_addr : hop4_pte_addr,
+				curr_pte);
+
+	if (hop1_new) {
+		curr_pte = (hop1_addr & PTE_PHYS_ADDR_MASK) |
+				PAGE_PRESENT_MASK;
+		ctx->hdev->asic_funcs->write_pte(ctx->hdev, hop0_pte_addr,
+				curr_pte);
+	}
+	if (hop2_new) {
+		curr_pte = (hop2_addr & PTE_PHYS_ADDR_MASK) |
+				PAGE_PRESENT_MASK;
+		ctx->hdev->asic_funcs->write_pte(ctx->hdev, hop1_pte_addr,
+				curr_pte);
+		get_pte(ctx, hop1_addr);
+	}
+	if (hop3_new) {
+		curr_pte = (hop3_addr & PTE_PHYS_ADDR_MASK) |
+				PAGE_PRESENT_MASK;
+		ctx->hdev->asic_funcs->write_pte(ctx->hdev, hop2_pte_addr,
+				curr_pte);
+		get_pte(ctx, hop2_addr);
+	}
+
+	if (!is_huge) {
+		if (hop4_new) {
+			curr_pte = (hop4_addr & PTE_PHYS_ADDR_MASK) |
+					PAGE_PRESENT_MASK;
+			ctx->hdev->asic_funcs->write_pte(ctx->hdev,
+					hop3_pte_addr, curr_pte);
+			get_pte(ctx, hop3_addr);
+		}
+
+		get_pte(ctx, hop4_addr);
+	} else {
+		get_pte(ctx, hop3_addr);
+	}
+
+	/* flush all writes from all cores to reach PCI */
+	mb();
+
+	hdev->asic_funcs->read_pte(hdev,
+				is_huge ? hop3_pte_addr : hop4_pte_addr);
+
+	return 0;
+
+err:
+	if (hop4_new)
+		free_hop(ctx, hop4_addr);
+	if (hop3_new)
+		free_hop(ctx, hop3_addr);
+	if (hop2_new)
+		free_hop(ctx, hop2_addr);
+	if (hop1_new)
+		free_hop(ctx, hop1_addr);
+
+	return rc;
+}
+
+/*
+ * hl_mmu_map - maps a virtual addr to physical addr
+ *
+ * @ctx: pointer to the context structure
+ * @virt_addr: virt addr to map from
+ * @phys_addr: phys addr to map to
+ * @page_size: physical page size
+ *
+ * This function does the following:
+ * - Check that the virt addr is not mapped
+ * - Allocate pgts as necessary in order to map the virt addr to the phys
+ * - Returns 0 on success, -EINVAL if addr is already mapped, or -ENOMEM.
+ *
+ * Because this function changes the page tables in the device and because it
+ * changes the MMU hash, it must be protected by a lock.
+ * However, because it maps only a single page, the lock should be implemented
+ * in a higher level in order to protect the entire mapping of the memory area
+ */
+int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size)
+{
+	struct hl_device *hdev = ctx->hdev;
+	u64 real_virt_addr;
+	u32 real_page_size, npages;
+	int i, rc, mapped_cnt = 0;
+
+	if (!hdev->mmu_enable)
+		return 0;
+
+	/*
+	 * The H/W handles mapping of 4KB/2MB page. Hence if the host page size
+	 * is bigger, we break it to sub-pages and map them separately.
+	 */
+	if ((page_size % PAGE_SIZE_2MB) == 0) {
+		real_page_size = PAGE_SIZE_2MB;
+	} else if ((page_size % PAGE_SIZE_4KB) == 0) {
+		real_page_size = PAGE_SIZE_4KB;
+	} else {
+		dev_err(hdev->dev,
+			"page size of %u is not 4KB nor 2MB aligned, can't map\n",
+				page_size);
+
+		return -EFAULT;
+	}
+
+	npages = page_size / real_page_size;
+	real_virt_addr = virt_addr;
+
+	for (i = 0 ; i < npages ; i++) {
+		rc = _hl_mmu_map(ctx, real_virt_addr, phys_addr,
+				real_page_size);
+		if (rc)
+			goto err;
+
+		real_virt_addr += real_page_size;
+		mapped_cnt++;
+	}
+
+	return 0;
+
+err:
+	real_virt_addr = virt_addr;
+	for (i = 0 ; i < mapped_cnt ; i++) {
+		if (_hl_mmu_unmap(ctx, real_virt_addr))
+			dev_warn_ratelimited(hdev->dev,
+				"failed to unmap va: 0x%llx\n", real_virt_addr);
+
+		real_virt_addr += real_page_size;
+	}
+
+	return rc;
+}
+
+/*
+ * hl_mmu_swap_out - marks all mapping of the given ctx as swapped out
+ *
+ * @ctx: pointer to the context structure
+ *
+ */
+void hl_mmu_swap_out(struct hl_ctx *ctx)
+{
+
+}
+
+/*
+ * hl_mmu_swap_in - marks all mapping of the given ctx as swapped in
+ *
+ * @ctx: pointer to the context structure
+ *
+ */
+void hl_mmu_swap_in(struct hl_ctx *ctx)
+{
+
+}
diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h
index fba49417f607..9015043887d1 100644
--- a/include/uapi/misc/habanalabs.h
+++ b/include/uapi/misc/habanalabs.h
@@ -162,6 +162,108 @@ union hl_wait_cs_args {
 	struct hl_wait_cs_out out;
 };
 
+/* Opcode to alloc device memory */
+#define HL_MEM_OP_ALLOC			0
+/* Opcode to free previously allocated device memory */
+#define HL_MEM_OP_FREE			1
+/* Opcode to map host memory */
+#define HL_MEM_OP_MAP			2
+/* Opcode to unmap previously mapped host memory */
+#define HL_MEM_OP_UNMAP			3
+
+/* Memory flags */
+#define HL_MEM_CONTIGUOUS	0x1
+#define HL_MEM_SHARED		0x2
+#define HL_MEM_USERPTR		0x4
+
+struct hl_mem_in {
+	union {
+		/* HL_MEM_OP_ALLOC- allocate device memory */
+		struct {
+			/* Size to alloc */
+			__u32 mem_size;
+			__u32 pad;
+		} alloc;
+
+		/* HL_MEM_OP_FREE - free device memory */
+		struct {
+			/* Handle returned from HL_MEM_OP_ALLOC */
+			__u64 handle;
+		} free;
+
+		/* HL_MEM_OP_MAP - map device memory */
+		struct {
+			/*
+			 * Requested virtual address of mapped memory.
+			 * KMD will try to map the requested region to this
+			 * hint address, as long as the address is valid and
+			 * not already mapped. The user should check the
+			 * returned address of the IOCTL to make sure he got
+			 * the hint address. Passing 0 here means that KMD
+			 * will choose the address itself.
+			 */
+			__u64 hint_addr;
+			/* Handle returned from HL_MEM_OP_ALLOC */
+			__u64 handle;
+		} map_device;
+
+		/* HL_MEM_OP_MAP - map host memory */
+		struct {
+			/* Address of allocated host memory */
+			__u64 host_virt_addr;
+			/*
+			 * Requested virtual address of mapped memory.
+			 * KMD will try to map the requested region to this
+			 * hint address, as long as the address is valid and
+			 * not already mapped. The user should check the
+			 * returned address of the IOCTL to make sure he got
+			 * the hint address. Passing 0 here means that KMD
+			 * will choose the address itself.
+			 */
+			__u64 hint_addr;
+			/* Size of allocated host memory */
+			__u32 mem_size;
+			__u32 pad;
+		} map_host;
+
+		/* HL_MEM_OP_UNMAP - unmap host memory */
+		struct {
+			/* Virtual address returned from HL_MEM_OP_MAP */
+			__u64 device_virt_addr;
+		} unmap;
+	};
+
+	/* HL_MEM_OP_* */
+	__u32 op;
+	/* HL_MEM_* flags */
+	__u32 flags;
+	/* Context ID - Currently not in use */
+	__u32 ctx_id;
+	__u32 pad;
+};
+
+struct hl_mem_out {
+	union {
+		/*
+		 * Used for HL_MEM_OP_MAP as the virtual address that was
+		 * assigned in the device VA space.
+		 * A value of 0 means the requested operation failed.
+		 */
+		__u64 device_virt_addr;
+
+		/*
+		 * Used for HL_MEM_OP_ALLOC. This is the assigned
+		 * handle for the allocated memory
+		 */
+		__u64 handle;
+	};
+};
+
+union hl_mem_args {
+	struct hl_mem_in in;
+	struct hl_mem_out out;
+};
+
 /*
  * Command Buffer
  * - Request a Command Buffer
@@ -245,7 +347,25 @@ union hl_wait_cs_args {
 #define HL_IOCTL_WAIT_CS			\
 		_IOWR('H', 0x04, union hl_wait_cs_args)
 
+/*
+ * Memory
+ * - Map host memory to device MMU
+ * - Unmap host memory from device MMU
+ *
+ * This IOCTL allows the user to map host memory to the device MMU
+ *
+ * For host memory, the IOCTL doesn't allocate memory. The user is supposed
+ * to allocate the memory in user-space (malloc/new). The driver pins the
+ * physical pages (up to the allowed limit by the OS), assigns a virtual
+ * address in the device VA space and initializes the device MMU.
+ *
+ * There is an option for the user to specify the requested virtual address.
+ *
+ */
+#define HL_IOCTL_MEMORY		\
+		_IOWR('H', 0x05, union hl_mem_args)
+
 #define HL_COMMAND_START	0x02
-#define HL_COMMAND_END		0x05
+#define HL_COMMAND_END		0x06
 
 #endif /* HABANALABS_H_ */
-- 
cgit v1.2.3


From d8dd7b0a81cc192ef5d30ec76ed6f6d35a1a7cf5 Mon Sep 17 00:00:00 2001
From: Oded Gabbay <oded.gabbay@gmail.com>
Date: Sat, 16 Feb 2019 00:39:23 +0200
Subject: habanalabs: implement INFO IOCTL

This patch implements the INFO IOCTL. That IOCTL is used by the user to
query information that is relevant/needed by the user in order to submit
deep learning jobs to Goya.

The information is divided into several categories, such as H/W IP, Events
that happened, DDR usage and more.

Reviewed-by: Mike Rapoport <rppt@linux.ibm.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/habanalabs/goya/goya.c        |   6 ++
 drivers/misc/habanalabs/habanalabs.h       |   2 +
 drivers/misc/habanalabs/habanalabs_ioctl.c | 126 +++++++++++++++++++++++++++++
 include/uapi/misc/habanalabs.h             |  75 ++++++++++++++++-
 4 files changed, 208 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index 89b82b989966..bf3f76f1aeae 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -5131,6 +5131,11 @@ static void goya_hw_queues_unlock(struct hl_device *hdev)
 	spin_unlock(&goya->hw_queues_lock);
 }
 
+static u32 goya_get_pci_id(struct hl_device *hdev)
+{
+	return hdev->pdev->device;
+}
+
 int goya_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size)
 {
 	struct goya_device *goya = hdev->asic_specific;
@@ -5232,6 +5237,7 @@ static const struct hl_asic_funcs goya_funcs = {
 	.soft_reset_late_init = goya_soft_reset_late_init,
 	.hw_queues_lock = goya_hw_queues_lock,
 	.hw_queues_unlock = goya_hw_queues_unlock,
+	.get_pci_id = goya_get_pci_id,
 	.get_eeprom_data = goya_get_eeprom_data,
 	.send_cpu_message = goya_send_cpu_message,
 	.get_hw_state = goya_get_hw_state
diff --git a/drivers/misc/habanalabs/habanalabs.h b/drivers/misc/habanalabs/habanalabs.h
index 03085e7a12dd..02de4a2cab27 100644
--- a/drivers/misc/habanalabs/habanalabs.h
+++ b/drivers/misc/habanalabs/habanalabs.h
@@ -470,6 +470,7 @@ enum hl_pll_frequency {
  * @soft_reset_late_init: perform certain actions needed after soft reset.
  * @hw_queues_lock: acquire H/W queues lock.
  * @hw_queues_unlock: release H/W queues lock.
+ * @get_pci_id: retrieve PCI ID.
  * @get_eeprom_data: retrieve EEPROM data from F/W.
  * @send_cpu_message: send buffer to ArmCP.
  * @get_hw_state: retrieve the H/W state
@@ -539,6 +540,7 @@ struct hl_asic_funcs {
 	int (*soft_reset_late_init)(struct hl_device *hdev);
 	void (*hw_queues_lock)(struct hl_device *hdev);
 	void (*hw_queues_unlock)(struct hl_device *hdev);
+	u32 (*get_pci_id)(struct hl_device *hdev);
 	int (*get_eeprom_data)(struct hl_device *hdev, void *data,
 				size_t max_size);
 	int (*send_cpu_message)(struct hl_device *hdev, u32 *msg,
diff --git a/drivers/misc/habanalabs/habanalabs_ioctl.c b/drivers/misc/habanalabs/habanalabs_ioctl.c
index 6e4dc5b5e696..12408d3302e9 100644
--- a/drivers/misc/habanalabs/habanalabs_ioctl.c
+++ b/drivers/misc/habanalabs/habanalabs_ioctl.c
@@ -12,10 +12,136 @@
 #include <linux/uaccess.h>
 #include <linux/slab.h>
 
+static int hw_ip_info(struct hl_device *hdev, struct hl_info_args *args)
+{
+	struct hl_info_hw_ip_info hw_ip = {0};
+	u32 size = args->return_size;
+	void __user *out = (void __user *) (uintptr_t) args->return_pointer;
+	struct asic_fixed_properties *prop = &hdev->asic_prop;
+	u64 sram_kmd_size, dram_kmd_size;
+
+	if ((!size) || (!out))
+		return -EINVAL;
+
+	sram_kmd_size = (prop->sram_user_base_address -
+				prop->sram_base_address);
+	dram_kmd_size = (prop->dram_user_base_address -
+				prop->dram_base_address);
+
+	hw_ip.device_id = hdev->asic_funcs->get_pci_id(hdev);
+	hw_ip.sram_base_address = prop->sram_user_base_address;
+	hw_ip.dram_base_address = prop->dram_user_base_address;
+	hw_ip.tpc_enabled_mask = prop->tpc_enabled_mask;
+	hw_ip.sram_size = prop->sram_size - sram_kmd_size;
+	hw_ip.dram_size = prop->dram_size - dram_kmd_size;
+	if (hw_ip.dram_size > 0)
+		hw_ip.dram_enabled = 1;
+	hw_ip.num_of_events = prop->num_of_events;
+	memcpy(hw_ip.armcp_version,
+		prop->armcp_info.armcp_version, VERSION_MAX_LEN);
+	hw_ip.armcp_cpld_version = prop->armcp_info.cpld_version;
+	hw_ip.psoc_pci_pll_nr = prop->psoc_pci_pll_nr;
+	hw_ip.psoc_pci_pll_nf = prop->psoc_pci_pll_nf;
+	hw_ip.psoc_pci_pll_od = prop->psoc_pci_pll_od;
+	hw_ip.psoc_pci_pll_div_factor = prop->psoc_pci_pll_div_factor;
+
+	return copy_to_user(out, &hw_ip,
+		min((size_t)size, sizeof(hw_ip))) ? -EFAULT : 0;
+}
+
+static int hw_events_info(struct hl_device *hdev, struct hl_info_args *args)
+{
+	u32 size, max_size = args->return_size;
+	void __user *out = (void __user *) (uintptr_t) args->return_pointer;
+	void *arr;
+
+	if ((!max_size) || (!out))
+		return -EINVAL;
+
+	arr = hdev->asic_funcs->get_events_stat(hdev, &size);
+
+	return copy_to_user(out, arr, min(max_size, size)) ? -EFAULT : 0;
+}
+
+static int dram_usage_info(struct hl_device *hdev, struct hl_info_args *args)
+{
+	struct hl_info_dram_usage dram_usage = {0};
+	u32 max_size = args->return_size;
+	void __user *out = (void __user *) (uintptr_t) args->return_pointer;
+	struct asic_fixed_properties *prop = &hdev->asic_prop;
+	u64 dram_kmd_size;
+
+	if ((!max_size) || (!out))
+		return -EINVAL;
+
+	dram_kmd_size = (prop->dram_user_base_address -
+				prop->dram_base_address);
+	dram_usage.dram_free_mem = (prop->dram_size - dram_kmd_size) -
+					atomic64_read(&hdev->dram_used_mem);
+	dram_usage.ctx_dram_mem = atomic64_read(&hdev->user_ctx->dram_phys_mem);
+
+	return copy_to_user(out, &dram_usage,
+		min((size_t) max_size, sizeof(dram_usage))) ? -EFAULT : 0;
+}
+
+static int hw_idle(struct hl_device *hdev, struct hl_info_args *args)
+{
+	struct hl_info_hw_idle hw_idle = {0};
+	u32 max_size = args->return_size;
+	void __user *out = (void __user *) (uintptr_t) args->return_pointer;
+
+	if ((!max_size) || (!out))
+		return -EINVAL;
+
+	hw_idle.is_idle = hdev->asic_funcs->is_device_idle(hdev);
+
+	return copy_to_user(out, &hw_idle,
+		min((size_t) max_size, sizeof(hw_idle))) ? -EFAULT : 0;
+}
+
+static int hl_info_ioctl(struct hl_fpriv *hpriv, void *data)
+{
+	struct hl_info_args *args = data;
+	struct hl_device *hdev = hpriv->hdev;
+	int rc;
+
+	if (hl_device_disabled_or_in_reset(hdev)) {
+		dev_err(hdev->dev,
+			"Device is disabled or in reset. Can't execute INFO IOCTL\n");
+		return -EBUSY;
+	}
+
+	switch (args->op) {
+	case HL_INFO_HW_IP_INFO:
+		rc = hw_ip_info(hdev, args);
+		break;
+
+	case HL_INFO_HW_EVENTS:
+		rc = hw_events_info(hdev, args);
+		break;
+
+	case HL_INFO_DRAM_USAGE:
+		rc = dram_usage_info(hdev, args);
+		break;
+
+	case HL_INFO_HW_IDLE:
+		rc = hw_idle(hdev, args);
+		break;
+
+	default:
+		dev_err(hdev->dev, "Invalid request %d\n", args->op);
+		rc = -ENOTTY;
+		break;
+	}
+
+	return rc;
+}
+
 #define HL_IOCTL_DEF(ioctl, _func) \
 	[_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func}
 
 static const struct hl_ioctl_desc hl_ioctls[] = {
+	HL_IOCTL_DEF(HL_IOCTL_INFO, hl_info_ioctl),
 	HL_IOCTL_DEF(HL_IOCTL_CB, hl_cb_ioctl),
 	HL_IOCTL_DEF(HL_IOCTL_CS, hl_cs_ioctl),
 	HL_IOCTL_DEF(HL_IOCTL_WAIT_CS, hl_cs_wait_ioctl),
diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h
index 9015043887d1..4afc1891ece8 100644
--- a/include/uapi/misc/habanalabs.h
+++ b/include/uapi/misc/habanalabs.h
@@ -45,6 +45,62 @@ enum goya_queue_id {
 	GOYA_QUEUE_ID_SIZE
 };
 
+/* Opcode for management ioctl */
+#define HL_INFO_HW_IP_INFO	0
+#define HL_INFO_HW_EVENTS	1
+#define HL_INFO_DRAM_USAGE	2
+#define HL_INFO_HW_IDLE		3
+
+#define HL_INFO_VERSION_MAX_LEN	128
+
+struct hl_info_hw_ip_info {
+	__u64 sram_base_address;
+	__u64 dram_base_address;
+	__u64 dram_size;
+	__u32 sram_size;
+	__u32 num_of_events;
+	__u32 device_id; /* PCI Device ID */
+	__u32 reserved[3];
+	__u32 armcp_cpld_version;
+	__u32 psoc_pci_pll_nr;
+	__u32 psoc_pci_pll_nf;
+	__u32 psoc_pci_pll_od;
+	__u32 psoc_pci_pll_div_factor;
+	__u8 tpc_enabled_mask;
+	__u8 dram_enabled;
+	__u8 pad[2];
+	__u8 armcp_version[HL_INFO_VERSION_MAX_LEN];
+};
+
+struct hl_info_dram_usage {
+	__u64 dram_free_mem;
+	__u64 ctx_dram_mem;
+};
+
+struct hl_info_hw_idle {
+	__u32 is_idle;
+	__u32 pad;
+};
+
+struct hl_info_args {
+	/* Location of relevant struct in userspace */
+	__u64 return_pointer;
+	/*
+	 * The size of the return value. Just like "size" in "snprintf",
+	 * it limits how many bytes the kernel can write
+	 *
+	 * For hw_events array, the size should be
+	 * hl_info_hw_ip_info.num_of_events * sizeof(__u32)
+	 */
+	__u32 return_size;
+
+	/* HL_INFO_* */
+	__u32 op;
+
+	/* Context ID - Currently not in use */
+	__u32 ctx_id;
+	__u32 pad;
+};
 
 /* Opcode to create a new command buffer */
 #define HL_CB_OP_CREATE		0
@@ -264,6 +320,23 @@ union hl_mem_args {
 	struct hl_mem_out out;
 };
 
+/*
+ * Various information operations such as:
+ * - H/W IP information
+ * - Current dram usage
+ *
+ * The user calls this IOCTL with an opcode that describes the required
+ * information. The user should supply a pointer to a user-allocated memory
+ * chunk, which will be filled by the driver with the requested information.
+ *
+ * The user supplies the maximum amount of size to copy into the user's memory,
+ * in order to prevent data corruption in case of differences between the
+ * definitions of structures in kernel and userspace, e.g. in case of old
+ * userspace and new kernel driver
+ */
+#define HL_IOCTL_INFO	\
+		_IOWR('H', 0x01, struct hl_info_args)
+
 /*
  * Command Buffer
  * - Request a Command Buffer
@@ -365,7 +438,7 @@ union hl_mem_args {
 #define HL_IOCTL_MEMORY		\
 		_IOWR('H', 0x05, union hl_mem_args)
 
-#define HL_COMMAND_START	0x02
+#define HL_COMMAND_START	0x01
 #define HL_COMMAND_END		0x06
 
 #endif /* HABANALABS_H_ */
-- 
cgit v1.2.3


From aa342261bde5ff177935c3a4f17f0bf2bf81babf Mon Sep 17 00:00:00 2001
From: Erik Schmauss <erik.schmauss@intel.com>
Date: Fri, 15 Feb 2019 13:36:06 -0800
Subject: ACPICA: Remove legacy module-level code support

ACPICA commit 47f5607c204719d9239a12b889df725225098c8f

Module-level code refers to executable ASL code that runs during
table load. This is typically used in ASL to declare named objects
based on a condition evaluated during table load like so:

definition_block(...)
{
  opreation_region (OPR1, system_memory, ...)
  Field (OPR1)
  {
    FLD1, 8 /* Assume that FLD1's value is 0x1 */
  }

  /* The if statement below is referred to as module-level code */

  If (FLD1)
  {
    /* Declare DEV1 conditionally */
    Device (DEV1) {...}
  }

  Device (DEV2)
  {
    ...
  }
}

In legacy module-level code, the execution of the If statement
was deferred after other modules were loaded. The order of
code execution for the table above is the following:

1.) Load OPR1 to the ACPI Namespace
2.) Load FLD1 to the ACPI Namespace (not intended for drivers)
3.) Load DEV2 to the ACPI Namespace
4.) Execute If (FLD1) and load DEV1 if the condition is true

This legacy approach can be problematic for tables that look like the
following:

definition_block(...)
{
  opreation_region (OPR1, system_memory, ...)
  Field (OPR1)
  {
    FLD1, 8 /* Assume that FLD1's value is 0x1 */
  }

  /* The if statement below is referred to as module-level code */

  If (FLD1)
  {
    /* Declare DEV1 conditionally */
    Device (DEV1) {...}
  }

  Scope (DEV1)
  {
    /* Add objects DEV1's scope */
    Name (OBJ1, 0x1234)
  }
}

When loading this in the legacy approach, Scope DEV1 gets evaluated
before the If statement. The following is the order of execution:

1.) Load OPR1 to the ACPI Namespace
2.) Load FLD1 to the ACPI Namespace (not intended for drivers)
3.) Add OBJ1 under DEV1's scope -- ERROR. DEV1 does not exist
4.) Execute If (FLD1) and load DEV1 if the condition is true

The legacy approach can never succeed for tables like this due to the
deferral of the module-level code. Due to this limitation, a new
module-level code was developed. This new approach exeutes if
statements in the order that they appear in the definition block.
With this approach, the order of execution for the above defintion
block is as follows:

1.) Load OPR1 to the ACPI Namespace
2.) Load FLD1 to the ACPI Namespace (not intended for drivers)
3.) Execute If (FLD1) and load DEV1 because the condition is true
4.) Add OBJ1 under DEV1's scope.

Since DEV1 is loaded in the namespace in step 3, step 4 executes
successfully.

This change removes support for the legacy module-level code
execution. From this point onward, the new module-level code
execution will be the official approach.

Link: https://github.com/acpica/acpica/commit/47f5607c
Signed-off-by: Erik Schmauss <erik.schmauss@intel.com>
Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/acpica/evrgnini.c |  19 ----
 drivers/acpi/acpica/nsparse.c  |  66 +++-----------
 drivers/acpi/acpica/psloop.c   | 193 -----------------------------------------
 drivers/acpi/acpica/tbxfload.c |  25 +++---
 include/acpi/acpixf.h          |   8 --
 5 files changed, 22 insertions(+), 289 deletions(-)

(limited to 'include')

diff --git a/drivers/acpi/acpica/evrgnini.c b/drivers/acpi/acpica/evrgnini.c
index 163dd4a142eb..0b47bbcd2a23 100644
--- a/drivers/acpi/acpica/evrgnini.c
+++ b/drivers/acpi/acpica/evrgnini.c
@@ -516,25 +516,6 @@ acpi_status acpi_ev_initialize_region(union acpi_operand_object *region_obj)
 				handler_obj = obj_desc->common_notify.handler;
 				break;
 
-			case ACPI_TYPE_METHOD:
-				/*
-				 * If we are executing module level code, the original
-				 * Node's object was replaced by this Method object and we
-				 * saved the handler in the method object.
-				 *
-				 * Note: Only used for the legacy MLC support. Will
-				 * be removed in the future.
-				 *
-				 * See acpi_ns_exec_module_code
-				 */
-				if (!acpi_gbl_execute_tables_as_methods &&
-				    obj_desc->method.
-				    info_flags & ACPI_METHOD_MODULE_LEVEL) {
-					handler_obj =
-					    obj_desc->method.dispatch.handler;
-				}
-				break;
-
 			default:
 
 				/* Ignore other objects */
diff --git a/drivers/acpi/acpica/nsparse.c b/drivers/acpi/acpica/nsparse.c
index bfa408f7fd39..c0b4f7bedfab 100644
--- a/drivers/acpi/acpica/nsparse.c
+++ b/drivers/acpi/acpica/nsparse.c
@@ -253,61 +253,19 @@ acpi_ns_parse_table(u32 table_index, struct acpi_namespace_node *start_node)
 
 	ACPI_FUNCTION_TRACE(ns_parse_table);
 
-	if (acpi_gbl_execute_tables_as_methods) {
-		/*
-		 * This case executes the AML table as one large control method.
-		 * The point of this is to execute any module-level code in-place
-		 * as the table is parsed. Some AML code depends on this behavior.
-		 *
-		 * It is a run-time option at this time, but will eventually become
-		 * the default.
-		 *
-		 * Note: This causes the table to only have a single-pass parse.
-		 * However, this is compatible with other ACPI implementations.
-		 */
-		ACPI_DEBUG_PRINT_RAW((ACPI_DB_PARSE,
-				      "%s: **** Start table execution pass\n",
-				      ACPI_GET_FUNCTION_NAME));
-
-		status = acpi_ns_execute_table(table_index, start_node);
-		if (ACPI_FAILURE(status)) {
-			return_ACPI_STATUS(status);
-		}
-	} else {
-		/*
-		 * AML Parse, pass 1
-		 *
-		 * In this pass, we load most of the namespace. Control methods
-		 * are not parsed until later. A parse tree is not created.
-		 * Instead, each Parser Op subtree is deleted when it is finished.
-		 * This saves a great deal of memory, and allows a small cache of
-		 * parse objects to service the entire parse. The second pass of
-		 * the parse then performs another complete parse of the AML.
-		 */
-		ACPI_DEBUG_PRINT((ACPI_DB_PARSE, "**** Start pass 1\n"));
-
-		status = acpi_ns_one_complete_parse(ACPI_IMODE_LOAD_PASS1,
-						    table_index, start_node);
-		if (ACPI_FAILURE(status)) {
-			return_ACPI_STATUS(status);
-		}
+	/*
+	 * Executes the AML table as one large control method.
+	 * The point of this is to execute any module-level code in-place
+	 * as the table is parsed. Some AML code depends on this behavior.
+	 *
+	 * Note: This causes the table to only have a single-pass parse.
+	 * However, this is compatible with other ACPI implementations.
+	 */
+	ACPI_DEBUG_PRINT_RAW((ACPI_DB_PARSE,
+			      "%s: **** Start table execution pass\n",
+			      ACPI_GET_FUNCTION_NAME));
 
-		/*
-		 * AML Parse, pass 2
-		 *
-		 * In this pass, we resolve forward references and other things
-		 * that could not be completed during the first pass.
-		 * Another complete parse of the AML is performed, but the
-		 * overhead of this is compensated for by the fact that the
-		 * parse objects are all cached.
-		 */
-		ACPI_DEBUG_PRINT((ACPI_DB_PARSE, "**** Start pass 2\n"));
-		status = acpi_ns_one_complete_parse(ACPI_IMODE_LOAD_PASS2,
-						    table_index, start_node);
-		if (ACPI_FAILURE(status)) {
-			return_ACPI_STATUS(status);
-		}
-	}
+	status = acpi_ns_execute_table(table_index, start_node);
 
 	return_ACPI_STATUS(status);
 }
diff --git a/drivers/acpi/acpica/psloop.c b/drivers/acpi/acpica/psloop.c
index 4796f17805ac..207805047bc4 100644
--- a/drivers/acpi/acpica/psloop.c
+++ b/drivers/acpi/acpica/psloop.c
@@ -32,10 +32,6 @@ static acpi_status
 acpi_ps_get_arguments(struct acpi_walk_state *walk_state,
 		      u8 * aml_op_start, union acpi_parse_object *op);
 
-static void
-acpi_ps_link_module_code(union acpi_parse_object *parent_op,
-			 u8 *aml_start, u32 aml_length, acpi_owner_id owner_id);
-
 /*******************************************************************************
  *
  * FUNCTION:    acpi_ps_get_arguments
@@ -56,7 +52,6 @@ acpi_ps_get_arguments(struct acpi_walk_state *walk_state,
 {
 	acpi_status status = AE_OK;
 	union acpi_parse_object *arg = NULL;
-	const struct acpi_opcode_info *op_info;
 
 	ACPI_FUNCTION_TRACE_PTR(ps_get_arguments, walk_state);
 
@@ -136,96 +131,6 @@ acpi_ps_get_arguments(struct acpi_walk_state *walk_state,
 				  walk_state->arg_count,
 				  walk_state->pass_number));
 
-		/*
-		 * This case handles the legacy option that groups all module-level
-		 * code blocks together and defers execution until all of the tables
-		 * are loaded. Execute all of these blocks at this time.
-		 * Execute any module-level code that was detected during the table
-		 * load phase.
-		 *
-		 * Note: this option is deprecated and will be eliminated in the
-		 * future. Use of this option can cause problems with AML code that
-		 * depends upon in-order immediate execution of module-level code.
-		 */
-		if (!acpi_gbl_execute_tables_as_methods &&
-		    (walk_state->pass_number <= ACPI_IMODE_LOAD_PASS2) &&
-		    ((walk_state->parse_flags & ACPI_PARSE_DISASSEMBLE) == 0)) {
-			/*
-			 * We want to skip If/Else/While constructs during Pass1 because we
-			 * want to actually conditionally execute the code during Pass2.
-			 *
-			 * Except for disassembly, where we always want to walk the
-			 * If/Else/While packages
-			 */
-			switch (op->common.aml_opcode) {
-			case AML_IF_OP:
-			case AML_ELSE_OP:
-			case AML_WHILE_OP:
-				/*
-				 * Currently supported module-level opcodes are:
-				 * IF/ELSE/WHILE. These appear to be the most common,
-				 * and easiest to support since they open an AML
-				 * package.
-				 */
-				if (walk_state->pass_number ==
-				    ACPI_IMODE_LOAD_PASS1) {
-					acpi_ps_link_module_code(op->common.
-								 parent,
-								 aml_op_start,
-								 (u32)
-								 (walk_state->
-								 parser_state.
-								 pkg_end -
-								 aml_op_start),
-								 walk_state->
-								 owner_id);
-				}
-
-				ACPI_DEBUG_PRINT((ACPI_DB_PARSE,
-						  "Pass1: Skipping an If/Else/While body\n"));
-
-				/* Skip body of if/else/while in pass 1 */
-
-				walk_state->parser_state.aml =
-				    walk_state->parser_state.pkg_end;
-				walk_state->arg_count = 0;
-				break;
-
-			default:
-				/*
-				 * Check for an unsupported executable opcode at module
-				 * level. We must be in PASS1, the parent must be a SCOPE,
-				 * The opcode class must be EXECUTE, and the opcode must
-				 * not be an argument to another opcode.
-				 */
-				if ((walk_state->pass_number ==
-				     ACPI_IMODE_LOAD_PASS1)
-				    && (op->common.parent->common.aml_opcode ==
-					AML_SCOPE_OP)) {
-					op_info =
-					    acpi_ps_get_opcode_info(op->common.
-								    aml_opcode);
-					if ((op_info->class ==
-					     AML_CLASS_EXECUTE) && (!arg)) {
-						ACPI_WARNING((AE_INFO,
-							      "Unsupported module-level executable opcode "
-							      "0x%.2X at table offset 0x%.4X",
-							      op->common.
-							      aml_opcode,
-							      (u32)
-							      (ACPI_PTR_DIFF
-							       (aml_op_start,
-								walk_state->
-								parser_state.
-								aml_start) +
-							       sizeof(struct
-								      acpi_table_header))));
-					}
-				}
-				break;
-			}
-		}
-
 		/* Special processing for certain opcodes */
 
 		switch (op->common.aml_opcode) {
@@ -300,104 +205,6 @@ acpi_ps_get_arguments(struct acpi_walk_state *walk_state,
 	return_ACPI_STATUS(AE_OK);
 }
 
-/*******************************************************************************
- *
- * FUNCTION:    acpi_ps_link_module_code
- *
- * PARAMETERS:  parent_op           - Parent parser op
- *              aml_start           - Pointer to the AML
- *              aml_length          - Length of executable AML
- *              owner_id            - owner_id of module level code
- *
- * RETURN:      None.
- *
- * DESCRIPTION: Wrap the module-level code with a method object and link the
- *              object to the global list. Note, the mutex field of the method
- *              object is used to link multiple module-level code objects.
- *
- * NOTE: In this legacy option, each block of detected executable AML
- * code that is outside of any control method is wrapped with a temporary
- * control method object and placed on a global list below.
- *
- * This function executes the module-level code for all tables only after
- * all of the tables have been loaded. It is a legacy option and is
- * not compatible with other ACPI implementations. See acpi_ns_load_table.
- *
- * This function will be removed when the legacy option is removed.
- *
- ******************************************************************************/
-
-static void
-acpi_ps_link_module_code(union acpi_parse_object *parent_op,
-			 u8 *aml_start, u32 aml_length, acpi_owner_id owner_id)
-{
-	union acpi_operand_object *prev;
-	union acpi_operand_object *next;
-	union acpi_operand_object *method_obj;
-	struct acpi_namespace_node *parent_node;
-
-	ACPI_FUNCTION_TRACE(ps_link_module_code);
-
-	/* Get the tail of the list */
-
-	prev = next = acpi_gbl_module_code_list;
-	while (next) {
-		prev = next;
-		next = next->method.mutex;
-	}
-
-	/*
-	 * Insert the module level code into the list. Merge it if it is
-	 * adjacent to the previous element.
-	 */
-	if (!prev ||
-	    ((prev->method.aml_start + prev->method.aml_length) != aml_start)) {
-
-		/* Create, initialize, and link a new temporary method object */
-
-		method_obj = acpi_ut_create_internal_object(ACPI_TYPE_METHOD);
-		if (!method_obj) {
-			return_VOID;
-		}
-
-		ACPI_DEBUG_PRINT((ACPI_DB_PARSE,
-				  "Create/Link new code block: %p\n",
-				  method_obj));
-
-		if (parent_op->common.node) {
-			parent_node = parent_op->common.node;
-		} else {
-			parent_node = acpi_gbl_root_node;
-		}
-
-		method_obj->method.aml_start = aml_start;
-		method_obj->method.aml_length = aml_length;
-		method_obj->method.owner_id = owner_id;
-		method_obj->method.info_flags |= ACPI_METHOD_MODULE_LEVEL;
-
-		/*
-		 * Save the parent node in next_object. This is cheating, but we
-		 * don't want to expand the method object.
-		 */
-		method_obj->method.next_object =
-		    ACPI_CAST_PTR(union acpi_operand_object, parent_node);
-
-		if (!prev) {
-			acpi_gbl_module_code_list = method_obj;
-		} else {
-			prev->method.mutex = method_obj;
-		}
-	} else {
-		ACPI_DEBUG_PRINT((ACPI_DB_PARSE,
-				  "Appending to existing code block: %p\n",
-				  prev));
-
-		prev->method.aml_length += aml_length;
-	}
-
-	return_VOID;
-}
-
 /*******************************************************************************
  *
  * FUNCTION:    acpi_ps_parse_loop
diff --git a/drivers/acpi/acpica/tbxfload.c b/drivers/acpi/acpica/tbxfload.c
index 2c573fcc8c70..1a2592cc3245 100644
--- a/drivers/acpi/acpica/tbxfload.c
+++ b/drivers/acpi/acpica/tbxfload.c
@@ -69,23 +69,18 @@ acpi_status ACPI_INIT_FUNCTION acpi_load_tables(void)
 				"While loading namespace from ACPI tables"));
 	}
 
-	if (acpi_gbl_execute_tables_as_methods) {
-		/*
-		 * If the module-level code support is enabled, initialize the objects
-		 * in the namespace that remain uninitialized. This runs the executable
-		 * AML that may be part of the declaration of these name objects:
-		 *     operation_regions, buffer_fields, Buffers, and Packages.
-		 *
-		 * Note: The module-level code is optional at this time, but will
-		 * become the default in the future.
-		 */
-		status = acpi_ns_initialize_objects();
-		if (ACPI_FAILURE(status)) {
-			return_ACPI_STATUS(status);
-		}
+	/*
+	 * Initialize the objects in the namespace that remain uninitialized.
+	 * This runs the executable AML that may be part of the declaration of
+	 * these name objects:
+	 *     operation_regions, buffer_fields, Buffers, and Packages.
+	 *
+	 */
+	status = acpi_ns_initialize_objects();
+	if (ACPI_SUCCESS(status)) {
+		acpi_gbl_namespace_initialized = TRUE;
 	}
 
-	acpi_gbl_namespace_initialized = TRUE;
 	return_ACPI_STATUS(status);
 }
 
diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h
index 4cc74954fea3..bcf3bc06eb94 100644
--- a/include/acpi/acpixf.h
+++ b/include/acpi/acpixf.h
@@ -156,14 +156,6 @@ ACPI_INIT_GLOBAL(u8, acpi_gbl_copy_dsdt_locally, FALSE);
  */
 ACPI_INIT_GLOBAL(u8, acpi_gbl_do_not_use_xsdt, FALSE);
 
-/*
- * Optionally support module level code by parsing an entire table as
- * a method as it is loaded. Default is TRUE.
- * NOTE, this is essentially obsolete and will be removed soon
- * (01/2018).
- */
-ACPI_INIT_GLOBAL(u8, acpi_gbl_execute_tables_as_methods, TRUE);
-
 /*
  * Optionally use 32-bit FADT addresses if and when there is a conflict
  * (address mismatch) between the 32-bit and 64-bit versions of the
-- 
cgit v1.2.3


From f00175d06b50a9fe7ed076dd73a8d0c36a9ef22e Mon Sep 17 00:00:00 2001
From: Erik Schmauss <erik.schmauss@intel.com>
Date: Fri, 15 Feb 2019 13:36:11 -0800
Subject: ACPICA: ACPI 6.3: Add Trigger order to PCC Identifier structure in
 PDTT

ACPICA commit b11446d8b47805c2637a2286aca34b717ec6b5be

Link: https://github.com/acpica/acpica/commit/b11446d8
Signed-off-by: Erik Schmauss <erik.schmauss@intel.com>
Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/acpi/actbl2.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/acpi/actbl2.h b/include/acpi/actbl2.h
index be01efdede32..f876a3776c75 100644
--- a/include/acpi/actbl2.h
+++ b/include/acpi/actbl2.h
@@ -1361,6 +1361,7 @@ struct acpi_pdtt_channel {
 
 #define ACPI_PDTT_RUNTIME_TRIGGER           (1)
 #define ACPI_PDTT_WAIT_COMPLETION           (1<<1)
+#define ACPI_PDTT_TRIGGER_ORDER             (1<<2)
 
 /*******************************************************************************
  *
-- 
cgit v1.2.3


From aa475a59fff172ec858093fbc8471c0993081481 Mon Sep 17 00:00:00 2001
From: Erik Schmauss <erik.schmauss@intel.com>
Date: Fri, 15 Feb 2019 13:36:12 -0800
Subject: ACPICA: ACPI 6.3: SRAT: add Generic Affinity Structure subtable

ACPICA commit 8c9eba7811a939a387d93d6c2a572d0887e64f2c

Link: https://github.com/acpica/acpica/commit/8c9eba78
Signed-off-by: Erik Schmauss <erik.schmauss@intel.com>
Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/acpi/actbl3.h | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/acpi/actbl3.h b/include/acpi/actbl3.h
index a6014e9a7113..7a58c10ce421 100644
--- a/include/acpi/actbl3.h
+++ b/include/acpi/actbl3.h
@@ -190,7 +190,8 @@ enum acpi_srat_type {
 	ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY = 2,
 	ACPI_SRAT_TYPE_GICC_AFFINITY = 3,
 	ACPI_SRAT_TYPE_GIC_ITS_AFFINITY = 4,	/* ACPI 6.2 */
-	ACPI_SRAT_TYPE_RESERVED = 5	/* 5 and greater are reserved */
+	ACPI_SRAT_TYPE_GENERIC_AFFINITY = 5,	/* ACPI 6.3 */
+	ACPI_SRAT_TYPE_RESERVED = 6	/* 5 and greater are reserved */
 };
 
 /*
@@ -271,6 +272,22 @@ struct acpi_srat_gic_its_affinity {
 	u32 its_id;
 };
 
+/* 5: Generic Initiator Affinity Structure (ACPI 6.3) */
+
+struct acpi_srat_generic_affinity {
+	struct acpi_subtable_header header;
+	u8 reserved;
+	u8 device_handle_type;
+	u32 proximity_domain;
+	u8 device_handle[16];
+	u32 flags;
+	u32 reserved1;
+};
+
+/* Flags for struct acpi_srat_generic_affinity */
+
+#define ACPI_SRAT_GENERIC_AFFINITY_ENABLED (1)	/* 00: Use affinity structure */
+
 /*******************************************************************************
  *
  * STAO - Status Override Table (_STA override) - ACPI 6.0
-- 
cgit v1.2.3


From 0145c30e896d26e638d27c957d9eed72893c1c92 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sat, 16 Feb 2019 18:13:07 +0100
Subject: genirq/affinity: Code consolidation

All information and calculations in the interrupt affinity spreading code
is strictly unsigned int. Though the code uses int all over the place.

Convert it over to unsigned int.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Bjorn Helgaas <helgaas@kernel.org>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: linux-block@vger.kernel.org
Cc: Sagi Grimberg <sagi@grimberg.me>
Cc: linux-nvme@lists.infradead.org
Cc: linux-pci@vger.kernel.org
Cc: Keith Busch <keith.busch@intel.com>
Cc: Sumit Saxena <sumit.saxena@broadcom.com>
Cc: Kashyap Desai <kashyap.desai@broadcom.com>
Cc: Shivasharan Srikanteshwara <shivasharan.srikanteshwara@broadcom.com>
Link: https://lkml.kernel.org/r/20190216172228.336424556@linutronix.de
---
 include/linux/interrupt.h | 20 +++++++++--------
 kernel/irq/affinity.c     | 56 +++++++++++++++++++++++------------------------
 2 files changed, 38 insertions(+), 38 deletions(-)

(limited to 'include')

diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 4a728dba02e2..35e7389c2011 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -251,10 +251,10 @@ struct irq_affinity_notify {
  * @sets:		Number of affinitized sets
  */
 struct irq_affinity {
-	int	pre_vectors;
-	int	post_vectors;
-	int	nr_sets;
-	int	*sets;
+	unsigned int	pre_vectors;
+	unsigned int	post_vectors;
+	unsigned int	nr_sets;
+	unsigned int	*sets;
 };
 
 /**
@@ -314,9 +314,10 @@ extern int
 irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify);
 
 struct irq_affinity_desc *
-irq_create_affinity_masks(int nvec, const struct irq_affinity *affd);
+irq_create_affinity_masks(unsigned int nvec, const struct irq_affinity *affd);
 
-int irq_calc_affinity_vectors(int minvec, int maxvec, const struct irq_affinity *affd);
+unsigned int irq_calc_affinity_vectors(unsigned int minvec, unsigned int maxvec,
+				       const struct irq_affinity *affd);
 
 #else /* CONFIG_SMP */
 
@@ -350,13 +351,14 @@ irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify)
 }
 
 static inline struct irq_affinity_desc *
-irq_create_affinity_masks(int nvec, const struct irq_affinity *affd)
+irq_create_affinity_masks(unsigned int nvec, const struct irq_affinity *affd)
 {
 	return NULL;
 }
 
-static inline int
-irq_calc_affinity_vectors(int minvec, int maxvec, const struct irq_affinity *affd)
+static inline unsigned int
+irq_calc_affinity_vectors(unsigned int minvec, unsigned int maxvec,
+			  const struct irq_affinity *affd)
 {
 	return maxvec;
 }
diff --git a/kernel/irq/affinity.c b/kernel/irq/affinity.c
index 118b66d64a53..82e8799374e9 100644
--- a/kernel/irq/affinity.c
+++ b/kernel/irq/affinity.c
@@ -9,7 +9,7 @@
 #include <linux/cpu.h>
 
 static void irq_spread_init_one(struct cpumask *irqmsk, struct cpumask *nmsk,
-				int cpus_per_vec)
+				unsigned int cpus_per_vec)
 {
 	const struct cpumask *siblmsk;
 	int cpu, sibl;
@@ -95,15 +95,17 @@ static int get_nodes_in_cpumask(cpumask_var_t *node_to_cpumask,
 }
 
 static int __irq_build_affinity_masks(const struct irq_affinity *affd,
-				      int startvec, int numvecs, int firstvec,
+				      unsigned int startvec,
+				      unsigned int numvecs,
+				      unsigned int firstvec,
 				      cpumask_var_t *node_to_cpumask,
 				      const struct cpumask *cpu_mask,
 				      struct cpumask *nmsk,
 				      struct irq_affinity_desc *masks)
 {
-	int n, nodes, cpus_per_vec, extra_vecs, done = 0;
-	int last_affv = firstvec + numvecs;
-	int curvec = startvec;
+	unsigned int n, nodes, cpus_per_vec, extra_vecs, done = 0;
+	unsigned int last_affv = firstvec + numvecs;
+	unsigned int curvec = startvec;
 	nodemask_t nodemsk = NODE_MASK_NONE;
 
 	if (!cpumask_weight(cpu_mask))
@@ -117,18 +119,16 @@ static int __irq_build_affinity_masks(const struct irq_affinity *affd,
 	 */
 	if (numvecs <= nodes) {
 		for_each_node_mask(n, nodemsk) {
-			cpumask_or(&masks[curvec].mask,
-					&masks[curvec].mask,
-					node_to_cpumask[n]);
+			cpumask_or(&masks[curvec].mask, &masks[curvec].mask,
+				   node_to_cpumask[n]);
 			if (++curvec == last_affv)
 				curvec = firstvec;
 		}
-		done = numvecs;
-		goto out;
+		return numvecs;
 	}
 
 	for_each_node_mask(n, nodemsk) {
-		int ncpus, v, vecs_to_assign, vecs_per_node;
+		unsigned int ncpus, v, vecs_to_assign, vecs_per_node;
 
 		/* Spread the vectors per node */
 		vecs_per_node = (numvecs - (curvec - firstvec)) / nodes;
@@ -163,8 +163,6 @@ static int __irq_build_affinity_masks(const struct irq_affinity *affd,
 			curvec = firstvec;
 		--nodes;
 	}
-
-out:
 	return done;
 }
 
@@ -174,13 +172,14 @@ out:
  *	2) spread other possible CPUs on these vectors
  */
 static int irq_build_affinity_masks(const struct irq_affinity *affd,
-				    int startvec, int numvecs, int firstvec,
+				    unsigned int startvec, unsigned int numvecs,
+				    unsigned int firstvec,
 				    struct irq_affinity_desc *masks)
 {
-	int curvec = startvec, nr_present, nr_others;
-	int ret = -ENOMEM;
-	cpumask_var_t nmsk, npresmsk;
+	unsigned int curvec = startvec, nr_present, nr_others;
 	cpumask_var_t *node_to_cpumask;
+	cpumask_var_t nmsk, npresmsk;
+	int ret = -ENOMEM;
 
 	if (!zalloc_cpumask_var(&nmsk, GFP_KERNEL))
 		return ret;
@@ -239,12 +238,10 @@ static int irq_build_affinity_masks(const struct irq_affinity *affd,
  * Returns the irq_affinity_desc pointer or NULL if allocation failed.
  */
 struct irq_affinity_desc *
-irq_create_affinity_masks(int nvecs, const struct irq_affinity *affd)
+irq_create_affinity_masks(unsigned int nvecs, const struct irq_affinity *affd)
 {
-	int affvecs = nvecs - affd->pre_vectors - affd->post_vectors;
-	int curvec, usedvecs;
+	unsigned int affvecs, curvec, usedvecs, nr_sets, i;
 	struct irq_affinity_desc *masks = NULL;
-	int i, nr_sets;
 
 	/*
 	 * If there aren't any vectors left after applying the pre/post
@@ -264,16 +261,17 @@ irq_create_affinity_masks(int nvecs, const struct irq_affinity *affd)
 	 * Spread on present CPUs starting from affd->pre_vectors. If we
 	 * have multiple sets, build each sets affinity mask separately.
 	 */
+	affvecs = nvecs - affd->pre_vectors - affd->post_vectors;
 	nr_sets = affd->nr_sets;
 	if (!nr_sets)
 		nr_sets = 1;
 
 	for (i = 0, usedvecs = 0; i < nr_sets; i++) {
-		int this_vecs = affd->sets ? affd->sets[i] : affvecs;
+		unsigned int this_vecs = affd->sets ? affd->sets[i] : affvecs;
 		int ret;
 
 		ret = irq_build_affinity_masks(affd, curvec, this_vecs,
-						curvec, masks);
+					       curvec, masks);
 		if (ret) {
 			kfree(masks);
 			return NULL;
@@ -303,17 +301,17 @@ irq_create_affinity_masks(int nvecs, const struct irq_affinity *affd)
  * @maxvec:	The maximum number of vectors available
  * @affd:	Description of the affinity requirements
  */
-int irq_calc_affinity_vectors(int minvec, int maxvec, const struct irq_affinity *affd)
+unsigned int irq_calc_affinity_vectors(unsigned int minvec, unsigned int maxvec,
+				       const struct irq_affinity *affd)
 {
-	int resv = affd->pre_vectors + affd->post_vectors;
-	int vecs = maxvec - resv;
-	int set_vecs;
+	unsigned int resv = affd->pre_vectors + affd->post_vectors;
+	unsigned int set_vecs;
 
 	if (resv > minvec)
 		return 0;
 
 	if (affd->nr_sets) {
-		int i;
+		unsigned int i;
 
 		for (i = 0, set_vecs = 0;  i < affd->nr_sets; i++)
 			set_vecs += affd->sets[i];
@@ -323,5 +321,5 @@ int irq_calc_affinity_vectors(int minvec, int maxvec, const struct irq_affinity
 		put_online_cpus();
 	}
 
-	return resv + min(set_vecs, vecs);
+	return resv + min(set_vecs, maxvec - resv);
 }
-- 
cgit v1.2.3


From 9cfef55bb57e7620c63087be18a76351628f8d0f Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Sat, 16 Feb 2019 18:13:08 +0100
Subject: genirq/affinity: Store interrupt sets size in struct irq_affinity

The interrupt affinity spreading mechanism supports to spread out
affinities for one or more interrupt sets. A interrupt set contains one
or more interrupts. Each set is mapped to a specific functionality of a
device, e.g. general I/O queues and read I/O queus of multiqueue block
devices.

The number of interrupts per set is defined by the driver. It depends on
the total number of available interrupts for the device, which is
determined by the PCI capabilites and the availability of underlying CPU
resources, and the number of queues which the device provides and the
driver wants to instantiate.

The driver passes initial configuration for the interrupt allocation via
a pointer to struct irq_affinity.

Right now the allocation mechanism is complex as it requires to have a
loop in the driver to determine the maximum number of interrupts which
are provided by the PCI capabilities and the underlying CPU resources.
This loop would have to be replicated in every driver which wants to
utilize this mechanism. That's unwanted code duplication and error
prone.

In order to move this into generic facilities it is required to have a
mechanism, which allows the recalculation of the interrupt sets and
their size, in the core code. As the core code does not have any
knowledge about the underlying device, a driver specific callback will
be added to struct affinity_desc, which will be invoked by the core
code. The callback will get the number of available interupts as an
argument, so the driver can calculate the corresponding number and size
of interrupt sets.

To support this, two modifications for the handling of struct irq_affinity
are required:

1) The (optional) interrupt sets size information is contained in a
   separate array of integers and struct irq_affinity contains a
   pointer to it.

   This is cumbersome and as the maximum number of interrupt sets is small,
   there is no reason to have separate storage. Moving the size array into
   struct affinity_desc avoids indirections and makes the code simpler.

2) At the moment the struct irq_affinity pointer which is handed in from
   the driver and passed through to several core functions is marked
   'const'.

   With the upcoming callback to recalculate the number and size of
   interrupt sets, it's necessary to remove the 'const'
   qualifier. Otherwise the callback would not be able to update the data.

Implement #1 and store the interrupt sets size in 'struct irq_affinity'.

No functional change.

[ tglx: Fixed the memcpy() size so it won't copy beyond the size of the
  	source. Fixed the kernel doc comments for struct irq_affinity and
  	de-'This patch'-ed the changelog ]

Signed-off-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Bjorn Helgaas <helgaas@kernel.org>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: linux-block@vger.kernel.org
Cc: Sagi Grimberg <sagi@grimberg.me>
Cc: linux-nvme@lists.infradead.org
Cc: linux-pci@vger.kernel.org
Cc: Keith Busch <keith.busch@intel.com>
Cc: Sumit Saxena <sumit.saxena@broadcom.com>
Cc: Kashyap Desai <kashyap.desai@broadcom.com>
Cc: Shivasharan Srikanteshwara <shivasharan.srikanteshwara@broadcom.com>
Link: https://lkml.kernel.org/r/20190216172228.423723127@linutronix.de
---
 drivers/nvme/host/pci.c   |  7 +++----
 include/linux/interrupt.h |  9 ++++++---
 kernel/irq/affinity.c     | 16 ++++++++++++----
 3 files changed, 21 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 9bc585415d9b..21ffd671b6ed 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -2081,12 +2081,11 @@ static void nvme_calc_io_queues(struct nvme_dev *dev, unsigned int irq_queues)
 static int nvme_setup_irqs(struct nvme_dev *dev, unsigned int nr_io_queues)
 {
 	struct pci_dev *pdev = to_pci_dev(dev->dev);
-	int irq_sets[2];
 	struct irq_affinity affd = {
-		.pre_vectors = 1,
-		.nr_sets = ARRAY_SIZE(irq_sets),
-		.sets = irq_sets,
+		.pre_vectors	= 1,
+		.nr_sets	= 2,
 	};
+	unsigned int *irq_sets = affd.set_size;
 	int result = 0;
 	unsigned int irq_queues, this_p_queues;
 
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 35e7389c2011..5afdfd5dc39b 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -241,20 +241,23 @@ struct irq_affinity_notify {
 	void (*release)(struct kref *ref);
 };
 
+#define	IRQ_AFFINITY_MAX_SETS  4
+
 /**
  * struct irq_affinity - Description for automatic irq affinity assignements
  * @pre_vectors:	Don't apply affinity to @pre_vectors at beginning of
  *			the MSI(-X) vector space
  * @post_vectors:	Don't apply affinity to @post_vectors at end of
  *			the MSI(-X) vector space
- * @nr_sets:		Length of passed in *sets array
- * @sets:		Number of affinitized sets
+ * @nr_sets:		The number of interrupt sets for which affinity
+ *			spreading is required
+ * @set_size:		Array holding the size of each interrupt set
  */
 struct irq_affinity {
 	unsigned int	pre_vectors;
 	unsigned int	post_vectors;
 	unsigned int	nr_sets;
-	unsigned int	*sets;
+	unsigned int	set_size[IRQ_AFFINITY_MAX_SETS];
 };
 
 /**
diff --git a/kernel/irq/affinity.c b/kernel/irq/affinity.c
index 82e8799374e9..278289c091bb 100644
--- a/kernel/irq/affinity.c
+++ b/kernel/irq/affinity.c
@@ -238,9 +238,10 @@ static int irq_build_affinity_masks(const struct irq_affinity *affd,
  * Returns the irq_affinity_desc pointer or NULL if allocation failed.
  */
 struct irq_affinity_desc *
-irq_create_affinity_masks(unsigned int nvecs, const struct irq_affinity *affd)
+irq_create_affinity_masks(unsigned int nvecs, struct irq_affinity *affd)
 {
 	unsigned int affvecs, curvec, usedvecs, nr_sets, i;
+	unsigned int set_size[IRQ_AFFINITY_MAX_SETS];
 	struct irq_affinity_desc *masks = NULL;
 
 	/*
@@ -250,6 +251,9 @@ irq_create_affinity_masks(unsigned int nvecs, const struct irq_affinity *affd)
 	if (nvecs == affd->pre_vectors + affd->post_vectors)
 		return NULL;
 
+	if (WARN_ON_ONCE(affd->nr_sets > IRQ_AFFINITY_MAX_SETS))
+		return NULL;
+
 	masks = kcalloc(nvecs, sizeof(*masks), GFP_KERNEL);
 	if (!masks)
 		return NULL;
@@ -263,11 +267,15 @@ irq_create_affinity_masks(unsigned int nvecs, const struct irq_affinity *affd)
 	 */
 	affvecs = nvecs - affd->pre_vectors - affd->post_vectors;
 	nr_sets = affd->nr_sets;
-	if (!nr_sets)
+	if (!nr_sets) {
 		nr_sets = 1;
+		set_size[0] = affvecs;
+	} else {
+		memcpy(set_size, affd->set_size, nr_sets * sizeof(unsigned int));
+	}
 
 	for (i = 0, usedvecs = 0; i < nr_sets; i++) {
-		unsigned int this_vecs = affd->sets ? affd->sets[i] : affvecs;
+		unsigned int this_vecs = set_size[i];
 		int ret;
 
 		ret = irq_build_affinity_masks(affd, curvec, this_vecs,
@@ -314,7 +322,7 @@ unsigned int irq_calc_affinity_vectors(unsigned int minvec, unsigned int maxvec,
 		unsigned int i;
 
 		for (i = 0, set_vecs = 0;  i < affd->nr_sets; i++)
-			set_vecs += affd->sets[i];
+			set_vecs += affd->set_size[i];
 	} else {
 		get_online_cpus();
 		set_vecs = cpumask_weight(cpu_possible_mask);
-- 
cgit v1.2.3


From c66d4bd110a1f8a68c1a88bfbf866eb50c6464b7 Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Sat, 16 Feb 2019 18:13:09 +0100
Subject: genirq/affinity: Add new callback for (re)calculating interrupt sets

The interrupt affinity spreading mechanism supports to spread out
affinities for one or more interrupt sets. A interrupt set contains one or
more interrupts. Each set is mapped to a specific functionality of a
device, e.g. general I/O queues and read I/O queus of multiqueue block
devices.

The number of interrupts per set is defined by the driver. It depends on
the total number of available interrupts for the device, which is
determined by the PCI capabilites and the availability of underlying CPU
resources, and the number of queues which the device provides and the
driver wants to instantiate.

The driver passes initial configuration for the interrupt allocation via a
pointer to struct irq_affinity.

Right now the allocation mechanism is complex as it requires to have a loop
in the driver to determine the maximum number of interrupts which are
provided by the PCI capabilities and the underlying CPU resources.  This
loop would have to be replicated in every driver which wants to utilize
this mechanism. That's unwanted code duplication and error prone.

In order to move this into generic facilities it is required to have a
mechanism, which allows the recalculation of the interrupt sets and their
size, in the core code. As the core code does not have any knowledge about the
underlying device, a driver specific callback is required in struct
irq_affinity, which can be invoked by the core code. The callback gets the
number of available interupts as an argument, so the driver can calculate the
corresponding number and size of interrupt sets.

At the moment the struct irq_affinity pointer which is handed in from the
driver and passed through to several core functions is marked 'const', but for
the callback to be able to modify the data in the struct it's required to
remove the 'const' qualifier.

Add the optional callback to struct irq_affinity, which allows drivers to
recalculate the number and size of interrupt sets and remove the 'const'
qualifier.

For simple invocations, which do not supply a callback, a default callback
is installed, which just sets nr_sets to 1 and transfers the number of
spreadable vectors to the set_size array at index 0.

This is for now guarded by a check for nr_sets != 0 to keep the NVME driver
working until it is converted to the callback mechanism.

To make sure that the driver configuration is correct under all circumstances
the callback is invoked even when there are no interrupts for queues left,
i.e. the pre/post requirements already exhaust the numner of available
interrupts.

At the PCI layer irq_create_affinity_masks() has to be invoked even for the
case where the legacy interrupt is used. That ensures that the callback is
invoked and the device driver can adjust to that situation.

[ tglx: Fixed the simple case (no sets required). Moved the sanity check
  	for nr_sets after the invocation of the callback so it catches
  	broken drivers. Fixed the kernel doc comments for struct
  	irq_affinity and de-'This patch'-ed the changelog ]

Signed-off-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Bjorn Helgaas <helgaas@kernel.org>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: linux-block@vger.kernel.org
Cc: Sagi Grimberg <sagi@grimberg.me>
Cc: linux-nvme@lists.infradead.org
Cc: linux-pci@vger.kernel.org
Cc: Keith Busch <keith.busch@intel.com>
Cc: Sumit Saxena <sumit.saxena@broadcom.com>
Cc: Kashyap Desai <kashyap.desai@broadcom.com>
Cc: Shivasharan Srikanteshwara <shivasharan.srikanteshwara@broadcom.com>
Link: https://lkml.kernel.org/r/20190216172228.512444498@linutronix.de
---
 drivers/pci/msi.c               | 25 +++++++++++------
 drivers/scsi/be2iscsi/be_main.c |  2 +-
 include/linux/interrupt.h       | 10 +++++--
 include/linux/pci.h             |  4 +--
 kernel/irq/affinity.c           | 62 +++++++++++++++++++++++++++++------------
 5 files changed, 71 insertions(+), 32 deletions(-)

(limited to 'include')

diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index 4c0b47867258..7149d6315726 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -532,7 +532,7 @@ error_attrs:
 }
 
 static struct msi_desc *
-msi_setup_entry(struct pci_dev *dev, int nvec, const struct irq_affinity *affd)
+msi_setup_entry(struct pci_dev *dev, int nvec, struct irq_affinity *affd)
 {
 	struct irq_affinity_desc *masks = NULL;
 	struct msi_desc *entry;
@@ -597,7 +597,7 @@ static int msi_verify_entries(struct pci_dev *dev)
  * which could have been allocated.
  */
 static int msi_capability_init(struct pci_dev *dev, int nvec,
-			       const struct irq_affinity *affd)
+			       struct irq_affinity *affd)
 {
 	struct msi_desc *entry;
 	int ret;
@@ -669,7 +669,7 @@ static void __iomem *msix_map_region(struct pci_dev *dev, unsigned nr_entries)
 
 static int msix_setup_entries(struct pci_dev *dev, void __iomem *base,
 			      struct msix_entry *entries, int nvec,
-			      const struct irq_affinity *affd)
+			      struct irq_affinity *affd)
 {
 	struct irq_affinity_desc *curmsk, *masks = NULL;
 	struct msi_desc *entry;
@@ -736,7 +736,7 @@ static void msix_program_entries(struct pci_dev *dev,
  * requested MSI-X entries with allocated irqs or non-zero for otherwise.
  **/
 static int msix_capability_init(struct pci_dev *dev, struct msix_entry *entries,
-				int nvec, const struct irq_affinity *affd)
+				int nvec, struct irq_affinity *affd)
 {
 	int ret;
 	u16 control;
@@ -932,7 +932,7 @@ int pci_msix_vec_count(struct pci_dev *dev)
 EXPORT_SYMBOL(pci_msix_vec_count);
 
 static int __pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries,
-			     int nvec, const struct irq_affinity *affd)
+			     int nvec, struct irq_affinity *affd)
 {
 	int nr_entries;
 	int i, j;
@@ -1018,7 +1018,7 @@ int pci_msi_enabled(void)
 EXPORT_SYMBOL(pci_msi_enabled);
 
 static int __pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec,
-				  const struct irq_affinity *affd)
+				  struct irq_affinity *affd)
 {
 	int nvec;
 	int rc;
@@ -1086,7 +1086,7 @@ EXPORT_SYMBOL(pci_enable_msi);
 
 static int __pci_enable_msix_range(struct pci_dev *dev,
 				   struct msix_entry *entries, int minvec,
-				   int maxvec, const struct irq_affinity *affd)
+				   int maxvec, struct irq_affinity *affd)
 {
 	int rc, nvec = maxvec;
 
@@ -1165,9 +1165,9 @@ EXPORT_SYMBOL(pci_enable_msix_range);
  */
 int pci_alloc_irq_vectors_affinity(struct pci_dev *dev, unsigned int min_vecs,
 				   unsigned int max_vecs, unsigned int flags,
-				   const struct irq_affinity *affd)
+				   struct irq_affinity *affd)
 {
-	static const struct irq_affinity msi_default_affd;
+	struct irq_affinity msi_default_affd = {0};
 	int msix_vecs = -ENOSPC;
 	int msi_vecs = -ENOSPC;
 
@@ -1196,6 +1196,13 @@ int pci_alloc_irq_vectors_affinity(struct pci_dev *dev, unsigned int min_vecs,
 	/* use legacy irq if allowed */
 	if (flags & PCI_IRQ_LEGACY) {
 		if (min_vecs == 1 && dev->irq) {
+			/*
+			 * Invoke the affinity spreading logic to ensure that
+			 * the device driver can adjust queue configuration
+			 * for the single interrupt case.
+			 */
+			if (affd)
+				irq_create_affinity_masks(1, affd);
 			pci_intx(dev, 1);
 			return 1;
 		}
diff --git a/drivers/scsi/be2iscsi/be_main.c b/drivers/scsi/be2iscsi/be_main.c
index 74e260027c7d..76e49d902609 100644
--- a/drivers/scsi/be2iscsi/be_main.c
+++ b/drivers/scsi/be2iscsi/be_main.c
@@ -3566,7 +3566,7 @@ static void be2iscsi_enable_msix(struct beiscsi_hba *phba)
 
 	/* if eqid_count == 1 fall back to INTX */
 	if (enable_msix && nvec > 1) {
-		const struct irq_affinity desc = { .post_vectors = 1 };
+		struct irq_affinity desc = { .post_vectors = 1 };
 
 		if (pci_alloc_irq_vectors_affinity(phba->pcidev, 2, nvec,
 				PCI_IRQ_MSIX | PCI_IRQ_AFFINITY, &desc) < 0) {
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 5afdfd5dc39b..dcdddf4fa76b 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -252,12 +252,18 @@ struct irq_affinity_notify {
  * @nr_sets:		The number of interrupt sets for which affinity
  *			spreading is required
  * @set_size:		Array holding the size of each interrupt set
+ * @calc_sets:		Callback for calculating the number and size
+ *			of interrupt sets
+ * @priv:		Private data for usage by @calc_sets, usually a
+ *			pointer to driver/device specific data.
  */
 struct irq_affinity {
 	unsigned int	pre_vectors;
 	unsigned int	post_vectors;
 	unsigned int	nr_sets;
 	unsigned int	set_size[IRQ_AFFINITY_MAX_SETS];
+	void		(*calc_sets)(struct irq_affinity *, unsigned int nvecs);
+	void		*priv;
 };
 
 /**
@@ -317,7 +323,7 @@ extern int
 irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify);
 
 struct irq_affinity_desc *
-irq_create_affinity_masks(unsigned int nvec, const struct irq_affinity *affd);
+irq_create_affinity_masks(unsigned int nvec, struct irq_affinity *affd);
 
 unsigned int irq_calc_affinity_vectors(unsigned int minvec, unsigned int maxvec,
 				       const struct irq_affinity *affd);
@@ -354,7 +360,7 @@ irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify)
 }
 
 static inline struct irq_affinity_desc *
-irq_create_affinity_masks(unsigned int nvec, const struct irq_affinity *affd)
+irq_create_affinity_masks(unsigned int nvec, struct irq_affinity *affd)
 {
 	return NULL;
 }
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 65f1d8c2f082..e7c51b00cdfe 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1393,7 +1393,7 @@ static inline int pci_enable_msix_exact(struct pci_dev *dev,
 }
 int pci_alloc_irq_vectors_affinity(struct pci_dev *dev, unsigned int min_vecs,
 				   unsigned int max_vecs, unsigned int flags,
-				   const struct irq_affinity *affd);
+				   struct irq_affinity *affd);
 
 void pci_free_irq_vectors(struct pci_dev *dev);
 int pci_irq_vector(struct pci_dev *dev, unsigned int nr);
@@ -1419,7 +1419,7 @@ static inline int pci_enable_msix_exact(struct pci_dev *dev,
 static inline int
 pci_alloc_irq_vectors_affinity(struct pci_dev *dev, unsigned int min_vecs,
 			       unsigned int max_vecs, unsigned int flags,
-			       const struct irq_affinity *aff_desc)
+			       struct irq_affinity *aff_desc)
 {
 	if ((flags & PCI_IRQ_LEGACY) && min_vecs == 1 && dev->irq)
 		return 1;
diff --git a/kernel/irq/affinity.c b/kernel/irq/affinity.c
index 278289c091bb..d737dc60ab52 100644
--- a/kernel/irq/affinity.c
+++ b/kernel/irq/affinity.c
@@ -230,6 +230,12 @@ static int irq_build_affinity_masks(const struct irq_affinity *affd,
 	return ret;
 }
 
+static void default_calc_sets(struct irq_affinity *affd, unsigned int affvecs)
+{
+	affd->nr_sets = 1;
+	affd->set_size[0] = affvecs;
+}
+
 /**
  * irq_create_affinity_masks - Create affinity masks for multiqueue spreading
  * @nvecs:	The total number of vectors
@@ -240,20 +246,46 @@ static int irq_build_affinity_masks(const struct irq_affinity *affd,
 struct irq_affinity_desc *
 irq_create_affinity_masks(unsigned int nvecs, struct irq_affinity *affd)
 {
-	unsigned int affvecs, curvec, usedvecs, nr_sets, i;
-	unsigned int set_size[IRQ_AFFINITY_MAX_SETS];
+	unsigned int affvecs, curvec, usedvecs, i;
 	struct irq_affinity_desc *masks = NULL;
 
 	/*
-	 * If there aren't any vectors left after applying the pre/post
-	 * vectors don't bother with assigning affinity.
+	 * Determine the number of vectors which need interrupt affinities
+	 * assigned. If the pre/post request exhausts the available vectors
+	 * then nothing to do here except for invoking the calc_sets()
+	 * callback so the device driver can adjust to the situation. If there
+	 * is only a single vector, then managing the queue is pointless as
+	 * well.
 	 */
-	if (nvecs == affd->pre_vectors + affd->post_vectors)
-		return NULL;
+	if (nvecs > 1 && nvecs > affd->pre_vectors + affd->post_vectors)
+		affvecs = nvecs - affd->pre_vectors - affd->post_vectors;
+	else
+		affvecs = 0;
+
+	/*
+	 * Simple invocations do not provide a calc_sets() callback. Install
+	 * the generic one. The check for affd->nr_sets is a temporary
+	 * workaround and will be removed after the NVME driver is converted
+	 * over.
+	 */
+	if (!affd->nr_sets && !affd->calc_sets)
+		affd->calc_sets = default_calc_sets;
+
+	/*
+	 * If the device driver provided a calc_sets() callback let it
+	 * recalculate the number of sets and their size. The check will go
+	 * away once the NVME driver is converted over.
+	 */
+	if (affd->calc_sets)
+		affd->calc_sets(affd, affvecs);
 
 	if (WARN_ON_ONCE(affd->nr_sets > IRQ_AFFINITY_MAX_SETS))
 		return NULL;
 
+	/* Nothing to assign? */
+	if (!affvecs)
+		return NULL;
+
 	masks = kcalloc(nvecs, sizeof(*masks), GFP_KERNEL);
 	if (!masks)
 		return NULL;
@@ -261,21 +293,13 @@ irq_create_affinity_masks(unsigned int nvecs, struct irq_affinity *affd)
 	/* Fill out vectors at the beginning that don't need affinity */
 	for (curvec = 0; curvec < affd->pre_vectors; curvec++)
 		cpumask_copy(&masks[curvec].mask, irq_default_affinity);
+
 	/*
 	 * Spread on present CPUs starting from affd->pre_vectors. If we
 	 * have multiple sets, build each sets affinity mask separately.
 	 */
-	affvecs = nvecs - affd->pre_vectors - affd->post_vectors;
-	nr_sets = affd->nr_sets;
-	if (!nr_sets) {
-		nr_sets = 1;
-		set_size[0] = affvecs;
-	} else {
-		memcpy(set_size, affd->set_size, nr_sets * sizeof(unsigned int));
-	}
-
-	for (i = 0, usedvecs = 0; i < nr_sets; i++) {
-		unsigned int this_vecs = set_size[i];
+	for (i = 0, usedvecs = 0; i < affd->nr_sets; i++) {
+		unsigned int this_vecs = affd->set_size[i];
 		int ret;
 
 		ret = irq_build_affinity_masks(affd, curvec, this_vecs,
@@ -318,7 +342,9 @@ unsigned int irq_calc_affinity_vectors(unsigned int minvec, unsigned int maxvec,
 	if (resv > minvec)
 		return 0;
 
-	if (affd->nr_sets) {
+	if (affd->calc_sets) {
+		set_vecs = maxvec - resv;
+	} else if (affd->nr_sets) {
 		unsigned int i;
 
 		for (i = 0, set_vecs = 0;  i < affd->nr_sets; i++)
-- 
cgit v1.2.3


From 1757d05f3112acc5c0cdbcccad3afdee99655bf9 Mon Sep 17 00:00:00 2001
From: Xiongfeng Wang <wangxiongfeng2@huawei.com>
Date: Sun, 17 Feb 2019 11:54:14 +0800
Subject: ACPI / CPPC: Add a helper to get desired performance

This patch add a helper to get the value of desired performance
register.

Signed-off-by: Xiongfeng Wang <wangxiongfeng2@huawei.com>
[ rjw: More white space ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/cppc_acpi.c | 42 ++++++++++++++++++++++++++++++++++++++++++
 include/acpi/cppc_acpi.h |  1 +
 2 files changed, 43 insertions(+)

(limited to 'include')

diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c
index 217a782c3e55..1b207fca1420 100644
--- a/drivers/acpi/cppc_acpi.c
+++ b/drivers/acpi/cppc_acpi.c
@@ -1050,6 +1050,48 @@ static int cpc_write(int cpu, struct cpc_register_resource *reg_res, u64 val)
 	return ret_val;
 }
 
+/**
+ * cppc_get_desired_perf - Get the value of desired performance register.
+ * @cpunum: CPU from which to get desired performance.
+ * @desired_perf: address of a variable to store the returned desired performance
+ *
+ * Return: 0 for success, -EIO otherwise.
+ */
+int cppc_get_desired_perf(int cpunum, u64 *desired_perf)
+{
+	struct cpc_desc *cpc_desc = per_cpu(cpc_desc_ptr, cpunum);
+	int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpunum);
+	struct cpc_register_resource *desired_reg;
+	struct cppc_pcc_data *pcc_ss_data = NULL;
+
+	desired_reg = &cpc_desc->cpc_regs[DESIRED_PERF];
+
+	if (CPC_IN_PCC(desired_reg)) {
+		int ret = 0;
+
+		if (pcc_ss_id < 0)
+			return -EIO;
+
+		pcc_ss_data = pcc_data[pcc_ss_id];
+
+		down_write(&pcc_ss_data->pcc_lock);
+
+		if (send_pcc_cmd(pcc_ss_id, CMD_READ) >= 0)
+			cpc_read(cpunum, desired_reg, desired_perf);
+		else
+			ret = -EIO;
+
+		up_write(&pcc_ss_data->pcc_lock);
+
+		return ret;
+	}
+
+	cpc_read(cpunum, desired_reg, desired_perf);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(cppc_get_desired_perf);
+
 /**
  * cppc_get_perf_caps - Get a CPUs performance capabilities.
  * @cpunum: CPU from which to get capabilities info.
diff --git a/include/acpi/cppc_acpi.h b/include/acpi/cppc_acpi.h
index 4f34734e7f36..ba6fd7202775 100644
--- a/include/acpi/cppc_acpi.h
+++ b/include/acpi/cppc_acpi.h
@@ -137,6 +137,7 @@ struct cppc_cpudata {
 	cpumask_var_t shared_cpu_map;
 };
 
+extern int cppc_get_desired_perf(int cpunum, u64 *desired_perf);
 extern int cppc_get_perf_ctrs(int cpu, struct cppc_perf_fb_ctrs *perf_fb_ctrs);
 extern int cppc_set_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls);
 extern int cppc_get_perf_caps(int cpu, struct cppc_perf_caps *caps);
-- 
cgit v1.2.3


From feee96440c9c5fdf47f8c8079c104fc8082924a0 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 13 Feb 2019 08:01:27 +0100
Subject: swiotlb: remove swiotlb_dma_supported

The only user left is powerpc, but even there the generic dma-direct
version works just as well, given that we guarantee that the swiotlb
buffer must always be addressable.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Tested-by: Christian Zigotzky <chzigotzky@xenosoft.de>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/dma-swiotlb.c |  2 +-
 include/linux/swiotlb.h           |  3 ---
 kernel/dma/swiotlb.c              | 12 ------------
 3 files changed, 1 insertion(+), 16 deletions(-)

(limited to 'include')

diff --git a/arch/powerpc/kernel/dma-swiotlb.c b/arch/powerpc/kernel/dma-swiotlb.c
index d5950a0cb758..6d2677b2daa6 100644
--- a/arch/powerpc/kernel/dma-swiotlb.c
+++ b/arch/powerpc/kernel/dma-swiotlb.c
@@ -36,7 +36,7 @@ const struct dma_map_ops powerpc_swiotlb_dma_ops = {
 	.free = __dma_nommu_free_coherent,
 	.map_sg = dma_direct_map_sg,
 	.unmap_sg = dma_direct_unmap_sg,
-	.dma_supported = swiotlb_dma_supported,
+	.dma_supported = dma_direct_supported,
 	.map_page = dma_direct_map_page,
 	.unmap_page = dma_direct_unmap_page,
 	.sync_single_for_cpu = dma_direct_sync_single_for_cpu,
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index 7c007ed7505f..54254388899e 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -60,9 +60,6 @@ extern void swiotlb_tbl_sync_single(struct device *hwdev,
 				    size_t size, enum dma_data_direction dir,
 				    enum dma_sync_target target);
 
-extern int
-swiotlb_dma_supported(struct device *hwdev, u64 mask);
-
 #ifdef CONFIG_SWIOTLB
 extern enum swiotlb_force swiotlb_force;
 extern phys_addr_t io_tlb_start, io_tlb_end;
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index d6361776dc5c..cbf3498a46f9 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -648,15 +648,3 @@ bool swiotlb_map(struct device *dev, phys_addr_t *phys, dma_addr_t *dma_addr,
 
 	return true;
 }
-
-/*
- * Return whether the given device DMA address mask can be supported
- * properly.  For example, if your device can only drive the low 24-bits
- * during bus mastering, then you would pass 0x00ffffff as the mask to
- * this function.
- */
-int
-swiotlb_dma_supported(struct device *hwdev, u64 mask)
-{
-	return __phys_to_dma(hwdev, io_tlb_end - 1) <= mask;
-}
-- 
cgit v1.2.3


From f7db89accc9c51d8f765d79b8e9557cc623ec20e Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Wed, 9 Jan 2019 13:15:23 +0100
Subject: fsnotify: Create function to remove event from notification list

Create function to remove event from the notification list. Later it will
be used from more places.

Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/notify/notification.c         | 20 +++++++++++++-------
 include/linux/fsnotify_backend.h |  3 +++
 2 files changed, 16 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/fs/notify/notification.c b/fs/notify/notification.c
index 027d5d5bb90e..5f3a54d444b5 100644
--- a/fs/notify/notification.c
+++ b/fs/notify/notification.c
@@ -141,6 +141,18 @@ queue:
 	return ret;
 }
 
+void fsnotify_remove_queued_event(struct fsnotify_group *group,
+				  struct fsnotify_event *event)
+{
+	assert_spin_locked(&group->notification_lock);
+	/*
+	 * We need to init list head for the case of overflow event so that
+	 * check in fsnotify_add_event() works
+	 */
+	list_del_init(&event->list);
+	group->q_len--;
+}
+
 /*
  * Remove and return the first event from the notification list.  It is the
  * responsibility of the caller to destroy the obtained event
@@ -155,13 +167,7 @@ struct fsnotify_event *fsnotify_remove_first_event(struct fsnotify_group *group)
 
 	event = list_first_entry(&group->notification_list,
 				 struct fsnotify_event, list);
-	/*
-	 * We need to init list head for the case of overflow event so that
-	 * check in fsnotify_add_event() works
-	 */
-	list_del_init(&event->list);
-	group->q_len--;
-
+	fsnotify_remove_queued_event(group, event);
 	return event;
 }
 
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index 7b93f15b4944..dfc28fcb4de8 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -422,6 +422,9 @@ extern bool fsnotify_notify_queue_is_empty(struct fsnotify_group *group);
 extern struct fsnotify_event *fsnotify_peek_first_event(struct fsnotify_group *group);
 /* return AND dequeue the first event on the notification queue */
 extern struct fsnotify_event *fsnotify_remove_first_event(struct fsnotify_group *group);
+/* Remove event queued in the notification list */
+extern void fsnotify_remove_queued_event(struct fsnotify_group *group,
+					 struct fsnotify_event *event);
 
 /* functions used to manipulate the marks attached to inodes */
 
-- 
cgit v1.2.3


From ba4fda620a5f7db521aa9e0262cf49854c1b1d9c Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Mon, 18 Feb 2019 10:58:21 +0000
Subject: drm/i915: Optionally disable automatic recovery after a GPU reset

Some clients, such as mesa, may only emit minimal incremental batches
that rely on the logical context state from previous batches. They know
that recovery is impossible after a hang as their required GPU state is
lost, and that each in flight and subsequent batch will hang (resetting
the context image back to default perpetuating the problem).

To avoid getting into the state in the first place, we can allow clients
to opt out of automatic recovery and elect to ban any guilty context
following a hang. This prevents the continual stream of hangs and allows
the client to recreate their context and rebuild the state from scratch.

v2: Prefer calling it recoverable rather than unrecoverable.

References: https://lists.freedesktop.org/archives/mesa-dev/2019-February/215431.html
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Kenneth Graunke <kenneth@whitecape.org>
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Acked-by: Kenneth Graunke <kenneth@whitecape.org> # for mesa
Link: https://patchwork.freedesktop.org/patch/msgid/20190218105821.17293-1-chris@chris-wilson.co.uk
---
 drivers/gpu/drm/i915/i915_gem_context.c | 15 +++++++++++++++
 drivers/gpu/drm/i915/i915_gem_context.h | 16 ++++++++++++++++
 drivers/gpu/drm/i915/i915_reset.c       |  3 ++-
 include/uapi/drm/i915_drm.h             | 20 ++++++++++++++++++++
 4 files changed, 53 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 280813a4bf82..da21c843fed8 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -401,6 +401,8 @@ __create_hw_context(struct drm_i915_private *dev_priv,
 	ctx->remap_slice = ALL_L3_SLICES(dev_priv);
 
 	i915_gem_context_set_bannable(ctx);
+	i915_gem_context_set_recoverable(ctx);
+
 	ctx->ring_size = 4 * PAGE_SIZE;
 	ctx->desc_template =
 		default_desc_template(dev_priv, dev_priv->mm.aliasing_ppgtt);
@@ -951,6 +953,10 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
 		args->size = 0;
 		args->value = i915_gem_context_is_bannable(ctx);
 		break;
+	case I915_CONTEXT_PARAM_RECOVERABLE:
+		args->size = 0;
+		args->value = i915_gem_context_is_recoverable(ctx);
+		break;
 	case I915_CONTEXT_PARAM_PRIORITY:
 		args->size = 0;
 		args->value = ctx->sched.priority >> I915_USER_PRIORITY_SHIFT;
@@ -1285,6 +1291,15 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
 			i915_gem_context_clear_bannable(ctx);
 		break;
 
+	case I915_CONTEXT_PARAM_RECOVERABLE:
+		if (args->size)
+			ret = -EINVAL;
+		else if (args->value)
+			i915_gem_context_set_recoverable(ctx);
+		else
+			i915_gem_context_clear_recoverable(ctx);
+		break;
+
 	case I915_CONTEXT_PARAM_PRIORITY:
 		{
 			s64 priority = args->value;
diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
index ca150a764c24..071108d34ae0 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/i915_gem_context.h
@@ -134,6 +134,7 @@ struct i915_gem_context {
 #define UCONTEXT_NO_ZEROMAP		0
 #define UCONTEXT_NO_ERROR_CAPTURE	1
 #define UCONTEXT_BANNABLE		2
+#define UCONTEXT_RECOVERABLE		3
 
 	/**
 	 * @flags: small set of booleans
@@ -270,6 +271,21 @@ static inline void i915_gem_context_clear_bannable(struct i915_gem_context *ctx)
 	clear_bit(UCONTEXT_BANNABLE, &ctx->user_flags);
 }
 
+static inline bool i915_gem_context_is_recoverable(const struct i915_gem_context *ctx)
+{
+	return test_bit(UCONTEXT_RECOVERABLE, &ctx->user_flags);
+}
+
+static inline void i915_gem_context_set_recoverable(struct i915_gem_context *ctx)
+{
+	set_bit(UCONTEXT_RECOVERABLE, &ctx->user_flags);
+}
+
+static inline void i915_gem_context_clear_recoverable(struct i915_gem_context *ctx)
+{
+	clear_bit(UCONTEXT_RECOVERABLE, &ctx->user_flags);
+}
+
 static inline bool i915_gem_context_is_banned(const struct i915_gem_context *ctx)
 {
 	return test_bit(CONTEXT_BANNED, &ctx->flags);
diff --git a/drivers/gpu/drm/i915/i915_reset.c b/drivers/gpu/drm/i915/i915_reset.c
index 5a067a4b3d5d..1911e00d2581 100644
--- a/drivers/gpu/drm/i915/i915_reset.c
+++ b/drivers/gpu/drm/i915/i915_reset.c
@@ -66,7 +66,8 @@ static bool context_mark_guilty(struct i915_gem_context *ctx)
 
 	bannable = i915_gem_context_is_bannable(ctx);
 	score = atomic_add_return(CONTEXT_SCORE_GUILTY, &ctx->ban_score);
-	banned = score >= CONTEXT_SCORE_BAN_THRESHOLD;
+	banned = (!i915_gem_context_is_recoverable(ctx) ||
+		  score >= CONTEXT_SCORE_BAN_THRESHOLD);
 
 	/* Cool contexts don't accumulate client ban score */
 	if (!bannable)
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 397810fa2d33..c890b7992d5c 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1491,6 +1491,26 @@ struct drm_i915_gem_context_param {
 	 * drm_i915_gem_context_param_sseu.
 	 */
 #define I915_CONTEXT_PARAM_SSEU		0x7
+
+/*
+ * Not all clients may want to attempt automatic recover of a context after
+ * a hang (for example, some clients may only submit very small incremental
+ * batches relying on known logical state of previous batches which will never
+ * recover correctly and each attempt will hang), and so would prefer that
+ * the context is forever banned instead.
+ *
+ * If set to false (0), after a reset, subsequent (and in flight) rendering
+ * from this context is discarded, and the client will need to create a new
+ * context to use instead.
+ *
+ * If set to true (1), the kernel will automatically attempt to recover the
+ * context by skipping the hanging batch and executing the next batch starting
+ * from the default context state (discarding the incomplete logical context
+ * state lost due to the reset).
+ *
+ * On creation, all new contexts are marked as recoverable.
+ */
+#define I915_CONTEXT_PARAM_RECOVERABLE	0x8
 	__u64 value;
 };
 
-- 
cgit v1.2.3


From 0d0216c03a7a14e121abb2e3eb38e491767c36e8 Mon Sep 17 00:00:00 2001
From: Yury Norov <ynorov@caviumnetworks.com>
Date: Wed, 16 May 2018 11:18:48 +0300
Subject: compat ABI: use non-compat openat and open_by_handle_at variants

The only difference between native and compat openat and open_by_handle_at
is that non-compat version forces O_LARGEFILE, and it should be the
default behaviour for all architectures, as we are going to drop the
support of 32-bit userspace off_t.

Signed-off-by: Yury Norov <ynorov@caviumnetworks.com>
Signed-off-by: Yury Norov <ynorov@marvell.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 include/uapi/asm-generic/unistd.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index acf9a07ab2ff..b928eff3bf92 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -179,7 +179,7 @@ __SYSCALL(__NR_fchownat, sys_fchownat)
 #define __NR_fchown 55
 __SYSCALL(__NR_fchown, sys_fchown)
 #define __NR_openat 56
-__SC_COMP(__NR_openat, sys_openat, compat_sys_openat)
+__SYSCALL(__NR_openat, sys_openat)
 #define __NR_close 57
 __SYSCALL(__NR_close, sys_close)
 #define __NR_vhangup 58
@@ -678,8 +678,7 @@ __SYSCALL(__NR_fanotify_mark, sys_fanotify_mark)
 #define __NR_name_to_handle_at         264
 __SYSCALL(__NR_name_to_handle_at, sys_name_to_handle_at)
 #define __NR_open_by_handle_at         265
-__SC_COMP(__NR_open_by_handle_at, sys_open_by_handle_at, \
-	  compat_sys_open_by_handle_at)
+__SYSCALL(__NR_open_by_handle_at, sys_open_by_handle_at)
 #define __NR_clock_adjtime 266
 __SC_3264(__NR_clock_adjtime, sys_clock_adjtime32, sys_clock_adjtime)
 #define __NR_syncfs 267
-- 
cgit v1.2.3


From 3d769df5fc32dc3cb12d6ccb61690b09371b8b3f Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil@xs4all.nl>
Date: Fri, 8 Feb 2019 03:49:23 -0500
Subject: media: v4l2-subdev.h: v4l2_subdev_call: use temp __sd variable

The sd argument of this macro can be a more complex expression. Since it
is used 5 times in the macro it can be evaluated that many times as well.

So assign it to a temp variable in the beginning and use that instead.

This also avoids any potential side-effects of evaluating sd.

Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Acked-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 include/media/v4l2-subdev.h | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/media/v4l2-subdev.h b/include/media/v4l2-subdev.h
index 47af609dc8f1..34da094a3f40 100644
--- a/include/media/v4l2-subdev.h
+++ b/include/media/v4l2-subdev.h
@@ -1093,13 +1093,14 @@ void v4l2_subdev_init(struct v4l2_subdev *sd,
  */
 #define v4l2_subdev_call(sd, o, f, args...)				\
 	({								\
+		struct v4l2_subdev *__sd = (sd);			\
 		int __result;						\
-		if (!(sd))						\
+		if (!__sd)						\
 			__result = -ENODEV;				\
-		else if (!((sd)->ops->o && (sd)->ops->o->f))		\
+		else if (!(__sd->ops->o && __sd->ops->o->f))		\
 			__result = -ENOIOCTLCMD;			\
 		else							\
-			__result = (sd)->ops->o->f((sd), ##args);	\
+			__result = __sd->ops->o->f(__sd, ##args);	\
 		__result;						\
 	})
 
-- 
cgit v1.2.3


From 852661a8691fd1f974b61eaa317d2620198f7c76 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Fri, 8 Feb 2019 01:02:58 -0500
Subject: drm: tweak header name

Use linux/mman.h to make sure we get all mmap flags we need.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 include/drm/drmP.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/drm/drmP.h b/include/drm/drmP.h
index bdb0d5548f39..a3184416ddc5 100644
--- a/include/drm/drmP.h
+++ b/include/drm/drmP.h
@@ -57,8 +57,7 @@
 #include <linux/workqueue.h>
 #include <linux/dma-fence.h>
 #include <linux/module.h>
-
-#include <asm/mman.h>
+#include <linux/mman.h>
 #include <asm/pgalloc.h>
 #include <linux/uaccess.h>
 
-- 
cgit v1.2.3


From 746c9398f5ac2b3f5730da4ed09e99ef4bb50b4a Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Fri, 8 Feb 2019 01:02:55 -0500
Subject: arch: move common mmap flags to linux/mman.h

Now that we have 3 mmap flags shared by all architectures,
let's move them into the common header.

This will help discourage future architectures from duplicating code.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 arch/alpha/include/uapi/asm/mman.h     | 4 +---
 arch/mips/include/uapi/asm/mman.h      | 4 +---
 arch/parisc/include/uapi/asm/mman.h    | 4 +---
 arch/xtensa/include/uapi/asm/mman.h    | 4 +---
 include/uapi/asm-generic/mman-common.h | 4 +---
 include/uapi/linux/mman.h              | 4 ++++
 6 files changed, 9 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/arch/alpha/include/uapi/asm/mman.h b/arch/alpha/include/uapi/asm/mman.h
index f9d4e6b6d4bd..ac23379b7a87 100644
--- a/arch/alpha/include/uapi/asm/mman.h
+++ b/arch/alpha/include/uapi/asm/mman.h
@@ -10,9 +10,7 @@
 #define PROT_GROWSDOWN	0x01000000	/* mprotect flag: extend change to start of growsdown vma */
 #define PROT_GROWSUP	0x02000000	/* mprotect flag: extend change to end of growsup vma */
 
-#define MAP_SHARED	0x01		/* Share changes */
-#define MAP_PRIVATE	0x02		/* Changes are private */
-#define MAP_SHARED_VALIDATE 0x03	/* share + validate extension flags */
+/* 0x01 - 0x03 are defined in linux/mman.h */
 #define MAP_TYPE	0x0f		/* Mask for type of mapping (OSF/1 is _wrong_) */
 #define MAP_FIXED	0x100		/* Interpret addr exactly */
 #define MAP_ANONYMOUS	0x10		/* don't use a file */
diff --git a/arch/mips/include/uapi/asm/mman.h b/arch/mips/include/uapi/asm/mman.h
index 3035ca499cd8..c2b40969eb1f 100644
--- a/arch/mips/include/uapi/asm/mman.h
+++ b/arch/mips/include/uapi/asm/mman.h
@@ -27,9 +27,7 @@
 /*
  * Flags for mmap
  */
-#define MAP_SHARED	0x001		/* Share changes */
-#define MAP_PRIVATE	0x002		/* Changes are private */
-#define MAP_SHARED_VALIDATE 0x003	/* share + validate extension flags */
+/* 0x01 - 0x03 are defined in linux/mman.h */
 #define MAP_TYPE	0x00f		/* Mask for type of mapping */
 #define MAP_FIXED	0x010		/* Interpret addr exactly */
 
diff --git a/arch/parisc/include/uapi/asm/mman.h b/arch/parisc/include/uapi/asm/mman.h
index 870fbf8c7088..c98162f494db 100644
--- a/arch/parisc/include/uapi/asm/mman.h
+++ b/arch/parisc/include/uapi/asm/mman.h
@@ -10,9 +10,7 @@
 #define PROT_GROWSDOWN	0x01000000	/* mprotect flag: extend change to start of growsdown vma */
 #define PROT_GROWSUP	0x02000000	/* mprotect flag: extend change to end of growsup vma */
 
-#define MAP_SHARED	0x01		/* Share changes */
-#define MAP_PRIVATE	0x02		/* Changes are private */
-#define MAP_SHARED_VALIDATE 0x03	/* share + validate extension flags */
+/* 0x01 - 0x03 are defined in linux/mman.h */
 #define MAP_TYPE	0x2b		/* Mask for type of mapping, includes bits 0x08 and 0x20 */
 #define MAP_FIXED	0x04		/* Interpret addr exactly */
 #define MAP_ANONYMOUS	0x10		/* don't use a file */
diff --git a/arch/xtensa/include/uapi/asm/mman.h b/arch/xtensa/include/uapi/asm/mman.h
index 58f29a9d895d..be726062412b 100644
--- a/arch/xtensa/include/uapi/asm/mman.h
+++ b/arch/xtensa/include/uapi/asm/mman.h
@@ -34,9 +34,7 @@
 /*
  * Flags for mmap
  */
-#define MAP_SHARED	0x001		/* Share changes */
-#define MAP_PRIVATE	0x002		/* Changes are private */
-#define MAP_SHARED_VALIDATE 0x003	/* share + validate extension flags */
+/* 0x01 - 0x03 are defined in linux/mman.h */
 #define MAP_TYPE	0x00f		/* Mask for type of mapping */
 #define MAP_FIXED	0x010		/* Interpret addr exactly */
 
diff --git a/include/uapi/asm-generic/mman-common.h b/include/uapi/asm-generic/mman-common.h
index e7ee32861d51..abd238d0f7a4 100644
--- a/include/uapi/asm-generic/mman-common.h
+++ b/include/uapi/asm-generic/mman-common.h
@@ -15,9 +15,7 @@
 #define PROT_GROWSDOWN	0x01000000	/* mprotect flag: extend change to start of growsdown vma */
 #define PROT_GROWSUP	0x02000000	/* mprotect flag: extend change to end of growsup vma */
 
-#define MAP_SHARED	0x01		/* Share changes */
-#define MAP_PRIVATE	0x02		/* Changes are private */
-#define MAP_SHARED_VALIDATE 0x03	/* share + validate extension flags */
+/* 0x01 - 0x03 are defined in linux/mman.h */
 #define MAP_TYPE	0x0f		/* Mask for type of mapping */
 #define MAP_FIXED	0x10		/* Interpret addr exactly */
 #define MAP_ANONYMOUS	0x20		/* don't use a file */
diff --git a/include/uapi/linux/mman.h b/include/uapi/linux/mman.h
index d0f515d53299..fc1a64c3447b 100644
--- a/include/uapi/linux/mman.h
+++ b/include/uapi/linux/mman.h
@@ -12,6 +12,10 @@
 #define OVERCOMMIT_ALWAYS		1
 #define OVERCOMMIT_NEVER		2
 
+#define MAP_SHARED	0x01		/* Share changes */
+#define MAP_PRIVATE	0x02		/* Changes are private */
+#define MAP_SHARED_VALIDATE 0x03	/* share + validate extension flags */
+
 /*
  * Huge page size encoding when MAP_HUGETLB is specified, and a huge page
  * size other than the default is desired.  See hugetlb_encode.h.
-- 
cgit v1.2.3


From 35c0272502cca0a1b461d310c23aac94a503983d Mon Sep 17 00:00:00 2001
From: Ramalingam C <ramalingam.c@intel.com>
Date: Sat, 16 Feb 2019 10:34:59 +0530
Subject: drm/audio: declaration of struct device

Header has used the references to struct device without it definition
or declaration. Hence resulting in compilation warning such as

  "'struct device' declared inside parameter list..."

This changes adds a declaration to struct device in the header to avoid
any such warnings.

Signed-off-by: Ramalingam C <ramalingam.c@intel.com>
cc: Takashi Iwai <tiwai@suse.de>
cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Acked-by: Dave Airlie <airlied@redhat.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: https://patchwork.freedesktop.org/patch/msgid/1550293499-5560-1-git-send-email-ramalingam.c@intel.com
---
 include/drm/drm_audio_component.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/drm/drm_audio_component.h b/include/drm/drm_audio_component.h
index 4923b00328c1..93a386be38fa 100644
--- a/include/drm/drm_audio_component.h
+++ b/include/drm/drm_audio_component.h
@@ -5,6 +5,7 @@
 #define _DRM_AUDIO_COMPONENT_H_
 
 struct drm_audio_component;
+struct device;
 
 /**
  * struct drm_audio_component_ops - Ops implemented by DRM driver, called by hda driver
-- 
cgit v1.2.3


From a7fe4ca72b1fe7877de5672640d0b4e023d0fdca Mon Sep 17 00:00:00 2001
From: Vivek Kasireddy <vivek.kasireddy@intel.com>
Date: Thu, 7 Feb 2019 22:18:43 -0500
Subject: media: v4l: Add 32-bit packed YUV formats

The formats added in this patch include:
 V4L2_PIX_FMT_AYUV32
 V4L2_PIX_FMT_XYUV32
 V4L2_PIX_FMT_VUYA32
 V4L2_PIX_FMT_VUYX32

These formats enable the trasmission of alpha channel data to other
drivers and userspace applications in addition to YUV data. For
example, buffers generated by drivers in one of these formats
can be used by the Weston compositor to display as a texture or
flipped directly onto the overlay planes with the help of a DRM
driver.

Signed-off-by: Vivek Kasireddy <vivek.kasireddy@intel.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/media/uapi/v4l/pixfmt-packed-yuv.rst | 170 ++++++++++++++++++++-
 drivers/media/v4l2-core/v4l2-ioctl.c               |   4 +
 include/uapi/linux/videodev2.h                     |   4 +
 3 files changed, 177 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/Documentation/media/uapi/v4l/pixfmt-packed-yuv.rst b/Documentation/media/uapi/v4l/pixfmt-packed-yuv.rst
index f53e8f57a003..7fcee1c11ac4 100644
--- a/Documentation/media/uapi/v4l/pixfmt-packed-yuv.rst
+++ b/Documentation/media/uapi/v4l/pixfmt-packed-yuv.rst
@@ -190,6 +190,170 @@ component of each pixel in one 16 or 32 bit word.
       - Cr\ :sub:`2`
       - Cr\ :sub:`1`
       - Cr\ :sub:`0`
+      -
+    * .. _V4L2-PIX-FMT-AYUV32:
+
+      - ``V4L2_PIX_FMT_AYUV32``
+      - 'AYUV'
+
+      - a\ :sub:`7`
+      - a\ :sub:`6`
+      - a\ :sub:`5`
+      - a\ :sub:`4`
+      - a\ :sub:`3`
+      - a\ :sub:`2`
+      - a\ :sub:`1`
+      - a\ :sub:`0`
+
+      - Y'\ :sub:`7`
+      - Y'\ :sub:`6`
+      - Y'\ :sub:`5`
+      - Y'\ :sub:`4`
+      - Y'\ :sub:`3`
+      - Y'\ :sub:`2`
+      - Y'\ :sub:`1`
+      - Y'\ :sub:`0`
+
+      - Cb\ :sub:`7`
+      - Cb\ :sub:`6`
+      - Cb\ :sub:`5`
+      - Cb\ :sub:`4`
+      - Cb\ :sub:`3`
+      - Cb\ :sub:`2`
+      - Cb\ :sub:`1`
+      - Cb\ :sub:`0`
+
+      - Cr\ :sub:`7`
+      - Cr\ :sub:`6`
+      - Cr\ :sub:`5`
+      - Cr\ :sub:`4`
+      - Cr\ :sub:`3`
+      - Cr\ :sub:`2`
+      - Cr\ :sub:`1`
+      - Cr\ :sub:`0`
+      -
+    * .. _V4L2-PIX-FMT-XYUV32:
+
+      - ``V4L2_PIX_FMT_XYUV32``
+      - 'XYUV'
+
+      -
+      -
+      -
+      -
+      -
+      -
+      -
+      -
+
+      - Y'\ :sub:`7`
+      - Y'\ :sub:`6`
+      - Y'\ :sub:`5`
+      - Y'\ :sub:`4`
+      - Y'\ :sub:`3`
+      - Y'\ :sub:`2`
+      - Y'\ :sub:`1`
+      - Y'\ :sub:`0`
+
+      - Cb\ :sub:`7`
+      - Cb\ :sub:`6`
+      - Cb\ :sub:`5`
+      - Cb\ :sub:`4`
+      - Cb\ :sub:`3`
+      - Cb\ :sub:`2`
+      - Cb\ :sub:`1`
+      - Cb\ :sub:`0`
+
+      - Cr\ :sub:`7`
+      - Cr\ :sub:`6`
+      - Cr\ :sub:`5`
+      - Cr\ :sub:`4`
+      - Cr\ :sub:`3`
+      - Cr\ :sub:`2`
+      - Cr\ :sub:`1`
+      - Cr\ :sub:`0`
+      -
+    * .. _V4L2-PIX-FMT-VUYA32:
+
+      - ``V4L2_PIX_FMT_VUYA32``
+      - 'VUYA'
+
+      - Cr\ :sub:`7`
+      - Cr\ :sub:`6`
+      - Cr\ :sub:`5`
+      - Cr\ :sub:`4`
+      - Cr\ :sub:`3`
+      - Cr\ :sub:`2`
+      - Cr\ :sub:`1`
+      - Cr\ :sub:`0`
+
+      - Cb\ :sub:`7`
+      - Cb\ :sub:`6`
+      - Cb\ :sub:`5`
+      - Cb\ :sub:`4`
+      - Cb\ :sub:`3`
+      - Cb\ :sub:`2`
+      - Cb\ :sub:`1`
+      - Cb\ :sub:`0`
+
+      - Y'\ :sub:`7`
+      - Y'\ :sub:`6`
+      - Y'\ :sub:`5`
+      - Y'\ :sub:`4`
+      - Y'\ :sub:`3`
+      - Y'\ :sub:`2`
+      - Y'\ :sub:`1`
+      - Y'\ :sub:`0`
+
+      - a\ :sub:`7`
+      - a\ :sub:`6`
+      - a\ :sub:`5`
+      - a\ :sub:`4`
+      - a\ :sub:`3`
+      - a\ :sub:`2`
+      - a\ :sub:`1`
+      - a\ :sub:`0`
+      -
+    * .. _V4L2-PIX-FMT-VUYX32:
+
+      - ``V4L2_PIX_FMT_VUYX32``
+      - 'VUYX'
+
+      - Cr\ :sub:`7`
+      - Cr\ :sub:`6`
+      - Cr\ :sub:`5`
+      - Cr\ :sub:`4`
+      - Cr\ :sub:`3`
+      - Cr\ :sub:`2`
+      - Cr\ :sub:`1`
+      - Cr\ :sub:`0`
+
+      - Cb\ :sub:`7`
+      - Cb\ :sub:`6`
+      - Cb\ :sub:`5`
+      - Cb\ :sub:`4`
+      - Cb\ :sub:`3`
+      - Cb\ :sub:`2`
+      - Cb\ :sub:`1`
+      - Cb\ :sub:`0`
+
+      - Y'\ :sub:`7`
+      - Y'\ :sub:`6`
+      - Y'\ :sub:`5`
+      - Y'\ :sub:`4`
+      - Y'\ :sub:`3`
+      - Y'\ :sub:`2`
+      - Y'\ :sub:`1`
+      - Y'\ :sub:`0`
+
+      -
+      -
+      -
+      -
+      -
+      -
+      -
+      -
 
 .. raw:: latex
 
@@ -202,4 +366,8 @@ component of each pixel in one 16 or 32 bit word.
     #) The value of a = alpha bits is undefined when reading from the driver,
        ignored when writing to the driver, except when alpha blending has
        been negotiated for a :ref:`Video Overlay <overlay>` or
-       :ref:`Video Output Overlay <osd>`.
+       :ref:`Video Output Overlay <osd>` for the formats Y444, YUV555 and
+       YUV4. However, for formats AYUV32 and VUYA32, the alpha component is
+       expected to contain a meaningful value that can be used by drivers
+       and applications. And, the formats XYUV32 and VUYX32 contain undefined
+       alpha values that must be ignored by all applications and drivers.
diff --git a/drivers/media/v4l2-core/v4l2-ioctl.c b/drivers/media/v4l2-core/v4l2-ioctl.c
index 206b7348797e..6f707466b5d2 100644
--- a/drivers/media/v4l2-core/v4l2-ioctl.c
+++ b/drivers/media/v4l2-core/v4l2-ioctl.c
@@ -1220,6 +1220,10 @@ static void v4l_fill_fmtdesc(struct v4l2_fmtdesc *fmt)
 	case V4L2_PIX_FMT_YUV555:	descr = "16-bit A/XYUV 1-5-5-5"; break;
 	case V4L2_PIX_FMT_YUV565:	descr = "16-bit YUV 5-6-5"; break;
 	case V4L2_PIX_FMT_YUV32:	descr = "32-bit A/XYUV 8-8-8-8"; break;
+	case V4L2_PIX_FMT_AYUV32:	descr = "32-bit AYUV 8-8-8-8"; break;
+	case V4L2_PIX_FMT_XYUV32:	descr = "32-bit XYUV 8-8-8-8"; break;
+	case V4L2_PIX_FMT_VUYA32:	descr = "32-bit VUYA 8-8-8-8"; break;
+	case V4L2_PIX_FMT_VUYX32:	descr = "32-bit VUYX 8-8-8-8"; break;
 	case V4L2_PIX_FMT_YUV410:	descr = "Planar YUV 4:1:0"; break;
 	case V4L2_PIX_FMT_YUV420:	descr = "Planar YUV 4:2:0"; break;
 	case V4L2_PIX_FMT_HI240:	descr = "8-bit Dithered RGB (BTTV)"; break;
diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
index 9a920f071ff9..1db220da3bcc 100644
--- a/include/uapi/linux/videodev2.h
+++ b/include/uapi/linux/videodev2.h
@@ -562,6 +562,10 @@ struct v4l2_pix_format {
 #define V4L2_PIX_FMT_YUV555  v4l2_fourcc('Y', 'U', 'V', 'O') /* 16  YUV-5-5-5     */
 #define V4L2_PIX_FMT_YUV565  v4l2_fourcc('Y', 'U', 'V', 'P') /* 16  YUV-5-6-5     */
 #define V4L2_PIX_FMT_YUV32   v4l2_fourcc('Y', 'U', 'V', '4') /* 32  YUV-8-8-8-8   */
+#define V4L2_PIX_FMT_AYUV32  v4l2_fourcc('A', 'Y', 'U', 'V') /* 32  AYUV-8-8-8-8  */
+#define V4L2_PIX_FMT_XYUV32  v4l2_fourcc('X', 'Y', 'U', 'V') /* 32  XYUV-8-8-8-8  */
+#define V4L2_PIX_FMT_VUYA32  v4l2_fourcc('V', 'U', 'Y', 'A') /* 32  VUYA-8-8-8-8  */
+#define V4L2_PIX_FMT_VUYX32  v4l2_fourcc('V', 'U', 'Y', 'X') /* 32  VUYX-8-8-8-8  */
 #define V4L2_PIX_FMT_HI240   v4l2_fourcc('H', 'I', '2', '4') /*  8  8-bit color   */
 #define V4L2_PIX_FMT_HM12    v4l2_fourcc('H', 'M', '1', '2') /*  8  YUV 4:2:0 16x16 macroblocks */
 #define V4L2_PIX_FMT_M420    v4l2_fourcc('M', '4', '2', '0') /* 12  YUV 4:2:0 2 lines y, 1 line uv interleaved */
-- 
cgit v1.2.3


From cfc7740835d09b98a304a4793c93b6074c04379e Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Date: Mon, 4 Feb 2019 05:11:32 -0500
Subject: media: vb2: replace bool by bitfield in vb2_buffer

The bool type is not recommended for use in structs, so replace these
by bitfields.

Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Reviewed-by: Paul Kocialkowski <paul.kocialkowski@bootlin.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 drivers/media/common/videobuf2/videobuf2-core.c | 12 ++++++------
 include/media/videobuf2-core.h                  |  4 ++--
 2 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/drivers/media/common/videobuf2/videobuf2-core.c b/drivers/media/common/videobuf2/videobuf2-core.c
index 34cc87ca8d59..b4457edc3245 100644
--- a/drivers/media/common/videobuf2/videobuf2-core.c
+++ b/drivers/media/common/videobuf2/videobuf2-core.c
@@ -934,7 +934,7 @@ void vb2_buffer_done(struct vb2_buffer *vb, enum vb2_buffer_state state)
 		/* sync buffers */
 		for (plane = 0; plane < vb->num_planes; ++plane)
 			call_void_memop(vb, finish, vb->planes[plane].mem_priv);
-		vb->synced = false;
+		vb->synced = 0;
 	}
 
 	spin_lock_irqsave(&q->done_lock, flags);
@@ -1313,8 +1313,8 @@ static int __buf_prepare(struct vb2_buffer *vb)
 	for (plane = 0; plane < vb->num_planes; ++plane)
 		call_void_memop(vb, prepare, vb->planes[plane].mem_priv);
 
-	vb->synced = true;
-	vb->prepared = true;
+	vb->synced = 1;
+	vb->prepared = 1;
 	vb->state = orig_state;
 
 	return 0;
@@ -1802,7 +1802,7 @@ int vb2_core_dqbuf(struct vb2_queue *q, unsigned int *pindex, void *pb,
 	}
 
 	call_void_vb_qop(vb, buf_finish, vb);
-	vb->prepared = false;
+	vb->prepared = 0;
 
 	if (pindex)
 		*pindex = vb->index;
@@ -1926,12 +1926,12 @@ static void __vb2_queue_cancel(struct vb2_queue *q)
 			for (plane = 0; plane < vb->num_planes; ++plane)
 				call_void_memop(vb, finish,
 						vb->planes[plane].mem_priv);
-			vb->synced = false;
+			vb->synced = 0;
 		}
 
 		if (vb->prepared) {
 			call_void_vb_qop(vb, buf_finish, vb);
-			vb->prepared = false;
+			vb->prepared = 0;
 		}
 		__vb2_dqbuf(vb);
 
diff --git a/include/media/videobuf2-core.h b/include/media/videobuf2-core.h
index 06142c1469cc..6d5490bb91d3 100644
--- a/include/media/videobuf2-core.h
+++ b/include/media/videobuf2-core.h
@@ -269,8 +269,8 @@ struct vb2_buffer {
 	 * vb2_plane:		per-plane information; do not change
 	 */
 	enum vb2_buffer_state	state;
-	bool			synced;
-	bool			prepared;
+	unsigned int		synced:1;
+	unsigned int		prepared:1;
 
 	struct vb2_plane	planes[VB2_MAX_PLANES];
 	struct list_head	queued_entry;
-- 
cgit v1.2.3


From 7e4e71624491d8a8befe62b43138beb0ab696006 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Date: Mon, 4 Feb 2019 05:11:33 -0500
Subject: media: vb2: keep track of timestamp status

If a stream is stopped, or if a USERPTR/DMABUF buffer is queued
backed by a different user address or dmabuf fd, then the timestamp
should be skipped by vb2_find_timestamp since the memory it refers
to is no longer valid.

So keep track of a 'copied_timestamp' state: it is set when the
timestamp is copied from an output to a capture buffer, and is
cleared when it is no longer valid.

Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Reviewed-by: Paul Kocialkowski <paul.kocialkowski@bootlin.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 drivers/media/common/videobuf2/videobuf2-core.c | 3 +++
 drivers/media/common/videobuf2/videobuf2-v4l2.c | 3 ++-
 drivers/media/v4l2-core/v4l2-mem2mem.c          | 1 +
 include/media/videobuf2-core.h                  | 3 +++
 4 files changed, 9 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/media/common/videobuf2/videobuf2-core.c b/drivers/media/common/videobuf2/videobuf2-core.c
index b4457edc3245..275cddd8808c 100644
--- a/drivers/media/common/videobuf2/videobuf2-core.c
+++ b/drivers/media/common/videobuf2/videobuf2-core.c
@@ -1041,6 +1041,7 @@ static int __prepare_userptr(struct vb2_buffer *vb)
 		if (vb->planes[plane].mem_priv) {
 			if (!reacquired) {
 				reacquired = true;
+				vb->copied_timestamp = 0;
 				call_void_vb_qop(vb, buf_cleanup, vb);
 			}
 			call_void_memop(vb, put_userptr, vb->planes[plane].mem_priv);
@@ -1165,6 +1166,7 @@ static int __prepare_dmabuf(struct vb2_buffer *vb)
 
 		if (!reacquired) {
 			reacquired = true;
+			vb->copied_timestamp = 0;
 			call_void_vb_qop(vb, buf_cleanup, vb);
 		}
 
@@ -1942,6 +1944,7 @@ static void __vb2_queue_cancel(struct vb2_queue *q)
 		if (vb->request)
 			media_request_put(vb->request);
 		vb->request = NULL;
+		vb->copied_timestamp = 0;
 	}
 }
 
diff --git a/drivers/media/common/videobuf2/videobuf2-v4l2.c b/drivers/media/common/videobuf2/videobuf2-v4l2.c
index 3aeaea3af42a..55277370c313 100644
--- a/drivers/media/common/videobuf2/videobuf2-v4l2.c
+++ b/drivers/media/common/videobuf2/videobuf2-v4l2.c
@@ -604,7 +604,8 @@ int vb2_find_timestamp(const struct vb2_queue *q, u64 timestamp,
 	unsigned int i;
 
 	for (i = start_idx; i < q->num_buffers; i++)
-		if (q->bufs[i]->timestamp == timestamp)
+		if (q->bufs[i]->copied_timestamp &&
+		    q->bufs[i]->timestamp == timestamp)
 			return i;
 	return -1;
 }
diff --git a/drivers/media/v4l2-core/v4l2-mem2mem.c b/drivers/media/v4l2-core/v4l2-mem2mem.c
index 1494d0d5951a..add228c98a9a 100644
--- a/drivers/media/v4l2-core/v4l2-mem2mem.c
+++ b/drivers/media/v4l2-core/v4l2-mem2mem.c
@@ -992,6 +992,7 @@ void v4l2_m2m_buf_copy_metadata(const struct vb2_v4l2_buffer *out_vb,
 	cap_vb->field = out_vb->field;
 	cap_vb->flags &= ~mask;
 	cap_vb->flags |= out_vb->flags & mask;
+	cap_vb->vb2_buf.copied_timestamp = 1;
 }
 EXPORT_SYMBOL_GPL(v4l2_m2m_buf_copy_metadata);
 
diff --git a/include/media/videobuf2-core.h b/include/media/videobuf2-core.h
index 6d5490bb91d3..a844abcae71e 100644
--- a/include/media/videobuf2-core.h
+++ b/include/media/videobuf2-core.h
@@ -262,6 +262,8 @@ struct vb2_buffer {
 	 * prepared:		this buffer has been prepared, i.e. the
 	 *			buf_prepare op was called. It is cleared again
 	 *			after the 'buf_finish' op is called.
+	 * copied_timestamp:	the timestamp of this capture buffer was copied
+	 *			from an output buffer.
 	 * queued_entry:	entry on the queued buffers list, which holds
 	 *			all buffers queued from userspace
 	 * done_entry:		entry on the list that stores all buffers ready
@@ -271,6 +273,7 @@ struct vb2_buffer {
 	enum vb2_buffer_state	state;
 	unsigned int		synced:1;
 	unsigned int		prepared:1;
+	unsigned int		copied_timestamp:1;
 
 	struct vb2_plane	planes[VB2_MAX_PLANES];
 	struct list_head	queued_entry;
-- 
cgit v1.2.3


From 8dd22b289c8fab1dcb13ab038c0228aa74f134ca Mon Sep 17 00:00:00 2001
From: Ezequiel Garcia <ezequiel@collabora.com>
Date: Fri, 8 Feb 2019 11:17:48 -0500
Subject: media: v4l2-mem2mem: Correct return type for mem2mem buffer helpers

This commit changes the return type of mem2mem buffer handling API.
Namely, these functions:

 v4l2_m2m_next_buf
 v4l2_m2m_last_buf
 v4l2_m2m_buf_remove
 v4l2_m2m_next_src_buf
 v4l2_m2m_next_dst_buf
 v4l2_m2m_last_src_buf
 v4l2_m2m_last_dst_buf
 v4l2_m2m_src_buf_remove
 v4l2_m2m_dst_buf_remove

which currently return void pointer.

In every case, the actual return type is a struct vb2_v4l2_buffer
pointer. Change the return type of the listed functions,
so type checking can be properly used.

Signed-off-by: Ezequiel Garcia <ezequiel@collabora.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
[hverkuil-cisco@xs4all.nl: clean up line-too-long checkpatch warnings]
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 drivers/media/v4l2-core/v4l2-mem2mem.c |  6 +++---
 include/media/v4l2-mem2mem.h           | 24 +++++++++++++++---------
 2 files changed, 18 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/drivers/media/v4l2-core/v4l2-mem2mem.c b/drivers/media/v4l2-core/v4l2-mem2mem.c
index 57eccbaa52de..3392833d9541 100644
--- a/drivers/media/v4l2-core/v4l2-mem2mem.c
+++ b/drivers/media/v4l2-core/v4l2-mem2mem.c
@@ -131,7 +131,7 @@ struct vb2_queue *v4l2_m2m_get_vq(struct v4l2_m2m_ctx *m2m_ctx,
 }
 EXPORT_SYMBOL(v4l2_m2m_get_vq);
 
-void *v4l2_m2m_next_buf(struct v4l2_m2m_queue_ctx *q_ctx)
+struct vb2_v4l2_buffer *v4l2_m2m_next_buf(struct v4l2_m2m_queue_ctx *q_ctx)
 {
 	struct v4l2_m2m_buffer *b;
 	unsigned long flags;
@@ -149,7 +149,7 @@ void *v4l2_m2m_next_buf(struct v4l2_m2m_queue_ctx *q_ctx)
 }
 EXPORT_SYMBOL_GPL(v4l2_m2m_next_buf);
 
-void *v4l2_m2m_last_buf(struct v4l2_m2m_queue_ctx *q_ctx)
+struct vb2_v4l2_buffer *v4l2_m2m_last_buf(struct v4l2_m2m_queue_ctx *q_ctx)
 {
 	struct v4l2_m2m_buffer *b;
 	unsigned long flags;
@@ -167,7 +167,7 @@ void *v4l2_m2m_last_buf(struct v4l2_m2m_queue_ctx *q_ctx)
 }
 EXPORT_SYMBOL_GPL(v4l2_m2m_last_buf);
 
-void *v4l2_m2m_buf_remove(struct v4l2_m2m_queue_ctx *q_ctx)
+struct vb2_v4l2_buffer *v4l2_m2m_buf_remove(struct v4l2_m2m_queue_ctx *q_ctx)
 {
 	struct v4l2_m2m_buffer *b;
 	unsigned long flags;
diff --git a/include/media/v4l2-mem2mem.h b/include/media/v4l2-mem2mem.h
index 47c6d9aa0bf4..bb3e63d6bd1a 100644
--- a/include/media/v4l2-mem2mem.h
+++ b/include/media/v4l2-mem2mem.h
@@ -425,7 +425,7 @@ unsigned int v4l2_m2m_num_dst_bufs_ready(struct v4l2_m2m_ctx *m2m_ctx)
  *
  * @q_ctx: pointer to struct @v4l2_m2m_queue_ctx
  */
-void *v4l2_m2m_next_buf(struct v4l2_m2m_queue_ctx *q_ctx);
+struct vb2_v4l2_buffer *v4l2_m2m_next_buf(struct v4l2_m2m_queue_ctx *q_ctx);
 
 /**
  * v4l2_m2m_next_src_buf() - return next source buffer from the list of ready
@@ -433,7 +433,8 @@ void *v4l2_m2m_next_buf(struct v4l2_m2m_queue_ctx *q_ctx);
  *
  * @m2m_ctx: m2m context assigned to the instance given by struct &v4l2_m2m_ctx
  */
-static inline void *v4l2_m2m_next_src_buf(struct v4l2_m2m_ctx *m2m_ctx)
+static inline struct vb2_v4l2_buffer *
+v4l2_m2m_next_src_buf(struct v4l2_m2m_ctx *m2m_ctx)
 {
 	return v4l2_m2m_next_buf(&m2m_ctx->out_q_ctx);
 }
@@ -444,7 +445,8 @@ static inline void *v4l2_m2m_next_src_buf(struct v4l2_m2m_ctx *m2m_ctx)
  *
  * @m2m_ctx: m2m context assigned to the instance given by struct &v4l2_m2m_ctx
  */
-static inline void *v4l2_m2m_next_dst_buf(struct v4l2_m2m_ctx *m2m_ctx)
+static inline struct vb2_v4l2_buffer *
+v4l2_m2m_next_dst_buf(struct v4l2_m2m_ctx *m2m_ctx)
 {
 	return v4l2_m2m_next_buf(&m2m_ctx->cap_q_ctx);
 }
@@ -454,7 +456,7 @@ static inline void *v4l2_m2m_next_dst_buf(struct v4l2_m2m_ctx *m2m_ctx)
  *
  * @q_ctx: pointer to struct @v4l2_m2m_queue_ctx
  */
-void *v4l2_m2m_last_buf(struct v4l2_m2m_queue_ctx *q_ctx);
+struct vb2_v4l2_buffer *v4l2_m2m_last_buf(struct v4l2_m2m_queue_ctx *q_ctx);
 
 /**
  * v4l2_m2m_last_src_buf() - return last destination buffer from the list of
@@ -462,7 +464,8 @@ void *v4l2_m2m_last_buf(struct v4l2_m2m_queue_ctx *q_ctx);
  *
  * @m2m_ctx: m2m context assigned to the instance given by struct &v4l2_m2m_ctx
  */
-static inline void *v4l2_m2m_last_src_buf(struct v4l2_m2m_ctx *m2m_ctx)
+static inline struct vb2_v4l2_buffer *
+v4l2_m2m_last_src_buf(struct v4l2_m2m_ctx *m2m_ctx)
 {
 	return v4l2_m2m_last_buf(&m2m_ctx->out_q_ctx);
 }
@@ -473,7 +476,8 @@ static inline void *v4l2_m2m_last_src_buf(struct v4l2_m2m_ctx *m2m_ctx)
  *
  * @m2m_ctx: m2m context assigned to the instance given by struct &v4l2_m2m_ctx
  */
-static inline void *v4l2_m2m_last_dst_buf(struct v4l2_m2m_ctx *m2m_ctx)
+static inline struct vb2_v4l2_buffer *
+v4l2_m2m_last_dst_buf(struct v4l2_m2m_ctx *m2m_ctx)
 {
 	return v4l2_m2m_last_buf(&m2m_ctx->cap_q_ctx);
 }
@@ -547,7 +551,7 @@ struct vb2_queue *v4l2_m2m_get_dst_vq(struct v4l2_m2m_ctx *m2m_ctx)
  *
  * @q_ctx: pointer to struct @v4l2_m2m_queue_ctx
  */
-void *v4l2_m2m_buf_remove(struct v4l2_m2m_queue_ctx *q_ctx);
+struct vb2_v4l2_buffer *v4l2_m2m_buf_remove(struct v4l2_m2m_queue_ctx *q_ctx);
 
 /**
  * v4l2_m2m_src_buf_remove() - take off a source buffer from the list of ready
@@ -555,7 +559,8 @@ void *v4l2_m2m_buf_remove(struct v4l2_m2m_queue_ctx *q_ctx);
  *
  * @m2m_ctx: m2m context assigned to the instance given by struct &v4l2_m2m_ctx
  */
-static inline void *v4l2_m2m_src_buf_remove(struct v4l2_m2m_ctx *m2m_ctx)
+static inline struct vb2_v4l2_buffer *
+v4l2_m2m_src_buf_remove(struct v4l2_m2m_ctx *m2m_ctx)
 {
 	return v4l2_m2m_buf_remove(&m2m_ctx->out_q_ctx);
 }
@@ -566,7 +571,8 @@ static inline void *v4l2_m2m_src_buf_remove(struct v4l2_m2m_ctx *m2m_ctx)
  *
  * @m2m_ctx: m2m context assigned to the instance given by struct &v4l2_m2m_ctx
  */
-static inline void *v4l2_m2m_dst_buf_remove(struct v4l2_m2m_ctx *m2m_ctx)
+static inline struct vb2_v4l2_buffer *
+v4l2_m2m_dst_buf_remove(struct v4l2_m2m_ctx *m2m_ctx)
 {
 	return v4l2_m2m_buf_remove(&m2m_ctx->cap_q_ctx);
 }
-- 
cgit v1.2.3


From 721074b03411327e7bf41555d4cc7c18f49313f7 Mon Sep 17 00:00:00 2001
From: Patrick Lerda <patrick9876@free.fr>
Date: Thu, 17 Jan 2019 03:50:13 -0500
Subject: media: rc: rcmm decoder and encoder

media: add support for RCMM infrared remote controls.

Signed-off-by: Patrick Lerda <patrick9876@free.fr>
Signed-off-by: Sean Young <sean@mess.org>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/media/lirc.h.rst.exceptions |   3 +
 MAINTAINERS                               |   5 +
 drivers/media/rc/Kconfig                  |  13 ++
 drivers/media/rc/Makefile                 |   1 +
 drivers/media/rc/ir-rcmm-decoder.c        | 254 ++++++++++++++++++++++++++++++
 drivers/media/rc/rc-core-priv.h           |   5 +
 drivers/media/rc/rc-main.c                |   9 ++
 include/media/rc-map.h                    |  14 +-
 include/uapi/linux/lirc.h                 |   6 +
 tools/include/uapi/linux/lirc.h           |  12 ++
 tools/testing/selftests/ir/ir_loopback.c  |   9 ++
 11 files changed, 328 insertions(+), 3 deletions(-)
 create mode 100644 drivers/media/rc/ir-rcmm-decoder.c

(limited to 'include')

diff --git a/Documentation/media/lirc.h.rst.exceptions b/Documentation/media/lirc.h.rst.exceptions
index 379b9e7df5d0..7a8b8ff4f076 100644
--- a/Documentation/media/lirc.h.rst.exceptions
+++ b/Documentation/media/lirc.h.rst.exceptions
@@ -60,6 +60,9 @@ ignore symbol RC_PROTO_SHARP
 ignore symbol RC_PROTO_XMP
 ignore symbol RC_PROTO_CEC
 ignore symbol RC_PROTO_IMON
+ignore symbol RC_PROTO_RCMM12
+ignore symbol RC_PROTO_RCMM24
+ignore symbol RC_PROTO_RCMM32
 
 # Undocumented macros
 
diff --git a/MAINTAINERS b/MAINTAINERS
index 914d333ee73d..9662ad6c58e4 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -16536,6 +16536,11 @@ M:	David Härdeman <david@hardeman.nu>
 S:	Maintained
 F:	drivers/media/rc/winbond-cir.c
 
+RCMM REMOTE CONTROLS DECODER
+M:	Patrick Lerda <patrick9876@free.fr>
+S:	Maintained
+F:	drivers/media/rc/ir-rcmm-decoder.c
+
 WINSYSTEMS EBC-C384 WATCHDOG DRIVER
 M:	William Breathitt Gray <vilhelm.gray@gmail.com>
 L:	linux-watchdog@vger.kernel.org
diff --git a/drivers/media/rc/Kconfig b/drivers/media/rc/Kconfig
index 8a216068a35a..8e95f6eac89d 100644
--- a/drivers/media/rc/Kconfig
+++ b/drivers/media/rc/Kconfig
@@ -133,6 +133,19 @@ config IR_IMON_DECODER
 	   remote control and you would like to use it with a raw IR
 	   receiver, or if you wish to use an encoder to transmit this IR.
 
+config IR_RCMM_DECODER
+	tristate "Enable IR raw decoder for the RC-MM protocol"
+	depends on RC_CORE
+	help
+	   Enable this option when you have IR with RC-MM protocol, and
+	   you need the software decoder. The driver supports 12,
+	   24 and 32 bits RC-MM variants. You can enable or disable the
+	   different modes using the following RC protocol keywords:
+	   'rc-mm-12', 'rc-mm-24' and 'rc-mm-32'.
+
+	   To compile this driver as a module, choose M here: the module
+	   will be called ir-rcmm-decoder.
+
 endif #RC_DECODERS
 
 menuconfig RC_DEVICES
diff --git a/drivers/media/rc/Makefile b/drivers/media/rc/Makefile
index 92c163816849..48d23433b3c0 100644
--- a/drivers/media/rc/Makefile
+++ b/drivers/media/rc/Makefile
@@ -16,6 +16,7 @@ obj-$(CONFIG_IR_SHARP_DECODER) += ir-sharp-decoder.o
 obj-$(CONFIG_IR_MCE_KBD_DECODER) += ir-mce_kbd-decoder.o
 obj-$(CONFIG_IR_XMP_DECODER) += ir-xmp-decoder.o
 obj-$(CONFIG_IR_IMON_DECODER) += ir-imon-decoder.o
+obj-$(CONFIG_IR_RCMM_DECODER) += ir-rcmm-decoder.o
 
 # stand-alone IR receivers/transmitters
 obj-$(CONFIG_RC_ATI_REMOTE) += ati_remote.o
diff --git a/drivers/media/rc/ir-rcmm-decoder.c b/drivers/media/rc/ir-rcmm-decoder.c
new file mode 100644
index 000000000000..f1096ac1e5c5
--- /dev/null
+++ b/drivers/media/rc/ir-rcmm-decoder.c
@@ -0,0 +1,254 @@
+// SPDX-License-Identifier: GPL-2.0+
+// ir-rcmm-decoder.c - A decoder for the RCMM IR protocol
+//
+// Copyright (C) 2018 by Patrick Lerda <patrick9876@free.fr>
+
+#include "rc-core-priv.h"
+#include <linux/module.h>
+#include <linux/version.h>
+
+#define RCMM_UNIT		166667	/* nanosecs */
+#define RCMM_PREFIX_PULSE	416666  /* 166666.666666666*2.5 */
+#define RCMM_PULSE_0            277777  /* 166666.666666666*(1+2/3) */
+#define RCMM_PULSE_1            444444  /* 166666.666666666*(2+2/3) */
+#define RCMM_PULSE_2            611111  /* 166666.666666666*(3+2/3) */
+#define RCMM_PULSE_3            777778  /* 166666.666666666*(4+2/3) */
+
+enum rcmm_state {
+	STATE_INACTIVE,
+	STATE_LOW,
+	STATE_BUMP,
+	STATE_VALUE,
+	STATE_FINISHED,
+};
+
+static bool rcmm_mode(const struct rcmm_dec *data)
+{
+	return !((0x000c0000 & data->bits) == 0x000c0000);
+}
+
+static int rcmm_miscmode(struct rc_dev *dev, struct rcmm_dec *data)
+{
+	switch (data->count) {
+	case 24:
+		if (dev->enabled_protocols & RC_PROTO_BIT_RCMM24) {
+			rc_keydown(dev, RC_PROTO_RCMM24, data->bits, 0);
+			data->state = STATE_INACTIVE;
+			return 0;
+		}
+		return -1;
+
+	case 12:
+		if (dev->enabled_protocols & RC_PROTO_BIT_RCMM12) {
+			rc_keydown(dev, RC_PROTO_RCMM12, data->bits, 0);
+			data->state = STATE_INACTIVE;
+			return 0;
+		}
+		return -1;
+	}
+
+	return -1;
+}
+
+/**
+ * ir_rcmm_decode() - Decode one RCMM pulse or space
+ * @dev:	the struct rc_dev descriptor of the device
+ * @ev:		the struct ir_raw_event descriptor of the pulse/space
+ *
+ * This function returns -EINVAL if the pulse violates the state machine
+ */
+static int ir_rcmm_decode(struct rc_dev *dev, struct ir_raw_event ev)
+{
+	struct rcmm_dec *data = &dev->raw->rcmm;
+	u32 scancode;
+	u8 toggle;
+	int value;
+
+	if (!(dev->enabled_protocols & (RC_PROTO_BIT_RCMM32 |
+							RC_PROTO_BIT_RCMM24 |
+							RC_PROTO_BIT_RCMM12)))
+		return 0;
+
+	if (!is_timing_event(ev)) {
+		if (ev.reset)
+			data->state = STATE_INACTIVE;
+		return 0;
+	}
+
+	switch (data->state) {
+	case STATE_INACTIVE:
+		if (!ev.pulse)
+			break;
+
+		if (!eq_margin(ev.duration, RCMM_PREFIX_PULSE, RCMM_UNIT / 2))
+			break;
+
+		data->state = STATE_LOW;
+		data->count = 0;
+		data->bits  = 0;
+		return 0;
+
+	case STATE_LOW:
+		if (ev.pulse)
+			break;
+
+		if (!eq_margin(ev.duration, RCMM_PULSE_0, RCMM_UNIT / 2))
+			break;
+
+		data->state = STATE_BUMP;
+		return 0;
+
+	case STATE_BUMP:
+		if (!ev.pulse)
+			break;
+
+		if (!eq_margin(ev.duration, RCMM_UNIT, RCMM_UNIT / 2))
+			break;
+
+		data->state = STATE_VALUE;
+		return 0;
+
+	case STATE_VALUE:
+		if (ev.pulse)
+			break;
+
+		if (eq_margin(ev.duration, RCMM_PULSE_0, RCMM_UNIT / 2))
+			value = 0;
+		else if (eq_margin(ev.duration, RCMM_PULSE_1, RCMM_UNIT / 2))
+			value = 1;
+		else if (eq_margin(ev.duration, RCMM_PULSE_2, RCMM_UNIT / 2))
+			value = 2;
+		else if (eq_margin(ev.duration, RCMM_PULSE_3, RCMM_UNIT / 2))
+			value = 3;
+		else
+			value = -1;
+
+		if (value == -1) {
+			if (!rcmm_miscmode(dev, data))
+				return 0;
+			break;
+		}
+
+		data->bits <<= 2;
+		data->bits |= value;
+
+		data->count += 2;
+
+		if (data->count < 32)
+			data->state = STATE_BUMP;
+		else
+			data->state = STATE_FINISHED;
+
+		return 0;
+
+	case STATE_FINISHED:
+		if (!ev.pulse)
+			break;
+
+		if (!eq_margin(ev.duration, RCMM_UNIT, RCMM_UNIT / 2))
+			break;
+
+		if (rcmm_mode(data)) {
+			toggle = !!(0x8000 & data->bits);
+			scancode = data->bits & ~0x8000;
+		} else {
+			toggle = 0;
+			scancode = data->bits;
+		}
+
+		if (dev->enabled_protocols & RC_PROTO_BIT_RCMM32) {
+			rc_keydown(dev, RC_PROTO_RCMM32, scancode, toggle);
+			data->state = STATE_INACTIVE;
+			return 0;
+		}
+
+		break;
+	}
+
+	data->state = STATE_INACTIVE;
+	return -EINVAL;
+}
+
+static const int rcmmspace[] = {
+	RCMM_PULSE_0,
+	RCMM_PULSE_1,
+	RCMM_PULSE_2,
+	RCMM_PULSE_3,
+};
+
+static int ir_rcmm_rawencoder(struct ir_raw_event **ev, unsigned int max,
+			      unsigned int n, u32 data)
+{
+	int i;
+	int ret;
+
+	ret = ir_raw_gen_pulse_space(ev, &max, RCMM_PREFIX_PULSE, RCMM_PULSE_0);
+	if (ret)
+		return ret;
+
+	for (i = n - 2; i >= 0; i -= 2) {
+		const unsigned int space = rcmmspace[(data >> i) & 3];
+
+		ret = ir_raw_gen_pulse_space(ev, &max, RCMM_UNIT, space);
+		if (ret)
+			return ret;
+	}
+
+	return ir_raw_gen_pulse_space(ev, &max, RCMM_UNIT, RCMM_PULSE_3 * 2);
+}
+
+static int ir_rcmm_encode(enum rc_proto protocol, u32 scancode,
+			  struct ir_raw_event *events, unsigned int max)
+{
+	struct ir_raw_event *e = events;
+	int ret;
+
+	switch (protocol) {
+	case RC_PROTO_RCMM32:
+		ret = ir_rcmm_rawencoder(&e, max, 32, scancode);
+		break;
+	case RC_PROTO_RCMM24:
+		ret = ir_rcmm_rawencoder(&e, max, 24, scancode);
+		break;
+	case RC_PROTO_RCMM12:
+		ret = ir_rcmm_rawencoder(&e, max, 12, scancode);
+		break;
+	default:
+		ret = -EINVAL;
+	}
+
+	if (ret < 0)
+		return ret;
+
+	return e - events;
+}
+
+static struct ir_raw_handler rcmm_handler = {
+	.protocols	= RC_PROTO_BIT_RCMM32 |
+			  RC_PROTO_BIT_RCMM24 |
+			  RC_PROTO_BIT_RCMM12,
+	.decode		= ir_rcmm_decode,
+	.encode         = ir_rcmm_encode,
+	.carrier        = 36000,
+	.min_timeout	= RCMM_PULSE_3 + RCMM_UNIT,
+};
+
+static int __init ir_rcmm_decode_init(void)
+{
+	ir_raw_handler_register(&rcmm_handler);
+
+	pr_info("IR RCMM protocol handler initialized\n");
+	return 0;
+}
+
+static void __exit ir_rcmm_decode_exit(void)
+{
+	ir_raw_handler_unregister(&rcmm_handler);
+}
+
+module_init(ir_rcmm_decode_init);
+module_exit(ir_rcmm_decode_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick Lerda");
+MODULE_DESCRIPTION("RCMM IR protocol decoder");
diff --git a/drivers/media/rc/rc-core-priv.h b/drivers/media/rc/rc-core-priv.h
index c2cbe7f6266c..9f21b3e8b377 100644
--- a/drivers/media/rc/rc-core-priv.h
+++ b/drivers/media/rc/rc-core-priv.h
@@ -131,6 +131,11 @@ struct ir_raw_event_ctrl {
 		unsigned int bits;
 		bool stick_keyboard;
 	} imon;
+	struct rcmm_dec {
+		int state;
+		unsigned int count;
+		u32 bits;
+	} rcmm;
 };
 
 /* Mutex for locking raw IR processing and handler change */
diff --git a/drivers/media/rc/rc-main.c b/drivers/media/rc/rc-main.c
index 66a174979b3c..dc38e9c0a2ff 100644
--- a/drivers/media/rc/rc-main.c
+++ b/drivers/media/rc/rc-main.c
@@ -70,6 +70,12 @@ static const struct {
 	[RC_PROTO_CEC] = { .name = "cec", .repeat_period = 0 },
 	[RC_PROTO_IMON] = { .name = "imon",
 		.scancode_bits = 0x7fffffff, .repeat_period = 114 },
+	[RC_PROTO_RCMM12] = { .name = "rc-mm-12",
+		.scancode_bits = 0x00000fff, .repeat_period = 114 },
+	[RC_PROTO_RCMM24] = { .name = "rc-mm-24",
+		.scancode_bits = 0x00ffffff, .repeat_period = 114 },
+	[RC_PROTO_RCMM32] = { .name = "rc-mm-32",
+		.scancode_bits = 0xffffffff, .repeat_period = 114 },
 };
 
 /* Used to keep track of known keymaps */
@@ -1006,6 +1012,9 @@ static const struct {
 	{ RC_PROTO_BIT_XMP,	"xmp",		"ir-xmp-decoder"	},
 	{ RC_PROTO_BIT_CEC,	"cec",		NULL			},
 	{ RC_PROTO_BIT_IMON,	"imon",		"ir-imon-decoder"	},
+	{ RC_PROTO_BIT_RCMM12 |
+	  RC_PROTO_BIT_RCMM24 |
+	  RC_PROTO_BIT_RCMM32,	"rc-mm",	"ir-rcmm-decoder"	},
 };
 
 /**
diff --git a/include/media/rc-map.h b/include/media/rc-map.h
index d621acadfbf3..e5e86d595645 100644
--- a/include/media/rc-map.h
+++ b/include/media/rc-map.h
@@ -37,6 +37,9 @@
 #define RC_PROTO_BIT_XMP		BIT_ULL(RC_PROTO_XMP)
 #define RC_PROTO_BIT_CEC		BIT_ULL(RC_PROTO_CEC)
 #define RC_PROTO_BIT_IMON		BIT_ULL(RC_PROTO_IMON)
+#define RC_PROTO_BIT_RCMM12		BIT_ULL(RC_PROTO_RCMM12)
+#define RC_PROTO_BIT_RCMM24		BIT_ULL(RC_PROTO_RCMM24)
+#define RC_PROTO_BIT_RCMM32		BIT_ULL(RC_PROTO_RCMM32)
 
 #define RC_PROTO_BIT_ALL \
 			(RC_PROTO_BIT_UNKNOWN | RC_PROTO_BIT_OTHER | \
@@ -51,7 +54,8 @@
 			 RC_PROTO_BIT_RC6_6A_24 | RC_PROTO_BIT_RC6_6A_32 | \
 			 RC_PROTO_BIT_RC6_MCE | RC_PROTO_BIT_SHARP | \
 			 RC_PROTO_BIT_XMP | RC_PROTO_BIT_CEC | \
-			 RC_PROTO_BIT_IMON)
+			 RC_PROTO_BIT_IMON | RC_PROTO_BIT_RCMM12 | \
+			 RC_PROTO_BIT_RCMM24 | RC_PROTO_BIT_RCMM32)
 /* All rc protocols for which we have decoders */
 #define RC_PROTO_BIT_ALL_IR_DECODER \
 			(RC_PROTO_BIT_RC5 | RC_PROTO_BIT_RC5X_20 | \
@@ -64,7 +68,9 @@
 			 RC_PROTO_BIT_RC6_0 | RC_PROTO_BIT_RC6_6A_20 | \
 			 RC_PROTO_BIT_RC6_6A_24 |  RC_PROTO_BIT_RC6_6A_32 | \
 			 RC_PROTO_BIT_RC6_MCE | RC_PROTO_BIT_SHARP | \
-			 RC_PROTO_BIT_XMP | RC_PROTO_BIT_IMON)
+			 RC_PROTO_BIT_XMP | RC_PROTO_BIT_IMON | \
+			 RC_PROTO_BIT_RCMM12 | RC_PROTO_BIT_RCMM24 | \
+			 RC_PROTO_BIT_RCMM32)
 
 #define RC_PROTO_BIT_ALL_IR_ENCODER \
 			(RC_PROTO_BIT_RC5 | RC_PROTO_BIT_RC5X_20 | \
@@ -77,7 +83,9 @@
 			 RC_PROTO_BIT_RC6_0 | RC_PROTO_BIT_RC6_6A_20 | \
 			 RC_PROTO_BIT_RC6_6A_24 | \
 			 RC_PROTO_BIT_RC6_6A_32 | RC_PROTO_BIT_RC6_MCE | \
-			 RC_PROTO_BIT_SHARP | RC_PROTO_BIT_IMON)
+			 RC_PROTO_BIT_SHARP | RC_PROTO_BIT_IMON | \
+			 RC_PROTO_BIT_RCMM12 | RC_PROTO_BIT_RCMM24 | \
+			 RC_PROTO_BIT_RCMM32)
 
 #define RC_SCANCODE_UNKNOWN(x)			(x)
 #define RC_SCANCODE_OTHER(x)			(x)
diff --git a/include/uapi/linux/lirc.h b/include/uapi/linux/lirc.h
index 6b319581882f..45fcbf99d72e 100644
--- a/include/uapi/linux/lirc.h
+++ b/include/uapi/linux/lirc.h
@@ -192,6 +192,9 @@ struct lirc_scancode {
  * @RC_PROTO_XMP: XMP protocol
  * @RC_PROTO_CEC: CEC protocol
  * @RC_PROTO_IMON: iMon Pad protocol
+ * @RC_PROTO_RCMM12: RC-MM protocol 12 bits
+ * @RC_PROTO_RCMM24: RC-MM protocol 24 bits
+ * @RC_PROTO_RCMM32: RC-MM protocol 32 bits
  */
 enum rc_proto {
 	RC_PROTO_UNKNOWN	= 0,
@@ -218,6 +221,9 @@ enum rc_proto {
 	RC_PROTO_XMP		= 21,
 	RC_PROTO_CEC		= 22,
 	RC_PROTO_IMON		= 23,
+	RC_PROTO_RCMM12		= 24,
+	RC_PROTO_RCMM24		= 25,
+	RC_PROTO_RCMM32		= 26,
 };
 
 #endif
diff --git a/tools/include/uapi/linux/lirc.h b/tools/include/uapi/linux/lirc.h
index f189931042a7..45fcbf99d72e 100644
--- a/tools/include/uapi/linux/lirc.h
+++ b/tools/include/uapi/linux/lirc.h
@@ -133,6 +133,12 @@
 
 #define LIRC_SET_WIDEBAND_RECEIVER     _IOW('i', 0x00000023, __u32)
 
+/*
+ * Return the recording timeout, which is either set by
+ * the ioctl LIRC_SET_REC_TIMEOUT or by the kernel after setting the protocols.
+ */
+#define LIRC_GET_REC_TIMEOUT	       _IOR('i', 0x00000024, __u32)
+
 /*
  * struct lirc_scancode - decoded scancode with protocol for use with
  *	LIRC_MODE_SCANCODE
@@ -186,6 +192,9 @@ struct lirc_scancode {
  * @RC_PROTO_XMP: XMP protocol
  * @RC_PROTO_CEC: CEC protocol
  * @RC_PROTO_IMON: iMon Pad protocol
+ * @RC_PROTO_RCMM12: RC-MM protocol 12 bits
+ * @RC_PROTO_RCMM24: RC-MM protocol 24 bits
+ * @RC_PROTO_RCMM32: RC-MM protocol 32 bits
  */
 enum rc_proto {
 	RC_PROTO_UNKNOWN	= 0,
@@ -212,6 +221,9 @@ enum rc_proto {
 	RC_PROTO_XMP		= 21,
 	RC_PROTO_CEC		= 22,
 	RC_PROTO_IMON		= 23,
+	RC_PROTO_RCMM12		= 24,
+	RC_PROTO_RCMM24		= 25,
+	RC_PROTO_RCMM32		= 26,
 };
 
 #endif
diff --git a/tools/testing/selftests/ir/ir_loopback.c b/tools/testing/selftests/ir/ir_loopback.c
index 858c19caf224..570a7358942c 100644
--- a/tools/testing/selftests/ir/ir_loopback.c
+++ b/tools/testing/selftests/ir/ir_loopback.c
@@ -51,6 +51,10 @@ static const struct {
 	{ RC_PROTO_RC6_6A_32, "rc-6-6a-32", 0xffffffff, "rc-6" },
 	{ RC_PROTO_RC6_MCE, "rc-6-mce", 0x00007fff, "rc-6" },
 	{ RC_PROTO_SHARP, "sharp", 0x1fff, "sharp" },
+	{ RC_PROTO_IMON, "imon", 0x7fffffff, "imon" },
+	{ RC_PROTO_RCMM12, "rcmm-12", 0x00000fff, "rcmm" },
+	{ RC_PROTO_RCMM24, "rcmm-24", 0x00ffffff, "rcmm" },
+	{ RC_PROTO_RCMM32, "rcmm-32", 0xffffffff, "rcmm" },
 };
 
 int lirc_open(const char *rc)
@@ -139,6 +143,11 @@ int main(int argc, char **argv)
 			    (((scancode >> 8) ^ ~scancode) & 0xff) == 0)
 				continue;
 
+			if (rc_proto == RC_PROTO_RCMM32 &&
+			    (scancode & 0x000c0000) != 0x000c0000 &&
+			    scancode & 0x00008000)
+				continue;
+
 			struct lirc_scancode lsc = {
 				.rc_proto = rc_proto,
 				.scancode = scancode
-- 
cgit v1.2.3


From 9004a14cb688c69194002aa2fadda4433c3b79fb Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Sat, 16 Feb 2019 17:26:05 +0100
Subject: net: phy: add helper mii_10gbt_stat_mod_linkmode_lpa_t

Similar to the existing helpers for the Clause 22 registers add helper
mii_10gbt_stat_mod_linkmode_lpa_t.

Note that this helper is defined in linux/mdio.h, not like the
Clause 22 helpers in linux/mii.h. Reason is that the Clause 45 register
constants are defined in uapi/linux/mdio.h. And uapi/linux/mdio.h
includes linux/mii.h before defining the C45 register constants.

v2:
- remove helpers that don't have users in this series

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mdio.h | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

(limited to 'include')

diff --git a/include/linux/mdio.h b/include/linux/mdio.h
index dd46828b4c47..3e99ae3ed87f 100644
--- a/include/linux/mdio.h
+++ b/include/linux/mdio.h
@@ -286,6 +286,25 @@ static inline u32 linkmode_adv_to_mii_10gbt_adv_t(unsigned long *advertising)
 	return result;
 }
 
+/**
+ * mii_10gbt_stat_mod_linkmode_lpa_t
+ * @advertising: target the linkmode advertisement settings
+ * @adv: value of the C45 10GBASE-T AN STATUS register
+ *
+ * A small helper function that translates C45 10GBASE-T AN STATUS register bits
+ * to linkmode advertisement settings. Other bits in advertising aren't changed.
+ */
+static inline void mii_10gbt_stat_mod_linkmode_lpa_t(unsigned long *advertising,
+						     u32 lpa)
+{
+	linkmode_mod_bit(ETHTOOL_LINK_MODE_2500baseT_Full_BIT,
+			 advertising, lpa & MDIO_AN_10GBT_STAT_LP2_5G);
+	linkmode_mod_bit(ETHTOOL_LINK_MODE_5000baseT_Full_BIT,
+			 advertising, lpa & MDIO_AN_10GBT_STAT_LP5G);
+	linkmode_mod_bit(ETHTOOL_LINK_MODE_10000baseT_Full_BIT,
+			 advertising, lpa & MDIO_AN_10GBT_STAT_LP10G);
+}
+
 int __mdiobus_read(struct mii_bus *bus, int addr, u32 regnum);
 int __mdiobus_write(struct mii_bus *bus, int addr, u32 regnum, u16 val);
 
-- 
cgit v1.2.3


From fd47c2f99f04249d1ba82c422d1818dcbe193908 Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Mon, 18 Feb 2019 22:25:43 +0200
Subject: RDMA/restrack: Convert internal DB from hash to XArray

The additions of .doit callbacks posses new access pattern to the resource
entries by some user visible index. Back then, the legacy DB was
implemented as hash because per-index access wasn't needed and XArray
wasn't accepted yet.

Acceptance of XArray together with per-index access requires the refresh
of DB implementation.

Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/core/nldev.c    |  13 ++---
 drivers/infiniband/core/restrack.c | 103 +++++++++++++++++++++++++------------
 include/rdma/restrack.h            |  22 +++++---
 3 files changed, 93 insertions(+), 45 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c
index 5601fa968244..4bf890ae6e28 100644
--- a/drivers/infiniband/core/nldev.c
+++ b/drivers/infiniband/core/nldev.c
@@ -970,6 +970,7 @@ static int res_get_common_dumpit(struct sk_buff *skb,
 	int start = cb->args[0];
 	bool has_cap_net_admin;
 	struct nlmsghdr *nlh;
+	unsigned long id;
 	u32 index, port = 0;
 	bool filled = false;
 
@@ -1020,7 +1021,12 @@ static int res_get_common_dumpit(struct sk_buff *skb,
 	has_cap_net_admin = netlink_capable(cb->skb, CAP_NET_ADMIN);
 
 	down_read(&device->res.rwsem);
-	hash_for_each_possible(device->res.hash, res, node, res_type) {
+	/*
+	 * FIXME: if the skip ahead is something common this loop should
+	 * use xas_for_each & xas_pause to optimize, we can have a lot of
+	 * objects.
+	 */
+	xa_for_each(&device->res.xa[res_type], id, res) {
 		if (idx < start)
 			goto next;
 
@@ -1047,11 +1053,6 @@ static int res_get_common_dumpit(struct sk_buff *skb,
 		rdma_restrack_put(res);
 
 		if (ret == -EMSGSIZE)
-			/*
-			 * There is a chance to optimize here.
-			 * It can be done by using list_prepare_entry
-			 * and list_for_each_entry_continue afterwards.
-			 */
 			break;
 		if (ret)
 			goto res_err;
diff --git a/drivers/infiniband/core/restrack.c b/drivers/infiniband/core/restrack.c
index f80b37d437ac..b4f302811858 100644
--- a/drivers/infiniband/core/restrack.c
+++ b/drivers/infiniband/core/restrack.c
@@ -12,6 +12,28 @@
 
 #include "cma_priv.h"
 
+static int rt_xa_alloc_cyclic(struct xarray *xa, u32 *id, void *entry,
+			      u32 *next)
+{
+	int err;
+
+	*id = *next;
+	if (*next == U32_MAX)
+		*id = 0;
+
+	xa_lock(xa);
+	err = __xa_alloc(xa, id, U32_MAX, entry, GFP_KERNEL);
+	if (err && *next != U32_MAX) {
+		*id = 0;
+		err = __xa_alloc(xa, id, *next, entry, GFP_KERNEL);
+	}
+
+	if (!err)
+		*next = *id + 1;
+	xa_unlock(xa);
+	return err;
+}
+
 /**
  * rdma_restrack_init() - initialize resource tracking
  * @dev:  IB device
@@ -19,6 +41,10 @@
 void rdma_restrack_init(struct ib_device *dev)
 {
 	struct rdma_restrack_root *res = &dev->res;
+	int i;
+
+	for (i = 0 ; i < RDMA_RESTRACK_MAX; i++)
+		xa_init_flags(&res->xa[i], XA_FLAGS_ALLOC);
 
 	init_rwsem(&res->rwsem);
 }
@@ -46,33 +72,42 @@ void rdma_restrack_clean(struct ib_device *dev)
 	struct rdma_restrack_root *res = &dev->res;
 	struct rdma_restrack_entry *e;
 	char buf[TASK_COMM_LEN];
+	bool found = false;
 	const char *owner;
-	int bkt;
-
-	if (hash_empty(res->hash))
-		return;
-
-	dev = container_of(res, struct ib_device, res);
-	pr_err("restrack: %s", CUT_HERE);
-	dev_err(&dev->dev, "BUG: RESTRACK detected leak of resources\n");
-	hash_for_each(res->hash, bkt, e, node) {
-		if (rdma_is_kernel_res(e)) {
-			owner = e->kern_name;
-		} else {
-			/*
-			 * There is no need to call get_task_struct here,
-			 * because we can be here only if there are more
-			 * get_task_struct() call than put_task_struct().
-			 */
-			get_task_comm(buf, e->task);
-			owner = buf;
+	int i;
+
+	for (i = 0 ; i < RDMA_RESTRACK_MAX; i++) {
+		if (!xa_empty(&res->xa[i])) {
+			unsigned long index;
+
+			if (!found) {
+				pr_err("restrack: %s", CUT_HERE);
+				dev_err(&dev->dev, "BUG: RESTRACK detected leak of resources\n");
+			}
+			xa_for_each(&res->xa[i], index, e) {
+				if (rdma_is_kernel_res(e)) {
+					owner = e->kern_name;
+				} else {
+					/*
+					 * There is no need to call get_task_struct here,
+					 * because we can be here only if there are more
+					 * get_task_struct() call than put_task_struct().
+					 */
+					get_task_comm(buf, e->task);
+					owner = buf;
+				}
+
+				pr_err("restrack: %s %s object allocated by %s is not freed\n",
+				       rdma_is_kernel_res(e) ? "Kernel" :
+							       "User",
+				       type2str(e->type), owner);
+			}
+			found = true;
 		}
-
-		pr_err("restrack: %s %s object allocated by %s is not freed\n",
-		       rdma_is_kernel_res(e) ? "Kernel" : "User",
-		       type2str(e->type), owner);
+		xa_destroy(&res->xa[i]);
 	}
-	pr_err("restrack: %s", CUT_HERE);
+	if (found)
+		pr_err("restrack: %s", CUT_HERE);
 }
 
 /**
@@ -86,10 +121,11 @@ int rdma_restrack_count(struct ib_device *dev, enum rdma_restrack_type type,
 {
 	struct rdma_restrack_root *res = &dev->res;
 	struct rdma_restrack_entry *e;
+	unsigned long index = 0;
 	u32 cnt = 0;
 
 	down_read(&res->rwsem);
-	hash_for_each_possible(res->hash, e, node, type) {
+	xa_for_each(&res->xa[type], index, e) {
 		if (ns == &init_pid_ns ||
 		    (!rdma_is_kernel_res(e) &&
 		     ns == task_active_pid_ns(e->task)))
@@ -166,16 +202,20 @@ EXPORT_SYMBOL(rdma_restrack_set_task);
 static void rdma_restrack_add(struct rdma_restrack_entry *res)
 {
 	struct ib_device *dev = res_to_dev(res);
+	int ret;
 
 	if (!dev)
 		return;
 
 	kref_init(&res->kref);
 	init_completion(&res->comp);
-	res->valid = true;
 
 	down_write(&dev->res.rwsem);
-	hash_add(dev->res.hash, &res->node, res->type);
+	ret = rt_xa_alloc_cyclic(&dev->res.xa[res->type], &res->id, res,
+				 &dev->res.next_id[res->type]);
+
+	if (!ret)
+		res->valid = true;
 	up_write(&dev->res.rwsem);
 }
 
@@ -241,15 +281,14 @@ void rdma_restrack_del(struct rdma_restrack_entry *res)
 	if (!dev)
 		return;
 
-	rdma_restrack_put(res);
-
-	wait_for_completion(&res->comp);
-
 	down_write(&dev->res.rwsem);
-	hash_del(&res->node);
+	xa_erase(&dev->res.xa[res->type], res->id);
 	res->valid = false;
 	up_write(&dev->res.rwsem);
 
+	rdma_restrack_put(res);
+	wait_for_completion(&res->comp);
+
 out:
 	if (res->task) {
 		put_task_struct(res->task);
diff --git a/include/rdma/restrack.h b/include/rdma/restrack.h
index cc66cc7a11d3..16e11b4c3ec3 100644
--- a/include/rdma/restrack.h
+++ b/include/rdma/restrack.h
@@ -13,6 +13,7 @@
 #include <linux/completion.h>
 #include <linux/sched/task.h>
 #include <uapi/rdma/rdma_netlink.h>
+#include <linux/xarray.h>
 
 /**
  * enum rdma_restrack_type - HW objects to track
@@ -48,7 +49,6 @@ enum rdma_restrack_type {
 	RDMA_RESTRACK_MAX
 };
 
-#define RDMA_RESTRACK_HASH_BITS	8
 struct ib_device;
 struct rdma_restrack_entry;
 
@@ -62,9 +62,17 @@ struct rdma_restrack_root {
 	 */
 	struct rw_semaphore	rwsem;
 	/**
-	 * @hash: global database for all resources per-device
+	 * @xa: Array of XArray structures to hold restrack entries.
+	 * We want to use array of XArrays because insertion is type
+	 * dependent. For types with xisiting unique ID (like QPN),
+	 * we will insert to that unique index. For other types,
+	 * we insert based on pointers and auto-allocate unique index.
 	 */
-	DECLARE_HASHTABLE(hash, RDMA_RESTRACK_HASH_BITS);
+	struct xarray xa[RDMA_RESTRACK_MAX];
+	/**
+	 * @next_id: Next ID to support cyclic allocation
+	 */
+	u32 next_id[RDMA_RESTRACK_MAX];
 };
 
 /**
@@ -102,10 +110,6 @@ struct rdma_restrack_entry {
 	 * @kern_name: name of owner for the kernel created entities.
 	 */
 	const char		*kern_name;
-	/**
-	 * @node: hash table entry
-	 */
-	struct hlist_node	node;
 	/**
 	 * @type: various objects in restrack database
 	 */
@@ -114,6 +118,10 @@ struct rdma_restrack_entry {
 	 * @user: user resource
 	 */
 	bool			user;
+	/**
+	 * @id: ID to expose to users
+	 */
+	u32 id;
 };
 
 void rdma_restrack_init(struct ib_device *dev);
-- 
cgit v1.2.3


From 18c4c66f76d99df89ad682ba25bafb9227e8ec30 Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Mon, 18 Feb 2019 22:25:44 +0200
Subject: RDMA/restrack: Translate from ID to restrack object

Add new general helper to get restrack entry given by ID and their
respective type.

Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/core/restrack.c | 25 +++++++++++++++++++++++++
 include/rdma/restrack.h            |  3 +++
 2 files changed, 28 insertions(+)

(limited to 'include')

diff --git a/drivers/infiniband/core/restrack.c b/drivers/infiniband/core/restrack.c
index b4f302811858..ac97167da81c 100644
--- a/drivers/infiniband/core/restrack.c
+++ b/drivers/infiniband/core/restrack.c
@@ -256,6 +256,31 @@ int __must_check rdma_restrack_get(struct rdma_restrack_entry *res)
 }
 EXPORT_SYMBOL(rdma_restrack_get);
 
+/**
+ * rdma_restrack_get_byid() - translate from ID to restrack object
+ * @dev: IB device
+ * @type: resource track type
+ * @id: ID to take a look
+ *
+ * Return: Pointer to restrack entry or -ENOENT in case of error.
+ */
+struct rdma_restrack_entry *
+rdma_restrack_get_byid(struct ib_device *dev,
+		       enum rdma_restrack_type type, u32 id)
+{
+	struct rdma_restrack_root *rt = &dev->res;
+	struct rdma_restrack_entry *res;
+
+	down_read(&dev->res.rwsem);
+	res = xa_load(&rt->xa[type], id);
+	if (!res || !rdma_restrack_get(res))
+		res = ERR_PTR(-ENOENT);
+	up_read(&dev->res.rwsem);
+
+	return res;
+}
+EXPORT_SYMBOL(rdma_restrack_get_byid);
+
 static void restrack_release(struct kref *kref)
 {
 	struct rdma_restrack_entry *res;
diff --git a/include/rdma/restrack.h b/include/rdma/restrack.h
index 16e11b4c3ec3..44ce32cc0b51 100644
--- a/include/rdma/restrack.h
+++ b/include/rdma/restrack.h
@@ -179,4 +179,7 @@ int rdma_nl_put_driver_u32_hex(struct sk_buff *msg, const char *name,
 int rdma_nl_put_driver_u64(struct sk_buff *msg, const char *name, u64 value);
 int rdma_nl_put_driver_u64_hex(struct sk_buff *msg, const char *name,
 			       u64 value);
+struct rdma_restrack_entry *rdma_restrack_get_byid(struct ib_device *dev,
+						   enum rdma_restrack_type type,
+						   u32 id);
 #endif /* _RDMA_RESTRACK_H_ */
-- 
cgit v1.2.3


From 48118527186fb255461ebf3685ab0f1c2680bd9c Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Mon, 18 Feb 2019 22:25:46 +0200
Subject: RDMA/restrack: Reduce scope of synchronization lock while updating DB

XArray uses internal lock for updates to XArray. This means that our
external RW lock is needed to ensure that entry is not deleted while we
are performing iteration over list.

Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/core/restrack.c | 2 --
 include/rdma/restrack.h            | 3 ++-
 2 files changed, 2 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/restrack.c b/drivers/infiniband/core/restrack.c
index ac97167da81c..076ef6475df8 100644
--- a/drivers/infiniband/core/restrack.c
+++ b/drivers/infiniband/core/restrack.c
@@ -210,13 +210,11 @@ static void rdma_restrack_add(struct rdma_restrack_entry *res)
 	kref_init(&res->kref);
 	init_completion(&res->comp);
 
-	down_write(&dev->res.rwsem);
 	ret = rt_xa_alloc_cyclic(&dev->res.xa[res->type], &res->id, res,
 				 &dev->res.next_id[res->type]);
 
 	if (!ret)
 		res->valid = true;
-	up_write(&dev->res.rwsem);
 }
 
 /**
diff --git a/include/rdma/restrack.h b/include/rdma/restrack.h
index 44ce32cc0b51..53e1a7fb7355 100644
--- a/include/rdma/restrack.h
+++ b/include/rdma/restrack.h
@@ -58,7 +58,8 @@ struct rdma_restrack_entry;
  */
 struct rdma_restrack_root {
 	/*
-	 * @rwsem: Read/write lock to protect lists
+	 * @rwsem: Read/write lock to protect erase of entry.
+	 * Lists and insertions are protected by XArray internal lock.
 	 */
 	struct rw_semaphore	rwsem;
 	/**
-- 
cgit v1.2.3


From 41eda65c6100930d95bb854a0114f3544593070c Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Mon, 18 Feb 2019 22:25:47 +0200
Subject: RDMA/restrack: Hide restrack DB from IB/core

There is no need to expose internals of restrack DB to IB/core.

Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/core/device.c   |  6 ++-
 drivers/infiniband/core/nldev.c    | 17 ++++----
 drivers/infiniband/core/restrack.c | 83 ++++++++++++++++++++++++++------------
 drivers/infiniband/core/restrack.h | 39 ++++++++++++++++++
 include/rdma/ib_verbs.h            |  7 ++--
 include/rdma/restrack.h            | 28 -------------
 6 files changed, 114 insertions(+), 66 deletions(-)
 create mode 100644 drivers/infiniband/core/restrack.h

(limited to 'include')

diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 3eddc6e67a16..f7e206033d39 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -45,6 +45,7 @@
 #include <rdma/ib_cache.h>
 
 #include "core_priv.h"
+#include "restrack.h"
 
 MODULE_AUTHOR("Roland Dreier");
 MODULE_DESCRIPTION("core kernel InfiniBand API");
@@ -338,7 +339,10 @@ struct ib_device *_ib_alloc_device(size_t size)
 	if (!device)
 		return NULL;
 
-	rdma_restrack_init(device);
+	if (rdma_restrack_init(device)) {
+		kfree(device);
+		return NULL;
+	}
 
 	device->dev.class = &ib_class;
 	device->groups[0] = &ib_dev_attr_group;
diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c
index 0cd95f80f7b4..54312f9626a1 100644
--- a/drivers/infiniband/core/nldev.c
+++ b/drivers/infiniband/core/nldev.c
@@ -39,6 +39,7 @@
 
 #include "core_priv.h"
 #include "cma_priv.h"
+#include "restrack.h"
 
 static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = {
 	[RDMA_NLDEV_ATTR_DEV_INDEX]     = { .type = NLA_U32 },
@@ -1027,6 +1028,7 @@ static int res_get_common_dumpit(struct sk_buff *skb,
 	unsigned long id;
 	u32 index, port = 0;
 	bool filled = false;
+	struct xarray *xa;
 
 	err = nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
 			  nldev_policy, NULL);
@@ -1074,13 +1076,14 @@ static int res_get_common_dumpit(struct sk_buff *skb,
 
 	has_cap_net_admin = netlink_capable(cb->skb, CAP_NET_ADMIN);
 
-	down_read(&device->res.rwsem);
+	xa = &device->res->xa[res_type];
+	down_read(&device->res->rwsem);
 	/*
 	 * FIXME: if the skip ahead is something common this loop should
 	 * use xas_for_each & xas_pause to optimize, we can have a lot of
 	 * objects.
 	 */
-	xa_for_each(&device->res.xa[res_type], id, res) {
+	xa_for_each(xa, id, res) {
 		if (idx < start)
 			goto next;
 
@@ -1101,13 +1104,13 @@ static int res_get_common_dumpit(struct sk_buff *skb,
 		if (!entry_attr) {
 			ret = -EMSGSIZE;
 			rdma_restrack_put(res);
-			up_read(&device->res.rwsem);
+			up_read(&device->res->rwsem);
 			break;
 		}
-		up_read(&device->res.rwsem);
 
+		up_read(&device->res->rwsem);
 		ret = fe->fill_res_func(skb, has_cap_net_admin, res, port);
-		down_read(&device->res.rwsem);
+		down_read(&device->res->rwsem);
 		/*
 		 * Return resource back, but it won't be released till
 		 * the &device->res.rwsem will be released for write.
@@ -1125,7 +1128,7 @@ static int res_get_common_dumpit(struct sk_buff *skb,
 		nla_nest_end(skb, entry_attr);
 next:		idx++;
 	}
-	up_read(&device->res.rwsem);
+	up_read(&device->res->rwsem);
 
 	nla_nest_end(skb, table_attr);
 	nlmsg_end(skb, nlh);
@@ -1143,7 +1146,7 @@ next:		idx++;
 
 res_err:
 	nla_nest_cancel(skb, table_attr);
-	up_read(&device->res.rwsem);
+	up_read(&device->res->rwsem);
 
 err:
 	nlmsg_cancel(skb, nlh);
diff --git a/drivers/infiniband/core/restrack.c b/drivers/infiniband/core/restrack.c
index 076ef6475df8..6a4b76c66bcb 100644
--- a/drivers/infiniband/core/restrack.c
+++ b/drivers/infiniband/core/restrack.c
@@ -9,8 +9,10 @@
 #include <linux/mutex.h>
 #include <linux/sched/task.h>
 #include <linux/pid_namespace.h>
+#include <linux/rwsem.h>
 
 #include "cma_priv.h"
+#include "restrack.h"
 
 static int rt_xa_alloc_cyclic(struct xarray *xa, u32 *id, void *entry,
 			      u32 *next)
@@ -35,18 +37,27 @@ static int rt_xa_alloc_cyclic(struct xarray *xa, u32 *id, void *entry,
 }
 
 /**
- * rdma_restrack_init() - initialize resource tracking
+ * rdma_restrack_init() - initialize and allocate resource tracking
  * @dev:  IB device
+ *
+ * Return: 0 on success
  */
-void rdma_restrack_init(struct ib_device *dev)
+int rdma_restrack_init(struct ib_device *dev)
 {
-	struct rdma_restrack_root *res = &dev->res;
+	struct rdma_restrack_root *rt;
 	int i;
 
+	dev->res = kzalloc(sizeof(*rt), GFP_KERNEL);
+	if (!dev->res)
+		return -ENOMEM;
+
+	rt = dev->res;
+
 	for (i = 0 ; i < RDMA_RESTRACK_MAX; i++)
-		xa_init_flags(&res->xa[i], XA_FLAGS_ALLOC);
+		xa_init_flags(&rt->xa[i], XA_FLAGS_ALLOC);
+	init_rwsem(&rt->rwsem);
 
-	init_rwsem(&res->rwsem);
+	return 0;
 }
 
 static const char *type2str(enum rdma_restrack_type type)
@@ -69,7 +80,7 @@ static const char *type2str(enum rdma_restrack_type type)
  */
 void rdma_restrack_clean(struct ib_device *dev)
 {
-	struct rdma_restrack_root *res = &dev->res;
+	struct rdma_restrack_root *rt = dev->res;
 	struct rdma_restrack_entry *e;
 	char buf[TASK_COMM_LEN];
 	bool found = false;
@@ -77,14 +88,16 @@ void rdma_restrack_clean(struct ib_device *dev)
 	int i;
 
 	for (i = 0 ; i < RDMA_RESTRACK_MAX; i++) {
-		if (!xa_empty(&res->xa[i])) {
+		struct xarray *xa = &dev->res->xa[i];
+
+		if (!xa_empty(xa)) {
 			unsigned long index;
 
 			if (!found) {
 				pr_err("restrack: %s", CUT_HERE);
 				dev_err(&dev->dev, "BUG: RESTRACK detected leak of resources\n");
 			}
-			xa_for_each(&res->xa[i], index, e) {
+			xa_for_each(xa, index, e) {
 				if (rdma_is_kernel_res(e)) {
 					owner = e->kern_name;
 				} else {
@@ -104,10 +117,12 @@ void rdma_restrack_clean(struct ib_device *dev)
 			}
 			found = true;
 		}
-		xa_destroy(&res->xa[i]);
+		xa_destroy(xa);
 	}
 	if (found)
 		pr_err("restrack: %s", CUT_HERE);
+
+	kfree(rt);
 }
 
 /**
@@ -119,19 +134,19 @@ void rdma_restrack_clean(struct ib_device *dev)
 int rdma_restrack_count(struct ib_device *dev, enum rdma_restrack_type type,
 			struct pid_namespace *ns)
 {
-	struct rdma_restrack_root *res = &dev->res;
+	struct xarray *xa = &dev->res->xa[type];
 	struct rdma_restrack_entry *e;
 	unsigned long index = 0;
 	u32 cnt = 0;
 
-	down_read(&res->rwsem);
-	xa_for_each(&res->xa[type], index, e) {
+	down_read(&dev->res->rwsem);
+	xa_for_each(xa, index, e) {
 		if (ns == &init_pid_ns ||
 		    (!rdma_is_kernel_res(e) &&
 		     ns == task_active_pid_ns(e->task)))
 			cnt++;
 	}
-	up_read(&res->rwsem);
+	up_read(&dev->res->rwsem);
 	return cnt;
 }
 EXPORT_SYMBOL(rdma_restrack_count);
@@ -202,17 +217,19 @@ EXPORT_SYMBOL(rdma_restrack_set_task);
 static void rdma_restrack_add(struct rdma_restrack_entry *res)
 {
 	struct ib_device *dev = res_to_dev(res);
+	struct rdma_restrack_root *rt;
+	struct xarray *xa;
 	int ret;
 
 	if (!dev)
 		return;
 
+	rt = dev->res;
+	xa = &dev->res->xa[res->type];
+
 	kref_init(&res->kref);
 	init_completion(&res->comp);
-
-	ret = rt_xa_alloc_cyclic(&dev->res.xa[res->type], &res->id, res,
-				 &dev->res.next_id[res->type]);
-
+	ret = rt_xa_alloc_cyclic(xa, &res->id, res, &rt->next_id[res->type]);
 	if (!ret)
 		res->valid = true;
 }
@@ -266,14 +283,14 @@ struct rdma_restrack_entry *
 rdma_restrack_get_byid(struct ib_device *dev,
 		       enum rdma_restrack_type type, u32 id)
 {
-	struct rdma_restrack_root *rt = &dev->res;
+	struct xarray *xa = &dev->res->xa[type];
 	struct rdma_restrack_entry *res;
 
-	down_read(&dev->res.rwsem);
-	res = xa_load(&rt->xa[type], id);
+	down_read(&dev->res->rwsem);
+	res = xa_load(xa, id);
 	if (!res || !rdma_restrack_get(res))
 		res = ERR_PTR(-ENOENT);
-	up_read(&dev->res.rwsem);
+	up_read(&dev->res->rwsem);
 
 	return res;
 }
@@ -295,19 +312,33 @@ EXPORT_SYMBOL(rdma_restrack_put);
 
 void rdma_restrack_del(struct rdma_restrack_entry *res)
 {
-	struct ib_device *dev;
+	struct ib_device *dev = res_to_dev(res);
+	struct xarray *xa;
 
 	if (!res->valid)
 		goto out;
 
-	dev = res_to_dev(res);
+	/*
+	 * All objects except CM_ID set valid device immediately
+	 * after new object is created, it means that for not valid
+	 * objects will still have "dev".
+	 *
+	 * It is not the case for CM_ID, newly created object has
+	 * this field set to NULL and it is set in _cma_attach_to_dev()
+	 * only.
+	 *
+	 * Because we don't want to add any conditions on call
+	 * to rdma_restrack_del(), the check below protects from
+	 * NULL-dereference.
+	 */
 	if (!dev)
 		return;
 
-	down_write(&dev->res.rwsem);
-	xa_erase(&dev->res.xa[res->type], res->id);
+	xa = &dev->res->xa[res->type];
+	down_write(&dev->res->rwsem);
+	xa_erase(xa, res->id);
 	res->valid = false;
-	up_write(&dev->res.rwsem);
+	up_write(&dev->res->rwsem);
 
 	rdma_restrack_put(res);
 	wait_for_completion(&res->comp);
diff --git a/drivers/infiniband/core/restrack.h b/drivers/infiniband/core/restrack.h
new file mode 100644
index 000000000000..cf89ef0b8ed5
--- /dev/null
+++ b/drivers/infiniband/core/restrack.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/*
+ * Copyright (c) 2017-2019 Mellanox Technologies. All rights reserved.
+ */
+
+#ifndef _RDMA_CORE_RESTRACK_H_
+#define _RDMA_CORE_RESTRACK_H_
+
+#include <linux/mutex.h>
+#include <linux/rwsem.h>
+
+/**
+ * struct rdma_restrack_root - main resource tracking management
+ * entity, per-device
+ */
+struct rdma_restrack_root {
+	/*
+	 * @rwsem: Read/write lock to protect erase of entry.
+	 * Lists and insertions are protected by XArray internal lock.
+	 */
+	struct rw_semaphore	rwsem;
+	/**
+	 * @xa: Array of XArray structures to hold restrack entries.
+	 * We want to use array of XArrays because insertion is type
+	 * dependent. For types with xisiting unique ID (like QPN),
+	 * we will insert to that unique index. For other types,
+	 * we insert based on pointers and auto-allocate unique index.
+	 */
+	struct xarray xa[RDMA_RESTRACK_MAX];
+	/**
+	 * @next_id: Next ID to support cyclic allocation
+	 */
+	u32 next_id[RDMA_RESTRACK_MAX];
+};
+
+
+int rdma_restrack_init(struct ib_device *dev);
+void rdma_restrack_clean(struct ib_device *dev);
+#endif /* _RDMA_CORE_RESTRACK_H_ */
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 64ee7c08be22..2a17c2b30073 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -2533,6 +2533,8 @@ struct ib_device_ops {
 	DECLARE_RDMA_OBJ_SIZE(ib_pd);
 };
 
+struct rdma_restrack_root;
+
 struct ib_device {
 	/* Do not access @dma_device directly from ULP nor from HW drivers. */
 	struct device                *dma_device;
@@ -2589,10 +2591,7 @@ struct ib_device {
 #endif
 
 	u32                          index;
-	/*
-	 * Implementation details of the RDMA core, don't use in drivers
-	 */
-	struct rdma_restrack_root     res;
+	struct rdma_restrack_root *res;
 
 	const struct uapi_definition   *driver_def;
 	enum rdma_driver_id		driver_id;
diff --git a/include/rdma/restrack.h b/include/rdma/restrack.h
index 53e1a7fb7355..ecf3c7702a4f 100644
--- a/include/rdma/restrack.h
+++ b/include/rdma/restrack.h
@@ -7,7 +7,6 @@
 #define _RDMA_RESTRACK_H_
 
 #include <linux/typecheck.h>
-#include <linux/rwsem.h>
 #include <linux/sched.h>
 #include <linux/kref.h>
 #include <linux/completion.h>
@@ -50,31 +49,6 @@ enum rdma_restrack_type {
 };
 
 struct ib_device;
-struct rdma_restrack_entry;
-
-/**
- * struct rdma_restrack_root - main resource tracking management
- * entity, per-device
- */
-struct rdma_restrack_root {
-	/*
-	 * @rwsem: Read/write lock to protect erase of entry.
-	 * Lists and insertions are protected by XArray internal lock.
-	 */
-	struct rw_semaphore	rwsem;
-	/**
-	 * @xa: Array of XArray structures to hold restrack entries.
-	 * We want to use array of XArrays because insertion is type
-	 * dependent. For types with xisiting unique ID (like QPN),
-	 * we will insert to that unique index. For other types,
-	 * we insert based on pointers and auto-allocate unique index.
-	 */
-	struct xarray xa[RDMA_RESTRACK_MAX];
-	/**
-	 * @next_id: Next ID to support cyclic allocation
-	 */
-	u32 next_id[RDMA_RESTRACK_MAX];
-};
 
 /**
  * struct rdma_restrack_entry - metadata per-entry
@@ -125,8 +99,6 @@ struct rdma_restrack_entry {
 	u32 id;
 };
 
-void rdma_restrack_init(struct ib_device *dev);
-void rdma_restrack_clean(struct ib_device *dev);
 int rdma_restrack_count(struct ib_device *dev,
 			enum rdma_restrack_type type,
 			struct pid_namespace *ns);
-- 
cgit v1.2.3


From 942fa985e9f161ac018ce2230d3e6f7668cca6ac Mon Sep 17 00:00:00 2001
From: Yury Norov <ynorov@caviumnetworks.com>
Date: Wed, 16 May 2018 11:18:49 +0300
Subject: 32-bit userspace ABI: introduce ARCH_32BIT_OFF_T config option

All new 32-bit architectures should have 64-bit userspace off_t type, but
existing architectures has 32-bit ones.

To enforce the rule, new config option is added to arch/Kconfig that defaults
ARCH_32BIT_OFF_T to be disabled for new 32-bit architectures. All existing
32-bit architectures enable it explicitly.

New option affects force_o_largefile() behaviour. Namely, if userspace
off_t is 64-bits long, we have no reason to reject user to open big files.

Note that even if architectures has only 64-bit off_t in the kernel
(arc, c6x, h8300, hexagon, nios2, openrisc, and unicore32),
a libc may use 32-bit off_t, and therefore want to limit the file size
to 4GB unless specified differently in the open flags.

Signed-off-by: Yury Norov <ynorov@caviumnetworks.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Yury Norov <ynorov@marvell.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 arch/Kconfig            | 10 ++++++++++
 arch/arc/Kconfig        |  1 +
 arch/arm/Kconfig        |  1 +
 arch/c6x/Kconfig        |  1 +
 arch/csky/Kconfig       |  1 +
 arch/h8300/Kconfig      |  1 +
 arch/hexagon/Kconfig    |  1 +
 arch/m68k/Kconfig       |  1 +
 arch/microblaze/Kconfig |  1 +
 arch/mips/Kconfig       |  1 +
 arch/nds32/Kconfig      |  1 +
 arch/nios2/Kconfig      |  1 +
 arch/openrisc/Kconfig   |  1 +
 arch/parisc/Kconfig     |  1 +
 arch/powerpc/Kconfig    |  1 +
 arch/riscv/Kconfig      |  1 +
 arch/sh/Kconfig         |  1 +
 arch/sparc/Kconfig      |  1 +
 arch/unicore32/Kconfig  |  1 +
 arch/x86/Kconfig        |  1 +
 arch/x86/um/Kconfig     |  1 +
 arch/xtensa/Kconfig     |  1 +
 include/linux/fcntl.h   |  2 +-
 23 files changed, 32 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/arch/Kconfig b/arch/Kconfig
index 46db715a7f42..cd5f443865ec 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -276,6 +276,16 @@ config ARCH_THREAD_STACK_ALLOCATOR
 config ARCH_WANTS_DYNAMIC_TASK_STRUCT
 	bool
 
+config ARCH_32BIT_OFF_T
+	bool
+	depends on !64BIT
+	help
+	  All new 32-bit architectures should have 64-bit off_t type on
+	  userspace side which corresponds to the loff_t kernel type. This
+	  is the requirement for modern ABIs. Some existing architectures
+	  still support 32-bit off_t. This option is enabled for all such
+	  architectures explicitly.
+
 config HAVE_REGS_AND_STACK_ACCESS_API
 	bool
 	help
diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index 376366a7db81..1cfe4197146f 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -14,6 +14,7 @@ config ARC
 	select ARCH_HAS_SYNC_DMA_FOR_CPU
 	select ARCH_HAS_SYNC_DMA_FOR_DEVICE
 	select ARCH_SUPPORTS_ATOMIC_RMW if ARC_HAS_LLSC
+	select ARCH_32BIT_OFF_T
 	select BUILDTIME_EXTABLE_SORT
 	select CLONE_BACKWARDS
 	select COMMON_CLK
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 664e918e2624..8933f7337e56 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -2,6 +2,7 @@
 config ARM
 	bool
 	default y
+	select ARCH_32BIT_OFF_T
 	select ARCH_CLOCKSOURCE_DATA
 	select ARCH_DISCARD_MEMBLOCK if !HAVE_ARCH_PFN_VALID && !KEXEC
 	select ARCH_HAS_DEBUG_VIRTUAL if MMU
diff --git a/arch/c6x/Kconfig b/arch/c6x/Kconfig
index 456e154674d1..e5cd3c5f8399 100644
--- a/arch/c6x/Kconfig
+++ b/arch/c6x/Kconfig
@@ -6,6 +6,7 @@
 
 config C6X
 	def_bool y
+	select ARCH_32BIT_OFF_T
 	select ARCH_HAS_SYNC_DMA_FOR_CPU
 	select ARCH_HAS_SYNC_DMA_FOR_DEVICE
 	select CLKDEV_LOOKUP
diff --git a/arch/csky/Kconfig b/arch/csky/Kconfig
index 398113c845f5..6959e0b1e956 100644
--- a/arch/csky/Kconfig
+++ b/arch/csky/Kconfig
@@ -1,5 +1,6 @@
 config CSKY
 	def_bool y
+	select ARCH_32BIT_OFF_T
 	select ARCH_HAS_SYNC_DMA_FOR_CPU
 	select ARCH_HAS_SYNC_DMA_FOR_DEVICE
 	select ARCH_USE_BUILTIN_BSWAP
diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig
index 6472a0685470..c071da34e081 100644
--- a/arch/h8300/Kconfig
+++ b/arch/h8300/Kconfig
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0
 config H8300
         def_bool y
+	select ARCH_32BIT_OFF_T
 	select GENERIC_ATOMIC64
 	select HAVE_UID16
 	select VIRT_TO_BUS
diff --git a/arch/hexagon/Kconfig b/arch/hexagon/Kconfig
index fb2fbfcfc532..ac441680dcc0 100644
--- a/arch/hexagon/Kconfig
+++ b/arch/hexagon/Kconfig
@@ -4,6 +4,7 @@ comment "Linux Kernel Configuration for Hexagon"
 
 config HEXAGON
 	def_bool y
+	select ARCH_32BIT_OFF_T
 	select ARCH_HAS_SYNC_DMA_FOR_DEVICE
 	select ARCH_NO_PREEMPT
 	select HAVE_OPROFILE
diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig
index e173ea2ff395..b54206408f91 100644
--- a/arch/m68k/Kconfig
+++ b/arch/m68k/Kconfig
@@ -2,6 +2,7 @@
 config M68K
 	bool
 	default y
+	select ARCH_32BIT_OFF_T
 	select ARCH_HAS_SYNC_DMA_FOR_DEVICE if HAS_DMA
 	select ARCH_MIGHT_HAVE_PC_PARPORT if ISA
 	select ARCH_NO_COHERENT_DMA_MMAP if !MMU
diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig
index 58aff2653d86..a51b965b3b82 100644
--- a/arch/microblaze/Kconfig
+++ b/arch/microblaze/Kconfig
@@ -1,5 +1,6 @@
 config MICROBLAZE
 	def_bool y
+	select ARCH_32BIT_OFF_T
 	select ARCH_NO_SWAP
 	select ARCH_HAS_DMA_COHERENT_TO_PFN if MMU
 	select ARCH_HAS_GCOV_PROFILE_ALL
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 787290781b8c..d80ccabd3c06 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -2,6 +2,7 @@
 config MIPS
 	bool
 	default y
+	select ARCH_32BIT_OFF_T if !64BIT
 	select ARCH_BINFMT_ELF_STATE if MIPS_FP_SUPPORT
 	select ARCH_CLOCKSOURCE_DATA
 	select ARCH_DISCARD_MEMBLOCK
diff --git a/arch/nds32/Kconfig b/arch/nds32/Kconfig
index dda1906bba11..addb7f5f5264 100644
--- a/arch/nds32/Kconfig
+++ b/arch/nds32/Kconfig
@@ -5,6 +5,7 @@
 
 config NDS32
         def_bool y
+	select ARCH_32BIT_OFF_T
 	select ARCH_HAS_SYNC_DMA_FOR_CPU
 	select ARCH_HAS_SYNC_DMA_FOR_DEVICE
 	select ARCH_WANT_FRAME_POINTERS if FTRACE
diff --git a/arch/nios2/Kconfig b/arch/nios2/Kconfig
index 532343eebf89..c3e913ef4f0c 100644
--- a/arch/nios2/Kconfig
+++ b/arch/nios2/Kconfig
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0
 config NIOS2
 	def_bool y
+	select ARCH_32BIT_OFF_T
 	select ARCH_HAS_SYNC_DMA_FOR_CPU
 	select ARCH_HAS_SYNC_DMA_FOR_DEVICE
 	select ARCH_NO_SWAP
diff --git a/arch/openrisc/Kconfig b/arch/openrisc/Kconfig
index 09ab59e942ae..a5e361fbb75a 100644
--- a/arch/openrisc/Kconfig
+++ b/arch/openrisc/Kconfig
@@ -6,6 +6,7 @@
 
 config OPENRISC
 	def_bool y
+	select ARCH_32BIT_OFF_T
 	select ARCH_HAS_SYNC_DMA_FOR_DEVICE
 	select OF
 	select OF_EARLY_FLATTREE
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index 7ca2c3ebad64..c8e621296092 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0
 config PARISC
 	def_bool y
+	select ARCH_32BIT_OFF_T if !64BIT
 	select ARCH_MIGHT_HAVE_PC_PARPORT
 	select HAVE_IDE
 	select HAVE_OPROFILE
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 2890d36eb531..375d0dc0dc7d 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -128,6 +128,7 @@ config PPC
 	#
 	# Please keep this list sorted alphabetically.
 	#
+	select ARCH_32BIT_OFF_T if PPC32
 	select ARCH_HAS_DEBUG_VIRTUAL
 	select ARCH_HAS_DEVMEM_IS_ALLOWED
 	select ARCH_HAS_DMA_SET_COHERENT_MASK
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index feeeaa60697c..09fa3a87bf30 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -11,6 +11,7 @@ config 32BIT
 
 config RISCV
 	def_bool y
+	select ARCH_32BIT_OFF_T if !64BIT
 	# even on 32-bit, physical (and DMA) addresses are > 32-bits
 	select PHYS_ADDR_T_64BIT
 	select OF
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index a9c36f95744a..d9a9144dec35 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -62,6 +62,7 @@ config SUPERH
 
 config SUPERH32
 	def_bool "$(ARCH)" = "sh"
+	select ARCH_32BIT_OFF_T
 	select HAVE_KPROBES
 	select HAVE_KRETPROBES
 	select HAVE_IOREMAP_PROT if MMU && !X2TLB
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index d5dd652fb8cc..40f8f4f73fe8 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -49,6 +49,7 @@ config SPARC
 
 config SPARC32
 	def_bool !64BIT
+	select ARCH_32BIT_OFF_T
 	select ARCH_HAS_SYNC_DMA_FOR_CPU
 	select GENERIC_ATOMIC64
 	select CLZ_TAB
diff --git a/arch/unicore32/Kconfig b/arch/unicore32/Kconfig
index c3a41bfe161b..a7f1ae58d211 100644
--- a/arch/unicore32/Kconfig
+++ b/arch/unicore32/Kconfig
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0
 config UNICORE32
 	def_bool y
+	select ARCH_32BIT_OFF_T
 	select ARCH_HAS_DEVMEM_IS_ALLOWED
 	select ARCH_MIGHT_HAVE_PC_PARPORT
 	select ARCH_MIGHT_HAVE_PC_SERIO
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 15af091611e2..7aac274c2849 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -47,6 +47,7 @@ config X86
 	select ACPI_LEGACY_TABLES_LOOKUP	if ACPI
 	select ACPI_SYSTEM_POWER_STATES_SUPPORT	if ACPI
 	select ANON_INODES
+	select ARCH_32BIT_OFF_T			if X86_32
 	select ARCH_CLOCKSOURCE_DATA
 	select ARCH_CLOCKSOURCE_INIT
 	select ARCH_DISCARD_MEMBLOCK
diff --git a/arch/x86/um/Kconfig b/arch/x86/um/Kconfig
index f518b4744ff8..ab14e6f73ca4 100644
--- a/arch/x86/um/Kconfig
+++ b/arch/x86/um/Kconfig
@@ -17,6 +17,7 @@ config 64BIT
 config X86_32
 	def_bool !64BIT
 	select HAVE_AOUT
+	select ARCH_32BIT_OFF_T
 	select ARCH_WANT_IPC_PARSE_VERSION
 	select MODULES_USE_ELF_REL
 	select CLONE_BACKWARDS
diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig
index 20a0756f27ef..2033b4485cc4 100644
--- a/arch/xtensa/Kconfig
+++ b/arch/xtensa/Kconfig
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0
 config XTENSA
 	def_bool y
+	select ARCH_32BIT_OFF_T
 	select ARCH_HAS_SYNC_DMA_FOR_CPU
 	select ARCH_HAS_SYNC_DMA_FOR_DEVICE
 	select ARCH_NO_COHERENT_DMA_MMAP if !MMU
diff --git a/include/linux/fcntl.h b/include/linux/fcntl.h
index 27dc7a60693e..d019df946cb2 100644
--- a/include/linux/fcntl.h
+++ b/include/linux/fcntl.h
@@ -12,7 +12,7 @@
 	 O_NOATIME | O_CLOEXEC | O_PATH | __O_TMPFILE)
 
 #ifndef force_o_largefile
-#define force_o_largefile() (BITS_PER_LONG != 32)
+#define force_o_largefile() (!IS_ENABLED(CONFIG_ARCH_32BIT_OFF_T))
 #endif
 
 #if BITS_PER_LONG == 32
-- 
cgit v1.2.3


From 80d7da1cac62f28b3df4880e8143b39cabb4b59a Mon Sep 17 00:00:00 2001
From: Yury Norov <ynorov@caviumnetworks.com>
Date: Wed, 16 May 2018 11:18:50 +0300
Subject: asm-generic: Drop getrlimit and setrlimit syscalls from default list

The newer prlimit64 syscall provides all the functionality of getrlimit
and setrlimit syscalls and adds the pid of target process, so future
architectures won't need to include getrlimit and setrlimit.

Therefore drop getrlimit and setrlimit syscalls from the generic syscall
list unless __ARCH_WANT_SET_GET_RLIMIT is defined by the architecture's
unistd.h prior to including asm-generic/unistd.h, and adjust all
architectures using the generic syscall list to define it so that no
in-tree architectures are affected.

Cc: Arnd Bergmann <arnd@arndb.de>
Cc: linux-arch@vger.kernel.org
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-hexagon@vger.kernel.org
Cc: uclinux-h8-devel@lists.sourceforge.jp
Signed-off-by: Yury Norov <ynorov@caviumnetworks.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Mark Salter <msalter@redhat.com> [c6x]
Acked-by: James Hogan <james.hogan@imgtec.com> [metag]
Acked-by: Ley Foon Tan <lftan@altera.com> [nios2]
Acked-by: Stafford Horne <shorne@gmail.com> [openrisc]
Acked-by: Will Deacon <will.deacon@arm.com> [arm64]
Acked-by: Vineet Gupta <vgupta@synopsys.com> #arch/arc bits
Signed-off-by: Yury Norov <ynorov@marvell.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 arch/arc/include/uapi/asm/unistd.h       | 1 +
 arch/arm64/include/uapi/asm/unistd.h     | 1 +
 arch/c6x/include/uapi/asm/unistd.h       | 1 +
 arch/csky/include/uapi/asm/unistd.h      | 1 +
 arch/h8300/include/uapi/asm/unistd.h     | 1 +
 arch/hexagon/include/uapi/asm/unistd.h   | 1 +
 arch/nds32/include/uapi/asm/unistd.h     | 1 +
 arch/nios2/include/uapi/asm/unistd.h     | 1 +
 arch/openrisc/include/uapi/asm/unistd.h  | 1 +
 arch/riscv/include/uapi/asm/unistd.h     | 1 +
 arch/unicore32/include/uapi/asm/unistd.h | 1 +
 include/uapi/asm-generic/unistd.h        | 5 +++++
 scripts/checksyscalls.sh                 | 5 +++++
 13 files changed, 21 insertions(+)

(limited to 'include')

diff --git a/arch/arc/include/uapi/asm/unistd.h b/arch/arc/include/uapi/asm/unistd.h
index 3b3543fd151c..6a1a62a979dd 100644
--- a/arch/arc/include/uapi/asm/unistd.h
+++ b/arch/arc/include/uapi/asm/unistd.h
@@ -18,6 +18,7 @@
 
 #define __ARCH_WANT_RENAMEAT
 #define __ARCH_WANT_STAT64
+#define __ARCH_WANT_SET_GET_RLIMIT
 #define __ARCH_WANT_SYS_EXECVE
 #define __ARCH_WANT_SYS_CLONE
 #define __ARCH_WANT_SYS_VFORK
diff --git a/arch/arm64/include/uapi/asm/unistd.h b/arch/arm64/include/uapi/asm/unistd.h
index dae1584cf017..79937de2a0cc 100644
--- a/arch/arm64/include/uapi/asm/unistd.h
+++ b/arch/arm64/include/uapi/asm/unistd.h
@@ -17,5 +17,6 @@
 
 #define __ARCH_WANT_RENAMEAT
 #define __ARCH_WANT_NEW_STAT
+#define __ARCH_WANT_SET_GET_RLIMIT
 
 #include <asm-generic/unistd.h>
diff --git a/arch/c6x/include/uapi/asm/unistd.h b/arch/c6x/include/uapi/asm/unistd.h
index 6b2fe792de9d..e3721b2cfd6a 100644
--- a/arch/c6x/include/uapi/asm/unistd.h
+++ b/arch/c6x/include/uapi/asm/unistd.h
@@ -17,6 +17,7 @@
 
 #define __ARCH_WANT_RENAMEAT
 #define __ARCH_WANT_STAT64
+#define __ARCH_WANT_SET_GET_RLIMIT
 #define __ARCH_WANT_SYS_CLONE
 
 /* Use the standard ABI for syscalls. */
diff --git a/arch/csky/include/uapi/asm/unistd.h b/arch/csky/include/uapi/asm/unistd.h
index 224c9a9ab45b..f5c83492136f 100644
--- a/arch/csky/include/uapi/asm/unistd.h
+++ b/arch/csky/include/uapi/asm/unistd.h
@@ -2,6 +2,7 @@
 // Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
 
 #define __ARCH_WANT_SYS_CLONE
+#define __ARCH_WANT_SET_GET_RLIMIT
 #include <asm-generic/unistd.h>
 
 #define __NR_set_thread_area	(__NR_arch_specific_syscall + 0)
diff --git a/arch/h8300/include/uapi/asm/unistd.h b/arch/h8300/include/uapi/asm/unistd.h
index 628195823816..b9e9352f2328 100644
--- a/arch/h8300/include/uapi/asm/unistd.h
+++ b/arch/h8300/include/uapi/asm/unistd.h
@@ -2,5 +2,6 @@
 
 #define __ARCH_WANT_RENAMEAT
 #define __ARCH_WANT_STAT64
+#define __ARCH_WANT_SET_GET_RLIMIT
 
 #include <asm-generic/unistd.h>
diff --git a/arch/hexagon/include/uapi/asm/unistd.h b/arch/hexagon/include/uapi/asm/unistd.h
index c91ca7d02461..6bb392a33c35 100644
--- a/arch/hexagon/include/uapi/asm/unistd.h
+++ b/arch/hexagon/include/uapi/asm/unistd.h
@@ -30,6 +30,7 @@
 #define sys_mmap2 sys_mmap_pgoff
 #define __ARCH_WANT_RENAMEAT
 #define __ARCH_WANT_STAT64
+#define __ARCH_WANT_SET_GET_RLIMIT
 #define __ARCH_WANT_SYS_EXECVE
 #define __ARCH_WANT_SYS_CLONE
 #define __ARCH_WANT_SYS_VFORK
diff --git a/arch/nds32/include/uapi/asm/unistd.h b/arch/nds32/include/uapi/asm/unistd.h
index c2c3a3e34083..eb98d24d3190 100644
--- a/arch/nds32/include/uapi/asm/unistd.h
+++ b/arch/nds32/include/uapi/asm/unistd.h
@@ -3,6 +3,7 @@
 
 #define __ARCH_WANT_STAT64
 #define __ARCH_WANT_SYNC_FILE_RANGE2
+#define __ARCH_WANT_SET_GET_RLIMIT
 
 /* Use the standard ABI for syscalls */
 #include <asm-generic/unistd.h>
diff --git a/arch/nios2/include/uapi/asm/unistd.h b/arch/nios2/include/uapi/asm/unistd.h
index d9948d88790b..fa68e68bc26d 100644
--- a/arch/nios2/include/uapi/asm/unistd.h
+++ b/arch/nios2/include/uapi/asm/unistd.h
@@ -20,6 +20,7 @@
 
 #define __ARCH_WANT_RENAMEAT
 #define __ARCH_WANT_STAT64
+#define __ARCH_WANT_SET_GET_RLIMIT
 
 /* Use the standard ABI for syscalls */
 #include <asm-generic/unistd.h>
diff --git a/arch/openrisc/include/uapi/asm/unistd.h b/arch/openrisc/include/uapi/asm/unistd.h
index ec37df18d8ed..2e0bc0ff9f31 100644
--- a/arch/openrisc/include/uapi/asm/unistd.h
+++ b/arch/openrisc/include/uapi/asm/unistd.h
@@ -21,6 +21,7 @@
 
 #define __ARCH_WANT_RENAMEAT
 #define __ARCH_WANT_STAT64
+#define __ARCH_WANT_SET_GET_RLIMIT
 #define __ARCH_WANT_SYS_FORK
 #define __ARCH_WANT_SYS_CLONE
 
diff --git a/arch/riscv/include/uapi/asm/unistd.h b/arch/riscv/include/uapi/asm/unistd.h
index 1f3bd3ebbb0d..d9340c52e7ad 100644
--- a/arch/riscv/include/uapi/asm/unistd.h
+++ b/arch/riscv/include/uapi/asm/unistd.h
@@ -18,6 +18,7 @@
 #ifdef __LP64__
 #define __ARCH_WANT_NEW_STAT
 #endif /* __LP64__ */
+#define __ARCH_WANT_SET_GET_RLIMIT
 
 #include <asm-generic/unistd.h>
 
diff --git a/arch/unicore32/include/uapi/asm/unistd.h b/arch/unicore32/include/uapi/asm/unistd.h
index 1e8fe5941b8a..2b575c0cf177 100644
--- a/arch/unicore32/include/uapi/asm/unistd.h
+++ b/arch/unicore32/include/uapi/asm/unistd.h
@@ -12,6 +12,7 @@
  */
 
 #define __ARCH_WANT_RENAMEAT
+#define __ARCH_WANT_SET_GET_RLIMIT
 
 /* Use the standard ABI for syscalls. */
 #include <asm-generic/unistd.h>
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index b928eff3bf92..2cdf600b05fa 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -467,10 +467,15 @@ __SYSCALL(__NR_uname, sys_newuname)
 __SYSCALL(__NR_sethostname, sys_sethostname)
 #define __NR_setdomainname 162
 __SYSCALL(__NR_setdomainname, sys_setdomainname)
+
+#ifdef __ARCH_WANT_SET_GET_RLIMIT
+/* getrlimit and setrlimit are superseded with prlimit64 */
 #define __NR_getrlimit 163
 __SC_COMP(__NR_getrlimit, sys_getrlimit, compat_sys_getrlimit)
 #define __NR_setrlimit 164
 __SC_COMP(__NR_setrlimit, sys_setrlimit, compat_sys_setrlimit)
+#endif
+
 #define __NR_getrusage 165
 __SC_COMP(__NR_getrusage, sys_getrusage, compat_sys_getrusage)
 #define __NR_umask 166
diff --git a/scripts/checksyscalls.sh b/scripts/checksyscalls.sh
index cc70a64fa81f..53c5677d7e82 100755
--- a/scripts/checksyscalls.sh
+++ b/scripts/checksyscalls.sh
@@ -38,6 +38,11 @@ cat << EOF
 #define __IGNORE_lstat64	/* fstatat64 */
 #endif
 
+#ifndef __ARCH_WANT_SET_GET_RLIMIT
+#define __IGNORE_getrlimit	/* getrlimit */
+#define __IGNORE_setrlimit	/* setrlimit */
+#endif
+
 /* Missing flags argument */
 #define __IGNORE_renameat	/* renameat2 */
 
-- 
cgit v1.2.3


From 85945c28b5a888043cb2b54f880d80d8915f21f5 Mon Sep 17 00:00:00 2001
From: Sudeep Holla <sudeep.holla@arm.com>
Date: Thu, 14 Feb 2019 18:29:10 +0000
Subject: PM / core: Add support to skip power management in device/driver
 model

All device objects in the driver model contain fields that control the
handling of various power management activities. However, it's not
always useful. There are few instances where pseudo devices are added
to the model just to take advantage of many other features like
kobjects, udev events, and so on. One such example is cpu devices and
their caches.

The sysfs for the cpu caches are managed by adding devices with cpu
as the parent in cpu_device_create() when secondary cpu is brought
online. Generally when the secondary CPUs are hotplugged back in as part
of resume from suspend-to-ram, we call cpu_device_create() from the cpu
hotplug state machine while the cpu device associated with that CPU is
not yet ready to be resumed as the device_resume() call happens bit
later. It's not really needed to set the flag is_prepared for cpu
devices as they are mostly pseudo device and hotplug framework deals
with state machine and not managed through the cpu device.

This often results in annoying warning when resuming:
Enabling non-boot CPUs ...
CPU1: Booted secondary processor
 cache: parent cpu1 should not be sleeping
CPU1 is up
CPU2: Booted secondary processor
 cache: parent cpu2 should not be sleeping
CPU2 is up
.... and so on.

So in order to fix these kind of errors, we could just completely avoid
doing any power management related initialisations and operations if
they are not used by these devices.

Add no_pm flags to indicate that the device doesn't require any sort of
PM activities and all of them can be completely skipped. We can use the
same flag to also avoid adding not used *power* sysfs entries for these
devices. For now, lets use this for cpu cache devices.

Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Sudeep Holla <sudeep.holla@arm.com>
Tested-by: Eugeniu Rosca <erosca@de.adit-jv.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/cpu.c         |  1 +
 drivers/base/power/main.c  |  7 +++++++
 drivers/base/power/sysfs.c |  6 ++++++
 include/linux/device.h     | 10 ++++++++++
 include/linux/pm.h         |  1 +
 5 files changed, 25 insertions(+)

(limited to 'include')

diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c
index eb9443d5bae1..6ce93a52bf3f 100644
--- a/drivers/base/cpu.c
+++ b/drivers/base/cpu.c
@@ -427,6 +427,7 @@ __cpu_device_create(struct device *parent, void *drvdata,
 	dev->parent = parent;
 	dev->groups = groups;
 	dev->release = device_create_release;
+	device_set_pm_not_required(dev);
 	dev_set_drvdata(dev, drvdata);
 
 	retval = kobject_set_name_vargs(&dev->kobj, fmt, args);
diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index 337a56ff11b7..893ae464bfd6 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -124,6 +124,10 @@ void device_pm_unlock(void)
  */
 void device_pm_add(struct device *dev)
 {
+	/* Skip PM setup/initialization. */
+	if (device_pm_not_required(dev))
+		return;
+
 	pr_debug("PM: Adding info for %s:%s\n",
 		 dev->bus ? dev->bus->name : "No Bus", dev_name(dev));
 	device_pm_check_callbacks(dev);
@@ -142,6 +146,9 @@ void device_pm_add(struct device *dev)
  */
 void device_pm_remove(struct device *dev)
 {
+	if (device_pm_not_required(dev))
+		return;
+
 	pr_debug("PM: Removing info for %s:%s\n",
 		 dev->bus ? dev->bus->name : "No Bus", dev_name(dev));
 	complete_all(&dev->power.completion);
diff --git a/drivers/base/power/sysfs.c b/drivers/base/power/sysfs.c
index 96c8a227610a..c6bf76124184 100644
--- a/drivers/base/power/sysfs.c
+++ b/drivers/base/power/sysfs.c
@@ -653,6 +653,10 @@ int dpm_sysfs_add(struct device *dev)
 {
 	int rc;
 
+	/* No need to create PM sysfs if explicitly disabled. */
+	if (device_pm_not_required(dev))
+		return 0;
+
 	rc = sysfs_create_group(&dev->kobj, &pm_attr_group);
 	if (rc)
 		return rc;
@@ -732,6 +736,8 @@ void rpm_sysfs_remove(struct device *dev)
 
 void dpm_sysfs_remove(struct device *dev)
 {
+	if (device_pm_not_required(dev))
+		return;
 	sysfs_unmerge_group(&dev->kobj, &pm_qos_latency_tolerance_attr_group);
 	dev_pm_qos_constraints_destroy(dev);
 	rpm_sysfs_remove(dev);
diff --git a/include/linux/device.h b/include/linux/device.h
index 6cb4640b6160..53028636fe39 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -1165,6 +1165,16 @@ static inline bool device_async_suspend_enabled(struct device *dev)
 	return !!dev->power.async_suspend;
 }
 
+static inline bool device_pm_not_required(struct device *dev)
+{
+	return dev->power.no_pm;
+}
+
+static inline void device_set_pm_not_required(struct device *dev)
+{
+	dev->power.no_pm = true;
+}
+
 static inline void dev_pm_syscore_device(struct device *dev, bool val)
 {
 #ifdef CONFIG_PM_SLEEP
diff --git a/include/linux/pm.h b/include/linux/pm.h
index 3d2cbf947768..06f7ed893928 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -592,6 +592,7 @@ struct dev_pm_info {
 	bool			is_suspended:1;	/* Ditto */
 	bool			is_noirq_suspended:1;
 	bool			is_late_suspended:1;
+	bool			no_pm:1;
 	bool			early_init:1;	/* Owned by the PM core */
 	bool			direct_complete:1;	/* Owned by the PM core */
 	u32			driver_flags;
-- 
cgit v1.2.3


From be03564bd7b60b119aac60443b3602bac38d6405 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Mon, 18 Feb 2019 09:46:28 +0000
Subject: drm/i915: Include reminders about leaving no holes in uAPI enums

We don't want to pre-reserve any holes in our uAPI for that is a sign of
nefarious and hidden activity. Add a reminder about our uAPI
expectations to encourage good practice when adding new defines/enums.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Jani Nikula <jani.nikula@intel.com>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190218094628.13522-1-chris@chris-wilson.co.uk
---
 include/uapi/drm/i915_drm.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include')

diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index c890b7992d5c..8304a7f1ec3f 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -99,6 +99,8 @@ enum drm_i915_gem_engine_class {
 	I915_ENGINE_CLASS_VIDEO		= 2,
 	I915_ENGINE_CLASS_VIDEO_ENHANCE	= 3,
 
+	/* should be kept compact */
+
 	I915_ENGINE_CLASS_INVALID	= -1
 };
 
@@ -319,6 +321,7 @@ typedef struct _drm_i915_sarea {
 #define DRM_I915_PERF_ADD_CONFIG	0x37
 #define DRM_I915_PERF_REMOVE_CONFIG	0x38
 #define DRM_I915_QUERY			0x39
+/* Must be kept compact -- no holes */
 
 #define DRM_IOCTL_I915_INIT		DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
 #define DRM_IOCTL_I915_FLUSH		DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH)
@@ -559,6 +562,8 @@ typedef struct drm_i915_irq_wait {
  */
 #define I915_PARAM_MMAP_GTT_COHERENT	52
 
+/* Must be kept compact -- no holes and well documented */
+
 typedef struct drm_i915_getparam {
 	__s32 param;
 	/*
@@ -574,6 +579,7 @@ typedef struct drm_i915_getparam {
 #define I915_SETPARAM_TEX_LRU_LOG_GRANULARITY             2
 #define I915_SETPARAM_ALLOW_BATCHBUFFER                   3
 #define I915_SETPARAM_NUM_USED_FENCES                     4
+/* Must be kept compact -- no holes */
 
 typedef struct drm_i915_setparam {
 	int param;
@@ -1511,6 +1517,7 @@ struct drm_i915_gem_context_param {
  * On creation, all new contexts are marked as recoverable.
  */
 #define I915_CONTEXT_PARAM_RECOVERABLE	0x8
+/* Must be kept compact -- no holes and well documented */
 	__u64 value;
 };
 
@@ -1734,6 +1741,7 @@ struct drm_i915_perf_oa_config {
 struct drm_i915_query_item {
 	__u64 query_id;
 #define DRM_I915_QUERY_TOPOLOGY_INFO    1
+/* Must be kept compact -- no holes and well documented */
 
 	/*
 	 * When set to zero by userspace, this is filled with the size of the
-- 
cgit v1.2.3


From e4246b05507fc6102008bac0aee848f207bd96de Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@ffwll.ch>
Date: Mon, 18 Feb 2019 17:36:48 +0100
Subject: drivers/component: kerneldoc polish

Polish the kerneldoc a bit with suggestions from Randy.

v2: Randy found another typo: s/compent/component/

Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
Cc: "Rafael J. Wysocki" <rafael@kernel.org>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Ramalingam C <ramalingam.c@intel.com>
Acked-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/component.c  | 14 +++++++-------
 include/linux/component.h |  2 +-
 2 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/drivers/base/component.c b/drivers/base/component.c
index 7dbc41cccd58..532a3a5d8f63 100644
--- a/drivers/base/component.c
+++ b/drivers/base/component.c
@@ -27,7 +27,7 @@
  * helper fills the niche of aggregate drivers for specific hardware, where
  * further standardization into a subsystem would not be practical. The common
  * example is when a logical device (e.g. a DRM display driver) is spread around
- * the SoC on various component (scanout engines, blending blocks, transcoders
+ * the SoC on various components (scanout engines, blending blocks, transcoders
  * for various outputs and so on).
  *
  * The component helper also doesn't solve runtime dependencies, e.g. for system
@@ -378,7 +378,7 @@ static void __component_match_add(struct device *master,
 }
 
 /**
- * component_match_add_release - add a component match with release callback
+ * component_match_add_release - add a component match entry with release callback
  * @master: device with the aggregate driver
  * @matchptr: pointer to the list of component matches
  * @release: release function for @compare_data
@@ -408,7 +408,7 @@ void component_match_add_release(struct device *master,
 EXPORT_SYMBOL(component_match_add_release);
 
 /**
- * component_match_add_typed - add a compent match for a typed component
+ * component_match_add_typed - add a component match entry for a typed component
  * @master: device with the aggregate driver
  * @matchptr: pointer to the list of component matches
  * @compare_typed: compare function to match against all typed components
@@ -537,11 +537,11 @@ static void component_unbind(struct component *component,
 }
 
 /**
- * component_unbind_all - unbind all component to an aggregate driver
+ * component_unbind_all - unbind all components of an aggregate driver
  * @master_dev: device with the aggregate driver
  * @data: opaque pointer, passed to all components
  *
- * Unbinds all components to the aggregate @dev by passing @data to their
+ * Unbinds all components of the aggregate @dev by passing @data to their
  * &component_ops.unbind functions. Should be called from
  * &component_master_ops.unbind.
  */
@@ -619,11 +619,11 @@ static int component_bind(struct component *component, struct master *master,
 }
 
 /**
- * component_bind_all - bind all component to an aggregate driver
+ * component_bind_all - bind all components of an aggregate driver
  * @master_dev: device with the aggregate driver
  * @data: opaque pointer, passed to all components
  *
- * Binds all components to the aggregate @dev by passing @data to their
+ * Binds all components of the aggregate @dev by passing @data to their
  * &component_ops.bind functions. Should be called from
  * &component_master_ops.bind.
  */
diff --git a/include/linux/component.h b/include/linux/component.h
index 30bcc7e590eb..16de18f473d7 100644
--- a/include/linux/component.h
+++ b/include/linux/component.h
@@ -98,7 +98,7 @@ void component_match_add_typed(struct device *master,
 	int (*compare_typed)(struct device *, int, void *), void *compare_data);
 
 /**
- * component_match_add - add a compent match
+ * component_match_add - add a component match entry
  * @master: device with the aggregate driver
  * @matchptr: pointer to the list of component matches
  * @compare: compare function to match against all components
-- 
cgit v1.2.3


From 8b29f7aa52330411ee0b8127b32ac17d50b16f76 Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Date: Thu, 14 Feb 2019 15:52:09 +0100
Subject: irqchip: davinci-aintc: add a new config structure

Add a config structure that will be used by aintc-based platforms.
It contains the register range resource, number of interrupts and
a list of priorities.

Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Reviewed-by: David Lechner <david@lechnology.com>
Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Signed-off-by: Sekhar Nori <nsekhar@ti.com>
---
 include/linux/irqchip/irq-davinci-aintc.h | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)
 create mode 100644 include/linux/irqchip/irq-davinci-aintc.h

(limited to 'include')

diff --git a/include/linux/irqchip/irq-davinci-aintc.h b/include/linux/irqchip/irq-davinci-aintc.h
new file mode 100644
index 000000000000..2b2ace3c1b22
--- /dev/null
+++ b/include/linux/irqchip/irq-davinci-aintc.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2019 Texas Instruments
+ */
+
+#ifndef _LINUX_IRQ_DAVINCI_AINTC_
+#define _LINUX_IRQ_DAVINCI_AINTC_
+
+#include <linux/ioport.h>
+
+/**
+ * struct davinci_aintc_config - configuration data for davinci-aintc driver.
+ *
+ * @reg: register range to map
+ * @num_irqs: number of HW interrupts supported by the controller
+ * @prios: an array of size num_irqs containing priority settings for
+ *         each interrupt
+ */
+struct davinci_aintc_config {
+	struct resource reg;
+	unsigned int num_irqs;
+	u8 *prios;
+};
+
+#endif /* _LINUX_IRQ_DAVINCI_AINTC_ */
-- 
cgit v1.2.3


From 06a2871614295eb3c504821adc4dee15748890ac Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Date: Thu, 14 Feb 2019 15:52:11 +0100
Subject: ARM: davinci: aintc: use the new config structure

Modify the aintc driver to take all its configuration from the new
config structure. Stop referencing davinci_soc_info in any way.
Move the declaration for davinci_aintc_init() to irq-davinci-aintc.h
and make it take the new config structure as parameter. Convert all
users to the new version.

Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Reviewed-by: David Lechner <david@lechnology.com>
Signed-off-by: Sekhar Nori <nsekhar@ti.com>
---
 arch/arm/mach-davinci/dm355.c               |  2 +-
 arch/arm/mach-davinci/dm365.c               |  2 +-
 arch/arm/mach-davinci/dm644x.c              |  2 +-
 arch/arm/mach-davinci/dm646x.c              |  2 +-
 arch/arm/mach-davinci/include/mach/common.h |  2 --
 arch/arm/mach-davinci/irq.c                 | 39 +++++++++++++++--------------
 include/linux/irqchip/irq-davinci-aintc.h   |  2 ++
 7 files changed, 26 insertions(+), 25 deletions(-)

(limited to 'include')

diff --git a/arch/arm/mach-davinci/dm355.c b/arch/arm/mach-davinci/dm355.c
index ff79c1a17fae..c7cd765114af 100644
--- a/arch/arm/mach-davinci/dm355.c
+++ b/arch/arm/mach-davinci/dm355.c
@@ -805,7 +805,7 @@ static const struct davinci_aintc_config dm355_aintc_config = {
 
 void __init dm355_init_irq(void)
 {
-	davinci_aintc_init();
+	davinci_aintc_init(&dm355_aintc_config);
 }
 
 static int __init dm355_init_devices(void)
diff --git a/arch/arm/mach-davinci/dm365.c b/arch/arm/mach-davinci/dm365.c
index 44dc3ca94dd3..bde3c3b94cc9 100644
--- a/arch/arm/mach-davinci/dm365.c
+++ b/arch/arm/mach-davinci/dm365.c
@@ -1064,7 +1064,7 @@ static const struct davinci_aintc_config dm365_aintc_config = {
 
 void __init dm365_init_irq(void)
 {
-	davinci_aintc_init();
+	davinci_aintc_init(&dm365_aintc_config);
 }
 
 static int __init dm365_init_devices(void)
diff --git a/arch/arm/mach-davinci/dm644x.c b/arch/arm/mach-davinci/dm644x.c
index 0b0ecac36486..6d3498058283 100644
--- a/arch/arm/mach-davinci/dm644x.c
+++ b/arch/arm/mach-davinci/dm644x.c
@@ -741,7 +741,7 @@ static const struct davinci_aintc_config dm644x_aintc_config = {
 
 void __init dm644x_init_irq(void)
 {
-	davinci_aintc_init();
+	davinci_aintc_init(&dm644x_aintc_config);
 }
 
 void __init dm644x_init_devices(void)
diff --git a/arch/arm/mach-davinci/dm646x.c b/arch/arm/mach-davinci/dm646x.c
index 4e871d00e4e9..a0a8b336c1a4 100644
--- a/arch/arm/mach-davinci/dm646x.c
+++ b/arch/arm/mach-davinci/dm646x.c
@@ -702,7 +702,7 @@ static const struct davinci_aintc_config dm646x_aintc_config = {
 
 void __init dm646x_init_irq(void)
 {
-	davinci_aintc_init();
+	davinci_aintc_init(&dm646x_aintc_config);
 }
 
 static int __init dm646x_init_devices(void)
diff --git a/arch/arm/mach-davinci/include/mach/common.h b/arch/arm/mach-davinci/include/mach/common.h
index 8c9c011f96f6..14e0e1c40611 100644
--- a/arch/arm/mach-davinci/include/mach/common.h
+++ b/arch/arm/mach-davinci/include/mach/common.h
@@ -24,8 +24,6 @@
 
 void davinci_timer_init(struct clk *clk);
 
-extern void davinci_aintc_init(void);
-
 struct davinci_timer_instance {
 	u32		base;
 	u32		bottom_irq;
diff --git a/arch/arm/mach-davinci/irq.c b/arch/arm/mach-davinci/irq.c
index 509be44eda22..1b2eeddfabd1 100644
--- a/arch/arm/mach-davinci/irq.c
+++ b/arch/arm/mach-davinci/irq.c
@@ -8,6 +8,7 @@
 #include <linux/init.h>
 #include <linux/interrupt.h>
 #include <linux/irq.h>
+#include <linux/irqchip/irq-davinci-aintc.h>
 #include <linux/io.h>
 #include <linux/irqdomain.h>
 
@@ -82,13 +83,14 @@ davinci_aintc_handle_irq(struct pt_regs *regs)
 }
 
 /* ARM Interrupt Controller Initialization */
-void __init davinci_aintc_init(void)
+void __init davinci_aintc_init(const struct davinci_aintc_config *config)
 {
-	unsigned i, j;
-	const u8 *davinci_def_priorities = davinci_soc_info.intc_irq_prios;
+	unsigned int irq_off, reg_off, prio, shift;
 	int ret, irq_base;
+	const u8 *prios;
 
-	davinci_aintc_base = ioremap(davinci_soc_info.intc_base, SZ_4K);
+	davinci_aintc_base = ioremap(config->reg.start,
+				     resource_size(&config->reg));
 	if (WARN_ON(!davinci_aintc_base))
 		return;
 
@@ -114,23 +116,21 @@ void __init davinci_aintc_init(void)
 	davinci_aintc_writel(~0x0, DAVINCI_AINTC_IRQ_REG0);
 	davinci_aintc_writel(~0x0, DAVINCI_AINTC_IRQ_REG1);
 
-	for (i = DAVINCI_AINTC_IRQ_INTPRI0_REG;
-	     i <= DAVINCI_AINTC_IRQ_INTPRI7_REG; i += 4) {
-		u32		pri;
-
-		for (j = 0, pri = 0; j < 32; j += 4, davinci_def_priorities++)
-			pri |= (*davinci_def_priorities & 0x07) << j;
-		davinci_aintc_writel(pri, i);
+	prios = config->prios;
+	for (reg_off = DAVINCI_AINTC_IRQ_INTPRI0_REG;
+	     reg_off <= DAVINCI_AINTC_IRQ_INTPRI7_REG; reg_off += 4) {
+		for (shift = 0, prio = 0; shift < 32; shift += 4, prios++)
+			prio |= (*prios & 0x07) << shift;
+		davinci_aintc_writel(prio, reg_off);
 	}
 
-	irq_base = irq_alloc_descs(-1, 0, davinci_soc_info.intc_irq_num, 0);
+	irq_base = irq_alloc_descs(-1, 0, config->num_irqs, 0);
 	if (WARN_ON(irq_base < 0))
 		return;
 
 	davinci_aintc_irq_domain = irq_domain_add_legacy(NULL,
-					davinci_soc_info.intc_irq_num,
-					irq_base, 0, &irq_domain_simple_ops,
-					NULL);
+						config->num_irqs, irq_base, 0,
+						&irq_domain_simple_ops, NULL);
 	if (WARN_ON(!davinci_aintc_irq_domain))
 		return;
 
@@ -140,10 +140,11 @@ void __init davinci_aintc_init(void)
 	if (WARN_ON(ret))
 		return;
 
-	for (i = 0, j = 0; i < davinci_soc_info.intc_irq_num;
-	     i += 32, j += 0x04)
-		davinci_aintc_setup_gc(davinci_aintc_base + j,
-				       irq_base + i, 32);
+	for (irq_off = 0, reg_off = 0;
+	     irq_off < config->num_irqs;
+	     irq_off += 32, reg_off += 0x04)
+		davinci_aintc_setup_gc(davinci_aintc_base + reg_off,
+				       irq_base + irq_off, 32);
 
 	irq_set_handler(DAVINCI_INTC_IRQ(IRQ_TINT1_TINT34), handle_level_irq);
 	set_handle_irq(davinci_aintc_handle_irq);
diff --git a/include/linux/irqchip/irq-davinci-aintc.h b/include/linux/irqchip/irq-davinci-aintc.h
index 2b2ace3c1b22..ea4e087fac98 100644
--- a/include/linux/irqchip/irq-davinci-aintc.h
+++ b/include/linux/irqchip/irq-davinci-aintc.h
@@ -22,4 +22,6 @@ struct davinci_aintc_config {
 	u8 *prios;
 };
 
+void davinci_aintc_init(const struct davinci_aintc_config *config);
+
 #endif /* _LINUX_IRQ_DAVINCI_AINTC_ */
-- 
cgit v1.2.3


From 94af2c4d14d09c2c2d07b4ea2778668890241ea8 Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Date: Thu, 14 Feb 2019 15:52:19 +0100
Subject: irqchip: davinci-cp-intc: add a new config structure

Add a config structure that will be used by cp-intc-based platforms.
It contains the register range resource and the number of interrupts.

Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Reviewed-by: David Lechner <david@lechnology.com>
Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Signed-off-by: Sekhar Nori <nsekhar@ti.com>
---
 include/linux/irqchip/irq-davinci-cp-intc.h | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)
 create mode 100644 include/linux/irqchip/irq-davinci-cp-intc.h

(limited to 'include')

diff --git a/include/linux/irqchip/irq-davinci-cp-intc.h b/include/linux/irqchip/irq-davinci-cp-intc.h
new file mode 100644
index 000000000000..2270a6167b98
--- /dev/null
+++ b/include/linux/irqchip/irq-davinci-cp-intc.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2019 Texas Instruments
+ */
+
+#ifndef _LINUX_IRQ_DAVINCI_CP_INTC_
+#define _LINUX_IRQ_DAVINCI_CP_INTC_
+
+#include <linux/ioport.h>
+
+/**
+ * struct davinci_cp_intc_config - configuration data for davinci-cp-intc
+ *                                 driver.
+ *
+ * @reg: register range to map
+ * @num_irqs: number of HW interrupts supported by the controller
+ */
+struct davinci_cp_intc_config {
+	struct resource reg;
+	unsigned int num_irqs;
+};
+
+#endif /* _LINUX_IRQ_DAVINCI_CP_INTC_ */
-- 
cgit v1.2.3


From 6567954b8e8e7cbb74b1340038dcac7ecc9e2e1b Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Date: Thu, 14 Feb 2019 15:52:23 +0100
Subject: ARM: davinci: cp-intc: use the new-style config structure

Modify the cp-intc driver to take all its configuration from the new
config structure. Stop referencing davinci_soc_info in any way.
Move the declaration for davinci_cp_intc_init() to
irq-davinci-cp-intc.h and make it take the new config structure as
parameter. Convert all users to the new version.

Also: since the two da8xx SoCs default all irq priorities to 7, just
drop the priority configuration at all and hardcode the channels to 7.

It will simplify the driver code and make our lives easier when it
comes to device-tree support.

Reviewed-by: David Lechner <david@lechnology.com>
Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Signed-off-by: Sekhar Nori <nsekhar@ti.com>
---
 arch/arm/mach-davinci/cp_intc.c             | 99 ++++++++++++++---------------
 arch/arm/mach-davinci/da830.c               |  2 +-
 arch/arm/mach-davinci/da850.c               |  2 +-
 arch/arm/mach-davinci/include/mach/common.h |  1 -
 include/linux/irqchip/irq-davinci-cp-intc.h |  2 +
 5 files changed, 50 insertions(+), 56 deletions(-)

(limited to 'include')

diff --git a/arch/arm/mach-davinci/cp_intc.c b/arch/arm/mach-davinci/cp_intc.c
index dcd43b067a6a..f56a4275083f 100644
--- a/arch/arm/mach-davinci/cp_intc.c
+++ b/arch/arm/mach-davinci/cp_intc.c
@@ -13,6 +13,7 @@
 #include <linux/init.h>
 #include <linux/irq.h>
 #include <linux/irqchip.h>
+#include <linux/irqchip/irq-davinci-cp-intc.h>
 #include <linux/irqdomain.h>
 #include <linux/io.h>
 #include <linux/of.h>
@@ -20,7 +21,6 @@
 #include <linux/of_irq.h>
 
 #include <asm/exception.h>
-#include <mach/common.h>
 
 #define DAVINCI_CP_INTC_CTRL			0x04
 #define DAVINCI_CP_INTC_HOST_CTRL		0x0c
@@ -158,22 +158,15 @@ static const struct irq_domain_ops davinci_cp_intc_irq_domain_ops = {
 	.xlate = irq_domain_xlate_onetwocell,
 };
 
-static int __init davinci_cp_intc_of_init(struct device_node *node,
-					  struct device_node *parent)
+static int __init
+davinci_cp_intc_do_init(const struct davinci_cp_intc_config *config,
+			struct device_node *node)
 {
-	u32 num_irq		= davinci_soc_info.intc_irq_num;
-	u8 *irq_prio		= davinci_soc_info.intc_irq_prios;
-	unsigned num_reg	= BITS_TO_LONGS(num_irq);
-	int i, irq_base;
-
-	if (node) {
-		davinci_cp_intc_base = of_iomap(node, 0);
-		if (of_property_read_u32(node, "ti,intc-size", &num_irq))
-			pr_warn("unable to get intc-size, default to %d\n",
-				num_irq);
-	} else {
-		davinci_cp_intc_base = ioremap(davinci_soc_info.intc_base, SZ_8K);
-	}
+	unsigned int num_regs = BITS_TO_LONGS(config->num_irqs);
+	int offset, irq_base;
+
+	davinci_cp_intc_base = ioremap(config->reg.start,
+				       resource_size(&config->reg));
 	if (WARN_ON(!davinci_cp_intc_base))
 		return -EINVAL;
 
@@ -183,51 +176,29 @@ static int __init davinci_cp_intc_of_init(struct device_node *node,
 	davinci_cp_intc_write(0, DAVINCI_CP_INTC_HOST_ENABLE(0));
 
 	/* Disable system interrupts */
-	for (i = 0; i < num_reg; i++)
-		davinci_cp_intc_write(~0, DAVINCI_CP_INTC_SYS_ENABLE_CLR(i));
+	for (offset = 0; offset < num_regs; offset++)
+		davinci_cp_intc_write(~0,
+			DAVINCI_CP_INTC_SYS_ENABLE_CLR(offset));
 
 	/* Set to normal mode, no nesting, no priority hold */
 	davinci_cp_intc_write(0, DAVINCI_CP_INTC_CTRL);
 	davinci_cp_intc_write(0, DAVINCI_CP_INTC_HOST_CTRL);
 
 	/* Clear system interrupt status */
-	for (i = 0; i < num_reg; i++)
-		davinci_cp_intc_write(~0, DAVINCI_CP_INTC_SYS_STAT_CLR(i));
+	for (offset = 0; offset < num_regs; offset++)
+		davinci_cp_intc_write(~0,
+			DAVINCI_CP_INTC_SYS_STAT_CLR(offset));
 
 	/* Enable nIRQ (what about nFIQ?) */
 	davinci_cp_intc_write(1, DAVINCI_CP_INTC_HOST_ENABLE_IDX_SET);
 
-	/*
-	 * Priority is determined by host channel: lower channel number has
-	 * higher priority i.e. channel 0 has highest priority and channel 31
-	 * had the lowest priority.
-	 */
-	num_reg = (num_irq + 3) >> 2;	/* 4 channels per register */
-	if (irq_prio) {
-		unsigned j, k;
-		u32 val;
-
-		for (k = i = 0; i < num_reg; i++) {
-			for (val = j = 0; j < 4; j++, k++) {
-				val >>= 8;
-				if (k < num_irq)
-					val |= irq_prio[k] << 24;
-			}
-
-			davinci_cp_intc_write(val, DAVINCI_CP_INTC_CHAN_MAP(i));
-		}
-	} else	{
-		/*
-		 * Default everything to channel 15 if priority not specified.
-		 * Note that channel 0-1 are mapped to nFIQ and channels 2-31
-		 * are mapped to nIRQ.
-		 */
-		for (i = 0; i < num_reg; i++)
-			davinci_cp_intc_write(0x0f0f0f0f,
-					      DAVINCI_CP_INTC_CHAN_MAP(i));
-	}
+	/* Default all priorities to channel 7. */
+	num_regs = (config->num_irqs + 3) >> 2;	/* 4 channels per register */
+	for (offset = 0; offset < num_regs; offset++)
+		davinci_cp_intc_write(0x07070707,
+			DAVINCI_CP_INTC_CHAN_MAP(offset));
 
-	irq_base = irq_alloc_descs(-1, 0, num_irq, 0);
+	irq_base = irq_alloc_descs(-1, 0, config->num_irqs, 0);
 	if (irq_base < 0) {
 		pr_warn("Couldn't allocate IRQ numbers\n");
 		irq_base = 0;
@@ -235,7 +206,7 @@ static int __init davinci_cp_intc_of_init(struct device_node *node,
 
 	/* create a legacy host */
 	davinci_cp_intc_irq_domain = irq_domain_add_legacy(
-					node, num_irq, irq_base, 0,
+					node, config->num_irqs, irq_base, 0,
 					&davinci_cp_intc_irq_domain_ops, NULL);
 
 	if (!davinci_cp_intc_irq_domain) {
@@ -251,9 +222,31 @@ static int __init davinci_cp_intc_of_init(struct device_node *node,
 	return 0;
 }
 
-void __init davinci_cp_intc_init(void)
+int __init davinci_cp_intc_init(const struct davinci_cp_intc_config *config)
 {
-	davinci_cp_intc_of_init(NULL, NULL);
+	return davinci_cp_intc_do_init(config, NULL);
 }
 
+static int __init davinci_cp_intc_of_init(struct device_node *node,
+					  struct device_node *parent)
+{
+	struct davinci_cp_intc_config config = { };
+	int ret;
+
+	ret = of_address_to_resource(node, 0, &config.reg);
+	if (ret) {
+		pr_err("%s: unable to get the register range from device-tree\n",
+		       __func__);
+		return ret;
+	}
+
+	ret = of_property_read_u32(node, "ti,intc-size", &config.num_irqs);
+	if (ret) {
+		pr_err("%s: unable to read the 'ti,intc-size' property\n",
+		       __func__);
+		return ret;
+	}
+
+	return davinci_cp_intc_do_init(&config, node);
+}
 IRQCHIP_DECLARE(cp_intc, "ti,cp-intc", davinci_cp_intc_of_init);
diff --git a/arch/arm/mach-davinci/da830.c b/arch/arm/mach-davinci/da830.c
index 0eb48ed2d423..7ce0b5f1200d 100644
--- a/arch/arm/mach-davinci/da830.c
+++ b/arch/arm/mach-davinci/da830.c
@@ -833,7 +833,7 @@ static const struct davinci_cp_intc_config da830_cp_intc_config = {
 
 void __init da830_init_irq(void)
 {
-	davinci_cp_intc_init();
+	davinci_cp_intc_init(&da830_cp_intc_config);
 }
 
 void __init da830_init_time(void)
diff --git a/arch/arm/mach-davinci/da850.c b/arch/arm/mach-davinci/da850.c
index fe274ab63fc8..62a00fa94696 100644
--- a/arch/arm/mach-davinci/da850.c
+++ b/arch/arm/mach-davinci/da850.c
@@ -771,7 +771,7 @@ static const struct davinci_cp_intc_config da850_cp_intc_config = {
 
 void __init da850_init_irq(void)
 {
-	davinci_cp_intc_init();
+	davinci_cp_intc_init(&da850_cp_intc_config);
 }
 
 void __init da850_init_time(void)
diff --git a/arch/arm/mach-davinci/include/mach/common.h b/arch/arm/mach-davinci/include/mach/common.h
index 7ad79171b4b5..14e0e1c40611 100644
--- a/arch/arm/mach-davinci/include/mach/common.h
+++ b/arch/arm/mach-davinci/include/mach/common.h
@@ -22,7 +22,6 @@
 #define DAVINCI_INTC_START		NR_IRQS
 #define DAVINCI_INTC_IRQ(_irqnum)	(DAVINCI_INTC_START + (_irqnum))
 
-void davinci_cp_intc_init(void);
 void davinci_timer_init(struct clk *clk);
 
 struct davinci_timer_instance {
diff --git a/include/linux/irqchip/irq-davinci-cp-intc.h b/include/linux/irqchip/irq-davinci-cp-intc.h
index 2270a6167b98..8d71ed5b5a61 100644
--- a/include/linux/irqchip/irq-davinci-cp-intc.h
+++ b/include/linux/irqchip/irq-davinci-cp-intc.h
@@ -20,4 +20,6 @@ struct davinci_cp_intc_config {
 	unsigned int num_irqs;
 };
 
+int davinci_cp_intc_init(const struct davinci_cp_intc_config *config);
+
 #endif /* _LINUX_IRQ_DAVINCI_CP_INTC_ */
-- 
cgit v1.2.3


From 517b773e0f612d608cbc62a08c55601bd56f73f6 Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Mon, 18 Feb 2019 22:25:49 +0200
Subject: RDMA/nldev: Share with user-space object IDs

Give to the user space tools unique identifier for PD, MR, CQ and CM_ID
objects, so they can be able to query on them with .doit callbacks.

QP .doit is not supported yet, till all drivers will be updated to provide
their LQPN to be equal to their restrack ID.

Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/core/nldev.c  | 20 ++++++++++++++++++++
 include/uapi/rdma/rdma_netlink.h |  9 +++++++++
 2 files changed, 29 insertions(+)

(limited to 'include')

diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c
index 9b4f891771c4..81d7ee3dcb20 100644
--- a/drivers/infiniband/core/nldev.c
+++ b/drivers/infiniband/core/nldev.c
@@ -108,6 +108,10 @@ static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = {
 	[RDMA_NLDEV_ATTR_DRIVER_U32]		= { .type = NLA_U32 },
 	[RDMA_NLDEV_ATTR_DRIVER_S64]		= { .type = NLA_S64 },
 	[RDMA_NLDEV_ATTR_DRIVER_U64]		= { .type = NLA_U64 },
+	[RDMA_NLDEV_ATTR_RES_PDN]		= { .type = NLA_U32 },
+	[RDMA_NLDEV_ATTR_RES_CQN]               = { .type = NLA_U32 },
+	[RDMA_NLDEV_ATTR_RES_MRN]               = { .type = NLA_U32 },
+	[RDMA_NLDEV_ATTR_RES_CM_IDN]            = { .type = NLA_U32 },
 };
 
 static int put_driver_name_print_type(struct sk_buff *msg, const char *name,
@@ -466,6 +470,9 @@ static int fill_res_cm_id_entry(struct sk_buff *msg, bool has_cap_net_admin,
 		    &cm_id->route.addr.dst_addr))
 		goto err;
 
+	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CM_IDN, res->id))
+		goto err;
+
 	if (fill_res_name_pid(msg, res))
 		goto err;
 
@@ -494,6 +501,9 @@ static int fill_res_cq_entry(struct sk_buff *msg, bool has_cap_net_admin,
 	    nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_POLL_CTX, cq->poll_ctx))
 		goto err;
 
+	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQN, res->id))
+		goto err;
+
 	if (fill_res_name_pid(msg, res))
 		goto err;
 
@@ -522,6 +532,9 @@ static int fill_res_mr_entry(struct sk_buff *msg, bool has_cap_net_admin,
 			      RDMA_NLDEV_ATTR_PAD))
 		goto err;
 
+	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_MRN, res->id))
+		goto err;
+
 	if (fill_res_name_pid(msg, res))
 		goto err;
 
@@ -552,6 +565,9 @@ static int fill_res_pd_entry(struct sk_buff *msg, bool has_cap_net_admin,
 			      atomic_read(&pd->usecnt), RDMA_NLDEV_ATTR_PAD))
 		goto err;
 
+	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, res->id))
+		goto err;
+
 	if (fill_res_name_pid(msg, res))
 		goto err;
 
@@ -893,6 +909,7 @@ static const struct nldev_fill_res_entry fill_entries[RDMA_RESTRACK_MAX] = {
 		.nldev_cmd = RDMA_NLDEV_CMD_RES_CM_ID_GET,
 		.nldev_attr = RDMA_NLDEV_ATTR_RES_CM_ID,
 		.entry = RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY,
+		.id = RDMA_NLDEV_ATTR_RES_CM_IDN,
 	},
 	[RDMA_RESTRACK_CQ] = {
 		.fill_res_func = fill_res_cq_entry,
@@ -900,6 +917,7 @@ static const struct nldev_fill_res_entry fill_entries[RDMA_RESTRACK_MAX] = {
 		.nldev_attr = RDMA_NLDEV_ATTR_RES_CQ,
 		.flags = NLDEV_PER_DEV,
 		.entry = RDMA_NLDEV_ATTR_RES_CQ_ENTRY,
+		.id = RDMA_NLDEV_ATTR_RES_CQN,
 	},
 	[RDMA_RESTRACK_MR] = {
 		.fill_res_func = fill_res_mr_entry,
@@ -907,6 +925,7 @@ static const struct nldev_fill_res_entry fill_entries[RDMA_RESTRACK_MAX] = {
 		.nldev_attr = RDMA_NLDEV_ATTR_RES_MR,
 		.flags = NLDEV_PER_DEV,
 		.entry = RDMA_NLDEV_ATTR_RES_MR_ENTRY,
+		.id = RDMA_NLDEV_ATTR_RES_MRN,
 	},
 	[RDMA_RESTRACK_PD] = {
 		.fill_res_func = fill_res_pd_entry,
@@ -914,6 +933,7 @@ static const struct nldev_fill_res_entry fill_entries[RDMA_RESTRACK_MAX] = {
 		.nldev_attr = RDMA_NLDEV_ATTR_RES_PD,
 		.flags = NLDEV_PER_DEV,
 		.entry = RDMA_NLDEV_ATTR_RES_PD_ENTRY,
+		.id = RDMA_NLDEV_ATTR_RES_PDN,
 	},
 };
 
diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h
index 3a9e681e4257..43362132e0d7 100644
--- a/include/uapi/rdma/rdma_netlink.h
+++ b/include/uapi/rdma/rdma_netlink.h
@@ -456,6 +456,15 @@ enum rdma_nldev_attr {
 	RDMA_NLDEV_ATTR_DRIVER_S64,		/* s64 */
 	RDMA_NLDEV_ATTR_DRIVER_U64,		/* u64 */
 
+	/*
+	 * Indexes to get/set secific entry,
+	 * for QP use RDMA_NLDEV_ATTR_RES_LQPN
+	 */
+	RDMA_NLDEV_ATTR_RES_PDN,               /* u32 */
+	RDMA_NLDEV_ATTR_RES_CQN,               /* u32 */
+	RDMA_NLDEV_ATTR_RES_MRN,               /* u32 */
+	RDMA_NLDEV_ATTR_RES_CM_IDN,            /* u32 */
+
 	/*
 	 * Always the end
 	 */
-- 
cgit v1.2.3


From c3d02788b45ab4a2d8f243b98c04b549c8193af6 Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Mon, 18 Feb 2019 22:25:50 +0200
Subject: RDMA/nldev: Provide parent IDs for PD, MR and QP objects

PD, MR and QP objects have parents objects: contexts and PDs.  The exposed
parent IDs allow to correlate various objects and simplify debug
investigation.

Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/core/nldev.c  | 18 ++++++++++++++++++
 include/uapi/rdma/rdma_netlink.h |  1 +
 2 files changed, 19 insertions(+)

(limited to 'include')

diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c
index 81d7ee3dcb20..e6c7cc510556 100644
--- a/drivers/infiniband/core/nldev.c
+++ b/drivers/infiniband/core/nldev.c
@@ -112,6 +112,7 @@ static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = {
 	[RDMA_NLDEV_ATTR_RES_CQN]               = { .type = NLA_U32 },
 	[RDMA_NLDEV_ATTR_RES_MRN]               = { .type = NLA_U32 },
 	[RDMA_NLDEV_ATTR_RES_CM_IDN]            = { .type = NLA_U32 },
+	[RDMA_NLDEV_ATTR_RES_CTXN]              = { .type = NLA_U32 },
 };
 
 static int put_driver_name_print_type(struct sk_buff *msg, const char *name,
@@ -420,6 +421,10 @@ static int fill_res_qp_entry(struct sk_buff *msg, bool has_cap_net_admin,
 	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_STATE, qp_attr.qp_state))
 		goto err;
 
+	if (!rdma_is_kernel_res(res) &&
+	    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, qp->pd->res.id))
+		goto err;
+
 	if (fill_res_name_pid(msg, res))
 		goto err;
 
@@ -503,6 +508,10 @@ static int fill_res_cq_entry(struct sk_buff *msg, bool has_cap_net_admin,
 
 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQN, res->id))
 		goto err;
+	if (!rdma_is_kernel_res(res) &&
+	    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CTXN,
+			cq->uobject->context->res.id))
+		goto err;
 
 	if (fill_res_name_pid(msg, res))
 		goto err;
@@ -535,6 +544,10 @@ static int fill_res_mr_entry(struct sk_buff *msg, bool has_cap_net_admin,
 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_MRN, res->id))
 		goto err;
 
+	if (!rdma_is_kernel_res(res) &&
+	    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, mr->pd->res.id))
+		goto err;
+
 	if (fill_res_name_pid(msg, res))
 		goto err;
 
@@ -568,6 +581,11 @@ static int fill_res_pd_entry(struct sk_buff *msg, bool has_cap_net_admin,
 	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, res->id))
 		goto err;
 
+	if (!rdma_is_kernel_res(res) &&
+	    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CTXN,
+			pd->uobject->context->res.id))
+		goto err;
+
 	if (fill_res_name_pid(msg, res))
 		goto err;
 
diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h
index 43362132e0d7..4ebbcfb2c6ef 100644
--- a/include/uapi/rdma/rdma_netlink.h
+++ b/include/uapi/rdma/rdma_netlink.h
@@ -464,6 +464,7 @@ enum rdma_nldev_attr {
 	RDMA_NLDEV_ATTR_RES_CQN,               /* u32 */
 	RDMA_NLDEV_ATTR_RES_MRN,               /* u32 */
 	RDMA_NLDEV_ATTR_RES_CM_IDN,            /* u32 */
+	RDMA_NLDEV_ATTR_RES_CTXN,	       /* u32 */
 
 	/*
 	 * Always the end
-- 
cgit v1.2.3


From ea1075edcbab7d92f4e4ccf5490043f796bf78be Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@mellanox.com>
Date: Tue, 12 Feb 2019 21:12:47 -0700
Subject: RDMA: Add and use rdma_for_each_port

We have many loops iterating over all of the end port numbers on a struct
ib_device, simplify them with a for_each helper.

Reviewed-by: Parav Pandit <parav@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 .clang-format                             |  1 +
 drivers/infiniband/core/cache.c           |  6 +++---
 drivers/infiniband/core/cma.c             |  7 +++----
 drivers/infiniband/core/device.c          | 26 ++++++++++++--------------
 drivers/infiniband/core/mad.c             |  4 ++--
 drivers/infiniband/core/nldev.c           |  4 ++--
 drivers/infiniband/core/security.c        | 11 +++++++----
 drivers/infiniband/core/sysfs.c           | 12 +++---------
 drivers/infiniband/core/user_mad.c        |  9 +++++----
 drivers/infiniband/ulp/ipoib/ipoib_main.c |  4 ++--
 drivers/infiniband/ulp/srp/ib_srp.c       |  5 +++--
 include/rdma/ib_verbs.h                   | 10 ++++++++++
 12 files changed, 53 insertions(+), 46 deletions(-)

(limited to 'include')

diff --git a/.clang-format b/.clang-format
index 335ce29ab813..201a4f531b90 100644
--- a/.clang-format
+++ b/.clang-format
@@ -361,6 +361,7 @@ ForEachMacros:
   - 'radix_tree_for_each_slot'
   - 'radix_tree_for_each_tagged'
   - 'rbtree_postorder_for_each_entry_safe'
+  - 'rdma_for_each_port'
   - 'resource_list_for_each_entry'
   - 'resource_list_for_each_entry_safe'
   - 'rhl_for_each_entry_rcu'
diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
index 2338d0b3a0ca..3d137d8381a9 100644
--- a/drivers/infiniband/core/cache.c
+++ b/drivers/infiniband/core/cache.c
@@ -1428,7 +1428,7 @@ static void ib_cache_event(struct ib_event_handler *handler,
 
 int ib_cache_setup_one(struct ib_device *device)
 {
-	int p;
+	unsigned int p;
 	int err;
 
 	rwlock_init(&device->cache.lock);
@@ -1447,8 +1447,8 @@ int ib_cache_setup_one(struct ib_device *device)
 		return err;
 	}
 
-	for (p = 0; p <= rdma_end_port(device) - rdma_start_port(device); ++p)
-		ib_cache_update(device, p + rdma_start_port(device), true);
+	rdma_for_each_port (device, p)
+		ib_cache_update(device, p, true);
 
 	INIT_IB_EVENT_HANDLER(&device->cache.event_handler,
 			      device, ib_cache_event);
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index c43512752b8a..68c997be2429 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -659,7 +659,7 @@ static int cma_acquire_dev_by_src_ip(struct rdma_id_private *id_priv)
 	struct cma_device *cma_dev;
 	enum ib_gid_type gid_type;
 	int ret = -ENODEV;
-	u8 port;
+	unsigned int port;
 
 	if (dev_addr->dev_type != ARPHRD_INFINIBAND &&
 	    id_priv->id.ps == RDMA_PS_IPOIB)
@@ -673,8 +673,7 @@ static int cma_acquire_dev_by_src_ip(struct rdma_id_private *id_priv)
 
 	mutex_lock(&lock);
 	list_for_each_entry(cma_dev, &dev_list, list) {
-		for (port = rdma_start_port(cma_dev->device);
-		     port <= rdma_end_port(cma_dev->device); port++) {
+		rdma_for_each_port (cma_dev->device, port) {
 			gidp = rdma_protocol_roce(cma_dev->device, port) ?
 			       &iboe_gid : &gid;
 			gid_type = cma_dev->default_gid_type[port - 1];
@@ -4548,7 +4547,7 @@ static void cma_add_one(struct ib_device *device)
 	if (!cma_dev->default_roce_tos)
 		goto free_gid_type;
 
-	for (i = rdma_start_port(device); i <= rdma_end_port(device); i++) {
+	rdma_for_each_port (device, i) {
 		supported_gids = roce_gid_type_mask_support(device, i);
 		WARN_ON(!supported_gids);
 		if (supported_gids & (1 << CMA_PREFERRED_ROCE_GID_TYPE))
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index f7e206033d39..71582f848a9c 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -470,10 +470,8 @@ static int verify_immutable(const struct ib_device *dev, u8 port)
 
 static int read_port_immutable(struct ib_device *device)
 {
+	unsigned int port;
 	int ret;
-	u8 start_port = rdma_start_port(device);
-	u8 end_port = rdma_end_port(device);
-	u8 port;
 
 	/**
 	 * device->port_immutable is indexed directly by the port number to make
@@ -482,13 +480,13 @@ static int read_port_immutable(struct ib_device *device)
 	 * Therefore port_immutable is declared as a 1 based array with
 	 * potential empty slots at the beginning.
 	 */
-	device->port_immutable = kcalloc(end_port + 1,
-					 sizeof(*device->port_immutable),
-					 GFP_KERNEL);
+	device->port_immutable =
+		kcalloc(rdma_end_port(device) + 1,
+			sizeof(*device->port_immutable), GFP_KERNEL);
 	if (!device->port_immutable)
 		return -ENOMEM;
 
-	for (port = start_port; port <= end_port; ++port) {
+	rdma_for_each_port (device, port) {
 		ret = device->ops.get_port_immutable(
 			device, port, &device->port_immutable[port]);
 		if (ret)
@@ -540,9 +538,9 @@ static void ib_policy_change_task(struct work_struct *work)
 
 	down_read(&devices_rwsem);
 	xa_for_each_marked (&devices, index, dev, DEVICE_REGISTERED) {
-		int i;
+		unsigned int i;
 
-		for (i = rdma_start_port(dev); i <= rdma_end_port(dev); i++) {
+		rdma_for_each_port (dev, i) {
 			u64 sp;
 			int ret = ib_get_cached_subnet_prefix(dev,
 							      i,
@@ -1060,10 +1058,9 @@ void ib_enum_roce_netdev(struct ib_device *ib_dev,
 			 roce_netdev_callback cb,
 			 void *cookie)
 {
-	u8 port;
+	unsigned int port;
 
-	for (port = rdma_start_port(ib_dev); port <= rdma_end_port(ib_dev);
-	     port++)
+	rdma_for_each_port (ib_dev, port)
 		if (rdma_protocol_roce(ib_dev, port)) {
 			struct net_device *idev = NULL;
 
@@ -1217,9 +1214,10 @@ int ib_find_gid(struct ib_device *device, union ib_gid *gid,
 		u8 *port_num, u16 *index)
 {
 	union ib_gid tmp_gid;
-	int ret, port, i;
+	unsigned int port;
+	int ret, i;
 
-	for (port = rdma_start_port(device); port <= rdma_end_port(device); ++port) {
+	rdma_for_each_port (device, port) {
 		if (!rdma_protocol_ib(device, port))
 			continue;
 
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index 7870823bac47..e742a6a2c138 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -3326,9 +3326,9 @@ error:
 
 static void ib_mad_remove_device(struct ib_device *device, void *client_data)
 {
-	int i;
+	unsigned int i;
 
-	for (i = rdma_start_port(device); i <= rdma_end_port(device); i++) {
+	rdma_for_each_port (device, i) {
 		if (!rdma_cap_ib_mad(device, i))
 			continue;
 
diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c
index e7350d9d60e9..85f6f2bcce40 100644
--- a/drivers/infiniband/core/nldev.c
+++ b/drivers/infiniband/core/nldev.c
@@ -774,7 +774,7 @@ static int nldev_port_get_dumpit(struct sk_buff *skb,
 	u32 idx = 0;
 	u32 ifindex;
 	int err;
-	u32 p;
+	unsigned int p;
 
 	err = nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
 			  nldev_policy, NULL);
@@ -786,7 +786,7 @@ static int nldev_port_get_dumpit(struct sk_buff *skb,
 	if (!device)
 		return -EINVAL;
 
-	for (p = rdma_start_port(device); p <= rdma_end_port(device); ++p) {
+	rdma_for_each_port (device, p) {
 		/*
 		 * The dumpit function returns all information from specific
 		 * index. This specific index is taken from the netlink
diff --git a/drivers/infiniband/core/security.c b/drivers/infiniband/core/security.c
index dad6a94a43f3..492702b83600 100644
--- a/drivers/infiniband/core/security.c
+++ b/drivers/infiniband/core/security.c
@@ -422,12 +422,15 @@ void ib_close_shared_qp_security(struct ib_qp_security *sec)
 
 int ib_create_qp_security(struct ib_qp *qp, struct ib_device *dev)
 {
-	u8 i = rdma_start_port(dev);
+	unsigned int i;
 	bool is_ib = false;
 	int ret;
 
-	while (i <= rdma_end_port(dev) && !is_ib)
+	rdma_for_each_port (dev, i) {
 		is_ib = rdma_protocol_ib(dev, i++);
+		if (is_ib)
+			break;
+	}
 
 	/* If this isn't an IB device don't create the security context */
 	if (!is_ib)
@@ -561,9 +564,9 @@ void ib_security_cache_change(struct ib_device *device,
 void ib_security_release_port_pkey_list(struct ib_device *device)
 {
 	struct pkey_index_qp_list *pkey, *tmp_pkey;
-	int i;
+	unsigned int i;
 
-	for (i = rdma_start_port(device); i <= rdma_end_port(device); i++) {
+	rdma_for_each_port (device, i) {
 		list_for_each_entry_safe(pkey,
 					 tmp_pkey,
 					 &device->port_pkey_list[i].pkey_list,
diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
index 9335b15c2e38..9b6a065bdfa5 100644
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c
@@ -1308,23 +1308,17 @@ static void ib_free_port_attrs(struct ib_device *device)
 
 static int ib_setup_port_attrs(struct ib_device *device)
 {
+	unsigned int port;
 	int ret;
-	int i;
 
 	device->ports_kobj = kobject_create_and_add("ports", &device->dev.kobj);
 	if (!device->ports_kobj)
 		return -ENOMEM;
 
-	if (rdma_cap_ib_switch(device)) {
-		ret = add_port(device, 0);
+	rdma_for_each_port (device, port) {
+		ret = add_port(device, port);
 		if (ret)
 			goto err_put;
-	} else {
-		for (i = 1; i <= device->phys_port_cnt; ++i) {
-			ret = add_port(device, i);
-			if (ret)
-				goto err_put;
-		}
 	}
 
 	return 0;
diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c
index 3ebd211a87ed..02b7947ab215 100644
--- a/drivers/infiniband/core/user_mad.c
+++ b/drivers/infiniband/core/user_mad.c
@@ -1323,14 +1323,15 @@ free:
 static void ib_umad_remove_one(struct ib_device *device, void *client_data)
 {
 	struct ib_umad_device *umad_dev = client_data;
-	int i;
+	unsigned int i;
 
 	if (!umad_dev)
 		return;
 
-	for (i = 0; i <= rdma_end_port(device) - rdma_start_port(device); ++i) {
-		if (rdma_cap_ib_mad(device, i + rdma_start_port(device)))
-			ib_umad_kill_port(&umad_dev->ports[i]);
+	rdma_for_each_port (device, i) {
+		if (rdma_cap_ib_mad(device, i))
+			ib_umad_kill_port(
+				&umad_dev->ports[i - rdma_start_port(device)]);
 	}
 	/* balances kref_init() */
 	ib_umad_dev_put(umad_dev);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index ee4cca80f00b..48eda16db1a7 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -2495,7 +2495,7 @@ static void ipoib_add_one(struct ib_device *device)
 	struct list_head *dev_list;
 	struct net_device *dev;
 	struct ipoib_dev_priv *priv;
-	int p;
+	unsigned int p;
 	int count = 0;
 
 	dev_list = kmalloc(sizeof(*dev_list), GFP_KERNEL);
@@ -2504,7 +2504,7 @@ static void ipoib_add_one(struct ib_device *device)
 
 	INIT_LIST_HEAD(dev_list);
 
-	for (p = rdma_start_port(device); p <= rdma_end_port(device); ++p) {
+	rdma_for_each_port (device, p) {
 		if (!rdma_protocol_ib(device, p))
 			continue;
 		dev = ipoib_add_port("ib%d", device, p);
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index 84184910f038..151f4eba84b8 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -4127,7 +4127,8 @@ static void srp_add_one(struct ib_device *device)
 	struct srp_device *srp_dev;
 	struct ib_device_attr *attr = &device->attrs;
 	struct srp_host *host;
-	int mr_page_shift, p;
+	int mr_page_shift;
+	unsigned int p;
 	u64 max_pages_per_mr;
 	unsigned int flags = 0;
 
@@ -4194,7 +4195,7 @@ static void srp_add_one(struct ib_device *device)
 		WARN_ON_ONCE(srp_dev->global_rkey == 0);
 	}
 
-	for (p = rdma_start_port(device); p <= rdma_end_port(device); ++p) {
+	rdma_for_each_port (device, p) {
 		host = srp_add_port(srp_dev, p);
 		if (host)
 			list_add_tail(&host->list, &srp_dev->dev_list);
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 2a17c2b30073..fa0edd6ae33c 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -2827,6 +2827,16 @@ static inline u8 rdma_start_port(const struct ib_device *device)
 	return rdma_cap_ib_switch(device) ? 0 : 1;
 }
 
+/**
+ * rdma_for_each_port - Iterate over all valid port numbers of the IB device
+ * @device - The struct ib_device * to iterate over
+ * @iter - The unsigned int to store the port number
+ */
+#define rdma_for_each_port(device, iter)                                       \
+	for (iter = rdma_start_port(device + BUILD_BUG_ON_ZERO(!__same_type(   \
+						     unsigned int, iter)));    \
+	     iter <= rdma_end_port(device); (iter)++)
+
 /**
  * rdma_end_port - Return the last valid port number for the device
  * specified
-- 
cgit v1.2.3


From 8ceb1357b33790193e9d55d2d09bcfd6bd59dd6d Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@mellanox.com>
Date: Tue, 12 Feb 2019 21:12:48 -0700
Subject: RDMA/device: Consolidate ib_device per_port data into one place

There is no reason to have three allocations of per-port data. Combine
them together and make the lifetime for all the per-port data match the
struct ib_device.

Following patches will require more port-specific data, now there is a
good place to put it.

Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/core/cache.c    |  4 +--
 drivers/infiniband/core/device.c   | 70 ++++++++++++------------------------
 drivers/infiniband/core/security.c | 24 ++++++-------
 include/rdma/ib_verbs.h            | 74 ++++++++++++++++++++++----------------
 4 files changed, 78 insertions(+), 94 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
index 3d137d8381a9..9d0e8aca741a 100644
--- a/drivers/infiniband/core/cache.c
+++ b/drivers/infiniband/core/cache.c
@@ -881,8 +881,8 @@ static int _gid_table_setup_one(struct ib_device *ib_dev)
 	for (port = 0; port < ib_dev->phys_port_cnt; port++) {
 		u8 rdma_port = port + rdma_start_port(ib_dev);
 
-		table =	alloc_gid_table(
-				ib_dev->port_immutable[rdma_port].gid_tbl_len);
+		table = alloc_gid_table(
+			ib_dev->port_data[rdma_port].immutable.gid_tbl_len);
 		if (!table)
 			goto rollback_table_setup;
 
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 71582f848a9c..8d7d63a60ef5 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -293,8 +293,7 @@ static void ib_device_release(struct device *device)
 	WARN_ON(refcount_read(&dev->refcount));
 	ib_cache_release_one(dev);
 	ib_security_release_port_pkey_list(dev);
-	kfree(dev->port_pkey_list);
-	kfree(dev->port_immutable);
+	kfree(dev->port_data);
 	xa_destroy(&dev->client_data);
 	kfree(dev);
 }
@@ -468,27 +467,31 @@ static int verify_immutable(const struct ib_device *dev, u8 port)
 			    rdma_max_mad_size(dev, port) != 0);
 }
 
-static int read_port_immutable(struct ib_device *device)
+static int setup_port_data(struct ib_device *device)
 {
 	unsigned int port;
 	int ret;
 
-	/**
-	 * device->port_immutable is indexed directly by the port number to make
+	/*
+	 * device->port_data is indexed directly by the port number to make
 	 * access to this data as efficient as possible.
 	 *
-	 * Therefore port_immutable is declared as a 1 based array with
-	 * potential empty slots at the beginning.
+	 * Therefore port_data is declared as a 1 based array with potential
+	 * empty slots at the beginning.
 	 */
-	device->port_immutable =
-		kcalloc(rdma_end_port(device) + 1,
-			sizeof(*device->port_immutable), GFP_KERNEL);
-	if (!device->port_immutable)
+	device->port_data = kcalloc(rdma_end_port(device) + 1,
+				    sizeof(*device->port_data), GFP_KERNEL);
+	if (!device->port_data)
 		return -ENOMEM;
 
 	rdma_for_each_port (device, port) {
-		ret = device->ops.get_port_immutable(
-			device, port, &device->port_immutable[port]);
+		struct ib_port_data *pdata = &device->port_data[port];
+
+		spin_lock_init(&pdata->pkey_list_lock);
+		INIT_LIST_HEAD(&pdata->pkey_list);
+
+		ret = device->ops.get_port_immutable(device, port,
+						     &pdata->immutable);
 		if (ret)
 			return ret;
 
@@ -507,30 +510,6 @@ void ib_get_device_fw_str(struct ib_device *dev, char *str)
 }
 EXPORT_SYMBOL(ib_get_device_fw_str);
 
-static int setup_port_pkey_list(struct ib_device *device)
-{
-	int i;
-
-	/**
-	 * device->port_pkey_list is indexed directly by the port number,
-	 * Therefore it is declared as a 1 based array with potential empty
-	 * slots at the beginning.
-	 */
-	device->port_pkey_list = kcalloc(rdma_end_port(device) + 1,
-					 sizeof(*device->port_pkey_list),
-					 GFP_KERNEL);
-
-	if (!device->port_pkey_list)
-		return -ENOMEM;
-
-	for (i = 0; i < (rdma_end_port(device) + 1); i++) {
-		spin_lock_init(&device->port_pkey_list[i].list_lock);
-		INIT_LIST_HEAD(&device->port_pkey_list[i].pkey_list);
-	}
-
-	return 0;
-}
-
 static void ib_policy_change_task(struct work_struct *work)
 {
 	struct ib_device *dev;
@@ -668,10 +647,9 @@ static int setup_device(struct ib_device *device)
 	if (ret)
 		return ret;
 
-	ret = read_port_immutable(device);
+	ret = setup_port_data(device);
 	if (ret) {
-		dev_warn(&device->dev,
-			 "Couldn't create per port immutable data\n");
+		dev_warn(&device->dev, "Couldn't create per-port data\n");
 		return ret;
 	}
 
@@ -683,12 +661,6 @@ static int setup_device(struct ib_device *device)
 		return ret;
 	}
 
-	ret = setup_port_pkey_list(device);
-	if (ret) {
-		dev_warn(&device->dev, "Couldn't create per port_pkey_list\n");
-		return ret;
-	}
-
 	return 0;
 }
 
@@ -1221,7 +1193,8 @@ int ib_find_gid(struct ib_device *device, union ib_gid *gid,
 		if (!rdma_protocol_ib(device, port))
 			continue;
 
-		for (i = 0; i < device->port_immutable[port].gid_tbl_len; ++i) {
+		for (i = 0; i < device->port_data[port].immutable.gid_tbl_len;
+		     ++i) {
 			ret = rdma_query_gid(device, port, i, &tmp_gid);
 			if (ret)
 				return ret;
@@ -1253,7 +1226,8 @@ int ib_find_pkey(struct ib_device *device,
 	u16 tmp_pkey;
 	int partial_ix = -1;
 
-	for (i = 0; i < device->port_immutable[port_num].pkey_tbl_len; ++i) {
+	for (i = 0; i < device->port_data[port_num].immutable.pkey_tbl_len;
+	     ++i) {
 		ret = ib_query_pkey(device, port_num, i, &tmp_pkey);
 		if (ret)
 			return ret;
diff --git a/drivers/infiniband/core/security.c b/drivers/infiniband/core/security.c
index 492702b83600..1ab423b19f77 100644
--- a/drivers/infiniband/core/security.c
+++ b/drivers/infiniband/core/security.c
@@ -49,16 +49,15 @@ static struct pkey_index_qp_list *get_pkey_idx_qp_list(struct ib_port_pkey *pp)
 	struct pkey_index_qp_list *tmp_pkey;
 	struct ib_device *dev = pp->sec->dev;
 
-	spin_lock(&dev->port_pkey_list[pp->port_num].list_lock);
-	list_for_each_entry(tmp_pkey,
-			    &dev->port_pkey_list[pp->port_num].pkey_list,
-			    pkey_index_list) {
+	spin_lock(&dev->port_data[pp->port_num].pkey_list_lock);
+	list_for_each_entry (tmp_pkey, &dev->port_data[pp->port_num].pkey_list,
+			     pkey_index_list) {
 		if (tmp_pkey->pkey_index == pp->pkey_index) {
 			pkey = tmp_pkey;
 			break;
 		}
 	}
-	spin_unlock(&dev->port_pkey_list[pp->port_num].list_lock);
+	spin_unlock(&dev->port_data[pp->port_num].pkey_list_lock);
 	return pkey;
 }
 
@@ -263,12 +262,12 @@ static int port_pkey_list_insert(struct ib_port_pkey *pp)
 		if (!pkey)
 			return -ENOMEM;
 
-		spin_lock(&dev->port_pkey_list[port_num].list_lock);
+		spin_lock(&dev->port_data[port_num].pkey_list_lock);
 		/* Check for the PKey again.  A racing process may
 		 * have created it.
 		 */
 		list_for_each_entry(tmp_pkey,
-				    &dev->port_pkey_list[port_num].pkey_list,
+				    &dev->port_data[port_num].pkey_list,
 				    pkey_index_list) {
 			if (tmp_pkey->pkey_index == pp->pkey_index) {
 				kfree(pkey);
@@ -283,9 +282,9 @@ static int port_pkey_list_insert(struct ib_port_pkey *pp)
 			spin_lock_init(&pkey->qp_list_lock);
 			INIT_LIST_HEAD(&pkey->qp_list);
 			list_add(&pkey->pkey_index_list,
-				 &dev->port_pkey_list[port_num].pkey_list);
+				 &dev->port_data[port_num].pkey_list);
 		}
-		spin_unlock(&dev->port_pkey_list[port_num].list_lock);
+		spin_unlock(&dev->port_data[port_num].pkey_list_lock);
 	}
 
 	spin_lock(&pkey->qp_list_lock);
@@ -551,9 +550,8 @@ void ib_security_cache_change(struct ib_device *device,
 {
 	struct pkey_index_qp_list *pkey;
 
-	list_for_each_entry(pkey,
-			    &device->port_pkey_list[port_num].pkey_list,
-			    pkey_index_list) {
+	list_for_each_entry (pkey, &device->port_data[port_num].pkey_list,
+			     pkey_index_list) {
 		check_pkey_qps(pkey,
 			       device,
 			       port_num,
@@ -569,7 +567,7 @@ void ib_security_release_port_pkey_list(struct ib_device *device)
 	rdma_for_each_port (device, i) {
 		list_for_each_entry_safe(pkey,
 					 tmp_pkey,
-					 &device->port_pkey_list[i].pkey_list,
+					 &device->port_data[i].pkey_list,
 					 pkey_index_list) {
 			list_del(&pkey->pkey_index_list);
 			kfree(pkey);
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index fa0edd6ae33c..b42e257814f7 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -2198,6 +2198,13 @@ struct ib_port_immutable {
 	u32                           max_mad_size;
 };
 
+struct ib_port_data {
+	struct ib_port_immutable immutable;
+
+	spinlock_t pkey_list_lock;
+	struct list_head pkey_list;
+};
+
 /* rdma netdev type - specifies protocol type */
 enum rdma_netdev_t {
 	RDMA_NETDEV_OPA_VNIC,
@@ -2243,12 +2250,6 @@ struct rdma_netdev_alloc_params {
 				      struct net_device *netdev, void *param);
 };
 
-struct ib_port_pkey_list {
-	/* Lock to hold while modifying the list. */
-	spinlock_t                    list_lock;
-	struct list_head              pkey_list;
-};
-
 struct ib_counters {
 	struct ib_device	*device;
 	struct ib_uobject	*uobject;
@@ -2549,14 +2550,12 @@ struct ib_device {
 
 	struct ib_cache               cache;
 	/**
-	 * port_immutable is indexed by port number
+	 * port_data is indexed by port number
 	 */
-	struct ib_port_immutable     *port_immutable;
+	struct ib_port_data *port_data;
 
 	int			      num_comp_vectors;
 
-	struct ib_port_pkey_list     *port_pkey_list;
-
 	struct iw_cm_verbs	     *iwcm;
 
 	struct module               *owner;
@@ -2860,34 +2859,38 @@ static inline int rdma_is_port_valid(const struct ib_device *device,
 static inline bool rdma_is_grh_required(const struct ib_device *device,
 					u8 port_num)
 {
-	return device->port_immutable[port_num].core_cap_flags &
-		RDMA_CORE_PORT_IB_GRH_REQUIRED;
+	return device->port_data[port_num].immutable.core_cap_flags &
+	       RDMA_CORE_PORT_IB_GRH_REQUIRED;
 }
 
 static inline bool rdma_protocol_ib(const struct ib_device *device, u8 port_num)
 {
-	return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_PROT_IB;
+	return device->port_data[port_num].immutable.core_cap_flags &
+	       RDMA_CORE_CAP_PROT_IB;
 }
 
 static inline bool rdma_protocol_roce(const struct ib_device *device, u8 port_num)
 {
-	return device->port_immutable[port_num].core_cap_flags &
-		(RDMA_CORE_CAP_PROT_ROCE | RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP);
+	return device->port_data[port_num].immutable.core_cap_flags &
+	       (RDMA_CORE_CAP_PROT_ROCE | RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP);
 }
 
 static inline bool rdma_protocol_roce_udp_encap(const struct ib_device *device, u8 port_num)
 {
-	return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP;
+	return device->port_data[port_num].immutable.core_cap_flags &
+	       RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP;
 }
 
 static inline bool rdma_protocol_roce_eth_encap(const struct ib_device *device, u8 port_num)
 {
-	return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_PROT_ROCE;
+	return device->port_data[port_num].immutable.core_cap_flags &
+	       RDMA_CORE_CAP_PROT_ROCE;
 }
 
 static inline bool rdma_protocol_iwarp(const struct ib_device *device, u8 port_num)
 {
-	return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_PROT_IWARP;
+	return device->port_data[port_num].immutable.core_cap_flags &
+	       RDMA_CORE_CAP_PROT_IWARP;
 }
 
 static inline bool rdma_ib_or_roce(const struct ib_device *device, u8 port_num)
@@ -2898,12 +2901,14 @@ static inline bool rdma_ib_or_roce(const struct ib_device *device, u8 port_num)
 
 static inline bool rdma_protocol_raw_packet(const struct ib_device *device, u8 port_num)
 {
-	return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_PROT_RAW_PACKET;
+	return device->port_data[port_num].immutable.core_cap_flags &
+	       RDMA_CORE_CAP_PROT_RAW_PACKET;
 }
 
 static inline bool rdma_protocol_usnic(const struct ib_device *device, u8 port_num)
 {
-	return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_PROT_USNIC;
+	return device->port_data[port_num].immutable.core_cap_flags &
+	       RDMA_CORE_CAP_PROT_USNIC;
 }
 
 /**
@@ -2920,7 +2925,8 @@ static inline bool rdma_protocol_usnic(const struct ib_device *device, u8 port_n
  */
 static inline bool rdma_cap_ib_mad(const struct ib_device *device, u8 port_num)
 {
-	return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_IB_MAD;
+	return device->port_data[port_num].immutable.core_cap_flags &
+	       RDMA_CORE_CAP_IB_MAD;
 }
 
 /**
@@ -2944,8 +2950,8 @@ static inline bool rdma_cap_ib_mad(const struct ib_device *device, u8 port_num)
  */
 static inline bool rdma_cap_opa_mad(struct ib_device *device, u8 port_num)
 {
-	return (device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_OPA_MAD)
-		== RDMA_CORE_CAP_OPA_MAD;
+	return (device->port_data[port_num].immutable.core_cap_flags &
+		RDMA_CORE_CAP_OPA_MAD) == RDMA_CORE_CAP_OPA_MAD;
 }
 
 /**
@@ -2970,7 +2976,8 @@ static inline bool rdma_cap_opa_mad(struct ib_device *device, u8 port_num)
  */
 static inline bool rdma_cap_ib_smi(const struct ib_device *device, u8 port_num)
 {
-	return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_IB_SMI;
+	return device->port_data[port_num].immutable.core_cap_flags &
+	       RDMA_CORE_CAP_IB_SMI;
 }
 
 /**
@@ -2990,7 +2997,8 @@ static inline bool rdma_cap_ib_smi(const struct ib_device *device, u8 port_num)
  */
 static inline bool rdma_cap_ib_cm(const struct ib_device *device, u8 port_num)
 {
-	return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_IB_CM;
+	return device->port_data[port_num].immutable.core_cap_flags &
+	       RDMA_CORE_CAP_IB_CM;
 }
 
 /**
@@ -3007,7 +3015,8 @@ static inline bool rdma_cap_ib_cm(const struct ib_device *device, u8 port_num)
  */
 static inline bool rdma_cap_iw_cm(const struct ib_device *device, u8 port_num)
 {
-	return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_IW_CM;
+	return device->port_data[port_num].immutable.core_cap_flags &
+	       RDMA_CORE_CAP_IW_CM;
 }
 
 /**
@@ -3027,7 +3036,8 @@ static inline bool rdma_cap_iw_cm(const struct ib_device *device, u8 port_num)
  */
 static inline bool rdma_cap_ib_sa(const struct ib_device *device, u8 port_num)
 {
-	return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_IB_SA;
+	return device->port_data[port_num].immutable.core_cap_flags &
+	       RDMA_CORE_CAP_IB_SA;
 }
 
 /**
@@ -3067,7 +3077,8 @@ static inline bool rdma_cap_ib_mcast(const struct ib_device *device, u8 port_num
  */
 static inline bool rdma_cap_af_ib(const struct ib_device *device, u8 port_num)
 {
-	return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_AF_IB;
+	return device->port_data[port_num].immutable.core_cap_flags &
+	       RDMA_CORE_CAP_AF_IB;
 }
 
 /**
@@ -3088,7 +3099,8 @@ static inline bool rdma_cap_af_ib(const struct ib_device *device, u8 port_num)
  */
 static inline bool rdma_cap_eth_ah(const struct ib_device *device, u8 port_num)
 {
-	return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_ETH_AH;
+	return device->port_data[port_num].immutable.core_cap_flags &
+	       RDMA_CORE_CAP_ETH_AH;
 }
 
 /**
@@ -3102,7 +3114,7 @@ static inline bool rdma_cap_eth_ah(const struct ib_device *device, u8 port_num)
  */
 static inline bool rdma_cap_opa_ah(struct ib_device *device, u8 port_num)
 {
-	return (device->port_immutable[port_num].core_cap_flags &
+	return (device->port_data[port_num].immutable.core_cap_flags &
 		RDMA_CORE_CAP_OPA_AH) == RDMA_CORE_CAP_OPA_AH;
 }
 
@@ -3120,7 +3132,7 @@ static inline bool rdma_cap_opa_ah(struct ib_device *device, u8 port_num)
  */
 static inline size_t rdma_max_mad_size(const struct ib_device *device, u8 port_num)
 {
-	return device->port_immutable[port_num].max_mad_size;
+	return device->port_data[port_num].immutable.max_mad_size;
 }
 
 /**
-- 
cgit v1.2.3


From 8faea9fd4a3914f12cd343e10810ec5f4215ddd6 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@mellanox.com>
Date: Tue, 12 Feb 2019 21:12:49 -0700
Subject: RDMA/cache: Move the cache per-port data into the main ib_port_data

Like the other cases there no real reason to have another array just for
the cache. This larger conversion gets its own patch.

Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/core/cache.c | 89 ++++++++++++++---------------------------
 include/rdma/ib_verbs.h         |  3 +-
 2 files changed, 33 insertions(+), 59 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
index 9d0e8aca741a..a28dc1901c80 100644
--- a/drivers/infiniband/core/cache.c
+++ b/drivers/infiniband/core/cache.c
@@ -185,7 +185,7 @@ EXPORT_SYMBOL(ib_cache_gid_parse_type_str);
 
 static struct ib_gid_table *rdma_gid_table(struct ib_device *device, u8 port)
 {
-	return device->cache.ports[port - rdma_start_port(device)].gid;
+	return device->port_data[port].cache.gid;
 }
 
 static bool is_gid_entry_free(const struct ib_gid_table_entry *entry)
@@ -765,7 +765,7 @@ err_free_table:
 	return NULL;
 }
 
-static void release_gid_table(struct ib_device *device, u8 port,
+static void release_gid_table(struct ib_device *device,
 			      struct ib_gid_table *table)
 {
 	bool leak = false;
@@ -863,31 +863,27 @@ static void gid_table_reserve_default(struct ib_device *ib_dev, u8 port,
 
 static void gid_table_release_one(struct ib_device *ib_dev)
 {
-	struct ib_gid_table *table;
-	u8 port;
+	unsigned int p;
 
-	for (port = 0; port < ib_dev->phys_port_cnt; port++) {
-		table = ib_dev->cache.ports[port].gid;
-		release_gid_table(ib_dev, port, table);
-		ib_dev->cache.ports[port].gid = NULL;
+	rdma_for_each_port (ib_dev, p) {
+		release_gid_table(ib_dev, ib_dev->port_data[p].cache.gid);
+		ib_dev->port_data[p].cache.gid = NULL;
 	}
 }
 
 static int _gid_table_setup_one(struct ib_device *ib_dev)
 {
-	u8 port;
 	struct ib_gid_table *table;
+	unsigned int rdma_port;
 
-	for (port = 0; port < ib_dev->phys_port_cnt; port++) {
-		u8 rdma_port = port + rdma_start_port(ib_dev);
-
+	rdma_for_each_port (ib_dev, rdma_port) {
 		table = alloc_gid_table(
 			ib_dev->port_data[rdma_port].immutable.gid_tbl_len);
 		if (!table)
 			goto rollback_table_setup;
 
 		gid_table_reserve_default(ib_dev, rdma_port, table);
-		ib_dev->cache.ports[port].gid = table;
+		ib_dev->port_data[rdma_port].cache.gid = table;
 	}
 	return 0;
 
@@ -898,14 +894,11 @@ rollback_table_setup:
 
 static void gid_table_cleanup_one(struct ib_device *ib_dev)
 {
-	struct ib_gid_table *table;
-	u8 port;
+	unsigned int p;
 
-	for (port = 0; port < ib_dev->phys_port_cnt; port++) {
-		table = ib_dev->cache.ports[port].gid;
-		cleanup_gid_table_port(ib_dev, port + rdma_start_port(ib_dev),
-				       table);
-	}
+	rdma_for_each_port (ib_dev, p)
+		cleanup_gid_table_port(ib_dev, p,
+				       ib_dev->port_data[p].cache.gid);
 }
 
 static int gid_table_setup_one(struct ib_device *ib_dev)
@@ -983,17 +976,17 @@ const struct ib_gid_attr *rdma_find_gid(struct ib_device *device,
 	unsigned long mask = GID_ATTR_FIND_MASK_GID |
 			     GID_ATTR_FIND_MASK_GID_TYPE;
 	struct ib_gid_attr gid_attr_val = {.ndev = ndev, .gid_type = gid_type};
-	u8 p;
+	unsigned int p;
 
 	if (ndev)
 		mask |= GID_ATTR_FIND_MASK_NETDEV;
 
-	for (p = 0; p < device->phys_port_cnt; p++) {
+	rdma_for_each_port(device, p) {
 		struct ib_gid_table *table;
 		unsigned long flags;
 		int index;
 
-		table = device->cache.ports[p].gid;
+		table = device->port_data[p].cache.gid;
 		read_lock_irqsave(&table->rwlock, flags);
 		index = find_gid(table, gid, &gid_attr_val, false, mask, NULL);
 		if (index >= 0) {
@@ -1025,7 +1018,7 @@ int ib_get_cached_pkey(struct ib_device *device,
 
 	read_lock_irqsave(&device->cache.lock, flags);
 
-	cache = device->cache.ports[port_num - rdma_start_port(device)].pkey;
+	cache = device->port_data[port_num].cache.pkey;
 
 	if (index < 0 || index >= cache->table_len)
 		ret = -EINVAL;
@@ -1043,14 +1036,12 @@ int ib_get_cached_subnet_prefix(struct ib_device *device,
 				u64              *sn_pfx)
 {
 	unsigned long flags;
-	int p;
 
 	if (!rdma_is_port_valid(device, port_num))
 		return -EINVAL;
 
-	p = port_num - rdma_start_port(device);
 	read_lock_irqsave(&device->cache.lock, flags);
-	*sn_pfx = device->cache.ports[p].subnet_prefix;
+	*sn_pfx = device->port_data[port_num].cache.subnet_prefix;
 	read_unlock_irqrestore(&device->cache.lock, flags);
 
 	return 0;
@@ -1073,7 +1064,7 @@ int ib_find_cached_pkey(struct ib_device *device,
 
 	read_lock_irqsave(&device->cache.lock, flags);
 
-	cache = device->cache.ports[port_num - rdma_start_port(device)].pkey;
+	cache = device->port_data[port_num].cache.pkey;
 
 	*index = -1;
 
@@ -1113,7 +1104,7 @@ int ib_find_exact_cached_pkey(struct ib_device *device,
 
 	read_lock_irqsave(&device->cache.lock, flags);
 
-	cache = device->cache.ports[port_num - rdma_start_port(device)].pkey;
+	cache = device->port_data[port_num].cache.pkey;
 
 	*index = -1;
 
@@ -1141,7 +1132,7 @@ int ib_get_cached_lmc(struct ib_device *device,
 		return -EINVAL;
 
 	read_lock_irqsave(&device->cache.lock, flags);
-	*lmc = device->cache.ports[port_num - rdma_start_port(device)].lmc;
+	*lmc = device->port_data[port_num].cache.lmc;
 	read_unlock_irqrestore(&device->cache.lock, flags);
 
 	return ret;
@@ -1159,8 +1150,7 @@ int ib_get_cached_port_state(struct ib_device   *device,
 		return -EINVAL;
 
 	read_lock_irqsave(&device->cache.lock, flags);
-	*port_state = device->cache.ports[port_num
-		- rdma_start_port(device)].port_state;
+	*port_state = device->port_data[port_num].cache.port_state;
 	read_unlock_irqrestore(&device->cache.lock, flags);
 
 	return ret;
@@ -1361,16 +1351,13 @@ static void ib_cache_update(struct ib_device *device,
 
 	write_lock_irq(&device->cache.lock);
 
-	old_pkey_cache = device->cache.ports[port -
-		rdma_start_port(device)].pkey;
+	old_pkey_cache = device->port_data[port].cache.pkey;
 
-	device->cache.ports[port - rdma_start_port(device)].pkey = pkey_cache;
-	device->cache.ports[port - rdma_start_port(device)].lmc = tprops->lmc;
-	device->cache.ports[port - rdma_start_port(device)].port_state =
-		tprops->state;
+	device->port_data[port].cache.pkey = pkey_cache;
+	device->port_data[port].cache.lmc = tprops->lmc;
+	device->port_data[port].cache.port_state = tprops->state;
 
-	device->cache.ports[port - rdma_start_port(device)].subnet_prefix =
-							tprops->subnet_prefix;
+	device->port_data[port].cache.subnet_prefix = tprops->subnet_prefix;
 	write_unlock_irq(&device->cache.lock);
 
 	if (enforce_security)
@@ -1433,19 +1420,9 @@ int ib_cache_setup_one(struct ib_device *device)
 
 	rwlock_init(&device->cache.lock);
 
-	device->cache.ports =
-		kcalloc(rdma_end_port(device) - rdma_start_port(device) + 1,
-			sizeof(*device->cache.ports),
-			GFP_KERNEL);
-	if (!device->cache.ports)
-		return -ENOMEM;
-
 	err = gid_table_setup_one(device);
-	if (err) {
-		kfree(device->cache.ports);
-		device->cache.ports = NULL;
+	if (err)
 		return err;
-	}
 
 	rdma_for_each_port (device, p)
 		ib_cache_update(device, p, true);
@@ -1458,10 +1435,7 @@ int ib_cache_setup_one(struct ib_device *device)
 
 void ib_cache_release_one(struct ib_device *device)
 {
-	int p;
-
-	if (!device->cache.ports)
-		return;
+	unsigned int p;
 
 	/*
 	 * The release function frees all the cache elements.
@@ -1469,11 +1443,10 @@ void ib_cache_release_one(struct ib_device *device)
 	 * all the device's resources when the cache could no
 	 * longer be accessed.
 	 */
-	for (p = 0; p <= rdma_end_port(device) - rdma_start_port(device); ++p)
-		kfree(device->cache.ports[p].pkey);
+	rdma_for_each_port (device, p)
+		kfree(device->port_data[p].cache.pkey);
 
 	gid_table_release_one(device);
-	kfree(device->cache.ports);
 }
 
 void ib_cache_cleanup_one(struct ib_device *device)
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index b42e257814f7..50b7ebc2885e 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -2186,7 +2186,6 @@ struct ib_port_cache {
 struct ib_cache {
 	rwlock_t                lock;
 	struct ib_event_handler event_handler;
-	struct ib_port_cache   *ports;
 };
 
 struct iw_cm_verbs;
@@ -2203,6 +2202,8 @@ struct ib_port_data {
 
 	spinlock_t pkey_list_lock;
 	struct list_head pkey_list;
+
+	struct ib_port_cache cache;
 };
 
 /* rdma netdev type - specifies protocol type */
-- 
cgit v1.2.3


From 4509de14680084141d3514c3b87bd9d070fc366d Mon Sep 17 00:00:00 2001
From: Vakul Garg <vakul.garg@nxp.com>
Date: Thu, 14 Feb 2019 07:11:35 +0000
Subject: net/tls: Move protocol constants from cipher context to tls context

Each tls context maintains two cipher contexts (one each for tx and rx
directions). For each tls session, the constants such as protocol
version, ciphersuite, iv size, associated data size etc are same for
both the directions and need to be stored only once per tls context.
Hence these are moved from 'struct cipher_context' to 'struct
tls_prot_info' and stored only once in 'struct tls_context'.

Signed-off-by: Vakul Garg <vakul.garg@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tls.h    |  46 +++++++++-----
 net/tls/tls_device.c |  24 ++++---
 net/tls/tls_main.c   |  17 ++++-
 net/tls/tls_sw.c     | 172 +++++++++++++++++++++++++++------------------------
 4 files changed, 149 insertions(+), 110 deletions(-)

(limited to 'include')

diff --git a/include/net/tls.h b/include/net/tls.h
index a93a8ed8f716..a8b37226a287 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -199,15 +199,8 @@ enum {
 };
 
 struct cipher_context {
-	u16 prepend_size;
-	u16 tag_size;
-	u16 overhead_size;
-	u16 iv_size;
 	char *iv;
-	u16 rec_seq_size;
 	char *rec_seq;
-	u16 aad_size;
-	u16 tail_size;
 };
 
 union tls_crypto_context {
@@ -218,7 +211,21 @@ union tls_crypto_context {
 	};
 };
 
+struct tls_prot_info {
+	u16 version;
+	u16 cipher_type;
+	u16 prepend_size;
+	u16 tag_size;
+	u16 overhead_size;
+	u16 iv_size;
+	u16 rec_seq_size;
+	u16 aad_size;
+	u16 tail_size;
+};
+
 struct tls_context {
+	struct tls_prot_info prot_info;
+
 	union tls_crypto_context crypto_send;
 	union tls_crypto_context crypto_recv;
 
@@ -401,16 +408,26 @@ static inline bool tls_bigint_increment(unsigned char *seq, int len)
 	return (i == -1);
 }
 
+static inline struct tls_context *tls_get_ctx(const struct sock *sk)
+{
+	struct inet_connection_sock *icsk = inet_csk(sk);
+
+	return icsk->icsk_ulp_data;
+}
+
 static inline void tls_advance_record_sn(struct sock *sk,
 					 struct cipher_context *ctx,
 					 int version)
 {
-	if (tls_bigint_increment(ctx->rec_seq, ctx->rec_seq_size))
+	struct tls_context *tls_ctx = tls_get_ctx(sk);
+	struct tls_prot_info *prot = &tls_ctx->prot_info;
+
+	if (tls_bigint_increment(ctx->rec_seq, prot->rec_seq_size))
 		tls_err_abort(sk, EBADMSG);
 
 	if (version != TLS_1_3_VERSION) {
 		tls_bigint_increment(ctx->iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE,
-				     ctx->iv_size);
+				     prot->iv_size);
 	}
 }
 
@@ -420,9 +437,10 @@ static inline void tls_fill_prepend(struct tls_context *ctx,
 			     unsigned char record_type,
 			     int version)
 {
-	size_t pkt_len, iv_size = ctx->tx.iv_size;
+	struct tls_prot_info *prot = &ctx->prot_info;
+	size_t pkt_len, iv_size = prot->iv_size;
 
-	pkt_len = plaintext_len + ctx->tx.tag_size;
+	pkt_len = plaintext_len + prot->tag_size;
 	if (version != TLS_1_3_VERSION) {
 		pkt_len += iv_size;
 
@@ -475,12 +493,6 @@ static inline void xor_iv_with_seq(int version, char *iv, char *seq)
 	}
 }
 
-static inline struct tls_context *tls_get_ctx(const struct sock *sk)
-{
-	struct inet_connection_sock *icsk = inet_csk(sk);
-
-	return icsk->icsk_ulp_data;
-}
 
 static inline struct tls_sw_context_rx *tls_sw_ctx_rx(
 		const struct tls_context *tls_ctx)
diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
index 7ee9008b2187..a5c17c47d08a 100644
--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -247,6 +247,7 @@ static int tls_push_record(struct sock *sk,
 			   int flags,
 			   unsigned char record_type)
 {
+	struct tls_prot_info *prot = &ctx->prot_info;
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct page_frag dummy_tag_frag;
 	skb_frag_t *frag;
@@ -256,7 +257,7 @@ static int tls_push_record(struct sock *sk,
 	frag = &record->frags[0];
 	tls_fill_prepend(ctx,
 			 skb_frag_address(frag),
-			 record->len - ctx->tx.prepend_size,
+			 record->len - prot->prepend_size,
 			 record_type,
 			 ctx->crypto_send.info.version);
 
@@ -264,7 +265,7 @@ static int tls_push_record(struct sock *sk,
 	dummy_tag_frag.page = skb_frag_page(frag);
 	dummy_tag_frag.offset = 0;
 
-	tls_append_frag(record, &dummy_tag_frag, ctx->tx.tag_size);
+	tls_append_frag(record, &dummy_tag_frag, prot->tag_size);
 	record->end_seq = tp->write_seq + record->len;
 	spin_lock_irq(&offload_ctx->lock);
 	list_add_tail(&record->list, &offload_ctx->records_list);
@@ -347,6 +348,7 @@ static int tls_push_data(struct sock *sk,
 			 unsigned char record_type)
 {
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
+	struct tls_prot_info *prot = &tls_ctx->prot_info;
 	struct tls_offload_context_tx *ctx = tls_offload_ctx_tx(tls_ctx);
 	int tls_push_record_flags = flags | MSG_SENDPAGE_NOTLAST;
 	int more = flags & (MSG_SENDPAGE_NOTLAST | MSG_MORE);
@@ -376,10 +378,10 @@ static int tls_push_data(struct sock *sk,
 	 * we need to leave room for an authentication tag.
 	 */
 	max_open_record_len = TLS_MAX_PAYLOAD_SIZE +
-			      tls_ctx->tx.prepend_size;
+			      prot->prepend_size;
 	do {
 		rc = tls_do_allocation(sk, ctx, pfrag,
-				       tls_ctx->tx.prepend_size);
+				       prot->prepend_size);
 		if (rc) {
 			rc = sk_stream_wait_memory(sk, &timeo);
 			if (!rc)
@@ -397,7 +399,7 @@ handle_error:
 				size = orig_size;
 				destroy_record(record);
 				ctx->open_record = NULL;
-			} else if (record->len > tls_ctx->tx.prepend_size) {
+			} else if (record->len > prot->prepend_size) {
 				goto last_record;
 			}
 
@@ -658,6 +660,8 @@ int tls_device_decrypted(struct sock *sk, struct sk_buff *skb)
 int tls_set_device_offload(struct sock *sk, struct tls_context *ctx)
 {
 	u16 nonce_size, tag_size, iv_size, rec_seq_size;
+	struct tls_context *tls_ctx = tls_get_ctx(sk);
+	struct tls_prot_info *prot = &tls_ctx->prot_info;
 	struct tls_record_info *start_marker_record;
 	struct tls_offload_context_tx *offload_ctx;
 	struct tls_crypto_info *crypto_info;
@@ -703,10 +707,10 @@ int tls_set_device_offload(struct sock *sk, struct tls_context *ctx)
 		goto free_offload_ctx;
 	}
 
-	ctx->tx.prepend_size = TLS_HEADER_SIZE + nonce_size;
-	ctx->tx.tag_size = tag_size;
-	ctx->tx.overhead_size = ctx->tx.prepend_size + ctx->tx.tag_size;
-	ctx->tx.iv_size = iv_size;
+	prot->prepend_size = TLS_HEADER_SIZE + nonce_size;
+	prot->tag_size = tag_size;
+	prot->overhead_size = prot->prepend_size + prot->tag_size;
+	prot->iv_size = iv_size;
 	ctx->tx.iv = kmalloc(iv_size + TLS_CIPHER_AES_GCM_128_SALT_SIZE,
 			     GFP_KERNEL);
 	if (!ctx->tx.iv) {
@@ -716,7 +720,7 @@ int tls_set_device_offload(struct sock *sk, struct tls_context *ctx)
 
 	memcpy(ctx->tx.iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, iv, iv_size);
 
-	ctx->tx.rec_seq_size = rec_seq_size;
+	prot->rec_seq_size = rec_seq_size;
 	ctx->tx.rec_seq = kmemdup(rec_seq, rec_seq_size, GFP_KERNEL);
 	if (!ctx->tx.rec_seq) {
 		rc = -ENOMEM;
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index d1c2fd9a3f63..caff15b2f9b2 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -435,6 +435,7 @@ static int do_tls_setsockopt_conf(struct sock *sk, char __user *optval,
 				  unsigned int optlen, int tx)
 {
 	struct tls_crypto_info *crypto_info;
+	struct tls_crypto_info *alt_crypto_info;
 	struct tls_context *ctx = tls_get_ctx(sk);
 	size_t optsize;
 	int rc = 0;
@@ -445,10 +446,13 @@ static int do_tls_setsockopt_conf(struct sock *sk, char __user *optval,
 		goto out;
 	}
 
-	if (tx)
+	if (tx) {
 		crypto_info = &ctx->crypto_send.info;
-	else
+		alt_crypto_info = &ctx->crypto_recv.info;
+	} else {
 		crypto_info = &ctx->crypto_recv.info;
+		alt_crypto_info = &ctx->crypto_send.info;
+	}
 
 	/* Currently we don't support set crypto info more than one time */
 	if (TLS_CRYPTO_INFO_READY(crypto_info)) {
@@ -469,6 +473,15 @@ static int do_tls_setsockopt_conf(struct sock *sk, char __user *optval,
 		goto err_crypto_info;
 	}
 
+	/* Ensure that TLS version and ciphers are same in both directions */
+	if (TLS_CRYPTO_INFO_READY(alt_crypto_info)) {
+		if (alt_crypto_info->version != crypto_info->version ||
+		    alt_crypto_info->cipher_type != crypto_info->cipher_type) {
+			rc = -EINVAL;
+			goto err_crypto_info;
+		}
+	}
+
 	switch (crypto_info->cipher_type) {
 	case TLS_CIPHER_AES_GCM_128:
 	case TLS_CIPHER_AES_GCM_256: {
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index ae4784734547..71be8acfbc9b 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -127,7 +127,7 @@ static int padding_length(struct tls_sw_context_rx *ctx,
 	int sub = 0;
 
 	/* Determine zero-padding length */
-	if (tls_ctx->crypto_recv.info.version == TLS_1_3_VERSION) {
+	if (tls_ctx->prot_info.version == TLS_1_3_VERSION) {
 		char content_type = 0;
 		int err;
 		int back = 17;
@@ -155,6 +155,7 @@ static void tls_decrypt_done(struct crypto_async_request *req, int err)
 	struct scatterlist *sgin = aead_req->src;
 	struct tls_sw_context_rx *ctx;
 	struct tls_context *tls_ctx;
+	struct tls_prot_info *prot;
 	struct scatterlist *sg;
 	struct sk_buff *skb;
 	unsigned int pages;
@@ -163,6 +164,7 @@ static void tls_decrypt_done(struct crypto_async_request *req, int err)
 	skb = (struct sk_buff *)req->data;
 	tls_ctx = tls_get_ctx(skb->sk);
 	ctx = tls_sw_ctx_rx(tls_ctx);
+	prot = &tls_ctx->prot_info;
 
 	/* Propagate if there was an err */
 	if (err) {
@@ -171,8 +173,8 @@ static void tls_decrypt_done(struct crypto_async_request *req, int err)
 	} else {
 		struct strp_msg *rxm = strp_msg(skb);
 		rxm->full_len -= padding_length(ctx, tls_ctx, skb);
-		rxm->offset += tls_ctx->rx.prepend_size;
-		rxm->full_len -= tls_ctx->rx.overhead_size;
+		rxm->offset += prot->prepend_size;
+		rxm->full_len -= prot->overhead_size;
 	}
 
 	/* After using skb->sk to propagate sk through crypto async callback
@@ -209,13 +211,14 @@ static int tls_do_decryption(struct sock *sk,
 			     bool async)
 {
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
+	struct tls_prot_info *prot = &tls_ctx->prot_info;
 	struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
 	int ret;
 
 	aead_request_set_tfm(aead_req, ctx->aead_recv);
-	aead_request_set_ad(aead_req, tls_ctx->rx.aad_size);
+	aead_request_set_ad(aead_req, prot->aad_size);
 	aead_request_set_crypt(aead_req, sgin, sgout,
-			       data_len + tls_ctx->rx.tag_size,
+			       data_len + prot->tag_size,
 			       (u8 *)iv_recv);
 
 	if (async) {
@@ -253,12 +256,13 @@ static int tls_do_decryption(struct sock *sk,
 static void tls_trim_both_msgs(struct sock *sk, int target_size)
 {
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
+	struct tls_prot_info *prot = &tls_ctx->prot_info;
 	struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
 	struct tls_rec *rec = ctx->open_rec;
 
 	sk_msg_trim(sk, &rec->msg_plaintext, target_size);
 	if (target_size > 0)
-		target_size += tls_ctx->tx.overhead_size;
+		target_size += prot->overhead_size;
 	sk_msg_trim(sk, &rec->msg_encrypted, target_size);
 }
 
@@ -275,6 +279,7 @@ static int tls_alloc_encrypted_msg(struct sock *sk, int len)
 static int tls_clone_plaintext_msg(struct sock *sk, int required)
 {
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
+	struct tls_prot_info *prot = &tls_ctx->prot_info;
 	struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
 	struct tls_rec *rec = ctx->open_rec;
 	struct sk_msg *msg_pl = &rec->msg_plaintext;
@@ -290,7 +295,7 @@ static int tls_clone_plaintext_msg(struct sock *sk, int required)
 	/* Skip initial bytes in msg_en's data to be able to use
 	 * same offset of both plain and encrypted data.
 	 */
-	skip = tls_ctx->tx.prepend_size + msg_pl->sg.size;
+	skip = prot->prepend_size + msg_pl->sg.size;
 
 	return sk_msg_clone(sk, msg_pl, msg_en, skip, len);
 }
@@ -298,6 +303,7 @@ static int tls_clone_plaintext_msg(struct sock *sk, int required)
 static struct tls_rec *tls_get_rec(struct sock *sk)
 {
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
+	struct tls_prot_info *prot = &tls_ctx->prot_info;
 	struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
 	struct sk_msg *msg_pl, *msg_en;
 	struct tls_rec *rec;
@@ -316,13 +322,11 @@ static struct tls_rec *tls_get_rec(struct sock *sk)
 	sk_msg_init(msg_en);
 
 	sg_init_table(rec->sg_aead_in, 2);
-	sg_set_buf(&rec->sg_aead_in[0], rec->aad_space,
-		   tls_ctx->tx.aad_size);
+	sg_set_buf(&rec->sg_aead_in[0], rec->aad_space, prot->aad_size);
 	sg_unmark_end(&rec->sg_aead_in[1]);
 
 	sg_init_table(rec->sg_aead_out, 2);
-	sg_set_buf(&rec->sg_aead_out[0], rec->aad_space,
-		   tls_ctx->tx.aad_size);
+	sg_set_buf(&rec->sg_aead_out[0], rec->aad_space, prot->aad_size);
 	sg_unmark_end(&rec->sg_aead_out[1]);
 
 	return rec;
@@ -411,6 +415,7 @@ static void tls_encrypt_done(struct crypto_async_request *req, int err)
 	struct aead_request *aead_req = (struct aead_request *)req;
 	struct sock *sk = req->data;
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
+	struct tls_prot_info *prot = &tls_ctx->prot_info;
 	struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
 	struct scatterlist *sge;
 	struct sk_msg *msg_en;
@@ -422,8 +427,8 @@ static void tls_encrypt_done(struct crypto_async_request *req, int err)
 	msg_en = &rec->msg_encrypted;
 
 	sge = sk_msg_elem(msg_en, msg_en->sg.curr);
-	sge->offset -= tls_ctx->tx.prepend_size;
-	sge->length += tls_ctx->tx.prepend_size;
+	sge->offset -= prot->prepend_size;
+	sge->length += prot->prepend_size;
 
 	/* Check if error is previously set on socket */
 	if (err || sk->sk_err) {
@@ -470,22 +475,23 @@ static int tls_do_encryption(struct sock *sk,
 			     struct aead_request *aead_req,
 			     size_t data_len, u32 start)
 {
+	struct tls_prot_info *prot = &tls_ctx->prot_info;
 	struct tls_rec *rec = ctx->open_rec;
 	struct sk_msg *msg_en = &rec->msg_encrypted;
 	struct scatterlist *sge = sk_msg_elem(msg_en, start);
 	int rc;
 
 	memcpy(rec->iv_data, tls_ctx->tx.iv, sizeof(rec->iv_data));
-	xor_iv_with_seq(tls_ctx->crypto_send.info.version, rec->iv_data,
+	xor_iv_with_seq(prot->version, rec->iv_data,
 			tls_ctx->tx.rec_seq);
 
-	sge->offset += tls_ctx->tx.prepend_size;
-	sge->length -= tls_ctx->tx.prepend_size;
+	sge->offset += prot->prepend_size;
+	sge->length -= prot->prepend_size;
 
 	msg_en->sg.curr = start;
 
 	aead_request_set_tfm(aead_req, ctx->aead_send);
-	aead_request_set_ad(aead_req, tls_ctx->tx.aad_size);
+	aead_request_set_ad(aead_req, prot->aad_size);
 	aead_request_set_crypt(aead_req, rec->sg_aead_in,
 			       rec->sg_aead_out,
 			       data_len, rec->iv_data);
@@ -500,8 +506,8 @@ static int tls_do_encryption(struct sock *sk,
 	rc = crypto_aead_encrypt(aead_req);
 	if (!rc || rc != -EINPROGRESS) {
 		atomic_dec(&ctx->encrypt_pending);
-		sge->offset -= tls_ctx->tx.prepend_size;
-		sge->length += tls_ctx->tx.prepend_size;
+		sge->offset -= prot->prepend_size;
+		sge->length += prot->prepend_size;
 	}
 
 	if (!rc) {
@@ -513,8 +519,7 @@ static int tls_do_encryption(struct sock *sk,
 
 	/* Unhook the record from context if encryption is not failure */
 	ctx->open_rec = NULL;
-	tls_advance_record_sn(sk, &tls_ctx->tx,
-			      tls_ctx->crypto_send.info.version);
+	tls_advance_record_sn(sk, &tls_ctx->tx, prot->version);
 	return rc;
 }
 
@@ -640,6 +645,7 @@ static int tls_push_record(struct sock *sk, int flags,
 			   unsigned char record_type)
 {
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
+	struct tls_prot_info *prot = &tls_ctx->prot_info;
 	struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
 	struct tls_rec *rec = ctx->open_rec, *tmp = NULL;
 	u32 i, split_point, uninitialized_var(orig_end);
@@ -658,12 +664,12 @@ static int tls_push_record(struct sock *sk, int flags,
 	split = split_point && split_point < msg_pl->sg.size;
 	if (split) {
 		rc = tls_split_open_record(sk, rec, &tmp, msg_pl, msg_en,
-					   split_point, tls_ctx->tx.overhead_size,
+					   split_point, prot->overhead_size,
 					   &orig_end);
 		if (rc < 0)
 			return rc;
 		sk_msg_trim(sk, msg_en, msg_pl->sg.size +
-			    tls_ctx->tx.overhead_size);
+			    prot->overhead_size);
 	}
 
 	rec->tx_flags = flags;
@@ -673,7 +679,7 @@ static int tls_push_record(struct sock *sk, int flags,
 	sk_msg_iter_var_prev(i);
 
 	rec->content_type = record_type;
-	if (tls_ctx->crypto_send.info.version == TLS_1_3_VERSION) {
+	if (prot->version == TLS_1_3_VERSION) {
 		/* Add content type to end of message.  No padding added */
 		sg_set_buf(&rec->sg_content_type, &rec->content_type, 1);
 		sg_mark_end(&rec->sg_content_type);
@@ -694,22 +700,20 @@ static int tls_push_record(struct sock *sk, int flags,
 	i = msg_en->sg.start;
 	sg_chain(rec->sg_aead_out, 2, &msg_en->sg.data[i]);
 
-	tls_make_aad(rec->aad_space, msg_pl->sg.size + tls_ctx->tx.tail_size,
-		     tls_ctx->tx.rec_seq, tls_ctx->tx.rec_seq_size,
-		     record_type,
-		     tls_ctx->crypto_send.info.version);
+	tls_make_aad(rec->aad_space, msg_pl->sg.size + prot->tail_size,
+		     tls_ctx->tx.rec_seq, prot->rec_seq_size,
+		     record_type, prot->version);
 
 	tls_fill_prepend(tls_ctx,
 			 page_address(sg_page(&msg_en->sg.data[i])) +
 			 msg_en->sg.data[i].offset,
-			 msg_pl->sg.size + tls_ctx->tx.tail_size,
-			 record_type,
-			 tls_ctx->crypto_send.info.version);
+			 msg_pl->sg.size + prot->tail_size,
+			 record_type, prot->version);
 
 	tls_ctx->pending_open_record_frags = false;
 
 	rc = tls_do_encryption(sk, tls_ctx, ctx, req,
-			       msg_pl->sg.size + tls_ctx->tx.tail_size, i);
+			       msg_pl->sg.size + prot->tail_size, i);
 	if (rc < 0) {
 		if (rc != -EINPROGRESS) {
 			tls_err_abort(sk, EBADMSG);
@@ -723,8 +727,7 @@ static int tls_push_record(struct sock *sk, int flags,
 	} else if (split) {
 		msg_pl = &tmp->msg_plaintext;
 		msg_en = &tmp->msg_encrypted;
-		sk_msg_trim(sk, msg_en, msg_pl->sg.size +
-			    tls_ctx->tx.overhead_size);
+		sk_msg_trim(sk, msg_en, msg_pl->sg.size + prot->overhead_size);
 		tls_ctx->pending_open_record_frags = true;
 		ctx->open_rec = tmp;
 	}
@@ -859,6 +862,7 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
 {
 	long timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
+	struct tls_prot_info *prot = &tls_ctx->prot_info;
 	struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
 	bool async_capable = ctx->async_capable;
 	unsigned char record_type = TLS_RECORD_TYPE_DATA;
@@ -925,7 +929,7 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
 		}
 
 		required_size = msg_pl->sg.size + try_to_copy +
-				tls_ctx->tx.overhead_size;
+				prot->overhead_size;
 
 		if (!sk_stream_memory_free(sk))
 			goto wait_for_sndbuf;
@@ -994,8 +998,8 @@ fallback_to_reg_send:
 			 */
 			try_to_copy -= required_size - msg_pl->sg.size;
 			full_record = true;
-			sk_msg_trim(sk, msg_en, msg_pl->sg.size +
-				    tls_ctx->tx.overhead_size);
+			sk_msg_trim(sk, msg_en,
+				    msg_pl->sg.size + prot->overhead_size);
 		}
 
 		if (try_to_copy) {
@@ -1081,6 +1085,7 @@ static int tls_sw_do_sendpage(struct sock *sk, struct page *page,
 	long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
 	struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
+	struct tls_prot_info *prot = &tls_ctx->prot_info;
 	unsigned char record_type = TLS_RECORD_TYPE_DATA;
 	struct sk_msg *msg_pl;
 	struct tls_rec *rec;
@@ -1130,8 +1135,7 @@ static int tls_sw_do_sendpage(struct sock *sk, struct page *page,
 			full_record = true;
 		}
 
-		required_size = msg_pl->sg.size + copy +
-				tls_ctx->tx.overhead_size;
+		required_size = msg_pl->sg.size + copy + prot->overhead_size;
 
 		if (!sk_stream_memory_free(sk))
 			goto wait_for_sndbuf;
@@ -1330,6 +1334,7 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb,
 {
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
 	struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
+	struct tls_prot_info *prot = &tls_ctx->prot_info;
 	struct strp_msg *rxm = strp_msg(skb);
 	int n_sgin, n_sgout, nsg, mem_size, aead_size, err, pages = 0;
 	struct aead_request *aead_req;
@@ -1337,16 +1342,16 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb,
 	u8 *aad, *iv, *mem = NULL;
 	struct scatterlist *sgin = NULL;
 	struct scatterlist *sgout = NULL;
-	const int data_len = rxm->full_len - tls_ctx->rx.overhead_size +
-		tls_ctx->rx.tail_size;
+	const int data_len = rxm->full_len - prot->overhead_size +
+			     prot->tail_size;
 
 	if (*zc && (out_iov || out_sg)) {
 		if (out_iov)
 			n_sgout = iov_iter_npages(out_iov, INT_MAX) + 1;
 		else
 			n_sgout = sg_nents(out_sg);
-		n_sgin = skb_nsg(skb, rxm->offset + tls_ctx->rx.prepend_size,
-				 rxm->full_len - tls_ctx->rx.prepend_size);
+		n_sgin = skb_nsg(skb, rxm->offset + prot->prepend_size,
+				 rxm->full_len - prot->prepend_size);
 	} else {
 		n_sgout = 0;
 		*zc = false;
@@ -1363,7 +1368,7 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb,
 
 	aead_size = sizeof(*aead_req) + crypto_aead_reqsize(ctx->aead_recv);
 	mem_size = aead_size + (nsg * sizeof(struct scatterlist));
-	mem_size = mem_size + tls_ctx->rx.aad_size;
+	mem_size = mem_size + prot->aad_size;
 	mem_size = mem_size + crypto_aead_ivsize(ctx->aead_recv);
 
 	/* Allocate a single block of memory which contains
@@ -1379,37 +1384,35 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb,
 	sgin = (struct scatterlist *)(mem + aead_size);
 	sgout = sgin + n_sgin;
 	aad = (u8 *)(sgout + n_sgout);
-	iv = aad + tls_ctx->rx.aad_size;
+	iv = aad + prot->aad_size;
 
 	/* Prepare IV */
 	err = skb_copy_bits(skb, rxm->offset + TLS_HEADER_SIZE,
 			    iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE,
-			    tls_ctx->rx.iv_size);
+			    prot->iv_size);
 	if (err < 0) {
 		kfree(mem);
 		return err;
 	}
-	if (tls_ctx->crypto_recv.info.version == TLS_1_3_VERSION)
+	if (prot->version == TLS_1_3_VERSION)
 		memcpy(iv, tls_ctx->rx.iv, crypto_aead_ivsize(ctx->aead_recv));
 	else
 		memcpy(iv, tls_ctx->rx.iv, TLS_CIPHER_AES_GCM_128_SALT_SIZE);
 
-	xor_iv_with_seq(tls_ctx->crypto_recv.info.version, iv,
-			tls_ctx->rx.rec_seq);
+	xor_iv_with_seq(prot->version, iv, tls_ctx->rx.rec_seq);
 
 	/* Prepare AAD */
-	tls_make_aad(aad, rxm->full_len - tls_ctx->rx.overhead_size +
-		     tls_ctx->rx.tail_size,
-		     tls_ctx->rx.rec_seq, tls_ctx->rx.rec_seq_size,
-		     ctx->control,
-		     tls_ctx->crypto_recv.info.version);
+	tls_make_aad(aad, rxm->full_len - prot->overhead_size +
+		     prot->tail_size,
+		     tls_ctx->rx.rec_seq, prot->rec_seq_size,
+		     ctx->control, prot->version);
 
 	/* Prepare sgin */
 	sg_init_table(sgin, n_sgin);
-	sg_set_buf(&sgin[0], aad, tls_ctx->rx.aad_size);
+	sg_set_buf(&sgin[0], aad, prot->aad_size);
 	err = skb_to_sgvec(skb, &sgin[1],
-			   rxm->offset + tls_ctx->rx.prepend_size,
-			   rxm->full_len - tls_ctx->rx.prepend_size);
+			   rxm->offset + prot->prepend_size,
+			   rxm->full_len - prot->prepend_size);
 	if (err < 0) {
 		kfree(mem);
 		return err;
@@ -1418,7 +1421,7 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb,
 	if (n_sgout) {
 		if (out_iov) {
 			sg_init_table(sgout, n_sgout);
-			sg_set_buf(&sgout[0], aad, tls_ctx->rx.aad_size);
+			sg_set_buf(&sgout[0], aad, prot->aad_size);
 
 			*chunk = 0;
 			err = tls_setup_from_iter(sk, out_iov, data_len,
@@ -1459,7 +1462,8 @@ static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb,
 {
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
 	struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
-	int version = tls_ctx->crypto_recv.info.version;
+	struct tls_prot_info *prot = &tls_ctx->prot_info;
+	int version = prot->version;
 	struct strp_msg *rxm = strp_msg(skb);
 	int err = 0;
 
@@ -1480,8 +1484,8 @@ static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb,
 
 		rxm->full_len -= padding_length(ctx, tls_ctx, skb);
 
-		rxm->offset += tls_ctx->rx.prepend_size;
-		rxm->full_len -= tls_ctx->rx.overhead_size;
+		rxm->offset += prot->prepend_size;
+		rxm->full_len -= prot->overhead_size;
 		tls_advance_record_sn(sk, &tls_ctx->rx, version);
 		ctx->decrypted = true;
 		ctx->saved_data_ready(sk);
@@ -1605,6 +1609,7 @@ int tls_sw_recvmsg(struct sock *sk,
 {
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
 	struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
+	struct tls_prot_info *prot = &tls_ctx->prot_info;
 	struct sk_psock *psock;
 	unsigned char control = 0;
 	ssize_t decrypted = 0;
@@ -1667,11 +1672,11 @@ int tls_sw_recvmsg(struct sock *sk,
 
 		rxm = strp_msg(skb);
 
-		to_decrypt = rxm->full_len - tls_ctx->rx.overhead_size;
+		to_decrypt = rxm->full_len - prot->overhead_size;
 
 		if (to_decrypt <= len && !is_kvec && !is_peek &&
 		    ctx->control == TLS_RECORD_TYPE_DATA &&
-		    tls_ctx->crypto_recv.info.version != TLS_1_3_VERSION)
+		    prot->version != TLS_1_3_VERSION)
 			zc = true;
 
 		/* Do not use async mode if record is non-data */
@@ -1875,6 +1880,7 @@ static int tls_read_size(struct strparser *strp, struct sk_buff *skb)
 {
 	struct tls_context *tls_ctx = tls_get_ctx(strp->sk);
 	struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
+	struct tls_prot_info *prot = &tls_ctx->prot_info;
 	char header[TLS_HEADER_SIZE + MAX_IV_SIZE];
 	struct strp_msg *rxm = strp_msg(skb);
 	size_t cipher_overhead;
@@ -1882,17 +1888,17 @@ static int tls_read_size(struct strparser *strp, struct sk_buff *skb)
 	int ret;
 
 	/* Verify that we have a full TLS header, or wait for more data */
-	if (rxm->offset + tls_ctx->rx.prepend_size > skb->len)
+	if (rxm->offset + prot->prepend_size > skb->len)
 		return 0;
 
 	/* Sanity-check size of on-stack buffer. */
-	if (WARN_ON(tls_ctx->rx.prepend_size > sizeof(header))) {
+	if (WARN_ON(prot->prepend_size > sizeof(header))) {
 		ret = -EINVAL;
 		goto read_failure;
 	}
 
 	/* Linearize header to local buffer */
-	ret = skb_copy_bits(skb, rxm->offset, header, tls_ctx->rx.prepend_size);
+	ret = skb_copy_bits(skb, rxm->offset, header, prot->prepend_size);
 
 	if (ret < 0)
 		goto read_failure;
@@ -1901,12 +1907,12 @@ static int tls_read_size(struct strparser *strp, struct sk_buff *skb)
 
 	data_len = ((header[4] & 0xFF) | (header[3] << 8));
 
-	cipher_overhead = tls_ctx->rx.tag_size;
-	if (tls_ctx->crypto_recv.info.version != TLS_1_3_VERSION)
-		cipher_overhead += tls_ctx->rx.iv_size;
+	cipher_overhead = prot->tag_size;
+	if (prot->version != TLS_1_3_VERSION)
+		cipher_overhead += prot->iv_size;
 
 	if (data_len > TLS_MAX_PAYLOAD_SIZE + cipher_overhead +
-	    tls_ctx->rx.tail_size) {
+	    prot->tail_size) {
 		ret = -EMSGSIZE;
 		goto read_failure;
 	}
@@ -2066,6 +2072,8 @@ static void tx_work_handler(struct work_struct *work)
 
 int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
 {
+	struct tls_context *tls_ctx = tls_get_ctx(sk);
+	struct tls_prot_info *prot = &tls_ctx->prot_info;
 	struct tls_crypto_info *crypto_info;
 	struct tls12_crypto_info_aes_gcm_128 *gcm_128_info;
 	struct tls12_crypto_info_aes_gcm_256 *gcm_256_info;
@@ -2171,18 +2179,20 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
 
 	if (crypto_info->version == TLS_1_3_VERSION) {
 		nonce_size = 0;
-		cctx->aad_size = TLS_HEADER_SIZE;
-		cctx->tail_size = 1;
+		prot->aad_size = TLS_HEADER_SIZE;
+		prot->tail_size = 1;
 	} else {
-		cctx->aad_size = TLS_AAD_SPACE_SIZE;
-		cctx->tail_size = 0;
+		prot->aad_size = TLS_AAD_SPACE_SIZE;
+		prot->tail_size = 0;
 	}
 
-	cctx->prepend_size = TLS_HEADER_SIZE + nonce_size;
-	cctx->tag_size = tag_size;
-	cctx->overhead_size = cctx->prepend_size + cctx->tag_size +
-		cctx->tail_size;
-	cctx->iv_size = iv_size;
+	prot->version = crypto_info->version;
+	prot->cipher_type = crypto_info->cipher_type;
+	prot->prepend_size = TLS_HEADER_SIZE + nonce_size;
+	prot->tag_size = tag_size;
+	prot->overhead_size = prot->prepend_size +
+			      prot->tag_size + prot->tail_size;
+	prot->iv_size = iv_size;
 	cctx->iv = kmalloc(iv_size + TLS_CIPHER_AES_GCM_128_SALT_SIZE,
 			   GFP_KERNEL);
 	if (!cctx->iv) {
@@ -2192,7 +2202,7 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
 	/* Note: 128 & 256 bit salt are the same size */
 	memcpy(cctx->iv, salt, TLS_CIPHER_AES_GCM_128_SALT_SIZE);
 	memcpy(cctx->iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, iv, iv_size);
-	cctx->rec_seq_size = rec_seq_size;
+	prot->rec_seq_size = rec_seq_size;
 	cctx->rec_seq = kmemdup(rec_seq, rec_seq_size, GFP_KERNEL);
 	if (!cctx->rec_seq) {
 		rc = -ENOMEM;
@@ -2215,7 +2225,7 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
 	if (rc)
 		goto free_aead;
 
-	rc = crypto_aead_setauthsize(*aead, cctx->tag_size);
+	rc = crypto_aead_setauthsize(*aead, prot->tag_size);
 	if (rc)
 		goto free_aead;
 
-- 
cgit v1.2.3


From c8ce48f06503eee20f189eed5b2aa736272b7344 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 18 Feb 2019 17:30:06 +0100
Subject: asm-generic: Make time32 syscall numbers optional

We don't want new architectures to even provide the old 32-bit time_t
based system calls any more, or define the syscall number macros.

Add a new __ARCH_WANT_TIME32_SYSCALLS macro that gets enabled for all
existing 32-bit architectures using the generic system call table,
so we don't change any current behavior.
Since this symbol is evaluated in user space as well, we cannot use
a Kconfig CONFIG_* macro but have to define it in uapi/asm/unistd.h.

On 64-bit architectures, the same system call numbers mostly refer to
the system calls we want to keep, as they already pass 64-bit time_t.

As new architectures no longer provide these, we need new exceptions
in checksyscalls.sh.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 arch/arc/include/uapi/asm/unistd.h       |  1 +
 arch/arm64/include/uapi/asm/unistd.h     |  1 +
 arch/c6x/include/uapi/asm/unistd.h       |  1 +
 arch/csky/include/uapi/asm/unistd.h      |  1 +
 arch/h8300/include/uapi/asm/unistd.h     |  1 +
 arch/hexagon/include/uapi/asm/unistd.h   |  1 +
 arch/nds32/include/uapi/asm/unistd.h     |  1 +
 arch/nios2/include/uapi/asm/unistd.h     |  1 +
 arch/openrisc/include/uapi/asm/unistd.h  |  1 +
 arch/riscv/include/uapi/asm/unistd.h     |  3 +++
 arch/unicore32/include/uapi/asm/unistd.h |  1 +
 include/uapi/asm-generic/unistd.h        | 36 ++++++++++++++++++++++++++++++++
 scripts/checksyscalls.sh                 |  7 +++++++
 13 files changed, 56 insertions(+)

(limited to 'include')

diff --git a/arch/arc/include/uapi/asm/unistd.h b/arch/arc/include/uapi/asm/unistd.h
index 6a1a62a979dd..5eafa1115162 100644
--- a/arch/arc/include/uapi/asm/unistd.h
+++ b/arch/arc/include/uapi/asm/unistd.h
@@ -23,6 +23,7 @@
 #define __ARCH_WANT_SYS_CLONE
 #define __ARCH_WANT_SYS_VFORK
 #define __ARCH_WANT_SYS_FORK
+#define __ARCH_WANT_TIME32_SYSCALLS
 
 #define sys_mmap2 sys_mmap_pgoff
 
diff --git a/arch/arm64/include/uapi/asm/unistd.h b/arch/arm64/include/uapi/asm/unistd.h
index 79937de2a0cc..4703d218663a 100644
--- a/arch/arm64/include/uapi/asm/unistd.h
+++ b/arch/arm64/include/uapi/asm/unistd.h
@@ -18,5 +18,6 @@
 #define __ARCH_WANT_RENAMEAT
 #define __ARCH_WANT_NEW_STAT
 #define __ARCH_WANT_SET_GET_RLIMIT
+#define __ARCH_WANT_TIME32_SYSCALLS
 
 #include <asm-generic/unistd.h>
diff --git a/arch/c6x/include/uapi/asm/unistd.h b/arch/c6x/include/uapi/asm/unistd.h
index e3721b2cfd6a..79b724c39d9b 100644
--- a/arch/c6x/include/uapi/asm/unistd.h
+++ b/arch/c6x/include/uapi/asm/unistd.h
@@ -19,6 +19,7 @@
 #define __ARCH_WANT_STAT64
 #define __ARCH_WANT_SET_GET_RLIMIT
 #define __ARCH_WANT_SYS_CLONE
+#define __ARCH_WANT_TIME32_SYSCALLS
 
 /* Use the standard ABI for syscalls. */
 #include <asm-generic/unistd.h>
diff --git a/arch/csky/include/uapi/asm/unistd.h b/arch/csky/include/uapi/asm/unistd.h
index f5c83492136f..ec60e49cea66 100644
--- a/arch/csky/include/uapi/asm/unistd.h
+++ b/arch/csky/include/uapi/asm/unistd.h
@@ -3,6 +3,7 @@
 
 #define __ARCH_WANT_SYS_CLONE
 #define __ARCH_WANT_SET_GET_RLIMIT
+#define __ARCH_WANT_TIME32_SYSCALLS
 #include <asm-generic/unistd.h>
 
 #define __NR_set_thread_area	(__NR_arch_specific_syscall + 0)
diff --git a/arch/h8300/include/uapi/asm/unistd.h b/arch/h8300/include/uapi/asm/unistd.h
index b9e9352f2328..eb7bc0012af5 100644
--- a/arch/h8300/include/uapi/asm/unistd.h
+++ b/arch/h8300/include/uapi/asm/unistd.h
@@ -3,5 +3,6 @@
 #define __ARCH_WANT_RENAMEAT
 #define __ARCH_WANT_STAT64
 #define __ARCH_WANT_SET_GET_RLIMIT
+#define __ARCH_WANT_TIME32_SYSCALLS
 
 #include <asm-generic/unistd.h>
diff --git a/arch/hexagon/include/uapi/asm/unistd.h b/arch/hexagon/include/uapi/asm/unistd.h
index 6bb392a33c35..432c4db1b623 100644
--- a/arch/hexagon/include/uapi/asm/unistd.h
+++ b/arch/hexagon/include/uapi/asm/unistd.h
@@ -35,5 +35,6 @@
 #define __ARCH_WANT_SYS_CLONE
 #define __ARCH_WANT_SYS_VFORK
 #define __ARCH_WANT_SYS_FORK
+#define __ARCH_WANT_TIME32_SYSCALLS
 
 #include <asm-generic/unistd.h>
diff --git a/arch/nds32/include/uapi/asm/unistd.h b/arch/nds32/include/uapi/asm/unistd.h
index eb98d24d3190..4ec8f543103f 100644
--- a/arch/nds32/include/uapi/asm/unistd.h
+++ b/arch/nds32/include/uapi/asm/unistd.h
@@ -4,6 +4,7 @@
 #define __ARCH_WANT_STAT64
 #define __ARCH_WANT_SYNC_FILE_RANGE2
 #define __ARCH_WANT_SET_GET_RLIMIT
+#define __ARCH_WANT_TIME32_SYSCALLS
 
 /* Use the standard ABI for syscalls */
 #include <asm-generic/unistd.h>
diff --git a/arch/nios2/include/uapi/asm/unistd.h b/arch/nios2/include/uapi/asm/unistd.h
index fa68e68bc26d..0b4bb1d41b28 100644
--- a/arch/nios2/include/uapi/asm/unistd.h
+++ b/arch/nios2/include/uapi/asm/unistd.h
@@ -21,6 +21,7 @@
 #define __ARCH_WANT_RENAMEAT
 #define __ARCH_WANT_STAT64
 #define __ARCH_WANT_SET_GET_RLIMIT
+#define __ARCH_WANT_TIME32_SYSCALLS
 
 /* Use the standard ABI for syscalls */
 #include <asm-generic/unistd.h>
diff --git a/arch/openrisc/include/uapi/asm/unistd.h b/arch/openrisc/include/uapi/asm/unistd.h
index 2e0bc0ff9f31..566f8c4f8047 100644
--- a/arch/openrisc/include/uapi/asm/unistd.h
+++ b/arch/openrisc/include/uapi/asm/unistd.h
@@ -24,6 +24,7 @@
 #define __ARCH_WANT_SET_GET_RLIMIT
 #define __ARCH_WANT_SYS_FORK
 #define __ARCH_WANT_SYS_CLONE
+#define __ARCH_WANT_TIME32_SYSCALLS
 
 #include <asm-generic/unistd.h>
 
diff --git a/arch/riscv/include/uapi/asm/unistd.h b/arch/riscv/include/uapi/asm/unistd.h
index d9340c52e7ad..486a288b454c 100644
--- a/arch/riscv/include/uapi/asm/unistd.h
+++ b/arch/riscv/include/uapi/asm/unistd.h
@@ -19,6 +19,9 @@
 #define __ARCH_WANT_NEW_STAT
 #endif /* __LP64__ */
 #define __ARCH_WANT_SET_GET_RLIMIT
+#ifndef __LP64__
+#define __ARCH_WANT_TIME32_SYSCALLS
+#endif
 
 #include <asm-generic/unistd.h>
 
diff --git a/arch/unicore32/include/uapi/asm/unistd.h b/arch/unicore32/include/uapi/asm/unistd.h
index 2b575c0cf177..4e5e624f5d7e 100644
--- a/arch/unicore32/include/uapi/asm/unistd.h
+++ b/arch/unicore32/include/uapi/asm/unistd.h
@@ -13,6 +13,7 @@
 
 #define __ARCH_WANT_RENAMEAT
 #define __ARCH_WANT_SET_GET_RLIMIT
+#define __ARCH_WANT_TIME32_SYSCALLS
 
 /* Use the standard ABI for syscalls. */
 #include <asm-generic/unistd.h>
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index 2cdf600b05fa..12cdf611d217 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -38,8 +38,10 @@ __SYSCALL(__NR_io_destroy, sys_io_destroy)
 __SC_COMP(__NR_io_submit, sys_io_submit, compat_sys_io_submit)
 #define __NR_io_cancel 3
 __SYSCALL(__NR_io_cancel, sys_io_cancel)
+#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32
 #define __NR_io_getevents 4
 __SC_3264(__NR_io_getevents, sys_io_getevents_time32, sys_io_getevents)
+#endif
 
 /* fs/xattr.c */
 #define __NR_setxattr 5
@@ -222,10 +224,12 @@ __SC_COMP(__NR_pwritev, sys_pwritev, compat_sys_pwritev)
 __SYSCALL(__NR3264_sendfile, sys_sendfile64)
 
 /* fs/select.c */
+#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32
 #define __NR_pselect6 72
 __SC_COMP_3264(__NR_pselect6, sys_pselect6_time32, sys_pselect6, compat_sys_pselect6_time32)
 #define __NR_ppoll 73
 __SC_COMP_3264(__NR_ppoll, sys_ppoll_time32, sys_ppoll, compat_sys_ppoll_time32)
+#endif
 
 /* fs/signalfd.c */
 #define __NR_signalfd4 74
@@ -269,16 +273,20 @@ __SC_COMP(__NR_sync_file_range, sys_sync_file_range, \
 /* fs/timerfd.c */
 #define __NR_timerfd_create 85
 __SYSCALL(__NR_timerfd_create, sys_timerfd_create)
+#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32
 #define __NR_timerfd_settime 86
 __SC_3264(__NR_timerfd_settime, sys_timerfd_settime32, \
 	  sys_timerfd_settime)
 #define __NR_timerfd_gettime 87
 __SC_3264(__NR_timerfd_gettime, sys_timerfd_gettime32, \
 	  sys_timerfd_gettime)
+#endif
 
 /* fs/utimes.c */
+#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32
 #define __NR_utimensat 88
 __SC_3264(__NR_utimensat, sys_utimensat_time32, sys_utimensat)
+#endif
 
 /* kernel/acct.c */
 #define __NR_acct 89
@@ -309,8 +317,10 @@ __SYSCALL(__NR_set_tid_address, sys_set_tid_address)
 __SYSCALL(__NR_unshare, sys_unshare)
 
 /* kernel/futex.c */
+#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32
 #define __NR_futex 98
 __SC_3264(__NR_futex, sys_futex_time32, sys_futex)
+#endif
 #define __NR_set_robust_list 99
 __SC_COMP(__NR_set_robust_list, sys_set_robust_list, \
 	  compat_sys_set_robust_list)
@@ -319,8 +329,10 @@ __SC_COMP(__NR_get_robust_list, sys_get_robust_list, \
 	  compat_sys_get_robust_list)
 
 /* kernel/hrtimer.c */
+#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32
 #define __NR_nanosleep 101
 __SC_3264(__NR_nanosleep, sys_nanosleep_time32, sys_nanosleep)
+#endif
 
 /* kernel/itimer.c */
 #define __NR_getitimer 102
@@ -341,14 +353,19 @@ __SYSCALL(__NR_delete_module, sys_delete_module)
 /* kernel/posix-timers.c */
 #define __NR_timer_create 107
 __SC_COMP(__NR_timer_create, sys_timer_create, compat_sys_timer_create)
+#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32
 #define __NR_timer_gettime 108
 __SC_3264(__NR_timer_gettime, sys_timer_gettime32, sys_timer_gettime)
+#endif
 #define __NR_timer_getoverrun 109
 __SYSCALL(__NR_timer_getoverrun, sys_timer_getoverrun)
+#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32
 #define __NR_timer_settime 110
 __SC_3264(__NR_timer_settime, sys_timer_settime32, sys_timer_settime)
+#endif
 #define __NR_timer_delete 111
 __SYSCALL(__NR_timer_delete, sys_timer_delete)
+#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32
 #define __NR_clock_settime 112
 __SC_3264(__NR_clock_settime, sys_clock_settime32, sys_clock_settime)
 #define __NR_clock_gettime 113
@@ -358,6 +375,7 @@ __SC_3264(__NR_clock_getres, sys_clock_getres_time32, sys_clock_getres)
 #define __NR_clock_nanosleep 115
 __SC_3264(__NR_clock_nanosleep, sys_clock_nanosleep_time32, \
 	  sys_clock_nanosleep)
+#endif
 
 /* kernel/printk.c */
 #define __NR_syslog 116
@@ -388,9 +406,11 @@ __SYSCALL(__NR_sched_yield, sys_sched_yield)
 __SYSCALL(__NR_sched_get_priority_max, sys_sched_get_priority_max)
 #define __NR_sched_get_priority_min 126
 __SYSCALL(__NR_sched_get_priority_min, sys_sched_get_priority_min)
+#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32
 #define __NR_sched_rr_get_interval 127
 __SC_3264(__NR_sched_rr_get_interval, sys_sched_rr_get_interval_time32, \
 	  sys_sched_rr_get_interval)
+#endif
 
 /* kernel/signal.c */
 #define __NR_restart_syscall 128
@@ -411,9 +431,11 @@ __SC_COMP(__NR_rt_sigaction, sys_rt_sigaction, compat_sys_rt_sigaction)
 __SC_COMP(__NR_rt_sigprocmask, sys_rt_sigprocmask, compat_sys_rt_sigprocmask)
 #define __NR_rt_sigpending 136
 __SC_COMP(__NR_rt_sigpending, sys_rt_sigpending, compat_sys_rt_sigpending)
+#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32
 #define __NR_rt_sigtimedwait 137
 __SC_COMP_3264(__NR_rt_sigtimedwait, sys_rt_sigtimedwait_time32, \
 	  sys_rt_sigtimedwait, compat_sys_rt_sigtimedwait_time32)
+#endif
 #define __NR_rt_sigqueueinfo 138
 __SC_COMP(__NR_rt_sigqueueinfo, sys_rt_sigqueueinfo, \
 	  compat_sys_rt_sigqueueinfo)
@@ -486,12 +508,14 @@ __SYSCALL(__NR_prctl, sys_prctl)
 __SYSCALL(__NR_getcpu, sys_getcpu)
 
 /* kernel/time.c */
+#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32
 #define __NR_gettimeofday 169
 __SC_COMP(__NR_gettimeofday, sys_gettimeofday, compat_sys_gettimeofday)
 #define __NR_settimeofday 170
 __SC_COMP(__NR_settimeofday, sys_settimeofday, compat_sys_settimeofday)
 #define __NR_adjtimex 171
 __SC_3264(__NR_adjtimex, sys_adjtimex_time32, sys_adjtimex)
+#endif
 
 /* kernel/timer.c */
 #define __NR_getpid 172
@@ -516,11 +540,13 @@ __SC_COMP(__NR_sysinfo, sys_sysinfo, compat_sys_sysinfo)
 __SC_COMP(__NR_mq_open, sys_mq_open, compat_sys_mq_open)
 #define __NR_mq_unlink 181
 __SYSCALL(__NR_mq_unlink, sys_mq_unlink)
+#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32
 #define __NR_mq_timedsend 182
 __SC_3264(__NR_mq_timedsend, sys_mq_timedsend_time32, sys_mq_timedsend)
 #define __NR_mq_timedreceive 183
 __SC_3264(__NR_mq_timedreceive, sys_mq_timedreceive_time32, \
 	  sys_mq_timedreceive)
+#endif
 #define __NR_mq_notify 184
 __SC_COMP(__NR_mq_notify, sys_mq_notify, compat_sys_mq_notify)
 #define __NR_mq_getsetattr 185
@@ -541,8 +567,10 @@ __SC_COMP(__NR_msgsnd, sys_msgsnd, compat_sys_msgsnd)
 __SYSCALL(__NR_semget, sys_semget)
 #define __NR_semctl 191
 __SC_COMP(__NR_semctl, sys_semctl, compat_sys_semctl)
+#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32
 #define __NR_semtimedop 192
 __SC_COMP(__NR_semtimedop, sys_semtimedop, sys_semtimedop_time32)
+#endif
 #define __NR_semop 193
 __SYSCALL(__NR_semop, sys_semop)
 
@@ -663,8 +691,10 @@ __SC_COMP(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo, \
 __SYSCALL(__NR_perf_event_open, sys_perf_event_open)
 #define __NR_accept4 242
 __SYSCALL(__NR_accept4, sys_accept4)
+#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32
 #define __NR_recvmmsg 243
 __SC_COMP_3264(__NR_recvmmsg, sys_recvmmsg_time32, sys_recvmmsg, compat_sys_recvmmsg_time32)
+#endif
 
 /*
  * Architectures may provide up to 16 syscalls of their own
@@ -672,8 +702,10 @@ __SC_COMP_3264(__NR_recvmmsg, sys_recvmmsg_time32, sys_recvmmsg, compat_sys_recv
  */
 #define __NR_arch_specific_syscall 244
 
+#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32
 #define __NR_wait4 260
 __SC_COMP(__NR_wait4, sys_wait4, compat_sys_wait4)
+#endif
 #define __NR_prlimit64 261
 __SYSCALL(__NR_prlimit64, sys_prlimit64)
 #define __NR_fanotify_init 262
@@ -684,8 +716,10 @@ __SYSCALL(__NR_fanotify_mark, sys_fanotify_mark)
 __SYSCALL(__NR_name_to_handle_at, sys_name_to_handle_at)
 #define __NR_open_by_handle_at         265
 __SYSCALL(__NR_open_by_handle_at, sys_open_by_handle_at)
+#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32
 #define __NR_clock_adjtime 266
 __SC_3264(__NR_clock_adjtime, sys_clock_adjtime32, sys_clock_adjtime)
+#endif
 #define __NR_syncfs 267
 __SYSCALL(__NR_syncfs, sys_syncfs)
 #define __NR_setns 268
@@ -738,8 +772,10 @@ __SYSCALL(__NR_pkey_alloc,    sys_pkey_alloc)
 __SYSCALL(__NR_pkey_free,     sys_pkey_free)
 #define __NR_statx 291
 __SYSCALL(__NR_statx,     sys_statx)
+#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32
 #define __NR_io_pgetevents 292
 __SC_COMP_3264(__NR_io_pgetevents, sys_io_pgetevents_time32, sys_io_pgetevents, compat_sys_io_pgetevents)
+#endif
 #define __NR_rseq 293
 __SYSCALL(__NR_rseq, sys_rseq)
 #define __NR_kexec_file_load 294
diff --git a/scripts/checksyscalls.sh b/scripts/checksyscalls.sh
index 53c5677d7e82..ffd635efbdca 100755
--- a/scripts/checksyscalls.sh
+++ b/scripts/checksyscalls.sh
@@ -143,6 +143,13 @@ cat << EOF
 #define __IGNORE_rt_sigtimedwait
 #define __IGNORE_futex
 #define __IGNORE_sched_rr_get_interval
+#define __IGNORE_gettimeofday
+#define __IGNORE_settimeofday
+#define __IGNORE_wait4
+#define __IGNORE_adjtimex
+#define __IGNORE_nanosleep
+#define __IGNORE_io_getevents
+#define __IGNORE_recvmmsg
 #endif
 
 /* i386-specific or historical system calls */
-- 
cgit v1.2.3


From 568f196756ad9fe2b49c46bbf6a9de1b190438b4 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Mon, 28 Jan 2019 17:21:52 -0800
Subject: bpf: check that BPF programs run with preemption disabled

Introduce cant_sleep() macro for annotation of functions that
cannot sleep.

Use it in BPF_PROG_RUN to catch execution of BPF programs in
preemptable context.

Suggested-by: Jann Horn <jannh@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/filter.h |  2 +-
 include/linux/kernel.h | 14 ++++++++++++--
 kernel/sched/core.c    | 28 ++++++++++++++++++++++++++++
 3 files changed, 41 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 95e2d7ebdf21..f32b3eca5a04 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -533,7 +533,7 @@ struct sk_filter {
 	struct bpf_prog	*prog;
 };
 
-#define BPF_PROG_RUN(filter, ctx)  (*(filter)->bpf_func)(ctx, (filter)->insnsi)
+#define BPF_PROG_RUN(filter, ctx)  ({ cant_sleep(); (*(filter)->bpf_func)(ctx, (filter)->insnsi); })
 
 #define BPF_SKB_CB_LEN QDISC_CB_PRIV_LEN
 
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 8f0e68e250a7..a8868a32098c 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -245,8 +245,10 @@ extern int _cond_resched(void);
 #endif
 
 #ifdef CONFIG_DEBUG_ATOMIC_SLEEP
-  void ___might_sleep(const char *file, int line, int preempt_offset);
-  void __might_sleep(const char *file, int line, int preempt_offset);
+extern void ___might_sleep(const char *file, int line, int preempt_offset);
+extern void __might_sleep(const char *file, int line, int preempt_offset);
+extern void __cant_sleep(const char *file, int line, int preempt_offset);
+
 /**
  * might_sleep - annotation for functions that can sleep
  *
@@ -259,6 +261,13 @@ extern int _cond_resched(void);
  */
 # define might_sleep() \
 	do { __might_sleep(__FILE__, __LINE__, 0); might_resched(); } while (0)
+/**
+ * cant_sleep - annotation for functions that cannot sleep
+ *
+ * this macro will print a stack trace if it is executed with preemption enabled
+ */
+# define cant_sleep() \
+	do { __cant_sleep(__FILE__, __LINE__, 0); } while (0)
 # define sched_annotate_sleep()	(current->task_state_change = 0)
 #else
   static inline void ___might_sleep(const char *file, int line,
@@ -266,6 +275,7 @@ extern int _cond_resched(void);
   static inline void __might_sleep(const char *file, int line,
 				   int preempt_offset) { }
 # define might_sleep() do { might_resched(); } while (0)
+# define cant_sleep() do { } while (0)
 # define sched_annotate_sleep() do { } while (0)
 #endif
 
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index d8d76a65cfdd..7cbb5658be80 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -6162,6 +6162,34 @@ void ___might_sleep(const char *file, int line, int preempt_offset)
 	add_taint(TAINT_WARN, LOCKDEP_STILL_OK);
 }
 EXPORT_SYMBOL(___might_sleep);
+
+void __cant_sleep(const char *file, int line, int preempt_offset)
+{
+	static unsigned long prev_jiffy;
+
+	if (irqs_disabled())
+		return;
+
+	if (!IS_ENABLED(CONFIG_PREEMPT_COUNT))
+		return;
+
+	if (preempt_count() > preempt_offset)
+		return;
+
+	if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy)
+		return;
+	prev_jiffy = jiffies;
+
+	printk(KERN_ERR "BUG: assuming atomic context at %s:%d\n", file, line);
+	printk(KERN_ERR "in_atomic(): %d, irqs_disabled(): %d, pid: %d, name: %s\n",
+			in_atomic(), irqs_disabled(),
+			current->pid, current->comm);
+
+	debug_show_held_locks(current);
+	dump_stack();
+	add_taint(TAINT_WARN, LOCKDEP_STILL_OK);
+}
+EXPORT_SYMBOL_GPL(__cant_sleep);
 #endif
 
 #ifdef CONFIG_MAGIC_SYSRQ
-- 
cgit v1.2.3


From ee7930490a8f84570e96268f5736e99570b58307 Mon Sep 17 00:00:00 2001
From: Andre Przywara <andre.przywara@arm.com>
Date: Fri, 6 Jul 2018 09:11:50 +0100
Subject: clocksource/arm_arch_timer: Store physical timer IRQ number for KVM
 on VHE

A host running in VHE mode gets the EL2 physical timer as its time
source (accessed using the EL1 sysreg accessors, which get re-directed
to the EL2 sysregs by VHE).

The EL1 physical timer remains unused by the host kernel, allowing us to
pass that on directly to a KVM guest and saves us from emulating this
timer for the guest on VHE systems.

Store the EL1 Physical Timer's IRQ number in
struct arch_timer_kvm_info on VHE systems to allow KVM to use it.

Acked-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Signed-off-by: Andre Przywara <andre.przywara@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@arm.com>
---
 drivers/clocksource/arm_arch_timer.c | 11 +++++++++--
 include/clocksource/arm_arch_timer.h |  1 +
 2 files changed, 10 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c
index 9a7d4dc00b6e..b9243e2328b4 100644
--- a/drivers/clocksource/arm_arch_timer.c
+++ b/drivers/clocksource/arm_arch_timer.c
@@ -1206,6 +1206,13 @@ static enum arch_timer_ppi_nr __init arch_timer_select_ppi(void)
 	return ARCH_TIMER_PHYS_SECURE_PPI;
 }
 
+static void __init arch_timer_populate_kvm_info(void)
+{
+	arch_timer_kvm_info.virtual_irq = arch_timer_ppi[ARCH_TIMER_VIRT_PPI];
+	if (is_kernel_in_hyp_mode())
+		arch_timer_kvm_info.physical_irq = arch_timer_ppi[ARCH_TIMER_PHYS_NONSECURE_PPI];
+}
+
 static int __init arch_timer_of_init(struct device_node *np)
 {
 	int i, ret;
@@ -1220,7 +1227,7 @@ static int __init arch_timer_of_init(struct device_node *np)
 	for (i = ARCH_TIMER_PHYS_SECURE_PPI; i < ARCH_TIMER_MAX_TIMER_PPI; i++)
 		arch_timer_ppi[i] = irq_of_parse_and_map(np, i);
 
-	arch_timer_kvm_info.virtual_irq = arch_timer_ppi[ARCH_TIMER_VIRT_PPI];
+	arch_timer_populate_kvm_info();
 
 	rate = arch_timer_get_cntfrq();
 	arch_timer_of_configure_rate(rate, np);
@@ -1550,7 +1557,7 @@ static int __init arch_timer_acpi_init(struct acpi_table_header *table)
 	arch_timer_ppi[ARCH_TIMER_HYP_PPI] =
 		acpi_gtdt_map_ppi(ARCH_TIMER_HYP_PPI);
 
-	arch_timer_kvm_info.virtual_irq = arch_timer_ppi[ARCH_TIMER_VIRT_PPI];
+	arch_timer_populate_kvm_info();
 
 	/*
 	 * When probing via ACPI, we have no mechanism to override the sysreg
diff --git a/include/clocksource/arm_arch_timer.h b/include/clocksource/arm_arch_timer.h
index 349e5957c949..702967d996bb 100644
--- a/include/clocksource/arm_arch_timer.h
+++ b/include/clocksource/arm_arch_timer.h
@@ -74,6 +74,7 @@ enum arch_timer_spi_nr {
 struct arch_timer_kvm_info {
 	struct timecounter timecounter;
 	int virtual_irq;
+	int physical_irq;
 };
 
 struct arch_timer_mem_frame {
-- 
cgit v1.2.3


From accb99bcd0ca6d3ee412557b0c3f583a3abc0eb6 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@arm.com>
Date: Mon, 26 Nov 2018 18:21:22 +0100
Subject: KVM: arm/arm64: Simplify bg_timer programming

Instead of calling into kvm_timer_[un]schedule from the main kvm
blocking path, test if the VCPU is on the wait queue from the load/put
path and perform the background timer setup/cancel in this path.

This has the distinct advantage that we no longer race between load/put
and schedule/unschedule and programming and canceling of the bg_timer
always happens when the timer state is not loaded.

Note that we must now remove the checks in kvm_timer_blocking that do
not schedule a background timer if one of the timers can fire, because
we no longer have a guarantee that kvm_vcpu_check_block() will be called
before kvm_timer_blocking.

Reported-by: Andre Przywara <andre.przywara@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 include/kvm/arm_arch_timer.h |  3 ---
 virt/kvm/arm/arch_timer.c    | 35 ++++++++++++++---------------------
 virt/kvm/arm/arm.c           |  2 --
 3 files changed, 14 insertions(+), 26 deletions(-)

(limited to 'include')

diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h
index 33771352dcd6..d6e6a45d1d24 100644
--- a/include/kvm/arm_arch_timer.h
+++ b/include/kvm/arm_arch_timer.h
@@ -76,9 +76,6 @@ int kvm_arm_timer_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr);
 
 bool kvm_timer_is_pending(struct kvm_vcpu *vcpu);
 
-void kvm_timer_schedule(struct kvm_vcpu *vcpu);
-void kvm_timer_unschedule(struct kvm_vcpu *vcpu);
-
 u64 kvm_phys_timer_read(void);
 
 void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu);
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index b07ac4614e1c..4986028d9829 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -349,22 +349,12 @@ out:
  * thread is removed from its waitqueue and made runnable when there's a timer
  * interrupt to handle.
  */
-void kvm_timer_schedule(struct kvm_vcpu *vcpu)
+static void kvm_timer_blocking(struct kvm_vcpu *vcpu)
 {
 	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
 	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
 	struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
 
-	vtimer_save_state(vcpu);
-
-	/*
-	 * No need to schedule a background timer if any guest timer has
-	 * already expired, because kvm_vcpu_block will return before putting
-	 * the thread to sleep.
-	 */
-	if (kvm_timer_should_fire(vtimer) || kvm_timer_should_fire(ptimer))
-		return;
-
 	/*
 	 * If both timers are not capable of raising interrupts (disabled or
 	 * masked), then there's no more work for us to do.
@@ -373,12 +363,19 @@ void kvm_timer_schedule(struct kvm_vcpu *vcpu)
 		return;
 
 	/*
-	 * The guest timers have not yet expired, schedule a background timer.
+	 * At least one guest time will expire. Schedule a background timer.
 	 * Set the earliest expiration time among the guest timers.
 	 */
 	soft_timer_start(&timer->bg_timer, kvm_timer_earliest_exp(vcpu));
 }
 
+static void kvm_timer_unblocking(struct kvm_vcpu *vcpu)
+{
+	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+
+	soft_timer_cancel(&timer->bg_timer);
+}
+
 static void vtimer_restore_state(struct kvm_vcpu *vcpu)
 {
 	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
@@ -401,15 +398,6 @@ out:
 	local_irq_restore(flags);
 }
 
-void kvm_timer_unschedule(struct kvm_vcpu *vcpu)
-{
-	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
-
-	vtimer_restore_state(vcpu);
-
-	soft_timer_cancel(&timer->bg_timer);
-}
-
 static void set_cntvoff(u64 cntvoff)
 {
 	u32 low = lower_32_bits(cntvoff);
@@ -485,6 +473,8 @@ void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu)
 	/* Set the background timer for the physical timer emulation. */
 	phys_timer_emulate(vcpu);
 
+	kvm_timer_unblocking(vcpu);
+
 	/* If the timer fired while we weren't running, inject it now */
 	if (kvm_timer_should_fire(ptimer) != ptimer->irq.level)
 		kvm_timer_update_irq(vcpu, !ptimer->irq.level, ptimer);
@@ -527,6 +517,9 @@ void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
 	 */
 	soft_timer_cancel(&timer->phys_timer);
 
+	if (swait_active(kvm_arch_vcpu_wq(vcpu)))
+		kvm_timer_blocking(vcpu);
+
 	/*
 	 * The kernel may decide to run userspace after calling vcpu_put, so
 	 * we reset cntvoff to 0 to ensure a consistent read between user
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
index b77db673bb03..9fbdb9e1c51f 100644
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -335,13 +335,11 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
 
 void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
 {
-	kvm_timer_schedule(vcpu);
 	kvm_vgic_v4_enable_doorbell(vcpu);
 }
 
 void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
 {
-	kvm_timer_unschedule(vcpu);
 	kvm_vgic_v4_disable_doorbell(vcpu);
 }
 
-- 
cgit v1.2.3


From 84135d3d18da2ff17d3ad1a609b2818cc3049552 Mon Sep 17 00:00:00 2001
From: Andre Przywara <andre.przywara@arm.com>
Date: Thu, 5 Jul 2018 16:48:23 +0100
Subject: KVM: arm/arm64: consolidate arch timer trap handlers

At the moment we have separate system register emulation handlers for
each timer register. Actually they are quite similar, and we rely on
kvm_arm_timer_[gs]et_reg() for the actual emulation anyways, so let's
just merge all of those handlers into one function, which just marshalls
the arguments and then hands off to a set of common accessors.
This makes extending the emulation to include EL2 timers much easier.

Signed-off-by: Andre Przywara <andre.przywara@arm.com>
[Fixed 32-bit VM breakage and reduced to reworking existing code]
Signed-off-by: Christoffer Dall <christoffer.dall@arm.com>
[Fixed 32bit host, general cleanup]
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 arch/arm/kvm/coproc.c           |  23 ++++---
 arch/arm64/include/asm/sysreg.h |   4 ++
 arch/arm64/kvm/sys_regs.c       |  73 ++++++++++------------
 include/kvm/arm_arch_timer.h    |  23 +++++++
 virt/kvm/arm/arch_timer.c       | 130 ++++++++++++++++++++++++++++++++++------
 5 files changed, 187 insertions(+), 66 deletions(-)

(limited to 'include')

diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c
index 222c1635bc7a..51863364f8d1 100644
--- a/arch/arm/kvm/coproc.c
+++ b/arch/arm/kvm/coproc.c
@@ -293,15 +293,16 @@ static bool access_cntp_tval(struct kvm_vcpu *vcpu,
 			     const struct coproc_params *p,
 			     const struct coproc_reg *r)
 {
-	u64 now = kvm_phys_timer_read();
-	u64 val;
+	u32 val;
 
 	if (p->is_write) {
 		val = *vcpu_reg(vcpu, p->Rt1);
-		kvm_arm_timer_set_reg(vcpu, KVM_REG_ARM_PTIMER_CVAL, val + now);
+		kvm_arm_timer_write_sysreg(vcpu,
+					   TIMER_PTIMER, TIMER_REG_TVAL, val);
 	} else {
-		val = kvm_arm_timer_get_reg(vcpu, KVM_REG_ARM_PTIMER_CVAL);
-		*vcpu_reg(vcpu, p->Rt1) = val - now;
+		val = kvm_arm_timer_read_sysreg(vcpu,
+						TIMER_PTIMER, TIMER_REG_TVAL);
+		*vcpu_reg(vcpu, p->Rt1) = val;
 	}
 
 	return true;
@@ -315,9 +316,11 @@ static bool access_cntp_ctl(struct kvm_vcpu *vcpu,
 
 	if (p->is_write) {
 		val = *vcpu_reg(vcpu, p->Rt1);
-		kvm_arm_timer_set_reg(vcpu, KVM_REG_ARM_PTIMER_CTL, val);
+		kvm_arm_timer_write_sysreg(vcpu,
+					   TIMER_PTIMER, TIMER_REG_CTL, val);
 	} else {
-		val = kvm_arm_timer_get_reg(vcpu, KVM_REG_ARM_PTIMER_CTL);
+		val = kvm_arm_timer_read_sysreg(vcpu,
+						TIMER_PTIMER, TIMER_REG_CTL);
 		*vcpu_reg(vcpu, p->Rt1) = val;
 	}
 
@@ -333,9 +336,11 @@ static bool access_cntp_cval(struct kvm_vcpu *vcpu,
 	if (p->is_write) {
 		val = (u64)*vcpu_reg(vcpu, p->Rt2) << 32;
 		val |= *vcpu_reg(vcpu, p->Rt1);
-		kvm_arm_timer_set_reg(vcpu, KVM_REG_ARM_PTIMER_CVAL, val);
+		kvm_arm_timer_write_sysreg(vcpu,
+					   TIMER_PTIMER, TIMER_REG_CVAL, val);
 	} else {
-		val = kvm_arm_timer_get_reg(vcpu, KVM_REG_ARM_PTIMER_CVAL);
+		val = kvm_arm_timer_read_sysreg(vcpu,
+						TIMER_PTIMER, TIMER_REG_CVAL);
 		*vcpu_reg(vcpu, p->Rt1) = val;
 		*vcpu_reg(vcpu, p->Rt2) = val >> 32;
 	}
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 3e5650903d6d..6482e8bcf1b8 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -392,6 +392,10 @@
 #define SYS_CNTP_CTL_EL0		sys_reg(3, 3, 14, 2, 1)
 #define SYS_CNTP_CVAL_EL0		sys_reg(3, 3, 14, 2, 2)
 
+#define SYS_AARCH32_CNTP_TVAL		sys_reg(0, 0, 14, 2, 0)
+#define SYS_AARCH32_CNTP_CTL		sys_reg(0, 0, 14, 2, 1)
+#define SYS_AARCH32_CNTP_CVAL		sys_reg(0, 2, 0, 14, 0)
+
 #define __PMEV_op2(n)			((n) & 0x7)
 #define __CNTR_CRm(n)			(0x8 | (((n) >> 3) & 0x3))
 #define SYS_PMEVCNTRn_EL0(n)		sys_reg(3, 3, 14, __CNTR_CRm(n), __PMEV_op2(n))
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 1a5bea4285e4..0d348500b24b 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -990,44 +990,38 @@ static bool access_pmuserenr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
 	{ SYS_DESC(SYS_PMEVTYPERn_EL0(n)),					\
 	  access_pmu_evtyper, reset_unknown, (PMEVTYPER0_EL0 + n), }
 
-static bool access_cntp_tval(struct kvm_vcpu *vcpu,
-		struct sys_reg_params *p,
-		const struct sys_reg_desc *r)
+static bool access_arch_timer(struct kvm_vcpu *vcpu,
+			      struct sys_reg_params *p,
+			      const struct sys_reg_desc *r)
 {
-	u64 now = kvm_phys_timer_read();
-	u64 cval;
+	enum kvm_arch_timers tmr;
+	enum kvm_arch_timer_regs treg;
+	u64 reg = reg_to_encoding(r);
 
-	if (p->is_write) {
-		kvm_arm_timer_set_reg(vcpu, KVM_REG_ARM_PTIMER_CVAL,
-				      p->regval + now);
-	} else {
-		cval = kvm_arm_timer_get_reg(vcpu, KVM_REG_ARM_PTIMER_CVAL);
-		p->regval = cval - now;
+	switch (reg) {
+	case SYS_CNTP_TVAL_EL0:
+	case SYS_AARCH32_CNTP_TVAL:
+		tmr = TIMER_PTIMER;
+		treg = TIMER_REG_TVAL;
+		break;
+	case SYS_CNTP_CTL_EL0:
+	case SYS_AARCH32_CNTP_CTL:
+		tmr = TIMER_PTIMER;
+		treg = TIMER_REG_CTL;
+		break;
+	case SYS_CNTP_CVAL_EL0:
+	case SYS_AARCH32_CNTP_CVAL:
+		tmr = TIMER_PTIMER;
+		treg = TIMER_REG_CVAL;
+		break;
+	default:
+		BUG();
 	}
 
-	return true;
-}
-
-static bool access_cntp_ctl(struct kvm_vcpu *vcpu,
-		struct sys_reg_params *p,
-		const struct sys_reg_desc *r)
-{
-	if (p->is_write)
-		kvm_arm_timer_set_reg(vcpu, KVM_REG_ARM_PTIMER_CTL, p->regval);
-	else
-		p->regval = kvm_arm_timer_get_reg(vcpu, KVM_REG_ARM_PTIMER_CTL);
-
-	return true;
-}
-
-static bool access_cntp_cval(struct kvm_vcpu *vcpu,
-		struct sys_reg_params *p,
-		const struct sys_reg_desc *r)
-{
 	if (p->is_write)
-		kvm_arm_timer_set_reg(vcpu, KVM_REG_ARM_PTIMER_CVAL, p->regval);
+		kvm_arm_timer_write_sysreg(vcpu, tmr, treg, p->regval);
 	else
-		p->regval = kvm_arm_timer_get_reg(vcpu, KVM_REG_ARM_PTIMER_CVAL);
+		p->regval = kvm_arm_timer_read_sysreg(vcpu, tmr, treg);
 
 	return true;
 }
@@ -1392,9 +1386,9 @@ static const struct sys_reg_desc sys_reg_descs[] = {
 	{ SYS_DESC(SYS_TPIDR_EL0), NULL, reset_unknown, TPIDR_EL0 },
 	{ SYS_DESC(SYS_TPIDRRO_EL0), NULL, reset_unknown, TPIDRRO_EL0 },
 
-	{ SYS_DESC(SYS_CNTP_TVAL_EL0), access_cntp_tval },
-	{ SYS_DESC(SYS_CNTP_CTL_EL0), access_cntp_ctl },
-	{ SYS_DESC(SYS_CNTP_CVAL_EL0), access_cntp_cval },
+	{ SYS_DESC(SYS_CNTP_TVAL_EL0), access_arch_timer },
+	{ SYS_DESC(SYS_CNTP_CTL_EL0), access_arch_timer },
+	{ SYS_DESC(SYS_CNTP_CVAL_EL0), access_arch_timer },
 
 	/* PMEVCNTRn_EL0 */
 	PMU_PMEVCNTR_EL0(0),
@@ -1715,10 +1709,9 @@ static const struct sys_reg_desc cp15_regs[] = {
 
 	{ Op1( 0), CRn(13), CRm( 0), Op2( 1), access_vm_reg, NULL, c13_CID },
 
-	/* CNTP_TVAL */
-	{ Op1( 0), CRn(14), CRm( 2), Op2( 0), access_cntp_tval },
-	/* CNTP_CTL */
-	{ Op1( 0), CRn(14), CRm( 2), Op2( 1), access_cntp_ctl },
+	/* Arch Tmers */
+	{ SYS_DESC(SYS_AARCH32_CNTP_TVAL), access_arch_timer },
+	{ SYS_DESC(SYS_AARCH32_CNTP_CTL), access_arch_timer },
 
 	/* PMEVCNTRn */
 	PMU_PMEVCNTR(0),
@@ -1795,7 +1788,7 @@ static const struct sys_reg_desc cp15_64_regs[] = {
 	{ Op1( 1), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR1 },
 	{ Op1( 1), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, /* ICC_ASGI1R */
 	{ Op1( 2), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, /* ICC_SGI0R */
-	{ Op1( 2), CRn( 0), CRm(14), Op2( 0), access_cntp_cval },
+	{ SYS_DESC(SYS_AARCH32_CNTP_CVAL),    access_arch_timer },
 };
 
 /* Target specific emulation tables */
diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h
index d6e6a45d1d24..d26b7fde9935 100644
--- a/include/kvm/arm_arch_timer.h
+++ b/include/kvm/arm_arch_timer.h
@@ -22,6 +22,19 @@
 #include <linux/clocksource.h>
 #include <linux/hrtimer.h>
 
+enum kvm_arch_timers {
+	TIMER_PTIMER,
+	TIMER_VTIMER,
+	NR_KVM_TIMERS
+};
+
+enum kvm_arch_timer_regs {
+	TIMER_REG_CNT,
+	TIMER_REG_CVAL,
+	TIMER_REG_TVAL,
+	TIMER_REG_CTL,
+};
+
 struct arch_timer_context {
 	/* Registers: control register, timer value */
 	u32				cnt_ctl;
@@ -87,5 +100,15 @@ bool kvm_arch_timer_get_input_level(int vintid);
 
 #define vcpu_vtimer(v)	(&(v)->arch.timer_cpu.vtimer)
 #define vcpu_ptimer(v)	(&(v)->arch.timer_cpu.ptimer)
+#define vcpu_get_timer(v,t)					\
+	(t == TIMER_VTIMER ? vcpu_vtimer(v) : vcpu_ptimer(v))
+
+u64 kvm_arm_timer_read_sysreg(struct kvm_vcpu *vcpu,
+			      enum kvm_arch_timers tmr,
+			      enum kvm_arch_timer_regs treg);
+void kvm_arm_timer_write_sysreg(struct kvm_vcpu *vcpu,
+				enum kvm_arch_timers tmr,
+				enum kvm_arch_timer_regs treg,
+				u64 val);
 
 #endif
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index 4986028d9829..f7d377448438 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -25,6 +25,7 @@
 
 #include <clocksource/arm_arch_timer.h>
 #include <asm/arch_timer.h>
+#include <asm/kvm_emulate.h>
 #include <asm/kvm_hyp.h>
 
 #include <kvm/arm_vgic.h>
@@ -52,6 +53,13 @@ static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx);
 static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level,
 				 struct arch_timer_context *timer_ctx);
 static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx);
+static void kvm_arm_timer_write(struct kvm_vcpu *vcpu,
+				struct arch_timer_context *timer,
+				enum kvm_arch_timer_regs treg,
+				u64 val);
+static u64 kvm_arm_timer_read(struct kvm_vcpu *vcpu,
+			      struct arch_timer_context *timer,
+			      enum kvm_arch_timer_regs treg);
 
 u64 kvm_phys_timer_read(void)
 {
@@ -628,24 +636,25 @@ static void kvm_timer_init_interrupt(void *info)
 
 int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value)
 {
-	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
-	struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
-
 	switch (regid) {
 	case KVM_REG_ARM_TIMER_CTL:
-		vtimer->cnt_ctl = value & ~ARCH_TIMER_CTRL_IT_STAT;
+		kvm_arm_timer_write(vcpu,
+				    vcpu_vtimer(vcpu), TIMER_REG_CTL, value);
 		break;
 	case KVM_REG_ARM_TIMER_CNT:
 		update_vtimer_cntvoff(vcpu, kvm_phys_timer_read() - value);
 		break;
 	case KVM_REG_ARM_TIMER_CVAL:
-		vtimer->cnt_cval = value;
+		kvm_arm_timer_write(vcpu,
+				    vcpu_vtimer(vcpu), TIMER_REG_CVAL, value);
 		break;
 	case KVM_REG_ARM_PTIMER_CTL:
-		ptimer->cnt_ctl = value & ~ARCH_TIMER_CTRL_IT_STAT;
+		kvm_arm_timer_write(vcpu,
+				    vcpu_ptimer(vcpu), TIMER_REG_CTL, value);
 		break;
 	case KVM_REG_ARM_PTIMER_CVAL:
-		ptimer->cnt_cval = value;
+		kvm_arm_timer_write(vcpu,
+				    vcpu_ptimer(vcpu), TIMER_REG_CVAL, value);
 		break;
 
 	default:
@@ -672,26 +681,113 @@ static u64 read_timer_ctl(struct arch_timer_context *timer)
 
 u64 kvm_arm_timer_get_reg(struct kvm_vcpu *vcpu, u64 regid)
 {
-	struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
-	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
-
 	switch (regid) {
 	case KVM_REG_ARM_TIMER_CTL:
-		return read_timer_ctl(vtimer);
+		return kvm_arm_timer_read(vcpu,
+					  vcpu_vtimer(vcpu), TIMER_REG_CTL);
 	case KVM_REG_ARM_TIMER_CNT:
-		return kvm_phys_timer_read() - vtimer->cntvoff;
+		return kvm_arm_timer_read(vcpu,
+					  vcpu_vtimer(vcpu), TIMER_REG_CNT);
 	case KVM_REG_ARM_TIMER_CVAL:
-		return vtimer->cnt_cval;
+		return kvm_arm_timer_read(vcpu,
+					  vcpu_vtimer(vcpu), TIMER_REG_CVAL);
 	case KVM_REG_ARM_PTIMER_CTL:
-		return read_timer_ctl(ptimer);
-	case KVM_REG_ARM_PTIMER_CVAL:
-		return ptimer->cnt_cval;
+		return kvm_arm_timer_read(vcpu,
+					  vcpu_ptimer(vcpu), TIMER_REG_CTL);
 	case KVM_REG_ARM_PTIMER_CNT:
-		return kvm_phys_timer_read();
+		return kvm_arm_timer_read(vcpu,
+					  vcpu_vtimer(vcpu), TIMER_REG_CNT);
+	case KVM_REG_ARM_PTIMER_CVAL:
+		return kvm_arm_timer_read(vcpu,
+					  vcpu_ptimer(vcpu), TIMER_REG_CVAL);
 	}
 	return (u64)-1;
 }
 
+static u64 kvm_arm_timer_read(struct kvm_vcpu *vcpu,
+			      struct arch_timer_context *timer,
+			      enum kvm_arch_timer_regs treg)
+{
+	u64 val;
+
+	switch (treg) {
+	case TIMER_REG_TVAL:
+		val = kvm_phys_timer_read() - timer->cntvoff - timer->cnt_cval;
+		break;
+
+	case TIMER_REG_CTL:
+		val = read_timer_ctl(timer);
+		break;
+
+	case TIMER_REG_CVAL:
+		val = timer->cnt_cval;
+		break;
+
+	case TIMER_REG_CNT:
+		val = kvm_phys_timer_read() - timer->cntvoff;
+		break;
+
+	default:
+		BUG();
+	}
+
+	return val;
+}
+
+u64 kvm_arm_timer_read_sysreg(struct kvm_vcpu *vcpu,
+			      enum kvm_arch_timers tmr,
+			      enum kvm_arch_timer_regs treg)
+{
+	u64 val;
+
+	preempt_disable();
+	kvm_timer_vcpu_put(vcpu);
+
+	val = kvm_arm_timer_read(vcpu, vcpu_get_timer(vcpu, tmr), treg);
+
+	kvm_timer_vcpu_load(vcpu);
+	preempt_enable();
+
+	return val;
+}
+
+static void kvm_arm_timer_write(struct kvm_vcpu *vcpu,
+				struct arch_timer_context *timer,
+				enum kvm_arch_timer_regs treg,
+				u64 val)
+{
+	switch (treg) {
+	case TIMER_REG_TVAL:
+		timer->cnt_cval = val - kvm_phys_timer_read() - timer->cntvoff;
+		break;
+
+	case TIMER_REG_CTL:
+		timer->cnt_ctl = val & ~ARCH_TIMER_CTRL_IT_STAT;
+		break;
+
+	case TIMER_REG_CVAL:
+		timer->cnt_cval = val;
+		break;
+
+	default:
+		BUG();
+	}
+}
+
+void kvm_arm_timer_write_sysreg(struct kvm_vcpu *vcpu,
+				enum kvm_arch_timers tmr,
+				enum kvm_arch_timer_regs treg,
+				u64 val)
+{
+	preempt_disable();
+	kvm_timer_vcpu_put(vcpu);
+
+	kvm_arm_timer_write(vcpu, vcpu_get_timer(vcpu, tmr), treg, val);
+
+	kvm_timer_vcpu_load(vcpu);
+	preempt_enable();
+}
+
 static int kvm_timer_starting_cpu(unsigned int cpu)
 {
 	kvm_timer_init_interrupt(NULL);
-- 
cgit v1.2.3


From e604dd5d45c75c2112424dec74853efb708f4fa6 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@arm.com>
Date: Tue, 18 Sep 2018 10:08:18 -0700
Subject: KVM: arm/arm64: timer: Rework data structures for multiple timers

Prepare for having 4 timer data structures (2 for now).

Move loaded to the cpu data structure and not the individual timer
structure, in preparation for assigning the EL1 phys timer as well.

Signed-off-by: Christoffer Dall <christoffer.dall@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 include/kvm/arm_arch_timer.h | 41 ++++++++++++++++---------------
 virt/kvm/arm/arch_timer.c    | 58 +++++++++++++++++++++++---------------------
 2 files changed, 51 insertions(+), 48 deletions(-)

(limited to 'include')

diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h
index d26b7fde9935..ab835112204d 100644
--- a/include/kvm/arm_arch_timer.h
+++ b/include/kvm/arm_arch_timer.h
@@ -36,6 +36,8 @@ enum kvm_arch_timer_regs {
 };
 
 struct arch_timer_context {
+	struct kvm_vcpu			*vcpu;
+
 	/* Registers: control register, timer value */
 	u32				cnt_ctl;
 	u64				cnt_cval;
@@ -43,32 +45,31 @@ struct arch_timer_context {
 	/* Timer IRQ */
 	struct kvm_irq_level		irq;
 
-	/*
-	 * We have multiple paths which can save/restore the timer state
-	 * onto the hardware, so we need some way of keeping track of
-	 * where the latest state is.
-	 *
-	 * loaded == true:  State is loaded on the hardware registers.
-	 * loaded == false: State is stored in memory.
-	 */
-	bool			loaded;
-
 	/* Virtual offset */
-	u64			cntvoff;
+	u64				cntvoff;
+
+	/* Emulated Timer (may be unused) */
+	struct hrtimer			hrtimer;
 };
 
 struct arch_timer_cpu {
-	struct arch_timer_context	vtimer;
-	struct arch_timer_context	ptimer;
+	struct arch_timer_context timers[NR_KVM_TIMERS];
 
 	/* Background timer used when the guest is not running */
 	struct hrtimer			bg_timer;
 
-	/* Physical timer emulation */
-	struct hrtimer			phys_timer;
-
 	/* Is the timer enabled */
 	bool			enabled;
+
+	/*
+	 * We have multiple paths which can save/restore the timer state
+	 * onto the hardware, so we need some way of keeping track of
+	 * where the latest state is.
+	 *
+	 * loaded == true:  State is loaded on the hardware registers.
+	 * loaded == false: State is stored in memory.
+	 */
+	bool			loaded;
 };
 
 int kvm_timer_hyp_init(bool);
@@ -98,10 +99,10 @@ void kvm_timer_init_vhe(void);
 
 bool kvm_arch_timer_get_input_level(int vintid);
 
-#define vcpu_vtimer(v)	(&(v)->arch.timer_cpu.vtimer)
-#define vcpu_ptimer(v)	(&(v)->arch.timer_cpu.ptimer)
-#define vcpu_get_timer(v,t)					\
-	(t == TIMER_VTIMER ? vcpu_vtimer(v) : vcpu_ptimer(v))
+#define vcpu_timer(v)	(&(v)->arch.timer_cpu)
+#define vcpu_get_timer(v,t)	(&vcpu_timer(v)->timers[(t)])
+#define vcpu_vtimer(v)	(&(v)->arch.timer_cpu.timers[TIMER_VTIMER])
+#define vcpu_ptimer(v)	(&(v)->arch.timer_cpu.timers[TIMER_PTIMER])
 
 u64 kvm_arm_timer_read_sysreg(struct kvm_vcpu *vcpu,
 			      enum kvm_arch_timers tmr,
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index f7d377448438..471f9fd004c9 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -184,13 +184,11 @@ static enum hrtimer_restart kvm_bg_timer_expire(struct hrtimer *hrt)
 static enum hrtimer_restart kvm_phys_timer_expire(struct hrtimer *hrt)
 {
 	struct arch_timer_context *ptimer;
-	struct arch_timer_cpu *timer;
 	struct kvm_vcpu *vcpu;
 	u64 ns;
 
-	timer = container_of(hrt, struct arch_timer_cpu, phys_timer);
-	vcpu = container_of(timer, struct kvm_vcpu, arch.timer_cpu);
-	ptimer = vcpu_ptimer(vcpu);
+	ptimer = container_of(hrt, struct arch_timer_context, hrtimer);
+	vcpu = ptimer->vcpu;
 
 	/*
 	 * Check that the timer has really expired from the guest's
@@ -209,9 +207,10 @@ static enum hrtimer_restart kvm_phys_timer_expire(struct hrtimer *hrt)
 
 static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx)
 {
+	struct arch_timer_cpu *timer = vcpu_timer(timer_ctx->vcpu);
 	u64 cval, now;
 
-	if (timer_ctx->loaded) {
+	if (timer->loaded) {
 		u32 cnt_ctl;
 
 		/* Only the virtual timer can be loaded so far */
@@ -280,7 +279,6 @@ static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level,
 /* Schedule the background timer for the emulated timer. */
 static void phys_timer_emulate(struct kvm_vcpu *vcpu)
 {
-	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
 	struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
 
 	/*
@@ -289,11 +287,11 @@ static void phys_timer_emulate(struct kvm_vcpu *vcpu)
 	 * then we also don't need a soft timer.
 	 */
 	if (kvm_timer_should_fire(ptimer) || !kvm_timer_irq_can_fire(ptimer)) {
-		soft_timer_cancel(&timer->phys_timer);
+		soft_timer_cancel(&ptimer->hrtimer);
 		return;
 	}
 
-	soft_timer_start(&timer->phys_timer, kvm_timer_compute_delta(ptimer));
+	soft_timer_start(&ptimer->hrtimer, kvm_timer_compute_delta(ptimer));
 }
 
 /*
@@ -303,7 +301,7 @@ static void phys_timer_emulate(struct kvm_vcpu *vcpu)
  */
 static void kvm_timer_update_state(struct kvm_vcpu *vcpu)
 {
-	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
 	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
 	struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
 	bool level;
@@ -329,13 +327,13 @@ static void kvm_timer_update_state(struct kvm_vcpu *vcpu)
 
 static void vtimer_save_state(struct kvm_vcpu *vcpu)
 {
-	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
 	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
 	unsigned long flags;
 
 	local_irq_save(flags);
 
-	if (!vtimer->loaded)
+	if (!timer->loaded)
 		goto out;
 
 	if (timer->enabled) {
@@ -347,7 +345,7 @@ static void vtimer_save_state(struct kvm_vcpu *vcpu)
 	write_sysreg_el0(0, cntv_ctl);
 	isb();
 
-	vtimer->loaded = false;
+	timer->loaded = false;
 out:
 	local_irq_restore(flags);
 }
@@ -359,7 +357,7 @@ out:
  */
 static void kvm_timer_blocking(struct kvm_vcpu *vcpu)
 {
-	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
 	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
 	struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
 
@@ -379,20 +377,20 @@ static void kvm_timer_blocking(struct kvm_vcpu *vcpu)
 
 static void kvm_timer_unblocking(struct kvm_vcpu *vcpu)
 {
-	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
 
 	soft_timer_cancel(&timer->bg_timer);
 }
 
 static void vtimer_restore_state(struct kvm_vcpu *vcpu)
 {
-	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
 	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
 	unsigned long flags;
 
 	local_irq_save(flags);
 
-	if (vtimer->loaded)
+	if (timer->loaded)
 		goto out;
 
 	if (timer->enabled) {
@@ -401,7 +399,7 @@ static void vtimer_restore_state(struct kvm_vcpu *vcpu)
 		write_sysreg_el0(vtimer->cnt_ctl, cntv_ctl);
 	}
 
-	vtimer->loaded = true;
+	timer->loaded = true;
 out:
 	local_irq_restore(flags);
 }
@@ -462,7 +460,7 @@ static void kvm_timer_vcpu_load_nogic(struct kvm_vcpu *vcpu)
 
 void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu)
 {
-	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
 	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
 	struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
 
@@ -507,7 +505,8 @@ bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu)
 
 void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
 {
-	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
+	struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
 
 	if (unlikely(!timer->enabled))
 		return;
@@ -523,7 +522,7 @@ void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
 	 * In any case, we re-schedule the hrtimer for the physical timer when
 	 * coming back to the VCPU thread in kvm_timer_vcpu_load().
 	 */
-	soft_timer_cancel(&timer->phys_timer);
+	soft_timer_cancel(&ptimer->hrtimer);
 
 	if (swait_active(kvm_arch_vcpu_wq(vcpu)))
 		kvm_timer_blocking(vcpu);
@@ -559,7 +558,7 @@ static void unmask_vtimer_irq_user(struct kvm_vcpu *vcpu)
 
 void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu)
 {
-	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
 
 	if (unlikely(!timer->enabled))
 		return;
@@ -570,7 +569,7 @@ void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu)
 
 int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu)
 {
-	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
 	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
 	struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
 
@@ -611,22 +610,25 @@ static void update_vtimer_cntvoff(struct kvm_vcpu *vcpu, u64 cntvoff)
 
 void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu)
 {
-	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
 	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
 	struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
 
 	/* Synchronize cntvoff across all vtimers of a VM. */
 	update_vtimer_cntvoff(vcpu, kvm_phys_timer_read());
-	vcpu_ptimer(vcpu)->cntvoff = 0;
+	ptimer->cntvoff = 0;
 
 	hrtimer_init(&timer->bg_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
 	timer->bg_timer.function = kvm_bg_timer_expire;
 
-	hrtimer_init(&timer->phys_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
-	timer->phys_timer.function = kvm_phys_timer_expire;
+	hrtimer_init(&ptimer->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+	ptimer->hrtimer.function = kvm_phys_timer_expire;
 
 	vtimer->irq.irq = default_vtimer_irq.irq;
 	ptimer->irq.irq = default_ptimer_irq.irq;
+
+	vtimer->vcpu = vcpu;
+	ptimer->vcpu = vcpu;
 }
 
 static void kvm_timer_init_interrupt(void *info)
@@ -860,7 +862,7 @@ out_free_irq:
 
 void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu)
 {
-	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
 
 	soft_timer_cancel(&timer->bg_timer);
 }
@@ -904,7 +906,7 @@ bool kvm_arch_timer_get_input_level(int vintid)
 
 int kvm_timer_enable(struct kvm_vcpu *vcpu)
 {
-	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
 	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
 	int ret;
 
-- 
cgit v1.2.3


From 9e01dc76be6a3b5768cb02130d2ff0055a68809a Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@arm.com>
Date: Tue, 19 Feb 2019 14:04:30 +0100
Subject: KVM: arm/arm64: arch_timer: Assign the phys timer on VHE systems

VHE systems don't have to emulate the physical timer, we can simply
assign the EL1 physical timer directly to the VM as the host always
uses the EL2 timers.

In order to minimize the amount of cruft, AArch32 gets definitions for
the physical timer too, but is should be generally unused on this
architecture.

Co-written with Marc Zyngier <marc.zyngier@arm.com>

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@arm.com>
---
 arch/arm/include/asm/kvm_hyp.h |   4 +
 include/kvm/arm_arch_timer.h   |   6 ++
 virt/kvm/arm/arch_timer.c      | 219 ++++++++++++++++++++++++++++++++---------
 3 files changed, 180 insertions(+), 49 deletions(-)

(limited to 'include')

diff --git a/arch/arm/include/asm/kvm_hyp.h b/arch/arm/include/asm/kvm_hyp.h
index e93a0cac9add..87bcd18df8d5 100644
--- a/arch/arm/include/asm/kvm_hyp.h
+++ b/arch/arm/include/asm/kvm_hyp.h
@@ -40,6 +40,7 @@
 #define TTBR1		__ACCESS_CP15_64(1, c2)
 #define VTTBR		__ACCESS_CP15_64(6, c2)
 #define PAR		__ACCESS_CP15_64(0, c7)
+#define CNTP_CVAL	__ACCESS_CP15_64(2, c14)
 #define CNTV_CVAL	__ACCESS_CP15_64(3, c14)
 #define CNTVOFF		__ACCESS_CP15_64(4, c14)
 
@@ -85,6 +86,7 @@
 #define TID_PRIV	__ACCESS_CP15(c13, 0, c0, 4)
 #define HTPIDR		__ACCESS_CP15(c13, 4, c0, 2)
 #define CNTKCTL		__ACCESS_CP15(c14, 0, c1, 0)
+#define CNTP_CTL	__ACCESS_CP15(c14, 0, c2, 1)
 #define CNTV_CTL	__ACCESS_CP15(c14, 0, c3, 1)
 #define CNTHCTL		__ACCESS_CP15(c14, 4, c1, 0)
 
@@ -94,6 +96,8 @@
 #define read_sysreg_el0(r)		read_sysreg(r##_el0)
 #define write_sysreg_el0(v, r)		write_sysreg(v, r##_el0)
 
+#define cntp_ctl_el0			CNTP_CTL
+#define cntp_cval_el0			CNTP_CVAL
 #define cntv_ctl_el0			CNTV_CTL
 #define cntv_cval_el0			CNTV_CVAL
 #define cntvoff_el2			CNTVOFF
diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h
index ab835112204d..6d4a33a9c45a 100644
--- a/include/kvm/arm_arch_timer.h
+++ b/include/kvm/arm_arch_timer.h
@@ -50,6 +50,10 @@ struct arch_timer_context {
 
 	/* Emulated Timer (may be unused) */
 	struct hrtimer			hrtimer;
+
+	/* Duplicated state from arch_timer.c for convenience */
+	u32				host_timer_irq;
+	u32				host_timer_irq_flags;
 };
 
 struct arch_timer_cpu {
@@ -104,6 +108,8 @@ bool kvm_arch_timer_get_input_level(int vintid);
 #define vcpu_vtimer(v)	(&(v)->arch.timer_cpu.timers[TIMER_VTIMER])
 #define vcpu_ptimer(v)	(&(v)->arch.timer_cpu.timers[TIMER_PTIMER])
 
+#define arch_timer_ctx_index(ctx)	((ctx) - vcpu_timer((ctx)->vcpu)->timers)
+
 u64 kvm_arm_timer_read_sysreg(struct kvm_vcpu *vcpu,
 			      enum kvm_arch_timers tmr,
 			      enum kvm_arch_timer_regs treg);
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index 471f9fd004c9..10c15151c87e 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -35,7 +35,9 @@
 
 static struct timecounter *timecounter;
 static unsigned int host_vtimer_irq;
+static unsigned int host_ptimer_irq;
 static u32 host_vtimer_irq_flags;
+static u32 host_ptimer_irq_flags;
 
 static DEFINE_STATIC_KEY_FALSE(has_gic_active_state);
 
@@ -86,20 +88,24 @@ static void soft_timer_cancel(struct hrtimer *hrt)
 static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
 {
 	struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id;
-	struct arch_timer_context *vtimer;
+	struct arch_timer_context *ctx;
 
 	/*
 	 * We may see a timer interrupt after vcpu_put() has been called which
 	 * sets the CPU's vcpu pointer to NULL, because even though the timer
-	 * has been disabled in vtimer_save_state(), the hardware interrupt
+	 * has been disabled in timer_save_state(), the hardware interrupt
 	 * signal may not have been retired from the interrupt controller yet.
 	 */
 	if (!vcpu)
 		return IRQ_HANDLED;
 
-	vtimer = vcpu_vtimer(vcpu);
-	if (kvm_timer_should_fire(vtimer))
-		kvm_timer_update_irq(vcpu, true, vtimer);
+	if (irq == host_vtimer_irq)
+		ctx = vcpu_vtimer(vcpu);
+	else
+		ctx = vcpu_ptimer(vcpu);
+
+	if (kvm_timer_should_fire(ctx))
+		kvm_timer_update_irq(vcpu, true, ctx);
 
 	if (userspace_irqchip(vcpu->kvm) &&
 	    !static_branch_unlikely(&has_gic_active_state))
@@ -208,13 +214,25 @@ static enum hrtimer_restart kvm_phys_timer_expire(struct hrtimer *hrt)
 static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx)
 {
 	struct arch_timer_cpu *timer = vcpu_timer(timer_ctx->vcpu);
+	enum kvm_arch_timers index = arch_timer_ctx_index(timer_ctx);
 	u64 cval, now;
 
 	if (timer->loaded) {
-		u32 cnt_ctl;
+		u32 cnt_ctl = 0;
+
+		switch (index) {
+		case TIMER_VTIMER:
+			cnt_ctl = read_sysreg_el0(cntv_ctl);
+			break;
+		case TIMER_PTIMER:
+			cnt_ctl = read_sysreg_el0(cntp_ctl);
+			break;
+		case NR_KVM_TIMERS:
+			/* GCC is braindead */
+			cnt_ctl = 0;
+			break;
+		}
 
-		/* Only the virtual timer can be loaded so far */
-		cnt_ctl = read_sysreg_el0(cntv_ctl);
 		return  (cnt_ctl & ARCH_TIMER_CTRL_ENABLE) &&
 		        (cnt_ctl & ARCH_TIMER_CTRL_IT_STAT) &&
 		       !(cnt_ctl & ARCH_TIMER_CTRL_IT_MASK);
@@ -310,7 +328,7 @@ static void kvm_timer_update_state(struct kvm_vcpu *vcpu)
 		return;
 
 	/*
-	 * The vtimer virtual interrupt is a 'mapped' interrupt, meaning part
+	 * If the timer virtual interrupt is a 'mapped' interrupt, part
 	 * of its lifecycle is offloaded to the hardware, and we therefore may
 	 * not have lowered the irq.level value before having to signal a new
 	 * interrupt, but have to signal an interrupt every time the level is
@@ -319,31 +337,55 @@ static void kvm_timer_update_state(struct kvm_vcpu *vcpu)
 	level = kvm_timer_should_fire(vtimer);
 	kvm_timer_update_irq(vcpu, level, vtimer);
 
+	if (has_vhe()) {
+		level = kvm_timer_should_fire(ptimer);
+		kvm_timer_update_irq(vcpu, level, ptimer);
+
+		return;
+	}
+
 	phys_timer_emulate(vcpu);
 
 	if (kvm_timer_should_fire(ptimer) != ptimer->irq.level)
 		kvm_timer_update_irq(vcpu, !ptimer->irq.level, ptimer);
 }
 
-static void vtimer_save_state(struct kvm_vcpu *vcpu)
+static void timer_save_state(struct arch_timer_context *ctx)
 {
-	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
-	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
+	struct arch_timer_cpu *timer = vcpu_timer(ctx->vcpu);
+	enum kvm_arch_timers index = arch_timer_ctx_index(ctx);
 	unsigned long flags;
 
+	if (!timer->enabled)
+		return;
+
 	local_irq_save(flags);
 
 	if (!timer->loaded)
 		goto out;
 
-	if (timer->enabled) {
-		vtimer->cnt_ctl = read_sysreg_el0(cntv_ctl);
-		vtimer->cnt_cval = read_sysreg_el0(cntv_cval);
-	}
+	switch (index) {
+	case TIMER_VTIMER:
+		ctx->cnt_ctl = read_sysreg_el0(cntv_ctl);
+		ctx->cnt_cval = read_sysreg_el0(cntv_cval);
 
-	/* Disable the virtual timer */
-	write_sysreg_el0(0, cntv_ctl);
-	isb();
+		/* Disable the timer */
+		write_sysreg_el0(0, cntv_ctl);
+		isb();
+
+		break;
+	case TIMER_PTIMER:
+		ctx->cnt_ctl = read_sysreg_el0(cntp_ctl);
+		ctx->cnt_cval = read_sysreg_el0(cntp_cval);
+
+		/* Disable the timer */
+		write_sysreg_el0(0, cntp_ctl);
+		isb();
+
+		break;
+	case NR_KVM_TIMERS:
+		break; /* GCC is braindead */
+	}
 
 	timer->loaded = false;
 out:
@@ -382,21 +424,33 @@ static void kvm_timer_unblocking(struct kvm_vcpu *vcpu)
 	soft_timer_cancel(&timer->bg_timer);
 }
 
-static void vtimer_restore_state(struct kvm_vcpu *vcpu)
+static void timer_restore_state(struct arch_timer_context *ctx)
 {
-	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
-	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
+	struct arch_timer_cpu *timer = vcpu_timer(ctx->vcpu);
+	enum kvm_arch_timers index = arch_timer_ctx_index(ctx);
 	unsigned long flags;
 
+	if (!timer->enabled)
+		return;
+
 	local_irq_save(flags);
 
 	if (timer->loaded)
 		goto out;
 
-	if (timer->enabled) {
-		write_sysreg_el0(vtimer->cnt_cval, cntv_cval);
+	switch (index) {
+	case TIMER_VTIMER:
+		write_sysreg_el0(ctx->cnt_cval, cntv_cval);
+		isb();
+		write_sysreg_el0(ctx->cnt_ctl, cntv_ctl);
+		break;
+	case TIMER_PTIMER:
+		write_sysreg_el0(ctx->cnt_cval, cntp_cval);
 		isb();
-		write_sysreg_el0(vtimer->cnt_ctl, cntv_ctl);
+		write_sysreg_el0(ctx->cnt_ctl, cntp_ctl);
+		break;
+	case NR_KVM_TIMERS:
+		break; /* GCC is braindead */
 	}
 
 	timer->loaded = true;
@@ -419,23 +473,23 @@ static void set_cntvoff(u64 cntvoff)
 	kvm_call_hyp(__kvm_timer_set_cntvoff, low, high);
 }
 
-static inline void set_vtimer_irq_phys_active(struct kvm_vcpu *vcpu, bool active)
+static inline void set_timer_irq_phys_active(struct arch_timer_context *ctx, bool active)
 {
 	int r;
-	r = irq_set_irqchip_state(host_vtimer_irq, IRQCHIP_STATE_ACTIVE, active);
+	r = irq_set_irqchip_state(ctx->host_timer_irq, IRQCHIP_STATE_ACTIVE, active);
 	WARN_ON(r);
 }
 
-static void kvm_timer_vcpu_load_gic(struct kvm_vcpu *vcpu)
+static void kvm_timer_vcpu_load_gic(struct arch_timer_context *ctx)
 {
-	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
+	struct kvm_vcpu *vcpu = ctx->vcpu;
 	bool phys_active;
 
 	if (irqchip_in_kernel(vcpu->kvm))
-		phys_active = kvm_vgic_map_is_active(vcpu, vtimer->irq.irq);
+		phys_active = kvm_vgic_map_is_active(vcpu, ctx->irq.irq);
 	else
-		phys_active = vtimer->irq.level;
-	set_vtimer_irq_phys_active(vcpu, phys_active);
+		phys_active = ctx->irq.level;
+	set_timer_irq_phys_active(ctx, phys_active);
 }
 
 static void kvm_timer_vcpu_load_nogic(struct kvm_vcpu *vcpu)
@@ -467,14 +521,22 @@ void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu)
 	if (unlikely(!timer->enabled))
 		return;
 
-	if (static_branch_likely(&has_gic_active_state))
-		kvm_timer_vcpu_load_gic(vcpu);
-	else
+	if (static_branch_likely(&has_gic_active_state)) {
+		kvm_timer_vcpu_load_gic(vtimer);
+		if (has_vhe())
+			kvm_timer_vcpu_load_gic(ptimer);
+	} else {
 		kvm_timer_vcpu_load_nogic(vcpu);
+	}
 
 	set_cntvoff(vtimer->cntvoff);
 
-	vtimer_restore_state(vcpu);
+	timer_restore_state(vtimer);
+
+	if (has_vhe()) {
+		timer_restore_state(ptimer);
+		return;
+	}
 
 	/* Set the background timer for the physical timer emulation. */
 	phys_timer_emulate(vcpu);
@@ -506,12 +568,17 @@ bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu)
 void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
 {
 	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
+	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
 	struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
 
 	if (unlikely(!timer->enabled))
 		return;
 
-	vtimer_save_state(vcpu);
+	timer_save_state(vtimer);
+	if (has_vhe()) {
+		timer_save_state(ptimer);
+		return;
+	}
 
 	/*
 	 * Cancel the physical timer emulation, because the only case where we
@@ -534,8 +601,7 @@ void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
 	 * counter of non-VHE case. For VHE, the virtual counter uses a fixed
 	 * virtual offset of zero, so no need to zero CNTVOFF_EL2 register.
 	 */
-	if (!has_vhe())
-		set_cntvoff(0);
+	set_cntvoff(0);
 }
 
 /*
@@ -550,7 +616,7 @@ static void unmask_vtimer_irq_user(struct kvm_vcpu *vcpu)
 	if (!kvm_timer_should_fire(vtimer)) {
 		kvm_timer_update_irq(vcpu, false, vtimer);
 		if (static_branch_likely(&has_gic_active_state))
-			set_vtimer_irq_phys_active(vcpu, false);
+			set_timer_irq_phys_active(vtimer, false);
 		else
 			enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags);
 	}
@@ -625,7 +691,12 @@ void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu)
 	ptimer->hrtimer.function = kvm_phys_timer_expire;
 
 	vtimer->irq.irq = default_vtimer_irq.irq;
+	vtimer->host_timer_irq = host_vtimer_irq;
+	vtimer->host_timer_irq_flags = host_vtimer_irq_flags;
+
 	ptimer->irq.irq = default_ptimer_irq.irq;
+	ptimer->host_timer_irq = host_ptimer_irq;
+	ptimer->host_timer_irq_flags = host_ptimer_irq_flags;
 
 	vtimer->vcpu = vcpu;
 	ptimer->vcpu = vcpu;
@@ -634,6 +705,7 @@ void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu)
 static void kvm_timer_init_interrupt(void *info)
 {
 	enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags);
+	enable_percpu_irq(host_ptimer_irq, host_ptimer_irq_flags);
 }
 
 int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value)
@@ -815,6 +887,8 @@ int kvm_timer_hyp_init(bool has_gic)
 		return -ENODEV;
 	}
 
+	/* First, do the virtual EL1 timer irq */
+
 	if (info->virtual_irq <= 0) {
 		kvm_err("kvm_arch_timer: invalid virtual timer IRQ: %d\n",
 			info->virtual_irq);
@@ -825,15 +899,15 @@ int kvm_timer_hyp_init(bool has_gic)
 	host_vtimer_irq_flags = irq_get_trigger_type(host_vtimer_irq);
 	if (host_vtimer_irq_flags != IRQF_TRIGGER_HIGH &&
 	    host_vtimer_irq_flags != IRQF_TRIGGER_LOW) {
-		kvm_err("Invalid trigger for IRQ%d, assuming level low\n",
+		kvm_err("Invalid trigger for vtimer IRQ%d, assuming level low\n",
 			host_vtimer_irq);
 		host_vtimer_irq_flags = IRQF_TRIGGER_LOW;
 	}
 
 	err = request_percpu_irq(host_vtimer_irq, kvm_arch_timer_handler,
-				 "kvm guest timer", kvm_get_running_vcpus());
+				 "kvm guest vtimer", kvm_get_running_vcpus());
 	if (err) {
-		kvm_err("kvm_arch_timer: can't request interrupt %d (%d)\n",
+		kvm_err("kvm_arch_timer: can't request vtimer interrupt %d (%d)\n",
 			host_vtimer_irq, err);
 		return err;
 	}
@@ -851,6 +925,43 @@ int kvm_timer_hyp_init(bool has_gic)
 
 	kvm_debug("virtual timer IRQ%d\n", host_vtimer_irq);
 
+	/* Now let's do the physical EL1 timer irq */
+
+	if (info->physical_irq > 0) {
+		host_ptimer_irq = info->physical_irq;
+		host_ptimer_irq_flags = irq_get_trigger_type(host_ptimer_irq);
+		if (host_ptimer_irq_flags != IRQF_TRIGGER_HIGH &&
+		    host_ptimer_irq_flags != IRQF_TRIGGER_LOW) {
+			kvm_err("Invalid trigger for ptimer IRQ%d, assuming level low\n",
+				host_ptimer_irq);
+			host_ptimer_irq_flags = IRQF_TRIGGER_LOW;
+		}
+
+		err = request_percpu_irq(host_ptimer_irq, kvm_arch_timer_handler,
+					 "kvm guest ptimer", kvm_get_running_vcpus());
+		if (err) {
+			kvm_err("kvm_arch_timer: can't request ptimer interrupt %d (%d)\n",
+				host_ptimer_irq, err);
+			return err;
+		}
+
+		if (has_gic) {
+			err = irq_set_vcpu_affinity(host_ptimer_irq,
+						    kvm_get_running_vcpus());
+			if (err) {
+				kvm_err("kvm_arch_timer: error setting vcpu affinity\n");
+				goto out_free_irq;
+			}
+		}
+
+		kvm_debug("physical timer IRQ%d\n", host_ptimer_irq);
+	} else if (has_vhe()) {
+		kvm_err("kvm_arch_timer: invalid physical timer IRQ: %d\n",
+			info->physical_irq);
+		err = -ENODEV;
+		goto out_free_irq;
+	}
+
 	cpuhp_setup_state(CPUHP_AP_KVM_ARM_TIMER_STARTING,
 			  "kvm/arm/timer:starting", kvm_timer_starting_cpu,
 			  kvm_timer_dying_cpu);
@@ -898,8 +1009,10 @@ bool kvm_arch_timer_get_input_level(int vintid)
 
 	if (vintid == vcpu_vtimer(vcpu)->irq.irq)
 		timer = vcpu_vtimer(vcpu);
+	else if (vintid == vcpu_ptimer(vcpu)->irq.irq)
+		timer = vcpu_ptimer(vcpu);
 	else
-		BUG(); /* We only map the vtimer so far */
+		BUG();
 
 	return kvm_timer_should_fire(timer);
 }
@@ -908,6 +1021,7 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu)
 {
 	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
 	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
+	struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
 	int ret;
 
 	if (timer->enabled)
@@ -930,14 +1044,21 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu)
 	if (ret)
 		return ret;
 
+	if (has_vhe()) {
+		ret = kvm_vgic_map_phys_irq(vcpu, host_ptimer_irq, ptimer->irq.irq,
+					    kvm_arch_timer_get_input_level);
+		if (ret)
+			return ret;
+	}
+
 no_vgic:
 	timer->enabled = 1;
 	return 0;
 }
 
 /*
- * On VHE system, we only need to configure trap on physical timer and counter
- * accesses in EL0 and EL1 once, not for every world switch.
+ * On VHE system, we only need to configure the EL2 timer trap register once,
+ * not for every world switch.
  * The host kernel runs at EL2 with HCR_EL2.TGE == 1,
  * and this makes those bits have no effect for the host kernel execution.
  */
@@ -948,11 +1069,11 @@ void kvm_timer_init_vhe(void)
 	u64 val;
 
 	/*
-	 * Disallow physical timer access for the guest.
-	 * Physical counter access is allowed.
+	 * VHE systems allow the guest direct access to the EL1 physical
+	 * timer/counter.
 	 */
 	val = read_sysreg(cnthctl_el2);
-	val &= ~(CNTHCTL_EL1PCEN << cnthctl_shift);
+	val |= (CNTHCTL_EL1PCEN << cnthctl_shift);
 	val |= (CNTHCTL_EL1PCTEN << cnthctl_shift);
 	write_sysreg(val, cnthctl_el2);
 }
-- 
cgit v1.2.3


From bee038a67487598ebbe995f85bf60c3a5b2e9099 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@arm.com>
Date: Fri, 4 Jan 2019 13:31:22 +0100
Subject: KVM: arm/arm64: Rework the timer code to use a timer_map

We are currently emulating two timers in two different ways.  When we
add support for nested virtualization in the future, we are going to be
emulating either two timers in two diffferent ways, or four timers in a
single way.

We need a unified data structure to keep track of how we map virtual
state to physical state and we need to cleanup some of the timer code to
operate more independently on a struct arch_timer_context instead of
trying to consider the global state of the VCPU and recomputing all
state.

Co-written with Marc Zyngier <marc.zyngier@arm.com>

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@arm.com>
---
 include/kvm/arm_arch_timer.h |  23 ++--
 virt/kvm/arm/arch_timer.c    | 295 +++++++++++++++++++++++--------------------
 virt/kvm/arm/trace.h         | 105 +++++++++++++++
 3 files changed, 278 insertions(+), 145 deletions(-)

(limited to 'include')

diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h
index 6d4a33a9c45a..05a18dd265b5 100644
--- a/include/kvm/arm_arch_timer.h
+++ b/include/kvm/arm_arch_timer.h
@@ -51,11 +51,24 @@ struct arch_timer_context {
 	/* Emulated Timer (may be unused) */
 	struct hrtimer			hrtimer;
 
+	/*
+	 * We have multiple paths which can save/restore the timer state onto
+	 * the hardware, so we need some way of keeping track of where the
+	 * latest state is.
+	 */
+	bool				loaded;
+
 	/* Duplicated state from arch_timer.c for convenience */
 	u32				host_timer_irq;
 	u32				host_timer_irq_flags;
 };
 
+struct timer_map {
+	struct arch_timer_context *direct_vtimer;
+	struct arch_timer_context *direct_ptimer;
+	struct arch_timer_context *emul_ptimer;
+};
+
 struct arch_timer_cpu {
 	struct arch_timer_context timers[NR_KVM_TIMERS];
 
@@ -64,16 +77,6 @@ struct arch_timer_cpu {
 
 	/* Is the timer enabled */
 	bool			enabled;
-
-	/*
-	 * We have multiple paths which can save/restore the timer state
-	 * onto the hardware, so we need some way of keeping track of
-	 * where the latest state is.
-	 *
-	 * loaded == true:  State is loaded on the hardware registers.
-	 * loaded == false: State is stored in memory.
-	 */
-	bool			loaded;
 };
 
 int kvm_timer_hyp_init(bool);
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index 10c15151c87e..17f9de73cc8a 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -68,6 +68,21 @@ u64 kvm_phys_timer_read(void)
 	return timecounter->cc->read(timecounter->cc);
 }
 
+static void get_timer_map(struct kvm_vcpu *vcpu, struct timer_map *map)
+{
+	if (has_vhe()) {
+		map->direct_vtimer = vcpu_vtimer(vcpu);
+		map->direct_ptimer = vcpu_ptimer(vcpu);
+		map->emul_ptimer = NULL;
+	} else {
+		map->direct_vtimer = vcpu_vtimer(vcpu);
+		map->direct_ptimer = NULL;
+		map->emul_ptimer = vcpu_ptimer(vcpu);
+	}
+
+	trace_kvm_get_timer_map(vcpu->vcpu_id, map);
+}
+
 static inline bool userspace_irqchip(struct kvm *kvm)
 {
 	return static_branch_unlikely(&userspace_irqchip_in_use) &&
@@ -89,6 +104,7 @@ static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
 {
 	struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id;
 	struct arch_timer_context *ctx;
+	struct timer_map map;
 
 	/*
 	 * We may see a timer interrupt after vcpu_put() has been called which
@@ -99,10 +115,12 @@ static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
 	if (!vcpu)
 		return IRQ_HANDLED;
 
+	get_timer_map(vcpu, &map);
+
 	if (irq == host_vtimer_irq)
-		ctx = vcpu_vtimer(vcpu);
+		ctx = map.direct_vtimer;
 	else
-		ctx = vcpu_ptimer(vcpu);
+		ctx = map.direct_ptimer;
 
 	if (kvm_timer_should_fire(ctx))
 		kvm_timer_update_irq(vcpu, true, ctx);
@@ -136,7 +154,9 @@ static u64 kvm_timer_compute_delta(struct arch_timer_context *timer_ctx)
 
 static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx)
 {
-	return !(timer_ctx->cnt_ctl & ARCH_TIMER_CTRL_IT_MASK) &&
+	WARN_ON(timer_ctx && timer_ctx->loaded);
+	return timer_ctx &&
+	       !(timer_ctx->cnt_ctl & ARCH_TIMER_CTRL_IT_MASK) &&
 		(timer_ctx->cnt_ctl & ARCH_TIMER_CTRL_ENABLE);
 }
 
@@ -146,21 +166,22 @@ static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx)
  */
 static u64 kvm_timer_earliest_exp(struct kvm_vcpu *vcpu)
 {
-	u64 min_virt = ULLONG_MAX, min_phys = ULLONG_MAX;
-	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
-	struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
+	u64 min_delta = ULLONG_MAX;
+	int i;
 
-	if (kvm_timer_irq_can_fire(vtimer))
-		min_virt = kvm_timer_compute_delta(vtimer);
+	for (i = 0; i < NR_KVM_TIMERS; i++) {
+		struct arch_timer_context *ctx = &vcpu->arch.timer_cpu.timers[i];
 
-	if (kvm_timer_irq_can_fire(ptimer))
-		min_phys = kvm_timer_compute_delta(ptimer);
+		WARN(ctx->loaded, "timer %d loaded\n", i);
+		if (kvm_timer_irq_can_fire(ctx))
+			min_delta = min(min_delta, kvm_timer_compute_delta(ctx));
+	}
 
 	/* If none of timers can fire, then return 0 */
-	if ((min_virt == ULLONG_MAX) && (min_phys == ULLONG_MAX))
+	if (min_delta == ULLONG_MAX)
 		return 0;
 
-	return min(min_virt, min_phys);
+	return min_delta;
 }
 
 static enum hrtimer_restart kvm_bg_timer_expire(struct hrtimer *hrt)
@@ -187,37 +208,45 @@ static enum hrtimer_restart kvm_bg_timer_expire(struct hrtimer *hrt)
 	return HRTIMER_NORESTART;
 }
 
-static enum hrtimer_restart kvm_phys_timer_expire(struct hrtimer *hrt)
+static enum hrtimer_restart kvm_hrtimer_expire(struct hrtimer *hrt)
 {
-	struct arch_timer_context *ptimer;
+	struct arch_timer_context *ctx;
 	struct kvm_vcpu *vcpu;
 	u64 ns;
 
-	ptimer = container_of(hrt, struct arch_timer_context, hrtimer);
-	vcpu = ptimer->vcpu;
+	ctx = container_of(hrt, struct arch_timer_context, hrtimer);
+	vcpu = ctx->vcpu;
+
+	trace_kvm_timer_hrtimer_expire(ctx);
 
 	/*
 	 * Check that the timer has really expired from the guest's
 	 * PoV (NTP on the host may have forced it to expire
 	 * early). If not ready, schedule for a later time.
 	 */
-	ns = kvm_timer_compute_delta(ptimer);
+	ns = kvm_timer_compute_delta(ctx);
 	if (unlikely(ns)) {
 		hrtimer_forward_now(hrt, ns_to_ktime(ns));
 		return HRTIMER_RESTART;
 	}
 
-	kvm_timer_update_irq(vcpu, true, ptimer);
+	kvm_timer_update_irq(vcpu, true, ctx);
 	return HRTIMER_NORESTART;
 }
 
 static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx)
 {
-	struct arch_timer_cpu *timer = vcpu_timer(timer_ctx->vcpu);
-	enum kvm_arch_timers index = arch_timer_ctx_index(timer_ctx);
+	struct arch_timer_cpu *timer;
+	enum kvm_arch_timers index;
 	u64 cval, now;
 
-	if (timer->loaded) {
+	if (!timer_ctx)
+		return false;
+
+	timer = vcpu_timer(timer_ctx->vcpu);
+	index = arch_timer_ctx_index(timer_ctx);
+
+	if (timer_ctx->loaded) {
 		u32 cnt_ctl = 0;
 
 		switch (index) {
@@ -249,13 +278,13 @@ static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx)
 
 bool kvm_timer_is_pending(struct kvm_vcpu *vcpu)
 {
-	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
-	struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
+	struct timer_map map;
 
-	if (kvm_timer_should_fire(vtimer))
-		return true;
+	get_timer_map(vcpu, &map);
 
-	return kvm_timer_should_fire(ptimer);
+	return kvm_timer_should_fire(map.direct_vtimer) ||
+	       kvm_timer_should_fire(map.direct_ptimer) ||
+	       kvm_timer_should_fire(map.emul_ptimer);
 }
 
 /*
@@ -294,60 +323,28 @@ static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level,
 	}
 }
 
-/* Schedule the background timer for the emulated timer. */
-static void phys_timer_emulate(struct kvm_vcpu *vcpu)
+static void timer_emulate(struct arch_timer_context *ctx)
 {
-	struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
+	bool should_fire = kvm_timer_should_fire(ctx);
 
-	/*
-	 * If the timer can fire now, we don't need to have a soft timer
-	 * scheduled for the future.  If the timer cannot fire at all,
-	 * then we also don't need a soft timer.
-	 */
-	if (kvm_timer_should_fire(ptimer) || !kvm_timer_irq_can_fire(ptimer)) {
-		soft_timer_cancel(&ptimer->hrtimer);
-		return;
-	}
-
-	soft_timer_start(&ptimer->hrtimer, kvm_timer_compute_delta(ptimer));
-}
-
-/*
- * Check if there was a change in the timer state, so that we should either
- * raise or lower the line level to the GIC or schedule a background timer to
- * emulate the physical timer.
- */
-static void kvm_timer_update_state(struct kvm_vcpu *vcpu)
-{
-	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
-	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
-	struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
-	bool level;
+	trace_kvm_timer_emulate(ctx, should_fire);
 
-	if (unlikely(!timer->enabled))
+	if (should_fire) {
+		kvm_timer_update_irq(ctx->vcpu, true, ctx);
 		return;
+	}
 
 	/*
-	 * If the timer virtual interrupt is a 'mapped' interrupt, part
-	 * of its lifecycle is offloaded to the hardware, and we therefore may
-	 * not have lowered the irq.level value before having to signal a new
-	 * interrupt, but have to signal an interrupt every time the level is
-	 * asserted.
+	 * If the timer can fire now, we don't need to have a soft timer
+	 * scheduled for the future.  If the timer cannot fire at all,
+	 * then we also don't need a soft timer.
 	 */
-	level = kvm_timer_should_fire(vtimer);
-	kvm_timer_update_irq(vcpu, level, vtimer);
-
-	if (has_vhe()) {
-		level = kvm_timer_should_fire(ptimer);
-		kvm_timer_update_irq(vcpu, level, ptimer);
-
+	if (!kvm_timer_irq_can_fire(ctx)) {
+		soft_timer_cancel(&ctx->hrtimer);
 		return;
 	}
 
-	phys_timer_emulate(vcpu);
-
-	if (kvm_timer_should_fire(ptimer) != ptimer->irq.level)
-		kvm_timer_update_irq(vcpu, !ptimer->irq.level, ptimer);
+	soft_timer_start(&ctx->hrtimer, kvm_timer_compute_delta(ctx));
 }
 
 static void timer_save_state(struct arch_timer_context *ctx)
@@ -361,7 +358,7 @@ static void timer_save_state(struct arch_timer_context *ctx)
 
 	local_irq_save(flags);
 
-	if (!timer->loaded)
+	if (!ctx->loaded)
 		goto out;
 
 	switch (index) {
@@ -384,10 +381,12 @@ static void timer_save_state(struct arch_timer_context *ctx)
 
 		break;
 	case NR_KVM_TIMERS:
-		break; /* GCC is braindead */
+		BUG();
 	}
 
-	timer->loaded = false;
+	trace_kvm_timer_save_state(ctx);
+
+	ctx->loaded = false;
 out:
 	local_irq_restore(flags);
 }
@@ -400,14 +399,17 @@ out:
 static void kvm_timer_blocking(struct kvm_vcpu *vcpu)
 {
 	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
-	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
-	struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
+	struct timer_map map;
+
+	get_timer_map(vcpu, &map);
 
 	/*
-	 * If both timers are not capable of raising interrupts (disabled or
+	 * If no timers are capable of raising interrupts (disabled or
 	 * masked), then there's no more work for us to do.
 	 */
-	if (!kvm_timer_irq_can_fire(vtimer) && !kvm_timer_irq_can_fire(ptimer))
+	if (!kvm_timer_irq_can_fire(map.direct_vtimer) &&
+	    !kvm_timer_irq_can_fire(map.direct_ptimer) &&
+	    !kvm_timer_irq_can_fire(map.emul_ptimer))
 		return;
 
 	/*
@@ -435,7 +437,7 @@ static void timer_restore_state(struct arch_timer_context *ctx)
 
 	local_irq_save(flags);
 
-	if (timer->loaded)
+	if (ctx->loaded)
 		goto out;
 
 	switch (index) {
@@ -450,10 +452,12 @@ static void timer_restore_state(struct arch_timer_context *ctx)
 		write_sysreg_el0(ctx->cnt_ctl, cntp_ctl);
 		break;
 	case NR_KVM_TIMERS:
-		break; /* GCC is braindead */
+		BUG();
 	}
 
-	timer->loaded = true;
+	trace_kvm_timer_restore_state(ctx);
+
+	ctx->loaded = true;
 out:
 	local_irq_restore(flags);
 }
@@ -515,37 +519,31 @@ static void kvm_timer_vcpu_load_nogic(struct kvm_vcpu *vcpu)
 void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu)
 {
 	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
-	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
-	struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
+	struct timer_map map;
 
 	if (unlikely(!timer->enabled))
 		return;
 
+	get_timer_map(vcpu, &map);
+
 	if (static_branch_likely(&has_gic_active_state)) {
-		kvm_timer_vcpu_load_gic(vtimer);
-		if (has_vhe())
-			kvm_timer_vcpu_load_gic(ptimer);
+		kvm_timer_vcpu_load_gic(map.direct_vtimer);
+		if (map.direct_ptimer)
+			kvm_timer_vcpu_load_gic(map.direct_ptimer);
 	} else {
 		kvm_timer_vcpu_load_nogic(vcpu);
 	}
 
-	set_cntvoff(vtimer->cntvoff);
-
-	timer_restore_state(vtimer);
-
-	if (has_vhe()) {
-		timer_restore_state(ptimer);
-		return;
-	}
-
-	/* Set the background timer for the physical timer emulation. */
-	phys_timer_emulate(vcpu);
+	set_cntvoff(map.direct_vtimer->cntvoff);
 
 	kvm_timer_unblocking(vcpu);
 
-	/* If the timer fired while we weren't running, inject it now */
-	if (kvm_timer_should_fire(ptimer) != ptimer->irq.level)
-		kvm_timer_update_irq(vcpu, !ptimer->irq.level, ptimer);
+	timer_restore_state(map.direct_vtimer);
+	if (map.direct_ptimer)
+		timer_restore_state(map.direct_ptimer);
+
+	if (map.emul_ptimer)
+		timer_emulate(map.emul_ptimer);
 }
 
 bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu)
@@ -568,20 +566,19 @@ bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu)
 void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
 {
 	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
-	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
-	struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
+	struct timer_map map;
 
 	if (unlikely(!timer->enabled))
 		return;
 
-	timer_save_state(vtimer);
-	if (has_vhe()) {
-		timer_save_state(ptimer);
-		return;
-	}
+	get_timer_map(vcpu, &map);
+
+	timer_save_state(map.direct_vtimer);
+	if (map.direct_ptimer)
+		timer_save_state(map.direct_ptimer);
 
 	/*
-	 * Cancel the physical timer emulation, because the only case where we
+	 * Cancel soft timer emulation, because the only case where we
 	 * need it after a vcpu_put is in the context of a sleeping VCPU, and
 	 * in that case we already factor in the deadline for the physical
 	 * timer when scheduling the bg_timer.
@@ -589,7 +586,8 @@ void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
 	 * In any case, we re-schedule the hrtimer for the physical timer when
 	 * coming back to the VCPU thread in kvm_timer_vcpu_load().
 	 */
-	soft_timer_cancel(&ptimer->hrtimer);
+	if (map.emul_ptimer)
+		soft_timer_cancel(&map.emul_ptimer->hrtimer);
 
 	if (swait_active(kvm_arch_vcpu_wq(vcpu)))
 		kvm_timer_blocking(vcpu);
@@ -636,8 +634,9 @@ void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu)
 int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu)
 {
 	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
-	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
-	struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
+	struct timer_map map;
+
+	get_timer_map(vcpu, &map);
 
 	/*
 	 * The bits in CNTV_CTL are architecturally reset to UNKNOWN for ARMv8
@@ -645,12 +644,22 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu)
 	 * resets the timer to be disabled and unmasked and is compliant with
 	 * the ARMv7 architecture.
 	 */
-	vtimer->cnt_ctl = 0;
-	ptimer->cnt_ctl = 0;
-	kvm_timer_update_state(vcpu);
+	vcpu_vtimer(vcpu)->cnt_ctl = 0;
+	vcpu_ptimer(vcpu)->cnt_ctl = 0;
+
+	if (timer->enabled) {
+		kvm_timer_update_irq(vcpu, false, vcpu_vtimer(vcpu));
+		kvm_timer_update_irq(vcpu, false, vcpu_ptimer(vcpu));
+
+		if (irqchip_in_kernel(vcpu->kvm)) {
+			kvm_vgic_reset_mapped_irq(vcpu, map.direct_vtimer->irq.irq);
+			if (map.direct_ptimer)
+				kvm_vgic_reset_mapped_irq(vcpu, map.direct_ptimer->irq.irq);
+		}
+	}
 
-	if (timer->enabled && irqchip_in_kernel(vcpu->kvm))
-		kvm_vgic_reset_mapped_irq(vcpu, vtimer->irq.irq);
+	if (map.emul_ptimer)
+		soft_timer_cancel(&map.emul_ptimer->hrtimer);
 
 	return 0;
 }
@@ -687,15 +696,18 @@ void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu)
 	hrtimer_init(&timer->bg_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
 	timer->bg_timer.function = kvm_bg_timer_expire;
 
+	hrtimer_init(&vtimer->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
 	hrtimer_init(&ptimer->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
-	ptimer->hrtimer.function = kvm_phys_timer_expire;
+	vtimer->hrtimer.function = kvm_hrtimer_expire;
+	ptimer->hrtimer.function = kvm_hrtimer_expire;
 
 	vtimer->irq.irq = default_vtimer_irq.irq;
-	vtimer->host_timer_irq = host_vtimer_irq;
-	vtimer->host_timer_irq_flags = host_vtimer_irq_flags;
-
 	ptimer->irq.irq = default_ptimer_irq.irq;
+
+	vtimer->host_timer_irq = host_vtimer_irq;
 	ptimer->host_timer_irq = host_ptimer_irq;
+
+	vtimer->host_timer_irq_flags = host_vtimer_irq_flags;
 	ptimer->host_timer_irq_flags = host_ptimer_irq_flags;
 
 	vtimer->vcpu = vcpu;
@@ -710,32 +722,39 @@ static void kvm_timer_init_interrupt(void *info)
 
 int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value)
 {
+	struct arch_timer_context *timer;
+	bool level;
+
 	switch (regid) {
 	case KVM_REG_ARM_TIMER_CTL:
-		kvm_arm_timer_write(vcpu,
-				    vcpu_vtimer(vcpu), TIMER_REG_CTL, value);
+		timer = vcpu_vtimer(vcpu);
+		kvm_arm_timer_write(vcpu, timer, TIMER_REG_CTL, value);
 		break;
 	case KVM_REG_ARM_TIMER_CNT:
+		timer = vcpu_vtimer(vcpu);
 		update_vtimer_cntvoff(vcpu, kvm_phys_timer_read() - value);
 		break;
 	case KVM_REG_ARM_TIMER_CVAL:
-		kvm_arm_timer_write(vcpu,
-				    vcpu_vtimer(vcpu), TIMER_REG_CVAL, value);
+		timer = vcpu_vtimer(vcpu);
+		kvm_arm_timer_write(vcpu, timer, TIMER_REG_CVAL, value);
 		break;
 	case KVM_REG_ARM_PTIMER_CTL:
-		kvm_arm_timer_write(vcpu,
-				    vcpu_ptimer(vcpu), TIMER_REG_CTL, value);
+		timer = vcpu_ptimer(vcpu);
+		kvm_arm_timer_write(vcpu, timer, TIMER_REG_CTL, value);
 		break;
 	case KVM_REG_ARM_PTIMER_CVAL:
-		kvm_arm_timer_write(vcpu,
-				    vcpu_ptimer(vcpu), TIMER_REG_CVAL, value);
+		timer = vcpu_ptimer(vcpu);
+		kvm_arm_timer_write(vcpu, timer, TIMER_REG_CVAL, value);
 		break;
 
 	default:
 		return -1;
 	}
 
-	kvm_timer_update_state(vcpu);
+	level = kvm_timer_should_fire(timer);
+	kvm_timer_update_irq(vcpu, level, timer);
+	timer_emulate(timer);
+
 	return 0;
 }
 
@@ -1020,8 +1039,7 @@ bool kvm_arch_timer_get_input_level(int vintid)
 int kvm_timer_enable(struct kvm_vcpu *vcpu)
 {
 	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
-	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
-	struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
+	struct timer_map map;
 	int ret;
 
 	if (timer->enabled)
@@ -1039,18 +1057,25 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu)
 		return -EINVAL;
 	}
 
-	ret = kvm_vgic_map_phys_irq(vcpu, host_vtimer_irq, vtimer->irq.irq,
+	get_timer_map(vcpu, &map);
+
+	ret = kvm_vgic_map_phys_irq(vcpu,
+				    map.direct_vtimer->host_timer_irq,
+				    map.direct_vtimer->irq.irq,
 				    kvm_arch_timer_get_input_level);
 	if (ret)
 		return ret;
 
-	if (has_vhe()) {
-		ret = kvm_vgic_map_phys_irq(vcpu, host_ptimer_irq, ptimer->irq.irq,
+	if (map.direct_ptimer) {
+		ret = kvm_vgic_map_phys_irq(vcpu,
+					    map.direct_ptimer->host_timer_irq,
+					    map.direct_ptimer->irq.irq,
 					    kvm_arch_timer_get_input_level);
-		if (ret)
-			return ret;
 	}
 
+	if (ret)
+		return ret;
+
 no_vgic:
 	timer->enabled = 1;
 	return 0;
diff --git a/virt/kvm/arm/trace.h b/virt/kvm/arm/trace.h
index 3828beab93f2..54bb059243b9 100644
--- a/virt/kvm/arm/trace.h
+++ b/virt/kvm/arm/trace.h
@@ -2,6 +2,7 @@
 #if !defined(_TRACE_KVM_H) || defined(TRACE_HEADER_MULTI_READ)
 #define _TRACE_KVM_H
 
+#include <kvm/arm_arch_timer.h>
 #include <linux/tracepoint.h>
 
 #undef TRACE_SYSTEM
@@ -262,6 +263,110 @@ TRACE_EVENT(kvm_timer_update_irq,
 		  __entry->vcpu_id, __entry->irq, __entry->level)
 );
 
+TRACE_EVENT(kvm_get_timer_map,
+	TP_PROTO(unsigned long vcpu_id, struct timer_map *map),
+	TP_ARGS(vcpu_id, map),
+
+	TP_STRUCT__entry(
+		__field(	unsigned long,		vcpu_id	)
+		__field(	int,			direct_vtimer	)
+		__field(	int,			direct_ptimer	)
+		__field(	int,			emul_ptimer	)
+	),
+
+	TP_fast_assign(
+		__entry->vcpu_id		= vcpu_id;
+		__entry->direct_vtimer		= arch_timer_ctx_index(map->direct_vtimer);
+		__entry->direct_ptimer =
+			(map->direct_ptimer) ? arch_timer_ctx_index(map->direct_ptimer) : -1;
+		__entry->emul_ptimer =
+			(map->emul_ptimer) ? arch_timer_ctx_index(map->emul_ptimer) : -1;
+	),
+
+	TP_printk("VCPU: %ld, dv: %d, dp: %d, ep: %d",
+		  __entry->vcpu_id,
+		  __entry->direct_vtimer,
+		  __entry->direct_ptimer,
+		  __entry->emul_ptimer)
+);
+
+TRACE_EVENT(kvm_timer_save_state,
+	TP_PROTO(struct arch_timer_context *ctx),
+	TP_ARGS(ctx),
+
+	TP_STRUCT__entry(
+		__field(	unsigned long,		ctl		)
+		__field(	unsigned long long,	cval		)
+		__field(	int,			timer_idx	)
+	),
+
+	TP_fast_assign(
+		__entry->ctl			= ctx->cnt_ctl;
+		__entry->cval			= ctx->cnt_cval;
+		__entry->timer_idx		= arch_timer_ctx_index(ctx);
+	),
+
+	TP_printk("   CTL: %#08lx CVAL: %#16llx arch_timer_ctx_index: %d",
+		  __entry->ctl,
+		  __entry->cval,
+		  __entry->timer_idx)
+);
+
+TRACE_EVENT(kvm_timer_restore_state,
+	TP_PROTO(struct arch_timer_context *ctx),
+	TP_ARGS(ctx),
+
+	TP_STRUCT__entry(
+		__field(	unsigned long,		ctl		)
+		__field(	unsigned long long,	cval		)
+		__field(	int,			timer_idx	)
+	),
+
+	TP_fast_assign(
+		__entry->ctl			= ctx->cnt_ctl;
+		__entry->cval			= ctx->cnt_cval;
+		__entry->timer_idx		= arch_timer_ctx_index(ctx);
+	),
+
+	TP_printk("CTL: %#08lx CVAL: %#16llx arch_timer_ctx_index: %d",
+		  __entry->ctl,
+		  __entry->cval,
+		  __entry->timer_idx)
+);
+
+TRACE_EVENT(kvm_timer_hrtimer_expire,
+	TP_PROTO(struct arch_timer_context *ctx),
+	TP_ARGS(ctx),
+
+	TP_STRUCT__entry(
+		__field(	int,			timer_idx	)
+	),
+
+	TP_fast_assign(
+		__entry->timer_idx		= arch_timer_ctx_index(ctx);
+	),
+
+	TP_printk("arch_timer_ctx_index: %d", __entry->timer_idx)
+);
+
+TRACE_EVENT(kvm_timer_emulate,
+	TP_PROTO(struct arch_timer_context *ctx, bool should_fire),
+	TP_ARGS(ctx, should_fire),
+
+	TP_STRUCT__entry(
+		__field(	int,			timer_idx	)
+		__field(	bool,			should_fire	)
+	),
+
+	TP_fast_assign(
+		__entry->timer_idx		= arch_timer_ctx_index(ctx);
+		__entry->should_fire		= should_fire;
+	),
+
+	TP_printk("arch_timer_ctx_index: %d (should_fire: %d)",
+		  __entry->timer_idx, __entry->should_fire)
+);
+
 #endif /* _TRACE_KVM_H */
 
 #undef TRACE_INCLUDE_PATH
-- 
cgit v1.2.3


From 2736d94f351b92749c07efef01f7c10548e39ad8 Mon Sep 17 00:00:00 2001
From: Aya Levin <ayal@mellanox.com>
Date: Tue, 22 Jan 2019 10:50:10 +0200
Subject: ethtool: Added support for 50Gbps per lane link modes

Added support for 50Gbps per lane link modes. Define various 50G, 100G
and 200G link modes using it.

Signed-off-by: Aya Levin <ayal@mellanox.com>
Reviewed-by: Eran Ben Elisha <eranbe@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 include/uapi/linux/ethtool.h | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index 17be76aeb468..378c52308d89 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -1453,6 +1453,21 @@ enum ethtool_link_mode_bit_indices {
 	ETHTOOL_LINK_MODE_FEC_NONE_BIT	= 49,
 	ETHTOOL_LINK_MODE_FEC_RS_BIT	= 50,
 	ETHTOOL_LINK_MODE_FEC_BASER_BIT	= 51,
+	ETHTOOL_LINK_MODE_50000baseKR_Full_BIT		 = 52,
+	ETHTOOL_LINK_MODE_50000baseSR_Full_BIT		 = 53,
+	ETHTOOL_LINK_MODE_50000baseCR_Full_BIT		 = 54,
+	ETHTOOL_LINK_MODE_50000baseLR_ER_FR_Full_BIT	 = 55,
+	ETHTOOL_LINK_MODE_50000baseDR_Full_BIT		 = 56,
+	ETHTOOL_LINK_MODE_100000baseKR2_Full_BIT	 = 57,
+	ETHTOOL_LINK_MODE_100000baseSR2_Full_BIT	 = 58,
+	ETHTOOL_LINK_MODE_100000baseCR2_Full_BIT	 = 59,
+	ETHTOOL_LINK_MODE_100000baseLR2_ER2_FR2_Full_BIT = 60,
+	ETHTOOL_LINK_MODE_100000baseDR2_Full_BIT	 = 61,
+	ETHTOOL_LINK_MODE_200000baseKR4_Full_BIT	 = 62,
+	ETHTOOL_LINK_MODE_200000baseSR4_Full_BIT	 = 63,
+	ETHTOOL_LINK_MODE_200000baseLR4_ER4_FR4_Full_BIT = 64,
+	ETHTOOL_LINK_MODE_200000baseDR4_Full_BIT	 = 65,
+	ETHTOOL_LINK_MODE_200000baseCR4_Full_BIT	 = 66,
 
 	/* Last allowed bit for __ETHTOOL_LINK_MODE_LEGACY_MASK is bit
 	 * 31. Please do NOT define any SUPPORTED_* or ADVERTISED_*
@@ -1461,7 +1476,7 @@ enum ethtool_link_mode_bit_indices {
 	 */
 
 	__ETHTOOL_LINK_MODE_LAST
-	  = ETHTOOL_LINK_MODE_FEC_BASER_BIT,
+	  = ETHTOOL_LINK_MODE_200000baseCR4_Full_BIT,
 };
 
 #define __ETHTOOL_LINK_MODE_LEGACY_MASK(base_name)	\
@@ -1569,6 +1584,7 @@ enum ethtool_link_mode_bit_indices {
 #define SPEED_50000		50000
 #define SPEED_56000		56000
 #define SPEED_100000		100000
+#define SPEED_200000		200000
 
 #define SPEED_UNKNOWN		-1
 
-- 
cgit v1.2.3


From 58066ac9d7f5dcde4ef08c03b7e127f0522d9ea0 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Tue, 19 Feb 2019 14:21:20 +0000
Subject: ptp_qoriq: don't pass a large struct by value but instead pass it by
 reference

Passing the struct ptp_clock_info caps by parameter is passing over 130 bytes
of data by value on the stack. Optimize this by passing it by reference instead.
Also shinks the object code size:

Before:
   text	   data	    bss	    dec	    hex	filename
  12596	   2160	     64	  14820	   39e4	drivers/ptp/ptp_qoriq.o

After:
   text	   data	    bss	    dec	    hex	filename
  12567	   2160	     64	  14791	   39c7	drivers/ptp/ptp_qoriq.o

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/enetc/enetc_ptp.c | 2 +-
 drivers/ptp/ptp_qoriq.c                          | 6 +++---
 include/linux/fsl/ptp_qoriq.h                    | 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/freescale/enetc/enetc_ptp.c b/drivers/net/ethernet/freescale/enetc/enetc_ptp.c
index dc2f58a7c9e5..8c1497e7d9c5 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_ptp.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_ptp.c
@@ -92,7 +92,7 @@ static int enetc_ptp_probe(struct pci_dev *pdev,
 
 	ptp_qoriq->dev = &pdev->dev;
 
-	err = ptp_qoriq_init(ptp_qoriq, base, enetc_ptp_caps);
+	err = ptp_qoriq_init(ptp_qoriq, base, &enetc_ptp_caps);
 	if (err)
 		goto err_no_clock;
 
diff --git a/drivers/ptp/ptp_qoriq.c b/drivers/ptp/ptp_qoriq.c
index 42d3654f77f0..53775362aac6 100644
--- a/drivers/ptp/ptp_qoriq.c
+++ b/drivers/ptp/ptp_qoriq.c
@@ -459,7 +459,7 @@ static int ptp_qoriq_auto_config(struct ptp_qoriq *ptp_qoriq,
 }
 
 int ptp_qoriq_init(struct ptp_qoriq *ptp_qoriq, void __iomem *base,
-		   const struct ptp_clock_info caps)
+		   const struct ptp_clock_info *caps)
 {
 	struct device_node *node = ptp_qoriq->dev->of_node;
 	struct ptp_qoriq_registers *regs;
@@ -468,7 +468,7 @@ int ptp_qoriq_init(struct ptp_qoriq *ptp_qoriq, void __iomem *base,
 	u32 tmr_ctrl;
 
 	ptp_qoriq->base = base;
-	ptp_qoriq->caps = caps;
+	ptp_qoriq->caps = *caps;
 
 	if (of_property_read_u32(node, "fsl,cksel", &ptp_qoriq->cksel))
 		ptp_qoriq->cksel = DEFAULT_CKSEL;
@@ -605,7 +605,7 @@ static int ptp_qoriq_probe(struct platform_device *dev)
 		goto no_ioremap;
 	}
 
-	err = ptp_qoriq_init(ptp_qoriq, base, ptp_qoriq_caps);
+	err = ptp_qoriq_init(ptp_qoriq, base, &ptp_qoriq_caps);
 	if (err)
 		goto no_clock;
 
diff --git a/include/linux/fsl/ptp_qoriq.h b/include/linux/fsl/ptp_qoriq.h
index f127adb71041..992bf9fa1729 100644
--- a/include/linux/fsl/ptp_qoriq.h
+++ b/include/linux/fsl/ptp_qoriq.h
@@ -183,7 +183,7 @@ static inline void qoriq_write_le(unsigned __iomem *addr, u32 val)
 
 irqreturn_t ptp_qoriq_isr(int irq, void *priv);
 int ptp_qoriq_init(struct ptp_qoriq *ptp_qoriq, void __iomem *base,
-		   const struct ptp_clock_info caps);
+		   const struct ptp_clock_info *caps);
 void ptp_qoriq_free(struct ptp_qoriq *ptp_qoriq);
 int ptp_qoriq_adjfine(struct ptp_clock_info *ptp, long scaled_ppm);
 int ptp_qoriq_adjtime(struct ptp_clock_info *ptp, s64 delta);
-- 
cgit v1.2.3


From 04fb53101edef67517f2d5dc00c1a5eb707fe101 Mon Sep 17 00:00:00 2001
From: Artur Rojek <contact@artur-rojek.eu>
Date: Sun, 17 Feb 2019 15:29:11 +0100
Subject: power: supply: core: Add a field to support battery max voltage

Add a field for "voltage_max_design_uv" to present fully charged
battery voltage.

Signed-off-by: Artur Rojek <contact@artur-rojek.eu>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 drivers/power/supply/power_supply_core.c | 3 +++
 include/linux/power_supply.h             | 1 +
 2 files changed, 4 insertions(+)

(limited to 'include')

diff --git a/drivers/power/supply/power_supply_core.c b/drivers/power/supply/power_supply_core.c
index 07a85e19615c..c917a8b43b2b 100644
--- a/drivers/power/supply/power_supply_core.c
+++ b/drivers/power/supply/power_supply_core.c
@@ -573,6 +573,7 @@ int power_supply_get_battery_info(struct power_supply *psy,
 	info->energy_full_design_uwh         = -EINVAL;
 	info->charge_full_design_uah         = -EINVAL;
 	info->voltage_min_design_uv          = -EINVAL;
+	info->voltage_max_design_uv          = -EINVAL;
 	info->precharge_current_ua           = -EINVAL;
 	info->charge_term_current_ua         = -EINVAL;
 	info->constant_charge_current_max_ua = -EINVAL;
@@ -613,6 +614,8 @@ int power_supply_get_battery_info(struct power_supply *psy,
 			     &info->charge_full_design_uah);
 	of_property_read_u32(battery_np, "voltage-min-design-microvolt",
 			     &info->voltage_min_design_uv);
+	of_property_read_u32(battery_np, "voltage-max-design-microvolt",
+			     &info->voltage_max_design_uv);
 	of_property_read_u32(battery_np, "precharge-current-microamp",
 			     &info->precharge_current_ua);
 	of_property_read_u32(battery_np, "charge-term-current-microamp",
diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h
index 57b2ab82b951..2f9c201a54d1 100644
--- a/include/linux/power_supply.h
+++ b/include/linux/power_supply.h
@@ -332,6 +332,7 @@ struct power_supply_battery_info {
 	int energy_full_design_uwh;	    /* microWatt-hours */
 	int charge_full_design_uah;	    /* microAmp-hours */
 	int voltage_min_design_uv;	    /* microVolts */
+	int voltage_max_design_uv;	    /* microVolts */
 	int precharge_current_ua;	    /* microAmps */
 	int charge_term_current_ua;	    /* microAmps */
 	int constant_charge_current_max_ua; /* microAmps */
-- 
cgit v1.2.3


From 8beb90aaf334a6efa3e924339926b5f93a234dbb Mon Sep 17 00:00:00 2001
From: Sedat Dilek <sedat.dilek@gmail.com>
Date: Fri, 15 Feb 2019 13:19:20 +0100
Subject: scsi: fcoe: make use of fip_mode enum complete

commit 1917d42d14b7 ("fcoe: use enum for fip_mode") introduces a separate
enum for the fip_mode that shall be used during initialisation handling
until it is passed to fcoe_ctrl_link_up to set the initial fip_state.  That
change was incomplete and gcc quietly converted in various places between
the fip_mode and the fip_state enum values with implicit enum conversions,
which fortunately cannot cause any issues in the actual code's execution.

clang however warns about these implicit enum conversions in the scsi
drivers. This commit consolidates the use of the two enums, guided by
clang's enum-conversion warnings.

This commit now completes the use of the fip_mode: It expects and uses
fip_mode in {bnx2fc,fcoe}_interface_create and fcoe_ctlr_init, and it calls
fcoe_ctrl_set_set() with the correct values in fcoe_ctlr_link_up().  It
also breaks the association between FIP_MODE_AUTO and FIP_ST_AUTO to
indicate these two enums are distinct.

Link: https://github.com/ClangBuiltLinux/linux/issues/151
Fixes: 1917d42d14b7 ("fcoe: use enum for fip_mode")
Reported-by: Dmitry Golovin <dima@golovin.in>
Original-by: Lukas Bulwahn <lukas.bulwahn@gmail.com>
CC: Lukas Bulwahn <lukas.bulwahn@gmail.com>
CC: Nick Desaulniers <ndesaulniers@google.com>
CC: Nathan Chancellor <natechancellor@gmail.com>
Reviewed-by: Nathan Chancellor <natechancellor@gmail.com>
Tested-by: Nathan Chancellor <natechancellor@gmail.com>
Suggested-by: Johannes Thumshirn <jthumshirn@suse.de>
Signed-off-by: Sedat Dilek <sedat.dilek@gmail.com>
Signed-off-by: Hannes Reinecke <hare@suse.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/bnx2fc/bnx2fc_fcoe.c  | 2 +-
 drivers/scsi/fcoe/fcoe.c           | 2 +-
 drivers/scsi/fcoe/fcoe_ctlr.c      | 7 +++++--
 drivers/scsi/fcoe/fcoe_transport.c | 2 +-
 drivers/scsi/qedf/qedf_main.c      | 2 +-
 include/scsi/libfcoe.h             | 4 ++--
 6 files changed, 11 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c
index 2e4e7159ebf9..a75e74ad1698 100644
--- a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c
+++ b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c
@@ -1438,7 +1438,7 @@ bind_err:
 static struct bnx2fc_interface *
 bnx2fc_interface_create(struct bnx2fc_hba *hba,
 			struct net_device *netdev,
-			enum fip_state fip_mode)
+			enum fip_mode fip_mode)
 {
 	struct fcoe_ctlr_device *ctlr_dev;
 	struct bnx2fc_interface *interface;
diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c
index cd19be3f3405..8ba8862d3292 100644
--- a/drivers/scsi/fcoe/fcoe.c
+++ b/drivers/scsi/fcoe/fcoe.c
@@ -389,7 +389,7 @@ static int fcoe_interface_setup(struct fcoe_interface *fcoe,
  * Returns: pointer to a struct fcoe_interface or NULL on error
  */
 static struct fcoe_interface *fcoe_interface_create(struct net_device *netdev,
-						    enum fip_state fip_mode)
+						    enum fip_mode fip_mode)
 {
 	struct fcoe_ctlr_device *ctlr_dev;
 	struct fcoe_ctlr *ctlr;
diff --git a/drivers/scsi/fcoe/fcoe_ctlr.c b/drivers/scsi/fcoe/fcoe_ctlr.c
index 54da3166da8d..7dc4ffa24430 100644
--- a/drivers/scsi/fcoe/fcoe_ctlr.c
+++ b/drivers/scsi/fcoe/fcoe_ctlr.c
@@ -147,7 +147,7 @@ static void fcoe_ctlr_map_dest(struct fcoe_ctlr *fip)
  * fcoe_ctlr_init() - Initialize the FCoE Controller instance
  * @fip: The FCoE controller to initialize
  */
-void fcoe_ctlr_init(struct fcoe_ctlr *fip, enum fip_state mode)
+void fcoe_ctlr_init(struct fcoe_ctlr *fip, enum fip_mode mode)
 {
 	fcoe_ctlr_set_state(fip, FIP_ST_LINK_WAIT);
 	fip->mode = mode;
@@ -454,7 +454,10 @@ void fcoe_ctlr_link_up(struct fcoe_ctlr *fip)
 		mutex_unlock(&fip->ctlr_mutex);
 		fc_linkup(fip->lp);
 	} else if (fip->state == FIP_ST_LINK_WAIT) {
-		fcoe_ctlr_set_state(fip, fip->mode);
+		if (fip->mode == FIP_MODE_NON_FIP)
+			fcoe_ctlr_set_state(fip, FIP_ST_NON_FIP);
+		else
+			fcoe_ctlr_set_state(fip, FIP_ST_AUTO);
 		switch (fip->mode) {
 		default:
 			LIBFCOE_FIP_DBG(fip, "invalid mode %d\n", fip->mode);
diff --git a/drivers/scsi/fcoe/fcoe_transport.c b/drivers/scsi/fcoe/fcoe_transport.c
index 6d3949b687ef..29fe3426f9f2 100644
--- a/drivers/scsi/fcoe/fcoe_transport.c
+++ b/drivers/scsi/fcoe/fcoe_transport.c
@@ -872,7 +872,7 @@ static int fcoe_transport_create(const char *buffer,
 	int rc = -ENODEV;
 	struct net_device *netdev = NULL;
 	struct fcoe_transport *ft = NULL;
-	enum fip_state fip_mode = (enum fip_state)(long)kp->arg;
+	enum fip_mode fip_mode = (enum fip_mode)kp->arg;
 
 	mutex_lock(&ft_mutex);
 
diff --git a/drivers/scsi/qedf/qedf_main.c b/drivers/scsi/qedf/qedf_main.c
index edcaf4b0cb0b..d43342b327ac 100644
--- a/drivers/scsi/qedf/qedf_main.c
+++ b/drivers/scsi/qedf/qedf_main.c
@@ -1417,7 +1417,7 @@ static struct libfc_function_template qedf_lport_template = {
 
 static void qedf_fcoe_ctlr_setup(struct qedf_ctx *qedf)
 {
-	fcoe_ctlr_init(&qedf->ctlr, FIP_ST_AUTO);
+	fcoe_ctlr_init(&qedf->ctlr, FIP_MODE_AUTO);
 
 	qedf->ctlr.send = qedf_fip_send;
 	qedf->ctlr.get_src_addr = qedf_get_src_mac;
diff --git a/include/scsi/libfcoe.h b/include/scsi/libfcoe.h
index cb8a273732cf..bb8092fa1e36 100644
--- a/include/scsi/libfcoe.h
+++ b/include/scsi/libfcoe.h
@@ -79,7 +79,7 @@ enum fip_state {
  * It must not change after fcoe_ctlr_init() sets it.
  */
 enum fip_mode {
-	FIP_MODE_AUTO = FIP_ST_AUTO,
+	FIP_MODE_AUTO,
 	FIP_MODE_NON_FIP,
 	FIP_MODE_FABRIC,
 	FIP_MODE_VN2VN,
@@ -250,7 +250,7 @@ struct fcoe_rport {
 };
 
 /* FIP API functions */
-void fcoe_ctlr_init(struct fcoe_ctlr *, enum fip_state);
+void fcoe_ctlr_init(struct fcoe_ctlr *, enum fip_mode);
 void fcoe_ctlr_destroy(struct fcoe_ctlr *);
 void fcoe_ctlr_link_up(struct fcoe_ctlr *);
 int fcoe_ctlr_link_down(struct fcoe_ctlr *);
-- 
cgit v1.2.3


From c2261dd76b549754c14c8ac7cadadd0993b182d6 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@mellanox.com>
Date: Tue, 12 Feb 2019 21:12:50 -0700
Subject: RDMA/device: Add ib_device_set_netdev() as an alternative to
 get_netdev

The associated netdev should not actually be very dynamic, so for most
drivers there is no reason for a callback like this. Provide an API to
inform the core code about the net dev affiliation and use a core
maintained data structure instead.

This allows the core code to be more aware of the ndev relationship which
will allow some new APIs based around this.

This also uses locking that makes some kind of sense, many drivers had a
confusing RCU lock, or missing locking which isn't right.

Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/core/cache.c     |  24 +++---
 drivers/infiniband/core/core_priv.h |   3 +
 drivers/infiniband/core/device.c    | 166 ++++++++++++++++++++++++++++++++----
 drivers/infiniband/core/nldev.c     |   4 +-
 drivers/infiniband/core/verbs.c     |   5 +-
 include/rdma/ib_verbs.h             |   7 ++
 6 files changed, 171 insertions(+), 38 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
index a28dc1901c80..43c67e5f43c6 100644
--- a/drivers/infiniband/core/cache.c
+++ b/drivers/infiniband/core/cache.c
@@ -547,21 +547,19 @@ int ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
 	unsigned long mask;
 	int ret;
 
-	if (ib_dev->ops.get_netdev) {
-		idev = ib_dev->ops.get_netdev(ib_dev, port);
-		if (idev && attr->ndev != idev) {
-			union ib_gid default_gid;
-
-			/* Adding default GIDs in not permitted */
-			make_default_gid(idev, &default_gid);
-			if (!memcmp(gid, &default_gid, sizeof(*gid))) {
-				dev_put(idev);
-				return -EPERM;
-			}
-		}
-		if (idev)
+	idev = ib_device_get_netdev(ib_dev, port);
+	if (idev && attr->ndev != idev) {
+		union ib_gid default_gid;
+
+		/* Adding default GIDs is not permitted */
+		make_default_gid(idev, &default_gid);
+		if (!memcmp(gid, &default_gid, sizeof(*gid))) {
 			dev_put(idev);
+			return -EPERM;
+		}
 	}
+	if (idev)
+		dev_put(idev);
 
 	mask = GID_ATTR_FIND_MASK_GID |
 	       GID_ATTR_FIND_MASK_GID_TYPE |
diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h
index eeabe9ca8427..08c690249594 100644
--- a/drivers/infiniband/core/core_priv.h
+++ b/drivers/infiniband/core/core_priv.h
@@ -66,6 +66,9 @@ typedef void (*roce_netdev_callback)(struct ib_device *device, u8 port,
 typedef bool (*roce_netdev_filter)(struct ib_device *device, u8 port,
 				   struct net_device *idev, void *cookie);
 
+struct net_device *ib_device_get_netdev(struct ib_device *ib_dev,
+					unsigned int port);
+
 void ib_enum_roce_netdev(struct ib_device *ib_dev,
 			 roce_netdev_filter filter,
 			 void *filter_cookie,
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 8d7d63a60ef5..7680a64a98bc 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -134,6 +134,7 @@ static void *xan_find_marked(struct xarray *xa, unsigned long *indexp,
 	     !xa_is_err(entry);                                                \
 	     (index)++, entry = xan_find_marked(xa, &(index), filter))
 
+static void free_netdevs(struct ib_device *ib_dev);
 static int ib_security_change(struct notifier_block *nb, unsigned long event,
 			      void *lsm_data);
 static void ib_policy_change_task(struct work_struct *work);
@@ -290,6 +291,7 @@ static void ib_device_release(struct device *device)
 {
 	struct ib_device *dev = container_of(device, struct ib_device, dev);
 
+	free_netdevs(dev);
 	WARN_ON(refcount_read(&dev->refcount));
 	ib_cache_release_one(dev);
 	ib_security_release_port_pkey_list(dev);
@@ -371,6 +373,9 @@ EXPORT_SYMBOL(_ib_alloc_device);
  */
 void ib_dealloc_device(struct ib_device *device)
 {
+	/* Expedite releasing netdev references */
+	free_netdevs(device);
+
 	WARN_ON(!xa_empty(&device->client_data));
 	WARN_ON(refcount_read(&device->refcount));
 	rdma_restrack_clean(device);
@@ -461,16 +466,16 @@ static void remove_client_context(struct ib_device *device,
 	up_read(&device->client_data_rwsem);
 }
 
-static int verify_immutable(const struct ib_device *dev, u8 port)
-{
-	return WARN_ON(!rdma_cap_ib_mad(dev, port) &&
-			    rdma_max_mad_size(dev, port) != 0);
-}
-
-static int setup_port_data(struct ib_device *device)
+static int alloc_port_data(struct ib_device *device)
 {
 	unsigned int port;
-	int ret;
+
+	if (device->port_data)
+		return 0;
+
+	/* This can only be called once the physical port range is defined */
+	if (WARN_ON(!device->phys_port_cnt))
+		return -EINVAL;
 
 	/*
 	 * device->port_data is indexed directly by the port number to make
@@ -489,6 +494,28 @@ static int setup_port_data(struct ib_device *device)
 
 		spin_lock_init(&pdata->pkey_list_lock);
 		INIT_LIST_HEAD(&pdata->pkey_list);
+		spin_lock_init(&pdata->netdev_lock);
+	}
+	return 0;
+}
+
+static int verify_immutable(const struct ib_device *dev, u8 port)
+{
+	return WARN_ON(!rdma_cap_ib_mad(dev, port) &&
+			    rdma_max_mad_size(dev, port) != 0);
+}
+
+static int setup_port_data(struct ib_device *device)
+{
+	unsigned int port;
+	int ret;
+
+	ret = alloc_port_data(device);
+	if (ret)
+		return ret;
+
+	rdma_for_each_port (device, port) {
+		struct ib_port_data *pdata = &device->port_data[port];
 
 		ret = device->ops.get_port_immutable(device, port,
 						     &pdata->immutable);
@@ -682,6 +709,9 @@ static void disable_device(struct ib_device *device)
 	/* Pairs with refcount_set in enable_device */
 	ib_device_put(device);
 	wait_for_completion(&device->unreg_completion);
+
+	/* Expedite removing unregistered pointers from the hash table */
+	free_netdevs(device);
 }
 
 /*
@@ -1012,6 +1042,114 @@ int ib_query_port(struct ib_device *device,
 }
 EXPORT_SYMBOL(ib_query_port);
 
+/**
+ * ib_device_set_netdev - Associate the ib_dev with an underlying net_device
+ * @ib_dev: Device to modify
+ * @ndev: net_device to affiliate, may be NULL
+ * @port: IB port the net_device is connected to
+ *
+ * Drivers should use this to link the ib_device to a netdev so the netdev
+ * shows up in interfaces like ib_enum_roce_netdev. Only one netdev may be
+ * affiliated with any port.
+ *
+ * The caller must ensure that the given ndev is not unregistered or
+ * unregistering, and that either the ib_device is unregistered or
+ * ib_device_set_netdev() is called with NULL when the ndev sends a
+ * NETDEV_UNREGISTER event.
+ */
+int ib_device_set_netdev(struct ib_device *ib_dev, struct net_device *ndev,
+			 unsigned int port)
+{
+	struct net_device *old_ndev;
+	struct ib_port_data *pdata;
+	unsigned long flags;
+	int ret;
+
+	/*
+	 * Drivers wish to call this before ib_register_driver, so we have to
+	 * setup the port data early.
+	 */
+	ret = alloc_port_data(ib_dev);
+	if (ret)
+		return ret;
+
+	if (!rdma_is_port_valid(ib_dev, port))
+		return -EINVAL;
+
+	pdata = &ib_dev->port_data[port];
+	spin_lock_irqsave(&pdata->netdev_lock, flags);
+	if (pdata->netdev == ndev) {
+		spin_unlock_irqrestore(&pdata->netdev_lock, flags);
+		return 0;
+	}
+	old_ndev = pdata->netdev;
+
+	if (ndev)
+		dev_hold(ndev);
+	pdata->netdev = ndev;
+	spin_unlock_irqrestore(&pdata->netdev_lock, flags);
+
+	if (old_ndev)
+		dev_put(old_ndev);
+
+	return 0;
+}
+EXPORT_SYMBOL(ib_device_set_netdev);
+
+static void free_netdevs(struct ib_device *ib_dev)
+{
+	unsigned long flags;
+	unsigned int port;
+
+	rdma_for_each_port (ib_dev, port) {
+		struct ib_port_data *pdata = &ib_dev->port_data[port];
+
+		spin_lock_irqsave(&pdata->netdev_lock, flags);
+		if (pdata->netdev) {
+			dev_put(pdata->netdev);
+			pdata->netdev = NULL;
+		}
+		spin_unlock_irqrestore(&pdata->netdev_lock, flags);
+	}
+}
+
+struct net_device *ib_device_get_netdev(struct ib_device *ib_dev,
+					unsigned int port)
+{
+	struct ib_port_data *pdata;
+	struct net_device *res;
+
+	if (!rdma_is_port_valid(ib_dev, port))
+		return NULL;
+
+	pdata = &ib_dev->port_data[port];
+
+	/*
+	 * New drivers should use ib_device_set_netdev() not the legacy
+	 * get_netdev().
+	 */
+	if (ib_dev->ops.get_netdev)
+		res = ib_dev->ops.get_netdev(ib_dev, port);
+	else {
+		spin_lock(&pdata->netdev_lock);
+		res = pdata->netdev;
+		if (res)
+			dev_hold(res);
+		spin_unlock(&pdata->netdev_lock);
+	}
+
+	/*
+	 * If we are starting to unregister expedite things by preventing
+	 * propagation of an unregistering netdev.
+	 */
+	if (res && res->reg_state != NETREG_REGISTERED) {
+		dev_put(res);
+		return NULL;
+	}
+
+	return res;
+}
+
 /**
  * ib_enum_roce_netdev - enumerate all RoCE ports
  * @ib_dev : IB device we want to query
@@ -1034,16 +1172,8 @@ void ib_enum_roce_netdev(struct ib_device *ib_dev,
 
 	rdma_for_each_port (ib_dev, port)
 		if (rdma_protocol_roce(ib_dev, port)) {
-			struct net_device *idev = NULL;
-
-			if (ib_dev->ops.get_netdev)
-				idev = ib_dev->ops.get_netdev(ib_dev, port);
-
-			if (idev &&
-			    idev->reg_state >= NETREG_UNREGISTERED) {
-				dev_put(idev);
-				idev = NULL;
-			}
+			struct net_device *idev =
+				ib_device_get_netdev(ib_dev, port);
 
 			if (filter(ib_dev, port, idev, filter_cookie))
 				cb(ib_dev, port, idev, cookie);
diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c
index 85f6f2bcce40..1980ddc5f7bc 100644
--- a/drivers/infiniband/core/nldev.c
+++ b/drivers/infiniband/core/nldev.c
@@ -268,9 +268,7 @@ static int fill_port_info(struct sk_buff *msg,
 	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_PORT_PHYS_STATE, attr.phys_state))
 		return -EMSGSIZE;
 
-	if (device->ops.get_netdev)
-		netdev = device->ops.get_netdev(device, port);
-
+	netdev = ib_device_get_netdev(device, port);
 	if (netdev && net_eq(dev_net(netdev), net)) {
 		ret = nla_put_u32(msg,
 				  RDMA_NLDEV_ATTR_NDEV_INDEX, netdev->ifindex);
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index de5d895a5054..5a5e83f5f0fc 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -1723,10 +1723,7 @@ int ib_get_eth_speed(struct ib_device *dev, u8 port_num, u8 *speed, u8 *width)
 	if (rdma_port_get_link_layer(dev, port_num) != IB_LINK_LAYER_ETHERNET)
 		return -EINVAL;
 
-	if (!dev->ops.get_netdev)
-		return -EOPNOTSUPP;
-
-	netdev = dev->ops.get_netdev(dev, port_num);
+	netdev = ib_device_get_netdev(dev, port_num);
 	if (!netdev)
 		return -ENODEV;
 
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 50b7ebc2885e..7f81a313c01b 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -2204,6 +2204,9 @@ struct ib_port_data {
 	struct list_head pkey_list;
 
 	struct ib_port_cache cache;
+
+	spinlock_t netdev_lock;
+	struct net_device *netdev;
 };
 
 /* rdma netdev type - specifies protocol type */
@@ -3996,6 +3999,10 @@ void ib_device_put(struct ib_device *device);
 struct net_device *ib_get_net_dev_by_params(struct ib_device *dev, u8 port,
 					    u16 pkey, const union ib_gid *gid,
 					    const struct sockaddr *addr);
+int ib_device_set_netdev(struct ib_device *ib_dev, struct net_device *ndev,
+			 unsigned int port);
+struct net_device *ib_device_netdev(struct ib_device *dev, u8 port);
+
 struct ib_wq *ib_create_wq(struct ib_pd *pd,
 			   struct ib_wq_init_attr *init_attr);
 int ib_destroy_wq(struct ib_wq *wq);
-- 
cgit v1.2.3


From 324e227ea7c952626abafe72db42ae0d70220a6e Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@mellanox.com>
Date: Tue, 12 Feb 2019 21:12:51 -0700
Subject: RDMA/device: Add ib_device_get_by_netdev()

Several drivers need to find the ib_device from a given netdev. rxe needs
this at speed in an unsleepable context, so choose to implement the
translation using a RCU safe hash table.

The hash table can have a many to one mapping. This is intended to support
some future case where multiple IB drivers (ie iWarp and RoCE) connect to
the same netdevs. driver_ids will need to be different to support this.

In the process this makes the struct ib_device and ib_port_data RCU safe
by deferring their kfrees.

Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/core/device.c | 119 +++++++++++++++++++++++++++++++++++----
 include/rdma/ib_verbs.h          |  10 +++-
 2 files changed, 116 insertions(+), 13 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 7680a64a98bc..f6795ad7ca98 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -40,6 +40,7 @@
 #include <linux/netdevice.h>
 #include <linux/security.h>
 #include <linux/notifier.h>
+#include <linux/hashtable.h>
 #include <rdma/rdma_netlink.h>
 #include <rdma/ib_addr.h>
 #include <rdma/ib_cache.h>
@@ -134,6 +135,10 @@ static void *xan_find_marked(struct xarray *xa, unsigned long *indexp,
 	     !xa_is_err(entry);                                                \
 	     (index)++, entry = xan_find_marked(xa, &(index), filter))
 
+/* RCU hash table mapping netdevice pointers to struct ib_port_data */
+static DEFINE_SPINLOCK(ndev_hash_lock);
+static DECLARE_HASHTABLE(ndev_hash, 5);
+
 static void free_netdevs(struct ib_device *ib_dev);
 static int ib_security_change(struct notifier_block *nb, unsigned long event,
 			      void *lsm_data);
@@ -144,6 +149,12 @@ static struct notifier_block ibdev_lsm_nb = {
 	.notifier_call = ib_security_change,
 };
 
+/* Pointer to the RCU head at the start of the ib_port_data array */
+struct ib_port_data_rcu {
+	struct rcu_head rcu_head;
+	struct ib_port_data pdata[];
+};
+
 static int ib_device_check_mandatory(struct ib_device *device)
 {
 #define IB_MANDATORY_FUNC(x) { offsetof(struct ib_device_ops, x), #x }
@@ -295,9 +306,12 @@ static void ib_device_release(struct device *device)
 	WARN_ON(refcount_read(&dev->refcount));
 	ib_cache_release_one(dev);
 	ib_security_release_port_pkey_list(dev);
-	kfree(dev->port_data);
 	xa_destroy(&dev->client_data);
-	kfree(dev);
+	if (dev->port_data)
+		kfree_rcu(container_of(dev->port_data, struct ib_port_data_rcu,
+				       pdata[0]),
+			  rcu_head);
+	kfree_rcu(dev, rcu_head);
 }
 
 static int ib_device_uevent(struct device *device,
@@ -468,6 +482,7 @@ static void remove_client_context(struct ib_device *device,
 
 static int alloc_port_data(struct ib_device *device)
 {
+	struct ib_port_data_rcu *pdata_rcu;
 	unsigned int port;
 
 	if (device->port_data)
@@ -484,17 +499,26 @@ static int alloc_port_data(struct ib_device *device)
 	 * Therefore port_data is declared as a 1 based array with potential
 	 * empty slots at the beginning.
 	 */
-	device->port_data = kcalloc(rdma_end_port(device) + 1,
-				    sizeof(*device->port_data), GFP_KERNEL);
-	if (!device->port_data)
+	pdata_rcu = kzalloc(struct_size(pdata_rcu, pdata,
+					rdma_end_port(device) + 1),
+			    GFP_KERNEL);
+	if (!pdata_rcu)
 		return -ENOMEM;
+	/*
+	 * The rcu_head is put in front of the port data array and the stored
+	 * pointer is adjusted since we never need to see that member until
+	 * kfree_rcu.
+	 */
+	device->port_data = pdata_rcu->pdata;
 
 	rdma_for_each_port (device, port) {
 		struct ib_port_data *pdata = &device->port_data[port];
 
+		pdata->ib_dev = device;
 		spin_lock_init(&pdata->pkey_list_lock);
 		INIT_LIST_HEAD(&pdata->pkey_list);
 		spin_lock_init(&pdata->netdev_lock);
+		INIT_HLIST_NODE(&pdata->ndev_hash_link);
 	}
 	return 0;
 }
@@ -1042,6 +1066,29 @@ int ib_query_port(struct ib_device *device,
 }
 EXPORT_SYMBOL(ib_query_port);
 
+static void add_ndev_hash(struct ib_port_data *pdata)
+{
+	unsigned long flags;
+
+	might_sleep();
+
+	spin_lock_irqsave(&ndev_hash_lock, flags);
+	if (hash_hashed(&pdata->ndev_hash_link)) {
+		hash_del_rcu(&pdata->ndev_hash_link);
+		spin_unlock_irqrestore(&ndev_hash_lock, flags);
+		/*
+		 * We cannot do hash_add_rcu after a hash_del_rcu until the
+		 * grace period
+		 */
+		synchronize_rcu();
+		spin_lock_irqsave(&ndev_hash_lock, flags);
+	}
+	if (pdata->netdev)
+		hash_add_rcu(ndev_hash, &pdata->ndev_hash_link,
+			     (uintptr_t)pdata->netdev);
+	spin_unlock_irqrestore(&ndev_hash_lock, flags);
+}
+
 /**
  * ib_device_set_netdev - Associate the ib_dev with an underlying net_device
  * @ib_dev: Device to modify
@@ -1078,17 +1125,19 @@ int ib_device_set_netdev(struct ib_device *ib_dev, struct net_device *ndev,
 
 	pdata = &ib_dev->port_data[port];
 	spin_lock_irqsave(&pdata->netdev_lock, flags);
-	if (pdata->netdev == ndev) {
+	old_ndev = rcu_dereference_protected(
+		pdata->netdev, lockdep_is_held(&pdata->netdev_lock));
+	if (old_ndev == ndev) {
 		spin_unlock_irqrestore(&pdata->netdev_lock, flags);
 		return 0;
 	}
-	old_ndev = pdata->netdev;
 
 	if (ndev)
 		dev_hold(ndev);
-	pdata->netdev = ndev;
+	rcu_assign_pointer(pdata->netdev, ndev);
 	spin_unlock_irqrestore(&pdata->netdev_lock, flags);
 
+	add_ndev_hash(pdata);
 	if (old_ndev)
 		dev_put(old_ndev);
 
@@ -1103,11 +1152,24 @@ static void free_netdevs(struct ib_device *ib_dev)
 
 	rdma_for_each_port (ib_dev, port) {
 		struct ib_port_data *pdata = &ib_dev->port_data[port];
+		struct net_device *ndev;
 
 		spin_lock_irqsave(&pdata->netdev_lock, flags);
-		if (pdata->netdev) {
-			dev_put(pdata->netdev);
-			pdata->netdev = NULL;
+		ndev = rcu_dereference_protected(
+			pdata->netdev, lockdep_is_held(&pdata->netdev_lock));
+		if (ndev) {
+			spin_lock(&ndev_hash_lock);
+			hash_del_rcu(&pdata->ndev_hash_link);
+			spin_unlock(&ndev_hash_lock);
+
+			/*
+			 * If this is the last dev_put there is still a
+			 * synchronize_rcu before the netdev is kfreed, so we
+			 * can continue to rely on unlocked pointer
+			 * comparisons after the put
+			 */
+			rcu_assign_pointer(pdata->netdev, NULL);
+			dev_put(ndev);
 		}
 		spin_unlock_irqrestore(&pdata->netdev_lock, flags);
 	}
@@ -1132,7 +1194,8 @@ struct net_device *ib_device_get_netdev(struct ib_device *ib_dev,
 		res = ib_dev->ops.get_netdev(ib_dev, port);
 	else {
 		spin_lock(&pdata->netdev_lock);
-		res = pdata->netdev;
+		res = rcu_dereference_protected(
+			pdata->netdev, lockdep_is_held(&pdata->netdev_lock));
 		if (res)
 			dev_hold(res);
 		spin_unlock(&pdata->netdev_lock);
@@ -1150,6 +1213,38 @@ struct net_device *ib_device_get_netdev(struct ib_device *ib_dev,
 	return res;
 }
 
+/**
+ * ib_device_get_by_netdev - Find an IB device associated with a netdev
+ * @ndev: netdev to locate
+ * @driver_id: The driver ID that must match (RDMA_DRIVER_UNKNOWN matches all)
+ *
+ * Find and hold an ib_device that is associated with a netdev via
+ * ib_device_set_netdev(). The caller must call ib_device_put() on the
+ * returned pointer.
+ */
+struct ib_device *ib_device_get_by_netdev(struct net_device *ndev,
+					  enum rdma_driver_id driver_id)
+{
+	struct ib_device *res = NULL;
+	struct ib_port_data *cur;
+
+	rcu_read_lock();
+	hash_for_each_possible_rcu (ndev_hash, cur, ndev_hash_link,
+				    (uintptr_t)ndev) {
+		if (rcu_access_pointer(cur->netdev) == ndev &&
+		    (driver_id == RDMA_DRIVER_UNKNOWN ||
+		     cur->ib_dev->driver_id == driver_id) &&
+		    ib_device_try_get(cur->ib_dev)) {
+			res = cur->ib_dev;
+			break;
+		}
+	}
+	rcu_read_unlock();
+
+	return res;
+}
+EXPORT_SYMBOL(ib_device_get_by_netdev);
+
 /**
  * ib_enum_roce_netdev - enumerate all RoCE ports
  * @ib_dev : IB device we want to query
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 7f81a313c01b..3aa802b65cf3 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -2198,6 +2198,8 @@ struct ib_port_immutable {
 };
 
 struct ib_port_data {
+	struct ib_device *ib_dev;
+
 	struct ib_port_immutable immutable;
 
 	spinlock_t pkey_list_lock;
@@ -2206,7 +2208,8 @@ struct ib_port_data {
 	struct ib_port_cache cache;
 
 	spinlock_t netdev_lock;
-	struct net_device *netdev;
+	struct net_device __rcu *netdev;
+	struct hlist_node ndev_hash_link;
 };
 
 /* rdma netdev type - specifies protocol type */
@@ -2545,6 +2548,7 @@ struct ib_device {
 	struct device                *dma_device;
 	struct ib_device_ops	     ops;
 	char                          name[IB_DEVICE_NAME_MAX];
+	struct rcu_head rcu_head;
 
 	struct list_head              event_handler_list;
 	spinlock_t                    event_handler_lock;
@@ -3996,6 +4000,10 @@ static inline bool ib_device_try_get(struct ib_device *dev)
 }
 
 void ib_device_put(struct ib_device *device);
+struct ib_device *ib_device_get_by_netdev(struct net_device *ndev,
+					  enum rdma_driver_id driver_id);
+struct ib_device *ib_device_get_by_name(const char *name,
+					enum rdma_driver_id driver_id);
 struct net_device *ib_get_net_dev_by_params(struct ib_device *dev, u8 port,
 					    u16 pkey, const union ib_gid *gid,
 					    const struct sockaddr *addr);
-- 
cgit v1.2.3


From d0899892edd089790eb17943ecf28254a909deae Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@mellanox.com>
Date: Tue, 12 Feb 2019 21:12:53 -0700
Subject: RDMA/device: Provide APIs from the core code to help unregistration

These APIs are intended to support drivers that exist outside the usual
driver core probe()/remove() callbacks. Normally the driver core will
prevent remove() from running concurrently with probe(), once this safety
is lost drivers need more support to get the locking and lifetimes right.

ib_unregister_driver() is intended to be used during module_exit of a
driver using these APIs. It unregisters all the associated ib_devices.

ib_unregister_device_and_put() is to be used by a driver-specific removal
function (ie removal by name, removal from a netdev notifier, removal from
netlink)

ib_unregister_queued() is to be used from netdev notifier chains where
RTNL is held.

The locking is tricky here since once things become async it is possible
to race unregister with registration. This is largely solved by relying on
the registration refcount, unregistration will only ever work on something
that has a positive registration refcount - and then an unregistration
mutex serializes all competing unregistrations of the same device.

Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/core/device.c | 242 +++++++++++++++++++++++++++++++++------
 include/rdma/ib_verbs.h          |  11 ++
 2 files changed, 217 insertions(+), 36 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index f6795ad7ca98..e470fa651961 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -140,6 +140,8 @@ static DEFINE_SPINLOCK(ndev_hash_lock);
 static DECLARE_HASHTABLE(ndev_hash, 5);
 
 static void free_netdevs(struct ib_device *ib_dev);
+static void ib_unregister_work(struct work_struct *work);
+static void __ib_unregister_device(struct ib_device *device);
 static int ib_security_change(struct notifier_block *nb, unsigned long event,
 			      void *lsm_data);
 static void ib_policy_change_task(struct work_struct *work);
@@ -366,6 +368,7 @@ struct ib_device *_ib_alloc_device(size_t size)
 
 	INIT_LIST_HEAD(&device->event_handler_list);
 	spin_lock_init(&device->event_handler_lock);
+	mutex_init(&device->unregistration_lock);
 	/*
 	 * client_data needs to be alloc because we don't want our mark to be
 	 * destroyed if the user stores NULL in the client data.
@@ -374,6 +377,7 @@ struct ib_device *_ib_alloc_device(size_t size)
 	init_rwsem(&device->client_data_rwsem);
 	INIT_LIST_HEAD(&device->port_list);
 	init_completion(&device->unreg_completion);
+	INIT_WORK(&device->unregistration_work, ib_unregister_work);
 
 	return device;
 }
@@ -387,6 +391,20 @@ EXPORT_SYMBOL(_ib_alloc_device);
  */
 void ib_dealloc_device(struct ib_device *device)
 {
+	if (device->ops.dealloc_driver)
+		device->ops.dealloc_driver(device);
+
+	/*
+	 * ib_unregister_driver() requires all devices to remain in the xarray
+	 * while their ops are callable. The last op we call is dealloc_driver
+	 * above.  This is needed to create a fence on op callbacks prior to
+	 * allowing the driver module to unload.
+	 */
+	down_write(&devices_rwsem);
+	if (xa_load(&devices, device->index) == device)
+		xa_erase(&devices, device->index);
+	up_write(&devices_rwsem);
+
 	/* Expedite releasing netdev references */
 	free_netdevs(device);
 
@@ -599,7 +617,8 @@ static int ib_security_change(struct notifier_block *nb, unsigned long event,
 }
 
 /*
- * Assign the unique string device name and the unique device index.
+ * Assign the unique string device name and the unique device index. This is
+ * undone by ib_dealloc_device.
  */
 static int assign_name(struct ib_device *device, const char *name)
 {
@@ -640,13 +659,6 @@ out:
 	return ret;
 }
 
-static void release_name(struct ib_device *device)
-{
-	down_write(&devices_rwsem);
-	xa_erase(&devices, device->index);
-	up_write(&devices_rwsem);
-}
-
 static void setup_dma_device(struct ib_device *device)
 {
 	struct device *parent = device->dev.parent;
@@ -740,30 +752,38 @@ static void disable_device(struct ib_device *device)
 
 /*
  * An enabled device is visible to all clients and to all the public facing
- * APIs that return a device pointer.
+ * APIs that return a device pointer. This always returns with a new get, even
+ * if it fails.
  */
-static int enable_device(struct ib_device *device)
+static int enable_device_and_get(struct ib_device *device)
 {
 	struct ib_client *client;
 	unsigned long index;
-	int ret;
+	int ret = 0;
 
-	refcount_set(&device->refcount, 1);
+	/*
+	 * One ref belongs to the xa and the other belongs to this
+	 * thread. This is needed to guard against parallel unregistration.
+	 */
+	refcount_set(&device->refcount, 2);
 	down_write(&devices_rwsem);
 	xa_set_mark(&devices, device->index, DEVICE_REGISTERED);
-	up_write(&devices_rwsem);
+
+	/*
+	 * By using downgrade_write() we ensure that no other thread can clear
+	 * DEVICE_REGISTERED while we are completing the client setup.
+	 */
+	downgrade_write(&devices_rwsem);
 
 	down_read(&clients_rwsem);
 	xa_for_each_marked (&clients, index, client, CLIENT_REGISTERED) {
 		ret = add_client_context(device, client);
-		if (ret) {
-			up_read(&clients_rwsem);
-			disable_device(device);
-			return ret;
-		}
+		if (ret)
+			break;
 	}
 	up_read(&clients_rwsem);
-	return 0;
+	up_read(&devices_rwsem);
+	return ret;
 }
 
 /**
@@ -774,6 +794,10 @@ static int enable_device(struct ib_device *device)
  * devices with the IB core.  All registered clients will receive a
  * callback for each device that is added. @device must be allocated
  * with ib_alloc_device().
+ *
+ * If the driver uses ops.dealloc_driver and calls any ib_unregister_device()
+ * asynchronously then the device pointer may become freed as soon as this
+ * function returns.
  */
 int ib_register_device(struct ib_device *device, const char *name)
 {
@@ -785,13 +809,13 @@ int ib_register_device(struct ib_device *device, const char *name)
 
 	ret = setup_device(device);
 	if (ret)
-		goto out;
+		return ret;
 
 	ret = ib_cache_setup_one(device);
 	if (ret) {
 		dev_warn(&device->dev,
 			 "Couldn't set up InfiniBand P_Key/GID cache\n");
-		goto out;
+		return ret;
 	}
 
 	ib_device_register_rdmacg(device);
@@ -807,42 +831,186 @@ int ib_register_device(struct ib_device *device, const char *name)
 		goto dev_cleanup;
 	}
 
-	ret = enable_device(device);
-	if (ret)
-		goto sysfs_cleanup;
+	ret = enable_device_and_get(device);
+	if (ret) {
+		void (*dealloc_fn)(struct ib_device *);
+
+		/*
+		 * If we hit this error flow then we don't want to
+		 * automatically dealloc the device since the caller is
+		 * expected to call ib_dealloc_device() after
+		 * ib_register_device() fails. This is tricky due to the
+		 * possibility for a parallel unregistration along with this
+		 * error flow. Since we have a refcount here we know any
+		 * parallel flow is stopped in disable_device and will see the
+		 * NULL pointers, causing the responsibility to
+		 * ib_dealloc_device() to revert back to this thread.
+		 */
+		dealloc_fn = device->ops.dealloc_driver;
+		device->ops.dealloc_driver = NULL;
+		ib_device_put(device);
+		__ib_unregister_device(device);
+		device->ops.dealloc_driver = dealloc_fn;
+		return ret;
+	}
+	ib_device_put(device);
 
 	return 0;
 
-sysfs_cleanup:
-	ib_device_unregister_sysfs(device);
 dev_cleanup:
 	device_del(&device->dev);
 cg_cleanup:
 	ib_device_unregister_rdmacg(device);
 	ib_cache_cleanup_one(device);
-out:
-	release_name(device);
 	return ret;
 }
 EXPORT_SYMBOL(ib_register_device);
 
+/* Callers must hold a get on the device. */
+static void __ib_unregister_device(struct ib_device *ib_dev)
+{
+	/*
+	 * We have a registration lock so that all the calls to unregister are
+	 * fully fenced, once any unregister returns the device is truely
+	 * unregistered even if multiple callers are unregistering it at the
+	 * same time. This also interacts with the registration flow and
+	 * provides sane semantics if register and unregister are racing.
+	 */
+	mutex_lock(&ib_dev->unregistration_lock);
+	if (!refcount_read(&ib_dev->refcount))
+		goto out;
+
+	disable_device(ib_dev);
+	ib_device_unregister_sysfs(ib_dev);
+	device_del(&ib_dev->dev);
+	ib_device_unregister_rdmacg(ib_dev);
+	ib_cache_cleanup_one(ib_dev);
+
+	/*
+	 * Drivers using the new flow may not call ib_dealloc_device except
+	 * in error unwind prior to registration success.
+	 */
+	if (ib_dev->ops.dealloc_driver) {
+		WARN_ON(kref_read(&ib_dev->dev.kobj.kref) <= 1);
+		ib_dealloc_device(ib_dev);
+	}
+out:
+	mutex_unlock(&ib_dev->unregistration_lock);
+}
+
 /**
  * ib_unregister_device - Unregister an IB device
- * @device:Device to unregister
+ * @device: The device to unregister
  *
  * Unregister an IB device.  All clients will receive a remove callback.
+ *
+ * Callers should call this routine only once, and protect against races with
+ * registration. Typically it should only be called as part of a remove
+ * callback in an implementation of driver core's struct device_driver and
+ * related.
+ *
+ * If ops.dealloc_driver is used then ib_dev will be freed upon return from
+ * this function.
  */
-void ib_unregister_device(struct ib_device *device)
+void ib_unregister_device(struct ib_device *ib_dev)
 {
-	disable_device(device);
-	ib_device_unregister_sysfs(device);
-	device_del(&device->dev);
-	ib_device_unregister_rdmacg(device);
-	ib_cache_cleanup_one(device);
-	release_name(device);
+	get_device(&ib_dev->dev);
+	__ib_unregister_device(ib_dev);
+	put_device(&ib_dev->dev);
 }
 EXPORT_SYMBOL(ib_unregister_device);
 
+/**
+ * ib_unregister_device_and_put - Unregister a device while holding a 'get'
+ * device: The device to unregister
+ *
+ * This is the same as ib_unregister_device(), except it includes an internal
+ * ib_device_put() that should match a 'get' obtained by the caller.
+ *
+ * It is safe to call this routine concurrently from multiple threads while
+ * holding the 'get'. When the function returns the device is fully
+ * unregistered.
+ *
+ * Drivers using this flow MUST use the driver_unregister callback to clean up
+ * their resources associated with the device and dealloc it.
+ */
+void ib_unregister_device_and_put(struct ib_device *ib_dev)
+{
+	WARN_ON(!ib_dev->ops.dealloc_driver);
+	get_device(&ib_dev->dev);
+	ib_device_put(ib_dev);
+	__ib_unregister_device(ib_dev);
+	put_device(&ib_dev->dev);
+}
+EXPORT_SYMBOL(ib_unregister_device_and_put);
+
+/**
+ * ib_unregister_driver - Unregister all IB devices for a driver
+ * @driver_id: The driver to unregister
+ *
+ * This implements a fence for device unregistration. It only returns once all
+ * devices associated with the driver_id have fully completed their
+ * unregistration and returned from ib_unregister_device*().
+ *
+ * If device's are not yet unregistered it goes ahead and starts unregistering
+ * them.
+ *
+ * This does not block creation of new devices with the given driver_id, that
+ * is the responsibility of the caller.
+ */
+void ib_unregister_driver(enum rdma_driver_id driver_id)
+{
+	struct ib_device *ib_dev;
+	unsigned long index;
+
+	down_read(&devices_rwsem);
+	xa_for_each (&devices, index, ib_dev) {
+		if (ib_dev->driver_id != driver_id)
+			continue;
+
+		get_device(&ib_dev->dev);
+		up_read(&devices_rwsem);
+
+		WARN_ON(!ib_dev->ops.dealloc_driver);
+		__ib_unregister_device(ib_dev);
+
+		put_device(&ib_dev->dev);
+		down_read(&devices_rwsem);
+	}
+	up_read(&devices_rwsem);
+}
+EXPORT_SYMBOL(ib_unregister_driver);
+
+static void ib_unregister_work(struct work_struct *work)
+{
+	struct ib_device *ib_dev =
+		container_of(work, struct ib_device, unregistration_work);
+
+	__ib_unregister_device(ib_dev);
+	put_device(&ib_dev->dev);
+}
+
+/**
+ * ib_unregister_device_queued - Unregister a device using a work queue
+ * device: The device to unregister
+ *
+ * This schedules an asynchronous unregistration using a WQ for the device. A
+ * driver should use this to avoid holding locks while doing unregistration,
+ * such as holding the RTNL lock.
+ *
+ * Drivers using this API must use ib_unregister_driver before module unload
+ * to ensure that all scheduled unregistrations have completed.
+ */
+void ib_unregister_device_queued(struct ib_device *ib_dev)
+{
+	WARN_ON(!refcount_read(&ib_dev->refcount));
+	WARN_ON(!ib_dev->ops.dealloc_driver);
+	get_device(&ib_dev->dev);
+	if (!queue_work(system_unbound_wq, &ib_dev->unregistration_work))
+		put_device(&ib_dev->dev);
+}
+EXPORT_SYMBOL(ib_unregister_device_queued);
+
 static int assign_client_id(struct ib_client *client)
 {
 	int ret;
@@ -1558,6 +1726,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
 	SET_DEVICE_OP(dev_ops, create_srq);
 	SET_DEVICE_OP(dev_ops, create_wq);
 	SET_DEVICE_OP(dev_ops, dealloc_dm);
+	SET_DEVICE_OP(dev_ops, dealloc_driver);
 	SET_DEVICE_OP(dev_ops, dealloc_fmr);
 	SET_DEVICE_OP(dev_ops, dealloc_mw);
 	SET_DEVICE_OP(dev_ops, dealloc_pd);
@@ -1744,6 +1913,7 @@ static void __exit ib_core_cleanup(void)
 	destroy_workqueue(ib_comp_wq);
 	/* Make sure that any pending umem accounting work is done. */
 	destroy_workqueue(ib_wq);
+	flush_workqueue(system_unbound_wq);
 	WARN_ON(!xa_empty(&clients));
 	WARN_ON(!xa_empty(&devices));
 }
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 3aa802b65cf3..ad83f8c38dc8 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -2538,6 +2538,12 @@ struct ib_device_ops {
 	int (*fill_res_entry)(struct sk_buff *msg,
 			      struct rdma_restrack_entry *entry);
 
+	/* Device lifecycle callbacks */
+	/*
+	 * This is called as part of ib_dealloc_device().
+	 */
+	void (*dealloc_driver)(struct ib_device *dev);
+
 	DECLARE_RDMA_OBJ_SIZE(ib_pd);
 };
 
@@ -2555,6 +2561,7 @@ struct ib_device {
 
 	struct rw_semaphore	      client_data_rwsem;
 	struct xarray                 client_data;
+	struct mutex                  unregistration_lock;
 
 	struct ib_cache               cache;
 	/**
@@ -2609,6 +2616,7 @@ struct ib_device {
 	 */
 	refcount_t refcount;
 	struct completion unreg_completion;
+	struct work_struct unregistration_work;
 };
 
 struct ib_client {
@@ -2658,6 +2666,9 @@ void ib_get_device_fw_str(struct ib_device *device, char *str);
 
 int ib_register_device(struct ib_device *device, const char *name);
 void ib_unregister_device(struct ib_device *device);
+void ib_unregister_driver(enum rdma_driver_id driver_id);
+void ib_unregister_device_and_put(struct ib_device *device);
+void ib_unregister_device_queued(struct ib_device *ib_dev);
 
 int ib_register_client   (struct ib_client *client);
 void ib_unregister_client(struct ib_client *client);
-- 
cgit v1.2.3


From ca22354b140853b8155692d5b2bc0110aa54e937 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@mellanox.com>
Date: Tue, 12 Feb 2019 21:12:56 -0700
Subject: RDMA/rxe: Close a race after ib_register_device

Since rxe allows unregistration from other threads the rxe pointer can
become invalid any moment after ib_register_driver returns. This could
cause a user triggered use after free.

Add another driver callback to be called right after the device becomes
registered to complete any device setup required post-registration.  This
callback has enough core locking to prevent the device from becoming
unregistered.

Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/core/device.c      |  9 +++++++++
 drivers/infiniband/sw/rxe/rxe_net.c   |  8 ++++----
 drivers/infiniband/sw/rxe/rxe_net.h   |  2 +-
 drivers/infiniband/sw/rxe/rxe_sysfs.c |  9 ++-------
 drivers/infiniband/sw/rxe/rxe_verbs.c | 14 ++++++++++++++
 include/rdma/ib_verbs.h               |  5 +++++
 6 files changed, 35 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 2a7d54794ee3..bf2a215d94dd 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -803,6 +803,12 @@ static int enable_device_and_get(struct ib_device *device)
 	 */
 	downgrade_write(&devices_rwsem);
 
+	if (device->ops.enable_driver) {
+		ret = device->ops.enable_driver(device);
+		if (ret)
+			goto out;
+	}
+
 	down_read(&clients_rwsem);
 	xa_for_each_marked (&clients, index, client, CLIENT_REGISTERED) {
 		ret = add_client_context(device, client);
@@ -810,6 +816,8 @@ static int enable_device_and_get(struct ib_device *device)
 			break;
 	}
 	up_read(&clients_rwsem);
+
+out:
 	up_read(&devices_rwsem);
 	return ret;
 }
@@ -1775,6 +1783,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
 	SET_DEVICE_OP(dev_ops, disassociate_ucontext);
 	SET_DEVICE_OP(dev_ops, drain_rq);
 	SET_DEVICE_OP(dev_ops, drain_sq);
+	SET_DEVICE_OP(dev_ops, enable_driver);
 	SET_DEVICE_OP(dev_ops, fill_res_entry);
 	SET_DEVICE_OP(dev_ops, get_dev_fw_str);
 	SET_DEVICE_OP(dev_ops, get_dma_mr);
diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c
index d6dfbcf6a47e..fb792f5bc0b7 100644
--- a/drivers/infiniband/sw/rxe/rxe_net.c
+++ b/drivers/infiniband/sw/rxe/rxe_net.c
@@ -517,24 +517,24 @@ enum rdma_link_layer rxe_link_layer(struct rxe_dev *rxe, unsigned int port_num)
 	return IB_LINK_LAYER_ETHERNET;
 }
 
-struct rxe_dev *rxe_net_add(struct net_device *ndev)
+int rxe_net_add(struct net_device *ndev)
 {
 	int err;
 	struct rxe_dev *rxe = NULL;
 
 	rxe = ib_alloc_device(rxe_dev, ib_dev);
 	if (!rxe)
-		return NULL;
+		return -ENOMEM;
 
 	rxe->ndev = ndev;
 
 	err = rxe_add(rxe, ndev->mtu);
 	if (err) {
 		ib_dealloc_device(&rxe->ib_dev);
-		return NULL;
+		return err;
 	}
 
-	return rxe;
+	return 0;
 }
 
 static void rxe_port_event(struct rxe_dev *rxe,
diff --git a/drivers/infiniband/sw/rxe/rxe_net.h b/drivers/infiniband/sw/rxe/rxe_net.h
index 106c586dbb26..ad79514191bb 100644
--- a/drivers/infiniband/sw/rxe/rxe_net.h
+++ b/drivers/infiniband/sw/rxe/rxe_net.h
@@ -43,7 +43,7 @@ struct rxe_recv_sockets {
 	struct socket *sk6;
 };
 
-struct rxe_dev *rxe_net_add(struct net_device *ndev);
+int rxe_net_add(struct net_device *ndev);
 
 int rxe_net_init(void);
 void rxe_net_exit(void);
diff --git a/drivers/infiniband/sw/rxe/rxe_sysfs.c b/drivers/infiniband/sw/rxe/rxe_sysfs.c
index d51b55b0a311..46587eb0da0e 100644
--- a/drivers/infiniband/sw/rxe/rxe_sysfs.c
+++ b/drivers/infiniband/sw/rxe/rxe_sysfs.c
@@ -60,7 +60,6 @@ static int rxe_param_set_add(const char *val, const struct kernel_param *kp)
 	char intf[32];
 	struct net_device *ndev;
 	struct rxe_dev *exists;
-	struct rxe_dev *rxe;
 
 	len = sanitize_arg(val, intf, sizeof(intf));
 	if (!len) {
@@ -82,16 +81,12 @@ static int rxe_param_set_add(const char *val, const struct kernel_param *kp)
 		goto err;
 	}
 
-	rxe = rxe_net_add(ndev);
-	if (!rxe) {
+	err = rxe_net_add(ndev);
+	if (err) {
 		pr_err("failed to add %s\n", intf);
-		err = -EINVAL;
 		goto err;
 	}
 
-	rxe_set_port_state(rxe);
-	dev_info(&rxe->ib_dev.dev, "added %s\n", intf);
-
 err:
 	dev_put(ndev);
 	return err;
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index 76da8a142bf7..79ad93b4140c 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -1125,6 +1125,15 @@ static const struct attribute_group rxe_attr_group = {
 	.attrs = rxe_dev_attributes,
 };
 
+static int rxe_enable_driver(struct ib_device *ib_dev)
+{
+	struct rxe_dev *rxe = container_of(ib_dev, struct rxe_dev, ib_dev);
+
+	rxe_set_port_state(rxe);
+	dev_info(&rxe->ib_dev.dev, "added %s\n", netdev_name(rxe->ndev));
+	return 0;
+}
+
 static const struct ib_device_ops rxe_dev_ops = {
 	.alloc_hw_stats = rxe_ib_alloc_hw_stats,
 	.alloc_mr = rxe_alloc_mr,
@@ -1144,6 +1153,7 @@ static const struct ib_device_ops rxe_dev_ops = {
 	.destroy_qp = rxe_destroy_qp,
 	.destroy_srq = rxe_destroy_srq,
 	.detach_mcast = rxe_detach_mcast,
+	.enable_driver = rxe_enable_driver,
 	.get_dma_mr = rxe_get_dma_mr,
 	.get_hw_stats = rxe_ib_get_hw_stats,
 	.get_link_layer = rxe_get_link_layer,
@@ -1245,5 +1255,9 @@ int rxe_register_device(struct rxe_dev *rxe)
 	if (err)
 		pr_warn("%s failed with error %d\n", __func__, err);
 
+	/*
+	 * Note that rxe may be invalid at this point if another thread
+	 * unregistered it.
+	 */
 	return err;
 }
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index ad83f8c38dc8..640263289ab9 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -2539,6 +2539,11 @@ struct ib_device_ops {
 			      struct rdma_restrack_entry *entry);
 
 	/* Device lifecycle callbacks */
+	/*
+	 * Called after the device becomes registered, before clients are
+	 * attached
+	 */
+	int (*enable_driver)(struct ib_device *dev);
 	/*
 	 * This is called as part of ib_dealloc_device().
 	 */
-- 
cgit v1.2.3


From 3856ec4b93c9463d36ee39098dde1fbbd29ec6dd Mon Sep 17 00:00:00 2001
From: Steve Wise <swise@opengridcomputing.com>
Date: Fri, 15 Feb 2019 11:03:53 -0800
Subject: RDMA/core: Add RDMA_NLDEV_CMD_NEWLINK/DELLINK support

Add support for new LINK messages to allow adding and deleting rdma
interfaces.  This will be used initially for soft rdma drivers which
instantiate device instances dynamically by the admin specifying a netdev
device to use.  The rdma_rxe module will be the first user of these
messages.

The design is modeled after RTNL_NEWLINK/DELLINK: rdma drivers register
with the rdma core if they provide link add/delete functions.  Each driver
registers with a unique "type" string, that is used to dispatch messages
coming from user space.  A new RDMA_NLDEV_ATTR is defined for the "type"
string.  User mode will pass 3 attributes in a NEWLINK message:
RDMA_NLDEV_ATTR_DEV_NAME for the desired rdma device name to be created,
RDMA_NLDEV_ATTR_LINK_TYPE for the "type" of link being added, and
RDMA_NLDEV_ATTR_NDEV_NAME for the net_device interface to use for this
link.  The DELLINK message will contain the RDMA_NLDEV_ATTR_DEV_INDEX of
the device to delete.

Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Reviewed-by: Leon Romanovsky <leonro@mellanox.com>
Reviewed-by: Michael J. Ruhl <michael.j.ruhl@intel.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/core/nldev.c  | 122 +++++++++++++++++++++++++++++++++++++++
 include/rdma/ib_verbs.h          |   3 +
 include/rdma/rdma_netlink.h      |  11 ++++
 include/uapi/rdma/rdma_netlink.h |  10 +++-
 4 files changed, 144 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c
index 1980ddc5f7bc..5e94dc87f04f 100644
--- a/drivers/infiniband/core/nldev.c
+++ b/drivers/infiniband/core/nldev.c
@@ -33,6 +33,7 @@
 #include <linux/module.h>
 #include <linux/pid.h>
 #include <linux/pid_namespace.h>
+#include <linux/mutex.h>
 #include <net/netlink.h>
 #include <rdma/rdma_cm.h>
 #include <rdma/rdma_netlink.h>
@@ -113,6 +114,8 @@ static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = {
 	[RDMA_NLDEV_ATTR_RES_MRN]               = { .type = NLA_U32 },
 	[RDMA_NLDEV_ATTR_RES_CM_IDN]            = { .type = NLA_U32 },
 	[RDMA_NLDEV_ATTR_RES_CTXN]              = { .type = NLA_U32 },
+	[RDMA_NLDEV_ATTR_LINK_TYPE]		= { .type = NLA_NUL_STRING,
+				    .len = RDMA_NLDEV_ATTR_ENTRY_STRLEN },
 };
 
 static int put_driver_name_print_type(struct sk_buff *msg, const char *name,
@@ -1200,6 +1203,117 @@ RES_GET_FUNCS(cq, RDMA_RESTRACK_CQ);
 RES_GET_FUNCS(pd, RDMA_RESTRACK_PD);
 RES_GET_FUNCS(mr, RDMA_RESTRACK_MR);
 
+static LIST_HEAD(link_ops);
+static DECLARE_RWSEM(link_ops_rwsem);
+
+static const struct rdma_link_ops *link_ops_get(const char *type)
+{
+	const struct rdma_link_ops *ops;
+
+	list_for_each_entry(ops, &link_ops, list) {
+		if (!strcmp(ops->type, type))
+			goto out;
+	}
+	ops = NULL;
+out:
+	return ops;
+}
+
+void rdma_link_register(struct rdma_link_ops *ops)
+{
+	down_write(&link_ops_rwsem);
+	if (link_ops_get(ops->type)) {
+		WARN_ONCE("Duplicate rdma_link_ops! %s\n", ops->type);
+		goto out;
+	}
+	list_add(&ops->list, &link_ops);
+out:
+	up_write(&link_ops_rwsem);
+}
+EXPORT_SYMBOL(rdma_link_register);
+
+void rdma_link_unregister(struct rdma_link_ops *ops)
+{
+	down_write(&link_ops_rwsem);
+	list_del(&ops->list);
+	up_write(&link_ops_rwsem);
+}
+EXPORT_SYMBOL(rdma_link_unregister);
+
+static int nldev_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
+			  struct netlink_ext_ack *extack)
+{
+	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
+	char ibdev_name[IB_DEVICE_NAME_MAX];
+	const struct rdma_link_ops *ops;
+	char ndev_name[IFNAMSIZ];
+	struct net_device *ndev;
+	char type[IFNAMSIZ];
+	int err;
+
+	err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
+			  nldev_policy, extack);
+	if (err || !tb[RDMA_NLDEV_ATTR_DEV_NAME] ||
+	    !tb[RDMA_NLDEV_ATTR_LINK_TYPE] || !tb[RDMA_NLDEV_ATTR_NDEV_NAME])
+		return -EINVAL;
+
+	nla_strlcpy(ibdev_name, tb[RDMA_NLDEV_ATTR_DEV_NAME],
+		    sizeof(ibdev_name));
+	if (strchr(ibdev_name, '%'))
+		return -EINVAL;
+
+	nla_strlcpy(type, tb[RDMA_NLDEV_ATTR_LINK_TYPE], sizeof(type));
+	nla_strlcpy(ndev_name, tb[RDMA_NLDEV_ATTR_NDEV_NAME],
+		    sizeof(ndev_name));
+
+	ndev = dev_get_by_name(&init_net, ndev_name);
+	if (!ndev)
+		return -ENODEV;
+
+	down_read(&link_ops_rwsem);
+	ops = link_ops_get(type);
+#ifdef CONFIG_MODULES
+	if (!ops) {
+		up_read(&link_ops_rwsem);
+		request_module("rdma-link-%s", type);
+		down_read(&link_ops_rwsem);
+		ops = link_ops_get(type);
+	}
+#endif
+	err = ops ? ops->newlink(ibdev_name, ndev) : -EINVAL;
+	up_read(&link_ops_rwsem);
+	dev_put(ndev);
+
+	return err;
+}
+
+static int nldev_dellink(struct sk_buff *skb, struct nlmsghdr *nlh,
+			  struct netlink_ext_ack *extack)
+{
+	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
+	struct ib_device *device;
+	u32 index;
+	int err;
+
+	err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
+			  nldev_policy, extack);
+	if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
+		return -EINVAL;
+
+	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
+	device = ib_device_get_by_index(index);
+	if (!device)
+		return -EINVAL;
+
+	if (!(device->attrs.device_cap_flags & IB_DEVICE_ALLOW_USER_UNREG)) {
+		ib_device_put(device);
+		return -EINVAL;
+	}
+
+	ib_unregister_device_and_put(device);
+	return 0;
+}
+
 static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
 	[RDMA_NLDEV_CMD_GET] = {
 		.doit = nldev_get_doit,
@@ -1209,6 +1323,14 @@ static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
 		.doit = nldev_set_doit,
 		.flags = RDMA_NL_ADMIN_PERM,
 	},
+	[RDMA_NLDEV_CMD_NEWLINK] = {
+		.doit = nldev_newlink,
+		.flags = RDMA_NL_ADMIN_PERM,
+	},
+	[RDMA_NLDEV_CMD_DELLINK] = {
+		.doit = nldev_dellink,
+		.flags = RDMA_NL_ADMIN_PERM,
+	},
 	[RDMA_NLDEV_CMD_PORT_GET] = {
 		.doit = nldev_port_get_doit,
 		.dump = nldev_port_get_dumpit,
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 640263289ab9..225cb76d469f 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -238,6 +238,7 @@ enum ib_device_cap_flags {
 	IB_DEVICE_RDMA_NETDEV_OPA_VNIC		= (1ULL << 35),
 	/* The device supports padding incoming writes to cacheline. */
 	IB_DEVICE_PCI_WRITE_END_PADDING		= (1ULL << 36),
+	IB_DEVICE_ALLOW_USER_UNREG		= (1ULL << 37),
 };
 
 enum ib_signature_prot_cap {
@@ -2622,6 +2623,8 @@ struct ib_device {
 	refcount_t refcount;
 	struct completion unreg_completion;
 	struct work_struct unregistration_work;
+
+	const struct rdma_link_ops *link_ops;
 };
 
 struct ib_client {
diff --git a/include/rdma/rdma_netlink.h b/include/rdma/rdma_netlink.h
index 70218e6b5187..10732ab31ba2 100644
--- a/include/rdma/rdma_netlink.h
+++ b/include/rdma/rdma_netlink.h
@@ -99,4 +99,15 @@ int rdma_nl_multicast(struct sk_buff *skb, unsigned int group, gfp_t flags);
  * Returns true on success or false if no listeners.
  */
 bool rdma_nl_chk_listeners(unsigned int group);
+
+struct rdma_link_ops {
+	struct list_head list;
+	const char *type;
+	int (*newlink)(const char *ibdev_name, struct net_device *ndev);
+};
+
+void rdma_link_register(struct rdma_link_ops *ops);
+void rdma_link_unregister(struct rdma_link_ops *ops);
+
+#define MODULE_ALIAS_RDMA_LINK(type) MODULE_ALIAS("rdma-link-" type)
 #endif /* _RDMA_NETLINK_H */
diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h
index 4ebbcfb2c6ef..5cc592728071 100644
--- a/include/uapi/rdma/rdma_netlink.h
+++ b/include/uapi/rdma/rdma_netlink.h
@@ -255,9 +255,11 @@ enum rdma_nldev_command {
 	RDMA_NLDEV_CMD_GET, /* can dump */
 	RDMA_NLDEV_CMD_SET,
 
-	/* 3 - 4 are free to use */
+	RDMA_NLDEV_CMD_NEWLINK,
 
-	RDMA_NLDEV_CMD_PORT_GET = 5, /* can dump */
+	RDMA_NLDEV_CMD_DELLINK,
+
+	RDMA_NLDEV_CMD_PORT_GET, /* can dump */
 
 	/* 6 - 8 are free to use */
 
@@ -465,6 +467,10 @@ enum rdma_nldev_attr {
 	RDMA_NLDEV_ATTR_RES_MRN,               /* u32 */
 	RDMA_NLDEV_ATTR_RES_CM_IDN,            /* u32 */
 	RDMA_NLDEV_ATTR_RES_CTXN,	       /* u32 */
+	/*
+	 * Identifies the rdma driver. eg: "rxe" or "siw"
+	 */
+	RDMA_NLDEV_ATTR_LINK_TYPE,		/* string */
 
 	/*
 	 * Always the end
-- 
cgit v1.2.3


From 36003d4cf57ca431fb3f94d317bcca426a2394d6 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Tue, 19 Feb 2019 17:53:26 +0100
Subject: driver core: Fix PM-runtime for links added during consumer probe

Commit 4c06c4e6cf63 ("driver core: Fix possible supplier PM-usage
counter imbalance") introduced a regression that causes suppliers
to be suspended prematurely for device links added during consumer
driver probe if the initial PM-runtime status of the consumer is
"suspended" and the consumer is resumed after adding the link and
before pm_runtime_put_suppliers() is called.  In that case,
pm_runtime_put_suppliers() will drop the rpm_active refcount for
the link by one and (since rpm_active is equal to two after the
preceding consumer resume) the supplier's PM-runtime usage counter
will be decremented, which may cause the supplier to suspend even
though the consumer's PM-runtime status is "active".

For this reason, partially revert commit 4c06c4e6cf63 as the problem
it tried to fix needs to be addressed somewhat differently, and
change pm_runtime_get_suppliers() and pm_runtime_put_suppliers() so
that the latter only drops rpm_active references acquired by the
former.  [This requires adding a new field to struct device_link,
but I coulnd't find a cleaner way to address the issue that would
work in all cases.]

This causes pm_runtime_put_suppliers() to effectively ignore device
links added during consumer probe, so device_link_add() doesn't need
to worry about ensuring that suppliers will remain active after
pm_runtime_put_suppliers() for links created with DL_FLAG_RPM_ACTIVE
set and it only needs to bump up rpm_active by one for those links,
so pm_runtime_active_link() is not necessary any more.

Fixes: 4c06c4e6cf63 ("driver core: Fix possible supplier PM-usage counter imbalance")
Reported-by: Jon Hunter <jonathanh@nvidia.com>
Tested-by: Jon Hunter <jonathanh@nvidia.com>
Tested-by: Ulf Hansson <ulf.hansson@linaro.org>
Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Tested-by: Thierry Reding <treding@nvidia.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/core.c          |  4 ++--
 drivers/base/power/runtime.c | 29 ++++++-----------------------
 include/linux/device.h       |  1 +
 include/linux/pm_runtime.h   |  4 ----
 4 files changed, 9 insertions(+), 29 deletions(-)

(limited to 'include')

diff --git a/drivers/base/core.c b/drivers/base/core.c
index 787190238753..4aeaa0c92bda 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -277,7 +277,7 @@ struct device_link *device_link_add(struct device *consumer,
 				link->flags |= DL_FLAG_PM_RUNTIME;
 			}
 			if (flags & DL_FLAG_RPM_ACTIVE)
-				pm_runtime_active_link(link, supplier);
+				refcount_inc(&link->rpm_active);
 		}
 
 		if (flags & DL_FLAG_STATELESS) {
@@ -310,7 +310,7 @@ struct device_link *device_link_add(struct device *consumer,
 
 	if (flags & DL_FLAG_PM_RUNTIME) {
 		if (flags & DL_FLAG_RPM_ACTIVE)
-			pm_runtime_active_link(link, supplier);
+			refcount_inc(&link->rpm_active);
 
 		pm_runtime_new_link(consumer);
 	}
diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
index 6b8aa6bed064..70d2cb188601 100644
--- a/drivers/base/power/runtime.c
+++ b/drivers/base/power/runtime.c
@@ -1626,6 +1626,7 @@ void pm_runtime_get_suppliers(struct device *dev)
 
 	list_for_each_entry_rcu(link, &dev->links.suppliers, c_node)
 		if (link->flags & DL_FLAG_PM_RUNTIME) {
+			link->supplier_preactivated = true;
 			refcount_inc(&link->rpm_active);
 			pm_runtime_get_sync(link->supplier);
 		}
@@ -1645,9 +1646,11 @@ void pm_runtime_put_suppliers(struct device *dev)
 	idx = device_links_read_lock();
 
 	list_for_each_entry_rcu(link, &dev->links.suppliers, c_node)
-		if (link->flags & DL_FLAG_PM_RUNTIME &&
-		    refcount_dec_not_one(&link->rpm_active))
-			pm_runtime_put(link->supplier);
+		if (link->supplier_preactivated) {
+			link->supplier_preactivated = false;
+			if (refcount_dec_not_one(&link->rpm_active))
+				pm_runtime_put(link->supplier);
+		}
 
 	device_links_read_unlock(idx);
 }
@@ -1659,26 +1662,6 @@ void pm_runtime_new_link(struct device *dev)
 	spin_unlock_irq(&dev->power.lock);
 }
 
-/**
- * pm_runtime_active_link - Set up new device link as active for PM-runtime.
- * @link: Device link to be set up as active.
- * @supplier: Supplier end of the link.
- *
- * Add 2 to the rpm_active refcount of @link and increment the PM-runtime
- * usage counter of @supplier once more in case the link is being added while
- * the consumer driver is probing and pm_runtime_put_suppliers() will be called
- * subsequently.
- *
- * Note that this doesn't prevent rpm_put_suppliers() from decreasing the link's
- * rpm_active refcount down to one, so runtime suspend of the consumer end of
- * @link is not affected.
- */
-void pm_runtime_active_link(struct device_link *link, struct device *supplier)
-{
-	refcount_add(2, &link->rpm_active);
-	pm_runtime_get_noresume(supplier);
-}
-
 void pm_runtime_drop_link(struct device *dev)
 {
 	spin_lock_irq(&dev->power.lock);
diff --git a/include/linux/device.h b/include/linux/device.h
index 292b720c4bc2..a7967a48cdc9 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -861,6 +861,7 @@ struct device_link {
 #ifdef CONFIG_SRCU
 	struct rcu_head rcu_head;
 #endif
+	bool supplier_preactivated; /* Owned by consumer probe. */
 };
 
 /**
diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h
index a27bbb5937b8..fed5be706bc9 100644
--- a/include/linux/pm_runtime.h
+++ b/include/linux/pm_runtime.h
@@ -59,8 +59,6 @@ extern void pm_runtime_clean_up_links(struct device *dev);
 extern void pm_runtime_get_suppliers(struct device *dev);
 extern void pm_runtime_put_suppliers(struct device *dev);
 extern void pm_runtime_new_link(struct device *dev);
-extern void pm_runtime_active_link(struct device_link *link,
-				   struct device *supplier);
 extern void pm_runtime_drop_link(struct device *dev);
 
 static inline void pm_suspend_ignore_children(struct device *dev, bool enable)
@@ -178,8 +176,6 @@ static inline void pm_runtime_clean_up_links(struct device *dev) {}
 static inline void pm_runtime_get_suppliers(struct device *dev) {}
 static inline void pm_runtime_put_suppliers(struct device *dev) {}
 static inline void pm_runtime_new_link(struct device *dev) {}
-static inline void pm_runtime_active_link(struct device_link *link,
-					  struct device *supplier) {}
 static inline void pm_runtime_drop_link(struct device *dev) {}
 
 #endif /* !CONFIG_PM */
-- 
cgit v1.2.3


From fadccd8fc2d06cf7fd222245d7e04b00fae946cf Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 18 Feb 2019 09:37:13 +0100
Subject: nvme_ioctl.h: remove duplicate GPL boilerplate
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We already have a ЅPDX header, so no need to duplicate the information.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
---
 include/linux/nvme.h            | 10 +---------
 include/uapi/linux/nvme_ioctl.h |  9 ---------
 2 files changed, 1 insertion(+), 18 deletions(-)

(limited to 'include')

diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index bbcc83886899..baa49e6a23cc 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -1,15 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Definitions for the NVM Express interface
  * Copyright (c) 2011-2014, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
  */
 
 #ifndef _LINUX_NVME_H
diff --git a/include/uapi/linux/nvme_ioctl.h b/include/uapi/linux/nvme_ioctl.h
index 6e74b1eaf541..1c215ea1798e 100644
--- a/include/uapi/linux/nvme_ioctl.h
+++ b/include/uapi/linux/nvme_ioctl.h
@@ -2,15 +2,6 @@
 /*
  * Definitions for the NVM Express ioctl interface
  * Copyright (c) 2011-2014, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
  */
 
 #ifndef _UAPI_LINUX_NVME_IOCTL_H
-- 
cgit v1.2.3


From 055d045a7aaeef326f8ab6845519da3157887830 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 18 Feb 2019 09:37:42 +0100
Subject: nvme-tcp.h: fix SPDX header

For .h files we need to use /* */ style comments.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
---
 include/linux/nvme-tcp.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/nvme-tcp.h b/include/linux/nvme-tcp.h
index 03d87c0550a9..959e0bd9a913 100644
--- a/include/linux/nvme-tcp.h
+++ b/include/linux/nvme-tcp.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * NVMe over Fabrics TCP protocol header.
  * Copyright (c) 2018 Lightbits Labs. All rights reserved.
-- 
cgit v1.2.3


From 8638b2461475ad4c35a957156ecf2425b9b82e85 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 18 Feb 2019 09:33:28 +0100
Subject: nvme-fc: convert to SPDX identifiers

Update license to use SPDX-License-Identifier instead of verbose license
text.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
---
 drivers/nvme/host/fc.c         | 14 +-------------
 include/linux/nvme-fc-driver.h | 10 +---------
 include/linux/nvme-fc.h        | 14 +-------------
 3 files changed, 3 insertions(+), 35 deletions(-)

(limited to 'include')

diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 89accc76d71c..b29b12498a1a 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -1,18 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (c) 2016 Avago Technologies.  All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful.
- * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND WARRANTIES,
- * INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, FITNESS FOR A
- * PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE DISCLAIMED, EXCEPT TO
- * THE EXTENT THAT SUCH DISCLAIMERS ARE HELD TO BE LEGALLY INVALID.
- * See the GNU General Public License for more details, a copy of which
- * can be found in the file COPYING included with this package
- *
  */
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/module.h>
diff --git a/include/linux/nvme-fc-driver.h b/include/linux/nvme-fc-driver.h
index 91745cc3704c..2bb349035431 100644
--- a/include/linux/nvme-fc-driver.h
+++ b/include/linux/nvme-fc-driver.h
@@ -1,14 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Copyright (c) 2016, Avago Technologies
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
  */
 
 #ifndef _NVME_FC_DRIVER_H
diff --git a/include/linux/nvme-fc.h b/include/linux/nvme-fc.h
index 36cca93a5ff2..067c9fea64fe 100644
--- a/include/linux/nvme-fc.h
+++ b/include/linux/nvme-fc.h
@@ -1,18 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Copyright (c) 2016 Avago Technologies.  All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful.
- * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND WARRANTIES,
- * INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, FITNESS FOR A
- * PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE DISCLAIMED, EXCEPT TO
- * THE EXTENT THAT SUCH DISCLAIMERS ARE HELD TO BE LEGALLY INVALID.
- * See the GNU General Public License for more details, a copy of which
- * can be found in the file COPYING included with this package
- *
  */
 
 /*
-- 
cgit v1.2.3


From 5d8762d5684ab997c7ccf2457c8beec7ef972ceb Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 18 Feb 2019 09:34:21 +0100
Subject: nvme-rdma: convert to SPDX identifiers

Update license to use SPDX-License-Identifier instead of verbose license
text.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
---
 drivers/nvme/host/rdma.c  | 10 +---------
 include/linux/nvme-rdma.h | 10 +---------
 2 files changed, 2 insertions(+), 18 deletions(-)

(limited to 'include')

diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index ac365366c2ec..7c0d29185249 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -1,15 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * NVMe over Fabrics RDMA host code.
  * Copyright (c) 2015-2016 HGST, a Western Digital Company.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
  */
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/module.h>
diff --git a/include/linux/nvme-rdma.h b/include/linux/nvme-rdma.h
index a72fd04aa5e1..3aa97b98dc89 100644
--- a/include/linux/nvme-rdma.h
+++ b/include/linux/nvme-rdma.h
@@ -1,14 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Copyright (c) 2015 Mellanox Technologies. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
  */
 
 #ifndef _LINUX_NVME_RDMA_H
-- 
cgit v1.2.3


From ff4c25f26a71b79c70ea03b3935a1297439a8a85 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Sun, 3 Feb 2019 20:12:02 +0100
Subject: dma-mapping: improve selection of dma_declare_coherent availability

This API is primarily used through DT entries, but two architectures
and two drivers call it directly.  So instead of selecting the config
symbol for random architectures pull it in implicitly for the actual
users.  Also rename the Kconfig option to describe the feature better.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Paul Burton <paul.burton@mips.com> # MIPS
Acked-by: Lee Jones <lee.jones@linaro.org>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arc/Kconfig            | 1 -
 arch/arm/Kconfig            | 2 +-
 arch/arm64/Kconfig          | 1 -
 arch/csky/Kconfig           | 1 -
 arch/mips/Kconfig           | 1 -
 arch/riscv/Kconfig          | 1 -
 arch/sh/Kconfig             | 2 +-
 arch/unicore32/Kconfig      | 1 -
 arch/x86/Kconfig            | 1 -
 drivers/mfd/Kconfig         | 2 ++
 drivers/of/Kconfig          | 3 ++-
 include/linux/device.h      | 2 +-
 include/linux/dma-mapping.h | 8 ++++----
 kernel/dma/Kconfig          | 2 +-
 kernel/dma/Makefile         | 2 +-
 15 files changed, 13 insertions(+), 17 deletions(-)

(limited to 'include')

diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index ab8d6131c954..728a0f6f838c 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -31,7 +31,6 @@ config ARC
 	select HAVE_ARCH_TRACEHOOK
 	select HAVE_DEBUG_STACKOVERFLOW
 	select HAVE_FUTEX_CMPXCHG if FUTEX
-	select HAVE_GENERIC_DMA_COHERENT
 	select HAVE_IOREMAP_PROT
 	select HAVE_KERNEL_GZIP
 	select HAVE_KERNEL_LZMA
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index e07e5c184d2f..33612e6da19a 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -32,6 +32,7 @@ config ARM
 	select CLONE_BACKWARDS
 	select CPU_PM if SUSPEND || CPU_IDLE
 	select DCACHE_WORD_ACCESS if HAVE_EFFICIENT_UNALIGNED_ACCESS
+	select DMA_DECLARE_COHERENT
 	select DMA_REMAP if MMU
 	select EDAC_SUPPORT
 	select EDAC_ATOMIC_SCRUB
@@ -74,7 +75,6 @@ config ARM
 	select HAVE_FUNCTION_GRAPH_TRACER if !THUMB2_KERNEL
 	select HAVE_FUNCTION_TRACER if !XIP_KERNEL
 	select HAVE_GCC_PLUGINS
-	select HAVE_GENERIC_DMA_COHERENT
 	select HAVE_HW_BREAKPOINT if PERF_EVENTS && (CPU_V6 || CPU_V6K || CPU_V7)
 	select HAVE_IDE if PCI || ISA || PCMCIA
 	select HAVE_IRQ_TIME_ACCOUNTING
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index fbcf521e1c9f..e86fac1e6b03 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -139,7 +139,6 @@ config ARM64
 	select HAVE_FUNCTION_TRACER
 	select HAVE_FUNCTION_GRAPH_TRACER
 	select HAVE_GCC_PLUGINS
-	select HAVE_GENERIC_DMA_COHERENT
 	select HAVE_HW_BREAKPOINT if PERF_EVENTS
 	select HAVE_IRQ_TIME_ACCOUNTING
 	select HAVE_MEMBLOCK_NODE_MAP if NUMA
diff --git a/arch/csky/Kconfig b/arch/csky/Kconfig
index 0a9595afe9be..c009a8c63946 100644
--- a/arch/csky/Kconfig
+++ b/arch/csky/Kconfig
@@ -30,7 +30,6 @@ config CSKY
 	select HAVE_ARCH_TRACEHOOK
 	select HAVE_FUNCTION_TRACER
 	select HAVE_FUNCTION_GRAPH_TRACER
-	select HAVE_GENERIC_DMA_COHERENT
 	select HAVE_KERNEL_GZIP
 	select HAVE_KERNEL_LZO
 	select HAVE_KERNEL_LZMA
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index dc5d70f674e0..433b9dd35824 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -56,7 +56,6 @@ config MIPS
 	select HAVE_FTRACE_MCOUNT_RECORD
 	select HAVE_FUNCTION_GRAPH_TRACER
 	select HAVE_FUNCTION_TRACER
-	select HAVE_GENERIC_DMA_COHERENT
 	select HAVE_IDE
 	select HAVE_IOREMAP_PROT
 	select HAVE_IRQ_EXIT_ON_IRQ_STACK
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index feeeaa60697c..51b9c97751bf 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -32,7 +32,6 @@ config RISCV
 	select HAVE_MEMBLOCK_NODE_MAP
 	select HAVE_DMA_CONTIGUOUS
 	select HAVE_FUTEX_CMPXCHG if FUTEX
-	select HAVE_GENERIC_DMA_COHERENT
 	select HAVE_PERF_EVENTS
 	select HAVE_SYSCALL_TRACEPOINTS
 	select IRQ_DOMAIN
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index a9c36f95744a..a3d2a24e75c7 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -7,11 +7,11 @@ config SUPERH
 	select ARCH_NO_COHERENT_DMA_MMAP if !MMU
 	select HAVE_PATA_PLATFORM
 	select CLKDEV_LOOKUP
+	select DMA_DECLARE_COHERENT
 	select HAVE_IDE if HAS_IOPORT_MAP
 	select HAVE_MEMBLOCK_NODE_MAP
 	select ARCH_DISCARD_MEMBLOCK
 	select HAVE_OPROFILE
-	select HAVE_GENERIC_DMA_COHERENT
 	select HAVE_ARCH_TRACEHOOK
 	select HAVE_PERF_EVENTS
 	select HAVE_DEBUG_BUGVERBOSE
diff --git a/arch/unicore32/Kconfig b/arch/unicore32/Kconfig
index c3a41bfe161b..6d2891d37e32 100644
--- a/arch/unicore32/Kconfig
+++ b/arch/unicore32/Kconfig
@@ -4,7 +4,6 @@ config UNICORE32
 	select ARCH_HAS_DEVMEM_IS_ALLOWED
 	select ARCH_MIGHT_HAVE_PC_PARPORT
 	select ARCH_MIGHT_HAVE_PC_SERIO
-	select HAVE_GENERIC_DMA_COHERENT
 	select HAVE_KERNEL_GZIP
 	select HAVE_KERNEL_BZIP2
 	select GENERIC_ATOMIC64
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 26387c7bf305..0e33dede053e 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -15,7 +15,6 @@ config X86_32
 	select CLKSRC_I8253
 	select CLONE_BACKWARDS
 	select HAVE_AOUT
-	select HAVE_GENERIC_DMA_COHERENT
 	select MODULES_USE_ELF_REL
 	select OLD_SIGACTION
 
diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index f15f6489803d..c3ccf2c7b3ef 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -1067,6 +1067,7 @@ config MFD_SI476X_CORE
 config MFD_SM501
 	tristate "Silicon Motion SM501"
 	depends on HAS_DMA
+	select DMA_DECLARE_COHERENT
 	 ---help---
 	  This is the core driver for the Silicon Motion SM501 multimedia
 	  companion chip. This device is a multifunction device which may
@@ -1675,6 +1676,7 @@ config MFD_TC6393XB
 	select GPIOLIB
 	select MFD_CORE
 	select MFD_TMIO
+	select DMA_DECLARE_COHERENT
 	help
 	  Support for Toshiba Mobile IO Controller TC6393XB
 
diff --git a/drivers/of/Kconfig b/drivers/of/Kconfig
index 3607fd2810e4..37c2ccbefecd 100644
--- a/drivers/of/Kconfig
+++ b/drivers/of/Kconfig
@@ -43,6 +43,7 @@ config OF_FLATTREE
 
 config OF_EARLY_FLATTREE
 	bool
+	select DMA_DECLARE_COHERENT if HAS_DMA
 	select OF_FLATTREE
 
 config OF_PROMTREE
@@ -83,7 +84,7 @@ config OF_MDIO
 config OF_RESERVED_MEM
 	bool
 	depends on OF_EARLY_FLATTREE
-	default y if HAVE_GENERIC_DMA_COHERENT || DMA_CMA
+	default y if DMA_DECLARE_COHERENT || DMA_CMA
 
 config OF_RESOLVE
 	bool
diff --git a/include/linux/device.h b/include/linux/device.h
index be544400acdd..c52d90348cef 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -1017,7 +1017,7 @@ struct device {
 
 	struct list_head	dma_pools;	/* dma pools (if dma'ble) */
 
-#ifdef CONFIG_HAVE_GENERIC_DMA_COHERENT
+#ifdef CONFIG_DMA_DECLARE_COHERENT
 	struct dma_coherent_mem	*dma_mem; /* internal for coherent mem
 					     override */
 #endif
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index 4210c5c1dd21..e29441b8b3b7 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -153,7 +153,7 @@ static inline int is_device_dma_capable(struct device *dev)
 	return dev->dma_mask != NULL && *dev->dma_mask != DMA_MASK_NONE;
 }
 
-#ifdef CONFIG_HAVE_GENERIC_DMA_COHERENT
+#ifdef CONFIG_DMA_DECLARE_COHERENT
 /*
  * These three functions are only for dma allocator.
  * Don't use them in device drivers.
@@ -192,7 +192,7 @@ static inline int dma_mmap_from_global_coherent(struct vm_area_struct *vma,
 {
 	return 0;
 }
-#endif /* CONFIG_HAVE_GENERIC_DMA_COHERENT */
+#endif /* CONFIG_DMA_DECLARE_COHERENT */
 
 static inline bool dma_is_direct(const struct dma_map_ops *ops)
 {
@@ -739,7 +739,7 @@ static inline int dma_get_cache_alignment(void)
 /* flags for the coherent memory api */
 #define DMA_MEMORY_EXCLUSIVE		0x01
 
-#ifdef CONFIG_HAVE_GENERIC_DMA_COHERENT
+#ifdef CONFIG_DMA_DECLARE_COHERENT
 int dma_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr,
 				dma_addr_t device_addr, size_t size, int flags);
 void dma_release_declared_memory(struct device *dev);
@@ -764,7 +764,7 @@ dma_mark_declared_memory_occupied(struct device *dev,
 {
 	return ERR_PTR(-EBUSY);
 }
-#endif /* CONFIG_HAVE_GENERIC_DMA_COHERENT */
+#endif /* CONFIG_DMA_DECLARE_COHERENT */
 
 static inline void *dmam_alloc_coherent(struct device *dev, size_t size,
 		dma_addr_t *dma_handle, gfp_t gfp)
diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig
index bde9179c6ed7..24d45c78c671 100644
--- a/kernel/dma/Kconfig
+++ b/kernel/dma/Kconfig
@@ -16,7 +16,7 @@ config ARCH_DMA_ADDR_T_64BIT
 config ARCH_HAS_DMA_COHERENCE_H
 	bool
 
-config HAVE_GENERIC_DMA_COHERENT
+config DMA_DECLARE_COHERENT
 	bool
 
 config ARCH_HAS_SETUP_DMA_OPS
diff --git a/kernel/dma/Makefile b/kernel/dma/Makefile
index 72ff6e46aa86..d237cf3dc181 100644
--- a/kernel/dma/Makefile
+++ b/kernel/dma/Makefile
@@ -2,7 +2,7 @@
 
 obj-$(CONFIG_HAS_DMA)			+= mapping.o direct.o dummy.o
 obj-$(CONFIG_DMA_CMA)			+= contiguous.o
-obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) += coherent.o
+obj-$(CONFIG_DMA_DECLARE_COHERENT)	+= coherent.o
 obj-$(CONFIG_DMA_VIRT_OPS)		+= virt.o
 obj-$(CONFIG_DMA_API_DEBUG)		+= debug.o
 obj-$(CONFIG_SWIOTLB)			+= swiotlb.o
-- 
cgit v1.2.3


From 91a6fda95cb67c94b887355690d1923a7eb6f630 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 25 Dec 2018 17:27:14 +0100
Subject: dma-mapping: remove dma_mark_declared_memory_occupied

This API is not used anywhere, so remove it.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 Documentation/DMA-API.txt   | 17 -----------------
 include/linux/dma-mapping.h |  9 ---------
 kernel/dma/coherent.c       | 23 -----------------------
 3 files changed, 49 deletions(-)

(limited to 'include')

diff --git a/Documentation/DMA-API.txt b/Documentation/DMA-API.txt
index 78114ee63057..b9d0cba83877 100644
--- a/Documentation/DMA-API.txt
+++ b/Documentation/DMA-API.txt
@@ -605,23 +605,6 @@ unconditionally having removed all the required structures.  It is the
 driver's job to ensure that no parts of this memory region are
 currently in use.
 
-::
-
-	void *
-	dma_mark_declared_memory_occupied(struct device *dev,
-					  dma_addr_t device_addr, size_t size)
-
-This is used to occupy specific regions of the declared space
-(dma_alloc_coherent() will hand out the first free region it finds).
-
-device_addr is the *device* address of the region requested.
-
-size is the size (and should be a page-sized multiple).
-
-The return value will be either a pointer to the processor virtual
-address of the memory, or an error (via PTR_ERR()) if any part of the
-region is occupied.
-
 Part III - Debug drivers use of the DMA-API
 -------------------------------------------
 
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index e29441b8b3b7..d29faadf6ef2 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -743,8 +743,6 @@ static inline int dma_get_cache_alignment(void)
 int dma_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr,
 				dma_addr_t device_addr, size_t size, int flags);
 void dma_release_declared_memory(struct device *dev);
-void *dma_mark_declared_memory_occupied(struct device *dev,
-					dma_addr_t device_addr, size_t size);
 #else
 static inline int
 dma_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr,
@@ -757,13 +755,6 @@ static inline void
 dma_release_declared_memory(struct device *dev)
 {
 }
-
-static inline void *
-dma_mark_declared_memory_occupied(struct device *dev,
-				  dma_addr_t device_addr, size_t size)
-{
-	return ERR_PTR(-EBUSY);
-}
 #endif /* CONFIG_DMA_DECLARE_COHERENT */
 
 static inline void *dmam_alloc_coherent(struct device *dev, size_t size,
diff --git a/kernel/dma/coherent.c b/kernel/dma/coherent.c
index 4b76aba574c2..1d12a31af6d7 100644
--- a/kernel/dma/coherent.c
+++ b/kernel/dma/coherent.c
@@ -137,29 +137,6 @@ void dma_release_declared_memory(struct device *dev)
 }
 EXPORT_SYMBOL(dma_release_declared_memory);
 
-void *dma_mark_declared_memory_occupied(struct device *dev,
-					dma_addr_t device_addr, size_t size)
-{
-	struct dma_coherent_mem *mem = dev->dma_mem;
-	unsigned long flags;
-	int pos, err;
-
-	size += device_addr & ~PAGE_MASK;
-
-	if (!mem)
-		return ERR_PTR(-EINVAL);
-
-	spin_lock_irqsave(&mem->spinlock, flags);
-	pos = PFN_DOWN(device_addr - dma_get_device_base(dev, mem));
-	err = bitmap_allocate_region(mem->bitmap, pos, get_order(size));
-	spin_unlock_irqrestore(&mem->spinlock, flags);
-
-	if (err != 0)
-		return ERR_PTR(err);
-	return mem->virt_base + (pos << PAGE_SHIFT);
-}
-EXPORT_SYMBOL(dma_mark_declared_memory_occupied);
-
 static void *__dma_alloc_from_coherent(struct dma_coherent_mem *mem,
 		ssize_t size, dma_addr_t *dma_handle)
 {
-- 
cgit v1.2.3


From 82c5de0ab8dbd6035223ad69e76bd8a88a0a9399 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 25 Dec 2018 13:29:54 +0100
Subject: dma-mapping: remove the DMA_MEMORY_EXCLUSIVE flag

All users of dma_declare_coherent want their allocations to be
exclusive, so default to exclusive allocations.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/DMA-API.txt                          |  9 +-------
 arch/arm/mach-imx/mach-imx27_visstrim_m10.c        | 12 ++++-------
 arch/arm/mach-imx/mach-mx31moboard.c               |  3 +--
 arch/sh/boards/mach-ap325rxa/setup.c               |  5 ++---
 arch/sh/boards/mach-ecovec24/setup.c               |  6 ++----
 arch/sh/boards/mach-kfr2r09/setup.c                |  5 ++---
 arch/sh/boards/mach-migor/setup.c                  |  5 ++---
 arch/sh/boards/mach-se/7724/setup.c                |  6 ++----
 arch/sh/drivers/pci/fixups-dreamcast.c             |  3 +--
 .../platform/soc_camera/sh_mobile_ceu_camera.c     |  3 +--
 drivers/usb/host/ohci-sm501.c                      |  3 +--
 drivers/usb/host/ohci-tmio.c                       |  2 +-
 include/linux/dma-mapping.h                        |  7 ++----
 kernel/dma/coherent.c                              | 25 ++++++----------------
 14 files changed, 29 insertions(+), 65 deletions(-)

(limited to 'include')

diff --git a/Documentation/DMA-API.txt b/Documentation/DMA-API.txt
index b9d0cba83877..38e561b773b4 100644
--- a/Documentation/DMA-API.txt
+++ b/Documentation/DMA-API.txt
@@ -566,8 +566,7 @@ boundaries when doing this.
 
 	int
 	dma_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr,
-				    dma_addr_t device_addr, size_t size, int
-				    flags)
+				    dma_addr_t device_addr, size_t size);
 
 Declare region of memory to be handed out by dma_alloc_coherent() when
 it's asked for coherent memory for this device.
@@ -581,12 +580,6 @@ dma_addr_t in dma_alloc_coherent()).
 
 size is the size of the area (must be multiples of PAGE_SIZE).
 
-flags can be ORed together and are:
-
-- DMA_MEMORY_EXCLUSIVE - only allocate memory from the declared regions.
-  Do not allow dma_alloc_coherent() to fall back to system memory when
-  it's out of memory in the declared region.
-
 As a simplification for the platforms, only *one* such region of
 memory may be declared per device.
 
diff --git a/arch/arm/mach-imx/mach-imx27_visstrim_m10.c b/arch/arm/mach-imx/mach-imx27_visstrim_m10.c
index 5169dfba9718..07d4fcfe5c2e 100644
--- a/arch/arm/mach-imx/mach-imx27_visstrim_m10.c
+++ b/arch/arm/mach-imx/mach-imx27_visstrim_m10.c
@@ -258,8 +258,7 @@ static void __init visstrim_analog_camera_init(void)
 		return;
 
 	dma_declare_coherent_memory(&pdev->dev, mx2_camera_base,
-				    mx2_camera_base, MX2_CAMERA_BUF_SIZE,
-				    DMA_MEMORY_EXCLUSIVE);
+				    mx2_camera_base, MX2_CAMERA_BUF_SIZE);
 }
 
 static void __init visstrim_reserve(void)
@@ -445,8 +444,7 @@ static void __init visstrim_coda_init(void)
 	dma_declare_coherent_memory(&pdev->dev,
 				    mx2_camera_base + MX2_CAMERA_BUF_SIZE,
 				    mx2_camera_base + MX2_CAMERA_BUF_SIZE,
-				    MX2_CAMERA_BUF_SIZE,
-				    DMA_MEMORY_EXCLUSIVE);
+				    MX2_CAMERA_BUF_SIZE);
 }
 
 /* DMA deinterlace */
@@ -465,8 +463,7 @@ static void __init visstrim_deinterlace_init(void)
 	dma_declare_coherent_memory(&pdev->dev,
 				    mx2_camera_base + 2 * MX2_CAMERA_BUF_SIZE,
 				    mx2_camera_base + 2 * MX2_CAMERA_BUF_SIZE,
-				    MX2_CAMERA_BUF_SIZE,
-				    DMA_MEMORY_EXCLUSIVE);
+				    MX2_CAMERA_BUF_SIZE);
 }
 
 /* Emma-PrP for format conversion */
@@ -485,8 +482,7 @@ static void __init visstrim_emmaprp_init(void)
 	 */
 	ret = dma_declare_coherent_memory(&pdev->dev,
 				mx2_camera_base, mx2_camera_base,
-				MX2_CAMERA_BUF_SIZE,
-				DMA_MEMORY_EXCLUSIVE);
+				MX2_CAMERA_BUF_SIZE);
 	if (ret)
 		pr_err("Failed to declare memory for emmaprp\n");
 }
diff --git a/arch/arm/mach-imx/mach-mx31moboard.c b/arch/arm/mach-imx/mach-mx31moboard.c
index 643a3d749703..fe50f4cf00a7 100644
--- a/arch/arm/mach-imx/mach-mx31moboard.c
+++ b/arch/arm/mach-imx/mach-mx31moboard.c
@@ -475,8 +475,7 @@ static int __init mx31moboard_init_cam(void)
 
 	ret = dma_declare_coherent_memory(&pdev->dev,
 					  mx3_camera_base, mx3_camera_base,
-					  MX3_CAMERA_BUF_SIZE,
-					  DMA_MEMORY_EXCLUSIVE);
+					  MX3_CAMERA_BUF_SIZE);
 	if (ret)
 		goto err;
 
diff --git a/arch/sh/boards/mach-ap325rxa/setup.c b/arch/sh/boards/mach-ap325rxa/setup.c
index 8f234d0435aa..7899b4f51fdd 100644
--- a/arch/sh/boards/mach-ap325rxa/setup.c
+++ b/arch/sh/boards/mach-ap325rxa/setup.c
@@ -529,9 +529,8 @@ static int __init ap325rxa_devices_setup(void)
 	device_initialize(&ap325rxa_ceu_device.dev);
 	arch_setup_pdev_archdata(&ap325rxa_ceu_device);
 	dma_declare_coherent_memory(&ap325rxa_ceu_device.dev,
-				    ceu_dma_membase, ceu_dma_membase,
-				    ceu_dma_membase + CEU_BUFFER_MEMORY_SIZE - 1,
-				    DMA_MEMORY_EXCLUSIVE);
+			ceu_dma_membase, ceu_dma_membase,
+			ceu_dma_membase + CEU_BUFFER_MEMORY_SIZE - 1);
 
 	platform_device_add(&ap325rxa_ceu_device);
 
diff --git a/arch/sh/boards/mach-ecovec24/setup.c b/arch/sh/boards/mach-ecovec24/setup.c
index 22b4106b8084..eb66754cfb8c 100644
--- a/arch/sh/boards/mach-ecovec24/setup.c
+++ b/arch/sh/boards/mach-ecovec24/setup.c
@@ -1440,8 +1440,7 @@ static int __init arch_setup(void)
 	dma_declare_coherent_memory(&ecovec_ceu_devices[0]->dev,
 				    ceu0_dma_membase, ceu0_dma_membase,
 				    ceu0_dma_membase +
-				    CEU_BUFFER_MEMORY_SIZE - 1,
-				    DMA_MEMORY_EXCLUSIVE);
+				    CEU_BUFFER_MEMORY_SIZE - 1);
 	platform_device_add(ecovec_ceu_devices[0]);
 
 	device_initialize(&ecovec_ceu_devices[1]->dev);
@@ -1449,8 +1448,7 @@ static int __init arch_setup(void)
 	dma_declare_coherent_memory(&ecovec_ceu_devices[1]->dev,
 				    ceu1_dma_membase, ceu1_dma_membase,
 				    ceu1_dma_membase +
-				    CEU_BUFFER_MEMORY_SIZE - 1,
-				    DMA_MEMORY_EXCLUSIVE);
+				    CEU_BUFFER_MEMORY_SIZE - 1);
 	platform_device_add(ecovec_ceu_devices[1]);
 
 	gpiod_add_lookup_table(&cn12_power_gpiod_table);
diff --git a/arch/sh/boards/mach-kfr2r09/setup.c b/arch/sh/boards/mach-kfr2r09/setup.c
index 203d249a0a2b..b8bf67c86eab 100644
--- a/arch/sh/boards/mach-kfr2r09/setup.c
+++ b/arch/sh/boards/mach-kfr2r09/setup.c
@@ -603,9 +603,8 @@ static int __init kfr2r09_devices_setup(void)
 	device_initialize(&kfr2r09_ceu_device.dev);
 	arch_setup_pdev_archdata(&kfr2r09_ceu_device);
 	dma_declare_coherent_memory(&kfr2r09_ceu_device.dev,
-				    ceu_dma_membase, ceu_dma_membase,
-				    ceu_dma_membase + CEU_BUFFER_MEMORY_SIZE - 1,
-				    DMA_MEMORY_EXCLUSIVE);
+			ceu_dma_membase, ceu_dma_membase,
+			ceu_dma_membase + CEU_BUFFER_MEMORY_SIZE - 1);
 
 	platform_device_add(&kfr2r09_ceu_device);
 
diff --git a/arch/sh/boards/mach-migor/setup.c b/arch/sh/boards/mach-migor/setup.c
index f4ad33c6d2aa..bcd249e6cfcc 100644
--- a/arch/sh/boards/mach-migor/setup.c
+++ b/arch/sh/boards/mach-migor/setup.c
@@ -603,9 +603,8 @@ static int __init migor_devices_setup(void)
 	device_initialize(&migor_ceu_device.dev);
 	arch_setup_pdev_archdata(&migor_ceu_device);
 	dma_declare_coherent_memory(&migor_ceu_device.dev,
-				    ceu_dma_membase, ceu_dma_membase,
-				    ceu_dma_membase + CEU_BUFFER_MEMORY_SIZE - 1,
-				    DMA_MEMORY_EXCLUSIVE);
+			ceu_dma_membase, ceu_dma_membase,
+			ceu_dma_membase + CEU_BUFFER_MEMORY_SIZE - 1);
 
 	platform_device_add(&migor_ceu_device);
 
diff --git a/arch/sh/boards/mach-se/7724/setup.c b/arch/sh/boards/mach-se/7724/setup.c
index fdbec22ae687..13c2d3ce78f4 100644
--- a/arch/sh/boards/mach-se/7724/setup.c
+++ b/arch/sh/boards/mach-se/7724/setup.c
@@ -941,8 +941,7 @@ static int __init devices_setup(void)
 	dma_declare_coherent_memory(&ms7724se_ceu_devices[0]->dev,
 				    ceu0_dma_membase, ceu0_dma_membase,
 				    ceu0_dma_membase +
-				    CEU_BUFFER_MEMORY_SIZE - 1,
-				    DMA_MEMORY_EXCLUSIVE);
+				    CEU_BUFFER_MEMORY_SIZE - 1);
 	platform_device_add(ms7724se_ceu_devices[0]);
 
 	device_initialize(&ms7724se_ceu_devices[1]->dev);
@@ -950,8 +949,7 @@ static int __init devices_setup(void)
 	dma_declare_coherent_memory(&ms7724se_ceu_devices[1]->dev,
 				    ceu1_dma_membase, ceu1_dma_membase,
 				    ceu1_dma_membase +
-				    CEU_BUFFER_MEMORY_SIZE - 1,
-				    DMA_MEMORY_EXCLUSIVE);
+				    CEU_BUFFER_MEMORY_SIZE - 1);
 	platform_device_add(ms7724se_ceu_devices[1]);
 
 	return platform_add_devices(ms7724se_devices,
diff --git a/arch/sh/drivers/pci/fixups-dreamcast.c b/arch/sh/drivers/pci/fixups-dreamcast.c
index dfdbd05b6eb1..7be8694c0d13 100644
--- a/arch/sh/drivers/pci/fixups-dreamcast.c
+++ b/arch/sh/drivers/pci/fixups-dreamcast.c
@@ -63,8 +63,7 @@ static void gapspci_fixup_resources(struct pci_dev *dev)
 		BUG_ON(dma_declare_coherent_memory(&dev->dev,
 						res.start,
 						region.start,
-						resource_size(&res),
-						DMA_MEMORY_EXCLUSIVE));
+						resource_size(&res)));
 		break;
 	default:
 		printk("PCI: Failed resource fixup\n");
diff --git a/drivers/media/platform/soc_camera/sh_mobile_ceu_camera.c b/drivers/media/platform/soc_camera/sh_mobile_ceu_camera.c
index 6803f744e307..cc357b8db1dc 100644
--- a/drivers/media/platform/soc_camera/sh_mobile_ceu_camera.c
+++ b/drivers/media/platform/soc_camera/sh_mobile_ceu_camera.c
@@ -1708,8 +1708,7 @@ static int sh_mobile_ceu_probe(struct platform_device *pdev)
 	if (res) {
 		err = dma_declare_coherent_memory(&pdev->dev, res->start,
 						  res->start,
-						  resource_size(res),
-						  DMA_MEMORY_EXCLUSIVE);
+						  resource_size(res));
 		if (err) {
 			dev_err(&pdev->dev, "Unable to declare CEU memory.\n");
 			return err;
diff --git a/drivers/usb/host/ohci-sm501.c b/drivers/usb/host/ohci-sm501.c
index c9233cddf9a2..c26228c25f99 100644
--- a/drivers/usb/host/ohci-sm501.c
+++ b/drivers/usb/host/ohci-sm501.c
@@ -126,8 +126,7 @@ static int ohci_hcd_sm501_drv_probe(struct platform_device *pdev)
 
 	retval = dma_declare_coherent_memory(dev, mem->start,
 					 mem->start - mem->parent->start,
-					 resource_size(mem),
-					 DMA_MEMORY_EXCLUSIVE);
+					 resource_size(mem));
 	if (retval) {
 		dev_err(dev, "cannot declare coherent memory\n");
 		goto err1;
diff --git a/drivers/usb/host/ohci-tmio.c b/drivers/usb/host/ohci-tmio.c
index a631dbb369d7..f88a0370659f 100644
--- a/drivers/usb/host/ohci-tmio.c
+++ b/drivers/usb/host/ohci-tmio.c
@@ -225,7 +225,7 @@ static int ohci_hcd_tmio_drv_probe(struct platform_device *dev)
 	}
 
 	ret = dma_declare_coherent_memory(&dev->dev, sram->start, sram->start,
-				resource_size(sram), DMA_MEMORY_EXCLUSIVE);
+				resource_size(sram));
 	if (ret)
 		goto err_dma_declare;
 
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index d29faadf6ef2..70ad15758a70 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -736,17 +736,14 @@ static inline int dma_get_cache_alignment(void)
 	return 1;
 }
 
-/* flags for the coherent memory api */
-#define DMA_MEMORY_EXCLUSIVE		0x01
-
 #ifdef CONFIG_DMA_DECLARE_COHERENT
 int dma_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr,
-				dma_addr_t device_addr, size_t size, int flags);
+				dma_addr_t device_addr, size_t size);
 void dma_release_declared_memory(struct device *dev);
 #else
 static inline int
 dma_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr,
-			    dma_addr_t device_addr, size_t size, int flags)
+			    dma_addr_t device_addr, size_t size)
 {
 	return -ENOSYS;
 }
diff --git a/kernel/dma/coherent.c b/kernel/dma/coherent.c
index 1d12a31af6d7..29fd6590dc1e 100644
--- a/kernel/dma/coherent.c
+++ b/kernel/dma/coherent.c
@@ -14,7 +14,6 @@ struct dma_coherent_mem {
 	dma_addr_t	device_base;
 	unsigned long	pfn_base;
 	int		size;
-	int		flags;
 	unsigned long	*bitmap;
 	spinlock_t	spinlock;
 	bool		use_dev_dma_pfn_offset;
@@ -38,9 +37,9 @@ static inline dma_addr_t dma_get_device_base(struct device *dev,
 		return mem->device_base;
 }
 
-static int dma_init_coherent_memory(
-	phys_addr_t phys_addr, dma_addr_t device_addr, size_t size, int flags,
-	struct dma_coherent_mem **mem)
+static int dma_init_coherent_memory(phys_addr_t phys_addr,
+		dma_addr_t device_addr, size_t size,
+		struct dma_coherent_mem **mem)
 {
 	struct dma_coherent_mem *dma_mem = NULL;
 	void *mem_base = NULL;
@@ -73,7 +72,6 @@ static int dma_init_coherent_memory(
 	dma_mem->device_base = device_addr;
 	dma_mem->pfn_base = PFN_DOWN(phys_addr);
 	dma_mem->size = pages;
-	dma_mem->flags = flags;
 	spin_lock_init(&dma_mem->spinlock);
 
 	*mem = dma_mem;
@@ -110,12 +108,12 @@ static int dma_assign_coherent_memory(struct device *dev,
 }
 
 int dma_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr,
-				dma_addr_t device_addr, size_t size, int flags)
+				dma_addr_t device_addr, size_t size)
 {
 	struct dma_coherent_mem *mem;
 	int ret;
 
-	ret = dma_init_coherent_memory(phys_addr, device_addr, size, flags, &mem);
+	ret = dma_init_coherent_memory(phys_addr, device_addr, size, &mem);
 	if (ret)
 		return ret;
 
@@ -190,15 +188,7 @@ int dma_alloc_from_dev_coherent(struct device *dev, ssize_t size,
 		return 0;
 
 	*ret = __dma_alloc_from_coherent(mem, size, dma_handle);
-	if (*ret)
-		return 1;
-
-	/*
-	 * In the case where the allocation can not be satisfied from the
-	 * per-device area, try to fall back to generic memory if the
-	 * constraints allow it.
-	 */
-	return mem->flags & DMA_MEMORY_EXCLUSIVE;
+	return 1;
 }
 
 void *dma_alloc_from_global_coherent(ssize_t size, dma_addr_t *dma_handle)
@@ -327,8 +317,7 @@ static int rmem_dma_device_init(struct reserved_mem *rmem, struct device *dev)
 
 	if (!mem) {
 		ret = dma_init_coherent_memory(rmem->base, rmem->base,
-					       rmem->size,
-					       DMA_MEMORY_EXCLUSIVE, &mem);
+					       rmem->size, &mem);
 		if (ret) {
 			pr_err("Reserved memory: failed to init DMA memory pool at %pa, size %ld MiB\n",
 				&rmem->base, (unsigned long)rmem->size / SZ_1M);
-- 
cgit v1.2.3


From aeb0d80a21dbf13ce8cd3e62b86d62dfa1f81bef Mon Sep 17 00:00:00 2001
From: Ramalingam C <ramalingam.c@intel.com>
Date: Sat, 16 Feb 2019 23:06:56 +0530
Subject: drm: HDCP2.2 link check period

Time period for HDCP2.2 link check.

Signed-off-by: Ramalingam C <ramalingam.c@intel.com>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Reviewed-by: Uma Shankar <uma.shankar@intel.com>
Acked-by: Dave Airlie <airlied@gmail.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: https://patchwork.freedesktop.org/patch/msgid/1550338640-17470-10-git-send-email-ramalingam.c@intel.com
---
 include/drm/drm_hdcp.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/drm/drm_hdcp.h b/include/drm/drm_hdcp.h
index 7260b31af276..d4e98b11b4aa 100644
--- a/include/drm/drm_hdcp.h
+++ b/include/drm/drm_hdcp.h
@@ -13,6 +13,7 @@
 
 /* Period of hdcp checks (to ensure we're still authenticated) */
 #define DRM_HDCP_CHECK_PERIOD_MS		(128 * 16)
+#define DRM_HDCP2_CHECK_PERIOD_MS		500
 
 /* Shared lengths/masks between HDMI/DVI/DisplayPort */
 #define DRM_HDCP_AN_LEN				8
-- 
cgit v1.2.3


From 956af8964bb50fcd08a564f001b6f56cfe52f94b Mon Sep 17 00:00:00 2001
From: Ramalingam C <ramalingam.c@intel.com>
Date: Sat, 16 Feb 2019 23:06:59 +0530
Subject: drm: removing the DP Errata msg and its msg id

Since DP ERRATA message is not defined at spec, those structure
definition is removed from drm_hdcp.h

Signed-off-by: Ramalingam C <ramalingam.c@intel.com>
Suggested-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Reviewed-by: Uma Shankar <uma.shankar@intel.com>
Acked-by: Dave Airlie <airlied@gmail.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: https://patchwork.freedesktop.org/patch/msgid/1550338640-17470-13-git-send-email-ramalingam.c@intel.com
---
 include/drm/drm_hdcp.h | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'include')

diff --git a/include/drm/drm_hdcp.h b/include/drm/drm_hdcp.h
index d4e98b11b4aa..f243408ecf26 100644
--- a/include/drm/drm_hdcp.h
+++ b/include/drm/drm_hdcp.h
@@ -69,7 +69,6 @@
 #define HDCP_2_2_REP_SEND_ACK			15
 #define HDCP_2_2_REP_STREAM_MANAGE		16
 #define HDCP_2_2_REP_STREAM_READY		17
-#define HDCP_2_2_ERRATA_DP_STREAM_TYPE		50
 
 #define HDCP_2_2_RTX_LEN			8
 #define HDCP_2_2_RRX_LEN			8
@@ -220,11 +219,6 @@ struct hdcp2_rep_stream_ready {
 	u8	m_prime[HDCP_2_2_MPRIME_LEN];
 } __packed;
 
-struct hdcp2_dp_errata_stream_type {
-	u8	msg_id;
-	u8	stream_type;
-} __packed;
-
 /* HDCP2.2 TIMEOUTs in mSec */
 #define HDCP_2_2_CERT_TIMEOUT_MS		100
 #define HDCP_2_2_HPRIME_NO_PAIRED_TIMEOUT_MS	1000
-- 
cgit v1.2.3


From 078b5fd92c4913dd367361db6c28568386077c89 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@hammerspace.com>
Date: Mon, 18 Feb 2019 11:35:54 -0500
Subject: NFS: Clean up list moves of struct nfs_page

In several places we're just moving the struct nfs_page from one list to
another by first removing from the existing list, then adding to the new
one.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/direct.c          |  3 +--
 fs/nfs/pagelist.c        | 12 ++++--------
 include/linux/nfs_page.h | 10 ++++++++++
 3 files changed, 15 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 33824a0a57bf..1377ee20ecf9 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -664,8 +664,7 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
 
 	list_for_each_entry_safe(req, tmp, &reqs, wb_list) {
 		if (!nfs_pageio_add_request(&desc, req)) {
-			nfs_list_remove_request(req);
-			nfs_list_add_request(req, &failed);
+			nfs_list_move_request(req, &failed);
 			spin_lock(&cinfo.inode->i_lock);
 			dreq->flags = 0;
 			if (desc.pg_error < 0)
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index a8951f1f7b4e..9cbfdb979992 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -768,8 +768,7 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc,
 	pageused = 0;
 	while (!list_empty(head)) {
 		req = nfs_list_entry(head->next);
-		nfs_list_remove_request(req);
-		nfs_list_add_request(req, &hdr->pages);
+		nfs_list_move_request(req, &hdr->pages);
 
 		if (!last_page || last_page != req->wb_page) {
 			pageused++;
@@ -961,8 +960,7 @@ static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc,
 	}
 	if (!nfs_can_coalesce_requests(prev, req, desc))
 		return 0;
-	nfs_list_remove_request(req);
-	nfs_list_add_request(req, &mirror->pg_list);
+	nfs_list_move_request(req, &mirror->pg_list);
 	mirror->pg_count += req->wb_bytes;
 	return 1;
 }
@@ -994,8 +992,7 @@ nfs_pageio_cleanup_request(struct nfs_pageio_descriptor *desc,
 {
 	LIST_HEAD(head);
 
-	nfs_list_remove_request(req);
-	nfs_list_add_request(req, &head);
+	nfs_list_move_request(req, &head);
 	desc->pg_completion_ops->error_cleanup(&head);
 }
 
@@ -1237,9 +1234,8 @@ int nfs_pageio_resend(struct nfs_pageio_descriptor *desc,
 	while (!list_empty(&hdr->pages)) {
 		struct nfs_page *req = nfs_list_entry(hdr->pages.next);
 
-		nfs_list_remove_request(req);
 		if (!nfs_pageio_add_request(desc, req))
-			nfs_list_add_request(req, &failed);
+			nfs_list_move_request(req, &failed);
 	}
 	nfs_pageio_complete(desc);
 	if (!list_empty(&failed)) {
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index e27572d30d97..ad69430fd0eb 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -164,6 +164,16 @@ nfs_list_add_request(struct nfs_page *req, struct list_head *head)
 	list_add_tail(&req->wb_list, head);
 }
 
+/**
+ * nfs_list_move_request - Move a request to a new list
+ * @req: request
+ * @head: head of list into which to insert the request.
+ */
+static inline void
+nfs_list_move_request(struct nfs_page *req, struct list_head *head)
+{
+	list_move_tail(&req->wb_list, head);
+}
 
 /**
  * nfs_list_remove_request - Remove a request from its wb_list
-- 
cgit v1.2.3


From df3accb849607a86278a37c35e6b313635ccc48b Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@hammerspace.com>
Date: Wed, 13 Feb 2019 10:39:39 -0500
Subject: NFS: Pass error information to the pgio error cleanup routine

Allow the caller to pass error information when cleaning up a failed
I/O request so that we can conditionally take action to cancel the
request altogether if the error turned out to be fatal.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/direct.c         |  4 ++--
 fs/nfs/pagelist.c       |  5 +++--
 fs/nfs/read.c           |  2 +-
 fs/nfs/write.c          | 11 +++++++++--
 include/linux/nfs_xdr.h |  2 +-
 5 files changed, 16 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 1377ee20ecf9..0fd811ac08b5 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -428,7 +428,7 @@ out_put:
 	hdr->release(hdr);
 }
 
-static void nfs_read_sync_pgio_error(struct list_head *head)
+static void nfs_read_sync_pgio_error(struct list_head *head, int error)
 {
 	struct nfs_page *req;
 
@@ -820,7 +820,7 @@ out_put:
 	hdr->release(hdr);
 }
 
-static void nfs_write_sync_pgio_error(struct list_head *head)
+static void nfs_write_sync_pgio_error(struct list_head *head, int error)
 {
 	struct nfs_page *req;
 
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 9cbfdb979992..695afb7de3a7 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -993,7 +993,7 @@ nfs_pageio_cleanup_request(struct nfs_pageio_descriptor *desc,
 	LIST_HEAD(head);
 
 	nfs_list_move_request(req, &head);
-	desc->pg_completion_ops->error_cleanup(&head);
+	desc->pg_completion_ops->error_cleanup(&head, desc->pg_error);
 }
 
 /**
@@ -1129,7 +1129,8 @@ static void nfs_pageio_error_cleanup(struct nfs_pageio_descriptor *desc)
 
 	for (midx = 0; midx < desc->pg_mirror_count; midx++) {
 		mirror = &desc->pg_mirrors[midx];
-		desc->pg_completion_ops->error_cleanup(&mirror->pg_list);
+		desc->pg_completion_ops->error_cleanup(&mirror->pg_list,
+				desc->pg_error);
 	}
 }
 
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index f9f19784db82..1d95a60b2586 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -205,7 +205,7 @@ static void nfs_initiate_read(struct nfs_pgio_header *hdr,
 }
 
 static void
-nfs_async_read_error(struct list_head *head)
+nfs_async_read_error(struct list_head *head, int error)
 {
 	struct nfs_page	*req;
 
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index d09c9f878141..11df9f03245f 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1412,20 +1412,27 @@ static void nfs_redirty_request(struct nfs_page *req)
 	nfs_release_request(req);
 }
 
-static void nfs_async_write_error(struct list_head *head)
+static void nfs_async_write_error(struct list_head *head, int error)
 {
 	struct nfs_page	*req;
 
 	while (!list_empty(head)) {
 		req = nfs_list_entry(head->next);
 		nfs_list_remove_request(req);
+		if (nfs_error_is_fatal(error)) {
+			nfs_context_set_write_error(req->wb_context, error);
+			if (nfs_error_is_fatal_on_server(error)) {
+				nfs_write_error_remove_page(req);
+				continue;
+			}
+		}
 		nfs_redirty_request(req);
 	}
 }
 
 static void nfs_async_write_reschedule_io(struct nfs_pgio_header *hdr)
 {
-	nfs_async_write_error(&hdr->pages);
+	nfs_async_write_error(&hdr->pages, 0);
 	filemap_fdatawrite_range(hdr->inode->i_mapping, hdr->args.offset,
 			hdr->args.offset + hdr->args.count - 1);
 }
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 441a93ebcac0..b4bd2bf5f585 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1549,7 +1549,7 @@ struct nfs_commit_data {
 };
 
 struct nfs_pgio_completion_ops {
-	void	(*error_cleanup)(struct list_head *head);
+	void	(*error_cleanup)(struct list_head *head, int);
 	void	(*init_hdr)(struct nfs_pgio_header *hdr);
 	void	(*completion)(struct nfs_pgio_header *hdr);
 	void	(*reschedule_io)(struct nfs_pgio_header *hdr);
-- 
cgit v1.2.3


From 152482580a1b0accb60676063a1ac57b2d12daf6 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <sean.j.christopherson@intel.com>
Date: Tue, 5 Feb 2019 12:54:17 -0800
Subject: KVM: Call kvm_arch_memslots_updated() before updating memslots

kvm_arch_memslots_updated() is at this point in time an x86-specific
hook for handling MMIO generation wraparound.  x86 stashes 19 bits of
the memslots generation number in its MMIO sptes in order to avoid
full page fault walks for repeat faults on emulated MMIO addresses.
Because only 19 bits are used, wrapping the MMIO generation number is
possible, if unlikely.  kvm_arch_memslots_updated() alerts x86 that
the generation has changed so that it can invalidate all MMIO sptes in
case the effective MMIO generation has wrapped so as to avoid using a
stale spte, e.g. a (very) old spte that was created with generation==0.

Given that the purpose of kvm_arch_memslots_updated() is to prevent
consuming stale entries, it needs to be called before the new generation
is propagated to memslots.  Invalidating the MMIO sptes after updating
memslots means that there is a window where a vCPU could dereference
the new memslots generation, e.g. 0, and incorrectly reuse an old MMIO
spte that was created with (pre-wrap) generation==0.

Fixes: e59dbe09f8e6 ("KVM: Introduce kvm_arch_memslots_updated()")
Cc: <stable@vger.kernel.org>
Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/mips/include/asm/kvm_host.h    | 2 +-
 arch/powerpc/include/asm/kvm_host.h | 2 +-
 arch/s390/include/asm/kvm_host.h    | 2 +-
 arch/x86/include/asm/kvm_host.h     | 2 +-
 arch/x86/kvm/mmu.c                  | 4 ++--
 arch/x86/kvm/x86.c                  | 4 ++--
 include/linux/kvm_host.h            | 2 +-
 virt/kvm/arm/mmu.c                  | 2 +-
 virt/kvm/kvm_main.c                 | 7 +++++--
 9 files changed, 15 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h
index d2abd98471e8..41204a49cf95 100644
--- a/arch/mips/include/asm/kvm_host.h
+++ b/arch/mips/include/asm/kvm_host.h
@@ -1134,7 +1134,7 @@ static inline void kvm_arch_hardware_unsetup(void) {}
 static inline void kvm_arch_sync_events(struct kvm *kvm) {}
 static inline void kvm_arch_free_memslot(struct kvm *kvm,
 		struct kvm_memory_slot *free, struct kvm_memory_slot *dont) {}
-static inline void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslots *slots) {}
+static inline void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen) {}
 static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
 static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
 static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 0f98f00da2ea..19693b8add93 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -837,7 +837,7 @@ struct kvm_vcpu_arch {
 static inline void kvm_arch_hardware_disable(void) {}
 static inline void kvm_arch_hardware_unsetup(void) {}
 static inline void kvm_arch_sync_events(struct kvm *kvm) {}
-static inline void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslots *slots) {}
+static inline void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen) {}
 static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {}
 static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
 static inline void kvm_arch_exit(void) {}
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index d5d24889c3bc..c2b8c8c6c9be 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -878,7 +878,7 @@ static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
 static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
 static inline void kvm_arch_free_memslot(struct kvm *kvm,
 		struct kvm_memory_slot *free, struct kvm_memory_slot *dont) {}
-static inline void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslots *slots) {}
+static inline void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen) {}
 static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {}
 static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
 		struct kvm_memory_slot *slot) {}
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 0e2ef41efb9d..c4758e1a8843 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1254,7 +1254,7 @@ void kvm_mmu_clear_dirty_pt_masked(struct kvm *kvm,
 				   struct kvm_memory_slot *slot,
 				   gfn_t gfn_offset, unsigned long mask);
 void kvm_mmu_zap_all(struct kvm *kvm);
-void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, struct kvm_memslots *slots);
+void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen);
 unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm);
 void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages);
 
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 415d0e62cb3e..a53a0e7ad9e6 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -5893,13 +5893,13 @@ static bool kvm_has_zapped_obsolete_pages(struct kvm *kvm)
 	return unlikely(!list_empty_careful(&kvm->arch.zapped_obsolete_pages));
 }
 
-void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, struct kvm_memslots *slots)
+void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen)
 {
 	/*
 	 * The very rare case: if the generation-number is round,
 	 * zap all shadow pages.
 	 */
-	if (unlikely((slots->generation & MMIO_GEN_MASK) == 0)) {
+	if (unlikely((gen & MMIO_GEN_MASK) == 0)) {
 		kvm_debug_ratelimited("kvm: zapping shadow pages for mmio generation wraparound\n");
 		kvm_mmu_invalidate_zap_all_pages(kvm);
 	}
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 3de586f89730..03d26ffb29cd 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -9357,13 +9357,13 @@ out_free:
 	return -ENOMEM;
 }
 
-void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslots *slots)
+void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen)
 {
 	/*
 	 * memslots->generation has been incremented.
 	 * mmio generation may have reached its maximum value.
 	 */
-	kvm_mmu_invalidate_mmio_sptes(kvm, slots);
+	kvm_mmu_invalidate_mmio_sptes(kvm, gen);
 }
 
 int kvm_arch_prepare_memory_region(struct kvm *kvm,
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index c38cc5eb7e73..cf761ff58224 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -634,7 +634,7 @@ void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
 			   struct kvm_memory_slot *dont);
 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
 			    unsigned long npages);
-void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslots *slots);
+void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen);
 int kvm_arch_prepare_memory_region(struct kvm *kvm,
 				struct kvm_memory_slot *memslot,
 				const struct kvm_userspace_memory_region *mem,
diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
index fbdf3ac2f001..e0355e0f8712 100644
--- a/virt/kvm/arm/mmu.c
+++ b/virt/kvm/arm/mmu.c
@@ -2350,7 +2350,7 @@ int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
 	return 0;
 }
 
-void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslots *slots)
+void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen)
 {
 }
 
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 0a0ea8f4bb1b..d54f6578a849 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -874,6 +874,7 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm,
 		int as_id, struct kvm_memslots *slots)
 {
 	struct kvm_memslots *old_memslots = __kvm_memslots(kvm, as_id);
+	u64 gen;
 
 	/*
 	 * Set the low bit in the generation, which disables SPTE caching
@@ -896,9 +897,11 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm,
 	 * space 0 will use generations 0, 4, 8, ... while * address space 1 will
 	 * use generations 2, 6, 10, 14, ...
 	 */
-	slots->generation += KVM_ADDRESS_SPACE_NUM * 2 - 1;
+	gen = slots->generation + KVM_ADDRESS_SPACE_NUM * 2 - 1;
 
-	kvm_arch_memslots_updated(kvm, slots);
+	kvm_arch_memslots_updated(kvm, gen);
+
+	slots->generation = gen;
 
 	return old_memslots;
 }
-- 
cgit v1.2.3


From 361209e054a2c9f34da090ee1ee4c1e8bfe76a64 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <sean.j.christopherson@intel.com>
Date: Tue, 5 Feb 2019 13:01:14 -0800
Subject: KVM: Explicitly define the "memslot update in-progress" bit

KVM uses bit 0 of the memslots generation as an "update in-progress"
flag, which is used by x86 to prevent caching MMIO access while the
memslots are changing.  Although the intended behavior is flag-like,
e.g. MMIO sptes intentionally drop the in-progress bit so as to avoid
caching data from in-flux memslots, the implementation oftentimes treats
the bit as part of the generation number itself, e.g. incrementing the
generation increments twice, once to set the flag and once to clear it.

Prior to commit 4bd518f1598d ("KVM: use separate generations for
each address space"), incorporating the "update in-progress" bit into
the generation number largely made sense, e.g. "real" generations are
even, "bogus" generations are odd, most code doesn't need to be aware of
the bit, etc...

Now that unique memslots generation numbers are assigned to each address
space, stealthing the in-progress status into the generation number
results in a wide variety of subtle code, e.g. kvm_create_vm() jumps
over bit 0 when initializing the memslots generation without any hint as
to why.

Explicitly define the flag and convert as much code as possible (which
isn't much) to actually treat it like a flag.  This paves the way for
eventually using a different bit for "update in-progress" so that it can
be a flag in truth instead of a awkward extension to the generation
number.

Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/x86.h       |  2 +-
 include/linux/kvm_host.h | 21 +++++++++++++++++++++
 virt/kvm/kvm_main.c      | 26 +++++++++++++-------------
 3 files changed, 35 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 20ede17202bf..28406aa1136d 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -183,7 +183,7 @@ static inline void vcpu_cache_mmio_info(struct kvm_vcpu *vcpu,
 {
 	u64 gen = kvm_memslots(vcpu->kvm)->generation;
 
-	if (unlikely(gen & 1))
+	if (unlikely(gen & KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS))
 		return;
 
 	/*
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index cf761ff58224..5e1cb74922b3 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -48,6 +48,27 @@
  */
 #define KVM_MEMSLOT_INVALID	(1UL << 16)
 
+/*
+ * Bit 0 of the memslot generation number is an "update in-progress flag",
+ * e.g. is temporarily set for the duration of install_new_memslots().
+ * This flag effectively creates a unique generation number that is used to
+ * mark cached memslot data, e.g. MMIO accesses, as potentially being stale,
+ * i.e. may (or may not) have come from the previous memslots generation.
+ *
+ * This is necessary because the actual memslots update is not atomic with
+ * respect to the generation number update.  Updating the generation number
+ * first would allow a vCPU to cache a spte from the old memslots using the
+ * new generation number, and updating the generation number after switching
+ * to the new memslots would allow cache hits using the old generation number
+ * to reference the defunct memslots.
+ *
+ * This mechanism is used to prevent getting hits in KVM's caches while a
+ * memslot update is in-progress, and to prevent cache hits *after* updating
+ * the actual generation number against accesses that were inserted into the
+ * cache *before* the memslots were updated.
+ */
+#define KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS	BIT_ULL(0)
+
 /* Two fragments for cross MMIO pages. */
 #define KVM_MAX_MMIO_FRAGMENTS	2
 
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index d54f6578a849..0f1f1c7c7a36 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -874,30 +874,30 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm,
 		int as_id, struct kvm_memslots *slots)
 {
 	struct kvm_memslots *old_memslots = __kvm_memslots(kvm, as_id);
-	u64 gen;
+	u64 gen = old_memslots->generation;
 
-	/*
-	 * Set the low bit in the generation, which disables SPTE caching
-	 * until the end of synchronize_srcu_expedited.
-	 */
-	WARN_ON(old_memslots->generation & 1);
-	slots->generation = old_memslots->generation + 1;
+	WARN_ON(gen & KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS);
+	slots->generation = gen | KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS;
 
 	rcu_assign_pointer(kvm->memslots[as_id], slots);
 	synchronize_srcu_expedited(&kvm->srcu);
 
 	/*
-	 * Increment the new memslot generation a second time. This prevents
-	 * vm exits that race with memslot updates from caching a memslot
-	 * generation that will (potentially) be valid forever.
-	 *
+	 * Increment the new memslot generation a second time, dropping the
+	 * update in-progress flag and incrementing then generation based on
+	 * the number of address spaces.  This provides a unique and easily
+	 * identifiable generation number while the memslots are in flux.
+	 */
+	gen = slots->generation & ~KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS;
+
+	/*
 	 * Generations must be unique even across address spaces.  We do not need
 	 * a global counter for that, instead the generation space is evenly split
 	 * across address spaces.  For example, with two address spaces, address
-	 * space 0 will use generations 0, 4, 8, ... while * address space 1 will
+	 * space 0 will use generations 0, 4, 8, ... while address space 1 will
 	 * use generations 2, 6, 10, 14, ...
 	 */
-	gen = slots->generation + KVM_ADDRESS_SPACE_NUM * 2 - 1;
+	gen += KVM_ADDRESS_SPACE_NUM * 2;
 
 	kvm_arch_memslots_updated(kvm, gen);
 
-- 
cgit v1.2.3


From 164bf7e56c5a73f2f819c39ba7e0f20e0f97dc7b Mon Sep 17 00:00:00 2001
From: Sean Christopherson <sean.j.christopherson@intel.com>
Date: Tue, 5 Feb 2019 13:01:18 -0800
Subject: KVM: Move the memslot update in-progress flag to bit 63

...now that KVM won't explode by moving it out of bit 0.  Using bit 63
eliminates the need to jump over bit 0, e.g. when calculating a new
memslots generation or when propagating the memslots generation to an
MMIO spte.

Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 Documentation/virtual/kvm/mmu.txt | 13 ++++++++-----
 arch/x86/kvm/mmu.c                | 31 ++++++++++++-------------------
 include/linux/kvm_host.h          |  4 ++--
 virt/kvm/kvm_main.c               |  8 ++++----
 4 files changed, 26 insertions(+), 30 deletions(-)

(limited to 'include')

diff --git a/Documentation/virtual/kvm/mmu.txt b/Documentation/virtual/kvm/mmu.txt
index e507a9e0421e..367a952f50ab 100644
--- a/Documentation/virtual/kvm/mmu.txt
+++ b/Documentation/virtual/kvm/mmu.txt
@@ -452,13 +452,16 @@ stored into the MMIO spte.  Thus, the MMIO spte might be created based on
 out-of-date information, but with an up-to-date generation number.
 
 To avoid this, the generation number is incremented again after synchronize_srcu
-returns; thus, the low bit of kvm_memslots(kvm)->generation is only 1 during a
+returns; thus, bit 63 of kvm_memslots(kvm)->generation set to 1 only during a
 memslot update, while some SRCU readers might be using the old copy.  We do not
 want to use an MMIO sptes created with an odd generation number, and we can do
-this without losing a bit in the MMIO spte.  The low bit of the generation
-is not stored in MMIO spte, and presumed zero when it is extracted out of the
-spte.  If KVM is unlucky and creates an MMIO spte while the low bit is 1,
-the next access to the spte will always be a cache miss.
+this without losing a bit in the MMIO spte.  The "update in-progress" bit of the
+generation is not stored in MMIO spte, and is so is implicitly zero when the
+generation is extracted out of the spte.  If KVM is unlucky and creates an MMIO
+spte while an update is in-progress, the next access to the spte will always be
+a cache miss.  For example, a subsequent access during the update window will
+miss due to the in-progress flag diverging, while an access after the update
+window closes will have a higher generation number (as compared to the spte).
 
 
 Further reading
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 364b2a737d94..bcf62e1e1ff7 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -335,18 +335,17 @@ static inline bool is_access_track_spte(u64 spte)
  * Due to limited space in PTEs, the MMIO generation is a 19 bit subset of
  * the memslots generation and is derived as follows:
  *
- * Bits 1-9 of the memslot generation are propagated to spte bits 3-11
- * Bits 10-19 of the memslot generation are propagated to spte bits 52-61
+ * Bits 0-8 of the MMIO generation are propagated to spte bits 3-11
+ * Bits 9-18 of the MMIO generation are propagated to spte bits 52-61
  *
- * The MMIO generation starts at bit 1 of the memslots generation in order to
- * skip over bit 0, the KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS flag.  Including
- * the flag would require stealing a bit from the "real" generation number and
- * thus effectively halve the maximum number of MMIO generations that can be
- * handled before encountering a wrap (which requires a full MMU zap).  The
- * flag is instead explicitly queried when checking for MMIO spte cache hits.
+ * The KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS flag is intentionally not included in
+ * the MMIO generation number, as doing so would require stealing a bit from
+ * the "real" generation number and thus effectively halve the maximum number
+ * of MMIO generations that can be handled before encountering a wrap (which
+ * requires a full MMU zap).  The flag is instead explicitly queried when
+ * checking for MMIO spte cache hits.
  */
-#define MMIO_SPTE_GEN_MASK		GENMASK_ULL(19, 1)
-#define MMIO_SPTE_GEN_SHIFT		1
+#define MMIO_SPTE_GEN_MASK		GENMASK_ULL(18, 0)
 
 #define MMIO_SPTE_GEN_LOW_START		3
 #define MMIO_SPTE_GEN_LOW_END		11
@@ -363,8 +362,6 @@ static u64 generation_mmio_spte_mask(u64 gen)
 
 	WARN_ON(gen & ~MMIO_SPTE_GEN_MASK);
 
-	gen >>= MMIO_SPTE_GEN_SHIFT;
-
 	mask = (gen << MMIO_SPTE_GEN_LOW_START) & MMIO_SPTE_GEN_LOW_MASK;
 	mask |= (gen << MMIO_SPTE_GEN_HIGH_START) & MMIO_SPTE_GEN_HIGH_MASK;
 	return mask;
@@ -378,7 +375,7 @@ static u64 get_mmio_spte_generation(u64 spte)
 
 	gen = (spte & MMIO_SPTE_GEN_LOW_MASK) >> MMIO_SPTE_GEN_LOW_START;
 	gen |= (spte & MMIO_SPTE_GEN_HIGH_MASK) >> MMIO_SPTE_GEN_HIGH_START;
-	return gen << MMIO_SPTE_GEN_SHIFT;
+	return gen;
 }
 
 static void mark_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, u64 gfn,
@@ -5905,13 +5902,9 @@ static bool kvm_has_zapped_obsolete_pages(struct kvm *kvm)
 
 void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen)
 {
-	gen &= MMIO_SPTE_GEN_MASK;
+	WARN_ON(gen & KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS);
 
-	/*
-	 * Shift to adjust for the "update in-progress" flag, which isn't
-	 * included in the MMIO generation number.
-	 */
-	gen >>= MMIO_SPTE_GEN_SHIFT;
+	gen &= MMIO_SPTE_GEN_MASK;
 
 	/*
 	 * Generation numbers are incremented in multiples of the number of
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 5e1cb74922b3..85c0c00d5159 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -49,7 +49,7 @@
 #define KVM_MEMSLOT_INVALID	(1UL << 16)
 
 /*
- * Bit 0 of the memslot generation number is an "update in-progress flag",
+ * Bit 63 of the memslot generation number is an "update in-progress flag",
  * e.g. is temporarily set for the duration of install_new_memslots().
  * This flag effectively creates a unique generation number that is used to
  * mark cached memslot data, e.g. MMIO accesses, as potentially being stale,
@@ -67,7 +67,7 @@
  * the actual generation number against accesses that were inserted into the
  * cache *before* the memslots were updated.
  */
-#define KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS	BIT_ULL(0)
+#define KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS	BIT_ULL(63)
 
 /* Two fragments for cross MMIO pages. */
 #define KVM_MAX_MMIO_FRAGMENTS	2
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 5c2e7e173a46..c9d0bc01f8cb 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -657,7 +657,7 @@ static struct kvm *kvm_create_vm(unsigned long type)
 		if (!slots)
 			goto out_err_no_srcu;
 		/* Generations must be different for each address space. */
-		slots->generation = i * 2;
+		slots->generation = i;
 		rcu_assign_pointer(kvm->memslots[i], slots);
 	}
 
@@ -890,10 +890,10 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm,
 	 * Generations must be unique even across address spaces.  We do not need
 	 * a global counter for that, instead the generation space is evenly split
 	 * across address spaces.  For example, with two address spaces, address
-	 * space 0 will use generations 0, 4, 8, ... while address space 1 will
-	 * use generations 2, 6, 10, 14, ...
+	 * space 0 will use generations 0, 2, 4, ... while address space 1 will
+	 * use generations 1, 3, 5, ...
 	 */
-	gen += KVM_ADDRESS_SPACE_NUM * 2;
+	gen += KVM_ADDRESS_SPACE_NUM;
 
 	kvm_arch_memslots_updated(kvm, gen);
 
-- 
cgit v1.2.3


From 49113d360bdeb4dd916fb6bffbcc3e157422b6fd Mon Sep 17 00:00:00 2001
From: Nir Weiner <nir.weiner@oracle.com>
Date: Sun, 27 Jan 2019 12:17:15 +0200
Subject: KVM: Expose the initial start value in grow_halt_poll_ns() as a
 module parameter

The hard-coded value 10000 in grow_halt_poll_ns() stands for the initial
start value when raising up vcpu->halt_poll_ns.
It actually sets the first timeout to the first polling session.
This value has significant effect on how tolerant we are to outliers.
On the standard case, higher value is better - we will spend more time
in the polling busyloop, handle events/interrupts faster and result
in better performance.
But on outliers it puts us in a busy loop that does nothing.
Even if the shrink factor is zero, we will still waste time on the first
iteration.
The optimal value changes between different workloads. It depends on
outliers rate and polling sessions length.
As this value has significant effect on the dynamic halt-polling
algorithm, it should be configurable and exposed.

Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Reviewed-by: Liran Alon <liran.alon@oracle.com>
Signed-off-by: Nir Weiner <nir.weiner@oracle.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 Documentation/virtual/kvm/halt-polling.txt | 37 +++++++++++++++++++-----------
 arch/powerpc/kvm/book3s_hv.c               |  3 +--
 include/linux/kvm_host.h                   |  1 +
 virt/kvm/kvm_main.c                        |  8 +++++--
 4 files changed, 31 insertions(+), 18 deletions(-)

(limited to 'include')

diff --git a/Documentation/virtual/kvm/halt-polling.txt b/Documentation/virtual/kvm/halt-polling.txt
index 4a8418318769..4f791b128dd2 100644
--- a/Documentation/virtual/kvm/halt-polling.txt
+++ b/Documentation/virtual/kvm/halt-polling.txt
@@ -53,7 +53,8 @@ the global max polling interval then the polling interval can be increased in
 the hope that next time during the longer polling interval the wake up source
 will be received while the host is polling and the latency benefits will be
 received. The polling interval is grown in the function grow_halt_poll_ns() and
-is multiplied by the module parameter halt_poll_ns_grow.
+is multiplied by the module parameters halt_poll_ns_grow and
+halt_poll_ns_grow_start.
 
 In the event that the total block time was greater than the global max polling
 interval then the host will never poll for long enough (limited by the global
@@ -80,22 +81,30 @@ shrunk. These variables are defined in include/linux/kvm_host.h and as module
 parameters in virt/kvm/kvm_main.c, or arch/powerpc/kvm/book3s_hv.c in the
 powerpc kvm-hv case.
 
-Module Parameter    |	     Description	      |	     Default Value
+Module Parameter	|   Description		    |	     Default Value
 --------------------------------------------------------------------------------
-halt_poll_ns	    | The global max polling interval | KVM_HALT_POLL_NS_DEFAULT
-		    | which defines the ceiling value |
-		    | of the polling interval for     | (per arch value)
-		    | each vcpu. 		      |
+halt_poll_ns		| The global max polling    | KVM_HALT_POLL_NS_DEFAULT
+			| interval which defines    |
+			| the ceiling value of the  |
+			| polling interval for      | (per arch value)
+			| each vcpu.		    |
 --------------------------------------------------------------------------------
-halt_poll_ns_grow   | The value by which the halt     |	2
-		    | polling interval is multiplied  |
-		    | in the grow_halt_poll_ns()      |
-		    | function.			      |
+halt_poll_ns_grow	| The value by which the    | 2
+			| halt polling interval is  |
+			| multiplied in the	    |
+			| grow_halt_poll_ns()	    |
+			| function.		    |
 --------------------------------------------------------------------------------
-halt_poll_ns_shrink | The value by which the halt     |	0
-		    | polling interval is divided in  |
-		    | the shrink_halt_poll_ns()	      |
-		    | function.			      |
+halt_poll_ns_grow_start | The initial value to grow | 10000
+			| to from zero in the	    |
+			| grow_halt_poll_ns()	    |
+			| function.		    |
+--------------------------------------------------------------------------------
+halt_poll_ns_shrink	| The value by which the    | 0
+			| halt polling interval is  |
+			| divided in the	    |
+			| shrink_halt_poll_ns()	    |
+			| function.		    |
 --------------------------------------------------------------------------------
 
 These module parameters can be set from the debugfs files in:
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index e316a2ddb70b..29ffc99bd79b 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -3634,9 +3634,8 @@ static void grow_halt_poll_ns(struct kvmppc_vcore *vc)
 	if (!halt_poll_ns_grow)
 		return;
 
-	/* 10us base */
 	if (vc->halt_poll_ns == 0)
-		vc->halt_poll_ns = 10000;
+		vc->halt_poll_ns = halt_poll_ns_grow_start;
 	else
 		vc->halt_poll_ns *= halt_poll_ns_grow;
 }
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 85c0c00d5159..9d55c63db09b 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1203,6 +1203,7 @@ extern bool kvm_rebooting;
 
 extern unsigned int halt_poll_ns;
 extern unsigned int halt_poll_ns_grow;
+extern unsigned int halt_poll_ns_grow_start;
 extern unsigned int halt_poll_ns_shrink;
 
 struct kvm_device {
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 9c8a8bf6e686..ae818d27a1a4 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -81,6 +81,11 @@ unsigned int halt_poll_ns_grow = 2;
 module_param(halt_poll_ns_grow, uint, 0644);
 EXPORT_SYMBOL_GPL(halt_poll_ns_grow);
 
+/* The start value to grow halt_poll_ns from */
+unsigned int halt_poll_ns_grow_start = 10000; /* 10us */
+module_param(halt_poll_ns_grow_start, uint, 0644);
+EXPORT_SYMBOL_GPL(halt_poll_ns_grow_start);
+
 /* Default resets per-vcpu halt_poll_ns . */
 unsigned int halt_poll_ns_shrink;
 module_param(halt_poll_ns_shrink, uint, 0644);
@@ -2191,9 +2196,8 @@ static void grow_halt_poll_ns(struct kvm_vcpu *vcpu)
 	if (!grow)
 		goto out;
 
-	/* 10us base */
 	if (val == 0)
-		val = 10000;
+		val = halt_poll_ns_grow_start;
 	else
 		val *= grow;
 
-- 
cgit v1.2.3


From b38f6c50270683abf35a388f82cafecce971a003 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Wed, 20 Feb 2019 11:30:49 -0500
Subject: XArray: Fix xa_release in allocating arrays

xa_cmpxchg() was a little too magic in turning ZERO entries into NULL,
and would leave the entry set to the ZERO entry instead of releasing
it for future use.  After careful review of existing users of
xa_cmpxchg(), change the semantics so that it does not translate either
incoming argument from NULL into ZERO entries.

Add several tests to the test-suite to make sure this problem doesn't
come back.

Reported-by: Jason Gunthorpe <jgg@ziepe.ca>
Signed-off-by: Matthew Wilcox <willy@infradead.org>
---
 include/linux/xarray.h | 36 +++++++++++++++++++++++-------------
 lib/test_xarray.c      | 28 ++++++++++++++++++++++++----
 lib/xarray.c           |  6 +-----
 3 files changed, 48 insertions(+), 22 deletions(-)

(limited to 'include')

diff --git a/include/linux/xarray.h b/include/linux/xarray.h
index 687c150071a5..588733abd19d 100644
--- a/include/linux/xarray.h
+++ b/include/linux/xarray.h
@@ -131,6 +131,12 @@ static inline unsigned int xa_pointer_tag(void *entry)
  * xa_mk_internal() - Create an internal entry.
  * @v: Value to turn into an internal entry.
  *
+ * Internal entries are used for a number of purposes.  Entries 0-255 are
+ * used for sibling entries (only 0-62 are used by the current code).  256
+ * is used for the retry entry.  257 is used for the reserved / zero entry.
+ * Negative internal entries are used to represent errnos.  Node pointers
+ * are also tagged as internal entries in some situations.
+ *
  * Context: Any context.
  * Return: An XArray internal entry corresponding to this value.
  */
@@ -163,6 +169,22 @@ static inline bool xa_is_internal(const void *entry)
 	return ((unsigned long)entry & 3) == 2;
 }
 
+#define XA_ZERO_ENTRY		xa_mk_internal(257)
+
+/**
+ * xa_is_zero() - Is the entry a zero entry?
+ * @entry: Entry retrieved from the XArray
+ *
+ * The normal API will return NULL as the contents of a slot containing
+ * a zero entry.  You can only see zero entries by using the advanced API.
+ *
+ * Return: %true if the entry is a zero entry.
+ */
+static inline bool xa_is_zero(const void *entry)
+{
+	return unlikely(entry == XA_ZERO_ENTRY);
+}
+
 /**
  * xa_is_err() - Report whether an XArray operation returned an error
  * @entry: Result from calling an XArray function
@@ -1050,7 +1072,7 @@ int xa_reserve_irq(struct xarray *xa, unsigned long index, gfp_t gfp)
  */
 static inline void xa_release(struct xarray *xa, unsigned long index)
 {
-	xa_cmpxchg(xa, index, NULL, NULL, 0);
+	xa_cmpxchg(xa, index, XA_ZERO_ENTRY, NULL, 0);
 }
 
 /* Everything below here is the Advanced API.  Proceed with caution. */
@@ -1210,18 +1232,6 @@ static inline bool xa_is_sibling(const void *entry)
 }
 
 #define XA_RETRY_ENTRY		xa_mk_internal(256)
-#define XA_ZERO_ENTRY		xa_mk_internal(257)
-
-/**
- * xa_is_zero() - Is the entry a zero entry?
- * @entry: Entry retrieved from the XArray
- *
- * Return: %true if the entry is a zero entry.
- */
-static inline bool xa_is_zero(const void *entry)
-{
-	return unlikely(entry == XA_ZERO_ENTRY);
-}
 
 /**
  * xa_is_retry() - Is the entry a retry entry?
diff --git a/lib/test_xarray.c b/lib/test_xarray.c
index 3eaa40ddc390..52f8ecff8c0c 100644
--- a/lib/test_xarray.c
+++ b/lib/test_xarray.c
@@ -361,6 +361,7 @@ static noinline void check_reserve(struct xarray *xa)
 {
 	void *entry;
 	unsigned long index;
+	int count;
 
 	/* An array with a reserved entry is not empty */
 	XA_BUG_ON(xa, !xa_empty(xa));
@@ -377,15 +378,15 @@ static noinline void check_reserve(struct xarray *xa)
 	xa_erase_index(xa, 12345678);
 	XA_BUG_ON(xa, !xa_empty(xa));
 
-	/* cmpxchg sees a reserved entry as NULL */
+	/* cmpxchg sees a reserved entry as ZERO */
 	XA_BUG_ON(xa, xa_reserve(xa, 12345678, GFP_KERNEL) != 0);
-	XA_BUG_ON(xa, xa_cmpxchg(xa, 12345678, NULL, xa_mk_value(12345678),
-				GFP_NOWAIT) != NULL);
+	XA_BUG_ON(xa, xa_cmpxchg(xa, 12345678, XA_ZERO_ENTRY,
+				xa_mk_value(12345678), GFP_NOWAIT) != NULL);
 	xa_release(xa, 12345678);
 	xa_erase_index(xa, 12345678);
 	XA_BUG_ON(xa, !xa_empty(xa));
 
-	/* But xa_insert does not */
+	/* xa_insert treats it as busy */
 	XA_BUG_ON(xa, xa_reserve(xa, 12345678, GFP_KERNEL) != 0);
 	XA_BUG_ON(xa, xa_insert(xa, 12345678, xa_mk_value(12345678), 0) !=
 			-EBUSY);
@@ -398,9 +399,27 @@ static noinline void check_reserve(struct xarray *xa)
 	XA_BUG_ON(xa, xa_reserve(xa, 6, GFP_KERNEL) != 0);
 	xa_store_index(xa, 7, GFP_KERNEL);
 
+	count = 0;
 	xa_for_each(xa, index, entry) {
 		XA_BUG_ON(xa, index != 5 && index != 7);
+		count++;
+	}
+	XA_BUG_ON(xa, count != 2);
+
+	/* If we free a reserved entry, we should be able to allocate it */
+	if (xa->xa_flags & XA_FLAGS_ALLOC) {
+		u32 id;
+
+		XA_BUG_ON(xa, xa_alloc(xa, &id, xa_mk_value(8),
+					XA_LIMIT(5, 10), GFP_KERNEL) != 0);
+		XA_BUG_ON(xa, id != 8);
+
+		xa_release(xa, 6);
+		XA_BUG_ON(xa, xa_alloc(xa, &id, xa_mk_value(6),
+					XA_LIMIT(5, 10), GFP_KERNEL) != 0);
+		XA_BUG_ON(xa, id != 6);
 	}
+
 	xa_destroy(xa);
 }
 
@@ -1486,6 +1505,7 @@ static int xarray_checks(void)
 	check_xas_erase(&array);
 	check_cmpxchg(&array);
 	check_reserve(&array);
+	check_reserve(&xa0);
 	check_multi_store(&array);
 	check_xa_alloc();
 	check_find(&array);
diff --git a/lib/xarray.c b/lib/xarray.c
index 89e37ac50850..b9a6cf42feee 100644
--- a/lib/xarray.c
+++ b/lib/xarray.c
@@ -1429,16 +1429,12 @@ void *__xa_cmpxchg(struct xarray *xa, unsigned long index,
 
 	if (WARN_ON_ONCE(xa_is_advanced(entry)))
 		return XA_ERROR(-EINVAL);
-	if (xa_track_free(xa) && !entry)
-		entry = XA_ZERO_ENTRY;
 
 	do {
 		curr = xas_load(&xas);
-		if (curr == XA_ZERO_ENTRY)
-			curr = NULL;
 		if (curr == old) {
 			xas_store(&xas, entry);
-			if (xa_track_free(xa))
+			if (xa_track_free(xa) && entry && !curr)
 				xas_clear_mark(&xas, XA_FREE_MARK);
 		}
 	} while (__xas_nomem(&xas, gfp));
-- 
cgit v1.2.3


From 962033d55d0761e0716a01a715c6659c8c8dfc41 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Wed, 20 Feb 2019 11:51:22 -0500
Subject: XArray: Use xa_cmpxchg to implement xa_reserve

Jason feels this is clearer, and it saves a function and an exported
symbol.

Suggested-by: Jason Gunthorpe <jgg@ziepe.ca>
Signed-off-by: Matthew Wilcox <willy@infradead.org>
---
 Documentation/core-api/xarray.rst |  1 -
 include/linux/xarray.h            | 25 +++----------------------
 lib/xarray.c                      | 36 ------------------------------------
 3 files changed, 3 insertions(+), 59 deletions(-)

(limited to 'include')

diff --git a/Documentation/core-api/xarray.rst b/Documentation/core-api/xarray.rst
index c7436da5c4ad..ef6f9f98f595 100644
--- a/Documentation/core-api/xarray.rst
+++ b/Documentation/core-api/xarray.rst
@@ -215,7 +215,6 @@ Assumes xa_lock held on entry:
  * :c:func:`__xa_erase`
  * :c:func:`__xa_cmpxchg`
  * :c:func:`__xa_alloc`
- * :c:func:`__xa_reserve`
  * :c:func:`__xa_set_mark`
  * :c:func:`__xa_clear_mark`
 
diff --git a/include/linux/xarray.h b/include/linux/xarray.h
index 588733abd19d..0e01e6129145 100644
--- a/include/linux/xarray.h
+++ b/include/linux/xarray.h
@@ -525,7 +525,6 @@ int __must_check __xa_alloc(struct xarray *, u32 *id, void *entry,
 		struct xa_limit, gfp_t);
 int __must_check __xa_alloc_cyclic(struct xarray *, u32 *id, void *entry,
 		struct xa_limit, u32 *next, gfp_t);
-int __must_check __xa_reserve(struct xarray *, unsigned long index, gfp_t);
 void __xa_set_mark(struct xarray *, unsigned long index, xa_mark_t);
 void __xa_clear_mark(struct xarray *, unsigned long index, xa_mark_t);
 
@@ -1004,13 +1003,7 @@ static inline int xa_alloc_cyclic_irq(struct xarray *xa, u32 *id, void *entry,
 static inline __must_check
 int xa_reserve(struct xarray *xa, unsigned long index, gfp_t gfp)
 {
-	int ret;
-
-	xa_lock(xa);
-	ret = __xa_reserve(xa, index, gfp);
-	xa_unlock(xa);
-
-	return ret;
+	return xa_err(xa_cmpxchg(xa, index, NULL, XA_ZERO_ENTRY, gfp));
 }
 
 /**
@@ -1028,13 +1021,7 @@ int xa_reserve(struct xarray *xa, unsigned long index, gfp_t gfp)
 static inline __must_check
 int xa_reserve_bh(struct xarray *xa, unsigned long index, gfp_t gfp)
 {
-	int ret;
-
-	xa_lock_bh(xa);
-	ret = __xa_reserve(xa, index, gfp);
-	xa_unlock_bh(xa);
-
-	return ret;
+	return xa_err(xa_cmpxchg_bh(xa, index, NULL, XA_ZERO_ENTRY, gfp));
 }
 
 /**
@@ -1052,13 +1039,7 @@ int xa_reserve_bh(struct xarray *xa, unsigned long index, gfp_t gfp)
 static inline __must_check
 int xa_reserve_irq(struct xarray *xa, unsigned long index, gfp_t gfp)
 {
-	int ret;
-
-	xa_lock_irq(xa);
-	ret = __xa_reserve(xa, index, gfp);
-	xa_unlock_irq(xa);
-
-	return ret;
+	return xa_err(xa_cmpxchg_irq(xa, index, NULL, XA_ZERO_ENTRY, gfp));
 }
 
 /**
diff --git a/lib/xarray.c b/lib/xarray.c
index b9a6cf42feee..3f10198f00b7 100644
--- a/lib/xarray.c
+++ b/lib/xarray.c
@@ -1484,42 +1484,6 @@ int __xa_insert(struct xarray *xa, unsigned long index, void *entry, gfp_t gfp)
 }
 EXPORT_SYMBOL(__xa_insert);
 
-/**
- * __xa_reserve() - Reserve this index in the XArray.
- * @xa: XArray.
- * @index: Index into array.
- * @gfp: Memory allocation flags.
- *
- * Ensures there is somewhere to store an entry at @index in the array.
- * If there is already something stored at @index, this function does
- * nothing.  If there was nothing there, the entry is marked as reserved.
- * Loading from a reserved entry returns a %NULL pointer.
- *
- * If you do not use the entry that you have reserved, call xa_release()
- * or xa_erase() to free any unnecessary memory.
- *
- * Context: Any context.  Expects the xa_lock to be held on entry.  May
- * release the lock, sleep and reacquire the lock if the @gfp flags permit.
- * Return: 0 if the reservation succeeded or -ENOMEM if it failed.
- */
-int __xa_reserve(struct xarray *xa, unsigned long index, gfp_t gfp)
-{
-	XA_STATE(xas, xa, index);
-	void *curr;
-
-	do {
-		curr = xas_load(&xas);
-		if (!curr) {
-			xas_store(&xas, XA_ZERO_ENTRY);
-			if (xa_track_free(xa))
-				xas_clear_mark(&xas, XA_FREE_MARK);
-		}
-	} while (__xas_nomem(&xas, gfp));
-
-	return xas_error(&xas);
-}
-EXPORT_SYMBOL(__xa_reserve);
-
 #ifdef CONFIG_XARRAY_MULTI
 static void xas_set_range(struct xa_state *xas, unsigned long first,
 		unsigned long last)
-- 
cgit v1.2.3


From 61697a6abd24acba941359c6268a94f4afe4a53d Mon Sep 17 00:00:00 2001
From: Mike Snitzer <snitzer@redhat.com>
Date: Fri, 18 Jan 2019 14:19:26 -0500
Subject: dm: eliminate 'split_discard_bios' flag from DM target interface

There is no need to have DM core split discards on behalf of a DM target
now that blk_queue_split() handles splitting discards based on the
queue_limits.  A DM target just needs to set max_discard_sectors,
discard_granularity, etc, in queue_limits.

Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm-cache-target.c  |  1 -
 drivers/md/dm-raid.c          | 14 +++++++++-----
 drivers/md/dm-thin.c          |  1 -
 drivers/md/dm-zoned-target.c  |  1 -
 drivers/md/dm.c               | 25 ++++++-------------------
 include/linux/device-mapper.h |  6 ------
 include/uapi/linux/dm-ioctl.h |  4 ++--
 7 files changed, 17 insertions(+), 35 deletions(-)

(limited to 'include')

diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index b29a8327eed1..adc529f12b6b 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -2496,7 +2496,6 @@ static int cache_create(struct cache_args *ca, struct cache **result)
 
 	ti->num_discard_bios = 1;
 	ti->discards_supported = true;
-	ti->split_discard_bios = false;
 
 	ti->per_io_data_size = sizeof(struct per_bio_data);
 
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index adcfe8ae10aa..9fdef6897316 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -2986,11 +2986,6 @@ static void configure_discard_support(struct raid_set *rs)
 		}
 	}
 
-	/*
-	 * RAID1 and RAID10 personalities require bio splitting,
-	 * RAID0/4/5/6 don't and process large discard bios properly.
-	 */
-	ti->split_discard_bios = !!(rs_is_raid1(rs) || rs_is_raid10(rs));
 	ti->num_discard_bios = 1;
 }
 
@@ -3747,6 +3742,15 @@ static void raid_io_hints(struct dm_target *ti, struct queue_limits *limits)
 
 	blk_limits_io_min(limits, chunk_size);
 	blk_limits_io_opt(limits, chunk_size * mddev_data_stripes(rs));
+
+	/*
+	 * RAID1 and RAID10 personalities require bio splitting,
+	 * RAID0/4/5/6 don't and process large discard bios properly.
+	 */
+	if (rs_is_raid1(rs) || rs_is_raid10(rs)) {
+		limits->discard_granularity = chunk_size;
+		limits->max_discard_sectors = chunk_size;
+	}
 }
 
 static void raid_postsuspend(struct dm_target *ti)
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index e83b63608262..0d9ded0f5e50 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -4227,7 +4227,6 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv)
 	if (tc->pool->pf.discard_enabled) {
 		ti->discards_supported = true;
 		ti->num_discard_bios = 1;
-		ti->split_discard_bios = false;
 	}
 
 	mutex_unlock(&dm_thin_pool_table.mutex);
diff --git a/drivers/md/dm-zoned-target.c b/drivers/md/dm-zoned-target.c
index 6af5babe6837..8865c1709e16 100644
--- a/drivers/md/dm-zoned-target.c
+++ b/drivers/md/dm-zoned-target.c
@@ -727,7 +727,6 @@ static int dmz_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	ti->per_io_data_size = sizeof(struct dmz_bioctx);
 	ti->flush_supported = true;
 	ti->discards_supported = true;
-	ti->split_discard_bios = true;
 
 	/* The exposed capacity is the number of chunks that can be mapped */
 	ti->len = (sector_t)dmz_nr_chunks(dmz->metadata) << dev->zone_nr_sectors_shift;
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 7a774fcd0194..55f12df3589d 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1478,17 +1478,10 @@ static unsigned get_num_write_zeroes_bios(struct dm_target *ti)
 	return ti->num_write_zeroes_bios;
 }
 
-typedef bool (*is_split_required_fn)(struct dm_target *ti);
-
-static bool is_split_required_for_discard(struct dm_target *ti)
-{
-	return ti->split_discard_bios;
-}
-
 static int __send_changing_extent_only(struct clone_info *ci, struct dm_target *ti,
-				       unsigned num_bios, bool is_split_required)
+				       unsigned num_bios)
 {
-	unsigned len;
+	unsigned len = ci->sector_count;
 
 	/*
 	 * Even though the device advertised support for this type of
@@ -1499,11 +1492,6 @@ static int __send_changing_extent_only(struct clone_info *ci, struct dm_target *
 	if (!num_bios)
 		return -EOPNOTSUPP;
 
-	if (!is_split_required)
-		len = min((sector_t)ci->sector_count, max_io_len_target_boundary(ci->sector, ti));
-	else
-		len = min((sector_t)ci->sector_count, max_io_len(ci->sector, ti));
-
 	__send_duplicate_bios(ci, ti, num_bios, &len);
 
 	ci->sector += len;
@@ -1514,23 +1502,22 @@ static int __send_changing_extent_only(struct clone_info *ci, struct dm_target *
 
 static int __send_discard(struct clone_info *ci, struct dm_target *ti)
 {
-	return __send_changing_extent_only(ci, ti, get_num_discard_bios(ti),
-					   is_split_required_for_discard(ti));
+	return __send_changing_extent_only(ci, ti, get_num_discard_bios(ti));
 }
 
 static int __send_secure_erase(struct clone_info *ci, struct dm_target *ti)
 {
-	return __send_changing_extent_only(ci, ti, get_num_secure_erase_bios(ti), false);
+	return __send_changing_extent_only(ci, ti, get_num_secure_erase_bios(ti));
 }
 
 static int __send_write_same(struct clone_info *ci, struct dm_target *ti)
 {
-	return __send_changing_extent_only(ci, ti, get_num_write_same_bios(ti), false);
+	return __send_changing_extent_only(ci, ti, get_num_write_same_bios(ti));
 }
 
 static int __send_write_zeroes(struct clone_info *ci, struct dm_target *ti)
 {
-	return __send_changing_extent_only(ci, ti, get_num_write_zeroes_bios(ti), false);
+	return __send_changing_extent_only(ci, ti, get_num_write_zeroes_bios(ti));
 }
 
 static bool is_abnormal_io(struct bio *bio)
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index e528baebad69..0f5b3d7c6cb3 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -315,12 +315,6 @@ struct dm_target {
 	 * whether or not its underlying devices have support.
 	 */
 	bool discards_supported:1;
-
-	/*
-	 * Set if the target required discard bios to be split
-	 * on max_io_len boundary.
-	 */
-	bool split_discard_bios:1;
 };
 
 /* Each target can link one of these into the table */
diff --git a/include/uapi/linux/dm-ioctl.h b/include/uapi/linux/dm-ioctl.h
index d1e49514977b..f396a82dfd3e 100644
--- a/include/uapi/linux/dm-ioctl.h
+++ b/include/uapi/linux/dm-ioctl.h
@@ -270,9 +270,9 @@ enum {
 #define DM_DEV_SET_GEOMETRY	_IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl)
 
 #define DM_VERSION_MAJOR	4
-#define DM_VERSION_MINOR	39
+#define DM_VERSION_MINOR	40
 #define DM_VERSION_PATCHLEVEL	0
-#define DM_VERSION_EXTRA	"-ioctl (2018-04-03)"
+#define DM_VERSION_EXTRA	"-ioctl (2019-01-18)"
 
 /* Status bits */
 #define DM_READONLY_FLAG	(1 << 0) /* In/Out */
-- 
cgit v1.2.3


From 086d08725d34c6b3333db710344ae9c4fdafb2d5 Mon Sep 17 00:00:00 2001
From: Loic Pallardy <loic.pallardy@st.com>
Date: Thu, 10 Jan 2019 14:50:49 +0100
Subject: remoteproc: create vdev subdevice with specific dma memory pool

This patch creates a dedicated vdev subdevice for each vdev declared
in firmware resource table and associates carveout named "vdev%dbuffer"
(with %d vdev index in resource table) if any as dma coherent memory pool.

Then vdev subdevice is used as parent for virtio device.

Signed-off-by: Loic Pallardy <loic.pallardy@st.com>
Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
---
 drivers/remoteproc/remoteproc_core.c     | 47 ++++++++++++++++++++++++++++++--
 drivers/remoteproc/remoteproc_internal.h |  1 +
 drivers/remoteproc/remoteproc_virtio.c   | 42 +++++++++++++++++++++++++++-
 include/linux/remoteproc.h               |  1 +
 4 files changed, 87 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c
index 54ec38fc5dca..821dbedef18e 100644
--- a/drivers/remoteproc/remoteproc_core.c
+++ b/drivers/remoteproc/remoteproc_core.c
@@ -39,9 +39,11 @@
 #include <linux/idr.h>
 #include <linux/elf.h>
 #include <linux/crc32.h>
+#include <linux/of_reserved_mem.h>
 #include <linux/virtio_ids.h>
 #include <linux/virtio_ring.h>
 #include <asm/byteorder.h>
+#include <linux/platform_device.h>
 
 #include "remoteproc_internal.h"
 
@@ -145,7 +147,7 @@ static void rproc_disable_iommu(struct rproc *rproc)
 	iommu_domain_free(domain);
 }
 
-static phys_addr_t rproc_va_to_pa(void *cpu_addr)
+phys_addr_t rproc_va_to_pa(void *cpu_addr)
 {
 	/*
 	 * Return physical address according to virtual address location
@@ -160,6 +162,7 @@ static phys_addr_t rproc_va_to_pa(void *cpu_addr)
 	WARN_ON(!virt_addr_valid(cpu_addr));
 	return virt_to_phys(cpu_addr);
 }
+EXPORT_SYMBOL(rproc_va_to_pa);
 
 /**
  * rproc_da_to_va() - lookup the kernel virtual address for a remoteproc address
@@ -422,6 +425,20 @@ static void rproc_vdev_do_stop(struct rproc_subdev *subdev, bool crashed)
 	rproc_remove_virtio_dev(rvdev);
 }
 
+/**
+ * rproc_rvdev_release() - release the existence of a rvdev
+ *
+ * @dev: the subdevice's dev
+ */
+static void rproc_rvdev_release(struct device *dev)
+{
+	struct rproc_vdev *rvdev = container_of(dev, struct rproc_vdev, dev);
+
+	of_reserved_mem_device_release(dev);
+
+	kfree(rvdev);
+}
+
 /**
  * rproc_handle_vdev() - handle a vdev fw resource
  * @rproc: the remote processor
@@ -455,6 +472,7 @@ static int rproc_handle_vdev(struct rproc *rproc, struct fw_rsc_vdev *rsc,
 	struct device *dev = &rproc->dev;
 	struct rproc_vdev *rvdev;
 	int i, ret;
+	char name[16];
 
 	/* make sure resource isn't truncated */
 	if (sizeof(*rsc) + rsc->num_of_vrings * sizeof(struct fw_rsc_vdev_vring)
@@ -488,6 +506,29 @@ static int rproc_handle_vdev(struct rproc *rproc, struct fw_rsc_vdev *rsc,
 	rvdev->rproc = rproc;
 	rvdev->index = rproc->nb_vdev++;
 
+	/* Initialise vdev subdevice */
+	snprintf(name, sizeof(name), "vdev%dbuffer", rvdev->index);
+	rvdev->dev.parent = rproc->dev.parent;
+	rvdev->dev.release = rproc_rvdev_release;
+	dev_set_name(&rvdev->dev, "%s#%s", dev_name(rvdev->dev.parent), name);
+	dev_set_drvdata(&rvdev->dev, rvdev);
+
+	ret = device_register(&rvdev->dev);
+	if (ret) {
+		put_device(&rvdev->dev);
+		return ret;
+	}
+	/* Make device dma capable by inheriting from parent's capabilities */
+	set_dma_ops(&rvdev->dev, get_dma_ops(rproc->dev.parent));
+
+	ret = dma_coerce_mask_and_coherent(&rvdev->dev,
+					   dma_get_mask(rproc->dev.parent));
+	if (ret) {
+		dev_warn(dev,
+			 "Failed to set DMA mask %llx. Trying to continue... %x\n",
+			 dma_get_mask(rproc->dev.parent), ret);
+	}
+
 	/* parse the vrings */
 	for (i = 0; i < rsc->num_of_vrings; i++) {
 		ret = rproc_parse_vring(rvdev, rsc, i);
@@ -518,7 +559,7 @@ unwind_vring_allocations:
 	for (i--; i >= 0; i--)
 		rproc_free_vring(&rvdev->vring[i]);
 free_rvdev:
-	kfree(rvdev);
+	device_unregister(&rvdev->dev);
 	return ret;
 }
 
@@ -536,7 +577,7 @@ void rproc_vdev_release(struct kref *ref)
 
 	rproc_remove_subdev(rproc, &rvdev->subdev);
 	list_del(&rvdev->node);
-	kfree(rvdev);
+	device_unregister(&rvdev->dev);
 }
 
 /**
diff --git a/drivers/remoteproc/remoteproc_internal.h b/drivers/remoteproc/remoteproc_internal.h
index f6cad243d7ca..bfeacfd40947 100644
--- a/drivers/remoteproc/remoteproc_internal.h
+++ b/drivers/remoteproc/remoteproc_internal.h
@@ -52,6 +52,7 @@ void rproc_free_vring(struct rproc_vring *rvring);
 int rproc_alloc_vring(struct rproc_vdev *rvdev, int i);
 
 void *rproc_da_to_va(struct rproc *rproc, u64 da, int len);
+phys_addr_t rproc_va_to_pa(void *cpu_addr);
 int rproc_trigger_recovery(struct rproc *rproc);
 
 int rproc_elf_sanity_check(struct rproc *rproc, const struct firmware *fw);
diff --git a/drivers/remoteproc/remoteproc_virtio.c b/drivers/remoteproc/remoteproc_virtio.c
index 183fc42a510a..d08b2cfd875b 100644
--- a/drivers/remoteproc/remoteproc_virtio.c
+++ b/drivers/remoteproc/remoteproc_virtio.c
@@ -17,7 +17,9 @@
  * GNU General Public License for more details.
  */
 
+#include <linux/dma-mapping.h>
 #include <linux/export.h>
+#include <linux/of_reserved_mem.h>
 #include <linux/remoteproc.h>
 #include <linux/virtio.h>
 #include <linux/virtio_config.h>
@@ -328,10 +330,48 @@ static void rproc_virtio_dev_release(struct device *dev)
 int rproc_add_virtio_dev(struct rproc_vdev *rvdev, int id)
 {
 	struct rproc *rproc = rvdev->rproc;
-	struct device *dev = &rproc->dev;
+	struct device *dev = &rvdev->dev;
 	struct virtio_device *vdev = &rvdev->vdev;
+	struct rproc_mem_entry *mem;
 	int ret;
 
+	/* Try to find dedicated vdev buffer carveout */
+	mem = rproc_find_carveout_by_name(rproc, "vdev%dbuffer", rvdev->index);
+	if (mem) {
+		phys_addr_t pa;
+
+		if (mem->of_resm_idx != -1) {
+			struct device_node *np = rproc->dev.parent->of_node;
+
+			/* Associate reserved memory to vdev device */
+			ret = of_reserved_mem_device_init_by_idx(dev, np,
+								 mem->of_resm_idx);
+			if (ret) {
+				dev_err(dev, "Can't associate reserved memory\n");
+				goto out;
+			}
+		} else {
+			if (mem->va) {
+				dev_warn(dev, "vdev %d buffer already mapped\n",
+					 rvdev->index);
+				pa = rproc_va_to_pa(mem->va);
+			} else {
+				/* Use dma address as carveout no memmapped yet */
+				pa = (phys_addr_t)mem->dma;
+			}
+
+			/* Associate vdev buffer memory pool to vdev subdev */
+			ret = dma_declare_coherent_memory(dev, pa,
+							   mem->da,
+							   mem->len,
+							   DMA_MEMORY_EXCLUSIVE);
+			if (ret < 0) {
+				dev_err(dev, "Failed to associate buffer\n");
+				goto out;
+			}
+		}
+	}
+
 	vdev->id.device	= id,
 	vdev->config = &rproc_virtio_config_ops,
 	vdev->dev.parent = dev;
diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h
index 68e72f33c705..82cb77ad37c7 100644
--- a/include/linux/remoteproc.h
+++ b/include/linux/remoteproc.h
@@ -554,6 +554,7 @@ struct rproc_vdev {
 	struct kref refcount;
 
 	struct rproc_subdev subdev;
+	struct device dev;
 
 	unsigned int id;
 	struct list_head node;
-- 
cgit v1.2.3


From d4c036fec321341f378ca95d3e99976e835a7404 Mon Sep 17 00:00:00 2001
From: Loic Pallardy <loic.pallardy@st.com>
Date: Mon, 21 Jan 2019 14:55:15 +0100
Subject: remoteproc: fix recovery procedure

Commit 7e83cab824a87e83cab824a8 ("remoteproc: Modify recovery path
to use rproc_{start,stop}()") replaces rproc_{shutdown,boot}() with
rproc_{stop,start}(), which skips destroy the virtio device at stop
but re-initializes it again at start.

Issue is that struct virtio_dev is not correctly reinitialized like done
at initial allocation thanks to kzalloc() and kobject is considered as
already initialized by kernel. That is due to the fact struct virtio_dev
is allocated and released at vdev resource handling level managed and
virtio device is registered and unregistered at rproc subdevices level.

Moreover kernel documentation mentions that device struct must be
zero initialized before calling device_initialize().

This patch disentangles struct virtio_dev from struct rproc_vdev as
the two struct don't have the same life-cycle.

struct virtio_dev is now allocated on rproc_start() and released
on rproc_stop().

This patch applies on top of patch
remoteproc: create vdev subdevice with specific dma memory pool [1]

[1]: https://patchwork.kernel.org/patch/10755781/

Fixes: 7e83cab824a8 ("remoteproc: Modify recovery path to use rproc_{start,stop}()")

Reported-by: Xiang Xiao <xiaoxiang781216@gmail.com>
Signed-off-by: Loic Pallardy <loic.pallardy@st.com>
Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
---
 drivers/remoteproc/remoteproc_core.c     |  5 ++++-
 drivers/remoteproc/remoteproc_internal.h |  2 +-
 drivers/remoteproc/remoteproc_virtio.c   | 20 ++++++++++++++++----
 include/linux/remoteproc.h               |  3 +--
 4 files changed, 22 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c
index 821dbedef18e..454a601d63c9 100644
--- a/drivers/remoteproc/remoteproc_core.c
+++ b/drivers/remoteproc/remoteproc_core.c
@@ -421,8 +421,11 @@ static int rproc_vdev_do_start(struct rproc_subdev *subdev)
 static void rproc_vdev_do_stop(struct rproc_subdev *subdev, bool crashed)
 {
 	struct rproc_vdev *rvdev = container_of(subdev, struct rproc_vdev, subdev);
+	int ret;
 
-	rproc_remove_virtio_dev(rvdev);
+	ret = device_for_each_child(&rvdev->dev, NULL, rproc_remove_virtio_dev);
+	if (ret)
+		dev_warn(&rvdev->dev, "can't remove vdev child device: %d\n", ret);
 }
 
 /**
diff --git a/drivers/remoteproc/remoteproc_internal.h b/drivers/remoteproc/remoteproc_internal.h
index bfeacfd40947..2698775c5005 100644
--- a/drivers/remoteproc/remoteproc_internal.h
+++ b/drivers/remoteproc/remoteproc_internal.h
@@ -32,7 +32,7 @@ void rproc_vdev_release(struct kref *ref);
 
 /* from remoteproc_virtio.c */
 int rproc_add_virtio_dev(struct rproc_vdev *rvdev, int id);
-void rproc_remove_virtio_dev(struct rproc_vdev *rvdev);
+int rproc_remove_virtio_dev(struct device *dev, void *data);
 
 /* from remoteproc_debugfs.c */
 void rproc_remove_trace_file(struct dentry *tfile);
diff --git a/drivers/remoteproc/remoteproc_virtio.c b/drivers/remoteproc/remoteproc_virtio.c
index d08b2cfd875b..b7a987d1b962 100644
--- a/drivers/remoteproc/remoteproc_virtio.c
+++ b/drivers/remoteproc/remoteproc_virtio.c
@@ -313,6 +313,8 @@ static void rproc_virtio_dev_release(struct device *dev)
 	struct rproc_vdev *rvdev = vdev_to_rvdev(vdev);
 	struct rproc *rproc = vdev_to_rproc(vdev);
 
+	kfree(vdev);
+
 	kref_put(&rvdev->refcount, rproc_vdev_release);
 
 	put_device(&rproc->dev);
@@ -331,7 +333,7 @@ int rproc_add_virtio_dev(struct rproc_vdev *rvdev, int id)
 {
 	struct rproc *rproc = rvdev->rproc;
 	struct device *dev = &rvdev->dev;
-	struct virtio_device *vdev = &rvdev->vdev;
+	struct virtio_device *vdev;
 	struct rproc_mem_entry *mem;
 	int ret;
 
@@ -372,6 +374,12 @@ int rproc_add_virtio_dev(struct rproc_vdev *rvdev, int id)
 		}
 	}
 
+	/* Allocate virtio device */
+	vdev = kzalloc(sizeof(*vdev), GFP_KERNEL);
+	if (!vdev) {
+		ret = -ENOMEM;
+		goto out;
+	}
 	vdev->id.device	= id,
 	vdev->config = &rproc_virtio_config_ops,
 	vdev->dev.parent = dev;
@@ -405,11 +413,15 @@ out:
 
 /**
  * rproc_remove_virtio_dev() - remove an rproc-induced virtio device
- * @rvdev: the remote vdev
+ * @dev: the virtio device
+ * @data: must be null
  *
  * This function unregisters an existing virtio device.
  */
-void rproc_remove_virtio_dev(struct rproc_vdev *rvdev)
+int rproc_remove_virtio_dev(struct device *dev, void *data)
 {
-	unregister_virtio_device(&rvdev->vdev);
+	struct virtio_device *vdev = dev_to_virtio(dev);
+
+	unregister_virtio_device(vdev);
+	return 0;
 }
diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h
index 82cb77ad37c7..04d04709f2bd 100644
--- a/include/linux/remoteproc.h
+++ b/include/linux/remoteproc.h
@@ -559,7 +559,6 @@ struct rproc_vdev {
 	unsigned int id;
 	struct list_head node;
 	struct rproc *rproc;
-	struct virtio_device vdev;
 	struct rproc_vring vring[RVDEV_NUM_VRINGS];
 	u32 rsc_offset;
 	u32 index;
@@ -602,7 +601,7 @@ int rproc_coredump_add_custom_segment(struct rproc *rproc,
 
 static inline struct rproc_vdev *vdev_to_rvdev(struct virtio_device *vdev)
 {
-	return container_of(vdev, struct rproc_vdev, vdev);
+	return container_of(vdev->dev.parent, struct rproc_vdev, dev);
 }
 
 static inline struct rproc *vdev_to_rproc(struct virtio_device *vdev)
-- 
cgit v1.2.3


From 225c0eda36bdb5327dc0125f4cf222c4cfd802aa Mon Sep 17 00:00:00 2001
From: Bean Huo <beanhuo@micron.com>
Date: Fri, 8 Feb 2019 18:34:31 +0000
Subject: mtd: spi-nor: Fix wrong abbreviation HWCPAS

Change SNOR_HWCPAS_READ_OCTAL to SNOR_HWCAPS_READ_OCTAL.

Signed-off-by: Bean Huo <beanhuo@micron.com>
Reviewed-by: Tudor Ambarus <tudor.ambarus@microchip.com>
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
---
 include/linux/mtd/spi-nor.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h
index 2353af8bac99..b3d360b0ee3d 100644
--- a/include/linux/mtd/spi-nor.h
+++ b/include/linux/mtd/spi-nor.h
@@ -487,7 +487,7 @@ struct spi_nor_hwcaps {
 #define SNOR_HWCAPS_READ_4_4_4		BIT(9)
 #define SNOR_HWCAPS_READ_1_4_4_DTR	BIT(10)
 
-#define SNOR_HWCPAS_READ_OCTAL		GENMASK(14, 11)
+#define SNOR_HWCAPS_READ_OCTAL		GENMASK(14, 11)
 #define SNOR_HWCAPS_READ_1_1_8		BIT(11)
 #define SNOR_HWCAPS_READ_1_8_8		BIT(12)
 #define SNOR_HWCAPS_READ_8_8_8		BIT(13)
-- 
cgit v1.2.3


From 9f199dd34ce06f603df365ab18bd84eefc5f7c2b Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Wed, 20 Feb 2019 08:59:23 +0000
Subject: irqdomain: Allow the default irq domain to be retrieved

The default irq domain allows legacy code to create irqdomain
mappings without having to track the domain it is allocating
from. Setting the default domain is a one shot, fire and forget
operation, and no effort was made to be able to retrieve this
information at a later point in time.

Newer irqdomain APIs (the hierarchical stuff) relies on both
the irqchip code to track the irqdomain it is allocating from,
as well as some form of firmware abstraction to easily identify
which piece of HW maps to which irq domain (DT, ACPI).

For systems without such firmware (or legacy platform that are
getting dragged into the 21st century), things are a bit harder.
For these cases (and these cases only!), let's provide a way
to retrieve the default domain, allowing the use of the v2 API
without having to resort to platform-specific hacks.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 include/linux/irqdomain.h |  1 +
 kernel/irq/irqdomain.c    | 14 ++++++++++++++
 2 files changed, 15 insertions(+)

(limited to 'include')

diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index 35965f41d7be..d2130dc7c0e6 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -265,6 +265,7 @@ extern struct irq_domain *irq_find_matching_fwspec(struct irq_fwspec *fwspec,
 						   enum irq_domain_bus_token bus_token);
 extern bool irq_domain_check_msi_remap(void);
 extern void irq_set_default_host(struct irq_domain *host);
+extern struct irq_domain *irq_get_default_host(void);
 extern int irq_domain_alloc_descs(int virq, unsigned int nr_irqs,
 				  irq_hw_number_t hwirq, int node,
 				  const struct irq_affinity_desc *affinity);
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index 8b0be4bd6565..80818764643d 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -458,6 +458,20 @@ void irq_set_default_host(struct irq_domain *domain)
 }
 EXPORT_SYMBOL_GPL(irq_set_default_host);
 
+/**
+ * irq_get_default_host() - Retrieve the "default" irq domain
+ *
+ * Returns: the default domain, if any.
+ *
+ * Modern code should never use this. This should only be used on
+ * systems that cannot implement a firmware->fwnode mapping (which
+ * both DT and ACPI provide).
+ */
+struct irq_domain *irq_get_default_host(void)
+{
+	return irq_default_domain;
+}
+
 static void irq_domain_clear_mapping(struct irq_domain *domain,
 				     irq_hw_number_t hwirq)
 {
-- 
cgit v1.2.3


From 7945f929f1a77a1c8887a97ca07f87626858ff42 Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Date: Wed, 20 Feb 2019 11:12:39 +0000
Subject: drivers: provide devm_platform_ioremap_resource()

There are currently 1200+ instances of using platform_get_resource()
and devm_ioremap_resource() together in the kernel tree.

This patch wraps these two calls in a single helper. Thanks to that
we don't have to declare a local variable for struct resource * and can
omit the redundant argument for resource type. We also have one
function call less.

Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/base/platform.c         | 18 ++++++++++++++++++
 include/linux/platform_device.h |  3 +++
 2 files changed, 21 insertions(+)

(limited to 'include')

diff --git a/drivers/base/platform.c b/drivers/base/platform.c
index 1c958eb33ef4..f82691e1c26c 100644
--- a/drivers/base/platform.c
+++ b/drivers/base/platform.c
@@ -79,6 +79,24 @@ struct resource *platform_get_resource(struct platform_device *dev,
 }
 EXPORT_SYMBOL_GPL(platform_get_resource);
 
+/**
+ * devm_platform_ioremap_resource - call devm_ioremap_resource() for a platform
+ *				    device
+ *
+ * @pdev: platform device to use both for memory resource lookup as well as
+ *        resource managemend
+ * @index: resource index
+ */
+void __iomem *devm_platform_ioremap_resource(struct platform_device *pdev,
+					     unsigned int index)
+{
+	struct resource *res;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, index);
+	return devm_ioremap_resource(&pdev->dev, res);
+}
+EXPORT_SYMBOL_GPL(devm_platform_ioremap_resource);
+
 /**
  * platform_get_irq - get an IRQ for a device
  * @dev: platform device
diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h
index c7c081dc6034..b126b73ed8ef 100644
--- a/include/linux/platform_device.h
+++ b/include/linux/platform_device.h
@@ -52,6 +52,9 @@ extern struct device platform_bus;
 extern void arch_setup_pdev_archdata(struct platform_device *);
 extern struct resource *platform_get_resource(struct platform_device *,
 					      unsigned int, unsigned int);
+extern void __iomem *
+devm_platform_ioremap_resource(struct platform_device *pdev,
+			       unsigned int index);
 extern int platform_get_irq(struct platform_device *, unsigned int);
 extern int platform_irq_count(struct platform_device *);
 extern struct resource *platform_get_resource_byname(struct platform_device *,
-- 
cgit v1.2.3


From a7013ba5a9302cbded1c45ab48003c6346584a4d Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Thu, 21 Feb 2019 12:28:05 +0100
Subject: driver core: Add missing description of new struct device_link field

Commit 36003d4cf57c ("driver core: Fix PM-runtime for links added
during consumer probe") forgot to add a kerneldoc decription for the
new struct device_link member added by it, so do that now.

Fixes: 36003d4cf57c ("driver core: Fix PM-runtime for links added during consumer probe")
Reported-by: kbuild test robot <lkp@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/device.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/device.h b/include/linux/device.h
index a7967a48cdc9..163b5898ac78 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -848,6 +848,7 @@ enum device_link_state {
  * @rpm_active: Whether or not the consumer device is runtime-PM-active.
  * @kref: Count repeated addition of the same link.
  * @rcu_head: An RCU head to use for deferred execution of SRCU callbacks.
+ * @supplier_preactivated: Supplier has been made active before consumer probe.
  */
 struct device_link {
 	struct device *supplier;
-- 
cgit v1.2.3


From f3d74b2245a0e8b994f53df1d7982d367fc63dfe Mon Sep 17 00:00:00 2001
From: Hans Wippel <hwippel@linux.ibm.com>
Date: Thu, 21 Feb 2019 13:01:01 +0100
Subject: net/smc: add smcd support to the pnet table

Currently, users can only set pnetids for netdevs and ib devices in the
pnet table. This patch adds support for smcd devices to the pnet table.

Signed-off-by: Hans Wippel <hwippel@linux.ibm.com>
Signed-off-by: Ursula Braun <ubraun@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/smc.h  |  1 +
 net/smc/smc_pnet.c | 87 +++++++++++++++++++++++++++++++++++++++++++++++++-----
 2 files changed, 81 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/net/smc.h b/include/net/smc.h
index 9ef49f8b1002..bd9c0fb3b577 100644
--- a/include/net/smc.h
+++ b/include/net/smc.h
@@ -74,6 +74,7 @@ struct smcd_dev {
 	struct list_head vlan;
 	struct workqueue_struct *event_wq;
 	u8 pnetid[SMC_MAX_PNETID_LEN];
+	bool pnetid_by_user;
 };
 
 struct smcd_dev *smcd_alloc_dev(struct device *parent, const char *name,
diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c
index 327bf2afe820..5497a8b44287 100644
--- a/net/smc/smc_pnet.c
+++ b/net/smc/smc_pnet.c
@@ -66,6 +66,7 @@ static struct smc_pnettable {
  * @ndev: pointer to network device.
  * @smcibdev: Pointer to IB device.
  * @ib_port: Port of IB device.
+ * @smcd_dev: Pointer to smcd device.
  */
 struct smc_user_pnetentry {
 	struct list_head list;
@@ -73,6 +74,7 @@ struct smc_user_pnetentry {
 	struct net_device *ndev;
 	struct smc_ib_device *smcibdev;
 	u8 ib_port;
+	struct smcd_dev *smcd_dev;
 };
 
 /* pnet entry stored in pnet table */
@@ -103,6 +105,7 @@ static int smc_pnet_remove_by_pnetid(char *pnet_name)
 {
 	struct smc_pnetentry *pnetelem, *tmp_pe;
 	struct smc_ib_device *ibdev;
+	struct smcd_dev *smcd_dev;
 	int rc = -ENOENT;
 	int ibport;
 
@@ -135,6 +138,18 @@ static int smc_pnet_remove_by_pnetid(char *pnet_name)
 		}
 	}
 	spin_unlock(&smc_ib_devices.lock);
+	/* remove smcd devices */
+	spin_lock(&smcd_dev_list.lock);
+	list_for_each_entry(smcd_dev, &smcd_dev_list.list, list) {
+		if (smcd_dev->pnetid_by_user &&
+		    (!pnet_name ||
+		     smc_pnet_match(pnet_name, smcd_dev->pnetid))) {
+			memset(smcd_dev->pnetid, 0, SMC_MAX_PNETID_LEN);
+			smcd_dev->pnetid_by_user = false;
+			rc = 0;
+		}
+	}
+	spin_unlock(&smcd_dev_list.lock);
 	return rc;
 }
 
@@ -168,6 +183,7 @@ static int smc_pnet_enter(struct smc_user_pnetentry *new_pnetelem)
 	u8 ndev_pnetid[SMC_MAX_PNETID_LEN];
 	struct smc_pnetentry *tmp_pnetelem;
 	struct smc_pnetentry *pnetelem;
+	bool new_smcddev = false;
 	struct net_device *ndev;
 	bool new_netdev = true;
 	bool new_ibdev = false;
@@ -185,9 +201,21 @@ static int smc_pnet_enter(struct smc_user_pnetentry *new_pnetelem)
 		}
 		spin_unlock(&smc_ib_devices.lock);
 	}
+	if (new_pnetelem->smcd_dev) {
+		struct smcd_dev *smcd_dev = new_pnetelem->smcd_dev;
+
+		spin_lock(&smcd_dev_list.lock);
+		if (smc_pnet_match(smcd_dev->pnetid, pnet_null)) {
+			memcpy(smcd_dev->pnetid, new_pnetelem->pnet_name,
+			       SMC_MAX_PNETID_LEN);
+			smcd_dev->pnetid_by_user = true;
+			new_smcddev = true;
+		}
+		spin_unlock(&smcd_dev_list.lock);
+	}
 
 	if (!new_pnetelem->ndev)
-		return new_ibdev ? 0 : -EEXIST;
+		return (new_ibdev || new_smcddev) ? 0 : -EEXIST;
 
 	/* check if (base) netdev already has a pnetid. If there is one, we do
 	 * not want to add a pnet table entry
@@ -195,7 +223,7 @@ static int smc_pnet_enter(struct smc_user_pnetentry *new_pnetelem)
 	ndev = pnet_find_base_ndev(new_pnetelem->ndev);
 	if (!smc_pnetid_by_dev_port(ndev->dev.parent, ndev->dev_port,
 				    ndev_pnetid))
-		return new_ibdev ? 0 : -EEXIST;
+		return (new_ibdev || new_smcddev) ? 0 : -EEXIST;
 
 	/* add a new netdev entry to the pnet table if there isn't one */
 	tmp_pnetelem = kzalloc(sizeof(*pnetelem), GFP_KERNEL);
@@ -219,7 +247,7 @@ static int smc_pnet_enter(struct smc_user_pnetentry *new_pnetelem)
 		kfree(tmp_pnetelem);
 	}
 
-	return (new_netdev || new_ibdev) ? 0 : -EEXIST;
+	return (new_netdev || new_ibdev || new_smcddev) ? 0 : -EEXIST;
 }
 
 /* The limit for pnetid is 16 characters.
@@ -267,6 +295,23 @@ out:
 	return ibdev;
 }
 
+/* Find an smcd device by a given name. The device might not exist. */
+static struct smcd_dev *smc_pnet_find_smcd(char *smcd_name)
+{
+	struct smcd_dev *smcd_dev;
+
+	spin_lock(&smcd_dev_list.lock);
+	list_for_each_entry(smcd_dev, &smcd_dev_list.list, list) {
+		if (!strncmp(dev_name(&smcd_dev->dev), smcd_name,
+			     IB_DEVICE_NAME_MAX - 1))
+			goto out;
+	}
+	smcd_dev = NULL;
+out:
+	spin_unlock(&smcd_dev_list.lock);
+	return smcd_dev;
+}
+
 /* Parse the supplied netlink attributes and fill a pnetentry structure.
  * For ethernet and infiniband device names verify that the devices exist.
  */
@@ -300,7 +345,8 @@ static int smc_pnet_fill_entry(struct net *net,
 		ibname = (char *)nla_data(tb[SMC_PNETID_IBNAME]);
 		ibname = strim(ibname);
 		pnetelem->smcibdev = smc_pnet_find_ib(ibname);
-		if (!pnetelem->smcibdev)
+		pnetelem->smcd_dev = smc_pnet_find_smcd(ibname);
+		if (!pnetelem->smcibdev && !pnetelem->smcd_dev)
 			goto error;
 		if (pnetelem->smcibdev) {
 			if (!tb[SMC_PNETID_IBPORT])
@@ -339,6 +385,11 @@ static int smc_pnet_set_nla(struct sk_buff *msg,
 				   pnetelem->smcibdev->ibdev->name) ||
 		    nla_put_u8(msg, SMC_PNETID_IBPORT, pnetelem->ib_port))
 			return -1;
+	} else if (pnetelem->smcd_dev) {
+		if (nla_put_string(msg, SMC_PNETID_IBNAME,
+				   dev_name(&pnetelem->smcd_dev->dev)) ||
+		    nla_put_u8(msg, SMC_PNETID_IBPORT, 1))
+			return -1;
 	} else {
 		if (nla_put_string(msg, SMC_PNETID_IBNAME, "n/a") ||
 		    nla_put_u8(msg, SMC_PNETID_IBPORT, 0xff))
@@ -400,6 +451,7 @@ static int _smc_pnet_dump(struct sk_buff *skb, u32 portid, u32 seq, u8 *pnetid,
 	struct smc_user_pnetentry tmp_entry;
 	struct smc_pnetentry *pnetelem;
 	struct smc_ib_device *ibdev;
+	struct smcd_dev *smcd_dev;
 	int idx = 0;
 	int ibport;
 
@@ -450,6 +502,27 @@ static int _smc_pnet_dump(struct sk_buff *skb, u32 portid, u32 seq, u8 *pnetid,
 	}
 	spin_unlock(&smc_ib_devices.lock);
 
+	/* dump smcd devices */
+	spin_lock(&smcd_dev_list.lock);
+	list_for_each_entry(smcd_dev, &smcd_dev_list.list, list) {
+		if (smcd_dev->pnetid_by_user) {
+			if (pnetid && !smc_pnet_match(smcd_dev->pnetid, pnetid))
+				continue;
+			if (idx++ < start_idx)
+				continue;
+			memset(&tmp_entry, 0, sizeof(tmp_entry));
+			memcpy(&tmp_entry.pnet_name, smcd_dev->pnetid,
+			       SMC_MAX_PNETID_LEN);
+			tmp_entry.smcd_dev = smcd_dev;
+			if (smc_pnet_dumpinfo(skb, portid, seq, NLM_F_MULTI,
+					      &tmp_entry)) {
+				--idx;
+				break;
+			}
+		}
+	}
+	spin_unlock(&smcd_dev_list.lock);
+
 	return idx;
 }
 
@@ -666,12 +739,13 @@ static void smc_pnet_find_ism_by_pnetid(struct net_device *ndev,
 
 	ndev = pnet_find_base_ndev(ndev);
 	if (smc_pnetid_by_dev_port(ndev->dev.parent, ndev->dev_port,
-				   ndev_pnetid))
+				   ndev_pnetid) &&
+	    smc_pnet_find_ndev_pnetid_by_table(ndev, ndev_pnetid))
 		return; /* pnetid could not be determined */
 
 	spin_lock(&smcd_dev_list.lock);
 	list_for_each_entry(ismdev, &smcd_dev_list.list, list) {
-		if (!memcmp(ismdev->pnetid, ndev_pnetid, SMC_MAX_PNETID_LEN)) {
+		if (smc_pnet_match(ismdev->pnetid, ndev_pnetid)) {
 			*smcismdev = ismdev;
 			break;
 		}
@@ -715,7 +789,6 @@ void smc_pnet_find_ism_resource(struct sock *sk, struct smcd_dev **smcismdev)
 	if (!dst->dev)
 		goto out_rel;
 
-	/* if possible, lookup via hardware-defined pnetid */
 	smc_pnet_find_ism_by_pnetid(dst->dev, smcismdev);
 
 out_rel:
-- 
cgit v1.2.3


From 54719527fd06e80fce52b98537414035cd21e8d4 Mon Sep 17 00:00:00 2001
From: Aya Levin <ayal@mellanox.com>
Date: Thu, 21 Feb 2019 14:12:01 +0200
Subject: devlink: Rename devlink health attributes

Rename devlink health attributes for better reflect the attributes use.
Add COUNT prefix on error counter attribute and recovery counter
attribute.

Fixes: 7afe335a8bed ("devlink: Add health get command")
Signed-off-by: Aya Levin <ayal@mellanox.com>
Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/devlink.h | 4 ++--
 net/core/devlink.c           | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index 53de8802a000..5bb4ea67d84f 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -323,8 +323,8 @@ enum devlink_attr {
 	DEVLINK_ATTR_HEALTH_REPORTER,			/* nested */
 	DEVLINK_ATTR_HEALTH_REPORTER_NAME,		/* string */
 	DEVLINK_ATTR_HEALTH_REPORTER_STATE,		/* u8 */
-	DEVLINK_ATTR_HEALTH_REPORTER_ERR,		/* u64 */
-	DEVLINK_ATTR_HEALTH_REPORTER_RECOVER,		/* u64 */
+	DEVLINK_ATTR_HEALTH_REPORTER_ERR_COUNT,		/* u64 */
+	DEVLINK_ATTR_HEALTH_REPORTER_RECOVER_COUNT,	/* u64 */
 	DEVLINK_ATTR_HEALTH_REPORTER_DUMP_TS,		/* u64 */
 	DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD,	/* u64 */
 	DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER,	/* u8 */
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 04d98550c78c..5135997ecbe7 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -4650,10 +4650,10 @@ devlink_nl_health_reporter_fill(struct sk_buff *msg,
 	if (nla_put_u8(msg, DEVLINK_ATTR_HEALTH_REPORTER_STATE,
 		       reporter->health_state))
 		goto reporter_nest_cancel;
-	if (nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_ERR,
+	if (nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_ERR_COUNT,
 			      reporter->error_count, DEVLINK_ATTR_PAD))
 		goto reporter_nest_cancel;
-	if (nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_RECOVER,
+	if (nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_RECOVER_COUNT,
 			      reporter->recovery_count, DEVLINK_ATTR_PAD))
 		goto reporter_nest_cancel;
 	if (nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD,
-- 
cgit v1.2.3


From 7b3d4f44abf0e7a1ba762c8a9c99a8b39ee0c8b1 Mon Sep 17 00:00:00 2001
From: Nick Crews <ncrews@chromium.org>
Date: Fri, 8 Feb 2019 17:37:17 -0700
Subject: platform/chrome: Add new driver for Wilco EC

This EC is an incompatible variant of the typical Chrome OS embedded
controller.  It uses the same low-level communication and a similar
protocol with some significant differences.  The EC firmware does
not support the same mailbox commands so it is not registered as a
cros_ec device type.  This commit exports the wilco_ec_mailbox()
function so that other modules can use it to communicate with the EC.

Signed-off-by: Duncan Laurie <dlaurie@google.com>
Signed-off-by: Nick Crews <ncrews@chromium.org>
[Fix the sparse warning: symbol 'wilco_ec_transfer' was not declared]
Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>
[Fix Kconfig dependencies for wilco_ec]
Reported-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Enric Balletbo i Serra <enric.balletbo@collabora.com>
---
 drivers/platform/chrome/Kconfig            |   2 +
 drivers/platform/chrome/Makefile           |   2 +
 drivers/platform/chrome/wilco_ec/Kconfig   |  10 ++
 drivers/platform/chrome/wilco_ec/Makefile  |   4 +
 drivers/platform/chrome/wilco_ec/core.c    | 104 +++++++++++++
 drivers/platform/chrome/wilco_ec/mailbox.c | 237 +++++++++++++++++++++++++++++
 include/linux/platform_data/wilco-ec.h     | 140 +++++++++++++++++
 7 files changed, 499 insertions(+)
 create mode 100644 drivers/platform/chrome/wilco_ec/Kconfig
 create mode 100644 drivers/platform/chrome/wilco_ec/Makefile
 create mode 100644 drivers/platform/chrome/wilco_ec/core.c
 create mode 100644 drivers/platform/chrome/wilco_ec/mailbox.c
 create mode 100644 include/linux/platform_data/wilco-ec.h

(limited to 'include')

diff --git a/drivers/platform/chrome/Kconfig b/drivers/platform/chrome/Kconfig
index 5e2fde5ff63d..9186d81a51cc 100644
--- a/drivers/platform/chrome/Kconfig
+++ b/drivers/platform/chrome/Kconfig
@@ -152,4 +152,6 @@ config CROS_EC_SYSFS
 	  To compile this driver as a module, choose M here: the
 	  module will be called cros_ec_sysfs.
 
+source "drivers/platform/chrome/wilco_ec/Kconfig"
+
 endif # CHROMEOS_PLATFORMS
diff --git a/drivers/platform/chrome/Makefile b/drivers/platform/chrome/Makefile
index fdbee501931b..1e2f0029b597 100644
--- a/drivers/platform/chrome/Makefile
+++ b/drivers/platform/chrome/Makefile
@@ -14,3 +14,5 @@ obj-$(CONFIG_CROS_EC_LIGHTBAR)		+= cros_ec_lightbar.o
 obj-$(CONFIG_CROS_EC_VBC)		+= cros_ec_vbc.o
 obj-$(CONFIG_CROS_EC_DEBUGFS)		+= cros_ec_debugfs.o
 obj-$(CONFIG_CROS_EC_SYSFS)		+= cros_ec_sysfs.o
+
+obj-$(CONFIG_WILCO_EC)			+= wilco_ec/
diff --git a/drivers/platform/chrome/wilco_ec/Kconfig b/drivers/platform/chrome/wilco_ec/Kconfig
new file mode 100644
index 000000000000..c6bc4e8f3062
--- /dev/null
+++ b/drivers/platform/chrome/wilco_ec/Kconfig
@@ -0,0 +1,10 @@
+config WILCO_EC
+	tristate "ChromeOS Wilco Embedded Controller"
+	depends on ACPI && X86 && CROS_EC_LPC_MEC
+	help
+	  If you say Y here, you get support for talking to the ChromeOS
+	  Wilco EC over an eSPI bus. This uses a simple byte-level protocol
+	  with a checksum.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called wilco_ec.
diff --git a/drivers/platform/chrome/wilco_ec/Makefile b/drivers/platform/chrome/wilco_ec/Makefile
new file mode 100644
index 000000000000..03b32301dc61
--- /dev/null
+++ b/drivers/platform/chrome/wilco_ec/Makefile
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
+
+wilco_ec-objs				:= core.o mailbox.o
+obj-$(CONFIG_WILCO_EC)			+= wilco_ec.o
diff --git a/drivers/platform/chrome/wilco_ec/core.c b/drivers/platform/chrome/wilco_ec/core.c
new file mode 100644
index 000000000000..20ecc580d108
--- /dev/null
+++ b/drivers/platform/chrome/wilco_ec/core.c
@@ -0,0 +1,104 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Core driver for Wilco Embedded Controller
+ *
+ * Copyright 2018 Google LLC
+ *
+ * This is the entry point for the drivers that control the Wilco EC.
+ * This driver is responsible for several tasks:
+ * - Initialize the register interface that is used by wilco_ec_mailbox()
+ * - Create a platform device which is picked up by the debugfs driver
+ * - Create a platform device which is picked up by the RTC driver
+ */
+
+#include <linux/acpi.h>
+#include <linux/device.h>
+#include <linux/ioport.h>
+#include <linux/module.h>
+#include <linux/platform_data/wilco-ec.h>
+#include <linux/platform_device.h>
+
+#include "../cros_ec_lpc_mec.h"
+
+#define DRV_NAME "wilco-ec"
+
+static struct resource *wilco_get_resource(struct platform_device *pdev,
+					   int index)
+{
+	struct device *dev = &pdev->dev;
+	struct resource *res;
+
+	res = platform_get_resource(pdev, IORESOURCE_IO, index);
+	if (!res) {
+		dev_dbg(dev, "Couldn't find IO resource %d\n", index);
+		return res;
+	}
+
+	return devm_request_region(dev, res->start, resource_size(res),
+				   dev_name(dev));
+}
+
+static int wilco_ec_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct wilco_ec_device *ec;
+
+	ec = devm_kzalloc(dev, sizeof(*ec), GFP_KERNEL);
+	if (!ec)
+		return -ENOMEM;
+
+	platform_set_drvdata(pdev, ec);
+	ec->dev = dev;
+	mutex_init(&ec->mailbox_lock);
+
+	/* Largest data buffer size requirement is extended data response */
+	ec->data_size = sizeof(struct wilco_ec_response) +
+		EC_MAILBOX_DATA_SIZE_EXTENDED;
+	ec->data_buffer = devm_kzalloc(dev, ec->data_size, GFP_KERNEL);
+	if (!ec->data_buffer)
+		return -ENOMEM;
+
+	/* Prepare access to IO regions provided by ACPI */
+	ec->io_data = wilco_get_resource(pdev, 0);	/* Host Data */
+	ec->io_command = wilco_get_resource(pdev, 1);	/* Host Command */
+	ec->io_packet = wilco_get_resource(pdev, 2);	/* MEC EMI */
+	if (!ec->io_data || !ec->io_command || !ec->io_packet)
+		return -ENODEV;
+
+	/* Initialize cros_ec register interface for communication */
+	cros_ec_lpc_mec_init(ec->io_packet->start,
+			     ec->io_packet->start + EC_MAILBOX_DATA_SIZE);
+
+	return 0;
+}
+
+static int wilco_ec_remove(struct platform_device *pdev)
+{
+	/* Teardown cros_ec interface */
+	cros_ec_lpc_mec_destroy();
+
+	return 0;
+}
+
+static const struct acpi_device_id wilco_ec_acpi_device_ids[] = {
+	{ "GOOG000C", 0 },
+	{ }
+};
+MODULE_DEVICE_TABLE(acpi, wilco_ec_acpi_device_ids);
+
+static struct platform_driver wilco_ec_driver = {
+	.driver = {
+		.name = DRV_NAME,
+		.acpi_match_table = wilco_ec_acpi_device_ids,
+	},
+	.probe = wilco_ec_probe,
+	.remove = wilco_ec_remove,
+};
+
+module_platform_driver(wilco_ec_driver);
+
+MODULE_AUTHOR("Nick Crews <ncrews@chromium.org>");
+MODULE_AUTHOR("Duncan Laurie <dlaurie@chromium.org>");
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("ChromeOS Wilco Embedded Controller driver");
+MODULE_ALIAS("platform:" DRV_NAME);
diff --git a/drivers/platform/chrome/wilco_ec/mailbox.c b/drivers/platform/chrome/wilco_ec/mailbox.c
new file mode 100644
index 000000000000..f6ff29a11f1a
--- /dev/null
+++ b/drivers/platform/chrome/wilco_ec/mailbox.c
@@ -0,0 +1,237 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Mailbox interface for Wilco Embedded Controller
+ *
+ * Copyright 2018 Google LLC
+ *
+ * The Wilco EC is similar to a typical ChromeOS embedded controller.
+ * It uses the same MEC based low-level communication and a similar
+ * protocol, but with some important differences.  The EC firmware does
+ * not support the same mailbox commands so it is not registered as a
+ * cros_ec device type.
+ *
+ * Most messages follow a standard format, but there are some exceptions
+ * and an interface is provided to do direct/raw transactions that do not
+ * make assumptions about byte placement.
+ */
+
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/io.h>
+#include <linux/platform_data/wilco-ec.h>
+#include <linux/platform_device.h>
+
+#include "../cros_ec_lpc_mec.h"
+
+/* Version of mailbox interface */
+#define EC_MAILBOX_VERSION		0
+
+/* Command to start mailbox transaction */
+#define EC_MAILBOX_START_COMMAND	0xda
+
+/* Version of EC protocol */
+#define EC_MAILBOX_PROTO_VERSION	3
+
+/* Number of header bytes to be counted as data bytes */
+#define EC_MAILBOX_DATA_EXTRA		2
+
+/* Maximum timeout */
+#define EC_MAILBOX_TIMEOUT		HZ
+
+/* EC response flags */
+#define EC_CMDR_DATA		BIT(0)	/* Data ready for host to read */
+#define EC_CMDR_PENDING		BIT(1)	/* Write pending to EC */
+#define EC_CMDR_BUSY		BIT(2)	/* EC is busy processing a command */
+#define EC_CMDR_CMD		BIT(3)	/* Last host write was a command */
+
+/**
+ * wilco_ec_response_timed_out() - Wait for EC response.
+ * @ec: EC device.
+ *
+ * Return: true if EC timed out, false if EC did not time out.
+ */
+static bool wilco_ec_response_timed_out(struct wilco_ec_device *ec)
+{
+	unsigned long timeout = jiffies + EC_MAILBOX_TIMEOUT;
+
+	do {
+		if (!(inb(ec->io_command->start) &
+		      (EC_CMDR_PENDING | EC_CMDR_BUSY)))
+			return false;
+		usleep_range(100, 200);
+	} while (time_before(jiffies, timeout));
+
+	return true;
+}
+
+/**
+ * wilco_ec_checksum() - Compute 8-bit checksum over data range.
+ * @data: Data to checksum.
+ * @size: Number of bytes to checksum.
+ *
+ * Return: 8-bit checksum of provided data.
+ */
+static u8 wilco_ec_checksum(const void *data, size_t size)
+{
+	u8 *data_bytes = (u8 *)data;
+	u8 checksum = 0;
+	size_t i;
+
+	for (i = 0; i < size; i++)
+		checksum += data_bytes[i];
+
+	return checksum;
+}
+
+/**
+ * wilco_ec_prepare() - Prepare the request structure for the EC.
+ * @msg: EC message with request information.
+ * @rq: EC request structure to fill.
+ */
+static void wilco_ec_prepare(struct wilco_ec_message *msg,
+			     struct wilco_ec_request *rq)
+{
+	memset(rq, 0, sizeof(*rq));
+
+	/* Handle messages without trimming bytes from the request */
+	if (msg->request_size && msg->flags & WILCO_EC_FLAG_RAW_REQUEST) {
+		rq->reserved_raw = *(u8 *)msg->request_data;
+		msg->request_size--;
+		memmove(msg->request_data, msg->request_data + 1,
+			msg->request_size);
+	}
+
+	/* Fill in request packet */
+	rq->struct_version = EC_MAILBOX_PROTO_VERSION;
+	rq->mailbox_id = msg->type;
+	rq->mailbox_version = EC_MAILBOX_VERSION;
+	rq->data_size = msg->request_size + EC_MAILBOX_DATA_EXTRA;
+	rq->command = msg->command;
+
+	/* Checksum header and data */
+	rq->checksum = wilco_ec_checksum(rq, sizeof(*rq));
+	rq->checksum += wilco_ec_checksum(msg->request_data, msg->request_size);
+	rq->checksum = -rq->checksum;
+}
+
+/**
+ * wilco_ec_transfer() - Perform actual data transfer.
+ * @ec: EC device.
+ * @msg: EC message data for request and response.
+ * @rq: Filled in request structure
+ *
+ * Context: ec->mailbox_lock should be held while using this function.
+ * Return: number of bytes received or negative error code on failure.
+ */
+static int wilco_ec_transfer(struct wilco_ec_device *ec,
+			     struct wilco_ec_message *msg,
+			     struct wilco_ec_request *rq)
+{
+	struct wilco_ec_response *rs;
+	u8 checksum;
+	u8 flag;
+	size_t size;
+
+	/* Write request header, then data */
+	cros_ec_lpc_io_bytes_mec(MEC_IO_WRITE, 0, sizeof(*rq), (u8 *)rq);
+	cros_ec_lpc_io_bytes_mec(MEC_IO_WRITE, sizeof(*rq), msg->request_size,
+				 msg->request_data);
+
+	/* Start the command */
+	outb(EC_MAILBOX_START_COMMAND, ec->io_command->start);
+
+	/* For some commands (eg shutdown) the EC will not respond, that's OK */
+	if (msg->flags & WILCO_EC_FLAG_NO_RESPONSE) {
+		dev_dbg(ec->dev, "EC does not respond to this command\n");
+		return 0;
+	}
+
+	/* Wait for it to complete */
+	if (wilco_ec_response_timed_out(ec)) {
+		dev_dbg(ec->dev, "response timed out\n");
+		return -ETIMEDOUT;
+	}
+
+	/* Check result */
+	flag = inb(ec->io_data->start);
+	if (flag) {
+		dev_dbg(ec->dev, "bad response: 0x%02x\n", flag);
+		return -EIO;
+	}
+
+	if (msg->flags & WILCO_EC_FLAG_EXTENDED_DATA)
+		size = EC_MAILBOX_DATA_SIZE_EXTENDED;
+	else
+		size = EC_MAILBOX_DATA_SIZE;
+
+	/* Read back response */
+	rs = ec->data_buffer;
+	checksum = cros_ec_lpc_io_bytes_mec(MEC_IO_READ, 0,
+					    sizeof(*rs) + size, (u8 *)rs);
+	if (checksum) {
+		dev_dbg(ec->dev, "bad packet checksum 0x%02x\n", rs->checksum);
+		return -EBADMSG;
+	}
+
+	/* Check that the EC reported success */
+	msg->result = rs->result;
+	if (msg->result) {
+		dev_dbg(ec->dev, "bad response: 0x%02x\n", msg->result);
+		return -EBADMSG;
+	}
+
+	/* Check the returned data size, skipping the header */
+	if (rs->data_size != size) {
+		dev_dbg(ec->dev, "unexpected packet size (%u != %zu)",
+			rs->data_size, size);
+		return -EMSGSIZE;
+	}
+
+	/* Skip 1 response data byte unless specified */
+	size = (msg->flags & WILCO_EC_FLAG_RAW_RESPONSE) ? 0 : 1;
+	if ((ssize_t) rs->data_size - size < msg->response_size) {
+		dev_dbg(ec->dev, "response data too short (%zd < %zu)",
+			(ssize_t) rs->data_size - size, msg->response_size);
+		return -EMSGSIZE;
+	}
+
+	/* Ignore response data bytes as requested */
+	memcpy(msg->response_data, rs->data + size, msg->response_size);
+
+	/* Return actual amount of data received */
+	return msg->response_size;
+}
+
+/**
+ * wilco_ec_mailbox() - Send EC request and receive EC response.
+ * @ec: EC device.
+ * @msg: EC message data for request and response.
+ *
+ * On entry msg->type, msg->flags, msg->command, msg->request_size,
+ * msg->response_size, and msg->request_data should all be filled in.
+ *
+ * On exit msg->result and msg->response_data will be filled.
+ *
+ * Return: number of bytes received or negative error code on failure.
+ */
+int wilco_ec_mailbox(struct wilco_ec_device *ec, struct wilco_ec_message *msg)
+{
+	struct wilco_ec_request *rq;
+	int ret;
+
+	dev_dbg(ec->dev, "cmd=%02x type=%04x flags=%02x rslen=%zu rqlen=%zu\n",
+		msg->command, msg->type, msg->flags, msg->response_size,
+		msg->request_size);
+
+	/* Prepare request packet */
+	rq = ec->data_buffer;
+	wilco_ec_prepare(msg, rq);
+
+	mutex_lock(&ec->mailbox_lock);
+	ret = wilco_ec_transfer(ec, msg, rq);
+	mutex_unlock(&ec->mailbox_lock);
+
+	return ret;
+
+}
+EXPORT_SYMBOL_GPL(wilco_ec_mailbox);
diff --git a/include/linux/platform_data/wilco-ec.h b/include/linux/platform_data/wilco-ec.h
new file mode 100644
index 000000000000..0feb4b520a54
--- /dev/null
+++ b/include/linux/platform_data/wilco-ec.h
@@ -0,0 +1,140 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * ChromeOS Wilco Embedded Controller
+ *
+ * Copyright 2018 Google LLC
+ */
+
+#ifndef WILCO_EC_H
+#define WILCO_EC_H
+
+#include <linux/device.h>
+#include <linux/kernel.h>
+
+/* Message flags for using the mailbox() interface */
+#define WILCO_EC_FLAG_NO_RESPONSE	BIT(0) /* EC does not respond */
+#define WILCO_EC_FLAG_EXTENDED_DATA	BIT(1) /* EC returns 256 data bytes */
+#define WILCO_EC_FLAG_RAW_REQUEST	BIT(2) /* Do not trim request data */
+#define WILCO_EC_FLAG_RAW_RESPONSE	BIT(3) /* Do not trim response data */
+#define WILCO_EC_FLAG_RAW		(WILCO_EC_FLAG_RAW_REQUEST | \
+					 WILCO_EC_FLAG_RAW_RESPONSE)
+
+/* Normal commands have a maximum 32 bytes of data */
+#define EC_MAILBOX_DATA_SIZE		32
+/* Extended commands have 256 bytes of response data */
+#define EC_MAILBOX_DATA_SIZE_EXTENDED	256
+
+/**
+ * struct wilco_ec_device - Wilco Embedded Controller handle.
+ * @dev: Device handle.
+ * @mailbox_lock: Mutex to ensure one mailbox command at a time.
+ * @io_command: I/O port for mailbox command.  Provided by ACPI.
+ * @io_data: I/O port for mailbox data.  Provided by ACPI.
+ * @io_packet: I/O port for mailbox packet data.  Provided by ACPI.
+ * @data_buffer: Buffer used for EC communication.  The same buffer
+ *               is used to hold the request and the response.
+ * @data_size: Size of the data buffer used for EC communication.
+ */
+struct wilco_ec_device {
+	struct device *dev;
+	struct mutex mailbox_lock;
+	struct resource *io_command;
+	struct resource *io_data;
+	struct resource *io_packet;
+	void *data_buffer;
+	size_t data_size;
+};
+
+/**
+ * struct wilco_ec_request - Mailbox request message format.
+ * @struct_version: Should be %EC_MAILBOX_PROTO_VERSION
+ * @checksum: Sum of all bytes must be 0.
+ * @mailbox_id: Mailbox identifier, specifies the command set.
+ * @mailbox_version: Mailbox interface version %EC_MAILBOX_VERSION
+ * @reserved: Set to zero.
+ * @data_size: Length of request, data + last 2 bytes of the header.
+ * @command: Mailbox command code, unique for each mailbox_id set.
+ * @reserved_raw: Set to zero for most commands, but is used by
+ *                some command types and for raw commands.
+ */
+struct wilco_ec_request {
+	u8 struct_version;
+	u8 checksum;
+	u16 mailbox_id;
+	u8 mailbox_version;
+	u8 reserved;
+	u16 data_size;
+	u8 command;
+	u8 reserved_raw;
+} __packed;
+
+/**
+ * struct wilco_ec_response - Mailbox response message format.
+ * @struct_version: Should be %EC_MAILBOX_PROTO_VERSION
+ * @checksum: Sum of all bytes must be 0.
+ * @result: Result code from the EC.  Non-zero indicates an error.
+ * @data_size: Length of the response data buffer.
+ * @reserved: Set to zero.
+ * @mbox0: EC returned data at offset 0 is unused (always 0) so this byte
+ *         is treated as part of the header instead of the data.
+ * @data: Response data buffer.  Max size is %EC_MAILBOX_DATA_SIZE_EXTENDED.
+ */
+struct wilco_ec_response {
+	u8 struct_version;
+	u8 checksum;
+	u16 result;
+	u16 data_size;
+	u8 reserved[2];
+	u8 mbox0;
+	u8 data[0];
+} __packed;
+
+/**
+ * enum wilco_ec_msg_type - Message type to select a set of command codes.
+ * @WILCO_EC_MSG_LEGACY: Legacy EC messages for standard EC behavior.
+ * @WILCO_EC_MSG_PROPERTY: Get/Set/Sync EC controlled NVRAM property.
+ * @WILCO_EC_MSG_TELEMETRY_SHORT: 32 bytes of telemetry data provided by the EC.
+ * @WILCO_EC_MSG_TELEMETRY_LONG: 256 bytes of telemetry data provided by the EC.
+ */
+enum wilco_ec_msg_type {
+	WILCO_EC_MSG_LEGACY = 0x00f0,
+	WILCO_EC_MSG_PROPERTY = 0x00f2,
+	WILCO_EC_MSG_TELEMETRY_SHORT = 0x00f5,
+	WILCO_EC_MSG_TELEMETRY_LONG = 0x00f6,
+};
+
+/**
+ * struct wilco_ec_message - Request and response message.
+ * @type: Mailbox message type.
+ * @flags: Message flags, e.g. %WILCO_EC_FLAG_NO_RESPONSE.
+ * @command: Mailbox command code.
+ * @result: Result code from the EC.  Non-zero indicates an error.
+ * @request_size: Number of bytes to send to the EC.
+ * @request_data: Buffer containing the request data.
+ * @response_size: Number of bytes expected from the EC.
+ *                 This is 32 by default and 256 if the flag
+ *                 is set for %WILCO_EC_FLAG_EXTENDED_DATA
+ * @response_data: Buffer containing the response data, should be
+ *                 response_size bytes and allocated by caller.
+ */
+struct wilco_ec_message {
+	enum wilco_ec_msg_type type;
+	u8 flags;
+	u8 command;
+	u8 result;
+	size_t request_size;
+	void *request_data;
+	size_t response_size;
+	void *response_data;
+};
+
+/**
+ * wilco_ec_mailbox() - Send request to the EC and receive the response.
+ * @ec: Wilco EC device.
+ * @msg: Wilco EC message.
+ *
+ * Return: Number of bytes received or negative error code on failure.
+ */
+int wilco_ec_mailbox(struct wilco_ec_device *ec, struct wilco_ec_message *msg);
+
+#endif /* WILCO_EC_H */
-- 
cgit v1.2.3


From b787bb126cbcd73754bcbc055ae9f804ac576e4a Mon Sep 17 00:00:00 2001
From: Nick Crews <ncrews@chromium.org>
Date: Fri, 8 Feb 2019 17:37:18 -0700
Subject: platform/chrome: wilco_ec: Add support for raw commands in debugfs

Add a debugfs attribute that allows sending raw commands to the EC.
This is useful for development and debug but should not be enabled
in a production environment.

To test:
Get the EC firmware build date
First send the request command
> echo 00 f0 38 00 03 00 > raw
Then read the result. "12/21/18" is in the middle of the response
> cat raw
00 31 32 2f 32 31 2f 31 38 00 00 0f 01 00 01 00  .12/21/18.......

Get the EC firmware build date
First send the request command
> echo 00 f0 38 00 03 00 > raw
Then read the result. "12/21/18" is in the middle of the response
> cat raw
00 31 32 2f 32 31 2f 31 38 00 00 0f 01 00 01 00  .12/21/18.......

Signed-off-by: Duncan Laurie <dlaurie@google.com>
Signed-off-by: Nick Crews <ncrews@chromium.org>
[Fix off-by-one error in wilco_ec/debugfs.c]
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Enric Balletbo i Serra <enric.balletbo@collabora.com>
---
 Documentation/ABI/testing/debugfs-wilco-ec |  23 +++
 drivers/platform/chrome/wilco_ec/Kconfig   |  10 ++
 drivers/platform/chrome/wilco_ec/Makefile  |   2 +
 drivers/platform/chrome/wilco_ec/core.c    |  14 ++
 drivers/platform/chrome/wilco_ec/debugfs.c | 238 +++++++++++++++++++++++++++++
 include/linux/platform_data/wilco-ec.h     |   2 +
 6 files changed, 289 insertions(+)
 create mode 100644 Documentation/ABI/testing/debugfs-wilco-ec
 create mode 100644 drivers/platform/chrome/wilco_ec/debugfs.c

(limited to 'include')

diff --git a/Documentation/ABI/testing/debugfs-wilco-ec b/Documentation/ABI/testing/debugfs-wilco-ec
new file mode 100644
index 000000000000..f814f112e213
--- /dev/null
+++ b/Documentation/ABI/testing/debugfs-wilco-ec
@@ -0,0 +1,23 @@
+What:		/sys/kernel/debug/wilco_ec/raw
+Date:		January 2019
+KernelVersion:	5.1
+Description:
+		Write and read raw mailbox commands to the EC.
+
+		For writing:
+		Bytes 0-1 indicate the message type:
+			00 F0 = Execute Legacy Command
+			00 F2 = Read/Write NVRAM Property
+		Byte 2 provides the command code
+		Bytes 3+ consist of the data passed in the request
+
+		At least three bytes are required, for the msg type and command,
+		with additional bytes optional for additional data.
+
+		Example:
+		// Request EC info type 3 (EC firmware build date)
+		$ echo 00 f0 38 00 03 00 > raw
+		// View the result. The decoded ASCII result "12/21/18" is
+		// included after the raw hex.
+		$ cat raw
+		00 31 32 2f 32 31 2f 31 38 00 38 00 01 00 2f 00  .12/21/18.8...
diff --git a/drivers/platform/chrome/wilco_ec/Kconfig b/drivers/platform/chrome/wilco_ec/Kconfig
index c6bc4e8f3062..4a119ced4d0c 100644
--- a/drivers/platform/chrome/wilco_ec/Kconfig
+++ b/drivers/platform/chrome/wilco_ec/Kconfig
@@ -8,3 +8,13 @@ config WILCO_EC
 
 	  To compile this driver as a module, choose M here: the
 	  module will be called wilco_ec.
+
+config WILCO_EC_DEBUGFS
+	tristate "Enable raw access to EC via debugfs"
+	depends on WILCO_EC
+	help
+	  If you say Y here, you get support for sending raw commands to
+	  the Wilco EC via debugfs.  These commands do not do any byte
+	  manipulation and allow for testing arbitrary commands.  This
+	  interface is intended for debug only and will not be present
+	  on production devices.
diff --git a/drivers/platform/chrome/wilco_ec/Makefile b/drivers/platform/chrome/wilco_ec/Makefile
index 03b32301dc61..063e7fb4ea17 100644
--- a/drivers/platform/chrome/wilco_ec/Makefile
+++ b/drivers/platform/chrome/wilco_ec/Makefile
@@ -2,3 +2,5 @@
 
 wilco_ec-objs				:= core.o mailbox.o
 obj-$(CONFIG_WILCO_EC)			+= wilco_ec.o
+wilco_ec_debugfs-objs			:= debugfs.o
+obj-$(CONFIG_WILCO_EC_DEBUGFS)		+= wilco_ec_debugfs.o
diff --git a/drivers/platform/chrome/wilco_ec/core.c b/drivers/platform/chrome/wilco_ec/core.c
index 20ecc580d108..af5fd288b63b 100644
--- a/drivers/platform/chrome/wilco_ec/core.c
+++ b/drivers/platform/chrome/wilco_ec/core.c
@@ -69,11 +69,25 @@ static int wilco_ec_probe(struct platform_device *pdev)
 	cros_ec_lpc_mec_init(ec->io_packet->start,
 			     ec->io_packet->start + EC_MAILBOX_DATA_SIZE);
 
+	/*
+	 * Register a child device that will be found by the debugfs driver.
+	 * Ignore failure.
+	 */
+	ec->debugfs_pdev = platform_device_register_data(dev,
+							 "wilco-ec-debugfs",
+							 PLATFORM_DEVID_AUTO,
+							 NULL, 0);
+
 	return 0;
 }
 
 static int wilco_ec_remove(struct platform_device *pdev)
 {
+	struct wilco_ec_device *ec = platform_get_drvdata(pdev);
+
+	if (ec->debugfs_pdev)
+		platform_device_unregister(ec->debugfs_pdev);
+
 	/* Teardown cros_ec interface */
 	cros_ec_lpc_mec_destroy();
 
diff --git a/drivers/platform/chrome/wilco_ec/debugfs.c b/drivers/platform/chrome/wilco_ec/debugfs.c
new file mode 100644
index 000000000000..c090db2cd5be
--- /dev/null
+++ b/drivers/platform/chrome/wilco_ec/debugfs.c
@@ -0,0 +1,238 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * debugfs attributes for Wilco EC
+ *
+ * Copyright 2019 Google LLC
+ *
+ * There is only one attribute used for debugging, called raw.
+ * You can write a hexadecimal sentence to raw, and that series of bytes
+ * will be sent to the EC. Then, you can read the bytes of response
+ * by reading from raw.
+ *
+ * For writing:
+ * Bytes 0-1 indicate the message type:
+ *         00 F0 = Execute Legacy Command
+ *         00 F2 = Read/Write NVRAM Property
+ * Byte 2 provides the command code
+ * Bytes 3+ consist of the data passed in the request
+ *
+ * When referencing the EC interface spec, byte 2 corresponds to MBOX[0],
+ * byte 3 corresponds to MBOX[1], etc.
+ *
+ * At least three bytes are required, for the msg type and command,
+ * with additional bytes optional for additional data.
+ *
+ * Example:
+ * // Request EC info type 3 (EC firmware build date)
+ * $ echo 00 f0 38 00 03 00 > raw
+ * // View the result. The decoded ASCII result "12/21/18" is
+ * // included after the raw hex.
+ * $ cat raw
+ * 00 31 32 2f 32 31 2f 31 38 00 38 00 01 00 2f 00  .12/21/18.8...
+ */
+
+#include <linux/ctype.h>
+#include <linux/debugfs.h>
+#include <linux/fs.h>
+#include <linux/module.h>
+#include <linux/platform_data/wilco-ec.h>
+#include <linux/platform_device.h>
+
+#define DRV_NAME "wilco-ec-debugfs"
+
+/* The 256 raw bytes will take up more space when represented as a hex string */
+#define FORMATTED_BUFFER_SIZE (EC_MAILBOX_DATA_SIZE_EXTENDED * 4)
+
+struct wilco_ec_debugfs {
+	struct wilco_ec_device *ec;
+	struct dentry *dir;
+	size_t response_size;
+	u8 raw_data[EC_MAILBOX_DATA_SIZE_EXTENDED];
+	u8 formatted_data[FORMATTED_BUFFER_SIZE];
+};
+static struct wilco_ec_debugfs *debug_info;
+
+/**
+ * parse_hex_sentence() - Convert a ascii hex representation into byte array.
+ * @in: Input buffer of ascii.
+ * @isize: Length of input buffer.
+ * @out: Output buffer.
+ * @osize: Length of output buffer, e.g. max number of bytes to parse.
+ *
+ * An valid input is a series of ascii hexadecimal numbers, separated by spaces.
+ * An example valid input is
+ * "   00 f2 0    000076 6 0  ff"
+ *
+ * If an individual "word" within the hex sentence is longer than MAX_WORD_SIZE,
+ * then the sentence is illegal, and parsing will fail.
+ *
+ * Return: Number of bytes parsed, or negative error code on failure.
+ */
+static int parse_hex_sentence(const char *in, int isize, u8 *out, int osize)
+{
+	int n_parsed = 0;
+	int word_start = 0;
+	int word_end;
+	int word_len;
+	/* Temp buffer for holding a "word" of chars that represents one byte */
+	#define MAX_WORD_SIZE 16
+	char tmp[MAX_WORD_SIZE + 1];
+	u8 byte;
+
+	while (word_start < isize && n_parsed < osize) {
+		/* Find the start of the next word */
+		while (word_start < isize && isspace(in[word_start]))
+			word_start++;
+		 /* reached the end of the input before next word? */
+		if (word_start >= isize)
+			break;
+
+		/* Find the end of this word */
+		word_end = word_start;
+		while (word_end < isize && !isspace(in[word_end]))
+			word_end++;
+
+		/* Copy to a tmp NULL terminated string */
+		word_len = word_end - word_start;
+		if (word_len > MAX_WORD_SIZE)
+			return -EINVAL;
+		memcpy(tmp, in + word_start, word_len);
+		tmp[word_len] = '\0';
+
+		/*
+		 * Convert from hex string, place in output. If fails to parse,
+		 * just return -EINVAL because specific error code is only
+		 * relevant for this one word, returning it would be confusing.
+		 */
+		if (kstrtou8(tmp, 16, &byte))
+			return -EINVAL;
+		out[n_parsed++] = byte;
+
+		word_start = word_end;
+	}
+	return n_parsed;
+}
+
+/* The message type takes up two bytes*/
+#define TYPE_AND_DATA_SIZE ((EC_MAILBOX_DATA_SIZE) + 2)
+
+static ssize_t raw_write(struct file *file, const char __user *user_buf,
+			 size_t count, loff_t *ppos)
+{
+	char *buf = debug_info->formatted_data;
+	struct wilco_ec_message msg;
+	u8 request_data[TYPE_AND_DATA_SIZE];
+	ssize_t kcount;
+	int ret;
+
+	if (count > FORMATTED_BUFFER_SIZE)
+		return -EINVAL;
+
+	kcount = simple_write_to_buffer(buf, FORMATTED_BUFFER_SIZE, ppos,
+					user_buf, count);
+	if (kcount < 0)
+		return kcount;
+
+	ret = parse_hex_sentence(buf, kcount, request_data, TYPE_AND_DATA_SIZE);
+	if (ret < 0)
+		return ret;
+	/* Need at least two bytes for message type and one for command */
+	if (ret < 3)
+		return -EINVAL;
+
+	/* Clear response data buffer */
+	memset(debug_info->raw_data, '\0', EC_MAILBOX_DATA_SIZE_EXTENDED);
+
+	msg.type = request_data[0] << 8 | request_data[1];
+	msg.flags = WILCO_EC_FLAG_RAW;
+	msg.command = request_data[2];
+	msg.request_data = ret > 3 ? request_data + 3 : 0;
+	msg.request_size = ret - 3;
+	msg.response_data = debug_info->raw_data;
+	msg.response_size = EC_MAILBOX_DATA_SIZE;
+
+	/* Telemetry commands use extended response data */
+	if (msg.type == WILCO_EC_MSG_TELEMETRY_LONG) {
+		msg.flags |= WILCO_EC_FLAG_EXTENDED_DATA;
+		msg.response_size = EC_MAILBOX_DATA_SIZE_EXTENDED;
+	}
+
+	ret = wilco_ec_mailbox(debug_info->ec, &msg);
+	if (ret < 0)
+		return ret;
+	debug_info->response_size = ret;
+
+	return count;
+}
+
+static ssize_t raw_read(struct file *file, char __user *user_buf, size_t count,
+			loff_t *ppos)
+{
+	int fmt_len = 0;
+
+	if (debug_info->response_size) {
+		fmt_len = hex_dump_to_buffer(debug_info->raw_data,
+					     debug_info->response_size,
+					     16, 1, debug_info->formatted_data,
+					     FORMATTED_BUFFER_SIZE, true);
+		/* Only return response the first time it is read */
+		debug_info->response_size = 0;
+	}
+
+	return simple_read_from_buffer(user_buf, count, ppos,
+				       debug_info->formatted_data, fmt_len);
+}
+
+static const struct file_operations fops_raw = {
+	.owner = THIS_MODULE,
+	.read = raw_read,
+	.write = raw_write,
+	.llseek = no_llseek,
+};
+
+/**
+ * wilco_ec_debugfs_probe() - Create the debugfs node
+ * @pdev: The platform device, probably created in core.c
+ *
+ * Try to create a debugfs node. If it fails, then we don't want to change
+ * behavior at all, this is for debugging after all. Just fail silently.
+ *
+ * Return: 0 always.
+ */
+static int wilco_ec_debugfs_probe(struct platform_device *pdev)
+{
+	struct wilco_ec_device *ec = dev_get_drvdata(pdev->dev.parent);
+
+	debug_info = devm_kzalloc(&pdev->dev, sizeof(*debug_info), GFP_KERNEL);
+	if (!debug_info)
+		return 0;
+	debug_info->ec = ec;
+	debug_info->dir = debugfs_create_dir("wilco_ec", NULL);
+	if (!debug_info->dir)
+		return 0;
+	debugfs_create_file("raw", 0644, debug_info->dir, NULL, &fops_raw);
+
+	return 0;
+}
+
+static int wilco_ec_debugfs_remove(struct platform_device *pdev)
+{
+	debugfs_remove_recursive(debug_info->dir);
+
+	return 0;
+}
+
+static struct platform_driver wilco_ec_debugfs_driver = {
+	.driver = {
+		.name = DRV_NAME,
+	},
+	.probe = wilco_ec_debugfs_probe,
+	.remove = wilco_ec_debugfs_remove,
+};
+
+module_platform_driver(wilco_ec_debugfs_driver);
+
+MODULE_ALIAS("platform:" DRV_NAME);
+MODULE_AUTHOR("Nick Crews <ncrews@chromium.org>");
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("Wilco EC debugfs driver");
diff --git a/include/linux/platform_data/wilco-ec.h b/include/linux/platform_data/wilco-ec.h
index 0feb4b520a54..5344975afa1a 100644
--- a/include/linux/platform_data/wilco-ec.h
+++ b/include/linux/platform_data/wilco-ec.h
@@ -34,6 +34,7 @@
  * @data_buffer: Buffer used for EC communication.  The same buffer
  *               is used to hold the request and the response.
  * @data_size: Size of the data buffer used for EC communication.
+ * @debugfs_pdev: The child platform_device used by the debugfs sub-driver.
  */
 struct wilco_ec_device {
 	struct device *dev;
@@ -43,6 +44,7 @@ struct wilco_ec_device {
 	struct resource *io_packet;
 	void *data_buffer;
 	size_t data_size;
+	struct platform_device *debugfs_pdev;
 };
 
 /**
-- 
cgit v1.2.3


From 0d2f2a3da1f2a9ebeb66bb03073dd149fccf1bdd Mon Sep 17 00:00:00 2001
From: Nick Crews <ncrews@chromium.org>
Date: Fri, 8 Feb 2019 17:37:19 -0700
Subject: platform/chrome: wilco_ec: Add RTC driver

This Embedded Controller has an internal RTC that is exposed
as a standard RTC class driver with read/write functionality.

The driver is added to the drivers/rtc/ so that the maintainer of that
directory will be able to comment on this change, as that maintainer is
the expert on this system. In addition, the driver code is called
indirectly after a corresponding device is registered from core.c,
as opposed to core.c registering the driver callbacks directly.

To test:
> hwclock --show --rtc /dev/rtc1
2007-12-31 16:01:20.460959-08:00
> hwclock --systohc --rtc /dev/rtc1
> hwclock --show --rtc /dev/rtc1
2018-11-29 17:08:00.780793-08:00

> hwclock --show --rtc /dev/rtc1
2007-12-31 16:01:20.460959-08:00
> hwclock --systohc --rtc /dev/rtc1
> hwclock --show --rtc /dev/rtc1
2018-11-29 17:08:00.780793-08:00

Signed-off-by: Duncan Laurie <dlaurie@google.com>
Signed-off-by: Nick Crews <ncrews@chromium.org>
Acked-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
[Fix the sparse warning: symbol 'wilco_ec_rtc_read/write' was not declared]
Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>
Signed-off-by: Enric Balletbo i Serra <enric.balletbo@collabora.com>
---
 drivers/platform/chrome/wilco_ec/core.c |  18 ++++
 drivers/rtc/Kconfig                     |  11 ++
 drivers/rtc/Makefile                    |   1 +
 drivers/rtc/rtc-wilco-ec.c              | 177 ++++++++++++++++++++++++++++++++
 include/linux/platform_data/wilco-ec.h  |   2 +
 5 files changed, 209 insertions(+)
 create mode 100644 drivers/rtc/rtc-wilco-ec.c

(limited to 'include')

diff --git a/drivers/platform/chrome/wilco_ec/core.c b/drivers/platform/chrome/wilco_ec/core.c
index af5fd288b63b..05e1e2be1c91 100644
--- a/drivers/platform/chrome/wilco_ec/core.c
+++ b/drivers/platform/chrome/wilco_ec/core.c
@@ -42,6 +42,7 @@ static int wilco_ec_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
 	struct wilco_ec_device *ec;
+	int ret;
 
 	ec = devm_kzalloc(dev, sizeof(*ec), GFP_KERNEL);
 	if (!ec)
@@ -78,13 +79,30 @@ static int wilco_ec_probe(struct platform_device *pdev)
 							 PLATFORM_DEVID_AUTO,
 							 NULL, 0);
 
+	/* Register a child device that will be found by the RTC driver. */
+	ec->rtc_pdev = platform_device_register_data(dev, "rtc-wilco-ec",
+						     PLATFORM_DEVID_AUTO,
+						     NULL, 0);
+	if (IS_ERR(ec->rtc_pdev)) {
+		dev_err(dev, "Failed to create RTC platform device\n");
+		ret = PTR_ERR(ec->rtc_pdev);
+		goto unregister_debugfs;
+	}
+
 	return 0;
+
+unregister_debugfs:
+	if (ec->debugfs_pdev)
+		platform_device_unregister(ec->debugfs_pdev);
+	cros_ec_lpc_mec_destroy();
+	return ret;
 }
 
 static int wilco_ec_remove(struct platform_device *pdev)
 {
 	struct wilco_ec_device *ec = platform_get_drvdata(pdev);
 
+	platform_device_unregister(ec->rtc_pdev);
 	if (ec->debugfs_pdev)
 		platform_device_unregister(ec->debugfs_pdev);
 
diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig
index 225b0b8516f3..d5063c791515 100644
--- a/drivers/rtc/Kconfig
+++ b/drivers/rtc/Kconfig
@@ -1814,4 +1814,15 @@ config RTC_DRV_GOLDFISH
 	  Goldfish is a code name for the virtual platform developed by Google
 	  for Android emulation.
 
+config RTC_DRV_WILCO_EC
+	tristate "Wilco EC RTC"
+	depends on WILCO_EC
+	default m
+	help
+	  If you say yes here, you get read/write support for the Real Time
+	  Clock on the Wilco Embedded Controller (Wilco is a kind of Chromebook)
+
+	  This can also be built as a module. If so, the module will
+	  be named "rtc_wilco_ec".
+
 endif # RTC_CLASS
diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile
index df022d820bee..6255ea78da25 100644
--- a/drivers/rtc/Makefile
+++ b/drivers/rtc/Makefile
@@ -172,6 +172,7 @@ obj-$(CONFIG_RTC_DRV_V3020)	+= rtc-v3020.o
 obj-$(CONFIG_RTC_DRV_VR41XX)	+= rtc-vr41xx.o
 obj-$(CONFIG_RTC_DRV_VRTC)	+= rtc-mrst.o
 obj-$(CONFIG_RTC_DRV_VT8500)	+= rtc-vt8500.o
+obj-$(CONFIG_RTC_DRV_WILCO_EC)	+= rtc-wilco-ec.o
 obj-$(CONFIG_RTC_DRV_WM831X)	+= rtc-wm831x.o
 obj-$(CONFIG_RTC_DRV_WM8350)	+= rtc-wm8350.o
 obj-$(CONFIG_RTC_DRV_X1205)	+= rtc-x1205.o
diff --git a/drivers/rtc/rtc-wilco-ec.c b/drivers/rtc/rtc-wilco-ec.c
new file mode 100644
index 000000000000..e62bda0cb53e
--- /dev/null
+++ b/drivers/rtc/rtc-wilco-ec.c
@@ -0,0 +1,177 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * RTC interface for Wilco Embedded Controller with R/W abilities
+ *
+ * Copyright 2018 Google LLC
+ *
+ * The corresponding platform device is typically registered in
+ * drivers/platform/chrome/wilco_ec/core.c
+ */
+
+#include <linux/bcd.h>
+#include <linux/err.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/platform_data/wilco-ec.h>
+#include <linux/rtc.h>
+#include <linux/timekeeping.h>
+
+#define EC_COMMAND_CMOS			0x7c
+#define EC_CMOS_TOD_WRITE		0x02
+#define EC_CMOS_TOD_READ		0x08
+
+/**
+ * struct ec_rtc_read - Format of RTC returned by EC.
+ * @second: Second value (0..59)
+ * @minute: Minute value (0..59)
+ * @hour: Hour value (0..23)
+ * @day: Day value (1..31)
+ * @month: Month value (1..12)
+ * @year: Year value (full year % 100)
+ * @century: Century value (full year / 100)
+ *
+ * All values are presented in binary (not BCD).
+ */
+struct ec_rtc_read {
+	u8 second;
+	u8 minute;
+	u8 hour;
+	u8 day;
+	u8 month;
+	u8 year;
+	u8 century;
+} __packed;
+
+/**
+ * struct ec_rtc_write - Format of RTC sent to the EC.
+ * @param: EC_CMOS_TOD_WRITE
+ * @century: Century value (full year / 100)
+ * @year: Year value (full year % 100)
+ * @month: Month value (1..12)
+ * @day: Day value (1..31)
+ * @hour: Hour value (0..23)
+ * @minute: Minute value (0..59)
+ * @second: Second value (0..59)
+ * @weekday: Day of the week (0=Saturday)
+ *
+ * All values are presented in BCD.
+ */
+struct ec_rtc_write {
+	u8 param;
+	u8 century;
+	u8 year;
+	u8 month;
+	u8 day;
+	u8 hour;
+	u8 minute;
+	u8 second;
+	u8 weekday;
+} __packed;
+
+static int wilco_ec_rtc_read(struct device *dev, struct rtc_time *tm)
+{
+	struct wilco_ec_device *ec = dev_get_drvdata(dev->parent);
+	u8 param = EC_CMOS_TOD_READ;
+	struct ec_rtc_read rtc;
+	struct wilco_ec_message msg = {
+		.type = WILCO_EC_MSG_LEGACY,
+		.flags = WILCO_EC_FLAG_RAW_RESPONSE,
+		.command = EC_COMMAND_CMOS,
+		.request_data = &param,
+		.request_size = sizeof(param),
+		.response_data = &rtc,
+		.response_size = sizeof(rtc),
+	};
+	int ret;
+
+	ret = wilco_ec_mailbox(ec, &msg);
+	if (ret < 0)
+		return ret;
+
+	tm->tm_sec	= rtc.second;
+	tm->tm_min	= rtc.minute;
+	tm->tm_hour	= rtc.hour;
+	tm->tm_mday	= rtc.day;
+	tm->tm_mon	= rtc.month - 1;
+	tm->tm_year	= rtc.year + (rtc.century * 100) - 1900;
+	tm->tm_yday	= rtc_year_days(tm->tm_mday, tm->tm_mon, tm->tm_year);
+
+	/* Don't compute day of week, we don't need it. */
+	tm->tm_wday = -1;
+
+	return 0;
+}
+
+static int wilco_ec_rtc_write(struct device *dev, struct rtc_time *tm)
+{
+	struct wilco_ec_device *ec = dev_get_drvdata(dev->parent);
+	struct ec_rtc_write rtc;
+	struct wilco_ec_message msg = {
+		.type = WILCO_EC_MSG_LEGACY,
+		.flags = WILCO_EC_FLAG_RAW_RESPONSE,
+		.command = EC_COMMAND_CMOS,
+		.request_data = &rtc,
+		.request_size = sizeof(rtc),
+	};
+	int year = tm->tm_year + 1900;
+	/*
+	 * Convert from 0=Sunday to 0=Saturday for the EC
+	 * We DO need to set weekday because the EC controls battery charging
+	 * schedules that depend on the day of the week.
+	 */
+	int wday = tm->tm_wday == 6 ? 0 : tm->tm_wday + 1;
+	int ret;
+
+	rtc.param	= EC_CMOS_TOD_WRITE;
+	rtc.century	= bin2bcd(year / 100);
+	rtc.year	= bin2bcd(year % 100);
+	rtc.month	= bin2bcd(tm->tm_mon + 1);
+	rtc.day		= bin2bcd(tm->tm_mday);
+	rtc.hour	= bin2bcd(tm->tm_hour);
+	rtc.minute	= bin2bcd(tm->tm_min);
+	rtc.second	= bin2bcd(tm->tm_sec);
+	rtc.weekday	= bin2bcd(wday);
+
+	ret = wilco_ec_mailbox(ec, &msg);
+	if (ret < 0)
+		return ret;
+
+	return 0;
+}
+
+static const struct rtc_class_ops wilco_ec_rtc_ops = {
+	.read_time = wilco_ec_rtc_read,
+	.set_time = wilco_ec_rtc_write,
+};
+
+static int wilco_ec_rtc_probe(struct platform_device *pdev)
+{
+	struct rtc_device *rtc;
+
+	rtc = devm_rtc_allocate_device(&pdev->dev);
+	if (IS_ERR(rtc))
+		return PTR_ERR(rtc);
+
+	rtc->ops = &wilco_ec_rtc_ops;
+	/* EC only supports this century */
+	rtc->range_min = RTC_TIMESTAMP_BEGIN_2000;
+	rtc->range_max = RTC_TIMESTAMP_END_2099;
+	rtc->owner = THIS_MODULE;
+
+	return rtc_register_device(rtc);
+}
+
+static struct platform_driver wilco_ec_rtc_driver = {
+	.driver = {
+		.name = "rtc-wilco-ec",
+	},
+	.probe = wilco_ec_rtc_probe,
+};
+
+module_platform_driver(wilco_ec_rtc_driver);
+
+MODULE_ALIAS("platform:rtc-wilco-ec");
+MODULE_AUTHOR("Nick Crews <ncrews@chromium.org>");
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("Wilco EC RTC driver");
diff --git a/include/linux/platform_data/wilco-ec.h b/include/linux/platform_data/wilco-ec.h
index 5344975afa1a..446473a46b88 100644
--- a/include/linux/platform_data/wilco-ec.h
+++ b/include/linux/platform_data/wilco-ec.h
@@ -35,6 +35,7 @@
  *               is used to hold the request and the response.
  * @data_size: Size of the data buffer used for EC communication.
  * @debugfs_pdev: The child platform_device used by the debugfs sub-driver.
+ * @rtc_pdev: The child platform_device used by the RTC sub-driver.
  */
 struct wilco_ec_device {
 	struct device *dev;
@@ -45,6 +46,7 @@ struct wilco_ec_device {
 	void *data_buffer;
 	size_t data_size;
 	struct platform_device *debugfs_pdev;
+	struct platform_device *rtc_pdev;
 };
 
 /**
-- 
cgit v1.2.3


From 9b15cffbf21d3ac0a60618a3274b335532f33573 Mon Sep 17 00:00:00 2001
From: Michael Grzeschik <m.grzeschik@pengutronix.de>
Date: Tue, 8 Jan 2019 17:21:15 +0100
Subject: clk: imx5: add imx5_SCC2_IPG_GATE

This adds the missing clock for the SCC2 peripheral unit.

Signed-off-by: Michael Grzeschik <m.grzeschik@pengutronix.de>
Signed-off-by: Lucas Stach <l.stach@pengutronix.de>
Signed-off-by: Stephen Boyd <sboyd@kernel.org>
---
 drivers/clk/imx/clk-imx51-imx53.c      | 1 +
 include/dt-bindings/clock/imx5-clock.h | 3 ++-
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/clk/imx/clk-imx51-imx53.c b/drivers/clk/imx/clk-imx51-imx53.c
index fc8e782d817b..e91c826bce70 100644
--- a/drivers/clk/imx/clk-imx51-imx53.c
+++ b/drivers/clk/imx/clk-imx51-imx53.c
@@ -428,6 +428,7 @@ static void __init mx51_clocks_init(struct device_node *np)
 	clk[IMX5_CLK_ESDHC4_PER_GATE]	= imx_clk_gate2("esdhc4_per_gate", "esdhc_d_sel", MXC_CCM_CCGR3, 14);
 	clk[IMX5_CLK_USB_PHY_GATE]	= imx_clk_gate2("usb_phy_gate", "usb_phy_sel", MXC_CCM_CCGR2, 0);
 	clk[IMX5_CLK_HSI2C_GATE]	= imx_clk_gate2("hsi2c_gate", "ipg", MXC_CCM_CCGR1, 22);
+	clk[IMX5_CLK_SCC2_IPG_GATE]	= imx_clk_gate2("scc2_gate", "ipg", MXC_CCM_CCGR1, 30);
 	clk[IMX5_CLK_MIPI_HSC1_GATE]	= imx_clk_gate2_flags("mipi_hsc1_gate", "ipg", MXC_CCM_CCGR4, 6, CLK_IS_CRITICAL);
 	clk[IMX5_CLK_MIPI_HSC2_GATE]	= imx_clk_gate2_flags("mipi_hsc2_gate", "ipg", MXC_CCM_CCGR4, 8, CLK_IS_CRITICAL);
 	clk[IMX5_CLK_MIPI_ESC_GATE]	= imx_clk_gate2_flags("mipi_esc_gate", "ipg", MXC_CCM_CCGR4, 10, CLK_IS_CRITICAL);
diff --git a/include/dt-bindings/clock/imx5-clock.h b/include/dt-bindings/clock/imx5-clock.h
index d382fc71aa83..a81be5be6700 100644
--- a/include/dt-bindings/clock/imx5-clock.h
+++ b/include/dt-bindings/clock/imx5-clock.h
@@ -214,6 +214,7 @@
 #define IMX5_CLK_IEEE1588_SEL		202
 #define IMX5_CLK_IEEE1588_PODF		203
 #define IMX5_CLK_IEEE1588_GATE		204
-#define IMX5_CLK_END			205
+#define IMX5_CLK_SCC2_IPG_GATE		205
+#define IMX5_CLK_END			206
 
 #endif /* __DT_BINDINGS_CLOCK_IMX5_H */
-- 
cgit v1.2.3


From 037a474f6144544848da72bfda80776686084a13 Mon Sep 17 00:00:00 2001
From: Bai Ping <ping.bai@nxp.com>
Date: Tue, 22 Jan 2019 09:31:46 +0000
Subject: dt-bindings: imx: Add clock binding doc for imx8mm

Add the clock binding doc for i.MX8MM.

Signed-off-by: Bai Ping <ping.bai@nxp.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Stephen Boyd <sboyd@kernel.org>
---
 .../devicetree/bindings/clock/imx8mm-clock.txt     |  29 +++
 include/dt-bindings/clock/imx8mm-clock.h           | 244 +++++++++++++++++++++
 2 files changed, 273 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/clock/imx8mm-clock.txt
 create mode 100644 include/dt-bindings/clock/imx8mm-clock.h

(limited to 'include')

diff --git a/Documentation/devicetree/bindings/clock/imx8mm-clock.txt b/Documentation/devicetree/bindings/clock/imx8mm-clock.txt
new file mode 100644
index 000000000000..8e4ab9e619a1
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/imx8mm-clock.txt
@@ -0,0 +1,29 @@
+* Clock bindings for NXP i.MX8M Mini
+
+Required properties:
+- compatible: Should be "fsl,imx8mm-ccm"
+- reg: Address and length of the register set
+- #clock-cells: Should be <1>
+- clocks: list of clock specifiers, must contain an entry for each required
+          entry in clock-names
+- clock-names: should include the following entries:
+    - "osc_32k"
+    - "osc_24m"
+    - "clk_ext1"
+    - "clk_ext2"
+    - "clk_ext3"
+    - "clk_ext4"
+
+clk: clock-controller@30380000 {
+	compatible = "fsl,imx8mm-ccm";
+	reg = <0x0 0x30380000 0x0 0x10000>;
+	#clock-cells = <1>;
+	clocks = <&osc_32k>, <&osc_24m>, <&clk_ext1>, <&clk_ext2>,
+		 <&clk_ext3>, <&clk_ext4>;
+	clock-names = "osc_32k", "osc_24m", "clk_ext1", "clk_ext2",
+		      "clk_ext3", "clk_ext4";
+};
+
+The clock consumer should specify the desired clock by having the clock
+ID in its "clocks" phandle cell. See include/dt-bindings/clock/imx8mm-clock.h
+for the full list of i.MX8M Mini clock IDs.
diff --git a/include/dt-bindings/clock/imx8mm-clock.h b/include/dt-bindings/clock/imx8mm-clock.h
new file mode 100644
index 000000000000..1b4353e7b486
--- /dev/null
+++ b/include/dt-bindings/clock/imx8mm-clock.h
@@ -0,0 +1,244 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright 2017-2018 NXP
+ */
+
+#ifndef __DT_BINDINGS_CLOCK_IMX8MM_H
+#define __DT_BINDINGS_CLOCK_IMX8MM_H
+
+#define IMX8MM_CLK_DUMMY			0
+#define IMX8MM_CLK_32K				1
+#define IMX8MM_CLK_24M				2
+#define IMX8MM_OSC_HDMI_CLK			3
+#define IMX8MM_CLK_EXT1				4
+#define IMX8MM_CLK_EXT2				5
+#define IMX8MM_CLK_EXT3				6
+#define IMX8MM_CLK_EXT4				7
+#define IMX8MM_AUDIO_PLL1_REF_SEL		8
+#define IMX8MM_AUDIO_PLL2_REF_SEL		9
+#define IMX8MM_VIDEO_PLL1_REF_SEL		10
+#define IMX8MM_DRAM_PLL_REF_SEL			11
+#define IMX8MM_GPU_PLL_REF_SEL			12
+#define IMX8MM_VPU_PLL_REF_SEL			13
+#define IMX8MM_ARM_PLL_REF_SEL			14
+#define IMX8MM_SYS_PLL1_REF_SEL			15
+#define IMX8MM_SYS_PLL2_REF_SEL			16
+#define IMX8MM_SYS_PLL3_REF_SEL			17
+#define IMX8MM_AUDIO_PLL1			18
+#define IMX8MM_AUDIO_PLL2			19
+#define IMX8MM_VIDEO_PLL1			20
+#define IMX8MM_DRAM_PLL				21
+#define IMX8MM_GPU_PLL				22
+#define IMX8MM_VPU_PLL				23
+#define IMX8MM_ARM_PLL				24
+#define IMX8MM_SYS_PLL1				25
+#define IMX8MM_SYS_PLL2				26
+#define IMX8MM_SYS_PLL3				27
+#define IMX8MM_AUDIO_PLL1_BYPASS		28
+#define IMX8MM_AUDIO_PLL2_BYPASS		29
+#define IMX8MM_VIDEO_PLL1_BYPASS		30
+#define IMX8MM_DRAM_PLL_BYPASS			31
+#define IMX8MM_GPU_PLL_BYPASS			32
+#define IMX8MM_VPU_PLL_BYPASS			33
+#define IMX8MM_ARM_PLL_BYPASS			34
+#define IMX8MM_SYS_PLL1_BYPASS			35
+#define IMX8MM_SYS_PLL2_BYPASS			36
+#define IMX8MM_SYS_PLL3_BYPASS			37
+#define IMX8MM_AUDIO_PLL1_OUT			38
+#define IMX8MM_AUDIO_PLL2_OUT			39
+#define IMX8MM_VIDEO_PLL1_OUT			40
+#define IMX8MM_DRAM_PLL_OUT			41
+#define IMX8MM_GPU_PLL_OUT			42
+#define IMX8MM_VPU_PLL_OUT			43
+#define IMX8MM_ARM_PLL_OUT			44
+#define IMX8MM_SYS_PLL1_OUT			45
+#define IMX8MM_SYS_PLL2_OUT			46
+#define IMX8MM_SYS_PLL3_OUT			47
+#define IMX8MM_SYS_PLL1_40M			48
+#define IMX8MM_SYS_PLL1_80M			49
+#define IMX8MM_SYS_PLL1_100M			50
+#define IMX8MM_SYS_PLL1_133M			51
+#define IMX8MM_SYS_PLL1_160M			52
+#define IMX8MM_SYS_PLL1_200M			53
+#define IMX8MM_SYS_PLL1_266M			54
+#define IMX8MM_SYS_PLL1_400M			55
+#define IMX8MM_SYS_PLL1_800M			56
+#define IMX8MM_SYS_PLL2_50M			57
+#define IMX8MM_SYS_PLL2_100M			58
+#define IMX8MM_SYS_PLL2_125M			59
+#define IMX8MM_SYS_PLL2_166M			60
+#define IMX8MM_SYS_PLL2_200M			61
+#define IMX8MM_SYS_PLL2_250M			62
+#define IMX8MM_SYS_PLL2_333M			63
+#define IMX8MM_SYS_PLL2_500M			64
+#define IMX8MM_SYS_PLL2_1000M			65
+
+/* core */
+#define IMX8MM_CLK_A53_SRC			66
+#define IMX8MM_CLK_M4_SRC			67
+#define IMX8MM_CLK_VPU_SRC			68
+#define IMX8MM_CLK_GPU3D_SRC			69
+#define IMX8MM_CLK_GPU2D_SRC			70
+#define IMX8MM_CLK_A53_CG			71
+#define IMX8MM_CLK_M4_CG			72
+#define IMX8MM_CLK_VPU_CG			73
+#define IMX8MM_CLK_GPU3D_CG			74
+#define IMX8MM_CLK_GPU2D_CG			75
+#define IMX8MM_CLK_A53_DIV			76
+#define IMX8MM_CLK_M4_DIV			77
+#define IMX8MM_CLK_VPU_DIV			78
+#define IMX8MM_CLK_GPU3D_DIV			79
+#define IMX8MM_CLK_GPU2D_DIV			80
+
+/* bus */
+#define IMX8MM_CLK_MAIN_AXI			81
+#define IMX8MM_CLK_ENET_AXI			82
+#define IMX8MM_CLK_NAND_USDHC_BUS		83
+#define IMX8MM_CLK_VPU_BUS			84
+#define IMX8MM_CLK_DISP_AXI			85
+#define IMX8MM_CLK_DISP_APB			86
+#define IMX8MM_CLK_DISP_RTRM			87
+#define IMX8MM_CLK_USB_BUS			88
+#define IMX8MM_CLK_GPU_AXI			89
+#define IMX8MM_CLK_GPU_AHB			90
+#define IMX8MM_CLK_NOC				91
+#define IMX8MM_CLK_NOC_APB			92
+
+#define IMX8MM_CLK_AHB				93
+#define IMX8MM_CLK_AUDIO_AHB			94
+#define IMX8MM_CLK_IPG_ROOT			95
+#define IMX8MM_CLK_IPG_AUDIO_ROOT		96
+
+#define IMX8MM_CLK_DRAM_ALT			97
+#define IMX8MM_CLK_DRAM_APB			98
+#define IMX8MM_CLK_VPU_G1			99
+#define IMX8MM_CLK_VPU_G2			100
+#define IMX8MM_CLK_DISP_DTRC			101
+#define IMX8MM_CLK_DISP_DC8000			102
+#define IMX8MM_CLK_PCIE1_CTRL			103
+#define IMX8MM_CLK_PCIE1_PHY			104
+#define IMX8MM_CLK_PCIE1_AUX			105
+#define IMX8MM_CLK_DC_PIXEL			106
+#define IMX8MM_CLK_LCDIF_PIXEL			107
+#define IMX8MM_CLK_SAI1				108
+#define IMX8MM_CLK_SAI2				109
+#define IMX8MM_CLK_SAI3				110
+#define IMX8MM_CLK_SAI4				111
+#define IMX8MM_CLK_SAI5				112
+#define IMX8MM_CLK_SAI6				113
+#define IMX8MM_CLK_SPDIF1			114
+#define IMX8MM_CLK_SPDIF2			115
+#define IMX8MM_CLK_ENET_REF			116
+#define IMX8MM_CLK_ENET_TIMER			117
+#define IMX8MM_CLK_ENET_PHY_REF			118
+#define IMX8MM_CLK_NAND				119
+#define IMX8MM_CLK_QSPI				120
+#define IMX8MM_CLK_USDHC1			121
+#define IMX8MM_CLK_USDHC2			122
+#define IMX8MM_CLK_I2C1				123
+#define IMX8MM_CLK_I2C2				124
+#define IMX8MM_CLK_I2C3				125
+#define IMX8MM_CLK_I2C4				126
+#define IMX8MM_CLK_UART1			127
+#define IMX8MM_CLK_UART2			128
+#define IMX8MM_CLK_UART3			129
+#define IMX8MM_CLK_UART4			130
+#define IMX8MM_CLK_USB_CORE_REF			131
+#define IMX8MM_CLK_USB_PHY_REF			132
+#define IMX8MM_CLK_ECSPI1			133
+#define IMX8MM_CLK_ECSPI2			134
+#define IMX8MM_CLK_PWM1				135
+#define IMX8MM_CLK_PWM2				136
+#define IMX8MM_CLK_PWM3				137
+#define IMX8MM_CLK_PWM4				138
+#define IMX8MM_CLK_GPT1				139
+#define IMX8MM_CLK_WDOG				140
+#define IMX8MM_CLK_WRCLK			141
+#define IMX8MM_CLK_DSI_CORE			142
+#define IMX8MM_CLK_DSI_PHY_REF			143
+#define IMX8MM_CLK_DSI_DBI			144
+#define IMX8MM_CLK_USDHC3			145
+#define IMX8MM_CLK_CSI1_CORE			146
+#define IMX8MM_CLK_CSI1_PHY_REF			147
+#define IMX8MM_CLK_CSI1_ESC			148
+#define IMX8MM_CLK_CSI2_CORE			149
+#define IMX8MM_CLK_CSI2_PHY_REF			150
+#define IMX8MM_CLK_CSI2_ESC			151
+#define IMX8MM_CLK_PCIE2_CTRL			152
+#define IMX8MM_CLK_PCIE2_PHY			153
+#define IMX8MM_CLK_PCIE2_AUX			154
+#define IMX8MM_CLK_ECSPI3			155
+#define IMX8MM_CLK_PDM				156
+#define IMX8MM_CLK_VPU_H1			157
+#define IMX8MM_CLK_CLKO1			158
+
+#define IMX8MM_CLK_ECSPI1_ROOT			159
+#define IMX8MM_CLK_ECSPI2_ROOT			160
+#define IMX8MM_CLK_ECSPI3_ROOT			161
+#define IMX8MM_CLK_ENET1_ROOT			162
+#define IMX8MM_CLK_GPT1_ROOT			163
+#define IMX8MM_CLK_I2C1_ROOT			164
+#define IMX8MM_CLK_I2C2_ROOT			165
+#define IMX8MM_CLK_I2C3_ROOT			166
+#define IMX8MM_CLK_I2C4_ROOT			167
+#define IMX8MM_CLK_OCOTP_ROOT			168
+#define IMX8MM_CLK_PCIE1_ROOT			169
+#define IMX8MM_CLK_PWM1_ROOT			170
+#define IMX8MM_CLK_PWM2_ROOT			171
+#define IMX8MM_CLK_PWM3_ROOT			172
+#define IMX8MM_CLK_PWM4_ROOT			173
+#define IMX8MM_CLK_QSPI_ROOT			174
+#define IMX8MM_CLK_NAND_ROOT			175
+#define IMX8MM_CLK_SAI1_ROOT			176
+#define IMX8MM_CLK_SAI1_IPG			177
+#define IMX8MM_CLK_SAI2_ROOT			178
+#define IMX8MM_CLK_SAI2_IPG			179
+#define IMX8MM_CLK_SAI3_ROOT			180
+#define IMX8MM_CLK_SAI3_IPG			181
+#define IMX8MM_CLK_SAI4_ROOT			182
+#define IMX8MM_CLK_SAI4_IPG			183
+#define IMX8MM_CLK_SAI5_ROOT			184
+#define IMX8MM_CLK_SAI5_IPG			185
+#define IMX8MM_CLK_SAI6_ROOT			186
+#define IMX8MM_CLK_SAI6_IPG			187
+#define IMX8MM_CLK_UART1_ROOT			188
+#define IMX8MM_CLK_UART2_ROOT			189
+#define IMX8MM_CLK_UART3_ROOT			190
+#define IMX8MM_CLK_UART4_ROOT			191
+#define IMX8MM_CLK_USB1_CTRL_ROOT		192
+#define IMX8MM_CLK_GPU3D_ROOT			193
+#define IMX8MM_CLK_USDHC1_ROOT			194
+#define IMX8MM_CLK_USDHC2_ROOT			195
+#define IMX8MM_CLK_WDOG1_ROOT			196
+#define IMX8MM_CLK_WDOG2_ROOT			197
+#define IMX8MM_CLK_WDOG3_ROOT			198
+#define IMX8MM_CLK_VPU_G1_ROOT			199
+#define IMX8MM_CLK_GPU_BUS_ROOT			200
+#define IMX8MM_CLK_VPU_H1_ROOT			201
+#define IMX8MM_CLK_VPU_G2_ROOT			202
+#define IMX8MM_CLK_PDM_ROOT			203
+#define IMX8MM_CLK_DISP_ROOT			204
+#define IMX8MM_CLK_DISP_AXI_ROOT		205
+#define IMX8MM_CLK_DISP_APB_ROOT		206
+#define IMX8MM_CLK_DISP_RTRM_ROOT		207
+#define IMX8MM_CLK_USDHC3_ROOT			208
+#define IMX8MM_CLK_TMU_ROOT			209
+#define IMX8MM_CLK_VPU_DEC_ROOT			210
+#define IMX8MM_CLK_SDMA1_ROOT			211
+#define IMX8MM_CLK_SDMA2_ROOT			212
+#define IMX8MM_CLK_SDMA3_ROOT			213
+#define IMX8MM_CLK_GPT_3M			214
+#define IMX8MM_CLK_ARM				215
+#define IMX8MM_CLK_PDM_IPG			216
+#define IMX8MM_CLK_GPU2D_ROOT			217
+#define IMX8MM_CLK_MU_ROOT			218
+#define IMX8MM_CLK_CSI1_ROOT			219
+
+#define IMX8MM_CLK_DRAM_CORE			220
+#define IMX8MM_CLK_DRAM_ALT_ROOT		221
+
+#define IMX8MM_CLK_NAND_USDHC_BUS_RAWNAND_CLK	222
+
+#define IMX8MM_CLK_END				223
+
+#endif
-- 
cgit v1.2.3


From 202ce5afe51e883f94736310997ea1c73219eb36 Mon Sep 17 00:00:00 2001
From: Fabio Estevam <festevam@gmail.com>
Date: Wed, 30 Jan 2019 22:20:56 -0200
Subject: clk: imx8mq: Add support for the CLKO1 clock

Add the entry for the CLKO1 clock.

Signed-off-by: Fabio Estevam <festevam@gmail.com>
Reviewed-by: Abel Vesa <abel.vesa@nxp.com>
Signed-off-by: Stephen Boyd <sboyd@kernel.org>
---
 drivers/clk/imx/clk-imx8mq.c             | 3 +++
 include/dt-bindings/clock/imx8mq-clock.h | 4 +++-
 2 files changed, 6 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/clk/imx/clk-imx8mq.c b/drivers/clk/imx/clk-imx8mq.c
index cd904764212b..897740f0a012 100644
--- a/drivers/clk/imx/clk-imx8mq.c
+++ b/drivers/clk/imx/clk-imx8mq.c
@@ -267,6 +267,8 @@ static const char * const imx8mq_ecspi3_sels[] = {"osc_25m", "sys2_pll_200m", "s
 					   "sys1_pll_800m", "sys3_pll2_out", "sys2_pll_250m", "audio_pll2_out", };
 static const char * const imx8mq_dram_core_sels[] = {"dram_pll_out", "dram_alt_root", };
 
+static const char * const imx8mq_clko1_sels[] = {"osc_25m", "sys1_pll_800m", "osc_27m", "sys1_pll_200m",
+					  "audio_pll2_out", "sys2_pll_500m", "vpu_pll_out", "sys1_pll_80m", };
 static const char * const imx8mq_clko2_sels[] = {"osc_25m", "sys2_pll_200m", "sys1_pll_400m", "sys2_pll_166m",
 					  "sys3_pll2_out", "audio_pll1_out", "video_pll1_out", "ckil", };
 
@@ -486,6 +488,7 @@ static int imx8mq_clocks_probe(struct platform_device *pdev)
 	clks[IMX8MQ_CLK_GPT1] = imx8m_clk_composite("gpt1", imx8mq_gpt1_sels, base + 0xb580);
 	clks[IMX8MQ_CLK_WDOG] = imx8m_clk_composite("wdog", imx8mq_wdog_sels, base + 0xb900);
 	clks[IMX8MQ_CLK_WRCLK] = imx8m_clk_composite("wrclk", imx8mq_wrclk_sels, base + 0xb980);
+	clks[IMX8MQ_CLK_CLKO1] = imx8m_clk_composite("clko1", imx8mq_clko1_sels, base + 0xba00);
 	clks[IMX8MQ_CLK_CLKO2] = imx8m_clk_composite("clko2", imx8mq_clko2_sels, base + 0xba80);
 	clks[IMX8MQ_CLK_DSI_CORE] = imx8m_clk_composite("dsi_core", imx8mq_dsi_core_sels, base + 0xbb00);
 	clks[IMX8MQ_CLK_DSI_PHY_REF] = imx8m_clk_composite("dsi_phy_ref", imx8mq_dsi_phy_sels, base + 0xbb80);
diff --git a/include/dt-bindings/clock/imx8mq-clock.h b/include/dt-bindings/clock/imx8mq-clock.h
index 04f7ac345984..74f80b26f679 100644
--- a/include/dt-bindings/clock/imx8mq-clock.h
+++ b/include/dt-bindings/clock/imx8mq-clock.h
@@ -391,5 +391,7 @@
 
 #define IMX8MQ_CLK_NAND_USDHC_BUS_RAWNAND_CLK	267
 
-#define IMX8MQ_CLK_END				268
+#define IMX8MQ_CLK_CLKO1			268
+
+#define IMX8MQ_CLK_END				269
 #endif /* __DT_BINDINGS_CLOCK_IMX8MQ_H */
-- 
cgit v1.2.3


From 45a359e80d9b81b0fc3d9612053a86fc52a5f4b9 Mon Sep 17 00:00:00 2001
From: Abel Vesa <abel.vesa@nxp.com>
Date: Fri, 15 Feb 2019 17:48:45 +0000
Subject: dt-bindings: imx8mq-clock: Add the missing ARM clock

Add the missing ARM clock which will be used by cpufreq

Signed-off-by: Abel Vesa <abel.vesa@nxp.com>
Reviewed-by: Lucas Stach <l.stach@pengutronix.de>
[sboyd@kernel.org: Fixed numbering in dt header]
Signed-off-by: Stephen Boyd <sboyd@kernel.org>
---
 include/dt-bindings/clock/imx8mq-clock.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/dt-bindings/clock/imx8mq-clock.h b/include/dt-bindings/clock/imx8mq-clock.h
index 74f80b26f679..491e39c146be 100644
--- a/include/dt-bindings/clock/imx8mq-clock.h
+++ b/include/dt-bindings/clock/imx8mq-clock.h
@@ -392,6 +392,7 @@
 #define IMX8MQ_CLK_NAND_USDHC_BUS_RAWNAND_CLK	267
 
 #define IMX8MQ_CLK_CLKO1			268
+#define IMX8MQ_CLK_ARM				269
 
-#define IMX8MQ_CLK_END				269
+#define IMX8MQ_CLK_END				270
 #endif /* __DT_BINDINGS_CLOCK_IMX8MQ_H */
-- 
cgit v1.2.3


From cd34499cacf3c34e2e094ff2a347b9378417970c Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Mon, 18 Feb 2019 21:26:58 +0100
Subject: net: phy: export genphy_config_eee_advert

We want to use this function in phy-c45.c too, therefore export it.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/phy_device.c | 3 ++-
 include/linux/phy.h          | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index 4bb3b6c2894e..49fdd1ee798e 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -1575,7 +1575,7 @@ static int genphy_config_advert(struct phy_device *phydev)
  *   efficent ethernet modes. Returns 0 if the PHY's advertisement hasn't
  *   changed, and 1 if it has changed.
  */
-static int genphy_config_eee_advert(struct phy_device *phydev)
+int genphy_config_eee_advert(struct phy_device *phydev)
 {
 	int err;
 
@@ -1588,6 +1588,7 @@ static int genphy_config_eee_advert(struct phy_device *phydev)
 	/* If the call failed, we assume that EEE is not supported */
 	return err < 0 ? 0 : err;
 }
+EXPORT_SYMBOL(genphy_config_eee_advert);
 
 /**
  * genphy_setup_forced - configures/forces speed/duplex from @phydev
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 3db507e68191..761131de4971 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -1077,6 +1077,7 @@ void phy_attached_info(struct phy_device *phydev);
 int genphy_config_init(struct phy_device *phydev);
 int genphy_setup_forced(struct phy_device *phydev);
 int genphy_restart_aneg(struct phy_device *phydev);
+int genphy_config_eee_advert(struct phy_device *phydev);
 int genphy_config_aneg(struct phy_device *phydev);
 int genphy_aneg_done(struct phy_device *phydev);
 int genphy_update_link(struct phy_device *phydev);
-- 
cgit v1.2.3


From 1af9f16840e920c6193490ae58371fc5cc28bb01 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Mon, 18 Feb 2019 21:27:18 +0100
Subject: net: phy: add genphy_c45_check_and_restart_aneg

This function will be used by config_aneg callback implementations of
PHY drivers and allows to reduce boilerplate code.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/phy-c45.c | 30 ++++++++++++++++++++++++++++++
 include/linux/phy.h       |  1 +
 2 files changed, 31 insertions(+)

(limited to 'include')

diff --git a/drivers/net/phy/phy-c45.c b/drivers/net/phy/phy-c45.c
index 2f5721430e05..fc3173cc078b 100644
--- a/drivers/net/phy/phy-c45.c
+++ b/drivers/net/phy/phy-c45.c
@@ -156,6 +156,36 @@ int genphy_c45_restart_aneg(struct phy_device *phydev)
 }
 EXPORT_SYMBOL_GPL(genphy_c45_restart_aneg);
 
+/**
+ * genphy_c45_check_and_restart_aneg - Enable and restart auto-negotiation
+ * @phydev: target phy_device struct
+ * @restart: whether aneg restart is requested
+ *
+ * This assumes that the auto-negotiation MMD is present.
+ *
+ * Check, and restart auto-negotiation if needed.
+ */
+int genphy_c45_check_and_restart_aneg(struct phy_device *phydev, bool restart)
+{
+	int ret = 0;
+
+	if (!restart) {
+		/* Configure and restart aneg if it wasn't set before */
+		ret = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_CTRL1);
+		if (ret < 0)
+			return ret;
+
+		if (!(ret & MDIO_AN_CTRL1_ENABLE))
+			restart = true;
+	}
+
+	if (restart)
+		ret = genphy_c45_restart_aneg(phydev);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(genphy_c45_check_and_restart_aneg);
+
 /**
  * genphy_c45_aneg_done - return auto-negotiation complete status
  * @phydev: target phy_device struct
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 761131de4971..8e9fc576472b 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -1097,6 +1097,7 @@ int genphy_write_mmd_unsupported(struct phy_device *phdev, int devnum,
 
 /* Clause 45 PHY */
 int genphy_c45_restart_aneg(struct phy_device *phydev);
+int genphy_c45_check_and_restart_aneg(struct phy_device *phydev, bool restart);
 int genphy_c45_aneg_done(struct phy_device *phydev);
 int genphy_c45_read_link(struct phy_device *phydev);
 int genphy_c45_read_lpa(struct phy_device *phydev);
-- 
cgit v1.2.3


From ca8d4794f669e721fb5198f6d142e42dd8080239 Mon Sep 17 00:00:00 2001
From: Callum Sinclair <callum.sinclair@alliedtelesis.co.nz>
Date: Mon, 18 Feb 2019 10:07:52 +1300
Subject: ipmr: ip6mr: Create new sockopt to clear mfc cache or vifs

Currently the only way to clear the forwarding cache was to delete the
entries one by one using the MRT_DEL_MFC socket option or to destroy and
recreate the socket.

Create a new socket option which with the use of optional flags can
clear any combination of multicast entries (static or not static) and
multicast vifs (static or not static).

Calling the new socket option MRT_FLUSH with the flags MRT_FLUSH_MFC and
MRT_FLUSH_VIFS will clear all entries and vifs on the socket except for
static entries.

Signed-off-by: Callum Sinclair <callum.sinclair@alliedtelesis.co.nz>
Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/mroute.h  |  9 ++++-
 include/uapi/linux/mroute6.h |  9 ++++-
 net/ipv4/ipmr.c              | 75 +++++++++++++++++++++++++++---------------
 net/ipv6/ip6mr.c             | 78 +++++++++++++++++++++++++++++---------------
 4 files changed, 115 insertions(+), 56 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/mroute.h b/include/uapi/linux/mroute.h
index 5d37a9ccce63..11c8c1fc1124 100644
--- a/include/uapi/linux/mroute.h
+++ b/include/uapi/linux/mroute.h
@@ -28,12 +28,19 @@
 #define MRT_TABLE	(MRT_BASE+9)	/* Specify mroute table ID		*/
 #define MRT_ADD_MFC_PROXY	(MRT_BASE+10)	/* Add a (*,*|G) mfc entry	*/
 #define MRT_DEL_MFC_PROXY	(MRT_BASE+11)	/* Del a (*,*|G) mfc entry	*/
-#define MRT_MAX		(MRT_BASE+11)
+#define MRT_FLUSH	(MRT_BASE+12)	/* Flush all mfc entries and/or vifs	*/
+#define MRT_MAX		(MRT_BASE+12)
 
 #define SIOCGETVIFCNT	SIOCPROTOPRIVATE	/* IP protocol privates */
 #define SIOCGETSGCNT	(SIOCPROTOPRIVATE+1)
 #define SIOCGETRPF	(SIOCPROTOPRIVATE+2)
 
+/* MRT_FLUSH optional flags */
+#define MRT_FLUSH_MFC	1	/* Flush multicast entries */
+#define MRT_FLUSH_MFC_STATIC	2	/* Flush static multicast entries */
+#define MRT_FLUSH_VIFS	4	/* Flush multicast vifs */
+#define MRT_FLUSH_VIFS_STATIC	8	/* Flush static multicast vifs */
+
 #define MAXVIFS		32
 typedef unsigned long vifbitmap_t;	/* User mode code depends on this lot */
 typedef unsigned short vifi_t;
diff --git a/include/uapi/linux/mroute6.h b/include/uapi/linux/mroute6.h
index 9999cc006390..c36177a86516 100644
--- a/include/uapi/linux/mroute6.h
+++ b/include/uapi/linux/mroute6.h
@@ -31,12 +31,19 @@
 #define MRT6_TABLE	(MRT6_BASE+9)	/* Specify mroute table ID		*/
 #define MRT6_ADD_MFC_PROXY	(MRT6_BASE+10)	/* Add a (*,*|G) mfc entry	*/
 #define MRT6_DEL_MFC_PROXY	(MRT6_BASE+11)	/* Del a (*,*|G) mfc entry	*/
-#define MRT6_MAX	(MRT6_BASE+11)
+#define MRT6_FLUSH	(MRT6_BASE+12)	/* Flush all mfc entries and/or vifs	*/
+#define MRT6_MAX	(MRT6_BASE+12)
 
 #define SIOCGETMIFCNT_IN6	SIOCPROTOPRIVATE	/* IP protocol privates */
 #define SIOCGETSGCNT_IN6	(SIOCPROTOPRIVATE+1)
 #define SIOCGETRPF	(SIOCPROTOPRIVATE+2)
 
+/* MRT6_FLUSH optional flags */
+#define MRT6_FLUSH_MFC	1	/* Flush multicast entries */
+#define MRT6_FLUSH_MFC_STATIC	2	/* Flush static multicast entries */
+#define MRT6_FLUSH_MIFS	4	/* Flushing multicast vifs */
+#define MRT6_FLUSH_MIFS_STATIC	8	/* Flush static multicast vifs */
+
 #define MAXMIFS		32
 typedef unsigned long mifbitmap_t;	/* User mode code depends on this lot */
 typedef unsigned short mifi_t;
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index e536970557dd..2c931120c494 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -110,7 +110,7 @@ static int ipmr_cache_report(struct mr_table *mrt,
 static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc,
 				 int cmd);
 static void igmpmsg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt);
-static void mroute_clean_tables(struct mr_table *mrt, bool all);
+static void mroute_clean_tables(struct mr_table *mrt, int flags);
 static void ipmr_expire_process(struct timer_list *t);
 
 #ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
@@ -415,7 +415,8 @@ static struct mr_table *ipmr_new_table(struct net *net, u32 id)
 static void ipmr_free_table(struct mr_table *mrt)
 {
 	del_timer_sync(&mrt->ipmr_expire_timer);
-	mroute_clean_tables(mrt, true);
+	mroute_clean_tables(mrt, MRT_FLUSH_VIFS | MRT_FLUSH_VIFS_STATIC |
+				 MRT_FLUSH_MFC | MRT_FLUSH_MFC_STATIC);
 	rhltable_destroy(&mrt->mfc_hash);
 	kfree(mrt);
 }
@@ -1296,7 +1297,7 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
 }
 
 /* Close the multicast socket, and clear the vif tables etc */
-static void mroute_clean_tables(struct mr_table *mrt, bool all)
+static void mroute_clean_tables(struct mr_table *mrt, int flags)
 {
 	struct net *net = read_pnet(&mrt->net);
 	struct mr_mfc *c, *tmp;
@@ -1305,35 +1306,44 @@ static void mroute_clean_tables(struct mr_table *mrt, bool all)
 	int i;
 
 	/* Shut down all active vif entries */
-	for (i = 0; i < mrt->maxvif; i++) {
-		if (!all && (mrt->vif_table[i].flags & VIFF_STATIC))
-			continue;
-		vif_delete(mrt, i, 0, &list);
+	if (flags & (MRT_FLUSH_VIFS | MRT_FLUSH_VIFS_STATIC)) {
+		for (i = 0; i < mrt->maxvif; i++) {
+			if (((mrt->vif_table[i].flags & VIFF_STATIC) &&
+			     !(flags & MRT_FLUSH_VIFS_STATIC)) ||
+			    (!(mrt->vif_table[i].flags & VIFF_STATIC) && !(flags & MRT_FLUSH_VIFS)))
+				continue;
+			vif_delete(mrt, i, 0, &list);
+		}
+		unregister_netdevice_many(&list);
 	}
-	unregister_netdevice_many(&list);
 
 	/* Wipe the cache */
-	list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) {
-		if (!all && (c->mfc_flags & MFC_STATIC))
-			continue;
-		rhltable_remove(&mrt->mfc_hash, &c->mnode, ipmr_rht_params);
-		list_del_rcu(&c->list);
-		cache = (struct mfc_cache *)c;
-		call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, cache,
-					      mrt->id);
-		mroute_netlink_event(mrt, cache, RTM_DELROUTE);
-		mr_cache_put(c);
-	}
-
-	if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
-		spin_lock_bh(&mfc_unres_lock);
-		list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
-			list_del(&c->list);
+	if (flags & (MRT_FLUSH_MFC | MRT_FLUSH_MFC_STATIC)) {
+		list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) {
+			if (((c->mfc_flags & MFC_STATIC) && !(flags & MRT_FLUSH_MFC_STATIC)) ||
+			    (!(c->mfc_flags & MFC_STATIC) && !(flags & MRT_FLUSH_MFC)))
+				continue;
+			rhltable_remove(&mrt->mfc_hash, &c->mnode, ipmr_rht_params);
+			list_del_rcu(&c->list);
 			cache = (struct mfc_cache *)c;
+			call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, cache,
+						      mrt->id);
 			mroute_netlink_event(mrt, cache, RTM_DELROUTE);
-			ipmr_destroy_unres(mrt, cache);
+			mr_cache_put(c);
+		}
+	}
+
+	if (flags & MRT_FLUSH_MFC) {
+		if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
+			spin_lock_bh(&mfc_unres_lock);
+			list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
+				list_del(&c->list);
+				cache = (struct mfc_cache *)c;
+				mroute_netlink_event(mrt, cache, RTM_DELROUTE);
+				ipmr_destroy_unres(mrt, cache);
+			}
+			spin_unlock_bh(&mfc_unres_lock);
 		}
-		spin_unlock_bh(&mfc_unres_lock);
 	}
 }
 
@@ -1354,7 +1364,7 @@ static void mrtsock_destruct(struct sock *sk)
 						    NETCONFA_IFINDEX_ALL,
 						    net->ipv4.devconf_all);
 			RCU_INIT_POINTER(mrt->mroute_sk, NULL);
-			mroute_clean_tables(mrt, false);
+			mroute_clean_tables(mrt, MRT_FLUSH_VIFS | MRT_FLUSH_MFC);
 		}
 	}
 	rtnl_unlock();
@@ -1479,6 +1489,17 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval,
 					   sk == rtnl_dereference(mrt->mroute_sk),
 					   parent);
 		break;
+	case MRT_FLUSH:
+		if (optlen != sizeof(val)) {
+			ret = -EINVAL;
+			break;
+		}
+		if (get_user(val, (int __user *)optval)) {
+			ret = -EFAULT;
+			break;
+		}
+		mroute_clean_tables(mrt, val);
+		break;
 	/* Control PIM assert. */
 	case MRT_ASSERT:
 		if (optlen != sizeof(val)) {
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index cc01aa3f2b5e..3594f1d9c68c 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -97,7 +97,7 @@ static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
 static void mrt6msg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt);
 static int ip6mr_rtm_dumproute(struct sk_buff *skb,
 			       struct netlink_callback *cb);
-static void mroute_clean_tables(struct mr_table *mrt, bool all);
+static void mroute_clean_tables(struct mr_table *mrt, int flags);
 static void ipmr_expire_process(struct timer_list *t);
 
 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
@@ -393,7 +393,8 @@ static struct mr_table *ip6mr_new_table(struct net *net, u32 id)
 static void ip6mr_free_table(struct mr_table *mrt)
 {
 	del_timer_sync(&mrt->ipmr_expire_timer);
-	mroute_clean_tables(mrt, true);
+	mroute_clean_tables(mrt, MRT6_FLUSH_MIFS | MRT6_FLUSH_MIFS_STATIC |
+				 MRT6_FLUSH_MFC | MRT6_FLUSH_MFC_STATIC);
 	rhltable_destroy(&mrt->mfc_hash);
 	kfree(mrt);
 }
@@ -1496,42 +1497,51 @@ static int ip6mr_mfc_add(struct net *net, struct mr_table *mrt,
  *	Close the multicast socket, and clear the vif tables etc
  */
 
-static void mroute_clean_tables(struct mr_table *mrt, bool all)
+static void mroute_clean_tables(struct mr_table *mrt, int flags)
 {
 	struct mr_mfc *c, *tmp;
 	LIST_HEAD(list);
 	int i;
 
 	/* Shut down all active vif entries */
-	for (i = 0; i < mrt->maxvif; i++) {
-		if (!all && (mrt->vif_table[i].flags & VIFF_STATIC))
-			continue;
-		mif6_delete(mrt, i, 0, &list);
+	if (flags & (MRT6_FLUSH_MIFS | MRT6_FLUSH_MIFS_STATIC)) {
+		for (i = 0; i < mrt->maxvif; i++) {
+			if (((mrt->vif_table[i].flags & VIFF_STATIC) &&
+			     !(flags & MRT6_FLUSH_MIFS_STATIC)) ||
+			    (!(mrt->vif_table[i].flags & VIFF_STATIC) && !(flags & MRT6_FLUSH_MIFS)))
+				continue;
+			mif6_delete(mrt, i, 0, &list);
+		}
+		unregister_netdevice_many(&list);
 	}
-	unregister_netdevice_many(&list);
 
 	/* Wipe the cache */
-	list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) {
-		if (!all && (c->mfc_flags & MFC_STATIC))
-			continue;
-		rhltable_remove(&mrt->mfc_hash, &c->mnode, ip6mr_rht_params);
-		list_del_rcu(&c->list);
-		call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net),
-					       FIB_EVENT_ENTRY_DEL,
-					       (struct mfc6_cache *)c, mrt->id);
-		mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
-		mr_cache_put(c);
+	if (flags & (MRT6_FLUSH_MFC | MRT6_FLUSH_MFC_STATIC)) {
+		list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) {
+			if (((c->mfc_flags & MFC_STATIC) && !(flags & MRT6_FLUSH_MFC_STATIC)) ||
+			    (!(c->mfc_flags & MFC_STATIC) && !(flags & MRT6_FLUSH_MFC)))
+				continue;
+			rhltable_remove(&mrt->mfc_hash, &c->mnode, ip6mr_rht_params);
+			list_del_rcu(&c->list);
+			call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net),
+						       FIB_EVENT_ENTRY_DEL,
+						       (struct mfc6_cache *)c, mrt->id);
+			mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
+			mr_cache_put(c);
+		}
 	}
 
-	if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
-		spin_lock_bh(&mfc_unres_lock);
-		list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
-			list_del(&c->list);
-			mr6_netlink_event(mrt, (struct mfc6_cache *)c,
-					  RTM_DELROUTE);
-			ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
+	if (flags & MRT6_FLUSH_MFC) {
+		if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
+			spin_lock_bh(&mfc_unres_lock);
+			list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
+				list_del(&c->list);
+				mr6_netlink_event(mrt, (struct mfc6_cache *)c,
+						  RTM_DELROUTE);
+				ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
+			}
+			spin_unlock_bh(&mfc_unres_lock);
 		}
-		spin_unlock_bh(&mfc_unres_lock);
 	}
 }
 
@@ -1587,7 +1597,7 @@ int ip6mr_sk_done(struct sock *sk)
 						     NETCONFA_IFINDEX_ALL,
 						     net->ipv6.devconf_all);
 
-			mroute_clean_tables(mrt, false);
+			mroute_clean_tables(mrt, MRT6_FLUSH_MIFS | MRT6_FLUSH_MFC);
 			err = 0;
 			break;
 		}
@@ -1703,6 +1713,20 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
 		rtnl_unlock();
 		return ret;
 
+	case MRT6_FLUSH:
+	{
+		int flags;
+
+		if (optlen != sizeof(flags))
+			return -EINVAL;
+		if (get_user(flags, (int __user *)optval))
+			return -EFAULT;
+		rtnl_lock();
+		mroute_clean_tables(mrt, flags);
+		rtnl_unlock();
+		return 0;
+	}
+
 	/*
 	 *	Control PIM assert (to activate pim will activate assert)
 	 */
-- 
cgit v1.2.3


From ed11aff3eef35440e23a7cf9692f84ab6233e527 Mon Sep 17 00:00:00 2001
From: Lubomir Rintel <lkundrak@v3.sk>
Date: Mon, 21 Jan 2019 07:31:28 +0100
Subject: dt-bindings: marvell,mmp2: Add clock id for the LCDC clock

The peripheral clock is required for access the the LCDC registers. It
is in fact separate from the "AXI clock" that is optionally used to
generate the pixel clock and as such requires a separate clock id.

Link: https://lists.freedesktop.org/archives/dri-devel/2019-January/203975.html
Signed-off-by: Lubomir Rintel <lkundrak@v3.sk>
Signed-off-by: Stephen Boyd <sboyd@kernel.org>
---
 include/dt-bindings/clock/marvell,mmp2.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/dt-bindings/clock/marvell,mmp2.h b/include/dt-bindings/clock/marvell,mmp2.h
index 228a5e234af0..e785c6eb3561 100644
--- a/include/dt-bindings/clock/marvell,mmp2.h
+++ b/include/dt-bindings/clock/marvell,mmp2.h
@@ -71,6 +71,7 @@
 #define MMP2_CLK_CCIC1_MIX		117
 #define MMP2_CLK_CCIC1_PHY		118
 #define MMP2_CLK_CCIC1_SPHY		119
+#define MMP2_CLK_DISP0_LCDC		120
 
 #define MMP2_NR_CLKS			200
 #endif
-- 
cgit v1.2.3


From 04053f4d23a41b9383dcea4da302fd43f673afb3 Mon Sep 17 00:00:00 2001
From: David Dai <daidavid1@codeaurora.org>
Date: Thu, 24 Jan 2019 16:47:00 -0800
Subject: clk: qcom: clk-rpmh: Add IPA clock support

The clk-rpmh driver only supports on and off RPMh clock resources. Let's
extend the driver by adding support for clocks that are managed by a
different type of RPMh resource known as Bus Clock Manager(BCM). The BCM
is a configurable shared resource aggregator that scales performance
based on a set of frequency points. The Qualcomm IP Accelerator (IPA)
clock is an example of a resource that is managed by the BCM and this a
requirement from the IPA driver in order to scale its core clock.

Signed-off-by: David Dai <daidavid1@codeaurora.org>
Signed-off-by: Stephen Boyd <sboyd@kernel.org>
---
 drivers/clk/qcom/clk-rpmh.c           | 146 ++++++++++++++++++++++++++++++++++
 include/dt-bindings/clock/qcom,rpmh.h |   1 +
 2 files changed, 147 insertions(+)

(limited to 'include')

diff --git a/drivers/clk/qcom/clk-rpmh.c b/drivers/clk/qcom/clk-rpmh.c
index 9f4fc7773fb2..c3fd632af119 100644
--- a/drivers/clk/qcom/clk-rpmh.c
+++ b/drivers/clk/qcom/clk-rpmh.c
@@ -18,6 +18,31 @@
 #define CLK_RPMH_ARC_EN_OFFSET		0
 #define CLK_RPMH_VRM_EN_OFFSET		4
 
+#define BCM_TCS_CMD_COMMIT_MASK		0x40000000
+#define BCM_TCS_CMD_VALID_SHIFT		29
+#define BCM_TCS_CMD_VOTE_MASK		0x3fff
+#define BCM_TCS_CMD_VOTE_SHIFT		0
+
+#define BCM_TCS_CMD(valid, vote)				\
+	(BCM_TCS_CMD_COMMIT_MASK |				\
+	((valid) << BCM_TCS_CMD_VALID_SHIFT) |			\
+	((vote & BCM_TCS_CMD_VOTE_MASK)				\
+	<< BCM_TCS_CMD_VOTE_SHIFT))
+
+/**
+ * struct bcm_db - Auxiliary data pertaining to each Bus Clock Manager(BCM)
+ * @unit: divisor used to convert Hz value to an RPMh msg
+ * @width: multiplier used to convert Hz value to an RPMh msg
+ * @vcd: virtual clock domain that this bcm belongs to
+ * @reserved: reserved to pad the struct
+ */
+struct bcm_db {
+	__le32 unit;
+	__le16 width;
+	u8 vcd;
+	u8 reserved;
+};
+
 /**
  * struct clk_rpmh - individual rpmh clock data structure
  * @hw:			handle between common and hardware-specific interfaces
@@ -29,6 +54,7 @@
  * @aggr_state:		rpmh clock aggregated state
  * @last_sent_aggr_state: rpmh clock last aggr state sent to RPMh
  * @valid_state_mask:	mask to determine the state of the rpmh clock
+ * @unit:		divisor to convert rate to rpmh msg in magnitudes of Khz
  * @dev:		device to which it is attached
  * @peer:		pointer to the clock rpmh sibling
  */
@@ -42,6 +68,7 @@ struct clk_rpmh {
 	u32 aggr_state;
 	u32 last_sent_aggr_state;
 	u32 valid_state_mask;
+	u32 unit;
 	struct device *dev;
 	struct clk_rpmh *peer;
 };
@@ -98,6 +125,17 @@ static DEFINE_MUTEX(rpmh_clk_lock);
 	__DEFINE_CLK_RPMH(_platform, _name, _name_active, _res_name,	\
 			  CLK_RPMH_VRM_EN_OFFSET, 1, _div)
 
+#define DEFINE_CLK_RPMH_BCM(_platform, _name, _res_name)		\
+	static struct clk_rpmh _platform##_##_name = {			\
+		.res_name = _res_name,					\
+		.valid_state_mask = BIT(RPMH_ACTIVE_ONLY_STATE),	\
+		.div = 1,						\
+		.hw.init = &(struct clk_init_data){			\
+			.ops = &clk_rpmh_bcm_ops,			\
+			.name = #_name,					\
+		},							\
+	}
+
 static inline struct clk_rpmh *to_clk_rpmh(struct clk_hw *_hw)
 {
 	return container_of(_hw, struct clk_rpmh, hw);
@@ -210,6 +248,96 @@ static const struct clk_ops clk_rpmh_ops = {
 	.recalc_rate	= clk_rpmh_recalc_rate,
 };
 
+static int clk_rpmh_bcm_send_cmd(struct clk_rpmh *c, bool enable)
+{
+	struct tcs_cmd cmd = { 0 };
+	u32 cmd_state;
+	int ret;
+
+	mutex_lock(&rpmh_clk_lock);
+
+	cmd_state = 0;
+	if (enable) {
+		cmd_state = 1;
+		if (c->aggr_state)
+			cmd_state = c->aggr_state;
+	}
+
+	if (c->last_sent_aggr_state == cmd_state) {
+		mutex_unlock(&rpmh_clk_lock);
+		return 0;
+	}
+
+	cmd.addr = c->res_addr;
+	cmd.data = BCM_TCS_CMD(enable, cmd_state);
+
+	ret = rpmh_write_async(c->dev, RPMH_ACTIVE_ONLY_STATE, &cmd, 1);
+	if (ret) {
+		dev_err(c->dev, "set active state of %s failed: (%d)\n",
+			c->res_name, ret);
+		mutex_unlock(&rpmh_clk_lock);
+		return ret;
+	}
+
+	c->last_sent_aggr_state = cmd_state;
+
+	mutex_unlock(&rpmh_clk_lock);
+
+	return 0;
+}
+
+static int clk_rpmh_bcm_prepare(struct clk_hw *hw)
+{
+	struct clk_rpmh *c = to_clk_rpmh(hw);
+
+	return clk_rpmh_bcm_send_cmd(c, true);
+};
+
+static void clk_rpmh_bcm_unprepare(struct clk_hw *hw)
+{
+	struct clk_rpmh *c = to_clk_rpmh(hw);
+
+	clk_rpmh_bcm_send_cmd(c, false);
+};
+
+static int clk_rpmh_bcm_set_rate(struct clk_hw *hw, unsigned long rate,
+				 unsigned long parent_rate)
+{
+	struct clk_rpmh *c = to_clk_rpmh(hw);
+
+	c->aggr_state = rate / c->unit;
+	/*
+	 * Since any non-zero value sent to hw would result in enabling the
+	 * clock, only send the value if the clock has already been prepared.
+	 */
+	if (clk_hw_is_prepared(hw))
+		clk_rpmh_bcm_send_cmd(c, true);
+
+	return 0;
+};
+
+static long clk_rpmh_round_rate(struct clk_hw *hw, unsigned long rate,
+				unsigned long *parent_rate)
+{
+	return rate;
+}
+
+static unsigned long clk_rpmh_bcm_recalc_rate(struct clk_hw *hw,
+					unsigned long prate)
+{
+	struct clk_rpmh *c = to_clk_rpmh(hw);
+
+	return c->aggr_state * c->unit;
+}
+
+static const struct clk_ops clk_rpmh_bcm_ops = {
+	.prepare	= clk_rpmh_bcm_prepare,
+	.unprepare	= clk_rpmh_bcm_unprepare,
+	.set_rate	= clk_rpmh_bcm_set_rate,
+	.round_rate	= clk_rpmh_round_rate,
+	.recalc_rate	= clk_rpmh_bcm_recalc_rate,
+};
+
 /* Resource name must match resource id present in cmd-db. */
 DEFINE_CLK_RPMH_ARC(sdm845, bi_tcxo, bi_tcxo_ao, "xo.lvl", 0x3, 2);
 DEFINE_CLK_RPMH_VRM(sdm845, ln_bb_clk2, ln_bb_clk2_ao, "lnbclka2", 2);
@@ -217,6 +345,7 @@ DEFINE_CLK_RPMH_VRM(sdm845, ln_bb_clk3, ln_bb_clk3_ao, "lnbclka3", 2);
 DEFINE_CLK_RPMH_VRM(sdm845, rf_clk1, rf_clk1_ao, "rfclka1", 1);
 DEFINE_CLK_RPMH_VRM(sdm845, rf_clk2, rf_clk2_ao, "rfclka2", 1);
 DEFINE_CLK_RPMH_VRM(sdm845, rf_clk3, rf_clk3_ao, "rfclka3", 1);
+DEFINE_CLK_RPMH_BCM(sdm845, ipa, "IP0");
 
 static struct clk_hw *sdm845_rpmh_clocks[] = {
 	[RPMH_CXO_CLK]		= &sdm845_bi_tcxo.hw,
@@ -231,6 +360,7 @@ static struct clk_hw *sdm845_rpmh_clocks[] = {
 	[RPMH_RF_CLK2_A]	= &sdm845_rf_clk2_ao.hw,
 	[RPMH_RF_CLK3]		= &sdm845_rf_clk3.hw,
 	[RPMH_RF_CLK3_A]	= &sdm845_rf_clk3_ao.hw,
+	[RPMH_IPA_CLK]		= &sdm845_ipa.hw,
 };
 
 static const struct clk_rpmh_desc clk_rpmh_sdm845 = {
@@ -267,6 +397,8 @@ static int clk_rpmh_probe(struct platform_device *pdev)
 
 	for (i = 0; i < desc->num_clks; i++) {
 		u32 res_addr;
+		size_t aux_data_len;
+		const struct bcm_db *data;
 
 		rpmh_clk = to_clk_rpmh(hw_clks[i]);
 		res_addr = cmd_db_read_addr(rpmh_clk->res_name);
@@ -275,6 +407,20 @@ static int clk_rpmh_probe(struct platform_device *pdev)
 				rpmh_clk->res_name);
 			return -ENODEV;
 		}
+
+		data = cmd_db_read_aux_data(rpmh_clk->res_name, &aux_data_len);
+		if (IS_ERR(data)) {
+			ret = PTR_ERR(data);
+			dev_err(&pdev->dev,
+				"error reading RPMh aux data for %s (%d)\n",
+				rpmh_clk->res_name, ret);
+			return ret;
+		}
+
+		/* Convert unit from Khz to Hz */
+		if (aux_data_len == sizeof(*data))
+			rpmh_clk->unit = le32_to_cpu(data->unit) * 1000ULL;
+
 		rpmh_clk->res_addr += res_addr;
 		rpmh_clk->dev = &pdev->dev;
 
diff --git a/include/dt-bindings/clock/qcom,rpmh.h b/include/dt-bindings/clock/qcom,rpmh.h
index f48fbd6f2095..edcab3f7b7d3 100644
--- a/include/dt-bindings/clock/qcom,rpmh.h
+++ b/include/dt-bindings/clock/qcom,rpmh.h
@@ -18,5 +18,6 @@
 #define RPMH_RF_CLK2_A				9
 #define RPMH_RF_CLK3				10
 #define RPMH_RF_CLK3_A				11
+#define RPMH_IPA_CLK				12
 
 #endif
-- 
cgit v1.2.3


From 585fc46bd47a143ee7433768e298a7f94008e5f5 Mon Sep 17 00:00:00 2001
From: Gabriel Fernandez <gabriel.fernandez@st.com>
Date: Thu, 14 Feb 2019 11:42:42 +0100
Subject: dt-bindings: clock: remove unused definition for stm32mp1

A copy of LTDC_PX and ETHCK_K (LTDC_K and ETHMAC_K) was introduced in
stm32mp1 dt-bindings file by mistake.
These bindings are not used and shouldn't be use to be conform with
convention name of the stm32mp1 clock IP.

Signed-off-by: Gabriel Fernandez <gabriel.fernandez@st.com>
Signed-off-by: Stephen Boyd <sboyd@kernel.org>
---
 include/dt-bindings/clock/stm32mp1-clks.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include')

diff --git a/include/dt-bindings/clock/stm32mp1-clks.h b/include/dt-bindings/clock/stm32mp1-clks.h
index 90ec780bfc68..4cdaf135829c 100644
--- a/include/dt-bindings/clock/stm32mp1-clks.h
+++ b/include/dt-bindings/clock/stm32mp1-clks.h
@@ -248,7 +248,4 @@
 
 #define STM32MP1_LAST_CLK 232
 
-#define LTDC_K		LTDC_PX
-#define ETHMAC_K	ETHCK_K
-
 #endif /* _DT_BINDINGS_STM32MP1_CLKS_H_ */
-- 
cgit v1.2.3


From 57652796aa979d5754406c8177f716cb2cf60616 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@armlinux.org.uk>
Date: Wed, 20 Feb 2019 15:35:04 -0800
Subject: net: dsa: add support for bridge flags

The Linux bridge implementation allows various properties of the bridge
to be controlled, such as flooding unknown unicast and multicast frames.
This patch adds the necessary DSA infrastructure to allow the Linux
bridge support to control these properties for DSA switches.

Reviewed-by: Vivien Didelot <vivien.didelot@gmail.com>
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
[florian: Add missing dp and ds variables declaration to fix build]
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dsa.h  |  2 ++
 net/dsa/dsa_priv.h |  2 ++
 net/dsa/port.c     | 17 +++++++++++++++++
 net/dsa/slave.c    |  9 +++++++++
 4 files changed, 30 insertions(+)

(limited to 'include')

diff --git a/include/net/dsa.h b/include/net/dsa.h
index 7f2a668ef2cc..2c2c10812814 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -400,6 +400,8 @@ struct dsa_switch_ops {
 	void	(*port_stp_state_set)(struct dsa_switch *ds, int port,
 				      u8 state);
 	void	(*port_fast_age)(struct dsa_switch *ds, int port);
+	int	(*port_egress_floods)(struct dsa_switch *ds, int port,
+				      bool unicast, bool multicast);
 
 	/*
 	 * VLAN support
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index 1f4972dab9f2..f4f99ec29f5d 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -160,6 +160,8 @@ int dsa_port_mdb_add(const struct dsa_port *dp,
 		     struct switchdev_trans *trans);
 int dsa_port_mdb_del(const struct dsa_port *dp,
 		     const struct switchdev_obj_port_mdb *mdb);
+int dsa_port_bridge_flags(const struct dsa_port *dp, unsigned long flags,
+			  struct switchdev_trans *trans);
 int dsa_port_vlan_add(struct dsa_port *dp,
 		      const struct switchdev_obj_port_vlan *vlan,
 		      struct switchdev_trans *trans);
diff --git a/net/dsa/port.c b/net/dsa/port.c
index 2d7e01b23572..6df29bddf37e 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -177,6 +177,23 @@ int dsa_port_ageing_time(struct dsa_port *dp, clock_t ageing_clock,
 	return dsa_port_notify(dp, DSA_NOTIFIER_AGEING_TIME, &info);
 }
 
+int dsa_port_bridge_flags(const struct dsa_port *dp, unsigned long flags,
+			  struct switchdev_trans *trans)
+{
+	struct dsa_switch *ds = dp->ds;
+	int port = dp->index;
+	int err = 0;
+
+	if (switchdev_trans_ph_prepare(trans))
+		return 0;
+
+	if (ds->ops->port_egress_floods)
+		err = ds->ops->port_egress_floods(ds, port, flags & BR_FLOOD,
+						  flags & BR_MCAST_FLOOD);
+
+	return err;
+}
+
 int dsa_port_fdb_add(struct dsa_port *dp, const unsigned char *addr,
 		     u16 vid)
 {
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 2e5e7c04821b..85dc68611002 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -295,6 +295,9 @@ static int dsa_slave_port_attr_set(struct net_device *dev,
 	case SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME:
 		ret = dsa_port_ageing_time(dp, attr->u.ageing_time, trans);
 		break;
+	case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS:
+		ret = dsa_port_bridge_flags(dp, attr->u.brport_flags, trans);
+		break;
 	default:
 		ret = -EOPNOTSUPP;
 		break;
@@ -381,9 +384,15 @@ static int dsa_slave_get_port_parent_id(struct net_device *dev,
 static int dsa_slave_port_attr_get(struct net_device *dev,
 				   struct switchdev_attr *attr)
 {
+	struct dsa_port *dp = dsa_slave_to_port(dev);
+	struct dsa_switch *ds = dp->ds;
+
 	switch (attr->id) {
 	case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS_SUPPORT:
 		attr->u.brport_flags_support = 0;
+		if (ds->ops->port_egress_floods)
+			attr->u.brport_flags_support |= BR_FLOOD |
+							BR_MCAST_FLOOD;
 		break;
 	default:
 		return -EOPNOTSUPP;
-- 
cgit v1.2.3


From 746dc184ba59aa2c843b9e888be3f1302d227b74 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Wed, 20 Feb 2019 16:58:19 -0800
Subject: net: switchdev: Add PORT_PRE_BRIDGE_FLAGS

In preparation for removing switchdev_port_attr_get(), introduce
PORT_PRE_BRIDGE_FLAGS which will be called through
switchdev_port_attr_set(), in the caller's context (possibly atomic) and
which must be checked by the switchdev driver in order to return whether
the operation is supported or not.

This is entirely analoguous to how the BRIDGE_FLAGS_SUPPORT works,
except it goes through a set() instead of get().

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Ido Schimmel <idosch@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/switchdev.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/switchdev.h b/include/net/switchdev.h
index 5e87b54c5dc5..de72b0a3867f 100644
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -46,6 +46,7 @@ enum switchdev_attr_id {
 	SWITCHDEV_ATTR_ID_PORT_STP_STATE,
 	SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS,
 	SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS_SUPPORT,
+	SWITCHDEV_ATTR_ID_PORT_PRE_BRIDGE_FLAGS,
 	SWITCHDEV_ATTR_ID_PORT_MROUTER,
 	SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME,
 	SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING,
@@ -61,7 +62,7 @@ struct switchdev_attr {
 	void (*complete)(struct net_device *dev, int err, void *priv);
 	union {
 		u8 stp_state;				/* PORT_STP_STATE */
-		unsigned long brport_flags;		/* PORT_BRIDGE_FLAGS */
+		unsigned long brport_flags;		/* PORT_{PRE}_BRIDGE_FLAGS */
 		unsigned long brport_flags_support;	/* PORT_BRIDGE_FLAGS_SUPPORT */
 		bool mrouter;				/* PORT_MROUTER */
 		clock_t ageing_time;			/* BRIDGE_AGEING_TIME */
-- 
cgit v1.2.3


From cc0c207a5d18333fbfecc964a47ddb182fbcb720 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Wed, 20 Feb 2019 16:58:25 -0800
Subject: net: Remove SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS_SUPPORT

Now that we have converted the bridge code and the drivers to check for
bridge port(s) flags at the time we try to set them, there is no need
for a get() -> set() sequence anymore and
SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS_SUPPORT therefore becomes unused.

Reviewed-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/switchdev.txt                   |  6 ++----
 drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c | 11 +----------
 drivers/net/ethernet/rocker/rocker_main.c                | 14 +-------------
 drivers/staging/fsl-dpaa2/ethsw/ethsw.c                  | 10 +---------
 include/net/switchdev.h                                  |  2 --
 net/dsa/slave.c                                          | 16 +---------------
 6 files changed, 6 insertions(+), 53 deletions(-)

(limited to 'include')

diff --git a/Documentation/networking/switchdev.txt b/Documentation/networking/switchdev.txt
index f3244d87512a..79c8b0f16aee 100644
--- a/Documentation/networking/switchdev.txt
+++ b/Documentation/networking/switchdev.txt
@@ -232,10 +232,8 @@ Learning_sync attribute enables syncing of the learned/forgotten FDB entry to
 the bridge's FDB.  It's possible, but not optimal, to enable learning on the
 device port and on the bridge port, and disable learning_sync.
 
-To support learning and learning_sync port attributes, the driver implements
-switchdev op switchdev_port_attr_get/set for
-SWITCHDEV_ATTR_PORT_ID_BRIDGE_FLAGS. The driver should initialize the attributes
-to the hardware defaults.
+To support learning, the driver implements switchdev op
+switchdev_port_attr_get/set for SWITCHDEV_ATTR_PORT_ID_BRIDGE_FLAGS.
 
 FDB Ageing
 ^^^^^^^^^^
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
index 9a8798f74d2b..bbb5a406232e 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
@@ -434,16 +434,7 @@ static void mlxsw_sp_bridge_vlan_put(struct mlxsw_sp_bridge_vlan *bridge_vlan)
 static int mlxsw_sp_port_attr_get(struct net_device *dev,
 				  struct switchdev_attr *attr)
 {
-	switch (attr->id) {
-	case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS_SUPPORT:
-		attr->u.brport_flags_support = BR_LEARNING | BR_FLOOD |
-					       BR_MCAST_FLOOD;
-		break;
-	default:
-		return -EOPNOTSUPP;
-	}
-
-	return 0;
+	return -EOPNOTSUPP;
 }
 
 static int
diff --git a/drivers/net/ethernet/rocker/rocker_main.c b/drivers/net/ethernet/rocker/rocker_main.c
index 25129f7b5583..6b8273e2057d 100644
--- a/drivers/net/ethernet/rocker/rocker_main.c
+++ b/drivers/net/ethernet/rocker/rocker_main.c
@@ -2069,19 +2069,7 @@ static const struct net_device_ops rocker_port_netdev_ops = {
 static int rocker_port_attr_get(struct net_device *dev,
 				struct switchdev_attr *attr)
 {
-	const struct rocker_port *rocker_port = netdev_priv(dev);
-	int err = 0;
-
-	switch (attr->id) {
-	case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS_SUPPORT:
-		err = rocker_world_port_attr_bridge_flags_support_get(rocker_port,
-								      &attr->u.brport_flags_support);
-		break;
-	default:
-		return -EOPNOTSUPP;
-	}
-
-	return err;
+	return -EOPNOTSUPP;
 }
 
 static int rocker_port_attr_set(struct net_device *dev,
diff --git a/drivers/staging/fsl-dpaa2/ethsw/ethsw.c b/drivers/staging/fsl-dpaa2/ethsw/ethsw.c
index 331625137717..de4dcabbc29a 100644
--- a/drivers/staging/fsl-dpaa2/ethsw/ethsw.c
+++ b/drivers/staging/fsl-dpaa2/ethsw/ethsw.c
@@ -643,15 +643,7 @@ static void ethsw_teardown_irqs(struct fsl_mc_device *sw_dev)
 static int swdev_port_attr_get(struct net_device *netdev,
 			       struct switchdev_attr *attr)
 {
-	switch (attr->id) {
-	case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS_SUPPORT:
-		attr->u.brport_flags_support = BR_LEARNING | BR_FLOOD;
-		break;
-	default:
-		return -EOPNOTSUPP;
-	}
-
-	return 0;
+	return -EOPNOTSUPP;
 }
 
 static int port_attr_stp_state_set(struct net_device *netdev,
diff --git a/include/net/switchdev.h b/include/net/switchdev.h
index de72b0a3867f..0f352019ef99 100644
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -45,7 +45,6 @@ enum switchdev_attr_id {
 	SWITCHDEV_ATTR_ID_UNDEFINED,
 	SWITCHDEV_ATTR_ID_PORT_STP_STATE,
 	SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS,
-	SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS_SUPPORT,
 	SWITCHDEV_ATTR_ID_PORT_PRE_BRIDGE_FLAGS,
 	SWITCHDEV_ATTR_ID_PORT_MROUTER,
 	SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME,
@@ -63,7 +62,6 @@ struct switchdev_attr {
 	union {
 		u8 stp_state;				/* PORT_STP_STATE */
 		unsigned long brport_flags;		/* PORT_{PRE}_BRIDGE_FLAGS */
-		unsigned long brport_flags_support;	/* PORT_BRIDGE_FLAGS_SUPPORT */
 		bool mrouter;				/* PORT_MROUTER */
 		clock_t ageing_time;			/* BRIDGE_AGEING_TIME */
 		bool vlan_filtering;			/* BRIDGE_VLAN_FILTERING */
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 44cc4e50dd5a..db0a2651070f 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -388,21 +388,7 @@ static int dsa_slave_get_port_parent_id(struct net_device *dev,
 static int dsa_slave_port_attr_get(struct net_device *dev,
 				   struct switchdev_attr *attr)
 {
-	struct dsa_port *dp = dsa_slave_to_port(dev);
-	struct dsa_switch *ds = dp->ds;
-
-	switch (attr->id) {
-	case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS_SUPPORT:
-		attr->u.brport_flags_support = 0;
-		if (ds->ops->port_egress_floods)
-			attr->u.brport_flags_support |= BR_FLOOD |
-							BR_MCAST_FLOOD;
-		break;
-	default:
-		return -EOPNOTSUPP;
-	}
-
-	return 0;
+	return -EOPNOTSUPP;
 }
 
 static inline netdev_tx_t dsa_slave_netpoll_send_skb(struct net_device *dev,
-- 
cgit v1.2.3


From 010c8f01aa7fe18ea97d302e1c7e9ca83bc27433 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Wed, 20 Feb 2019 16:58:26 -0800
Subject: net: Get rid of switchdev_port_attr_get()

With the bridge no longer calling switchdev_port_attr_get() to obtain
the supported bridge port flags from a driver but instead trying to set
the bridge port flags directly and relying on driver to reject
unsupported configurations, we can effectively get rid of
switchdev_port_attr_get() entirely since this was the only place where
it was called.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Ido Schimmel <idosch@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/switchdev.txt                   | 2 +-
 drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c | 7 -------
 drivers/net/ethernet/rocker/rocker_main.c                | 7 -------
 drivers/staging/fsl-dpaa2/ethsw/ethsw.c                  | 7 -------
 include/net/switchdev.h                                  | 8 --------
 net/dsa/slave.c                                          | 7 -------
 6 files changed, 1 insertion(+), 37 deletions(-)

(limited to 'include')

diff --git a/Documentation/networking/switchdev.txt b/Documentation/networking/switchdev.txt
index 79c8b0f16aee..413abbae952f 100644
--- a/Documentation/networking/switchdev.txt
+++ b/Documentation/networking/switchdev.txt
@@ -233,7 +233,7 @@ the bridge's FDB.  It's possible, but not optimal, to enable learning on the
 device port and on the bridge port, and disable learning_sync.
 
 To support learning, the driver implements switchdev op
-switchdev_port_attr_get/set for SWITCHDEV_ATTR_PORT_ID_BRIDGE_FLAGS.
+switchdev_port_attr_set for SWITCHDEV_ATTR_PORT_ID_{PRE}_BRIDGE_FLAGS.
 
 FDB Ageing
 ^^^^^^^^^^
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
index bbb5a406232e..766f5b5f1cf5 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
@@ -431,12 +431,6 @@ static void mlxsw_sp_bridge_vlan_put(struct mlxsw_sp_bridge_vlan *bridge_vlan)
 		mlxsw_sp_bridge_vlan_destroy(bridge_vlan);
 }
 
-static int mlxsw_sp_port_attr_get(struct net_device *dev,
-				  struct switchdev_attr *attr)
-{
-	return -EOPNOTSUPP;
-}
-
 static int
 mlxsw_sp_port_bridge_vlan_stp_set(struct mlxsw_sp_port *mlxsw_sp_port,
 				  struct mlxsw_sp_bridge_vlan *bridge_vlan,
@@ -1945,7 +1939,6 @@ static struct mlxsw_sp_port *mlxsw_sp_lag_rep_port(struct mlxsw_sp *mlxsw_sp,
 }
 
 static const struct switchdev_ops mlxsw_sp_port_switchdev_ops = {
-	.switchdev_port_attr_get	= mlxsw_sp_port_attr_get,
 	.switchdev_port_attr_set	= mlxsw_sp_port_attr_set,
 };
 
diff --git a/drivers/net/ethernet/rocker/rocker_main.c b/drivers/net/ethernet/rocker/rocker_main.c
index 6b8273e2057d..8200fbf91306 100644
--- a/drivers/net/ethernet/rocker/rocker_main.c
+++ b/drivers/net/ethernet/rocker/rocker_main.c
@@ -2066,12 +2066,6 @@ static const struct net_device_ops rocker_port_netdev_ops = {
  * swdev interface
  ********************/
 
-static int rocker_port_attr_get(struct net_device *dev,
-				struct switchdev_attr *attr)
-{
-	return -EOPNOTSUPP;
-}
-
 static int rocker_port_attr_set(struct net_device *dev,
 				const struct switchdev_attr *attr,
 				struct switchdev_trans *trans)
@@ -2148,7 +2142,6 @@ static int rocker_port_obj_del(struct net_device *dev,
 }
 
 static const struct switchdev_ops rocker_port_switchdev_ops = {
-	.switchdev_port_attr_get	= rocker_port_attr_get,
 	.switchdev_port_attr_set	= rocker_port_attr_set,
 };
 
diff --git a/drivers/staging/fsl-dpaa2/ethsw/ethsw.c b/drivers/staging/fsl-dpaa2/ethsw/ethsw.c
index de4dcabbc29a..018399ee8731 100644
--- a/drivers/staging/fsl-dpaa2/ethsw/ethsw.c
+++ b/drivers/staging/fsl-dpaa2/ethsw/ethsw.c
@@ -640,12 +640,6 @@ static void ethsw_teardown_irqs(struct fsl_mc_device *sw_dev)
 	fsl_mc_free_irqs(sw_dev);
 }
 
-static int swdev_port_attr_get(struct net_device *netdev,
-			       struct switchdev_attr *attr)
-{
-	return -EOPNOTSUPP;
-}
-
 static int port_attr_stp_state_set(struct net_device *netdev,
 				   struct switchdev_trans *trans,
 				   u8 state)
@@ -932,7 +926,6 @@ static int swdev_port_obj_del(struct net_device *netdev,
 }
 
 static const struct switchdev_ops ethsw_port_switchdev_ops = {
-	.switchdev_port_attr_get	= swdev_port_attr_get,
 	.switchdev_port_attr_set	= swdev_port_attr_set,
 };
 
diff --git a/include/net/switchdev.h b/include/net/switchdev.h
index 0f352019ef99..45310ddf2d7e 100644
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -179,8 +179,6 @@ switchdev_notifier_info_to_extack(const struct switchdev_notifier_info *info)
 #ifdef CONFIG_NET_SWITCHDEV
 
 void switchdev_deferred_process(void);
-int switchdev_port_attr_get(struct net_device *dev,
-			    struct switchdev_attr *attr);
 int switchdev_port_attr_set(struct net_device *dev,
 			    const struct switchdev_attr *attr);
 int switchdev_port_obj_add(struct net_device *dev,
@@ -225,12 +223,6 @@ static inline void switchdev_deferred_process(void)
 {
 }
 
-static inline int switchdev_port_attr_get(struct net_device *dev,
-					  struct switchdev_attr *attr)
-{
-	return -EOPNOTSUPP;
-}
-
 static inline int switchdev_port_attr_set(struct net_device *dev,
 					  const struct switchdev_attr *attr)
 {
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index db0a2651070f..a78b2bba0332 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -385,12 +385,6 @@ static int dsa_slave_get_port_parent_id(struct net_device *dev,
 	return 0;
 }
 
-static int dsa_slave_port_attr_get(struct net_device *dev,
-				   struct switchdev_attr *attr)
-{
-	return -EOPNOTSUPP;
-}
-
 static inline netdev_tx_t dsa_slave_netpoll_send_skb(struct net_device *dev,
 						     struct sk_buff *skb)
 {
@@ -1057,7 +1051,6 @@ static const struct net_device_ops dsa_slave_netdev_ops = {
 };
 
 static const struct switchdev_ops dsa_slave_switchdev_ops = {
-	.switchdev_port_attr_get	= dsa_slave_port_attr_get,
 	.switchdev_port_attr_set	= dsa_slave_port_attr_set,
 };
 
-- 
cgit v1.2.3


From 0c8c53e033d278cad09a81ec1c46a401a6a6b1af Mon Sep 17 00:00:00 2001
From: Edgar Bernardi Righi <edgar.righi@lsitec.org.br>
Date: Tue, 15 Jan 2019 09:03:36 +0530
Subject: dt-bindings: clock: Add DT bindings for Actions Semi S500 CMU

Add devicetree bindings for Actions Semi S500 Clock Management Unit.

Signed-off-by: Edgar Bernardi Righi <edgar.righi@lsitec.org.br>
[Mani: Documented S500 CMU compatible]
Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
Reviewed-by: Rob Herring <robh@kernel.org>
[sboyd@kernel.org: Fix SPDX comment style in header file]
Signed-off-by: Stephen Boyd <sboyd@kernel.org>
---
 .../devicetree/bindings/clock/actions,owl-cmu.txt  |  7 +-
 include/dt-bindings/clock/actions,s500-cmu.h       | 78 ++++++++++++++++++++++
 2 files changed, 82 insertions(+), 3 deletions(-)
 create mode 100644 include/dt-bindings/clock/actions,s500-cmu.h

(limited to 'include')

diff --git a/Documentation/devicetree/bindings/clock/actions,owl-cmu.txt b/Documentation/devicetree/bindings/clock/actions,owl-cmu.txt
index 2ef86ae96df8..d19885b7c73f 100644
--- a/Documentation/devicetree/bindings/clock/actions,owl-cmu.txt
+++ b/Documentation/devicetree/bindings/clock/actions,owl-cmu.txt
@@ -2,13 +2,14 @@
 
 The Actions Semi Owl Clock Management Unit generates and supplies clock
 to various controllers within the SoC. The clock binding described here is
-applicable to S900 and S700 SoC's.
+applicable to S900, S700 and S500 SoC's.
 
 Required Properties:
 
 - compatible: should be one of the following,
 	"actions,s900-cmu"
 	"actions,s700-cmu"
+	"actions,s500-cmu"
 - reg: physical base address of the controller and length of memory mapped
   region.
 - clocks: Reference to the parent clocks ("hosc", "losc")
@@ -19,8 +20,8 @@ Each clock is assigned an identifier, and client nodes can use this identifier
 to specify the clock which they consume.
 
 All available clocks are defined as preprocessor macros in corresponding
-dt-bindings/clock/actions,s900-cmu.h or actions,s700-cmu.h header and can be
-used in device tree sources.
+dt-bindings/clock/actions,s900-cmu.h or actions,s700-cmu.h or
+actions,s500-cmu.h header and can be used in device tree sources.
 
 External clocks:
 
diff --git a/include/dt-bindings/clock/actions,s500-cmu.h b/include/dt-bindings/clock/actions,s500-cmu.h
new file mode 100644
index 000000000000..030981cd2d56
--- /dev/null
+++ b/include/dt-bindings/clock/actions,s500-cmu.h
@@ -0,0 +1,78 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Device Tree binding constants for Actions Semi S500 Clock Management Unit
+ *
+ * Copyright (c) 2014 Actions Semi Inc.
+ * Copyright (c) 2018 LSI-TEC - Caninos Loucos
+ */
+
+#ifndef __DT_BINDINGS_CLOCK_S500_CMU_H
+#define __DT_BINDINGS_CLOCK_S500_CMU_H
+
+#define CLK_NONE		0
+
+/* fixed rate clocks */
+#define CLK_LOSC		1
+#define CLK_HOSC		2
+
+/* pll clocks */
+#define CLK_CORE_PLL		3
+#define CLK_DEV_PLL		4
+#define CLK_DDR_PLL		5
+#define CLK_NAND_PLL		6
+#define CLK_DISPLAY_PLL		7
+#define CLK_ETHERNET_PLL	8
+#define CLK_AUDIO_PLL		9
+
+/* system clock */
+#define CLK_DEV			10
+#define CLK_H			11
+#define CLK_AHBPREDIV		12
+#define CLK_AHB			13
+#define CLK_DE			14
+#define CLK_BISP		15
+#define CLK_VCE			16
+#define CLK_VDE			17
+
+/* peripheral device clock */
+#define CLK_TIMER		18
+#define CLK_I2C0		19
+#define CLK_I2C1		20
+#define CLK_I2C2		21
+#define CLK_I2C3		22
+#define CLK_PWM0		23
+#define CLK_PWM1		24
+#define CLK_PWM2		25
+#define CLK_PWM3		26
+#define CLK_PWM4		27
+#define CLK_PWM5		28
+#define CLK_SD0			29
+#define CLK_SD1			30
+#define CLK_SD2			31
+#define CLK_SENSOR0		32
+#define CLK_SENSOR1		33
+#define CLK_SPI0		34
+#define CLK_SPI1		35
+#define CLK_SPI2		36
+#define CLK_SPI3		37
+#define CLK_UART0		38
+#define CLK_UART1		39
+#define CLK_UART2		40
+#define CLK_UART3		41
+#define CLK_UART4		42
+#define CLK_UART5		43
+#define CLK_UART6		44
+#define CLK_DE1			45
+#define CLK_DE2			46
+#define CLK_I2SRX		47
+#define CLK_I2STX		48
+#define CLK_HDMI_AUDIO		49
+#define CLK_HDMI		50
+#define CLK_SPDIF		51
+#define CLK_NAND		52
+#define CLK_ECC			53
+#define CLK_RMII_REF		54
+
+#define CLK_NR_CLKS	       (CLK_RMII_REF + 1)
+
+#endif /* __DT_BINDINGS_CLOCK_S500_CMU_H */
-- 
cgit v1.2.3


From d13501a2bedfbea0983cc868d3f1dc692627f60d Mon Sep 17 00:00:00 2001
From: Katsuhiro Suzuki <katsuhiro@katsuster.net>
Date: Mon, 11 Feb 2019 00:38:06 +0900
Subject: clk: fractional-divider: check parent rate only if flag is set

Custom approximation of fractional-divider may not need parent clock
rate checking. For example Rockchip SoCs work fine using grand parent
clock rate even if target rate is greater than parent.

This patch checks parent clock rate only if CLK_SET_RATE_PARENT flag
is set.

For detailed example, clock tree of Rockchip I2S audio hardware.
  - Clock rate of CPLL is 1.2GHz, GPLL is 491.52MHz.
  - i2s1_div is integer divider can divide N (N is 1~128).
    Input clock is CPLL or GPLL. Initial divider value is N = 1.
    Ex) PLL = CPLL, N = 10, i2s1_div output rate is
      CPLL / 10 = 1.2GHz / 10 = 120MHz
  - i2s1_frac is fractional divider can divide input to x/y, x and
    y are 16bit integer.

CPLL --> | selector | ---> i2s1_div -+--> | selector | --> I2S1 MCLK
GPLL --> |          | ,--------------'    |          |
                      `--> i2s1_frac ---> |          |

Clock mux system try to choose suitable one from i2s1_div and
i2s1_frac for master clock (MCLK) of I2S1.

Bad scenario as follows:
  - Try to set MCLK to 8.192MHz (32kHz audio replay)
    Candidate setting is
    - i2s1_div: GPLL / 60 = 8.192MHz
    i2s1_div candidate is exactly same as target clock rate, so mux
    choose this clock source. i2s1_div output rate is changed
    491.52MHz -> 8.192MHz

  - After that try to set to 11.2896MHz (44.1kHz audio replay)
    Candidate settings are
    - i2s1_div : CPLL / 107 = 11.214945MHz
    - i2s1_frac: i2s1_div   = 8.192MHz
      This is because clk_fd_round_rate() thinks target rate
      (11.2896MHz) is higher than parent rate (i2s1_div = 8.192MHz)
      and returns parent clock rate.

Above is current upstreamed behavior. Clock mux system choose
i2s1_div, but this clock rate is not acceptable for I2S driver, so
users cannot replay audio.

Expected behavior is:
  - Try to set master clock to 11.2896MHz (44.1kHz audio replay)
    Candidate settings are
    - i2s1_div : CPLL / 107          = 11.214945MHz
    - i2s1_frac: i2s1_div * 147/6400 = 11.2896MHz
                 Change i2s1_div to GPLL / 1 = 491.52MHz at same
                 time.

If apply this commit, clk_fd_round_rate() calls custom approximate
function of Rockchip even if target rate is higher than parent.
Custom function changes both grand parent (i2s1_div) and parent
(i2s_frac) settings at same time. Clock mux system can choose
i2s1_frac and audio works fine.

Signed-off-by: Katsuhiro Suzuki <katsuhiro@katsuster.net>
Reviewed-by: Heiko Stuebner <heiko@sntech.de>
[sboyd@kernel.org: Make function into a macro instead]
Signed-off-by: Stephen Boyd <sboyd@kernel.org>
---
 drivers/clk/clk-fractional-divider.c | 2 +-
 include/linux/clk-provider.h         | 3 +++
 2 files changed, 4 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/clk/clk-fractional-divider.c b/drivers/clk/clk-fractional-divider.c
index 545dceec0bbf..fdfe2e423d15 100644
--- a/drivers/clk/clk-fractional-divider.c
+++ b/drivers/clk/clk-fractional-divider.c
@@ -79,7 +79,7 @@ static long clk_fd_round_rate(struct clk_hw *hw, unsigned long rate,
 	unsigned long m, n;
 	u64 ret;
 
-	if (!rate || rate >= *parent_rate)
+	if (!rate || (!clk_hw_can_set_rate_parent(hw) && rate >= *parent_rate))
 		return *parent_rate;
 
 	if (fd->approximation)
diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h
index e443fa9fa859..b7cf80a71293 100644
--- a/include/linux/clk-provider.h
+++ b/include/linux/clk-provider.h
@@ -792,6 +792,9 @@ unsigned int __clk_get_enable_count(struct clk *clk);
 unsigned long clk_hw_get_rate(const struct clk_hw *hw);
 unsigned long __clk_get_flags(struct clk *clk);
 unsigned long clk_hw_get_flags(const struct clk_hw *hw);
+#define clk_hw_can_set_rate_parent(hw) \
+	(clk_hw_get_flags((hw)) & CLK_SET_RATE_PARENT)
+
 bool clk_hw_is_prepared(const struct clk_hw *hw);
 bool clk_hw_rate_is_protected(const struct clk_hw *hw);
 bool clk_hw_is_enabled(const struct clk_hw *hw);
-- 
cgit v1.2.3


From a9443a63283ae7eb78f735341da22bc3a69a464d Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 18 Feb 2019 22:34:15 +0300
Subject: clk: x86: Move clk-lpss.h to platform_data/x86

clk-lpss.h is solely x86 related header. Move it to correct folder.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Stephen Boyd <sboyd@kernel.org>
---
 drivers/acpi/acpi_lpss.c                   |  2 +-
 drivers/clk/x86/clk-lpt.c                  |  2 +-
 include/linux/platform_data/clk-lpss.h     | 23 -----------------------
 include/linux/platform_data/x86/clk-lpss.h | 23 +++++++++++++++++++++++
 4 files changed, 25 insertions(+), 25 deletions(-)
 delete mode 100644 include/linux/platform_data/clk-lpss.h
 create mode 100644 include/linux/platform_data/x86/clk-lpss.h

(limited to 'include')

diff --git a/drivers/acpi/acpi_lpss.c b/drivers/acpi/acpi_lpss.c
index 5f94c35d165f..1e2a10a06b9d 100644
--- a/drivers/acpi/acpi_lpss.c
+++ b/drivers/acpi/acpi_lpss.c
@@ -18,7 +18,7 @@
 #include <linux/mutex.h>
 #include <linux/pci.h>
 #include <linux/platform_device.h>
-#include <linux/platform_data/clk-lpss.h>
+#include <linux/platform_data/x86/clk-lpss.h>
 #include <linux/platform_data/x86/pmc_atom.h>
 #include <linux/pm_domain.h>
 #include <linux/pm_runtime.h>
diff --git a/drivers/clk/x86/clk-lpt.c b/drivers/clk/x86/clk-lpt.c
index 6b40eb89ae19..68bd3abaef2c 100644
--- a/drivers/clk/x86/clk-lpt.c
+++ b/drivers/clk/x86/clk-lpt.c
@@ -13,7 +13,7 @@
 #include <linux/clk-provider.h>
 #include <linux/err.h>
 #include <linux/module.h>
-#include <linux/platform_data/clk-lpss.h>
+#include <linux/platform_data/x86/clk-lpss.h>
 #include <linux/platform_device.h>
 
 static int lpt_clk_probe(struct platform_device *pdev)
diff --git a/include/linux/platform_data/clk-lpss.h b/include/linux/platform_data/clk-lpss.h
deleted file mode 100644
index 23901992b9dd..000000000000
--- a/include/linux/platform_data/clk-lpss.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * Intel Low Power Subsystem clocks.
- *
- * Copyright (C) 2013, Intel Corporation
- * Authors: Mika Westerberg <mika.westerberg@linux.intel.com>
- *          Rafael J. Wysocki <rafael.j.wysocki@intel.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#ifndef __CLK_LPSS_H
-#define __CLK_LPSS_H
-
-struct lpss_clk_data {
-	const char *name;
-	struct clk *clk;
-};
-
-extern int lpt_clk_init(void);
-
-#endif /* __CLK_LPSS_H */
diff --git a/include/linux/platform_data/x86/clk-lpss.h b/include/linux/platform_data/x86/clk-lpss.h
new file mode 100644
index 000000000000..23901992b9dd
--- /dev/null
+++ b/include/linux/platform_data/x86/clk-lpss.h
@@ -0,0 +1,23 @@
+/*
+ * Intel Low Power Subsystem clocks.
+ *
+ * Copyright (C) 2013, Intel Corporation
+ * Authors: Mika Westerberg <mika.westerberg@linux.intel.com>
+ *          Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __CLK_LPSS_H
+#define __CLK_LPSS_H
+
+struct lpss_clk_data {
+	const char *name;
+	struct clk *clk;
+};
+
+extern int lpt_clk_init(void);
+
+#endif /* __CLK_LPSS_H */
-- 
cgit v1.2.3


From 7bae0432a64aa7569dbd0feb2927fd3ff913901f Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dtor@chromium.org>
Date: Sat, 16 Feb 2019 23:21:51 -0800
Subject: usb: core: add option of only authorizing internal devices

On Chrome OS we want to use USBguard to potentially limit access to USB
devices based on policy. We however to do not want to wait for userspace to
come up before initializing fixed USB devices to not regress our boot
times.

This patch adds option to instruct the kernel to only authorize devices
connected to the internal ports. Previously we could either authorize
all or none (or, by default, we'd only authorize wired devices).

The behavior is controlled via usbcore.authorized_default command line
option.

Signed-off-by: Dmitry Torokhov <dtor@chromium.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/admin-guide/kernel-parameters.txt |  3 +-
 Documentation/usb/authorization.txt             |  4 +-
 drivers/usb/core/hcd.c                          | 51 +++++++++++++++----------
 drivers/usb/core/usb.c                          | 33 ++++++++++++----
 include/linux/usb/hcd.h                         | 10 +++--
 5 files changed, 69 insertions(+), 32 deletions(-)

(limited to 'include')

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 858b6c0b9a15..2d28ef850781 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -4697,7 +4697,8 @@
 	usbcore.authorized_default=
 			[USB] Default USB device authorization:
 			(default -1 = authorized except for wireless USB,
-			0 = not authorized, 1 = authorized)
+			0 = not authorized, 1 = authorized, 2 = authorized
+			if device connected to internal port)
 
 	usbcore.autosuspend=
 			[USB] The autosuspend time delay (in seconds) used
diff --git a/Documentation/usb/authorization.txt b/Documentation/usb/authorization.txt
index f901ec77439c..9dd1dc7b1009 100644
--- a/Documentation/usb/authorization.txt
+++ b/Documentation/usb/authorization.txt
@@ -34,7 +34,9 @@ $ echo 1 > /sys/bus/usb/devices/usbX/authorized_default
 By default, Wired USB devices are authorized by default to
 connect. Wireless USB hosts deauthorize by default all new connected
 devices (this is so because we need to do an authentication phase
-before authorizing).
+before authorizing). Writing "2" to the authorized_default attribute
+causes kernel to only authorize by default devices connected to internal
+USB ports.
 
 
 Example system lockdown (lame)
diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c
index 86f39e44f98a..3b6e3e25f59e 100644
--- a/drivers/usb/core/hcd.c
+++ b/drivers/usb/core/hcd.c
@@ -373,13 +373,19 @@ static const u8 ss_rh_config_descriptor[] = {
  * -1 is authorized for all devices except wireless (old behaviour)
  * 0 is unauthorized for all devices
  * 1 is authorized for all devices
+ * 2 is authorized for internal devices
  */
-static int authorized_default = -1;
+#define USB_AUTHORIZE_WIRED	-1
+#define USB_AUTHORIZE_NONE	0
+#define USB_AUTHORIZE_ALL	1
+#define USB_AUTHORIZE_INTERNAL	2
+
+static int authorized_default = USB_AUTHORIZE_WIRED;
 module_param(authorized_default, int, S_IRUGO|S_IWUSR);
 MODULE_PARM_DESC(authorized_default,
 		"Default USB device authorization: 0 is not authorized, 1 is "
-		"authorized, -1 is authorized except for wireless USB (default, "
-		"old behaviour");
+		"authorized, 2 is authorized for internal devices, -1 is "
+		"authorized except for wireless USB (default, old behaviour");
 /*-------------------------------------------------------------------------*/
 
 /**
@@ -884,7 +890,7 @@ static ssize_t authorized_default_show(struct device *dev,
 	struct usb_hcd *hcd;
 
 	hcd = bus_to_hcd(usb_bus);
-	return snprintf(buf, PAGE_SIZE, "%u\n", !!HCD_DEV_AUTHORIZED(hcd));
+	return snprintf(buf, PAGE_SIZE, "%u\n", hcd->dev_policy);
 }
 
 static ssize_t authorized_default_store(struct device *dev,
@@ -900,11 +906,8 @@ static ssize_t authorized_default_store(struct device *dev,
 	hcd = bus_to_hcd(usb_bus);
 	result = sscanf(buf, "%u\n", &val);
 	if (result == 1) {
-		if (val)
-			set_bit(HCD_FLAG_DEV_AUTHORIZED, &hcd->flags);
-		else
-			clear_bit(HCD_FLAG_DEV_AUTHORIZED, &hcd->flags);
-
+		hcd->dev_policy = val <= USB_DEVICE_AUTHORIZE_INTERNAL ?
+			val : USB_DEVICE_AUTHORIZE_ALL;
 		result = size;
 	} else {
 		result = -EINVAL;
@@ -2748,18 +2751,26 @@ int usb_add_hcd(struct usb_hcd *hcd,
 
 	dev_info(hcd->self.controller, "%s\n", hcd->product_desc);
 
-	/* Keep old behaviour if authorized_default is not in [0, 1]. */
-	if (authorized_default < 0 || authorized_default > 1) {
-		if (hcd->wireless)
-			clear_bit(HCD_FLAG_DEV_AUTHORIZED, &hcd->flags);
-		else
-			set_bit(HCD_FLAG_DEV_AUTHORIZED, &hcd->flags);
-	} else {
-		if (authorized_default)
-			set_bit(HCD_FLAG_DEV_AUTHORIZED, &hcd->flags);
-		else
-			clear_bit(HCD_FLAG_DEV_AUTHORIZED, &hcd->flags);
+	switch (authorized_default) {
+	case USB_AUTHORIZE_NONE:
+		hcd->dev_policy = USB_DEVICE_AUTHORIZE_NONE;
+		break;
+
+	case USB_AUTHORIZE_ALL:
+		hcd->dev_policy = USB_DEVICE_AUTHORIZE_ALL;
+		break;
+
+	case USB_AUTHORIZE_INTERNAL:
+		hcd->dev_policy = USB_DEVICE_AUTHORIZE_INTERNAL;
+		break;
+
+	case USB_AUTHORIZE_WIRED:
+	default:
+		hcd->dev_policy = hcd->wireless ?
+			USB_DEVICE_AUTHORIZE_NONE : USB_DEVICE_AUTHORIZE_ALL;
+		break;
 	}
+
 	set_bit(HCD_FLAG_HW_ACCESSIBLE, &hcd->flags);
 
 	/* per default all interfaces are authorized */
diff --git a/drivers/usb/core/usb.c b/drivers/usb/core/usb.c
index 4ebfbd737905..9b5852e313f5 100644
--- a/drivers/usb/core/usb.c
+++ b/drivers/usb/core/usb.c
@@ -46,8 +46,7 @@
 #include <linux/mm.h>
 #include <linux/dma-mapping.h>
 
-#include "usb.h"
-
+#include "hub.h"
 
 const char *usbcore_name = "usbcore";
 
@@ -536,6 +535,27 @@ static unsigned usb_bus_is_wusb(struct usb_bus *bus)
 	return hcd->wireless;
 }
 
+static bool usb_dev_authorized(struct usb_device *dev, struct usb_hcd *hcd)
+{
+	struct usb_hub *hub;
+
+	if (!dev->parent)
+		return true; /* Root hub always ok [and always wired] */
+
+	switch (hcd->dev_policy) {
+	case USB_DEVICE_AUTHORIZE_NONE:
+	default:
+		return false;
+
+	case USB_DEVICE_AUTHORIZE_ALL:
+		return true;
+
+	case USB_DEVICE_AUTHORIZE_INTERNAL:
+		hub = usb_hub_to_struct_hub(dev->parent);
+		return hub->ports[dev->portnum - 1]->connect_type ==
+				USB_PORT_CONNECT_TYPE_HARD_WIRED;
+	}
+}
 
 /**
  * usb_alloc_dev - usb device constructor (usbcore-internal)
@@ -663,12 +683,11 @@ struct usb_device *usb_alloc_dev(struct usb_device *parent,
 	dev->connect_time = jiffies;
 	dev->active_duration = -jiffies;
 #endif
-	if (root_hub)	/* Root hub always ok [and always wired] */
-		dev->authorized = 1;
-	else {
-		dev->authorized = !!HCD_DEV_AUTHORIZED(usb_hcd);
+
+	dev->authorized = usb_dev_authorized(dev, usb_hcd);
+	if (!root_hub)
 		dev->wusb = usb_bus_is_wusb(bus) ? 1 : 0;
-	}
+
 	return dev;
 }
 EXPORT_SYMBOL_GPL(usb_alloc_dev);
diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h
index 7dc3a411bece..695931b03684 100644
--- a/include/linux/usb/hcd.h
+++ b/include/linux/usb/hcd.h
@@ -72,6 +72,12 @@ struct giveback_urb_bh {
 	struct usb_host_endpoint *completing_ep;
 };
 
+enum usb_dev_authorize_policy {
+	USB_DEVICE_AUTHORIZE_NONE	= 0,
+	USB_DEVICE_AUTHORIZE_ALL	= 1,
+	USB_DEVICE_AUTHORIZE_INTERNAL	= 2,
+};
+
 struct usb_hcd {
 
 	/*
@@ -117,7 +123,6 @@ struct usb_hcd {
 #define HCD_FLAG_RH_RUNNING		5	/* root hub is running? */
 #define HCD_FLAG_DEAD			6	/* controller has died? */
 #define HCD_FLAG_INTF_AUTHORIZED	7	/* authorize interfaces? */
-#define HCD_FLAG_DEV_AUTHORIZED		8	/* authorize devices? */
 
 	/* The flags can be tested using these macros; they are likely to
 	 * be slightly faster than test_bit().
@@ -142,8 +147,7 @@ struct usb_hcd {
 	 * or they require explicit user space authorization; this bit is
 	 * settable through /sys/class/usb_host/X/authorized_default
 	 */
-#define HCD_DEV_AUTHORIZED(hcd) \
-	((hcd)->flags & (1U << HCD_FLAG_DEV_AUTHORIZED))
+	enum usb_dev_authorize_policy dev_policy;
 
 	/* Flags that get set only during HCD registration or removal. */
 	unsigned		rh_registered:1;/* is root hub registered? */
-- 
cgit v1.2.3


From 4d9ec73d2b78daf70477aadc50eb4d2186c8b62f Mon Sep 17 00:00:00 2001
From: Jouni Malinen <jouni@codeaurora.org>
Date: Fri, 15 Feb 2019 02:14:33 +0200
Subject: cfg80211: Report Association Request frame IEs in association events

This extends the NL80211_CMD_ASSOCIATE event case to report
NL80211_ATTR_REQ_IE similarly to what is already done with the
NL80211_CMD_CONNECT events if the driver provides this information. In
practice, this adds (Re)Association Request frame information element
reporting to mac80211 drivers for the cases where user space SME is
used.

This provides more information for user space to figure out which
capabilities were negotiated for the association. For example, this can
be used to determine whether HT, VHT, or HE is used.

Signed-off-by: Jouni Malinen <jouni@codeaurora.org>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h     |  7 +++++--
 net/mac80211/ieee80211_i.h |  6 ++++++
 net/mac80211/mlme.c        | 14 ++++++++++++--
 net/wireless/mlme.c        |  8 ++++++--
 net/wireless/nl80211.c     | 24 +++++++++++++++---------
 net/wireless/nl80211.h     |  3 ++-
 6 files changed, 46 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index f81677f2f051..7a29b709077f 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -5676,10 +5676,12 @@ void cfg80211_auth_timeout(struct net_device *dev, const u8 *addr);
  * @dev: network device
  * @bss: the BSS that association was requested with, ownership of the pointer
  *	moves to cfg80211 in this call
- * @buf: authentication frame (header + body)
+ * @buf: (Re)Association Response frame (header + body)
  * @len: length of the frame data
  * @uapsd_queues: bitmap of queues configured for uapsd. Same format
  *	as the AC bitmap in the QoS info field
+ * @req_ies: information elements from the (Re)Association Request frame
+ * @req_ies_len: length of req_ies data
  *
  * After being asked to associate via cfg80211_ops::assoc() the driver must
  * call either this function or cfg80211_auth_timeout().
@@ -5689,7 +5691,8 @@ void cfg80211_auth_timeout(struct net_device *dev, const u8 *addr);
 void cfg80211_rx_assoc_resp(struct net_device *dev,
 			    struct cfg80211_bss *bss,
 			    const u8 *buf, size_t len,
-			    int uapsd_queues);
+			    int uapsd_queues,
+			    const u8 *req_ies, size_t req_ies_len);
 
 /**
  * cfg80211_assoc_timeout - notification of timed out association
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 5795eef98771..afce50da6fd6 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -556,6 +556,12 @@ struct ieee80211_if_managed {
 	 * get stuck in a downgraded situation and flush takes forever.
 	 */
 	struct delayed_work tx_tspec_wk;
+
+	/* Information elements from the last transmitted (Re)Association
+	 * Request frame.
+	 */
+	u8 *assoc_req_ies;
+	size_t assoc_req_ies_len;
 };
 
 struct ieee80211_if_ibss {
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index a49fbb3f3ed7..df5d4b90616d 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -644,7 +644,7 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
 	struct ieee80211_mgd_assoc_data *assoc_data = ifmgd->assoc_data;
 	struct sk_buff *skb;
 	struct ieee80211_mgmt *mgmt;
-	u8 *pos, qos_info;
+	u8 *pos, qos_info, *ie_start;
 	size_t offset = 0, noffset;
 	int i, count, rates_len, supp_rates_len, shift;
 	u16 capab;
@@ -752,6 +752,7 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
 
 	/* SSID */
 	pos = skb_put(skb, 2 + assoc_data->ssid_len);
+	ie_start = pos;
 	*pos++ = WLAN_EID_SSID;
 	*pos++ = assoc_data->ssid_len;
 	memcpy(pos, assoc_data->ssid, assoc_data->ssid_len);
@@ -976,6 +977,11 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
 		return;
 	}
 
+	pos = skb_tail_pointer(skb);
+	kfree(ifmgd->assoc_req_ies);
+	ifmgd->assoc_req_ies = kmemdup(ie_start, pos - ie_start, GFP_ATOMIC);
+	ifmgd->assoc_req_ies_len = pos - ie_start;
+
 	drv_mgd_prepare_tx(local, sdata, 0);
 
 	IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT;
@@ -3544,7 +3550,8 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
 				uapsd_queues |= ieee80211_ac_to_qos_mask[ac];
 	}
 
-	cfg80211_rx_assoc_resp(sdata->dev, bss, (u8 *)mgmt, len, uapsd_queues);
+	cfg80211_rx_assoc_resp(sdata->dev, bss, (u8 *)mgmt, len, uapsd_queues,
+			       ifmgd->assoc_req_ies, ifmgd->assoc_req_ies_len);
 }
 
 static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
@@ -5576,6 +5583,9 @@ void ieee80211_mgd_stop(struct ieee80211_sub_if_data *sdata)
 		ifmgd->teardown_skb = NULL;
 		ifmgd->orig_teardown_skb = NULL;
 	}
+	kfree(ifmgd->assoc_req_ies);
+	ifmgd->assoc_req_ies = NULL;
+	ifmgd->assoc_req_ies_len = 0;
 	spin_unlock_bh(&ifmgd->teardown_lock);
 	del_timer_sync(&ifmgd->timer);
 	sdata_unlock(sdata);
diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c
index 1615e503f8e3..f9462010575f 100644
--- a/net/wireless/mlme.c
+++ b/net/wireless/mlme.c
@@ -21,7 +21,8 @@
 
 
 void cfg80211_rx_assoc_resp(struct net_device *dev, struct cfg80211_bss *bss,
-			    const u8 *buf, size_t len, int uapsd_queues)
+			    const u8 *buf, size_t len, int uapsd_queues,
+			    const u8 *req_ies, size_t req_ies_len)
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
 	struct wiphy *wiphy = wdev->wiphy;
@@ -33,6 +34,8 @@ void cfg80211_rx_assoc_resp(struct net_device *dev, struct cfg80211_bss *bss,
 	cr.status = (int)le16_to_cpu(mgmt->u.assoc_resp.status_code);
 	cr.bssid = mgmt->bssid;
 	cr.bss = bss;
+	cr.req_ie = req_ies;
+	cr.req_ie_len = req_ies_len;
 	cr.resp_ie = mgmt->u.assoc_resp.variable;
 	cr.resp_ie_len =
 		len - offsetof(struct ieee80211_mgmt, u.assoc_resp.variable);
@@ -52,7 +55,8 @@ void cfg80211_rx_assoc_resp(struct net_device *dev, struct cfg80211_bss *bss,
 		return;
 	}
 
-	nl80211_send_rx_assoc(rdev, dev, buf, len, GFP_KERNEL, uapsd_queues);
+	nl80211_send_rx_assoc(rdev, dev, buf, len, GFP_KERNEL, uapsd_queues,
+			      req_ies, req_ies_len);
 	/* update current_bss etc., consumes the bss reference */
 	__cfg80211_connect_result(dev, &cr, cr.status == WLAN_STATUS_SUCCESS);
 }
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 80878b431584..d5badbbb28a3 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -14491,12 +14491,13 @@ static void nl80211_send_mlme_event(struct cfg80211_registered_device *rdev,
 				    struct net_device *netdev,
 				    const u8 *buf, size_t len,
 				    enum nl80211_commands cmd, gfp_t gfp,
-				    int uapsd_queues)
+				    int uapsd_queues, const u8 *req_ies,
+				    size_t req_ies_len)
 {
 	struct sk_buff *msg;
 	void *hdr;
 
-	msg = nlmsg_new(100 + len, gfp);
+	msg = nlmsg_new(100 + len + req_ies_len, gfp);
 	if (!msg)
 		return;
 
@@ -14508,7 +14509,9 @@ static void nl80211_send_mlme_event(struct cfg80211_registered_device *rdev,
 
 	if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) ||
 	    nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex) ||
-	    nla_put(msg, NL80211_ATTR_FRAME, len, buf))
+	    nla_put(msg, NL80211_ATTR_FRAME, len, buf) ||
+	    (req_ies &&
+	     nla_put(msg, NL80211_ATTR_REQ_IE, req_ies_len, req_ies)))
 		goto nla_put_failure;
 
 	if (uapsd_queues >= 0) {
@@ -14539,15 +14542,17 @@ void nl80211_send_rx_auth(struct cfg80211_registered_device *rdev,
 			  size_t len, gfp_t gfp)
 {
 	nl80211_send_mlme_event(rdev, netdev, buf, len,
-				NL80211_CMD_AUTHENTICATE, gfp, -1);
+				NL80211_CMD_AUTHENTICATE, gfp, -1, NULL, 0);
 }
 
 void nl80211_send_rx_assoc(struct cfg80211_registered_device *rdev,
 			   struct net_device *netdev, const u8 *buf,
-			   size_t len, gfp_t gfp, int uapsd_queues)
+			   size_t len, gfp_t gfp, int uapsd_queues,
+			   const u8 *req_ies, size_t req_ies_len)
 {
 	nl80211_send_mlme_event(rdev, netdev, buf, len,
-				NL80211_CMD_ASSOCIATE, gfp, uapsd_queues);
+				NL80211_CMD_ASSOCIATE, gfp, uapsd_queues,
+				req_ies, req_ies_len);
 }
 
 void nl80211_send_deauth(struct cfg80211_registered_device *rdev,
@@ -14555,7 +14560,7 @@ void nl80211_send_deauth(struct cfg80211_registered_device *rdev,
 			 size_t len, gfp_t gfp)
 {
 	nl80211_send_mlme_event(rdev, netdev, buf, len,
-				NL80211_CMD_DEAUTHENTICATE, gfp, -1);
+				NL80211_CMD_DEAUTHENTICATE, gfp, -1, NULL, 0);
 }
 
 void nl80211_send_disassoc(struct cfg80211_registered_device *rdev,
@@ -14563,7 +14568,7 @@ void nl80211_send_disassoc(struct cfg80211_registered_device *rdev,
 			   size_t len, gfp_t gfp)
 {
 	nl80211_send_mlme_event(rdev, netdev, buf, len,
-				NL80211_CMD_DISASSOCIATE, gfp, -1);
+				NL80211_CMD_DISASSOCIATE, gfp, -1, NULL, 0);
 }
 
 void cfg80211_rx_unprot_mlme_mgmt(struct net_device *dev, const u8 *buf,
@@ -14584,7 +14589,8 @@ void cfg80211_rx_unprot_mlme_mgmt(struct net_device *dev, const u8 *buf,
 		cmd = NL80211_CMD_UNPROT_DISASSOCIATE;
 
 	trace_cfg80211_rx_unprot_mlme_mgmt(dev, buf, len);
-	nl80211_send_mlme_event(rdev, dev, buf, len, cmd, GFP_ATOMIC, -1);
+	nl80211_send_mlme_event(rdev, dev, buf, len, cmd, GFP_ATOMIC, -1,
+				NULL, 0);
 }
 EXPORT_SYMBOL(cfg80211_rx_unprot_mlme_mgmt);
 
diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h
index 531c82dcba6b..a41e94a49a89 100644
--- a/net/wireless/nl80211.h
+++ b/net/wireless/nl80211.h
@@ -67,7 +67,8 @@ void nl80211_send_rx_auth(struct cfg80211_registered_device *rdev,
 void nl80211_send_rx_assoc(struct cfg80211_registered_device *rdev,
 			   struct net_device *netdev,
 			   const u8 *buf, size_t len, gfp_t gfp,
-			   int uapsd_queues);
+			   int uapsd_queues,
+			   const u8 *req_ies, size_t req_ies_len);
 void nl80211_send_deauth(struct cfg80211_registered_device *rdev,
 			 struct net_device *netdev,
 			 const u8 *buf, size_t len, gfp_t gfp);
-- 
cgit v1.2.3


From ee145775c1eb84bb76e71639425ec44c654fb868 Mon Sep 17 00:00:00 2001
From: Sara Sharon <sara.sharon@intel.com>
Date: Wed, 6 Feb 2019 13:17:09 +0200
Subject: mac80211: support max channel switch time element

2018 REVmd of the spec introduces the max channel switch time
element which is optionally included in beacons/probes when there
is a channel switch / extended channel switch element.
The value represents the maximum delay between the time the AP
transmitted the last beacon in current channel and the expected
time of the first beacon in the new channel, in TU.

Parse the value and pass it to the driver.

Signed-off-by: Sara Sharon <sara.sharon@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h  | 1 +
 include/net/mac80211.h     | 4 ++++
 net/mac80211/ieee80211_i.h | 2 ++
 net/mac80211/mlme.c        | 1 +
 net/mac80211/spectmgmt.c   | 6 ++++++
 net/mac80211/util.c        | 4 ++++
 6 files changed, 18 insertions(+)

(limited to 'include')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 6cbaed4d7a6b..d9650ae2b4f7 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -2475,6 +2475,7 @@ enum ieee80211_eid_ext {
 	WLAN_EID_EXT_HE_OPERATION = 36,
 	WLAN_EID_EXT_UORA = 37,
 	WLAN_EID_EXT_HE_MU_EDCA = 38,
+	WLAN_EID_EXT_MAX_CHANNEL_SWITCH_TIME = 52,
 	WLAN_EID_EXT_MULTIPLE_BSSID_CONFIGURATION = 55,
 };
 
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 97aed7b1ba5d..3fb38d2bdb4f 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -1519,6 +1519,9 @@ struct ieee80211_conf {
  *	scheduled channel switch, as indicated by the AP.
  * @chandef: the new channel to switch to
  * @count: the number of TBTT's until the channel switch event
+ * @delay: maximum delay between the time the AP transmitted the last beacon in
+  *	current channel and the expected time of the first beacon in the new
+  *	channel, expressed in TU.
  */
 struct ieee80211_channel_switch {
 	u64 timestamp;
@@ -1526,6 +1529,7 @@ struct ieee80211_channel_switch {
 	bool block_tx;
 	struct cfg80211_chan_def chandef;
 	u8 count;
+	u32 delay;
 };
 
 /**
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index afce50da6fd6..e170f986d226 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -1453,6 +1453,7 @@ struct ieee80211_csa_ie {
 	u8 ttl;
 	u16 pre_value;
 	u16 reason_code;
+	u32 max_switch_time;
 };
 
 /* Parsed Information Elements */
@@ -1493,6 +1494,7 @@ struct ieee802_11_elems {
 	const struct ieee80211_channel_sw_ie *ch_switch_ie;
 	const struct ieee80211_ext_chansw_ie *ext_chansw_ie;
 	const struct ieee80211_wide_bw_chansw_ie *wide_bw_chansw_ie;
+	const u8 *max_channel_switch_time;
 	const u8 *country_elem;
 	const u8 *pwr_constr_elem;
 	const u8 *cisco_dtpc_elem;
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index df5d4b90616d..1b4938d100d5 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -1352,6 +1352,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata,
 	ch_switch.block_tx = csa_ie.mode;
 	ch_switch.chandef = csa_ie.chandef;
 	ch_switch.count = csa_ie.count;
+	ch_switch.delay = csa_ie.max_switch_time;
 
 	if (drv_pre_channel_switch(sdata, &ch_switch)) {
 		sdata_info(sdata,
diff --git a/net/mac80211/spectmgmt.c b/net/mac80211/spectmgmt.c
index 4e4902bdbef8..3c644f14dd59 100644
--- a/net/mac80211/spectmgmt.c
+++ b/net/mac80211/spectmgmt.c
@@ -177,6 +177,12 @@ int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata,
 		csa_ie->chandef = new_vht_chandef;
 	}
 
+	if (elems->max_channel_switch_time)
+		csa_ie->max_switch_time =
+			(elems->max_channel_switch_time[0] << 0) |
+			(elems->max_channel_switch_time[1] <<  8) |
+			(elems->max_channel_switch_time[2] << 16);
+
 	return 0;
 }
 
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 8349c91250ef..3f5a704d1ab0 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -1274,6 +1274,10 @@ _ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action,
 				elems->he_operation = (void *)&pos[1];
 			} else if (pos[0] == WLAN_EID_EXT_UORA && elen >= 1) {
 				elems->uora_element = (void *)&pos[1];
+			} else if (pos[0] ==
+				   WLAN_EID_EXT_MAX_CHANNEL_SWITCH_TIME &&
+				   elen == 4) {
+				elems->max_channel_switch_time = pos + 1;
 			} else if (pos[0] ==
 				   WLAN_EID_EXT_MULTIPLE_BSSID_CONFIGURATION &&
 				   elen == 3) {
-- 
cgit v1.2.3


From b9cc81d827ed13502ca9c8523361be26ca1d7220 Mon Sep 17 00:00:00 2001
From: Sara Sharon <sara.sharon@intel.com>
Date: Wed, 6 Feb 2019 13:17:10 +0200
Subject: mac80211: abort CSA if beacon does not include CSA IEs

In case we receive a beacon without CSA IE while we are in
the middle of channel switch - abort the operation.

Signed-off-by: Sara Sharon <sara.sharon@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h    |  5 +++++
 net/mac80211/driver-ops.h | 14 ++++++++++++++
 net/mac80211/mlme.c       | 43 +++++++++++++++++++++++++++++++++++++------
 net/mac80211/trace.h      |  6 ++++++
 4 files changed, 62 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 3fb38d2bdb4f..aef7174a7c65 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -3643,6 +3643,9 @@ enum ieee80211_reconfig_type {
  * @post_channel_switch: This is an optional callback that is called
  *	after a channel switch procedure is completed, allowing the
  *	driver to go back to a normal configuration.
+ * @abort_channel_switch: This is an optional callback that is called
+ *	when channel switch procedure was completed, allowing the
+ *	driver to go back to a normal configuration.
  *
  * @join_ibss: Join an IBSS (on an IBSS interface); this is called after all
  *	information in bss_conf is set up and the beacon can be retrieved. A
@@ -3946,6 +3949,8 @@ struct ieee80211_ops {
 
 	int (*post_channel_switch)(struct ieee80211_hw *hw,
 				   struct ieee80211_vif *vif);
+	void (*abort_channel_switch)(struct ieee80211_hw *hw,
+				     struct ieee80211_vif *vif);
 
 	int (*join_ibss)(struct ieee80211_hw *hw, struct ieee80211_vif *vif);
 	void (*leave_ibss)(struct ieee80211_hw *hw, struct ieee80211_vif *vif);
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index ba3c07b10cd0..e069122595d0 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -1052,6 +1052,20 @@ drv_post_channel_switch(struct ieee80211_sub_if_data *sdata)
 	return ret;
 }
 
+static inline void
+drv_abort_channel_switch(struct ieee80211_sub_if_data *sdata)
+{
+	struct ieee80211_local *local = sdata->local;
+
+	if (!check_sdata_in_driver(sdata))
+		return;
+
+	trace_drv_abort_channel_switch(local, sdata);
+
+	if (local->ops->abort_channel_switch)
+		local->ops->abort_channel_switch(&local->hw, &sdata->vif);
+}
+
 static inline int drv_join_ibss(struct ieee80211_local *local,
 				struct ieee80211_sub_if_data *sdata)
 {
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 1b4938d100d5..28a275ec6700 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -1258,6 +1258,32 @@ static void ieee80211_chswitch_timer(struct timer_list *t)
 	ieee80211_queue_work(&sdata->local->hw, &sdata->u.mgd.chswitch_work);
 }
 
+static void
+ieee80211_sta_abort_chanswitch(struct ieee80211_sub_if_data *sdata)
+{
+	struct ieee80211_local *local = sdata->local;
+
+	if (!local->ops->abort_channel_switch)
+		return;
+
+	mutex_lock(&local->mtx);
+
+	mutex_lock(&local->chanctx_mtx);
+	ieee80211_vif_unreserve_chanctx(sdata);
+	mutex_unlock(&local->chanctx_mtx);
+
+	if (sdata->csa_block_tx)
+		ieee80211_wake_vif_queues(local, sdata,
+					  IEEE80211_QUEUE_STOP_REASON_CSA);
+
+	sdata->csa_block_tx = false;
+	sdata->vif.csa_active = false;
+
+	mutex_unlock(&local->mtx);
+
+	drv_abort_channel_switch(sdata);
+}
+
 static void
 ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata,
 				 u64 timestamp, u32 device_timestamp,
@@ -1282,19 +1308,24 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata,
 	if (local->scanning)
 		return;
 
-	/* disregard subsequent announcements if we are already processing */
-	if (sdata->vif.csa_active)
-		return;
-
 	current_band = cbss->channel->band;
 	res = ieee80211_parse_ch_switch_ie(sdata, elems, current_band,
 					   ifmgd->flags,
 					   ifmgd->associated->bssid, &csa_ie);
-	if (res	< 0)
+	if (res < 0) {
 		ieee80211_queue_work(&local->hw,
 				     &ifmgd->csa_connection_drop_work);
-	if (res)
 		return;
+	}
+
+	if (res && beacon && sdata->vif.csa_active &&
+	    !ifmgd->csa_waiting_bcn) {
+		ieee80211_sta_abort_chanswitch(sdata);
+		return;
+	} else if (sdata->vif.csa_active || res) {
+		/* disregard subsequent announcements if already processing */
+		return;
+	}
 
 	if (!cfg80211_chandef_usable(local->hw.wiphy, &csa_ie.chandef,
 				     IEEE80211_CHAN_DISABLED)) {
diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h
index 35ea0dcb55e6..c1e203fe281e 100644
--- a/net/mac80211/trace.h
+++ b/net/mac80211/trace.h
@@ -2452,6 +2452,12 @@ DEFINE_EVENT(local_sdata_evt, drv_post_channel_switch,
 	     TP_ARGS(local, sdata)
 );
 
+DEFINE_EVENT(local_sdata_evt, drv_abort_channel_switch,
+	     TP_PROTO(struct ieee80211_local *local,
+		      struct ieee80211_sub_if_data *sdata),
+	     TP_ARGS(local, sdata)
+);
+
 TRACE_EVENT(drv_get_txpower,
 	TP_PROTO(struct ieee80211_local *local,
 		 struct ieee80211_sub_if_data *sdata,
-- 
cgit v1.2.3


From 2ff5e52e7836f1851f25304e0b2ee09cd9e7962e Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Wed, 6 Feb 2019 13:17:11 +0200
Subject: radiotap: add 0-length PSDU "not captured" type

This type was defined in radiotap but we didn't add it to the
header file, add it now.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/ieee80211_radiotap.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/ieee80211_radiotap.h b/include/net/ieee80211_radiotap.h
index 8014153bdd49..6674412f9270 100644
--- a/include/net/ieee80211_radiotap.h
+++ b/include/net/ieee80211_radiotap.h
@@ -1,6 +1,6 @@
 /*
  * Copyright (c) 2017		Intel Deutschland GmbH
- * Copyright (c) 2018		Intel Corporation
+ * Copyright (c) 2018-2019	Intel Corporation
  *
  * Permission to use, copy, modify, and/or distribute this software for any
  * purpose with or without fee is hereby granted, provided that the above
@@ -343,6 +343,7 @@ struct ieee80211_radiotap_lsig {
 
 enum ieee80211_radiotap_zero_len_psdu_type {
 	IEEE80211_RADIOTAP_ZERO_LEN_PSDU_SOUNDING		= 0,
+	IEEE80211_RADIOTAP_ZERO_LEN_PSDU_NOT_CAPTURED		= 1,
 	IEEE80211_RADIOTAP_ZERO_LEN_PSDU_VENDOR			= 0xff,
 };
 
-- 
cgit v1.2.3


From fafd2bce5a5821b8902b11ab24dffa47dfdbd2d0 Mon Sep 17 00:00:00 2001
From: Sara Sharon <sara.sharon@intel.com>
Date: Wed, 6 Feb 2019 13:17:15 +0200
Subject: mac80211: notify driver on subsequent CSA beacons

Some drivers may want to track further the CSA beacons, for example
to compensate for buggy APs that change the beacon count or quiet
mode during CSA flow.

Signed-off-by: Sara Sharon <sara.sharon@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h    |  9 +++++++--
 net/mac80211/driver-ops.h | 17 ++++++++++++++++-
 net/mac80211/mlme.c       | 27 ++++++++++++++++-----------
 net/mac80211/trace.h      | 40 ++++++++++++++++++++++++++++++++++++++--
 4 files changed, 77 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index aef7174a7c65..ac2ed8ec662b 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -6,7 +6,7 @@
  * Copyright 2007-2010	Johannes Berg <johannes@sipsolutions.net>
  * Copyright 2013-2014  Intel Mobile Communications GmbH
  * Copyright (C) 2015 - 2017 Intel Deutschland GmbH
- * Copyright (C) 2018        Intel Corporation
+ * Copyright (C) 2018 - 2019 Intel Corporation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -3646,7 +3646,9 @@ enum ieee80211_reconfig_type {
  * @abort_channel_switch: This is an optional callback that is called
  *	when channel switch procedure was completed, allowing the
  *	driver to go back to a normal configuration.
- *
+ * @channel_switch_rx_beacon: This is an optional callback that is called
+ *	when channel switch procedure is in progress and additional beacon with
+ *	CSA IE was received, allowing driver to track changes in count.
  * @join_ibss: Join an IBSS (on an IBSS interface); this is called after all
  *	information in bss_conf is set up and the beacon can be retrieved. A
  *	channel context is bound before this is called.
@@ -3951,6 +3953,9 @@ struct ieee80211_ops {
 				   struct ieee80211_vif *vif);
 	void (*abort_channel_switch)(struct ieee80211_hw *hw,
 				     struct ieee80211_vif *vif);
+	void (*channel_switch_rx_beacon)(struct ieee80211_hw *hw,
+					 struct ieee80211_vif *vif,
+					 struct ieee80211_channel_switch *ch_switch);
 
 	int (*join_ibss)(struct ieee80211_hw *hw, struct ieee80211_vif *vif);
 	void (*leave_ibss)(struct ieee80211_hw *hw, struct ieee80211_vif *vif);
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index e069122595d0..28d022a3eee3 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -2,7 +2,7 @@
 /*
 * Portions of this file
 * Copyright(c) 2016 Intel Deutschland GmbH
-* Copyright (C) 2018 Intel Corporation
+* Copyright (C) 2018 - 2019 Intel Corporation
 */
 
 #ifndef __MAC80211_DRIVER_OPS
@@ -1066,6 +1066,21 @@ drv_abort_channel_switch(struct ieee80211_sub_if_data *sdata)
 		local->ops->abort_channel_switch(&local->hw, &sdata->vif);
 }
 
+static inline void
+drv_channel_switch_rx_beacon(struct ieee80211_sub_if_data *sdata,
+			     struct ieee80211_channel_switch *ch_switch)
+{
+	struct ieee80211_local *local = sdata->local;
+
+	if (!check_sdata_in_driver(sdata))
+		return;
+
+	trace_drv_channel_switch_rx_beacon(local, sdata, ch_switch);
+	if (local->ops->channel_switch_rx_beacon)
+		local->ops->channel_switch_rx_beacon(&local->hw, &sdata->vif,
+						     ch_switch);
+}
+
 static inline int drv_join_ibss(struct ieee80211_local *local,
 				struct ieee80211_sub_if_data *sdata)
 {
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 28a275ec6700..411656614e9a 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -7,7 +7,7 @@
  * Copyright 2007, Michael Wu <flamingice@sourmilk.net>
  * Copyright 2013-2014  Intel Mobile Communications GmbH
  * Copyright (C) 2015 - 2017 Intel Deutschland GmbH
- * Copyright (C) 2018        Intel Corporation
+ * Copyright (C) 2018 - 2019 Intel Corporation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -1312,15 +1312,27 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata,
 	res = ieee80211_parse_ch_switch_ie(sdata, elems, current_band,
 					   ifmgd->flags,
 					   ifmgd->associated->bssid, &csa_ie);
+
+	if (!res) {
+		ch_switch.timestamp = timestamp;
+		ch_switch.device_timestamp = device_timestamp;
+		ch_switch.block_tx = csa_ie.mode;
+		ch_switch.chandef = csa_ie.chandef;
+		ch_switch.count = csa_ie.count;
+		ch_switch.delay = csa_ie.max_switch_time;
+	}
+
 	if (res < 0) {
 		ieee80211_queue_work(&local->hw,
 				     &ifmgd->csa_connection_drop_work);
 		return;
 	}
 
-	if (res && beacon && sdata->vif.csa_active &&
-	    !ifmgd->csa_waiting_bcn) {
-		ieee80211_sta_abort_chanswitch(sdata);
+	if (beacon && sdata->vif.csa_active && !ifmgd->csa_waiting_bcn) {
+		if (res)
+			ieee80211_sta_abort_chanswitch(sdata);
+		else
+			drv_channel_switch_rx_beacon(sdata, &ch_switch);
 		return;
 	} else if (sdata->vif.csa_active || res) {
 		/* disregard subsequent announcements if already processing */
@@ -1378,13 +1390,6 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata,
 		goto drop_connection;
 	}
 
-	ch_switch.timestamp = timestamp;
-	ch_switch.device_timestamp = device_timestamp;
-	ch_switch.block_tx = csa_ie.mode;
-	ch_switch.chandef = csa_ie.chandef;
-	ch_switch.count = csa_ie.count;
-	ch_switch.delay = csa_ie.max_switch_time;
-
 	if (drv_pre_channel_switch(sdata, &ch_switch)) {
 		sdata_info(sdata,
 			   "preparing for channel switch failed, disconnecting\n");
diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h
index c1e203fe281e..8ba70d26b82e 100644
--- a/net/mac80211/trace.h
+++ b/net/mac80211/trace.h
@@ -1,8 +1,8 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 /*
 * Portions of this file
-* Copyright(c) 2016 Intel Deutschland GmbH
-* Copyright (C) 2018 Intel Corporation
+* Copyright(c) 2016-2017 Intel Deutschland GmbH
+* Copyright (C) 2018 - 2019 Intel Corporation
 */
 
 #if !defined(__MAC80211_DRIVER_TRACE) || defined(TRACE_HEADER_MULTI_READ)
@@ -2458,6 +2458,42 @@ DEFINE_EVENT(local_sdata_evt, drv_abort_channel_switch,
 	     TP_ARGS(local, sdata)
 );
 
+TRACE_EVENT(drv_channel_switch_rx_beacon,
+	TP_PROTO(struct ieee80211_local *local,
+		 struct ieee80211_sub_if_data *sdata,
+		 struct ieee80211_channel_switch *ch_switch),
+
+	TP_ARGS(local, sdata, ch_switch),
+
+	TP_STRUCT__entry(
+		LOCAL_ENTRY
+		VIF_ENTRY
+		CHANDEF_ENTRY
+		__field(u64, timestamp)
+		__field(u32, device_timestamp)
+		__field(bool, block_tx)
+		__field(u8, count)
+	),
+
+	TP_fast_assign(
+		LOCAL_ASSIGN;
+		VIF_ASSIGN;
+		CHANDEF_ASSIGN(&ch_switch->chandef)
+		__entry->timestamp = ch_switch->timestamp;
+		__entry->device_timestamp = ch_switch->device_timestamp;
+		__entry->block_tx = ch_switch->block_tx;
+		__entry->count = ch_switch->count;
+	),
+
+	TP_printk(
+		LOCAL_PR_FMT VIF_PR_FMT
+		" received a channel switch beacon to "
+		CHANDEF_PR_FMT  " count:%d block_tx:%d timestamp:%llu",
+		LOCAL_PR_ARG, VIF_PR_ARG, CHANDEF_PR_ARG, __entry->count,
+		__entry->block_tx, __entry->timestamp
+	)
+);
+
 TRACE_EVENT(drv_get_txpower,
 	TP_PROTO(struct ieee80211_local *local,
 		 struct ieee80211_sub_if_data *sdata,
-- 
cgit v1.2.3


From 55c1fdf0d6c57e02c2279e0ba0c6f8ec502b46df Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Wed, 6 Feb 2019 13:17:19 +0200
Subject: cfg80211: allow sending vendor events unicast

Sometimes, we may want to transport higher bandwidth data
through vendor events, and in that case sending it multicast
is a bad idea. Allow vendor events to be unicast.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h | 49 +++++++++++++++++++++++++++++++++++++++++++++++--
 net/wireless/nl80211.c | 30 ++++++++++++++++++++++++------
 2 files changed, 71 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 7a29b709077f..bb307a11ee63 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -5854,6 +5854,7 @@ struct sk_buff *__cfg80211_alloc_event_skb(struct wiphy *wiphy,
 					   struct wireless_dev *wdev,
 					   enum nl80211_commands cmd,
 					   enum nl80211_attrs attr,
+					   unsigned int portid,
 					   int vendor_event_idx,
 					   int approxlen, gfp_t gfp);
 
@@ -5903,6 +5904,15 @@ cfg80211_vendor_cmd_alloc_reply_skb(struct wiphy *wiphy, int approxlen)
  */
 int cfg80211_vendor_cmd_reply(struct sk_buff *skb);
 
+/**
+ * cfg80211_vendor_cmd_get_sender
+ * @wiphy: the wiphy
+ *
+ * Return the current netlink port ID in a vendor command handler.
+ * Valid to call only there.
+ */
+unsigned int cfg80211_vendor_cmd_get_sender(struct wiphy *wiphy);
+
 /**
  * cfg80211_vendor_event_alloc - allocate vendor-specific event skb
  * @wiphy: the wiphy
@@ -5930,7 +5940,42 @@ cfg80211_vendor_event_alloc(struct wiphy *wiphy, struct wireless_dev *wdev,
 {
 	return __cfg80211_alloc_event_skb(wiphy, wdev, NL80211_CMD_VENDOR,
 					  NL80211_ATTR_VENDOR_DATA,
-					  event_idx, approxlen, gfp);
+					  0, event_idx, approxlen, gfp);
+}
+
+/**
+ * cfg80211_vendor_event_alloc_ucast - alloc unicast vendor-specific event skb
+ * @wiphy: the wiphy
+ * @wdev: the wireless device
+ * @event_idx: index of the vendor event in the wiphy's vendor_events
+ * @portid: port ID of the receiver
+ * @approxlen: an upper bound of the length of the data that will
+ *	be put into the skb
+ * @gfp: allocation flags
+ *
+ * This function allocates and pre-fills an skb for an event to send to
+ * a specific (userland) socket. This socket would previously have been
+ * obtained by cfg80211_vendor_cmd_get_sender(), and the caller MUST take
+ * care to register a netlink notifier to see when the socket closes.
+ *
+ * If wdev != NULL, both the ifindex and identifier of the specified
+ * wireless device are added to the event message before the vendor data
+ * attribute.
+ *
+ * When done filling the skb, call cfg80211_vendor_event() with the
+ * skb to send the event.
+ *
+ * Return: An allocated and pre-filled skb. %NULL if any errors happen.
+ */
+static inline struct sk_buff *
+cfg80211_vendor_event_alloc_ucast(struct wiphy *wiphy,
+				  struct wireless_dev *wdev,
+				  unsigned int portid, int approxlen,
+				  int event_idx, gfp_t gfp)
+{
+	return __cfg80211_alloc_event_skb(wiphy, wdev, NL80211_CMD_VENDOR,
+					  NL80211_ATTR_VENDOR_DATA,
+					  portid, event_idx, approxlen, gfp);
 }
 
 /**
@@ -6030,7 +6075,7 @@ static inline struct sk_buff *
 cfg80211_testmode_alloc_event_skb(struct wiphy *wiphy, int approxlen, gfp_t gfp)
 {
 	return __cfg80211_alloc_event_skb(wiphy, NULL, NL80211_CMD_TESTMODE,
-					  NL80211_ATTR_TESTDATA, -1,
+					  NL80211_ATTR_TESTDATA, 0, -1,
 					  approxlen, gfp);
 }
 
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index d5badbbb28a3..c5df5211d29a 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -4,7 +4,7 @@
  * Copyright 2006-2010	Johannes Berg <johannes@sipsolutions.net>
  * Copyright 2013-2014  Intel Mobile Communications GmbH
  * Copyright 2015-2017	Intel Deutschland GmbH
- * Copyright (C) 2018 Intel Corporation
+ * Copyright (C) 2018-2019 Intel Corporation
  */
 
 #include <linux/if.h>
@@ -9306,6 +9306,7 @@ struct sk_buff *__cfg80211_alloc_event_skb(struct wiphy *wiphy,
 					   struct wireless_dev *wdev,
 					   enum nl80211_commands cmd,
 					   enum nl80211_attrs attr,
+					   unsigned int portid,
 					   int vendor_event_idx,
 					   int approxlen, gfp_t gfp)
 {
@@ -9329,7 +9330,7 @@ struct sk_buff *__cfg80211_alloc_event_skb(struct wiphy *wiphy,
 		return NULL;
 	}
 
-	return __cfg80211_alloc_vendor_skb(rdev, wdev, approxlen, 0, 0,
+	return __cfg80211_alloc_vendor_skb(rdev, wdev, approxlen, portid, 0,
 					   cmd, attr, info, gfp);
 }
 EXPORT_SYMBOL(__cfg80211_alloc_event_skb);
@@ -9338,6 +9339,7 @@ void __cfg80211_send_event_skb(struct sk_buff *skb, gfp_t gfp)
 {
 	struct cfg80211_registered_device *rdev = ((void **)skb->cb)[0];
 	void *hdr = ((void **)skb->cb)[1];
+	struct nlmsghdr *nlhdr = nlmsg_hdr(skb);
 	struct nlattr *data = ((void **)skb->cb)[2];
 	enum nl80211_multicast_groups mcgrp = NL80211_MCGRP_TESTMODE;
 
@@ -9347,11 +9349,16 @@ void __cfg80211_send_event_skb(struct sk_buff *skb, gfp_t gfp)
 	nla_nest_end(skb, data);
 	genlmsg_end(skb, hdr);
 
-	if (data->nla_type == NL80211_ATTR_VENDOR_DATA)
-		mcgrp = NL80211_MCGRP_VENDOR;
+	if (nlhdr->nlmsg_pid) {
+		genlmsg_unicast(wiphy_net(&rdev->wiphy), skb,
+				nlhdr->nlmsg_pid);
+	} else {
+		if (data->nla_type == NL80211_ATTR_VENDOR_DATA)
+			mcgrp = NL80211_MCGRP_VENDOR;
 
-	genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), skb, 0,
-				mcgrp, gfp);
+		genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy),
+					skb, 0, mcgrp, gfp);
+	}
 }
 EXPORT_SYMBOL(__cfg80211_send_event_skb);
 
@@ -12736,6 +12743,17 @@ int cfg80211_vendor_cmd_reply(struct sk_buff *skb)
 }
 EXPORT_SYMBOL_GPL(cfg80211_vendor_cmd_reply);
 
+unsigned int cfg80211_vendor_cmd_get_sender(struct wiphy *wiphy)
+{
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
+
+	if (WARN_ON(!rdev->cur_cmd_info))
+		return 0;
+
+	return rdev->cur_cmd_info->snd_portid;
+}
+EXPORT_SYMBOL_GPL(cfg80211_vendor_cmd_get_sender);
+
 static int nl80211_set_qos_map(struct sk_buff *skb,
 			       struct genl_info *info)
 {
-- 
cgit v1.2.3


From 77ff2c6b49843b01adef1f80abb091753e4c9c65 Mon Sep 17 00:00:00 2001
From: Liad Kaufman <liad.kaufman@intel.com>
Date: Wed, 6 Feb 2019 13:17:20 +0200
Subject: mac80211: update HE IEs to D3.3

Update element names and new fields according to D3.3 of
the HE spec.

Signed-off-by: Liad Kaufman <liad.kaufman@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/fw/api/mac.h    | 26 +++++++++-
 drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c | 58 ++++++++--------------
 drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c  | 40 +++++++++++++++
 include/linux/ieee80211.h                          | 22 +++++---
 net/mac80211/debugfs_sta.c                         | 35 +++++++++----
 5 files changed, 125 insertions(+), 56 deletions(-)

(limited to 'include')

diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/mac.h b/drivers/net/wireless/intel/iwlwifi/fw/api/mac.h
index 7a3f7b7e6358..941c50477003 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/api/mac.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/mac.h
@@ -7,7 +7,7 @@
  *
  * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
  * Copyright(c) 2017        Intel Deutschland GmbH
- * Copyright(c) 2018 Intel Corporation
+ * Copyright(c) 2018 - 2019 Intel Corporation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
@@ -29,7 +29,7 @@
  *
  * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
  * Copyright(c) 2017        Intel Deutschland GmbH
- * Copyright(c) 2018 Intel Corporation
+ * Copyright(c) 2018 - 2019 Intel Corporation
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -433,6 +433,28 @@ struct iwl_he_backoff_conf {
 	__le16 mu_time;
 } __packed; /* AC_QOS_DOT11AX_API_S */
 
+/**
+ * enum iwl_he_pkt_ext_constellations - PPE constellation indices
+ * @IWL_HE_PKT_EXT_BPSK: BPSK
+ * @IWL_HE_PKT_EXT_QPSK:  QPSK
+ * @IWL_HE_PKT_EXT_16QAM: 16-QAM
+ * @IWL_HE_PKT_EXT_64QAM: 64-QAM
+ * @IWL_HE_PKT_EXT_256QAM: 256-QAM
+ * @IWL_HE_PKT_EXT_1024QAM: 1024-QAM
+ * @IWL_HE_PKT_EXT_RESERVED: reserved value
+ * @IWL_HE_PKT_EXT_NONE: not defined
+ */
+enum iwl_he_pkt_ext_constellations {
+	IWL_HE_PKT_EXT_BPSK = 0,
+	IWL_HE_PKT_EXT_QPSK,
+	IWL_HE_PKT_EXT_16QAM,
+	IWL_HE_PKT_EXT_64QAM,
+	IWL_HE_PKT_EXT_256QAM,
+	IWL_HE_PKT_EXT_1024QAM,
+	IWL_HE_PKT_EXT_RESERVED,
+	IWL_HE_PKT_EXT_NONE,
+};
+
 #define MAX_HE_SUPP_NSS	2
 #define MAX_HE_CHANNEL_BW_INDX	4
 
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c
index d9afedc3d1d9..e1178b09c4d5 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c
@@ -479,7 +479,6 @@ static struct ieee80211_sband_iftype_data iwl_he_capa[] = {
 					IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_RX_QOS_8,
 				.mac_cap_info[2] =
 					IEEE80211_HE_MAC_CAP2_32BIT_BA_BITMAP |
-					IEEE80211_HE_MAC_CAP2_MU_CASCADING |
 					IEEE80211_HE_MAC_CAP2_ACK_EN,
 				.mac_cap_info[3] =
 					IEEE80211_HE_MAC_CAP3_OMI_CONTROL |
@@ -490,7 +489,9 @@ static struct ieee80211_sband_iftype_data iwl_he_capa[] = {
 				.mac_cap_info[5] =
 					IEEE80211_HE_MAC_CAP5_MULTI_TID_AGG_TX_QOS_B40 |
 					IEEE80211_HE_MAC_CAP5_MULTI_TID_AGG_TX_QOS_B41 |
-					IEEE80211_HE_MAC_CAP5_UL_2x996_TONE_RU,
+					IEEE80211_HE_MAC_CAP5_UL_2x996_TONE_RU |
+					IEEE80211_HE_MAC_CAP5_HE_DYNAMIC_SM_PS |
+					IEEE80211_HE_MAC_CAP5_HT_VHT_TRIG_FRAME_RX,
 				.phy_cap_info[0] =
 					IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_IN_2G |
 					IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_80MHZ_IN_5G |
@@ -498,18 +499,13 @@ static struct ieee80211_sband_iftype_data iwl_he_capa[] = {
 				.phy_cap_info[1] =
 					IEEE80211_HE_PHY_CAP1_PREAMBLE_PUNC_RX_MASK |
 					IEEE80211_HE_PHY_CAP1_DEVICE_CLASS_A |
-					IEEE80211_HE_PHY_CAP1_LDPC_CODING_IN_PAYLOAD |
-					IEEE80211_HE_PHY_CAP1_MIDAMBLE_RX_TX_MAX_NSTS,
+					IEEE80211_HE_PHY_CAP1_LDPC_CODING_IN_PAYLOAD,
 				.phy_cap_info[2] =
-					IEEE80211_HE_PHY_CAP2_NDP_4x_LTF_AND_3_2US |
-					IEEE80211_HE_PHY_CAP2_STBC_TX_UNDER_80MHZ |
-					IEEE80211_HE_PHY_CAP2_STBC_RX_UNDER_80MHZ |
-					IEEE80211_HE_PHY_CAP2_UL_MU_FULL_MU_MIMO |
-					IEEE80211_HE_PHY_CAP2_UL_MU_PARTIAL_MU_MIMO,
+					IEEE80211_HE_PHY_CAP2_NDP_4x_LTF_AND_3_2US,
 				.phy_cap_info[3] =
-					IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_TX_BPSK |
+					IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_TX_NO_DCM |
 					IEEE80211_HE_PHY_CAP3_DCM_MAX_TX_NSS_1 |
-					IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_RX_BPSK |
+					IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_RX_NO_DCM |
 					IEEE80211_HE_PHY_CAP3_DCM_MAX_RX_NSS_1,
 				.phy_cap_info[4] =
 					IEEE80211_HE_PHY_CAP4_SU_BEAMFORMEE |
@@ -517,16 +513,8 @@ static struct ieee80211_sband_iftype_data iwl_he_capa[] = {
 					IEEE80211_HE_PHY_CAP4_BEAMFORMEE_MAX_STS_UNDER_80MHZ_8,
 				.phy_cap_info[5] =
 					IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_UNDER_80MHZ_2 |
-					IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_ABOVE_80MHZ_2 |
-					IEEE80211_HE_PHY_CAP5_NG16_SU_FEEDBACK |
-					IEEE80211_HE_PHY_CAP5_NG16_MU_FEEDBACK,
+					IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_ABOVE_80MHZ_2,
 				.phy_cap_info[6] =
-					IEEE80211_HE_PHY_CAP6_CODEBOOK_SIZE_42_SU |
-					IEEE80211_HE_PHY_CAP6_CODEBOOK_SIZE_75_MU |
-					IEEE80211_HE_PHY_CAP6_TRIG_SU_BEAMFORMER_FB |
-					IEEE80211_HE_PHY_CAP6_TRIG_MU_BEAMFORMER_FB |
-					IEEE80211_HE_PHY_CAP6_TRIG_CQI_FB |
-					IEEE80211_HE_PHY_CAP6_PARTIAL_BANDWIDTH_DL_MUMIMO |
 					IEEE80211_HE_PHY_CAP6_PPE_THRESHOLD_PRESENT,
 				.phy_cap_info[7] =
 					IEEE80211_HE_PHY_CAP7_POWER_BOOST_FACTOR_AR |
@@ -537,11 +525,12 @@ static struct ieee80211_sband_iftype_data iwl_he_capa[] = {
 					IEEE80211_HE_PHY_CAP8_20MHZ_IN_40MHZ_HE_PPDU_IN_2G |
 					IEEE80211_HE_PHY_CAP8_20MHZ_IN_160MHZ_HE_PPDU |
 					IEEE80211_HE_PHY_CAP8_80MHZ_IN_160MHZ_HE_PPDU |
-					IEEE80211_HE_PHY_CAP8_DCM_MAX_BW_160_OR_80P80_MHZ,
+					IEEE80211_HE_PHY_CAP8_DCM_MAX_RU_2x996,
 				.phy_cap_info[9] =
 					IEEE80211_HE_PHY_CAP9_NON_TRIGGERED_CQI_FEEDBACK |
 					IEEE80211_HE_PHY_CAP9_RX_FULL_BW_SU_USING_MU_WITH_COMP_SIGB |
-					IEEE80211_HE_PHY_CAP9_RX_FULL_BW_SU_USING_MU_WITH_NON_COMP_SIGB,
+					IEEE80211_HE_PHY_CAP9_RX_FULL_BW_SU_USING_MU_WITH_NON_COMP_SIGB |
+					IEEE80211_HE_PHY_CAP9_NOMIMAL_PKT_PADDING_RESERVED,
 			},
 			/*
 			 * Set default Tx/Rx HE MCS NSS Support field.
@@ -576,28 +565,26 @@ static struct ieee80211_sband_iftype_data iwl_he_capa[] = {
 					IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_RX_QOS_8,
 				.mac_cap_info[2] =
 					IEEE80211_HE_MAC_CAP2_BSR |
-					IEEE80211_HE_MAC_CAP2_MU_CASCADING |
 					IEEE80211_HE_MAC_CAP2_ACK_EN,
 				.mac_cap_info[3] =
 					IEEE80211_HE_MAC_CAP3_OMI_CONTROL |
 					IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_VHT_2,
 				.mac_cap_info[4] =
 					IEEE80211_HE_MAC_CAP4_AMDSU_IN_AMPDU,
+				.mac_cap_info[5] =
+					IEEE80211_HE_MAC_CAP5_UL_2x996_TONE_RU,
 				.phy_cap_info[0] =
 					IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_IN_2G |
 					IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_80MHZ_IN_5G |
 					IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_160MHZ_IN_5G,
 				.phy_cap_info[1] =
-					IEEE80211_HE_PHY_CAP1_LDPC_CODING_IN_PAYLOAD |
-					IEEE80211_HE_PHY_CAP1_MIDAMBLE_RX_TX_MAX_NSTS,
+					IEEE80211_HE_PHY_CAP1_LDPC_CODING_IN_PAYLOAD,
 				.phy_cap_info[2] =
-					IEEE80211_HE_PHY_CAP2_NDP_4x_LTF_AND_3_2US |
-					IEEE80211_HE_PHY_CAP2_STBC_TX_UNDER_80MHZ |
-					IEEE80211_HE_PHY_CAP2_STBC_RX_UNDER_80MHZ,
+					IEEE80211_HE_PHY_CAP2_NDP_4x_LTF_AND_3_2US,
 				.phy_cap_info[3] =
-					IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_TX_BPSK |
+					IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_TX_NO_DCM |
 					IEEE80211_HE_PHY_CAP3_DCM_MAX_TX_NSS_1 |
-					IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_RX_BPSK |
+					IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_RX_NO_DCM |
 					IEEE80211_HE_PHY_CAP3_DCM_MAX_RX_NSS_1,
 				.phy_cap_info[4] =
 					IEEE80211_HE_PHY_CAP4_SU_BEAMFORMEE |
@@ -605,12 +592,8 @@ static struct ieee80211_sband_iftype_data iwl_he_capa[] = {
 					IEEE80211_HE_PHY_CAP4_BEAMFORMEE_MAX_STS_UNDER_80MHZ_8,
 				.phy_cap_info[5] =
 					IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_UNDER_80MHZ_2 |
-					IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_ABOVE_80MHZ_2 |
-					IEEE80211_HE_PHY_CAP5_NG16_SU_FEEDBACK |
-					IEEE80211_HE_PHY_CAP5_NG16_MU_FEEDBACK,
+					IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_ABOVE_80MHZ_2,
 				.phy_cap_info[6] =
-					IEEE80211_HE_PHY_CAP6_CODEBOOK_SIZE_42_SU |
-					IEEE80211_HE_PHY_CAP6_CODEBOOK_SIZE_75_MU |
 					IEEE80211_HE_PHY_CAP6_PPE_THRESHOLD_PRESENT,
 				.phy_cap_info[7] =
 					IEEE80211_HE_PHY_CAP7_HE_SU_MU_PPDU_4XLTF_AND_08_US_GI |
@@ -620,10 +603,11 @@ static struct ieee80211_sband_iftype_data iwl_he_capa[] = {
 					IEEE80211_HE_PHY_CAP8_20MHZ_IN_40MHZ_HE_PPDU_IN_2G |
 					IEEE80211_HE_PHY_CAP8_20MHZ_IN_160MHZ_HE_PPDU |
 					IEEE80211_HE_PHY_CAP8_80MHZ_IN_160MHZ_HE_PPDU |
-					IEEE80211_HE_PHY_CAP8_DCM_MAX_BW_160_OR_80P80_MHZ,
+					IEEE80211_HE_PHY_CAP8_DCM_MAX_RU_2x996,
 				.phy_cap_info[9] =
 					IEEE80211_HE_PHY_CAP9_RX_FULL_BW_SU_USING_MU_WITH_COMP_SIGB |
-					IEEE80211_HE_PHY_CAP9_RX_FULL_BW_SU_USING_MU_WITH_NON_COMP_SIGB,
+					IEEE80211_HE_PHY_CAP9_RX_FULL_BW_SU_USING_MU_WITH_NON_COMP_SIGB |
+					IEEE80211_HE_PHY_CAP9_NOMIMAL_PKT_PADDING_RESERVED,
 			},
 			/*
 			 * Set default Tx/Rx HE MCS NSS Support field.
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
index 97dc464379d2..47d65adfa3e0 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
@@ -2076,6 +2076,46 @@ static void iwl_mvm_cfg_he_sta(struct iwl_mvm *mvm,
 		}
 
 		flags |= STA_CTXT_HE_PACKET_EXT;
+	} else if ((sta->he_cap.he_cap_elem.phy_cap_info[9] &
+		    IEEE80211_HE_PHY_CAP9_NOMIMAL_PKT_PADDING_MASK) !=
+		  IEEE80211_HE_PHY_CAP9_NOMIMAL_PKT_PADDING_RESERVED) {
+		int low_th = -1;
+		int high_th = -1;
+
+		/* Take the PPE thresholds from the nominal padding info */
+		switch (sta->he_cap.he_cap_elem.phy_cap_info[9] &
+			IEEE80211_HE_PHY_CAP9_NOMIMAL_PKT_PADDING_MASK) {
+		case IEEE80211_HE_PHY_CAP9_NOMIMAL_PKT_PADDING_0US:
+			low_th = IWL_HE_PKT_EXT_NONE;
+			high_th = IWL_HE_PKT_EXT_NONE;
+			break;
+		case IEEE80211_HE_PHY_CAP9_NOMIMAL_PKT_PADDING_8US:
+			low_th = IWL_HE_PKT_EXT_BPSK;
+			high_th = IWL_HE_PKT_EXT_NONE;
+			break;
+		case IEEE80211_HE_PHY_CAP9_NOMIMAL_PKT_PADDING_16US:
+			low_th = IWL_HE_PKT_EXT_NONE;
+			high_th = IWL_HE_PKT_EXT_BPSK;
+			break;
+		}
+
+		/* Set the PPE thresholds accordingly */
+		if (low_th >= 0 && high_th >= 0) {
+			u8 ***pkt_ext_qam =
+				(void *)sta_ctxt_cmd.pkt_ext.pkt_ext_qam_th;
+
+			for (i = 0; i < MAX_HE_SUPP_NSS; i++) {
+				u8 bw;
+
+				for (bw = 0; bw < MAX_HE_CHANNEL_BW_INDX;
+				     bw++) {
+					pkt_ext_qam[i][bw][0] = low_th;
+					pkt_ext_qam[i][bw][1] = high_th;
+				}
+			}
+
+			flags |= STA_CTXT_HE_PACKET_EXT;
+		}
 	}
 	rcu_read_unlock();
 
diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index d9650ae2b4f7..353fb722ab98 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -1803,6 +1803,9 @@ int ieee80211_get_vht_max_nss(struct ieee80211_vht_cap *cap,
 #define IEEE80211_HE_MAC_CAP5_SUBCHAN_SELECVITE_TRANSMISSION	0x04
 #define IEEE80211_HE_MAC_CAP5_UL_2x996_TONE_RU			0x08
 #define IEEE80211_HE_MAC_CAP5_OM_CTRL_UL_MU_DATA_DIS_RX		0x10
+#define IEEE80211_HE_MAC_CAP5_HE_DYNAMIC_SM_PS			0x20
+#define IEEE80211_HE_MAC_CAP5_PUNCTURED_SOUNDING		0x40
+#define IEEE80211_HE_MAC_CAP5_HT_VHT_TRIG_FRAME_RX		0x80
 
 /* 802.11ax HE PHY capabilities */
 #define IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_IN_2G		0x02
@@ -1926,11 +1929,11 @@ int ieee80211_get_vht_max_nss(struct ieee80211_vht_cap *cap,
 #define IEEE80211_HE_PHY_CAP8_80MHZ_IN_160MHZ_HE_PPDU			0x08
 #define IEEE80211_HE_PHY_CAP8_HE_ER_SU_1XLTF_AND_08_US_GI		0x10
 #define IEEE80211_HE_PHY_CAP8_MIDAMBLE_RX_TX_2X_AND_1XLTF		0x20
-#define IEEE80211_HE_PHY_CAP8_DCM_MAX_BW_20MHZ				0x00
-#define IEEE80211_HE_PHY_CAP8_DCM_MAX_BW_40MHZ				0x40
-#define IEEE80211_HE_PHY_CAP8_DCM_MAX_BW_80MHZ				0x80
-#define IEEE80211_HE_PHY_CAP8_DCM_MAX_BW_160_OR_80P80_MHZ		0xc0
-#define IEEE80211_HE_PHY_CAP8_DCM_MAX_BW_MASK				0xc0
+#define IEEE80211_HE_PHY_CAP8_DCM_MAX_RU_242				0x00
+#define IEEE80211_HE_PHY_CAP8_DCM_MAX_RU_484				0x40
+#define IEEE80211_HE_PHY_CAP8_DCM_MAX_RU_996				0x80
+#define IEEE80211_HE_PHY_CAP8_DCM_MAX_RU_2x996				0xc0
+#define IEEE80211_HE_PHY_CAP8_DCM_MAX_RU_MASK				0xc0
 
 #define IEEE80211_HE_PHY_CAP9_LONGER_THAN_16_SIGB_OFDM_SYM		0x01
 #define IEEE80211_HE_PHY_CAP9_NON_TRIGGERED_CQI_FEEDBACK		0x02
@@ -1938,6 +1941,11 @@ int ieee80211_get_vht_max_nss(struct ieee80211_vht_cap *cap,
 #define IEEE80211_HE_PHY_CAP9_RX_1024_QAM_LESS_THAN_242_TONE_RU		0x08
 #define IEEE80211_HE_PHY_CAP9_RX_FULL_BW_SU_USING_MU_WITH_COMP_SIGB	0x10
 #define IEEE80211_HE_PHY_CAP9_RX_FULL_BW_SU_USING_MU_WITH_NON_COMP_SIGB	0x20
+#define IEEE80211_HE_PHY_CAP9_NOMIMAL_PKT_PADDING_0US			0x00
+#define IEEE80211_HE_PHY_CAP9_NOMIMAL_PKT_PADDING_8US			0x40
+#define IEEE80211_HE_PHY_CAP9_NOMIMAL_PKT_PADDING_16US			0x80
+#define IEEE80211_HE_PHY_CAP9_NOMIMAL_PKT_PADDING_RESERVED		0xc0
+#define IEEE80211_HE_PHY_CAP9_NOMIMAL_PKT_PADDING_MASK			0xc0
 
 /* 802.11ax HE TX/RX MCS NSS Support  */
 #define IEEE80211_TX_RX_MCS_NSS_SUPP_HIGHEST_MCS_POS			(3)
@@ -2016,7 +2024,7 @@ ieee80211_he_ppe_size(u8 ppe_thres_hdr, const u8 *phy_cap_info)
 #define IEEE80211_HE_OPERATION_RTS_THRESHOLD_MASK		0x00003ff0
 #define IEEE80211_HE_OPERATION_RTS_THRESHOLD_OFFSET		4
 #define IEEE80211_HE_OPERATION_VHT_OPER_INFO			0x00004000
-#define IEEE80211_HE_OPERATION_CO_LOCATED_BSS			0x00008000
+#define IEEE80211_HE_OPERATION_CO_HOSTED_BSS			0x00008000
 #define IEEE80211_HE_OPERATION_ER_SU_DISABLE			0x00010000
 #define IEEE80211_HE_OPERATION_BSS_COLOR_MASK			0x3f000000
 #define IEEE80211_HE_OPERATION_BSS_COLOR_OFFSET		24
@@ -2046,7 +2054,7 @@ ieee80211_he_oper_size(const u8 *he_oper_ie)
 	he_oper_params = le32_to_cpu(he_oper->he_oper_params);
 	if (he_oper_params & IEEE80211_HE_OPERATION_VHT_OPER_INFO)
 		oper_len += 3;
-	if (he_oper_params & IEEE80211_HE_OPERATION_CO_LOCATED_BSS)
+	if (he_oper_params & IEEE80211_HE_OPERATION_CO_HOSTED_BSS)
 		oper_len++;
 
 	/* Add the first byte (extension ID) to the total length */
diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c
index 3aa618dcc58e..8e921281e0d5 100644
--- a/net/mac80211/debugfs_sta.c
+++ b/net/mac80211/debugfs_sta.c
@@ -4,7 +4,7 @@
  * Copyright 2007	Johannes Berg <johannes@sipsolutions.net>
  * Copyright 2013-2014  Intel Mobile Communications GmbH
  * Copyright(c) 2016 Intel Deutschland GmbH
- * Copyright (C) 2018 Intel Corporation
+ * Copyright (C) 2018 - 2019 Intel Corporation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -685,6 +685,9 @@ static ssize_t sta_he_capa_read(struct file *file, char __user *userbuf,
 	      "SUBCHAN-SELECVITE-TRANSMISSION");
 	PFLAG(MAC, 5, UL_2x996_TONE_RU, "UL-2x996-TONE-RU");
 	PFLAG(MAC, 5, OM_CTRL_UL_MU_DATA_DIS_RX, "OM-CTRL-UL-MU-DATA-DIS-RX");
+	PFLAG(MAC, 5, HE_DYNAMIC_SM_PS, "HE-DYNAMIC-SM-PS");
+	PFLAG(MAC, 5, PUNCTURED_SOUNDING, "PUNCTURED-SOUNDING");
+	PFLAG(MAC, 5, HT_VHT_TRIG_FRAME_RX, "HT-VHT-TRIG-FRAME-RX");
 
 	cap = hec->he_cap_elem.phy_cap_info;
 	p += scnprintf(p, buf_sz + buf - p,
@@ -819,18 +822,18 @@ static ssize_t sta_he_capa_read(struct file *file, char __user *userbuf,
 	PFLAG(PHY, 8, MIDAMBLE_RX_TX_2X_AND_1XLTF,
 	      "MIDAMBLE-RX-TX-2X-AND-1XLTF");
 
-	switch (cap[8] & IEEE80211_HE_PHY_CAP8_DCM_MAX_BW_MASK) {
-	case IEEE80211_HE_PHY_CAP8_DCM_MAX_BW_20MHZ:
-		PRINT("DDCM-MAX-BW-20MHZ");
+	switch (cap[8] & IEEE80211_HE_PHY_CAP8_DCM_MAX_RU_MASK) {
+	case IEEE80211_HE_PHY_CAP8_DCM_MAX_RU_242:
+		PRINT("DCM-MAX-RU-242");
 		break;
-	case IEEE80211_HE_PHY_CAP8_DCM_MAX_BW_40MHZ:
-		PRINT("DCM-MAX-BW-40MHZ");
+	case IEEE80211_HE_PHY_CAP8_DCM_MAX_RU_484:
+		PRINT("DCM-MAX-RU-484");
 		break;
-	case IEEE80211_HE_PHY_CAP8_DCM_MAX_BW_80MHZ:
-		PRINT("DCM-MAX-BW-80MHZ");
+	case IEEE80211_HE_PHY_CAP8_DCM_MAX_RU_996:
+		PRINT("DCM-MAX-RU-996");
 		break;
-	case IEEE80211_HE_PHY_CAP8_DCM_MAX_BW_160_OR_80P80_MHZ:
-		PRINT("DCM-MAX-BW-160-OR-80P80-MHZ");
+	case IEEE80211_HE_PHY_CAP8_DCM_MAX_RU_2x996:
+		PRINT("DCM-MAX-RU-2x996");
 		break;
 	}
 
@@ -847,6 +850,18 @@ static ssize_t sta_he_capa_read(struct file *file, char __user *userbuf,
 	PFLAG(PHY, 9, RX_FULL_BW_SU_USING_MU_WITH_NON_COMP_SIGB,
 	      "RX-FULL-BW-SU-USING-MU-WITH-NON-COMP-SIGB");
 
+	switch (cap[9] & IEEE80211_HE_PHY_CAP9_NOMIMAL_PKT_PADDING_MASK) {
+	case IEEE80211_HE_PHY_CAP9_NOMIMAL_PKT_PADDING_0US:
+		PRINT("NOMINAL-PACKET-PADDING-0US");
+		break;
+	case IEEE80211_HE_PHY_CAP9_NOMIMAL_PKT_PADDING_8US:
+		PRINT("NOMINAL-PACKET-PADDING-8US");
+		break;
+	case IEEE80211_HE_PHY_CAP9_NOMIMAL_PKT_PADDING_16US:
+		PRINT("NOMINAL-PACKET-PADDING-16US");
+		break;
+	}
+
 #undef PFLAG_RANGE_DEFAULT
 #undef PFLAG_RANGE
 #undef PFLAG
-- 
cgit v1.2.3


From 6c4128f658571b2dc7e01058ad09a8e947bc0159 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Thu, 14 Feb 2019 22:03:27 +0800
Subject: rhashtable: Remove obsolete rhashtable_walk_init function

The rhashtable_walk_init function has been obsolete for more than
two years.  This patch finally converts its last users over to
rhashtable_walk_enter and removes it.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/rhashtable.h |  8 --------
 lib/rhashtable.c           |  2 +-
 lib/test_rhashtable.c      |  9 ++-------
 net/ipv6/ila/ila_xlat.c    | 15 +++------------
 net/netlink/af_netlink.c   | 10 +---------
 5 files changed, 7 insertions(+), 37 deletions(-)

(limited to 'include')

diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index 20f9c6af7473..ae9c0f71f311 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -1113,14 +1113,6 @@ static inline int rhashtable_replace_fast(
 	return err;
 }
 
-/* Obsolete function, do not use in new code. */
-static inline int rhashtable_walk_init(struct rhashtable *ht,
-				       struct rhashtable_iter *iter, gfp_t gfp)
-{
-	rhashtable_walk_enter(ht, iter);
-	return 0;
-}
-
 /**
  * rhltable_walk_enter - Initialise an iterator
  * @hlt:	Table to walk over
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index 852ffa5160f1..0a105d4af166 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -682,7 +682,7 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_enter);
  * rhashtable_walk_exit - Free an iterator
  * @iter:	Hash table Iterator
  *
- * This function frees resources allocated by rhashtable_walk_init.
+ * This function frees resources allocated by rhashtable_walk_enter.
  */
 void rhashtable_walk_exit(struct rhashtable_iter *iter)
 {
diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c
index 2c0c53a99734..3bd2e91bfc29 100644
--- a/lib/test_rhashtable.c
+++ b/lib/test_rhashtable.c
@@ -177,16 +177,11 @@ static int __init test_rht_lookup(struct rhashtable *ht, struct test_obj *array,
 
 static void test_bucket_stats(struct rhashtable *ht, unsigned int entries)
 {
-	unsigned int err, total = 0, chain_len = 0;
+	unsigned int total = 0, chain_len = 0;
 	struct rhashtable_iter hti;
 	struct rhash_head *pos;
 
-	err = rhashtable_walk_init(ht, &hti, GFP_KERNEL);
-	if (err) {
-		pr_warn("Test failed: allocation error");
-		return;
-	}
-
+	rhashtable_walk_enter(ht, &hti);
 	rhashtable_walk_start(&hti);
 
 	while ((pos = rhashtable_walk_next(&hti))) {
diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c
index 17c455ff69ff..ae6cd4cef8db 100644
--- a/net/ipv6/ila/ila_xlat.c
+++ b/net/ipv6/ila/ila_xlat.c
@@ -385,10 +385,7 @@ int ila_xlat_nl_cmd_flush(struct sk_buff *skb, struct genl_info *info)
 	spinlock_t *lock;
 	int ret;
 
-	ret = rhashtable_walk_init(&ilan->xlat.rhash_table, &iter, GFP_KERNEL);
-	if (ret)
-		goto done;
-
+	rhashtable_walk_enter(&ilan->xlat.rhash_table, &iter);
 	rhashtable_walk_start(&iter);
 
 	for (;;) {
@@ -509,23 +506,17 @@ int ila_xlat_nl_dump_start(struct netlink_callback *cb)
 	struct net *net = sock_net(cb->skb->sk);
 	struct ila_net *ilan = net_generic(net, ila_net_id);
 	struct ila_dump_iter *iter;
-	int ret;
 
 	iter = kmalloc(sizeof(*iter), GFP_KERNEL);
 	if (!iter)
 		return -ENOMEM;
 
-	ret = rhashtable_walk_init(&ilan->xlat.rhash_table, &iter->rhiter,
-				   GFP_KERNEL);
-	if (ret) {
-		kfree(iter);
-		return ret;
-	}
+	rhashtable_walk_enter(&ilan->xlat.rhash_table, &iter->rhiter);
 
 	iter->skip = 0;
 	cb->args[0] = (long)iter;
 
-	return ret;
+	return 0;
 }
 
 int ila_xlat_nl_dump_done(struct netlink_callback *cb)
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 8fa35df94c07..f28e937320a3 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -2549,15 +2549,7 @@ struct nl_seq_iter {
 
 static int netlink_walk_start(struct nl_seq_iter *iter)
 {
-	int err;
-
-	err = rhashtable_walk_init(&nl_table[iter->link].hash, &iter->hti,
-				   GFP_KERNEL);
-	if (err) {
-		iter->link = MAX_LINKS;
-		return err;
-	}
-
+	rhashtable_walk_enter(&nl_table[iter->link].hash, &iter->hti);
 	rhashtable_walk_start(&iter->hti);
 
 	return 0;
-- 
cgit v1.2.3


From e5c8ba0635a81f90f51efd3d9a4c0c404e463d0f Mon Sep 17 00:00:00 2001
From: Christian Hohnstaedt <Christian.Hohnstaedt@wago.com>
Date: Fri, 22 Feb 2019 09:38:54 +0100
Subject: regulator: tps65218: Add support for LS2

Re-use the "tps65218_pmic_*_current_limit()" functions of LS3
and calculate the different required bit-shift by counting the
trailing 0s in "struct regulator_desc.csel_mask"

Signed-off-by: Christian Hohnstaedt <Christian.Hohnstaedt@wago.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/tps65218-regulator.c | 18 +++++++++++++-----
 include/linux/mfd/tps65218.h           |  3 ++-
 2 files changed, 15 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/drivers/regulator/tps65218-regulator.c b/drivers/regulator/tps65218-regulator.c
index 6209beee1018..df333b7702cb 100644
--- a/drivers/regulator/tps65218-regulator.c
+++ b/drivers/regulator/tps65218-regulator.c
@@ -204,7 +204,8 @@ static int tps65218_pmic_set_input_current_lim(struct regulator_dev *dev,
 		return -EINVAL;
 
 	return tps65218_set_bits(tps, dev->desc->csel_reg, dev->desc->csel_mask,
-				 index << 2, TPS65218_PROTECT_L1);
+				 index << __builtin_ctz(dev->desc->csel_mask),
+				 TPS65218_PROTECT_L1);
 }
 
 static int tps65218_pmic_set_current_limit(struct regulator_dev *dev,
@@ -223,7 +224,8 @@ static int tps65218_pmic_set_current_limit(struct regulator_dev *dev,
 		return -EINVAL;
 
 	return tps65218_set_bits(tps, dev->desc->csel_reg, dev->desc->csel_mask,
-				 index << 2, TPS65218_PROTECT_L1);
+				 index << __builtin_ctz(dev->desc->csel_mask),
+				 TPS65218_PROTECT_L1);
 }
 
 static int tps65218_pmic_get_current_limit(struct regulator_dev *dev)
@@ -236,12 +238,13 @@ static int tps65218_pmic_get_current_limit(struct regulator_dev *dev)
 	if (retval < 0)
 		return retval;
 
-	index = (index & dev->desc->csel_mask) >> 2;
+	index = (index & dev->desc->csel_mask) >>
+					 __builtin_ctz(dev->desc->csel_mask);
 
 	return ls3_currents[index];
 }
 
-static struct regulator_ops tps65218_ls3_ops = {
+static struct regulator_ops tps65218_ls23_ops = {
 	.is_enabled		= regulator_is_enabled_regmap,
 	.enable			= tps65218_pmic_enable,
 	.disable		= tps65218_pmic_disable,
@@ -303,8 +306,13 @@ static const struct regulator_desc regulators[] = {
 			   TPS65218_ENABLE2_LDO1_EN, 0, 0, ldo1_dcdc3_ranges,
 			   2, 0, 0, TPS65218_REG_SEQ6,
 			   TPS65218_SEQ6_LDO1_SEQ_MASK),
+	TPS65218_REGULATOR("LS2", "regulator-ls2", TPS65218_LS_2,
+			   REGULATOR_CURRENT, tps65218_ls23_ops, 0, 0, 0,
+			   TPS65218_REG_ENABLE2, TPS65218_ENABLE2_LS2_EN,
+			   TPS65218_REG_CONFIG2, TPS65218_CONFIG2_LS2ILIM_MASK,
+			   NULL, 0, 0, 0, 0, 0),
 	TPS65218_REGULATOR("LS3", "regulator-ls3", TPS65218_LS_3,
-			   REGULATOR_CURRENT, tps65218_ls3_ops, 0, 0, 0,
+			   REGULATOR_CURRENT, tps65218_ls23_ops, 0, 0, 0,
 			   TPS65218_REG_ENABLE2, TPS65218_ENABLE2_LS3_EN,
 			   TPS65218_REG_CONFIG2, TPS65218_CONFIG2_LS3ILIM_MASK,
 			   NULL, 0, 0, 0, 0, 0),
diff --git a/include/linux/mfd/tps65218.h b/include/linux/mfd/tps65218.h
index c204d9a79436..45cdcd0fee53 100644
--- a/include/linux/mfd/tps65218.h
+++ b/include/linux/mfd/tps65218.h
@@ -208,6 +208,7 @@ enum tps65218_regulator_id {
 	/* LDOs */
 	TPS65218_LDO_1,
 	/* LS's */
+	TPS65218_LS_2,
 	TPS65218_LS_3,
 };
 
@@ -218,7 +219,7 @@ enum tps65218_regulator_id {
 /* Number of LDO voltage regulators available */
 #define TPS65218_NUM_LDO		1
 /* Number of total LS current regulators available */
-#define TPS65218_NUM_LS			1
+#define TPS65218_NUM_LS			2
 /* Number of total regulators available */
 #define TPS65218_NUM_REGULATOR		(TPS65218_NUM_DCDC + TPS65218_NUM_LDO \
 					 + TPS65218_NUM_LS)
-- 
cgit v1.2.3


From e09d168f13f0d63df7fe095d52be04c16cbe1cef Mon Sep 17 00:00:00 2001
From: "Enrico Weigelt, metux IT consult" <info@metux.net>
Date: Fri, 22 Feb 2019 10:54:15 +0100
Subject: gpio: AMD G-Series PCH gpio driver

GPIO platform driver for the AMD G-series PCH (eg. on GX-412TC)

This driver doesn't registers itself automatically, as it needs to
be provided with platform specific configuration, provided by some
board driver setup code.

Didn't implement oftree probing yet, as it's rarely found on x86.

Cc: linux-gpio@vger.kernel.org
Cc: linus.walleij@linaro.org
Cc: bgolaszewski@baylibre.com
Cc: dvhart@infradead.org
Cc: platform-driver-x86@vger.kernel.org
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Signed-off-by: Enrico Weigelt, metux IT consult <info@metux.net>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 MAINTAINERS                                     |   7 +
 drivers/gpio/Kconfig                            |   9 ++
 drivers/gpio/Makefile                           |   1 +
 drivers/gpio/gpio-amd-fch.c                     | 185 ++++++++++++++++++++++++
 include/linux/platform_data/gpio/gpio-amd-fch.h |  46 ++++++
 5 files changed, 248 insertions(+)
 create mode 100644 drivers/gpio/gpio-amd-fch.c
 create mode 100644 include/linux/platform_data/gpio/gpio-amd-fch.h

(limited to 'include')

diff --git a/MAINTAINERS b/MAINTAINERS
index 9919840d54cd..5e4135c78862 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -766,6 +766,13 @@ S:	Supported
 F:	Documentation/hwmon/fam15h_power
 F:	drivers/hwmon/fam15h_power.c
 
+AMD FCH GPIO DRIVER
+M:	Enrico Weigelt, metux IT consult <info@metux.net>
+L:	linux-gpio@vger.kernel.org
+S:	Maintained
+F:	drivers/gpio/gpio-amd-fch.c
+F:	include/linux/platform_data/gpio/gpio-amd-fch.h
+
 AMD GEODE CS5536 USB DEVICE CONTROLLER DRIVER
 L:	linux-geode@lists.infradead.org (moderated for non-subscribers)
 S:	Orphan
diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig
index 486d9de2716a..3f50526a771f 100644
--- a/drivers/gpio/Kconfig
+++ b/drivers/gpio/Kconfig
@@ -655,6 +655,15 @@ config GPIO_LOONGSON1
 	help
 	  Say Y or M here to support GPIO on Loongson1 SoCs.
 
+config GPIO_AMD_FCH
+	tristate "GPIO support for AMD Fusion Controller Hub (G-series SOCs)"
+	help
+	  This option enables driver for GPIO on AMDs Fusion Controller Hub,
+	  as found on G-series SOCs (eg. GX-412TC)
+
+	  Note: This driver doesn't registers itself automatically, as it
+	  needs to be provided with platform specific configuration.
+	  (See eg. CONFIG_PCENGINES_APU2.)
 endmenu
 
 menu "Port-mapped I/O GPIO drivers"
diff --git a/drivers/gpio/Makefile b/drivers/gpio/Makefile
index 9655927a3dcf..54d55274b93a 100644
--- a/drivers/gpio/Makefile
+++ b/drivers/gpio/Makefile
@@ -27,6 +27,7 @@ obj-$(CONFIG_GPIO_ADP5520)	+= gpio-adp5520.o
 obj-$(CONFIG_GPIO_ADP5588)	+= gpio-adp5588.o
 obj-$(CONFIG_GPIO_ALTERA)  	+= gpio-altera.o
 obj-$(CONFIG_GPIO_ALTERA_A10SR)	+= gpio-altera-a10sr.o
+obj-$(CONFIG_GPIO_AMD_FCH)	+= gpio-amd-fch.o
 obj-$(CONFIG_GPIO_AMD8111)	+= gpio-amd8111.o
 obj-$(CONFIG_GPIO_AMDPT)	+= gpio-amdpt.o
 obj-$(CONFIG_GPIO_ARIZONA)	+= gpio-arizona.o
diff --git a/drivers/gpio/gpio-amd-fch.c b/drivers/gpio/gpio-amd-fch.c
new file mode 100644
index 000000000000..3b4fdce325c1
--- /dev/null
+++ b/drivers/gpio/gpio-amd-fch.c
@@ -0,0 +1,185 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+/*
+ * GPIO driver for the AMD G series FCH (eg. GX-412TC)
+ *
+ * Copyright (C) 2018 metux IT consult
+ * Author: Enrico Weigelt, metux IT consult <info@metux.net>
+ *
+ */
+
+#include <linux/err.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/gpio/driver.h>
+#include <linux/platform_data/gpio/gpio-amd-fch.h>
+#include <linux/spinlock.h>
+
+#define AMD_FCH_MMIO_BASE		0xFED80000
+#define AMD_FCH_GPIO_BANK0_BASE		0x1500
+#define AMD_FCH_GPIO_SIZE		0x0300
+
+#define AMD_FCH_GPIO_FLAG_DIRECTION	BIT(23)
+#define AMD_FCH_GPIO_FLAG_WRITE		BIT(22)
+#define AMD_FCH_GPIO_FLAG_READ		BIT(16)
+
+static const struct resource amd_fch_gpio_iores =
+	DEFINE_RES_MEM_NAMED(
+		AMD_FCH_MMIO_BASE + AMD_FCH_GPIO_BANK0_BASE,
+		AMD_FCH_GPIO_SIZE,
+		"amd-fch-gpio-iomem");
+
+struct amd_fch_gpio_priv {
+	struct platform_device		*pdev;
+	struct gpio_chip		gc;
+	void __iomem			*base;
+	struct amd_fch_gpio_pdata	*pdata;
+	spinlock_t			lock;
+};
+
+static void *amd_fch_gpio_addr(struct amd_fch_gpio_priv *priv,
+			       unsigned int gpio)
+{
+	return priv->base + priv->pdata->gpio_reg[gpio]*sizeof(u32);
+}
+
+static int amd_fch_gpio_direction_input(struct gpio_chip *gc,
+					unsigned int offset)
+{
+	unsigned long flags;
+	struct amd_fch_gpio_priv *priv = gpiochip_get_data(gc);
+	void *ptr = amd_fch_gpio_addr(priv, offset);
+
+	spin_lock_irqsave(&priv->lock, flags);
+	writel_relaxed(readl_relaxed(ptr) & ~AMD_FCH_GPIO_FLAG_DIRECTION, ptr);
+	spin_unlock_irqrestore(&priv->lock, flags);
+
+	return 0;
+}
+
+static int amd_fch_gpio_direction_output(struct gpio_chip *gc,
+					 unsigned int gpio, int value)
+{
+	unsigned long flags;
+	struct amd_fch_gpio_priv *priv = gpiochip_get_data(gc);
+	void *ptr = amd_fch_gpio_addr(priv, gpio);
+
+	spin_lock_irqsave(&priv->lock, flags);
+	writel_relaxed(readl_relaxed(ptr) | AMD_FCH_GPIO_FLAG_DIRECTION, ptr);
+	spin_unlock_irqrestore(&priv->lock, flags);
+
+	return 0;
+}
+
+static int amd_fch_gpio_get_direction(struct gpio_chip *gc, unsigned int gpio)
+{
+	int ret;
+	unsigned long flags;
+	struct amd_fch_gpio_priv *priv = gpiochip_get_data(gc);
+	void *ptr = amd_fch_gpio_addr(priv, gpio);
+
+	spin_lock_irqsave(&priv->lock, flags);
+	ret = (readl_relaxed(ptr) & AMD_FCH_GPIO_FLAG_DIRECTION);
+	spin_unlock_irqrestore(&priv->lock, flags);
+
+	return ret;
+}
+
+static void amd_fch_gpio_set(struct gpio_chip *gc,
+			     unsigned int gpio, int value)
+{
+	unsigned long flags;
+	struct amd_fch_gpio_priv *priv = gpiochip_get_data(gc);
+	void *ptr = amd_fch_gpio_addr(priv, gpio);
+	u32 mask;
+
+	spin_lock_irqsave(&priv->lock, flags);
+
+	mask = readl_relaxed(ptr);
+	if (value)
+		mask |= AMD_FCH_GPIO_FLAG_WRITE;
+	else
+		mask &= ~AMD_FCH_GPIO_FLAG_WRITE;
+	writel_relaxed(mask, ptr);
+
+	spin_unlock_irqrestore(&priv->lock, flags);
+}
+
+static int amd_fch_gpio_get(struct gpio_chip *gc,
+			    unsigned int offset)
+{
+	unsigned long flags;
+	int ret;
+	struct amd_fch_gpio_priv *priv = gpiochip_get_data(gc);
+	void *ptr = amd_fch_gpio_addr(priv, offset);
+
+	spin_lock_irqsave(&priv->lock, flags);
+	ret = (readl_relaxed(ptr) & AMD_FCH_GPIO_FLAG_READ);
+	spin_unlock_irqrestore(&priv->lock, flags);
+
+	return ret;
+}
+
+static int amd_fch_gpio_request(struct gpio_chip *chip,
+				unsigned int gpio_pin)
+{
+	return 0;
+}
+
+static int amd_fch_gpio_probe(struct platform_device *pdev)
+{
+	struct amd_fch_gpio_priv *priv;
+	struct amd_fch_gpio_pdata *pdata;
+
+	pdata = dev_get_platdata(&pdev->dev);
+	if (!pdata) {
+		dev_err(&pdev->dev, "no platform_data\n");
+		return -ENOENT;
+	}
+
+	priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	priv->pdata	= pdata;
+	priv->pdev	= pdev;
+
+	priv->gc.owner			= THIS_MODULE;
+	priv->gc.parent			= &pdev->dev;
+	priv->gc.label			= dev_name(&pdev->dev);
+	priv->gc.ngpio			= priv->pdata->gpio_num;
+	priv->gc.names			= priv->pdata->gpio_names;
+	priv->gc.base			= -1;
+	priv->gc.request		= amd_fch_gpio_request;
+	priv->gc.direction_input	= amd_fch_gpio_direction_input;
+	priv->gc.direction_output	= amd_fch_gpio_direction_output;
+	priv->gc.get_direction		= amd_fch_gpio_get_direction;
+	priv->gc.get			= amd_fch_gpio_get;
+	priv->gc.set			= amd_fch_gpio_set;
+
+	spin_lock_init(&priv->lock);
+
+	priv->base = devm_ioremap_resource(&pdev->dev, &amd_fch_gpio_iores);
+	if (IS_ERR(priv->base))
+		return PTR_ERR(priv->base);
+
+	platform_set_drvdata(pdev, priv);
+
+	return devm_gpiochip_add_data(&pdev->dev, &priv->gc, priv);
+}
+
+static struct platform_driver amd_fch_gpio_driver = {
+	.driver = {
+		.name = AMD_FCH_GPIO_DRIVER_NAME,
+	},
+	.probe = amd_fch_gpio_probe,
+};
+
+module_platform_driver(amd_fch_gpio_driver);
+
+MODULE_AUTHOR("Enrico Weigelt, metux IT consult <info@metux.net>");
+MODULE_DESCRIPTION("AMD G-series FCH GPIO driver");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:" AMD_FCH_GPIO_DRIVER_NAME);
diff --git a/include/linux/platform_data/gpio/gpio-amd-fch.h b/include/linux/platform_data/gpio/gpio-amd-fch.h
new file mode 100644
index 000000000000..a867637e172d
--- /dev/null
+++ b/include/linux/platform_data/gpio/gpio-amd-fch.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: GPL+ */
+
+/*
+ * AMD FCH gpio driver platform-data
+ *
+ * Copyright (C) 2018 metux IT consult
+ * Author: Enrico Weigelt <info@metux.net>
+ *
+ */
+
+#ifndef __LINUX_PLATFORM_DATA_GPIO_AMD_FCH_H
+#define __LINUX_PLATFORM_DATA_GPIO_AMD_FCH_H
+
+#define AMD_FCH_GPIO_DRIVER_NAME "gpio_amd_fch"
+
+/*
+ * gpio register index definitions
+ */
+#define AMD_FCH_GPIO_REG_GPIO49		0x40
+#define AMD_FCH_GPIO_REG_GPIO50		0x41
+#define AMD_FCH_GPIO_REG_GPIO51		0x42
+#define AMD_FCH_GPIO_REG_GPIO59_DEVSLP0	0x43
+#define AMD_FCH_GPIO_REG_GPIO57		0x44
+#define AMD_FCH_GPIO_REG_GPIO58		0x45
+#define AMD_FCH_GPIO_REG_GPIO59_DEVSLP1	0x46
+#define AMD_FCH_GPIO_REG_GPIO64		0x47
+#define AMD_FCH_GPIO_REG_GPIO68		0x48
+#define AMD_FCH_GPIO_REG_GPIO66_SPKR	0x5B
+#define AMD_FCH_GPIO_REG_GPIO71		0x4D
+#define AMD_FCH_GPIO_REG_GPIO32_GE1	0x59
+#define AMD_FCH_GPIO_REG_GPIO33_GE2	0x5A
+#define AMT_FCH_GPIO_REG_GEVT22		0x09
+
+/*
+ * struct amd_fch_gpio_pdata - GPIO chip platform data
+ * @gpio_num: number of entries
+ * @gpio_reg: array of gpio registers
+ * @gpio_names: array of gpio names
+ */
+struct amd_fch_gpio_pdata {
+	int			gpio_num;
+	int			*gpio_reg;
+	const char * const	*gpio_names;
+};
+
+#endif /* __LINUX_PLATFORM_DATA_GPIO_AMD_FCH_H */
-- 
cgit v1.2.3


From c60f83b813e5b25ccd5de7e8c8925c31b3aebcc1 Mon Sep 17 00:00:00 2001
From: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Date: Fri, 15 Feb 2019 13:56:55 +0200
Subject: perf, pt, coresight: Fix address filters for vmas with non-zero
 offset

Currently, the address range calculation for file-based filters works as
long as the vma that maps the matching part of the object file starts
from offset zero into the file (vm_pgoff==0). Otherwise, the resulting
filter range would be off by vm_pgoff pages. Another related problem is
that in case of a partially matching vma, that is, a vma that matches
part of a filter region, the filter range size wouldn't be adjusted.

Fix the arithmetics around address filter range calculations, taking
into account vma offset, so that the entire calculation is done before
the filter configuration is passed to the PMU drivers instead of having
those drivers do the final bit of arithmetics.

Based on the patch by Adrian Hunter <adrian.hunter.intel.com>.

Reported-by: Adrian Hunter <adrian.hunter@intel.com>
Signed-off-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Tested-by: Mathieu Poirier <mathieu.poirier@linaro.org>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Fixes: 375637bc5249 ("perf/core: Introduce address range filtering")
Link: http://lkml.kernel.org/r/20190215115655.63469-3-alexander.shishkin@linux.intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 arch/x86/events/intel/pt.c                       |  9 +--
 drivers/hwtracing/coresight/coresight-etm-perf.c |  7 +-
 include/linux/perf_event.h                       |  7 +-
 kernel/events/core.c                             | 81 ++++++++++++++----------
 4 files changed, 62 insertions(+), 42 deletions(-)

(limited to 'include')

diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c
index c0e86ff21f81..fb3a2f13fc70 100644
--- a/arch/x86/events/intel/pt.c
+++ b/arch/x86/events/intel/pt.c
@@ -1223,7 +1223,8 @@ static int pt_event_addr_filters_validate(struct list_head *filters)
 static void pt_event_addr_filters_sync(struct perf_event *event)
 {
 	struct perf_addr_filters_head *head = perf_event_addr_filters(event);
-	unsigned long msr_a, msr_b, *offs = event->addr_filters_offs;
+	unsigned long msr_a, msr_b;
+	struct perf_addr_filter_range *fr = event->addr_filter_ranges;
 	struct pt_filters *filters = event->hw.addr_filters;
 	struct perf_addr_filter *filter;
 	int range = 0;
@@ -1232,12 +1233,12 @@ static void pt_event_addr_filters_sync(struct perf_event *event)
 		return;
 
 	list_for_each_entry(filter, &head->list, entry) {
-		if (filter->path.dentry && !offs[range]) {
+		if (filter->path.dentry && !fr[range].start) {
 			msr_a = msr_b = 0;
 		} else {
 			/* apply the offset */
-			msr_a = filter->offset + offs[range];
-			msr_b = filter->size + msr_a - 1;
+			msr_a = fr[range].start;
+			msr_b = msr_a + fr[range].size - 1;
 		}
 
 		filters->filter[range].msr_a  = msr_a;
diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c b/drivers/hwtracing/coresight/coresight-etm-perf.c
index 8c88bf0a1e5f..4d5a2b9f9d6a 100644
--- a/drivers/hwtracing/coresight/coresight-etm-perf.c
+++ b/drivers/hwtracing/coresight/coresight-etm-perf.c
@@ -433,15 +433,16 @@ static int etm_addr_filters_validate(struct list_head *filters)
 static void etm_addr_filters_sync(struct perf_event *event)
 {
 	struct perf_addr_filters_head *head = perf_event_addr_filters(event);
-	unsigned long start, stop, *offs = event->addr_filters_offs;
+	unsigned long start, stop;
+	struct perf_addr_filter_range *fr = event->addr_filter_ranges;
 	struct etm_filters *filters = event->hw.addr_filters;
 	struct etm_filter *etm_filter;
 	struct perf_addr_filter *filter;
 	int i = 0;
 
 	list_for_each_entry(filter, &head->list, entry) {
-		start = filter->offset + offs[i];
-		stop = start + filter->size;
+		start = fr[i].start;
+		stop = start + fr[i].size;
 		etm_filter = &filters->etm_filter[i];
 
 		switch (filter->action) {
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index d9c3610e0e25..6ebc72f65017 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -490,6 +490,11 @@ struct perf_addr_filters_head {
 	unsigned int		nr_file_filters;
 };
 
+struct perf_addr_filter_range {
+	unsigned long		start;
+	unsigned long		size;
+};
+
 /**
  * enum perf_event_state - the states of an event:
  */
@@ -666,7 +671,7 @@ struct perf_event {
 	/* address range filters */
 	struct perf_addr_filters_head	addr_filters;
 	/* vma address array for file-based filders */
-	unsigned long			*addr_filters_offs;
+	struct perf_addr_filter_range	*addr_filter_ranges;
 	unsigned long			addr_filters_gen;
 
 	void (*destroy)(struct perf_event *);
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 2d89efc0a3e0..16609f6737da 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -2799,7 +2799,7 @@ static int perf_event_stop(struct perf_event *event, int restart)
  *
  * (p1) when userspace mappings change as a result of (1) or (2) or (3) below,
  *      we update the addresses of corresponding vmas in
- *	event::addr_filters_offs array and bump the event::addr_filters_gen;
+ *	event::addr_filter_ranges array and bump the event::addr_filters_gen;
  * (p2) when an event is scheduled in (pmu::add), it calls
  *      perf_event_addr_filters_sync() which calls pmu::addr_filters_sync()
  *      if the generation has changed since the previous call.
@@ -4446,7 +4446,7 @@ static void _free_event(struct perf_event *event)
 
 	perf_event_free_bpf_prog(event);
 	perf_addr_filters_splice(event, NULL);
-	kfree(event->addr_filters_offs);
+	kfree(event->addr_filter_ranges);
 
 	if (event->destroy)
 		event->destroy(event);
@@ -6687,7 +6687,8 @@ static void perf_event_addr_filters_exec(struct perf_event *event, void *data)
 	raw_spin_lock_irqsave(&ifh->lock, flags);
 	list_for_each_entry(filter, &ifh->list, entry) {
 		if (filter->path.dentry) {
-			event->addr_filters_offs[count] = 0;
+			event->addr_filter_ranges[count].start = 0;
+			event->addr_filter_ranges[count].size = 0;
 			restart++;
 		}
 
@@ -7367,28 +7368,47 @@ static bool perf_addr_filter_match(struct perf_addr_filter *filter,
 	return true;
 }
 
+static bool perf_addr_filter_vma_adjust(struct perf_addr_filter *filter,
+					struct vm_area_struct *vma,
+					struct perf_addr_filter_range *fr)
+{
+	unsigned long vma_size = vma->vm_end - vma->vm_start;
+	unsigned long off = vma->vm_pgoff << PAGE_SHIFT;
+	struct file *file = vma->vm_file;
+
+	if (!perf_addr_filter_match(filter, file, off, vma_size))
+		return false;
+
+	if (filter->offset < off) {
+		fr->start = vma->vm_start;
+		fr->size = min(vma_size, filter->size - (off - filter->offset));
+	} else {
+		fr->start = vma->vm_start + filter->offset - off;
+		fr->size = min(vma->vm_end - fr->start, filter->size);
+	}
+
+	return true;
+}
+
 static void __perf_addr_filters_adjust(struct perf_event *event, void *data)
 {
 	struct perf_addr_filters_head *ifh = perf_event_addr_filters(event);
 	struct vm_area_struct *vma = data;
-	unsigned long off = vma->vm_pgoff << PAGE_SHIFT, flags;
-	struct file *file = vma->vm_file;
 	struct perf_addr_filter *filter;
 	unsigned int restart = 0, count = 0;
+	unsigned long flags;
 
 	if (!has_addr_filter(event))
 		return;
 
-	if (!file)
+	if (!vma->vm_file)
 		return;
 
 	raw_spin_lock_irqsave(&ifh->lock, flags);
 	list_for_each_entry(filter, &ifh->list, entry) {
-		if (perf_addr_filter_match(filter, file, off,
-					     vma->vm_end - vma->vm_start)) {
-			event->addr_filters_offs[count] = vma->vm_start;
+		if (perf_addr_filter_vma_adjust(filter, vma,
+						&event->addr_filter_ranges[count]))
 			restart++;
-		}
 
 		count++;
 	}
@@ -8978,26 +8998,19 @@ static void perf_addr_filters_splice(struct perf_event *event,
  * @filter; if so, adjust filter's address range.
  * Called with mm::mmap_sem down for reading.
  */
-static unsigned long perf_addr_filter_apply(struct perf_addr_filter *filter,
-					    struct mm_struct *mm)
+static void perf_addr_filter_apply(struct perf_addr_filter *filter,
+				   struct mm_struct *mm,
+				   struct perf_addr_filter_range *fr)
 {
 	struct vm_area_struct *vma;
 
 	for (vma = mm->mmap; vma; vma = vma->vm_next) {
-		struct file *file = vma->vm_file;
-		unsigned long off = vma->vm_pgoff << PAGE_SHIFT;
-		unsigned long vma_size = vma->vm_end - vma->vm_start;
-
-		if (!file)
+		if (!vma->vm_file)
 			continue;
 
-		if (!perf_addr_filter_match(filter, file, off, vma_size))
-			continue;
-
-		return vma->vm_start;
+		if (perf_addr_filter_vma_adjust(filter, vma, fr))
+			return;
 	}
-
-	return 0;
 }
 
 /*
@@ -9031,15 +9044,15 @@ static void perf_event_addr_filters_apply(struct perf_event *event)
 
 	raw_spin_lock_irqsave(&ifh->lock, flags);
 	list_for_each_entry(filter, &ifh->list, entry) {
-		event->addr_filters_offs[count] = 0;
+		event->addr_filter_ranges[count].start = 0;
+		event->addr_filter_ranges[count].size = 0;
 
 		/*
 		 * Adjust base offset if the filter is associated to a binary
 		 * that needs to be mapped:
 		 */
 		if (filter->path.dentry)
-			event->addr_filters_offs[count] =
-				perf_addr_filter_apply(filter, mm);
+			perf_addr_filter_apply(filter, mm, &event->addr_filter_ranges[count]);
 
 		count++;
 	}
@@ -10305,10 +10318,10 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
 		goto err_pmu;
 
 	if (has_addr_filter(event)) {
-		event->addr_filters_offs = kcalloc(pmu->nr_addr_filters,
-						   sizeof(unsigned long),
-						   GFP_KERNEL);
-		if (!event->addr_filters_offs) {
+		event->addr_filter_ranges = kcalloc(pmu->nr_addr_filters,
+						    sizeof(struct perf_addr_filter_range),
+						    GFP_KERNEL);
+		if (!event->addr_filter_ranges) {
 			err = -ENOMEM;
 			goto err_per_task;
 		}
@@ -10321,9 +10334,9 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
 			struct perf_addr_filters_head *ifh = perf_event_addr_filters(event);
 
 			raw_spin_lock_irq(&ifh->lock);
-			memcpy(event->addr_filters_offs,
-			       event->parent->addr_filters_offs,
-			       pmu->nr_addr_filters * sizeof(unsigned long));
+			memcpy(event->addr_filter_ranges,
+			       event->parent->addr_filter_ranges,
+			       pmu->nr_addr_filters * sizeof(struct perf_addr_filter_range));
 			raw_spin_unlock_irq(&ifh->lock);
 		}
 
@@ -10345,7 +10358,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
 	return event;
 
 err_addr_filters:
-	kfree(event->addr_filters_offs);
+	kfree(event->addr_filter_ranges);
 
 err_per_task:
 	exclusive_event_destroy(event);
-- 
cgit v1.2.3


From d724444ab97d2cdf1c4dbfc1ff65d0b18a9631e0 Mon Sep 17 00:00:00 2001
From: Mike Rapoport <rppt@linux.ibm.com>
Date: Thu, 21 Feb 2019 13:36:31 +0200
Subject: asm-generic/page.h: fix typo in #error text requiring a real
 asm/page.h

Replace "need to prove a real asm/page.h" with "need to provide a real
asm/page.h"

Signed-off-by: Mike Rapoport <rppt@linux.ibm.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 include/asm-generic/page.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/asm-generic/page.h b/include/asm-generic/page.h
index 27bf3377b0cb..fe801f01625e 100644
--- a/include/asm-generic/page.h
+++ b/include/asm-generic/page.h
@@ -7,7 +7,7 @@
  */
 
 #ifdef CONFIG_MMU
-#error need to prove a real asm/page.h
+#error need to provide a real asm/page.h
 #endif
 
 
-- 
cgit v1.2.3


From d2aa125d629080c4f3e31f23b7f612ef6b8492ac Mon Sep 17 00:00:00 2001
From: Maxim Mikityanskiy <maximmi@mellanox.com>
Date: Thu, 21 Feb 2019 12:39:57 +0000
Subject: net: Don't set transport offset to invalid value

If the socket was created with socket(AF_PACKET, SOCK_RAW, 0),
skb->protocol will be unset, __skb_flow_dissect() will fail, and
skb_probe_transport_header() will fall back to the offset_hint, making
the resulting skb_transport_offset incorrect.

If, however, there is no transport header in the packet,
transport_header shouldn't be set to an arbitrary value.

Fix it by leaving the transport offset unset if it couldn't be found, to
be explicit rather than to fill it with some wrong value. It changes the
behavior, but if some code relied on the old behavior, it would be
broken anyway, as the old one is incorrect.

Signed-off-by: Maxim Mikityanskiy <maximmi@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/tap.c                 |  4 ++--
 drivers/net/tun.c                 |  4 ++--
 drivers/net/xen-netback/netback.c | 15 ++++++++++++---
 include/linux/skbuff.h            |  5 +----
 include/linux/virtio_net.h        |  2 +-
 net/packet/af_packet.c            |  6 +++---
 6 files changed, 21 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/drivers/net/tap.c b/drivers/net/tap.c
index c0b52e48f0e6..2ea9b4976f4a 100644
--- a/drivers/net/tap.c
+++ b/drivers/net/tap.c
@@ -712,7 +712,7 @@ static ssize_t tap_get_user(struct tap_queue *q, void *msg_control,
 			goto err_kfree;
 	}
 
-	skb_probe_transport_header(skb, ETH_HLEN);
+	skb_probe_transport_header(skb);
 
 	/* Move network header to the right position for VLAN tagged packets */
 	if ((skb->protocol == htons(ETH_P_8021Q) ||
@@ -1187,7 +1187,7 @@ static int tap_get_user_xdp(struct tap_queue *q, struct xdp_buff *xdp)
 	tap = rcu_dereference(q->tap);
 	if (tap) {
 		skb->dev = tap->dev;
-		skb_probe_transport_header(skb, ETH_HLEN);
+		skb_probe_transport_header(skb);
 		dev_queue_xmit(skb);
 	} else {
 		kfree_skb(skb);
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index fed298c0cb39..80bff1b4ec17 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -1929,7 +1929,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
 	}
 
 	skb_reset_network_header(skb);
-	skb_probe_transport_header(skb, 0);
+	skb_probe_transport_header(skb);
 
 	if (skb_xdp) {
 		struct bpf_prog *xdp_prog;
@@ -2482,7 +2482,7 @@ build:
 
 	skb->protocol = eth_type_trans(skb, tun->dev);
 	skb_reset_network_header(skb);
-	skb_probe_transport_header(skb, 0);
+	skb_probe_transport_header(skb);
 
 	if (skb_xdp) {
 		err = do_xdp_generic(xdp_prog, skb);
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index 80aae3a32c2a..c801a832851c 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -1169,15 +1169,24 @@ static int xenvif_tx_submit(struct xenvif_queue *queue)
 			continue;
 		}
 
-		skb_probe_transport_header(skb, 0);
+		skb_probe_transport_header(skb);
 
 		/* If the packet is GSO then we will have just set up the
 		 * transport header offset in checksum_setup so it's now
 		 * straightforward to calculate gso_segs.
 		 */
 		if (skb_is_gso(skb)) {
-			int mss = skb_shinfo(skb)->gso_size;
-			int hdrlen = skb_transport_header(skb) -
+			int mss, hdrlen;
+
+			/* GSO implies having the L4 header. */
+			WARN_ON_ONCE(!skb_transport_header_was_set(skb));
+			if (unlikely(!skb_transport_header_was_set(skb))) {
+				kfree_skb(skb);
+				continue;
+			}
+
+			mss = skb_shinfo(skb)->gso_size;
+			hdrlen = skb_transport_header(skb) -
 				skb_mac_header(skb) +
 				tcp_hdrlen(skb);
 
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 2069fb90a559..27beb549ffbe 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -2429,8 +2429,7 @@ static inline void skb_pop_mac_header(struct sk_buff *skb)
 	skb->mac_header = skb->network_header;
 }
 
-static inline void skb_probe_transport_header(struct sk_buff *skb,
-					      const int offset_hint)
+static inline void skb_probe_transport_header(struct sk_buff *skb)
 {
 	struct flow_keys_basic keys;
 
@@ -2439,8 +2438,6 @@ static inline void skb_probe_transport_header(struct sk_buff *skb,
 
 	if (skb_flow_dissect_flow_keys_basic(skb, &keys, NULL, 0, 0, 0, 0))
 		skb_set_transport_header(skb, keys.control.thoff);
-	else if (offset_hint >= 0)
-		skb_set_transport_header(skb, offset_hint);
 }
 
 static inline void skb_mac_header_rebuild(struct sk_buff *skb)
diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h
index 71f2394abbf7..6728bf581e98 100644
--- a/include/linux/virtio_net.h
+++ b/include/linux/virtio_net.h
@@ -62,7 +62,7 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb,
 		 * probe and drop if does not match one of the above types.
 		 */
 		if (gso_type) {
-			skb_probe_transport_header(skb, -1);
+			skb_probe_transport_header(skb);
 			if (!skb_transport_header_was_set(skb))
 				return -EINVAL;
 		}
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 1cd1d83a4be0..6afd6369d19e 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1970,7 +1970,7 @@ retry:
 	if (unlikely(extra_len == 4))
 		skb->no_fcs = 1;
 
-	skb_probe_transport_header(skb, 0);
+	skb_probe_transport_header(skb);
 
 	dev_queue_xmit(skb);
 	rcu_read_unlock();
@@ -2519,7 +2519,7 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
 		len = ((to_write > len_max) ? len_max : to_write);
 	}
 
-	skb_probe_transport_header(skb, 0);
+	skb_probe_transport_header(skb);
 
 	return tp_len;
 }
@@ -2925,7 +2925,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
 		virtio_net_hdr_set_proto(skb, &vnet_hdr);
 	}
 
-	skb_probe_transport_header(skb, reserve);
+	skb_probe_transport_header(skb);
 
 	if (unlikely(extra_len == 4))
 		skb->no_fcs = 1;
-- 
cgit v1.2.3


From e78b2915517e8fcadb1bc130ad6aeac7099e510c Mon Sep 17 00:00:00 2001
From: Maxim Mikityanskiy <maximmi@mellanox.com>
Date: Thu, 21 Feb 2019 12:39:58 +0000
Subject: net: Introduce parse_protocol header_ops callback

Introduce a new optional header_ops callback called parse_protocol and a
wrapper function dev_parse_header_protocol, similar to dev_parse_header.

The new callback's purpose is to extract the protocol number from the L2
header, the format of which is known to the driver, but not to the upper
layers of the stack.

Signed-off-by: Maxim Mikityanskiy <maximmi@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index aab4d9f6613d..6997f62cb6a0 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -274,6 +274,7 @@ struct header_ops {
 				const struct net_device *dev,
 				const unsigned char *haddr);
 	bool	(*validate)(const char *ll_header, unsigned int len);
+	__be16	(*parse_protocol)(const struct sk_buff *skb);
 };
 
 /* These flag bits are private to the generic network queueing
@@ -2939,6 +2940,15 @@ static inline int dev_parse_header(const struct sk_buff *skb,
 	return dev->header_ops->parse(skb, haddr);
 }
 
+static inline __be16 dev_parse_header_protocol(const struct sk_buff *skb)
+{
+	const struct net_device *dev = skb->dev;
+
+	if (!dev->header_ops || !dev->header_ops->parse_protocol)
+		return 0;
+	return dev->header_ops->parse_protocol(skb);
+}
+
 /* ll_header must have at least hard_header_len allocated */
 static inline bool dev_validate_header(const struct net_device *dev,
 				       char *ll_header, int len)
-- 
cgit v1.2.3


From ace53b2e2945c83850964070af158be01d564e67 Mon Sep 17 00:00:00 2001
From: Maxim Mikityanskiy <maximmi@mellanox.com>
Date: Thu, 21 Feb 2019 12:39:59 +0000
Subject: net/ethernet: Add parse_protocol header_ops support

The previous commit introduced parse_protocol callback which should
extract the protocol number from the L2 header. Make all Ethernet
devices support it.

Signed-off-by: Maxim Mikityanskiy <maximmi@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/etherdevice.h |  1 +
 net/ethernet/eth.c          | 13 +++++++++++++
 2 files changed, 14 insertions(+)

(limited to 'include')

diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h
index 2c0af7b00715..e2f3b21cd72a 100644
--- a/include/linux/etherdevice.h
+++ b/include/linux/etherdevice.h
@@ -44,6 +44,7 @@ int eth_header_cache(const struct neighbour *neigh, struct hh_cache *hh,
 		     __be16 type);
 void eth_header_cache_update(struct hh_cache *hh, const struct net_device *dev,
 			     const unsigned char *haddr);
+__be16 eth_header_parse_protocol(const struct sk_buff *skb);
 int eth_prepare_mac_addr_change(struct net_device *dev, void *p);
 void eth_commit_mac_addr_change(struct net_device *dev, void *p);
 int eth_mac_addr(struct net_device *dev, void *p);
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index 4c520110b04f..f7a3d7a171c7 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -264,6 +264,18 @@ void eth_header_cache_update(struct hh_cache *hh,
 }
 EXPORT_SYMBOL(eth_header_cache_update);
 
+/**
+ * eth_header_parser_protocol - extract protocol from L2 header
+ * @skb: packet to extract protocol from
+ */
+__be16 eth_header_parse_protocol(const struct sk_buff *skb)
+{
+	const struct ethhdr *eth = eth_hdr(skb);
+
+	return eth->h_proto;
+}
+EXPORT_SYMBOL(eth_header_parse_protocol);
+
 /**
  * eth_prepare_mac_addr_change - prepare for mac change
  * @dev: network device
@@ -346,6 +358,7 @@ const struct header_ops eth_header_ops ____cacheline_aligned = {
 	.parse		= eth_header_parse,
 	.cache		= eth_header_cache,
 	.cache_update	= eth_header_cache_update,
+	.parse_protocol	= eth_header_parse_protocol,
 };
 
 /**
-- 
cgit v1.2.3


From a2a074ef396f8738d9ee08ceefa8811381a4fe4f Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Tue, 12 Feb 2019 20:39:16 +0200
Subject: RDMA: Handle ucontext allocations by IB/core

Following the PD conversion patch, do the same for ucontext allocations.

Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/core/device.c                |  1 +
 drivers/infiniband/core/rdma_core.c             |  9 +----
 drivers/infiniband/core/uverbs_cmd.c            | 24 +++++++-----
 drivers/infiniband/hw/bnxt_re/ib_verbs.c        | 32 +++++-----------
 drivers/infiniband/hw/bnxt_re/ib_verbs.h        |  7 ++--
 drivers/infiniband/hw/bnxt_re/main.c            |  1 +
 drivers/infiniband/hw/cxgb3/iwch_provider.c     | 17 ++++-----
 drivers/infiniband/hw/cxgb4/provider.c          | 26 +++++--------
 drivers/infiniband/hw/hns/hns_roce_main.c       | 26 +++++--------
 drivers/infiniband/hw/i40iw/i40iw_verbs.c       | 50 +++++++------------------
 drivers/infiniband/hw/mlx4/main.c               | 30 ++++++---------
 drivers/infiniband/hw/mlx5/main.c               | 35 +++++++----------
 drivers/infiniband/hw/mthca/mthca_provider.c    | 39 +++++++------------
 drivers/infiniband/hw/nes/nes_verbs.c           | 32 ++++++----------
 drivers/infiniband/hw/ocrdma/ocrdma_main.c      |  1 +
 drivers/infiniband/hw/ocrdma/ocrdma_verbs.c     | 38 +++++++------------
 drivers/infiniband/hw/ocrdma/ocrdma_verbs.h     |  5 +--
 drivers/infiniband/hw/qedr/main.c               |  1 +
 drivers/infiniband/hw/qedr/verbs.c              | 34 +++++------------
 drivers/infiniband/hw/qedr/verbs.h              |  4 +-
 drivers/infiniband/hw/usnic/usnic_ib_main.c     |  1 +
 drivers/infiniband/hw/usnic/usnic_ib_verbs.c    | 18 +++------
 drivers/infiniband/hw/usnic/usnic_ib_verbs.h    |  5 +--
 drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c  |  1 +
 drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c | 50 ++++++++-----------------
 drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h |  5 +--
 drivers/infiniband/sw/rdmavt/vt.c               | 22 ++++-------
 drivers/infiniband/sw/rxe/rxe_pool.c            |  1 +
 drivers/infiniband/sw/rxe/rxe_verbs.c           | 14 +++----
 drivers/infiniband/sw/rxe/rxe_verbs.h           |  2 +-
 include/rdma/ib_verbs.h                         |  7 ++--
 31 files changed, 198 insertions(+), 340 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index bf2a215d94dd..a9f29156e486 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -1832,6 +1832,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
 	SET_DEVICE_OP(dev_ops, unmap_fmr);
 
 	SET_OBJ_SIZE(dev_ops, ib_pd);
+	SET_OBJ_SIZE(dev_ops, ib_ucontext);
 }
 EXPORT_SYMBOL(ib_set_device_ops);
 
diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c
index 96f919fe86e7..778375ff664e 100644
--- a/drivers/infiniband/core/rdma_core.c
+++ b/drivers/infiniband/core/rdma_core.c
@@ -844,7 +844,6 @@ static void ufile_destroy_ucontext(struct ib_uverbs_file *ufile,
 {
 	struct ib_ucontext *ucontext = ufile->ucontext;
 	struct ib_device *ib_dev = ucontext->device;
-	int ret;
 
 	/*
 	 * If we are closing the FD then the user mmap VMAs must have
@@ -862,12 +861,8 @@ static void ufile_destroy_ucontext(struct ib_uverbs_file *ufile,
 
 	rdma_restrack_del(&ucontext->res);
 
-	/*
-	 * FIXME: Drivers are not permitted to fail dealloc_ucontext, remove
-	 * the error return.
-	 */
-	ret = ib_dev->ops.dealloc_ucontext(ucontext);
-	WARN_ON(ret);
+	ib_dev->ops.dealloc_ucontext(ucontext);
+	kfree(ucontext);
 
 	ufile->ucontext = NULL;
 }
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index e44ac718f1cd..3128821ca36e 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -224,12 +224,13 @@ static int ib_uverbs_get_context(struct uverbs_attr_bundle *attrs)
 	if (ret)
 		goto err;
 
-	ucontext = ib_dev->ops.alloc_ucontext(ib_dev, &attrs->driver_udata);
-	if (IS_ERR(ucontext)) {
-		ret = PTR_ERR(ucontext);
+	ucontext = rdma_zalloc_drv_obj(ib_dev, ib_ucontext);
+	if (!ucontext) {
+		ret = -ENOMEM;
 		goto err_alloc;
 	}
 
+	ucontext->res.type = RDMA_RESTRACK_CTX;
 	ucontext->device = ib_dev;
 	ucontext->cg_obj = cg_obj;
 	/* ufile is required when some objects are released */
@@ -240,10 +241,6 @@ static int ib_uverbs_get_context(struct uverbs_attr_bundle *attrs)
 
 	mutex_init(&ucontext->per_mm_list_lock);
 	INIT_LIST_HEAD(&ucontext->per_mm_list);
-	if (!(ib_dev->attrs.device_cap_flags & IB_DEVICE_ON_DEMAND_PAGING))
-		ucontext->invalidate_range = NULL;
-
-	resp.num_comp_vectors = file->device->num_comp_vectors;
 
 	ret = get_unused_fd_flags(O_CLOEXEC);
 	if (ret < 0)
@@ -256,15 +253,22 @@ static int ib_uverbs_get_context(struct uverbs_attr_bundle *attrs)
 		goto err_fd;
 	}
 
+	resp.num_comp_vectors = file->device->num_comp_vectors;
+
 	ret = uverbs_response(attrs, &resp, sizeof(resp));
 	if (ret)
 		goto err_file;
 
-	fd_install(resp.async_fd, filp);
+	ret = ib_dev->ops.alloc_ucontext(ucontext, &attrs->driver_udata);
+	if (ret)
+		goto err_file;
+	if (!(ib_dev->attrs.device_cap_flags & IB_DEVICE_ON_DEMAND_PAGING))
+		ucontext->invalidate_range = NULL;
 
-	ucontext->res.type = RDMA_RESTRACK_CTX;
 	rdma_restrack_uadd(&ucontext->res);
 
+	fd_install(resp.async_fd, filp);
+
 	/*
 	 * Make sure that ib_uverbs_get_ucontext() sees the pointer update
 	 * only after all writes to setup the ucontext have completed
@@ -283,7 +287,7 @@ err_fd:
 	put_unused_fd(resp.async_fd);
 
 err_free:
-	ib_dev->ops.dealloc_ucontext(ucontext);
+	kfree(ucontext);
 
 err_alloc:
 	ib_rdmacg_uncharge(&cg_obj, ib_dev, RDMACG_RESOURCE_HCA_HANDLE);
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
index f29f29aae537..24092911c2ac 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -3671,13 +3671,14 @@ free_mr:
 	return ERR_PTR(rc);
 }
 
-struct ib_ucontext *bnxt_re_alloc_ucontext(struct ib_device *ibdev,
-					   struct ib_udata *udata)
+int bnxt_re_alloc_ucontext(struct ib_ucontext *ctx, struct ib_udata *udata)
 {
+	struct ib_device *ibdev = ctx->device;
+	struct bnxt_re_ucontext *uctx =
+		container_of(ctx, struct bnxt_re_ucontext, ib_uctx);
 	struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev);
 	struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr;
 	struct bnxt_re_uctx_resp resp;
-	struct bnxt_re_ucontext *uctx;
 	u32 chip_met_rev_num = 0;
 	int rc;
 
@@ -3687,13 +3688,9 @@ struct ib_ucontext *bnxt_re_alloc_ucontext(struct ib_device *ibdev,
 	if (ibdev->uverbs_abi_ver != BNXT_RE_ABI_VERSION) {
 		dev_dbg(rdev_to_dev(rdev), " is different from the device %d ",
 			BNXT_RE_ABI_VERSION);
-		return ERR_PTR(-EPERM);
+		return -EPERM;
 	}
 
-	uctx = kzalloc(sizeof(*uctx), GFP_KERNEL);
-	if (!uctx)
-		return ERR_PTR(-ENOMEM);
-
 	uctx->rdev = rdev;
 
 	uctx->shpg = (void *)__get_free_page(GFP_KERNEL);
@@ -3727,23 +3724,21 @@ struct ib_ucontext *bnxt_re_alloc_ucontext(struct ib_device *ibdev,
 		goto cfail;
 	}
 
-	return &uctx->ib_uctx;
+	return 0;
 cfail:
 	free_page((unsigned long)uctx->shpg);
 	uctx->shpg = NULL;
 fail:
-	kfree(uctx);
-	return ERR_PTR(rc);
+	return rc;
 }
 
-int bnxt_re_dealloc_ucontext(struct ib_ucontext *ib_uctx)
+void bnxt_re_dealloc_ucontext(struct ib_ucontext *ib_uctx)
 {
 	struct bnxt_re_ucontext *uctx = container_of(ib_uctx,
 						   struct bnxt_re_ucontext,
 						   ib_uctx);
 
 	struct bnxt_re_dev *rdev = uctx->rdev;
-	int rc = 0;
 
 	if (uctx->shpg)
 		free_page((unsigned long)uctx->shpg);
@@ -3752,17 +3747,10 @@ int bnxt_re_dealloc_ucontext(struct ib_ucontext *ib_uctx)
 		/* Free DPI only if this is the first PD allocated by the
 		 * application and mark the context dpi as NULL
 		 */
-		rc = bnxt_qplib_dealloc_dpi(&rdev->qplib_res,
-					    &rdev->qplib_res.dpi_tbl,
-					    &uctx->dpi);
-		if (rc)
-			dev_err(rdev_to_dev(rdev), "Deallocate HW DPI failed!");
-			/* Don't fail, continue*/
+		bnxt_qplib_dealloc_dpi(&rdev->qplib_res,
+				       &rdev->qplib_res.dpi_tbl, &uctx->dpi);
 		uctx->dpi.dbr = NULL;
 	}
-
-	kfree(uctx);
-	return 0;
 }
 
 /* Helper function to mmap the virtual memory from user app */
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h
index c7cca803cfa3..e45465ed4eee 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.h
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h
@@ -135,8 +135,8 @@ struct bnxt_re_mw {
 };
 
 struct bnxt_re_ucontext {
+	struct ib_ucontext      ib_uctx;
 	struct bnxt_re_dev	*rdev;
-	struct ib_ucontext	ib_uctx;
 	struct bnxt_qplib_dpi	dpi;
 	void			*shpg;
 	spinlock_t		sh_lock;	/* protect shpg */
@@ -215,9 +215,8 @@ int bnxt_re_dealloc_mw(struct ib_mw *mw);
 struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 				  u64 virt_addr, int mr_access_flags,
 				  struct ib_udata *udata);
-struct ib_ucontext *bnxt_re_alloc_ucontext(struct ib_device *ibdev,
-					   struct ib_udata *udata);
-int bnxt_re_dealloc_ucontext(struct ib_ucontext *context);
+int bnxt_re_alloc_ucontext(struct ib_ucontext *ctx, struct ib_udata *udata);
+void bnxt_re_dealloc_ucontext(struct ib_ucontext *context);
 int bnxt_re_mmap(struct ib_ucontext *context, struct vm_area_struct *vma);
 
 unsigned long bnxt_re_lock_cqs(struct bnxt_re_qp *qp);
diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c
index 0a89ef6e5754..2bd24ac45ee4 100644
--- a/drivers/infiniband/hw/bnxt_re/main.c
+++ b/drivers/infiniband/hw/bnxt_re/main.c
@@ -638,6 +638,7 @@ static const struct ib_device_ops bnxt_re_dev_ops = {
 	.reg_user_mr = bnxt_re_reg_user_mr,
 	.req_notify_cq = bnxt_re_req_notify_cq,
 	INIT_RDMA_OBJ_SIZE(ib_pd, bnxt_re_pd, ib_pd),
+	INIT_RDMA_OBJ_SIZE(ib_ucontext, bnxt_re_ucontext, ib_uctx),
 };
 
 static int bnxt_re_register_ib(struct bnxt_re_dev *rdev)
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c
index b74fd90a22dc..4accf7b3dcf2 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c
@@ -62,7 +62,7 @@
 #include <rdma/cxgb3-abi.h>
 #include "common.h"
 
-static int iwch_dealloc_ucontext(struct ib_ucontext *context)
+static void iwch_dealloc_ucontext(struct ib_ucontext *context)
 {
 	struct iwch_dev *rhp = to_iwch_dev(context->device);
 	struct iwch_ucontext *ucontext = to_iwch_ucontext(context);
@@ -72,24 +72,20 @@ static int iwch_dealloc_ucontext(struct ib_ucontext *context)
 	list_for_each_entry_safe(mm, tmp, &ucontext->mmaps, entry)
 		kfree(mm);
 	cxio_release_ucontext(&rhp->rdev, &ucontext->uctx);
-	kfree(ucontext);
-	return 0;
 }
 
-static struct ib_ucontext *iwch_alloc_ucontext(struct ib_device *ibdev,
-					struct ib_udata *udata)
+static int iwch_alloc_ucontext(struct ib_ucontext *ucontext,
+			       struct ib_udata *udata)
 {
-	struct iwch_ucontext *context;
+	struct ib_device *ibdev = ucontext->device;
+	struct iwch_ucontext *context = to_iwch_ucontext(ucontext);
 	struct iwch_dev *rhp = to_iwch_dev(ibdev);
 
 	pr_debug("%s ibdev %p\n", __func__, ibdev);
-	context = kzalloc(sizeof(*context), GFP_KERNEL);
-	if (!context)
-		return ERR_PTR(-ENOMEM);
 	cxio_init_ucontext(&rhp->rdev, &context->uctx);
 	INIT_LIST_HEAD(&context->mmaps);
 	spin_lock_init(&context->mmap_lock);
-	return &context->ibucontext;
+	return 0;
 }
 
 static int iwch_destroy_cq(struct ib_cq *ib_cq)
@@ -1342,6 +1338,7 @@ static const struct ib_device_ops iwch_dev_ops = {
 	.req_notify_cq = iwch_arm_cq,
 	.resize_cq = iwch_resize_cq,
 	INIT_RDMA_OBJ_SIZE(ib_pd, iwch_pd, ibpd),
+	INIT_RDMA_OBJ_SIZE(ib_ucontext, iwch_ucontext, ibucontext),
 };
 
 int iwch_register_device(struct iwch_dev *dev)
diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c
index 81fcffb597ab..507c54572cc9 100644
--- a/drivers/infiniband/hw/cxgb4/provider.c
+++ b/drivers/infiniband/hw/cxgb4/provider.c
@@ -58,7 +58,7 @@ static int fastreg_support = 1;
 module_param(fastreg_support, int, 0644);
 MODULE_PARM_DESC(fastreg_support, "Advertise fastreg support (default=1)");
 
-static int c4iw_dealloc_ucontext(struct ib_ucontext *context)
+static void c4iw_dealloc_ucontext(struct ib_ucontext *context)
 {
 	struct c4iw_ucontext *ucontext = to_c4iw_ucontext(context);
 	struct c4iw_dev *rhp;
@@ -70,26 +70,19 @@ static int c4iw_dealloc_ucontext(struct ib_ucontext *context)
 	list_for_each_entry_safe(mm, tmp, &ucontext->mmaps, entry)
 		kfree(mm);
 	c4iw_release_dev_ucontext(&rhp->rdev, &ucontext->uctx);
-	kfree(ucontext);
-	return 0;
 }
 
-static struct ib_ucontext *c4iw_alloc_ucontext(struct ib_device *ibdev,
-					       struct ib_udata *udata)
+static int c4iw_alloc_ucontext(struct ib_ucontext *ucontext,
+			       struct ib_udata *udata)
 {
-	struct c4iw_ucontext *context;
+	struct ib_device *ibdev = ucontext->device;
+	struct c4iw_ucontext *context = to_c4iw_ucontext(ucontext);
 	struct c4iw_dev *rhp = to_c4iw_dev(ibdev);
 	struct c4iw_alloc_ucontext_resp uresp;
 	int ret = 0;
 	struct c4iw_mm_entry *mm = NULL;
 
 	pr_debug("ibdev %p\n", ibdev);
-	context = kzalloc(sizeof(*context), GFP_KERNEL);
-	if (!context) {
-		ret = -ENOMEM;
-		goto err;
-	}
-
 	c4iw_init_dev_ucontext(&rhp->rdev, &context->uctx);
 	INIT_LIST_HEAD(&context->mmaps);
 	spin_lock_init(&context->mmap_lock);
@@ -101,7 +94,7 @@ static struct ib_ucontext *c4iw_alloc_ucontext(struct ib_device *ibdev,
 		mm = kmalloc(sizeof(*mm), GFP_KERNEL);
 		if (!mm) {
 			ret = -ENOMEM;
-			goto err_free;
+			goto err;
 		}
 
 		uresp.status_page_size = PAGE_SIZE;
@@ -121,13 +114,11 @@ static struct ib_ucontext *c4iw_alloc_ucontext(struct ib_device *ibdev,
 		mm->len = PAGE_SIZE;
 		insert_mmap(context, mm);
 	}
-	return &context->ibucontext;
+	return 0;
 err_mm:
 	kfree(mm);
-err_free:
-	kfree(context);
 err:
-	return ERR_PTR(ret);
+	return ret;
 }
 
 static int c4iw_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
@@ -555,6 +546,7 @@ static const struct ib_device_ops c4iw_dev_ops = {
 	.reg_user_mr = c4iw_reg_user_mr,
 	.req_notify_cq = c4iw_arm_cq,
 	INIT_RDMA_OBJ_SIZE(ib_pd, c4iw_pd, ibpd),
+	INIT_RDMA_OBJ_SIZE(ib_ucontext, c4iw_ucontext, ibucontext),
 };
 
 void c4iw_register_device(struct work_struct *work)
diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
index 29fb4fbba5ba..c929125da84b 100644
--- a/drivers/infiniband/hw/hns/hns_roce_main.c
+++ b/drivers/infiniband/hw/hns/hns_roce_main.c
@@ -335,23 +335,19 @@ static int hns_roce_modify_port(struct ib_device *ib_dev, u8 port_num, int mask,
 	return 0;
 }
 
-static struct ib_ucontext *hns_roce_alloc_ucontext(struct ib_device *ib_dev,
-						   struct ib_udata *udata)
+static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx,
+				   struct ib_udata *udata)
 {
 	int ret = 0;
-	struct hns_roce_ucontext *context;
+	struct hns_roce_ucontext *context = to_hr_ucontext(uctx);
 	struct hns_roce_ib_alloc_ucontext_resp resp = {};
-	struct hns_roce_dev *hr_dev = to_hr_dev(ib_dev);
+	struct hns_roce_dev *hr_dev = to_hr_dev(uctx->device);
 
 	if (!hr_dev->active)
-		return ERR_PTR(-EAGAIN);
+		return -EAGAIN;
 
 	resp.qp_tab_size = hr_dev->caps.num_qps;
 
-	context = kzalloc(sizeof(*context), GFP_KERNEL);
-	if (!context)
-		return ERR_PTR(-ENOMEM);
-
 	ret = hns_roce_uar_alloc(hr_dev, &context->uar);
 	if (ret)
 		goto error_fail_uar_alloc;
@@ -365,25 +361,20 @@ static struct ib_ucontext *hns_roce_alloc_ucontext(struct ib_device *ib_dev,
 	if (ret)
 		goto error_fail_copy_to_udata;
 
-	return &context->ibucontext;
+	return 0;
 
 error_fail_copy_to_udata:
 	hns_roce_uar_free(hr_dev, &context->uar);
 
 error_fail_uar_alloc:
-	kfree(context);
-
-	return ERR_PTR(ret);
+	return ret;
 }
 
-static int hns_roce_dealloc_ucontext(struct ib_ucontext *ibcontext)
+static void hns_roce_dealloc_ucontext(struct ib_ucontext *ibcontext)
 {
 	struct hns_roce_ucontext *context = to_hr_ucontext(ibcontext);
 
 	hns_roce_uar_free(to_hr_dev(ibcontext->device), &context->uar);
-	kfree(context);
-
-	return 0;
 }
 
 static int hns_roce_mmap(struct ib_ucontext *context,
@@ -478,6 +469,7 @@ static const struct ib_device_ops hns_roce_dev_ops = {
 	.query_port = hns_roce_query_port,
 	.reg_user_mr = hns_roce_reg_user_mr,
 	INIT_RDMA_OBJ_SIZE(ib_pd, hns_roce_pd, ibpd),
+	INIT_RDMA_OBJ_SIZE(ib_ucontext, hns_roce_ucontext, ibucontext),
 };
 
 static const struct ib_device_ops hns_roce_dev_mr_ops = {
diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
index 76b4d1218696..a8352e3ca23d 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
@@ -121,78 +121,55 @@ static int i40iw_query_port(struct ib_device *ibdev,
 
 /**
  * i40iw_alloc_ucontext - Allocate the user context data structure
- * @ibdev: device pointer from stack
+ * @uctx: Uverbs context pointer from stack
  * @udata: user data
  *
  * This keeps track of all objects associated with a particular
  * user-mode client.
  */
-static struct ib_ucontext *i40iw_alloc_ucontext(struct ib_device *ibdev,
-						struct ib_udata *udata)
+static int i40iw_alloc_ucontext(struct ib_ucontext *uctx,
+				struct ib_udata *udata)
 {
+	struct ib_device *ibdev = uctx->device;
 	struct i40iw_device *iwdev = to_iwdev(ibdev);
 	struct i40iw_alloc_ucontext_req req;
-	struct i40iw_alloc_ucontext_resp uresp;
-	struct i40iw_ucontext *ucontext;
+	struct i40iw_alloc_ucontext_resp uresp = {};
+	struct i40iw_ucontext *ucontext = to_ucontext(uctx);
 
 	if (ib_copy_from_udata(&req, udata, sizeof(req)))
-		return ERR_PTR(-EINVAL);
+		return -EINVAL;
 
 	if (req.userspace_ver < 4 || req.userspace_ver > I40IW_ABI_VER) {
 		i40iw_pr_err("Unsupported provider library version %u.\n", req.userspace_ver);
-		return ERR_PTR(-EINVAL);
+		return -EINVAL;
 	}
 
-	memset(&uresp, 0, sizeof(uresp));
 	uresp.max_qps = iwdev->max_qp;
 	uresp.max_pds = iwdev->max_pd;
 	uresp.wq_size = iwdev->max_qp_wr * 2;
 	uresp.kernel_ver = req.userspace_ver;
 
-	ucontext = kzalloc(sizeof(*ucontext), GFP_KERNEL);
-	if (!ucontext)
-		return ERR_PTR(-ENOMEM);
-
 	ucontext->iwdev = iwdev;
 	ucontext->abi_ver = req.userspace_ver;
 
-	if (ib_copy_to_udata(udata, &uresp, sizeof(uresp))) {
-		kfree(ucontext);
-		return ERR_PTR(-EFAULT);
-	}
+	if (ib_copy_to_udata(udata, &uresp, sizeof(uresp)))
+		return -EFAULT;
 
 	INIT_LIST_HEAD(&ucontext->cq_reg_mem_list);
 	spin_lock_init(&ucontext->cq_reg_mem_list_lock);
 	INIT_LIST_HEAD(&ucontext->qp_reg_mem_list);
 	spin_lock_init(&ucontext->qp_reg_mem_list_lock);
 
-	return &ucontext->ibucontext;
+	return 0;
 }
 
 /**
  * i40iw_dealloc_ucontext - deallocate the user context data structure
  * @context: user context created during alloc
  */
-static int i40iw_dealloc_ucontext(struct ib_ucontext *context)
+static void i40iw_dealloc_ucontext(struct ib_ucontext *context)
 {
-	struct i40iw_ucontext *ucontext = to_ucontext(context);
-	unsigned long flags;
-
-	spin_lock_irqsave(&ucontext->cq_reg_mem_list_lock, flags);
-	if (!list_empty(&ucontext->cq_reg_mem_list)) {
-		spin_unlock_irqrestore(&ucontext->cq_reg_mem_list_lock, flags);
-		return -EBUSY;
-	}
-	spin_unlock_irqrestore(&ucontext->cq_reg_mem_list_lock, flags);
-	spin_lock_irqsave(&ucontext->qp_reg_mem_list_lock, flags);
-	if (!list_empty(&ucontext->qp_reg_mem_list)) {
-		spin_unlock_irqrestore(&ucontext->qp_reg_mem_list_lock, flags);
-		return -EBUSY;
-	}
-	spin_unlock_irqrestore(&ucontext->qp_reg_mem_list_lock, flags);
-
-	kfree(ucontext);
-	return 0;
+	return;
 }
 
 /**
@@ -2740,6 +2717,7 @@ static const struct ib_device_ops i40iw_dev_ops = {
 	.reg_user_mr = i40iw_reg_user_mr,
 	.req_notify_cq = i40iw_req_notify_cq,
 	INIT_RDMA_OBJ_SIZE(ib_pd, i40iw_pd, ibpd),
+	INIT_RDMA_OBJ_SIZE(ib_ucontext, i40iw_ucontext, ibucontext),
 };
 
 /**
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index c0f6aea7ed7c..733f7bbd5901 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -1076,17 +1076,18 @@ out:
 	return err;
 }
 
-static struct ib_ucontext *mlx4_ib_alloc_ucontext(struct ib_device *ibdev,
-						  struct ib_udata *udata)
+static int mlx4_ib_alloc_ucontext(struct ib_ucontext *uctx,
+				  struct ib_udata *udata)
 {
+	struct ib_device *ibdev = uctx->device;
 	struct mlx4_ib_dev *dev = to_mdev(ibdev);
-	struct mlx4_ib_ucontext *context;
+	struct mlx4_ib_ucontext *context = to_mucontext(uctx);
 	struct mlx4_ib_alloc_ucontext_resp_v3 resp_v3;
 	struct mlx4_ib_alloc_ucontext_resp resp;
 	int err;
 
 	if (!dev->ib_active)
-		return ERR_PTR(-EAGAIN);
+		return -EAGAIN;
 
 	if (ibdev->uverbs_abi_ver == MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION) {
 		resp_v3.qp_tab_size      = dev->dev->caps.num_qps;
@@ -1100,15 +1101,9 @@ static struct ib_ucontext *mlx4_ib_alloc_ucontext(struct ib_device *ibdev,
 		resp.cqe_size	      = dev->dev->caps.cqe_size;
 	}
 
-	context = kzalloc(sizeof(*context), GFP_KERNEL);
-	if (!context)
-		return ERR_PTR(-ENOMEM);
-
 	err = mlx4_uar_alloc(to_mdev(ibdev)->dev, &context->uar);
-	if (err) {
-		kfree(context);
-		return ERR_PTR(err);
-	}
+	if (err)
+		return err;
 
 	INIT_LIST_HEAD(&context->db_page_list);
 	mutex_init(&context->db_page_mutex);
@@ -1123,21 +1118,17 @@ static struct ib_ucontext *mlx4_ib_alloc_ucontext(struct ib_device *ibdev,
 
 	if (err) {
 		mlx4_uar_free(to_mdev(ibdev)->dev, &context->uar);
-		kfree(context);
-		return ERR_PTR(-EFAULT);
+		return -EFAULT;
 	}
 
-	return &context->ibucontext;
+	return err;
 }
 
-static int mlx4_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
+static void mlx4_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
 {
 	struct mlx4_ib_ucontext *context = to_mucontext(ibcontext);
 
 	mlx4_uar_free(to_mdev(ibcontext->device)->dev, &context->uar);
-	kfree(context);
-
-	return 0;
 }
 
 static void mlx4_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
@@ -2570,6 +2561,7 @@ static const struct ib_device_ops mlx4_ib_dev_ops = {
 	.rereg_user_mr = mlx4_ib_rereg_user_mr,
 	.resize_cq = mlx4_ib_resize_cq,
 	INIT_RDMA_OBJ_SIZE(ib_pd, mlx4_ib_pd, ibpd),
+	INIT_RDMA_OBJ_SIZE(ib_ucontext, mlx4_ib_ucontext, ibucontext),
 };
 
 static const struct ib_device_ops mlx4_ib_dev_wq_ops = {
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 614b2acbc621..994c19d01211 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -1745,14 +1745,15 @@ static void mlx5_ib_dealloc_transport_domain(struct mlx5_ib_dev *dev, u32 tdn,
 	mlx5_ib_disable_lb(dev, true, false);
 }
 
-static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
-						  struct ib_udata *udata)
+static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx,
+				  struct ib_udata *udata)
 {
+	struct ib_device *ibdev = uctx->device;
 	struct mlx5_ib_dev *dev = to_mdev(ibdev);
 	struct mlx5_ib_alloc_ucontext_req_v2 req = {};
 	struct mlx5_ib_alloc_ucontext_resp resp = {};
 	struct mlx5_core_dev *mdev = dev->mdev;
-	struct mlx5_ib_ucontext *context;
+	struct mlx5_ib_ucontext *context = to_mucontext(uctx);
 	struct mlx5_bfreg_info *bfregi;
 	int ver;
 	int err;
@@ -1762,29 +1763,29 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
 	bool lib_uar_4k;
 
 	if (!dev->ib_active)
-		return ERR_PTR(-EAGAIN);
+		return -EAGAIN;
 
 	if (udata->inlen == sizeof(struct mlx5_ib_alloc_ucontext_req))
 		ver = 0;
 	else if (udata->inlen >= min_req_v2)
 		ver = 2;
 	else
-		return ERR_PTR(-EINVAL);
+		return -EINVAL;
 
 	err = ib_copy_from_udata(&req, udata, min(udata->inlen, sizeof(req)));
 	if (err)
-		return ERR_PTR(err);
+		return err;
 
 	if (req.flags & ~MLX5_IB_ALLOC_UCTX_DEVX)
-		return ERR_PTR(-EOPNOTSUPP);
+		return -EOPNOTSUPP;
 
 	if (req.comp_mask || req.reserved0 || req.reserved1 || req.reserved2)
-		return ERR_PTR(-EOPNOTSUPP);
+		return -EOPNOTSUPP;
 
 	req.total_num_bfregs = ALIGN(req.total_num_bfregs,
 				    MLX5_NON_FP_BFREGS_PER_UAR);
 	if (req.num_low_latency_bfregs > req.total_num_bfregs - 1)
-		return ERR_PTR(-EINVAL);
+		return -EINVAL;
 
 	resp.qp_tab_size = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp);
 	if (mlx5_core_is_pf(dev->mdev) && MLX5_CAP_GEN(dev->mdev, bf))
@@ -1817,10 +1818,6 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
 		/* MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_FULL_OFFLOAD is currently always 0 */
 	}
 
-	context = kzalloc(sizeof(*context), GFP_KERNEL);
-	if (!context)
-		return ERR_PTR(-ENOMEM);
-
 	lib_uar_4k = req.lib_caps & MLX5_LIB_CAP_4K_UAR;
 	bfregi = &context->bfregi;
 
@@ -1955,7 +1952,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
 				   1, &dev->roce[port].tx_port_affinity));
 	}
 
-	return &context->ibucontext;
+	return 0;
 
 out_mdev:
 	mlx5_ib_dealloc_transport_domain(dev, context->tdn, context->devx_uid);
@@ -1973,12 +1970,10 @@ out_count:
 	kfree(bfregi->count);
 
 out_ctx:
-	kfree(context);
-
-	return ERR_PTR(err);
+	return err;
 }
 
-static int mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
+static void mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
 {
 	struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
 	struct mlx5_ib_dev *dev = to_mdev(ibcontext->device);
@@ -1998,9 +1993,6 @@ static int mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
 	deallocate_uars(dev, context);
 	kfree(bfregi->sys_pages);
 	kfree(bfregi->count);
-	kfree(context);
-
-	return 0;
 }
 
 static phys_addr_t uar_index2pfn(struct mlx5_ib_dev *dev,
@@ -5984,6 +5976,7 @@ static const struct ib_device_ops mlx5_ib_dev_ops = {
 	.rereg_user_mr = mlx5_ib_rereg_user_mr,
 	.resize_cq = mlx5_ib_resize_cq,
 	INIT_RDMA_OBJ_SIZE(ib_pd, mlx5_ib_pd, ibpd),
+	INIT_RDMA_OBJ_SIZE(ib_ucontext, mlx5_ib_ucontext, ibucontext),
 };
 
 static const struct ib_device_ops mlx5_ib_dev_flow_ipsec_ops = {
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index 80c3af217d96..d063d7a37762 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -301,17 +301,16 @@ static int mthca_query_gid(struct ib_device *ibdev, u8 port,
 	return err;
 }
 
-static struct ib_ucontext *mthca_alloc_ucontext(struct ib_device *ibdev,
-						struct ib_udata *udata)
+static int mthca_alloc_ucontext(struct ib_ucontext *uctx,
+				struct ib_udata *udata)
 {
-	struct mthca_alloc_ucontext_resp uresp;
-	struct mthca_ucontext           *context;
+	struct ib_device *ibdev = uctx->device;
+	struct mthca_alloc_ucontext_resp uresp = {};
+	struct mthca_ucontext *context = to_mucontext(uctx);
 	int                              err;
 
 	if (!(to_mdev(ibdev)->active))
-		return ERR_PTR(-EAGAIN);
-
-	memset(&uresp, 0, sizeof uresp);
+		return -EAGAIN;
 
 	uresp.qp_tab_size = to_mdev(ibdev)->limits.num_qps;
 	if (mthca_is_memfree(to_mdev(ibdev)))
@@ -319,44 +318,33 @@ static struct ib_ucontext *mthca_alloc_ucontext(struct ib_device *ibdev,
 	else
 		uresp.uarc_size = 0;
 
-	context = kzalloc(sizeof(*context), GFP_KERNEL);
-	if (!context)
-		return ERR_PTR(-ENOMEM);
-
 	err = mthca_uar_alloc(to_mdev(ibdev), &context->uar);
-	if (err) {
-		kfree(context);
-		return ERR_PTR(err);
-	}
+	if (err)
+		return err;
 
 	context->db_tab = mthca_init_user_db_tab(to_mdev(ibdev));
 	if (IS_ERR(context->db_tab)) {
 		err = PTR_ERR(context->db_tab);
 		mthca_uar_free(to_mdev(ibdev), &context->uar);
-		kfree(context);
-		return ERR_PTR(err);
+		return err;
 	}
 
-	if (ib_copy_to_udata(udata, &uresp, sizeof uresp)) {
+	if (ib_copy_to_udata(udata, &uresp, sizeof(uresp))) {
 		mthca_cleanup_user_db_tab(to_mdev(ibdev), &context->uar, context->db_tab);
 		mthca_uar_free(to_mdev(ibdev), &context->uar);
-		kfree(context);
-		return ERR_PTR(-EFAULT);
+		return -EFAULT;
 	}
 
 	context->reg_mr_warned = 0;
 
-	return &context->ibucontext;
+	return 0;
 }
 
-static int mthca_dealloc_ucontext(struct ib_ucontext *context)
+static void mthca_dealloc_ucontext(struct ib_ucontext *context)
 {
 	mthca_cleanup_user_db_tab(to_mdev(context->device), &to_mucontext(context)->uar,
 				  to_mucontext(context)->db_tab);
 	mthca_uar_free(to_mdev(context->device), &to_mucontext(context)->uar);
-	kfree(to_mucontext(context));
-
-	return 0;
 }
 
 static int mthca_mmap_uar(struct ib_ucontext *context,
@@ -1213,6 +1201,7 @@ static const struct ib_device_ops mthca_dev_ops = {
 	.reg_user_mr = mthca_reg_user_mr,
 	.resize_cq = mthca_resize_cq,
 	INIT_RDMA_OBJ_SIZE(ib_pd, mthca_pd, ibpd),
+	INIT_RDMA_OBJ_SIZE(ib_ucontext, mthca_ucontext, ibucontext),
 };
 
 static const struct ib_device_ops mthca_dev_arbel_srq_ops = {
diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c
index b23956aa45b8..828e4af3f951 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.c
+++ b/drivers/infiniband/hw/nes/nes_verbs.c
@@ -529,27 +529,27 @@ static int nes_query_gid(struct ib_device *ibdev, u8 port,
  * nes_alloc_ucontext - Allocate the user context data structure. This keeps track
  * of all objects associated with a particular user-mode client.
  */
-static struct ib_ucontext *nes_alloc_ucontext(struct ib_device *ibdev,
-		struct ib_udata *udata)
+static int nes_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata)
 {
+	struct ib_device *ibdev = uctx->device;
 	struct nes_vnic *nesvnic = to_nesvnic(ibdev);
 	struct nes_device *nesdev = nesvnic->nesdev;
 	struct nes_adapter *nesadapter = nesdev->nesadapter;
 	struct nes_alloc_ucontext_req req;
 	struct nes_alloc_ucontext_resp uresp = {};
-	struct nes_ucontext *nes_ucontext;
+	struct nes_ucontext *nes_ucontext = to_nesucontext(uctx);
 	struct nes_ib_device *nesibdev = nesvnic->nesibdev;
 
 
 	if (ib_copy_from_udata(&req, udata, sizeof(struct nes_alloc_ucontext_req))) {
 		printk(KERN_ERR PFX "Invalid structure size on allocate user context.\n");
-		return ERR_PTR(-EINVAL);
+		return -EINVAL;
 	}
 
 	if (req.userspace_ver != NES_ABI_USERSPACE_VER) {
 		printk(KERN_ERR PFX "Invalid userspace driver version detected. Detected version %d, should be %d\n",
 			req.userspace_ver, NES_ABI_USERSPACE_VER);
-		return ERR_PTR(-EINVAL);
+		return -EINVAL;
 	}
 
 
@@ -559,10 +559,6 @@ static struct ib_ucontext *nes_alloc_ucontext(struct ib_device *ibdev,
 	uresp.virtwq = nesadapter->virtwq;
 	uresp.kernel_ver = NES_ABI_KERNEL_VER;
 
-	nes_ucontext = kzalloc(sizeof *nes_ucontext, GFP_KERNEL);
-	if (!nes_ucontext)
-		return ERR_PTR(-ENOMEM);
-
 	nes_ucontext->nesdev = nesdev;
 	nes_ucontext->mmap_wq_offset = uresp.max_pds;
 	nes_ucontext->mmap_cq_offset = nes_ucontext->mmap_wq_offset +
@@ -570,29 +566,22 @@ static struct ib_ucontext *nes_alloc_ucontext(struct ib_device *ibdev,
 			PAGE_SIZE;
 
 
-	if (ib_copy_to_udata(udata, &uresp, sizeof uresp)) {
-		kfree(nes_ucontext);
-		return ERR_PTR(-EFAULT);
-	}
+	if (ib_copy_to_udata(udata, &uresp, sizeof(uresp)))
+		return -EFAULT;
 
 	INIT_LIST_HEAD(&nes_ucontext->cq_reg_mem_list);
 	INIT_LIST_HEAD(&nes_ucontext->qp_reg_mem_list);
-	return &nes_ucontext->ibucontext;
+	return 0;
 }
 
-
 /**
  * nes_dealloc_ucontext
  */
-static int nes_dealloc_ucontext(struct ib_ucontext *context)
+static void nes_dealloc_ucontext(struct ib_ucontext *context)
 {
-	struct nes_ucontext *nes_ucontext = to_nesucontext(context);
-
-	kfree(nes_ucontext);
-	return 0;
+	return;
 }
 
-
 /**
  * nes_mmap
  */
@@ -3599,6 +3588,7 @@ static const struct ib_device_ops nes_dev_ops = {
 	.reg_user_mr = nes_reg_user_mr,
 	.req_notify_cq = nes_req_notify_cq,
 	INIT_RDMA_OBJ_SIZE(ib_pd, nes_pd, ibpd),
+	INIT_RDMA_OBJ_SIZE(ib_ucontext, nes_ucontext, ibucontext),
 };
 
 /**
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
index 0de83c92691f..b9e10d55a58e 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
@@ -180,6 +180,7 @@ static const struct ib_device_ops ocrdma_dev_ops = {
 	.req_notify_cq = ocrdma_arm_cq,
 	.resize_cq = ocrdma_resize_cq,
 	INIT_RDMA_OBJ_SIZE(ib_pd, ocrdma_pd, ibpd),
+	INIT_RDMA_OBJ_SIZE(ib_ucontext, ocrdma_ucontext, ibucontext),
 };
 
 static const struct ib_device_ops ocrdma_dev_srq_ops = {
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
index ed5da67b693d..b4e1777c2c97 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
@@ -440,7 +440,7 @@ err:
 	return status;
 }
 
-static int ocrdma_dealloc_ucontext_pd(struct ocrdma_ucontext *uctx)
+static void ocrdma_dealloc_ucontext_pd(struct ocrdma_ucontext *uctx)
 {
 	struct ocrdma_pd *pd = uctx->cntxt_pd;
 	struct ocrdma_dev *dev = get_ocrdma_dev(pd->ibpd.device);
@@ -451,8 +451,7 @@ static int ocrdma_dealloc_ucontext_pd(struct ocrdma_ucontext *uctx)
 	}
 	kfree(uctx->cntxt_pd);
 	uctx->cntxt_pd = NULL;
-	(void)_ocrdma_dealloc_pd(dev, pd);
-	return 0;
+	_ocrdma_dealloc_pd(dev, pd);
 }
 
 static struct ocrdma_pd *ocrdma_get_ucontext_pd(struct ocrdma_ucontext *uctx)
@@ -476,33 +475,28 @@ static void ocrdma_release_ucontext_pd(struct ocrdma_ucontext *uctx)
 	mutex_unlock(&uctx->mm_list_lock);
 }
 
-struct ib_ucontext *ocrdma_alloc_ucontext(struct ib_device *ibdev,
-					  struct ib_udata *udata)
+int ocrdma_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata)
 {
+	struct ib_device *ibdev = uctx->device;
 	int status;
-	struct ocrdma_ucontext *ctx;
-	struct ocrdma_alloc_ucontext_resp resp;
+	struct ocrdma_ucontext *ctx = get_ocrdma_ucontext(uctx);
+	struct ocrdma_alloc_ucontext_resp resp = {};
 	struct ocrdma_dev *dev = get_ocrdma_dev(ibdev);
 	struct pci_dev *pdev = dev->nic_info.pdev;
 	u32 map_len = roundup(sizeof(u32) * 2048, PAGE_SIZE);
 
 	if (!udata)
-		return ERR_PTR(-EFAULT);
-	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
-	if (!ctx)
-		return ERR_PTR(-ENOMEM);
+		return -EFAULT;
 	INIT_LIST_HEAD(&ctx->mm_head);
 	mutex_init(&ctx->mm_list_lock);
 
 	ctx->ah_tbl.va = dma_alloc_coherent(&pdev->dev, map_len,
 					    &ctx->ah_tbl.pa, GFP_KERNEL);
-	if (!ctx->ah_tbl.va) {
-		kfree(ctx);
-		return ERR_PTR(-ENOMEM);
-	}
+	if (!ctx->ah_tbl.va)
+		return -ENOMEM;
+
 	ctx->ah_tbl.len = map_len;
 
-	memset(&resp, 0, sizeof(resp));
 	resp.ah_tbl_len = ctx->ah_tbl.len;
 	resp.ah_tbl_page = virt_to_phys(ctx->ah_tbl.va);
 
@@ -524,7 +518,7 @@ struct ib_ucontext *ocrdma_alloc_ucontext(struct ib_device *ibdev,
 	status = ib_copy_to_udata(udata, &resp, sizeof(resp));
 	if (status)
 		goto cpy_err;
-	return &ctx->ibucontext;
+	return 0;
 
 cpy_err:
 	ocrdma_dealloc_ucontext_pd(ctx);
@@ -533,19 +527,17 @@ pd_err:
 map_err:
 	dma_free_coherent(&pdev->dev, ctx->ah_tbl.len, ctx->ah_tbl.va,
 			  ctx->ah_tbl.pa);
-	kfree(ctx);
-	return ERR_PTR(status);
+	return status;
 }
 
-int ocrdma_dealloc_ucontext(struct ib_ucontext *ibctx)
+void ocrdma_dealloc_ucontext(struct ib_ucontext *ibctx)
 {
-	int status;
 	struct ocrdma_mm *mm, *tmp;
 	struct ocrdma_ucontext *uctx = get_ocrdma_ucontext(ibctx);
 	struct ocrdma_dev *dev = get_ocrdma_dev(ibctx->device);
 	struct pci_dev *pdev = dev->nic_info.pdev;
 
-	status = ocrdma_dealloc_ucontext_pd(uctx);
+	ocrdma_dealloc_ucontext_pd(uctx);
 
 	ocrdma_del_mmap(uctx, uctx->ah_tbl.pa, uctx->ah_tbl.len);
 	dma_free_coherent(&pdev->dev, uctx->ah_tbl.len, uctx->ah_tbl.va,
@@ -555,8 +547,6 @@ int ocrdma_dealloc_ucontext(struct ib_ucontext *ibctx)
 		list_del(&mm->entry);
 		kfree(mm);
 	}
-	kfree(uctx);
-	return status;
 }
 
 int ocrdma_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h
index 1fd66721c930..4c04ab40798e 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h
@@ -64,9 +64,8 @@ void ocrdma_get_guid(struct ocrdma_dev *, u8 *guid);
 struct net_device *ocrdma_get_netdev(struct ib_device *device, u8 port_num);
 int ocrdma_query_pkey(struct ib_device *, u8 port, u16 index, u16 *pkey);
 
-struct ib_ucontext *ocrdma_alloc_ucontext(struct ib_device *,
-					  struct ib_udata *);
-int ocrdma_dealloc_ucontext(struct ib_ucontext *);
+int ocrdma_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata);
+void ocrdma_dealloc_ucontext(struct ib_ucontext *uctx);
 
 int ocrdma_mmap(struct ib_ucontext *, struct vm_area_struct *vma);
 
diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c
index 44ce4989dcef..996d9ecd93e0 100644
--- a/drivers/infiniband/hw/qedr/main.c
+++ b/drivers/infiniband/hw/qedr/main.c
@@ -240,6 +240,7 @@ static const struct ib_device_ops qedr_dev_ops = {
 	.req_notify_cq = qedr_arm_cq,
 	.resize_cq = qedr_resize_cq,
 	INIT_RDMA_OBJ_SIZE(ib_pd, qedr_pd, ibpd),
+	INIT_RDMA_OBJ_SIZE(ib_ucontext, qedr_ucontext, ibucontext),
 };
 
 static int qedr_register_device(struct qedr_dev *dev)
diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c
index d51bc3ede9d1..59ad4202422c 100644
--- a/drivers/infiniband/hw/qedr/verbs.c
+++ b/drivers/infiniband/hw/qedr/verbs.c
@@ -316,28 +316,24 @@ static bool qedr_search_mmap(struct qedr_ucontext *uctx, u64 phy_addr,
 	return found;
 }
 
-struct ib_ucontext *qedr_alloc_ucontext(struct ib_device *ibdev,
-					struct ib_udata *udata)
+int qedr_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata)
 {
+	struct ib_device *ibdev = uctx->device;
 	int rc;
-	struct qedr_ucontext *ctx;
-	struct qedr_alloc_ucontext_resp uresp;
+	struct qedr_ucontext *ctx = get_qedr_ucontext(uctx);
+	struct qedr_alloc_ucontext_resp uresp = {};
 	struct qedr_dev *dev = get_qedr_dev(ibdev);
 	struct qed_rdma_add_user_out_params oparams;
 
 	if (!udata)
-		return ERR_PTR(-EFAULT);
-
-	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
-	if (!ctx)
-		return ERR_PTR(-ENOMEM);
+		return -EFAULT;
 
 	rc = dev->ops->rdma_add_user(dev->rdma_ctx, &oparams);
 	if (rc) {
 		DP_ERR(dev,
 		       "failed to allocate a DPI for a new RoCE application, rc=%d. To overcome this consider to increase the number of DPIs, increase the doorbell BAR size or just close unnecessary RoCE applications. In order to increase the number of DPIs consult the qedr readme\n",
 		       rc);
-		goto err;
+		return rc;
 	}
 
 	ctx->dpi = oparams.dpi;
@@ -347,8 +343,6 @@ struct ib_ucontext *qedr_alloc_ucontext(struct ib_device *ibdev,
 	INIT_LIST_HEAD(&ctx->mm_head);
 	mutex_init(&ctx->mm_list_lock);
 
-	memset(&uresp, 0, sizeof(uresp));
-
 	uresp.dpm_enabled = dev->user_dpm_enabled;
 	uresp.wids_enabled = 1;
 	uresp.wid_count = oparams.wid_count;
@@ -364,28 +358,23 @@ struct ib_ucontext *qedr_alloc_ucontext(struct ib_device *ibdev,
 
 	rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
 	if (rc)
-		goto err;
+		return rc;
 
 	ctx->dev = dev;
 
 	rc = qedr_add_mmap(ctx, ctx->dpi_phys_addr, ctx->dpi_size);
 	if (rc)
-		goto err;
+		return rc;
 
 	DP_DEBUG(dev, QEDR_MSG_INIT, "Allocating user context %p\n",
 		 &ctx->ibucontext);
-	return &ctx->ibucontext;
-
-err:
-	kfree(ctx);
-	return ERR_PTR(rc);
+	return 0;
 }
 
-int qedr_dealloc_ucontext(struct ib_ucontext *ibctx)
+void qedr_dealloc_ucontext(struct ib_ucontext *ibctx)
 {
 	struct qedr_ucontext *uctx = get_qedr_ucontext(ibctx);
 	struct qedr_mm *mm, *tmp;
-	int status = 0;
 
 	DP_DEBUG(uctx->dev, QEDR_MSG_INIT, "Deallocating user context %p\n",
 		 uctx);
@@ -398,9 +387,6 @@ int qedr_dealloc_ucontext(struct ib_ucontext *ibctx)
 		list_del(&mm->entry);
 		kfree(mm);
 	}
-
-	kfree(uctx);
-	return status;
 }
 
 int qedr_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
diff --git a/drivers/infiniband/hw/qedr/verbs.h b/drivers/infiniband/hw/qedr/verbs.h
index 97a6ff3f9afb..f0c05f4771ac 100644
--- a/drivers/infiniband/hw/qedr/verbs.h
+++ b/drivers/infiniband/hw/qedr/verbs.h
@@ -43,8 +43,8 @@ int qedr_iw_query_gid(struct ib_device *ibdev, u8 port,
 
 int qedr_query_pkey(struct ib_device *, u8 port, u16 index, u16 *pkey);
 
-struct ib_ucontext *qedr_alloc_ucontext(struct ib_device *, struct ib_udata *);
-int qedr_dealloc_ucontext(struct ib_ucontext *);
+int qedr_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata);
+void qedr_dealloc_ucontext(struct ib_ucontext *uctx);
 
 int qedr_mmap(struct ib_ucontext *, struct vm_area_struct *vma);
 int qedr_alloc_pd(struct ib_pd *pd, struct ib_ucontext *uctx,
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c b/drivers/infiniband/hw/usnic/usnic_ib_main.c
index be6468021a9a..d88d9f8a7f9a 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_main.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c
@@ -350,6 +350,7 @@ static const struct ib_device_ops usnic_dev_ops = {
 	.query_qp = usnic_ib_query_qp,
 	.reg_user_mr = usnic_ib_reg_mr,
 	INIT_RDMA_OBJ_SIZE(ib_pd, usnic_ib_pd, ibpd),
+	INIT_RDMA_OBJ_SIZE(ib_ucontext, usnic_ib_ucontext, ibucontext),
 };
 
 /* Start of PF discovery section */
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
index 7549ae23027e..bd4521b2cc5f 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
@@ -653,37 +653,31 @@ int usnic_ib_dereg_mr(struct ib_mr *ibmr)
 	return 0;
 }
 
-struct ib_ucontext *usnic_ib_alloc_ucontext(struct ib_device *ibdev,
-							struct ib_udata *udata)
+int usnic_ib_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata)
 {
-	struct usnic_ib_ucontext *context;
+	struct ib_device *ibdev = uctx->device;
+	struct usnic_ib_ucontext *context = to_ucontext(uctx);
 	struct usnic_ib_dev *us_ibdev = to_usdev(ibdev);
 	usnic_dbg("\n");
 
-	context = kzalloc(sizeof(*context), GFP_KERNEL);
-	if (!context)
-		return ERR_PTR(-ENOMEM);
-
 	INIT_LIST_HEAD(&context->qp_grp_list);
 	mutex_lock(&us_ibdev->usdev_lock);
 	list_add_tail(&context->link, &us_ibdev->ctx_list);
 	mutex_unlock(&us_ibdev->usdev_lock);
 
-	return &context->ibucontext;
+	return 0;
 }
 
-int usnic_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
+void usnic_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
 {
 	struct usnic_ib_ucontext *context = to_uucontext(ibcontext);
 	struct usnic_ib_dev *us_ibdev = to_usdev(ibcontext->device);
 	usnic_dbg("\n");
 
 	mutex_lock(&us_ibdev->usdev_lock);
-	BUG_ON(!list_empty(&context->qp_grp_list));
+	WARN_ON_ONCE(!list_empty(&context->qp_grp_list));
 	list_del(&context->link);
 	mutex_unlock(&us_ibdev->usdev_lock);
-	kfree(context);
-	return 0;
 }
 
 int usnic_ib_mmap(struct ib_ucontext *context,
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.h b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h
index 2a87650949f6..c40e89b6246f 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.h
+++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h
@@ -68,9 +68,8 @@ struct ib_mr *usnic_ib_reg_mr(struct ib_pd *pd, u64 start, u64 length,
 				u64 virt_addr, int access_flags,
 				struct ib_udata *udata);
 int usnic_ib_dereg_mr(struct ib_mr *ibmr);
-struct ib_ucontext *usnic_ib_alloc_ucontext(struct ib_device *ibdev,
-						struct ib_udata *udata);
-int usnic_ib_dealloc_ucontext(struct ib_ucontext *ibcontext);
+int usnic_ib_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata);
+void usnic_ib_dealloc_ucontext(struct ib_ucontext *ibcontext);
 int usnic_ib_mmap(struct ib_ucontext *context,
 			struct vm_area_struct *vma);
 #endif /* !USNIC_IB_VERBS_H */
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
index 47e653d2495c..6d8b3e0de57a 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
@@ -196,6 +196,7 @@ static const struct ib_device_ops pvrdma_dev_ops = {
 	.reg_user_mr = pvrdma_reg_user_mr,
 	.req_notify_cq = pvrdma_req_notify_cq,
 	INIT_RDMA_OBJ_SIZE(ib_pd, pvrdma_pd, ibpd),
+	INIT_RDMA_OBJ_SIZE(ib_ucontext, pvrdma_ucontext, ibucontext),
 };
 
 static const struct ib_device_ops pvrdma_dev_srq_ops = {
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c
index f44220f72e05..42fe821f8d58 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c
@@ -306,41 +306,32 @@ out:
 
 /**
  * pvrdma_alloc_ucontext - allocate ucontext
- * @ibdev: the IB device
+ * @uctx: the uverbs countext
  * @udata: user data
  *
- * @return: the ib_ucontext pointer on success, otherwise errno.
+ * @return:  zero on success, otherwise errno.
  */
-struct ib_ucontext *pvrdma_alloc_ucontext(struct ib_device *ibdev,
-					  struct ib_udata *udata)
+int pvrdma_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata)
 {
+	struct ib_device *ibdev = uctx->device;
 	struct pvrdma_dev *vdev = to_vdev(ibdev);
-	struct pvrdma_ucontext *context;
-	union pvrdma_cmd_req req;
-	union pvrdma_cmd_resp rsp;
+	struct pvrdma_ucontext *context = to_vucontext(uctx);
+	union pvrdma_cmd_req req = {};
+	union pvrdma_cmd_resp rsp = {};
 	struct pvrdma_cmd_create_uc *cmd = &req.create_uc;
 	struct pvrdma_cmd_create_uc_resp *resp = &rsp.create_uc_resp;
-	struct pvrdma_alloc_ucontext_resp uresp = {0};
+	struct pvrdma_alloc_ucontext_resp uresp = {};
 	int ret;
-	void *ptr;
 
 	if (!vdev->ib_active)
-		return ERR_PTR(-EAGAIN);
-
-	context = kzalloc(sizeof(*context), GFP_KERNEL);
-	if (!context)
-		return ERR_PTR(-ENOMEM);
+		return -EAGAIN;
 
 	context->dev = vdev;
 	ret = pvrdma_uar_alloc(vdev, &context->uar);
-	if (ret) {
-		kfree(context);
-		return ERR_PTR(-ENOMEM);
-	}
+	if (ret)
+		return -ENOMEM;
 
 	/* get ctx_handle from host */
-	memset(cmd, 0, sizeof(*cmd));
-
 	if (vdev->dsr_version < PVRDMA_PPN64_VERSION)
 		cmd->pfn = context->uar.pfn;
 	else
@@ -351,7 +342,6 @@ struct ib_ucontext *pvrdma_alloc_ucontext(struct ib_device *ibdev,
 	if (ret < 0) {
 		dev_warn(&vdev->pdev->dev,
 			 "could not create ucontext, error: %d\n", ret);
-		ptr = ERR_PTR(ret);
 		goto err;
 	}
 
@@ -362,33 +352,28 @@ struct ib_ucontext *pvrdma_alloc_ucontext(struct ib_device *ibdev,
 	ret = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
 	if (ret) {
 		pvrdma_uar_free(vdev, &context->uar);
-		context->ibucontext.device = ibdev;
 		pvrdma_dealloc_ucontext(&context->ibucontext);
-		return ERR_PTR(-EFAULT);
+		return -EFAULT;
 	}
 
-	return &context->ibucontext;
+	return 0;
 
 err:
 	pvrdma_uar_free(vdev, &context->uar);
-	kfree(context);
-	return ptr;
+	return ret;
 }
 
 /**
  * pvrdma_dealloc_ucontext - deallocate ucontext
  * @ibcontext: the ucontext
- *
- * @return: 0 on success, otherwise errno.
  */
-int pvrdma_dealloc_ucontext(struct ib_ucontext *ibcontext)
+void pvrdma_dealloc_ucontext(struct ib_ucontext *ibcontext)
 {
 	struct pvrdma_ucontext *context = to_vucontext(ibcontext);
-	union pvrdma_cmd_req req;
+	union pvrdma_cmd_req req = {};
 	struct pvrdma_cmd_destroy_uc *cmd = &req.destroy_uc;
 	int ret;
 
-	memset(cmd, 0, sizeof(*cmd));
 	cmd->hdr.cmd = PVRDMA_CMD_DESTROY_UC;
 	cmd->ctx_handle = context->ctx_handle;
 
@@ -399,9 +384,6 @@ int pvrdma_dealloc_ucontext(struct ib_ucontext *ibcontext)
 
 	/* Free the UAR even if the device command failed */
 	pvrdma_uar_free(to_vdev(ibcontext->device), &context->uar);
-	kfree(context);
-
-	return ret;
 }
 
 /**
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h
index ed91baad1ffa..607aa131d67c 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h
@@ -396,9 +396,8 @@ int pvrdma_modify_device(struct ib_device *ibdev, int mask,
 int pvrdma_modify_port(struct ib_device *ibdev, u8 port,
 		       int mask, struct ib_port_modify *props);
 int pvrdma_mmap(struct ib_ucontext *context, struct vm_area_struct *vma);
-struct ib_ucontext *pvrdma_alloc_ucontext(struct ib_device *ibdev,
-					  struct ib_udata *udata);
-int pvrdma_dealloc_ucontext(struct ib_ucontext *context);
+int pvrdma_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata);
+void pvrdma_dealloc_ucontext(struct ib_ucontext *context);
 int pvrdma_alloc_pd(struct ib_pd *pd, struct ib_ucontext *context,
 		    struct ib_udata *udata);
 void pvrdma_dealloc_pd(struct ib_pd *ibpd);
diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c
index ee9c82cb3b6b..42c9d35f832d 100644
--- a/drivers/infiniband/sw/rdmavt/vt.c
+++ b/drivers/infiniband/sw/rdmavt/vt.c
@@ -292,28 +292,21 @@ static inline struct rvt_ucontext *to_iucontext(struct ib_ucontext
 
 /**
  * rvt_alloc_ucontext - Allocate a user context
- * @ibdev: Verbs IB dev
+ * @uctx: Verbs context
  * @udata: User data allocated
  */
-static struct ib_ucontext *rvt_alloc_ucontext(struct ib_device *ibdev,
-					      struct ib_udata *udata)
+static int rvt_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata)
 {
-	struct rvt_ucontext *context;
-
-	context = kzalloc(sizeof(*context), GFP_KERNEL);
-	if (!context)
-		return ERR_PTR(-ENOMEM);
-	return &context->ibucontext;
+	return 0;
 }
 
 /**
- *rvt_dealloc_ucontext - Free a user context
- *@context - Free this
+ * rvt_dealloc_ucontext - Free a user context
+ * @context - Free this
  */
-static int rvt_dealloc_ucontext(struct ib_ucontext *context)
+static void rvt_dealloc_ucontext(struct ib_ucontext *context)
 {
-	kfree(to_iucontext(context));
-	return 0;
+	return;
 }
 
 static int rvt_get_port_immutable(struct ib_device *ibdev, u8 port_num,
@@ -433,6 +426,7 @@ static const struct ib_device_ops rvt_dev_ops = {
 	.resize_cq = rvt_resize_cq,
 	.unmap_fmr = rvt_unmap_fmr,
 	INIT_RDMA_OBJ_SIZE(ib_pd, rvt_pd, ibpd),
+	INIT_RDMA_OBJ_SIZE(ib_ucontext, rvt_ucontext, ibucontext),
 };
 
 static noinline int check_support(struct rvt_dev_info *rdi, int verb)
diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c
index c9b8b8c6bfb5..120fa9005954 100644
--- a/drivers/infiniband/sw/rxe/rxe_pool.c
+++ b/drivers/infiniband/sw/rxe/rxe_pool.c
@@ -42,6 +42,7 @@ struct rxe_type_info rxe_type_info[RXE_NUM_TYPES] = {
 	[RXE_TYPE_UC] = {
 		.name		= "rxe-uc",
 		.size		= sizeof(struct rxe_ucontext),
+		.flags          = RXE_POOL_NO_ALLOC,
 	},
 	[RXE_TYPE_PD] = {
 		.name		= "rxe-pd",
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index bd6a379b79d3..6ecf28570ff0 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -142,22 +142,19 @@ static enum rdma_link_layer rxe_get_link_layer(struct ib_device *dev,
 	return rxe_link_layer(rxe, port_num);
 }
 
-static struct ib_ucontext *rxe_alloc_ucontext(struct ib_device *dev,
-					      struct ib_udata *udata)
+static int rxe_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata)
 {
-	struct rxe_dev *rxe = to_rdev(dev);
-	struct rxe_ucontext *uc;
+	struct rxe_dev *rxe = to_rdev(uctx->device);
+	struct rxe_ucontext *uc = to_ruc(uctx);
 
-	uc = rxe_alloc(&rxe->uc_pool);
-	return uc ? &uc->ibuc : ERR_PTR(-ENOMEM);
+	return rxe_add_to_pool(&rxe->uc_pool, &uc->pelem);
 }
 
-static int rxe_dealloc_ucontext(struct ib_ucontext *ibuc)
+static void rxe_dealloc_ucontext(struct ib_ucontext *ibuc)
 {
 	struct rxe_ucontext *uc = to_ruc(ibuc);
 
 	rxe_drop_ref(uc);
-	return 0;
 }
 
 static int rxe_port_immutable(struct ib_device *dev, u8 port_num,
@@ -1180,6 +1177,7 @@ static const struct ib_device_ops rxe_dev_ops = {
 	.req_notify_cq = rxe_req_notify_cq,
 	.resize_cq = rxe_resize_cq,
 	INIT_RDMA_OBJ_SIZE(ib_pd, rxe_pd, ibpd),
+	INIT_RDMA_OBJ_SIZE(ib_ucontext, rxe_ucontext, ibuc),
 };
 
 int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name)
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h
index d02eb75ef282..157e51aeb1e1 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.h
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.h
@@ -61,8 +61,8 @@ static inline int psn_compare(u32 psn_a, u32 psn_b)
 }
 
 struct rxe_ucontext {
+	struct ib_ucontext ibuc;
 	struct rxe_pool_entry	pelem;
-	struct ib_ucontext	ibuc;
 };
 
 struct rxe_pd {
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 225cb76d469f..9b9e17bcc201 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -2389,9 +2389,9 @@ struct ib_device_ops {
 	int (*del_gid)(const struct ib_gid_attr *attr, void **context);
 	int (*query_pkey)(struct ib_device *device, u8 port_num, u16 index,
 			  u16 *pkey);
-	struct ib_ucontext *(*alloc_ucontext)(struct ib_device *device,
-					      struct ib_udata *udata);
-	int (*dealloc_ucontext)(struct ib_ucontext *context);
+	int (*alloc_ucontext)(struct ib_ucontext *context,
+			      struct ib_udata *udata);
+	void (*dealloc_ucontext)(struct ib_ucontext *context);
 	int (*mmap)(struct ib_ucontext *context, struct vm_area_struct *vma);
 	void (*disassociate_ucontext)(struct ib_ucontext *ibcontext);
 	int (*alloc_pd)(struct ib_pd *pd, struct ib_ucontext *context,
@@ -2551,6 +2551,7 @@ struct ib_device_ops {
 	void (*dealloc_driver)(struct ib_device *dev);
 
 	DECLARE_RDMA_OBJ_SIZE(ib_pd);
+	DECLARE_RDMA_OBJ_SIZE(ib_ucontext);
 };
 
 struct rdma_restrack_root;
-- 
cgit v1.2.3


From 0dcaafc0b8dcf65d786b12f74d96aaba63884d1b Mon Sep 17 00:00:00 2001
From: Eli Britstein <elibr@mellanox.com>
Date: Sun, 20 Jan 2019 22:33:19 +0200
Subject: net/mlx5: Introduce tunnel entropy control in PCMR register

When using the device packet encapsulation offload, the device
calculates an entropy value, representing the inner packet headers. The
entropy field is placed inside the outer packet headers. For UDP-type
encapsulations, the entropy is placed in the source port field of the
UDP header. For GRE-type encapsulations, the entropy is placed in the 8
LSB of the key field in the GRE header. If the device does not recognize
the encapsulation type, the entropy is not placed in the packet.

Entropy setting can be controlled using PCMR register. if encapsulation
offload is not used force_entropy_cap should be set to 0x0. Entropy
setting is enabled/disabled using entropy_calc, and could be
additionally enabled/disabled for GRE encapsulation by entropy_gre_calc.

As a pre-step to automatically control the tunnel entropy, introduce
the entropy fields in the PCMR register with no functional change.

Signed-off-by: Eli Britstein <elibr@mellanox.com>
Reviewed-by: Oz Shlomo <ozsh@mellanox.com>
Reviewed-by: Roi Dayan <roid@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 include/linux/mlx5/mlx5_ifc.h | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index b7bb774b57b0..3b83288749c6 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -8473,9 +8473,17 @@ struct mlx5_ifc_pamp_reg_bits {
 struct mlx5_ifc_pcmr_reg_bits {
 	u8         reserved_at_0[0x8];
 	u8         local_port[0x8];
-	u8         reserved_at_10[0x2e];
+	u8         reserved_at_10[0x10];
+	u8         entropy_force_cap[0x1];
+	u8         entropy_calc_cap[0x1];
+	u8         entropy_gre_calc_cap[0x1];
+	u8         reserved_at_23[0x1b];
 	u8         fcs_cap[0x1];
-	u8         reserved_at_3f[0x1f];
+	u8         reserved_at_3f[0x1];
+	u8         entropy_force[0x1];
+	u8         entropy_calc[0x1];
+	u8         entropy_gre_calc[0x1];
+	u8         reserved_at_43[0x1b];
 	u8         fcs_chk[0x1];
 	u8         reserved_at_5f[0x1];
 };
-- 
cgit v1.2.3


From 97417f6182f80a80c9b4443f282ef707be74dade Mon Sep 17 00:00:00 2001
From: Eli Britstein <elibr@mellanox.com>
Date: Mon, 14 Jan 2019 10:07:44 +0200
Subject: net/mlx5e: Fix GRE key by controlling port tunnel entropy calculation

Flow entropy is calculated on the inner packet headers and used for
flow distribution in processing, routing etc. For GRE-type
encapsulations the entropy value is placed in the eight LSB of the key
field in the GRE header as defined in NVGRE RFC 7637. For UDP based
encapsulations the entropy value is placed in the source port of the
UDP header.
The hardware may support entropy calculation specifically for GRE and
for all tunneling protocols. With commit df2ef3bff193 ("net/mlx5e: Add
GRE protocol offloading") GRE is offloaded, but the hardware is
configured by default to calculate flow entropy so packets transmitted
on the wire have a wrong key. To support UDP based tunnels (i.e VXLAN),
GRE (i.e. no flow entropy) and NVGRE (i.e. with flow entropy) the
hardware behaviour must be controlled by the driver.

Ensure port entropy calculation is enabled for offloaded VXLAN tunnels
and disable port entropy calculation in the presence of offloaded GRE
tunnels by monitoring the presence of entropy enabling tunnels (i.e
VXLAN) and entropy disabing tunnels (i.e GRE).

Fixes: df2ef3bff193 ("net/mlx5e: Add GRE protocol offloading")
Signed-off-by: Eli Britstein <elibr@mellanox.com>
Reviewed-by: Oz Shlomo <ozsh@mellanox.com>
Reviewed-by: Roi Dayan <roid@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/Makefile   |   2 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_rep.c   |  18 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_rep.h   |   3 +
 .../net/ethernet/mellanox/mlx5/core/lib/port_tun.c | 205 +++++++++++++++++++++
 .../net/ethernet/mellanox/mlx5/core/lib/port_tun.h |  24 +++
 drivers/net/ethernet/mellanox/mlx5/core/port.c     |   5 +-
 include/linux/mlx5/port.h                          |   2 +
 7 files changed, 254 insertions(+), 5 deletions(-)
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.c
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.h

(limited to 'include')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index 82d636baaa4e..17f1a8b28c0a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -30,7 +30,7 @@ mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \
 mlx5_core-$(CONFIG_MLX5_EN_ARFS)     += en_arfs.o
 mlx5_core-$(CONFIG_MLX5_EN_RXNFC)    += en_fs_ethtool.o
 mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) += en_dcbnl.o en/port_buffer.o
-mlx5_core-$(CONFIG_MLX5_ESWITCH)     += en_rep.o en_tc.o en/tc_tun.o
+mlx5_core-$(CONFIG_MLX5_ESWITCH)     += en_rep.o en_tc.o en/tc_tun.o lib/port_tun.o
 
 #
 # Core extra
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 287d48e5b073..4d033e01f6ab 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -44,6 +44,7 @@
 #include "en_tc.h"
 #include "en/tc_tun.h"
 #include "fs_core.h"
+#include "lib/port_tun.h"
 
 #define MLX5E_REP_PARAMS_DEF_LOG_SQ_SIZE \
         max(0x7, MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE)
@@ -1044,14 +1045,23 @@ static void mlx5e_rep_neigh_entry_destroy(struct mlx5e_priv *priv,
 int mlx5e_rep_encap_entry_attach(struct mlx5e_priv *priv,
 				 struct mlx5e_encap_entry *e)
 {
+	struct mlx5e_rep_priv *rpriv = priv->ppriv;
+	struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv;
+	struct mlx5_tun_entropy *tun_entropy = &uplink_priv->tun_entropy;
 	struct mlx5e_neigh_hash_entry *nhe;
 	int err;
 
+	err = mlx5_tun_entropy_refcount_inc(tun_entropy, e->reformat_type);
+	if (err)
+		return err;
 	nhe = mlx5e_rep_neigh_entry_lookup(priv, &e->m_neigh);
 	if (!nhe) {
 		err = mlx5e_rep_neigh_entry_create(priv, e, &nhe);
-		if (err)
+		if (err) {
+			mlx5_tun_entropy_refcount_dec(tun_entropy,
+						      e->reformat_type);
 			return err;
+		}
 	}
 	list_add(&e->encap_list, &nhe->encap_list);
 	return 0;
@@ -1060,6 +1070,9 @@ int mlx5e_rep_encap_entry_attach(struct mlx5e_priv *priv,
 void mlx5e_rep_encap_entry_detach(struct mlx5e_priv *priv,
 				  struct mlx5e_encap_entry *e)
 {
+	struct mlx5e_rep_priv *rpriv = priv->ppriv;
+	struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv;
+	struct mlx5_tun_entropy *tun_entropy = &uplink_priv->tun_entropy;
 	struct mlx5e_neigh_hash_entry *nhe;
 
 	list_del(&e->encap_list);
@@ -1067,6 +1080,7 @@ void mlx5e_rep_encap_entry_detach(struct mlx5e_priv *priv,
 
 	if (list_empty(&nhe->encap_list))
 		mlx5e_rep_neigh_entry_destroy(priv, nhe);
+	mlx5_tun_entropy_refcount_dec(tun_entropy, e->reformat_type);
 }
 
 static int mlx5e_vf_rep_open(struct net_device *dev)
@@ -1564,6 +1578,8 @@ static int mlx5e_init_rep_tx(struct mlx5e_priv *priv)
 		if (err)
 			goto destroy_tises;
 
+		mlx5_init_port_tun_entropy(&uplink_priv->tun_entropy, priv->mdev);
+
 		/* init indirect block notifications */
 		INIT_LIST_HEAD(&uplink_priv->tc_indr_block_priv_list);
 		uplink_priv->netdevice_nb.notifier_call = mlx5e_nic_rep_netdevice_event;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
index 36eafc877e6b..1aa3e110bb97 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
@@ -37,6 +37,7 @@
 #include <linux/rhashtable.h>
 #include "eswitch.h"
 #include "en.h"
+#include "lib/port_tun.h"
 
 #ifdef CONFIG_MLX5_ESWITCH
 struct mlx5e_neigh_update_table {
@@ -71,6 +72,8 @@ struct mlx5_rep_uplink_priv {
 	 */
 	struct list_head	    tc_indr_block_priv_list;
 	struct notifier_block	    netdevice_nb;
+
+	struct mlx5_tun_entropy tun_entropy;
 };
 
 struct mlx5e_rep_priv {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.c
new file mode 100644
index 000000000000..40f4a19b1ce1
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.c
@@ -0,0 +1,205 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#include <linux/module.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/port.h>
+#include <linux/mlx5/cmd.h>
+#include "mlx5_core.h"
+#include "lib/port_tun.h"
+
+struct mlx5_port_tun_entropy_flags {
+	bool force_supported, force_enabled;
+	bool calc_supported, calc_enabled;
+	bool gre_calc_supported, gre_calc_enabled;
+};
+
+static void mlx5_query_port_tun_entropy(struct mlx5_core_dev *mdev,
+					struct mlx5_port_tun_entropy_flags *entropy_flags)
+{
+	u32 out[MLX5_ST_SZ_DW(pcmr_reg)];
+	/* Default values for FW which do not support MLX5_REG_PCMR */
+	entropy_flags->force_supported = false;
+	entropy_flags->calc_supported = false;
+	entropy_flags->gre_calc_supported = false;
+	entropy_flags->force_enabled = false;
+	entropy_flags->calc_enabled = true;
+	entropy_flags->gre_calc_enabled = true;
+
+	if (!MLX5_CAP_GEN(mdev, ports_check))
+		return;
+
+	if (mlx5_query_ports_check(mdev, out, sizeof(out)))
+		return;
+
+	entropy_flags->force_supported = !!(MLX5_GET(pcmr_reg, out, entropy_force_cap));
+	entropy_flags->calc_supported = !!(MLX5_GET(pcmr_reg, out, entropy_calc_cap));
+	entropy_flags->gre_calc_supported = !!(MLX5_GET(pcmr_reg, out, entropy_gre_calc_cap));
+	entropy_flags->force_enabled = !!(MLX5_GET(pcmr_reg, out, entropy_force));
+	entropy_flags->calc_enabled = !!(MLX5_GET(pcmr_reg, out, entropy_calc));
+	entropy_flags->gre_calc_enabled = !!(MLX5_GET(pcmr_reg, out, entropy_gre_calc));
+}
+
+static int mlx5_set_port_tun_entropy_calc(struct mlx5_core_dev *mdev, u8 enable,
+					  u8 force)
+{
+	u32 in[MLX5_ST_SZ_DW(pcmr_reg)] = {0};
+	int err;
+
+	err = mlx5_query_ports_check(mdev, in, sizeof(in));
+	if (err)
+		return err;
+	MLX5_SET(pcmr_reg, in, local_port, 1);
+	MLX5_SET(pcmr_reg, in, entropy_force, force);
+	MLX5_SET(pcmr_reg, in, entropy_calc, enable);
+	return mlx5_set_ports_check(mdev, in, sizeof(in));
+}
+
+static int mlx5_set_port_gre_tun_entropy_calc(struct mlx5_core_dev *mdev,
+					      u8 enable, u8 force)
+{
+	u32 in[MLX5_ST_SZ_DW(pcmr_reg)] = {0};
+	int err;
+
+	err = mlx5_query_ports_check(mdev, in, sizeof(in));
+	if (err)
+		return err;
+	MLX5_SET(pcmr_reg, in, local_port, 1);
+	MLX5_SET(pcmr_reg, in, entropy_force, force);
+	MLX5_SET(pcmr_reg, in, entropy_gre_calc, enable);
+	return mlx5_set_ports_check(mdev, in, sizeof(in));
+}
+
+void mlx5_init_port_tun_entropy(struct mlx5_tun_entropy *tun_entropy,
+				struct mlx5_core_dev *mdev)
+{
+	struct mlx5_port_tun_entropy_flags entropy_flags;
+
+	tun_entropy->mdev = mdev;
+	mutex_init(&tun_entropy->lock);
+	mlx5_query_port_tun_entropy(mdev, &entropy_flags);
+	tun_entropy->num_enabling_entries = 0;
+	tun_entropy->num_disabling_entries = 0;
+	tun_entropy->enabled = entropy_flags.calc_enabled;
+	tun_entropy->enabled =
+		(entropy_flags.calc_supported) ?
+		entropy_flags.calc_enabled : true;
+}
+
+static int mlx5_set_entropy(struct mlx5_tun_entropy *tun_entropy,
+			    int reformat_type, bool enable)
+{
+	struct mlx5_port_tun_entropy_flags entropy_flags;
+	int err;
+
+	mlx5_query_port_tun_entropy(tun_entropy->mdev, &entropy_flags);
+	/* Tunnel entropy calculation may be controlled either on port basis
+	 * for all tunneling protocols or specifically for GRE protocol.
+	 * Prioritize GRE protocol control (if capable) over global port
+	 * configuration.
+	 */
+	if (entropy_flags.gre_calc_supported &&
+	    reformat_type == MLX5_REFORMAT_TYPE_L2_TO_NVGRE) {
+		/* Other applications may change the global FW entropy
+		 * calculations settings. Check that the current entropy value
+		 * is the negative of the updated value.
+		 */
+		if (entropy_flags.force_enabled &&
+		    enable == entropy_flags.gre_calc_enabled) {
+			mlx5_core_warn(tun_entropy->mdev,
+				       "Unexpected GRE entropy calc setting - expected %d",
+				       !entropy_flags.gre_calc_enabled);
+			return -EOPNOTSUPP;
+		}
+		err = mlx5_set_port_gre_tun_entropy_calc(tun_entropy->mdev, enable,
+							 entropy_flags.force_supported);
+		if (err)
+			return err;
+		/* if we turn on the entropy we don't need to force it anymore */
+		if (entropy_flags.force_supported && enable) {
+			err = mlx5_set_port_gre_tun_entropy_calc(tun_entropy->mdev, 1, 0);
+			if (err)
+				return err;
+		}
+	} else if (entropy_flags.calc_supported) {
+		/* Other applications may change the global FW entropy
+		 * calculations settings. Check that the current entropy value
+		 * is the negative of the updated value.
+		 */
+		if (entropy_flags.force_enabled &&
+		    enable == entropy_flags.calc_enabled) {
+			mlx5_core_warn(tun_entropy->mdev,
+				       "Unexpected entropy calc setting - expected %d",
+				       !entropy_flags.calc_enabled);
+			return -EOPNOTSUPP;
+		}
+		/* GRE requires disabling entropy calculation. if there are
+		 * enabling entries (i.e VXLAN) we cannot turn it off for them,
+		 * thus fail.
+		 */
+		if (tun_entropy->num_enabling_entries)
+			return -EOPNOTSUPP;
+		err = mlx5_set_port_tun_entropy_calc(tun_entropy->mdev, enable,
+						     entropy_flags.force_supported);
+		if (err)
+			return err;
+		tun_entropy->enabled = enable;
+		/* if we turn on the entropy we don't need to force it anymore */
+		if (entropy_flags.force_supported && enable) {
+			err = mlx5_set_port_tun_entropy_calc(tun_entropy->mdev, 1, 0);
+			if (err)
+				return err;
+		}
+	}
+
+	return 0;
+}
+
+/* the function manages the refcount for enabling/disabling tunnel types.
+ * the return value indicates if the inc is successful or not, depending on
+ * entropy capabilities and configuration.
+ */
+int mlx5_tun_entropy_refcount_inc(struct mlx5_tun_entropy *tun_entropy,
+				  int reformat_type)
+{
+	/* the default is error for unknown (non VXLAN/GRE tunnel types) */
+	int err = -EOPNOTSUPP;
+
+	mutex_lock(&tun_entropy->lock);
+	if (reformat_type == MLX5_REFORMAT_TYPE_L2_TO_VXLAN &&
+	    tun_entropy->enabled) {
+		/* in case entropy calculation is enabled for all tunneling
+		 * types, it is ok for VXLAN, so approve.
+		 * otherwise keep the error default.
+		 */
+		tun_entropy->num_enabling_entries++;
+		err = 0;
+	} else if (reformat_type == MLX5_REFORMAT_TYPE_L2_TO_NVGRE) {
+		/* turn off the entropy only for the first GRE rule.
+		 * for the next rules the entropy was already disabled
+		 * successfully.
+		 */
+		if (tun_entropy->num_disabling_entries == 0)
+			err = mlx5_set_entropy(tun_entropy, reformat_type, 0);
+		else
+			err = 0;
+		if (!err)
+			tun_entropy->num_disabling_entries++;
+	}
+	mutex_unlock(&tun_entropy->lock);
+
+	return err;
+}
+
+void mlx5_tun_entropy_refcount_dec(struct mlx5_tun_entropy *tun_entropy,
+				   int reformat_type)
+{
+	mutex_lock(&tun_entropy->lock);
+	if (reformat_type == MLX5_REFORMAT_TYPE_L2_TO_VXLAN)
+		tun_entropy->num_enabling_entries--;
+	else if (reformat_type == MLX5_REFORMAT_TYPE_L2_TO_NVGRE &&
+		 --tun_entropy->num_disabling_entries == 0)
+		mlx5_set_entropy(tun_entropy, reformat_type, 1);
+	mutex_unlock(&tun_entropy->lock);
+}
+
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.h
new file mode 100644
index 000000000000..54c42a88705e
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#ifndef __MLX5_PORT_TUN_H__
+#define __MLX5_PORT_TUN_H__
+
+#include <linux/mlx5/driver.h>
+
+struct mlx5_tun_entropy {
+	struct mlx5_core_dev *mdev;
+	u32 num_enabling_entries;
+	u32 num_disabling_entries;
+	u8  enabled;
+	struct mutex lock;	/* lock the entropy fields */
+};
+
+void mlx5_init_port_tun_entropy(struct mlx5_tun_entropy *tun_entropy,
+				struct mlx5_core_dev *mdev);
+int mlx5_tun_entropy_refcount_inc(struct mlx5_tun_entropy *tun_entropy,
+				  int reformat_type);
+void mlx5_tun_entropy_refcount_dec(struct mlx5_tun_entropy *tun_entropy,
+				   int reformat_type);
+
+#endif /* __MLX5_PORT_TUN_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c
index 55b30d21a73a..21b7f05b16a5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/port.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c
@@ -764,8 +764,7 @@ int mlx5_query_port_wol(struct mlx5_core_dev *mdev, u8 *wol_mode)
 }
 EXPORT_SYMBOL_GPL(mlx5_query_port_wol);
 
-static int mlx5_query_ports_check(struct mlx5_core_dev *mdev, u32 *out,
-				  int outlen)
+int mlx5_query_ports_check(struct mlx5_core_dev *mdev, u32 *out, int outlen)
 {
 	u32 in[MLX5_ST_SZ_DW(pcmr_reg)] = {0};
 
@@ -774,7 +773,7 @@ static int mlx5_query_ports_check(struct mlx5_core_dev *mdev, u32 *out,
 				    outlen, MLX5_REG_PCMR, 0, 0);
 }
 
-static int mlx5_set_ports_check(struct mlx5_core_dev *mdev, u32 *in, int inlen)
+int mlx5_set_ports_check(struct mlx5_core_dev *mdev, u32 *in, int inlen)
 {
 	u32 out[MLX5_ST_SZ_DW(pcmr_reg)];
 
diff --git a/include/linux/mlx5/port.h b/include/linux/mlx5/port.h
index 814fa194663b..64e78394fc9c 100644
--- a/include/linux/mlx5/port.h
+++ b/include/linux/mlx5/port.h
@@ -182,6 +182,8 @@ int mlx5_query_port_ets_rate_limit(struct mlx5_core_dev *mdev,
 int mlx5_set_port_wol(struct mlx5_core_dev *mdev, u8 wol_mode);
 int mlx5_query_port_wol(struct mlx5_core_dev *mdev, u8 *wol_mode);
 
+int mlx5_query_ports_check(struct mlx5_core_dev *mdev, u32 *out, int outlen);
+int mlx5_set_ports_check(struct mlx5_core_dev *mdev, u32 *in, int inlen);
 int mlx5_set_port_fcs(struct mlx5_core_dev *mdev, u8 enable);
 void mlx5_query_port_fcs(struct mlx5_core_dev *mdev, bool *supported,
 			 bool *enabled);
-- 
cgit v1.2.3


From 14215108a1fd7e002c0a1f9faf8fbaf41fdda50d Mon Sep 17 00:00:00 2001
From: Cong Wang <xiyou.wangcong@gmail.com>
Date: Wed, 20 Feb 2019 21:37:42 -0800
Subject: net_sched: initialize net pointer inside tcf_exts_init()

For tcindex filter, it is too late to initialize the
net pointer in tcf_exts_validate(), as tcf_exts_get_net()
requires a non-NULL net pointer. We can just move its
initialization into tcf_exts_init(), which just requires
an additional parameter.

This makes the code in tcindex_alloc_perfect_hash()
prettier.

Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Cc: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/pkt_cls.h    |  5 +++--
 net/sched/cls_api.c      |  1 -
 net/sched/cls_basic.c    |  2 +-
 net/sched/cls_bpf.c      |  2 +-
 net/sched/cls_cgroup.c   |  2 +-
 net/sched/cls_flow.c     |  2 +-
 net/sched/cls_flower.c   |  2 +-
 net/sched/cls_fw.c       |  5 +++--
 net/sched/cls_matchall.c |  2 +-
 net/sched/cls_route.c    |  2 +-
 net/sched/cls_rsvp.h     |  7 ++++---
 net/sched/cls_tcindex.c  | 19 +++++++++----------
 net/sched/cls_u32.c      |  8 ++++----
 13 files changed, 30 insertions(+), 29 deletions(-)

(limited to 'include')

diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 6a530bef9253..58ea48e1221c 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -289,12 +289,13 @@ struct tcf_exts {
 	int police;
 };
 
-static inline int tcf_exts_init(struct tcf_exts *exts, int action, int police)
+static inline int tcf_exts_init(struct tcf_exts *exts, struct net *net,
+				int action, int police)
 {
 #ifdef CONFIG_NET_CLS_ACT
 	exts->type = 0;
 	exts->nr_actions = 0;
-	exts->net = NULL;
+	exts->net = net;
 	exts->actions = kcalloc(TCA_ACT_MAX_PRIO, sizeof(struct tc_action *),
 				GFP_KERNEL);
 	if (!exts->actions)
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 28592e9f803f..6593c245f714 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -3051,7 +3051,6 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
 				return err;
 			exts->nr_actions = err;
 		}
-		exts->net = net;
 	}
 #else
 	if ((exts->action && tb[exts->action]) ||
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index 2383f449d2bc..687b0af67878 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -199,7 +199,7 @@ static int basic_change(struct net *net, struct sk_buff *in_skb,
 	if (!fnew)
 		return -ENOBUFS;
 
-	err = tcf_exts_init(&fnew->exts, TCA_BASIC_ACT, TCA_BASIC_POLICE);
+	err = tcf_exts_init(&fnew->exts, net, TCA_BASIC_ACT, TCA_BASIC_POLICE);
 	if (err < 0)
 		goto errout;
 
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index 062350c6621c..b4ac58039cb1 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -477,7 +477,7 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
 	if (!prog)
 		return -ENOBUFS;
 
-	ret = tcf_exts_init(&prog->exts, TCA_BPF_ACT, TCA_BPF_POLICE);
+	ret = tcf_exts_init(&prog->exts, net, TCA_BPF_ACT, TCA_BPF_POLICE);
 	if (ret < 0)
 		goto errout;
 
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index 02b05066b635..4c1567854f95 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -99,7 +99,7 @@ static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb,
 	if (!new)
 		return -ENOBUFS;
 
-	err = tcf_exts_init(&new->exts, TCA_CGROUP_ACT, TCA_CGROUP_POLICE);
+	err = tcf_exts_init(&new->exts, net, TCA_CGROUP_ACT, TCA_CGROUP_POLICE);
 	if (err < 0)
 		goto errout;
 	new->handle = handle;
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index 204e2edae8d5..eece1ee26930 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -441,7 +441,7 @@ static int flow_change(struct net *net, struct sk_buff *in_skb,
 	if (err < 0)
 		goto err1;
 
-	err = tcf_exts_init(&fnew->exts, TCA_FLOW_ACT, TCA_FLOW_POLICE);
+	err = tcf_exts_init(&fnew->exts, net, TCA_FLOW_ACT, TCA_FLOW_POLICE);
 	if (err < 0)
 		goto err2;
 
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 640f83e7f93f..27300a3e76c7 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -1344,7 +1344,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
 		goto errout_tb;
 	}
 
-	err = tcf_exts_init(&fnew->exts, TCA_FLOWER_ACT, 0);
+	err = tcf_exts_init(&fnew->exts, net, TCA_FLOWER_ACT, 0);
 	if (err < 0)
 		goto errout;
 
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index 4e34966f2ae2..456ee6e62dfa 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -285,7 +285,8 @@ static int fw_change(struct net *net, struct sk_buff *in_skb,
 #endif /* CONFIG_NET_CLS_IND */
 		fnew->tp = f->tp;
 
-		err = tcf_exts_init(&fnew->exts, TCA_FW_ACT, TCA_FW_POLICE);
+		err = tcf_exts_init(&fnew->exts, net, TCA_FW_ACT,
+				    TCA_FW_POLICE);
 		if (err < 0) {
 			kfree(fnew);
 			return err;
@@ -334,7 +335,7 @@ static int fw_change(struct net *net, struct sk_buff *in_skb,
 	if (f == NULL)
 		return -ENOBUFS;
 
-	err = tcf_exts_init(&f->exts, TCA_FW_ACT, TCA_FW_POLICE);
+	err = tcf_exts_init(&f->exts, net, TCA_FW_ACT, TCA_FW_POLICE);
 	if (err < 0)
 		goto errout;
 	f->id = handle;
diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c
index 1f9d481b0fbb..459921bd3d87 100644
--- a/net/sched/cls_matchall.c
+++ b/net/sched/cls_matchall.c
@@ -191,7 +191,7 @@ static int mall_change(struct net *net, struct sk_buff *in_skb,
 	if (!new)
 		return -ENOBUFS;
 
-	err = tcf_exts_init(&new->exts, TCA_MATCHALL_ACT, 0);
+	err = tcf_exts_init(&new->exts, net, TCA_MATCHALL_ACT, 0);
 	if (err)
 		goto err_exts_init;
 
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index 444d15a75d98..f006af23b64a 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -497,7 +497,7 @@ static int route4_change(struct net *net, struct sk_buff *in_skb,
 	if (!f)
 		goto errout;
 
-	err = tcf_exts_init(&f->exts, TCA_ROUTE4_ACT, TCA_ROUTE4_POLICE);
+	err = tcf_exts_init(&f->exts, net, TCA_ROUTE4_ACT, TCA_ROUTE4_POLICE);
 	if (err < 0)
 		goto errout;
 
diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h
index 4d3836178fa5..0719a21d9c41 100644
--- a/net/sched/cls_rsvp.h
+++ b/net/sched/cls_rsvp.h
@@ -501,7 +501,7 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb,
 	if (err < 0)
 		return err;
 
-	err = tcf_exts_init(&e, TCA_RSVP_ACT, TCA_RSVP_POLICE);
+	err = tcf_exts_init(&e, net, TCA_RSVP_ACT, TCA_RSVP_POLICE);
 	if (err < 0)
 		return err;
 	err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr, true,
@@ -523,7 +523,8 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb,
 			goto errout2;
 		}
 
-		err = tcf_exts_init(&n->exts, TCA_RSVP_ACT, TCA_RSVP_POLICE);
+		err = tcf_exts_init(&n->exts, net, TCA_RSVP_ACT,
+				    TCA_RSVP_POLICE);
 		if (err < 0) {
 			kfree(n);
 			goto errout2;
@@ -551,7 +552,7 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb,
 	if (f == NULL)
 		goto errout2;
 
-	err = tcf_exts_init(&f->exts, TCA_RSVP_ACT, TCA_RSVP_POLICE);
+	err = tcf_exts_init(&f->exts, net, TCA_RSVP_ACT, TCA_RSVP_POLICE);
 	if (err < 0)
 		goto errout;
 	h2 = 16;
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index fbf3519a12d8..24e0a62a65cc 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -246,10 +246,12 @@ static const struct nla_policy tcindex_policy[TCA_TCINDEX_MAX + 1] = {
 	[TCA_TCINDEX_CLASSID]		= { .type = NLA_U32 },
 };
 
-static int tcindex_filter_result_init(struct tcindex_filter_result *r)
+static int tcindex_filter_result_init(struct tcindex_filter_result *r,
+				      struct net *net)
 {
 	memset(r, 0, sizeof(*r));
-	return tcf_exts_init(&r->exts, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE);
+	return tcf_exts_init(&r->exts, net, TCA_TCINDEX_ACT,
+			     TCA_TCINDEX_POLICE);
 }
 
 static void tcindex_partial_destroy_work(struct work_struct *work)
@@ -281,13 +283,10 @@ static int tcindex_alloc_perfect_hash(struct net *net, struct tcindex_data *cp)
 		return -ENOMEM;
 
 	for (i = 0; i < cp->hash; i++) {
-		err = tcf_exts_init(&cp->perfect[i].exts,
+		err = tcf_exts_init(&cp->perfect[i].exts, net,
 				    TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE);
 		if (err < 0)
 			goto errout;
-#ifdef CONFIG_NET_CLS_ACT
-		cp->perfect[i].exts.net = net;
-#endif
 	}
 
 	return 0;
@@ -310,7 +309,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
 	int err, balloc = 0;
 	struct tcf_exts e;
 
-	err = tcf_exts_init(&e, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE);
+	err = tcf_exts_init(&e, net, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE);
 	if (err < 0)
 		return err;
 	err = tcf_exts_validate(net, tp, tb, est, &e, ovr, true, extack);
@@ -344,7 +343,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
 	}
 	cp->h = p->h;
 
-	err = tcindex_filter_result_init(&new_filter_result);
+	err = tcindex_filter_result_init(&new_filter_result, net);
 	if (err < 0)
 		goto errout1;
 	if (old_r)
@@ -431,7 +430,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
 			goto errout_alloc;
 		f->key = handle;
 		f->next = NULL;
-		err = tcindex_filter_result_init(&f->result);
+		err = tcindex_filter_result_init(&f->result, net);
 		if (err < 0) {
 			kfree(f);
 			goto errout_alloc;
@@ -444,7 +443,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
 	}
 
 	if (old_r && old_r != r) {
-		err = tcindex_filter_result_init(old_r);
+		err = tcindex_filter_result_init(old_r, net);
 		if (err < 0) {
 			kfree(f);
 			goto errout_alloc;
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index 27d29c04dcc9..48e76a3acf8a 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -804,7 +804,7 @@ static void u32_replace_knode(struct tcf_proto *tp, struct tc_u_common *tp_c,
 	rcu_assign_pointer(*ins, n);
 }
 
-static struct tc_u_knode *u32_init_knode(struct tcf_proto *tp,
+static struct tc_u_knode *u32_init_knode(struct net *net, struct tcf_proto *tp,
 					 struct tc_u_knode *n)
 {
 	struct tc_u_hnode *ht = rtnl_dereference(n->ht_down);
@@ -849,7 +849,7 @@ static struct tc_u_knode *u32_init_knode(struct tcf_proto *tp,
 #endif
 	memcpy(&new->sel, s, sizeof(*s) + s->nkeys*sizeof(struct tc_u32_key));
 
-	if (tcf_exts_init(&new->exts, TCA_U32_ACT, TCA_U32_POLICE)) {
+	if (tcf_exts_init(&new->exts, net, TCA_U32_ACT, TCA_U32_POLICE)) {
 		kfree(new);
 		return NULL;
 	}
@@ -911,7 +911,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
 			return -EINVAL;
 		}
 
-		new = u32_init_knode(tp, n);
+		new = u32_init_knode(net, tp, n);
 		if (!new)
 			return -ENOMEM;
 
@@ -1061,7 +1061,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
 	n->fshift = s->hmask ? ffs(ntohl(s->hmask)) - 1 : 0;
 	n->flags = flags;
 
-	err = tcf_exts_init(&n->exts, TCA_U32_ACT, TCA_U32_POLICE);
+	err = tcf_exts_init(&n->exts, net, TCA_U32_ACT, TCA_U32_POLICE);
 	if (err < 0)
 		goto errout;
 
-- 
cgit v1.2.3


From b4822dc7564f007e7a9b5188b791b7a923e34104 Mon Sep 17 00:00:00 2001
From: Joseph Lo <josephl@nvidia.com>
Date: Thu, 21 Feb 2019 15:21:44 +0800
Subject: clocksource/drivers/tegra: Add Tegra210 timer support

Add support for the Tegra210 timer that runs at oscillator clock
(TMR10-TMR13). We need these timers to work as clock event device and to
replace the ARMv8 architected timer due to it can't survive across the
power cycle of the CPU core or CPUPORESET signal. So it can't be a wake-up
source when CPU suspends in power down state.

Also convert the original driver to use timer-of API.

Cc: Daniel Lezcano <daniel.lezcano@linaro.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Joseph Lo <josephl@nvidia.com>
Acked-by: Thierry Reding <treding@nvidia.com>
Acked-by: Jon Hunter <jonathanh@nvidia.com>
Acked-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
---
 drivers/clocksource/Kconfig         |   3 +-
 drivers/clocksource/timer-tegra20.c | 370 +++++++++++++++++++++++++-----------
 include/linux/cpuhotplug.h          |   1 +
 3 files changed, 262 insertions(+), 112 deletions(-)

(limited to 'include')

diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig
index 8dfd3bc448d0..5d93e580e5dc 100644
--- a/drivers/clocksource/Kconfig
+++ b/drivers/clocksource/Kconfig
@@ -131,7 +131,8 @@ config SUN5I_HSTIMER
 config TEGRA_TIMER
 	bool "Tegra timer driver" if COMPILE_TEST
 	select CLKSRC_MMIO
-	depends on ARM
+	select TIMER_OF
+	depends on ARM || ARM64
 	help
 	  Enables support for the Tegra driver.
 
diff --git a/drivers/clocksource/timer-tegra20.c b/drivers/clocksource/timer-tegra20.c
index 4293943f4e2b..fdb3d795a409 100644
--- a/drivers/clocksource/timer-tegra20.c
+++ b/drivers/clocksource/timer-tegra20.c
@@ -15,21 +15,24 @@
  *
  */
 
-#include <linux/init.h>
+#include <linux/clk.h>
+#include <linux/clockchips.h>
+#include <linux/cpu.h>
+#include <linux/cpumask.h>
+#include <linux/delay.h>
 #include <linux/err.h>
-#include <linux/time.h>
 #include <linux/interrupt.h>
-#include <linux/irq.h>
-#include <linux/clockchips.h>
-#include <linux/clocksource.h>
-#include <linux/clk.h>
-#include <linux/io.h>
 #include <linux/of_address.h>
 #include <linux/of_irq.h>
+#include <linux/percpu.h>
 #include <linux/sched_clock.h>
-#include <linux/delay.h>
+#include <linux/time.h>
+
+#include "timer-of.h"
 
+#ifdef CONFIG_ARM
 #include <asm/mach/time.h>
+#endif
 
 #define RTC_SECONDS            0x08
 #define RTC_SHADOW_SECONDS     0x0c
@@ -39,74 +42,161 @@
 #define TIMERUS_USEC_CFG 0x14
 #define TIMERUS_CNTR_FREEZE 0x4c
 
-#define TIMER1_BASE 0x0
-#define TIMER2_BASE 0x8
-#define TIMER3_BASE 0x50
-#define TIMER4_BASE 0x58
-
-#define TIMER_PTV 0x0
-#define TIMER_PCR 0x4
-
+#define TIMER_PTV		0x0
+#define TIMER_PTV_EN		BIT(31)
+#define TIMER_PTV_PER		BIT(30)
+#define TIMER_PCR		0x4
+#define TIMER_PCR_INTR_CLR	BIT(30)
+
+#ifdef CONFIG_ARM
+#define TIMER_CPU0		0x50 /* TIMER3 */
+#else
+#define TIMER_CPU0		0x90 /* TIMER10 */
+#define TIMER10_IRQ_IDX		10
+#define IRQ_IDX_FOR_CPU(cpu)	(TIMER10_IRQ_IDX + cpu)
+#endif
+#define TIMER_BASE_FOR_CPU(cpu) (TIMER_CPU0 + (cpu) * 8)
+
+static u32 usec_config;
 static void __iomem *timer_reg_base;
+#ifdef CONFIG_ARM
 static void __iomem *rtc_base;
-
 static struct timespec64 persistent_ts;
 static u64 persistent_ms, last_persistent_ms;
-
 static struct delay_timer tegra_delay_timer;
-
-#define timer_writel(value, reg) \
-	writel_relaxed(value, timer_reg_base + (reg))
-#define timer_readl(reg) \
-	readl_relaxed(timer_reg_base + (reg))
+#endif
 
 static int tegra_timer_set_next_event(unsigned long cycles,
 					 struct clock_event_device *evt)
 {
-	u32 reg;
+	void __iomem *reg_base = timer_of_base(to_timer_of(evt));
 
-	reg = 0x80000000 | ((cycles > 1) ? (cycles-1) : 0);
-	timer_writel(reg, TIMER3_BASE + TIMER_PTV);
+	writel(TIMER_PTV_EN |
+	       ((cycles > 1) ? (cycles - 1) : 0), /* n+1 scheme */
+	       reg_base + TIMER_PTV);
 
 	return 0;
 }
 
-static inline void timer_shutdown(struct clock_event_device *evt)
+static int tegra_timer_shutdown(struct clock_event_device *evt)
 {
-	timer_writel(0, TIMER3_BASE + TIMER_PTV);
+	void __iomem *reg_base = timer_of_base(to_timer_of(evt));
+
+	writel(0, reg_base + TIMER_PTV);
+
+	return 0;
 }
 
-static int tegra_timer_shutdown(struct clock_event_device *evt)
+static int tegra_timer_set_periodic(struct clock_event_device *evt)
 {
-	timer_shutdown(evt);
+	void __iomem *reg_base = timer_of_base(to_timer_of(evt));
+
+	writel(TIMER_PTV_EN | TIMER_PTV_PER |
+	       ((timer_of_rate(to_timer_of(evt)) / HZ) - 1),
+	       reg_base + TIMER_PTV);
+
 	return 0;
 }
 
-static int tegra_timer_set_periodic(struct clock_event_device *evt)
+static irqreturn_t tegra_timer_isr(int irq, void *dev_id)
+{
+	struct clock_event_device *evt = (struct clock_event_device *)dev_id;
+	void __iomem *reg_base = timer_of_base(to_timer_of(evt));
+
+	writel(TIMER_PCR_INTR_CLR, reg_base + TIMER_PCR);
+	evt->event_handler(evt);
+
+	return IRQ_HANDLED;
+}
+
+static void tegra_timer_suspend(struct clock_event_device *evt)
+{
+	void __iomem *reg_base = timer_of_base(to_timer_of(evt));
+
+	writel(TIMER_PCR_INTR_CLR, reg_base + TIMER_PCR);
+}
+
+static void tegra_timer_resume(struct clock_event_device *evt)
+{
+	writel(usec_config, timer_reg_base + TIMERUS_USEC_CFG);
+}
+
+#ifdef CONFIG_ARM64
+static DEFINE_PER_CPU(struct timer_of, tegra_to) = {
+	.flags = TIMER_OF_CLOCK | TIMER_OF_BASE,
+
+	.clkevt = {
+		.name = "tegra_timer",
+		.rating = 460,
+		.features = CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_PERIODIC,
+		.set_next_event = tegra_timer_set_next_event,
+		.set_state_shutdown = tegra_timer_shutdown,
+		.set_state_periodic = tegra_timer_set_periodic,
+		.set_state_oneshot = tegra_timer_shutdown,
+		.tick_resume = tegra_timer_shutdown,
+		.suspend = tegra_timer_suspend,
+		.resume = tegra_timer_resume,
+	},
+};
+
+static int tegra_timer_setup(unsigned int cpu)
 {
-	u32 reg = 0xC0000000 | ((1000000 / HZ) - 1);
+	struct timer_of *to = per_cpu_ptr(&tegra_to, cpu);
+
+	irq_force_affinity(to->clkevt.irq, cpumask_of(cpu));
+	enable_irq(to->clkevt.irq);
+
+	clockevents_config_and_register(&to->clkevt, timer_of_rate(to),
+					1, /* min */
+					0x1fffffff); /* 29 bits */
 
-	timer_shutdown(evt);
-	timer_writel(reg, TIMER3_BASE + TIMER_PTV);
 	return 0;
 }
 
-static struct clock_event_device tegra_clockevent = {
-	.name			= "timer0",
-	.rating			= 300,
-	.features		= CLOCK_EVT_FEAT_ONESHOT |
-				  CLOCK_EVT_FEAT_PERIODIC |
-				  CLOCK_EVT_FEAT_DYNIRQ,
-	.set_next_event		= tegra_timer_set_next_event,
-	.set_state_shutdown	= tegra_timer_shutdown,
-	.set_state_periodic	= tegra_timer_set_periodic,
-	.set_state_oneshot	= tegra_timer_shutdown,
-	.tick_resume		= tegra_timer_shutdown,
+static int tegra_timer_stop(unsigned int cpu)
+{
+	struct timer_of *to = per_cpu_ptr(&tegra_to, cpu);
+
+	to->clkevt.set_state_shutdown(&to->clkevt);
+	disable_irq_nosync(to->clkevt.irq);
+
+	return 0;
+}
+#else /* CONFIG_ARM */
+static struct timer_of tegra_to = {
+	.flags = TIMER_OF_CLOCK | TIMER_OF_BASE | TIMER_OF_IRQ,
+
+	.clkevt = {
+		.name = "tegra_timer",
+		.rating	= 300,
+		.features = CLOCK_EVT_FEAT_ONESHOT |
+			    CLOCK_EVT_FEAT_PERIODIC |
+			    CLOCK_EVT_FEAT_DYNIRQ,
+		.set_next_event	= tegra_timer_set_next_event,
+		.set_state_shutdown = tegra_timer_shutdown,
+		.set_state_periodic = tegra_timer_set_periodic,
+		.set_state_oneshot = tegra_timer_shutdown,
+		.tick_resume = tegra_timer_shutdown,
+		.suspend = tegra_timer_suspend,
+		.resume = tegra_timer_resume,
+		.cpumask = cpu_possible_mask,
+	},
+
+	.of_irq = {
+		.index = 2,
+		.flags = IRQF_TIMER | IRQF_TRIGGER_HIGH,
+		.handler = tegra_timer_isr,
+	},
 };
 
 static u64 notrace tegra_read_sched_clock(void)
 {
-	return timer_readl(TIMERUS_CNTR_1US);
+	return readl(timer_reg_base + TIMERUS_CNTR_1US);
+}
+
+static unsigned long tegra_delay_timer_read_counter_long(void)
+{
+	return readl(timer_reg_base + TIMERUS_CNTR_1US);
 }
 
 /*
@@ -143,100 +233,155 @@ static void tegra_read_persistent_clock64(struct timespec64 *ts)
 	timespec64_add_ns(&persistent_ts, delta * NSEC_PER_MSEC);
 	*ts = persistent_ts;
 }
+#endif
 
-static unsigned long tegra_delay_timer_read_counter_long(void)
-{
-	return readl(timer_reg_base + TIMERUS_CNTR_1US);
-}
-
-static irqreturn_t tegra_timer_interrupt(int irq, void *dev_id)
-{
-	struct clock_event_device *evt = (struct clock_event_device *)dev_id;
-	timer_writel(1<<30, TIMER3_BASE + TIMER_PCR);
-	evt->event_handler(evt);
-	return IRQ_HANDLED;
-}
-
-static struct irqaction tegra_timer_irq = {
-	.name		= "timer0",
-	.flags		= IRQF_TIMER | IRQF_TRIGGER_HIGH,
-	.handler	= tegra_timer_interrupt,
-	.dev_id		= &tegra_clockevent,
-};
-
-static int __init tegra20_init_timer(struct device_node *np)
+static int tegra_timer_common_init(struct device_node *np, struct timer_of *to)
 {
-	struct clk *clk;
-	unsigned long rate;
-	int ret;
-
-	timer_reg_base = of_iomap(np, 0);
-	if (!timer_reg_base) {
-		pr_err("Can't map timer registers\n");
-		return -ENXIO;
-	}
+	int ret = 0;
 
-	tegra_timer_irq.irq = irq_of_parse_and_map(np, 2);
-	if (tegra_timer_irq.irq <= 0) {
-		pr_err("Failed to map timer IRQ\n");
-		return -EINVAL;
-	}
+	ret = timer_of_init(np, to);
+	if (ret < 0)
+		goto out;
 
-	clk = of_clk_get(np, 0);
-	if (IS_ERR(clk)) {
-		pr_warn("Unable to get timer clock. Assuming 12Mhz input clock.\n");
-		rate = 12000000;
-	} else {
-		clk_prepare_enable(clk);
-		rate = clk_get_rate(clk);
-	}
+	timer_reg_base = timer_of_base(to);
 
-	switch (rate) {
+	/*
+	 * Configure microsecond timers to have 1MHz clock
+	 * Config register is 0xqqww, where qq is "dividend", ww is "divisor"
+	 * Uses n+1 scheme
+	 */
+	switch (timer_of_rate(to)) {
 	case 12000000:
-		timer_writel(0x000b, TIMERUS_USEC_CFG);
+		usec_config = 0x000b; /* (11+1)/(0+1) */
+		break;
+	case 12800000:
+		usec_config = 0x043f; /* (63+1)/(4+1) */
 		break;
 	case 13000000:
-		timer_writel(0x000c, TIMERUS_USEC_CFG);
+		usec_config = 0x000c; /* (12+1)/(0+1) */
+		break;
+	case 16800000:
+		usec_config = 0x0453; /* (83+1)/(4+1) */
 		break;
 	case 19200000:
-		timer_writel(0x045f, TIMERUS_USEC_CFG);
+		usec_config = 0x045f; /* (95+1)/(4+1) */
 		break;
 	case 26000000:
-		timer_writel(0x0019, TIMERUS_USEC_CFG);
+		usec_config = 0x0019; /* (25+1)/(0+1) */
+		break;
+	case 38400000:
+		usec_config = 0x04bf; /* (191+1)/(4+1) */
+		break;
+	case 48000000:
+		usec_config = 0x002f; /* (47+1)/(0+1) */
 		break;
 	default:
-		WARN(1, "Unknown clock rate");
+		ret = -EINVAL;
+		goto out;
+	}
+
+	writel(usec_config, timer_of_base(to) + TIMERUS_USEC_CFG);
+
+out:
+	return ret;
+}
+
+#ifdef CONFIG_ARM64
+static int __init tegra_init_timer(struct device_node *np)
+{
+	int cpu, ret = 0;
+	struct timer_of *to;
+
+	to = this_cpu_ptr(&tegra_to);
+	ret = tegra_timer_common_init(np, to);
+	if (ret < 0)
+		goto out;
+
+	for_each_possible_cpu(cpu) {
+		struct timer_of *cpu_to;
+
+		cpu_to = per_cpu_ptr(&tegra_to, cpu);
+		cpu_to->of_base.base = timer_reg_base + TIMER_BASE_FOR_CPU(cpu);
+		cpu_to->of_clk.rate = timer_of_rate(to);
+		cpu_to->clkevt.cpumask = cpumask_of(cpu);
+		cpu_to->clkevt.irq =
+			irq_of_parse_and_map(np, IRQ_IDX_FOR_CPU(cpu));
+		if (!cpu_to->clkevt.irq) {
+			pr_err("%s: can't map IRQ for CPU%d\n",
+			       __func__, cpu);
+			ret = -EINVAL;
+			goto out;
+		}
+
+		irq_set_status_flags(cpu_to->clkevt.irq, IRQ_NOAUTOEN);
+		ret = request_irq(cpu_to->clkevt.irq, tegra_timer_isr,
+				  IRQF_TIMER | IRQF_NOBALANCING,
+				  cpu_to->clkevt.name, &cpu_to->clkevt);
+		if (ret) {
+			pr_err("%s: cannot setup irq %d for CPU%d\n",
+				__func__, cpu_to->clkevt.irq, cpu);
+			ret = -EINVAL;
+			goto out_irq;
+		}
+	}
+
+	cpuhp_setup_state(CPUHP_AP_TEGRA_TIMER_STARTING,
+			  "AP_TEGRA_TIMER_STARTING", tegra_timer_setup,
+			  tegra_timer_stop);
+
+	return ret;
+out_irq:
+	for_each_possible_cpu(cpu) {
+		struct timer_of *cpu_to;
+
+		cpu_to = per_cpu_ptr(&tegra_to, cpu);
+		if (cpu_to->clkevt.irq) {
+			free_irq(cpu_to->clkevt.irq, &cpu_to->clkevt);
+			irq_dispose_mapping(cpu_to->clkevt.irq);
+		}
 	}
+out:
+	timer_of_cleanup(to);
+	return ret;
+}
+#else /* CONFIG_ARM */
+static int __init tegra_init_timer(struct device_node *np)
+{
+	int ret = 0;
+
+	ret = tegra_timer_common_init(np, &tegra_to);
+	if (ret < 0)
+		goto out;
 
-	sched_clock_register(tegra_read_sched_clock, 32, 1000000);
+	tegra_to.of_base.base = timer_reg_base + TIMER_BASE_FOR_CPU(0);
+	tegra_to.of_clk.rate = 1000000; /* microsecond timer */
 
+	sched_clock_register(tegra_read_sched_clock, 32,
+			     timer_of_rate(&tegra_to));
 	ret = clocksource_mmio_init(timer_reg_base + TIMERUS_CNTR_1US,
-				    "timer_us", 1000000, 300, 32,
-				    clocksource_mmio_readl_up);
+				    "timer_us", timer_of_rate(&tegra_to),
+				    300, 32, clocksource_mmio_readl_up);
 	if (ret) {
 		pr_err("Failed to register clocksource\n");
-		return ret;
+		goto out;
 	}
 
 	tegra_delay_timer.read_current_timer =
 			tegra_delay_timer_read_counter_long;
-	tegra_delay_timer.freq = 1000000;
+	tegra_delay_timer.freq = timer_of_rate(&tegra_to);
 	register_current_timer_delay(&tegra_delay_timer);
 
-	ret = setup_irq(tegra_timer_irq.irq, &tegra_timer_irq);
-	if (ret) {
-		pr_err("Failed to register timer IRQ: %d\n", ret);
-		return ret;
-	}
+	clockevents_config_and_register(&tegra_to.clkevt,
+					timer_of_rate(&tegra_to),
+					0x1,
+					0x1fffffff);
 
-	tegra_clockevent.cpumask = cpu_possible_mask;
-	tegra_clockevent.irq = tegra_timer_irq.irq;
-	clockevents_config_and_register(&tegra_clockevent, 1000000,
-					0x1, 0x1fffffff);
+	return ret;
+out:
+	timer_of_cleanup(&tegra_to);
 
-	return 0;
+	return ret;
 }
-TIMER_OF_DECLARE(tegra20_timer, "nvidia,tegra20-timer", tegra20_init_timer);
 
 static int __init tegra20_init_rtc(struct device_node *np)
 {
@@ -261,3 +406,6 @@ static int __init tegra20_init_rtc(struct device_node *np)
 	return register_persistent_clock(tegra_read_persistent_clock64);
 }
 TIMER_OF_DECLARE(tegra20_rtc, "nvidia,tegra20-rtc", tegra20_init_rtc);
+#endif
+TIMER_OF_DECLARE(tegra210_timer, "nvidia,tegra210-timer", tegra_init_timer);
+TIMER_OF_DECLARE(tegra20_timer, "nvidia,tegra20-timer", tegra_init_timer);
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index fd586d0301e7..e78281d07b70 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -121,6 +121,7 @@ enum cpuhp_state {
 	CPUHP_AP_EXYNOS4_MCT_TIMER_STARTING,
 	CPUHP_AP_ARM_TWD_STARTING,
 	CPUHP_AP_QCOM_TIMER_STARTING,
+	CPUHP_AP_TEGRA_TIMER_STARTING,
 	CPUHP_AP_ARMADA_TIMER_STARTING,
 	CPUHP_AP_MARCO_TIMER_STARTING,
 	CPUHP_AP_MIPS_GIC_TIMER_STARTING,
-- 
cgit v1.2.3


From 70fa3a9699cbc7aa1e93a5fddb9b9105d2b3acda Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Fri, 22 Feb 2019 23:51:44 +0100
Subject: net: phy: add genphy_c45_read_status

Similar to genphy_read_status() for Clause 22 add a generic read_status
function for Clause 45.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/phy-c45.c | 33 +++++++++++++++++++++++++++++++++
 include/linux/phy.h       |  1 +
 2 files changed, 34 insertions(+)

(limited to 'include')

diff --git a/drivers/net/phy/phy-c45.c b/drivers/net/phy/phy-c45.c
index 2d1ba43e14ec..c86bef005ef2 100644
--- a/drivers/net/phy/phy-c45.c
+++ b/drivers/net/phy/phy-c45.c
@@ -454,6 +454,39 @@ int genphy_c45_pma_read_abilities(struct phy_device *phydev)
 }
 EXPORT_SYMBOL_GPL(genphy_c45_pma_read_abilities);
 
+/**
+ * genphy_c45_read_status - read PHY status
+ * @phydev: target phy_device struct
+ *
+ * Reads status from PHY and sets phy_device members accordingly.
+ */
+int genphy_c45_read_status(struct phy_device *phydev)
+{
+	int ret;
+
+	ret = genphy_c45_read_link(phydev);
+	if (ret)
+		return ret;
+
+	phydev->speed = SPEED_UNKNOWN;
+	phydev->duplex = DUPLEX_UNKNOWN;
+	phydev->pause = 0;
+	phydev->asym_pause = 0;
+
+	if (phydev->autoneg == AUTONEG_ENABLE) {
+		ret = genphy_c45_read_lpa(phydev);
+		if (ret)
+			return ret;
+
+		phy_resolve_aneg_linkmode(phydev);
+	} else {
+		ret = genphy_c45_read_pma(phydev);
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(genphy_c45_read_status);
+
 /* The gen10g_* functions are the old Clause 45 stub */
 
 int gen10g_config_aneg(struct phy_device *phydev)
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 8e9fc576472b..a05ba366dae4 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -1107,6 +1107,7 @@ int genphy_c45_an_config_aneg(struct phy_device *phydev);
 int genphy_c45_an_disable_aneg(struct phy_device *phydev);
 int genphy_c45_read_mdix(struct phy_device *phydev);
 int genphy_c45_pma_read_abilities(struct phy_device *phydev);
+int genphy_c45_read_status(struct phy_device *phydev);
 
 /* The gen10g_* functions are the old Clause 45 stub */
 int gen10g_config_aneg(struct phy_device *phydev);
-- 
cgit v1.2.3


From a75d1d01477d5235aac613c167b0653441281ad7 Mon Sep 17 00:00:00 2001
From: Roopa Prabhu <roopa@cumulusnetworks.com>
Date: Sat, 23 Feb 2019 22:25:12 -0800
Subject: trace: events: neigh_update: print new state in string format

Also, extend neigh_state_str to include neigh dummy states
noarp and permanent

Fixes: 9c03b282badb ("trace: events: add a few neigh tracepoints")
Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/trace/events/neigh.h | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/neigh.h b/include/trace/events/neigh.h
index ed10353eee73..0bdb08557763 100644
--- a/include/trace/events/neigh.h
+++ b/include/trace/events/neigh.h
@@ -16,7 +16,9 @@
 		{ NUD_STALE, "stale" },			\
 		{ NUD_DELAY, "delay" },			\
 		{ NUD_PROBE, "probe" },			\
-		{ NUD_FAILED, "failed" })
+		{ NUD_FAILED, "failed" },		\
+		{ NUD_NOARP, "noarp" },			\
+		{ NUD_PERMANENT, "permanent"})
 
 TRACE_EVENT(neigh_update,
 
@@ -90,7 +92,7 @@ TRACE_EVENT(neigh_update,
 	TP_printk("family %d dev %s lladdr %s flags %02x nud_state %s type %02x "
 		  "dead %d refcnt %d primary_key4 %pI4 primary_key6 %pI6c "
 		  "confirmed %lu updated %lu used %lu new_lladdr %s "
-		  "new_state %02x update_flags %02x pid %d",
+		  "new_state %s update_flags %02x pid %d",
 		  __entry->family, __get_str(dev),
 		  __print_hex_str(__entry->lladdr, __entry->lladdr_len),
 		  __entry->flags, neigh_state_str(__entry->nud_state),
@@ -98,7 +100,7 @@ TRACE_EVENT(neigh_update,
 		  __entry->primary_key4, __entry->primary_key6,
 		  __entry->confirmed, __entry->updated, __entry->used,
 		  __print_hex_str(__entry->new_lladdr, __entry->lladdr_len),
-		  __entry->new_state,
+		  neigh_state_str(__entry->new_state),
 		  __entry->update_flags, __entry->pid)
 );
 
-- 
cgit v1.2.3


From 663586c0a8929db81e617c775823efb9d65f2bc2 Mon Sep 17 00:00:00 2001
From: Richard Weinberger <richard@nod.at>
Date: Wed, 7 Nov 2018 23:16:19 +0100
Subject: ubi: Expose the bitrot interface

Using UBI_IOCRPEB and UBI_IOCSPEB userspace can force
reading and scrubbing of PEBs.

In case of bitflips UBI will automatically take action
and move data to a different PEB.
This interface allows a daemon to foster your NAND.

Signed-off-by: Richard Weinberger <richard@nod.at>
---
 drivers/mtd/ubi/cdev.c      |  30 +++++++++
 drivers/mtd/ubi/ubi.h       |   1 +
 drivers/mtd/ubi/wl.c        | 144 ++++++++++++++++++++++++++++++++++++++++++++
 include/uapi/mtd/ubi-user.h |   5 ++
 4 files changed, 180 insertions(+)

(limited to 'include')

diff --git a/drivers/mtd/ubi/cdev.c b/drivers/mtd/ubi/cdev.c
index 22547d7a84ea..947a8adbc799 100644
--- a/drivers/mtd/ubi/cdev.c
+++ b/drivers/mtd/ubi/cdev.c
@@ -974,6 +974,36 @@ static long ubi_cdev_ioctl(struct file *file, unsigned int cmd,
 		break;
 	}
 
+	/* Check a specific PEB for bitflips and scrub it if needed */
+	case UBI_IOCRPEB:
+	{
+		int pnum;
+
+		err = get_user(pnum, (__user int32_t *)argp);
+		if (err) {
+			err = -EFAULT;
+			break;
+		}
+
+		err = ubi_bitflip_check(ubi, pnum, 0);
+		break;
+	}
+
+	/* Force scrubbing for a specific PEB */
+	case UBI_IOCSPEB:
+	{
+		int pnum;
+
+		err = get_user(pnum, (__user int32_t *)argp);
+		if (err) {
+			err = -EFAULT;
+			break;
+		}
+
+		err = ubi_bitflip_check(ubi, pnum, 1);
+		break;
+	}
+
 	default:
 		err = -ENOTTY;
 		break;
diff --git a/drivers/mtd/ubi/ubi.h b/drivers/mtd/ubi/ubi.h
index d47b9e436e67..a1b9e764d489 100644
--- a/drivers/mtd/ubi/ubi.h
+++ b/drivers/mtd/ubi/ubi.h
@@ -929,6 +929,7 @@ int ubi_wl_put_fm_peb(struct ubi_device *ubi, struct ubi_wl_entry *used_e,
 int ubi_is_erase_work(struct ubi_work *wrk);
 void ubi_refill_pools(struct ubi_device *ubi);
 int ubi_ensure_anchor_pebs(struct ubi_device *ubi);
+int ubi_bitflip_check(struct ubi_device *ubi, int pnum, int force_scrub);
 
 /* io.c */
 int ubi_io_read(const struct ubi_device *ubi, void *buf, int pnum, int offset,
diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c
index ca1b31385eb5..40f838d54b0f 100644
--- a/drivers/mtd/ubi/wl.c
+++ b/drivers/mtd/ubi/wl.c
@@ -1440,6 +1440,150 @@ int ubi_wl_flush(struct ubi_device *ubi, int vol_id, int lnum)
 	return err;
 }
 
+static bool scrub_possible(struct ubi_device *ubi, struct ubi_wl_entry *e)
+{
+	if (in_wl_tree(e, &ubi->scrub))
+		return false;
+	else if (in_wl_tree(e, &ubi->erroneous))
+		return false;
+	else if (ubi->move_from == e)
+		return false;
+	else if (ubi->move_to == e)
+		return false;
+
+	return true;
+}
+
+/**
+ * ubi_bitflip_check - Check an eraseblock for bitflips and scrub it if needed.
+ * @ubi: UBI device description object
+ * @pnum: the physical eraseblock to schedule
+ * @force: dont't read the block, assume bitflips happened and take action.
+ *
+ * This function reads the given eraseblock and checks if bitflips occured.
+ * In case of bitflips, the eraseblock is scheduled for scrubbing.
+ * If scrubbing is forced with @force, the eraseblock is not read,
+ * but scheduled for scrubbing right away.
+ *
+ * Returns:
+ * %EINVAL, PEB is out of range
+ * %ENOENT, PEB is no longer used by UBI
+ * %EBUSY, PEB cannot be checked now or a check is currently running on it
+ * %EAGAIN, bit flips happened but scrubbing is currently not possible
+ * %EUCLEAN, bit flips happened and PEB is scheduled for scrubbing
+ * %0, no bit flips detected
+ */
+int ubi_bitflip_check(struct ubi_device *ubi, int pnum, int force)
+{
+	int err;
+	struct ubi_wl_entry *e;
+
+	if (pnum < 0 || pnum >= ubi->peb_count) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	/*
+	 * Pause all parallel work, otherwise it can happen that the
+	 * erase worker frees a wl entry under us.
+	 */
+	down_write(&ubi->work_sem);
+
+	/*
+	 * Make sure that the wl entry does not change state while
+	 * inspecting it.
+	 */
+	spin_lock(&ubi->wl_lock);
+	e = ubi->lookuptbl[pnum];
+	if (!e) {
+		spin_unlock(&ubi->wl_lock);
+		err = -ENOENT;
+		goto out_resume;
+	}
+
+	/*
+	 * Does it make sense to check this PEB?
+	 */
+	if (!scrub_possible(ubi, e)) {
+		spin_unlock(&ubi->wl_lock);
+		err = -EBUSY;
+		goto out_resume;
+	}
+	spin_unlock(&ubi->wl_lock);
+
+	if (!force) {
+		mutex_lock(&ubi->buf_mutex);
+		err = ubi_io_read(ubi, ubi->peb_buf, pnum, 0, ubi->peb_size);
+		mutex_unlock(&ubi->buf_mutex);
+	}
+
+	if (err == UBI_IO_BITFLIPS || force) {
+		/*
+		 * Okay, bit flip happened, let's figure out what we can do.
+		 */
+		spin_lock(&ubi->wl_lock);
+
+		/*
+		 * Recheck. We released wl_lock, UBI might have killed the
+		 * wl entry under us.
+		 */
+		e = ubi->lookuptbl[pnum];
+		if (!e) {
+			spin_unlock(&ubi->wl_lock);
+			err = -ENOENT;
+			goto out_resume;
+		}
+
+		/*
+		 * Need to re-check state
+		 */
+		if (!scrub_possible(ubi, e)) {
+			spin_unlock(&ubi->wl_lock);
+			err = -EBUSY;
+			goto out_resume;
+		}
+
+		if (in_pq(ubi, e)) {
+			prot_queue_del(ubi, e->pnum);
+			wl_tree_add(e, &ubi->scrub);
+			spin_unlock(&ubi->wl_lock);
+
+			err = ensure_wear_leveling(ubi, 1);
+		} else if (in_wl_tree(e, &ubi->used)) {
+			rb_erase(&e->u.rb, &ubi->used);
+			wl_tree_add(e, &ubi->scrub);
+			spin_unlock(&ubi->wl_lock);
+
+			err = ensure_wear_leveling(ubi, 1);
+		} else if (in_wl_tree(e, &ubi->free)) {
+			rb_erase(&e->u.rb, &ubi->free);
+			ubi->free_count--;
+			spin_unlock(&ubi->wl_lock);
+
+			/*
+			 * This PEB is empty we can schedule it for
+			 * erasure right away. No wear leveling needed.
+			 */
+			err = schedule_erase(ubi, e, UBI_UNKNOWN, UBI_UNKNOWN,
+					     force ? 0 : 1, true);
+		} else {
+			spin_unlock(&ubi->wl_lock);
+			err = -EAGAIN;
+		}
+
+		if (!err && !force)
+			err = -EUCLEAN;
+	} else {
+		err = 0;
+	}
+
+out_resume:
+	up_write(&ubi->work_sem);
+out:
+
+	return err;
+}
+
 /**
  * tree_destroy - destroy an RB-tree.
  * @ubi: UBI device description object
diff --git a/include/uapi/mtd/ubi-user.h b/include/uapi/mtd/ubi-user.h
index aad3b6201fc0..b69e9ba6742b 100644
--- a/include/uapi/mtd/ubi-user.h
+++ b/include/uapi/mtd/ubi-user.h
@@ -171,6 +171,11 @@
 /* Re-name volumes */
 #define UBI_IOCRNVOL _IOW(UBI_IOC_MAGIC, 3, struct ubi_rnvol_req)
 
+/* Read the specified PEB and scrub it if there are bitflips */
+#define UBI_IOCRPEB _IOW(UBI_IOC_MAGIC, 4, __s32)
+/* Force scrubbing on the specified PEB */
+#define UBI_IOCSPEB _IOW(UBI_IOC_MAGIC, 5, __s32)
+
 /* ioctl commands of the UBI control character device */
 
 #define UBI_CTRL_IOC_MAGIC 'o'
-- 
cgit v1.2.3


From 93b6604c5a669d84e45fe5129294875bf82eb1ff Mon Sep 17 00:00:00 2001
From: Jim Broadus <jbroadus@gmail.com>
Date: Tue, 19 Feb 2019 11:30:27 -0800
Subject: i2c: Allow recovery of the initial IRQ by an I2C client device.

A previous change allowed I2C client devices to discover new IRQs upon
reprobe by clearing the IRQ in i2c_device_remove. However, if an IRQ was
assigned in i2c_new_device, that information is lost.

For example, the touchscreen and trackpad devices on a Dell Inspiron laptop
are I2C devices whose IRQs are defined by ACPI extended IRQ types. The
client device structures are initialized during an ACPI walk. After
removing the i2c_hid device, modprobe fails.

This change caches the initial IRQ value in i2c_new_device and then resets
the client device IRQ to the initial value in i2c_device_remove.

Fixes: 6f108dd70d30 ("i2c: Clear client->irq in i2c_device_remove")
Signed-off-by: Jim Broadus <jbroadus@gmail.com>
Reviewed-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Reviewed-by: Charles Keepax <ckeepax@opensource.cirrus.com>
[wsa: this is an easy to backport fix for the regression. We will
refactor the code to handle irq assignments better in general.]
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/i2c-core-base.c | 9 +++++----
 include/linux/i2c.h         | 1 +
 2 files changed, 6 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c
index 926ca0a7477f..cb6c5cb0df0b 100644
--- a/drivers/i2c/i2c-core-base.c
+++ b/drivers/i2c/i2c-core-base.c
@@ -430,7 +430,7 @@ static int i2c_device_remove(struct device *dev)
 	dev_pm_clear_wake_irq(&client->dev);
 	device_init_wakeup(&client->dev, false);
 
-	client->irq = 0;
+	client->irq = client->init_irq;
 
 	return status;
 }
@@ -741,10 +741,11 @@ i2c_new_device(struct i2c_adapter *adap, struct i2c_board_info const *info)
 	client->flags = info->flags;
 	client->addr = info->addr;
 
-	client->irq = info->irq;
-	if (!client->irq)
-		client->irq = i2c_dev_irq_from_resources(info->resources,
+	client->init_irq = info->irq;
+	if (!client->init_irq)
+		client->init_irq = i2c_dev_irq_from_resources(info->resources,
 							 info->num_resources);
+	client->irq = client->init_irq;
 
 	strlcpy(client->name, info->type, sizeof(client->name));
 
diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index 1f45331924d6..383510b4f083 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -333,6 +333,7 @@ struct i2c_client {
 	char name[I2C_NAME_SIZE];
 	struct i2c_adapter *adapter;	/* the adapter we sit on	*/
 	struct device dev;		/* the device structure		*/
+	int init_irq;			/* irq set at initialization	*/
 	int irq;			/* irq issued by device		*/
 	struct list_head detected;
 #if IS_ENABLED(CONFIG_I2C_SLAVE)
-- 
cgit v1.2.3


From fb7e160019f4abb4082740bfeb27a38f6389c745 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 22 Nov 2018 16:37:38 +0100
Subject: fs: add an iopoll method to struct file_operations

This new methods is used to explicitly poll for I/O completion for an
iocb.  It must be called for any iocb submitted asynchronously (that
is with a non-null ki_complete) which has the IOCB_HIPRI flag set.

The method is assisted by a new ki_cookie field in struct iocb to store
the polling cookie.

Reviewed-by: Hannes Reinecke <hare@suse.com>
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 Documentation/filesystems/vfs.txt | 3 +++
 include/linux/fs.h                | 2 ++
 2 files changed, 5 insertions(+)

(limited to 'include')

diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index 8dc8e9c2913f..761c6fd24a53 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -857,6 +857,7 @@ struct file_operations {
 	ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
 	ssize_t (*read_iter) (struct kiocb *, struct iov_iter *);
 	ssize_t (*write_iter) (struct kiocb *, struct iov_iter *);
+	int (*iopoll)(struct kiocb *kiocb, bool spin);
 	int (*iterate) (struct file *, struct dir_context *);
 	int (*iterate_shared) (struct file *, struct dir_context *);
 	__poll_t (*poll) (struct file *, struct poll_table_struct *);
@@ -902,6 +903,8 @@ otherwise noted.
 
   write_iter: possibly asynchronous write with iov_iter as source
 
+  iopoll: called when aio wants to poll for completions on HIPRI iocbs
+
   iterate: called when the VFS needs to read the directory contents
 
   iterate_shared: called when the VFS needs to read the directory contents
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 29d8e2cfed0e..dedcc2e9265c 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -310,6 +310,7 @@ struct kiocb {
 	int			ki_flags;
 	u16			ki_hint;
 	u16			ki_ioprio; /* See linux/ioprio.h */
+	unsigned int		ki_cookie; /* for ->iopoll */
 } __randomize_layout;
 
 static inline bool is_sync_kiocb(struct kiocb *kiocb)
@@ -1787,6 +1788,7 @@ struct file_operations {
 	ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
 	ssize_t (*read_iter) (struct kiocb *, struct iov_iter *);
 	ssize_t (*write_iter) (struct kiocb *, struct iov_iter *);
+	int (*iopoll)(struct kiocb *kiocb, bool spin);
 	int (*iterate) (struct file *, struct dir_context *);
 	int (*iterate_shared) (struct file *, struct dir_context *);
 	__poll_t (*poll) (struct file *, struct poll_table_struct *);
-- 
cgit v1.2.3


From 0bbb280d7b767e7c86a5adfc87c76a6f09ab0423 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Fri, 21 Dec 2018 09:10:46 -0700
Subject: block: add bio_set_polled() helper

For the upcoming async polled IO, we can't sleep allocating requests.
If we do, then we introduce a deadlock where the submitter already
has async polled IO in-flight, but can't wait for them to complete
since polled requests must be active found and reaped.

Utilize the helper in the blockdev DIRECT_IO code.

Reviewed-by: Hannes Reinecke <hare@suse.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/block_dev.c      |  4 ++--
 include/linux/bio.h | 14 ++++++++++++++
 2 files changed, 16 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/fs/block_dev.c b/fs/block_dev.c
index 1fe498b08f1b..e9faa52bb489 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -248,7 +248,7 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter,
 		task_io_account_write(ret);
 	}
 	if (iocb->ki_flags & IOCB_HIPRI)
-		bio.bi_opf |= REQ_HIPRI;
+		bio_set_polled(&bio, iocb);
 
 	qc = submit_bio(&bio);
 	for (;;) {
@@ -419,7 +419,7 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
 			bool polled = false;
 
 			if (iocb->ki_flags & IOCB_HIPRI) {
-				bio->bi_opf |= REQ_HIPRI;
+				bio_set_polled(bio, iocb);
 				polled = true;
 			}
 
diff --git a/include/linux/bio.h b/include/linux/bio.h
index bdd11d4c2f05..bb6090aa165d 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -826,5 +826,19 @@ static inline int bio_integrity_add_page(struct bio *bio, struct page *page,
 
 #endif /* CONFIG_BLK_DEV_INTEGRITY */
 
+/*
+ * Mark a bio as polled. Note that for async polled IO, the caller must
+ * expect -EWOULDBLOCK if we cannot allocate a request (or other resources).
+ * We cannot block waiting for requests on polled IO, as those completions
+ * must be found by the caller. This is different than IRQ driven IO, where
+ * it's safe to wait for IO to complete.
+ */
+static inline void bio_set_polled(struct bio *bio, struct kiocb *kiocb)
+{
+	bio->bi_opf |= REQ_HIPRI;
+	if (!is_sync_kiocb(kiocb))
+		bio->bi_opf |= REQ_NOWAIT;
+}
+
 #endif /* CONFIG_BLOCK */
 #endif /* __LINUX_BIO_H */
-- 
cgit v1.2.3


From 81214bab582eeda068e7904d57b6a3095e8f3855 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 4 Dec 2018 11:12:08 -0700
Subject: iomap: wire up the iopoll method

Store the request queue the last bio was submitted to in the iocb
private data in addition to the cookie so that we find the right block
device.  Also refactor the common direct I/O bio submission code into a
nice little helper.

Signed-off-by: Christoph Hellwig <hch@lst.de>

Modified to use bio_set_polled().

Reviewed-by: Hannes Reinecke <hare@suse.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/gfs2/file.c        |  2 ++
 fs/iomap.c            | 43 ++++++++++++++++++++++++++++---------------
 fs/xfs/xfs_file.c     |  1 +
 include/linux/iomap.h |  1 +
 4 files changed, 32 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index a2dea5bc0427..58a768e59712 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -1280,6 +1280,7 @@ const struct file_operations gfs2_file_fops = {
 	.llseek		= gfs2_llseek,
 	.read_iter	= gfs2_file_read_iter,
 	.write_iter	= gfs2_file_write_iter,
+	.iopoll		= iomap_dio_iopoll,
 	.unlocked_ioctl	= gfs2_ioctl,
 	.mmap		= gfs2_mmap,
 	.open		= gfs2_open,
@@ -1310,6 +1311,7 @@ const struct file_operations gfs2_file_fops_nolock = {
 	.llseek		= gfs2_llseek,
 	.read_iter	= gfs2_file_read_iter,
 	.write_iter	= gfs2_file_write_iter,
+	.iopoll		= iomap_dio_iopoll,
 	.unlocked_ioctl	= gfs2_ioctl,
 	.mmap		= gfs2_mmap,
 	.open		= gfs2_open,
diff --git a/fs/iomap.c b/fs/iomap.c
index 6982d3d2bcc6..97cb9d486a7d 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -1464,6 +1464,28 @@ struct iomap_dio {
 	};
 };
 
+int iomap_dio_iopoll(struct kiocb *kiocb, bool spin)
+{
+	struct request_queue *q = READ_ONCE(kiocb->private);
+
+	if (!q)
+		return 0;
+	return blk_poll(q, READ_ONCE(kiocb->ki_cookie), spin);
+}
+EXPORT_SYMBOL_GPL(iomap_dio_iopoll);
+
+static void iomap_dio_submit_bio(struct iomap_dio *dio, struct iomap *iomap,
+		struct bio *bio)
+{
+	atomic_inc(&dio->ref);
+
+	if (dio->iocb->ki_flags & IOCB_HIPRI)
+		bio_set_polled(bio, dio->iocb);
+
+	dio->submit.last_queue = bdev_get_queue(iomap->bdev);
+	dio->submit.cookie = submit_bio(bio);
+}
+
 static ssize_t iomap_dio_complete(struct iomap_dio *dio)
 {
 	struct kiocb *iocb = dio->iocb;
@@ -1577,7 +1599,7 @@ static void iomap_dio_bio_end_io(struct bio *bio)
 	}
 }
 
-static blk_qc_t
+static void
 iomap_dio_zero(struct iomap_dio *dio, struct iomap *iomap, loff_t pos,
 		unsigned len)
 {
@@ -1591,15 +1613,10 @@ iomap_dio_zero(struct iomap_dio *dio, struct iomap *iomap, loff_t pos,
 	bio->bi_private = dio;
 	bio->bi_end_io = iomap_dio_bio_end_io;
 
-	if (dio->iocb->ki_flags & IOCB_HIPRI)
-		flags |= REQ_HIPRI;
-
 	get_page(page);
 	__bio_add_page(bio, page, len, 0);
 	bio_set_op_attrs(bio, REQ_OP_WRITE, flags);
-
-	atomic_inc(&dio->ref);
-	return submit_bio(bio);
+	iomap_dio_submit_bio(dio, iomap, bio);
 }
 
 static loff_t
@@ -1702,9 +1719,6 @@ iomap_dio_bio_actor(struct inode *inode, loff_t pos, loff_t length,
 				bio_set_pages_dirty(bio);
 		}
 
-		if (dio->iocb->ki_flags & IOCB_HIPRI)
-			bio->bi_opf |= REQ_HIPRI;
-
 		iov_iter_advance(dio->submit.iter, n);
 
 		dio->size += n;
@@ -1712,11 +1726,7 @@ iomap_dio_bio_actor(struct inode *inode, loff_t pos, loff_t length,
 		copied += n;
 
 		nr_pages = iov_iter_npages(&iter, BIO_MAX_PAGES);
-
-		atomic_inc(&dio->ref);
-
-		dio->submit.last_queue = bdev_get_queue(iomap->bdev);
-		dio->submit.cookie = submit_bio(bio);
+		iomap_dio_submit_bio(dio, iomap, bio);
 	} while (nr_pages);
 
 	/*
@@ -1927,6 +1937,9 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 	if (dio->flags & IOMAP_DIO_WRITE_FUA)
 		dio->flags &= ~IOMAP_DIO_NEED_SYNC;
 
+	WRITE_ONCE(iocb->ki_cookie, dio->submit.cookie);
+	WRITE_ONCE(iocb->private, dio->submit.last_queue);
+
 	/*
 	 * We are about to drop our additional submission reference, which
 	 * might be the last reference to the dio.  There are three three
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index e47425071e65..60c2da41f0fc 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -1203,6 +1203,7 @@ const struct file_operations xfs_file_operations = {
 	.write_iter	= xfs_file_write_iter,
 	.splice_read	= generic_file_splice_read,
 	.splice_write	= iter_file_splice_write,
+	.iopoll		= iomap_dio_iopoll,
 	.unlocked_ioctl	= xfs_file_ioctl,
 #ifdef CONFIG_COMPAT
 	.compat_ioctl	= xfs_file_compat_ioctl,
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 9a4258154b25..0fefb5455bda 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -162,6 +162,7 @@ typedef int (iomap_dio_end_io_t)(struct kiocb *iocb, ssize_t ret,
 		unsigned flags);
 ssize_t iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 		const struct iomap_ops *ops, iomap_dio_end_io_t end_io);
+int iomap_dio_iopoll(struct kiocb *kiocb, bool spin);
 
 #ifdef CONFIG_SWAP
 struct file;
-- 
cgit v1.2.3


From e646e0a50cfadae315f2db05e07a2ec072ce8d9c Mon Sep 17 00:00:00 2001
From: Erik Schmauss <erik.schmauss@intel.com>
Date: Fri, 15 Feb 2019 13:36:14 -0800
Subject: ACPICA: ACPI 6.3: MADT: add support for statistical profiling in GICC

ACPICA commit 31b184052a986dc8d80c878edeca574d4ffa1cf5

Link: https://github.com/acpica/acpica/commit/31b18405
Signed-off-by: Erik Schmauss <erik.schmauss@intel.com>
Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/acpi/actbl2.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/acpi/actbl2.h b/include/acpi/actbl2.h
index f876a3776c75..a8e78863ac57 100644
--- a/include/acpi/actbl2.h
+++ b/include/acpi/actbl2.h
@@ -623,7 +623,7 @@ struct acpi_madt_local_x2apic_nmi {
 	u8 reserved[3];		/* reserved - must be zero */
 };
 
-/* 11: Generic Interrupt (ACPI 5.0 + ACPI 6.0 changes) */
+/* 11: Generic interrupt - GICC (ACPI 5.0 + ACPI 6.0 + ACPI 6.3 changes) */
 
 struct acpi_madt_generic_interrupt {
 	struct acpi_subtable_header header;
@@ -641,7 +641,8 @@ struct acpi_madt_generic_interrupt {
 	u64 gicr_base_address;
 	u64 arm_mpidr;
 	u8 efficiency_class;
-	u8 reserved2[3];
+	u8 reserved2[1];
+	u16 spe_interrupt;	/* ACPI 6.3 */
 };
 
 /* Masks for Flags field above */
-- 
cgit v1.2.3


From ddcfd66c1d4b3f7b91e1e2e6ac0d8f9dd7855dca Mon Sep 17 00:00:00 2001
From: Erik Schmauss <erik.schmauss@intel.com>
Date: Tue, 19 Feb 2019 11:33:40 -0800
Subject: ACPICA: ACPI 6.3: add Error Disconnect Recover Notification value

ACPICA commit 205ac8fc721073f1e609df963b14ef2237aeba73

Link: https://github.com/acpica/acpica/commit/205ac8fc
Reviewed-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Erik Schmauss <erik.schmauss@intel.com>
Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/acpica/utdecode.c | 8 +++++---
 include/acpi/actypes.h         | 3 ++-
 2 files changed, 7 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/drivers/acpi/acpica/utdecode.c b/drivers/acpi/acpica/utdecode.c
index 6be69c28c34e..ad9f77eb554f 100644
--- a/drivers/acpi/acpica/utdecode.c
+++ b/drivers/acpi/acpica/utdecode.c
@@ -430,8 +430,10 @@ static const char *acpi_gbl_generic_notify[ACPI_GENERIC_NOTIFY_MAX + 1] = {
 								/* 0C */ "Reserved (was previously Shutdown Request)",
 								/* Reserved in ACPI 6.0 */
 	/* 0D */ "System Resource Affinity Update",
-								/* 0E */ "Heterogeneous Memory Attributes Update"
+								/* 0E */ "Heterogeneous Memory Attributes Update",
 								/* ACPI 6.2 */
+						/* 0F */ "Error Disconnect Recover"
+						/* ACPI 6.3 */
 };
 
 static const char *acpi_gbl_device_notify[5] = {
@@ -461,13 +463,13 @@ static const char *acpi_gbl_thermal_notify[5] = {
 const char *acpi_ut_get_notify_name(u32 notify_value, acpi_object_type type)
 {
 
-	/* 00 - 0D are "common to all object types" (from ACPI Spec) */
+	/* 00 - 0F are "common to all object types" (from ACPI Spec) */
 
 	if (notify_value <= ACPI_GENERIC_NOTIFY_MAX) {
 		return (acpi_gbl_generic_notify[notify_value]);
 	}
 
-	/* 0E - 7F are reserved */
+	/* 10 - 7F are reserved */
 
 	if (notify_value <= ACPI_MAX_SYS_NOTIFY) {
 		return ("Reserved");
diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h
index 4c1395a6d82a..f73382e82c26 100644
--- a/include/acpi/actypes.h
+++ b/include/acpi/actypes.h
@@ -617,8 +617,9 @@ typedef u64 acpi_integer;
 #define ACPI_NOTIFY_SHUTDOWN_REQUEST    (u8) 0x0C
 #define ACPI_NOTIFY_AFFINITY_UPDATE     (u8) 0x0D
 #define ACPI_NOTIFY_MEMORY_UPDATE       (u8) 0x0E
+#define ACPI_NOTIFY_DISCONNECT_RECOVER  (u8) 0x0F
 
-#define ACPI_GENERIC_NOTIFY_MAX         0x0E
+#define ACPI_GENERIC_NOTIFY_MAX         0x0F
 #define ACPI_SPECIFIC_NOTIFY_MAX        0x84
 
 /*
-- 
cgit v1.2.3


From b5eab512e7cffb2bb37c4b342b5594e9e75fd486 Mon Sep 17 00:00:00 2001
From: Erik Schmauss <erik.schmauss@intel.com>
Date: Fri, 15 Feb 2019 13:36:16 -0800
Subject: ACPICA: ACPI 6.3: PPTT add additional fields in Processor Structure
 Flags

ACPICA commit c736ea34add19a3a07e0e398711847cd6b95affd

Link: https://github.com/acpica/acpica/commit/c736ea34
Signed-off-by: Erik Schmauss <erik.schmauss@intel.com>
Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/acpi/actbl2.h | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/acpi/actbl2.h b/include/acpi/actbl2.h
index a8e78863ac57..6511c4ddec81 100644
--- a/include/acpi/actbl2.h
+++ b/include/acpi/actbl2.h
@@ -1474,8 +1474,11 @@ struct acpi_pptt_processor {
 
 /* Flags */
 
-#define ACPI_PPTT_PHYSICAL_PACKAGE          (1)	/* Physical package */
-#define ACPI_PPTT_ACPI_PROCESSOR_ID_VALID   (2)	/* ACPI Processor ID valid */
+#define ACPI_PPTT_PHYSICAL_PACKAGE          (1)
+#define ACPI_PPTT_ACPI_PROCESSOR_ID_VALID   (1<<1)
+#define ACPI_PPTT_ACPI_PROCESSOR_IS_THREAD  (1<<2)	/* ACPI 6.3 */
+#define ACPI_PPTT_ACPI_LEAF_NODE            (1<<3)	/* ACPI 6.3 */
+#define ACPI_PPTT_ACPI_IDENTICAL            (1<<4)	/* ACPI 6.3 */
 
 /* 1: Cache Type Structure */
 
-- 
cgit v1.2.3


From 9a8d961f1ef835b0d338fbe13da03cb424e87ae5 Mon Sep 17 00:00:00 2001
From: Erik Schmauss <erik.schmauss@intel.com>
Date: Fri, 15 Feb 2019 13:36:17 -0800
Subject: ACPICA: ACPI 6.3: HMAT updates

ACPICA commit a216e8ca9f7c79f90788b193e2e61151b2c973c0

This change reserves several field and renames subtable 0 to
"memory proximity domain attributes"

Link: https://github.com/acpica/acpica/commit/a216e8ca
Signed-off-by: Erik Schmauss <erik.schmauss@intel.com>
Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/acpi/actbl1.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/acpi/actbl1.h b/include/acpi/actbl1.h
index 45b0f532f63b..e26d5cf30a1f 100644
--- a/include/acpi/actbl1.h
+++ b/include/acpi/actbl1.h
@@ -1406,17 +1406,17 @@ struct acpi_hmat_structure {
  * HMAT Structures, correspond to Type in struct acpi_hmat_structure
  */
 
-/* 0: Memory subystem address range */
+/* 0: Memory proximity domain attributes */
 
-struct acpi_hmat_address_range {
+struct acpi_hmat_proximity_domain {
 	struct acpi_hmat_structure header;
 	u16 flags;
 	u16 reserved1;
 	u32 processor_PD;	/* Processor proximity domain */
 	u32 memory_PD;		/* Memory proximity domain */
 	u32 reserved2;
-	u64 physical_address_base;	/* Physical address range base */
-	u64 physical_address_length;	/* Physical address range length */
+	u64 reserved3;
+	u64 reserved4;
 };
 
 /* Masks for Flags field above */
-- 
cgit v1.2.3


From 8f5a14d0531002caf1c8978e8baddfec3ab2e824 Mon Sep 17 00:00:00 2001
From: Erik Schmauss <erik.schmauss@intel.com>
Date: Fri, 15 Feb 2019 13:36:18 -0800
Subject: ACPICA: ACPI 6.3: add GTDT Revision 3 support

ACPICA commit 2cd926fdf360062adcaac1349cb94136590c1c74

Link: https://github.com/acpica/acpica/commit/2cd926fd
Signed-off-by: Erik Schmauss <erik.schmauss@intel.com>
Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/acpi/actbl1.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/acpi/actbl1.h b/include/acpi/actbl1.h
index e26d5cf30a1f..d37ddde8bf8f 100644
--- a/include/acpi/actbl1.h
+++ b/include/acpi/actbl1.h
@@ -1001,6 +1001,11 @@ struct acpi_table_gtdt {
 #define ACPI_GTDT_INTERRUPT_POLARITY    (1<<1)
 #define ACPI_GTDT_ALWAYS_ON             (1<<2)
 
+struct acpi_gtdt_el2 {
+	u32 virtual_el2_timer_gsiv;
+	u32 virtual_el2_timer_flags;
+};
+
 /* Common GTDT subtable header */
 
 struct acpi_gtdt_header {
-- 
cgit v1.2.3


From c163f90cc8620716b08ac7987c4c4ebf16444fd5 Mon Sep 17 00:00:00 2001
From: Erik Schmauss <erik.schmauss@intel.com>
Date: Fri, 15 Feb 2019 13:36:19 -0800
Subject: ACPI/ACPICA: Trivial: fix spelling mistakes and fix whitespace
 formatting

Signed-off-by: Erik Schmauss <erik.schmauss@intel.com>
Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/acpica/aclocal.h      |  2 +-
 drivers/acpi/acpica/acmacros.h     |  2 +-
 drivers/acpi/acpica/dbdisply.c     |  4 ++--
 drivers/acpi/acpica/dbnames.c      |  2 +-
 drivers/acpi/acpica/dbobject.c     |  2 +-
 drivers/acpi/acpica/dswload2.c     |  2 +-
 drivers/acpi/acpica/evgpe.c        |  2 +-
 drivers/acpi/acpica/evregion.c     |  2 +-
 drivers/acpi/acpica/evxfgpe.c      |  4 ++--
 drivers/acpi/acpica/exconvrt.c     |  2 +-
 drivers/acpi/acpica/exfield.c      |  7 ++++---
 drivers/acpi/acpica/exserial.c     |  2 +-
 drivers/acpi/acpica/exutils.c      |  2 +-
 drivers/acpi/acpica/nsload.c       |  2 +-
 drivers/acpi/acpica/nsutils.c      |  2 +-
 drivers/acpi/acpica/rsdumpinfo.c   | 14 +++++++-------
 drivers/acpi/acpica/rsirq.c        |  8 ++++----
 drivers/acpi/acpica/rsserial.c     | 10 +++++-----
 drivers/acpi/acpica/tbfadt.c       |  2 +-
 drivers/acpi/acpica/tbxface.c      |  4 ++--
 drivers/acpi/irq.c                 |  4 ++--
 drivers/acpi/pci_link.c            |  8 ++++----
 drivers/acpi/resource.c            |  4 ++--
 drivers/gpio/gpiolib-acpi.c        |  2 +-
 drivers/platform/x86/sony-laptop.c |  8 ++++----
 drivers/pnp/pnpacpi/rsparser.c     | 14 +++++++-------
 include/acpi/acconfig.h            |  2 +-
 include/acpi/acexcep.h             |  3 ++-
 include/acpi/acrestyp.h            | 14 +++++++-------
 include/acpi/actbl1.h              |  4 ++--
 include/acpi/actbl2.h              |  2 +-
 31 files changed, 72 insertions(+), 70 deletions(-)

(limited to 'include')

diff --git a/drivers/acpi/acpica/aclocal.h b/drivers/acpi/acpica/aclocal.h
index 18ec6627d674..a2dfbf6b004e 100644
--- a/drivers/acpi/acpica/aclocal.h
+++ b/drivers/acpi/acpica/aclocal.h
@@ -802,7 +802,7 @@ struct acpi_comment_addr_node {
 
 /*
  * File node - used for "Include" operator file stack and
- * depdendency tree for the -ca option
+ * dependency tree for the -ca option
  */
 struct acpi_file_node {
 	void *file;
diff --git a/drivers/acpi/acpica/acmacros.h b/drivers/acpi/acpica/acmacros.h
index 99131cd4e389..283614e82a20 100644
--- a/drivers/acpi/acpica/acmacros.h
+++ b/drivers/acpi/acpica/acmacros.h
@@ -462,7 +462,7 @@
 #define ACPI_IS_OCTAL_DIGIT(d)              (((char)(d) >= '0') && ((char)(d) <= '7'))
 
 /*
- * Macors used for the ASL-/ASL+ converter utility
+ * Macros used for the ASL-/ASL+ converter utility
  */
 #ifdef ACPI_ASL_COMPILER
 
diff --git a/drivers/acpi/acpica/dbdisply.c b/drivers/acpi/acpica/dbdisply.c
index 9fcb8ec64681..30ab62b0fec8 100644
--- a/drivers/acpi/acpica/dbdisply.c
+++ b/drivers/acpi/acpica/dbdisply.c
@@ -237,7 +237,7 @@ void acpi_db_decode_and_display_object(char *target, char *output_type)
 
 		default:
 
-			/* Is not a recognizeable object */
+			/* Is not a recognizable object */
 
 			acpi_os_printf
 			    ("Not a known ACPI internal object, descriptor type %2.2X\n",
@@ -647,7 +647,7 @@ void acpi_db_display_object_type(char *object_arg)
  *
  * DESCRIPTION: Display the result of an AML opcode
  *
- * Note: Curently only displays the result object if we are single stepping.
+ * Note: Currently only displays the result object if we are single stepping.
  * However, this output may be useful in other contexts and could be enabled
  * to do so if needed.
  *
diff --git a/drivers/acpi/acpica/dbnames.c b/drivers/acpi/acpica/dbnames.c
index 992bd7b92540..004d34d9369b 100644
--- a/drivers/acpi/acpica/dbnames.c
+++ b/drivers/acpi/acpica/dbnames.c
@@ -904,7 +904,7 @@ acpi_db_bus_walk(acpi_handle obj_handle,
  *
  * RETURN:      None
  *
- * DESCRIPTION: Display info about system busses.
+ * DESCRIPTION: Display info about system buses.
  *
  ******************************************************************************/
 
diff --git a/drivers/acpi/acpica/dbobject.c b/drivers/acpi/acpica/dbobject.c
index a1c76bf21122..d220168dca01 100644
--- a/drivers/acpi/acpica/dbobject.c
+++ b/drivers/acpi/acpica/dbobject.c
@@ -243,7 +243,7 @@ acpi_db_display_internal_object(union acpi_operand_object *obj_desc,
 			acpi_os_printf("[%s] ",
 				       acpi_ut_get_reference_name(obj_desc));
 
-			/* Decode the refererence */
+			/* Decode the reference */
 
 			switch (obj_desc->reference.class) {
 			case ACPI_REFCLASS_LOCAL:
diff --git a/drivers/acpi/acpica/dswload2.c b/drivers/acpi/acpica/dswload2.c
index 98cdc24c640f..935a8e2623e4 100644
--- a/drivers/acpi/acpica/dswload2.c
+++ b/drivers/acpi/acpica/dswload2.c
@@ -24,7 +24,7 @@ ACPI_MODULE_NAME("dswload2")
  * FUNCTION:    acpi_ds_load2_begin_op
  *
  * PARAMETERS:  walk_state      - Current state of the parse tree walk
- *              out_op          - Wher to return op if a new one is created
+ *              out_op          - Where to return op if a new one is created
  *
  * RETURN:      Status
  *
diff --git a/drivers/acpi/acpica/evgpe.c b/drivers/acpi/acpica/evgpe.c
index a5d435849d0d..62d3aa74277b 100644
--- a/drivers/acpi/acpica/evgpe.c
+++ b/drivers/acpi/acpica/evgpe.c
@@ -801,7 +801,7 @@ acpi_ev_gpe_dispatch(struct acpi_namespace_node *gpe_device,
 							      dispatch.handler->
 							      context);
 
-		/* If requested, clear (if level-triggered) and reenable the GPE */
+		/* If requested, clear (if level-triggered) and re-enable the GPE */
 
 		if (return_value & ACPI_REENABLE_GPE) {
 			(void)acpi_ev_finish_gpe(gpe_event_info);
diff --git a/drivers/acpi/acpica/evregion.c b/drivers/acpi/acpica/evregion.c
index b683b3ad6baa..45dc797df05d 100644
--- a/drivers/acpi/acpica/evregion.c
+++ b/drivers/acpi/acpica/evregion.c
@@ -250,7 +250,7 @@ acpi_ev_address_space_dispatch(union acpi_operand_object *region_obj,
 		/*
 		 * For handlers other than the default (supplied) handlers, we must
 		 * exit the interpreter because the handler *might* block -- we don't
-		 * know what it will do, so we can't hold the lock on the intepreter.
+		 * know what it will do, so we can't hold the lock on the interpreter.
 		 */
 		acpi_ex_exit_interpreter();
 	}
diff --git a/drivers/acpi/acpica/evxfgpe.c b/drivers/acpi/acpica/evxfgpe.c
index fb1d215dd288..30a083902f52 100644
--- a/drivers/acpi/acpica/evxfgpe.c
+++ b/drivers/acpi/acpica/evxfgpe.c
@@ -669,9 +669,9 @@ ACPI_EXPORT_SYMBOL(acpi_dispatch_gpe)
  *
  * RETURN:      Status
  *
- * DESCRIPTION: Clear and conditionally reenable a GPE. This completes the GPE
+ * DESCRIPTION: Clear and conditionally re-enable a GPE. This completes the GPE
  *              processing. Intended for use by asynchronous host-installed
- *              GPE handlers. The GPE is only reenabled if the enable_for_run bit
+ *              GPE handlers. The GPE is only re-enabled if the enable_for_run bit
  *              is set in the GPE info.
  *
  ******************************************************************************/
diff --git a/drivers/acpi/acpica/exconvrt.c b/drivers/acpi/acpica/exconvrt.c
index f5675f1b1928..ca2966bacb50 100644
--- a/drivers/acpi/acpica/exconvrt.c
+++ b/drivers/acpi/acpica/exconvrt.c
@@ -520,7 +520,7 @@ acpi_ex_convert_to_string(union acpi_operand_object * obj_desc,
 		for (i = 0; i < obj_desc->buffer.length; i++) {
 			if (base == 16) {
 
-				/* Emit 0x prefix for explict/implicit hex conversion */
+				/* Emit 0x prefix for explicit/implicit hex conversion */
 
 				*new_buf++ = '0';
 				*new_buf++ = 'x';
diff --git a/drivers/acpi/acpica/exfield.c b/drivers/acpi/acpica/exfield.c
index 3760f96b19f9..d3d2dbfba680 100644
--- a/drivers/acpi/acpica/exfield.c
+++ b/drivers/acpi/acpica/exfield.c
@@ -41,7 +41,7 @@ const u8 acpi_protocol_lengths[] = {
 	0xFF			/* F - ATTRIB_RAW_PROCESS_BYTES */
 };
 
-#define PCC_MASTER_SUBSPACE	3
+#define PCC_MASTER_SUBSPACE     3
 
 /*
  * The following macros determine a given offset is a COMD field.
@@ -49,8 +49,8 @@ const u8 acpi_protocol_lengths[] = {
  * 2-byte COMD field at offset 4 and master subspaces (type 3) contains a 4-byte
  * COMD field starting at offset 12.
  */
-#define GENERIC_SUBSPACE_COMMAND(a)	(4 == a || a == 5)
-#define MASTER_SUBSPACE_COMMAND(a)	(12 <= a && a <= 15)
+#define GENERIC_SUBSPACE_COMMAND(a)     (4 == a || a == 5)
+#define MASTER_SUBSPACE_COMMAND(a)      (12 <= a && a <= 15)
 
 /*******************************************************************************
  *
@@ -319,6 +319,7 @@ acpi_ex_write_data_to_field(union acpi_operand_object *source_desc,
 		memcpy(obj_desc->field.region_obj->field.internal_pcc_buffer +
 		       obj_desc->field.base_byte_offset,
 		       source_desc->buffer.pointer, data_length);
+
 		if ((obj_desc->field.region_obj->region.address ==
 		     PCC_MASTER_SUBSPACE
 		     && MASTER_SUBSPACE_COMMAND(obj_desc->field.
diff --git a/drivers/acpi/acpica/exserial.c b/drivers/acpi/acpica/exserial.c
index 89bc0a2d8163..c5aa4b0deb70 100644
--- a/drivers/acpi/acpica/exserial.c
+++ b/drivers/acpi/acpica/exserial.c
@@ -21,7 +21,7 @@ ACPI_MODULE_NAME("exserial")
  * FUNCTION:    acpi_ex_read_gpio
  *
  * PARAMETERS:  obj_desc            - The named field to read
- *              buffer              - Where the return data is returnd
+ *              buffer              - Where the return data is returned
  *
  * RETURN:      Status
  *
diff --git a/drivers/acpi/acpica/exutils.c b/drivers/acpi/acpica/exutils.c
index a1f26b0ad449..75380be1c2ef 100644
--- a/drivers/acpi/acpica/exutils.c
+++ b/drivers/acpi/acpica/exutils.c
@@ -160,7 +160,7 @@ u8 acpi_ex_truncate_for32bit_table(union acpi_operand_object *obj_desc)
  * RETURN:      None
  *
  * DESCRIPTION: Obtain the ACPI hardware Global Lock, only if the field
- *              flags specifiy that it is to be obtained before field access.
+ *              flags specify that it is to be obtained before field access.
  *
  ******************************************************************************/
 
diff --git a/drivers/acpi/acpica/nsload.c b/drivers/acpi/acpica/nsload.c
index 6b90aa89f942..35fff5c75da1 100644
--- a/drivers/acpi/acpica/nsload.c
+++ b/drivers/acpi/acpica/nsload.c
@@ -75,7 +75,7 @@ acpi_ns_load_table(u32 table_index, struct acpi_namespace_node *node)
 		/*
 		 * On error, delete any namespace objects created by this table.
 		 * We cannot initialize these objects, so delete them. There are
-		 * a couple of expecially bad cases:
+		 * a couple of especially bad cases:
 		 * AE_ALREADY_EXISTS - namespace collision.
 		 * AE_NOT_FOUND - the target of a Scope operator does not
 		 * exist. This target of Scope must already exist in the
diff --git a/drivers/acpi/acpica/nsutils.c b/drivers/acpi/acpica/nsutils.c
index 0c5662c4168c..e5cef1edf49f 100644
--- a/drivers/acpi/acpica/nsutils.c
+++ b/drivers/acpi/acpica/nsutils.c
@@ -350,7 +350,7 @@ acpi_ns_internalize_name(const char *external_name, char **converted_name)
  *
  * FUNCTION:    acpi_ns_externalize_name
  *
- * PARAMETERS:  internal_name_length - Lenth of the internal name below
+ * PARAMETERS:  internal_name_length - Length of the internal name below
  *              internal_name       - Internal representation of name
  *              converted_name_length - Where the length is returned
  *              converted_name      - Where the resulting external name
diff --git a/drivers/acpi/acpica/rsdumpinfo.c b/drivers/acpi/acpica/rsdumpinfo.c
index 77a3263169fa..cafa8134b4c6 100644
--- a/drivers/acpi/acpica/rsdumpinfo.c
+++ b/drivers/acpi/acpica/rsdumpinfo.c
@@ -32,7 +32,7 @@ struct acpi_rsdump_info acpi_rs_dump_irq[7] = {
 	 acpi_gbl_he_decode},
 	{ACPI_RSD_1BITFLAG, ACPI_RSD_OFFSET(irq.polarity), "Polarity",
 	 acpi_gbl_ll_decode},
-	{ACPI_RSD_2BITFLAG, ACPI_RSD_OFFSET(irq.sharable), "Sharing",
+	{ACPI_RSD_2BITFLAG, ACPI_RSD_OFFSET(irq.shareable), "Sharing",
 	 acpi_gbl_shr_decode},
 	{ACPI_RSD_UINT8, ACPI_RSD_OFFSET(irq.interrupt_count),
 	 "Interrupt Count", NULL},
@@ -222,7 +222,7 @@ struct acpi_rsdump_info acpi_rs_dump_ext_irq[8] = {
 	 "Triggering", acpi_gbl_he_decode},
 	{ACPI_RSD_1BITFLAG, ACPI_RSD_OFFSET(extended_irq.polarity), "Polarity",
 	 acpi_gbl_ll_decode},
-	{ACPI_RSD_2BITFLAG, ACPI_RSD_OFFSET(extended_irq.sharable), "Sharing",
+	{ACPI_RSD_2BITFLAG, ACPI_RSD_OFFSET(extended_irq.shareable), "Sharing",
 	 acpi_gbl_shr_decode},
 	{ACPI_RSD_SOURCE, ACPI_RSD_OFFSET(extended_irq.resource_source), NULL,
 	 NULL},
@@ -255,7 +255,7 @@ struct acpi_rsdump_info acpi_rs_dump_gpio[16] = {
 	 "ProducerConsumer", acpi_gbl_consume_decode},
 	{ACPI_RSD_UINT8, ACPI_RSD_OFFSET(gpio.pin_config), "PinConfig",
 	 acpi_gbl_ppc_decode},
-	{ACPI_RSD_2BITFLAG, ACPI_RSD_OFFSET(gpio.sharable), "Sharing",
+	{ACPI_RSD_2BITFLAG, ACPI_RSD_OFFSET(gpio.shareable), "Sharing",
 	 acpi_gbl_shr_decode},
 	{ACPI_RSD_2BITFLAG, ACPI_RSD_OFFSET(gpio.io_restriction),
 	 "IoRestriction", acpi_gbl_ior_decode},
@@ -285,7 +285,7 @@ struct acpi_rsdump_info acpi_rs_dump_pin_function[10] = {
 	 "RevisionId", NULL},
 	{ACPI_RSD_UINT8, ACPI_RSD_OFFSET(pin_function.pin_config), "PinConfig",
 	 acpi_gbl_ppc_decode},
-	{ACPI_RSD_1BITFLAG, ACPI_RSD_OFFSET(pin_function.sharable), "Sharing",
+	{ACPI_RSD_1BITFLAG, ACPI_RSD_OFFSET(pin_function.shareable), "Sharing",
 	 acpi_gbl_shr_decode},
 	{ACPI_RSD_UINT16, ACPI_RSD_OFFSET(pin_function.function_number),
 	 "FunctionNumber", NULL},
@@ -308,7 +308,7 @@ struct acpi_rsdump_info acpi_rs_dump_pin_config[11] = {
 	 NULL},
 	{ACPI_RSD_1BITFLAG, ACPI_RSD_OFFSET(pin_config.producer_consumer),
 	 "ProducerConsumer", acpi_gbl_consume_decode},
-	{ACPI_RSD_1BITFLAG, ACPI_RSD_OFFSET(pin_config.sharable), "Sharing",
+	{ACPI_RSD_1BITFLAG, ACPI_RSD_OFFSET(pin_config.shareable), "Sharing",
 	 acpi_gbl_shr_decode},
 	{ACPI_RSD_UINT8, ACPI_RSD_OFFSET(pin_config.pin_config_type),
 	 "PinConfigType", NULL},
@@ -353,7 +353,7 @@ struct acpi_rsdump_info acpi_rs_dump_pin_group_function[9] = {
 	{ACPI_RSD_1BITFLAG,
 	 ACPI_RSD_OFFSET(pin_group_function.producer_consumer),
 	 "ProducerConsumer", acpi_gbl_consume_decode},
-	{ACPI_RSD_1BITFLAG, ACPI_RSD_OFFSET(pin_group_function.sharable),
+	{ACPI_RSD_1BITFLAG, ACPI_RSD_OFFSET(pin_group_function.shareable),
 	 "Sharing", acpi_gbl_shr_decode},
 	{ACPI_RSD_UINT16, ACPI_RSD_OFFSET(pin_group_function.function_number),
 	 "FunctionNumber", NULL},
@@ -375,7 +375,7 @@ struct acpi_rsdump_info acpi_rs_dump_pin_group_config[10] = {
 	 "RevisionId", NULL},
 	{ACPI_RSD_1BITFLAG, ACPI_RSD_OFFSET(pin_group_config.producer_consumer),
 	 "ProducerConsumer", acpi_gbl_consume_decode},
-	{ACPI_RSD_1BITFLAG, ACPI_RSD_OFFSET(pin_group_config.sharable),
+	{ACPI_RSD_1BITFLAG, ACPI_RSD_OFFSET(pin_group_config.shareable),
 	 "Sharing", acpi_gbl_shr_decode},
 	{ACPI_RSD_UINT8, ACPI_RSD_OFFSET(pin_group_config.pin_config_type),
 	 "PinConfigType", NULL},
diff --git a/drivers/acpi/acpica/rsirq.c b/drivers/acpi/acpica/rsirq.c
index 134b67cd48ee..b0d970efa072 100644
--- a/drivers/acpi/acpica/rsirq.c
+++ b/drivers/acpi/acpica/rsirq.c
@@ -54,7 +54,7 @@ struct acpi_rsconvert_info acpi_rs_get_irq[9] = {
 	 AML_OFFSET(irq.flags),
 	 3},
 
-	{ACPI_RSC_1BITFLAG, ACPI_RS_OFFSET(data.irq.sharable),
+	{ACPI_RSC_1BITFLAG, ACPI_RS_OFFSET(data.irq.shareable),
 	 AML_OFFSET(irq.flags),
 	 4},
 
@@ -92,7 +92,7 @@ struct acpi_rsconvert_info acpi_rs_set_irq[14] = {
 	 AML_OFFSET(irq.flags),
 	 3},
 
-	{ACPI_RSC_1BITFLAG, ACPI_RS_OFFSET(data.irq.sharable),
+	{ACPI_RSC_1BITFLAG, ACPI_RS_OFFSET(data.irq.shareable),
 	 AML_OFFSET(irq.flags),
 	 4},
 
@@ -139,7 +139,7 @@ struct acpi_rsconvert_info acpi_rs_set_irq[14] = {
 	 ACPI_ACTIVE_HIGH},
 
 	{ACPI_RSC_EXIT_NE, ACPI_RSC_COMPARE_VALUE,
-	 ACPI_RS_OFFSET(data.irq.sharable),
+	 ACPI_RS_OFFSET(data.irq.shareable),
 	 ACPI_EXCLUSIVE},
 
 	/* We can optimize to a 2-byte irq_no_flags() descriptor */
@@ -178,7 +178,7 @@ struct acpi_rsconvert_info acpi_rs_convert_ext_irq[10] = {
 	 AML_OFFSET(extended_irq.flags),
 	 2},
 
-	{ACPI_RSC_1BITFLAG, ACPI_RS_OFFSET(data.extended_irq.sharable),
+	{ACPI_RSC_1BITFLAG, ACPI_RS_OFFSET(data.extended_irq.shareable),
 	 AML_OFFSET(extended_irq.flags),
 	 3},
 
diff --git a/drivers/acpi/acpica/rsserial.c b/drivers/acpi/acpica/rsserial.c
index d073ebb51f90..1b937d88980f 100644
--- a/drivers/acpi/acpica/rsserial.c
+++ b/drivers/acpi/acpica/rsserial.c
@@ -39,7 +39,7 @@ struct acpi_rsconvert_info acpi_rs_convert_gpio[18] = {
 	 AML_OFFSET(gpio.flags),
 	 0},
 
-	{ACPI_RSC_1BITFLAG, ACPI_RS_OFFSET(data.gpio.sharable),
+	{ACPI_RSC_1BITFLAG, ACPI_RS_OFFSET(data.gpio.shareable),
 	 AML_OFFSET(gpio.int_flags),
 	 3},
 
@@ -128,7 +128,7 @@ struct acpi_rsconvert_info acpi_rs_convert_pin_function[13] = {
 	 AML_OFFSET(pin_function.revision_id),
 	 1},
 
-	{ACPI_RSC_1BITFLAG, ACPI_RS_OFFSET(data.pin_function.sharable),
+	{ACPI_RSC_1BITFLAG, ACPI_RS_OFFSET(data.pin_function.shareable),
 	 AML_OFFSET(pin_function.flags),
 	 0},
 
@@ -518,7 +518,7 @@ struct acpi_rsconvert_info acpi_rs_convert_pin_config[14] = {
 	 AML_OFFSET(pin_config.revision_id),
 	 1},
 
-	{ACPI_RSC_1BITFLAG, ACPI_RS_OFFSET(data.pin_config.sharable),
+	{ACPI_RSC_1BITFLAG, ACPI_RS_OFFSET(data.pin_config.shareable),
 	 AML_OFFSET(pin_config.flags),
 	 0},
 
@@ -658,7 +658,7 @@ struct acpi_rsconvert_info acpi_rs_convert_pin_group_function[13] = {
 	 AML_OFFSET(pin_group_function.revision_id),
 	 1},
 
-	{ACPI_RSC_1BITFLAG, ACPI_RS_OFFSET(data.pin_group_function.sharable),
+	{ACPI_RSC_1BITFLAG, ACPI_RS_OFFSET(data.pin_group_function.shareable),
 	 AML_OFFSET(pin_group_function.flags),
 	 0},
 
@@ -735,7 +735,7 @@ struct acpi_rsconvert_info acpi_rs_convert_pin_group_config[14] = {
 	 AML_OFFSET(pin_group_config.revision_id),
 	 1},
 
-	{ACPI_RSC_1BITFLAG, ACPI_RS_OFFSET(data.pin_group_config.sharable),
+	{ACPI_RSC_1BITFLAG, ACPI_RS_OFFSET(data.pin_group_config.shareable),
 	 AML_OFFSET(pin_group_config.flags),
 	 0},
 
diff --git a/drivers/acpi/acpica/tbfadt.c b/drivers/acpi/acpica/tbfadt.c
index 2c67351d5a79..0041bfba9abc 100644
--- a/drivers/acpi/acpica/tbfadt.c
+++ b/drivers/acpi/acpica/tbfadt.c
@@ -556,7 +556,7 @@ static void acpi_tb_convert_fadt(void)
 				 * 64-bit X length field.
 				 * Note: If the legacy length field is > 0xFF bits, ignore
 				 * this check. (GPE registers can be larger than the
-				 * 64-bit GAS structure can accomodate, 0xFF bits).
+				 * 64-bit GAS structure can accommodate, 0xFF bits).
 				 */
 				if ((ACPI_MUL_8(length) <= ACPI_UINT8_MAX) &&
 				    (address64->bit_width !=
diff --git a/drivers/acpi/acpica/tbxface.c b/drivers/acpi/acpica/tbxface.c
index 593c20c20104..36592888f0e7 100644
--- a/drivers/acpi/acpica/tbxface.c
+++ b/drivers/acpi/acpica/tbxface.c
@@ -108,7 +108,7 @@ acpi_initialize_tables(struct acpi_table_desc *initial_table_array,
 	/*
 	 * Get the root table (RSDT or XSDT) and extract all entries to the local
 	 * Root Table Array. This array contains the information of the RSDT/XSDT
-	 * in a common, more useable format.
+	 * in a common, more usable format.
 	 */
 	status = acpi_tb_parse_root_table(rsdp_address);
 	return_ACPI_STATUS(status);
@@ -169,7 +169,7 @@ acpi_status ACPI_INIT_FUNCTION acpi_reallocate_root_table(void)
 	if (!acpi_gbl_enable_table_validation) {
 		/*
 		 * Now it's safe to do full table validation. We can do deferred
-		 * table initilization here once the flag is set.
+		 * table initialization here once the flag is set.
 		 */
 		acpi_gbl_enable_table_validation = TRUE;
 		for (i = 0; i < acpi_gbl_root_table_list.current_table_count;
diff --git a/drivers/acpi/irq.c b/drivers/acpi/irq.c
index 7c352cba0528..c3b2222e2129 100644
--- a/drivers/acpi/irq.c
+++ b/drivers/acpi/irq.c
@@ -196,7 +196,7 @@ static acpi_status acpi_irq_parse_one_cb(struct acpi_resource *ares,
 		fwnode = acpi_gsi_domain_id;
 		acpi_irq_parse_one_match(fwnode, irq->interrupts[ctx->index],
 					 irq->triggering, irq->polarity,
-					 irq->sharable, ctx);
+					 irq->shareable, ctx);
 		return AE_CTRL_TERMINATE;
 	case ACPI_RESOURCE_TYPE_EXTENDED_IRQ:
 		eirq = &ares->data.extended_irq;
@@ -209,7 +209,7 @@ static acpi_status acpi_irq_parse_one_cb(struct acpi_resource *ares,
 		fwnode = acpi_get_irq_source_fwhandle(&eirq->resource_source);
 		acpi_irq_parse_one_match(fwnode, eirq->interrupts[ctx->index],
 					 eirq->triggering, eirq->polarity,
-					 eirq->sharable, ctx);
+					 eirq->shareable, ctx);
 		return AE_CTRL_TERMINATE;
 	}
 
diff --git a/drivers/acpi/pci_link.c b/drivers/acpi/pci_link.c
index d5eec352a6e1..df70b1eaef58 100644
--- a/drivers/acpi/pci_link.c
+++ b/drivers/acpi/pci_link.c
@@ -317,10 +317,10 @@ static int acpi_pci_link_set(struct acpi_pci_link *link, int irq)
 		resource->res.data.irq.polarity =
 		    link->irq.polarity;
 		if (link->irq.triggering == ACPI_EDGE_SENSITIVE)
-			resource->res.data.irq.sharable =
+			resource->res.data.irq.shareable =
 			    ACPI_EXCLUSIVE;
 		else
-			resource->res.data.irq.sharable = ACPI_SHARED;
+			resource->res.data.irq.shareable = ACPI_SHARED;
 		resource->res.data.irq.interrupt_count = 1;
 		resource->res.data.irq.interrupts[0] = irq;
 		break;
@@ -335,10 +335,10 @@ static int acpi_pci_link_set(struct acpi_pci_link *link, int irq)
 		resource->res.data.extended_irq.polarity =
 		    link->irq.polarity;
 		if (link->irq.triggering == ACPI_EDGE_SENSITIVE)
-			resource->res.data.irq.sharable =
+			resource->res.data.irq.shareable =
 			    ACPI_EXCLUSIVE;
 		else
-			resource->res.data.irq.sharable = ACPI_SHARED;
+			resource->res.data.irq.shareable = ACPI_SHARED;
 		resource->res.data.extended_irq.interrupt_count = 1;
 		resource->res.data.extended_irq.interrupts[0] = irq;
 		/* ignore resource_source, it's optional */
diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c
index 316a0fc785e3..d556f2144de8 100644
--- a/drivers/acpi/resource.c
+++ b/drivers/acpi/resource.c
@@ -476,7 +476,7 @@ bool acpi_dev_resource_interrupt(struct acpi_resource *ares, int index,
 		}
 		acpi_dev_get_irqresource(res, irq->interrupts[index],
 					 irq->triggering, irq->polarity,
-					 irq->sharable, true);
+					 irq->shareable, true);
 		break;
 	case ACPI_RESOURCE_TYPE_EXTENDED_IRQ:
 		ext_irq = &ares->data.extended_irq;
@@ -487,7 +487,7 @@ bool acpi_dev_resource_interrupt(struct acpi_resource *ares, int index,
 		if (is_gsi(ext_irq))
 			acpi_dev_get_irqresource(res, ext_irq->interrupts[index],
 					 ext_irq->triggering, ext_irq->polarity,
-					 ext_irq->sharable, false);
+					 ext_irq->shareable, false);
 		else
 			acpi_dev_irqresource_disabled(res, 0);
 		break;
diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c
index 48534bda73d3..22955de8b997 100644
--- a/drivers/gpio/gpiolib-acpi.c
+++ b/drivers/gpio/gpiolib-acpi.c
@@ -897,7 +897,7 @@ acpi_gpio_adr_space_handler(u32 function, acpi_physical_address address,
 		 * event but only if the access here is ACPI_READ. In that
 		 * case we "borrow" the event GPIO instead.
 		 */
-		if (!found && agpio->sharable == ACPI_SHARED &&
+		if (!found && agpio->shareable == ACPI_SHARED &&
 		     function == ACPI_READ) {
 			struct acpi_gpio_event *event;
 
diff --git a/drivers/platform/x86/sony-laptop.c b/drivers/platform/x86/sony-laptop.c
index b205b037fd61..4bfbfa3f78e6 100644
--- a/drivers/platform/x86/sony-laptop.c
+++ b/drivers/platform/x86/sony-laptop.c
@@ -4392,7 +4392,7 @@ sony_pic_read_possible_resource(struct acpi_resource *resource, void *context)
 				list_add(&interrupt->list, &dev->interrupts);
 				interrupt->irq.triggering = p->triggering;
 				interrupt->irq.polarity = p->polarity;
-				interrupt->irq.sharable = p->sharable;
+				interrupt->irq.shareable = p->shareable;
 				interrupt->irq.interrupt_count = 1;
 				interrupt->irq.interrupts[0] = p->interrupts[i];
 			}
@@ -4546,7 +4546,7 @@ static int sony_pic_enable(struct acpi_device *device,
 		memcpy(&resource->res3.data.irq, &irq->irq,
 				sizeof(struct acpi_resource_irq));
 		/* we requested a shared irq */
-		resource->res3.data.irq.sharable = ACPI_SHARED;
+		resource->res3.data.irq.shareable = ACPI_SHARED;
 
 		resource->res4.type = ACPI_RESOURCE_TYPE_END_TAG;
 		resource->res4.length = sizeof(struct acpi_resource);
@@ -4565,7 +4565,7 @@ static int sony_pic_enable(struct acpi_device *device,
 		memcpy(&resource->res2.data.irq, &irq->irq,
 				sizeof(struct acpi_resource_irq));
 		/* we requested a shared irq */
-		resource->res2.data.irq.sharable = ACPI_SHARED;
+		resource->res2.data.irq.shareable = ACPI_SHARED;
 
 		resource->res3.type = ACPI_RESOURCE_TYPE_END_TAG;
 		resource->res3.length = sizeof(struct acpi_resource);
@@ -4779,7 +4779,7 @@ static int sony_pic_add(struct acpi_device *device)
 					irq->irq.interrupts[0],
 					irq->irq.triggering,
 					irq->irq.polarity,
-					irq->irq.sharable);
+					irq->irq.shareable);
 			spic_dev.cur_irq = irq;
 			break;
 		}
diff --git a/drivers/pnp/pnpacpi/rsparser.c b/drivers/pnp/pnpacpi/rsparser.c
index 43d8ed577e70..c79417ca1b3c 100644
--- a/drivers/pnp/pnpacpi/rsparser.c
+++ b/drivers/pnp/pnpacpi/rsparser.c
@@ -215,7 +215,7 @@ static acpi_status pnpacpi_allocated_resource(struct acpi_resource *res,
 		if (i >= 0) {
 			flags = acpi_dev_irq_flags(gpio->triggering,
 						   gpio->polarity,
-						   gpio->sharable);
+						   gpio->shareable);
 		} else {
 			flags = IORESOURCE_DISABLED;
 		}
@@ -324,7 +324,7 @@ static __init void pnpacpi_parse_irq_option(struct pnp_dev *dev,
 		if (p->interrupts[i])
 			__set_bit(p->interrupts[i], map.bits);
 
-	flags = acpi_dev_irq_flags(p->triggering, p->polarity, p->sharable);
+	flags = acpi_dev_irq_flags(p->triggering, p->polarity, p->shareable);
 	pnp_register_irq_resource(dev, option_flags, &map, flags);
 }
 
@@ -348,7 +348,7 @@ static __init void pnpacpi_parse_ext_irq_option(struct pnp_dev *dev,
 		}
 	}
 
-	flags = acpi_dev_irq_flags(p->triggering, p->polarity, p->sharable);
+	flags = acpi_dev_irq_flags(p->triggering, p->polarity, p->shareable);
 	pnp_register_irq_resource(dev, option_flags, &map, flags);
 }
 
@@ -681,7 +681,7 @@ static void pnpacpi_encode_irq(struct pnp_dev *dev,
 	decode_irq_flags(dev, p->flags, &triggering, &polarity, &shareable);
 	irq->triggering = triggering;
 	irq->polarity = polarity;
-	irq->sharable = shareable;
+	irq->shareable = shareable;
 	irq->interrupt_count = 1;
 	irq->interrupts[0] = p->start;
 
@@ -689,7 +689,7 @@ static void pnpacpi_encode_irq(struct pnp_dev *dev,
 		(int) p->start,
 		triggering == ACPI_LEVEL_SENSITIVE ? "level" : "edge",
 		polarity == ACPI_ACTIVE_LOW ? "low" : "high",
-		irq->sharable == ACPI_SHARED ? "shared" : "exclusive",
+		irq->shareable == ACPI_SHARED ? "shared" : "exclusive",
 		irq->descriptor_length);
 }
 
@@ -711,14 +711,14 @@ static void pnpacpi_encode_ext_irq(struct pnp_dev *dev,
 	extended_irq->producer_consumer = ACPI_CONSUMER;
 	extended_irq->triggering = triggering;
 	extended_irq->polarity = polarity;
-	extended_irq->sharable = shareable;
+	extended_irq->shareable = shareable;
 	extended_irq->interrupt_count = 1;
 	extended_irq->interrupts[0] = p->start;
 
 	pnp_dbg(&dev->dev, "  encode irq %d %s %s %s\n", (int) p->start,
 		triggering == ACPI_LEVEL_SENSITIVE ? "level" : "edge",
 		polarity == ACPI_ACTIVE_LOW ? "low" : "high",
-		extended_irq->sharable == ACPI_SHARED ? "shared" : "exclusive");
+		extended_irq->shareable == ACPI_SHARED ? "shared" : "exclusive");
 }
 
 static void pnpacpi_encode_dma(struct pnp_dev *dev,
diff --git a/include/acpi/acconfig.h b/include/acpi/acconfig.h
index 298e15100789..16a83959e616 100644
--- a/include/acpi/acconfig.h
+++ b/include/acpi/acconfig.h
@@ -141,7 +141,7 @@
 
 /*
  * Maximal number of elements the Result Stack can contain,
- * it may be an arbitray value not exceeding the types of
+ * it may be an arbitrary value not exceeding the types of
  * result_size and result_count (now u8).
  */
 #define ACPI_RESULTS_OBJ_NUM_MAX        255
diff --git a/include/acpi/acexcep.h b/include/acpi/acexcep.h
index 6b4fed5b9d11..233a72f169bb 100644
--- a/include/acpi/acexcep.h
+++ b/include/acpi/acexcep.h
@@ -311,7 +311,8 @@ static const struct acpi_exception_info acpi_gbl_exception_names_aml[] = {
 		  "An ACPI name contains invalid character(s)"),
 	EXCEP_TXT("AE_AML_NAME_NOT_FOUND",
 		  "Could not resolve a named reference"),
-	EXCEP_TXT("AE_AML_INTERNAL", "An internal error within the interprete"),
+	EXCEP_TXT("AE_AML_INTERNAL",
+		  "An internal error within the interpreter"),
 	EXCEP_TXT("AE_AML_INVALID_SPACE_ID",
 		  "An Operation Region SpaceID is invalid"),
 	EXCEP_TXT("AE_AML_STRING_LIMIT",
diff --git a/include/acpi/acrestyp.h b/include/acpi/acrestyp.h
index 000145ded0ec..62930583219f 100644
--- a/include/acpi/acrestyp.h
+++ b/include/acpi/acrestyp.h
@@ -139,7 +139,7 @@ struct acpi_resource_irq {
 	u8 descriptor_length;
 	u8 triggering;
 	u8 polarity;
-	u8 sharable;
+	u8 shareable;
 	u8 wake_capable;
 	u8 interrupt_count;
 	u8 interrupts[1];
@@ -328,7 +328,7 @@ struct acpi_resource_extended_irq {
 	u8 producer_consumer;
 	u8 triggering;
 	u8 polarity;
-	u8 sharable;
+	u8 shareable;
 	u8 wake_capable;
 	u8 interrupt_count;
 	struct acpi_resource_source resource_source;
@@ -348,7 +348,7 @@ struct acpi_resource_gpio {
 	u8 connection_type;
 	u8 producer_consumer;	/* For values, see Producer/Consumer above */
 	u8 pin_config;
-	u8 sharable;		/* For values, see Interrupt Attributes above */
+	u8 shareable;		/* For values, see Interrupt Attributes above */
 	u8 wake_capable;	/* For values, see Interrupt Attributes above */
 	u8 io_restriction;
 	u8 triggering;		/* For values, see Interrupt Attributes above */
@@ -508,7 +508,7 @@ struct acpi_resource_uart_serialbus {
 struct acpi_resource_pin_function {
 	u8 revision_id;
 	u8 pin_config;
-	u8 sharable;		/* For values, see Interrupt Attributes above */
+	u8 shareable;		/* For values, see Interrupt Attributes above */
 	u16 function_number;
 	u16 pin_table_length;
 	u16 vendor_length;
@@ -520,7 +520,7 @@ struct acpi_resource_pin_function {
 struct acpi_resource_pin_config {
 	u8 revision_id;
 	u8 producer_consumer;	/* For values, see Producer/Consumer above */
-	u8 sharable;		/* For values, see Interrupt Attributes above */
+	u8 shareable;		/* For values, see Interrupt Attributes above */
 	u8 pin_config_type;
 	u32 pin_config_value;
 	u16 pin_table_length;
@@ -560,7 +560,7 @@ struct acpi_resource_pin_group {
 struct acpi_resource_pin_group_function {
 	u8 revision_id;
 	u8 producer_consumer;	/* For values, see Producer/Consumer above */
-	u8 sharable;		/* For values, see Interrupt Attributes above */
+	u8 shareable;		/* For values, see Interrupt Attributes above */
 	u16 function_number;
 	u16 vendor_length;
 	struct acpi_resource_source resource_source;
@@ -571,7 +571,7 @@ struct acpi_resource_pin_group_function {
 struct acpi_resource_pin_group_config {
 	u8 revision_id;
 	u8 producer_consumer;	/* For values, see Producer/Consumer above */
-	u8 sharable;		/* For values, see Interrupt Attributes above */
+	u8 shareable;		/* For values, see Interrupt Attributes above */
 	u8 pin_config_type;	/* For values, see pin_config_type above */
 	u32 pin_config_value;
 	u16 vendor_length;
diff --git a/include/acpi/actbl1.h b/include/acpi/actbl1.h
index d37ddde8bf8f..d14037ddf108 100644
--- a/include/acpi/actbl1.h
+++ b/include/acpi/actbl1.h
@@ -562,7 +562,7 @@ struct acpi_dmar_hardware_unit {
 
 #define ACPI_DMAR_INCLUDE_ALL       (1)
 
-/* 1: Reserved Memory Defininition */
+/* 1: Reserved Memory Definition */
 
 struct acpi_dmar_reserved_memory {
 	struct acpi_dmar_header header;
@@ -1395,7 +1395,7 @@ struct acpi_table_hmat {
 /* Values for HMAT structure types */
 
 enum acpi_hmat_type {
-	ACPI_HMAT_TYPE_ADDRESS_RANGE = 0,	/* Memory subystem address range */
+	ACPI_HMAT_TYPE_ADDRESS_RANGE = 0,	/* Memory subsystem address range */
 	ACPI_HMAT_TYPE_LOCALITY = 1,	/* System locality latency and bandwidth information */
 	ACPI_HMAT_TYPE_CACHE = 2,	/* Memory side cache information */
 	ACPI_HMAT_TYPE_RESERVED = 3	/* 3 and greater are reserved */
diff --git a/include/acpi/actbl2.h b/include/acpi/actbl2.h
index 6511c4ddec81..e45ced27f4c3 100644
--- a/include/acpi/actbl2.h
+++ b/include/acpi/actbl2.h
@@ -143,7 +143,7 @@ struct acpi_iort_memory_access {
  */
 struct acpi_iort_its_group {
 	u32 its_count;
-	u32 identifiers[1];	/* GIC ITS identifier arrary */
+	u32 identifiers[1];	/* GIC ITS identifier array */
 };
 
 struct acpi_iort_named_component {
-- 
cgit v1.2.3


From 6b31c01651c230c39d051170f8a258f56b02c6bc Mon Sep 17 00:00:00 2001
From: Bob Moore <robert.moore@intel.com>
Date: Fri, 15 Feb 2019 13:36:20 -0800
Subject: ACPICA: Update version to 20190215

ACPICA commit e66a8468f83fadb35aebeb534d8ef92622274696

Version 20190215

Link: https://github.com/acpica/acpica/commit/e66a8468
Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Erik Schmauss <erik.schmauss@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/acpi/acpixf.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h
index bcf3bc06eb94..24dbb4e742a6 100644
--- a/include/acpi/acpixf.h
+++ b/include/acpi/acpixf.h
@@ -12,7 +12,7 @@
 
 /* Current ACPICA subsystem version in YYYYMMDD format */
 
-#define ACPI_CA_VERSION                 0x20190108
+#define ACPI_CA_VERSION                 0x20190215
 
 #include <acpi/acconfig.h>
 #include <acpi/actypes.h>
-- 
cgit v1.2.3


From 0e29ae0303224535017c0c01aa8b078dd619ebab Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@armlinux.org.uk>
Date: Fri, 22 Feb 2019 11:31:41 +0000
Subject: net: phylink: update mac_config() documentation

A detail for mac_config() had been missed in the documentation for the
method - it is expected that the method will update the MAC to the
settings, rather than completely reprogram the MAC on each call.
Update the documentation for this method for this detail.

Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/phylink.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include')

diff --git a/include/linux/phylink.h b/include/linux/phylink.h
index f57059e4353f..6411c624f63a 100644
--- a/include/linux/phylink.h
+++ b/include/linux/phylink.h
@@ -149,6 +149,13 @@ int mac_link_state(struct net_device *ndev,
  *   configuration word. Nothing is advertised by the MAC. The MAC is
  *   responsible for reading the configuration word and configuring
  *   itself accordingly.
+ *
+ * Implementations are expected to update the MAC to reflect the
+ * requested settings - i.o.w., if nothing has changed between two
+ * calls, no action is expected.  If only flow control settings have
+ * changed, flow control should be updated *without* taking the link
+ * down.  This "update" behaviour is critical to avoid bouncing the
+ * link up status.
  */
 void mac_config(struct net_device *ndev, unsigned int mode,
 		const struct phylink_link_state *state);
-- 
cgit v1.2.3


From b58996795dc4921123ada213f9f10b8317d3f34f Mon Sep 17 00:00:00 2001
From: Andy Roulin <aroulin@cumulusnetworks.com>
Date: Fri, 22 Feb 2019 18:06:36 +0000
Subject: net: dev: add generic protodown handler

Introduce dev_change_proto_down_generic, a generic ndo_change_proto_down
implementation, which sets the netdev carrier state according to proto_down.

This adds the ability to set protodown on vxlan and macvlan devices in a
generic way for use by control protocols like VRRPD.

Signed-off-by: Andy Roulin <aroulin@cumulusnetworks.com>
Acked-by: Roopa Prabhu <roopa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  1 +
 net/core/dev.c            | 19 +++++++++++++++++++
 2 files changed, 20 insertions(+)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 6997f62cb6a0..ffbddd03242b 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3673,6 +3673,7 @@ int dev_get_port_parent_id(struct net_device *dev,
 			   struct netdev_phys_item_id *ppid, bool recurse);
 bool netdev_port_same_parent_id(struct net_device *a, struct net_device *b);
 int dev_change_proto_down(struct net_device *dev, bool proto_down);
+int dev_change_proto_down_generic(struct net_device *dev, bool proto_down);
 struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev, bool *again);
 struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 				    struct netdev_queue *txq, int *ret);
diff --git a/net/core/dev.c b/net/core/dev.c
index 8a0da95ff4cc..2b67f2aa59dd 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -7954,6 +7954,25 @@ int dev_change_proto_down(struct net_device *dev, bool proto_down)
 }
 EXPORT_SYMBOL(dev_change_proto_down);
 
+/**
+ *	dev_change_proto_down_generic - generic implementation for
+ * 	ndo_change_proto_down that sets carrier according to
+ * 	proto_down.
+ *
+ *	@dev: device
+ *	@proto_down: new value
+ */
+int dev_change_proto_down_generic(struct net_device *dev, bool proto_down)
+{
+	if (proto_down)
+		netif_carrier_off(dev);
+	else
+		netif_carrier_on(dev);
+	dev->proto_down = proto_down;
+	return 0;
+}
+EXPORT_SYMBOL(dev_change_proto_down_generic);
+
 u32 __dev_xdp_query(struct net_device *dev, bpf_op_t bpf_op,
 		    enum bpf_netdev_command cmd)
 {
-- 
cgit v1.2.3


From e728fdf0628971d43cb4e48860defc6e8a553761 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Fri, 22 Feb 2019 19:25:59 +0100
Subject: net: phy: improve definition of __ETHTOOL_LINK_MODE_MASK_NBITS

The way to define __ETHTOOL_LINK_MODE_MASK_NBITS seems to be overly
complicated, go with a standard approach instead.
Whilst we're at it, move the comment to the right place.

v2:
- rebased

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ethtool.h      |  4 ----
 include/uapi/linux/ethtool.h | 17 +++++++++--------
 2 files changed, 9 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index 19a8de5326fb..e6ebc9761822 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -98,10 +98,6 @@ static inline u32 ethtool_rxfh_indir_default(u32 index, u32 n_rx_rings)
 	return index % n_rx_rings;
 }
 
-/* number of link mode bits/ulongs handled internally by kernel */
-#define __ETHTOOL_LINK_MODE_MASK_NBITS			\
-	(__ETHTOOL_LINK_MODE_LAST + 1)
-
 /* declare a link mode bitmap */
 #define __ETHTOOL_DECLARE_LINK_MODE_MASK(name)		\
 	DECLARE_BITMAP(name, __ETHTOOL_LINK_MODE_MASK_NBITS)
diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index 378c52308d89..3652b239dad1 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -1432,6 +1432,13 @@ enum ethtool_link_mode_bit_indices {
 	ETHTOOL_LINK_MODE_56000baseSR4_Full_BIT	= 29,
 	ETHTOOL_LINK_MODE_56000baseLR4_Full_BIT	= 30,
 	ETHTOOL_LINK_MODE_25000baseCR_Full_BIT	= 31,
+
+	/* Last allowed bit for __ETHTOOL_LINK_MODE_LEGACY_MASK is bit
+	 * 31. Please do NOT define any SUPPORTED_* or ADVERTISED_*
+	 * macro for bits > 31. The only way to use indices > 31 is to
+	 * use the new ETHTOOL_GLINKSETTINGS/ETHTOOL_SLINKSETTINGS API.
+	 */
+
 	ETHTOOL_LINK_MODE_25000baseKR_Full_BIT	= 32,
 	ETHTOOL_LINK_MODE_25000baseSR_Full_BIT	= 33,
 	ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT	= 34,
@@ -1469,14 +1476,8 @@ enum ethtool_link_mode_bit_indices {
 	ETHTOOL_LINK_MODE_200000baseDR4_Full_BIT	 = 65,
 	ETHTOOL_LINK_MODE_200000baseCR4_Full_BIT	 = 66,
 
-	/* Last allowed bit for __ETHTOOL_LINK_MODE_LEGACY_MASK is bit
-	 * 31. Please do NOT define any SUPPORTED_* or ADVERTISED_*
-	 * macro for bits > 31. The only way to use indices > 31 is to
-	 * use the new ETHTOOL_GLINKSETTINGS/ETHTOOL_SLINKSETTINGS API.
-	 */
-
-	__ETHTOOL_LINK_MODE_LAST
-	  = ETHTOOL_LINK_MODE_200000baseCR4_Full_BIT,
+	/* must be last entry */
+	__ETHTOOL_LINK_MODE_MASK_NBITS
 };
 
 #define __ETHTOOL_LINK_MODE_LEGACY_MASK(base_name)	\
-- 
cgit v1.2.3


From 631ba9063b446800e96debb41ad45eba85f880a8 Mon Sep 17 00:00:00 2001
From: Maxime Chevallier <maxime.chevallier@bootlin.com>
Date: Sat, 23 Feb 2019 00:37:41 +0100
Subject: net: phy: marvell10g: Use a #define for 88X3310 family id

The PHY ID corresponding to the 88X3310 is also used for other PHYs in
the same family, such as the 88E2010. Use a #define for the PHY id, that
ignores the last nibble.

Signed-off-by: Maxime Chevallier <maxime.chevallier@bootlin.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/marvell10g.c | 4 ++--
 include/linux/marvell_phy.h  | 1 +
 2 files changed, 3 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/net/phy/marvell10g.c b/drivers/net/phy/marvell10g.c
index 9342d8c2ff7f..9c0b8f16cec5 100644
--- a/drivers/net/phy/marvell10g.c
+++ b/drivers/net/phy/marvell10g.c
@@ -430,7 +430,7 @@ static int mv3310_read_status(struct phy_device *phydev)
 
 static struct phy_driver mv3310_drivers[] = {
 	{
-		.phy_id		= 0x002b09aa,
+		.phy_id		= MARVELL_PHY_ID_88X3310,
 		.phy_id_mask	= MARVELL_PHY_ID_MASK,
 		.name		= "mv88x3310",
 		.get_features	= mv3310_get_features,
@@ -448,7 +448,7 @@ static struct phy_driver mv3310_drivers[] = {
 module_phy_driver(mv3310_drivers);
 
 static struct mdio_device_id __maybe_unused mv3310_tbl[] = {
-	{ 0x002b09aa, MARVELL_PHY_ID_MASK },
+	{ MARVELL_PHY_ID_88X3310, MARVELL_PHY_ID_MASK },
 	{ },
 };
 MODULE_DEVICE_TABLE(mdio, mv3310_tbl);
diff --git a/include/linux/marvell_phy.h b/include/linux/marvell_phy.h
index 1eb6f244588d..70c17345e118 100644
--- a/include/linux/marvell_phy.h
+++ b/include/linux/marvell_phy.h
@@ -20,6 +20,7 @@
 #define MARVELL_PHY_ID_88E1540		0x01410eb0
 #define MARVELL_PHY_ID_88E1545		0x01410ea0
 #define MARVELL_PHY_ID_88E3016		0x01410e60
+#define MARVELL_PHY_ID_88X3310		0x002b09a0
 
 /* The MV88e6390 Ethernet switch contains embedded PHYs. These PHYs do
  * not have a model ID. So the switch driver traps reads to the ID2
-- 
cgit v1.2.3


From 62d01535474b612b3c5d864999b17cbf2cd8f2cc Mon Sep 17 00:00:00 2001
From: Maxime Chevallier <maxime.chevallier@bootlin.com>
Date: Sat, 23 Feb 2019 00:37:44 +0100
Subject: net: phy: marvell10g: add support for the 88x2110 PHY

This patch adds support for the 88x2110 PHY, which is similar to the
already supported 88x3310 PHY without the SFP interface.

It supports 10/100/1000BASET along with 2.5GBASET, 5GBASET and 10GBASET,
with the same interface modes that are used by the 3310.

This PHY don't have the same issue as the 88x3310 regarding 2.5/5G
abilities, and correctly follows the 802.3bz standard to list the
supported abilities.

Signed-off-by: Maxime Chevallier <maxime.chevallier@bootlin.com>
Suggested-by: Antoine Tenart <antoine.tenart@bootlin.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/marvell10g.c | 13 +++++++++++++
 include/linux/marvell_phy.h  |  1 +
 2 files changed, 14 insertions(+)

(limited to 'include')

diff --git a/drivers/net/phy/marvell10g.c b/drivers/net/phy/marvell10g.c
index 8f354c3f3876..580e91deadbc 100644
--- a/drivers/net/phy/marvell10g.c
+++ b/drivers/net/phy/marvell10g.c
@@ -478,12 +478,25 @@ static struct phy_driver mv3310_drivers[] = {
 		.aneg_done	= mv3310_aneg_done,
 		.read_status	= mv3310_read_status,
 	},
+	{
+		.phy_id		= MARVELL_PHY_ID_88E2110,
+		.phy_id_mask	= MARVELL_PHY_ID_MASK,
+		.name		= "mv88x2110",
+		.features	= PHY_10GBIT_FEATURES,
+		.probe		= mv3310_probe,
+		.soft_reset	= gen10g_no_soft_reset,
+		.config_init	= mv3310_config_init,
+		.config_aneg	= mv3310_config_aneg,
+		.aneg_done	= mv3310_aneg_done,
+		.read_status	= mv3310_read_status,
+	},
 };
 
 module_phy_driver(mv3310_drivers);
 
 static struct mdio_device_id __maybe_unused mv3310_tbl[] = {
 	{ MARVELL_PHY_ID_88X3310, MARVELL_PHY_ID_MASK },
+	{ MARVELL_PHY_ID_88E2110, MARVELL_PHY_ID_MASK },
 	{ },
 };
 MODULE_DEVICE_TABLE(mdio, mv3310_tbl);
diff --git a/include/linux/marvell_phy.h b/include/linux/marvell_phy.h
index 70c17345e118..73d04743a2bb 100644
--- a/include/linux/marvell_phy.h
+++ b/include/linux/marvell_phy.h
@@ -21,6 +21,7 @@
 #define MARVELL_PHY_ID_88E1545		0x01410ea0
 #define MARVELL_PHY_ID_88E3016		0x01410e60
 #define MARVELL_PHY_ID_88X3310		0x002b09a0
+#define MARVELL_PHY_ID_88E2110		0x002b09b0
 
 /* The MV88e6390 Ethernet switch contains embedded PHYs. These PHYs do
  * not have a model ID. So the switch driver traps reads to the ID2
-- 
cgit v1.2.3


From 6375da3dc07f7c9a3f006646e4a03c6ded7b163e Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@mellanox.com>
Date: Sun, 24 Feb 2019 06:46:33 +0000
Subject: mlxsw: spectrum_acl: Add vregion migration end tracepoint

Hit the new tracepoint once the vregion migration ends.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c  | 11 ++++++++---
 include/trace/events/mlxsw.h                         | 20 ++++++++++++++++++++
 2 files changed, 28 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c
index 822766f81236..c9d9cded1724 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c
@@ -1274,8 +1274,10 @@ mlxsw_sp_acl_tcam_vregion_migrate(struct mlxsw_sp *mlxsw_sp,
 
 	region2 = mlxsw_sp_acl_tcam_region_create(mlxsw_sp, vregion->tcam,
 						  vregion, hints_priv);
-	if (IS_ERR(region2))
-		return PTR_ERR(region2);
+	if (IS_ERR(region2)) {
+		err = PTR_ERR(region2);
+		goto out;
+	}
 
 	vregion->region2 = region2;
 	err = mlxsw_sp_acl_tcam_group_region_attach(mlxsw_sp,
@@ -1309,11 +1311,14 @@ mlxsw_sp_acl_tcam_vregion_migrate(struct mlxsw_sp *mlxsw_sp,
 		mutex_unlock(&vregion->lock);
 	}
 
-	return err;
+	goto out;
 
 err_group_region_attach:
 	vregion->region2 = NULL;
 	mlxsw_sp_acl_tcam_region_destroy(mlxsw_sp, region2);
+out:
+	trace_mlxsw_sp_acl_tcam_vregion_migrate_end(mlxsw_sp, vregion);
+
 	return err;
 }
 
diff --git a/include/trace/events/mlxsw.h b/include/trace/events/mlxsw.h
index a5ce6df9dc49..6a4cfaef33a2 100644
--- a/include/trace/events/mlxsw.h
+++ b/include/trace/events/mlxsw.h
@@ -73,6 +73,26 @@ TRACE_EVENT(mlxsw_sp_acl_tcam_vregion_migrate,
 		  __entry->mlxsw_sp, __entry->vregion)
 );
 
+TRACE_EVENT(mlxsw_sp_acl_tcam_vregion_migrate_end,
+	TP_PROTO(const struct mlxsw_sp *mlxsw_sp,
+		 const struct mlxsw_sp_acl_tcam_vregion *vregion),
+
+	TP_ARGS(mlxsw_sp, vregion),
+
+	TP_STRUCT__entry(
+		__field(const void *, mlxsw_sp)
+		__field(const void *, vregion)
+	),
+
+	TP_fast_assign(
+		__entry->mlxsw_sp = mlxsw_sp;
+		__entry->vregion = vregion;
+	),
+
+	TP_printk("mlxsw_sp %p, vregion %p",
+		  __entry->mlxsw_sp, __entry->vregion)
+);
+
 TRACE_EVENT(mlxsw_sp_acl_tcam_vregion_rehash_dis,
 	TP_PROTO(const struct mlxsw_sp *mlxsw_sp,
 		 const struct mlxsw_sp_acl_tcam_vregion *vregion),
-- 
cgit v1.2.3


From 75efc250d2e57c43761890388a92eecd93aa9e45 Mon Sep 17 00:00:00 2001
From: Kefeng Wang <wangkefeng.wang@huawei.com>
Date: Sat, 23 Feb 2019 15:28:28 +0800
Subject: ipv6: icmp: use percpu allocation

Use percpu allocation for the ipv6.icmp_sk.

Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netns/ipv6.h |  2 +-
 net/ipv6/icmp.c          | 11 +++++------
 2 files changed, 6 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index ef1ed529f33c..b028a1dc150d 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -83,7 +83,7 @@ struct netns_ipv6 {
 	struct fib6_table       *fib6_local_tbl;
 	struct fib_rules_ops    *fib6_rules_ops;
 #endif
-	struct sock		**icmp_sk;
+	struct sock * __percpu	*icmp_sk;
 	struct sock             *ndisc_sk;
 	struct sock             *tcp_sk;
 	struct sock             *igmp_sk;
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index af520014def5..802faa2fcc0e 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -81,7 +81,7 @@
  */
 static inline struct sock *icmpv6_sk(struct net *net)
 {
-	return net->ipv6.icmp_sk[smp_processor_id()];
+	return *this_cpu_ptr(net->ipv6.icmp_sk);
 }
 
 static int icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
@@ -958,8 +958,8 @@ static void __net_exit icmpv6_sk_exit(struct net *net)
 	int i;
 
 	for_each_possible_cpu(i)
-		inet_ctl_sock_destroy(net->ipv6.icmp_sk[i]);
-	kfree(net->ipv6.icmp_sk);
+		inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv6.icmp_sk, i));
+	free_percpu(net->ipv6.icmp_sk);
 }
 
 static int __net_init icmpv6_sk_init(struct net *net)
@@ -967,8 +967,7 @@ static int __net_init icmpv6_sk_init(struct net *net)
 	struct sock *sk;
 	int err, i;
 
-	net->ipv6.icmp_sk =
-		kcalloc(nr_cpu_ids, sizeof(struct sock *), GFP_KERNEL);
+	net->ipv6.icmp_sk = alloc_percpu(struct sock *);
 	if (!net->ipv6.icmp_sk)
 		return -ENOMEM;
 
@@ -981,7 +980,7 @@ static int __net_init icmpv6_sk_init(struct net *net)
 			goto fail;
 		}
 
-		net->ipv6.icmp_sk[i] = sk;
+		*per_cpu_ptr(net->ipv6.icmp_sk, i) = sk;
 
 		/* Enough space for 2 64K ICMP packets, including
 		 * sk_buff struct overhead.
-- 
cgit v1.2.3


From 2b794c4098b525836e37d16045abee3091fdfe18 Mon Sep 17 00:00:00 2001
From: Vakul Garg <vakul.garg@nxp.com>
Date: Sat, 23 Feb 2019 08:42:37 +0000
Subject: tls: Return type of non-data records retrieved using MSG_PEEK in
 recvmsg

The patch enables returning 'type' in msghdr for records that are
retrieved with MSG_PEEK in recvmsg. Further it prevents records peeked
from socket from getting clubbed with any other record of different
type when records are subsequently dequeued from strparser.

For each record, we now retain its type in sk_buff's control buffer
cb[]. Inside control buffer, record's full length and offset are already
stored by strparser in 'struct strp_msg'. We store record type after
'struct strp_msg' inside 'struct tls_msg'. For tls1.2, the type is
stored just after record dequeue. For tls1.3, the type is stored after
record has been decrypted.

Inside process_rx_list(), before processing a non-data record, we check
that we must be able to return back the record type to the user
application. If not, the decrypted records in tls context's rx_list is
left there without consuming any data.

Fixes: 692d7b5d1f912 ("tls: Fix recvmsg() to be able to peek across multiple records")
Signed-off-by: Vakul Garg <vakul.garg@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tls.h | 10 +++++++
 net/tls/tls_sw.c  | 78 +++++++++++++++++++++++++++++++++++++++++++++++--------
 2 files changed, 77 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/include/net/tls.h b/include/net/tls.h
index a8b37226a287..9f4117ae2297 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -129,6 +129,11 @@ struct tls_rec {
 	u8 aead_req_ctx[];
 };
 
+struct tls_msg {
+	struct strp_msg rxm;
+	u8 control;
+};
+
 struct tx_work {
 	struct delayed_work work;
 	struct sock *sk;
@@ -333,6 +338,11 @@ int tls_push_partial_record(struct sock *sk, struct tls_context *ctx,
 int tls_push_pending_closed_record(struct sock *sk, struct tls_context *ctx,
 				   int flags, long *timeo);
 
+static inline struct tls_msg *tls_msg(struct sk_buff *skb)
+{
+	return (struct tls_msg *)strp_msg(skb);
+}
+
 static inline bool tls_is_pending_closed_record(struct tls_context *ctx)
 {
 	return test_bit(TLS_PENDING_CLOSED_RECORD, &ctx->flags);
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 71be8acfbc9b..1cc830582fa8 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -1530,22 +1530,38 @@ static bool tls_sw_advance_skb(struct sock *sk, struct sk_buff *skb,
 }
 
 /* This function traverses the rx_list in tls receive context to copies the
- * decrypted data records into the buffer provided by caller zero copy is not
+ * decrypted records into the buffer provided by caller zero copy is not
  * true. Further, the records are removed from the rx_list if it is not a peek
  * case and the record has been consumed completely.
  */
 static int process_rx_list(struct tls_sw_context_rx *ctx,
 			   struct msghdr *msg,
+			   u8 *control,
+			   bool *cmsg,
 			   size_t skip,
 			   size_t len,
 			   bool zc,
 			   bool is_peek)
 {
 	struct sk_buff *skb = skb_peek(&ctx->rx_list);
+	u8 ctrl = *control;
+	u8 msgc = *cmsg;
+	struct tls_msg *tlm;
 	ssize_t copied = 0;
 
+	/* Set the record type in 'control' if caller didn't pass it */
+	if (!ctrl && skb) {
+		tlm = tls_msg(skb);
+		ctrl = tlm->control;
+	}
+
 	while (skip && skb) {
 		struct strp_msg *rxm = strp_msg(skb);
+		tlm = tls_msg(skb);
+
+		/* Cannot process a record of different type */
+		if (ctrl != tlm->control)
+			return 0;
 
 		if (skip < rxm->full_len)
 			break;
@@ -1559,6 +1575,27 @@ static int process_rx_list(struct tls_sw_context_rx *ctx,
 		struct strp_msg *rxm = strp_msg(skb);
 		int chunk = min_t(unsigned int, rxm->full_len - skip, len);
 
+		tlm = tls_msg(skb);
+
+		/* Cannot process a record of different type */
+		if (ctrl != tlm->control)
+			return 0;
+
+		/* Set record type if not already done. For a non-data record,
+		 * do not proceed if record type could not be copied.
+		 */
+		if (!msgc) {
+			int cerr = put_cmsg(msg, SOL_TLS, TLS_GET_RECORD_TYPE,
+					    sizeof(ctrl), &ctrl);
+			msgc = true;
+			if (ctrl != TLS_RECORD_TYPE_DATA) {
+				if (cerr || msg->msg_flags & MSG_CTRUNC)
+					return -EIO;
+
+				*cmsg = msgc;
+			}
+		}
+
 		if (!zc || (rxm->full_len - skip) > len) {
 			int err = skb_copy_datagram_msg(skb, rxm->offset + skip,
 						    msg, chunk);
@@ -1597,6 +1634,7 @@ static int process_rx_list(struct tls_sw_context_rx *ctx,
 		skb = next_skb;
 	}
 
+	*control = ctrl;
 	return copied;
 }
 
@@ -1614,6 +1652,7 @@ int tls_sw_recvmsg(struct sock *sk,
 	unsigned char control = 0;
 	ssize_t decrypted = 0;
 	struct strp_msg *rxm;
+	struct tls_msg *tlm;
 	struct sk_buff *skb;
 	ssize_t copied = 0;
 	bool cmsg = false;
@@ -1632,7 +1671,8 @@ int tls_sw_recvmsg(struct sock *sk,
 	lock_sock(sk);
 
 	/* Process pending decrypted records. It must be non-zero-copy */
-	err = process_rx_list(ctx, msg, 0, len, false, is_peek);
+	err = process_rx_list(ctx, msg, &control, &cmsg, 0, len, false,
+			      is_peek);
 	if (err < 0) {
 		tls_err_abort(sk, err);
 		goto end;
@@ -1668,6 +1708,12 @@ int tls_sw_recvmsg(struct sock *sk,
 				}
 			}
 			goto recv_end;
+		} else {
+			tlm = tls_msg(skb);
+			if (prot->version == TLS_1_3_VERSION)
+				tlm->control = 0;
+			else
+				tlm->control = ctx->control;
 		}
 
 		rxm = strp_msg(skb);
@@ -1694,22 +1740,34 @@ int tls_sw_recvmsg(struct sock *sk,
 
 		if (err == -EINPROGRESS)
 			num_async++;
+		else if (prot->version == TLS_1_3_VERSION)
+			tlm->control = ctx->control;
+
+		/* If the type of records being processed is not known yet,
+		 * set it to record type just dequeued. If it is already known,
+		 * but does not match the record type just dequeued, go to end.
+		 * We always get record type here since for tls1.2, record type
+		 * is known just after record is dequeued from stream parser.
+		 * For tls1.3, we disable async.
+		 */
+
+		if (!control)
+			control = tlm->control;
+		else if (control != tlm->control)
+			goto recv_end;
 
 		if (!cmsg) {
 			int cerr;
 
 			cerr = put_cmsg(msg, SOL_TLS, TLS_GET_RECORD_TYPE,
-					sizeof(ctx->control), &ctx->control);
+					sizeof(control), &control);
 			cmsg = true;
-			control = ctx->control;
-			if (ctx->control != TLS_RECORD_TYPE_DATA) {
+			if (control != TLS_RECORD_TYPE_DATA) {
 				if (cerr || msg->msg_flags & MSG_CTRUNC) {
 					err = -EIO;
 					goto recv_end;
 				}
 			}
-		} else if (control != ctx->control) {
-			goto recv_end;
 		}
 
 		if (async)
@@ -1784,18 +1842,16 @@ recv_end:
 
 		/* Drain records from the rx_list & copy if required */
 		if (is_peek || is_kvec)
-			err = process_rx_list(ctx, msg, copied,
+			err = process_rx_list(ctx, msg, &control, &cmsg, copied,
 					      decrypted, false, is_peek);
 		else
-			err = process_rx_list(ctx, msg, 0,
+			err = process_rx_list(ctx, msg, &control, &cmsg, 0,
 					      decrypted, true, is_peek);
 		if (err < 0) {
 			tls_err_abort(sk, err);
 			copied = 0;
 			goto end;
 		}
-
-		WARN_ON(decrypted != err);
 	}
 
 	copied += decrypted;
-- 
cgit v1.2.3


From 75104db0cb353ec0b74644d960640ac0f84ab839 Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Sun, 24 Feb 2019 20:44:43 +0100
Subject: dsa: Remove phydev parameter from disable_port call

No current DSA driver makes use of the phydev parameter passed to the
disable_port call. Remove it.

Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/b53/b53_common.c       | 4 ++--
 drivers/net/dsa/b53/b53_priv.h         | 2 +-
 drivers/net/dsa/bcm_sf2.c              | 9 ++++-----
 drivers/net/dsa/lan9303-core.c         | 3 +--
 drivers/net/dsa/lantiq_gswip.c         | 5 ++---
 drivers/net/dsa/microchip/ksz_common.c | 2 +-
 drivers/net/dsa/microchip/ksz_common.h | 2 +-
 drivers/net/dsa/mt7530.c               | 5 ++---
 drivers/net/dsa/mv88e6xxx/chip.c       | 3 +--
 drivers/net/dsa/qca8k.c                | 3 +--
 drivers/net/dsa/rtl8366rb.c            | 3 +--
 drivers/net/dsa/vitesse-vsc73xx.c      | 3 +--
 include/net/dsa.h                      | 3 +--
 net/dsa/dsa_priv.h                     | 2 +-
 net/dsa/port.c                         | 4 ++--
 net/dsa/slave.c                        | 2 +-
 16 files changed, 23 insertions(+), 32 deletions(-)

(limited to 'include')

diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c
index c76892ac4e69..0852e5e08177 100644
--- a/drivers/net/dsa/b53/b53_common.c
+++ b/drivers/net/dsa/b53/b53_common.c
@@ -543,7 +543,7 @@ int b53_enable_port(struct dsa_switch *ds, int port, struct phy_device *phy)
 }
 EXPORT_SYMBOL(b53_enable_port);
 
-void b53_disable_port(struct dsa_switch *ds, int port, struct phy_device *phy)
+void b53_disable_port(struct dsa_switch *ds, int port)
 {
 	struct b53_device *dev = ds->priv;
 	u8 reg;
@@ -963,7 +963,7 @@ static int b53_setup(struct dsa_switch *ds)
 		if (dsa_is_cpu_port(ds, port))
 			b53_enable_cpu_port(dev, port);
 		else if (dsa_is_unused_port(ds, port))
-			b53_disable_port(ds, port, NULL);
+			b53_disable_port(ds, port);
 	}
 
 	return ret;
diff --git a/drivers/net/dsa/b53/b53_priv.h b/drivers/net/dsa/b53/b53_priv.h
index 4dc7ee38b258..e3441dcf2d21 100644
--- a/drivers/net/dsa/b53/b53_priv.h
+++ b/drivers/net/dsa/b53/b53_priv.h
@@ -356,7 +356,7 @@ enum dsa_tag_protocol b53_get_tag_protocol(struct dsa_switch *ds, int port);
 void b53_mirror_del(struct dsa_switch *ds, int port,
 		    struct dsa_mall_mirror_tc_entry *mirror);
 int b53_enable_port(struct dsa_switch *ds, int port, struct phy_device *phy);
-void b53_disable_port(struct dsa_switch *ds, int port, struct phy_device *phy);
+void b53_disable_port(struct dsa_switch *ds, int port);
 void b53_brcm_hdr_setup(struct dsa_switch *ds, int port);
 void b53_eee_enable_set(struct dsa_switch *ds, int port, bool enable);
 int b53_eee_init(struct dsa_switch *ds, int port, struct phy_device *phy);
diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c
index f91b8e77d543..c8e3f05e1d72 100644
--- a/drivers/net/dsa/bcm_sf2.c
+++ b/drivers/net/dsa/bcm_sf2.c
@@ -221,8 +221,7 @@ static int bcm_sf2_port_setup(struct dsa_switch *ds, int port,
 	return b53_enable_port(ds, port, phy);
 }
 
-static void bcm_sf2_port_disable(struct dsa_switch *ds, int port,
-				 struct phy_device *phy)
+static void bcm_sf2_port_disable(struct dsa_switch *ds, int port)
 {
 	struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
 	u32 reg;
@@ -241,7 +240,7 @@ static void bcm_sf2_port_disable(struct dsa_switch *ds, int port,
 	if (priv->int_phy_mask & 1 << port && priv->hw_params.num_gphy == 1)
 		bcm_sf2_gphy_enable_set(ds, false);
 
-	b53_disable_port(ds, port, phy);
+	b53_disable_port(ds, port);
 
 	/* Power down the port memory */
 	reg = core_readl(priv, CORE_MEM_PSM_VDD_CTRL);
@@ -692,7 +691,7 @@ static int bcm_sf2_sw_suspend(struct dsa_switch *ds)
 	 */
 	for (port = 0; port < ds->num_ports; port++) {
 		if (dsa_is_user_port(ds, port) || dsa_is_cpu_port(ds, port))
-			bcm_sf2_port_disable(ds, port, NULL);
+			bcm_sf2_port_disable(ds, port);
 	}
 
 	return 0;
@@ -788,7 +787,7 @@ static int bcm_sf2_sw_setup(struct dsa_switch *ds)
 		else if (dsa_is_cpu_port(ds, port))
 			bcm_sf2_imp_setup(ds, port);
 		else
-			bcm_sf2_port_disable(ds, port, NULL);
+			bcm_sf2_port_disable(ds, port);
 	}
 
 	b53_configure_vlan(ds);
diff --git a/drivers/net/dsa/lan9303-core.c b/drivers/net/dsa/lan9303-core.c
index b4f6e1a67dd9..2ffab7ee3d80 100644
--- a/drivers/net/dsa/lan9303-core.c
+++ b/drivers/net/dsa/lan9303-core.c
@@ -1091,8 +1091,7 @@ static int lan9303_port_enable(struct dsa_switch *ds, int port,
 	return lan9303_enable_processing_port(chip, port);
 }
 
-static void lan9303_port_disable(struct dsa_switch *ds, int port,
-				 struct phy_device *phy)
+static void lan9303_port_disable(struct dsa_switch *ds, int port)
 {
 	struct lan9303 *chip = ds->priv;
 
diff --git a/drivers/net/dsa/lantiq_gswip.c b/drivers/net/dsa/lantiq_gswip.c
index 27d092cab40e..ee1455758764 100644
--- a/drivers/net/dsa/lantiq_gswip.c
+++ b/drivers/net/dsa/lantiq_gswip.c
@@ -480,8 +480,7 @@ static int gswip_port_enable(struct dsa_switch *ds, int port,
 	return 0;
 }
 
-static void gswip_port_disable(struct dsa_switch *ds, int port,
-			       struct phy_device *phy)
+static void gswip_port_disable(struct dsa_switch *ds, int port)
 {
 	struct gswip_priv *priv = ds->priv;
 
@@ -549,7 +548,7 @@ static int gswip_setup(struct dsa_switch *ds)
 
 	/* disable port fetch/store dma on all ports */
 	for (i = 0; i < priv->hw_info->max_ports; i++)
-		gswip_port_disable(ds, i, NULL);
+		gswip_port_disable(ds, i);
 
 	/* enable Switch */
 	gswip_mdio_mask(priv, 0, GSWIP_MDIO_GLOB_ENABLE, GSWIP_MDIO_GLOB);
diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c
index d89c97724c35..9328b88849d2 100644
--- a/drivers/net/dsa/microchip/ksz_common.c
+++ b/drivers/net/dsa/microchip/ksz_common.c
@@ -380,7 +380,7 @@ int ksz_enable_port(struct dsa_switch *ds, int port, struct phy_device *phy)
 }
 EXPORT_SYMBOL_GPL(ksz_enable_port);
 
-void ksz_disable_port(struct dsa_switch *ds, int port, struct phy_device *phy)
+void ksz_disable_port(struct dsa_switch *ds, int port)
 {
 	struct ksz_device *dev = ds->priv;
 
diff --git a/drivers/net/dsa/microchip/ksz_common.h b/drivers/net/dsa/microchip/ksz_common.h
index 0d25bc44f453..21cd794e18f1 100644
--- a/drivers/net/dsa/microchip/ksz_common.h
+++ b/drivers/net/dsa/microchip/ksz_common.h
@@ -35,7 +35,7 @@ void ksz_port_mdb_add(struct dsa_switch *ds, int port,
 int ksz_port_mdb_del(struct dsa_switch *ds, int port,
 		     const struct switchdev_obj_port_mdb *mdb);
 int ksz_enable_port(struct dsa_switch *ds, int port, struct phy_device *phy);
-void ksz_disable_port(struct dsa_switch *ds, int port, struct phy_device *phy);
+void ksz_disable_port(struct dsa_switch *ds, int port);
 
 /* Common register access functions */
 
diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c
index c2b61500f958..0390c5e8271a 100644
--- a/drivers/net/dsa/mt7530.c
+++ b/drivers/net/dsa/mt7530.c
@@ -729,8 +729,7 @@ mt7530_port_enable(struct dsa_switch *ds, int port,
 }
 
 static void
-mt7530_port_disable(struct dsa_switch *ds, int port,
-		    struct phy_device *phy)
+mt7530_port_disable(struct dsa_switch *ds, int port)
 {
 	struct mt7530_priv *priv = ds->priv;
 
@@ -1301,7 +1300,7 @@ mt7530_setup(struct dsa_switch *ds)
 		if (dsa_is_cpu_port(ds, i))
 			mt7530_cpu_port_enable(priv, i);
 		else
-			mt7530_port_disable(ds, i, NULL);
+			mt7530_port_disable(ds, i);
 	}
 
 	/* Flush the FDB table */
diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index d30336f259ce..0c944ed2173e 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -2388,8 +2388,7 @@ static int mv88e6xxx_port_enable(struct dsa_switch *ds, int port,
 	return err;
 }
 
-static void mv88e6xxx_port_disable(struct dsa_switch *ds, int port,
-				   struct phy_device *phydev)
+static void mv88e6xxx_port_disable(struct dsa_switch *ds, int port)
 {
 	struct mv88e6xxx_chip *chip = ds->priv;
 
diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c
index 195a8a87b984..576b37d12a63 100644
--- a/drivers/net/dsa/qca8k.c
+++ b/drivers/net/dsa/qca8k.c
@@ -801,8 +801,7 @@ qca8k_port_enable(struct dsa_switch *ds, int port,
 }
 
 static void
-qca8k_port_disable(struct dsa_switch *ds, int port,
-		   struct phy_device *phy)
+qca8k_port_disable(struct dsa_switch *ds, int port)
 {
 	struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv;
 
diff --git a/drivers/net/dsa/rtl8366rb.c b/drivers/net/dsa/rtl8366rb.c
index a4d5049df692..40b3974970c6 100644
--- a/drivers/net/dsa/rtl8366rb.c
+++ b/drivers/net/dsa/rtl8366rb.c
@@ -1073,8 +1073,7 @@ rtl8366rb_port_enable(struct dsa_switch *ds, int port,
 }
 
 static void
-rtl8366rb_port_disable(struct dsa_switch *ds, int port,
-		       struct phy_device *phy)
+rtl8366rb_port_disable(struct dsa_switch *ds, int port)
 {
 	struct realtek_smi *smi = ds->priv;
 	int ret;
diff --git a/drivers/net/dsa/vitesse-vsc73xx.c b/drivers/net/dsa/vitesse-vsc73xx.c
index 9f1b5f2e8a64..d4780610ea8a 100644
--- a/drivers/net/dsa/vitesse-vsc73xx.c
+++ b/drivers/net/dsa/vitesse-vsc73xx.c
@@ -1013,8 +1013,7 @@ static int vsc73xx_port_enable(struct dsa_switch *ds, int port,
 	return 0;
 }
 
-static void vsc73xx_port_disable(struct dsa_switch *ds, int port,
-				 struct phy_device *phy)
+static void vsc73xx_port_disable(struct dsa_switch *ds, int port)
 {
 	struct vsc73xx *vsc = ds->priv;
 
diff --git a/include/net/dsa.h b/include/net/dsa.h
index 2c2c10812814..e8ac5b35ac4a 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -364,8 +364,7 @@ struct dsa_switch_ops {
 	 */
 	int	(*port_enable)(struct dsa_switch *ds, int port,
 			       struct phy_device *phy);
-	void	(*port_disable)(struct dsa_switch *ds, int port,
-				struct phy_device *phy);
+	void	(*port_disable)(struct dsa_switch *ds, int port);
 
 	/*
 	 * Port's MAC EEE settings
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index 47a1d1379d15..c6caa58c5c71 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -143,7 +143,7 @@ static inline struct net_device *dsa_master_find_slave(struct net_device *dev,
 int dsa_port_set_state(struct dsa_port *dp, u8 state,
 		       struct switchdev_trans *trans);
 int dsa_port_enable(struct dsa_port *dp, struct phy_device *phy);
-void dsa_port_disable(struct dsa_port *dp, struct phy_device *phy);
+void dsa_port_disable(struct dsa_port *dp);
 int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br);
 void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br);
 int dsa_port_vlan_filtering(struct dsa_port *dp, bool vlan_filtering,
diff --git a/net/dsa/port.c b/net/dsa/port.c
index 6b114b045162..a2dad10646cb 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -85,7 +85,7 @@ int dsa_port_enable(struct dsa_port *dp, struct phy_device *phy)
 	return 0;
 }
 
-void dsa_port_disable(struct dsa_port *dp, struct phy_device *phy)
+void dsa_port_disable(struct dsa_port *dp)
 {
 	struct dsa_switch *ds = dp->ds;
 	int port = dp->index;
@@ -94,7 +94,7 @@ void dsa_port_disable(struct dsa_port *dp, struct phy_device *phy)
 		dsa_port_set_state_now(dp, BR_STATE_DISABLED);
 
 	if (ds->ops->port_disable)
-		ds->ops->port_disable(ds, port, phy);
+		ds->ops->port_disable(ds, port);
 }
 
 int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br)
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 90629b12beaf..7274499293c9 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -122,7 +122,7 @@ static int dsa_slave_close(struct net_device *dev)
 
 	phylink_stop(dp->pl);
 
-	dsa_port_disable(dp, dev->phydev);
+	dsa_port_disable(dp);
 
 	dev_mc_unsync(master, dev);
 	dev_uc_unsync(master, dev);
-- 
cgit v1.2.3


From 72636db5ba7bb0520bf39ea56dc87c90580ec6fd Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Sun, 24 Feb 2019 18:39:02 -0800
Subject: switchdev: Complete removal of switchdev_port_attr_get()

We have no more in tree users of switchdev_port_attr_get() after
d0e698d57a94 ("Merge branch 'net-Get-rid-of-switchdev_port_attr_get'")
so completely remove the function signature and body.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/switchdev.h   |  4 ----
 net/switchdev/switchdev.c | 42 ------------------------------------------
 2 files changed, 46 deletions(-)

(limited to 'include')

diff --git a/include/net/switchdev.h b/include/net/switchdev.h
index 45310ddf2d7e..be4b13e66668 100644
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -115,13 +115,9 @@ typedef int switchdev_obj_dump_cb_t(struct switchdev_obj *obj);
 /**
  * struct switchdev_ops - switchdev operations
  *
- * @switchdev_port_attr_get: Get a port attribute (see switchdev_attr).
- *
  * @switchdev_port_attr_set: Set a port attribute (see switchdev_attr).
  */
 struct switchdev_ops {
-	int	(*switchdev_port_attr_get)(struct net_device *dev,
-					   struct switchdev_attr *attr);
 	int	(*switchdev_port_attr_set)(struct net_device *dev,
 					   const struct switchdev_attr *attr,
 					   struct switchdev_trans *trans);
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index 7e1357db33d7..362413c9b389 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -174,48 +174,6 @@ static int switchdev_deferred_enqueue(struct net_device *dev,
 	return 0;
 }
 
-/**
- *	switchdev_port_attr_get - Get port attribute
- *
- *	@dev: port device
- *	@attr: attribute to get
- */
-int switchdev_port_attr_get(struct net_device *dev, struct switchdev_attr *attr)
-{
-	const struct switchdev_ops *ops = dev->switchdev_ops;
-	struct net_device *lower_dev;
-	struct list_head *iter;
-	struct switchdev_attr first = {
-		.id = SWITCHDEV_ATTR_ID_UNDEFINED
-	};
-	int err = -EOPNOTSUPP;
-
-	if (ops && ops->switchdev_port_attr_get)
-		return ops->switchdev_port_attr_get(dev, attr);
-
-	if (attr->flags & SWITCHDEV_F_NO_RECURSE)
-		return err;
-
-	/* Switch device port(s) may be stacked under
-	 * bond/team/vlan dev, so recurse down to get attr on
-	 * each port.  Return -ENODATA if attr values don't
-	 * compare across ports.
-	 */
-
-	netdev_for_each_lower_dev(dev, lower_dev, iter) {
-		err = switchdev_port_attr_get(lower_dev, attr);
-		if (err)
-			break;
-		if (first.id == SWITCHDEV_ATTR_ID_UNDEFINED)
-			first = *attr;
-		else if (memcmp(&first, attr, sizeof(*attr)))
-			return -ENODATA;
-	}
-
-	return err;
-}
-EXPORT_SYMBOL_GPL(switchdev_port_attr_get);
-
 static int __switchdev_port_attr_set(struct net_device *dev,
 				     const struct switchdev_attr *attr,
 				     struct switchdev_trans *trans)
-- 
cgit v1.2.3


From c58ccf2b6de7d52994f9bb93227dfabf8077de24 Mon Sep 17 00:00:00 2001
From: Lukas Wunner <lukas@wunner.de>
Date: Sun, 3 Feb 2019 09:27:00 +0100
Subject: mmc: bcm2835: Drop pointer to mmc_host from bcm2835_host

The BCM2835 MMC host driver uses a pointer to get from the private
bcm2835_host structure to the generic mmc_host structure.  However the
latter is always immediately preceding the former in memory, so compute
its address with a subtraction (which is cheaper than a dereference) and
drop the superfluous pointer.

No functional change intended.

Signed-off-by: Lukas Wunner <lukas@wunner.de>
Cc: Frank Pavlic <f.pavlic@kunbus.de>
Cc: Alexander Graf <agraf@suse.de>
Reviewed-by: Alexander Graf <agraf@suse.de>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/bcm2835.c | 20 ++++++++++----------
 include/linux/mmc/host.h   |  5 +++++
 2 files changed, 15 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/drivers/mmc/host/bcm2835.c b/drivers/mmc/host/bcm2835.c
index ab8d58a60352..246c8ec24148 100644
--- a/drivers/mmc/host/bcm2835.c
+++ b/drivers/mmc/host/bcm2835.c
@@ -148,7 +148,6 @@ struct bcm2835_host {
 	void __iomem		*ioaddr;
 	u32			phys_addr;
 
-	struct mmc_host		*mmc;
 	struct platform_device	*pdev;
 
 	int			clock;		/* Current clock speed */
@@ -618,7 +617,7 @@ static void bcm2835_finish_request(struct bcm2835_host *host)
 				"failed to terminate DMA (%d)\n", err);
 	}
 
-	mmc_request_done(host->mmc, mrq);
+	mmc_request_done(mmc_from_priv(host), mrq);
 }
 
 static
@@ -837,7 +836,7 @@ static void bcm2835_timeout(struct work_struct *work)
 		dev_err(dev, "timeout waiting for hardware interrupt.\n");
 		bcm2835_dumpregs(host);
 
-		bcm2835_reset(host->mmc);
+		bcm2835_reset(mmc_from_priv(host));
 
 		if (host->data) {
 			host->data->error = -ETIMEDOUT;
@@ -1100,6 +1099,7 @@ static void bcm2835_dma_complete_work(struct work_struct *work)
 
 static void bcm2835_set_clock(struct bcm2835_host *host, unsigned int clock)
 {
+	struct mmc_host *mmc = mmc_from_priv(host);
 	int div;
 
 	/* The SDCDIV register has 11 bits, and holds (div - 2).  But
@@ -1143,18 +1143,18 @@ static void bcm2835_set_clock(struct bcm2835_host *host, unsigned int clock)
 		div = SDCDIV_MAX_CDIV;
 
 	clock = host->max_clk / (div + 2);
-	host->mmc->actual_clock = clock;
+	mmc->actual_clock = clock;
 
 	/* Calibrate some delays */
 
 	host->ns_per_fifo_word = (1000000000 / clock) *
-		((host->mmc->caps & MMC_CAP_4_BIT_DATA) ? 8 : 32);
+		((mmc->caps & MMC_CAP_4_BIT_DATA) ? 8 : 32);
 
 	host->cdiv = div;
 	writel(host->cdiv, host->ioaddr + SDCDIV);
 
 	/* Set the timeout to 500ms */
-	writel(host->mmc->actual_clock / 2, host->ioaddr + SDTOUT);
+	writel(mmc->actual_clock / 2, host->ioaddr + SDTOUT);
 }
 
 static void bcm2835_request(struct mmc_host *mmc, struct mmc_request *mrq)
@@ -1264,7 +1264,7 @@ static const struct mmc_host_ops bcm2835_ops = {
 
 static int bcm2835_add_host(struct bcm2835_host *host)
 {
-	struct mmc_host *mmc = host->mmc;
+	struct mmc_host *mmc = mmc_from_priv(host);
 	struct device *dev = &host->pdev->dev;
 	char pio_limit_string[20];
 	int ret;
@@ -1370,7 +1370,6 @@ static int bcm2835_probe(struct platform_device *pdev)
 
 	mmc->ops = &bcm2835_ops;
 	host = mmc_priv(mmc);
-	host->mmc = mmc;
 	host->pdev = pdev;
 	spin_lock_init(&host->lock);
 
@@ -1441,8 +1440,9 @@ err:
 static int bcm2835_remove(struct platform_device *pdev)
 {
 	struct bcm2835_host *host = platform_get_drvdata(pdev);
+	struct mmc_host *mmc = mmc_from_priv(host);
 
-	mmc_remove_host(host->mmc);
+	mmc_remove_host(mmc);
 
 	writel(SDVDD_POWER_OFF, host->ioaddr + SDVDD);
 
@@ -1454,7 +1454,7 @@ static int bcm2835_remove(struct platform_device *pdev)
 	if (host->dma_chan_rxtx)
 		dma_release_channel(host->dma_chan_rxtx);
 
-	mmc_free_host(host->mmc);
+	mmc_free_host(mmc);
 	platform_set_drvdata(pdev, NULL);
 
 	return 0;
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index 4d35ff36ceff..d893902b2f1c 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -478,6 +478,11 @@ static inline void *mmc_priv(struct mmc_host *host)
 	return (void *)host->private;
 }
 
+static inline struct mmc_host *mmc_from_priv(void *priv)
+{
+	return container_of(priv, struct mmc_host, private);
+}
+
 #define mmc_host_is_spi(host)	((host)->caps & MMC_CAP_SPI)
 
 #define mmc_dev(x)	((x)->parent)
-- 
cgit v1.2.3


From a2b760a60194aaa754dc78dd037d81ee6c3508a1 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Tue, 5 Feb 2019 10:30:22 +0100
Subject: mmc: slot-gpio: Remove override_active_level on WP

The argument "override_active_level" made it possible to
enforce a specific polarity on the write-protect
GPIO line. All callers in the kernel pass "false" to this
call after I have converted all drivers to use GPIO machine
descriptors, so remove the argument and clean out this.

This kind of polarity inversion should be handled by the
GPIO descriptor inside the GPIO library if needed.

This rids us of one instance of the kludgy calls into
the gpiod_get_raw_value() API.

Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/core/host.c            | 2 +-
 drivers/mmc/core/slot-gpio.c       | 9 +--------
 drivers/mmc/host/davinci_mmc.c     | 2 +-
 drivers/mmc/host/mmc_spi.c         | 2 +-
 drivers/mmc/host/mmci.c            | 2 +-
 drivers/mmc/host/pxamci.c          | 2 +-
 drivers/mmc/host/s3cmci.c          | 2 +-
 drivers/mmc/host/sdhci-esdhc-imx.c | 2 +-
 include/linux/mmc/slot-gpio.h      | 2 +-
 9 files changed, 9 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/drivers/mmc/core/host.c b/drivers/mmc/core/host.c
index 71fb228ad447..652ea6502336 100644
--- a/drivers/mmc/core/host.c
+++ b/drivers/mmc/core/host.c
@@ -260,7 +260,7 @@ int mmc_of_parse(struct mmc_host *host)
 	/* Parse Write Protection */
 	ro_cap_invert = device_property_read_bool(dev, "wp-inverted");
 
-	ret = mmc_gpiod_request_ro(host, "wp", 0, false, 0, &ro_gpio_invert);
+	ret = mmc_gpiod_request_ro(host, "wp", 0, 0, &ro_gpio_invert);
 	if (!ret)
 		dev_info(host->parent, "Got WP GPIO\n");
 	else if (ret != -ENOENT && ret != -ENOSYS)
diff --git a/drivers/mmc/core/slot-gpio.c b/drivers/mmc/core/slot-gpio.c
index 319ccd93383d..4afc6b87b465 100644
--- a/drivers/mmc/core/slot-gpio.c
+++ b/drivers/mmc/core/slot-gpio.c
@@ -22,7 +22,6 @@
 struct mmc_gpio {
 	struct gpio_desc *ro_gpio;
 	struct gpio_desc *cd_gpio;
-	bool override_ro_active_level;
 	bool override_cd_active_level;
 	irqreturn_t (*cd_gpio_isr)(int irq, void *dev_id);
 	char *ro_label;
@@ -71,10 +70,6 @@ int mmc_gpio_get_ro(struct mmc_host *host)
 	if (!ctx || !ctx->ro_gpio)
 		return -ENOSYS;
 
-	if (ctx->override_ro_active_level)
-		return !gpiod_get_raw_value_cansleep(ctx->ro_gpio) ^
-			!!(host->caps2 & MMC_CAP2_RO_ACTIVE_HIGH);
-
 	return gpiod_get_value_cansleep(ctx->ro_gpio);
 }
 EXPORT_SYMBOL(mmc_gpio_get_ro);
@@ -225,7 +220,6 @@ EXPORT_SYMBOL(mmc_can_gpio_cd);
  * @host: mmc host
  * @con_id: function within the GPIO consumer
  * @idx: index of the GPIO to obtain in the consumer
- * @override_active_level: ignore %GPIO_ACTIVE_LOW flag
  * @debounce: debounce time in microseconds
  * @gpio_invert: will return whether the GPIO line is inverted or not,
  * set to NULL to ignore
@@ -233,7 +227,7 @@ EXPORT_SYMBOL(mmc_can_gpio_cd);
  * Returns zero on success, else an error.
  */
 int mmc_gpiod_request_ro(struct mmc_host *host, const char *con_id,
-			 unsigned int idx, bool override_active_level,
+			 unsigned int idx,
 			 unsigned int debounce, bool *gpio_invert)
 {
 	struct mmc_gpio *ctx = host->slot.handler_priv;
@@ -253,7 +247,6 @@ int mmc_gpiod_request_ro(struct mmc_host *host, const char *con_id,
 	if (gpio_invert)
 		*gpio_invert = !gpiod_is_active_low(desc);
 
-	ctx->override_ro_active_level = override_active_level;
 	ctx->ro_gpio = desc;
 
 	return 0;
diff --git a/drivers/mmc/host/davinci_mmc.c b/drivers/mmc/host/davinci_mmc.c
index 9e68c3645e22..49e0daf2ef5e 100644
--- a/drivers/mmc/host/davinci_mmc.c
+++ b/drivers/mmc/host/davinci_mmc.c
@@ -1193,7 +1193,7 @@ static int mmc_davinci_parse_pdata(struct mmc_host *mmc)
 	else if (ret)
 		mmc->caps |= MMC_CAP_NEEDS_POLL;
 
-	ret = mmc_gpiod_request_ro(mmc, "wp", 0, false, 0, NULL);
+	ret = mmc_gpiod_request_ro(mmc, "wp", 0, 0, NULL);
 	if (ret == -EPROBE_DEFER)
 		return ret;
 
diff --git a/drivers/mmc/host/mmc_spi.c b/drivers/mmc/host/mmc_spi.c
index 10ba46b728e8..d7a5bbeb391b 100644
--- a/drivers/mmc/host/mmc_spi.c
+++ b/drivers/mmc/host/mmc_spi.c
@@ -1452,7 +1452,7 @@ static int mmc_spi_probe(struct spi_device *spi)
 	}
 
 	/* Index 1 is write protect/read only */
-	status = mmc_gpiod_request_ro(mmc, NULL, 1, false, 0, NULL);
+	status = mmc_gpiod_request_ro(mmc, NULL, 1, 0, NULL);
 	if (status == -EPROBE_DEFER)
 		goto fail_add_host;
 	if (!status)
diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c
index e352f5ad5801..7dd3ccf5baf0 100644
--- a/drivers/mmc/host/mmci.c
+++ b/drivers/mmc/host/mmci.c
@@ -2011,7 +2011,7 @@ static int mmci_probe(struct amba_device *dev,
 		if (ret == -EPROBE_DEFER)
 			goto clk_disable;
 
-		ret = mmc_gpiod_request_ro(mmc, "wp", 0, false, 0, NULL);
+		ret = mmc_gpiod_request_ro(mmc, "wp", 0, 0, NULL);
 		if (ret == -EPROBE_DEFER)
 			goto clk_disable;
 	}
diff --git a/drivers/mmc/host/pxamci.c b/drivers/mmc/host/pxamci.c
index 8779bbaa6b69..c907bf502a12 100644
--- a/drivers/mmc/host/pxamci.c
+++ b/drivers/mmc/host/pxamci.c
@@ -743,7 +743,7 @@ static int pxamci_probe(struct platform_device *pdev)
 			goto out;
 		}
 
-		ret = mmc_gpiod_request_ro(mmc, "wp", 0, false, 0, NULL);
+		ret = mmc_gpiod_request_ro(mmc, "wp", 0, 0, NULL);
 		if (ret && ret != -ENOENT) {
 			dev_err(dev, "Failed requesting gpio_ro\n");
 			goto out;
diff --git a/drivers/mmc/host/s3cmci.c b/drivers/mmc/host/s3cmci.c
index 10f5219b3b40..f31333e831a7 100644
--- a/drivers/mmc/host/s3cmci.c
+++ b/drivers/mmc/host/s3cmci.c
@@ -1530,7 +1530,7 @@ static int s3cmci_probe_pdata(struct s3cmci_host *host)
 		return ret;
 	}
 
-	ret = mmc_gpiod_request_ro(host->mmc, "wp", 0, false, 0, NULL);
+	ret = mmc_gpiod_request_ro(host->mmc, "wp", 0, 0, NULL);
 	if (ret != -ENOENT) {
 		dev_err(&pdev->dev, "error requesting GPIO for WP %d\n",
 			ret);
diff --git a/drivers/mmc/host/sdhci-esdhc-imx.c b/drivers/mmc/host/sdhci-esdhc-imx.c
index fad600739d0e..32ca3703b432 100644
--- a/drivers/mmc/host/sdhci-esdhc-imx.c
+++ b/drivers/mmc/host/sdhci-esdhc-imx.c
@@ -1351,7 +1351,7 @@ static int sdhci_esdhc_imx_probe_nondt(struct platform_device *pdev,
 				host->mmc->parent->platform_data);
 	/* write_protect */
 	if (boarddata->wp_type == ESDHC_WP_GPIO) {
-		err = mmc_gpiod_request_ro(host->mmc, "wp", 0, false, 0, NULL);
+		err = mmc_gpiod_request_ro(host->mmc, "wp", 0, 0, NULL);
 		if (err) {
 			dev_err(mmc_dev(host->mmc),
 				"failed to request write-protect gpio!\n");
diff --git a/include/linux/mmc/slot-gpio.h b/include/linux/mmc/slot-gpio.h
index feebd7aa6f5c..9fd3ce64a885 100644
--- a/include/linux/mmc/slot-gpio.h
+++ b/include/linux/mmc/slot-gpio.h
@@ -22,7 +22,7 @@ int mmc_gpiod_request_cd(struct mmc_host *host, const char *con_id,
 			 unsigned int idx, bool override_active_level,
 			 unsigned int debounce, bool *gpio_invert);
 int mmc_gpiod_request_ro(struct mmc_host *host, const char *con_id,
-			 unsigned int idx, bool override_active_level,
+			 unsigned int idx,
 			 unsigned int debounce, bool *gpio_invert);
 void mmc_gpio_set_cd_isr(struct mmc_host *host,
 			 irqreturn_t (*isr)(int irq, void *dev_id));
-- 
cgit v1.2.3


From 01904ff77676ca6c88e972906ed204a2dfbabab6 Mon Sep 17 00:00:00 2001
From: Avri Altman <avri.altman@wdc.com>
Date: Wed, 6 Feb 2019 13:28:05 +0200
Subject: mmc: core: Calculate the discard arg only once

In MMC, the discard arg is a read-only ext_csd parameter - set it once
on card init. To be consistent, do that for SD as well even though its
discard arg is always 0x0.

Signed-off-by: Avri Altman <avri.altman@wdc.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/core/block.c | 12 +++---------
 drivers/mmc/core/core.c  |  4 ++--
 drivers/mmc/core/mmc.c   |  8 ++++++++
 drivers/mmc/core/sd.c    |  2 ++
 include/linux/mmc/card.h |  1 +
 include/linux/mmc/sd.h   |  5 +++++
 6 files changed, 21 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c
index dc55bdfede92..54a7b7410441 100644
--- a/drivers/mmc/core/block.c
+++ b/drivers/mmc/core/block.c
@@ -1124,7 +1124,7 @@ static void mmc_blk_issue_discard_rq(struct mmc_queue *mq, struct request *req)
 {
 	struct mmc_blk_data *md = mq->blkdata;
 	struct mmc_card *card = md->queue.card;
-	unsigned int from, nr, arg;
+	unsigned int from, nr;
 	int err = 0, type = MMC_BLK_DISCARD;
 	blk_status_t status = BLK_STS_OK;
 
@@ -1136,24 +1136,18 @@ static void mmc_blk_issue_discard_rq(struct mmc_queue *mq, struct request *req)
 	from = blk_rq_pos(req);
 	nr = blk_rq_sectors(req);
 
-	if (mmc_can_discard(card))
-		arg = MMC_DISCARD_ARG;
-	else if (mmc_can_trim(card))
-		arg = MMC_TRIM_ARG;
-	else
-		arg = MMC_ERASE_ARG;
 	do {
 		err = 0;
 		if (card->quirks & MMC_QUIRK_INAND_CMD38) {
 			err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
 					 INAND_CMD38_ARG_EXT_CSD,
-					 arg == MMC_TRIM_ARG ?
+					 card->erase_arg == MMC_TRIM_ARG ?
 					 INAND_CMD38_ARG_TRIM :
 					 INAND_CMD38_ARG_ERASE,
 					 0);
 		}
 		if (!err)
-			err = mmc_erase(card, from, nr, arg);
+			err = mmc_erase(card, from, nr, card->erase_arg);
 	} while (err == -EIO && !mmc_blk_reset(md, card->host, type));
 	if (err)
 		status = BLK_STS_IOERR;
diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index 5bd58b95d318..de0f1a1f0a63 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -2164,7 +2164,7 @@ static unsigned int mmc_align_erase_size(struct mmc_card *card,
  * @card: card to erase
  * @from: first sector to erase
  * @nr: number of sectors to erase
- * @arg: erase command argument (SD supports only %MMC_ERASE_ARG)
+ * @arg: erase command argument (SD supports only %SD_ERASE_ARG)
  *
  * Caller must claim host before calling this function.
  */
@@ -2181,7 +2181,7 @@ int mmc_erase(struct mmc_card *card, unsigned int from, unsigned int nr,
 	if (!card->erase_size)
 		return -EOPNOTSUPP;
 
-	if (mmc_card_sd(card) && arg != MMC_ERASE_ARG)
+	if (mmc_card_sd(card) && arg != SD_ERASE_ARG)
 		return -EOPNOTSUPP;
 
 	if ((arg & MMC_SECURE_ARGS) &&
diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
index da892a599524..09c688f5ff65 100644
--- a/drivers/mmc/core/mmc.c
+++ b/drivers/mmc/core/mmc.c
@@ -1743,6 +1743,14 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr,
 			card->ext_csd.power_off_notification = EXT_CSD_POWER_ON;
 	}
 
+	/* set erase_arg */
+	if (mmc_can_discard(card))
+		card->erase_arg = MMC_DISCARD_ARG;
+	else if (mmc_can_trim(card))
+		card->erase_arg = MMC_TRIM_ARG;
+	else
+		card->erase_arg = MMC_ERASE_ARG;
+
 	/*
 	 * Select timing interface
 	 */
diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c
index d0d9f90e7cdf..bd48b28d641b 100644
--- a/drivers/mmc/core/sd.c
+++ b/drivers/mmc/core/sd.c
@@ -271,6 +271,8 @@ static int mmc_read_ssr(struct mmc_card *card)
 		}
 	}
 
+	card->erase_arg = SD_ERASE_ARG;
+
 	return 0;
 }
 
diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h
index 8ef330027b13..e2bbceb80725 100644
--- a/include/linux/mmc/card.h
+++ b/include/linux/mmc/card.h
@@ -277,6 +277,7 @@ struct mmc_card {
  	unsigned int		erase_shift;	/* if erase unit is power 2 */
  	unsigned int		pref_erase;	/* in sectors */
 	unsigned int		eg_boundary;	/* don't cross erase-group boundaries */
+	unsigned int		erase_arg;	/* erase / trim / discard */
  	u8			erased_byte;	/* value of erased bytes */
 
 	u32			raw_cid[4];	/* raw card CID */
diff --git a/include/linux/mmc/sd.h b/include/linux/mmc/sd.h
index 1ebcf9ba1256..1a6d10fdf682 100644
--- a/include/linux/mmc/sd.h
+++ b/include/linux/mmc/sd.h
@@ -91,4 +91,9 @@
 #define SD_SWITCH_ACCESS_DEF	0
 #define SD_SWITCH_ACCESS_HS	1
 
+/*
+ * Erase/discard
+ */
+#define SD_ERASE_ARG			0x00000000
+
 #endif /* LINUX_MMC_SD_H */
-- 
cgit v1.2.3


From 68539e2bc34437d8c5fbcc234dddcc40bd6bb1cb Mon Sep 17 00:00:00 2001
From: Avri Altman <avri.altman@wdc.com>
Date: Wed, 6 Feb 2019 13:28:06 +0200
Subject: mmc: core: Indicate SD specs higher than 4.0

SD specs version 4.x and 5.x have a dedicated slices in the SCR register.
Higher versions will rely on a combination of the existing fields.

Signed-off-by: Avri Altman <avri.altman@wdc.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/core/sd.c    | 5 +++++
 include/linux/mmc/card.h | 2 ++
 2 files changed, 7 insertions(+)

(limited to 'include')

diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c
index bd48b28d641b..c2db94dab711 100644
--- a/drivers/mmc/core/sd.c
+++ b/drivers/mmc/core/sd.c
@@ -209,6 +209,11 @@ static int mmc_decode_scr(struct mmc_card *card)
 		/* Check if Physical Layer Spec v3.0 is supported */
 		scr->sda_spec3 = UNSTUFF_BITS(resp, 47, 1);
 
+	if (scr->sda_spec3) {
+		scr->sda_spec4 = UNSTUFF_BITS(resp, 42, 1);
+		scr->sda_specx = UNSTUFF_BITS(resp, 38, 4);
+	}
+
 	if (UNSTUFF_BITS(resp, 55, 1))
 		card->erased_byte = 0xFF;
 	else
diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h
index e2bbceb80725..19566ab9decb 100644
--- a/include/linux/mmc/card.h
+++ b/include/linux/mmc/card.h
@@ -133,6 +133,8 @@ struct mmc_ext_csd {
 struct sd_scr {
 	unsigned char		sda_vsn;
 	unsigned char		sda_spec3;
+	unsigned char		sda_spec4;
+	unsigned char		sda_specx;
 	unsigned char		bus_widths;
 #define SD_SCR_BUS_WIDTH_1	(1<<0)
 #define SD_SCR_BUS_WIDTH_4	(1<<2)
-- 
cgit v1.2.3


From 228a73abde5c04428678e917b271f8526cfd90ed Mon Sep 17 00:00:00 2001
From: Anand Jain <anand.jain@oracle.com>
Date: Fri, 4 Jan 2019 13:31:54 +0800
Subject: btrfs: introduce new ioctl to unregister a btrfs device

Support for a new command that can be used eg. as a command

  $ btrfs device scan --forget [dev]'
(the final name may change though)

to undo the effects of 'btrfs device scan [dev]'. For this purpose
this patch proposes to use ioctl #5 as it was empty and is next to the
SCAN ioctl.

The new ioctl BTRFS_IOC_FORGET_DEV works only on the control device
(/dev/btrfs-control) to unregister one or all devices, devices that are
not mounted.

The argument is struct btrfs_ioctl_vol_args, ::name specifies the device
path. To unregister all device, the path is an empty string.

Again, the devices are removed only if they aren't part of a mounte
filesystem.

This new ioctl provides:

- release of unwanted btrfs_fs_devices and btrfs_devices structures
  from memory if the device is not going to be mounted

- ability to mount filesystem in degraded mode, when one devices is
  corrupted like in split brain raid1

- running test cases which would require reloading the kernel module
  but this is not possible eg. due to mounted filesystem or built-in

Signed-off-by: Anand Jain <anand.jain@oracle.com>
Reviewed-by: David Sterba <dsterba@suse.com>
[ update changelog ]
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/super.c           |  3 +++
 fs/btrfs/volumes.c         | 11 +++++++++++
 fs/btrfs/volumes.h         |  1 +
 include/uapi/linux/btrfs.h |  2 ++
 4 files changed, 17 insertions(+)

(limited to 'include')

diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 0a3f122dd61f..f9d13a30aa8a 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -2190,6 +2190,9 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd,
 		ret = PTR_ERR_OR_ZERO(device);
 		mutex_unlock(&uuid_mutex);
 		break;
+	case BTRFS_IOC_FORGET_DEV:
+		ret = btrfs_forget_devices(vol->name);
+		break;
 	case BTRFS_IOC_DEVICES_READY:
 		mutex_lock(&uuid_mutex);
 		device = btrfs_scan_one_device(vol->name, FMODE_READ,
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 7ca31c83e730..fe122e6099ae 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1446,6 +1446,17 @@ static int btrfs_read_disk_super(struct block_device *bdev, u64 bytenr,
 	return 0;
 }
 
+int btrfs_forget_devices(const char *path)
+{
+	int ret;
+
+	mutex_lock(&uuid_mutex);
+	ret = btrfs_free_stale_devices(strlen(path) ? path : NULL, NULL);
+	mutex_unlock(&uuid_mutex);
+
+	return ret;
+}
+
 /*
  * Look for a btrfs signature on a device. This may be called out of the mount path
  * and we are not allowed to call set_blocksize during the scan. The superblock
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 656ea8d85770..3ad9d58d1b66 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -416,6 +416,7 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
 		       fmode_t flags, void *holder);
 struct btrfs_device *btrfs_scan_one_device(const char *path,
 					   fmode_t flags, void *holder);
+int btrfs_forget_devices(const char *path);
 int btrfs_close_devices(struct btrfs_fs_devices *fs_devices);
 void btrfs_free_extra_devids(struct btrfs_fs_devices *fs_devices, int step);
 void btrfs_assign_next_active_device(struct btrfs_device *device,
diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h
index e0763bc4158e..c195896d478f 100644
--- a/include/uapi/linux/btrfs.h
+++ b/include/uapi/linux/btrfs.h
@@ -837,6 +837,8 @@ enum btrfs_err_code {
 				   struct btrfs_ioctl_vol_args)
 #define BTRFS_IOC_SCAN_DEV _IOW(BTRFS_IOCTL_MAGIC, 4, \
 				   struct btrfs_ioctl_vol_args)
+#define BTRFS_IOC_FORGET_DEV _IOW(BTRFS_IOCTL_MAGIC, 5, \
+				   struct btrfs_ioctl_vol_args)
 /* trans start and trans end are dangerous, and only for
  * use by applications that know how to avoid the
  * resulting deadlocks
-- 
cgit v1.2.3


From 450114fc0db0cd5c2e7324b917e5de52cff991d7 Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@toxicpanda.com>
Date: Wed, 21 Nov 2018 14:03:08 -0500
Subject: btrfs: don't use global reserve for chunk allocation

We've done this forever because of the voodoo around knowing how much
space we have.  However, we have better ways of doing this now, and on
normal file systems we'll easily have a global reserve of 512MiB, and
since metadata chunks are usually 1GiB that means we'll allocate
metadata chunks more readily.  Instead use the actual used amount when
determining if we need to allocate a chunk or not.

This has a side effect for mixed block group fs'es where we are no
longer allocating enough chunks for the data/metadata requirements.  To
deal with this add a ALLOC_CHUNK_FORCE step to the flushing state
machine.  This will only get used if we've already made a full loop
through the flushing machinery and tried committing the transaction.

If we have then we can try and force a chunk allocation since we likely
need it to make progress.  This resolves issues I was seeing with
the mixed bg tests in xfstests without the new flushing state.

Reviewed-by: Nikolay Borisov <nborisov@suse.com>
Signed-off-by: Josef Bacik <josef@toxicpanda.com>
[ merged with patch "add ALLOC_CHUNK_FORCE to the flushing code" ]
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/ctree.h             |  3 ++-
 fs/btrfs/extent-tree.c       | 26 ++++++++++++++++----------
 include/trace/events/btrfs.h |  1 +
 3 files changed, 19 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 6e0fd98c6bd9..9306925b6790 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -2805,7 +2805,8 @@ enum btrfs_flush_state {
 	FLUSH_DELALLOC		=	5,
 	FLUSH_DELALLOC_WAIT	=	6,
 	ALLOC_CHUNK		=	7,
-	COMMIT_TRANS		=	8,
+	ALLOC_CHUNK_FORCE	=	8,
+	COMMIT_TRANS		=	9,
 };
 
 int btrfs_alloc_data_chunk_ondemand(struct btrfs_inode *inode, u64 bytes);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 9e74ce2d4cf2..d637f4c4bcd0 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -4399,21 +4399,12 @@ static inline u64 calc_global_rsv_need_space(struct btrfs_block_rsv *global)
 static int should_alloc_chunk(struct btrfs_fs_info *fs_info,
 			      struct btrfs_space_info *sinfo, int force)
 {
-	struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
 	u64 bytes_used = btrfs_space_info_used(sinfo, false);
 	u64 thresh;
 
 	if (force == CHUNK_ALLOC_FORCE)
 		return 1;
 
-	/*
-	 * We need to take into account the global rsv because for all intents
-	 * and purposes it's used space.  Don't worry about locking the
-	 * global_rsv, it doesn't change except when the transaction commits.
-	 */
-	if (sinfo->flags & BTRFS_BLOCK_GROUP_METADATA)
-		bytes_used += calc_global_rsv_need_space(global_rsv);
-
 	/*
 	 * in limited mode, we want to have some free space up to
 	 * about 1% of the FS size.
@@ -4960,6 +4951,7 @@ static void flush_space(struct btrfs_fs_info *fs_info,
 		btrfs_end_transaction(trans);
 		break;
 	case ALLOC_CHUNK:
+	case ALLOC_CHUNK_FORCE:
 		trans = btrfs_join_transaction(root);
 		if (IS_ERR(trans)) {
 			ret = PTR_ERR(trans);
@@ -4967,7 +4959,8 @@ static void flush_space(struct btrfs_fs_info *fs_info,
 		}
 		ret = do_chunk_alloc(trans,
 				     btrfs_metadata_alloc_profile(fs_info),
-				     CHUNK_ALLOC_NO_FORCE);
+				     (state == ALLOC_CHUNK) ?
+				      CHUNK_ALLOC_NO_FORCE : CHUNK_ALLOC_FORCE);
 		btrfs_end_transaction(trans);
 		if (ret > 0 || ret == -ENOSPC)
 			ret = 0;
@@ -5111,6 +5104,19 @@ static void btrfs_async_reclaim_metadata_space(struct work_struct *work)
 				commit_cycles--;
 		}
 
+		/*
+		 * We don't want to force a chunk allocation until we've tried
+		 * pretty hard to reclaim space.  Think of the case where we
+		 * freed up a bunch of space and so have a lot of pinned space
+		 * to reclaim.  We would rather use that than possibly create a
+		 * underutilized metadata chunk.  So if this is our first run
+		 * through the flushing state machine skip ALLOC_CHUNK_FORCE and
+		 * commit the transaction.  If nothing has changed the next go
+		 * around then we can force a chunk allocation.
+		 */
+		if (flush_state == ALLOC_CHUNK_FORCE && !commit_cycles)
+			flush_state++;
+
 		if (flush_state > COMMIT_TRANS) {
 			commit_cycles++;
 			if (commit_cycles > 2) {
diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h
index 2887503e4d12..3f08b652363b 100644
--- a/include/trace/events/btrfs.h
+++ b/include/trace/events/btrfs.h
@@ -1051,6 +1051,7 @@ TRACE_EVENT(btrfs_trigger_flush,
 		{ FLUSH_DELAYED_REFS_NR,	"FLUSH_DELAYED_REFS_NR"},	\
 		{ FLUSH_DELAYED_REFS,		"FLUSH_ELAYED_REFS"},		\
 		{ ALLOC_CHUNK,			"ALLOC_CHUNK"},			\
+		{ ALLOC_CHUNK_FORCE,		"ALLOC_CHUNK_FORCE"},		\
 		{ COMMIT_TRANS,			"COMMIT_TRANS"})
 
 TRACE_EVENT(btrfs_flush_space,
-- 
cgit v1.2.3


From 1418bae1c22951aad9883bc8f8f4dccb272cce1e Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu@suse.com>
Date: Wed, 23 Jan 2019 15:15:12 +0800
Subject: btrfs: qgroup: Move reserved data accounting from
 btrfs_delayed_ref_head to btrfs_qgroup_extent_record

[BUG]
Btrfs/139 will fail with a high probability if the testing machine (VM)
has only 2G RAM.

Resulting the final write success while it should fail due to EDQUOT,
and the fs will have quota exceeding the limit by 16K.

The simplified reproducer will be: (needs a 2G ram VM)

  $ mkfs.btrfs -f $dev
  $ mount $dev $mnt

  $ btrfs subv create $mnt/subv
  $ btrfs quota enable $mnt
  $ btrfs quota rescan -w $mnt
  $ btrfs qgroup limit -e 1G $mnt/subv

  $ for i in $(seq -w  1 8); do
  	xfs_io -f -c "pwrite 0 128M" $mnt/subv/file_$i > /dev/null
  	echo "file $i written" > /dev/kmsg
    done
  $ sync
  $ btrfs qgroup show -pcre --raw $mnt

The last pwrite will not trigger EDQUOT and final 'qgroup show' will
show something like:

  qgroupid         rfer         excl     max_rfer     max_excl parent  child
  --------         ----         ----     --------     -------- ------  -----
  0/5             16384        16384         none         none ---     ---
  0/256      1073758208   1073758208         none   1073741824 ---     ---

And 1073758208 is larger than
  > 1073741824.

[CAUSE]
It's a bug in btrfs qgroup data reserved space management.

For quota limit, we must ensure that:
  reserved (data + metadata) + rfer/excl <= limit

Since rfer/excl is only updated at transaction commmit time, reserved
space needs to be taken special care.

One important part of reserved space is data, and for a new data extent
written to disk, we still need to take the reserved space until
rfer/excl numbers get updated.

Originally when an ordered extent finishes, we migrate the reserved
qgroup data space from extent_io tree to delayed ref head of the data
extent, expecting delayed ref will only be cleaned up at commit
transaction time.

However for small RAM machine, due to memory pressure dirty pages can be
flushed back to disk without committing a transaction.

The related events will be something like:

  file 1 written
  btrfs_finish_ordered_io: ino=258 ordered offset=0 len=54947840
  btrfs_finish_ordered_io: ino=258 ordered offset=54947840 len=5636096
  btrfs_finish_ordered_io: ino=258 ordered offset=61153280 len=57344
  btrfs_finish_ordered_io: ino=258 ordered offset=61210624 len=8192
  btrfs_finish_ordered_io: ino=258 ordered offset=60583936 len=569344
  cleanup_ref_head: num_bytes=54947840
  cleanup_ref_head: num_bytes=5636096
  cleanup_ref_head: num_bytes=569344
  cleanup_ref_head: num_bytes=57344
  cleanup_ref_head: num_bytes=8192
  ^^^^^^^^^^^^^^^^ This will free qgroup data reserved space
  file 2 written
  ...
  file 8 written
  cleanup_ref_head: num_bytes=8192
  ...
  btrfs_commit_transaction  <<< the only transaction committed during
				the test

When file 2 is written, we have already freed 128M reserved qgroup data
space for ino 258. Thus later write won't trigger EDQUOT.

This allows us to write more data beyond qgroup limit.

In my 2G ram VM, it could reach about 1.2G before hitting EDQUOT.

[FIX]
By moving reserved qgroup data space from btrfs_delayed_ref_head to
btrfs_qgroup_extent_record, we can ensure that reserved qgroup data
space won't be freed half way before commit transaction, thus fix the
problem.

Fixes: f64d5ca86821 ("btrfs: delayed_ref: Add new function to record reserved space into delayed ref")
Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/delayed-ref.c       | 15 ++++-----------
 fs/btrfs/delayed-ref.h       | 11 -----------
 fs/btrfs/extent-tree.c       |  3 ---
 fs/btrfs/qgroup.c            | 19 +++++++++++++++----
 fs/btrfs/qgroup.h            | 20 +++++++++++---------
 include/trace/events/btrfs.h | 29 -----------------------------
 6 files changed, 30 insertions(+), 67 deletions(-)

(limited to 'include')

diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index cad36c99a483..7d2a413df90d 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -602,17 +602,14 @@ static void init_delayed_ref_head(struct btrfs_delayed_ref_head *head_ref,
 	RB_CLEAR_NODE(&head_ref->href_node);
 	head_ref->processing = 0;
 	head_ref->total_ref_mod = count_mod;
-	head_ref->qgroup_reserved = 0;
-	head_ref->qgroup_ref_root = 0;
 	spin_lock_init(&head_ref->lock);
 	mutex_init(&head_ref->mutex);
 
 	if (qrecord) {
 		if (ref_root && reserved) {
-			head_ref->qgroup_ref_root = ref_root;
-			head_ref->qgroup_reserved = reserved;
+			qrecord->data_rsv = reserved;
+			qrecord->data_rsv_refroot = ref_root;
 		}
-
 		qrecord->bytenr = bytenr;
 		qrecord->num_bytes = num_bytes;
 		qrecord->old_roots = NULL;
@@ -651,10 +648,6 @@ add_delayed_ref_head(struct btrfs_trans_handle *trans,
 	existing = htree_insert(&delayed_refs->href_root,
 				&head_ref->href_node);
 	if (existing) {
-		WARN_ON(qrecord && head_ref->qgroup_ref_root
-			&& head_ref->qgroup_reserved
-			&& existing->qgroup_ref_root
-			&& existing->qgroup_reserved);
 		update_existing_head_ref(trans, existing, head_ref,
 					 old_ref_mod);
 		/*
@@ -770,7 +763,7 @@ int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
 
 	if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) &&
 	    is_fstree(ref_root)) {
-		record = kmalloc(sizeof(*record), GFP_NOFS);
+		record = kzalloc(sizeof(*record), GFP_NOFS);
 		if (!record) {
 			kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);
 			kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref);
@@ -867,7 +860,7 @@ int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
 
 	if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) &&
 	    is_fstree(ref_root)) {
-		record = kmalloc(sizeof(*record), GFP_NOFS);
+		record = kzalloc(sizeof(*record), GFP_NOFS);
 		if (!record) {
 			kmem_cache_free(btrfs_delayed_data_ref_cachep, ref);
 			kmem_cache_free(btrfs_delayed_ref_head_cachep,
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
index d2af974f68a1..70606da440aa 100644
--- a/fs/btrfs/delayed-ref.h
+++ b/fs/btrfs/delayed-ref.h
@@ -102,17 +102,6 @@ struct btrfs_delayed_ref_head {
 	 */
 	int ref_mod;
 
-	/*
-	 * For qgroup reserved space freeing.
-	 *
-	 * ref_root and reserved will be recorded after
-	 * BTRFS_ADD_DELAYED_EXTENT is called.
-	 * And will be used to free reserved qgroup space at
-	 * run_delayed_refs() time.
-	 */
-	u64 qgroup_ref_root;
-	u64 qgroup_reserved;
-
 	/*
 	 * when a new extent is allocated, it is just reserved in memory
 	 * The actual extent isn't inserted into the extent allocation tree
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 9f012c2facbe..994f0cc41799 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2492,9 +2492,6 @@ void btrfs_cleanup_ref_head_accounting(struct btrfs_fs_info *fs_info,
 		}
 	}
 
-	/* Also free its reserved qgroup space */
-	btrfs_qgroup_free_delayed_ref(fs_info, head->qgroup_ref_root,
-				      head->qgroup_reserved);
 	btrfs_delayed_refs_rsv_release(fs_info, nr_items);
 }
 
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 9a2f8c4c0fb9..e618ea9cdf7e 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1546,12 +1546,18 @@ int btrfs_qgroup_trace_extent_nolock(struct btrfs_fs_info *fs_info,
 		parent_node = *p;
 		entry = rb_entry(parent_node, struct btrfs_qgroup_extent_record,
 				 node);
-		if (bytenr < entry->bytenr)
+		if (bytenr < entry->bytenr) {
 			p = &(*p)->rb_left;
-		else if (bytenr > entry->bytenr)
+		} else if (bytenr > entry->bytenr) {
 			p = &(*p)->rb_right;
-		else
+		} else {
+			if (record->data_rsv && !entry->data_rsv) {
+				entry->data_rsv = record->data_rsv;
+				entry->data_rsv_refroot =
+					record->data_rsv_refroot;
+			}
 			return 1;
+		}
 	}
 
 	rb_link_node(&record->node, parent_node, p);
@@ -1597,7 +1603,7 @@ int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans, u64 bytenr,
 	if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)
 	    || bytenr == 0 || num_bytes == 0)
 		return 0;
-	record = kmalloc(sizeof(*record), gfp_flag);
+	record = kzalloc(sizeof(*record), gfp_flag);
 	if (!record)
 		return -ENOMEM;
 
@@ -2517,6 +2523,11 @@ int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans)
 					goto cleanup;
 			}
 
+			/* Free the reserved data space */
+			btrfs_qgroup_free_refroot(fs_info,
+					record->data_rsv_refroot,
+					record->data_rsv,
+					BTRFS_QGROUP_RSV_DATA);
 			/*
 			 * Use SEQ_LAST as time_seq to do special search, which
 			 * doesn't lock tree or delayed_refs and search current
diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h
index 5e93733b78c8..46ba7bd2961c 100644
--- a/fs/btrfs/qgroup.h
+++ b/fs/btrfs/qgroup.h
@@ -107,6 +107,17 @@ struct btrfs_qgroup_extent_record {
 	struct rb_node node;
 	u64 bytenr;
 	u64 num_bytes;
+
+	/*
+	 * For qgroup reserved data space freeing.
+	 *
+	 * @data_rsv_refroot and @data_rsv will be recorded after
+	 * BTRFS_ADD_DELAYED_EXTENT is called.
+	 * And will be used to free reserved qgroup space at
+	 * transaction commit time.
+	 */
+	u32 data_rsv;		/* reserved data space needs to be freed */
+	u64 data_rsv_refroot;	/* which root the reserved data belongs to */
 	struct ulist *old_roots;
 };
 
@@ -326,15 +337,6 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid,
 void btrfs_qgroup_free_refroot(struct btrfs_fs_info *fs_info,
 			       u64 ref_root, u64 num_bytes,
 			       enum btrfs_qgroup_rsv_type type);
-static inline void btrfs_qgroup_free_delayed_ref(struct btrfs_fs_info *fs_info,
-						 u64 ref_root, u64 num_bytes)
-{
-	if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
-		return;
-	trace_btrfs_qgroup_free_delayed_ref(fs_info, ref_root, num_bytes);
-	btrfs_qgroup_free_refroot(fs_info, ref_root, num_bytes,
-				  BTRFS_QGROUP_RSV_DATA);
-}
 
 #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
 int btrfs_verify_qgroup_counts(struct btrfs_fs_info *fs_info, u64 qgroupid,
diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h
index 3f08b652363b..ab1cc33adbac 100644
--- a/include/trace/events/btrfs.h
+++ b/include/trace/events/btrfs.h
@@ -1513,35 +1513,6 @@ DEFINE_EVENT(btrfs__qgroup_rsv_data, btrfs_qgroup_release_data,
 	TP_ARGS(inode, start, len, reserved, op)
 );
 
-DECLARE_EVENT_CLASS(btrfs__qgroup_delayed_ref,
-
-	TP_PROTO(const struct btrfs_fs_info *fs_info,
-		 u64 ref_root, u64 reserved),
-
-	TP_ARGS(fs_info, ref_root, reserved),
-
-	TP_STRUCT__entry_btrfs(
-		__field(	u64,		ref_root	)
-		__field(	u64,		reserved	)
-	),
-
-	TP_fast_assign_btrfs(fs_info,
-		__entry->ref_root	= ref_root;
-		__entry->reserved	= reserved;
-	),
-
-	TP_printk_btrfs("root=%llu reserved=%llu op=free",
-		  __entry->ref_root, __entry->reserved)
-);
-
-DEFINE_EVENT(btrfs__qgroup_delayed_ref, btrfs_qgroup_free_delayed_ref,
-
-	TP_PROTO(const struct btrfs_fs_info *fs_info,
-		 u64 ref_root, u64 reserved),
-
-	TP_ARGS(fs_info, ref_root, reserved)
-);
-
 DECLARE_EVENT_CLASS(btrfs_qgroup_extent,
 	TP_PROTO(const struct btrfs_fs_info *fs_info,
 		 const struct btrfs_qgroup_extent_record *rec),
-- 
cgit v1.2.3


From de13d5a44e61366ab5b75c111449ca284b6e3f5d Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Wed, 13 Feb 2019 18:10:37 +0100
Subject: mmc: core: Move regulator helpers to separate file

The mmc regulator helper functions, are placed in the extensive core.c
file.  In a step towards trying to create a better structure of files,
avoiding too many lines of code per file, let's move these helpers to a new
file, regulator.c.

Moreover, this within this context it makes sense to also drop the export
of mmc_vddrange_to_ocrmask(), but instead let's make it internal to the mmc
core.

Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/core/Makefile    |   2 +-
 drivers/mmc/core/core.c      | 242 ---------------------------------------
 drivers/mmc/core/core.h      |   1 +
 drivers/mmc/core/regulator.c | 261 +++++++++++++++++++++++++++++++++++++++++++
 include/linux/mmc/host.h     |   1 -
 5 files changed, 263 insertions(+), 244 deletions(-)
 create mode 100644 drivers/mmc/core/regulator.c

(limited to 'include')

diff --git a/drivers/mmc/core/Makefile b/drivers/mmc/core/Makefile
index abba078f7f49..95ffe008ebdf 100644
--- a/drivers/mmc/core/Makefile
+++ b/drivers/mmc/core/Makefile
@@ -8,7 +8,7 @@ mmc_core-y			:= core.o bus.o host.o \
 				   mmc.o mmc_ops.o sd.o sd_ops.o \
 				   sdio.o sdio_ops.o sdio_bus.o \
 				   sdio_cis.o sdio_io.o sdio_irq.o \
-				   slot-gpio.o
+				   slot-gpio.o regulator.o
 mmc_core-$(CONFIG_OF)		+= pwrseq.o
 obj-$(CONFIG_PWRSEQ_SIMPLE)	+= pwrseq_simple.o
 obj-$(CONFIG_PWRSEQ_SD8787)	+= pwrseq_sd8787.o
diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index de0f1a1f0a63..f796a6afb19b 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -21,7 +21,6 @@
 #include <linux/leds.h>
 #include <linux/scatterlist.h>
 #include <linux/log2.h>
-#include <linux/regulator/consumer.h>
 #include <linux/pm_runtime.h>
 #include <linux/pm_wakeup.h>
 #include <linux/suspend.h>
@@ -1112,7 +1111,6 @@ u32 mmc_vddrange_to_ocrmask(int vdd_min, int vdd_max)
 
 	return mask;
 }
-EXPORT_SYMBOL(mmc_vddrange_to_ocrmask);
 
 #ifdef CONFIG_OF
 
@@ -1190,246 +1188,6 @@ struct device_node *mmc_of_find_child_device(struct mmc_host *host,
 	return NULL;
 }
 
-#ifdef CONFIG_REGULATOR
-
-/**
- * mmc_ocrbitnum_to_vdd - Convert a OCR bit number to its voltage
- * @vdd_bit:	OCR bit number
- * @min_uV:	minimum voltage value (mV)
- * @max_uV:	maximum voltage value (mV)
- *
- * This function returns the voltage range according to the provided OCR
- * bit number. If conversion is not possible a negative errno value returned.
- */
-static int mmc_ocrbitnum_to_vdd(int vdd_bit, int *min_uV, int *max_uV)
-{
-	int		tmp;
-
-	if (!vdd_bit)
-		return -EINVAL;
-
-	/*
-	 * REVISIT mmc_vddrange_to_ocrmask() may have set some
-	 * bits this regulator doesn't quite support ... don't
-	 * be too picky, most cards and regulators are OK with
-	 * a 0.1V range goof (it's a small error percentage).
-	 */
-	tmp = vdd_bit - ilog2(MMC_VDD_165_195);
-	if (tmp == 0) {
-		*min_uV = 1650 * 1000;
-		*max_uV = 1950 * 1000;
-	} else {
-		*min_uV = 1900 * 1000 + tmp * 100 * 1000;
-		*max_uV = *min_uV + 100 * 1000;
-	}
-
-	return 0;
-}
-
-/**
- * mmc_regulator_get_ocrmask - return mask of supported voltages
- * @supply: regulator to use
- *
- * This returns either a negative errno, or a mask of voltages that
- * can be provided to MMC/SD/SDIO devices using the specified voltage
- * regulator.  This would normally be called before registering the
- * MMC host adapter.
- */
-int mmc_regulator_get_ocrmask(struct regulator *supply)
-{
-	int			result = 0;
-	int			count;
-	int			i;
-	int			vdd_uV;
-	int			vdd_mV;
-
-	count = regulator_count_voltages(supply);
-	if (count < 0)
-		return count;
-
-	for (i = 0; i < count; i++) {
-		vdd_uV = regulator_list_voltage(supply, i);
-		if (vdd_uV <= 0)
-			continue;
-
-		vdd_mV = vdd_uV / 1000;
-		result |= mmc_vddrange_to_ocrmask(vdd_mV, vdd_mV);
-	}
-
-	if (!result) {
-		vdd_uV = regulator_get_voltage(supply);
-		if (vdd_uV <= 0)
-			return vdd_uV;
-
-		vdd_mV = vdd_uV / 1000;
-		result = mmc_vddrange_to_ocrmask(vdd_mV, vdd_mV);
-	}
-
-	return result;
-}
-EXPORT_SYMBOL_GPL(mmc_regulator_get_ocrmask);
-
-/**
- * mmc_regulator_set_ocr - set regulator to match host->ios voltage
- * @mmc: the host to regulate
- * @supply: regulator to use
- * @vdd_bit: zero for power off, else a bit number (host->ios.vdd)
- *
- * Returns zero on success, else negative errno.
- *
- * MMC host drivers may use this to enable or disable a regulator using
- * a particular supply voltage.  This would normally be called from the
- * set_ios() method.
- */
-int mmc_regulator_set_ocr(struct mmc_host *mmc,
-			struct regulator *supply,
-			unsigned short vdd_bit)
-{
-	int			result = 0;
-	int			min_uV, max_uV;
-
-	if (vdd_bit) {
-		mmc_ocrbitnum_to_vdd(vdd_bit, &min_uV, &max_uV);
-
-		result = regulator_set_voltage(supply, min_uV, max_uV);
-		if (result == 0 && !mmc->regulator_enabled) {
-			result = regulator_enable(supply);
-			if (!result)
-				mmc->regulator_enabled = true;
-		}
-	} else if (mmc->regulator_enabled) {
-		result = regulator_disable(supply);
-		if (result == 0)
-			mmc->regulator_enabled = false;
-	}
-
-	if (result)
-		dev_err(mmc_dev(mmc),
-			"could not set regulator OCR (%d)\n", result);
-	return result;
-}
-EXPORT_SYMBOL_GPL(mmc_regulator_set_ocr);
-
-static int mmc_regulator_set_voltage_if_supported(struct regulator *regulator,
-						  int min_uV, int target_uV,
-						  int max_uV)
-{
-	/*
-	 * Check if supported first to avoid errors since we may try several
-	 * signal levels during power up and don't want to show errors.
-	 */
-	if (!regulator_is_supported_voltage(regulator, min_uV, max_uV))
-		return -EINVAL;
-
-	return regulator_set_voltage_triplet(regulator, min_uV, target_uV,
-					     max_uV);
-}
-
-/**
- * mmc_regulator_set_vqmmc - Set VQMMC as per the ios
- *
- * For 3.3V signaling, we try to match VQMMC to VMMC as closely as possible.
- * That will match the behavior of old boards where VQMMC and VMMC were supplied
- * by the same supply.  The Bus Operating conditions for 3.3V signaling in the
- * SD card spec also define VQMMC in terms of VMMC.
- * If this is not possible we'll try the full 2.7-3.6V of the spec.
- *
- * For 1.2V and 1.8V signaling we'll try to get as close as possible to the
- * requested voltage.  This is definitely a good idea for UHS where there's a
- * separate regulator on the card that's trying to make 1.8V and it's best if
- * we match.
- *
- * This function is expected to be used by a controller's
- * start_signal_voltage_switch() function.
- */
-int mmc_regulator_set_vqmmc(struct mmc_host *mmc, struct mmc_ios *ios)
-{
-	struct device *dev = mmc_dev(mmc);
-	int ret, volt, min_uV, max_uV;
-
-	/* If no vqmmc supply then we can't change the voltage */
-	if (IS_ERR(mmc->supply.vqmmc))
-		return -EINVAL;
-
-	switch (ios->signal_voltage) {
-	case MMC_SIGNAL_VOLTAGE_120:
-		return mmc_regulator_set_voltage_if_supported(mmc->supply.vqmmc,
-						1100000, 1200000, 1300000);
-	case MMC_SIGNAL_VOLTAGE_180:
-		return mmc_regulator_set_voltage_if_supported(mmc->supply.vqmmc,
-						1700000, 1800000, 1950000);
-	case MMC_SIGNAL_VOLTAGE_330:
-		ret = mmc_ocrbitnum_to_vdd(mmc->ios.vdd, &volt, &max_uV);
-		if (ret < 0)
-			return ret;
-
-		dev_dbg(dev, "%s: found vmmc voltage range of %d-%duV\n",
-			__func__, volt, max_uV);
-
-		min_uV = max(volt - 300000, 2700000);
-		max_uV = min(max_uV + 200000, 3600000);
-
-		/*
-		 * Due to a limitation in the current implementation of
-		 * regulator_set_voltage_triplet() which is taking the lowest
-		 * voltage possible if below the target, search for a suitable
-		 * voltage in two steps and try to stay close to vmmc
-		 * with a 0.3V tolerance at first.
-		 */
-		if (!mmc_regulator_set_voltage_if_supported(mmc->supply.vqmmc,
-						min_uV, volt, max_uV))
-			return 0;
-
-		return mmc_regulator_set_voltage_if_supported(mmc->supply.vqmmc,
-						2700000, volt, 3600000);
-	default:
-		return -EINVAL;
-	}
-}
-EXPORT_SYMBOL_GPL(mmc_regulator_set_vqmmc);
-
-#endif /* CONFIG_REGULATOR */
-
-/**
- * mmc_regulator_get_supply - try to get VMMC and VQMMC regulators for a host
- * @mmc: the host to regulate
- *
- * Returns 0 or errno. errno should be handled, it is either a critical error
- * or -EPROBE_DEFER. 0 means no critical error but it does not mean all
- * regulators have been found because they all are optional. If you require
- * certain regulators, you need to check separately in your driver if they got
- * populated after calling this function.
- */
-int mmc_regulator_get_supply(struct mmc_host *mmc)
-{
-	struct device *dev = mmc_dev(mmc);
-	int ret;
-
-	mmc->supply.vmmc = devm_regulator_get_optional(dev, "vmmc");
-	mmc->supply.vqmmc = devm_regulator_get_optional(dev, "vqmmc");
-
-	if (IS_ERR(mmc->supply.vmmc)) {
-		if (PTR_ERR(mmc->supply.vmmc) == -EPROBE_DEFER)
-			return -EPROBE_DEFER;
-		dev_dbg(dev, "No vmmc regulator found\n");
-	} else {
-		ret = mmc_regulator_get_ocrmask(mmc->supply.vmmc);
-		if (ret > 0)
-			mmc->ocr_avail = ret;
-		else
-			dev_warn(dev, "Failed getting OCR mask: %d\n", ret);
-	}
-
-	if (IS_ERR(mmc->supply.vqmmc)) {
-		if (PTR_ERR(mmc->supply.vqmmc) == -EPROBE_DEFER)
-			return -EPROBE_DEFER;
-		dev_dbg(dev, "No vqmmc regulator found\n");
-	}
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(mmc_regulator_get_supply);
-
 /*
  * Mask off any voltages we don't support and select
  * the lowest voltage
diff --git a/drivers/mmc/core/core.h b/drivers/mmc/core/core.h
index 8fb6bc37f808..b5083b13d594 100644
--- a/drivers/mmc/core/core.h
+++ b/drivers/mmc/core/core.h
@@ -59,6 +59,7 @@ void mmc_power_up(struct mmc_host *host, u32 ocr);
 void mmc_power_off(struct mmc_host *host);
 void mmc_power_cycle(struct mmc_host *host, u32 ocr);
 void mmc_set_initial_state(struct mmc_host *host);
+u32 mmc_vddrange_to_ocrmask(int vdd_min, int vdd_max);
 
 static inline void mmc_delay(unsigned int ms)
 {
diff --git a/drivers/mmc/core/regulator.c b/drivers/mmc/core/regulator.c
new file mode 100644
index 000000000000..80f95f86ca0e
--- /dev/null
+++ b/drivers/mmc/core/regulator.c
@@ -0,0 +1,261 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Helper functions for MMC regulators.
+ */
+
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/log2.h>
+#include <linux/regulator/consumer.h>
+
+#include <linux/mmc/host.h>
+
+#include "core.h"
+#include "host.h"
+
+#ifdef CONFIG_REGULATOR
+
+/**
+ * mmc_ocrbitnum_to_vdd - Convert a OCR bit number to its voltage
+ * @vdd_bit:	OCR bit number
+ * @min_uV:	minimum voltage value (mV)
+ * @max_uV:	maximum voltage value (mV)
+ *
+ * This function returns the voltage range according to the provided OCR
+ * bit number. If conversion is not possible a negative errno value returned.
+ */
+static int mmc_ocrbitnum_to_vdd(int vdd_bit, int *min_uV, int *max_uV)
+{
+	int		tmp;
+
+	if (!vdd_bit)
+		return -EINVAL;
+
+	/*
+	 * REVISIT mmc_vddrange_to_ocrmask() may have set some
+	 * bits this regulator doesn't quite support ... don't
+	 * be too picky, most cards and regulators are OK with
+	 * a 0.1V range goof (it's a small error percentage).
+	 */
+	tmp = vdd_bit - ilog2(MMC_VDD_165_195);
+	if (tmp == 0) {
+		*min_uV = 1650 * 1000;
+		*max_uV = 1950 * 1000;
+	} else {
+		*min_uV = 1900 * 1000 + tmp * 100 * 1000;
+		*max_uV = *min_uV + 100 * 1000;
+	}
+
+	return 0;
+}
+
+/**
+ * mmc_regulator_get_ocrmask - return mask of supported voltages
+ * @supply: regulator to use
+ *
+ * This returns either a negative errno, or a mask of voltages that
+ * can be provided to MMC/SD/SDIO devices using the specified voltage
+ * regulator.  This would normally be called before registering the
+ * MMC host adapter.
+ */
+int mmc_regulator_get_ocrmask(struct regulator *supply)
+{
+	int			result = 0;
+	int			count;
+	int			i;
+	int			vdd_uV;
+	int			vdd_mV;
+
+	count = regulator_count_voltages(supply);
+	if (count < 0)
+		return count;
+
+	for (i = 0; i < count; i++) {
+		vdd_uV = regulator_list_voltage(supply, i);
+		if (vdd_uV <= 0)
+			continue;
+
+		vdd_mV = vdd_uV / 1000;
+		result |= mmc_vddrange_to_ocrmask(vdd_mV, vdd_mV);
+	}
+
+	if (!result) {
+		vdd_uV = regulator_get_voltage(supply);
+		if (vdd_uV <= 0)
+			return vdd_uV;
+
+		vdd_mV = vdd_uV / 1000;
+		result = mmc_vddrange_to_ocrmask(vdd_mV, vdd_mV);
+	}
+
+	return result;
+}
+EXPORT_SYMBOL_GPL(mmc_regulator_get_ocrmask);
+
+/**
+ * mmc_regulator_set_ocr - set regulator to match host->ios voltage
+ * @mmc: the host to regulate
+ * @supply: regulator to use
+ * @vdd_bit: zero for power off, else a bit number (host->ios.vdd)
+ *
+ * Returns zero on success, else negative errno.
+ *
+ * MMC host drivers may use this to enable or disable a regulator using
+ * a particular supply voltage.  This would normally be called from the
+ * set_ios() method.
+ */
+int mmc_regulator_set_ocr(struct mmc_host *mmc,
+			struct regulator *supply,
+			unsigned short vdd_bit)
+{
+	int			result = 0;
+	int			min_uV, max_uV;
+
+	if (vdd_bit) {
+		mmc_ocrbitnum_to_vdd(vdd_bit, &min_uV, &max_uV);
+
+		result = regulator_set_voltage(supply, min_uV, max_uV);
+		if (result == 0 && !mmc->regulator_enabled) {
+			result = regulator_enable(supply);
+			if (!result)
+				mmc->regulator_enabled = true;
+		}
+	} else if (mmc->regulator_enabled) {
+		result = regulator_disable(supply);
+		if (result == 0)
+			mmc->regulator_enabled = false;
+	}
+
+	if (result)
+		dev_err(mmc_dev(mmc),
+			"could not set regulator OCR (%d)\n", result);
+	return result;
+}
+EXPORT_SYMBOL_GPL(mmc_regulator_set_ocr);
+
+static int mmc_regulator_set_voltage_if_supported(struct regulator *regulator,
+						  int min_uV, int target_uV,
+						  int max_uV)
+{
+	/*
+	 * Check if supported first to avoid errors since we may try several
+	 * signal levels during power up and don't want to show errors.
+	 */
+	if (!regulator_is_supported_voltage(regulator, min_uV, max_uV))
+		return -EINVAL;
+
+	return regulator_set_voltage_triplet(regulator, min_uV, target_uV,
+					     max_uV);
+}
+
+/**
+ * mmc_regulator_set_vqmmc - Set VQMMC as per the ios
+ *
+ * For 3.3V signaling, we try to match VQMMC to VMMC as closely as possible.
+ * That will match the behavior of old boards where VQMMC and VMMC were supplied
+ * by the same supply.  The Bus Operating conditions for 3.3V signaling in the
+ * SD card spec also define VQMMC in terms of VMMC.
+ * If this is not possible we'll try the full 2.7-3.6V of the spec.
+ *
+ * For 1.2V and 1.8V signaling we'll try to get as close as possible to the
+ * requested voltage.  This is definitely a good idea for UHS where there's a
+ * separate regulator on the card that's trying to make 1.8V and it's best if
+ * we match.
+ *
+ * This function is expected to be used by a controller's
+ * start_signal_voltage_switch() function.
+ */
+int mmc_regulator_set_vqmmc(struct mmc_host *mmc, struct mmc_ios *ios)
+{
+	struct device *dev = mmc_dev(mmc);
+	int ret, volt, min_uV, max_uV;
+
+	/* If no vqmmc supply then we can't change the voltage */
+	if (IS_ERR(mmc->supply.vqmmc))
+		return -EINVAL;
+
+	switch (ios->signal_voltage) {
+	case MMC_SIGNAL_VOLTAGE_120:
+		return mmc_regulator_set_voltage_if_supported(mmc->supply.vqmmc,
+						1100000, 1200000, 1300000);
+	case MMC_SIGNAL_VOLTAGE_180:
+		return mmc_regulator_set_voltage_if_supported(mmc->supply.vqmmc,
+						1700000, 1800000, 1950000);
+	case MMC_SIGNAL_VOLTAGE_330:
+		ret = mmc_ocrbitnum_to_vdd(mmc->ios.vdd, &volt, &max_uV);
+		if (ret < 0)
+			return ret;
+
+		dev_dbg(dev, "%s: found vmmc voltage range of %d-%duV\n",
+			__func__, volt, max_uV);
+
+		min_uV = max(volt - 300000, 2700000);
+		max_uV = min(max_uV + 200000, 3600000);
+
+		/*
+		 * Due to a limitation in the current implementation of
+		 * regulator_set_voltage_triplet() which is taking the lowest
+		 * voltage possible if below the target, search for a suitable
+		 * voltage in two steps and try to stay close to vmmc
+		 * with a 0.3V tolerance at first.
+		 */
+		if (!mmc_regulator_set_voltage_if_supported(mmc->supply.vqmmc,
+						min_uV, volt, max_uV))
+			return 0;
+
+		return mmc_regulator_set_voltage_if_supported(mmc->supply.vqmmc,
+						2700000, volt, 3600000);
+	default:
+		return -EINVAL;
+	}
+}
+EXPORT_SYMBOL_GPL(mmc_regulator_set_vqmmc);
+
+#else
+
+static inline int mmc_regulator_get_ocrmask(struct regulator *supply)
+{
+	return 0;
+}
+
+#endif /* CONFIG_REGULATOR */
+
+/**
+ * mmc_regulator_get_supply - try to get VMMC and VQMMC regulators for a host
+ * @mmc: the host to regulate
+ *
+ * Returns 0 or errno. errno should be handled, it is either a critical error
+ * or -EPROBE_DEFER. 0 means no critical error but it does not mean all
+ * regulators have been found because they all are optional. If you require
+ * certain regulators, you need to check separately in your driver if they got
+ * populated after calling this function.
+ */
+int mmc_regulator_get_supply(struct mmc_host *mmc)
+{
+	struct device *dev = mmc_dev(mmc);
+	int ret;
+
+	mmc->supply.vmmc = devm_regulator_get_optional(dev, "vmmc");
+	mmc->supply.vqmmc = devm_regulator_get_optional(dev, "vqmmc");
+
+	if (IS_ERR(mmc->supply.vmmc)) {
+		if (PTR_ERR(mmc->supply.vmmc) == -EPROBE_DEFER)
+			return -EPROBE_DEFER;
+		dev_dbg(dev, "No vmmc regulator found\n");
+	} else {
+		ret = mmc_regulator_get_ocrmask(mmc->supply.vmmc);
+		if (ret > 0)
+			mmc->ocr_avail = ret;
+		else
+			dev_warn(dev, "Failed getting OCR mask: %d\n", ret);
+	}
+
+	if (IS_ERR(mmc->supply.vqmmc)) {
+		if (PTR_ERR(mmc->supply.vqmmc) == -EPROBE_DEFER)
+			return -EPROBE_DEFER;
+		dev_dbg(dev, "No vqmmc regulator found\n");
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(mmc_regulator_get_supply);
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index d893902b2f1c..7f93747c8cdc 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -532,7 +532,6 @@ static inline int mmc_regulator_set_vqmmc(struct mmc_host *mmc,
 }
 #endif
 
-u32 mmc_vddrange_to_ocrmask(int vdd_min, int vdd_max);
 int mmc_regulator_get_supply(struct mmc_host *mmc);
 
 static inline int mmc_card_is_removable(struct mmc_host *host)
-- 
cgit v1.2.3


From 3958790e673244ec3b0c62197b7372af303f1351 Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Wed, 13 Feb 2019 18:42:06 +0100
Subject: mmc: core: Convert mmc_regulator_get_ocrmask() to static

The only left user of mmc_regulator_get_ocrmask() is the mmc core itself.
Therefore, let's drop the export and turn it into static.

Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/core/regulator.c | 3 +--
 include/linux/mmc/host.h     | 6 ------
 2 files changed, 1 insertion(+), 8 deletions(-)

(limited to 'include')

diff --git a/drivers/mmc/core/regulator.c b/drivers/mmc/core/regulator.c
index 80f95f86ca0e..b6febbcf8978 100644
--- a/drivers/mmc/core/regulator.c
+++ b/drivers/mmc/core/regulator.c
@@ -58,7 +58,7 @@ static int mmc_ocrbitnum_to_vdd(int vdd_bit, int *min_uV, int *max_uV)
  * regulator.  This would normally be called before registering the
  * MMC host adapter.
  */
-int mmc_regulator_get_ocrmask(struct regulator *supply)
+static int mmc_regulator_get_ocrmask(struct regulator *supply)
 {
 	int			result = 0;
 	int			count;
@@ -90,7 +90,6 @@ int mmc_regulator_get_ocrmask(struct regulator *supply)
 
 	return result;
 }
-EXPORT_SYMBOL_GPL(mmc_regulator_get_ocrmask);
 
 /**
  * mmc_regulator_set_ocr - set regulator to match host->ios voltage
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index 7f93747c8cdc..43d0f0c496f6 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -507,17 +507,11 @@ void sdio_run_irqs(struct mmc_host *host);
 void sdio_signal_irq(struct mmc_host *host);
 
 #ifdef CONFIG_REGULATOR
-int mmc_regulator_get_ocrmask(struct regulator *supply);
 int mmc_regulator_set_ocr(struct mmc_host *mmc,
 			struct regulator *supply,
 			unsigned short vdd_bit);
 int mmc_regulator_set_vqmmc(struct mmc_host *mmc, struct mmc_ios *ios);
 #else
-static inline int mmc_regulator_get_ocrmask(struct regulator *supply)
-{
-	return 0;
-}
-
 static inline int mmc_regulator_set_ocr(struct mmc_host *mmc,
 				 struct regulator *supply,
 				 unsigned short vdd_bit)
-- 
cgit v1.2.3


From de7b7dca8735f720793dae8ad818091309979c39 Mon Sep 17 00:00:00 2001
From: "Angus Ainslie (Purism)" <angus@akkea.ca>
Date: Mon, 28 Jan 2019 09:03:22 -0700
Subject: dmaengine: imx-sdma: add a test for imx8mq multi sdma devices

On i.mx8mq, there are two sdma instances, and the common dma framework
will get a channel dynamically from any available sdma instance whether
it's the first sdma device or the second sdma device. Some IPs like
SAI only work with sdma2 not sdma1. To make sure the sdma channel is from
the correct sdma device, use the node pointer to match.

Signed-off-by: Angus Ainslie (Purism) <angus@akkea.ca>
Reviewed-by: Lucas Stach <l.stach@pengutronix.de>
Tested-by: Daniel Baluta <daniel.baluta@nxp.com>
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/imx-sdma.c                | 6 ++++++
 include/linux/platform_data/dma-imx.h | 1 +
 2 files changed, 7 insertions(+)

(limited to 'include')

diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c
index fc8bc80617d8..8fb0cd293b54 100644
--- a/drivers/dma/imx-sdma.c
+++ b/drivers/dma/imx-sdma.c
@@ -1913,11 +1913,16 @@ disable_clk_ipg:
 static bool sdma_filter_fn(struct dma_chan *chan, void *fn_param)
 {
 	struct sdma_channel *sdmac = to_sdma_chan(chan);
+	struct sdma_engine *sdma = sdmac->sdma;
 	struct imx_dma_data *data = fn_param;
 
 	if (!imx_dma_is_general_purpose(chan))
 		return false;
 
+	/* return false if it's not the right device */
+	if (sdma->dev->of_node != data->of_node)
+		return false;
+
 	sdmac->data = *data;
 	chan->private = &sdmac->data;
 
@@ -1945,6 +1950,7 @@ static struct dma_chan *sdma_xlate(struct of_phandle_args *dma_spec,
 	 * be set to sdmac->event_id1.
 	 */
 	data.dma_request2 = 0;
+	data.of_node = ofdma->of_node;
 
 	return dma_request_channel(mask, sdma_filter_fn, &data);
 }
diff --git a/include/linux/platform_data/dma-imx.h b/include/linux/platform_data/dma-imx.h
index 7d964e787299..9daea8d42a10 100644
--- a/include/linux/platform_data/dma-imx.h
+++ b/include/linux/platform_data/dma-imx.h
@@ -55,6 +55,7 @@ struct imx_dma_data {
 	int dma_request2; /* secondary DMA request line */
 	enum sdma_peripheral_type peripheral_type;
 	int priority;
+	struct device_node *of_node;
 };
 
 static inline int imx_dma_is_ipu(struct dma_chan *chan)
-- 
cgit v1.2.3


From 6676d5e416eecaf4e823a2c3903aa132ffc2ede0 Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@mellanox.com>
Date: Mon, 25 Feb 2019 17:38:31 +0200
Subject: net: sched: set dedicated tcf_walker flag when tp is empty

Using tcf_walker->stop flag to determine when tcf_walker->fn() was called
at least once is unreliable. Some classifiers set 'stop' flag on error
before calling walker callback, other classifiers used to call it with NULL
filter pointer when empty. In order to prevent further regressions, extend
tcf_walker structure with dedicated 'nonempty' flag. Set this flag in
tcf_walker->fn() implementation that is used to check if classifier has
filters configured.

Fixes: 8b64678e0af8 ("net: sched: refactor tp insert/delete for concurrent execution")
Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Suggested-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/pkt_cls.h |  1 +
 net/sched/cls_api.c   | 13 +++++++++----
 2 files changed, 10 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 58ea48e1221c..d5e7a1af346f 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -17,6 +17,7 @@ struct tcf_walker {
 	int	stop;
 	int	skip;
 	int	count;
+	bool	nonempty;
 	unsigned long cookie;
 	int	(*fn)(struct tcf_proto *, void *node, struct tcf_walker *);
 };
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 6593c245f714..97d9312c2ff9 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -238,18 +238,23 @@ static void tcf_proto_put(struct tcf_proto *tp, bool rtnl_held,
 		tcf_proto_destroy(tp, rtnl_held, extack);
 }
 
-static int walker_noop(struct tcf_proto *tp, void *d, struct tcf_walker *arg)
+static int walker_check_empty(struct tcf_proto *tp, void *d,
+			      struct tcf_walker *arg)
 {
-	return -1;
+	if (tp) {
+		arg->nonempty = true;
+		return -1;
+	}
+	return 0;
 }
 
 static bool tcf_proto_is_empty(struct tcf_proto *tp, bool rtnl_held)
 {
-	struct tcf_walker walker = { .fn = walker_noop, };
+	struct tcf_walker walker = { .fn = walker_check_empty, };
 
 	if (tp->ops->walk) {
 		tp->ops->walk(tp, &walker, rtnl_held);
-		return !walker.stop;
+		return !walker.nonempty;
 	}
 	return true;
 }
-- 
cgit v1.2.3


From ad5ea5b9d513107869acd460f0180d8fb94856b9 Mon Sep 17 00:00:00 2001
From: Alexandre Belloni <alexandre.belloni@bootlin.com>
Date: Mon, 25 Feb 2019 21:20:45 +0100
Subject: rtc: remove rtc_class_ops.read_callback

Since commit 416f0e8056f7 ("RTC: sa1100: Update the sa1100 RTC driver."),
the last user of .read_callback is gone. It has been 8 years and now new
user appeared. Simply remove it.

Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
---
 drivers/rtc/dev.c   | 5 -----
 include/linux/rtc.h | 3 +--
 2 files changed, 1 insertion(+), 7 deletions(-)

(limited to 'include')

diff --git a/drivers/rtc/dev.c b/drivers/rtc/dev.c
index 43d962a9c210..1d006ef4bb57 100644
--- a/drivers/rtc/dev.c
+++ b/drivers/rtc/dev.c
@@ -178,11 +178,6 @@ rtc_dev_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
 	remove_wait_queue(&rtc->irq_queue, &wait);
 
 	if (ret == 0) {
-		/* Check for any data updates */
-		if (rtc->ops->read_callback)
-			data = rtc->ops->read_callback(rtc->dev.parent,
-						       data);
-
 		if (sizeof(int) != sizeof(long) &&
 		    count == sizeof(unsigned int))
 			ret = put_user(data, (unsigned int __user *)buf) ?:
diff --git a/include/linux/rtc.h b/include/linux/rtc.h
index c1089fe5344a..f89bfbb54902 100644
--- a/include/linux/rtc.h
+++ b/include/linux/rtc.h
@@ -67,7 +67,7 @@ extern struct class *rtc_class;
  *
  * The (current) exceptions are mostly filesystem hooks:
  *   - the proc() hook for procfs
- *   - non-ioctl() chardev hooks:  open(), release(), read_callback()
+ *   - non-ioctl() chardev hooks:  open(), release()
  *
  * REVISIT those periodic irq calls *do* have ops_lock when they're
  * issued through ioctl() ...
@@ -81,7 +81,6 @@ struct rtc_class_ops {
 	int (*proc)(struct device *, struct seq_file *);
 	int (*set_mmss64)(struct device *, time64_t secs);
 	int (*set_mmss)(struct device *, unsigned long secs);
-	int (*read_callback)(struct device *, int data);
 	int (*alarm_irq_enable)(struct device *, unsigned int enabled);
 	int (*read_offset)(struct device *, long *offset);
 	int (*set_offset)(struct device *, long offset);
-- 
cgit v1.2.3


From 3f7ae5f3dc5295ac17d6521130ed8a8f8a723fbf Mon Sep 17 00:00:00 2001
From: "Mohit P. Tahiliani" <tahiliani@nitk.edu.in>
Date: Tue, 26 Feb 2019 00:39:59 +0530
Subject: net: sched: pie: add more cases to auto-tune alpha and beta

The current implementation scales the local alpha and beta
variables in the calculate_probability function by the same
amount for all values of drop probability below 1%.

RFC 8033 suggests using additional cases for auto-tuning
alpha and beta when the drop probability is less than 1%.

In order to add more auto-tuning cases, MAX_PROB must be
scaled by u64 instead of u32 to prevent underflow when
scaling the local alpha and beta variables in the
calculate_probability function.

Signed-off-by: Mohit P. Tahiliani <tahiliani@nitk.edu.in>
Signed-off-by: Dhaval Khandla <dhavaljkhandla26@gmail.com>
Signed-off-by: Hrishikesh Hiraskar <hrishihiraskar@gmail.com>
Signed-off-by: Manish Kumar B <bmanish15597@gmail.com>
Signed-off-by: Sachin D. Patil <sdp.sachin@gmail.com>
Signed-off-by: Leslie Monis <lesliemonis@gmail.com>
Acked-by: Dave Taht <dave.taht@gmail.com>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/pkt_sched.h |  2 +-
 net/sched/sch_pie.c            | 65 +++++++++++++++++++++---------------------
 2 files changed, 33 insertions(+), 34 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h
index 0d18b1d1fbbc..1eb572ef3f27 100644
--- a/include/uapi/linux/pkt_sched.h
+++ b/include/uapi/linux/pkt_sched.h
@@ -954,7 +954,7 @@ enum {
 #define TCA_PIE_MAX   (__TCA_PIE_MAX - 1)
 
 struct tc_pie_xstats {
-	__u32 prob;             /* current probability */
+	__u64 prob;             /* current probability */
 	__u32 delay;            /* current delay in ms */
 	__u32 avg_dq_rate;      /* current average dq_rate in bits/pie_time */
 	__u32 packets_in;       /* total number of packets enqueued */
diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c
index d88ab53593b3..30f158582499 100644
--- a/net/sched/sch_pie.c
+++ b/net/sched/sch_pie.c
@@ -33,7 +33,7 @@
 
 #define QUEUE_THRESHOLD 16384
 #define DQCOUNT_INVALID -1
-#define MAX_PROB  0xffffffff
+#define MAX_PROB 0xffffffffffffffff
 #define PIE_SCALE 8
 
 /* parameters used */
@@ -49,7 +49,7 @@ struct pie_params {
 
 /* variables used */
 struct pie_vars {
-	u32 prob;		/* probability but scaled by u32 limit. */
+	u64 prob;		/* probability but scaled by u64 limit. */
 	psched_time_t burst_time;
 	psched_time_t qdelay;
 	psched_time_t qdelay_old;
@@ -99,8 +99,8 @@ static void pie_vars_init(struct pie_vars *vars)
 static bool drop_early(struct Qdisc *sch, u32 packet_size)
 {
 	struct pie_sched_data *q = qdisc_priv(sch);
-	u32 rnd;
-	u32 local_prob = q->vars.prob;
+	u64 rnd;
+	u64 local_prob = q->vars.prob;
 	u32 mtu = psched_mtu(qdisc_dev(sch));
 
 	/* If there is still burst allowance left skip random early drop */
@@ -124,11 +124,11 @@ static bool drop_early(struct Qdisc *sch, u32 packet_size)
 	 * probablity. Smaller packets will have lower drop prob in this case
 	 */
 	if (q->params.bytemode && packet_size <= mtu)
-		local_prob = (local_prob / mtu) * packet_size;
+		local_prob = (u64)packet_size * div_u64(local_prob, mtu);
 	else
 		local_prob = q->vars.prob;
 
-	rnd = prandom_u32();
+	prandom_bytes(&rnd, 8);
 	if (rnd < local_prob)
 		return true;
 
@@ -317,9 +317,10 @@ static void calculate_probability(struct Qdisc *sch)
 	u32 qlen = sch->qstats.backlog;	/* queue size in bytes */
 	psched_time_t qdelay = 0;	/* in pschedtime */
 	psched_time_t qdelay_old = q->vars.qdelay;	/* in pschedtime */
-	s32 delta = 0;		/* determines the change in probability */
-	u32 oldprob;
-	u32 alpha, beta;
+	s64 delta = 0;		/* determines the change in probability */
+	u64 oldprob;
+	u64 alpha, beta;
+	u32 power;
 	bool update_prob = true;
 
 	q->vars.qdelay_old = q->vars.qdelay;
@@ -339,38 +340,36 @@ static void calculate_probability(struct Qdisc *sch)
 	 * value for alpha as 0.125. In this implementation, we use values 0-32
 	 * passed from user space to represent this. Also, alpha and beta have
 	 * unit of HZ and need to be scaled before they can used to update
-	 * probability. alpha/beta are updated locally below by 1) scaling them
-	 * appropriately 2) scaling down by 16 to come to 0-2 range.
-	 * Please see paper for details.
-	 *
-	 * We scale alpha and beta differently depending on whether we are in
-	 * light, medium or high dropping mode.
+	 * probability. alpha/beta are updated locally below by scaling down
+	 * by 16 to come to 0-2 range.
 	 */
-	if (q->vars.prob < MAX_PROB / 100) {
-		alpha =
-		    (q->params.alpha * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 7;
-		beta =
-		    (q->params.beta * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 7;
-	} else if (q->vars.prob < MAX_PROB / 10) {
-		alpha =
-		    (q->params.alpha * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 5;
-		beta =
-		    (q->params.beta * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 5;
-	} else {
-		alpha =
-		    (q->params.alpha * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 4;
-		beta =
-		    (q->params.beta * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 4;
+	alpha = ((u64)q->params.alpha * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 4;
+	beta = ((u64)q->params.beta * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 4;
+
+	/* We scale alpha and beta differently depending on how heavy the
+	 * congestion is. Please see RFC 8033 for details.
+	 */
+	if (q->vars.prob < MAX_PROB / 10) {
+		alpha >>= 1;
+		beta >>= 1;
+
+		power = 100;
+		while (q->vars.prob < div_u64(MAX_PROB, power) &&
+		       power <= 1000000) {
+			alpha >>= 2;
+			beta >>= 2;
+			power *= 10;
+		}
 	}
 
 	/* alpha and beta should be between 0 and 32, in multiples of 1/16 */
-	delta += alpha * ((qdelay - q->params.target));
-	delta += beta * ((qdelay - qdelay_old));
+	delta += alpha * (u64)(qdelay - q->params.target);
+	delta += beta * (u64)(qdelay - qdelay_old);
 
 	oldprob = q->vars.prob;
 
 	/* to ensure we increase probability in steps of no more than 2% */
-	if (delta > (s32)(MAX_PROB / (100 / 2)) &&
+	if (delta > (s64)(MAX_PROB / (100 / 2)) &&
 	    q->vars.prob >= MAX_PROB / 10)
 		delta = (MAX_PROB / 100) * 2;
 
-- 
cgit v1.2.3


From 7a0e5b15ca458dd47e4c60b7fa9f22b84c7068c7 Mon Sep 17 00:00:00 2001
From: Matthias Kaehlcke <mka@chromium.org>
Date: Tue, 19 Feb 2019 12:05:57 -0800
Subject: Bluetooth: Add quirk for reading BD_ADDR from fwnode property

Add HCI_QUIRK_USE_BDADDR_PROPERTY to allow controllers to retrieve
the public Bluetooth address from the firmware node property
'local-bd-address'. If quirk is set and the property does not exist
or is invalid the controller is marked as unconfigured.

Signed-off-by: Matthias Kaehlcke <mka@chromium.org>
Reviewed-by: Balakrishna Godavarthi <bgodavar@codeaurora.org>
Tested-by: Balakrishna Godavarthi <bgodavar@codeaurora.org>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci.h | 12 ++++++++++++
 net/bluetooth/hci_core.c    | 43 +++++++++++++++++++++++++++++++++++++++++++
 net/bluetooth/mgmt.c        |  6 ++++--
 3 files changed, 59 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index c36dc1e20556..fbba43e9bef5 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -158,6 +158,18 @@ enum {
 	 */
 	HCI_QUIRK_INVALID_BDADDR,
 
+	/* When this quirk is set, the public Bluetooth address
+	 * initially reported by HCI Read BD Address command
+	 * is considered invalid. The public BD Address can be
+	 * specified in the fwnode property 'local-bd-address'.
+	 * If this property does not exist or is invalid controller
+	 * configuration is required before this device can be used.
+	 *
+	 * This quirk can be set before hci_register_dev is called or
+	 * during the hdev->setup vendor callback.
+	 */
+	HCI_QUIRK_USE_BDADDR_PROPERTY,
+
 	/* When this quirk is set, the duplicate filtering during
 	 * scanning is based on Bluetooth devices addresses. To allow
 	 * RSSI based updates, restart scanning if needed.
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 26e3d36aee29..d6b2540ba7f8 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -30,6 +30,7 @@
 #include <linux/rfkill.h>
 #include <linux/debugfs.h>
 #include <linux/crypto.h>
+#include <linux/property.h>
 #include <asm/unaligned.h>
 
 #include <net/bluetooth/bluetooth.h>
@@ -1355,6 +1356,32 @@ done:
 	return err;
 }
 
+/**
+ * hci_dev_get_bd_addr_from_property - Get the Bluetooth Device Address
+ *				       (BD_ADDR) for a HCI device from
+ *				       a firmware node property.
+ * @hdev:	The HCI device
+ *
+ * Search the firmware node for 'local-bd-address'.
+ *
+ * All-zero BD addresses are rejected, because those could be properties
+ * that exist in the firmware tables, but were not updated by the firmware. For
+ * example, the DTS could define 'local-bd-address', with zero BD addresses.
+ */
+static void hci_dev_get_bd_addr_from_property(struct hci_dev *hdev)
+{
+	struct fwnode_handle *fwnode = dev_fwnode(hdev->dev.parent);
+	bdaddr_t ba;
+	int ret;
+
+	ret = fwnode_property_read_u8_array(fwnode, "local-bd-address",
+					    (u8 *)&ba, sizeof(ba));
+	if (ret < 0 || !bacmp(&ba, BDADDR_ANY))
+		return;
+
+	bacpy(&hdev->public_addr, &ba);
+}
+
 static int hci_dev_do_open(struct hci_dev *hdev)
 {
 	int ret = 0;
@@ -1422,6 +1449,22 @@ static int hci_dev_do_open(struct hci_dev *hdev)
 		if (hdev->setup)
 			ret = hdev->setup(hdev);
 
+		if (ret)
+			goto setup_failed;
+
+		if (test_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks)) {
+			if (!bacmp(&hdev->public_addr, BDADDR_ANY))
+				hci_dev_get_bd_addr_from_property(hdev);
+
+			if (bacmp(&hdev->public_addr, BDADDR_ANY) &&
+			    hdev->set_bdaddr)
+				ret = hdev->set_bdaddr(hdev,
+						       &hdev->public_addr);
+			else
+				ret = -EADDRNOTAVAIL;
+		}
+
+setup_failed:
 		/* The transport driver can set these quirks before
 		 * creating the HCI device or in its setup callback.
 		 *
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 1e2acaddcdfd..2457f408d17d 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -549,7 +549,8 @@ static bool is_configured(struct hci_dev *hdev)
 	    !hci_dev_test_flag(hdev, HCI_EXT_CONFIGURED))
 		return false;
 
-	if (test_bit(HCI_QUIRK_INVALID_BDADDR, &hdev->quirks) &&
+	if ((test_bit(HCI_QUIRK_INVALID_BDADDR, &hdev->quirks) ||
+	     test_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks)) &&
 	    !bacmp(&hdev->public_addr, BDADDR_ANY))
 		return false;
 
@@ -564,7 +565,8 @@ static __le32 get_missing_options(struct hci_dev *hdev)
 	    !hci_dev_test_flag(hdev, HCI_EXT_CONFIGURED))
 		options |= MGMT_OPTION_EXTERNAL_CONFIG;
 
-	if (test_bit(HCI_QUIRK_INVALID_BDADDR, &hdev->quirks) &&
+	if ((test_bit(HCI_QUIRK_INVALID_BDADDR, &hdev->quirks) ||
+	     test_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks)) &&
 	    !bacmp(&hdev->public_addr, BDADDR_ANY))
 		options |= MGMT_OPTION_PUBLIC_ADDRESS;
 
-- 
cgit v1.2.3


From db04d4a3d72f0c5ee34609559f535d11ab47303c Mon Sep 17 00:00:00 2001
From: Tom Murphy <murphyt7@tcd.ie>
Date: Mon, 11 Feb 2019 15:50:33 +0000
Subject: iommu: Fix flush_tlb_all typo

Fix typo, flush_tlb_all should be flush_iotlb_all.

Signed-off-by: Tom Murphy <murphyt7@tcd.ie>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 include/linux/iommu.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index e90da6b6f3d1..2b402dcbcf81 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -167,7 +167,7 @@ struct iommu_resv_region {
  * @detach_dev: detach device from an iommu domain
  * @map: map a physically contiguous memory region to an iommu domain
  * @unmap: unmap a physically contiguous memory region from an iommu domain
- * @flush_tlb_all: Synchronously flush all hardware TLBs for this domain
+ * @flush_iotlb_all: Synchronously flush all hardware TLBs for this domain
  * @iotlb_range_add: Add a given iova range to the flush queue for this domain
  * @iotlb_sync: Flush all queued ranges from the hardware TLBs and empty flush
  *            queue
-- 
cgit v1.2.3


From e5567f5f67621877726f99be040af9fbedda37dc Mon Sep 17 00:00:00 2001
From: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
Date: Tue, 19 Feb 2019 11:04:51 -0800
Subject: PCI/ATS: Add pci_prg_resp_pasid_required() interface.

Return the PRG Response PASID Required bit in the Page Request
Status Register.

As per PCIe spec r4.0, sec 10.5.2.3, if this bit is Set, the device
expects a PASID TLP Prefix on PRG Response Messages when the
corresponding Page Requests had a PASID TLP Prefix. If Clear, the device
does not expect PASID TLP Prefixes on any PRG Response Message, and the
device behavior is undefined if the device receives a PRG Response Message
with a PASID TLP Prefix. Also the device behavior is undefined if this
bit is Set and the device receives a PRG Response Message with no PASID TLP
Prefix when the corresponding Page Requests had a PASID TLP Prefix.

This function will be used by drivers like IOMMU, if it is required to
check the status of the PRG Response PASID Required bit before enabling
the PASID support of the device.

Cc: Ashok Raj <ashok.raj@intel.com>
Cc: Jacob Pan <jacob.jun.pan@linux.intel.com>
Cc: Keith Busch <keith.busch@intel.com>
Suggested-by: Ashok Raj <ashok.raj@intel.com>
Signed-off-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/pci/ats.c             | 30 ++++++++++++++++++++++++++++++
 include/linux/pci-ats.h       |  5 +++++
 include/uapi/linux/pci_regs.h |  1 +
 3 files changed, 36 insertions(+)

(limited to 'include')

diff --git a/drivers/pci/ats.c b/drivers/pci/ats.c
index 5b78f3b1b918..420cd0a578d0 100644
--- a/drivers/pci/ats.c
+++ b/drivers/pci/ats.c
@@ -368,6 +368,36 @@ int pci_pasid_features(struct pci_dev *pdev)
 }
 EXPORT_SYMBOL_GPL(pci_pasid_features);
 
+/**
+ * pci_prg_resp_pasid_required - Return PRG Response PASID Required bit
+ *				 status.
+ * @pdev: PCI device structure
+ *
+ * Returns 1 if PASID is required in PRG Response Message, 0 otherwise.
+ *
+ * Even though the PRG response PASID status is read from PRI Status
+ * Register, since this API will mainly be used by PASID users, this
+ * function is defined within #ifdef CONFIG_PCI_PASID instead of
+ * CONFIG_PCI_PRI.
+ */
+int pci_prg_resp_pasid_required(struct pci_dev *pdev)
+{
+	u16 status;
+	int pos;
+
+	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI);
+	if (!pos)
+		return 0;
+
+	pci_read_config_word(pdev, pos + PCI_PRI_STATUS, &status);
+
+	if (status & PCI_PRI_STATUS_PASID)
+		return 1;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(pci_prg_resp_pasid_required);
+
 #define PASID_NUMBER_SHIFT	8
 #define PASID_NUMBER_MASK	(0x1f << PASID_NUMBER_SHIFT)
 /**
diff --git a/include/linux/pci-ats.h b/include/linux/pci-ats.h
index 7c4b8e27268c..facfd6a18fe1 100644
--- a/include/linux/pci-ats.h
+++ b/include/linux/pci-ats.h
@@ -40,6 +40,7 @@ void pci_disable_pasid(struct pci_dev *pdev);
 void pci_restore_pasid_state(struct pci_dev *pdev);
 int pci_pasid_features(struct pci_dev *pdev);
 int pci_max_pasids(struct pci_dev *pdev);
+int pci_prg_resp_pasid_required(struct pci_dev *pdev);
 
 #else  /* CONFIG_PCI_PASID */
 
@@ -66,6 +67,10 @@ static inline int pci_max_pasids(struct pci_dev *pdev)
 	return -EINVAL;
 }
 
+static int pci_prg_resp_pasid_required(struct pci_dev *pdev)
+{
+	return 0;
+}
 #endif /* CONFIG_PCI_PASID */
 
 
diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h
index e1e9888c85e6..898be572b010 100644
--- a/include/uapi/linux/pci_regs.h
+++ b/include/uapi/linux/pci_regs.h
@@ -880,6 +880,7 @@
 #define  PCI_PRI_STATUS_RF	0x001	/* Response Failure */
 #define  PCI_PRI_STATUS_UPRGI	0x002	/* Unexpected PRG index */
 #define  PCI_PRI_STATUS_STOPPED	0x100	/* PRI Stopped */
+#define  PCI_PRI_STATUS_PASID	0x8000	/* PRG Response PASID Required */
 #define PCI_PRI_MAX_REQ		0x08	/* PRI max reqs supported */
 #define PCI_PRI_ALLOC_REQ	0x0c	/* PRI max reqs allowed */
 #define PCI_EXT_CAP_PRI_SIZEOF	16
-- 
cgit v1.2.3


From 8c938ddc6df3bbe72809db1be6c9f3af83f5d7a9 Mon Sep 17 00:00:00 2001
From: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
Date: Tue, 19 Feb 2019 11:06:09 -0800
Subject: PCI/ATS: Add pci_ats_page_aligned() interface

Return the Page Aligned Request bit in the ATS Capability Register.

As per PCIe spec r4.0, sec 10.5.1.2, if the Page Aligned Request bit is
set, it indicates the Untranslated Addresses generated by the device are
always aligned to a 4096 byte boundary.

An IOMMU that can only translate page-aligned addresses can only be used
with devices that always produce aligned Untranslated Addresses. This
interface will be used by drivers for such IOMMUs to determine whether
devices can use the ATS service.

Cc: Ashok Raj <ashok.raj@intel.com>
Cc: Jacob Pan <jacob.jun.pan@linux.intel.com>
Cc: Keith Busch <keith.busch@intel.com>
Suggested-by: Ashok Raj <ashok.raj@intel.com>
Signed-off-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/pci/ats.c             | 27 +++++++++++++++++++++++++++
 include/linux/pci.h           |  2 ++
 include/uapi/linux/pci_regs.h |  1 +
 3 files changed, 30 insertions(+)

(limited to 'include')

diff --git a/drivers/pci/ats.c b/drivers/pci/ats.c
index 420cd0a578d0..97c08146534a 100644
--- a/drivers/pci/ats.c
+++ b/drivers/pci/ats.c
@@ -142,6 +142,33 @@ int pci_ats_queue_depth(struct pci_dev *dev)
 }
 EXPORT_SYMBOL_GPL(pci_ats_queue_depth);
 
+/**
+ * pci_ats_page_aligned - Return Page Aligned Request bit status.
+ * @pdev: the PCI device
+ *
+ * Returns 1, if the Untranslated Addresses generated by the device
+ * are always aligned or 0 otherwise.
+ *
+ * Per PCIe spec r4.0, sec 10.5.1.2, if the Page Aligned Request bit
+ * is set, it indicates the Untranslated Addresses generated by the
+ * device are always aligned to a 4096 byte boundary.
+ */
+int pci_ats_page_aligned(struct pci_dev *pdev)
+{
+	u16 cap;
+
+	if (!pdev->ats_cap)
+		return 0;
+
+	pci_read_config_word(pdev, pdev->ats_cap + PCI_ATS_CAP, &cap);
+
+	if (cap & PCI_ATS_CAP_PAGE_ALIGNED)
+		return 1;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(pci_ats_page_aligned);
+
 #ifdef CONFIG_PCI_PRI
 /**
  * pci_enable_pri - Enable PRI capability
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 65f1d8c2f082..9724a8c0496b 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1524,11 +1524,13 @@ void pci_ats_init(struct pci_dev *dev);
 int pci_enable_ats(struct pci_dev *dev, int ps);
 void pci_disable_ats(struct pci_dev *dev);
 int pci_ats_queue_depth(struct pci_dev *dev);
+int pci_ats_page_aligned(struct pci_dev *dev);
 #else
 static inline void pci_ats_init(struct pci_dev *d) { }
 static inline int pci_enable_ats(struct pci_dev *d, int ps) { return -ENODEV; }
 static inline void pci_disable_ats(struct pci_dev *d) { }
 static inline int pci_ats_queue_depth(struct pci_dev *d) { return -ENODEV; }
+static inline int pci_ats_page_aligned(struct pci_dev *dev) { return 0; }
 #endif
 
 #ifdef CONFIG_PCIE_PTM
diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h
index 898be572b010..5c98133f2c94 100644
--- a/include/uapi/linux/pci_regs.h
+++ b/include/uapi/linux/pci_regs.h
@@ -866,6 +866,7 @@
 #define PCI_ATS_CAP		0x04	/* ATS Capability Register */
 #define  PCI_ATS_CAP_QDEP(x)	((x) & 0x1f)	/* Invalidate Queue Depth */
 #define  PCI_ATS_MAX_QDEP	32	/* Max Invalidate Queue Depth */
+#define  PCI_ATS_CAP_PAGE_ALIGNED	0x0020 /* Page Aligned Request */
 #define PCI_ATS_CTRL		0x06	/* ATS Control Register */
 #define  PCI_ATS_CTRL_ENABLE	0x8000	/* ATS Enable */
 #define  PCI_ATS_CTRL_STU(x)	((x) & 0x1f)	/* Smallest Translation Unit */
-- 
cgit v1.2.3


From 2405bc162583e1d7c40b13bf078e87428d2dfe4e Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Wed, 20 Feb 2019 14:00:52 +0100
Subject: iommu: Document iommu_ops.iotlb_sync_map()

Add missing kerneldoc for iommu_ops.iotlb_sync_map().

Fixes: 1d7ae53b152dbc5b ("iommu: Introduce iotlb_sync_map callback")
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Reviewed-by: Dmitry Osipenko <digetx@gmail.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 include/linux/iommu.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 2b402dcbcf81..28ad97801032 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -169,6 +169,7 @@ struct iommu_resv_region {
  * @unmap: unmap a physically contiguous memory region from an iommu domain
  * @flush_iotlb_all: Synchronously flush all hardware TLBs for this domain
  * @iotlb_range_add: Add a given iova range to the flush queue for this domain
+ * @iotlb_sync_map: Sync mappings created recently using @map to the hardware
  * @iotlb_sync: Flush all queued ranges from the hardware TLBs and empty flush
  *            queue
  * @iova_to_phys: translate iova to physical address
-- 
cgit v1.2.3


From a7055d572c51338bed8673331ead6759cae6b70b Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Wed, 20 Feb 2019 14:00:53 +0100
Subject: iommu: Document iommu_ops.is_attach_deferred()

Add missing kerneldoc for iommu_ops.is_attach_deferred().

Fixes: e01d1913b0d08171 ("iommu: Add is_attach_deferred call-back to iommu-ops")
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 include/linux/iommu.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 28ad97801032..41fa7958592d 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -184,6 +184,8 @@ struct iommu_resv_region {
  * @domain_window_enable: Configure and enable a particular window for a domain
  * @domain_window_disable: Disable a particular window for a domain
  * @of_xlate: add OF master IDs to iommu grouping
+ * @is_attach_deferred: Check if domain attach should be deferred from iommu
+ *                      driver init to device driver init (default no)
  * @pgsize_bitmap: bitmap of all possible supported page sizes
  */
 struct iommu_ops {
-- 
cgit v1.2.3


From e85fa28ebcb598bbb439402609fdde5d0f80622d Mon Sep 17 00:00:00 2001
From: Mike Leach <mike.leach@linaro.org>
Date: Wed, 13 Feb 2019 14:41:49 +0100
Subject: ARM: 8838/1: drivers: amba: Updates to component identification for
 driver matching.

The CoreSight specification (ARM IHI 0029E), updates the ID register
requirements for components on an AMBA bus, to cover both traditional
ARM Primecell type devices, and newer CoreSight and other components.

The Peripheral ID (PID) / Component ID (CID) pair is extended in certain
cases to uniquely identify components. CoreSight components related to
a single function can share Peripheral ID values, and must be further
identified using a Unique Component Identifier (UCI). e.g. the ETM, CTI,
PMU and Debug hardware of the A35 all share the same PID.

Bits 15:12 of the CID are defined to be the device class.
Class 0xF remains for PrimeCell and legacy components.
Class 0x9 defines the component as CoreSight (CORESIGHT_CID above)
Class 0x0, 0x1, 0xB, 0xE define components that do not have driver support
at present.
Class 0x2-0x8,0xA and 0xD-0xD are presently reserved.

The specification futher defines which classes of device use the standard
CID/PID pair, and when additional ID registers are required.

This patch introduces the amba_cs_uci_id structure which will be used in
all coresight drivers for indentification via the private data pointer in
the amba_id structure.

Existing drivers that currently use the amba_id->data pointer for private
data are updated to use the amba_cs_uci_id->data pointer. Macros and
inline functions are added to simplify this code.

Signed-off-by: Mike Leach <mike.leach@linaro.org>
Reviewed-by: Mathieu Poirier <mathieu.poirier@linaro.org>
Reviewed-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Tested-by: Sai Prakash Ranjan <saiprakash.ranjan@codeaurora.org>
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
---
 drivers/hwtracing/coresight/coresight-etm3x.c | 44 ++++++++-------------------
 drivers/hwtracing/coresight/coresight-priv.h  | 32 +++++++++++++++++++
 drivers/hwtracing/coresight/coresight-stm.c   | 14 ++-------
 drivers/hwtracing/coresight/coresight-tmc.c   | 30 ++++++------------
 include/linux/amba/bus.h                      | 33 ++++++++++++++++++++
 5 files changed, 90 insertions(+), 63 deletions(-)

(limited to 'include')

diff --git a/drivers/hwtracing/coresight/coresight-etm3x.c b/drivers/hwtracing/coresight/coresight-etm3x.c
index 9a63e87ea5f3..be302ec5f66b 100644
--- a/drivers/hwtracing/coresight/coresight-etm3x.c
+++ b/drivers/hwtracing/coresight/coresight-etm3x.c
@@ -871,7 +871,7 @@ static int etm_probe(struct amba_device *adev, const struct amba_id *id)
 	}
 
 	pm_runtime_put(&adev->dev);
-	dev_info(dev, "%s initialized\n", (char *)id->data);
+	dev_info(dev, "%s initialized\n", (char *)coresight_get_uci_data(id));
 	if (boot_enable) {
 		coresight_enable(drvdata->csdev);
 		drvdata->boot_enable = true;
@@ -915,36 +915,18 @@ static const struct dev_pm_ops etm_dev_pm_ops = {
 };
 
 static const struct amba_id etm_ids[] = {
-	{	/* ETM 3.3 */
-		.id	= 0x000bb921,
-		.mask	= 0x000fffff,
-		.data	= "ETM 3.3",
-	},
-	{	/* ETM 3.5 - Cortex-A5 */
-		.id	= 0x000bb955,
-		.mask	= 0x000fffff,
-		.data	= "ETM 3.5",
-	},
-	{	/* ETM 3.5 */
-		.id	= 0x000bb956,
-		.mask	= 0x000fffff,
-		.data	= "ETM 3.5",
-	},
-	{	/* PTM 1.0 */
-		.id	= 0x000bb950,
-		.mask	= 0x000fffff,
-		.data	= "PTM 1.0",
-	},
-	{	/* PTM 1.1 */
-		.id	= 0x000bb95f,
-		.mask	= 0x000fffff,
-		.data	= "PTM 1.1",
-	},
-	{	/* PTM 1.1 Qualcomm */
-		.id	= 0x000b006f,
-		.mask	= 0x000fffff,
-		.data	= "PTM 1.1",
-	},
+	/* ETM 3.3 */
+	CS_AMBA_ID_DATA(0x000bb921, "ETM 3.3"),
+	/* ETM 3.5 - Cortex-A5 */
+	CS_AMBA_ID_DATA(0x000bb955, "ETM 3.5"),
+	/* ETM 3.5 */
+	CS_AMBA_ID_DATA(0x000bb956, "ETM 3.5"),
+	/* PTM 1.0 */
+	CS_AMBA_ID_DATA(0x000bb950, "PTM 1.0"),
+	/* PTM 1.1 */
+	CS_AMBA_ID_DATA(0x000bb95f, "PTM 1.1"),
+	/* PTM 1.1 Qualcomm */
+	CS_AMBA_ID_DATA(0x000b006f, "PTM 1.1"),
 	{ 0, 0},
 };
 
diff --git a/drivers/hwtracing/coresight/coresight-priv.h b/drivers/hwtracing/coresight/coresight-priv.h
index 579f34943bf1..02a1f5204f9d 100644
--- a/drivers/hwtracing/coresight/coresight-priv.h
+++ b/drivers/hwtracing/coresight/coresight-priv.h
@@ -6,6 +6,7 @@
 #ifndef _CORESIGHT_PRIV_H
 #define _CORESIGHT_PRIV_H
 
+#include <linux/amba/bus.h>
 #include <linux/bitops.h>
 #include <linux/io.h>
 #include <linux/coresight.h>
@@ -159,4 +160,35 @@ static inline int etm_readl_cp14(u32 off, unsigned int *val) { return 0; }
 static inline int etm_writel_cp14(u32 off, u32 val) { return 0; }
 #endif
 
+/*
+ * Macros and inline functions to handle CoreSight UCI data and driver
+ * private data in AMBA ID table entries, and extract data values.
+ */
+
+/* coresight AMBA ID, no UCI, no driver data: id table entry */
+#define CS_AMBA_ID(pid)			\
+	{				\
+		.id	= pid,		\
+		.mask	= 0x000fffff,	\
+	}
+
+/* coresight AMBA ID, UCI with driver data only: id table entry. */
+#define CS_AMBA_ID_DATA(pid, dval)				\
+	{							\
+		.id	= pid,					\
+		.mask	= 0x000fffff,				\
+		.data	=  (void *)&(struct amba_cs_uci_id)	\
+			{				\
+				.data = (void *)dval,	\
+			}				\
+	}
+
+/* extract the data value from a UCI structure given amba_id pointer. */
+static inline void *coresight_get_uci_data(const struct amba_id *id)
+{
+	if (id->data)
+		return ((struct amba_cs_uci_id *)(id->data))->data;
+	return 0;
+}
+
 #endif
diff --git a/drivers/hwtracing/coresight/coresight-stm.c b/drivers/hwtracing/coresight/coresight-stm.c
index ef339ff22090..2a70cdd68a7b 100644
--- a/drivers/hwtracing/coresight/coresight-stm.c
+++ b/drivers/hwtracing/coresight/coresight-stm.c
@@ -874,7 +874,7 @@ static int stm_probe(struct amba_device *adev, const struct amba_id *id)
 
 	pm_runtime_put(&adev->dev);
 
-	dev_info(dev, "%s initialized\n", (char *)id->data);
+	dev_info(dev, "%s initialized\n", (char *)coresight_get_uci_data(id));
 	return 0;
 
 stm_unregister:
@@ -909,16 +909,8 @@ static const struct dev_pm_ops stm_dev_pm_ops = {
 };
 
 static const struct amba_id stm_ids[] = {
-	{
-		.id     = 0x000bb962,
-		.mask   = 0x000fffff,
-		.data	= "STM32",
-	},
-	{
-		.id	= 0x000bb963,
-		.mask	= 0x000fffff,
-		.data	= "STM500",
-	},
+	CS_AMBA_ID_DATA(0x000bb962, "STM32"),
+	CS_AMBA_ID_DATA(0x000bb963, "STM500"),
 	{ 0, 0},
 };
 
diff --git a/drivers/hwtracing/coresight/coresight-tmc.c b/drivers/hwtracing/coresight/coresight-tmc.c
index ea249f0bcd73..2a02da3d630f 100644
--- a/drivers/hwtracing/coresight/coresight-tmc.c
+++ b/drivers/hwtracing/coresight/coresight-tmc.c
@@ -443,7 +443,8 @@ static int tmc_probe(struct amba_device *adev, const struct amba_id *id)
 		desc.type = CORESIGHT_DEV_TYPE_SINK;
 		desc.subtype.sink_subtype = CORESIGHT_DEV_SUBTYPE_SINK_BUFFER;
 		desc.ops = &tmc_etr_cs_ops;
-		ret = tmc_etr_setup_caps(drvdata, devid, id->data);
+		ret = tmc_etr_setup_caps(drvdata, devid,
+					 coresight_get_uci_data(id));
 		if (ret)
 			goto out;
 		break;
@@ -475,26 +476,13 @@ out:
 }
 
 static const struct amba_id tmc_ids[] = {
-	{
-		.id     = 0x000bb961,
-		.mask   = 0x000fffff,
-	},
-	{
-		/* Coresight SoC 600 TMC-ETR/ETS */
-		.id	= 0x000bb9e8,
-		.mask	= 0x000fffff,
-		.data	= (void *)(unsigned long)CORESIGHT_SOC_600_ETR_CAPS,
-	},
-	{
-		/* Coresight SoC 600 TMC-ETB */
-		.id	= 0x000bb9e9,
-		.mask	= 0x000fffff,
-	},
-	{
-		/* Coresight SoC 600 TMC-ETF */
-		.id	= 0x000bb9ea,
-		.mask	= 0x000fffff,
-	},
+	CS_AMBA_ID(0x000bb961),
+	/* Coresight SoC 600 TMC-ETR/ETS */
+	CS_AMBA_ID_DATA(0x000bb9e8, (unsigned long)CORESIGHT_SOC_600_ETR_CAPS),
+	/* Coresight SoC 600 TMC-ETB */
+	CS_AMBA_ID(0x000bb9e9),
+	/* Coresight SoC 600 TMC-ETF */
+	CS_AMBA_ID(0x000bb9ea),
 	{ 0, 0},
 };
 
diff --git a/include/linux/amba/bus.h b/include/linux/amba/bus.h
index d143c13bed26..e3c36223e40b 100644
--- a/include/linux/amba/bus.h
+++ b/include/linux/amba/bus.h
@@ -25,6 +25,39 @@
 #define AMBA_CID	0xb105f00d
 #define CORESIGHT_CID	0xb105900d
 
+/*
+ * CoreSight Architecture specification updates the ID specification
+ * for components on the AMBA bus. (ARM IHI 0029E)
+ *
+ * Bits 15:12 of the CID are the device class.
+ *
+ * Class 0xF remains for PrimeCell and legacy components. (AMBA_CID above)
+ * Class 0x9 defines the component as CoreSight (CORESIGHT_CID above)
+ * Class 0x0, 0x1, 0xB, 0xE define components that do not have driver support
+ * at present.
+ * Class 0x2-0x8,0xA and 0xD-0xD are presently reserved.
+ *
+ * Remaining CID bits stay as 0xb105-00d
+ */
+
+/**
+ * Class 0x9 components use additional values to form a Unique Component
+ * Identifier (UCI), where peripheral ID values are identical for different
+ * components. Passed to the amba bus code from the component driver via
+ * the amba_id->data pointer.
+ * @devarch	: coresight devarch register value
+ * @devarch_mask: mask bits used for matching. 0 indicates UCI not used.
+ * @devtype	: coresight device type value
+ * @data	: additional driver data. As we have usurped the original
+ *		pointer some devices may still need additional data
+ */
+struct amba_cs_uci_id {
+	unsigned int devarch;
+	unsigned int devarch_mask;
+	unsigned int devtype;
+	void *data;
+};
+
 struct clk;
 
 struct amba_device {
-- 
cgit v1.2.3


From 4a2910fa80d75dbe18d822482a48ae50a218029c Mon Sep 17 00:00:00 2001
From: Mike Leach <mike.leach@linaro.org>
Date: Wed, 13 Feb 2019 14:41:50 +0100
Subject: ARM: 8836/1: drivers: amba: Update component matching to use the
 CoreSight UCI values.

The patches provide an update of amba_device and matching code to handle
the additional registers required for the Class 0x9 (CoreSight) UCI.

The *data pointer in the amba_id is used by the driver to provide extended
ID register values for matching.

CoreSight components where PID/CID pair is currently sufficient for
unique identification need not provide this additional information.

Signed-off-by: Mike Leach <mike.leach@linaro.org>
Reviewed-by: Mathieu Poirier <mathieu.poirier@linaro.org>
Reviewed-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Tested-by: Sai Prakash Ranjan <saiprakash.ranjan@codeaurora.org>
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
---
 drivers/amba/bus.c       | 45 +++++++++++++++++++++++++++++++++++++--------
 include/linux/amba/bus.h |  6 ++++++
 2 files changed, 43 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/drivers/amba/bus.c b/drivers/amba/bus.c
index 41b706403ef7..b4dae624b9af 100644
--- a/drivers/amba/bus.c
+++ b/drivers/amba/bus.c
@@ -26,19 +26,36 @@
 
 #define to_amba_driver(d)	container_of(d, struct amba_driver, drv)
 
-static const struct amba_id *
-amba_lookup(const struct amba_id *table, struct amba_device *dev)
+/* called on periphid match and class 0x9 coresight device. */
+static int
+amba_cs_uci_id_match(const struct amba_id *table, struct amba_device *dev)
 {
 	int ret = 0;
+	struct amba_cs_uci_id *uci;
+
+	uci = table->data;
 
+	/* no table data or zero mask - return match on periphid */
+	if (!uci || (uci->devarch_mask == 0))
+		return 1;
+
+	/* test against read devtype and masked devarch value */
+	ret = (dev->uci.devtype == uci->devtype) &&
+		((dev->uci.devarch & uci->devarch_mask) == uci->devarch);
+	return ret;
+}
+
+static const struct amba_id *
+amba_lookup(const struct amba_id *table, struct amba_device *dev)
+{
 	while (table->mask) {
-		ret = (dev->periphid & table->mask) == table->id;
-		if (ret)
-			break;
+		if (((dev->periphid & table->mask) == table->id) &&
+			((dev->cid != CORESIGHT_CID) ||
+			 (amba_cs_uci_id_match(table, dev))))
+			return table;
 		table++;
 	}
-
-	return ret ? table : NULL;
+	return NULL;
 }
 
 static int amba_match(struct device *dev, struct device_driver *drv)
@@ -399,10 +416,22 @@ static int amba_device_try_add(struct amba_device *dev, struct resource *parent)
 			cid |= (readl(tmp + size - 0x10 + 4 * i) & 255) <<
 				(i * 8);
 
+		if (cid == CORESIGHT_CID) {
+			/* set the base to the start of the last 4k block */
+			void __iomem *csbase = tmp + size - 4096;
+
+			dev->uci.devarch =
+				readl(csbase + UCI_REG_DEVARCH_OFFSET);
+			dev->uci.devtype =
+				readl(csbase + UCI_REG_DEVTYPE_OFFSET) & 0xff;
+		}
+
 		amba_put_disable_pclk(dev);
 
-		if (cid == AMBA_CID || cid == CORESIGHT_CID)
+		if (cid == AMBA_CID || cid == CORESIGHT_CID) {
 			dev->periphid = pid;
+			dev->cid = cid;
+		}
 
 		if (!dev->periphid)
 			ret = -ENODEV;
diff --git a/include/linux/amba/bus.h b/include/linux/amba/bus.h
index e3c36223e40b..f99b74a6e4ca 100644
--- a/include/linux/amba/bus.h
+++ b/include/linux/amba/bus.h
@@ -58,6 +58,10 @@ struct amba_cs_uci_id {
 	void *data;
 };
 
+/* define offsets for registers used by UCI */
+#define UCI_REG_DEVTYPE_OFFSET	0xFCC
+#define UCI_REG_DEVARCH_OFFSET	0xFBC
+
 struct clk;
 
 struct amba_device {
@@ -65,6 +69,8 @@ struct amba_device {
 	struct resource		res;
 	struct clk		*pclk;
 	unsigned int		periphid;
+	unsigned int		cid;
+	struct amba_cs_uci_id	uci;
 	unsigned int		irq[AMBA_NR_IRQS];
 	char			*driver_override;
 };
-- 
cgit v1.2.3


From a73881c96d73ee72b7dbbd38a6eeef66182a8ef7 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@hammerspace.com>
Date: Tue, 26 Feb 2019 06:33:02 -0500
Subject: SUNRPC: Fix an Oops in udp_poll()

udp_poll() checks the struct file for the O_NONBLOCK flag, so we must not
call it with a NULL file pointer.

Fixes: 0ffe86f48026 ("SUNRPC: Use poll() to fix up the socket requeue races")
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 include/linux/sunrpc/xprtsock.h |  1 +
 net/sunrpc/xprtsock.c           | 21 +++++++++++++++++++--
 2 files changed, 20 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/sunrpc/xprtsock.h b/include/linux/sunrpc/xprtsock.h
index 458bfe0137f5..b81d0b3e0799 100644
--- a/include/linux/sunrpc/xprtsock.h
+++ b/include/linux/sunrpc/xprtsock.h
@@ -26,6 +26,7 @@ struct sock_xprt {
 	 */
 	struct socket *		sock;
 	struct sock *		inet;
+	struct file *		file;
 
 	/*
 	 * State of TCP reply receive
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 53de72d2dded..e829036ed81f 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -670,7 +670,8 @@ out_err:
 
 static __poll_t xs_poll_socket(struct sock_xprt *transport)
 {
-	return transport->sock->ops->poll(NULL, transport->sock, NULL);
+	return transport->sock->ops->poll(transport->file, transport->sock,
+			NULL);
 }
 
 static bool xs_poll_socket_readable(struct sock_xprt *transport)
@@ -1253,6 +1254,7 @@ static void xs_reset_transport(struct sock_xprt *transport)
 	struct socket *sock = transport->sock;
 	struct sock *sk = transport->inet;
 	struct rpc_xprt *xprt = &transport->xprt;
+	struct file *filp = transport->file;
 
 	if (sk == NULL)
 		return;
@@ -1266,6 +1268,7 @@ static void xs_reset_transport(struct sock_xprt *transport)
 	write_lock_bh(&sk->sk_callback_lock);
 	transport->inet = NULL;
 	transport->sock = NULL;
+	transport->file = NULL;
 
 	sk->sk_user_data = NULL;
 
@@ -1278,7 +1281,7 @@ static void xs_reset_transport(struct sock_xprt *transport)
 	mutex_unlock(&transport->recv_mutex);
 
 	trace_rpc_socket_close(xprt, sock);
-	sock_release(sock);
+	fput(filp);
 
 	xprt_disconnect_done(xprt);
 }
@@ -1873,6 +1876,7 @@ static struct socket *xs_create_sock(struct rpc_xprt *xprt,
 		struct sock_xprt *transport, int family, int type,
 		int protocol, bool reuseport)
 {
+	struct file *filp;
 	struct socket *sock;
 	int err;
 
@@ -1893,6 +1897,11 @@ static struct socket *xs_create_sock(struct rpc_xprt *xprt,
 		goto out;
 	}
 
+	filp = sock_alloc_file(sock, O_NONBLOCK, NULL);
+	if (IS_ERR(filp))
+		return ERR_CAST(filp);
+	transport->file = filp;
+
 	return sock;
 out:
 	return ERR_PTR(err);
@@ -1938,6 +1947,7 @@ static int xs_local_finish_connecting(struct rpc_xprt *xprt,
 static int xs_local_setup_socket(struct sock_xprt *transport)
 {
 	struct rpc_xprt *xprt = &transport->xprt;
+	struct file *filp;
 	struct socket *sock;
 	int status = -EIO;
 
@@ -1950,6 +1960,13 @@ static int xs_local_setup_socket(struct sock_xprt *transport)
 	}
 	xs_reclassify_socket(AF_LOCAL, sock);
 
+	filp = sock_alloc_file(sock, O_NONBLOCK, NULL);
+	if (IS_ERR(filp)) {
+		status = PTR_ERR(filp);
+		goto out;
+	}
+	transport->file = filp;
+
 	dprintk("RPC:       worker connecting xprt %p via AF_LOCAL to %s\n",
 			xprt, xprt->address_strings[RPC_DISPLAY_ADDR]);
 
-- 
cgit v1.2.3


From f2db7361cb19bf3a6f7fd367f21d8eb325397946 Mon Sep 17 00:00:00 2001
From: Vishnu DASA <vdasa@vmware.com>
Date: Fri, 15 Feb 2019 16:32:47 +0000
Subject: VMCI: Support upto 64-bit PPNs

Add support in the VMCI driver to handle upto 64-bit PPNs when the VMCI
device exposes the capability for 64-bit PPNs.

Reviewed-by: Adit Ranadive <aditr@vmware.com>
Reviewed-by: Jorgen Hansen <jhansen@vmware.com>
Signed-off-by: Vishnu Dasa <vdasa@vmware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/vmw_vmci/vmci_doorbell.c   |  9 +++--
 drivers/misc/vmw_vmci/vmci_doorbell.h   |  2 +-
 drivers/misc/vmw_vmci/vmci_driver.h     |  2 ++
 drivers/misc/vmw_vmci/vmci_guest.c      | 39 ++++++++++++++++----
 drivers/misc/vmw_vmci/vmci_queue_pair.c | 63 +++++++++++++++------------------
 drivers/misc/vmw_vmci/vmci_queue_pair.h |  4 +--
 include/linux/vmw_vmci_defs.h           |  7 ++--
 7 files changed, 77 insertions(+), 49 deletions(-)

(limited to 'include')

diff --git a/drivers/misc/vmw_vmci/vmci_doorbell.c b/drivers/misc/vmw_vmci/vmci_doorbell.c
index b3fa738ae005..7824c7494916 100644
--- a/drivers/misc/vmw_vmci/vmci_doorbell.c
+++ b/drivers/misc/vmw_vmci/vmci_doorbell.c
@@ -330,7 +330,7 @@ int vmci_dbell_host_context_notify(u32 src_cid, struct vmci_handle handle)
 /*
  * Register the notification bitmap with the host.
  */
-bool vmci_dbell_register_notification_bitmap(u32 bitmap_ppn)
+bool vmci_dbell_register_notification_bitmap(u64 bitmap_ppn)
 {
 	int result;
 	struct vmci_notify_bm_set_msg bitmap_set_msg;
@@ -340,11 +340,14 @@ bool vmci_dbell_register_notification_bitmap(u32 bitmap_ppn)
 	bitmap_set_msg.hdr.src = VMCI_ANON_SRC_HANDLE;
 	bitmap_set_msg.hdr.payload_size = sizeof(bitmap_set_msg) -
 	    VMCI_DG_HEADERSIZE;
-	bitmap_set_msg.bitmap_ppn = bitmap_ppn;
+	if (vmci_use_ppn64())
+		bitmap_set_msg.bitmap_ppn64 = bitmap_ppn;
+	else
+		bitmap_set_msg.bitmap_ppn32 = (u32) bitmap_ppn;
 
 	result = vmci_send_datagram(&bitmap_set_msg.hdr);
 	if (result != VMCI_SUCCESS) {
-		pr_devel("Failed to register (PPN=%u) as notification bitmap (error=%d)\n",
+		pr_devel("Failed to register (PPN=%llu) as notification bitmap (error=%d)\n",
 			 bitmap_ppn, result);
 		return false;
 	}
diff --git a/drivers/misc/vmw_vmci/vmci_doorbell.h b/drivers/misc/vmw_vmci/vmci_doorbell.h
index e4c0b17486a5..410a21f8436f 100644
--- a/drivers/misc/vmw_vmci/vmci_doorbell.h
+++ b/drivers/misc/vmw_vmci/vmci_doorbell.h
@@ -45,7 +45,7 @@ struct dbell_cpt_state {
 int vmci_dbell_host_context_notify(u32 src_cid, struct vmci_handle handle);
 int vmci_dbell_get_priv_flags(struct vmci_handle handle, u32 *priv_flags);
 
-bool vmci_dbell_register_notification_bitmap(u32 bitmap_ppn);
+bool vmci_dbell_register_notification_bitmap(u64 bitmap_ppn);
 void vmci_dbell_scan_notification_entries(u8 *bitmap);
 
 #endif /* VMCI_DOORBELL_H */
diff --git a/drivers/misc/vmw_vmci/vmci_driver.h b/drivers/misc/vmw_vmci/vmci_driver.h
index cee9e977d318..2fbf4a0ac657 100644
--- a/drivers/misc/vmw_vmci/vmci_driver.h
+++ b/drivers/misc/vmw_vmci/vmci_driver.h
@@ -54,4 +54,6 @@ void vmci_guest_exit(void);
 bool vmci_guest_code_active(void);
 u32 vmci_get_vm_context_id(void);
 
+bool vmci_use_ppn64(void);
+
 #endif /* _VMCI_DRIVER_H_ */
diff --git a/drivers/misc/vmw_vmci/vmci_guest.c b/drivers/misc/vmw_vmci/vmci_guest.c
index dad5abee656e..928708128177 100644
--- a/drivers/misc/vmw_vmci/vmci_guest.c
+++ b/drivers/misc/vmw_vmci/vmci_guest.c
@@ -64,6 +64,13 @@ struct vmci_guest_device {
 	dma_addr_t notification_base;
 };
 
+static bool use_ppn64;
+
+bool vmci_use_ppn64(void)
+{
+	return use_ppn64;
+}
+
 /* vmci_dev singleton device and supporting data*/
 struct pci_dev *vmci_pdev;
 static struct vmci_guest_device *vmci_dev_g;
@@ -432,6 +439,7 @@ static int vmci_guest_probe_device(struct pci_dev *pdev,
 	struct vmci_guest_device *vmci_dev;
 	void __iomem *iobase;
 	unsigned int capabilities;
+	unsigned int caps_in_use;
 	unsigned long cmd;
 	int vmci_err;
 	int error;
@@ -496,6 +504,23 @@ static int vmci_guest_probe_device(struct pci_dev *pdev,
 		error = -ENXIO;
 		goto err_free_data_buffer;
 	}
+	caps_in_use = VMCI_CAPS_DATAGRAM;
+
+	/*
+	 * Use 64-bit PPNs if the device supports.
+	 *
+	 * There is no check for the return value of dma_set_mask_and_coherent
+	 * since this driver can handle the default mask values if
+	 * dma_set_mask_and_coherent fails.
+	 */
+	if (capabilities & VMCI_CAPS_PPN64) {
+		dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
+		use_ppn64 = true;
+		caps_in_use |= VMCI_CAPS_PPN64;
+	} else {
+		dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(44));
+		use_ppn64 = false;
+	}
 
 	/*
 	 * If the hardware supports notifications, we will use that as
@@ -510,14 +535,14 @@ static int vmci_guest_probe_device(struct pci_dev *pdev,
 				 "Unable to allocate notification bitmap\n");
 		} else {
 			memset(vmci_dev->notification_bitmap, 0, PAGE_SIZE);
-			capabilities |= VMCI_CAPS_NOTIFICATIONS;
+			caps_in_use |= VMCI_CAPS_NOTIFICATIONS;
 		}
 	}
 
-	dev_info(&pdev->dev, "Using capabilities 0x%x\n", capabilities);
+	dev_info(&pdev->dev, "Using capabilities 0x%x\n", caps_in_use);
 
 	/* Let the host know which capabilities we intend to use. */
-	iowrite32(capabilities, vmci_dev->iobase + VMCI_CAPS_ADDR);
+	iowrite32(caps_in_use, vmci_dev->iobase + VMCI_CAPS_ADDR);
 
 	/* Set up global device so that we can start sending datagrams */
 	spin_lock_irq(&vmci_dev_spinlock);
@@ -529,13 +554,13 @@ static int vmci_guest_probe_device(struct pci_dev *pdev,
 	 * Register notification bitmap with device if that capability is
 	 * used.
 	 */
-	if (capabilities & VMCI_CAPS_NOTIFICATIONS) {
+	if (caps_in_use & VMCI_CAPS_NOTIFICATIONS) {
 		unsigned long bitmap_ppn =
 			vmci_dev->notification_base >> PAGE_SHIFT;
 		if (!vmci_dbell_register_notification_bitmap(bitmap_ppn)) {
 			dev_warn(&pdev->dev,
-				 "VMCI device unable to register notification bitmap with PPN 0x%x\n",
-				 (u32) bitmap_ppn);
+				 "VMCI device unable to register notification bitmap with PPN 0x%lx\n",
+				 bitmap_ppn);
 			error = -ENXIO;
 			goto err_remove_vmci_dev_g;
 		}
@@ -611,7 +636,7 @@ static int vmci_guest_probe_device(struct pci_dev *pdev,
 
 	/* Enable specific interrupt bits. */
 	cmd = VMCI_IMR_DATAGRAM;
-	if (capabilities & VMCI_CAPS_NOTIFICATIONS)
+	if (caps_in_use & VMCI_CAPS_NOTIFICATIONS)
 		cmd |= VMCI_IMR_NOTIFICATION;
 	iowrite32(cmd, vmci_dev->iobase + VMCI_IMR_ADDR);
 
diff --git a/drivers/misc/vmw_vmci/vmci_queue_pair.c b/drivers/misc/vmw_vmci/vmci_queue_pair.c
index 264f4ed8eef2..f5f1aac9d163 100644
--- a/drivers/misc/vmw_vmci/vmci_queue_pair.c
+++ b/drivers/misc/vmw_vmci/vmci_queue_pair.c
@@ -435,8 +435,8 @@ static int qp_alloc_ppn_set(void *prod_q,
 			    void *cons_q,
 			    u64 num_consume_pages, struct ppn_set *ppn_set)
 {
-	u32 *produce_ppns;
-	u32 *consume_ppns;
+	u64 *produce_ppns;
+	u64 *consume_ppns;
 	struct vmci_queue *produce_q = prod_q;
 	struct vmci_queue *consume_q = cons_q;
 	u64 i;
@@ -462,31 +462,13 @@ static int qp_alloc_ppn_set(void *prod_q,
 		return VMCI_ERROR_NO_MEM;
 	}
 
-	for (i = 0; i < num_produce_pages; i++) {
-		unsigned long pfn;
-
+	for (i = 0; i < num_produce_pages; i++)
 		produce_ppns[i] =
 			produce_q->kernel_if->u.g.pas[i] >> PAGE_SHIFT;
-		pfn = produce_ppns[i];
-
-		/* Fail allocation if PFN isn't supported by hypervisor. */
-		if (sizeof(pfn) > sizeof(*produce_ppns)
-		    && pfn != produce_ppns[i])
-			goto ppn_error;
-	}
-
-	for (i = 0; i < num_consume_pages; i++) {
-		unsigned long pfn;
 
+	for (i = 0; i < num_consume_pages; i++)
 		consume_ppns[i] =
 			consume_q->kernel_if->u.g.pas[i] >> PAGE_SHIFT;
-		pfn = consume_ppns[i];
-
-		/* Fail allocation if PFN isn't supported by hypervisor. */
-		if (sizeof(pfn) > sizeof(*consume_ppns)
-		    && pfn != consume_ppns[i])
-			goto ppn_error;
-	}
 
 	ppn_set->num_produce_pages = num_produce_pages;
 	ppn_set->num_consume_pages = num_consume_pages;
@@ -494,11 +476,6 @@ static int qp_alloc_ppn_set(void *prod_q,
 	ppn_set->consume_ppns = consume_ppns;
 	ppn_set->initialized = true;
 	return VMCI_SUCCESS;
-
- ppn_error:
-	kfree(produce_ppns);
-	kfree(consume_ppns);
-	return VMCI_ERROR_INVALID_ARGS;
 }
 
 /*
@@ -520,12 +497,28 @@ static void qp_free_ppn_set(struct ppn_set *ppn_set)
  */
 static int qp_populate_ppn_set(u8 *call_buf, const struct ppn_set *ppn_set)
 {
-	memcpy(call_buf, ppn_set->produce_ppns,
-	       ppn_set->num_produce_pages * sizeof(*ppn_set->produce_ppns));
-	memcpy(call_buf +
-	       ppn_set->num_produce_pages * sizeof(*ppn_set->produce_ppns),
-	       ppn_set->consume_ppns,
-	       ppn_set->num_consume_pages * sizeof(*ppn_set->consume_ppns));
+	if (vmci_use_ppn64()) {
+		memcpy(call_buf, ppn_set->produce_ppns,
+		       ppn_set->num_produce_pages *
+		       sizeof(*ppn_set->produce_ppns));
+		memcpy(call_buf +
+		       ppn_set->num_produce_pages *
+		       sizeof(*ppn_set->produce_ppns),
+		       ppn_set->consume_ppns,
+		       ppn_set->num_consume_pages *
+		       sizeof(*ppn_set->consume_ppns));
+	} else {
+		int i;
+		u32 *ppns = (u32 *) call_buf;
+
+		for (i = 0; i < ppn_set->num_produce_pages; i++)
+			ppns[i] = (u32) ppn_set->produce_ppns[i];
+
+		ppns = &ppns[ppn_set->num_produce_pages];
+
+		for (i = 0; i < ppn_set->num_consume_pages; i++)
+			ppns[i] = (u32) ppn_set->consume_ppns[i];
+	}
 
 	return VMCI_SUCCESS;
 }
@@ -951,13 +944,15 @@ static int qp_alloc_hypercall(const struct qp_guest_endpoint *entry)
 {
 	struct vmci_qp_alloc_msg *alloc_msg;
 	size_t msg_size;
+	size_t ppn_size;
 	int result;
 
 	if (!entry || entry->num_ppns <= 2)
 		return VMCI_ERROR_INVALID_ARGS;
 
+	ppn_size = vmci_use_ppn64() ? sizeof(u64) : sizeof(u32);
 	msg_size = sizeof(*alloc_msg) +
-	    (size_t) entry->num_ppns * sizeof(u32);
+	    (size_t) entry->num_ppns * ppn_size;
 	alloc_msg = kmalloc(msg_size, GFP_KERNEL);
 	if (!alloc_msg)
 		return VMCI_ERROR_NO_MEM;
diff --git a/drivers/misc/vmw_vmci/vmci_queue_pair.h b/drivers/misc/vmw_vmci/vmci_queue_pair.h
index ed177f04ef24..46c0b6c7bafb 100644
--- a/drivers/misc/vmw_vmci/vmci_queue_pair.h
+++ b/drivers/misc/vmw_vmci/vmci_queue_pair.h
@@ -28,8 +28,8 @@ typedef int (*vmci_event_release_cb) (void *client_data);
 struct ppn_set {
 	u64 num_produce_pages;
 	u64 num_consume_pages;
-	u32 *produce_ppns;
-	u32 *consume_ppns;
+	u64 *produce_ppns;
+	u64 *consume_ppns;
 	bool initialized;
 };
 
diff --git a/include/linux/vmw_vmci_defs.h b/include/linux/vmw_vmci_defs.h
index b724ef7005de..eaa1e762bf06 100644
--- a/include/linux/vmw_vmci_defs.h
+++ b/include/linux/vmw_vmci_defs.h
@@ -45,6 +45,7 @@
 #define VMCI_CAPS_GUESTCALL     0x2
 #define VMCI_CAPS_DATAGRAM      0x4
 #define VMCI_CAPS_NOTIFICATIONS 0x8
+#define VMCI_CAPS_PPN64         0x10
 
 /* Interrupt Cause register bits. */
 #define VMCI_ICR_DATAGRAM      0x1
@@ -569,8 +570,10 @@ struct vmci_resource_query_msg {
  */
 struct vmci_notify_bm_set_msg {
 	struct vmci_datagram hdr;
-	u32 bitmap_ppn;
-	u32 _pad;
+	union {
+		u32 bitmap_ppn32;
+		u64 bitmap_ppn64;
+	};
 };
 
 /*
-- 
cgit v1.2.3


From 2c1ea6abde8884208a9b94254740ae4597c62000 Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Thu, 21 Feb 2019 11:29:35 +0000
Subject: platform: set of_node in platform_device_register_full()

If the provided fwnode is an OF node, set dev.of_node as well.

Also add an of_node_reused flag to struct platform_device_info and copy
this to the new device.  This is needed to avoid pinctrl settings being
requested twice.  See 4e75e1d7dac9 ("driver core: add helper to reuse a
device-tree node") for a longer explanation.

Some drivers are just shims that create extra "glue" devices with the
DT device as parent and have the real driver bind to these.  In these
cases, the glue device needs to get a reference to the original DT node
in order for the main driver to access properties and child nodes.

For example, the sunxi-musb driver creates such a glue device using
platform_device_register_full().  Consequently, devices attached to
this USB interface don't get associated with DT nodes, if present,
the way they do with EHCI.

This change will allow sunxi-musb and similar drivers to easily
propagate the DT node to child devices as required.

Signed-off-by: Mans Rullgard <mans@mansr.com>
Reviewed-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/platform.c         | 2 ++
 include/linux/platform_device.h | 1 +
 2 files changed, 3 insertions(+)

(limited to 'include')

diff --git a/drivers/base/platform.c b/drivers/base/platform.c
index 0d3611cd1b3b..fc67a325beaa 100644
--- a/drivers/base/platform.c
+++ b/drivers/base/platform.c
@@ -525,6 +525,8 @@ struct platform_device *platform_device_register_full(
 
 	pdev->dev.parent = pdevinfo->parent;
 	pdev->dev.fwnode = pdevinfo->fwnode;
+	pdev->dev.of_node = of_node_get(to_of_node(pdev->dev.fwnode));
+	pdev->dev.of_node_reused = pdevinfo->of_node_reused;
 
 	if (pdevinfo->dma_mask) {
 		/*
diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h
index c7c081dc6034..466a8d02e298 100644
--- a/include/linux/platform_device.h
+++ b/include/linux/platform_device.h
@@ -63,6 +63,7 @@ extern int platform_add_devices(struct platform_device **, int);
 struct platform_device_info {
 		struct device *parent;
 		struct fwnode_handle *fwnode;
+		bool of_node_reused;
 
 		const char *name;
 		int id;
-- 
cgit v1.2.3


From 82c10ac79b79481f3246cb6d011da7a15776e762 Mon Sep 17 00:00:00 2001
From: Avri Altman <avri.altman@wdc.com>
Date: Sun, 17 Feb 2019 08:30:27 +0200
Subject: scsi: clean obsolete return values of eh_timed_out

Those are no longer in use since commit 242f9dcb8ba6
("block: unify request timeout handling").

Signed-off-by: Avri Altman <avri.altman@wdc.com>
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 include/scsi/scsi_host.h | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h
index 4047d68d1b08..dbbdfbdb1a04 100644
--- a/include/scsi/scsi_host.h
+++ b/include/scsi/scsi_host.h
@@ -300,11 +300,7 @@ struct scsi_host_template {
 	/*
 	 * This is an optional routine that allows the transport to become
 	 * involved when a scsi io timer fires. The return value tells the
-	 * timer routine how to finish the io timeout handling:
-	 * EH_HANDLED:		I fixed the error, please complete the command
-	 * EH_RESET_TIMER:	I need more time, reset the timer and
-	 *			begin counting again
-	 * EH_DONE:		Begin normal error recovery
+	 * timer routine how to finish the io timeout handling.
 	 *
 	 * Status: OPTIONAL
 	 */
-- 
cgit v1.2.3


From d8cf757fbd3ee96a449f656707e773c91ca805b8 Mon Sep 17 00:00:00 2001
From: Peter Oskolkov <posk@google.com>
Date: Mon, 25 Feb 2019 17:43:46 -0800
Subject: net: remove unused struct inet_frag_queue.fragments field

Now that all users of struct inet_frag_queue have been converted
to use 'rb_fragments', remove the unused 'fragments' field.

Build with `make allyesconfig` succeeded. ip_defrag selftest passed.

Signed-off-by: Peter Oskolkov <posk@google.com>
Acked-by: Stefan Schmidt <stefan@datenfreihafen.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet_frag.h                 |  4 +--
 net/ieee802154/6lowpan/reassembly.c     |  1 -
 net/ipv4/inet_fragment.c                | 44 ++++++++++-----------------------
 net/ipv4/ip_fragment.c                  |  2 --
 net/ipv6/netfilter/nf_conntrack_reasm.c |  1 -
 net/ipv6/reassembly.c                   |  1 -
 6 files changed, 14 insertions(+), 39 deletions(-)

(limited to 'include')

diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
index b02bf737d019..378904ee9129 100644
--- a/include/net/inet_frag.h
+++ b/include/net/inet_frag.h
@@ -56,7 +56,6 @@ struct frag_v6_compare_key {
  * @timer: queue expiration timer
  * @lock: spinlock protecting this frag
  * @refcnt: reference count of the queue
- * @fragments: received fragments head
  * @rb_fragments: received fragments rb-tree root
  * @fragments_tail: received fragments tail
  * @last_run_head: the head of the last "run". see ip_fragment.c
@@ -77,8 +76,7 @@ struct inet_frag_queue {
 	struct timer_list	timer;
 	spinlock_t		lock;
 	refcount_t		refcnt;
-	struct sk_buff		*fragments;  /* used in 6lopwpan IPv6. */
-	struct rb_root		rb_fragments; /* Used in IPv4/IPv6. */
+	struct rb_root		rb_fragments;
 	struct sk_buff		*fragments_tail;
 	struct sk_buff		*last_run_head;
 	ktime_t			stamp;
diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c
index bd61633d2c32..4196bcd4105a 100644
--- a/net/ieee802154/6lowpan/reassembly.c
+++ b/net/ieee802154/6lowpan/reassembly.c
@@ -179,7 +179,6 @@ static int lowpan_frag_reasm(struct lowpan_frag_queue *fq, struct sk_buff *skb,
 
 	skb->dev = ldev;
 	skb->tstamp = fq->q.stamp;
-	fq->q.fragments = NULL;
 	fq->q.rb_fragments = RB_ROOT;
 	fq->q.fragments_tail = NULL;
 	fq->q.last_run_head = NULL;
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 9f69411251d0..737808e27f8b 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -203,7 +203,6 @@ EXPORT_SYMBOL(inet_frag_rbtree_purge);
 
 void inet_frag_destroy(struct inet_frag_queue *q)
 {
-	struct sk_buff *fp;
 	struct netns_frags *nf;
 	unsigned int sum, sum_truesize = 0;
 	struct inet_frags *f;
@@ -212,20 +211,9 @@ void inet_frag_destroy(struct inet_frag_queue *q)
 	WARN_ON(del_timer(&q->timer) != 0);
 
 	/* Release all fragment data. */
-	fp = q->fragments;
 	nf = q->net;
 	f = nf->f;
-	if (fp) {
-		do {
-			struct sk_buff *xp = fp->next;
-
-			sum_truesize += fp->truesize;
-			kfree_skb(fp);
-			fp = xp;
-		} while (fp);
-	} else {
-		sum_truesize = inet_frag_rbtree_purge(&q->rb_fragments);
-	}
+	sum_truesize = inet_frag_rbtree_purge(&q->rb_fragments);
 	sum = sum_truesize + f->qsize;
 
 	call_rcu(&q->rcu, inet_frag_destroy_rcu);
@@ -489,26 +477,20 @@ EXPORT_SYMBOL(inet_frag_reasm_finish);
 
 struct sk_buff *inet_frag_pull_head(struct inet_frag_queue *q)
 {
-	struct sk_buff *head;
+	struct sk_buff *head, *skb;
 
-	if (q->fragments) {
-		head = q->fragments;
-		q->fragments = head->next;
-	} else {
-		struct sk_buff *skb;
+	head = skb_rb_first(&q->rb_fragments);
+	if (!head)
+		return NULL;
+	skb = FRAG_CB(head)->next_frag;
+	if (skb)
+		rb_replace_node(&head->rbnode, &skb->rbnode,
+				&q->rb_fragments);
+	else
+		rb_erase(&head->rbnode, &q->rb_fragments);
+	memset(&head->rbnode, 0, sizeof(head->rbnode));
+	barrier();
 
-		head = skb_rb_first(&q->rb_fragments);
-		if (!head)
-			return NULL;
-		skb = FRAG_CB(head)->next_frag;
-		if (skb)
-			rb_replace_node(&head->rbnode, &skb->rbnode,
-					&q->rb_fragments);
-		else
-			rb_erase(&head->rbnode, &q->rb_fragments);
-		memset(&head->rbnode, 0, sizeof(head->rbnode));
-		barrier();
-	}
 	if (head == q->fragments_tail)
 		q->fragments_tail = NULL;
 
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 486ecb0aeb87..cf2b0a6a3337 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -261,7 +261,6 @@ static int ip_frag_reinit(struct ipq *qp)
 	qp->q.flags = 0;
 	qp->q.len = 0;
 	qp->q.meat = 0;
-	qp->q.fragments = NULL;
 	qp->q.rb_fragments = RB_ROOT;
 	qp->q.fragments_tail = NULL;
 	qp->q.last_run_head = NULL;
@@ -451,7 +450,6 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb,
 	ip_send_check(iph);
 
 	__IP_INC_STATS(net, IPSTATS_MIB_REASMOKS);
-	qp->q.fragments = NULL;
 	qp->q.rb_fragments = RB_ROOT;
 	qp->q.fragments_tail = NULL;
 	qp->q.last_run_head = NULL;
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index cb1b4772dac0..3de0e9b0a482 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -365,7 +365,6 @@ static int nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *skb,
 					 skb_network_header_len(skb),
 					 skb->csum);
 
-	fq->q.fragments = NULL;
 	fq->q.rb_fragments = RB_ROOT;
 	fq->q.fragments_tail = NULL;
 	fq->q.last_run_head = NULL;
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 24264d0a4b85..1a832f5e190b 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -304,7 +304,6 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *skb,
 	rcu_read_lock();
 	__IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMOKS);
 	rcu_read_unlock();
-	fq->q.fragments = NULL;
 	fq->q.rb_fragments = RB_ROOT;
 	fq->q.fragments_tail = NULL;
 	fq->q.last_run_head = NULL;
-- 
cgit v1.2.3


From f4b6bcc7002f0e3a3428bac33cf1945abff95450 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Mon, 25 Feb 2019 19:34:02 -0800
Subject: net: devlink: turn devlink into a built-in

Being able to build devlink as a module causes growing pains.
First all drivers had to add a meta dependency to make sure
they are not built in when devlink is built as a module.  Now
we are struggling to invoke ethtool compat code reliably.

Make devlink code built-in, users can still not build it at
all but the dynamically loadable module option is removed.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/infiniband/hw/bnxt_re/Kconfig           |  1 -
 drivers/infiniband/hw/mlx4/Kconfig              |  1 -
 drivers/net/Kconfig                             |  1 -
 drivers/net/ethernet/broadcom/Kconfig           |  1 -
 drivers/net/ethernet/cavium/Kconfig             |  1 -
 drivers/net/ethernet/mellanox/mlx4/Kconfig      |  1 -
 drivers/net/ethernet/mellanox/mlx5/core/Kconfig |  1 -
 drivers/net/ethernet/mellanox/mlxsw/Kconfig     |  1 -
 drivers/net/ethernet/netronome/Kconfig          |  1 -
 include/net/devlink.h                           | 10 ++++------
 net/Kconfig                                     | 11 +----------
 net/core/devlink.c                              | 15 ++-------------
 net/dsa/Kconfig                                 |  2 +-
 13 files changed, 8 insertions(+), 39 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/hw/bnxt_re/Kconfig b/drivers/infiniband/hw/bnxt_re/Kconfig
index 18f5ed082f41..19982a4a9bba 100644
--- a/drivers/infiniband/hw/bnxt_re/Kconfig
+++ b/drivers/infiniband/hw/bnxt_re/Kconfig
@@ -1,7 +1,6 @@
 config INFINIBAND_BNXT_RE
     tristate "Broadcom Netxtreme HCA support"
     depends on ETHERNET && NETDEVICES && PCI && INET && DCB
-    depends on MAY_USE_DEVLINK
     select NET_VENDOR_BROADCOM
     select BNXT
     ---help---
diff --git a/drivers/infiniband/hw/mlx4/Kconfig b/drivers/infiniband/hw/mlx4/Kconfig
index d1de3285fd88..4e9936731867 100644
--- a/drivers/infiniband/hw/mlx4/Kconfig
+++ b/drivers/infiniband/hw/mlx4/Kconfig
@@ -2,7 +2,6 @@ config MLX4_INFINIBAND
 	tristate "Mellanox ConnectX HCA support"
 	depends on NETDEVICES && ETHERNET && PCI && INET
 	depends on INFINIBAND_USER_ACCESS || !INFINIBAND_USER_ACCESS
-	depends on MAY_USE_DEVLINK
 	select NET_VENDOR_MELLANOX
 	select MLX4_CORE
 	---help---
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index 6210757772ed..5e4ca082cfcd 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -505,7 +505,6 @@ source "drivers/net/hyperv/Kconfig"
 config NETDEVSIM
 	tristate "Simulated networking device"
 	depends on DEBUG_FS
-	depends on MAY_USE_DEVLINK
 	help
 	  This driver is a developer testing tool and software model that can
 	  be used to test various control path networking APIs, especially
diff --git a/drivers/net/ethernet/broadcom/Kconfig b/drivers/net/ethernet/broadcom/Kconfig
index c1d3ee9baf7e..716bfbba59cf 100644
--- a/drivers/net/ethernet/broadcom/Kconfig
+++ b/drivers/net/ethernet/broadcom/Kconfig
@@ -194,7 +194,6 @@ config SYSTEMPORT
 config BNXT
 	tristate "Broadcom NetXtreme-C/E support"
 	depends on PCI
-	depends on MAY_USE_DEVLINK
 	select FW_LOADER
 	select LIBCRC32C
 	---help---
diff --git a/drivers/net/ethernet/cavium/Kconfig b/drivers/net/ethernet/cavium/Kconfig
index 05f4a3b21e29..6650e2a5f171 100644
--- a/drivers/net/ethernet/cavium/Kconfig
+++ b/drivers/net/ethernet/cavium/Kconfig
@@ -64,7 +64,6 @@ config CAVIUM_PTP
 config LIQUIDIO
 	tristate "Cavium LiquidIO support"
 	depends on 64BIT && PCI
-	depends on MAY_USE_DEVLINK
 	depends on PCI
 	imply PTP_1588_CLOCK
 	select FW_LOADER
diff --git a/drivers/net/ethernet/mellanox/mlx4/Kconfig b/drivers/net/ethernet/mellanox/mlx4/Kconfig
index f200b8c420d5..ff8057ed97ee 100644
--- a/drivers/net/ethernet/mellanox/mlx4/Kconfig
+++ b/drivers/net/ethernet/mellanox/mlx4/Kconfig
@@ -4,7 +4,6 @@
 
 config MLX4_EN
 	tristate "Mellanox Technologies 1/10/40Gbit Ethernet support"
-	depends on MAY_USE_DEVLINK
 	depends on PCI && NETDEVICES && ETHERNET && INET
 	select MLX4_CORE
 	imply PTP_1588_CLOCK
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
index 37a551436e4a..6debffb8336b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
@@ -4,7 +4,6 @@
 
 config MLX5_CORE
 	tristate "Mellanox 5th generation network adapters (ConnectX series) core driver"
-	depends on MAY_USE_DEVLINK
 	depends on PCI
 	imply PTP_1588_CLOCK
 	imply VXLAN
diff --git a/drivers/net/ethernet/mellanox/mlxsw/Kconfig b/drivers/net/ethernet/mellanox/mlxsw/Kconfig
index b9a25aed5d11..9c195dfed031 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/Kconfig
+++ b/drivers/net/ethernet/mellanox/mlxsw/Kconfig
@@ -4,7 +4,6 @@
 
 config MLXSW_CORE
 	tristate "Mellanox Technologies Switch ASICs support"
-	depends on MAY_USE_DEVLINK
 	---help---
 	  This driver supports Mellanox Technologies Switch ASICs family.
 
diff --git a/drivers/net/ethernet/netronome/Kconfig b/drivers/net/ethernet/netronome/Kconfig
index 66f15b05b65e..549898d5d450 100644
--- a/drivers/net/ethernet/netronome/Kconfig
+++ b/drivers/net/ethernet/netronome/Kconfig
@@ -19,7 +19,6 @@ config NFP
 	tristate "Netronome(R) NFP4000/NFP6000 NIC driver"
 	depends on PCI && PCI_MSI
 	depends on VXLAN || VXLAN=n
-	depends on MAY_USE_DEVLINK
 	---help---
 	  This driver supports the Netronome(R) NFP4000/NFP6000 based
 	  cards working as a advanced Ethernet NIC.  It works with both
diff --git a/include/net/devlink.h b/include/net/devlink.h
index a2da49dd9147..f9f7fe974652 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -707,6 +707,10 @@ devlink_health_reporter_priv(struct devlink_health_reporter *reporter);
 int devlink_health_report(struct devlink_health_reporter *reporter,
 			  const char *msg, void *priv_ctx);
 
+void devlink_compat_running_version(struct net_device *dev,
+				    char *buf, size_t len);
+int devlink_compat_flash_update(struct net_device *dev, const char *file_name);
+
 #else
 
 static inline struct devlink *devlink_alloc(const struct devlink_ops *ops,
@@ -1190,13 +1194,7 @@ devlink_health_report(struct devlink_health_reporter *reporter,
 {
 	return 0;
 }
-#endif
 
-#if IS_REACHABLE(CONFIG_NET_DEVLINK)
-void devlink_compat_running_version(struct net_device *dev,
-				    char *buf, size_t len);
-int devlink_compat_flash_update(struct net_device *dev, const char *file_name);
-#else
 static inline void
 devlink_compat_running_version(struct net_device *dev, char *buf, size_t len)
 {
diff --git a/net/Kconfig b/net/Kconfig
index 62da6148e9f8..1efe1f9ee492 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -429,21 +429,12 @@ config NET_SOCK_MSG
 	  with the help of BPF programs.
 
 config NET_DEVLINK
-	tristate "Network physical/parent device Netlink interface"
+	bool "Network physical/parent device Netlink interface"
 	help
 	  Network physical/parent device Netlink interface provides
 	  infrastructure to support access to physical chip-wide config and
 	  monitoring.
 
-config MAY_USE_DEVLINK
-	tristate
-	default m if NET_DEVLINK=m
-	default y if NET_DEVLINK=y || NET_DEVLINK=n
-	help
-	  Drivers using the devlink infrastructure should have a dependency
-	  on MAY_USE_DEVLINK to ensure they do not cause link errors when
-	  devlink is a loadable module and the driver using it is built-in.
-
 config PAGE_POOL
        bool
 
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 4f31ddc883e7..05e04ea0a5c7 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -6482,20 +6482,9 @@ int devlink_compat_flash_update(struct net_device *dev, const char *file_name)
 	return -EOPNOTSUPP;
 }
 
-static int __init devlink_module_init(void)
+static int __init devlink_init(void)
 {
 	return genl_register_family(&devlink_nl_family);
 }
 
-static void __exit devlink_module_exit(void)
-{
-	genl_unregister_family(&devlink_nl_family);
-}
-
-module_init(devlink_module_init);
-module_exit(devlink_module_exit);
-
-MODULE_LICENSE("GPL v2");
-MODULE_AUTHOR("Jiri Pirko <jiri@mellanox.com>");
-MODULE_DESCRIPTION("Network physical device Netlink interface");
-MODULE_ALIAS_GENL_FAMILY(DEVLINK_GENL_NAME);
+subsys_initcall(devlink_init);
diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig
index 91e52973ee13..fab49132345f 100644
--- a/net/dsa/Kconfig
+++ b/net/dsa/Kconfig
@@ -6,7 +6,7 @@ config HAVE_NET_DSA
 
 config NET_DSA
 	tristate "Distributed Switch Architecture"
-	depends on HAVE_NET_DSA && MAY_USE_DEVLINK
+	depends on HAVE_NET_DSA
 	depends on BRIDGE || BRIDGE=n
 	select NET_SWITCHDEV
 	select PHYLINK
-- 
cgit v1.2.3


From b473b0d23529cde6c825a592c035e9d910b19e21 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Mon, 25 Feb 2019 19:34:03 -0800
Subject: devlink: create a special NDO for getting the devlink instance

Instead of iterating over all devlink ports add a NDO which
will return the devlink instance from the driver.

v2: add the netdev_to_devlink() helper (Michal)
v3: check that devlink has ops (Florian)
v4: hold devlink_mutex (Jiri)

Suggested-by: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  7 ++++++
 include/net/devlink.h     |  9 ++++++++
 net/core/devlink.c        | 56 ++++++++++++++---------------------------------
 3 files changed, 33 insertions(+), 39 deletions(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index ffbddd03242b..58e83bd7a861 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -941,6 +941,8 @@ struct dev_ifalias {
 	char ifalias[];
 };
 
+struct devlink;
+
 /*
  * This structure defines the management hooks for network devices.
  * The following hooks can be defined; unless noted otherwise, they are
@@ -1249,6 +1251,10 @@ struct dev_ifalias {
  *	that got dropped are freed/returned via xdp_return_frame().
  *	Returns negative number, means general error invoking ndo, meaning
  *	no frames were xmit'ed and core-caller will free all frames.
+ * struct devlink *(*ndo_get_devlink)(struct net_device *dev);
+ *	Get devlink instance associated with a given netdev.
+ *	Called with a reference on the netdevice and devlink locks only,
+ *	rtnl_lock is not held.
  */
 struct net_device_ops {
 	int			(*ndo_init)(struct net_device *dev);
@@ -1447,6 +1453,7 @@ struct net_device_ops {
 						u32 flags);
 	int			(*ndo_xsk_async_xmit)(struct net_device *dev,
 						      u32 queue_id);
+	struct devlink *	(*ndo_get_devlink)(struct net_device *dev);
 };
 
 /**
diff --git a/include/net/devlink.h b/include/net/devlink.h
index f9f7fe974652..7f5a0bdca228 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -538,6 +538,15 @@ static inline struct devlink *priv_to_devlink(void *priv)
 	return container_of(priv, struct devlink, priv);
 }
 
+static inline struct devlink *netdev_to_devlink(struct net_device *dev)
+{
+#if IS_ENABLED(CONFIG_NET_DEVLINK)
+	if (dev->netdev_ops->ndo_get_devlink)
+		return dev->netdev_ops->ndo_get_devlink(dev);
+#endif
+	return NULL;
+}
+
 struct ib_device;
 
 #if IS_ENABLED(CONFIG_NET_DEVLINK)
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 05e04ea0a5c7..24bfbd2d71e7 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -6397,9 +6397,6 @@ static void __devlink_compat_running_version(struct devlink *devlink,
 	struct sk_buff *msg;
 	int rem, err;
 
-	if (!devlink->ops->info_get)
-		return;
-
 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
 	if (!msg)
 		return;
@@ -6431,55 +6428,36 @@ free_msg:
 void devlink_compat_running_version(struct net_device *dev,
 				    char *buf, size_t len)
 {
-	struct devlink_port *devlink_port;
 	struct devlink *devlink;
 
 	mutex_lock(&devlink_mutex);
-	list_for_each_entry(devlink, &devlink_list, list) {
-		mutex_lock(&devlink->lock);
-		list_for_each_entry(devlink_port, &devlink->port_list, list) {
-			if (devlink_port->type == DEVLINK_PORT_TYPE_ETH &&
-			    devlink_port->type_dev == dev) {
-				__devlink_compat_running_version(devlink,
-								 buf, len);
-				mutex_unlock(&devlink->lock);
-				goto out;
-			}
-		}
-		mutex_unlock(&devlink->lock);
-	}
-out:
+	devlink = netdev_to_devlink(dev);
+	if (!devlink || !devlink->ops || !devlink->ops->info_get)
+		goto unlock_list;
+
+	mutex_lock(&devlink->lock);
+	__devlink_compat_running_version(devlink, buf, len);
+	mutex_unlock(&devlink->lock);
+unlock_list:
 	mutex_unlock(&devlink_mutex);
 }
 
 int devlink_compat_flash_update(struct net_device *dev, const char *file_name)
 {
-	struct devlink_port *devlink_port;
 	struct devlink *devlink;
+	int ret = -EOPNOTSUPP;
 
 	mutex_lock(&devlink_mutex);
-	list_for_each_entry(devlink, &devlink_list, list) {
-		mutex_lock(&devlink->lock);
-		list_for_each_entry(devlink_port, &devlink->port_list, list) {
-			int ret = -EOPNOTSUPP;
-
-			if (devlink_port->type != DEVLINK_PORT_TYPE_ETH ||
-			    devlink_port->type_dev != dev)
-				continue;
+	devlink = netdev_to_devlink(dev);
+	if (!devlink || !devlink->ops || !devlink->ops->flash_update)
+		goto unlock_list;
 
-			mutex_unlock(&devlink_mutex);
-			if (devlink->ops->flash_update)
-				ret = devlink->ops->flash_update(devlink,
-								 file_name,
-								 NULL, NULL);
-			mutex_unlock(&devlink->lock);
-			return ret;
-		}
-		mutex_unlock(&devlink->lock);
-	}
+	mutex_lock(&devlink->lock);
+	ret = devlink->ops->flash_update(devlink, file_name, NULL, NULL);
+	mutex_unlock(&devlink->lock);
+unlock_list:
 	mutex_unlock(&devlink_mutex);
-
-	return -EOPNOTSUPP;
+	return ret;
 }
 
 static int __init devlink_init(void)
-- 
cgit v1.2.3


From 70fb0828800b45bfb1887223715263440dfb6b23 Mon Sep 17 00:00:00 2001
From: Roopa Prabhu <roopa@cumulusnetworks.com>
Date: Mon, 25 Feb 2019 22:03:01 -0800
Subject: vxlan: add extack support for create and changelink

This patch adds extack coverage in vxlan link
create and changelink paths. Introduces a new helper
vxlan_nl2flags to consolidate flag attribute validation.

thanks to Johannes Berg for some tips to construct the
generic vxlan flag extack strings.

Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
Reviewed-by: Petr Machata <petrm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/vxlan.c | 208 +++++++++++++++++++++++++++++++++++-----------------
 include/net/vxlan.h |  31 ++++++++
 2 files changed, 172 insertions(+), 67 deletions(-)

(limited to 'include')

diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 577201cd880c..a3c46d78d216 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -3583,11 +3583,40 @@ errout:
 	return err;
 }
 
+/* Set/clear flags based on attribute */
+static int vxlan_nl2flag(struct vxlan_config *conf, struct nlattr *tb[],
+			  int attrtype, unsigned long mask, bool changelink,
+			  bool changelink_supported,
+			  struct netlink_ext_ack *extack)
+{
+	unsigned long flags;
+
+	if (!tb[attrtype])
+		return 0;
+
+	if (changelink && !changelink_supported) {
+		vxlan_flag_attr_error(attrtype, extack);
+		return -EOPNOTSUPP;
+	}
+
+	if (vxlan_policy[attrtype].type == NLA_FLAG)
+		flags = conf->flags | mask;
+	else if (nla_get_u8(tb[attrtype]))
+		flags = conf->flags | mask;
+	else
+		flags = conf->flags & ~mask;
+
+	conf->flags = flags;
+
+	return 0;
+}
+
 static int vxlan_nl2conf(struct nlattr *tb[], struct nlattr *data[],
 			 struct net_device *dev, struct vxlan_config *conf,
-			 bool changelink)
+			 bool changelink, struct netlink_ext_ack *extack)
 {
 	struct vxlan_dev *vxlan = netdev_priv(dev);
+	int err = 0;
 
 	memset(conf, 0, sizeof(*conf));
 
@@ -3598,40 +3627,54 @@ static int vxlan_nl2conf(struct nlattr *tb[], struct nlattr *data[],
 	if (data[IFLA_VXLAN_ID]) {
 		__be32 vni = cpu_to_be32(nla_get_u32(data[IFLA_VXLAN_ID]));
 
-		if (changelink && (vni != conf->vni))
+		if (changelink && (vni != conf->vni)) {
+			NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_VXLAN_ID], "Cannot change VNI");
 			return -EOPNOTSUPP;
+		}
 		conf->vni = cpu_to_be32(nla_get_u32(data[IFLA_VXLAN_ID]));
 	}
 
 	if (data[IFLA_VXLAN_GROUP]) {
-		if (changelink && (conf->remote_ip.sa.sa_family != AF_INET))
+		if (changelink && (conf->remote_ip.sa.sa_family != AF_INET)) {
+			NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_VXLAN_GROUP], "New group address family does not match old group");
 			return -EOPNOTSUPP;
+		}
 
 		conf->remote_ip.sin.sin_addr.s_addr = nla_get_in_addr(data[IFLA_VXLAN_GROUP]);
 		conf->remote_ip.sa.sa_family = AF_INET;
 	} else if (data[IFLA_VXLAN_GROUP6]) {
-		if (!IS_ENABLED(CONFIG_IPV6))
+		if (!IS_ENABLED(CONFIG_IPV6)) {
+			NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_VXLAN_GROUP6], "IPv6 support not enabled in the kernel");
 			return -EPFNOSUPPORT;
+		}
 
-		if (changelink && (conf->remote_ip.sa.sa_family != AF_INET6))
+		if (changelink && (conf->remote_ip.sa.sa_family != AF_INET6)) {
+			NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_VXLAN_GROUP6], "New group address family does not match old group");
 			return -EOPNOTSUPP;
+		}
 
 		conf->remote_ip.sin6.sin6_addr = nla_get_in6_addr(data[IFLA_VXLAN_GROUP6]);
 		conf->remote_ip.sa.sa_family = AF_INET6;
 	}
 
 	if (data[IFLA_VXLAN_LOCAL]) {
-		if (changelink && (conf->saddr.sa.sa_family != AF_INET))
+		if (changelink && (conf->saddr.sa.sa_family != AF_INET)) {
+			NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_VXLAN_LOCAL], "New local address family does not match old");
 			return -EOPNOTSUPP;
+		}
 
 		conf->saddr.sin.sin_addr.s_addr = nla_get_in_addr(data[IFLA_VXLAN_LOCAL]);
 		conf->saddr.sa.sa_family = AF_INET;
 	} else if (data[IFLA_VXLAN_LOCAL6]) {
-		if (!IS_ENABLED(CONFIG_IPV6))
+		if (!IS_ENABLED(CONFIG_IPV6)) {
+			NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_VXLAN_LOCAL6], "IPv6 support not enabled in the kernel");
 			return -EPFNOSUPPORT;
+		}
 
-		if (changelink && (conf->saddr.sa.sa_family != AF_INET6))
+		if (changelink && (conf->saddr.sa.sa_family != AF_INET6)) {
+			NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_VXLAN_LOCAL6], "New local address family does not match old");
 			return -EOPNOTSUPP;
+		}
 
 		/* TODO: respect scope id */
 		conf->saddr.sin6.sin6_addr = nla_get_in6_addr(data[IFLA_VXLAN_LOCAL6]);
@@ -3648,9 +3691,12 @@ static int vxlan_nl2conf(struct nlattr *tb[], struct nlattr *data[],
 		conf->ttl = nla_get_u8(data[IFLA_VXLAN_TTL]);
 
 	if (data[IFLA_VXLAN_TTL_INHERIT]) {
-		if (changelink)
-			return -EOPNOTSUPP;
-		conf->flags |= VXLAN_F_TTL_INHERIT;
+		err = vxlan_nl2flag(conf, data, IFLA_VXLAN_TTL_INHERIT,
+				    VXLAN_F_TTL_INHERIT, changelink, false,
+				    extack);
+		if (err)
+			return err;
+
 	}
 
 	if (data[IFLA_VXLAN_LABEL])
@@ -3658,10 +3704,11 @@ static int vxlan_nl2conf(struct nlattr *tb[], struct nlattr *data[],
 			     IPV6_FLOWLABEL_MASK;
 
 	if (data[IFLA_VXLAN_LEARNING]) {
-		if (nla_get_u8(data[IFLA_VXLAN_LEARNING]))
-			conf->flags |= VXLAN_F_LEARN;
-		else
-			conf->flags &= ~VXLAN_F_LEARN;
+		err = vxlan_nl2flag(conf, data, IFLA_VXLAN_LEARNING,
+				    VXLAN_F_LEARN, changelink, true,
+				    extack);
+		if (err)
+			return err;
 	} else if (!changelink) {
 		/* default to learn on a new device */
 		conf->flags |= VXLAN_F_LEARN;
@@ -3671,44 +3718,52 @@ static int vxlan_nl2conf(struct nlattr *tb[], struct nlattr *data[],
 		conf->age_interval = nla_get_u32(data[IFLA_VXLAN_AGEING]);
 
 	if (data[IFLA_VXLAN_PROXY]) {
-		if (changelink)
-			return -EOPNOTSUPP;
-		if (nla_get_u8(data[IFLA_VXLAN_PROXY]))
-			conf->flags |= VXLAN_F_PROXY;
+		err = vxlan_nl2flag(conf, data, IFLA_VXLAN_PROXY,
+				    VXLAN_F_PROXY, changelink, false,
+				    extack);
+		if (err)
+			return err;
 	}
 
 	if (data[IFLA_VXLAN_RSC]) {
-		if (changelink)
-			return -EOPNOTSUPP;
-		if (nla_get_u8(data[IFLA_VXLAN_RSC]))
-			conf->flags |= VXLAN_F_RSC;
+		err = vxlan_nl2flag(conf, data, IFLA_VXLAN_RSC,
+				    VXLAN_F_RSC, changelink, false,
+				    extack);
+		if (err)
+			return err;
 	}
 
 	if (data[IFLA_VXLAN_L2MISS]) {
-		if (changelink)
-			return -EOPNOTSUPP;
-		if (nla_get_u8(data[IFLA_VXLAN_L2MISS]))
-			conf->flags |= VXLAN_F_L2MISS;
+		err = vxlan_nl2flag(conf, data, IFLA_VXLAN_L2MISS,
+				    VXLAN_F_L2MISS, changelink, false,
+				    extack);
+		if (err)
+			return err;
 	}
 
 	if (data[IFLA_VXLAN_L3MISS]) {
-		if (changelink)
-			return -EOPNOTSUPP;
-		if (nla_get_u8(data[IFLA_VXLAN_L3MISS]))
-			conf->flags |= VXLAN_F_L3MISS;
+		err = vxlan_nl2flag(conf, data, IFLA_VXLAN_L3MISS,
+				    VXLAN_F_L3MISS, changelink, false,
+				    extack);
+		if (err)
+			return err;
 	}
 
 	if (data[IFLA_VXLAN_LIMIT]) {
-		if (changelink)
+		if (changelink) {
+			NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_VXLAN_LIMIT],
+					    "Cannot change limit");
 			return -EOPNOTSUPP;
+		}
 		conf->addrmax = nla_get_u32(data[IFLA_VXLAN_LIMIT]);
 	}
 
 	if (data[IFLA_VXLAN_COLLECT_METADATA]) {
-		if (changelink)
-			return -EOPNOTSUPP;
-		if (nla_get_u8(data[IFLA_VXLAN_COLLECT_METADATA]))
-			conf->flags |= VXLAN_F_COLLECT_METADATA;
+		err = vxlan_nl2flag(conf, data, IFLA_VXLAN_COLLECT_METADATA,
+				    VXLAN_F_COLLECT_METADATA, changelink, false,
+				    extack);
+		if (err)
+			return err;
 	}
 
 	if (data[IFLA_VXLAN_PORT_RANGE]) {
@@ -3718,72 +3773,92 @@ static int vxlan_nl2conf(struct nlattr *tb[], struct nlattr *data[],
 			conf->port_min = ntohs(p->low);
 			conf->port_max = ntohs(p->high);
 		} else {
+			NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_VXLAN_PORT_RANGE],
+					    "Cannot change port range");
 			return -EOPNOTSUPP;
 		}
 	}
 
 	if (data[IFLA_VXLAN_PORT]) {
-		if (changelink)
+		if (changelink) {
+			NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_VXLAN_PORT],
+					    "Cannot change port");
 			return -EOPNOTSUPP;
+		}
 		conf->dst_port = nla_get_be16(data[IFLA_VXLAN_PORT]);
 	}
 
 	if (data[IFLA_VXLAN_UDP_CSUM]) {
-		if (changelink)
+		if (changelink) {
+			NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_VXLAN_UDP_CSUM],
+					    "Cannot change UDP_CSUM flag");
 			return -EOPNOTSUPP;
+		}
 		if (!nla_get_u8(data[IFLA_VXLAN_UDP_CSUM]))
 			conf->flags |= VXLAN_F_UDP_ZERO_CSUM_TX;
 	}
 
 	if (data[IFLA_VXLAN_UDP_ZERO_CSUM6_TX]) {
-		if (changelink)
-			return -EOPNOTSUPP;
-		if (nla_get_u8(data[IFLA_VXLAN_UDP_ZERO_CSUM6_TX]))
-			conf->flags |= VXLAN_F_UDP_ZERO_CSUM6_TX;
+		err = vxlan_nl2flag(conf, data, IFLA_VXLAN_UDP_ZERO_CSUM6_TX,
+				    VXLAN_F_UDP_ZERO_CSUM6_TX, changelink,
+				    false, extack);
+		if (err)
+			return err;
 	}
 
 	if (data[IFLA_VXLAN_UDP_ZERO_CSUM6_RX]) {
-		if (changelink)
-			return -EOPNOTSUPP;
-		if (nla_get_u8(data[IFLA_VXLAN_UDP_ZERO_CSUM6_RX]))
-			conf->flags |= VXLAN_F_UDP_ZERO_CSUM6_RX;
+		err = vxlan_nl2flag(conf, data, IFLA_VXLAN_UDP_ZERO_CSUM6_RX,
+				    VXLAN_F_UDP_ZERO_CSUM6_RX, changelink,
+				    false, extack);
+		if (err)
+			return err;
 	}
 
 	if (data[IFLA_VXLAN_REMCSUM_TX]) {
-		if (changelink)
-			return -EOPNOTSUPP;
-		if (nla_get_u8(data[IFLA_VXLAN_REMCSUM_TX]))
-			conf->flags |= VXLAN_F_REMCSUM_TX;
+		err = vxlan_nl2flag(conf, data, IFLA_VXLAN_REMCSUM_TX,
+				    VXLAN_F_REMCSUM_TX, changelink, false,
+				    extack);
+		if (err)
+			return err;
 	}
 
 	if (data[IFLA_VXLAN_REMCSUM_RX]) {
-		if (changelink)
-			return -EOPNOTSUPP;
-		if (nla_get_u8(data[IFLA_VXLAN_REMCSUM_RX]))
-			conf->flags |= VXLAN_F_REMCSUM_RX;
+		err = vxlan_nl2flag(conf, data, IFLA_VXLAN_REMCSUM_RX,
+				    VXLAN_F_REMCSUM_RX, changelink, false,
+				    extack);
+		if (err)
+			return err;
 	}
 
 	if (data[IFLA_VXLAN_GBP]) {
-		if (changelink)
-			return -EOPNOTSUPP;
-		conf->flags |= VXLAN_F_GBP;
+		err = vxlan_nl2flag(conf, data, IFLA_VXLAN_GBP,
+				    VXLAN_F_GBP, changelink, false, extack);
+		if (err)
+			return err;
 	}
 
 	if (data[IFLA_VXLAN_GPE]) {
-		if (changelink)
-			return -EOPNOTSUPP;
-		conf->flags |= VXLAN_F_GPE;
+		err = vxlan_nl2flag(conf, data, IFLA_VXLAN_GPE,
+				    VXLAN_F_GPE, changelink, false,
+				    extack);
+		if (err)
+			return err;
 	}
 
 	if (data[IFLA_VXLAN_REMCSUM_NOPARTIAL]) {
-		if (changelink)
-			return -EOPNOTSUPP;
-		conf->flags |= VXLAN_F_REMCSUM_NOPARTIAL;
+		err = vxlan_nl2flag(conf, data, IFLA_VXLAN_REMCSUM_NOPARTIAL,
+				    VXLAN_F_REMCSUM_NOPARTIAL, changelink,
+				    false, extack);
+		if (err)
+			return err;
 	}
 
 	if (tb[IFLA_MTU]) {
-		if (changelink)
+		if (changelink) {
+			NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_MTU],
+					    "Cannot change mtu");
 			return -EOPNOTSUPP;
+		}
 		conf->mtu = nla_get_u32(tb[IFLA_MTU]);
 	}
 
@@ -3800,7 +3875,7 @@ static int vxlan_newlink(struct net *src_net, struct net_device *dev,
 	struct vxlan_config conf;
 	int err;
 
-	err = vxlan_nl2conf(tb, data, dev, &conf, false);
+	err = vxlan_nl2conf(tb, data, dev, &conf, false, extack);
 	if (err)
 		return err;
 
@@ -3817,8 +3892,7 @@ static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[],
 	struct vxlan_config conf;
 	int err;
 
-	err = vxlan_nl2conf(tb, data,
-			    dev, &conf, true);
+	err = vxlan_nl2conf(tb, data, dev, &conf, true, extack);
 	if (err)
 		return err;
 
diff --git a/include/net/vxlan.h b/include/net/vxlan.h
index 09767819c3d4..00254a58824b 100644
--- a/include/net/vxlan.h
+++ b/include/net/vxlan.h
@@ -453,4 +453,35 @@ vxlan_fdb_clear_offload(const struct net_device *dev, __be32 vni)
 }
 #endif
 
+static inline void vxlan_flag_attr_error(int attrtype,
+					 struct netlink_ext_ack *extack)
+{
+#define VXLAN_FLAG(flg) \
+	case IFLA_VXLAN_##flg: \
+		NL_SET_ERR_MSG_MOD(extack, \
+				   "cannot change " #flg " flag"); \
+		break
+	switch (attrtype) {
+	VXLAN_FLAG(TTL_INHERIT);
+	VXLAN_FLAG(LEARNING);
+	VXLAN_FLAG(PROXY);
+	VXLAN_FLAG(RSC);
+	VXLAN_FLAG(L2MISS);
+	VXLAN_FLAG(L3MISS);
+	VXLAN_FLAG(COLLECT_METADATA);
+	VXLAN_FLAG(UDP_ZERO_CSUM6_TX);
+	VXLAN_FLAG(UDP_ZERO_CSUM6_RX);
+	VXLAN_FLAG(REMCSUM_TX);
+	VXLAN_FLAG(REMCSUM_RX);
+	VXLAN_FLAG(GBP);
+	VXLAN_FLAG(GPE);
+	VXLAN_FLAG(REMCSUM_NOPARTIAL);
+	default:
+		NL_SET_ERR_MSG_MOD(extack, \
+				   "cannot change flag");
+		break;
+	}
+#undef VXLAN_FLAG
+}
+
 #endif
-- 
cgit v1.2.3


From ae23a0fe58887a1c0518062b49bf8ac30209c26c Mon Sep 17 00:00:00 2001
From: Horia Geantă <horia.geanta@nxp.com>
Date: Thu, 21 Feb 2019 12:37:31 +0200
Subject: soc: fsl: guts: make fsl_guts_get_svr() static
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The export of fsl_guts_get_svr() is a left-over, it's currently used
only internally and users needing SoC information should use the generic
soc_device infrastructure.

Signed-off-by: Horia Geantă <horia.geanta@nxp.com>
Acked-by: Yangbo Lu <yangbo.lu@nxp.com>
Signed-off-by: Li Yang <leoyang.li@nxp.com>
---
 drivers/soc/fsl/guts.c   | 3 +--
 include/linux/fsl/guts.h | 2 --
 2 files changed, 1 insertion(+), 4 deletions(-)

(limited to 'include')

diff --git a/drivers/soc/fsl/guts.c b/drivers/soc/fsl/guts.c
index 4f9655087bd7..63f6df86f9e5 100644
--- a/drivers/soc/fsl/guts.c
+++ b/drivers/soc/fsl/guts.c
@@ -115,7 +115,7 @@ static const struct fsl_soc_die_attr *fsl_soc_die_match(
 	return NULL;
 }
 
-u32 fsl_guts_get_svr(void)
+static u32 fsl_guts_get_svr(void)
 {
 	u32 svr = 0;
 
@@ -129,7 +129,6 @@ u32 fsl_guts_get_svr(void)
 
 	return svr;
 }
-EXPORT_SYMBOL(fsl_guts_get_svr);
 
 static int fsl_guts_probe(struct platform_device *pdev)
 {
diff --git a/include/linux/fsl/guts.h b/include/linux/fsl/guts.h
index 941b11811f85..1fc0edd71c52 100644
--- a/include/linux/fsl/guts.h
+++ b/include/linux/fsl/guts.h
@@ -135,8 +135,6 @@ struct ccsr_guts {
 	u32	srds2cr1;	/* 0x.0f44 - SerDes2 Control Register 0 */
 } __attribute__ ((packed));
 
-u32 fsl_guts_get_svr(void);
-
 /* Alternate function signal multiplex control */
 #define MPC85xx_PMUXCR_QE(x) (0x8000 >> (x))
 
-- 
cgit v1.2.3


From 64f4466c887e2f16cb01467c8064ff1106c980a3 Mon Sep 17 00:00:00 2001
From: Seiya Wang <seiya.wang@mediatek.com>
Date: Mon, 25 Feb 2019 14:51:12 +0800
Subject: clk: mediatek: correct cpu clock name for MT8173 SoC

Correct cpu clock name from ca57 to ca72 since MT8173 does use cortex-a72.

Signed-off-by: Seiya Wang <seiya.wang@mediatek.com>
Reviewed-by: Matthias Brugger <matthias.bgg@gmail.com>
Signed-off-by: Stephen Boyd <sboyd@kernel.org>
---
 drivers/clk/mediatek/clk-mt8173.c      | 4 ++--
 include/dt-bindings/clock/mt8173-clk.h | 3 ++-
 2 files changed, 4 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/drivers/clk/mediatek/clk-mt8173.c b/drivers/clk/mediatek/clk-mt8173.c
index 96c292c3e440..deedeb3ea33b 100644
--- a/drivers/clk/mediatek/clk-mt8173.c
+++ b/drivers/clk/mediatek/clk-mt8173.c
@@ -533,7 +533,7 @@ static const char * const ca53_parents[] __initconst = {
 	"univpll"
 };
 
-static const char * const ca57_parents[] __initconst = {
+static const char * const ca72_parents[] __initconst = {
 	"clk26m",
 	"armca15pll",
 	"mainpll",
@@ -542,7 +542,7 @@ static const char * const ca57_parents[] __initconst = {
 
 static const struct mtk_composite cpu_muxes[] __initconst = {
 	MUX(CLK_INFRA_CA53SEL, "infra_ca53_sel", ca53_parents, 0x0000, 0, 2),
-	MUX(CLK_INFRA_CA57SEL, "infra_ca57_sel", ca57_parents, 0x0000, 2, 2),
+	MUX(CLK_INFRA_CA72SEL, "infra_ca72_sel", ca72_parents, 0x0000, 2, 2),
 };
 
 static const struct mtk_composite top_muxes[] __initconst = {
diff --git a/include/dt-bindings/clock/mt8173-clk.h b/include/dt-bindings/clock/mt8173-clk.h
index 8aea623dd518..76e4e5b65353 100644
--- a/include/dt-bindings/clock/mt8173-clk.h
+++ b/include/dt-bindings/clock/mt8173-clk.h
@@ -194,7 +194,8 @@
 #define CLK_INFRA_PMICWRAP		11
 #define CLK_INFRA_CLK_13M		12
 #define CLK_INFRA_CA53SEL               13
-#define CLK_INFRA_CA57SEL               14
+#define CLK_INFRA_CA57SEL               14 /* Deprecated. Don't use it. */
+#define CLK_INFRA_CA72SEL               14
 #define CLK_INFRA_NR_CLK                15
 
 /* PERI_SYS */
-- 
cgit v1.2.3


From 6c7b4ee7f96d77a40e474f7541c4e543a669dbde Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 26 Feb 2019 09:49:09 -0800
Subject: tcp: get rid of tcp_check_send_head()

This helper is used only once, and its name is no longer relevant.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h | 6 ------
 net/ipv4/tcp.c    | 3 ++-
 2 files changed, 2 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index a6e0355921e1..813042c08dc5 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1716,12 +1716,6 @@ static inline bool tcp_rtx_and_write_queues_empty(const struct sock *sk)
 	return tcp_rtx_queue_empty(sk) && tcp_write_queue_empty(sk);
 }
 
-static inline void tcp_check_send_head(struct sock *sk, struct sk_buff *skb_unlinked)
-{
-	if (tcp_write_queue_empty(sk))
-		tcp_chrono_stop(sk, TCP_CHRONO_BUSY);
-}
-
 static inline void __tcp_add_write_queue_tail(struct sock *sk, struct sk_buff *skb)
 {
 	__skb_queue_tail(&sk->sk_write_queue, skb);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 769508c75dce..e2fa6eb9f81a 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1412,7 +1412,8 @@ do_fault:
 		/* It is the one place in all of TCP, except connection
 		 * reset, where we can be unlinking the send_head.
 		 */
-		tcp_check_send_head(sk, skb);
+		if (tcp_write_queue_empty(sk))
+			tcp_chrono_stop(sk, TCP_CHRONO_BUSY);
 		sk_wmem_free_skb(sk, skb);
 	}
 
-- 
cgit v1.2.3


From a43e052beacb2c0cecd0e807590b70fc4ff99dba Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 26 Feb 2019 09:49:10 -0800
Subject: tcp: get rid of __tcp_add_write_queue_tail()

This helper is only used from tcp_add_write_queue_tail(), and does
not make the code more readable.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 813042c08dc5..d66ac76a33d1 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1716,14 +1716,9 @@ static inline bool tcp_rtx_and_write_queues_empty(const struct sock *sk)
 	return tcp_rtx_queue_empty(sk) && tcp_write_queue_empty(sk);
 }
 
-static inline void __tcp_add_write_queue_tail(struct sock *sk, struct sk_buff *skb)
-{
-	__skb_queue_tail(&sk->sk_write_queue, skb);
-}
-
 static inline void tcp_add_write_queue_tail(struct sock *sk, struct sk_buff *skb)
 {
-	__tcp_add_write_queue_tail(sk, skb);
+	__skb_queue_tail(&sk->sk_write_queue, skb);
 
 	/* Queue it, remembering where we must start sending. */
 	if (sk->sk_write_queue.next == skb)
-- 
cgit v1.2.3


From 921f9a0f2e8c326bfcdde7a59be0bac801a3d588 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 26 Feb 2019 09:49:11 -0800
Subject: tcp: convert tcp_md5_needed to static_branch API

We prefer static_branch_unlikely() over static_key_false() these days.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h     | 4 ++--
 net/ipv4/tcp.c        | 2 +-
 net/ipv4/tcp_ipv4.c   | 2 +-
 net/ipv4/tcp_output.c | 4 ++--
 4 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index d66ac76a33d1..68ee02523b87 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1558,7 +1558,7 @@ struct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk,
 
 #ifdef CONFIG_TCP_MD5SIG
 #include <linux/jump_label.h>
-extern struct static_key tcp_md5_needed;
+extern struct static_key_false tcp_md5_needed;
 struct tcp_md5sig_key *__tcp_md5_do_lookup(const struct sock *sk,
 					   const union tcp_md5_addr *addr,
 					   int family);
@@ -1567,7 +1567,7 @@ tcp_md5_do_lookup(const struct sock *sk,
 		  const union tcp_md5_addr *addr,
 		  int family)
 {
-	if (!static_key_false(&tcp_md5_needed))
+	if (!static_branch_unlikely(&tcp_md5_needed))
 		return NULL;
 	return __tcp_md5_do_lookup(sk, addr, family);
 }
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index e2fa6eb9f81a..ad07dd71063d 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -3699,7 +3699,7 @@ bool tcp_alloc_md5sig_pool(void)
 		if (!tcp_md5sig_pool_populated) {
 			__tcp_alloc_md5sig_pool();
 			if (tcp_md5sig_pool_populated)
-				static_key_slow_inc(&tcp_md5_needed);
+				static_branch_inc(&tcp_md5_needed);
 		}
 
 		mutex_unlock(&tcp_md5sig_mutex);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 4010ae3644f3..831d844a27ca 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -973,7 +973,7 @@ static void tcp_v4_reqsk_destructor(struct request_sock *req)
  * We need to maintain these in the sk structure.
  */
 
-struct static_key tcp_md5_needed __read_mostly;
+DEFINE_STATIC_KEY_FALSE(tcp_md5_needed);
 EXPORT_SYMBOL(tcp_md5_needed);
 
 /* Find the Key structure for an address.  */
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index e72aa0ff5785..91f5cc5a3f88 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -597,7 +597,7 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
 
 	*md5 = NULL;
 #ifdef CONFIG_TCP_MD5SIG
-	if (static_key_false(&tcp_md5_needed) &&
+	if (static_branch_unlikely(&tcp_md5_needed) &&
 	    rcu_access_pointer(tp->md5sig_info)) {
 		*md5 = tp->af_specific->md5_lookup(sk, sk);
 		if (*md5) {
@@ -734,7 +734,7 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb
 
 	*md5 = NULL;
 #ifdef CONFIG_TCP_MD5SIG
-	if (static_key_false(&tcp_md5_needed) &&
+	if (static_branch_unlikely(&tcp_md5_needed) &&
 	    rcu_access_pointer(tp->md5sig_info)) {
 		*md5 = tp->af_specific->md5_lookup(sk, sk);
 		if (*md5) {
-- 
cgit v1.2.3


From b8e204006340b7aaf32bd2b9806c692f6e0cb38a Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Wed, 13 Feb 2019 13:18:36 +0100
Subject: netfilter: nft_compat: use .release_ops and remove list of extension

Add .release_ops, that is called in case of error at a later stage in
the expression initialization path, ie. .select_ops() has been already
set up operations and that needs to be undone. This allows us to unwind
.select_ops from the error path, ie. release the dynamic operations for
this extension.

Moreover, allocate one single operation instead of recycling them, this
comes at the cost of consuming a bit more memory per rule, but it
simplifies the infrastructure.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h |   3 +
 net/netfilter/nf_tables_api.c     |   7 +-
 net/netfilter/nft_compat.c        | 281 ++++++++------------------------------
 3 files changed, 64 insertions(+), 227 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index a66fcd316734..c331e96a713b 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -690,10 +690,12 @@ static inline void nft_set_gc_batch_add(struct nft_set_gc_batch *gcb,
 	gcb->elems[gcb->head.cnt++] = elem;
 }
 
+struct nft_expr_ops;
 /**
  *	struct nft_expr_type - nf_tables expression type
  *
  *	@select_ops: function to select nft_expr_ops
+ *	@release_ops: release nft_expr_ops
  *	@ops: default ops, used when no select_ops functions is present
  *	@list: used internally
  *	@name: Identifier
@@ -706,6 +708,7 @@ static inline void nft_set_gc_batch_add(struct nft_set_gc_batch *gcb,
 struct nft_expr_type {
 	const struct nft_expr_ops	*(*select_ops)(const struct nft_ctx *,
 						       const struct nlattr * const tb[]);
+	void				(*release_ops)(const struct nft_expr_ops *ops);
 	const struct nft_expr_ops	*ops;
 	struct list_head		list;
 	const char			*name;
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index e1a88ba2249e..faf6bd10a19f 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -2172,6 +2172,7 @@ struct nft_expr *nft_expr_init(const struct nft_ctx *ctx,
 {
 	struct nft_expr_info info;
 	struct nft_expr *expr;
+	struct module *owner;
 	int err;
 
 	err = nf_tables_expr_parse(ctx, nla, &info);
@@ -2191,7 +2192,11 @@ struct nft_expr *nft_expr_init(const struct nft_ctx *ctx,
 err3:
 	kfree(expr);
 err2:
-	module_put(info.ops->type->owner);
+	owner = info.ops->type->owner;
+	if (info.ops->type->release_ops)
+		info.ops->type->release_ops(info.ops);
+
+	module_put(owner);
 err1:
 	return ERR_PTR(err);
 }
diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
index 0a4bad55a8aa..469f9da5073b 100644
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -22,23 +22,6 @@
 #include <linux/netfilter_bridge/ebtables.h>
 #include <linux/netfilter_arp/arp_tables.h>
 #include <net/netfilter/nf_tables.h>
-#include <net/netns/generic.h>
-
-struct nft_xt {
-	struct list_head	head;
-	struct nft_expr_ops	ops;
-	refcount_t		refcnt;
-
-	/* used only when transaction mutex is locked */
-	unsigned int		listcnt;
-
-	/* Unlike other expressions, ops doesn't have static storage duration.
-	 * nft core assumes they do.  We use kfree_rcu so that nft core can
-	 * can check expr->ops->size even after nft_compat->destroy() frees
-	 * the nft_xt struct that holds the ops structure.
-	 */
-	struct rcu_head		rcu_head;
-};
 
 /* Used for matches where *info is larger than X byte */
 #define NFT_MATCH_LARGE_THRESH	192
@@ -47,46 +30,6 @@ struct nft_xt_match_priv {
 	void *info;
 };
 
-struct nft_compat_net {
-	struct list_head nft_target_list;
-	struct list_head nft_match_list;
-};
-
-static unsigned int nft_compat_net_id __read_mostly;
-static struct nft_expr_type nft_match_type;
-static struct nft_expr_type nft_target_type;
-
-static struct nft_compat_net *nft_compat_pernet(struct net *net)
-{
-	return net_generic(net, nft_compat_net_id);
-}
-
-static void nft_xt_get(struct nft_xt *xt)
-{
-	/* refcount_inc() warns on 0 -> 1 transition, but we can't
-	 * init the reference count to 1 in .select_ops -- we can't
-	 * undo such an increase when another expression inside the same
-	 * rule fails afterwards.
-	 */
-	if (xt->listcnt == 0)
-		refcount_set(&xt->refcnt, 1);
-	else
-		refcount_inc(&xt->refcnt);
-
-	xt->listcnt++;
-}
-
-static bool nft_xt_put(struct nft_xt *xt)
-{
-	if (refcount_dec_and_test(&xt->refcnt)) {
-		WARN_ON_ONCE(!list_empty(&xt->head));
-		kfree_rcu(xt, rcu_head);
-		return true;
-	}
-
-	return false;
-}
-
 static int nft_compat_chain_validate_dependency(const struct nft_ctx *ctx,
 						const char *tablename)
 {
@@ -281,7 +224,6 @@ nft_target_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
 	struct xt_target *target = expr->ops->data;
 	struct xt_tgchk_param par;
 	size_t size = XT_ALIGN(nla_len(tb[NFTA_TARGET_INFO]));
-	struct nft_xt *nft_xt;
 	u16 proto = 0;
 	bool inv = false;
 	union nft_entry e = {};
@@ -305,8 +247,6 @@ nft_target_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
 	if (!target->target)
 		return -EINVAL;
 
-	nft_xt = container_of(expr->ops, struct nft_xt, ops);
-	nft_xt_get(nft_xt);
 	return 0;
 }
 
@@ -325,8 +265,8 @@ nft_target_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
 	if (par.target->destroy != NULL)
 		par.target->destroy(&par);
 
-	if (nft_xt_put(container_of(expr->ops, struct nft_xt, ops)))
-		module_put(me);
+	module_put(me);
+	kfree(expr->ops);
 }
 
 static int nft_extension_dump_info(struct sk_buff *skb, int attr,
@@ -499,7 +439,6 @@ __nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
 	struct xt_match *match = expr->ops->data;
 	struct xt_mtchk_param par;
 	size_t size = XT_ALIGN(nla_len(tb[NFTA_MATCH_INFO]));
-	struct nft_xt *nft_xt;
 	u16 proto = 0;
 	bool inv = false;
 	union nft_entry e = {};
@@ -515,13 +454,7 @@ __nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
 
 	nft_match_set_mtchk_param(&par, ctx, match, info, &e, proto, inv);
 
-	ret = xt_check_match(&par, size, proto, inv);
-	if (ret < 0)
-		return ret;
-
-	nft_xt = container_of(expr->ops, struct nft_xt, ops);
-	nft_xt_get(nft_xt);
-	return 0;
+	return xt_check_match(&par, size, proto, inv);
 }
 
 static int
@@ -564,8 +497,8 @@ __nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr,
 	if (par.match->destroy != NULL)
 		par.match->destroy(&par);
 
-	if (nft_xt_put(container_of(expr->ops, struct nft_xt, ops)))
-		module_put(me);
+	module_put(me);
+	kfree(expr->ops);
 }
 
 static void
@@ -574,18 +507,6 @@ nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
 	__nft_match_destroy(ctx, expr, nft_expr_priv(expr));
 }
 
-static void nft_compat_deactivate(const struct nft_ctx *ctx,
-				  const struct nft_expr *expr,
-				  enum nft_trans_phase phase)
-{
-	struct nft_xt *xt = container_of(expr->ops, struct nft_xt, ops);
-
-	if (phase == NFT_TRANS_ABORT || phase == NFT_TRANS_COMMIT) {
-		if (--xt->listcnt == 0)
-			list_del_init(&xt->head);
-	}
-}
-
 static void
 nft_match_large_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
 {
@@ -780,19 +701,13 @@ static const struct nfnetlink_subsystem nfnl_compat_subsys = {
 	.cb		= nfnl_nft_compat_cb,
 };
 
-static bool nft_match_cmp(const struct xt_match *match,
-			  const char *name, u32 rev, u32 family)
-{
-	return strcmp(match->name, name) == 0 && match->revision == rev &&
-	       (match->family == NFPROTO_UNSPEC || match->family == family);
-}
+static struct nft_expr_type nft_match_type;
 
 static const struct nft_expr_ops *
 nft_match_select_ops(const struct nft_ctx *ctx,
 		     const struct nlattr * const tb[])
 {
-	struct nft_compat_net *cn;
-	struct nft_xt *nft_match;
+	struct nft_expr_ops *ops;
 	struct xt_match *match;
 	unsigned int matchsize;
 	char *mt_name;
@@ -808,16 +723,6 @@ nft_match_select_ops(const struct nft_ctx *ctx,
 	rev = ntohl(nla_get_be32(tb[NFTA_MATCH_REV]));
 	family = ctx->family;
 
-	cn = nft_compat_pernet(ctx->net);
-
-	/* Re-use the existing match if it's already loaded. */
-	list_for_each_entry(nft_match, &cn->nft_match_list, head) {
-		struct xt_match *match = nft_match->ops.data;
-
-		if (nft_match_cmp(match, mt_name, rev, family))
-			return &nft_match->ops;
-	}
-
 	match = xt_request_find_match(family, mt_name, rev);
 	if (IS_ERR(match))
 		return ERR_PTR(-ENOENT);
@@ -827,65 +732,62 @@ nft_match_select_ops(const struct nft_ctx *ctx,
 		goto err;
 	}
 
-	/* This is the first time we use this match, allocate operations */
-	nft_match = kzalloc(sizeof(struct nft_xt), GFP_KERNEL);
-	if (nft_match == NULL) {
+	ops = kzalloc(sizeof(struct nft_expr_ops), GFP_KERNEL);
+	if (!ops) {
 		err = -ENOMEM;
 		goto err;
 	}
 
-	refcount_set(&nft_match->refcnt, 0);
-	nft_match->ops.type = &nft_match_type;
-	nft_match->ops.eval = nft_match_eval;
-	nft_match->ops.init = nft_match_init;
-	nft_match->ops.destroy = nft_match_destroy;
-	nft_match->ops.deactivate = nft_compat_deactivate;
-	nft_match->ops.dump = nft_match_dump;
-	nft_match->ops.validate = nft_match_validate;
-	nft_match->ops.data = match;
+	ops->type = &nft_match_type;
+	ops->eval = nft_match_eval;
+	ops->init = nft_match_init;
+	ops->destroy = nft_match_destroy;
+	ops->dump = nft_match_dump;
+	ops->validate = nft_match_validate;
+	ops->data = match;
 
 	matchsize = NFT_EXPR_SIZE(XT_ALIGN(match->matchsize));
 	if (matchsize > NFT_MATCH_LARGE_THRESH) {
 		matchsize = NFT_EXPR_SIZE(sizeof(struct nft_xt_match_priv));
 
-		nft_match->ops.eval = nft_match_large_eval;
-		nft_match->ops.init = nft_match_large_init;
-		nft_match->ops.destroy = nft_match_large_destroy;
-		nft_match->ops.dump = nft_match_large_dump;
+		ops->eval = nft_match_large_eval;
+		ops->init = nft_match_large_init;
+		ops->destroy = nft_match_large_destroy;
+		ops->dump = nft_match_large_dump;
 	}
 
-	nft_match->ops.size = matchsize;
+	ops->size = matchsize;
 
-	nft_match->listcnt = 0;
-	list_add(&nft_match->head, &cn->nft_match_list);
-
-	return &nft_match->ops;
+	return ops;
 err:
 	module_put(match->me);
 	return ERR_PTR(err);
 }
 
+static void nft_match_release_ops(const struct nft_expr_ops *ops)
+{
+	struct xt_match *match = ops->data;
+
+	module_put(match->me);
+	kfree(ops);
+}
+
 static struct nft_expr_type nft_match_type __read_mostly = {
 	.name		= "match",
 	.select_ops	= nft_match_select_ops,
+	.release_ops	= nft_match_release_ops,
 	.policy		= nft_match_policy,
 	.maxattr	= NFTA_MATCH_MAX,
 	.owner		= THIS_MODULE,
 };
 
-static bool nft_target_cmp(const struct xt_target *tg,
-			   const char *name, u32 rev, u32 family)
-{
-	return strcmp(tg->name, name) == 0 && tg->revision == rev &&
-	       (tg->family == NFPROTO_UNSPEC || tg->family == family);
-}
+static struct nft_expr_type nft_target_type;
 
 static const struct nft_expr_ops *
 nft_target_select_ops(const struct nft_ctx *ctx,
 		      const struct nlattr * const tb[])
 {
-	struct nft_compat_net *cn;
-	struct nft_xt *nft_target;
+	struct nft_expr_ops *ops;
 	struct xt_target *target;
 	char *tg_name;
 	u32 rev, family;
@@ -905,18 +807,6 @@ nft_target_select_ops(const struct nft_ctx *ctx,
 	    strcmp(tg_name, "standard") == 0)
 		return ERR_PTR(-EINVAL);
 
-	cn = nft_compat_pernet(ctx->net);
-	/* Re-use the existing target if it's already loaded. */
-	list_for_each_entry(nft_target, &cn->nft_target_list, head) {
-		struct xt_target *target = nft_target->ops.data;
-
-		if (!target->target)
-			continue;
-
-		if (nft_target_cmp(target, tg_name, rev, family))
-			return &nft_target->ops;
-	}
-
 	target = xt_request_find_target(family, tg_name, rev);
 	if (IS_ERR(target))
 		return ERR_PTR(-ENOENT);
@@ -931,113 +821,55 @@ nft_target_select_ops(const struct nft_ctx *ctx,
 		goto err;
 	}
 
-	/* This is the first time we use this target, allocate operations */
-	nft_target = kzalloc(sizeof(struct nft_xt), GFP_KERNEL);
-	if (nft_target == NULL) {
+	ops = kzalloc(sizeof(struct nft_expr_ops), GFP_KERNEL);
+	if (!ops) {
 		err = -ENOMEM;
 		goto err;
 	}
 
-	refcount_set(&nft_target->refcnt, 0);
-	nft_target->ops.type = &nft_target_type;
-	nft_target->ops.size = NFT_EXPR_SIZE(XT_ALIGN(target->targetsize));
-	nft_target->ops.init = nft_target_init;
-	nft_target->ops.destroy = nft_target_destroy;
-	nft_target->ops.deactivate = nft_compat_deactivate;
-	nft_target->ops.dump = nft_target_dump;
-	nft_target->ops.validate = nft_target_validate;
-	nft_target->ops.data = target;
+	ops->type = &nft_target_type;
+	ops->size = NFT_EXPR_SIZE(XT_ALIGN(target->targetsize));
+	ops->init = nft_target_init;
+	ops->destroy = nft_target_destroy;
+	ops->dump = nft_target_dump;
+	ops->validate = nft_target_validate;
+	ops->data = target;
 
 	if (family == NFPROTO_BRIDGE)
-		nft_target->ops.eval = nft_target_eval_bridge;
+		ops->eval = nft_target_eval_bridge;
 	else
-		nft_target->ops.eval = nft_target_eval_xt;
-
-	nft_target->listcnt = 0;
-	list_add(&nft_target->head, &cn->nft_target_list);
+		ops->eval = nft_target_eval_xt;
 
-	return &nft_target->ops;
+	return ops;
 err:
 	module_put(target->me);
 	return ERR_PTR(err);
 }
 
+static void nft_target_release_ops(const struct nft_expr_ops *ops)
+{
+	struct xt_target *target = ops->data;
+
+	module_put(target->me);
+	kfree(ops);
+}
+
 static struct nft_expr_type nft_target_type __read_mostly = {
 	.name		= "target",
 	.select_ops	= nft_target_select_ops,
+	.release_ops	= nft_target_release_ops,
 	.policy		= nft_target_policy,
 	.maxattr	= NFTA_TARGET_MAX,
 	.owner		= THIS_MODULE,
 };
 
-static int __net_init nft_compat_init_net(struct net *net)
-{
-	struct nft_compat_net *cn = nft_compat_pernet(net);
-
-	INIT_LIST_HEAD(&cn->nft_target_list);
-	INIT_LIST_HEAD(&cn->nft_match_list);
-
-	return 0;
-}
-
-static void __net_exit nft_compat_exit_net(struct net *net)
-{
-	struct nft_compat_net *cn = nft_compat_pernet(net);
-	struct nft_xt *xt, *next;
-
-	if (list_empty(&cn->nft_match_list) &&
-	    list_empty(&cn->nft_target_list))
-		return;
-
-	/* If there was an error that caused nft_xt expr to not be initialized
-	 * fully and noone else requested the same expression later, the lists
-	 * contain 0-refcount entries that still hold module reference.
-	 *
-	 * Clean them here.
-	 */
-	mutex_lock(&net->nft.commit_mutex);
-	list_for_each_entry_safe(xt, next, &cn->nft_target_list, head) {
-		struct xt_target *target = xt->ops.data;
-
-		list_del_init(&xt->head);
-
-		if (refcount_read(&xt->refcnt))
-			continue;
-		module_put(target->me);
-		kfree(xt);
-	}
-
-	list_for_each_entry_safe(xt, next, &cn->nft_match_list, head) {
-		struct xt_match *match = xt->ops.data;
-
-		list_del_init(&xt->head);
-
-		if (refcount_read(&xt->refcnt))
-			continue;
-		module_put(match->me);
-		kfree(xt);
-	}
-	mutex_unlock(&net->nft.commit_mutex);
-}
-
-static struct pernet_operations nft_compat_net_ops = {
-	.init	= nft_compat_init_net,
-	.exit	= nft_compat_exit_net,
-	.id	= &nft_compat_net_id,
-	.size	= sizeof(struct nft_compat_net),
-};
-
 static int __init nft_compat_module_init(void)
 {
 	int ret;
 
-	ret = register_pernet_subsys(&nft_compat_net_ops);
-	if (ret < 0)
-		goto err_target;
-
 	ret = nft_register_expr(&nft_match_type);
 	if (ret < 0)
-		goto err_pernet;
+		return ret;
 
 	ret = nft_register_expr(&nft_target_type);
 	if (ret < 0)
@@ -1054,8 +886,6 @@ err_target:
 	nft_unregister_expr(&nft_target_type);
 err_match:
 	nft_unregister_expr(&nft_match_type);
-err_pernet:
-	unregister_pernet_subsys(&nft_compat_net_ops);
 	return ret;
 }
 
@@ -1064,7 +894,6 @@ static void __exit nft_compat_module_exit(void)
 	nfnetlink_subsys_unregister(&nfnl_compat_subsys);
 	nft_unregister_expr(&nft_target_type);
 	nft_unregister_expr(&nft_match_type);
-	unregister_pernet_subsys(&nft_compat_net_ops);
 }
 
 MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_NFT_COMPAT);
-- 
cgit v1.2.3


From d1aca8ab3104aa7131f5ab144c6f586b54df084b Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Tue, 19 Feb 2019 17:38:19 +0100
Subject: netfilter: nat: merge ipv4 and ipv6 masquerade functionality

Before:
   text	   data	    bss	    dec	    hex	filename
  13916	   1412	   4128	  19456	   4c00	nf_nat.ko
   4510	    968	      4	   5482	   156a	nf_nat_ipv4.ko
   5146	    944	      8	   6098	   17d2	nf_nat_ipv6.ko

After:
   text	   data	    bss	    dec	    hex	filename
  16566	   1576	   4136	  22278	   5706	nf_nat.ko
   3187	    844	      0	   4031	    fbf	nf_nat_ipv4.ko
   3598	    844	      0	   4442	   115a	nf_nat_ipv6.ko

... so no drastic changes in combined size.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_nat.h              |   6 +-
 net/ipv4/netfilter/Kconfig                  |   7 +-
 net/ipv4/netfilter/Makefile                 |   1 -
 net/ipv4/netfilter/nf_nat_masquerade_ipv4.c | 196 ---------------
 net/ipv6/netfilter/Kconfig                  |  11 +-
 net/ipv6/netfilter/Makefile                 |   1 -
 net/ipv6/netfilter/nf_nat_masquerade_ipv6.c | 240 ------------------
 net/netfilter/Kconfig                       |   3 +
 net/netfilter/Makefile                      |   1 +
 net/netfilter/nf_nat_masquerade.c           | 362 ++++++++++++++++++++++++++++
 10 files changed, 372 insertions(+), 456 deletions(-)
 delete mode 100644 net/ipv4/netfilter/nf_nat_masquerade_ipv4.c
 delete mode 100644 net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
 create mode 100644 net/netfilter/nf_nat_masquerade.c

(limited to 'include')

diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h
index 8aff77cafb8b..e53b4f9b8b44 100644
--- a/include/net/netfilter/nf_nat.h
+++ b/include/net/netfilter/nf_nat.h
@@ -31,8 +31,7 @@ struct nf_conn;
 /* The structure embedded in the conntrack structure. */
 struct nf_conn_nat {
 	union nf_conntrack_nat_help help;
-#if IS_ENABLED(CONFIG_NF_NAT_MASQUERADE_IPV4) || \
-    IS_ENABLED(CONFIG_NF_NAT_MASQUERADE_IPV6)
+#if IS_ENABLED(CONFIG_NF_NAT_MASQUERADE)
 	int masq_index;
 #endif
 };
@@ -61,8 +60,7 @@ static inline bool nf_nat_oif_changed(unsigned int hooknum,
 				      struct nf_conn_nat *nat,
 				      const struct net_device *out)
 {
-#if IS_ENABLED(CONFIG_NF_NAT_MASQUERADE_IPV4) || \
-    IS_ENABLED(CONFIG_NF_NAT_MASQUERADE_IPV6)
+#if IS_ENABLED(CONFIG_NF_NAT_MASQUERADE)
 	return nat && nat->masq_index && hooknum == NF_INET_POST_ROUTING &&
 	       CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL &&
 	       nat->masq_index != out->ifindex;
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 80f72cc5ca8d..db05a835748a 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -106,9 +106,6 @@ config NF_NAT_IPV4
 
 if NF_NAT_IPV4
 
-config NF_NAT_MASQUERADE_IPV4
-	bool
-
 if NF_TABLES
 config NFT_CHAIN_NAT_IPV4
 	depends on NF_TABLES_IPV4
@@ -123,7 +120,7 @@ config NFT_MASQ_IPV4
 	tristate "IPv4 masquerading support for nf_tables"
 	depends on NF_TABLES_IPV4
 	depends on NFT_MASQ
-	select NF_NAT_MASQUERADE_IPV4
+	select NF_NAT_MASQUERADE
 	help
 	  This is the expression that provides IPv4 masquerading support for
 	  nf_tables.
@@ -276,7 +273,7 @@ if IP_NF_NAT
 
 config IP_NF_TARGET_MASQUERADE
 	tristate "MASQUERADE target support"
-	select NF_NAT_MASQUERADE_IPV4
+	select NF_NAT_MASQUERADE
 	default m if NETFILTER_ADVANCED=n
 	help
 	  Masquerading is a special case of NAT: all outgoing connections are
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index fd7122e0e2c9..ddeb35ab8bdb 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -4,7 +4,6 @@
 #
 
 nf_nat_ipv4-y		:= nf_nat_l3proto_ipv4.o
-nf_nat_ipv4-$(CONFIG_NF_NAT_MASQUERADE_IPV4) += nf_nat_masquerade_ipv4.o
 obj-$(CONFIG_NF_NAT_IPV4) += nf_nat_ipv4.o
 
 # defrag
diff --git a/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c b/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c
deleted file mode 100644
index 41327bb99093..000000000000
--- a/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c
+++ /dev/null
@@ -1,196 +0,0 @@
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/types.h>
-#include <linux/atomic.h>
-#include <linux/inetdevice.h>
-#include <linux/ip.h>
-#include <linux/timer.h>
-#include <linux/netfilter.h>
-#include <net/protocol.h>
-#include <net/ip.h>
-#include <net/checksum.h>
-#include <net/route.h>
-#include <linux/netfilter_ipv4.h>
-#include <linux/netfilter/x_tables.h>
-#include <net/netfilter/nf_nat.h>
-#include <net/netfilter/ipv4/nf_nat_masquerade.h>
-
-unsigned int
-nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum,
-		       const struct nf_nat_range2 *range,
-		       const struct net_device *out)
-{
-	struct nf_conn *ct;
-	struct nf_conn_nat *nat;
-	enum ip_conntrack_info ctinfo;
-	struct nf_nat_range2 newrange;
-	const struct rtable *rt;
-	__be32 newsrc, nh;
-
-	WARN_ON(hooknum != NF_INET_POST_ROUTING);
-
-	ct = nf_ct_get(skb, &ctinfo);
-
-	WARN_ON(!(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
-			 ctinfo == IP_CT_RELATED_REPLY)));
-
-	/* Source address is 0.0.0.0 - locally generated packet that is
-	 * probably not supposed to be masqueraded.
-	 */
-	if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip == 0)
-		return NF_ACCEPT;
-
-	rt = skb_rtable(skb);
-	nh = rt_nexthop(rt, ip_hdr(skb)->daddr);
-	newsrc = inet_select_addr(out, nh, RT_SCOPE_UNIVERSE);
-	if (!newsrc) {
-		pr_info("%s ate my IP address\n", out->name);
-		return NF_DROP;
-	}
-
-	nat = nf_ct_nat_ext_add(ct);
-	if (nat)
-		nat->masq_index = out->ifindex;
-
-	/* Transfer from original range. */
-	memset(&newrange.min_addr, 0, sizeof(newrange.min_addr));
-	memset(&newrange.max_addr, 0, sizeof(newrange.max_addr));
-	newrange.flags       = range->flags | NF_NAT_RANGE_MAP_IPS;
-	newrange.min_addr.ip = newsrc;
-	newrange.max_addr.ip = newsrc;
-	newrange.min_proto   = range->min_proto;
-	newrange.max_proto   = range->max_proto;
-
-	/* Hand modified range to generic setup. */
-	return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC);
-}
-EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv4);
-
-static int device_cmp(struct nf_conn *i, void *ifindex)
-{
-	const struct nf_conn_nat *nat = nfct_nat(i);
-
-	if (!nat)
-		return 0;
-	if (nf_ct_l3num(i) != NFPROTO_IPV4)
-		return 0;
-	return nat->masq_index == (int)(long)ifindex;
-}
-
-static int masq_device_event(struct notifier_block *this,
-			     unsigned long event,
-			     void *ptr)
-{
-	const struct net_device *dev = netdev_notifier_info_to_dev(ptr);
-	struct net *net = dev_net(dev);
-
-	if (event == NETDEV_DOWN) {
-		/* Device was downed.  Search entire table for
-		 * conntracks which were associated with that device,
-		 * and forget them.
-		 */
-		WARN_ON(dev->ifindex == 0);
-
-		nf_ct_iterate_cleanup_net(net, device_cmp,
-					  (void *)(long)dev->ifindex, 0, 0);
-	}
-
-	return NOTIFY_DONE;
-}
-
-static int inet_cmp(struct nf_conn *ct, void *ptr)
-{
-	struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
-	struct net_device *dev = ifa->ifa_dev->dev;
-	struct nf_conntrack_tuple *tuple;
-
-	if (!device_cmp(ct, (void *)(long)dev->ifindex))
-		return 0;
-
-	tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;
-
-	return ifa->ifa_address == tuple->dst.u3.ip;
-}
-
-static int masq_inet_event(struct notifier_block *this,
-			   unsigned long event,
-			   void *ptr)
-{
-	struct in_device *idev = ((struct in_ifaddr *)ptr)->ifa_dev;
-	struct net *net = dev_net(idev->dev);
-
-	/* The masq_dev_notifier will catch the case of the device going
-	 * down.  So if the inetdev is dead and being destroyed we have
-	 * no work to do.  Otherwise this is an individual address removal
-	 * and we have to perform the flush.
-	 */
-	if (idev->dead)
-		return NOTIFY_DONE;
-
-	if (event == NETDEV_DOWN)
-		nf_ct_iterate_cleanup_net(net, inet_cmp, ptr, 0, 0);
-
-	return NOTIFY_DONE;
-}
-
-static struct notifier_block masq_dev_notifier = {
-	.notifier_call	= masq_device_event,
-};
-
-static struct notifier_block masq_inet_notifier = {
-	.notifier_call	= masq_inet_event,
-};
-
-static int masq_refcnt;
-static DEFINE_MUTEX(masq_mutex);
-
-int nf_nat_masquerade_ipv4_register_notifier(void)
-{
-	int ret = 0;
-
-	mutex_lock(&masq_mutex);
-	/* check if the notifier was already set */
-	if (++masq_refcnt > 1)
-		goto out_unlock;
-
-	/* Register for device down reports */
-	ret = register_netdevice_notifier(&masq_dev_notifier);
-	if (ret)
-		goto err_dec;
-	/* Register IP address change reports */
-	ret = register_inetaddr_notifier(&masq_inet_notifier);
-	if (ret)
-		goto err_unregister;
-
-	mutex_unlock(&masq_mutex);
-	return ret;
-
-err_unregister:
-	unregister_netdevice_notifier(&masq_dev_notifier);
-err_dec:
-	masq_refcnt--;
-out_unlock:
-	mutex_unlock(&masq_mutex);
-	return ret;
-}
-EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv4_register_notifier);
-
-void nf_nat_masquerade_ipv4_unregister_notifier(void)
-{
-	mutex_lock(&masq_mutex);
-	/* check if the notifier still has clients */
-	if (--masq_refcnt > 0)
-		goto out_unlock;
-
-	unregister_netdevice_notifier(&masq_dev_notifier);
-	unregister_inetaddr_notifier(&masq_inet_notifier);
-out_unlock:
-	mutex_unlock(&masq_mutex);
-}
-EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv4_unregister_notifier);
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index 339d0762b027..f57fc99e9a04 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -44,7 +44,7 @@ config NFT_CHAIN_NAT_IPV6
 config NFT_MASQ_IPV6
 	tristate "IPv6 masquerade support for nf_tables"
 	depends on NFT_MASQ
-	select NF_NAT_MASQUERADE_IPV6
+	select NF_NAT_MASQUERADE
 	help
 	  This is the expression that provides IPv4 masquerading support for
 	  nf_tables.
@@ -116,13 +116,6 @@ config NF_NAT_IPV6
 	  forms of full Network Address Port Translation. This can be
 	  controlled by iptables or nft.
 
-if NF_NAT_IPV6
-
-config NF_NAT_MASQUERADE_IPV6
-	bool
-
-endif # NF_NAT_IPV6
-
 config IP6_NF_IPTABLES
 	tristate "IP6 tables support (required for filtering)"
 	depends on INET && IPV6
@@ -324,7 +317,7 @@ if IP6_NF_NAT
 
 config IP6_NF_TARGET_MASQUERADE
 	tristate "MASQUERADE target support"
-	select NF_NAT_MASQUERADE_IPV6
+	select NF_NAT_MASQUERADE
 	help
 	  Masquerading is a special case of NAT: all outgoing connections are
 	  changed to seem to come from a particular interface's address, and
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
index 9ea43d5256e0..a7b18d13e056 100644
--- a/net/ipv6/netfilter/Makefile
+++ b/net/ipv6/netfilter/Makefile
@@ -12,7 +12,6 @@ obj-$(CONFIG_IP6_NF_SECURITY) += ip6table_security.o
 obj-$(CONFIG_IP6_NF_NAT) += ip6table_nat.o
 
 nf_nat_ipv6-y		:= nf_nat_l3proto_ipv6.o
-nf_nat_ipv6-$(CONFIG_NF_NAT_MASQUERADE_IPV6) += nf_nat_masquerade_ipv6.o
 obj-$(CONFIG_NF_NAT_IPV6) += nf_nat_ipv6.o
 
 # defrag
diff --git a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c b/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
deleted file mode 100644
index fd313b726263..000000000000
--- a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
+++ /dev/null
@@ -1,240 +0,0 @@
-/*
- * Copyright (c) 2011 Patrick McHardy <kaber@trash.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * Based on Rusty Russell's IPv6 MASQUERADE target. Development of IPv6
- * NAT funded by Astaro.
- */
-
-#include <linux/kernel.h>
-#include <linux/atomic.h>
-#include <linux/netdevice.h>
-#include <linux/ipv6.h>
-#include <linux/netfilter.h>
-#include <linux/netfilter_ipv6.h>
-#include <net/netfilter/nf_nat.h>
-#include <net/addrconf.h>
-#include <net/ipv6.h>
-#include <net/netfilter/ipv6/nf_nat_masquerade.h>
-
-#define MAX_WORK_COUNT	16
-
-static atomic_t v6_worker_count;
-
-static int
-nat_ipv6_dev_get_saddr(struct net *net, const struct net_device *dev,
-		       const struct in6_addr *daddr, unsigned int srcprefs,
-		       struct in6_addr *saddr)
-{
-#ifdef CONFIG_IPV6_MODULE
-	const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops();
-
-	if (!v6_ops)
-		return -EHOSTUNREACH;
-
-	return v6_ops->dev_get_saddr(net, dev, daddr, srcprefs, saddr);
-#else
-	return ipv6_dev_get_saddr(net, dev, daddr, srcprefs, saddr);
-#endif
-}
-
-unsigned int
-nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range,
-		       const struct net_device *out)
-{
-	enum ip_conntrack_info ctinfo;
-	struct nf_conn_nat *nat;
-	struct in6_addr src;
-	struct nf_conn *ct;
-	struct nf_nat_range2 newrange;
-
-	ct = nf_ct_get(skb, &ctinfo);
-	WARN_ON(!(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
-			 ctinfo == IP_CT_RELATED_REPLY)));
-
-	if (nat_ipv6_dev_get_saddr(nf_ct_net(ct), out,
-				   &ipv6_hdr(skb)->daddr, 0, &src) < 0)
-		return NF_DROP;
-
-	nat = nf_ct_nat_ext_add(ct);
-	if (nat)
-		nat->masq_index = out->ifindex;
-
-	newrange.flags		= range->flags | NF_NAT_RANGE_MAP_IPS;
-	newrange.min_addr.in6	= src;
-	newrange.max_addr.in6	= src;
-	newrange.min_proto	= range->min_proto;
-	newrange.max_proto	= range->max_proto;
-
-	return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC);
-}
-EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6);
-
-static int device_cmp(struct nf_conn *ct, void *ifindex)
-{
-	const struct nf_conn_nat *nat = nfct_nat(ct);
-
-	if (!nat)
-		return 0;
-	if (nf_ct_l3num(ct) != NFPROTO_IPV6)
-		return 0;
-	return nat->masq_index == (int)(long)ifindex;
-}
-
-static int masq_device_event(struct notifier_block *this,
-			     unsigned long event, void *ptr)
-{
-	const struct net_device *dev = netdev_notifier_info_to_dev(ptr);
-	struct net *net = dev_net(dev);
-
-	if (event == NETDEV_DOWN)
-		nf_ct_iterate_cleanup_net(net, device_cmp,
-					  (void *)(long)dev->ifindex, 0, 0);
-
-	return NOTIFY_DONE;
-}
-
-static struct notifier_block masq_dev_notifier = {
-	.notifier_call	= masq_device_event,
-};
-
-struct masq_dev_work {
-	struct work_struct work;
-	struct net *net;
-	struct in6_addr addr;
-	int ifindex;
-};
-
-static int inet_cmp(struct nf_conn *ct, void *work)
-{
-	struct masq_dev_work *w = (struct masq_dev_work *)work;
-	struct nf_conntrack_tuple *tuple;
-
-	if (!device_cmp(ct, (void *)(long)w->ifindex))
-		return 0;
-
-	tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;
-
-	return ipv6_addr_equal(&w->addr, &tuple->dst.u3.in6);
-}
-
-static void iterate_cleanup_work(struct work_struct *work)
-{
-	struct masq_dev_work *w;
-
-	w = container_of(work, struct masq_dev_work, work);
-
-	nf_ct_iterate_cleanup_net(w->net, inet_cmp, (void *)w, 0, 0);
-
-	put_net(w->net);
-	kfree(w);
-	atomic_dec(&v6_worker_count);
-	module_put(THIS_MODULE);
-}
-
-/* ipv6 inet notifier is an atomic notifier, i.e. we cannot
- * schedule.
- *
- * Unfortunately, nf_ct_iterate_cleanup_net can run for a long
- * time if there are lots of conntracks and the system
- * handles high softirq load, so it frequently calls cond_resched
- * while iterating the conntrack table.
- *
- * So we defer nf_ct_iterate_cleanup_net walk to the system workqueue.
- *
- * As we can have 'a lot' of inet_events (depending on amount
- * of ipv6 addresses being deleted), we also need to add an upper
- * limit to the number of queued work items.
- */
-static int masq_inet6_event(struct notifier_block *this,
-			    unsigned long event, void *ptr)
-{
-	struct inet6_ifaddr *ifa = ptr;
-	const struct net_device *dev;
-	struct masq_dev_work *w;
-	struct net *net;
-
-	if (event != NETDEV_DOWN ||
-	    atomic_read(&v6_worker_count) >= MAX_WORK_COUNT)
-		return NOTIFY_DONE;
-
-	dev = ifa->idev->dev;
-	net = maybe_get_net(dev_net(dev));
-	if (!net)
-		return NOTIFY_DONE;
-
-	if (!try_module_get(THIS_MODULE))
-		goto err_module;
-
-	w = kmalloc(sizeof(*w), GFP_ATOMIC);
-	if (w) {
-		atomic_inc(&v6_worker_count);
-
-		INIT_WORK(&w->work, iterate_cleanup_work);
-		w->ifindex = dev->ifindex;
-		w->net = net;
-		w->addr = ifa->addr;
-		schedule_work(&w->work);
-
-		return NOTIFY_DONE;
-	}
-
-	module_put(THIS_MODULE);
- err_module:
-	put_net(net);
-	return NOTIFY_DONE;
-}
-
-static struct notifier_block masq_inet6_notifier = {
-	.notifier_call	= masq_inet6_event,
-};
-
-static int masq_refcnt;
-static DEFINE_MUTEX(masq_mutex);
-
-int nf_nat_masquerade_ipv6_register_notifier(void)
-{
-	int ret = 0;
-
-	mutex_lock(&masq_mutex);
-	/* check if the notifier is already set */
-	if (++masq_refcnt > 1)
-		goto out_unlock;
-
-	ret = register_netdevice_notifier(&masq_dev_notifier);
-	if (ret)
-		goto err_dec;
-
-	ret = register_inet6addr_notifier(&masq_inet6_notifier);
-	if (ret)
-		goto err_unregister;
-
-	mutex_unlock(&masq_mutex);
-	return ret;
-
-err_unregister:
-	unregister_netdevice_notifier(&masq_dev_notifier);
-err_dec:
-	masq_refcnt--;
-out_unlock:
-	mutex_unlock(&masq_mutex);
-	return ret;
-}
-EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6_register_notifier);
-
-void nf_nat_masquerade_ipv6_unregister_notifier(void)
-{
-	mutex_lock(&masq_mutex);
-	/* check if the notifier still has clients */
-	if (--masq_refcnt > 0)
-		goto out_unlock;
-
-	unregister_inet6addr_notifier(&masq_inet6_notifier);
-	unregister_netdevice_notifier(&masq_dev_notifier);
-out_unlock:
-	mutex_unlock(&masq_mutex);
-}
-EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6_unregister_notifier);
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index fefd63a243f2..5a753cec005b 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -431,6 +431,9 @@ config NF_NAT_TFTP
 config NF_NAT_REDIRECT
 	bool
 
+config NF_NAT_MASQUERADE
+	bool
+
 config NETFILTER_SYNPROXY
 	tristate
 
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index e66067befa42..c7910706f8dd 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -56,6 +56,7 @@ obj-$(CONFIG_NF_LOG_NETDEV) += nf_log_netdev.o
 
 obj-$(CONFIG_NF_NAT) += nf_nat.o
 nf_nat-$(CONFIG_NF_NAT_REDIRECT) += nf_nat_redirect.o
+nf_nat-$(CONFIG_NF_NAT_MASQUERADE) += nf_nat_masquerade.o
 
 # NAT helpers
 obj-$(CONFIG_NF_NAT_AMANDA) += nf_nat_amanda.o
diff --git a/net/netfilter/nf_nat_masquerade.c b/net/netfilter/nf_nat_masquerade.c
new file mode 100644
index 000000000000..86fa4dcc63c5
--- /dev/null
+++ b/net/netfilter/nf_nat_masquerade.c
@@ -0,0 +1,362 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/types.h>
+#include <linux/atomic.h>
+#include <linux/inetdevice.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter_ipv6.h>
+
+#include <net/netfilter/ipv4/nf_nat_masquerade.h>
+#include <net/netfilter/ipv6/nf_nat_masquerade.h>
+
+static DEFINE_MUTEX(masq_mutex);
+static unsigned int masq_refcnt __read_mostly;
+
+unsigned int
+nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum,
+		       const struct nf_nat_range2 *range,
+		       const struct net_device *out)
+{
+	struct nf_conn *ct;
+	struct nf_conn_nat *nat;
+	enum ip_conntrack_info ctinfo;
+	struct nf_nat_range2 newrange;
+	const struct rtable *rt;
+	__be32 newsrc, nh;
+
+	WARN_ON(hooknum != NF_INET_POST_ROUTING);
+
+	ct = nf_ct_get(skb, &ctinfo);
+
+	WARN_ON(!(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
+			 ctinfo == IP_CT_RELATED_REPLY)));
+
+	/* Source address is 0.0.0.0 - locally generated packet that is
+	 * probably not supposed to be masqueraded.
+	 */
+	if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip == 0)
+		return NF_ACCEPT;
+
+	rt = skb_rtable(skb);
+	nh = rt_nexthop(rt, ip_hdr(skb)->daddr);
+	newsrc = inet_select_addr(out, nh, RT_SCOPE_UNIVERSE);
+	if (!newsrc) {
+		pr_info("%s ate my IP address\n", out->name);
+		return NF_DROP;
+	}
+
+	nat = nf_ct_nat_ext_add(ct);
+	if (nat)
+		nat->masq_index = out->ifindex;
+
+	/* Transfer from original range. */
+	memset(&newrange.min_addr, 0, sizeof(newrange.min_addr));
+	memset(&newrange.max_addr, 0, sizeof(newrange.max_addr));
+	newrange.flags       = range->flags | NF_NAT_RANGE_MAP_IPS;
+	newrange.min_addr.ip = newsrc;
+	newrange.max_addr.ip = newsrc;
+	newrange.min_proto   = range->min_proto;
+	newrange.max_proto   = range->max_proto;
+
+	/* Hand modified range to generic setup. */
+	return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC);
+}
+EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv4);
+
+static int device_cmp(struct nf_conn *i, void *ifindex)
+{
+	const struct nf_conn_nat *nat = nfct_nat(i);
+
+	if (!nat)
+		return 0;
+	return nat->masq_index == (int)(long)ifindex;
+}
+
+static int masq_device_event(struct notifier_block *this,
+			     unsigned long event,
+			     void *ptr)
+{
+	const struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+	struct net *net = dev_net(dev);
+
+	if (event == NETDEV_DOWN) {
+		/* Device was downed.  Search entire table for
+		 * conntracks which were associated with that device,
+		 * and forget them.
+		 */
+
+		nf_ct_iterate_cleanup_net(net, device_cmp,
+					  (void *)(long)dev->ifindex, 0, 0);
+	}
+
+	return NOTIFY_DONE;
+}
+
+static int inet_cmp(struct nf_conn *ct, void *ptr)
+{
+	struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
+	struct net_device *dev = ifa->ifa_dev->dev;
+	struct nf_conntrack_tuple *tuple;
+
+	if (!device_cmp(ct, (void *)(long)dev->ifindex))
+		return 0;
+
+	tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;
+
+	return ifa->ifa_address == tuple->dst.u3.ip;
+}
+
+static int masq_inet_event(struct notifier_block *this,
+			   unsigned long event,
+			   void *ptr)
+{
+	struct in_device *idev = ((struct in_ifaddr *)ptr)->ifa_dev;
+	struct net *net = dev_net(idev->dev);
+
+	/* The masq_dev_notifier will catch the case of the device going
+	 * down.  So if the inetdev is dead and being destroyed we have
+	 * no work to do.  Otherwise this is an individual address removal
+	 * and we have to perform the flush.
+	 */
+	if (idev->dead)
+		return NOTIFY_DONE;
+
+	if (event == NETDEV_DOWN)
+		nf_ct_iterate_cleanup_net(net, inet_cmp, ptr, 0, 0);
+
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block masq_dev_notifier = {
+	.notifier_call	= masq_device_event,
+};
+
+static struct notifier_block masq_inet_notifier = {
+	.notifier_call	= masq_inet_event,
+};
+
+int nf_nat_masquerade_ipv4_register_notifier(void)
+{
+	int ret = 0;
+
+	mutex_lock(&masq_mutex);
+	/* check if the notifier was already set */
+	if (++masq_refcnt > 1)
+		goto out_unlock;
+
+	/* Register for device down reports */
+	ret = register_netdevice_notifier(&masq_dev_notifier);
+	if (ret)
+		goto err_dec;
+	/* Register IP address change reports */
+	ret = register_inetaddr_notifier(&masq_inet_notifier);
+	if (ret)
+		goto err_unregister;
+
+	mutex_unlock(&masq_mutex);
+	return ret;
+
+err_unregister:
+	unregister_netdevice_notifier(&masq_dev_notifier);
+err_dec:
+	masq_refcnt--;
+out_unlock:
+	mutex_unlock(&masq_mutex);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv4_register_notifier);
+
+void nf_nat_masquerade_ipv4_unregister_notifier(void)
+{
+	mutex_lock(&masq_mutex);
+	/* check if the notifier still has clients */
+	if (--masq_refcnt > 0)
+		goto out_unlock;
+
+	unregister_netdevice_notifier(&masq_dev_notifier);
+	unregister_inetaddr_notifier(&masq_inet_notifier);
+out_unlock:
+	mutex_unlock(&masq_mutex);
+}
+EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv4_unregister_notifier);
+
+#if IS_ENABLED(CONFIG_IPV6)
+static atomic_t v6_worker_count __read_mostly;
+
+static int
+nat_ipv6_dev_get_saddr(struct net *net, const struct net_device *dev,
+		       const struct in6_addr *daddr, unsigned int srcprefs,
+		       struct in6_addr *saddr)
+{
+#ifdef CONFIG_IPV6_MODULE
+	const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops();
+
+	if (!v6_ops)
+		return -EHOSTUNREACH;
+
+	return v6_ops->dev_get_saddr(net, dev, daddr, srcprefs, saddr);
+#else
+	return ipv6_dev_get_saddr(net, dev, daddr, srcprefs, saddr);
+#endif
+}
+
+unsigned int
+nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range,
+		       const struct net_device *out)
+{
+	enum ip_conntrack_info ctinfo;
+	struct nf_conn_nat *nat;
+	struct in6_addr src;
+	struct nf_conn *ct;
+	struct nf_nat_range2 newrange;
+
+	ct = nf_ct_get(skb, &ctinfo);
+	WARN_ON(!(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
+			 ctinfo == IP_CT_RELATED_REPLY)));
+
+	if (nat_ipv6_dev_get_saddr(nf_ct_net(ct), out,
+				   &ipv6_hdr(skb)->daddr, 0, &src) < 0)
+		return NF_DROP;
+
+	nat = nf_ct_nat_ext_add(ct);
+	if (nat)
+		nat->masq_index = out->ifindex;
+
+	newrange.flags		= range->flags | NF_NAT_RANGE_MAP_IPS;
+	newrange.min_addr.in6	= src;
+	newrange.max_addr.in6	= src;
+	newrange.min_proto	= range->min_proto;
+	newrange.max_proto	= range->max_proto;
+
+	return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC);
+}
+EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6);
+
+struct masq_dev_work {
+	struct work_struct work;
+	struct net *net;
+	struct in6_addr addr;
+	int ifindex;
+};
+
+static int inet6_cmp(struct nf_conn *ct, void *work)
+{
+	struct masq_dev_work *w = (struct masq_dev_work *)work;
+	struct nf_conntrack_tuple *tuple;
+
+	if (!device_cmp(ct, (void *)(long)w->ifindex))
+		return 0;
+
+	tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;
+
+	return ipv6_addr_equal(&w->addr, &tuple->dst.u3.in6);
+}
+
+static void iterate_cleanup_work(struct work_struct *work)
+{
+	struct masq_dev_work *w;
+
+	w = container_of(work, struct masq_dev_work, work);
+
+	nf_ct_iterate_cleanup_net(w->net, inet6_cmp, (void *)w, 0, 0);
+
+	put_net(w->net);
+	kfree(w);
+	atomic_dec(&v6_worker_count);
+	module_put(THIS_MODULE);
+}
+
+/* atomic notifier; can't call nf_ct_iterate_cleanup_net (it can sleep).
+ *
+ * Defer it to the system workqueue.
+ *
+ * As we can have 'a lot' of inet_events (depending on amount of ipv6
+ * addresses being deleted), we also need to limit work item queue.
+ */
+static int masq_inet6_event(struct notifier_block *this,
+			    unsigned long event, void *ptr)
+{
+	struct inet6_ifaddr *ifa = ptr;
+	const struct net_device *dev;
+	struct masq_dev_work *w;
+	struct net *net;
+
+	if (event != NETDEV_DOWN || atomic_read(&v6_worker_count) >= 16)
+		return NOTIFY_DONE;
+
+	dev = ifa->idev->dev;
+	net = maybe_get_net(dev_net(dev));
+	if (!net)
+		return NOTIFY_DONE;
+
+	if (!try_module_get(THIS_MODULE))
+		goto err_module;
+
+	w = kmalloc(sizeof(*w), GFP_ATOMIC);
+	if (w) {
+		atomic_inc(&v6_worker_count);
+
+		INIT_WORK(&w->work, iterate_cleanup_work);
+		w->ifindex = dev->ifindex;
+		w->net = net;
+		w->addr = ifa->addr;
+		schedule_work(&w->work);
+
+		return NOTIFY_DONE;
+	}
+
+	module_put(THIS_MODULE);
+ err_module:
+	put_net(net);
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block masq_inet6_notifier = {
+	.notifier_call	= masq_inet6_event,
+};
+
+int nf_nat_masquerade_ipv6_register_notifier(void)
+{
+	int ret = 0;
+
+	mutex_lock(&masq_mutex);
+	/* check if the notifier is already set */
+	if (++masq_refcnt > 1)
+		goto out_unlock;
+
+	ret = register_netdevice_notifier(&masq_dev_notifier);
+	if (ret)
+		goto err_dec;
+
+	ret = register_inet6addr_notifier(&masq_inet6_notifier);
+	if (ret)
+		goto err_unregister;
+
+	mutex_unlock(&masq_mutex);
+	return ret;
+
+err_unregister:
+	unregister_netdevice_notifier(&masq_dev_notifier);
+err_dec:
+	masq_refcnt--;
+out_unlock:
+	mutex_unlock(&masq_mutex);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6_register_notifier);
+
+void nf_nat_masquerade_ipv6_unregister_notifier(void)
+{
+	mutex_lock(&masq_mutex);
+	/* check if the notifier still has clients */
+	if (--masq_refcnt > 0)
+		goto out_unlock;
+
+	unregister_inet6addr_notifier(&masq_inet6_notifier);
+	unregister_netdevice_notifier(&masq_dev_notifier);
+out_unlock:
+	mutex_unlock(&masq_mutex);
+}
+EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6_unregister_notifier);
+#endif
-- 
cgit v1.2.3


From 096d09067a67702f9802e5b3a0fc2ea9c22f1cf6 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Tue, 19 Feb 2019 17:38:20 +0100
Subject: netfilter: nat: move nlattr parse and xfrm session decode to core

None of these functions calls any external functions, moving them allows
to avoid both the indirection and a need to export these symbols.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_nat_l3proto.h   |   9 --
 net/ipv4/netfilter/nf_nat_l3proto_ipv4.c |  58 -------------
 net/ipv6/netfilter/nf_nat_l3proto_ipv6.c |  60 -------------
 net/netfilter/nf_nat_core.c              | 142 ++++++++++++++++++++++++++-----
 4 files changed, 123 insertions(+), 146 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_nat_l3proto.h b/include/net/netfilter/nf_nat_l3proto.h
index d774ca0c4c5e..100972bbd9ad 100644
--- a/include/net/netfilter/nf_nat_l3proto.h
+++ b/include/net/netfilter/nf_nat_l3proto.h
@@ -18,15 +18,6 @@ struct nf_nat_l3proto {
 	void	(*csum_recalc)(struct sk_buff *skb, u8 proto,
 			       void *data, __sum16 *check,
 			       int datalen, int oldlen);
-
-	void	(*decode_session)(struct sk_buff *skb,
-				  const struct nf_conn *ct,
-				  enum ip_conntrack_dir dir,
-				  unsigned long statusbit,
-				  struct flowi *fl);
-
-	int	(*nlattr_to_range)(struct nlattr *tb[],
-				   struct nf_nat_range2 *range);
 };
 
 int nf_nat_l3proto_register(const struct nf_nat_l3proto *);
diff --git a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
index 4b07eb8a9b18..36b4f9659ffa 100644
--- a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
@@ -28,40 +28,6 @@
 
 static const struct nf_nat_l3proto nf_nat_l3proto_ipv4;
 
-#ifdef CONFIG_XFRM
-static void nf_nat_ipv4_decode_session(struct sk_buff *skb,
-				       const struct nf_conn *ct,
-				       enum ip_conntrack_dir dir,
-				       unsigned long statusbit,
-				       struct flowi *fl)
-{
-	const struct nf_conntrack_tuple *t = &ct->tuplehash[dir].tuple;
-	struct flowi4 *fl4 = &fl->u.ip4;
-
-	if (ct->status & statusbit) {
-		fl4->daddr = t->dst.u3.ip;
-		if (t->dst.protonum == IPPROTO_TCP ||
-		    t->dst.protonum == IPPROTO_UDP ||
-		    t->dst.protonum == IPPROTO_UDPLITE ||
-		    t->dst.protonum == IPPROTO_DCCP ||
-		    t->dst.protonum == IPPROTO_SCTP)
-			fl4->fl4_dport = t->dst.u.all;
-	}
-
-	statusbit ^= IPS_NAT_MASK;
-
-	if (ct->status & statusbit) {
-		fl4->saddr = t->src.u3.ip;
-		if (t->dst.protonum == IPPROTO_TCP ||
-		    t->dst.protonum == IPPROTO_UDP ||
-		    t->dst.protonum == IPPROTO_UDPLITE ||
-		    t->dst.protonum == IPPROTO_DCCP ||
-		    t->dst.protonum == IPPROTO_SCTP)
-			fl4->fl4_sport = t->src.u.all;
-	}
-}
-#endif /* CONFIG_XFRM */
-
 static bool nf_nat_ipv4_manip_pkt(struct sk_buff *skb,
 				  unsigned int iphdroff,
 				  const struct nf_conntrack_tuple *target,
@@ -127,35 +93,11 @@ static void nf_nat_ipv4_csum_recalc(struct sk_buff *skb,
 					 htons(oldlen), htons(datalen), true);
 }
 
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
-static int nf_nat_ipv4_nlattr_to_range(struct nlattr *tb[],
-				       struct nf_nat_range2 *range)
-{
-	if (tb[CTA_NAT_V4_MINIP]) {
-		range->min_addr.ip = nla_get_be32(tb[CTA_NAT_V4_MINIP]);
-		range->flags |= NF_NAT_RANGE_MAP_IPS;
-	}
-
-	if (tb[CTA_NAT_V4_MAXIP])
-		range->max_addr.ip = nla_get_be32(tb[CTA_NAT_V4_MAXIP]);
-	else
-		range->max_addr.ip = range->min_addr.ip;
-
-	return 0;
-}
-#endif
-
 static const struct nf_nat_l3proto nf_nat_l3proto_ipv4 = {
 	.l3proto		= NFPROTO_IPV4,
 	.manip_pkt		= nf_nat_ipv4_manip_pkt,
 	.csum_update		= nf_nat_ipv4_csum_update,
 	.csum_recalc		= nf_nat_ipv4_csum_recalc,
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
-	.nlattr_to_range	= nf_nat_ipv4_nlattr_to_range,
-#endif
-#ifdef CONFIG_XFRM
-	.decode_session		= nf_nat_ipv4_decode_session,
-#endif
 };
 
 int nf_nat_icmp_reply_translation(struct sk_buff *skb,
diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
index 490bfd3c9162..5d667cf9bab8 100644
--- a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
@@ -28,40 +28,6 @@
 
 static const struct nf_nat_l3proto nf_nat_l3proto_ipv6;
 
-#ifdef CONFIG_XFRM
-static void nf_nat_ipv6_decode_session(struct sk_buff *skb,
-				       const struct nf_conn *ct,
-				       enum ip_conntrack_dir dir,
-				       unsigned long statusbit,
-				       struct flowi *fl)
-{
-	const struct nf_conntrack_tuple *t = &ct->tuplehash[dir].tuple;
-	struct flowi6 *fl6 = &fl->u.ip6;
-
-	if (ct->status & statusbit) {
-		fl6->daddr = t->dst.u3.in6;
-		if (t->dst.protonum == IPPROTO_TCP ||
-		    t->dst.protonum == IPPROTO_UDP ||
-		    t->dst.protonum == IPPROTO_UDPLITE ||
-		    t->dst.protonum == IPPROTO_DCCP ||
-		    t->dst.protonum == IPPROTO_SCTP)
-			fl6->fl6_dport = t->dst.u.all;
-	}
-
-	statusbit ^= IPS_NAT_MASK;
-
-	if (ct->status & statusbit) {
-		fl6->saddr = t->src.u3.in6;
-		if (t->dst.protonum == IPPROTO_TCP ||
-		    t->dst.protonum == IPPROTO_UDP ||
-		    t->dst.protonum == IPPROTO_UDPLITE ||
-		    t->dst.protonum == IPPROTO_DCCP ||
-		    t->dst.protonum == IPPROTO_SCTP)
-			fl6->fl6_sport = t->src.u.all;
-	}
-}
-#endif
-
 static bool nf_nat_ipv6_manip_pkt(struct sk_buff *skb,
 				  unsigned int iphdroff,
 				  const struct nf_conntrack_tuple *target,
@@ -136,37 +102,11 @@ static void nf_nat_ipv6_csum_recalc(struct sk_buff *skb,
 					 htons(oldlen), htons(datalen), true);
 }
 
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
-static int nf_nat_ipv6_nlattr_to_range(struct nlattr *tb[],
-				       struct nf_nat_range2 *range)
-{
-	if (tb[CTA_NAT_V6_MINIP]) {
-		nla_memcpy(&range->min_addr.ip6, tb[CTA_NAT_V6_MINIP],
-			   sizeof(struct in6_addr));
-		range->flags |= NF_NAT_RANGE_MAP_IPS;
-	}
-
-	if (tb[CTA_NAT_V6_MAXIP])
-		nla_memcpy(&range->max_addr.ip6, tb[CTA_NAT_V6_MAXIP],
-			   sizeof(struct in6_addr));
-	else
-		range->max_addr = range->min_addr;
-
-	return 0;
-}
-#endif
-
 static const struct nf_nat_l3proto nf_nat_l3proto_ipv6 = {
 	.l3proto		= NFPROTO_IPV6,
 	.manip_pkt		= nf_nat_ipv6_manip_pkt,
 	.csum_update		= nf_nat_ipv6_csum_update,
 	.csum_recalc		= nf_nat_ipv6_csum_recalc,
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
-	.nlattr_to_range	= nf_nat_ipv6_nlattr_to_range,
-#endif
-#ifdef CONFIG_XFRM
-	.decode_session	= nf_nat_ipv6_decode_session,
-#endif
 };
 
 int nf_nat_icmpv6_reply_translation(struct sk_buff *skb,
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 35e61038ae96..0f39ae7a9f34 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -65,9 +65,74 @@ __nf_nat_l3proto_find(u8 family)
 }
 
 #ifdef CONFIG_XFRM
+static void nf_nat_ipv4_decode_session(struct sk_buff *skb,
+				       const struct nf_conn *ct,
+				       enum ip_conntrack_dir dir,
+				       unsigned long statusbit,
+				       struct flowi *fl)
+{
+	const struct nf_conntrack_tuple *t = &ct->tuplehash[dir].tuple;
+	struct flowi4 *fl4 = &fl->u.ip4;
+
+	if (ct->status & statusbit) {
+		fl4->daddr = t->dst.u3.ip;
+		if (t->dst.protonum == IPPROTO_TCP ||
+		    t->dst.protonum == IPPROTO_UDP ||
+		    t->dst.protonum == IPPROTO_UDPLITE ||
+		    t->dst.protonum == IPPROTO_DCCP ||
+		    t->dst.protonum == IPPROTO_SCTP)
+			fl4->fl4_dport = t->dst.u.all;
+	}
+
+	statusbit ^= IPS_NAT_MASK;
+
+	if (ct->status & statusbit) {
+		fl4->saddr = t->src.u3.ip;
+		if (t->dst.protonum == IPPROTO_TCP ||
+		    t->dst.protonum == IPPROTO_UDP ||
+		    t->dst.protonum == IPPROTO_UDPLITE ||
+		    t->dst.protonum == IPPROTO_DCCP ||
+		    t->dst.protonum == IPPROTO_SCTP)
+			fl4->fl4_sport = t->src.u.all;
+	}
+}
+
+static void nf_nat_ipv6_decode_session(struct sk_buff *skb,
+				       const struct nf_conn *ct,
+				       enum ip_conntrack_dir dir,
+				       unsigned long statusbit,
+				       struct flowi *fl)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+	const struct nf_conntrack_tuple *t = &ct->tuplehash[dir].tuple;
+	struct flowi6 *fl6 = &fl->u.ip6;
+
+	if (ct->status & statusbit) {
+		fl6->daddr = t->dst.u3.in6;
+		if (t->dst.protonum == IPPROTO_TCP ||
+		    t->dst.protonum == IPPROTO_UDP ||
+		    t->dst.protonum == IPPROTO_UDPLITE ||
+		    t->dst.protonum == IPPROTO_DCCP ||
+		    t->dst.protonum == IPPROTO_SCTP)
+			fl6->fl6_dport = t->dst.u.all;
+	}
+
+	statusbit ^= IPS_NAT_MASK;
+
+	if (ct->status & statusbit) {
+		fl6->saddr = t->src.u3.in6;
+		if (t->dst.protonum == IPPROTO_TCP ||
+		    t->dst.protonum == IPPROTO_UDP ||
+		    t->dst.protonum == IPPROTO_UDPLITE ||
+		    t->dst.protonum == IPPROTO_DCCP ||
+		    t->dst.protonum == IPPROTO_SCTP)
+			fl6->fl6_sport = t->src.u.all;
+	}
+#endif
+}
+
 static void __nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl)
 {
-	const struct nf_nat_l3proto *l3proto;
 	const struct nf_conn *ct;
 	enum ip_conntrack_info ctinfo;
 	enum ip_conntrack_dir dir;
@@ -79,17 +144,20 @@ static void __nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl)
 		return;
 
 	family = nf_ct_l3num(ct);
-	l3proto = __nf_nat_l3proto_find(family);
-	if (l3proto == NULL)
-		return;
-
 	dir = CTINFO2DIR(ctinfo);
 	if (dir == IP_CT_DIR_ORIGINAL)
 		statusbit = IPS_DST_NAT;
 	else
 		statusbit = IPS_SRC_NAT;
 
-	l3proto->decode_session(skb, ct, dir, statusbit, fl);
+	switch (family) {
+	case NFPROTO_IPV4:
+		nf_nat_ipv4_decode_session(skb, ct, dir, statusbit, fl);
+		return;
+	case NFPROTO_IPV6:
+		nf_nat_ipv6_decode_session(skb, ct, dir, statusbit, fl);
+		return;
+	}
 }
 
 int nf_xfrm_me_harder(struct net *net, struct sk_buff *skb, unsigned int family)
@@ -887,10 +955,43 @@ static const struct nla_policy nat_nla_policy[CTA_NAT_MAX+1] = {
 	[CTA_NAT_PROTO]		= { .type = NLA_NESTED },
 };
 
+static int nf_nat_ipv4_nlattr_to_range(struct nlattr *tb[],
+				       struct nf_nat_range2 *range)
+{
+	if (tb[CTA_NAT_V4_MINIP]) {
+		range->min_addr.ip = nla_get_be32(tb[CTA_NAT_V4_MINIP]);
+		range->flags |= NF_NAT_RANGE_MAP_IPS;
+	}
+
+	if (tb[CTA_NAT_V4_MAXIP])
+		range->max_addr.ip = nla_get_be32(tb[CTA_NAT_V4_MAXIP]);
+	else
+		range->max_addr.ip = range->min_addr.ip;
+
+	return 0;
+}
+
+static int nf_nat_ipv6_nlattr_to_range(struct nlattr *tb[],
+				       struct nf_nat_range2 *range)
+{
+	if (tb[CTA_NAT_V6_MINIP]) {
+		nla_memcpy(&range->min_addr.ip6, tb[CTA_NAT_V6_MINIP],
+			   sizeof(struct in6_addr));
+		range->flags |= NF_NAT_RANGE_MAP_IPS;
+	}
+
+	if (tb[CTA_NAT_V6_MAXIP])
+		nla_memcpy(&range->max_addr.ip6, tb[CTA_NAT_V6_MAXIP],
+			   sizeof(struct in6_addr));
+	else
+		range->max_addr = range->min_addr;
+
+	return 0;
+}
+
 static int
 nfnetlink_parse_nat(const struct nlattr *nat,
-		    const struct nf_conn *ct, struct nf_nat_range2 *range,
-		    const struct nf_nat_l3proto *l3proto)
+		    const struct nf_conn *ct, struct nf_nat_range2 *range)
 {
 	struct nlattr *tb[CTA_NAT_MAX+1];
 	int err;
@@ -901,8 +1002,19 @@ nfnetlink_parse_nat(const struct nlattr *nat,
 	if (err < 0)
 		return err;
 
-	err = l3proto->nlattr_to_range(tb, range);
-	if (err < 0)
+	switch (nf_ct_l3num(ct)) {
+	case NFPROTO_IPV4:
+		err = nf_nat_ipv4_nlattr_to_range(tb, range);
+		break;
+	case NFPROTO_IPV6:
+		err = nf_nat_ipv6_nlattr_to_range(tb, range);
+		break;
+	default:
+		err = -EPROTONOSUPPORT;
+		break;
+	}
+
+	if (err)
 		return err;
 
 	if (!tb[CTA_NAT_PROTO])
@@ -918,7 +1030,6 @@ nfnetlink_parse_nat_setup(struct nf_conn *ct,
 			  const struct nlattr *attr)
 {
 	struct nf_nat_range2 range;
-	const struct nf_nat_l3proto *l3proto;
 	int err;
 
 	/* Should not happen, restricted to creating new conntracks
@@ -927,18 +1038,11 @@ nfnetlink_parse_nat_setup(struct nf_conn *ct,
 	if (WARN_ON_ONCE(nf_nat_initialized(ct, manip)))
 		return -EEXIST;
 
-	/* Make sure that L3 NAT is there by when we call nf_nat_setup_info to
-	 * attach the null binding, otherwise this may oops.
-	 */
-	l3proto = __nf_nat_l3proto_find(nf_ct_l3num(ct));
-	if (l3proto == NULL)
-		return -EAGAIN;
-
 	/* No NAT information has been passed, allocate the null-binding */
 	if (attr == NULL)
 		return __nf_nat_alloc_null_binding(ct, manip) == NF_DROP ? -ENOMEM : 0;
 
-	err = nfnetlink_parse_nat(attr, ct, &range, l3proto);
+	err = nfnetlink_parse_nat(attr, ct, &range);
 	if (err < 0)
 		return err;
 
-- 
cgit v1.2.3


From 14cb1a6e29675b3d4ab5f59e50e1c101ae3349e4 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Tue, 19 Feb 2019 17:38:22 +0100
Subject: netfilter: nat: remove nf_nat_l4proto.h

after ipv4/6 nat tracker merge, there are no external callers, so
make last function static and remove the header.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_nat_l4proto.h | 16 ----------------
 net/netfilter/nf_nat_helper.c          |  1 -
 net/netfilter/nf_nat_proto.c           | 15 ++++++---------
 3 files changed, 6 insertions(+), 26 deletions(-)
 delete mode 100644 include/net/netfilter/nf_nat_l4proto.h

(limited to 'include')

diff --git a/include/net/netfilter/nf_nat_l4proto.h b/include/net/netfilter/nf_nat_l4proto.h
deleted file mode 100644
index 95a4655bd1ad..000000000000
--- a/include/net/netfilter/nf_nat_l4proto.h
+++ /dev/null
@@ -1,16 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/* Header for use in defining a given protocol. */
-#ifndef _NF_NAT_L4PROTO_H
-#define _NF_NAT_L4PROTO_H
-#include <net/netfilter/nf_nat.h>
-#include <linux/netfilter/nfnetlink_conntrack.h>
-
-struct nf_nat_l3proto;
-
-/* Translate a packet to the target according to manip type.  Return on success. */
-bool nf_nat_l4proto_manip_pkt(struct sk_buff *skb,
-			      const struct nf_nat_l3proto *l3proto,
-			      unsigned int iphdroff, unsigned int hdroff,
-			      const struct nf_conntrack_tuple *tuple,
-			      enum nf_nat_manip_type maniptype);
-#endif /*_NF_NAT_L4PROTO_H*/
diff --git a/net/netfilter/nf_nat_helper.c b/net/netfilter/nf_nat_helper.c
index 38793b95d9bc..12dea976d959 100644
--- a/net/netfilter/nf_nat_helper.c
+++ b/net/netfilter/nf_nat_helper.c
@@ -23,7 +23,6 @@
 #include <net/netfilter/nf_conntrack_seqadj.h>
 #include <net/netfilter/nf_nat.h>
 #include <net/netfilter/nf_nat_l3proto.h>
-#include <net/netfilter/nf_nat_l4proto.h>
 #include <net/netfilter/nf_nat_core.h>
 #include <net/netfilter/nf_nat_helper.h>
 
diff --git a/net/netfilter/nf_nat_proto.c b/net/netfilter/nf_nat_proto.c
index 9c4db18741ef..ecb988ed4d69 100644
--- a/net/netfilter/nf_nat_proto.c
+++ b/net/netfilter/nf_nat_proto.c
@@ -22,7 +22,6 @@
 #include <net/netfilter/nf_nat.h>
 #include <net/netfilter/nf_nat_core.h>
 #include <net/netfilter/nf_nat_l3proto.h>
-#include <net/netfilter/nf_nat_l4proto.h>
 
 #include <linux/ipv6.h>
 #include <linux/netfilter_ipv6.h>
@@ -34,6 +33,7 @@
 
 #include <net/netfilter/nf_conntrack_core.h>
 #include <net/netfilter/nf_conntrack.h>
+#include <linux/netfilter/nfnetlink_conntrack.h>
 
 static const struct nf_nat_l3proto nf_nat_l3proto_ipv4;
 #if IS_ENABLED(CONFIG_IPV6)
@@ -320,7 +320,7 @@ gre_manip_pkt(struct sk_buff *skb,
 	return true;
 }
 
-bool nf_nat_l4proto_manip_pkt(struct sk_buff *skb,
+static bool l4proto_manip_pkt(struct sk_buff *skb,
 			      const struct nf_nat_l3proto *l3proto,
 			      unsigned int iphdroff, unsigned int hdroff,
 			      const struct nf_conntrack_tuple *tuple,
@@ -356,7 +356,6 @@ bool nf_nat_l4proto_manip_pkt(struct sk_buff *skb,
 	/* If we don't know protocol -- no error, pass it unmodified. */
 	return true;
 }
-EXPORT_SYMBOL_GPL(nf_nat_l4proto_manip_pkt);
 
 static bool nf_nat_ipv4_manip_pkt(struct sk_buff *skb,
 				  unsigned int iphdroff,
@@ -372,8 +371,8 @@ static bool nf_nat_ipv4_manip_pkt(struct sk_buff *skb,
 	iph = (void *)skb->data + iphdroff;
 	hdroff = iphdroff + iph->ihl * 4;
 
-	if (!nf_nat_l4proto_manip_pkt(skb, &nf_nat_l3proto_ipv4, iphdroff,
-				      hdroff, target, maniptype))
+	if (!l4proto_manip_pkt(skb, &nf_nat_l3proto_ipv4, iphdroff,
+			       hdroff, target, maniptype))
 		return false;
 	iph = (void *)skb->data + iphdroff;
 
@@ -409,8 +408,8 @@ static bool nf_nat_ipv6_manip_pkt(struct sk_buff *skb,
 		goto manip_addr;
 
 	if ((frag_off & htons(~0x7)) == 0 &&
-	    !nf_nat_l4proto_manip_pkt(skb, &nf_nat_l3proto_ipv6, iphdroff, hdroff,
-				      target, maniptype))
+	    !l4proto_manip_pkt(skb, &nf_nat_l3proto_ipv6, iphdroff, hdroff,
+			       target, maniptype))
 		return false;
 
 	/* must reload, offset might have changed */
@@ -758,8 +757,6 @@ void nf_nat_l3proto_exit(void)
 }
 
 #if IS_ENABLED(CONFIG_IPV6)
-static const struct nf_nat_l3proto nf_nat_l3proto_ipv6;
-
 static const struct nf_nat_l3proto nf_nat_l3proto_ipv6 = {
 	.l3proto		= NFPROTO_IPV6,
 	.manip_pkt		= nf_nat_ipv6_manip_pkt,
-- 
cgit v1.2.3


From 2e666b229d97a9cdbc9fe571737eb297e7232098 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Tue, 19 Feb 2019 17:38:23 +0100
Subject: netfilter: nat: remove l3 manip_pkt hook

We can now use direct calls.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_nat_l3proto.h |  9 ++++-----
 net/netfilter/nf_nat_core.c            | 17 -----------------
 net/netfilter/nf_nat_proto.c           | 28 ++++++++++++++++++++++++++--
 3 files changed, 30 insertions(+), 24 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_nat_l3proto.h b/include/net/netfilter/nf_nat_l3proto.h
index 100972bbd9ad..62ef15eb7594 100644
--- a/include/net/netfilter/nf_nat_l3proto.h
+++ b/include/net/netfilter/nf_nat_l3proto.h
@@ -5,11 +5,6 @@
 struct nf_nat_l3proto {
 	u8	l3proto;
 
-	bool	(*manip_pkt)(struct sk_buff *skb,
-			     unsigned int iphdroff,
-			     const struct nf_conntrack_tuple *target,
-			     enum nf_nat_manip_type maniptype);
-
 	void	(*csum_update)(struct sk_buff *skb, unsigned int iphdroff,
 			       __sum16 *check,
 			       const struct nf_conntrack_tuple *t,
@@ -20,6 +15,10 @@ struct nf_nat_l3proto {
 			       int datalen, int oldlen);
 };
 
+unsigned int nf_nat_manip_pkt(struct sk_buff *skb, struct nf_conn *ct,
+			      enum nf_nat_manip_type mtype,
+			      enum ip_conntrack_dir dir);
+
 int nf_nat_l3proto_register(const struct nf_nat_l3proto *);
 void nf_nat_l3proto_unregister(const struct nf_nat_l3proto *);
 const struct nf_nat_l3proto *__nf_nat_l3proto_find(u8 l3proto);
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 0c548ff215b2..8c5c29189383 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -699,23 +699,6 @@ nf_nat_alloc_null_binding(struct nf_conn *ct, unsigned int hooknum)
 }
 EXPORT_SYMBOL_GPL(nf_nat_alloc_null_binding);
 
-static unsigned int nf_nat_manip_pkt(struct sk_buff *skb, struct nf_conn *ct,
-				     enum nf_nat_manip_type mtype,
-				     enum ip_conntrack_dir dir)
-{
-	const struct nf_nat_l3proto *l3proto;
-	struct nf_conntrack_tuple target;
-
-	/* We are aiming to look like inverse of other direction. */
-	nf_ct_invert_tuple(&target, &ct->tuplehash[!dir].tuple);
-
-	l3proto = __nf_nat_l3proto_find(target.src.l3num);
-	if (!l3proto->manip_pkt(skb, 0, &target, mtype))
-		return NF_DROP;
-
-	return NF_ACCEPT;
-}
-
 /* Do packet manipulations according to nf_nat_setup_info. */
 unsigned int nf_nat_packet(struct nf_conn *ct,
 			   enum ip_conntrack_info ctinfo,
diff --git a/net/netfilter/nf_nat_proto.c b/net/netfilter/nf_nat_proto.c
index ecb988ed4d69..5a6496dbf1bf 100644
--- a/net/netfilter/nf_nat_proto.c
+++ b/net/netfilter/nf_nat_proto.c
@@ -425,6 +425,32 @@ manip_addr:
 	return true;
 }
 
+unsigned int nf_nat_manip_pkt(struct sk_buff *skb, struct nf_conn *ct,
+			      enum nf_nat_manip_type mtype,
+			      enum ip_conntrack_dir dir)
+{
+	struct nf_conntrack_tuple target;
+
+	/* We are aiming to look like inverse of other direction. */
+	nf_ct_invert_tuple(&target, &ct->tuplehash[!dir].tuple);
+
+	switch (target.src.l3num) {
+	case NFPROTO_IPV6:
+		if (nf_nat_ipv6_manip_pkt(skb, 0, &target, mtype))
+			return NF_ACCEPT;
+		break;
+	case NFPROTO_IPV4:
+		if (nf_nat_ipv4_manip_pkt(skb, 0, &target, mtype))
+			return NF_ACCEPT;
+		break;
+	default:
+		WARN_ON_ONCE(1);
+		break;
+	}
+
+	return NF_DROP;
+}
+
 static void nf_nat_ipv4_csum_update(struct sk_buff *skb,
 				    unsigned int iphdroff, __sum16 *check,
 				    const struct nf_conntrack_tuple *t,
@@ -506,7 +532,6 @@ static void nf_nat_ipv6_csum_recalc(struct sk_buff *skb,
 
 static const struct nf_nat_l3proto nf_nat_l3proto_ipv4 = {
 	.l3proto		= NFPROTO_IPV4,
-	.manip_pkt		= nf_nat_ipv4_manip_pkt,
 	.csum_update		= nf_nat_ipv4_csum_update,
 	.csum_recalc		= nf_nat_ipv4_csum_recalc,
 };
@@ -759,7 +784,6 @@ void nf_nat_l3proto_exit(void)
 #if IS_ENABLED(CONFIG_IPV6)
 static const struct nf_nat_l3proto nf_nat_l3proto_ipv6 = {
 	.l3proto		= NFPROTO_IPV6,
-	.manip_pkt		= nf_nat_ipv6_manip_pkt,
 	.csum_update		= nf_nat_ipv6_csum_update,
 	.csum_recalc		= nf_nat_ipv6_csum_recalc,
 };
-- 
cgit v1.2.3


From 03fe5efc4c9c8c7157092d0cbb5d1bbea629ce48 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Tue, 19 Feb 2019 17:38:24 +0100
Subject: netfilter: nat: remove csum_update hook

We can now use direct calls.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_nat_l3proto.h |  5 ---
 net/netfilter/nf_nat_proto.c           | 69 ++++++++++++++++++----------------
 2 files changed, 36 insertions(+), 38 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_nat_l3proto.h b/include/net/netfilter/nf_nat_l3proto.h
index 62ef15eb7594..c1123030c94f 100644
--- a/include/net/netfilter/nf_nat_l3proto.h
+++ b/include/net/netfilter/nf_nat_l3proto.h
@@ -5,11 +5,6 @@
 struct nf_nat_l3proto {
 	u8	l3proto;
 
-	void	(*csum_update)(struct sk_buff *skb, unsigned int iphdroff,
-			       __sum16 *check,
-			       const struct nf_conntrack_tuple *t,
-			       enum nf_nat_manip_type maniptype);
-
 	void	(*csum_recalc)(struct sk_buff *skb, u8 proto,
 			       void *data, __sum16 *check,
 			       int datalen, int oldlen);
diff --git a/net/netfilter/nf_nat_proto.c b/net/netfilter/nf_nat_proto.c
index 5a6496dbf1bf..8a306a77914d 100644
--- a/net/netfilter/nf_nat_proto.c
+++ b/net/netfilter/nf_nat_proto.c
@@ -40,9 +40,13 @@ static const struct nf_nat_l3proto nf_nat_l3proto_ipv4;
 static const struct nf_nat_l3proto nf_nat_l3proto_ipv6;
 #endif
 
+static void nf_csum_update(struct sk_buff *skb,
+			   unsigned int iphdroff, __sum16 *check,
+			   const struct nf_conntrack_tuple *t,
+			   enum nf_nat_manip_type maniptype);
+
 static void
 __udp_manip_pkt(struct sk_buff *skb,
-	        const struct nf_nat_l3proto *l3proto,
 	        unsigned int iphdroff, struct udphdr *hdr,
 	        const struct nf_conntrack_tuple *tuple,
 	        enum nf_nat_manip_type maniptype, bool do_csum)
@@ -59,8 +63,7 @@ __udp_manip_pkt(struct sk_buff *skb,
 		portptr = &hdr->dest;
 	}
 	if (do_csum) {
-		l3proto->csum_update(skb, iphdroff, &hdr->check,
-				     tuple, maniptype);
+		nf_csum_update(skb, iphdroff, &hdr->check, tuple, maniptype);
 		inet_proto_csum_replace2(&hdr->check, skb, *portptr, newport,
 					 false);
 		if (!hdr->check)
@@ -70,7 +73,6 @@ __udp_manip_pkt(struct sk_buff *skb,
 }
 
 static bool udp_manip_pkt(struct sk_buff *skb,
-			  const struct nf_nat_l3proto *l3proto,
 			  unsigned int iphdroff, unsigned int hdroff,
 			  const struct nf_conntrack_tuple *tuple,
 			  enum nf_nat_manip_type maniptype)
@@ -84,12 +86,11 @@ static bool udp_manip_pkt(struct sk_buff *skb,
 	hdr = (struct udphdr *)(skb->data + hdroff);
 	do_csum = hdr->check || skb->ip_summed == CHECKSUM_PARTIAL;
 
-	__udp_manip_pkt(skb, l3proto, iphdroff, hdr, tuple, maniptype, do_csum);
+	__udp_manip_pkt(skb, iphdroff, hdr, tuple, maniptype, do_csum);
 	return true;
 }
 
 static bool udplite_manip_pkt(struct sk_buff *skb,
-			      const struct nf_nat_l3proto *l3proto,
 			      unsigned int iphdroff, unsigned int hdroff,
 			      const struct nf_conntrack_tuple *tuple,
 			      enum nf_nat_manip_type maniptype)
@@ -101,14 +102,13 @@ static bool udplite_manip_pkt(struct sk_buff *skb,
 		return false;
 
 	hdr = (struct udphdr *)(skb->data + hdroff);
-	__udp_manip_pkt(skb, l3proto, iphdroff, hdr, tuple, maniptype, true);
+	__udp_manip_pkt(skb, iphdroff, hdr, tuple, maniptype, true);
 #endif
 	return true;
 }
 
 static bool
 sctp_manip_pkt(struct sk_buff *skb,
-	       const struct nf_nat_l3proto *l3proto,
 	       unsigned int iphdroff, unsigned int hdroff,
 	       const struct nf_conntrack_tuple *tuple,
 	       enum nf_nat_manip_type maniptype)
@@ -151,7 +151,6 @@ sctp_manip_pkt(struct sk_buff *skb,
 
 static bool
 tcp_manip_pkt(struct sk_buff *skb,
-	      const struct nf_nat_l3proto *l3proto,
 	      unsigned int iphdroff, unsigned int hdroff,
 	      const struct nf_conntrack_tuple *tuple,
 	      enum nf_nat_manip_type maniptype)
@@ -187,14 +186,13 @@ tcp_manip_pkt(struct sk_buff *skb,
 	if (hdrsize < sizeof(*hdr))
 		return true;
 
-	l3proto->csum_update(skb, iphdroff, &hdr->check, tuple, maniptype);
+	nf_csum_update(skb, iphdroff, &hdr->check, tuple, maniptype);
 	inet_proto_csum_replace2(&hdr->check, skb, oldport, newport, false);
 	return true;
 }
 
 static bool
 dccp_manip_pkt(struct sk_buff *skb,
-	       const struct nf_nat_l3proto *l3proto,
 	       unsigned int iphdroff, unsigned int hdroff,
 	       const struct nf_conntrack_tuple *tuple,
 	       enum nf_nat_manip_type maniptype)
@@ -226,8 +224,7 @@ dccp_manip_pkt(struct sk_buff *skb,
 	if (hdrsize < sizeof(*hdr))
 		return true;
 
-	l3proto->csum_update(skb, iphdroff, &hdr->dccph_checksum,
-			     tuple, maniptype);
+	nf_csum_update(skb, iphdroff, &hdr->dccph_checksum, tuple, maniptype);
 	inet_proto_csum_replace2(&hdr->dccph_checksum, skb, oldport, newport,
 				 false);
 #endif
@@ -236,7 +233,6 @@ dccp_manip_pkt(struct sk_buff *skb,
 
 static bool
 icmp_manip_pkt(struct sk_buff *skb,
-	       const struct nf_nat_l3proto *l3proto,
 	       unsigned int iphdroff, unsigned int hdroff,
 	       const struct nf_conntrack_tuple *tuple,
 	       enum nf_nat_manip_type maniptype)
@@ -255,7 +251,6 @@ icmp_manip_pkt(struct sk_buff *skb,
 
 static bool
 icmpv6_manip_pkt(struct sk_buff *skb,
-		 const struct nf_nat_l3proto *l3proto,
 		 unsigned int iphdroff, unsigned int hdroff,
 		 const struct nf_conntrack_tuple *tuple,
 		 enum nf_nat_manip_type maniptype)
@@ -266,8 +261,7 @@ icmpv6_manip_pkt(struct sk_buff *skb,
 		return false;
 
 	hdr = (struct icmp6hdr *)(skb->data + hdroff);
-	l3proto->csum_update(skb, iphdroff, &hdr->icmp6_cksum,
-			     tuple, maniptype);
+	nf_csum_update(skb, iphdroff, &hdr->icmp6_cksum, tuple, maniptype);
 	if (hdr->icmp6_type == ICMPV6_ECHO_REQUEST ||
 	    hdr->icmp6_type == ICMPV6_ECHO_REPLY) {
 		inet_proto_csum_replace2(&hdr->icmp6_cksum, skb,
@@ -281,7 +275,6 @@ icmpv6_manip_pkt(struct sk_buff *skb,
 /* manipulate a GRE packet according to maniptype */
 static bool
 gre_manip_pkt(struct sk_buff *skb,
-	      const struct nf_nat_l3proto *l3proto,
 	      unsigned int iphdroff, unsigned int hdroff,
 	      const struct nf_conntrack_tuple *tuple,
 	      enum nf_nat_manip_type maniptype)
@@ -321,35 +314,34 @@ gre_manip_pkt(struct sk_buff *skb,
 }
 
 static bool l4proto_manip_pkt(struct sk_buff *skb,
-			      const struct nf_nat_l3proto *l3proto,
 			      unsigned int iphdroff, unsigned int hdroff,
 			      const struct nf_conntrack_tuple *tuple,
 			      enum nf_nat_manip_type maniptype)
 {
 	switch (tuple->dst.protonum) {
 	case IPPROTO_TCP:
-		return tcp_manip_pkt(skb, l3proto, iphdroff, hdroff,
+		return tcp_manip_pkt(skb, iphdroff, hdroff,
 				     tuple, maniptype);
 	case IPPROTO_UDP:
-		return udp_manip_pkt(skb, l3proto, iphdroff, hdroff,
+		return udp_manip_pkt(skb, iphdroff, hdroff,
 				     tuple, maniptype);
 	case IPPROTO_UDPLITE:
-		return udplite_manip_pkt(skb, l3proto, iphdroff, hdroff,
+		return udplite_manip_pkt(skb, iphdroff, hdroff,
 					 tuple, maniptype);
 	case IPPROTO_SCTP:
-		return sctp_manip_pkt(skb, l3proto, iphdroff, hdroff,
+		return sctp_manip_pkt(skb, iphdroff, hdroff,
 				      tuple, maniptype);
 	case IPPROTO_ICMP:
-		return icmp_manip_pkt(skb, l3proto, iphdroff, hdroff,
+		return icmp_manip_pkt(skb, iphdroff, hdroff,
 				      tuple, maniptype);
 	case IPPROTO_ICMPV6:
-		return icmpv6_manip_pkt(skb, l3proto, iphdroff, hdroff,
+		return icmpv6_manip_pkt(skb, iphdroff, hdroff,
 					tuple, maniptype);
 	case IPPROTO_DCCP:
-		return dccp_manip_pkt(skb, l3proto, iphdroff, hdroff,
+		return dccp_manip_pkt(skb, iphdroff, hdroff,
 				      tuple, maniptype);
 	case IPPROTO_GRE:
-		return gre_manip_pkt(skb, l3proto, iphdroff, hdroff,
+		return gre_manip_pkt(skb, iphdroff, hdroff,
 				     tuple, maniptype);
 	}
 
@@ -371,8 +363,7 @@ static bool nf_nat_ipv4_manip_pkt(struct sk_buff *skb,
 	iph = (void *)skb->data + iphdroff;
 	hdroff = iphdroff + iph->ihl * 4;
 
-	if (!l4proto_manip_pkt(skb, &nf_nat_l3proto_ipv4, iphdroff,
-			       hdroff, target, maniptype))
+	if (!l4proto_manip_pkt(skb, iphdroff, hdroff, target, maniptype))
 		return false;
 	iph = (void *)skb->data + iphdroff;
 
@@ -408,8 +399,7 @@ static bool nf_nat_ipv6_manip_pkt(struct sk_buff *skb,
 		goto manip_addr;
 
 	if ((frag_off & htons(~0x7)) == 0 &&
-	    !l4proto_manip_pkt(skb, &nf_nat_l3proto_ipv6, iphdroff, hdroff,
-			       target, maniptype))
+	    !l4proto_manip_pkt(skb, iphdroff, hdroff, target, maniptype))
 		return false;
 
 	/* must reload, offset might have changed */
@@ -490,6 +480,21 @@ static void nf_nat_ipv6_csum_update(struct sk_buff *skb,
 #endif
 }
 
+static void nf_csum_update(struct sk_buff *skb,
+			   unsigned int iphdroff, __sum16 *check,
+			   const struct nf_conntrack_tuple *t,
+			   enum nf_nat_manip_type maniptype)
+{
+	switch (t->src.l3num) {
+	case NFPROTO_IPV4:
+		nf_nat_ipv4_csum_update(skb, iphdroff, check, t, maniptype);
+		return;
+	case NFPROTO_IPV6:
+		nf_nat_ipv6_csum_update(skb, iphdroff, check, t, maniptype);
+		return;
+	}
+}
+
 static void nf_nat_ipv4_csum_recalc(struct sk_buff *skb,
 				    u8 proto, void *data, __sum16 *check,
 				    int datalen, int oldlen)
@@ -532,7 +537,6 @@ static void nf_nat_ipv6_csum_recalc(struct sk_buff *skb,
 
 static const struct nf_nat_l3proto nf_nat_l3proto_ipv4 = {
 	.l3proto		= NFPROTO_IPV4,
-	.csum_update		= nf_nat_ipv4_csum_update,
 	.csum_recalc		= nf_nat_ipv4_csum_recalc,
 };
 
@@ -784,7 +788,6 @@ void nf_nat_l3proto_exit(void)
 #if IS_ENABLED(CONFIG_IPV6)
 static const struct nf_nat_l3proto nf_nat_l3proto_ipv6 = {
 	.l3proto		= NFPROTO_IPV6,
-	.csum_update		= nf_nat_ipv6_csum_update,
 	.csum_recalc		= nf_nat_ipv6_csum_recalc,
 };
 
-- 
cgit v1.2.3


From dac3fe72596f91011afc649a9d181b18466dd895 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Tue, 19 Feb 2019 17:38:25 +0100
Subject: netfilter: nat: remove csum_recalc hook

We can now use direct calls.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_nat_l3proto.h |  7 +++----
 net/netfilter/nf_nat_helper.c          | 12 ++++--------
 net/netfilter/nf_nat_proto.c           | 22 ++++++++++++++++++++--
 3 files changed, 27 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_nat_l3proto.h b/include/net/netfilter/nf_nat_l3proto.h
index c1123030c94f..8ee0b5b629c7 100644
--- a/include/net/netfilter/nf_nat_l3proto.h
+++ b/include/net/netfilter/nf_nat_l3proto.h
@@ -4,15 +4,14 @@
 
 struct nf_nat_l3proto {
 	u8	l3proto;
-
-	void	(*csum_recalc)(struct sk_buff *skb, u8 proto,
-			       void *data, __sum16 *check,
-			       int datalen, int oldlen);
 };
 
 unsigned int nf_nat_manip_pkt(struct sk_buff *skb, struct nf_conn *ct,
 			      enum nf_nat_manip_type mtype,
 			      enum ip_conntrack_dir dir);
+void nf_nat_csum_recalc(struct sk_buff *skb,
+			u8 nfproto, u8 proto, void *data, __sum16 *check,
+			int datalen, int oldlen);
 
 int nf_nat_l3proto_register(const struct nf_nat_l3proto *);
 void nf_nat_l3proto_unregister(const struct nf_nat_l3proto *);
diff --git a/net/netfilter/nf_nat_helper.c b/net/netfilter/nf_nat_helper.c
index 12dea976d959..0a8dd5f368cd 100644
--- a/net/netfilter/nf_nat_helper.c
+++ b/net/netfilter/nf_nat_helper.c
@@ -97,7 +97,6 @@ bool __nf_nat_mangle_tcp_packet(struct sk_buff *skb,
 				const char *rep_buffer,
 				unsigned int rep_len, bool adjust)
 {
-	const struct nf_nat_l3proto *l3proto;
 	struct tcphdr *tcph;
 	int oldlen, datalen;
 
@@ -117,9 +116,8 @@ bool __nf_nat_mangle_tcp_packet(struct sk_buff *skb,
 
 	datalen = skb->len - protoff;
 
-	l3proto = __nf_nat_l3proto_find(nf_ct_l3num(ct));
-	l3proto->csum_recalc(skb, IPPROTO_TCP, tcph, &tcph->check,
-			     datalen, oldlen);
+	nf_nat_csum_recalc(skb, nf_ct_l3num(ct), IPPROTO_TCP,
+			   tcph, &tcph->check, datalen, oldlen);
 
 	if (adjust && rep_len != match_len)
 		nf_ct_seqadj_set(ct, ctinfo, tcph->seq,
@@ -149,7 +147,6 @@ nf_nat_mangle_udp_packet(struct sk_buff *skb,
 			 const char *rep_buffer,
 			 unsigned int rep_len)
 {
-	const struct nf_nat_l3proto *l3proto;
 	struct udphdr *udph;
 	int datalen, oldlen;
 
@@ -175,9 +172,8 @@ nf_nat_mangle_udp_packet(struct sk_buff *skb,
 	if (!udph->check && skb->ip_summed != CHECKSUM_PARTIAL)
 		return true;
 
-	l3proto = __nf_nat_l3proto_find(nf_ct_l3num(ct));
-	l3proto->csum_recalc(skb, IPPROTO_UDP, udph, &udph->check,
-			     datalen, oldlen);
+	nf_nat_csum_recalc(skb, nf_ct_l3num(ct), IPPROTO_TCP,
+			   udph, &udph->check, datalen, oldlen);
 
 	return true;
 }
diff --git a/net/netfilter/nf_nat_proto.c b/net/netfilter/nf_nat_proto.c
index 8a306a77914d..8284ed1b3173 100644
--- a/net/netfilter/nf_nat_proto.c
+++ b/net/netfilter/nf_nat_proto.c
@@ -535,9 +535,28 @@ static void nf_nat_ipv6_csum_recalc(struct sk_buff *skb,
 }
 #endif
 
+void nf_nat_csum_recalc(struct sk_buff *skb,
+			u8 nfproto, u8 proto, void *data, __sum16 *check,
+			int datalen, int oldlen)
+{
+	switch (nfproto) {
+	case NFPROTO_IPV4:
+		nf_nat_ipv4_csum_recalc(skb, proto, data, check,
+					datalen, oldlen);
+		return;
+#if IS_ENABLED(CONFIG_IPV6)
+	case NFPROTO_IPV6:
+		nf_nat_ipv6_csum_recalc(skb, proto, data, check,
+					datalen, oldlen);
+		return;
+#endif
+	}
+
+	WARN_ON_ONCE(1);
+}
+
 static const struct nf_nat_l3proto nf_nat_l3proto_ipv4 = {
 	.l3proto		= NFPROTO_IPV4,
-	.csum_recalc		= nf_nat_ipv4_csum_recalc,
 };
 
 int nf_nat_icmp_reply_translation(struct sk_buff *skb,
@@ -788,7 +807,6 @@ void nf_nat_l3proto_exit(void)
 #if IS_ENABLED(CONFIG_IPV6)
 static const struct nf_nat_l3proto nf_nat_l3proto_ipv6 = {
 	.l3proto		= NFPROTO_IPV6,
-	.csum_recalc		= nf_nat_ipv6_csum_recalc,
 };
 
 int nf_nat_icmpv6_reply_translation(struct sk_buff *skb,
-- 
cgit v1.2.3


From d6c4c8ffb5e54b0516742f3386ea9e329e019455 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Tue, 19 Feb 2019 17:38:26 +0100
Subject: netfilter: nat: remove l3proto struct

All l3proto function pointers have been removed.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_nat_l3proto.h |  8 -----
 net/netfilter/nf_nat_core.c            | 54 ----------------------------------
 net/netfilter/nf_nat_proto.c           | 38 ------------------------
 3 files changed, 100 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_nat_l3proto.h b/include/net/netfilter/nf_nat_l3proto.h
index 8ee0b5b629c7..9a68f2b53a9e 100644
--- a/include/net/netfilter/nf_nat_l3proto.h
+++ b/include/net/netfilter/nf_nat_l3proto.h
@@ -2,10 +2,6 @@
 #ifndef _NF_NAT_L3PROTO_H
 #define _NF_NAT_L3PROTO_H
 
-struct nf_nat_l3proto {
-	u8	l3proto;
-};
-
 unsigned int nf_nat_manip_pkt(struct sk_buff *skb, struct nf_conn *ct,
 			      enum nf_nat_manip_type mtype,
 			      enum ip_conntrack_dir dir);
@@ -13,10 +9,6 @@ void nf_nat_csum_recalc(struct sk_buff *skb,
 			u8 nfproto, u8 proto, void *data, __sum16 *check,
 			int datalen, int oldlen);
 
-int nf_nat_l3proto_register(const struct nf_nat_l3proto *);
-void nf_nat_l3proto_unregister(const struct nf_nat_l3proto *);
-const struct nf_nat_l3proto *__nf_nat_l3proto_find(u8 l3proto);
-
 int nf_nat_icmp_reply_translation(struct sk_buff *skb, struct nf_conn *ct,
 				  enum ip_conntrack_info ctinfo,
 				  unsigned int hooknum);
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 8c5c29189383..d9b70e560007 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -35,8 +35,6 @@
 static spinlock_t nf_nat_locks[CONNTRACK_LOCKS];
 
 static DEFINE_MUTEX(nf_nat_proto_mutex);
-static const struct nf_nat_l3proto __rcu *nf_nat_l3protos[NFPROTO_NUMPROTO]
-						__read_mostly;
 static unsigned int nat_net_id __read_mostly;
 
 static struct hlist_head *nf_nat_bysource __read_mostly;
@@ -58,12 +56,6 @@ struct nat_net {
 	struct nf_nat_hooks_net nat_proto_net[NFPROTO_NUMPROTO];
 };
 
-inline const struct nf_nat_l3proto *
-__nf_nat_l3proto_find(u8 family)
-{
-	return rcu_dereference(nf_nat_l3protos[family]);
-}
-
 #ifdef CONFIG_XFRM
 static void nf_nat_ipv4_decode_session(struct sk_buff *skb,
 				       const struct nf_conn *ct,
@@ -849,33 +841,6 @@ static int nf_nat_proto_clean(struct nf_conn *ct, void *data)
 	return 0;
 }
 
-static void nf_nat_l3proto_clean(u8 l3proto)
-{
-	struct nf_nat_proto_clean clean = {
-		.l3proto = l3proto,
-	};
-
-	nf_ct_iterate_destroy(nf_nat_proto_remove, &clean);
-}
-
-int nf_nat_l3proto_register(const struct nf_nat_l3proto *l3proto)
-{
-	RCU_INIT_POINTER(nf_nat_l3protos[l3proto->l3proto], l3proto);
-	return 0;
-}
-EXPORT_SYMBOL_GPL(nf_nat_l3proto_register);
-
-void nf_nat_l3proto_unregister(const struct nf_nat_l3proto *l3proto)
-{
-	mutex_lock(&nf_nat_proto_mutex);
-	RCU_INIT_POINTER(nf_nat_l3protos[l3proto->l3proto], NULL);
-	mutex_unlock(&nf_nat_proto_mutex);
-	synchronize_rcu();
-
-	nf_nat_l3proto_clean(l3proto->l3proto);
-}
-EXPORT_SYMBOL_GPL(nf_nat_l3proto_unregister);
-
 /* No one using conntrack by the time this called. */
 static void nf_nat_cleanup_conntrack(struct nf_conn *ct)
 {
@@ -1122,7 +1087,6 @@ int nf_nat_register_fn(struct net *net, const struct nf_hook_ops *ops,
 	mutex_unlock(&nf_nat_proto_mutex);
 	return ret;
 }
-EXPORT_SYMBOL_GPL(nf_nat_register_fn);
 
 void nf_nat_unregister_fn(struct net *net, const struct nf_hook_ops *ops,
 		          unsigned int ops_count)
@@ -1171,7 +1135,6 @@ void nf_nat_unregister_fn(struct net *net, const struct nf_hook_ops *ops,
 unlock:
 	mutex_unlock(&nf_nat_proto_mutex);
 }
-EXPORT_SYMBOL_GPL(nf_nat_unregister_fn);
 
 static struct pernet_operations nat_net_ops = {
 	.id = &nat_net_id,
@@ -1186,8 +1149,6 @@ static struct nf_nat_hook nat_hook = {
 	.manip_pkt		= nf_nat_manip_pkt,
 };
 
-int nf_nat_l3proto_init(void);
-void nf_nat_l3proto_exit(void);
 static int __init nf_nat_init(void)
 {
 	int ret, i;
@@ -1222,19 +1183,6 @@ static int __init nf_nat_init(void)
 	WARN_ON(nf_nat_hook != NULL);
 	RCU_INIT_POINTER(nf_nat_hook, &nat_hook);
 
-	ret = nf_nat_l3proto_init();
-	if (ret) {
-		nf_ct_extend_unregister(&nat_extend);
-		nf_ct_helper_expectfn_unregister(&follow_master_nat);
-		RCU_INIT_POINTER(nf_nat_hook, NULL);
-
-		synchronize_net();
-		kvfree(nf_nat_bysource);
-		unregister_pernet_subsys(&nat_net_ops);
-
-		return ret;
-	}
-
 	return 0;
 }
 
@@ -1244,8 +1192,6 @@ static void __exit nf_nat_cleanup(void)
 
 	nf_ct_iterate_destroy(nf_nat_proto_clean, &clean);
 
-	nf_nat_l3proto_exit();
-
 	nf_ct_extend_unregister(&nat_extend);
 	nf_ct_helper_expectfn_unregister(&follow_master_nat);
 	RCU_INIT_POINTER(nf_nat_hook, NULL);
diff --git a/net/netfilter/nf_nat_proto.c b/net/netfilter/nf_nat_proto.c
index 8284ed1b3173..f5c60d3b9d38 100644
--- a/net/netfilter/nf_nat_proto.c
+++ b/net/netfilter/nf_nat_proto.c
@@ -35,11 +35,6 @@
 #include <net/netfilter/nf_conntrack.h>
 #include <linux/netfilter/nfnetlink_conntrack.h>
 
-static const struct nf_nat_l3proto nf_nat_l3proto_ipv4;
-#if IS_ENABLED(CONFIG_IPV6)
-static const struct nf_nat_l3proto nf_nat_l3proto_ipv6;
-#endif
-
 static void nf_csum_update(struct sk_buff *skb,
 			   unsigned int iphdroff, __sum16 *check,
 			   const struct nf_conntrack_tuple *t,
@@ -555,10 +550,6 @@ void nf_nat_csum_recalc(struct sk_buff *skb,
 	WARN_ON_ONCE(1);
 }
 
-static const struct nf_nat_l3proto nf_nat_l3proto_ipv4 = {
-	.l3proto		= NFPROTO_IPV4,
-};
-
 int nf_nat_icmp_reply_translation(struct sk_buff *skb,
 				  struct nf_conn *ct,
 				  enum ip_conntrack_info ctinfo,
@@ -779,36 +770,7 @@ void nf_nat_l3proto_ipv4_unregister_fn(struct net *net, const struct nf_hook_ops
 }
 EXPORT_SYMBOL_GPL(nf_nat_l3proto_ipv4_unregister_fn);
 
-int nf_nat_l3proto_init(void)
-{
-	int ret = nf_nat_l3proto_register(&nf_nat_l3proto_ipv4);
-
 #if IS_ENABLED(CONFIG_IPV6)
-	if (ret)
-		return ret;
-
-	ret = nf_nat_l3proto_register(&nf_nat_l3proto_ipv6);
-	if (ret == 0)
-		return ret;
-
-	nf_nat_l3proto_unregister(&nf_nat_l3proto_ipv4);
-#endif
-	return ret;
-}
-
-void nf_nat_l3proto_exit(void)
-{
-#if IS_ENABLED(CONFIG_IPV6)
-	nf_nat_l3proto_unregister(&nf_nat_l3proto_ipv6);
-#endif
-	nf_nat_l3proto_unregister(&nf_nat_l3proto_ipv4);
-}
-
-#if IS_ENABLED(CONFIG_IPV6)
-static const struct nf_nat_l3proto nf_nat_l3proto_ipv6 = {
-	.l3proto		= NFPROTO_IPV6,
-};
-
 int nf_nat_icmpv6_reply_translation(struct sk_buff *skb,
 				    struct nf_conn *ct,
 				    enum ip_conntrack_info ctinfo,
-- 
cgit v1.2.3


From d2c5c103b1337f590b7edf1509a6e294bdf22402 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Tue, 19 Feb 2019 17:38:27 +0100
Subject: netfilter: nat: remove nf_nat_l3proto.h and nf_nat_core.h

The l3proto name is gone, its header file is the last trace.
While at it, also remove nf_nat_core.h, its very small and all users
include nf_nat.h too.

before:
   text    data     bss     dec     hex filename
  22948    1612    4136   28696    7018 nf_nat.ko

after removal of l3proto register/unregister functions:
   text	   data	    bss	    dec	    hex	filename
  22196	   1516	   4136	  27848	   6cc8 nf_nat.ko

checkpatch complains about overly long lines, but line breaks
do not make things more readable and the line length gets smaller
here, not larger.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_nat.h          | 39 +++++++++++++++++++++++++++++++++
 include/net/netfilter/nf_nat_core.h     | 29 ------------------------
 include/net/netfilter/nf_nat_l3proto.h  | 26 ----------------------
 net/ipv4/netfilter/iptable_nat.c        |  8 +++----
 net/ipv4/netfilter/nft_chain_nat_ipv4.c |  6 ++---
 net/ipv6/netfilter/ip6table_nat.c       |  8 +++----
 net/ipv6/netfilter/nft_chain_nat_ipv6.c |  6 ++---
 net/netfilter/nf_conntrack_core.c       |  1 -
 net/netfilter/nf_conntrack_netlink.c    |  2 +-
 net/netfilter/nf_nat_core.c             |  2 --
 net/netfilter/nf_nat_helper.c           |  2 --
 net/netfilter/nf_nat_proto.c            | 18 +++++++--------
 net/netfilter/nft_nat.c                 |  2 --
 net/netfilter/xt_nat.c                  |  2 +-
 net/openvswitch/conntrack.c             |  4 +---
 15 files changed, 60 insertions(+), 95 deletions(-)
 delete mode 100644 include/net/netfilter/nf_nat_core.h
 delete mode 100644 include/net/netfilter/nf_nat_l3proto.h

(limited to 'include')

diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h
index e53b4f9b8b44..cf332c4e0b32 100644
--- a/include/net/netfilter/nf_nat.h
+++ b/include/net/netfilter/nf_nat.h
@@ -73,4 +73,43 @@ int nf_nat_register_fn(struct net *net, const struct nf_hook_ops *ops,
 		       const struct nf_hook_ops *nat_ops, unsigned int ops_count);
 void nf_nat_unregister_fn(struct net *net, const struct nf_hook_ops *ops,
 			  unsigned int ops_count);
+
+unsigned int nf_nat_packet(struct nf_conn *ct, enum ip_conntrack_info ctinfo,
+			   unsigned int hooknum, struct sk_buff *skb);
+
+unsigned int nf_nat_manip_pkt(struct sk_buff *skb, struct nf_conn *ct,
+			      enum nf_nat_manip_type mtype,
+			      enum ip_conntrack_dir dir);
+void nf_nat_csum_recalc(struct sk_buff *skb,
+			u8 nfproto, u8 proto, void *data, __sum16 *check,
+			int datalen, int oldlen);
+
+int nf_nat_icmp_reply_translation(struct sk_buff *skb, struct nf_conn *ct,
+				  enum ip_conntrack_info ctinfo,
+				  unsigned int hooknum);
+
+int nf_nat_icmpv6_reply_translation(struct sk_buff *skb, struct nf_conn *ct,
+				    enum ip_conntrack_info ctinfo,
+				    unsigned int hooknum, unsigned int hdrlen);
+
+int nf_nat_ipv4_register_fn(struct net *net, const struct nf_hook_ops *ops);
+void nf_nat_ipv4_unregister_fn(struct net *net, const struct nf_hook_ops *ops);
+
+int nf_nat_ipv6_register_fn(struct net *net, const struct nf_hook_ops *ops);
+void nf_nat_ipv6_unregister_fn(struct net *net, const struct nf_hook_ops *ops);
+
+unsigned int
+nf_nat_inet_fn(void *priv, struct sk_buff *skb,
+	       const struct nf_hook_state *state);
+
+int nf_xfrm_me_harder(struct net *n, struct sk_buff *s, unsigned int family);
+
+static inline int nf_nat_initialized(struct nf_conn *ct,
+				     enum nf_nat_manip_type manip)
+{
+	if (manip == NF_NAT_MANIP_SRC)
+		return ct->status & IPS_SRC_NAT_DONE;
+	else
+		return ct->status & IPS_DST_NAT_DONE;
+}
 #endif
diff --git a/include/net/netfilter/nf_nat_core.h b/include/net/netfilter/nf_nat_core.h
deleted file mode 100644
index dc7cd0440229..000000000000
--- a/include/net/netfilter/nf_nat_core.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _NF_NAT_CORE_H
-#define _NF_NAT_CORE_H
-#include <linux/list.h>
-#include <net/netfilter/nf_conntrack.h>
-#include <net/netfilter/nf_nat.h>
-
-/* This header used to share core functionality between the standalone
-   NAT module, and the compatibility layer's use of NAT for masquerading. */
-
-unsigned int nf_nat_packet(struct nf_conn *ct, enum ip_conntrack_info ctinfo,
-			   unsigned int hooknum, struct sk_buff *skb);
-
-unsigned int
-nf_nat_inet_fn(void *priv, struct sk_buff *skb,
-	       const struct nf_hook_state *state);
-
-int nf_xfrm_me_harder(struct net *net, struct sk_buff *skb, unsigned int family);
-
-static inline int nf_nat_initialized(struct nf_conn *ct,
-				     enum nf_nat_manip_type manip)
-{
-	if (manip == NF_NAT_MANIP_SRC)
-		return ct->status & IPS_SRC_NAT_DONE;
-	else
-		return ct->status & IPS_DST_NAT_DONE;
-}
-
-#endif /* _NF_NAT_CORE_H */
diff --git a/include/net/netfilter/nf_nat_l3proto.h b/include/net/netfilter/nf_nat_l3proto.h
deleted file mode 100644
index 9a68f2b53a9e..000000000000
--- a/include/net/netfilter/nf_nat_l3proto.h
+++ /dev/null
@@ -1,26 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _NF_NAT_L3PROTO_H
-#define _NF_NAT_L3PROTO_H
-
-unsigned int nf_nat_manip_pkt(struct sk_buff *skb, struct nf_conn *ct,
-			      enum nf_nat_manip_type mtype,
-			      enum ip_conntrack_dir dir);
-void nf_nat_csum_recalc(struct sk_buff *skb,
-			u8 nfproto, u8 proto, void *data, __sum16 *check,
-			int datalen, int oldlen);
-
-int nf_nat_icmp_reply_translation(struct sk_buff *skb, struct nf_conn *ct,
-				  enum ip_conntrack_info ctinfo,
-				  unsigned int hooknum);
-
-int nf_nat_icmpv6_reply_translation(struct sk_buff *skb, struct nf_conn *ct,
-				    enum ip_conntrack_info ctinfo,
-				    unsigned int hooknum, unsigned int hdrlen);
-
-int nf_nat_l3proto_ipv4_register_fn(struct net *net, const struct nf_hook_ops *ops);
-void nf_nat_l3proto_ipv4_unregister_fn(struct net *net, const struct nf_hook_ops *ops);
-
-int nf_nat_l3proto_ipv6_register_fn(struct net *net, const struct nf_hook_ops *ops);
-void nf_nat_l3proto_ipv6_unregister_fn(struct net *net, const struct nf_hook_ops *ops);
-
-#endif /* _NF_NAT_L3PROTO_H */
diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c
index a317445448bf..007da0882412 100644
--- a/net/ipv4/netfilter/iptable_nat.c
+++ b/net/ipv4/netfilter/iptable_nat.c
@@ -15,8 +15,6 @@
 #include <net/ip.h>
 
 #include <net/netfilter/nf_nat.h>
-#include <net/netfilter/nf_nat_core.h>
-#include <net/netfilter/nf_nat_l3proto.h>
 
 static int __net_init iptable_nat_table_init(struct net *net);
 
@@ -70,10 +68,10 @@ static int ipt_nat_register_lookups(struct net *net)
 	int i, ret;
 
 	for (i = 0; i < ARRAY_SIZE(nf_nat_ipv4_ops); i++) {
-		ret = nf_nat_l3proto_ipv4_register_fn(net, &nf_nat_ipv4_ops[i]);
+		ret = nf_nat_ipv4_register_fn(net, &nf_nat_ipv4_ops[i]);
 		if (ret) {
 			while (i)
-				nf_nat_l3proto_ipv4_unregister_fn(net, &nf_nat_ipv4_ops[--i]);
+				nf_nat_ipv4_unregister_fn(net, &nf_nat_ipv4_ops[--i]);
 
 			return ret;
 		}
@@ -87,7 +85,7 @@ static void ipt_nat_unregister_lookups(struct net *net)
 	int i;
 
 	for (i = 0; i < ARRAY_SIZE(nf_nat_ipv4_ops); i++)
-		nf_nat_l3proto_ipv4_unregister_fn(net, &nf_nat_ipv4_ops[i]);
+		nf_nat_ipv4_unregister_fn(net, &nf_nat_ipv4_ops[i]);
 }
 
 static int __net_init iptable_nat_table_init(struct net *net)
diff --git a/net/ipv4/netfilter/nft_chain_nat_ipv4.c b/net/ipv4/netfilter/nft_chain_nat_ipv4.c
index a3c4ea303e3e..0d1ad5901aff 100644
--- a/net/ipv4/netfilter/nft_chain_nat_ipv4.c
+++ b/net/ipv4/netfilter/nft_chain_nat_ipv4.c
@@ -20,10 +20,8 @@
 #include <linux/netfilter/nf_tables.h>
 #include <net/netfilter/nf_conntrack.h>
 #include <net/netfilter/nf_nat.h>
-#include <net/netfilter/nf_nat_core.h>
 #include <net/netfilter/nf_tables.h>
 #include <net/netfilter/nf_tables_ipv4.h>
-#include <net/netfilter/nf_nat_l3proto.h>
 #include <net/ip.h>
 
 static unsigned int nft_nat_do_chain(void *priv,
@@ -40,12 +38,12 @@ static unsigned int nft_nat_do_chain(void *priv,
 
 static int nft_nat_ipv4_reg(struct net *net, const struct nf_hook_ops *ops)
 {
-	return nf_nat_l3proto_ipv4_register_fn(net, ops);
+	return nf_nat_ipv4_register_fn(net, ops);
 }
 
 static void nft_nat_ipv4_unreg(struct net *net, const struct nf_hook_ops *ops)
 {
-	nf_nat_l3proto_ipv4_unregister_fn(net, ops);
+	nf_nat_ipv4_unregister_fn(net, ops);
 }
 
 static const struct nft_chain_type nft_chain_nat_ipv4 = {
diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c
index 67ba70ab9f5c..3e1fab9d7503 100644
--- a/net/ipv6/netfilter/ip6table_nat.c
+++ b/net/ipv6/netfilter/ip6table_nat.c
@@ -17,8 +17,6 @@
 #include <net/ipv6.h>
 
 #include <net/netfilter/nf_nat.h>
-#include <net/netfilter/nf_nat_core.h>
-#include <net/netfilter/nf_nat_l3proto.h>
 
 static int __net_init ip6table_nat_table_init(struct net *net);
 
@@ -72,10 +70,10 @@ static int ip6t_nat_register_lookups(struct net *net)
 	int i, ret;
 
 	for (i = 0; i < ARRAY_SIZE(nf_nat_ipv6_ops); i++) {
-		ret = nf_nat_l3proto_ipv6_register_fn(net, &nf_nat_ipv6_ops[i]);
+		ret = nf_nat_ipv6_register_fn(net, &nf_nat_ipv6_ops[i]);
 		if (ret) {
 			while (i)
-				nf_nat_l3proto_ipv6_unregister_fn(net, &nf_nat_ipv6_ops[--i]);
+				nf_nat_ipv6_unregister_fn(net, &nf_nat_ipv6_ops[--i]);
 
 			return ret;
 		}
@@ -89,7 +87,7 @@ static void ip6t_nat_unregister_lookups(struct net *net)
 	int i;
 
 	for (i = 0; i < ARRAY_SIZE(nf_nat_ipv6_ops); i++)
-		nf_nat_l3proto_ipv6_unregister_fn(net, &nf_nat_ipv6_ops[i]);
+		nf_nat_ipv6_unregister_fn(net, &nf_nat_ipv6_ops[i]);
 }
 
 static int __net_init ip6table_nat_table_init(struct net *net)
diff --git a/net/ipv6/netfilter/nft_chain_nat_ipv6.c b/net/ipv6/netfilter/nft_chain_nat_ipv6.c
index 8a081ad7d5db..e66bfd0b3d15 100644
--- a/net/ipv6/netfilter/nft_chain_nat_ipv6.c
+++ b/net/ipv6/netfilter/nft_chain_nat_ipv6.c
@@ -18,10 +18,8 @@
 #include <linux/netfilter/nf_tables.h>
 #include <net/netfilter/nf_conntrack.h>
 #include <net/netfilter/nf_nat.h>
-#include <net/netfilter/nf_nat_core.h>
 #include <net/netfilter/nf_tables.h>
 #include <net/netfilter/nf_tables_ipv6.h>
-#include <net/netfilter/nf_nat_l3proto.h>
 #include <net/ipv6.h>
 
 static unsigned int nft_nat_do_chain(void *priv,
@@ -38,12 +36,12 @@ static unsigned int nft_nat_do_chain(void *priv,
 
 static int nft_nat_ipv6_reg(struct net *net, const struct nf_hook_ops *ops)
 {
-	return nf_nat_l3proto_ipv6_register_fn(net, ops);
+	return nf_nat_ipv6_register_fn(net, ops);
 }
 
 static void nft_nat_ipv6_unreg(struct net *net, const struct nf_hook_ops *ops)
 {
-	nf_nat_l3proto_ipv6_unregister_fn(net, ops);
+	nf_nat_ipv6_unregister_fn(net, ops);
 }
 
 static const struct nft_chain_type nft_chain_nat_ipv6 = {
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index e139c256e269..4a9107e4a69c 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -51,7 +51,6 @@
 #include <net/netfilter/nf_conntrack_labels.h>
 #include <net/netfilter/nf_conntrack_synproxy.h>
 #include <net/netfilter/nf_nat.h>
-#include <net/netfilter/nf_nat_core.h>
 #include <net/netfilter/nf_nat_helper.h>
 #include <net/netns/hash.h>
 #include <net/ip.h>
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 349b42a65c8a..66c596d287a5 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -46,7 +46,7 @@
 #include <net/netfilter/nf_conntrack_labels.h>
 #include <net/netfilter/nf_conntrack_synproxy.h>
 #ifdef CONFIG_NF_NAT_NEEDED
-#include <net/netfilter/nf_nat_core.h>
+#include <net/netfilter/nf_nat.h>
 #include <net/netfilter/nf_nat_helper.h>
 #endif
 
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index d9b70e560007..11acd7367623 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -22,8 +22,6 @@
 #include <net/netfilter/nf_conntrack.h>
 #include <net/netfilter/nf_conntrack_core.h>
 #include <net/netfilter/nf_nat.h>
-#include <net/netfilter/nf_nat_l3proto.h>
-#include <net/netfilter/nf_nat_core.h>
 #include <net/netfilter/nf_nat_helper.h>
 #include <net/netfilter/nf_conntrack_helper.h>
 #include <net/netfilter/nf_conntrack_seqadj.h>
diff --git a/net/netfilter/nf_nat_helper.c b/net/netfilter/nf_nat_helper.c
index 0a8dd5f368cd..ccc06f7539d7 100644
--- a/net/netfilter/nf_nat_helper.c
+++ b/net/netfilter/nf_nat_helper.c
@@ -22,8 +22,6 @@
 #include <net/netfilter/nf_conntrack_expect.h>
 #include <net/netfilter/nf_conntrack_seqadj.h>
 #include <net/netfilter/nf_nat.h>
-#include <net/netfilter/nf_nat_l3proto.h>
-#include <net/netfilter/nf_nat_core.h>
 #include <net/netfilter/nf_nat_helper.h>
 
 /* Frobs data inside this packet, which is linear. */
diff --git a/net/netfilter/nf_nat_proto.c b/net/netfilter/nf_nat_proto.c
index f5c60d3b9d38..62743da3004f 100644
--- a/net/netfilter/nf_nat_proto.c
+++ b/net/netfilter/nf_nat_proto.c
@@ -20,8 +20,6 @@
 
 #include <linux/netfilter.h>
 #include <net/netfilter/nf_nat.h>
-#include <net/netfilter/nf_nat_core.h>
-#include <net/netfilter/nf_nat_l3proto.h>
 
 #include <linux/ipv6.h>
 #include <linux/netfilter_ipv6.h>
@@ -758,17 +756,17 @@ static const struct nf_hook_ops nf_nat_ipv4_ops[] = {
 	},
 };
 
-int nf_nat_l3proto_ipv4_register_fn(struct net *net, const struct nf_hook_ops *ops)
+int nf_nat_ipv4_register_fn(struct net *net, const struct nf_hook_ops *ops)
 {
 	return nf_nat_register_fn(net, ops, nf_nat_ipv4_ops, ARRAY_SIZE(nf_nat_ipv4_ops));
 }
-EXPORT_SYMBOL_GPL(nf_nat_l3proto_ipv4_register_fn);
+EXPORT_SYMBOL_GPL(nf_nat_ipv4_register_fn);
 
-void nf_nat_l3proto_ipv4_unregister_fn(struct net *net, const struct nf_hook_ops *ops)
+void nf_nat_ipv4_unregister_fn(struct net *net, const struct nf_hook_ops *ops)
 {
 	nf_nat_unregister_fn(net, ops, ARRAY_SIZE(nf_nat_ipv4_ops));
 }
-EXPORT_SYMBOL_GPL(nf_nat_l3proto_ipv4_unregister_fn);
+EXPORT_SYMBOL_GPL(nf_nat_ipv4_unregister_fn);
 
 #if IS_ENABLED(CONFIG_IPV6)
 int nf_nat_icmpv6_reply_translation(struct sk_buff *skb,
@@ -1010,16 +1008,16 @@ static const struct nf_hook_ops nf_nat_ipv6_ops[] = {
 	},
 };
 
-int nf_nat_l3proto_ipv6_register_fn(struct net *net, const struct nf_hook_ops *ops)
+int nf_nat_ipv6_register_fn(struct net *net, const struct nf_hook_ops *ops)
 {
 	return nf_nat_register_fn(net, ops, nf_nat_ipv6_ops,
 				  ARRAY_SIZE(nf_nat_ipv6_ops));
 }
-EXPORT_SYMBOL_GPL(nf_nat_l3proto_ipv6_register_fn);
+EXPORT_SYMBOL_GPL(nf_nat_ipv6_register_fn);
 
-void nf_nat_l3proto_ipv6_unregister_fn(struct net *net, const struct nf_hook_ops *ops)
+void nf_nat_ipv6_unregister_fn(struct net *net, const struct nf_hook_ops *ops)
 {
 	nf_nat_unregister_fn(net, ops, ARRAY_SIZE(nf_nat_ipv6_ops));
 }
-EXPORT_SYMBOL_GPL(nf_nat_l3proto_ipv6_unregister_fn);
+EXPORT_SYMBOL_GPL(nf_nat_ipv6_unregister_fn);
 #endif /* CONFIG_IPV6 */
diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c
index c15807d10b91..e93aed9bda88 100644
--- a/net/netfilter/nft_nat.c
+++ b/net/netfilter/nft_nat.c
@@ -21,9 +21,7 @@
 #include <linux/netfilter/nf_tables.h>
 #include <net/netfilter/nf_conntrack.h>
 #include <net/netfilter/nf_nat.h>
-#include <net/netfilter/nf_nat_core.h>
 #include <net/netfilter/nf_tables.h>
-#include <net/netfilter/nf_nat_l3proto.h>
 #include <net/ip.h>
 
 struct nft_nat {
diff --git a/net/netfilter/xt_nat.c b/net/netfilter/xt_nat.c
index ac91170fc8c8..61eabd171186 100644
--- a/net/netfilter/xt_nat.c
+++ b/net/netfilter/xt_nat.c
@@ -14,7 +14,7 @@
 #include <linux/skbuff.h>
 #include <linux/netfilter.h>
 #include <linux/netfilter/x_tables.h>
-#include <net/netfilter/nf_nat_core.h>
+#include <net/netfilter/nf_nat.h>
 
 static int xt_nat_checkentry_v0(const struct xt_tgchk_param *par)
 {
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index def4d28fcbc3..1b6896896fff 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -29,9 +29,7 @@
 #include <net/ipv6_frag.h>
 
 #ifdef CONFIG_NF_NAT_NEEDED
-#include <linux/netfilter/nf_nat.h>
-#include <net/netfilter/nf_nat_core.h>
-#include <net/netfilter/nf_nat_l3proto.h>
+#include <net/netfilter/nf_nat.h>
 #endif
 
 #include "datapath.h"
-- 
cgit v1.2.3


From cc16921351d8ba1eabc1923fd61d4f369fadde56 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Thu, 21 Feb 2019 15:38:29 +0100
Subject: netfilter: conntrack: avoid same-timeout update

No need to dirty a cache line if timeout is unchanged.
Also, WARN() is useless here: we crash on 'skb->len' access
if skb is NULL.

Last, ct->timeout is u32, not 'unsigned long' so adapt the
function prototype accordingly.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack.h | 10 +++++-----
 net/netfilter/nf_conntrack_core.c    |  9 ++++-----
 2 files changed, 9 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index b5aac5ae5129..5ee7b30b4917 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -190,23 +190,23 @@ bool nf_ct_get_tuplepr(const struct sk_buff *skb, unsigned int nhoff,
 
 void __nf_ct_refresh_acct(struct nf_conn *ct, enum ip_conntrack_info ctinfo,
 			  const struct sk_buff *skb,
-			  unsigned long extra_jiffies, int do_acct);
+			  u32 extra_jiffies, bool do_acct);
 
 /* Refresh conntrack for this many jiffies and do accounting */
 static inline void nf_ct_refresh_acct(struct nf_conn *ct,
 				      enum ip_conntrack_info ctinfo,
 				      const struct sk_buff *skb,
-				      unsigned long extra_jiffies)
+				      u32 extra_jiffies)
 {
-	__nf_ct_refresh_acct(ct, ctinfo, skb, extra_jiffies, 1);
+	__nf_ct_refresh_acct(ct, ctinfo, skb, extra_jiffies, true);
 }
 
 /* Refresh conntrack for this many jiffies */
 static inline void nf_ct_refresh(struct nf_conn *ct,
 				 const struct sk_buff *skb,
-				 unsigned long extra_jiffies)
+				 u32 extra_jiffies)
 {
-	__nf_ct_refresh_acct(ct, 0, skb, extra_jiffies, 0);
+	__nf_ct_refresh_acct(ct, 0, skb, extra_jiffies, false);
 }
 
 /* kill conntrack and do accounting */
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 4a9107e4a69c..ff9313c579b4 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1751,11 +1751,9 @@ EXPORT_SYMBOL_GPL(nf_conntrack_alter_reply);
 void __nf_ct_refresh_acct(struct nf_conn *ct,
 			  enum ip_conntrack_info ctinfo,
 			  const struct sk_buff *skb,
-			  unsigned long extra_jiffies,
-			  int do_acct)
+			  u32 extra_jiffies,
+			  bool do_acct)
 {
-	WARN_ON(!skb);
-
 	/* Only update if this is not a fixed timeout */
 	if (test_bit(IPS_FIXED_TIMEOUT_BIT, &ct->status))
 		goto acct;
@@ -1764,7 +1762,8 @@ void __nf_ct_refresh_acct(struct nf_conn *ct,
 	if (nf_ct_is_confirmed(ct))
 		extra_jiffies += nfct_time_stamp;
 
-	ct->timeout = extra_jiffies;
+	if (ct->timeout != extra_jiffies)
+		ct->timeout = extra_jiffies;
 acct:
 	if (do_acct)
 		nf_ct_acct_update(ct, ctinfo, skb->len);
-- 
cgit v1.2.3


From 4d633062c1c0794a6b3836b7b55afba4599736e8 Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Wed, 27 Feb 2019 20:40:10 +0800
Subject: block: introduce bvec_nth_page()

Single-page bvec can often be seen in small BS workloads, so
introduce bvec_nth_page() for avoiding to call nth_page() unnecessarily,
which looks not cheap.

Signed-off-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-merge.c    |  2 +-
 include/linux/bvec.h | 11 ++++++++---
 2 files changed, 9 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/block/blk-merge.c b/block/blk-merge.c
index 066b66430523..c7e8a8273460 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -483,7 +483,7 @@ static unsigned blk_bvec_map_sg(struct request_queue *q,
 
 		offset = (total + bvec->bv_offset) % PAGE_SIZE;
 		idx = (total + bvec->bv_offset) / PAGE_SIZE;
-		pg = nth_page(bvec->bv_page, idx);
+		pg = bvec_nth_page(bvec->bv_page, idx);
 
 		sg_set_page(*sg, pg, seg_size, offset);
 
diff --git a/include/linux/bvec.h b/include/linux/bvec.h
index 30a57b68d017..4376f683c08a 100644
--- a/include/linux/bvec.h
+++ b/include/linux/bvec.h
@@ -51,6 +51,11 @@ struct bvec_iter_all {
 	unsigned	done;
 };
 
+static inline struct page *bvec_nth_page(struct page *page, int idx)
+{
+	return idx == 0 ? page : nth_page(page, idx);
+}
+
 /*
  * various member access, note that bio_data should of course not be used
  * on highmem page vectors
@@ -87,8 +92,8 @@ struct bvec_iter_all {
 	      PAGE_SIZE - bvec_iter_offset((bvec), (iter)))
 
 #define bvec_iter_page(bvec, iter)				\
-	nth_page(mp_bvec_iter_page((bvec), (iter)),		\
-		 mp_bvec_iter_page_idx((bvec), (iter)))
+	bvec_nth_page(mp_bvec_iter_page((bvec), (iter)),		\
+		      mp_bvec_iter_page_idx((bvec), (iter)))
 
 #define bvec_iter_bvec(bvec, iter)				\
 ((struct bio_vec) {						\
@@ -171,7 +176,7 @@ static inline void mp_bvec_last_segment(const struct bio_vec *bvec,
 	unsigned total = bvec->bv_offset + bvec->bv_len;
 	unsigned last_page = (total - 1) / PAGE_SIZE;
 
-	seg->bv_page = nth_page(bvec->bv_page, last_page);
+	seg->bv_page = bvec_nth_page(bvec->bv_page, last_page);
 
 	/* the whole segment is inside the last page */
 	if (bvec->bv_offset >= last_page * PAGE_SIZE) {
-- 
cgit v1.2.3


From b9cef50921eb018917e3c60b0c7f4ae1e113ad03 Mon Sep 17 00:00:00 2001
From: Hannes Reinecke <hare@suse.com>
Date: Tue, 26 Feb 2019 15:56:42 +0100
Subject: scsi: kill command serial number

No users left, kill it.

Signed-off-by: Hannes Reinecke <hare@suse.com>
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/scsi.c      | 16 ----------------
 drivers/scsi/scsi_lib.c  |  1 -
 include/scsi/scsi_cmnd.h | 10 ----------
 include/scsi/scsi_host.h |  8 --------
 4 files changed, 35 deletions(-)

(limited to 'include')

diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c
index 7675ff0ca2ea..99a7b9f520ae 100644
--- a/drivers/scsi/scsi.c
+++ b/drivers/scsi/scsi.c
@@ -174,22 +174,6 @@ void scsi_log_completion(struct scsi_cmnd *cmd, int disposition)
 }
 #endif
 
-/**
- * scsi_cmd_get_serial - Assign a serial number to a command
- * @host: the scsi host
- * @cmd: command to assign serial number to
- *
- * Description: a serial number identifies a request for error recovery
- * and debugging purposes.  Protected by the Host_Lock of host.
- */
-void scsi_cmd_get_serial(struct Scsi_Host *host, struct scsi_cmnd *cmd)
-{
-	cmd->serial_number = host->cmd_serial_number++;
-	if (cmd->serial_number == 0) 
-		cmd->serial_number = host->cmd_serial_number++;
-}
-EXPORT_SYMBOL(scsi_cmd_get_serial);
-
 /**
  * scsi_finish_command - cleanup and pass command back to upper layer
  * @cmd: the command
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index c77cb0d31dbc..95466a5bf6d6 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -316,7 +316,6 @@ EXPORT_SYMBOL(__scsi_execute);
  */
 static void scsi_init_cmd_errh(struct scsi_cmnd *cmd)
 {
-	cmd->serial_number = 0;
 	scsi_set_resid(cmd, 0);
 	memset(cmd->sense_buffer, 0, SCSI_SENSE_BUFFERSIZE);
 	if (cmd->cmd_len == 0)
diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h
index 6c7793c2ad01..76ed5e4acd38 100644
--- a/include/scsi/scsi_cmnd.h
+++ b/include/scsi/scsi_cmnd.h
@@ -74,16 +74,6 @@ struct scsi_cmnd {
 
 	int eh_eflags;		/* Used by error handlr */
 
-	/*
-	 * A SCSI Command is assigned a nonzero serial_number before passed
-	 * to the driver's queue command function.  The serial_number is
-	 * cleared when scsi_done is entered indicating that the command
-	 * has been completed.  It is a bug for LLDDs to use this number
-	 * for purposes other than printk (and even that is only useful
-	 * for debugging).
-	 */
-	unsigned long serial_number;
-
 	/*
 	 * This is set to jiffies as it was when the command was first
 	 * allocated.  It is used to time how long the command has
diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h
index dbbdfbdb1a04..2b539a1b3f62 100644
--- a/include/scsi/scsi_host.h
+++ b/include/scsi/scsi_host.h
@@ -486,7 +486,6 @@ struct scsi_host_template {
 		unsigned long irq_flags;				\
 		int rc;							\
 		spin_lock_irqsave(shost->host_lock, irq_flags);		\
-		scsi_cmd_get_serial(shost, cmd);			\
 		rc = func_name##_lck (cmd, cmd->scsi_done);			\
 		spin_unlock_irqrestore(shost->host_lock, irq_flags);	\
 		return rc;						\
@@ -596,12 +595,6 @@ struct Scsi_Host {
 	 * is nr_hw_queues * can_queue.
 	 */
 	unsigned nr_hw_queues;
-	/* 
-	 * Used to assign serial numbers to the cmds.
-	 * Protected by the host lock.
-	 */
-	unsigned long cmd_serial_number;
-	
 	unsigned active_mode:2;
 	unsigned unchecked_isa_dma:1;
 
@@ -738,7 +731,6 @@ extern int scsi_host_busy(struct Scsi_Host *shost);
 extern void scsi_host_put(struct Scsi_Host *t);
 extern struct Scsi_Host *scsi_host_lookup(unsigned short);
 extern const char *scsi_host_state_name(enum scsi_host_state);
-extern void scsi_cmd_get_serial(struct Scsi_Host *, struct scsi_cmnd *);
 
 static inline int __must_check scsi_add_host(struct Scsi_Host *host,
 					     struct device *dev)
-- 
cgit v1.2.3


From 230afe74d139f37ba5e344ad4e53d65911d12188 Mon Sep 17 00:00:00 2001
From: Oded Gabbay <oded.gabbay@gmail.com>
Date: Wed, 27 Feb 2019 00:19:18 +0200
Subject: habanalabs: allow memory allocations larger than 4GB

This patch increase the size field in the uapi structure of the Memory
IOCTL from 32-bit to 64-bit. This is to allow the user to allocate and/or
map memory in chunks that are larger then 4GB.

Goya's device memory (DRAM) can be up to 16GB, and for certain
topologies, the user may want an allocation that is larger than 4GB.

This change doesn't break current user-space because there was a "pad"
field in the uapi structure right after the size field. Changing the size
field to be 64-bit and removing the pad field maintains compatibility with
current user-space.

Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/habanalabs/habanalabs.h |  2 +-
 drivers/misc/habanalabs/memory.c     | 10 ++++------
 include/uapi/misc/habanalabs.h       |  6 ++----
 3 files changed, 7 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/drivers/misc/habanalabs/habanalabs.h b/drivers/misc/habanalabs/habanalabs.h
index 901542d685e8..fdf517448599 100644
--- a/drivers/misc/habanalabs/habanalabs.h
+++ b/drivers/misc/habanalabs/habanalabs.h
@@ -1320,7 +1320,7 @@ void hl_vm_ctx_fini(struct hl_ctx *ctx);
 int hl_vm_init(struct hl_device *hdev);
 void hl_vm_fini(struct hl_device *hdev);
 
-int hl_pin_host_memory(struct hl_device *hdev, u64 addr, u32 size,
+int hl_pin_host_memory(struct hl_device *hdev, u64 addr, u64 size,
 			struct hl_userptr *userptr);
 int hl_unpin_host_memory(struct hl_device *hdev, struct hl_userptr *userptr);
 void hl_userptr_delete_list(struct hl_device *hdev,
diff --git a/drivers/misc/habanalabs/memory.c b/drivers/misc/habanalabs/memory.c
index 9e3491dc3b55..4b57d7ce50dd 100644
--- a/drivers/misc/habanalabs/memory.c
+++ b/drivers/misc/habanalabs/memory.c
@@ -1210,7 +1210,7 @@ out:
  * - Pins the physical pages
  * - Create a SG list from those pages
  */
-int hl_pin_host_memory(struct hl_device *hdev, u64 addr, u32 size,
+int hl_pin_host_memory(struct hl_device *hdev, u64 addr, u64 size,
 			struct hl_userptr *userptr)
 {
 	u64 start, end;
@@ -1218,14 +1218,12 @@ int hl_pin_host_memory(struct hl_device *hdev, u64 addr, u32 size,
 	int rc;
 
 	if (!size) {
-		dev_err(hdev->dev, "size to pin is invalid - %d\n",
-			size);
+		dev_err(hdev->dev, "size to pin is invalid - %llu\n", size);
 		return -EINVAL;
 	}
 
 	if (!access_ok((void __user *) (uintptr_t) addr, size)) {
-		dev_err(hdev->dev, "user pointer is invalid - 0x%llx\n",
-			addr);
+		dev_err(hdev->dev, "user pointer is invalid - 0x%llx\n", addr);
 		return -EFAULT;
 	}
 
@@ -1236,7 +1234,7 @@ int hl_pin_host_memory(struct hl_device *hdev, u64 addr, u32 size,
 	if (((addr + size) < addr) ||
 			PAGE_ALIGN(addr + size) < (addr + size)) {
 		dev_err(hdev->dev,
-			"user pointer 0x%llx + %u causes integer overflow\n",
+			"user pointer 0x%llx + %llu causes integer overflow\n",
 			addr, size);
 		return -EINVAL;
 	}
diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h
index 4afc1891ece8..23d6ad3459cb 100644
--- a/include/uapi/misc/habanalabs.h
+++ b/include/uapi/misc/habanalabs.h
@@ -237,8 +237,7 @@ struct hl_mem_in {
 		/* HL_MEM_OP_ALLOC- allocate device memory */
 		struct {
 			/* Size to alloc */
-			__u32 mem_size;
-			__u32 pad;
+			__u64 mem_size;
 		} alloc;
 
 		/* HL_MEM_OP_FREE - free device memory */
@@ -278,8 +277,7 @@ struct hl_mem_in {
 			 */
 			__u64 hint_addr;
 			/* Size of allocated host memory */
-			__u32 mem_size;
-			__u32 pad;
+			__u64 mem_size;
 		} map_host;
 
 		/* HL_MEM_OP_UNMAP - unmap host memory */
-- 
cgit v1.2.3


From 492ecee892c2a4ba6a14903d5d586ff750b7e805 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Mon, 25 Feb 2019 14:28:39 -0800
Subject: bpf: enable program stats

JITed BPF programs are indistinguishable from kernel functions, but unlike
kernel code BPF code can be changed often.
Typical approach of "perf record" + "perf report" profiling and tuning of
kernel code works just as well for BPF programs, but kernel code doesn't
need to be monitored whereas BPF programs do.
Users load and run large amount of BPF programs.
These BPF stats allow tools monitor the usage of BPF on the server.
The monitoring tools will turn sysctl kernel.bpf_stats_enabled
on and off for few seconds to sample average cost of the programs.
Aggregated data over hours and days will provide an insight into cost of BPF
and alarms can trigger in case given program suddenly gets more expensive.

The cost of two sched_clock() per program invocation adds ~20 nsec.
Fast BPF progs (like selftests/bpf/progs/test_pkt_access.c) will slow down
from ~10 nsec to ~30 nsec.
static_key minimizes the cost of the stats collection.
There is no measurable difference before/after this patch
with kernel.bpf_stats_enabled=0

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/bpf.h    |  9 +++++++++
 include/linux/filter.h | 20 +++++++++++++++++++-
 kernel/bpf/core.c      | 31 +++++++++++++++++++++++++++++--
 kernel/bpf/syscall.c   | 34 ++++++++++++++++++++++++++++++++--
 kernel/bpf/verifier.c  |  7 ++++++-
 kernel/sysctl.c        | 34 ++++++++++++++++++++++++++++++++++
 6 files changed, 129 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index de18227b3d95..a2132e09dc1c 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -16,6 +16,7 @@
 #include <linux/rbtree_latch.h>
 #include <linux/numa.h>
 #include <linux/wait.h>
+#include <linux/u64_stats_sync.h>
 
 struct bpf_verifier_env;
 struct perf_event;
@@ -340,6 +341,12 @@ enum bpf_cgroup_storage_type {
 
 #define MAX_BPF_CGROUP_STORAGE_TYPE __BPF_CGROUP_STORAGE_MAX
 
+struct bpf_prog_stats {
+	u64 cnt;
+	u64 nsecs;
+	struct u64_stats_sync syncp;
+};
+
 struct bpf_prog_aux {
 	atomic_t refcnt;
 	u32 used_map_cnt;
@@ -389,6 +396,7 @@ struct bpf_prog_aux {
 	 * main prog always has linfo_idx == 0
 	 */
 	u32 linfo_idx;
+	struct bpf_prog_stats __percpu *stats;
 	union {
 		struct work_struct work;
 		struct rcu_head	rcu;
@@ -559,6 +567,7 @@ void bpf_map_area_free(void *base);
 void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr);
 
 extern int sysctl_unprivileged_bpf_disabled;
+extern int sysctl_bpf_stats_enabled;
 
 int bpf_map_new_fd(struct bpf_map *map, int flags);
 int bpf_prog_new_fd(struct bpf_prog *prog);
diff --git a/include/linux/filter.h b/include/linux/filter.h
index f32b3eca5a04..7e5e3db11106 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -533,7 +533,24 @@ struct sk_filter {
 	struct bpf_prog	*prog;
 };
 
-#define BPF_PROG_RUN(filter, ctx)  ({ cant_sleep(); (*(filter)->bpf_func)(ctx, (filter)->insnsi); })
+DECLARE_STATIC_KEY_FALSE(bpf_stats_enabled_key);
+
+#define BPF_PROG_RUN(prog, ctx)	({				\
+	u32 ret;						\
+	cant_sleep();						\
+	if (static_branch_unlikely(&bpf_stats_enabled_key)) {	\
+		struct bpf_prog_stats *stats;			\
+		u64 start = sched_clock();			\
+		ret = (*(prog)->bpf_func)(ctx, (prog)->insnsi);	\
+		stats = this_cpu_ptr(prog->aux->stats);		\
+		u64_stats_update_begin(&stats->syncp);		\
+		stats->cnt++;					\
+		stats->nsecs += sched_clock() - start;		\
+		u64_stats_update_end(&stats->syncp);		\
+	} else {						\
+		ret = (*(prog)->bpf_func)(ctx, (prog)->insnsi);	\
+	}							\
+	ret; })
 
 #define BPF_SKB_CB_LEN QDISC_CB_PRIV_LEN
 
@@ -764,6 +781,7 @@ void bpf_prog_free_jited_linfo(struct bpf_prog *prog);
 void bpf_prog_free_unused_jited_linfo(struct bpf_prog *prog);
 
 struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags);
+struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flags);
 struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size,
 				  gfp_t gfp_extra_flags);
 void __bpf_prog_free(struct bpf_prog *fp);
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index ef88b167959d..1c14c347f3cf 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -78,7 +78,7 @@ void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, int k, uns
 	return NULL;
 }
 
-struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags)
+struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flags)
 {
 	gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO | gfp_extra_flags;
 	struct bpf_prog_aux *aux;
@@ -104,6 +104,26 @@ struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags)
 
 	return fp;
 }
+
+struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags)
+{
+	gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO | gfp_extra_flags;
+	struct bpf_prog *prog;
+
+	prog = bpf_prog_alloc_no_stats(size, gfp_extra_flags);
+	if (!prog)
+		return NULL;
+
+	prog->aux->stats = alloc_percpu_gfp(struct bpf_prog_stats, gfp_flags);
+	if (!prog->aux->stats) {
+		kfree(prog->aux);
+		vfree(prog);
+		return NULL;
+	}
+
+	u64_stats_init(&prog->aux->stats->syncp);
+	return prog;
+}
 EXPORT_SYMBOL_GPL(bpf_prog_alloc);
 
 int bpf_prog_alloc_jited_linfo(struct bpf_prog *prog)
@@ -231,7 +251,10 @@ struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size,
 
 void __bpf_prog_free(struct bpf_prog *fp)
 {
-	kfree(fp->aux);
+	if (fp->aux) {
+		free_percpu(fp->aux->stats);
+		kfree(fp->aux);
+	}
 	vfree(fp);
 }
 
@@ -2069,6 +2092,10 @@ int __weak skb_copy_bits(const struct sk_buff *skb, int offset, void *to,
 	return -EFAULT;
 }
 
+DEFINE_STATIC_KEY_FALSE(bpf_stats_enabled_key);
+EXPORT_SYMBOL(bpf_stats_enabled_key);
+int sysctl_bpf_stats_enabled __read_mostly;
+
 /* All definitions of tracepoints related to BPF. */
 #define CREATE_TRACE_POINTS
 #include <linux/bpf_trace.h>
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index ec7c552af76b..31cf66fc3f5c 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1283,24 +1283,54 @@ static int bpf_prog_release(struct inode *inode, struct file *filp)
 	return 0;
 }
 
+static void bpf_prog_get_stats(const struct bpf_prog *prog,
+			       struct bpf_prog_stats *stats)
+{
+	u64 nsecs = 0, cnt = 0;
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		const struct bpf_prog_stats *st;
+		unsigned int start;
+		u64 tnsecs, tcnt;
+
+		st = per_cpu_ptr(prog->aux->stats, cpu);
+		do {
+			start = u64_stats_fetch_begin_irq(&st->syncp);
+			tnsecs = st->nsecs;
+			tcnt = st->cnt;
+		} while (u64_stats_fetch_retry_irq(&st->syncp, start));
+		nsecs += tnsecs;
+		cnt += tcnt;
+	}
+	stats->nsecs = nsecs;
+	stats->cnt = cnt;
+}
+
 #ifdef CONFIG_PROC_FS
 static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp)
 {
 	const struct bpf_prog *prog = filp->private_data;
 	char prog_tag[sizeof(prog->tag) * 2 + 1] = { };
+	struct bpf_prog_stats stats;
 
+	bpf_prog_get_stats(prog, &stats);
 	bin2hex(prog_tag, prog->tag, sizeof(prog->tag));
 	seq_printf(m,
 		   "prog_type:\t%u\n"
 		   "prog_jited:\t%u\n"
 		   "prog_tag:\t%s\n"
 		   "memlock:\t%llu\n"
-		   "prog_id:\t%u\n",
+		   "prog_id:\t%u\n"
+		   "run_time_ns:\t%llu\n"
+		   "run_cnt:\t%llu\n",
 		   prog->type,
 		   prog->jited,
 		   prog_tag,
 		   prog->pages * 1ULL << PAGE_SHIFT,
-		   prog->aux->id);
+		   prog->aux->id,
+		   stats.nsecs,
+		   stats.cnt);
 }
 #endif
 
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 1b9496c41383..0e4edd7e3c5f 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -7320,7 +7320,12 @@ static int jit_subprogs(struct bpf_verifier_env *env)
 		subprog_end = env->subprog_info[i + 1].start;
 
 		len = subprog_end - subprog_start;
-		func[i] = bpf_prog_alloc(bpf_prog_size(len), GFP_USER);
+		/* BPF_PROG_RUN doesn't call subprogs directly,
+		 * hence main prog stats include the runtime of subprogs.
+		 * subprogs don't have IDs and not reachable via prog_get_next_id
+		 * func[i]->aux->stats will never be accessed and stays NULL
+		 */
+		func[i] = bpf_prog_alloc_no_stats(bpf_prog_size(len), GFP_USER);
 		if (!func[i])
 			goto out_free;
 		memcpy(func[i]->insnsi, &prog->insnsi[subprog_start],
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index ba4d9e85feb8..86e0771352f2 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -224,6 +224,9 @@ static int proc_dostring_coredump(struct ctl_table *table, int write,
 #endif
 static int proc_dopipe_max_size(struct ctl_table *table, int write,
 		void __user *buffer, size_t *lenp, loff_t *ppos);
+static int proc_dointvec_minmax_bpf_stats(struct ctl_table *table, int write,
+					  void __user *buffer, size_t *lenp,
+					  loff_t *ppos);
 
 #ifdef CONFIG_MAGIC_SYSRQ
 /* Note: sysrq code uses its own private copy */
@@ -1230,6 +1233,15 @@ static struct ctl_table kern_table[] = {
 		.extra2		= &one,
 	},
 #endif
+	{
+		.procname	= "bpf_stats_enabled",
+		.data		= &sysctl_bpf_stats_enabled,
+		.maxlen		= sizeof(sysctl_bpf_stats_enabled),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax_bpf_stats,
+		.extra1		= &zero,
+		.extra2		= &one,
+	},
 #if defined(CONFIG_TREE_RCU) || defined(CONFIG_PREEMPT_RCU)
 	{
 		.procname	= "panic_on_rcu_stall",
@@ -3260,6 +3272,28 @@ int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
 
 #endif /* CONFIG_PROC_SYSCTL */
 
+static int proc_dointvec_minmax_bpf_stats(struct ctl_table *table, int write,
+					  void __user *buffer, size_t *lenp,
+					  loff_t *ppos)
+{
+	int ret, bpf_stats = *(int *)table->data;
+	struct ctl_table tmp = *table;
+
+	if (write && !capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	tmp.data = &bpf_stats;
+	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
+	if (write && !ret) {
+		*(int *)table->data = bpf_stats;
+		if (bpf_stats)
+			static_branch_enable(&bpf_stats_enabled_key);
+		else
+			static_branch_disable(&bpf_stats_enabled_key);
+	}
+	return ret;
+}
+
 /*
  * No sense putting this after each symbol definition, twice,
  * exception granted :-)
-- 
cgit v1.2.3


From 5f8f8b93aeb8371c54af08bece2bd04bc2d48707 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Mon, 25 Feb 2019 14:28:40 -0800
Subject: bpf: expose program stats via bpf_prog_info

Return bpf program run_time_ns and run_cnt via bpf_prog_info

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/uapi/linux/bpf.h | 2 ++
 kernel/bpf/syscall.c     | 5 +++++
 2 files changed, 7 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index bcdd2474eee7..2e308e90ffea 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -2813,6 +2813,8 @@ struct bpf_prog_info {
 	__u32 jited_line_info_rec_size;
 	__u32 nr_prog_tags;
 	__aligned_u64 prog_tags;
+	__u64 run_time_ns;
+	__u64 run_cnt;
 } __attribute__((aligned(8)));
 
 struct bpf_map_info {
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 31cf66fc3f5c..174581dfe225 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -2152,6 +2152,7 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
 	struct bpf_prog_info __user *uinfo = u64_to_user_ptr(attr->info.info);
 	struct bpf_prog_info info = {};
 	u32 info_len = attr->info.info_len;
+	struct bpf_prog_stats stats;
 	char __user *uinsns;
 	u32 ulen;
 	int err;
@@ -2191,6 +2192,10 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
 	if (err)
 		return err;
 
+	bpf_prog_get_stats(prog, &stats);
+	info.run_time_ns = stats.nsecs;
+	info.run_cnt = stats.cnt;
+
 	if (!capable(CAP_SYS_ADMIN)) {
 		info.jited_prog_len = 0;
 		info.xlated_prog_len = 0;
-- 
cgit v1.2.3


From 1cb33af1f75099282a51c4cc0bcc0ad413730079 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Wed, 27 Feb 2019 11:44:25 -0800
Subject: switchdev: Add SWITCHDEV_PORT_ATTR_SET

In preparation for allowing switchdev enabled drivers to veto specific
attribute settings from within the context of the caller, introduce a
new switchdev notifier type for port attributes.

Suggested-by: Ido Schimmel <idosch@mellanox.com>
Reviewed-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/switchdev.h   | 27 +++++++++++++++++++++++++
 net/switchdev/switchdev.c | 51 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 78 insertions(+)

(limited to 'include')

diff --git a/include/net/switchdev.h b/include/net/switchdev.h
index be4b13e66668..5087c06ceb4b 100644
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -132,6 +132,7 @@ enum switchdev_notifier_type {
 
 	SWITCHDEV_PORT_OBJ_ADD, /* Blocking. */
 	SWITCHDEV_PORT_OBJ_DEL, /* Blocking. */
+	SWITCHDEV_PORT_ATTR_SET, /* May be blocking . */
 
 	SWITCHDEV_VXLAN_FDB_ADD_TO_BRIDGE,
 	SWITCHDEV_VXLAN_FDB_DEL_TO_BRIDGE,
@@ -160,6 +161,13 @@ struct switchdev_notifier_port_obj_info {
 	bool handled;
 };
 
+struct switchdev_notifier_port_attr_info {
+	struct switchdev_notifier_info info; /* must be first */
+	const struct switchdev_attr *attr;
+	struct switchdev_trans *trans;
+	bool handled;
+};
+
 static inline struct net_device *
 switchdev_notifier_info_to_dev(const struct switchdev_notifier_info *info)
 {
@@ -212,7 +220,15 @@ int switchdev_handle_port_obj_del(struct net_device *dev,
 			int (*del_cb)(struct net_device *dev,
 				      const struct switchdev_obj *obj));
 
+int switchdev_handle_port_attr_set(struct net_device *dev,
+			struct switchdev_notifier_port_attr_info *port_attr_info,
+			bool (*check_cb)(const struct net_device *dev),
+			int (*set_cb)(struct net_device *dev,
+				      const struct switchdev_attr *attr,
+				      struct switchdev_trans *trans));
+
 #define SWITCHDEV_SET_OPS(netdev, ops) ((netdev)->switchdev_ops = (ops))
+
 #else
 
 static inline void switchdev_deferred_process(void)
@@ -299,6 +315,17 @@ switchdev_handle_port_obj_del(struct net_device *dev,
 	return 0;
 }
 
+static inline int
+switchdev_handle_port_attr_set(struct net_device *dev,
+			struct switchdev_notifier_port_attr_info *port_attr_info,
+			bool (*check_cb)(const struct net_device *dev),
+			int (*set_cb)(struct net_device *dev,
+				      const struct switchdev_attr *attr,
+				      struct switchdev_trans *trans))
+{
+	return 0;
+}
+
 #define SWITCHDEV_SET_OPS(netdev, ops) do {} while (0)
 
 #endif
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index 362413c9b389..3560c19aa7e2 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -655,3 +655,54 @@ int switchdev_handle_port_obj_del(struct net_device *dev,
 	return err;
 }
 EXPORT_SYMBOL_GPL(switchdev_handle_port_obj_del);
+
+static int __switchdev_handle_port_attr_set(struct net_device *dev,
+			struct switchdev_notifier_port_attr_info *port_attr_info,
+			bool (*check_cb)(const struct net_device *dev),
+			int (*set_cb)(struct net_device *dev,
+				      const struct switchdev_attr *attr,
+				      struct switchdev_trans *trans))
+{
+	struct net_device *lower_dev;
+	struct list_head *iter;
+	int err = -EOPNOTSUPP;
+
+	if (check_cb(dev)) {
+		port_attr_info->handled = true;
+		return set_cb(dev, port_attr_info->attr,
+			      port_attr_info->trans);
+	}
+
+	/* Switch ports might be stacked under e.g. a LAG. Ignore the
+	 * unsupported devices, another driver might be able to handle them. But
+	 * propagate to the callers any hard errors.
+	 *
+	 * If the driver does its own bookkeeping of stacked ports, it's not
+	 * necessary to go through this helper.
+	 */
+	netdev_for_each_lower_dev(dev, lower_dev, iter) {
+		err = __switchdev_handle_port_attr_set(lower_dev, port_attr_info,
+						       check_cb, set_cb);
+		if (err && err != -EOPNOTSUPP)
+			return err;
+	}
+
+	return err;
+}
+
+int switchdev_handle_port_attr_set(struct net_device *dev,
+			struct switchdev_notifier_port_attr_info *port_attr_info,
+			bool (*check_cb)(const struct net_device *dev),
+			int (*set_cb)(struct net_device *dev,
+				      const struct switchdev_attr *attr,
+				      struct switchdev_trans *trans))
+{
+	int err;
+
+	err = __switchdev_handle_port_attr_set(dev, port_attr_info, check_cb,
+					       set_cb);
+	if (err == -EOPNOTSUPP)
+		err = 0;
+	return err;
+}
+EXPORT_SYMBOL_GPL(switchdev_handle_port_attr_set);
-- 
cgit v1.2.3


From 3d705f07d16b1d872c556b4ebf44deabeca0e9c1 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Wed, 27 Feb 2019 11:44:32 -0800
Subject: net: Remove switchdev_ops

Now that we have converted all possible callers to using a switchdev
notifier for attributes we do not have a need for implementing
switchdev_ops anymore, and this can be removed from all drivers the
net_device structure.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum.c          |  3 ---
 drivers/net/ethernet/mellanox/mlxsw/spectrum.h          |  2 --
 .../net/ethernet/mellanox/mlxsw/spectrum_switchdev.c    | 12 ------------
 drivers/net/ethernet/mscc/ocelot.c                      |  5 -----
 drivers/net/ethernet/rocker/rocker_main.c               |  5 -----
 drivers/staging/fsl-dpaa2/ethsw/ethsw.c                 |  5 -----
 include/linux/netdevice.h                               |  3 ---
 include/net/switchdev.h                                 | 17 -----------------
 net/dsa/slave.c                                         |  5 -----
 9 files changed, 57 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index b00f6f74f91a..6c797e322be8 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -3660,7 +3660,6 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port,
 	}
 	mlxsw_sp_port->default_vlan = mlxsw_sp_port_vlan;
 
-	mlxsw_sp_port_switchdev_init(mlxsw_sp_port);
 	mlxsw_sp->ports[local_port] = mlxsw_sp_port;
 	err = register_netdev(dev);
 	if (err) {
@@ -3677,7 +3676,6 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port,
 
 err_register_netdev:
 	mlxsw_sp->ports[local_port] = NULL;
-	mlxsw_sp_port_switchdev_fini(mlxsw_sp_port);
 	mlxsw_sp_port_vlan_destroy(mlxsw_sp_port_vlan);
 err_port_vlan_create:
 err_port_pvid_set:
@@ -3720,7 +3718,6 @@ static void mlxsw_sp_port_remove(struct mlxsw_sp *mlxsw_sp, u8 local_port)
 	mlxsw_core_port_clear(mlxsw_sp->core, local_port, mlxsw_sp);
 	unregister_netdev(mlxsw_sp_port->dev); /* This calls ndo_stop */
 	mlxsw_sp->ports[local_port] = NULL;
-	mlxsw_sp_port_switchdev_fini(mlxsw_sp_port);
 	mlxsw_sp_port_vlan_flush(mlxsw_sp_port, true);
 	mlxsw_sp_port_nve_fini(mlxsw_sp_port);
 	mlxsw_sp_tc_qdisc_fini(mlxsw_sp_port);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index a61c1130d9e3..da6278b0caa4 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -407,8 +407,6 @@ extern const struct mlxsw_sp_sb_vals mlxsw_sp2_sb_vals;
 /* spectrum_switchdev.c */
 int mlxsw_sp_switchdev_init(struct mlxsw_sp *mlxsw_sp);
 void mlxsw_sp_switchdev_fini(struct mlxsw_sp *mlxsw_sp);
-void mlxsw_sp_port_switchdev_init(struct mlxsw_sp_port *mlxsw_sp_port);
-void mlxsw_sp_port_switchdev_fini(struct mlxsw_sp_port *mlxsw_sp_port);
 int mlxsw_sp_rif_fdb_op(struct mlxsw_sp *mlxsw_sp, const char *mac, u16 fid,
 			bool adding);
 void
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
index c1aedfea3a31..f6ce386c3036 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
@@ -1938,10 +1938,6 @@ static struct mlxsw_sp_port *mlxsw_sp_lag_rep_port(struct mlxsw_sp *mlxsw_sp,
 	return NULL;
 }
 
-static const struct switchdev_ops mlxsw_sp_port_switchdev_ops = {
-	.switchdev_port_attr_set	= mlxsw_sp_port_attr_set,
-};
-
 static int
 mlxsw_sp_bridge_8021q_port_join(struct mlxsw_sp_bridge_device *bridge_device,
 				struct mlxsw_sp_bridge_port *bridge_port,
@@ -3545,11 +3541,3 @@ void mlxsw_sp_switchdev_fini(struct mlxsw_sp *mlxsw_sp)
 	kfree(mlxsw_sp->bridge);
 }
 
-void mlxsw_sp_port_switchdev_init(struct mlxsw_sp_port *mlxsw_sp_port)
-{
-	mlxsw_sp_port->dev->switchdev_ops = &mlxsw_sp_port_switchdev_ops;
-}
-
-void mlxsw_sp_port_switchdev_fini(struct mlxsw_sp_port *mlxsw_sp_port)
-{
-}
diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c
index 83a678b11757..a1d0d6e42533 100644
--- a/drivers/net/ethernet/mscc/ocelot.c
+++ b/drivers/net/ethernet/mscc/ocelot.c
@@ -1324,10 +1324,6 @@ static int ocelot_port_obj_del(struct net_device *dev,
 	return ret;
 }
 
-static const struct switchdev_ops ocelot_port_switchdev_ops = {
-	.switchdev_port_attr_set	= ocelot_port_attr_set,
-};
-
 static int ocelot_port_bridge_join(struct ocelot_port *ocelot_port,
 				   struct net_device *bridge)
 {
@@ -1660,7 +1656,6 @@ int ocelot_probe_port(struct ocelot *ocelot, u8 port,
 
 	dev->netdev_ops = &ocelot_port_netdev_ops;
 	dev->ethtool_ops = &ocelot_ethtool_ops;
-	dev->switchdev_ops = &ocelot_port_switchdev_ops;
 
 	dev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_RXFCS;
 	dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
diff --git a/drivers/net/ethernet/rocker/rocker_main.c b/drivers/net/ethernet/rocker/rocker_main.c
index fc772cf079cc..c883aa89b7ca 100644
--- a/drivers/net/ethernet/rocker/rocker_main.c
+++ b/drivers/net/ethernet/rocker/rocker_main.c
@@ -2142,10 +2142,6 @@ static int rocker_port_obj_del(struct net_device *dev,
 	return err;
 }
 
-static const struct switchdev_ops rocker_port_switchdev_ops = {
-	.switchdev_port_attr_set	= rocker_port_attr_set,
-};
-
 struct rocker_fib_event_work {
 	struct work_struct work;
 	union {
@@ -2599,7 +2595,6 @@ static int rocker_probe_port(struct rocker *rocker, unsigned int port_number)
 	rocker_port_dev_addr_init(rocker_port);
 	dev->netdev_ops = &rocker_port_netdev_ops;
 	dev->ethtool_ops = &rocker_port_ethtool_ops;
-	dev->switchdev_ops = &rocker_port_switchdev_ops;
 	netif_tx_napi_add(dev, &rocker_port->napi_tx, rocker_port_poll_tx,
 			  NAPI_POLL_WEIGHT);
 	netif_napi_add(dev, &rocker_port->napi_rx, rocker_port_poll_rx,
diff --git a/drivers/staging/fsl-dpaa2/ethsw/ethsw.c b/drivers/staging/fsl-dpaa2/ethsw/ethsw.c
index b0d2d9bf2532..ad577beeb052 100644
--- a/drivers/staging/fsl-dpaa2/ethsw/ethsw.c
+++ b/drivers/staging/fsl-dpaa2/ethsw/ethsw.c
@@ -925,10 +925,6 @@ static int swdev_port_obj_del(struct net_device *netdev,
 	return err;
 }
 
-static const struct switchdev_ops ethsw_port_switchdev_ops = {
-	.switchdev_port_attr_set	= swdev_port_attr_set,
-};
-
 static int
 ethsw_switchdev_port_attr_set_event(struct net_device *netdev,
 		struct switchdev_notifier_port_attr_info *port_attr_info)
@@ -1455,7 +1451,6 @@ static int ethsw_probe_port(struct ethsw_core *ethsw, u16 port_idx)
 	SET_NETDEV_DEV(port_netdev, dev);
 	port_netdev->netdev_ops = &ethsw_port_ops;
 	port_netdev->ethtool_ops = &ethsw_port_ethtool_ops;
-	port_netdev->switchdev_ops = &ethsw_port_switchdev_ops;
 
 	/* Set MTU limits */
 	port_netdev->min_mtu = ETH_MIN_MTU;
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 58e83bd7a861..c10b60297d28 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1843,9 +1843,6 @@ struct net_device {
 #endif
 	const struct net_device_ops *netdev_ops;
 	const struct ethtool_ops *ethtool_ops;
-#ifdef CONFIG_NET_SWITCHDEV
-	const struct switchdev_ops *switchdev_ops;
-#endif
 #ifdef CONFIG_NET_L3_MASTER_DEV
 	const struct l3mdev_ops	*l3mdev_ops;
 #endif
diff --git a/include/net/switchdev.h b/include/net/switchdev.h
index 5087c06ceb4b..e4f751e19ecf 100644
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -112,17 +112,6 @@ void *switchdev_trans_item_dequeue(struct switchdev_trans *trans);
 
 typedef int switchdev_obj_dump_cb_t(struct switchdev_obj *obj);
 
-/**
- * struct switchdev_ops - switchdev operations
- *
- * @switchdev_port_attr_set: Set a port attribute (see switchdev_attr).
- */
-struct switchdev_ops {
-	int	(*switchdev_port_attr_set)(struct net_device *dev,
-					   const struct switchdev_attr *attr,
-					   struct switchdev_trans *trans);
-};
-
 enum switchdev_notifier_type {
 	SWITCHDEV_FDB_ADD_TO_BRIDGE = 1,
 	SWITCHDEV_FDB_DEL_TO_BRIDGE,
@@ -226,9 +215,6 @@ int switchdev_handle_port_attr_set(struct net_device *dev,
 			int (*set_cb)(struct net_device *dev,
 				      const struct switchdev_attr *attr,
 				      struct switchdev_trans *trans));
-
-#define SWITCHDEV_SET_OPS(netdev, ops) ((netdev)->switchdev_ops = (ops))
-
 #else
 
 static inline void switchdev_deferred_process(void)
@@ -325,9 +311,6 @@ switchdev_handle_port_attr_set(struct net_device *dev,
 {
 	return 0;
 }
-
-#define SWITCHDEV_SET_OPS(netdev, ops) do {} while (0)
-
 #endif
 
 #endif /* _LINUX_SWITCHDEV_H_ */
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index b089b43120e1..1808a2cd6872 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -1118,10 +1118,6 @@ static const struct net_device_ops dsa_slave_netdev_ops = {
 	.ndo_vlan_rx_kill_vid	= dsa_slave_vlan_rx_kill_vid,
 };
 
-static const struct switchdev_ops dsa_slave_switchdev_ops = {
-	.switchdev_port_attr_set	= dsa_slave_port_attr_set,
-};
-
 static struct device_type dsa_type = {
 	.name	= "dsa",
 };
@@ -1382,7 +1378,6 @@ int dsa_slave_create(struct dsa_port *port)
 	eth_hw_addr_inherit(slave_dev, master);
 	slave_dev->priv_flags |= IFF_NO_QUEUE;
 	slave_dev->netdev_ops = &dsa_slave_netdev_ops;
-	slave_dev->switchdev_ops = &dsa_slave_switchdev_ops;
 	slave_dev->min_mtu = 0;
 	slave_dev->max_mtu = ETH_MAX_MTU;
 	SET_NETDEV_DEVTYPE(slave_dev, &dsa_type);
-- 
cgit v1.2.3


From 24ba14406c5c8e33fcbe064f77b3ab01b84ec830 Mon Sep 17 00:00:00 2001
From: wenxu <wenxu@ucloud.cn>
Date: Sun, 24 Feb 2019 11:36:20 +0800
Subject: route: Add multipath_hash in flowi_common to make user-define hash

Current fib_multipath_hash_policy can make hash based on the L3 or
L4. But it only work on the outer IP. So a specific tunnel always
has the same hash value. But a specific tunnel may contain so many
inner connections.

This patch provide a generic multipath_hash in floi_common. It can
make a user-define hash which can mix with L3 or L4 hash.

Signed-off-by: wenxu <wenxu@ucloud.cn>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c | 2 +-
 include/net/flow.h                                  | 2 ++
 include/net/ip_tunnels.h                            | 3 ++-
 net/ipv4/ip_gre.c                                   | 2 +-
 net/ipv4/ip_tunnel.c                                | 6 +++---
 net/ipv4/route.c                                    | 4 ++++
 6 files changed, 13 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
index ad5a9b9e1466..536c23c578c3 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
@@ -305,7 +305,7 @@ mlxsw_sp_span_gretap4_route(const struct net_device *to_dev,
 
 	parms = mlxsw_sp_ipip_netdev_parms4(to_dev);
 	ip_tunnel_init_flow(&fl4, parms.iph.protocol, *daddrp, *saddrp,
-			    0, 0, parms.link, tun->fwmark);
+			    0, 0, parms.link, tun->fwmark, 0);
 
 	rt = ip_route_output_key(tun->net, &fl4);
 	if (IS_ERR(rt))
diff --git a/include/net/flow.h b/include/net/flow.h
index 93f2c9a0f098..a50fb77a0b27 100644
--- a/include/net/flow.h
+++ b/include/net/flow.h
@@ -40,6 +40,7 @@ struct flowi_common {
 	__u32	flowic_secid;
 	kuid_t  flowic_uid;
 	struct flowi_tunnel flowic_tun_key;
+	__u32		flowic_multipath_hash;
 };
 
 union flowi_uli {
@@ -78,6 +79,7 @@ struct flowi4 {
 #define flowi4_secid		__fl_common.flowic_secid
 #define flowi4_tun_key		__fl_common.flowic_tun_key
 #define flowi4_uid		__fl_common.flowic_uid
+#define flowi4_multipath_hash	__fl_common.flowic_multipath_hash
 
 	/* (saddr,daddr) must be grouped, same order as in IP header */
 	__be32			saddr;
diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index f069f64ebf29..af645604f328 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -241,7 +241,7 @@ static inline void ip_tunnel_init_flow(struct flowi4 *fl4,
 				       int proto,
 				       __be32 daddr, __be32 saddr,
 				       __be32 key, __u8 tos, int oif,
-				       __u32 mark)
+				       __u32 mark, __u32 tun_inner_hash)
 {
 	memset(fl4, 0, sizeof(*fl4));
 	fl4->flowi4_oif = oif;
@@ -251,6 +251,7 @@ static inline void ip_tunnel_init_flow(struct flowi4 *fl4,
 	fl4->flowi4_proto = proto;
 	fl4->fl4_gre_key = key;
 	fl4->flowi4_mark = mark;
+	fl4->flowi4_multipath_hash = tun_inner_hash;
 }
 
 int ip_tunnel_init(struct net_device *dev);
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 0f4925a0d6b2..fd219f7bd3ea 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -578,7 +578,7 @@ static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
 	key = &info->key;
 	ip_tunnel_init_flow(&fl4, IPPROTO_GRE, key->u.ipv4.dst, key->u.ipv4.src,
 			    tunnel_id_to_key32(key->tun_id), key->tos, 0,
-			    skb->mark);
+			    skb->mark, skb_get_hash(skb));
 	rt = ip_route_output_key(dev_net(dev), &fl4);
 	if (IS_ERR(rt))
 		return PTR_ERR(rt);
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index 2973067b831d..2756fb725bf0 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -310,7 +310,7 @@ static int ip_tunnel_bind_dev(struct net_device *dev)
 		ip_tunnel_init_flow(&fl4, iph->protocol, iph->daddr,
 				    iph->saddr, tunnel->parms.o_key,
 				    RT_TOS(iph->tos), tunnel->parms.link,
-				    tunnel->fwmark);
+				    tunnel->fwmark, 0);
 		rt = ip_route_output_key(tunnel->net, &fl4);
 
 		if (!IS_ERR(rt)) {
@@ -584,7 +584,7 @@ void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
 	}
 	ip_tunnel_init_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src,
 			    tunnel_id_to_key32(key->tun_id), RT_TOS(tos),
-			    0, skb->mark);
+			    0, skb->mark, skb_get_hash(skb));
 	if (tunnel->encap.type != TUNNEL_ENCAP_NONE)
 		goto tx_error;
 
@@ -744,7 +744,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
 
 	ip_tunnel_init_flow(&fl4, protocol, dst, tnl_params->saddr,
 			    tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link,
-			    tunnel->fwmark);
+			    tunnel->fwmark, skb_get_hash(skb));
 
 	if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0)
 		goto tx_error;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index ecc12a768191..7cf4c8305071 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1820,6 +1820,7 @@ out:
 int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
 		       const struct sk_buff *skb, struct flow_keys *flkeys)
 {
+	u32 multipath_hash = fl4->flowi4_multipath_hash;
 	struct flow_keys hash_keys;
 	u32 mhash;
 
@@ -1870,6 +1871,9 @@ int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
 	}
 	mhash = flow_hash_from_keys(&hash_keys);
 
+	if (multipath_hash)
+		mhash = jhash_2words(mhash, multipath_hash, 0);
+
 	return mhash >> 1;
 }
 #endif /* CONFIG_IP_ROUTE_MULTIPATH */
-- 
cgit v1.2.3


From 02e525b2aff1d665f6466e1d123ee4cb69f1d4b0 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 21 Feb 2019 15:38:40 +0100
Subject: locking/percpu-rwsem: Remove preempt_disable variants

Effective revert commit:

  87709e28dc7c ("fs/locks: Use percpu_down_read_preempt_disable()")

This is causing major pain for PREEMPT_RT.

Sebastian did a lot of lockperf runs on 2 and 4 node machines with all
preemption modes (PREEMPT=n should be an obvious NOP for this patch
and thus serves as a good control) and no results showed significance
over 2-sigma (the PREEMPT=n results were almost empty at 1-sigma).

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 fs/locks.c                   | 32 ++++++++++++++++----------------
 include/linux/percpu-rwsem.h | 24 ++++--------------------
 2 files changed, 20 insertions(+), 36 deletions(-)

(limited to 'include')

diff --git a/fs/locks.c b/fs/locks.c
index ff6af2c32601..eaa1cfaf73b0 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1058,7 +1058,7 @@ static int flock_lock_inode(struct inode *inode, struct file_lock *request)
 			return -ENOMEM;
 	}
 
-	percpu_down_read_preempt_disable(&file_rwsem);
+	percpu_down_read(&file_rwsem);
 	spin_lock(&ctx->flc_lock);
 	if (request->fl_flags & FL_ACCESS)
 		goto find_conflict;
@@ -1100,7 +1100,7 @@ find_conflict:
 
 out:
 	spin_unlock(&ctx->flc_lock);
-	percpu_up_read_preempt_enable(&file_rwsem);
+	percpu_up_read(&file_rwsem);
 	if (new_fl)
 		locks_free_lock(new_fl);
 	locks_dispose_list(&dispose);
@@ -1138,7 +1138,7 @@ static int posix_lock_inode(struct inode *inode, struct file_lock *request,
 		new_fl2 = locks_alloc_lock();
 	}
 
-	percpu_down_read_preempt_disable(&file_rwsem);
+	percpu_down_read(&file_rwsem);
 	spin_lock(&ctx->flc_lock);
 	/*
 	 * New lock request. Walk all POSIX locks and look for conflicts. If
@@ -1312,7 +1312,7 @@ static int posix_lock_inode(struct inode *inode, struct file_lock *request,
 	}
  out:
 	spin_unlock(&ctx->flc_lock);
-	percpu_up_read_preempt_enable(&file_rwsem);
+	percpu_up_read(&file_rwsem);
 	/*
 	 * Free any unused locks.
 	 */
@@ -1584,7 +1584,7 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type)
 		return error;
 	}
 
-	percpu_down_read_preempt_disable(&file_rwsem);
+	percpu_down_read(&file_rwsem);
 	spin_lock(&ctx->flc_lock);
 
 	time_out_leases(inode, &dispose);
@@ -1636,13 +1636,13 @@ restart:
 	locks_insert_block(fl, new_fl, leases_conflict);
 	trace_break_lease_block(inode, new_fl);
 	spin_unlock(&ctx->flc_lock);
-	percpu_up_read_preempt_enable(&file_rwsem);
+	percpu_up_read(&file_rwsem);
 
 	locks_dispose_list(&dispose);
 	error = wait_event_interruptible_timeout(new_fl->fl_wait,
 						!new_fl->fl_blocker, break_time);
 
-	percpu_down_read_preempt_disable(&file_rwsem);
+	percpu_down_read(&file_rwsem);
 	spin_lock(&ctx->flc_lock);
 	trace_break_lease_unblock(inode, new_fl);
 	locks_delete_block(new_fl);
@@ -1659,7 +1659,7 @@ restart:
 	}
 out:
 	spin_unlock(&ctx->flc_lock);
-	percpu_up_read_preempt_enable(&file_rwsem);
+	percpu_up_read(&file_rwsem);
 	locks_dispose_list(&dispose);
 	locks_free_lock(new_fl);
 	return error;
@@ -1729,7 +1729,7 @@ int fcntl_getlease(struct file *filp)
 
 	ctx = smp_load_acquire(&inode->i_flctx);
 	if (ctx && !list_empty_careful(&ctx->flc_lease)) {
-		percpu_down_read_preempt_disable(&file_rwsem);
+		percpu_down_read(&file_rwsem);
 		spin_lock(&ctx->flc_lock);
 		time_out_leases(inode, &dispose);
 		list_for_each_entry(fl, &ctx->flc_lease, fl_list) {
@@ -1739,7 +1739,7 @@ int fcntl_getlease(struct file *filp)
 			break;
 		}
 		spin_unlock(&ctx->flc_lock);
-		percpu_up_read_preempt_enable(&file_rwsem);
+		percpu_up_read(&file_rwsem);
 
 		locks_dispose_list(&dispose);
 	}
@@ -1813,7 +1813,7 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr
 		return -EINVAL;
 	}
 
-	percpu_down_read_preempt_disable(&file_rwsem);
+	percpu_down_read(&file_rwsem);
 	spin_lock(&ctx->flc_lock);
 	time_out_leases(inode, &dispose);
 	error = check_conflicting_open(dentry, arg, lease->fl_flags);
@@ -1884,7 +1884,7 @@ out_setup:
 		lease->fl_lmops->lm_setup(lease, priv);
 out:
 	spin_unlock(&ctx->flc_lock);
-	percpu_up_read_preempt_enable(&file_rwsem);
+	percpu_up_read(&file_rwsem);
 	locks_dispose_list(&dispose);
 	if (is_deleg)
 		inode_unlock(inode);
@@ -1907,7 +1907,7 @@ static int generic_delete_lease(struct file *filp, void *owner)
 		return error;
 	}
 
-	percpu_down_read_preempt_disable(&file_rwsem);
+	percpu_down_read(&file_rwsem);
 	spin_lock(&ctx->flc_lock);
 	list_for_each_entry(fl, &ctx->flc_lease, fl_list) {
 		if (fl->fl_file == filp &&
@@ -1920,7 +1920,7 @@ static int generic_delete_lease(struct file *filp, void *owner)
 	if (victim)
 		error = fl->fl_lmops->lm_change(victim, F_UNLCK, &dispose);
 	spin_unlock(&ctx->flc_lock);
-	percpu_up_read_preempt_enable(&file_rwsem);
+	percpu_up_read(&file_rwsem);
 	locks_dispose_list(&dispose);
 	return error;
 }
@@ -2643,13 +2643,13 @@ locks_remove_lease(struct file *filp, struct file_lock_context *ctx)
 	if (list_empty(&ctx->flc_lease))
 		return;
 
-	percpu_down_read_preempt_disable(&file_rwsem);
+	percpu_down_read(&file_rwsem);
 	spin_lock(&ctx->flc_lock);
 	list_for_each_entry_safe(fl, tmp, &ctx->flc_lease, fl_list)
 		if (filp == fl->fl_file)
 			lease_modify(fl, F_UNLCK, &dispose);
 	spin_unlock(&ctx->flc_lock);
-	percpu_up_read_preempt_enable(&file_rwsem);
+	percpu_up_read(&file_rwsem);
 
 	locks_dispose_list(&dispose);
 }
diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h
index 71b75643c432..03cb4b6f842e 100644
--- a/include/linux/percpu-rwsem.h
+++ b/include/linux/percpu-rwsem.h
@@ -29,7 +29,7 @@ static struct percpu_rw_semaphore name = {				\
 extern int __percpu_down_read(struct percpu_rw_semaphore *, int);
 extern void __percpu_up_read(struct percpu_rw_semaphore *);
 
-static inline void percpu_down_read_preempt_disable(struct percpu_rw_semaphore *sem)
+static inline void percpu_down_read(struct percpu_rw_semaphore *sem)
 {
 	might_sleep();
 
@@ -47,16 +47,10 @@ static inline void percpu_down_read_preempt_disable(struct percpu_rw_semaphore *
 	__this_cpu_inc(*sem->read_count);
 	if (unlikely(!rcu_sync_is_idle(&sem->rss)))
 		__percpu_down_read(sem, false); /* Unconditional memory barrier */
-	barrier();
 	/*
-	 * The barrier() prevents the compiler from
+	 * The preempt_enable() prevents the compiler from
 	 * bleeding the critical section out.
 	 */
-}
-
-static inline void percpu_down_read(struct percpu_rw_semaphore *sem)
-{
-	percpu_down_read_preempt_disable(sem);
 	preempt_enable();
 }
 
@@ -83,13 +77,9 @@ static inline int percpu_down_read_trylock(struct percpu_rw_semaphore *sem)
 	return ret;
 }
 
-static inline void percpu_up_read_preempt_enable(struct percpu_rw_semaphore *sem)
+static inline void percpu_up_read(struct percpu_rw_semaphore *sem)
 {
-	/*
-	 * The barrier() prevents the compiler from
-	 * bleeding the critical section out.
-	 */
-	barrier();
+	preempt_disable();
 	/*
 	 * Same as in percpu_down_read().
 	 */
@@ -102,12 +92,6 @@ static inline void percpu_up_read_preempt_enable(struct percpu_rw_semaphore *sem
 	rwsem_release(&sem->rw_sem.dep_map, 1, _RET_IP_);
 }
 
-static inline void percpu_up_read(struct percpu_rw_semaphore *sem)
-{
-	preempt_disable();
-	percpu_up_read_preempt_enable(sem);
-}
-
 extern void percpu_down_write(struct percpu_rw_semaphore *);
 extern void percpu_up_write(struct percpu_rw_semaphore *);
 
-- 
cgit v1.2.3


From 09329d1c2024522308ca4de977fc6bba753bab1a Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Thu, 14 Feb 2019 15:00:40 -0800
Subject: locking/lockdep: Reorder struct lock_class members

This patch does not change any functionality but makes the patch that
frees lock classes that are no longer in use easier to read.

Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Johannes Berg <johannes@sipsolutions.net>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Waiman Long <longman@redhat.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: johannes.berg@intel.com
Cc: tj@kernel.org
Link: https://lkml.kernel.org/r/20190214230058.196511-6-bvanassche@acm.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/lockdep.h | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index c5335df2372f..0c38bade84b7 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -76,6 +76,13 @@ struct lock_class {
 	 */
 	struct list_head		lock_entry;
 
+	/*
+	 * These fields represent a directed graph of lock dependencies,
+	 * to every node we attach a list of "forward" and a list of
+	 * "backward" graph nodes.
+	 */
+	struct list_head		locks_after, locks_before;
+
 	struct lockdep_subclass_key	*key;
 	unsigned int			subclass;
 	unsigned int			dep_gen_id;
@@ -86,13 +93,6 @@ struct lock_class {
 	unsigned long			usage_mask;
 	struct stack_trace		usage_traces[XXX_LOCK_USAGE_STATES];
 
-	/*
-	 * These fields represent a directed graph of lock dependencies,
-	 * to every node we attach a list of "forward" and a list of
-	 * "backward" graph nodes.
-	 */
-	struct list_head		locks_after, locks_before;
-
 	/*
 	 * Generation counter, when doing certain classes of graph walking,
 	 * to ensure that we check one node only once:
-- 
cgit v1.2.3


From 86cffb80a525f7b8f969c8c79669d383e02f17d1 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Thu, 14 Feb 2019 15:00:41 -0800
Subject: locking/lockdep: Make zap_class() remove all matching lock order
 entries

Make sure that all lock order entries that refer to a class are removed
from the list_entries[] array when a kernel module is unloaded.

Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Johannes Berg <johannes@sipsolutions.net>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Waiman Long <longman@redhat.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: johannes.berg@intel.com
Cc: tj@kernel.org
Link: https://lkml.kernel.org/r/20190214230058.196511-7-bvanassche@acm.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/lockdep.h  |  1 +
 kernel/locking/lockdep.c | 19 +++++++++++++------
 2 files changed, 14 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 0c38bade84b7..b5e6bfe0ae4a 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -178,6 +178,7 @@ static inline void lockdep_copy_map(struct lockdep_map *to,
 struct lock_list {
 	struct list_head		entry;
 	struct lock_class		*class;
+	struct lock_class		*links_to;
 	struct stack_trace		trace;
 	int				distance;
 
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 21d84510e28f..28fbeb2a10cc 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -859,7 +859,8 @@ static struct lock_list *alloc_list_entry(void)
 /*
  * Add a new dependency to the head of the list:
  */
-static int add_lock_to_list(struct lock_class *this, struct list_head *head,
+static int add_lock_to_list(struct lock_class *this,
+			    struct lock_class *links_to, struct list_head *head,
 			    unsigned long ip, int distance,
 			    struct stack_trace *trace)
 {
@@ -873,6 +874,7 @@ static int add_lock_to_list(struct lock_class *this, struct list_head *head,
 		return 0;
 
 	entry->class = this;
+	entry->links_to = links_to;
 	entry->distance = distance;
 	entry->trace = *trace;
 	/*
@@ -1907,14 +1909,14 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev,
 	 * Ok, all validations passed, add the new lock
 	 * to the previous lock's dependency list:
 	 */
-	ret = add_lock_to_list(hlock_class(next),
+	ret = add_lock_to_list(hlock_class(next), hlock_class(prev),
 			       &hlock_class(prev)->locks_after,
 			       next->acquire_ip, distance, trace);
 
 	if (!ret)
 		return 0;
 
-	ret = add_lock_to_list(hlock_class(prev),
+	ret = add_lock_to_list(hlock_class(prev), hlock_class(next),
 			       &hlock_class(next)->locks_before,
 			       next->acquire_ip, distance, trace);
 	if (!ret)
@@ -4107,15 +4109,20 @@ void lockdep_reset(void)
  */
 static void zap_class(struct lock_class *class)
 {
+	struct lock_list *entry;
 	int i;
 
 	/*
 	 * Remove all dependencies this lock is
 	 * involved in:
 	 */
-	for (i = 0; i < nr_list_entries; i++) {
-		if (list_entries[i].class == class)
-			list_del_rcu(&list_entries[i].entry);
+	for (i = 0, entry = list_entries; i < nr_list_entries; i++, entry++) {
+		if (entry->class != class && entry->links_to != class)
+			continue;
+		list_del_rcu(&entry->entry);
+		/* Clear .class and .links_to to avoid double removal. */
+		WRITE_ONCE(entry->class, NULL);
+		WRITE_ONCE(entry->links_to, NULL);
 	}
 	/*
 	 * Unhash the class and remove it from the all_lock_classes list:
-- 
cgit v1.2.3


From cdc84d794947b5431c0a6916c303aee7114819d2 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Thu, 14 Feb 2019 15:00:44 -0800
Subject: locking/lockdep: Make it easy to detect whether or not inside a
 selftest

The patch that frees unused lock classes will modify the behavior of
lockdep_free_key_range() and lockdep_reset_lock() depending on whether
or not these functions are called from the context of the lockdep
selftests. Hence make it easy to detect whether or not lockdep code
is called from the context of a lockdep selftest.

Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Johannes Berg <johannes@sipsolutions.net>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Waiman Long <longman@redhat.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: johannes.berg@intel.com
Cc: tj@kernel.org
Link: https://lkml.kernel.org/r/20190214230058.196511-10-bvanassche@acm.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/lockdep.h  | 5 +++++
 kernel/locking/lockdep.c | 6 ++++++
 lib/locking-selftest.c   | 2 ++
 3 files changed, 13 insertions(+)

(limited to 'include')

diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index b5e6bfe0ae4a..66eee1ba0f2a 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -265,6 +265,7 @@ extern void lockdep_reset(void);
 extern void lockdep_reset_lock(struct lockdep_map *lock);
 extern void lockdep_free_key_range(void *start, unsigned long size);
 extern asmlinkage void lockdep_sys_exit(void);
+extern void lockdep_set_selftest_task(struct task_struct *task);
 
 extern void lockdep_off(void);
 extern void lockdep_on(void);
@@ -395,6 +396,10 @@ static inline void lockdep_on(void)
 {
 }
 
+static inline void lockdep_set_selftest_task(struct task_struct *task)
+{
+}
+
 # define lock_acquire(l, s, t, r, c, n, i)	do { } while (0)
 # define lock_release(l, n, i)			do { } while (0)
 # define lock_downgrade(l, i)			do { } while (0)
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 2d4c21a02546..34cd87c65f5d 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -81,6 +81,7 @@ module_param(lock_stat, int, 0644);
  * code to recurse back into the lockdep code...
  */
 static arch_spinlock_t lockdep_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
+static struct task_struct *lockdep_selftest_task_struct;
 
 static int graph_lock(void)
 {
@@ -331,6 +332,11 @@ void lockdep_on(void)
 }
 EXPORT_SYMBOL(lockdep_on);
 
+void lockdep_set_selftest_task(struct task_struct *task)
+{
+	lockdep_selftest_task_struct = task;
+}
+
 /*
  * Debugging switches:
  */
diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c
index 1e1bbf171eca..a1705545e6ac 100644
--- a/lib/locking-selftest.c
+++ b/lib/locking-selftest.c
@@ -1989,6 +1989,7 @@ void locking_selftest(void)
 
 	init_shared_classes();
 	debug_locks_silent = !debug_locks_verbose;
+	lockdep_set_selftest_task(current);
 
 	DO_TESTCASE_6R("A-A deadlock", AA);
 	DO_TESTCASE_6R("A-B-B-A deadlock", ABBA);
@@ -2097,5 +2098,6 @@ void locking_selftest(void)
 		printk("---------------------------------\n");
 		debug_locks = 1;
 	}
+	lockdep_set_selftest_task(NULL);
 	debug_locks_silent = 0;
 }
-- 
cgit v1.2.3


From a0b0fd53e1e67639b303b15939b9c653dbe7a8c4 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Thu, 14 Feb 2019 15:00:46 -0800
Subject: locking/lockdep: Free lock classes that are no longer in use

Instead of leaving lock classes that are no longer in use in the
lock_classes array, reuse entries from that array that are no longer in
use. Maintain a linked list of free lock classes with list head
'free_lock_class'. Only add freed lock classes to the free_lock_classes
list after a grace period to avoid that a lock_classes[] element would
be reused while an RCU reader is accessing it. Since the lockdep
selftests run in a context where sleeping is not allowed and since the
selftests require that lock resetting/zapping works with debug_locks
off, make the behavior of lockdep_free_key_range() and
lockdep_reset_lock() depend on whether or not these are called from
the context of the lockdep selftests.

Thanks to Peter for having shown how to modify get_pending_free()
such that that function does not have to sleep.

Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Johannes Berg <johannes@sipsolutions.net>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Waiman Long <longman@redhat.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: johannes.berg@intel.com
Cc: tj@kernel.org
Link: https://lkml.kernel.org/r/20190214230058.196511-12-bvanassche@acm.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/lockdep.h  |   9 +-
 kernel/locking/lockdep.c | 396 +++++++++++++++++++++++++++++++++++++++++------
 2 files changed, 354 insertions(+), 51 deletions(-)

(limited to 'include')

diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 66eee1ba0f2a..619ec3f26cdc 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -63,7 +63,8 @@ extern struct lock_class_key __lockdep_no_validate__;
 #define LOCKSTAT_POINTS		4
 
 /*
- * The lock-class itself:
+ * The lock-class itself. The order of the structure members matters.
+ * reinit_class() zeroes the key member and all subsequent members.
  */
 struct lock_class {
 	/*
@@ -72,7 +73,9 @@ struct lock_class {
 	struct hlist_node		hash_entry;
 
 	/*
-	 * global list of all lock-classes:
+	 * Entry in all_lock_classes when in use. Entry in free_lock_classes
+	 * when not in use. Instances that are being freed are on one of the
+	 * zapped_classes lists.
 	 */
 	struct list_head		lock_entry;
 
@@ -104,7 +107,7 @@ struct lock_class {
 	unsigned long			contention_point[LOCKSTAT_POINTS];
 	unsigned long			contending_point[LOCKSTAT_POINTS];
 #endif
-};
+} __no_randomize_layout;
 
 #ifdef CONFIG_LOCK_STAT
 struct lock_time {
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index c7ca3a4def7e..8ecf355dd163 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -50,6 +50,7 @@
 #include <linux/random.h>
 #include <linux/jhash.h>
 #include <linux/nmi.h>
+#include <linux/rcupdate.h>
 
 #include <asm/sections.h>
 
@@ -135,8 +136,8 @@ static struct lock_list list_entries[MAX_LOCKDEP_ENTRIES];
 /*
  * All data structures here are protected by the global debug_lock.
  *
- * Mutex key structs only get allocated, once during bootup, and never
- * get freed - this significantly simplifies the debugging code.
+ * nr_lock_classes is the number of elements of lock_classes[] that is
+ * in use.
  */
 unsigned long nr_lock_classes;
 #ifndef CONFIG_DEBUG_LOCKDEP
@@ -278,11 +279,39 @@ static inline void lock_release_holdtime(struct held_lock *hlock)
 #endif
 
 /*
- * We keep a global list of all lock classes. The list only grows,
- * never shrinks. The list is only accessed with the lockdep
- * spinlock lock held.
+ * We keep a global list of all lock classes. The list is only accessed with
+ * the lockdep spinlock lock held. free_lock_classes is a list with free
+ * elements. These elements are linked together by the lock_entry member in
+ * struct lock_class.
  */
 LIST_HEAD(all_lock_classes);
+static LIST_HEAD(free_lock_classes);
+
+/**
+ * struct pending_free - information about data structures about to be freed
+ * @zapped: Head of a list with struct lock_class elements.
+ */
+struct pending_free {
+	struct list_head zapped;
+};
+
+/**
+ * struct delayed_free - data structures used for delayed freeing
+ *
+ * A data structure for delayed freeing of data structures that may be
+ * accessed by RCU readers at the time these were freed.
+ *
+ * @rcu_head:  Used to schedule an RCU callback for freeing data structures.
+ * @index:     Index of @pf to which freed data structures are added.
+ * @scheduled: Whether or not an RCU callback has been scheduled.
+ * @pf:        Array with information about data structures about to be freed.
+ */
+static struct delayed_free {
+	struct rcu_head		rcu_head;
+	int			index;
+	int			scheduled;
+	struct pending_free	pf[2];
+} delayed_free;
 
 /*
  * The lockdep classes are in a hash-table as well, for fast lookup:
@@ -742,7 +771,8 @@ static bool assign_lock_key(struct lockdep_map *lock)
 }
 
 /*
- * Initialize the lock_classes[] array elements.
+ * Initialize the lock_classes[] array elements, the free_lock_classes list
+ * and also the delayed_free structure.
  */
 static void init_data_structures_once(void)
 {
@@ -754,7 +784,12 @@ static void init_data_structures_once(void)
 
 	initialization_happened = true;
 
+	init_rcu_head(&delayed_free.rcu_head);
+	INIT_LIST_HEAD(&delayed_free.pf[0].zapped);
+	INIT_LIST_HEAD(&delayed_free.pf[1].zapped);
+
 	for (i = 0; i < ARRAY_SIZE(lock_classes); i++) {
+		list_add_tail(&lock_classes[i].lock_entry, &free_lock_classes);
 		INIT_LIST_HEAD(&lock_classes[i].locks_after);
 		INIT_LIST_HEAD(&lock_classes[i].locks_before);
 	}
@@ -802,11 +837,10 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force)
 
 	init_data_structures_once();
 
-	/*
-	 * Allocate a new key from the static array, and add it to
-	 * the hash:
-	 */
-	if (nr_lock_classes >= MAX_LOCKDEP_KEYS) {
+	/* Allocate a new lock class and add it to the hash. */
+	class = list_first_entry_or_null(&free_lock_classes, typeof(*class),
+					 lock_entry);
+	if (!class) {
 		if (!debug_locks_off_graph_unlock()) {
 			return NULL;
 		}
@@ -815,7 +849,7 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force)
 		dump_stack();
 		return NULL;
 	}
-	class = lock_classes + nr_lock_classes++;
+	nr_lock_classes++;
 	debug_atomic_inc(nr_unused_locks);
 	class->key = key;
 	class->name = lock->name;
@@ -829,9 +863,10 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force)
 	 */
 	hlist_add_head_rcu(&class->hash_entry, hash_head);
 	/*
-	 * Add it to the global list of classes:
+	 * Remove the class from the free list and add it to the global list
+	 * of classes.
 	 */
-	list_add_tail(&class->lock_entry, &all_lock_classes);
+	list_move_tail(&class->lock_entry, &all_lock_classes);
 
 	if (verbose(class)) {
 		graph_unlock();
@@ -1860,6 +1895,24 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev,
 	struct lock_list this;
 	int ret;
 
+	if (!hlock_class(prev)->key || !hlock_class(next)->key) {
+		/*
+		 * The warning statements below may trigger a use-after-free
+		 * of the class name. It is better to trigger a use-after free
+		 * and to have the class name most of the time instead of not
+		 * having the class name available.
+		 */
+		WARN_ONCE(!debug_locks_silent && !hlock_class(prev)->key,
+			  "Detected use-after-free of lock class %px/%s\n",
+			  hlock_class(prev),
+			  hlock_class(prev)->name);
+		WARN_ONCE(!debug_locks_silent && !hlock_class(next)->key,
+			  "Detected use-after-free of lock class %px/%s\n",
+			  hlock_class(next),
+			  hlock_class(next)->name);
+		return 2;
+	}
+
 	/*
 	 * Prove that the new <prev> -> <next> dependency would not
 	 * create a circular dependency in the graph. (We do this by
@@ -2242,19 +2295,16 @@ static inline int add_chain_cache(struct task_struct *curr,
 }
 
 /*
- * Look up a dependency chain.
+ * Look up a dependency chain. Must be called with either the graph lock or
+ * the RCU read lock held.
  */
 static inline struct lock_chain *lookup_chain_cache(u64 chain_key)
 {
 	struct hlist_head *hash_head = chainhashentry(chain_key);
 	struct lock_chain *chain;
 
-	/*
-	 * We can walk it lock-free, because entries only get added
-	 * to the hash:
-	 */
 	hlist_for_each_entry_rcu(chain, hash_head, entry) {
-		if (chain->chain_key == chain_key) {
+		if (READ_ONCE(chain->chain_key) == chain_key) {
 			debug_atomic_inc(chain_lookup_hits);
 			return chain;
 		}
@@ -3337,6 +3387,11 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
 	if (nest_lock && !__lock_is_held(nest_lock, -1))
 		return print_lock_nested_lock_not_held(curr, hlock, ip);
 
+	if (!debug_locks_silent) {
+		WARN_ON_ONCE(depth && !hlock_class(hlock - 1)->key);
+		WARN_ON_ONCE(!hlock_class(hlock)->key);
+	}
+
 	if (!validate_chain(curr, lock, hlock, chain_head, chain_key))
 		return 0;
 
@@ -4131,14 +4186,92 @@ void lockdep_reset(void)
 	raw_local_irq_restore(flags);
 }
 
+/* Remove a class from a lock chain. Must be called with the graph lock held. */
+static void remove_class_from_lock_chain(struct lock_chain *chain,
+					 struct lock_class *class)
+{
+#ifdef CONFIG_PROVE_LOCKING
+	struct lock_chain *new_chain;
+	u64 chain_key;
+	int i;
+
+	for (i = chain->base; i < chain->base + chain->depth; i++) {
+		if (chain_hlocks[i] != class - lock_classes)
+			continue;
+		/* The code below leaks one chain_hlock[] entry. */
+		if (--chain->depth > 0)
+			memmove(&chain_hlocks[i], &chain_hlocks[i + 1],
+				(chain->base + chain->depth - i) *
+				sizeof(chain_hlocks[0]));
+		/*
+		 * Each lock class occurs at most once in a lock chain so once
+		 * we found a match we can break out of this loop.
+		 */
+		goto recalc;
+	}
+	/* Since the chain has not been modified, return. */
+	return;
+
+recalc:
+	chain_key = 0;
+	for (i = chain->base; i < chain->base + chain->depth; i++)
+		chain_key = iterate_chain_key(chain_key, chain_hlocks[i] + 1);
+	if (chain->depth && chain->chain_key == chain_key)
+		return;
+	/* Overwrite the chain key for concurrent RCU readers. */
+	WRITE_ONCE(chain->chain_key, chain_key);
+	/*
+	 * Note: calling hlist_del_rcu() from inside a
+	 * hlist_for_each_entry_rcu() loop is safe.
+	 */
+	hlist_del_rcu(&chain->entry);
+	if (chain->depth == 0)
+		return;
+	/*
+	 * If the modified lock chain matches an existing lock chain, drop
+	 * the modified lock chain.
+	 */
+	if (lookup_chain_cache(chain_key))
+		return;
+	if (WARN_ON_ONCE(nr_lock_chains >= MAX_LOCKDEP_CHAINS)) {
+		debug_locks_off();
+		return;
+	}
+	/*
+	 * Leak *chain because it is not safe to reinsert it before an RCU
+	 * grace period has expired.
+	 */
+	new_chain = lock_chains + nr_lock_chains++;
+	*new_chain = *chain;
+	hlist_add_head_rcu(&new_chain->entry, chainhashentry(chain_key));
+#endif
+}
+
+/* Must be called with the graph lock held. */
+static void remove_class_from_lock_chains(struct lock_class *class)
+{
+	struct lock_chain *chain;
+	struct hlist_head *head;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(chainhash_table); i++) {
+		head = chainhash_table + i;
+		hlist_for_each_entry_rcu(chain, head, entry) {
+			remove_class_from_lock_chain(chain, class);
+		}
+	}
+}
+
 /*
  * Remove all references to a lock class. The caller must hold the graph lock.
  */
-static void zap_class(struct lock_class *class)
+static void zap_class(struct pending_free *pf, struct lock_class *class)
 {
 	struct lock_list *entry;
 	int i;
 
+	WARN_ON_ONCE(!class->key);
+
 	/*
 	 * Remove all dependencies this lock is
 	 * involved in:
@@ -4151,14 +4284,33 @@ static void zap_class(struct lock_class *class)
 		WRITE_ONCE(entry->class, NULL);
 		WRITE_ONCE(entry->links_to, NULL);
 	}
-	/*
-	 * Unhash the class and remove it from the all_lock_classes list:
-	 */
-	hlist_del_rcu(&class->hash_entry);
-	list_del(&class->lock_entry);
+	if (list_empty(&class->locks_after) &&
+	    list_empty(&class->locks_before)) {
+		list_move_tail(&class->lock_entry, &pf->zapped);
+		hlist_del_rcu(&class->hash_entry);
+		WRITE_ONCE(class->key, NULL);
+		WRITE_ONCE(class->name, NULL);
+		nr_lock_classes--;
+	} else {
+		WARN_ONCE(true, "%s() failed for class %s\n", __func__,
+			  class->name);
+	}
 
-	RCU_INIT_POINTER(class->key, NULL);
-	RCU_INIT_POINTER(class->name, NULL);
+	remove_class_from_lock_chains(class);
+}
+
+static void reinit_class(struct lock_class *class)
+{
+	void *const p = class;
+	const unsigned int offset = offsetof(struct lock_class, key);
+
+	WARN_ON_ONCE(!class->lock_entry.next);
+	WARN_ON_ONCE(!list_empty(&class->locks_after));
+	WARN_ON_ONCE(!list_empty(&class->locks_before));
+	memset(p + offset, 0, sizeof(*class) - offset);
+	WARN_ON_ONCE(!class->lock_entry.next);
+	WARN_ON_ONCE(!list_empty(&class->locks_after));
+	WARN_ON_ONCE(!list_empty(&class->locks_before));
 }
 
 static inline int within(const void *addr, void *start, unsigned long size)
@@ -4166,7 +4318,87 @@ static inline int within(const void *addr, void *start, unsigned long size)
 	return addr >= start && addr < start + size;
 }
 
-static void __lockdep_free_key_range(void *start, unsigned long size)
+static bool inside_selftest(void)
+{
+	return current == lockdep_selftest_task_struct;
+}
+
+/* The caller must hold the graph lock. */
+static struct pending_free *get_pending_free(void)
+{
+	return delayed_free.pf + delayed_free.index;
+}
+
+static void free_zapped_rcu(struct rcu_head *cb);
+
+/*
+ * Schedule an RCU callback if no RCU callback is pending. Must be called with
+ * the graph lock held.
+ */
+static void call_rcu_zapped(struct pending_free *pf)
+{
+	WARN_ON_ONCE(inside_selftest());
+
+	if (list_empty(&pf->zapped))
+		return;
+
+	if (delayed_free.scheduled)
+		return;
+
+	delayed_free.scheduled = true;
+
+	WARN_ON_ONCE(delayed_free.pf + delayed_free.index != pf);
+	delayed_free.index ^= 1;
+
+	call_rcu(&delayed_free.rcu_head, free_zapped_rcu);
+}
+
+/* The caller must hold the graph lock. May be called from RCU context. */
+static void __free_zapped_classes(struct pending_free *pf)
+{
+	struct lock_class *class;
+
+	list_for_each_entry(class, &pf->zapped, lock_entry)
+		reinit_class(class);
+
+	list_splice_init(&pf->zapped, &free_lock_classes);
+}
+
+static void free_zapped_rcu(struct rcu_head *ch)
+{
+	struct pending_free *pf;
+	unsigned long flags;
+
+	if (WARN_ON_ONCE(ch != &delayed_free.rcu_head))
+		return;
+
+	raw_local_irq_save(flags);
+	if (!graph_lock())
+		goto out_irq;
+
+	/* closed head */
+	pf = delayed_free.pf + (delayed_free.index ^ 1);
+	__free_zapped_classes(pf);
+	delayed_free.scheduled = false;
+
+	/*
+	 * If there's anything on the open list, close and start a new callback.
+	 */
+	call_rcu_zapped(delayed_free.pf + delayed_free.index);
+
+	graph_unlock();
+out_irq:
+	raw_local_irq_restore(flags);
+}
+
+/*
+ * Remove all lock classes from the class hash table and from the
+ * all_lock_classes list whose key or name is in the address range [start,
+ * start + size). Move these lock classes to the zapped_classes list. Must
+ * be called with the graph lock held.
+ */
+static void __lockdep_free_key_range(struct pending_free *pf, void *start,
+				     unsigned long size)
 {
 	struct lock_class *class;
 	struct hlist_head *head;
@@ -4179,7 +4411,7 @@ static void __lockdep_free_key_range(void *start, unsigned long size)
 			if (!within(class->key, start, size) &&
 			    !within(class->name, start, size))
 				continue;
-			zap_class(class);
+			zap_class(pf, class);
 		}
 	}
 }
@@ -4192,8 +4424,9 @@ static void __lockdep_free_key_range(void *start, unsigned long size)
  * guaranteed nobody will look up these exact classes -- they're properly dead
  * but still allocated.
  */
-void lockdep_free_key_range(void *start, unsigned long size)
+static void lockdep_free_key_range_reg(void *start, unsigned long size)
 {
+	struct pending_free *pf;
 	unsigned long flags;
 	int locked;
 
@@ -4201,9 +4434,15 @@ void lockdep_free_key_range(void *start, unsigned long size)
 
 	raw_local_irq_save(flags);
 	locked = graph_lock();
-	__lockdep_free_key_range(start, size);
-	if (locked)
-		graph_unlock();
+	if (!locked)
+		goto out_irq;
+
+	pf = get_pending_free();
+	__lockdep_free_key_range(pf, start, size);
+	call_rcu_zapped(pf);
+
+	graph_unlock();
+out_irq:
 	raw_local_irq_restore(flags);
 
 	/*
@@ -4211,12 +4450,35 @@ void lockdep_free_key_range(void *start, unsigned long size)
 	 * before continuing to free the memory they refer to.
 	 */
 	synchronize_rcu();
+}
 
-	/*
-	 * XXX at this point we could return the resources to the pool;
-	 * instead we leak them. We would need to change to bitmap allocators
-	 * instead of the linear allocators we have now.
-	 */
+/*
+ * Free all lockdep keys in the range [start, start+size). Does not sleep.
+ * Ignores debug_locks. Must only be used by the lockdep selftests.
+ */
+static void lockdep_free_key_range_imm(void *start, unsigned long size)
+{
+	struct pending_free *pf = delayed_free.pf;
+	unsigned long flags;
+
+	init_data_structures_once();
+
+	raw_local_irq_save(flags);
+	arch_spin_lock(&lockdep_lock);
+	__lockdep_free_key_range(pf, start, size);
+	__free_zapped_classes(pf);
+	arch_spin_unlock(&lockdep_lock);
+	raw_local_irq_restore(flags);
+}
+
+void lockdep_free_key_range(void *start, unsigned long size)
+{
+	init_data_structures_once();
+
+	if (inside_selftest())
+		lockdep_free_key_range_imm(start, size);
+	else
+		lockdep_free_key_range_reg(start, size);
 }
 
 /*
@@ -4242,7 +4504,8 @@ static bool lock_class_cache_is_registered(struct lockdep_map *lock)
 }
 
 /* The caller must hold the graph lock. Does not sleep. */
-static void __lockdep_reset_lock(struct lockdep_map *lock)
+static void __lockdep_reset_lock(struct pending_free *pf,
+				 struct lockdep_map *lock)
 {
 	struct lock_class *class;
 	int j;
@@ -4256,7 +4519,7 @@ static void __lockdep_reset_lock(struct lockdep_map *lock)
 		 */
 		class = look_up_lock_class(lock, j);
 		if (class)
-			zap_class(class);
+			zap_class(pf, class);
 	}
 	/*
 	 * Debug check: in the end all mapped classes should
@@ -4266,21 +4529,57 @@ static void __lockdep_reset_lock(struct lockdep_map *lock)
 		debug_locks_off();
 }
 
-void lockdep_reset_lock(struct lockdep_map *lock)
+/*
+ * Remove all information lockdep has about a lock if debug_locks == 1. Free
+ * released data structures from RCU context.
+ */
+static void lockdep_reset_lock_reg(struct lockdep_map *lock)
 {
+	struct pending_free *pf;
 	unsigned long flags;
 	int locked;
 
-	init_data_structures_once();
-
 	raw_local_irq_save(flags);
 	locked = graph_lock();
-	__lockdep_reset_lock(lock);
-	if (locked)
-		graph_unlock();
+	if (!locked)
+		goto out_irq;
+
+	pf = get_pending_free();
+	__lockdep_reset_lock(pf, lock);
+	call_rcu_zapped(pf);
+
+	graph_unlock();
+out_irq:
+	raw_local_irq_restore(flags);
+}
+
+/*
+ * Reset a lock. Does not sleep. Ignores debug_locks. Must only be used by the
+ * lockdep selftests.
+ */
+static void lockdep_reset_lock_imm(struct lockdep_map *lock)
+{
+	struct pending_free *pf = delayed_free.pf;
+	unsigned long flags;
+
+	raw_local_irq_save(flags);
+	arch_spin_lock(&lockdep_lock);
+	__lockdep_reset_lock(pf, lock);
+	__free_zapped_classes(pf);
+	arch_spin_unlock(&lockdep_lock);
 	raw_local_irq_restore(flags);
 }
 
+void lockdep_reset_lock(struct lockdep_map *lock)
+{
+	init_data_structures_once();
+
+	if (inside_selftest())
+		lockdep_reset_lock_imm(lock);
+	else
+		lockdep_reset_lock_reg(lock);
+}
+
 void __init lockdep_init(void)
 {
 	printk("Lock dependency validator: Copyright (c) 2006 Red Hat, Inc., Ingo Molnar\n");
@@ -4297,7 +4596,8 @@ void __init lockdep_init(void)
 	       (sizeof(lock_classes) +
 		sizeof(classhash_table) +
 		sizeof(list_entries) +
-		sizeof(chainhash_table)
+		sizeof(chainhash_table) +
+		sizeof(delayed_free)
 #ifdef CONFIG_PROVE_LOCKING
 		+ sizeof(lock_cq)
 		+ sizeof(lock_chains)
-- 
cgit v1.2.3


From 108c14858b9ea224686e476c8f5ec345a0df9e27 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Thu, 14 Feb 2019 15:00:53 -0800
Subject: locking/lockdep: Add support for dynamic keys

A shortcoming of the current lockdep implementation is that it requires
lock keys to be allocated statically. That forces all instances of lock
objects that occur in a given data structure to share a lock key. Since
lock dependency analysis groups lock objects per key sharing lock keys
can cause false positive lockdep reports. Make it possible to avoid
such false positive reports by allowing lock keys to be allocated
dynamically. Require that dynamically allocated lock keys are
registered before use by calling lockdep_register_key(). Complain about
attempts to register the same lock key pointer twice without calling
lockdep_unregister_key() between successive registration calls.

The purpose of the new lock_keys_hash[] data structure that keeps
track of all dynamic keys is twofold:

  - Verify whether the lockdep_register_key() and lockdep_unregister_key()
    functions are used correctly.

  - Avoid that lockdep_init_map() complains when encountering a dynamically
    allocated key.

Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Johannes Berg <johannes@sipsolutions.net>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Waiman Long <longman@redhat.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: johannes.berg@intel.com
Cc: tj@kernel.org
Link: https://lkml.kernel.org/r/20190214230058.196511-19-bvanassche@acm.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/lockdep.h  |  21 ++++++--
 kernel/locking/lockdep.c | 121 +++++++++++++++++++++++++++++++++++++++++++----
 2 files changed, 131 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 619ec3f26cdc..43fb35bd7baf 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -46,15 +46,19 @@ extern int lock_stat;
 #define NR_LOCKDEP_CACHING_CLASSES	2
 
 /*
- * Lock-classes are keyed via unique addresses, by embedding the
- * lockclass-key into the kernel (or module) .data section. (For
- * static locks we use the lock address itself as the key.)
+ * A lockdep key is associated with each lock object. For static locks we use
+ * the lock address itself as the key. Dynamically allocated lock objects can
+ * have a statically or dynamically allocated key. Dynamically allocated lock
+ * keys must be registered before being used and must be unregistered before
+ * the key memory is freed.
  */
 struct lockdep_subclass_key {
 	char __one_byte;
 } __attribute__ ((__packed__));
 
+/* hash_entry is used to keep track of dynamically allocated keys. */
 struct lock_class_key {
+	struct hlist_node		hash_entry;
 	struct lockdep_subclass_key	subkeys[MAX_LOCKDEP_SUBCLASSES];
 };
 
@@ -273,6 +277,9 @@ extern void lockdep_set_selftest_task(struct task_struct *task);
 extern void lockdep_off(void);
 extern void lockdep_on(void);
 
+extern void lockdep_register_key(struct lock_class_key *key);
+extern void lockdep_unregister_key(struct lock_class_key *key);
+
 /*
  * These methods are used by specific locking variants (spinlocks,
  * rwlocks, mutexes and rwsems) to pass init/acquire/release events
@@ -434,6 +441,14 @@ static inline void lockdep_set_selftest_task(struct task_struct *task)
  */
 struct lock_class_key { };
 
+static inline void lockdep_register_key(struct lock_class_key *key)
+{
+}
+
+static inline void lockdep_unregister_key(struct lock_class_key *key)
+{
+}
+
 /*
  * The lockdep_map takes no space if lockdep is disabled:
  */
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 84427441824e..c73bc4334bee 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -143,6 +143,9 @@ static DECLARE_BITMAP(list_entries_in_use, MAX_LOCKDEP_ENTRIES);
  * nr_lock_classes is the number of elements of lock_classes[] that is
  * in use.
  */
+#define KEYHASH_BITS		(MAX_LOCKDEP_KEYS_BITS - 1)
+#define KEYHASH_SIZE		(1UL << KEYHASH_BITS)
+static struct hlist_head lock_keys_hash[KEYHASH_SIZE];
 unsigned long nr_lock_classes;
 #ifndef CONFIG_DEBUG_LOCKDEP
 static
@@ -641,7 +644,7 @@ static int very_verbose(struct lock_class *class)
  * Is this the address of a static object:
  */
 #ifdef __KERNEL__
-static int static_obj(void *obj)
+static int static_obj(const void *obj)
 {
 	unsigned long start = (unsigned long) &_stext,
 		      end   = (unsigned long) &_end,
@@ -975,6 +978,71 @@ static void init_data_structures_once(void)
 	}
 }
 
+static inline struct hlist_head *keyhashentry(const struct lock_class_key *key)
+{
+	unsigned long hash = hash_long((uintptr_t)key, KEYHASH_BITS);
+
+	return lock_keys_hash + hash;
+}
+
+/* Register a dynamically allocated key. */
+void lockdep_register_key(struct lock_class_key *key)
+{
+	struct hlist_head *hash_head;
+	struct lock_class_key *k;
+	unsigned long flags;
+
+	if (WARN_ON_ONCE(static_obj(key)))
+		return;
+	hash_head = keyhashentry(key);
+
+	raw_local_irq_save(flags);
+	if (!graph_lock())
+		goto restore_irqs;
+	hlist_for_each_entry_rcu(k, hash_head, hash_entry) {
+		if (WARN_ON_ONCE(k == key))
+			goto out_unlock;
+	}
+	hlist_add_head_rcu(&key->hash_entry, hash_head);
+out_unlock:
+	graph_unlock();
+restore_irqs:
+	raw_local_irq_restore(flags);
+}
+EXPORT_SYMBOL_GPL(lockdep_register_key);
+
+/* Check whether a key has been registered as a dynamic key. */
+static bool is_dynamic_key(const struct lock_class_key *key)
+{
+	struct hlist_head *hash_head;
+	struct lock_class_key *k;
+	bool found = false;
+
+	if (WARN_ON_ONCE(static_obj(key)))
+		return false;
+
+	/*
+	 * If lock debugging is disabled lock_keys_hash[] may contain
+	 * pointers to memory that has already been freed. Avoid triggering
+	 * a use-after-free in that case by returning early.
+	 */
+	if (!debug_locks)
+		return true;
+
+	hash_head = keyhashentry(key);
+
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(k, hash_head, hash_entry) {
+		if (k == key) {
+			found = true;
+			break;
+		}
+	}
+	rcu_read_unlock();
+
+	return found;
+}
+
 /*
  * Register a lock's class in the hash-table, if the class is not present
  * yet. Otherwise we look it up. We cache the result in the lock object
@@ -996,7 +1064,7 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force)
 	if (!lock->key) {
 		if (!assign_lock_key(lock))
 			return NULL;
-	} else if (!static_obj(lock->key)) {
+	} else if (!static_obj(lock->key) && !is_dynamic_key(lock->key)) {
 		return NULL;
 	}
 
@@ -3378,13 +3446,12 @@ void lockdep_init_map(struct lockdep_map *lock, const char *name,
 	if (DEBUG_LOCKS_WARN_ON(!key))
 		return;
 	/*
-	 * Sanity check, the lock-class key must be persistent:
+	 * Sanity check, the lock-class key must either have been allocated
+	 * statically or must have been registered as a dynamic key.
 	 */
-	if (!static_obj(key)) {
-		printk("BUG: key %px not in .data!\n", key);
-		/*
-		 * What it says above ^^^^^, I suggest you read it.
-		 */
+	if (!static_obj(key) && !is_dynamic_key(key)) {
+		if (debug_locks)
+			printk(KERN_ERR "BUG: key %px has not been registered!\n", key);
 		DEBUG_LOCKS_WARN_ON(1);
 		return;
 	}
@@ -4795,6 +4862,44 @@ void lockdep_reset_lock(struct lockdep_map *lock)
 		lockdep_reset_lock_reg(lock);
 }
 
+/* Unregister a dynamically allocated key. */
+void lockdep_unregister_key(struct lock_class_key *key)
+{
+	struct hlist_head *hash_head = keyhashentry(key);
+	struct lock_class_key *k;
+	struct pending_free *pf;
+	unsigned long flags;
+	bool found = false;
+
+	might_sleep();
+
+	if (WARN_ON_ONCE(static_obj(key)))
+		return;
+
+	raw_local_irq_save(flags);
+	if (!graph_lock())
+		goto out_irq;
+
+	pf = get_pending_free();
+	hlist_for_each_entry_rcu(k, hash_head, hash_entry) {
+		if (k == key) {
+			hlist_del_rcu(&k->hash_entry);
+			found = true;
+			break;
+		}
+	}
+	WARN_ON_ONCE(!found);
+	__lockdep_free_key_range(pf, key, 1);
+	call_rcu_zapped(pf);
+	graph_unlock();
+out_irq:
+	raw_local_irq_restore(flags);
+
+	/* Wait until is_dynamic_key() has finished accessing k->hash_entry. */
+	synchronize_rcu();
+}
+EXPORT_SYMBOL_GPL(lockdep_unregister_key);
+
 void __init lockdep_init(void)
 {
 	printk("Lock dependency validator: Copyright (c) 2006 Red Hat, Inc., Ingo Molnar\n");
-- 
cgit v1.2.3


From 669de8bda87b92ab9a2fc663b3f5743c2ad1ae9f Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Thu, 14 Feb 2019 15:00:54 -0800
Subject: kernel/workqueue: Use dynamic lockdep keys for workqueues

The following commit:

  87915adc3f0a ("workqueue: re-add lockdep dependencies for flushing")

improved deadlock checking in the workqueue implementation. Unfortunately
that patch also introduced a few false positive lockdep complaints.

This patch suppresses these false positives by allocating the workqueue mutex
lockdep key dynamically.

An example of a false positive lockdep complaint suppressed by this patch
can be found below. The root cause of the lockdep complaint shown below
is that the direct I/O code can call alloc_workqueue() from inside a work
item created by another alloc_workqueue() call and that both workqueues
share the same lockdep key. This patch avoids that that lockdep complaint
is triggered by allocating the work queue lockdep keys dynamically.

In other words, this patch guarantees that a unique lockdep key is
associated with each work queue mutex.

  ======================================================
  WARNING: possible circular locking dependency detected
  4.19.0-dbg+ #1 Not tainted
  fio/4129 is trying to acquire lock:
  00000000a01cfe1a ((wq_completion)"dio/%s"sb->s_id){+.+.}, at: flush_workqueue+0xd0/0x970

  but task is already holding lock:
  00000000a0acecf9 (&sb->s_type->i_mutex_key#14){+.+.}, at: ext4_file_write_iter+0x154/0x710

  which lock already depends on the new lock.

  the existing dependency chain (in reverse order) is:

  -> #2 (&sb->s_type->i_mutex_key#14){+.+.}:
         down_write+0x3d/0x80
         __generic_file_fsync+0x77/0xf0
         ext4_sync_file+0x3c9/0x780
         vfs_fsync_range+0x66/0x100
         dio_complete+0x2f5/0x360
         dio_aio_complete_work+0x1c/0x20
         process_one_work+0x481/0x9f0
         worker_thread+0x63/0x5a0
         kthread+0x1cf/0x1f0
         ret_from_fork+0x24/0x30

  -> #1 ((work_completion)(&dio->complete_work)){+.+.}:
         process_one_work+0x447/0x9f0
         worker_thread+0x63/0x5a0
         kthread+0x1cf/0x1f0
         ret_from_fork+0x24/0x30

  -> #0 ((wq_completion)"dio/%s"sb->s_id){+.+.}:
         lock_acquire+0xc5/0x200
         flush_workqueue+0xf3/0x970
         drain_workqueue+0xec/0x220
         destroy_workqueue+0x23/0x350
         sb_init_dio_done_wq+0x6a/0x80
         do_blockdev_direct_IO+0x1f33/0x4be0
         __blockdev_direct_IO+0x79/0x86
         ext4_direct_IO+0x5df/0xbb0
         generic_file_direct_write+0x119/0x220
         __generic_file_write_iter+0x131/0x2d0
         ext4_file_write_iter+0x3fa/0x710
         aio_write+0x235/0x330
         io_submit_one+0x510/0xeb0
         __x64_sys_io_submit+0x122/0x340
         do_syscall_64+0x71/0x220
         entry_SYSCALL_64_after_hwframe+0x49/0xbe

  other info that might help us debug this:

  Chain exists of:
    (wq_completion)"dio/%s"sb->s_id --> (work_completion)(&dio->complete_work) --> &sb->s_type->i_mutex_key#14

   Possible unsafe locking scenario:

         CPU0                    CPU1
         ----                    ----
    lock(&sb->s_type->i_mutex_key#14);
                                 lock((work_completion)(&dio->complete_work));
                                 lock(&sb->s_type->i_mutex_key#14);
    lock((wq_completion)"dio/%s"sb->s_id);

   *** DEADLOCK ***

  1 lock held by fio/4129:
   #0: 00000000a0acecf9 (&sb->s_type->i_mutex_key#14){+.+.}, at: ext4_file_write_iter+0x154/0x710

  stack backtrace:
  CPU: 3 PID: 4129 Comm: fio Not tainted 4.19.0-dbg+ #1
  Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1 04/01/2014
  Call Trace:
   dump_stack+0x86/0xc5
   print_circular_bug.isra.32+0x20a/0x218
   __lock_acquire+0x1c68/0x1cf0
   lock_acquire+0xc5/0x200
   flush_workqueue+0xf3/0x970
   drain_workqueue+0xec/0x220
   destroy_workqueue+0x23/0x350
   sb_init_dio_done_wq+0x6a/0x80
   do_blockdev_direct_IO+0x1f33/0x4be0
   __blockdev_direct_IO+0x79/0x86
   ext4_direct_IO+0x5df/0xbb0
   generic_file_direct_write+0x119/0x220
   __generic_file_write_iter+0x131/0x2d0
   ext4_file_write_iter+0x3fa/0x710
   aio_write+0x235/0x330
   io_submit_one+0x510/0xeb0
   __x64_sys_io_submit+0x122/0x340
   do_syscall_64+0x71/0x220
   entry_SYSCALL_64_after_hwframe+0x49/0xbe

Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Johannes Berg <johannes.berg@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Waiman Long <longman@redhat.com>
Cc: Will Deacon <will.deacon@arm.com>
Link: https://lkml.kernel.org/r/20190214230058.196511-20-bvanassche@acm.org
[ Reworked the changelog a bit. ]
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/workqueue.h | 28 ++++------------------
 kernel/workqueue.c        | 59 +++++++++++++++++++++++++++++++++++++++--------
 2 files changed, 54 insertions(+), 33 deletions(-)

(limited to 'include')

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 60d673e15632..d9a1a480e920 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -390,43 +390,23 @@ extern struct workqueue_struct *system_freezable_wq;
 extern struct workqueue_struct *system_power_efficient_wq;
 extern struct workqueue_struct *system_freezable_power_efficient_wq;
 
-extern struct workqueue_struct *
-__alloc_workqueue_key(const char *fmt, unsigned int flags, int max_active,
-	struct lock_class_key *key, const char *lock_name, ...) __printf(1, 6);
-
 /**
  * alloc_workqueue - allocate a workqueue
  * @fmt: printf format for the name of the workqueue
  * @flags: WQ_* flags
  * @max_active: max in-flight work items, 0 for default
- * @args...: args for @fmt
+ * remaining args: args for @fmt
  *
  * Allocate a workqueue with the specified parameters.  For detailed
  * information on WQ_* flags, please refer to
  * Documentation/core-api/workqueue.rst.
  *
- * The __lock_name macro dance is to guarantee that single lock_class_key
- * doesn't end up with different namesm, which isn't allowed by lockdep.
- *
  * RETURNS:
  * Pointer to the allocated workqueue on success, %NULL on failure.
  */
-#ifdef CONFIG_LOCKDEP
-#define alloc_workqueue(fmt, flags, max_active, args...)		\
-({									\
-	static struct lock_class_key __key;				\
-	const char *__lock_name;					\
-									\
-	__lock_name = "(wq_completion)"#fmt#args;			\
-									\
-	__alloc_workqueue_key((fmt), (flags), (max_active),		\
-			      &__key, __lock_name, ##args);		\
-})
-#else
-#define alloc_workqueue(fmt, flags, max_active, args...)		\
-	__alloc_workqueue_key((fmt), (flags), (max_active),		\
-			      NULL, NULL, ##args)
-#endif
+struct workqueue_struct *alloc_workqueue(const char *fmt,
+					 unsigned int flags,
+					 int max_active, ...);
 
 /**
  * alloc_ordered_workqueue - allocate an ordered workqueue
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index fc5d23d752a5..e163e7a7f5e5 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -259,6 +259,8 @@ struct workqueue_struct {
 	struct wq_device	*wq_dev;	/* I: for sysfs interface */
 #endif
 #ifdef CONFIG_LOCKDEP
+	char			*lock_name;
+	struct lock_class_key	key;
 	struct lockdep_map	lockdep_map;
 #endif
 	char			name[WQ_NAME_LEN]; /* I: workqueue name */
@@ -3337,11 +3339,49 @@ static int init_worker_pool(struct worker_pool *pool)
 	return 0;
 }
 
+#ifdef CONFIG_LOCKDEP
+static void wq_init_lockdep(struct workqueue_struct *wq)
+{
+	char *lock_name;
+
+	lockdep_register_key(&wq->key);
+	lock_name = kasprintf(GFP_KERNEL, "%s%s", "(wq_completion)", wq->name);
+	if (!lock_name)
+		lock_name = wq->name;
+	lockdep_init_map(&wq->lockdep_map, lock_name, &wq->key, 0);
+}
+
+static void wq_unregister_lockdep(struct workqueue_struct *wq)
+{
+	lockdep_unregister_key(&wq->key);
+}
+
+static void wq_free_lockdep(struct workqueue_struct *wq)
+{
+	if (wq->lock_name != wq->name)
+		kfree(wq->lock_name);
+}
+#else
+static void wq_init_lockdep(struct workqueue_struct *wq)
+{
+}
+
+static void wq_unregister_lockdep(struct workqueue_struct *wq)
+{
+}
+
+static void wq_free_lockdep(struct workqueue_struct *wq)
+{
+}
+#endif
+
 static void rcu_free_wq(struct rcu_head *rcu)
 {
 	struct workqueue_struct *wq =
 		container_of(rcu, struct workqueue_struct, rcu);
 
+	wq_free_lockdep(wq);
+
 	if (!(wq->flags & WQ_UNBOUND))
 		free_percpu(wq->cpu_pwqs);
 	else
@@ -3532,8 +3572,10 @@ static void pwq_unbound_release_workfn(struct work_struct *work)
 	 * If we're the last pwq going away, @wq is already dead and no one
 	 * is gonna access it anymore.  Schedule RCU free.
 	 */
-	if (is_last)
+	if (is_last) {
+		wq_unregister_lockdep(wq);
 		call_rcu(&wq->rcu, rcu_free_wq);
+	}
 }
 
 /**
@@ -4067,11 +4109,9 @@ static int init_rescuer(struct workqueue_struct *wq)
 	return 0;
 }
 
-struct workqueue_struct *__alloc_workqueue_key(const char *fmt,
-					       unsigned int flags,
-					       int max_active,
-					       struct lock_class_key *key,
-					       const char *lock_name, ...)
+struct workqueue_struct *alloc_workqueue(const char *fmt,
+					 unsigned int flags,
+					 int max_active, ...)
 {
 	size_t tbl_size = 0;
 	va_list args;
@@ -4106,7 +4146,7 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt,
 			goto err_free_wq;
 	}
 
-	va_start(args, lock_name);
+	va_start(args, max_active);
 	vsnprintf(wq->name, sizeof(wq->name), fmt, args);
 	va_end(args);
 
@@ -4123,7 +4163,7 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt,
 	INIT_LIST_HEAD(&wq->flusher_overflow);
 	INIT_LIST_HEAD(&wq->maydays);
 
-	lockdep_init_map(&wq->lockdep_map, lock_name, key, 0);
+	wq_init_lockdep(wq);
 	INIT_LIST_HEAD(&wq->list);
 
 	if (alloc_and_link_pwqs(wq) < 0)
@@ -4161,7 +4201,7 @@ err_destroy:
 	destroy_workqueue(wq);
 	return NULL;
 }
-EXPORT_SYMBOL_GPL(__alloc_workqueue_key);
+EXPORT_SYMBOL_GPL(alloc_workqueue);
 
 /**
  * destroy_workqueue - safely terminate a workqueue
@@ -4214,6 +4254,7 @@ void destroy_workqueue(struct workqueue_struct *wq)
 		kthread_stop(wq->rescuer->task);
 
 	if (!(wq->flags & WQ_UNBOUND)) {
+		wq_unregister_lockdep(wq);
 		/*
 		 * The base ref is never dropped on per-cpu pwqs.  Directly
 		 * schedule RCU free.
-- 
cgit v1.2.3


From 28d49e282665e2a51cc91b716937fccfa24d80e1 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 26 Feb 2019 18:19:09 +0100
Subject: locking/lockdep: Shrink struct lock_class_key

Shrink struct lock_class_key; we never store anything in subkeys[], we
only use the addresses.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/lockdep.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 43fb35bd7baf..79c3873d58ac 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -58,8 +58,10 @@ struct lockdep_subclass_key {
 
 /* hash_entry is used to keep track of dynamically allocated keys. */
 struct lock_class_key {
-	struct hlist_node		hash_entry;
-	struct lockdep_subclass_key	subkeys[MAX_LOCKDEP_SUBCLASSES];
+	union {
+		struct hlist_node		hash_entry;
+		struct lockdep_subclass_key	subkeys[MAX_LOCKDEP_SUBCLASSES];
+	};
 };
 
 extern struct lock_class_key __lockdep_no_validate__;
-- 
cgit v1.2.3


From bc47e2f6f9e261ea07c678c3cad76eb5590c0fea Mon Sep 17 00:00:00 2001
From: Avri Altman <avri.altman@wdc.com>
Date: Tue, 26 Feb 2019 17:10:24 +0200
Subject: mmc: core: Add discard support to sd

SD spec v5.1 adds discard support. The flows and commands are similar to
mmc, so just set the discard arg in CMD38.

A host which supports DISCARD shall check if the DISCARD_SUPPORT (b313)
is set in the SD_STATUS register.  If the card does not support discard,
the host shall not issue DISCARD command, but ERASE command instead.

Post the DISCARD operation, the card may de-allocate the discarded
blocks partially or completely. So the host mustn't make any assumptions
concerning the content of the discarded region. This is unlike ERASE
command, in which the region is guaranteed to contain either '0's or
'1's, depends on the content of DATA_STAT_AFTER_ERASE (b55) in the scr
register.

One more important difference compared to ERASE is the busy timeout
which we will address on the next patch.

Signed-off-by: Avri Altman <avri.altman@wdc.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/core/core.c |  8 ++++----
 drivers/mmc/core/sd.c   | 10 +++++++++-
 include/linux/mmc/sd.h  |  1 +
 3 files changed, 14 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index b45aaa904107..681b089f669a 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -1847,7 +1847,7 @@ static unsigned int mmc_align_erase_size(struct mmc_card *card,
  * @card: card to erase
  * @from: first sector to erase
  * @nr: number of sectors to erase
- * @arg: erase command argument (SD supports only %SD_ERASE_ARG)
+ * @arg: erase command argument
  *
  * Caller must claim host before calling this function.
  */
@@ -1864,14 +1864,14 @@ int mmc_erase(struct mmc_card *card, unsigned int from, unsigned int nr,
 	if (!card->erase_size)
 		return -EOPNOTSUPP;
 
-	if (mmc_card_sd(card) && arg != SD_ERASE_ARG)
+	if (mmc_card_sd(card) && arg != SD_ERASE_ARG && arg != SD_DISCARD_ARG)
 		return -EOPNOTSUPP;
 
-	if ((arg & MMC_SECURE_ARGS) &&
+	if (mmc_card_mmc(card) && (arg & MMC_SECURE_ARGS) &&
 	    !(card->ext_csd.sec_feature_support & EXT_CSD_SEC_ER_EN))
 		return -EOPNOTSUPP;
 
-	if ((arg & MMC_TRIM_ARGS) &&
+	if (mmc_card_mmc(card) && (arg & MMC_TRIM_ARGS) &&
 	    !(card->ext_csd.sec_feature_support & EXT_CSD_SEC_GB_CL_EN))
 		return -EOPNOTSUPP;
 
diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c
index c2db94dab711..2b4fc2205b53 100644
--- a/drivers/mmc/core/sd.c
+++ b/drivers/mmc/core/sd.c
@@ -231,6 +231,8 @@ static int mmc_read_ssr(struct mmc_card *card)
 {
 	unsigned int au, es, et, eo;
 	__be32 *raw_ssr;
+	u32 resp[4] = {};
+	u8 discard_support;
 	int i;
 
 	if (!(card->csd.cmdclass & CCC_APP_SPEC)) {
@@ -276,7 +278,13 @@ static int mmc_read_ssr(struct mmc_card *card)
 		}
 	}
 
-	card->erase_arg = SD_ERASE_ARG;
+	/*
+	 * starting SD5.1 discard is supported if DISCARD_SUPPORT (b313) is set
+	 */
+	resp[3] = card->raw_ssr[6];
+	discard_support = UNSTUFF_BITS(resp, 313 - 288, 1);
+	card->erase_arg = (card->scr.sda_specx && discard_support) ?
+			    SD_DISCARD_ARG : SD_ERASE_ARG;
 
 	return 0;
 }
diff --git a/include/linux/mmc/sd.h b/include/linux/mmc/sd.h
index 1a6d10fdf682..ec94a5aa02bb 100644
--- a/include/linux/mmc/sd.h
+++ b/include/linux/mmc/sd.h
@@ -95,5 +95,6 @@
  * Erase/discard
  */
 #define SD_ERASE_ARG			0x00000000
+#define SD_DISCARD_ARG			0x00000001
 
 #endif /* LINUX_MMC_SD_H */
-- 
cgit v1.2.3


From 31d921c7fb9691722ba9503b64153cdc322a7fa8 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 1 Nov 2018 23:07:24 +0000
Subject: vfs: Add configuration parser helpers

Because the new API passes in key,value parameters, match_token() cannot be
used with it.  Instead, provide three new helpers to aid with parsing:

 (1) fs_parse().  This takes a parameter and a simple static description of
     all the parameters and maps the key name to an ID.  It returns 1 on a
     match, 0 on no match if unknowns should be ignored and some other
     negative error code on a parse error.

     The parameter description includes a list of key names to IDs, desired
     parameter types and a list of enumeration name -> ID mappings.

     [!] Note that for the moment I've required that the key->ID mapping
     array is expected to be sorted and unterminated.  The size of the
     array is noted in the fsconfig_parser struct.  This allows me to use
     bsearch(), but I'm not sure any performance gain is worth the hassle
     of requiring people to keep the array sorted.

     The parameter type array is sized according to the number of parameter
     IDs and is indexed directly.  The optional enum mapping array is an
     unterminated, unsorted list and the size goes into the fsconfig_parser
     struct.

     The function can do some additional things:

	(a) If it's not ambiguous and no value is given, the prefix "no" on
	    a key name is permitted to indicate that the parameter should
	    be considered negatory.

	(b) If the desired type is a single simple integer, it will perform
	    an appropriate conversion and store the result in a union in
	    the parse result.

	(c) If the desired type is an enumeration, {key ID, name} will be
	    looked up in the enumeration list and the matching value will
	    be stored in the parse result union.

	(d) Optionally generate an error if the key is unrecognised.

     This is called something like:

	enum rdt_param {
		Opt_cdp,
		Opt_cdpl2,
		Opt_mba_mpbs,
		nr__rdt_params
	};

	const struct fs_parameter_spec rdt_param_specs[nr__rdt_params] = {
		[Opt_cdp]	= { fs_param_is_bool },
		[Opt_cdpl2]	= { fs_param_is_bool },
		[Opt_mba_mpbs]	= { fs_param_is_bool },
	};

	const const char *const rdt_param_keys[nr__rdt_params] = {
		[Opt_cdp]	= "cdp",
		[Opt_cdpl2]	= "cdpl2",
		[Opt_mba_mpbs]	= "mba_mbps",
	};

	const struct fs_parameter_description rdt_parser = {
		.name		= "rdt",
		.nr_params	= nr__rdt_params,
		.keys		= rdt_param_keys,
		.specs		= rdt_param_specs,
		.no_source	= true,
	};

	int rdt_parse_param(struct fs_context *fc,
			    struct fs_parameter *param)
	{
		struct fs_parse_result parse;
		struct rdt_fs_context *ctx = rdt_fc2context(fc);
		int ret;

		ret = fs_parse(fc, &rdt_parser, param, &parse);
		if (ret < 0)
			return ret;

		switch (parse.key) {
		case Opt_cdp:
			ctx->enable_cdpl3 = true;
			return 0;
		case Opt_cdpl2:
			ctx->enable_cdpl2 = true;
			return 0;
		case Opt_mba_mpbs:
			ctx->enable_mba_mbps = true;
			return 0;
		}

		return -EINVAL;
	}

 (2) fs_lookup_param().  This takes a { dirfd, path, LOOKUP_EMPTY? } or
     string value and performs an appropriate path lookup to convert it
     into a path object, which it will then return.

     If the desired type was a blockdev, the type of the looked up inode
     will be checked to make sure it is one.

     This can be used like:

	enum foo_param {
		Opt_source,
		nr__foo_params
	};

	const struct fs_parameter_spec foo_param_specs[nr__foo_params] = {
		[Opt_source]	= { fs_param_is_blockdev },
	};

	const char *char foo_param_keys[nr__foo_params] = {
		[Opt_source]	= "source",
	};

	const struct constant_table foo_param_alt_keys[] = {
		{ "device",	Opt_source },
	};

	const struct fs_parameter_description foo_parser = {
		.name		= "foo",
		.nr_params	= nr__foo_params,
		.nr_alt_keys	= ARRAY_SIZE(foo_param_alt_keys),
		.keys		= foo_param_keys,
		.alt_keys	= foo_param_alt_keys,
		.specs		= foo_param_specs,
	};

	int foo_parse_param(struct fs_context *fc,
			    struct fs_parameter *param)
	{
		struct fs_parse_result parse;
		struct foo_fs_context *ctx = foo_fc2context(fc);
		int ret;

		ret = fs_parse(fc, &foo_parser, param, &parse);
		if (ret < 0)
			return ret;

		switch (parse.key) {
		case Opt_source:
			return fs_lookup_param(fc, &foo_parser, param,
					       &parse, &ctx->source);
		default:
			return -EINVAL;
		}
	}

 (3) lookup_constant().  This takes a table of named constants and looks up
     the given name within it.  The table is expected to be sorted such
     that bsearch() be used upon it.

     Possibly I should require the table be terminated and just use a
     for-loop to scan it instead of using bsearch() to reduce hassle.

     Tables look something like:

	static const struct constant_table bool_names[] = {
		{ "0",		false },
		{ "1",		true },
		{ "false",	false },
		{ "no",		false },
		{ "true",	true },
		{ "yes",	true },
	};

     and a lookup is done with something like:

	b = lookup_constant(bool_names, param->string, -1);

Additionally, optional validation routines for the parameter description
are provided that can be enabled at compile time.  A later patch will
invoke these when a filesystem is registered.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/Kconfig                 |   7 +
 fs/Makefile                |   2 +-
 fs/fs_parser.c             | 447 +++++++++++++++++++++++++++++++++++++++++++++
 fs/internal.h              |   2 +
 fs/namei.c                 |   4 +-
 include/linux/errno.h      |   1 +
 include/linux/fs_context.h |  29 +++
 include/linux/fs_parser.h  | 151 +++++++++++++++
 8 files changed, 640 insertions(+), 3 deletions(-)
 create mode 100644 fs/fs_parser.c
 create mode 100644 include/linux/fs_parser.h

(limited to 'include')

diff --git a/fs/Kconfig b/fs/Kconfig
index ac474a61be37..25700b152c75 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -8,6 +8,13 @@ menu "File systems"
 config DCACHE_WORD_ACCESS
        bool
 
+config VALIDATE_FS_PARSER
+	bool "Validate filesystem parameter description"
+	default y
+	help
+	  Enable this to perform validation of the parameter description for a
+	  filesystem when it is registered.
+
 if BLOCK
 
 config FS_IOMAP
diff --git a/fs/Makefile b/fs/Makefile
index 5563cf34f7c2..9a0b8003f069 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -13,7 +13,7 @@ obj-y :=	open.o read_write.o file_table.o super.o \
 		seq_file.o xattr.o libfs.o fs-writeback.o \
 		pnode.o splice.o sync.o utimes.o d_path.o \
 		stack.o fs_struct.o statfs.o fs_pin.o nsfs.o \
-		fs_context.o
+		fs_context.o fs_parser.o
 
 ifeq ($(CONFIG_BLOCK),y)
 obj-y +=	buffer.o block_dev.o direct-io.o mpage.o
diff --git a/fs/fs_parser.c b/fs/fs_parser.c
new file mode 100644
index 000000000000..842e8f749db6
--- /dev/null
+++ b/fs/fs_parser.c
@@ -0,0 +1,447 @@
+/* Filesystem parameter parser.
+ *
+ * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#include <linux/export.h>
+#include <linux/fs_context.h>
+#include <linux/fs_parser.h>
+#include <linux/slab.h>
+#include <linux/security.h>
+#include <linux/namei.h>
+#include "internal.h"
+
+static const struct constant_table bool_names[] = {
+	{ "0",		false },
+	{ "1",		true },
+	{ "false",	false },
+	{ "no",		false },
+	{ "true",	true },
+	{ "yes",	true },
+};
+
+/**
+ * lookup_constant - Look up a constant by name in an ordered table
+ * @tbl: The table of constants to search.
+ * @tbl_size: The size of the table.
+ * @name: The name to look up.
+ * @not_found: The value to return if the name is not found.
+ */
+int __lookup_constant(const struct constant_table *tbl, size_t tbl_size,
+		      const char *name, int not_found)
+{
+	unsigned int i;
+
+	for (i = 0; i < tbl_size; i++)
+		if (strcmp(name, tbl[i].name) == 0)
+			return tbl[i].value;
+
+	return not_found;
+}
+EXPORT_SYMBOL(__lookup_constant);
+
+static const struct fs_parameter_spec *fs_lookup_key(
+	const struct fs_parameter_description *desc,
+	const char *name)
+{
+	const struct fs_parameter_spec *p;
+
+	if (!desc->specs)
+		return NULL;
+
+	for (p = desc->specs; p->name; p++)
+		if (strcmp(p->name, name) == 0)
+			return p;
+
+	return NULL;
+}
+
+/*
+ * fs_parse - Parse a filesystem configuration parameter
+ * @fc: The filesystem context to log errors through.
+ * @desc: The parameter description to use.
+ * @param: The parameter.
+ * @result: Where to place the result of the parse
+ *
+ * Parse a filesystem configuration parameter and attempt a conversion for a
+ * simple parameter for which this is requested.  If successful, the determined
+ * parameter ID is placed into @result->key, the desired type is indicated in
+ * @result->t and any converted value is placed into an appropriate member of
+ * the union in @result.
+ *
+ * The function returns the parameter number if the parameter was matched,
+ * -ENOPARAM if it wasn't matched and @desc->ignore_unknown indicated that
+ * unknown parameters are okay and -EINVAL if there was a conversion issue or
+ * the parameter wasn't recognised and unknowns aren't okay.
+ */
+int fs_parse(struct fs_context *fc,
+	     const struct fs_parameter_description *desc,
+	     struct fs_parameter *param,
+	     struct fs_parse_result *result)
+{
+	const struct fs_parameter_spec *p;
+	const struct fs_parameter_enum *e;
+	int ret = -ENOPARAM, b;
+
+	result->has_value = !!param->string;
+	result->negated = false;
+	result->uint_64 = 0;
+
+	p = fs_lookup_key(desc, param->key);
+	if (!p) {
+		/* If we didn't find something that looks like "noxxx", see if
+		 * "xxx" takes the "no"-form negative - but only if there
+		 * wasn't an value.
+		 */
+		if (result->has_value)
+			goto unknown_parameter;
+		if (param->key[0] != 'n' || param->key[1] != 'o' || !param->key[2])
+			goto unknown_parameter;
+
+		p = fs_lookup_key(desc, param->key + 2);
+		if (!p)
+			goto unknown_parameter;
+		if (!(p->flags & fs_param_neg_with_no))
+			goto unknown_parameter;
+		result->boolean = false;
+		result->negated = true;
+	}
+
+	if (p->flags & fs_param_deprecated)
+		warnf(fc, "%s: Deprecated parameter '%s'",
+		      desc->name, param->key);
+
+	if (result->negated)
+		goto okay;
+
+	/* Certain parameter types only take a string and convert it. */
+	switch (p->type) {
+	case __fs_param_wasnt_defined:
+		return -EINVAL;
+	case fs_param_is_u32:
+	case fs_param_is_u32_octal:
+	case fs_param_is_u32_hex:
+	case fs_param_is_s32:
+	case fs_param_is_u64:
+	case fs_param_is_enum:
+	case fs_param_is_string:
+		if (param->type != fs_value_is_string)
+			goto bad_value;
+		if (!result->has_value) {
+			if (p->flags & fs_param_v_optional)
+				goto okay;
+			goto bad_value;
+		}
+		/* Fall through */
+	default:
+		break;
+	}
+
+	/* Try to turn the type we were given into the type desired by the
+	 * parameter and give an error if we can't.
+	 */
+	switch (p->type) {
+	case fs_param_is_flag:
+		if (param->type != fs_value_is_flag &&
+		    (param->type != fs_value_is_string || result->has_value))
+			return invalf(fc, "%s: Unexpected value for '%s'",
+				      desc->name, param->key);
+		result->boolean = true;
+		goto okay;
+
+	case fs_param_is_bool:
+		switch (param->type) {
+		case fs_value_is_flag:
+			result->boolean = true;
+			goto okay;
+		case fs_value_is_string:
+			if (param->size == 0) {
+				result->boolean = true;
+				goto okay;
+			}
+			b = lookup_constant(bool_names, param->string, -1);
+			if (b == -1)
+				goto bad_value;
+			result->boolean = b;
+			goto okay;
+		default:
+			goto bad_value;
+		}
+
+	case fs_param_is_u32:
+		ret = kstrtouint(param->string, 0, &result->uint_32);
+		goto maybe_okay;
+	case fs_param_is_u32_octal:
+		ret = kstrtouint(param->string, 8, &result->uint_32);
+		goto maybe_okay;
+	case fs_param_is_u32_hex:
+		ret = kstrtouint(param->string, 16, &result->uint_32);
+		goto maybe_okay;
+	case fs_param_is_s32:
+		ret = kstrtoint(param->string, 0, &result->int_32);
+		goto maybe_okay;
+	case fs_param_is_u64:
+		ret = kstrtoull(param->string, 0, &result->uint_64);
+		goto maybe_okay;
+
+	case fs_param_is_enum:
+		for (e = desc->enums; e->name[0]; e++) {
+			if (e->opt == p->opt &&
+			    strcmp(e->name, param->string) == 0) {
+				result->uint_32 = e->value;
+				goto okay;
+			}
+		}
+		goto bad_value;
+
+	case fs_param_is_string:
+		goto okay;
+	case fs_param_is_blob:
+		if (param->type != fs_value_is_blob)
+			goto bad_value;
+		goto okay;
+
+	case fs_param_is_fd: {
+		if (param->type != fs_value_is_file)
+			goto bad_value;
+		goto okay;
+	}
+
+	case fs_param_is_blockdev:
+	case fs_param_is_path:
+		goto okay;
+	default:
+		BUG();
+	}
+
+maybe_okay:
+	if (ret < 0)
+		goto bad_value;
+okay:
+	return p->opt;
+
+bad_value:
+	return invalf(fc, "%s: Bad value for '%s'", desc->name, param->key);
+unknown_parameter:
+	return -ENOPARAM;
+}
+EXPORT_SYMBOL(fs_parse);
+
+/**
+ * fs_lookup_param - Look up a path referred to by a parameter
+ * @fc: The filesystem context to log errors through.
+ * @param: The parameter.
+ * @want_bdev: T if want a blockdev
+ * @_path: The result of the lookup
+ */
+int fs_lookup_param(struct fs_context *fc,
+		    struct fs_parameter *param,
+		    bool want_bdev,
+		    struct path *_path)
+{
+	struct filename *f;
+	unsigned int flags = 0;
+	bool put_f;
+	int ret;
+
+	switch (param->type) {
+	case fs_value_is_string:
+		f = getname_kernel(param->string);
+		if (IS_ERR(f))
+			return PTR_ERR(f);
+		put_f = true;
+		break;
+	case fs_value_is_filename_empty:
+		flags = LOOKUP_EMPTY;
+		/* Fall through */
+	case fs_value_is_filename:
+		f = param->name;
+		put_f = false;
+		break;
+	default:
+		return invalf(fc, "%s: not usable as path", param->key);
+	}
+
+	ret = filename_lookup(param->dirfd, f, flags, _path, NULL);
+	if (ret < 0) {
+		errorf(fc, "%s: Lookup failure for '%s'", param->key, f->name);
+		goto out;
+	}
+
+	if (want_bdev &&
+	    !S_ISBLK(d_backing_inode(_path->dentry)->i_mode)) {
+		path_put(_path);
+		_path->dentry = NULL;
+		_path->mnt = NULL;
+		errorf(fc, "%s: Non-blockdev passed as '%s'",
+		       param->key, f->name);
+		ret = -ENOTBLK;
+	}
+
+out:
+	if (put_f)
+		putname(f);
+	return ret;
+}
+EXPORT_SYMBOL(fs_lookup_param);
+
+#ifdef CONFIG_VALIDATE_FS_PARSER
+/**
+ * validate_constant_table - Validate a constant table
+ * @name: Name to use in reporting
+ * @tbl: The constant table to validate.
+ * @tbl_size: The size of the table.
+ * @low: The lowest permissible value.
+ * @high: The highest permissible value.
+ * @special: One special permissible value outside of the range.
+ */
+bool validate_constant_table(const struct constant_table *tbl, size_t tbl_size,
+			     int low, int high, int special)
+{
+	size_t i;
+	bool good = true;
+
+	if (tbl_size == 0) {
+		pr_warn("VALIDATE C-TBL: Empty\n");
+		return true;
+	}
+
+	for (i = 0; i < tbl_size; i++) {
+		if (!tbl[i].name) {
+			pr_err("VALIDATE C-TBL[%zu]: Null\n", i);
+			good = false;
+		} else if (i > 0 && tbl[i - 1].name) {
+			int c = strcmp(tbl[i-1].name, tbl[i].name);
+
+			if (c == 0) {
+				pr_err("VALIDATE C-TBL[%zu]: Duplicate %s\n",
+				       i, tbl[i].name);
+				good = false;
+			}
+			if (c > 0) {
+				pr_err("VALIDATE C-TBL[%zu]: Missorted %s>=%s\n",
+				       i, tbl[i-1].name, tbl[i].name);
+				good = false;
+			}
+		}
+
+		if (tbl[i].value != special &&
+		    (tbl[i].value < low || tbl[i].value > high)) {
+			pr_err("VALIDATE C-TBL[%zu]: %s->%d const out of range (%d-%d)\n",
+			       i, tbl[i].name, tbl[i].value, low, high);
+			good = false;
+		}
+	}
+
+	return good;
+}
+
+/**
+ * fs_validate_description - Validate a parameter description
+ * @desc: The parameter description to validate.
+ */
+bool fs_validate_description(const struct fs_parameter_description *desc)
+{
+	const struct fs_parameter_spec *param, *p2;
+	const struct fs_parameter_enum *e;
+	const char *name = desc->name;
+	unsigned int nr_params = 0;
+	bool good = true, enums = false;
+
+	pr_notice("*** VALIDATE %s ***\n", name);
+
+	if (!name[0]) {
+		pr_err("VALIDATE Parser: No name\n");
+		name = "Unknown";
+		good = false;
+	}
+
+	if (desc->specs) {
+		for (param = desc->specs; param->name; param++) {
+			enum fs_parameter_type t = param->type;
+
+			/* Check that the type is in range */
+			if (t == __fs_param_wasnt_defined ||
+			    t >= nr__fs_parameter_type) {
+				pr_err("VALIDATE %s: PARAM[%s] Bad type %u\n",
+				       name, param->name, t);
+				good = false;
+			} else if (t == fs_param_is_enum) {
+				enums = true;
+			}
+
+			/* Check for duplicate parameter names */
+			for (p2 = desc->specs; p2 < param; p2++) {
+				if (strcmp(param->name, p2->name) == 0) {
+					pr_err("VALIDATE %s: PARAM[%s]: Duplicate\n",
+					       name, param->name);
+					good = false;
+				}
+			}
+		}
+
+		nr_params = param - desc->specs;
+	}
+
+	if (desc->enums) {
+		if (!nr_params) {
+			pr_err("VALIDATE %s: Enum table but no parameters\n",
+			       name);
+			good = false;
+			goto no_enums;
+		}
+		if (!enums) {
+			pr_err("VALIDATE %s: Enum table but no enum-type values\n",
+			       name);
+			good = false;
+			goto no_enums;
+		}
+
+		for (e = desc->enums; e->name[0]; e++) {
+			/* Check that all entries in the enum table have at
+			 * least one parameter that uses them.
+			 */
+			for (param = desc->specs; param->name; param++) {
+				if (param->opt == e->opt &&
+				    param->type != fs_param_is_enum) {
+					pr_err("VALIDATE %s: e[%lu] enum val for %s\n",
+					       name, e - desc->enums, param->name);
+					good = false;
+				}
+			}
+		}
+
+		/* Check that all enum-type parameters have at least one enum
+		 * value in the enum table.
+		 */
+		for (param = desc->specs; param->name; param++) {
+			if (param->type != fs_param_is_enum)
+				continue;
+			for (e = desc->enums; e->name[0]; e++)
+				if (e->opt == param->opt)
+					break;
+			if (!e->name[0]) {
+				pr_err("VALIDATE %s: PARAM[%s] enum with no values\n",
+				       name, param->name);
+				good = false;
+			}
+		}
+	} else {
+		if (enums) {
+			pr_err("VALIDATE %s: enum-type values, but no enum table\n",
+			       name);
+			good = false;
+			goto no_enums;
+		}
+	}
+
+no_enums:
+	return good;
+}
+#endif /* CONFIG_VALIDATE_FS_PARSER */
diff --git a/fs/internal.h b/fs/internal.h
index 8f8d07cc433f..6a8b71643af4 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -61,6 +61,8 @@ extern void fc_drop_locked(struct fs_context *);
 /*
  * namei.c
  */
+extern int filename_lookup(int dfd, struct filename *name, unsigned flags,
+			   struct path *path, struct path *root);
 extern int user_path_mountpoint_at(int, const char __user *, unsigned int, struct path *);
 extern int vfs_path_lookup(struct dentry *, struct vfsmount *,
 			   const char *, unsigned int, struct path *);
diff --git a/fs/namei.c b/fs/namei.c
index 914178cdbe94..a85deb55d0c9 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2333,8 +2333,8 @@ static int path_lookupat(struct nameidata *nd, unsigned flags, struct path *path
 	return err;
 }
 
-static int filename_lookup(int dfd, struct filename *name, unsigned flags,
-			   struct path *path, struct path *root)
+int filename_lookup(int dfd, struct filename *name, unsigned flags,
+		    struct path *path, struct path *root)
 {
 	int retval;
 	struct nameidata nd;
diff --git a/include/linux/errno.h b/include/linux/errno.h
index 3cba627577d6..d73f597a2484 100644
--- a/include/linux/errno.h
+++ b/include/linux/errno.h
@@ -18,6 +18,7 @@
 #define ERESTART_RESTARTBLOCK 516 /* restart by calling sys_restart_syscall */
 #define EPROBE_DEFER	517	/* Driver requests probe retry */
 #define EOPENSTALE	518	/* open found a stale dentry */
+#define ENOPARAM	519	/* Parameter not supported */
 
 /* Defined for the NFSv3 protocol */
 #define EBADHANDLE	521	/* Illegal NFS file handle */
diff --git a/include/linux/fs_context.h b/include/linux/fs_context.h
index d208cc40b868..899027c94788 100644
--- a/include/linux/fs_context.h
+++ b/include/linux/fs_context.h
@@ -34,6 +34,35 @@ enum fs_context_purpose {
 	FS_CONTEXT_FOR_RECONFIGURE,	/* Superblock reconfiguration (remount) */
 };
 
+/*
+ * Type of parameter value.
+ */
+enum fs_value_type {
+	fs_value_is_undefined,
+	fs_value_is_flag,		/* Value not given a value */
+	fs_value_is_string,		/* Value is a string */
+	fs_value_is_blob,		/* Value is a binary blob */
+	fs_value_is_filename,		/* Value is a filename* + dirfd */
+	fs_value_is_filename_empty,	/* Value is a filename* + dirfd + AT_EMPTY_PATH */
+	fs_value_is_file,		/* Value is a file* */
+};
+
+/*
+ * Configuration parameter.
+ */
+struct fs_parameter {
+	const char		*key;		/* Parameter name */
+	enum fs_value_type	type:8;		/* The type of value here */
+	union {
+		char		*string;
+		void		*blob;
+		struct filename	*name;
+		struct file	*file;
+	};
+	size_t	size;
+	int	dirfd;
+};
+
 /*
  * Filesystem context for holding the parameters used in the creation or
  * reconfiguration of a superblock.
diff --git a/include/linux/fs_parser.h b/include/linux/fs_parser.h
new file mode 100644
index 000000000000..d966f96ffe62
--- /dev/null
+++ b/include/linux/fs_parser.h
@@ -0,0 +1,151 @@
+/* Filesystem parameter description and parser
+ *
+ * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#ifndef _LINUX_FS_PARSER_H
+#define _LINUX_FS_PARSER_H
+
+#include <linux/fs_context.h>
+
+struct path;
+
+struct constant_table {
+	const char	*name;
+	int		value;
+};
+
+/*
+ * The type of parameter expected.
+ */
+enum fs_parameter_type {
+	__fs_param_wasnt_defined,
+	fs_param_is_flag,
+	fs_param_is_bool,
+	fs_param_is_u32,
+	fs_param_is_u32_octal,
+	fs_param_is_u32_hex,
+	fs_param_is_s32,
+	fs_param_is_u64,
+	fs_param_is_enum,
+	fs_param_is_string,
+	fs_param_is_blob,
+	fs_param_is_blockdev,
+	fs_param_is_path,
+	fs_param_is_fd,
+	nr__fs_parameter_type,
+};
+
+/*
+ * Specification of the type of value a parameter wants.
+ *
+ * Note that the fsparam_flag(), fsparam_string(), fsparam_u32(), ... macros
+ * should be used to generate elements of this type.
+ */
+struct fs_parameter_spec {
+	const char		*name;
+	u8			opt;	/* Option number (returned by fs_parse()) */
+	enum fs_parameter_type	type:8;	/* The desired parameter type */
+	unsigned short		flags;
+#define fs_param_v_optional	0x0001	/* The value is optional */
+#define fs_param_neg_with_no	0x0002	/* "noxxx" is negative param */
+#define fs_param_neg_with_empty	0x0004	/* "xxx=" is negative param */
+#define fs_param_deprecated	0x0008	/* The param is deprecated */
+};
+
+struct fs_parameter_enum {
+	u8		opt;		/* Option number (as fs_parameter_spec::opt) */
+	char		name[14];
+	u8		value;
+};
+
+struct fs_parameter_description {
+	const char	name[16];		/* Name for logging purposes */
+	const struct fs_parameter_spec *specs;	/* List of param specifications */
+	const struct fs_parameter_enum *enums;	/* Enum values */
+};
+
+/*
+ * Result of parse.
+ */
+struct fs_parse_result {
+	bool			negated;	/* T if param was "noxxx" */
+	bool			has_value;	/* T if value supplied to param */
+	union {
+		bool		boolean;	/* For spec_bool */
+		int		int_32;		/* For spec_s32/spec_enum */
+		unsigned int	uint_32;	/* For spec_u32{,_octal,_hex}/spec_enum */
+		u64		uint_64;	/* For spec_u64 */
+	};
+};
+
+extern int fs_parse(struct fs_context *fc,
+		    const struct fs_parameter_description *desc,
+		    struct fs_parameter *value,
+		    struct fs_parse_result *result);
+extern int fs_lookup_param(struct fs_context *fc,
+			   struct fs_parameter *param,
+			   bool want_bdev,
+			   struct path *_path);
+
+extern int __lookup_constant(const struct constant_table tbl[], size_t tbl_size,
+			     const char *name, int not_found);
+#define lookup_constant(t, n, nf) __lookup_constant(t, ARRAY_SIZE(t), (n), (nf))
+
+#ifdef CONFIG_VALIDATE_FS_PARSER
+extern bool validate_constant_table(const struct constant_table *tbl, size_t tbl_size,
+				    int low, int high, int special);
+extern bool fs_validate_description(const struct fs_parameter_description *desc);
+#else
+static inline bool validate_constant_table(const struct constant_table *tbl, size_t tbl_size,
+					   int low, int high, int special)
+{ return true; }
+static inline bool fs_validate_description(const struct fs_parameter_description *desc)
+{ return true; }
+#endif
+
+/*
+ * Parameter type, name, index and flags element constructors.  Use as:
+ *
+ *  fsparam_xxxx("foo", Opt_foo)
+ *
+ * If existing helpers are not enough, direct use of __fsparam() would
+ * work, but any such case is probably a sign that new helper is needed.
+ * Helpers will remain stable; low-level implementation may change.
+ */
+#define __fsparam(TYPE, NAME, OPT, FLAGS) \
+	{ \
+		.name = NAME, \
+		.opt = OPT, \
+		.type = TYPE, \
+		.flags = FLAGS \
+	}
+
+#define fsparam_flag(NAME, OPT)	__fsparam(fs_param_is_flag, NAME, OPT, 0)
+#define fsparam_flag_no(NAME, OPT) \
+				__fsparam(fs_param_is_flag, NAME, OPT, \
+					    fs_param_neg_with_no)
+#define fsparam_bool(NAME, OPT)	__fsparam(fs_param_is_bool, NAME, OPT, 0)
+#define fsparam_u32(NAME, OPT)	__fsparam(fs_param_is_u32, NAME, OPT, 0)
+#define fsparam_u32oct(NAME, OPT) \
+				__fsparam(fs_param_is_u32_octal, NAME, OPT, 0)
+#define fsparam_u32hex(NAME, OPT) \
+				__fsparam(fs_param_is_u32_hex, NAME, OPT, 0)
+#define fsparam_s32(NAME, OPT)	__fsparam(fs_param_is_s32, NAME, OPT, 0)
+#define fsparam_u64(NAME, OPT)	__fsparam(fs_param_is_u64, NAME, OPT, 0)
+#define fsparam_enum(NAME, OPT)	__fsparam(fs_param_is_enum, NAME, OPT, 0)
+#define fsparam_string(NAME, OPT) \
+				__fsparam(fs_param_is_string, NAME, OPT, 0)
+#define fsparam_blob(NAME, OPT)	__fsparam(fs_param_is_blob, NAME, OPT, 0)
+#define fsparam_bdev(NAME, OPT)	__fsparam(fs_param_is_blockdev, NAME, OPT, 0)
+#define fsparam_path(NAME, OPT)	__fsparam(fs_param_is_path, NAME, OPT, 0)
+#define fsparam_fd(NAME, OPT)	__fsparam(fs_param_is_fd, NAME, OPT, 0)
+
+
+#endif /* _LINUX_FS_PARSER_H */
-- 
cgit v1.2.3


From da2441fdffbf7602da702aea5bd95ca4dc3d63fc Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 1 Nov 2018 23:07:24 +0000
Subject: vfs: Add LSM hooks for the new mount API

Add LSM hooks for use by the new mount API and filesystem context code.
This includes:

 (1) Hooks to handle allocation, duplication and freeing of the security
     record attached to a filesystem context.

 (2) A hook to snoop source specifications.  There may be multiple of these
     if the filesystem supports it.  They will to be local files/devices if
     fs_context::source_is_dev is true and will be something else, possibly
     remote server specifications, if false.

 (3) A hook to snoop superblock configuration options in key[=val] form.
     If the LSM decides it wants to handle it, it can suppress the option
     being passed to the filesystem.  Note that 'val' may include commas
     and binary data with the fsopen patch.

 (4) A hook to perform validation and allocation after the configuration
     has been done but before the superblock is allocated and set up.

 (5) A hook to transfer the security from the context to a newly created
     superblock.

 (6) A hook to rule on whether a path point can be used as a mountpoint.

These are intended to replace:

	security_sb_copy_data
	security_sb_kern_mount
	security_sb_mount
	security_sb_set_mnt_opts
	security_sb_clone_mnt_opts
	security_sb_parse_opts_str

[AV -- some of the methods being replaced are already gone, some of the
methods are not added for the lack of need]

Signed-off-by: David Howells <dhowells@redhat.com>
cc: linux-security-module@vger.kernel.org
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/lsm_hooks.h | 14 ++++++++++++++
 include/linux/security.h  | 10 ++++++++++
 security/security.c       |  5 +++++
 3 files changed, 29 insertions(+)

(limited to 'include')

diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h
index 9a0bdf91e646..47ba4db4d8fb 100644
--- a/include/linux/lsm_hooks.h
+++ b/include/linux/lsm_hooks.h
@@ -76,6 +76,17 @@
  *	changes on the process such as clearing out non-inheritable signal
  *	state.  This is called immediately after commit_creds().
  *
+ * Security hooks for mount using fs_context.
+ *	[See also Documentation/filesystems/mounting.txt]
+ *
+ * @fs_context_parse_param:
+ *	Userspace provided a parameter to configure a superblock.  The LSM may
+ *	reject it with an error and may use it for itself, in which case it
+ *	should return 0; otherwise it should return -ENOPARAM to pass it on to
+ *	the filesystem.
+ *	@fc indicates the filesystem context.
+ *	@param The parameter
+ *
  * Security hooks for filesystem operations.
  *
  * @sb_alloc_security:
@@ -1459,6 +1470,8 @@ union security_list_options {
 	void (*bprm_committing_creds)(struct linux_binprm *bprm);
 	void (*bprm_committed_creds)(struct linux_binprm *bprm);
 
+	int (*fs_context_parse_param)(struct fs_context *fc, struct fs_parameter *param);
+
 	int (*sb_alloc_security)(struct super_block *sb);
 	void (*sb_free_security)(struct super_block *sb);
 	void (*sb_free_mnt_opts)(void *mnt_opts);
@@ -1800,6 +1813,7 @@ struct security_hook_heads {
 	struct hlist_head bprm_check_security;
 	struct hlist_head bprm_committing_creds;
 	struct hlist_head bprm_committed_creds;
+	struct hlist_head fs_context_parse_param;
 	struct hlist_head sb_alloc_security;
 	struct hlist_head sb_free_security;
 	struct hlist_head sb_free_mnt_opts;
diff --git a/include/linux/security.h b/include/linux/security.h
index dbfb5a66babb..1cc4d7a3d6fa 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -53,6 +53,9 @@ struct msg_msg;
 struct xattr;
 struct xfrm_sec_ctx;
 struct mm_struct;
+struct fs_context;
+struct fs_parameter;
+enum fs_value_type;
 
 /* If capable should audit the security request */
 #define SECURITY_CAP_NOAUDIT 0
@@ -220,6 +223,7 @@ int security_bprm_set_creds(struct linux_binprm *bprm);
 int security_bprm_check(struct linux_binprm *bprm);
 void security_bprm_committing_creds(struct linux_binprm *bprm);
 void security_bprm_committed_creds(struct linux_binprm *bprm);
+int security_fs_context_parse_param(struct fs_context *fc, struct fs_parameter *param);
 int security_sb_alloc(struct super_block *sb);
 void security_sb_free(struct super_block *sb);
 void security_free_mnt_opts(void **mnt_opts);
@@ -517,6 +521,12 @@ static inline void security_bprm_committed_creds(struct linux_binprm *bprm)
 {
 }
 
+static inline int security_fs_context_parse_param(struct fs_context *fc,
+						  struct fs_parameter *param)
+{
+	return -ENOPARAM;
+}
+
 static inline int security_sb_alloc(struct super_block *sb)
 {
 	return 0;
diff --git a/security/security.c b/security/security.c
index f1b8d2587639..e5519488327d 100644
--- a/security/security.c
+++ b/security/security.c
@@ -374,6 +374,11 @@ void security_bprm_committed_creds(struct linux_binprm *bprm)
 	call_void_hook(bprm_committed_creds, bprm);
 }
 
+int security_fs_context_parse_param(struct fs_context *fc, struct fs_parameter *param)
+{
+	return call_int_hook(fs_context_parse_param, -ENOPARAM, fc, param);
+}
+
 int security_sb_alloc(struct super_block *sb)
 {
 	return call_int_hook(sb_alloc_security, 0, sb);
-- 
cgit v1.2.3


From 846e56621897a63966b7f03a70be29060394c363 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 1 Nov 2018 23:07:24 +0000
Subject: vfs: Put security flags into the fs_context struct

Put security flags, such as SECURITY_LSM_NATIVE_LABELS, into the filesystem
context so that the filesystem can communicate them to the LSM more easily.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/fs_context.h | 1 +
 include/linux/security.h   | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/fs_context.h b/include/linux/fs_context.h
index 899027c94788..d5ff3b0bc28d 100644
--- a/include/linux/fs_context.h
+++ b/include/linux/fs_context.h
@@ -85,6 +85,7 @@ struct fs_context {
 	void			*security;	/* Linux S&M options */
 	unsigned int		sb_flags;	/* Proposed superblock flags (SB_*) */
 	unsigned int		sb_flags_mask;	/* Superblock flags that were changed */
+	unsigned int		lsm_flags;	/* Information flags from the fs to the LSM */
 	enum fs_context_purpose	purpose:8;
 	bool			need_free:1;	/* Need to call ops->free() */
 };
diff --git a/include/linux/security.h b/include/linux/security.h
index 1cc4d7a3d6fa..2da9336a987e 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -61,7 +61,7 @@ enum fs_value_type;
 #define SECURITY_CAP_NOAUDIT 0
 #define SECURITY_CAP_AUDIT 1
 
-/* LSM Agnostic defines for sb_set_mnt_opts */
+/* LSM Agnostic defines for fs_context::lsm_flags */
 #define SECURITY_LSM_NATIVE_LABELS	1
 
 struct ctl_table;
-- 
cgit v1.2.3


From 3e1aeb00e6d132efc151dacc062b38269bc9eccc Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 1 Nov 2018 23:07:25 +0000
Subject: vfs: Implement a filesystem superblock creation/configuration context

[AV - unfuck kern_mount_data(); we want non-NULL ->mnt_ns on long-living
mounts]
[AV - reordering fs/namespace.c is badly overdue, but let's keep it
separate from that series]
[AV - drop simple_pin_fs() change]
[AV - clean vfs_kern_mount() failure exits up]

Implement a filesystem context concept to be used during superblock
creation for mount and superblock reconfiguration for remount.

The mounting procedure then becomes:

 (1) Allocate new fs_context context.

 (2) Configure the context.

 (3) Create superblock.

 (4) Query the superblock.

 (5) Create a mount for the superblock.

 (6) Destroy the context.

Rather than calling fs_type->mount(), an fs_context struct is created and
fs_type->init_fs_context() is called to set it up.  Pointers exist for the
filesystem and LSM to hang their private data off.

A set of operations has to be set by ->init_fs_context() to provide
freeing, duplication, option parsing, binary data parsing, validation,
mounting and superblock filling.

Legacy filesystems are supported by the provision of a set of legacy
fs_context operations that build up a list of mount options and then invoke
fs_type->mount() from within the fs_context ->get_tree() operation.  This
allows all filesystems to be accessed using fs_context.

It should be noted that, whilst this patch adds a lot of lines of code,
there is quite a bit of duplication with existing code that can be
eliminated should all filesystems be converted over.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/filesystems.c           |   4 +
 fs/fs_context.c            | 300 ++++++++++++++++++++++++++++++++++++++++++++-
 fs/namespace.c             |  25 ++--
 include/linux/fs.h         |   2 +
 include/linux/fs_context.h |   5 +
 5 files changed, 319 insertions(+), 17 deletions(-)

(limited to 'include')

diff --git a/fs/filesystems.c b/fs/filesystems.c
index b03f57b1105b..9135646e41ac 100644
--- a/fs/filesystems.c
+++ b/fs/filesystems.c
@@ -16,6 +16,7 @@
 #include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/uaccess.h>
+#include <linux/fs_parser.h>
 
 /*
  * Handling of filesystem drivers list.
@@ -73,6 +74,9 @@ int register_filesystem(struct file_system_type * fs)
 	int res = 0;
 	struct file_system_type ** p;
 
+	if (fs->parameters && !fs_validate_description(fs->parameters))
+		return -EINVAL;
+
 	BUG_ON(strchr(fs->name, '.'));
 	if (fs->next)
 		return -EBUSY;
diff --git a/fs/fs_context.c b/fs/fs_context.c
index 825d1b2c8807..aa7e0ffb591a 100644
--- a/fs/fs_context.c
+++ b/fs/fs_context.c
@@ -12,6 +12,7 @@
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/fs_context.h>
+#include <linux/fs_parser.h>
 #include <linux/fs.h>
 #include <linux/mount.h>
 #include <linux/nsproxy.h>
@@ -25,13 +26,217 @@
 #include "mount.h"
 #include "internal.h"
 
+enum legacy_fs_param {
+	LEGACY_FS_UNSET_PARAMS,
+	LEGACY_FS_MONOLITHIC_PARAMS,
+	LEGACY_FS_INDIVIDUAL_PARAMS,
+};
+
 struct legacy_fs_context {
 	char			*legacy_data;	/* Data page for legacy filesystems */
 	size_t			data_size;
+	enum legacy_fs_param	param_type;
 };
 
 static int legacy_init_fs_context(struct fs_context *fc);
 
+static const struct constant_table common_set_sb_flag[] = {
+	{ "dirsync",	SB_DIRSYNC },
+	{ "lazytime",	SB_LAZYTIME },
+	{ "mand",	SB_MANDLOCK },
+	{ "posixacl",	SB_POSIXACL },
+	{ "ro",		SB_RDONLY },
+	{ "sync",	SB_SYNCHRONOUS },
+};
+
+static const struct constant_table common_clear_sb_flag[] = {
+	{ "async",	SB_SYNCHRONOUS },
+	{ "nolazytime",	SB_LAZYTIME },
+	{ "nomand",	SB_MANDLOCK },
+	{ "rw",		SB_RDONLY },
+	{ "silent",	SB_SILENT },
+};
+
+static const char *const forbidden_sb_flag[] = {
+	"bind",
+	"dev",
+	"exec",
+	"move",
+	"noatime",
+	"nodev",
+	"nodiratime",
+	"noexec",
+	"norelatime",
+	"nostrictatime",
+	"nosuid",
+	"private",
+	"rec",
+	"relatime",
+	"remount",
+	"shared",
+	"slave",
+	"strictatime",
+	"suid",
+	"unbindable",
+};
+
+/*
+ * Check for a common mount option that manipulates s_flags.
+ */
+static int vfs_parse_sb_flag(struct fs_context *fc, const char *key)
+{
+	unsigned int token;
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(forbidden_sb_flag); i++)
+		if (strcmp(key, forbidden_sb_flag[i]) == 0)
+			return -EINVAL;
+
+	token = lookup_constant(common_set_sb_flag, key, 0);
+	if (token) {
+		fc->sb_flags |= token;
+		fc->sb_flags_mask |= token;
+		return 0;
+	}
+
+	token = lookup_constant(common_clear_sb_flag, key, 0);
+	if (token) {
+		fc->sb_flags &= ~token;
+		fc->sb_flags_mask |= token;
+		return 0;
+	}
+
+	return -ENOPARAM;
+}
+
+/**
+ * vfs_parse_fs_param - Add a single parameter to a superblock config
+ * @fc: The filesystem context to modify
+ * @param: The parameter
+ *
+ * A single mount option in string form is applied to the filesystem context
+ * being set up.  Certain standard options (for example "ro") are translated
+ * into flag bits without going to the filesystem.  The active security module
+ * is allowed to observe and poach options.  Any other options are passed over
+ * to the filesystem to parse.
+ *
+ * This may be called multiple times for a context.
+ *
+ * Returns 0 on success and a negative error code on failure.  In the event of
+ * failure, supplementary error information may have been set.
+ */
+int vfs_parse_fs_param(struct fs_context *fc, struct fs_parameter *param)
+{
+	int ret;
+
+	if (!param->key)
+		return invalf(fc, "Unnamed parameter\n");
+
+	ret = vfs_parse_sb_flag(fc, param->key);
+	if (ret != -ENOPARAM)
+		return ret;
+
+	ret = security_fs_context_parse_param(fc, param);
+	if (ret != -ENOPARAM)
+		/* Param belongs to the LSM or is disallowed by the LSM; so
+		 * don't pass to the FS.
+		 */
+		return ret;
+
+	if (fc->ops->parse_param) {
+		ret = fc->ops->parse_param(fc, param);
+		if (ret != -ENOPARAM)
+			return ret;
+	}
+
+	/* If the filesystem doesn't take any arguments, give it the
+	 * default handling of source.
+	 */
+	if (strcmp(param->key, "source") == 0) {
+		if (param->type != fs_value_is_string)
+			return invalf(fc, "VFS: Non-string source");
+		if (fc->source)
+			return invalf(fc, "VFS: Multiple sources");
+		fc->source = param->string;
+		param->string = NULL;
+		return 0;
+	}
+
+	return invalf(fc, "%s: Unknown parameter '%s'",
+		      fc->fs_type->name, param->key);
+}
+EXPORT_SYMBOL(vfs_parse_fs_param);
+
+/**
+ * vfs_parse_fs_string - Convenience function to just parse a string.
+ */
+int vfs_parse_fs_string(struct fs_context *fc, const char *key,
+			const char *value, size_t v_size)
+{
+	int ret;
+
+	struct fs_parameter param = {
+		.key	= key,
+		.type	= fs_value_is_string,
+		.size	= v_size,
+	};
+
+	if (v_size > 0) {
+		param.string = kmemdup_nul(value, v_size, GFP_KERNEL);
+		if (!param.string)
+			return -ENOMEM;
+	}
+
+	ret = vfs_parse_fs_param(fc, &param);
+	kfree(param.string);
+	return ret;
+}
+EXPORT_SYMBOL(vfs_parse_fs_string);
+
+/**
+ * generic_parse_monolithic - Parse key[=val][,key[=val]]* mount data
+ * @ctx: The superblock configuration to fill in.
+ * @data: The data to parse
+ *
+ * Parse a blob of data that's in key[=val][,key[=val]]* form.  This can be
+ * called from the ->monolithic_mount_data() fs_context operation.
+ *
+ * Returns 0 on success or the error returned by the ->parse_option() fs_context
+ * operation on failure.
+ */
+int generic_parse_monolithic(struct fs_context *fc, void *data)
+{
+	char *options = data, *key;
+	int ret = 0;
+
+	if (!options)
+		return 0;
+
+	ret = security_sb_eat_lsm_opts(options, &fc->security);
+	if (ret)
+		return ret;
+
+	while ((key = strsep(&options, ",")) != NULL) {
+		if (*key) {
+			size_t v_len = 0;
+			char *value = strchr(key, '=');
+
+			if (value) {
+				if (value == key)
+					continue;
+				*value++ = 0;
+				v_len = strlen(value);
+			}
+			ret = vfs_parse_fs_string(fc, key, value, v_len);
+			if (ret < 0)
+				break;
+		}
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL(generic_parse_monolithic);
+
 /**
  * alloc_fs_context - Create a filesystem context.
  * @fs_type: The filesystem type.
@@ -166,7 +371,87 @@ EXPORT_SYMBOL(put_fs_context);
  */
 static void legacy_fs_context_free(struct fs_context *fc)
 {
-	kfree(fc->fs_private);
+	struct legacy_fs_context *ctx = fc->fs_private;
+
+	if (ctx) {
+		if (ctx->param_type == LEGACY_FS_INDIVIDUAL_PARAMS)
+			kfree(ctx->legacy_data);
+		kfree(ctx);
+	}
+}
+
+/*
+ * Add a parameter to a legacy config.  We build up a comma-separated list of
+ * options.
+ */
+static int legacy_parse_param(struct fs_context *fc, struct fs_parameter *param)
+{
+	struct legacy_fs_context *ctx = fc->fs_private;
+	unsigned int size = ctx->data_size;
+	size_t len = 0;
+
+	if (strcmp(param->key, "source") == 0) {
+		if (param->type != fs_value_is_string)
+			return invalf(fc, "VFS: Legacy: Non-string source");
+		if (fc->source)
+			return invalf(fc, "VFS: Legacy: Multiple sources");
+		fc->source = param->string;
+		param->string = NULL;
+		return 0;
+	}
+
+	if ((fc->fs_type->fs_flags & FS_HAS_SUBTYPE) &&
+	    strcmp(param->key, "subtype") == 0) {
+		if (param->type != fs_value_is_string)
+			return invalf(fc, "VFS: Legacy: Non-string subtype");
+		if (fc->subtype)
+			return invalf(fc, "VFS: Legacy: Multiple subtype");
+		fc->subtype = param->string;
+		param->string = NULL;
+		return 0;
+	}
+
+	if (ctx->param_type == LEGACY_FS_MONOLITHIC_PARAMS)
+		return invalf(fc, "VFS: Legacy: Can't mix monolithic and individual options");
+
+	switch (param->type) {
+	case fs_value_is_string:
+		len = 1 + param->size;
+		/* Fall through */
+	case fs_value_is_flag:
+		len += strlen(param->key);
+		break;
+	default:
+		return invalf(fc, "VFS: Legacy: Parameter type for '%s' not supported",
+			      param->key);
+	}
+
+	if (len > PAGE_SIZE - 2 - size)
+		return invalf(fc, "VFS: Legacy: Cumulative options too large");
+	if (strchr(param->key, ',') ||
+	    (param->type == fs_value_is_string &&
+	     memchr(param->string, ',', param->size)))
+		return invalf(fc, "VFS: Legacy: Option '%s' contained comma",
+			      param->key);
+	if (!ctx->legacy_data) {
+		ctx->legacy_data = kmalloc(PAGE_SIZE, GFP_KERNEL);
+		if (!ctx->legacy_data)
+			return -ENOMEM;
+	}
+
+	ctx->legacy_data[size++] = ',';
+	len = strlen(param->key);
+	memcpy(ctx->legacy_data + size, param->key, len);
+	size += len;
+	if (param->type == fs_value_is_string) {
+		ctx->legacy_data[size++] = '=';
+		memcpy(ctx->legacy_data + size, param->string, param->size);
+		size += param->size;
+	}
+	ctx->legacy_data[size] = '\0';
+	ctx->data_size = size;
+	ctx->param_type = LEGACY_FS_INDIVIDUAL_PARAMS;
+	return 0;
 }
 
 /*
@@ -175,9 +460,17 @@ static void legacy_fs_context_free(struct fs_context *fc)
 static int legacy_parse_monolithic(struct fs_context *fc, void *data)
 {
 	struct legacy_fs_context *ctx = fc->fs_private;
+
+	if (ctx->param_type != LEGACY_FS_UNSET_PARAMS) {
+		pr_warn("VFS: Can't mix monolithic and individual options\n");
+		return -EINVAL;
+	}
+
 	ctx->legacy_data = data;
+	ctx->param_type = LEGACY_FS_MONOLITHIC_PARAMS;
 	if (!ctx->legacy_data)
 		return 0;
+
 	if (fc->fs_type->fs_flags & FS_BINARY_MOUNTDATA)
 		return 0;
 	return security_sb_eat_lsm_opts(ctx->legacy_data, &fc->security);
@@ -221,6 +514,7 @@ static int legacy_reconfigure(struct fs_context *fc)
 
 const struct fs_context_operations legacy_fs_context_ops = {
 	.free			= legacy_fs_context_free,
+	.parse_param		= legacy_parse_param,
 	.parse_monolithic	= legacy_parse_monolithic,
 	.get_tree		= legacy_get_tree,
 	.reconfigure		= legacy_reconfigure,
@@ -242,6 +536,10 @@ static int legacy_init_fs_context(struct fs_context *fc)
 int parse_monolithic_mount_data(struct fs_context *fc, void *data)
 {
 	int (*monolithic_mount_data)(struct fs_context *, void *);
+
 	monolithic_mount_data = fc->ops->parse_monolithic;
+	if (!monolithic_mount_data)
+		monolithic_mount_data = generic_parse_monolithic;
+
 	return monolithic_mount_data(fc, data);
 }
diff --git a/fs/namespace.c b/fs/namespace.c
index 931228d8518a..1a1ed2528f47 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -997,17 +997,15 @@ struct vfsmount *vfs_kern_mount(struct file_system_type *type,
 	int ret = 0;
 
 	if (!type)
-		return ERR_PTR(-ENODEV);
+		return ERR_PTR(-EINVAL);
 
 	fc = fs_context_for_mount(type, flags);
 	if (IS_ERR(fc))
 		return ERR_CAST(fc);
 
-	if (name) {
-		fc->source = kstrdup(name, GFP_KERNEL);
-		if (!fc->source)
-			ret = -ENOMEM;
-	}
+	if (name)
+		ret = vfs_parse_fs_string(fc, "source",
+					  name, strlen(name));
 	if (!ret)
 		ret = parse_monolithic_mount_data(fc, data);
 	if (!ret)
@@ -2611,16 +2609,11 @@ static int do_new_mount(struct path *path, const char *fstype, int sb_flags,
 	if (IS_ERR(fc))
 		return PTR_ERR(fc);
 
-	if (subtype) {
-		fc->subtype = kstrdup(subtype, GFP_KERNEL);
-		if (!fc->subtype)
-			err = -ENOMEM;
-	}
-	if (!err && name) {
-		fc->source = kstrdup(name, GFP_KERNEL);
-		if (!fc->source)
-			err = -ENOMEM;
-	}
+	if (subtype)
+		err = vfs_parse_fs_string(fc, "subtype",
+					  subtype, strlen(subtype));
+	if (!err && name)
+		err = vfs_parse_fs_string(fc, "source", name, strlen(name));
 	if (!err)
 		err = parse_monolithic_mount_data(fc, data);
 	if (!err)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 8d578a9e1e8c..cf6e9ea161eb 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -62,6 +62,7 @@ struct iov_iter;
 struct fscrypt_info;
 struct fscrypt_operations;
 struct fs_context;
+struct fs_parameter_description;
 
 extern void __init inode_init(void);
 extern void __init inode_init_early(void);
@@ -2175,6 +2176,7 @@ struct file_system_type {
 #define FS_USERNS_MOUNT		8	/* Can be mounted by userns root */
 #define FS_RENAME_DOES_D_MOVE	32768	/* FS will handle d_move() during rename() internally. */
 	int (*init_fs_context)(struct fs_context *);
+	const struct fs_parameter_description *parameters;
 	struct dentry *(*mount) (struct file_system_type *, int,
 		       const char *, void *);
 	void (*kill_sb) (struct super_block *);
diff --git a/include/linux/fs_context.h b/include/linux/fs_context.h
index d5ff3b0bc28d..d794b04e9fbb 100644
--- a/include/linux/fs_context.h
+++ b/include/linux/fs_context.h
@@ -92,6 +92,7 @@ struct fs_context {
 
 struct fs_context_operations {
 	void (*free)(struct fs_context *fc);
+	int (*parse_param)(struct fs_context *fc, struct fs_parameter *param);
 	int (*parse_monolithic)(struct fs_context *fc, void *data);
 	int (*get_tree)(struct fs_context *fc);
 	int (*reconfigure)(struct fs_context *fc);
@@ -108,6 +109,10 @@ extern struct fs_context *fs_context_for_reconfigure(struct dentry *dentry,
 extern struct fs_context *fs_context_for_submount(struct file_system_type *fs_type,
 						struct dentry *reference);
 
+extern int vfs_parse_fs_param(struct fs_context *fc, struct fs_parameter *param);
+extern int vfs_parse_fs_string(struct fs_context *fc, const char *key,
+			       const char *value, size_t v_size);
+extern int generic_parse_monolithic(struct fs_context *fc, void *data);
 extern int vfs_get_tree(struct fs_context *fc);
 extern void put_fs_context(struct fs_context *fc);
 
-- 
cgit v1.2.3


From cb50b348c71ffa90d7d1b2a494b553b5099bc090 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 23 Dec 2018 17:25:47 -0500
Subject: convenience helpers: vfs_get_super() and sget_fc()

the former is an analogue of mount_{single,nodev} for use in
->get_tree() instances, the latter - analogue of sget() for the
same.

These are fairly similar to the originals, but the callback signature
for sget_fc() is different from sget() ones, so getting bits and
pieces shared would be too convoluted; we might get around to that
later, but for now let's just remember to keep them in sync.  They
do live next to each other, and changes in either won't be hard
to spot.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/super.c                 | 171 +++++++++++++++++++++++++++++++++++++++++++++
 include/linux/fs.h         |   4 ++
 include/linux/fs_context.h |  15 ++++
 3 files changed, 190 insertions(+)

(limited to 'include')

diff --git a/fs/super.c b/fs/super.c
index 76b3181c782d..0ebb5c11fa56 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -476,6 +476,94 @@ void generic_shutdown_super(struct super_block *sb)
 
 EXPORT_SYMBOL(generic_shutdown_super);
 
+/**
+ * sget_fc - Find or create a superblock
+ * @fc:	Filesystem context.
+ * @test: Comparison callback
+ * @set: Setup callback
+ *
+ * Find or create a superblock using the parameters stored in the filesystem
+ * context and the two callback functions.
+ *
+ * If an extant superblock is matched, then that will be returned with an
+ * elevated reference count that the caller must transfer or discard.
+ *
+ * If no match is made, a new superblock will be allocated and basic
+ * initialisation will be performed (s_type, s_fs_info and s_id will be set and
+ * the set() callback will be invoked), the superblock will be published and it
+ * will be returned in a partially constructed state with SB_BORN and SB_ACTIVE
+ * as yet unset.
+ */
+struct super_block *sget_fc(struct fs_context *fc,
+			    int (*test)(struct super_block *, struct fs_context *),
+			    int (*set)(struct super_block *, struct fs_context *))
+{
+	struct super_block *s = NULL;
+	struct super_block *old;
+	struct user_namespace *user_ns = fc->global ? &init_user_ns : fc->user_ns;
+	int err;
+
+	if (!(fc->sb_flags & SB_KERNMOUNT) &&
+	    fc->purpose != FS_CONTEXT_FOR_SUBMOUNT) {
+		/* Don't allow mounting unless the caller has CAP_SYS_ADMIN
+		 * over the namespace.
+		 */
+		if (!(fc->fs_type->fs_flags & FS_USERNS_MOUNT)) {
+			if (!capable(CAP_SYS_ADMIN))
+				return ERR_PTR(-EPERM);
+		} else {
+			if (!ns_capable(fc->user_ns, CAP_SYS_ADMIN))
+				return ERR_PTR(-EPERM);
+		}
+	}
+
+retry:
+	spin_lock(&sb_lock);
+	if (test) {
+		hlist_for_each_entry(old, &fc->fs_type->fs_supers, s_instances) {
+			if (test(old, fc))
+				goto share_extant_sb;
+		}
+	}
+	if (!s) {
+		spin_unlock(&sb_lock);
+		s = alloc_super(fc->fs_type, fc->sb_flags, user_ns);
+		if (!s)
+			return ERR_PTR(-ENOMEM);
+		goto retry;
+	}
+
+	s->s_fs_info = fc->s_fs_info;
+	err = set(s, fc);
+	if (err) {
+		s->s_fs_info = NULL;
+		spin_unlock(&sb_lock);
+		destroy_unused_super(s);
+		return ERR_PTR(err);
+	}
+	fc->s_fs_info = NULL;
+	s->s_type = fc->fs_type;
+	strlcpy(s->s_id, s->s_type->name, sizeof(s->s_id));
+	list_add_tail(&s->s_list, &super_blocks);
+	hlist_add_head(&s->s_instances, &s->s_type->fs_supers);
+	spin_unlock(&sb_lock);
+	get_filesystem(s->s_type);
+	register_shrinker_prepared(&s->s_shrink);
+	return s;
+
+share_extant_sb:
+	if (user_ns != old->s_user_ns) {
+		spin_unlock(&sb_lock);
+		destroy_unused_super(s);
+		return ERR_PTR(-EBUSY);
+	}
+	if (!grab_super(old))
+		goto retry;
+	destroy_unused_super(s);
+	return old;
+}
+EXPORT_SYMBOL(sget_fc);
+
 /**
  *	sget_userns -	find or create a superblock
  *	@type:	filesystem type superblock should belong to
@@ -1103,6 +1191,89 @@ struct dentry *mount_ns(struct file_system_type *fs_type,
 
 EXPORT_SYMBOL(mount_ns);
 
+int set_anon_super_fc(struct super_block *sb, struct fs_context *fc)
+{
+	return set_anon_super(sb, NULL);
+}
+EXPORT_SYMBOL(set_anon_super_fc);
+
+static int test_keyed_super(struct super_block *sb, struct fs_context *fc)
+{
+	return sb->s_fs_info == fc->s_fs_info;
+}
+
+static int test_single_super(struct super_block *s, struct fs_context *fc)
+{
+	return 1;
+}
+
+/**
+ * vfs_get_super - Get a superblock with a search key set in s_fs_info.
+ * @fc: The filesystem context holding the parameters
+ * @keying: How to distinguish superblocks
+ * @fill_super: Helper to initialise a new superblock
+ *
+ * Search for a superblock and create a new one if not found.  The search
+ * criterion is controlled by @keying.  If the search fails, a new superblock
+ * is created and @fill_super() is called to initialise it.
+ *
+ * @keying can take one of a number of values:
+ *
+ * (1) vfs_get_single_super - Only one superblock of this type may exist on the
+ *     system.  This is typically used for special system filesystems.
+ *
+ * (2) vfs_get_keyed_super - Multiple superblocks may exist, but they must have
+ *     distinct keys (where the key is in s_fs_info).  Searching for the same
+ *     key again will turn up the superblock for that key.
+ *
+ * (3) vfs_get_independent_super - Multiple superblocks may exist and are
+ *     unkeyed.  Each call will get a new superblock.
+ *
+ * A permissions check is made by sget_fc() unless we're getting a superblock
+ * for a kernel-internal mount or a submount.
+ */
+int vfs_get_super(struct fs_context *fc,
+		  enum vfs_get_super_keying keying,
+		  int (*fill_super)(struct super_block *sb,
+				    struct fs_context *fc))
+{
+	int (*test)(struct super_block *, struct fs_context *);
+	struct super_block *sb;
+
+	switch (keying) {
+	case vfs_get_single_super:
+		test = test_single_super;
+		break;
+	case vfs_get_keyed_super:
+		test = test_keyed_super;
+		break;
+	case vfs_get_independent_super:
+		test = NULL;
+		break;
+	default:
+		BUG();
+	}
+
+	sb = sget_fc(fc, test, set_anon_super_fc);
+	if (IS_ERR(sb))
+		return PTR_ERR(sb);
+
+	if (!sb->s_root) {
+		int err = fill_super(sb, fc);
+		if (err) {
+			deactivate_locked_super(sb);
+			return err;
+		}
+
+		sb->s_flags |= SB_ACTIVE;
+	}
+
+	BUG_ON(fc->root);
+	fc->root = dget(sb->s_root);
+	return 0;
+}
+EXPORT_SYMBOL(vfs_get_super);
+
 #ifdef CONFIG_BLOCK
 static int set_bdev_super(struct super_block *s, void *data)
 {
diff --git a/include/linux/fs.h b/include/linux/fs.h
index cf6e9ea161eb..9d05c128ccf6 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2232,8 +2232,12 @@ void kill_litter_super(struct super_block *sb);
 void deactivate_super(struct super_block *sb);
 void deactivate_locked_super(struct super_block *sb);
 int set_anon_super(struct super_block *s, void *data);
+int set_anon_super_fc(struct super_block *s, struct fs_context *fc);
 int get_anon_bdev(dev_t *);
 void free_anon_bdev(dev_t);
+struct super_block *sget_fc(struct fs_context *fc,
+			    int (*test)(struct super_block *, struct fs_context *),
+			    int (*set)(struct super_block *, struct fs_context *));
 struct super_block *sget_userns(struct file_system_type *type,
 			int (*test)(struct super_block *,void *),
 			int (*set)(struct super_block *,void *),
diff --git a/include/linux/fs_context.h b/include/linux/fs_context.h
index d794b04e9fbb..b1a95db7a111 100644
--- a/include/linux/fs_context.h
+++ b/include/linux/fs_context.h
@@ -83,11 +83,13 @@ struct fs_context {
 	const char		*source;	/* The source name (eg. dev path) */
 	const char		*subtype;	/* The subtype to set on the superblock */
 	void			*security;	/* Linux S&M options */
+	void			*s_fs_info;	/* Proposed s_fs_info */
 	unsigned int		sb_flags;	/* Proposed superblock flags (SB_*) */
 	unsigned int		sb_flags_mask;	/* Superblock flags that were changed */
 	unsigned int		lsm_flags;	/* Information flags from the fs to the LSM */
 	enum fs_context_purpose	purpose:8;
 	bool			need_free:1;	/* Need to call ops->free() */
+	bool			global:1;	/* Goes into &init_user_ns */
 };
 
 struct fs_context_operations {
@@ -116,6 +118,19 @@ extern int generic_parse_monolithic(struct fs_context *fc, void *data);
 extern int vfs_get_tree(struct fs_context *fc);
 extern void put_fs_context(struct fs_context *fc);
 
+/*
+ * sget() wrapper to be called from the ->get_tree() op.
+ */
+enum vfs_get_super_keying {
+	vfs_get_single_super,	/* Only one such superblock may exist */
+	vfs_get_keyed_super,	/* Superblocks with different s_fs_info keys may exist */
+	vfs_get_independent_super, /* Multiple independent superblocks may exist */
+};
+extern int vfs_get_super(struct fs_context *fc,
+			 enum vfs_get_super_keying keying,
+			 int (*fill_super)(struct super_block *sb,
+					   struct fs_context *fc));
+
 #define logfc(FC, FMT, ...) pr_notice(FMT, ## __VA_ARGS__)
 
 /**
-- 
cgit v1.2.3


From 0b52075ee62301dd150c9f2c3ddd0035ed894cde Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 23 Dec 2018 16:02:47 -0500
Subject: introduce cloning of fs_context

new primitive: vfs_dup_fs_context().  Comes with fs_context
method (->dup()) for copying the filesystem-specific parts
of fs_context, along with LSM one (->fs_context_dup()) for
doing the same to LSM parts.

[needs better commit message, and change of Author:, anyway]

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/fs_context.c            | 67 ++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/fs_context.h |  2 ++
 include/linux/lsm_hooks.h  |  7 +++++
 include/linux/security.h   |  6 +++++
 security/security.c        |  5 ++++
 security/selinux/hooks.c   | 39 +++++++++++++++++++++++++++
 security/smack/smack_lsm.c | 49 +++++++++++++++++++++++++++++++++
 7 files changed, 175 insertions(+)

(limited to 'include')

diff --git a/fs/fs_context.c b/fs/fs_context.c
index aa7e0ffb591a..57f61833ac83 100644
--- a/fs/fs_context.c
+++ b/fs/fs_context.c
@@ -337,6 +337,47 @@ void fc_drop_locked(struct fs_context *fc)
 
 static void legacy_fs_context_free(struct fs_context *fc);
 
+/**
+ * vfs_dup_fc_config: Duplicate a filesystem context.
+ * @src_fc: The context to copy.
+ */
+struct fs_context *vfs_dup_fs_context(struct fs_context *src_fc)
+{
+	struct fs_context *fc;
+	int ret;
+
+	if (!src_fc->ops->dup)
+		return ERR_PTR(-EOPNOTSUPP);
+
+	fc = kmemdup(src_fc, sizeof(struct fs_context), GFP_KERNEL);
+	if (!fc)
+		return ERR_PTR(-ENOMEM);
+
+	fc->fs_private	= NULL;
+	fc->s_fs_info	= NULL;
+	fc->source	= NULL;
+	fc->security	= NULL;
+	get_filesystem(fc->fs_type);
+	get_net(fc->net_ns);
+	get_user_ns(fc->user_ns);
+	get_cred(fc->cred);
+
+	/* Can't call put until we've called ->dup */
+	ret = fc->ops->dup(fc, src_fc);
+	if (ret < 0)
+		goto err_fc;
+
+	ret = security_fs_context_dup(fc, src_fc);
+	if (ret < 0)
+		goto err_fc;
+	return fc;
+
+err_fc:
+	put_fs_context(fc);
+	return ERR_PTR(ret);
+}
+EXPORT_SYMBOL(vfs_dup_fs_context);
+
 /**
  * put_fs_context - Dispose of a superblock configuration context.
  * @fc: The context to dispose of.
@@ -380,6 +421,31 @@ static void legacy_fs_context_free(struct fs_context *fc)
 	}
 }
 
+/*
+ * Duplicate a legacy config.
+ */
+static int legacy_fs_context_dup(struct fs_context *fc, struct fs_context *src_fc)
+{
+	struct legacy_fs_context *ctx;
+	struct legacy_fs_context *src_ctx = src_fc->fs_private;
+
+	ctx = kmemdup(src_ctx, sizeof(*src_ctx), GFP_KERNEL);
+	if (!ctx)
+		return -ENOMEM;
+
+	if (ctx->param_type == LEGACY_FS_INDIVIDUAL_PARAMS) {
+		ctx->legacy_data = kmemdup(src_ctx->legacy_data,
+					   src_ctx->data_size, GFP_KERNEL);
+		if (!ctx->legacy_data) {
+			kfree(ctx);
+			return -ENOMEM;
+		}
+	}
+
+	fc->fs_private = ctx;
+	return 0;
+}
+
 /*
  * Add a parameter to a legacy config.  We build up a comma-separated list of
  * options.
@@ -514,6 +580,7 @@ static int legacy_reconfigure(struct fs_context *fc)
 
 const struct fs_context_operations legacy_fs_context_ops = {
 	.free			= legacy_fs_context_free,
+	.dup			= legacy_fs_context_dup,
 	.parse_param		= legacy_parse_param,
 	.parse_monolithic	= legacy_parse_monolithic,
 	.get_tree		= legacy_get_tree,
diff --git a/include/linux/fs_context.h b/include/linux/fs_context.h
index b1a95db7a111..0db0b645c7b8 100644
--- a/include/linux/fs_context.h
+++ b/include/linux/fs_context.h
@@ -94,6 +94,7 @@ struct fs_context {
 
 struct fs_context_operations {
 	void (*free)(struct fs_context *fc);
+	int (*dup)(struct fs_context *fc, struct fs_context *src_fc);
 	int (*parse_param)(struct fs_context *fc, struct fs_parameter *param);
 	int (*parse_monolithic)(struct fs_context *fc, void *data);
 	int (*get_tree)(struct fs_context *fc);
@@ -111,6 +112,7 @@ extern struct fs_context *fs_context_for_reconfigure(struct dentry *dentry,
 extern struct fs_context *fs_context_for_submount(struct file_system_type *fs_type,
 						struct dentry *reference);
 
+extern struct fs_context *vfs_dup_fs_context(struct fs_context *fc);
 extern int vfs_parse_fs_param(struct fs_context *fc, struct fs_parameter *param);
 extern int vfs_parse_fs_string(struct fs_context *fc, const char *key,
 			       const char *value, size_t v_size);
diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h
index 47ba4db4d8fb..356e78fe90a8 100644
--- a/include/linux/lsm_hooks.h
+++ b/include/linux/lsm_hooks.h
@@ -79,6 +79,11 @@
  * Security hooks for mount using fs_context.
  *	[See also Documentation/filesystems/mounting.txt]
  *
+ * @fs_context_dup:
+ *	Allocate and attach a security structure to sc->security.  This pointer
+ *	is initialised to NULL by the caller.
+ *	@fc indicates the new filesystem context.
+ *	@src_fc indicates the original filesystem context.
  * @fs_context_parse_param:
  *	Userspace provided a parameter to configure a superblock.  The LSM may
  *	reject it with an error and may use it for itself, in which case it
@@ -1470,6 +1475,7 @@ union security_list_options {
 	void (*bprm_committing_creds)(struct linux_binprm *bprm);
 	void (*bprm_committed_creds)(struct linux_binprm *bprm);
 
+	int (*fs_context_dup)(struct fs_context *fc, struct fs_context *src_sc);
 	int (*fs_context_parse_param)(struct fs_context *fc, struct fs_parameter *param);
 
 	int (*sb_alloc_security)(struct super_block *sb);
@@ -1813,6 +1819,7 @@ struct security_hook_heads {
 	struct hlist_head bprm_check_security;
 	struct hlist_head bprm_committing_creds;
 	struct hlist_head bprm_committed_creds;
+	struct hlist_head fs_context_dup;
 	struct hlist_head fs_context_parse_param;
 	struct hlist_head sb_alloc_security;
 	struct hlist_head sb_free_security;
diff --git a/include/linux/security.h b/include/linux/security.h
index 2da9336a987e..f28a1ebfd78e 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -223,6 +223,7 @@ int security_bprm_set_creds(struct linux_binprm *bprm);
 int security_bprm_check(struct linux_binprm *bprm);
 void security_bprm_committing_creds(struct linux_binprm *bprm);
 void security_bprm_committed_creds(struct linux_binprm *bprm);
+int security_fs_context_dup(struct fs_context *fc, struct fs_context *src_fc);
 int security_fs_context_parse_param(struct fs_context *fc, struct fs_parameter *param);
 int security_sb_alloc(struct super_block *sb);
 void security_sb_free(struct super_block *sb);
@@ -521,6 +522,11 @@ static inline void security_bprm_committed_creds(struct linux_binprm *bprm)
 {
 }
 
+static inline int security_fs_context_dup(struct fs_context *fc,
+					  struct fs_context *src_fc)
+{
+	return 0;
+}
 static inline int security_fs_context_parse_param(struct fs_context *fc,
 						  struct fs_parameter *param)
 {
diff --git a/security/security.c b/security/security.c
index e5519488327d..5759339319dc 100644
--- a/security/security.c
+++ b/security/security.c
@@ -374,6 +374,11 @@ void security_bprm_committed_creds(struct linux_binprm *bprm)
 	call_void_hook(bprm_committed_creds, bprm);
 }
 
+int security_fs_context_dup(struct fs_context *fc, struct fs_context *src_fc)
+{
+	return call_int_hook(fs_context_dup, 0, fc, src_fc);
+}
+
 int security_fs_context_parse_param(struct fs_context *fc, struct fs_parameter *param)
 {
 	return call_int_hook(fs_context_parse_param, -ENOPARAM, fc, param);
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index f99381e97d73..4ba83de5fa80 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -2764,6 +2764,44 @@ static int selinux_umount(struct vfsmount *mnt, int flags)
 				   FILESYSTEM__UNMOUNT, NULL);
 }
 
+static int selinux_fs_context_dup(struct fs_context *fc,
+				  struct fs_context *src_fc)
+{
+	const struct selinux_mnt_opts *src = src_fc->security;
+	struct selinux_mnt_opts *opts;
+
+	if (!src)
+		return 0;
+
+	fc->security = kzalloc(sizeof(struct selinux_mnt_opts), GFP_KERNEL);
+	if (!fc->security)
+		return -ENOMEM;
+
+	opts = fc->security;
+
+	if (src->fscontext) {
+		opts->fscontext = kstrdup(src->fscontext, GFP_KERNEL);
+		if (!opts->fscontext)
+			return -ENOMEM;
+	}
+	if (src->context) {
+		opts->context = kstrdup(src->context, GFP_KERNEL);
+		if (!opts->context)
+			return -ENOMEM;
+	}
+	if (src->rootcontext) {
+		opts->rootcontext = kstrdup(src->rootcontext, GFP_KERNEL);
+		if (!opts->rootcontext)
+			return -ENOMEM;
+	}
+	if (src->defcontext) {
+		opts->defcontext = kstrdup(src->defcontext, GFP_KERNEL);
+		if (!opts->defcontext)
+			return -ENOMEM;
+	}
+	return 0;
+}
+
 static const struct fs_parameter_spec selinux_param_specs[] = {
 	fsparam_string(CONTEXT_STR,	Opt_context),
 	fsparam_string(DEFCONTEXT_STR,	Opt_defcontext),
@@ -6745,6 +6783,7 @@ static struct security_hook_list selinux_hooks[] __lsm_ro_after_init = {
 	LSM_HOOK_INIT(bprm_committing_creds, selinux_bprm_committing_creds),
 	LSM_HOOK_INIT(bprm_committed_creds, selinux_bprm_committed_creds),
 
+	LSM_HOOK_INIT(fs_context_dup, selinux_fs_context_dup),
 	LSM_HOOK_INIT(fs_context_parse_param, selinux_fs_context_parse_param),
 
 	LSM_HOOK_INIT(sb_alloc_security, selinux_sb_alloc_security),
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
index 5f93c4f84384..03176f600a87 100644
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -647,6 +647,54 @@ out_opt_err:
 	return -EINVAL;
 }
 
+/**
+ * smack_fs_context_dup - Duplicate the security data on fs_context duplication
+ * @fc: The new filesystem context.
+ * @src_fc: The source filesystem context being duplicated.
+ *
+ * Returns 0 on success or -ENOMEM on error.
+ */
+static int smack_fs_context_dup(struct fs_context *fc,
+				struct fs_context *src_fc)
+{
+	struct smack_mnt_opts *dst, *src = src_fc->security;
+
+	if (!src)
+		return 0;
+
+	fc->security = kzalloc(sizeof(struct smack_mnt_opts), GFP_KERNEL);
+	if (!fc->security)
+		return -ENOMEM;
+	dst = fc->security;
+
+	if (src->fsdefault) {
+		dst->fsdefault = kstrdup(src->fsdefault, GFP_KERNEL);
+		if (!dst->fsdefault)
+			return -ENOMEM;
+	}
+	if (src->fsfloor) {
+		dst->fsfloor = kstrdup(src->fsfloor, GFP_KERNEL);
+		if (!dst->fsfloor)
+			return -ENOMEM;
+	}
+	if (src->fshat) {
+		dst->fshat = kstrdup(src->fshat, GFP_KERNEL);
+		if (!dst->fshat)
+			return -ENOMEM;
+	}
+	if (src->fsroot) {
+		dst->fsroot = kstrdup(src->fsroot, GFP_KERNEL);
+		if (!dst->fsroot)
+			return -ENOMEM;
+	}
+	if (src->fstransmute) {
+		dst->fstransmute = kstrdup(src->fstransmute, GFP_KERNEL);
+		if (!dst->fstransmute)
+			return -ENOMEM;
+	}
+	return 0;
+}
+
 static const struct fs_parameter_spec smack_param_specs[] = {
 	fsparam_string("fsdefault",	Opt_fsdefault),
 	fsparam_string("fsfloor",	Opt_fsfloor),
@@ -4626,6 +4674,7 @@ static struct security_hook_list smack_hooks[] __lsm_ro_after_init = {
 	LSM_HOOK_INIT(ptrace_traceme, smack_ptrace_traceme),
 	LSM_HOOK_INIT(syslog, smack_syslog),
 
+	LSM_HOOK_INIT(fs_context_dup, smack_fs_context_dup),
 	LSM_HOOK_INIT(fs_context_parse_param, smack_fs_context_parse_param),
 
 	LSM_HOOK_INIT(sb_alloc_security, smack_sb_alloc_security),
-- 
cgit v1.2.3


From 23bf1b6be9c291a7130118dcc7384f72ac04d813 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 1 Nov 2018 23:07:26 +0000
Subject: kernfs, sysfs, cgroup, intel_rdt: Support fs_context

Make kernfs support superblock creation/mount/remount with fs_context.

This requires that sysfs, cgroup and intel_rdt, which are built on kernfs,
be made to support fs_context also.

Notes:

 (1) A kernfs_fs_context struct is created to wrap fs_context and the
     kernfs mount parameters are moved in here (or are in fs_context).

 (2) kernfs_mount{,_ns}() are made into kernfs_get_tree().  The extra
     namespace tag parameter is passed in the context if desired

 (3) kernfs_free_fs_context() is provided as a destructor for the
     kernfs_fs_context struct, but for the moment it does nothing except
     get called in the right places.

 (4) sysfs doesn't wrap kernfs_fs_context since it has no parameters to
     pass, but possibly this should be done anyway in case someone wants to
     add a parameter in future.

 (5) A cgroup_fs_context struct is created to wrap kernfs_fs_context and
     the cgroup v1 and v2 mount parameters are all moved there.

 (6) cgroup1 parameter parsing error messages are now handled by invalf(),
     which allows userspace to collect them directly.

 (7) cgroup1 parameter cleanup is now done in the context destructor rather
     than in the mount/get_tree and remount functions.

Weirdies:

 (*) cgroup_do_get_tree() calls cset_cgroup_from_root() with locks held,
     but then uses the resulting pointer after dropping the locks.  I'm
     told this is okay and needs commenting.

 (*) The cgroup refcount web.  This really needs documenting.

 (*) cgroup2 only has one root?

Add a suggestion from Thomas Gleixner in which the RDT enablement code is
placed into its own function.

[folded a leak fix from Andrey Vagin]

Signed-off-by: David Howells <dhowells@redhat.com>
cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
cc: Tejun Heo <tj@kernel.org>
cc: Li Zefan <lizefan@huawei.com>
cc: Johannes Weiner <hannes@cmpxchg.org>
cc: cgroups@vger.kernel.org
cc: fenghua.yu@intel.com
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/x86/kernel/cpu/resctrl/internal.h |  16 +++
 arch/x86/kernel/cpu/resctrl/rdtgroup.c | 185 +++++++++++++++++++++------------
 fs/kernfs/kernfs-internal.h            |   1 +
 fs/kernfs/mount.c                      |  89 +++++++---------
 fs/sysfs/mount.c                       |  73 +++++++++----
 include/linux/kernfs.h                 |  38 +++----
 kernel/cgroup/cgroup-internal.h        |   5 +-
 kernel/cgroup/cgroup.c                 |  31 +++---
 8 files changed, 262 insertions(+), 176 deletions(-)

(limited to 'include')

diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
index 822b7db634ee..e49b77283924 100644
--- a/arch/x86/kernel/cpu/resctrl/internal.h
+++ b/arch/x86/kernel/cpu/resctrl/internal.h
@@ -4,6 +4,7 @@
 
 #include <linux/sched.h>
 #include <linux/kernfs.h>
+#include <linux/fs_context.h>
 #include <linux/jump_label.h>
 
 #define MSR_IA32_L3_QOS_CFG		0xc81
@@ -40,6 +41,21 @@
 #define RMID_VAL_ERROR			BIT_ULL(63)
 #define RMID_VAL_UNAVAIL		BIT_ULL(62)
 
+
+struct rdt_fs_context {
+	struct kernfs_fs_context	kfc;
+	bool				enable_cdpl2;
+	bool				enable_cdpl3;
+	bool				enable_mba_mbps;
+};
+
+static inline struct rdt_fs_context *rdt_fc2context(struct fs_context *fc)
+{
+	struct kernfs_fs_context *kfc = fc->fs_private;
+
+	return container_of(kfc, struct rdt_fs_context, kfc);
+}
+
 DECLARE_STATIC_KEY_FALSE(rdt_enable_key);
 
 /**
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index 8388adf241b2..399601eda8e4 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -24,6 +24,7 @@
 #include <linux/cpu.h>
 #include <linux/debugfs.h>
 #include <linux/fs.h>
+#include <linux/fs_parser.h>
 #include <linux/sysfs.h>
 #include <linux/kernfs.h>
 #include <linux/seq_buf.h>
@@ -32,6 +33,7 @@
 #include <linux/sched/task.h>
 #include <linux/slab.h>
 #include <linux/task_work.h>
+#include <linux/user_namespace.h>
 
 #include <uapi/linux/magic.h>
 
@@ -1858,46 +1860,6 @@ static void cdp_disable_all(void)
 		cdpl2_disable();
 }
 
-static int parse_rdtgroupfs_options(char *data)
-{
-	char *token, *o = data;
-	int ret = 0;
-
-	while ((token = strsep(&o, ",")) != NULL) {
-		if (!*token) {
-			ret = -EINVAL;
-			goto out;
-		}
-
-		if (!strcmp(token, "cdp")) {
-			ret = cdpl3_enable();
-			if (ret)
-				goto out;
-		} else if (!strcmp(token, "cdpl2")) {
-			ret = cdpl2_enable();
-			if (ret)
-				goto out;
-		} else if (!strcmp(token, "mba_MBps")) {
-			if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
-				ret = set_mba_sc(true);
-			else
-				ret = -EINVAL;
-			if (ret)
-				goto out;
-		} else {
-			ret = -EINVAL;
-			goto out;
-		}
-	}
-
-	return 0;
-
-out:
-	pr_err("Invalid mount option \"%s\"\n", token);
-
-	return ret;
-}
-
 /*
  * We don't allow rdtgroup directories to be created anywhere
  * except the root directory. Thus when looking for the rdtgroup
@@ -1969,13 +1931,27 @@ static int mkdir_mondata_all(struct kernfs_node *parent_kn,
 			     struct rdtgroup *prgrp,
 			     struct kernfs_node **mon_data_kn);
 
-static struct dentry *rdt_mount(struct file_system_type *fs_type,
-				int flags, const char *unused_dev_name,
-				void *data)
+static int rdt_enable_ctx(struct rdt_fs_context *ctx)
+{
+	int ret = 0;
+
+	if (ctx->enable_cdpl2)
+		ret = cdpl2_enable();
+
+	if (!ret && ctx->enable_cdpl3)
+		ret = cdpl3_enable();
+
+	if (!ret && ctx->enable_mba_mbps)
+		ret = set_mba_sc(true);
+
+	return ret;
+}
+
+static int rdt_get_tree(struct fs_context *fc)
 {
+	struct rdt_fs_context *ctx = rdt_fc2context(fc);
 	struct rdt_domain *dom;
 	struct rdt_resource *r;
-	struct dentry *dentry;
 	int ret;
 
 	cpus_read_lock();
@@ -1984,53 +1960,42 @@ static struct dentry *rdt_mount(struct file_system_type *fs_type,
 	 * resctrl file system can only be mounted once.
 	 */
 	if (static_branch_unlikely(&rdt_enable_key)) {
-		dentry = ERR_PTR(-EBUSY);
+		ret = -EBUSY;
 		goto out;
 	}
 
-	ret = parse_rdtgroupfs_options(data);
-	if (ret) {
-		dentry = ERR_PTR(ret);
+	ret = rdt_enable_ctx(ctx);
+	if (ret < 0)
 		goto out_cdp;
-	}
 
 	closid_init();
 
 	ret = rdtgroup_create_info_dir(rdtgroup_default.kn);
-	if (ret) {
-		dentry = ERR_PTR(ret);
-		goto out_cdp;
-	}
+	if (ret < 0)
+		goto out_mba;
 
 	if (rdt_mon_capable) {
 		ret = mongroup_create_dir(rdtgroup_default.kn,
 					  NULL, "mon_groups",
 					  &kn_mongrp);
-		if (ret) {
-			dentry = ERR_PTR(ret);
+		if (ret < 0)
 			goto out_info;
-		}
 		kernfs_get(kn_mongrp);
 
 		ret = mkdir_mondata_all(rdtgroup_default.kn,
 					&rdtgroup_default, &kn_mondata);
-		if (ret) {
-			dentry = ERR_PTR(ret);
+		if (ret < 0)
 			goto out_mongrp;
-		}
 		kernfs_get(kn_mondata);
 		rdtgroup_default.mon.mon_data_kn = kn_mondata;
 	}
 
 	ret = rdt_pseudo_lock_init();
-	if (ret) {
-		dentry = ERR_PTR(ret);
+	if (ret)
 		goto out_mondata;
-	}
 
-	dentry = kernfs_mount(fs_type, flags, rdt_root,
-			      RDTGROUP_SUPER_MAGIC, NULL);
-	if (IS_ERR(dentry))
+	ret = kernfs_get_tree(fc);
+	if (ret < 0)
 		goto out_psl;
 
 	if (rdt_alloc_capable)
@@ -2059,14 +2024,95 @@ out_mongrp:
 		kernfs_remove(kn_mongrp);
 out_info:
 	kernfs_remove(kn_info);
+out_mba:
+	if (ctx->enable_mba_mbps)
+		set_mba_sc(false);
 out_cdp:
 	cdp_disable_all();
 out:
 	rdt_last_cmd_clear();
 	mutex_unlock(&rdtgroup_mutex);
 	cpus_read_unlock();
+	return ret;
+}
+
+enum rdt_param {
+	Opt_cdp,
+	Opt_cdpl2,
+	Opt_mba_mpbs,
+	nr__rdt_params
+};
+
+static const struct fs_parameter_spec rdt_param_specs[] = {
+	fsparam_flag("cdp",		Opt_cdp),
+	fsparam_flag("cdpl2",		Opt_cdpl2),
+	fsparam_flag("mba_mpbs",	Opt_mba_mpbs),
+	{}
+};
+
+static const struct fs_parameter_description rdt_fs_parameters = {
+	.name		= "rdt",
+	.specs		= rdt_param_specs,
+};
+
+static int rdt_parse_param(struct fs_context *fc, struct fs_parameter *param)
+{
+	struct rdt_fs_context *ctx = rdt_fc2context(fc);
+	struct fs_parse_result result;
+	int opt;
+
+	opt = fs_parse(fc, &rdt_fs_parameters, param, &result);
+	if (opt < 0)
+		return opt;
 
-	return dentry;
+	switch (opt) {
+	case Opt_cdp:
+		ctx->enable_cdpl3 = true;
+		return 0;
+	case Opt_cdpl2:
+		ctx->enable_cdpl2 = true;
+		return 0;
+	case Opt_mba_mpbs:
+		if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
+			return -EINVAL;
+		ctx->enable_mba_mbps = true;
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
+static void rdt_fs_context_free(struct fs_context *fc)
+{
+	struct rdt_fs_context *ctx = rdt_fc2context(fc);
+
+	kernfs_free_fs_context(fc);
+	kfree(ctx);
+}
+
+static const struct fs_context_operations rdt_fs_context_ops = {
+	.free		= rdt_fs_context_free,
+	.parse_param	= rdt_parse_param,
+	.get_tree	= rdt_get_tree,
+};
+
+static int rdt_init_fs_context(struct fs_context *fc)
+{
+	struct rdt_fs_context *ctx;
+
+	ctx = kzalloc(sizeof(struct rdt_fs_context), GFP_KERNEL);
+	if (!ctx)
+		return -ENOMEM;
+
+	ctx->kfc.root = rdt_root;
+	ctx->kfc.magic = RDTGROUP_SUPER_MAGIC;
+	fc->fs_private = &ctx->kfc;
+	fc->ops = &rdt_fs_context_ops;
+	if (fc->user_ns)
+		put_user_ns(fc->user_ns);
+	fc->user_ns = get_user_ns(&init_user_ns);
+	fc->global = true;
+	return 0;
 }
 
 static int reset_all_ctrls(struct rdt_resource *r)
@@ -2239,9 +2285,10 @@ static void rdt_kill_sb(struct super_block *sb)
 }
 
 static struct file_system_type rdt_fs_type = {
-	.name    = "resctrl",
-	.mount   = rdt_mount,
-	.kill_sb = rdt_kill_sb,
+	.name			= "resctrl",
+	.init_fs_context	= rdt_init_fs_context,
+	.parameters		= &rdt_fs_parameters,
+	.kill_sb		= rdt_kill_sb,
 };
 
 static int mon_addfile(struct kernfs_node *parent_kn, const char *name,
diff --git a/fs/kernfs/kernfs-internal.h b/fs/kernfs/kernfs-internal.h
index 3d83b114bb08..379e3a9eb1ec 100644
--- a/fs/kernfs/kernfs-internal.h
+++ b/fs/kernfs/kernfs-internal.h
@@ -17,6 +17,7 @@
 #include <linux/xattr.h>
 
 #include <linux/kernfs.h>
+#include <linux/fs_context.h>
 
 struct kernfs_iattrs {
 	struct iattr		ia_iattr;
diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c
index 4d303047a4f8..36376cc5c9c2 100644
--- a/fs/kernfs/mount.c
+++ b/fs/kernfs/mount.c
@@ -22,16 +22,6 @@
 
 struct kmem_cache *kernfs_node_cache;
 
-static int kernfs_sop_remount_fs(struct super_block *sb, int *flags, char *data)
-{
-	struct kernfs_root *root = kernfs_info(sb)->root;
-	struct kernfs_syscall_ops *scops = root->syscall_ops;
-
-	if (scops && scops->remount_fs)
-		return scops->remount_fs(root, flags, data);
-	return 0;
-}
-
 static int kernfs_sop_show_options(struct seq_file *sf, struct dentry *dentry)
 {
 	struct kernfs_root *root = kernfs_root(kernfs_dentry_node(dentry));
@@ -60,7 +50,6 @@ const struct super_operations kernfs_sops = {
 	.drop_inode	= generic_delete_inode,
 	.evict_inode	= kernfs_evict_inode,
 
-	.remount_fs	= kernfs_sop_remount_fs,
 	.show_options	= kernfs_sop_show_options,
 	.show_path	= kernfs_sop_show_path,
 };
@@ -222,7 +211,7 @@ struct dentry *kernfs_node_dentry(struct kernfs_node *kn,
 	} while (true);
 }
 
-static int kernfs_fill_super(struct super_block *sb, unsigned long magic)
+static int kernfs_fill_super(struct super_block *sb, struct kernfs_fs_context *kfc)
 {
 	struct kernfs_super_info *info = kernfs_info(sb);
 	struct inode *inode;
@@ -233,7 +222,7 @@ static int kernfs_fill_super(struct super_block *sb, unsigned long magic)
 	sb->s_iflags |= SB_I_NOEXEC | SB_I_NODEV;
 	sb->s_blocksize = PAGE_SIZE;
 	sb->s_blocksize_bits = PAGE_SHIFT;
-	sb->s_magic = magic;
+	sb->s_magic = kfc->magic;
 	sb->s_op = &kernfs_sops;
 	sb->s_xattr = kernfs_xattr_handlers;
 	if (info->root->flags & KERNFS_ROOT_SUPPORT_EXPORTOP)
@@ -263,21 +252,20 @@ static int kernfs_fill_super(struct super_block *sb, unsigned long magic)
 	return 0;
 }
 
-static int kernfs_test_super(struct super_block *sb, void *data)
+static int kernfs_test_super(struct super_block *sb, struct fs_context *fc)
 {
 	struct kernfs_super_info *sb_info = kernfs_info(sb);
-	struct kernfs_super_info *info = data;
+	struct kernfs_super_info *info = fc->s_fs_info;
 
 	return sb_info->root == info->root && sb_info->ns == info->ns;
 }
 
-static int kernfs_set_super(struct super_block *sb, void *data)
+static int kernfs_set_super(struct super_block *sb, struct fs_context *fc)
 {
-	int error;
-	error = set_anon_super(sb, data);
-	if (!error)
-		sb->s_fs_info = data;
-	return error;
+	struct kernfs_fs_context *kfc = fc->fs_private;
+
+	kfc->ns_tag = NULL;
+	return set_anon_super_fc(sb, fc);
 }
 
 /**
@@ -294,63 +282,60 @@ const void *kernfs_super_ns(struct super_block *sb)
 }
 
 /**
- * kernfs_mount_ns - kernfs mount helper
- * @fs_type: file_system_type of the fs being mounted
- * @flags: mount flags specified for the mount
- * @root: kernfs_root of the hierarchy being mounted
- * @magic: file system specific magic number
- * @new_sb_created: tell the caller if we allocated a new superblock
- * @ns: optional namespace tag of the mount
+ * kernfs_get_tree - kernfs filesystem access/retrieval helper
+ * @fc: The filesystem context.
  *
- * This is to be called from each kernfs user's file_system_type->mount()
- * implementation, which should pass through the specified @fs_type and
- * @flags, and specify the hierarchy and namespace tag to mount via @root
- * and @ns, respectively.
- *
- * The return value can be passed to the vfs layer verbatim.
+ * This is to be called from each kernfs user's fs_context->ops->get_tree()
+ * implementation, which should set the specified ->@fs_type and ->@flags, and
+ * specify the hierarchy and namespace tag to mount via ->@root and ->@ns,
+ * respectively.
  */
-struct dentry *kernfs_mount_ns(struct file_system_type *fs_type, int flags,
-				struct kernfs_root *root, unsigned long magic,
-				bool *new_sb_created, const void *ns)
+int kernfs_get_tree(struct fs_context *fc)
 {
+	struct kernfs_fs_context *kfc = fc->fs_private;
 	struct super_block *sb;
 	struct kernfs_super_info *info;
 	int error;
 
 	info = kzalloc(sizeof(*info), GFP_KERNEL);
 	if (!info)
-		return ERR_PTR(-ENOMEM);
+		return -ENOMEM;
 
-	info->root = root;
-	info->ns = ns;
+	info->root = kfc->root;
+	info->ns = kfc->ns_tag;
 	INIT_LIST_HEAD(&info->node);
 
-	sb = sget_userns(fs_type, kernfs_test_super, kernfs_set_super, flags,
-			 &init_user_ns, info);
-	if (IS_ERR(sb) || sb->s_fs_info != info)
-		kfree(info);
+	fc->s_fs_info = info;
+	sb = sget_fc(fc, kernfs_test_super, kernfs_set_super);
 	if (IS_ERR(sb))
-		return ERR_CAST(sb);
-
-	if (new_sb_created)
-		*new_sb_created = !sb->s_root;
+		return PTR_ERR(sb);
 
 	if (!sb->s_root) {
 		struct kernfs_super_info *info = kernfs_info(sb);
 
-		error = kernfs_fill_super(sb, magic);
+		kfc->new_sb_created = true;
+
+		error = kernfs_fill_super(sb, kfc);
 		if (error) {
 			deactivate_locked_super(sb);
-			return ERR_PTR(error);
+			return error;
 		}
 		sb->s_flags |= SB_ACTIVE;
 
 		mutex_lock(&kernfs_mutex);
-		list_add(&info->node, &root->supers);
+		list_add(&info->node, &info->root->supers);
 		mutex_unlock(&kernfs_mutex);
 	}
 
-	return dget(sb->s_root);
+	fc->root = dget(sb->s_root);
+	return 0;
+}
+
+void kernfs_free_fs_context(struct fs_context *fc)
+{
+	/* Note that we don't deal with kfc->ns_tag here. */
+	kfree(fc->s_fs_info);
+	fc->s_fs_info = NULL;
 }
 
 /**
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index 92682fcc41f6..4cb21b558a85 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -13,34 +13,69 @@
 #include <linux/magic.h>
 #include <linux/mount.h>
 #include <linux/init.h>
+#include <linux/slab.h>
 #include <linux/user_namespace.h>
+#include <linux/fs_context.h>
+#include <net/net_namespace.h>
 
 #include "sysfs.h"
 
 static struct kernfs_root *sysfs_root;
 struct kernfs_node *sysfs_root_kn;
 
-static struct dentry *sysfs_mount(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data)
+static int sysfs_get_tree(struct fs_context *fc)
 {
-	struct dentry *root;
-	void *ns;
-	bool new_sb = false;
+	struct kernfs_fs_context *kfc = fc->fs_private;
+	int ret;
 
-	if (!(flags & SB_KERNMOUNT)) {
+	ret = kernfs_get_tree(fc);
+	if (ret)
+		return ret;
+
+	if (kfc->new_sb_created)
+		fc->root->d_sb->s_iflags |= SB_I_USERNS_VISIBLE;
+	return 0;
+}
+
+static void sysfs_fs_context_free(struct fs_context *fc)
+{
+	struct kernfs_fs_context *kfc = fc->fs_private;
+
+	if (kfc->ns_tag)
+		kobj_ns_drop(KOBJ_NS_TYPE_NET, kfc->ns_tag);
+	kernfs_free_fs_context(fc);
+	kfree(kfc);
+}
+
+static const struct fs_context_operations sysfs_fs_context_ops = {
+	.free		= sysfs_fs_context_free,
+	.get_tree	= sysfs_get_tree,
+};
+
+static int sysfs_init_fs_context(struct fs_context *fc)
+{
+	struct kernfs_fs_context *kfc;
+	struct net *netns;
+
+	if (!(fc->sb_flags & SB_KERNMOUNT)) {
 		if (!kobj_ns_current_may_mount(KOBJ_NS_TYPE_NET))
-			return ERR_PTR(-EPERM);
+			return -EPERM;
 	}
 
-	ns = kobj_ns_grab_current(KOBJ_NS_TYPE_NET);
-	root = kernfs_mount_ns(fs_type, flags, sysfs_root,
-				SYSFS_MAGIC, &new_sb, ns);
-	if (!new_sb)
-		kobj_ns_drop(KOBJ_NS_TYPE_NET, ns);
-	else if (!IS_ERR(root))
-		root->d_sb->s_iflags |= SB_I_USERNS_VISIBLE;
+	kfc = kzalloc(sizeof(struct kernfs_fs_context), GFP_KERNEL);
+	if (!kfc)
+		return -ENOMEM;
 
-	return root;
+	kfc->ns_tag = netns = kobj_ns_grab_current(KOBJ_NS_TYPE_NET);
+	kfc->root = sysfs_root;
+	kfc->magic = SYSFS_MAGIC;
+	fc->fs_private = kfc;
+	fc->ops = &sysfs_fs_context_ops;
+	if (fc->user_ns)
+		put_user_ns(fc->user_ns);
+	fc->user_ns = get_user_ns(netns->user_ns);
+	fc->global = true;
+	return 0;
 }
 
 static void sysfs_kill_sb(struct super_block *sb)
@@ -52,10 +87,10 @@ static void sysfs_kill_sb(struct super_block *sb)
 }
 
 static struct file_system_type sysfs_fs_type = {
-	.name		= "sysfs",
-	.mount		= sysfs_mount,
-	.kill_sb	= sysfs_kill_sb,
-	.fs_flags	= FS_USERNS_MOUNT,
+	.name			= "sysfs",
+	.init_fs_context	= sysfs_init_fs_context,
+	.kill_sb		= sysfs_kill_sb,
+	.fs_flags		= FS_USERNS_MOUNT,
 };
 
 int __init sysfs_init(void)
diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h
index 44acb4c3659c..822a64e65b41 100644
--- a/include/linux/kernfs.h
+++ b/include/linux/kernfs.h
@@ -25,7 +25,9 @@ struct seq_file;
 struct vm_area_struct;
 struct super_block;
 struct file_system_type;
+struct fs_context;
 
+struct kernfs_fs_context;
 struct kernfs_open_node;
 struct kernfs_iattrs;
 
@@ -167,7 +169,6 @@ struct kernfs_node {
  * kernfs_node parameter.
  */
 struct kernfs_syscall_ops {
-	int (*remount_fs)(struct kernfs_root *root, int *flags, char *data);
 	int (*show_options)(struct seq_file *sf, struct kernfs_root *root);
 
 	int (*mkdir)(struct kernfs_node *parent, const char *name,
@@ -268,6 +269,18 @@ struct kernfs_ops {
 #endif
 };
 
+/*
+ * The kernfs superblock creation/mount parameter context.
+ */
+struct kernfs_fs_context {
+	struct kernfs_root	*root;		/* Root of the hierarchy being mounted */
+	void			*ns_tag;	/* Namespace tag of the mount (or NULL) */
+	unsigned long		magic;		/* File system specific magic number */
+
+	/* The following are set/used by kernfs_mount() */
+	bool			new_sb_created;	/* Set to T if we allocated a new sb */
+};
+
 #ifdef CONFIG_KERNFS
 
 static inline enum kernfs_node_type kernfs_type(struct kernfs_node *kn)
@@ -353,9 +366,8 @@ int kernfs_setattr(struct kernfs_node *kn, const struct iattr *iattr);
 void kernfs_notify(struct kernfs_node *kn);
 
 const void *kernfs_super_ns(struct super_block *sb);
-struct dentry *kernfs_mount_ns(struct file_system_type *fs_type, int flags,
-			       struct kernfs_root *root, unsigned long magic,
-			       bool *new_sb_created, const void *ns);
+int kernfs_get_tree(struct fs_context *fc);
+void kernfs_free_fs_context(struct fs_context *fc);
 void kernfs_kill_sb(struct super_block *sb);
 
 void kernfs_init(void);
@@ -458,11 +470,10 @@ static inline void kernfs_notify(struct kernfs_node *kn) { }
 static inline const void *kernfs_super_ns(struct super_block *sb)
 { return NULL; }
 
-static inline struct dentry *
-kernfs_mount_ns(struct file_system_type *fs_type, int flags,
-		struct kernfs_root *root, unsigned long magic,
-		bool *new_sb_created, const void *ns)
-{ return ERR_PTR(-ENOSYS); }
+static inline int kernfs_get_tree(struct fs_context *fc)
+{ return -ENOSYS; }
+
+static inline void kernfs_free_fs_context(struct fs_context *fc) { }
 
 static inline void kernfs_kill_sb(struct super_block *sb) { }
 
@@ -545,13 +556,4 @@ static inline int kernfs_rename(struct kernfs_node *kn,
 	return kernfs_rename_ns(kn, new_parent, new_name, NULL);
 }
 
-static inline struct dentry *
-kernfs_mount(struct file_system_type *fs_type, int flags,
-		struct kernfs_root *root, unsigned long magic,
-		bool *new_sb_created)
-{
-	return kernfs_mount_ns(fs_type, flags, root,
-				magic, new_sb_created, NULL);
-}
-
 #endif	/* __LINUX_KERNFS_H */
diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h
index 37cf709b7a0e..30e39f3932ad 100644
--- a/kernel/cgroup/cgroup-internal.h
+++ b/kernel/cgroup/cgroup-internal.h
@@ -41,6 +41,7 @@ extern void __init enable_debug_cgroup(void);
  * The cgroup filesystem superblock creation/mount context.
  */
 struct cgroup_fs_context {
+	struct kernfs_fs_context kfc;
 	struct cgroup_root	*root;
 	struct cgroup_namespace	*ns;
 	unsigned int	flags;			/* CGRP_ROOT_* flags */
@@ -56,7 +57,9 @@ struct cgroup_fs_context {
 
 static inline struct cgroup_fs_context *cgroup_fc2context(struct fs_context *fc)
 {
-	return fc->fs_private;
+	struct kernfs_fs_context *kfc = fc->fs_private;
+
+	return container_of(kfc, struct cgroup_fs_context, kfc);
 }
 
 /*
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 0c6bef234a7c..747e5b17f9da 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -2039,18 +2039,14 @@ out:
 int cgroup_do_get_tree(struct fs_context *fc)
 {
 	struct cgroup_fs_context *ctx = cgroup_fc2context(fc);
-	bool new_sb = false;
-	unsigned long magic;
-	int ret = 0;
+	int ret;
 
+	ctx->kfc.root = ctx->root->kf_root;
 	if (fc->fs_type == &cgroup2_fs_type)
-		magic = CGROUP2_SUPER_MAGIC;
+		ctx->kfc.magic = CGROUP2_SUPER_MAGIC;
 	else
-		magic = CGROUP_SUPER_MAGIC;
-	fc->root = kernfs_mount(fc->fs_type, fc->sb_flags, ctx->root->kf_root,
-				magic, &new_sb);
-	if (IS_ERR(fc->root))
-		ret = PTR_ERR(fc->root);
+		ctx->kfc.magic = CGROUP_SUPER_MAGIC;
+	ret = kernfs_get_tree(fc);
 
 	/*
 	 * In non-init cgroup namespace, instead of root cgroup's dentry,
@@ -2078,7 +2074,7 @@ int cgroup_do_get_tree(struct fs_context *fc)
 		}
 	}
 
-	if (!new_sb)
+	if (!ctx->kfc.new_sb_created)
 		cgroup_put(&ctx->root->cgrp);
 
 	return ret;
@@ -2094,19 +2090,15 @@ static void cgroup_fs_context_free(struct fs_context *fc)
 	kfree(ctx->name);
 	kfree(ctx->release_agent);
 	put_cgroup_ns(ctx->ns);
+	kernfs_free_fs_context(fc);
 	kfree(ctx);
 }
 
 static int cgroup_get_tree(struct fs_context *fc)
 {
-	struct cgroup_namespace *ns = current->nsproxy->cgroup_ns;
 	struct cgroup_fs_context *ctx = cgroup_fc2context(fc);
 	int ret;
 
-	/* Check if the caller has permission to mount. */
-	if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN))
-		return -EPERM;
-
 	cgrp_dfl_visible = true;
 	cgroup_get_live(&cgrp_dfl_root.cgrp);
 	ctx->root = &cgrp_dfl_root;
@@ -2132,7 +2124,8 @@ static const struct fs_context_operations cgroup1_fs_context_ops = {
 };
 
 /*
- * Initialise the cgroup filesystem creation/reconfiguration context.
+ * Initialise the cgroup filesystem creation/reconfiguration context.  Notably,
+ * we select the namespace we're going to use.
  */
 static int cgroup_init_fs_context(struct fs_context *fc)
 {
@@ -2151,11 +2144,15 @@ static int cgroup_init_fs_context(struct fs_context *fc)
 
 	ctx->ns = current->nsproxy->cgroup_ns;
 	get_cgroup_ns(ctx->ns);
-	fc->fs_private = ctx;
+	fc->fs_private = &ctx->kfc;
 	if (fc->fs_type == &cgroup2_fs_type)
 		fc->ops = &cgroup_fs_context_ops;
 	else
 		fc->ops = &cgroup1_fs_context_ops;
+	if (fc->user_ns)
+		put_user_ns(fc->user_ns);
+	fc->user_ns = get_user_ns(ctx->ns->user_ns);
+	fc->global = true;
 	return 0;
 }
 
-- 
cgit v1.2.3


From d911b4585eb3501f752160e8e0f1bb00c3c7c4e5 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 1 Nov 2018 23:07:26 +0000
Subject: vfs: Remove kern_mount_data()

The kern_mount_data() isn't used any more so remove it.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namespace.c     | 6 +++---
 include/linux/fs.h | 3 +--
 2 files changed, 4 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/fs/namespace.c b/fs/namespace.c
index 1a1ed2528f47..bb9b7db1c66c 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -3390,10 +3390,10 @@ void put_mnt_ns(struct mnt_namespace *ns)
 	free_mnt_ns(ns);
 }
 
-struct vfsmount *kern_mount_data(struct file_system_type *type, void *data)
+struct vfsmount *kern_mount(struct file_system_type *type)
 {
 	struct vfsmount *mnt;
-	mnt = vfs_kern_mount(type, SB_KERNMOUNT, type->name, data);
+	mnt = vfs_kern_mount(type, SB_KERNMOUNT, type->name, NULL);
 	if (!IS_ERR(mnt)) {
 		/*
 		 * it is a longterm mount, don't release mnt until
@@ -3403,7 +3403,7 @@ struct vfsmount *kern_mount_data(struct file_system_type *type, void *data)
 	}
 	return mnt;
 }
-EXPORT_SYMBOL_GPL(kern_mount_data);
+EXPORT_SYMBOL_GPL(kern_mount);
 
 void kern_unmount(struct vfsmount *mnt)
 {
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 9d05c128ccf6..3e85cb8e8c20 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2280,8 +2280,7 @@ mount_pseudo(struct file_system_type *fs_type, char *name,
 
 extern int register_filesystem(struct file_system_type *);
 extern int unregister_filesystem(struct file_system_type *);
-extern struct vfsmount *kern_mount_data(struct file_system_type *, void *data);
-#define kern_mount(type) kern_mount_data(type, NULL)
+extern struct vfsmount *kern_mount(struct file_system_type *);
 extern void kern_unmount(struct vfsmount *mnt);
 extern int may_umount_tree(struct vfsmount *);
 extern int may_umount(struct vfsmount *);
-- 
cgit v1.2.3


From e7582e16a170db4c85995c1c03d194ea1ea621fc Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 1 Nov 2018 23:07:26 +0000
Subject: vfs: Implement logging through fs_context

Implement the ability for filesystems to log error, warning and
informational messages through the fs_context.  In the future, these will
be extractable by userspace by reading from an fd created by the fsopen()
syscall.

Error messages are prefixed with "e ", warnings with "w " and informational
messages with "i ".

In the future, inside the kernel, formatted messages will be malloc'd but
unformatted messages will not copied if they're either in the core .rodata
section or in the .rodata section of the filesystem module pinned by
fs_context::fs_type.  The messages will only be good till the fs_type is
released.

Note that the logging object will be shared between duplicated fs_context
structures.  This is so that such as NFS which do a mount within a mount
can get at least some of the errors from the inner mount.

Five logging functions are provided for this:

 (1) void logfc(struct fs_context *fc, const char *fmt, ...);

     This logs a message into the context.  If the buffer is full, the
     earliest message is discarded.

 (2) void errorf(fc, fmt, ...);

     This wraps logfc() to log an error.

 (3) void invalf(fc, fmt, ...);

     This wraps errorf() and returns -EINVAL for convenience.

 (4) void warnf(fc, fmt, ...);

     This wraps logfc() to log a warning.

 (5) void infof(fc, fmt, ...);

     This wraps logfc() to log an informational message.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/fs_context.c            | 30 ++++++++++++++++++++++++++++++
 include/linux/fs_context.h | 18 ++++++++++++++----
 2 files changed, 44 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/fs/fs_context.c b/fs/fs_context.c
index 57f61833ac83..87e3546b9a52 100644
--- a/fs/fs_context.c
+++ b/fs/fs_context.c
@@ -378,6 +378,36 @@ err_fc:
 }
 EXPORT_SYMBOL(vfs_dup_fs_context);
 
+#ifdef CONFIG_PRINTK
+/**
+ * logfc - Log a message to a filesystem context
+ * @fc: The filesystem context to log to.
+ * @fmt: The format of the buffer.
+ */
+void logfc(struct fs_context *fc, const char *fmt, ...)
+{
+	va_list va;
+
+	va_start(va, fmt);
+
+	switch (fmt[0]) {
+	case 'w':
+		vprintk_emit(0, LOGLEVEL_WARNING, NULL, 0, fmt, va);
+		break;
+	case 'e':
+		vprintk_emit(0, LOGLEVEL_ERR, NULL, 0, fmt, va);
+		break;
+	default:
+		vprintk_emit(0, LOGLEVEL_NOTICE, NULL, 0, fmt, va);
+		break;
+	}
+
+	pr_cont("\n");
+	va_end(va);
+}
+EXPORT_SYMBOL(logfc);
+#endif
+
 /**
  * put_fs_context - Dispose of a superblock configuration context.
  * @fc: The context to dispose of.
diff --git a/include/linux/fs_context.h b/include/linux/fs_context.h
index 0db0b645c7b8..eaca452088fa 100644
--- a/include/linux/fs_context.h
+++ b/include/linux/fs_context.h
@@ -133,7 +133,17 @@ extern int vfs_get_super(struct fs_context *fc,
 			 int (*fill_super)(struct super_block *sb,
 					   struct fs_context *fc));
 
-#define logfc(FC, FMT, ...) pr_notice(FMT, ## __VA_ARGS__)
+extern const struct file_operations fscontext_fops;
+
+#ifdef CONFIG_PRINTK
+extern __attribute__((format(printf, 2, 3)))
+void logfc(struct fs_context *fc, const char *fmt, ...);
+#else
+static inline __attribute__((format(printf, 2, 3)))
+void logfc(struct fs_context *fc, const char *fmt, ...)
+{
+}
+#endif
 
 /**
  * infof - Store supplementary informational message
@@ -143,7 +153,7 @@ extern int vfs_get_super(struct fs_context *fc,
  * Store the supplementary informational message for the process if the process
  * has enabled the facility.
  */
-#define infof(fc, fmt, ...) ({ logfc(fc, fmt, ## __VA_ARGS__); })
+#define infof(fc, fmt, ...) ({ logfc(fc, "i "fmt, ## __VA_ARGS__); })
 
 /**
  * warnf - Store supplementary warning message
@@ -153,7 +163,7 @@ extern int vfs_get_super(struct fs_context *fc,
  * Store the supplementary warning message for the process if the process has
  * enabled the facility.
  */
-#define warnf(fc, fmt, ...) ({ logfc(fc, fmt, ## __VA_ARGS__); })
+#define warnf(fc, fmt, ...) ({ logfc(fc, "w "fmt, ## __VA_ARGS__); })
 
 /**
  * errorf - Store supplementary error message
@@ -163,7 +173,7 @@ extern int vfs_get_super(struct fs_context *fc,
  * Store the supplementary error message for the process if the process has
  * enabled the facility.
  */
-#define errorf(fc, fmt, ...) ({ logfc(fc, fmt, ## __VA_ARGS__); })
+#define errorf(fc, fmt, ...) ({ logfc(fc, "e "fmt, ## __VA_ARGS__); })
 
 /**
  * invalf - Store supplementary invalid argument error message
-- 
cgit v1.2.3


From fff42928ade591969836ff49888d063b829ac888 Mon Sep 17 00:00:00 2001
From: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
Date: Wed, 27 Feb 2019 11:26:46 -0800
Subject: PCI/ATS: Add inline to pci_prg_resp_pasid_required()

Fix unused function warning when compiled with CONFIG_PCI_PASID
disabled.

Fixes: e5567f5f6762 ("PCI/ATS: Add pci_prg_resp_pasid_required() interface.")
Signed-off-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 include/linux/pci-ats.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/pci-ats.h b/include/linux/pci-ats.h
index facfd6a18fe1..1ebb88e7c184 100644
--- a/include/linux/pci-ats.h
+++ b/include/linux/pci-ats.h
@@ -67,7 +67,7 @@ static inline int pci_max_pasids(struct pci_dev *pdev)
 	return -EINVAL;
 }
 
-static int pci_prg_resp_pasid_required(struct pci_dev *pdev)
+static inline int pci_prg_resp_pasid_required(struct pci_dev *pdev)
 {
 	return 0;
 }
-- 
cgit v1.2.3


From fe99a4f4d6022ec92f9b52a5528cb9b77513e7d1 Mon Sep 17 00:00:00 2001
From: Julia Cartwright <julia@ni.com>
Date: Tue, 12 Feb 2019 17:25:53 +0100
Subject: kthread: Convert worker lock to raw spinlock

In order to enable the queuing of kthread work items from hardirq context
even when PREEMPT_RT_FULL is enabled, convert the worker spin_lock to a
raw_spin_lock.

This is only acceptable to do because the work performed under the lock is
well-bounded and minimal.

Reported-by: Steffen Trumtrar <s.trumtrar@pengutronix.de>
Reported-by: Tim Sander <tim@krieglstein.org>
Signed-off-by: Julia Cartwright <julia@ni.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Steffen Trumtrar <s.trumtrar@pengutronix.de>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Cc: Guenter Roeck <linux@roeck-us.net>
Link: https://lkml.kernel.org/r/20190212162554.19779-1-bigeasy@linutronix.de
---
 include/linux/kthread.h |  4 ++--
 kernel/kthread.c        | 42 +++++++++++++++++++++---------------------
 2 files changed, 23 insertions(+), 23 deletions(-)

(limited to 'include')

diff --git a/include/linux/kthread.h b/include/linux/kthread.h
index c1961761311d..6b8c064f0cbc 100644
--- a/include/linux/kthread.h
+++ b/include/linux/kthread.h
@@ -85,7 +85,7 @@ enum {
 
 struct kthread_worker {
 	unsigned int		flags;
-	spinlock_t		lock;
+	raw_spinlock_t		lock;
 	struct list_head	work_list;
 	struct list_head	delayed_work_list;
 	struct task_struct	*task;
@@ -106,7 +106,7 @@ struct kthread_delayed_work {
 };
 
 #define KTHREAD_WORKER_INIT(worker)	{				\
-	.lock = __SPIN_LOCK_UNLOCKED((worker).lock),			\
+	.lock = __RAW_SPIN_LOCK_UNLOCKED((worker).lock),		\
 	.work_list = LIST_HEAD_INIT((worker).work_list),		\
 	.delayed_work_list = LIST_HEAD_INIT((worker).delayed_work_list),\
 	}
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 087d18d771b5..5641b55783a6 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -599,7 +599,7 @@ void __kthread_init_worker(struct kthread_worker *worker,
 				struct lock_class_key *key)
 {
 	memset(worker, 0, sizeof(struct kthread_worker));
-	spin_lock_init(&worker->lock);
+	raw_spin_lock_init(&worker->lock);
 	lockdep_set_class_and_name(&worker->lock, key, name);
 	INIT_LIST_HEAD(&worker->work_list);
 	INIT_LIST_HEAD(&worker->delayed_work_list);
@@ -641,21 +641,21 @@ repeat:
 
 	if (kthread_should_stop()) {
 		__set_current_state(TASK_RUNNING);
-		spin_lock_irq(&worker->lock);
+		raw_spin_lock_irq(&worker->lock);
 		worker->task = NULL;
-		spin_unlock_irq(&worker->lock);
+		raw_spin_unlock_irq(&worker->lock);
 		return 0;
 	}
 
 	work = NULL;
-	spin_lock_irq(&worker->lock);
+	raw_spin_lock_irq(&worker->lock);
 	if (!list_empty(&worker->work_list)) {
 		work = list_first_entry(&worker->work_list,
 					struct kthread_work, node);
 		list_del_init(&work->node);
 	}
 	worker->current_work = work;
-	spin_unlock_irq(&worker->lock);
+	raw_spin_unlock_irq(&worker->lock);
 
 	if (work) {
 		__set_current_state(TASK_RUNNING);
@@ -812,12 +812,12 @@ bool kthread_queue_work(struct kthread_worker *worker,
 	bool ret = false;
 	unsigned long flags;
 
-	spin_lock_irqsave(&worker->lock, flags);
+	raw_spin_lock_irqsave(&worker->lock, flags);
 	if (!queuing_blocked(worker, work)) {
 		kthread_insert_work(worker, work, &worker->work_list);
 		ret = true;
 	}
-	spin_unlock_irqrestore(&worker->lock, flags);
+	raw_spin_unlock_irqrestore(&worker->lock, flags);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(kthread_queue_work);
@@ -843,7 +843,7 @@ void kthread_delayed_work_timer_fn(struct timer_list *t)
 	if (WARN_ON_ONCE(!worker))
 		return;
 
-	spin_lock(&worker->lock);
+	raw_spin_lock(&worker->lock);
 	/* Work must not be used with >1 worker, see kthread_queue_work(). */
 	WARN_ON_ONCE(work->worker != worker);
 
@@ -852,7 +852,7 @@ void kthread_delayed_work_timer_fn(struct timer_list *t)
 	list_del_init(&work->node);
 	kthread_insert_work(worker, work, &worker->work_list);
 
-	spin_unlock(&worker->lock);
+	raw_spin_unlock(&worker->lock);
 }
 EXPORT_SYMBOL(kthread_delayed_work_timer_fn);
 
@@ -908,14 +908,14 @@ bool kthread_queue_delayed_work(struct kthread_worker *worker,
 	unsigned long flags;
 	bool ret = false;
 
-	spin_lock_irqsave(&worker->lock, flags);
+	raw_spin_lock_irqsave(&worker->lock, flags);
 
 	if (!queuing_blocked(worker, work)) {
 		__kthread_queue_delayed_work(worker, dwork, delay);
 		ret = true;
 	}
 
-	spin_unlock_irqrestore(&worker->lock, flags);
+	raw_spin_unlock_irqrestore(&worker->lock, flags);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(kthread_queue_delayed_work);
@@ -951,7 +951,7 @@ void kthread_flush_work(struct kthread_work *work)
 	if (!worker)
 		return;
 
-	spin_lock_irq(&worker->lock);
+	raw_spin_lock_irq(&worker->lock);
 	/* Work must not be used with >1 worker, see kthread_queue_work(). */
 	WARN_ON_ONCE(work->worker != worker);
 
@@ -963,7 +963,7 @@ void kthread_flush_work(struct kthread_work *work)
 	else
 		noop = true;
 
-	spin_unlock_irq(&worker->lock);
+	raw_spin_unlock_irq(&worker->lock);
 
 	if (!noop)
 		wait_for_completion(&fwork.done);
@@ -996,9 +996,9 @@ static bool __kthread_cancel_work(struct kthread_work *work, bool is_dwork,
 		 * any queuing is blocked by setting the canceling counter.
 		 */
 		work->canceling++;
-		spin_unlock_irqrestore(&worker->lock, *flags);
+		raw_spin_unlock_irqrestore(&worker->lock, *flags);
 		del_timer_sync(&dwork->timer);
-		spin_lock_irqsave(&worker->lock, *flags);
+		raw_spin_lock_irqsave(&worker->lock, *flags);
 		work->canceling--;
 	}
 
@@ -1045,7 +1045,7 @@ bool kthread_mod_delayed_work(struct kthread_worker *worker,
 	unsigned long flags;
 	int ret = false;
 
-	spin_lock_irqsave(&worker->lock, flags);
+	raw_spin_lock_irqsave(&worker->lock, flags);
 
 	/* Do not bother with canceling when never queued. */
 	if (!work->worker)
@@ -1062,7 +1062,7 @@ bool kthread_mod_delayed_work(struct kthread_worker *worker,
 fast_queue:
 	__kthread_queue_delayed_work(worker, dwork, delay);
 out:
-	spin_unlock_irqrestore(&worker->lock, flags);
+	raw_spin_unlock_irqrestore(&worker->lock, flags);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(kthread_mod_delayed_work);
@@ -1076,7 +1076,7 @@ static bool __kthread_cancel_work_sync(struct kthread_work *work, bool is_dwork)
 	if (!worker)
 		goto out;
 
-	spin_lock_irqsave(&worker->lock, flags);
+	raw_spin_lock_irqsave(&worker->lock, flags);
 	/* Work must not be used with >1 worker, see kthread_queue_work(). */
 	WARN_ON_ONCE(work->worker != worker);
 
@@ -1090,13 +1090,13 @@ static bool __kthread_cancel_work_sync(struct kthread_work *work, bool is_dwork)
 	 * In the meantime, block any queuing by setting the canceling counter.
 	 */
 	work->canceling++;
-	spin_unlock_irqrestore(&worker->lock, flags);
+	raw_spin_unlock_irqrestore(&worker->lock, flags);
 	kthread_flush_work(work);
-	spin_lock_irqsave(&worker->lock, flags);
+	raw_spin_lock_irqsave(&worker->lock, flags);
 	work->canceling--;
 
 out_fast:
-	spin_unlock_irqrestore(&worker->lock, flags);
+	raw_spin_unlock_irqrestore(&worker->lock, flags);
 out:
 	return ret;
 }
-- 
cgit v1.2.3


From ad01423aedaa7c6dd62d560b73a3cb39e6da3901 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Tue, 12 Feb 2019 17:25:54 +0100
Subject: kthread: Do not use TIMER_IRQSAFE

The TIMER_IRQSAFE usage was introduced in commit 22597dc3d97b1 ("kthread:
initial support for delayed kthread work") which modelled the delayed
kthread code after workqueue's code. The workqueue code requires the flag
TIMER_IRQSAFE for synchronisation purpose. This is not true for kthread's
delay timer since all operations occur under a lock.

Remove TIMER_IRQSAFE from the timer initialisation and use timer_setup()
for initialisation purpose which is the official function.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Link: https://lkml.kernel.org/r/20190212162554.19779-2-bigeasy@linutronix.de
---
 include/linux/kthread.h | 5 ++---
 kernel/kthread.c        | 5 +++--
 2 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/kthread.h b/include/linux/kthread.h
index 6b8c064f0cbc..3d9d834c66a2 100644
--- a/include/linux/kthread.h
+++ b/include/linux/kthread.h
@@ -164,9 +164,8 @@ extern void __kthread_init_worker(struct kthread_worker *worker,
 #define kthread_init_delayed_work(dwork, fn)				\
 	do {								\
 		kthread_init_work(&(dwork)->work, (fn));		\
-		__init_timer(&(dwork)->timer,				\
-			     kthread_delayed_work_timer_fn,		\
-			     TIMER_IRQSAFE);				\
+		timer_setup(&(dwork)->timer,				\
+			     kthread_delayed_work_timer_fn, 0);		\
 	} while (0)
 
 int kthread_worker_fn(void *worker_ptr);
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 5641b55783a6..537335541267 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -835,6 +835,7 @@ void kthread_delayed_work_timer_fn(struct timer_list *t)
 	struct kthread_delayed_work *dwork = from_timer(dwork, t, timer);
 	struct kthread_work *work = &dwork->work;
 	struct kthread_worker *worker = work->worker;
+	unsigned long flags;
 
 	/*
 	 * This might happen when a pending work is reinitialized.
@@ -843,7 +844,7 @@ void kthread_delayed_work_timer_fn(struct timer_list *t)
 	if (WARN_ON_ONCE(!worker))
 		return;
 
-	raw_spin_lock(&worker->lock);
+	raw_spin_lock_irqsave(&worker->lock, flags);
 	/* Work must not be used with >1 worker, see kthread_queue_work(). */
 	WARN_ON_ONCE(work->worker != worker);
 
@@ -852,7 +853,7 @@ void kthread_delayed_work_timer_fn(struct timer_list *t)
 	list_del_init(&work->node);
 	kthread_insert_work(worker, work, &worker->work_list);
 
-	raw_spin_unlock(&worker->lock);
+	raw_spin_unlock_irqrestore(&worker->lock, flags);
 }
 EXPORT_SYMBOL(kthread_delayed_work_timer_fn);
 
-- 
cgit v1.2.3


From 541664d360d1fdaa116473410265b2cb8a806b50 Mon Sep 17 00:00:00 2001
From: Oded Gabbay <oded.gabbay@gmail.com>
Date: Thu, 28 Feb 2019 11:55:44 +0200
Subject: habanalabs: add comments in uapi/misc/habanalabs.h

Add comment about minimum and maximum size of command buffer.
Add some text about the expected input of CS IOCTL.

Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/uapi/misc/habanalabs.h | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h
index 23d6ad3459cb..7fd6f633534c 100644
--- a/include/uapi/misc/habanalabs.h
+++ b/include/uapi/misc/habanalabs.h
@@ -112,7 +112,9 @@ struct hl_cb_in {
 	__u64 cb_handle;
 	/* HL_CB_OP_* */
 	__u32 op;
-	/* Size of CB. Minimum requested size must be PAGE_SIZE */
+	/* Size of CB. Maximum size is 2MB. The minimum size that will be
+	 * allocated, regardless of this parameter's value, is PAGE_SIZE
+	 */
 	__u32 cb_size;
 	/* Context ID - Currently not in use */
 	__u32 ctx_id;
@@ -364,6 +366,12 @@ union hl_mem_args {
  * internal. The driver will get completion notifications from the device only
  * on JOBS which are enqueued in the external queues.
  *
+ * For jobs on external queues, the user needs to create command buffers
+ * through the CB ioctl and give the CB's handle to the CS ioctl. For jobs on
+ * internal queues, the user needs to prepare a "command buffer" with packets
+ * on either the SRAM or DRAM, and give the device address of that buffer to
+ * the CS ioctl.
+ *
  * This IOCTL is asynchronous in regard to the actual execution of the CS. This
  * means it returns immediately after ALL the JOBS were enqueued on their
  * relevant queues. Therefore, the user mustn't assume the CS has been completed
-- 
cgit v1.2.3


From 4f062dc1b759299851939524ff755b20542d8fc1 Mon Sep 17 00:00:00 2001
From: Igor Opaniuk <igor.opaniuk@linaro.org>
Date: Thu, 24 Jan 2019 19:32:31 +0200
Subject: tee: add cancellation support to client interface

Add support of cancellation request to the TEE kernel internal
client interface. Can be used by software TPM drivers, that leverage
TEE under the hood (for instance TPM2.0 mobile profile), for requesting
cancellation of time-consuming operations (RSA key-pair generation etc.).

Signed-off-by: Igor Opaniuk <igor.opaniuk@linaro.org>
Signed-off-by: Jens Wiklander <jens.wiklander@linaro.org>
---
 drivers/tee/tee_core.c  |  9 +++++++++
 include/linux/tee_drv.h | 12 ++++++++++++
 2 files changed, 21 insertions(+)

(limited to 'include')

diff --git a/drivers/tee/tee_core.c b/drivers/tee/tee_core.c
index 25f3b9cc8908..ecffdd8a29b7 100644
--- a/drivers/tee/tee_core.c
+++ b/drivers/tee/tee_core.c
@@ -1039,6 +1039,15 @@ int tee_client_invoke_func(struct tee_context *ctx,
 }
 EXPORT_SYMBOL_GPL(tee_client_invoke_func);
 
+int tee_client_cancel_req(struct tee_context *ctx,
+			  struct tee_ioctl_cancel_arg *arg)
+{
+	if (!ctx->teedev->desc->ops->cancel_req)
+		return -EINVAL;
+	return ctx->teedev->desc->ops->cancel_req(ctx, arg->cancel_id,
+						  arg->session);
+}
+
 static int tee_client_device_match(struct device *dev,
 				   struct device_driver *drv)
 {
diff --git a/include/linux/tee_drv.h b/include/linux/tee_drv.h
index 56d7f1b4516d..4a49f80e7f71 100644
--- a/include/linux/tee_drv.h
+++ b/include/linux/tee_drv.h
@@ -535,6 +535,18 @@ int tee_client_invoke_func(struct tee_context *ctx,
 			   struct tee_ioctl_invoke_arg *arg,
 			   struct tee_param *param);
 
+/**
+ * tee_client_cancel_req() - Request cancellation of the previous open-session
+ * or invoke-command operations in a Trusted Application
+ * @ctx:       TEE Context
+ * @arg:       Cancellation arguments, see description of
+ *             struct tee_ioctl_cancel_arg
+ *
+ * Returns < 0 on error else 0 if the cancellation was successfully requested.
+ */
+int tee_client_cancel_req(struct tee_context *ctx,
+			  struct tee_ioctl_cancel_arg *arg);
+
 static inline bool tee_param_is_memref(struct tee_param *param)
 {
 	switch (param->attr & TEE_IOCTL_PARAM_ATTR_TYPE_MASK) {
-- 
cgit v1.2.3


From 594b9a89af8e7629e95a4cd844d188361be32790 Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Wed, 27 Feb 2019 20:40:13 +0800
Subject: block: introduce mp_bvec_for_each_page() for iterating over page

mp_bvec_for_each_segment() is a bit big for the iteration, so introduce
a light-weight helper for iterating over pages, then 32bytes stack
space can be saved.

Signed-off-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/bvec.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/linux/bvec.h b/include/linux/bvec.h
index 4376f683c08a..87e82e503a52 100644
--- a/include/linux/bvec.h
+++ b/include/linux/bvec.h
@@ -188,4 +188,9 @@ static inline void mp_bvec_last_segment(const struct bio_vec *bvec,
 	}
 }
 
+#define mp_bvec_for_each_page(pg, bv, i)				\
+	for (i = (bv)->bv_offset / PAGE_SIZE;				\
+		(i <= (((bv)->bv_offset + (bv)->bv_len - 1) / PAGE_SIZE)) && \
+		(pg = bvec_nth_page((bv)->bv_page, i)); i += 1)
+
 #endif /* __LINUX_BVEC_ITER_H */
-- 
cgit v1.2.3


From 2b188cc1bb857a9d4701ae59aa7768b5124e262e Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Mon, 7 Jan 2019 10:46:33 -0700
Subject: Add io_uring IO interface

The submission queue (SQ) and completion queue (CQ) rings are shared
between the application and the kernel. This eliminates the need to
copy data back and forth to submit and complete IO.

IO submissions use the io_uring_sqe data structure, and completions
are generated in the form of io_uring_cqe data structures. The SQ
ring is an index into the io_uring_sqe array, which makes it possible
to submit a batch of IOs without them being contiguous in the ring.
The CQ ring is always contiguous, as completion events are inherently
unordered, and hence any io_uring_cqe entry can point back to an
arbitrary submission.

Two new system calls are added for this:

io_uring_setup(entries, params)
	Sets up an io_uring instance for doing async IO. On success,
	returns a file descriptor that the application can mmap to
	gain access to the SQ ring, CQ ring, and io_uring_sqes.

io_uring_enter(fd, to_submit, min_complete, flags, sigset, sigsetsize)
	Initiates IO against the rings mapped to this fd, or waits for
	them to complete, or both. The behavior is controlled by the
	parameters passed in. If 'to_submit' is non-zero, then we'll
	try and submit new IO. If IORING_ENTER_GETEVENTS is set, the
	kernel will wait for 'min_complete' events, if they aren't
	already available. It's valid to set IORING_ENTER_GETEVENTS
	and 'min_complete' == 0 at the same time, this allows the
	kernel to return already completed events without waiting
	for them. This is useful only for polling, as for IRQ
	driven IO, the application can just check the CQ ring
	without entering the kernel.

With this setup, it's possible to do async IO with a single system
call. Future developments will enable polled IO with this interface,
and polled submission as well. The latter will enable an application
to do IO without doing ANY system calls at all.

For IRQ driven IO, an application only needs to enter the kernel for
completions if it wants to wait for them to occur.

Each io_uring is backed by a workqueue, to support buffered async IO
as well. We will only punt to an async context if the command would
need to wait for IO on the device side. Any data that can be accessed
directly in the page cache is done inline. This avoids the slowness
issue of usual threadpools, since cached data is accessed as quickly
as a sync interface.

Sample application: http://git.kernel.dk/cgit/fio/plain/t/io_uring.c

Reviewed-by: Hannes Reinecke <hare@suse.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 arch/x86/entry/syscalls/syscall_32.tbl |    2 +
 arch/x86/entry/syscalls/syscall_64.tbl |    2 +
 fs/Makefile                            |    1 +
 fs/io_uring.c                          | 1255 ++++++++++++++++++++++++++++++++
 include/linux/fs.h                     |    9 +
 include/linux/sched/user.h             |    2 +-
 include/linux/syscalls.h               |    6 +
 include/uapi/asm-generic/unistd.h      |    6 +-
 include/uapi/linux/io_uring.h          |   95 +++
 init/Kconfig                           |    9 +
 kernel/sys_ni.c                        |    2 +
 net/unix/garbage.c                     |    3 +
 12 files changed, 1390 insertions(+), 2 deletions(-)
 create mode 100644 fs/io_uring.c
 create mode 100644 include/uapi/linux/io_uring.h

(limited to 'include')

diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index 3cf7b533b3d1..481c126259e9 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -398,3 +398,5 @@
 384	i386	arch_prctl		sys_arch_prctl			__ia32_compat_sys_arch_prctl
 385	i386	io_pgetevents		sys_io_pgetevents		__ia32_compat_sys_io_pgetevents
 386	i386	rseq			sys_rseq			__ia32_sys_rseq
+425	i386	io_uring_setup		sys_io_uring_setup		__ia32_sys_io_uring_setup
+426	i386	io_uring_enter		sys_io_uring_enter		__ia32_sys_io_uring_enter
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index f0b1709a5ffb..6a32a430c8e0 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -343,6 +343,8 @@
 332	common	statx			__x64_sys_statx
 333	common	io_pgetevents		__x64_sys_io_pgetevents
 334	common	rseq			__x64_sys_rseq
+425	common	io_uring_setup		__x64_sys_io_uring_setup
+426	common	io_uring_enter		__x64_sys_io_uring_enter
 
 #
 # x32-specific system call numbers start at 512 to avoid cache impact
diff --git a/fs/Makefile b/fs/Makefile
index 293733f61594..8e15d6fc4340 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -30,6 +30,7 @@ obj-$(CONFIG_TIMERFD)		+= timerfd.o
 obj-$(CONFIG_EVENTFD)		+= eventfd.o
 obj-$(CONFIG_USERFAULTFD)	+= userfaultfd.o
 obj-$(CONFIG_AIO)               += aio.o
+obj-$(CONFIG_IO_URING)		+= io_uring.o
 obj-$(CONFIG_FS_DAX)		+= dax.o
 obj-$(CONFIG_FS_ENCRYPTION)	+= crypto/
 obj-$(CONFIG_FILE_LOCKING)      += locks.o
diff --git a/fs/io_uring.c b/fs/io_uring.c
new file mode 100644
index 000000000000..f68052290426
--- /dev/null
+++ b/fs/io_uring.c
@@ -0,0 +1,1255 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Shared application/kernel submission and completion ring pairs, for
+ * supporting fast/efficient IO.
+ *
+ * A note on the read/write ordering memory barriers that are matched between
+ * the application and kernel side. When the application reads the CQ ring
+ * tail, it must use an appropriate smp_rmb() to order with the smp_wmb()
+ * the kernel uses after writing the tail. Failure to do so could cause a
+ * delay in when the application notices that completion events available.
+ * This isn't a fatal condition. Likewise, the application must use an
+ * appropriate smp_wmb() both before writing the SQ tail, and after writing
+ * the SQ tail. The first one orders the sqe writes with the tail write, and
+ * the latter is paired with the smp_rmb() the kernel will issue before
+ * reading the SQ tail on submission.
+ *
+ * Also see the examples in the liburing library:
+ *
+ *	git://git.kernel.dk/liburing
+ *
+ * io_uring also uses READ/WRITE_ONCE() for _any_ store or load that happens
+ * from data shared between the kernel and application. This is done both
+ * for ordering purposes, but also to ensure that once a value is loaded from
+ * data that the application could potentially modify, it remains stable.
+ *
+ * Copyright (C) 2018-2019 Jens Axboe
+ */
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/syscalls.h>
+#include <linux/compat.h>
+#include <linux/refcount.h>
+#include <linux/uio.h>
+
+#include <linux/sched/signal.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/fdtable.h>
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <linux/mmu_context.h>
+#include <linux/percpu.h>
+#include <linux/slab.h>
+#include <linux/workqueue.h>
+#include <linux/blkdev.h>
+#include <linux/net.h>
+#include <net/sock.h>
+#include <net/af_unix.h>
+#include <linux/anon_inodes.h>
+#include <linux/sched/mm.h>
+#include <linux/uaccess.h>
+#include <linux/nospec.h>
+
+#include <uapi/linux/io_uring.h>
+
+#include "internal.h"
+
+#define IORING_MAX_ENTRIES	4096
+
+struct io_uring {
+	u32 head ____cacheline_aligned_in_smp;
+	u32 tail ____cacheline_aligned_in_smp;
+};
+
+struct io_sq_ring {
+	struct io_uring		r;
+	u32			ring_mask;
+	u32			ring_entries;
+	u32			dropped;
+	u32			flags;
+	u32			array[];
+};
+
+struct io_cq_ring {
+	struct io_uring		r;
+	u32			ring_mask;
+	u32			ring_entries;
+	u32			overflow;
+	struct io_uring_cqe	cqes[];
+};
+
+struct io_ring_ctx {
+	struct {
+		struct percpu_ref	refs;
+	} ____cacheline_aligned_in_smp;
+
+	struct {
+		unsigned int		flags;
+		bool			compat;
+		bool			account_mem;
+
+		/* SQ ring */
+		struct io_sq_ring	*sq_ring;
+		unsigned		cached_sq_head;
+		unsigned		sq_entries;
+		unsigned		sq_mask;
+		struct io_uring_sqe	*sq_sqes;
+	} ____cacheline_aligned_in_smp;
+
+	/* IO offload */
+	struct workqueue_struct	*sqo_wq;
+	struct mm_struct	*sqo_mm;
+
+	struct {
+		/* CQ ring */
+		struct io_cq_ring	*cq_ring;
+		unsigned		cached_cq_tail;
+		unsigned		cq_entries;
+		unsigned		cq_mask;
+		struct wait_queue_head	cq_wait;
+		struct fasync_struct	*cq_fasync;
+	} ____cacheline_aligned_in_smp;
+
+	struct user_struct	*user;
+
+	struct completion	ctx_done;
+
+	struct {
+		struct mutex		uring_lock;
+		wait_queue_head_t	wait;
+	} ____cacheline_aligned_in_smp;
+
+	struct {
+		spinlock_t		completion_lock;
+	} ____cacheline_aligned_in_smp;
+
+#if defined(CONFIG_UNIX)
+	struct socket		*ring_sock;
+#endif
+};
+
+struct sqe_submit {
+	const struct io_uring_sqe	*sqe;
+	unsigned short			index;
+	bool				has_user;
+};
+
+struct io_kiocb {
+	struct kiocb		rw;
+
+	struct sqe_submit	submit;
+
+	struct io_ring_ctx	*ctx;
+	struct list_head	list;
+	unsigned int		flags;
+#define REQ_F_FORCE_NONBLOCK	1	/* inline submission attempt */
+	u64			user_data;
+
+	struct work_struct	work;
+};
+
+#define IO_PLUG_THRESHOLD		2
+
+static struct kmem_cache *req_cachep;
+
+static const struct file_operations io_uring_fops;
+
+struct sock *io_uring_get_socket(struct file *file)
+{
+#if defined(CONFIG_UNIX)
+	if (file->f_op == &io_uring_fops) {
+		struct io_ring_ctx *ctx = file->private_data;
+
+		return ctx->ring_sock->sk;
+	}
+#endif
+	return NULL;
+}
+EXPORT_SYMBOL(io_uring_get_socket);
+
+static void io_ring_ctx_ref_free(struct percpu_ref *ref)
+{
+	struct io_ring_ctx *ctx = container_of(ref, struct io_ring_ctx, refs);
+
+	complete(&ctx->ctx_done);
+}
+
+static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
+{
+	struct io_ring_ctx *ctx;
+
+	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+	if (!ctx)
+		return NULL;
+
+	if (percpu_ref_init(&ctx->refs, io_ring_ctx_ref_free, 0, GFP_KERNEL)) {
+		kfree(ctx);
+		return NULL;
+	}
+
+	ctx->flags = p->flags;
+	init_waitqueue_head(&ctx->cq_wait);
+	init_completion(&ctx->ctx_done);
+	mutex_init(&ctx->uring_lock);
+	init_waitqueue_head(&ctx->wait);
+	spin_lock_init(&ctx->completion_lock);
+	return ctx;
+}
+
+static void io_commit_cqring(struct io_ring_ctx *ctx)
+{
+	struct io_cq_ring *ring = ctx->cq_ring;
+
+	if (ctx->cached_cq_tail != READ_ONCE(ring->r.tail)) {
+		/* order cqe stores with ring update */
+		smp_store_release(&ring->r.tail, ctx->cached_cq_tail);
+
+		/*
+		 * Write sider barrier of tail update, app has read side. See
+		 * comment at the top of this file.
+		 */
+		smp_wmb();
+
+		if (wq_has_sleeper(&ctx->cq_wait)) {
+			wake_up_interruptible(&ctx->cq_wait);
+			kill_fasync(&ctx->cq_fasync, SIGIO, POLL_IN);
+		}
+	}
+}
+
+static struct io_uring_cqe *io_get_cqring(struct io_ring_ctx *ctx)
+{
+	struct io_cq_ring *ring = ctx->cq_ring;
+	unsigned tail;
+
+	tail = ctx->cached_cq_tail;
+	/* See comment at the top of the file */
+	smp_rmb();
+	if (tail + 1 == READ_ONCE(ring->r.head))
+		return NULL;
+
+	ctx->cached_cq_tail++;
+	return &ring->cqes[tail & ctx->cq_mask];
+}
+
+static void io_cqring_fill_event(struct io_ring_ctx *ctx, u64 ki_user_data,
+				 long res, unsigned ev_flags)
+{
+	struct io_uring_cqe *cqe;
+
+	/*
+	 * If we can't get a cq entry, userspace overflowed the
+	 * submission (by quite a lot). Increment the overflow count in
+	 * the ring.
+	 */
+	cqe = io_get_cqring(ctx);
+	if (cqe) {
+		WRITE_ONCE(cqe->user_data, ki_user_data);
+		WRITE_ONCE(cqe->res, res);
+		WRITE_ONCE(cqe->flags, ev_flags);
+	} else {
+		unsigned overflow = READ_ONCE(ctx->cq_ring->overflow);
+
+		WRITE_ONCE(ctx->cq_ring->overflow, overflow + 1);
+	}
+}
+
+static void io_cqring_add_event(struct io_ring_ctx *ctx, u64 ki_user_data,
+				long res, unsigned ev_flags)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&ctx->completion_lock, flags);
+	io_cqring_fill_event(ctx, ki_user_data, res, ev_flags);
+	io_commit_cqring(ctx);
+	spin_unlock_irqrestore(&ctx->completion_lock, flags);
+
+	if (waitqueue_active(&ctx->wait))
+		wake_up(&ctx->wait);
+}
+
+static void io_ring_drop_ctx_refs(struct io_ring_ctx *ctx, unsigned refs)
+{
+	percpu_ref_put_many(&ctx->refs, refs);
+
+	if (waitqueue_active(&ctx->wait))
+		wake_up(&ctx->wait);
+}
+
+static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx)
+{
+	struct io_kiocb *req;
+
+	if (!percpu_ref_tryget(&ctx->refs))
+		return NULL;
+
+	req = kmem_cache_alloc(req_cachep, __GFP_NOWARN);
+	if (req) {
+		req->ctx = ctx;
+		req->flags = 0;
+		return req;
+	}
+
+	io_ring_drop_ctx_refs(ctx, 1);
+	return NULL;
+}
+
+static void io_free_req(struct io_kiocb *req)
+{
+	io_ring_drop_ctx_refs(req->ctx, 1);
+	kmem_cache_free(req_cachep, req);
+}
+
+static void kiocb_end_write(struct kiocb *kiocb)
+{
+	if (kiocb->ki_flags & IOCB_WRITE) {
+		struct inode *inode = file_inode(kiocb->ki_filp);
+
+		/*
+		 * Tell lockdep we inherited freeze protection from submission
+		 * thread.
+		 */
+		if (S_ISREG(inode->i_mode))
+			__sb_writers_acquired(inode->i_sb, SB_FREEZE_WRITE);
+		file_end_write(kiocb->ki_filp);
+	}
+}
+
+static void io_complete_rw(struct kiocb *kiocb, long res, long res2)
+{
+	struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw);
+
+	kiocb_end_write(kiocb);
+
+	fput(kiocb->ki_filp);
+	io_cqring_add_event(req->ctx, req->user_data, res, 0);
+	io_free_req(req);
+}
+
+/*
+ * If we tracked the file through the SCM inflight mechanism, we could support
+ * any file. For now, just ensure that anything potentially problematic is done
+ * inline.
+ */
+static bool io_file_supports_async(struct file *file)
+{
+	umode_t mode = file_inode(file)->i_mode;
+
+	if (S_ISBLK(mode) || S_ISCHR(mode))
+		return true;
+	if (S_ISREG(mode) && file->f_op != &io_uring_fops)
+		return true;
+
+	return false;
+}
+
+static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe,
+		      bool force_nonblock)
+{
+	struct kiocb *kiocb = &req->rw;
+	unsigned ioprio;
+	int fd, ret;
+
+	/* For -EAGAIN retry, everything is already prepped */
+	if (kiocb->ki_filp)
+		return 0;
+
+	fd = READ_ONCE(sqe->fd);
+	kiocb->ki_filp = fget(fd);
+	if (unlikely(!kiocb->ki_filp))
+		return -EBADF;
+	if (force_nonblock && !io_file_supports_async(kiocb->ki_filp))
+		force_nonblock = false;
+	kiocb->ki_pos = READ_ONCE(sqe->off);
+	kiocb->ki_flags = iocb_flags(kiocb->ki_filp);
+	kiocb->ki_hint = ki_hint_validate(file_write_hint(kiocb->ki_filp));
+
+	ioprio = READ_ONCE(sqe->ioprio);
+	if (ioprio) {
+		ret = ioprio_check_cap(ioprio);
+		if (ret)
+			goto out_fput;
+
+		kiocb->ki_ioprio = ioprio;
+	} else
+		kiocb->ki_ioprio = get_current_ioprio();
+
+	ret = kiocb_set_rw_flags(kiocb, READ_ONCE(sqe->rw_flags));
+	if (unlikely(ret))
+		goto out_fput;
+	if (force_nonblock) {
+		kiocb->ki_flags |= IOCB_NOWAIT;
+		req->flags |= REQ_F_FORCE_NONBLOCK;
+	}
+	if (kiocb->ki_flags & IOCB_HIPRI) {
+		ret = -EINVAL;
+		goto out_fput;
+	}
+
+	kiocb->ki_complete = io_complete_rw;
+	return 0;
+out_fput:
+	fput(kiocb->ki_filp);
+	return ret;
+}
+
+static inline void io_rw_done(struct kiocb *kiocb, ssize_t ret)
+{
+	switch (ret) {
+	case -EIOCBQUEUED:
+		break;
+	case -ERESTARTSYS:
+	case -ERESTARTNOINTR:
+	case -ERESTARTNOHAND:
+	case -ERESTART_RESTARTBLOCK:
+		/*
+		 * We can't just restart the syscall, since previously
+		 * submitted sqes may already be in progress. Just fail this
+		 * IO with EINTR.
+		 */
+		ret = -EINTR;
+		/* fall through */
+	default:
+		kiocb->ki_complete(kiocb, ret, 0);
+	}
+}
+
+static int io_import_iovec(struct io_ring_ctx *ctx, int rw,
+			   const struct sqe_submit *s, struct iovec **iovec,
+			   struct iov_iter *iter)
+{
+	const struct io_uring_sqe *sqe = s->sqe;
+	void __user *buf = u64_to_user_ptr(READ_ONCE(sqe->addr));
+	size_t sqe_len = READ_ONCE(sqe->len);
+
+	if (!s->has_user)
+		return -EFAULT;
+
+#ifdef CONFIG_COMPAT
+	if (ctx->compat)
+		return compat_import_iovec(rw, buf, sqe_len, UIO_FASTIOV,
+						iovec, iter);
+#endif
+
+	return import_iovec(rw, buf, sqe_len, UIO_FASTIOV, iovec, iter);
+}
+
+static ssize_t io_read(struct io_kiocb *req, const struct sqe_submit *s,
+		       bool force_nonblock)
+{
+	struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
+	struct kiocb *kiocb = &req->rw;
+	struct iov_iter iter;
+	struct file *file;
+	ssize_t ret;
+
+	ret = io_prep_rw(req, s->sqe, force_nonblock);
+	if (ret)
+		return ret;
+	file = kiocb->ki_filp;
+
+	ret = -EBADF;
+	if (unlikely(!(file->f_mode & FMODE_READ)))
+		goto out_fput;
+	ret = -EINVAL;
+	if (unlikely(!file->f_op->read_iter))
+		goto out_fput;
+
+	ret = io_import_iovec(req->ctx, READ, s, &iovec, &iter);
+	if (ret)
+		goto out_fput;
+
+	ret = rw_verify_area(READ, file, &kiocb->ki_pos, iov_iter_count(&iter));
+	if (!ret) {
+		ssize_t ret2;
+
+		/* Catch -EAGAIN return for forced non-blocking submission */
+		ret2 = call_read_iter(file, kiocb, &iter);
+		if (!force_nonblock || ret2 != -EAGAIN)
+			io_rw_done(kiocb, ret2);
+		else
+			ret = -EAGAIN;
+	}
+	kfree(iovec);
+out_fput:
+	/* Hold on to the file for -EAGAIN */
+	if (unlikely(ret && ret != -EAGAIN))
+		fput(file);
+	return ret;
+}
+
+static ssize_t io_write(struct io_kiocb *req, const struct sqe_submit *s,
+			bool force_nonblock)
+{
+	struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
+	struct kiocb *kiocb = &req->rw;
+	struct iov_iter iter;
+	struct file *file;
+	ssize_t ret;
+
+	ret = io_prep_rw(req, s->sqe, force_nonblock);
+	if (ret)
+		return ret;
+	/* Hold on to the file for -EAGAIN */
+	if (force_nonblock && !(kiocb->ki_flags & IOCB_DIRECT))
+		return -EAGAIN;
+
+	ret = -EBADF;
+	file = kiocb->ki_filp;
+	if (unlikely(!(file->f_mode & FMODE_WRITE)))
+		goto out_fput;
+	ret = -EINVAL;
+	if (unlikely(!file->f_op->write_iter))
+		goto out_fput;
+
+	ret = io_import_iovec(req->ctx, WRITE, s, &iovec, &iter);
+	if (ret)
+		goto out_fput;
+
+	ret = rw_verify_area(WRITE, file, &kiocb->ki_pos,
+				iov_iter_count(&iter));
+	if (!ret) {
+		/*
+		 * Open-code file_start_write here to grab freeze protection,
+		 * which will be released by another thread in
+		 * io_complete_rw().  Fool lockdep by telling it the lock got
+		 * released so that it doesn't complain about the held lock when
+		 * we return to userspace.
+		 */
+		if (S_ISREG(file_inode(file)->i_mode)) {
+			__sb_start_write(file_inode(file)->i_sb,
+						SB_FREEZE_WRITE, true);
+			__sb_writers_release(file_inode(file)->i_sb,
+						SB_FREEZE_WRITE);
+		}
+		kiocb->ki_flags |= IOCB_WRITE;
+		io_rw_done(kiocb, call_write_iter(file, kiocb, &iter));
+	}
+	kfree(iovec);
+out_fput:
+	if (unlikely(ret))
+		fput(file);
+	return ret;
+}
+
+/*
+ * IORING_OP_NOP just posts a completion event, nothing else.
+ */
+static int io_nop(struct io_kiocb *req, u64 user_data)
+{
+	struct io_ring_ctx *ctx = req->ctx;
+	long err = 0;
+
+	/*
+	 * Twilight zone - it's possible that someone issued an opcode that
+	 * has a file attached, then got -EAGAIN on submission, and changed
+	 * the sqe before we retried it from async context. Avoid dropping
+	 * a file reference for this malicious case, and flag the error.
+	 */
+	if (req->rw.ki_filp) {
+		err = -EBADF;
+		fput(req->rw.ki_filp);
+	}
+	io_cqring_add_event(ctx, user_data, err, 0);
+	io_free_req(req);
+	return 0;
+}
+
+static int __io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
+			   const struct sqe_submit *s, bool force_nonblock)
+{
+	ssize_t ret;
+	int opcode;
+
+	if (unlikely(s->index >= ctx->sq_entries))
+		return -EINVAL;
+	req->user_data = READ_ONCE(s->sqe->user_data);
+
+	opcode = READ_ONCE(s->sqe->opcode);
+	switch (opcode) {
+	case IORING_OP_NOP:
+		ret = io_nop(req, req->user_data);
+		break;
+	case IORING_OP_READV:
+		ret = io_read(req, s, force_nonblock);
+		break;
+	case IORING_OP_WRITEV:
+		ret = io_write(req, s, force_nonblock);
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+	return ret;
+}
+
+static void io_sq_wq_submit_work(struct work_struct *work)
+{
+	struct io_kiocb *req = container_of(work, struct io_kiocb, work);
+	struct sqe_submit *s = &req->submit;
+	const struct io_uring_sqe *sqe = s->sqe;
+	struct io_ring_ctx *ctx = req->ctx;
+	mm_segment_t old_fs = get_fs();
+	int ret;
+
+	 /* Ensure we clear previously set forced non-block flag */
+	req->flags &= ~REQ_F_FORCE_NONBLOCK;
+	req->rw.ki_flags &= ~IOCB_NOWAIT;
+
+	if (!mmget_not_zero(ctx->sqo_mm)) {
+		ret = -EFAULT;
+		goto err;
+	}
+
+	use_mm(ctx->sqo_mm);
+	set_fs(USER_DS);
+	s->has_user = true;
+
+	ret = __io_submit_sqe(ctx, req, s, false);
+
+	set_fs(old_fs);
+	unuse_mm(ctx->sqo_mm);
+	mmput(ctx->sqo_mm);
+err:
+	if (ret) {
+		io_cqring_add_event(ctx, sqe->user_data, ret, 0);
+		io_free_req(req);
+	}
+
+	/* async context always use a copy of the sqe */
+	kfree(sqe);
+}
+
+static int io_submit_sqe(struct io_ring_ctx *ctx, struct sqe_submit *s)
+{
+	struct io_kiocb *req;
+	ssize_t ret;
+
+	/* enforce forwards compatibility on users */
+	if (unlikely(s->sqe->flags))
+		return -EINVAL;
+
+	req = io_get_req(ctx);
+	if (unlikely(!req))
+		return -EAGAIN;
+
+	req->rw.ki_filp = NULL;
+
+	ret = __io_submit_sqe(ctx, req, s, true);
+	if (ret == -EAGAIN) {
+		struct io_uring_sqe *sqe_copy;
+
+		sqe_copy = kmalloc(sizeof(*sqe_copy), GFP_KERNEL);
+		if (sqe_copy) {
+			memcpy(sqe_copy, s->sqe, sizeof(*sqe_copy));
+			s->sqe = sqe_copy;
+
+			memcpy(&req->submit, s, sizeof(*s));
+			INIT_WORK(&req->work, io_sq_wq_submit_work);
+			queue_work(ctx->sqo_wq, &req->work);
+			ret = 0;
+		}
+	}
+	if (ret)
+		io_free_req(req);
+
+	return ret;
+}
+
+static void io_commit_sqring(struct io_ring_ctx *ctx)
+{
+	struct io_sq_ring *ring = ctx->sq_ring;
+
+	if (ctx->cached_sq_head != READ_ONCE(ring->r.head)) {
+		/*
+		 * Ensure any loads from the SQEs are done at this point,
+		 * since once we write the new head, the application could
+		 * write new data to them.
+		 */
+		smp_store_release(&ring->r.head, ctx->cached_sq_head);
+
+		/*
+		 * write side barrier of head update, app has read side. See
+		 * comment at the top of this file
+		 */
+		smp_wmb();
+	}
+}
+
+/*
+ * Undo last io_get_sqring()
+ */
+static void io_drop_sqring(struct io_ring_ctx *ctx)
+{
+	ctx->cached_sq_head--;
+}
+
+/*
+ * Fetch an sqe, if one is available. Note that s->sqe will point to memory
+ * that is mapped by userspace. This means that care needs to be taken to
+ * ensure that reads are stable, as we cannot rely on userspace always
+ * being a good citizen. If members of the sqe are validated and then later
+ * used, it's important that those reads are done through READ_ONCE() to
+ * prevent a re-load down the line.
+ */
+static bool io_get_sqring(struct io_ring_ctx *ctx, struct sqe_submit *s)
+{
+	struct io_sq_ring *ring = ctx->sq_ring;
+	unsigned head;
+
+	/*
+	 * The cached sq head (or cq tail) serves two purposes:
+	 *
+	 * 1) allows us to batch the cost of updating the user visible
+	 *    head updates.
+	 * 2) allows the kernel side to track the head on its own, even
+	 *    though the application is the one updating it.
+	 */
+	head = ctx->cached_sq_head;
+	/* See comment at the top of this file */
+	smp_rmb();
+	if (head == READ_ONCE(ring->r.tail))
+		return false;
+
+	head = READ_ONCE(ring->array[head & ctx->sq_mask]);
+	if (head < ctx->sq_entries) {
+		s->index = head;
+		s->sqe = &ctx->sq_sqes[head];
+		ctx->cached_sq_head++;
+		return true;
+	}
+
+	/* drop invalid entries */
+	ctx->cached_sq_head++;
+	ring->dropped++;
+	/* See comment at the top of this file */
+	smp_wmb();
+	return false;
+}
+
+static int io_ring_submit(struct io_ring_ctx *ctx, unsigned int to_submit)
+{
+	int i, ret = 0, submit = 0;
+	struct blk_plug plug;
+
+	if (to_submit > IO_PLUG_THRESHOLD)
+		blk_start_plug(&plug);
+
+	for (i = 0; i < to_submit; i++) {
+		struct sqe_submit s;
+
+		if (!io_get_sqring(ctx, &s))
+			break;
+
+		s.has_user = true;
+		ret = io_submit_sqe(ctx, &s);
+		if (ret) {
+			io_drop_sqring(ctx);
+			break;
+		}
+
+		submit++;
+	}
+	io_commit_sqring(ctx);
+
+	if (to_submit > IO_PLUG_THRESHOLD)
+		blk_finish_plug(&plug);
+
+	return submit ? submit : ret;
+}
+
+static unsigned io_cqring_events(struct io_cq_ring *ring)
+{
+	return READ_ONCE(ring->r.tail) - READ_ONCE(ring->r.head);
+}
+
+/*
+ * Wait until events become available, if we don't already have some. The
+ * application must reap them itself, as they reside on the shared cq ring.
+ */
+static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
+			  const sigset_t __user *sig, size_t sigsz)
+{
+	struct io_cq_ring *ring = ctx->cq_ring;
+	sigset_t ksigmask, sigsaved;
+	DEFINE_WAIT(wait);
+	int ret;
+
+	/* See comment at the top of this file */
+	smp_rmb();
+	if (io_cqring_events(ring) >= min_events)
+		return 0;
+
+	if (sig) {
+		ret = set_user_sigmask(sig, &ksigmask, &sigsaved, sigsz);
+		if (ret)
+			return ret;
+	}
+
+	do {
+		prepare_to_wait(&ctx->wait, &wait, TASK_INTERRUPTIBLE);
+
+		ret = 0;
+		/* See comment at the top of this file */
+		smp_rmb();
+		if (io_cqring_events(ring) >= min_events)
+			break;
+
+		schedule();
+
+		ret = -EINTR;
+		if (signal_pending(current))
+			break;
+	} while (1);
+
+	finish_wait(&ctx->wait, &wait);
+
+	if (sig)
+		restore_user_sigmask(sig, &sigsaved);
+
+	return READ_ONCE(ring->r.head) == READ_ONCE(ring->r.tail) ? ret : 0;
+}
+
+static int io_sq_offload_start(struct io_ring_ctx *ctx)
+{
+	int ret;
+
+	mmgrab(current->mm);
+	ctx->sqo_mm = current->mm;
+
+	/* Do QD, or 2 * CPUS, whatever is smallest */
+	ctx->sqo_wq = alloc_workqueue("io_ring-wq", WQ_UNBOUND | WQ_FREEZABLE,
+			min(ctx->sq_entries - 1, 2 * num_online_cpus()));
+	if (!ctx->sqo_wq) {
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	return 0;
+err:
+	mmdrop(ctx->sqo_mm);
+	ctx->sqo_mm = NULL;
+	return ret;
+}
+
+static void io_unaccount_mem(struct user_struct *user, unsigned long nr_pages)
+{
+	atomic_long_sub(nr_pages, &user->locked_vm);
+}
+
+static int io_account_mem(struct user_struct *user, unsigned long nr_pages)
+{
+	unsigned long page_limit, cur_pages, new_pages;
+
+	/* Don't allow more pages than we can safely lock */
+	page_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
+
+	do {
+		cur_pages = atomic_long_read(&user->locked_vm);
+		new_pages = cur_pages + nr_pages;
+		if (new_pages > page_limit)
+			return -ENOMEM;
+	} while (atomic_long_cmpxchg(&user->locked_vm, cur_pages,
+					new_pages) != cur_pages);
+
+	return 0;
+}
+
+static void io_mem_free(void *ptr)
+{
+	struct page *page = virt_to_head_page(ptr);
+
+	if (put_page_testzero(page))
+		free_compound_page(page);
+}
+
+static void *io_mem_alloc(size_t size)
+{
+	gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO | __GFP_NOWARN | __GFP_COMP |
+				__GFP_NORETRY;
+
+	return (void *) __get_free_pages(gfp_flags, get_order(size));
+}
+
+static unsigned long ring_pages(unsigned sq_entries, unsigned cq_entries)
+{
+	struct io_sq_ring *sq_ring;
+	struct io_cq_ring *cq_ring;
+	size_t bytes;
+
+	bytes = struct_size(sq_ring, array, sq_entries);
+	bytes += array_size(sizeof(struct io_uring_sqe), sq_entries);
+	bytes += struct_size(cq_ring, cqes, cq_entries);
+
+	return (bytes + PAGE_SIZE - 1) / PAGE_SIZE;
+}
+
+static void io_ring_ctx_free(struct io_ring_ctx *ctx)
+{
+	if (ctx->sqo_wq)
+		destroy_workqueue(ctx->sqo_wq);
+	if (ctx->sqo_mm)
+		mmdrop(ctx->sqo_mm);
+#if defined(CONFIG_UNIX)
+	if (ctx->ring_sock)
+		sock_release(ctx->ring_sock);
+#endif
+
+	io_mem_free(ctx->sq_ring);
+	io_mem_free(ctx->sq_sqes);
+	io_mem_free(ctx->cq_ring);
+
+	percpu_ref_exit(&ctx->refs);
+	if (ctx->account_mem)
+		io_unaccount_mem(ctx->user,
+				ring_pages(ctx->sq_entries, ctx->cq_entries));
+	free_uid(ctx->user);
+	kfree(ctx);
+}
+
+static __poll_t io_uring_poll(struct file *file, poll_table *wait)
+{
+	struct io_ring_ctx *ctx = file->private_data;
+	__poll_t mask = 0;
+
+	poll_wait(file, &ctx->cq_wait, wait);
+	/* See comment at the top of this file */
+	smp_rmb();
+	if (READ_ONCE(ctx->sq_ring->r.tail) + 1 != ctx->cached_sq_head)
+		mask |= EPOLLOUT | EPOLLWRNORM;
+	if (READ_ONCE(ctx->cq_ring->r.head) != ctx->cached_cq_tail)
+		mask |= EPOLLIN | EPOLLRDNORM;
+
+	return mask;
+}
+
+static int io_uring_fasync(int fd, struct file *file, int on)
+{
+	struct io_ring_ctx *ctx = file->private_data;
+
+	return fasync_helper(fd, file, on, &ctx->cq_fasync);
+}
+
+static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx)
+{
+	mutex_lock(&ctx->uring_lock);
+	percpu_ref_kill(&ctx->refs);
+	mutex_unlock(&ctx->uring_lock);
+
+	wait_for_completion(&ctx->ctx_done);
+	io_ring_ctx_free(ctx);
+}
+
+static int io_uring_release(struct inode *inode, struct file *file)
+{
+	struct io_ring_ctx *ctx = file->private_data;
+
+	file->private_data = NULL;
+	io_ring_ctx_wait_and_kill(ctx);
+	return 0;
+}
+
+static int io_uring_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	loff_t offset = (loff_t) vma->vm_pgoff << PAGE_SHIFT;
+	unsigned long sz = vma->vm_end - vma->vm_start;
+	struct io_ring_ctx *ctx = file->private_data;
+	unsigned long pfn;
+	struct page *page;
+	void *ptr;
+
+	switch (offset) {
+	case IORING_OFF_SQ_RING:
+		ptr = ctx->sq_ring;
+		break;
+	case IORING_OFF_SQES:
+		ptr = ctx->sq_sqes;
+		break;
+	case IORING_OFF_CQ_RING:
+		ptr = ctx->cq_ring;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	page = virt_to_head_page(ptr);
+	if (sz > (PAGE_SIZE << compound_order(page)))
+		return -EINVAL;
+
+	pfn = virt_to_phys(ptr) >> PAGE_SHIFT;
+	return remap_pfn_range(vma, vma->vm_start, pfn, sz, vma->vm_page_prot);
+}
+
+SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
+		u32, min_complete, u32, flags, const sigset_t __user *, sig,
+		size_t, sigsz)
+{
+	struct io_ring_ctx *ctx;
+	long ret = -EBADF;
+	int submitted = 0;
+	struct fd f;
+
+	if (flags & ~IORING_ENTER_GETEVENTS)
+		return -EINVAL;
+
+	f = fdget(fd);
+	if (!f.file)
+		return -EBADF;
+
+	ret = -EOPNOTSUPP;
+	if (f.file->f_op != &io_uring_fops)
+		goto out_fput;
+
+	ret = -ENXIO;
+	ctx = f.file->private_data;
+	if (!percpu_ref_tryget(&ctx->refs))
+		goto out_fput;
+
+	ret = 0;
+	if (to_submit) {
+		to_submit = min(to_submit, ctx->sq_entries);
+
+		mutex_lock(&ctx->uring_lock);
+		submitted = io_ring_submit(ctx, to_submit);
+		mutex_unlock(&ctx->uring_lock);
+
+		if (submitted < 0)
+			goto out_ctx;
+	}
+	if (flags & IORING_ENTER_GETEVENTS) {
+		min_complete = min(min_complete, ctx->cq_entries);
+
+		/*
+		 * The application could have included the 'to_submit' count
+		 * in how many events it wanted to wait for. If we failed to
+		 * submit the desired count, we may need to adjust the number
+		 * of events to poll/wait for.
+		 */
+		if (submitted < to_submit)
+			min_complete = min_t(unsigned, submitted, min_complete);
+
+		ret = io_cqring_wait(ctx, min_complete, sig, sigsz);
+	}
+
+out_ctx:
+	io_ring_drop_ctx_refs(ctx, 1);
+out_fput:
+	fdput(f);
+	return submitted ? submitted : ret;
+}
+
+static const struct file_operations io_uring_fops = {
+	.release	= io_uring_release,
+	.mmap		= io_uring_mmap,
+	.poll		= io_uring_poll,
+	.fasync		= io_uring_fasync,
+};
+
+static int io_allocate_scq_urings(struct io_ring_ctx *ctx,
+				  struct io_uring_params *p)
+{
+	struct io_sq_ring *sq_ring;
+	struct io_cq_ring *cq_ring;
+	size_t size;
+
+	sq_ring = io_mem_alloc(struct_size(sq_ring, array, p->sq_entries));
+	if (!sq_ring)
+		return -ENOMEM;
+
+	ctx->sq_ring = sq_ring;
+	sq_ring->ring_mask = p->sq_entries - 1;
+	sq_ring->ring_entries = p->sq_entries;
+	ctx->sq_mask = sq_ring->ring_mask;
+	ctx->sq_entries = sq_ring->ring_entries;
+
+	size = array_size(sizeof(struct io_uring_sqe), p->sq_entries);
+	if (size == SIZE_MAX)
+		return -EOVERFLOW;
+
+	ctx->sq_sqes = io_mem_alloc(size);
+	if (!ctx->sq_sqes) {
+		io_mem_free(ctx->sq_ring);
+		return -ENOMEM;
+	}
+
+	cq_ring = io_mem_alloc(struct_size(cq_ring, cqes, p->cq_entries));
+	if (!cq_ring) {
+		io_mem_free(ctx->sq_ring);
+		io_mem_free(ctx->sq_sqes);
+		return -ENOMEM;
+	}
+
+	ctx->cq_ring = cq_ring;
+	cq_ring->ring_mask = p->cq_entries - 1;
+	cq_ring->ring_entries = p->cq_entries;
+	ctx->cq_mask = cq_ring->ring_mask;
+	ctx->cq_entries = cq_ring->ring_entries;
+	return 0;
+}
+
+/*
+ * Allocate an anonymous fd, this is what constitutes the application
+ * visible backing of an io_uring instance. The application mmaps this
+ * fd to gain access to the SQ/CQ ring details. If UNIX sockets are enabled,
+ * we have to tie this fd to a socket for file garbage collection purposes.
+ */
+static int io_uring_get_fd(struct io_ring_ctx *ctx)
+{
+	struct file *file;
+	int ret;
+
+#if defined(CONFIG_UNIX)
+	ret = sock_create_kern(&init_net, PF_UNIX, SOCK_RAW, IPPROTO_IP,
+				&ctx->ring_sock);
+	if (ret)
+		return ret;
+#endif
+
+	ret = get_unused_fd_flags(O_RDWR | O_CLOEXEC);
+	if (ret < 0)
+		goto err;
+
+	file = anon_inode_getfile("[io_uring]", &io_uring_fops, ctx,
+					O_RDWR | O_CLOEXEC);
+	if (IS_ERR(file)) {
+		put_unused_fd(ret);
+		ret = PTR_ERR(file);
+		goto err;
+	}
+
+#if defined(CONFIG_UNIX)
+	ctx->ring_sock->file = file;
+#endif
+	fd_install(ret, file);
+	return ret;
+err:
+#if defined(CONFIG_UNIX)
+	sock_release(ctx->ring_sock);
+	ctx->ring_sock = NULL;
+#endif
+	return ret;
+}
+
+static int io_uring_create(unsigned entries, struct io_uring_params *p)
+{
+	struct user_struct *user = NULL;
+	struct io_ring_ctx *ctx;
+	bool account_mem;
+	int ret;
+
+	if (!entries || entries > IORING_MAX_ENTRIES)
+		return -EINVAL;
+
+	/*
+	 * Use twice as many entries for the CQ ring. It's possible for the
+	 * application to drive a higher depth than the size of the SQ ring,
+	 * since the sqes are only used at submission time. This allows for
+	 * some flexibility in overcommitting a bit.
+	 */
+	p->sq_entries = roundup_pow_of_two(entries);
+	p->cq_entries = 2 * p->sq_entries;
+
+	user = get_uid(current_user());
+	account_mem = !capable(CAP_IPC_LOCK);
+
+	if (account_mem) {
+		ret = io_account_mem(user,
+				ring_pages(p->sq_entries, p->cq_entries));
+		if (ret) {
+			free_uid(user);
+			return ret;
+		}
+	}
+
+	ctx = io_ring_ctx_alloc(p);
+	if (!ctx) {
+		if (account_mem)
+			io_unaccount_mem(user, ring_pages(p->sq_entries,
+								p->cq_entries));
+		free_uid(user);
+		return -ENOMEM;
+	}
+	ctx->compat = in_compat_syscall();
+	ctx->account_mem = account_mem;
+	ctx->user = user;
+
+	ret = io_allocate_scq_urings(ctx, p);
+	if (ret)
+		goto err;
+
+	ret = io_sq_offload_start(ctx);
+	if (ret)
+		goto err;
+
+	ret = io_uring_get_fd(ctx);
+	if (ret < 0)
+		goto err;
+
+	memset(&p->sq_off, 0, sizeof(p->sq_off));
+	p->sq_off.head = offsetof(struct io_sq_ring, r.head);
+	p->sq_off.tail = offsetof(struct io_sq_ring, r.tail);
+	p->sq_off.ring_mask = offsetof(struct io_sq_ring, ring_mask);
+	p->sq_off.ring_entries = offsetof(struct io_sq_ring, ring_entries);
+	p->sq_off.flags = offsetof(struct io_sq_ring, flags);
+	p->sq_off.dropped = offsetof(struct io_sq_ring, dropped);
+	p->sq_off.array = offsetof(struct io_sq_ring, array);
+
+	memset(&p->cq_off, 0, sizeof(p->cq_off));
+	p->cq_off.head = offsetof(struct io_cq_ring, r.head);
+	p->cq_off.tail = offsetof(struct io_cq_ring, r.tail);
+	p->cq_off.ring_mask = offsetof(struct io_cq_ring, ring_mask);
+	p->cq_off.ring_entries = offsetof(struct io_cq_ring, ring_entries);
+	p->cq_off.overflow = offsetof(struct io_cq_ring, overflow);
+	p->cq_off.cqes = offsetof(struct io_cq_ring, cqes);
+	return ret;
+err:
+	io_ring_ctx_wait_and_kill(ctx);
+	return ret;
+}
+
+/*
+ * Sets up an aio uring context, and returns the fd. Applications asks for a
+ * ring size, we return the actual sq/cq ring sizes (among other things) in the
+ * params structure passed in.
+ */
+static long io_uring_setup(u32 entries, struct io_uring_params __user *params)
+{
+	struct io_uring_params p;
+	long ret;
+	int i;
+
+	if (copy_from_user(&p, params, sizeof(p)))
+		return -EFAULT;
+	for (i = 0; i < ARRAY_SIZE(p.resv); i++) {
+		if (p.resv[i])
+			return -EINVAL;
+	}
+
+	if (p.flags)
+		return -EINVAL;
+
+	ret = io_uring_create(entries, &p);
+	if (ret < 0)
+		return ret;
+
+	if (copy_to_user(params, &p, sizeof(p)))
+		return -EFAULT;
+
+	return ret;
+}
+
+SYSCALL_DEFINE2(io_uring_setup, u32, entries,
+		struct io_uring_params __user *, params)
+{
+	return io_uring_setup(entries, params);
+}
+
+static int __init io_uring_init(void)
+{
+	req_cachep = KMEM_CACHE(io_kiocb, SLAB_HWCACHE_ALIGN | SLAB_PANIC);
+	return 0;
+};
+__initcall(io_uring_init);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index dedcc2e9265c..61aa210f0c2b 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -3517,4 +3517,13 @@ extern void inode_nohighmem(struct inode *inode);
 extern int vfs_fadvise(struct file *file, loff_t offset, loff_t len,
 		       int advice);
 
+#if defined(CONFIG_IO_URING)
+extern struct sock *io_uring_get_socket(struct file *file);
+#else
+static inline struct sock *io_uring_get_socket(struct file *file)
+{
+	return NULL;
+}
+#endif
+
 #endif /* _LINUX_FS_H */
diff --git a/include/linux/sched/user.h b/include/linux/sched/user.h
index 39ad98c09c58..c7b5f86b91a1 100644
--- a/include/linux/sched/user.h
+++ b/include/linux/sched/user.h
@@ -40,7 +40,7 @@ struct user_struct {
 	kuid_t uid;
 
 #if defined(CONFIG_PERF_EVENTS) || defined(CONFIG_BPF_SYSCALL) || \
-    defined(CONFIG_NET)
+    defined(CONFIG_NET) || defined(CONFIG_IO_URING)
 	atomic_long_t locked_vm;
 #endif
 
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 257cccba3062..3072dbaa7869 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -69,6 +69,7 @@ struct file_handle;
 struct sigaltstack;
 struct rseq;
 union bpf_attr;
+struct io_uring_params;
 
 #include <linux/types.h>
 #include <linux/aio_abi.h>
@@ -309,6 +310,11 @@ asmlinkage long sys_io_pgetevents_time32(aio_context_t ctx_id,
 				struct io_event __user *events,
 				struct old_timespec32 __user *timeout,
 				const struct __aio_sigset *sig);
+asmlinkage long sys_io_uring_setup(u32 entries,
+				struct io_uring_params __user *p);
+asmlinkage long sys_io_uring_enter(unsigned int fd, u32 to_submit,
+				u32 min_complete, u32 flags,
+				const sigset_t __user *sig, size_t sigsz);
 
 /* fs/xattr.c */
 asmlinkage long sys_setxattr(const char __user *path, const char __user *name,
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index d90127298f12..87871e7b7ea7 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -740,9 +740,13 @@ __SC_COMP(__NR_io_pgetevents, sys_io_pgetevents, compat_sys_io_pgetevents)
 __SYSCALL(__NR_rseq, sys_rseq)
 #define __NR_kexec_file_load 294
 __SYSCALL(__NR_kexec_file_load,     sys_kexec_file_load)
+#define __NR_io_uring_setup 425
+__SYSCALL(__NR_io_uring_setup, sys_io_uring_setup)
+#define __NR_io_uring_enter 426
+__SYSCALL(__NR_io_uring_enter, sys_io_uring_enter)
 
 #undef __NR_syscalls
-#define __NR_syscalls 295
+#define __NR_syscalls 427
 
 /*
  * 32 bit systems traditionally used different
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
new file mode 100644
index 000000000000..ac692823d6f4
--- /dev/null
+++ b/include/uapi/linux/io_uring.h
@@ -0,0 +1,95 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * Header file for the io_uring interface.
+ *
+ * Copyright (C) 2019 Jens Axboe
+ * Copyright (C) 2019 Christoph Hellwig
+ */
+#ifndef LINUX_IO_URING_H
+#define LINUX_IO_URING_H
+
+#include <linux/fs.h>
+#include <linux/types.h>
+
+/*
+ * IO submission data structure (Submission Queue Entry)
+ */
+struct io_uring_sqe {
+	__u8	opcode;		/* type of operation for this sqe */
+	__u8	flags;		/* as of now unused */
+	__u16	ioprio;		/* ioprio for the request */
+	__s32	fd;		/* file descriptor to do IO on */
+	__u64	off;		/* offset into file */
+	__u64	addr;		/* pointer to buffer or iovecs */
+	__u32	len;		/* buffer size or number of iovecs */
+	union {
+		__kernel_rwf_t	rw_flags;
+		__u32		__resv;
+	};
+	__u64	user_data;	/* data to be passed back at completion time */
+	__u64	__pad2[3];
+};
+
+#define IORING_OP_NOP		0
+#define IORING_OP_READV		1
+#define IORING_OP_WRITEV	2
+
+/*
+ * IO completion data structure (Completion Queue Entry)
+ */
+struct io_uring_cqe {
+	__u64	user_data;	/* sqe->data submission passed back */
+	__s32	res;		/* result code for this event */
+	__u32	flags;
+};
+
+/*
+ * Magic offsets for the application to mmap the data it needs
+ */
+#define IORING_OFF_SQ_RING		0ULL
+#define IORING_OFF_CQ_RING		0x8000000ULL
+#define IORING_OFF_SQES			0x10000000ULL
+
+/*
+ * Filled with the offset for mmap(2)
+ */
+struct io_sqring_offsets {
+	__u32 head;
+	__u32 tail;
+	__u32 ring_mask;
+	__u32 ring_entries;
+	__u32 flags;
+	__u32 dropped;
+	__u32 array;
+	__u32 resv1;
+	__u64 resv2;
+};
+
+struct io_cqring_offsets {
+	__u32 head;
+	__u32 tail;
+	__u32 ring_mask;
+	__u32 ring_entries;
+	__u32 overflow;
+	__u32 cqes;
+	__u64 resv[2];
+};
+
+/*
+ * io_uring_enter(2) flags
+ */
+#define IORING_ENTER_GETEVENTS	(1U << 0)
+
+/*
+ * Passed in for io_uring_setup(2). Copied back with updated info on success
+ */
+struct io_uring_params {
+	__u32 sq_entries;
+	__u32 cq_entries;
+	__u32 flags;
+	__u32 resv[7];
+	struct io_sqring_offsets sq_off;
+	struct io_cqring_offsets cq_off;
+};
+
+#endif
diff --git a/init/Kconfig b/init/Kconfig
index c9386a365eea..53b54214a36e 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1414,6 +1414,15 @@ config AIO
 	  by some high performance threaded applications. Disabling
 	  this option saves about 7k.
 
+config IO_URING
+	bool "Enable IO uring support" if EXPERT
+	select ANON_INODES
+	default y
+	help
+	  This option enables support for the io_uring interface, enabling
+	  applications to submit and complete IO through submission and
+	  completion rings that are shared between the kernel and application.
+
 config ADVISE_SYSCALLS
 	bool "Enable madvise/fadvise syscalls" if EXPERT
 	default y
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index ab9d0e3c6d50..ee5e523564bb 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -46,6 +46,8 @@ COND_SYSCALL(io_getevents);
 COND_SYSCALL(io_pgetevents);
 COND_SYSCALL_COMPAT(io_getevents);
 COND_SYSCALL_COMPAT(io_pgetevents);
+COND_SYSCALL(io_uring_setup);
+COND_SYSCALL(io_uring_enter);
 
 /* fs/xattr.c */
 
diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index c36757e72844..f81854d74c7d 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -108,6 +108,9 @@ struct sock *unix_get_socket(struct file *filp)
 		/* PF_UNIX ? */
 		if (s && sock->ops && sock->ops->family == PF_UNIX)
 			u_sock = s;
+	} else {
+		/* Could be an io_uring instance */
+		u_sock = io_uring_get_socket(filp);
 	}
 	return u_sock;
 }
-- 
cgit v1.2.3


From c992fe2925d776be066d9f6cc13f9ea11d78b657 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 11 Jan 2019 09:43:02 -0700
Subject: io_uring: add fsync support

Add a new fsync opcode, which either syncs a range if one is passed,
or the whole file if the offset and length fields are both cleared
to zero.  A flag is provided to use fdatasync semantics, that is only
force out metadata which is required to retrieve the file data, but
not others like metadata.

Reviewed-by: Hannes Reinecke <hare@suse.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c                 | 54 +++++++++++++++++++++++++++++++++++++++++++
 include/uapi/linux/io_uring.h |  8 ++++++-
 2 files changed, 61 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/fs/io_uring.c b/fs/io_uring.c
index f68052290426..745413d92712 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -24,6 +24,7 @@
  * data that the application could potentially modify, it remains stable.
  *
  * Copyright (C) 2018-2019 Jens Axboe
+ * Copyright (c) 2018-2019 Christoph Hellwig
  */
 #include <linux/kernel.h>
 #include <linux/init.h>
@@ -557,6 +558,56 @@ static int io_nop(struct io_kiocb *req, u64 user_data)
 	return 0;
 }
 
+static int io_prep_fsync(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+{
+	int fd;
+
+	/* Prep already done */
+	if (req->rw.ki_filp)
+		return 0;
+
+	if (unlikely(sqe->addr || sqe->ioprio))
+		return -EINVAL;
+
+	fd = READ_ONCE(sqe->fd);
+	req->rw.ki_filp = fget(fd);
+	if (unlikely(!req->rw.ki_filp))
+		return -EBADF;
+
+	return 0;
+}
+
+static int io_fsync(struct io_kiocb *req, const struct io_uring_sqe *sqe,
+		    bool force_nonblock)
+{
+	loff_t sqe_off = READ_ONCE(sqe->off);
+	loff_t sqe_len = READ_ONCE(sqe->len);
+	loff_t end = sqe_off + sqe_len;
+	unsigned fsync_flags;
+	int ret;
+
+	fsync_flags = READ_ONCE(sqe->fsync_flags);
+	if (unlikely(fsync_flags & ~IORING_FSYNC_DATASYNC))
+		return -EINVAL;
+
+	ret = io_prep_fsync(req, sqe);
+	if (ret)
+		return ret;
+
+	/* fsync always requires a blocking context */
+	if (force_nonblock)
+		return -EAGAIN;
+
+	ret = vfs_fsync_range(req->rw.ki_filp, sqe_off,
+				end > 0 ? end : LLONG_MAX,
+				fsync_flags & IORING_FSYNC_DATASYNC);
+
+	fput(req->rw.ki_filp);
+	io_cqring_add_event(req->ctx, sqe->user_data, ret, 0);
+	io_free_req(req);
+	return 0;
+}
+
 static int __io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
 			   const struct sqe_submit *s, bool force_nonblock)
 {
@@ -578,6 +629,9 @@ static int __io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
 	case IORING_OP_WRITEV:
 		ret = io_write(req, s, force_nonblock);
 		break;
+	case IORING_OP_FSYNC:
+		ret = io_fsync(req, s->sqe, force_nonblock);
+		break;
 	default:
 		ret = -EINVAL;
 		break;
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index ac692823d6f4..4589d56d0b68 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -24,7 +24,7 @@ struct io_uring_sqe {
 	__u32	len;		/* buffer size or number of iovecs */
 	union {
 		__kernel_rwf_t	rw_flags;
-		__u32		__resv;
+		__u32		fsync_flags;
 	};
 	__u64	user_data;	/* data to be passed back at completion time */
 	__u64	__pad2[3];
@@ -33,6 +33,12 @@ struct io_uring_sqe {
 #define IORING_OP_NOP		0
 #define IORING_OP_READV		1
 #define IORING_OP_WRITEV	2
+#define IORING_OP_FSYNC		3
+
+/*
+ * sqe->fsync_flags
+ */
+#define IORING_FSYNC_DATASYNC	(1U << 0)
 
 /*
  * IO completion data structure (Completion Queue Entry)
-- 
cgit v1.2.3


From def596e9557c91d9846fc4d84d26f2c564644416 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Wed, 9 Jan 2019 08:59:42 -0700
Subject: io_uring: support for IO polling

Add support for a polled io_uring instance. When a read or write is
submitted to a polled io_uring, the application must poll for
completions on the CQ ring through io_uring_enter(2). Polled IO may not
generate IRQ completions, hence they need to be actively found by the
application itself.

To use polling, io_uring_setup() must be used with the
IORING_SETUP_IOPOLL flag being set. It is illegal to mix and match
polled and non-polled IO on an io_uring.

Reviewed-by: Hannes Reinecke <hare@suse.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c                 | 275 ++++++++++++++++++++++++++++++++++++++++--
 include/uapi/linux/io_uring.h |   5 +
 2 files changed, 271 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 745413d92712..578797a4f318 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -124,6 +124,14 @@ struct io_ring_ctx {
 
 	struct {
 		spinlock_t		completion_lock;
+		bool			poll_multi_file;
+		/*
+		 * ->poll_list is protected by the ctx->uring_lock for
+		 * io_uring instances that don't use IORING_SETUP_SQPOLL.
+		 * For SQPOLL, only the single threaded io_sq_thread() will
+		 * manipulate the list, hence no extra locking is needed there.
+		 */
+		struct list_head	poll_list;
 	} ____cacheline_aligned_in_smp;
 
 #if defined(CONFIG_UNIX)
@@ -135,6 +143,7 @@ struct sqe_submit {
 	const struct io_uring_sqe	*sqe;
 	unsigned short			index;
 	bool				has_user;
+	bool				needs_lock;
 };
 
 struct io_kiocb {
@@ -146,12 +155,15 @@ struct io_kiocb {
 	struct list_head	list;
 	unsigned int		flags;
 #define REQ_F_FORCE_NONBLOCK	1	/* inline submission attempt */
+#define REQ_F_IOPOLL_COMPLETED	2	/* polled IO has completed */
 	u64			user_data;
+	u64			error;
 
 	struct work_struct	work;
 };
 
 #define IO_PLUG_THRESHOLD		2
+#define IO_IOPOLL_BATCH			8
 
 static struct kmem_cache *req_cachep;
 
@@ -196,6 +208,7 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
 	mutex_init(&ctx->uring_lock);
 	init_waitqueue_head(&ctx->wait);
 	spin_lock_init(&ctx->completion_lock);
+	INIT_LIST_HEAD(&ctx->poll_list);
 	return ctx;
 }
 
@@ -297,12 +310,153 @@ static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx)
 	return NULL;
 }
 
+static void io_free_req_many(struct io_ring_ctx *ctx, void **reqs, int *nr)
+{
+	if (*nr) {
+		kmem_cache_free_bulk(req_cachep, *nr, reqs);
+		io_ring_drop_ctx_refs(ctx, *nr);
+		*nr = 0;
+	}
+}
+
 static void io_free_req(struct io_kiocb *req)
 {
 	io_ring_drop_ctx_refs(req->ctx, 1);
 	kmem_cache_free(req_cachep, req);
 }
 
+/*
+ * Find and free completed poll iocbs
+ */
+static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
+			       struct list_head *done)
+{
+	void *reqs[IO_IOPOLL_BATCH];
+	struct io_kiocb *req;
+	int to_free = 0;
+
+	while (!list_empty(done)) {
+		req = list_first_entry(done, struct io_kiocb, list);
+		list_del(&req->list);
+
+		io_cqring_fill_event(ctx, req->user_data, req->error, 0);
+
+		reqs[to_free++] = req;
+		(*nr_events)++;
+
+		fput(req->rw.ki_filp);
+		if (to_free == ARRAY_SIZE(reqs))
+			io_free_req_many(ctx, reqs, &to_free);
+	}
+	io_commit_cqring(ctx);
+
+	io_free_req_many(ctx, reqs, &to_free);
+}
+
+static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events,
+			long min)
+{
+	struct io_kiocb *req, *tmp;
+	LIST_HEAD(done);
+	bool spin;
+	int ret;
+
+	/*
+	 * Only spin for completions if we don't have multiple devices hanging
+	 * off our complete list, and we're under the requested amount.
+	 */
+	spin = !ctx->poll_multi_file && *nr_events < min;
+
+	ret = 0;
+	list_for_each_entry_safe(req, tmp, &ctx->poll_list, list) {
+		struct kiocb *kiocb = &req->rw;
+
+		/*
+		 * Move completed entries to our local list. If we find a
+		 * request that requires polling, break out and complete
+		 * the done list first, if we have entries there.
+		 */
+		if (req->flags & REQ_F_IOPOLL_COMPLETED) {
+			list_move_tail(&req->list, &done);
+			continue;
+		}
+		if (!list_empty(&done))
+			break;
+
+		ret = kiocb->ki_filp->f_op->iopoll(kiocb, spin);
+		if (ret < 0)
+			break;
+
+		if (ret && spin)
+			spin = false;
+		ret = 0;
+	}
+
+	if (!list_empty(&done))
+		io_iopoll_complete(ctx, nr_events, &done);
+
+	return ret;
+}
+
+/*
+ * Poll for a mininum of 'min' events. Note that if min == 0 we consider that a
+ * non-spinning poll check - we'll still enter the driver poll loop, but only
+ * as a non-spinning completion check.
+ */
+static int io_iopoll_getevents(struct io_ring_ctx *ctx, unsigned int *nr_events,
+				long min)
+{
+	while (!list_empty(&ctx->poll_list)) {
+		int ret;
+
+		ret = io_do_iopoll(ctx, nr_events, min);
+		if (ret < 0)
+			return ret;
+		if (!min || *nr_events >= min)
+			return 0;
+	}
+
+	return 1;
+}
+
+/*
+ * We can't just wait for polled events to come to us, we have to actively
+ * find and complete them.
+ */
+static void io_iopoll_reap_events(struct io_ring_ctx *ctx)
+{
+	if (!(ctx->flags & IORING_SETUP_IOPOLL))
+		return;
+
+	mutex_lock(&ctx->uring_lock);
+	while (!list_empty(&ctx->poll_list)) {
+		unsigned int nr_events = 0;
+
+		io_iopoll_getevents(ctx, &nr_events, 1);
+	}
+	mutex_unlock(&ctx->uring_lock);
+}
+
+static int io_iopoll_check(struct io_ring_ctx *ctx, unsigned *nr_events,
+			   long min)
+{
+	int ret = 0;
+
+	do {
+		int tmin = 0;
+
+		if (*nr_events < min)
+			tmin = min - *nr_events;
+
+		ret = io_iopoll_getevents(ctx, nr_events, tmin);
+		if (ret <= 0)
+			break;
+		ret = 0;
+	} while (min && !*nr_events && !need_resched());
+
+	return ret;
+}
+
 static void kiocb_end_write(struct kiocb *kiocb)
 {
 	if (kiocb->ki_flags & IOCB_WRITE) {
@@ -329,6 +483,53 @@ static void io_complete_rw(struct kiocb *kiocb, long res, long res2)
 	io_free_req(req);
 }
 
+static void io_complete_rw_iopoll(struct kiocb *kiocb, long res, long res2)
+{
+	struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw);
+
+	kiocb_end_write(kiocb);
+
+	req->error = res;
+	if (res != -EAGAIN)
+		req->flags |= REQ_F_IOPOLL_COMPLETED;
+}
+
+/*
+ * After the iocb has been issued, it's safe to be found on the poll list.
+ * Adding the kiocb to the list AFTER submission ensures that we don't
+ * find it from a io_iopoll_getevents() thread before the issuer is done
+ * accessing the kiocb cookie.
+ */
+static void io_iopoll_req_issued(struct io_kiocb *req)
+{
+	struct io_ring_ctx *ctx = req->ctx;
+
+	/*
+	 * Track whether we have multiple files in our lists. This will impact
+	 * how we do polling eventually, not spinning if we're on potentially
+	 * different devices.
+	 */
+	if (list_empty(&ctx->poll_list)) {
+		ctx->poll_multi_file = false;
+	} else if (!ctx->poll_multi_file) {
+		struct io_kiocb *list_req;
+
+		list_req = list_first_entry(&ctx->poll_list, struct io_kiocb,
+						list);
+		if (list_req->rw.ki_filp != req->rw.ki_filp)
+			ctx->poll_multi_file = true;
+	}
+
+	/*
+	 * For fast devices, IO may have already completed. If it has, add
+	 * it to the front so we find it first.
+	 */
+	if (req->flags & REQ_F_IOPOLL_COMPLETED)
+		list_add(&req->list, &ctx->poll_list);
+	else
+		list_add_tail(&req->list, &ctx->poll_list);
+}
+
 /*
  * If we tracked the file through the SCM inflight mechanism, we could support
  * any file. For now, just ensure that anything potentially problematic is done
@@ -349,6 +550,7 @@ static bool io_file_supports_async(struct file *file)
 static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 		      bool force_nonblock)
 {
+	struct io_ring_ctx *ctx = req->ctx;
 	struct kiocb *kiocb = &req->rw;
 	unsigned ioprio;
 	int fd, ret;
@@ -384,12 +586,22 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 		kiocb->ki_flags |= IOCB_NOWAIT;
 		req->flags |= REQ_F_FORCE_NONBLOCK;
 	}
-	if (kiocb->ki_flags & IOCB_HIPRI) {
-		ret = -EINVAL;
-		goto out_fput;
-	}
+	if (ctx->flags & IORING_SETUP_IOPOLL) {
+		ret = -EOPNOTSUPP;
+		if (!(kiocb->ki_flags & IOCB_DIRECT) ||
+		    !kiocb->ki_filp->f_op->iopoll)
+			goto out_fput;
 
-	kiocb->ki_complete = io_complete_rw;
+		req->error = 0;
+		kiocb->ki_flags |= IOCB_HIPRI;
+		kiocb->ki_complete = io_complete_rw_iopoll;
+	} else {
+		if (kiocb->ki_flags & IOCB_HIPRI) {
+			ret = -EINVAL;
+			goto out_fput;
+		}
+		kiocb->ki_complete = io_complete_rw;
+	}
 	return 0;
 out_fput:
 	fput(kiocb->ki_filp);
@@ -543,6 +755,9 @@ static int io_nop(struct io_kiocb *req, u64 user_data)
 	struct io_ring_ctx *ctx = req->ctx;
 	long err = 0;
 
+	if (unlikely(ctx->flags & IORING_SETUP_IOPOLL))
+		return -EINVAL;
+
 	/*
 	 * Twilight zone - it's possible that someone issued an opcode that
 	 * has a file attached, then got -EAGAIN on submission, and changed
@@ -566,6 +781,8 @@ static int io_prep_fsync(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 	if (req->rw.ki_filp)
 		return 0;
 
+	if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+		return -EINVAL;
 	if (unlikely(sqe->addr || sqe->ioprio))
 		return -EINVAL;
 
@@ -637,7 +854,22 @@ static int __io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
 		break;
 	}
 
-	return ret;
+	if (ret)
+		return ret;
+
+	if (ctx->flags & IORING_SETUP_IOPOLL) {
+		if (req->error == -EAGAIN)
+			return -EAGAIN;
+
+		/* workqueue context doesn't hold uring_lock, grab it now */
+		if (s->needs_lock)
+			mutex_lock(&ctx->uring_lock);
+		io_iopoll_req_issued(req);
+		if (s->needs_lock)
+			mutex_unlock(&ctx->uring_lock);
+	}
+
+	return 0;
 }
 
 static void io_sq_wq_submit_work(struct work_struct *work)
@@ -661,8 +893,19 @@ static void io_sq_wq_submit_work(struct work_struct *work)
 	use_mm(ctx->sqo_mm);
 	set_fs(USER_DS);
 	s->has_user = true;
+	s->needs_lock = true;
 
-	ret = __io_submit_sqe(ctx, req, s, false);
+	do {
+		ret = __io_submit_sqe(ctx, req, s, false);
+		/*
+		 * We can get EAGAIN for polled IO even though we're forcing
+		 * a sync submission from here, since we can't wait for
+		 * request slots on the block side.
+		 */
+		if (ret != -EAGAIN)
+			break;
+		cond_resched();
+	} while (1);
 
 	set_fs(old_fs);
 	unuse_mm(ctx->sqo_mm);
@@ -799,6 +1042,8 @@ static int io_ring_submit(struct io_ring_ctx *ctx, unsigned int to_submit)
 			break;
 
 		s.has_user = true;
+		s.needs_lock = false;
+
 		ret = io_submit_sqe(ctx, &s);
 		if (ret) {
 			io_drop_sqring(ctx);
@@ -947,6 +1192,9 @@ static void io_ring_ctx_free(struct io_ring_ctx *ctx)
 		destroy_workqueue(ctx->sqo_wq);
 	if (ctx->sqo_mm)
 		mmdrop(ctx->sqo_mm);
+
+	io_iopoll_reap_events(ctx);
+
 #if defined(CONFIG_UNIX)
 	if (ctx->ring_sock)
 		sock_release(ctx->ring_sock);
@@ -993,6 +1241,7 @@ static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx)
 	percpu_ref_kill(&ctx->refs);
 	mutex_unlock(&ctx->uring_lock);
 
+	io_iopoll_reap_events(ctx);
 	wait_for_completion(&ctx->ctx_done);
 	io_ring_ctx_free(ctx);
 }
@@ -1074,6 +1323,8 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
 			goto out_ctx;
 	}
 	if (flags & IORING_ENTER_GETEVENTS) {
+		unsigned nr_events = 0;
+
 		min_complete = min(min_complete, ctx->cq_entries);
 
 		/*
@@ -1085,7 +1336,13 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
 		if (submitted < to_submit)
 			min_complete = min_t(unsigned, submitted, min_complete);
 
-		ret = io_cqring_wait(ctx, min_complete, sig, sigsz);
+		if (ctx->flags & IORING_SETUP_IOPOLL) {
+			mutex_lock(&ctx->uring_lock);
+			ret = io_iopoll_check(ctx, &nr_events, min_complete);
+			mutex_unlock(&ctx->uring_lock);
+		} else {
+			ret = io_cqring_wait(ctx, min_complete, sig, sigsz);
+		}
 	}
 
 out_ctx:
@@ -1282,7 +1539,7 @@ static long io_uring_setup(u32 entries, struct io_uring_params __user *params)
 			return -EINVAL;
 	}
 
-	if (p.flags)
+	if (p.flags & ~IORING_SETUP_IOPOLL)
 		return -EINVAL;
 
 	ret = io_uring_create(entries, &p);
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 4589d56d0b68..5c457ea396e6 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -30,6 +30,11 @@ struct io_uring_sqe {
 	__u64	__pad2[3];
 };
 
+/*
+ * io_uring_setup() flags
+ */
+#define IORING_SETUP_IOPOLL	(1U << 0)	/* io_context is polled */
+
 #define IORING_OP_NOP		0
 #define IORING_OP_READV		1
 #define IORING_OP_WRITEV	2
-- 
cgit v1.2.3


From 091141a42e15fe47ada737f3996b317072afcefb Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Wed, 21 Nov 2018 10:32:39 -0700
Subject: fs: add fget_many() and fput_many()

Some uses cases repeatedly get and put references to the same file, but
the only exposed interface is doing these one at the time. As each of
these entail an atomic inc or dec on a shared structure, that cost can
add up.

Add fget_many(), which works just like fget(), except it takes an
argument for how many references to get on the file. Ditto fput_many(),
which can drop an arbitrary number of references to a file.

Reviewed-by: Hannes Reinecke <hare@suse.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/file.c            | 15 ++++++++++-----
 fs/file_table.c      |  9 +++++++--
 include/linux/file.h |  2 ++
 include/linux/fs.h   |  4 +++-
 4 files changed, 22 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/fs/file.c b/fs/file.c
index 3209ee271c41..97df385d6ab0 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -705,7 +705,7 @@ void do_close_on_exec(struct files_struct *files)
 	spin_unlock(&files->file_lock);
 }
 
-static struct file *__fget(unsigned int fd, fmode_t mask)
+static struct file *__fget(unsigned int fd, fmode_t mask, unsigned int refs)
 {
 	struct files_struct *files = current->files;
 	struct file *file;
@@ -720,7 +720,7 @@ loop:
 		 */
 		if (file->f_mode & mask)
 			file = NULL;
-		else if (!get_file_rcu(file))
+		else if (!get_file_rcu_many(file, refs))
 			goto loop;
 	}
 	rcu_read_unlock();
@@ -728,15 +728,20 @@ loop:
 	return file;
 }
 
+struct file *fget_many(unsigned int fd, unsigned int refs)
+{
+	return __fget(fd, FMODE_PATH, refs);
+}
+
 struct file *fget(unsigned int fd)
 {
-	return __fget(fd, FMODE_PATH);
+	return __fget(fd, FMODE_PATH, 1);
 }
 EXPORT_SYMBOL(fget);
 
 struct file *fget_raw(unsigned int fd)
 {
-	return __fget(fd, 0);
+	return __fget(fd, 0, 1);
 }
 EXPORT_SYMBOL(fget_raw);
 
@@ -767,7 +772,7 @@ static unsigned long __fget_light(unsigned int fd, fmode_t mask)
 			return 0;
 		return (unsigned long)file;
 	} else {
-		file = __fget(fd, mask);
+		file = __fget(fd, mask, 1);
 		if (!file)
 			return 0;
 		return FDPUT_FPUT | (unsigned long)file;
diff --git a/fs/file_table.c b/fs/file_table.c
index 5679e7fcb6b0..155d7514a094 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -326,9 +326,9 @@ void flush_delayed_fput(void)
 
 static DECLARE_DELAYED_WORK(delayed_fput_work, delayed_fput);
 
-void fput(struct file *file)
+void fput_many(struct file *file, unsigned int refs)
 {
-	if (atomic_long_dec_and_test(&file->f_count)) {
+	if (atomic_long_sub_and_test(refs, &file->f_count)) {
 		struct task_struct *task = current;
 
 		if (likely(!in_interrupt() && !(task->flags & PF_KTHREAD))) {
@@ -347,6 +347,11 @@ void fput(struct file *file)
 	}
 }
 
+void fput(struct file *file)
+{
+	fput_many(file, 1);
+}
+
 /*
  * synchronous analog of fput(); for kernel threads that might be needed
  * in some umount() (and thus can't use flush_delayed_fput() without
diff --git a/include/linux/file.h b/include/linux/file.h
index 6b2fb032416c..3fcddff56bc4 100644
--- a/include/linux/file.h
+++ b/include/linux/file.h
@@ -13,6 +13,7 @@
 struct file;
 
 extern void fput(struct file *);
+extern void fput_many(struct file *, unsigned int);
 
 struct file_operations;
 struct vfsmount;
@@ -44,6 +45,7 @@ static inline void fdput(struct fd fd)
 }
 
 extern struct file *fget(unsigned int fd);
+extern struct file *fget_many(unsigned int fd, unsigned int refs);
 extern struct file *fget_raw(unsigned int fd);
 extern unsigned long __fdget(unsigned int fd);
 extern unsigned long __fdget_raw(unsigned int fd);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 61aa210f0c2b..80e1b199a4b1 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -952,7 +952,9 @@ static inline struct file *get_file(struct file *f)
 	atomic_long_inc(&f->f_count);
 	return f;
 }
-#define get_file_rcu(x) atomic_long_inc_not_zero(&(x)->f_count)
+#define get_file_rcu_many(x, cnt)	\
+	atomic_long_add_unless(&(x)->f_count, (cnt), 0)
+#define get_file_rcu(x) get_file_rcu_many((x), 1)
 #define fput_atomic(x)	atomic_long_add_unless(&(x)->f_count, -1, 1)
 #define file_count(x)	atomic_long_read(&(x)->f_count)
 
-- 
cgit v1.2.3


From edafccee56ff31678a091ddb7219aba9b28bc3cb Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Wed, 9 Jan 2019 09:16:05 -0700
Subject: io_uring: add support for pre-mapped user IO buffers

If we have fixed user buffers, we can map them into the kernel when we
setup the io_uring. That avoids the need to do get_user_pages() for
each and every IO.

To utilize this feature, the application must call io_uring_register()
after having setup an io_uring instance, passing in
IORING_REGISTER_BUFFERS as the opcode. The argument must be a pointer to
an iovec array, and the nr_args should contain how many iovecs the
application wishes to map.

If successful, these buffers are now mapped into the kernel, eligible
for IO. To use these fixed buffers, the application must use the
IORING_OP_READ_FIXED and IORING_OP_WRITE_FIXED opcodes, and then
set sqe->index to the desired buffer index. sqe->addr..sqe->addr+seq->len
must point to somewhere inside the indexed buffer.

The application may register buffers throughout the lifetime of the
io_uring instance. It can call io_uring_register() with
IORING_UNREGISTER_BUFFERS as the opcode to unregister the current set of
buffers, and then register a new set. The application need not
unregister buffers explicitly before shutting down the io_uring
instance.

It's perfectly valid to setup a larger buffer, and then sometimes only
use parts of it for an IO. As long as the range is within the originally
mapped region, it will work just fine.

For now, buffers must not be file backed. If file backed buffers are
passed in, the registration will fail with -1/EOPNOTSUPP. This
restriction may be relaxed in the future.

RLIMIT_MEMLOCK is used to check how much memory we can pin. A somewhat
arbitrary 1G per buffer size is also imposed.

Reviewed-by: Hannes Reinecke <hare@suse.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 arch/x86/entry/syscalls/syscall_32.tbl |   1 +
 arch/x86/entry/syscalls/syscall_64.tbl |   1 +
 fs/io_uring.c                          | 374 +++++++++++++++++++++++++++++++--
 include/linux/syscalls.h               |   2 +
 include/uapi/asm-generic/unistd.h      |   4 +-
 include/uapi/linux/io_uring.h          |  13 +-
 kernel/sys_ni.c                        |   1 +
 7 files changed, 381 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index 481c126259e9..2eefd2a7c1ce 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -400,3 +400,4 @@
 386	i386	rseq			sys_rseq			__ia32_sys_rseq
 425	i386	io_uring_setup		sys_io_uring_setup		__ia32_sys_io_uring_setup
 426	i386	io_uring_enter		sys_io_uring_enter		__ia32_sys_io_uring_enter
+427	i386	io_uring_register	sys_io_uring_register		__ia32_sys_io_uring_register
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index 6a32a430c8e0..65c026185e61 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -345,6 +345,7 @@
 334	common	rseq			__x64_sys_rseq
 425	common	io_uring_setup		__x64_sys_io_uring_setup
 426	common	io_uring_enter		__x64_sys_io_uring_enter
+427	common	io_uring_register	__x64_sys_io_uring_register
 
 #
 # x32-specific system call numbers start at 512 to avoid cache impact
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 31f43ed894ba..c0c0f68568b5 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -45,6 +45,7 @@
 #include <linux/slab.h>
 #include <linux/workqueue.h>
 #include <linux/blkdev.h>
+#include <linux/bvec.h>
 #include <linux/net.h>
 #include <net/sock.h>
 #include <net/af_unix.h>
@@ -52,6 +53,8 @@
 #include <linux/sched/mm.h>
 #include <linux/uaccess.h>
 #include <linux/nospec.h>
+#include <linux/sizes.h>
+#include <linux/hugetlb.h>
 
 #include <uapi/linux/io_uring.h>
 
@@ -81,6 +84,13 @@ struct io_cq_ring {
 	struct io_uring_cqe	cqes[];
 };
 
+struct io_mapped_ubuf {
+	u64		ubuf;
+	size_t		len;
+	struct		bio_vec *bvec;
+	unsigned int	nr_bvecs;
+};
+
 struct io_ring_ctx {
 	struct {
 		struct percpu_ref	refs;
@@ -113,6 +123,10 @@ struct io_ring_ctx {
 		struct fasync_struct	*cq_fasync;
 	} ____cacheline_aligned_in_smp;
 
+	/* if used, fixed mapped user buffers */
+	unsigned		nr_user_bufs;
+	struct io_mapped_ubuf	*user_bufs;
+
 	struct user_struct	*user;
 
 	struct completion	ctx_done;
@@ -732,6 +746,46 @@ static inline void io_rw_done(struct kiocb *kiocb, ssize_t ret)
 	}
 }
 
+static int io_import_fixed(struct io_ring_ctx *ctx, int rw,
+			   const struct io_uring_sqe *sqe,
+			   struct iov_iter *iter)
+{
+	size_t len = READ_ONCE(sqe->len);
+	struct io_mapped_ubuf *imu;
+	unsigned index, buf_index;
+	size_t offset;
+	u64 buf_addr;
+
+	/* attempt to use fixed buffers without having provided iovecs */
+	if (unlikely(!ctx->user_bufs))
+		return -EFAULT;
+
+	buf_index = READ_ONCE(sqe->buf_index);
+	if (unlikely(buf_index >= ctx->nr_user_bufs))
+		return -EFAULT;
+
+	index = array_index_nospec(buf_index, ctx->nr_user_bufs);
+	imu = &ctx->user_bufs[index];
+	buf_addr = READ_ONCE(sqe->addr);
+
+	/* overflow */
+	if (buf_addr + len < buf_addr)
+		return -EFAULT;
+	/* not inside the mapped region */
+	if (buf_addr < imu->ubuf || buf_addr + len > imu->ubuf + imu->len)
+		return -EFAULT;
+
+	/*
+	 * May not be a start of buffer, set size appropriately
+	 * and advance us to the beginning.
+	 */
+	offset = buf_addr - imu->ubuf;
+	iov_iter_bvec(iter, rw, imu->bvec, imu->nr_bvecs, offset + len);
+	if (offset)
+		iov_iter_advance(iter, offset);
+	return 0;
+}
+
 static int io_import_iovec(struct io_ring_ctx *ctx, int rw,
 			   const struct sqe_submit *s, struct iovec **iovec,
 			   struct iov_iter *iter)
@@ -739,6 +793,23 @@ static int io_import_iovec(struct io_ring_ctx *ctx, int rw,
 	const struct io_uring_sqe *sqe = s->sqe;
 	void __user *buf = u64_to_user_ptr(READ_ONCE(sqe->addr));
 	size_t sqe_len = READ_ONCE(sqe->len);
+	u8 opcode;
+
+	/*
+	 * We're reading ->opcode for the second time, but the first read
+	 * doesn't care whether it's _FIXED or not, so it doesn't matter
+	 * whether ->opcode changes concurrently. The first read does care
+	 * about whether it is a READ or a WRITE, so we don't trust this read
+	 * for that purpose and instead let the caller pass in the read/write
+	 * flag.
+	 */
+	opcode = READ_ONCE(sqe->opcode);
+	if (opcode == IORING_OP_READ_FIXED ||
+	    opcode == IORING_OP_WRITE_FIXED) {
+		ssize_t ret = io_import_fixed(ctx, rw, sqe, iter);
+		*iovec = NULL;
+		return ret;
+	}
 
 	if (!s->has_user)
 		return -EFAULT;
@@ -886,7 +957,7 @@ static int io_prep_fsync(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 
 	if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
 		return -EINVAL;
-	if (unlikely(sqe->addr || sqe->ioprio))
+	if (unlikely(sqe->addr || sqe->ioprio || sqe->buf_index))
 		return -EINVAL;
 
 	fd = READ_ONCE(sqe->fd);
@@ -945,9 +1016,19 @@ static int __io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
 		ret = io_nop(req, req->user_data);
 		break;
 	case IORING_OP_READV:
+		if (unlikely(s->sqe->buf_index))
+			return -EINVAL;
 		ret = io_read(req, s, force_nonblock, state);
 		break;
 	case IORING_OP_WRITEV:
+		if (unlikely(s->sqe->buf_index))
+			return -EINVAL;
+		ret = io_write(req, s, force_nonblock, state);
+		break;
+	case IORING_OP_READ_FIXED:
+		ret = io_read(req, s, force_nonblock, state);
+		break;
+	case IORING_OP_WRITE_FIXED:
 		ret = io_write(req, s, force_nonblock, state);
 		break;
 	case IORING_OP_FSYNC:
@@ -976,28 +1057,46 @@ static int __io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
 	return 0;
 }
 
+static inline bool io_sqe_needs_user(const struct io_uring_sqe *sqe)
+{
+	u8 opcode = READ_ONCE(sqe->opcode);
+
+	return !(opcode == IORING_OP_READ_FIXED ||
+		 opcode == IORING_OP_WRITE_FIXED);
+}
+
 static void io_sq_wq_submit_work(struct work_struct *work)
 {
 	struct io_kiocb *req = container_of(work, struct io_kiocb, work);
 	struct sqe_submit *s = &req->submit;
 	const struct io_uring_sqe *sqe = s->sqe;
 	struct io_ring_ctx *ctx = req->ctx;
-	mm_segment_t old_fs = get_fs();
+	mm_segment_t old_fs;
+	bool needs_user;
 	int ret;
 
 	 /* Ensure we clear previously set forced non-block flag */
 	req->flags &= ~REQ_F_FORCE_NONBLOCK;
 	req->rw.ki_flags &= ~IOCB_NOWAIT;
 
-	if (!mmget_not_zero(ctx->sqo_mm)) {
-		ret = -EFAULT;
-		goto err;
-	}
-
-	use_mm(ctx->sqo_mm);
-	set_fs(USER_DS);
-	s->has_user = true;
 	s->needs_lock = true;
+	s->has_user = false;
+
+	/*
+	 * If we're doing IO to fixed buffers, we don't need to get/set
+	 * user context
+	 */
+	needs_user = io_sqe_needs_user(s->sqe);
+	if (needs_user) {
+		if (!mmget_not_zero(ctx->sqo_mm)) {
+			ret = -EFAULT;
+			goto err;
+		}
+		use_mm(ctx->sqo_mm);
+		old_fs = get_fs();
+		set_fs(USER_DS);
+		s->has_user = true;
+	}
 
 	do {
 		ret = __io_submit_sqe(ctx, req, s, false, NULL);
@@ -1011,9 +1110,11 @@ static void io_sq_wq_submit_work(struct work_struct *work)
 		cond_resched();
 	} while (1);
 
-	set_fs(old_fs);
-	unuse_mm(ctx->sqo_mm);
-	mmput(ctx->sqo_mm);
+	if (needs_user) {
+		set_fs(old_fs);
+		unuse_mm(ctx->sqo_mm);
+		mmput(ctx->sqo_mm);
+	}
 err:
 	if (ret) {
 		io_cqring_add_event(ctx, sqe->user_data, ret, 0);
@@ -1317,6 +1418,198 @@ static unsigned long ring_pages(unsigned sq_entries, unsigned cq_entries)
 	return (bytes + PAGE_SIZE - 1) / PAGE_SIZE;
 }
 
+static int io_sqe_buffer_unregister(struct io_ring_ctx *ctx)
+{
+	int i, j;
+
+	if (!ctx->user_bufs)
+		return -ENXIO;
+
+	for (i = 0; i < ctx->nr_user_bufs; i++) {
+		struct io_mapped_ubuf *imu = &ctx->user_bufs[i];
+
+		for (j = 0; j < imu->nr_bvecs; j++)
+			put_page(imu->bvec[j].bv_page);
+
+		if (ctx->account_mem)
+			io_unaccount_mem(ctx->user, imu->nr_bvecs);
+		kfree(imu->bvec);
+		imu->nr_bvecs = 0;
+	}
+
+	kfree(ctx->user_bufs);
+	ctx->user_bufs = NULL;
+	ctx->nr_user_bufs = 0;
+	return 0;
+}
+
+static int io_copy_iov(struct io_ring_ctx *ctx, struct iovec *dst,
+		       void __user *arg, unsigned index)
+{
+	struct iovec __user *src;
+
+#ifdef CONFIG_COMPAT
+	if (ctx->compat) {
+		struct compat_iovec __user *ciovs;
+		struct compat_iovec ciov;
+
+		ciovs = (struct compat_iovec __user *) arg;
+		if (copy_from_user(&ciov, &ciovs[index], sizeof(ciov)))
+			return -EFAULT;
+
+		dst->iov_base = (void __user *) (unsigned long) ciov.iov_base;
+		dst->iov_len = ciov.iov_len;
+		return 0;
+	}
+#endif
+	src = (struct iovec __user *) arg;
+	if (copy_from_user(dst, &src[index], sizeof(*dst)))
+		return -EFAULT;
+	return 0;
+}
+
+static int io_sqe_buffer_register(struct io_ring_ctx *ctx, void __user *arg,
+				  unsigned nr_args)
+{
+	struct vm_area_struct **vmas = NULL;
+	struct page **pages = NULL;
+	int i, j, got_pages = 0;
+	int ret = -EINVAL;
+
+	if (ctx->user_bufs)
+		return -EBUSY;
+	if (!nr_args || nr_args > UIO_MAXIOV)
+		return -EINVAL;
+
+	ctx->user_bufs = kcalloc(nr_args, sizeof(struct io_mapped_ubuf),
+					GFP_KERNEL);
+	if (!ctx->user_bufs)
+		return -ENOMEM;
+
+	for (i = 0; i < nr_args; i++) {
+		struct io_mapped_ubuf *imu = &ctx->user_bufs[i];
+		unsigned long off, start, end, ubuf;
+		int pret, nr_pages;
+		struct iovec iov;
+		size_t size;
+
+		ret = io_copy_iov(ctx, &iov, arg, i);
+		if (ret)
+			break;
+
+		/*
+		 * Don't impose further limits on the size and buffer
+		 * constraints here, we'll -EINVAL later when IO is
+		 * submitted if they are wrong.
+		 */
+		ret = -EFAULT;
+		if (!iov.iov_base || !iov.iov_len)
+			goto err;
+
+		/* arbitrary limit, but we need something */
+		if (iov.iov_len > SZ_1G)
+			goto err;
+
+		ubuf = (unsigned long) iov.iov_base;
+		end = (ubuf + iov.iov_len + PAGE_SIZE - 1) >> PAGE_SHIFT;
+		start = ubuf >> PAGE_SHIFT;
+		nr_pages = end - start;
+
+		if (ctx->account_mem) {
+			ret = io_account_mem(ctx->user, nr_pages);
+			if (ret)
+				goto err;
+		}
+
+		ret = 0;
+		if (!pages || nr_pages > got_pages) {
+			kfree(vmas);
+			kfree(pages);
+			pages = kmalloc_array(nr_pages, sizeof(struct page *),
+						GFP_KERNEL);
+			vmas = kmalloc_array(nr_pages,
+					sizeof(struct vm_area_struct *),
+					GFP_KERNEL);
+			if (!pages || !vmas) {
+				ret = -ENOMEM;
+				if (ctx->account_mem)
+					io_unaccount_mem(ctx->user, nr_pages);
+				goto err;
+			}
+			got_pages = nr_pages;
+		}
+
+		imu->bvec = kmalloc_array(nr_pages, sizeof(struct bio_vec),
+						GFP_KERNEL);
+		ret = -ENOMEM;
+		if (!imu->bvec) {
+			if (ctx->account_mem)
+				io_unaccount_mem(ctx->user, nr_pages);
+			goto err;
+		}
+
+		ret = 0;
+		down_read(&current->mm->mmap_sem);
+		pret = get_user_pages_longterm(ubuf, nr_pages, FOLL_WRITE,
+						pages, vmas);
+		if (pret == nr_pages) {
+			/* don't support file backed memory */
+			for (j = 0; j < nr_pages; j++) {
+				struct vm_area_struct *vma = vmas[j];
+
+				if (vma->vm_file &&
+				    !is_file_hugepages(vma->vm_file)) {
+					ret = -EOPNOTSUPP;
+					break;
+				}
+			}
+		} else {
+			ret = pret < 0 ? pret : -EFAULT;
+		}
+		up_read(&current->mm->mmap_sem);
+		if (ret) {
+			/*
+			 * if we did partial map, or found file backed vmas,
+			 * release any pages we did get
+			 */
+			if (pret > 0) {
+				for (j = 0; j < pret; j++)
+					put_page(pages[j]);
+			}
+			if (ctx->account_mem)
+				io_unaccount_mem(ctx->user, nr_pages);
+			goto err;
+		}
+
+		off = ubuf & ~PAGE_MASK;
+		size = iov.iov_len;
+		for (j = 0; j < nr_pages; j++) {
+			size_t vec_len;
+
+			vec_len = min_t(size_t, size, PAGE_SIZE - off);
+			imu->bvec[j].bv_page = pages[j];
+			imu->bvec[j].bv_len = vec_len;
+			imu->bvec[j].bv_offset = off;
+			off = 0;
+			size -= vec_len;
+		}
+		/* store original address for later verification */
+		imu->ubuf = ubuf;
+		imu->len = iov.iov_len;
+		imu->nr_bvecs = nr_pages;
+
+		ctx->nr_user_bufs++;
+	}
+	kfree(pages);
+	kfree(vmas);
+	return 0;
+err:
+	kfree(pages);
+	kfree(vmas);
+	io_sqe_buffer_unregister(ctx);
+	return ret;
+}
+
 static void io_ring_ctx_free(struct io_ring_ctx *ctx)
 {
 	if (ctx->sqo_wq)
@@ -1325,6 +1618,7 @@ static void io_ring_ctx_free(struct io_ring_ctx *ctx)
 		mmdrop(ctx->sqo_mm);
 
 	io_iopoll_reap_events(ctx);
+	io_sqe_buffer_unregister(ctx);
 
 #if defined(CONFIG_UNIX)
 	if (ctx->ring_sock)
@@ -1689,6 +1983,60 @@ SYSCALL_DEFINE2(io_uring_setup, u32, entries,
 	return io_uring_setup(entries, params);
 }
 
+static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
+			       void __user *arg, unsigned nr_args)
+{
+	int ret;
+
+	percpu_ref_kill(&ctx->refs);
+	wait_for_completion(&ctx->ctx_done);
+
+	switch (opcode) {
+	case IORING_REGISTER_BUFFERS:
+		ret = io_sqe_buffer_register(ctx, arg, nr_args);
+		break;
+	case IORING_UNREGISTER_BUFFERS:
+		ret = -EINVAL;
+		if (arg || nr_args)
+			break;
+		ret = io_sqe_buffer_unregister(ctx);
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+	/* bring the ctx back to life */
+	reinit_completion(&ctx->ctx_done);
+	percpu_ref_reinit(&ctx->refs);
+	return ret;
+}
+
+SYSCALL_DEFINE4(io_uring_register, unsigned int, fd, unsigned int, opcode,
+		void __user *, arg, unsigned int, nr_args)
+{
+	struct io_ring_ctx *ctx;
+	long ret = -EBADF;
+	struct fd f;
+
+	f = fdget(fd);
+	if (!f.file)
+		return -EBADF;
+
+	ret = -EOPNOTSUPP;
+	if (f.file->f_op != &io_uring_fops)
+		goto out_fput;
+
+	ctx = f.file->private_data;
+
+	mutex_lock(&ctx->uring_lock);
+	ret = __io_uring_register(ctx, opcode, arg, nr_args);
+	mutex_unlock(&ctx->uring_lock);
+out_fput:
+	fdput(f);
+	return ret;
+}
+
 static int __init io_uring_init(void)
 {
 	req_cachep = KMEM_CACHE(io_kiocb, SLAB_HWCACHE_ALIGN | SLAB_PANIC);
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 3072dbaa7869..3681c05ac538 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -315,6 +315,8 @@ asmlinkage long sys_io_uring_setup(u32 entries,
 asmlinkage long sys_io_uring_enter(unsigned int fd, u32 to_submit,
 				u32 min_complete, u32 flags,
 				const sigset_t __user *sig, size_t sigsz);
+asmlinkage long sys_io_uring_register(unsigned int fd, unsigned int op,
+				void __user *arg, unsigned int nr_args);
 
 /* fs/xattr.c */
 asmlinkage long sys_setxattr(const char __user *path, const char __user *name,
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index 87871e7b7ea7..d346229a1eb0 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -744,9 +744,11 @@ __SYSCALL(__NR_kexec_file_load,     sys_kexec_file_load)
 __SYSCALL(__NR_io_uring_setup, sys_io_uring_setup)
 #define __NR_io_uring_enter 426
 __SYSCALL(__NR_io_uring_enter, sys_io_uring_enter)
+#define __NR_io_uring_register 427
+__SYSCALL(__NR_io_uring_register, sys_io_uring_register)
 
 #undef __NR_syscalls
-#define __NR_syscalls 427
+#define __NR_syscalls 428
 
 /*
  * 32 bit systems traditionally used different
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 5c457ea396e6..cf28f7a11f12 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -27,7 +27,10 @@ struct io_uring_sqe {
 		__u32		fsync_flags;
 	};
 	__u64	user_data;	/* data to be passed back at completion time */
-	__u64	__pad2[3];
+	union {
+		__u16	buf_index;	/* index into fixed buffers, if used */
+		__u64	__pad2[3];
+	};
 };
 
 /*
@@ -39,6 +42,8 @@ struct io_uring_sqe {
 #define IORING_OP_READV		1
 #define IORING_OP_WRITEV	2
 #define IORING_OP_FSYNC		3
+#define IORING_OP_READ_FIXED	4
+#define IORING_OP_WRITE_FIXED	5
 
 /*
  * sqe->fsync_flags
@@ -103,4 +108,10 @@ struct io_uring_params {
 	struct io_cqring_offsets cq_off;
 };
 
+/*
+ * io_uring_register(2) opcodes and arguments
+ */
+#define IORING_REGISTER_BUFFERS		0
+#define IORING_UNREGISTER_BUFFERS	1
+
 #endif
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index ee5e523564bb..1bb6604dc19f 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -48,6 +48,7 @@ COND_SYSCALL_COMPAT(io_getevents);
 COND_SYSCALL_COMPAT(io_pgetevents);
 COND_SYSCALL(io_uring_setup);
 COND_SYSCALL(io_uring_enter);
+COND_SYSCALL(io_uring_register);
 
 /* fs/xattr.c */
 
-- 
cgit v1.2.3


From f4e65870e5cede5ca1ec0006b6c9803994e5f7b8 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Fri, 8 Feb 2019 09:01:44 -0700
Subject: net: split out functions related to registering inflight socket files

We need this functionality for the io_uring file registration, but
we cannot rely on it since CONFIG_UNIX can be modular. Move the helpers
to a separate file, that's always builtin to the kernel if CONFIG_UNIX is
m/y.

No functional changes in this patch, just moving code around.

Reviewed-by: Hannes Reinecke <hare@suse.com>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/net/af_unix.h |   1 +
 net/Makefile          |   2 +-
 net/unix/Kconfig      |   5 ++
 net/unix/Makefile     |   2 +
 net/unix/af_unix.c    |  63 +--------------------
 net/unix/garbage.c    |  71 +-----------------------
 net/unix/scm.c        | 151 ++++++++++++++++++++++++++++++++++++++++++++++++++
 net/unix/scm.h        |  10 ++++
 8 files changed, 174 insertions(+), 131 deletions(-)
 create mode 100644 net/unix/scm.c
 create mode 100644 net/unix/scm.h

(limited to 'include')

diff --git a/include/net/af_unix.h b/include/net/af_unix.h
index ddbba838d048..3426d6dacc45 100644
--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -10,6 +10,7 @@
 
 void unix_inflight(struct user_struct *user, struct file *fp);
 void unix_notinflight(struct user_struct *user, struct file *fp);
+void unix_destruct_scm(struct sk_buff *skb);
 void unix_gc(void);
 void wait_for_unix_gc(void);
 struct sock *unix_get_socket(struct file *filp);
diff --git a/net/Makefile b/net/Makefile
index bdaf53925acd..449fc0b221f8 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -18,7 +18,7 @@ obj-$(CONFIG_NETFILTER)		+= netfilter/
 obj-$(CONFIG_INET)		+= ipv4/
 obj-$(CONFIG_TLS)		+= tls/
 obj-$(CONFIG_XFRM)		+= xfrm/
-obj-$(CONFIG_UNIX)		+= unix/
+obj-$(CONFIG_UNIX_SCM)		+= unix/
 obj-$(CONFIG_NET)		+= ipv6/
 obj-$(CONFIG_BPFILTER)		+= bpfilter/
 obj-$(CONFIG_PACKET)		+= packet/
diff --git a/net/unix/Kconfig b/net/unix/Kconfig
index 8b31ab85d050..3b9e450656a4 100644
--- a/net/unix/Kconfig
+++ b/net/unix/Kconfig
@@ -19,6 +19,11 @@ config UNIX
 
 	  Say Y unless you know what you are doing.
 
+config UNIX_SCM
+	bool
+	depends on UNIX
+	default y
+
 config UNIX_DIAG
 	tristate "UNIX: socket monitoring interface"
 	depends on UNIX
diff --git a/net/unix/Makefile b/net/unix/Makefile
index ffd0a275c3a7..54e58cc4f945 100644
--- a/net/unix/Makefile
+++ b/net/unix/Makefile
@@ -10,3 +10,5 @@ unix-$(CONFIG_SYSCTL)	+= sysctl_net_unix.o
 
 obj-$(CONFIG_UNIX_DIAG)	+= unix_diag.o
 unix_diag-y		:= diag.o
+
+obj-$(CONFIG_UNIX_SCM)	+= scm.o
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 74d1eed7cbd4..2ce32dbb2feb 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -119,6 +119,8 @@
 #include <linux/freezer.h>
 #include <linux/file.h>
 
+#include "scm.h"
+
 struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
 EXPORT_SYMBOL_GPL(unix_socket_table);
 DEFINE_SPINLOCK(unix_table_lock);
@@ -1486,67 +1488,6 @@ out:
 	return err;
 }
 
-static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
-{
-	int i;
-
-	scm->fp = UNIXCB(skb).fp;
-	UNIXCB(skb).fp = NULL;
-
-	for (i = scm->fp->count-1; i >= 0; i--)
-		unix_notinflight(scm->fp->user, scm->fp->fp[i]);
-}
-
-static void unix_destruct_scm(struct sk_buff *skb)
-{
-	struct scm_cookie scm;
-	memset(&scm, 0, sizeof(scm));
-	scm.pid  = UNIXCB(skb).pid;
-	if (UNIXCB(skb).fp)
-		unix_detach_fds(&scm, skb);
-
-	/* Alas, it calls VFS */
-	/* So fscking what? fput() had been SMP-safe since the last Summer */
-	scm_destroy(&scm);
-	sock_wfree(skb);
-}
-
-/*
- * The "user->unix_inflight" variable is protected by the garbage
- * collection lock, and we just read it locklessly here. If you go
- * over the limit, there might be a tiny race in actually noticing
- * it across threads. Tough.
- */
-static inline bool too_many_unix_fds(struct task_struct *p)
-{
-	struct user_struct *user = current_user();
-
-	if (unlikely(user->unix_inflight > task_rlimit(p, RLIMIT_NOFILE)))
-		return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
-	return false;
-}
-
-static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
-{
-	int i;
-
-	if (too_many_unix_fds(current))
-		return -ETOOMANYREFS;
-
-	/*
-	 * Need to duplicate file references for the sake of garbage
-	 * collection.  Otherwise a socket in the fps might become a
-	 * candidate for GC while the skb is not yet queued.
-	 */
-	UNIXCB(skb).fp = scm_fp_dup(scm->fp);
-	if (!UNIXCB(skb).fp)
-		return -ENOMEM;
-
-	for (i = scm->fp->count - 1; i >= 0; i--)
-		unix_inflight(scm->fp->user, scm->fp->fp[i]);
-	return 0;
-}
-
 static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
 {
 	int err = 0;
diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index f81854d74c7d..8bbe1b8e4ff7 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -86,80 +86,13 @@
 #include <net/scm.h>
 #include <net/tcp_states.h>
 
+#include "scm.h"
+
 /* Internal data structures and random procedures: */
 
-static LIST_HEAD(gc_inflight_list);
 static LIST_HEAD(gc_candidates);
-static DEFINE_SPINLOCK(unix_gc_lock);
 static DECLARE_WAIT_QUEUE_HEAD(unix_gc_wait);
 
-unsigned int unix_tot_inflight;
-
-struct sock *unix_get_socket(struct file *filp)
-{
-	struct sock *u_sock = NULL;
-	struct inode *inode = file_inode(filp);
-
-	/* Socket ? */
-	if (S_ISSOCK(inode->i_mode) && !(filp->f_mode & FMODE_PATH)) {
-		struct socket *sock = SOCKET_I(inode);
-		struct sock *s = sock->sk;
-
-		/* PF_UNIX ? */
-		if (s && sock->ops && sock->ops->family == PF_UNIX)
-			u_sock = s;
-	} else {
-		/* Could be an io_uring instance */
-		u_sock = io_uring_get_socket(filp);
-	}
-	return u_sock;
-}
-
-/* Keep the number of times in flight count for the file
- * descriptor if it is for an AF_UNIX socket.
- */
-
-void unix_inflight(struct user_struct *user, struct file *fp)
-{
-	struct sock *s = unix_get_socket(fp);
-
-	spin_lock(&unix_gc_lock);
-
-	if (s) {
-		struct unix_sock *u = unix_sk(s);
-
-		if (atomic_long_inc_return(&u->inflight) == 1) {
-			BUG_ON(!list_empty(&u->link));
-			list_add_tail(&u->link, &gc_inflight_list);
-		} else {
-			BUG_ON(list_empty(&u->link));
-		}
-		unix_tot_inflight++;
-	}
-	user->unix_inflight++;
-	spin_unlock(&unix_gc_lock);
-}
-
-void unix_notinflight(struct user_struct *user, struct file *fp)
-{
-	struct sock *s = unix_get_socket(fp);
-
-	spin_lock(&unix_gc_lock);
-
-	if (s) {
-		struct unix_sock *u = unix_sk(s);
-
-		BUG_ON(!atomic_long_read(&u->inflight));
-		BUG_ON(list_empty(&u->link));
-
-		if (atomic_long_dec_and_test(&u->inflight))
-			list_del_init(&u->link);
-		unix_tot_inflight--;
-	}
-	user->unix_inflight--;
-	spin_unlock(&unix_gc_lock);
-}
-
 static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *),
 			  struct sk_buff_head *hitlist)
 {
diff --git a/net/unix/scm.c b/net/unix/scm.c
new file mode 100644
index 000000000000..8c40f2b32392
--- /dev/null
+++ b/net/unix/scm.c
@@ -0,0 +1,151 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/socket.h>
+#include <linux/net.h>
+#include <linux/fs.h>
+#include <net/af_unix.h>
+#include <net/scm.h>
+#include <linux/init.h>
+
+#include "scm.h"
+
+unsigned int unix_tot_inflight;
+EXPORT_SYMBOL(unix_tot_inflight);
+
+LIST_HEAD(gc_inflight_list);
+EXPORT_SYMBOL(gc_inflight_list);
+
+DEFINE_SPINLOCK(unix_gc_lock);
+EXPORT_SYMBOL(unix_gc_lock);
+
+struct sock *unix_get_socket(struct file *filp)
+{
+	struct sock *u_sock = NULL;
+	struct inode *inode = file_inode(filp);
+
+	/* Socket ? */
+	if (S_ISSOCK(inode->i_mode) && !(filp->f_mode & FMODE_PATH)) {
+		struct socket *sock = SOCKET_I(inode);
+		struct sock *s = sock->sk;
+
+		/* PF_UNIX ? */
+		if (s && sock->ops && sock->ops->family == PF_UNIX)
+			u_sock = s;
+	} else {
+		/* Could be an io_uring instance */
+		u_sock = io_uring_get_socket(filp);
+	}
+	return u_sock;
+}
+EXPORT_SYMBOL(unix_get_socket);
+
+/* Keep the number of times in flight count for the file
+ * descriptor if it is for an AF_UNIX socket.
+ */
+void unix_inflight(struct user_struct *user, struct file *fp)
+{
+	struct sock *s = unix_get_socket(fp);
+
+	spin_lock(&unix_gc_lock);
+
+	if (s) {
+		struct unix_sock *u = unix_sk(s);
+
+		if (atomic_long_inc_return(&u->inflight) == 1) {
+			BUG_ON(!list_empty(&u->link));
+			list_add_tail(&u->link, &gc_inflight_list);
+		} else {
+			BUG_ON(list_empty(&u->link));
+		}
+		unix_tot_inflight++;
+	}
+	user->unix_inflight++;
+	spin_unlock(&unix_gc_lock);
+}
+
+void unix_notinflight(struct user_struct *user, struct file *fp)
+{
+	struct sock *s = unix_get_socket(fp);
+
+	spin_lock(&unix_gc_lock);
+
+	if (s) {
+		struct unix_sock *u = unix_sk(s);
+
+		BUG_ON(!atomic_long_read(&u->inflight));
+		BUG_ON(list_empty(&u->link));
+
+		if (atomic_long_dec_and_test(&u->inflight))
+			list_del_init(&u->link);
+		unix_tot_inflight--;
+	}
+	user->unix_inflight--;
+	spin_unlock(&unix_gc_lock);
+}
+
+/*
+ * The "user->unix_inflight" variable is protected by the garbage
+ * collection lock, and we just read it locklessly here. If you go
+ * over the limit, there might be a tiny race in actually noticing
+ * it across threads. Tough.
+ */
+static inline bool too_many_unix_fds(struct task_struct *p)
+{
+	struct user_struct *user = current_user();
+
+	if (unlikely(user->unix_inflight > task_rlimit(p, RLIMIT_NOFILE)))
+		return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
+	return false;
+}
+
+int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
+{
+	int i;
+
+	if (too_many_unix_fds(current))
+		return -ETOOMANYREFS;
+
+	/*
+	 * Need to duplicate file references for the sake of garbage
+	 * collection.  Otherwise a socket in the fps might become a
+	 * candidate for GC while the skb is not yet queued.
+	 */
+	UNIXCB(skb).fp = scm_fp_dup(scm->fp);
+	if (!UNIXCB(skb).fp)
+		return -ENOMEM;
+
+	for (i = scm->fp->count - 1; i >= 0; i--)
+		unix_inflight(scm->fp->user, scm->fp->fp[i]);
+	return 0;
+}
+EXPORT_SYMBOL(unix_attach_fds);
+
+void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
+{
+	int i;
+
+	scm->fp = UNIXCB(skb).fp;
+	UNIXCB(skb).fp = NULL;
+
+	for (i = scm->fp->count-1; i >= 0; i--)
+		unix_notinflight(scm->fp->user, scm->fp->fp[i]);
+}
+EXPORT_SYMBOL(unix_detach_fds);
+
+void unix_destruct_scm(struct sk_buff *skb)
+{
+	struct scm_cookie scm;
+
+	memset(&scm, 0, sizeof(scm));
+	scm.pid  = UNIXCB(skb).pid;
+	if (UNIXCB(skb).fp)
+		unix_detach_fds(&scm, skb);
+
+	/* Alas, it calls VFS */
+	/* So fscking what? fput() had been SMP-safe since the last Summer */
+	scm_destroy(&scm);
+	sock_wfree(skb);
+}
+EXPORT_SYMBOL(unix_destruct_scm);
diff --git a/net/unix/scm.h b/net/unix/scm.h
new file mode 100644
index 000000000000..5a255a477f16
--- /dev/null
+++ b/net/unix/scm.h
@@ -0,0 +1,10 @@
+#ifndef NET_UNIX_SCM_H
+#define NET_UNIX_SCM_H
+
+extern struct list_head gc_inflight_list;
+extern spinlock_t unix_gc_lock;
+
+int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb);
+void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb);
+
+#endif
-- 
cgit v1.2.3


From 6b06314c47e141031be043539900d80d2c7ba10f Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Thu, 10 Jan 2019 22:13:58 -0700
Subject: io_uring: add file set registration

We normally have to fget/fput for each IO we do on a file. Even with
the batching we do, the cost of the atomic inc/dec of the file usage
count adds up.

This adds IORING_REGISTER_FILES, and IORING_UNREGISTER_FILES opcodes
for the io_uring_register(2) system call. The arguments passed in must
be an array of __s32 holding file descriptors, and nr_args should hold
the number of file descriptors the application wishes to pin for the
duration of the io_uring instance (or until IORING_UNREGISTER_FILES is
called).

When used, the application must set IOSQE_FIXED_FILE in the sqe->flags
member. Then, instead of setting sqe->fd to the real fd, it sets sqe->fd
to the index in the array passed in to IORING_REGISTER_FILES.

Files are automatically unregistered when the io_uring instance is torn
down. An application need only unregister if it wishes to register a new
set of fds.

Reviewed-by: Hannes Reinecke <hare@suse.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c                 | 311 +++++++++++++++++++++++++++++++++++++-----
 include/uapi/linux/io_uring.h |   9 +-
 2 files changed, 288 insertions(+), 32 deletions(-)

(limited to 'include')

diff --git a/fs/io_uring.c b/fs/io_uring.c
index c0c0f68568b5..5eaa1c4a3f7c 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -49,6 +49,7 @@
 #include <linux/net.h>
 #include <net/sock.h>
 #include <net/af_unix.h>
+#include <net/scm.h>
 #include <linux/anon_inodes.h>
 #include <linux/sched/mm.h>
 #include <linux/uaccess.h>
@@ -61,6 +62,7 @@
 #include "internal.h"
 
 #define IORING_MAX_ENTRIES	4096
+#define IORING_MAX_FIXED_FILES	1024
 
 struct io_uring {
 	u32 head ____cacheline_aligned_in_smp;
@@ -123,6 +125,14 @@ struct io_ring_ctx {
 		struct fasync_struct	*cq_fasync;
 	} ____cacheline_aligned_in_smp;
 
+	/*
+	 * If used, fixed file set. Writers must ensure that ->refs is dead,
+	 * readers must ensure that ->refs is alive as long as the file* is
+	 * used. Only updated through io_uring_register(2).
+	 */
+	struct file		**user_files;
+	unsigned		nr_user_files;
+
 	/* if used, fixed mapped user buffers */
 	unsigned		nr_user_bufs;
 	struct io_mapped_ubuf	*user_bufs;
@@ -170,6 +180,7 @@ struct io_kiocb {
 	unsigned int		flags;
 #define REQ_F_FORCE_NONBLOCK	1	/* inline submission attempt */
 #define REQ_F_IOPOLL_COMPLETED	2	/* polled IO has completed */
+#define REQ_F_FIXED_FILE	4	/* ctx owns file */
 	u64			user_data;
 	u64			error;
 
@@ -404,15 +415,17 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
 		 * Batched puts of the same file, to avoid dirtying the
 		 * file usage count multiple times, if avoidable.
 		 */
-		if (!file) {
-			file = req->rw.ki_filp;
-			file_count = 1;
-		} else if (file == req->rw.ki_filp) {
-			file_count++;
-		} else {
-			fput_many(file, file_count);
-			file = req->rw.ki_filp;
-			file_count = 1;
+		if (!(req->flags & REQ_F_FIXED_FILE)) {
+			if (!file) {
+				file = req->rw.ki_filp;
+				file_count = 1;
+			} else if (file == req->rw.ki_filp) {
+				file_count++;
+			} else {
+				fput_many(file, file_count);
+				file = req->rw.ki_filp;
+				file_count = 1;
+			}
 		}
 
 		if (to_free == ARRAY_SIZE(reqs))
@@ -544,13 +557,19 @@ static void kiocb_end_write(struct kiocb *kiocb)
 	}
 }
 
+static void io_fput(struct io_kiocb *req)
+{
+	if (!(req->flags & REQ_F_FIXED_FILE))
+		fput(req->rw.ki_filp);
+}
+
 static void io_complete_rw(struct kiocb *kiocb, long res, long res2)
 {
 	struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw);
 
 	kiocb_end_write(kiocb);
 
-	fput(kiocb->ki_filp);
+	io_fput(req);
 	io_cqring_add_event(req->ctx, req->user_data, res, 0);
 	io_free_req(req);
 }
@@ -666,19 +685,29 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 {
 	struct io_ring_ctx *ctx = req->ctx;
 	struct kiocb *kiocb = &req->rw;
-	unsigned ioprio;
+	unsigned ioprio, flags;
 	int fd, ret;
 
 	/* For -EAGAIN retry, everything is already prepped */
 	if (kiocb->ki_filp)
 		return 0;
 
+	flags = READ_ONCE(sqe->flags);
 	fd = READ_ONCE(sqe->fd);
-	kiocb->ki_filp = io_file_get(state, fd);
-	if (unlikely(!kiocb->ki_filp))
-		return -EBADF;
-	if (force_nonblock && !io_file_supports_async(kiocb->ki_filp))
-		force_nonblock = false;
+
+	if (flags & IOSQE_FIXED_FILE) {
+		if (unlikely(!ctx->user_files ||
+		    (unsigned) fd >= ctx->nr_user_files))
+			return -EBADF;
+		kiocb->ki_filp = ctx->user_files[fd];
+		req->flags |= REQ_F_FIXED_FILE;
+	} else {
+		kiocb->ki_filp = io_file_get(state, fd);
+		if (unlikely(!kiocb->ki_filp))
+			return -EBADF;
+		if (force_nonblock && !io_file_supports_async(kiocb->ki_filp))
+			force_nonblock = false;
+	}
 	kiocb->ki_pos = READ_ONCE(sqe->off);
 	kiocb->ki_flags = iocb_flags(kiocb->ki_filp);
 	kiocb->ki_hint = ki_hint_validate(file_write_hint(kiocb->ki_filp));
@@ -718,10 +747,14 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 	}
 	return 0;
 out_fput:
-	/* in case of error, we didn't use this file reference. drop it. */
-	if (state)
-		state->used_refs--;
-	io_file_put(state, kiocb->ki_filp);
+	if (!(flags & IOSQE_FIXED_FILE)) {
+		/*
+		 * in case of error, we didn't use this file reference. drop it.
+		 */
+		if (state)
+			state->used_refs--;
+		io_file_put(state, kiocb->ki_filp);
+	}
 	return ret;
 }
 
@@ -863,7 +896,7 @@ static ssize_t io_read(struct io_kiocb *req, const struct sqe_submit *s,
 out_fput:
 	/* Hold on to the file for -EAGAIN */
 	if (unlikely(ret && ret != -EAGAIN))
-		fput(file);
+		io_fput(req);
 	return ret;
 }
 
@@ -917,7 +950,7 @@ static ssize_t io_write(struct io_kiocb *req, const struct sqe_submit *s,
 	kfree(iovec);
 out_fput:
 	if (unlikely(ret))
-		fput(file);
+		io_fput(req);
 	return ret;
 }
 
@@ -940,7 +973,7 @@ static int io_nop(struct io_kiocb *req, u64 user_data)
 	 */
 	if (req->rw.ki_filp) {
 		err = -EBADF;
-		fput(req->rw.ki_filp);
+		io_fput(req);
 	}
 	io_cqring_add_event(ctx, user_data, err, 0);
 	io_free_req(req);
@@ -949,21 +982,32 @@ static int io_nop(struct io_kiocb *req, u64 user_data)
 
 static int io_prep_fsync(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
+	struct io_ring_ctx *ctx = req->ctx;
+	unsigned flags;
 	int fd;
 
 	/* Prep already done */
 	if (req->rw.ki_filp)
 		return 0;
 
-	if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+	if (unlikely(ctx->flags & IORING_SETUP_IOPOLL))
 		return -EINVAL;
 	if (unlikely(sqe->addr || sqe->ioprio || sqe->buf_index))
 		return -EINVAL;
 
 	fd = READ_ONCE(sqe->fd);
-	req->rw.ki_filp = fget(fd);
-	if (unlikely(!req->rw.ki_filp))
-		return -EBADF;
+	flags = READ_ONCE(sqe->flags);
+
+	if (flags & IOSQE_FIXED_FILE) {
+		if (unlikely(!ctx->user_files || fd >= ctx->nr_user_files))
+			return -EBADF;
+		req->rw.ki_filp = ctx->user_files[fd];
+		req->flags |= REQ_F_FIXED_FILE;
+	} else {
+		req->rw.ki_filp = fget(fd);
+		if (unlikely(!req->rw.ki_filp))
+			return -EBADF;
+	}
 
 	return 0;
 }
@@ -993,7 +1037,7 @@ static int io_fsync(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 				end > 0 ? end : LLONG_MAX,
 				fsync_flags & IORING_FSYNC_DATASYNC);
 
-	fput(req->rw.ki_filp);
+	io_fput(req);
 	io_cqring_add_event(req->ctx, sqe->user_data, ret, 0);
 	io_free_req(req);
 	return 0;
@@ -1132,7 +1176,7 @@ static int io_submit_sqe(struct io_ring_ctx *ctx, struct sqe_submit *s,
 	ssize_t ret;
 
 	/* enforce forwards compatibility on users */
-	if (unlikely(s->sqe->flags))
+	if (unlikely(s->sqe->flags & ~IOSQE_FIXED_FILE))
 		return -EINVAL;
 
 	req = io_get_req(ctx, state);
@@ -1344,6 +1388,201 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
 	return READ_ONCE(ring->r.head) == READ_ONCE(ring->r.tail) ? ret : 0;
 }
 
+static void __io_sqe_files_unregister(struct io_ring_ctx *ctx)
+{
+#if defined(CONFIG_UNIX)
+	if (ctx->ring_sock) {
+		struct sock *sock = ctx->ring_sock->sk;
+		struct sk_buff *skb;
+
+		while ((skb = skb_dequeue(&sock->sk_receive_queue)) != NULL)
+			kfree_skb(skb);
+	}
+#else
+	int i;
+
+	for (i = 0; i < ctx->nr_user_files; i++)
+		fput(ctx->user_files[i]);
+#endif
+}
+
+static int io_sqe_files_unregister(struct io_ring_ctx *ctx)
+{
+	if (!ctx->user_files)
+		return -ENXIO;
+
+	__io_sqe_files_unregister(ctx);
+	kfree(ctx->user_files);
+	ctx->user_files = NULL;
+	ctx->nr_user_files = 0;
+	return 0;
+}
+
+static void io_finish_async(struct io_ring_ctx *ctx)
+{
+	if (ctx->sqo_wq) {
+		destroy_workqueue(ctx->sqo_wq);
+		ctx->sqo_wq = NULL;
+	}
+}
+
+#if defined(CONFIG_UNIX)
+static void io_destruct_skb(struct sk_buff *skb)
+{
+	struct io_ring_ctx *ctx = skb->sk->sk_user_data;
+
+	io_finish_async(ctx);
+	unix_destruct_scm(skb);
+}
+
+/*
+ * Ensure the UNIX gc is aware of our file set, so we are certain that
+ * the io_uring can be safely unregistered on process exit, even if we have
+ * loops in the file referencing.
+ */
+static int __io_sqe_files_scm(struct io_ring_ctx *ctx, int nr, int offset)
+{
+	struct sock *sk = ctx->ring_sock->sk;
+	struct scm_fp_list *fpl;
+	struct sk_buff *skb;
+	int i;
+
+	if (!capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN)) {
+		unsigned long inflight = ctx->user->unix_inflight + nr;
+
+		if (inflight > task_rlimit(current, RLIMIT_NOFILE))
+			return -EMFILE;
+	}
+
+	fpl = kzalloc(sizeof(*fpl), GFP_KERNEL);
+	if (!fpl)
+		return -ENOMEM;
+
+	skb = alloc_skb(0, GFP_KERNEL);
+	if (!skb) {
+		kfree(fpl);
+		return -ENOMEM;
+	}
+
+	skb->sk = sk;
+	skb->destructor = io_destruct_skb;
+
+	fpl->user = get_uid(ctx->user);
+	for (i = 0; i < nr; i++) {
+		fpl->fp[i] = get_file(ctx->user_files[i + offset]);
+		unix_inflight(fpl->user, fpl->fp[i]);
+	}
+
+	fpl->max = fpl->count = nr;
+	UNIXCB(skb).fp = fpl;
+	refcount_add(skb->truesize, &sk->sk_wmem_alloc);
+	skb_queue_head(&sk->sk_receive_queue, skb);
+
+	for (i = 0; i < nr; i++)
+		fput(fpl->fp[i]);
+
+	return 0;
+}
+
+/*
+ * If UNIX sockets are enabled, fd passing can cause a reference cycle which
+ * causes regular reference counting to break down. We rely on the UNIX
+ * garbage collection to take care of this problem for us.
+ */
+static int io_sqe_files_scm(struct io_ring_ctx *ctx)
+{
+	unsigned left, total;
+	int ret = 0;
+
+	total = 0;
+	left = ctx->nr_user_files;
+	while (left) {
+		unsigned this_files = min_t(unsigned, left, SCM_MAX_FD);
+		int ret;
+
+		ret = __io_sqe_files_scm(ctx, this_files, total);
+		if (ret)
+			break;
+		left -= this_files;
+		total += this_files;
+	}
+
+	if (!ret)
+		return 0;
+
+	while (total < ctx->nr_user_files) {
+		fput(ctx->user_files[total]);
+		total++;
+	}
+
+	return ret;
+}
+#else
+static int io_sqe_files_scm(struct io_ring_ctx *ctx)
+{
+	return 0;
+}
+#endif
+
+static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
+				 unsigned nr_args)
+{
+	__s32 __user *fds = (__s32 __user *) arg;
+	int fd, ret = 0;
+	unsigned i;
+
+	if (ctx->user_files)
+		return -EBUSY;
+	if (!nr_args)
+		return -EINVAL;
+	if (nr_args > IORING_MAX_FIXED_FILES)
+		return -EMFILE;
+
+	ctx->user_files = kcalloc(nr_args, sizeof(struct file *), GFP_KERNEL);
+	if (!ctx->user_files)
+		return -ENOMEM;
+
+	for (i = 0; i < nr_args; i++) {
+		ret = -EFAULT;
+		if (copy_from_user(&fd, &fds[i], sizeof(fd)))
+			break;
+
+		ctx->user_files[i] = fget(fd);
+
+		ret = -EBADF;
+		if (!ctx->user_files[i])
+			break;
+		/*
+		 * Don't allow io_uring instances to be registered. If UNIX
+		 * isn't enabled, then this causes a reference cycle and this
+		 * instance can never get freed. If UNIX is enabled we'll
+		 * handle it just fine, but there's still no point in allowing
+		 * a ring fd as it doesn't support regular read/write anyway.
+		 */
+		if (ctx->user_files[i]->f_op == &io_uring_fops) {
+			fput(ctx->user_files[i]);
+			break;
+		}
+		ctx->nr_user_files++;
+		ret = 0;
+	}
+
+	if (ret) {
+		for (i = 0; i < ctx->nr_user_files; i++)
+			fput(ctx->user_files[i]);
+
+		kfree(ctx->user_files);
+		ctx->nr_user_files = 0;
+		return ret;
+	}
+
+	ret = io_sqe_files_scm(ctx);
+	if (ret)
+		io_sqe_files_unregister(ctx);
+
+	return ret;
+}
+
 static int io_sq_offload_start(struct io_ring_ctx *ctx)
 {
 	int ret;
@@ -1612,13 +1851,13 @@ err:
 
 static void io_ring_ctx_free(struct io_ring_ctx *ctx)
 {
-	if (ctx->sqo_wq)
-		destroy_workqueue(ctx->sqo_wq);
+	io_finish_async(ctx);
 	if (ctx->sqo_mm)
 		mmdrop(ctx->sqo_mm);
 
 	io_iopoll_reap_events(ctx);
 	io_sqe_buffer_unregister(ctx);
+	io_sqe_files_unregister(ctx);
 
 #if defined(CONFIG_UNIX)
 	if (ctx->ring_sock)
@@ -1858,6 +2097,7 @@ static int io_uring_get_fd(struct io_ring_ctx *ctx)
 
 #if defined(CONFIG_UNIX)
 	ctx->ring_sock->file = file;
+	ctx->ring_sock->sk->sk_user_data = ctx;
 #endif
 	fd_install(ret, file);
 	return ret;
@@ -2001,6 +2241,15 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
 			break;
 		ret = io_sqe_buffer_unregister(ctx);
 		break;
+	case IORING_REGISTER_FILES:
+		ret = io_sqe_files_register(ctx, arg, nr_args);
+		break;
+	case IORING_UNREGISTER_FILES:
+		ret = -EINVAL;
+		if (arg || nr_args)
+			break;
+		ret = io_sqe_files_unregister(ctx);
+		break;
 	default:
 		ret = -EINVAL;
 		break;
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index cf28f7a11f12..6257478d55e9 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -16,7 +16,7 @@
  */
 struct io_uring_sqe {
 	__u8	opcode;		/* type of operation for this sqe */
-	__u8	flags;		/* as of now unused */
+	__u8	flags;		/* IOSQE_ flags */
 	__u16	ioprio;		/* ioprio for the request */
 	__s32	fd;		/* file descriptor to do IO on */
 	__u64	off;		/* offset into file */
@@ -33,6 +33,11 @@ struct io_uring_sqe {
 	};
 };
 
+/*
+ * sqe->flags
+ */
+#define IOSQE_FIXED_FILE	(1U << 0)	/* use fixed fileset */
+
 /*
  * io_uring_setup() flags
  */
@@ -113,5 +118,7 @@ struct io_uring_params {
  */
 #define IORING_REGISTER_BUFFERS		0
 #define IORING_UNREGISTER_BUFFERS	1
+#define IORING_REGISTER_FILES		2
+#define IORING_UNREGISTER_FILES		3
 
 #endif
-- 
cgit v1.2.3


From 6c271ce2f1d572f7fa225700a13cfe7ced492434 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Thu, 10 Jan 2019 11:22:30 -0700
Subject: io_uring: add submission polling

This enables an application to do IO, without ever entering the kernel.
By using the SQ ring to fill in new sqes and watching for completions
on the CQ ring, we can submit and reap IOs without doing a single system
call. The kernel side thread will poll for new submissions, and in case
of HIPRI/polled IO, it'll also poll for completions.

By default, we allow 1 second of active spinning. This can by changed
by passing in a different grace period at io_uring_register(2) time.
If the thread exceeds this idle time without having any work to do, it
will set:

sq_ring->flags |= IORING_SQ_NEED_WAKEUP.

The application will have to call io_uring_enter() to start things back
up again. If IO is kept busy, that will never be needed. Basically an
application that has this feature enabled will guard it's
io_uring_enter(2) call with:

read_barrier();
if (*sq_ring->flags & IORING_SQ_NEED_WAKEUP)
	io_uring_enter(fd, 0, 0, IORING_ENTER_SQ_WAKEUP);

instead of calling it unconditionally.

It's mandatory to use fixed files with this feature. Failure to do so
will result in the application getting an -EBADF CQ entry when
submitting IO.

Reviewed-by: Hannes Reinecke <hare@suse.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c                 | 249 ++++++++++++++++++++++++++++++++++++++++--
 include/uapi/linux/io_uring.h |  12 +-
 2 files changed, 253 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 5eaa1c4a3f7c..0a4caedf82c1 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -44,6 +44,7 @@
 #include <linux/percpu.h>
 #include <linux/slab.h>
 #include <linux/workqueue.h>
+#include <linux/kthread.h>
 #include <linux/blkdev.h>
 #include <linux/bvec.h>
 #include <linux/net.h>
@@ -108,12 +109,16 @@ struct io_ring_ctx {
 		unsigned		cached_sq_head;
 		unsigned		sq_entries;
 		unsigned		sq_mask;
+		unsigned		sq_thread_idle;
 		struct io_uring_sqe	*sq_sqes;
 	} ____cacheline_aligned_in_smp;
 
 	/* IO offload */
 	struct workqueue_struct	*sqo_wq;
+	struct task_struct	*sqo_thread;	/* if using sq thread polling */
 	struct mm_struct	*sqo_mm;
+	wait_queue_head_t	sqo_wait;
+	unsigned		sqo_stop;
 
 	struct {
 		/* CQ ring */
@@ -168,6 +173,7 @@ struct sqe_submit {
 	unsigned short			index;
 	bool				has_user;
 	bool				needs_lock;
+	bool				needs_fixed_file;
 };
 
 struct io_kiocb {
@@ -327,6 +333,8 @@ static void io_cqring_add_event(struct io_ring_ctx *ctx, u64 ki_user_data,
 
 	if (waitqueue_active(&ctx->wait))
 		wake_up(&ctx->wait);
+	if (waitqueue_active(&ctx->sqo_wait))
+		wake_up(&ctx->sqo_wait);
 }
 
 static void io_ring_drop_ctx_refs(struct io_ring_ctx *ctx, unsigned refs)
@@ -680,9 +688,10 @@ static bool io_file_supports_async(struct file *file)
 	return false;
 }
 
-static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe,
+static int io_prep_rw(struct io_kiocb *req, const struct sqe_submit *s,
 		      bool force_nonblock, struct io_submit_state *state)
 {
+	const struct io_uring_sqe *sqe = s->sqe;
 	struct io_ring_ctx *ctx = req->ctx;
 	struct kiocb *kiocb = &req->rw;
 	unsigned ioprio, flags;
@@ -702,6 +711,8 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 		kiocb->ki_filp = ctx->user_files[fd];
 		req->flags |= REQ_F_FIXED_FILE;
 	} else {
+		if (s->needs_fixed_file)
+			return -EBADF;
 		kiocb->ki_filp = io_file_get(state, fd);
 		if (unlikely(!kiocb->ki_filp))
 			return -EBADF;
@@ -865,7 +876,7 @@ static ssize_t io_read(struct io_kiocb *req, const struct sqe_submit *s,
 	struct file *file;
 	ssize_t ret;
 
-	ret = io_prep_rw(req, s->sqe, force_nonblock, state);
+	ret = io_prep_rw(req, s, force_nonblock, state);
 	if (ret)
 		return ret;
 	file = kiocb->ki_filp;
@@ -909,7 +920,7 @@ static ssize_t io_write(struct io_kiocb *req, const struct sqe_submit *s,
 	struct file *file;
 	ssize_t ret;
 
-	ret = io_prep_rw(req, s->sqe, force_nonblock, state);
+	ret = io_prep_rw(req, s, force_nonblock, state);
 	if (ret)
 		return ret;
 	/* Hold on to the file for -EAGAIN */
@@ -1301,6 +1312,169 @@ static bool io_get_sqring(struct io_ring_ctx *ctx, struct sqe_submit *s)
 	return false;
 }
 
+static int io_submit_sqes(struct io_ring_ctx *ctx, struct sqe_submit *sqes,
+			  unsigned int nr, bool has_user, bool mm_fault)
+{
+	struct io_submit_state state, *statep = NULL;
+	int ret, i, submitted = 0;
+
+	if (nr > IO_PLUG_THRESHOLD) {
+		io_submit_state_start(&state, ctx, nr);
+		statep = &state;
+	}
+
+	for (i = 0; i < nr; i++) {
+		if (unlikely(mm_fault)) {
+			ret = -EFAULT;
+		} else {
+			sqes[i].has_user = has_user;
+			sqes[i].needs_lock = true;
+			sqes[i].needs_fixed_file = true;
+			ret = io_submit_sqe(ctx, &sqes[i], statep);
+		}
+		if (!ret) {
+			submitted++;
+			continue;
+		}
+
+		io_cqring_add_event(ctx, sqes[i].sqe->user_data, ret, 0);
+	}
+
+	if (statep)
+		io_submit_state_end(&state);
+
+	return submitted;
+}
+
+static int io_sq_thread(void *data)
+{
+	struct sqe_submit sqes[IO_IOPOLL_BATCH];
+	struct io_ring_ctx *ctx = data;
+	struct mm_struct *cur_mm = NULL;
+	mm_segment_t old_fs;
+	DEFINE_WAIT(wait);
+	unsigned inflight;
+	unsigned long timeout;
+
+	old_fs = get_fs();
+	set_fs(USER_DS);
+
+	timeout = inflight = 0;
+	while (!kthread_should_stop() && !ctx->sqo_stop) {
+		bool all_fixed, mm_fault = false;
+		int i;
+
+		if (inflight) {
+			unsigned nr_events = 0;
+
+			if (ctx->flags & IORING_SETUP_IOPOLL) {
+				/*
+				 * We disallow the app entering submit/complete
+				 * with polling, but we still need to lock the
+				 * ring to prevent racing with polled issue
+				 * that got punted to a workqueue.
+				 */
+				mutex_lock(&ctx->uring_lock);
+				io_iopoll_check(ctx, &nr_events, 0);
+				mutex_unlock(&ctx->uring_lock);
+			} else {
+				/*
+				 * Normal IO, just pretend everything completed.
+				 * We don't have to poll completions for that.
+				 */
+				nr_events = inflight;
+			}
+
+			inflight -= nr_events;
+			if (!inflight)
+				timeout = jiffies + ctx->sq_thread_idle;
+		}
+
+		if (!io_get_sqring(ctx, &sqes[0])) {
+			/*
+			 * We're polling. If we're within the defined idle
+			 * period, then let us spin without work before going
+			 * to sleep.
+			 */
+			if (inflight || !time_after(jiffies, timeout)) {
+				cpu_relax();
+				continue;
+			}
+
+			/*
+			 * Drop cur_mm before scheduling, we can't hold it for
+			 * long periods (or over schedule()). Do this before
+			 * adding ourselves to the waitqueue, as the unuse/drop
+			 * may sleep.
+			 */
+			if (cur_mm) {
+				unuse_mm(cur_mm);
+				mmput(cur_mm);
+				cur_mm = NULL;
+			}
+
+			prepare_to_wait(&ctx->sqo_wait, &wait,
+						TASK_INTERRUPTIBLE);
+
+			/* Tell userspace we may need a wakeup call */
+			ctx->sq_ring->flags |= IORING_SQ_NEED_WAKEUP;
+			smp_wmb();
+
+			if (!io_get_sqring(ctx, &sqes[0])) {
+				if (kthread_should_stop()) {
+					finish_wait(&ctx->sqo_wait, &wait);
+					break;
+				}
+				if (signal_pending(current))
+					flush_signals(current);
+				schedule();
+				finish_wait(&ctx->sqo_wait, &wait);
+
+				ctx->sq_ring->flags &= ~IORING_SQ_NEED_WAKEUP;
+				smp_wmb();
+				continue;
+			}
+			finish_wait(&ctx->sqo_wait, &wait);
+
+			ctx->sq_ring->flags &= ~IORING_SQ_NEED_WAKEUP;
+			smp_wmb();
+		}
+
+		i = 0;
+		all_fixed = true;
+		do {
+			if (all_fixed && io_sqe_needs_user(sqes[i].sqe))
+				all_fixed = false;
+
+			i++;
+			if (i == ARRAY_SIZE(sqes))
+				break;
+		} while (io_get_sqring(ctx, &sqes[i]));
+
+		/* Unless all new commands are FIXED regions, grab mm */
+		if (!all_fixed && !cur_mm) {
+			mm_fault = !mmget_not_zero(ctx->sqo_mm);
+			if (!mm_fault) {
+				use_mm(ctx->sqo_mm);
+				cur_mm = ctx->sqo_mm;
+			}
+		}
+
+		inflight += io_submit_sqes(ctx, sqes, i, cur_mm != NULL,
+						mm_fault);
+
+		/* Commit SQ ring head once we've consumed all SQEs */
+		io_commit_sqring(ctx);
+	}
+
+	set_fs(old_fs);
+	if (cur_mm) {
+		unuse_mm(cur_mm);
+		mmput(cur_mm);
+	}
+	return 0;
+}
+
 static int io_ring_submit(struct io_ring_ctx *ctx, unsigned int to_submit)
 {
 	struct io_submit_state state, *statep = NULL;
@@ -1319,6 +1493,7 @@ static int io_ring_submit(struct io_ring_ctx *ctx, unsigned int to_submit)
 
 		s.has_user = true;
 		s.needs_lock = false;
+		s.needs_fixed_file = false;
 
 		ret = io_submit_sqe(ctx, &s, statep);
 		if (ret) {
@@ -1418,8 +1593,20 @@ static int io_sqe_files_unregister(struct io_ring_ctx *ctx)
 	return 0;
 }
 
+static void io_sq_thread_stop(struct io_ring_ctx *ctx)
+{
+	if (ctx->sqo_thread) {
+		ctx->sqo_stop = 1;
+		mb();
+		kthread_stop(ctx->sqo_thread);
+		ctx->sqo_thread = NULL;
+	}
+}
+
 static void io_finish_async(struct io_ring_ctx *ctx)
 {
+	io_sq_thread_stop(ctx);
+
 	if (ctx->sqo_wq) {
 		destroy_workqueue(ctx->sqo_wq);
 		ctx->sqo_wq = NULL;
@@ -1583,13 +1770,47 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
 	return ret;
 }
 
-static int io_sq_offload_start(struct io_ring_ctx *ctx)
+static int io_sq_offload_start(struct io_ring_ctx *ctx,
+			       struct io_uring_params *p)
 {
 	int ret;
 
+	init_waitqueue_head(&ctx->sqo_wait);
 	mmgrab(current->mm);
 	ctx->sqo_mm = current->mm;
 
+	ctx->sq_thread_idle = msecs_to_jiffies(p->sq_thread_idle);
+	if (!ctx->sq_thread_idle)
+		ctx->sq_thread_idle = HZ;
+
+	ret = -EINVAL;
+	if (!cpu_possible(p->sq_thread_cpu))
+		goto err;
+
+	if (ctx->flags & IORING_SETUP_SQPOLL) {
+		if (p->flags & IORING_SETUP_SQ_AFF) {
+			int cpu;
+
+			cpu = array_index_nospec(p->sq_thread_cpu, NR_CPUS);
+			ctx->sqo_thread = kthread_create_on_cpu(io_sq_thread,
+							ctx, cpu,
+							"io_uring-sq");
+		} else {
+			ctx->sqo_thread = kthread_create(io_sq_thread, ctx,
+							"io_uring-sq");
+		}
+		if (IS_ERR(ctx->sqo_thread)) {
+			ret = PTR_ERR(ctx->sqo_thread);
+			ctx->sqo_thread = NULL;
+			goto err;
+		}
+		wake_up_process(ctx->sqo_thread);
+	} else if (p->flags & IORING_SETUP_SQ_AFF) {
+		/* Can't have SQ_AFF without SQPOLL */
+		ret = -EINVAL;
+		goto err;
+	}
+
 	/* Do QD, or 2 * CPUS, whatever is smallest */
 	ctx->sqo_wq = alloc_workqueue("io_ring-wq", WQ_UNBOUND | WQ_FREEZABLE,
 			min(ctx->sq_entries - 1, 2 * num_online_cpus()));
@@ -1600,6 +1821,7 @@ static int io_sq_offload_start(struct io_ring_ctx *ctx)
 
 	return 0;
 err:
+	io_sq_thread_stop(ctx);
 	mmdrop(ctx->sqo_mm);
 	ctx->sqo_mm = NULL;
 	return ret;
@@ -1959,7 +2181,7 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
 	int submitted = 0;
 	struct fd f;
 
-	if (flags & ~IORING_ENTER_GETEVENTS)
+	if (flags & ~(IORING_ENTER_GETEVENTS | IORING_ENTER_SQ_WAKEUP))
 		return -EINVAL;
 
 	f = fdget(fd);
@@ -1975,6 +2197,18 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
 	if (!percpu_ref_tryget(&ctx->refs))
 		goto out_fput;
 
+	/*
+	 * For SQ polling, the thread will do all submissions and completions.
+	 * Just return the requested submit count, and wake the thread if
+	 * we were asked to.
+	 */
+	if (ctx->flags & IORING_SETUP_SQPOLL) {
+		if (flags & IORING_ENTER_SQ_WAKEUP)
+			wake_up(&ctx->sqo_wait);
+		submitted = to_submit;
+		goto out_ctx;
+	}
+
 	ret = 0;
 	if (to_submit) {
 		to_submit = min(to_submit, ctx->sq_entries);
@@ -2156,7 +2390,7 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p)
 	if (ret)
 		goto err;
 
-	ret = io_sq_offload_start(ctx);
+	ret = io_sq_offload_start(ctx, p);
 	if (ret)
 		goto err;
 
@@ -2204,7 +2438,8 @@ static long io_uring_setup(u32 entries, struct io_uring_params __user *params)
 			return -EINVAL;
 	}
 
-	if (p.flags & ~IORING_SETUP_IOPOLL)
+	if (p.flags & ~(IORING_SETUP_IOPOLL | IORING_SETUP_SQPOLL |
+			IORING_SETUP_SQ_AFF))
 		return -EINVAL;
 
 	ret = io_uring_create(entries, &p);
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 6257478d55e9..0ec74bab8dbe 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -42,6 +42,8 @@ struct io_uring_sqe {
  * io_uring_setup() flags
  */
 #define IORING_SETUP_IOPOLL	(1U << 0)	/* io_context is polled */
+#define IORING_SETUP_SQPOLL	(1U << 1)	/* SQ poll thread */
+#define IORING_SETUP_SQ_AFF	(1U << 2)	/* sq_thread_cpu is valid */
 
 #define IORING_OP_NOP		0
 #define IORING_OP_READV		1
@@ -86,6 +88,11 @@ struct io_sqring_offsets {
 	__u64 resv2;
 };
 
+/*
+ * sq_ring->flags
+ */
+#define IORING_SQ_NEED_WAKEUP	(1U << 0) /* needs io_uring_enter wakeup */
+
 struct io_cqring_offsets {
 	__u32 head;
 	__u32 tail;
@@ -100,6 +107,7 @@ struct io_cqring_offsets {
  * io_uring_enter(2) flags
  */
 #define IORING_ENTER_GETEVENTS	(1U << 0)
+#define IORING_ENTER_SQ_WAKEUP	(1U << 1)
 
 /*
  * Passed in for io_uring_setup(2). Copied back with updated info on success
@@ -108,7 +116,9 @@ struct io_uring_params {
 	__u32 sq_entries;
 	__u32 cq_entries;
 	__u32 flags;
-	__u32 resv[7];
+	__u32 sq_thread_cpu;
+	__u32 sq_thread_idle;
+	__u32 resv[5];
 	struct io_sqring_offsets sq_off;
 	struct io_cqring_offsets cq_off;
 };
-- 
cgit v1.2.3


From abbbbc83a210e95e2ae4ee71e50930a8cec6f081 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Fri, 22 Feb 2019 18:04:52 +0000
Subject: asm-generic/io: Pass result of I/O accessor to __io_[p]ar()

The inX() and readX() I/O accessors must enforce ordering against
subsequent calls to the delay() routines, so that a read-back from a
device can be used to postpone a subsequent write to the same device.

On some architectures, including arm64, this ordering can only be
achieved by creating a dependency on the value returned by the I/O
accessor operation, so we need to pass the value we read to the
__io_par() and __io_ar() macros in these cases.

Acked-by: Arnd Bergmann <arnd@arndb.de>
Reported-by: Andrew Murray <andrew.murray@arm.com>
Reviewed-by: Palmer Dabbelt <palmer@sifive.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 include/asm-generic/io.h | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h
index d356f802945a..303871651f8a 100644
--- a/include/asm-generic/io.h
+++ b/include/asm-generic/io.h
@@ -32,9 +32,9 @@
 /* prevent prefetching of coherent DMA data ahead of a dma-complete */
 #ifndef __io_ar
 #ifdef rmb
-#define __io_ar()      rmb()
+#define __io_ar(v)      rmb()
 #else
-#define __io_ar()      barrier()
+#define __io_ar(v)      barrier()
 #endif
 #endif
 
@@ -65,7 +65,7 @@
 #endif
 
 #ifndef __io_par
-#define __io_par()     __io_ar()
+#define __io_par(v)     __io_ar(v)
 #endif
 
 
@@ -158,7 +158,7 @@ static inline u8 readb(const volatile void __iomem *addr)
 
 	__io_br();
 	val = __raw_readb(addr);
-	__io_ar();
+	__io_ar(val);
 	return val;
 }
 #endif
@@ -171,7 +171,7 @@ static inline u16 readw(const volatile void __iomem *addr)
 
 	__io_br();
 	val = __le16_to_cpu(__raw_readw(addr));
-	__io_ar();
+	__io_ar(val);
 	return val;
 }
 #endif
@@ -184,7 +184,7 @@ static inline u32 readl(const volatile void __iomem *addr)
 
 	__io_br();
 	val = __le32_to_cpu(__raw_readl(addr));
-	__io_ar();
+	__io_ar(val);
 	return val;
 }
 #endif
@@ -198,7 +198,7 @@ static inline u64 readq(const volatile void __iomem *addr)
 
 	__io_br();
 	val = __le64_to_cpu(__raw_readq(addr));
-	__io_ar();
+	__io_ar(val);
 	return val;
 }
 #endif
@@ -471,7 +471,7 @@ static inline u8 inb(unsigned long addr)
 
 	__io_pbr();
 	val = __raw_readb(PCI_IOBASE + addr);
-	__io_par();
+	__io_par(val);
 	return val;
 }
 #endif
@@ -484,7 +484,7 @@ static inline u16 inw(unsigned long addr)
 
 	__io_pbr();
 	val = __le16_to_cpu(__raw_readw(PCI_IOBASE + addr));
-	__io_par();
+	__io_par(val);
 	return val;
 }
 #endif
@@ -497,7 +497,7 @@ static inline u32 inl(unsigned long addr)
 
 	__io_pbr();
 	val = __le32_to_cpu(__raw_readl(PCI_IOBASE + addr));
-	__io_par();
+	__io_par(val);
 	return val;
 }
 #endif
-- 
cgit v1.2.3


From 221e1e0b016529f33b0d1bbf7d07c54463b55ca6 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 11 Feb 2019 14:35:45 +0100
Subject: of: mark early_init_dt_alloc_reserved_memory_arch static

This function is only used in of_reserved_mem.c, and never overridden
despite the __weak marker.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Rob Herring <robh@kernel.org>
---
 drivers/of/of_reserved_mem.c    | 2 +-
 include/linux/of_reserved_mem.h | 7 -------
 2 files changed, 1 insertion(+), 8 deletions(-)

(limited to 'include')

diff --git a/drivers/of/of_reserved_mem.c b/drivers/of/of_reserved_mem.c
index 9e02a5d80225..e773063c6de9 100644
--- a/drivers/of/of_reserved_mem.c
+++ b/drivers/of/of_reserved_mem.c
@@ -26,7 +26,7 @@
 static struct reserved_mem reserved_mem[MAX_RESERVED_REGIONS];
 static int reserved_mem_count;
 
-int __init __weak early_init_dt_alloc_reserved_memory_arch(phys_addr_t size,
+static int __init early_init_dt_alloc_reserved_memory_arch(phys_addr_t size,
 	phys_addr_t align, phys_addr_t start, phys_addr_t end, bool nomap,
 	phys_addr_t *res_base)
 {
diff --git a/include/linux/of_reserved_mem.h b/include/linux/of_reserved_mem.h
index 67ab8d271df3..60f541912ccf 100644
--- a/include/linux/of_reserved_mem.h
+++ b/include/linux/of_reserved_mem.h
@@ -35,13 +35,6 @@ int of_reserved_mem_device_init_by_idx(struct device *dev,
 				       struct device_node *np, int idx);
 void of_reserved_mem_device_release(struct device *dev);
 
-int early_init_dt_alloc_reserved_memory_arch(phys_addr_t size,
-					     phys_addr_t align,
-					     phys_addr_t start,
-					     phys_addr_t end,
-					     bool nomap,
-					     phys_addr_t *res_base);
-
 void fdt_init_reserved_mem(void);
 void fdt_reserved_mem_save_node(unsigned long node, const char *uname,
 			       phys_addr_t base, phys_addr_t size);
-- 
cgit v1.2.3


From 0c91c11c7d62de9fea6f5520edb143a7199f3307 Mon Sep 17 00:00:00 2001
From: Anson Huang <anson.huang@nxp.com>
Date: Wed, 27 Feb 2019 01:28:26 +0000
Subject: clk: imx8mq: add GPIO clocks to clock tree

i.MX8MQ has clock gate for each GPIO bank, add them
into clock tree for GPIO driver to manage.

Signed-off-by: Anson Huang <Anson.Huang@nxp.com>
Reviewed-by: Fabio Estevam <festevam@gmail.com>
Signed-off-by: Stephen Boyd <sboyd@kernel.org>
---
 drivers/clk/imx/clk-imx8mq.c             | 5 +++++
 include/dt-bindings/clock/imx8mq-clock.h | 8 +++++++-
 2 files changed, 12 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/clk/imx/clk-imx8mq.c b/drivers/clk/imx/clk-imx8mq.c
index 081dc2d7c99c..a9b3888aef0c 100644
--- a/drivers/clk/imx/clk-imx8mq.c
+++ b/drivers/clk/imx/clk-imx8mq.c
@@ -473,6 +473,11 @@ static int imx8mq_clocks_probe(struct platform_device *pdev)
 	clks[IMX8MQ_CLK_ECSPI2_ROOT] = imx_clk_gate4("ecspi2_root_clk", "ecspi2", base + 0x4080, 0);
 	clks[IMX8MQ_CLK_ECSPI3_ROOT] = imx_clk_gate4("ecspi3_root_clk", "ecspi3", base + 0x4090, 0);
 	clks[IMX8MQ_CLK_ENET1_ROOT] = imx_clk_gate4("enet1_root_clk", "enet_axi", base + 0x40a0, 0);
+	clks[IMX8MQ_CLK_GPIO1_ROOT] = imx_clk_gate4("gpio1_root_clk", "ipg_root", base + 0x40b0, 0);
+	clks[IMX8MQ_CLK_GPIO2_ROOT] = imx_clk_gate4("gpio2_root_clk", "ipg_root", base + 0x40c0, 0);
+	clks[IMX8MQ_CLK_GPIO3_ROOT] = imx_clk_gate4("gpio3_root_clk", "ipg_root", base + 0x40d0, 0);
+	clks[IMX8MQ_CLK_GPIO4_ROOT] = imx_clk_gate4("gpio4_root_clk", "ipg_root", base + 0x40e0, 0);
+	clks[IMX8MQ_CLK_GPIO5_ROOT] = imx_clk_gate4("gpio5_root_clk", "ipg_root", base + 0x40f0, 0);
 	clks[IMX8MQ_CLK_GPT1_ROOT] = imx_clk_gate4("gpt1_root_clk", "gpt1", base + 0x4100, 0);
 	clks[IMX8MQ_CLK_I2C1_ROOT] = imx_clk_gate4("i2c1_root_clk", "i2c1", base + 0x4170, 0);
 	clks[IMX8MQ_CLK_I2C2_ROOT] = imx_clk_gate4("i2c2_root_clk", "i2c2", base + 0x4180, 0);
diff --git a/include/dt-bindings/clock/imx8mq-clock.h b/include/dt-bindings/clock/imx8mq-clock.h
index 491e39c146be..b58cc643c9c9 100644
--- a/include/dt-bindings/clock/imx8mq-clock.h
+++ b/include/dt-bindings/clock/imx8mq-clock.h
@@ -394,5 +394,11 @@
 #define IMX8MQ_CLK_CLKO1			268
 #define IMX8MQ_CLK_ARM				269
 
-#define IMX8MQ_CLK_END				270
+#define IMX8MQ_CLK_GPIO1_ROOT			270
+#define IMX8MQ_CLK_GPIO2_ROOT			271
+#define IMX8MQ_CLK_GPIO3_ROOT			272
+#define IMX8MQ_CLK_GPIO4_ROOT			273
+#define IMX8MQ_CLK_GPIO5_ROOT			274
+
+#define IMX8MQ_CLK_END				275
 #endif /* __DT_BINDINGS_CLOCK_IMX8MQ_H */
-- 
cgit v1.2.3


From 5b88a17cfdeba75e0092bab2c79aaf7d9e7db482 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 28 Feb 2019 11:00:18 -0500
Subject: block: optimize bvec iteration in bvec_iter_advance

There is no need to only iterate in chunks of PAGE_SIZE or less in
bvec_iter_advance, given that the callers pass in the chunk length that
they are operating on - either that already is less than PAGE_SIZE
because they do classic page-based iteration, or it is larger because
the caller operates on multi-page bvecs.

This should help shaving off a few cycles of the I/O hot path.

Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/bvec.h | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/bvec.h b/include/linux/bvec.h
index 87e82e503a52..f6275c4da13a 100644
--- a/include/linux/bvec.h
+++ b/include/linux/bvec.h
@@ -112,14 +112,15 @@ static inline bool bvec_iter_advance(const struct bio_vec *bv,
 	}
 
 	while (bytes) {
-		unsigned iter_len = bvec_iter_len(bv, *iter);
-		unsigned len = min(bytes, iter_len);
+		const struct bio_vec *cur = bv + iter->bi_idx;
+		unsigned len = min3(bytes, iter->bi_size,
+				    cur->bv_len - iter->bi_bvec_done);
 
 		bytes -= len;
 		iter->bi_size -= len;
 		iter->bi_bvec_done += len;
 
-		if (iter->bi_bvec_done == __bvec_iter_bvec(bv, *iter)->bv_len) {
+		if (iter->bi_bvec_done == cur->bv_len) {
 			iter->bi_bvec_done = 0;
 			iter->bi_idx++;
 		}
-- 
cgit v1.2.3


From 11d4dd0b20041289e60f0642d458b96389b3125d Mon Sep 17 00:00:00 2001
From: Li RongQing <lirongqing@baidu.com>
Date: Fri, 22 Feb 2019 21:45:52 +0800
Subject: netfilter: convert the proto argument from u8 to u16

The proto in struct xt_match and struct xt_target is u16, when
calling xt_check_target/match, their proto argument is u8,
and will cause truncation, it is harmless to ip packet, since
ip proto is u8

if a etable's match/target has proto that is u16, will cause
the check failure.

and convert be16 to short in bridge/netfilter/ebtables.c

Signed-off-by: Zhang Yu <zhangyu31@baidu.com>
Signed-off-by: Li RongQing <lirongqing@baidu.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/x_tables.h | 4 ++--
 net/bridge/netfilter/ebtables.c    | 6 +++---
 net/netfilter/x_tables.c           | 4 ++--
 3 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index 9077b3ebea08..bf384b3eedb8 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -289,9 +289,9 @@ bool xt_find_jump_offset(const unsigned int *offsets,
 
 int xt_check_proc_name(const char *name, unsigned int size);
 
-int xt_check_match(struct xt_mtchk_param *, unsigned int size, u_int8_t proto,
+int xt_check_match(struct xt_mtchk_param *, unsigned int size, u16 proto,
 		   bool inv_proto);
-int xt_check_target(struct xt_tgchk_param *, unsigned int size, u_int8_t proto,
+int xt_check_target(struct xt_tgchk_param *, unsigned int size, u16 proto,
 		    bool inv_proto);
 
 int xt_match_to_user(const struct xt_entry_match *m,
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index f77888ec93f1..eb15891f8b9f 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -381,7 +381,7 @@ ebt_check_match(struct ebt_entry_match *m, struct xt_mtchk_param *par,
 	par->match     = match;
 	par->matchinfo = m->data;
 	ret = xt_check_match(par, m->match_size,
-	      e->ethproto, e->invflags & EBT_IPROTO);
+	      ntohs(e->ethproto), e->invflags & EBT_IPROTO);
 	if (ret < 0) {
 		module_put(match->me);
 		return ret;
@@ -418,7 +418,7 @@ ebt_check_watcher(struct ebt_entry_watcher *w, struct xt_tgchk_param *par,
 	par->target   = watcher;
 	par->targinfo = w->data;
 	ret = xt_check_target(par, w->watcher_size,
-	      e->ethproto, e->invflags & EBT_IPROTO);
+	      ntohs(e->ethproto), e->invflags & EBT_IPROTO);
 	if (ret < 0) {
 		module_put(watcher->me);
 		return ret;
@@ -744,7 +744,7 @@ ebt_check_entry(struct ebt_entry *e, struct net *net,
 	tgpar.target   = target;
 	tgpar.targinfo = t->data;
 	ret = xt_check_target(&tgpar, t->target_size,
-	      e->ethproto, e->invflags & EBT_IPROTO);
+	      ntohs(e->ethproto), e->invflags & EBT_IPROTO);
 	if (ret < 0) {
 		module_put(target->me);
 		goto cleanup_watchers;
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 13e1ac333fa4..e5e5c64df8d1 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -461,7 +461,7 @@ int xt_check_proc_name(const char *name, unsigned int size)
 EXPORT_SYMBOL(xt_check_proc_name);
 
 int xt_check_match(struct xt_mtchk_param *par,
-		   unsigned int size, u_int8_t proto, bool inv_proto)
+		   unsigned int size, u16 proto, bool inv_proto)
 {
 	int ret;
 
@@ -984,7 +984,7 @@ bool xt_find_jump_offset(const unsigned int *offsets,
 EXPORT_SYMBOL(xt_find_jump_offset);
 
 int xt_check_target(struct xt_tgchk_param *par,
-		    unsigned int size, u_int8_t proto, bool inv_proto)
+		    unsigned int size, u16 proto, bool inv_proto)
 {
 	int ret;
 
-- 
cgit v1.2.3


From c78efc99c75089efd3df2ebd3bd279b52b4ab125 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Thu, 28 Feb 2019 12:02:50 +0100
Subject: netfilter: nf_tables: nat: merge nft_redir protocol specific modules

before:
 text	   data	    bss	    dec	    hex	filename
 990	    832	      0	   1822	    71e nft_redir.ko
 697	    896	      0	   1593	    639 nft_redir_ipv4.ko
 713	    896	      0	   1609	    649	nft_redir_ipv6.ko

after:
 text	   data	    bss	    dec	    hex	filename
 1910	    960	      0	   2870	    b36	nft_redir.ko

size is reduced, all helpers from nft_redir.ko can be made static.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nft_redir.h   |  22 ------
 net/ipv4/netfilter/Kconfig          |   8 --
 net/ipv4/netfilter/Makefile         |   1 -
 net/ipv4/netfilter/nft_redir_ipv4.c |  82 -------------------
 net/ipv6/netfilter/Kconfig          |   8 --
 net/ipv6/netfilter/Makefile         |   1 -
 net/ipv6/netfilter/nft_redir_ipv6.c |  83 -------------------
 net/netfilter/Kconfig               |   1 +
 net/netfilter/nft_redir.c           | 154 +++++++++++++++++++++++++++++++++---
 9 files changed, 143 insertions(+), 217 deletions(-)
 delete mode 100644 include/net/netfilter/nft_redir.h
 delete mode 100644 net/ipv4/netfilter/nft_redir_ipv4.c
 delete mode 100644 net/ipv6/netfilter/nft_redir_ipv6.c

(limited to 'include')

diff --git a/include/net/netfilter/nft_redir.h b/include/net/netfilter/nft_redir.h
deleted file mode 100644
index 4a970737c03c..000000000000
--- a/include/net/netfilter/nft_redir.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _NFT_REDIR_H_
-#define _NFT_REDIR_H_
-
-struct nft_redir {
-	enum nft_registers	sreg_proto_min:8;
-	enum nft_registers	sreg_proto_max:8;
-	u16			flags;
-};
-
-extern const struct nla_policy nft_redir_policy[];
-
-int nft_redir_init(const struct nft_ctx *ctx,
-		   const struct nft_expr *expr,
-		   const struct nlattr * const tb[]);
-
-int nft_redir_dump(struct sk_buff *skb, const struct nft_expr *expr);
-
-int nft_redir_validate(const struct nft_ctx *ctx, const struct nft_expr *expr,
-		       const struct nft_data **data);
-
-#endif /* _NFT_REDIR_H_ */
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 8688461ed077..12cc3f7d7733 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -115,14 +115,6 @@ config NFT_MASQ_IPV4
 	  This is the expression that provides IPv4 masquerading support for
 	  nf_tables.
 
-config NFT_REDIR_IPV4
-	tristate "IPv4 redirect support for nf_tables"
-	depends on NF_TABLES_IPV4
-	depends on NFT_REDIR
-	select NF_NAT_REDIRECT
-	help
-	  This is the expression that provides IPv4 redirect support for
-	  nf_tables.
 endif # NF_TABLES
 
 config NF_NAT_SNMP_BASIC
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index b2cdf705fdf1..5558caf92578 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -29,7 +29,6 @@ obj-$(CONFIG_NFT_CHAIN_NAT_IPV4) += nft_chain_nat_ipv4.o
 obj-$(CONFIG_NFT_REJECT_IPV4) += nft_reject_ipv4.o
 obj-$(CONFIG_NFT_FIB_IPV4) += nft_fib_ipv4.o
 obj-$(CONFIG_NFT_MASQ_IPV4) += nft_masq_ipv4.o
-obj-$(CONFIG_NFT_REDIR_IPV4) += nft_redir_ipv4.o
 obj-$(CONFIG_NFT_DUP_IPV4) += nft_dup_ipv4.o
 
 # flow table support
diff --git a/net/ipv4/netfilter/nft_redir_ipv4.c b/net/ipv4/netfilter/nft_redir_ipv4.c
deleted file mode 100644
index 5120be1d3118..000000000000
--- a/net/ipv4/netfilter/nft_redir_ipv4.c
+++ /dev/null
@@ -1,82 +0,0 @@
-/*
- * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo@debian.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/netlink.h>
-#include <linux/netfilter.h>
-#include <linux/netfilter/nf_tables.h>
-#include <net/netfilter/nf_tables.h>
-#include <net/netfilter/nf_nat.h>
-#include <net/netfilter/nf_nat_redirect.h>
-#include <net/netfilter/nft_redir.h>
-
-static void nft_redir_ipv4_eval(const struct nft_expr *expr,
-				struct nft_regs *regs,
-				const struct nft_pktinfo *pkt)
-{
-	struct nft_redir *priv = nft_expr_priv(expr);
-	struct nf_nat_ipv4_multi_range_compat mr;
-
-	memset(&mr, 0, sizeof(mr));
-	if (priv->sreg_proto_min) {
-		mr.range[0].min.all = (__force __be16)nft_reg_load16(
-			&regs->data[priv->sreg_proto_min]);
-		mr.range[0].max.all = (__force __be16)nft_reg_load16(
-			&regs->data[priv->sreg_proto_max]);
-		mr.range[0].flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
-	}
-
-	mr.range[0].flags |= priv->flags;
-
-	regs->verdict.code = nf_nat_redirect_ipv4(pkt->skb, &mr, nft_hook(pkt));
-}
-
-static void
-nft_redir_ipv4_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
-{
-	nf_ct_netns_put(ctx->net, NFPROTO_IPV4);
-}
-
-static struct nft_expr_type nft_redir_ipv4_type;
-static const struct nft_expr_ops nft_redir_ipv4_ops = {
-	.type		= &nft_redir_ipv4_type,
-	.size		= NFT_EXPR_SIZE(sizeof(struct nft_redir)),
-	.eval		= nft_redir_ipv4_eval,
-	.init		= nft_redir_init,
-	.destroy	= nft_redir_ipv4_destroy,
-	.dump		= nft_redir_dump,
-	.validate	= nft_redir_validate,
-};
-
-static struct nft_expr_type nft_redir_ipv4_type __read_mostly = {
-	.family		= NFPROTO_IPV4,
-	.name		= "redir",
-	.ops		= &nft_redir_ipv4_ops,
-	.policy		= nft_redir_policy,
-	.maxattr	= NFTA_REDIR_MAX,
-	.owner		= THIS_MODULE,
-};
-
-static int __init nft_redir_ipv4_module_init(void)
-{
-	return nft_register_expr(&nft_redir_ipv4_type);
-}
-
-static void __exit nft_redir_ipv4_module_exit(void)
-{
-	nft_unregister_expr(&nft_redir_ipv4_type);
-}
-
-module_init(nft_redir_ipv4_module_init);
-module_exit(nft_redir_ipv4_module_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo@debian.org>");
-MODULE_ALIAS_NFT_AF_EXPR(AF_INET, "redir");
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index a04a38166d8c..7e7cc411003a 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -49,14 +49,6 @@ config NFT_MASQ_IPV6
 	  This is the expression that provides IPv4 masquerading support for
 	  nf_tables.
 
-config NFT_REDIR_IPV6
-	tristate "IPv6 redirect support for nf_tables"
-	depends on NFT_REDIR
-	select NF_NAT_REDIRECT
-	help
-	  This is the expression that provides IPv4 redirect support for
-	  nf_tables.
-
 endif # NF_NAT
 
 config NFT_REJECT_IPV6
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
index afb880427133..42a80d03245a 100644
--- a/net/ipv6/netfilter/Makefile
+++ b/net/ipv6/netfilter/Makefile
@@ -31,7 +31,6 @@ obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV6) += nft_chain_route_ipv6.o
 obj-$(CONFIG_NFT_CHAIN_NAT_IPV6) += nft_chain_nat_ipv6.o
 obj-$(CONFIG_NFT_REJECT_IPV6) += nft_reject_ipv6.o
 obj-$(CONFIG_NFT_MASQ_IPV6) += nft_masq_ipv6.o
-obj-$(CONFIG_NFT_REDIR_IPV6) += nft_redir_ipv6.o
 obj-$(CONFIG_NFT_DUP_IPV6) += nft_dup_ipv6.o
 obj-$(CONFIG_NFT_FIB_IPV6) += nft_fib_ipv6.o
 
diff --git a/net/ipv6/netfilter/nft_redir_ipv6.c b/net/ipv6/netfilter/nft_redir_ipv6.c
deleted file mode 100644
index 74269865acc8..000000000000
--- a/net/ipv6/netfilter/nft_redir_ipv6.c
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo@debian.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/netlink.h>
-#include <linux/netfilter.h>
-#include <linux/netfilter/nf_tables.h>
-#include <net/netfilter/nf_tables.h>
-#include <net/netfilter/nf_nat.h>
-#include <net/netfilter/nft_redir.h>
-#include <net/netfilter/nf_nat_redirect.h>
-
-static void nft_redir_ipv6_eval(const struct nft_expr *expr,
-				struct nft_regs *regs,
-				const struct nft_pktinfo *pkt)
-{
-	struct nft_redir *priv = nft_expr_priv(expr);
-	struct nf_nat_range2 range;
-
-	memset(&range, 0, sizeof(range));
-	if (priv->sreg_proto_min) {
-		range.min_proto.all = (__force __be16)nft_reg_load16(
-			&regs->data[priv->sreg_proto_min]);
-		range.max_proto.all = (__force __be16)nft_reg_load16(
-			&regs->data[priv->sreg_proto_max]);
-		range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
-	}
-
-	range.flags |= priv->flags;
-
-	regs->verdict.code =
-		nf_nat_redirect_ipv6(pkt->skb, &range, nft_hook(pkt));
-}
-
-static void
-nft_redir_ipv6_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
-{
-	nf_ct_netns_put(ctx->net, NFPROTO_IPV6);
-}
-
-static struct nft_expr_type nft_redir_ipv6_type;
-static const struct nft_expr_ops nft_redir_ipv6_ops = {
-	.type		= &nft_redir_ipv6_type,
-	.size		= NFT_EXPR_SIZE(sizeof(struct nft_redir)),
-	.eval		= nft_redir_ipv6_eval,
-	.init		= nft_redir_init,
-	.destroy	= nft_redir_ipv6_destroy,
-	.dump		= nft_redir_dump,
-	.validate	= nft_redir_validate,
-};
-
-static struct nft_expr_type nft_redir_ipv6_type __read_mostly = {
-	.family		= NFPROTO_IPV6,
-	.name		= "redir",
-	.ops		= &nft_redir_ipv6_ops,
-	.policy		= nft_redir_policy,
-	.maxattr	= NFTA_REDIR_MAX,
-	.owner		= THIS_MODULE,
-};
-
-static int __init nft_redir_ipv6_module_init(void)
-{
-	return nft_register_expr(&nft_redir_ipv6_type);
-}
-
-static void __exit nft_redir_ipv6_module_exit(void)
-{
-	nft_unregister_expr(&nft_redir_ipv6_type);
-}
-
-module_init(nft_redir_ipv6_module_init);
-module_exit(nft_redir_ipv6_module_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo@debian.org>");
-MODULE_ALIAS_NFT_AF_EXPR(AF_INET6, "redir");
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 5beb51d39dc2..73857f9fdb25 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -541,6 +541,7 @@ config NFT_REDIR
 	depends on NF_CONNTRACK
 	depends on NF_NAT
 	tristate "Netfilter nf_tables redirect support"
+	select NF_NAT_REDIRECT
 	help
 	  This options adds the "redirect" expression that you can use
 	  to perform NAT in the redirect flavour.
diff --git a/net/netfilter/nft_redir.c b/net/netfilter/nft_redir.c
index c64cbe78dee7..f8092926f704 100644
--- a/net/netfilter/nft_redir.c
+++ b/net/netfilter/nft_redir.c
@@ -13,19 +13,24 @@
 #include <linux/netfilter.h>
 #include <linux/netfilter/nf_tables.h>
 #include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_nat_redirect.h>
 #include <net/netfilter/nf_tables.h>
-#include <net/netfilter/nft_redir.h>
 
-const struct nla_policy nft_redir_policy[NFTA_REDIR_MAX + 1] = {
+struct nft_redir {
+	enum nft_registers	sreg_proto_min:8;
+	enum nft_registers	sreg_proto_max:8;
+	u16			flags;
+};
+
+static const struct nla_policy nft_redir_policy[NFTA_REDIR_MAX + 1] = {
 	[NFTA_REDIR_REG_PROTO_MIN]	= { .type = NLA_U32 },
 	[NFTA_REDIR_REG_PROTO_MAX]	= { .type = NLA_U32 },
 	[NFTA_REDIR_FLAGS]		= { .type = NLA_U32 },
 };
-EXPORT_SYMBOL_GPL(nft_redir_policy);
 
-int nft_redir_validate(const struct nft_ctx *ctx,
-		       const struct nft_expr *expr,
-		       const struct nft_data **data)
+static int nft_redir_validate(const struct nft_ctx *ctx,
+			      const struct nft_expr *expr,
+			      const struct nft_data **data)
 {
 	int err;
 
@@ -37,11 +42,10 @@ int nft_redir_validate(const struct nft_ctx *ctx,
 					(1 << NF_INET_PRE_ROUTING) |
 					(1 << NF_INET_LOCAL_OUT));
 }
-EXPORT_SYMBOL_GPL(nft_redir_validate);
 
-int nft_redir_init(const struct nft_ctx *ctx,
-		   const struct nft_expr *expr,
-		   const struct nlattr * const tb[])
+static int nft_redir_init(const struct nft_ctx *ctx,
+			  const struct nft_expr *expr,
+			  const struct nlattr * const tb[])
 {
 	struct nft_redir *priv = nft_expr_priv(expr);
 	unsigned int plen;
@@ -77,7 +81,6 @@ int nft_redir_init(const struct nft_ctx *ctx,
 
 	return nf_ct_netns_get(ctx->net, ctx->family);
 }
-EXPORT_SYMBOL_GPL(nft_redir_init);
 
 int nft_redir_dump(struct sk_buff *skb, const struct nft_expr *expr)
 {
@@ -101,7 +104,134 @@ int nft_redir_dump(struct sk_buff *skb, const struct nft_expr *expr)
 nla_put_failure:
 	return -1;
 }
-EXPORT_SYMBOL_GPL(nft_redir_dump);
+
+static void nft_redir_ipv4_eval(const struct nft_expr *expr,
+				struct nft_regs *regs,
+				const struct nft_pktinfo *pkt)
+{
+	struct nft_redir *priv = nft_expr_priv(expr);
+	struct nf_nat_ipv4_multi_range_compat mr;
+
+	memset(&mr, 0, sizeof(mr));
+	if (priv->sreg_proto_min) {
+		mr.range[0].min.all = (__force __be16)nft_reg_load16(
+			&regs->data[priv->sreg_proto_min]);
+		mr.range[0].max.all = (__force __be16)nft_reg_load16(
+			&regs->data[priv->sreg_proto_max]);
+		mr.range[0].flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
+	}
+
+	mr.range[0].flags |= priv->flags;
+
+	regs->verdict.code = nf_nat_redirect_ipv4(pkt->skb, &mr, nft_hook(pkt));
+}
+
+static void
+nft_redir_ipv4_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
+{
+	nf_ct_netns_put(ctx->net, NFPROTO_IPV4);
+}
+
+static struct nft_expr_type nft_redir_ipv4_type;
+static const struct nft_expr_ops nft_redir_ipv4_ops = {
+	.type		= &nft_redir_ipv4_type,
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_redir)),
+	.eval		= nft_redir_ipv4_eval,
+	.init		= nft_redir_init,
+	.destroy	= nft_redir_ipv4_destroy,
+	.dump		= nft_redir_dump,
+	.validate	= nft_redir_validate,
+};
+
+static struct nft_expr_type nft_redir_ipv4_type __read_mostly = {
+	.family		= NFPROTO_IPV4,
+	.name		= "redir",
+	.ops		= &nft_redir_ipv4_ops,
+	.policy		= nft_redir_policy,
+	.maxattr	= NFTA_REDIR_MAX,
+	.owner		= THIS_MODULE,
+};
+
+#ifdef CONFIG_NF_TABLES_IPV6
+static void nft_redir_ipv6_eval(const struct nft_expr *expr,
+				struct nft_regs *regs,
+				const struct nft_pktinfo *pkt)
+{
+	struct nft_redir *priv = nft_expr_priv(expr);
+	struct nf_nat_range2 range;
+
+	memset(&range, 0, sizeof(range));
+	if (priv->sreg_proto_min) {
+		range.min_proto.all = (__force __be16)nft_reg_load16(
+			&regs->data[priv->sreg_proto_min]);
+		range.max_proto.all = (__force __be16)nft_reg_load16(
+			&regs->data[priv->sreg_proto_max]);
+		range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
+	}
+
+	range.flags |= priv->flags;
+
+	regs->verdict.code =
+		nf_nat_redirect_ipv6(pkt->skb, &range, nft_hook(pkt));
+}
+
+static void
+nft_redir_ipv6_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
+{
+	nf_ct_netns_put(ctx->net, NFPROTO_IPV6);
+}
+
+static struct nft_expr_type nft_redir_ipv6_type;
+static const struct nft_expr_ops nft_redir_ipv6_ops = {
+	.type		= &nft_redir_ipv6_type,
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_redir)),
+	.eval		= nft_redir_ipv6_eval,
+	.init		= nft_redir_init,
+	.destroy	= nft_redir_ipv6_destroy,
+	.dump		= nft_redir_dump,
+	.validate	= nft_redir_validate,
+};
+
+static struct nft_expr_type nft_redir_ipv6_type __read_mostly = {
+	.family		= NFPROTO_IPV6,
+	.name		= "redir",
+	.ops		= &nft_redir_ipv6_ops,
+	.policy		= nft_redir_policy,
+	.maxattr	= NFTA_REDIR_MAX,
+	.owner		= THIS_MODULE,
+};
+#endif
+
+static int __init nft_redir_module_init(void)
+{
+	int ret = nft_register_expr(&nft_redir_ipv4_type);
+
+	if (ret)
+		return ret;
+
+#ifdef CONFIG_NF_TABLES_IPV6
+	ret = nft_register_expr(&nft_redir_ipv6_type);
+	if (ret) {
+		nft_unregister_expr(&nft_redir_ipv4_type);
+		return ret;
+	}
+#endif
+
+	return ret;
+}
+
+static void __exit nft_redir_module_exit(void)
+{
+	nft_unregister_expr(&nft_redir_ipv4_type);
+#ifdef CONFIG_NF_TABLES_IPV6
+	nft_unregister_expr(&nft_redir_ipv6_type);
+#endif
+}
+
+module_init(nft_redir_module_init);
+module_exit(nft_redir_module_exit);
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo@debian.org>");
+MODULE_ALIAS_NFT_AF_EXPR(AF_INET4, "redir");
+MODULE_ALIAS_NFT_AF_EXPR(AF_INET6, "redir");
-- 
cgit v1.2.3


From a9ce849e786787af4b7dffd48d49b97b04671f8c Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Thu, 28 Feb 2019 12:02:51 +0100
Subject: netfilter: nf_tables: nat: merge nft_masq protocol specific modules

The family specific masq modules are way too small to warrant
an extra module, just place all of them in nft_masq.

before:
  text	   data	    bss	    dec	    hex	filename
   1001	    832	      0	   1833	    729	nft_masq.ko
    766	    896	      0	   1662	    67e	nft_masq_ipv4.ko
    764	    896	      0	   1660	    67c	nft_masq_ipv6.ko

after:
   2010	    960	      0	   2970	    b9a	nft_masq.ko

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nft_masq.h   |  22 -----
 net/ipv4/netfilter/Kconfig         |   9 --
 net/ipv4/netfilter/Makefile        |   1 -
 net/ipv4/netfilter/nft_masq_ipv4.c |  90 -------------------
 net/ipv6/netfilter/Kconfig         |   9 --
 net/ipv6/netfilter/Makefile        |   1 -
 net/ipv6/netfilter/nft_masq_ipv6.c |  91 -------------------
 net/netfilter/Kconfig              |   1 +
 net/netfilter/nft_masq.c           | 180 ++++++++++++++++++++++++++++++++++---
 9 files changed, 168 insertions(+), 236 deletions(-)
 delete mode 100644 include/net/netfilter/nft_masq.h
 delete mode 100644 net/ipv4/netfilter/nft_masq_ipv4.c
 delete mode 100644 net/ipv6/netfilter/nft_masq_ipv6.c

(limited to 'include')

diff --git a/include/net/netfilter/nft_masq.h b/include/net/netfilter/nft_masq.h
deleted file mode 100644
index e51ab3815797..000000000000
--- a/include/net/netfilter/nft_masq.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _NFT_MASQ_H_
-#define _NFT_MASQ_H_
-
-struct nft_masq {
-	u32			flags;
-	enum nft_registers      sreg_proto_min:8;
-	enum nft_registers      sreg_proto_max:8;
-};
-
-extern const struct nla_policy nft_masq_policy[];
-
-int nft_masq_init(const struct nft_ctx *ctx,
-		  const struct nft_expr *expr,
-		  const struct nlattr * const tb[]);
-
-int nft_masq_dump(struct sk_buff *skb, const struct nft_expr *expr);
-
-int nft_masq_validate(const struct nft_ctx *ctx, const struct nft_expr *expr,
-		      const struct nft_data **data);
-
-#endif /* _NFT_MASQ_H_ */
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 12cc3f7d7733..71c291a86245 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -106,15 +106,6 @@ config NFT_CHAIN_NAT_IPV4
 	  packet transformations such as the source, destination address and
 	  source and destination ports.
 
-config NFT_MASQ_IPV4
-	tristate "IPv4 masquerading support for nf_tables"
-	depends on NF_TABLES_IPV4
-	depends on NFT_MASQ
-	select NF_NAT_MASQUERADE
-	help
-	  This is the expression that provides IPv4 masquerading support for
-	  nf_tables.
-
 endif # NF_TABLES
 
 config NF_NAT_SNMP_BASIC
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index 5558caf92578..1ae24d71d3cc 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -28,7 +28,6 @@ obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV4) += nft_chain_route_ipv4.o
 obj-$(CONFIG_NFT_CHAIN_NAT_IPV4) += nft_chain_nat_ipv4.o
 obj-$(CONFIG_NFT_REJECT_IPV4) += nft_reject_ipv4.o
 obj-$(CONFIG_NFT_FIB_IPV4) += nft_fib_ipv4.o
-obj-$(CONFIG_NFT_MASQ_IPV4) += nft_masq_ipv4.o
 obj-$(CONFIG_NFT_DUP_IPV4) += nft_dup_ipv4.o
 
 # flow table support
diff --git a/net/ipv4/netfilter/nft_masq_ipv4.c b/net/ipv4/netfilter/nft_masq_ipv4.c
deleted file mode 100644
index 6847de1d1db8..000000000000
--- a/net/ipv4/netfilter/nft_masq_ipv4.c
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo@debian.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/netlink.h>
-#include <linux/netfilter.h>
-#include <linux/netfilter/nf_tables.h>
-#include <net/netfilter/nf_tables.h>
-#include <net/netfilter/nft_masq.h>
-#include <net/netfilter/ipv4/nf_nat_masquerade.h>
-
-static void nft_masq_ipv4_eval(const struct nft_expr *expr,
-			       struct nft_regs *regs,
-			       const struct nft_pktinfo *pkt)
-{
-	struct nft_masq *priv = nft_expr_priv(expr);
-	struct nf_nat_range2 range;
-
-	memset(&range, 0, sizeof(range));
-	range.flags = priv->flags;
-	if (priv->sreg_proto_min) {
-		range.min_proto.all = (__force __be16)nft_reg_load16(
-			&regs->data[priv->sreg_proto_min]);
-		range.max_proto.all = (__force __be16)nft_reg_load16(
-			&regs->data[priv->sreg_proto_max]);
-	}
-	regs->verdict.code = nf_nat_masquerade_ipv4(pkt->skb, nft_hook(pkt),
-						    &range, nft_out(pkt));
-}
-
-static void
-nft_masq_ipv4_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
-{
-	nf_ct_netns_put(ctx->net, NFPROTO_IPV4);
-}
-
-static struct nft_expr_type nft_masq_ipv4_type;
-static const struct nft_expr_ops nft_masq_ipv4_ops = {
-	.type		= &nft_masq_ipv4_type,
-	.size		= NFT_EXPR_SIZE(sizeof(struct nft_masq)),
-	.eval		= nft_masq_ipv4_eval,
-	.init		= nft_masq_init,
-	.destroy	= nft_masq_ipv4_destroy,
-	.dump		= nft_masq_dump,
-	.validate	= nft_masq_validate,
-};
-
-static struct nft_expr_type nft_masq_ipv4_type __read_mostly = {
-	.family		= NFPROTO_IPV4,
-	.name		= "masq",
-	.ops		= &nft_masq_ipv4_ops,
-	.policy		= nft_masq_policy,
-	.maxattr	= NFTA_MASQ_MAX,
-	.owner		= THIS_MODULE,
-};
-
-static int __init nft_masq_ipv4_module_init(void)
-{
-	int ret;
-
-	ret = nft_register_expr(&nft_masq_ipv4_type);
-	if (ret < 0)
-		return ret;
-
-	ret = nf_nat_masquerade_ipv4_register_notifier();
-	if (ret)
-		nft_unregister_expr(&nft_masq_ipv4_type);
-
-	return ret;
-}
-
-static void __exit nft_masq_ipv4_module_exit(void)
-{
-	nft_unregister_expr(&nft_masq_ipv4_type);
-	nf_nat_masquerade_ipv4_unregister_notifier();
-}
-
-module_init(nft_masq_ipv4_module_init);
-module_exit(nft_masq_ipv4_module_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo@debian.org");
-MODULE_ALIAS_NFT_AF_EXPR(AF_INET, "masq");
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index 7e7cc411003a..b95351a5cb83 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -40,15 +40,6 @@ config NFT_CHAIN_NAT_IPV6
 	  chain type is used to perform Network Address Translation (NAT)
 	  packet transformations such as the source, destination address and
 	  source and destination ports.
-
-config NFT_MASQ_IPV6
-	tristate "IPv6 masquerade support for nf_tables"
-	depends on NFT_MASQ
-	select NF_NAT_MASQUERADE
-	help
-	  This is the expression that provides IPv4 masquerading support for
-	  nf_tables.
-
 endif # NF_NAT
 
 config NFT_REJECT_IPV6
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
index 42a80d03245a..06c1829f8ffc 100644
--- a/net/ipv6/netfilter/Makefile
+++ b/net/ipv6/netfilter/Makefile
@@ -30,7 +30,6 @@ obj-$(CONFIG_NF_DUP_IPV6) += nf_dup_ipv6.o
 obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV6) += nft_chain_route_ipv6.o
 obj-$(CONFIG_NFT_CHAIN_NAT_IPV6) += nft_chain_nat_ipv6.o
 obj-$(CONFIG_NFT_REJECT_IPV6) += nft_reject_ipv6.o
-obj-$(CONFIG_NFT_MASQ_IPV6) += nft_masq_ipv6.o
 obj-$(CONFIG_NFT_DUP_IPV6) += nft_dup_ipv6.o
 obj-$(CONFIG_NFT_FIB_IPV6) += nft_fib_ipv6.o
 
diff --git a/net/ipv6/netfilter/nft_masq_ipv6.c b/net/ipv6/netfilter/nft_masq_ipv6.c
deleted file mode 100644
index e06c82e9dfcd..000000000000
--- a/net/ipv6/netfilter/nft_masq_ipv6.c
+++ /dev/null
@@ -1,91 +0,0 @@
-/*
- * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo@debian.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/netlink.h>
-#include <linux/netfilter.h>
-#include <linux/netfilter/nf_tables.h>
-#include <net/netfilter/nf_tables.h>
-#include <net/netfilter/nf_nat.h>
-#include <net/netfilter/nft_masq.h>
-#include <net/netfilter/ipv6/nf_nat_masquerade.h>
-
-static void nft_masq_ipv6_eval(const struct nft_expr *expr,
-			       struct nft_regs *regs,
-			       const struct nft_pktinfo *pkt)
-{
-	struct nft_masq *priv = nft_expr_priv(expr);
-	struct nf_nat_range2 range;
-
-	memset(&range, 0, sizeof(range));
-	range.flags = priv->flags;
-	if (priv->sreg_proto_min) {
-		range.min_proto.all = (__force __be16)nft_reg_load16(
-			&regs->data[priv->sreg_proto_min]);
-		range.max_proto.all = (__force __be16)nft_reg_load16(
-			&regs->data[priv->sreg_proto_max]);
-	}
-	regs->verdict.code = nf_nat_masquerade_ipv6(pkt->skb, &range,
-						    nft_out(pkt));
-}
-
-static void
-nft_masq_ipv6_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
-{
-	nf_ct_netns_put(ctx->net, NFPROTO_IPV6);
-}
-
-static struct nft_expr_type nft_masq_ipv6_type;
-static const struct nft_expr_ops nft_masq_ipv6_ops = {
-	.type		= &nft_masq_ipv6_type,
-	.size		= NFT_EXPR_SIZE(sizeof(struct nft_masq)),
-	.eval		= nft_masq_ipv6_eval,
-	.init		= nft_masq_init,
-	.destroy	= nft_masq_ipv6_destroy,
-	.dump		= nft_masq_dump,
-	.validate	= nft_masq_validate,
-};
-
-static struct nft_expr_type nft_masq_ipv6_type __read_mostly = {
-	.family		= NFPROTO_IPV6,
-	.name		= "masq",
-	.ops		= &nft_masq_ipv6_ops,
-	.policy		= nft_masq_policy,
-	.maxattr	= NFTA_MASQ_MAX,
-	.owner		= THIS_MODULE,
-};
-
-static int __init nft_masq_ipv6_module_init(void)
-{
-	int ret;
-
-	ret = nft_register_expr(&nft_masq_ipv6_type);
-	if (ret < 0)
-		return ret;
-
-	ret = nf_nat_masquerade_ipv6_register_notifier();
-	if (ret)
-		nft_unregister_expr(&nft_masq_ipv6_type);
-
-	return ret;
-}
-
-static void __exit nft_masq_ipv6_module_exit(void)
-{
-	nft_unregister_expr(&nft_masq_ipv6_type);
-	nf_nat_masquerade_ipv6_unregister_notifier();
-}
-
-module_init(nft_masq_ipv6_module_init);
-module_exit(nft_masq_ipv6_module_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo@debian.org>");
-MODULE_ALIAS_NFT_AF_EXPR(AF_INET6, "masq");
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 73857f9fdb25..537f23a8ed52 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -532,6 +532,7 @@ config NFT_LIMIT
 config NFT_MASQ
 	depends on NF_CONNTRACK
 	depends on NF_NAT
+	select NF_NAT_MASQUERADE
 	tristate "Netfilter nf_tables masquerade support"
 	help
 	  This option adds the "masquerade" expression that you can use
diff --git a/net/netfilter/nft_masq.c b/net/netfilter/nft_masq.c
index 9d8655bc1bea..bee156eaa400 100644
--- a/net/netfilter/nft_masq.c
+++ b/net/netfilter/nft_masq.c
@@ -14,18 +14,24 @@
 #include <linux/netfilter/nf_tables.h>
 #include <net/netfilter/nf_tables.h>
 #include <net/netfilter/nf_nat.h>
-#include <net/netfilter/nft_masq.h>
+#include <net/netfilter/ipv4/nf_nat_masquerade.h>
+#include <net/netfilter/ipv6/nf_nat_masquerade.h>
 
-const struct nla_policy nft_masq_policy[NFTA_MASQ_MAX + 1] = {
+struct nft_masq {
+	u32			flags;
+	enum nft_registers      sreg_proto_min:8;
+	enum nft_registers      sreg_proto_max:8;
+};
+
+static const struct nla_policy nft_masq_policy[NFTA_MASQ_MAX + 1] = {
 	[NFTA_MASQ_FLAGS]		= { .type = NLA_U32 },
 	[NFTA_MASQ_REG_PROTO_MIN]	= { .type = NLA_U32 },
 	[NFTA_MASQ_REG_PROTO_MAX]	= { .type = NLA_U32 },
 };
-EXPORT_SYMBOL_GPL(nft_masq_policy);
 
-int nft_masq_validate(const struct nft_ctx *ctx,
-		      const struct nft_expr *expr,
-		      const struct nft_data **data)
+static int nft_masq_validate(const struct nft_ctx *ctx,
+			     const struct nft_expr *expr,
+			     const struct nft_data **data)
 {
 	int err;
 
@@ -36,11 +42,10 @@ int nft_masq_validate(const struct nft_ctx *ctx,
 	return nft_chain_validate_hooks(ctx->chain,
 				        (1 << NF_INET_POST_ROUTING));
 }
-EXPORT_SYMBOL_GPL(nft_masq_validate);
 
-int nft_masq_init(const struct nft_ctx *ctx,
-		  const struct nft_expr *expr,
-		  const struct nlattr * const tb[])
+static int nft_masq_init(const struct nft_ctx *ctx,
+			 const struct nft_expr *expr,
+			 const struct nlattr * const tb[])
 {
 	u32 plen = FIELD_SIZEOF(struct nf_nat_range, min_addr.all);
 	struct nft_masq *priv = nft_expr_priv(expr);
@@ -75,9 +80,8 @@ int nft_masq_init(const struct nft_ctx *ctx,
 
 	return nf_ct_netns_get(ctx->net, ctx->family);
 }
-EXPORT_SYMBOL_GPL(nft_masq_init);
 
-int nft_masq_dump(struct sk_buff *skb, const struct nft_expr *expr)
+static int nft_masq_dump(struct sk_buff *skb, const struct nft_expr *expr)
 {
 	const struct nft_masq *priv = nft_expr_priv(expr);
 
@@ -98,7 +102,157 @@ int nft_masq_dump(struct sk_buff *skb, const struct nft_expr *expr)
 nla_put_failure:
 	return -1;
 }
-EXPORT_SYMBOL_GPL(nft_masq_dump);
+
+static void nft_masq_ipv4_eval(const struct nft_expr *expr,
+			       struct nft_regs *regs,
+			       const struct nft_pktinfo *pkt)
+{
+	struct nft_masq *priv = nft_expr_priv(expr);
+	struct nf_nat_range2 range;
+
+	memset(&range, 0, sizeof(range));
+	range.flags = priv->flags;
+	if (priv->sreg_proto_min) {
+		range.min_proto.all = (__force __be16)nft_reg_load16(
+			&regs->data[priv->sreg_proto_min]);
+		range.max_proto.all = (__force __be16)nft_reg_load16(
+			&regs->data[priv->sreg_proto_max]);
+	}
+	regs->verdict.code = nf_nat_masquerade_ipv4(pkt->skb, nft_hook(pkt),
+						    &range, nft_out(pkt));
+}
+
+static void
+nft_masq_ipv4_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
+{
+	nf_ct_netns_put(ctx->net, NFPROTO_IPV4);
+}
+
+static struct nft_expr_type nft_masq_ipv4_type;
+static const struct nft_expr_ops nft_masq_ipv4_ops = {
+	.type		= &nft_masq_ipv4_type,
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_masq)),
+	.eval		= nft_masq_ipv4_eval,
+	.init		= nft_masq_init,
+	.destroy	= nft_masq_ipv4_destroy,
+	.dump		= nft_masq_dump,
+	.validate	= nft_masq_validate,
+};
+
+static struct nft_expr_type nft_masq_ipv4_type __read_mostly = {
+	.family		= NFPROTO_IPV4,
+	.name		= "masq",
+	.ops		= &nft_masq_ipv4_ops,
+	.policy		= nft_masq_policy,
+	.maxattr	= NFTA_MASQ_MAX,
+	.owner		= THIS_MODULE,
+};
+
+#ifdef CONFIG_NF_TABLES_IPV6
+static void nft_masq_ipv6_eval(const struct nft_expr *expr,
+			       struct nft_regs *regs,
+			       const struct nft_pktinfo *pkt)
+{
+	struct nft_masq *priv = nft_expr_priv(expr);
+	struct nf_nat_range2 range;
+
+	memset(&range, 0, sizeof(range));
+	range.flags = priv->flags;
+	if (priv->sreg_proto_min) {
+		range.min_proto.all = (__force __be16)nft_reg_load16(
+			&regs->data[priv->sreg_proto_min]);
+		range.max_proto.all = (__force __be16)nft_reg_load16(
+			&regs->data[priv->sreg_proto_max]);
+	}
+	regs->verdict.code = nf_nat_masquerade_ipv6(pkt->skb, &range,
+						    nft_out(pkt));
+}
+
+static void
+nft_masq_ipv6_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
+{
+	nf_ct_netns_put(ctx->net, NFPROTO_IPV6);
+}
+
+static struct nft_expr_type nft_masq_ipv6_type;
+static const struct nft_expr_ops nft_masq_ipv6_ops = {
+	.type		= &nft_masq_ipv6_type,
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_masq)),
+	.eval		= nft_masq_ipv6_eval,
+	.init		= nft_masq_init,
+	.destroy	= nft_masq_ipv6_destroy,
+	.dump		= nft_masq_dump,
+	.validate	= nft_masq_validate,
+};
+
+static struct nft_expr_type nft_masq_ipv6_type __read_mostly = {
+	.family		= NFPROTO_IPV6,
+	.name		= "masq",
+	.ops		= &nft_masq_ipv6_ops,
+	.policy		= nft_masq_policy,
+	.maxattr	= NFTA_MASQ_MAX,
+	.owner		= THIS_MODULE,
+};
+
+static int __init nft_masq_module_init_ipv6(void)
+{
+	int ret = nft_register_expr(&nft_masq_ipv6_type);
+
+	if (ret)
+		return ret;
+
+	ret = nf_nat_masquerade_ipv6_register_notifier();
+	if (ret < 0)
+		nft_unregister_expr(&nft_masq_ipv6_type);
+
+	return ret;
+}
+
+static void nft_masq_module_exit_ipv6(void)
+{
+	nft_unregister_expr(&nft_masq_ipv6_type);
+	nf_nat_masquerade_ipv6_unregister_notifier();
+}
+#else
+static inline int nft_masq_module_init_ipv6(void) { return 0; }
+static inline void nft_masq_module_exit_ipv6(void) {}
+#endif
+
+static int __init nft_masq_module_init(void)
+{
+	int ret;
+
+	ret = nft_masq_module_init_ipv6();
+	if (ret < 0)
+		return ret;
+
+	ret = nft_register_expr(&nft_masq_ipv4_type);
+	if (ret < 0) {
+		nft_masq_module_exit_ipv6();
+		return ret;
+	}
+
+	ret = nf_nat_masquerade_ipv4_register_notifier();
+	if (ret < 0) {
+		nft_masq_module_exit_ipv6();
+		nft_unregister_expr(&nft_masq_ipv4_type);
+		return ret;
+	}
+
+	return ret;
+}
+
+static void __exit nft_masq_module_exit(void)
+{
+	nft_masq_module_exit_ipv6();
+	nft_unregister_expr(&nft_masq_ipv4_type);
+	nf_nat_masquerade_ipv4_unregister_notifier();
+}
+
+module_init(nft_masq_module_init);
+module_exit(nft_masq_module_exit);
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo@debian.org>");
+MODULE_ALIAS_NFT_AF_EXPR(AF_INET6, "masq");
+MODULE_ALIAS_NFT_AF_EXPR(AF_INET, "masq");
-- 
cgit v1.2.3


From e907bf3c9820c8480b1d83aca42a5668c5364be9 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Mon, 18 Feb 2019 14:29:06 -0500
Subject: media: include: fix several typos

Use codespell to fix lots of typos over frontends.

Manually verified to avoid false-positives.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Reviewed-by: Lad, Prabhakar <prabhakar.csengg@gmail.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 include/linux/platform_data/media/si4713.h | 4 ++--
 include/media/davinci/dm355_ccdc.h         | 4 ++--
 include/media/davinci/dm644x_ccdc.h        | 2 +-
 include/media/drv-intf/exynos-fimc.h       | 2 +-
 include/media/drv-intf/saa7146.h           | 2 +-
 include/media/drv-intf/saa7146_vv.h        | 4 ++--
 include/media/dvb_frontend.h               | 8 ++++----
 include/media/rc-map.h                     | 4 ++--
 include/media/v4l2-ctrls.h                 | 2 +-
 include/media/v4l2-fwnode.h                | 4 ++--
 include/media/v4l2-subdev.h                | 2 +-
 include/media/videobuf-core.h              | 2 +-
 include/media/videobuf2-core.h             | 2 +-
 13 files changed, 21 insertions(+), 21 deletions(-)

(limited to 'include')

diff --git a/include/linux/platform_data/media/si4713.h b/include/linux/platform_data/media/si4713.h
index 932668ad54f7..13b3eb7a9059 100644
--- a/include/linux/platform_data/media/si4713.h
+++ b/include/linux/platform_data/media/si4713.h
@@ -31,7 +31,7 @@ struct si4713_platform_data {
  */
 struct si4713_rnl {
 	__u32 index;		/* modulator index */
-	__u32 frequency;	/* frequency to peform rnl measurement */
+	__u32 frequency;	/* frequency to perform rnl measurement */
 	__s32 rnl;		/* result of measurement in dBuV */
 	__u32 reserved[4];	/* drivers and apps must init this to 0 */
 };
@@ -40,7 +40,7 @@ struct si4713_rnl {
  * This is the ioctl number to query for rnl. Users must pass a
  * struct si4713_rnl pointer specifying desired frequency in 'frequency' field
  * following driver capabilities (i.e V4L2_TUNER_CAP_LOW).
- * Driver must return measured value in the same struture, filling 'rnl' field.
+ * Driver must return measured value in the same structure, filling 'rnl' field.
  */
 #define SI4713_IOC_MEASURE_RNL	_IOWR('V', BASE_VIDIOC_PRIVATE + 0, \
 						struct si4713_rnl)
diff --git a/include/media/davinci/dm355_ccdc.h b/include/media/davinci/dm355_ccdc.h
index e6bc72f6b60f..1cba42d805fa 100644
--- a/include/media/davinci/dm355_ccdc.h
+++ b/include/media/davinci/dm355_ccdc.h
@@ -228,7 +228,7 @@ struct ccdc_config_params_raw {
 	/* Threshold of median filter */
 	int med_filt_thres;
 	/*
-	 * horz and vertical data offset. Appliable for defect correction
+	 * horz and vertical data offset. Applicable for defect correction
 	 * and lsc
 	 */
 	struct ccdc_data_offset data_offset;
@@ -238,7 +238,7 @@ struct ccdc_config_params_raw {
 	struct ccdc_black_clamp blk_clamp;
 	/* Structure for Black Compensation */
 	struct ccdc_black_compensation blk_comp;
-	/* struture for vertical Defect Correction Module Configuration */
+	/* structure for vertical Defect Correction Module Configuration */
 	struct ccdc_vertical_dft vertical_dft;
 	/* structure for color space converter Module Configuration */
 	struct ccdc_csc csc;
diff --git a/include/media/davinci/dm644x_ccdc.h b/include/media/davinci/dm644x_ccdc.h
index 6ea2ce241851..694fc8f6081f 100644
--- a/include/media/davinci/dm644x_ccdc.h
+++ b/include/media/davinci/dm644x_ccdc.h
@@ -152,7 +152,7 @@ struct ccdc_params_raw {
 	 * order in memory(bottom to top)
 	 */
 	unsigned char image_invert_enable;
-	/* configurable paramaters */
+	/* configurable parameters */
 	struct ccdc_config_params_raw config_params;
 };
 
diff --git a/include/media/drv-intf/exynos-fimc.h b/include/media/drv-intf/exynos-fimc.h
index f9c64338841f..54c214737142 100644
--- a/include/media/drv-intf/exynos-fimc.h
+++ b/include/media/drv-intf/exynos-fimc.h
@@ -81,7 +81,7 @@ struct fimc_source_info {
  * v4l2_device notification id. This is only for internal use in the kernel.
  * Sensor subdevs should issue S5P_FIMC_TX_END_NOTIFY notification in single
  * frame capture mode when there is only one VSYNC pulse issued by the sensor
- * at begining of the frame transmission.
+ * at beginning of the frame transmission.
  */
 #define S5P_FIMC_TX_END_NOTIFY _IO('e', 0)
 
diff --git a/include/media/drv-intf/saa7146.h b/include/media/drv-intf/saa7146.h
index a7bf2c4a2e4d..71ce63c99cb4 100644
--- a/include/media/drv-intf/saa7146.h
+++ b/include/media/drv-intf/saa7146.h
@@ -139,7 +139,7 @@ struct saa7146_dev
 	void				*ext_priv;	/* pointer for extension private use (most likely some private data) */
 	struct saa7146_ext_vv		*ext_vv_data;
 
-	/* per device video/vbi informations (if available) */
+	/* per device video/vbi information (if available) */
 	struct saa7146_vv	*vv_data;
 	void (*vv_callback)(struct saa7146_dev *dev, unsigned long status);
 
diff --git a/include/media/drv-intf/saa7146_vv.h b/include/media/drv-intf/saa7146_vv.h
index 6f80fb7f31a5..b34d86bb0664 100644
--- a/include/media/drv-intf/saa7146_vv.h
+++ b/include/media/drv-intf/saa7146_vv.h
@@ -151,7 +151,7 @@ struct saa7146_vv
 
 struct saa7146_ext_vv
 {
-	/* informations about the video capabilities of the device */
+	/* information about the video capabilities of the device */
 	int	inputs;
 	int	audios;
 	u32	capabilities;
@@ -241,7 +241,7 @@ void saa7146_res_free(struct saa7146_fh *fh, unsigned int bits);
 #define SAA7146_CLIPPING_MASK		0x6
 #define SAA7146_CLIPPING_MASK_INVERTED	0x7
 
-/* output formats: each entry holds four informations */
+/* output formats: each entry holds four information */
 #define RGB08_COMPOSED	0x0217 /* composed is used in the sense of "not-planar" */
 /* this means: planar?=0, yuv2rgb-conversation-mode=2, dither=yes(=1), format-mode = 7 */
 #define RGB15_COMPOSED	0x0213
diff --git a/include/media/dvb_frontend.h b/include/media/dvb_frontend.h
index 6f7a85ab3541..f05cd7b94a2c 100644
--- a/include/media/dvb_frontend.h
+++ b/include/media/dvb_frontend.h
@@ -160,7 +160,7 @@ enum dvbfe_algo {
  *	The frontend search for a signal failed
  *
  * @DVBFE_ALGO_SEARCH_INVALID:
- *	The frontend search algorith was probably supplied with invalid
+ *	The frontend search algorithm was probably supplied with invalid
  *	parameters and the search is an invalid one
  *
  * @DVBFE_ALGO_SEARCH_ERROR:
@@ -204,7 +204,7 @@ enum dvbfe_search {
  * @set_config:		callback function used to send some tuner-specific
  *			parameters.
  * @get_frequency:	get the actual tuned frequency
- * @get_bandwidth:	get the bandwitdh used by the low pass filters
+ * @get_bandwidth:	get the bandwidth used by the low pass filters
  * @get_if_frequency:	get the Intermediate Frequency, in Hz. For baseband,
  *			should return 0.
  * @get_status:		returns the frontend lock status
@@ -232,7 +232,7 @@ struct dvb_tuner_ops {
 	int (*suspend)(struct dvb_frontend *fe);
 	int (*resume)(struct dvb_frontend *fe);
 
-	/* This is the recomended way to set the tuner */
+	/* This is the recommended way to set the tuner */
 	int (*set_params)(struct dvb_frontend *fe);
 	int (*set_analog_params)(struct dvb_frontend *fe, struct analog_parameters *p);
 
@@ -358,7 +358,7 @@ struct dvb_frontend_internal_info {
  * @release:		callback function called when frontend is ready to be
  *			freed.
  *			drivers should free any allocated memory.
- * @release_sec:	callback function requesting that the Satelite Equipment
+ * @release_sec:	callback function requesting that the Satellite Equipment
  *			Control (SEC) driver to release and free any memory
  *			allocated by the driver.
  * @init:		callback function used to initialize the tuner device.
diff --git a/include/media/rc-map.h b/include/media/rc-map.h
index e5e86d595645..5e684bb0d64c 100644
--- a/include/media/rc-map.h
+++ b/include/media/rc-map.h
@@ -144,14 +144,14 @@ struct rc_map_list {
 /* Routines from rc-map.c */
 
 /**
- * rc_map_register() - Registers a Remote Controler scancode map
+ * rc_map_register() - Registers a Remote Controller scancode map
  *
  * @map:	pointer to struct rc_map_list
  */
 int rc_map_register(struct rc_map_list *map);
 
 /**
- * rc_map_unregister() - Unregisters a Remote Controler scancode map
+ * rc_map_unregister() - Unregisters a Remote Controller scancode map
  *
  * @map:	pointer to struct rc_map_list
  */
diff --git a/include/media/v4l2-ctrls.h b/include/media/v4l2-ctrls.h
index d63cf227b0ab..e5cae37ced2d 100644
--- a/include/media/v4l2-ctrls.h
+++ b/include/media/v4l2-ctrls.h
@@ -648,7 +648,7 @@ struct v4l2_ctrl *v4l2_ctrl_new_std_menu_items(struct v4l2_ctrl_handler *hdl,
  * @def:	The control's default value.
  * @qmenu_int:	The control's menu entries.
  *
- * Same as v4l2_ctrl_new_std_menu(), but @mask is set to 0 and it additionaly
+ * Same as v4l2_ctrl_new_std_menu(), but @mask is set to 0 and it additionally
  * takes as an argument an array of integers determining the menu items.
  *
  * If @id refers to a non-integer-menu control, then this function will
diff --git a/include/media/v4l2-fwnode.h b/include/media/v4l2-fwnode.h
index 6d9d9f1839ac..6c07825e18b9 100644
--- a/include/media/v4l2-fwnode.h
+++ b/include/media/v4l2-fwnode.h
@@ -143,7 +143,7 @@ struct v4l2_fwnode_link {
  * @vep.bus_type to V4L2_MBUS_UNKNOWN. The caller may not provide a default
  * configuration in this case as the defaults are specific to a given bus type.
  * This functionality is deprecated and should not be used in new drivers and it
- * is only supported for CSI-2 D-PHY, parallel and Bt.656 busses.
+ * is only supported for CSI-2 D-PHY, parallel and Bt.656 buses.
  *
  * The function does not change the V4L2 fwnode endpoint state if it fails.
  *
@@ -186,7 +186,7 @@ void v4l2_fwnode_endpoint_free(struct v4l2_fwnode_endpoint *vep);
  * @vep.bus_type to V4L2_MBUS_UNKNOWN. The caller may not provide a default
  * configuration in this case as the defaults are specific to a given bus type.
  * This functionality is deprecated and should not be used in new drivers and it
- * is only supported for CSI-2 D-PHY, parallel and Bt.656 busses.
+ * is only supported for CSI-2 D-PHY, parallel and Bt.656 buses.
  *
  * The function does not change the V4L2 fwnode endpoint state if it fails.
  *
diff --git a/include/media/v4l2-subdev.h b/include/media/v4l2-subdev.h
index 34da094a3f40..349e1c18cf48 100644
--- a/include/media/v4l2-subdev.h
+++ b/include/media/v4l2-subdev.h
@@ -70,7 +70,7 @@ struct v4l2_decode_vbi_line {
  * device. These devices are usually audio/video muxers/encoders/decoders or
  * sensors and webcam controllers.
  *
- * Usually these devices are controlled through an i2c bus, but other busses
+ * Usually these devices are controlled through an i2c bus, but other buses
  * may also be used.
  *
  * The v4l2_subdev struct provides a way of accessing these devices in a
diff --git a/include/media/videobuf-core.h b/include/media/videobuf-core.h
index 5684dc6f0d0d..2c4db97cd96f 100644
--- a/include/media/videobuf-core.h
+++ b/include/media/videobuf-core.h
@@ -43,7 +43,7 @@ struct videobuf_queue;
  * (which v4l2 uses).
  *
  * If there is a valid mapping for a buffer, buffer->baddr/bsize holds
- * userspace address + size which can be feeded into the
+ * userspace address + size which can be fed into the
  * videobuf_dma_init_user function listed above.
  *
  */
diff --git a/include/media/videobuf2-core.h b/include/media/videobuf2-core.h
index a844abcae71e..910f3d469005 100644
--- a/include/media/videobuf2-core.h
+++ b/include/media/videobuf2-core.h
@@ -399,7 +399,7 @@ struct vb2_buffer {
  * @buf_queue:		passes buffer vb to the driver; driver may start
  *			hardware operation on this buffer; driver should give
  *			the buffer back by calling vb2_buffer_done() function;
- *			it is allways called after calling VIDIOC_STREAMON()
+ *			it is always called after calling VIDIOC_STREAMON()
  *			ioctl; might be called before @start_streaming callback
  *			if user pre-queued buffers before calling
  *			VIDIOC_STREAMON().
-- 
cgit v1.2.3


From e88619646971168e3baedc850c21243d303e31ca Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Fri, 1 Mar 2019 17:09:00 +0000
Subject: drm/i915: Use HW semaphores for inter-engine synchronisation on gen8+

Having introduced per-context seqno, we now have a means to identity
progress across the system without feel of rollback as befell the
global_seqno. That is we can program a MI_SEMAPHORE_WAIT operation in
advance of submission safe in the knowledge that our target seqno and
address is stable.

However, since we are telling the GPU to busy-spin on the target address
until it matches the signaling seqno, we only want to do so when we are
sure that busy-spin will be completed quickly. To achieve this we only
submit the request to HW once the signaler is itself executing (modulo
preemption causing us to wait longer), and we only do so for default and
above priority requests (so that idle priority tasks never themselves
hog the GPU waiting for others).

As might be reasonably expected, HW semaphores excel in inter-engine
synchronisation microbenchmarks (where the 3x reduced latency / increased
throughput more than offset the power cost of spinning on a second ring)
and have significant improvement (can be up to ~10%, most see no change)
for single clients that utilize multiple engines (typically media players
and transcoders), without regressing multiple clients that can saturate
the system or changing the power envelope dramatically.

v3: Drop the older NEQ branch, now we pin the signaler's HWSP anyway.
v4: Tell the world and include it as part of scheduler caps.

Testcase: igt/gem_exec_whisper
Testcase: igt/benchmarks/gem_wsim
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190301170901.8340-3-chris@chris-wilson.co.uk
---
 drivers/gpu/drm/i915/i915_drv.c           |   2 +-
 drivers/gpu/drm/i915/i915_request.c       | 138 +++++++++++++++++++++++++++++-
 drivers/gpu/drm/i915/i915_request.h       |  26 +++++-
 drivers/gpu/drm/i915/i915_sw_fence.c      |   4 +-
 drivers/gpu/drm/i915/i915_sw_fence.h      |   3 +
 drivers/gpu/drm/i915/intel_engine_cs.c    |   1 +
 drivers/gpu/drm/i915/intel_gpu_commands.h |   9 +-
 drivers/gpu/drm/i915/intel_lrc.c          |   1 +
 drivers/gpu/drm/i915/intel_ringbuffer.h   |   7 ++
 include/uapi/drm/i915_drm.h               |   1 +
 10 files changed, 181 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index c6354f6cdbdb..c08abdef5eb6 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -351,7 +351,7 @@ static int i915_getparam_ioctl(struct drm_device *dev, void *data,
 		value = min_t(int, INTEL_PPGTT(dev_priv), I915_GEM_PPGTT_FULL);
 		break;
 	case I915_PARAM_HAS_SEMAPHORES:
-		value = 0;
+		value = !!(dev_priv->caps.scheduler & I915_SCHEDULER_CAP_SEMAPHORES);
 		break;
 	case I915_PARAM_HAS_SECURE_BATCHES:
 		value = capable(CAP_SYS_ADMIN);
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index d354967d6ae8..59e30b8c4ee9 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -22,8 +22,9 @@
  *
  */
 
-#include <linux/prefetch.h>
 #include <linux/dma-fence-array.h>
+#include <linux/irq_work.h>
+#include <linux/prefetch.h>
 #include <linux/sched.h>
 #include <linux/sched/clock.h>
 #include <linux/sched/signal.h>
@@ -32,9 +33,16 @@
 #include "i915_active.h"
 #include "i915_reset.h"
 
+struct execute_cb {
+	struct list_head link;
+	struct irq_work work;
+	struct i915_sw_fence *fence;
+};
+
 static struct i915_global_request {
 	struct kmem_cache *slab_requests;
 	struct kmem_cache *slab_dependencies;
+	struct kmem_cache *slab_execute_cbs;
 } global;
 
 static const char *i915_fence_get_driver_name(struct dma_fence *fence)
@@ -325,6 +333,69 @@ void i915_request_retire_upto(struct i915_request *rq)
 	} while (tmp != rq);
 }
 
+static void irq_execute_cb(struct irq_work *wrk)
+{
+	struct execute_cb *cb = container_of(wrk, typeof(*cb), work);
+
+	i915_sw_fence_complete(cb->fence);
+	kmem_cache_free(global.slab_execute_cbs, cb);
+}
+
+static void __notify_execute_cb(struct i915_request *rq)
+{
+	struct execute_cb *cb;
+
+	lockdep_assert_held(&rq->lock);
+
+	if (list_empty(&rq->execute_cb))
+		return;
+
+	list_for_each_entry(cb, &rq->execute_cb, link)
+		irq_work_queue(&cb->work);
+
+	/*
+	 * XXX Rollback on __i915_request_unsubmit()
+	 *
+	 * In the future, perhaps when we have an active time-slicing scheduler,
+	 * it will be interesting to unsubmit parallel execution and remove
+	 * busywaits from the GPU until their master is restarted. This is
+	 * quite hairy, we have to carefully rollback the fence and do a
+	 * preempt-to-idle cycle on the target engine, all the while the
+	 * master execute_cb may refire.
+	 */
+	INIT_LIST_HEAD(&rq->execute_cb);
+}
+
+static int
+i915_request_await_execution(struct i915_request *rq,
+			     struct i915_request *signal,
+			     gfp_t gfp)
+{
+	struct execute_cb *cb;
+
+	if (i915_request_is_active(signal))
+		return 0;
+
+	cb = kmem_cache_alloc(global.slab_execute_cbs, gfp);
+	if (!cb)
+		return -ENOMEM;
+
+	cb->fence = &rq->submit;
+	i915_sw_fence_await(cb->fence);
+	init_irq_work(&cb->work, irq_execute_cb);
+
+	spin_lock_irq(&signal->lock);
+	if (i915_request_is_active(signal)) {
+		i915_sw_fence_complete(cb->fence);
+		kmem_cache_free(global.slab_execute_cbs, cb);
+	} else {
+		list_add_tail(&cb->link, &signal->execute_cb);
+	}
+	spin_unlock_irq(&signal->lock);
+
+	return 0;
+}
+
 static void move_to_timeline(struct i915_request *request,
 			     struct i915_timeline *timeline)
 {
@@ -361,6 +432,8 @@ void __i915_request_submit(struct i915_request *request)
 	    !i915_request_enable_breadcrumb(request))
 		intel_engine_queue_breadcrumbs(engine);
 
+	__notify_execute_cb(request);
+
 	spin_unlock(&request->lock);
 
 	engine->emit_fini_breadcrumb(request,
@@ -608,6 +681,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
 	}
 
 	INIT_LIST_HEAD(&rq->active_list);
+	INIT_LIST_HEAD(&rq->execute_cb);
 
 	tl = ce->ring->timeline;
 	ret = i915_timeline_get_seqno(tl, rq, &seqno);
@@ -696,6 +770,52 @@ err_unreserve:
 	return ERR_PTR(ret);
 }
 
+static int
+emit_semaphore_wait(struct i915_request *to,
+		    struct i915_request *from,
+		    gfp_t gfp)
+{
+	u32 hwsp_offset;
+	u32 *cs;
+	int err;
+
+	GEM_BUG_ON(!from->timeline->has_initial_breadcrumb);
+	GEM_BUG_ON(INTEL_GEN(to->i915) < 8);
+
+	/* We need to pin the signaler's HWSP until we are finished reading. */
+	err = i915_timeline_read_hwsp(from, to, &hwsp_offset);
+	if (err)
+		return err;
+
+	/* Only submit our spinner after the signaler is running! */
+	err = i915_request_await_execution(to, from, gfp);
+	if (err)
+		return err;
+
+	cs = intel_ring_begin(to, 4);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
+
+	/*
+	 * Using greater-than-or-equal here means we have to worry
+	 * about seqno wraparound. To side step that issue, we swap
+	 * the timeline HWSP upon wrapping, so that everyone listening
+	 * for the old (pre-wrap) values do not see the much smaller
+	 * (post-wrap) values than they were expecting (and so wait
+	 * forever).
+	 */
+	*cs++ = MI_SEMAPHORE_WAIT |
+		MI_SEMAPHORE_GLOBAL_GTT |
+		MI_SEMAPHORE_POLL |
+		MI_SEMAPHORE_SAD_GTE_SDD;
+	*cs++ = from->fence.seqno;
+	*cs++ = hwsp_offset;
+	*cs++ = 0;
+
+	intel_ring_advance(to, cs);
+	return 0;
+}
+
 static int
 i915_request_await_request(struct i915_request *to, struct i915_request *from)
 {
@@ -717,6 +837,9 @@ i915_request_await_request(struct i915_request *to, struct i915_request *from)
 		ret = i915_sw_fence_await_sw_fence_gfp(&to->submit,
 						       &from->submit,
 						       I915_FENCE_GFP);
+	} else if (intel_engine_has_semaphores(to->engine) &&
+		   to->gem_context->sched.priority >= I915_PRIORITY_NORMAL) {
+		ret = emit_semaphore_wait(to, from, I915_FENCE_GFP);
 	} else {
 		ret = i915_sw_fence_await_dma_fence(&to->submit,
 						    &from->fence, 0,
@@ -1208,14 +1331,23 @@ int __init i915_global_request_init(void)
 	if (!global.slab_requests)
 		return -ENOMEM;
 
+	global.slab_execute_cbs = KMEM_CACHE(execute_cb,
+					     SLAB_HWCACHE_ALIGN |
+					     SLAB_RECLAIM_ACCOUNT |
+					     SLAB_TYPESAFE_BY_RCU);
+	if (!global.slab_execute_cbs)
+		goto err_requests;
+
 	global.slab_dependencies = KMEM_CACHE(i915_dependency,
 					      SLAB_HWCACHE_ALIGN |
 					      SLAB_RECLAIM_ACCOUNT);
 	if (!global.slab_dependencies)
-		goto err_requests;
+		goto err_execute_cbs;
 
 	return 0;
 
+err_execute_cbs:
+	kmem_cache_destroy(global.slab_execute_cbs);
 err_requests:
 	kmem_cache_destroy(global.slab_requests);
 	return -ENOMEM;
@@ -1224,11 +1356,13 @@ err_requests:
 void i915_global_request_shrink(void)
 {
 	kmem_cache_shrink(global.slab_dependencies);
+	kmem_cache_shrink(global.slab_execute_cbs);
 	kmem_cache_shrink(global.slab_requests);
 }
 
 void i915_global_request_exit(void)
 {
 	kmem_cache_destroy(global.slab_dependencies);
+	kmem_cache_destroy(global.slab_execute_cbs);
 	kmem_cache_destroy(global.slab_requests);
 }
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index 09eaad06d2c6..4dd1dea1d1af 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -129,6 +129,7 @@ struct i915_request {
 	 */
 	struct i915_sw_fence submit;
 	wait_queue_entry_t submitq;
+	struct list_head execute_cb;
 
 	/*
 	 * A list of everyone we wait upon, and everyone who waits upon us.
@@ -343,10 +344,27 @@ static inline bool __i915_request_has_started(const struct i915_request *rq)
  * i915_request_started - check if the request has begun being executed
  * @rq: the request
  *
- * Returns true if the request has been submitted to hardware, and the hardware
- * has advanced passed the end of the previous request and so should be either
- * currently processing the request (though it may be preempted and so
- * not necessarily the next request to complete) or have completed the request.
+ * If the timeline is not using initial breadcrumbs, a request is
+ * considered started if the previous request on its timeline (i.e.
+ * context) has been signaled.
+ *
+ * If the timeline is using semaphores, it will also be emitting an
+ * "initial breadcrumb" after the semaphores are complete and just before
+ * it began executing the user payload. A request can therefore be active
+ * on the HW and not yet started as it is still busywaiting on its
+ * dependencies (via HW semaphores).
+ *
+ * If the request has started, its dependencies will have been signaled
+ * (either by fences or by semaphores) and it will have begun processing
+ * the user payload.
+ *
+ * However, even if a request has started, it may have been preempted and
+ * so no longer active, or it may have already completed.
+ *
+ * See also i915_request_is_active().
+ *
+ * Returns true if the request has begun executing the user payload, or
+ * has completed:
  */
 static inline bool i915_request_started(const struct i915_request *rq)
 {
diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c
index 7c58b049ecb5..8d1400d378d7 100644
--- a/drivers/gpu/drm/i915/i915_sw_fence.c
+++ b/drivers/gpu/drm/i915/i915_sw_fence.c
@@ -192,7 +192,7 @@ static void __i915_sw_fence_complete(struct i915_sw_fence *fence,
 	__i915_sw_fence_notify(fence, FENCE_FREE);
 }
 
-static void i915_sw_fence_complete(struct i915_sw_fence *fence)
+void i915_sw_fence_complete(struct i915_sw_fence *fence)
 {
 	debug_fence_assert(fence);
 
@@ -202,7 +202,7 @@ static void i915_sw_fence_complete(struct i915_sw_fence *fence)
 	__i915_sw_fence_complete(fence, NULL);
 }
 
-static void i915_sw_fence_await(struct i915_sw_fence *fence)
+void i915_sw_fence_await(struct i915_sw_fence *fence)
 {
 	debug_fence_assert(fence);
 	WARN_ON(atomic_inc_return(&fence->pending) <= 1);
diff --git a/drivers/gpu/drm/i915/i915_sw_fence.h b/drivers/gpu/drm/i915/i915_sw_fence.h
index 0e055ea0179f..6dec9e1d1102 100644
--- a/drivers/gpu/drm/i915/i915_sw_fence.h
+++ b/drivers/gpu/drm/i915/i915_sw_fence.h
@@ -79,6 +79,9 @@ int i915_sw_fence_await_reservation(struct i915_sw_fence *fence,
 				    unsigned long timeout,
 				    gfp_t gfp);
 
+void i915_sw_fence_await(struct i915_sw_fence *fence);
+void i915_sw_fence_complete(struct i915_sw_fence *fence);
+
 static inline bool i915_sw_fence_signaled(const struct i915_sw_fence *fence)
 {
 	return atomic_read(&fence->pending) <= 0;
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index df8f88142f1d..4fc7e2ac6278 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -616,6 +616,7 @@ void intel_engines_set_scheduler_caps(struct drm_i915_private *i915)
 	} map[] = {
 #define MAP(x, y) { ilog2(I915_ENGINE_HAS_##x), ilog2(I915_SCHEDULER_CAP_##y) }
 		MAP(PREEMPTION, PREEMPTION),
+		MAP(SEMAPHORES, SEMAPHORES),
 #undef MAP
 	};
 	struct intel_engine_cs *engine;
diff --git a/drivers/gpu/drm/i915/intel_gpu_commands.h b/drivers/gpu/drm/i915/intel_gpu_commands.h
index b96a31bc1080..a34ece53a771 100644
--- a/drivers/gpu/drm/i915/intel_gpu_commands.h
+++ b/drivers/gpu/drm/i915/intel_gpu_commands.h
@@ -105,8 +105,13 @@
 #define MI_SEMAPHORE_SIGNAL	MI_INSTR(0x1b, 0) /* GEN8+ */
 #define   MI_SEMAPHORE_TARGET(engine)	((engine)<<15)
 #define MI_SEMAPHORE_WAIT	MI_INSTR(0x1c, 2) /* GEN8+ */
-#define   MI_SEMAPHORE_POLL		(1<<15)
-#define   MI_SEMAPHORE_SAD_GTE_SDD	(1<<12)
+#define   MI_SEMAPHORE_POLL		(1 << 15)
+#define   MI_SEMAPHORE_SAD_GT_SDD	(0 << 12)
+#define   MI_SEMAPHORE_SAD_GTE_SDD	(1 << 12)
+#define   MI_SEMAPHORE_SAD_LT_SDD	(2 << 12)
+#define   MI_SEMAPHORE_SAD_LTE_SDD	(3 << 12)
+#define   MI_SEMAPHORE_SAD_EQ_SDD	(4 << 12)
+#define   MI_SEMAPHORE_SAD_NEQ_SDD	(5 << 12)
 #define MI_STORE_DWORD_IMM	MI_INSTR(0x20, 1)
 #define MI_STORE_DWORD_IMM_GEN4	MI_INSTR(0x20, 2)
 #define   MI_MEM_VIRTUAL	(1 << 22) /* 945,g33,965 */
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 3fd0c45a2920..6c9479acb433 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -2330,6 +2330,7 @@ void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
 	engine->park = NULL;
 	engine->unpark = NULL;
 
+	engine->flags |= I915_ENGINE_HAS_SEMAPHORES;
 	engine->flags |= I915_ENGINE_SUPPORTS_STATS;
 	if (engine->i915->preempt_context)
 		engine->flags |= I915_ENGINE_HAS_PREEMPTION;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index b8ec7e40a59b..2e7264119ec4 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -499,6 +499,7 @@ struct intel_engine_cs {
 #define I915_ENGINE_NEEDS_CMD_PARSER BIT(0)
 #define I915_ENGINE_SUPPORTS_STATS   BIT(1)
 #define I915_ENGINE_HAS_PREEMPTION   BIT(2)
+#define I915_ENGINE_HAS_SEMAPHORES   BIT(3)
 	unsigned int flags;
 
 	/*
@@ -576,6 +577,12 @@ intel_engine_has_preemption(const struct intel_engine_cs *engine)
 	return engine->flags & I915_ENGINE_HAS_PREEMPTION;
 }
 
+static inline bool
+intel_engine_has_semaphores(const struct intel_engine_cs *engine)
+{
+	return engine->flags & I915_ENGINE_HAS_SEMAPHORES;
+}
+
 void intel_engines_set_scheduler_caps(struct drm_i915_private *i915);
 
 static inline bool __execlists_need_preempt(int prio, int last)
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 8304a7f1ec3f..b10eea3f6d24 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -479,6 +479,7 @@ typedef struct drm_i915_irq_wait {
 #define   I915_SCHEDULER_CAP_ENABLED	(1ul << 0)
 #define   I915_SCHEDULER_CAP_PRIORITY	(1ul << 1)
 #define   I915_SCHEDULER_CAP_PREEMPTION	(1ul << 2)
+#define   I915_SCHEDULER_CAP_SEMAPHORES	(1ul << 3)
 
 #define I915_PARAM_HUC_STATUS		 42
 
-- 
cgit v1.2.3


From d90c06d57027203f73021bb7ddb30b800d65c636 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Fri, 1 Mar 2019 14:03:47 +0000
Subject: drm/i915: Fix I915_EXEC_RING_MASK

This was supposed to be a mask of all known rings, but it is being used
by execbuffer to filter out invalid rings, and so is instead mapping high
unused values onto valid rings. Instead of a mask of all known rings,
we need it to be the mask of all possible rings.

Fixes: 549f7365820a ("drm/i915: Enable SandyBridge blitter ring")
Fixes: de1add360522 ("drm/i915: Decouple execbuf uAPI from internal implementation")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: <stable@vger.kernel.org> # v4.6+
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190301140404.26690-21-chris@chris-wilson.co.uk
---
 include/uapi/drm/i915_drm.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index b10eea3f6d24..1a60642c1d61 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -979,7 +979,7 @@ struct drm_i915_gem_execbuffer2 {
 	 * struct drm_i915_gem_exec_fence *fences.
 	 */
 	__u64 cliprects_ptr;
-#define I915_EXEC_RING_MASK              (7<<0)
+#define I915_EXEC_RING_MASK              (0x3f)
 #define I915_EXEC_DEFAULT                (0<<0)
 #define I915_EXEC_RENDER                 (1<<0)
 #define I915_EXEC_BSD                    (2<<0)
-- 
cgit v1.2.3


From 1c7cf3d5e1c181caca75012b65252288c18a25f2 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Thu, 28 Feb 2019 20:38:16 -0800
Subject: wusb: Remove unnecessary static function ckhdid_printf

This static inline is unnecessary and can be removed
by using the vsprintf %ph extension.

This reduces overall object size by more than 2K.

Reported-by: Louis Taylor <louis@kragniz.eu>
Signed-off-by: Joe Perches <joe@perches.com>
Reviewed-by: Louis Taylor <louis@kragniz.eu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/wusbcore/cbaf.c       | 15 ++++-----------
 drivers/usb/wusbcore/dev-sysfs.c  |  5 ++---
 drivers/usb/wusbcore/devconnect.c |  2 +-
 drivers/usb/wusbcore/wusbhc.c     |  6 +-----
 include/linux/usb/wusb.h          | 16 ----------------
 5 files changed, 8 insertions(+), 36 deletions(-)

(limited to 'include')

diff --git a/drivers/usb/wusbcore/cbaf.c b/drivers/usb/wusbcore/cbaf.c
index 222228c5c1e1..af77064c7456 100644
--- a/drivers/usb/wusbcore/cbaf.c
+++ b/drivers/usb/wusbcore/cbaf.c
@@ -302,10 +302,8 @@ static ssize_t cbaf_wusb_chid_show(struct device *dev,
 {
 	struct usb_interface *iface = to_usb_interface(dev);
 	struct cbaf *cbaf = usb_get_intfdata(iface);
-	char pr_chid[WUSB_CKHDID_STRSIZE];
 
-	ckhdid_printf(pr_chid, sizeof(pr_chid), &cbaf->chid);
-	return scnprintf(buf, PAGE_SIZE, "%s\n", pr_chid);
+	return sprintf(buf, "%16ph\n", cbaf->chid.data);
 }
 
 static ssize_t cbaf_wusb_chid_store(struct device *dev,
@@ -415,10 +413,8 @@ static ssize_t cbaf_wusb_cdid_show(struct device *dev,
 {
 	struct usb_interface *iface = to_usb_interface(dev);
 	struct cbaf *cbaf = usb_get_intfdata(iface);
-	char pr_cdid[WUSB_CKHDID_STRSIZE];
 
-	ckhdid_printf(pr_cdid, sizeof(pr_cdid), &cbaf->cdid);
-	return scnprintf(buf, PAGE_SIZE, "%s\n", pr_cdid);
+	return sprintf(buf, "%16ph\n", cbaf->cdid.data);
 }
 
 static ssize_t cbaf_wusb_cdid_store(struct device *dev,
@@ -503,7 +499,6 @@ static int cbaf_cc_upload(struct cbaf *cbaf)
 	int result;
 	struct device *dev = &cbaf->usb_iface->dev;
 	struct wusb_cbaf_cc_data *ccd;
-	char pr_cdid[WUSB_CKHDID_STRSIZE];
 
 	ccd =  cbaf->buffer;
 	*ccd = cbaf_cc_data_defaults;
@@ -513,10 +508,8 @@ static int cbaf_cc_upload(struct cbaf *cbaf)
 	ccd->BandGroups = cpu_to_le16(cbaf->host_band_groups);
 
 	dev_dbg(dev, "Trying to upload CC:\n");
-	ckhdid_printf(pr_cdid, sizeof(pr_cdid), &ccd->CHID);
-	dev_dbg(dev, "  CHID       %s\n", pr_cdid);
-	ckhdid_printf(pr_cdid, sizeof(pr_cdid), &ccd->CDID);
-	dev_dbg(dev, "  CDID       %s\n", pr_cdid);
+	dev_dbg(dev, "  CHID       %16ph\n", ccd->CHID.data);
+	dev_dbg(dev, "  CDID       %16ph\n", ccd->CDID.data);
 	dev_dbg(dev, "  Bandgroups 0x%04x\n", cbaf->host_band_groups);
 
 	result = usb_control_msg(
diff --git a/drivers/usb/wusbcore/dev-sysfs.c b/drivers/usb/wusbcore/dev-sysfs.c
index 85a1acf3a729..67b0a4c412b2 100644
--- a/drivers/usb/wusbcore/dev-sysfs.c
+++ b/drivers/usb/wusbcore/dev-sysfs.c
@@ -50,10 +50,9 @@ static ssize_t wusb_cdid_show(struct device *dev,
 	wusb_dev = wusb_dev_get_by_usb_dev(to_usb_device(dev));
 	if (wusb_dev == NULL)
 		return -ENODEV;
-	result = ckhdid_printf(buf, PAGE_SIZE, &wusb_dev->cdid);
-	strcat(buf, "\n");
+	result = sprintf(buf, "%16ph\n", wusb_dev->cdid.data);
 	wusb_dev_put(wusb_dev);
-	return result + 1;
+	return result;
 }
 static DEVICE_ATTR_RO(wusb_cdid);
 
diff --git a/drivers/usb/wusbcore/devconnect.c b/drivers/usb/wusbcore/devconnect.c
index fcb06aef2675..a93837d57d53 100644
--- a/drivers/usb/wusbcore/devconnect.c
+++ b/drivers/usb/wusbcore/devconnect.c
@@ -532,7 +532,7 @@ static void wusbhc_handle_dn_connect(struct wusbhc *wusbhc,
 	}
 
 	dnc = container_of(dn_hdr, struct wusb_dn_connect, hdr);
-	ckhdid_printf(pr_cdid, sizeof(pr_cdid), &dnc->CDID);
+	sprintf(pr_cdid, "%16ph", dnc->CDID.data);
 	dev_info(dev, "DN CONNECT: device %s @ %x (%s) wants to %s\n",
 		 pr_cdid,
 		 wusb_dn_connect_prev_dev_addr(dnc),
diff --git a/drivers/usb/wusbcore/wusbhc.c b/drivers/usb/wusbcore/wusbhc.c
index e5ba6140c1ba..d0b404d258e8 100644
--- a/drivers/usb/wusbcore/wusbhc.c
+++ b/drivers/usb/wusbcore/wusbhc.c
@@ -80,17 +80,13 @@ static ssize_t wusb_chid_show(struct device *dev,
 {
 	struct wusbhc *wusbhc = usbhc_dev_to_wusbhc(dev);
 	const struct wusb_ckhdid *chid;
-	ssize_t result = 0;
 
 	if (wusbhc->wuie_host_info != NULL)
 		chid = &wusbhc->wuie_host_info->CHID;
 	else
 		chid = &wusb_ckhdid_zero;
 
-	result += ckhdid_printf(buf, PAGE_SIZE, chid);
-	result += sprintf(buf + result, "\n");
-
-	return result;
+	return sprintf(buf, "%16ph\n", chid->data);
 }
 
 /*
diff --git a/include/linux/usb/wusb.h b/include/linux/usb/wusb.h
index 9e4a3213f2c2..65adee629106 100644
--- a/include/linux/usb/wusb.h
+++ b/include/linux/usb/wusb.h
@@ -236,22 +236,6 @@ enum {
 	WUSB_TRUST_TIMEOUT_MS = 4000,	/* [WUSB] section 4.15.1 */
 };
 
-static inline size_t ckhdid_printf(char *pr_ckhdid, size_t size,
-				   const struct wusb_ckhdid *ckhdid)
-{
-	return scnprintf(pr_ckhdid, size,
-			 "%02hx %02hx %02hx %02hx %02hx %02hx %02hx %02hx "
-			 "%02hx %02hx %02hx %02hx %02hx %02hx %02hx %02hx",
-			 ckhdid->data[0],  ckhdid->data[1],
-			 ckhdid->data[2],  ckhdid->data[3],
-			 ckhdid->data[4],  ckhdid->data[5],
-			 ckhdid->data[6],  ckhdid->data[7],
-			 ckhdid->data[8],  ckhdid->data[9],
-			 ckhdid->data[10], ckhdid->data[11],
-			 ckhdid->data[12], ckhdid->data[13],
-			 ckhdid->data[14], ckhdid->data[15]);
-}
-
 /*
  * WUSB Crypto stuff (WUSB1.0[6])
  */
-- 
cgit v1.2.3


From 724b509ca02367dbd5f5f90b0c8546280c5abc72 Mon Sep 17 00:00:00 2001
From: Roi Dayan <roid@mellanox.com>
Date: Thu, 21 Feb 2019 18:24:48 +0200
Subject: net/mlx5: Add multipath mode

In order to offload ecmp-on-host scheme where next-hop routes are used,
we will make use of HW LAG. Add accessor function to let upper layers
in the driver to realize if the lag acts in multi-path mode.

Signed-off-by: Roi Dayan <roid@mellanox.com>
Reviewed-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/Makefile |  2 +-
 drivers/net/ethernet/mellanox/mlx5/core/lag.h    |  4 +++-
 drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c | 23 +++++++++++++++++++++++
 include/linux/mlx5/driver.h                      |  1 +
 4 files changed, 28 insertions(+), 2 deletions(-)
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c

(limited to 'include')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index 17f1a8b28c0a..1a16f6d73cbc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -30,7 +30,7 @@ mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \
 mlx5_core-$(CONFIG_MLX5_EN_ARFS)     += en_arfs.o
 mlx5_core-$(CONFIG_MLX5_EN_RXNFC)    += en_fs_ethtool.o
 mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) += en_dcbnl.o en/port_buffer.o
-mlx5_core-$(CONFIG_MLX5_ESWITCH)     += en_rep.o en_tc.o en/tc_tun.o lib/port_tun.o
+mlx5_core-$(CONFIG_MLX5_ESWITCH)     += en_rep.o en_tc.o en/tc_tun.o lib/port_tun.o lag_mp.o
 
 #
 # Core extra
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.h b/drivers/net/ethernet/mellanox/mlx5/core/lag.h
index 58f93d411ad5..f8bea6ed4285 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.h
@@ -9,9 +9,11 @@
 enum {
 	MLX5_LAG_FLAG_ROCE   = 1 << 0,
 	MLX5_LAG_FLAG_SRIOV  = 1 << 1,
+	MLX5_LAG_FLAG_MULTIPATH = 1 << 2,
 };
 
-#define MLX5_LAG_MODE_FLAGS (MLX5_LAG_FLAG_ROCE | MLX5_LAG_FLAG_SRIOV)
+#define MLX5_LAG_MODE_FLAGS (MLX5_LAG_FLAG_ROCE | MLX5_LAG_FLAG_SRIOV |\
+			     MLX5_LAG_FLAG_MULTIPATH)
 
 struct lag_func {
 	struct mlx5_core_dev *dev;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c
new file mode 100644
index 000000000000..2d2861cd4e02
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c
@@ -0,0 +1,23 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#include <linux/netdevice.h>
+#include "lag.h"
+#include "mlx5_core.h"
+#include "eswitch.h"
+
+static bool __mlx5_lag_is_multipath(struct mlx5_lag *ldev)
+{
+	return !!(ldev->flags & MLX5_LAG_FLAG_MULTIPATH);
+}
+
+bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev)
+{
+	struct mlx5_lag *ldev;
+	bool res;
+
+	ldev = mlx5_lag_dev_get(dev);
+	res  = ldev && __mlx5_lag_is_multipath(ldev);
+
+	return res;
+}
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index c2de50f02b33..ee109b3fbfb8 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -1041,6 +1041,7 @@ int mlx5_cmd_create_vport_lag(struct mlx5_core_dev *dev);
 int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev);
 bool mlx5_lag_is_roce(struct mlx5_core_dev *dev);
 bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev);
+bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev);
 bool mlx5_lag_is_active(struct mlx5_core_dev *dev);
 struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev);
 int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
-- 
cgit v1.2.3


From 6997b1c9cace95c0e67de620a94ab6ba88d044fe Mon Sep 17 00:00:00 2001
From: Roi Dayan <roid@mellanox.com>
Date: Thu, 21 Feb 2019 16:29:27 +0200
Subject: net/mlx5: Emit port affinity event for multipath offloads

Under multipath offload scheme, as part of handling fib events, emit
mlx5 port affinity event on the enabled ports which will be handled by
the tc offloads code.

Signed-off-by: Roi Dayan <roid@mellanox.com>
Reviewed-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c | 11 +++++++++++
 include/linux/mlx5/driver.h                      |  1 +
 2 files changed, 12 insertions(+)

(limited to 'include')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c
index 5680beba8c07..5633f8572800 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c
@@ -6,6 +6,7 @@
 #include "lag_mp.h"
 #include "mlx5_core.h"
 #include "eswitch.h"
+#include "lib/mlx5.h"
 
 static bool mlx5_lag_multipath_check_prereq(struct mlx5_lag *ldev)
 {
@@ -73,6 +74,16 @@ static void mlx5_lag_set_port_affinity(struct mlx5_lag *ldev, int port)
 		return;
 	}
 
+	if (tracker.netdev_state[0].tx_enabled)
+		mlx5_notifier_call_chain(ldev->pf[0].dev->priv.events,
+					 MLX5_DEV_EVENT_PORT_AFFINITY,
+					 (void *)0);
+
+	if (tracker.netdev_state[1].tx_enabled)
+		mlx5_notifier_call_chain(ldev->pf[1].dev->priv.events,
+					 MLX5_DEV_EVENT_PORT_AFFINITY,
+					 (void *)0);
+
 	mlx5_modify_lag(ldev, &tracker);
 }
 
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index ee109b3fbfb8..5ffb5df1a2c2 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -195,6 +195,7 @@ struct mlx5_rsc_debug {
 
 enum mlx5_dev_event {
 	MLX5_DEV_EVENT_SYS_ERROR = 128, /* 0 - 127 are FW events */
+	MLX5_DEV_EVENT_PORT_AFFINITY = 129,
 };
 
 enum mlx5_port_status {
-- 
cgit v1.2.3


From a79f194aa4879e9baad118c3f8bb2ca24dbef765 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@hammerspace.com>
Date: Wed, 27 Feb 2019 15:37:36 -0500
Subject: NFSv4/flexfiles: Abort I/O early if the layout segment was
 invalidated

If a layout segment gets invalidated while a pNFS I/O operation
is queued for transmission, then we ideally want to abort
immediately. This is particularly the case when there is a large
number of I/O related RPCs queued in the RPC layer, and the layout
segment gets invalidated due to an ENOSPC error, or an EACCES (because
the client was fenced). We may end up forced to spam the MDS with a
lot of otherwise unnecessary LAYOUTERRORs after that I/O fails.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/flexfilelayout/flexfilelayout.c | 17 +++++++++++++++++
 include/linux/sunrpc/sched.h           |  1 +
 net/sunrpc/xprt.c                      |  7 +++++++
 3 files changed, 25 insertions(+)

(limited to 'include')

diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index 244a03c22b31..a8e9bdd978e7 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -1071,6 +1071,8 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task,
 		break;
 	case -NFS4ERR_RETRY_UNCACHED_REP:
 		break;
+	case -EAGAIN:
+		return -NFS4ERR_RESET_TO_PNFS;
 	/* Invalidate Layout errors */
 	case -NFS4ERR_PNFS_NO_LAYOUT:
 	case -ESTALE:           /* mapped NFS4ERR_STALE */
@@ -1131,6 +1133,7 @@ static int ff_layout_async_handle_error_v3(struct rpc_task *task,
 	case -EBADHANDLE:
 	case -ELOOP:
 	case -ENOSPC:
+	case -EAGAIN:
 		break;
 	case -EJUKEBOX:
 		nfs_inc_stats(lseg->pls_layout->plh_inode, NFSIOS_DELAY);
@@ -1369,6 +1372,16 @@ static void ff_layout_read_prepare_v4(struct rpc_task *task, void *data)
 	ff_layout_read_prepare_common(task, hdr);
 }
 
+static void
+ff_layout_io_prepare_transmit(struct rpc_task *task,
+		void *data)
+{
+	struct nfs_pgio_header *hdr = data;
+
+	if (!pnfs_is_valid_lseg(hdr->lseg))
+		rpc_exit(task, -EAGAIN);
+}
+
 static void ff_layout_read_call_done(struct rpc_task *task, void *data)
 {
 	struct nfs_pgio_header *hdr = data;
@@ -1657,6 +1670,7 @@ static void ff_layout_commit_release(void *data)
 
 static const struct rpc_call_ops ff_layout_read_call_ops_v3 = {
 	.rpc_call_prepare = ff_layout_read_prepare_v3,
+	.rpc_call_prepare_transmit = ff_layout_io_prepare_transmit,
 	.rpc_call_done = ff_layout_read_call_done,
 	.rpc_count_stats = ff_layout_read_count_stats,
 	.rpc_release = ff_layout_read_release,
@@ -1664,6 +1678,7 @@ static const struct rpc_call_ops ff_layout_read_call_ops_v3 = {
 
 static const struct rpc_call_ops ff_layout_read_call_ops_v4 = {
 	.rpc_call_prepare = ff_layout_read_prepare_v4,
+	.rpc_call_prepare_transmit = ff_layout_io_prepare_transmit,
 	.rpc_call_done = ff_layout_read_call_done,
 	.rpc_count_stats = ff_layout_read_count_stats,
 	.rpc_release = ff_layout_read_release,
@@ -1671,6 +1686,7 @@ static const struct rpc_call_ops ff_layout_read_call_ops_v4 = {
 
 static const struct rpc_call_ops ff_layout_write_call_ops_v3 = {
 	.rpc_call_prepare = ff_layout_write_prepare_v3,
+	.rpc_call_prepare_transmit = ff_layout_io_prepare_transmit,
 	.rpc_call_done = ff_layout_write_call_done,
 	.rpc_count_stats = ff_layout_write_count_stats,
 	.rpc_release = ff_layout_write_release,
@@ -1678,6 +1694,7 @@ static const struct rpc_call_ops ff_layout_write_call_ops_v3 = {
 
 static const struct rpc_call_ops ff_layout_write_call_ops_v4 = {
 	.rpc_call_prepare = ff_layout_write_prepare_v4,
+	.rpc_call_prepare_transmit = ff_layout_io_prepare_transmit,
 	.rpc_call_done = ff_layout_write_call_done,
 	.rpc_count_stats = ff_layout_write_count_stats,
 	.rpc_release = ff_layout_write_release,
diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index 219aa3910a0c..52d41d0c1ae1 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -97,6 +97,7 @@ typedef void			(*rpc_action)(struct rpc_task *);
 
 struct rpc_call_ops {
 	void (*rpc_call_prepare)(struct rpc_task *, void *);
+	void (*rpc_call_prepare_transmit)(struct rpc_task *, void *);
 	void (*rpc_call_done)(struct rpc_task *, void *);
 	void (*rpc_count_stats)(struct rpc_task *, void *);
 	void (*rpc_release)(void *);
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 1cf4e379be7b..e096c5a725df 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -1330,6 +1330,13 @@ xprt_request_transmit(struct rpc_rqst *req, struct rpc_task *snd_task)
 			status = -EBADMSG;
 			goto out_dequeue;
 		}
+		if (task->tk_ops->rpc_call_prepare_transmit) {
+			task->tk_ops->rpc_call_prepare_transmit(task,
+					task->tk_calldata);
+			status = task->tk_status;
+			if (status < 0)
+				goto out_dequeue;
+		}
 	}
 
 	/*
-- 
cgit v1.2.3


From 3eb86093ea400c58f444eac0debcf6c50d617418 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@hammerspace.com>
Date: Fri, 8 Feb 2019 10:31:05 -0500
Subject: NFSv4.2: Add client support for the generic 'layouterror' RPC call

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/nfs42.h            |   3 +
 fs/nfs/nfs42proc.c        | 164 ++++++++++++++++++++++++++++++++++++++++++++++
 fs/nfs/nfs42xdr.c         |  99 ++++++++++++++++++++++++++++
 fs/nfs/nfs4proc.c         |   3 +-
 fs/nfs/nfs4xdr.c          |   1 +
 include/linux/nfs4.h      |   1 +
 include/linux/nfs_fs_sb.h |   1 +
 include/linux/nfs_xdr.h   |  35 ++++++++++
 8 files changed, 306 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/fs/nfs/nfs42.h b/fs/nfs/nfs42.h
index 19ec38f85ce0..901cca7542f9 100644
--- a/fs/nfs/nfs42.h
+++ b/fs/nfs/nfs42.h
@@ -20,5 +20,8 @@ loff_t nfs42_proc_llseek(struct file *, loff_t, int);
 int nfs42_proc_layoutstats_generic(struct nfs_server *,
 				   struct nfs42_layoutstat_data *);
 int nfs42_proc_clone(struct file *, struct file *, loff_t, loff_t, loff_t);
+int nfs42_proc_layouterror(struct pnfs_layout_segment *lseg,
+			   const struct nfs42_layout_error *errors,
+			   size_t n);
 
 #endif /* __LINUX_FS_NFS_NFS4_2_H */
diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index fed06fd9998d..ff6f85fb676b 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -672,6 +672,170 @@ int nfs42_proc_layoutstats_generic(struct nfs_server *server,
 	return 0;
 }
 
+static struct nfs42_layouterror_data *
+nfs42_alloc_layouterror_data(struct pnfs_layout_segment *lseg, gfp_t gfp_flags)
+{
+	struct nfs42_layouterror_data *data;
+	struct inode *inode = lseg->pls_layout->plh_inode;
+
+	data = kzalloc(sizeof(*data), gfp_flags);
+	if (data) {
+		data->args.inode = data->inode = nfs_igrab_and_active(inode);
+		if (data->inode) {
+			data->lseg = pnfs_get_lseg(lseg);
+			if (data->lseg)
+				return data;
+			nfs_iput_and_deactive(data->inode);
+		}
+		kfree(data);
+	}
+	return NULL;
+}
+
+static void
+nfs42_free_layouterror_data(struct nfs42_layouterror_data *data)
+{
+	pnfs_put_lseg(data->lseg);
+	nfs_iput_and_deactive(data->inode);
+	kfree(data);
+}
+
+static void
+nfs42_layouterror_prepare(struct rpc_task *task, void *calldata)
+{
+	struct nfs42_layouterror_data *data = calldata;
+	struct inode *inode = data->inode;
+	struct nfs_server *server = NFS_SERVER(inode);
+	struct pnfs_layout_hdr *lo = data->lseg->pls_layout;
+	unsigned i;
+
+	spin_lock(&inode->i_lock);
+	if (!pnfs_layout_is_valid(lo)) {
+		spin_unlock(&inode->i_lock);
+		rpc_exit(task, 0);
+		return;
+	}
+	for (i = 0; i < data->args.num_errors; i++)
+		nfs4_stateid_copy(&data->args.errors[i].stateid,
+				&lo->plh_stateid);
+	spin_unlock(&inode->i_lock);
+	nfs4_setup_sequence(server->nfs_client, &data->args.seq_args,
+			    &data->res.seq_res, task);
+}
+
+static void
+nfs42_layouterror_done(struct rpc_task *task, void *calldata)
+{
+	struct nfs42_layouterror_data *data = calldata;
+	struct inode *inode = data->inode;
+	struct pnfs_layout_hdr *lo = data->lseg->pls_layout;
+
+	if (!nfs4_sequence_done(task, &data->res.seq_res))
+		return;
+
+	switch (task->tk_status) {
+	case 0:
+		break;
+	case -NFS4ERR_BADHANDLE:
+	case -ESTALE:
+		pnfs_destroy_layout(NFS_I(inode));
+		break;
+	case -NFS4ERR_EXPIRED:
+	case -NFS4ERR_ADMIN_REVOKED:
+	case -NFS4ERR_DELEG_REVOKED:
+	case -NFS4ERR_STALE_STATEID:
+	case -NFS4ERR_BAD_STATEID:
+		spin_lock(&inode->i_lock);
+		if (pnfs_layout_is_valid(lo) &&
+		    nfs4_stateid_match(&data->args.errors[0].stateid,
+					     &lo->plh_stateid)) {
+			LIST_HEAD(head);
+
+			/*
+			 * Mark the bad layout state as invalid, then retry
+			 * with the current stateid.
+			 */
+			pnfs_mark_layout_stateid_invalid(lo, &head);
+			spin_unlock(&inode->i_lock);
+			pnfs_free_lseg_list(&head);
+			nfs_commit_inode(inode, 0);
+		} else
+			spin_unlock(&inode->i_lock);
+		break;
+	case -NFS4ERR_OLD_STATEID:
+		spin_lock(&inode->i_lock);
+		if (pnfs_layout_is_valid(lo) &&
+		    nfs4_stateid_match_other(&data->args.errors[0].stateid,
+					&lo->plh_stateid)) {
+			/* Do we need to delay before resending? */
+			if (!nfs4_stateid_is_newer(&lo->plh_stateid,
+						&data->args.errors[0].stateid))
+				rpc_delay(task, HZ);
+			rpc_restart_call_prepare(task);
+		}
+		spin_unlock(&inode->i_lock);
+		break;
+	case -ENOTSUPP:
+	case -EOPNOTSUPP:
+		NFS_SERVER(inode)->caps &= ~NFS_CAP_LAYOUTERROR;
+	}
+}
+
+static void
+nfs42_layouterror_release(void *calldata)
+{
+	struct nfs42_layouterror_data *data = calldata;
+
+	nfs42_free_layouterror_data(data);
+}
+
+static const struct rpc_call_ops nfs42_layouterror_ops = {
+	.rpc_call_prepare = nfs42_layouterror_prepare,
+	.rpc_call_done = nfs42_layouterror_done,
+	.rpc_release = nfs42_layouterror_release,
+};
+
+int nfs42_proc_layouterror(struct pnfs_layout_segment *lseg,
+		const struct nfs42_layout_error *errors, size_t n)
+{
+	struct inode *inode = lseg->pls_layout->plh_inode;
+	struct nfs42_layouterror_data *data;
+	struct rpc_task *task;
+	struct rpc_message msg = {
+		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LAYOUTERROR],
+	};
+	struct rpc_task_setup task_setup = {
+		.rpc_message = &msg,
+		.callback_ops = &nfs42_layouterror_ops,
+		.flags = RPC_TASK_ASYNC,
+	};
+	unsigned int i;
+
+	if (!nfs_server_capable(inode, NFS_CAP_LAYOUTERROR))
+		return -EOPNOTSUPP;
+	if (n > NFS42_LAYOUTERROR_MAX)
+		return -EINVAL;
+	data = nfs42_alloc_layouterror_data(lseg, GFP_NOFS);
+	if (!data)
+		return -ENOMEM;
+	for (i = 0; i < n; i++) {
+		data->args.errors[i] = errors[i];
+		data->args.num_errors++;
+		data->res.num_errors++;
+	}
+	msg.rpc_argp = &data->args;
+	msg.rpc_resp = &data->res;
+	task_setup.callback_data = data;
+	task_setup.rpc_client = NFS_SERVER(inode)->client;
+	nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 0, 0);
+	task = rpc_run_task(&task_setup);
+	if (IS_ERR(task))
+		return PTR_ERR(task);
+	rpc_put_task(task);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(nfs42_proc_layouterror);
+
 static int _nfs42_proc_clone(struct rpc_message *msg, struct file *src_f,
 		struct file *dst_f, struct nfs_lock_context *src_lock,
 		struct nfs_lock_context *dst_lock, loff_t src_offset,
diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c
index 7d596e8a0941..aed865a84629 100644
--- a/fs/nfs/nfs42xdr.c
+++ b/fs/nfs/nfs42xdr.c
@@ -51,6 +51,15 @@
 					1 /* opaque devaddr4 length */ + \
 					XDR_QUADLEN(PNFS_LAYOUTSTATS_MAXSIZE))
 #define decode_layoutstats_maxsz	(op_decode_hdr_maxsz)
+#define encode_device_error_maxsz	(XDR_QUADLEN(NFS4_DEVICEID4_SIZE) + \
+					1 /* status */ + 1 /* opnum */)
+#define encode_layouterror_maxsz	(op_decode_hdr_maxsz + \
+					2 /* offset */ + \
+					2 /* length */ + \
+					encode_stateid_maxsz + \
+					1 /* Array size */ + \
+					encode_device_error_maxsz)
+#define decode_layouterror_maxsz	(op_decode_hdr_maxsz)
 #define encode_clone_maxsz		(encode_stateid_maxsz + \
 					encode_stateid_maxsz + \
 					2 /* src offset */ + \
@@ -116,6 +125,16 @@
 					 decode_sequence_maxsz + \
 					 decode_putfh_maxsz + \
 					 PNFS_LAYOUTSTATS_MAXDEV * decode_layoutstats_maxsz)
+#define NFS4_enc_layouterror_sz		(compound_encode_hdr_maxsz + \
+					 encode_sequence_maxsz + \
+					 encode_putfh_maxsz + \
+					 NFS42_LAYOUTERROR_MAX * \
+					 encode_layouterror_maxsz)
+#define NFS4_dec_layouterror_sz		(compound_decode_hdr_maxsz + \
+					 decode_sequence_maxsz + \
+					 decode_putfh_maxsz + \
+					 NFS42_LAYOUTERROR_MAX * \
+					 decode_layouterror_maxsz)
 #define NFS4_enc_clone_sz		(compound_encode_hdr_maxsz + \
 					 encode_sequence_maxsz + \
 					 encode_putfh_maxsz + \
@@ -233,6 +252,34 @@ static void encode_clone(struct xdr_stream *xdr,
 	xdr_encode_hyper(p, args->count);
 }
 
+static void encode_device_error(struct xdr_stream *xdr,
+				const struct nfs42_device_error *error)
+{
+	__be32 *p;
+
+	p = reserve_space(xdr, NFS4_DEVICEID4_SIZE + 2*4);
+	p = xdr_encode_opaque_fixed(p, error->dev_id.data,
+			NFS4_DEVICEID4_SIZE);
+	*p++ = cpu_to_be32(error->status);
+	*p = cpu_to_be32(error->opnum);
+}
+
+static void encode_layouterror(struct xdr_stream *xdr,
+			       const struct nfs42_layout_error *args,
+			       struct compound_hdr *hdr)
+{
+	__be32 *p;
+
+	encode_op_hdr(xdr, OP_LAYOUTERROR, decode_layouterror_maxsz, hdr);
+	p = reserve_space(xdr, 8 + 8);
+	p = xdr_encode_hyper(p, args->offset);
+	p = xdr_encode_hyper(p, args->length);
+	encode_nfs4_stateid(xdr, &args->stateid);
+	p = reserve_space(xdr, 4);
+	*p = cpu_to_be32(1);
+	encode_device_error(xdr, &args->errors[0]);
+}
+
 /*
  * Encode ALLOCATE request
  */
@@ -391,6 +438,27 @@ static void nfs4_xdr_enc_clone(struct rpc_rqst *req,
 	encode_nops(&hdr);
 }
 
+/*
+ * Encode LAYOUTERROR request
+ */
+static void nfs4_xdr_enc_layouterror(struct rpc_rqst *req,
+				     struct xdr_stream *xdr,
+				     const void *data)
+{
+	const struct nfs42_layouterror_args *args = data;
+	struct compound_hdr hdr = {
+		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
+	};
+	int i;
+
+	encode_compound_hdr(xdr, req, &hdr);
+	encode_sequence(xdr, &args->seq_args, &hdr);
+	encode_putfh(xdr, NFS_FH(args->inode), &hdr);
+	for (i = 0; i < args->num_errors; i++)
+		encode_layouterror(xdr, &args->errors[i], &hdr);
+	encode_nops(&hdr);
+}
+
 static int decode_allocate(struct xdr_stream *xdr, struct nfs42_falloc_res *res)
 {
 	return decode_op_hdr(xdr, OP_ALLOCATE);
@@ -494,6 +562,11 @@ static int decode_clone(struct xdr_stream *xdr)
 	return decode_op_hdr(xdr, OP_CLONE);
 }
 
+static int decode_layouterror(struct xdr_stream *xdr)
+{
+	return decode_op_hdr(xdr, OP_LAYOUTERROR);
+}
+
 /*
  * Decode ALLOCATE request
  */
@@ -703,4 +776,30 @@ out:
 	return status;
 }
 
+/*
+ * Decode LAYOUTERROR request
+ */
+static int nfs4_xdr_dec_layouterror(struct rpc_rqst *rqstp,
+				    struct xdr_stream *xdr,
+				    void *data)
+{
+	struct nfs42_layouterror_res *res = data;
+	struct compound_hdr hdr;
+	int status, i;
+
+	status = decode_compound_hdr(xdr, &hdr);
+	if (status)
+		goto out;
+	status = decode_sequence(xdr, &res->seq_res, rqstp);
+	if (status)
+		goto out;
+	status = decode_putfh(xdr);
+
+	for (i = 0; i < res->num_errors && status == 0; i++)
+		status = decode_layouterror(xdr);
+out:
+	res->rpc_status = status;
+	return status;
+}
+
 #endif /* __LINUX_FS_NFS_NFS4_2XDR_H */
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 5b980246b035..73889ea7d196 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -9690,7 +9690,8 @@ static const struct nfs4_minor_version_ops nfs_v4_2_minor_ops = {
 		| NFS_CAP_DEALLOCATE
 		| NFS_CAP_SEEK
 		| NFS_CAP_LAYOUTSTATS
-		| NFS_CAP_CLONE,
+		| NFS_CAP_CLONE
+		| NFS_CAP_LAYOUTERROR,
 	.init_client = nfs41_init_client,
 	.shutdown_client = nfs41_shutdown_client,
 	.match_stateid = nfs41_match_stateid,
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 6d9d5e2f6308..cfcabc33e24d 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -7572,6 +7572,7 @@ const struct rpc_procinfo nfs4_procedures[] = {
 	PROC42(COPY,		enc_copy,		dec_copy),
 	PROC42(OFFLOAD_CANCEL,	enc_offload_cancel,	dec_offload_cancel),
 	PROC(LOOKUPP,		enc_lookupp,		dec_lookupp),
+	PROC42(LAYOUTERROR,	enc_layouterror,	dec_layouterror),
 };
 
 static unsigned int nfs_version4_counts[ARRAY_SIZE(nfs4_procedures)];
diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index 1b06f0b28453..22494d170619 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -538,6 +538,7 @@ enum {
 	NFSPROC4_CLNT_OFFLOAD_CANCEL,
 
 	NFSPROC4_CLNT_LOOKUPP,
+	NFSPROC4_CLNT_LAYOUTERROR,
 };
 
 /* nfs41 types */
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 6aa8cc83c3b6..c827d31298cc 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -261,5 +261,6 @@ struct nfs_server {
 #define NFS_CAP_CLONE		(1U << 23)
 #define NFS_CAP_COPY		(1U << 24)
 #define NFS_CAP_OFFLOAD_CANCEL	(1U << 25)
+#define NFS_CAP_LAYOUTERROR	(1U << 26)
 
 #endif
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index b4bd2bf5f585..9b8324ec08f3 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -383,6 +383,41 @@ struct nfs42_layoutstat_data {
 	struct nfs42_layoutstat_res res;
 };
 
+struct nfs42_device_error {
+	struct nfs4_deviceid dev_id;
+	int status;
+	enum nfs_opnum4 opnum;
+};
+
+struct nfs42_layout_error {
+	__u64 offset;
+	__u64 length;
+	nfs4_stateid stateid;
+	struct nfs42_device_error errors[1];
+};
+
+#define NFS42_LAYOUTERROR_MAX 5
+
+struct nfs42_layouterror_args {
+	struct nfs4_sequence_args seq_args;
+	struct inode *inode;
+	unsigned int num_errors;
+	struct nfs42_layout_error errors[NFS42_LAYOUTERROR_MAX];
+};
+
+struct nfs42_layouterror_res {
+	struct nfs4_sequence_res seq_res;
+	unsigned int num_errors;
+	int rpc_status;
+};
+
+struct nfs42_layouterror_data {
+	struct nfs42_layouterror_args args;
+	struct nfs42_layouterror_res res;
+	struct inode *inode;
+	struct pnfs_layout_segment *lseg;
+};
+
 struct nfs42_clone_args {
 	struct nfs4_sequence_args	seq_args;
 	struct nfs_fh			*src_fh;
-- 
cgit v1.2.3


From 91cf8eceffc131d41f098351e1b290bdaab45ea7 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Wed, 27 Feb 2019 16:29:16 -0800
Subject: switchdev: Remove unused transaction item queue

There are no more in tree users of the
switchdev_trans_item_{dequeue,enqueue} or switchdev_trans_item structure
in the kernel since commit 00fc0c51e35b ("rocker: Change world_ops API
and implementation to be switchdev independant").

Remove this unused code and update the documentation accordingly since.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/switchdev.txt |  19 -------
 include/net/switchdev.h                |  12 ----
 net/switchdev/switchdev.c              | 100 +--------------------------------
 3 files changed, 2 insertions(+), 129 deletions(-)

(limited to 'include')

diff --git a/Documentation/networking/switchdev.txt b/Documentation/networking/switchdev.txt
index 633dd1fd81b7..86174ce8cd13 100644
--- a/Documentation/networking/switchdev.txt
+++ b/Documentation/networking/switchdev.txt
@@ -371,22 +371,3 @@ The driver can monitor for updates to arp_tbl using the netevent notifier
 NETEVENT_NEIGH_UPDATE.  The device can be programmed with resolved nexthops
 for the routes as arp_tbl updates.  The driver implements ndo_neigh_destroy
 to know when arp_tbl neighbor entries are purged from the port.
-
-Transaction item queue
-^^^^^^^^^^^^^^^^^^^^^^
-
-For switchdev ops attr_set and obj_add, there is a 2 phase transaction model
-used. First phase is to "prepare" anything needed, including various checks,
-memory allocation, etc. The goal is to handle the stuff that is not unlikely
-to fail here. The second phase is to "commit" the actual changes.
-
-Switchdev provides an infrastructure for sharing items (for example memory
-allocations) between the two phases.
-
-The object created by a driver in "prepare" phase and it is queued up by:
-switchdev_trans_item_enqueue()
-During the "commit" phase, the driver gets the object by:
-switchdev_trans_item_dequeue()
-
-If a transaction is aborted during "prepare" phase, switchdev code will handle
-cleanup of the queued-up objects.
diff --git a/include/net/switchdev.h b/include/net/switchdev.h
index e4f751e19ecf..0ebd67ae7012 100644
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -20,14 +20,7 @@
 #define SWITCHDEV_F_SKIP_EOPNOTSUPP	BIT(1)
 #define SWITCHDEV_F_DEFER		BIT(2)
 
-struct switchdev_trans_item {
-	struct list_head list;
-	void *data;
-	void (*destructor)(const void *data);
-};
-
 struct switchdev_trans {
-	struct list_head item_list;
 	bool ph_prepare;
 };
 
@@ -105,11 +98,6 @@ struct switchdev_obj_port_mdb {
 #define SWITCHDEV_OBJ_PORT_MDB(OBJ) \
 	container_of((OBJ), struct switchdev_obj_port_mdb, obj)
 
-void switchdev_trans_item_enqueue(struct switchdev_trans *trans,
-				  void *data, void (*destructor)(void const *),
-				  struct switchdev_trans_item *tritem);
-void *switchdev_trans_item_dequeue(struct switchdev_trans *trans);
-
 typedef int switchdev_obj_dump_cb_t(struct switchdev_obj *obj);
 
 enum switchdev_notifier_type {
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index d81cfcee9ad9..90ba4a1f0a6d 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -23,78 +23,6 @@
 #include <linux/rtnetlink.h>
 #include <net/switchdev.h>
 
-/**
- *	switchdev_trans_item_enqueue - Enqueue data item to transaction queue
- *
- *	@trans: transaction
- *	@data: pointer to data being queued
- *	@destructor: data destructor
- *	@tritem: transaction item being queued
- *
- *	Enqeueue data item to transaction queue. tritem is typically placed in
- *	cointainter pointed at by data pointer. Destructor is called on
- *	transaction abort and after successful commit phase in case
- *	the caller did not dequeue the item before.
- */
-void switchdev_trans_item_enqueue(struct switchdev_trans *trans,
-				  void *data, void (*destructor)(void const *),
-				  struct switchdev_trans_item *tritem)
-{
-	tritem->data = data;
-	tritem->destructor = destructor;
-	list_add_tail(&tritem->list, &trans->item_list);
-}
-EXPORT_SYMBOL_GPL(switchdev_trans_item_enqueue);
-
-static struct switchdev_trans_item *
-__switchdev_trans_item_dequeue(struct switchdev_trans *trans)
-{
-	struct switchdev_trans_item *tritem;
-
-	if (list_empty(&trans->item_list))
-		return NULL;
-	tritem = list_first_entry(&trans->item_list,
-				  struct switchdev_trans_item, list);
-	list_del(&tritem->list);
-	return tritem;
-}
-
-/**
- *	switchdev_trans_item_dequeue - Dequeue data item from transaction queue
- *
- *	@trans: transaction
- */
-void *switchdev_trans_item_dequeue(struct switchdev_trans *trans)
-{
-	struct switchdev_trans_item *tritem;
-
-	tritem = __switchdev_trans_item_dequeue(trans);
-	BUG_ON(!tritem);
-	return tritem->data;
-}
-EXPORT_SYMBOL_GPL(switchdev_trans_item_dequeue);
-
-static void switchdev_trans_init(struct switchdev_trans *trans)
-{
-	INIT_LIST_HEAD(&trans->item_list);
-}
-
-static void switchdev_trans_items_destroy(struct switchdev_trans *trans)
-{
-	struct switchdev_trans_item *tritem;
-
-	while ((tritem = __switchdev_trans_item_dequeue(trans)))
-		tritem->destructor(tritem->data);
-}
-
-static void switchdev_trans_items_warn_destroy(struct net_device *dev,
-					       struct switchdev_trans *trans)
-{
-	WARN(!list_empty(&trans->item_list), "%s: transaction item queue is not empty.\n",
-	     dev->name);
-	switchdev_trans_items_destroy(trans);
-}
-
 static LIST_HEAD(deferred);
 static DEFINE_SPINLOCK(deferred_lock);
 
@@ -208,8 +136,6 @@ static int switchdev_port_attr_set_now(struct net_device *dev,
 	struct switchdev_trans trans;
 	int err;
 
-	switchdev_trans_init(&trans);
-
 	/* Phase I: prepare for attr set. Driver/device should fail
 	 * here if there are going to be issues in the commit phase,
 	 * such as lack of resources or support.  The driver/device
@@ -220,17 +146,8 @@ static int switchdev_port_attr_set_now(struct net_device *dev,
 	trans.ph_prepare = true;
 	err = switchdev_port_attr_notify(SWITCHDEV_PORT_ATTR_SET, dev, attr,
 					 &trans);
-	if (err) {
-		/* Prepare phase failed: abort the transaction.  Any
-		 * resources reserved in the prepare phase are
-		 * released.
-		 */
-
-		if (err != -EOPNOTSUPP)
-			switchdev_trans_items_destroy(&trans);
-
+	if (err)
 		return err;
-	}
 
 	/* Phase II: commit attr set.  This cannot fail as a fault
 	 * of driver/device.  If it does, it's a bug in the driver/device
@@ -242,7 +159,6 @@ static int switchdev_port_attr_set_now(struct net_device *dev,
 					 &trans);
 	WARN(err, "%s: Commit of attribute (id=%d) failed.\n",
 	     dev->name, attr->id);
-	switchdev_trans_items_warn_destroy(dev, &trans);
 
 	return err;
 }
@@ -341,8 +257,6 @@ static int switchdev_port_obj_add_now(struct net_device *dev,
 
 	ASSERT_RTNL();
 
-	switchdev_trans_init(&trans);
-
 	/* Phase I: prepare for obj add. Driver/device should fail
 	 * here if there are going to be issues in the commit phase,
 	 * such as lack of resources or support.  The driver/device
@@ -353,17 +267,8 @@ static int switchdev_port_obj_add_now(struct net_device *dev,
 	trans.ph_prepare = true;
 	err = switchdev_port_obj_notify(SWITCHDEV_PORT_OBJ_ADD,
 					dev, obj, &trans, extack);
-	if (err) {
-		/* Prepare phase failed: abort the transaction.  Any
-		 * resources reserved in the prepare phase are
-		 * released.
-		 */
-
-		if (err != -EOPNOTSUPP)
-			switchdev_trans_items_destroy(&trans);
-
+	if (err)
 		return err;
-	}
 
 	/* Phase II: commit obj add.  This cannot fail as a fault
 	 * of driver/device.  If it does, it's a bug in the driver/device
@@ -374,7 +279,6 @@ static int switchdev_port_obj_add_now(struct net_device *dev,
 	err = switchdev_port_obj_notify(SWITCHDEV_PORT_OBJ_ADD,
 					dev, obj, &trans, extack);
 	WARN(err, "%s: Commit of object (id=%d) failed.\n", dev->name, obj->id);
-	switchdev_trans_items_warn_destroy(dev, &trans);
 
 	return err;
 }
-- 
cgit v1.2.3


From 9f03161a1bd8cd9ccf11533e52326718c656036e Mon Sep 17 00:00:00 2001
From: Michael Shych <michaelsh@mellanox.com>
Date: Wed, 20 Feb 2019 09:34:22 +0000
Subject: platform_data/mlxreg: additions for Mellanox watchdog driver.

There are two new fields added to mlxreg core structure:
features - supported features of device and
identity - device identity name.
Add new defines for watchdog features.

Signed-off-by: Michael Shych <michaelsh@mellanox.com>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Wim Van Sebroeck <wim@linux-watchdog.org>
---
 include/linux/platform_data/mlxreg.h | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

(limited to 'include')

diff --git a/include/linux/platform_data/mlxreg.h b/include/linux/platform_data/mlxreg.h
index 19f5cb618c55..31f7c25a44da 100644
--- a/include/linux/platform_data/mlxreg.h
+++ b/include/linux/platform_data/mlxreg.h
@@ -35,6 +35,19 @@
 #define __LINUX_PLATFORM_DATA_MLXREG_H
 
 #define MLXREG_CORE_LABEL_MAX_SIZE	32
+#define MLXREG_CORE_WD_FEATURE_NOWAYOUT		BIT(0)
+#define MLXREG_CORE_WD_FEATURE_START_AT_BOOT	BIT(1)
+
+/**
+ * enum mlxreg_wdt_type - type of HW watchdog
+ *
+ * TYPE1 HW watchdog implementation exist in old systems.
+ * All new systems have TYPE2 HW watchdog.
+ */
+enum mlxreg_wdt_type {
+	MLX_WDT_TYPE1,
+	MLX_WDT_TYPE2,
+};
 
 /**
  * struct mlxreg_hotplug_device - I2C device data:
@@ -110,11 +123,17 @@ struct mlxreg_core_item {
  * @led_data: led private data;
  * @regmap: register map of parent device;
  * @counter: number of led instances;
+ * @features: supported features of device;
+ * @version: implementation version;
+ * @identity: device identity name;
  */
 struct mlxreg_core_platform_data {
 	struct mlxreg_core_data *data;
 	void *regmap;
 	int counter;
+	u32 features;
+	u32 version;
+	char identity[MLXREG_CORE_LABEL_MAX_SIZE];
 };
 
 /**
-- 
cgit v1.2.3


From f7c917ba11a67632a8452ea99fe132f626a7a2cc Mon Sep 17 00:00:00 2001
From: brakmo <brakmo@fb.com>
Date: Fri, 1 Mar 2019 12:38:46 -0800
Subject: bpf: add bpf helper bpf_skb_ecn_set_ce

This patch adds a new bpf helper BPF_FUNC_skb_ecn_set_ce
"int bpf_skb_ecn_set_ce(struct sk_buff *skb)". It is added to
BPF_PROG_TYPE_CGROUP_SKB typed bpf_prog which currently can
be attached to the ingress and egress path. The helper is needed
because his type of bpf_prog cannot modify the skb directly.

This helper is used to set the ECN field of ECN capable IP packets to ce
(congestion encountered) in the IPv6 or IPv4 header of the skb. It can be
used by a bpf_prog to manage egress or ingress network bandwdith limit
per cgroupv2 by inducing an ECN response in the TCP sender.
This works best when using DCTCP.

Signed-off-by: Lawrence Brakmo <brakmo@fb.com>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Acked-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/uapi/linux/bpf.h | 10 +++++++++-
 net/core/filter.c        | 28 ++++++++++++++++++++++++++++
 2 files changed, 37 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 2e308e90ffea..3c38ac9a92a7 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -2359,6 +2359,13 @@ union bpf_attr {
  *	Return
  *		A **struct bpf_tcp_sock** pointer on success, or NULL in
  *		case of failure.
+ *
+ * int bpf_skb_ecn_set_ce(struct sk_buf *skb)
+ *     Description
+ *             Sets ECN of IP header to ce (congestion encountered) if
+ *             current value is ect (ECN capable). Works with IPv6 and IPv4.
+ *     Return
+ *             1 if set, 0 if not set.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -2457,7 +2464,8 @@ union bpf_attr {
 	FN(spin_lock),			\
 	FN(spin_unlock),		\
 	FN(sk_fullsock),		\
-	FN(tcp_sock),
+	FN(tcp_sock),			\
+	FN(skb_ecn_set_ce),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
diff --git a/net/core/filter.c b/net/core/filter.c
index 85749f6ec789..558ca72f2254 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -5426,6 +5426,32 @@ static const struct bpf_func_proto bpf_tcp_sock_proto = {
 	.arg1_type	= ARG_PTR_TO_SOCK_COMMON,
 };
 
+BPF_CALL_1(bpf_skb_ecn_set_ce, struct sk_buff *, skb)
+{
+	unsigned int iphdr_len;
+
+	if (skb->protocol == cpu_to_be16(ETH_P_IP))
+		iphdr_len = sizeof(struct iphdr);
+	else if (skb->protocol == cpu_to_be16(ETH_P_IPV6))
+		iphdr_len = sizeof(struct ipv6hdr);
+	else
+		return 0;
+
+	if (skb_headlen(skb) < iphdr_len)
+		return 0;
+
+	if (skb_cloned(skb) && !skb_clone_writable(skb, iphdr_len))
+		return 0;
+
+	return INET_ECN_set_ce(skb);
+}
+
+static const struct bpf_func_proto bpf_skb_ecn_set_ce_proto = {
+	.func           = bpf_skb_ecn_set_ce,
+	.gpl_only       = false,
+	.ret_type       = RET_INTEGER,
+	.arg1_type      = ARG_PTR_TO_CTX,
+};
 #endif /* CONFIG_INET */
 
 bool bpf_helper_changes_pkt_data(void *func)
@@ -5585,6 +5611,8 @@ cg_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 #ifdef CONFIG_INET
 	case BPF_FUNC_tcp_sock:
 		return &bpf_tcp_sock_proto;
+	case BPF_FUNC_skb_ecn_set_ce:
+		return &bpf_skb_ecn_set_ce_proto;
 #endif
 	default:
 		return sk_filter_func_proto(func_id, prog);
-- 
cgit v1.2.3


From 46b1c18f9deb326a7e18348e668e4c7ab7c7458b Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 28 Feb 2019 12:55:43 -0800
Subject: net: sched: put back q.qlen into a single location

In the series fc8b81a5981f ("Merge branch 'lockless-qdisc-series'")
John made the assumption that the data path had no need to read
the qdisc qlen (number of packets in the qdisc).

It is true when pfifo_fast is used as the root qdisc, or as direct MQ/MQPRIO
children.

But pfifo_fast can be used as leaf in class full qdiscs, and existing
logic needs to access the child qlen in an efficient way.

HTB breaks badly, since it uses cl->leaf.q->q.qlen in :
  htb_activate() -> WARN_ON()
  htb_dequeue_tree() to decide if a class can be htb_deactivated
  when it has no more packets.

HFSC, DRR, CBQ, QFQ have similar issues, and some calls to
qdisc_tree_reduce_backlog() also read q.qlen directly.

Using qdisc_qlen_sum() (which iterates over all possible cpus)
in the data path is a non starter.

It seems we have to put back qlen in a central location,
at least for stable kernels.

For all qdisc but pfifo_fast, qlen is guarded by the qdisc lock,
so the existing q.qlen{++|--} are correct.

For 'lockless' qdisc (pfifo_fast so far), we need to use atomic_{inc|dec}()
because the spinlock might be not held (for example from
pfifo_fast_enqueue() and pfifo_fast_dequeue())

This patch adds atomic_qlen (in the same location than qlen)
and renames the following helpers, since we want to express
they can be used without qdisc lock, and that qlen is no longer percpu.

- qdisc_qstats_cpu_qlen_dec -> qdisc_qstats_atomic_qlen_dec()
- qdisc_qstats_cpu_qlen_inc -> qdisc_qstats_atomic_qlen_inc()

Later (net-next) we might revert this patch by tracking all these
qlen uses and replace them by a more efficient method (not having
to access a precise qlen, but an empty/non_empty status that might
be less expensive to maintain/track).

Another possibility is to have a legacy pfifo_fast version that would
be used when used a a child qdisc, since the parent qdisc needs
a spinlock anyway. But then, future lockless qdiscs would also
have the same problem.

Fixes: 7e66016f2c65 ("net: sched: helpers to sum qlen and qlen for per cpu logic")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: John Fastabend <john.fastabend@gmail.com>
Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Cc: Cong Wang <xiyou.wangcong@gmail.com>
Cc: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sch_generic.h | 31 +++++++++++++------------------
 net/core/gen_stats.c      |  2 --
 net/sched/sch_generic.c   | 13 ++++++-------
 3 files changed, 19 insertions(+), 27 deletions(-)

(limited to 'include')

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 9481f2c142e2..e7eb4aa6ccc9 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -51,7 +51,10 @@ struct qdisc_size_table {
 struct qdisc_skb_head {
 	struct sk_buff	*head;
 	struct sk_buff	*tail;
-	__u32		qlen;
+	union {
+		u32		qlen;
+		atomic_t	atomic_qlen;
+	};
 	spinlock_t	lock;
 };
 
@@ -408,27 +411,19 @@ static inline void qdisc_cb_private_validate(const struct sk_buff *skb, int sz)
 	BUILD_BUG_ON(sizeof(qcb->data) < sz);
 }
 
-static inline int qdisc_qlen_cpu(const struct Qdisc *q)
-{
-	return this_cpu_ptr(q->cpu_qstats)->qlen;
-}
-
 static inline int qdisc_qlen(const struct Qdisc *q)
 {
 	return q->q.qlen;
 }
 
-static inline int qdisc_qlen_sum(const struct Qdisc *q)
+static inline u32 qdisc_qlen_sum(const struct Qdisc *q)
 {
-	__u32 qlen = q->qstats.qlen;
-	int i;
+	u32 qlen = q->qstats.qlen;
 
-	if (q->flags & TCQ_F_NOLOCK) {
-		for_each_possible_cpu(i)
-			qlen += per_cpu_ptr(q->cpu_qstats, i)->qlen;
-	} else {
+	if (q->flags & TCQ_F_NOLOCK)
+		qlen += atomic_read(&q->q.atomic_qlen);
+	else
 		qlen += q->q.qlen;
-	}
 
 	return qlen;
 }
@@ -825,14 +820,14 @@ static inline void qdisc_qstats_cpu_backlog_inc(struct Qdisc *sch,
 	this_cpu_add(sch->cpu_qstats->backlog, qdisc_pkt_len(skb));
 }
 
-static inline void qdisc_qstats_cpu_qlen_inc(struct Qdisc *sch)
+static inline void qdisc_qstats_atomic_qlen_inc(struct Qdisc *sch)
 {
-	this_cpu_inc(sch->cpu_qstats->qlen);
+	atomic_inc(&sch->q.atomic_qlen);
 }
 
-static inline void qdisc_qstats_cpu_qlen_dec(struct Qdisc *sch)
+static inline void qdisc_qstats_atomic_qlen_dec(struct Qdisc *sch)
 {
-	this_cpu_dec(sch->cpu_qstats->qlen);
+	atomic_dec(&sch->q.atomic_qlen);
 }
 
 static inline void qdisc_qstats_cpu_requeues_inc(struct Qdisc *sch)
diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c
index 9bf1b9ad1780..ac679f74ba47 100644
--- a/net/core/gen_stats.c
+++ b/net/core/gen_stats.c
@@ -291,7 +291,6 @@ __gnet_stats_copy_queue_cpu(struct gnet_stats_queue *qstats,
 	for_each_possible_cpu(i) {
 		const struct gnet_stats_queue *qcpu = per_cpu_ptr(q, i);
 
-		qstats->qlen = 0;
 		qstats->backlog += qcpu->backlog;
 		qstats->drops += qcpu->drops;
 		qstats->requeues += qcpu->requeues;
@@ -307,7 +306,6 @@ void __gnet_stats_copy_queue(struct gnet_stats_queue *qstats,
 	if (cpu) {
 		__gnet_stats_copy_queue_cpu(qstats, cpu);
 	} else {
-		qstats->qlen = q->qlen;
 		qstats->backlog = q->backlog;
 		qstats->drops = q->drops;
 		qstats->requeues = q->requeues;
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 968a85fe4d4a..de31f2f3b973 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -68,7 +68,7 @@ static inline struct sk_buff *__skb_dequeue_bad_txq(struct Qdisc *q)
 			skb = __skb_dequeue(&q->skb_bad_txq);
 			if (qdisc_is_percpu_stats(q)) {
 				qdisc_qstats_cpu_backlog_dec(q, skb);
-				qdisc_qstats_cpu_qlen_dec(q);
+				qdisc_qstats_atomic_qlen_dec(q);
 			} else {
 				qdisc_qstats_backlog_dec(q, skb);
 				q->q.qlen--;
@@ -108,7 +108,7 @@ static inline void qdisc_enqueue_skb_bad_txq(struct Qdisc *q,
 
 	if (qdisc_is_percpu_stats(q)) {
 		qdisc_qstats_cpu_backlog_inc(q, skb);
-		qdisc_qstats_cpu_qlen_inc(q);
+		qdisc_qstats_atomic_qlen_inc(q);
 	} else {
 		qdisc_qstats_backlog_inc(q, skb);
 		q->q.qlen++;
@@ -147,7 +147,7 @@ static inline int dev_requeue_skb_locked(struct sk_buff *skb, struct Qdisc *q)
 
 		qdisc_qstats_cpu_requeues_inc(q);
 		qdisc_qstats_cpu_backlog_inc(q, skb);
-		qdisc_qstats_cpu_qlen_inc(q);
+		qdisc_qstats_atomic_qlen_inc(q);
 
 		skb = next;
 	}
@@ -252,7 +252,7 @@ static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate,
 			skb = __skb_dequeue(&q->gso_skb);
 			if (qdisc_is_percpu_stats(q)) {
 				qdisc_qstats_cpu_backlog_dec(q, skb);
-				qdisc_qstats_cpu_qlen_dec(q);
+				qdisc_qstats_atomic_qlen_dec(q);
 			} else {
 				qdisc_qstats_backlog_dec(q, skb);
 				q->q.qlen--;
@@ -645,7 +645,7 @@ static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc,
 	if (unlikely(err))
 		return qdisc_drop_cpu(skb, qdisc, to_free);
 
-	qdisc_qstats_cpu_qlen_inc(qdisc);
+	qdisc_qstats_atomic_qlen_inc(qdisc);
 	/* Note: skb can not be used after skb_array_produce(),
 	 * so we better not use qdisc_qstats_cpu_backlog_inc()
 	 */
@@ -670,7 +670,7 @@ static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc)
 	if (likely(skb)) {
 		qdisc_qstats_cpu_backlog_dec(qdisc, skb);
 		qdisc_bstats_cpu_update(qdisc, skb);
-		qdisc_qstats_cpu_qlen_dec(qdisc);
+		qdisc_qstats_atomic_qlen_dec(qdisc);
 	}
 
 	return skb;
@@ -714,7 +714,6 @@ static void pfifo_fast_reset(struct Qdisc *qdisc)
 		struct gnet_stats_queue *q = per_cpu_ptr(qdisc->cpu_qstats, i);
 
 		q->backlog = 0;
-		q->qlen = 0;
 	}
 }
 
-- 
cgit v1.2.3


From 6377f787aeb945cae7abbb6474798de129e1f3ac Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Fri, 1 Mar 2019 10:57:57 +0800
Subject: appletalk: Fix use-after-free in atalk_proc_exit

KASAN report this:

BUG: KASAN: use-after-free in pde_subdir_find+0x12d/0x150 fs/proc/generic.c:71
Read of size 8 at addr ffff8881f41fe5b0 by task syz-executor.0/2806

CPU: 0 PID: 2806 Comm: syz-executor.0 Not tainted 5.0.0-rc7+ #45
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014
Call Trace:
 __dump_stack lib/dump_stack.c:77 [inline]
 dump_stack+0xfa/0x1ce lib/dump_stack.c:113
 print_address_description+0x65/0x270 mm/kasan/report.c:187
 kasan_report+0x149/0x18d mm/kasan/report.c:317
 pde_subdir_find+0x12d/0x150 fs/proc/generic.c:71
 remove_proc_entry+0xe8/0x420 fs/proc/generic.c:667
 atalk_proc_exit+0x18/0x820 [appletalk]
 atalk_exit+0xf/0x5a [appletalk]
 __do_sys_delete_module kernel/module.c:1018 [inline]
 __se_sys_delete_module kernel/module.c:961 [inline]
 __x64_sys_delete_module+0x3dc/0x5e0 kernel/module.c:961
 do_syscall_64+0x147/0x600 arch/x86/entry/common.c:290
 entry_SYSCALL_64_after_hwframe+0x49/0xbe
RIP: 0033:0x462e99
Code: f7 d8 64 89 02 b8 ff ff ff ff c3 66 0f 1f 44 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 bc ff ff ff f7 d8 64 89 01 48
RSP: 002b:00007fb2de6b9c58 EFLAGS: 00000246 ORIG_RAX: 00000000000000b0
RAX: ffffffffffffffda RBX: 000000000073bf00 RCX: 0000000000462e99
RDX: 0000000000000000 RSI: 0000000000000000 RDI: 00000000200001c0
RBP: 0000000000000002 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000246 R12: 00007fb2de6ba6bc
R13: 00000000004bccaa R14: 00000000006f6bc8 R15: 00000000ffffffff

Allocated by task 2806:
 set_track mm/kasan/common.c:85 [inline]
 __kasan_kmalloc.constprop.3+0xa0/0xd0 mm/kasan/common.c:496
 slab_post_alloc_hook mm/slab.h:444 [inline]
 slab_alloc_node mm/slub.c:2739 [inline]
 slab_alloc mm/slub.c:2747 [inline]
 kmem_cache_alloc+0xcf/0x250 mm/slub.c:2752
 kmem_cache_zalloc include/linux/slab.h:730 [inline]
 __proc_create+0x30f/0xa20 fs/proc/generic.c:408
 proc_mkdir_data+0x47/0x190 fs/proc/generic.c:469
 0xffffffffc10c01bb
 0xffffffffc10c0166
 do_one_initcall+0xfa/0x5ca init/main.c:887
 do_init_module+0x204/0x5f6 kernel/module.c:3460
 load_module+0x66b2/0x8570 kernel/module.c:3808
 __do_sys_finit_module+0x238/0x2a0 kernel/module.c:3902
 do_syscall_64+0x147/0x600 arch/x86/entry/common.c:290
 entry_SYSCALL_64_after_hwframe+0x49/0xbe

Freed by task 2806:
 set_track mm/kasan/common.c:85 [inline]
 __kasan_slab_free+0x130/0x180 mm/kasan/common.c:458
 slab_free_hook mm/slub.c:1409 [inline]
 slab_free_freelist_hook mm/slub.c:1436 [inline]
 slab_free mm/slub.c:2986 [inline]
 kmem_cache_free+0xa6/0x2a0 mm/slub.c:3002
 pde_put+0x6e/0x80 fs/proc/generic.c:647
 remove_proc_entry+0x1d3/0x420 fs/proc/generic.c:684
 0xffffffffc10c031c
 0xffffffffc10c0166
 do_one_initcall+0xfa/0x5ca init/main.c:887
 do_init_module+0x204/0x5f6 kernel/module.c:3460
 load_module+0x66b2/0x8570 kernel/module.c:3808
 __do_sys_finit_module+0x238/0x2a0 kernel/module.c:3902
 do_syscall_64+0x147/0x600 arch/x86/entry/common.c:290
 entry_SYSCALL_64_after_hwframe+0x49/0xbe

The buggy address belongs to the object at ffff8881f41fe500
 which belongs to the cache proc_dir_entry of size 256
The buggy address is located 176 bytes inside of
 256-byte region [ffff8881f41fe500, ffff8881f41fe600)
The buggy address belongs to the page:
page:ffffea0007d07f80 count:1 mapcount:0 mapping:ffff8881f6e69a00 index:0x0
flags: 0x2fffc0000000200(slab)
raw: 02fffc0000000200 dead000000000100 dead000000000200 ffff8881f6e69a00
raw: 0000000000000000 00000000800c000c 00000001ffffffff 0000000000000000
page dumped because: kasan: bad access detected

Memory state around the buggy address:
 ffff8881f41fe480: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc
 ffff8881f41fe500: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
>ffff8881f41fe580: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
                                     ^
 ffff8881f41fe600: fc fc fc fc fc fc fc fc fb fb fb fb fb fb fb fb
 ffff8881f41fe680: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb

It should check the return value of atalk_proc_init fails,
otherwise atalk_exit will trgger use-after-free in pde_subdir_find
while unload the module.This patch fix error cleanup path of atalk_init

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/atalk.h            |  2 +-
 net/appletalk/atalk_proc.c       |  2 +-
 net/appletalk/ddp.c              | 37 +++++++++++++++++++++++++++++++------
 net/appletalk/sysctl_net_atalk.c |  5 ++++-
 4 files changed, 37 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/linux/atalk.h b/include/linux/atalk.h
index 23f805562f4e..5a90f28d5ff2 100644
--- a/include/linux/atalk.h
+++ b/include/linux/atalk.h
@@ -158,7 +158,7 @@ extern int sysctl_aarp_retransmit_limit;
 extern int sysctl_aarp_resolve_time;
 
 #ifdef CONFIG_SYSCTL
-extern void atalk_register_sysctl(void);
+extern int atalk_register_sysctl(void);
 extern void atalk_unregister_sysctl(void);
 #else
 #define atalk_register_sysctl()		do { } while(0)
diff --git a/net/appletalk/atalk_proc.c b/net/appletalk/atalk_proc.c
index bd8734ef80b8..77f203f1febc 100644
--- a/net/appletalk/atalk_proc.c
+++ b/net/appletalk/atalk_proc.c
@@ -237,7 +237,7 @@ out:
 	return -ENOMEM;
 }
 
-void __exit atalk_proc_exit(void)
+void atalk_proc_exit(void)
 {
 	remove_proc_subtree("atalk", init_net.proc_net);
 }
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index 9b6bc5abe946..795fbc6c06aa 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -1910,12 +1910,16 @@ static const char atalk_err_snap[] __initconst =
 /* Called by proto.c on kernel start up */
 static int __init atalk_init(void)
 {
-	int rc = proto_register(&ddp_proto, 0);
+	int rc;
 
-	if (rc != 0)
+	rc = proto_register(&ddp_proto, 0);
+	if (rc)
 		goto out;
 
-	(void)sock_register(&atalk_family_ops);
+	rc = sock_register(&atalk_family_ops);
+	if (rc)
+		goto out_proto;
+
 	ddp_dl = register_snap_client(ddp_snap_id, atalk_rcv);
 	if (!ddp_dl)
 		printk(atalk_err_snap);
@@ -1923,12 +1927,33 @@ static int __init atalk_init(void)
 	dev_add_pack(&ltalk_packet_type);
 	dev_add_pack(&ppptalk_packet_type);
 
-	register_netdevice_notifier(&ddp_notifier);
+	rc = register_netdevice_notifier(&ddp_notifier);
+	if (rc)
+		goto out_sock;
+
 	aarp_proto_init();
-	atalk_proc_init();
-	atalk_register_sysctl();
+	rc = atalk_proc_init();
+	if (rc)
+		goto out_aarp;
+
+	rc = atalk_register_sysctl();
+	if (rc)
+		goto out_proc;
 out:
 	return rc;
+out_proc:
+	atalk_proc_exit();
+out_aarp:
+	aarp_cleanup_module();
+	unregister_netdevice_notifier(&ddp_notifier);
+out_sock:
+	dev_remove_pack(&ppptalk_packet_type);
+	dev_remove_pack(&ltalk_packet_type);
+	unregister_snap_client(ddp_dl);
+	sock_unregister(PF_APPLETALK);
+out_proto:
+	proto_unregister(&ddp_proto);
+	goto out;
 }
 module_init(atalk_init);
 
diff --git a/net/appletalk/sysctl_net_atalk.c b/net/appletalk/sysctl_net_atalk.c
index c744a853fa5f..d945b7c0176d 100644
--- a/net/appletalk/sysctl_net_atalk.c
+++ b/net/appletalk/sysctl_net_atalk.c
@@ -45,9 +45,12 @@ static struct ctl_table atalk_table[] = {
 
 static struct ctl_table_header *atalk_table_header;
 
-void atalk_register_sysctl(void)
+int __init atalk_register_sysctl(void)
 {
 	atalk_table_header = register_net_sysctl(&init_net, "net/appletalk", atalk_table);
+	if (!atalk_table_header)
+		return -ENOMEM;
+	return 0;
 }
 
 void atalk_unregister_sysctl(void)
-- 
cgit v1.2.3


From 88b573af91dafd60ba35b2dc6af3f9d8d7168146 Mon Sep 17 00:00:00 2001
From: Tristram Ha <Tristram.Ha@microchip.com>
Date: Thu, 28 Feb 2019 19:57:23 -0800
Subject: net: dsa: add KSZ9893 switch tagging support

KSZ9893 switch is similar to KSZ9477 switch except the ingress tail tag
has 1 byte instead of 2 bytes.  The size of the portmap is smaller and
so the override and lookup bits are also moved.

Signed-off-by: Tristram Ha <Tristram.Ha@microchip.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dsa.h  |  1 +
 net/dsa/dsa.c      |  2 ++
 net/dsa/dsa_priv.h |  1 +
 net/dsa/tag_ksz.c  | 34 ++++++++++++++++++++++++++++++++++
 4 files changed, 38 insertions(+)

(limited to 'include')

diff --git a/include/net/dsa.h b/include/net/dsa.h
index e8ac5b35ac4a..ae480bba11f5 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -38,6 +38,7 @@ enum dsa_tag_protocol {
 	DSA_TAG_PROTO_EDSA,
 	DSA_TAG_PROTO_GSWIP,
 	DSA_TAG_PROTO_KSZ9477,
+	DSA_TAG_PROTO_KSZ9893,
 	DSA_TAG_PROTO_LAN9303,
 	DSA_TAG_PROTO_MTK,
 	DSA_TAG_PROTO_QCA,
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index aee909bcddc4..36de4f2a3366 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -57,6 +57,7 @@ const struct dsa_device_ops *dsa_device_ops[DSA_TAG_LAST] = {
 #endif
 #ifdef CONFIG_NET_DSA_TAG_KSZ9477
 	[DSA_TAG_PROTO_KSZ9477] = &ksz9477_netdev_ops,
+	[DSA_TAG_PROTO_KSZ9893] = &ksz9893_netdev_ops,
 #endif
 #ifdef CONFIG_NET_DSA_TAG_LAN9303
 	[DSA_TAG_PROTO_LAN9303] = &lan9303_netdev_ops,
@@ -93,6 +94,7 @@ const char *dsa_tag_protocol_to_str(const struct dsa_device_ops *ops)
 #endif
 #ifdef CONFIG_NET_DSA_TAG_KSZ9477
 		[DSA_TAG_PROTO_KSZ9477] = "ksz9477",
+		[DSA_TAG_PROTO_KSZ9893] = "ksz9893",
 #endif
 #ifdef CONFIG_NET_DSA_TAG_LAN9303
 		[DSA_TAG_PROTO_LAN9303] = "lan9303",
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index c6caa58c5c71..093b7d145eb1 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -216,6 +216,7 @@ extern const struct dsa_device_ops gswip_netdev_ops;
 
 /* tag_ksz.c */
 extern const struct dsa_device_ops ksz9477_netdev_ops;
+extern const struct dsa_device_ops ksz9893_netdev_ops;
 
 /* tag_lan9303.c */
 extern const struct dsa_device_ops lan9303_netdev_ops;
diff --git a/net/dsa/tag_ksz.c b/net/dsa/tag_ksz.c
index 927e9c86f745..de246c93d3bb 100644
--- a/net/dsa/tag_ksz.c
+++ b/net/dsa/tag_ksz.c
@@ -16,6 +16,7 @@
 
 /* Typically only one byte is used for tail tag. */
 #define KSZ_EGRESS_TAG_LEN		1
+#define KSZ_INGRESS_TAG_LEN		1
 
 static struct sk_buff *ksz_common_xmit(struct sk_buff *skb,
 				       struct net_device *dev, int len)
@@ -141,3 +142,36 @@ const struct dsa_device_ops ksz9477_netdev_ops = {
 	.rcv	= ksz9477_rcv,
 	.overhead = KSZ9477_INGRESS_TAG_LEN,
 };
+
+#define KSZ9893_TAIL_TAG_OVERRIDE	BIT(5)
+#define KSZ9893_TAIL_TAG_LOOKUP		BIT(6)
+
+static struct sk_buff *ksz9893_xmit(struct sk_buff *skb,
+				    struct net_device *dev)
+{
+	struct dsa_port *dp = dsa_slave_to_port(dev);
+	struct sk_buff *nskb;
+	u8 *addr;
+	u8 *tag;
+
+	nskb = ksz_common_xmit(skb, dev, KSZ_INGRESS_TAG_LEN);
+	if (!nskb)
+		return NULL;
+
+	/* Tag encoding */
+	tag = skb_put(nskb, KSZ_INGRESS_TAG_LEN);
+	addr = skb_mac_header(nskb);
+
+	*tag = BIT(dp->index);
+
+	if (is_link_local_ether_addr(addr))
+		*tag |= KSZ9893_TAIL_TAG_OVERRIDE;
+
+	return nskb;
+}
+
+const struct dsa_device_ops ksz9893_netdev_ops = {
+	.xmit	= ksz9893_xmit,
+	.rcv	= ksz9477_rcv,
+	.overhead = KSZ_INGRESS_TAG_LEN,
+};
-- 
cgit v1.2.3


From 35d838ff98bc57c882eb610393c6b68455d3d9fe Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@ingics.com>
Date: Thu, 28 Feb 2019 21:40:12 +0800
Subject: regulator: Fix comment for csel_reg and csel_mask

The csel_reg and csel_mask fields in struct regulator_desc needs to
be generic for drivers. Not just for TPS65218.

Signed-off-by: Axel Lin <axel.lin@ingics.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/regulator/driver.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h
index 05efe2b057c1..b9557c9623b5 100644
--- a/include/linux/regulator/driver.h
+++ b/include/linux/regulator/driver.h
@@ -284,8 +284,8 @@ enum regulator_type {
  * @vsel_range_mask: Mask for register bitfield used for range selector
  * @vsel_reg: Register for selector when using regulator_regmap_X_voltage_
  * @vsel_mask: Mask for register bitfield used for selector
- * @csel_reg: Register for TPS65218 LS3 current regulator
- * @csel_mask: Mask for TPS65218 LS3 current regulator
+ * @csel_reg: Register for current limit selector using regmap set_current_limit
+ * @csel_mask: Mask for register bitfield used for current limit selector
  * @apply_reg: Register for initiate voltage change on the output when
  *                using regulator_set_voltage_sel_regmap
  * @apply_bit: Register bitfield used for initiate voltage change on the
-- 
cgit v1.2.3


From a32e0c773b5f233b0589dbb621bb2b9681dbfec3 Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@ingics.com>
Date: Thu, 28 Feb 2019 21:40:13 +0800
Subject: regulator: core: Add set/get_current_limit helpers for regmap users

By setting curr_table, n_current_limits, csel_reg and csel_mask, the
regmap users can use regulator_set_current_limit_regmap and
regulator_get_current_limit_regmap for set/get_current_limit callbacks.

Signed-off-by: Axel Lin <axel.lin@ingics.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/helpers.c      | 86 ++++++++++++++++++++++++++++++++++++++++
 include/linux/regulator/driver.h |  7 ++++
 2 files changed, 93 insertions(+)

(limited to 'include')

diff --git a/drivers/regulator/helpers.c b/drivers/regulator/helpers.c
index 68ac6017ef28..32d3f0499e2d 100644
--- a/drivers/regulator/helpers.c
+++ b/drivers/regulator/helpers.c
@@ -780,3 +780,89 @@ int regulator_set_active_discharge_regmap(struct regulator_dev *rdev,
 				  rdev->desc->active_discharge_mask, val);
 }
 EXPORT_SYMBOL_GPL(regulator_set_active_discharge_regmap);
+
+/**
+ * regulator_set_current_limit_regmap - set_current_limit for regmap users
+ *
+ * @rdev: regulator to operate on
+ * @min_uA: Lower bound for current limit
+ * @max_uA: Upper bound for current limit
+ *
+ * Regulators that use regmap for their register I/O can set curr_table,
+ * csel_reg and csel_mask fields in their descriptor and then use this
+ * as their set_current_limit operation, saving some code.
+ */
+int regulator_set_current_limit_regmap(struct regulator_dev *rdev,
+				       int min_uA, int max_uA)
+{
+	unsigned int n_currents = rdev->desc->n_current_limits;
+	int i, sel = -1;
+
+	if (n_currents == 0)
+		return -EINVAL;
+
+	if (rdev->desc->curr_table) {
+		const unsigned int *curr_table = rdev->desc->curr_table;
+		bool ascend = curr_table[n_currents - 1] > curr_table[0];
+
+		/* search for closest to maximum */
+		if (ascend) {
+			for (i = n_currents - 1; i >= 0; i--) {
+				if (min_uA <= curr_table[i] &&
+				    curr_table[i] <= max_uA) {
+					sel = i;
+					break;
+				}
+			}
+		} else {
+			for (i = 0; i < n_currents; i++) {
+				if (min_uA <= curr_table[i] &&
+				    curr_table[i] <= max_uA) {
+					sel = i;
+					break;
+				}
+			}
+		}
+	}
+
+	if (sel < 0)
+		return -EINVAL;
+
+	sel <<= ffs(rdev->desc->csel_mask) - 1;
+
+	return regmap_update_bits(rdev->regmap, rdev->desc->csel_reg,
+				  rdev->desc->csel_mask, sel);
+}
+EXPORT_SYMBOL_GPL(regulator_set_current_limit_regmap);
+
+/**
+ * regulator_get_current_limit_regmap - get_current_limit for regmap users
+ *
+ * @rdev: regulator to operate on
+ *
+ * Regulators that use regmap for their register I/O can set the
+ * csel_reg and csel_mask fields in their descriptor and then use this
+ * as their get_current_limit operation, saving some code.
+ */
+int regulator_get_current_limit_regmap(struct regulator_dev *rdev)
+{
+	unsigned int val;
+	int ret;
+
+	ret = regmap_read(rdev->regmap, rdev->desc->csel_reg, &val);
+	if (ret != 0)
+		return ret;
+
+	val &= rdev->desc->csel_mask;
+	val >>= ffs(rdev->desc->csel_mask) - 1;
+
+	if (rdev->desc->curr_table) {
+		if (val >= rdev->desc->n_current_limits)
+			return -EINVAL;
+
+		return rdev->desc->curr_table[val];
+	}
+
+	return -EINVAL;
+}
+EXPORT_SYMBOL_GPL(regulator_get_current_limit_regmap);
diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h
index b9557c9623b5..377da2357118 100644
--- a/include/linux/regulator/driver.h
+++ b/include/linux/regulator/driver.h
@@ -264,6 +264,7 @@ enum regulator_type {
  * @continuous_voltage_range: Indicates if the regulator can set any
  *                            voltage within constrains range.
  * @n_voltages: Number of selectors available for ops.list_voltage().
+ * @n_current_limits: Number of selectors available for current limits
  *
  * @min_uV: Voltage given by the lowest selector (if linear mapping)
  * @uV_step: Voltage increase with each selector (if linear mapping)
@@ -278,6 +279,7 @@ enum regulator_type {
  * @n_linear_ranges: Number of entries in the @linear_ranges (and in
  *		     linear_range_selectors if used) table(s).
  * @volt_table: Voltage mapping table (if table based mapping)
+ * @curr_table: Current limit mapping table (if table based mapping)
  *
  * @vsel_range_reg: Register for range selector when using pickable ranges
  *		    and regulator_regmap_X_voltage_X_pickable functions.
@@ -333,6 +335,7 @@ struct regulator_desc {
 	int id;
 	unsigned int continuous_voltage_range:1;
 	unsigned n_voltages;
+	unsigned int n_current_limits;
 	const struct regulator_ops *ops;
 	int irq;
 	enum regulator_type type;
@@ -351,6 +354,7 @@ struct regulator_desc {
 	int n_linear_ranges;
 
 	const unsigned int *volt_table;
+	const unsigned int *curr_table;
 
 	unsigned int vsel_range_reg;
 	unsigned int vsel_range_mask;
@@ -534,6 +538,9 @@ int regulator_set_pull_down_regmap(struct regulator_dev *rdev);
 
 int regulator_set_active_discharge_regmap(struct regulator_dev *rdev,
 					  bool enable);
+int regulator_set_current_limit_regmap(struct regulator_dev *rdev,
+				       int min_uA, int max_uA);
+int regulator_get_current_limit_regmap(struct regulator_dev *rdev);
 void *regulator_get_init_drvdata(struct regulator_init_data *reg_init_data);
 
 void regulator_lock(struct regulator_dev *rdev);
-- 
cgit v1.2.3


From 0b5c7efdfc6e389ec6840579fe90bdb6f42b08dc Mon Sep 17 00:00:00 2001
From: Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
Date: Fri, 1 Mar 2019 16:04:05 +0100
Subject: sch_cake: Permit use of connmarks as tin classifiers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add flag 'FWMARK' to enable use of firewall connmarks as tin selector.
The connmark (skbuff->mark) needs to be in the range 1->tin_cnt ie.
for diffserv3 the mark needs to be 1->3.

Background

Typically CAKE uses DSCP as the basis for tin selection.  DSCP values
are relatively easily changed as part of the egress path, usually with
iptables & the mangle table, ingress is more challenging.  CAKE is often
used on the WAN interface of a residential gateway where passthrough of
DSCP from the ISP is either missing or set to unhelpful values thus use
of ingress DSCP values for tin selection isn't helpful in that
environment.

An approach to solving the ingress tin selection problem is to use
CAKE's understanding of tc filters.  Naive tc filters could match on
source/destination port numbers and force tin selection that way, but
multiple filters don't scale particularly well as each filter must be
traversed whether it matches or not. e.g. a simple example to map 3
firewall marks to tins:

MAJOR=$( tc qdisc show dev $DEV | head -1 | awk '{print $3}' )
tc filter add dev $DEV parent $MAJOR protocol all handle 0x01 fw action skbedit priority ${MAJOR}1
tc filter add dev $DEV parent $MAJOR protocol all handle 0x02 fw action skbedit priority ${MAJOR}2
tc filter add dev $DEV parent $MAJOR protocol all handle 0x03 fw action skbedit priority ${MAJOR}3

Another option is to use eBPF cls_act with tc filters e.g.

MAJOR=$( tc qdisc show dev $DEV | head -1 | awk '{print $3}' )
tc filter add dev $DEV parent $MAJOR bpf da obj my-bpf-fwmark-to-class.o

This has the disadvantages of a) needing someone to write & maintain
the bpf program, b) a bpf toolchain to compile it and c) needing to
hardcode the major number in the bpf program so it matches the cake
instance (or forcing the cake instance to a particular major number)
since the major number cannot be passed to the bpf program via tc
command line.

As already hinted at by the previous examples, it would be helpful
to associate tins with something that survives the Internet path and
ideally allows tin selection on both egress and ingress.  Netfilter's
conntrack permits setting an identifying mark on a connection which
can also be restored to an ingress packet with tc action connmark e.g.

tc filter add dev eth0 parent ffff: protocol all prio 10 u32 \
	match u32 0 0 flowid 1:1 action connmark action mirred egress redirect dev ifb1

Since tc's connmark action has restored any connmark into skb->mark,
any of the previous solutions are based upon it and in one form or
another copy that mark to the skb->priority field where again CAKE
picks this up.

This change cuts out at least one of the (less intuitive &
non-scalable) middlemen and permit direct access to skb->mark.

Signed-off-by: Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/pkt_sched.h |  1 +
 net/sched/sch_cake.c           | 34 +++++++++++++++++++++++++++-------
 2 files changed, 28 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h
index 1eb572ef3f27..7ee74c3474bf 100644
--- a/include/uapi/linux/pkt_sched.h
+++ b/include/uapi/linux/pkt_sched.h
@@ -1021,6 +1021,7 @@ enum {
 	TCA_CAKE_INGRESS,
 	TCA_CAKE_ACK_FILTER,
 	TCA_CAKE_SPLIT_GSO,
+	TCA_CAKE_FWMARK,
 	__TCA_CAKE_MAX
 };
 #define TCA_CAKE_MAX	(__TCA_CAKE_MAX - 1)
diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c
index 4d688b3b471b..f1cc4779699b 100644
--- a/net/sched/sch_cake.c
+++ b/net/sched/sch_cake.c
@@ -258,7 +258,8 @@ enum {
 	CAKE_FLAG_AUTORATE_INGRESS = BIT(1),
 	CAKE_FLAG_INGRESS	   = BIT(2),
 	CAKE_FLAG_WASH		   = BIT(3),
-	CAKE_FLAG_SPLIT_GSO	   = BIT(4)
+	CAKE_FLAG_SPLIT_GSO	   = BIT(4),
+	CAKE_FLAG_FWMARK	   = BIT(5)
 };
 
 /* COBALT operates the Codel and BLUE algorithms in parallel, in order to
@@ -1566,12 +1567,20 @@ static struct cake_tin_data *cake_select_tin(struct Qdisc *sch,
 		if (q->rate_flags & CAKE_FLAG_WASH)
 			cake_wash_diffserv(skb);
 	} else if (q->tin_mode != CAKE_DIFFSERV_BESTEFFORT) {
-		/* extract the Diffserv Precedence field, if it exists */
-		/* and clear DSCP bits if washing */
-		tin = q->tin_index[cake_handle_diffserv(skb,
-				q->rate_flags & CAKE_FLAG_WASH)];
-		if (unlikely(tin >= q->tin_cnt))
-			tin = 0;
+		if (q->rate_flags & CAKE_FLAG_FWMARK && /* use fw mark */
+		    skb->mark &&
+		    skb->mark <= q->tin_cnt) {
+			tin = q->tin_order[skb->mark - 1];
+			if (q->rate_flags & CAKE_FLAG_WASH)
+				cake_wash_diffserv(skb);
+		} else {
+			/* extract the Diffserv Precedence field, if it exists */
+			/* and clear DSCP bits if washing */
+			tin = q->tin_index[cake_handle_diffserv(skb,
+					q->rate_flags & CAKE_FLAG_WASH)];
+			if (unlikely(tin >= q->tin_cnt))
+				tin = 0;
+		}
 	} else {
 		tin = 0;
 		if (q->rate_flags & CAKE_FLAG_WASH)
@@ -2624,6 +2633,13 @@ static int cake_change(struct Qdisc *sch, struct nlattr *opt,
 			q->rate_flags &= ~CAKE_FLAG_SPLIT_GSO;
 	}
 
+	if (tb[TCA_CAKE_FWMARK]) {
+		if (!!nla_get_u32(tb[TCA_CAKE_FWMARK]))
+			q->rate_flags |= CAKE_FLAG_FWMARK;
+		else
+			q->rate_flags &= ~CAKE_FLAG_FWMARK;
+	}
+
 	if (q->tins) {
 		sch_tree_lock(sch);
 		cake_reconfigure(sch);
@@ -2783,6 +2799,10 @@ static int cake_dump(struct Qdisc *sch, struct sk_buff *skb)
 			!!(q->rate_flags & CAKE_FLAG_SPLIT_GSO)))
 		goto nla_put_failure;
 
+	if (nla_put_u32(skb, TCA_CAKE_FWMARK,
+			!!(q->rate_flags & CAKE_FLAG_FWMARK)))
+		goto nla_put_failure;
+
 	return nla_nest_end(skb, opts);
 
 nla_put_failure:
-- 
cgit v1.2.3


From 9036b2fe092a107856edd1a3bad48b83f2b45000 Mon Sep 17 00:00:00 2001
From: Francesco Ruggeri <fruggeri@arista.com>
Date: Fri, 1 Mar 2019 15:31:03 -0800
Subject: net: ipv6: add socket option IPV6_ROUTER_ALERT_ISOLATE

By default IPv6 socket with IPV6_ROUTER_ALERT socket option set will
receive all IPv6 RA packets from all namespaces.
IPV6_ROUTER_ALERT_ISOLATE socket option restricts packets received by
the socket to be only from the socket's namespace.

Signed-off-by: Maxim Martynov <maxim@arista.com>
Signed-off-by: Francesco Ruggeri <fruggeri@arista.com>
Reviewed-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ipv6.h     |  3 ++-
 include/uapi/linux/in6.h |  1 +
 net/ipv6/ip6_output.c    |  6 ++++++
 net/ipv6/ipv6_sockglue.c | 10 ++++++++++
 4 files changed, 19 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 6d45ce784bea..ea7c7906591e 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -281,7 +281,8 @@ struct ipv6_pinfo {
 				dontfrag:1,
 				autoflowlabel:1,
 				autoflowlabel_set:1,
-				mc_all:1;
+				mc_all:1,
+				rtalert_isolate:1;
 	__u8			min_hopcount;
 	__u8			tclass;
 	__be32			rcv_flowinfo;
diff --git a/include/uapi/linux/in6.h b/include/uapi/linux/in6.h
index 71d82fe15b03..9f2273a08356 100644
--- a/include/uapi/linux/in6.h
+++ b/include/uapi/linux/in6.h
@@ -178,6 +178,7 @@ struct in6_flowlabel_req {
 #define IPV6_JOIN_ANYCAST	27
 #define IPV6_LEAVE_ANYCAST	28
 #define IPV6_MULTICAST_ALL	29
+#define IPV6_ROUTER_ALERT_ISOLATE	30
 
 /* IPV6_MTU_DISCOVER values */
 #define IPV6_PMTUDISC_DONT		0
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 5f9fa0302b5a..edbd12067170 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -300,6 +300,12 @@ static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
 		if (sk && ra->sel == sel &&
 		    (!sk->sk_bound_dev_if ||
 		     sk->sk_bound_dev_if == skb->dev->ifindex)) {
+			struct ipv6_pinfo *np = inet6_sk(sk);
+
+			if (np && np->rtalert_isolate &&
+			    !net_eq(sock_net(sk), dev_net(skb->dev))) {
+				continue;
+			}
 			if (last) {
 				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
 				if (skb2)
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 973e215c3114..40f21fef25ff 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -787,6 +787,12 @@ done:
 			goto e_inval;
 		retv = ip6_ra_control(sk, val);
 		break;
+	case IPV6_ROUTER_ALERT_ISOLATE:
+		if (optlen < sizeof(int))
+			goto e_inval;
+		np->rtalert_isolate = valbool;
+		retv = 0;
+		break;
 	case IPV6_MTU_DISCOVER:
 		if (optlen < sizeof(int))
 			goto e_inval;
@@ -1358,6 +1364,10 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
 		val = np->rxopt.bits.recvfragsize;
 		break;
 
+	case IPV6_ROUTER_ALERT_ISOLATE:
+		val = np->rtalert_isolate;
+		break;
+
 	default:
 		return -ENOPROTOOPT;
 	}
-- 
cgit v1.2.3


From a6d0aa97f453cc1a13ba93428590ef4fd29d005a Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Sat, 2 Mar 2019 17:10:36 +0100
Subject: net: phy: remove gen10g_suspend and gen10g_resume

phy_suspend() and phy_resume() are no-ops anyway if no callback is
defined. Therefore we don't need these stubs.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/phy-c45.c | 14 --------------
 include/linux/phy.h       |  2 --
 2 files changed, 16 deletions(-)

(limited to 'include')

diff --git a/drivers/net/phy/phy-c45.c b/drivers/net/phy/phy-c45.c
index 49e7cd08b05f..3ddbb9c32dda 100644
--- a/drivers/net/phy/phy-c45.c
+++ b/drivers/net/phy/phy-c45.c
@@ -529,18 +529,6 @@ int gen10g_config_init(struct phy_device *phydev)
 }
 EXPORT_SYMBOL_GPL(gen10g_config_init);
 
-int gen10g_suspend(struct phy_device *phydev)
-{
-	return 0;
-}
-EXPORT_SYMBOL_GPL(gen10g_suspend);
-
-int gen10g_resume(struct phy_device *phydev)
-{
-	return 0;
-}
-EXPORT_SYMBOL_GPL(gen10g_resume);
-
 struct phy_driver genphy_10g_driver = {
 	.phy_id         = 0xffffffff,
 	.phy_id_mask    = 0xffffffff,
@@ -550,6 +538,4 @@ struct phy_driver genphy_10g_driver = {
 	.features       = PHY_10GBIT_FEATURES,
 	.config_aneg    = gen10g_config_aneg,
 	.read_status    = gen10g_read_status,
-	.suspend        = gen10g_suspend,
-	.resume         = gen10g_resume,
 };
diff --git a/include/linux/phy.h b/include/linux/phy.h
index bfe60e2a5174..c69de3b87e87 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -1122,8 +1122,6 @@ int gen10g_config_aneg(struct phy_device *phydev);
 int gen10g_read_status(struct phy_device *phydev);
 int gen10g_no_soft_reset(struct phy_device *phydev);
 int gen10g_config_init(struct phy_device *phydev);
-int gen10g_suspend(struct phy_device *phydev);
-int gen10g_resume(struct phy_device *phydev);
 
 static inline int phy_read_status(struct phy_device *phydev)
 {
-- 
cgit v1.2.3


From c5e91d39427d1759d6205599e145553b5b2bc19e Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Sat, 2 Mar 2019 17:11:40 +0100
Subject: net: phy: remove gen10g_config_init

ETHTOOL_LINK_MODE_10000baseT_Full_BIT is set anyway in the supported
and advertising bitmap because it's part of PHY_10GBIT_FEATURES.
And all users of gen10g_config_init use PHY_10GBIT_FEATURES.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/cortina.c    |  1 -
 drivers/net/phy/phy-c45.c    | 14 --------------
 drivers/net/phy/teranetics.c |  1 -
 include/linux/phy.h          |  1 -
 4 files changed, 17 deletions(-)

(limited to 'include')

diff --git a/drivers/net/phy/cortina.c b/drivers/net/phy/cortina.c
index c291dc014769..a64eb211cc56 100644
--- a/drivers/net/phy/cortina.c
+++ b/drivers/net/phy/cortina.c
@@ -80,7 +80,6 @@ static struct phy_driver cortina_driver[] = {
 	.phy_id_mask	= 0xffffffff,
 	.name		= "Cortina CS4340",
 	.features       = PHY_10GBIT_FEATURES,
-	.config_init	= gen10g_config_init,
 	.config_aneg	= gen10g_config_aneg,
 	.read_status	= cortina_read_status,
 	.soft_reset	= gen10g_no_soft_reset,
diff --git a/drivers/net/phy/phy-c45.c b/drivers/net/phy/phy-c45.c
index 3ddbb9c32dda..cdbcea8609df 100644
--- a/drivers/net/phy/phy-c45.c
+++ b/drivers/net/phy/phy-c45.c
@@ -516,25 +516,11 @@ int gen10g_no_soft_reset(struct phy_device *phydev)
 }
 EXPORT_SYMBOL_GPL(gen10g_no_soft_reset);
 
-int gen10g_config_init(struct phy_device *phydev)
-{
-	/* Temporarily just say we support everything */
-	linkmode_zero(phydev->supported);
-
-	linkmode_set_bit(ETHTOOL_LINK_MODE_10000baseT_Full_BIT,
-			 phydev->supported);
-	linkmode_copy(phydev->advertising, phydev->supported);
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(gen10g_config_init);
-
 struct phy_driver genphy_10g_driver = {
 	.phy_id         = 0xffffffff,
 	.phy_id_mask    = 0xffffffff,
 	.name           = "Generic 10G PHY",
 	.soft_reset	= gen10g_no_soft_reset,
-	.config_init    = gen10g_config_init,
 	.features       = PHY_10GBIT_FEATURES,
 	.config_aneg    = gen10g_config_aneg,
 	.read_status    = gen10g_read_status,
diff --git a/drivers/net/phy/teranetics.c b/drivers/net/phy/teranetics.c
index 145c328b00fa..95280212d5d5 100644
--- a/drivers/net/phy/teranetics.c
+++ b/drivers/net/phy/teranetics.c
@@ -80,7 +80,6 @@ static struct phy_driver teranetics_driver[] = {
 	.features       = PHY_10GBIT_FEATURES,
 	.soft_reset	= gen10g_no_soft_reset,
 	.aneg_done	= teranetics_aneg_done,
-	.config_init    = gen10g_config_init,
 	.config_aneg    = gen10g_config_aneg,
 	.read_status	= teranetics_read_status,
 	.match_phy_device = teranetics_match_phy_device,
diff --git a/include/linux/phy.h b/include/linux/phy.h
index c69de3b87e87..817c8453aeb5 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -1121,7 +1121,6 @@ int genphy_c45_read_status(struct phy_device *phydev);
 int gen10g_config_aneg(struct phy_device *phydev);
 int gen10g_read_status(struct phy_device *phydev);
 int gen10g_no_soft_reset(struct phy_device *phydev);
-int gen10g_config_init(struct phy_device *phydev);
 
 static inline int phy_read_status(struct phy_device *phydev)
 {
-- 
cgit v1.2.3


From d81210c25e17b5cca71138f3990ed8071d510ba9 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Sat, 2 Mar 2019 17:15:56 +0100
Subject: net: phy: don't export gen10g_read_status

gen10g_read_status is deprecated, therefore stop exporting it.
We don't want to encourage anybody to use it.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/phy-c45.c | 3 +--
 include/linux/phy.h       | 1 -
 2 files changed, 1 insertion(+), 3 deletions(-)

(limited to 'include')

diff --git a/drivers/net/phy/phy-c45.c b/drivers/net/phy/phy-c45.c
index cdbcea8609df..6cd4bd5e9e43 100644
--- a/drivers/net/phy/phy-c45.c
+++ b/drivers/net/phy/phy-c45.c
@@ -499,7 +499,7 @@ int gen10g_config_aneg(struct phy_device *phydev)
 }
 EXPORT_SYMBOL_GPL(gen10g_config_aneg);
 
-int gen10g_read_status(struct phy_device *phydev)
+static int gen10g_read_status(struct phy_device *phydev)
 {
 	/* For now just lie and say it's 10G all the time */
 	phydev->speed = SPEED_10000;
@@ -507,7 +507,6 @@ int gen10g_read_status(struct phy_device *phydev)
 
 	return genphy_c45_read_link(phydev);
 }
-EXPORT_SYMBOL_GPL(gen10g_read_status);
 
 int gen10g_no_soft_reset(struct phy_device *phydev)
 {
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 817c8453aeb5..60794240141f 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -1119,7 +1119,6 @@ int genphy_c45_read_status(struct phy_device *phydev);
 
 /* The gen10g_* functions are the old Clause 45 stub */
 int gen10g_config_aneg(struct phy_device *phydev);
-int gen10g_read_status(struct phy_device *phydev);
 int gen10g_no_soft_reset(struct phy_device *phydev);
 
 static inline int phy_read_status(struct phy_device *phydev)
-- 
cgit v1.2.3


From 7be3ad848f77eba893bd08b97e7383e8d5e873ac Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Sat, 2 Mar 2019 17:13:11 +0100
Subject: net: phy: remove gen10g_no_soft_reset

genphy_no_soft_reset and gen10g_no_soft_reset are both the same no-ops,
one is enough.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/cortina.c    | 2 +-
 drivers/net/phy/marvell10g.c | 4 ++--
 drivers/net/phy/phy-c45.c    | 9 +--------
 drivers/net/phy/teranetics.c | 2 +-
 include/linux/phy.h          | 1 -
 5 files changed, 5 insertions(+), 13 deletions(-)

(limited to 'include')

diff --git a/drivers/net/phy/cortina.c b/drivers/net/phy/cortina.c
index a64eb211cc56..856cdc36aacd 100644
--- a/drivers/net/phy/cortina.c
+++ b/drivers/net/phy/cortina.c
@@ -82,7 +82,7 @@ static struct phy_driver cortina_driver[] = {
 	.features       = PHY_10GBIT_FEATURES,
 	.config_aneg	= gen10g_config_aneg,
 	.read_status	= cortina_read_status,
-	.soft_reset	= gen10g_no_soft_reset,
+	.soft_reset	= genphy_no_soft_reset,
 	.probe		= cortina_probe,
 },
 };
diff --git a/drivers/net/phy/marvell10g.c b/drivers/net/phy/marvell10g.c
index 79106e70010f..100b401b1f4a 100644
--- a/drivers/net/phy/marvell10g.c
+++ b/drivers/net/phy/marvell10g.c
@@ -459,7 +459,7 @@ static struct phy_driver mv3310_drivers[] = {
 		.phy_id_mask	= MARVELL_PHY_ID_MASK,
 		.name		= "mv88x3310",
 		.get_features	= mv3310_get_features,
-		.soft_reset	= gen10g_no_soft_reset,
+		.soft_reset	= genphy_no_soft_reset,
 		.config_init	= mv3310_config_init,
 		.probe		= mv3310_probe,
 		.suspend	= mv3310_suspend,
@@ -474,7 +474,7 @@ static struct phy_driver mv3310_drivers[] = {
 		.name		= "mv88x2110",
 		.get_features	= genphy_c45_pma_read_abilities,
 		.probe		= mv3310_probe,
-		.soft_reset	= gen10g_no_soft_reset,
+		.soft_reset	= genphy_no_soft_reset,
 		.config_init	= mv3310_config_init,
 		.config_aneg	= mv3310_config_aneg,
 		.aneg_done	= mv3310_aneg_done,
diff --git a/drivers/net/phy/phy-c45.c b/drivers/net/phy/phy-c45.c
index 6cd4bd5e9e43..c596eb54e4ac 100644
--- a/drivers/net/phy/phy-c45.c
+++ b/drivers/net/phy/phy-c45.c
@@ -508,18 +508,11 @@ static int gen10g_read_status(struct phy_device *phydev)
 	return genphy_c45_read_link(phydev);
 }
 
-int gen10g_no_soft_reset(struct phy_device *phydev)
-{
-	/* Do nothing for now */
-	return 0;
-}
-EXPORT_SYMBOL_GPL(gen10g_no_soft_reset);
-
 struct phy_driver genphy_10g_driver = {
 	.phy_id         = 0xffffffff,
 	.phy_id_mask    = 0xffffffff,
 	.name           = "Generic 10G PHY",
-	.soft_reset	= gen10g_no_soft_reset,
+	.soft_reset	= genphy_no_soft_reset,
 	.features       = PHY_10GBIT_FEATURES,
 	.config_aneg    = gen10g_config_aneg,
 	.read_status    = gen10g_read_status,
diff --git a/drivers/net/phy/teranetics.c b/drivers/net/phy/teranetics.c
index 95280212d5d5..beb054b931ee 100644
--- a/drivers/net/phy/teranetics.c
+++ b/drivers/net/phy/teranetics.c
@@ -78,7 +78,7 @@ static struct phy_driver teranetics_driver[] = {
 	.phy_id_mask	= 0xffffffff,
 	.name		= "Teranetics TN2020",
 	.features       = PHY_10GBIT_FEATURES,
-	.soft_reset	= gen10g_no_soft_reset,
+	.soft_reset	= genphy_no_soft_reset,
 	.aneg_done	= teranetics_aneg_done,
 	.config_aneg    = gen10g_config_aneg,
 	.read_status	= teranetics_read_status,
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 60794240141f..34084892a466 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -1119,7 +1119,6 @@ int genphy_c45_read_status(struct phy_device *phydev);
 
 /* The gen10g_* functions are the old Clause 45 stub */
 int gen10g_config_aneg(struct phy_device *phydev);
-int gen10g_no_soft_reset(struct phy_device *phydev);
 
 static inline int phy_read_status(struct phy_device *phydev)
 {
-- 
cgit v1.2.3


From 94850257cf0f88b20db7644f28bfedc7d284de15 Mon Sep 17 00:00:00 2001
From: Boris Pismenny <borisp@mellanox.com>
Date: Wed, 27 Feb 2019 17:38:03 +0200
Subject: tls: Fix tls_device handling of partial records

Cleanup the handling of partial records while fixing a bug where the
tls_push_pending_closed_record function is using the software tls
context instead of the hardware context.

The bug resulted in the following crash:
[   88.791229] BUG: unable to handle kernel NULL pointer dereference at 0000000000000000
[   88.793271] #PF error: [normal kernel read fault]
[   88.794449] PGD 800000022a426067 P4D 800000022a426067 PUD 22a156067 PMD 0
[   88.795958] Oops: 0000 [#1] SMP PTI
[   88.796884] CPU: 2 PID: 4973 Comm: openssl Not tainted 5.0.0-rc4+ #3
[   88.798314] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
[   88.800067] RIP: 0010:tls_tx_records+0xef/0x1d0 [tls]
[   88.801256] Code: 00 02 48 89 43 08 e8 a0 0b 96 d9 48 89 df e8 48 dd
4d d9 4c 89 f8 4d 8b bf 98 00 00 00 48 05 98 00 00 00 48 89 04 24 49 39
c7 <49> 8b 1f 4d 89 fd 0f 84 af 00 00 00 41 8b 47 10 85 c0 0f 85 8d 00
[   88.805179] RSP: 0018:ffffbd888186fca8 EFLAGS: 00010213
[   88.806458] RAX: ffff9af1ed657c98 RBX: ffff9af1e88a1980 RCX: 0000000000000000
[   88.808050] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff9af1e88a1980
[   88.809724] RBP: ffff9af1e88a1980 R08: 0000000000000017 R09: ffff9af1ebeeb700
[   88.811294] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000
[   88.812917] R13: ffff9af1e88a1980 R14: ffff9af1ec13f800 R15: 0000000000000000
[   88.814506] FS:  00007fcad2240740(0000) GS:ffff9af1f7880000(0000) knlGS:0000000000000000
[   88.816337] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   88.817717] CR2: 0000000000000000 CR3: 0000000228b3e000 CR4: 00000000001406e0
[   88.819328] Call Trace:
[   88.820123]  tls_push_data+0x628/0x6a0 [tls]
[   88.821283]  ? remove_wait_queue+0x20/0x60
[   88.822383]  ? n_tty_read+0x683/0x910
[   88.823363]  tls_device_sendmsg+0x53/0xa0 [tls]
[   88.824505]  sock_sendmsg+0x36/0x50
[   88.825492]  sock_write_iter+0x87/0x100
[   88.826521]  __vfs_write+0x127/0x1b0
[   88.827499]  vfs_write+0xad/0x1b0
[   88.828454]  ksys_write+0x52/0xc0
[   88.829378]  do_syscall_64+0x5b/0x180
[   88.830369]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
[   88.831603] RIP: 0033:0x7fcad1451680

[ 1248.470626] BUG: unable to handle kernel NULL pointer dereference at 0000000000000000
[ 1248.472564] #PF error: [normal kernel read fault]
[ 1248.473790] PGD 0 P4D 0
[ 1248.474642] Oops: 0000 [#1] SMP PTI
[ 1248.475651] CPU: 3 PID: 7197 Comm: openssl Tainted: G           OE 5.0.0-rc4+ #3
[ 1248.477426] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
[ 1248.479310] RIP: 0010:tls_tx_records+0x110/0x1f0 [tls]
[ 1248.480644] Code: 00 02 48 89 43 08 e8 4f cb 63 d7 48 89 df e8 f7 9c
1b d7 4c 89 f8 4d 8b bf 98 00 00 00 48 05 98 00 00 00 48 89 04 24 49 39
c7 <49> 8b 1f 4d 89 fd 0f 84 af 00 00 00 41 8b 47 10 85 c0 0f 85 8d 00
[ 1248.484825] RSP: 0018:ffffaa0a41543c08 EFLAGS: 00010213
[ 1248.486154] RAX: ffff955a2755dc98 RBX: ffff955a36031980 RCX: 0000000000000006
[ 1248.487855] RDX: 0000000000000000 RSI: 000000000000002b RDI: 0000000000000286
[ 1248.489524] RBP: ffff955a36031980 R08: 0000000000000000 R09: 00000000000002b1
[ 1248.491394] R10: 0000000000000003 R11: 00000000ad55ad55 R12: 0000000000000000
[ 1248.493162] R13: 0000000000000000 R14: ffff955a2abe6c00 R15: 0000000000000000
[ 1248.494923] FS:  0000000000000000(0000) GS:ffff955a378c0000(0000) knlGS:0000000000000000
[ 1248.496847] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 1248.498357] CR2: 0000000000000000 CR3: 000000020c40e000 CR4: 00000000001406e0
[ 1248.500136] Call Trace:
[ 1248.500998]  ? tcp_check_oom+0xd0/0xd0
[ 1248.502106]  tls_sk_proto_close+0x127/0x1e0 [tls]
[ 1248.503411]  inet_release+0x3c/0x60
[ 1248.504530]  __sock_release+0x3d/0xb0
[ 1248.505611]  sock_close+0x11/0x20
[ 1248.506612]  __fput+0xb4/0x220
[ 1248.507559]  task_work_run+0x88/0xa0
[ 1248.508617]  do_exit+0x2cb/0xbc0
[ 1248.509597]  ? core_sys_select+0x17a/0x280
[ 1248.510740]  do_group_exit+0x39/0xb0
[ 1248.511789]  get_signal+0x1d0/0x630
[ 1248.512823]  do_signal+0x36/0x620
[ 1248.513822]  exit_to_usermode_loop+0x5c/0xc6
[ 1248.515003]  do_syscall_64+0x157/0x180
[ 1248.516094]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
[ 1248.517456] RIP: 0033:0x7fb398bd3f53
[ 1248.518537] Code: Bad RIP value.

Fixes: a42055e8d2c3 ("net/tls: Add support for async encryption of records for performance")
Signed-off-by: Boris Pismenny <borisp@mellanox.com>
Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tls.h    | 20 ++++----------------
 net/tls/tls_device.c |  9 +++++----
 net/tls/tls_main.c   | 13 -------------
 3 files changed, 9 insertions(+), 33 deletions(-)

(limited to 'include')

diff --git a/include/net/tls.h b/include/net/tls.h
index 9f4117ae2297..a528a082da73 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -199,10 +199,6 @@ struct tls_offload_context_tx {
 	(ALIGN(sizeof(struct tls_offload_context_tx), sizeof(void *)) +        \
 	 TLS_DRIVER_STATE_SIZE)
 
-enum {
-	TLS_PENDING_CLOSED_RECORD
-};
-
 struct cipher_context {
 	char *iv;
 	char *rec_seq;
@@ -335,17 +331,14 @@ int tls_push_sg(struct sock *sk, struct tls_context *ctx,
 int tls_push_partial_record(struct sock *sk, struct tls_context *ctx,
 			    int flags);
 
-int tls_push_pending_closed_record(struct sock *sk, struct tls_context *ctx,
-				   int flags, long *timeo);
-
 static inline struct tls_msg *tls_msg(struct sk_buff *skb)
 {
 	return (struct tls_msg *)strp_msg(skb);
 }
 
-static inline bool tls_is_pending_closed_record(struct tls_context *ctx)
+static inline bool tls_is_partially_sent_record(struct tls_context *ctx)
 {
-	return test_bit(TLS_PENDING_CLOSED_RECORD, &ctx->flags);
+	return !!ctx->partially_sent_record;
 }
 
 static inline int tls_complete_pending_work(struct sock *sk,
@@ -357,17 +350,12 @@ static inline int tls_complete_pending_work(struct sock *sk,
 	if (unlikely(sk->sk_write_pending))
 		rc = wait_on_pending_writer(sk, timeo);
 
-	if (!rc && tls_is_pending_closed_record(ctx))
-		rc = tls_push_pending_closed_record(sk, ctx, flags, timeo);
+	if (!rc && tls_is_partially_sent_record(ctx))
+		rc = tls_push_partial_record(sk, ctx, flags);
 
 	return rc;
 }
 
-static inline bool tls_is_partially_sent_record(struct tls_context *ctx)
-{
-	return !!ctx->partially_sent_record;
-}
-
 static inline bool tls_is_pending_open_record(struct tls_context *tls_ctx)
 {
 	return tls_ctx->pending_open_record_frags;
diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
index a5c17c47d08a..3e5e8e021a87 100644
--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -271,7 +271,6 @@ static int tls_push_record(struct sock *sk,
 	list_add_tail(&record->list, &offload_ctx->records_list);
 	spin_unlock_irq(&offload_ctx->lock);
 	offload_ctx->open_record = NULL;
-	set_bit(TLS_PENDING_CLOSED_RECORD, &ctx->flags);
 	tls_advance_record_sn(sk, &ctx->tx, ctx->crypto_send.info.version);
 
 	for (i = 0; i < record->num_frags; i++) {
@@ -368,9 +367,11 @@ static int tls_push_data(struct sock *sk,
 		return -sk->sk_err;
 
 	timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
-	rc = tls_complete_pending_work(sk, tls_ctx, flags, &timeo);
-	if (rc < 0)
-		return rc;
+	if (tls_is_partially_sent_record(tls_ctx)) {
+		rc = tls_push_partial_record(sk, tls_ctx, flags);
+		if (rc < 0)
+			return rc;
+	}
 
 	pfrag = sk_page_frag(sk);
 
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index caff15b2f9b2..7e05af75536d 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -209,19 +209,6 @@ int tls_push_partial_record(struct sock *sk, struct tls_context *ctx,
 	return tls_push_sg(sk, ctx, sg, offset, flags);
 }
 
-int tls_push_pending_closed_record(struct sock *sk,
-				   struct tls_context *tls_ctx,
-				   int flags, long *timeo)
-{
-	struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
-
-	if (tls_is_partially_sent_record(tls_ctx) ||
-	    !list_empty(&ctx->tx_list))
-		return tls_tx_records(sk, flags);
-	else
-		return tls_ctx->push_pending_record(sk, flags);
-}
-
 static void tls_write_space(struct sock *sk)
 {
 	struct tls_context *ctx = tls_get_ctx(sk);
-- 
cgit v1.2.3


From 7463d3a2db0efea3701aab5eeb310e0d8157aff7 Mon Sep 17 00:00:00 2001
From: Boris Pismenny <borisp@mellanox.com>
Date: Wed, 27 Feb 2019 17:38:04 +0200
Subject: tls: Fix write space handling

TLS device cannot use the sw context. This patch returns the original
tls device write space handler and moves the sw/device specific portions
to the relevant files.

Also, we remove the write_space call for the tls_sw flow, because it
handles partial records in its delayed tx work handler.

Fixes: a42055e8d2c3 ("net/tls: Add support for async encryption of records for performance")
Signed-off-by: Boris Pismenny <borisp@mellanox.com>
Reviewed-by: Eran Ben Elisha <eranbe@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tls.h    |  3 +++
 net/tls/tls_device.c | 17 +++++++++++++++++
 net/tls/tls_main.c   | 15 ++++++---------
 net/tls/tls_sw.c     | 13 +++++++++++++
 4 files changed, 39 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/net/tls.h b/include/net/tls.h
index a528a082da73..a5a938583295 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -519,6 +519,9 @@ static inline bool tls_sw_has_ctx_tx(const struct sock *sk)
 	return !!tls_sw_ctx_tx(ctx);
 }
 
+void tls_sw_write_space(struct sock *sk, struct tls_context *ctx);
+void tls_device_write_space(struct sock *sk, struct tls_context *ctx);
+
 static inline struct tls_offload_context_rx *
 tls_offload_ctx_rx(const struct tls_context *tls_ctx)
 {
diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
index 3e5e8e021a87..4a1da837a733 100644
--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -546,6 +546,23 @@ static int tls_device_push_pending_record(struct sock *sk, int flags)
 	return tls_push_data(sk, &msg_iter, 0, flags, TLS_RECORD_TYPE_DATA);
 }
 
+void tls_device_write_space(struct sock *sk, struct tls_context *ctx)
+{
+	int rc = 0;
+
+	if (!sk->sk_write_pending && tls_is_partially_sent_record(ctx)) {
+		gfp_t sk_allocation = sk->sk_allocation;
+
+		sk->sk_allocation = GFP_ATOMIC;
+		rc = tls_push_partial_record(sk, ctx,
+					     MSG_DONTWAIT | MSG_NOSIGNAL);
+		sk->sk_allocation = sk_allocation;
+	}
+
+	if (!rc)
+		ctx->sk_write_space(sk);
+}
+
 void handle_device_resync(struct sock *sk, u32 seq, u64 rcd_sn)
 {
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index 7e05af75536d..17e8667917aa 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -212,7 +212,6 @@ int tls_push_partial_record(struct sock *sk, struct tls_context *ctx,
 static void tls_write_space(struct sock *sk)
 {
 	struct tls_context *ctx = tls_get_ctx(sk);
-	struct tls_sw_context_tx *tx_ctx = tls_sw_ctx_tx(ctx);
 
 	/* If in_tcp_sendpages call lower protocol write space handler
 	 * to ensure we wake up any waiting operations there. For example
@@ -223,14 +222,12 @@ static void tls_write_space(struct sock *sk)
 		return;
 	}
 
-	/* Schedule the transmission if tx list is ready */
-	if (is_tx_ready(tx_ctx) && !sk->sk_write_pending) {
-		/* Schedule the transmission */
-		if (!test_and_set_bit(BIT_TX_SCHEDULED, &tx_ctx->tx_bitmask))
-			schedule_delayed_work(&tx_ctx->tx_work.work, 0);
-	}
-
-	ctx->sk_write_space(sk);
+#ifdef CONFIG_TLS_DEVICE
+	if (ctx->tx_conf == TLS_HW)
+		tls_device_write_space(sk, ctx);
+	else
+#endif
+		tls_sw_write_space(sk, ctx);
 }
 
 static void tls_ctx_free(struct tls_context *ctx)
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 1cc830582fa8..917caacd4d31 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -2126,6 +2126,19 @@ static void tx_work_handler(struct work_struct *work)
 	release_sock(sk);
 }
 
+void tls_sw_write_space(struct sock *sk, struct tls_context *ctx)
+{
+	struct tls_sw_context_tx *tx_ctx = tls_sw_ctx_tx(ctx);
+
+	/* Schedule the transmission if tx list is ready */
+	if (is_tx_ready(tx_ctx) && !sk->sk_write_pending) {
+		/* Schedule the transmission */
+		if (!test_and_set_bit(BIT_TX_SCHEDULED,
+				      &tx_ctx->tx_bitmask))
+			schedule_delayed_work(&tx_ctx->tx_work.work, 0);
+	}
+}
+
 int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
 {
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
-- 
cgit v1.2.3


From e36202a844d4eff2ab07bcef998d7b4beda9761f Mon Sep 17 00:00:00 2001
From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Date: Fri, 22 Feb 2019 18:59:40 +0900
Subject: printk: Remove no longer used LOG_PREFIX.

When commit 5becfb1df5ac8e49 ("kmsg: merge continuation records while
printing") introduced LOG_PREFIX, we used KERN_DEFAULT etc. as a flag
for setting LOG_PREFIX in order to tell whether to call cont_add()
(i.e. whether to append the message to "struct cont").

But since commit 4bcc595ccd80decb ("printk: reinstate KERN_CONT for
printing continuation lines") inverted the behavior (i.e. don't append
the message to "struct cont" unless KERN_CONT is specified) and commit
5aa068ea4082b39e ("printk: remove games with previous record flags")
removed the last LOG_PREFIX check, setting LOG_PREFIX via KERN_DEFAULT
etc. is no longer meaningful.

Therefore, we can remove LOG_PREFIX and make KERN_DEFAULT empty string.

Link: http://lkml.kernel.org/r/1550829580-9189-1-git-send-email-penguin-kernel@I-love.SAKURA.ne.jp
To: Steven Rostedt <rostedt@goodmis.org>
To: Linus Torvalds <torvalds@linux-foundation.org>
Cc: linux-kernel@vger.kernel.org
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Reviewed-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Signed-off-by: Petr Mladek <pmladek@suse.com>
---
 include/linux/kern_levels.h | 2 +-
 include/linux/printk.h      | 1 -
 kernel/printk/printk.c      | 6 +-----
 3 files changed, 2 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/kern_levels.h b/include/linux/kern_levels.h
index d237fe854ad9..bf2389c26ae3 100644
--- a/include/linux/kern_levels.h
+++ b/include/linux/kern_levels.h
@@ -14,7 +14,7 @@
 #define KERN_INFO	KERN_SOH "6"	/* informational */
 #define KERN_DEBUG	KERN_SOH "7"	/* debug-level messages */
 
-#define KERN_DEFAULT	KERN_SOH "d"	/* the default kernel loglevel */
+#define KERN_DEFAULT	""		/* the default kernel loglevel */
 
 /*
  * Annotation for a "continued" line of log printout (only done after a
diff --git a/include/linux/printk.h b/include/linux/printk.h
index 55aa96975fa2..97aa12c928d4 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -18,7 +18,6 @@ static inline int printk_get_level(const char *buffer)
 	if (buffer[0] == KERN_SOH_ASCII && buffer[1]) {
 		switch (buffer[1]) {
 		case '0' ... '7':
-		case 'd':	/* KERN_DEFAULT */
 		case 'c':	/* KERN_CONT */
 			return buffer[1];
 		}
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index b4d26388bc62..9b6783c158f9 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -345,7 +345,6 @@ static int console_msg_format = MSG_FORMAT_DEFAULT;
 
 enum log_flags {
 	LOG_NEWLINE	= 2,	/* text ended with a newline */
-	LOG_PREFIX	= 4,	/* text started with a prefix */
 	LOG_CONT	= 8,	/* text is a fragment of a continuation line */
 };
 
@@ -1922,9 +1921,6 @@ int vprintk_store(int facility, int level,
 			case '0' ... '7':
 				if (level == LOGLEVEL_DEFAULT)
 					level = kern_level - '0';
-				/* fallthrough */
-			case 'd':	/* KERN_DEFAULT */
-				lflags |= LOG_PREFIX;
 				break;
 			case 'c':	/* KERN_CONT */
 				lflags |= LOG_CONT;
@@ -1939,7 +1935,7 @@ int vprintk_store(int facility, int level,
 		level = default_message_loglevel;
 
 	if (dict)
-		lflags |= LOG_PREFIX|LOG_NEWLINE;
+		lflags |= LOG_NEWLINE;
 
 	return log_output(facility, level, lflags,
 			  dict, dictlen, text, text_len);
-- 
cgit v1.2.3


From 84c4e1f89fefe70554da0ab33be72c9be7994379 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 3 Mar 2019 14:23:33 -0800
Subject: aio: simplify - and fix - fget/fput for io_submit()

Al Viro root-caused a race where the IOCB_CMD_POLL handling of
fget/fput() could cause us to access the file pointer after it had
already been freed:

 "In more details - normally IOCB_CMD_POLL handling looks so:

   1) io_submit(2) allocates aio_kiocb instance and passes it to
      aio_poll()

   2) aio_poll() resolves the descriptor to struct file by req->file =
      fget(iocb->aio_fildes)

   3) aio_poll() sets ->woken to false and raises ->ki_refcnt of that
      aio_kiocb to 2 (bumps by 1, that is).

   4) aio_poll() calls vfs_poll(). After sanity checks (basically,
      "poll_wait() had been called and only once") it locks the queue.
      That's what the extra reference to iocb had been for - we know we
      can safely access it.

   5) With queue locked, we check if ->woken has already been set to
      true (by aio_poll_wake()) and, if it had been, we unlock the
      queue, drop a reference to aio_kiocb and bugger off - at that
      point it's a responsibility to aio_poll_wake() and the stuff
      called/scheduled by it. That code will drop the reference to file
      in req->file, along with the other reference to our aio_kiocb.

   6) otherwise, we see whether we need to wait. If we do, we unlock the
      queue, drop one reference to aio_kiocb and go away - eventual
      wakeup (or cancel) will deal with the reference to file and with
      the other reference to aio_kiocb

   7) otherwise we remove ourselves from waitqueue (still under the
      queue lock), so that wakeup won't get us. No async activity will
      be happening, so we can safely drop req->file and iocb ourselves.

  If wakeup happens while we are in vfs_poll(), we are fine - aio_kiocb
  won't get freed under us, so we can do all the checks and locking
  safely. And we don't touch ->file if we detect that case.

  However, vfs_poll() most certainly *does* touch the file it had been
  given. So wakeup coming while we are still in ->poll() might end up
  doing fput() on that file. That case is not too rare, and usually we
  are saved by the still present reference from descriptor table - that
  fput() is not the final one.

  But if another thread closes that descriptor right after our fget()
  and wakeup does happen before ->poll() returns, we are in trouble -
  final fput() done while we are in the middle of a method:

Al also wrote a patch to take an extra reference to the file descriptor
to fix this, but I instead suggested we just streamline the whole file
pointer handling by submit_io() so that the generic aio submission code
simply keeps the file pointer around until the aio has completed.

Fixes: bfe4037e722e ("aio: implement IOCB_CMD_POLL")
Acked-by: Al Viro <viro@zeniv.linux.org.uk>
Reported-by: syzbot+503d4cc169fcec1cb18c@syzkaller.appspotmail.com
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/aio.c           | 72 ++++++++++++++++++++++--------------------------------
 include/linux/fs.h |  8 +++++-
 2 files changed, 36 insertions(+), 44 deletions(-)

(limited to 'include')

diff --git a/fs/aio.c b/fs/aio.c
index aaaaf4d12c73..82c08422b0f4 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -167,9 +167,13 @@ struct kioctx {
 	unsigned		id;
 };
 
+/*
+ * First field must be the file pointer in all the
+ * iocb unions! See also 'struct kiocb' in <linux/fs.h>
+ */
 struct fsync_iocb {
-	struct work_struct	work;
 	struct file		*file;
+	struct work_struct	work;
 	bool			datasync;
 };
 
@@ -183,8 +187,15 @@ struct poll_iocb {
 	struct work_struct	work;
 };
 
+/*
+ * NOTE! Each of the iocb union members has the file pointer
+ * as the first entry in their struct definition. So you can
+ * access the file pointer through any of the sub-structs,
+ * or directly as just 'ki_filp' in this struct.
+ */
 struct aio_kiocb {
 	union {
+		struct file		*ki_filp;
 		struct kiocb		rw;
 		struct fsync_iocb	fsync;
 		struct poll_iocb	poll;
@@ -1060,6 +1071,8 @@ static inline void iocb_put(struct aio_kiocb *iocb)
 {
 	if (refcount_read(&iocb->ki_refcnt) == 0 ||
 	    refcount_dec_and_test(&iocb->ki_refcnt)) {
+		if (iocb->ki_filp)
+			fput(iocb->ki_filp);
 		percpu_ref_put(&iocb->ki_ctx->reqs);
 		kmem_cache_free(kiocb_cachep, iocb);
 	}
@@ -1424,7 +1437,6 @@ static void aio_complete_rw(struct kiocb *kiocb, long res, long res2)
 		file_end_write(kiocb->ki_filp);
 	}
 
-	fput(kiocb->ki_filp);
 	aio_complete(iocb, res, res2);
 }
 
@@ -1432,9 +1444,6 @@ static int aio_prep_rw(struct kiocb *req, const struct iocb *iocb)
 {
 	int ret;
 
-	req->ki_filp = fget(iocb->aio_fildes);
-	if (unlikely(!req->ki_filp))
-		return -EBADF;
 	req->ki_complete = aio_complete_rw;
 	req->private = NULL;
 	req->ki_pos = iocb->aio_offset;
@@ -1451,7 +1460,7 @@ static int aio_prep_rw(struct kiocb *req, const struct iocb *iocb)
 		ret = ioprio_check_cap(iocb->aio_reqprio);
 		if (ret) {
 			pr_debug("aio ioprio check cap error: %d\n", ret);
-			goto out_fput;
+			return ret;
 		}
 
 		req->ki_ioprio = iocb->aio_reqprio;
@@ -1460,14 +1469,10 @@ static int aio_prep_rw(struct kiocb *req, const struct iocb *iocb)
 
 	ret = kiocb_set_rw_flags(req, iocb->aio_rw_flags);
 	if (unlikely(ret))
-		goto out_fput;
+		return ret;
 
 	req->ki_flags &= ~IOCB_HIPRI; /* no one is going to poll for this I/O */
 	return 0;
-
-out_fput:
-	fput(req->ki_filp);
-	return ret;
 }
 
 static int aio_setup_rw(int rw, const struct iocb *iocb, struct iovec **iovec,
@@ -1521,24 +1526,19 @@ static ssize_t aio_read(struct kiocb *req, const struct iocb *iocb,
 	if (ret)
 		return ret;
 	file = req->ki_filp;
-
-	ret = -EBADF;
 	if (unlikely(!(file->f_mode & FMODE_READ)))
-		goto out_fput;
+		return -EBADF;
 	ret = -EINVAL;
 	if (unlikely(!file->f_op->read_iter))
-		goto out_fput;
+		return -EINVAL;
 
 	ret = aio_setup_rw(READ, iocb, &iovec, vectored, compat, &iter);
 	if (ret)
-		goto out_fput;
+		return ret;
 	ret = rw_verify_area(READ, file, &req->ki_pos, iov_iter_count(&iter));
 	if (!ret)
 		aio_rw_done(req, call_read_iter(file, req, &iter));
 	kfree(iovec);
-out_fput:
-	if (unlikely(ret))
-		fput(file);
 	return ret;
 }
 
@@ -1555,16 +1555,14 @@ static ssize_t aio_write(struct kiocb *req, const struct iocb *iocb,
 		return ret;
 	file = req->ki_filp;
 
-	ret = -EBADF;
 	if (unlikely(!(file->f_mode & FMODE_WRITE)))
-		goto out_fput;
-	ret = -EINVAL;
+		return -EBADF;
 	if (unlikely(!file->f_op->write_iter))
-		goto out_fput;
+		return -EINVAL;
 
 	ret = aio_setup_rw(WRITE, iocb, &iovec, vectored, compat, &iter);
 	if (ret)
-		goto out_fput;
+		return ret;
 	ret = rw_verify_area(WRITE, file, &req->ki_pos, iov_iter_count(&iter));
 	if (!ret) {
 		/*
@@ -1582,9 +1580,6 @@ static ssize_t aio_write(struct kiocb *req, const struct iocb *iocb,
 		aio_rw_done(req, call_write_iter(file, req, &iter));
 	}
 	kfree(iovec);
-out_fput:
-	if (unlikely(ret))
-		fput(file);
 	return ret;
 }
 
@@ -1594,7 +1589,6 @@ static void aio_fsync_work(struct work_struct *work)
 	int ret;
 
 	ret = vfs_fsync(req->file, req->datasync);
-	fput(req->file);
 	aio_complete(container_of(req, struct aio_kiocb, fsync), ret, 0);
 }
 
@@ -1605,13 +1599,8 @@ static int aio_fsync(struct fsync_iocb *req, const struct iocb *iocb,
 			iocb->aio_rw_flags))
 		return -EINVAL;
 
-	req->file = fget(iocb->aio_fildes);
-	if (unlikely(!req->file))
-		return -EBADF;
-	if (unlikely(!req->file->f_op->fsync)) {
-		fput(req->file);
+	if (unlikely(!req->file->f_op->fsync))
 		return -EINVAL;
-	}
 
 	req->datasync = datasync;
 	INIT_WORK(&req->work, aio_fsync_work);
@@ -1621,10 +1610,7 @@ static int aio_fsync(struct fsync_iocb *req, const struct iocb *iocb,
 
 static inline void aio_poll_complete(struct aio_kiocb *iocb, __poll_t mask)
 {
-	struct file *file = iocb->poll.file;
-
 	aio_complete(iocb, mangle_poll(mask), 0);
-	fput(file);
 }
 
 static void aio_poll_complete_work(struct work_struct *work)
@@ -1743,9 +1729,6 @@ static ssize_t aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb)
 
 	INIT_WORK(&req->work, aio_poll_complete_work);
 	req->events = demangle_poll(iocb->aio_buf) | EPOLLERR | EPOLLHUP;
-	req->file = fget(iocb->aio_fildes);
-	if (unlikely(!req->file))
-		return -EBADF;
 
 	req->head = NULL;
 	req->woken = false;
@@ -1788,10 +1771,8 @@ static ssize_t aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb)
 	spin_unlock_irq(&ctx->ctx_lock);
 
 out:
-	if (unlikely(apt.error)) {
-		fput(req->file);
+	if (unlikely(apt.error))
 		return apt.error;
-	}
 
 	if (mask)
 		aio_poll_complete(aiocb, mask);
@@ -1829,6 +1810,11 @@ static int __io_submit_one(struct kioctx *ctx, const struct iocb *iocb,
 	if (unlikely(!req))
 		goto out_put_reqs_available;
 
+	req->ki_filp = fget(iocb->aio_fildes);
+	ret = -EBADF;
+	if (unlikely(!req->ki_filp))
+		goto out_put_req;
+
 	if (iocb->aio_flags & IOCB_FLAG_RESFD) {
 		/*
 		 * If the IOCB_FLAG_RESFD flag of aio_flags is set, get an
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 29d8e2cfed0e..fd423fec8d83 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -304,13 +304,19 @@ enum rw_hint {
 
 struct kiocb {
 	struct file		*ki_filp;
+
+	/* The 'ki_filp' pointer is shared in a union for aio */
+	randomized_struct_fields_start
+
 	loff_t			ki_pos;
 	void (*ki_complete)(struct kiocb *iocb, long ret, long ret2);
 	void			*private;
 	int			ki_flags;
 	u16			ki_hint;
 	u16			ki_ioprio; /* See linux/ioprio.h */
-} __randomize_layout;
+
+	randomized_struct_fields_end
+};
 
 static inline bool is_sync_kiocb(struct kiocb *kiocb)
 {
-- 
cgit v1.2.3


From 736706bee3298208343a76096370e4f6a5c55915 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Mon, 4 Mar 2019 10:39:05 -0800
Subject: get rid of legacy 'get_ds()' function

Every in-kernel use of this function defined it to KERNEL_DS (either as
an actual define, or as an inline function).  It's an entirely
historical artifact, and long long long ago used to actually read the
segment selector valueof '%ds' on x86.

Which in the kernel is always KERNEL_DS.

Inspired by a patch from Jann Horn that just did this for a very small
subset of users (the ones in fs/), along with Al who suggested a script.
I then just took it to the logical extreme and removed all the remaining
gunk.

Roughly scripted with

   git grep -l '(get_ds())' -- :^tools/ | xargs sed -i 's/(get_ds())/(KERNEL_DS)/'
   git grep -lw 'get_ds' -- :^tools/ | xargs sed -i '/^#define get_ds()/d'

plus manual fixups to remove a few unusual usage patterns, the couple of
inline function cases and to fix up a comment that had become stale.

The 'get_ds()' function remains in an x86 kvm selftest, since in user
space it actually does something relevant.

Inspired-by: Jann Horn <jannh@google.com>
Inspired-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/alpha/include/asm/uaccess.h                 | 1 -
 arch/arm/include/asm/uaccess.h                   | 1 -
 arch/arm/kernel/process.c                        | 2 +-
 arch/arm/xen/hypercall.S                         | 3 +--
 arch/arm64/include/asm/uaccess.h                 | 1 -
 arch/csky/include/asm/segment.h                  | 1 -
 arch/h8300/include/asm/segment.h                 | 6 ------
 arch/ia64/include/asm/uaccess.h                  | 1 -
 arch/m68k/include/asm/segment.h                  | 7 -------
 arch/microblaze/include/asm/uaccess.h            | 1 -
 arch/mips/include/asm/uaccess.h                  | 1 -
 arch/mips/kernel/ftrace.c                        | 6 +++---
 arch/mips/kernel/kgdb.c                          | 4 ++--
 arch/mips/kernel/traps.c                         | 2 +-
 arch/nds32/include/asm/uaccess.h                 | 1 -
 arch/nds32/kernel/process.c                      | 2 +-
 arch/nios2/include/asm/uaccess.h                 | 1 -
 arch/openrisc/include/asm/uaccess.h              | 1 -
 arch/parisc/include/asm/uaccess.h                | 1 -
 arch/powerpc/include/asm/uaccess.h               | 1 -
 arch/riscv/include/asm/uaccess.h                 | 1 -
 arch/s390/include/asm/uaccess.h                  | 1 -
 arch/sh/include/asm/segment.h                    | 1 -
 arch/sparc/include/asm/uaccess_32.h              | 1 -
 arch/sparc/include/asm/uaccess_64.h              | 1 -
 arch/x86/include/asm/uaccess.h                   | 1 -
 arch/xtensa/include/asm/asm-uaccess.h            | 2 --
 arch/xtensa/include/asm/uaccess.h                | 1 -
 drivers/staging/rtl8723bs/os_dep/osdep_service.c | 4 ++--
 fs/read_write.c                                  | 6 +++---
 fs/splice.c                                      | 2 +-
 include/asm-generic/uaccess.h                    | 1 -
 security/integrity/iint.c                        | 2 +-
 33 files changed, 16 insertions(+), 52 deletions(-)

(limited to 'include')

diff --git a/arch/alpha/include/asm/uaccess.h b/arch/alpha/include/asm/uaccess.h
index cf4ac791a592..1fe2b56cb861 100644
--- a/arch/alpha/include/asm/uaccess.h
+++ b/arch/alpha/include/asm/uaccess.h
@@ -18,7 +18,6 @@
 #define USER_DS		((mm_segment_t) { -0x40000000000UL })
 
 #define get_fs()  (current_thread_info()->addr_limit)
-#define get_ds()  (KERNEL_DS)
 #define set_fs(x) (current_thread_info()->addr_limit = (x))
 
 #define segment_eq(a, b)	((a).seg == (b).seg)
diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h
index 42aa4a22803c..ae5a0df5316e 100644
--- a/arch/arm/include/asm/uaccess.h
+++ b/arch/arm/include/asm/uaccess.h
@@ -59,7 +59,6 @@ extern int __put_user_bad(void);
  * Note that this is actually 0x1,0000,0000
  */
 #define KERNEL_DS	0x00000000
-#define get_ds()	(KERNEL_DS)
 
 #ifdef CONFIG_MMU
 
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index 16601d1442d1..72cc0862a30e 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -150,7 +150,7 @@ void __show_regs(struct pt_regs *regs)
 		if ((domain & domain_mask(DOMAIN_USER)) ==
 		    domain_val(DOMAIN_USER, DOMAIN_NOACCESS))
 			segment = "none";
-		else if (fs == get_ds())
+		else if (fs == KERNEL_DS)
 			segment = "kernel";
 		else
 			segment = "user";
diff --git a/arch/arm/xen/hypercall.S b/arch/arm/xen/hypercall.S
index b0b80c0f09f3..b11bba542fac 100644
--- a/arch/arm/xen/hypercall.S
+++ b/arch/arm/xen/hypercall.S
@@ -113,8 +113,7 @@ ENTRY(privcmd_call)
 
 	/*
 	 * Disable userspace access from kernel. This is fine to do it
-	 * unconditionally as no set_fs(KERNEL_DS)/set_fs(get_ds()) is
-	 * called before.
+	 * unconditionally as no set_fs(KERNEL_DS) is called before.
 	 */
 	uaccess_disable r4
 
diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h
index 547d7a0c9d05..f1e5c9165809 100644
--- a/arch/arm64/include/asm/uaccess.h
+++ b/arch/arm64/include/asm/uaccess.h
@@ -34,7 +34,6 @@
 #include <asm/memory.h>
 #include <asm/extable.h>
 
-#define get_ds()	(KERNEL_DS)
 #define get_fs()	(current_thread_info()->addr_limit)
 
 static inline void set_fs(mm_segment_t fs)
diff --git a/arch/csky/include/asm/segment.h b/arch/csky/include/asm/segment.h
index ffdc4c47ff43..db2640d5f575 100644
--- a/arch/csky/include/asm/segment.h
+++ b/arch/csky/include/asm/segment.h
@@ -9,7 +9,6 @@ typedef struct {
 } mm_segment_t;
 
 #define KERNEL_DS		((mm_segment_t) { 0xFFFFFFFF })
-#define get_ds()		KERNEL_DS
 
 #define USER_DS			((mm_segment_t) { 0x80000000UL })
 #define get_fs()		(current_thread_info()->addr_limit)
diff --git a/arch/h8300/include/asm/segment.h b/arch/h8300/include/asm/segment.h
index 9adbf7e1aaa3..a407978f9f9f 100644
--- a/arch/h8300/include/asm/segment.h
+++ b/arch/h8300/include/asm/segment.h
@@ -33,12 +33,6 @@ static inline mm_segment_t get_fs(void)
 	return USER_DS;
 }
 
-static inline mm_segment_t get_ds(void)
-{
-	/* return the supervisor data space code */
-	return KERNEL_DS;
-}
-
 #define segment_eq(a, b)	((a).seg == (b).seg)
 
 #endif /* __ASSEMBLY__ */
diff --git a/arch/ia64/include/asm/uaccess.h b/arch/ia64/include/asm/uaccess.h
index 306d469e43da..89782ad3fb88 100644
--- a/arch/ia64/include/asm/uaccess.h
+++ b/arch/ia64/include/asm/uaccess.h
@@ -48,7 +48,6 @@
 #define KERNEL_DS	((mm_segment_t) { ~0UL })		/* cf. access_ok() */
 #define USER_DS		((mm_segment_t) { TASK_SIZE-1 })	/* cf. access_ok() */
 
-#define get_ds()  (KERNEL_DS)
 #define get_fs()  (current_thread_info()->addr_limit)
 #define set_fs(x) (current_thread_info()->addr_limit = (x))
 
diff --git a/arch/m68k/include/asm/segment.h b/arch/m68k/include/asm/segment.h
index 0b4cc1e079b5..c6686559e9b7 100644
--- a/arch/m68k/include/asm/segment.h
+++ b/arch/m68k/include/asm/segment.h
@@ -45,16 +45,9 @@ static inline void set_fs(mm_segment_t val)
 			      : /* no outputs */ : "r" (val.seg) : "memory");
 }
 
-static inline mm_segment_t get_ds(void)
-{
-    /* return the supervisor data space code */
-    return KERNEL_DS;
-}
-
 #else
 #define USER_DS		MAKE_MM_SEG(TASK_SIZE)
 #define KERNEL_DS	MAKE_MM_SEG(0xFFFFFFFF)
-#define get_ds()	(KERNEL_DS)
 #define get_fs()	(current_thread_info()->addr_limit)
 #define set_fs(x)	(current_thread_info()->addr_limit = (x))
 #endif
diff --git a/arch/microblaze/include/asm/uaccess.h b/arch/microblaze/include/asm/uaccess.h
index dbfea093a7c7..bff2a71c828a 100644
--- a/arch/microblaze/include/asm/uaccess.h
+++ b/arch/microblaze/include/asm/uaccess.h
@@ -42,7 +42,6 @@
 #  define USER_DS	MAKE_MM_SEG(TASK_SIZE - 1)
 #  endif
 
-# define get_ds()	(KERNEL_DS)
 # define get_fs()	(current_thread_info()->addr_limit)
 # define set_fs(val)	(current_thread_info()->addr_limit = (val))
 
diff --git a/arch/mips/include/asm/uaccess.h b/arch/mips/include/asm/uaccess.h
index d43c1dc6ef15..62b298c50905 100644
--- a/arch/mips/include/asm/uaccess.h
+++ b/arch/mips/include/asm/uaccess.h
@@ -69,7 +69,6 @@ extern u64 __ua_limit;
 #define USER_DS		((mm_segment_t) { __UA_LIMIT })
 #endif
 
-#define get_ds()	(KERNEL_DS)
 #define get_fs()	(current_thread_info()->addr_limit)
 #define set_fs(x)	(current_thread_info()->addr_limit = (x))
 
diff --git a/arch/mips/kernel/ftrace.c b/arch/mips/kernel/ftrace.c
index 2ea0ec95efe9..4b5e1f2bfbce 100644
--- a/arch/mips/kernel/ftrace.c
+++ b/arch/mips/kernel/ftrace.c
@@ -86,7 +86,7 @@ static int ftrace_modify_code(unsigned long ip, unsigned int new_code)
 		return -EFAULT;
 
 	old_fs = get_fs();
-	set_fs(get_ds());
+	set_fs(KERNEL_DS);
 	flush_icache_range(ip, ip + 8);
 	set_fs(old_fs);
 
@@ -111,7 +111,7 @@ static int ftrace_modify_code_2(unsigned long ip, unsigned int new_code1,
 
 	ip -= 4;
 	old_fs = get_fs();
-	set_fs(get_ds());
+	set_fs(KERNEL_DS);
 	flush_icache_range(ip, ip + 8);
 	set_fs(old_fs);
 
@@ -135,7 +135,7 @@ static int ftrace_modify_code_2r(unsigned long ip, unsigned int new_code1,
 		return -EFAULT;
 
 	old_fs = get_fs();
-	set_fs(get_ds());
+	set_fs(KERNEL_DS);
 	flush_icache_range(ip, ip + 8);
 	set_fs(old_fs);
 
diff --git a/arch/mips/kernel/kgdb.c b/arch/mips/kernel/kgdb.c
index 149100e1bc7c..6e574c02e4c3 100644
--- a/arch/mips/kernel/kgdb.c
+++ b/arch/mips/kernel/kgdb.c
@@ -212,7 +212,7 @@ void kgdb_call_nmi_hook(void *ignored)
 	mm_segment_t old_fs;
 
 	old_fs = get_fs();
-	set_fs(get_ds());
+	set_fs(KERNEL_DS);
 
 	kgdb_nmicallback(raw_smp_processor_id(), NULL);
 
@@ -318,7 +318,7 @@ static int kgdb_mips_notify(struct notifier_block *self, unsigned long cmd,
 
 	/* Kernel mode. Set correct address limit */
 	old_fs = get_fs();
-	set_fs(get_ds());
+	set_fs(KERNEL_DS);
 
 	if (atomic_read(&kgdb_active) != -1)
 		kgdb_nmicallback(smp_processor_id(), regs);
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index c91097f7b32f..cbab46004e99 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -1077,7 +1077,7 @@ asmlinkage void do_tr(struct pt_regs *regs)
 
 	seg = get_fs();
 	if (!user_mode(regs))
-		set_fs(get_ds());
+		set_fs(KERNEL_DS);
 
 	prev_state = exception_enter();
 	current->thread.trap_nr = (regs->cp0_cause >> 2) & 0x1f;
diff --git a/arch/nds32/include/asm/uaccess.h b/arch/nds32/include/asm/uaccess.h
index 53dcb49b0b12..116598b47c4d 100644
--- a/arch/nds32/include/asm/uaccess.h
+++ b/arch/nds32/include/asm/uaccess.h
@@ -37,7 +37,6 @@ extern int fixup_exception(struct pt_regs *regs);
 #define KERNEL_DS 	((mm_segment_t) { ~0UL })
 #define USER_DS		((mm_segment_t) {TASK_SIZE - 1})
 
-#define get_ds()	(KERNEL_DS)
 #define get_fs()	(current_thread_info()->addr_limit)
 #define user_addr_max	get_fs
 
diff --git a/arch/nds32/kernel/process.c b/arch/nds32/kernel/process.c
index ab7ab46234b1..9712fd474f2c 100644
--- a/arch/nds32/kernel/process.c
+++ b/arch/nds32/kernel/process.c
@@ -121,7 +121,7 @@ void show_regs(struct pt_regs *regs)
 		regs->uregs[3], regs->uregs[2], regs->uregs[1], regs->uregs[0]);
 	pr_info("  IRQs o%s  Segment %s\n",
 		interrupts_enabled(regs) ? "n" : "ff",
-		segment_eq(get_fs(), get_ds())? "kernel" : "user");
+		segment_eq(get_fs(), KERNEL_DS)? "kernel" : "user");
 }
 
 EXPORT_SYMBOL(show_regs);
diff --git a/arch/nios2/include/asm/uaccess.h b/arch/nios2/include/asm/uaccess.h
index e0ea10806491..e83f831a76f9 100644
--- a/arch/nios2/include/asm/uaccess.h
+++ b/arch/nios2/include/asm/uaccess.h
@@ -26,7 +26,6 @@
 #define USER_DS			MAKE_MM_SEG(0x80000000UL)
 #define KERNEL_DS		MAKE_MM_SEG(0)
 
-#define get_ds()		(KERNEL_DS)
 
 #define get_fs()		(current_thread_info()->addr_limit)
 #define set_fs(seg)		(current_thread_info()->addr_limit = (seg))
diff --git a/arch/openrisc/include/asm/uaccess.h b/arch/openrisc/include/asm/uaccess.h
index a44682c8adc3..45afd9ab78c1 100644
--- a/arch/openrisc/include/asm/uaccess.h
+++ b/arch/openrisc/include/asm/uaccess.h
@@ -42,7 +42,6 @@
  */
 
 #define KERNEL_DS	(~0UL)
-#define get_ds()	(KERNEL_DS)
 
 #define USER_DS		(TASK_SIZE)
 #define get_fs()	(current_thread_info()->addr_limit)
diff --git a/arch/parisc/include/asm/uaccess.h b/arch/parisc/include/asm/uaccess.h
index 30ac2865ea73..ebbb9ffe038c 100644
--- a/arch/parisc/include/asm/uaccess.h
+++ b/arch/parisc/include/asm/uaccess.h
@@ -16,7 +16,6 @@
 
 #define segment_eq(a, b) ((a).seg == (b).seg)
 
-#define get_ds()	(KERNEL_DS)
 #define get_fs()	(current_thread_info()->addr_limit)
 #define set_fs(x)	(current_thread_info()->addr_limit = (x))
 
diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
index e3a731793ea2..4d6d905e9138 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -28,7 +28,6 @@
 #define USER_DS		MAKE_MM_SEG(TASK_SIZE - 1)
 #endif
 
-#define get_ds()	(KERNEL_DS)
 #define get_fs()	(current->thread.addr_limit)
 
 static inline void set_fs(mm_segment_t fs)
diff --git a/arch/riscv/include/asm/uaccess.h b/arch/riscv/include/asm/uaccess.h
index 637b896894fc..a00168b980d2 100644
--- a/arch/riscv/include/asm/uaccess.h
+++ b/arch/riscv/include/asm/uaccess.h
@@ -41,7 +41,6 @@
 #define KERNEL_DS	(~0UL)
 #define USER_DS		(TASK_SIZE)
 
-#define get_ds()	(KERNEL_DS)
 #define get_fs()	(current_thread_info()->addr_limit)
 
 static inline void set_fs(mm_segment_t fs)
diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h
index bd2545977ad3..007fcb9aeeb8 100644
--- a/arch/s390/include/asm/uaccess.h
+++ b/arch/s390/include/asm/uaccess.h
@@ -31,7 +31,6 @@
 #define USER_DS		(2)
 #define USER_DS_SACF	(3)
 
-#define get_ds()        (KERNEL_DS)
 #define get_fs()        (current->thread.mm_segment)
 #define segment_eq(a,b) (((a) & 2) == ((b) & 2))
 
diff --git a/arch/sh/include/asm/segment.h b/arch/sh/include/asm/segment.h
index 101c13c0c6ad..33d1d28057cb 100644
--- a/arch/sh/include/asm/segment.h
+++ b/arch/sh/include/asm/segment.h
@@ -26,7 +26,6 @@ typedef struct {
 
 #define segment_eq(a, b) ((a).seg == (b).seg)
 
-#define get_ds()	(KERNEL_DS)
 
 #define get_fs()	(current_thread_info()->addr_limit)
 #define set_fs(x)	(current_thread_info()->addr_limit = (x))
diff --git a/arch/sparc/include/asm/uaccess_32.h b/arch/sparc/include/asm/uaccess_32.h
index 5153798051fb..d6d8413eca83 100644
--- a/arch/sparc/include/asm/uaccess_32.h
+++ b/arch/sparc/include/asm/uaccess_32.h
@@ -25,7 +25,6 @@
 #define KERNEL_DS   ((mm_segment_t) { 0 })
 #define USER_DS     ((mm_segment_t) { -1 })
 
-#define get_ds()	(KERNEL_DS)
 #define get_fs()	(current->thread.current_ds)
 #define set_fs(val)	((current->thread.current_ds) = (val))
 
diff --git a/arch/sparc/include/asm/uaccess_64.h b/arch/sparc/include/asm/uaccess_64.h
index 87ae9ffb1521..bf9d330073b2 100644
--- a/arch/sparc/include/asm/uaccess_64.h
+++ b/arch/sparc/include/asm/uaccess_64.h
@@ -31,7 +31,6 @@
 #define USER_DS     ((mm_segment_t) { ASI_AIUS })	/* har har har */
 
 #define get_fs() ((mm_segment_t){(current_thread_info()->current_ds)})
-#define get_ds() (KERNEL_DS)
 
 #define segment_eq(a, b)  ((a).seg == (b).seg)
 
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index c1334aaaa78d..5e49a0acb5ee 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -25,7 +25,6 @@
 #define KERNEL_DS	MAKE_MM_SEG(-1UL)
 #define USER_DS 	MAKE_MM_SEG(TASK_SIZE_MAX)
 
-#define get_ds()	(KERNEL_DS)
 #define get_fs()	(current->thread.addr_limit)
 static inline void set_fs(mm_segment_t fs)
 {
diff --git a/arch/xtensa/include/asm/asm-uaccess.h b/arch/xtensa/include/asm/asm-uaccess.h
index dfdf9fae1f84..7f6cf4151843 100644
--- a/arch/xtensa/include/asm/asm-uaccess.h
+++ b/arch/xtensa/include/asm/asm-uaccess.h
@@ -32,8 +32,6 @@
 #define KERNEL_DS	0
 #define USER_DS		1
 
-#define get_ds		(KERNEL_DS)
-
 /*
  * get_fs reads current->thread.current_ds into a register.
  * On Entry:
diff --git a/arch/xtensa/include/asm/uaccess.h b/arch/xtensa/include/asm/uaccess.h
index 4b2480304bc3..6792928ba84a 100644
--- a/arch/xtensa/include/asm/uaccess.h
+++ b/arch/xtensa/include/asm/uaccess.h
@@ -32,7 +32,6 @@
 #define KERNEL_DS	((mm_segment_t) { 0 })
 #define USER_DS		((mm_segment_t) { 1 })
 
-#define get_ds()	(KERNEL_DS)
 #define get_fs()	(current->thread.current_ds)
 #define set_fs(val)	(current->thread.current_ds = (val))
 
diff --git a/drivers/staging/rtl8723bs/os_dep/osdep_service.c b/drivers/staging/rtl8723bs/os_dep/osdep_service.c
index e14d7cc411c9..73b87da15eb2 100644
--- a/drivers/staging/rtl8723bs/os_dep/osdep_service.c
+++ b/drivers/staging/rtl8723bs/os_dep/osdep_service.c
@@ -137,7 +137,7 @@ static int isFileReadable(char *path)
 		ret = PTR_ERR(fp);
 	}
 	else {
-		oldfs = get_fs(); set_fs(get_ds());
+		oldfs = get_fs(); set_fs(KERNEL_DS);
 
 		if (1!=readFile(fp, &buf, 1))
 			ret = -EINVAL;
@@ -165,7 +165,7 @@ static int retriveFromFile(char *path, u8 *buf, u32 sz)
 		if (0 == (ret =openFile(&fp, path, O_RDONLY, 0))) {
 			DBG_871X("%s openFile path:%s fp =%p\n", __func__, path , fp);
 
-			oldfs = get_fs(); set_fs(get_ds());
+			oldfs = get_fs(); set_fs(KERNEL_DS);
 			ret =readFile(fp, buf, sz);
 			set_fs(oldfs);
 			closeFile(fp);
diff --git a/fs/read_write.c b/fs/read_write.c
index ff3c5e6f87cf..30df848b7451 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -426,7 +426,7 @@ ssize_t kernel_read(struct file *file, void *buf, size_t count, loff_t *pos)
 	ssize_t result;
 
 	old_fs = get_fs();
-	set_fs(get_ds());
+	set_fs(KERNEL_DS);
 	/* The cast to a user pointer is valid due to the set_fs() */
 	result = vfs_read(file, (void __user *)buf, count, pos);
 	set_fs(old_fs);
@@ -499,7 +499,7 @@ ssize_t __kernel_write(struct file *file, const void *buf, size_t count, loff_t
 		return -EINVAL;
 
 	old_fs = get_fs();
-	set_fs(get_ds());
+	set_fs(KERNEL_DS);
 	p = (__force const char __user *)buf;
 	if (count > MAX_RW_COUNT)
 		count =  MAX_RW_COUNT;
@@ -521,7 +521,7 @@ ssize_t kernel_write(struct file *file, const void *buf, size_t count,
 	ssize_t res;
 
 	old_fs = get_fs();
-	set_fs(get_ds());
+	set_fs(KERNEL_DS);
 	/* The cast to a user pointer is valid due to the set_fs() */
 	res = vfs_write(file, (__force const char __user *)buf, count, pos);
 	set_fs(old_fs);
diff --git a/fs/splice.c b/fs/splice.c
index de2ede048473..ec8b54b5c0d2 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -357,7 +357,7 @@ static ssize_t kernel_readv(struct file *file, const struct kvec *vec,
 	ssize_t res;
 
 	old_fs = get_fs();
-	set_fs(get_ds());
+	set_fs(KERNEL_DS);
 	/* The cast to a user pointer is valid due to the set_fs() */
 	res = vfs_readv(file, (const struct iovec __user *)vec, vlen, &pos, 0);
 	set_fs(old_fs);
diff --git a/include/asm-generic/uaccess.h b/include/asm-generic/uaccess.h
index d82c78a79da5..b3d2241e03f8 100644
--- a/include/asm-generic/uaccess.h
+++ b/include/asm-generic/uaccess.h
@@ -22,7 +22,6 @@
 #endif
 
 #ifndef get_fs
-#define get_ds()	(KERNEL_DS)
 #define get_fs()	(current_thread_info()->addr_limit)
 
 static inline void set_fs(mm_segment_t fs)
diff --git a/security/integrity/iint.c b/security/integrity/iint.c
index 88f04b3380d4..423876fca8b4 100644
--- a/security/integrity/iint.c
+++ b/security/integrity/iint.c
@@ -200,7 +200,7 @@ int integrity_kernel_read(struct file *file, loff_t offset,
 		return -EBADF;
 
 	old_fs = get_fs();
-	set_fs(get_ds());
+	set_fs(KERNEL_DS);
 	ret = __vfs_read(file, buf, count, &offset);
 	set_fs(old_fs);
 
-- 
cgit v1.2.3


From 3167b27a7ded84623f50e4fb4cab846e75c4a210 Mon Sep 17 00:00:00 2001
From: Eran Ben Elisha <eranbe@mellanox.com>
Date: Sun, 3 Mar 2019 10:57:30 +0200
Subject: devlink: Add support for direct reporter health state update

It is possible that a reporter state will be updated due to a recover flow
which is not triggered by a devlink health related operation, but as a side
effect of some other operation in the system.

Expose devlink health API for a direct update of a reporter status.

Move devlink_health_reporter_state enum definition to devlink.h so it could
be used from drivers as a parameter of devlink_health_reporter_state_update.

In addition, add trace_devlink_health_reporter_state_update to provide user
notification for reporter state change.

Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/devlink.h          | 14 ++++++++++++++
 include/trace/events/devlink.h | 31 +++++++++++++++++++++++++++++++
 net/core/devlink.c             | 22 +++++++++++++++++-----
 3 files changed, 62 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/net/devlink.h b/include/net/devlink.h
index 7f5a0bdca228..63de99e09f04 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -447,6 +447,11 @@ typedef void devlink_snapshot_data_dest_t(const void *data);
 struct devlink_fmsg;
 struct devlink_health_reporter;
 
+enum devlink_health_reporter_state {
+	DEVLINK_HEALTH_REPORTER_STATE_HEALTHY,
+	DEVLINK_HEALTH_REPORTER_STATE_ERROR,
+};
+
 /**
  * struct devlink_health_reporter_ops - Reporter operations
  * @name: reporter name
@@ -715,6 +720,9 @@ void *
 devlink_health_reporter_priv(struct devlink_health_reporter *reporter);
 int devlink_health_report(struct devlink_health_reporter *reporter,
 			  const char *msg, void *priv_ctx);
+void
+devlink_health_reporter_state_update(struct devlink_health_reporter *reporter,
+				     enum devlink_health_reporter_state state);
 
 void devlink_compat_running_version(struct net_device *dev,
 				    char *buf, size_t len);
@@ -1204,6 +1212,12 @@ devlink_health_report(struct devlink_health_reporter *reporter,
 	return 0;
 }
 
+static inline void
+devlink_health_reporter_state_update(struct devlink_health_reporter *reporter,
+				     enum devlink_health_reporter_state state)
+{
+}
+
 static inline void
 devlink_compat_running_version(struct net_device *dev, char *buf, size_t len)
 {
diff --git a/include/trace/events/devlink.h b/include/trace/events/devlink.h
index 191ddf67d769..6f60a78d9a7e 100644
--- a/include/trace/events/devlink.h
+++ b/include/trace/events/devlink.h
@@ -140,6 +140,37 @@ TRACE_EVENT(devlink_health_recover_aborted,
 		  __entry->time_since_last_recover)
 );
 
+/*
+ * Tracepoint for devlink health reporter state update:
+ */
+TRACE_EVENT(devlink_health_reporter_state_update,
+	TP_PROTO(const struct devlink *devlink, const char *reporter_name,
+		 bool new_state),
+
+	TP_ARGS(devlink, reporter_name, new_state),
+
+	TP_STRUCT__entry(
+		__string(bus_name, devlink->dev->bus->name)
+		__string(dev_name, dev_name(devlink->dev))
+		__string(driver_name, devlink->dev->driver->name)
+		__string(reporter_name, reporter_name)
+		__field(u8, new_state)
+	),
+
+	TP_fast_assign(
+		__assign_str(bus_name, devlink->dev->bus->name);
+		__assign_str(dev_name, dev_name(devlink->dev));
+		__assign_str(driver_name, devlink->dev->driver->name);
+		__assign_str(reporter_name, reporter_name);
+		__entry->new_state = new_state;
+	),
+
+	TP_printk("bus_name=%s dev_name=%s driver_name=%s reporter_name=%s: new_state=%d",
+		  __get_str(bus_name), __get_str(dev_name),
+		  __get_str(driver_name), __get_str(reporter_name),
+		  __entry->new_state)
+);
+
 #endif /* _TRACE_DEVLINK_H */
 
 /* This part must be outside protection */
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 376e01a70c6d..78e22cea4cc7 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -4409,11 +4409,6 @@ struct devlink_health_reporter {
 	u64 last_recovery_ts;
 };
 
-enum devlink_health_reporter_state {
-	DEVLINK_HEALTH_REPORTER_STATE_HEALTHY,
-	DEVLINK_HEALTH_REPORTER_STATE_ERROR,
-};
-
 void *
 devlink_health_reporter_priv(struct devlink_health_reporter *reporter)
 {
@@ -4498,6 +4493,23 @@ devlink_health_reporter_destroy(struct devlink_health_reporter *reporter)
 }
 EXPORT_SYMBOL_GPL(devlink_health_reporter_destroy);
 
+void
+devlink_health_reporter_state_update(struct devlink_health_reporter *reporter,
+				     enum devlink_health_reporter_state state)
+{
+	if (WARN_ON(state != DEVLINK_HEALTH_REPORTER_STATE_HEALTHY &&
+		    state != DEVLINK_HEALTH_REPORTER_STATE_ERROR))
+		return;
+
+	if (reporter->health_state == state)
+		return;
+
+	reporter->health_state = state;
+	trace_devlink_health_reporter_state_update(reporter->devlink,
+						   reporter->ops->name, state);
+}
+EXPORT_SYMBOL_GPL(devlink_health_reporter_state_update);
+
 static int
 devlink_health_reporter_recover(struct devlink_health_reporter *reporter,
 				void *priv_ctx)
-- 
cgit v1.2.3


From 3eb39f47934f9d5a3027fe00d906a45fe3a15fad Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian@brauner.io>
Date: Mon, 19 Nov 2018 00:51:56 +0100
Subject: signal: add pidfd_send_signal() syscall

The kill() syscall operates on process identifiers (pid). After a process
has exited its pid can be reused by another process. If a caller sends a
signal to a reused pid it will end up signaling the wrong process. This
issue has often surfaced and there has been a push to address this problem [1].

This patch uses file descriptors (fd) from proc/<pid> as stable handles on
struct pid. Even if a pid is recycled the handle will not change. The fd
can be used to send signals to the process it refers to.
Thus, the new syscall pidfd_send_signal() is introduced to solve this
problem. Instead of pids it operates on process fds (pidfd).

/* prototype and argument /*
long pidfd_send_signal(int pidfd, int sig, siginfo_t *info, unsigned int flags);

/* syscall number 424 */
The syscall number was chosen to be 424 to align with Arnd's rework in his
y2038 to minimize merge conflicts (cf. [25]).

In addition to the pidfd and signal argument it takes an additional
siginfo_t and flags argument. If the siginfo_t argument is NULL then
pidfd_send_signal() is equivalent to kill(<positive-pid>, <signal>). If it
is not NULL pidfd_send_signal() is equivalent to rt_sigqueueinfo().
The flags argument is added to allow for future extensions of this syscall.
It currently needs to be passed as 0. Failing to do so will cause EINVAL.

/* pidfd_send_signal() replaces multiple pid-based syscalls */
The pidfd_send_signal() syscall currently takes on the job of
rt_sigqueueinfo(2) and parts of the functionality of kill(2), Namely, when a
positive pid is passed to kill(2). It will however be possible to also
replace tgkill(2) and rt_tgsigqueueinfo(2) if this syscall is extended.

/* sending signals to threads (tid) and process groups (pgid) */
Specifically, the pidfd_send_signal() syscall does currently not operate on
process groups or threads. This is left for future extensions.
In order to extend the syscall to allow sending signal to threads and
process groups appropriately named flags (e.g. PIDFD_TYPE_PGID, and
PIDFD_TYPE_TID) should be added. This implies that the flags argument will
determine what is signaled and not the file descriptor itself. Put in other
words, grouping in this api is a property of the flags argument not a
property of the file descriptor (cf. [13]). Clarification for this has been
requested by Eric (cf. [19]).
When appropriate extensions through the flags argument are added then
pidfd_send_signal() can additionally replace the part of kill(2) which
operates on process groups as well as the tgkill(2) and
rt_tgsigqueueinfo(2) syscalls.
How such an extension could be implemented has been very roughly sketched
in [14], [15], and [16]. However, this should not be taken as a commitment
to a particular implementation. There might be better ways to do it.
Right now this is intentionally left out to keep this patchset as simple as
possible (cf. [4]).

/* naming */
The syscall had various names throughout iterations of this patchset:
- procfd_signal()
- procfd_send_signal()
- taskfd_send_signal()
In the last round of reviews it was pointed out that given that if the
flags argument decides the scope of the signal instead of different types
of fds it might make sense to either settle for "procfd_" or "pidfd_" as
prefix. The community was willing to accept either (cf. [17] and [18]).
Given that one developer expressed strong preference for the "pidfd_"
prefix (cf. [13]) and with other developers less opinionated about the name
we should settle for "pidfd_" to avoid further bikeshedding.

The  "_send_signal" suffix was chosen to reflect the fact that the syscall
takes on the job of multiple syscalls. It is therefore intentional that the
name is not reminiscent of neither kill(2) nor rt_sigqueueinfo(2). Not the
fomer because it might imply that pidfd_send_signal() is a replacement for
kill(2), and not the latter because it is a hassle to remember the correct
spelling - especially for non-native speakers - and because it is not
descriptive enough of what the syscall actually does. The name
"pidfd_send_signal" makes it very clear that its job is to send signals.

/* zombies */
Zombies can be signaled just as any other process. No special error will be
reported since a zombie state is an unreliable state (cf. [3]). However,
this can be added as an extension through the @flags argument if the need
ever arises.

/* cross-namespace signals */
The patch currently enforces that the signaler and signalee either are in
the same pid namespace or that the signaler's pid namespace is an ancestor
of the signalee's pid namespace. This is done for the sake of simplicity
and because it is unclear to what values certain members of struct
siginfo_t would need to be set to (cf. [5], [6]).

/* compat syscalls */
It became clear that we would like to avoid adding compat syscalls
(cf. [7]).  The compat syscall handling is now done in kernel/signal.c
itself by adding __copy_siginfo_from_user_generic() which lets us avoid
compat syscalls (cf. [8]). It should be noted that the addition of
__copy_siginfo_from_user_any() is caused by a bug in the original
implementation of rt_sigqueueinfo(2) (cf. 12).
With upcoming rework for syscall handling things might improve
significantly (cf. [11]) and __copy_siginfo_from_user_any() will not gain
any additional callers.

/* testing */
This patch was tested on x64 and x86.

/* userspace usage */
An asciinema recording for the basic functionality can be found under [9].
With this patch a process can be killed via:

 #define _GNU_SOURCE
 #include <errno.h>
 #include <fcntl.h>
 #include <signal.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <sys/stat.h>
 #include <sys/syscall.h>
 #include <sys/types.h>
 #include <unistd.h>

 static inline int do_pidfd_send_signal(int pidfd, int sig, siginfo_t *info,
                                         unsigned int flags)
 {
 #ifdef __NR_pidfd_send_signal
         return syscall(__NR_pidfd_send_signal, pidfd, sig, info, flags);
 #else
         return -ENOSYS;
 #endif
 }

 int main(int argc, char *argv[])
 {
         int fd, ret, saved_errno, sig;

         if (argc < 3)
                 exit(EXIT_FAILURE);

         fd = open(argv[1], O_DIRECTORY | O_CLOEXEC);
         if (fd < 0) {
                 printf("%s - Failed to open \"%s\"\n", strerror(errno), argv[1]);
                 exit(EXIT_FAILURE);
         }

         sig = atoi(argv[2]);

         printf("Sending signal %d to process %s\n", sig, argv[1]);
         ret = do_pidfd_send_signal(fd, sig, NULL, 0);

         saved_errno = errno;
         close(fd);
         errno = saved_errno;

         if (ret < 0) {
                 printf("%s - Failed to send signal %d to process %s\n",
                        strerror(errno), sig, argv[1]);
                 exit(EXIT_FAILURE);
         }

         exit(EXIT_SUCCESS);
 }

/* Q&A
 * Given that it seems the same questions get asked again by people who are
 * late to the party it makes sense to add a Q&A section to the commit
 * message so it's hopefully easier to avoid duplicate threads.
 *
 * For the sake of progress please consider these arguments settled unless
 * there is a new point that desperately needs to be addressed. Please make
 * sure to check the links to the threads in this commit message whether
 * this has not already been covered.
 */
Q-01: (Florian Weimer [20], Andrew Morton [21])
      What happens when the target process has exited?
A-01: Sending the signal will fail with ESRCH (cf. [22]).

Q-02:  (Andrew Morton [21])
       Is the task_struct pinned by the fd?
A-02:  No. A reference to struct pid is kept. struct pid - as far as I
       understand - was created exactly for the reason to not require to
       pin struct task_struct (cf. [22]).

Q-03: (Andrew Morton [21])
      Does the entire procfs directory remain visible? Just one entry
      within it?
A-03: The same thing that happens right now when you hold a file descriptor
      to /proc/<pid> open (cf. [22]).

Q-04: (Andrew Morton [21])
      Does the pid remain reserved?
A-04: No. This patchset guarantees a stable handle not that pids are not
      recycled (cf. [22]).

Q-05: (Andrew Morton [21])
      Do attempts to signal that fd return errors?
A-05: See {Q,A}-01.

Q-06: (Andrew Morton [22])
      Is there a cleaner way of obtaining the fd? Another syscall perhaps.
A-06: Userspace can already trivially retrieve file descriptors from procfs
      so this is something that we will need to support anyway. Hence,
      there's no immediate need to add another syscalls just to make
      pidfd_send_signal() not dependent on the presence of procfs. However,
      adding a syscalls to get such file descriptors is planned for a
      future patchset (cf. [22]).

Q-07: (Andrew Morton [21] and others)
      This fd-for-a-process sounds like a handy thing and people may well
      think up other uses for it in the future, probably unrelated to
      signals. Are the code and the interface designed to permit such
      future applications?
A-07: Yes (cf. [22]).

Q-08: (Andrew Morton [21] and others)
      Now I think about it, why a new syscall? This thing is looking
      rather like an ioctl?
A-08: This has been extensively discussed. It was agreed that a syscall is
      preferred for a variety or reasons. Here are just a few taken from
      prior threads. Syscalls are safer than ioctl()s especially when
      signaling to fds. Processes are a core kernel concept so a syscall
      seems more appropriate. The layout of the syscall with its four
      arguments would require the addition of a custom struct for the
      ioctl() thereby causing at least the same amount or even more
      complexity for userspace than a simple syscall. The new syscall will
      replace multiple other pid-based syscalls (see description above).
      The file-descriptors-for-processes concept introduced with this
      syscall will be extended with other syscalls in the future. See also
      [22], [23] and various other threads already linked in here.

Q-09: (Florian Weimer [24])
      What happens if you use the new interface with an O_PATH descriptor?
A-09:
      pidfds opened as O_PATH fds cannot be used to send signals to a
      process (cf. [2]). Signaling processes through pidfds is the
      equivalent of writing to a file. Thus, this is not an operation that
      operates "purely at the file descriptor level" as required by the
      open(2) manpage. See also [4].

/* References */
[1]:  https://lore.kernel.org/lkml/20181029221037.87724-1-dancol@google.com/
[2]:  https://lore.kernel.org/lkml/874lbtjvtd.fsf@oldenburg2.str.redhat.com/
[3]:  https://lore.kernel.org/lkml/20181204132604.aspfupwjgjx6fhva@brauner.io/
[4]:  https://lore.kernel.org/lkml/20181203180224.fkvw4kajtbvru2ku@brauner.io/
[5]:  https://lore.kernel.org/lkml/20181121213946.GA10795@mail.hallyn.com/
[6]:  https://lore.kernel.org/lkml/20181120103111.etlqp7zop34v6nv4@brauner.io/
[7]:  https://lore.kernel.org/lkml/36323361-90BD-41AF-AB5B-EE0D7BA02C21@amacapital.net/
[8]:  https://lore.kernel.org/lkml/87tvjxp8pc.fsf@xmission.com/
[9]:  https://asciinema.org/a/IQjuCHew6bnq1cr78yuMv16cy
[11]: https://lore.kernel.org/lkml/F53D6D38-3521-4C20-9034-5AF447DF62FF@amacapital.net/
[12]: https://lore.kernel.org/lkml/87zhtjn8ck.fsf@xmission.com/
[13]: https://lore.kernel.org/lkml/871s6u9z6u.fsf@xmission.com/
[14]: https://lore.kernel.org/lkml/20181206231742.xxi4ghn24z4h2qki@brauner.io/
[15]: https://lore.kernel.org/lkml/20181207003124.GA11160@mail.hallyn.com/
[16]: https://lore.kernel.org/lkml/20181207015423.4miorx43l3qhppfz@brauner.io/
[17]: https://lore.kernel.org/lkml/CAGXu5jL8PciZAXvOvCeCU3wKUEB_dU-O3q0tDw4uB_ojMvDEew@mail.gmail.com/
[18]: https://lore.kernel.org/lkml/20181206222746.GB9224@mail.hallyn.com/
[19]: https://lore.kernel.org/lkml/20181208054059.19813-1-christian@brauner.io/
[20]: https://lore.kernel.org/lkml/8736rebl9s.fsf@oldenburg.str.redhat.com/
[21]: https://lore.kernel.org/lkml/20181228152012.dbf0508c2508138efc5f2bbe@linux-foundation.org/
[22]: https://lore.kernel.org/lkml/20181228233725.722tdfgijxcssg76@brauner.io/
[23]: https://lwn.net/Articles/773459/
[24]: https://lore.kernel.org/lkml/8736rebl9s.fsf@oldenburg.str.redhat.com/
[25]: https://lore.kernel.org/lkml/CAK8P3a0ej9NcJM8wXNPbcGUyOUZYX+VLoDFdbenW3s3114oQZw@mail.gmail.com/

Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Jann Horn <jannh@google.com>
Cc: Andy Lutomirsky <luto@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Florian Weimer <fweimer@redhat.com>
Signed-off-by: Christian Brauner <christian@brauner.io>
Reviewed-by: Tycho Andersen <tycho@tycho.ws>
Reviewed-by: Kees Cook <keescook@chromium.org>
Reviewed-by: David Howells <dhowells@redhat.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Serge Hallyn <serge@hallyn.com>
Acked-by: Aleksa Sarai <cyphar@cyphar.com>
---
 arch/x86/entry/syscalls/syscall_32.tbl |   1 +
 arch/x86/entry/syscalls/syscall_64.tbl |   1 +
 fs/proc/base.c                         |   9 +++
 include/linux/proc_fs.h                |   6 ++
 include/linux/syscalls.h               |   3 +
 include/uapi/asm-generic/unistd.h      |   4 +-
 kernel/signal.c                        | 133 +++++++++++++++++++++++++++++++--
 kernel/sys_ni.c                        |   1 +
 8 files changed, 151 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index 3cf7b533b3d1..234d91df8ca6 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -398,3 +398,4 @@
 384	i386	arch_prctl		sys_arch_prctl			__ia32_compat_sys_arch_prctl
 385	i386	io_pgetevents		sys_io_pgetevents		__ia32_compat_sys_io_pgetevents
 386	i386	rseq			sys_rseq			__ia32_sys_rseq
+424	i386	pidfd_send_signal	sys_pidfd_send_signal		__ia32_sys_pidfd_send_signal
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index f0b1709a5ffb..58f4b3ad4fe0 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -343,6 +343,7 @@
 332	common	statx			__x64_sys_statx
 333	common	io_pgetevents		__x64_sys_io_pgetevents
 334	common	rseq			__x64_sys_rseq
+424	common	pidfd_send_signal	__x64_sys_pidfd_send_signal
 
 #
 # x32-specific system call numbers start at 512 to avoid cache impact
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 633a63462573..b6627c471078 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -3046,6 +3046,15 @@ static const struct file_operations proc_tgid_base_operations = {
 	.llseek		= generic_file_llseek,
 };
 
+struct pid *tgid_pidfd_to_pid(const struct file *file)
+{
+	if (!d_is_dir(file->f_path.dentry) ||
+	    (file->f_op != &proc_tgid_base_operations))
+		return ERR_PTR(-EBADF);
+
+	return proc_pid(file_inode(file));
+}
+
 static struct dentry *proc_tgid_base_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
 {
 	return proc_pident_lookup(dir, dentry,
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index d0e1f1522a78..52a283ba0465 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -73,6 +73,7 @@ struct proc_dir_entry *proc_create_net_single_write(const char *name, umode_t mo
 						    int (*show)(struct seq_file *, void *),
 						    proc_write_t write,
 						    void *data);
+extern struct pid *tgid_pidfd_to_pid(const struct file *file);
 
 #else /* CONFIG_PROC_FS */
 
@@ -114,6 +115,11 @@ static inline int remove_proc_subtree(const char *name, struct proc_dir_entry *p
 #define proc_create_net(name, mode, parent, state_size, ops) ({NULL;})
 #define proc_create_net_single(name, mode, parent, show, data) ({NULL;})
 
+static inline struct pid *tgid_pidfd_to_pid(const struct file *file)
+{
+	return ERR_PTR(-EBADF);
+}
+
 #endif /* CONFIG_PROC_FS */
 
 struct net;
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 257cccba3062..5eb2e351675e 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -926,6 +926,9 @@ asmlinkage long sys_statx(int dfd, const char __user *path, unsigned flags,
 			  unsigned mask, struct statx __user *buffer);
 asmlinkage long sys_rseq(struct rseq __user *rseq, uint32_t rseq_len,
 			 int flags, uint32_t sig);
+asmlinkage long sys_pidfd_send_signal(int pidfd, int sig,
+				       siginfo_t __user *info,
+				       unsigned int flags);
 
 /*
  * Architecture-specific system calls
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index d90127298f12..c861e7d1053b 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -740,9 +740,11 @@ __SC_COMP(__NR_io_pgetevents, sys_io_pgetevents, compat_sys_io_pgetevents)
 __SYSCALL(__NR_rseq, sys_rseq)
 #define __NR_kexec_file_load 294
 __SYSCALL(__NR_kexec_file_load,     sys_kexec_file_load)
+#define __NR_pidfd_send_signal 424
+__SYSCALL(__NR_pidfd_send_signal, sys_pidfd_send_signal)
 
 #undef __NR_syscalls
-#define __NR_syscalls 295
+#define __NR_syscalls 425
 
 /*
  * 32 bit systems traditionally used different
diff --git a/kernel/signal.c b/kernel/signal.c
index e1d7ad8e6ab1..268bed80244f 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -19,7 +19,9 @@
 #include <linux/sched/task.h>
 #include <linux/sched/task_stack.h>
 #include <linux/sched/cputime.h>
+#include <linux/file.h>
 #include <linux/fs.h>
+#include <linux/proc_fs.h>
 #include <linux/tty.h>
 #include <linux/binfmts.h>
 #include <linux/coredump.h>
@@ -3429,6 +3431,16 @@ COMPAT_SYSCALL_DEFINE4(rt_sigtimedwait, compat_sigset_t __user *, uthese,
 #endif
 #endif
 
+static inline void prepare_kill_siginfo(int sig, struct kernel_siginfo *info)
+{
+	clear_siginfo(info);
+	info->si_signo = sig;
+	info->si_errno = 0;
+	info->si_code = SI_USER;
+	info->si_pid = task_tgid_vnr(current);
+	info->si_uid = from_kuid_munged(current_user_ns(), current_uid());
+}
+
 /**
  *  sys_kill - send a signal to a process
  *  @pid: the PID of the process
@@ -3438,16 +3450,125 @@ SYSCALL_DEFINE2(kill, pid_t, pid, int, sig)
 {
 	struct kernel_siginfo info;
 
-	clear_siginfo(&info);
-	info.si_signo = sig;
-	info.si_errno = 0;
-	info.si_code = SI_USER;
-	info.si_pid = task_tgid_vnr(current);
-	info.si_uid = from_kuid_munged(current_user_ns(), current_uid());
+	prepare_kill_siginfo(sig, &info);
 
 	return kill_something_info(sig, &info, pid);
 }
 
+#ifdef CONFIG_PROC_FS
+/*
+ * Verify that the signaler and signalee either are in the same pid namespace
+ * or that the signaler's pid namespace is an ancestor of the signalee's pid
+ * namespace.
+ */
+static bool access_pidfd_pidns(struct pid *pid)
+{
+	struct pid_namespace *active = task_active_pid_ns(current);
+	struct pid_namespace *p = ns_of_pid(pid);
+
+	for (;;) {
+		if (!p)
+			return false;
+		if (p == active)
+			break;
+		p = p->parent;
+	}
+
+	return true;
+}
+
+static int copy_siginfo_from_user_any(kernel_siginfo_t *kinfo, siginfo_t *info)
+{
+#ifdef CONFIG_COMPAT
+	/*
+	 * Avoid hooking up compat syscalls and instead handle necessary
+	 * conversions here. Note, this is a stop-gap measure and should not be
+	 * considered a generic solution.
+	 */
+	if (in_compat_syscall())
+		return copy_siginfo_from_user32(
+			kinfo, (struct compat_siginfo __user *)info);
+#endif
+	return copy_siginfo_from_user(kinfo, info);
+}
+
+/**
+ * sys_pidfd_send_signal - send a signal to a process through a task file
+ *                          descriptor
+ * @pidfd:  the file descriptor of the process
+ * @sig:    signal to be sent
+ * @info:   the signal info
+ * @flags:  future flags to be passed
+ *
+ * The syscall currently only signals via PIDTYPE_PID which covers
+ * kill(<positive-pid>, <signal>. It does not signal threads or process
+ * groups.
+ * In order to extend the syscall to threads and process groups the @flags
+ * argument should be used. In essence, the @flags argument will determine
+ * what is signaled and not the file descriptor itself. Put in other words,
+ * grouping is a property of the flags argument not a property of the file
+ * descriptor.
+ *
+ * Return: 0 on success, negative errno on failure
+ */
+SYSCALL_DEFINE4(pidfd_send_signal, int, pidfd, int, sig,
+		siginfo_t __user *, info, unsigned int, flags)
+{
+	int ret;
+	struct fd f;
+	struct pid *pid;
+	kernel_siginfo_t kinfo;
+
+	/* Enforce flags be set to 0 until we add an extension. */
+	if (flags)
+		return -EINVAL;
+
+	f = fdget_raw(pidfd);
+	if (!f.file)
+		return -EBADF;
+
+	/* Is this a pidfd? */
+	pid = tgid_pidfd_to_pid(f.file);
+	if (IS_ERR(pid)) {
+		ret = PTR_ERR(pid);
+		goto err;
+	}
+
+	ret = -EINVAL;
+	if (!access_pidfd_pidns(pid))
+		goto err;
+
+	if (info) {
+		ret = copy_siginfo_from_user_any(&kinfo, info);
+		if (unlikely(ret))
+			goto err;
+
+		ret = -EINVAL;
+		if (unlikely(sig != kinfo.si_signo))
+			goto err;
+
+		if ((task_pid(current) != pid) &&
+		    (kinfo.si_code >= 0 || kinfo.si_code == SI_TKILL)) {
+			/* Only allow sending arbitrary signals to yourself. */
+			ret = -EPERM;
+			if (kinfo.si_code != SI_USER)
+				goto err;
+
+			/* Turn this into a regular kill signal. */
+			prepare_kill_siginfo(sig, &kinfo);
+		}
+	} else {
+		prepare_kill_siginfo(sig, &kinfo);
+	}
+
+	ret = kill_pid_info(sig, &kinfo, pid);
+
+err:
+	fdput(f);
+	return ret;
+}
+#endif /* CONFIG_PROC_FS */
+
 static int
 do_send_specific(pid_t tgid, pid_t pid, int sig, struct kernel_siginfo *info)
 {
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index ab9d0e3c6d50..f905f4f9f677 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -163,6 +163,7 @@ COND_SYSCALL(syslog);
 /* kernel/sched/core.c */
 
 /* kernel/signal.c */
+COND_SYSCALL(pidfd_send_signal);
 
 /* kernel/sys.c */
 COND_SYSCALL(setregid);
-- 
cgit v1.2.3


From c8b502422bfe04422261cb2861977a5cd31cc1da Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Tue, 5 Mar 2019 16:26:43 +0000
Subject: drm/i915: Remove last traces of exec-id (GEM_BUSY)

As we allow per-context engine allows the legacy concept of
I915_EXEC_RING no longer applies universally. We are still exposing the
unrelated exec-id in GEM_BUSY, so transition this ioctl (once more
slightly changing its ABI, but no one cares) over to only reporting the
uabi-class (not instance as we can not foreseeably fit those into the
small bitmask).

The only user of the extended ring information from GEM_BUSY is ddx/sna,
which tries to use the non-rcs business information to guide which
engine to use for subsequent operations on foreign bo. All that matters
for it is the decision between rcs and !rcs, so it is unaffected by the
change in higher bits.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190305162643.20243-1-chris@chris-wilson.co.uk
---
 drivers/gpu/drm/i915/i915_gem.c         | 32 +++++++++++++++++---------------
 drivers/gpu/drm/i915/intel_engine_cs.c  | 10 ----------
 drivers/gpu/drm/i915/intel_ringbuffer.h |  1 -
 include/uapi/drm/i915_drm.h             | 32 +++++++++++++++++---------------
 4 files changed, 34 insertions(+), 41 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index a1ad5e137a97..69413f99ed04 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3825,20 +3825,17 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
 
 static __always_inline unsigned int __busy_read_flag(unsigned int id)
 {
-	/* Note that we could alias engines in the execbuf API, but
-	 * that would be very unwise as it prevents userspace from
-	 * fine control over engine selection. Ahem.
-	 *
-	 * This should be something like EXEC_MAX_ENGINE instead of
-	 * I915_NUM_ENGINES.
-	 */
-	BUILD_BUG_ON(I915_NUM_ENGINES > 16);
+	if (id == I915_ENGINE_CLASS_INVALID)
+		return 0xffff0000;
+
+	GEM_BUG_ON(id >= 16);
 	return 0x10000 << id;
 }
 
 static __always_inline unsigned int __busy_write_id(unsigned int id)
 {
-	/* The uABI guarantees an active writer is also amongst the read
+	/*
+	 * The uABI guarantees an active writer is also amongst the read
 	 * engines. This would be true if we accessed the activity tracking
 	 * under the lock, but as we perform the lookup of the object and
 	 * its activity locklessly we can not guarantee that the last_write
@@ -3846,16 +3843,20 @@ static __always_inline unsigned int __busy_write_id(unsigned int id)
 	 * last_read - hence we always set both read and write busy for
 	 * last_write.
 	 */
-	return id | __busy_read_flag(id);
+	if (id == I915_ENGINE_CLASS_INVALID)
+		return 0xffffffff;
+
+	return (id + 1) | __busy_read_flag(id);
 }
 
 static __always_inline unsigned int
 __busy_set_if_active(const struct dma_fence *fence,
 		     unsigned int (*flag)(unsigned int id))
 {
-	struct i915_request *rq;
+	const struct i915_request *rq;
 
-	/* We have to check the current hw status of the fence as the uABI
+	/*
+	 * We have to check the current hw status of the fence as the uABI
 	 * guarantees forward progress. We could rely on the idle worker
 	 * to eventually flush us, but to minimise latency just ask the
 	 * hardware.
@@ -3866,11 +3867,11 @@ __busy_set_if_active(const struct dma_fence *fence,
 		return 0;
 
 	/* opencode to_request() in order to avoid const warnings */
-	rq = container_of(fence, struct i915_request, fence);
+	rq = container_of(fence, const struct i915_request, fence);
 	if (i915_request_completed(rq))
 		return 0;
 
-	return flag(rq->engine->uabi_id);
+	return flag(rq->engine->uabi_class);
 }
 
 static __always_inline unsigned int
@@ -3904,7 +3905,8 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data,
 	if (!obj)
 		goto out;
 
-	/* A discrepancy here is that we do not report the status of
+	/*
+	 * A discrepancy here is that we do not report the status of
 	 * non-i915 fences, i.e. even though we may report the object as idle,
 	 * a call to set-domain may still stall waiting for foreign rendering.
 	 * This also means that wait-ioctl may report an object as busy,
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 4fc7e2ac6278..f3f161c5627b 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -84,7 +84,6 @@ static const struct engine_class_info intel_engine_classes[] = {
 #define MAX_MMIO_BASES 3
 struct engine_info {
 	unsigned int hw_id;
-	unsigned int uabi_id;
 	u8 class;
 	u8 instance;
 	/* mmio bases table *must* be sorted in reverse gen order */
@@ -97,7 +96,6 @@ struct engine_info {
 static const struct engine_info intel_engines[] = {
 	[RCS] = {
 		.hw_id = RCS_HW,
-		.uabi_id = I915_EXEC_RENDER,
 		.class = RENDER_CLASS,
 		.instance = 0,
 		.mmio_bases = {
@@ -106,7 +104,6 @@ static const struct engine_info intel_engines[] = {
 	},
 	[BCS] = {
 		.hw_id = BCS_HW,
-		.uabi_id = I915_EXEC_BLT,
 		.class = COPY_ENGINE_CLASS,
 		.instance = 0,
 		.mmio_bases = {
@@ -115,7 +112,6 @@ static const struct engine_info intel_engines[] = {
 	},
 	[VCS] = {
 		.hw_id = VCS_HW,
-		.uabi_id = I915_EXEC_BSD,
 		.class = VIDEO_DECODE_CLASS,
 		.instance = 0,
 		.mmio_bases = {
@@ -126,7 +122,6 @@ static const struct engine_info intel_engines[] = {
 	},
 	[VCS2] = {
 		.hw_id = VCS2_HW,
-		.uabi_id = I915_EXEC_BSD,
 		.class = VIDEO_DECODE_CLASS,
 		.instance = 1,
 		.mmio_bases = {
@@ -136,7 +131,6 @@ static const struct engine_info intel_engines[] = {
 	},
 	[VCS3] = {
 		.hw_id = VCS3_HW,
-		.uabi_id = I915_EXEC_BSD,
 		.class = VIDEO_DECODE_CLASS,
 		.instance = 2,
 		.mmio_bases = {
@@ -145,7 +139,6 @@ static const struct engine_info intel_engines[] = {
 	},
 	[VCS4] = {
 		.hw_id = VCS4_HW,
-		.uabi_id = I915_EXEC_BSD,
 		.class = VIDEO_DECODE_CLASS,
 		.instance = 3,
 		.mmio_bases = {
@@ -154,7 +147,6 @@ static const struct engine_info intel_engines[] = {
 	},
 	[VECS] = {
 		.hw_id = VECS_HW,
-		.uabi_id = I915_EXEC_VEBOX,
 		.class = VIDEO_ENHANCEMENT_CLASS,
 		.instance = 0,
 		.mmio_bases = {
@@ -164,7 +156,6 @@ static const struct engine_info intel_engines[] = {
 	},
 	[VECS2] = {
 		.hw_id = VECS2_HW,
-		.uabi_id = I915_EXEC_VEBOX,
 		.class = VIDEO_ENHANCEMENT_CLASS,
 		.instance = 1,
 		.mmio_bases = {
@@ -321,7 +312,6 @@ intel_engine_setup(struct drm_i915_private *dev_priv,
 	engine->class = info->class;
 	engine->instance = info->instance;
 
-	engine->uabi_id = info->uabi_id;
 	engine->uabi_class = intel_engine_classes[info->class].uabi_class;
 
 	engine->context_size = __intel_engine_context_size(dev_priv,
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 2e7264119ec4..12e79828dbd5 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -335,7 +335,6 @@ struct intel_engine_cs {
 	unsigned int hw_id;
 	unsigned int guc_id;
 
-	u8 uabi_id;
 	u8 uabi_class;
 
 	u8 class;
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 1a60642c1d61..aa2d4c73a97d 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1127,32 +1127,34 @@ struct drm_i915_gem_busy {
 	 * as busy may become idle before the ioctl is completed.
 	 *
 	 * Furthermore, if the object is busy, which engine is busy is only
-	 * provided as a guide. There are race conditions which prevent the
-	 * report of which engines are busy from being always accurate.
-	 * However, the converse is not true. If the object is idle, the
-	 * result of the ioctl, that all engines are idle, is accurate.
+	 * provided as a guide and only indirectly by reporting its class
+	 * (there may be more than one engine in each class). There are race
+	 * conditions which prevent the report of which engines are busy from
+	 * being always accurate.  However, the converse is not true. If the
+	 * object is idle, the result of the ioctl, that all engines are idle,
+	 * is accurate.
 	 *
 	 * The returned dword is split into two fields to indicate both
-	 * the engines on which the object is being read, and the
-	 * engine on which it is currently being written (if any).
+	 * the engine classess on which the object is being read, and the
+	 * engine class on which it is currently being written (if any).
 	 *
 	 * The low word (bits 0:15) indicate if the object is being written
 	 * to by any engine (there can only be one, as the GEM implicit
 	 * synchronisation rules force writes to be serialised). Only the
-	 * engine for the last write is reported.
+	 * engine class (offset by 1, I915_ENGINE_CLASS_RENDER is reported as
+	 * 1 not 0 etc) for the last write is reported.
 	 *
-	 * The high word (bits 16:31) are a bitmask of which engines are
-	 * currently reading from the object. Multiple engines may be
+	 * The high word (bits 16:31) are a bitmask of which engines classes
+	 * are currently reading from the object. Multiple engines may be
 	 * reading from the object simultaneously.
 	 *
-	 * The value of each engine is the same as specified in the
-	 * EXECBUFFER2 ioctl, i.e. I915_EXEC_RENDER, I915_EXEC_BSD etc.
-	 * Note I915_EXEC_DEFAULT is a symbolic value and is mapped to
-	 * the I915_EXEC_RENDER engine for execution, and so it is never
+	 * The value of each engine class is the same as specified in the
+	 * I915_CONTEXT_SET_ENGINES parameter and via perf, i.e.
+	 * I915_ENGINE_CLASS_RENDER, I915_ENGINE_CLASS_COPY, etc.
 	 * reported as active itself. Some hardware may have parallel
 	 * execution engines, e.g. multiple media engines, which are
-	 * mapped to the same identifier in the EXECBUFFER2 ioctl and
-	 * so are not separately reported for busyness.
+	 * mapped to the same class identifier and so are not separately
+	 * reported for busyness.
 	 *
 	 * Caveat emptor:
 	 * Only the boolean result of this query is reliable; that is whether
-- 
cgit v1.2.3


From fe33032daae2e584d9e7e33bab44c9eafced1f8f Mon Sep 17 00:00:00 2001
From: "Yan, Zheng" <zyan@redhat.com>
Date: Fri, 1 Feb 2019 14:57:15 +0800
Subject: ceph: add mount option to limit caps count

If number of caps exceed the limit, ceph_trim_dentires() also trim
dentries with valid leases. Trimming dentry releases references to
associated inode, which may evict inode and release caps.

By default, there is no limit for caps count.

Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
Reviewed-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 Documentation/filesystems/ceph.txt |  4 ++++
 fs/ceph/caps.c                     | 33 ++++++++++++++++++++++++++-------
 fs/ceph/dir.c                      | 20 +++++++++++++++++++-
 fs/ceph/mds_client.c               | 34 ++++++++++++++++++++++++++--------
 fs/ceph/mds_client.h               |  3 +++
 fs/ceph/super.c                    | 12 +++++++++---
 fs/ceph/super.h                    |  5 +++--
 include/linux/ceph/types.h         |  1 +
 8 files changed, 91 insertions(+), 21 deletions(-)

(limited to 'include')

diff --git a/Documentation/filesystems/ceph.txt b/Documentation/filesystems/ceph.txt
index 1177052701e1..bc4145ee5dba 100644
--- a/Documentation/filesystems/ceph.txt
+++ b/Documentation/filesystems/ceph.txt
@@ -118,6 +118,10 @@ Mount Options
 	of a non-responsive Ceph file system.  The default is 30
 	seconds.
 
+  caps_max=X
+	Specify the maximum number of caps to hold. Unused caps are released
+	when number of caps exceeds the limit. The default is 0 (no limit)
+
   rbytes
 	When stat() is called on a directory, set st_size to 'rbytes',
 	the summation of file sizes over all files nested beneath that
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 6fbdc1a0afbe..36a8dc699448 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -148,11 +148,17 @@ void ceph_caps_finalize(struct ceph_mds_client *mdsc)
 	spin_unlock(&mdsc->caps_list_lock);
 }
 
-void ceph_adjust_min_caps(struct ceph_mds_client *mdsc, int delta)
+void ceph_adjust_caps_max_min(struct ceph_mds_client *mdsc,
+			      struct ceph_mount_options *fsopt)
 {
 	spin_lock(&mdsc->caps_list_lock);
-	mdsc->caps_min_count += delta;
-	BUG_ON(mdsc->caps_min_count < 0);
+	mdsc->caps_min_count = fsopt->max_readdir;
+	if (mdsc->caps_min_count < 1024)
+		mdsc->caps_min_count = 1024;
+	mdsc->caps_use_max = fsopt->caps_max;
+	if (mdsc->caps_use_max > 0 &&
+	    mdsc->caps_use_max < mdsc->caps_min_count)
+		mdsc->caps_use_max = mdsc->caps_min_count;
 	spin_unlock(&mdsc->caps_list_lock);
 }
 
@@ -272,6 +278,7 @@ int ceph_reserve_caps(struct ceph_mds_client *mdsc,
 	if (!err) {
 		BUG_ON(have + alloc != need);
 		ctx->count = need;
+		ctx->used = 0;
 	}
 
 	spin_lock(&mdsc->caps_list_lock);
@@ -295,13 +302,24 @@ int ceph_reserve_caps(struct ceph_mds_client *mdsc,
 }
 
 void ceph_unreserve_caps(struct ceph_mds_client *mdsc,
-			struct ceph_cap_reservation *ctx)
+			 struct ceph_cap_reservation *ctx)
 {
+	bool reclaim = false;
+	if (!ctx->count)
+		return;
+
 	dout("unreserve caps ctx=%p count=%d\n", ctx, ctx->count);
 	spin_lock(&mdsc->caps_list_lock);
 	__ceph_unreserve_caps(mdsc, ctx->count);
 	ctx->count = 0;
+
+	if (mdsc->caps_use_max > 0 &&
+	    mdsc->caps_use_count > mdsc->caps_use_max)
+		reclaim = true;
 	spin_unlock(&mdsc->caps_list_lock);
+
+	if (reclaim)
+		ceph_reclaim_caps_nr(mdsc, ctx->used);
 }
 
 struct ceph_cap *ceph_get_cap(struct ceph_mds_client *mdsc,
@@ -346,6 +364,7 @@ struct ceph_cap *ceph_get_cap(struct ceph_mds_client *mdsc,
 	BUG_ON(list_empty(&mdsc->caps_list));
 
 	ctx->count--;
+	ctx->used++;
 	mdsc->caps_reserve_count--;
 	mdsc->caps_use_count++;
 
@@ -500,12 +519,12 @@ static void __insert_cap_node(struct ceph_inode_info *ci,
 static void __cap_set_timeouts(struct ceph_mds_client *mdsc,
 			       struct ceph_inode_info *ci)
 {
-	struct ceph_mount_options *ma = mdsc->fsc->mount_options;
+	struct ceph_mount_options *opt = mdsc->fsc->mount_options;
 
 	ci->i_hold_caps_min = round_jiffies(jiffies +
-					    ma->caps_wanted_delay_min * HZ);
+					    opt->caps_wanted_delay_min * HZ);
 	ci->i_hold_caps_max = round_jiffies(jiffies +
-					    ma->caps_wanted_delay_max * HZ);
+					    opt->caps_wanted_delay_max * HZ);
 	dout("__cap_set_timeouts %p min %lu max %lu\n", &ci->vfs_inode,
 	     ci->i_hold_caps_min - jiffies, ci->i_hold_caps_max - jiffies);
 }
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index eba283557653..a8f429882249 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -1224,6 +1224,7 @@ enum {
 
 struct ceph_lease_walk_control {
 	bool dir_lease;
+	bool expire_dir_lease;
 	unsigned long nr_to_scan;
 	unsigned long dir_lease_ttl;
 };
@@ -1345,7 +1346,13 @@ static int __dir_lease_check(struct dentry *dentry, void *arg)
 		/* Move dentry to tail of dir lease list if we don't want
 		 * to delete it. So dentries in the list are checked in a
 		 * round robin manner */
-		return TOUCH;
+		if (!lwc->expire_dir_lease)
+			return TOUCH;
+		if (dentry->d_lockref.count > 0 ||
+		    (di->flags & CEPH_DENTRY_REFERENCED))
+			return TOUCH;
+		/* invalidate dir lease */
+		di->lease_shared_gen = 0;
 	}
 	return DELETE;
 }
@@ -1353,8 +1360,17 @@ static int __dir_lease_check(struct dentry *dentry, void *arg)
 int ceph_trim_dentries(struct ceph_mds_client *mdsc)
 {
 	struct ceph_lease_walk_control lwc;
+	unsigned long count;
 	unsigned long freed;
 
+	spin_lock(&mdsc->caps_list_lock);
+        if (mdsc->caps_use_max > 0 &&
+            mdsc->caps_use_count > mdsc->caps_use_max)
+		count = mdsc->caps_use_count - mdsc->caps_use_max;
+	else
+		count = 0;
+        spin_unlock(&mdsc->caps_list_lock);
+
 	lwc.dir_lease = false;
 	lwc.nr_to_scan  = CEPH_CAPS_PER_RELEASE * 2;
 	freed = __dentry_leases_walk(mdsc, &lwc, __dentry_lease_check);
@@ -1365,6 +1381,8 @@ int ceph_trim_dentries(struct ceph_mds_client *mdsc)
 		lwc.nr_to_scan = CEPH_CAPS_PER_RELEASE;
 
 	lwc.dir_lease = true;
+	lwc.expire_dir_lease = freed < count;
+	lwc.dir_lease_ttl = mdsc->fsc->mount_options->caps_wanted_delay_max * HZ;
 	freed +=__dentry_leases_walk(mdsc, &lwc, __dir_lease_check);
 	if (!lwc.nr_to_scan) /* more to check */
 		return -EAGAIN;
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 2095e5d038f8..21c33ed048ed 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -1965,6 +1965,18 @@ void ceph_queue_cap_reclaim_work(struct ceph_mds_client *mdsc)
         }
 }
 
+void ceph_reclaim_caps_nr(struct ceph_mds_client *mdsc, int nr)
+{
+	int val;
+	if (!nr)
+		return;
+	val = atomic_add_return(nr, &mdsc->cap_reclaim_pending);
+	if (!(val % CEPH_CAPS_PER_RELEASE)) {
+		atomic_set(&mdsc->cap_reclaim_pending, 0);
+		ceph_queue_cap_reclaim_work(mdsc);
+	}
+}
+
 /*
  * requests
  */
@@ -2878,7 +2890,6 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
 		if (result == 0 && (req->r_op == CEPH_MDS_OP_READDIR ||
 				    req->r_op == CEPH_MDS_OP_LSSNAP))
 			ceph_readdir_prepopulate(req, req->r_session);
-		ceph_unreserve_caps(mdsc, &req->r_caps_reservation);
 	}
 	current->journal_info = NULL;
 	mutex_unlock(&req->r_fill_mutex);
@@ -2887,12 +2898,18 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
 	if (realm)
 		ceph_put_snap_realm(mdsc, realm);
 
-	if (err == 0 && req->r_target_inode &&
-	    test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags)) {
-		struct ceph_inode_info *ci = ceph_inode(req->r_target_inode);
-		spin_lock(&ci->i_unsafe_lock);
-		list_add_tail(&req->r_unsafe_target_item, &ci->i_unsafe_iops);
-		spin_unlock(&ci->i_unsafe_lock);
+	if (err == 0) {
+		if (req->r_target_inode &&
+		    test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags)) {
+			struct ceph_inode_info *ci =
+				ceph_inode(req->r_target_inode);
+			spin_lock(&ci->i_unsafe_lock);
+			list_add_tail(&req->r_unsafe_target_item,
+				      &ci->i_unsafe_iops);
+			spin_unlock(&ci->i_unsafe_lock);
+		}
+
+		ceph_unreserve_caps(mdsc, &req->r_caps_reservation);
 	}
 out_err:
 	mutex_lock(&mdsc->mutex);
@@ -4083,13 +4100,14 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
 	spin_lock_init(&mdsc->cap_dirty_lock);
 	init_waitqueue_head(&mdsc->cap_flushing_wq);
 	INIT_WORK(&mdsc->cap_reclaim_work, ceph_cap_reclaim_work);
+	atomic_set(&mdsc->cap_reclaim_pending, 0);
 
 	spin_lock_init(&mdsc->dentry_list_lock);
 	INIT_LIST_HEAD(&mdsc->dentry_leases);
 	INIT_LIST_HEAD(&mdsc->dentry_dir_leases);
 
 	ceph_caps_init(mdsc);
-	ceph_adjust_min_caps(mdsc, fsc->min_caps);
+	ceph_adjust_caps_max_min(mdsc, fsc->mount_options);
 
 	spin_lock_init(&mdsc->snapid_map_lock);
 	mdsc->snapid_map_tree = RB_ROOT;
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index 580b235f343b..50385a481fdb 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -379,6 +379,7 @@ struct ceph_mds_client {
 	wait_queue_head_t cap_flushing_wq;
 
 	struct work_struct cap_reclaim_work;
+	atomic_t	   cap_reclaim_pending;
 
 	/*
 	 * Cap reservations
@@ -396,6 +397,7 @@ struct ceph_mds_client {
 						unreserved) */
 	int		caps_total_count;    /* total caps allocated */
 	int		caps_use_count;      /* in use */
+	int		caps_use_max;	     /* max used caps */
 	int		caps_reserve_count;  /* unused, reserved */
 	int		caps_avail_count;    /* unused, unreserved */
 	int		caps_min_count;      /* keep at least this many
@@ -465,6 +467,7 @@ extern void __ceph_queue_cap_release(struct ceph_mds_session *session,
 extern void ceph_flush_cap_releases(struct ceph_mds_client *mdsc,
 				    struct ceph_mds_session *session);
 extern void ceph_queue_cap_reclaim_work(struct ceph_mds_client *mdsc);
+extern void ceph_reclaim_caps_nr(struct ceph_mds_client *mdsc, int nr);
 extern void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc);
 
 extern char *ceph_mdsc_build_path(struct dentry *dentry, int *plen, u64 *base,
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 200836bcf542..6d5bb2f74612 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -133,6 +133,7 @@ enum {
 	Opt_rasize,
 	Opt_caps_wanted_delay_min,
 	Opt_caps_wanted_delay_max,
+	Opt_caps_max,
 	Opt_readdir_max_entries,
 	Opt_readdir_max_bytes,
 	Opt_congestion_kb,
@@ -175,6 +176,7 @@ static match_table_t fsopt_tokens = {
 	{Opt_rasize, "rasize=%d"},
 	{Opt_caps_wanted_delay_min, "caps_wanted_delay_min=%d"},
 	{Opt_caps_wanted_delay_max, "caps_wanted_delay_max=%d"},
+	{Opt_caps_max, "caps_max=%d"},
 	{Opt_readdir_max_entries, "readdir_max_entries=%d"},
 	{Opt_readdir_max_bytes, "readdir_max_bytes=%d"},
 	{Opt_congestion_kb, "write_congestion_kb=%d"},
@@ -286,6 +288,11 @@ static int parse_fsopt_token(char *c, void *private)
 			return -EINVAL;
 		fsopt->caps_wanted_delay_max = intval;
 		break;
+	case Opt_caps_max:
+		if (intval < 0)
+			return -EINVAL;
+		fsopt->caps_max = intval;
+		break;
 	case Opt_readdir_max_entries:
 		if (intval < 1)
 			return -EINVAL;
@@ -576,6 +583,8 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root)
 		seq_printf(m, ",rasize=%d", fsopt->rasize);
 	if (fsopt->congestion_kb != default_congestion_kb())
 		seq_printf(m, ",write_congestion_kb=%d", fsopt->congestion_kb);
+	if (fsopt->caps_max)
+		seq_printf(m, ",caps_max=%d", fsopt->caps_max);
 	if (fsopt->caps_wanted_delay_min != CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT)
 		seq_printf(m, ",caps_wanted_delay_min=%d",
 			 fsopt->caps_wanted_delay_min);
@@ -683,9 +692,6 @@ static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt,
 	if (!fsc->wb_pagevec_pool)
 		goto fail_cap_wq;
 
-	/* caps */
-	fsc->min_caps = fsopt->max_readdir;
-
 	return fsc;
 
 fail_cap_wq:
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index b3bcfb3c27bd..16c03188578e 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -79,6 +79,7 @@ struct ceph_mount_options {
 	int rasize;           /* max readahead */
 	int congestion_kb;    /* max writeback in flight */
 	int caps_wanted_delay_min, caps_wanted_delay_max;
+	int caps_max;
 	int max_readdir;       /* max readdir result (entires) */
 	int max_readdir_bytes; /* max readdir result (bytes) */
 
@@ -100,7 +101,6 @@ struct ceph_fs_client {
 	struct ceph_client *client;
 
 	unsigned long mount_state;
-	int min_caps;                  /* min caps i added */
 	loff_t max_file_size;
 
 	struct ceph_mds_client *mdsc;
@@ -668,7 +668,8 @@ extern int __ceph_caps_mds_wanted(struct ceph_inode_info *ci, bool check);
 
 extern void ceph_caps_init(struct ceph_mds_client *mdsc);
 extern void ceph_caps_finalize(struct ceph_mds_client *mdsc);
-extern void ceph_adjust_min_caps(struct ceph_mds_client *mdsc, int delta);
+extern void ceph_adjust_caps_max_min(struct ceph_mds_client *mdsc,
+				     struct ceph_mount_options *fsopt);
 extern int ceph_reserve_caps(struct ceph_mds_client *mdsc,
 			     struct ceph_cap_reservation *ctx, int need);
 extern void ceph_unreserve_caps(struct ceph_mds_client *mdsc,
diff --git a/include/linux/ceph/types.h b/include/linux/ceph/types.h
index 27cd973d3881..bd3d532902d7 100644
--- a/include/linux/ceph/types.h
+++ b/include/linux/ceph/types.h
@@ -24,6 +24,7 @@ struct ceph_vino {
 /* context for the caps reservation mechanism */
 struct ceph_cap_reservation {
 	int count;
+	int used;
 };
 
 
-- 
cgit v1.2.3


From 0bdb50c531f7377a9da80d3ce2d61f389c84cb30 Mon Sep 17 00:00:00 2001
From: NeilBrown <neil@brown.name>
Date: Sun, 6 Jan 2019 21:06:25 +1100
Subject: dm: fix to_sector() for 32bit
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A dm-raid array with devices larger than 4GB won't assemble on
a 32 bit host since _check_data_dev_sectors() was added in 4.16.
This is because to_sector() treats its argument as an "unsigned long"
which is 32bits (4GB) on a 32bit host.  Using "unsigned long long"
is more correct.

Kernels as early as 4.2 can have other problems due to to_sector()
being used on the size of a device.

Fixes: 0cf4503174c1 ("dm raid: add support for the MD RAID0 personality")
cc: stable@vger.kernel.org (v4.2+)
Reported-and-tested-by: Guillaume Perréal <gperreal@free.fr>
Signed-off-by: NeilBrown <neil@brown.name>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 include/linux/device-mapper.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index 0f5b3d7c6cb3..52e8709c6df0 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -603,7 +603,7 @@ do {									\
  */
 #define dm_target_offset(ti, sector) ((sector) - (ti)->begin)
 
-static inline sector_t to_sector(unsigned long n)
+static inline sector_t to_sector(unsigned long long n)
 {
 	return (n >> SECTOR_SHIFT);
 }
-- 
cgit v1.2.3


From 6bbc923dfcf57d6b97388819a7393835664c7a8e Mon Sep 17 00:00:00 2001
From: Helen Koike <helen.koike@collabora.com>
Date: Thu, 21 Feb 2019 17:33:34 -0300
Subject: dm: add support to directly boot to a mapped device

Add a "create" module parameter, which allows device-mapper targets to
be configured at boot time. This enables early use of DM targets in the
boot process (as the root device or otherwise) without the need of an
initramfs.

The syntax used in the boot param is based on the concise format from
the dmsetup tool to follow the rule of least surprise:

	dmsetup table --concise /dev/mapper/lroot

Which is:
	dm-mod.create=<name>,<uuid>,<minor>,<flags>,<table>[,<table>+][;<name>,<uuid>,<minor>,<flags>,<table>[,<table>+]+]

Where,
	<name>		::= The device name.
	<uuid>		::= xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx | ""
	<minor>		::= The device minor number | ""
	<flags>		::= "ro" | "rw"
	<table>		::= <start_sector> <num_sectors> <target_type> <target_args>
	<target_type>	::= "verity" | "linear" | ...

For example, the following could be added in the boot parameters:
dm-mod.create="lroot,,,rw, 0 4096 linear 98:16 0, 4096 4096 linear 98:32 0" root=/dev/dm-0

Only the targets that were tested are allowed and the ones that don't
change any block device when the device is create as read-only. For
example, mirror and cache targets are not allowed. The rationale behind
this is that if the user makes a mistake, choosing the wrong device to
be the mirror or the cache can corrupt data.

The only targets initially allowed are:
* crypt
* delay
* linear
* snapshot-origin
* striped
* verity

Co-developed-by: Will Drewry <wad@chromium.org>
Co-developed-by: Kees Cook <keescook@chromium.org>
Co-developed-by: Enric Balletbo i Serra <enric.balletbo@collabora.com>
Signed-off-by: Helen Koike <helen.koike@collabora.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 Documentation/device-mapper/dm-init.txt | 114 ++++++++++++
 drivers/md/Kconfig                      |  12 ++
 drivers/md/Makefile                     |   4 +
 drivers/md/dm-init.c                    | 303 ++++++++++++++++++++++++++++++++
 drivers/md/dm-ioctl.c                   | 103 +++++++++++
 include/linux/device-mapper.h           |   9 +
 6 files changed, 545 insertions(+)
 create mode 100644 Documentation/device-mapper/dm-init.txt
 create mode 100644 drivers/md/dm-init.c

(limited to 'include')

diff --git a/Documentation/device-mapper/dm-init.txt b/Documentation/device-mapper/dm-init.txt
new file mode 100644
index 000000000000..8464ee7c01b8
--- /dev/null
+++ b/Documentation/device-mapper/dm-init.txt
@@ -0,0 +1,114 @@
+Early creation of mapped devices
+====================================
+
+It is possible to configure a device-mapper device to act as the root device for
+your system in two ways.
+
+The first is to build an initial ramdisk which boots to a minimal userspace
+which configures the device, then pivot_root(8) in to it.
+
+The second is to create one or more device-mappers using the module parameter
+"dm-mod.create=" through the kernel boot command line argument.
+
+The format is specified as a string of data separated by commas and optionally
+semi-colons, where:
+ - a comma is used to separate fields like name, uuid, flags and table
+   (specifies one device)
+ - a semi-colon is used to separate devices.
+
+So the format will look like this:
+
+ dm-mod.create=<name>,<uuid>,<minor>,<flags>,<table>[,<table>+][;<name>,<uuid>,<minor>,<flags>,<table>[,<table>+]+]
+
+Where,
+	<name>		::= The device name.
+	<uuid>		::= xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx | ""
+	<minor>		::= The device minor number | ""
+	<flags>		::= "ro" | "rw"
+	<table>		::= <start_sector> <num_sectors> <target_type> <target_args>
+	<target_type>	::= "verity" | "linear" | ... (see list below)
+
+The dm line should be equivalent to the one used by the dmsetup tool with the
+--concise argument.
+
+Target types
+============
+
+Not all target types are available as there are serious risks in allowing
+activation of certain DM targets without first using userspace tools to check
+the validity of associated metadata.
+
+	"cache":		constrained, userspace should verify cache device
+	"crypt":		allowed
+	"delay":		allowed
+	"era":			constrained, userspace should verify metadata device
+	"flakey":		constrained, meant for test
+	"linear":		allowed
+	"log-writes":		constrained, userspace should verify metadata device
+	"mirror":		constrained, userspace should verify main/mirror device
+	"raid":			constrained, userspace should verify metadata device
+	"snapshot":		constrained, userspace should verify src/dst device
+	"snapshot-origin":	allowed
+	"snapshot-merge":	constrained, userspace should verify src/dst device
+	"striped":		allowed
+	"switch":		constrained, userspace should verify dev path
+	"thin":			constrained, requires dm target message from userspace
+	"thin-pool":		constrained, requires dm target message from userspace
+	"verity":		allowed
+	"writecache":		constrained, userspace should verify cache device
+	"zero":			constrained, not meant for rootfs
+
+If the target is not listed above, it is constrained by default (not tested).
+
+Examples
+========
+An example of booting to a linear array made up of user-mode linux block
+devices:
+
+  dm-mod.create="lroot,,,rw, 0 4096 linear 98:16 0, 4096 4096 linear 98:32 0" root=/dev/dm-0
+
+This will boot to a rw dm-linear target of 8192 sectors split across two block
+devices identified by their major:minor numbers.  After boot, udev will rename
+this target to /dev/mapper/lroot (depending on the rules). No uuid was assigned.
+
+An example of multiple device-mappers, with the dm-mod.create="..." contents is shown here
+split on multiple lines for readability:
+
+  vroot,,,ro,
+    0 1740800 verity 254:0 254:0 1740800 sha1
+      76e9be054b15884a9fa85973e9cb274c93afadb6
+      5b3549d54d6c7a3837b9b81ed72e49463a64c03680c47835bef94d768e5646fe;
+  vram,,,rw,
+    0 32768 linear 1:0 0,
+    32768 32768 linear 1:1 0
+
+Other examples (per target):
+
+"crypt":
+  dm-crypt,,8,ro,
+    0 1048576 crypt aes-xts-plain64
+    babebabebabebabebabebabebabebabebabebabebabebabebabebabebabebabe 0
+    /dev/sda 0 1 allow_discards
+
+"delay":
+  dm-delay,,4,ro,0 409600 delay /dev/sda1 0 500
+
+"linear":
+  dm-linear,,,rw,
+    0 32768 linear /dev/sda1 0,
+    32768 1024000 linear /dev/sda2 0,
+    1056768 204800 linear /dev/sda3 0,
+    1261568 512000 linear /dev/sda4 0
+
+"snapshot-origin":
+  dm-snap-orig,,4,ro,0 409600 snapshot-origin 8:2
+
+"striped":
+  dm-striped,,4,ro,0 1638400 striped 4 4096
+  /dev/sda1 0 /dev/sda2 0 /dev/sda3 0 /dev/sda4 0
+
+"verity":
+  dm-verity,,4,ro,
+    0 1638400 verity 1 8:1 8:2 4096 4096 204800 1 sha256
+    fb1a5a0f00deb908d8b53cb270858975e76cf64105d412ce764225d53b8f3cfd
+    51934789604d1b92399c52e7cb149d1b3a1b74bbbcb103b2a0aaacbed5c08584
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index 3db222509e44..2557f198e175 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -436,6 +436,18 @@ config DM_DELAY
 
 	If unsure, say N.
 
+config DM_INIT
+	bool "DM \"dm-mod.create=\" parameter support"
+	depends on BLK_DEV_DM=y
+	---help---
+	Enable "dm-mod.create=" parameter to create mapped devices at init time.
+	This option is useful to allow mounting rootfs without requiring an
+	initramfs.
+	See Documentation/device-mapper/dm-init.txt for dm-mod.create="..."
+	format.
+
+	If unsure, say N.
+
 config DM_UEVENT
 	bool "DM uevents"
 	depends on BLK_DEV_DM
diff --git a/drivers/md/Makefile b/drivers/md/Makefile
index 822f4e8753bc..a52b703e588e 100644
--- a/drivers/md/Makefile
+++ b/drivers/md/Makefile
@@ -69,6 +69,10 @@ obj-$(CONFIG_DM_INTEGRITY)	+= dm-integrity.o
 obj-$(CONFIG_DM_ZONED)		+= dm-zoned.o
 obj-$(CONFIG_DM_WRITECACHE)	+= dm-writecache.o
 
+ifeq ($(CONFIG_DM_INIT),y)
+dm-mod-objs			+= dm-init.o
+endif
+
 ifeq ($(CONFIG_DM_UEVENT),y)
 dm-mod-objs			+= dm-uevent.o
 endif
diff --git a/drivers/md/dm-init.c b/drivers/md/dm-init.c
new file mode 100644
index 000000000000..b53f30f16b4d
--- /dev/null
+++ b/drivers/md/dm-init.c
@@ -0,0 +1,303 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * dm-init.c
+ * Copyright (C) 2017 The Chromium OS Authors <chromium-os-dev@chromium.org>
+ *
+ * This file is released under the GPLv2.
+ */
+
+#include <linux/ctype.h>
+#include <linux/device.h>
+#include <linux/device-mapper.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/moduleparam.h>
+
+#define DM_MSG_PREFIX "init"
+#define DM_MAX_DEVICES 256
+#define DM_MAX_TARGETS 256
+#define DM_MAX_STR_SIZE 4096
+
+static char *create;
+
+/*
+ * Format: dm-mod.create=<name>,<uuid>,<minor>,<flags>,<table>[,<table>+][;<name>,<uuid>,<minor>,<flags>,<table>[,<table>+]+]
+ * Table format: <start_sector> <num_sectors> <target_type> <target_args>
+ *
+ * See Documentation/device-mapper/dm-init.txt for dm-mod.create="..." format
+ * details.
+ */
+
+struct dm_device {
+	struct dm_ioctl dmi;
+	struct dm_target_spec *table[DM_MAX_TARGETS];
+	char *target_args_array[DM_MAX_TARGETS];
+	struct list_head list;
+};
+
+const char *dm_allowed_targets[] __initconst = {
+	"crypt",
+	"delay",
+	"linear",
+	"snapshot-origin",
+	"striped",
+	"verity",
+};
+
+static int __init dm_verify_target_type(const char *target)
+{
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(dm_allowed_targets); i++) {
+		if (!strcmp(dm_allowed_targets[i], target))
+			return 0;
+	}
+	return -EINVAL;
+}
+
+static void __init dm_setup_cleanup(struct list_head *devices)
+{
+	struct dm_device *dev, *tmp;
+	unsigned int i;
+
+	list_for_each_entry_safe(dev, tmp, devices, list) {
+		list_del(&dev->list);
+		for (i = 0; i < dev->dmi.target_count; i++) {
+			kfree(dev->table[i]);
+			kfree(dev->target_args_array[i]);
+		}
+		kfree(dev);
+	}
+}
+
+/**
+ * str_field_delimit - delimit a string based on a separator char.
+ * @str: the pointer to the string to delimit.
+ * @separator: char that delimits the field
+ *
+ * Find a @separator and replace it by '\0'.
+ * Remove leading and trailing spaces.
+ * Return the remainder string after the @separator.
+ */
+static char __init *str_field_delimit(char **str, char separator)
+{
+	char *s;
+
+	/* TODO: add support for escaped characters */
+	*str = skip_spaces(*str);
+	s = strchr(*str, separator);
+	/* Delimit the field and remove trailing spaces */
+	if (s)
+		*s = '\0';
+	*str = strim(*str);
+	return s ? ++s : NULL;
+}
+
+/**
+ * dm_parse_table_entry - parse a table entry
+ * @dev: device to store the parsed information.
+ * @str: the pointer to a string with the format:
+ *	<start_sector> <num_sectors> <target_type> <target_args>[, ...]
+ *
+ * Return the remainder string after the table entry, i.e, after the comma which
+ * delimits the entry or NULL if reached the end of the string.
+ */
+static char __init *dm_parse_table_entry(struct dm_device *dev, char *str)
+{
+	const unsigned int n = dev->dmi.target_count - 1;
+	struct dm_target_spec *sp;
+	unsigned int i;
+	/* fields:  */
+	char *field[4];
+	char *next;
+
+	field[0] = str;
+	/* Delimit first 3 fields that are separated by space */
+	for (i = 0; i < ARRAY_SIZE(field) - 1; i++) {
+		field[i + 1] = str_field_delimit(&field[i], ' ');
+		if (!field[i + 1])
+			return ERR_PTR(-EINVAL);
+	}
+	/* Delimit last field that can be terminated by comma */
+	next = str_field_delimit(&field[i], ',');
+
+	sp = kzalloc(sizeof(*sp), GFP_KERNEL);
+	if (!sp)
+		return ERR_PTR(-ENOMEM);
+	dev->table[n] = sp;
+
+	/* start_sector */
+	if (kstrtoull(field[0], 0, &sp->sector_start))
+		return ERR_PTR(-EINVAL);
+	/* num_sector */
+	if (kstrtoull(field[1], 0, &sp->length))
+		return ERR_PTR(-EINVAL);
+	/* target_type */
+	strscpy(sp->target_type, field[2], sizeof(sp->target_type));
+	if (dm_verify_target_type(sp->target_type)) {
+		DMERR("invalid type \"%s\"", sp->target_type);
+		return ERR_PTR(-EINVAL);
+	}
+	/* target_args */
+	dev->target_args_array[n] = kstrndup(field[3], GFP_KERNEL,
+					     DM_MAX_STR_SIZE);
+	if (!dev->target_args_array[n])
+		return ERR_PTR(-ENOMEM);
+
+	return next;
+}
+
+/**
+ * dm_parse_table - parse "dm-mod.create=" table field
+ * @dev: device to store the parsed information.
+ * @str: the pointer to a string with the format:
+ *	<table>[,<table>+]
+ */
+static int __init dm_parse_table(struct dm_device *dev, char *str)
+{
+	char *table_entry = str;
+
+	while (table_entry) {
+		DMDEBUG("parsing table \"%s\"", str);
+		if (++dev->dmi.target_count >= DM_MAX_TARGETS) {
+			DMERR("too many targets %u > %d",
+			      dev->dmi.target_count, DM_MAX_TARGETS);
+			return -EINVAL;
+		}
+		table_entry = dm_parse_table_entry(dev, table_entry);
+		if (IS_ERR(table_entry)) {
+			DMERR("couldn't parse table");
+			return PTR_ERR(table_entry);
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * dm_parse_device_entry - parse a device entry
+ * @dev: device to store the parsed information.
+ * @str: the pointer to a string with the format:
+ *	name,uuid,minor,flags,table[; ...]
+ *
+ * Return the remainder string after the table entry, i.e, after the semi-colon
+ * which delimits the entry or NULL if reached the end of the string.
+ */
+static char __init *dm_parse_device_entry(struct dm_device *dev, char *str)
+{
+	/* There are 5 fields: name,uuid,minor,flags,table; */
+	char *field[5];
+	unsigned int i;
+	char *next;
+
+	field[0] = str;
+	/* Delimit first 4 fields that are separated by comma */
+	for (i = 0; i < ARRAY_SIZE(field) - 1; i++) {
+		field[i+1] = str_field_delimit(&field[i], ',');
+		if (!field[i+1])
+			return ERR_PTR(-EINVAL);
+	}
+	/* Delimit last field that can be delimited by semi-colon */
+	next = str_field_delimit(&field[i], ';');
+
+	/* name */
+	strscpy(dev->dmi.name, field[0], sizeof(dev->dmi.name));
+	/* uuid */
+	strscpy(dev->dmi.uuid, field[1], sizeof(dev->dmi.uuid));
+	/* minor */
+	if (strlen(field[2])) {
+		if (kstrtoull(field[2], 0, &dev->dmi.dev))
+			return ERR_PTR(-EINVAL);
+		dev->dmi.flags |= DM_PERSISTENT_DEV_FLAG;
+	}
+	/* flags */
+	if (!strcmp(field[3], "ro"))
+		dev->dmi.flags |= DM_READONLY_FLAG;
+	else if (strcmp(field[3], "rw"))
+		return ERR_PTR(-EINVAL);
+	/* table */
+	if (dm_parse_table(dev, field[4]))
+		return ERR_PTR(-EINVAL);
+
+	return next;
+}
+
+/**
+ * dm_parse_devices - parse "dm-mod.create=" argument
+ * @devices: list of struct dm_device to store the parsed information.
+ * @str: the pointer to a string with the format:
+ *	<device>[;<device>+]
+ */
+static int __init dm_parse_devices(struct list_head *devices, char *str)
+{
+	unsigned long ndev = 0;
+	struct dm_device *dev;
+	char *device = str;
+
+	DMDEBUG("parsing \"%s\"", str);
+	while (device) {
+		dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+		if (!dev)
+			return -ENOMEM;
+		list_add_tail(&dev->list, devices);
+
+		if (++ndev >= DM_MAX_DEVICES) {
+			DMERR("too many targets %u > %d",
+			      dev->dmi.target_count, DM_MAX_TARGETS);
+			return -EINVAL;
+		}
+
+		device = dm_parse_device_entry(dev, device);
+		if (IS_ERR(device)) {
+			DMERR("couldn't parse device");
+			return PTR_ERR(device);
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * dm_init_init - parse "dm-mod.create=" argument and configure drivers
+ */
+static int __init dm_init_init(void)
+{
+	struct dm_device *dev;
+	LIST_HEAD(devices);
+	char *str;
+	int r;
+
+	if (!create)
+		return 0;
+
+	if (strlen(create) >= DM_MAX_STR_SIZE) {
+		DMERR("Argument is too big. Limit is %d\n", DM_MAX_STR_SIZE);
+		return -EINVAL;
+	}
+	str = kstrndup(create, GFP_KERNEL, DM_MAX_STR_SIZE);
+	if (!str)
+		return -ENOMEM;
+
+	r = dm_parse_devices(&devices, str);
+	if (r)
+		goto out;
+
+	DMINFO("waiting for all devices to be available before creating mapped devices\n");
+	wait_for_device_probe();
+
+	list_for_each_entry(dev, &devices, list) {
+		if (dm_early_create(&dev->dmi, dev->table,
+				    dev->target_args_array))
+			break;
+	}
+out:
+	kfree(str);
+	dm_setup_cleanup(&devices);
+	return r;
+}
+
+late_initcall(dm_init_init);
+
+module_param(create, charp, 0);
+MODULE_PARM_DESC(create, "Create a mapped device in early boot");
diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c
index f666778ad237..c740153b4e52 100644
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -2018,3 +2018,106 @@ out:
 
 	return r;
 }
+
+
+/**
+ * dm_early_create - create a mapped device in early boot.
+ *
+ * @dmi: Contains main information of the device mapping to be created.
+ * @spec_array: array of pointers to struct dm_target_spec. Describes the
+ * mapping table of the device.
+ * @target_params_array: array of strings with the parameters to a specific
+ * target.
+ *
+ * Instead of having the struct dm_target_spec and the parameters for every
+ * target embedded at the end of struct dm_ioctl (as performed in a normal
+ * ioctl), pass them as arguments, so the caller doesn't need to serialize them.
+ * The size of the spec_array and target_params_array is given by
+ * @dmi->target_count.
+ * This function is supposed to be called in early boot, so locking mechanisms
+ * to protect against concurrent loads are not required.
+ */
+int __init dm_early_create(struct dm_ioctl *dmi,
+			   struct dm_target_spec **spec_array,
+			   char **target_params_array)
+{
+	int r, m = DM_ANY_MINOR;
+	struct dm_table *t, *old_map;
+	struct mapped_device *md;
+	unsigned int i;
+
+	if (!dmi->target_count)
+		return -EINVAL;
+
+	r = check_name(dmi->name);
+	if (r)
+		return r;
+
+	if (dmi->flags & DM_PERSISTENT_DEV_FLAG)
+		m = MINOR(huge_decode_dev(dmi->dev));
+
+	/* alloc dm device */
+	r = dm_create(m, &md);
+	if (r)
+		return r;
+
+	/* hash insert */
+	r = dm_hash_insert(dmi->name, *dmi->uuid ? dmi->uuid : NULL, md);
+	if (r)
+		goto err_destroy_dm;
+
+	/* alloc table */
+	r = dm_table_create(&t, get_mode(dmi), dmi->target_count, md);
+	if (r)
+		goto err_destroy_dm;
+
+	/* add targets */
+	for (i = 0; i < dmi->target_count; i++) {
+		r = dm_table_add_target(t, spec_array[i]->target_type,
+					(sector_t) spec_array[i]->sector_start,
+					(sector_t) spec_array[i]->length,
+					target_params_array[i]);
+		if (r) {
+			DMWARN("error adding target to table");
+			goto err_destroy_table;
+		}
+	}
+
+	/* finish table */
+	r = dm_table_complete(t);
+	if (r)
+		goto err_destroy_table;
+
+	md->type = dm_table_get_type(t);
+	/* setup md->queue to reflect md's type (may block) */
+	r = dm_setup_md_queue(md, t);
+	if (r) {
+		DMWARN("unable to set up device queue for new table.");
+		goto err_destroy_table;
+	}
+
+	/* Set new map */
+	dm_suspend(md, 0);
+	old_map = dm_swap_table(md, t);
+	if (IS_ERR(old_map)) {
+		r = PTR_ERR(old_map);
+		goto err_destroy_table;
+	}
+	set_disk_ro(dm_disk(md), !!(dmi->flags & DM_READONLY_FLAG));
+
+	/* resume device */
+	r = dm_resume(md);
+	if (r)
+		goto err_destroy_table;
+
+	DMINFO("%s (%s) is ready", md->disk->disk_name, dmi->name);
+	dm_put(md);
+	return 0;
+
+err_destroy_table:
+	dm_table_destroy(t);
+err_destroy_dm:
+	dm_put(md);
+	dm_destroy(md);
+	return r;
+}
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index 52e8709c6df0..b0672756d056 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -10,6 +10,7 @@
 
 #include <linux/bio.h>
 #include <linux/blkdev.h>
+#include <linux/dm-ioctl.h>
 #include <linux/math64.h>
 #include <linux/ratelimit.h>
 
@@ -425,6 +426,14 @@ void dm_remap_zone_report(struct dm_target *ti, sector_t start,
 			  struct blk_zone *zones, unsigned int *nr_zones);
 union map_info *dm_get_rq_mapinfo(struct request *rq);
 
+/*
+ * Device mapper functions to parse and create devices specified by the
+ * parameter "dm-mod.create="
+ */
+int __init dm_early_create(struct dm_ioctl *dmi,
+			   struct dm_target_spec **spec_array,
+			   char **target_params_array);
+
 struct queue_limits *dm_get_queue_limits(struct mapped_device *md);
 
 /*
-- 
cgit v1.2.3


From 500e0b28ecd3c5aade98f3c3a339d18dcb166bb6 Mon Sep 17 00:00:00 2001
From: Chao Yu <yuchao0@huawei.com>
Date: Fri, 15 Feb 2019 00:08:25 +0800
Subject: f2fs: fix to check inline_xattr_size boundary correctly

We use below condition to check inline_xattr_size boundary:

	if (!F2FS_OPTION(sbi).inline_xattr_size ||
		F2FS_OPTION(sbi).inline_xattr_size >=
				DEF_ADDRS_PER_INODE -
				F2FS_TOTAL_EXTRA_ATTR_SIZE -
				DEF_INLINE_RESERVED_SIZE -
				DEF_MIN_INLINE_SIZE)

There is there problems in that check:
- we should allow inline_xattr_size equaling to min size of inline
{data,dentry} area.
- F2FS_TOTAL_EXTRA_ATTR_SIZE and inline_xattr_size are based on
different size unit, previous one is 4 bytes, latter one is 1 bytes.
- DEF_MIN_INLINE_SIZE only indicate min size of inline data area,
however, we need to consider min size of inline dentry area as well,
minimal inline dentry should at least contain two entries: '.' and
'..', so that min inline_dentry size is 40 bytes.

.bitmap		1 * 1 = 1
.reserved	1 * 1 = 1
.dentry		11 * 2 = 22
.filename	8 * 2 = 16
total		40

Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/f2fs.h          |  1 -
 fs/f2fs/super.c         | 13 +++++++------
 include/linux/f2fs_fs.h | 13 +++++++------
 3 files changed, 14 insertions(+), 13 deletions(-)

(limited to 'include')

diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 4665bff1bf55..f1f0d2810852 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -459,7 +459,6 @@ struct f2fs_flush_device {
 
 /* for inline stuff */
 #define DEF_INLINE_RESERVED_SIZE	1
-#define DEF_MIN_INLINE_SIZE		1
 static inline int get_extra_isize(struct inode *inode);
 static inline int get_inline_xattr_addrs(struct inode *inode);
 #define MAX_INLINE_DATA(inode)	(sizeof(__le32) *			\
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 83bbe7424fc1..be8be445c6ed 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -834,12 +834,13 @@ static int parse_options(struct super_block *sb, char *options)
 					"set with inline_xattr option");
 			return -EINVAL;
 		}
-		if (!F2FS_OPTION(sbi).inline_xattr_size ||
-			F2FS_OPTION(sbi).inline_xattr_size >=
-					DEF_ADDRS_PER_INODE -
-					F2FS_TOTAL_EXTRA_ATTR_SIZE -
-					DEF_INLINE_RESERVED_SIZE -
-					DEF_MIN_INLINE_SIZE) {
+		if (F2FS_OPTION(sbi).inline_xattr_size <
+			sizeof(struct f2fs_xattr_header) / sizeof(__le32) ||
+			F2FS_OPTION(sbi).inline_xattr_size >
+			DEF_ADDRS_PER_INODE -
+			F2FS_TOTAL_EXTRA_ATTR_SIZE / sizeof(__le32) -
+			DEF_INLINE_RESERVED_SIZE -
+			MIN_INLINE_DENTRY_SIZE / sizeof(__le32)) {
 			f2fs_msg(sb, KERN_ERR,
 					"inline xattr size is out of range");
 			return -EINVAL;
diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
index 8d57aaee8166..666db8eb71e0 100644
--- a/include/linux/f2fs_fs.h
+++ b/include/linux/f2fs_fs.h
@@ -490,12 +490,12 @@ typedef __le32	f2fs_hash_t;
 
 /*
  * space utilization of regular dentry and inline dentry (w/o extra reservation)
- *		regular dentry			inline dentry
- * bitmap	1 * 27 = 27			1 * 23 = 23
- * reserved	1 * 3 = 3			1 * 7 = 7
- * dentry	11 * 214 = 2354			11 * 182 = 2002
- * filename	8 * 214 = 1712			8 * 182 = 1456
- * total	4096				3488
+ *		regular dentry		inline dentry (def)	inline dentry (min)
+ * bitmap	1 * 27 = 27		1 * 23 = 23		1 * 1 = 1
+ * reserved	1 * 3 = 3		1 * 7 = 7		1 * 1 = 1
+ * dentry	11 * 214 = 2354		11 * 182 = 2002		11 * 2 = 22
+ * filename	8 * 214 = 1712		8 * 182 = 1456		8 * 2 = 16
+ * total	4096			3488			40
  *
  * Note: there are more reserved space in inline dentry than in regular
  * dentry, when converting inline dentry we should handle this carefully.
@@ -507,6 +507,7 @@ typedef __le32	f2fs_hash_t;
 #define SIZE_OF_RESERVED	(PAGE_SIZE - ((SIZE_OF_DIR_ENTRY + \
 				F2FS_SLOT_LEN) * \
 				NR_DENTRY_IN_BLOCK + SIZE_OF_DENTRY_BITMAP))
+#define MIN_INLINE_DENTRY_SIZE		40	/* just include '.' and '..' entries */
 
 /* One directory entry slot representing F2FS_SLOT_LEN-sized file name */
 struct f2fs_dir_entry {
-- 
cgit v1.2.3


From bcf6f55a0d05eedd8ebb6ecc60ae3f93205ad833 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 5 Mar 2019 15:41:27 -0800
Subject: kasan: fix kasan_check_read/write definitions

Building little-endian allmodconfig kernels on arm64 started failing
with the generated atomic.h implementation, since we now try to call
kasan helpers from the EFI stub:

  aarch64-linux-gnu-ld: drivers/firmware/efi/libstub/arm-stub.stub.o: in function `atomic_set':
  include/generated/atomic-instrumented.h:44: undefined reference to `__efistub_kasan_check_write'

I suspect that we get similar problems in other files that explicitly
disable KASAN for some reason but call atomic_t based helper functions.

We can fix this by checking the predefined __SANITIZE_ADDRESS__ macro
that the compiler sets instead of checking CONFIG_KASAN, but this in
turn requires a small hack in mm/kasan/common.c so we do see the extern
declaration there instead of the inline function.

Link: http://lkml.kernel.org/r/20181211133453.2835077-1-arnd@arndb.de
Fixes: b1864b828644 ("locking/atomics: build atomic headers as required")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reported-by: Anders Roxell <anders.roxell@linaro.org>
Acked-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Alexander Potapenko <glider@google.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Andrey Konovalov <andreyknvl@google.com>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>,
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kasan-checks.h | 2 +-
 mm/kasan/common.c            | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/kasan-checks.h b/include/linux/kasan-checks.h
index d314150658a4..a61dc075e2ce 100644
--- a/include/linux/kasan-checks.h
+++ b/include/linux/kasan-checks.h
@@ -2,7 +2,7 @@
 #ifndef _LINUX_KASAN_CHECKS_H
 #define _LINUX_KASAN_CHECKS_H
 
-#ifdef CONFIG_KASAN
+#if defined(__SANITIZE_ADDRESS__) || defined(__KASAN_INTERNAL)
 void kasan_check_read(const volatile void *p, unsigned int size);
 void kasan_check_write(const volatile void *p, unsigned int size);
 #else
diff --git a/mm/kasan/common.c b/mm/kasan/common.c
index 09b534fbba17..80bbe62b16cd 100644
--- a/mm/kasan/common.c
+++ b/mm/kasan/common.c
@@ -14,6 +14,8 @@
  *
  */
 
+#define __KASAN_INTERNAL
+
 #include <linux/export.h>
 #include <linux/interrupt.h>
 #include <linux/init.h>
-- 
cgit v1.2.3


From de810f490db7ed4c1db2bbfa458b2e27681d2ccb Mon Sep 17 00:00:00 2001
From: "Tobin C. Harding" <tobin@kernel.org>
Date: Tue, 5 Mar 2019 15:42:07 -0800
Subject: include/linux/slub_def.h: comment fixes

Capitialize comment string, use C89 comment style, correct
grammar/punctuation in comments.

Link: http://lkml.kernel.org/r/20190204005713.9463-2-tobin@kernel.org
Link: http://lkml.kernel.org/r/20190204005713.9463-3-tobin@kernel.org
Link: http://lkml.kernel.org/r/20190204005713.9463-4-tobin@kernel.org
Signed-off-by: Tobin C. Harding <tobin@kernel.org>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Reviewed-by: William Kucharski <william.kucharski@oracle.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/slub_def.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index 3a1a1dbc6f49..d2153789bd9f 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -81,12 +81,12 @@ struct kmem_cache_order_objects {
  */
 struct kmem_cache {
 	struct kmem_cache_cpu __percpu *cpu_slab;
-	/* Used for retriving partial slabs etc */
+	/* Used for retrieving partial slabs, etc. */
 	slab_flags_t flags;
 	unsigned long min_partial;
-	unsigned int size;	/* The size of an object including meta data */
-	unsigned int object_size;/* The size of an object without meta data */
-	unsigned int offset;	/* Free pointer offset. */
+	unsigned int size;	/* The size of an object including metadata */
+	unsigned int object_size;/* The size of an object without metadata */
+	unsigned int offset;	/* Free pointer offset */
 #ifdef CONFIG_SLUB_CPU_PARTIAL
 	/* Number of per cpu partial objects to keep around */
 	unsigned int cpu_partial;
@@ -110,7 +110,7 @@ struct kmem_cache {
 #endif
 #ifdef CONFIG_MEMCG
 	struct memcg_cache_params memcg_params;
-	/* for propagation, maximum size of a stored attr */
+	/* For propagation, maximum size of a stored attr */
 	unsigned int max_attr_size;
 #ifdef CONFIG_SYSFS
 	struct kset *memcg_kset;
@@ -151,7 +151,7 @@ struct kmem_cache {
 #else
 #define slub_cpu_partial(s)		(0)
 #define slub_set_cpu_partial(s, n)
-#endif // CONFIG_SLUB_CPU_PARTIAL
+#endif /* CONFIG_SLUB_CPU_PARTIAL */
 
 #ifdef CONFIG_SYSFS
 #define SLAB_SUPPORTS_SYSFS
-- 
cgit v1.2.3


From a9cd410a3d296846a8125aa43d97a573a354c472 Mon Sep 17 00:00:00 2001
From: Arun KS <arunks@codeaurora.org>
Date: Tue, 5 Mar 2019 15:42:14 -0800
Subject: mm/page_alloc.c: memory hotplug: free pages as higher order

When freeing pages are done with higher order, time spent on coalescing
pages by buddy allocator can be reduced.  With section size of 256MB,
hot add latency of a single section shows improvement from 50-60 ms to
less than 1 ms, hence improving the hot add latency by 60 times.  Modify
external providers of online callback to align with the change.

[arunks@codeaurora.org: v11]
  Link: http://lkml.kernel.org/r/1547792588-18032-1-git-send-email-arunks@codeaurora.org
[akpm@linux-foundation.org: remove unused local, per Arun]
[akpm@linux-foundation.org: avoid return of void-returning __free_pages_core(), per Oscar]
[akpm@linux-foundation.org: fix it for mm-convert-totalram_pages-and-totalhigh_pages-variables-to-atomic.patch]
[arunks@codeaurora.org: v8]
  Link: http://lkml.kernel.org/r/1547032395-24582-1-git-send-email-arunks@codeaurora.org
[arunks@codeaurora.org: v9]
  Link: http://lkml.kernel.org/r/1547098543-26452-1-git-send-email-arunks@codeaurora.org
Link: http://lkml.kernel.org/r/1538727006-5727-1-git-send-email-arunks@codeaurora.org
Signed-off-by: Arun KS <arunks@codeaurora.org>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Acked-by: Michal Hocko <mhocko@suse.com>
Reviewed-by: Oscar Salvador <osalvador@suse.de>
Reviewed-by: Alexander Duyck <alexander.h.duyck@linux.intel.com>
Cc: K. Y. Srinivasan <kys@microsoft.com>
Cc: Haiyang Zhang <haiyangz@microsoft.com>
Cc: Stephen Hemminger <sthemmin@microsoft.com>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Juergen Gross <jgross@suse.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Mathieu Malaterre <malat@debian.org>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Souptick Joarder <jrdr.linux@gmail.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Aaron Lu <aaron.lu@intel.com>
Cc: Srivatsa Vaddagiri <vatsa@codeaurora.org>
Cc: Vinayak Menon <vinmenon@codeaurora.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/hv/hv_balloon.c        |  7 ++++---
 drivers/xen/balloon.c          | 15 ++++++++++-----
 include/linux/memory_hotplug.h |  2 +-
 mm/internal.h                  |  1 +
 mm/memory_hotplug.c            | 37 +++++++++++++++++++++++++------------
 mm/page_alloc.c                |  8 ++++----
 6 files changed, 45 insertions(+), 25 deletions(-)

(limited to 'include')

diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c
index 7c6349a50ef1..a50b7624b2a3 100644
--- a/drivers/hv/hv_balloon.c
+++ b/drivers/hv/hv_balloon.c
@@ -771,7 +771,7 @@ static void hv_mem_hot_add(unsigned long start, unsigned long size,
 	}
 }
 
-static void hv_online_page(struct page *pg)
+static void hv_online_page(struct page *pg, unsigned int order)
 {
 	struct hv_hotadd_state *has;
 	unsigned long flags;
@@ -780,10 +780,11 @@ static void hv_online_page(struct page *pg)
 	spin_lock_irqsave(&dm_device.ha_lock, flags);
 	list_for_each_entry(has, &dm_device.ha_region_list, list) {
 		/* The page belongs to a different HAS. */
-		if ((pfn < has->start_pfn) || (pfn >= has->end_pfn))
+		if ((pfn < has->start_pfn) ||
+				(pfn + (1UL << order) > has->end_pfn))
 			continue;
 
-		hv_page_online_one(has, pg);
+		hv_bring_pgs_online(has, pfn, 1UL << order);
 		break;
 	}
 	spin_unlock_irqrestore(&dm_device.ha_lock, flags);
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index ceb5048de9a7..d107447c47de 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -369,14 +369,19 @@ static enum bp_state reserve_additional_memory(void)
 	return BP_ECANCELED;
 }
 
-static void xen_online_page(struct page *page)
+static void xen_online_page(struct page *page, unsigned int order)
 {
-	__online_page_set_limits(page);
+	unsigned long i, size = (1 << order);
+	unsigned long start_pfn = page_to_pfn(page);
+	struct page *p;
 
+	pr_debug("Online %lu pages starting at pfn 0x%lx\n", size, start_pfn);
 	mutex_lock(&balloon_mutex);
-
-	__balloon_append(page);
-
+	for (i = 0; i < size; i++) {
+		p = pfn_to_page(start_pfn + i);
+		__online_page_set_limits(p);
+		__balloon_append(p);
+	}
 	mutex_unlock(&balloon_mutex);
 }
 
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index 368267c1b71b..52869d6d38b3 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -89,7 +89,7 @@ extern int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn,
 	unsigned long *valid_start, unsigned long *valid_end);
 extern void __offline_isolated_pages(unsigned long, unsigned long);
 
-typedef void (*online_page_callback_t)(struct page *page);
+typedef void (*online_page_callback_t)(struct page *page, unsigned int order);
 
 extern int set_online_page_callback(online_page_callback_t callback);
 extern int restore_online_page_callback(online_page_callback_t callback);
diff --git a/mm/internal.h b/mm/internal.h
index f4a7bb02decf..536bc2a839b9 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -163,6 +163,7 @@ static inline struct page *pageblock_pfn_to_page(unsigned long start_pfn,
 extern int __isolate_free_page(struct page *page, unsigned int order);
 extern void memblock_free_pages(struct page *page, unsigned long pfn,
 					unsigned int order);
+extern void __free_pages_core(struct page *page, unsigned int order);
 extern void prep_compound_page(struct page *page, unsigned int order);
 extern void post_alloc_hook(struct page *page, unsigned int order,
 					gfp_t gfp_flags);
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 1ad28323fb9f..4f07c8ddfdd7 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -47,7 +47,7 @@
  * and restore_online_page_callback() for generic callback restore.
  */
 
-static void generic_online_page(struct page *page);
+static void generic_online_page(struct page *page, unsigned int order);
 
 static online_page_callback_t online_page_callback = generic_online_page;
 static DEFINE_MUTEX(online_page_callback_lock);
@@ -656,26 +656,39 @@ void __online_page_free(struct page *page)
 }
 EXPORT_SYMBOL_GPL(__online_page_free);
 
-static void generic_online_page(struct page *page)
+static void generic_online_page(struct page *page, unsigned int order)
 {
-	__online_page_set_limits(page);
-	__online_page_increment_counters(page);
-	__online_page_free(page);
+	__free_pages_core(page, order);
+	totalram_pages_add(1UL << order);
+#ifdef CONFIG_HIGHMEM
+	if (PageHighMem(page))
+		totalhigh_pages_add(1UL << order);
+#endif
+}
+
+static int online_pages_blocks(unsigned long start, unsigned long nr_pages)
+{
+	unsigned long end = start + nr_pages;
+	int order, onlined_pages = 0;
+
+	while (start < end) {
+		order = min(MAX_ORDER - 1,
+			get_order(PFN_PHYS(end) - PFN_PHYS(start)));
+		(*online_page_callback)(pfn_to_page(start), order);
+
+		onlined_pages += (1UL << order);
+		start += (1UL << order);
+	}
+	return onlined_pages;
 }
 
 static int online_pages_range(unsigned long start_pfn, unsigned long nr_pages,
 			void *arg)
 {
-	unsigned long i;
 	unsigned long onlined_pages = *(unsigned long *)arg;
-	struct page *page;
 
 	if (PageReserved(pfn_to_page(start_pfn)))
-		for (i = 0; i < nr_pages; i++) {
-			page = pfn_to_page(start_pfn + i);
-			(*online_page_callback)(page);
-			onlined_pages++;
-		}
+		onlined_pages += online_pages_blocks(start_pfn, nr_pages);
 
 	online_mem_sections(start_pfn, start_pfn + nr_pages);
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 10d0f2ed9f69..5361bd078493 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1303,7 +1303,7 @@ static void __free_pages_ok(struct page *page, unsigned int order)
 	local_irq_restore(flags);
 }
 
-static void __init __free_pages_boot_core(struct page *page, unsigned int order)
+void __free_pages_core(struct page *page, unsigned int order)
 {
 	unsigned int nr_pages = 1 << order;
 	struct page *p = page;
@@ -1382,7 +1382,7 @@ void __init memblock_free_pages(struct page *page, unsigned long pfn,
 {
 	if (early_page_uninitialised(pfn))
 		return;
-	return __free_pages_boot_core(page, order);
+	__free_pages_core(page, order);
 }
 
 /*
@@ -1472,14 +1472,14 @@ static void __init deferred_free_range(unsigned long pfn,
 	if (nr_pages == pageblock_nr_pages &&
 	    (pfn & (pageblock_nr_pages - 1)) == 0) {
 		set_pageblock_migratetype(page, MIGRATE_MOVABLE);
-		__free_pages_boot_core(page, pageblock_order);
+		__free_pages_core(page, pageblock_order);
 		return;
 	}
 
 	for (i = 0; i < nr_pages; i++, page++, pfn++) {
 		if ((pfn & (pageblock_nr_pages - 1)) == 0)
 			set_pageblock_migratetype(page, MIGRATE_MOVABLE);
-		__free_pages_boot_core(page, 0);
+		__free_pages_core(page, 0);
 	}
 }
 
-- 
cgit v1.2.3


From 4d3467e171f8a8ef8f1dd205769cf2f21fbc8e1e Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Tue, 5 Mar 2019 15:42:18 -0800
Subject: mm: balloon: update comment about isolation/migration/compaction

Patch series "mm/kdump: allow to exclude pages that are logically
offline"

Right now, pages inflated as part of a balloon driver will be dumped by
dump tools like makedumpfile.  While XEN is able to check in the crash
kernel whether a certain pfn is actuall backed by memory in the
hypervisor (see xen_oldmem_pfn_is_ram) and optimize this case, dumps of
virtio-balloon, hv-balloon and VMWare balloon inflated memory will
essentially result in zero pages getting allocated by the hypervisor and
the dump getting filled with this data.

The allocation and reading of zero pages can directly be avoided if a
dumping tool could know which pages only contain stale information not
to be dumped.

Also for XEN, calling into the kernel and asking the hypervisor if a pfn
is backed can be avoided if the duming tool would skip such pages right
from the beginning.

Dumping tools have no idea whether a given page is part of a balloon
driver and shall not be dumped.  Esp.  PG_reserved cannot be used for
that purpose as all memory allocated during early boot is also
PG_reserved, see discussion at [1].  So some other way of indication is
required and a new page flag is frowned upon.

We have PG_balloon (MAPCOUNT value), which is essentially unused now.  I
suggest renaming it to something more generic (PG_offline) to mark pages
as logically offline.  This flag can than e.g.  also be used by
virtio-mem in the future to mark subsections as offline.  Or by other
code that wants to put pages logically offline (e.g.  later maybe
poisoned pages that shall no longer be used).

This series converts PG_balloon to PG_offline, allows dumping tools to
query the value to detect such pages and marks pages in the hv-balloon
and XEN balloon properly as PG_offline.  Note that virtio-balloon
already set pages to PG_balloon (and now PG_offline).

Please note that this is also helpful for a problem we were seeing under
Hyper-V: Dumping logically offline memory (pages kept fake offline while
onlining a section via online_page_callback) would under some condicions
result in a kernel panic when dumping them.

As I don't have access to neither XEN nor Hyper-V nor VMWare
installations, this was only tested with the virtio-balloon and pages
were properly skipped when dumping.  I'll also attach the makedumpfile
patch to this series.

[1] https://lkml.org/lkml/2018/7/20/566

This patch (of 8):

Commit b1123ea6d3b3 ("mm: balloon: use general non-lru movable page
feature") reworked balloon handling to make use of the general non-lru
movable page feature.  The big comment block in balloon_compaction.h
contains quite some outdated information.  Let's fix this.

Link: http://lkml.kernel.org/r/20181119101616.8901-2-david@redhat.com
Signed-off-by: David Hildenbrand <david@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Alexander Duyck <alexander.h.duyck@linux.intel.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Baoquan He <bhe@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Christian Hansen <chansen3@cisco.com>
Cc: Dave Young <dyoung@redhat.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Haiyang Zhang <haiyangz@microsoft.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Juergen Gross <jgross@suse.com>
Cc: Julien Freche <jfreche@vmware.com>
Cc: Kairui Song <kasong@redhat.com>
Cc: Kazuhito Hagio <k-hagio@ab.jp.nec.com>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Konstantin Khlebnikov <koct9i@gmail.com>
Cc: "K. Y. Srinivasan" <kys@microsoft.com>
Cc: Len Brown <len.brown@intel.com>
Cc: Lianbo Jiang <lijiang@redhat.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Mike Rapoport <rppt@linux.vnet.ibm.com>
Cc: Miles Chen <miles.chen@mediatek.com>
Cc: Nadav Amit <namit@vmware.com>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Omar Sandoval <osandov@fb.com>
Cc: Pankaj gupta <pagupta@redhat.com>
Cc: Pavel Machek <pavel@ucw.cz>
Cc: Pavel Tatashin <pasha.tatashin@oracle.com>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: "Rafael J. Wysocki" <rjw@rjwysocki.net>
Cc: Stefano Stabellini <sstabellini@kernel.org>
Cc: Stephen Hemminger <sthemmin@microsoft.com>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: Vitaly Kuznetsov <vkuznets@redhat.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Xavier Deguillard <xdeguillard@vmware.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/balloon_compaction.h | 26 +++++++++-----------------
 1 file changed, 9 insertions(+), 17 deletions(-)

(limited to 'include')

diff --git a/include/linux/balloon_compaction.h b/include/linux/balloon_compaction.h
index 53051f3d8f25..cbe50da5a59d 100644
--- a/include/linux/balloon_compaction.h
+++ b/include/linux/balloon_compaction.h
@@ -4,15 +4,18 @@
  *
  * Common interface definitions for making balloon pages movable by compaction.
  *
- * Despite being perfectly possible to perform ballooned pages migration, they
- * make a special corner case to compaction scans because balloon pages are not
- * enlisted at any LRU list like the other pages we do compact / migrate.
+ * Balloon page migration makes use of the general non-lru movable page
+ * feature.
+ *
+ * page->private is used to reference the responsible balloon device.
+ * page->mapping is used in context of non-lru page migration to reference
+ * the address space operations for page isolation/migration/compaction.
  *
  * As the page isolation scanning step a compaction thread does is a lockless
  * procedure (from a page standpoint), it might bring some racy situations while
  * performing balloon page compaction. In order to sort out these racy scenarios
  * and safely perform balloon's page compaction and migration we must, always,
- * ensure following these three simple rules:
+ * ensure following these simple rules:
  *
  *   i. when updating a balloon's page ->mapping element, strictly do it under
  *      the following lock order, independently of the far superior
@@ -21,19 +24,8 @@
  *	      +--spin_lock_irq(&b_dev_info->pages_lock);
  *	            ... page->mapping updates here ...
  *
- *  ii. before isolating or dequeueing a balloon page from the balloon device
- *      pages list, the page reference counter must be raised by one and the
- *      extra refcount must be dropped when the page is enqueued back into
- *      the balloon device page list, thus a balloon page keeps its reference
- *      counter raised only while it is under our special handling;
- *
- * iii. after the lockless scan step have selected a potential balloon page for
- *      isolation, re-test the PageBalloon mark and the PagePrivate flag
- *      under the proper page lock, to ensure isolating a valid balloon page
- *      (not yet isolated, nor under release procedure)
- *
- *  iv. isolation or dequeueing procedure must clear PagePrivate flag under
- *      page lock together with removing page from balloon device page list.
+ *  ii. isolation or dequeueing procedure must remove the page from balloon
+ *      device page list under b_dev_info->pages_lock.
  *
  * The functions provided by this interface are placed to help on coping with
  * the aforementioned balloon page corner case, as well as to ensure the simple
-- 
cgit v1.2.3


From ca215086b14b89a0e70fc211314944aa6ce50020 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Tue, 5 Mar 2019 15:42:23 -0800
Subject: mm: convert PG_balloon to PG_offline

PG_balloon was introduced to implement page migration/compaction for
pages inflated in virtio-balloon.  Nowadays, it is only a marker that a
page is part of virtio-balloon and therefore logically offline.

We also want to make use of this flag in other balloon drivers - for
inflated pages or when onlining a section but keeping some pages offline
(e.g.  used right now by XEN and Hyper-V via set_online_page_callback()).

We are going to expose this flag to dump tools like makedumpfile.  But
instead of exposing PG_balloon, let's generalize the concept of marking
pages as logically offline, so it can be reused for other purposes later
on.

Rename PG_balloon to PG_offline.  This is an indicator that the page is
logically offline, the content stale and that it should not be touched
(e.g.  a hypervisor would have to allocate backing storage in order for
the guest to dump an unused page).  We can then e.g.  exclude such pages
from dumps.

We replace and reuse KPF_BALLOON (23), as this shouldn't really harm
(and for now the semantics stay the same).  In following patches, we
will make use of this bit also in other balloon drivers.  While at it,
document PGTABLE.

[akpm@linux-foundation.org: fix comment text, per David]
Link: http://lkml.kernel.org/r/20181119101616.8901-3-david@redhat.com
Signed-off-by: David Hildenbrand <david@redhat.com>
Acked-by: Konstantin Khlebnikov <koct9i@gmail.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Pankaj gupta <pagupta@redhat.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Mike Rapoport <rppt@linux.vnet.ibm.com>
Cc: Christian Hansen <chansen3@cisco.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Pavel Tatashin <pasha.tatashin@oracle.com>
Cc: Alexander Duyck <alexander.h.duyck@linux.intel.com>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Miles Chen <miles.chen@mediatek.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Kazuhito Hagio <k-hagio@ab.jp.nec.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Baoquan He <bhe@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Dave Young <dyoung@redhat.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Haiyang Zhang <haiyangz@microsoft.com>
Cc: Juergen Gross <jgross@suse.com>
Cc: Julien Freche <jfreche@vmware.com>
Cc: Kairui Song <kasong@redhat.com>
Cc: "K. Y. Srinivasan" <kys@microsoft.com>
Cc: Len Brown <len.brown@intel.com>
Cc: Lianbo Jiang <lijiang@redhat.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Nadav Amit <namit@vmware.com>
Cc: Omar Sandoval <osandov@fb.com>
Cc: Pavel Machek <pavel@ucw.cz>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: "Rafael J. Wysocki" <rjw@rjwysocki.net>
Cc: Stefano Stabellini <sstabellini@kernel.org>
Cc: Stephen Hemminger <sthemmin@microsoft.com>
Cc: Vitaly Kuznetsov <vkuznets@redhat.com>
Cc: Xavier Deguillard <xdeguillard@vmware.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/admin-guide/mm/pagemap.rst |  9 ++++++---
 fs/proc/page.c                           |  4 ++--
 include/linux/balloon_compaction.h       |  8 ++++----
 include/linux/page-flags.h               | 11 +++++++----
 include/uapi/linux/kernel-page-flags.h   |  2 +-
 tools/vm/page-types.c                    |  2 +-
 6 files changed, 21 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/Documentation/admin-guide/mm/pagemap.rst b/Documentation/admin-guide/mm/pagemap.rst
index 3f7bade2c231..340a5aee9b80 100644
--- a/Documentation/admin-guide/mm/pagemap.rst
+++ b/Documentation/admin-guide/mm/pagemap.rst
@@ -75,9 +75,10 @@ number of times a page is mapped.
     20. NOPAGE
     21. KSM
     22. THP
-    23. BALLOON
+    23. OFFLINE
     24. ZERO_PAGE
     25. IDLE
+    26. PGTABLE
 
  * ``/proc/kpagecgroup``.  This file contains a 64-bit inode number of the
    memory cgroup each page is charged to, indexed by PFN. Only available when
@@ -118,8 +119,8 @@ Short descriptions to the page flags
     identical memory pages dynamically shared between one or more processes
 22 - THP
     contiguous pages which construct transparent hugepages
-23 - BALLOON
-    balloon compaction page
+23 - OFFLINE
+    page is logically offline
 24 - ZERO_PAGE
     zero page for pfn_zero or huge_zero page
 25 - IDLE
@@ -128,6 +129,8 @@ Short descriptions to the page flags
     Note that this flag may be stale in case the page was accessed via
     a PTE. To make sure the flag is up-to-date one has to read
     ``/sys/kernel/mm/page_idle/bitmap`` first.
+26 - PGTABLE
+    page is in use as a page table
 
 IO related page flags
 ---------------------
diff --git a/fs/proc/page.c b/fs/proc/page.c
index 40b05e0d4274..544d1ee15aee 100644
--- a/fs/proc/page.c
+++ b/fs/proc/page.c
@@ -152,8 +152,8 @@ u64 stable_page_flags(struct page *page)
 	else if (page_count(page) == 0 && is_free_buddy_page(page))
 		u |= 1 << KPF_BUDDY;
 
-	if (PageBalloon(page))
-		u |= 1 << KPF_BALLOON;
+	if (PageOffline(page))
+		u |= 1 << KPF_OFFLINE;
 	if (PageTable(page))
 		u |= 1 << KPF_PGTABLE;
 
diff --git a/include/linux/balloon_compaction.h b/include/linux/balloon_compaction.h
index cbe50da5a59d..f111c780ef1d 100644
--- a/include/linux/balloon_compaction.h
+++ b/include/linux/balloon_compaction.h
@@ -95,7 +95,7 @@ extern int balloon_page_migrate(struct address_space *mapping,
 static inline void balloon_page_insert(struct balloon_dev_info *balloon,
 				       struct page *page)
 {
-	__SetPageBalloon(page);
+	__SetPageOffline(page);
 	__SetPageMovable(page, balloon->inode->i_mapping);
 	set_page_private(page, (unsigned long)balloon);
 	list_add(&page->lru, &balloon->pages);
@@ -111,7 +111,7 @@ static inline void balloon_page_insert(struct balloon_dev_info *balloon,
  */
 static inline void balloon_page_delete(struct page *page)
 {
-	__ClearPageBalloon(page);
+	__ClearPageOffline(page);
 	__ClearPageMovable(page);
 	set_page_private(page, 0);
 	/*
@@ -141,13 +141,13 @@ static inline gfp_t balloon_mapping_gfp_mask(void)
 static inline void balloon_page_insert(struct balloon_dev_info *balloon,
 				       struct page *page)
 {
-	__SetPageBalloon(page);
+	__SetPageOffline(page);
 	list_add(&page->lru, &balloon->pages);
 }
 
 static inline void balloon_page_delete(struct page *page)
 {
-	__ClearPageBalloon(page);
+	__ClearPageOffline(page);
 	list_del(&page->lru);
 }
 
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 39b4494e29f1..808b4183e30d 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -671,7 +671,7 @@ PAGEFLAG_FALSE(DoubleMap)
 /* Reserve		0x0000007f to catch underflows of page_mapcount */
 #define PAGE_MAPCOUNT_RESERVE	-128
 #define PG_buddy	0x00000080
-#define PG_balloon	0x00000100
+#define PG_offline	0x00000100
 #define PG_kmemcg	0x00000200
 #define PG_table	0x00000400
 
@@ -706,10 +706,13 @@ static __always_inline void __ClearPage##uname(struct page *page)	\
 PAGE_TYPE_OPS(Buddy, buddy)
 
 /*
- * PageBalloon() is true for pages that are on the balloon page list
- * (see mm/balloon_compaction.c).
+ * PageOffline() indicates that the page is logically offline although the
+ * containing section is online. (e.g. inflated in a balloon driver or
+ * not onlined when onlining the section).
+ * The content of these pages is effectively stale. Such pages should not
+ * be touched (read/write/dump/save) except by their owner.
  */
-PAGE_TYPE_OPS(Balloon, balloon)
+PAGE_TYPE_OPS(Offline, offline)
 
 /*
  * If kmemcg is enabled, the buddy allocator will set PageKmemcg() on
diff --git a/include/uapi/linux/kernel-page-flags.h b/include/uapi/linux/kernel-page-flags.h
index 21b9113c69da..6f2f2720f3ac 100644
--- a/include/uapi/linux/kernel-page-flags.h
+++ b/include/uapi/linux/kernel-page-flags.h
@@ -32,7 +32,7 @@
 
 #define KPF_KSM			21
 #define KPF_THP			22
-#define KPF_BALLOON		23
+#define KPF_OFFLINE		23
 #define KPF_ZERO_PAGE		24
 #define KPF_IDLE		25
 #define KPF_PGTABLE		26
diff --git a/tools/vm/page-types.c b/tools/vm/page-types.c
index 1ff3a6c0367b..6f64b2b93234 100644
--- a/tools/vm/page-types.c
+++ b/tools/vm/page-types.c
@@ -133,7 +133,7 @@ static const char * const page_flag_names[] = {
 	[KPF_NOPAGE]		= "n:nopage",
 	[KPF_KSM]		= "x:ksm",
 	[KPF_THP]		= "t:thp",
-	[KPF_BALLOON]		= "o:balloon",
+	[KPF_OFFLINE]		= "o:offline",
 	[KPF_PGTABLE]		= "g:pgtable",
 	[KPF_ZERO_PAGE]		= "z:zero_page",
 	[KPF_IDLE]              = "i:idle_page",
-- 
cgit v1.2.3


From 98fa15f34cb379864757670b8e8743b21456a20e Mon Sep 17 00:00:00 2001
From: Anshuman Khandual <anshuman.khandual@arm.com>
Date: Tue, 5 Mar 2019 15:42:58 -0800
Subject: mm: replace all open encodings for NUMA_NO_NODE

Patch series "Replace all open encodings for NUMA_NO_NODE", v3.

All these places for replacement were found by running the following
grep patterns on the entire kernel code.  Please let me know if this
might have missed some instances.  This might also have replaced some
false positives.  I will appreciate suggestions, inputs and review.

1. git grep "nid == -1"
2. git grep "node == -1"
3. git grep "nid = -1"
4. git grep "node = -1"

This patch (of 2):

At present there are multiple places where invalid node number is
encoded as -1.  Even though implicitly understood it is always better to
have macros in there.  Replace these open encodings for an invalid node
number with the global macro NUMA_NO_NODE.  This helps remove NUMA
related assumptions like 'invalid node' from various places redirecting
them to a common definition.

Link: http://lkml.kernel.org/r/1545127933-10711-2-git-send-email-anshuman.khandual@arm.com
Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Acked-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>	[ixgbe]
Acked-by: Jens Axboe <axboe@kernel.dk>			[mtip32xx]
Acked-by: Vinod Koul <vkoul@kernel.org>			[dmaengine.c]
Acked-by: Michael Ellerman <mpe@ellerman.id.au>		[powerpc]
Acked-by: Doug Ledford <dledford@redhat.com>		[drivers/infiniband]
Cc: Joseph Qi <jiangqi903@gmail.com>
Cc: Hans Verkuil <hverkuil@xs4all.nl>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/alpha/include/asm/topology.h             |  3 ++-
 arch/ia64/kernel/numa.c                       |  2 +-
 arch/ia64/mm/discontig.c                      |  6 +++---
 arch/powerpc/include/asm/pci-bridge.h         |  3 ++-
 arch/powerpc/kernel/paca.c                    |  3 ++-
 arch/powerpc/kernel/pci-common.c              |  3 ++-
 arch/powerpc/mm/numa.c                        | 14 +++++++-------
 arch/powerpc/platforms/powernv/memtrace.c     |  5 +++--
 arch/sparc/kernel/pci_fire.c                  |  3 ++-
 arch/sparc/kernel/pci_schizo.c                |  3 ++-
 arch/sparc/kernel/psycho_common.c             |  3 ++-
 arch/sparc/kernel/sbus.c                      |  3 ++-
 arch/sparc/mm/init_64.c                       |  6 +++---
 arch/x86/include/asm/pci.h                    |  3 ++-
 arch/x86/kernel/apic/x2apic_uv_x.c            |  7 ++++---
 arch/x86/kernel/smpboot.c                     |  3 ++-
 drivers/block/mtip32xx/mtip32xx.c             |  5 +++--
 drivers/dma/dmaengine.c                       |  4 +++-
 drivers/infiniband/hw/hfi1/affinity.c         |  3 ++-
 drivers/infiniband/hw/hfi1/init.c             |  3 ++-
 drivers/iommu/dmar.c                          |  5 +++--
 drivers/iommu/intel-iommu.c                   |  3 ++-
 drivers/misc/sgi-xp/xpc_uv.c                  |  3 ++-
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c |  5 +++--
 include/linux/device.h                        |  2 +-
 init/init_task.c                              |  3 ++-
 kernel/kthread.c                              |  3 ++-
 kernel/sched/fair.c                           | 15 ++++++++-------
 lib/cpumask.c                                 |  3 ++-
 mm/huge_memory.c                              | 13 +++++++------
 mm/hugetlb.c                                  |  3 ++-
 mm/ksm.c                                      |  2 +-
 mm/memory.c                                   |  7 ++++---
 mm/memory_hotplug.c                           | 12 ++++++------
 mm/mempolicy.c                                |  2 +-
 mm/page_alloc.c                               |  4 ++--
 mm/page_ext.c                                 |  2 +-
 net/core/pktgen.c                             |  3 ++-
 net/qrtr/qrtr.c                               |  3 ++-
 39 files changed, 104 insertions(+), 74 deletions(-)

(limited to 'include')

diff --git a/arch/alpha/include/asm/topology.h b/arch/alpha/include/asm/topology.h
index e6e13a85796a..5a77a40567fa 100644
--- a/arch/alpha/include/asm/topology.h
+++ b/arch/alpha/include/asm/topology.h
@@ -4,6 +4,7 @@
 
 #include <linux/smp.h>
 #include <linux/threads.h>
+#include <linux/numa.h>
 #include <asm/machvec.h>
 
 #ifdef CONFIG_NUMA
@@ -29,7 +30,7 @@ static const struct cpumask *cpumask_of_node(int node)
 {
 	int cpu;
 
-	if (node == -1)
+	if (node == NUMA_NO_NODE)
 		return cpu_all_mask;
 
 	cpumask_clear(&node_to_cpumask_map[node]);
diff --git a/arch/ia64/kernel/numa.c b/arch/ia64/kernel/numa.c
index 92c376279c6d..1315da6c7aeb 100644
--- a/arch/ia64/kernel/numa.c
+++ b/arch/ia64/kernel/numa.c
@@ -74,7 +74,7 @@ void __init build_cpu_to_node_map(void)
 		cpumask_clear(&node_to_cpu_mask[node]);
 
 	for_each_possible_early_cpu(cpu) {
-		node = -1;
+		node = NUMA_NO_NODE;
 		for (i = 0; i < NR_CPUS; ++i)
 			if (cpu_physical_id(cpu) == node_cpuid[i].phys_id) {
 				node = node_cpuid[i].nid;
diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c
index 8a965784340c..f9c36750c6a4 100644
--- a/arch/ia64/mm/discontig.c
+++ b/arch/ia64/mm/discontig.c
@@ -227,7 +227,7 @@ void __init setup_per_cpu_areas(void)
 	 * CPUs are put into groups according to node.  Walk cpu_map
 	 * and create new groups at node boundaries.
 	 */
-	prev_node = -1;
+	prev_node = NUMA_NO_NODE;
 	ai->nr_groups = 0;
 	for (unit = 0; unit < nr_units; unit++) {
 		cpu = cpu_map[unit];
@@ -435,7 +435,7 @@ static void __init *memory_less_node_alloc(int nid, unsigned long pernodesize)
 {
 	void *ptr = NULL;
 	u8 best = 0xff;
-	int bestnode = -1, node, anynode = 0;
+	int bestnode = NUMA_NO_NODE, node, anynode = 0;
 
 	for_each_online_node(node) {
 		if (node_isset(node, memory_less_mask))
@@ -447,7 +447,7 @@ static void __init *memory_less_node_alloc(int nid, unsigned long pernodesize)
 		anynode = node;
 	}
 
-	if (bestnode == -1)
+	if (bestnode == NUMA_NO_NODE)
 		bestnode = anynode;
 
 	ptr = memblock_alloc_try_nid(pernodesize, PERCPU_PAGE_SIZE,
diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h
index aee4fcc24990..77fc21278fa2 100644
--- a/arch/powerpc/include/asm/pci-bridge.h
+++ b/arch/powerpc/include/asm/pci-bridge.h
@@ -10,6 +10,7 @@
 #include <linux/pci.h>
 #include <linux/list.h>
 #include <linux/ioport.h>
+#include <linux/numa.h>
 
 struct device_node;
 
@@ -265,7 +266,7 @@ extern int pcibios_map_io_space(struct pci_bus *bus);
 #ifdef CONFIG_NUMA
 #define PHB_SET_NODE(PHB, NODE)		((PHB)->node = (NODE))
 #else
-#define PHB_SET_NODE(PHB, NODE)		((PHB)->node = -1)
+#define PHB_SET_NODE(PHB, NODE)		((PHB)->node = NUMA_NO_NODE)
 #endif
 
 #endif	/* CONFIG_PPC64 */
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
index 913bfca09c4f..b8480127793d 100644
--- a/arch/powerpc/kernel/paca.c
+++ b/arch/powerpc/kernel/paca.c
@@ -11,6 +11,7 @@
 #include <linux/export.h>
 #include <linux/memblock.h>
 #include <linux/sched/task.h>
+#include <linux/numa.h>
 
 #include <asm/lppaca.h>
 #include <asm/paca.h>
@@ -36,7 +37,7 @@ static void *__init alloc_paca_data(unsigned long size, unsigned long align,
 	 * which will put its paca in the right place.
 	 */
 	if (cpu == boot_cpuid) {
-		nid = -1;
+		nid = NUMA_NO_NODE;
 		memblock_set_bottom_up(true);
 	} else {
 		nid = early_cpu_to_node(cpu);
diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index 88e4f69a09e5..4538e8ddde80 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -32,6 +32,7 @@
 #include <linux/vmalloc.h>
 #include <linux/slab.h>
 #include <linux/vgaarb.h>
+#include <linux/numa.h>
 
 #include <asm/processor.h>
 #include <asm/io.h>
@@ -132,7 +133,7 @@ struct pci_controller *pcibios_alloc_controller(struct device_node *dev)
 		int nid = of_node_to_nid(dev);
 
 		if (nid < 0 || !node_online(nid))
-			nid = -1;
+			nid = NUMA_NO_NODE;
 
 		PHB_SET_NODE(phb, nid);
 	}
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 87f0dd004295..270cefb75cca 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -215,7 +215,7 @@ static void initialize_distance_lookup_table(int nid,
  */
 static int associativity_to_nid(const __be32 *associativity)
 {
-	int nid = -1;
+	int nid = NUMA_NO_NODE;
 
 	if (min_common_depth == -1)
 		goto out;
@@ -225,7 +225,7 @@ static int associativity_to_nid(const __be32 *associativity)
 
 	/* POWER4 LPAR uses 0xffff as invalid node */
 	if (nid == 0xffff || nid >= MAX_NUMNODES)
-		nid = -1;
+		nid = NUMA_NO_NODE;
 
 	if (nid > 0 &&
 		of_read_number(associativity, 1) >= distance_ref_points_depth) {
@@ -244,7 +244,7 @@ out:
  */
 static int of_node_to_nid_single(struct device_node *device)
 {
-	int nid = -1;
+	int nid = NUMA_NO_NODE;
 	const __be32 *tmp;
 
 	tmp = of_get_associativity(device);
@@ -256,7 +256,7 @@ static int of_node_to_nid_single(struct device_node *device)
 /* Walk the device tree upwards, looking for an associativity id */
 int of_node_to_nid(struct device_node *device)
 {
-	int nid = -1;
+	int nid = NUMA_NO_NODE;
 
 	of_node_get(device);
 	while (device) {
@@ -454,7 +454,7 @@ static int of_drconf_to_nid_single(struct drmem_lmb *lmb)
  */
 static int numa_setup_cpu(unsigned long lcpu)
 {
-	int nid = -1;
+	int nid = NUMA_NO_NODE;
 	struct device_node *cpu;
 
 	/*
@@ -930,7 +930,7 @@ static int hot_add_drconf_scn_to_nid(unsigned long scn_addr)
 {
 	struct drmem_lmb *lmb;
 	unsigned long lmb_size;
-	int nid = -1;
+	int nid = NUMA_NO_NODE;
 
 	lmb_size = drmem_lmb_size();
 
@@ -960,7 +960,7 @@ static int hot_add_drconf_scn_to_nid(unsigned long scn_addr)
 static int hot_add_node_scn_to_nid(unsigned long scn_addr)
 {
 	struct device_node *memory;
-	int nid = -1;
+	int nid = NUMA_NO_NODE;
 
 	for_each_node_by_type(memory, "memory") {
 		unsigned long start, size;
diff --git a/arch/powerpc/platforms/powernv/memtrace.c b/arch/powerpc/platforms/powernv/memtrace.c
index 84d038ed3882..248a38ad25c7 100644
--- a/arch/powerpc/platforms/powernv/memtrace.c
+++ b/arch/powerpc/platforms/powernv/memtrace.c
@@ -20,6 +20,7 @@
 #include <linux/slab.h>
 #include <linux/memory.h>
 #include <linux/memory_hotplug.h>
+#include <linux/numa.h>
 #include <asm/machdep.h>
 #include <asm/debugfs.h>
 
@@ -223,7 +224,7 @@ static int memtrace_online(void)
 		ent = &memtrace_array[i];
 
 		/* We have onlined this chunk previously */
-		if (ent->nid == -1)
+		if (ent->nid == NUMA_NO_NODE)
 			continue;
 
 		/* Remove from io mappings */
@@ -257,7 +258,7 @@ static int memtrace_online(void)
 		 */
 		debugfs_remove_recursive(ent->dir);
 		pr_info("Added trace memory back to node %d\n", ent->nid);
-		ent->size = ent->start = ent->nid = -1;
+		ent->size = ent->start = ent->nid = NUMA_NO_NODE;
 	}
 	if (ret)
 		return ret;
diff --git a/arch/sparc/kernel/pci_fire.c b/arch/sparc/kernel/pci_fire.c
index be71ae086622..0ca08d455e80 100644
--- a/arch/sparc/kernel/pci_fire.c
+++ b/arch/sparc/kernel/pci_fire.c
@@ -11,6 +11,7 @@
 #include <linux/export.h>
 #include <linux/irq.h>
 #include <linux/of_device.h>
+#include <linux/numa.h>
 
 #include <asm/prom.h>
 #include <asm/irq.h>
@@ -416,7 +417,7 @@ static int pci_fire_pbm_init(struct pci_pbm_info *pbm,
 	struct device_node *dp = op->dev.of_node;
 	int err;
 
-	pbm->numa_node = -1;
+	pbm->numa_node = NUMA_NO_NODE;
 
 	pbm->pci_ops = &sun4u_pci_ops;
 	pbm->config_space_reg_bits = 12;
diff --git a/arch/sparc/kernel/pci_schizo.c b/arch/sparc/kernel/pci_schizo.c
index 934b97c72f7c..421aba00e6b0 100644
--- a/arch/sparc/kernel/pci_schizo.c
+++ b/arch/sparc/kernel/pci_schizo.c
@@ -12,6 +12,7 @@
 #include <linux/export.h>
 #include <linux/interrupt.h>
 #include <linux/of_device.h>
+#include <linux/numa.h>
 
 #include <asm/iommu.h>
 #include <asm/irq.h>
@@ -1347,7 +1348,7 @@ static int schizo_pbm_init(struct pci_pbm_info *pbm,
 	pbm->next = pci_pbm_root;
 	pci_pbm_root = pbm;
 
-	pbm->numa_node = -1;
+	pbm->numa_node = NUMA_NO_NODE;
 
 	pbm->pci_ops = &sun4u_pci_ops;
 	pbm->config_space_reg_bits = 8;
diff --git a/arch/sparc/kernel/psycho_common.c b/arch/sparc/kernel/psycho_common.c
index 81aa91e5c0e6..e90bcb6bad7f 100644
--- a/arch/sparc/kernel/psycho_common.c
+++ b/arch/sparc/kernel/psycho_common.c
@@ -5,6 +5,7 @@
  */
 #include <linux/kernel.h>
 #include <linux/interrupt.h>
+#include <linux/numa.h>
 
 #include <asm/upa.h>
 
@@ -454,7 +455,7 @@ void psycho_pbm_init_common(struct pci_pbm_info *pbm, struct platform_device *op
 	struct device_node *dp = op->dev.of_node;
 
 	pbm->name = dp->full_name;
-	pbm->numa_node = -1;
+	pbm->numa_node = NUMA_NO_NODE;
 	pbm->chip_type = chip_type;
 	pbm->chip_version = of_getintprop_default(dp, "version#", 0);
 	pbm->chip_revision = of_getintprop_default(dp, "module-revision#", 0);
diff --git a/arch/sparc/kernel/sbus.c b/arch/sparc/kernel/sbus.c
index 41c5deb581b8..32141e1006c4 100644
--- a/arch/sparc/kernel/sbus.c
+++ b/arch/sparc/kernel/sbus.c
@@ -15,6 +15,7 @@
 #include <linux/interrupt.h>
 #include <linux/of.h>
 #include <linux/of_device.h>
+#include <linux/numa.h>
 
 #include <asm/page.h>
 #include <asm/io.h>
@@ -561,7 +562,7 @@ static void __init sbus_iommu_init(struct platform_device *op)
 
 	op->dev.archdata.iommu = iommu;
 	op->dev.archdata.stc = strbuf;
-	op->dev.archdata.numa_node = -1;
+	op->dev.archdata.numa_node = NUMA_NO_NODE;
 
 	reg_base = regs + SYSIO_IOMMUREG_BASE;
 	iommu->iommu_control = reg_base + IOMMU_CONTROL;
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index b4221d3727d0..9e6bd868ba6f 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -976,13 +976,13 @@ static u64 __init memblock_nid_range_sun4u(u64 start, u64 end, int *nid)
 {
 	int prev_nid, new_nid;
 
-	prev_nid = -1;
+	prev_nid = NUMA_NO_NODE;
 	for ( ; start < end; start += PAGE_SIZE) {
 		for (new_nid = 0; new_nid < num_node_masks; new_nid++) {
 			struct node_mem_mask *p = &node_masks[new_nid];
 
 			if ((start & p->mask) == p->match) {
-				if (prev_nid == -1)
+				if (prev_nid == NUMA_NO_NODE)
 					prev_nid = new_nid;
 				break;
 			}
@@ -1208,7 +1208,7 @@ int of_node_to_nid(struct device_node *dp)
 	md = mdesc_grab();
 
 	count = 0;
-	nid = -1;
+	nid = NUMA_NO_NODE;
 	mdesc_for_each_node_by_name(md, grp, "group") {
 		if (!scan_arcs_for_cfg_handle(md, grp, cfg_handle)) {
 			nid = count;
diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h
index 662963681ea6..e662f987dfa2 100644
--- a/arch/x86/include/asm/pci.h
+++ b/arch/x86/include/asm/pci.h
@@ -7,6 +7,7 @@
 #include <linux/slab.h>
 #include <linux/string.h>
 #include <linux/scatterlist.h>
+#include <linux/numa.h>
 #include <asm/io.h>
 #include <asm/pat.h>
 #include <asm/x86_init.h>
@@ -141,7 +142,7 @@ cpumask_of_pcibus(const struct pci_bus *bus)
 	int node;
 
 	node = __pcibus_to_node(bus);
-	return (node == -1) ? cpu_online_mask :
+	return (node == NUMA_NO_NODE) ? cpu_online_mask :
 			      cpumask_of_node(node);
 }
 #endif
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index a555da094157..1e225528f0d7 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -27,6 +27,7 @@
 #include <linux/crash_dump.h>
 #include <linux/reboot.h>
 #include <linux/memory.h>
+#include <linux/numa.h>
 
 #include <asm/uv/uv_mmrs.h>
 #include <asm/uv/uv_hub.h>
@@ -1390,7 +1391,7 @@ static void __init build_socket_tables(void)
 	}
 
 	/* Set socket -> node values: */
-	lnid = -1;
+	lnid = NUMA_NO_NODE;
 	for_each_present_cpu(cpu) {
 		int nid = cpu_to_node(cpu);
 		int apicid, sockid;
@@ -1521,7 +1522,7 @@ static void __init uv_system_init_hub(void)
 			new_hub->pnode = 0xffff;
 
 		new_hub->numa_blade_id = uv_node_to_blade_id(nodeid);
-		new_hub->memory_nid = -1;
+		new_hub->memory_nid = NUMA_NO_NODE;
 		new_hub->nr_possible_cpus = 0;
 		new_hub->nr_online_cpus = 0;
 	}
@@ -1538,7 +1539,7 @@ static void __init uv_system_init_hub(void)
 
 		uv_cpu_info_per(cpu)->p_uv_hub_info = uv_hub_info_list(nodeid);
 		uv_cpu_info_per(cpu)->blade_cpu_id = uv_cpu_hub_info(cpu)->nr_possible_cpus++;
-		if (uv_cpu_hub_info(cpu)->memory_nid == -1)
+		if (uv_cpu_hub_info(cpu)->memory_nid == NUMA_NO_NODE)
 			uv_cpu_hub_info(cpu)->memory_nid = cpu_to_node(cpu);
 
 		/* Init memoryless node: */
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index ccd1f2a8e557..c91ff9f9fe8a 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -56,6 +56,7 @@
 #include <linux/stackprotector.h>
 #include <linux/gfp.h>
 #include <linux/cpuidle.h>
+#include <linux/numa.h>
 
 #include <asm/acpi.h>
 #include <asm/desc.h>
@@ -841,7 +842,7 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
 /* reduce the number of lines printed when booting a large cpu count system */
 static void announce_cpu(int cpu, int apicid)
 {
-	static int current_node = -1;
+	static int current_node = NUMA_NO_NODE;
 	int node = early_cpu_to_node(cpu);
 	static int width, node_width;
 
diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index 88e8440e75c3..2f3ee4d6af82 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -40,6 +40,7 @@
 #include <linux/export.h>
 #include <linux/debugfs.h>
 #include <linux/prefetch.h>
+#include <linux/numa.h>
 #include "mtip32xx.h"
 
 #define HW_CMD_SLOT_SZ		(MTIP_MAX_COMMAND_SLOTS * 32)
@@ -4018,9 +4019,9 @@ static int get_least_used_cpu_on_node(int node)
 /* Helper for selecting a node in round robin mode */
 static inline int mtip_get_next_rr_node(void)
 {
-	static int next_node = -1;
+	static int next_node = NUMA_NO_NODE;
 
-	if (next_node == -1) {
+	if (next_node == NUMA_NO_NODE) {
 		next_node = first_online_node;
 		return next_node;
 	}
diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
index f1a441ab395d..3a11b1092e80 100644
--- a/drivers/dma/dmaengine.c
+++ b/drivers/dma/dmaengine.c
@@ -63,6 +63,7 @@
 #include <linux/acpi_dma.h>
 #include <linux/of_dma.h>
 #include <linux/mempool.h>
+#include <linux/numa.h>
 
 static DEFINE_MUTEX(dma_list_mutex);
 static DEFINE_IDA(dma_ida);
@@ -386,7 +387,8 @@ EXPORT_SYMBOL(dma_issue_pending_all);
 static bool dma_chan_is_local(struct dma_chan *chan, int cpu)
 {
 	int node = dev_to_node(chan->device->dev);
-	return node == -1 || cpumask_test_cpu(cpu, cpumask_of_node(node));
+	return node == NUMA_NO_NODE ||
+		cpumask_test_cpu(cpu, cpumask_of_node(node));
 }
 
 /**
diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c
index 2baf38cc1e23..4fe662c3bbc1 100644
--- a/drivers/infiniband/hw/hfi1/affinity.c
+++ b/drivers/infiniband/hw/hfi1/affinity.c
@@ -48,6 +48,7 @@
 #include <linux/cpumask.h>
 #include <linux/module.h>
 #include <linux/interrupt.h>
+#include <linux/numa.h>
 
 #include "hfi.h"
 #include "affinity.h"
@@ -777,7 +778,7 @@ void hfi1_dev_affinity_clean_up(struct hfi1_devdata *dd)
 	_dev_comp_vect_cpu_mask_clean_up(dd, entry);
 unlock:
 	mutex_unlock(&node_affinity.lock);
-	dd->node = -1;
+	dd->node = NUMA_NO_NODE;
 }
 
 /*
diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c
index 7835eb52e7c5..441b06e2a154 100644
--- a/drivers/infiniband/hw/hfi1/init.c
+++ b/drivers/infiniband/hw/hfi1/init.c
@@ -54,6 +54,7 @@
 #include <linux/printk.h>
 #include <linux/hrtimer.h>
 #include <linux/bitmap.h>
+#include <linux/numa.h>
 #include <rdma/rdma_vt.h>
 
 #include "hfi.h"
@@ -1303,7 +1304,7 @@ static struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev,
 		dd->unit = ret;
 		list_add(&dd->list, &hfi1_dev_list);
 	}
-	dd->node = -1;
+	dd->node = NUMA_NO_NODE;
 
 	spin_unlock_irqrestore(&hfi1_devs_lock, flags);
 	idr_preload_end();
diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c
index 58dc70bffd5b..9c49300e9fb7 100644
--- a/drivers/iommu/dmar.c
+++ b/drivers/iommu/dmar.c
@@ -39,6 +39,7 @@
 #include <linux/dmi.h>
 #include <linux/slab.h>
 #include <linux/iommu.h>
+#include <linux/numa.h>
 #include <asm/irq_remapping.h>
 #include <asm/iommu_table.h>
 
@@ -477,7 +478,7 @@ static int dmar_parse_one_rhsa(struct acpi_dmar_header *header, void *arg)
 			int node = acpi_map_pxm_to_node(rhsa->proximity_domain);
 
 			if (!node_online(node))
-				node = -1;
+				node = NUMA_NO_NODE;
 			drhd->iommu->node = node;
 			return 0;
 		}
@@ -1062,7 +1063,7 @@ static int alloc_iommu(struct dmar_drhd_unit *drhd)
 	iommu->msagaw = msagaw;
 	iommu->segment = drhd->segment;
 
-	iommu->node = -1;
+	iommu->node = NUMA_NO_NODE;
 
 	ver = readl(iommu->reg + DMAR_VER_REG);
 	pr_info("%s: reg_base_addr %llx ver %d:%d cap %llx ecap %llx\n",
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 78188bf7e90d..39a33dec4d0b 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -47,6 +47,7 @@
 #include <linux/dma-contiguous.h>
 #include <linux/dma-direct.h>
 #include <linux/crash_dump.h>
+#include <linux/numa.h>
 #include <asm/irq_remapping.h>
 #include <asm/cacheflush.h>
 #include <asm/iommu.h>
@@ -1716,7 +1717,7 @@ static struct dmar_domain *alloc_domain(int flags)
 		return NULL;
 
 	memset(domain, 0, sizeof(*domain));
-	domain->nid = -1;
+	domain->nid = NUMA_NO_NODE;
 	domain->flags = flags;
 	domain->has_iotlb_device = false;
 	INIT_LIST_HEAD(&domain->devices);
diff --git a/drivers/misc/sgi-xp/xpc_uv.c b/drivers/misc/sgi-xp/xpc_uv.c
index 0441abe87880..9e443df44b3b 100644
--- a/drivers/misc/sgi-xp/xpc_uv.c
+++ b/drivers/misc/sgi-xp/xpc_uv.c
@@ -22,6 +22,7 @@
 #include <linux/module.h>
 #include <linux/err.h>
 #include <linux/slab.h>
+#include <linux/numa.h>
 #include <asm/uv/uv_hub.h>
 #if defined CONFIG_X86_64
 #include <asm/uv/bios.h>
@@ -61,7 +62,7 @@ static struct xpc_heartbeat_uv *xpc_heartbeat_uv;
 					 XPC_NOTIFY_MSG_SIZE_UV)
 #define XPC_NOTIFY_IRQ_NAME		"xpc_notify"
 
-static int xpc_mq_node = -1;
+static int xpc_mq_node = NUMA_NO_NODE;
 
 static struct xpc_gru_mq_uv *xpc_activate_mq_uv;
 static struct xpc_gru_mq_uv *xpc_notify_mq_uv;
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index a4e7584a50cb..e100054a3765 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -27,6 +27,7 @@
 #include <linux/bpf.h>
 #include <linux/bpf_trace.h>
 #include <linux/atomic.h>
+#include <linux/numa.h>
 #include <scsi/fc/fc_fcoe.h>
 #include <net/udp_tunnel.h>
 #include <net/pkt_cls.h>
@@ -6418,7 +6419,7 @@ int ixgbe_setup_tx_resources(struct ixgbe_ring *tx_ring)
 {
 	struct device *dev = tx_ring->dev;
 	int orig_node = dev_to_node(dev);
-	int ring_node = -1;
+	int ring_node = NUMA_NO_NODE;
 	int size;
 
 	size = sizeof(struct ixgbe_tx_buffer) * tx_ring->count;
@@ -6512,7 +6513,7 @@ int ixgbe_setup_rx_resources(struct ixgbe_adapter *adapter,
 {
 	struct device *dev = rx_ring->dev;
 	int orig_node = dev_to_node(dev);
-	int ring_node = -1;
+	int ring_node = NUMA_NO_NODE;
 	int size;
 
 	size = sizeof(struct ixgbe_rx_buffer) * rx_ring->count;
diff --git a/include/linux/device.h b/include/linux/device.h
index 6cb4640b6160..4d2f13e8c540 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -1095,7 +1095,7 @@ static inline void set_dev_node(struct device *dev, int node)
 #else
 static inline int dev_to_node(struct device *dev)
 {
-	return -1;
+	return NUMA_NO_NODE;
 }
 static inline void set_dev_node(struct device *dev, int node)
 {
diff --git a/init/init_task.c b/init/init_task.c
index 5aebe3be4d7c..26131e73aa6d 100644
--- a/init/init_task.c
+++ b/init/init_task.c
@@ -10,6 +10,7 @@
 #include <linux/fs.h>
 #include <linux/mm.h>
 #include <linux/audit.h>
+#include <linux/numa.h>
 
 #include <asm/pgtable.h>
 #include <linux/uaccess.h>
@@ -154,7 +155,7 @@ struct task_struct init_task
 	.vtime.state	= VTIME_SYS,
 #endif
 #ifdef CONFIG_NUMA_BALANCING
-	.numa_preferred_nid = -1,
+	.numa_preferred_nid = NUMA_NO_NODE,
 	.numa_group	= NULL,
 	.numa_faults	= NULL,
 #endif
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 087d18d771b5..ebebbcf3c5de 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -20,6 +20,7 @@
 #include <linux/freezer.h>
 #include <linux/ptrace.h>
 #include <linux/uaccess.h>
+#include <linux/numa.h>
 #include <trace/events/sched.h>
 
 static DEFINE_SPINLOCK(kthread_create_lock);
@@ -675,7 +676,7 @@ __kthread_create_worker(int cpu, unsigned int flags,
 {
 	struct kthread_worker *worker;
 	struct task_struct *task;
-	int node = -1;
+	int node = NUMA_NO_NODE;
 
 	worker = kzalloc(sizeof(*worker), GFP_KERNEL);
 	if (!worker)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 310d0637fe4b..0e6a0ef129c5 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1160,7 +1160,7 @@ void init_numa_balancing(unsigned long clone_flags, struct task_struct *p)
 
 	/* New address space, reset the preferred nid */
 	if (!(clone_flags & CLONE_VM)) {
-		p->numa_preferred_nid = -1;
+		p->numa_preferred_nid = NUMA_NO_NODE;
 		return;
 	}
 
@@ -1180,13 +1180,13 @@ void init_numa_balancing(unsigned long clone_flags, struct task_struct *p)
 
 static void account_numa_enqueue(struct rq *rq, struct task_struct *p)
 {
-	rq->nr_numa_running += (p->numa_preferred_nid != -1);
+	rq->nr_numa_running += (p->numa_preferred_nid != NUMA_NO_NODE);
 	rq->nr_preferred_running += (p->numa_preferred_nid == task_node(p));
 }
 
 static void account_numa_dequeue(struct rq *rq, struct task_struct *p)
 {
-	rq->nr_numa_running -= (p->numa_preferred_nid != -1);
+	rq->nr_numa_running -= (p->numa_preferred_nid != NUMA_NO_NODE);
 	rq->nr_preferred_running -= (p->numa_preferred_nid == task_node(p));
 }
 
@@ -1400,7 +1400,7 @@ bool should_numa_migrate_memory(struct task_struct *p, struct page * page,
 	 * two full passes of the "multi-stage node selection" test that is
 	 * executed below.
 	 */
-	if ((p->numa_preferred_nid == -1 || p->numa_scan_seq <= 4) &&
+	if ((p->numa_preferred_nid == NUMA_NO_NODE || p->numa_scan_seq <= 4) &&
 	    (cpupid_pid_unset(last_cpupid) || cpupid_match_pid(p, last_cpupid)))
 		return true;
 
@@ -1848,7 +1848,7 @@ static void numa_migrate_preferred(struct task_struct *p)
 	unsigned long interval = HZ;
 
 	/* This task has no NUMA fault statistics yet */
-	if (unlikely(p->numa_preferred_nid == -1 || !p->numa_faults))
+	if (unlikely(p->numa_preferred_nid == NUMA_NO_NODE || !p->numa_faults))
 		return;
 
 	/* Periodically retry migrating the task to the preferred node */
@@ -2095,7 +2095,7 @@ static int preferred_group_nid(struct task_struct *p, int nid)
 
 static void task_numa_placement(struct task_struct *p)
 {
-	int seq, nid, max_nid = -1;
+	int seq, nid, max_nid = NUMA_NO_NODE;
 	unsigned long max_faults = 0;
 	unsigned long fault_types[2] = { 0, 0 };
 	unsigned long total_faults;
@@ -2638,7 +2638,8 @@ static void update_scan_period(struct task_struct *p, int new_cpu)
 		 * the preferred node.
 		 */
 		if (dst_nid == p->numa_preferred_nid ||
-		    (p->numa_preferred_nid != -1 && src_nid != p->numa_preferred_nid))
+		    (p->numa_preferred_nid != NUMA_NO_NODE &&
+			src_nid != p->numa_preferred_nid))
 			return;
 	}
 
diff --git a/lib/cpumask.c b/lib/cpumask.c
index 8d666ab84b5c..087a3e9a0202 100644
--- a/lib/cpumask.c
+++ b/lib/cpumask.c
@@ -5,6 +5,7 @@
 #include <linux/cpumask.h>
 #include <linux/export.h>
 #include <linux/memblock.h>
+#include <linux/numa.h>
 
 /**
  * cpumask_next - get the next cpu in a cpumask
@@ -206,7 +207,7 @@ unsigned int cpumask_local_spread(unsigned int i, int node)
 	/* Wrap: we always want a cpu. */
 	i %= num_online_cpus();
 
-	if (node == -1) {
+	if (node == NUMA_NO_NODE) {
 		for_each_cpu(cpu, cpu_online_mask)
 			if (i-- == 0)
 				return cpu;
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index faf357eaf0ce..d066f7ca1ee8 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -33,6 +33,7 @@
 #include <linux/page_idle.h>
 #include <linux/shmem_fs.h>
 #include <linux/oom.h>
+#include <linux/numa.h>
 
 #include <asm/tlb.h>
 #include <asm/pgalloc.h>
@@ -1475,7 +1476,7 @@ vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t pmd)
 	struct anon_vma *anon_vma = NULL;
 	struct page *page;
 	unsigned long haddr = vmf->address & HPAGE_PMD_MASK;
-	int page_nid = -1, this_nid = numa_node_id();
+	int page_nid = NUMA_NO_NODE, this_nid = numa_node_id();
 	int target_nid, last_cpupid = -1;
 	bool page_locked;
 	bool migrated = false;
@@ -1520,7 +1521,7 @@ vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t pmd)
 	 */
 	page_locked = trylock_page(page);
 	target_nid = mpol_misplaced(page, vma, haddr);
-	if (target_nid == -1) {
+	if (target_nid == NUMA_NO_NODE) {
 		/* If the page was locked, there are no parallel migrations */
 		if (page_locked)
 			goto clear_pmdnuma;
@@ -1528,7 +1529,7 @@ vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t pmd)
 
 	/* Migration could have started since the pmd_trans_migrating check */
 	if (!page_locked) {
-		page_nid = -1;
+		page_nid = NUMA_NO_NODE;
 		if (!get_page_unless_zero(page))
 			goto out_unlock;
 		spin_unlock(vmf->ptl);
@@ -1549,14 +1550,14 @@ vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t pmd)
 	if (unlikely(!pmd_same(pmd, *vmf->pmd))) {
 		unlock_page(page);
 		put_page(page);
-		page_nid = -1;
+		page_nid = NUMA_NO_NODE;
 		goto out_unlock;
 	}
 
 	/* Bail if we fail to protect against THP splits for any reason */
 	if (unlikely(!anon_vma)) {
 		put_page(page);
-		page_nid = -1;
+		page_nid = NUMA_NO_NODE;
 		goto clear_pmdnuma;
 	}
 
@@ -1618,7 +1619,7 @@ out:
 	if (anon_vma)
 		page_unlock_anon_vma_read(anon_vma);
 
-	if (page_nid != -1)
+	if (page_nid != NUMA_NO_NODE)
 		task_numa_fault(last_cpupid, page_nid, HPAGE_PMD_NR,
 				flags);
 
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 8dfdffc34a99..3c504fa6b460 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -25,6 +25,7 @@
 #include <linux/swap.h>
 #include <linux/swapops.h>
 #include <linux/jhash.h>
+#include <linux/numa.h>
 
 #include <asm/page.h>
 #include <asm/pgtable.h>
@@ -887,7 +888,7 @@ static struct page *dequeue_huge_page_nodemask(struct hstate *h, gfp_t gfp_mask,
 	struct zonelist *zonelist;
 	struct zone *zone;
 	struct zoneref *z;
-	int node = -1;
+	int node = NUMA_NO_NODE;
 
 	zonelist = node_zonelist(nid, gfp_mask);
 
diff --git a/mm/ksm.c b/mm/ksm.c
index 6c48ad13b4c9..fd2db6a74d3c 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -598,7 +598,7 @@ static struct stable_node *alloc_stable_node_chain(struct stable_node *dup,
 		chain->chain_prune_time = jiffies;
 		chain->rmap_hlist_len = STABLE_NODE_CHAIN;
 #if defined (CONFIG_DEBUG_VM) && defined(CONFIG_NUMA)
-		chain->nid = -1; /* debug */
+		chain->nid = NUMA_NO_NODE; /* debug */
 #endif
 		ksm_stable_node_chains++;
 
diff --git a/mm/memory.c b/mm/memory.c
index e11ca9dd823f..eb40f32295d2 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -69,6 +69,7 @@
 #include <linux/userfaultfd_k.h>
 #include <linux/dax.h>
 #include <linux/oom.h>
+#include <linux/numa.h>
 
 #include <asm/io.h>
 #include <asm/mmu_context.h>
@@ -3586,7 +3587,7 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf)
 {
 	struct vm_area_struct *vma = vmf->vma;
 	struct page *page = NULL;
-	int page_nid = -1;
+	int page_nid = NUMA_NO_NODE;
 	int last_cpupid;
 	int target_nid;
 	bool migrated = false;
@@ -3653,7 +3654,7 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf)
 	target_nid = numa_migrate_prep(page, vma, vmf->address, page_nid,
 			&flags);
 	pte_unmap_unlock(vmf->pte, vmf->ptl);
-	if (target_nid == -1) {
+	if (target_nid == NUMA_NO_NODE) {
 		put_page(page);
 		goto out;
 	}
@@ -3667,7 +3668,7 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf)
 		flags |= TNF_MIGRATE_FAIL;
 
 out:
-	if (page_nid != -1)
+	if (page_nid != NUMA_NO_NODE)
 		task_numa_fault(last_cpupid, page_nid, 1, flags);
 	return 0;
 }
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 4f07c8ddfdd7..b3d3c64d15df 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -702,9 +702,9 @@ static void node_states_check_changes_online(unsigned long nr_pages,
 {
 	int nid = zone_to_nid(zone);
 
-	arg->status_change_nid = -1;
-	arg->status_change_nid_normal = -1;
-	arg->status_change_nid_high = -1;
+	arg->status_change_nid = NUMA_NO_NODE;
+	arg->status_change_nid_normal = NUMA_NO_NODE;
+	arg->status_change_nid_high = NUMA_NO_NODE;
 
 	if (!node_state(nid, N_MEMORY))
 		arg->status_change_nid = nid;
@@ -1509,9 +1509,9 @@ static void node_states_check_changes_offline(unsigned long nr_pages,
 	unsigned long present_pages = 0;
 	enum zone_type zt;
 
-	arg->status_change_nid = -1;
-	arg->status_change_nid_normal = -1;
-	arg->status_change_nid_high = -1;
+	arg->status_change_nid = NUMA_NO_NODE;
+	arg->status_change_nid_normal = NUMA_NO_NODE;
+	arg->status_change_nid_high = NUMA_NO_NODE;
 
 	/*
 	 * Check whether node_states[N_NORMAL_MEMORY] will be changed.
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index ee2bce59d2bf..76e7e4bc3335 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -2304,7 +2304,7 @@ int mpol_misplaced(struct page *page, struct vm_area_struct *vma, unsigned long
 	unsigned long pgoff;
 	int thiscpu = raw_smp_processor_id();
 	int thisnid = cpu_to_node(thiscpu);
-	int polnid = -1;
+	int polnid = NUMA_NO_NODE;
 	int ret = -1;
 
 	pol = get_vma_policy(vma, addr);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 5361bd078493..1f9f1409df9b 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -6016,7 +6016,7 @@ int __meminit __early_pfn_to_nid(unsigned long pfn,
 		return state->last_nid;
 
 	nid = memblock_search_pfn_nid(pfn, &start_pfn, &end_pfn);
-	if (nid != -1) {
+	if (nid != NUMA_NO_NODE) {
 		state->last_start = start_pfn;
 		state->last_end = end_pfn;
 		state->last_nid = nid;
@@ -6771,7 +6771,7 @@ unsigned long __init node_map_pfn_alignment(void)
 {
 	unsigned long accl_mask = 0, last_end = 0;
 	unsigned long start, end, mask;
-	int last_nid = -1;
+	int last_nid = NUMA_NO_NODE;
 	int i, nid;
 
 	for_each_mem_pfn_range(i, MAX_NUMNODES, &start, &end, &nid) {
diff --git a/mm/page_ext.c b/mm/page_ext.c
index 8c78b8d45117..762d5b7eb523 100644
--- a/mm/page_ext.c
+++ b/mm/page_ext.c
@@ -300,7 +300,7 @@ static int __meminit online_page_ext(unsigned long start_pfn,
 	start = SECTION_ALIGN_DOWN(start_pfn);
 	end = SECTION_ALIGN_UP(start_pfn + nr_pages);
 
-	if (nid == -1) {
+	if (nid == NUMA_NO_NODE) {
 		/*
 		 * In this case, "nid" already exists and contains valid memory.
 		 * "start_pfn" passed to us is a pfn which is an arg for
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 6ac919847ce6..f3f5a78cd062 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -158,6 +158,7 @@
 #include <linux/etherdevice.h>
 #include <linux/kthread.h>
 #include <linux/prefetch.h>
+#include <linux/mmzone.h>
 #include <net/net_namespace.h>
 #include <net/checksum.h>
 #include <net/ipv6.h>
@@ -3625,7 +3626,7 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname)
 	pkt_dev->svlan_cfi = 0;
 	pkt_dev->svlan_id = 0xffff;
 	pkt_dev->burst = 1;
-	pkt_dev->node = -1;
+	pkt_dev->node = NUMA_NO_NODE;
 
 	err = pktgen_setup_dev(t->net, pkt_dev, ifname);
 	if (err)
diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c
index 86e1e37eb4e8..b37e6e0a1026 100644
--- a/net/qrtr/qrtr.c
+++ b/net/qrtr/qrtr.c
@@ -15,6 +15,7 @@
 #include <linux/netlink.h>
 #include <linux/qrtr.h>
 #include <linux/termios.h>	/* For TIOCINQ/OUTQ */
+#include <linux/numa.h>
 
 #include <net/sock.h>
 
@@ -101,7 +102,7 @@ static inline struct qrtr_sock *qrtr_sk(struct sock *sk)
 	return container_of(sk, struct qrtr_sock, sk);
 }
 
-static unsigned int qrtr_local_nid = -1;
+static unsigned int qrtr_local_nid = NUMA_NO_NODE;
 
 /* for node ids */
 static RADIX_TREE(qrtr_nodes, GFP_KERNEL);
-- 
cgit v1.2.3


From 52d1e606ee733921e984770d47539a6bb91e8506 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Tue, 5 Mar 2019 15:43:06 -0800
Subject: mm: reuse only-pte-mapped KSM page in do_wp_page()

Add an optimization for KSM pages almost in the same way that we have
for ordinary anonymous pages.  If there is a write fault in a page,
which is mapped to an only pte, and it is not related to swap cache; the
page may be reused without copying its content.

[ Note that we do not consider PageSwapCache() pages at least for now,
  since we don't want to complicate __get_ksm_page(), which has nice
  optimization based on this (for the migration case). Currenly it is
  spinning on PageSwapCache() pages, waiting for when they have
  unfreezed counters (i.e., for the migration finish). But we don't want
  to make it also spinning on swap cache pages, which we try to reuse,
  since there is not a very high probability to reuse them. So, for now
  we do not consider PageSwapCache() pages at all. ]

So in reuse_ksm_page() we check for 1) PageSwapCache() and 2)
page_stable_node(), to skip a page, which KSM is currently trying to
link to stable tree.  Then we do page_ref_freeze() to prohibit KSM to
merge one more page into the page, we are reusing.  After that, nobody
can refer to the reusing page: KSM skips !PageSwapCache() pages with
zero refcount; and the protection against of all other participants is
the same as for reused ordinary anon pages pte lock, page lock and
mmap_sem.

[akpm@linux-foundation.org: replace BUG_ON()s with WARN_ON()s]
Link: http://lkml.kernel.org/r/154471491016.31352.1168978849911555609.stgit@localhost.localdomain
Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Reviewed-by: Yang Shi <yang.shi@linux.alibaba.com>
Cc: "Kirill A. Shutemov" <kirill@shutemov.name>
Cc: Hugh Dickins <hughd@google.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Christian Koenig <christian.koenig@amd.com>
Cc: Claudio Imbrenda <imbrenda@linux.vnet.ibm.com>
Cc: Rik van Riel <riel@surriel.com>
Cc: Huang Ying <ying.huang@intel.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/ksm.h |  7 +++++++
 mm/ksm.c            | 30 ++++++++++++++++++++++++++++--
 mm/memory.c         | 16 ++++++++++++++--
 3 files changed, 49 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/ksm.h b/include/linux/ksm.h
index 161e8164abcf..e48b1e453ff5 100644
--- a/include/linux/ksm.h
+++ b/include/linux/ksm.h
@@ -53,6 +53,8 @@ struct page *ksm_might_need_to_copy(struct page *page,
 
 void rmap_walk_ksm(struct page *page, struct rmap_walk_control *rwc);
 void ksm_migrate_page(struct page *newpage, struct page *oldpage);
+bool reuse_ksm_page(struct page *page,
+			struct vm_area_struct *vma, unsigned long address);
 
 #else  /* !CONFIG_KSM */
 
@@ -86,6 +88,11 @@ static inline void rmap_walk_ksm(struct page *page,
 static inline void ksm_migrate_page(struct page *newpage, struct page *oldpage)
 {
 }
+static inline bool reuse_ksm_page(struct page *page,
+			struct vm_area_struct *vma, unsigned long address)
+{
+	return false;
+}
 #endif /* CONFIG_MMU */
 #endif /* !CONFIG_KSM */
 
diff --git a/mm/ksm.c b/mm/ksm.c
index fd2db6a74d3c..983fbac24bda 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -706,8 +706,9 @@ again:
 	 * case this node is no longer referenced, and should be freed;
 	 * however, it might mean that the page is under page_ref_freeze().
 	 * The __remove_mapping() case is easy, again the node is now stale;
-	 * but if page is swapcache in migrate_page_move_mapping(), it might
-	 * still be our page, in which case it's essential to keep the node.
+	 * the same is in reuse_ksm_page() case; but if page is swapcache
+	 * in migrate_page_move_mapping(), it might still be our page,
+	 * in which case it's essential to keep the node.
 	 */
 	while (!get_page_unless_zero(page)) {
 		/*
@@ -2642,6 +2643,31 @@ again:
 		goto again;
 }
 
+bool reuse_ksm_page(struct page *page,
+		    struct vm_area_struct *vma,
+		    unsigned long address)
+{
+#ifdef CONFIG_DEBUG_VM
+	if (WARN_ON(is_zero_pfn(page_to_pfn(page))) ||
+			WARN_ON(!page_mapped(page)) ||
+			WARN_ON(!PageLocked(page))) {
+		dump_page(page, "reuse_ksm_page");
+		return false;
+	}
+#endif
+
+	if (PageSwapCache(page) || !page_stable_node(page))
+		return false;
+	/* Prohibit parallel get_ksm_page() */
+	if (!page_ref_freeze(page, 1))
+		return false;
+
+	page_move_anon_rmap(page, vma);
+	page->index = linear_page_index(vma, address);
+	page_ref_unfreeze(page, 1);
+
+	return true;
+}
 #ifdef CONFIG_MIGRATION
 void ksm_migrate_page(struct page *newpage, struct page *oldpage)
 {
diff --git a/mm/memory.c b/mm/memory.c
index eb40f32295d2..222da66f16b4 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2505,8 +2505,11 @@ static vm_fault_t do_wp_page(struct vm_fault *vmf)
 	 * Take out anonymous pages first, anonymous shared vmas are
 	 * not dirty accountable.
 	 */
-	if (PageAnon(vmf->page) && !PageKsm(vmf->page)) {
+	if (PageAnon(vmf->page)) {
 		int total_map_swapcount;
+		if (PageKsm(vmf->page) && (PageSwapCache(vmf->page) ||
+					   page_count(vmf->page) != 1))
+			goto copy;
 		if (!trylock_page(vmf->page)) {
 			get_page(vmf->page);
 			pte_unmap_unlock(vmf->pte, vmf->ptl);
@@ -2521,6 +2524,15 @@ static vm_fault_t do_wp_page(struct vm_fault *vmf)
 			}
 			put_page(vmf->page);
 		}
+		if (PageKsm(vmf->page)) {
+			bool reused = reuse_ksm_page(vmf->page, vmf->vma,
+						     vmf->address);
+			unlock_page(vmf->page);
+			if (!reused)
+				goto copy;
+			wp_page_reuse(vmf);
+			return VM_FAULT_WRITE;
+		}
 		if (reuse_swap_page(vmf->page, &total_map_swapcount)) {
 			if (total_map_swapcount == 1) {
 				/*
@@ -2541,7 +2553,7 @@ static vm_fault_t do_wp_page(struct vm_fault *vmf)
 					(VM_WRITE|VM_SHARED))) {
 		return wp_page_shared(vmf);
 	}
-
+copy:
 	/*
 	 * Ok, we need to copy. Oh, well..
 	 */
-- 
cgit v1.2.3


From 60cd4bcd62384cfa1e5890cebacccf08b3161156 Mon Sep 17 00:00:00 2001
From: Shakeel Butt <shakeelb@google.com>
Date: Tue, 5 Mar 2019 15:43:13 -0800
Subject: memcg: localize memcg_kmem_enabled() check

Move the memcg_kmem_enabled() checks into memcg kmem charge/uncharge
functions, so, the users don't have to explicitly check that condition.

This is purely code cleanup patch without any functional change.  Only
the order of checks in memcg_charge_slab() can potentially be changed
but the functionally it will be same.  This should not matter as
memcg_charge_slab() is not in the hot path.

Link: http://lkml.kernel.org/r/20190103161203.162375-1-shakeelb@google.com
Signed-off-by: Shakeel Butt <shakeelb@google.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: Roman Gushchin <guro@fb.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/pipe.c                  |  3 +--
 include/linux/memcontrol.h | 37 +++++++++++++++++++++++++++++++++----
 mm/memcontrol.c            | 16 ++++++++--------
 mm/page_alloc.c            |  4 ++--
 mm/slab.h                  |  4 ----
 5 files changed, 44 insertions(+), 20 deletions(-)

(limited to 'include')

diff --git a/fs/pipe.c b/fs/pipe.c
index bdc5d3c0977d..51d5fd8840ab 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -140,8 +140,7 @@ static int anon_pipe_buf_steal(struct pipe_inode_info *pipe,
 	struct page *page = buf->page;
 
 	if (page_count(page) == 1) {
-		if (memcg_kmem_enabled())
-			memcg_kmem_uncharge(page, 0);
+		memcg_kmem_uncharge(page, 0);
 		__SetPageLocked(page);
 		return 0;
 	}
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 83ae11cbd12c..b0eb29ea0d9c 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -1273,12 +1273,12 @@ static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg)
 
 struct kmem_cache *memcg_kmem_get_cache(struct kmem_cache *cachep);
 void memcg_kmem_put_cache(struct kmem_cache *cachep);
-int memcg_kmem_charge_memcg(struct page *page, gfp_t gfp, int order,
-			    struct mem_cgroup *memcg);
 
 #ifdef CONFIG_MEMCG_KMEM
-int memcg_kmem_charge(struct page *page, gfp_t gfp, int order);
-void memcg_kmem_uncharge(struct page *page, int order);
+int __memcg_kmem_charge(struct page *page, gfp_t gfp, int order);
+void __memcg_kmem_uncharge(struct page *page, int order);
+int __memcg_kmem_charge_memcg(struct page *page, gfp_t gfp, int order,
+			      struct mem_cgroup *memcg);
 
 extern struct static_key_false memcg_kmem_enabled_key;
 extern struct workqueue_struct *memcg_kmem_cache_wq;
@@ -1300,6 +1300,26 @@ static inline bool memcg_kmem_enabled(void)
 	return static_branch_unlikely(&memcg_kmem_enabled_key);
 }
 
+static inline int memcg_kmem_charge(struct page *page, gfp_t gfp, int order)
+{
+	if (memcg_kmem_enabled())
+		return __memcg_kmem_charge(page, gfp, order);
+	return 0;
+}
+
+static inline void memcg_kmem_uncharge(struct page *page, int order)
+{
+	if (memcg_kmem_enabled())
+		__memcg_kmem_uncharge(page, order);
+}
+
+static inline int memcg_kmem_charge_memcg(struct page *page, gfp_t gfp,
+					  int order, struct mem_cgroup *memcg)
+{
+	if (memcg_kmem_enabled())
+		return __memcg_kmem_charge_memcg(page, gfp, order, memcg);
+	return 0;
+}
 /*
  * helper for accessing a memcg's index. It will be used as an index in the
  * child cache array in kmem_cache, and also to derive its name. This function
@@ -1325,6 +1345,15 @@ static inline void memcg_kmem_uncharge(struct page *page, int order)
 {
 }
 
+static inline int __memcg_kmem_charge(struct page *page, gfp_t gfp, int order)
+{
+	return 0;
+}
+
+static inline void __memcg_kmem_uncharge(struct page *page, int order)
+{
+}
+
 #define for_each_memcg_cache_index(_idx)	\
 	for (; NULL; )
 
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index af7f18b32389..72414bb7e226 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2573,7 +2573,7 @@ void memcg_kmem_put_cache(struct kmem_cache *cachep)
 }
 
 /**
- * memcg_kmem_charge_memcg: charge a kmem page
+ * __memcg_kmem_charge_memcg: charge a kmem page
  * @page: page to charge
  * @gfp: reclaim mode
  * @order: allocation order
@@ -2581,7 +2581,7 @@ void memcg_kmem_put_cache(struct kmem_cache *cachep)
  *
  * Returns 0 on success, an error code on failure.
  */
-int memcg_kmem_charge_memcg(struct page *page, gfp_t gfp, int order,
+int __memcg_kmem_charge_memcg(struct page *page, gfp_t gfp, int order,
 			    struct mem_cgroup *memcg)
 {
 	unsigned int nr_pages = 1 << order;
@@ -2604,24 +2604,24 @@ int memcg_kmem_charge_memcg(struct page *page, gfp_t gfp, int order,
 }
 
 /**
- * memcg_kmem_charge: charge a kmem page to the current memory cgroup
+ * __memcg_kmem_charge: charge a kmem page to the current memory cgroup
  * @page: page to charge
  * @gfp: reclaim mode
  * @order: allocation order
  *
  * Returns 0 on success, an error code on failure.
  */
-int memcg_kmem_charge(struct page *page, gfp_t gfp, int order)
+int __memcg_kmem_charge(struct page *page, gfp_t gfp, int order)
 {
 	struct mem_cgroup *memcg;
 	int ret = 0;
 
-	if (mem_cgroup_disabled() || memcg_kmem_bypass())
+	if (memcg_kmem_bypass())
 		return 0;
 
 	memcg = get_mem_cgroup_from_current();
 	if (!mem_cgroup_is_root(memcg)) {
-		ret = memcg_kmem_charge_memcg(page, gfp, order, memcg);
+		ret = __memcg_kmem_charge_memcg(page, gfp, order, memcg);
 		if (!ret)
 			__SetPageKmemcg(page);
 	}
@@ -2629,11 +2629,11 @@ int memcg_kmem_charge(struct page *page, gfp_t gfp, int order)
 	return ret;
 }
 /**
- * memcg_kmem_uncharge: uncharge a kmem page
+ * __memcg_kmem_uncharge: uncharge a kmem page
  * @page: page to uncharge
  * @order: allocation order
  */
-void memcg_kmem_uncharge(struct page *page, int order)
+void __memcg_kmem_uncharge(struct page *page, int order)
 {
 	struct mem_cgroup *memcg = page->mem_cgroup;
 	unsigned int nr_pages = 1 << order;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 1f9f1409df9b..034b8b6043a3 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1056,7 +1056,7 @@ static __always_inline bool free_pages_prepare(struct page *page,
 	if (PageMappingFlags(page))
 		page->mapping = NULL;
 	if (memcg_kmem_enabled() && PageKmemcg(page))
-		memcg_kmem_uncharge(page, order);
+		__memcg_kmem_uncharge(page, order);
 	if (check_free)
 		bad += free_pages_check(page);
 	if (bad)
@@ -4568,7 +4568,7 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, int preferred_nid,
 
 out:
 	if (memcg_kmem_enabled() && (gfp_mask & __GFP_ACCOUNT) && page &&
-	    unlikely(memcg_kmem_charge(page, gfp_mask, order) != 0)) {
+	    unlikely(__memcg_kmem_charge(page, gfp_mask, order) != 0)) {
 		__free_pages(page, order);
 		page = NULL;
 	}
diff --git a/mm/slab.h b/mm/slab.h
index 384105318779..e5e6658eeacc 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -276,8 +276,6 @@ static __always_inline int memcg_charge_slab(struct page *page,
 					     gfp_t gfp, int order,
 					     struct kmem_cache *s)
 {
-	if (!memcg_kmem_enabled())
-		return 0;
 	if (is_root_cache(s))
 		return 0;
 	return memcg_kmem_charge_memcg(page, gfp, order, s->memcg_params.memcg);
@@ -286,8 +284,6 @@ static __always_inline int memcg_charge_slab(struct page *page,
 static __always_inline void memcg_uncharge_slab(struct page *page, int order,
 						struct kmem_cache *s)
 {
-	if (!memcg_kmem_enabled())
-		return;
 	memcg_kmem_uncharge(page, order);
 }
 
-- 
cgit v1.2.3


From 6b7e5cad651a2b1031a4c69a98f87e3532dd4cef Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Tue, 5 Mar 2019 15:43:41 -0800
Subject: mm: remove sysctl_extfrag_handler()

sysctl_extfrag_handler() neglects to propagate the return value from
proc_dointvec_minmax() to its caller.  It's a wrapper that doesn't need
to exist, so just use proc_dointvec_minmax() directly.

Link: http://lkml.kernel.org/r/20190104032557.3056-1-willy@infradead.org
Signed-off-by: Matthew Wilcox <willy@infradead.org>
Reported-by: Aditya Pakki <pakki001@umn.edu>
Acked-by: Mel Gorman <mgorman@techsingularity.net>
Acked-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/compaction.h | 2 --
 kernel/sysctl.c            | 2 +-
 mm/compaction.c            | 8 --------
 3 files changed, 1 insertion(+), 11 deletions(-)

(limited to 'include')

diff --git a/include/linux/compaction.h b/include/linux/compaction.h
index 68250a57aace..70d0256edd31 100644
--- a/include/linux/compaction.h
+++ b/include/linux/compaction.h
@@ -88,8 +88,6 @@ extern int sysctl_compact_memory;
 extern int sysctl_compaction_handler(struct ctl_table *table, int write,
 			void __user *buffer, size_t *length, loff_t *ppos);
 extern int sysctl_extfrag_threshold;
-extern int sysctl_extfrag_handler(struct ctl_table *table, int write,
-			void __user *buffer, size_t *length, loff_t *ppos);
 extern int sysctl_compact_unevictable_allowed;
 
 extern int fragmentation_index(struct zone *zone, unsigned int order);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 7578e21a711b..9c78e06f7ba4 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1460,7 +1460,7 @@ static struct ctl_table vm_table[] = {
 		.data		= &sysctl_extfrag_threshold,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
-		.proc_handler	= sysctl_extfrag_handler,
+		.proc_handler	= proc_dointvec_minmax,
 		.extra1		= &min_extfrag_threshold,
 		.extra2		= &max_extfrag_threshold,
 	},
diff --git a/mm/compaction.c b/mm/compaction.c
index ef29490b0f46..c15b4bbc9e9e 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -1876,14 +1876,6 @@ int sysctl_compaction_handler(struct ctl_table *table, int write,
 	return 0;
 }
 
-int sysctl_extfrag_handler(struct ctl_table *table, int write,
-			void __user *buffer, size_t *length, loff_t *ppos)
-{
-	proc_dointvec_minmax(table, write, buffer, length, ppos);
-
-	return 0;
-}
-
 #if defined(CONFIG_SYSFS) && defined(CONFIG_NUMA)
 static ssize_t sysfs_compact_node(struct device *dev,
 			struct device_attribute *attr,
-- 
cgit v1.2.3


From 7ed2c31dabdeb3ee6abe8ff5aac7287821a50cba Mon Sep 17 00:00:00 2001
From: Anshuman Khandual <anshuman.khandual@arm.com>
Date: Tue, 5 Mar 2019 15:43:44 -0800
Subject: mm/hugetlb: distinguish between migratability and movability

Patch series "arm64/mm: Enable HugeTLB migration", v4.

This patch series enables HugeTLB migration support for all supported
huge page sizes at all levels including contiguous bit implementation.
Following HugeTLB migration support matrix has been enabled with this
patch series.  All permutations have been tested except for the 16GB.

           CONT PTE    PMD    CONT PMD    PUD
           --------    ---    --------    ---
  4K:         64K     2M         32M     1G
  16K:         2M    32M          1G
  64K:         2M   512M         16G

First the series adds migration support for PUD based huge pages.  It
then adds a platform specific hook to query an architecture if a given
huge page size is supported for migration while also providing a default
fallback option preserving the existing semantics which just checks for
(PMD|PUD|PGDIR)_SHIFT macros.  The last two patches enables HugeTLB
migration on arm64 and subscribe to this new platform specific hook by
defining an override.

The second patch differentiates between movability and migratability
aspects of huge pages and implements hugepage_movable_supported() which
can then be used during allocation to decide whether to place the huge
page in movable zone or not.

This patch (of 5):

During huge page allocation it's migratability is checked to determine
if it should be placed under movable zones with GFP_HIGHUSER_MOVABLE.
But the movability aspect of the huge page could depend on other factors
than just migratability.  Movability in itself is a distinct property
which should not be tied with migratability alone.

This differentiates these two and implements an enhanced movability check
which also considers huge page size to determine if it is feasible to be
placed under a movable zone.  At present it just checks for gigantic pages
but going forward it can incorporate other enhanced checks.

Link: http://lkml.kernel.org/r/1545121450-1663-2-git-send-email-anshuman.khandual@arm.com
Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
Reviewed-by: Steve Capper <steve.capper@arm.com>
Reviewed-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Suggested-by: Michal Hocko <mhocko@kernel.org>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/hugetlb.h | 30 ++++++++++++++++++++++++++++++
 mm/hugetlb.c            |  2 +-
 mm/migrate.c            |  2 +-
 3 files changed, 32 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 087fd5f48c91..1b858d795731 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -506,6 +506,31 @@ static inline bool hugepage_migration_supported(struct hstate *h)
 #endif
 }
 
+/*
+ * Movability check is different as compared to migration check.
+ * It determines whether or not a huge page should be placed on
+ * movable zone or not. Movability of any huge page should be
+ * required only if huge page size is supported for migration.
+ * There wont be any reason for the huge page to be movable if
+ * it is not migratable to start with. Also the size of the huge
+ * page should be large enough to be placed under a movable zone
+ * and still feasible enough to be migratable. Just the presence
+ * in movable zone does not make the migration feasible.
+ *
+ * So even though large huge page sizes like the gigantic ones
+ * are migratable they should not be movable because its not
+ * feasible to migrate them from movable zone.
+ */
+static inline bool hugepage_movable_supported(struct hstate *h)
+{
+	if (!hugepage_migration_supported(h))
+		return false;
+
+	if (hstate_is_gigantic(h))
+		return false;
+	return true;
+}
+
 static inline spinlock_t *huge_pte_lockptr(struct hstate *h,
 					   struct mm_struct *mm, pte_t *pte)
 {
@@ -602,6 +627,11 @@ static inline bool hugepage_migration_supported(struct hstate *h)
 	return false;
 }
 
+static inline bool hugepage_movable_supported(struct hstate *h)
+{
+	return false;
+}
+
 static inline spinlock_t *huge_pte_lockptr(struct hstate *h,
 					   struct mm_struct *mm, pte_t *pte)
 {
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 3c504fa6b460..2fb3062a3595 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -920,7 +920,7 @@ retry_cpuset:
 /* Movability of hugepages depends on migration support. */
 static inline gfp_t htlb_alloc_mask(struct hstate *h)
 {
-	if (hugepage_migration_supported(h))
+	if (hugepage_movable_supported(h))
 		return GFP_HIGHUSER_MOVABLE;
 	else
 		return GFP_HIGHUSER;
diff --git a/mm/migrate.c b/mm/migrate.c
index 181f5d2718a9..0413596fc523 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1287,7 +1287,7 @@ static int unmap_and_move_huge_page(new_page_t get_new_page,
 	struct anon_vma *anon_vma = NULL;
 
 	/*
-	 * Movability of hugepages depends on architectures and hugepage size.
+	 * Migratability of hugepages depends on architectures and their size.
 	 * This check is necessary because some callers of hugepage migration
 	 * like soft offline and memory hotremove don't walk through page
 	 * tables or check whether the hugepage is pmd-based or not before
-- 
cgit v1.2.3


From 9b553bf5eb99dd1b2d8ae23136da46da5c205dfd Mon Sep 17 00:00:00 2001
From: Anshuman Khandual <anshuman.khandual@arm.com>
Date: Tue, 5 Mar 2019 15:43:48 -0800
Subject: mm/hugetlb: enable PUD level huge page migration

Architectures like arm64 have PUD level HugeTLB pages for certain configs
(1GB huge page is PUD based on ARM64_4K_PAGES base page size) that can
be enabled for migration.  It can be achieved through checking for
PUD_SHIFT order based HugeTLB pages during migration.

Link: http://lkml.kernel.org/r/1545121450-1663-3-git-send-email-anshuman.khandual@arm.com
Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
Reviewed-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Reviewed-by: Steve Capper <steve.capper@arm.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Michal Hocko <mhocko@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/hugetlb.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 1b858d795731..70bcd8973323 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -497,7 +497,8 @@ static inline bool hugepage_migration_supported(struct hstate *h)
 {
 #ifdef CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION
 	if ((huge_page_shift(h) == PMD_SHIFT) ||
-		(huge_page_shift(h) == PGDIR_SHIFT))
+		(huge_page_shift(h) == PUD_SHIFT) ||
+			(huge_page_shift(h) == PGDIR_SHIFT))
 		return true;
 	else
 		return false;
-- 
cgit v1.2.3


From e693de186414ae66f2a316ff9befcd2b7a6d07b6 Mon Sep 17 00:00:00 2001
From: Anshuman Khandual <anshuman.khandual@arm.com>
Date: Tue, 5 Mar 2019 15:43:51 -0800
Subject: mm/hugetlb: enable arch specific huge page size support for migration

Architectures like arm64 have HugeTLB page sizes which are different
than generic sizes at PMD, PUD, PGD level and implemented via contiguous
bits.  At present these special size HugeTLB pages cannot be identified
through macros like (PMD|PUD|PGDIR)_SHIFT and hence chosen not be
migrated.

Enabling migration support for these special HugeTLB page sizes along
with the generic ones (PMD|PUD|PGD) would require identifying all of
them on a given platform.  A platform specific hook can precisely
enumerate all huge page sizes supported for migration.  Instead of
comparing against standard huge page orders let
hugetlb_migration_support() function call a platform hook
arch_hugetlb_migration_support().  Default definition for the platform
hook maintains existing semantics which checks standard huge page order.
But an architecture can choose to override the default and provide
support for a comprehensive set of huge page sizes.

Link: http://lkml.kernel.org/r/1545121450-1663-4-git-send-email-anshuman.khandual@arm.com
Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
Reviewed-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Reviewed-by: Steve Capper <steve.capper@arm.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Michal Hocko <mhocko@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/hugetlb.h | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 70bcd8973323..4cc3871b65fc 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -493,18 +493,29 @@ static inline pgoff_t basepage_index(struct page *page)
 extern int dissolve_free_huge_page(struct page *page);
 extern int dissolve_free_huge_pages(unsigned long start_pfn,
 				    unsigned long end_pfn);
-static inline bool hugepage_migration_supported(struct hstate *h)
-{
+
 #ifdef CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION
+#ifndef arch_hugetlb_migration_supported
+static inline bool arch_hugetlb_migration_supported(struct hstate *h)
+{
 	if ((huge_page_shift(h) == PMD_SHIFT) ||
 		(huge_page_shift(h) == PUD_SHIFT) ||
 			(huge_page_shift(h) == PGDIR_SHIFT))
 		return true;
 	else
 		return false;
+}
+#endif
 #else
+static inline bool arch_hugetlb_migration_supported(struct hstate *h)
+{
 	return false;
+}
 #endif
+
+static inline bool hugepage_migration_supported(struct hstate *h)
+{
+	return arch_hugetlb_migration_supported(h);
 }
 
 /*
-- 
cgit v1.2.3


From d71e53cee7c2e553b85c572e76da778a93d32135 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Tue, 5 Mar 2019 15:44:18 -0800
Subject: mm: shuffle GFP_* flags

GFP_KERNEL is one of the most used constant but on archs like arm with
fixed length instruction some constants are more equal than the others.
Constants with tightly packed bits can be injected directly into
instruction stream:

	   0:   e3a00d33        mov     r0, #3264       ; 0xcc0

Others require multiple instructions or even loading out of instruction
stream:

	   0:   e3a000c0        mov     r0, #192        ; 0xc0
	   4:   e3400060        movt    r0, #96		; 0x60

Shuffle GFP_* flags so that GFP_KERNEL/GFP_ATOMIC + __GFP_ZERO bits are
close to each other.

Savings on arm configs are ~0.1%.

Link: http://lkml.kernel.org/r/20190109201838.GA9140@avx2
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/gfp.h | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 5f5e25fd6149..fdab7de7490d 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -24,21 +24,21 @@ struct vm_area_struct;
 #define ___GFP_HIGH		0x20u
 #define ___GFP_IO		0x40u
 #define ___GFP_FS		0x80u
-#define ___GFP_WRITE		0x100u
-#define ___GFP_NOWARN		0x200u
-#define ___GFP_RETRY_MAYFAIL	0x400u
-#define ___GFP_NOFAIL		0x800u
-#define ___GFP_NORETRY		0x1000u
-#define ___GFP_MEMALLOC		0x2000u
-#define ___GFP_COMP		0x4000u
-#define ___GFP_ZERO		0x8000u
-#define ___GFP_NOMEMALLOC	0x10000u
-#define ___GFP_HARDWALL		0x20000u
-#define ___GFP_THISNODE		0x40000u
-#define ___GFP_ATOMIC		0x80000u
-#define ___GFP_ACCOUNT		0x100000u
-#define ___GFP_DIRECT_RECLAIM	0x200000u
-#define ___GFP_KSWAPD_RECLAIM	0x400000u
+#define ___GFP_ZERO		0x100u
+#define ___GFP_ATOMIC		0x200u
+#define ___GFP_DIRECT_RECLAIM	0x400u
+#define ___GFP_KSWAPD_RECLAIM	0x800u
+#define ___GFP_WRITE		0x1000u
+#define ___GFP_NOWARN		0x2000u
+#define ___GFP_RETRY_MAYFAIL	0x4000u
+#define ___GFP_NOFAIL		0x8000u
+#define ___GFP_NORETRY		0x10000u
+#define ___GFP_MEMALLOC		0x20000u
+#define ___GFP_COMP		0x40000u
+#define ___GFP_NOMEMALLOC	0x80000u
+#define ___GFP_HARDWALL		0x100000u
+#define ___GFP_THISNODE		0x200000u
+#define ___GFP_ACCOUNT		0x400000u
 #ifdef CONFIG_LOCKDEP
 #define ___GFP_NOLOCKDEP	0x800000u
 #else
-- 
cgit v1.2.3


From 70b44595eafe9c7c235f076d653a268ca1ab9fdb Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@techsingularity.net>
Date: Tue, 5 Mar 2019 15:44:54 -0800
Subject: mm, compaction: use free lists to quickly locate a migration source

The migration scanner is a linear scan of a zone with a potentiall large
search space.  Furthermore, many pageblocks are unusable such as those
filled with reserved pages or partially filled with pages that cannot
migrate.  These still get scanned in the common case of allocating a THP
and the cost accumulates.

The patch uses a partial search of the free lists to locate a migration
source candidate that is marked as MOVABLE when allocating a THP.  It
prefers picking a block with a larger number of free pages already on
the basis that there are fewer pages to migrate to free the entire
block.  The lowest PFN found during searches is tracked as the basis of
the start for the linear search after the first search of the free list
fails.  After the search, the free list is shuffled so that the next
search will not encounter the same page.  If the search fails then the
subsequent searches will be shorter and the linear scanner is used.

If this search fails, or if the request is for a small or
unmovable/reclaimable allocation then the linear scanner is still used.
It is somewhat pointless to use the list search in those cases.  Small
free pages must be used for the search and there is no guarantee that
movable pages are located within that block that are contiguous.

                                     5.0.0-rc1              5.0.0-rc1
                                 noboost-v3r10          findmig-v3r15
Amean     fault-both-3      3771.41 (   0.00%)     3390.40 (  10.10%)
Amean     fault-both-5      5409.05 (   0.00%)     5082.28 (   6.04%)
Amean     fault-both-7      7040.74 (   0.00%)     7012.51 (   0.40%)
Amean     fault-both-12    11887.35 (   0.00%)    11346.63 (   4.55%)
Amean     fault-both-18    16718.19 (   0.00%)    15324.19 (   8.34%)
Amean     fault-both-24    21157.19 (   0.00%)    16088.50 *  23.96%*
Amean     fault-both-30    21175.92 (   0.00%)    18723.42 *  11.58%*
Amean     fault-both-32    21339.03 (   0.00%)    18612.01 *  12.78%*

                                5.0.0-rc1              5.0.0-rc1
                            noboost-v3r10          findmig-v3r15
Percentage huge-3        86.50 (   0.00%)       89.83 (   3.85%)
Percentage huge-5        92.52 (   0.00%)       91.96 (  -0.61%)
Percentage huge-7        92.44 (   0.00%)       92.85 (   0.44%)
Percentage huge-12       92.98 (   0.00%)       92.74 (  -0.25%)
Percentage huge-18       91.70 (   0.00%)       91.71 (   0.02%)
Percentage huge-24       91.59 (   0.00%)       92.13 (   0.60%)
Percentage huge-30       90.14 (   0.00%)       93.79 (   4.04%)
Percentage huge-32       90.03 (   0.00%)       91.27 (   1.37%)

This shows an improvement in allocation latencies with similar
allocation success rates.  While not presented, there was a 31%
reduction in migration scanning and a 8% reduction on system CPU usage.
A 2-socket machine showed similar benefits.

[mgorman@techsingularity.net: several fixes]
  Link: http://lkml.kernel.org/r/20190204120111.GL9565@techsingularity.net
[vbabka@suse.cz: migrate block that was found-fast, some optimisations]
Link: http://lkml.kernel.org/r/20190118175136.31341-10-mgorman@techsingularity.net
Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Acked-by: Vlastimil Babka <Vbabka@suse.cz>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Dan Carpenter <dan.carpenter@oracle.com>
Cc: David Rientjes <rientjes@google.com>
Cc: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/gpu/drm/i915/i915_utils.h |   6 --
 include/linux/list.h              |  11 +++
 mm/compaction.c                   | 178 +++++++++++++++++++++++++++++++++++++-
 mm/internal.h                     |   2 +
 4 files changed, 188 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h
index 9726df37c4c4..540e20eb032c 100644
--- a/drivers/gpu/drm/i915/i915_utils.h
+++ b/drivers/gpu/drm/i915/i915_utils.h
@@ -123,12 +123,6 @@ static inline u64 ptr_to_u64(const void *ptr)
 
 #include <linux/list.h>
 
-static inline int list_is_first(const struct list_head *list,
-				const struct list_head *head)
-{
-	return head->next == list;
-}
-
 static inline void __list_del_many(struct list_head *head,
 				   struct list_head *first)
 {
diff --git a/include/linux/list.h b/include/linux/list.h
index edb7628e46ed..79626b5ab36c 100644
--- a/include/linux/list.h
+++ b/include/linux/list.h
@@ -206,6 +206,17 @@ static inline void list_bulk_move_tail(struct list_head *head,
 	head->prev = last;
 }
 
+/**
+ * list_is_first -- tests whether @ list is the first entry in list @head
+ * @list: the entry to test
+ * @head: the head of the list
+ */
+static inline int list_is_first(const struct list_head *list,
+					const struct list_head *head)
+{
+	return list->prev == head;
+}
+
 /**
  * list_is_last - tests whether @list is the last entry in list @head
  * @list: the entry to test
diff --git a/mm/compaction.c b/mm/compaction.c
index 3d11c209614a..55f7ab142af2 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -1040,6 +1040,12 @@ static bool suitable_migration_target(struct compact_control *cc,
 	return false;
 }
 
+static inline unsigned int
+freelist_scan_limit(struct compact_control *cc)
+{
+	return (COMPACT_CLUSTER_MAX >> cc->fast_search_fail) + 1;
+}
+
 /*
  * Test whether the free scanner has reached the same or lower pageblock than
  * the migration scanner, and compaction should thus terminate.
@@ -1050,6 +1056,19 @@ static inline bool compact_scanners_met(struct compact_control *cc)
 		<= (cc->migrate_pfn >> pageblock_order);
 }
 
+/* Reorder the free list to reduce repeated future searches */
+static void
+move_freelist_tail(struct list_head *freelist, struct page *freepage)
+{
+	LIST_HEAD(sublist);
+
+	if (!list_is_first(freelist, &freepage->lru)) {
+		list_cut_position(&sublist, freelist, &freepage->lru);
+		if (!list_empty(&sublist))
+			list_splice_tail(&sublist, freelist);
+	}
+}
+
 /*
  * Based on information in the current compact_control, find blocks
  * suitable for isolating free pages from and then isolate them.
@@ -1207,6 +1226,148 @@ typedef enum {
  */
 int sysctl_compact_unevictable_allowed __read_mostly = 1;
 
+static inline void
+update_fast_start_pfn(struct compact_control *cc, unsigned long pfn)
+{
+	if (cc->fast_start_pfn == ULONG_MAX)
+		return;
+
+	if (!cc->fast_start_pfn)
+		cc->fast_start_pfn = pfn;
+
+	cc->fast_start_pfn = min(cc->fast_start_pfn, pfn);
+}
+
+static inline unsigned long
+reinit_migrate_pfn(struct compact_control *cc)
+{
+	if (!cc->fast_start_pfn || cc->fast_start_pfn == ULONG_MAX)
+		return cc->migrate_pfn;
+
+	cc->migrate_pfn = cc->fast_start_pfn;
+	cc->fast_start_pfn = ULONG_MAX;
+
+	return cc->migrate_pfn;
+}
+
+/*
+ * Briefly search the free lists for a migration source that already has
+ * some free pages to reduce the number of pages that need migration
+ * before a pageblock is free.
+ */
+static unsigned long fast_find_migrateblock(struct compact_control *cc)
+{
+	unsigned int limit = freelist_scan_limit(cc);
+	unsigned int nr_scanned = 0;
+	unsigned long distance;
+	unsigned long pfn = cc->migrate_pfn;
+	unsigned long high_pfn;
+	int order;
+
+	/* Skip hints are relied on to avoid repeats on the fast search */
+	if (cc->ignore_skip_hint)
+		return pfn;
+
+	/*
+	 * If the migrate_pfn is not at the start of a zone or the start
+	 * of a pageblock then assume this is a continuation of a previous
+	 * scan restarted due to COMPACT_CLUSTER_MAX.
+	 */
+	if (pfn != cc->zone->zone_start_pfn && pfn != pageblock_start_pfn(pfn))
+		return pfn;
+
+	/*
+	 * For smaller orders, just linearly scan as the number of pages
+	 * to migrate should be relatively small and does not necessarily
+	 * justify freeing up a large block for a small allocation.
+	 */
+	if (cc->order <= PAGE_ALLOC_COSTLY_ORDER)
+		return pfn;
+
+	/*
+	 * Only allow kcompactd and direct requests for movable pages to
+	 * quickly clear out a MOVABLE pageblock for allocation. This
+	 * reduces the risk that a large movable pageblock is freed for
+	 * an unmovable/reclaimable small allocation.
+	 */
+	if (cc->direct_compaction && cc->migratetype != MIGRATE_MOVABLE)
+		return pfn;
+
+	/*
+	 * When starting the migration scanner, pick any pageblock within the
+	 * first half of the search space. Otherwise try and pick a pageblock
+	 * within the first eighth to reduce the chances that a migration
+	 * target later becomes a source.
+	 */
+	distance = (cc->free_pfn - cc->migrate_pfn) >> 1;
+	if (cc->migrate_pfn != cc->zone->zone_start_pfn)
+		distance >>= 2;
+	high_pfn = pageblock_start_pfn(cc->migrate_pfn + distance);
+
+	for (order = cc->order - 1;
+	     order >= PAGE_ALLOC_COSTLY_ORDER && pfn == cc->migrate_pfn && nr_scanned < limit;
+	     order--) {
+		struct free_area *area = &cc->zone->free_area[order];
+		struct list_head *freelist;
+		unsigned long flags;
+		struct page *freepage;
+
+		if (!area->nr_free)
+			continue;
+
+		spin_lock_irqsave(&cc->zone->lock, flags);
+		freelist = &area->free_list[MIGRATE_MOVABLE];
+		list_for_each_entry(freepage, freelist, lru) {
+			unsigned long free_pfn;
+
+			nr_scanned++;
+			free_pfn = page_to_pfn(freepage);
+			if (free_pfn < high_pfn) {
+				update_fast_start_pfn(cc, free_pfn);
+
+				/*
+				 * Avoid if skipped recently. Ideally it would
+				 * move to the tail but even safe iteration of
+				 * the list assumes an entry is deleted, not
+				 * reordered.
+				 */
+				if (get_pageblock_skip(freepage)) {
+					if (list_is_last(freelist, &freepage->lru))
+						break;
+
+					continue;
+				}
+
+				/* Reorder to so a future search skips recent pages */
+				move_freelist_tail(freelist, freepage);
+
+				pfn = pageblock_start_pfn(free_pfn);
+				cc->fast_search_fail = 0;
+				set_pageblock_skip(freepage);
+				break;
+			}
+
+			if (nr_scanned >= limit) {
+				cc->fast_search_fail++;
+				move_freelist_tail(freelist, freepage);
+				break;
+			}
+		}
+		spin_unlock_irqrestore(&cc->zone->lock, flags);
+	}
+
+	cc->total_migrate_scanned += nr_scanned;
+
+	/*
+	 * If fast scanning failed then use a cached entry for a page block
+	 * that had free pages as the basis for starting a linear scan.
+	 */
+	if (pfn == cc->migrate_pfn)
+		pfn = reinit_migrate_pfn(cc);
+
+	return pfn;
+}
+
 /*
  * Isolate all pages that can be migrated from the first suitable block,
  * starting at the block pointed to by the migrate scanner pfn within
@@ -1222,16 +1383,25 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone,
 	const isolate_mode_t isolate_mode =
 		(sysctl_compact_unevictable_allowed ? ISOLATE_UNEVICTABLE : 0) |
 		(cc->mode != MIGRATE_SYNC ? ISOLATE_ASYNC_MIGRATE : 0);
+	bool fast_find_block;
 
 	/*
 	 * Start at where we last stopped, or beginning of the zone as
-	 * initialized by compact_zone()
+	 * initialized by compact_zone(). The first failure will use
+	 * the lowest PFN as the starting point for linear scanning.
 	 */
-	low_pfn = cc->migrate_pfn;
+	low_pfn = fast_find_migrateblock(cc);
 	block_start_pfn = pageblock_start_pfn(low_pfn);
 	if (block_start_pfn < zone->zone_start_pfn)
 		block_start_pfn = zone->zone_start_pfn;
 
+	/*
+	 * fast_find_migrateblock marks a pageblock skipped so to avoid
+	 * the isolation_suitable check below, check whether the fast
+	 * search was successful.
+	 */
+	fast_find_block = low_pfn != cc->migrate_pfn && !cc->fast_search_fail;
+
 	/* Only scan within a pageblock boundary */
 	block_end_pfn = pageblock_end_pfn(low_pfn);
 
@@ -1240,6 +1410,7 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone,
 	 * Do not cross the free scanner.
 	 */
 	for (; block_end_pfn <= cc->free_pfn;
+			fast_find_block = false,
 			low_pfn = block_end_pfn,
 			block_start_pfn = block_end_pfn,
 			block_end_pfn += pageblock_nr_pages) {
@@ -1259,7 +1430,7 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone,
 			continue;
 
 		/* If isolation recently failed, do not retry */
-		if (!isolation_suitable(cc, page))
+		if (!isolation_suitable(cc, page) && !fast_find_block)
 			continue;
 
 		/*
@@ -1550,6 +1721,7 @@ static enum compact_result compact_zone(struct compact_control *cc)
 	 * want to compact the whole zone), but check that it is initialised
 	 * by ensuring the values are within zone boundaries.
 	 */
+	cc->fast_start_pfn = 0;
 	if (cc->whole_zone) {
 		cc->migrate_pfn = start_pfn;
 		cc->free_pfn = pageblock_start_pfn(end_pfn - 1);
diff --git a/mm/internal.h b/mm/internal.h
index 9b32f4cab0ae..983cb975545f 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -188,9 +188,11 @@ struct compact_control {
 	unsigned int nr_migratepages;	/* Number of pages to migrate */
 	unsigned long free_pfn;		/* isolate_freepages search base */
 	unsigned long migrate_pfn;	/* isolate_migratepages search base */
+	unsigned long fast_start_pfn;	/* a pfn to start linear scan from */
 	struct zone *zone;
 	unsigned long total_migrate_scanned;
 	unsigned long total_free_scanned;
+	unsigned int fast_search_fail;	/* failures to use free list searches */
 	const gfp_t gfp_mask;		/* gfp mask of a direct compactor */
 	int order;			/* order a direct compactor needs */
 	int migratetype;		/* migratetype of direct compactor */
-- 
cgit v1.2.3


From e332f741a8dd1ec9a6dc8aa997296ecbfe64323e Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@techsingularity.net>
Date: Tue, 5 Mar 2019 15:45:38 -0800
Subject: mm, compaction: be selective about what pageblocks to clear skip
 hints

Pageblock hints are cleared when compaction restarts or kswapd makes
enough progress that it can sleep but it's over-eager in that the bit is
cleared for migration sources with no LRU pages and migration targets
with no free pages.  As pageblock skip hint flushes are relatively rare
and out-of-band with respect to kswapd, this patch makes a few more
expensive checks to see if it's appropriate to even clear the bit.
Every pageblock that is not cleared will avoid 512 pages being scanned
unnecessarily on x86-64.

The impact is variable with different workloads showing small
differences in latency, success rates and scan rates.  This is expected
as clearing the hints is not that common but doing a small amount of
work out-of-band to avoid a large amount of work in-band later is
generally a good thing.

Link: http://lkml.kernel.org/r/20190118175136.31341-22-mgorman@techsingularity.net
Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Signed-off-by: Qian Cai <cai@lca.pw>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Dan Carpenter <dan.carpenter@oracle.com>
Cc: David Rientjes <rientjes@google.com>
Cc: YueHaibing <yuehaibing@huawei.com>
[cai@lca.pw: no stuck in __reset_isolation_pfn()]
  Link: http://lkml.kernel.org/r/20190206034732.75687-1-cai@lca.pw
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmzone.h |   2 +
 mm/compaction.c        | 124 ++++++++++++++++++++++++++++++++++++++++++-------
 2 files changed, 108 insertions(+), 18 deletions(-)

(limited to 'include')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 842f9189537b..90c13cdeefb5 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -480,6 +480,8 @@ struct zone {
 	unsigned long		compact_cached_free_pfn;
 	/* pfn where async and sync compaction migration scanner should start */
 	unsigned long		compact_cached_migrate_pfn[2];
+	unsigned long		compact_init_migrate_pfn;
+	unsigned long		compact_init_free_pfn;
 #endif
 
 #ifdef CONFIG_COMPACTION
diff --git a/mm/compaction.c b/mm/compaction.c
index b83cdb42f249..3084cee77fda 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -237,6 +237,70 @@ static bool pageblock_skip_persistent(struct page *page)
 	return false;
 }
 
+static bool
+__reset_isolation_pfn(struct zone *zone, unsigned long pfn, bool check_source,
+							bool check_target)
+{
+	struct page *page = pfn_to_online_page(pfn);
+	struct page *end_page;
+	unsigned long block_pfn;
+
+	if (!page)
+		return false;
+	if (zone != page_zone(page))
+		return false;
+	if (pageblock_skip_persistent(page))
+		return false;
+
+	/*
+	 * If skip is already cleared do no further checking once the
+	 * restart points have been set.
+	 */
+	if (check_source && check_target && !get_pageblock_skip(page))
+		return true;
+
+	/*
+	 * If clearing skip for the target scanner, do not select a
+	 * non-movable pageblock as the starting point.
+	 */
+	if (!check_source && check_target &&
+	    get_pageblock_migratetype(page) != MIGRATE_MOVABLE)
+		return false;
+
+	/*
+	 * Only clear the hint if a sample indicates there is either a
+	 * free page or an LRU page in the block. One or other condition
+	 * is necessary for the block to be a migration source/target.
+	 */
+	block_pfn = pageblock_start_pfn(pfn);
+	pfn = max(block_pfn, zone->zone_start_pfn);
+	page = pfn_to_page(pfn);
+	if (zone != page_zone(page))
+		return false;
+	pfn = block_pfn + pageblock_nr_pages;
+	pfn = min(pfn, zone_end_pfn(zone));
+	end_page = pfn_to_page(pfn);
+
+	do {
+		if (pfn_valid_within(pfn)) {
+			if (check_source && PageLRU(page)) {
+				clear_pageblock_skip(page);
+				return true;
+			}
+
+			if (check_target && PageBuddy(page)) {
+				clear_pageblock_skip(page);
+				return true;
+			}
+		}
+
+		page += (1 << PAGE_ALLOC_COSTLY_ORDER);
+		pfn += (1 << PAGE_ALLOC_COSTLY_ORDER);
+	} while (page < end_page);
+
+	return false;
+}
+
 /*
  * This function is called to clear all cached information on pageblocks that
  * should be skipped for page isolation when the migrate and free page scanner
@@ -244,30 +308,54 @@ static bool pageblock_skip_persistent(struct page *page)
  */
 static void __reset_isolation_suitable(struct zone *zone)
 {
-	unsigned long start_pfn = zone->zone_start_pfn;
-	unsigned long end_pfn = zone_end_pfn(zone);
-	unsigned long pfn;
+	unsigned long migrate_pfn = zone->zone_start_pfn;
+	unsigned long free_pfn = zone_end_pfn(zone);
+	unsigned long reset_migrate = free_pfn;
+	unsigned long reset_free = migrate_pfn;
+	bool source_set = false;
+	bool free_set = false;
 
-	zone->compact_blockskip_flush = false;
+	if (!zone->compact_blockskip_flush)
+		return;
 
-	/* Walk the zone and mark every pageblock as suitable for isolation */
-	for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) {
-		struct page *page;
+	zone->compact_blockskip_flush = false;
 
+	/*
+	 * Walk the zone and update pageblock skip information. Source looks
+	 * for PageLRU while target looks for PageBuddy. When the scanner
+	 * is found, both PageBuddy and PageLRU are checked as the pageblock
+	 * is suitable as both source and target.
+	 */
+	for (; migrate_pfn < free_pfn; migrate_pfn += pageblock_nr_pages,
+					free_pfn -= pageblock_nr_pages) {
 		cond_resched();
 
-		page = pfn_to_online_page(pfn);
-		if (!page)
-			continue;
-		if (zone != page_zone(page))
-			continue;
-		if (pageblock_skip_persistent(page))
-			continue;
+		/* Update the migrate PFN */
+		if (__reset_isolation_pfn(zone, migrate_pfn, true, source_set) &&
+		    migrate_pfn < reset_migrate) {
+			source_set = true;
+			reset_migrate = migrate_pfn;
+			zone->compact_init_migrate_pfn = reset_migrate;
+			zone->compact_cached_migrate_pfn[0] = reset_migrate;
+			zone->compact_cached_migrate_pfn[1] = reset_migrate;
+		}
 
-		clear_pageblock_skip(page);
+		/* Update the free PFN */
+		if (__reset_isolation_pfn(zone, free_pfn, free_set, true) &&
+		    free_pfn > reset_free) {
+			free_set = true;
+			reset_free = free_pfn;
+			zone->compact_init_free_pfn = reset_free;
+			zone->compact_cached_free_pfn = reset_free;
+		}
 	}
 
-	reset_cached_positions(zone);
+	/* Leave no distance if no suitable block was reset */
+	if (reset_migrate >= reset_free) {
+		zone->compact_cached_migrate_pfn[0] = migrate_pfn;
+		zone->compact_cached_migrate_pfn[1] = migrate_pfn;
+		zone->compact_cached_free_pfn = free_pfn;
+	}
 }
 
 void reset_isolation_suitable(pg_data_t *pgdat)
@@ -1190,7 +1278,7 @@ fast_isolate_freepages(struct compact_control *cc)
 	 * If starting the scan, use a deeper search and use the highest
 	 * PFN found if a suitable one is not found.
 	 */
-	if (cc->free_pfn == pageblock_start_pfn(zone_end_pfn(cc->zone) - 1)) {
+	if (cc->free_pfn >= cc->zone->compact_init_free_pfn) {
 		limit = pageblock_nr_pages >> 1;
 		scan_start = true;
 	}
@@ -2017,7 +2105,7 @@ static enum compact_result compact_zone(struct compact_control *cc)
 			cc->zone->compact_cached_migrate_pfn[1] = cc->migrate_pfn;
 		}
 
-		if (cc->migrate_pfn == start_pfn)
+		if (cc->migrate_pfn <= cc->zone->compact_init_migrate_pfn)
 			cc->whole_zone = true;
 	}
 
-- 
cgit v1.2.3


From 5e1f0f098b4649fad53011246bcaeff011ffdf5d Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@techsingularity.net>
Date: Tue, 5 Mar 2019 15:45:41 -0800
Subject: mm, compaction: capture a page under direct compaction

Compaction is inherently race-prone as a suitable page freed during
compaction can be allocated by any parallel task.  This patch uses a
capture_control structure to isolate a page immediately when it is freed
by a direct compactor in the slow path of the page allocator.  The
intent is to avoid redundant scanning.

                                     5.0.0-rc1              5.0.0-rc1
                               selective-v3r17          capture-v3r19
Amean     fault-both-1         0.00 (   0.00%)        0.00 *   0.00%*
Amean     fault-both-3      2582.11 (   0.00%)     2563.68 (   0.71%)
Amean     fault-both-5      4500.26 (   0.00%)     4233.52 (   5.93%)
Amean     fault-both-7      5819.53 (   0.00%)     6333.65 (  -8.83%)
Amean     fault-both-12     9321.18 (   0.00%)     9759.38 (  -4.70%)
Amean     fault-both-18     9782.76 (   0.00%)    10338.76 (  -5.68%)
Amean     fault-both-24    15272.81 (   0.00%)    13379.55 *  12.40%*
Amean     fault-both-30    15121.34 (   0.00%)    16158.25 (  -6.86%)
Amean     fault-both-32    18466.67 (   0.00%)    18971.21 (  -2.73%)

Latency is only moderately affected but the devil is in the details.  A
closer examination indicates that base page fault latency is reduced but
latency of huge pages is increased as it takes creater care to succeed.
Part of the "problem" is that allocation success rates are close to 100%
even when under pressure and compaction gets harder

                                5.0.0-rc1              5.0.0-rc1
                          selective-v3r17          capture-v3r19
Percentage huge-3        96.70 (   0.00%)       98.23 (   1.58%)
Percentage huge-5        96.99 (   0.00%)       95.30 (  -1.75%)
Percentage huge-7        94.19 (   0.00%)       97.24 (   3.24%)
Percentage huge-12       94.95 (   0.00%)       97.35 (   2.53%)
Percentage huge-18       96.74 (   0.00%)       97.30 (   0.58%)
Percentage huge-24       97.07 (   0.00%)       97.55 (   0.50%)
Percentage huge-30       95.69 (   0.00%)       98.50 (   2.95%)
Percentage huge-32       96.70 (   0.00%)       99.27 (   2.65%)

And scan rates are reduced as expected by 6% for the migration scanner
and 29% for the free scanner indicating that there is less redundant
work.

Compaction migrate scanned    20815362    19573286
Compaction free scanned       16352612    11510663

[mgorman@techsingularity.net: remove redundant check]
  Link: http://lkml.kernel.org/r/20190201143853.GH9565@techsingularity.net
Link: http://lkml.kernel.org/r/20190118175136.31341-23-mgorman@techsingularity.net
Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Dan Carpenter <dan.carpenter@oracle.com>
Cc: David Rientjes <rientjes@google.com>
Cc: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/compaction.h |  3 +-
 include/linux/sched.h      |  4 +++
 kernel/sched/core.c        |  3 ++
 mm/compaction.c            | 31 +++++++++++++++-----
 mm/internal.h              |  9 ++++++
 mm/page_alloc.c            | 73 +++++++++++++++++++++++++++++++++++++++++++---
 6 files changed, 111 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/include/linux/compaction.h b/include/linux/compaction.h
index 70d0256edd31..c960923d9ec2 100644
--- a/include/linux/compaction.h
+++ b/include/linux/compaction.h
@@ -93,7 +93,8 @@ extern int sysctl_compact_unevictable_allowed;
 extern int fragmentation_index(struct zone *zone, unsigned int order);
 extern enum compact_result try_to_compact_pages(gfp_t gfp_mask,
 		unsigned int order, unsigned int alloc_flags,
-		const struct alloc_context *ac, enum compact_priority prio);
+		const struct alloc_context *ac, enum compact_priority prio,
+		struct page **page);
 extern void reset_isolation_suitable(pg_data_t *pgdat);
 extern enum compact_result compaction_suitable(struct zone *zone, int order,
 		unsigned int alloc_flags, int classzone_idx);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index f9b43c989577..ebfb34fb9b30 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -47,6 +47,7 @@ struct pid_namespace;
 struct pipe_inode_info;
 struct rcu_node;
 struct reclaim_state;
+struct capture_control;
 struct robust_list_head;
 struct sched_attr;
 struct sched_param;
@@ -958,6 +959,9 @@ struct task_struct {
 
 	struct io_context		*io_context;
 
+#ifdef CONFIG_COMPACTION
+	struct capture_control		*capture_control;
+#endif
 	/* Ptrace state: */
 	unsigned long			ptrace_message;
 	kernel_siginfo_t		*last_siginfo;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 7cbb5658be80..916e956e92be 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2190,6 +2190,9 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
 	INIT_HLIST_HEAD(&p->preempt_notifiers);
 #endif
 
+#ifdef CONFIG_COMPACTION
+	p->capture_control = NULL;
+#endif
 	init_numa_balancing(clone_flags, p);
 }
 
diff --git a/mm/compaction.c b/mm/compaction.c
index 3084cee77fda..1cc871da3fda 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -2056,7 +2056,8 @@ bool compaction_zonelist_suitable(struct alloc_context *ac, int order,
 	return false;
 }
 
-static enum compact_result compact_zone(struct compact_control *cc)
+static enum compact_result
+compact_zone(struct compact_control *cc, struct capture_control *capc)
 {
 	enum compact_result ret;
 	unsigned long start_pfn = cc->zone->zone_start_pfn;
@@ -2225,6 +2226,11 @@ check_drain:
 			}
 		}
 
+		/* Stop if a page has been captured */
+		if (capc && capc->page) {
+			ret = COMPACT_SUCCESS;
+			break;
+		}
 	}
 
 out:
@@ -2258,7 +2264,8 @@ out:
 
 static enum compact_result compact_zone_order(struct zone *zone, int order,
 		gfp_t gfp_mask, enum compact_priority prio,
-		unsigned int alloc_flags, int classzone_idx)
+		unsigned int alloc_flags, int classzone_idx,
+		struct page **capture)
 {
 	enum compact_result ret;
 	struct compact_control cc = {
@@ -2279,14 +2286,24 @@ static enum compact_result compact_zone_order(struct zone *zone, int order,
 		.ignore_skip_hint = (prio == MIN_COMPACT_PRIORITY),
 		.ignore_block_suitable = (prio == MIN_COMPACT_PRIORITY)
 	};
+	struct capture_control capc = {
+		.cc = &cc,
+		.page = NULL,
+	};
+
+	if (capture)
+		current->capture_control = &capc;
 	INIT_LIST_HEAD(&cc.freepages);
 	INIT_LIST_HEAD(&cc.migratepages);
 
-	ret = compact_zone(&cc);
+	ret = compact_zone(&cc, &capc);
 
 	VM_BUG_ON(!list_empty(&cc.freepages));
 	VM_BUG_ON(!list_empty(&cc.migratepages));
 
+	*capture = capc.page;
+	current->capture_control = NULL;
+
 	return ret;
 }
 
@@ -2304,7 +2321,7 @@ int sysctl_extfrag_threshold = 500;
  */
 enum compact_result try_to_compact_pages(gfp_t gfp_mask, unsigned int order,
 		unsigned int alloc_flags, const struct alloc_context *ac,
-		enum compact_priority prio)
+		enum compact_priority prio, struct page **capture)
 {
 	int may_perform_io = gfp_mask & __GFP_IO;
 	struct zoneref *z;
@@ -2332,7 +2349,7 @@ enum compact_result try_to_compact_pages(gfp_t gfp_mask, unsigned int order,
 		}
 
 		status = compact_zone_order(zone, order, gfp_mask, prio,
-					alloc_flags, ac_classzone_idx(ac));
+				alloc_flags, ac_classzone_idx(ac), capture);
 		rc = max(status, rc);
 
 		/* The allocation should succeed, stop compacting */
@@ -2400,7 +2417,7 @@ static void compact_node(int nid)
 		INIT_LIST_HEAD(&cc.freepages);
 		INIT_LIST_HEAD(&cc.migratepages);
 
-		compact_zone(&cc);
+		compact_zone(&cc, NULL);
 
 		VM_BUG_ON(!list_empty(&cc.freepages));
 		VM_BUG_ON(!list_empty(&cc.migratepages));
@@ -2535,7 +2552,7 @@ static void kcompactd_do_work(pg_data_t *pgdat)
 
 		if (kthread_should_stop())
 			return;
-		status = compact_zone(&cc);
+		status = compact_zone(&cc, NULL);
 
 		if (status == COMPACT_SUCCESS) {
 			compaction_defer_reset(zone, cc.order, false);
diff --git a/mm/internal.h b/mm/internal.h
index 31bb0be6fd52..9eeaf2b95166 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -209,6 +209,15 @@ struct compact_control {
 	bool rescan;			/* Rescanning the same pageblock */
 };
 
+/*
+ * Used in direct compaction when a page should be taken from the freelists
+ * immediately when one is created during the free path.
+ */
+struct capture_control {
+	struct compact_control *cc;
+	struct page *page;
+};
+
 unsigned long
 isolate_freepages_range(struct compact_control *cc,
 			unsigned long start_pfn, unsigned long end_pfn);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 2e132b9e7a93..09bf2c5f8b4b 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -789,6 +789,57 @@ static inline int page_is_buddy(struct page *page, struct page *buddy,
 	return 0;
 }
 
+#ifdef CONFIG_COMPACTION
+static inline struct capture_control *task_capc(struct zone *zone)
+{
+	struct capture_control *capc = current->capture_control;
+
+	return capc &&
+		!(current->flags & PF_KTHREAD) &&
+		!capc->page &&
+		capc->cc->zone == zone &&
+		capc->cc->direct_compaction ? capc : NULL;
+}
+
+static inline bool
+compaction_capture(struct capture_control *capc, struct page *page,
+		   int order, int migratetype)
+{
+	if (!capc || order != capc->cc->order)
+		return false;
+
+	/* Do not accidentally pollute CMA or isolated regions*/
+	if (is_migrate_cma(migratetype) ||
+	    is_migrate_isolate(migratetype))
+		return false;
+
+	/*
+	 * Do not let lower order allocations polluate a movable pageblock.
+	 * This might let an unmovable request use a reclaimable pageblock
+	 * and vice-versa but no more than normal fallback logic which can
+	 * have trouble finding a high-order free page.
+	 */
+	if (order < pageblock_order && migratetype == MIGRATE_MOVABLE)
+		return false;
+
+	capc->page = page;
+	return true;
+}
+
+#else
+static inline struct capture_control *task_capc(struct zone *zone)
+{
+	return NULL;
+}
+
+static inline bool
+compaction_capture(struct capture_control *capc, struct page *page,
+		   int order, int migratetype)
+{
+	return false;
+}
+#endif /* CONFIG_COMPACTION */
+
 /*
  * Freeing function for a buddy system allocator.
  *
@@ -822,6 +873,7 @@ static inline void __free_one_page(struct page *page,
 	unsigned long uninitialized_var(buddy_pfn);
 	struct page *buddy;
 	unsigned int max_order;
+	struct capture_control *capc = task_capc(zone);
 
 	max_order = min_t(unsigned int, MAX_ORDER, pageblock_order + 1);
 
@@ -837,6 +889,11 @@ static inline void __free_one_page(struct page *page,
 
 continue_merging:
 	while (order < max_order - 1) {
+		if (compaction_capture(capc, page, order, migratetype)) {
+			__mod_zone_freepage_state(zone, -(1 << order),
+								migratetype);
+			return;
+		}
 		buddy_pfn = __find_buddy_pfn(pfn, order);
 		buddy = page + (buddy_pfn - pfn);
 
@@ -3710,7 +3767,7 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 		unsigned int alloc_flags, const struct alloc_context *ac,
 		enum compact_priority prio, enum compact_result *compact_result)
 {
-	struct page *page;
+	struct page *page = NULL;
 	unsigned long pflags;
 	unsigned int noreclaim_flag;
 
@@ -3721,13 +3778,15 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 	noreclaim_flag = memalloc_noreclaim_save();
 
 	*compact_result = try_to_compact_pages(gfp_mask, order, alloc_flags, ac,
-									prio);
+								prio, &page);
 
 	memalloc_noreclaim_restore(noreclaim_flag);
 	psi_memstall_leave(&pflags);
 
-	if (*compact_result <= COMPACT_INACTIVE)
+	if (*compact_result <= COMPACT_INACTIVE) {
+		WARN_ON_ONCE(page);
 		return NULL;
+	}
 
 	/*
 	 * At least in one zone compaction wasn't deferred or skipped, so let's
@@ -3735,7 +3794,13 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 	 */
 	count_vm_event(COMPACTSTALL);
 
-	page = get_page_from_freelist(gfp_mask, order, alloc_flags, ac);
+	/* Prep a captured page if available */
+	if (page)
+		prep_new_page(page, order, gfp_mask, alloc_flags);
+
+	/* Try get a page from the freelist if available */
+	if (!page)
+		page = get_page_from_freelist(gfp_mask, order, alloc_flags, ac);
 
 	if (page) {
 		struct zone *zone = page_zone(page);
-- 
cgit v1.2.3


From 147e1a97c4a0bdd43f55a582a9416bb9092563a9 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Tue, 5 Mar 2019 15:45:45 -0800
Subject: fs: kernfs: add poll file operation

Patch series "psi: pressure stall monitors", v3.

Android is adopting psi to detect and remedy memory pressure that
results in stuttering and decreased responsiveness on mobile devices.

Psi gives us the stall information, but because we're dealing with
latencies in the millisecond range, periodically reading the pressure
files to detect stalls in a timely fashion is not feasible.  Psi also
doesn't aggregate its averages at a high enough frequency right now.

This patch series extends the psi interface such that users can
configure sensitive latency thresholds and use poll() and friends to be
notified when these are breached.

As high-frequency aggregation is costly, it implements an aggregation
method that is optimized for fast, short-interval averaging, and makes
the aggregation frequency adaptive, such that high-frequency updates
only happen while monitored stall events are actively occurring.

With these patches applied, Android can monitor for, and ward off,
mounting memory shortages before they cause problems for the user.  For
example, using memory stall monitors in userspace low memory killer
daemon (lmkd) we can detect mounting pressure and kill less important
processes before device becomes visibly sluggish.

In our memory stress testing psi memory monitors produce roughly 10x
less false positives compared to vmpressure signals.  Having ability to
specify multiple triggers for the same psi metric allows other parts of
Android framework to monitor memory state of the device and act
accordingly.

The new interface is straightforward.  The user opens one of the
pressure files for writing and writes a trigger description into the
file descriptor that defines the stall state - some or full, and the
maximum stall time over a given window of time.  E.g.:

        /* Signal when stall time exceeds 100ms of a 1s window */
        char trigger[] = "full 100000 1000000";
        fd = open("/proc/pressure/memory");
        write(fd, trigger, sizeof(trigger));
        while (poll() >= 0) {
                ...
        }
        close(fd);

When the monitored stall state is entered, psi adapts its aggregation
frequency according to what the configured time window requires in order
to emit event signals in a timely fashion.  Once the stalling subsides,
aggregation reverts back to normal.

The trigger is associated with the open file descriptor.  To stop
monitoring, the user only needs to close the file descriptor and the
trigger is discarded.

Patches 1-4 prepare the psi code for polling support.  Patch 5
implements the adaptive polling logic, the pressure growth detection
optimized for short intervals, and hooks up write() and poll() on the
pressure files.

The patches were developed in collaboration with Johannes Weiner.

This patch (of 5):

Kernfs has a standardized poll/notification mechanism for waking all
pollers on all fds when a filesystem node changes.  To allow polling for
custom events, add a .poll callback that can override the default.

This is in preparation for pollable cgroup pressure files which have
per-fd trigger configurations.

Link: http://lkml.kernel.org/r/20190124211518.244221-2-surenb@google.com
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Suren Baghdasaryan <surenb@google.com>
Cc: Dennis Zhou <dennis@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Li Zefan <lizefan@huawei.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/kernfs/file.c       | 31 ++++++++++++++++++++-----------
 include/linux/kernfs.h |  6 ++++++
 2 files changed, 26 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
index f8d5021a652e..ae948aaa4c53 100644
--- a/fs/kernfs/file.c
+++ b/fs/kernfs/file.c
@@ -832,26 +832,35 @@ void kernfs_drain_open_files(struct kernfs_node *kn)
  * to see if it supports poll (Neither 'poll' nor 'select' return
  * an appropriate error code).  When in doubt, set a suitable timeout value.
  */
+__poll_t kernfs_generic_poll(struct kernfs_open_file *of, poll_table *wait)
+{
+	struct kernfs_node *kn = kernfs_dentry_node(of->file->f_path.dentry);
+	struct kernfs_open_node *on = kn->attr.open;
+
+	poll_wait(of->file, &on->poll, wait);
+
+	if (of->event != atomic_read(&on->event))
+		return DEFAULT_POLLMASK|EPOLLERR|EPOLLPRI;
+
+	return DEFAULT_POLLMASK;
+}
+
 static __poll_t kernfs_fop_poll(struct file *filp, poll_table *wait)
 {
 	struct kernfs_open_file *of = kernfs_of(filp);
 	struct kernfs_node *kn = kernfs_dentry_node(filp->f_path.dentry);
-	struct kernfs_open_node *on = kn->attr.open;
+	__poll_t ret;
 
 	if (!kernfs_get_active(kn))
-		goto trigger;
+		return DEFAULT_POLLMASK|EPOLLERR|EPOLLPRI;
 
-	poll_wait(filp, &on->poll, wait);
+	if (kn->attr.ops->poll)
+		ret = kn->attr.ops->poll(of, wait);
+	else
+		ret = kernfs_generic_poll(of, wait);
 
 	kernfs_put_active(kn);
-
-	if (of->event != atomic_read(&on->event))
-		goto trigger;
-
-	return DEFAULT_POLLMASK;
-
- trigger:
-	return DEFAULT_POLLMASK|EPOLLERR|EPOLLPRI;
+	return ret;
 }
 
 static void kernfs_notify_workfn(struct work_struct *work)
diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h
index 5b36b1287a5a..0cac1207bb00 100644
--- a/include/linux/kernfs.h
+++ b/include/linux/kernfs.h
@@ -25,6 +25,7 @@ struct seq_file;
 struct vm_area_struct;
 struct super_block;
 struct file_system_type;
+struct poll_table_struct;
 
 struct kernfs_open_node;
 struct kernfs_iattrs;
@@ -261,6 +262,9 @@ struct kernfs_ops {
 	ssize_t (*write)(struct kernfs_open_file *of, char *buf, size_t bytes,
 			 loff_t off);
 
+	__poll_t (*poll)(struct kernfs_open_file *of,
+			 struct poll_table_struct *pt);
+
 	int (*mmap)(struct kernfs_open_file *of, struct vm_area_struct *vma);
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
@@ -350,6 +354,8 @@ int kernfs_remove_by_name_ns(struct kernfs_node *parent, const char *name,
 int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent,
 		     const char *new_name, const void *new_ns);
 int kernfs_setattr(struct kernfs_node *kn, const struct iattr *iattr);
+__poll_t kernfs_generic_poll(struct kernfs_open_file *of,
+			     struct poll_table_struct *pt);
 void kernfs_notify(struct kernfs_node *kn);
 
 const void *kernfs_super_ns(struct super_block *sb);
-- 
cgit v1.2.3


From dc50537bdd1a0804fa2cbc990565ee9a944e66fa Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Tue, 5 Mar 2019 15:45:48 -0800
Subject: kernel: cgroup: add poll file operation

Cgroup has a standardized poll/notification mechanism for waking all
pollers on all fds when a filesystem node changes.  To allow polling for
custom events, add a .poll callback that can override the default.

This is in preparation for pollable cgroup pressure files which have
per-fd trigger configurations.

Link: http://lkml.kernel.org/r/20190124211518.244221-3-surenb@google.com
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Suren Baghdasaryan <surenb@google.com>
Cc: Dennis Zhou <dennis@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Li Zefan <lizefan@huawei.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/cgroup-defs.h |  4 ++++
 kernel/cgroup/cgroup.c      | 12 ++++++++++++
 2 files changed, 16 insertions(+)

(limited to 'include')

diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index 8fcbae1b8db0..aad3babef007 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -32,6 +32,7 @@ struct kernfs_node;
 struct kernfs_ops;
 struct kernfs_open_file;
 struct seq_file;
+struct poll_table_struct;
 
 #define MAX_CGROUP_TYPE_NAMELEN 32
 #define MAX_CGROUP_ROOT_NAMELEN 64
@@ -574,6 +575,9 @@ struct cftype {
 	ssize_t (*write)(struct kernfs_open_file *of,
 			 char *buf, size_t nbytes, loff_t off);
 
+	__poll_t (*poll)(struct kernfs_open_file *of,
+			 struct poll_table_struct *pt);
+
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 	struct lock_class_key	lockdep_key;
 #endif
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index cef98502b124..17828333f7c3 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -3534,6 +3534,16 @@ static ssize_t cgroup_file_write(struct kernfs_open_file *of, char *buf,
 	return ret ?: nbytes;
 }
 
+static __poll_t cgroup_file_poll(struct kernfs_open_file *of, poll_table *pt)
+{
+	struct cftype *cft = of->kn->priv;
+
+	if (cft->poll)
+		return cft->poll(of, pt);
+
+	return kernfs_generic_poll(of, pt);
+}
+
 static void *cgroup_seqfile_start(struct seq_file *seq, loff_t *ppos)
 {
 	return seq_cft(seq)->seq_start(seq, ppos);
@@ -3572,6 +3582,7 @@ static struct kernfs_ops cgroup_kf_single_ops = {
 	.open			= cgroup_file_open,
 	.release		= cgroup_file_release,
 	.write			= cgroup_file_write,
+	.poll			= cgroup_file_poll,
 	.seq_show		= cgroup_seqfile_show,
 };
 
@@ -3580,6 +3591,7 @@ static struct kernfs_ops cgroup_kf_ops = {
 	.open			= cgroup_file_open,
 	.release		= cgroup_file_release,
 	.write			= cgroup_file_write,
+	.poll			= cgroup_file_poll,
 	.seq_start		= cgroup_seqfile_start,
 	.seq_next		= cgroup_seqfile_next,
 	.seq_stop		= cgroup_seqfile_stop,
-- 
cgit v1.2.3


From aa9694bb78bf6eb03810108d5f6064fafa4ae1e1 Mon Sep 17 00:00:00 2001
From: Chris Down <chris@chrisdown.name>
Date: Tue, 5 Mar 2019 15:45:52 -0800
Subject: mm, memcg: create mem_cgroup_from_seq

This is the start of a series of patches similar to my earlier
DEFINE_MEMCG_MAX_OR_VAL work, but with less Macro Magic(tm).

There are a bunch of places we go from seq_file to mem_cgroup, which
currently requires manually getting the css, then getting the mem_cgroup
from the css.  It's in enough places now that having mem_cgroup_from_seq
makes sense (and also makes the next patch a bit nicer).

Link: http://lkml.kernel.org/r/20190124194050.GA31341@chrisdown.name
Signed-off-by: Chris Down <chris@chrisdown.name>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Roman Gushchin <guro@fb.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h | 10 ++++++++++
 mm/memcontrol.c            | 24 ++++++++++++------------
 mm/slab_common.c           |  6 +++---
 3 files changed, 25 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index b0eb29ea0d9c..1f3d880b7ca1 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -429,6 +429,11 @@ static inline unsigned short mem_cgroup_id(struct mem_cgroup *memcg)
 }
 struct mem_cgroup *mem_cgroup_from_id(unsigned short id);
 
+static inline struct mem_cgroup *mem_cgroup_from_seq(struct seq_file *m)
+{
+	return mem_cgroup_from_css(seq_css(m));
+}
+
 static inline struct mem_cgroup *lruvec_memcg(struct lruvec *lruvec)
 {
 	struct mem_cgroup_per_node *mz;
@@ -937,6 +942,11 @@ static inline struct mem_cgroup *mem_cgroup_from_id(unsigned short id)
 	return NULL;
 }
 
+static inline struct mem_cgroup *mem_cgroup_from_seq(struct seq_file *m)
+{
+	return NULL;
+}
+
 static inline struct mem_cgroup *lruvec_memcg(struct lruvec *lruvec)
 {
 	return NULL;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index f93f7f22a6f4..027abf9935d0 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3337,7 +3337,7 @@ static int memcg_numa_stat_show(struct seq_file *m, void *v)
 	const struct numa_stat *stat;
 	int nid;
 	unsigned long nr;
-	struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
+	struct mem_cgroup *memcg = mem_cgroup_from_seq(m);
 
 	for (stat = stats; stat < stats + ARRAY_SIZE(stats); stat++) {
 		nr = mem_cgroup_nr_lru_pages(memcg, stat->lru_mask);
@@ -3388,7 +3388,7 @@ static const char *const memcg1_event_names[] = {
 
 static int memcg_stat_show(struct seq_file *m, void *v)
 {
-	struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
+	struct mem_cgroup *memcg = mem_cgroup_from_seq(m);
 	unsigned long memory, memsw;
 	struct mem_cgroup *mi;
 	unsigned int i;
@@ -3820,7 +3820,7 @@ static void mem_cgroup_oom_unregister_event(struct mem_cgroup *memcg,
 
 static int mem_cgroup_oom_control_read(struct seq_file *sf, void *v)
 {
-	struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(sf));
+	struct mem_cgroup *memcg = mem_cgroup_from_seq(sf);
 
 	seq_printf(sf, "oom_kill_disable %d\n", memcg->oom_kill_disable);
 	seq_printf(sf, "under_oom %d\n", (bool)memcg->under_oom);
@@ -5363,7 +5363,7 @@ static u64 memory_current_read(struct cgroup_subsys_state *css,
 
 static int memory_min_show(struct seq_file *m, void *v)
 {
-	struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
+	struct mem_cgroup *memcg = mem_cgroup_from_seq(m);
 	unsigned long min = READ_ONCE(memcg->memory.min);
 
 	if (min == PAGE_COUNTER_MAX)
@@ -5393,7 +5393,7 @@ static ssize_t memory_min_write(struct kernfs_open_file *of,
 
 static int memory_low_show(struct seq_file *m, void *v)
 {
-	struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
+	struct mem_cgroup *memcg = mem_cgroup_from_seq(m);
 	unsigned long low = READ_ONCE(memcg->memory.low);
 
 	if (low == PAGE_COUNTER_MAX)
@@ -5423,7 +5423,7 @@ static ssize_t memory_low_write(struct kernfs_open_file *of,
 
 static int memory_high_show(struct seq_file *m, void *v)
 {
-	struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
+	struct mem_cgroup *memcg = mem_cgroup_from_seq(m);
 	unsigned long high = READ_ONCE(memcg->high);
 
 	if (high == PAGE_COUNTER_MAX)
@@ -5460,7 +5460,7 @@ static ssize_t memory_high_write(struct kernfs_open_file *of,
 
 static int memory_max_show(struct seq_file *m, void *v)
 {
-	struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
+	struct mem_cgroup *memcg = mem_cgroup_from_seq(m);
 	unsigned long max = READ_ONCE(memcg->memory.max);
 
 	if (max == PAGE_COUNTER_MAX)
@@ -5522,7 +5522,7 @@ static ssize_t memory_max_write(struct kernfs_open_file *of,
 
 static int memory_events_show(struct seq_file *m, void *v)
 {
-	struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
+	struct mem_cgroup *memcg = mem_cgroup_from_seq(m);
 
 	seq_printf(m, "low %lu\n",
 		   atomic_long_read(&memcg->memory_events[MEMCG_LOW]));
@@ -5540,7 +5540,7 @@ static int memory_events_show(struct seq_file *m, void *v)
 
 static int memory_stat_show(struct seq_file *m, void *v)
 {
-	struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
+	struct mem_cgroup *memcg = mem_cgroup_from_seq(m);
 	struct accumulated_stats acc;
 	int i;
 
@@ -5617,7 +5617,7 @@ static int memory_stat_show(struct seq_file *m, void *v)
 
 static int memory_oom_group_show(struct seq_file *m, void *v)
 {
-	struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
+	struct mem_cgroup *memcg = mem_cgroup_from_seq(m);
 
 	seq_printf(m, "%d\n", memcg->oom_group);
 
@@ -6600,7 +6600,7 @@ static u64 swap_current_read(struct cgroup_subsys_state *css,
 
 static int swap_max_show(struct seq_file *m, void *v)
 {
-	struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
+	struct mem_cgroup *memcg = mem_cgroup_from_seq(m);
 	unsigned long max = READ_ONCE(memcg->swap.max);
 
 	if (max == PAGE_COUNTER_MAX)
@@ -6630,7 +6630,7 @@ static ssize_t swap_max_write(struct kernfs_open_file *of,
 
 static int swap_events_show(struct seq_file *m, void *v)
 {
-	struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
+	struct mem_cgroup *memcg = mem_cgroup_from_seq(m);
 
 	seq_printf(m, "max %lu\n",
 		   atomic_long_read(&memcg->memory_events[MEMCG_SWAP_MAX]));
diff --git a/mm/slab_common.c b/mm/slab_common.c
index f9d89c1b5977..cd75b8985707 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -1425,7 +1425,7 @@ void dump_unreclaimable_slab(void)
 #if defined(CONFIG_MEMCG)
 void *memcg_slab_start(struct seq_file *m, loff_t *pos)
 {
-	struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
+	struct mem_cgroup *memcg = mem_cgroup_from_seq(m);
 
 	mutex_lock(&slab_mutex);
 	return seq_list_start(&memcg->kmem_caches, *pos);
@@ -1433,7 +1433,7 @@ void *memcg_slab_start(struct seq_file *m, loff_t *pos)
 
 void *memcg_slab_next(struct seq_file *m, void *p, loff_t *pos)
 {
-	struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
+	struct mem_cgroup *memcg = mem_cgroup_from_seq(m);
 
 	return seq_list_next(p, &memcg->kmem_caches, pos);
 }
@@ -1447,7 +1447,7 @@ int memcg_slab_show(struct seq_file *m, void *p)
 {
 	struct kmem_cache *s = list_entry(p, struct kmem_cache,
 					  memcg_params.kmem_caches_node);
-	struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
+	struct mem_cgroup *memcg = mem_cgroup_from_seq(m);
 
 	if (p == memcg->kmem_caches.next)
 		print_slabinfo_header(m);
-- 
cgit v1.2.3


From 8bb4e7a2ee26c05a94ae6cb0aec2f82a3523cf35 Mon Sep 17 00:00:00 2001
From: Wei Yang <richard.weiyang@gmail.com>
Date: Tue, 5 Mar 2019 15:46:22 -0800
Subject: mm: fix some typos in mm directory

No functional change.

Link: http://lkml.kernel.org/r/20190118235123.27843-1-richard.weiyang@gmail.com
Signed-off-by: Wei Yang <richard.weiyang@gmail.com>
Reviewed-by: Pekka Enberg <penberg@kernel.org>
Acked-by: Mike Rapoport <rppt@linux.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmzone.h | 2 +-
 mm/migrate.c           | 2 +-
 mm/mmap.c              | 8 ++++----
 mm/page_alloc.c        | 4 ++--
 mm/slub.c              | 2 +-
 mm/vmscan.c            | 2 +-
 6 files changed, 10 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 90c13cdeefb5..6d3290cd1f6f 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -1301,7 +1301,7 @@ void memory_present(int nid, unsigned long start, unsigned long end);
 
 /*
  * If it is possible to have holes within a MAX_ORDER_NR_PAGES, then we
- * need to check pfn validility within that MAX_ORDER_NR_PAGES block.
+ * need to check pfn validity within that MAX_ORDER_NR_PAGES block.
  * pfn_valid_within() should be used in this case; we optimise this away
  * when we have no holes within a MAX_ORDER_NR_PAGES block.
  */
diff --git a/mm/migrate.c b/mm/migrate.c
index 0e9888cb33ad..5308d6abd384 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -100,7 +100,7 @@ int isolate_movable_page(struct page *page, isolate_mode_t mode)
 	/*
 	 * Check PageMovable before holding a PG_lock because page's owner
 	 * assumes anybody doesn't touch PG_lock of newly allocated page
-	 * so unconditionally grapping the lock ruins page's owner side.
+	 * so unconditionally grabbing the lock ruins page's owner side.
 	 */
 	if (unlikely(!__PageMovable(page)))
 		goto out_putpage;
diff --git a/mm/mmap.c b/mm/mmap.c
index eccba2650ef6..41eb48d9b527 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -438,7 +438,7 @@ static void vma_gap_update(struct vm_area_struct *vma)
 {
 	/*
 	 * As it turns out, RB_DECLARE_CALLBACKS() already created a callback
-	 * function that does exacltly what we want.
+	 * function that does exactly what we want.
 	 */
 	vma_gap_callbacks_propagate(&vma->vm_rb, NULL);
 }
@@ -1012,7 +1012,7 @@ static inline int is_mergeable_vma(struct vm_area_struct *vma,
 	 * VM_SOFTDIRTY should not prevent from VMA merging, if we
 	 * match the flags but dirty bit -- the caller should mark
 	 * merged VMA as dirty. If dirty bit won't be excluded from
-	 * comparison, we increase pressue on the memory system forcing
+	 * comparison, we increase pressure on the memory system forcing
 	 * the kernel to generate new VMAs when old one could be
 	 * extended instead.
 	 */
@@ -1115,7 +1115,7 @@ can_vma_merge_after(struct vm_area_struct *vma, unsigned long vm_flags,
  *    PPPP    NNNN    PPPPPPPPPPPP    PPPPPPPPNNNN    PPPPNNNNNNNN
  *    might become    case 1 below    case 2 below    case 3 below
  *
- * It is important for case 8 that the the vma NNNN overlapping the
+ * It is important for case 8 that the vma NNNN overlapping the
  * region AAAA is never going to extended over XXXX. Instead XXXX must
  * be extended in region AAAA and NNNN must be removed. This way in
  * all cases where vma_merge succeeds, the moment vma_adjust drops the
@@ -1645,7 +1645,7 @@ SYSCALL_DEFINE1(old_mmap, struct mmap_arg_struct __user *, arg)
 #endif /* __ARCH_WANT_SYS_OLD_MMAP */
 
 /*
- * Some shared mappigns will want the pages marked read-only
+ * Some shared mappings will want the pages marked read-only
  * to track write events. If so, we'll downgrade vm_page_prot
  * to the private version (using protection_map[] without the
  * VM_SHARED bit).
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 9be9a22ebe35..ec250453f5e8 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -7551,7 +7551,7 @@ static void __setup_per_zone_wmarks(void)
 			 * value here.
 			 *
 			 * The WMARK_HIGH-WMARK_LOW and (WMARK_LOW-WMARK_MIN)
-			 * deltas control asynch page reclaim, and so should
+			 * deltas control async page reclaim, and so should
 			 * not be capped for highmem.
 			 */
 			unsigned long min_pages;
@@ -8028,7 +8028,7 @@ bool has_unmovable_pages(struct zone *zone, struct page *page, int count,
 
 		/*
 		 * Hugepages are not in LRU lists, but they're movable.
-		 * We need not scan over tail pages bacause we don't
+		 * We need not scan over tail pages because we don't
 		 * handle each tail page individually in migration.
 		 */
 		if (PageHuge(page)) {
diff --git a/mm/slub.c b/mm/slub.c
index d8b1eee2dd86..017a2ce5ba23 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2129,7 +2129,7 @@ redo:
 		if (!lock) {
 			lock = 1;
 			/*
-			 * Taking the spinlock removes the possiblity
+			 * Taking the spinlock removes the possibility
 			 * that acquire_slab() will see a slab page that
 			 * is frozen
 			 */
diff --git a/mm/vmscan.c b/mm/vmscan.c
index e979705bbf32..63195364ab2e 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -3527,7 +3527,7 @@ static bool kswapd_shrink_node(pg_data_t *pgdat,
  *
  * kswapd scans the zones in the highmem->normal->dma direction.  It skips
  * zones which have free_pages > high_wmark_pages(zone), but once a zone is
- * found to have free_pages <= high_wmark_pages(zone), any page is that zone
+ * found to have free_pages <= high_wmark_pages(zone), any page in that zone
  * or lower is eligible for reclaim until at least one usable zone is
  * balanced.
  */
-- 
cgit v1.2.3


From 0cbe3e26abe0cfe7effb67f620a77d46cce628b2 Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com>
Date: Tue, 5 Mar 2019 15:46:26 -0800
Subject: mm: update ptep_modify_prot_start/commit to take vm_area_struct as
 arg

Patch series "NestMMU pte upgrade workaround for mprotect", v5.

We can upgrade pte access (R -> RW transition) via mprotect.  We need to
make sure we follow the recommended pte update sequence as outlined in
commit bd5050e38aec ("powerpc/mm/radix: Change pte relax sequence to
handle nest MMU hang") for such updates.  This patch series does that.

This patch (of 5):

Some architectures may want to call flush_tlb_range from these helpers.

Link: http://lkml.kernel.org/r/20190116085035.29729-2-aneesh.kumar@linux.ibm.com
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/s390/include/asm/pgtable.h       |  4 ++--
 arch/s390/mm/pgtable.c                |  6 ++++--
 arch/x86/include/asm/paravirt.h       | 11 ++++++-----
 arch/x86/include/asm/paravirt_types.h |  5 +++--
 arch/x86/xen/mmu.h                    |  4 ++--
 arch/x86/xen/mmu_pv.c                 |  8 ++++----
 fs/proc/task_mmu.c                    |  4 ++--
 include/asm-generic/pgtable.h         | 16 ++++++++--------
 mm/memory.c                           |  4 ++--
 mm/mprotect.c                         |  4 ++--
 10 files changed, 35 insertions(+), 31 deletions(-)

(limited to 'include')

diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 063732414dfb..5d730199e37b 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -1069,8 +1069,8 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
 }
 
 #define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
-pte_t ptep_modify_prot_start(struct mm_struct *, unsigned long, pte_t *);
-void ptep_modify_prot_commit(struct mm_struct *, unsigned long, pte_t *, pte_t);
+pte_t ptep_modify_prot_start(struct vm_area_struct *, unsigned long, pte_t *);
+void ptep_modify_prot_commit(struct vm_area_struct *, unsigned long, pte_t *, pte_t);
 
 #define __HAVE_ARCH_PTEP_CLEAR_FLUSH
 static inline pte_t ptep_clear_flush(struct vm_area_struct *vma,
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 689b66f29fc6..71aa01170768 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -301,12 +301,13 @@ pte_t ptep_xchg_lazy(struct mm_struct *mm, unsigned long addr,
 }
 EXPORT_SYMBOL(ptep_xchg_lazy);
 
-pte_t ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr,
+pte_t ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long addr,
 			     pte_t *ptep)
 {
 	pgste_t pgste;
 	pte_t old;
 	int nodat;
+	struct mm_struct *mm = vma->vm_mm;
 
 	preempt_disable();
 	pgste = ptep_xchg_start(mm, addr, ptep);
@@ -319,10 +320,11 @@ pte_t ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr,
 	return old;
 }
 
-void ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
+void ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr,
 			     pte_t *ptep, pte_t pte)
 {
 	pgste_t pgste;
+	struct mm_struct *mm = vma->vm_mm;
 
 	if (!MACHINE_HAS_NX)
 		pte_val(pte) &= ~_PAGE_NOEXEC;
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index a97f28d914d5..c5a7f18cce7e 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -422,25 +422,26 @@ static inline pgdval_t pgd_val(pgd_t pgd)
 }
 
 #define  __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
-static inline pte_t ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr,
+static inline pte_t ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long addr,
 					   pte_t *ptep)
 {
 	pteval_t ret;
 
-	ret = PVOP_CALL3(pteval_t, mmu.ptep_modify_prot_start, mm, addr, ptep);
+	ret = PVOP_CALL3(pteval_t, mmu.ptep_modify_prot_start, vma, addr, ptep);
 
 	return (pte_t) { .pte = ret };
 }
 
-static inline void ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
+static inline void ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr,
 					   pte_t *ptep, pte_t pte)
 {
+
 	if (sizeof(pteval_t) > sizeof(long))
 		/* 5 arg words */
-		pv_ops.mmu.ptep_modify_prot_commit(mm, addr, ptep, pte);
+		pv_ops.mmu.ptep_modify_prot_commit(vma, addr, ptep, pte);
 	else
 		PVOP_VCALL4(mmu.ptep_modify_prot_commit,
-			    mm, addr, ptep, pte.pte);
+			    vma, addr, ptep, pte.pte);
 }
 
 static inline void set_pte(pte_t *ptep, pte_t pte)
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 488c59686a73..2474e434a6f7 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -55,6 +55,7 @@ struct task_struct;
 struct cpumask;
 struct flush_tlb_info;
 struct mmu_gather;
+struct vm_area_struct;
 
 /*
  * Wrapper type for pointers to code which uses the non-standard
@@ -254,9 +255,9 @@ struct pv_mmu_ops {
 			   pte_t *ptep, pte_t pteval);
 	void (*set_pmd)(pmd_t *pmdp, pmd_t pmdval);
 
-	pte_t (*ptep_modify_prot_start)(struct mm_struct *mm, unsigned long addr,
+	pte_t (*ptep_modify_prot_start)(struct vm_area_struct *vma, unsigned long addr,
 					pte_t *ptep);
-	void (*ptep_modify_prot_commit)(struct mm_struct *mm, unsigned long addr,
+	void (*ptep_modify_prot_commit)(struct vm_area_struct *vma, unsigned long addr,
 					pte_t *ptep, pte_t pte);
 
 	struct paravirt_callee_save pte_val;
diff --git a/arch/x86/xen/mmu.h b/arch/x86/xen/mmu.h
index a7e47cf7ec6c..6e4c6bd62203 100644
--- a/arch/x86/xen/mmu.h
+++ b/arch/x86/xen/mmu.h
@@ -17,8 +17,8 @@ bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn);
 
 void set_pte_mfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags);
 
-pte_t xen_ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
-void  xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
+pte_t xen_ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep);
+void  xen_ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr,
 				  pte_t *ptep, pte_t pte);
 
 unsigned long xen_read_cr2_direct(void);
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index 0f4fe206dcc2..856a85814f00 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -306,20 +306,20 @@ static void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
 	__xen_set_pte(ptep, pteval);
 }
 
-pte_t xen_ptep_modify_prot_start(struct mm_struct *mm,
+pte_t xen_ptep_modify_prot_start(struct vm_area_struct *vma,
 				 unsigned long addr, pte_t *ptep)
 {
 	/* Just return the pte as-is.  We preserve the bits on commit */
-	trace_xen_mmu_ptep_modify_prot_start(mm, addr, ptep, *ptep);
+	trace_xen_mmu_ptep_modify_prot_start(vma->vm_mm, addr, ptep, *ptep);
 	return *ptep;
 }
 
-void xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
+void xen_ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr,
 				 pte_t *ptep, pte_t pte)
 {
 	struct mmu_update u;
 
-	trace_xen_mmu_ptep_modify_prot_commit(mm, addr, ptep, pte);
+	trace_xen_mmu_ptep_modify_prot_commit(vma->vm_mm, addr, ptep, pte);
 	xen_mc_batch();
 
 	u.ptr = virt_to_machine(ptep).maddr | MMU_PT_UPDATE_PRESERVE_AD;
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 85b0ef890b28..9c2ef731dd5f 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -948,10 +948,10 @@ static inline void clear_soft_dirty(struct vm_area_struct *vma,
 	pte_t ptent = *pte;
 
 	if (pte_present(ptent)) {
-		ptent = ptep_modify_prot_start(vma->vm_mm, addr, pte);
+		ptent = ptep_modify_prot_start(vma, addr, pte);
 		ptent = pte_wrprotect(ptent);
 		ptent = pte_clear_soft_dirty(ptent);
-		ptep_modify_prot_commit(vma->vm_mm, addr, pte, ptent);
+		ptep_modify_prot_commit(vma, addr, pte, ptent);
 	} else if (is_swap_pte(ptent)) {
 		ptent = pte_swp_clear_soft_dirty(ptent);
 		set_pte_at(vma->vm_mm, addr, pte, ptent);
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index 05e61e6c843f..8b0e933efe26 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -606,7 +606,7 @@ static inline int pmd_none_or_clear_bad(pmd_t *pmd)
 	return 0;
 }
 
-static inline pte_t __ptep_modify_prot_start(struct mm_struct *mm,
+static inline pte_t __ptep_modify_prot_start(struct vm_area_struct *vma,
 					     unsigned long addr,
 					     pte_t *ptep)
 {
@@ -615,10 +615,10 @@ static inline pte_t __ptep_modify_prot_start(struct mm_struct *mm,
 	 * non-present, preventing the hardware from asynchronously
 	 * updating it.
 	 */
-	return ptep_get_and_clear(mm, addr, ptep);
+	return ptep_get_and_clear(vma->vm_mm, addr, ptep);
 }
 
-static inline void __ptep_modify_prot_commit(struct mm_struct *mm,
+static inline void __ptep_modify_prot_commit(struct vm_area_struct *vma,
 					     unsigned long addr,
 					     pte_t *ptep, pte_t pte)
 {
@@ -626,7 +626,7 @@ static inline void __ptep_modify_prot_commit(struct mm_struct *mm,
 	 * The pte is non-present, so there's no hardware state to
 	 * preserve.
 	 */
-	set_pte_at(mm, addr, ptep, pte);
+	set_pte_at(vma->vm_mm, addr, ptep, pte);
 }
 
 #ifndef __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
@@ -644,22 +644,22 @@ static inline void __ptep_modify_prot_commit(struct mm_struct *mm,
  * queue the update to be done at some later time.  The update must be
  * actually committed before the pte lock is released, however.
  */
-static inline pte_t ptep_modify_prot_start(struct mm_struct *mm,
+static inline pte_t ptep_modify_prot_start(struct vm_area_struct *vma,
 					   unsigned long addr,
 					   pte_t *ptep)
 {
-	return __ptep_modify_prot_start(mm, addr, ptep);
+	return __ptep_modify_prot_start(vma, addr, ptep);
 }
 
 /*
  * Commit an update to a pte, leaving any hardware-controlled bits in
  * the PTE unmodified.
  */
-static inline void ptep_modify_prot_commit(struct mm_struct *mm,
+static inline void ptep_modify_prot_commit(struct vm_area_struct *vma,
 					   unsigned long addr,
 					   pte_t *ptep, pte_t pte)
 {
-	__ptep_modify_prot_commit(mm, addr, ptep, pte);
+	__ptep_modify_prot_commit(vma, addr, ptep, pte);
 }
 #endif /* __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION */
 #endif /* CONFIG_MMU */
diff --git a/mm/memory.c b/mm/memory.c
index 6aff43171a7b..5ade52502ea0 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3619,12 +3619,12 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf)
 	 * Make it present again, Depending on how arch implementes non
 	 * accessible ptes, some can allow access by kernel mode.
 	 */
-	pte = ptep_modify_prot_start(vma->vm_mm, vmf->address, vmf->pte);
+	pte = ptep_modify_prot_start(vma, vmf->address, vmf->pte);
 	pte = pte_modify(pte, vma->vm_page_prot);
 	pte = pte_mkyoung(pte);
 	if (was_writable)
 		pte = pte_mkwrite(pte);
-	ptep_modify_prot_commit(vma->vm_mm, vmf->address, vmf->pte, pte);
+	ptep_modify_prot_commit(vma, vmf->address, vmf->pte, pte);
 	update_mmu_cache(vma, vmf->address, vmf->pte);
 
 	page = vm_normal_page(vma, vmf->address, pte);
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 36cb358db170..c89ce07923c8 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -110,7 +110,7 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 					continue;
 			}
 
-			ptent = ptep_modify_prot_start(mm, addr, pte);
+			ptent = ptep_modify_prot_start(vma, addr, pte);
 			ptent = pte_modify(ptent, newprot);
 			if (preserve_write)
 				ptent = pte_mk_savedwrite(ptent);
@@ -121,7 +121,7 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 					 !(vma->vm_flags & VM_SOFTDIRTY))) {
 				ptent = pte_mkwrite(ptent);
 			}
-			ptep_modify_prot_commit(mm, addr, pte, ptent);
+			ptep_modify_prot_commit(vma, addr, pte, ptent);
 			pages++;
 		} else if (IS_ENABLED(CONFIG_MIGRATION)) {
 			swp_entry_t entry = pte_to_swp_entry(oldpte);
-- 
cgit v1.2.3


From 04a8645304500be88b3345b65fef7efe58016166 Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com>
Date: Tue, 5 Mar 2019 15:46:29 -0800
Subject: mm: update ptep_modify_prot_commit to take old pte value as arg

Architectures like ppc64 require to do a conditional tlb flush based on
the old and new value of pte.  Enable that by passing old pte value as
the arg.

Link: http://lkml.kernel.org/r/20190116085035.29729-3-aneesh.kumar@linux.ibm.com
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/s390/include/asm/pgtable.h | 3 ++-
 arch/s390/mm/pgtable.c          | 2 +-
 arch/x86/include/asm/paravirt.h | 2 +-
 fs/proc/task_mmu.c              | 8 +++++---
 include/asm-generic/pgtable.h   | 2 +-
 mm/memory.c                     | 8 ++++----
 mm/mprotect.c                   | 6 +++---
 7 files changed, 17 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 5d730199e37b..76dc344edb8c 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -1070,7 +1070,8 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
 
 #define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
 pte_t ptep_modify_prot_start(struct vm_area_struct *, unsigned long, pte_t *);
-void ptep_modify_prot_commit(struct vm_area_struct *, unsigned long, pte_t *, pte_t);
+void ptep_modify_prot_commit(struct vm_area_struct *, unsigned long,
+			     pte_t *, pte_t, pte_t);
 
 #define __HAVE_ARCH_PTEP_CLEAR_FLUSH
 static inline pte_t ptep_clear_flush(struct vm_area_struct *vma,
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 71aa01170768..8485d6dc2754 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -321,7 +321,7 @@ pte_t ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long addr,
 }
 
 void ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr,
-			     pte_t *ptep, pte_t pte)
+			     pte_t *ptep, pte_t old_pte, pte_t pte)
 {
 	pgste_t pgste;
 	struct mm_struct *mm = vma->vm_mm;
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index c5a7f18cce7e..c25c38a05c1c 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -433,7 +433,7 @@ static inline pte_t ptep_modify_prot_start(struct vm_area_struct *vma, unsigned
 }
 
 static inline void ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr,
-					   pte_t *ptep, pte_t pte)
+					   pte_t *ptep, pte_t old_pte, pte_t pte)
 {
 
 	if (sizeof(pteval_t) > sizeof(long))
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 9c2ef731dd5f..beccb0b1d57c 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -948,10 +948,12 @@ static inline void clear_soft_dirty(struct vm_area_struct *vma,
 	pte_t ptent = *pte;
 
 	if (pte_present(ptent)) {
-		ptent = ptep_modify_prot_start(vma, addr, pte);
-		ptent = pte_wrprotect(ptent);
+		pte_t old_pte;
+
+		old_pte = ptep_modify_prot_start(vma, addr, pte);
+		ptent = pte_wrprotect(old_pte);
 		ptent = pte_clear_soft_dirty(ptent);
-		ptep_modify_prot_commit(vma, addr, pte, ptent);
+		ptep_modify_prot_commit(vma, addr, pte, old_pte, ptent);
 	} else if (is_swap_pte(ptent)) {
 		ptent = pte_swp_clear_soft_dirty(ptent);
 		set_pte_at(vma->vm_mm, addr, pte, ptent);
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index 8b0e933efe26..fa782fba51ee 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -657,7 +657,7 @@ static inline pte_t ptep_modify_prot_start(struct vm_area_struct *vma,
  */
 static inline void ptep_modify_prot_commit(struct vm_area_struct *vma,
 					   unsigned long addr,
-					   pte_t *ptep, pte_t pte)
+					   pte_t *ptep, pte_t old_pte, pte_t pte)
 {
 	__ptep_modify_prot_commit(vma, addr, ptep, pte);
 }
diff --git a/mm/memory.c b/mm/memory.c
index 5ade52502ea0..557c6fffedd1 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3599,7 +3599,7 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf)
 	int last_cpupid;
 	int target_nid;
 	bool migrated = false;
-	pte_t pte;
+	pte_t pte, old_pte;
 	bool was_writable = pte_savedwrite(vmf->orig_pte);
 	int flags = 0;
 
@@ -3619,12 +3619,12 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf)
 	 * Make it present again, Depending on how arch implementes non
 	 * accessible ptes, some can allow access by kernel mode.
 	 */
-	pte = ptep_modify_prot_start(vma, vmf->address, vmf->pte);
-	pte = pte_modify(pte, vma->vm_page_prot);
+	old_pte = ptep_modify_prot_start(vma, vmf->address, vmf->pte);
+	pte = pte_modify(old_pte, vma->vm_page_prot);
 	pte = pte_mkyoung(pte);
 	if (was_writable)
 		pte = pte_mkwrite(pte);
-	ptep_modify_prot_commit(vma, vmf->address, vmf->pte, pte);
+	ptep_modify_prot_commit(vma, vmf->address, vmf->pte, old_pte, pte);
 	update_mmu_cache(vma, vmf->address, vmf->pte);
 
 	page = vm_normal_page(vma, vmf->address, pte);
diff --git a/mm/mprotect.c b/mm/mprotect.c
index c89ce07923c8..028c724dcb1a 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -110,8 +110,8 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 					continue;
 			}
 
-			ptent = ptep_modify_prot_start(vma, addr, pte);
-			ptent = pte_modify(ptent, newprot);
+			oldpte = ptep_modify_prot_start(vma, addr, pte);
+			ptent = pte_modify(oldpte, newprot);
 			if (preserve_write)
 				ptent = pte_mk_savedwrite(ptent);
 
@@ -121,7 +121,7 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 					 !(vma->vm_flags & VM_SOFTDIRTY))) {
 				ptent = pte_mkwrite(ptent);
 			}
-			ptep_modify_prot_commit(vma, addr, pte, ptent);
+			ptep_modify_prot_commit(vma, addr, pte, oldpte, ptent);
 			pages++;
 		} else if (IS_ENABLED(CONFIG_MIGRATION)) {
 			swp_entry_t entry = pte_to_swp_entry(oldpte);
-- 
cgit v1.2.3


From 023bdd00235eb0dcb71fd98f0b8347a9bb85d417 Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com>
Date: Tue, 5 Mar 2019 15:46:37 -0800
Subject: mm/hugetlb: add prot_modify_start/commit sequence for hugetlb update

Architectures like ppc64 require to do a conditional tlb flush based on
the old and new value of pte.  Follow the regular pte change protection
sequence for hugetlb too.  This allows the architectures to override the
update sequence.

Link: http://lkml.kernel.org/r/20190116085035.29729-5-aneesh.kumar@linux.ibm.com
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Reviewed-by: Michael Ellerman <mpe@ellerman.id.au>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/hugetlb.h | 20 ++++++++++++++++++++
 mm/hugetlb.c            |  8 +++++---
 2 files changed, 25 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 4cc3871b65fc..54c317c8355f 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -580,6 +580,26 @@ static inline void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr
 	set_huge_pte_at(mm, addr, ptep, pte);
 }
 #endif
+
+#ifndef huge_ptep_modify_prot_start
+#define huge_ptep_modify_prot_start huge_ptep_modify_prot_start
+static inline pte_t huge_ptep_modify_prot_start(struct vm_area_struct *vma,
+						unsigned long addr, pte_t *ptep)
+{
+	return huge_ptep_get_and_clear(vma->vm_mm, addr, ptep);
+}
+#endif
+
+#ifndef huge_ptep_modify_prot_commit
+#define huge_ptep_modify_prot_commit huge_ptep_modify_prot_commit
+static inline void huge_ptep_modify_prot_commit(struct vm_area_struct *vma,
+						unsigned long addr, pte_t *ptep,
+						pte_t old_pte, pte_t pte)
+{
+	set_huge_pte_at(vma->vm_mm, addr, ptep, pte);
+}
+#endif
+
 #else	/* CONFIG_HUGETLB_PAGE */
 struct hstate {};
 #define alloc_huge_page(v, a, r) NULL
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 2fb3062a3595..0c7848fccf93 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -4399,10 +4399,12 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
 			continue;
 		}
 		if (!huge_pte_none(pte)) {
-			pte = huge_ptep_get_and_clear(mm, address, ptep);
-			pte = pte_mkhuge(huge_pte_modify(pte, newprot));
+			pte_t old_pte;
+
+			old_pte = huge_ptep_modify_prot_start(vma, address, ptep);
+			pte = pte_mkhuge(huge_pte_modify(old_pte, newprot));
 			pte = arch_make_huge_pte(pte, vma, NULL, 0);
-			set_huge_pte_at(mm, address, ptep, pte);
+			huge_ptep_modify_prot_commit(vma, address, ptep, old_pte, pte);
 			pages++;
 		}
 		spin_unlock(ptl);
-- 
cgit v1.2.3


From b56a2d8af9147a4efe4011b60d93779c0461ca97 Mon Sep 17 00:00:00 2001
From: Vineeth Remanan Pillai <vpillai@digitalocean.com>
Date: Tue, 5 Mar 2019 15:47:03 -0800
Subject: mm: rid swapoff of quadratic complexity

This patch was initially posted by Kelley Nielsen.  Reposting the patch
with all review comments addressed and with minor modifications and
optimizations.  Also, folding in the fixes offered by Hugh Dickins and
Huang Ying.  Tests were rerun and commit message updated with new
results.

try_to_unuse() is of quadratic complexity, with a lot of wasted effort.
It unuses swap entries one by one, potentially iterating over all the
page tables for all the processes in the system for each one.

This new proposed implementation of try_to_unuse simplifies its
complexity to linear.  It iterates over the system's mms once, unusing
all the affected entries as it walks each set of page tables.  It also
makes similar changes to shmem_unuse.

Improvement

swapoff was called on a swap partition containing about 6G of data, in a
VM(8cpu, 16G RAM), and calls to unuse_pte_range() were counted.

Present implementation....about 1200M calls(8min, avg 80% cpu util).
Prototype.................about  9.0K calls(3min, avg 5% cpu util).

Details

In shmem_unuse(), iterate over the shmem_swaplist and, for each
shmem_inode_info that contains a swap entry, pass it to
shmem_unuse_inode(), along with the swap type.  In shmem_unuse_inode(),
iterate over its associated xarray, and store the index and value of
each swap entry in an array for passing to shmem_swapin_page() outside
of the RCU critical section.

In try_to_unuse(), instead of iterating over the entries in the type and
unusing them one by one, perhaps walking all the page tables for all the
processes for each one, iterate over the mmlist, making one pass.  Pass
each mm to unuse_mm() to begin its page table walk, and during the walk,
unuse all the ptes that have backing store in the swap type received by
try_to_unuse().  After the walk, check the type for orphaned swap
entries with find_next_to_unuse(), and remove them from the swap cache.
If find_next_to_unuse() starts over at the beginning of the type, repeat
the check of the shmem_swaplist and the walk a maximum of three times.

Change unuse_mm() and the intervening walk functions down to
unuse_pte_range() to take the type as a parameter, and to iterate over
their entire range, calling the next function down on every iteration.
In unuse_pte_range(), make a swap entry from each pte in the range using
the passed in type.  If it has backing store in the type, call
swapin_readahead() to retrieve the page and pass it to unuse_pte().

Pass the count of pages_to_unuse down the page table walks in
try_to_unuse(), and return from the walk when the desired number of
pages has been swapped back in.

Link: http://lkml.kernel.org/r/20190114153129.4852-2-vpillai@digitalocean.com
Signed-off-by: Vineeth Remanan Pillai <vpillai@digitalocean.com>
Signed-off-by: Kelley Nielsen <kelleynnn@gmail.com>
Signed-off-by: Huang Ying <ying.huang@intel.com>
Acked-by: Hugh Dickins <hughd@google.com>
Cc: Rik van Riel <riel@surriel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/frontswap.h |   7 +
 include/linux/shmem_fs.h  |   3 +-
 mm/shmem.c                | 267 +++++++++++++++-------------
 mm/swapfile.c             | 433 +++++++++++++++++-----------------------------
 4 files changed, 319 insertions(+), 391 deletions(-)

(limited to 'include')

diff --git a/include/linux/frontswap.h b/include/linux/frontswap.h
index 011965c08b93..6d775984905b 100644
--- a/include/linux/frontswap.h
+++ b/include/linux/frontswap.h
@@ -7,6 +7,13 @@
 #include <linux/bitops.h>
 #include <linux/jump_label.h>
 
+/*
+ * Return code to denote that requested number of
+ * frontswap pages are unused(moved to page cache).
+ * Used in in shmem_unuse and try_to_unuse.
+ */
+#define FRONTSWAP_PAGES_UNUSED	2
+
 struct frontswap_ops {
 	void (*init)(unsigned); /* this swap type was just swapon'ed */
 	int (*store)(unsigned, pgoff_t, struct page *); /* store a page */
diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index f155dc607112..f3fb1edb3526 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -72,7 +72,8 @@ extern void shmem_unlock_mapping(struct address_space *mapping);
 extern struct page *shmem_read_mapping_page_gfp(struct address_space *mapping,
 					pgoff_t index, gfp_t gfp_mask);
 extern void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end);
-extern int shmem_unuse(swp_entry_t entry, struct page *page);
+extern int shmem_unuse(unsigned int type, bool frontswap,
+		       unsigned long *fs_pages_to_unuse);
 
 extern unsigned long shmem_swap_usage(struct vm_area_struct *vma);
 extern unsigned long shmem_partial_swap_usage(struct address_space *mapping,
diff --git a/mm/shmem.c b/mm/shmem.c
index b4d27ef87496..283a1833dafc 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -36,6 +36,7 @@
 #include <linux/uio.h>
 #include <linux/khugepaged.h>
 #include <linux/hugetlb.h>
+#include <linux/frontswap.h>
 
 #include <asm/tlbflush.h> /* for arch/microblaze update_mmu_cache() */
 
@@ -1093,159 +1094,184 @@ static void shmem_evict_inode(struct inode *inode)
 	clear_inode(inode);
 }
 
-static unsigned long find_swap_entry(struct xarray *xa, void *item)
+extern struct swap_info_struct *swap_info[];
+
+static int shmem_find_swap_entries(struct address_space *mapping,
+				   pgoff_t start, unsigned int nr_entries,
+				   struct page **entries, pgoff_t *indices,
+				   bool frontswap)
 {
-	XA_STATE(xas, xa, 0);
-	unsigned int checked = 0;
-	void *entry;
+	XA_STATE(xas, &mapping->i_pages, start);
+	struct page *page;
+	unsigned int ret = 0;
+
+	if (!nr_entries)
+		return 0;
 
 	rcu_read_lock();
-	xas_for_each(&xas, entry, ULONG_MAX) {
-		if (xas_retry(&xas, entry))
+	xas_for_each(&xas, page, ULONG_MAX) {
+		if (xas_retry(&xas, page))
 			continue;
-		if (entry == item)
-			break;
-		checked++;
-		if ((checked % XA_CHECK_SCHED) != 0)
+
+		if (!xa_is_value(page))
 			continue;
-		xas_pause(&xas);
-		cond_resched_rcu();
+
+		if (frontswap) {
+			swp_entry_t entry = radix_to_swp_entry(page);
+
+			if (!frontswap_test(swap_info[swp_type(entry)],
+					    swp_offset(entry)))
+				continue;
+		}
+
+		indices[ret] = xas.xa_index;
+		entries[ret] = page;
+
+		if (need_resched()) {
+			xas_pause(&xas);
+			cond_resched_rcu();
+		}
+		if (++ret == nr_entries)
+			break;
 	}
 	rcu_read_unlock();
 
-	return entry ? xas.xa_index : -1;
+	return ret;
 }
 
 /*
- * If swap found in inode, free it and move page from swapcache to filecache.
+ * Move the swapped pages for an inode to page cache. Returns the count
+ * of pages swapped in, or the error in case of failure.
  */
-static int shmem_unuse_inode(struct shmem_inode_info *info,
-			     swp_entry_t swap, struct page **pagep)
+static int shmem_unuse_swap_entries(struct inode *inode, struct pagevec pvec,
+				    pgoff_t *indices)
 {
-	struct address_space *mapping = info->vfs_inode.i_mapping;
-	void *radswap;
-	pgoff_t index;
-	gfp_t gfp;
+	int i = 0;
+	int ret = 0;
 	int error = 0;
+	struct address_space *mapping = inode->i_mapping;
 
-	radswap = swp_to_radix_entry(swap);
-	index = find_swap_entry(&mapping->i_pages, radswap);
-	if (index == -1)
-		return -EAGAIN;	/* tell shmem_unuse we found nothing */
-
-	/*
-	 * Move _head_ to start search for next from here.
-	 * But be careful: shmem_evict_inode checks list_empty without taking
-	 * mutex, and there's an instant in list_move_tail when info->swaplist
-	 * would appear empty, if it were the only one on shmem_swaplist.
-	 */
-	if (shmem_swaplist.next != &info->swaplist)
-		list_move_tail(&shmem_swaplist, &info->swaplist);
+	for (i = 0; i < pvec.nr; i++) {
+		struct page *page = pvec.pages[i];
 
-	gfp = mapping_gfp_mask(mapping);
-	if (shmem_should_replace_page(*pagep, gfp)) {
-		mutex_unlock(&shmem_swaplist_mutex);
-		error = shmem_replace_page(pagep, gfp, info, index);
-		mutex_lock(&shmem_swaplist_mutex);
-		/*
-		 * We needed to drop mutex to make that restrictive page
-		 * allocation, but the inode might have been freed while we
-		 * dropped it: although a racing shmem_evict_inode() cannot
-		 * complete without emptying the page cache, our page lock
-		 * on this swapcache page is not enough to prevent that -
-		 * free_swap_and_cache() of our swap entry will only
-		 * trylock_page(), removing swap from page cache whatever.
-		 *
-		 * We must not proceed to shmem_add_to_page_cache() if the
-		 * inode has been freed, but of course we cannot rely on
-		 * inode or mapping or info to check that.  However, we can
-		 * safely check if our swap entry is still in use (and here
-		 * it can't have got reused for another page): if it's still
-		 * in use, then the inode cannot have been freed yet, and we
-		 * can safely proceed (if it's no longer in use, that tells
-		 * nothing about the inode, but we don't need to unuse swap).
-		 */
-		if (!page_swapcount(*pagep))
-			error = -ENOENT;
+		if (!xa_is_value(page))
+			continue;
+		error = shmem_swapin_page(inode, indices[i],
+					  &page, SGP_CACHE,
+					  mapping_gfp_mask(mapping),
+					  NULL, NULL);
+		if (error == 0) {
+			unlock_page(page);
+			put_page(page);
+			ret++;
+		}
+		if (error == -ENOMEM)
+			break;
+		error = 0;
 	}
+	return error ? error : ret;
+}
 
-	/*
-	 * We rely on shmem_swaplist_mutex, not only to protect the swaplist,
-	 * but also to hold up shmem_evict_inode(): so inode cannot be freed
-	 * beneath us (pagelock doesn't help until the page is in pagecache).
-	 */
-	if (!error)
-		error = shmem_add_to_page_cache(*pagep, mapping, index,
-						radswap, gfp);
-	if (error != -ENOMEM) {
-		/*
-		 * Truncation and eviction use free_swap_and_cache(), which
-		 * only does trylock page: if we raced, best clean up here.
-		 */
-		delete_from_swap_cache(*pagep);
-		set_page_dirty(*pagep);
-		if (!error) {
-			spin_lock_irq(&info->lock);
-			info->swapped--;
-			spin_unlock_irq(&info->lock);
-			swap_free(swap);
+/*
+ * If swap found in inode, free it and move page from swapcache to filecache.
+ */
+static int shmem_unuse_inode(struct inode *inode, unsigned int type,
+			     bool frontswap, unsigned long *fs_pages_to_unuse)
+{
+	struct address_space *mapping = inode->i_mapping;
+	pgoff_t start = 0;
+	struct pagevec pvec;
+	pgoff_t indices[PAGEVEC_SIZE];
+	bool frontswap_partial = (frontswap && *fs_pages_to_unuse > 0);
+	int ret = 0;
+
+	pagevec_init(&pvec);
+	do {
+		unsigned int nr_entries = PAGEVEC_SIZE;
+
+		if (frontswap_partial && *fs_pages_to_unuse < PAGEVEC_SIZE)
+			nr_entries = *fs_pages_to_unuse;
+
+		pvec.nr = shmem_find_swap_entries(mapping, start, nr_entries,
+						  pvec.pages, indices,
+						  frontswap);
+		if (pvec.nr == 0) {
+			ret = 0;
+			break;
 		}
-	}
-	return error;
+
+		ret = shmem_unuse_swap_entries(inode, pvec, indices);
+		if (ret < 0)
+			break;
+
+		if (frontswap_partial) {
+			*fs_pages_to_unuse -= ret;
+			if (*fs_pages_to_unuse == 0) {
+				ret = FRONTSWAP_PAGES_UNUSED;
+				break;
+			}
+		}
+
+		start = indices[pvec.nr - 1];
+	} while (true);
+
+	return ret;
 }
 
 /*
- * Search through swapped inodes to find and replace swap by page.
+ * Read all the shared memory data that resides in the swap
+ * device 'type' back into memory, so the swap device can be
+ * unused.
  */
-int shmem_unuse(swp_entry_t swap, struct page *page)
+int shmem_unuse(unsigned int type, bool frontswap,
+		unsigned long *fs_pages_to_unuse)
 {
-	struct list_head *this, *next;
-	struct shmem_inode_info *info;
-	struct mem_cgroup *memcg;
+	struct shmem_inode_info *info, *next;
+	struct inode *inode;
+	struct inode *prev_inode = NULL;
 	int error = 0;
 
-	/*
-	 * There's a faint possibility that swap page was replaced before
-	 * caller locked it: caller will come back later with the right page.
-	 */
-	if (unlikely(!PageSwapCache(page) || page_private(page) != swap.val))
-		goto out;
+	if (list_empty(&shmem_swaplist))
+		return 0;
+
+	mutex_lock(&shmem_swaplist_mutex);
 
 	/*
-	 * Charge page using GFP_KERNEL while we can wait, before taking
-	 * the shmem_swaplist_mutex which might hold up shmem_writepage().
-	 * Charged back to the user (not to caller) when swap account is used.
+	 * The extra refcount on the inode is necessary to safely dereference
+	 * p->next after re-acquiring the lock. New shmem inodes with swap
+	 * get added to the end of the list and we will scan them all.
 	 */
-	error = mem_cgroup_try_charge_delay(page, current->mm, GFP_KERNEL,
-					    &memcg, false);
-	if (error)
-		goto out;
-	/* No memory allocation: swap entry occupies the slot for the page */
-	error = -EAGAIN;
-
-	mutex_lock(&shmem_swaplist_mutex);
-	list_for_each_safe(this, next, &shmem_swaplist) {
-		info = list_entry(this, struct shmem_inode_info, swaplist);
-		if (info->swapped)
-			error = shmem_unuse_inode(info, swap, &page);
-		else
+	list_for_each_entry_safe(info, next, &shmem_swaplist, swaplist) {
+		if (!info->swapped) {
 			list_del_init(&info->swaplist);
+			continue;
+		}
+
+		inode = igrab(&info->vfs_inode);
+		if (!inode)
+			continue;
+
+		mutex_unlock(&shmem_swaplist_mutex);
+		if (prev_inode)
+			iput(prev_inode);
+		prev_inode = inode;
+
+		error = shmem_unuse_inode(inode, type, frontswap,
+					  fs_pages_to_unuse);
 		cond_resched();
-		if (error != -EAGAIN)
+
+		mutex_lock(&shmem_swaplist_mutex);
+		next = list_next_entry(info, swaplist);
+		if (!info->swapped)
+			list_del_init(&info->swaplist);
+		if (error)
 			break;
-		/* found nothing in this: move on to search the next */
 	}
 	mutex_unlock(&shmem_swaplist_mutex);
 
-	if (error) {
-		if (error != -ENOMEM)
-			error = 0;
-		mem_cgroup_cancel_charge(page, memcg, false);
-	} else
-		mem_cgroup_commit_charge(page, memcg, true, false);
-out:
-	unlock_page(page);
-	put_page(page);
+	if (prev_inode)
+		iput(prev_inode);
+
 	return error;
 }
 
@@ -1329,7 +1355,7 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
 	 */
 	mutex_lock(&shmem_swaplist_mutex);
 	if (list_empty(&info->swaplist))
-		list_add_tail(&info->swaplist, &shmem_swaplist);
+		list_add(&info->swaplist, &shmem_swaplist);
 
 	if (add_to_swap_cache(page, swap, GFP_ATOMIC) == 0) {
 		spin_lock_irq(&info->lock);
@@ -3886,7 +3912,8 @@ int __init shmem_init(void)
 	return 0;
 }
 
-int shmem_unuse(swp_entry_t swap, struct page *page)
+int shmem_unuse(unsigned int type, bool frontswap,
+		unsigned long *fs_pages_to_unuse)
 {
 	return 0;
 }
diff --git a/mm/swapfile.c b/mm/swapfile.c
index dbac1d49469d..6de46984d59d 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1799,44 +1799,77 @@ out_nolock:
 }
 
 static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
-				unsigned long addr, unsigned long end,
-				swp_entry_t entry, struct page *page)
+			unsigned long addr, unsigned long end,
+			unsigned int type, bool frontswap,
+			unsigned long *fs_pages_to_unuse)
 {
-	pte_t swp_pte = swp_entry_to_pte(entry);
+	struct page *page;
+	swp_entry_t entry;
 	pte_t *pte;
+	struct swap_info_struct *si;
+	unsigned long offset;
 	int ret = 0;
+	volatile unsigned char *swap_map;
 
-	/*
-	 * We don't actually need pte lock while scanning for swp_pte: since
-	 * we hold page lock and mmap_sem, swp_pte cannot be inserted into the
-	 * page table while we're scanning; though it could get zapped, and on
-	 * some architectures (e.g. x86_32 with PAE) we might catch a glimpse
-	 * of unmatched parts which look like swp_pte, so unuse_pte must
-	 * recheck under pte lock.  Scanning without pte lock lets it be
-	 * preemptable whenever CONFIG_PREEMPT but not CONFIG_HIGHPTE.
-	 */
+	si = swap_info[type];
 	pte = pte_offset_map(pmd, addr);
 	do {
-		/*
-		 * swapoff spends a _lot_ of time in this loop!
-		 * Test inline before going to call unuse_pte.
-		 */
-		if (unlikely(pte_same_as_swp(*pte, swp_pte))) {
-			pte_unmap(pte);
-			ret = unuse_pte(vma, pmd, addr, entry, page);
-			if (ret)
-				goto out;
-			pte = pte_offset_map(pmd, addr);
+		struct vm_fault vmf;
+
+		if (!is_swap_pte(*pte))
+			continue;
+
+		entry = pte_to_swp_entry(*pte);
+		if (swp_type(entry) != type)
+			continue;
+
+		offset = swp_offset(entry);
+		if (frontswap && !frontswap_test(si, offset))
+			continue;
+
+		pte_unmap(pte);
+		swap_map = &si->swap_map[offset];
+		vmf.vma = vma;
+		vmf.address = addr;
+		vmf.pmd = pmd;
+		page = swapin_readahead(entry, GFP_HIGHUSER_MOVABLE, &vmf);
+		if (!page) {
+			if (*swap_map == 0 || *swap_map == SWAP_MAP_BAD)
+				goto try_next;
+			return -ENOMEM;
+		}
+
+		lock_page(page);
+		wait_on_page_writeback(page);
+		ret = unuse_pte(vma, pmd, addr, entry, page);
+		if (ret < 0) {
+			unlock_page(page);
+			put_page(page);
+			goto out;
+		}
+
+		try_to_free_swap(page);
+		unlock_page(page);
+		put_page(page);
+
+		if (*fs_pages_to_unuse && !--(*fs_pages_to_unuse)) {
+			ret = FRONTSWAP_PAGES_UNUSED;
+			goto out;
 		}
+try_next:
+		pte = pte_offset_map(pmd, addr);
 	} while (pte++, addr += PAGE_SIZE, addr != end);
 	pte_unmap(pte - 1);
+
+	ret = 0;
 out:
 	return ret;
 }
 
 static inline int unuse_pmd_range(struct vm_area_struct *vma, pud_t *pud,
 				unsigned long addr, unsigned long end,
-				swp_entry_t entry, struct page *page)
+				unsigned int type, bool frontswap,
+				unsigned long *fs_pages_to_unuse)
 {
 	pmd_t *pmd;
 	unsigned long next;
@@ -1848,7 +1881,8 @@ static inline int unuse_pmd_range(struct vm_area_struct *vma, pud_t *pud,
 		next = pmd_addr_end(addr, end);
 		if (pmd_none_or_trans_huge_or_clear_bad(pmd))
 			continue;
-		ret = unuse_pte_range(vma, pmd, addr, next, entry, page);
+		ret = unuse_pte_range(vma, pmd, addr, next, type,
+				      frontswap, fs_pages_to_unuse);
 		if (ret)
 			return ret;
 	} while (pmd++, addr = next, addr != end);
@@ -1857,7 +1891,8 @@ static inline int unuse_pmd_range(struct vm_area_struct *vma, pud_t *pud,
 
 static inline int unuse_pud_range(struct vm_area_struct *vma, p4d_t *p4d,
 				unsigned long addr, unsigned long end,
-				swp_entry_t entry, struct page *page)
+				unsigned int type, bool frontswap,
+				unsigned long *fs_pages_to_unuse)
 {
 	pud_t *pud;
 	unsigned long next;
@@ -1868,7 +1903,8 @@ static inline int unuse_pud_range(struct vm_area_struct *vma, p4d_t *p4d,
 		next = pud_addr_end(addr, end);
 		if (pud_none_or_clear_bad(pud))
 			continue;
-		ret = unuse_pmd_range(vma, pud, addr, next, entry, page);
+		ret = unuse_pmd_range(vma, pud, addr, next, type,
+				      frontswap, fs_pages_to_unuse);
 		if (ret)
 			return ret;
 	} while (pud++, addr = next, addr != end);
@@ -1877,7 +1913,8 @@ static inline int unuse_pud_range(struct vm_area_struct *vma, p4d_t *p4d,
 
 static inline int unuse_p4d_range(struct vm_area_struct *vma, pgd_t *pgd,
 				unsigned long addr, unsigned long end,
-				swp_entry_t entry, struct page *page)
+				unsigned int type, bool frontswap,
+				unsigned long *fs_pages_to_unuse)
 {
 	p4d_t *p4d;
 	unsigned long next;
@@ -1888,78 +1925,66 @@ static inline int unuse_p4d_range(struct vm_area_struct *vma, pgd_t *pgd,
 		next = p4d_addr_end(addr, end);
 		if (p4d_none_or_clear_bad(p4d))
 			continue;
-		ret = unuse_pud_range(vma, p4d, addr, next, entry, page);
+		ret = unuse_pud_range(vma, p4d, addr, next, type,
+				      frontswap, fs_pages_to_unuse);
 		if (ret)
 			return ret;
 	} while (p4d++, addr = next, addr != end);
 	return 0;
 }
 
-static int unuse_vma(struct vm_area_struct *vma,
-				swp_entry_t entry, struct page *page)
+static int unuse_vma(struct vm_area_struct *vma, unsigned int type,
+		     bool frontswap, unsigned long *fs_pages_to_unuse)
 {
 	pgd_t *pgd;
 	unsigned long addr, end, next;
 	int ret;
 
-	if (page_anon_vma(page)) {
-		addr = page_address_in_vma(page, vma);
-		if (addr == -EFAULT)
-			return 0;
-		else
-			end = addr + PAGE_SIZE;
-	} else {
-		addr = vma->vm_start;
-		end = vma->vm_end;
-	}
+	addr = vma->vm_start;
+	end = vma->vm_end;
 
 	pgd = pgd_offset(vma->vm_mm, addr);
 	do {
 		next = pgd_addr_end(addr, end);
 		if (pgd_none_or_clear_bad(pgd))
 			continue;
-		ret = unuse_p4d_range(vma, pgd, addr, next, entry, page);
+		ret = unuse_p4d_range(vma, pgd, addr, next, type,
+				      frontswap, fs_pages_to_unuse);
 		if (ret)
 			return ret;
 	} while (pgd++, addr = next, addr != end);
 	return 0;
 }
 
-static int unuse_mm(struct mm_struct *mm,
-				swp_entry_t entry, struct page *page)
+static int unuse_mm(struct mm_struct *mm, unsigned int type,
+		    bool frontswap, unsigned long *fs_pages_to_unuse)
 {
 	struct vm_area_struct *vma;
 	int ret = 0;
 
-	if (!down_read_trylock(&mm->mmap_sem)) {
-		/*
-		 * Activate page so shrink_inactive_list is unlikely to unmap
-		 * its ptes while lock is dropped, so swapoff can make progress.
-		 */
-		activate_page(page);
-		unlock_page(page);
-		down_read(&mm->mmap_sem);
-		lock_page(page);
-	}
+	down_read(&mm->mmap_sem);
 	for (vma = mm->mmap; vma; vma = vma->vm_next) {
-		if (vma->anon_vma && (ret = unuse_vma(vma, entry, page)))
-			break;
+		if (vma->anon_vma) {
+			ret = unuse_vma(vma, type, frontswap,
+					fs_pages_to_unuse);
+			if (ret)
+				break;
+		}
 		cond_resched();
 	}
 	up_read(&mm->mmap_sem);
-	return (ret < 0)? ret: 0;
+	return ret;
 }
 
 /*
  * Scan swap_map (or frontswap_map if frontswap parameter is true)
- * from current position to next entry still in use.
- * Recycle to start on reaching the end, returning 0 when empty.
+ * from current position to next entry still in use. Return 0
+ * if there are no inuse entries after prev till end of the map.
  */
 static unsigned int find_next_to_unuse(struct swap_info_struct *si,
 					unsigned int prev, bool frontswap)
 {
-	unsigned int max = si->max;
-	unsigned int i = prev;
+	unsigned int i;
 	unsigned char count;
 
 	/*
@@ -1968,20 +1993,7 @@ static unsigned int find_next_to_unuse(struct swap_info_struct *si,
 	 * hits are okay, and sys_swapoff() has already prevented new
 	 * allocations from this area (while holding swap_lock).
 	 */
-	for (;;) {
-		if (++i >= max) {
-			if (!prev) {
-				i = 0;
-				break;
-			}
-			/*
-			 * No entries in use at top of swap_map,
-			 * loop back to start and recheck there.
-			 */
-			max = prev + 1;
-			prev = 0;
-			i = 1;
-		}
+	for (i = prev + 1; i < si->max; i++) {
 		count = READ_ONCE(si->swap_map[i]);
 		if (count && swap_count(count) != SWAP_MAP_BAD)
 			if (!frontswap || frontswap_test(si, i))
@@ -1989,240 +2001,121 @@ static unsigned int find_next_to_unuse(struct swap_info_struct *si,
 		if ((i % LATENCY_LIMIT) == 0)
 			cond_resched();
 	}
+
+	if (i == si->max)
+		i = 0;
+
 	return i;
 }
 
 /*
- * We completely avoid races by reading each swap page in advance,
- * and then search for the process using it.  All the necessary
- * page table adjustments can then be made atomically.
- *
- * if the boolean frontswap is true, only unuse pages_to_unuse pages;
+ * If the boolean frontswap is true, only unuse pages_to_unuse pages;
  * pages_to_unuse==0 means all pages; ignored if frontswap is false
  */
+#define SWAP_UNUSE_MAX_TRIES 3
 int try_to_unuse(unsigned int type, bool frontswap,
 		 unsigned long pages_to_unuse)
 {
+	struct mm_struct *prev_mm;
+	struct mm_struct *mm;
+	struct list_head *p;
+	int retval = 0;
 	struct swap_info_struct *si = swap_info[type];
-	struct mm_struct *start_mm;
-	volatile unsigned char *swap_map; /* swap_map is accessed without
-					   * locking. Mark it as volatile
-					   * to prevent compiler doing
-					   * something odd.
-					   */
-	unsigned char swcount;
 	struct page *page;
 	swp_entry_t entry;
-	unsigned int i = 0;
-	int retval = 0;
+	unsigned int i;
+	int retries = 0;
 
-	/*
-	 * When searching mms for an entry, a good strategy is to
-	 * start at the first mm we freed the previous entry from
-	 * (though actually we don't notice whether we or coincidence
-	 * freed the entry).  Initialize this start_mm with a hold.
-	 *
-	 * A simpler strategy would be to start at the last mm we
-	 * freed the previous entry from; but that would take less
-	 * advantage of mmlist ordering, which clusters forked mms
-	 * together, child after parent.  If we race with dup_mmap(), we
-	 * prefer to resolve parent before child, lest we miss entries
-	 * duplicated after we scanned child: using last mm would invert
-	 * that.
-	 */
-	start_mm = &init_mm;
-	mmget(&init_mm);
+	if (!si->inuse_pages)
+		return 0;
 
-	/*
-	 * Keep on scanning until all entries have gone.  Usually,
-	 * one pass through swap_map is enough, but not necessarily:
-	 * there are races when an instance of an entry might be missed.
-	 */
-	while ((i = find_next_to_unuse(si, i, frontswap)) != 0) {
+	if (!frontswap)
+		pages_to_unuse = 0;
+
+retry:
+	retval = shmem_unuse(type, frontswap, &pages_to_unuse);
+	if (retval)
+		goto out;
+
+	prev_mm = &init_mm;
+	mmget(prev_mm);
+
+	spin_lock(&mmlist_lock);
+	p = &init_mm.mmlist;
+	while ((p = p->next) != &init_mm.mmlist) {
 		if (signal_pending(current)) {
 			retval = -EINTR;
 			break;
 		}
 
-		/*
-		 * Get a page for the entry, using the existing swap
-		 * cache page if there is one.  Otherwise, get a clean
-		 * page and read the swap into it.
-		 */
-		swap_map = &si->swap_map[i];
-		entry = swp_entry(type, i);
-		page = read_swap_cache_async(entry,
-					GFP_HIGHUSER_MOVABLE, NULL, 0, false);
-		if (!page) {
-			/*
-			 * Either swap_duplicate() failed because entry
-			 * has been freed independently, and will not be
-			 * reused since sys_swapoff() already disabled
-			 * allocation from here, or alloc_page() failed.
-			 */
-			swcount = *swap_map;
-			/*
-			 * We don't hold lock here, so the swap entry could be
-			 * SWAP_MAP_BAD (when the cluster is discarding).
-			 * Instead of fail out, We can just skip the swap
-			 * entry because swapoff will wait for discarding
-			 * finish anyway.
-			 */
-			if (!swcount || swcount == SWAP_MAP_BAD)
-				continue;
-			retval = -ENOMEM;
-			break;
-		}
+		mm = list_entry(p, struct mm_struct, mmlist);
+		if (!mmget_not_zero(mm))
+			continue;
+		spin_unlock(&mmlist_lock);
+		mmput(prev_mm);
+		prev_mm = mm;
+		retval = unuse_mm(mm, type, frontswap, &pages_to_unuse);
 
-		/*
-		 * Don't hold on to start_mm if it looks like exiting.
-		 */
-		if (atomic_read(&start_mm->mm_users) == 1) {
-			mmput(start_mm);
-			start_mm = &init_mm;
-			mmget(&init_mm);
+		if (retval) {
+			mmput(prev_mm);
+			goto out;
 		}
 
 		/*
-		 * Wait for and lock page.  When do_swap_page races with
-		 * try_to_unuse, do_swap_page can handle the fault much
-		 * faster than try_to_unuse can locate the entry.  This
-		 * apparently redundant "wait_on_page_locked" lets try_to_unuse
-		 * defer to do_swap_page in such a case - in some tests,
-		 * do_swap_page and try_to_unuse repeatedly compete.
-		 */
-		wait_on_page_locked(page);
-		wait_on_page_writeback(page);
-		lock_page(page);
-		wait_on_page_writeback(page);
-
-		/*
-		 * Remove all references to entry.
+		 * Make sure that we aren't completely killing
+		 * interactive performance.
 		 */
-		swcount = *swap_map;
-		if (swap_count(swcount) == SWAP_MAP_SHMEM) {
-			retval = shmem_unuse(entry, page);
-			/* page has already been unlocked and released */
-			if (retval < 0)
-				break;
-			continue;
-		}
-		if (swap_count(swcount) && start_mm != &init_mm)
-			retval = unuse_mm(start_mm, entry, page);
-
-		if (swap_count(*swap_map)) {
-			int set_start_mm = (*swap_map >= swcount);
-			struct list_head *p = &start_mm->mmlist;
-			struct mm_struct *new_start_mm = start_mm;
-			struct mm_struct *prev_mm = start_mm;
-			struct mm_struct *mm;
-
-			mmget(new_start_mm);
-			mmget(prev_mm);
-			spin_lock(&mmlist_lock);
-			while (swap_count(*swap_map) && !retval &&
-					(p = p->next) != &start_mm->mmlist) {
-				mm = list_entry(p, struct mm_struct, mmlist);
-				if (!mmget_not_zero(mm))
-					continue;
-				spin_unlock(&mmlist_lock);
-				mmput(prev_mm);
-				prev_mm = mm;
+		cond_resched();
+		spin_lock(&mmlist_lock);
+	}
+	spin_unlock(&mmlist_lock);
 
-				cond_resched();
+	mmput(prev_mm);
 
-				swcount = *swap_map;
-				if (!swap_count(swcount)) /* any usage ? */
-					;
-				else if (mm == &init_mm)
-					set_start_mm = 1;
-				else
-					retval = unuse_mm(mm, entry, page);
-
-				if (set_start_mm && *swap_map < swcount) {
-					mmput(new_start_mm);
-					mmget(mm);
-					new_start_mm = mm;
-					set_start_mm = 0;
-				}
-				spin_lock(&mmlist_lock);
-			}
-			spin_unlock(&mmlist_lock);
-			mmput(prev_mm);
-			mmput(start_mm);
-			start_mm = new_start_mm;
-		}
-		if (retval) {
-			unlock_page(page);
-			put_page(page);
-			break;
-		}
+	i = 0;
+	while ((i = find_next_to_unuse(si, i, frontswap)) != 0) {
 
-		/*
-		 * If a reference remains (rare), we would like to leave
-		 * the page in the swap cache; but try_to_unmap could
-		 * then re-duplicate the entry once we drop page lock,
-		 * so we might loop indefinitely; also, that page could
-		 * not be swapped out to other storage meanwhile.  So:
-		 * delete from cache even if there's another reference,
-		 * after ensuring that the data has been saved to disk -
-		 * since if the reference remains (rarer), it will be
-		 * read from disk into another page.  Splitting into two
-		 * pages would be incorrect if swap supported "shared
-		 * private" pages, but they are handled by tmpfs files.
-		 *
-		 * Given how unuse_vma() targets one particular offset
-		 * in an anon_vma, once the anon_vma has been determined,
-		 * this splitting happens to be just what is needed to
-		 * handle where KSM pages have been swapped out: re-reading
-		 * is unnecessarily slow, but we can fix that later on.
-		 */
-		if (swap_count(*swap_map) &&
-		     PageDirty(page) && PageSwapCache(page)) {
-			struct writeback_control wbc = {
-				.sync_mode = WB_SYNC_NONE,
-			};
-
-			swap_writepage(compound_head(page), &wbc);
-			lock_page(page);
-			wait_on_page_writeback(page);
-		}
+		entry = swp_entry(type, i);
+		page = find_get_page(swap_address_space(entry), i);
+		if (!page)
+			continue;
 
 		/*
 		 * It is conceivable that a racing task removed this page from
-		 * swap cache just before we acquired the page lock at the top,
-		 * or while we dropped it in unuse_mm().  The page might even
-		 * be back in swap cache on another swap area: that we must not
-		 * delete, since it may not have been written out to swap yet.
-		 */
-		if (PageSwapCache(page) &&
-		    likely(page_private(page) == entry.val) &&
-		    (!PageTransCompound(page) ||
-		     !swap_page_trans_huge_swapped(si, entry)))
-			delete_from_swap_cache(compound_head(page));
-
-		/*
-		 * So we could skip searching mms once swap count went
-		 * to 1, we did not mark any present ptes as dirty: must
-		 * mark page dirty so shrink_page_list will preserve it.
+		 * swap cache just before we acquired the page lock. The page
+		 * might even be back in swap cache on another swap area. But
+		 * that is okay, try_to_free_swap() only removes stale pages.
 		 */
-		SetPageDirty(page);
+		lock_page(page);
+		wait_on_page_writeback(page);
+		try_to_free_swap(page);
 		unlock_page(page);
 		put_page(page);
 
 		/*
-		 * Make sure that we aren't completely killing
-		 * interactive performance.
+		 * For frontswap, we just need to unuse pages_to_unuse, if
+		 * it was specified. Need not check frontswap again here as
+		 * we already zeroed out pages_to_unuse if not frontswap.
 		 */
-		cond_resched();
-		if (frontswap && pages_to_unuse > 0) {
-			if (!--pages_to_unuse)
-				break;
-		}
+		if (pages_to_unuse && --pages_to_unuse == 0)
+			goto out;
 	}
 
-	mmput(start_mm);
-	return retval;
+	/*
+	 * Lets check again to see if there are still swap entries in the map.
+	 * If yes, we would need to do retry the unuse logic again.
+	 * Under global memory pressure, swap entries can be reinserted back
+	 * into process space after the mmlist loop above passes over them.
+	 * Its not worth continuosuly retrying to unuse the swap in this case.
+	 * So we try SWAP_UNUSE_MAX_TRIES times.
+	 */
+	if (++retries >= SWAP_UNUSE_MAX_TRIES)
+		retval = -EBUSY;
+	else if (si->inuse_pages)
+		goto retry;
+
+out:
+	return (retval == FRONTSWAP_PAGES_UNUSED) ? 0 : retval;
 }
 
 /*
-- 
cgit v1.2.3


From 6e2e07cd35f6f72d1950453b170f6bfb6c668c46 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Tue, 5 Mar 2019 15:47:36 -0800
Subject: mm: better document PG_reserved

The usage of PG_reserved and how PG_reserved pages are to be treated is
buried deep down in different parts of the kernel.  Let's shine some
light onto these details by documenting current users and expected
behavior.

Especially, clarify on the "Some of them might not even exist" case.
These are physical memory gaps that will never be dumped as they are not
marked as IORESOURCE_SYSRAM.  PG_reserved does in general not hinder
anybody from dumping or swapping.  In some cases, these pages will not
be stored in the hibernation image.

Link: http://lkml.kernel.org/r/20190114125903.24845-10-david@redhat.com
Signed-off-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: Pavel Tatashin <pasha.tatashin@oracle.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Alexander Duyck <alexander.h.duyck@linux.intel.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Anthony Yznaga <anthony.yznaga@oracle.com>
Cc: Miles Chen <miles.chen@mediatek.com>
Cc: <yi.z.zhang@linux.intel.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/page-flags.h | 33 +++++++++++++++++++++++++++++++--
 1 file changed, 31 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 808b4183e30d..9f8712a4b1a5 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -17,8 +17,37 @@
 /*
  * Various page->flags bits:
  *
- * PG_reserved is set for special pages, which can never be swapped out. Some
- * of them might not even exist...
+ * PG_reserved is set for special pages. The "struct page" of such a page
+ * should in general not be touched (e.g. set dirty) except by its owner.
+ * Pages marked as PG_reserved include:
+ * - Pages part of the kernel image (including vDSO) and similar (e.g. BIOS,
+ *   initrd, HW tables)
+ * - Pages reserved or allocated early during boot (before the page allocator
+ *   was initialized). This includes (depending on the architecture) the
+ *   initial vmemmap, initial page tables, crashkernel, elfcorehdr, and much
+ *   much more. Once (if ever) freed, PG_reserved is cleared and they will
+ *   be given to the page allocator.
+ * - Pages falling into physical memory gaps - not IORESOURCE_SYSRAM. Trying
+ *   to read/write these pages might end badly. Don't touch!
+ * - The zero page(s)
+ * - Pages not added to the page allocator when onlining a section because
+ *   they were excluded via the online_page_callback() or because they are
+ *   PG_hwpoison.
+ * - Pages allocated in the context of kexec/kdump (loaded kernel image,
+ *   control pages, vmcoreinfo)
+ * - MMIO/DMA pages. Some architectures don't allow to ioremap pages that are
+ *   not marked PG_reserved (as they might be in use by somebody else who does
+ *   not respect the caching strategy).
+ * - Pages part of an offline section (struct pages of offline sections should
+ *   not be trusted as they will be initialized when first onlined).
+ * - MCA pages on ia64
+ * - Pages holding CPU notes for POWER Firmware Assisted Dump
+ * - Device memory (e.g. PMEM, DAX, HMM)
+ * Some PG_reserved pages will be excluded from the hibernation image.
+ * PG_reserved does in general not hinder anybody from dumping or swapping
+ * and is no longer required for remap_pfn_range(). ioremap might require it.
+ * Consequently, PG_reserved for a page mapped into user space can indicate
+ * the zero page, the vDSO, MMIO pages or device memory.
  *
  * The PG_private bitflag is set on pagecache pages if they contain filesystem
  * specific data (which is normally at page->private). It can be used by
-- 
cgit v1.2.3


From d7fefcc8de9147cc37d0c00df12e7ea4f77999b5 Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com>
Date: Tue, 5 Mar 2019 15:47:40 -0800
Subject: mm/cma: add PF flag to force non cma alloc

Patch series "mm/kvm/vfio/ppc64: Migrate compound pages out of CMA
region", v8.

ppc64 uses the CMA area for the allocation of guest page table (hash
page table).  We won't be able to start guest if we fail to allocate
hash page table.  We have observed hash table allocation failure because
we failed to migrate pages out of CMA region because they were pinned.
This happen when we are using VFIO.  VFIO on ppc64 pins the entire guest
RAM.  If the guest RAM pages get allocated out of CMA region, we won't
be able to migrate those pages.  The pages are also pinned for the
lifetime of the guest.

Currently we support migration of non-compound pages.  With THP and with
the addition of hugetlb migration we can end up allocating compound
pages from CMA region.  This patch series add support for migrating
compound pages.

This patch (of 4):

Add PF_MEMALLOC_NOCMA which make sure any allocation in that context is
marked non-movable and hence cannot be satisfied by CMA region.

This is useful with get_user_pages_longterm where we want to take a page
pin by migrating pages from CMA region.  Marking the section
PF_MEMALLOC_NOCMA ensures that we avoid unnecessary page migration
later.

Link: http://lkml.kernel.org/r/20190114095438.32470-2-aneesh.kumar@linux.ibm.com
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Suggested-by: Andrea Arcangeli <aarcange@redhat.com>
Reviewed-by: Andrea Arcangeli <aarcange@redhat.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Alexey Kardashevskiy <aik@ozlabs.ru>
Cc: David Gibson <david@gibson.dropbear.id.au>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sched.h    |  1 +
 include/linux/sched/mm.h | 48 ++++++++++++++++++++++++++++++++++++++++--------
 2 files changed, 41 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index ebfb34fb9b30..36ec6e7e8291 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1407,6 +1407,7 @@ extern struct pid *cad_pid;
 #define PF_UMH			0x02000000	/* I'm an Usermodehelper process */
 #define PF_NO_SETAFFINITY	0x04000000	/* Userland is not allowed to meddle with cpus_allowed */
 #define PF_MCE_EARLY		0x08000000      /* Early kill for mce process policy */
+#define PF_MEMALLOC_NOCMA	0x10000000	/* All allocation request will have _GFP_MOVABLE cleared */
 #define PF_MUTEX_TESTER		0x20000000	/* Thread belongs to the rt mutex tester */
 #define PF_FREEZER_SKIP		0x40000000	/* Freezer should not count it as freezable */
 #define PF_SUSPEND_TASK		0x80000000      /* This thread called freeze_processes() and should not be frozen */
diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
index 3bfa6a0cbba4..0cd9f10423fb 100644
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -148,17 +148,25 @@ static inline bool in_vfork(struct task_struct *tsk)
  * Applies per-task gfp context to the given allocation flags.
  * PF_MEMALLOC_NOIO implies GFP_NOIO
  * PF_MEMALLOC_NOFS implies GFP_NOFS
+ * PF_MEMALLOC_NOCMA implies no allocation from CMA region.
  */
 static inline gfp_t current_gfp_context(gfp_t flags)
 {
-	/*
-	 * NOIO implies both NOIO and NOFS and it is a weaker context
-	 * so always make sure it makes precedence
-	 */
-	if (unlikely(current->flags & PF_MEMALLOC_NOIO))
-		flags &= ~(__GFP_IO | __GFP_FS);
-	else if (unlikely(current->flags & PF_MEMALLOC_NOFS))
-		flags &= ~__GFP_FS;
+	if (unlikely(current->flags &
+		     (PF_MEMALLOC_NOIO | PF_MEMALLOC_NOFS | PF_MEMALLOC_NOCMA))) {
+		/*
+		 * NOIO implies both NOIO and NOFS and it is a weaker context
+		 * so always make sure it makes precedence
+		 */
+		if (current->flags & PF_MEMALLOC_NOIO)
+			flags &= ~(__GFP_IO | __GFP_FS);
+		else if (current->flags & PF_MEMALLOC_NOFS)
+			flags &= ~__GFP_FS;
+#ifdef CONFIG_CMA
+		if (current->flags & PF_MEMALLOC_NOCMA)
+			flags &= ~__GFP_MOVABLE;
+#endif
+	}
 	return flags;
 }
 
@@ -248,6 +256,30 @@ static inline void memalloc_noreclaim_restore(unsigned int flags)
 	current->flags = (current->flags & ~PF_MEMALLOC) | flags;
 }
 
+#ifdef CONFIG_CMA
+static inline unsigned int memalloc_nocma_save(void)
+{
+	unsigned int flags = current->flags & PF_MEMALLOC_NOCMA;
+
+	current->flags |= PF_MEMALLOC_NOCMA;
+	return flags;
+}
+
+static inline void memalloc_nocma_restore(unsigned int flags)
+{
+	current->flags = (current->flags & ~PF_MEMALLOC_NOCMA) | flags;
+}
+#else
+static inline unsigned int memalloc_nocma_save(void)
+{
+	return 0;
+}
+
+static inline void memalloc_nocma_restore(unsigned int flags)
+{
+}
+#endif
+
 #ifdef CONFIG_MEMCG
 /**
  * memalloc_use_memcg - Starts the remote memcg charging scope.
-- 
cgit v1.2.3


From 9a4e9f3b2d7393d50256762c21e7466b4b6b1c9c Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com>
Date: Tue, 5 Mar 2019 15:47:44 -0800
Subject: mm: update get_user_pages_longterm to migrate pages allocated from
 CMA region

This patch updates get_user_pages_longterm to migrate pages allocated
out of CMA region.  This makes sure that we don't keep non-movable pages
(due to page reference count) in the CMA area.

This will be used by ppc64 in a later patch to avoid pinning pages in
the CMA region.  ppc64 uses CMA region for allocation of the hardware
page table (hash page table) and not able to migrate pages out of CMA
region results in page table allocation failures.

One case where we hit this easy is when a guest using a VFIO passthrough
device.  VFIO locks all the guest's memory and if the guest memory is
backed by CMA region, it becomes unmovable resulting in fragmenting the
CMA and possibly preventing other guests from allocation a large enough
hash page table.

NOTE: We allocate the new page without using __GFP_THISNODE

Link: http://lkml.kernel.org/r/20190114095438.32470-3-aneesh.kumar@linux.ibm.com
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Cc: Alexey Kardashevskiy <aik@ozlabs.ru>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: David Gibson <david@gibson.dropbear.id.au>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Mel Gorman <mgorman@techsingularity.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/hugetlb.h |   2 +
 include/linux/mm.h      |   3 +-
 mm/gup.c                | 200 ++++++++++++++++++++++++++++++++++++++++++------
 mm/hugetlb.c            |   4 +-
 4 files changed, 182 insertions(+), 27 deletions(-)

(limited to 'include')

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 54c317c8355f..ea35263eb76b 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -371,6 +371,8 @@ struct page *alloc_huge_page_nodemask(struct hstate *h, int preferred_nid,
 				nodemask_t *nmask);
 struct page *alloc_huge_page_vma(struct hstate *h, struct vm_area_struct *vma,
 				unsigned long address);
+struct page *alloc_migrate_huge_page(struct hstate *h, gfp_t gfp_mask,
+				     int nid, nodemask_t *nmask);
 int huge_add_to_page_cache(struct page *page, struct address_space *mapping,
 			pgoff_t idx);
 
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 80bb6408fe73..20ec56f8e2bb 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1536,7 +1536,8 @@ long get_user_pages_locked(unsigned long start, unsigned long nr_pages,
 		    unsigned int gup_flags, struct page **pages, int *locked);
 long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
 		    struct page **pages, unsigned int gup_flags);
-#ifdef CONFIG_FS_DAX
+
+#if defined(CONFIG_FS_DAX) || defined(CONFIG_CMA)
 long get_user_pages_longterm(unsigned long start, unsigned long nr_pages,
 			    unsigned int gup_flags, struct page **pages,
 			    struct vm_area_struct **vmas);
diff --git a/mm/gup.c b/mm/gup.c
index 75029649baca..22291db50013 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -13,6 +13,9 @@
 #include <linux/sched/signal.h>
 #include <linux/rwsem.h>
 #include <linux/hugetlb.h>
+#include <linux/migrate.h>
+#include <linux/mm_inline.h>
+#include <linux/sched/mm.h>
 
 #include <asm/mmu_context.h>
 #include <asm/pgtable.h>
@@ -1126,7 +1129,167 @@ long get_user_pages(unsigned long start, unsigned long nr_pages,
 }
 EXPORT_SYMBOL(get_user_pages);
 
+#if defined(CONFIG_FS_DAX) || defined (CONFIG_CMA)
+
 #ifdef CONFIG_FS_DAX
+static bool check_dax_vmas(struct vm_area_struct **vmas, long nr_pages)
+{
+	long i;
+	struct vm_area_struct *vma_prev = NULL;
+
+	for (i = 0; i < nr_pages; i++) {
+		struct vm_area_struct *vma = vmas[i];
+
+		if (vma == vma_prev)
+			continue;
+
+		vma_prev = vma;
+
+		if (vma_is_fsdax(vma))
+			return true;
+	}
+	return false;
+}
+#else
+static inline bool check_dax_vmas(struct vm_area_struct **vmas, long nr_pages)
+{
+	return false;
+}
+#endif
+
+#ifdef CONFIG_CMA
+static struct page *new_non_cma_page(struct page *page, unsigned long private)
+{
+	/*
+	 * We want to make sure we allocate the new page from the same node
+	 * as the source page.
+	 */
+	int nid = page_to_nid(page);
+	/*
+	 * Trying to allocate a page for migration. Ignore allocation
+	 * failure warnings. We don't force __GFP_THISNODE here because
+	 * this node here is the node where we have CMA reservation and
+	 * in some case these nodes will have really less non movable
+	 * allocation memory.
+	 */
+	gfp_t gfp_mask = GFP_USER | __GFP_NOWARN;
+
+	if (PageHighMem(page))
+		gfp_mask |= __GFP_HIGHMEM;
+
+#ifdef CONFIG_HUGETLB_PAGE
+	if (PageHuge(page)) {
+		struct hstate *h = page_hstate(page);
+		/*
+		 * We don't want to dequeue from the pool because pool pages will
+		 * mostly be from the CMA region.
+		 */
+		return alloc_migrate_huge_page(h, gfp_mask, nid, NULL);
+	}
+#endif
+	if (PageTransHuge(page)) {
+		struct page *thp;
+		/*
+		 * ignore allocation failure warnings
+		 */
+		gfp_t thp_gfpmask = GFP_TRANSHUGE | __GFP_NOWARN;
+
+		/*
+		 * Remove the movable mask so that we don't allocate from
+		 * CMA area again.
+		 */
+		thp_gfpmask &= ~__GFP_MOVABLE;
+		thp = __alloc_pages_node(nid, thp_gfpmask, HPAGE_PMD_ORDER);
+		if (!thp)
+			return NULL;
+		prep_transhuge_page(thp);
+		return thp;
+	}
+
+	return __alloc_pages_node(nid, gfp_mask, 0);
+}
+
+static long check_and_migrate_cma_pages(unsigned long start, long nr_pages,
+					unsigned int gup_flags,
+					struct page **pages,
+					struct vm_area_struct **vmas)
+{
+	long i;
+	bool drain_allow = true;
+	bool migrate_allow = true;
+	LIST_HEAD(cma_page_list);
+
+check_again:
+	for (i = 0; i < nr_pages; i++) {
+		/*
+		 * If we get a page from the CMA zone, since we are going to
+		 * be pinning these entries, we might as well move them out
+		 * of the CMA zone if possible.
+		 */
+		if (is_migrate_cma_page(pages[i])) {
+
+			struct page *head = compound_head(pages[i]);
+
+			if (PageHuge(head)) {
+				isolate_huge_page(head, &cma_page_list);
+			} else {
+				if (!PageLRU(head) && drain_allow) {
+					lru_add_drain_all();
+					drain_allow = false;
+				}
+
+				if (!isolate_lru_page(head)) {
+					list_add_tail(&head->lru, &cma_page_list);
+					mod_node_page_state(page_pgdat(head),
+							    NR_ISOLATED_ANON +
+							    page_is_file_cache(head),
+							    hpage_nr_pages(head));
+				}
+			}
+		}
+	}
+
+	if (!list_empty(&cma_page_list)) {
+		/*
+		 * drop the above get_user_pages reference.
+		 */
+		for (i = 0; i < nr_pages; i++)
+			put_page(pages[i]);
+
+		if (migrate_pages(&cma_page_list, new_non_cma_page,
+				  NULL, 0, MIGRATE_SYNC, MR_CONTIG_RANGE)) {
+			/*
+			 * some of the pages failed migration. Do get_user_pages
+			 * without migration.
+			 */
+			migrate_allow = false;
+
+			if (!list_empty(&cma_page_list))
+				putback_movable_pages(&cma_page_list);
+		}
+		/*
+		 * We did migrate all the pages, Try to get the page references again
+		 * migrating any new CMA pages which we failed to isolate earlier.
+		 */
+		nr_pages = get_user_pages(start, nr_pages, gup_flags, pages, vmas);
+		if ((nr_pages > 0) && migrate_allow) {
+			drain_allow = true;
+			goto check_again;
+		}
+	}
+
+	return nr_pages;
+}
+#else
+static inline long check_and_migrate_cma_pages(unsigned long start, long nr_pages,
+					       unsigned int gup_flags,
+					       struct page **pages,
+					       struct vm_area_struct **vmas)
+{
+	return nr_pages;
+}
+#endif
+
 /*
  * This is the same as get_user_pages() in that it assumes we are
  * operating on the current task's mm, but it goes further to validate
@@ -1140,11 +1303,11 @@ EXPORT_SYMBOL(get_user_pages);
  * Contrast this to iov_iter_get_pages() usages which are transient.
  */
 long get_user_pages_longterm(unsigned long start, unsigned long nr_pages,
-		unsigned int gup_flags, struct page **pages,
-		struct vm_area_struct **vmas_arg)
+			     unsigned int gup_flags, struct page **pages,
+			     struct vm_area_struct **vmas_arg)
 {
 	struct vm_area_struct **vmas = vmas_arg;
-	struct vm_area_struct *vma_prev = NULL;
+	unsigned long flags;
 	long rc, i;
 
 	if (!pages)
@@ -1157,31 +1320,20 @@ long get_user_pages_longterm(unsigned long start, unsigned long nr_pages,
 			return -ENOMEM;
 	}
 
+	flags = memalloc_nocma_save();
 	rc = get_user_pages(start, nr_pages, gup_flags, pages, vmas);
+	memalloc_nocma_restore(flags);
+	if (rc < 0)
+		goto out;
 
-	for (i = 0; i < rc; i++) {
-		struct vm_area_struct *vma = vmas[i];
-
-		if (vma == vma_prev)
-			continue;
-
-		vma_prev = vma;
-
-		if (vma_is_fsdax(vma))
-			break;
-	}
-
-	/*
-	 * Either get_user_pages() failed, or the vma validation
-	 * succeeded, in either case we don't need to put_page() before
-	 * returning.
-	 */
-	if (i >= rc)
+	if (check_dax_vmas(vmas, rc)) {
+		for (i = 0; i < rc; i++)
+			put_page(pages[i]);
+		rc = -EOPNOTSUPP;
 		goto out;
+	}
 
-	for (i = 0; i < rc; i++)
-		put_page(pages[i]);
-	rc = -EOPNOTSUPP;
+	rc = check_and_migrate_cma_pages(start, rc, gup_flags, pages, vmas);
 out:
 	if (vmas != vmas_arg)
 		kfree(vmas);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 0c7848fccf93..97b1e0290c66 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1587,8 +1587,8 @@ out_unlock:
 	return page;
 }
 
-static struct page *alloc_migrate_huge_page(struct hstate *h, gfp_t gfp_mask,
-		int nid, nodemask_t *nmask)
+struct page *alloc_migrate_huge_page(struct hstate *h, gfp_t gfp_mask,
+				     int nid, nodemask_t *nmask)
 {
 	struct page *page;
 
-- 
cgit v1.2.3


From ab3948f58ff841e51feb845720624665ef5b7ef3 Mon Sep 17 00:00:00 2001
From: "Joel Fernandes (Google)" <joel@joelfernandes.org>
Date: Tue, 5 Mar 2019 15:47:54 -0800
Subject: mm/memfd: add an F_SEAL_FUTURE_WRITE seal to memfd

Android uses ashmem for sharing memory regions.  We are looking forward
to migrating all usecases of ashmem to memfd so that we can possibly
remove the ashmem driver in the future from staging while also
benefiting from using memfd and contributing to it.  Note staging
drivers are also not ABI and generally can be removed at anytime.

One of the main usecases Android has is the ability to create a region
and mmap it as writeable, then add protection against making any
"future" writes while keeping the existing already mmap'ed
writeable-region active.  This allows us to implement a usecase where
receivers of the shared memory buffer can get a read-only view, while
the sender continues to write to the buffer.  See CursorWindow
documentation in Android for more details:

  https://developer.android.com/reference/android/database/CursorWindow

This usecase cannot be implemented with the existing F_SEAL_WRITE seal.
To support the usecase, this patch adds a new F_SEAL_FUTURE_WRITE seal
which prevents any future mmap and write syscalls from succeeding while
keeping the existing mmap active.

A better way to do F_SEAL_FUTURE_WRITE seal was discussed [1] last week
where we don't need to modify core VFS structures to get the same
behavior of the seal.  This solves several side-effects pointed by Andy.
self-tests are provided in later patch to verify the expected semantics.

[1] https://lore.kernel.org/lkml/20181111173650.GA256781@google.com/

Thanks a lot to Andy for suggestions to improve code.

Link: http://lkml.kernel.org/r/20190112203816.85534-2-joel@joelfernandes.org
Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
Acked-by: John Stultz <john.stultz@linaro.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Jann Horn <jannh@google.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: J. Bruce Fields <bfields@fieldses.org>
Cc: Jeff Layton <jlayton@kernel.org>
Cc: Marc-Andr Lureau <marcandre.lureau@redhat.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/hugetlbfs/inode.c       |  2 +-
 include/uapi/linux/fcntl.h |  1 +
 mm/memfd.c                 |  3 ++-
 mm/shmem.c                 | 25 ++++++++++++++++++++++---
 4 files changed, 26 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index a7fa037b876b..b0eef008de67 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -530,7 +530,7 @@ static long hugetlbfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
 		inode_lock(inode);
 
 		/* protected by i_mutex */
-		if (info->seals & F_SEAL_WRITE) {
+		if (info->seals & (F_SEAL_WRITE | F_SEAL_FUTURE_WRITE)) {
 			inode_unlock(inode);
 			return -EPERM;
 		}
diff --git a/include/uapi/linux/fcntl.h b/include/uapi/linux/fcntl.h
index 6448cdd9a350..a2f8658f1c55 100644
--- a/include/uapi/linux/fcntl.h
+++ b/include/uapi/linux/fcntl.h
@@ -41,6 +41,7 @@
 #define F_SEAL_SHRINK	0x0002	/* prevent file from shrinking */
 #define F_SEAL_GROW	0x0004	/* prevent file from growing */
 #define F_SEAL_WRITE	0x0008	/* prevent writes */
+#define F_SEAL_FUTURE_WRITE	0x0010  /* prevent future writes while mapped */
 /* (1U << 31) is reserved for signed error codes */
 
 /*
diff --git a/mm/memfd.c b/mm/memfd.c
index 97264c79d2cd..650e65a46b9c 100644
--- a/mm/memfd.c
+++ b/mm/memfd.c
@@ -131,7 +131,8 @@ static unsigned int *memfd_file_seals_ptr(struct file *file)
 #define F_ALL_SEALS (F_SEAL_SEAL | \
 		     F_SEAL_SHRINK | \
 		     F_SEAL_GROW | \
-		     F_SEAL_WRITE)
+		     F_SEAL_WRITE | \
+		     F_SEAL_FUTURE_WRITE)
 
 static int memfd_add_seals(struct file *file, unsigned int seals)
 {
diff --git a/mm/shmem.c b/mm/shmem.c
index 283a1833dafc..b3db3779a30a 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2190,6 +2190,24 @@ out_nomem:
 
 static int shmem_mmap(struct file *file, struct vm_area_struct *vma)
 {
+	struct shmem_inode_info *info = SHMEM_I(file_inode(file));
+
+	if (info->seals & F_SEAL_FUTURE_WRITE) {
+		/*
+		 * New PROT_WRITE and MAP_SHARED mmaps are not allowed when
+		 * "future write" seal active.
+		 */
+		if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_WRITE))
+			return -EPERM;
+
+		/*
+		 * Since the F_SEAL_FUTURE_WRITE seals allow for a MAP_SHARED
+		 * read-only mapping, take care to not allow mprotect to revert
+		 * protections.
+		 */
+		vma->vm_flags &= ~(VM_MAYWRITE);
+	}
+
 	file_accessed(file);
 	vma->vm_ops = &shmem_vm_ops;
 	if (IS_ENABLED(CONFIG_TRANSPARENT_HUGE_PAGECACHE) &&
@@ -2440,8 +2458,9 @@ shmem_write_begin(struct file *file, struct address_space *mapping,
 	pgoff_t index = pos >> PAGE_SHIFT;
 
 	/* i_mutex is held by caller */
-	if (unlikely(info->seals & (F_SEAL_WRITE | F_SEAL_GROW))) {
-		if (info->seals & F_SEAL_WRITE)
+	if (unlikely(info->seals & (F_SEAL_GROW |
+				   F_SEAL_WRITE | F_SEAL_FUTURE_WRITE))) {
+		if (info->seals & (F_SEAL_WRITE | F_SEAL_FUTURE_WRITE))
 			return -EPERM;
 		if ((info->seals & F_SEAL_GROW) && pos + len > inode->i_size)
 			return -EPERM;
@@ -2704,7 +2723,7 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
 		DECLARE_WAIT_QUEUE_HEAD_ONSTACK(shmem_falloc_waitq);
 
 		/* protected by i_mutex */
-		if (info->seals & F_SEAL_WRITE) {
+		if (info->seals & (F_SEAL_WRITE | F_SEAL_FUTURE_WRITE)) {
 			error = -EPERM;
 			goto out;
 		}
-- 
cgit v1.2.3


From 59118c42a60b997d277ad04d2309a6ec30682e5e Mon Sep 17 00:00:00 2001
From: Yang Shi <yang.shi@linux.alibaba.com>
Date: Tue, 5 Mar 2019 15:48:02 -0800
Subject: mm: swap: use mem_cgroup_is_root() instead of deferencing css->parent

mem_cgroup_is_root() is the preferred API to check if memcg is root or
not.  Use it instead of deferencing css->parent.

Link: http://lkml.kernel.org/r/1547232913-118148-1-git-send-email-yang.shi@linux.alibaba.com
Signed-off-by: Yang Shi <yang.shi@linux.alibaba.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Huang Ying <ying.huang@intel.com>
Cc: Tim Chen <tim.c.chen@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/swap.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/swap.h b/include/linux/swap.h
index 622025ac1461..649529be91f2 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -625,7 +625,7 @@ static inline int mem_cgroup_swappiness(struct mem_cgroup *memcg)
 		return vm_swappiness;
 
 	/* root ? */
-	if (mem_cgroup_disabled() || !memcg->css.parent)
+	if (mem_cgroup_disabled() || mem_cgroup_is_root(memcg))
 		return vm_swappiness;
 
 	return memcg->swappiness;
-- 
cgit v1.2.3


From b9726c26dc21b15a2faea96fae3a42f2f7fffdcb Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Tue, 5 Mar 2019 15:48:26 -0800
Subject: numa: make "nr_node_ids" unsigned int

Number of NUMA nodes can't be negative.

This saves a few bytes on x86_64:

	add/remove: 0/0 grow/shrink: 4/21 up/down: 27/-265 (-238)
	Function                                     old     new   delta
	hv_synic_alloc.cold                           88     110     +22
	prealloc_shrinker                            260     262      +2
	bootstrap                                    249     251      +2
	sched_init_numa                             1566    1567      +1
	show_slab_objects                            778     777      -1
	s_show                                      1201    1200      -1
	kmem_cache_init                              346     345      -1
	__alloc_workqueue_key                       1146    1145      -1
	mem_cgroup_css_alloc                        1614    1612      -2
	__do_sys_swapon                             4702    4699      -3
	__list_lru_init                              655     651      -4
	nic_probe                                   2379    2374      -5
	store_user_store                             118     111      -7
	red_zone_store                               106      99      -7
	poison_store                                 106      99      -7
	wq_numa_init                                 348     338     -10
	__kmem_cache_empty                            75      65     -10
	task_numa_free                               186     173     -13
	merge_across_nodes_store                     351     336     -15
	irq_create_affinity_masks                   1261    1246     -15
	do_numa_crng_init                            343     321     -22
	task_numa_fault                             4760    4737     -23
	swapfile_init                                179     156     -23
	hv_synic_alloc                               536     492     -44
	apply_wqattrs_prepare                        746     695     -51

Link: http://lkml.kernel.org/r/20190201223029.GA15820@avx2
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/arm64/mm/numa.c           | 2 +-
 arch/powerpc/mm/numa.c         | 2 +-
 arch/x86/kernel/setup_percpu.c | 2 +-
 arch/x86/mm/numa.c             | 4 ++--
 include/linux/nodemask.h       | 4 ++--
 mm/list_lru.c                  | 3 +--
 mm/memcontrol.c                | 2 +-
 mm/page_alloc.c                | 2 +-
 mm/slab.c                      | 3 +--
 mm/slub.c                      | 2 +-
 mm/swapfile.c                  | 2 +-
 mm/vmscan.c                    | 2 +-
 12 files changed, 14 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c
index ae34e3a1cef1..7a0a555b366a 100644
--- a/arch/arm64/mm/numa.c
+++ b/arch/arm64/mm/numa.c
@@ -120,7 +120,7 @@ static void __init setup_node_to_cpumask_map(void)
 	}
 
 	/* cpumask_of_node() will now work */
-	pr_debug("Node to cpumask map for %d nodes\n", nr_node_ids);
+	pr_debug("Node to cpumask map for %u nodes\n", nr_node_ids);
 }
 
 /*
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 270cefb75cca..df1e11ebbabb 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -84,7 +84,7 @@ static void __init setup_node_to_cpumask_map(void)
 		alloc_bootmem_cpumask_var(&node_to_cpumask_map[node]);
 
 	/* cpumask_of_node() will now work */
-	dbg("Node to cpumask map for %d nodes\n", nr_node_ids);
+	dbg("Node to cpumask map for %u nodes\n", nr_node_ids);
 }
 
 static int __init fake_numa_create_new_node(unsigned long end_pfn,
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index e8796fcd7e5a..13af08827eef 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -171,7 +171,7 @@ void __init setup_per_cpu_areas(void)
 	unsigned long delta;
 	int rc;
 
-	pr_info("NR_CPUS:%d nr_cpumask_bits:%d nr_cpu_ids:%u nr_node_ids:%d\n",
+	pr_info("NR_CPUS:%d nr_cpumask_bits:%d nr_cpu_ids:%u nr_node_ids:%u\n",
 		NR_CPUS, nr_cpumask_bits, nr_cpu_ids, nr_node_ids);
 
 	/*
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index 1308f5408bf7..12c1b7a83ed7 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -123,7 +123,7 @@ void __init setup_node_to_cpumask_map(void)
 		alloc_bootmem_cpumask_var(&node_to_cpumask_map[node]);
 
 	/* cpumask_of_node() will now work */
-	pr_debug("Node to cpumask map for %d nodes\n", nr_node_ids);
+	pr_debug("Node to cpumask map for %u nodes\n", nr_node_ids);
 }
 
 static int __init numa_add_memblk_to(int nid, u64 start, u64 end,
@@ -866,7 +866,7 @@ const struct cpumask *cpumask_of_node(int node)
 {
 	if (node >= nr_node_ids) {
 		printk(KERN_WARNING
-			"cpumask_of_node(%d): node > nr_node_ids(%d)\n",
+			"cpumask_of_node(%d): node > nr_node_ids(%u)\n",
 			node, nr_node_ids);
 		dump_stack();
 		return cpu_none_mask;
diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h
index 5a30ad594ccc..962c5e783d50 100644
--- a/include/linux/nodemask.h
+++ b/include/linux/nodemask.h
@@ -444,7 +444,7 @@ static inline int next_memory_node(int nid)
 	return next_node(nid, node_states[N_MEMORY]);
 }
 
-extern int nr_node_ids;
+extern unsigned int nr_node_ids;
 extern int nr_online_nodes;
 
 static inline void node_set_online(int nid)
@@ -485,7 +485,7 @@ static inline int num_node_state(enum node_states state)
 #define first_online_node	0
 #define first_memory_node	0
 #define next_online_node(nid)	(MAX_NUMNODES)
-#define nr_node_ids		1
+#define nr_node_ids		1U
 #define nr_online_nodes		1
 
 #define node_set_online(node)	   node_set_state((node), N_ONLINE)
diff --git a/mm/list_lru.c b/mm/list_lru.c
index 5b30625fd365..0730bf8ff39f 100644
--- a/mm/list_lru.c
+++ b/mm/list_lru.c
@@ -601,7 +601,6 @@ int __list_lru_init(struct list_lru *lru, bool memcg_aware,
 		    struct lock_class_key *key, struct shrinker *shrinker)
 {
 	int i;
-	size_t size = sizeof(*lru->node) * nr_node_ids;
 	int err = -ENOMEM;
 
 #ifdef CONFIG_MEMCG_KMEM
@@ -612,7 +611,7 @@ int __list_lru_init(struct list_lru *lru, bool memcg_aware,
 #endif
 	memcg_get_cache_ids();
 
-	lru->node = kzalloc(size, GFP_KERNEL);
+	lru->node = kcalloc(nr_node_ids, sizeof(*lru->node), GFP_KERNEL);
 	if (!lru->node)
 		goto out;
 
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 30bda8d7fb5c..45cd1f84268a 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -4429,7 +4429,7 @@ static void mem_cgroup_free(struct mem_cgroup *memcg)
 static struct mem_cgroup *mem_cgroup_alloc(void)
 {
 	struct mem_cgroup *memcg;
-	size_t size;
+	unsigned int size;
 	int node;
 
 	size = sizeof(struct mem_cgroup);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 11a5f50efd97..8df43caf2eb7 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -289,7 +289,7 @@ EXPORT_SYMBOL(movable_zone);
 #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
 
 #if MAX_NUMNODES > 1
-int nr_node_ids __read_mostly = MAX_NUMNODES;
+unsigned int nr_node_ids __read_mostly = MAX_NUMNODES;
 int nr_online_nodes __read_mostly = 1;
 EXPORT_SYMBOL(nr_node_ids);
 EXPORT_SYMBOL(nr_online_nodes);
diff --git a/mm/slab.c b/mm/slab.c
index 757e646baa5d..7510a1b489df 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -677,12 +677,11 @@ static struct alien_cache *__alloc_alien_cache(int node, int entries,
 static struct alien_cache **alloc_alien_cache(int node, int limit, gfp_t gfp)
 {
 	struct alien_cache **alc_ptr;
-	size_t memsize = sizeof(void *) * nr_node_ids;
 	int i;
 
 	if (limit > 1)
 		limit = 12;
-	alc_ptr = kzalloc_node(memsize, gfp, node);
+	alc_ptr = kcalloc_node(nr_node_ids, sizeof(void *), gfp, node);
 	if (!alc_ptr)
 		return NULL;
 
diff --git a/mm/slub.c b/mm/slub.c
index 017a2ce5ba23..1b08fbcb7e61 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -4262,7 +4262,7 @@ void __init kmem_cache_init(void)
 	cpuhp_setup_state_nocalls(CPUHP_SLUB_DEAD, "slub:dead", NULL,
 				  slub_cpu_dead);
 
-	pr_info("SLUB: HWalign=%d, Order=%u-%u, MinObjects=%u, CPUs=%u, Nodes=%d\n",
+	pr_info("SLUB: HWalign=%d, Order=%u-%u, MinObjects=%u, CPUs=%u, Nodes=%u\n",
 		cache_line_size(),
 		slub_min_order, slub_max_order, slub_min_objects,
 		nr_cpu_ids, nr_node_ids);
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 57e9b1b31d55..a14257ac0476 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -2713,7 +2713,7 @@ static struct swap_info_struct *alloc_swap_info(void)
 	struct swap_info_struct *p;
 	unsigned int type;
 	int i;
-	int size = sizeof(*p) + nr_node_ids * sizeof(struct plist_node);
+	unsigned int size = sizeof(*p) + nr_node_ids * sizeof(struct plist_node);
 
 	p = kvzalloc(size, GFP_KERNEL);
 	if (!p)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 209c2c78a087..e1f7ccdc0a90 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -374,7 +374,7 @@ unsigned long lruvec_lru_size(struct lruvec *lruvec, enum lru_list lru, int zone
  */
 int prealloc_shrinker(struct shrinker *shrinker)
 {
-	size_t size = sizeof(*shrinker->nr_deferred);
+	unsigned int size = sizeof(*shrinker->nr_deferred);
 
 	if (shrinker->flags & SHRINKER_NUMA_AWARE)
 		size *= nr_node_ids;
-- 
cgit v1.2.3


From ce0725f78a56a59bdb07cef003bc6fef722da38e Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Tue, 5 Mar 2019 15:48:29 -0800
Subject: numa: make "nr_online_nodes" unsigned int

Number of online NUMA nodes can't be negative as well.  This doesn't
save space as the variable is used only in 32-bit context, but do it
anyway for consistency.

Link: http://lkml.kernel.org/r/20190201223151.GB15820@avx2
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/nodemask.h | 4 ++--
 mm/page_alloc.c          | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h
index 962c5e783d50..27e7fa36f707 100644
--- a/include/linux/nodemask.h
+++ b/include/linux/nodemask.h
@@ -445,7 +445,7 @@ static inline int next_memory_node(int nid)
 }
 
 extern unsigned int nr_node_ids;
-extern int nr_online_nodes;
+extern unsigned int nr_online_nodes;
 
 static inline void node_set_online(int nid)
 {
@@ -486,7 +486,7 @@ static inline int num_node_state(enum node_states state)
 #define first_memory_node	0
 #define next_online_node(nid)	(MAX_NUMNODES)
 #define nr_node_ids		1U
-#define nr_online_nodes		1
+#define nr_online_nodes		1U
 
 #define node_set_online(node)	   node_set_state((node), N_ONLINE)
 #define node_set_offline(node)	   node_clear_state((node), N_ONLINE)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 8df43caf2eb7..c29828ec9183 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -290,7 +290,7 @@ EXPORT_SYMBOL(movable_zone);
 
 #if MAX_NUMNODES > 1
 unsigned int nr_node_ids __read_mostly = MAX_NUMNODES;
-int nr_online_nodes __read_mostly = 1;
+unsigned int nr_online_nodes __read_mostly = 1;
 EXPORT_SYMBOL(nr_node_ids);
 EXPORT_SYMBOL(nr_online_nodes);
 #endif
@@ -5664,7 +5664,7 @@ void __ref build_all_zonelists(pg_data_t *pgdat)
 	else
 		page_group_by_mobility_disabled = 0;
 
-	pr_info("Built %i zonelists, mobility grouping %s.  Total pages: %ld\n",
+	pr_info("Built %u zonelists, mobility grouping %s.  Total pages: %ld\n",
 		nr_online_nodes,
 		page_group_by_mobility_disabled ? "off" : "on",
 		vm_total_pages);
-- 
cgit v1.2.3


From 6d2bef9df7ccf3a2db0160be24f8b92a3f24708a Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Tue, 5 Mar 2019 15:48:33 -0800
Subject: mm/page_poison: update comment after code moved

mm/debug-pagealloc.c is no more, so of course header now needs to be
updated.  This seems like something checkpatch should be able to catch -
worth looking into?

Link: http://lkml.kernel.org/r/20190207191113.14039-1-mst@redhat.com
Fixes: 8823b1dbc05f ("mm/page_poison.c: enable PAGE_POISONING as a separate option")
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/poison.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/poison.h b/include/linux/poison.h
index 15927ebc22f2..5046bad0c1c5 100644
--- a/include/linux/poison.h
+++ b/include/linux/poison.h
@@ -30,7 +30,7 @@
  */
 #define TIMER_ENTRY_STATIC	((void *) 0x300 + POISON_POINTER_DELTA)
 
-/********** mm/debug-pagealloc.c **********/
+/********** mm/page_poison.c **********/
 #ifdef CONFIG_PAGE_POISONING_ZERO
 #define PAGE_POISON 0x00
 #else
-- 
cgit v1.2.3


From 494eec70f054965e2e699db450cde2c08db1c008 Mon Sep 17 00:00:00 2001
From: "john.hubbard@gmail.com" <john.hubbard@gmail.com>
Date: Tue, 5 Mar 2019 15:48:49 -0800
Subject: mm: page_cache_add_speculative(): refactor out some code duplication

From: John Hubbard <jhubbard@nvidia.com>

This combines the common elements of these routines:

    page_cache_get_speculative()
    page_cache_add_speculative()

This was anticipated by the original author, as shown by the comment in
commit ce0ad7f095258 ("powerpc/mm: Lockless get_user_pages_fast() for
64-bit (v3)"):

    "Same as above, but add instead of inc (could just be merged)"

There is no intention to introduce any behavioral change, but there is a
small risk of that, due to slightly differing ways of expressing the
TINY_RCU and related configurations.

This also removes the VM_BUG_ON(in_interrupt()) that was in
page_cache_add_speculative(), but not in page_cache_get_speculative().
This provides slightly less detection of such bugs, but it given that it
was only there on the "add" path anyway, we can likely do without it
just fine.

And it removes the
VM_BUG_ON_PAGE(PageCompound(page) && page != compound_head(page), page);
that page_cache_add_speculative() had.

Link: http://lkml.kernel.org/r/20190206231016.22734-2-jhubbard@nvidia.com
Signed-off-by: John Hubbard <jhubbard@nvidia.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Dave Kleikamp <shaggy@linux.vnet.ibm.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Jeff Layton <jlayton@kernel.org>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/pagemap.h | 31 +++++++++----------------------
 1 file changed, 9 insertions(+), 22 deletions(-)

(limited to 'include')

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index e2d7039af6a3..b477a70cc2e4 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -164,7 +164,7 @@ void release_pages(struct page **pages, int nr);
  * will find the page or it will not. Likewise, the old find_get_page could run
  * either before the insertion or afterwards, depending on timing.
  */
-static inline int page_cache_get_speculative(struct page *page)
+static inline int __page_cache_add_speculative(struct page *page, int count)
 {
 #ifdef CONFIG_TINY_RCU
 # ifdef CONFIG_PREEMPT_COUNT
@@ -180,10 +180,10 @@ static inline int page_cache_get_speculative(struct page *page)
 	 * SMP requires.
 	 */
 	VM_BUG_ON_PAGE(page_count(page) == 0, page);
-	page_ref_inc(page);
+	page_ref_add(page, count);
 
 #else
-	if (unlikely(!get_page_unless_zero(page))) {
+	if (unlikely(!page_ref_add_unless(page, count, 0))) {
 		/*
 		 * Either the page has been freed, or will be freed.
 		 * In either case, retry here and the caller should
@@ -197,27 +197,14 @@ static inline int page_cache_get_speculative(struct page *page)
 	return 1;
 }
 
-/*
- * Same as above, but add instead of inc (could just be merged)
- */
-static inline int page_cache_add_speculative(struct page *page, int count)
+static inline int page_cache_get_speculative(struct page *page)
 {
-	VM_BUG_ON(in_interrupt());
-
-#if !defined(CONFIG_SMP) && defined(CONFIG_TREE_RCU)
-# ifdef CONFIG_PREEMPT_COUNT
-	VM_BUG_ON(!in_atomic() && !irqs_disabled());
-# endif
-	VM_BUG_ON_PAGE(page_count(page) == 0, page);
-	page_ref_add(page, count);
-
-#else
-	if (unlikely(!page_ref_add_unless(page, count, 0)))
-		return 0;
-#endif
-	VM_BUG_ON_PAGE(PageCompound(page) && page != compound_head(page), page);
+	return __page_cache_add_speculative(page, 1);
+}
 
-	return 1;
+static inline int page_cache_add_speculative(struct page *page, int count)
+{
+	return __page_cache_add_speculative(page, count);
 }
 
 #ifdef CONFIG_NUMA
-- 
cgit v1.2.3


From ace451eb5ec5bb432fc28d8a723838b88e28643e Mon Sep 17 00:00:00 2001
From: Yu Zhao <yuzhao@google.com>
Date: Tue, 5 Mar 2019 15:48:56 -0800
Subject: include/linux/compaction.h: fix potential build error

Declaration of struct node is required regardless.  On UMA systems,
including compaction.h without preceding node.h shouldn't cause a build
error.

Link: http://lkml.kernel.org/r/20190208080437.253322-1-yuzhao@google.com
Signed-off-by: Yu Zhao <yuzhao@google.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Acked-by: Michal Hocko <mhocko@suse.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/compaction.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/compaction.h b/include/linux/compaction.h
index c960923d9ec2..9569e7c786d3 100644
--- a/include/linux/compaction.h
+++ b/include/linux/compaction.h
@@ -226,8 +226,8 @@ static inline void wakeup_kcompactd(pg_data_t *pgdat, int order, int classzone_i
 
 #endif /* CONFIG_COMPACTION */
 
-#if defined(CONFIG_COMPACTION) && defined(CONFIG_SYSFS) && defined(CONFIG_NUMA)
 struct node;
+#if defined(CONFIG_COMPACTION) && defined(CONFIG_SYSFS) && defined(CONFIG_NUMA)
 extern int compaction_register_node(struct node *node);
 extern void compaction_unregister_node(struct node *node);
 
-- 
cgit v1.2.3


From a7ca12f9d905e7437dd3beb9cbb8e85bc2b991f4 Mon Sep 17 00:00:00 2001
From: Andrey Ryabinin <aryabinin@virtuozzo.com>
Date: Tue, 5 Mar 2019 15:49:35 -0800
Subject: mm/workingset: remove unused @mapping argument in
 workingset_eviction()

workingset_eviction() doesn't use and never did use the @mapping
argument.  Remove it.

Link: http://lkml.kernel.org/r/20190228083329.31892-1-aryabinin@virtuozzo.com
Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Rik van Riel <riel@surriel.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: Mel Gorman <mgorman@techsingularity.net>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: William Kucharski <william.kucharski@oracle.com>
Cc: John Hubbard <jhubbard@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/swap.h | 2 +-
 mm/vmscan.c          | 2 +-
 mm/workingset.c      | 5 ++---
 3 files changed, 4 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/swap.h b/include/linux/swap.h
index 649529be91f2..fc50e21b3b88 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -307,7 +307,7 @@ struct vma_swap_readahead {
 };
 
 /* linux/mm/workingset.c */
-void *workingset_eviction(struct address_space *mapping, struct page *page);
+void *workingset_eviction(struct page *page);
 void workingset_refault(struct page *page, void *shadow);
 void workingset_activation(struct page *page);
 
diff --git a/mm/vmscan.c b/mm/vmscan.c
index e1f7ccdc0a90..dda6b80d045f 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -952,7 +952,7 @@ static int __remove_mapping(struct address_space *mapping, struct page *page,
 		 */
 		if (reclaimed && page_is_file_cache(page) &&
 		    !mapping_exiting(mapping) && !dax_mapping(mapping))
-			shadow = workingset_eviction(mapping, page);
+			shadow = workingset_eviction(page);
 		__delete_from_page_cache(page, shadow);
 		xa_unlock_irqrestore(&mapping->i_pages, flags);
 
diff --git a/mm/workingset.c b/mm/workingset.c
index dcb994f2acc2..0bedf67502d5 100644
--- a/mm/workingset.c
+++ b/mm/workingset.c
@@ -215,13 +215,12 @@ static void unpack_shadow(void *shadow, int *memcgidp, pg_data_t **pgdat,
 
 /**
  * workingset_eviction - note the eviction of a page from memory
- * @mapping: address space the page was backing
  * @page: the page being evicted
  *
- * Returns a shadow entry to be stored in @mapping->i_pages in place
+ * Returns a shadow entry to be stored in @page->mapping->i_pages in place
  * of the evicted @page so that a later refault can be detected.
  */
-void *workingset_eviction(struct address_space *mapping, struct page *page)
+void *workingset_eviction(struct page *page)
 {
 	struct pglist_data *pgdat = page_pgdat(page);
 	struct mem_cgroup *memcg = page_memcg(page);
-- 
cgit v1.2.3


From f4b7e272b5c0425915e2115068e0a5a20a3a628e Mon Sep 17 00:00:00 2001
From: Andrey Ryabinin <aryabinin@virtuozzo.com>
Date: Tue, 5 Mar 2019 15:49:39 -0800
Subject: mm: remove zone_lru_lock() function, access ->lru_lock directly

We have common pattern to access lru_lock from a page pointer:
	zone_lru_lock(page_zone(page))

Which is silly, because it unfolds to this:
	&NODE_DATA(page_to_nid(page))->node_zones[page_zonenum(page)]->zone_pgdat->lru_lock
while we can simply do
	&NODE_DATA(page_to_nid(page))->lru_lock

Remove zone_lru_lock() function, since it's only complicate things.  Use
'page_pgdat(page)->lru_lock' pattern instead.

[aryabinin@virtuozzo.com: a slightly better version of __split_huge_page()]
  Link: http://lkml.kernel.org/r/20190301121651.7741-1-aryabinin@virtuozzo.com
Link: http://lkml.kernel.org/r/20190228083329.31892-2-aryabinin@virtuozzo.com
Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: Mel Gorman <mgorman@techsingularity.net>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Rik van Riel <riel@surriel.com>
Cc: William Kucharski <william.kucharski@oracle.com>
Cc: John Hubbard <jhubbard@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/cgroup-v1/memcg_test.txt |  4 ++--
 Documentation/cgroup-v1/memory.txt     |  4 ++--
 include/linux/mm_types.h               |  2 +-
 include/linux/mmzone.h                 |  4 ----
 mm/compaction.c                        | 15 ++++++++-------
 mm/filemap.c                           |  4 ++--
 mm/huge_memory.c                       | 10 +++++-----
 mm/memcontrol.c                        | 14 +++++++-------
 mm/mlock.c                             | 14 +++++++-------
 mm/page_idle.c                         |  8 ++++----
 mm/rmap.c                              |  2 +-
 mm/swap.c                              | 16 ++++++++--------
 mm/vmscan.c                            | 16 ++++++++--------
 13 files changed, 55 insertions(+), 58 deletions(-)

(limited to 'include')

diff --git a/Documentation/cgroup-v1/memcg_test.txt b/Documentation/cgroup-v1/memcg_test.txt
index 5c7f310f32bb..621e29ffb358 100644
--- a/Documentation/cgroup-v1/memcg_test.txt
+++ b/Documentation/cgroup-v1/memcg_test.txt
@@ -107,9 +107,9 @@ Under below explanation, we assume CONFIG_MEM_RES_CTRL_SWAP=y.
 
 8. LRU
         Each memcg has its own private LRU. Now, its handling is under global
-	VM's control (means that it's handled under global zone_lru_lock).
+	VM's control (means that it's handled under global pgdat->lru_lock).
 	Almost all routines around memcg's LRU is called by global LRU's
-	list management functions under zone_lru_lock().
+	list management functions under pgdat->lru_lock.
 
 	A special function is mem_cgroup_isolate_pages(). This scans
 	memcg's private LRU and call __isolate_lru_page() to extract a page
diff --git a/Documentation/cgroup-v1/memory.txt b/Documentation/cgroup-v1/memory.txt
index 3682e99234c2..a347fc9293e5 100644
--- a/Documentation/cgroup-v1/memory.txt
+++ b/Documentation/cgroup-v1/memory.txt
@@ -267,11 +267,11 @@ When oom event notifier is registered, event will be delivered.
    Other lock order is following:
    PG_locked.
    mm->page_table_lock
-       zone_lru_lock
+       pgdat->lru_lock
 	  lock_page_cgroup.
   In many cases, just lock_page_cgroup() is called.
   per-zone-per-cgroup LRU (cgroup's private LRU) is just guarded by
-  zone_lru_lock, it has no lock of its own.
+  pgdat->lru_lock, it has no lock of its own.
 
 2.7 Kernel Memory Extension (CONFIG_MEMCG_KMEM)
 
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 0a36a22228e7..ab9b48420200 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -80,7 +80,7 @@ struct page {
 		struct {	/* Page cache and anonymous pages */
 			/**
 			 * @lru: Pageout list, eg. active_list protected by
-			 * zone_lru_lock.  Sometimes used as a generic list
+			 * pgdat->lru_lock.  Sometimes used as a generic list
 			 * by the page owner.
 			 */
 			struct list_head lru;
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 6d3290cd1f6f..fba7741533be 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -730,10 +730,6 @@ typedef struct pglist_data {
 
 #define node_start_pfn(nid)	(NODE_DATA(nid)->node_start_pfn)
 #define node_end_pfn(nid) pgdat_end_pfn(NODE_DATA(nid))
-static inline spinlock_t *zone_lru_lock(struct zone *zone)
-{
-	return &zone->zone_pgdat->lru_lock;
-}
 
 static inline struct lruvec *node_lruvec(struct pglist_data *pgdat)
 {
diff --git a/mm/compaction.c b/mm/compaction.c
index 1cc871da3fda..e054276cf397 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -775,6 +775,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
 			unsigned long end_pfn, isolate_mode_t isolate_mode)
 {
 	struct zone *zone = cc->zone;
+	pg_data_t *pgdat = zone->zone_pgdat;
 	unsigned long nr_scanned = 0, nr_isolated = 0;
 	struct lruvec *lruvec;
 	unsigned long flags = 0;
@@ -839,8 +840,8 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
 		 * if contended.
 		 */
 		if (!(low_pfn % SWAP_CLUSTER_MAX)
-		    && compact_unlock_should_abort(zone_lru_lock(zone), flags,
-								&locked, cc))
+		    && compact_unlock_should_abort(&pgdat->lru_lock,
+					    flags, &locked, cc))
 			break;
 
 		if (!pfn_valid_within(low_pfn))
@@ -910,7 +911,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
 			if (unlikely(__PageMovable(page)) &&
 					!PageIsolated(page)) {
 				if (locked) {
-					spin_unlock_irqrestore(zone_lru_lock(zone),
+					spin_unlock_irqrestore(&pgdat->lru_lock,
 									flags);
 					locked = false;
 				}
@@ -940,7 +941,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
 
 		/* If we already hold the lock, we can skip some rechecking */
 		if (!locked) {
-			locked = compact_lock_irqsave(zone_lru_lock(zone),
+			locked = compact_lock_irqsave(&pgdat->lru_lock,
 								&flags, cc);
 
 			/* Try get exclusive access under lock */
@@ -965,7 +966,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
 			}
 		}
 
-		lruvec = mem_cgroup_page_lruvec(page, zone->zone_pgdat);
+		lruvec = mem_cgroup_page_lruvec(page, pgdat);
 
 		/* Try isolate the page */
 		if (__isolate_lru_page(page, isolate_mode) != 0)
@@ -1007,7 +1008,7 @@ isolate_fail:
 		 */
 		if (nr_isolated) {
 			if (locked) {
-				spin_unlock_irqrestore(zone_lru_lock(zone), flags);
+				spin_unlock_irqrestore(&pgdat->lru_lock, flags);
 				locked = false;
 			}
 			putback_movable_pages(&cc->migratepages);
@@ -1034,7 +1035,7 @@ isolate_fail:
 
 isolate_abort:
 	if (locked)
-		spin_unlock_irqrestore(zone_lru_lock(zone), flags);
+		spin_unlock_irqrestore(&pgdat->lru_lock, flags);
 
 	/*
 	 * Updated the cached scanner pfn once the pageblock has been scanned
diff --git a/mm/filemap.c b/mm/filemap.c
index a41e01c472f3..a3b4021c448f 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -98,8 +98,8 @@
  *    ->swap_lock		(try_to_unmap_one)
  *    ->private_lock		(try_to_unmap_one)
  *    ->i_pages lock		(try_to_unmap_one)
- *    ->zone_lru_lock(zone)	(follow_page->mark_page_accessed)
- *    ->zone_lru_lock(zone)	(check_pte_range->isolate_lru_page)
+ *    ->pgdat->lru_lock		(follow_page->mark_page_accessed)
+ *    ->pgdat->lru_lock		(check_pte_range->isolate_lru_page)
  *    ->private_lock		(page_remove_rmap->set_page_dirty)
  *    ->i_pages lock		(page_remove_rmap->set_page_dirty)
  *    bdi.wb->list_lock		(page_remove_rmap->set_page_dirty)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index d4847026d4b1..fcf657886b4b 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2440,11 +2440,11 @@ static void __split_huge_page(struct page *page, struct list_head *list,
 		pgoff_t end, unsigned long flags)
 {
 	struct page *head = compound_head(page);
-	struct zone *zone = page_zone(head);
+	pg_data_t *pgdat = page_pgdat(head);
 	struct lruvec *lruvec;
 	int i;
 
-	lruvec = mem_cgroup_page_lruvec(head, zone->zone_pgdat);
+	lruvec = mem_cgroup_page_lruvec(head, pgdat);
 
 	/* complete memcg works before add pages to LRU */
 	mem_cgroup_split_huge_fixup(head);
@@ -2475,7 +2475,7 @@ static void __split_huge_page(struct page *page, struct list_head *list,
 		xa_unlock(&head->mapping->i_pages);
 	}
 
-	spin_unlock_irqrestore(zone_lru_lock(page_zone(head)), flags);
+	spin_unlock_irqrestore(&pgdat->lru_lock, flags);
 
 	remap_page(head);
 
@@ -2686,7 +2686,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
 		lru_add_drain();
 
 	/* prevent PageLRU to go away from under us, and freeze lru stats */
-	spin_lock_irqsave(zone_lru_lock(page_zone(head)), flags);
+	spin_lock_irqsave(&pgdata->lru_lock, flags);
 
 	if (mapping) {
 		XA_STATE(xas, &mapping->i_pages, page_index(head));
@@ -2731,7 +2731,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
 		spin_unlock(&pgdata->split_queue_lock);
 fail:		if (mapping)
 			xa_unlock(&mapping->i_pages);
-		spin_unlock_irqrestore(zone_lru_lock(page_zone(head)), flags);
+		spin_unlock_irqrestore(&pgdata->lru_lock, flags);
 		remap_page(head);
 		ret = -EBUSY;
 	}
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 45cd1f84268a..7160cfab8107 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2362,13 +2362,13 @@ static void cancel_charge(struct mem_cgroup *memcg, unsigned int nr_pages)
 
 static void lock_page_lru(struct page *page, int *isolated)
 {
-	struct zone *zone = page_zone(page);
+	pg_data_t *pgdat = page_pgdat(page);
 
-	spin_lock_irq(zone_lru_lock(zone));
+	spin_lock_irq(&pgdat->lru_lock);
 	if (PageLRU(page)) {
 		struct lruvec *lruvec;
 
-		lruvec = mem_cgroup_page_lruvec(page, zone->zone_pgdat);
+		lruvec = mem_cgroup_page_lruvec(page, pgdat);
 		ClearPageLRU(page);
 		del_page_from_lru_list(page, lruvec, page_lru(page));
 		*isolated = 1;
@@ -2378,17 +2378,17 @@ static void lock_page_lru(struct page *page, int *isolated)
 
 static void unlock_page_lru(struct page *page, int isolated)
 {
-	struct zone *zone = page_zone(page);
+	pg_data_t *pgdat = page_pgdat(page);
 
 	if (isolated) {
 		struct lruvec *lruvec;
 
-		lruvec = mem_cgroup_page_lruvec(page, zone->zone_pgdat);
+		lruvec = mem_cgroup_page_lruvec(page, pgdat);
 		VM_BUG_ON_PAGE(PageLRU(page), page);
 		SetPageLRU(page);
 		add_page_to_lru_list(page, lruvec, page_lru(page));
 	}
-	spin_unlock_irq(zone_lru_lock(zone));
+	spin_unlock_irq(&pgdat->lru_lock);
 }
 
 static void commit_charge(struct page *page, struct mem_cgroup *memcg,
@@ -2674,7 +2674,7 @@ void __memcg_kmem_uncharge(struct page *page, int order)
 
 /*
  * Because tail pages are not marked as "used", set it. We're under
- * zone_lru_lock and migration entries setup in all page mappings.
+ * pgdat->lru_lock and migration entries setup in all page mappings.
  */
 void mem_cgroup_split_huge_fixup(struct page *head)
 {
diff --git a/mm/mlock.c b/mm/mlock.c
index 41cc47e28ad6..080f3b36415b 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -182,7 +182,7 @@ static void __munlock_isolation_failed(struct page *page)
 unsigned int munlock_vma_page(struct page *page)
 {
 	int nr_pages;
-	struct zone *zone = page_zone(page);
+	pg_data_t *pgdat = page_pgdat(page);
 
 	/* For try_to_munlock() and to serialize with page migration */
 	BUG_ON(!PageLocked(page));
@@ -194,7 +194,7 @@ unsigned int munlock_vma_page(struct page *page)
 	 * might otherwise copy PageMlocked to part of the tail pages before
 	 * we clear it in the head page. It also stabilizes hpage_nr_pages().
 	 */
-	spin_lock_irq(zone_lru_lock(zone));
+	spin_lock_irq(&pgdat->lru_lock);
 
 	if (!TestClearPageMlocked(page)) {
 		/* Potentially, PTE-mapped THP: do not skip the rest PTEs */
@@ -203,17 +203,17 @@ unsigned int munlock_vma_page(struct page *page)
 	}
 
 	nr_pages = hpage_nr_pages(page);
-	__mod_zone_page_state(zone, NR_MLOCK, -nr_pages);
+	__mod_zone_page_state(page_zone(page), NR_MLOCK, -nr_pages);
 
 	if (__munlock_isolate_lru_page(page, true)) {
-		spin_unlock_irq(zone_lru_lock(zone));
+		spin_unlock_irq(&pgdat->lru_lock);
 		__munlock_isolated_page(page);
 		goto out;
 	}
 	__munlock_isolation_failed(page);
 
 unlock_out:
-	spin_unlock_irq(zone_lru_lock(zone));
+	spin_unlock_irq(&pgdat->lru_lock);
 
 out:
 	return nr_pages - 1;
@@ -298,7 +298,7 @@ static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone)
 	pagevec_init(&pvec_putback);
 
 	/* Phase 1: page isolation */
-	spin_lock_irq(zone_lru_lock(zone));
+	spin_lock_irq(&zone->zone_pgdat->lru_lock);
 	for (i = 0; i < nr; i++) {
 		struct page *page = pvec->pages[i];
 
@@ -325,7 +325,7 @@ static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone)
 		pvec->pages[i] = NULL;
 	}
 	__mod_zone_page_state(zone, NR_MLOCK, delta_munlocked);
-	spin_unlock_irq(zone_lru_lock(zone));
+	spin_unlock_irq(&zone->zone_pgdat->lru_lock);
 
 	/* Now we can release pins of pages that we are not munlocking */
 	pagevec_release(&pvec_putback);
diff --git a/mm/page_idle.c b/mm/page_idle.c
index b9e4b42b33ab..0b39ec0c945c 100644
--- a/mm/page_idle.c
+++ b/mm/page_idle.c
@@ -31,7 +31,7 @@
 static struct page *page_idle_get_page(unsigned long pfn)
 {
 	struct page *page;
-	struct zone *zone;
+	pg_data_t *pgdat;
 
 	if (!pfn_valid(pfn))
 		return NULL;
@@ -41,13 +41,13 @@ static struct page *page_idle_get_page(unsigned long pfn)
 	    !get_page_unless_zero(page))
 		return NULL;
 
-	zone = page_zone(page);
-	spin_lock_irq(zone_lru_lock(zone));
+	pgdat = page_pgdat(page);
+	spin_lock_irq(&pgdat->lru_lock);
 	if (unlikely(!PageLRU(page))) {
 		put_page(page);
 		page = NULL;
 	}
-	spin_unlock_irq(zone_lru_lock(zone));
+	spin_unlock_irq(&pgdat->lru_lock);
 	return page;
 }
 
diff --git a/mm/rmap.c b/mm/rmap.c
index 0454ecc29537..b30c7c71d1d9 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -27,7 +27,7 @@
  *         mapping->i_mmap_rwsem
  *           anon_vma->rwsem
  *             mm->page_table_lock or pte_lock
- *               zone_lru_lock (in mark_page_accessed, isolate_lru_page)
+ *               pgdat->lru_lock (in mark_page_accessed, isolate_lru_page)
  *               swap_lock (in swap_duplicate, swap_info_get)
  *                 mmlist_lock (in mmput, drain_mmlist and others)
  *                 mapping->private_lock (in __set_page_dirty_buffers)
diff --git a/mm/swap.c b/mm/swap.c
index 4d7d37eb3c40..301ed4e04320 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -58,16 +58,16 @@ static DEFINE_PER_CPU(struct pagevec, activate_page_pvecs);
 static void __page_cache_release(struct page *page)
 {
 	if (PageLRU(page)) {
-		struct zone *zone = page_zone(page);
+		pg_data_t *pgdat = page_pgdat(page);
 		struct lruvec *lruvec;
 		unsigned long flags;
 
-		spin_lock_irqsave(zone_lru_lock(zone), flags);
-		lruvec = mem_cgroup_page_lruvec(page, zone->zone_pgdat);
+		spin_lock_irqsave(&pgdat->lru_lock, flags);
+		lruvec = mem_cgroup_page_lruvec(page, pgdat);
 		VM_BUG_ON_PAGE(!PageLRU(page), page);
 		__ClearPageLRU(page);
 		del_page_from_lru_list(page, lruvec, page_off_lru(page));
-		spin_unlock_irqrestore(zone_lru_lock(zone), flags);
+		spin_unlock_irqrestore(&pgdat->lru_lock, flags);
 	}
 	__ClearPageWaiters(page);
 	mem_cgroup_uncharge(page);
@@ -322,12 +322,12 @@ static inline void activate_page_drain(int cpu)
 
 void activate_page(struct page *page)
 {
-	struct zone *zone = page_zone(page);
+	pg_data_t *pgdat = page_pgdat(page);
 
 	page = compound_head(page);
-	spin_lock_irq(zone_lru_lock(zone));
-	__activate_page(page, mem_cgroup_page_lruvec(page, zone->zone_pgdat), NULL);
-	spin_unlock_irq(zone_lru_lock(zone));
+	spin_lock_irq(&pgdat->lru_lock);
+	__activate_page(page, mem_cgroup_page_lruvec(page, pgdat), NULL);
+	spin_unlock_irq(&pgdat->lru_lock);
 }
 #endif
 
diff --git a/mm/vmscan.c b/mm/vmscan.c
index dda6b80d045f..a5ad0b35ab8e 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1614,8 +1614,8 @@ static __always_inline void update_lru_sizes(struct lruvec *lruvec,
 
 }
 
-/*
- * zone_lru_lock is heavily contended.  Some of the functions that
+/**
+ * pgdat->lru_lock is heavily contended.  Some of the functions that
  * shrink the lists perform better by taking out a batch of pages
  * and working on them outside the LRU lock.
  *
@@ -1750,11 +1750,11 @@ int isolate_lru_page(struct page *page)
 	WARN_RATELIMIT(PageTail(page), "trying to isolate tail page");
 
 	if (PageLRU(page)) {
-		struct zone *zone = page_zone(page);
+		pg_data_t *pgdat = page_pgdat(page);
 		struct lruvec *lruvec;
 
-		spin_lock_irq(zone_lru_lock(zone));
-		lruvec = mem_cgroup_page_lruvec(page, zone->zone_pgdat);
+		spin_lock_irq(&pgdat->lru_lock);
+		lruvec = mem_cgroup_page_lruvec(page, pgdat);
 		if (PageLRU(page)) {
 			int lru = page_lru(page);
 			get_page(page);
@@ -1762,7 +1762,7 @@ int isolate_lru_page(struct page *page)
 			del_page_from_lru_list(page, lruvec, lru);
 			ret = 0;
 		}
-		spin_unlock_irq(zone_lru_lock(zone));
+		spin_unlock_irq(&pgdat->lru_lock);
 	}
 	return ret;
 }
@@ -1990,9 +1990,9 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
  * processes, from rmap.
  *
  * If the pages are mostly unmapped, the processing is fast and it is
- * appropriate to hold zone_lru_lock across the whole operation.  But if
+ * appropriate to hold pgdat->lru_lock across the whole operation.  But if
  * the pages are mapped, the processing is slow (page_referenced()) so we
- * should drop zone_lru_lock around each page.  It's impossible to balance
+ * should drop pgdat->lru_lock around each page.  It's impossible to balance
  * this, so instead we remove the pages from the LRU while processing them.
  * It is safe to rely on PG_active against the non-LRU pages in here because
  * nobody will play with that bit on a non-LRU page.
-- 
cgit v1.2.3


From a9519defc771d574888ffe01e84747889152ec35 Mon Sep 17 00:00:00 2001
From: Greg Thelen <gthelen@google.com>
Date: Tue, 5 Mar 2019 15:50:03 -0800
Subject: writeback: fix inode cgroup switching comment

Commit 682aa8e1a6a1 ("writeback: implement unlocked_inode_to_wb
transaction and use it for stat updates") refers to
inode_switch_wb_work_fn() which never got merged.

Switch the comments to inode_switch_wbs_work_fn().

Link: http://lkml.kernel.org/r/20190305004617.142590-1-gthelen@google.com
Fixes: 682aa8e1a6a1 ("writeback: implement unlocked_inode_to_wb transaction and use it for stat updates")
Signed-off-by: Greg Thelen <gthelen@google.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/backing-dev.h | 2 +-
 include/linux/fs.h          | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index c28a47cbe355..f9b029180241 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -365,7 +365,7 @@ unlocked_inode_to_wb_begin(struct inode *inode, struct wb_lock_cookie *cookie)
 	rcu_read_lock();
 
 	/*
-	 * Paired with store_release in inode_switch_wb_work_fn() and
+	 * Paired with store_release in inode_switch_wbs_work_fn() and
 	 * ensures that we see the new wb if we see cleared I_WB_SWITCH.
 	 */
 	cookie->locked = smp_load_acquire(&inode->i_state) & I_WB_SWITCH;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index fd423fec8d83..08f26046233e 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2091,7 +2091,7 @@ static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp)
  * I_WB_SWITCH		Cgroup bdi_writeback switching in progress.  Used to
  *			synchronize competing switching instances and to tell
  *			wb stat updates to grab the i_pages lock.  See
- *			inode_switch_wb_work_fn() for details.
+ *			inode_switch_wbs_work_fn() for details.
  *
  * I_OVL_INUSE		Used by overlayfs to get exclusive ownership on upper
  *			and work dirs among overlayfs mounts.
-- 
cgit v1.2.3


From abe420bfae528c92bd8cc5ecb62dc95672b1fd6f Mon Sep 17 00:00:00 2001
From: Joerg Roedel <jroedel@suse.de>
Date: Thu, 7 Feb 2019 12:59:13 +0100
Subject: swiotlb: Introduce swiotlb_max_mapping_size()

The function returns the maximum size that can be remapped
by the SWIOTLB implementation. This function will be later
exposed to users through the DMA-API.

Cc: stable@vger.kernel.org
Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 include/linux/swiotlb.h | 5 +++++
 kernel/dma/swiotlb.c    | 5 +++++
 2 files changed, 10 insertions(+)

(limited to 'include')

diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index 7c007ed7505f..d3980aeed4a0 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -76,6 +76,7 @@ bool swiotlb_map(struct device *dev, phys_addr_t *phys, dma_addr_t *dma_addr,
 		size_t size, enum dma_data_direction dir, unsigned long attrs);
 void __init swiotlb_exit(void);
 unsigned int swiotlb_max_segment(void);
+size_t swiotlb_max_mapping_size(struct device *dev);
 #else
 #define swiotlb_force SWIOTLB_NO_FORCE
 static inline bool is_swiotlb_buffer(phys_addr_t paddr)
@@ -95,6 +96,10 @@ static inline unsigned int swiotlb_max_segment(void)
 {
 	return 0;
 }
+static inline size_t swiotlb_max_mapping_size(struct device *dev)
+{
+	return SIZE_MAX;
+}
 #endif /* CONFIG_SWIOTLB */
 
 extern void swiotlb_print_info(void);
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 1fb6fd68b9c7..9cb21259cb0b 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -662,3 +662,8 @@ swiotlb_dma_supported(struct device *hwdev, u64 mask)
 {
 	return __phys_to_dma(hwdev, io_tlb_end - 1) <= mask;
 }
+
+size_t swiotlb_max_mapping_size(struct device *dev)
+{
+	return ((size_t)1 << IO_TLB_SHIFT) * IO_TLB_SEGSIZE;
+}
-- 
cgit v1.2.3


From 492366f7b4237257ef50ca9c431a6a0d50225aca Mon Sep 17 00:00:00 2001
From: Joerg Roedel <jroedel@suse.de>
Date: Thu, 7 Feb 2019 12:59:14 +0100
Subject: swiotlb: Add is_swiotlb_active() function

This function will be used from dma_direct code to determine
the maximum segment size of a dma mapping.

Cc: stable@vger.kernel.org
Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 include/linux/swiotlb.h | 6 ++++++
 kernel/dma/swiotlb.c    | 9 +++++++++
 2 files changed, 15 insertions(+)

(limited to 'include')

diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index d3980aeed4a0..29bc3a203283 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -77,6 +77,7 @@ bool swiotlb_map(struct device *dev, phys_addr_t *phys, dma_addr_t *dma_addr,
 void __init swiotlb_exit(void);
 unsigned int swiotlb_max_segment(void);
 size_t swiotlb_max_mapping_size(struct device *dev);
+bool is_swiotlb_active(void);
 #else
 #define swiotlb_force SWIOTLB_NO_FORCE
 static inline bool is_swiotlb_buffer(phys_addr_t paddr)
@@ -100,6 +101,11 @@ static inline size_t swiotlb_max_mapping_size(struct device *dev)
 {
 	return SIZE_MAX;
 }
+
+static inline bool is_swiotlb_active(void)
+{
+	return false;
+}
 #endif /* CONFIG_SWIOTLB */
 
 extern void swiotlb_print_info(void);
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 9cb21259cb0b..c873f9cc2146 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -667,3 +667,12 @@ size_t swiotlb_max_mapping_size(struct device *dev)
 {
 	return ((size_t)1 << IO_TLB_SHIFT) * IO_TLB_SEGSIZE;
 }
+
+bool is_swiotlb_active(void)
+{
+	/*
+	 * When SWIOTLB is initialized, even if io_tlb_start points to physical
+	 * address zero, io_tlb_end surely doesn't.
+	 */
+	return io_tlb_end != 0;
+}
-- 
cgit v1.2.3


From 133d624b1cee16906134e92d5befb843b58bcf31 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <jroedel@suse.de>
Date: Thu, 7 Feb 2019 12:59:15 +0100
Subject: dma: Introduce dma_max_mapping_size()

The function returns the maximum size that can be mapped
using DMA-API functions. The patch also adds the
implementation for direct DMA and a new dma_map_ops pointer
so that other implementations can expose their limit.

Cc: stable@vger.kernel.org
Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 Documentation/DMA-API.txt   |  8 ++++++++
 include/linux/dma-mapping.h |  8 ++++++++
 kernel/dma/direct.c         | 11 +++++++++++
 kernel/dma/mapping.c        | 14 ++++++++++++++
 4 files changed, 41 insertions(+)

(limited to 'include')

diff --git a/Documentation/DMA-API.txt b/Documentation/DMA-API.txt
index e133ccd60228..acfe3d0f78d1 100644
--- a/Documentation/DMA-API.txt
+++ b/Documentation/DMA-API.txt
@@ -195,6 +195,14 @@ Requesting the required mask does not alter the current mask.  If you
 wish to take advantage of it, you should issue a dma_set_mask()
 call to set the mask to the value returned.
 
+::
+
+	size_t
+	dma_direct_max_mapping_size(struct device *dev);
+
+Returns the maximum size of a mapping for the device. The size parameter
+of the mapping functions like dma_map_single(), dma_map_page() and
+others should not be larger than the returned value.
 
 Part Id - Streaming DMA mappings
 --------------------------------
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index f6ded992c183..5b21f14802e1 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -130,6 +130,7 @@ struct dma_map_ops {
 			enum dma_data_direction direction);
 	int (*dma_supported)(struct device *dev, u64 mask);
 	u64 (*get_required_mask)(struct device *dev);
+	size_t (*max_mapping_size)(struct device *dev);
 };
 
 #define DMA_MAPPING_ERROR		(~(dma_addr_t)0)
@@ -257,6 +258,8 @@ static inline void dma_direct_sync_sg_for_cpu(struct device *dev,
 }
 #endif
 
+size_t dma_direct_max_mapping_size(struct device *dev);
+
 #ifdef CONFIG_HAS_DMA
 #include <asm/dma-mapping.h>
 
@@ -460,6 +463,7 @@ int dma_supported(struct device *dev, u64 mask);
 int dma_set_mask(struct device *dev, u64 mask);
 int dma_set_coherent_mask(struct device *dev, u64 mask);
 u64 dma_get_required_mask(struct device *dev);
+size_t dma_max_mapping_size(struct device *dev);
 #else /* CONFIG_HAS_DMA */
 static inline dma_addr_t dma_map_page_attrs(struct device *dev,
 		struct page *page, size_t offset, size_t size,
@@ -561,6 +565,10 @@ static inline u64 dma_get_required_mask(struct device *dev)
 {
 	return 0;
 }
+static inline size_t dma_max_mapping_size(struct device *dev)
+{
+	return 0;
+}
 #endif /* CONFIG_HAS_DMA */
 
 static inline dma_addr_t dma_map_single_attrs(struct device *dev, void *ptr,
diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
index 355d16acee6d..6310ad01f915 100644
--- a/kernel/dma/direct.c
+++ b/kernel/dma/direct.c
@@ -380,3 +380,14 @@ int dma_direct_supported(struct device *dev, u64 mask)
 	 */
 	return mask >= __phys_to_dma(dev, min_mask);
 }
+
+size_t dma_direct_max_mapping_size(struct device *dev)
+{
+	size_t size = SIZE_MAX;
+
+	/* If SWIOTLB is active, use its maximum mapping size */
+	if (is_swiotlb_active())
+		size = swiotlb_max_mapping_size(dev);
+
+	return size;
+}
diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c
index a11006b6d8e8..5753008ab286 100644
--- a/kernel/dma/mapping.c
+++ b/kernel/dma/mapping.c
@@ -357,3 +357,17 @@ void dma_cache_sync(struct device *dev, void *vaddr, size_t size,
 		ops->cache_sync(dev, vaddr, size, dir);
 }
 EXPORT_SYMBOL(dma_cache_sync);
+
+size_t dma_max_mapping_size(struct device *dev)
+{
+	const struct dma_map_ops *ops = get_dma_ops(dev);
+	size_t size = SIZE_MAX;
+
+	if (dma_is_direct(ops))
+		size = dma_direct_max_mapping_size(dev);
+	else if (ops && ops->max_mapping_size)
+		size = ops->max_mapping_size(dev);
+
+	return size;
+}
+EXPORT_SYMBOL_GPL(dma_max_mapping_size);
-- 
cgit v1.2.3


From e6d6dd6c875eb3c9b69bb640419405726e6e0bbe Mon Sep 17 00:00:00 2001
From: Joerg Roedel <jroedel@suse.de>
Date: Thu, 7 Feb 2019 12:59:16 +0100
Subject: virtio: Introduce virtio_max_dma_size()

This function returns the maximum segment size for a single
dma transaction of a virtio device. The possible limit comes
from the SWIOTLB implementation in the Linux kernel, that
has an upper limit of (currently) 256kb of contiguous
memory it can map. Other DMA-API implementations might also
have limits.

Use the new dma_max_mapping_size() function to determine the
maximum mapping size when DMA-API is in use for virtio.

Cc: stable@vger.kernel.org
Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 drivers/virtio/virtio_ring.c | 11 +++++++++++
 include/linux/virtio.h       |  2 ++
 2 files changed, 13 insertions(+)

(limited to 'include')

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index a0b07c331255..18846afb39da 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -271,6 +271,17 @@ static bool vring_use_dma_api(struct virtio_device *vdev)
 	return false;
 }
 
+size_t virtio_max_dma_size(struct virtio_device *vdev)
+{
+	size_t max_segment_size = SIZE_MAX;
+
+	if (vring_use_dma_api(vdev))
+		max_segment_size = dma_max_mapping_size(&vdev->dev);
+
+	return max_segment_size;
+}
+EXPORT_SYMBOL_GPL(virtio_max_dma_size);
+
 static void *vring_alloc_queue(struct virtio_device *vdev, size_t size,
 			      dma_addr_t *dma_handle, gfp_t flag)
 {
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index fa1b5da2804e..673fe3ef3607 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -157,6 +157,8 @@ int virtio_device_freeze(struct virtio_device *dev);
 int virtio_device_restore(struct virtio_device *dev);
 #endif
 
+size_t virtio_max_dma_size(struct virtio_device *vdev);
+
 #define virtio_device_for_each_vq(vdev, vq) \
 	list_for_each_entry(vq, &vdev->vqs, list)
 
-- 
cgit v1.2.3


From ab7a2375fb8e83f8744c34442f476fa5a9df5e35 Mon Sep 17 00:00:00 2001
From: Cornelia Huck <cohuck@redhat.com>
Date: Thu, 31 Jan 2019 13:53:14 +0100
Subject: virtio: hint if callbacks surprisingly might sleep

A virtio transport is free to implement some of the callbacks in
virtio_config_ops in a matter that they cannot be called from
atomic context (e.g. virtio-ccw, which maps a lot of the callbacks
to channel I/O, which is an inherently asynchronous mechanism).
This can be very surprising for developers using the much more
common virtio-pci transport, just to find out that things break
when used on s390.

The documentation for virtio_config_ops now contains a comment
explaining this, but it makes sense to add a might_sleep() annotation
to various wrapper functions in the virtio core to avoid surprises
later.

Note that annotations are NOT added to two classes of calls:
- direct calls from device drivers (all current callers should be
  fine, however)
- calls which clearly won't be made from atomic context (such as
  those ultimately coming in via the driver core)

Signed-off-by: Cornelia Huck <cohuck@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 drivers/virtio/virtio.c       |  2 ++
 include/linux/virtio_config.h | 13 +++++++++++++
 2 files changed, 15 insertions(+)

(limited to 'include')

diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
index 59e36ef4920f..98b30f54342c 100644
--- a/drivers/virtio/virtio.c
+++ b/drivers/virtio/virtio.c
@@ -161,6 +161,7 @@ EXPORT_SYMBOL_GPL(virtio_config_enable);
 
 void virtio_add_status(struct virtio_device *dev, unsigned int status)
 {
+	might_sleep();
 	dev->config->set_status(dev, dev->config->get_status(dev) | status);
 }
 EXPORT_SYMBOL_GPL(virtio_add_status);
@@ -170,6 +171,7 @@ int virtio_finalize_features(struct virtio_device *dev)
 	int ret = dev->config->finalize_features(dev);
 	unsigned status;
 
+	might_sleep();
 	if (ret)
 		return ret;
 
diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h
index 987b6491b946..bb4cc4910750 100644
--- a/include/linux/virtio_config.h
+++ b/include/linux/virtio_config.h
@@ -290,6 +290,7 @@ static inline __virtio64 cpu_to_virtio64(struct virtio_device *vdev, u64 val)
 /* Config space accessors. */
 #define virtio_cread(vdev, structname, member, ptr)			\
 	do {								\
+		might_sleep();						\
 		/* Must match the member's type, and be integer */	\
 		if (!typecheck(typeof((((structname*)0)->member)), *(ptr))) \
 			(*ptr) = 1;					\
@@ -319,6 +320,7 @@ static inline __virtio64 cpu_to_virtio64(struct virtio_device *vdev, u64 val)
 /* Config space accessors. */
 #define virtio_cwrite(vdev, structname, member, ptr)			\
 	do {								\
+		might_sleep();						\
 		/* Must match the member's type, and be integer */	\
 		if (!typecheck(typeof((((structname*)0)->member)), *(ptr))) \
 			BUG_ON((*ptr) == 1);				\
@@ -358,6 +360,7 @@ static inline void __virtio_cread_many(struct virtio_device *vdev,
 		vdev->config->generation(vdev) : 0;
 	int i;
 
+	might_sleep();
 	do {
 		old = gen;
 
@@ -380,6 +383,8 @@ static inline void virtio_cread_bytes(struct virtio_device *vdev,
 static inline u8 virtio_cread8(struct virtio_device *vdev, unsigned int offset)
 {
 	u8 ret;
+
+	might_sleep();
 	vdev->config->get(vdev, offset, &ret, sizeof(ret));
 	return ret;
 }
@@ -387,6 +392,7 @@ static inline u8 virtio_cread8(struct virtio_device *vdev, unsigned int offset)
 static inline void virtio_cwrite8(struct virtio_device *vdev,
 				  unsigned int offset, u8 val)
 {
+	might_sleep();
 	vdev->config->set(vdev, offset, &val, sizeof(val));
 }
 
@@ -394,6 +400,8 @@ static inline u16 virtio_cread16(struct virtio_device *vdev,
 				 unsigned int offset)
 {
 	u16 ret;
+
+	might_sleep();
 	vdev->config->get(vdev, offset, &ret, sizeof(ret));
 	return virtio16_to_cpu(vdev, (__force __virtio16)ret);
 }
@@ -401,6 +409,7 @@ static inline u16 virtio_cread16(struct virtio_device *vdev,
 static inline void virtio_cwrite16(struct virtio_device *vdev,
 				   unsigned int offset, u16 val)
 {
+	might_sleep();
 	val = (__force u16)cpu_to_virtio16(vdev, val);
 	vdev->config->set(vdev, offset, &val, sizeof(val));
 }
@@ -409,6 +418,8 @@ static inline u32 virtio_cread32(struct virtio_device *vdev,
 				 unsigned int offset)
 {
 	u32 ret;
+
+	might_sleep();
 	vdev->config->get(vdev, offset, &ret, sizeof(ret));
 	return virtio32_to_cpu(vdev, (__force __virtio32)ret);
 }
@@ -416,6 +427,7 @@ static inline u32 virtio_cread32(struct virtio_device *vdev,
 static inline void virtio_cwrite32(struct virtio_device *vdev,
 				   unsigned int offset, u32 val)
 {
+	might_sleep();
 	val = (__force u32)cpu_to_virtio32(vdev, val);
 	vdev->config->set(vdev, offset, &val, sizeof(val));
 }
@@ -431,6 +443,7 @@ static inline u64 virtio_cread64(struct virtio_device *vdev,
 static inline void virtio_cwrite64(struct virtio_device *vdev,
 				   unsigned int offset, u64 val)
 {
+	might_sleep();
 	val = (__force u64)cpu_to_virtio64(vdev, val);
 	vdev->config->set(vdev, offset, &val, sizeof(val));
 }
-- 
cgit v1.2.3


From 27da0d2ef998e222a876c0cec72aa7829a626266 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 6 Mar 2019 11:52:36 +0100
Subject: appletalk: Fix compile regression

A bugfix just broke compilation of appletalk when CONFIG_SYSCTL
is disabled:

In file included from net/appletalk/ddp.c:65:
net/appletalk/ddp.c: In function 'atalk_init':
include/linux/atalk.h:164:34: error: expected expression before 'do'
 #define atalk_register_sysctl()  do { } while(0)
                                  ^~
net/appletalk/ddp.c:1934:7: note: in expansion of macro 'atalk_register_sysctl'
  rc = atalk_register_sysctl();

This is easier to avoid by using conventional inline functions
as stubs rather than macros. The header already has inline
functions for other purposes, so I'm changing over all the
macros for consistency.

Fixes: 6377f787aeb9 ("appletalk: Fix use-after-free in atalk_proc_exit")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/atalk.h | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/atalk.h b/include/linux/atalk.h
index 5a90f28d5ff2..d5cfc0b15b76 100644
--- a/include/linux/atalk.h
+++ b/include/linux/atalk.h
@@ -161,16 +161,26 @@ extern int sysctl_aarp_resolve_time;
 extern int atalk_register_sysctl(void);
 extern void atalk_unregister_sysctl(void);
 #else
-#define atalk_register_sysctl()		do { } while(0)
-#define atalk_unregister_sysctl()	do { } while(0)
+static inline int atalk_register_sysctl(void)
+{
+	return 0;
+}
+static inline void atalk_unregister_sysctl(void)
+{
+}
 #endif
 
 #ifdef CONFIG_PROC_FS
 extern int atalk_proc_init(void);
 extern void atalk_proc_exit(void);
 #else
-#define atalk_proc_init()	({ 0; })
-#define atalk_proc_exit()	do { } while(0)
+static inline int atalk_proc_init(void)
+{
+	return 0;
+}
+static inline void atalk_proc_exit(void)
+{
+}
 #endif /* CONFIG_PROC_FS */
 
 #endif /* __LINUX_ATALK_H__ */
-- 
cgit v1.2.3


From f76a16adc485699f95bb71fce114f97c832fe664 Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Wed, 6 Mar 2019 11:07:24 -0600
Subject: x86/unwind/orc: Fix ORC unwind table alignment

The .orc_unwind section is a packed array of 6-byte structs.  It's
currently aligned to 6 bytes, which is causing warnings in the LLD
linker.

Six isn't a power of two, so it's not a valid alignment value.  The
actual alignment doesn't matter much because it's an array of packed
structs.  An alignment of two is sufficient.  In reality it always gets
aligned to four bytes because it comes immediately after the
4-byte-aligned .orc_unwind_ip section.

Fixes: ee9f8fce9964 ("x86/unwind: Add the ORC unwinder")
Reported-by: Nick Desaulniers <ndesaulniers@google.com>
Reported-by: Dmitry Golovin <dima@golovin.in>
Reported-by: Sedat Dilek <sedat.dilek@gmail.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Sedat Dilek <sedat.dilek@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: stable@vger.kernel.org
Link: https://github.com/ClangBuiltLinux/linux/issues/218
Link: https://lkml.kernel.org/r/d55027ee95fe73e952dcd8be90aebd31b0095c45.1551892041.git.jpoimboe@redhat.com
---
 include/asm-generic/vmlinux.lds.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 3d7a6a9c2370..f8f6f04c4453 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -733,7 +733,7 @@
 		KEEP(*(.orc_unwind_ip))					\
 		__stop_orc_unwind_ip = .;				\
 	}								\
-	. = ALIGN(6);							\
+	. = ALIGN(2);							\
 	.orc_unwind : AT(ADDR(.orc_unwind) - LOAD_OFFSET) {		\
 		__start_orc_unwind = .;					\
 		KEEP(*(.orc_unwind))					\
-- 
cgit v1.2.3


From 221c5eb2338232f7340386de1c43decc32682e58 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Thu, 17 Jan 2019 09:41:58 -0700
Subject: io_uring: add support for IORING_OP_POLL

This is basically a direct port of bfe4037e722e, which implements a
one-shot poll command through aio. Description below is based on that
commit as well. However, instead of adding a POLL command and relying
on io_cancel(2) to remove it, we mimic the epoll(2) interface of
having a command to add a poll notification, IORING_OP_POLL_ADD,
and one to remove it again, IORING_OP_POLL_REMOVE.

To poll for a file descriptor the application should submit an sqe of
type IORING_OP_POLL. It will poll the fd for the events specified in the
poll_events field.

Unlike poll or epoll without EPOLLONESHOT this interface always works in
one shot mode, that is once the sqe is completed, it will have to be
resubmitted.

Reviewed-by: Hannes Reinecke <hare@suse.com>
Based-on-code-from: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c                 | 263 +++++++++++++++++++++++++++++++++++++++++-
 include/uapi/linux/io_uring.h |   3 +
 2 files changed, 265 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/fs/io_uring.c b/fs/io_uring.c
index d2a3a1bc85cc..85b914bd823c 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -161,6 +161,7 @@ struct io_ring_ctx {
 		 * manipulate the list, hence no extra locking is needed there.
 		 */
 		struct list_head	poll_list;
+		struct list_head	cancel_list;
 	} ____cacheline_aligned_in_smp;
 
 #if defined(CONFIG_UNIX)
@@ -176,8 +177,20 @@ struct sqe_submit {
 	bool				needs_fixed_file;
 };
 
+struct io_poll_iocb {
+	struct file			*file;
+	struct wait_queue_head		*head;
+	__poll_t			events;
+	bool				woken;
+	bool				canceled;
+	struct wait_queue_entry		wait;
+};
+
 struct io_kiocb {
-	struct kiocb		rw;
+	union {
+		struct kiocb		rw;
+		struct io_poll_iocb	poll;
+	};
 
 	struct sqe_submit	submit;
 
@@ -261,6 +274,7 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
 	init_waitqueue_head(&ctx->wait);
 	spin_lock_init(&ctx->completion_lock);
 	INIT_LIST_HEAD(&ctx->poll_list);
+	INIT_LIST_HEAD(&ctx->cancel_list);
 	return ctx;
 }
 
@@ -1058,6 +1072,246 @@ static int io_fsync(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 	return 0;
 }
 
+static void io_poll_remove_one(struct io_kiocb *req)
+{
+	struct io_poll_iocb *poll = &req->poll;
+
+	spin_lock(&poll->head->lock);
+	WRITE_ONCE(poll->canceled, true);
+	if (!list_empty(&poll->wait.entry)) {
+		list_del_init(&poll->wait.entry);
+		queue_work(req->ctx->sqo_wq, &req->work);
+	}
+	spin_unlock(&poll->head->lock);
+
+	list_del_init(&req->list);
+}
+
+static void io_poll_remove_all(struct io_ring_ctx *ctx)
+{
+	struct io_kiocb *req;
+
+	spin_lock_irq(&ctx->completion_lock);
+	while (!list_empty(&ctx->cancel_list)) {
+		req = list_first_entry(&ctx->cancel_list, struct io_kiocb,list);
+		io_poll_remove_one(req);
+	}
+	spin_unlock_irq(&ctx->completion_lock);
+}
+
+/*
+ * Find a running poll command that matches one specified in sqe->addr,
+ * and remove it if found.
+ */
+static int io_poll_remove(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+{
+	struct io_ring_ctx *ctx = req->ctx;
+	struct io_kiocb *poll_req, *next;
+	int ret = -ENOENT;
+
+	if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+		return -EINVAL;
+	if (sqe->ioprio || sqe->off || sqe->len || sqe->buf_index ||
+	    sqe->poll_events)
+		return -EINVAL;
+
+	spin_lock_irq(&ctx->completion_lock);
+	list_for_each_entry_safe(poll_req, next, &ctx->cancel_list, list) {
+		if (READ_ONCE(sqe->addr) == poll_req->user_data) {
+			io_poll_remove_one(poll_req);
+			ret = 0;
+			break;
+		}
+	}
+	spin_unlock_irq(&ctx->completion_lock);
+
+	io_cqring_add_event(req->ctx, sqe->user_data, ret, 0);
+	io_free_req(req);
+	return 0;
+}
+
+static void io_poll_complete(struct io_kiocb *req, __poll_t mask)
+{
+	io_cqring_add_event(req->ctx, req->user_data, mangle_poll(mask), 0);
+	io_fput(req);
+	io_free_req(req);
+}
+
+static void io_poll_complete_work(struct work_struct *work)
+{
+	struct io_kiocb *req = container_of(work, struct io_kiocb, work);
+	struct io_poll_iocb *poll = &req->poll;
+	struct poll_table_struct pt = { ._key = poll->events };
+	struct io_ring_ctx *ctx = req->ctx;
+	__poll_t mask = 0;
+
+	if (!READ_ONCE(poll->canceled))
+		mask = vfs_poll(poll->file, &pt) & poll->events;
+
+	/*
+	 * Note that ->ki_cancel callers also delete iocb from active_reqs after
+	 * calling ->ki_cancel.  We need the ctx_lock roundtrip here to
+	 * synchronize with them.  In the cancellation case the list_del_init
+	 * itself is not actually needed, but harmless so we keep it in to
+	 * avoid further branches in the fast path.
+	 */
+	spin_lock_irq(&ctx->completion_lock);
+	if (!mask && !READ_ONCE(poll->canceled)) {
+		add_wait_queue(poll->head, &poll->wait);
+		spin_unlock_irq(&ctx->completion_lock);
+		return;
+	}
+	list_del_init(&req->list);
+	spin_unlock_irq(&ctx->completion_lock);
+
+	io_poll_complete(req, mask);
+}
+
+static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
+			void *key)
+{
+	struct io_poll_iocb *poll = container_of(wait, struct io_poll_iocb,
+							wait);
+	struct io_kiocb *req = container_of(poll, struct io_kiocb, poll);
+	struct io_ring_ctx *ctx = req->ctx;
+	__poll_t mask = key_to_poll(key);
+
+	poll->woken = true;
+
+	/* for instances that support it check for an event match first: */
+	if (mask) {
+		unsigned long flags;
+
+		if (!(mask & poll->events))
+			return 0;
+
+		/* try to complete the iocb inline if we can: */
+		if (spin_trylock_irqsave(&ctx->completion_lock, flags)) {
+			list_del(&req->list);
+			spin_unlock_irqrestore(&ctx->completion_lock, flags);
+
+			list_del_init(&poll->wait.entry);
+			io_poll_complete(req, mask);
+			return 1;
+		}
+	}
+
+	list_del_init(&poll->wait.entry);
+	queue_work(ctx->sqo_wq, &req->work);
+	return 1;
+}
+
+struct io_poll_table {
+	struct poll_table_struct pt;
+	struct io_kiocb *req;
+	int error;
+};
+
+static void io_poll_queue_proc(struct file *file, struct wait_queue_head *head,
+			       struct poll_table_struct *p)
+{
+	struct io_poll_table *pt = container_of(p, struct io_poll_table, pt);
+
+	if (unlikely(pt->req->poll.head)) {
+		pt->error = -EINVAL;
+		return;
+	}
+
+	pt->error = 0;
+	pt->req->poll.head = head;
+	add_wait_queue(head, &pt->req->poll.wait);
+}
+
+static int io_poll_add(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+{
+	struct io_poll_iocb *poll = &req->poll;
+	struct io_ring_ctx *ctx = req->ctx;
+	struct io_poll_table ipt;
+	unsigned flags;
+	__poll_t mask;
+	u16 events;
+	int fd;
+
+	if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+		return -EINVAL;
+	if (sqe->addr || sqe->ioprio || sqe->off || sqe->len || sqe->buf_index)
+		return -EINVAL;
+
+	INIT_WORK(&req->work, io_poll_complete_work);
+	events = READ_ONCE(sqe->poll_events);
+	poll->events = demangle_poll(events) | EPOLLERR | EPOLLHUP;
+
+	flags = READ_ONCE(sqe->flags);
+	fd = READ_ONCE(sqe->fd);
+
+	if (flags & IOSQE_FIXED_FILE) {
+		if (unlikely(!ctx->user_files || fd >= ctx->nr_user_files))
+			return -EBADF;
+		poll->file = ctx->user_files[fd];
+		req->flags |= REQ_F_FIXED_FILE;
+	} else {
+		poll->file = fget(fd);
+	}
+	if (unlikely(!poll->file))
+		return -EBADF;
+
+	poll->head = NULL;
+	poll->woken = false;
+	poll->canceled = false;
+
+	ipt.pt._qproc = io_poll_queue_proc;
+	ipt.pt._key = poll->events;
+	ipt.req = req;
+	ipt.error = -EINVAL; /* same as no support for IOCB_CMD_POLL */
+
+	/* initialized the list so that we can do list_empty checks */
+	INIT_LIST_HEAD(&poll->wait.entry);
+	init_waitqueue_func_entry(&poll->wait, io_poll_wake);
+
+	/* one for removal from waitqueue, one for this function */
+	refcount_set(&req->refs, 2);
+
+	mask = vfs_poll(poll->file, &ipt.pt) & poll->events;
+	if (unlikely(!poll->head)) {
+		/* we did not manage to set up a waitqueue, done */
+		goto out;
+	}
+
+	spin_lock_irq(&ctx->completion_lock);
+	spin_lock(&poll->head->lock);
+	if (poll->woken) {
+		/* wake_up context handles the rest */
+		mask = 0;
+		ipt.error = 0;
+	} else if (mask || ipt.error) {
+		/* if we get an error or a mask we are done */
+		WARN_ON_ONCE(list_empty(&poll->wait.entry));
+		list_del_init(&poll->wait.entry);
+	} else {
+		/* actually waiting for an event */
+		list_add_tail(&req->list, &ctx->cancel_list);
+	}
+	spin_unlock(&poll->head->lock);
+	spin_unlock_irq(&ctx->completion_lock);
+
+out:
+	if (unlikely(ipt.error)) {
+		if (!(flags & IOSQE_FIXED_FILE))
+			fput(poll->file);
+		/*
+		 * Drop one of our refs to this req, __io_submit_sqe() will
+		 * drop the other one since we're returning an error.
+		 */
+		io_free_req(req);
+		return ipt.error;
+	}
+
+	if (mask)
+		io_poll_complete(req, mask);
+	io_free_req(req);
+	return 0;
+}
+
 static int __io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
 			   const struct sqe_submit *s, bool force_nonblock,
 			   struct io_submit_state *state)
@@ -1093,6 +1347,12 @@ static int __io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
 	case IORING_OP_FSYNC:
 		ret = io_fsync(req, s->sqe, force_nonblock);
 		break;
+	case IORING_OP_POLL_ADD:
+		ret = io_poll_add(req, s->sqe);
+		break;
+	case IORING_OP_POLL_REMOVE:
+		ret = io_poll_remove(req, s->sqe);
+		break;
 	default:
 		ret = -EINVAL;
 		break;
@@ -2131,6 +2391,7 @@ static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx)
 	percpu_ref_kill(&ctx->refs);
 	mutex_unlock(&ctx->uring_lock);
 
+	io_poll_remove_all(ctx);
 	io_iopoll_reap_events(ctx);
 	wait_for_completion(&ctx->ctx_done);
 	io_ring_ctx_free(ctx);
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 0ec74bab8dbe..e23408692118 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -25,6 +25,7 @@ struct io_uring_sqe {
 	union {
 		__kernel_rwf_t	rw_flags;
 		__u32		fsync_flags;
+		__u16		poll_events;
 	};
 	__u64	user_data;	/* data to be passed back at completion time */
 	union {
@@ -51,6 +52,8 @@ struct io_uring_sqe {
 #define IORING_OP_FSYNC		3
 #define IORING_OP_READ_FIXED	4
 #define IORING_OP_WRITE_FIXED	5
+#define IORING_OP_POLL_ADD	6
+#define IORING_OP_POLL_REMOVE	7
 
 /*
  * sqe->fsync_flags
-- 
cgit v1.2.3


From 4981b82ba2ff87df6a711fcd7a233c615df5fc79 Mon Sep 17 00:00:00 2001
From: Wendy Liang <wendy.liang@xilinx.com>
Date: Thu, 21 Feb 2019 16:36:33 -0800
Subject: mailbox: ZynqMP IPI mailbox controller

This patch is to introduce ZynqMP IPI mailbox controller driver
to use the ZynqMP IPI block as mailboxes.

Signed-off-by: Wendy Liang <wendy.liang@xilinx.com>
Signed-off-by: Jassi Brar <jaswinder.singh@linaro.org>
---
 drivers/mailbox/Kconfig                    |  11 +
 drivers/mailbox/Makefile                   |   2 +
 drivers/mailbox/zynqmp-ipi-mailbox.c       | 725 +++++++++++++++++++++++++++++
 include/linux/mailbox/zynqmp-ipi-message.h |  20 +
 4 files changed, 758 insertions(+)
 create mode 100644 drivers/mailbox/zynqmp-ipi-mailbox.c
 create mode 100644 include/linux/mailbox/zynqmp-ipi-message.h

(limited to 'include')

diff --git a/drivers/mailbox/Kconfig b/drivers/mailbox/Kconfig
index 3eeb12e93e98..d86e7a4ac04d 100644
--- a/drivers/mailbox/Kconfig
+++ b/drivers/mailbox/Kconfig
@@ -205,4 +205,15 @@ config MTK_CMDQ_MBOX
 	  mailbox driver. The CMDQ is used to help read/write registers with
 	  critical time limitation, such as updating display configuration
 	  during the vblank.
+
+config ZYNQMP_IPI_MBOX
+	bool "Xilinx ZynqMP IPI Mailbox"
+	depends on ARCH_ZYNQMP && OF
+	help
+	  Say yes here to add support for Xilinx IPI mailbox driver.
+	  This mailbox driver is used to send notification or short message
+	  between processors with Xilinx ZynqMP IPI. It will place the
+	  message to the IPI buffer and will access the IPI control
+	  registers to kick the other processor or enquire status.
+
 endif
diff --git a/drivers/mailbox/Makefile b/drivers/mailbox/Makefile
index c818b5d011ae..8be3bcbcf882 100644
--- a/drivers/mailbox/Makefile
+++ b/drivers/mailbox/Makefile
@@ -44,3 +44,5 @@ obj-$(CONFIG_TEGRA_HSP_MBOX)	+= tegra-hsp.o
 obj-$(CONFIG_STM32_IPCC) 	+= stm32-ipcc.o
 
 obj-$(CONFIG_MTK_CMDQ_MBOX)	+= mtk-cmdq-mailbox.o
+
+obj-$(CONFIG_ZYNQMP_IPI_MBOX)	+= zynqmp-ipi-mailbox.o
diff --git a/drivers/mailbox/zynqmp-ipi-mailbox.c b/drivers/mailbox/zynqmp-ipi-mailbox.c
new file mode 100644
index 000000000000..86887c9a349a
--- /dev/null
+++ b/drivers/mailbox/zynqmp-ipi-mailbox.c
@@ -0,0 +1,725 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Xilinx Inter Processor Interrupt(IPI) Mailbox Driver
+ *
+ * Copyright (C) 2018 Xilinx, Inc.
+ */
+
+#include <linux/arm-smccc.h>
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/mailbox_controller.h>
+#include <linux/mailbox/zynqmp-ipi-message.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_device.h>
+#include <linux/of_irq.h>
+#include <linux/platform_device.h>
+
+/* IPI agent ID any */
+#define IPI_ID_ANY 0xFFUL
+
+/* indicate if ZynqMP IPI mailbox driver uses SMC calls or HVC calls */
+#define USE_SMC 0
+#define USE_HVC 1
+
+/* Default IPI SMC function IDs */
+#define SMC_IPI_MAILBOX_OPEN		0x82001000U
+#define SMC_IPI_MAILBOX_RELEASE		0x82001001U
+#define SMC_IPI_MAILBOX_STATUS_ENQUIRY	0x82001002U
+#define SMC_IPI_MAILBOX_NOTIFY		0x82001003U
+#define SMC_IPI_MAILBOX_ACK		0x82001004U
+#define SMC_IPI_MAILBOX_ENABLE_IRQ	0x82001005U
+#define SMC_IPI_MAILBOX_DISABLE_IRQ	0x82001006U
+
+/* IPI SMC Macros */
+#define IPI_SMC_ENQUIRY_DIRQ_MASK	0x00000001UL /* Flag to indicate if
+						      * notification interrupt
+						      * to be disabled.
+						      */
+#define IPI_SMC_ACK_EIRQ_MASK		0x00000001UL /* Flag to indicate if
+						      * notification interrupt
+						      * to be enabled.
+						      */
+
+/* IPI mailbox status */
+#define IPI_MB_STATUS_IDLE		0
+#define IPI_MB_STATUS_SEND_PENDING	1
+#define IPI_MB_STATUS_RECV_PENDING	2
+
+#define IPI_MB_CHNL_TX	0 /* IPI mailbox TX channel */
+#define IPI_MB_CHNL_RX	1 /* IPI mailbox RX channel */
+
+/**
+ * struct zynqmp_ipi_mchan - Description of a Xilinx ZynqMP IPI mailbox channel
+ * @is_opened: indicate if the IPI channel is opened
+ * @req_buf: local to remote request buffer start address
+ * @resp_buf: local to remote response buffer start address
+ * @req_buf_size: request buffer size
+ * @resp_buf_size: response buffer size
+ * @rx_buf: receive buffer to pass received message to client
+ * @chan_type: channel type
+ */
+struct zynqmp_ipi_mchan {
+	int is_opened;
+	void __iomem *req_buf;
+	void __iomem *resp_buf;
+	void *rx_buf;
+	size_t req_buf_size;
+	size_t resp_buf_size;
+	unsigned int chan_type;
+};
+
+/**
+ * struct zynqmp_ipi_mbox - Description of a ZynqMP IPI mailbox
+ *                          platform data.
+ * @pdata:		  pointer to the IPI private data
+ * @dev:                  device pointer corresponding to the Xilinx ZynqMP
+ *                        IPI mailbox
+ * @remote_id:            remote IPI agent ID
+ * @mbox:                 mailbox Controller
+ * @mchans:               array for channels, tx channel and rx channel.
+ * @irq:                  IPI agent interrupt ID
+ */
+struct zynqmp_ipi_mbox {
+	struct zynqmp_ipi_pdata *pdata;
+	struct device dev;
+	u32 remote_id;
+	struct mbox_controller mbox;
+	struct zynqmp_ipi_mchan mchans[2];
+};
+
+/**
+ * struct zynqmp_ipi_pdata - Description of z ZynqMP IPI agent platform data.
+ *
+ * @dev:                  device pointer corresponding to the Xilinx ZynqMP
+ *                        IPI agent
+ * @irq:                  IPI agent interrupt ID
+ * @method:               IPI SMC or HVC is going to be used
+ * @local_id:             local IPI agent ID
+ * @num_mboxes:           number of mailboxes of this IPI agent
+ * @ipi_mboxes:           IPI mailboxes of this IPI agent
+ */
+struct zynqmp_ipi_pdata {
+	struct device *dev;
+	int irq;
+	unsigned int method;
+	u32 local_id;
+	int num_mboxes;
+	struct zynqmp_ipi_mbox *ipi_mboxes;
+};
+
+static struct device_driver zynqmp_ipi_mbox_driver = {
+	.owner = THIS_MODULE,
+	.name = "zynqmp-ipi-mbox",
+};
+
+static void zynqmp_ipi_fw_call(struct zynqmp_ipi_mbox *ipi_mbox,
+			       unsigned long a0, unsigned long a3,
+			       struct arm_smccc_res *res)
+{
+	struct zynqmp_ipi_pdata *pdata = ipi_mbox->pdata;
+	unsigned long a1, a2;
+
+	a1 = pdata->local_id;
+	a2 = ipi_mbox->remote_id;
+	if (pdata->method == USE_SMC)
+		arm_smccc_smc(a0, a1, a2, a3, 0, 0, 0, 0, res);
+	else
+		arm_smccc_hvc(a0, a1, a2, a3, 0, 0, 0, 0, res);
+}
+
+/**
+ * zynqmp_ipi_interrupt - Interrupt handler for IPI notification
+ *
+ * @irq:  Interrupt number
+ * @data: ZynqMP IPI mailbox platform data.
+ *
+ * Return: -EINVAL if there is no instance
+ * IRQ_NONE if the interrupt is not ours.
+ * IRQ_HANDLED if the rx interrupt was successfully handled.
+ */
+static irqreturn_t zynqmp_ipi_interrupt(int irq, void *data)
+{
+	struct zynqmp_ipi_pdata *pdata = data;
+	struct mbox_chan *chan;
+	struct zynqmp_ipi_mbox *ipi_mbox;
+	struct zynqmp_ipi_mchan *mchan;
+	struct zynqmp_ipi_message *msg;
+	u64 arg0, arg3;
+	struct arm_smccc_res res;
+	int ret, i;
+
+	(void)irq;
+	arg0 = SMC_IPI_MAILBOX_STATUS_ENQUIRY;
+	arg3 = IPI_SMC_ENQUIRY_DIRQ_MASK;
+	for (i = 0; i < pdata->num_mboxes; i++) {
+		ipi_mbox = &pdata->ipi_mboxes[i];
+		mchan = &ipi_mbox->mchans[IPI_MB_CHNL_RX];
+		chan = &ipi_mbox->mbox.chans[IPI_MB_CHNL_RX];
+		zynqmp_ipi_fw_call(ipi_mbox, arg0, arg3, &res);
+		ret = (int)(res.a0 & 0xFFFFFFFF);
+		if (ret > 0 && ret & IPI_MB_STATUS_RECV_PENDING) {
+			if (mchan->is_opened) {
+				msg = mchan->rx_buf;
+				msg->len = mchan->req_buf_size;
+				memcpy_fromio(msg->data, mchan->req_buf,
+					      msg->len);
+				mbox_chan_received_data(chan, (void *)msg);
+				return IRQ_HANDLED;
+			}
+		}
+	}
+	return IRQ_NONE;
+}
+
+/**
+ * zynqmp_ipi_peek_data - Peek to see if there are any rx messages.
+ *
+ * @chan: Channel Pointer
+ *
+ * Return: 'true' if there is pending rx data, 'false' if there is none.
+ */
+static bool zynqmp_ipi_peek_data(struct mbox_chan *chan)
+{
+	struct device *dev = chan->mbox->dev;
+	struct zynqmp_ipi_mbox *ipi_mbox = dev_get_drvdata(dev);
+	struct zynqmp_ipi_mchan *mchan = chan->con_priv;
+	int ret;
+	u64 arg0;
+	struct arm_smccc_res res;
+
+	if (WARN_ON(!ipi_mbox)) {
+		dev_err(dev, "no platform drv data??\n");
+		return false;
+	}
+
+	arg0 = SMC_IPI_MAILBOX_STATUS_ENQUIRY;
+	zynqmp_ipi_fw_call(ipi_mbox, arg0, 0, &res);
+	ret = (int)(res.a0 & 0xFFFFFFFF);
+
+	if (mchan->chan_type == IPI_MB_CHNL_TX) {
+		/* TX channel, check if the message has been acked
+		 * by the remote, if yes, response is available.
+		 */
+		if (ret < 0 || ret & IPI_MB_STATUS_SEND_PENDING)
+			return false;
+		else
+			return true;
+	} else if (ret > 0 && ret & IPI_MB_STATUS_RECV_PENDING) {
+		/* RX channel, check if there is message arrived. */
+		return true;
+	}
+	return false;
+}
+
+/**
+ * zynqmp_ipi_last_tx_done - See if the last tx message is sent
+ *
+ * @chan: Channel pointer
+ *
+ * Return: 'true' is no pending tx data, 'false' if there are any.
+ */
+static bool zynqmp_ipi_last_tx_done(struct mbox_chan *chan)
+{
+	struct device *dev = chan->mbox->dev;
+	struct zynqmp_ipi_mbox *ipi_mbox = dev_get_drvdata(dev);
+	struct zynqmp_ipi_mchan *mchan = chan->con_priv;
+	int ret;
+	u64 arg0;
+	struct arm_smccc_res res;
+
+	if (WARN_ON(!ipi_mbox)) {
+		dev_err(dev, "no platform drv data??\n");
+		return false;
+	}
+
+	if (mchan->chan_type == IPI_MB_CHNL_TX) {
+		/* We only need to check if the message been taken
+		 * by the remote in the TX channel
+		 */
+		arg0 = SMC_IPI_MAILBOX_STATUS_ENQUIRY;
+		zynqmp_ipi_fw_call(ipi_mbox, arg0, 0, &res);
+		/* Check the SMC call status, a0 of the result */
+		ret = (int)(res.a0 & 0xFFFFFFFF);
+		if (ret < 0 || ret & IPI_MB_STATUS_SEND_PENDING)
+			return false;
+		return true;
+	}
+	/* Always true for the response message in RX channel */
+	return true;
+}
+
+/**
+ * zynqmp_ipi_send_data - Send data
+ *
+ * @chan: Channel Pointer
+ * @data: Message Pointer
+ *
+ * Return: 0 if all goes good, else appropriate error messages.
+ */
+static int zynqmp_ipi_send_data(struct mbox_chan *chan, void *data)
+{
+	struct device *dev = chan->mbox->dev;
+	struct zynqmp_ipi_mbox *ipi_mbox = dev_get_drvdata(dev);
+	struct zynqmp_ipi_mchan *mchan = chan->con_priv;
+	struct zynqmp_ipi_message *msg = data;
+	u64 arg0;
+	struct arm_smccc_res res;
+
+	if (WARN_ON(!ipi_mbox)) {
+		dev_err(dev, "no platform drv data??\n");
+		return -EINVAL;
+	}
+
+	if (mchan->chan_type == IPI_MB_CHNL_TX) {
+		/* Send request message */
+		if (msg && msg->len > mchan->req_buf_size) {
+			dev_err(dev, "channel %d message length %u > max %lu\n",
+				mchan->chan_type, (unsigned int)msg->len,
+				mchan->req_buf_size);
+			return -EINVAL;
+		}
+		if (msg && msg->len)
+			memcpy_toio(mchan->req_buf, msg->data, msg->len);
+		/* Kick IPI mailbox to send message */
+		arg0 = SMC_IPI_MAILBOX_NOTIFY;
+		zynqmp_ipi_fw_call(ipi_mbox, arg0, 0, &res);
+	} else {
+		/* Send response message */
+		if (msg && msg->len > mchan->resp_buf_size) {
+			dev_err(dev, "channel %d message length %u > max %lu\n",
+				mchan->chan_type, (unsigned int)msg->len,
+				mchan->resp_buf_size);
+			return -EINVAL;
+		}
+		if (msg && msg->len)
+			memcpy_toio(mchan->resp_buf, msg->data, msg->len);
+		arg0 = SMC_IPI_MAILBOX_ACK;
+		zynqmp_ipi_fw_call(ipi_mbox, arg0, IPI_SMC_ACK_EIRQ_MASK,
+				   &res);
+	}
+	return 0;
+}
+
+/**
+ * zynqmp_ipi_startup - Startup the IPI channel
+ *
+ * @chan: Channel pointer
+ *
+ * Return: 0 if all goes good, else return corresponding error message
+ */
+static int zynqmp_ipi_startup(struct mbox_chan *chan)
+{
+	struct device *dev = chan->mbox->dev;
+	struct zynqmp_ipi_mbox *ipi_mbox = dev_get_drvdata(dev);
+	struct zynqmp_ipi_mchan *mchan = chan->con_priv;
+	u64 arg0;
+	struct arm_smccc_res res;
+	int ret = 0;
+	unsigned int nchan_type;
+
+	if (mchan->is_opened)
+		return 0;
+
+	/* If no channel has been opened, open the IPI mailbox */
+	nchan_type = (mchan->chan_type + 1) % 2;
+	if (!ipi_mbox->mchans[nchan_type].is_opened) {
+		arg0 = SMC_IPI_MAILBOX_OPEN;
+		zynqmp_ipi_fw_call(ipi_mbox, arg0, 0, &res);
+		/* Check the SMC call status, a0 of the result */
+		ret = (int)(res.a0 & 0xFFFFFFFF);
+		if (ret < 0) {
+			dev_err(dev, "SMC to open the IPI channel failed.\n");
+			return ret;
+		}
+		ret = 0;
+	}
+
+	/* If it is RX channel, enable the IPI notification interrupt */
+	if (mchan->chan_type == IPI_MB_CHNL_RX) {
+		arg0 = SMC_IPI_MAILBOX_ENABLE_IRQ;
+		zynqmp_ipi_fw_call(ipi_mbox, arg0, 0, &res);
+	}
+	mchan->is_opened = 1;
+
+	return ret;
+}
+
+/**
+ * zynqmp_ipi_shutdown - Shutdown the IPI channel
+ *
+ * @chan: Channel pointer
+ */
+static void zynqmp_ipi_shutdown(struct mbox_chan *chan)
+{
+	struct device *dev = chan->mbox->dev;
+	struct zynqmp_ipi_mbox *ipi_mbox = dev_get_drvdata(dev);
+	struct zynqmp_ipi_mchan *mchan = chan->con_priv;
+	u64 arg0;
+	struct arm_smccc_res res;
+	unsigned int chan_type;
+
+	if (!mchan->is_opened)
+		return;
+
+	/* If it is RX channel, disable notification interrupt */
+	chan_type = mchan->chan_type;
+	if (chan_type == IPI_MB_CHNL_RX) {
+		arg0 = SMC_IPI_MAILBOX_DISABLE_IRQ;
+		zynqmp_ipi_fw_call(ipi_mbox, arg0, 0, &res);
+	}
+	/* Release IPI mailbox if no other channel is opened */
+	chan_type = (chan_type + 1) % 2;
+	if (!ipi_mbox->mchans[chan_type].is_opened) {
+		arg0 = SMC_IPI_MAILBOX_RELEASE;
+		zynqmp_ipi_fw_call(ipi_mbox, arg0, 0, &res);
+	}
+
+	mchan->is_opened = 0;
+}
+
+/* ZynqMP IPI mailbox operations */
+static const struct mbox_chan_ops zynqmp_ipi_chan_ops = {
+	.startup = zynqmp_ipi_startup,
+	.shutdown = zynqmp_ipi_shutdown,
+	.peek_data = zynqmp_ipi_peek_data,
+	.last_tx_done = zynqmp_ipi_last_tx_done,
+	.send_data = zynqmp_ipi_send_data,
+};
+
+/**
+ * zynqmp_ipi_of_xlate - Translate of phandle to IPI mailbox channel
+ *
+ * @mbox: mailbox controller pointer
+ * @p:    phandle pointer
+ *
+ * Return: Mailbox channel, else return error pointer.
+ */
+static struct mbox_chan *zynqmp_ipi_of_xlate(struct mbox_controller *mbox,
+					     const struct of_phandle_args *p)
+{
+	struct mbox_chan *chan;
+	struct device *dev = mbox->dev;
+	unsigned int chan_type;
+
+	/* Only supports TX and RX channels */
+	chan_type = p->args[0];
+	if (chan_type != IPI_MB_CHNL_TX && chan_type != IPI_MB_CHNL_RX) {
+		dev_err(dev, "req chnl failure: invalid chnl type %u.\n",
+			chan_type);
+		return ERR_PTR(-EINVAL);
+	}
+	chan = &mbox->chans[chan_type];
+	return chan;
+}
+
+static const struct of_device_id zynqmp_ipi_of_match[] = {
+	{ .compatible = "xlnx,zynqmp-ipi-mailbox" },
+	{},
+};
+MODULE_DEVICE_TABLE(of, zynqmp_ipi_of_match);
+
+/**
+ * zynqmp_ipi_mbox_get_buf_res - Get buffer resource from the IPI dev node
+ *
+ * @node: IPI mbox device child node
+ * @name: name of the IPI buffer
+ * @res: pointer to where the resource information will be stored.
+ *
+ * Return: 0 for success, negative value for failure
+ */
+static int zynqmp_ipi_mbox_get_buf_res(struct device_node *node,
+				       const char *name,
+				       struct resource *res)
+{
+	int ret, index;
+
+	index = of_property_match_string(node, "reg-names", name);
+	if (index >= 0) {
+		ret = of_address_to_resource(node, index, res);
+		if (ret < 0)
+			return -EINVAL;
+		return 0;
+	}
+	return -ENODEV;
+}
+
+/**
+ * zynqmp_ipi_mbox_dev_release() - release the existence of a ipi mbox dev
+ *
+ * @dev: the ipi mailbox device
+ *
+ * This is to avoid the no device release() function kernel warning.
+ *
+ */
+static void zynqmp_ipi_mbox_dev_release(struct device *dev)
+{
+	(void)dev;
+}
+
+/**
+ * zynqmp_ipi_mbox_probe - probe IPI mailbox resource from device node
+ *
+ * @ipi_mbox: pointer to IPI mailbox private data structure
+ * @node: IPI mailbox device node
+ *
+ * Return: 0 for success, negative value for failure
+ */
+static int zynqmp_ipi_mbox_probe(struct zynqmp_ipi_mbox *ipi_mbox,
+				 struct device_node *node)
+{
+	struct zynqmp_ipi_mchan *mchan;
+	struct mbox_chan *chans;
+	struct mbox_controller *mbox;
+	struct resource res;
+	struct device *dev, *mdev;
+	const char *name;
+	int ret;
+
+	dev = ipi_mbox->pdata->dev;
+	/* Initialize dev for IPI mailbox */
+	ipi_mbox->dev.parent = dev;
+	ipi_mbox->dev.release = NULL;
+	ipi_mbox->dev.of_node = node;
+	dev_set_name(&ipi_mbox->dev, "%s", of_node_full_name(node));
+	dev_set_drvdata(&ipi_mbox->dev, ipi_mbox);
+	ipi_mbox->dev.release = zynqmp_ipi_mbox_dev_release;
+	ipi_mbox->dev.driver = &zynqmp_ipi_mbox_driver;
+	ret = device_register(&ipi_mbox->dev);
+	if (ret) {
+		dev_err(dev, "Failed to register ipi mbox dev.\n");
+		return ret;
+	}
+	mdev = &ipi_mbox->dev;
+
+	mchan = &ipi_mbox->mchans[IPI_MB_CHNL_TX];
+	name = "local_request_region";
+	ret = zynqmp_ipi_mbox_get_buf_res(node, name, &res);
+	if (!ret) {
+		mchan->req_buf_size = resource_size(&res);
+		mchan->req_buf = devm_ioremap(mdev, res.start,
+					      mchan->req_buf_size);
+		if (IS_ERR(mchan->req_buf)) {
+			dev_err(mdev, "Unable to map IPI buffer I/O memory\n");
+			ret = PTR_ERR(mchan->req_buf);
+			return ret;
+		}
+	} else if (ret != -ENODEV) {
+		dev_err(mdev, "Unmatched resource %s, %d.\n", name, ret);
+		return ret;
+	}
+
+	name = "remote_response_region";
+	ret = zynqmp_ipi_mbox_get_buf_res(node, name, &res);
+	if (!ret) {
+		mchan->resp_buf_size = resource_size(&res);
+		mchan->resp_buf = devm_ioremap(mdev, res.start,
+					       mchan->resp_buf_size);
+		if (IS_ERR(mchan->resp_buf)) {
+			dev_err(mdev, "Unable to map IPI buffer I/O memory\n");
+			ret = PTR_ERR(mchan->resp_buf);
+			return ret;
+		}
+	} else if (ret != -ENODEV) {
+		dev_err(mdev, "Unmatched resource %s.\n", name);
+		return ret;
+	}
+	mchan->rx_buf = devm_kzalloc(mdev,
+				     mchan->resp_buf_size +
+				     sizeof(struct zynqmp_ipi_message),
+				     GFP_KERNEL);
+	if (!mchan->rx_buf)
+		return -ENOMEM;
+
+	mchan = &ipi_mbox->mchans[IPI_MB_CHNL_RX];
+	name = "remote_request_region";
+	ret = zynqmp_ipi_mbox_get_buf_res(node, name, &res);
+	if (!ret) {
+		mchan->req_buf_size = resource_size(&res);
+		mchan->req_buf = devm_ioremap(mdev, res.start,
+					      mchan->req_buf_size);
+		if (IS_ERR(mchan->req_buf)) {
+			dev_err(mdev, "Unable to map IPI buffer I/O memory\n");
+			ret = PTR_ERR(mchan->req_buf);
+			return ret;
+		}
+	} else if (ret != -ENODEV) {
+		dev_err(mdev, "Unmatched resource %s.\n", name);
+		return ret;
+	}
+
+	name = "local_response_region";
+	ret = zynqmp_ipi_mbox_get_buf_res(node, name, &res);
+	if (!ret) {
+		mchan->resp_buf_size = resource_size(&res);
+		mchan->resp_buf = devm_ioremap(mdev, res.start,
+					       mchan->resp_buf_size);
+		if (IS_ERR(mchan->resp_buf)) {
+			dev_err(mdev, "Unable to map IPI buffer I/O memory\n");
+			ret = PTR_ERR(mchan->resp_buf);
+			return ret;
+		}
+	} else if (ret != -ENODEV) {
+		dev_err(mdev, "Unmatched resource %s.\n", name);
+		return ret;
+	}
+	mchan->rx_buf = devm_kzalloc(mdev,
+				     mchan->resp_buf_size +
+				     sizeof(struct zynqmp_ipi_message),
+				     GFP_KERNEL);
+	if (!mchan->rx_buf)
+		return -ENOMEM;
+
+	/* Get the IPI remote agent ID */
+	ret = of_property_read_u32(node, "xlnx,ipi-id", &ipi_mbox->remote_id);
+	if (ret < 0) {
+		dev_err(dev, "No IPI remote ID is specified.\n");
+		return ret;
+	}
+
+	mbox = &ipi_mbox->mbox;
+	mbox->dev = mdev;
+	mbox->ops = &zynqmp_ipi_chan_ops;
+	mbox->num_chans = 2;
+	mbox->txdone_irq = false;
+	mbox->txdone_poll = true;
+	mbox->txpoll_period = 5;
+	mbox->of_xlate = zynqmp_ipi_of_xlate;
+	chans = devm_kzalloc(mdev, 2 * sizeof(*chans), GFP_KERNEL);
+	if (!chans)
+		return -ENOMEM;
+	mbox->chans = chans;
+	chans[IPI_MB_CHNL_TX].con_priv = &ipi_mbox->mchans[IPI_MB_CHNL_TX];
+	chans[IPI_MB_CHNL_RX].con_priv = &ipi_mbox->mchans[IPI_MB_CHNL_RX];
+	ipi_mbox->mchans[IPI_MB_CHNL_TX].chan_type = IPI_MB_CHNL_TX;
+	ipi_mbox->mchans[IPI_MB_CHNL_RX].chan_type = IPI_MB_CHNL_RX;
+	ret = devm_mbox_controller_register(mdev, mbox);
+	if (ret)
+		dev_err(mdev,
+			"Failed to register mbox_controller(%d)\n", ret);
+	else
+		dev_info(mdev,
+			 "Registered ZynqMP IPI mbox with TX/RX channels.\n");
+	return ret;
+}
+
+/**
+ * zynqmp_ipi_free_mboxes - Free IPI mailboxes devices
+ *
+ * @pdata: IPI private data
+ */
+static void zynqmp_ipi_free_mboxes(struct zynqmp_ipi_pdata *pdata)
+{
+	struct zynqmp_ipi_mbox *ipi_mbox;
+	int i;
+
+	i = pdata->num_mboxes;
+	for (; i >= 0; i--) {
+		ipi_mbox = &pdata->ipi_mboxes[i];
+		if (ipi_mbox->dev.parent) {
+			mbox_controller_unregister(&ipi_mbox->mbox);
+			device_unregister(&ipi_mbox->dev);
+		}
+	}
+}
+
+static int zynqmp_ipi_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct device_node *nc, *np = pdev->dev.of_node;
+	struct zynqmp_ipi_pdata *pdata;
+	struct zynqmp_ipi_mbox *mbox;
+	int num_mboxes, ret = -EINVAL;
+
+	num_mboxes = of_get_child_count(np);
+	pdata = devm_kzalloc(dev, sizeof(*pdata) + (num_mboxes * sizeof(*mbox)),
+			     GFP_KERNEL);
+	if (!pdata)
+		return -ENOMEM;
+	pdata->dev = dev;
+
+	/* Get the IPI local agents ID */
+	ret = of_property_read_u32(np, "xlnx,ipi-id", &pdata->local_id);
+	if (ret < 0) {
+		dev_err(dev, "No IPI local ID is specified.\n");
+		return ret;
+	}
+
+	pdata->num_mboxes = num_mboxes;
+	pdata->ipi_mboxes = (struct zynqmp_ipi_mbox *)
+			    ((char *)pdata + sizeof(*pdata));
+
+	mbox = pdata->ipi_mboxes;
+	for_each_available_child_of_node(np, nc) {
+		mbox->pdata = pdata;
+		ret = zynqmp_ipi_mbox_probe(mbox, nc);
+		if (ret) {
+			dev_err(dev, "failed to probe subdev.\n");
+			ret = -EINVAL;
+			goto free_mbox_dev;
+		}
+		mbox++;
+	}
+
+	/* IPI IRQ */
+	ret = platform_get_irq(pdev, 0);
+	if (ret < 0) {
+		dev_err(dev, "unable to find IPI IRQ.\n");
+		goto free_mbox_dev;
+	}
+	pdata->irq = ret;
+	ret = devm_request_irq(dev, pdata->irq, zynqmp_ipi_interrupt,
+			       IRQF_SHARED, dev_name(dev), pdata);
+	if (ret) {
+		dev_err(dev, "IRQ %d is not requested successfully.\n",
+			pdata->irq);
+		goto free_mbox_dev;
+	}
+
+	platform_set_drvdata(pdev, pdata);
+	return ret;
+
+free_mbox_dev:
+	zynqmp_ipi_free_mboxes(pdata);
+	return ret;
+}
+
+static int zynqmp_ipi_remove(struct platform_device *pdev)
+{
+	struct zynqmp_ipi_pdata *pdata;
+
+	pdata = platform_get_drvdata(pdev);
+	zynqmp_ipi_free_mboxes(pdata);
+
+	return 0;
+}
+
+static struct platform_driver zynqmp_ipi_driver = {
+	.probe = zynqmp_ipi_probe,
+	.remove = zynqmp_ipi_remove,
+	.driver = {
+		   .name = "zynqmp-ipi",
+		   .of_match_table = of_match_ptr(zynqmp_ipi_of_match),
+	},
+};
+
+static int __init zynqmp_ipi_init(void)
+{
+	return platform_driver_register(&zynqmp_ipi_driver);
+}
+subsys_initcall(zynqmp_ipi_init);
+
+static void __exit zynqmp_ipi_exit(void)
+{
+	platform_driver_unregister(&zynqmp_ipi_driver);
+}
+module_exit(zynqmp_ipi_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("Xilinx ZynqMP IPI Mailbox driver");
+MODULE_AUTHOR("Xilinx Inc.");
diff --git a/include/linux/mailbox/zynqmp-ipi-message.h b/include/linux/mailbox/zynqmp-ipi-message.h
new file mode 100644
index 000000000000..9542b41eacfd
--- /dev/null
+++ b/include/linux/mailbox/zynqmp-ipi-message.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _LINUX_ZYNQMP_IPI_MESSAGE_H_
+#define _LINUX_ZYNQMP_IPI_MESSAGE_H_
+
+/**
+ * struct zynqmp_ipi_message - ZynqMP IPI message structure
+ * @len:  Length of message
+ * @data: message payload
+ *
+ * This is the structure for data used in mbox_send_message
+ * the maximum length of data buffer is fixed to 12 bytes.
+ * Client is supposed to be aware of this.
+ */
+struct zynqmp_ipi_message {
+	size_t len;
+	u8 data[0];
+};
+
+#endif /* _LINUX_ZYNQMP_IPI_MESSAGE_H_ */
-- 
cgit v1.2.3


From 4c3024debf62de4c6ac6d3cb4c0063be21d4f652 Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Wed, 6 Mar 2019 14:35:15 -0500
Subject: bpf: only test gso type on gso packets

BPF can adjust gso only for tcp bytestreams. Fail on other gso types.

But only on gso packets. It does not touch this field if !gso_size.

Fixes: b90efd225874 ("bpf: only adjust gso_size on bytestream protocols")
Signed-off-by: Willem de Bruijn <willemb@google.com>
Acked-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/skbuff.h | 4 ++--
 net/core/filter.c      | 8 ++++----
 2 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 27beb549ffbe..f32f32407dc4 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -4232,10 +4232,10 @@ static inline bool skb_is_gso_sctp(const struct sk_buff *skb)
 	return skb_shinfo(skb)->gso_type & SKB_GSO_SCTP;
 }
 
+/* Note: Should be called only if skb_is_gso(skb) is true */
 static inline bool skb_is_gso_tcp(const struct sk_buff *skb)
 {
-	return skb_is_gso(skb) &&
-	       skb_shinfo(skb)->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6);
+	return skb_shinfo(skb)->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6);
 }
 
 static inline void skb_gso_reset(struct sk_buff *skb)
diff --git a/net/core/filter.c b/net/core/filter.c
index 5ceba98069d4..f274620945ff 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2804,7 +2804,7 @@ static int bpf_skb_proto_4_to_6(struct sk_buff *skb)
 	u32 off = skb_mac_header_len(skb);
 	int ret;
 
-	if (!skb_is_gso_tcp(skb))
+	if (skb_is_gso(skb) && !skb_is_gso_tcp(skb))
 		return -ENOTSUPP;
 
 	ret = skb_cow(skb, len_diff);
@@ -2845,7 +2845,7 @@ static int bpf_skb_proto_6_to_4(struct sk_buff *skb)
 	u32 off = skb_mac_header_len(skb);
 	int ret;
 
-	if (!skb_is_gso_tcp(skb))
+	if (skb_is_gso(skb) && !skb_is_gso_tcp(skb))
 		return -ENOTSUPP;
 
 	ret = skb_unclone(skb, GFP_ATOMIC);
@@ -2970,7 +2970,7 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 len_diff)
 	u32 off = skb_mac_header_len(skb) + bpf_skb_net_base_len(skb);
 	int ret;
 
-	if (!skb_is_gso_tcp(skb))
+	if (skb_is_gso(skb) && !skb_is_gso_tcp(skb))
 		return -ENOTSUPP;
 
 	ret = skb_cow(skb, len_diff);
@@ -2999,7 +2999,7 @@ static int bpf_skb_net_shrink(struct sk_buff *skb, u32 len_diff)
 	u32 off = skb_mac_header_len(skb) + bpf_skb_net_base_len(skb);
 	int ret;
 
-	if (!skb_is_gso_tcp(skb))
+	if (skb_is_gso(skb) && !skb_is_gso_tcp(skb))
 		return -ENOTSUPP;
 
 	ret = skb_unclone(skb, GFP_ATOMIC);
-- 
cgit v1.2.3


From c6b38fbbde91ee7b072febe4b83022e4850f934f Mon Sep 17 00:00:00 2001
From: Gerd Hoffmann <kraxel@redhat.com>
Date: Fri, 1 Mar 2019 10:24:59 +0100
Subject: drm: move i915_kick_out_vgacon to vgaarb

Also rename it to vga_remove_vgacon and add kerneldoc text.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: http://patchwork.freedesktop.org/patch/msgid/20190301092502.30948-2-kraxel@redhat.com
---
 drivers/gpu/drm/i915/i915_drv.c | 35 +----------------------------
 drivers/gpu/vga/vgaarb.c        | 49 +++++++++++++++++++++++++++++++++++++++++
 include/linux/vgaarb.h          |  2 ++
 3 files changed, 52 insertions(+), 34 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 6630212f2faf..9df65d386d11 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -757,39 +757,6 @@ static int i915_kick_out_firmware_fb(struct drm_i915_private *dev_priv)
 	return ret;
 }
 
-#if !defined(CONFIG_VGA_CONSOLE)
-static int i915_kick_out_vgacon(struct drm_i915_private *dev_priv)
-{
-	return 0;
-}
-#elif !defined(CONFIG_DUMMY_CONSOLE)
-static int i915_kick_out_vgacon(struct drm_i915_private *dev_priv)
-{
-	return -ENODEV;
-}
-#else
-static int i915_kick_out_vgacon(struct drm_i915_private *dev_priv)
-{
-	int ret = 0;
-
-	DRM_INFO("Replacing VGA console driver\n");
-
-	console_lock();
-	if (con_is_bound(&vga_con))
-		ret = do_take_over_console(&dummy_con, 0, MAX_NR_CONSOLES - 1, 1);
-	if (ret == 0) {
-		ret = do_unregister_con_driver(&vga_con);
-
-		/* Ignore "already unregistered". */
-		if (ret == -ENODEV)
-			ret = 0;
-	}
-	console_unlock();
-
-	return ret;
-}
-#endif
-
 static void intel_init_dpio(struct drm_i915_private *dev_priv)
 {
 	/*
@@ -1420,7 +1387,7 @@ static int i915_driver_init_hw(struct drm_i915_private *dev_priv)
 		goto err_ggtt;
 	}
 
-	ret = i915_kick_out_vgacon(dev_priv);
+	ret = vga_remove_vgacon(pdev);
 	if (ret) {
 		DRM_ERROR("failed to remove conflicting VGA console\n");
 		goto err_ggtt;
diff --git a/drivers/gpu/vga/vgaarb.c b/drivers/gpu/vga/vgaarb.c
index dc8e039bfab5..f2f3ef8af271 100644
--- a/drivers/gpu/vga/vgaarb.c
+++ b/drivers/gpu/vga/vgaarb.c
@@ -48,6 +48,8 @@
 #include <linux/miscdevice.h>
 #include <linux/slab.h>
 #include <linux/screen_info.h>
+#include <linux/vt.h>
+#include <linux/console.h>
 
 #include <linux/uaccess.h>
 
@@ -168,6 +170,53 @@ void vga_set_default_device(struct pci_dev *pdev)
 	vga_default = pci_dev_get(pdev);
 }
 
+/**
+ * vga_remove_vgacon - deactivete vga console
+ *
+ * Unbind and unregister vgacon in case pdev is the default vga
+ * device.  Can be called by gpu drivers on initialization to make
+ * sure vga register access done by vgacon will not disturb the
+ * device.
+ *
+ * @pdev: pci device.
+ */
+#if !defined(CONFIG_VGA_CONSOLE)
+int vga_remove_vgacon(struct pci_dev *pdev)
+{
+	return 0;
+}
+#elif !defined(CONFIG_DUMMY_CONSOLE)
+int vga_remove_vgacon(struct pci_dev *pdev)
+{
+	return -ENODEV;
+}
+#else
+int vga_remove_vgacon(struct pci_dev *pdev)
+{
+	int ret = 0;
+
+	if (pdev != vga_default)
+		return 0;
+	vgaarb_info(&pdev->dev, "deactivate vga console\n");
+
+	console_lock();
+	if (con_is_bound(&vga_con))
+		ret = do_take_over_console(&dummy_con, 0,
+					   MAX_NR_CONSOLES - 1, 1);
+	if (ret == 0) {
+		ret = do_unregister_con_driver(&vga_con);
+
+		/* Ignore "already unregistered". */
+		if (ret == -ENODEV)
+			ret = 0;
+	}
+	console_unlock();
+
+	return ret;
+}
+#endif
+EXPORT_SYMBOL(vga_remove_vgacon);
+
 static inline void vga_irq_set_state(struct vga_device *vgadev, bool state)
 {
 	if (vgadev->irq_set_state)
diff --git a/include/linux/vgaarb.h b/include/linux/vgaarb.h
index ee162e3e879b..553b34c8b5f7 100644
--- a/include/linux/vgaarb.h
+++ b/include/linux/vgaarb.h
@@ -125,9 +125,11 @@ extern void vga_put(struct pci_dev *pdev, unsigned int rsrc);
 #ifdef CONFIG_VGA_ARB
 extern struct pci_dev *vga_default_device(void);
 extern void vga_set_default_device(struct pci_dev *pdev);
+extern int vga_remove_vgacon(struct pci_dev *pdev);
 #else
 static inline struct pci_dev *vga_default_device(void) { return NULL; };
 static inline void vga_set_default_device(struct pci_dev *pdev) { };
+static inline int vga_remove_vgacon(struct pci_dev *pdev) { return 0; };
 #endif
 
 /*
-- 
cgit v1.2.3


From 1c74ca7a1a9ae6ce71032c997a586f061a0029f8 Mon Sep 17 00:00:00 2001
From: Gerd Hoffmann <kraxel@redhat.com>
Date: Fri, 1 Mar 2019 10:25:00 +0100
Subject: drm/fb-helper: call vga_remove_vgacon automatically.

Add vga_remove_vgacon() call to
drm_fb_helper_remove_conflicting_pci_framebuffers().

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: http://patchwork.freedesktop.org/patch/msgid/20190301092502.30948-3-kraxel@redhat.com
---
 include/drm/drm_fb_helper.h | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/drm/drm_fb_helper.h b/include/drm/drm_fb_helper.h
index bb9acea61369..286d58efed5d 100644
--- a/include/drm/drm_fb_helper.h
+++ b/include/drm/drm_fb_helper.h
@@ -36,6 +36,7 @@ struct drm_fb_helper;
 #include <drm/drm_crtc.h>
 #include <drm/drm_device.h>
 #include <linux/kgdb.h>
+#include <linux/vgaarb.h>
 
 enum mode_set_atomic {
 	LEAVE_ATOMIC_MODE_SET,
@@ -642,11 +643,18 @@ drm_fb_helper_remove_conflicting_pci_framebuffers(struct pci_dev *pdev,
 						  int resource_id,
 						  const char *name)
 {
+	int ret = 0;
+
+	/*
+	 * WARNING: Apparently we must kick fbdev drivers before vgacon,
+	 * otherwise the vga fbdev driver falls over.
+	 */
 #if IS_REACHABLE(CONFIG_FB)
-	return remove_conflicting_pci_framebuffers(pdev, resource_id, name);
-#else
-	return 0;
+	ret = remove_conflicting_pci_framebuffers(pdev, resource_id, name);
 #endif
+	if (ret == 0)
+		ret = vga_remove_vgacon(pdev);
+	return ret;
 }
 
 #endif
-- 
cgit v1.2.3


From 0996584b3026bed7f38abe02e8535e6a6c474118 Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Tue, 5 Mar 2019 13:55:35 +0100
Subject: PM-runtime: Call pm_runtime_active|suspended_time() from sysfs

Avoid the open-coding of the accounted time acquisition in
runtime_active|suspend_time_show() and make them call
pm_runtime_active|suspended_time() instead.

Note that this change also indirectly avoids holding dev->power.lock
around the do_div() computation and the sprintf() call which is an
additional improvement.

Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
[ rjw: Changelog ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/power/runtime.c |  2 +-
 drivers/base/power/sysfs.c   | 12 ++----------
 include/linux/pm.h           |  1 -
 3 files changed, 3 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
index 32f6bf076bd7..a2d22e3ecf3a 100644
--- a/drivers/base/power/runtime.c
+++ b/drivers/base/power/runtime.c
@@ -64,7 +64,7 @@ static int rpm_suspend(struct device *dev, int rpmflags);
  * runtime_status field is updated, to account the time in the old state
  * correctly.
  */
-void update_pm_runtime_accounting(struct device *dev)
+static void update_pm_runtime_accounting(struct device *dev)
 {
 	u64 now, last, delta;
 
diff --git a/drivers/base/power/sysfs.c b/drivers/base/power/sysfs.c
index c6bf76124184..1226e441ddfe 100644
--- a/drivers/base/power/sysfs.c
+++ b/drivers/base/power/sysfs.c
@@ -125,13 +125,9 @@ static ssize_t runtime_active_time_show(struct device *dev,
 				struct device_attribute *attr, char *buf)
 {
 	int ret;
-	u64 tmp;
-	spin_lock_irq(&dev->power.lock);
-	update_pm_runtime_accounting(dev);
-	tmp = dev->power.active_time;
+	u64 tmp = pm_runtime_active_time(dev);
 	do_div(tmp, NSEC_PER_MSEC);
 	ret = sprintf(buf, "%llu\n", tmp);
-	spin_unlock_irq(&dev->power.lock);
 	return ret;
 }
 
@@ -141,13 +137,9 @@ static ssize_t runtime_suspended_time_show(struct device *dev,
 				struct device_attribute *attr, char *buf)
 {
 	int ret;
-	u64 tmp;
-	spin_lock_irq(&dev->power.lock);
-	update_pm_runtime_accounting(dev);
-	tmp = dev->power.suspended_time;
+	u64 tmp = pm_runtime_suspended_time(dev);
 	do_div(tmp, NSEC_PER_MSEC);
 	ret = sprintf(buf, "%llu\n", tmp);
-	spin_unlock_irq(&dev->power.lock);
 	return ret;
 }
 
diff --git a/include/linux/pm.h b/include/linux/pm.h
index 06f7ed893928..66c19a65a514 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -643,7 +643,6 @@ struct dev_pm_info {
 	struct dev_pm_qos	*qos;
 };
 
-extern void update_pm_runtime_accounting(struct device *dev);
 extern int dev_pm_get_subsys_data(struct device *dev);
 extern void dev_pm_put_subsys_data(struct device *dev);
 
-- 
cgit v1.2.3


From eacc95eae6837d3f41aed7d30b855a79ab2cb101 Mon Sep 17 00:00:00 2001
From: Mattias Jacobsson <2pi@mok.nu>
Date: Tue, 19 Feb 2019 20:59:49 +0100
Subject: platform/x86: wmi: move struct wmi_device_id to mod_devicetable.h

In preparation for adding WMI support to MODULE_DEVICE_TABLE() move the
definition of struct wmi_device_id to mod_devicetable.h and inline
guid_string in the struct.

Changing guid_string to an inline char array changes the loop conditions
when looping over an array of struct wmi_device_id. Therefore update
wmi_dev_match()'s loop to check for an empty guid_string instead of a
NULL pointer.

Signed-off-by: Mattias Jacobsson <2pi@mok.nu>
[dvhart: Move UUID_STRING_LEN define to this patch]
Signed-off-by: Darren Hart (VMware) <dvhart@infradead.org>
---
 drivers/platform/x86/wmi.c      |  2 +-
 include/linux/mod_devicetable.h | 12 ++++++++++++
 include/linux/wmi.h             |  5 +----
 scripts/mod/file2alias.c        |  1 +
 4 files changed, 15 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/drivers/platform/x86/wmi.c b/drivers/platform/x86/wmi.c
index b0f3d8ecd898..7b26b6ccf1a0 100644
--- a/drivers/platform/x86/wmi.c
+++ b/drivers/platform/x86/wmi.c
@@ -771,7 +771,7 @@ static int wmi_dev_match(struct device *dev, struct device_driver *driver)
 	if (id == NULL)
 		return 0;
 
-	while (id->guid_string) {
+	while (*id->guid_string) {
 		uuid_le driver_guid;
 
 		if (WARN_ON(uuid_le_to_bin(id->guid_string, &driver_guid)))
diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h
index f9bd2f34b99f..e44b90fa0aef 100644
--- a/include/linux/mod_devicetable.h
+++ b/include/linux/mod_devicetable.h
@@ -779,4 +779,16 @@ struct typec_device_id {
 	kernel_ulong_t driver_data;
 };
 
+/* WMI */
+
+#define WMI_MODULE_PREFIX	"wmi:"
+
+/**
+ * struct wmi_device_id - WMI device identifier
+ * @guid_string: 36 char string of the form fa50ff2b-f2e8-45de-83fa-65417f2f49ba
+ */
+struct wmi_device_id {
+	const char guid_string[UUID_STRING_LEN+1];
+};
+
 #endif /* LINUX_MOD_DEVICETABLE_H */
diff --git a/include/linux/wmi.h b/include/linux/wmi.h
index 4757cb5077e5..592f81afecbb 100644
--- a/include/linux/wmi.h
+++ b/include/linux/wmi.h
@@ -18,6 +18,7 @@
 
 #include <linux/device.h>
 #include <linux/acpi.h>
+#include <linux/mod_devicetable.h>
 #include <uapi/linux/wmi.h>
 
 struct wmi_device {
@@ -39,10 +40,6 @@ extern union acpi_object *wmidev_block_query(struct wmi_device *wdev,
 
 extern int set_required_buffer_size(struct wmi_device *wdev, u64 length);
 
-struct wmi_device_id {
-	const char *guid_string;
-};
-
 struct wmi_driver {
 	struct device_driver driver;
 	const struct wmi_device_id *id_table;
diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c
index afe22af20d7d..4e4f03a12cc0 100644
--- a/scripts/mod/file2alias.c
+++ b/scripts/mod/file2alias.c
@@ -37,6 +37,7 @@ typedef unsigned char	__u8;
 typedef struct {
 	__u8 b[16];
 } uuid_le;
+#define	UUID_STRING_LEN		36
 
 /* Big exception to the "don't include kernel headers into userspace, which
  * even potentially has different endianness and word sizes, since
-- 
cgit v1.2.3


From c461aed3a423dda442aad38047c3f2bb0f9e2012 Mon Sep 17 00:00:00 2001
From: Jani Nikula <jani.nikula@intel.com>
Date: Thu, 7 Mar 2019 16:26:32 -0800
Subject: kernel.h: unconditionally include asm/div64.h for do_div()

Include asm/div64.h for do_div() usage in DIV_ROUND_DOWN_ULL() and
DIV_ROUND_CLOSEST_ULL().  Remove the old CONFIG_LBDAF=y conditional
include.

Link: http://lkml.kernel.org/r/20181228153430.23763-1-jani.nikula@intel.com
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kernel.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index a8868a32098c..3b9d2bade8ad 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -14,6 +14,7 @@
 #include <linux/printk.h>
 #include <linux/build_bug.h>
 #include <asm/byteorder.h>
+#include <asm/div64.h>
 #include <uapi/linux/kernel.h>
 
 #define USHRT_MAX	((u16)(~0U))
@@ -204,7 +205,6 @@
 #define _THIS_IP_  ({ __label__ __here; __here: (unsigned long)&&__here; })
 
 #ifdef CONFIG_LBDAF
-# include <asm/div64.h>
 # define sector_div(a, b) do_div(a, b)
 #else
 # define sector_div(n, b)( \
-- 
cgit v1.2.3


From b95c4d18d5936c9f2c1a39347d73acb3e523ca24 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Thu, 7 Mar 2019 16:26:39 -0800
Subject: <linux/kernel.h>: drop the gcc-3.3 'const' hack in roundup()

The single quotation marks around "const" were causing a documentation
markup warning with reST.  Instead of fixing that warning, just delete
that comment line and the gcc-3.3 hack of using "const" in the roundup()
macro since gcc-3.3 is no longer supported for kernel builds.

I did around 20 different $arch builds with no problems, but we'll just
have to see if this causes problems for anyone else out there.

Link: http://lkml.kernel.org/r/ec5dcf72-7c3e-3513-af0c-4003ed598854@infradead.org
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Suggested-by: Matthew Wilcox <willy@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kernel.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 3b9d2bade8ad..43b4036e36fa 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -134,12 +134,10 @@
  *
  * Rounds @x up to next multiple of @y. If @y will always be a power
  * of 2, consider using the faster round_up().
- *
- * The `const' here prevents gcc-3.3 from calling __divdi3
  */
 #define roundup(x, y) (					\
 {							\
-	const typeof(y) __y = y;			\
+	typeof(y) __y = y;				\
 	(((x) + (__y - 1)) / __y) * __y;		\
 }							\
 )
-- 
cgit v1.2.3


From 30ff9ec457e66fcd73567b830aaca21e5833cf84 Mon Sep 17 00:00:00 2001
From: WangBo <wdjjwb@163.com>
Date: Thu, 7 Mar 2019 16:26:43 -0800
Subject: include/linux/types.h: use "unsigned int" instead of "unsigned"

Use "unsigned int" instead of "unsigned", to make code more clear.

Link: http://lkml.kernel.org/r/1551354739-6648-1-git-send-email-wdjjwb@163.com
Signed-off-by: WangBo <wang.bo116@zte.com.cn>
Reviewed-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/types.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/types.h b/include/linux/types.h
index c2615d6a019e..cc0dbbe551d5 100644
--- a/include/linux/types.h
+++ b/include/linux/types.h
@@ -155,9 +155,9 @@ typedef u64 dma_addr_t;
 typedef u32 dma_addr_t;
 #endif
 
-typedef unsigned __bitwise gfp_t;
-typedef unsigned __bitwise slab_flags_t;
-typedef unsigned __bitwise fmode_t;
+typedef unsigned int __bitwise gfp_t;
+typedef unsigned int __bitwise slab_flags_t;
+typedef unsigned int __bitwise fmode_t;
 
 #ifdef CONFIG_PHYS_ADDR_T_64BIT
 typedef u64 phys_addr_t;
-- 
cgit v1.2.3


From 6bab69c65013bed5fce9f101a64a84d0385b3946 Mon Sep 17 00:00:00 2001
From: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Date: Thu, 7 Mar 2019 16:27:00 -0800
Subject: build_bug.h: add wrapper for _Static_assert

BUILD_BUG_ON() is a little annoying, since it cannot be used outside
function scope.  So one cannot put assertions about the sizeof() a
struct next to the struct definition, but has to hide that in some more
or less arbitrary function.

Since gcc 4.6 (which is now also the required minimum), there is support
for the C11 _Static_assert in all C modes, including gnu89.  So add a
simple wrapper for that.

_Static_assert() requires a message argument, which is usually quite
redundant (and I believe that bug got fixed at least in newer C++
standards), but we can easily work around that with a little macro
magic, making it optional.

For example, adding

  static_assert(sizeof(struct printf_spec) == 8);

in vsprintf.c and modifying that struct to violate it, one gets

./include/linux/build_bug.h:78:41: error: static assertion failed: "sizeof(struct printf_spec) == 8"
 #define __static_assert(expr, msg, ...) _Static_assert(expr, "" msg "")

godbolt.org suggests that _Static_assert() has been support by clang
since at least 3.0.0.

Link: http://lkml.kernel.org/r/20190208203015.29702-1-linux@rasmusvillemoes.dk
Signed-off-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Acked-by: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Masahiro Yamada <yamada.masahiro@socionext.com>
Cc: Nick Desaulniers <ndesaulniers@google.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Luc Van Oostenryck <luc.vanoostenryck@gmail.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/build_bug.h | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

(limited to 'include')

diff --git a/include/linux/build_bug.h b/include/linux/build_bug.h
index faeec7433aab..0fe5426f2bdc 100644
--- a/include/linux/build_bug.h
+++ b/include/linux/build_bug.h
@@ -58,4 +58,23 @@
  */
 #define BUILD_BUG() BUILD_BUG_ON_MSG(1, "BUILD_BUG failed")
 
+/**
+ * static_assert - check integer constant expression at build time
+ *
+ * static_assert() is a wrapper for the C11 _Static_assert, with a
+ * little macro magic to make the message optional (defaulting to the
+ * stringification of the tested expression).
+ *
+ * Contrary to BUILD_BUG_ON(), static_assert() can be used at global
+ * scope, but requires the expression to be an integer constant
+ * expression (i.e., it is not enough that __builtin_constant_p() is
+ * true for expr).
+ *
+ * Also note that BUILD_BUG_ON() fails the build if the condition is
+ * true, while static_assert() fails the build if the expression is
+ * false.
+ */
+#define static_assert(expr, ...) __static_assert(expr, ##__VA_ARGS__, #expr)
+#define __static_assert(expr, msg, ...) _Static_assert(expr, msg)
+
 #endif	/* _LINUX_BUILD_BUG_H */
-- 
cgit v1.2.3


From f1fffbd44722cec9b8dd54d5cc86bd081ce39217 Mon Sep 17 00:00:00 2001
From: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Date: Thu, 7 Mar 2019 16:27:07 -0800
Subject: linux/fs.h: move member alignment check next to definition of struct
 filename

Instead of doing this compile-time check in some slightly arbitrary user
of struct filename, put it next to the definition.

Link: http://lkml.kernel.org/r/20190208203015.29702-3-linux@rasmusvillemoes.dk
Signed-off-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Kees Cook <keescook@chromium.org>
Cc: Luc Van Oostenryck <luc.vanoostenryck@gmail.com>
Cc: Masahiro Yamada <yamada.masahiro@socionext.com>
Cc: Nick Desaulniers <ndesaulniers@google.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/namei.c         | 2 --
 include/linux/fs.h | 3 +++
 2 files changed, 3 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/fs/namei.c b/fs/namei.c
index 914178cdbe94..d604f6b3bcc3 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -39,7 +39,6 @@
 #include <linux/bitops.h>
 #include <linux/init_task.h>
 #include <linux/uaccess.h>
-#include <linux/build_bug.h>
 
 #include "internal.h"
 #include "mount.h"
@@ -131,7 +130,6 @@ getname_flags(const char __user *filename, int flags, int *empty)
 	struct filename *result;
 	char *kname;
 	int len;
-	BUILD_BUG_ON(offsetof(struct filename, iname) % sizeof(long) != 0);
 
 	result = audit_reusename(filename);
 	if (result)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 08f26046233e..1a775aa3e349 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -37,6 +37,8 @@
 #include <linux/uuid.h>
 #include <linux/errseq.h>
 #include <linux/ioprio.h>
+#include <linux/build_bug.h>
+#include <linux/stddef.h>
 
 #include <asm/byteorder.h>
 #include <uapi/linux/fs.h>
@@ -2493,6 +2495,7 @@ struct filename {
 	struct audit_names	*aname;
 	const char		iname[];
 };
+static_assert(offsetof(struct filename, iname) % sizeof(long) == 0);
 
 extern long vfs_truncate(const struct path *, loff_t);
 extern int do_truncate(struct dentry *, loff_t start, unsigned int time_attrs,
-- 
cgit v1.2.3


From 2dc0e68d5ada6d29554c760bee498c2612530d12 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Thu, 7 Mar 2019 16:27:11 -0800
Subject: linux/kernel.h: use 'short' to define USHRT_MAX, SHRT_MAX, SHRT_MIN

The commit log of 44f564a4bf6a ("ipc: add definitions of USHORT_MAX and
others") did not explain why it used (s16) and (u16) instead of (short)
and (unsigned short).

Let's use (short) and (unsigned short), which is more sensible, and more
consistent with the other MAX/MIN defines.

As you see in include/uapi/asm-generic/int-ll64.h, s16/u16 are
typedef'ed as signed/unsigned short.  So, this commit does not have a
functional change.

Remove the unneeded parentheses around ~0U while we are here.

Link: http://lkml.kernel.org/r/1549156242-20806-1-git-send-email-yamada.masahiro@socionext.com
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Zhang Yanmin <yanmin.zhang@intel.com>
Cc: Alex Elder <elder@linaro.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kernel.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 43b4036e36fa..a9ff66977e10 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -17,9 +17,9 @@
 #include <asm/div64.h>
 #include <uapi/linux/kernel.h>
 
-#define USHRT_MAX	((u16)(~0U))
-#define SHRT_MAX	((s16)(USHRT_MAX>>1))
-#define SHRT_MIN	((s16)(-SHRT_MAX - 1))
+#define USHRT_MAX	((unsigned short)~0U)
+#define SHRT_MAX	((short)(USHRT_MAX>>1))
+#define SHRT_MIN	((short)(-SHRT_MAX - 1))
 #define INT_MAX		((int)(~0U>>1))
 #define INT_MIN		(-INT_MAX - 1)
 #define UINT_MAX	(~0U)
-- 
cgit v1.2.3


From 54d50897d544c874562253e2a8f70dfcad22afe8 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Thu, 7 Mar 2019 16:27:14 -0800
Subject: linux/kernel.h: split *_MAX and *_MIN macros into <linux/limits.h>

<linux/kernel.h> tends to be cluttered because we often put various sort
of unrelated stuff in it.  So, we have split out a sensible chunk of
code into a separate header from time to time.

This commit splits out the *_MAX and *_MIN defines.

The standard header <limits.h> contains various MAX, MIN constants
including numerial limits.  [1]

I think it makes sense to move in-kernel MAX, MIN constants into
include/linux/limits.h.

We already have include/uapi/linux/limits.h to contain some user-space
constants.  I changed its include guard to _UAPI_LINUX_LIMITS_H.  This
change has no impact to the user-space because
scripts/headers_install.sh rips off the '_UAPI' prefix from the include
guards of exported headers.

[1] http://pubs.opengroup.org/onlinepubs/009604499/basedefs/limits.h.html

Link: http://lkml.kernel.org/r/1549156242-20806-2-git-send-email-yamada.masahiro@socionext.com
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Cc: Alex Elder <elder@linaro.org>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Zhang Yanmin <yanmin.zhang@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kernel.h      | 29 +----------------------------
 include/linux/limits.h      | 36 ++++++++++++++++++++++++++++++++++++
 include/uapi/linux/limits.h |  4 ++--
 3 files changed, 39 insertions(+), 30 deletions(-)
 create mode 100644 include/linux/limits.h

(limited to 'include')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index a9ff66977e10..34a5036debd3 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -4,6 +4,7 @@
 
 
 #include <stdarg.h>
+#include <linux/limits.h>
 #include <linux/linkage.h>
 #include <linux/stddef.h>
 #include <linux/types.h>
@@ -17,34 +18,6 @@
 #include <asm/div64.h>
 #include <uapi/linux/kernel.h>
 
-#define USHRT_MAX	((unsigned short)~0U)
-#define SHRT_MAX	((short)(USHRT_MAX>>1))
-#define SHRT_MIN	((short)(-SHRT_MAX - 1))
-#define INT_MAX		((int)(~0U>>1))
-#define INT_MIN		(-INT_MAX - 1)
-#define UINT_MAX	(~0U)
-#define LONG_MAX	((long)(~0UL>>1))
-#define LONG_MIN	(-LONG_MAX - 1)
-#define ULONG_MAX	(~0UL)
-#define LLONG_MAX	((long long)(~0ULL>>1))
-#define LLONG_MIN	(-LLONG_MAX - 1)
-#define ULLONG_MAX	(~0ULL)
-#define SIZE_MAX	(~(size_t)0)
-#define PHYS_ADDR_MAX	(~(phys_addr_t)0)
-
-#define U8_MAX		((u8)~0U)
-#define S8_MAX		((s8)(U8_MAX>>1))
-#define S8_MIN		((s8)(-S8_MAX - 1))
-#define U16_MAX		((u16)~0U)
-#define S16_MAX		((s16)(U16_MAX>>1))
-#define S16_MIN		((s16)(-S16_MAX - 1))
-#define U32_MAX		((u32)~0U)
-#define S32_MAX		((s32)(U32_MAX>>1))
-#define S32_MIN		((s32)(-S32_MAX - 1))
-#define U64_MAX		((u64)~0ULL)
-#define S64_MAX		((s64)(U64_MAX>>1))
-#define S64_MIN		((s64)(-S64_MAX - 1))
-
 #define STACK_MAGIC	0xdeadbeef
 
 /**
diff --git a/include/linux/limits.h b/include/linux/limits.h
new file mode 100644
index 000000000000..76afcd24ff8c
--- /dev/null
+++ b/include/linux/limits.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_LIMITS_H
+#define _LINUX_LIMITS_H
+
+#include <uapi/linux/limits.h>
+#include <linux/types.h>
+
+#define USHRT_MAX	((unsigned short)~0U)
+#define SHRT_MAX	((short)(USHRT_MAX >> 1))
+#define SHRT_MIN	((short)(-SHRT_MAX - 1))
+#define INT_MAX		((int)(~0U >> 1))
+#define INT_MIN		(-INT_MAX - 1)
+#define UINT_MAX	(~0U)
+#define LONG_MAX	((long)(~0UL >> 1))
+#define LONG_MIN	(-LONG_MAX - 1)
+#define ULONG_MAX	(~0UL)
+#define LLONG_MAX	((long long)(~0ULL >> 1))
+#define LLONG_MIN	(-LLONG_MAX - 1)
+#define ULLONG_MAX	(~0ULL)
+#define SIZE_MAX	(~(size_t)0)
+#define PHYS_ADDR_MAX	(~(phys_addr_t)0)
+
+#define U8_MAX		((u8)~0U)
+#define S8_MAX		((s8)(U8_MAX >> 1))
+#define S8_MIN		((s8)(-S8_MAX - 1))
+#define U16_MAX		((u16)~0U)
+#define S16_MAX		((s16)(U16_MAX >> 1))
+#define S16_MIN		((s16)(-S16_MAX - 1))
+#define U32_MAX		((u32)~0U)
+#define S32_MAX		((s32)(U32_MAX >> 1))
+#define S32_MIN		((s32)(-S32_MAX - 1))
+#define U64_MAX		((u64)~0ULL)
+#define S64_MAX		((s64)(U64_MAX >> 1))
+#define S64_MIN		((s64)(-S64_MAX - 1))
+
+#endif /* _LINUX_LIMITS_H */
diff --git a/include/uapi/linux/limits.h b/include/uapi/linux/limits.h
index c3547f07605c..6bcbe3068761 100644
--- a/include/uapi/linux/limits.h
+++ b/include/uapi/linux/limits.h
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-#ifndef _LINUX_LIMITS_H
-#define _LINUX_LIMITS_H
+#ifndef _UAPI_LINUX_LIMITS_H
+#define _UAPI_LINUX_LIMITS_H
 
 #define NR_OPEN	        1024
 
-- 
cgit v1.2.3


From 3c82066e6a920b30de84fce00fb7fd701bf23f09 Mon Sep 17 00:00:00 2001
From: Nadav Amit <namit@vmware.com>
Date: Thu, 7 Mar 2019 16:27:18 -0800
Subject: include/linux/pid.h: remove next_pidmap() declaration

Commit 95846ecf9dac ("pid: replace pid bitmap implementation with IDR
API") removed next_pidmap() but left its declaration.

Remove it.  No functional change.

Link: http://lkml.kernel.org/r/20190213113736.21922-1-namit@vmware.com
Signed-off-by: Nadav Amit <namit@vmware.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Gargi Sharma <gs051095@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/pid.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/pid.h b/include/linux/pid.h
index 14a9a39da9c7..b6f4ba16065a 100644
--- a/include/linux/pid.h
+++ b/include/linux/pid.h
@@ -109,7 +109,6 @@ extern struct pid *find_vpid(int nr);
  */
 extern struct pid *find_get_pid(int nr);
 extern struct pid *find_ge_pid(int nr, struct pid_namespace *);
-int next_pidmap(struct pid_namespace *pid_ns, unsigned int last);
 
 extern struct pid *alloc_pid(struct pid_namespace *ns);
 extern void free_pid(struct pid *pid);
-- 
cgit v1.2.3


From e0b73d7beb919ada05465a7d70e9ce134e7a6d8a Mon Sep 17 00:00:00 2001
From: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Date: Thu, 7 Mar 2019 16:27:21 -0800
Subject: linux/device.h: use DYNAMIC_DEBUG_BRANCH in dev_dbg_ratelimited

Patch series "various dynamic_debug patches", v4.

This started as an experiment to see how hard it would be to change the
four pointers in struct _ddebug into relative offsets, a la
CONFIG_GENERIC_BUG_RELATIVE_POINTERS, thus saving 16 bytes per pr_debug
site (and thus exactly making up for the extra space used by the
introduction of jump labels in 9049fc74).  I stumbled on a few things
that are probably worth fixing regardless of whether that goal is deemed
worthwhile.

Back at v3 (in November), I redid the implementation on top of the fancy
new asm-macros stuff.  Luckily enough, v3 didn't get picked up, since
the asm-macros were backed out again.  I still want to do the
relative-pointers thing eventually, but we're close to the merge window
opening, so here's just most of the "incidental" patches, some of which
also serve as preparation for the relative pointers.

This patch (of 4):

dev_dbg_ratelimited tests the dynamic debug descriptor the old-fashioned
way, and doesn't utilize the static key/jump label implementation when
CONFIG_JUMP_LABEL is set.  Use the DYNAMIC_DEBUG_BRANCH which is defined
appropriately.

Link: http://lkml.kernel.org/r/20190212214150.4807-2-linux@rasmusvillemoes.dk
Signed-off-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Acked-by: Jason Baron <jbaron@akamai.com>
Cc: David Sterba <dsterba@suse.com>
Cc: Petr Mladek <pmladek@suse.com>
Cc: "Rafael J . Wysocki" <rafael.j.wysocki@intel.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/device.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/device.h b/include/linux/device.h
index 54b586105179..f40f6064ba05 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -1568,7 +1568,7 @@ do {									\
 				      DEFAULT_RATELIMIT_INTERVAL,	\
 				      DEFAULT_RATELIMIT_BURST);		\
 	DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, fmt);			\
-	if (unlikely(descriptor.flags & _DPRINTK_FLAGS_PRINT) &&	\
+	if (DYNAMIC_DEBUG_BRANCH(descriptor) &&				\
 	    __ratelimit(&_rs))						\
 		__dynamic_dev_dbg(&descriptor, dev, dev_fmt(fmt),	\
 				  ##__VA_ARGS__);			\
-- 
cgit v1.2.3


From 3f16d181174879eccc523300a53b9eac2eee6e6d Mon Sep 17 00:00:00 2001
From: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Date: Thu, 7 Mar 2019 16:27:25 -0800
Subject: linux/net.h: use DYNAMIC_DEBUG_BRANCH in net_dbg_ratelimited

net_dbg_ratelimited tests the dynamic debug descriptor the old-fashioned
way, and doesn't utilize the static key/jump label implementation when
CONFIG_JUMP_LABEL is set.  Use the DYNAMIC_DEBUG_BRANCH which is defined
appropriately.

Link: http://lkml.kernel.org/r/20190212214150.4807-3-linux@rasmusvillemoes.dk
Signed-off-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Acked-by: Jason Baron <jbaron@akamai.com>
Cc: David Sterba <dsterba@suse.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Petr Mladek <pmladek@suse.com>
Cc: "Rafael J . Wysocki" <rafael.j.wysocki@intel.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/net.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/net.h b/include/linux/net.h
index e0930678c8bf..651fca72286c 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -263,7 +263,7 @@ do {								\
 #define net_dbg_ratelimited(fmt, ...)					\
 do {									\
 	DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, fmt);			\
-	if (unlikely(descriptor.flags & _DPRINTK_FLAGS_PRINT) &&	\
+	if (DYNAMIC_DEBUG_BRANCH(descriptor) &&				\
 	    net_ratelimit())						\
 		__dynamic_pr_debug(&descriptor, pr_fmt(fmt),		\
 		                   ##__VA_ARGS__);			\
-- 
cgit v1.2.3


From a9d4ab7a91165f325060d6441169ebeab08a2fec Mon Sep 17 00:00:00 2001
From: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Date: Thu, 7 Mar 2019 16:27:29 -0800
Subject: linux/printk.h: use DYNAMIC_DEBUG_BRANCH in pr_debug_ratelimited

pr_debug_ratelimited tests the dynamic debug descriptor the
old-fashioned way, and doesn't utilize the static key/jump label
implementation when CONFIG_JUMP_LABEL is set.  Use the
DYNAMIC_DEBUG_BRANCH which is defined appropriately.

Link: http://lkml.kernel.org/r/20190212214150.4807-4-linux@rasmusvillemoes.dk
Signed-off-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Acked-by: Petr Mladek <pmladek@suse.com>
Acked-by: Jason Baron <jbaron@akamai.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: David Sterba <dsterba@suse.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: "Rafael J . Wysocki" <rafael.j.wysocki@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/printk.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/printk.h b/include/linux/printk.h
index 77740a506ebb..02b5c115d89b 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -461,7 +461,7 @@ do {									\
 				      DEFAULT_RATELIMIT_INTERVAL,	\
 				      DEFAULT_RATELIMIT_BURST);		\
 	DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, pr_fmt(fmt));		\
-	if (unlikely(descriptor.flags & _DPRINTK_FLAGS_PRINT) &&	\
+	if (DYNAMIC_DEBUG_BRANCH(descriptor) &&				\
 	    __ratelimit(&_rs))						\
 		__dynamic_pr_debug(&descriptor, pr_fmt(fmt), ##__VA_ARGS__);	\
 } while (0)
-- 
cgit v1.2.3


From 2bdde670beedf73de38b8607f0b1913358af7381 Mon Sep 17 00:00:00 2001
From: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Date: Thu, 7 Mar 2019 16:27:33 -0800
Subject: dynamic_debug: consolidate DEFINE_DYNAMIC_DEBUG_METADATA definitions

Instead of defining DEFINE_DYNAMIC_DEBUG_METADATA in terms of a helper
DEFINE_DYNAMIC_DEBUG_METADATA_KEY, that needs another helper dd_key_init
to be properly defined, just make the various #ifdef branches define a
_DPRINTK_KEY_INIT that can be used directly, similar to
_DPRINTK_FLAGS_DEFAULT.

Link: http://lkml.kernel.org/r/20190212214150.4807-5-linux@rasmusvillemoes.dk
Signed-off-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Acked-by: Jason Baron <jbaron@akamai.com>
Cc: David Sterba <dsterba@suse.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Petr Mladek <pmladek@suse.com>
Cc: "Rafael J . Wysocki" <rafael.j.wysocki@intel.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/dynamic_debug.h | 22 +++++++---------------
 1 file changed, 7 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/include/linux/dynamic_debug.h b/include/linux/dynamic_debug.h
index b3419da1a776..b17725400f75 100644
--- a/include/linux/dynamic_debug.h
+++ b/include/linux/dynamic_debug.h
@@ -71,7 +71,7 @@ void __dynamic_netdev_dbg(struct _ddebug *descriptor,
 			  const struct net_device *dev,
 			  const char *fmt, ...);
 
-#define DEFINE_DYNAMIC_DEBUG_METADATA_KEY(name, fmt, key, init)	\
+#define DEFINE_DYNAMIC_DEBUG_METADATA(name, fmt)		\
 	static struct _ddebug  __aligned(8)			\
 	__attribute__((section("__verbose"))) name = {		\
 		.modname = KBUILD_MODNAME,			\
@@ -80,35 +80,27 @@ void __dynamic_netdev_dbg(struct _ddebug *descriptor,
 		.format = (fmt),				\
 		.lineno = __LINE__,				\
 		.flags = _DPRINTK_FLAGS_DEFAULT,		\
-		dd_key_init(key, init)				\
+		_DPRINTK_KEY_INIT				\
 	}
 
 #ifdef CONFIG_JUMP_LABEL
 
-#define dd_key_init(key, init) key = (init)
-
 #ifdef DEBUG
-#define DEFINE_DYNAMIC_DEBUG_METADATA(name, fmt) \
-	DEFINE_DYNAMIC_DEBUG_METADATA_KEY(name, fmt, .key.dd_key_true, \
-					  (STATIC_KEY_TRUE_INIT))
+
+#define _DPRINTK_KEY_INIT .key.dd_key_true = (STATIC_KEY_TRUE_INIT)
 
 #define DYNAMIC_DEBUG_BRANCH(descriptor) \
 	static_branch_likely(&descriptor.key.dd_key_true)
 #else
-#define DEFINE_DYNAMIC_DEBUG_METADATA(name, fmt) \
-	DEFINE_DYNAMIC_DEBUG_METADATA_KEY(name, fmt, .key.dd_key_false, \
-					  (STATIC_KEY_FALSE_INIT))
+#define _DPRINTK_KEY_INIT .key.dd_key_false = (STATIC_KEY_FALSE_INIT)
 
 #define DYNAMIC_DEBUG_BRANCH(descriptor) \
 	static_branch_unlikely(&descriptor.key.dd_key_false)
 #endif
 
-#else
-
-#define dd_key_init(key, init)
+#else /* !HAVE_JUMP_LABEL */
 
-#define DEFINE_DYNAMIC_DEBUG_METADATA(name, fmt) \
-	DEFINE_DYNAMIC_DEBUG_METADATA_KEY(name, fmt, 0, 0)
+#define _DPRINTK_KEY_INIT
 
 #ifdef DEBUG
 #define DYNAMIC_DEBUG_BRANCH(descriptor) \
-- 
cgit v1.2.3


From a4507fedcd2580d510d8d91ac6b99537f869f62a Mon Sep 17 00:00:00 2001
From: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Date: Thu, 7 Mar 2019 16:27:52 -0800
Subject: dynamic_debug: add static inline stub for ddebug_add_module

For symmetry with ddebug_remove_module, and to avoid a bit of ifdeffery
in module.c, move the declaration of ddebug_add_module inside #if
defined(CONFIG_DYNAMIC_DEBUG) and add a corresponding no-op stub in the
#else branch.

Link: http://lkml.kernel.org/r/20190212214150.4807-10-linux@rasmusvillemoes.dk
Signed-off-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Acked-by: Jason Baron <jbaron@akamai.com>
Cc: David Sterba <dsterba@suse.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Petr Mladek <pmladek@suse.com>
Cc: "Rafael J . Wysocki" <rafael.j.wysocki@intel.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/dynamic_debug.h | 10 ++++++++--
 kernel/module.c               |  2 --
 2 files changed, 8 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/dynamic_debug.h b/include/linux/dynamic_debug.h
index b17725400f75..3f8977cfa479 100644
--- a/include/linux/dynamic_debug.h
+++ b/include/linux/dynamic_debug.h
@@ -47,10 +47,10 @@ struct _ddebug {
 } __attribute__((aligned(8)));
 
 
-int ddebug_add_module(struct _ddebug *tab, unsigned int n,
-				const char *modname);
 
 #if defined(CONFIG_DYNAMIC_DEBUG)
+int ddebug_add_module(struct _ddebug *tab, unsigned int n,
+				const char *modname);
 extern int ddebug_remove_module(const char *mod_name);
 extern __printf(2, 3)
 void __dynamic_pr_debug(struct _ddebug *descriptor, const char *fmt, ...);
@@ -152,6 +152,12 @@ do {								\
 #include <linux/string.h>
 #include <linux/errno.h>
 
+static inline int ddebug_add_module(struct _ddebug *tab, unsigned int n,
+				    const char *modname)
+{
+	return 0;
+}
+
 static inline int ddebug_remove_module(const char *mod)
 {
 	return 0;
diff --git a/kernel/module.c b/kernel/module.c
index 7b1d437c1ea6..0b9aa8ab89f0 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2719,9 +2719,7 @@ static void dynamic_debug_setup(struct module *mod, struct _ddebug *debug, unsig
 {
 	if (!debug)
 		return;
-#ifdef CONFIG_DYNAMIC_DEBUG
 	ddebug_add_module(debug, num, mod->name);
-#endif
 }
 
 static void dynamic_debug_remove(struct module *mod, struct _ddebug *debug)
-- 
cgit v1.2.3


From 47cdd64be4832ff645dfa0aaf6886edd555369f0 Mon Sep 17 00:00:00 2001
From: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Date: Thu, 7 Mar 2019 16:27:56 -0800
Subject: dynamic_debug: refactor dynamic_pr_debug and friends

For the upcoming 'define the _ddebug descriptor in assembly', we need
all the descriptors in a translation unit to have distinct names
(because asm does not understand C scope).  The easiest way to achieve
that is as usual with an extra level of macros, passing the identifier
to use to the innermost macro, generating it via __UNIQUE_ID or
something.

However, instead of repeating that exercise for dynamic_pr_debug,
dynamic_dev_dbg, dynamic_netdev_dbg and dynamic_hex_dump separately, we
can use the similarity between their bodies to implement them via a
common macro, _dynamic_func_call - though the hex_dump case requires a
slight variant, since print_hex_dump does not take the _ddebug
descriptor.  We'll also get to use that variant elsewhere (btrfs).

Link: http://lkml.kernel.org/r/20190212214150.4807-11-linux@rasmusvillemoes.dk
Signed-off-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Acked-by: Jason Baron <jbaron@akamai.com>
Cc: David Sterba <dsterba@suse.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Petr Mladek <pmladek@suse.com>
Cc: "Rafael J . Wysocki" <rafael.j.wysocki@intel.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/dynamic_debug.h | 72 ++++++++++++++++++++++++++-----------------
 1 file changed, 43 insertions(+), 29 deletions(-)

(limited to 'include')

diff --git a/include/linux/dynamic_debug.h b/include/linux/dynamic_debug.h
index 3f8977cfa479..c2be029b9b53 100644
--- a/include/linux/dynamic_debug.h
+++ b/include/linux/dynamic_debug.h
@@ -112,40 +112,54 @@ void __dynamic_netdev_dbg(struct _ddebug *descriptor,
 
 #endif
 
-#define dynamic_pr_debug(fmt, ...)				\
-do {								\
-	DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, fmt);		\
-	if (DYNAMIC_DEBUG_BRANCH(descriptor))			\
-		__dynamic_pr_debug(&descriptor, pr_fmt(fmt),	\
-				   ##__VA_ARGS__);		\
+#define __dynamic_func_call(id, fmt, func, ...) do {	\
+	DEFINE_DYNAMIC_DEBUG_METADATA(id, fmt);		\
+	if (DYNAMIC_DEBUG_BRANCH(id))			\
+		func(&id, ##__VA_ARGS__);		\
 } while (0)
 
-#define dynamic_dev_dbg(dev, fmt, ...)				\
-do {								\
-	DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, fmt);		\
-	if (DYNAMIC_DEBUG_BRANCH(descriptor))			\
-		__dynamic_dev_dbg(&descriptor, dev, fmt,	\
-				  ##__VA_ARGS__);		\
+#define __dynamic_func_call_no_desc(id, fmt, func, ...) do {	\
+	DEFINE_DYNAMIC_DEBUG_METADATA(id, fmt);			\
+	if (DYNAMIC_DEBUG_BRANCH(id))				\
+		func(__VA_ARGS__);				\
 } while (0)
 
-#define dynamic_netdev_dbg(dev, fmt, ...)			\
-do {								\
-	DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, fmt);		\
-	if (DYNAMIC_DEBUG_BRANCH(descriptor))			\
-		__dynamic_netdev_dbg(&descriptor, dev, fmt,	\
-				     ##__VA_ARGS__);		\
-} while (0)
+/*
+ * "Factory macro" for generating a call to func, guarded by a
+ * DYNAMIC_DEBUG_BRANCH. The dynamic debug decriptor will be
+ * initialized using the fmt argument. The function will be called with
+ * the address of the descriptor as first argument, followed by all
+ * the varargs. Note that fmt is repeated in invocations of this
+ * macro.
+ */
+#define _dynamic_func_call(fmt, func, ...)				\
+	__dynamic_func_call(__UNIQUE_ID(ddebug), fmt, func, ##__VA_ARGS__)
+/*
+ * A variant that does the same, except that the descriptor is not
+ * passed as the first argument to the function; it is only called
+ * with precisely the macro's varargs.
+ */
+#define _dynamic_func_call_no_desc(fmt, func, ...)	\
+	__dynamic_func_call_no_desc(__UNIQUE_ID(ddebug), fmt, func, ##__VA_ARGS__)
 
-#define dynamic_hex_dump(prefix_str, prefix_type, rowsize,	\
-			 groupsize, buf, len, ascii)		\
-do {								\
-	DEFINE_DYNAMIC_DEBUG_METADATA(descriptor,		\
-		__builtin_constant_p(prefix_str) ? prefix_str : "hexdump");\
-	if (DYNAMIC_DEBUG_BRANCH(descriptor))			\
-		print_hex_dump(KERN_DEBUG, prefix_str,		\
-			       prefix_type, rowsize, groupsize,	\
-			       buf, len, ascii);		\
-} while (0)
+#define dynamic_pr_debug(fmt, ...)				\
+	_dynamic_func_call(fmt,	__dynamic_pr_debug,		\
+			   pr_fmt(fmt), ##__VA_ARGS__)
+
+#define dynamic_dev_dbg(dev, fmt, ...)				\
+	_dynamic_func_call(fmt,__dynamic_dev_dbg, 		\
+			   dev, fmt, ##__VA_ARGS__)
+
+#define dynamic_netdev_dbg(dev, fmt, ...)			\
+	_dynamic_func_call(fmt, __dynamic_netdev_dbg,		\
+			   dev, fmt, ##__VA_ARGS__)
+
+#define dynamic_hex_dump(prefix_str, prefix_type, rowsize,		\
+			 groupsize, buf, len, ascii)			\
+	_dynamic_func_call_no_desc(__builtin_constant_p(prefix_str) ? prefix_str : "hexdump", \
+				   print_hex_dump,			\
+				   KERN_DEBUG, prefix_str, prefix_type,	\
+				   rowsize, groupsize, buf, len, ascii)
 
 #else
 
-- 
cgit v1.2.3


From 6ad6e54abb5dc5cf0533c23f772dd51ede0c759a Mon Sep 17 00:00:00 2001
From: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Date: Thu, 7 Mar 2019 16:28:03 -0800
Subject: ACPI: use proper DYNAMIC_DEBUG_BRANCH macro

dynamic debug may be implemented via static keys, but ACPI is missing
out on that runtime benefit since it open-codes one possible definition
of DYNAMIC_DEBUG_BRANCH.

Link: http://lkml.kernel.org/r/20190212214150.4807-13-linux@rasmusvillemoes.dk
Signed-off-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Acked-by: Jason Baron <jbaron@akamai.com>
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: David Sterba <dsterba@suse.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Petr Mladek <pmladek@suse.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/acpi.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 03b4c4f225d0..c15a007f1790 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -987,7 +987,7 @@ void __acpi_handle_debug(struct _ddebug *descriptor, acpi_handle handle, const c
 #define acpi_handle_debug(handle, fmt, ...)				\
 do {									\
 	DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, fmt);			\
-	if (unlikely(descriptor.flags & _DPRINTK_FLAGS_PRINT))		\
+	if (DYNAMIC_DEBUG_BRANCH(descriptor))				\
 		__acpi_handle_debug(&descriptor, handle, pr_fmt(fmt),	\
 				##__VA_ARGS__);				\
 } while (0)
-- 
cgit v1.2.3


From 902f99a38bd1998166ceb9f26f68afca4b71c34b Mon Sep 17 00:00:00 2001
From: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Date: Thu, 7 Mar 2019 16:28:07 -0800
Subject: ACPI: remove unused __acpi_handle_debug macro

If CONFIG_DYNAMIC_DEBUG is not set, acpi_handle_debug directly invokes
acpi_handle_printk (if DEBUG) or does a no-printk (if !DEBUG).  So this
macro is never used.

Link: http://lkml.kernel.org/r/20190212214150.4807-14-linux@rasmusvillemoes.dk
Signed-off-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Acked-by: Jason Baron <jbaron@akamai.com>
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: David Sterba <dsterba@suse.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Petr Mladek <pmladek@suse.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/acpi.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index c15a007f1790..3f381e892f7c 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -953,9 +953,6 @@ acpi_handle_printk(const char *level, void *handle, const char *fmt, ...) {}
 #if defined(CONFIG_ACPI) && defined(CONFIG_DYNAMIC_DEBUG)
 __printf(3, 4)
 void __acpi_handle_debug(struct _ddebug *descriptor, acpi_handle handle, const char *fmt, ...);
-#else
-#define __acpi_handle_debug(descriptor, handle, fmt, ...)		\
-	acpi_handle_printk(KERN_DEBUG, handle, fmt, ##__VA_ARGS__);
 #endif
 
 /*
-- 
cgit v1.2.3


From f1ebe04f5ba2f49fd672f12cdef46acda73cd9cf Mon Sep 17 00:00:00 2001
From: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Date: Thu, 7 Mar 2019 16:28:10 -0800
Subject: ACPI: implement acpi_handle_debug in terms of _dynamic_func_call

With coming changes on x86-64, all dynamic debug descriptors in a
translation unit must have distinct names.  The macro _dynamic_func_call
takes care of that.  No functional change.

Link: http://lkml.kernel.org/r/20190212214150.4807-15-linux@rasmusvillemoes.dk
Signed-off-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Jason Baron <jbaron@akamai.com>
Cc: David Sterba <dsterba@suse.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Petr Mladek <pmladek@suse.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/acpi.h | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 3f381e892f7c..dca5f244d63d 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -982,12 +982,8 @@ void __acpi_handle_debug(struct _ddebug *descriptor, acpi_handle handle, const c
 #else
 #if defined(CONFIG_DYNAMIC_DEBUG)
 #define acpi_handle_debug(handle, fmt, ...)				\
-do {									\
-	DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, fmt);			\
-	if (DYNAMIC_DEBUG_BRANCH(descriptor))				\
-		__acpi_handle_debug(&descriptor, handle, pr_fmt(fmt),	\
-				##__VA_ARGS__);				\
-} while (0)
+	_dynamic_func_call(fmt, __acpi_handle_debug,			\
+			   handle, pr_fmt(fmt), ##__VA_ARGS__)
 #else
 #define acpi_handle_debug(handle, fmt, ...)				\
 ({									\
-- 
cgit v1.2.3


From 1db604f676b2edb7b18de7881f4d5988e97be616 Mon Sep 17 00:00:00 2001
From: Vineet Gupta <vineet.gupta1@synopsys.com>
Date: Thu, 7 Mar 2019 16:28:14 -0800
Subject: include/linux/bitops.h: set_mask_bits() to return old value

| > Also, set_mask_bits is used in fs quite a bit and we can possibly come up
| > with a generic llsc based implementation (w/o the cmpxchg loop)
|
| May I also suggest changing the return value of set_mask_bits() to old.
|
| You can compute the new value given old, but you cannot compute the old
| value given new, therefore old is the better return value. Also, no
| current user seems to use the return value, so changing it is without
| risk.

Link: http://lkml.kernel.org/g/20150807110955.GH16853@twins.programming.kicks-ass.net
Link: http://lkml.kernel.org/r/1548275584-18096-4-git-send-email-vgupta@synopsys.com
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
Suggested-by: Peter Zijlstra <peterz@infradead.org>
Reviewed-by: Anthony Yznaga <anthony.yznaga@oracle.com>
Acked-by: Will Deacon <will.deacon@arm.com>
Cc: Miklos Szeredi <mszeredi@redhat.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jani Nikula <jani.nikula@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/bitops.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/bitops.h b/include/linux/bitops.h
index 705f7c442691..602af23b98c7 100644
--- a/include/linux/bitops.h
+++ b/include/linux/bitops.h
@@ -246,7 +246,7 @@ static __always_inline void __assign_bit(long nr, volatile unsigned long *addr,
 		new__ = (old__ & ~mask__) | bits__;		\
 	} while (cmpxchg(ptr, old__, new__) != old__);		\
 								\
-	new__;							\
+	old__;							\
 })
 #endif
 
-- 
cgit v1.2.3


From 8496ecd0bed4c70b43f39cecf0872b84360f0d14 Mon Sep 17 00:00:00 2001
From: Valdis Kletnieks <valdis.kletnieks@vt.edu>
Date: Thu, 7 Mar 2019 16:29:06 -0800
Subject: init/calibrate.c: provide proper prototype

Sparse issues a warning:

    CHECK   init/calibrate.c
  init/calibrate.c:271:28: warning: symbol 'calibration_delay_done' was not declared. Should it be static?

The actual issue is that it's a __weak symbol that archs can override
(in fact, ARM does so), but no prototype is provided.  Let's provide one
to prevent surprises.

Link: http://lkml.kernel.org/r/18827.1548750938@turing-police.cc.vt.edu
Signed-off-by: Valdis Kletnieks <valdis.kletnieks@vt.edu>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/delay.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/delay.h b/include/linux/delay.h
index b78bab4395d8..8e6828094c1e 100644
--- a/include/linux/delay.h
+++ b/include/linux/delay.h
@@ -55,6 +55,7 @@ static inline void ndelay(unsigned long x)
 
 extern unsigned long lpj_fine;
 void calibrate_delay(void);
+void __attribute__((weak)) calibration_delay_done(void);
 void msleep(unsigned int msecs);
 unsigned long msleep_interruptible(unsigned int msecs);
 void usleep_range(unsigned long min, unsigned long max);
-- 
cgit v1.2.3


From 60d6d04ca3abb34d5e89f030dbea440d9715a168 Mon Sep 17 00:00:00 2001
From: Ian Kent <raven@themaw.net>
Date: Thu, 7 Mar 2019 16:29:09 -0800
Subject: autofs: add ignore mount option

Add an autofs file system mount option that can be used to provide a
generic indicator to applications that the mount entry should be ignored
when displaying mount information.

In other OSes that provide autofs and that provide a mount list to user
space based on the kernel mount list a no-op mount option ("ignore" is
the one use on the most common OS) is allowed so that autofs file system
users can optionally use it.

The idea is that it be used by user space programs to exclude autofs
mounts from consideration when reading the mounts list.

Prior to the change to link /etc/mtab to /proc/self/mounts all I needed
to do to achieve this was to use mount(2) and not update the mtab but
now that no longer works.

I know the symlinking happened a long time ago and I considered doing
this then but, at the time I couldn't remember the commonly used option
name and thought persuading the various utility maintainers would be too
hard.

But now I have a RHEL request to do this for compatibility for a widely
used product so I want to go ahead with it and try and enlist the help
of some utility package maintainers.

Clearly, without the option nothing can be done so it's at least a
start.

Link: http://lkml.kernel.org/r/154725123970.11260.6113771566924907275.stgit@pluto-themaw-net
Signed-off-by: Ian Kent <raven@themaw.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/autofs/autofs_i.h         | 1 +
 fs/autofs/inode.c            | 9 ++++++++-
 include/uapi/linux/auto_fs.h | 2 +-
 3 files changed, 10 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/fs/autofs/autofs_i.h b/fs/autofs/autofs_i.h
index 3e59f0ed777b..b735f2b1e462 100644
--- a/fs/autofs/autofs_i.h
+++ b/fs/autofs/autofs_i.h
@@ -105,6 +105,7 @@ struct autofs_wait_queue {
 
 #define AUTOFS_SBI_CATATONIC	0x0001
 #define AUTOFS_SBI_STRICTEXPIRE 0x0002
+#define AUTOFS_SBI_IGNORE	0x0004
 
 struct autofs_sb_info {
 	u32 magic;
diff --git a/fs/autofs/inode.c b/fs/autofs/inode.c
index 078992eee299..8647ecaa89fc 100644
--- a/fs/autofs/inode.c
+++ b/fs/autofs/inode.c
@@ -89,6 +89,8 @@ static int autofs_show_options(struct seq_file *m, struct dentry *root)
 		seq_printf(m, ",indirect");
 	if (sbi->flags & AUTOFS_SBI_STRICTEXPIRE)
 		seq_printf(m, ",strictexpire");
+	if (sbi->flags & AUTOFS_SBI_IGNORE)
+		seq_printf(m, ",ignore");
 #ifdef CONFIG_CHECKPOINT_RESTORE
 	if (sbi->pipe)
 		seq_printf(m, ",pipe_ino=%ld", file_inode(sbi->pipe)->i_ino);
@@ -111,7 +113,8 @@ static const struct super_operations autofs_sops = {
 };
 
 enum {Opt_err, Opt_fd, Opt_uid, Opt_gid, Opt_pgrp, Opt_minproto, Opt_maxproto,
-	Opt_indirect, Opt_direct, Opt_offset, Opt_strictexpire};
+	Opt_indirect, Opt_direct, Opt_offset, Opt_strictexpire,
+	Opt_ignore};
 
 static const match_table_t tokens = {
 	{Opt_fd, "fd=%u"},
@@ -124,6 +127,7 @@ static const match_table_t tokens = {
 	{Opt_direct, "direct"},
 	{Opt_offset, "offset"},
 	{Opt_strictexpire, "strictexpire"},
+	{Opt_ignore, "ignore"},
 	{Opt_err, NULL}
 };
 
@@ -206,6 +210,9 @@ static int parse_options(char *options,
 		case Opt_strictexpire:
 			sbi->flags |= AUTOFS_SBI_STRICTEXPIRE;
 			break;
+		case Opt_ignore:
+			sbi->flags |= AUTOFS_SBI_IGNORE;
+			break;
 		default:
 			return 1;
 		}
diff --git a/include/uapi/linux/auto_fs.h b/include/uapi/linux/auto_fs.h
index 082119630b49..1f7925afad2d 100644
--- a/include/uapi/linux/auto_fs.h
+++ b/include/uapi/linux/auto_fs.h
@@ -23,7 +23,7 @@
 #define AUTOFS_MIN_PROTO_VERSION	3
 #define AUTOFS_MAX_PROTO_VERSION	5
 
-#define AUTOFS_PROTO_SUBVERSION		4
+#define AUTOFS_PROTO_SUBVERSION		5
 
 /*
  * The wait_queue_token (autofs_wqt_t) is part of a structure which is passed
-- 
cgit v1.2.3


From 6eb3c3d0a52dca337e327ae8868ca1f44a712e02 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Thu, 7 Mar 2019 16:29:26 -0800
Subject: exec: increase BINPRM_BUF_SIZE to 256

Large enterprise clients often run applications out of networked file
systems where the IT mandated layout of project volumes can end up
leading to paths that are longer than 128 characters.  Bumping this up
to the next order of two solves this problem in all but the most
egregious case while still fitting into a 512b slab.

[oleg@redhat.com: update comment, per Kees]
Link: http://lkml.kernel.org/r/20181112160956.GA28472@redhat.com
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Reported-by: Ben Woodard <woodard@redhat.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Acked-by: Michal Hocko <mhocko@suse.com>
Acked-by: Kees Cook <keescook@chromium.org>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/exec.c                    | 2 +-
 include/uapi/linux/binfmts.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/fs/exec.c b/fs/exec.c
index bf0ace3841ad..2e0033348d8e 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1563,7 +1563,7 @@ static void bprm_fill_uid(struct linux_binprm *bprm)
 
 /*
  * Fill the binprm structure from the inode.
- * Check permissions, then read the first 128 (BINPRM_BUF_SIZE) bytes
+ * Check permissions, then read the first BINPRM_BUF_SIZE bytes
  *
  * This may be called multiple times for binary chains (scripts for example).
  */
diff --git a/include/uapi/linux/binfmts.h b/include/uapi/linux/binfmts.h
index 4abad03a8853..689025d9c185 100644
--- a/include/uapi/linux/binfmts.h
+++ b/include/uapi/linux/binfmts.h
@@ -16,6 +16,6 @@ struct pt_regs;
 #define MAX_ARG_STRINGS 0x7FFFFFFF
 
 /* sizeof(linux_binprm->buf) */
-#define BINPRM_BUF_SIZE 128
+#define BINPRM_BUF_SIZE 256
 
 #endif /* _UAPI_LINUX_BINFMTS_H */
-- 
cgit v1.2.3


From 5ee4014af99f77dac89e01961b717d13ff1a8ea5 Mon Sep 17 00:00:00 2001
From: Dave Rodgman <dave.rodgman@arm.com>
Date: Thu, 7 Mar 2019 16:30:40 -0800
Subject: lib/lzo: implement run-length encoding

Patch series "lib/lzo: run-length encoding support", v5.

Following on from the previous lzo-rle patchset:

  https://lkml.org/lkml/2018/11/30/972

This patchset contains only the RLE patches, and should be applied on
top of the non-RLE patches ( https://lkml.org/lkml/2019/2/5/366 ).

Previously, some questions were raised around the RLE patches.  I've
done some additional benchmarking to answer these questions.  In short:

 - RLE offers significant additional performance (data-dependent)

 - I didn't measure any regressions that were clearly outside the noise

One concern with this patchset was around performance - specifically,
measuring RLE impact separately from Matt Sealey's patches (CTZ & fast
copy).  I have done some additional benchmarking which I hope clarifies
the benefits of each part of the patchset.

Firstly, I've captured some memory via /dev/fmem from a Chromebook with
many tabs open which is starting to swap, and then split this into 4178
4k pages.  I've excluded the all-zero pages (as zram does), and also the
no-zero pages (which won't tell us anything about RLE performance).
This should give a realistic test dataset for zram.  What I found was
that the data is VERY bimodal: 44% of pages in this dataset contain 5%
or fewer zeros, and 44% contain over 90% zeros (30% if you include the
no-zero pages).  This supports the idea of special-casing zeros in zram.

Next, I've benchmarked four variants of lzo on these pages (on 64-bit
Arm at max frequency): baseline LZO; baseline + Matt Sealey's patches
(aka MS); baseline + RLE only; baseline + MS + RLE.  Numbers are for
weighted roundtrip throughput (the weighting reflects that zram does
more compression than decompression).

  https://drive.google.com/file/d/1VLtLjRVxgUNuWFOxaGPwJYhl_hMQXpHe/view?usp=sharing

Matt's patches help in all cases for Arm (and no effect on Intel), as
expected.

RLE also behaves as expected: with few zeros present, it makes no
difference; above ~75%, it gives a good improvement (50 - 300 MB/s on
top of the benefit from Matt's patches).

Best performance is seen with both MS and RLE patches.

Finally, I have benchmarked the same dataset on an x86-64 device.  Here,
the MS patches make no difference (as expected); RLE helps, similarly as
on Arm.  There were no definite regressions; allowing for observational
error, 0.1% (3/4178) of cases had a regression > 1 standard deviation,
of which the largest was 4.6% (1.2 standard deviations).  I think this
is probably within the noise.

  https://drive.google.com/file/d/1xCUVwmiGD0heEMx5gcVEmLBI4eLaageV/view?usp=sharing

One point to note is that the graphs show RLE appears to help very
slightly with no zeros present! This is because the extra code causes
the clang optimiser to change code layout in a way that happens to have
a significant benefit.  Taking baseline LZO and adding a do-nothing line
like "__builtin_prefetch(out_len);" immediately before the "goto next"
has the same effect.  So this is a real, but basically spurious effect -
it's small enough not to upset the overall findings.

This patch (of 3):

When using zram, we frequently encounter long runs of zero bytes.  This
adds a special case which identifies runs of zeros and encodes them
using run-length encoding.

This is faster for both compression and decompresion.  For high-entropy
data which doesn't hit this case, impact is minimal.

Compression ratio is within a few percent in all cases.

This modifies the bitstream in a way which is backwards compatible
(i.e., we can decompress old bitstreams, but old versions of lzo cannot
decompress new bitstreams).

Link: http://lkml.kernel.org/r/20190205155944.16007-2-dave.rodgman@arm.com
Signed-off-by: Dave Rodgman <dave.rodgman@arm.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: Markus F.X.J. Oberhumer <markus@oberhumer.com>
Cc: Matt Sealey <matt.sealey@arm.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Nitin Gupta <nitingupta910@gmail.com>
Cc: Richard Purdie <rpurdie@openedhand.com>
Cc: Sergey Senozhatsky <sergey.senozhatsky.work@gmail.com>
Cc: Sonny Rao <sonnyrao@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/lzo.txt           |  35 +++++++++++---
 include/linux/lzo.h             |   2 +-
 lib/lzo/lzo1x_compress.c        | 100 +++++++++++++++++++++++++++++++++++-----
 lib/lzo/lzo1x_decompress_safe.c |  75 +++++++++++++++++++++---------
 lib/lzo/lzodefs.h               |  12 ++++-
 5 files changed, 181 insertions(+), 43 deletions(-)

(limited to 'include')

diff --git a/Documentation/lzo.txt b/Documentation/lzo.txt
index 6fa6a93d0949..306c60344ca7 100644
--- a/Documentation/lzo.txt
+++ b/Documentation/lzo.txt
@@ -78,16 +78,30 @@ Description
      is an implementation design choice independent on the algorithm or
      encoding.
 
+Versions
+
+0: Original version
+1: LZO-RLE
+
+Version 1 of LZO implements an extension to encode runs of zeros using run
+length encoding. This improves speed for data with many zeros, which is a
+common case for zram. This modifies the bitstream in a backwards compatible way
+(v1 can correctly decompress v0 compressed data, but v0 cannot read v1 data).
+
 Byte sequences
 ==============
 
   First byte encoding::
 
-      0..17   : follow regular instruction encoding, see below. It is worth
-                noting that codes 16 and 17 will represent a block copy from
-                the dictionary which is empty, and that they will always be
+      0..16   : follow regular instruction encoding, see below. It is worth
+                noting that code 16 will represent a block copy from the
+                dictionary which is empty, and that it will always be
                 invalid at this place.
 
+      17      : bitstream version. If the first byte is 17, the next byte
+                gives the bitstream version. If the first byte is not 17,
+                the bitstream version is 0.
+
       18..21  : copy 0..3 literals
                 state = (byte - 17) = 0..3  [ copy <state> literals ]
                 skip byte
@@ -140,6 +154,11 @@ Byte sequences
            state = S (copy S literals after this block)
            End of stream is reached if distance == 16384
 
+        In version 1, this instruction is also used to encode a run of zeros if
+        distance = 0xbfff, i.e. H = 1 and the D bits are all 1.
+           In this case, it is followed by a fourth byte, X.
+           run length = ((X << 3) | (0 0 0 0 0 L L L)) + 4.
+
       0 0 1 L L L L L  (32..63)
            Copy of small block within 16kB distance (preferably less than 34B)
            length = 2 + (L ?: 31 + (zero_bytes * 255) + non_zero_byte)
@@ -165,7 +184,9 @@ Authors
 =======
 
   This document was written by Willy Tarreau <w@1wt.eu> on 2014/07/19 during an
-  analysis of the decompression code available in Linux 3.16-rc5. The code is
-  tricky, it is possible that this document contains mistakes or that a few
-  corner cases were overlooked. In any case, please report any doubt, fix, or
-  proposed updates to the author(s) so that the document can be updated.
+  analysis of the decompression code available in Linux 3.16-rc5, and updated
+  by Dave Rodgman <dave.rodgman@arm.com> on 2018/10/30 to introduce run-length
+  encoding. The code is tricky, it is possible that this document contains
+  mistakes or that a few corner cases were overlooked. In any case, please
+  report any doubt, fix, or proposed updates to the author(s) so that the
+  document can be updated.
diff --git a/include/linux/lzo.h b/include/linux/lzo.h
index 2ae27cb89927..547a86c71e1b 100644
--- a/include/linux/lzo.h
+++ b/include/linux/lzo.h
@@ -18,7 +18,7 @@
 #define LZO1X_1_MEM_COMPRESS	(8192 * sizeof(unsigned short))
 #define LZO1X_MEM_COMPRESS	LZO1X_1_MEM_COMPRESS
 
-#define lzo1x_worst_compress(x) ((x) + ((x) / 16) + 64 + 3)
+#define lzo1x_worst_compress(x) ((x) + ((x) / 16) + 64 + 3 + 2)
 
 /* This requires 'wrkmem' of size LZO1X_1_MEM_COMPRESS */
 int lzo1x_1_compress(const unsigned char *src, size_t src_len,
diff --git a/lib/lzo/lzo1x_compress.c b/lib/lzo/lzo1x_compress.c
index 236eb21167b5..89cd561201ff 100644
--- a/lib/lzo/lzo1x_compress.c
+++ b/lib/lzo/lzo1x_compress.c
@@ -20,7 +20,7 @@
 static noinline size_t
 lzo1x_1_do_compress(const unsigned char *in, size_t in_len,
 		    unsigned char *out, size_t *out_len,
-		    size_t ti, void *wrkmem)
+		    size_t ti, void *wrkmem, signed char *state_offset)
 {
 	const unsigned char *ip;
 	unsigned char *op;
@@ -35,27 +35,85 @@ lzo1x_1_do_compress(const unsigned char *in, size_t in_len,
 	ip += ti < 4 ? 4 - ti : 0;
 
 	for (;;) {
-		const unsigned char *m_pos;
+		const unsigned char *m_pos = NULL;
 		size_t t, m_len, m_off;
 		u32 dv;
+		u32 run_length = 0;
 literal:
 		ip += 1 + ((ip - ii) >> 5);
 next:
 		if (unlikely(ip >= ip_end))
 			break;
 		dv = get_unaligned_le32(ip);
-		t = ((dv * 0x1824429d) >> (32 - D_BITS)) & D_MASK;
-		m_pos = in + dict[t];
-		dict[t] = (lzo_dict_t) (ip - in);
-		if (unlikely(dv != get_unaligned_le32(m_pos)))
-			goto literal;
+
+		if (dv == 0) {
+			const unsigned char *ir = ip + 4;
+			const unsigned char *limit = ip_end
+				< (ip + MAX_ZERO_RUN_LENGTH + 1)
+				? ip_end : ip + MAX_ZERO_RUN_LENGTH + 1;
+#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && \
+	defined(LZO_FAST_64BIT_MEMORY_ACCESS)
+			u64 dv64;
+
+			for (; (ir + 32) <= limit; ir += 32) {
+				dv64 = get_unaligned((u64 *)ir);
+				dv64 |= get_unaligned((u64 *)ir + 1);
+				dv64 |= get_unaligned((u64 *)ir + 2);
+				dv64 |= get_unaligned((u64 *)ir + 3);
+				if (dv64)
+					break;
+			}
+			for (; (ir + 8) <= limit; ir += 8) {
+				dv64 = get_unaligned((u64 *)ir);
+				if (dv64) {
+#  if defined(__LITTLE_ENDIAN)
+					ir += __builtin_ctzll(dv64) >> 3;
+#  elif defined(__BIG_ENDIAN)
+					ir += __builtin_clzll(dv64) >> 3;
+#  else
+#    error "missing endian definition"
+#  endif
+					break;
+				}
+			}
+#else
+			while ((ir < (const unsigned char *)
+					ALIGN((uintptr_t)ir, 4)) &&
+					(ir < limit) && (*ir == 0))
+				ir++;
+			for (; (ir + 4) <= limit; ir += 4) {
+				dv = *((u32 *)ir);
+				if (dv) {
+#  if defined(__LITTLE_ENDIAN)
+					ir += __builtin_ctz(dv) >> 3;
+#  elif defined(__BIG_ENDIAN)
+					ir += __builtin_clz(dv) >> 3;
+#  else
+#    error "missing endian definition"
+#  endif
+					break;
+				}
+			}
+#endif
+			while (likely(ir < limit) && unlikely(*ir == 0))
+				ir++;
+			run_length = ir - ip;
+			if (run_length > MAX_ZERO_RUN_LENGTH)
+				run_length = MAX_ZERO_RUN_LENGTH;
+		} else {
+			t = ((dv * 0x1824429d) >> (32 - D_BITS)) & D_MASK;
+			m_pos = in + dict[t];
+			dict[t] = (lzo_dict_t) (ip - in);
+			if (unlikely(dv != get_unaligned_le32(m_pos)))
+				goto literal;
+		}
 
 		ii -= ti;
 		ti = 0;
 		t = ip - ii;
 		if (t != 0) {
 			if (t <= 3) {
-				op[-2] |= t;
+				op[*state_offset] |= t;
 				COPY4(op, ii);
 				op += t;
 			} else if (t <= 16) {
@@ -88,6 +146,17 @@ next:
 			}
 		}
 
+		if (unlikely(run_length)) {
+			ip += run_length;
+			run_length -= MIN_ZERO_RUN_LENGTH;
+			put_unaligned_le32((run_length << 21) | 0xfffc18
+					   | (run_length & 0x7), op);
+			op += 4;
+			run_length = 0;
+			*state_offset = -3;
+			goto finished_writing_instruction;
+		}
+
 		m_len = 4;
 		{
 #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && defined(LZO_USE_CTZ64)
@@ -170,7 +239,6 @@ m_len_done:
 
 		m_off = ip - m_pos;
 		ip += m_len;
-		ii = ip;
 		if (m_len <= M2_MAX_LEN && m_off <= M2_MAX_OFFSET) {
 			m_off -= 1;
 			*op++ = (((m_len - 1) << 5) | ((m_off & 7) << 2));
@@ -207,6 +275,9 @@ m_len_done:
 			*op++ = (m_off << 2);
 			*op++ = (m_off >> 6);
 		}
+		*state_offset = -2;
+finished_writing_instruction:
+		ii = ip;
 		goto next;
 	}
 	*out_len = op - out;
@@ -221,6 +292,12 @@ int lzo1x_1_compress(const unsigned char *in, size_t in_len,
 	unsigned char *op = out;
 	size_t l = in_len;
 	size_t t = 0;
+	signed char state_offset = -2;
+
+	// LZO v0 will never write 17 as first byte,
+	// so this is used to version the bitstream
+	*op++ = 17;
+	*op++ = LZO_VERSION;
 
 	while (l > 20) {
 		size_t ll = l <= (M4_MAX_OFFSET + 1) ? l : (M4_MAX_OFFSET + 1);
@@ -229,7 +306,8 @@ int lzo1x_1_compress(const unsigned char *in, size_t in_len,
 			break;
 		BUILD_BUG_ON(D_SIZE * sizeof(lzo_dict_t) > LZO1X_1_MEM_COMPRESS);
 		memset(wrkmem, 0, D_SIZE * sizeof(lzo_dict_t));
-		t = lzo1x_1_do_compress(ip, ll, op, out_len, t, wrkmem);
+		t = lzo1x_1_do_compress(ip, ll, op, out_len,
+					t, wrkmem, &state_offset);
 		ip += ll;
 		op += *out_len;
 		l  -= ll;
@@ -242,7 +320,7 @@ int lzo1x_1_compress(const unsigned char *in, size_t in_len,
 		if (op == out && t <= 238) {
 			*op++ = (17 + t);
 		} else if (t <= 3) {
-			op[-2] |= t;
+			op[state_offset] |= t;
 		} else if (t <= 18) {
 			*op++ = (t - 3);
 		} else {
diff --git a/lib/lzo/lzo1x_decompress_safe.c b/lib/lzo/lzo1x_decompress_safe.c
index a1c387f6afba..6d2600ea3b55 100644
--- a/lib/lzo/lzo1x_decompress_safe.c
+++ b/lib/lzo/lzo1x_decompress_safe.c
@@ -46,11 +46,23 @@ int lzo1x_decompress_safe(const unsigned char *in, size_t in_len,
 	const unsigned char * const ip_end = in + in_len;
 	unsigned char * const op_end = out + *out_len;
 
+	unsigned char bitstream_version;
+
 	op = out;
 	ip = in;
 
 	if (unlikely(in_len < 3))
 		goto input_overrun;
+
+	if (likely(*ip == 17)) {
+		bitstream_version = ip[1];
+		ip += 2;
+		if (unlikely(in_len < 5))
+			goto input_overrun;
+	} else {
+		bitstream_version = 0;
+	}
+
 	if (*ip > 17) {
 		t = *ip++ - 17;
 		if (t < 4) {
@@ -154,32 +166,49 @@ copy_literal_run:
 			m_pos -= next >> 2;
 			next &= 3;
 		} else {
-			m_pos = op;
-			m_pos -= (t & 8) << 11;
-			t = (t & 7) + (3 - 1);
-			if (unlikely(t == 2)) {
-				size_t offset;
-				const unsigned char *ip_last = ip;
+			NEED_IP(2);
+			next = get_unaligned_le16(ip);
+			if (((next & 0xfffc) == 0xfffc) &&
+			    ((t & 0xf8) == 0x18) &&
+			    likely(bitstream_version)) {
+				NEED_IP(3);
+				t &= 7;
+				t |= ip[2] << 3;
+				t += MIN_ZERO_RUN_LENGTH;
+				NEED_OP(t);
+				memset(op, 0, t);
+				op += t;
+				next &= 3;
+				ip += 3;
+				goto match_next;
+			} else {
+				m_pos = op;
+				m_pos -= (t & 8) << 11;
+				t = (t & 7) + (3 - 1);
+				if (unlikely(t == 2)) {
+					size_t offset;
+					const unsigned char *ip_last = ip;
 
-				while (unlikely(*ip == 0)) {
-					ip++;
-					NEED_IP(1);
-				}
-				offset = ip - ip_last;
-				if (unlikely(offset > MAX_255_COUNT))
-					return LZO_E_ERROR;
+					while (unlikely(*ip == 0)) {
+						ip++;
+						NEED_IP(1);
+					}
+					offset = ip - ip_last;
+					if (unlikely(offset > MAX_255_COUNT))
+						return LZO_E_ERROR;
 
-				offset = (offset << 8) - offset;
-				t += offset + 7 + *ip++;
-				NEED_IP(2);
+					offset = (offset << 8) - offset;
+					t += offset + 7 + *ip++;
+					NEED_IP(2);
+					next = get_unaligned_le16(ip);
+				}
+				ip += 2;
+				m_pos -= next >> 2;
+				next &= 3;
+				if (m_pos == op)
+					goto eof_found;
+				m_pos -= 0x4000;
 			}
-			next = get_unaligned_le16(ip);
-			ip += 2;
-			m_pos -= next >> 2;
-			next &= 3;
-			if (m_pos == op)
-				goto eof_found;
-			m_pos -= 0x4000;
 		}
 		TEST_LB(m_pos);
 #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
diff --git a/lib/lzo/lzodefs.h b/lib/lzo/lzodefs.h
index fa0a45fed8c4..ac64159ee344 100644
--- a/lib/lzo/lzodefs.h
+++ b/lib/lzo/lzodefs.h
@@ -13,6 +13,12 @@
  */
 
 
+/* Version
+ * 0: original lzo version
+ * 1: lzo with support for RLE
+ */
+#define LZO_VERSION 1
+
 #define COPY4(dst, src)	\
 		put_unaligned(get_unaligned((const u32 *)(src)), (u32 *)(dst))
 #if defined(CONFIG_X86_64) || defined(CONFIG_ARM64)
@@ -28,6 +34,7 @@
 #elif defined(CONFIG_X86_64) || defined(CONFIG_ARM64)
 #define LZO_USE_CTZ64	1
 #define LZO_USE_CTZ32	1
+#define LZO_FAST_64BIT_MEMORY_ACCESS
 #elif defined(CONFIG_X86) || defined(CONFIG_PPC)
 #define LZO_USE_CTZ32	1
 #elif defined(CONFIG_ARM) && (__LINUX_ARM_ARCH__ >= 5)
@@ -37,7 +44,7 @@
 #define M1_MAX_OFFSET	0x0400
 #define M2_MAX_OFFSET	0x0800
 #define M3_MAX_OFFSET	0x4000
-#define M4_MAX_OFFSET	0xbfff
+#define M4_MAX_OFFSET	0xbffe
 
 #define M1_MIN_LEN	2
 #define M1_MAX_LEN	2
@@ -53,6 +60,9 @@
 #define M3_MARKER	32
 #define M4_MARKER	16
 
+#define MIN_ZERO_RUN_LENGTH	4
+#define MAX_ZERO_RUN_LENGTH	(2047 + MIN_ZERO_RUN_LENGTH)
+
 #define lzo_dict_t      unsigned short
 #define D_BITS		13
 #define D_SIZE		(1u << D_BITS)
-- 
cgit v1.2.3


From 45ec975efb527625629d123f30597673889f52ca Mon Sep 17 00:00:00 2001
From: Dave Rodgman <dave.rodgman@arm.com>
Date: Thu, 7 Mar 2019 16:30:44 -0800
Subject: lib/lzo: separate lzo-rle from lzo

To prevent any issues with persistent data, separate lzo-rle from lzo so
that it is treated as a separate algorithm, and lzo is still available.

Link: http://lkml.kernel.org/r/20190205155944.16007-3-dave.rodgman@arm.com
Signed-off-by: Dave Rodgman <dave.rodgman@arm.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: Markus F.X.J. Oberhumer <markus@oberhumer.com>
Cc: Matt Sealey <matt.sealey@arm.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Nitin Gupta <nitingupta910@gmail.com>
Cc: Richard Purdie <rpurdie@openedhand.com>
Cc: Sergey Senozhatsky <sergey.senozhatsky.work@gmail.com>
Cc: Sonny Rao <sonnyrao@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/lzo.txt      |  12 ++--
 crypto/Makefile            |   2 +-
 crypto/lzo-rle.c           | 175 +++++++++++++++++++++++++++++++++++++++++++++
 crypto/tcrypt.c            |   4 +-
 drivers/block/zram/zcomp.c |   1 +
 include/linux/lzo.h        |   4 ++
 lib/lzo/lzo1x_compress.c   |  42 ++++++++---
 lib/lzo/lzodefs.h          |   3 +-
 8 files changed, 226 insertions(+), 17 deletions(-)
 create mode 100644 crypto/lzo-rle.c

(limited to 'include')

diff --git a/Documentation/lzo.txt b/Documentation/lzo.txt
index 306c60344ca7..f79934225d8d 100644
--- a/Documentation/lzo.txt
+++ b/Documentation/lzo.txt
@@ -88,6 +88,10 @@ length encoding. This improves speed for data with many zeros, which is a
 common case for zram. This modifies the bitstream in a backwards compatible way
 (v1 can correctly decompress v0 compressed data, but v0 cannot read v1 data).
 
+For maximum compatibility, both versions are available under different names
+(lzo and lzo-rle). Differences in the encoding are noted in this document with
+e.g.: version 1 only.
+
 Byte sequences
 ==============
 
@@ -99,8 +103,8 @@ Byte sequences
                 invalid at this place.
 
       17      : bitstream version. If the first byte is 17, the next byte
-                gives the bitstream version. If the first byte is not 17,
-                the bitstream version is 0.
+                gives the bitstream version (version 1 only). If the first byte
+                is not 17, the bitstream version is 0.
 
       18..21  : copy 0..3 literals
                 state = (byte - 17) = 0..3  [ copy <state> literals ]
@@ -154,8 +158,8 @@ Byte sequences
            state = S (copy S literals after this block)
            End of stream is reached if distance == 16384
 
-        In version 1, this instruction is also used to encode a run of zeros if
-        distance = 0xbfff, i.e. H = 1 and the D bits are all 1.
+        In version 1 only, this instruction is also used to encode a run of
+        zeros if distance = 0xbfff, i.e. H = 1 and the D bits are all 1.
            In this case, it is followed by a fourth byte, X.
            run length = ((X << 3) | (0 0 0 0 0 L L L)) + 4.
 
diff --git a/crypto/Makefile b/crypto/Makefile
index 799ed5e94606..fb5bf2a3a666 100644
--- a/crypto/Makefile
+++ b/crypto/Makefile
@@ -128,7 +128,7 @@ obj-$(CONFIG_CRYPTO_CRC32C) += crc32c_generic.o
 obj-$(CONFIG_CRYPTO_CRC32) += crc32_generic.o
 obj-$(CONFIG_CRYPTO_CRCT10DIF) += crct10dif_common.o crct10dif_generic.o
 obj-$(CONFIG_CRYPTO_AUTHENC) += authenc.o authencesn.o
-obj-$(CONFIG_CRYPTO_LZO) += lzo.o
+obj-$(CONFIG_CRYPTO_LZO) += lzo.o lzo-rle.o
 obj-$(CONFIG_CRYPTO_LZ4) += lz4.o
 obj-$(CONFIG_CRYPTO_LZ4HC) += lz4hc.o
 obj-$(CONFIG_CRYPTO_842) += 842.o
diff --git a/crypto/lzo-rle.c b/crypto/lzo-rle.c
new file mode 100644
index 000000000000..ea9c75b1db49
--- /dev/null
+++ b/crypto/lzo-rle.c
@@ -0,0 +1,175 @@
+/*
+ * Cryptographic API.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 51
+ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/crypto.h>
+#include <linux/vmalloc.h>
+#include <linux/mm.h>
+#include <linux/lzo.h>
+#include <crypto/internal/scompress.h>
+
+struct lzorle_ctx {
+	void *lzorle_comp_mem;
+};
+
+static void *lzorle_alloc_ctx(struct crypto_scomp *tfm)
+{
+	void *ctx;
+
+	ctx = kvmalloc(LZO1X_MEM_COMPRESS, GFP_KERNEL);
+	if (!ctx)
+		return ERR_PTR(-ENOMEM);
+
+	return ctx;
+}
+
+static int lzorle_init(struct crypto_tfm *tfm)
+{
+	struct lzorle_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	ctx->lzorle_comp_mem = lzorle_alloc_ctx(NULL);
+	if (IS_ERR(ctx->lzorle_comp_mem))
+		return -ENOMEM;
+
+	return 0;
+}
+
+static void lzorle_free_ctx(struct crypto_scomp *tfm, void *ctx)
+{
+	kvfree(ctx);
+}
+
+static void lzorle_exit(struct crypto_tfm *tfm)
+{
+	struct lzorle_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	lzorle_free_ctx(NULL, ctx->lzorle_comp_mem);
+}
+
+static int __lzorle_compress(const u8 *src, unsigned int slen,
+			  u8 *dst, unsigned int *dlen, void *ctx)
+{
+	size_t tmp_len = *dlen; /* size_t(ulong) <-> uint on 64 bit */
+	int err;
+
+	err = lzorle1x_1_compress(src, slen, dst, &tmp_len, ctx);
+
+	if (err != LZO_E_OK)
+		return -EINVAL;
+
+	*dlen = tmp_len;
+	return 0;
+}
+
+static int lzorle_compress(struct crypto_tfm *tfm, const u8 *src,
+			unsigned int slen, u8 *dst, unsigned int *dlen)
+{
+	struct lzorle_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	return __lzorle_compress(src, slen, dst, dlen, ctx->lzorle_comp_mem);
+}
+
+static int lzorle_scompress(struct crypto_scomp *tfm, const u8 *src,
+			 unsigned int slen, u8 *dst, unsigned int *dlen,
+			 void *ctx)
+{
+	return __lzorle_compress(src, slen, dst, dlen, ctx);
+}
+
+static int __lzorle_decompress(const u8 *src, unsigned int slen,
+			    u8 *dst, unsigned int *dlen)
+{
+	int err;
+	size_t tmp_len = *dlen; /* size_t(ulong) <-> uint on 64 bit */
+
+	err = lzo1x_decompress_safe(src, slen, dst, &tmp_len);
+
+	if (err != LZO_E_OK)
+		return -EINVAL;
+
+	*dlen = tmp_len;
+	return 0;
+}
+
+static int lzorle_decompress(struct crypto_tfm *tfm, const u8 *src,
+			  unsigned int slen, u8 *dst, unsigned int *dlen)
+{
+	return __lzorle_decompress(src, slen, dst, dlen);
+}
+
+static int lzorle_sdecompress(struct crypto_scomp *tfm, const u8 *src,
+			   unsigned int slen, u8 *dst, unsigned int *dlen,
+			   void *ctx)
+{
+	return __lzorle_decompress(src, slen, dst, dlen);
+}
+
+static struct crypto_alg alg = {
+	.cra_name		= "lzo-rle",
+	.cra_flags		= CRYPTO_ALG_TYPE_COMPRESS,
+	.cra_ctxsize		= sizeof(struct lzorle_ctx),
+	.cra_module		= THIS_MODULE,
+	.cra_init		= lzorle_init,
+	.cra_exit		= lzorle_exit,
+	.cra_u			= { .compress = {
+	.coa_compress		= lzorle_compress,
+	.coa_decompress		= lzorle_decompress } }
+};
+
+static struct scomp_alg scomp = {
+	.alloc_ctx		= lzorle_alloc_ctx,
+	.free_ctx		= lzorle_free_ctx,
+	.compress		= lzorle_scompress,
+	.decompress		= lzorle_sdecompress,
+	.base			= {
+		.cra_name	= "lzo-rle",
+		.cra_driver_name = "lzo-rle-scomp",
+		.cra_module	 = THIS_MODULE,
+	}
+};
+
+static int __init lzorle_mod_init(void)
+{
+	int ret;
+
+	ret = crypto_register_alg(&alg);
+	if (ret)
+		return ret;
+
+	ret = crypto_register_scomp(&scomp);
+	if (ret) {
+		crypto_unregister_alg(&alg);
+		return ret;
+	}
+
+	return ret;
+}
+
+static void __exit lzorle_mod_fini(void)
+{
+	crypto_unregister_alg(&alg);
+	crypto_unregister_scomp(&scomp);
+}
+
+module_init(lzorle_mod_init);
+module_exit(lzorle_mod_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("LZO-RLE Compression Algorithm");
+MODULE_ALIAS_CRYPTO("lzo-rle");
diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c
index e7fb87e114a5..1ea2d5007ff5 100644
--- a/crypto/tcrypt.c
+++ b/crypto/tcrypt.c
@@ -76,8 +76,8 @@ static char *check[] = {
 	"cast6", "arc4", "michael_mic", "deflate", "crc32c", "tea", "xtea",
 	"khazad", "wp512", "wp384", "wp256", "tnepres", "xeta",  "fcrypt",
 	"camellia", "seed", "salsa20", "rmd128", "rmd160", "rmd256", "rmd320",
-	"lzo", "cts", "sha3-224", "sha3-256", "sha3-384", "sha3-512",
-	"streebog256", "streebog512",
+	"lzo", "lzo-rle", "cts", "sha3-224", "sha3-256", "sha3-384",
+	"sha3-512", "streebog256", "streebog512",
 	NULL
 };
 
diff --git a/drivers/block/zram/zcomp.c b/drivers/block/zram/zcomp.c
index 4ed0a78fdc09..4d9a38890965 100644
--- a/drivers/block/zram/zcomp.c
+++ b/drivers/block/zram/zcomp.c
@@ -20,6 +20,7 @@
 
 static const char * const backends[] = {
 	"lzo",
+	"lzo-rle",
 #if IS_ENABLED(CONFIG_CRYPTO_LZ4)
 	"lz4",
 #endif
diff --git a/include/linux/lzo.h b/include/linux/lzo.h
index 547a86c71e1b..e95c7d1092b2 100644
--- a/include/linux/lzo.h
+++ b/include/linux/lzo.h
@@ -24,6 +24,10 @@
 int lzo1x_1_compress(const unsigned char *src, size_t src_len,
 		     unsigned char *dst, size_t *dst_len, void *wrkmem);
 
+/* This requires 'wrkmem' of size LZO1X_1_MEM_COMPRESS */
+int lzorle1x_1_compress(const unsigned char *src, size_t src_len,
+		     unsigned char *dst, size_t *dst_len, void *wrkmem);
+
 /* safe decompression with overrun testing */
 int lzo1x_decompress_safe(const unsigned char *src, size_t src_len,
 			  unsigned char *dst, size_t *dst_len);
diff --git a/lib/lzo/lzo1x_compress.c b/lib/lzo/lzo1x_compress.c
index 89cd561201ff..4525fb094844 100644
--- a/lib/lzo/lzo1x_compress.c
+++ b/lib/lzo/lzo1x_compress.c
@@ -20,7 +20,8 @@
 static noinline size_t
 lzo1x_1_do_compress(const unsigned char *in, size_t in_len,
 		    unsigned char *out, size_t *out_len,
-		    size_t ti, void *wrkmem, signed char *state_offset)
+		    size_t ti, void *wrkmem, signed char *state_offset,
+		    const unsigned char bitstream_version)
 {
 	const unsigned char *ip;
 	unsigned char *op;
@@ -46,7 +47,7 @@ next:
 			break;
 		dv = get_unaligned_le32(ip);
 
-		if (dv == 0) {
+		if (dv == 0 && bitstream_version) {
 			const unsigned char *ir = ip + 4;
 			const unsigned char *limit = ip_end
 				< (ip + MAX_ZERO_RUN_LENGTH + 1)
@@ -284,30 +285,36 @@ finished_writing_instruction:
 	return in_end - (ii - ti);
 }
 
-int lzo1x_1_compress(const unsigned char *in, size_t in_len,
+int lzogeneric1x_1_compress(const unsigned char *in, size_t in_len,
 		     unsigned char *out, size_t *out_len,
-		     void *wrkmem)
+		     void *wrkmem, const unsigned char bitstream_version)
 {
 	const unsigned char *ip = in;
 	unsigned char *op = out;
 	size_t l = in_len;
 	size_t t = 0;
 	signed char state_offset = -2;
+	unsigned int m4_max_offset;
 
 	// LZO v0 will never write 17 as first byte,
 	// so this is used to version the bitstream
-	*op++ = 17;
-	*op++ = LZO_VERSION;
+	if (bitstream_version > 0) {
+		*op++ = 17;
+		*op++ = bitstream_version;
+		m4_max_offset = M4_MAX_OFFSET_V1;
+	} else {
+		m4_max_offset = M4_MAX_OFFSET_V0;
+	}
 
 	while (l > 20) {
-		size_t ll = l <= (M4_MAX_OFFSET + 1) ? l : (M4_MAX_OFFSET + 1);
+		size_t ll = l <= (m4_max_offset + 1) ? l : (m4_max_offset + 1);
 		uintptr_t ll_end = (uintptr_t) ip + ll;
 		if ((ll_end + ((t + ll) >> 5)) <= ll_end)
 			break;
 		BUILD_BUG_ON(D_SIZE * sizeof(lzo_dict_t) > LZO1X_1_MEM_COMPRESS);
 		memset(wrkmem, 0, D_SIZE * sizeof(lzo_dict_t));
-		t = lzo1x_1_do_compress(ip, ll, op, out_len,
-					t, wrkmem, &state_offset);
+		t = lzo1x_1_do_compress(ip, ll, op, out_len, t, wrkmem,
+					&state_offset, bitstream_version);
 		ip += ll;
 		op += *out_len;
 		l  -= ll;
@@ -351,7 +358,24 @@ int lzo1x_1_compress(const unsigned char *in, size_t in_len,
 	*out_len = op - out;
 	return LZO_E_OK;
 }
+
+int lzo1x_1_compress(const unsigned char *in, size_t in_len,
+		     unsigned char *out, size_t *out_len,
+		     void *wrkmem)
+{
+	return lzogeneric1x_1_compress(in, in_len, out, out_len, wrkmem, 0);
+}
+
+int lzorle1x_1_compress(const unsigned char *in, size_t in_len,
+		     unsigned char *out, size_t *out_len,
+		     void *wrkmem)
+{
+	return lzogeneric1x_1_compress(in, in_len, out, out_len,
+				       wrkmem, LZO_VERSION);
+}
+
 EXPORT_SYMBOL_GPL(lzo1x_1_compress);
+EXPORT_SYMBOL_GPL(lzorle1x_1_compress);
 
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("LZO1X-1 Compressor");
diff --git a/lib/lzo/lzodefs.h b/lib/lzo/lzodefs.h
index ac64159ee344..b60851fcf6ce 100644
--- a/lib/lzo/lzodefs.h
+++ b/lib/lzo/lzodefs.h
@@ -44,7 +44,8 @@
 #define M1_MAX_OFFSET	0x0400
 #define M2_MAX_OFFSET	0x0800
 #define M3_MAX_OFFSET	0x4000
-#define M4_MAX_OFFSET	0xbffe
+#define M4_MAX_OFFSET_V0	0xbfff
+#define M4_MAX_OFFSET_V1	0xbffe
 
 #define M1_MIN_LEN	2
 #define M1_MAX_LEN	2
-- 
cgit v1.2.3


From 3d3539018d2cbd12e5af4a132636ee7fd8d43ef0 Mon Sep 17 00:00:00 2001
From: Souptick Joarder <jrdr.linux@gmail.com>
Date: Thu, 7 Mar 2019 16:31:14 -0800
Subject: mm: create the new vm_fault_t type

Page fault handlers are supposed to return VM_FAULT codes, but some
drivers/file systems mistakenly return error numbers.  Now that all
drivers/file systems have been converted to use the vm_fault_t return
type, change the type definition to no longer be compatible with 'int'.
By making it an unsigned int, the function prototype becomes
incompatible with a function which returns int.  Sparse will detect any
attempts to return a value which is not a VM_FAULT code.

VM_FAULT_SET_HINDEX and VM_FAULT_GET_HINDEX values are changed to avoid
conflict with other VM_FAULT codes.

[jrdr.linux@gmail.com: fix warnings]
  Link: http://lkml.kernel.org/r/20190109183742.GA24326@jordon-HP-15-Notebook-PC
Link: http://lkml.kernel.org/r/20190108183041.GA12137@jordon-HP-15-Notebook-PC
Signed-off-by: Souptick Joarder <jrdr.linux@gmail.com>
Reviewed-by: William Kucharski <william.kucharski@oracle.com>
Reviewed-by: Mike Rapoport <rppt@linux.ibm.com>
Reviewed-by: Matthew Wilcox <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/mm/fault.c      |  2 +-
 include/linux/mm.h       | 46 ------------------------------
 include/linux/mm_types.h | 73 +++++++++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 73 insertions(+), 48 deletions(-)

(limited to 'include')

diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 9d5c75f02295..667f1da36208 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1031,7 +1031,7 @@ bad_area_access_error(struct pt_regs *regs, unsigned long error_code,
 
 static void
 do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address,
-	  unsigned int fault)
+	  vm_fault_t fault)
 {
 	struct task_struct *tsk = current;
 
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 20ec56f8e2bb..5801ee849f36 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1322,52 +1322,6 @@ static inline void clear_page_pfmemalloc(struct page *page)
 	page->index = 0;
 }
 
-/*
- * Different kinds of faults, as returned by handle_mm_fault().
- * Used to decide whether a process gets delivered SIGBUS or
- * just gets major/minor fault counters bumped up.
- */
-
-#define VM_FAULT_OOM	0x0001
-#define VM_FAULT_SIGBUS	0x0002
-#define VM_FAULT_MAJOR	0x0004
-#define VM_FAULT_WRITE	0x0008	/* Special case for get_user_pages */
-#define VM_FAULT_HWPOISON 0x0010	/* Hit poisoned small page */
-#define VM_FAULT_HWPOISON_LARGE 0x0020  /* Hit poisoned large page. Index encoded in upper bits */
-#define VM_FAULT_SIGSEGV 0x0040
-
-#define VM_FAULT_NOPAGE	0x0100	/* ->fault installed the pte, not return page */
-#define VM_FAULT_LOCKED	0x0200	/* ->fault locked the returned page */
-#define VM_FAULT_RETRY	0x0400	/* ->fault blocked, must retry */
-#define VM_FAULT_FALLBACK 0x0800	/* huge page fault failed, fall back to small */
-#define VM_FAULT_DONE_COW   0x1000	/* ->fault has fully handled COW */
-#define VM_FAULT_NEEDDSYNC  0x2000	/* ->fault did not modify page tables
-					 * and needs fsync() to complete (for
-					 * synchronous page faults in DAX) */
-
-#define VM_FAULT_ERROR	(VM_FAULT_OOM | VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV | \
-			 VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE | \
-			 VM_FAULT_FALLBACK)
-
-#define VM_FAULT_RESULT_TRACE \
-	{ VM_FAULT_OOM,			"OOM" }, \
-	{ VM_FAULT_SIGBUS,		"SIGBUS" }, \
-	{ VM_FAULT_MAJOR,		"MAJOR" }, \
-	{ VM_FAULT_WRITE,		"WRITE" }, \
-	{ VM_FAULT_HWPOISON,		"HWPOISON" }, \
-	{ VM_FAULT_HWPOISON_LARGE,	"HWPOISON_LARGE" }, \
-	{ VM_FAULT_SIGSEGV,		"SIGSEGV" }, \
-	{ VM_FAULT_NOPAGE,		"NOPAGE" }, \
-	{ VM_FAULT_LOCKED,		"LOCKED" }, \
-	{ VM_FAULT_RETRY,		"RETRY" }, \
-	{ VM_FAULT_FALLBACK,		"FALLBACK" }, \
-	{ VM_FAULT_DONE_COW,		"DONE_COW" }, \
-	{ VM_FAULT_NEEDDSYNC,		"NEEDDSYNC" }
-
-/* Encode hstate index for a hwpoisoned large page */
-#define VM_FAULT_SET_HINDEX(x) ((x) << 12)
-#define VM_FAULT_GET_HINDEX(x) (((x) >> 12) & 0xf)
-
 /*
  * Can be called by the pagefault handler when it gets a VM_FAULT_OOM.
  */
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index ab9b48420200..86e7a7a46353 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -22,7 +22,6 @@
 #endif
 #define AT_VECTOR_SIZE (2*(AT_VECTOR_SIZE_ARCH + AT_VECTOR_SIZE_BASE + 1))
 
-typedef int vm_fault_t;
 
 struct address_space;
 struct mem_cgroup;
@@ -621,6 +620,78 @@ static inline bool mm_tlb_flush_nested(struct mm_struct *mm)
 
 struct vm_fault;
 
+/**
+ * typedef vm_fault_t - Return type for page fault handlers.
+ *
+ * Page fault handlers return a bitmask of %VM_FAULT values.
+ */
+typedef __bitwise unsigned int vm_fault_t;
+
+/**
+ * enum vm_fault_reason - Page fault handlers return a bitmask of
+ * these values to tell the core VM what happened when handling the
+ * fault. Used to decide whether a process gets delivered SIGBUS or
+ * just gets major/minor fault counters bumped up.
+ *
+ * @VM_FAULT_OOM:		Out Of Memory
+ * @VM_FAULT_SIGBUS:		Bad access
+ * @VM_FAULT_MAJOR:		Page read from storage
+ * @VM_FAULT_WRITE:		Special case for get_user_pages
+ * @VM_FAULT_HWPOISON:		Hit poisoned small page
+ * @VM_FAULT_HWPOISON_LARGE:	Hit poisoned large page. Index encoded
+ *				in upper bits
+ * @VM_FAULT_SIGSEGV:		segmentation fault
+ * @VM_FAULT_NOPAGE:		->fault installed the pte, not return page
+ * @VM_FAULT_LOCKED:		->fault locked the returned page
+ * @VM_FAULT_RETRY:		->fault blocked, must retry
+ * @VM_FAULT_FALLBACK:		huge page fault failed, fall back to small
+ * @VM_FAULT_DONE_COW:		->fault has fully handled COW
+ * @VM_FAULT_NEEDDSYNC:		->fault did not modify page tables and needs
+ *				fsync() to complete (for synchronous page faults
+ *				in DAX)
+ * @VM_FAULT_HINDEX_MASK:	mask HINDEX value
+ *
+ */
+enum vm_fault_reason {
+	VM_FAULT_OOM            = (__force vm_fault_t)0x000001,
+	VM_FAULT_SIGBUS         = (__force vm_fault_t)0x000002,
+	VM_FAULT_MAJOR          = (__force vm_fault_t)0x000004,
+	VM_FAULT_WRITE          = (__force vm_fault_t)0x000008,
+	VM_FAULT_HWPOISON       = (__force vm_fault_t)0x000010,
+	VM_FAULT_HWPOISON_LARGE = (__force vm_fault_t)0x000020,
+	VM_FAULT_SIGSEGV        = (__force vm_fault_t)0x000040,
+	VM_FAULT_NOPAGE         = (__force vm_fault_t)0x000100,
+	VM_FAULT_LOCKED         = (__force vm_fault_t)0x000200,
+	VM_FAULT_RETRY          = (__force vm_fault_t)0x000400,
+	VM_FAULT_FALLBACK       = (__force vm_fault_t)0x000800,
+	VM_FAULT_DONE_COW       = (__force vm_fault_t)0x001000,
+	VM_FAULT_NEEDDSYNC      = (__force vm_fault_t)0x002000,
+	VM_FAULT_HINDEX_MASK    = (__force vm_fault_t)0x0f0000,
+};
+
+/* Encode hstate index for a hwpoisoned large page */
+#define VM_FAULT_SET_HINDEX(x) ((__force vm_fault_t)((x) << 16))
+#define VM_FAULT_GET_HINDEX(x) (((x) >> 16) & 0xf)
+
+#define VM_FAULT_ERROR (VM_FAULT_OOM | VM_FAULT_SIGBUS |	\
+			VM_FAULT_SIGSEGV | VM_FAULT_HWPOISON |	\
+			VM_FAULT_HWPOISON_LARGE | VM_FAULT_FALLBACK)
+
+#define VM_FAULT_RESULT_TRACE \
+	{ VM_FAULT_OOM,                 "OOM" },	\
+	{ VM_FAULT_SIGBUS,              "SIGBUS" },	\
+	{ VM_FAULT_MAJOR,               "MAJOR" },	\
+	{ VM_FAULT_WRITE,               "WRITE" },	\
+	{ VM_FAULT_HWPOISON,            "HWPOISON" },	\
+	{ VM_FAULT_HWPOISON_LARGE,      "HWPOISON_LARGE" },	\
+	{ VM_FAULT_SIGSEGV,             "SIGSEGV" },	\
+	{ VM_FAULT_NOPAGE,              "NOPAGE" },	\
+	{ VM_FAULT_LOCKED,              "LOCKED" },	\
+	{ VM_FAULT_RETRY,               "RETRY" },	\
+	{ VM_FAULT_FALLBACK,            "FALLBACK" },	\
+	{ VM_FAULT_DONE_COW,            "DONE_COW" },	\
+	{ VM_FAULT_NEEDDSYNC,           "NEEDDSYNC" }
+
 struct vm_special_mapping {
 	const char *name;	/* The name, e.g. "[vdso]". */
 
-- 
cgit v1.2.3


From 62461ac2e5b6520b6d65fc6d7d7b4b8df4b848d8 Mon Sep 17 00:00:00 2001
From: Luc Van Oostenryck <luc.vanoostenryck@gmail.com>
Date: Thu, 7 Mar 2019 16:31:28 -0800
Subject: include/linux/relay.h: fix percpu annotation in struct rchan

The percpu member of this structure is declared as:
	struct ... ** __percpu member;
So its type is:
	__percpu pointer to pointer to struct ...

But looking at how it's used, its type should be:
	pointer to __percpu pointer to struct ...
and it should thus be declared as:
	struct ... * __percpu *member;

So fix the placement of '__percpu' in the definition of this
structures.

This silents a few Sparse's warnings like:
	warning: incorrect type in initializer (different address spaces)
	  expected void const [noderef] <asn:3> *__vpp_verify
	  got struct sched_domain **

Link: http://lkml.kernel.org/r/20190118144902.79065-1-luc.vanoostenryck@gmail.com
Fixes: 017c59c042d01 ("relay: Use per CPU constructs for the relay channel buffer pointers")
Signed-off-by: Luc Van Oostenryck <luc.vanoostenryck@gmail.com>
Cc: Jens Axboe <axboe@suse.de>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/relay.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/relay.h b/include/linux/relay.h
index e1bdf01a86e2..c759f96e39c1 100644
--- a/include/linux/relay.h
+++ b/include/linux/relay.h
@@ -66,7 +66,7 @@ struct rchan
 	struct kref kref;		/* channel refcount */
 	void *private_data;		/* for user-defined data */
 	size_t last_toobig;		/* tried to log event > subbuf size */
-	struct rchan_buf ** __percpu buf; /* per-cpu channel buffers */
+	struct rchan_buf * __percpu *buf; /* per-cpu channel buffers */
 	int is_global;			/* One global buffer ? */
 	struct list_head list;		/* for channel list */
 	struct dentry *parent;		/* parent dentry passed to open */
-- 
cgit v1.2.3


From 40ba1d9b4d19796afc9b7ece872f5f3e8f5e2c13 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Fri, 8 Mar 2019 00:58:53 +0100
Subject: netfilter: nf_tables: fix set double-free in abort path

The abort path can cause a double-free of an anonymous set.
Added-and-to-be-aborted rule looks like this:

udp dport { 137, 138 } drop

The to-be-aborted transaction list looks like this:

newset
newsetelem
newsetelem
rule

This gets walked in reverse order, so first pass disables the rule, the
set elements, then the set.

After synchronize_rcu(), we then destroy those in same order: rule, set
element, set element, newset.

Problem is that the anonymous set has already been bound to the rule, so
the rule (lookup expression destructor) already frees the set, when then
cause use-after-free when trying to delete the elements from this set,
then try to free the set again when handling the newset expression.

Rule releases the bound set in first place from the abort path, this
causes the use-after-free on set element removal when undoing the new
element transactions. To handle this, skip new element transaction if
set is bound from the abort path.

This is still causes the use-after-free on set element removal.  To
handle this, remove transaction from the list when the set is already
bound.

Joint work with Florian Westphal.

Fixes: f6ac85858976 ("netfilter: nf_tables: unbind set in rule from commit path")
Bugzilla: https://bugzilla.netfilter.org/show_bug.cgi?id=1325
Acked-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h |  6 ++----
 net/netfilter/nf_tables_api.c     | 17 +++++++++++------
 2 files changed, 13 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index c331e96a713b..ed0687b0e0f4 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -416,7 +416,8 @@ struct nft_set {
 	unsigned char			*udata;
 	/* runtime data below here */
 	const struct nft_set_ops	*ops ____cacheline_aligned;
-	u16				flags:14,
+	u16				flags:13,
+					bound:1,
 					genmask:2;
 	u8				klen;
 	u8				dlen;
@@ -1344,15 +1345,12 @@ struct nft_trans_rule {
 struct nft_trans_set {
 	struct nft_set			*set;
 	u32				set_id;
-	bool				bound;
 };
 
 #define nft_trans_set(trans)	\
 	(((struct nft_trans_set *)trans->data)->set)
 #define nft_trans_set_id(trans)	\
 	(((struct nft_trans_set *)trans->data)->set_id)
-#define nft_trans_set_bound(trans)	\
-	(((struct nft_trans_set *)trans->data)->bound)
 
 struct nft_trans_chain {
 	bool				update;
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index faf6bd10a19f..1333bf97dc26 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -142,7 +142,7 @@ static void nft_set_trans_bind(const struct nft_ctx *ctx, struct nft_set *set)
 	list_for_each_entry_reverse(trans, &net->nft.commit_list, list) {
 		if (trans->msg_type == NFT_MSG_NEWSET &&
 		    nft_trans_set(trans) == set) {
-			nft_trans_set_bound(trans) = true;
+			set->bound = true;
 			break;
 		}
 	}
@@ -6709,8 +6709,7 @@ static void nf_tables_abort_release(struct nft_trans *trans)
 		nf_tables_rule_destroy(&trans->ctx, nft_trans_rule(trans));
 		break;
 	case NFT_MSG_NEWSET:
-		if (!nft_trans_set_bound(trans))
-			nft_set_destroy(nft_trans_set(trans));
+		nft_set_destroy(nft_trans_set(trans));
 		break;
 	case NFT_MSG_NEWSETELEM:
 		nft_set_elem_destroy(nft_trans_elem_set(trans),
@@ -6783,8 +6782,11 @@ static int __nf_tables_abort(struct net *net)
 			break;
 		case NFT_MSG_NEWSET:
 			trans->ctx.table->use--;
-			if (!nft_trans_set_bound(trans))
-				list_del_rcu(&nft_trans_set(trans)->list);
+			if (nft_trans_set(trans)->bound) {
+				nft_trans_destroy(trans);
+				break;
+			}
+			list_del_rcu(&nft_trans_set(trans)->list);
 			break;
 		case NFT_MSG_DELSET:
 			trans->ctx.table->use++;
@@ -6792,8 +6794,11 @@ static int __nf_tables_abort(struct net *net)
 			nft_trans_destroy(trans);
 			break;
 		case NFT_MSG_NEWSETELEM:
+			if (nft_trans_elem_set(trans)->bound) {
+				nft_trans_destroy(trans);
+				break;
+			}
 			te = (struct nft_trans_elem *)trans->data;
-
 			te->set->ops->remove(net, te->set, &te->elem);
 			atomic_dec(&te->set->nelems);
 			break;
-- 
cgit v1.2.3


From 930c9f9125c85b5134b3e711bc252ecc094708e3 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 8 Mar 2019 12:48:39 +0000
Subject: rxrpc: Fix client call connect/disconnect race

rxrpc_disconnect_client_call() reads the call's connection ID protocol
value (call->cid) as part of that function's variable declarations.  This
is bad because it's not inside the locked section and so may race with
someone granting use of the channel to the call.

This manifests as an assertion failure (see below) where the call in the
presumed channel (0 because call->cid wasn't set when we read it) doesn't
match the call attached to the channel we were actually granted (if 1, 2 or
3).

Fix this by moving the read and dependent calculations inside of the
channel_lock section.  Also, only set the channel number and pointer
variables if cid is not zero (ie. unset).

This problem can be induced by injecting an occasional error in
rxrpc_wait_for_channel() before the call to schedule().

Make two further changes also:

 (1) Add a trace for wait failure in rxrpc_connect_call().

 (2) Drop channel_lock before BUG'ing in the case of the assertion failure.

The failure causes a trace akin to the following:

rxrpc: Assertion failed - 18446612685268945920(0xffff8880beab8c00) == 18446612685268621312(0xffff8880bea69800) is false
------------[ cut here ]------------
kernel BUG at net/rxrpc/conn_client.c:824!
...
RIP: 0010:rxrpc_disconnect_client_call+0x2bf/0x99d
...
Call Trace:
 rxrpc_connect_call+0x902/0x9b3
 ? wake_up_q+0x54/0x54
 rxrpc_new_client_call+0x3a0/0x751
 ? rxrpc_kernel_begin_call+0x141/0x1bc
 ? afs_alloc_call+0x1b5/0x1b5
 rxrpc_kernel_begin_call+0x141/0x1bc
 afs_make_call+0x20c/0x525
 ? afs_alloc_call+0x1b5/0x1b5
 ? __lock_is_held+0x40/0x71
 ? lockdep_init_map+0xaf/0x193
 ? lockdep_init_map+0xaf/0x193
 ? __lock_is_held+0x40/0x71
 ? yfs_fs_fetch_data+0x33b/0x34a
 yfs_fs_fetch_data+0x33b/0x34a
 afs_fetch_data+0xdc/0x3b7
 afs_read_dir+0x52d/0x97f
 afs_dir_iterate+0xa0/0x661
 ? iterate_dir+0x63/0x141
 iterate_dir+0xa2/0x141
 ksys_getdents64+0x9f/0x11b
 ? filldir+0x111/0x111
 ? do_syscall_64+0x3e/0x1a0
 __x64_sys_getdents64+0x16/0x19
 do_syscall_64+0x7d/0x1a0
 entry_SYSCALL_64_after_hwframe+0x49/0xbe

Fixes: 45025bceef17 ("rxrpc: Improve management and caching of client connection objects")
Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Marc Dionne <marc.dionne@auristor.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/trace/events/rxrpc.h |  2 ++
 net/rxrpc/conn_client.c      | 20 +++++++++++++++-----
 2 files changed, 17 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h
index 5b50fe4906d2..7b60fd186cfe 100644
--- a/include/trace/events/rxrpc.h
+++ b/include/trace/events/rxrpc.h
@@ -76,6 +76,7 @@ enum rxrpc_client_trace {
 	rxrpc_client_chan_disconnect,
 	rxrpc_client_chan_pass,
 	rxrpc_client_chan_unstarted,
+	rxrpc_client_chan_wait_failed,
 	rxrpc_client_cleanup,
 	rxrpc_client_count,
 	rxrpc_client_discard,
@@ -276,6 +277,7 @@ enum rxrpc_tx_point {
 	EM(rxrpc_client_chan_disconnect,	"ChDisc") \
 	EM(rxrpc_client_chan_pass,		"ChPass") \
 	EM(rxrpc_client_chan_unstarted,		"ChUnst") \
+	EM(rxrpc_client_chan_wait_failed,	"ChWtFl") \
 	EM(rxrpc_client_cleanup,		"Clean ") \
 	EM(rxrpc_client_count,			"Count ") \
 	EM(rxrpc_client_discard,		"Discar") \
diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c
index b2adfa825363..f307a05076e1 100644
--- a/net/rxrpc/conn_client.c
+++ b/net/rxrpc/conn_client.c
@@ -704,6 +704,7 @@ int rxrpc_connect_call(struct rxrpc_sock *rx,
 
 	ret = rxrpc_wait_for_channel(call, gfp);
 	if (ret < 0) {
+		trace_rxrpc_client(call->conn, ret, rxrpc_client_chan_wait_failed);
 		rxrpc_disconnect_client_call(call);
 		goto out;
 	}
@@ -774,16 +775,22 @@ static void rxrpc_set_client_reap_timer(struct rxrpc_net *rxnet)
  */
 void rxrpc_disconnect_client_call(struct rxrpc_call *call)
 {
-	unsigned int channel = call->cid & RXRPC_CHANNELMASK;
 	struct rxrpc_connection *conn = call->conn;
-	struct rxrpc_channel *chan = &conn->channels[channel];
+	struct rxrpc_channel *chan = NULL;
 	struct rxrpc_net *rxnet = conn->params.local->rxnet;
+	unsigned int channel = -1;
+	u32 cid;
 
+	spin_lock(&conn->channel_lock);
+
+	cid = call->cid;
+	if (cid) {
+		channel = cid & RXRPC_CHANNELMASK;
+		chan = &conn->channels[channel];
+	}
 	trace_rxrpc_client(conn, channel, rxrpc_client_chan_disconnect);
 	call->conn = NULL;
 
-	spin_lock(&conn->channel_lock);
-
 	/* Calls that have never actually been assigned a channel can simply be
 	 * discarded.  If the conn didn't get used either, it will follow
 	 * immediately unless someone else grabs it in the meantime.
@@ -807,7 +814,10 @@ void rxrpc_disconnect_client_call(struct rxrpc_call *call)
 		goto out;
 	}
 
-	ASSERTCMP(rcu_access_pointer(chan->call), ==, call);
+	if (rcu_access_pointer(chan->call) != call) {
+		spin_unlock(&conn->channel_lock);
+		BUG();
+	}
 
 	/* If a client call was exposed to the world, we save the result for
 	 * retransmission.
-- 
cgit v1.2.3


From 71b91a506bb05f9aef3acd57af2e835d85721942 Mon Sep 17 00:00:00 2001
From: Bo YU <tsu.yubo@gmail.com>
Date: Fri, 8 Mar 2019 01:45:51 -0500
Subject: bpf: fix warning about using plain integer as NULL

Sparse warning below:

sudo make C=2 CF=-D__CHECK_ENDIAN__ M=net/bpf/
CHECK   net/bpf//test_run.c
net/bpf//test_run.c:19:77: warning: Using plain integer as NULL pointer
./include/linux/bpf-cgroup.h:295:77: warning: Using plain integer as NULL pointer

Fixes: 8bad74f9840f ("bpf: extend cgroup bpf core to allow multiple cgroup storage types")
Acked-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Bo YU <tsu.yubo@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/bpf-cgroup.h | 2 +-
 net/bpf/test_run.c         | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index 695b2a880d9a..a4c644c1c091 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -292,7 +292,7 @@ static inline int bpf_cgroup_storage_assign(struct bpf_prog *prog,
 static inline void bpf_cgroup_storage_release(struct bpf_prog *prog,
 					      struct bpf_map *map) {}
 static inline struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(
-	struct bpf_prog *prog, enum bpf_cgroup_storage_type stype) { return 0; }
+	struct bpf_prog *prog, enum bpf_cgroup_storage_type stype) { return NULL; }
 static inline void bpf_cgroup_storage_free(
 	struct bpf_cgroup_storage *storage) {}
 static inline int bpf_percpu_cgroup_storage_copy(struct bpf_map *map, void *key,
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index da7051d62727..fab142b796ef 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -16,7 +16,7 @@
 static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat,
 			u32 *retval, u32 *time)
 {
-	struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE] = { 0 };
+	struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE] = { NULL };
 	enum bpf_cgroup_storage_type stype;
 	u64 time_start, time_spent = 0;
 	int ret = 0;
-- 
cgit v1.2.3


From 161e613755e93c45cc47e75ab046f0f8de9e6d49 Mon Sep 17 00:00:00 2001
From: Pedro Tammela <pctammela@gmail.com>
Date: Tue, 5 Mar 2019 11:35:54 -0300
Subject: net: add missing documentation in linux/skbuff.h

This patch adds missing documentation for some inline functions on
linux/skbuff.h. The patch is incomplete and a lot more can be added,
just wondering if it's of interest of the netdev developers.

Also fixed some whitespaces.

Signed-off-by: Pedro Tammela <pctammela@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 64 ++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 59 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 27beb549ffbe..730b333be591 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -327,26 +327,49 @@ struct skb_frag_struct {
 #endif
 };
 
+/**
+ * skb_frag_size - Returns the size of a skb fragment
+ * @frag: skb fragment
+ */
 static inline unsigned int skb_frag_size(const skb_frag_t *frag)
 {
 	return frag->size;
 }
 
+/**
+ * skb_frag_size_set - Sets the size of a skb fragment
+ * @frag: skb fragment
+ * @size: size of fragment
+ */
 static inline void skb_frag_size_set(skb_frag_t *frag, unsigned int size)
 {
 	frag->size = size;
 }
 
+/**
+ * skb_frag_size_add - Incrementes the size of a skb fragment by %delta
+ * @frag: skb fragment
+ * @delta: value to add
+ */
 static inline void skb_frag_size_add(skb_frag_t *frag, int delta)
 {
 	frag->size += delta;
 }
 
+/**
+ * skb_frag_size_sub - Decrements the size of a skb fragment by %delta
+ * @frag: skb fragment
+ * @delta: value to subtract
+ */
 static inline void skb_frag_size_sub(skb_frag_t *frag, int delta)
 {
 	frag->size -= delta;
 }
 
+/**
+ * skb_frag_must_loop - Test if %p is a high memory page
+ * @p: fragment's page
+ */
 static inline bool skb_frag_must_loop(struct page *p)
 {
 #if defined(CONFIG_HIGHMEM)
@@ -590,7 +613,7 @@ typedef unsigned int sk_buff_data_t;
 typedef unsigned char *sk_buff_data_t;
 #endif
 
-/** 
+/**
  *	struct sk_buff - socket buffer
  *	@next: Next buffer in list
  *	@prev: Previous buffer in list
@@ -648,7 +671,7 @@ typedef unsigned char *sk_buff_data_t;
  *	@csum_not_inet: use CRC32c to resolve CHECKSUM_PARTIAL
  *	@dst_pending_confirm: need to confirm neighbour
  *	@decrypted: Decrypted SKB
-  *	@napi_id: id of the NAPI struct this skb came from
+ *	@napi_id: id of the NAPI struct this skb came from
  *	@secmark: security marking
  *	@mark: Generic packet mark
  *	@vlan_proto: vlan encapsulation protocol
@@ -883,7 +906,10 @@ struct sk_buff {
 #define SKB_ALLOC_RX		0x02
 #define SKB_ALLOC_NAPI		0x04
 
-/* Returns true if the skb was allocated from PFMEMALLOC reserves */
+/**
+ * skb_pfmemalloc - Test if the skb was allocated from PFMEMALLOC reserves
+ * @skb: buffer
+ */
 static inline bool skb_pfmemalloc(const struct sk_buff *skb)
 {
 	return unlikely(skb->pfmemalloc);
@@ -905,7 +931,7 @@ static inline bool skb_pfmemalloc(const struct sk_buff *skb)
  */
 static inline struct dst_entry *skb_dst(const struct sk_buff *skb)
 {
-	/* If refdst was not refcounted, check we still are in a 
+	/* If refdst was not refcounted, check we still are in a
 	 * rcu_read_lock section
 	 */
 	WARN_ON((skb->_skb_refdst & SKB_DST_NOREF) &&
@@ -952,6 +978,10 @@ static inline bool skb_dst_is_noref(const struct sk_buff *skb)
 	return (skb->_skb_refdst & SKB_DST_NOREF) && skb_dst(skb);
 }
 
+/**
+ * skb_rtable - Returns the skb &rtable
+ * @skb: buffer
+ */
 static inline struct rtable *skb_rtable(const struct sk_buff *skb)
 {
 	return (struct rtable *)skb_dst(skb);
@@ -966,6 +996,10 @@ static inline bool skb_pkt_type_ok(u32 ptype)
 	return ptype <= PACKET_OTHERHOST;
 }
 
+/**
+ * skb_napi_id - Returns the skb's NAPI id
+ * @skb: buffer
+ */
 static inline unsigned int skb_napi_id(const struct sk_buff *skb)
 {
 #ifdef CONFIG_NET_RX_BUSY_POLL
@@ -975,7 +1009,12 @@ static inline unsigned int skb_napi_id(const struct sk_buff *skb)
 #endif
 }
 
-/* decrement the reference count and return true if we can free the skb */
+/**
+ * skb_unref - decrement the skb's reference count
+ * @skb: buffer
+ *
+ * Returns true if we can free the skb.
+ */
 static inline bool skb_unref(struct sk_buff *skb)
 {
 	if (unlikely(!skb))
@@ -1005,6 +1044,14 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t priority, int flags,
 			    int node);
 struct sk_buff *__build_skb(void *data, unsigned int frag_size);
 struct sk_buff *build_skb(void *data, unsigned int frag_size);
+
+/**
+ * alloc_skb - allocate a network buffer
+ * @size: size to allocate
+ * @priority: allocation mask
+ *
+ * This function is a convenient wrapper around __alloc_skb().
+ */
 static inline struct sk_buff *alloc_skb(unsigned int size,
 					gfp_t priority)
 {
@@ -1047,6 +1094,13 @@ static inline bool skb_fclone_busy(const struct sock *sk,
 	       fclones->skb2.sk == sk;
 }
 
+/**
+ * alloc_skb_fclone - allocate a network buffer from fclone cache
+ * @size: size to allocate
+ * @priority: allocation mask
+ *
+ * This function is a convenient wrapper around __alloc_skb().
+ */
 static inline struct sk_buff *alloc_skb_fclone(unsigned int size,
 					       gfp_t priority)
 {
-- 
cgit v1.2.3


From 1039c6e1936ef2be0f342bd56fcf0fb25c1df5fe Mon Sep 17 00:00:00 2001
From: Guillaume Nault <gnault@redhat.com>
Date: Sat, 9 Mar 2019 10:26:53 +0100
Subject: net: keep refcount warning in reqsk_free()

As Eric Dumazet said, "We do not have a way to tell if the req was ever
inserted in a hash table, so better play safe.".
Let's remove this comment, so that nobody will be tempted to drop the
WARN_ON_ONCE() line.

Signed-off-by: Guillaume Nault <gnault@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/request_sock.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/net/request_sock.h b/include/net/request_sock.h
index 347015515a7d..21a5243fecd1 100644
--- a/include/net/request_sock.h
+++ b/include/net/request_sock.h
@@ -108,7 +108,6 @@ reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener,
 
 static inline void reqsk_free(struct request_sock *req)
 {
-	/* temporary debugging */
 	WARN_ON_ONCE(refcount_read(&req->rsk_refcnt) != 0);
 
 	req->rsk_ops->destructor(req);
-- 
cgit v1.2.3


From 083b78a9ed64bc71957dd7da866c128a307ea062 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sat, 9 Mar 2019 14:43:38 -0800
Subject: ip: fix ip_mc_may_pull() return value
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

ip_mc_may_pull() must return 0 if there is a problem, not an errno.

syzbot reported :

BUG: KASAN: use-after-free in br_ip4_multicast_igmp3_report net/bridge/br_multicast.c:947 [inline]
BUG: KASAN: use-after-free in br_multicast_ipv4_rcv net/bridge/br_multicast.c:1631 [inline]
BUG: KASAN: use-after-free in br_multicast_rcv+0x3cd8/0x4440 net/bridge/br_multicast.c:1741
Read of size 4 at addr ffff88820a4084ee by task syz-executor.2/11183

CPU: 1 PID: 11183 Comm: syz-executor.2 Not tainted 5.0.0+ #14
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
Call Trace:
 __dump_stack lib/dump_stack.c:77 [inline]
 dump_stack+0x172/0x1f0 lib/dump_stack.c:113
 print_address_description.cold+0x7c/0x20d mm/kasan/report.c:187
 kasan_report.cold+0x1b/0x40 mm/kasan/report.c:317
 __asan_report_load4_noabort+0x14/0x20 mm/kasan/generic_report.c:131
 br_ip4_multicast_igmp3_report net/bridge/br_multicast.c:947 [inline]
 br_multicast_ipv4_rcv net/bridge/br_multicast.c:1631 [inline]
 br_multicast_rcv+0x3cd8/0x4440 net/bridge/br_multicast.c:1741
 br_handle_frame_finish+0xa3a/0x14c0 net/bridge/br_input.c:108
 br_nf_hook_thresh+0x2ec/0x380 net/bridge/br_netfilter_hooks.c:1005
 br_nf_pre_routing_finish+0x8e2/0x1750 net/bridge/br_netfilter_hooks.c:410
 NF_HOOK include/linux/netfilter.h:289 [inline]
 NF_HOOK include/linux/netfilter.h:283 [inline]
 br_nf_pre_routing+0x7e7/0x13a0 net/bridge/br_netfilter_hooks.c:506
 nf_hook_entry_hookfn include/linux/netfilter.h:119 [inline]
 nf_hook_slow+0xbf/0x1f0 net/netfilter/core.c:511
 nf_hook include/linux/netfilter.h:244 [inline]
 NF_HOOK include/linux/netfilter.h:287 [inline]
 br_handle_frame+0x95b/0x1450 net/bridge/br_input.c:305
 __netif_receive_skb_core+0xa96/0x3040 net/core/dev.c:4902
 __netif_receive_skb_one_core+0xa8/0x1a0 net/core/dev.c:4971
 __netif_receive_skb+0x2c/0x1c0 net/core/dev.c:5083
 netif_receive_skb_internal+0x117/0x660 net/core/dev.c:5186
 netif_receive_skb+0x6e/0x5a0 net/core/dev.c:5261

Fixes: ba5ea614622d ("bridge: simplify ip_mc_check_igmp() and ipv6_mc_check_mld() calls")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: syzbot <syzkaller@googlegroups.com>
Cc: Linus Lüssing <linus.luessing@c0d3.blue>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/igmp.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/igmp.h b/include/linux/igmp.h
index cc85f4524dbf..9c94b2ea789c 100644
--- a/include/linux/igmp.h
+++ b/include/linux/igmp.h
@@ -110,7 +110,7 @@ struct ip_mc_list {
 static inline int ip_mc_may_pull(struct sk_buff *skb, unsigned int len)
 {
 	if (skb_transport_offset(skb) + ip_transport_len(skb) < len)
-		return -EINVAL;
+		return 0;
 
 	return pskb_may_pull(skb, len);
 }
-- 
cgit v1.2.3


From 009a82f6437490c262584d65a14094a818bcb747 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@hammerspace.com>
Date: Sat, 9 Mar 2019 12:07:17 -0500
Subject: SUNRPC: Micro-optimise when the task is known not to be sleeping

In cases where we know the task is not sleeping, try to optimise
away the indirect call to task->tk_action() by replacing it with
a direct call.
Only change tail calls, to allow gcc to perform tail call
elimination.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 include/linux/sunrpc/sched.h |  8 ++++
 net/sunrpc/clnt.c            | 99 +++++++++++++++++++++++++++++---------------
 2 files changed, 73 insertions(+), 34 deletions(-)

(limited to 'include')

diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index 52d41d0c1ae1..ec861cd0cfe8 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -304,4 +304,12 @@ rpc_clnt_swap_deactivate(struct rpc_clnt *clnt)
 }
 #endif /* CONFIG_SUNRPC_SWAP */
 
+static inline bool
+rpc_task_need_resched(const struct rpc_task *task)
+{
+	if (RPC_IS_QUEUED(task) || task->tk_callback)
+		return true;
+	return false;
+}
+
 #endif /* _LINUX_SUNRPC_SCHED_H_ */
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 67c955d8b21b..498dd6ad5bc5 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1540,6 +1540,7 @@ call_start(struct rpc_task *task)
 	clnt->cl_stats->rpccnt++;
 	task->tk_action = call_reserve;
 	rpc_task_set_transport(task, clnt);
+	call_reserve(task);
 }
 
 /*
@@ -1553,6 +1554,9 @@ call_reserve(struct rpc_task *task)
 	task->tk_status  = 0;
 	task->tk_action  = call_reserveresult;
 	xprt_reserve(task);
+	if (rpc_task_need_resched(task))
+		return;
+	 call_reserveresult(task);
 }
 
 static void call_retry_reserve(struct rpc_task *task);
@@ -1575,6 +1579,7 @@ call_reserveresult(struct rpc_task *task)
 	if (status >= 0) {
 		if (task->tk_rqstp) {
 			task->tk_action = call_refresh;
+			call_refresh(task);
 			return;
 		}
 
@@ -1600,6 +1605,7 @@ call_reserveresult(struct rpc_task *task)
 		/* fall through */
 	case -EAGAIN:	/* woken up; retry */
 		task->tk_action = call_retry_reserve;
+		call_retry_reserve(task);
 		return;
 	case -EIO:	/* probably a shutdown */
 		break;
@@ -1622,6 +1628,9 @@ call_retry_reserve(struct rpc_task *task)
 	task->tk_status  = 0;
 	task->tk_action  = call_reserveresult;
 	xprt_retry_reserve(task);
+	if (rpc_task_need_resched(task))
+		return;
+	call_reserveresult(task);
 }
 
 /*
@@ -1636,6 +1645,9 @@ call_refresh(struct rpc_task *task)
 	task->tk_status = 0;
 	task->tk_client->cl_stats->rpcauthrefresh++;
 	rpcauth_refreshcred(task);
+	if (rpc_task_need_resched(task))
+		return;
+	call_refreshresult(task);
 }
 
 /*
@@ -1654,6 +1666,7 @@ call_refreshresult(struct rpc_task *task)
 	case 0:
 		if (rpcauth_uptodatecred(task)) {
 			task->tk_action = call_allocate;
+			call_allocate(task);
 			return;
 		}
 		/* Use rate-limiting and a max number of retries if refresh
@@ -1672,6 +1685,7 @@ call_refreshresult(struct rpc_task *task)
 		task->tk_cred_retry--;
 		dprintk("RPC: %5u %s: retry refresh creds\n",
 				task->tk_pid, __func__);
+		call_refresh(task);
 		return;
 	}
 	dprintk("RPC: %5u %s: refresh creds failed with error %d\n",
@@ -1697,8 +1711,10 @@ call_allocate(struct rpc_task *task)
 	task->tk_status = 0;
 	task->tk_action = call_encode;
 
-	if (req->rq_buffer)
+	if (req->rq_buffer) {
+		call_encode(task);
 		return;
+	}
 
 	if (proc->p_proc != 0) {
 		BUG_ON(proc->p_arglen == 0);
@@ -1719,8 +1735,12 @@ call_allocate(struct rpc_task *task)
 
 	status = xprt->ops->buf_alloc(task);
 	xprt_inject_disconnect(xprt);
-	if (status == 0)
+	if (status == 0) {
+		if (rpc_task_need_resched(task))
+			return;
+		call_encode(task);
 		return;
+	}
 	if (status != -ENOMEM) {
 		rpc_exit(task, status);
 		return;
@@ -1803,12 +1823,8 @@ call_encode(struct rpc_task *task)
 		xprt_request_enqueue_receive(task);
 	xprt_request_enqueue_transmit(task);
 out:
-	task->tk_action = call_transmit;
-	/* Check that the connection is OK */
-	if (!xprt_bound(task->tk_xprt))
-		task->tk_action = call_bind;
-	else if (!xprt_connected(task->tk_xprt))
-		task->tk_action = call_connect;
+	task->tk_action = call_bind;
+	call_bind(task);
 }
 
 /*
@@ -1842,14 +1858,17 @@ call_bind(struct rpc_task *task)
 		return;
 	}
 
+	if (xprt_bound(xprt)) {
+		task->tk_action = call_connect;
+		call_connect(task);
+		return;
+	}
+
 	dprint_status(task);
 
-	task->tk_action = call_connect;
-	if (!xprt_bound(xprt)) {
-		task->tk_action = call_bind_status;
-		task->tk_timeout = xprt->bind_timeout;
-		xprt->ops->rpcbind(task);
-	}
+	task->tk_action = call_bind_status;
+	task->tk_timeout = xprt->bind_timeout;
+	xprt->ops->rpcbind(task);
 }
 
 /*
@@ -1869,6 +1888,7 @@ call_bind_status(struct rpc_task *task)
 		dprint_status(task);
 		task->tk_status = 0;
 		task->tk_action = call_connect;
+		call_connect(task);
 		return;
 	}
 
@@ -1949,21 +1969,24 @@ call_connect(struct rpc_task *task)
 		return;
 	}
 
+	if (xprt_connected(xprt)) {
+		task->tk_action = call_transmit;
+		call_transmit(task);
+		return;
+	}
+
 	dprintk("RPC: %5u call_connect xprt %p %s connected\n",
 			task->tk_pid, xprt,
 			(xprt_connected(xprt) ? "is" : "is not"));
 
-	task->tk_action = call_transmit;
-	if (!xprt_connected(xprt)) {
-		task->tk_action = call_connect_status;
-		if (task->tk_status < 0)
-			return;
-		if (task->tk_flags & RPC_TASK_NOCONNECT) {
-			rpc_exit(task, -ENOTCONN);
-			return;
-		}
-		xprt_connect(task);
+	task->tk_action = call_connect_status;
+	if (task->tk_status < 0)
+		return;
+	if (task->tk_flags & RPC_TASK_NOCONNECT) {
+		rpc_exit(task, -ENOTCONN);
+		return;
 	}
+	xprt_connect(task);
 }
 
 /*
@@ -2016,6 +2039,7 @@ call_connect_status(struct rpc_task *task)
 	case 0:
 		clnt->cl_stats->netreconn++;
 		task->tk_action = call_transmit;
+		call_transmit(task);
 		return;
 	}
 	rpc_exit(task, status);
@@ -2040,19 +2064,20 @@ call_transmit(struct rpc_task *task)
 	dprint_status(task);
 
 	task->tk_action = call_transmit_status;
+	if (!xprt_prepare_transmit(task))
+		return;
+	task->tk_status = 0;
 	if (test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate)) {
-		if (!xprt_prepare_transmit(task))
+		if (!xprt_connected(task->tk_xprt)) {
+			task->tk_status = -ENOTCONN;
 			return;
-		task->tk_status = 0;
-		if (test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate)) {
-			if (!xprt_connected(task->tk_xprt)) {
-				task->tk_status = -ENOTCONN;
-				return;
-			}
-			xprt_transmit(task);
 		}
+		xprt_transmit(task);
 	}
 	xprt_end_transmit(task);
+	if (rpc_task_need_resched(task))
+		return;
+	call_transmit_status(task);
 }
 
 /*
@@ -2067,8 +2092,12 @@ call_transmit_status(struct rpc_task *task)
 	 * Common case: success.  Force the compiler to put this
 	 * test first.
 	 */
-	if (task->tk_status == 0) {
-		xprt_request_wait_receive(task);
+	if (rpc_task_transmitted(task)) {
+		if (task->tk_status == 0)
+			xprt_request_wait_receive(task);
+		if (rpc_task_need_resched(task))
+			return;
+		call_status(task);
 		return;
 	}
 
@@ -2129,6 +2158,7 @@ call_bc_encode(struct rpc_task *task)
 {
 	xprt_request_enqueue_transmit(task);
 	task->tk_action = call_bc_transmit;
+	call_bc_transmit(task);
 }
 
 /*
@@ -2219,6 +2249,7 @@ call_status(struct rpc_task *task)
 	status = task->tk_status;
 	if (status >= 0) {
 		task->tk_action = call_decode;
+		call_decode(task);
 		return;
 	}
 
-- 
cgit v1.2.3


From 273fe3f1006ea5ebc63d6729e43e8e45e32b256a Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Fri, 8 Mar 2019 15:30:03 +0100
Subject: netfilter: nf_tables: bogus EBUSY when deleting set after flush
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Set deletion after flush coming in the same batch results in EBUSY. Add
set use counter to track the number of references to this set from
rules. We cannot rely on the list of bindings for this since such list
is still populated from the preparation phase.

Reported-by: Václav Zindulka <vaclav.zindulka@tlapnet.cz>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h |  6 ++++++
 net/netfilter/nf_tables_api.c     | 28 +++++++++++++++++++++++++++-
 net/netfilter/nft_dynset.c        | 13 +++++++++----
 net/netfilter/nft_lookup.c        | 13 +++++++++----
 net/netfilter/nft_objref.c        | 13 +++++++++----
 5 files changed, 60 insertions(+), 13 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index ed0687b0e0f4..3e9ab643eedf 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -382,6 +382,7 @@ void nft_unregister_set(struct nft_set_type *type);
  * 	@dtype: data type (verdict or numeric type defined by userspace)
  * 	@objtype: object type (see NFT_OBJECT_* definitions)
  * 	@size: maximum set size
+ *	@use: number of rules references to this set
  * 	@nelems: number of elements
  * 	@ndeact: number of deactivated elements queued for removal
  *	@timeout: default timeout value in jiffies
@@ -407,6 +408,7 @@ struct nft_set {
 	u32				dtype;
 	u32				objtype;
 	u32				size;
+	u32				use;
 	atomic_t			nelems;
 	u32				ndeact;
 	u64				timeout;
@@ -467,6 +469,10 @@ struct nft_set_binding {
 	u32				flags;
 };
 
+enum nft_trans_phase;
+void nf_tables_deactivate_set(const struct nft_ctx *ctx, struct nft_set *set,
+			      struct nft_set_binding *binding,
+			      enum nft_trans_phase phase);
 int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set,
 		       struct nft_set_binding *binding);
 void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set,
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 1333bf97dc26..ca09ef037ca5 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -3672,6 +3672,9 @@ err1:
 
 static void nft_set_destroy(struct nft_set *set)
 {
+	if (WARN_ON(set->use > 0))
+		return;
+
 	set->ops->destroy(set);
 	module_put(to_set_type(set->ops)->owner);
 	kfree(set->name);
@@ -3712,7 +3715,7 @@ static int nf_tables_delset(struct net *net, struct sock *nlsk,
 		NL_SET_BAD_ATTR(extack, attr);
 		return PTR_ERR(set);
 	}
-	if (!list_empty(&set->bindings) ||
+	if (set->use ||
 	    (nlh->nlmsg_flags & NLM_F_NONREC && atomic_read(&set->nelems) > 0)) {
 		NL_SET_BAD_ATTR(extack, attr);
 		return -EBUSY;
@@ -3742,6 +3745,9 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set,
 	struct nft_set_binding *i;
 	struct nft_set_iter iter;
 
+	if (set->use == UINT_MAX)
+		return -EOVERFLOW;
+
 	if (!list_empty(&set->bindings) && nft_set_is_anonymous(set))
 		return -EBUSY;
 
@@ -3769,6 +3775,7 @@ bind:
 	binding->chain = ctx->chain;
 	list_add_tail_rcu(&binding->list, &set->bindings);
 	nft_set_trans_bind(ctx, set);
+	set->use++;
 
 	return 0;
 }
@@ -3788,6 +3795,25 @@ void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set,
 }
 EXPORT_SYMBOL_GPL(nf_tables_unbind_set);
 
+void nf_tables_deactivate_set(const struct nft_ctx *ctx, struct nft_set *set,
+			      struct nft_set_binding *binding,
+			      enum nft_trans_phase phase)
+{
+	switch (phase) {
+	case NFT_TRANS_PREPARE:
+		set->use--;
+		return;
+	case NFT_TRANS_ABORT:
+	case NFT_TRANS_RELEASE:
+		set->use--;
+		/* fall through */
+	default:
+		nf_tables_unbind_set(ctx, set, binding,
+				     phase == NFT_TRANS_COMMIT);
+	}
+}
+EXPORT_SYMBOL_GPL(nf_tables_deactivate_set);
+
 void nf_tables_destroy_set(const struct nft_ctx *ctx, struct nft_set *set)
 {
 	if (list_empty(&set->bindings) && nft_set_is_anonymous(set))
diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c
index a8a74a16f9c4..e461007558e8 100644
--- a/net/netfilter/nft_dynset.c
+++ b/net/netfilter/nft_dynset.c
@@ -240,11 +240,15 @@ static void nft_dynset_deactivate(const struct nft_ctx *ctx,
 {
 	struct nft_dynset *priv = nft_expr_priv(expr);
 
-	if (phase == NFT_TRANS_PREPARE)
-		return;
+	nf_tables_deactivate_set(ctx, priv->set, &priv->binding, phase);
+}
+
+static void nft_dynset_activate(const struct nft_ctx *ctx,
+				const struct nft_expr *expr)
+{
+	struct nft_dynset *priv = nft_expr_priv(expr);
 
-	nf_tables_unbind_set(ctx, priv->set, &priv->binding,
-			     phase == NFT_TRANS_COMMIT);
+	priv->set->use++;
 }
 
 static void nft_dynset_destroy(const struct nft_ctx *ctx,
@@ -292,6 +296,7 @@ static const struct nft_expr_ops nft_dynset_ops = {
 	.eval		= nft_dynset_eval,
 	.init		= nft_dynset_init,
 	.destroy	= nft_dynset_destroy,
+	.activate	= nft_dynset_activate,
 	.deactivate	= nft_dynset_deactivate,
 	.dump		= nft_dynset_dump,
 };
diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c
index 14496da5141d..161c3451a747 100644
--- a/net/netfilter/nft_lookup.c
+++ b/net/netfilter/nft_lookup.c
@@ -127,11 +127,15 @@ static void nft_lookup_deactivate(const struct nft_ctx *ctx,
 {
 	struct nft_lookup *priv = nft_expr_priv(expr);
 
-	if (phase == NFT_TRANS_PREPARE)
-		return;
+	nf_tables_deactivate_set(ctx, priv->set, &priv->binding, phase);
+}
+
+static void nft_lookup_activate(const struct nft_ctx *ctx,
+				const struct nft_expr *expr)
+{
+	struct nft_lookup *priv = nft_expr_priv(expr);
 
-	nf_tables_unbind_set(ctx, priv->set, &priv->binding,
-			     phase == NFT_TRANS_COMMIT);
+	priv->set->use++;
 }
 
 static void nft_lookup_destroy(const struct nft_ctx *ctx,
@@ -222,6 +226,7 @@ static const struct nft_expr_ops nft_lookup_ops = {
 	.size		= NFT_EXPR_SIZE(sizeof(struct nft_lookup)),
 	.eval		= nft_lookup_eval,
 	.init		= nft_lookup_init,
+	.activate	= nft_lookup_activate,
 	.deactivate	= nft_lookup_deactivate,
 	.destroy	= nft_lookup_destroy,
 	.dump		= nft_lookup_dump,
diff --git a/net/netfilter/nft_objref.c b/net/netfilter/nft_objref.c
index 79ef074c18ca..457a9ceb46af 100644
--- a/net/netfilter/nft_objref.c
+++ b/net/netfilter/nft_objref.c
@@ -162,11 +162,15 @@ static void nft_objref_map_deactivate(const struct nft_ctx *ctx,
 {
 	struct nft_objref_map *priv = nft_expr_priv(expr);
 
-	if (phase == NFT_TRANS_PREPARE)
-		return;
+	nf_tables_deactivate_set(ctx, priv->set, &priv->binding, phase);
+}
+
+static void nft_objref_map_activate(const struct nft_ctx *ctx,
+				    const struct nft_expr *expr)
+{
+	struct nft_objref_map *priv = nft_expr_priv(expr);
 
-	nf_tables_unbind_set(ctx, priv->set, &priv->binding,
-			     phase == NFT_TRANS_COMMIT);
+	priv->set->use++;
 }
 
 static void nft_objref_map_destroy(const struct nft_ctx *ctx,
@@ -183,6 +187,7 @@ static const struct nft_expr_ops nft_objref_map_ops = {
 	.size		= NFT_EXPR_SIZE(sizeof(struct nft_objref_map)),
 	.eval		= nft_objref_map_eval,
 	.init		= nft_objref_map_init,
+	.activate	= nft_objref_map_activate,
 	.deactivate	= nft_objref_map_deactivate,
 	.destroy	= nft_objref_map_destroy,
 	.dump		= nft_objref_map_dump,
-- 
cgit v1.2.3


From b41fdc4a7bf9045e4871c5b15905ea732ffd044f Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Mon, 11 Mar 2019 15:38:10 +0000
Subject: irqchip/gic: Drop support for secondary GIC in non-DT systems

We do not have any in-tree platform with this pathological setup,
and only a single system (Cavium's cns3xxx) isn't DT aware.

Let's drop the secondary GIC support for now, until we remove
the above horror altogether.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 arch/arm/mach-cns3xxx/core.c    |  2 +-
 drivers/irqchip/irq-gic.c       | 45 +++++++++++++++--------------------------
 include/linux/irqchip/arm-gic.h |  3 +--
 3 files changed, 18 insertions(+), 32 deletions(-)

(limited to 'include')

diff --git a/arch/arm/mach-cns3xxx/core.c b/arch/arm/mach-cns3xxx/core.c
index 7d5a44a06648..f676592d8402 100644
--- a/arch/arm/mach-cns3xxx/core.c
+++ b/arch/arm/mach-cns3xxx/core.c
@@ -90,7 +90,7 @@ void __init cns3xxx_map_io(void)
 /* used by entry-macro.S */
 void __init cns3xxx_init_irq(void)
 {
-	gic_init(0, 29, IOMEM(CNS3XXX_TC11MP_GIC_DIST_BASE_VIRT),
+	gic_init(IOMEM(CNS3XXX_TC11MP_GIC_DIST_BASE_VIRT),
 		 IOMEM(CNS3XXX_TC11MP_GIC_CPU_BASE_VIRT));
 }
 
diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
index ba2a37a27a54..fd3110c171ba 100644
--- a/drivers/irqchip/irq-gic.c
+++ b/drivers/irqchip/irq-gic.c
@@ -1089,11 +1089,10 @@ static void gic_init_chip(struct gic_chip_data *gic, struct device *dev,
 #endif
 }
 
-static int gic_init_bases(struct gic_chip_data *gic, int irq_start,
+static int gic_init_bases(struct gic_chip_data *gic,
 			  struct fwnode_handle *handle)
 {
-	irq_hw_number_t hwirq_base;
-	int gic_irqs, irq_base, ret;
+	int gic_irqs, ret;
 
 	if (IS_ENABLED(CONFIG_GIC_NON_BANKED) && gic->percpu_offset) {
 		/* Frankein-GIC without banked registers... */
@@ -1145,28 +1144,21 @@ static int gic_init_bases(struct gic_chip_data *gic, int irq_start,
 	} else {		/* Legacy support */
 		/*
 		 * For primary GICs, skip over SGIs.
-		 * For secondary GICs, skip over PPIs, too.
+		 * No secondary GIC support whatsoever.
 		 */
-		if (gic == &gic_data[0] && (irq_start & 31) > 0) {
-			hwirq_base = 16;
-			if (irq_start != -1)
-				irq_start = (irq_start & ~31) + 16;
-		} else {
-			hwirq_base = 32;
-		}
+		int irq_base;
 
-		gic_irqs -= hwirq_base; /* calculate # of irqs to allocate */
+		gic_irqs -= 16; /* calculate # of irqs to allocate */
 
-		irq_base = irq_alloc_descs(irq_start, 16, gic_irqs,
+		irq_base = irq_alloc_descs(16, 16, gic_irqs,
 					   numa_node_id());
 		if (irq_base < 0) {
-			WARN(1, "Cannot allocate irq_descs @ IRQ%d, assuming pre-allocated\n",
-			     irq_start);
-			irq_base = irq_start;
+			WARN(1, "Cannot allocate irq_descs @ IRQ16, assuming pre-allocated\n");
+			irq_base = 16;
 		}
 
 		gic->domain = irq_domain_add_legacy(NULL, gic_irqs, irq_base,
-					hwirq_base, &gic_irq_domain_ops, gic);
+						    16, &gic_irq_domain_ops, gic);
 	}
 
 	if (WARN_ON(!gic->domain)) {
@@ -1195,7 +1187,6 @@ error:
 }
 
 static int __init __gic_init_bases(struct gic_chip_data *gic,
-				   int irq_start,
 				   struct fwnode_handle *handle)
 {
 	char *name;
@@ -1231,32 +1222,28 @@ static int __init __gic_init_bases(struct gic_chip_data *gic,
 		gic_init_chip(gic, NULL, name, false);
 	}
 
-	ret = gic_init_bases(gic, irq_start, handle);
+	ret = gic_init_bases(gic, handle);
 	if (ret)
 		kfree(name);
 
 	return ret;
 }
 
-void __init gic_init(unsigned int gic_nr, int irq_start,
-		     void __iomem *dist_base, void __iomem *cpu_base)
+void __init gic_init(void __iomem *dist_base, void __iomem *cpu_base)
 {
 	struct gic_chip_data *gic;
 
-	if (WARN_ON(gic_nr >= CONFIG_ARM_GIC_MAX_NR))
-		return;
-
 	/*
 	 * Non-DT/ACPI systems won't run a hypervisor, so let's not
 	 * bother with these...
 	 */
 	static_branch_disable(&supports_deactivate_key);
 
-	gic = &gic_data[gic_nr];
+	gic = &gic_data[0];
 	gic->raw_dist_base = dist_base;
 	gic->raw_cpu_base = cpu_base;
 
-	__gic_init_bases(gic, irq_start, NULL);
+	__gic_init_bases(gic, NULL);
 }
 
 static void gic_teardown(struct gic_chip_data *gic)
@@ -1399,7 +1386,7 @@ int gic_of_init_child(struct device *dev, struct gic_chip_data **gic, int irq)
 	if (ret)
 		return ret;
 
-	ret = gic_init_bases(*gic, -1, &dev->of_node->fwnode);
+	ret = gic_init_bases(*gic, &dev->of_node->fwnode);
 	if (ret) {
 		gic_teardown(*gic);
 		return ret;
@@ -1459,7 +1446,7 @@ gic_of_init(struct device_node *node, struct device_node *parent)
 	if (gic_cnt == 0 && !gic_check_eoimode(node, &gic->raw_cpu_base))
 		static_branch_disable(&supports_deactivate_key);
 
-	ret = __gic_init_bases(gic, -1, &node->fwnode);
+	ret = __gic_init_bases(gic, &node->fwnode);
 	if (ret) {
 		gic_teardown(gic);
 		return ret;
@@ -1650,7 +1637,7 @@ static int __init gic_v2_acpi_init(struct acpi_subtable_header *header,
 		return -ENOMEM;
 	}
 
-	ret = __gic_init_bases(gic, -1, domain_handle);
+	ret = __gic_init_bases(gic, domain_handle);
 	if (ret) {
 		pr_err("Failed to initialise GIC\n");
 		irq_domain_free_fwnode(domain_handle);
diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h
index 626179077bb0..0f049b384ccd 100644
--- a/include/linux/irqchip/arm-gic.h
+++ b/include/linux/irqchip/arm-gic.h
@@ -158,8 +158,7 @@ int gic_of_init_child(struct device *dev, struct gic_chip_data **gic, int irq);
  * Legacy platforms not converted to DT yet must use this to init
  * their GIC
  */
-void gic_init(unsigned int nr, int start,
-	      void __iomem *dist , void __iomem *cpu);
+void gic_init(void __iomem *dist , void __iomem *cpu);
 
 int gicv2m_init(struct fwnode_handle *parent_handle,
 		struct irq_domain *parent);
-- 
cgit v1.2.3


From a623a7a1a5670c25a16881f5078072d272d96b71 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 11 Mar 2019 16:38:17 +0100
Subject: y2038: fix socket.h header inclusion

Referencing the __kernel_long_t type caused some user space applications
to stop compiling when they had not already included linux/posix_types.h,
e.g.

s/multicast.c -o ext/sockets/multicast.lo
In file included from /builddir/build/BUILD/php-7.3.3/main/php.h:468,
                 from /builddir/build/BUILD/php-7.3.3/ext/sockets/sockets.c:27:
/builddir/build/BUILD/php-7.3.3/ext/sockets/sockets.c: In function 'zm_startup_sockets':
/builddir/build/BUILD/php-7.3.3/ext/sockets/sockets.c:776:40: error: '__kernel_long_t' undeclared (first use in this function)
  776 |  REGISTER_LONG_CONSTANT("SO_SNDTIMEO", SO_SNDTIMEO, CONST_CS | CONST_PERSISTENT);

It is safe to include that header here, since it only contains kernel
internal types that do not conflict with other user space types.

It's still possible that some related build failures remain, but those
are likely to be for code that is not already y2038 safe.

Reported-by: Laura Abbott <labbott@redhat.com>
Fixes: a9beb86ae6e5 ("sock: Add SO_RCVTIMEO_NEW and SO_SNDTIMEO_NEW")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/alpha/include/uapi/asm/socket.h  | 2 +-
 arch/mips/include/uapi/asm/socket.h   | 2 +-
 arch/parisc/include/uapi/asm/socket.h | 2 +-
 arch/sparc/include/uapi/asm/socket.h  | 2 +-
 include/uapi/asm-generic/socket.h     | 2 +-
 5 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h
index 0d0fddb7e738..976e89b116e5 100644
--- a/arch/alpha/include/uapi/asm/socket.h
+++ b/arch/alpha/include/uapi/asm/socket.h
@@ -2,8 +2,8 @@
 #ifndef _UAPI_ASM_SOCKET_H
 #define _UAPI_ASM_SOCKET_H
 
+#include <linux/posix_types.h>
 #include <asm/sockios.h>
-#include <asm/bitsperlong.h>
 
 /* For setsockopt(2) */
 /*
diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h
index eb9f33f8a8b3..d41765cfbc6e 100644
--- a/arch/mips/include/uapi/asm/socket.h
+++ b/arch/mips/include/uapi/asm/socket.h
@@ -10,8 +10,8 @@
 #ifndef _UAPI_ASM_SOCKET_H
 #define _UAPI_ASM_SOCKET_H
 
+#include <linux/posix_types.h>
 #include <asm/sockios.h>
-#include <asm/bitsperlong.h>
 
 /*
  * For setsockopt(2)
diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h
index 16e428f03526..66c5dd245ac7 100644
--- a/arch/parisc/include/uapi/asm/socket.h
+++ b/arch/parisc/include/uapi/asm/socket.h
@@ -2,8 +2,8 @@
 #ifndef _UAPI_ASM_SOCKET_H
 #define _UAPI_ASM_SOCKET_H
 
+#include <linux/posix_types.h>
 #include <asm/sockios.h>
-#include <asm/bitsperlong.h>
 
 /* For setsockopt(2) */
 #define SOL_SOCKET	0xffff
diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h
index 88fe4f978aca..9265a9eece15 100644
--- a/arch/sparc/include/uapi/asm/socket.h
+++ b/arch/sparc/include/uapi/asm/socket.h
@@ -2,8 +2,8 @@
 #ifndef _ASM_SOCKET_H
 #define _ASM_SOCKET_H
 
+#include <linux/posix_types.h>
 #include <asm/sockios.h>
-#include <asm/bitsperlong.h>
 
 /* For setsockopt(2) */
 #define SOL_SOCKET	0xffff
diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h
index c8b430cb6dc4..8c1391c89171 100644
--- a/include/uapi/asm-generic/socket.h
+++ b/include/uapi/asm-generic/socket.h
@@ -2,8 +2,8 @@
 #ifndef __ASM_GENERIC_SOCKET_H
 #define __ASM_GENERIC_SOCKET_H
 
+#include <linux/posix_types.h>
 #include <asm/sockios.h>
-#include <asm/bitsperlong.h>
 
 /* For setsockopt(2) */
 #define SOL_SOCKET	1
-- 
cgit v1.2.3


From 623217a0cc45a6c179303b3bbfdc594806a464cc Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Mon, 11 Mar 2019 12:53:59 +0100
Subject: PM / wakeup: Drop wakeup_source_drop()

After commit d856f39ac1cc ("PM / wakeup: Rework wakeup source timer
cancellation") wakeup_source_drop() is a trivial wrapper around
__pm_relax() and it has no users except for wakeup_source_destroy()
and wakeup_source_trash() which also has no users, so drop it along
with the latter and make wakeup_source_destroy() call __pm_relax()
directly.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 drivers/base/power/wakeup.c | 18 +-----------------
 include/linux/pm_wakeup.h   |  9 ---------
 2 files changed, 1 insertion(+), 26 deletions(-)

(limited to 'include')

diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c
index a25d2d82f44d..ecbe152d151f 100644
--- a/drivers/base/power/wakeup.c
+++ b/drivers/base/power/wakeup.c
@@ -106,22 +106,6 @@ struct wakeup_source *wakeup_source_create(const char *name)
 }
 EXPORT_SYMBOL_GPL(wakeup_source_create);
 
-/**
- * wakeup_source_drop - Prepare a struct wakeup_source object for destruction.
- * @ws: Wakeup source to prepare for destruction.
- *
- * Callers must ensure that __pm_stay_awake() or __pm_wakeup_event() will never
- * be run in parallel with this function for the same wakeup source object.
- */
-void wakeup_source_drop(struct wakeup_source *ws)
-{
-	if (!ws)
-		return;
-
-	__pm_relax(ws);
-}
-EXPORT_SYMBOL_GPL(wakeup_source_drop);
-
 /*
  * Record wakeup_source statistics being deleted into a dummy wakeup_source.
  */
@@ -161,7 +145,7 @@ void wakeup_source_destroy(struct wakeup_source *ws)
 	if (!ws)
 		return;
 
-	wakeup_source_drop(ws);
+	__pm_relax(ws);
 	wakeup_source_record(ws);
 	kfree_const(ws->name);
 	kfree(ws);
diff --git a/include/linux/pm_wakeup.h b/include/linux/pm_wakeup.h
index 4238dde0aaf0..0ff134d6575a 100644
--- a/include/linux/pm_wakeup.h
+++ b/include/linux/pm_wakeup.h
@@ -96,7 +96,6 @@ static inline void device_set_wakeup_path(struct device *dev)
 /* drivers/base/power/wakeup.c */
 extern void wakeup_source_prepare(struct wakeup_source *ws, const char *name);
 extern struct wakeup_source *wakeup_source_create(const char *name);
-extern void wakeup_source_drop(struct wakeup_source *ws);
 extern void wakeup_source_destroy(struct wakeup_source *ws);
 extern void wakeup_source_add(struct wakeup_source *ws);
 extern void wakeup_source_remove(struct wakeup_source *ws);
@@ -134,8 +133,6 @@ static inline struct wakeup_source *wakeup_source_create(const char *name)
 	return NULL;
 }
 
-static inline void wakeup_source_drop(struct wakeup_source *ws) {}
-
 static inline void wakeup_source_destroy(struct wakeup_source *ws) {}
 
 static inline void wakeup_source_add(struct wakeup_source *ws) {}
@@ -204,12 +201,6 @@ static inline void wakeup_source_init(struct wakeup_source *ws,
 	wakeup_source_add(ws);
 }
 
-static inline void wakeup_source_trash(struct wakeup_source *ws)
-{
-	wakeup_source_remove(ws);
-	wakeup_source_drop(ws);
-}
-
 static inline void __pm_wakeup_event(struct wakeup_source *ws, unsigned int msec)
 {
 	return pm_wakeup_ws_event(ws, msec, false);
-- 
cgit v1.2.3


From b57e622e6da9048c96fa0ed6943834949a398e3f Mon Sep 17 00:00:00 2001
From: Souptick Joarder <jrdr.linux@gmail.com>
Date: Mon, 11 Mar 2019 23:28:10 -0700
Subject: mm/hmm: convert to use vm_fault_t
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Convert to use vm_fault_t type as return type for fault handler.

kbuild reported warning during testing of
*mm-create-the-new-vm_fault_t-type.patch* available in below link -
https://patchwork.kernel.org/patch/10752741/

  kernel/memremap.c:46:34: warning: incorrect type in return expression
                           (different base types)
  kernel/memremap.c:46:34: expected restricted vm_fault_t
  kernel/memremap.c:46:34: got int

This patch has fixed the warnings and also hmm_devmem_fault() is
converted to return vm_fault_t to avoid further warnings.

[sfr@canb.auug.org.au: drm/nouveau/dmem: update for struct hmm_devmem_ops member change]
  Link: http://lkml.kernel.org/r/20190220174407.753d94e5@canb.auug.org.au
Link: http://lkml.kernel.org/r/20190110145900.GA1317@jordon-HP-15-Notebook-PC
Signed-off-by: Souptick Joarder <jrdr.linux@gmail.com>
Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Reviewed-by: Jérôme Glisse <jglisse@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/gpu/drm/nouveau/nouveau_dmem.c | 2 +-
 include/linux/hmm.h                    | 4 ++--
 mm/hmm.c                               | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c b/drivers/gpu/drm/nouveau/nouveau_dmem.c
index 8be7a83ced9b..aa9fec80492d 100644
--- a/drivers/gpu/drm/nouveau/nouveau_dmem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c
@@ -261,7 +261,7 @@ static const struct migrate_vma_ops nouveau_dmem_fault_migrate_ops = {
 	.finalize_and_map	= nouveau_dmem_fault_finalize_and_map,
 };
 
-static int
+static vm_fault_t
 nouveau_dmem_fault(struct hmm_devmem *devmem,
 		   struct vm_area_struct *vma,
 		   unsigned long addr,
diff --git a/include/linux/hmm.h b/include/linux/hmm.h
index 66f9ebbb1df3..ad50b7b4f141 100644
--- a/include/linux/hmm.h
+++ b/include/linux/hmm.h
@@ -468,7 +468,7 @@ struct hmm_devmem_ops {
 	 * Note that mmap semaphore is held in read mode at least when this
 	 * callback occurs, hence the vma is valid upon callback entry.
 	 */
-	int (*fault)(struct hmm_devmem *devmem,
+	vm_fault_t (*fault)(struct hmm_devmem *devmem,
 		     struct vm_area_struct *vma,
 		     unsigned long addr,
 		     const struct page *page,
@@ -511,7 +511,7 @@ struct hmm_devmem_ops {
  * chunk, as an optimization. It must, however, prioritize the faulting address
  * over all the others.
  */
-typedef int (*dev_page_fault_t)(struct vm_area_struct *vma,
+typedef vm_fault_t (*dev_page_fault_t)(struct vm_area_struct *vma,
 				unsigned long addr,
 				const struct page *page,
 				unsigned int flags,
diff --git a/mm/hmm.c b/mm/hmm.c
index a04e4b810610..fe1cd87e49ac 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -990,7 +990,7 @@ static void hmm_devmem_ref_kill(struct percpu_ref *ref)
 	percpu_ref_kill(ref);
 }
 
-static int hmm_devmem_fault(struct vm_area_struct *vma,
+static vm_fault_t hmm_devmem_fault(struct vm_area_struct *vma,
 			    unsigned long addr,
 			    const struct page *page,
 			    unsigned int flags,
-- 
cgit v1.2.3


From b5420237ec817b0b5f729a674c81ace0865c3b3b Mon Sep 17 00:00:00 2001
From: Nikolay Borisov <nborisov@suse.com>
Date: Mon, 11 Mar 2019 23:28:13 -0700
Subject: mm: refactor readahead defines in mm.h

All users of VM_MAX_READAHEAD actually convert it to kbytes and then to
pages. Define the macro explicitly as (SZ_128K / PAGE_SIZE). This
simplifies the expression in every filesystem. Also rename the macro to
VM_READAHEAD_PAGES to properly convey its meaning. Finally remove unused
VM_MIN_READAHEAD

[akpm@linux-foundation.org: fix fs/io_uring.c, per Stephen]
Link: http://lkml.kernel.org/r/20181221144053.24318-1-nborisov@suse.com
Signed-off-by: Nikolay Borisov <nborisov@suse.com>
Reviewed-by: Matthew Wilcox <willy@infradead.org>
Reviewed-by: David Hildenbrand <david@redhat.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Eric Van Hensbergen <ericvh@gmail.com>
Cc: Latchesar Ionkov <lucho@ionkov.net>
Cc: Dominique Martinet <asmadeus@codewreck.org>
Cc: David Howells <dhowells@redhat.com>
Cc: Chris Mason <clm@fb.com>
Cc: Josef Bacik <josef@toxicpanda.com>
Cc: David Sterba <dsterba@suse.com>
Cc: Miklos Szeredi <miklos@szeredi.hu>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 block/blk-core.c   | 3 +--
 fs/9p/vfs_super.c  | 2 +-
 fs/afs/super.c     | 2 +-
 fs/btrfs/disk-io.c | 2 +-
 fs/fuse/inode.c    | 2 +-
 fs/io_uring.c      | 2 +-
 include/linux/mm.h | 4 ++--
 7 files changed, 8 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/block/blk-core.c b/block/blk-core.c
index 6b78ec56a4f2..4673ebe42255 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -500,8 +500,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
 	if (!q->stats)
 		goto fail_stats;
 
-	q->backing_dev_info->ra_pages =
-			(VM_MAX_READAHEAD * 1024) / PAGE_SIZE;
+	q->backing_dev_info->ra_pages = VM_READAHEAD_PAGES;
 	q->backing_dev_info->capabilities = BDI_CAP_CGROUP_WRITEBACK;
 	q->backing_dev_info->name = "block";
 	q->node = node_id;
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index 48ce50484e80..10d3bd3f534b 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -92,7 +92,7 @@ v9fs_fill_super(struct super_block *sb, struct v9fs_session_info *v9ses,
 		return ret;
 
 	if (v9ses->cache)
-		sb->s_bdi->ra_pages = (VM_MAX_READAHEAD * 1024)/PAGE_SIZE;
+		sb->s_bdi->ra_pages = VM_READAHEAD_PAGES;
 
 	sb->s_flags |= SB_ACTIVE | SB_DIRSYNC;
 	if (!v9ses->cache)
diff --git a/fs/afs/super.c b/fs/afs/super.c
index dcd07fe99871..e684f6769b15 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -399,7 +399,7 @@ static int afs_fill_super(struct super_block *sb,
 	ret = super_setup_bdi(sb);
 	if (ret)
 		return ret;
-	sb->s_bdi->ra_pages	= VM_MAX_READAHEAD * 1024 / PAGE_SIZE;
+	sb->s_bdi->ra_pages	= VM_READAHEAD_PAGES;
 
 	/* allocate the root inode and dentry */
 	if (as->dyn_root) {
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index f0cdb53f3e2d..6fe9197f6ee4 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2958,7 +2958,7 @@ int open_ctree(struct super_block *sb,
 	sb->s_bdi->congested_fn = btrfs_congested_fn;
 	sb->s_bdi->congested_data = fs_info;
 	sb->s_bdi->capabilities |= BDI_CAP_CGROUP_WRITEBACK;
-	sb->s_bdi->ra_pages = VM_MAX_READAHEAD * SZ_1K / PAGE_SIZE;
+	sb->s_bdi->ra_pages = VM_READAHEAD_PAGES;
 	sb->s_bdi->ra_pages *= btrfs_super_num_devices(disk_super);
 	sb->s_bdi->ra_pages = max(sb->s_bdi->ra_pages, SZ_4M / PAGE_SIZE);
 
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index c2d4099429be..16750ed591ae 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -1010,7 +1010,7 @@ static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb)
 	if (err)
 		return err;
 
-	sb->s_bdi->ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_SIZE;
+	sb->s_bdi->ra_pages = VM_READAHEAD_PAGES;
 	/* fuse does it's own writeback accounting */
 	sb->s_bdi->capabilities = BDI_CAP_NO_ACCT_WB | BDI_CAP_STRICTLIMIT;
 
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 5d99376d2369..c88088d92613 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -923,7 +923,7 @@ static void io_async_list_note(int rw, struct io_kiocb *req, size_t len)
 		/* Use 8x RA size as a decent limiter for both reads/writes */
 		max_pages = filp->f_ra.ra_pages;
 		if (!max_pages)
-			max_pages = VM_MAX_READAHEAD >> (PAGE_SHIFT - 10);
+			max_pages = VM_READAHEAD_PAGES;
 		max_pages *= 8;
 
 		/* If max pages are exceeded, reset the state */
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 5801ee849f36..76769749b5a5 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -26,6 +26,7 @@
 #include <linux/page_ref.h>
 #include <linux/memremap.h>
 #include <linux/overflow.h>
+#include <linux/sizes.h>
 
 struct mempolicy;
 struct anon_vma;
@@ -2402,8 +2403,7 @@ int __must_check write_one_page(struct page *page);
 void task_dirty_inc(struct task_struct *tsk);
 
 /* readahead.c */
-#define VM_MAX_READAHEAD	128	/* kbytes */
-#define VM_MIN_READAHEAD	16	/* kbytes (includes current page) */
+#define VM_READAHEAD_PAGES	(SZ_128K / PAGE_SIZE)
 
 int force_page_cache_readahead(struct address_space *mapping, struct file *filp,
 			pgoff_t offset, unsigned long nr_to_read);
-- 
cgit v1.2.3


From 53d818d2747ca84f1a87a0006b903523cd5bf0cd Mon Sep 17 00:00:00 2001
From: Mike Rapoport <rppt@linux.ibm.com>
Date: Mon, 11 Mar 2019 23:29:11 -0700
Subject: memblock: drop memblock_alloc_base_nid()

memblock_alloc_base_nid() is a oneliner wrapper for
memblock_alloc_range_nid() without any side effect.

Replace it's usage by the direct calls to memblock_alloc_range_nid().

Link: http://lkml.kernel.org/r/1548057848-15136-5-git-send-email-rppt@linux.ibm.com
Signed-off-by: Mike Rapoport <rppt@linux.ibm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Christophe Leroy <christophe.leroy@c-s.fr>
Cc: Christoph Hellwig <hch@lst.de>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Dennis Zhou <dennis@kernel.org>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Greentime Hu <green.hu@gmail.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Guan Xuetao <gxt@pku.edu.cn>
Cc: Guo Ren <guoren@kernel.org>
Cc: Guo Ren <ren_guo@c-sky.com>				[c-sky]
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Juergen Gross <jgross@suse.com>			[Xen]
Cc: Mark Salter <msalter@redhat.com>
Cc: Matt Turner <mattst88@gmail.com>
Cc: Max Filippov <jcmvbkbc@gmail.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Michal Simek <monstr@monstr.eu>
Cc: Paul Burton <paul.burton@mips.com>
Cc: Petr Mladek <pmladek@suse.com>
Cc: Richard Weinberger <richard@nod.at>
Cc: Rich Felker <dalias@libc.org>
Cc: Rob Herring <robh+dt@kernel.org>
Cc: Rob Herring <robh@kernel.org>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Stafford Horne <shorne@gmail.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Vineet Gupta <vgupta@synopsys.com>
Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memblock.h |  3 ---
 mm/memblock.c            | 15 ++++-----------
 2 files changed, 4 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 859b55b66db2..4db53f7c6b17 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -446,9 +446,6 @@ static inline bool memblock_bottom_up(void)
 phys_addr_t __init memblock_alloc_range(phys_addr_t size, phys_addr_t align,
 					phys_addr_t start, phys_addr_t end,
 					enum memblock_flags flags);
-phys_addr_t memblock_alloc_base_nid(phys_addr_t size,
-					phys_addr_t align, phys_addr_t max_addr,
-					int nid, enum memblock_flags flags);
 phys_addr_t memblock_alloc_base(phys_addr_t size, phys_addr_t align,
 				phys_addr_t max_addr);
 phys_addr_t __memblock_alloc_base(phys_addr_t size, phys_addr_t align,
diff --git a/mm/memblock.c b/mm/memblock.c
index 470601115892..e9e440cfd210 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -1289,21 +1289,14 @@ phys_addr_t __init memblock_alloc_range(phys_addr_t size, phys_addr_t align,
 					flags);
 }
 
-phys_addr_t __init memblock_alloc_base_nid(phys_addr_t size,
-					phys_addr_t align, phys_addr_t max_addr,
-					int nid, enum memblock_flags flags)
-{
-	return memblock_alloc_range_nid(size, align, 0, max_addr, nid, flags);
-}
-
 phys_addr_t __init memblock_phys_alloc_nid(phys_addr_t size, phys_addr_t align, int nid)
 {
 	enum memblock_flags flags = choose_memblock_flags();
 	phys_addr_t ret;
 
 again:
-	ret = memblock_alloc_base_nid(size, align, MEMBLOCK_ALLOC_ACCESSIBLE,
-				      nid, flags);
+	ret = memblock_alloc_range_nid(size, align, 0,
+				       MEMBLOCK_ALLOC_ACCESSIBLE, nid, flags);
 
 	if (!ret && (flags & MEMBLOCK_MIRROR)) {
 		flags &= ~MEMBLOCK_MIRROR;
@@ -1314,8 +1307,8 @@ again:
 
 phys_addr_t __init __memblock_alloc_base(phys_addr_t size, phys_addr_t align, phys_addr_t max_addr)
 {
-	return memblock_alloc_base_nid(size, align, max_addr, NUMA_NO_NODE,
-				       MEMBLOCK_NONE);
+	return memblock_alloc_range_nid(size, align, 0, max_addr, NUMA_NO_NODE,
+					MEMBLOCK_NONE);
 }
 
 phys_addr_t __init memblock_alloc_base(phys_addr_t size, phys_addr_t align, phys_addr_t max_addr)
-- 
cgit v1.2.3


From 8a770c2a83eaf4c3d493ca4056abd6d6ddce6f18 Mon Sep 17 00:00:00 2001
From: Mike Rapoport <rppt@linux.ibm.com>
Date: Mon, 11 Mar 2019 23:29:16 -0700
Subject: memblock: emphasize that memblock_alloc_range() returns a physical
 address

Rename memblock_alloc_range() to memblock_phys_alloc_range() to
emphasize that it returns a physical address.

While on it, remove the 'enum memblock_flags' parameter from this
function as its only user anyway sets it to MEMBLOCK_NONE, which is the
default for the most of memblock allocations.

Link: http://lkml.kernel.org/r/1548057848-15136-6-git-send-email-rppt@linux.ibm.com
Signed-off-by: Mike Rapoport <rppt@linux.ibm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Christophe Leroy <christophe.leroy@c-s.fr>
Cc: Christoph Hellwig <hch@lst.de>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Dennis Zhou <dennis@kernel.org>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Greentime Hu <green.hu@gmail.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Guan Xuetao <gxt@pku.edu.cn>
Cc: Guo Ren <guoren@kernel.org>
Cc: Guo Ren <ren_guo@c-sky.com>				[c-sky]
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Juergen Gross <jgross@suse.com>			[Xen]
Cc: Mark Salter <msalter@redhat.com>
Cc: Matt Turner <mattst88@gmail.com>
Cc: Max Filippov <jcmvbkbc@gmail.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Michal Simek <monstr@monstr.eu>
Cc: Paul Burton <paul.burton@mips.com>
Cc: Petr Mladek <pmladek@suse.com>
Cc: Richard Weinberger <richard@nod.at>
Cc: Rich Felker <dalias@libc.org>
Cc: Rob Herring <robh+dt@kernel.org>
Cc: Rob Herring <robh@kernel.org>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Stafford Horne <shorne@gmail.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Vineet Gupta <vgupta@synopsys.com>
Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memblock.h |  5 ++---
 mm/cma.c                 | 10 ++++------
 mm/memblock.c            |  9 +++++----
 3 files changed, 11 insertions(+), 13 deletions(-)

(limited to 'include')

diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 4db53f7c6b17..251cd66b151b 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -325,6 +325,8 @@ static inline int memblock_get_region_node(const struct memblock_region *r)
 #define ARCH_LOW_ADDRESS_LIMIT  0xffffffffUL
 #endif
 
+phys_addr_t memblock_phys_alloc_range(phys_addr_t size, phys_addr_t align,
+				      phys_addr_t start, phys_addr_t end);
 phys_addr_t memblock_phys_alloc_nid(phys_addr_t size, phys_addr_t align, int nid);
 phys_addr_t memblock_phys_alloc_try_nid(phys_addr_t size, phys_addr_t align, int nid);
 
@@ -443,9 +445,6 @@ static inline bool memblock_bottom_up(void)
 	return memblock.bottom_up;
 }
 
-phys_addr_t __init memblock_alloc_range(phys_addr_t size, phys_addr_t align,
-					phys_addr_t start, phys_addr_t end,
-					enum memblock_flags flags);
 phys_addr_t memblock_alloc_base(phys_addr_t size, phys_addr_t align,
 				phys_addr_t max_addr);
 phys_addr_t __memblock_alloc_base(phys_addr_t size, phys_addr_t align,
diff --git a/mm/cma.c b/mm/cma.c
index f4f3a8a57d86..bb2d333ffcb3 100644
--- a/mm/cma.c
+++ b/mm/cma.c
@@ -327,16 +327,14 @@ int __init cma_declare_contiguous(phys_addr_t base,
 		 * memory in case of failure.
 		 */
 		if (base < highmem_start && limit > highmem_start) {
-			addr = memblock_alloc_range(size, alignment,
-						    highmem_start, limit,
-						    MEMBLOCK_NONE);
+			addr = memblock_phys_alloc_range(size, alignment,
+							 highmem_start, limit);
 			limit = highmem_start;
 		}
 
 		if (!addr) {
-			addr = memblock_alloc_range(size, alignment, base,
-						    limit,
-						    MEMBLOCK_NONE);
+			addr = memblock_phys_alloc_range(size, alignment, base,
+							 limit);
 			if (!addr) {
 				ret = -ENOMEM;
 				goto err;
diff --git a/mm/memblock.c b/mm/memblock.c
index e9e440cfd210..eb785ea6757b 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -1281,12 +1281,13 @@ static phys_addr_t __init memblock_alloc_range_nid(phys_addr_t size,
 	return 0;
 }
 
-phys_addr_t __init memblock_alloc_range(phys_addr_t size, phys_addr_t align,
-					phys_addr_t start, phys_addr_t end,
-					enum memblock_flags flags)
+phys_addr_t __init memblock_phys_alloc_range(phys_addr_t size,
+					     phys_addr_t align,
+					     phys_addr_t start,
+					     phys_addr_t end)
 {
 	return memblock_alloc_range_nid(size, align, start, end, NUMA_NO_NODE,
-					flags);
+					MEMBLOCK_NONE);
 }
 
 phys_addr_t __init memblock_phys_alloc_nid(phys_addr_t size, phys_addr_t align, int nid)
-- 
cgit v1.2.3


From ecc3e771f4ca98c52a072e41804434b4979bdf84 Mon Sep 17 00:00:00 2001
From: Mike Rapoport <rppt@linux.ibm.com>
Date: Mon, 11 Mar 2019 23:29:26 -0700
Subject: memblock: memblock_phys_alloc(): don't panic

Make the memblock_phys_alloc() function an inline wrapper for
memblock_phys_alloc_range() and update the memblock_phys_alloc() callers
to check the returned value and panic in case of error.

Link: http://lkml.kernel.org/r/1548057848-15136-8-git-send-email-rppt@linux.ibm.com
Signed-off-by: Mike Rapoport <rppt@linux.ibm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Christophe Leroy <christophe.leroy@c-s.fr>
Cc: Christoph Hellwig <hch@lst.de>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Dennis Zhou <dennis@kernel.org>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Greentime Hu <green.hu@gmail.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Guan Xuetao <gxt@pku.edu.cn>
Cc: Guo Ren <guoren@kernel.org>
Cc: Guo Ren <ren_guo@c-sky.com>				[c-sky]
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Juergen Gross <jgross@suse.com>			[Xen]
Cc: Mark Salter <msalter@redhat.com>
Cc: Matt Turner <mattst88@gmail.com>
Cc: Max Filippov <jcmvbkbc@gmail.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Michal Simek <monstr@monstr.eu>
Cc: Paul Burton <paul.burton@mips.com>
Cc: Petr Mladek <pmladek@suse.com>
Cc: Richard Weinberger <richard@nod.at>
Cc: Rich Felker <dalias@libc.org>
Cc: Rob Herring <robh+dt@kernel.org>
Cc: Rob Herring <robh@kernel.org>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Stafford Horne <shorne@gmail.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Vineet Gupta <vgupta@synopsys.com>
Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/arm/mm/init.c                   | 4 ++++
 arch/arm64/mm/mmu.c                  | 2 ++
 arch/powerpc/sysdev/dart_iommu.c     | 3 +++
 arch/s390/kernel/crash_dump.c        | 3 +++
 arch/s390/kernel/setup.c             | 3 +++
 arch/sh/boards/mach-ap325rxa/setup.c | 3 +++
 arch/sh/boards/mach-ecovec24/setup.c | 6 ++++++
 arch/sh/boards/mach-kfr2r09/setup.c  | 3 +++
 arch/sh/boards/mach-migor/setup.c    | 3 +++
 arch/sh/boards/mach-se/7724/setup.c  | 6 ++++++
 arch/xtensa/mm/kasan_init.c          | 3 +++
 include/linux/memblock.h             | 7 ++++++-
 mm/memblock.c                        | 5 -----
 13 files changed, 45 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index b76b90eb9356..15dddfe43319 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -206,6 +206,10 @@ phys_addr_t __init arm_memblock_steal(phys_addr_t size, phys_addr_t align)
 	BUG_ON(!arm_memblock_steal_permitted);
 
 	phys = memblock_phys_alloc(size, align);
+	if (!phys)
+		panic("Failed to steal %pa bytes at %pS\n",
+		      &size, (void *)_RET_IP_);
+
 	memblock_free(phys, size);
 	memblock_remove(phys, size);
 
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 402b6495ff58..e97f018ff740 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -103,6 +103,8 @@ static phys_addr_t __init early_pgtable_alloc(void)
 	void *ptr;
 
 	phys = memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
+	if (!phys)
+		panic("Failed to allocate page table page\n");
 
 	/*
 	 * The FIX_{PGD,PUD,PMD} slots may be in active use, but the FIX_PTE
diff --git a/arch/powerpc/sysdev/dart_iommu.c b/arch/powerpc/sysdev/dart_iommu.c
index fc5c5c23303e..2a751795ec1e 100644
--- a/arch/powerpc/sysdev/dart_iommu.c
+++ b/arch/powerpc/sysdev/dart_iommu.c
@@ -265,6 +265,9 @@ static void allocate_dart(void)
 	 * prefetching into invalid pages and corrupting data
 	 */
 	tmp = memblock_phys_alloc(DART_PAGE_SIZE, DART_PAGE_SIZE);
+	if (!tmp)
+		panic("DART: table allocation failed\n");
+
 	dart_emptyval = DARTMAP_VALID | ((tmp >> DART_PAGE_SHIFT) &
 					 DARTMAP_RPNMASK);
 
diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c
index 97eae3871868..f96a5857bbfd 100644
--- a/arch/s390/kernel/crash_dump.c
+++ b/arch/s390/kernel/crash_dump.c
@@ -61,6 +61,9 @@ struct save_area * __init save_area_alloc(bool is_boot_cpu)
 	struct save_area *sa;
 
 	sa = (void *) memblock_phys_alloc(sizeof(*sa), 8);
+	if (!sa)
+		panic("Failed to allocate save area\n");
+
 	if (is_boot_cpu)
 		list_add(&sa->list, &dump_save_areas);
 	else
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 12934e8fbb91..d7920f3e76c6 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -966,6 +966,9 @@ static void __init setup_randomness(void)
 
 	vmms = (struct sysinfo_3_2_2 *) memblock_phys_alloc(PAGE_SIZE,
 							    PAGE_SIZE);
+	if (!vmms)
+		panic("Failed to allocate memory for sysinfo structure\n");
+
 	if (stsi(vmms, 3, 2, 2) == 0 && vmms->count)
 		add_device_randomness(&vmms->vm, sizeof(vmms->vm[0]) * vmms->count);
 	memblock_free((unsigned long) vmms, PAGE_SIZE);
diff --git a/arch/sh/boards/mach-ap325rxa/setup.c b/arch/sh/boards/mach-ap325rxa/setup.c
index 97774424fbee..8301a4378f50 100644
--- a/arch/sh/boards/mach-ap325rxa/setup.c
+++ b/arch/sh/boards/mach-ap325rxa/setup.c
@@ -557,6 +557,9 @@ static void __init ap325rxa_mv_mem_reserve(void)
 	phys_addr_t size = CEU_BUFFER_MEMORY_SIZE;
 
 	phys = memblock_phys_alloc(size, PAGE_SIZE);
+	if (!phys)
+		panic("Failed to allocate CEU memory\n");
+
 	memblock_free(phys, size);
 	memblock_remove(phys, size);
 
diff --git a/arch/sh/boards/mach-ecovec24/setup.c b/arch/sh/boards/mach-ecovec24/setup.c
index d329bf3be487..34e5414c5563 100644
--- a/arch/sh/boards/mach-ecovec24/setup.c
+++ b/arch/sh/boards/mach-ecovec24/setup.c
@@ -1477,11 +1477,17 @@ static void __init ecovec_mv_mem_reserve(void)
 	phys_addr_t size = CEU_BUFFER_MEMORY_SIZE;
 
 	phys = memblock_phys_alloc(size, PAGE_SIZE);
+	if (!phys)
+		panic("Failed to allocate CEU0 memory\n");
+
 	memblock_free(phys, size);
 	memblock_remove(phys, size);
 	ceu0_dma_membase = phys;
 
 	phys = memblock_phys_alloc(size, PAGE_SIZE);
+	if (!phys)
+		panic("Failed to allocate CEU1 memory\n");
+
 	memblock_free(phys, size);
 	memblock_remove(phys, size);
 	ceu1_dma_membase = phys;
diff --git a/arch/sh/boards/mach-kfr2r09/setup.c b/arch/sh/boards/mach-kfr2r09/setup.c
index 5c258ae9c43a..1cf9a47ac90e 100644
--- a/arch/sh/boards/mach-kfr2r09/setup.c
+++ b/arch/sh/boards/mach-kfr2r09/setup.c
@@ -631,6 +631,9 @@ static void __init kfr2r09_mv_mem_reserve(void)
 	phys_addr_t size = CEU_BUFFER_MEMORY_SIZE;
 
 	phys = memblock_phys_alloc(size, PAGE_SIZE);
+	if (!phys)
+		panic("Failed to allocate CEU memory\n");
+
 	memblock_free(phys, size);
 	memblock_remove(phys, size);
 
diff --git a/arch/sh/boards/mach-migor/setup.c b/arch/sh/boards/mach-migor/setup.c
index 193d91bb84bf..90702740f207 100644
--- a/arch/sh/boards/mach-migor/setup.c
+++ b/arch/sh/boards/mach-migor/setup.c
@@ -631,6 +631,9 @@ static void __init migor_mv_mem_reserve(void)
 	phys_addr_t size = CEU_BUFFER_MEMORY_SIZE;
 
 	phys = memblock_phys_alloc(size, PAGE_SIZE);
+	if (!phys)
+		panic("Failed to allocate CEU memory\n");
+
 	memblock_free(phys, size);
 	memblock_remove(phys, size);
 
diff --git a/arch/sh/boards/mach-se/7724/setup.c b/arch/sh/boards/mach-se/7724/setup.c
index 5c7aa37bfd86..3674064816c7 100644
--- a/arch/sh/boards/mach-se/7724/setup.c
+++ b/arch/sh/boards/mach-se/7724/setup.c
@@ -964,11 +964,17 @@ static void __init ms7724se_mv_mem_reserve(void)
 	phys_addr_t size = CEU_BUFFER_MEMORY_SIZE;
 
 	phys = memblock_phys_alloc(size, PAGE_SIZE);
+	if (!phys)
+		panic("Failed to allocate CEU0 memory\n");
+
 	memblock_free(phys, size);
 	memblock_remove(phys, size);
 	ceu0_dma_membase = phys;
 
 	phys = memblock_phys_alloc(size, PAGE_SIZE);
+	if (!phys)
+		panic("Failed to allocate CEU1 memory\n");
+
 	memblock_free(phys, size);
 	memblock_remove(phys, size);
 	ceu1_dma_membase = phys;
diff --git a/arch/xtensa/mm/kasan_init.c b/arch/xtensa/mm/kasan_init.c
index 48dbb03f4f6f..4852848a0c28 100644
--- a/arch/xtensa/mm/kasan_init.c
+++ b/arch/xtensa/mm/kasan_init.c
@@ -54,6 +54,9 @@ static void __init populate(void *start, void *end)
 			phys_addr_t phys =
 				memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
 
+			if (!phys)
+				panic("Failed to allocate page table page\n");
+
 			set_pte(pte + j, pfn_pte(PHYS_PFN(phys), PAGE_KERNEL));
 		}
 	}
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 251cd66b151b..7caecb42bfea 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -330,7 +330,12 @@ phys_addr_t memblock_phys_alloc_range(phys_addr_t size, phys_addr_t align,
 phys_addr_t memblock_phys_alloc_nid(phys_addr_t size, phys_addr_t align, int nid);
 phys_addr_t memblock_phys_alloc_try_nid(phys_addr_t size, phys_addr_t align, int nid);
 
-phys_addr_t memblock_phys_alloc(phys_addr_t size, phys_addr_t align);
+static inline phys_addr_t memblock_phys_alloc(phys_addr_t size,
+					      phys_addr_t align)
+{
+	return memblock_phys_alloc_range(size, align, 0,
+					 MEMBLOCK_ALLOC_ACCESSIBLE);
+}
 
 void *memblock_alloc_try_nid_raw(phys_addr_t size, phys_addr_t align,
 				 phys_addr_t min_addr, phys_addr_t max_addr,
diff --git a/mm/memblock.c b/mm/memblock.c
index ac57bd3082bb..d0b76bb7340d 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -1325,11 +1325,6 @@ phys_addr_t __init memblock_alloc_base(phys_addr_t size, phys_addr_t align, phys
 	return alloc;
 }
 
-phys_addr_t __init memblock_phys_alloc(phys_addr_t size, phys_addr_t align)
-{
-	return memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ACCESSIBLE);
-}
-
 phys_addr_t __init memblock_phys_alloc_try_nid(phys_addr_t size, phys_addr_t align, int nid)
 {
 	phys_addr_t res = memblock_phys_alloc_nid(size, align, nid);
-- 
cgit v1.2.3


From 42b46aeff2e366bad54bd1c069b7b5381d9be8b3 Mon Sep 17 00:00:00 2001
From: Mike Rapoport <rppt@linux.ibm.com>
Date: Mon, 11 Mar 2019 23:29:31 -0700
Subject: memblock: drop __memblock_alloc_base()

The __memblock_alloc_base() function tries to allocate a memory up to
the limit specified by its max_addr parameter.  Depending on the value
of this parameter, the __memblock_alloc_base() can is replaced with the
appropriate memblock_phys_alloc*() variant.

Link: http://lkml.kernel.org/r/1548057848-15136-9-git-send-email-rppt@linux.ibm.com
Signed-off-by: Mike Rapoport <rppt@linux.ibm.com>
Acked-by: Rob Herring <robh@kernel.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Christophe Leroy <christophe.leroy@c-s.fr>
Cc: Christoph Hellwig <hch@lst.de>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Dennis Zhou <dennis@kernel.org>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Greentime Hu <green.hu@gmail.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Guan Xuetao <gxt@pku.edu.cn>
Cc: Guo Ren <guoren@kernel.org>
Cc: Guo Ren <ren_guo@c-sky.com>				[c-sky]
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Juergen Gross <jgross@suse.com>			[Xen]
Cc: Mark Salter <msalter@redhat.com>
Cc: Matt Turner <mattst88@gmail.com>
Cc: Max Filippov <jcmvbkbc@gmail.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Michal Simek <monstr@monstr.eu>
Cc: Paul Burton <paul.burton@mips.com>
Cc: Petr Mladek <pmladek@suse.com>
Cc: Richard Weinberger <richard@nod.at>
Cc: Rich Felker <dalias@libc.org>
Cc: Rob Herring <robh+dt@kernel.org>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Stafford Horne <shorne@gmail.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Vineet Gupta <vgupta@synopsys.com>
Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/sh/kernel/machine_kexec.c |  3 ++-
 arch/x86/kernel/e820.c         |  2 +-
 arch/x86/mm/numa.c             | 12 ++++--------
 drivers/of/of_reserved_mem.c   |  7 ++-----
 include/linux/memblock.h       |  2 --
 mm/memblock.c                  |  9 ++-------
 6 files changed, 11 insertions(+), 24 deletions(-)

(limited to 'include')

diff --git a/arch/sh/kernel/machine_kexec.c b/arch/sh/kernel/machine_kexec.c
index b9f9f1a5afdc..63d63a36f6f2 100644
--- a/arch/sh/kernel/machine_kexec.c
+++ b/arch/sh/kernel/machine_kexec.c
@@ -168,7 +168,8 @@ void __init reserve_crashkernel(void)
 	crash_size = PAGE_ALIGN(resource_size(&crashk_res));
 	if (!crashk_res.start) {
 		unsigned long max = memblock_end_of_DRAM() - memory_limit;
-		crashk_res.start = __memblock_alloc_base(crash_size, PAGE_SIZE, max);
+		crashk_res.start = memblock_phys_alloc_range(crash_size,
+							     PAGE_SIZE, 0, max);
 		if (!crashk_res.start) {
 			pr_err("crashkernel allocation failed\n");
 			goto disable;
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index a687d10da417..5203ee4e6435 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -775,7 +775,7 @@ u64 __init e820__memblock_alloc_reserved(u64 size, u64 align)
 {
 	u64 addr;
 
-	addr = __memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ACCESSIBLE);
+	addr = memblock_phys_alloc(size, align);
 	if (addr) {
 		e820__range_update_kexec(addr, size, E820_TYPE_RAM, E820_TYPE_RESERVED);
 		pr_info("update e820_table_kexec for e820__memblock_alloc_reserved()\n");
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index 12c1b7a83ed7..dfb6c4df639a 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -195,15 +195,11 @@ static void __init alloc_node_data(int nid)
 	 * Allocate node data.  Try node-local memory and then any node.
 	 * Never allocate in DMA zone.
 	 */
-	nd_pa = memblock_phys_alloc_nid(nd_size, SMP_CACHE_BYTES, nid);
+	nd_pa = memblock_phys_alloc_try_nid(nd_size, SMP_CACHE_BYTES, nid);
 	if (!nd_pa) {
-		nd_pa = __memblock_alloc_base(nd_size, SMP_CACHE_BYTES,
-					      MEMBLOCK_ALLOC_ACCESSIBLE);
-		if (!nd_pa) {
-			pr_err("Cannot find %zu bytes in any node (initial node: %d)\n",
-			       nd_size, nid);
-			return;
-		}
+		pr_err("Cannot find %zu bytes in any node (initial node: %d)\n",
+		       nd_size, nid);
+		return;
 	}
 	nd = __va(nd_pa);
 
diff --git a/drivers/of/of_reserved_mem.c b/drivers/of/of_reserved_mem.c
index e773063c6de9..8c07d7da5256 100644
--- a/drivers/of/of_reserved_mem.c
+++ b/drivers/of/of_reserved_mem.c
@@ -31,13 +31,10 @@ static int __init early_init_dt_alloc_reserved_memory_arch(phys_addr_t size,
 	phys_addr_t *res_base)
 {
 	phys_addr_t base;
-	/*
-	 * We use __memblock_alloc_base() because memblock_alloc_base()
-	 * panic()s on allocation failure.
-	 */
+
 	end = !end ? MEMBLOCK_ALLOC_ANYWHERE : end;
 	align = !align ? SMP_CACHE_BYTES : align;
-	base = __memblock_alloc_base(size, align, end);
+	base = memblock_phys_alloc_range(size, align, 0, end);
 	if (!base)
 		return -ENOMEM;
 
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 7caecb42bfea..017aeb223b24 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -452,8 +452,6 @@ static inline bool memblock_bottom_up(void)
 
 phys_addr_t memblock_alloc_base(phys_addr_t size, phys_addr_t align,
 				phys_addr_t max_addr);
-phys_addr_t __memblock_alloc_base(phys_addr_t size, phys_addr_t align,
-				  phys_addr_t max_addr);
 phys_addr_t memblock_phys_mem_size(void);
 phys_addr_t memblock_reserved_size(void);
 phys_addr_t memblock_mem_size(unsigned long limit_pfn);
diff --git a/mm/memblock.c b/mm/memblock.c
index d0b76bb7340d..5b6aeb8108d9 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -1306,17 +1306,12 @@ again:
 	return ret;
 }
 
-phys_addr_t __init __memblock_alloc_base(phys_addr_t size, phys_addr_t align, phys_addr_t max_addr)
-{
-	return memblock_alloc_range_nid(size, align, 0, max_addr, NUMA_NO_NODE,
-					MEMBLOCK_NONE);
-}
-
 phys_addr_t __init memblock_alloc_base(phys_addr_t size, phys_addr_t align, phys_addr_t max_addr)
 {
 	phys_addr_t alloc;
 
-	alloc = __memblock_alloc_base(size, align, max_addr);
+	alloc = memblock_alloc_range_nid(size, align, 0, max_addr, NUMA_NO_NODE,
+					MEMBLOCK_NONE);
 
 	if (alloc == 0)
 		panic("ERROR: Failed to allocate %pa bytes below %pa.\n",
-- 
cgit v1.2.3


From 0ba9e6edd4c2e563a9b34c8a46649218814a363f Mon Sep 17 00:00:00 2001
From: Mike Rapoport <rppt@linux.ibm.com>
Date: Mon, 11 Mar 2019 23:29:35 -0700
Subject: memblock: drop memblock_alloc_base()

The memblock_alloc_base() function tries to allocate a memory up to the
limit specified by its max_addr parameter and panics if the allocation
fails.  Replace its usage with memblock_phys_alloc_range() and make the
callers check the return value and panic in case of error.

Link: http://lkml.kernel.org/r/1548057848-15136-10-git-send-email-rppt@linux.ibm.com
Signed-off-by: Mike Rapoport <rppt@linux.ibm.com>
Acked-by: Michael Ellerman <mpe@ellerman.id.au>		[powerpc]
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Christophe Leroy <christophe.leroy@c-s.fr>
Cc: Christoph Hellwig <hch@lst.de>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Dennis Zhou <dennis@kernel.org>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Greentime Hu <green.hu@gmail.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Guan Xuetao <gxt@pku.edu.cn>
Cc: Guo Ren <guoren@kernel.org>
Cc: Guo Ren <ren_guo@c-sky.com>				[c-sky]
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Juergen Gross <jgross@suse.com>			[Xen]
Cc: Mark Salter <msalter@redhat.com>
Cc: Matt Turner <mattst88@gmail.com>
Cc: Max Filippov <jcmvbkbc@gmail.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Michal Simek <monstr@monstr.eu>
Cc: Paul Burton <paul.burton@mips.com>
Cc: Petr Mladek <pmladek@suse.com>
Cc: Richard Weinberger <richard@nod.at>
Cc: Rich Felker <dalias@libc.org>
Cc: Rob Herring <robh+dt@kernel.org>
Cc: Rob Herring <robh@kernel.org>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Stafford Horne <shorne@gmail.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Vineet Gupta <vgupta@synopsys.com>
Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/powerpc/kernel/rtas.c      |  6 +++++-
 arch/powerpc/mm/hash_utils_64.c |  8 ++++++--
 arch/s390/kernel/smp.c          |  6 +++++-
 drivers/macintosh/smu.c         |  2 +-
 include/linux/memblock.h        |  2 --
 mm/memblock.c                   | 14 --------------
 6 files changed, 17 insertions(+), 21 deletions(-)

(limited to 'include')

diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index de35bd8f047f..fbc676160adf 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -1187,7 +1187,11 @@ void __init rtas_initialize(void)
 		ibm_suspend_me_token = rtas_token("ibm,suspend-me");
 	}
 #endif
-	rtas_rmo_buf = memblock_alloc_base(RTAS_RMOBUF_MAX, PAGE_SIZE, rtas_region);
+	rtas_rmo_buf = memblock_phys_alloc_range(RTAS_RMOBUF_MAX, PAGE_SIZE,
+						 0, rtas_region);
+	if (!rtas_rmo_buf)
+		panic("ERROR: RTAS: Failed to allocate %lx bytes below %pa\n",
+		      PAGE_SIZE, &rtas_region);
 
 #ifdef CONFIG_RTAS_ERROR_LOGGING
 	rtas_last_error_token = rtas_token("rtas-last-error");
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 3d4b2399192f..880a366c229c 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -882,8 +882,12 @@ static void __init htab_initialize(void)
 		}
 #endif /* CONFIG_PPC_CELL */
 
-		table = memblock_alloc_base(htab_size_bytes, htab_size_bytes,
-					    limit);
+		table = memblock_phys_alloc_range(htab_size_bytes,
+						  htab_size_bytes,
+						  0, limit);
+		if (!table)
+			panic("ERROR: Failed to allocate %pa bytes below %pa\n",
+			      &htab_size_bytes, &limit);
 
 		DBG("Hash table allocated at %lx, size: %lx\n", table,
 		    htab_size_bytes);
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index b198ece2aad6..5e3cccc408b8 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -656,7 +656,11 @@ void __init smp_save_dump_cpus(void)
 		/* No previous system present, normal boot. */
 		return;
 	/* Allocate a page as dumping area for the store status sigps */
-	page = memblock_alloc_base(PAGE_SIZE, PAGE_SIZE, 1UL << 31);
+	page = memblock_phys_alloc_range(PAGE_SIZE, PAGE_SIZE, 0, 1UL << 31);
+	if (!page)
+		panic("ERROR: Failed to allocate %x bytes below %lx\n",
+		      PAGE_SIZE, 1UL << 31);
+
 	/* Set multi-threading state to the previous system. */
 	pcpu_set_smt(sclp.mtid_prev);
 	boot_cpu_addr = stap();
diff --git a/drivers/macintosh/smu.c b/drivers/macintosh/smu.c
index 0a0b8e1f4236..42cf68d15da3 100644
--- a/drivers/macintosh/smu.c
+++ b/drivers/macintosh/smu.c
@@ -485,7 +485,7 @@ int __init smu_init (void)
 	 * SMU based G5s need some memory below 2Gb. Thankfully this is
 	 * called at a time where memblock is still available.
 	 */
-	smu_cmdbuf_abs = memblock_alloc_base(4096, 4096, 0x80000000UL);
+	smu_cmdbuf_abs = memblock_phys_alloc_range(4096, 4096, 0, 0x80000000UL);
 	if (smu_cmdbuf_abs == 0) {
 		printk(KERN_ERR "SMU: Command buffer allocation failed !\n");
 		ret = -EINVAL;
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 017aeb223b24..0c8375120322 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -450,8 +450,6 @@ static inline bool memblock_bottom_up(void)
 	return memblock.bottom_up;
 }
 
-phys_addr_t memblock_alloc_base(phys_addr_t size, phys_addr_t align,
-				phys_addr_t max_addr);
 phys_addr_t memblock_phys_mem_size(void);
 phys_addr_t memblock_reserved_size(void);
 phys_addr_t memblock_mem_size(unsigned long limit_pfn);
diff --git a/mm/memblock.c b/mm/memblock.c
index 5b6aeb8108d9..42fe65447d8b 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -1306,20 +1306,6 @@ again:
 	return ret;
 }
 
-phys_addr_t __init memblock_alloc_base(phys_addr_t size, phys_addr_t align, phys_addr_t max_addr)
-{
-	phys_addr_t alloc;
-
-	alloc = memblock_alloc_range_nid(size, align, 0, max_addr, NUMA_NO_NODE,
-					MEMBLOCK_NONE);
-
-	if (alloc == 0)
-		panic("ERROR: Failed to allocate %pa bytes below %pa.\n",
-		      &size, &max_addr);
-
-	return alloc;
-}
-
 phys_addr_t __init memblock_phys_alloc_try_nid(phys_addr_t size, phys_addr_t align, int nid)
 {
 	phys_addr_t res = memblock_phys_alloc_nid(size, align, nid);
-- 
cgit v1.2.3


From 92d12f9544b7b133b54cb64f687f3f45fce0043c Mon Sep 17 00:00:00 2001
From: Mike Rapoport <rppt@linux.ibm.com>
Date: Mon, 11 Mar 2019 23:29:41 -0700
Subject: memblock: refactor internal allocation functions

Currently, memblock has several internal functions with overlapping
functionality.  They all call memblock_find_in_range_node() to find free
memory and then reserve the allocated range and mark it with kmemleak.
However, there is difference in the allocation constraints and in
fallback strategies.

The allocations returning physical address first attempt to find free
memory on the specified node within mirrored memory regions, then retry
on the same node without the requirement for memory mirroring and
finally fall back to all available memory.

The allocations returning virtual address start with clamping the
allowed range to memblock.current_limit, attempt to allocate from the
specified node from regions with mirroring and with user defined minimal
address.  If such allocation fails, next attempt is done with node
restriction lifted.  Next, the allocation is retried with minimal
address reset to zero and at last without the requirement for mirrored
regions.

Let's consolidate various fallbacks handling and make them more
consistent for physical and virtual variants.  Most of the fallback
handling is moved to memblock_alloc_range_nid() and it now handles node
and mirror fallbacks.

The memblock_alloc_internal() uses memblock_alloc_range_nid() to get a
physical address of the allocated range and converts it to virtual
address.

The fallback for allocation below the specified minimal address remains
in memblock_alloc_internal() because memblock_alloc_range_nid() is used
by CMA with exact requirement for lower bounds.

The memblock_phys_alloc_nid() function is completely dropped as it is not
used anywhere outside memblock and its only usage can be replaced by a
call to memblock_alloc_range_nid().

[rppt@linux.ibm.com: fix parameter order in memblock_phys_alloc_try_nid()]
  Link: http://lkml.kernel.org/r/20190203113915.GC8620@rapoport-lnx
Link: http://lkml.kernel.org/r/1548057848-15136-11-git-send-email-rppt@linux.ibm.com
Signed-off-by: Mike Rapoport <rppt@linux.ibm.com>
Tested-by: Michael Ellerman <mpe@ellerman.id.au>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Christophe Leroy <christophe.leroy@c-s.fr>
Cc: Christoph Hellwig <hch@lst.de>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Dennis Zhou <dennis@kernel.org>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Greentime Hu <green.hu@gmail.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Guan Xuetao <gxt@pku.edu.cn>
Cc: Guo Ren <guoren@kernel.org>
Cc: Guo Ren <ren_guo@c-sky.com>				[c-sky]
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Juergen Gross <jgross@suse.com>			[Xen]
Cc: Mark Salter <msalter@redhat.com>
Cc: Matt Turner <mattst88@gmail.com>
Cc: Max Filippov <jcmvbkbc@gmail.com>
Cc: Michal Simek <monstr@monstr.eu>
Cc: Paul Burton <paul.burton@mips.com>
Cc: Petr Mladek <pmladek@suse.com>
Cc: Richard Weinberger <richard@nod.at>
Cc: Rich Felker <dalias@libc.org>
Cc: Rob Herring <robh+dt@kernel.org>
Cc: Rob Herring <robh@kernel.org>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Stafford Horne <shorne@gmail.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Vineet Gupta <vgupta@synopsys.com>
Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memblock.h |   1 -
 mm/memblock.c            | 171 +++++++++++++++++++++--------------------------
 2 files changed, 77 insertions(+), 95 deletions(-)

(limited to 'include')

diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 0c8375120322..c1315c331a8e 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -327,7 +327,6 @@ static inline int memblock_get_region_node(const struct memblock_region *r)
 
 phys_addr_t memblock_phys_alloc_range(phys_addr_t size, phys_addr_t align,
 				      phys_addr_t start, phys_addr_t end);
-phys_addr_t memblock_phys_alloc_nid(phys_addr_t size, phys_addr_t align, int nid);
 phys_addr_t memblock_phys_alloc_try_nid(phys_addr_t size, phys_addr_t align, int nid);
 
 static inline phys_addr_t memblock_phys_alloc(phys_addr_t size,
diff --git a/mm/memblock.c b/mm/memblock.c
index 42fe65447d8b..31e89dac9a23 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -1255,30 +1255,84 @@ int __init_memblock memblock_set_node(phys_addr_t base, phys_addr_t size,
 }
 #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
 
+/**
+ * memblock_alloc_range_nid - allocate boot memory block
+ * @size: size of memory block to be allocated in bytes
+ * @align: alignment of the region and block's size
+ * @start: the lower bound of the memory region to allocate (phys address)
+ * @end: the upper bound of the memory region to allocate (phys address)
+ * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
+ *
+ * The allocation is performed from memory region limited by
+ * memblock.current_limit if @max_addr == %MEMBLOCK_ALLOC_ACCESSIBLE.
+ *
+ * If the specified node can not hold the requested memory the
+ * allocation falls back to any node in the system
+ *
+ * For systems with memory mirroring, the allocation is attempted first
+ * from the regions with mirroring enabled and then retried from any
+ * memory region.
+ *
+ * In addition, function sets the min_count to 0 using kmemleak_alloc_phys for
+ * allocated boot memory block, so that it is never reported as leaks.
+ *
+ * Return:
+ * Physical address of allocated memory block on success, %0 on failure.
+ */
 static phys_addr_t __init memblock_alloc_range_nid(phys_addr_t size,
 					phys_addr_t align, phys_addr_t start,
-					phys_addr_t end, int nid,
-					enum memblock_flags flags)
+					phys_addr_t end, int nid)
 {
+	enum memblock_flags flags = choose_memblock_flags();
 	phys_addr_t found;
 
+	if (WARN_ONCE(nid == MAX_NUMNODES, "Usage of MAX_NUMNODES is deprecated. Use NUMA_NO_NODE instead\n"))
+		nid = NUMA_NO_NODE;
+
 	if (!align) {
 		/* Can't use WARNs this early in boot on powerpc */
 		dump_stack();
 		align = SMP_CACHE_BYTES;
 	}
 
+	if (end > memblock.current_limit)
+		end = memblock.current_limit;
+
+again:
 	found = memblock_find_in_range_node(size, align, start, end, nid,
 					    flags);
-	if (found && !memblock_reserve(found, size)) {
+	if (found && !memblock_reserve(found, size))
+		goto done;
+
+	if (nid != NUMA_NO_NODE) {
+		found = memblock_find_in_range_node(size, align, start,
+						    end, NUMA_NO_NODE,
+						    flags);
+		if (found && !memblock_reserve(found, size))
+			goto done;
+	}
+
+	if (flags & MEMBLOCK_MIRROR) {
+		flags &= ~MEMBLOCK_MIRROR;
+		pr_warn("Could not allocate %pap bytes of mirrored memory\n",
+			&size);
+		goto again;
+	}
+
+	return 0;
+
+done:
+	/* Skip kmemleak for kasan_init() due to high volume. */
+	if (end != MEMBLOCK_ALLOC_KASAN)
 		/*
-		 * The min_count is set to 0 so that memblock allocations are
-		 * never reported as leaks.
+		 * The min_count is set to 0 so that memblock allocated
+		 * blocks are never reported as leaks. This is because many
+		 * of these blocks are only referred via the physical
+		 * address which is not looked up by kmemleak.
 		 */
 		kmemleak_alloc_phys(found, size, 0, 0);
-		return found;
-	}
-	return 0;
+
+	return found;
 }
 
 phys_addr_t __init memblock_phys_alloc_range(phys_addr_t size,
@@ -1286,35 +1340,13 @@ phys_addr_t __init memblock_phys_alloc_range(phys_addr_t size,
 					     phys_addr_t start,
 					     phys_addr_t end)
 {
-	return memblock_alloc_range_nid(size, align, start, end, NUMA_NO_NODE,
-					MEMBLOCK_NONE);
-}
-
-phys_addr_t __init memblock_phys_alloc_nid(phys_addr_t size, phys_addr_t align, int nid)
-{
-	enum memblock_flags flags = choose_memblock_flags();
-	phys_addr_t ret;
-
-again:
-	ret = memblock_alloc_range_nid(size, align, 0,
-				       MEMBLOCK_ALLOC_ACCESSIBLE, nid, flags);
-
-	if (!ret && (flags & MEMBLOCK_MIRROR)) {
-		flags &= ~MEMBLOCK_MIRROR;
-		goto again;
-	}
-	return ret;
+	return memblock_alloc_range_nid(size, align, start, end, NUMA_NO_NODE);
 }
 
 phys_addr_t __init memblock_phys_alloc_try_nid(phys_addr_t size, phys_addr_t align, int nid)
 {
-	phys_addr_t res = memblock_phys_alloc_nid(size, align, nid);
-
-	if (res)
-		return res;
 	return memblock_alloc_range_nid(size, align, 0,
-					MEMBLOCK_ALLOC_ACCESSIBLE,
-					NUMA_NO_NODE, MEMBLOCK_NONE);
+					MEMBLOCK_ALLOC_ACCESSIBLE, nid);
 }
 
 /**
@@ -1325,19 +1357,13 @@ phys_addr_t __init memblock_phys_alloc_try_nid(phys_addr_t size, phys_addr_t ali
  * @max_addr: the upper bound of the memory region to allocate (phys address)
  * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
  *
- * The @min_addr limit is dropped if it can not be satisfied and the allocation
- * will fall back to memory below @min_addr. Also, allocation may fall back
- * to any node in the system if the specified node can not
- * hold the requested memory.
- *
- * The allocation is performed from memory region limited by
- * memblock.current_limit if @max_addr == %MEMBLOCK_ALLOC_ACCESSIBLE.
- *
- * The phys address of allocated boot memory block is converted to virtual and
- * allocated memory is reset to 0.
+ * Allocates memory block using memblock_alloc_range_nid() and
+ * converts the returned physical address to virtual.
  *
- * In addition, function sets the min_count to 0 using kmemleak_alloc for
- * allocated boot memory block, so that it is never reported as leaks.
+ * The @min_addr limit is dropped if it can not be satisfied and the allocation
+ * will fall back to memory below @min_addr. Other constraints, such
+ * as node and mirrored memory will be handled again in
+ * memblock_alloc_range_nid().
  *
  * Return:
  * Virtual address of allocated memory block on success, NULL on failure.
@@ -1348,11 +1374,6 @@ static void * __init memblock_alloc_internal(
 				int nid)
 {
 	phys_addr_t alloc;
-	void *ptr;
-	enum memblock_flags flags = choose_memblock_flags();
-
-	if (WARN_ONCE(nid == MAX_NUMNODES, "Usage of MAX_NUMNODES is deprecated. Use NUMA_NO_NODE instead\n"))
-		nid = NUMA_NO_NODE;
 
 	/*
 	 * Detect any accidental use of these APIs after slab is ready, as at
@@ -1362,54 +1383,16 @@ static void * __init memblock_alloc_internal(
 	if (WARN_ON_ONCE(slab_is_available()))
 		return kzalloc_node(size, GFP_NOWAIT, nid);
 
-	if (!align) {
-		dump_stack();
-		align = SMP_CACHE_BYTES;
-	}
-
-	if (max_addr > memblock.current_limit)
-		max_addr = memblock.current_limit;
-again:
-	alloc = memblock_find_in_range_node(size, align, min_addr, max_addr,
-					    nid, flags);
-	if (alloc && !memblock_reserve(alloc, size))
-		goto done;
-
-	if (nid != NUMA_NO_NODE) {
-		alloc = memblock_find_in_range_node(size, align, min_addr,
-						    max_addr, NUMA_NO_NODE,
-						    flags);
-		if (alloc && !memblock_reserve(alloc, size))
-			goto done;
-	}
-
-	if (min_addr) {
-		min_addr = 0;
-		goto again;
-	}
-
-	if (flags & MEMBLOCK_MIRROR) {
-		flags &= ~MEMBLOCK_MIRROR;
-		pr_warn("Could not allocate %pap bytes of mirrored memory\n",
-			&size);
-		goto again;
-	}
+	alloc = memblock_alloc_range_nid(size, align, min_addr, max_addr, nid);
 
-	return NULL;
-done:
-	ptr = phys_to_virt(alloc);
+	/* retry allocation without lower limit */
+	if (!alloc && min_addr)
+		alloc = memblock_alloc_range_nid(size, align, 0, max_addr, nid);
 
-	/* Skip kmemleak for kasan_init() due to high volume. */
-	if (max_addr != MEMBLOCK_ALLOC_KASAN)
-		/*
-		 * The min_count is set to 0 so that bootmem allocated
-		 * blocks are never reported as leaks. This is because many
-		 * of these blocks are only referred via the physical
-		 * address which is not looked up by kmemleak.
-		 */
-		kmemleak_alloc(ptr, size, 0, 0);
+	if (!alloc)
+		return NULL;
 
-	return ptr;
+	return phys_to_virt(alloc);
 }
 
 /**
-- 
cgit v1.2.3


From c366ea89fa40f244d1210e74485fce110835b71b Mon Sep 17 00:00:00 2001
From: Mike Rapoport <rppt@linux.ibm.com>
Date: Mon, 11 Mar 2019 23:29:46 -0700
Subject: memblock: make memblock_find_in_range_node() and
 choose_memblock_flags() static

These functions are not used outside memblock.  Make them static.

Link: http://lkml.kernel.org/r/1548057848-15136-12-git-send-email-rppt@linux.ibm.com
Signed-off-by: Mike Rapoport <rppt@linux.ibm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Christophe Leroy <christophe.leroy@c-s.fr>
Cc: Christoph Hellwig <hch@lst.de>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Dennis Zhou <dennis@kernel.org>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Greentime Hu <green.hu@gmail.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Guan Xuetao <gxt@pku.edu.cn>
Cc: Guo Ren <guoren@kernel.org>
Cc: Guo Ren <ren_guo@c-sky.com>				[c-sky]
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Juergen Gross <jgross@suse.com>			[Xen]
Cc: Mark Salter <msalter@redhat.com>
Cc: Matt Turner <mattst88@gmail.com>
Cc: Max Filippov <jcmvbkbc@gmail.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Michal Simek <monstr@monstr.eu>
Cc: Paul Burton <paul.burton@mips.com>
Cc: Petr Mladek <pmladek@suse.com>
Cc: Richard Weinberger <richard@nod.at>
Cc: Rich Felker <dalias@libc.org>
Cc: Rob Herring <robh+dt@kernel.org>
Cc: Rob Herring <robh@kernel.org>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Stafford Horne <shorne@gmail.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Vineet Gupta <vgupta@synopsys.com>
Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memblock.h | 4 ----
 mm/memblock.c            | 4 ++--
 2 files changed, 2 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index c1315c331a8e..c077227e6d53 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -108,9 +108,6 @@ void memblock_discard(void);
 #define memblock_dbg(fmt, ...) \
 	if (memblock_debug) printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__)
 
-phys_addr_t memblock_find_in_range_node(phys_addr_t size, phys_addr_t align,
-					phys_addr_t start, phys_addr_t end,
-					int nid, enum memblock_flags flags);
 phys_addr_t memblock_find_in_range(phys_addr_t start, phys_addr_t end,
 				   phys_addr_t size, phys_addr_t align);
 void memblock_allow_resize(void);
@@ -127,7 +124,6 @@ int memblock_clear_hotplug(phys_addr_t base, phys_addr_t size);
 int memblock_mark_mirror(phys_addr_t base, phys_addr_t size);
 int memblock_mark_nomap(phys_addr_t base, phys_addr_t size);
 int memblock_clear_nomap(phys_addr_t base, phys_addr_t size);
-enum memblock_flags choose_memblock_flags(void);
 
 unsigned long memblock_free_all(void);
 void reset_node_managed_pages(pg_data_t *pgdat);
diff --git a/mm/memblock.c b/mm/memblock.c
index 31e89dac9a23..618f94a1eedb 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -132,7 +132,7 @@ static int memblock_can_resize __initdata_memblock;
 static int memblock_memory_in_slab __initdata_memblock = 0;
 static int memblock_reserved_in_slab __initdata_memblock = 0;
 
-enum memblock_flags __init_memblock choose_memblock_flags(void)
+static enum memblock_flags __init_memblock choose_memblock_flags(void)
 {
 	return system_has_some_mirror ? MEMBLOCK_MIRROR : MEMBLOCK_NONE;
 }
@@ -261,7 +261,7 @@ __memblock_find_range_top_down(phys_addr_t start, phys_addr_t end,
  * Return:
  * Found address on success, 0 on failure.
  */
-phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t size,
+static phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t size,
 					phys_addr_t align, phys_addr_t start,
 					phys_addr_t end, int nid,
 					enum memblock_flags flags)
-- 
cgit v1.2.3


From 26fb3dae0a1ec78bdde4b5b72e0e709503e8c596 Mon Sep 17 00:00:00 2001
From: Mike Rapoport <rppt@linux.ibm.com>
Date: Mon, 11 Mar 2019 23:30:42 -0700
Subject: memblock: drop memblock_alloc_*_nopanic() variants

As all the memblock allocation functions return NULL in case of error
rather than panic(), the duplicates with _nopanic suffix can be removed.

Link: http://lkml.kernel.org/r/1548057848-15136-22-git-send-email-rppt@linux.ibm.com
Signed-off-by: Mike Rapoport <rppt@linux.ibm.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Reviewed-by: Petr Mladek <pmladek@suse.com>		[printk]
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Christophe Leroy <christophe.leroy@c-s.fr>
Cc: Christoph Hellwig <hch@lst.de>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Dennis Zhou <dennis@kernel.org>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Greentime Hu <green.hu@gmail.com>
Cc: Guan Xuetao <gxt@pku.edu.cn>
Cc: Guo Ren <guoren@kernel.org>
Cc: Guo Ren <ren_guo@c-sky.com>				[c-sky]
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Juergen Gross <jgross@suse.com>			[Xen]
Cc: Mark Salter <msalter@redhat.com>
Cc: Matt Turner <mattst88@gmail.com>
Cc: Max Filippov <jcmvbkbc@gmail.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Michal Simek <monstr@monstr.eu>
Cc: Paul Burton <paul.burton@mips.com>
Cc: Richard Weinberger <richard@nod.at>
Cc: Rich Felker <dalias@libc.org>
Cc: Rob Herring <robh+dt@kernel.org>
Cc: Rob Herring <robh@kernel.org>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Stafford Horne <shorne@gmail.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Vineet Gupta <vgupta@synopsys.com>
Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/arc/kernel/unwind.c       |  3 +--
 arch/sh/mm/init.c              |  2 +-
 arch/x86/kernel/setup_percpu.c | 10 +++++-----
 arch/x86/mm/kasan_init_64.c    | 14 ++++++++------
 drivers/firmware/memmap.c      |  2 +-
 drivers/usb/early/xhci-dbc.c   |  2 +-
 include/linux/memblock.h       | 35 -----------------------------------
 kernel/dma/swiotlb.c           |  2 +-
 kernel/printk/printk.c         |  9 +--------
 mm/memblock.c                  | 35 -----------------------------------
 mm/page_alloc.c                | 10 +++++-----
 mm/page_ext.c                  |  2 +-
 mm/percpu.c                    | 11 ++++-------
 mm/sparse.c                    |  6 ++----
 14 files changed, 31 insertions(+), 112 deletions(-)

(limited to 'include')

diff --git a/arch/arc/kernel/unwind.c b/arch/arc/kernel/unwind.c
index d34f69eb1a95..271e9fafa479 100644
--- a/arch/arc/kernel/unwind.c
+++ b/arch/arc/kernel/unwind.c
@@ -181,8 +181,7 @@ static void init_unwind_hdr(struct unwind_table *table,
  */
 static void *__init unw_hdr_alloc_early(unsigned long sz)
 {
-	return memblock_alloc_from_nopanic(sz, sizeof(unsigned int),
-					   MAX_DMA_ADDRESS);
+	return memblock_alloc_from(sz, sizeof(unsigned int), MAX_DMA_ADDRESS);
 }
 
 static void *unw_hdr_alloc(unsigned long sz)
diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c
index fceefd92016f..70621324db41 100644
--- a/arch/sh/mm/init.c
+++ b/arch/sh/mm/init.c
@@ -202,7 +202,7 @@ void __init allocate_pgdat(unsigned int nid)
 	get_pfn_range_for_nid(nid, &start_pfn, &end_pfn);
 
 #ifdef CONFIG_NEED_MULTIPLE_NODES
-	NODE_DATA(nid) = memblock_alloc_try_nid_nopanic(
+	NODE_DATA(nid) = memblock_alloc_try_nid(
 				sizeof(struct pglist_data),
 				SMP_CACHE_BYTES, MEMBLOCK_LOW_LIMIT,
 				MEMBLOCK_ALLOC_ACCESSIBLE, nid);
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 13af08827eef..4bf46575568a 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -106,22 +106,22 @@ static void * __init pcpu_alloc_bootmem(unsigned int cpu, unsigned long size,
 	void *ptr;
 
 	if (!node_online(node) || !NODE_DATA(node)) {
-		ptr = memblock_alloc_from_nopanic(size, align, goal);
+		ptr = memblock_alloc_from(size, align, goal);
 		pr_info("cpu %d has no node %d or node-local memory\n",
 			cpu, node);
 		pr_debug("per cpu data for cpu%d %lu bytes at %016lx\n",
 			 cpu, size, __pa(ptr));
 	} else {
-		ptr = memblock_alloc_try_nid_nopanic(size, align, goal,
-						     MEMBLOCK_ALLOC_ACCESSIBLE,
-						     node);
+		ptr = memblock_alloc_try_nid(size, align, goal,
+					     MEMBLOCK_ALLOC_ACCESSIBLE,
+					     node);
 
 		pr_debug("per cpu data for cpu%d %lu bytes on node%d at %016lx\n",
 			 cpu, size, node, __pa(ptr));
 	}
 	return ptr;
 #else
-	return memblock_alloc_from_nopanic(size, align, goal);
+	return memblock_alloc_from(size, align, goal);
 #endif
 }
 
diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
index 462fde83b515..8dc0fc0b1382 100644
--- a/arch/x86/mm/kasan_init_64.c
+++ b/arch/x86/mm/kasan_init_64.c
@@ -24,14 +24,16 @@ extern struct range pfn_mapped[E820_MAX_ENTRIES];
 
 static p4d_t tmp_p4d_table[MAX_PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE);
 
-static __init void *early_alloc(size_t size, int nid, bool panic)
+static __init void *early_alloc(size_t size, int nid, bool should_panic)
 {
-	if (panic)
-		return memblock_alloc_try_nid(size, size,
-			__pa(MAX_DMA_ADDRESS), MEMBLOCK_ALLOC_ACCESSIBLE, nid);
-	else
-		return memblock_alloc_try_nid_nopanic(size, size,
+	void *ptr = memblock_alloc_try_nid(size, size,
 			__pa(MAX_DMA_ADDRESS), MEMBLOCK_ALLOC_ACCESSIBLE, nid);
+
+	if (!ptr && should_panic)
+		panic("%pS: Failed to allocate page, nid=%d from=%lx\n",
+		      (void *)_RET_IP_, nid, __pa(MAX_DMA_ADDRESS));
+
+	return ptr;
 }
 
 static void __init kasan_populate_pmd(pmd_t *pmd, unsigned long addr,
diff --git a/drivers/firmware/memmap.c b/drivers/firmware/memmap.c
index ec4fd253a4e9..d168c87c7d30 100644
--- a/drivers/firmware/memmap.c
+++ b/drivers/firmware/memmap.c
@@ -333,7 +333,7 @@ int __init firmware_map_add_early(u64 start, u64 end, const char *type)
 {
 	struct firmware_map_entry *entry;
 
-	entry = memblock_alloc_nopanic(sizeof(struct firmware_map_entry),
+	entry = memblock_alloc(sizeof(struct firmware_map_entry),
 			       SMP_CACHE_BYTES);
 	if (WARN_ON(!entry))
 		return -ENOMEM;
diff --git a/drivers/usb/early/xhci-dbc.c b/drivers/usb/early/xhci-dbc.c
index d2652dccc699..c9cfb100ecdc 100644
--- a/drivers/usb/early/xhci-dbc.c
+++ b/drivers/usb/early/xhci-dbc.c
@@ -94,7 +94,7 @@ static void * __init xdbc_get_page(dma_addr_t *dma_addr)
 {
 	void *virt;
 
-	virt = memblock_alloc_nopanic(PAGE_SIZE, PAGE_SIZE);
+	virt = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
 	if (!virt)
 		return NULL;
 
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index c077227e6d53..db69ad97aa2e 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -335,9 +335,6 @@ static inline phys_addr_t memblock_phys_alloc(phys_addr_t size,
 void *memblock_alloc_try_nid_raw(phys_addr_t size, phys_addr_t align,
 				 phys_addr_t min_addr, phys_addr_t max_addr,
 				 int nid);
-void *memblock_alloc_try_nid_nopanic(phys_addr_t size, phys_addr_t align,
-				     phys_addr_t min_addr, phys_addr_t max_addr,
-				     int nid);
 void *memblock_alloc_try_nid(phys_addr_t size, phys_addr_t align,
 			     phys_addr_t min_addr, phys_addr_t max_addr,
 			     int nid);
@@ -364,36 +361,12 @@ static inline void * __init memblock_alloc_from(phys_addr_t size,
 				      MEMBLOCK_ALLOC_ACCESSIBLE, NUMA_NO_NODE);
 }
 
-static inline void * __init memblock_alloc_nopanic(phys_addr_t size,
-						   phys_addr_t align)
-{
-	return memblock_alloc_try_nid_nopanic(size, align, MEMBLOCK_LOW_LIMIT,
-					      MEMBLOCK_ALLOC_ACCESSIBLE,
-					      NUMA_NO_NODE);
-}
-
 static inline void * __init memblock_alloc_low(phys_addr_t size,
 					       phys_addr_t align)
 {
 	return memblock_alloc_try_nid(size, align, MEMBLOCK_LOW_LIMIT,
 				      ARCH_LOW_ADDRESS_LIMIT, NUMA_NO_NODE);
 }
-static inline void * __init memblock_alloc_low_nopanic(phys_addr_t size,
-						       phys_addr_t align)
-{
-	return memblock_alloc_try_nid_nopanic(size, align, MEMBLOCK_LOW_LIMIT,
-					      ARCH_LOW_ADDRESS_LIMIT,
-					      NUMA_NO_NODE);
-}
-
-static inline void * __init memblock_alloc_from_nopanic(phys_addr_t size,
-							phys_addr_t align,
-							phys_addr_t min_addr)
-{
-	return memblock_alloc_try_nid_nopanic(size, align, min_addr,
-					      MEMBLOCK_ALLOC_ACCESSIBLE,
-					      NUMA_NO_NODE);
-}
 
 static inline void * __init memblock_alloc_node(phys_addr_t size,
 						phys_addr_t align, int nid)
@@ -402,14 +375,6 @@ static inline void * __init memblock_alloc_node(phys_addr_t size,
 				      MEMBLOCK_ALLOC_ACCESSIBLE, nid);
 }
 
-static inline void * __init memblock_alloc_node_nopanic(phys_addr_t size,
-							int nid)
-{
-	return memblock_alloc_try_nid_nopanic(size, SMP_CACHE_BYTES,
-					      MEMBLOCK_LOW_LIMIT,
-					      MEMBLOCK_ALLOC_ACCESSIBLE, nid);
-}
-
 static inline void __init memblock_free_early(phys_addr_t base,
 					      phys_addr_t size)
 {
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 56ac77a80b1f..53012db1e53c 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -256,7 +256,7 @@ swiotlb_init(int verbose)
 	bytes = io_tlb_nslabs << IO_TLB_SHIFT;
 
 	/* Get IO TLB memory from the low pages */
-	vstart = memblock_alloc_low_nopanic(PAGE_ALIGN(bytes), PAGE_SIZE);
+	vstart = memblock_alloc_low(PAGE_ALIGN(bytes), PAGE_SIZE);
 	if (vstart && !swiotlb_init_with_tbl(vstart, io_tlb_nslabs, verbose))
 		return;
 
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 8eee85bb2687..6b7654b8001f 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -1143,14 +1143,7 @@ void __init setup_log_buf(int early)
 	if (!new_log_buf_len)
 		return;
 
-	if (early) {
-		new_log_buf =
-			memblock_alloc(new_log_buf_len, LOG_ALIGN);
-	} else {
-		new_log_buf = memblock_alloc_nopanic(new_log_buf_len,
-							  LOG_ALIGN);
-	}
-
+	new_log_buf = memblock_alloc(new_log_buf_len, LOG_ALIGN);
 	if (unlikely(!new_log_buf)) {
 		pr_err("log_buf_len: %lu bytes not available\n",
 			new_log_buf_len);
diff --git a/mm/memblock.c b/mm/memblock.c
index a838c50ca9a8..0ab30d0185bc 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -1433,41 +1433,6 @@ void * __init memblock_alloc_try_nid_raw(
 	return ptr;
 }
 
-/**
- * memblock_alloc_try_nid_nopanic - allocate boot memory block
- * @size: size of memory block to be allocated in bytes
- * @align: alignment of the region and block's size
- * @min_addr: the lower bound of the memory region from where the allocation
- *	  is preferred (phys address)
- * @max_addr: the upper bound of the memory region from where the allocation
- *	      is preferred (phys address), or %MEMBLOCK_ALLOC_ACCESSIBLE to
- *	      allocate only from memory limited by memblock.current_limit value
- * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
- *
- * Public function, provides additional debug information (including caller
- * info), if enabled. This function zeroes the allocated memory.
- *
- * Return:
- * Virtual address of allocated memory block on success, NULL on failure.
- */
-void * __init memblock_alloc_try_nid_nopanic(
-				phys_addr_t size, phys_addr_t align,
-				phys_addr_t min_addr, phys_addr_t max_addr,
-				int nid)
-{
-	void *ptr;
-
-	memblock_dbg("%s: %llu bytes align=0x%llx nid=%d from=%pa max_addr=%pa %pF\n",
-		     __func__, (u64)size, (u64)align, nid, &min_addr,
-		     &max_addr, (void *)_RET_IP_);
-
-	ptr = memblock_alloc_internal(size, align,
-					   min_addr, max_addr, nid);
-	if (ptr)
-		memset(ptr, 0, size);
-	return ptr;
-}
-
 /**
  * memblock_alloc_try_nid - allocate boot memory block
  * @size: size of memory block to be allocated in bytes
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 3eb01dedfb50..03fcf73d47da 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -6445,8 +6445,8 @@ static void __ref setup_usemap(struct pglist_data *pgdat,
 	zone->pageblock_flags = NULL;
 	if (usemapsize) {
 		zone->pageblock_flags =
-			memblock_alloc_node_nopanic(usemapsize,
-							 pgdat->node_id);
+			memblock_alloc_node(usemapsize, SMP_CACHE_BYTES,
+					    pgdat->node_id);
 		if (!zone->pageblock_flags)
 			panic("Failed to allocate %ld bytes for zone %s pageblock flags on node %d\n",
 			      usemapsize, zone->name, pgdat->node_id);
@@ -6679,7 +6679,8 @@ static void __ref alloc_node_mem_map(struct pglist_data *pgdat)
 		end = pgdat_end_pfn(pgdat);
 		end = ALIGN(end, MAX_ORDER_NR_PAGES);
 		size =  (end - start) * sizeof(struct page);
-		map = memblock_alloc_node_nopanic(size, pgdat->node_id);
+		map = memblock_alloc_node(size, SMP_CACHE_BYTES,
+					  pgdat->node_id);
 		if (!map)
 			panic("Failed to allocate %ld bytes for node %d memory map\n",
 			      size, pgdat->node_id);
@@ -7959,8 +7960,7 @@ void *__init alloc_large_system_hash(const char *tablename,
 		size = bucketsize << log2qty;
 		if (flags & HASH_EARLY) {
 			if (flags & HASH_ZERO)
-				table = memblock_alloc_nopanic(size,
-							       SMP_CACHE_BYTES);
+				table = memblock_alloc(size, SMP_CACHE_BYTES);
 			else
 				table = memblock_alloc_raw(size,
 							   SMP_CACHE_BYTES);
diff --git a/mm/page_ext.c b/mm/page_ext.c
index ab4244920e0f..d8f1aca4ad43 100644
--- a/mm/page_ext.c
+++ b/mm/page_ext.c
@@ -161,7 +161,7 @@ static int __init alloc_node_page_ext(int nid)
 
 	table_size = get_entry_size() * nr_pages;
 
-	base = memblock_alloc_try_nid_nopanic(
+	base = memblock_alloc_try_nid(
 			table_size, PAGE_SIZE, __pa(MAX_DMA_ADDRESS),
 			MEMBLOCK_ALLOC_ACCESSIBLE, nid);
 	if (!base)
diff --git a/mm/percpu.c b/mm/percpu.c
index 3f9fb3086a9b..2e6fc8d552c9 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -1905,7 +1905,7 @@ struct pcpu_alloc_info * __init pcpu_alloc_alloc_info(int nr_groups,
 			  __alignof__(ai->groups[0].cpu_map[0]));
 	ai_size = base_size + nr_units * sizeof(ai->groups[0].cpu_map[0]);
 
-	ptr = memblock_alloc_nopanic(PFN_ALIGN(ai_size), PAGE_SIZE);
+	ptr = memblock_alloc(PFN_ALIGN(ai_size), PAGE_SIZE);
 	if (!ptr)
 		return NULL;
 	ai = ptr;
@@ -2496,7 +2496,7 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
 	size_sum = ai->static_size + ai->reserved_size + ai->dyn_size;
 	areas_size = PFN_ALIGN(ai->nr_groups * sizeof(void *));
 
-	areas = memblock_alloc_nopanic(areas_size, SMP_CACHE_BYTES);
+	areas = memblock_alloc(areas_size, SMP_CACHE_BYTES);
 	if (!areas) {
 		rc = -ENOMEM;
 		goto out_free;
@@ -2729,8 +2729,7 @@ EXPORT_SYMBOL(__per_cpu_offset);
 static void * __init pcpu_dfl_fc_alloc(unsigned int cpu, size_t size,
 				       size_t align)
 {
-	return  memblock_alloc_from_nopanic(
-			size, align, __pa(MAX_DMA_ADDRESS));
+	return  memblock_alloc_from(size, align, __pa(MAX_DMA_ADDRESS));
 }
 
 static void __init pcpu_dfl_fc_free(void *ptr, size_t size)
@@ -2778,9 +2777,7 @@ void __init setup_per_cpu_areas(void)
 	void *fc;
 
 	ai = pcpu_alloc_alloc_info(1, 1);
-	fc = memblock_alloc_from_nopanic(unit_size,
-					      PAGE_SIZE,
-					      __pa(MAX_DMA_ADDRESS));
+	fc = memblock_alloc_from(unit_size, PAGE_SIZE, __pa(MAX_DMA_ADDRESS));
 	if (!ai || !fc)
 		panic("Failed to allocate memory for percpu areas.");
 	/* kmemleak tracks the percpu allocations separately */
diff --git a/mm/sparse.c b/mm/sparse.c
index 7397fb4e78b4..69904aa6165b 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -330,9 +330,7 @@ sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat,
 	limit = goal + (1UL << PA_SECTION_SHIFT);
 	nid = early_pfn_to_nid(goal >> PAGE_SHIFT);
 again:
-	p = memblock_alloc_try_nid_nopanic(size,
-						SMP_CACHE_BYTES, goal, limit,
-						nid);
+	p = memblock_alloc_try_nid(size, SMP_CACHE_BYTES, goal, limit, nid);
 	if (!p && limit) {
 		limit = 0;
 		goto again;
@@ -386,7 +384,7 @@ static unsigned long * __init
 sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat,
 					 unsigned long size)
 {
-	return memblock_alloc_node_nopanic(size, pgdat->node_id);
+	return memblock_alloc_node(size, SMP_CACHE_BYTES, pgdat->node_id);
 }
 
 static void __init check_usemap_section_nr(int nid, unsigned long *usemap)
-- 
cgit v1.2.3


From fe145124dbe53c86bf32b941b2f2f88f891d985d Mon Sep 17 00:00:00 2001
From: Mike Rapoport <rppt@linux.ibm.com>
Date: Mon, 11 Mar 2019 23:30:46 -0700
Subject: memblock: remove memblock_{set,clear}_region_flags

The memblock API provides dedicated helpers to set or clear a flag on a
memory region, e.g.  memblock_{mark,clear}_hotplug().

The memblock_{set,clear}_region_flags() functions are used only by the
memblock internal function that adjusts the region flags.  Drop these
functions and use open-coded implementation instead.

Link: http://lkml.kernel.org/r/1549455025-17706-2-git-send-email-rppt@linux.ibm.com
Signed-off-by: Mike Rapoport <rppt@linux.ibm.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Michal Hocko <mhocko@suse.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memblock.h | 12 ------------
 mm/memblock.c            |  9 ++++++---
 2 files changed, 6 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index db69ad97aa2e..294d5d80e150 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -273,18 +273,6 @@ void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn,
 	for_each_mem_range_rev(i, &memblock.memory, &memblock.reserved,	\
 			       nid, flags, p_start, p_end, p_nid)
 
-static inline void memblock_set_region_flags(struct memblock_region *r,
-					     enum memblock_flags flags)
-{
-	r->flags |= flags;
-}
-
-static inline void memblock_clear_region_flags(struct memblock_region *r,
-					       enum memblock_flags flags)
-{
-	r->flags &= ~flags;
-}
-
 #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
 int memblock_set_node(phys_addr_t base, phys_addr_t size,
 		      struct memblock_type *type, int nid);
diff --git a/mm/memblock.c b/mm/memblock.c
index 0ab30d0185bc..068e147695ee 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -858,11 +858,14 @@ static int __init_memblock memblock_setclr_flag(phys_addr_t base,
 	if (ret)
 		return ret;
 
-	for (i = start_rgn; i < end_rgn; i++)
+	for (i = start_rgn; i < end_rgn; i++) {
+		struct memblock_region *r = &type->regions[i];
+
 		if (set)
-			memblock_set_region_flags(&type->regions[i], flag);
+			r->flags |= flag;
 		else
-			memblock_clear_region_flags(&type->regions[i], flag);
+			r->flags &= ~flag;
+	}
 
 	memblock_merge_regions(type);
 	return 0;
-- 
cgit v1.2.3


From ba20ba2e3743bac786dff777954c11930256075e Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kent.overstreet@gmail.com>
Date: Mon, 11 Mar 2019 23:31:14 -0700
Subject: generic radix trees

Very simple radix tree implementation that supports storing arbitrary
size entries, up to PAGE_SIZE - upcoming patches will convert existing
flex_array users to genradixes.  The new genradix code has a much
simpler API and implementation, and doesn't have a hard limit on the
number of elements like flex_array does.

Link: http://lkml.kernel.org/r/20181217131929.11727-5-kent.overstreet@gmail.com
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Eric Paris <eparis@parisplace.org>
Cc: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Neil Horman <nhorman@tuxdriver.com>
Cc: Paul Moore <paul@paul-moore.com>
Cc: Pravin B Shelar <pshelar@ovn.org>
Cc: Shaohua Li <shli@kernel.org>
Cc: Stephen Smalley <sds@tycho.nsa.gov>
Cc: Vlad Yasevich <vyasevich@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/core-api/generic-radix-tree.rst |  12 ++
 Documentation/core-api/index.rst              |   1 +
 include/linux/generic-radix-tree.h            | 231 ++++++++++++++++++++++++++
 lib/Makefile                                  |   3 +-
 lib/generic-radix-tree.c                      | 217 ++++++++++++++++++++++++
 5 files changed, 463 insertions(+), 1 deletion(-)
 create mode 100644 Documentation/core-api/generic-radix-tree.rst
 create mode 100644 include/linux/generic-radix-tree.h
 create mode 100644 lib/generic-radix-tree.c

(limited to 'include')

diff --git a/Documentation/core-api/generic-radix-tree.rst b/Documentation/core-api/generic-radix-tree.rst
new file mode 100644
index 000000000000..ed42839ae42f
--- /dev/null
+++ b/Documentation/core-api/generic-radix-tree.rst
@@ -0,0 +1,12 @@
+=================================
+Generic radix trees/sparse arrays
+=================================
+
+.. kernel-doc:: include/linux/generic-radix-tree.h
+   :doc: Generic radix trees/sparse arrays
+
+generic radix tree functions
+----------------------------
+
+.. kernel-doc:: include/linux/generic-radix-tree.h
+   :functions:
diff --git a/Documentation/core-api/index.rst b/Documentation/core-api/index.rst
index 3adee82be311..6870baffef82 100644
--- a/Documentation/core-api/index.rst
+++ b/Documentation/core-api/index.rst
@@ -28,6 +28,7 @@ Core utilities
    errseq
    printk-formats
    circular-buffers
+   generic-radix-tree
    memory-allocation
    mm-api
    gfp_mask-from-fs-io
diff --git a/include/linux/generic-radix-tree.h b/include/linux/generic-radix-tree.h
new file mode 100644
index 000000000000..3a91130a4fbd
--- /dev/null
+++ b/include/linux/generic-radix-tree.h
@@ -0,0 +1,231 @@
+#ifndef _LINUX_GENERIC_RADIX_TREE_H
+#define _LINUX_GENERIC_RADIX_TREE_H
+
+/**
+ * DOC: Generic radix trees/sparse arrays:
+ *
+ * Very simple and minimalistic, supporting arbitrary size entries up to
+ * PAGE_SIZE.
+ *
+ * A genradix is defined with the type it will store, like so:
+ *
+ * static GENRADIX(struct foo) foo_genradix;
+ *
+ * The main operations are:
+ *
+ * - genradix_init(radix) - initialize an empty genradix
+ *
+ * - genradix_free(radix) - free all memory owned by the genradix and
+ *   reinitialize it
+ *
+ * - genradix_ptr(radix, idx) - gets a pointer to the entry at idx, returning
+ *   NULL if that entry does not exist
+ *
+ * - genradix_ptr_alloc(radix, idx, gfp) - gets a pointer to an entry,
+ *   allocating it if necessary
+ *
+ * - genradix_for_each(radix, iter, p) - iterate over each entry in a genradix
+ *
+ * The radix tree allocates one page of entries at a time, so entries may exist
+ * that were never explicitly allocated - they will be initialized to all
+ * zeroes.
+ *
+ * Internally, a genradix is just a radix tree of pages, and indexing works in
+ * terms of byte offsets. The wrappers in this header file use sizeof on the
+ * type the radix contains to calculate a byte offset from the index - see
+ * __idx_to_offset.
+ */
+
+#include <asm/page.h>
+#include <linux/bug.h>
+#include <linux/kernel.h>
+#include <linux/log2.h>
+
+struct genradix_root;
+
+struct __genradix {
+	struct genradix_root __rcu	*root;
+};
+
+/*
+ * NOTE: currently, sizeof(_type) must not be larger than PAGE_SIZE:
+ */
+
+#define __GENRADIX_INITIALIZER					\
+	{							\
+		.tree = {					\
+			.root = NULL,				\
+		}						\
+	}
+
+/*
+ * We use a 0 size array to stash the type we're storing without taking any
+ * space at runtime - then the various accessor macros can use typeof() to get
+ * to it for casts/sizeof - we also force the alignment so that storing a type
+ * with a ridiculous alignment doesn't blow up the alignment or size of the
+ * genradix.
+ */
+
+#define GENRADIX(_type)						\
+struct {							\
+	struct __genradix	tree;				\
+	_type			type[0] __aligned(1);		\
+}
+
+#define DEFINE_GENRADIX(_name, _type)				\
+	GENRADIX(_type) _name = __GENRADIX_INITIALIZER
+
+/**
+ * genradix_init - initialize a genradix
+ * @_radix:	genradix to initialize
+ *
+ * Does not fail
+ */
+#define genradix_init(_radix)					\
+do {								\
+	*(_radix) = (typeof(*_radix)) __GENRADIX_INITIALIZER;	\
+} while (0)
+
+void __genradix_free(struct __genradix *);
+
+/**
+ * genradix_free: free all memory owned by a genradix
+ * @_radix: the genradix to free
+ *
+ * After freeing, @_radix will be reinitialized and empty
+ */
+#define genradix_free(_radix)	__genradix_free(&(_radix)->tree)
+
+static inline size_t __idx_to_offset(size_t idx, size_t obj_size)
+{
+	if (__builtin_constant_p(obj_size))
+		BUILD_BUG_ON(obj_size > PAGE_SIZE);
+	else
+		BUG_ON(obj_size > PAGE_SIZE);
+
+	if (!is_power_of_2(obj_size)) {
+		size_t objs_per_page = PAGE_SIZE / obj_size;
+
+		return (idx / objs_per_page) * PAGE_SIZE +
+			(idx % objs_per_page) * obj_size;
+	} else {
+		return idx * obj_size;
+	}
+}
+
+#define __genradix_cast(_radix)		(typeof((_radix)->type[0]) *)
+#define __genradix_obj_size(_radix)	sizeof((_radix)->type[0])
+#define __genradix_idx_to_offset(_radix, _idx)			\
+	__idx_to_offset(_idx, __genradix_obj_size(_radix))
+
+void *__genradix_ptr(struct __genradix *, size_t);
+
+/**
+ * genradix_ptr - get a pointer to a genradix entry
+ * @_radix:	genradix to access
+ * @_idx:	index to fetch
+ *
+ * Returns a pointer to entry at @_idx, or NULL if that entry does not exist.
+ */
+#define genradix_ptr(_radix, _idx)				\
+	(__genradix_cast(_radix)				\
+	 __genradix_ptr(&(_radix)->tree,			\
+			__genradix_idx_to_offset(_radix, _idx)))
+
+void *__genradix_ptr_alloc(struct __genradix *, size_t, gfp_t);
+
+/**
+ * genradix_ptr_alloc - get a pointer to a genradix entry, allocating it
+ *			if necessary
+ * @_radix:	genradix to access
+ * @_idx:	index to fetch
+ * @_gfp:	gfp mask
+ *
+ * Returns a pointer to entry at @_idx, or NULL on allocation failure
+ */
+#define genradix_ptr_alloc(_radix, _idx, _gfp)			\
+	(__genradix_cast(_radix)				\
+	 __genradix_ptr_alloc(&(_radix)->tree,			\
+			__genradix_idx_to_offset(_radix, _idx),	\
+			_gfp))
+
+struct genradix_iter {
+	size_t			offset;
+	size_t			pos;
+};
+
+/**
+ * genradix_iter_init - initialize a genradix_iter
+ * @_radix:	genradix that will be iterated over
+ * @_idx:	index to start iterating from
+ */
+#define genradix_iter_init(_radix, _idx)			\
+	((struct genradix_iter) {				\
+		.pos	= (_idx),				\
+		.offset	= __genradix_idx_to_offset((_radix), (_idx)),\
+	})
+
+void *__genradix_iter_peek(struct genradix_iter *, struct __genradix *, size_t);
+
+/**
+ * genradix_iter_peek - get first entry at or above iterator's current
+ *			position
+ * @_iter:	a genradix_iter
+ * @_radix:	genradix being iterated over
+ *
+ * If no more entries exist at or above @_iter's current position, returns NULL
+ */
+#define genradix_iter_peek(_iter, _radix)			\
+	(__genradix_cast(_radix)				\
+	 __genradix_iter_peek(_iter, &(_radix)->tree,		\
+			      PAGE_SIZE / __genradix_obj_size(_radix)))
+
+static inline void __genradix_iter_advance(struct genradix_iter *iter,
+					   size_t obj_size)
+{
+	iter->offset += obj_size;
+
+	if (!is_power_of_2(obj_size) &&
+	    (iter->offset & (PAGE_SIZE - 1)) + obj_size > PAGE_SIZE)
+		iter->offset = round_up(iter->offset, PAGE_SIZE);
+
+	iter->pos++;
+}
+
+#define genradix_iter_advance(_iter, _radix)			\
+	__genradix_iter_advance(_iter, __genradix_obj_size(_radix))
+
+#define genradix_for_each_from(_radix, _iter, _p, _start)	\
+	for (_iter = genradix_iter_init(_radix, _start);	\
+	     (_p = genradix_iter_peek(&_iter, _radix)) != NULL;	\
+	     genradix_iter_advance(&_iter, _radix))
+
+/**
+ * genradix_for_each - iterate over entry in a genradix
+ * @_radix:	genradix to iterate over
+ * @_iter:	a genradix_iter to track current position
+ * @_p:		pointer to genradix entry type
+ *
+ * On every iteration, @_p will point to the current entry, and @_iter.pos
+ * will be the current entry's index.
+ */
+#define genradix_for_each(_radix, _iter, _p)			\
+	genradix_for_each_from(_radix, _iter, _p, 0)
+
+int __genradix_prealloc(struct __genradix *, size_t, gfp_t);
+
+/**
+ * genradix_prealloc - preallocate entries in a generic radix tree
+ * @_radix:	genradix to preallocate
+ * @_nr:	number of entries to preallocate
+ * @_gfp:	gfp mask
+ *
+ * Returns 0 on success, -ENOMEM on failure
+ */
+#define genradix_prealloc(_radix, _nr, _gfp)			\
+	 __genradix_prealloc(&(_radix)->tree,			\
+			__genradix_idx_to_offset(_radix, _nr + 1),\
+			_gfp)
+
+
+#endif /* _LINUX_GENERIC_RADIX_TREE_H */
diff --git a/lib/Makefile b/lib/Makefile
index 647517940b29..b798b41d01ae 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -38,7 +38,8 @@ obj-y += bcd.o div64.o sort.o parser.o debug_locks.o random32.o \
 	 gcd.o lcm.o list_sort.o uuid.o flex_array.o iov_iter.o clz_ctz.o \
 	 bsearch.o find_bit.o llist.o memweight.o kfifo.o \
 	 percpu-refcount.o rhashtable.o reciprocal_div.o \
-	 once.o refcount.o usercopy.o errseq.o bucket_locks.o
+	 once.o refcount.o usercopy.o errseq.o bucket_locks.o \
+	 generic-radix-tree.o
 obj-$(CONFIG_STRING_SELFTEST) += test_string.o
 obj-y += string_helpers.o
 obj-$(CONFIG_TEST_STRING_HELPERS) += test-string_helpers.o
diff --git a/lib/generic-radix-tree.c b/lib/generic-radix-tree.c
new file mode 100644
index 000000000000..a7bafc413730
--- /dev/null
+++ b/lib/generic-radix-tree.c
@@ -0,0 +1,217 @@
+
+#include <linux/export.h>
+#include <linux/generic-radix-tree.h>
+#include <linux/gfp.h>
+
+#define GENRADIX_ARY		(PAGE_SIZE / sizeof(struct genradix_node *))
+#define GENRADIX_ARY_SHIFT	ilog2(GENRADIX_ARY)
+
+struct genradix_node {
+	union {
+		/* Interior node: */
+		struct genradix_node	*children[GENRADIX_ARY];
+
+		/* Leaf: */
+		u8			data[PAGE_SIZE];
+	};
+};
+
+static inline int genradix_depth_shift(unsigned depth)
+{
+	return PAGE_SHIFT + GENRADIX_ARY_SHIFT * depth;
+}
+
+/*
+ * Returns size (of data, in bytes) that a tree of a given depth holds:
+ */
+static inline size_t genradix_depth_size(unsigned depth)
+{
+	return 1UL << genradix_depth_shift(depth);
+}
+
+/* depth that's needed for a genradix that can address up to ULONG_MAX: */
+#define GENRADIX_MAX_DEPTH	\
+	DIV_ROUND_UP(BITS_PER_LONG - PAGE_SHIFT, GENRADIX_ARY_SHIFT)
+
+#define GENRADIX_DEPTH_MASK				\
+	((unsigned long) (roundup_pow_of_two(GENRADIX_MAX_DEPTH + 1) - 1))
+
+unsigned genradix_root_to_depth(struct genradix_root *r)
+{
+	return (unsigned long) r & GENRADIX_DEPTH_MASK;
+}
+
+struct genradix_node *genradix_root_to_node(struct genradix_root *r)
+{
+	return (void *) ((unsigned long) r & ~GENRADIX_DEPTH_MASK);
+}
+
+/*
+ * Returns pointer to the specified byte @offset within @radix, or NULL if not
+ * allocated
+ */
+void *__genradix_ptr(struct __genradix *radix, size_t offset)
+{
+	struct genradix_root *r = READ_ONCE(radix->root);
+	struct genradix_node *n = genradix_root_to_node(r);
+	unsigned level		= genradix_root_to_depth(r);
+
+	if (ilog2(offset) >= genradix_depth_shift(level))
+		return NULL;
+
+	while (1) {
+		if (!n)
+			return NULL;
+		if (!level)
+			break;
+
+		level--;
+
+		n = n->children[offset >> genradix_depth_shift(level)];
+		offset &= genradix_depth_size(level) - 1;
+	}
+
+	return &n->data[offset];
+}
+EXPORT_SYMBOL(__genradix_ptr);
+
+/*
+ * Returns pointer to the specified byte @offset within @radix, allocating it if
+ * necessary - newly allocated slots are always zeroed out:
+ */
+void *__genradix_ptr_alloc(struct __genradix *radix, size_t offset,
+			   gfp_t gfp_mask)
+{
+	struct genradix_root *v = READ_ONCE(radix->root);
+	struct genradix_node *n, *new_node = NULL;
+	unsigned level;
+
+	/* Increase tree depth if necessary: */
+	while (1) {
+		struct genradix_root *r = v, *new_root;
+
+		n	= genradix_root_to_node(r);
+		level	= genradix_root_to_depth(r);
+
+		if (n && ilog2(offset) < genradix_depth_shift(level))
+			break;
+
+		if (!new_node) {
+			new_node = (void *)
+				__get_free_page(gfp_mask|__GFP_ZERO);
+			if (!new_node)
+				return NULL;
+		}
+
+		new_node->children[0] = n;
+		new_root = ((struct genradix_root *)
+			    ((unsigned long) new_node | (n ? level + 1 : 0)));
+
+		if ((v = cmpxchg_release(&radix->root, r, new_root)) == r) {
+			v = new_root;
+			new_node = NULL;
+		}
+	}
+
+	while (level--) {
+		struct genradix_node **p =
+			&n->children[offset >> genradix_depth_shift(level)];
+		offset &= genradix_depth_size(level) - 1;
+
+		n = READ_ONCE(*p);
+		if (!n) {
+			if (!new_node) {
+				new_node = (void *)
+					__get_free_page(gfp_mask|__GFP_ZERO);
+				if (!new_node)
+					return NULL;
+			}
+
+			if (!(n = cmpxchg_release(p, NULL, new_node)))
+				swap(n, new_node);
+		}
+	}
+
+	if (new_node)
+		free_page((unsigned long) new_node);
+
+	return &n->data[offset];
+}
+EXPORT_SYMBOL(__genradix_ptr_alloc);
+
+void *__genradix_iter_peek(struct genradix_iter *iter,
+			   struct __genradix *radix,
+			   size_t objs_per_page)
+{
+	struct genradix_root *r;
+	struct genradix_node *n;
+	unsigned level, i;
+restart:
+	r = READ_ONCE(radix->root);
+	if (!r)
+		return NULL;
+
+	n	= genradix_root_to_node(r);
+	level	= genradix_root_to_depth(r);
+
+	if (ilog2(iter->offset) >= genradix_depth_shift(level))
+		return NULL;
+
+	while (level) {
+		level--;
+
+		i = (iter->offset >> genradix_depth_shift(level)) &
+			(GENRADIX_ARY - 1);
+
+		while (!n->children[i]) {
+			i++;
+			iter->offset = round_down(iter->offset +
+					   genradix_depth_size(level),
+					   genradix_depth_size(level));
+			iter->pos = (iter->offset >> PAGE_SHIFT) *
+				objs_per_page;
+			if (i == GENRADIX_ARY)
+				goto restart;
+		}
+
+		n = n->children[i];
+	}
+
+	return &n->data[iter->offset & (PAGE_SIZE - 1)];
+}
+EXPORT_SYMBOL(__genradix_iter_peek);
+
+static void genradix_free_recurse(struct genradix_node *n, unsigned level)
+{
+	if (level) {
+		unsigned i;
+
+		for (i = 0; i < GENRADIX_ARY; i++)
+			if (n->children[i])
+				genradix_free_recurse(n->children[i], level - 1);
+	}
+
+	free_page((unsigned long) n);
+}
+
+int __genradix_prealloc(struct __genradix *radix, size_t size,
+			gfp_t gfp_mask)
+{
+	size_t offset;
+
+	for (offset = 0; offset < size; offset += PAGE_SIZE)
+		if (!__genradix_ptr_alloc(radix, offset, gfp_mask))
+			return -ENOMEM;
+
+	return 0;
+}
+EXPORT_SYMBOL(__genradix_prealloc);
+
+void __genradix_free(struct __genradix *radix)
+{
+	struct genradix_root *r = xchg(&radix->root, NULL);
+
+	genradix_free_recurse(genradix_root_to_node(r),
+			      genradix_root_to_depth(r));
+}
+EXPORT_SYMBOL(__genradix_free);
-- 
cgit v1.2.3


From 2075e50caf5ea28be3cba0d01b3058bb5c3b0168 Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kent.overstreet@gmail.com>
Date: Mon, 11 Mar 2019 23:31:22 -0700
Subject: sctp: convert to genradix

This also makes sctp_stream_alloc_(out|in) saner, in that they no longer
allocate new flex_arrays/genradixes, they just preallocate more
elements.

This code does however have a suspicious lack of locking.

Link: http://lkml.kernel.org/r/20181217131929.11727-7-kent.overstreet@gmail.com
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Cc: Vlad Yasevich <vyasevich@gmail.com>
Cc: Neil Horman <nhorman@tuxdriver.com>
Cc: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Eric Paris <eparis@parisplace.org>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Paul Moore <paul@paul-moore.com>
Cc: Pravin B Shelar <pshelar@ovn.org>
Cc: Shaohua Li <shli@kernel.org>
Cc: Stephen Smalley <sds@tycho.nsa.gov>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/net/sctp/structs.h   |  15 +++---
 net/sctp/stream.c            | 125 +++++++------------------------------------
 net/sctp/stream_interleave.c |   2 +-
 3 files changed, 28 insertions(+), 114 deletions(-)

(limited to 'include')

diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 58e4b23cecf4..140fd836a396 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -48,6 +48,7 @@
 #define __sctp_structs_h__
 
 #include <linux/ktime.h>
+#include <linux/generic-radix-tree.h>
 #include <linux/rhashtable-types.h>
 #include <linux/socket.h>	/* linux/in.h needs this!!    */
 #include <linux/in.h>		/* We get struct sockaddr_in. */
@@ -57,7 +58,6 @@
 #include <linux/atomic.h>		/* This gets us atomic counters.  */
 #include <linux/skbuff.h>	/* We need sk_buff_head. */
 #include <linux/workqueue.h>	/* We need tq_struct.	 */
-#include <linux/flex_array.h>	/* We need flex_array.   */
 #include <linux/sctp.h>		/* We need sctp* header structs.  */
 #include <net/sctp/auth.h>	/* We need auth specific structs */
 #include <net/ip.h>		/* For inet_skb_parm */
@@ -1449,8 +1449,9 @@ struct sctp_stream_in {
 };
 
 struct sctp_stream {
-	struct flex_array *out;
-	struct flex_array *in;
+	GENRADIX(struct sctp_stream_out) out;
+	GENRADIX(struct sctp_stream_in)	in;
+
 	__u16 outcnt;
 	__u16 incnt;
 	/* Current stream being sent, if any */
@@ -1473,17 +1474,17 @@ struct sctp_stream {
 };
 
 static inline struct sctp_stream_out *sctp_stream_out(
-	const struct sctp_stream *stream,
+	struct sctp_stream *stream,
 	__u16 sid)
 {
-	return flex_array_get(stream->out, sid);
+	return genradix_ptr(&stream->out, sid);
 }
 
 static inline struct sctp_stream_in *sctp_stream_in(
-	const struct sctp_stream *stream,
+	struct sctp_stream *stream,
 	__u16 sid)
 {
-	return flex_array_get(stream->in, sid);
+	return genradix_ptr(&stream->in, sid);
 }
 
 #define SCTP_SO(s, i) sctp_stream_out((s), (i))
diff --git a/net/sctp/stream.c b/net/sctp/stream.c
index 3b47457862cc..b6bb68adac6e 100644
--- a/net/sctp/stream.c
+++ b/net/sctp/stream.c
@@ -37,66 +37,6 @@
 #include <net/sctp/sm.h>
 #include <net/sctp/stream_sched.h>
 
-static struct flex_array *fa_alloc(size_t elem_size, size_t elem_count,
-				   gfp_t gfp)
-{
-	struct flex_array *result;
-	int err;
-
-	result = flex_array_alloc(elem_size, elem_count, gfp);
-	if (result) {
-		err = flex_array_prealloc(result, 0, elem_count, gfp);
-		if (err) {
-			flex_array_free(result);
-			result = NULL;
-		}
-	}
-
-	return result;
-}
-
-static void fa_free(struct flex_array *fa)
-{
-	if (fa)
-		flex_array_free(fa);
-}
-
-static void fa_copy(struct flex_array *fa, struct flex_array *from,
-		    size_t index, size_t count)
-{
-	void *elem;
-
-	while (count--) {
-		elem = flex_array_get(from, index);
-		flex_array_put(fa, index, elem, 0);
-		index++;
-	}
-}
-
-static void fa_zero(struct flex_array *fa, size_t index, size_t count)
-{
-	void *elem;
-
-	while (count--) {
-		elem = flex_array_get(fa, index);
-		memset(elem, 0, fa->element_size);
-		index++;
-	}
-}
-
-static size_t fa_index(struct flex_array *fa, void *elem, size_t count)
-{
-	size_t index = 0;
-
-	while (count--) {
-		if (elem == flex_array_get(fa, index))
-			break;
-		index++;
-	}
-
-	return index;
-}
-
 /* Migrates chunks from stream queues to new stream queues if needed,
  * but not across associations. Also, removes those chunks to streams
  * higher than the new max.
@@ -153,53 +93,32 @@ static void sctp_stream_outq_migrate(struct sctp_stream *stream,
 static int sctp_stream_alloc_out(struct sctp_stream *stream, __u16 outcnt,
 				 gfp_t gfp)
 {
-	struct flex_array *out;
-	size_t elem_size = sizeof(struct sctp_stream_out);
-
-	out = fa_alloc(elem_size, outcnt, gfp);
-	if (!out)
-		return -ENOMEM;
-
-	if (stream->out) {
-		fa_copy(out, stream->out, 0, min(outcnt, stream->outcnt));
-		if (stream->out_curr) {
-			size_t index = fa_index(stream->out, stream->out_curr,
-						stream->outcnt);
-
-			BUG_ON(index == stream->outcnt);
-			stream->out_curr = flex_array_get(out, index);
-		}
-		fa_free(stream->out);
-	}
+	int ret;
 
-	if (outcnt > stream->outcnt)
-		fa_zero(out, stream->outcnt, (outcnt - stream->outcnt));
+	if (outcnt <= stream->outcnt)
+		return 0;
 
-	stream->out = out;
+	ret = genradix_prealloc(&stream->out, outcnt, gfp);
+	if (ret)
+		return ret;
 
+	stream->outcnt = outcnt;
 	return 0;
 }
 
 static int sctp_stream_alloc_in(struct sctp_stream *stream, __u16 incnt,
 				gfp_t gfp)
 {
-	struct flex_array *in;
-	size_t elem_size = sizeof(struct sctp_stream_in);
-
-	in = fa_alloc(elem_size, incnt, gfp);
-	if (!in)
-		return -ENOMEM;
-
-	if (stream->in) {
-		fa_copy(in, stream->in, 0, min(incnt, stream->incnt));
-		fa_free(stream->in);
-	}
+	int ret;
 
-	if (incnt > stream->incnt)
-		fa_zero(in, stream->incnt, (incnt - stream->incnt));
+	if (incnt <= stream->incnt)
+		return 0;
 
-	stream->in = in;
+	ret = genradix_prealloc(&stream->in, incnt, gfp);
+	if (ret)
+		return ret;
 
+	stream->incnt = incnt;
 	return 0;
 }
 
@@ -226,7 +145,6 @@ int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt,
 	if (ret)
 		goto out;
 
-	stream->outcnt = outcnt;
 	for (i = 0; i < stream->outcnt; i++)
 		SCTP_SO(stream, i)->state = SCTP_STREAM_OPEN;
 
@@ -238,14 +156,11 @@ in:
 	ret = sctp_stream_alloc_in(stream, incnt, gfp);
 	if (ret) {
 		sched->free(stream);
-		fa_free(stream->out);
-		stream->out = NULL;
+		genradix_free(&stream->out);
 		stream->outcnt = 0;
 		goto out;
 	}
 
-	stream->incnt = incnt;
-
 out:
 	return ret;
 }
@@ -270,8 +185,8 @@ void sctp_stream_free(struct sctp_stream *stream)
 	sched->free(stream);
 	for (i = 0; i < stream->outcnt; i++)
 		kfree(SCTP_SO(stream, i)->ext);
-	fa_free(stream->out);
-	fa_free(stream->in);
+	genradix_free(&stream->out);
+	genradix_free(&stream->in);
 }
 
 void sctp_stream_clear(struct sctp_stream *stream)
@@ -302,8 +217,8 @@ void sctp_stream_update(struct sctp_stream *stream, struct sctp_stream *new)
 
 	sched->sched_all(stream);
 
-	new->out = NULL;
-	new->in  = NULL;
+	new->out.tree.root = NULL;
+	new->in.tree.root  = NULL;
 	new->outcnt = 0;
 	new->incnt  = 0;
 }
@@ -555,8 +470,6 @@ int sctp_send_add_streams(struct sctp_association *asoc,
 		goto out;
 	}
 
-	stream->outcnt = outcnt;
-
 	asoc->strreset_outstanding = !!out + !!in;
 
 out:
diff --git a/net/sctp/stream_interleave.c b/net/sctp/stream_interleave.c
index a6bf21579466..102c6fefe38c 100644
--- a/net/sctp/stream_interleave.c
+++ b/net/sctp/stream_interleave.c
@@ -101,7 +101,7 @@ static void sctp_chunk_assign_mid(struct sctp_chunk *chunk)
 
 static bool sctp_validate_data(struct sctp_chunk *chunk)
 {
-	const struct sctp_stream *stream;
+	struct sctp_stream *stream;
 	__u16 sid, ssn;
 
 	if (chunk->chunk_hdr->type != SCTP_CID_DATA)
-- 
cgit v1.2.3


From 586187d7de71b4da7956ba588ae42253b9ff6482 Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kent.overstreet@gmail.com>
Date: Mon, 11 Mar 2019 23:31:26 -0700
Subject: Drop flex_arrays

All existing users have been converted to generic radix trees

Link: http://lkml.kernel.org/r/20181217131929.11727-8-kent.overstreet@gmail.com
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Acked-by: Dave Hansen <dave.hansen@intel.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Eric Paris <eparis@parisplace.org>
Cc: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Neil Horman <nhorman@tuxdriver.com>
Cc: Paul Moore <paul@paul-moore.com>
Cc: Pravin B Shelar <pshelar@ovn.org>
Cc: Shaohua Li <shli@kernel.org>
Cc: Stephen Smalley <sds@tycho.nsa.gov>
Cc: Vlad Yasevich <vyasevich@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/core-api/flexible-arrays.rst | 130 ----------
 Documentation/flexible-arrays.txt          | 123 ---------
 include/linux/flex_array.h                 | 149 -----------
 include/linux/poison.h                     |   3 -
 lib/Makefile                               |   2 +-
 lib/flex_array.c                           | 398 -----------------------------
 tools/include/linux/poison.h               |   3 -
 7 files changed, 1 insertion(+), 807 deletions(-)
 delete mode 100644 Documentation/core-api/flexible-arrays.rst
 delete mode 100644 Documentation/flexible-arrays.txt
 delete mode 100644 include/linux/flex_array.h
 delete mode 100644 lib/flex_array.c

(limited to 'include')

diff --git a/Documentation/core-api/flexible-arrays.rst b/Documentation/core-api/flexible-arrays.rst
deleted file mode 100644
index b6b85a1b518e..000000000000
--- a/Documentation/core-api/flexible-arrays.rst
+++ /dev/null
@@ -1,130 +0,0 @@
-
-===================================
-Using flexible arrays in the kernel
-===================================
-
-Large contiguous memory allocations can be unreliable in the Linux kernel.
-Kernel programmers will sometimes respond to this problem by allocating
-pages with :c:func:`vmalloc()`.  This solution not ideal, though.  On 32-bit
-systems, memory from vmalloc() must be mapped into a relatively small address
-space; it's easy to run out.  On SMP systems, the page table changes required
-by vmalloc() allocations can require expensive cross-processor interrupts on
-all CPUs.  And, on all systems, use of space in the vmalloc() range increases
-pressure on the translation lookaside buffer (TLB), reducing the performance
-of the system.
-
-In many cases, the need for memory from vmalloc() can be eliminated by piecing
-together an array from smaller parts; the flexible array library exists to make
-this task easier.
-
-A flexible array holds an arbitrary (within limits) number of fixed-sized
-objects, accessed via an integer index.  Sparse arrays are handled
-reasonably well.  Only single-page allocations are made, so memory
-allocation failures should be relatively rare.  The down sides are that the
-arrays cannot be indexed directly, individual object size cannot exceed the
-system page size, and putting data into a flexible array requires a copy
-operation.  It's also worth noting that flexible arrays do no internal
-locking at all; if concurrent access to an array is possible, then the
-caller must arrange for appropriate mutual exclusion.
-
-The creation of a flexible array is done with :c:func:`flex_array_alloc()`::
-
-    #include <linux/flex_array.h>
-
-    struct flex_array *flex_array_alloc(int element_size,
-					unsigned int total,
-					gfp_t flags);
-
-The individual object size is provided by ``element_size``, while total is the
-maximum number of objects which can be stored in the array.  The flags
-argument is passed directly to the internal memory allocation calls.  With
-the current code, using flags to ask for high memory is likely to lead to
-notably unpleasant side effects.
-
-It is also possible to define flexible arrays at compile time with::
-
-    DEFINE_FLEX_ARRAY(name, element_size, total);
-
-This macro will result in a definition of an array with the given name; the
-element size and total will be checked for validity at compile time.
-
-Storing data into a flexible array is accomplished with a call to
-:c:func:`flex_array_put()`::
-
-    int flex_array_put(struct flex_array *array, unsigned int element_nr,
-    		       void *src, gfp_t flags);
-
-This call will copy the data from src into the array, in the position
-indicated by ``element_nr`` (which must be less than the maximum specified when
-the array was created).  If any memory allocations must be performed, flags
-will be used.  The return value is zero on success, a negative error code
-otherwise.
-
-There might possibly be a need to store data into a flexible array while
-running in some sort of atomic context; in this situation, sleeping in the
-memory allocator would be a bad thing.  That can be avoided by using
-``GFP_ATOMIC`` for the flags value, but, often, there is a better way.  The
-trick is to ensure that any needed memory allocations are done before
-entering atomic context, using :c:func:`flex_array_prealloc()`::
-
-    int flex_array_prealloc(struct flex_array *array, unsigned int start,
-			    unsigned int nr_elements, gfp_t flags);
-
-This function will ensure that memory for the elements indexed in the range
-defined by ``start`` and ``nr_elements`` has been allocated.  Thereafter, a
-``flex_array_put()`` call on an element in that range is guaranteed not to
-block.
-
-Getting data back out of the array is done with :c:func:`flex_array_get()`::
-
-    void *flex_array_get(struct flex_array *fa, unsigned int element_nr);
-
-The return value is a pointer to the data element, or NULL if that
-particular element has never been allocated.
-
-Note that it is possible to get back a valid pointer for an element which
-has never been stored in the array.  Memory for array elements is allocated
-one page at a time; a single allocation could provide memory for several
-adjacent elements.  Flexible array elements are normally initialized to the
-value ``FLEX_ARRAY_FREE`` (defined as 0x6c in <linux/poison.h>), so errors
-involving that number probably result from use of unstored array entries.
-Note that, if array elements are allocated with ``__GFP_ZERO``, they will be
-initialized to zero and this poisoning will not happen.
-
-Individual elements in the array can be cleared with
-:c:func:`flex_array_clear()`::
-
-    int flex_array_clear(struct flex_array *array, unsigned int element_nr);
-
-This function will set the given element to ``FLEX_ARRAY_FREE`` and return
-zero.  If storage for the indicated element is not allocated for the array,
-``flex_array_clear()`` will return ``-EINVAL`` instead.  Note that clearing an
-element does not release the storage associated with it; to reduce the
-allocated size of an array, call :c:func:`flex_array_shrink()`::
-
-    int flex_array_shrink(struct flex_array *array);
-
-The return value will be the number of pages of memory actually freed.
-This function works by scanning the array for pages containing nothing but
-``FLEX_ARRAY_FREE`` bytes, so (1) it can be expensive, and (2) it will not work
-if the array's pages are allocated with ``__GFP_ZERO``.
-
-It is possible to remove all elements of an array with a call to
-:c:func:`flex_array_free_parts()`::
-
-    void flex_array_free_parts(struct flex_array *array);
-
-This call frees all elements, but leaves the array itself in place.
-Freeing the entire array is done with :c:func:`flex_array_free()`::
-
-    void flex_array_free(struct flex_array *array);
-
-As of this writing, there are no users of flexible arrays in the mainline
-kernel.  The functions described here are also not exported to modules;
-that will probably be fixed when somebody comes up with a need for it.
-
-
-Flexible array functions
-------------------------
-
-.. kernel-doc:: include/linux/flex_array.h
diff --git a/Documentation/flexible-arrays.txt b/Documentation/flexible-arrays.txt
deleted file mode 100644
index a0f2989dd804..000000000000
--- a/Documentation/flexible-arrays.txt
+++ /dev/null
@@ -1,123 +0,0 @@
-===================================
-Using flexible arrays in the kernel
-===================================
-
-:Updated: Last updated for 2.6.32
-:Author: Jonathan Corbet <corbet@lwn.net>
-
-Large contiguous memory allocations can be unreliable in the Linux kernel.
-Kernel programmers will sometimes respond to this problem by allocating
-pages with vmalloc().  This solution not ideal, though.  On 32-bit systems,
-memory from vmalloc() must be mapped into a relatively small address space;
-it's easy to run out.  On SMP systems, the page table changes required by
-vmalloc() allocations can require expensive cross-processor interrupts on
-all CPUs.  And, on all systems, use of space in the vmalloc() range
-increases pressure on the translation lookaside buffer (TLB), reducing the
-performance of the system.
-
-In many cases, the need for memory from vmalloc() can be eliminated by
-piecing together an array from smaller parts; the flexible array library
-exists to make this task easier.
-
-A flexible array holds an arbitrary (within limits) number of fixed-sized
-objects, accessed via an integer index.  Sparse arrays are handled
-reasonably well.  Only single-page allocations are made, so memory
-allocation failures should be relatively rare.  The down sides are that the
-arrays cannot be indexed directly, individual object size cannot exceed the
-system page size, and putting data into a flexible array requires a copy
-operation.  It's also worth noting that flexible arrays do no internal
-locking at all; if concurrent access to an array is possible, then the
-caller must arrange for appropriate mutual exclusion.
-
-The creation of a flexible array is done with::
-
-    #include <linux/flex_array.h>
-
-    struct flex_array *flex_array_alloc(int element_size,
-					unsigned int total,
-					gfp_t flags);
-
-The individual object size is provided by element_size, while total is the
-maximum number of objects which can be stored in the array.  The flags
-argument is passed directly to the internal memory allocation calls.  With
-the current code, using flags to ask for high memory is likely to lead to
-notably unpleasant side effects.
-
-It is also possible to define flexible arrays at compile time with::
-
-    DEFINE_FLEX_ARRAY(name, element_size, total);
-
-This macro will result in a definition of an array with the given name; the
-element size and total will be checked for validity at compile time.
-
-Storing data into a flexible array is accomplished with a call to::
-
-    int flex_array_put(struct flex_array *array, unsigned int element_nr,
-    		       void *src, gfp_t flags);
-
-This call will copy the data from src into the array, in the position
-indicated by element_nr (which must be less than the maximum specified when
-the array was created).  If any memory allocations must be performed, flags
-will be used.  The return value is zero on success, a negative error code
-otherwise.
-
-There might possibly be a need to store data into a flexible array while
-running in some sort of atomic context; in this situation, sleeping in the
-memory allocator would be a bad thing.  That can be avoided by using
-GFP_ATOMIC for the flags value, but, often, there is a better way.  The
-trick is to ensure that any needed memory allocations are done before
-entering atomic context, using::
-
-    int flex_array_prealloc(struct flex_array *array, unsigned int start,
-			    unsigned int nr_elements, gfp_t flags);
-
-This function will ensure that memory for the elements indexed in the range
-defined by start and nr_elements has been allocated.  Thereafter, a
-flex_array_put() call on an element in that range is guaranteed not to
-block.
-
-Getting data back out of the array is done with::
-
-    void *flex_array_get(struct flex_array *fa, unsigned int element_nr);
-
-The return value is a pointer to the data element, or NULL if that
-particular element has never been allocated.
-
-Note that it is possible to get back a valid pointer for an element which
-has never been stored in the array.  Memory for array elements is allocated
-one page at a time; a single allocation could provide memory for several
-adjacent elements.  Flexible array elements are normally initialized to the
-value FLEX_ARRAY_FREE (defined as 0x6c in <linux/poison.h>), so errors
-involving that number probably result from use of unstored array entries.
-Note that, if array elements are allocated with __GFP_ZERO, they will be
-initialized to zero and this poisoning will not happen.
-
-Individual elements in the array can be cleared with::
-
-    int flex_array_clear(struct flex_array *array, unsigned int element_nr);
-
-This function will set the given element to FLEX_ARRAY_FREE and return
-zero.  If storage for the indicated element is not allocated for the array,
-flex_array_clear() will return -EINVAL instead.  Note that clearing an
-element does not release the storage associated with it; to reduce the
-allocated size of an array, call::
-
-    int flex_array_shrink(struct flex_array *array);
-
-The return value will be the number of pages of memory actually freed.
-This function works by scanning the array for pages containing nothing but
-FLEX_ARRAY_FREE bytes, so (1) it can be expensive, and (2) it will not work
-if the array's pages are allocated with __GFP_ZERO.
-
-It is possible to remove all elements of an array with a call to::
-
-    void flex_array_free_parts(struct flex_array *array);
-
-This call frees all elements, but leaves the array itself in place.
-Freeing the entire array is done with::
-
-    void flex_array_free(struct flex_array *array);
-
-As of this writing, there are no users of flexible arrays in the mainline
-kernel.  The functions described here are also not exported to modules;
-that will probably be fixed when somebody comes up with a need for it.
diff --git a/include/linux/flex_array.h b/include/linux/flex_array.h
deleted file mode 100644
index b94fa61b51fb..000000000000
--- a/include/linux/flex_array.h
+++ /dev/null
@@ -1,149 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _FLEX_ARRAY_H
-#define _FLEX_ARRAY_H
-
-#include <linux/types.h>
-#include <linux/reciprocal_div.h>
-#include <asm/page.h>
-
-#define FLEX_ARRAY_PART_SIZE PAGE_SIZE
-#define FLEX_ARRAY_BASE_SIZE PAGE_SIZE
-
-struct flex_array_part;
-
-/*
- * This is meant to replace cases where an array-like
- * structure has gotten too big to fit into kmalloc()
- * and the developer is getting tempted to use
- * vmalloc().
- */
-
-struct flex_array {
-	union {
-		struct {
-			int element_size;
-			int total_nr_elements;
-			int elems_per_part;
-			struct reciprocal_value reciprocal_elems;
-			struct flex_array_part *parts[];
-		};
-		/*
-		 * This little trick makes sure that
-		 * sizeof(flex_array) == PAGE_SIZE
-		 */
-		char padding[FLEX_ARRAY_BASE_SIZE];
-	};
-};
-
-/* Number of bytes left in base struct flex_array, excluding metadata */
-#define FLEX_ARRAY_BASE_BYTES_LEFT					\
-	(FLEX_ARRAY_BASE_SIZE - offsetof(struct flex_array, parts))
-
-/* Number of pointers in base to struct flex_array_part pages */
-#define FLEX_ARRAY_NR_BASE_PTRS						\
-	(FLEX_ARRAY_BASE_BYTES_LEFT / sizeof(struct flex_array_part *))
-
-/* Number of elements of size that fit in struct flex_array_part */
-#define FLEX_ARRAY_ELEMENTS_PER_PART(size)				\
-	(FLEX_ARRAY_PART_SIZE / size)
-
-/*
- * Defines a statically allocated flex array and ensures its parameters are
- * valid.
- */
-#define DEFINE_FLEX_ARRAY(__arrayname, __element_size, __total)		\
-	struct flex_array __arrayname = { { {				\
-		.element_size = (__element_size),			\
-		.total_nr_elements = (__total),				\
-	} } };								\
-	static inline void __arrayname##_invalid_parameter(void)	\
-	{								\
-		BUILD_BUG_ON((__total) > FLEX_ARRAY_NR_BASE_PTRS *	\
-			FLEX_ARRAY_ELEMENTS_PER_PART(__element_size));	\
-	}
-
-/**
- * flex_array_alloc() - Creates a flexible array.
- * @element_size:	individual object size.
- * @total:		maximum number of objects which can be stored.
- * @flags:		GFP flags
- *
- * Return:		Returns an object of structure flex_array.
- */
-struct flex_array *flex_array_alloc(int element_size, unsigned int total,
-		gfp_t flags);
-
-/**
- * flex_array_prealloc() - Ensures that memory for the elements indexed in the
- * range defined by start and nr_elements has been allocated.
- * @fa:			array to allocate memory to.
- * @start:		start address
- * @nr_elements:	number of elements to be allocated.
- * @flags:		GFP flags
- *
- */
-int flex_array_prealloc(struct flex_array *fa, unsigned int start,
-		unsigned int nr_elements, gfp_t flags);
-
-/**
- * flex_array_free() - Removes all elements of a flexible array.
- * @fa:		array to be freed.
- */
-void flex_array_free(struct flex_array *fa);
-
-/**
- * flex_array_free_parts() - Removes all elements of a flexible array, but
- * leaves the array itself in place.
- * @fa:		array to be emptied.
- */
-void flex_array_free_parts(struct flex_array *fa);
-
-/**
- * flex_array_put() - Stores data into a flexible array.
- * @fa:		array where element is to be stored.
- * @element_nr:	position to copy, must be less than the maximum specified when
- *		the array was created.
- * @src:	data source to be copied into the array.
- * @flags:	GFP flags
- *
- * Return:	Returns zero on success, a negative error code otherwise.
- */
-int flex_array_put(struct flex_array *fa, unsigned int element_nr, void *src,
-		gfp_t flags);
-
-/**
- * flex_array_clear() - Clears an individual element in the array, sets the
- * given element to FLEX_ARRAY_FREE.
- * @element_nr:	element position to clear.
- * @fa:		array to which element to be cleared belongs.
- *
- * Return:	Returns zero on success, -EINVAL otherwise.
- */
-int flex_array_clear(struct flex_array *fa, unsigned int element_nr);
-
-/**
- * flex_array_get() - Retrieves data into a flexible array.
- *
- * @element_nr:	Element position to retrieve data from.
- * @fa:		array from which data is to be retrieved.
- *
- * Return:	Returns a pointer to the data element, or NULL if that
- *		particular element has never been allocated.
- */
-void *flex_array_get(struct flex_array *fa, unsigned int element_nr);
-
-/**
- * flex_array_shrink() - Reduces the allocated size of an array.
- * @fa:		array to shrink.
- *
- * Return:	Returns number of pages of memory actually freed.
- *
- */
-int flex_array_shrink(struct flex_array *fa);
-
-#define flex_array_put_ptr(fa, nr, src, gfp) \
-	flex_array_put(fa, nr, (void *)&(src), gfp)
-
-void *flex_array_get_ptr(struct flex_array *fa, unsigned int element_nr);
-
-#endif /* _FLEX_ARRAY_H */
diff --git a/include/linux/poison.h b/include/linux/poison.h
index 5046bad0c1c5..d6d980a681c7 100644
--- a/include/linux/poison.h
+++ b/include/linux/poison.h
@@ -83,9 +83,6 @@
 #define MUTEX_DEBUG_FREE	0x22
 #define MUTEX_POISON_WW_CTX	((void *) 0x500 + POISON_POINTER_DELTA)
 
-/********** lib/flex_array.c **********/
-#define FLEX_ARRAY_FREE	0x6c	/* for use-after-free poisoning */
-
 /********** security/ **********/
 #define KEY_DESTROY		0xbd
 
diff --git a/lib/Makefile b/lib/Makefile
index b798b41d01ae..4e066120a0d6 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -35,7 +35,7 @@ obj-y	+= lockref.o
 
 obj-y += bcd.o div64.o sort.o parser.o debug_locks.o random32.o \
 	 bust_spinlocks.o kasprintf.o bitmap.o scatterlist.o \
-	 gcd.o lcm.o list_sort.o uuid.o flex_array.o iov_iter.o clz_ctz.o \
+	 gcd.o lcm.o list_sort.o uuid.o iov_iter.o clz_ctz.o \
 	 bsearch.o find_bit.o llist.o memweight.o kfifo.o \
 	 percpu-refcount.o rhashtable.o reciprocal_div.o \
 	 once.o refcount.o usercopy.o errseq.o bucket_locks.o \
diff --git a/lib/flex_array.c b/lib/flex_array.c
deleted file mode 100644
index 2eed22fa507c..000000000000
--- a/lib/flex_array.c
+++ /dev/null
@@ -1,398 +0,0 @@
-/*
- * Flexible array managed in PAGE_SIZE parts
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- *
- * Copyright IBM Corporation, 2009
- *
- * Author: Dave Hansen <dave@linux.vnet.ibm.com>
- */
-
-#include <linux/flex_array.h>
-#include <linux/slab.h>
-#include <linux/stddef.h>
-#include <linux/export.h>
-#include <linux/reciprocal_div.h>
-
-struct flex_array_part {
-	char elements[FLEX_ARRAY_PART_SIZE];
-};
-
-/*
- * If a user requests an allocation which is small
- * enough, we may simply use the space in the
- * flex_array->parts[] array to store the user
- * data.
- */
-static inline int elements_fit_in_base(struct flex_array *fa)
-{
-	int data_size = fa->element_size * fa->total_nr_elements;
-	if (data_size <= FLEX_ARRAY_BASE_BYTES_LEFT)
-		return 1;
-	return 0;
-}
-
-/**
- * flex_array_alloc - allocate a new flexible array
- * @element_size:	the size of individual elements in the array
- * @total:		total number of elements that this should hold
- * @flags:		page allocation flags to use for base array
- *
- * Note: all locking must be provided by the caller.
- *
- * @total is used to size internal structures.  If the user ever
- * accesses any array indexes >=@total, it will produce errors.
- *
- * The maximum number of elements is defined as: the number of
- * elements that can be stored in a page times the number of
- * page pointers that we can fit in the base structure or (using
- * integer math):
- *
- * 	(PAGE_SIZE/element_size) * (PAGE_SIZE-8)/sizeof(void *)
- *
- * Here's a table showing example capacities.  Note that the maximum
- * index that the get/put() functions is just nr_objects-1.   This
- * basically means that you get 4MB of storage on 32-bit and 2MB on
- * 64-bit.
- *
- *
- * Element size | Objects | Objects |
- * PAGE_SIZE=4k |  32-bit |  64-bit |
- * ---------------------------------|
- *      1 bytes | 4177920 | 2088960 |
- *      2 bytes | 2088960 | 1044480 |
- *      3 bytes | 1392300 |  696150 |
- *      4 bytes | 1044480 |  522240 |
- *     32 bytes |  130560 |   65408 |
- *     33 bytes |  126480 |   63240 |
- *   2048 bytes |    2040 |    1020 |
- *   2049 bytes |    1020 |     510 |
- *       void * | 1044480 |  261120 |
- *
- * Since 64-bit pointers are twice the size, we lose half the
- * capacity in the base structure.  Also note that no effort is made
- * to efficiently pack objects across page boundaries.
- */
-struct flex_array *flex_array_alloc(int element_size, unsigned int total,
-					gfp_t flags)
-{
-	struct flex_array *ret;
-	int elems_per_part = 0;
-	int max_size = 0;
-	struct reciprocal_value reciprocal_elems = { 0 };
-
-	if (element_size) {
-		elems_per_part = FLEX_ARRAY_ELEMENTS_PER_PART(element_size);
-		reciprocal_elems = reciprocal_value(elems_per_part);
-		max_size = FLEX_ARRAY_NR_BASE_PTRS * elems_per_part;
-	}
-
-	/* max_size will end up 0 if element_size > PAGE_SIZE */
-	if (total > max_size)
-		return NULL;
-	ret = kzalloc(sizeof(struct flex_array), flags);
-	if (!ret)
-		return NULL;
-	ret->element_size = element_size;
-	ret->total_nr_elements = total;
-	ret->elems_per_part = elems_per_part;
-	ret->reciprocal_elems = reciprocal_elems;
-	if (elements_fit_in_base(ret) && !(flags & __GFP_ZERO))
-		memset(&ret->parts[0], FLEX_ARRAY_FREE,
-						FLEX_ARRAY_BASE_BYTES_LEFT);
-	return ret;
-}
-EXPORT_SYMBOL(flex_array_alloc);
-
-static int fa_element_to_part_nr(struct flex_array *fa,
-					unsigned int element_nr)
-{
-	/*
-	 * if element_size == 0 we don't get here, so we never touch
-	 * the zeroed fa->reciprocal_elems, which would yield invalid
-	 * results
-	 */
-	return reciprocal_divide(element_nr, fa->reciprocal_elems);
-}
-
-/**
- * flex_array_free_parts - just free the second-level pages
- * @fa:		the flex array from which to free parts
- *
- * This is to be used in cases where the base 'struct flex_array'
- * has been statically allocated and should not be free.
- */
-void flex_array_free_parts(struct flex_array *fa)
-{
-	int part_nr;
-
-	if (elements_fit_in_base(fa))
-		return;
-	for (part_nr = 0; part_nr < FLEX_ARRAY_NR_BASE_PTRS; part_nr++)
-		kfree(fa->parts[part_nr]);
-}
-EXPORT_SYMBOL(flex_array_free_parts);
-
-void flex_array_free(struct flex_array *fa)
-{
-	flex_array_free_parts(fa);
-	kfree(fa);
-}
-EXPORT_SYMBOL(flex_array_free);
-
-static unsigned int index_inside_part(struct flex_array *fa,
-					unsigned int element_nr,
-					unsigned int part_nr)
-{
-	unsigned int part_offset;
-
-	part_offset = element_nr - part_nr * fa->elems_per_part;
-	return part_offset * fa->element_size;
-}
-
-static struct flex_array_part *
-__fa_get_part(struct flex_array *fa, int part_nr, gfp_t flags)
-{
-	struct flex_array_part *part = fa->parts[part_nr];
-	if (!part) {
-		part = kmalloc(sizeof(struct flex_array_part), flags);
-		if (!part)
-			return NULL;
-		if (!(flags & __GFP_ZERO))
-			memset(part, FLEX_ARRAY_FREE,
-				sizeof(struct flex_array_part));
-		fa->parts[part_nr] = part;
-	}
-	return part;
-}
-
-/**
- * flex_array_put - copy data into the array at @element_nr
- * @fa:		the flex array to copy data into
- * @element_nr:	index of the position in which to insert
- * 		the new element.
- * @src:	address of data to copy into the array
- * @flags:	page allocation flags to use for array expansion
- *
- *
- * Note that this *copies* the contents of @src into
- * the array.  If you are trying to store an array of
- * pointers, make sure to pass in &ptr instead of ptr.
- * You may instead wish to use the flex_array_put_ptr()
- * helper function.
- *
- * Locking must be provided by the caller.
- */
-int flex_array_put(struct flex_array *fa, unsigned int element_nr, void *src,
-			gfp_t flags)
-{
-	int part_nr = 0;
-	struct flex_array_part *part;
-	void *dst;
-
-	if (element_nr >= fa->total_nr_elements)
-		return -ENOSPC;
-	if (!fa->element_size)
-		return 0;
-	if (elements_fit_in_base(fa))
-		part = (struct flex_array_part *)&fa->parts[0];
-	else {
-		part_nr = fa_element_to_part_nr(fa, element_nr);
-		part = __fa_get_part(fa, part_nr, flags);
-		if (!part)
-			return -ENOMEM;
-	}
-	dst = &part->elements[index_inside_part(fa, element_nr, part_nr)];
-	memcpy(dst, src, fa->element_size);
-	return 0;
-}
-EXPORT_SYMBOL(flex_array_put);
-
-/**
- * flex_array_clear - clear element in array at @element_nr
- * @fa:		the flex array of the element.
- * @element_nr:	index of the position to clear.
- *
- * Locking must be provided by the caller.
- */
-int flex_array_clear(struct flex_array *fa, unsigned int element_nr)
-{
-	int part_nr = 0;
-	struct flex_array_part *part;
-	void *dst;
-
-	if (element_nr >= fa->total_nr_elements)
-		return -ENOSPC;
-	if (!fa->element_size)
-		return 0;
-	if (elements_fit_in_base(fa))
-		part = (struct flex_array_part *)&fa->parts[0];
-	else {
-		part_nr = fa_element_to_part_nr(fa, element_nr);
-		part = fa->parts[part_nr];
-		if (!part)
-			return -EINVAL;
-	}
-	dst = &part->elements[index_inside_part(fa, element_nr, part_nr)];
-	memset(dst, FLEX_ARRAY_FREE, fa->element_size);
-	return 0;
-}
-EXPORT_SYMBOL(flex_array_clear);
-
-/**
- * flex_array_prealloc - guarantee that array space exists
- * @fa:			the flex array for which to preallocate parts
- * @start:		index of first array element for which space is allocated
- * @nr_elements:	number of elements for which space is allocated
- * @flags:		page allocation flags
- *
- * This will guarantee that no future calls to flex_array_put()
- * will allocate memory.  It can be used if you are expecting to
- * be holding a lock or in some atomic context while writing
- * data into the array.
- *
- * Locking must be provided by the caller.
- */
-int flex_array_prealloc(struct flex_array *fa, unsigned int start,
-			unsigned int nr_elements, gfp_t flags)
-{
-	int start_part;
-	int end_part;
-	int part_nr;
-	unsigned int end;
-	struct flex_array_part *part;
-
-	if (!start && !nr_elements)
-		return 0;
-	if (start >= fa->total_nr_elements)
-		return -ENOSPC;
-	if (!nr_elements)
-		return 0;
-
-	end = start + nr_elements - 1;
-
-	if (end >= fa->total_nr_elements)
-		return -ENOSPC;
-	if (!fa->element_size)
-		return 0;
-	if (elements_fit_in_base(fa))
-		return 0;
-	start_part = fa_element_to_part_nr(fa, start);
-	end_part = fa_element_to_part_nr(fa, end);
-	for (part_nr = start_part; part_nr <= end_part; part_nr++) {
-		part = __fa_get_part(fa, part_nr, flags);
-		if (!part)
-			return -ENOMEM;
-	}
-	return 0;
-}
-EXPORT_SYMBOL(flex_array_prealloc);
-
-/**
- * flex_array_get - pull data back out of the array
- * @fa:		the flex array from which to extract data
- * @element_nr:	index of the element to fetch from the array
- *
- * Returns a pointer to the data at index @element_nr.  Note
- * that this is a copy of the data that was passed in.  If you
- * are using this to store pointers, you'll get back &ptr.  You
- * may instead wish to use the flex_array_get_ptr helper.
- *
- * Locking must be provided by the caller.
- */
-void *flex_array_get(struct flex_array *fa, unsigned int element_nr)
-{
-	int part_nr = 0;
-	struct flex_array_part *part;
-
-	if (!fa->element_size)
-		return NULL;
-	if (element_nr >= fa->total_nr_elements)
-		return NULL;
-	if (elements_fit_in_base(fa))
-		part = (struct flex_array_part *)&fa->parts[0];
-	else {
-		part_nr = fa_element_to_part_nr(fa, element_nr);
-		part = fa->parts[part_nr];
-		if (!part)
-			return NULL;
-	}
-	return &part->elements[index_inside_part(fa, element_nr, part_nr)];
-}
-EXPORT_SYMBOL(flex_array_get);
-
-/**
- * flex_array_get_ptr - pull a ptr back out of the array
- * @fa:		the flex array from which to extract data
- * @element_nr:	index of the element to fetch from the array
- *
- * Returns the pointer placed in the flex array at element_nr using
- * flex_array_put_ptr().  This function should not be called if the
- * element in question was not set using the _put_ptr() helper.
- */
-void *flex_array_get_ptr(struct flex_array *fa, unsigned int element_nr)
-{
-	void **tmp;
-
-	tmp = flex_array_get(fa, element_nr);
-	if (!tmp)
-		return NULL;
-
-	return *tmp;
-}
-EXPORT_SYMBOL(flex_array_get_ptr);
-
-static int part_is_free(struct flex_array_part *part)
-{
-	int i;
-
-	for (i = 0; i < sizeof(struct flex_array_part); i++)
-		if (part->elements[i] != FLEX_ARRAY_FREE)
-			return 0;
-	return 1;
-}
-
-/**
- * flex_array_shrink - free unused second-level pages
- * @fa:		the flex array to shrink
- *
- * Frees all second-level pages that consist solely of unused
- * elements.  Returns the number of pages freed.
- *
- * Locking must be provided by the caller.
- */
-int flex_array_shrink(struct flex_array *fa)
-{
-	struct flex_array_part *part;
-	int part_nr;
-	int ret = 0;
-
-	if (!fa->total_nr_elements || !fa->element_size)
-		return 0;
-	if (elements_fit_in_base(fa))
-		return ret;
-	for (part_nr = 0; part_nr < FLEX_ARRAY_NR_BASE_PTRS; part_nr++) {
-		part = fa->parts[part_nr];
-		if (!part)
-			continue;
-		if (part_is_free(part)) {
-			fa->parts[part_nr] = NULL;
-			kfree(part);
-			ret++;
-		}
-	}
-	return ret;
-}
-EXPORT_SYMBOL(flex_array_shrink);
diff --git a/tools/include/linux/poison.h b/tools/include/linux/poison.h
index 9fdcd3eaac3b..d29725769107 100644
--- a/tools/include/linux/poison.h
+++ b/tools/include/linux/poison.h
@@ -87,9 +87,6 @@
 #define MUTEX_DEBUG_INIT	0x11
 #define MUTEX_DEBUG_FREE	0x22
 
-/********** lib/flex_array.c **********/
-#define FLEX_ARRAY_FREE	0x6c	/* for use-after-free poisoning */
-
 /********** security/ **********/
 #define KEY_DESTROY		0xbd
 
-- 
cgit v1.2.3


From 9a751b999d17a037d5562318d3a21dd5e5bd55eb Mon Sep 17 00:00:00 2001
From: José Roberto de Souza <jose.souza@intel.com>
Date: Fri, 8 Mar 2019 13:56:46 -0800
Subject: drm/i915: Add new ICL PCI ID
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A new PCI ID for ICL was added to BSpec, lets keep it in tight sync
as ICL is not protected by the alpha support flag anymore.

v2: Keeping BSpec order(Rodrigo)

BSepc: 21141
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: José Roberto de Souza <jose.souza@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190308215646.30436-1-jose.souza@intel.com
---
 include/drm/i915_pciids.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/drm/i915_pciids.h b/include/drm/i915_pciids.h
index d2fad7b0fcf6..d200000feeaa 100644
--- a/include/drm/i915_pciids.h
+++ b/include/drm/i915_pciids.h
@@ -469,6 +469,7 @@
 	INTEL_VGA_DEVICE(0x8A57, info), \
 	INTEL_VGA_DEVICE(0x8A56, info), \
 	INTEL_VGA_DEVICE(0x8A71, info), \
-	INTEL_VGA_DEVICE(0x8A70, info)
+	INTEL_VGA_DEVICE(0x8A70, info), \
+	INTEL_VGA_DEVICE(0x8A53, info)
 
 #endif /* _I915_PCIIDS_H */
-- 
cgit v1.2.3


From f87b543af45ea217fa1d000483f9a40944c1ff73 Mon Sep 17 00:00:00 2001
From: Olga Kornievskaia <kolga@netapp.com>
Date: Tue, 12 Mar 2019 12:06:35 -0400
Subject: fix null pointer deref in tracepoints in back channel

Backchannel doesn't have the rq_task->tk_clientid pointer set.

Otherwise can lead to the following oops:
ocalhost login: [  111.385319] BUG: unable to handle kernel NULL pointer dereference at 0000000000000004
[  111.388073] #PF error: [normal kernel read fault]
[  111.389452] PGD 80000000290d8067 P4D 80000000290d8067 PUD 75f25067 PMD 0
[  111.391224] Oops: 0000 [#1] SMP PTI
[  111.392151] CPU: 0 PID: 3533 Comm: NFSv4 callback Not tainted 5.0.0-rc7+ #1
[  111.393787] Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 07/02/2015
[  111.396340] RIP: 0010:trace_event_raw_event_xprt_enq_xmit+0x6f/0xf0 [sunrpc]
[  111.397974] Code: 00 00 00 48 89 ee 48 89 e7 e8 bd 0a 85 d7 48 85 c0 74 4a 41 0f b7 94 24 e0 00 00 00 48 89 e7 89 50 08 49 8b 94 24 a8 00 00 00 <8b> 52 04 89 50 0c 49 8b 94 24 c0 00 00 00 8b 92 a8 00 00 00 0f ca
[  111.402215] RSP: 0018:ffffb98743263cf8 EFLAGS: 00010286
[  111.403406] RAX: ffffa0890fc3bc88 RBX: 0000000000000003 RCX: 0000000000000000
[  111.405057] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffffb98743263cf8
[  111.406656] RBP: ffffa0896f5368f0 R08: 0000000000000246 R09: 0000000000000000
[  111.408437] R10: ffffe19b01c01500 R11: 0000000000000000 R12: ffffa08977d28a00
[  111.410210] R13: 0000000000000004 R14: ffffa089315303f0 R15: ffffa08931530000
[  111.411856] FS:  0000000000000000(0000) GS:ffffa0897bc00000(0000) knlGS:0000000000000000
[  111.413699] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  111.415068] CR2: 0000000000000004 CR3: 000000002ac90004 CR4: 00000000001606f0
[  111.416745] Call Trace:
[  111.417339]  xprt_request_enqueue_transmit+0x2b6/0x4a0 [sunrpc]
[  111.418709]  ? rpc_task_need_encode+0x40/0x40 [sunrpc]
[  111.419957]  call_bc_transmit+0xd5/0x170 [sunrpc]
[  111.421067]  __rpc_execute+0x7e/0x3f0 [sunrpc]
[  111.422177]  rpc_run_bc_task+0x78/0xd0 [sunrpc]
[  111.423212]  bc_svc_process+0x281/0x340 [sunrpc]
[  111.424325]  nfs41_callback_svc+0x130/0x1c0 [nfsv4]
[  111.425430]  ? remove_wait_queue+0x60/0x60
[  111.426398]  kthread+0xf5/0x130
[  111.427155]  ? nfs_callback_authenticate+0x50/0x50 [nfsv4]
[  111.428388]  ? kthread_bind+0x10/0x10
[  111.429270]  ret_from_fork+0x1f/0x30

localhost login: [  467.462259] BUG: unable to handle kernel NULL pointer dereference at 0000000000000004
[  467.464411] #PF error: [normal kernel read fault]
[  467.465445] PGD 80000000728c1067 P4D 80000000728c1067 PUD 728c0067 PMD 0
[  467.466980] Oops: 0000 [#1] SMP PTI
[  467.467759] CPU: 0 PID: 3517 Comm: NFSv4 callback Not tainted 5.0.0-rc7+ #1
[  467.469393] Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 07/02/2015
[  467.471840] RIP: 0010:trace_event_raw_event_xprt_transmit+0x7c/0xf0 [sunrpc]
[  467.473392] Code: f6 48 85 c0 74 4b 49 8b 94 24 98 00 00 00 48 89 e7 0f b7 92 e0 00 00 00 89 50 08 49 8b 94 24 98 00 00 00 48 8b 92 a8 00 00 00 <8b> 52 04 89 50 0c 41 8b 94 24 a8 00 00 00 0f ca 89 50 10 41 8b 94
[  467.477605] RSP: 0018:ffffabe7434fbcd0 EFLAGS: 00010282
[  467.478793] RAX: ffff99720fc3bce0 RBX: 0000000000000003 RCX: 0000000000000000
[  467.480409] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffffabe7434fbcd0
[  467.482011] RBP: ffff99726f631948 R08: 0000000000000246 R09: 0000000000000000
[  467.483591] R10: 0000000070000000 R11: 0000000000000000 R12: ffff997277dfcc00
[  467.485226] R13: 0000000000000000 R14: 0000000000000000 R15: ffff99722fecdca8
[  467.486830] FS:  0000000000000000(0000) GS:ffff99727bc00000(0000) knlGS:0000000000000000
[  467.488596] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  467.489931] CR2: 0000000000000004 CR3: 00000000270e6006 CR4: 00000000001606f0
[  467.491559] Call Trace:
[  467.492128]  xprt_transmit+0x303/0x3f0 [sunrpc]
[  467.493143]  ? rpc_task_need_encode+0x40/0x40 [sunrpc]
[  467.494328]  call_bc_transmit+0x49/0x170 [sunrpc]
[  467.495379]  __rpc_execute+0x7e/0x3f0 [sunrpc]
[  467.496451]  rpc_run_bc_task+0x78/0xd0 [sunrpc]
[  467.497467]  bc_svc_process+0x281/0x340 [sunrpc]
[  467.498507]  nfs41_callback_svc+0x130/0x1c0 [nfsv4]
[  467.499751]  ? remove_wait_queue+0x60/0x60
[  467.500686]  kthread+0xf5/0x130
[  467.501438]  ? nfs_callback_authenticate+0x50/0x50 [nfsv4]
[  467.502640]  ? kthread_bind+0x10/0x10
[  467.503454]  ret_from_fork+0x1f/0x30

Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 include/trace/events/sunrpc.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
index 8451f30c6a0f..7e899e635d33 100644
--- a/include/trace/events/sunrpc.h
+++ b/include/trace/events/sunrpc.h
@@ -712,7 +712,8 @@ TRACE_EVENT(xprt_transmit,
 
 	TP_fast_assign(
 		__entry->task_id = rqst->rq_task->tk_pid;
-		__entry->client_id = rqst->rq_task->tk_client->cl_clid;
+		__entry->client_id = rqst->rq_task->tk_client ?
+			rqst->rq_task->tk_client->cl_clid : -1;
 		__entry->xid = be32_to_cpu(rqst->rq_xid);
 		__entry->seqno = rqst->rq_seqno;
 		__entry->status = status;
@@ -742,7 +743,8 @@ TRACE_EVENT(xprt_enq_xmit,
 
 	TP_fast_assign(
 		__entry->task_id = task->tk_pid;
-		__entry->client_id = task->tk_client->cl_clid;
+		__entry->client_id = task->tk_client ?
+			task->tk_client->cl_clid : -1;
 		__entry->xid = be32_to_cpu(task->tk_rqstp->rq_xid);
 		__entry->seqno = task->tk_rqstp->rq_seqno;
 		__entry->stage = stage;
-- 
cgit v1.2.3


From 010d5166bbe95523e8584f3caca9f1bbeac9ea6e Mon Sep 17 00:00:00 2001
From: Abel Vesa <abel.vesa@nxp.com>
Date: Tue, 5 Mar 2019 09:49:16 +0000
Subject: dt-bindings: clock: imx8mq: Fix numbering overlaps and gaps

IMX8MQ_CLK_USB_PHY_REF changes from 163 to 153, this way removing the gap.
All the following clock ids are now decreased by 10 to keep the numbering
right. Doing this, the IMX8MQ_CLK_CSI2_CORE is not overlapped with
IMX8MQ_CLK_GPT1 anymore. IMX8MQ_CLK_GPT1_ROOT changes from 193 to 183 and
all the following ids are updated accordingly.

Reported-by: Patrick Wildt <patrick@blueri.se>
Fixes: 1cf3817b ("dt-bindings: Add binding for i.MX8MQ CCM")
Signed-off-by: Abel Vesa <abel.vesa@nxp.com>
Signed-off-by: Stephen Boyd <sboyd@kernel.org>
---
 include/dt-bindings/clock/imx8mq-clock.h | 220 +++++++++++++++----------------
 1 file changed, 110 insertions(+), 110 deletions(-)

(limited to 'include')

diff --git a/include/dt-bindings/clock/imx8mq-clock.h b/include/dt-bindings/clock/imx8mq-clock.h
index b58cc643c9c9..6677e920dc2d 100644
--- a/include/dt-bindings/clock/imx8mq-clock.h
+++ b/include/dt-bindings/clock/imx8mq-clock.h
@@ -245,160 +245,160 @@
 /* USB_CORE_REF */
 #define IMX8MQ_CLK_USB_CORE_REF		152
 /* USB_PHY_REF */
-#define IMX8MQ_CLK_USB_PHY_REF		163
+#define IMX8MQ_CLK_USB_PHY_REF		153
 /* ECSPI1 */
-#define IMX8MQ_CLK_ECSPI1		164
+#define IMX8MQ_CLK_ECSPI1		154
 /* ECSPI2 */
-#define IMX8MQ_CLK_ECSPI2		165
+#define IMX8MQ_CLK_ECSPI2		155
 /* PWM1 */
-#define IMX8MQ_CLK_PWM1			166
+#define IMX8MQ_CLK_PWM1			156
 /* PWM2 */
-#define IMX8MQ_CLK_PWM2			167
+#define IMX8MQ_CLK_PWM2			157
 /* PWM3 */
-#define IMX8MQ_CLK_PWM3			168
+#define IMX8MQ_CLK_PWM3			158
 /* PWM4 */
-#define IMX8MQ_CLK_PWM4			169
+#define IMX8MQ_CLK_PWM4			159
 /* GPT1 */
-#define IMX8MQ_CLK_GPT1			170
+#define IMX8MQ_CLK_GPT1			160
 /* WDOG */
-#define IMX8MQ_CLK_WDOG			171
+#define IMX8MQ_CLK_WDOG			161
 /* WRCLK */
-#define IMX8MQ_CLK_WRCLK		172
+#define IMX8MQ_CLK_WRCLK		162
 /* DSI_CORE */
-#define IMX8MQ_CLK_DSI_CORE		173
+#define IMX8MQ_CLK_DSI_CORE		163
 /* DSI_PHY */
-#define IMX8MQ_CLK_DSI_PHY_REF		174
+#define IMX8MQ_CLK_DSI_PHY_REF		164
 /* DSI_DBI */
-#define IMX8MQ_CLK_DSI_DBI		175
+#define IMX8MQ_CLK_DSI_DBI		165
 /*DSI_ESC */
-#define IMX8MQ_CLK_DSI_ESC		176
+#define IMX8MQ_CLK_DSI_ESC		166
 /* CSI1_CORE */
-#define IMX8MQ_CLK_CSI1_CORE		177
+#define IMX8MQ_CLK_CSI1_CORE		167
 /* CSI1_PHY */
-#define IMX8MQ_CLK_CSI1_PHY_REF		178
+#define IMX8MQ_CLK_CSI1_PHY_REF		168
 /* CSI_ESC */
-#define IMX8MQ_CLK_CSI1_ESC		179
+#define IMX8MQ_CLK_CSI1_ESC		169
 /* CSI2_CORE */
 #define IMX8MQ_CLK_CSI2_CORE		170
 /* CSI2_PHY */
-#define IMX8MQ_CLK_CSI2_PHY_REF		181
+#define IMX8MQ_CLK_CSI2_PHY_REF		171
 /* CSI2_ESC */
-#define IMX8MQ_CLK_CSI2_ESC		182
+#define IMX8MQ_CLK_CSI2_ESC		172
 /* PCIE2_CTRL */
-#define IMX8MQ_CLK_PCIE2_CTRL		183
+#define IMX8MQ_CLK_PCIE2_CTRL		173
 /* PCIE2_PHY */
-#define IMX8MQ_CLK_PCIE2_PHY		184
+#define IMX8MQ_CLK_PCIE2_PHY		174
 /* PCIE2_AUX */
-#define IMX8MQ_CLK_PCIE2_AUX		185
+#define IMX8MQ_CLK_PCIE2_AUX		175
 /* ECSPI3 */
-#define IMX8MQ_CLK_ECSPI3		186
+#define IMX8MQ_CLK_ECSPI3		176
 
 /* CCGR clocks */
-#define IMX8MQ_CLK_A53_ROOT			187
-#define IMX8MQ_CLK_DRAM_ROOT			188
-#define IMX8MQ_CLK_ECSPI1_ROOT			189
+#define IMX8MQ_CLK_A53_ROOT			177
+#define IMX8MQ_CLK_DRAM_ROOT			178
+#define IMX8MQ_CLK_ECSPI1_ROOT			179
 #define IMX8MQ_CLK_ECSPI2_ROOT			180
 #define IMX8MQ_CLK_ECSPI3_ROOT			181
 #define IMX8MQ_CLK_ENET1_ROOT			182
-#define IMX8MQ_CLK_GPT1_ROOT			193
-#define IMX8MQ_CLK_I2C1_ROOT			194
-#define IMX8MQ_CLK_I2C2_ROOT			195
-#define IMX8MQ_CLK_I2C3_ROOT			196
-#define IMX8MQ_CLK_I2C4_ROOT			197
-#define IMX8MQ_CLK_M4_ROOT			198
-#define IMX8MQ_CLK_PCIE1_ROOT			199
-#define IMX8MQ_CLK_PCIE2_ROOT			200
-#define IMX8MQ_CLK_PWM1_ROOT			201
-#define IMX8MQ_CLK_PWM2_ROOT			202
-#define IMX8MQ_CLK_PWM3_ROOT			203
-#define IMX8MQ_CLK_PWM4_ROOT			204
-#define IMX8MQ_CLK_QSPI_ROOT			205
-#define IMX8MQ_CLK_SAI1_ROOT			206
-#define IMX8MQ_CLK_SAI2_ROOT			207
-#define IMX8MQ_CLK_SAI3_ROOT			208
-#define IMX8MQ_CLK_SAI4_ROOT			209
-#define IMX8MQ_CLK_SAI5_ROOT			210
-#define IMX8MQ_CLK_SAI6_ROOT			212
-#define IMX8MQ_CLK_UART1_ROOT			213
-#define IMX8MQ_CLK_UART2_ROOT			214
-#define IMX8MQ_CLK_UART3_ROOT			215
-#define IMX8MQ_CLK_UART4_ROOT			216
-#define IMX8MQ_CLK_USB1_CTRL_ROOT		217
-#define IMX8MQ_CLK_USB2_CTRL_ROOT		218
-#define IMX8MQ_CLK_USB1_PHY_ROOT		219
-#define IMX8MQ_CLK_USB2_PHY_ROOT		220
-#define IMX8MQ_CLK_USDHC1_ROOT			221
-#define IMX8MQ_CLK_USDHC2_ROOT			222
-#define IMX8MQ_CLK_WDOG1_ROOT			223
-#define IMX8MQ_CLK_WDOG2_ROOT			224
-#define IMX8MQ_CLK_WDOG3_ROOT			225
-#define IMX8MQ_CLK_GPU_ROOT			226
-#define IMX8MQ_CLK_HEVC_ROOT			227
-#define IMX8MQ_CLK_AVC_ROOT			228
-#define IMX8MQ_CLK_VP9_ROOT			229
-#define IMX8MQ_CLK_HEVC_INTER_ROOT		230
-#define IMX8MQ_CLK_DISP_ROOT			231
-#define IMX8MQ_CLK_HDMI_ROOT			232
-#define IMX8MQ_CLK_HDMI_PHY_ROOT		233
-#define IMX8MQ_CLK_VPU_DEC_ROOT			234
-#define IMX8MQ_CLK_CSI1_ROOT			235
-#define IMX8MQ_CLK_CSI2_ROOT			236
-#define IMX8MQ_CLK_RAWNAND_ROOT			237
-#define IMX8MQ_CLK_SDMA1_ROOT			238
-#define IMX8MQ_CLK_SDMA2_ROOT			239
-#define IMX8MQ_CLK_VPU_G1_ROOT			240
-#define IMX8MQ_CLK_VPU_G2_ROOT			241
+#define IMX8MQ_CLK_GPT1_ROOT			183
+#define IMX8MQ_CLK_I2C1_ROOT			184
+#define IMX8MQ_CLK_I2C2_ROOT			185
+#define IMX8MQ_CLK_I2C3_ROOT			186
+#define IMX8MQ_CLK_I2C4_ROOT			187
+#define IMX8MQ_CLK_M4_ROOT			188
+#define IMX8MQ_CLK_PCIE1_ROOT			189
+#define IMX8MQ_CLK_PCIE2_ROOT			190
+#define IMX8MQ_CLK_PWM1_ROOT			191
+#define IMX8MQ_CLK_PWM2_ROOT			192
+#define IMX8MQ_CLK_PWM3_ROOT			193
+#define IMX8MQ_CLK_PWM4_ROOT			194
+#define IMX8MQ_CLK_QSPI_ROOT			195
+#define IMX8MQ_CLK_SAI1_ROOT			196
+#define IMX8MQ_CLK_SAI2_ROOT			197
+#define IMX8MQ_CLK_SAI3_ROOT			198
+#define IMX8MQ_CLK_SAI4_ROOT			199
+#define IMX8MQ_CLK_SAI5_ROOT			200
+#define IMX8MQ_CLK_SAI6_ROOT			201
+#define IMX8MQ_CLK_UART1_ROOT			202
+#define IMX8MQ_CLK_UART2_ROOT			203
+#define IMX8MQ_CLK_UART3_ROOT			204
+#define IMX8MQ_CLK_UART4_ROOT			205
+#define IMX8MQ_CLK_USB1_CTRL_ROOT		206
+#define IMX8MQ_CLK_USB2_CTRL_ROOT		207
+#define IMX8MQ_CLK_USB1_PHY_ROOT		208
+#define IMX8MQ_CLK_USB2_PHY_ROOT		209
+#define IMX8MQ_CLK_USDHC1_ROOT			210
+#define IMX8MQ_CLK_USDHC2_ROOT			211
+#define IMX8MQ_CLK_WDOG1_ROOT			212
+#define IMX8MQ_CLK_WDOG2_ROOT			213
+#define IMX8MQ_CLK_WDOG3_ROOT			214
+#define IMX8MQ_CLK_GPU_ROOT			215
+#define IMX8MQ_CLK_HEVC_ROOT			216
+#define IMX8MQ_CLK_AVC_ROOT			217
+#define IMX8MQ_CLK_VP9_ROOT			218
+#define IMX8MQ_CLK_HEVC_INTER_ROOT		219
+#define IMX8MQ_CLK_DISP_ROOT			220
+#define IMX8MQ_CLK_HDMI_ROOT			221
+#define IMX8MQ_CLK_HDMI_PHY_ROOT		222
+#define IMX8MQ_CLK_VPU_DEC_ROOT			223
+#define IMX8MQ_CLK_CSI1_ROOT			224
+#define IMX8MQ_CLK_CSI2_ROOT			225
+#define IMX8MQ_CLK_RAWNAND_ROOT			226
+#define IMX8MQ_CLK_SDMA1_ROOT			227
+#define IMX8MQ_CLK_SDMA2_ROOT			228
+#define IMX8MQ_CLK_VPU_G1_ROOT			229
+#define IMX8MQ_CLK_VPU_G2_ROOT			230
 
 /* SCCG PLL GATE */
-#define IMX8MQ_SYS1_PLL_OUT			242
-#define IMX8MQ_SYS2_PLL_OUT			243
-#define IMX8MQ_SYS3_PLL_OUT			244
-#define IMX8MQ_DRAM_PLL_OUT			245
-
-#define IMX8MQ_GPT_3M_CLK			246
-
-#define IMX8MQ_CLK_IPG_ROOT			247
-#define IMX8MQ_CLK_IPG_AUDIO_ROOT		248
-#define IMX8MQ_CLK_SAI1_IPG			249
-#define IMX8MQ_CLK_SAI2_IPG			250
-#define IMX8MQ_CLK_SAI3_IPG			251
-#define IMX8MQ_CLK_SAI4_IPG			252
-#define IMX8MQ_CLK_SAI5_IPG			253
-#define IMX8MQ_CLK_SAI6_IPG			254
+#define IMX8MQ_SYS1_PLL_OUT			231
+#define IMX8MQ_SYS2_PLL_OUT			232
+#define IMX8MQ_SYS3_PLL_OUT			233
+#define IMX8MQ_DRAM_PLL_OUT			234
+
+#define IMX8MQ_GPT_3M_CLK			235
+
+#define IMX8MQ_CLK_IPG_ROOT			236
+#define IMX8MQ_CLK_IPG_AUDIO_ROOT		237
+#define IMX8MQ_CLK_SAI1_IPG			238
+#define IMX8MQ_CLK_SAI2_IPG			239
+#define IMX8MQ_CLK_SAI3_IPG			240
+#define IMX8MQ_CLK_SAI4_IPG			241
+#define IMX8MQ_CLK_SAI5_IPG			242
+#define IMX8MQ_CLK_SAI6_IPG			243
 
 /* DSI AHB/IPG clocks */
 /* rxesc clock */
-#define IMX8MQ_CLK_DSI_AHB			255
+#define IMX8MQ_CLK_DSI_AHB			244
 /* txesc clock */
-#define IMX8MQ_CLK_DSI_IPG_DIV                  256
+#define IMX8MQ_CLK_DSI_IPG_DIV                  245
 
-#define IMX8MQ_CLK_TMU_ROOT			257
+#define IMX8MQ_CLK_TMU_ROOT			246
 
 /* Display root clocks */
-#define IMX8MQ_CLK_DISP_AXI_ROOT		258
-#define IMX8MQ_CLK_DISP_APB_ROOT		259
-#define IMX8MQ_CLK_DISP_RTRM_ROOT		260
+#define IMX8MQ_CLK_DISP_AXI_ROOT		247
+#define IMX8MQ_CLK_DISP_APB_ROOT		248
+#define IMX8MQ_CLK_DISP_RTRM_ROOT		249
 
-#define IMX8MQ_CLK_OCOTP_ROOT			261
+#define IMX8MQ_CLK_OCOTP_ROOT			250
 
-#define IMX8MQ_CLK_DRAM_ALT_ROOT		262
-#define IMX8MQ_CLK_DRAM_CORE			263
+#define IMX8MQ_CLK_DRAM_ALT_ROOT		251
+#define IMX8MQ_CLK_DRAM_CORE			252
 
-#define IMX8MQ_CLK_MU_ROOT			264
-#define IMX8MQ_VIDEO2_PLL_OUT			265
+#define IMX8MQ_CLK_MU_ROOT			253
+#define IMX8MQ_VIDEO2_PLL_OUT			254
 
-#define IMX8MQ_CLK_CLKO2			266
+#define IMX8MQ_CLK_CLKO2			255
 
-#define IMX8MQ_CLK_NAND_USDHC_BUS_RAWNAND_CLK	267
+#define IMX8MQ_CLK_NAND_USDHC_BUS_RAWNAND_CLK	256
 
-#define IMX8MQ_CLK_CLKO1			268
-#define IMX8MQ_CLK_ARM				269
+#define IMX8MQ_CLK_CLKO1			257
+#define IMX8MQ_CLK_ARM				258
 
-#define IMX8MQ_CLK_GPIO1_ROOT			270
-#define IMX8MQ_CLK_GPIO2_ROOT			271
-#define IMX8MQ_CLK_GPIO3_ROOT			272
-#define IMX8MQ_CLK_GPIO4_ROOT			273
-#define IMX8MQ_CLK_GPIO5_ROOT			274
+#define IMX8MQ_CLK_GPIO1_ROOT			259
+#define IMX8MQ_CLK_GPIO2_ROOT			260
+#define IMX8MQ_CLK_GPIO3_ROOT			261
+#define IMX8MQ_CLK_GPIO4_ROOT			262
+#define IMX8MQ_CLK_GPIO5_ROOT			263
 
-#define IMX8MQ_CLK_END				275
+#define IMX8MQ_CLK_END				264
 #endif /* __DT_BINDINGS_CLOCK_IMX8MQ_H */
-- 
cgit v1.2.3


From 68b79cdc6de97fe270ceb40082a4aa6ad3e41ea7 Mon Sep 17 00:00:00 2001
From: Zeng Guangyue <zengguangyue@hisilicon.com>
Date: Mon, 18 Feb 2019 14:26:41 +0800
Subject: f2fs: correct spelling mistake

correct spelling mistake for "nunmber"

Signed-off-by: Zeng Guangyue <zengguangyue@hisilicon.com>
Reviewed-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 include/linux/f2fs_fs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
index 666db8eb71e0..f5740423b002 100644
--- a/include/linux/f2fs_fs.h
+++ b/include/linux/f2fs_fs.h
@@ -285,7 +285,7 @@ enum {
 
 struct node_footer {
 	__le32 nid;		/* node id */
-	__le32 ino;		/* inode nunmber */
+	__le32 ino;		/* inode number */
 	__le32 flag;		/* include cold/fsync/dentry marks and offset */
 	__le64 cp_ver;		/* checkpoint version */
 	__le32 next_blkaddr;	/* next node page block address */
-- 
cgit v1.2.3


From 559e87c497a820e132995ad56c8361509e5410da Mon Sep 17 00:00:00 2001
From: Chao Yu <yuchao0@huawei.com>
Date: Tue, 26 Feb 2019 19:01:15 +0800
Subject: f2fs: trace f2fs_ioc_shutdown

This patch supports to trace f2fs_ioc_shutdown.

Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/file.c              |  3 +++
 include/trace/events/f2fs.h | 32 ++++++++++++++++++++++++++++++++
 2 files changed, 35 insertions(+)

(limited to 'include')

diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 3a8c8eb0f549..807a97ad2430 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -1987,6 +1987,9 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
 out:
 	if (in != F2FS_GOING_DOWN_FULLSYNC)
 		mnt_drop_write_file(filp);
+
+	trace_f2fs_shutdown(sbi, in, ret);
+
 	return ret;
 }
 
diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h
index 8a28a2b1be74..0cb746fe6ae1 100644
--- a/include/trace/events/f2fs.h
+++ b/include/trace/events/f2fs.h
@@ -149,6 +149,14 @@ TRACE_DEFINE_ENUM(CP_TRIMMED);
 		{ CP_SPEC_LOG_NUM,	"log type is 2" },		\
 		{ CP_RECOVER_DIR,	"dir needs recovery" })
 
+#define show_shutdown_mode(type)					\
+	__print_symbolic(type,						\
+		{ F2FS_GOING_DOWN_FULLSYNC,	"full sync" },		\
+		{ F2FS_GOING_DOWN_METASYNC,	"meta sync" },		\
+		{ F2FS_GOING_DOWN_NOSYNC,	"no sync" },		\
+		{ F2FS_GOING_DOWN_METAFLUSH,	"meta flush" },		\
+		{ F2FS_GOING_DOWN_NEED_FSCK,	"need fsck" })
+
 struct f2fs_sb_info;
 struct f2fs_io_info;
 struct extent_info;
@@ -1619,6 +1627,30 @@ DEFINE_EVENT(f2fs_sync_dirty_inodes, f2fs_sync_dirty_inodes_exit,
 	TP_ARGS(sb, type, count)
 );
 
+TRACE_EVENT(f2fs_shutdown,
+
+	TP_PROTO(struct f2fs_sb_info *sbi, unsigned int mode, int ret),
+
+	TP_ARGS(sbi, mode, ret),
+
+	TP_STRUCT__entry(
+		__field(dev_t,	dev)
+		__field(unsigned int, mode)
+		__field(int, ret)
+	),
+
+	TP_fast_assign(
+		__entry->dev = sbi->sb->s_dev;
+		__entry->mode = mode;
+		__entry->ret = ret;
+	),
+
+	TP_printk("dev = (%d,%d), mode: %s, ret:%d",
+		show_dev(__entry->dev),
+		show_shutdown_mode(__entry->mode),
+		__entry->ret)
+);
+
 #endif /* _TRACE_F2FS_H */
 
  /* This part must be outside protection */
-- 
cgit v1.2.3


From 76630f20059245372a78ccc0845db9d698098c34 Mon Sep 17 00:00:00 2001
From: Chao Yu <yuchao0@huawei.com>
Date: Tue, 26 Feb 2019 19:01:16 +0800
Subject: f2fs: print more parameters in trace_f2fs_map_blocks

for better map_blocks trace.

Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 include/trace/events/f2fs.h | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h
index 0cb746fe6ae1..a3916b4dd57e 100644
--- a/include/trace/events/f2fs.h
+++ b/include/trace/events/f2fs.h
@@ -544,6 +544,9 @@ TRACE_EVENT(f2fs_map_blocks,
 		__field(block_t,	m_lblk)
 		__field(block_t,	m_pblk)
 		__field(unsigned int,	m_len)
+		__field(unsigned int,	m_flags)
+		__field(int,	m_seg_type)
+		__field(bool,	m_may_create)
 		__field(int,	ret)
 	),
 
@@ -553,15 +556,22 @@ TRACE_EVENT(f2fs_map_blocks,
 		__entry->m_lblk		= map->m_lblk;
 		__entry->m_pblk		= map->m_pblk;
 		__entry->m_len		= map->m_len;
+		__entry->m_flags	= map->m_flags;
+		__entry->m_seg_type	= map->m_seg_type;
+		__entry->m_may_create	= map->m_may_create;
 		__entry->ret		= ret;
 	),
 
 	TP_printk("dev = (%d,%d), ino = %lu, file offset = %llu, "
-		"start blkaddr = 0x%llx, len = 0x%llx, err = %d",
+		"start blkaddr = 0x%llx, len = 0x%llx, flags = %u,"
+		"seg_type = %d, may_create = %d, err = %d",
 		show_dev_ino(__entry),
 		(unsigned long long)__entry->m_lblk,
 		(unsigned long long)__entry->m_pblk,
 		(unsigned long long)__entry->m_len,
+		__entry->m_flags,
+		__entry->m_seg_type,
+		__entry->m_may_create,
 		__entry->ret)
 );
 
-- 
cgit v1.2.3


From 31b265b3baaf55f209229888b7ffea523ddab366 Mon Sep 17 00:00:00 2001
From: Douglas Anderson <dianders@chromium.org>
Date: Fri, 8 Mar 2019 11:32:04 -0800
Subject: tracing: kdb: Fix ftdump to not sleep

As reported back in 2016-11 [1], the "ftdump" kdb command triggers a
BUG for "sleeping function called from invalid context".

kdb's "ftdump" command wants to call ring_buffer_read_prepare() in
atomic context.  A very simple solution for this is to add allocation
flags to ring_buffer_read_prepare() so kdb can call it without
triggering the allocation error.  This patch does that.

Note that in the original email thread about this, it was suggested
that perhaps the solution for kdb was to either preallocate the buffer
ahead of time or create our own iterator.  I'm hoping that this
alternative of adding allocation flags to ring_buffer_read_prepare()
can be considered since it means I don't need to duplicate more of the
core trace code into "trace_kdb.c" (for either creating my own
iterator or re-preparing a ring allocator whose memory was already
allocated).

NOTE: another option for kdb is to actually figure out how to make it
reuse the existing ftrace_dump() function and totally eliminate the
duplication.  This sounds very appealing and actually works (the "sr
z" command can be seen to properly dump the ftrace buffer).  The
downside here is that ftrace_dump() fully consumes the trace buffer.
Unless that is changed I'd rather not use it because it means "ftdump
| grep xyz" won't be very useful to search the ftrace buffer since it
will throw away the whole trace on the first grep.  A future patch to
dump only the last few lines of the buffer will also be hard to
implement.

[1] https://lkml.kernel.org/r/20161117191605.GA21459@google.com

Link: http://lkml.kernel.org/r/20190308193205.213659-1-dianders@chromium.org

Reported-by: Brian Norris <briannorris@chromium.org>
Signed-off-by: Douglas Anderson <dianders@chromium.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/ring_buffer.h | 2 +-
 kernel/trace/ring_buffer.c  | 5 +++--
 kernel/trace/trace.c        | 6 ++++--
 kernel/trace/trace_kdb.c    | 6 ++++--
 4 files changed, 12 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index f1429675f252..1a40277b512c 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -128,7 +128,7 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts,
 		    unsigned long *lost_events);
 
 struct ring_buffer_iter *
-ring_buffer_read_prepare(struct ring_buffer *buffer, int cpu);
+ring_buffer_read_prepare(struct ring_buffer *buffer, int cpu, gfp_t flags);
 void ring_buffer_read_prepare_sync(void);
 void ring_buffer_read_start(struct ring_buffer_iter *iter);
 void ring_buffer_read_finish(struct ring_buffer_iter *iter);
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 9a91479bbbfe..41b6f96e5366 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -4191,6 +4191,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_consume);
  * ring_buffer_read_prepare - Prepare for a non consuming read of the buffer
  * @buffer: The ring buffer to read from
  * @cpu: The cpu buffer to iterate over
+ * @flags: gfp flags to use for memory allocation
  *
  * This performs the initial preparations necessary to iterate
  * through the buffer.  Memory is allocated, buffer recording
@@ -4208,7 +4209,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_consume);
  * This overall must be paired with ring_buffer_read_finish.
  */
 struct ring_buffer_iter *
-ring_buffer_read_prepare(struct ring_buffer *buffer, int cpu)
+ring_buffer_read_prepare(struct ring_buffer *buffer, int cpu, gfp_t flags)
 {
 	struct ring_buffer_per_cpu *cpu_buffer;
 	struct ring_buffer_iter *iter;
@@ -4216,7 +4217,7 @@ ring_buffer_read_prepare(struct ring_buffer *buffer, int cpu)
 	if (!cpumask_test_cpu(cpu, buffer->cpumask))
 		return NULL;
 
-	iter = kmalloc(sizeof(*iter), GFP_KERNEL);
+	iter = kmalloc(sizeof(*iter), flags);
 	if (!iter)
 		return NULL;
 
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index e9cc47e59d25..ccd759eaad79 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -4077,7 +4077,8 @@ __tracing_open(struct inode *inode, struct file *file, bool snapshot)
 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
 		for_each_tracing_cpu(cpu) {
 			iter->buffer_iter[cpu] =
-				ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
+				ring_buffer_read_prepare(iter->trace_buffer->buffer,
+							 cpu, GFP_KERNEL);
 		}
 		ring_buffer_read_prepare_sync();
 		for_each_tracing_cpu(cpu) {
@@ -4087,7 +4088,8 @@ __tracing_open(struct inode *inode, struct file *file, bool snapshot)
 	} else {
 		cpu = iter->cpu_file;
 		iter->buffer_iter[cpu] =
-			ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
+			ring_buffer_read_prepare(iter->trace_buffer->buffer,
+						 cpu, GFP_KERNEL);
 		ring_buffer_read_prepare_sync();
 		ring_buffer_read_start(iter->buffer_iter[cpu]);
 		tracing_iter_reset(iter, cpu);
diff --git a/kernel/trace/trace_kdb.c b/kernel/trace/trace_kdb.c
index d953c163a079..810d78a8d14c 100644
--- a/kernel/trace/trace_kdb.c
+++ b/kernel/trace/trace_kdb.c
@@ -51,14 +51,16 @@ static void ftrace_dump_buf(int skip_lines, long cpu_file)
 	if (cpu_file == RING_BUFFER_ALL_CPUS) {
 		for_each_tracing_cpu(cpu) {
 			iter.buffer_iter[cpu] =
-			ring_buffer_read_prepare(iter.trace_buffer->buffer, cpu);
+			ring_buffer_read_prepare(iter.trace_buffer->buffer,
+						 cpu, GFP_ATOMIC);
 			ring_buffer_read_start(iter.buffer_iter[cpu]);
 			tracing_iter_reset(&iter, cpu);
 		}
 	} else {
 		iter.cpu_file = cpu_file;
 		iter.buffer_iter[cpu_file] =
-			ring_buffer_read_prepare(iter.trace_buffer->buffer, cpu_file);
+			ring_buffer_read_prepare(iter.trace_buffer->buffer,
+						 cpu_file, GFP_ATOMIC);
 		ring_buffer_read_start(iter.buffer_iter[cpu_file]);
 		tracing_iter_reset(&iter, cpu_file);
 	}
-- 
cgit v1.2.3


From c24a1269652006b401bda29fea15a0e42b7870d1 Mon Sep 17 00:00:00 2001
From: Ricardo Biehl Pasquali <pasqualirb@gmail.com>
Date: Wed, 13 Mar 2019 16:06:48 -0300
Subject: ALSA: pcm: Fix function name in kernel-doc comment

Signed-off-by: Ricardo Biehl Pasquali <pasqualirb@gmail.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/sound/pcm.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/sound/pcm.h b/include/sound/pcm.h
index 465d7d033c4c..18bd8c3ea605 100644
--- a/include/sound/pcm.h
+++ b/include/sound/pcm.h
@@ -750,7 +750,7 @@ static inline snd_pcm_uframes_t snd_pcm_playback_avail(struct snd_pcm_runtime *r
 }
 
 /**
- * snd_pcm_playback_avail - Get the available (readable) space for capture
+ * snd_pcm_capture_avail - Get the available (readable) space for capture
  * @runtime: PCM runtime instance
  *
  * Result is between 0 ... (boundary - 1)
-- 
cgit v1.2.3


From a4046c06be50a4f01d435aa7fe57514818e6cc82 Mon Sep 17 00:00:00 2001
From: Pi-Hsun Shih <pihsun@chromium.org>
Date: Wed, 13 Mar 2019 11:44:33 -0700
Subject: include/linux/swap.h: use offsetof() instead of custom __swapoffset
 macro

Use offsetof() to calculate offset of a field to take advantage of
compiler built-in version when possible, and avoid UBSAN warning when
compiling with Clang:

  UBSAN: Undefined behaviour in mm/swapfile.c:3010:38
  member access within null pointer of type 'union swap_header'
  CPU: 6 PID: 1833 Comm: swapon Tainted: G S                4.19.23 #43
  Call trace:
   dump_backtrace+0x0/0x194
   show_stack+0x20/0x2c
   __dump_stack+0x20/0x28
   dump_stack+0x70/0x94
   ubsan_epilogue+0x14/0x44
   ubsan_type_mismatch_common+0xf4/0xfc
   __ubsan_handle_type_mismatch_v1+0x34/0x54
   __se_sys_swapon+0x654/0x1084
   __arm64_sys_swapon+0x1c/0x24
   el0_svc_common+0xa8/0x150
   el0_svc_compat_handler+0x2c/0x38
   el0_svc_compat+0x8/0x18

Link: http://lkml.kernel.org/r/20190312081902.223764-1-pihsun@chromium.org
Signed-off-by: Pi-Hsun Shih <pihsun@chromium.org>
Acked-by: Michal Hocko <mhocko@suse.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/swap.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/swap.h b/include/linux/swap.h
index fc50e21b3b88..4bfb5c4ac108 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -157,9 +157,9 @@ struct swap_extent {
 /*
  * Max bad pages in the new format..
  */
-#define __swapoffset(x) ((unsigned long)&((union swap_header *)0)->x)
 #define MAX_SWAP_BADPAGES \
-	((__swapoffset(magic.magic) - __swapoffset(info.badpages)) / sizeof(int))
+	((offsetof(union swap_header, magic.magic) - \
+	  offsetof(union swap_header, info.badpages)) / sizeof(int))
 
 enum {
 	SWP_USED	= (1 << 0),	/* is slot in swap_info[] used? */
-- 
cgit v1.2.3


From a75d4c33377277b6034dd1e2663bce444f952c14 Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@toxicpanda.com>
Date: Wed, 13 Mar 2019 11:44:14 -0700
Subject: filemap: kill page_cache_read usage in filemap_fault

Patch series "drop the mmap_sem when doing IO in the fault path", v6.

Now that we have proper isolation in place with cgroups2 we have started
going through and fixing the various priority inversions.  Most are all
gone now, but this one is sort of weird since it's not necessarily a
priority inversion that happens within the kernel, but rather because of
something userspace does.

We have giant applications that we want to protect, and parts of these
giant applications do things like watch the system state to determine how
healthy the box is for load balancing and such.  This involves running
'ps' or other such utilities.  These utilities will often walk
/proc/<pid>/whatever, and these files can sometimes need to
down_read(&task->mmap_sem).  Not usually a big deal, but we noticed when
we are stress testing that sometimes our protected application has latency
spikes trying to get the mmap_sem for tasks that are in lower priority
cgroups.

This is because any down_write() on a semaphore essentially turns it into
a mutex, so even if we currently have it held for reading, any new readers
will not be allowed on to keep from starving the writer.  This is fine,
except a lower priority task could be stuck doing IO because it has been
throttled to the point that its IO is taking much longer than normal.  But
because a higher priority group depends on this completing it is now stuck
behind lower priority work.

In order to avoid this particular priority inversion we want to use the
existing retry mechanism to stop from holding the mmap_sem at all if we
are going to do IO.  This already exists in the read case sort of, but
needed to be extended for more than just grabbing the page lock.  With
io.latency we throttle at submit_bio() time, so the readahead stuff can
block and even page_cache_read can block, so all these paths need to have
the mmap_sem dropped.

The other big thing is ->page_mkwrite.  btrfs is particularly shitty here
because we have to reserve space for the dirty page, which can be a very
expensive operation.  We use the same retry method as the read path, and
simply cache the page and verify the page is still setup properly the next
pass through ->page_mkwrite().

I've tested these patches with xfstests and there are no regressions.

This patch (of 3):

If we do not have a page at filemap_fault time we'll do this weird forced
page_cache_read thing to populate the page, and then drop it again and
loop around and find it.  This makes for 2 ways we can read a page in
filemap_fault, and it's not really needed.  Instead add a FGP_FOR_MMAP
flag so that pagecache_get_page() will return a unlocked page that's in
pagecache.  Then use the normal page locking and readpage logic already in
filemap_fault.  This simplifies the no page in page cache case
significantly.

[akpm@linux-foundation.org: fix comment text]
[josef@toxicpanda.com: don't unlock null page in FGP_FOR_MMAP case]
  Link: http://lkml.kernel.org/r/20190312201742.22935-1-josef@toxicpanda.com
Link: http://lkml.kernel.org/r/20181211173801.29535-2-josef@toxicpanda.com
Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: "Kirill A. Shutemov" <kirill@shutemov.name>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/pagemap.h |  1 +
 mm/filemap.c            | 75 ++++++++++---------------------------------------
 2 files changed, 16 insertions(+), 60 deletions(-)

(limited to 'include')

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index b477a70cc2e4..bcf909d0de5f 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -239,6 +239,7 @@ pgoff_t page_cache_prev_miss(struct address_space *mapping,
 #define FGP_WRITE		0x00000008
 #define FGP_NOFS		0x00000010
 #define FGP_NOWAIT		0x00000020
+#define FGP_FOR_MMAP		0x00000040
 
 struct page *pagecache_get_page(struct address_space *mapping, pgoff_t offset,
 		int fgp_flags, gfp_t cache_gfp_mask);
diff --git a/mm/filemap.c b/mm/filemap.c
index ec6566ffbd90..64d014f940e9 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1587,6 +1587,9 @@ EXPORT_SYMBOL(find_lock_entry);
  *   @gfp_mask and added to the page cache and the VM's LRU
  *   list. The page is returned locked and with an increased
  *   refcount.
+ * - FGP_FOR_MMAP: Similar to FGP_CREAT, only we want to allow the caller to do
+ *   its own locking dance if the page is already in cache, or unlock the page
+ *   before returning if we had to add the page to pagecache.
  *
  * If FGP_LOCK or FGP_CREAT are specified then the function may sleep even
  * if the GFP flags specified for FGP_CREAT are atomic.
@@ -1641,7 +1644,7 @@ no_page:
 		if (!page)
 			return NULL;
 
-		if (WARN_ON_ONCE(!(fgp_flags & FGP_LOCK)))
+		if (WARN_ON_ONCE(!(fgp_flags & (FGP_LOCK | FGP_FOR_MMAP))))
 			fgp_flags |= FGP_LOCK;
 
 		/* Init accessed so avoid atomic mark_page_accessed later */
@@ -1655,6 +1658,13 @@ no_page:
 			if (err == -EEXIST)
 				goto repeat;
 		}
+
+		/*
+		 * add_to_page_cache_lru locks the page, and for mmap we expect
+		 * an unlocked page.
+		 */
+		if (page && (fgp_flags & FGP_FOR_MMAP))
+			unlock_page(page);
 	}
 
 	return page;
@@ -2379,41 +2389,6 @@ out:
 EXPORT_SYMBOL(generic_file_read_iter);
 
 #ifdef CONFIG_MMU
-/**
- * page_cache_read - adds requested page to the page cache if not already there
- * @file:	file to read
- * @offset:	page index
- * @gfp_mask:	memory allocation flags
- *
- * This adds the requested page to the page cache if it isn't already there,
- * and schedules an I/O to read in its contents from disk.
- *
- * Return: %0 on success, negative error code otherwise.
- */
-static int page_cache_read(struct file *file, pgoff_t offset, gfp_t gfp_mask)
-{
-	struct address_space *mapping = file->f_mapping;
-	struct page *page;
-	int ret;
-
-	do {
-		page = __page_cache_alloc(gfp_mask);
-		if (!page)
-			return -ENOMEM;
-
-		ret = add_to_page_cache_lru(page, mapping, offset, gfp_mask);
-		if (ret == 0)
-			ret = mapping->a_ops->readpage(file, page);
-		else if (ret == -EEXIST)
-			ret = 0; /* losing race to add is OK */
-
-		put_page(page);
-
-	} while (ret == AOP_TRUNCATED_PAGE);
-
-	return ret;
-}
-
 #define MMAP_LOTSAMISS  (100)
 
 /*
@@ -2539,9 +2514,11 @@ vm_fault_t filemap_fault(struct vm_fault *vmf)
 		count_memcg_event_mm(vmf->vma->vm_mm, PGMAJFAULT);
 		ret = VM_FAULT_MAJOR;
 retry_find:
-		page = find_get_page(mapping, offset);
+		page = pagecache_get_page(mapping, offset,
+					  FGP_CREAT|FGP_FOR_MMAP,
+					  vmf->gfp_mask);
 		if (!page)
-			goto no_cached_page;
+			return vmf_error(-ENOMEM);
 	}
 
 	if (!lock_page_or_retry(page, vmf->vma->vm_mm, vmf->flags)) {
@@ -2578,28 +2555,6 @@ retry_find:
 	vmf->page = page;
 	return ret | VM_FAULT_LOCKED;
 
-no_cached_page:
-	/*
-	 * We're only likely to ever get here if MADV_RANDOM is in
-	 * effect.
-	 */
-	error = page_cache_read(file, offset, vmf->gfp_mask);
-
-	/*
-	 * The page we want has now been added to the page cache.
-	 * In the unlikely event that someone removed it in the
-	 * meantime, we'll just come back here and read it again.
-	 */
-	if (error >= 0)
-		goto retry_find;
-
-	/*
-	 * An error return from page_cache_read can result if the
-	 * system is low on memory, or a problem occurs while trying
-	 * to schedule I/O.
-	 */
-	return vmf_error(error);
-
 page_not_uptodate:
 	/*
 	 * Umm, take care of errors if the page isn't up-to-date.
-- 
cgit v1.2.3


From 037fc3368be46dc1a2a90f6e50c8cbce49d75fd6 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Sun, 17 Mar 2019 11:01:09 +0900
Subject: kbuild: force all architectures except um to include mandatory-y

Currently, every arch/*/include/uapi/asm/Kbuild explicitly includes
the common Kbuild.asm file. Factor out the duplicated include directives
to scripts/Makefile.asm-generic so that no architecture would opt out
of the mandatory-y mechanism.

um is not forced to include mandatory-y since it is a very exceptional
case which does not support UAPI.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 Documentation/kbuild/makefiles.txt      |  2 +-
 Makefile                                |  6 ++++--
 arch/alpha/include/uapi/asm/Kbuild      |  2 --
 arch/arc/include/uapi/asm/Kbuild        |  2 --
 arch/arm/include/uapi/asm/Kbuild        |  1 -
 arch/arm64/include/uapi/asm/Kbuild      |  1 -
 arch/c6x/include/uapi/asm/Kbuild        |  2 --
 arch/csky/include/uapi/asm/Kbuild       |  2 --
 arch/h8300/include/uapi/asm/Kbuild      |  2 --
 arch/hexagon/include/uapi/asm/Kbuild    |  2 --
 arch/ia64/include/uapi/asm/Kbuild       |  2 --
 arch/m68k/include/uapi/asm/Kbuild       |  2 --
 arch/microblaze/include/uapi/asm/Kbuild |  2 --
 arch/mips/include/uapi/asm/Kbuild       |  2 --
 arch/nds32/include/uapi/asm/Kbuild      |  2 --
 arch/nios2/include/uapi/asm/Kbuild      |  2 --
 arch/openrisc/include/uapi/asm/Kbuild   |  2 --
 arch/parisc/include/uapi/asm/Kbuild     |  2 --
 arch/powerpc/include/uapi/asm/Kbuild    |  2 --
 arch/riscv/include/uapi/asm/Kbuild      |  1 -
 arch/s390/include/uapi/asm/Kbuild       |  1 -
 arch/sh/include/uapi/asm/Kbuild         |  1 -
 arch/sparc/include/uapi/asm/Kbuild      |  2 --
 arch/unicore32/include/uapi/asm/Kbuild  |  2 --
 arch/x86/include/uapi/asm/Kbuild        |  2 --
 arch/xtensa/include/uapi/asm/Kbuild     |  2 --
 include/asm-generic/Kbuild              |  5 +++++
 include/uapi/asm-generic/Kbuild         | 36 +++++++++++++++++++++++++++++++++
 include/uapi/asm-generic/Kbuild.asm     | 34 -------------------------------
 scripts/Makefile.asm-generic            |  5 +++++
 30 files changed, 51 insertions(+), 80 deletions(-)
 create mode 100644 include/asm-generic/Kbuild
 create mode 100644 include/uapi/asm-generic/Kbuild
 delete mode 100644 include/uapi/asm-generic/Kbuild.asm

(limited to 'include')

diff --git a/Documentation/kbuild/makefiles.txt b/Documentation/kbuild/makefiles.txt
index f124be6e4c3a..03c065855eaf 100644
--- a/Documentation/kbuild/makefiles.txt
+++ b/Documentation/kbuild/makefiles.txt
@@ -1274,7 +1274,7 @@ See subsequent chapter for the syntax of the Kbuild file.
 
 --- 7.4 mandatory-y
 
-	mandatory-y is essentially used by include/(uapi/)asm-generic/Kbuild.asm
+	mandatory-y is essentially used by include/(uapi/)asm-generic/Kbuild
 	to define the minimum set of ASM headers that all architectures must have.
 
 	This works like optional generic-y. If a mandatory header is missing
diff --git a/Makefile b/Makefile
index 08a01add09a6..4f45ae628f54 100644
--- a/Makefile
+++ b/Makefile
@@ -1098,9 +1098,11 @@ asm-generic := -f $(srctree)/scripts/Makefile.asm-generic obj
 
 PHONY += asm-generic uapi-asm-generic
 asm-generic: uapi-asm-generic
-	$(Q)$(MAKE) $(asm-generic)=arch/$(SRCARCH)/include/generated/asm
+	$(Q)$(MAKE) $(asm-generic)=arch/$(SRCARCH)/include/generated/asm \
+	generic=include/asm-generic
 uapi-asm-generic:
-	$(Q)$(MAKE) $(asm-generic)=arch/$(SRCARCH)/include/generated/uapi/asm
+	$(Q)$(MAKE) $(asm-generic)=arch/$(SRCARCH)/include/generated/uapi/asm \
+	generic=include/uapi/asm-generic
 
 PHONY += prepare-objtool
 prepare-objtool: $(objtool_target)
diff --git a/arch/alpha/include/uapi/asm/Kbuild b/arch/alpha/include/uapi/asm/Kbuild
index 439f5157aa35..7417847dc438 100644
--- a/arch/alpha/include/uapi/asm/Kbuild
+++ b/arch/alpha/include/uapi/asm/Kbuild
@@ -1,3 +1 @@
-include include/uapi/asm-generic/Kbuild.asm
-
 generated-y += unistd_32.h
diff --git a/arch/arc/include/uapi/asm/Kbuild b/arch/arc/include/uapi/asm/Kbuild
index 0febf1a07c30..755bb11323d8 100644
--- a/arch/arc/include/uapi/asm/Kbuild
+++ b/arch/arc/include/uapi/asm/Kbuild
@@ -1,4 +1,2 @@
-include include/uapi/asm-generic/Kbuild.asm
-
 generic-y += kvm_para.h
 generic-y += ucontext.h
diff --git a/arch/arm/include/uapi/asm/Kbuild b/arch/arm/include/uapi/asm/Kbuild
index eee8f7d23899..23b4464c0995 100644
--- a/arch/arm/include/uapi/asm/Kbuild
+++ b/arch/arm/include/uapi/asm/Kbuild
@@ -1,5 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0
-include include/uapi/asm-generic/Kbuild.asm
 
 generated-y += unistd-common.h
 generated-y += unistd-oabi.h
diff --git a/arch/arm64/include/uapi/asm/Kbuild b/arch/arm64/include/uapi/asm/Kbuild
index 87eea29b24ab..602d137932dc 100644
--- a/arch/arm64/include/uapi/asm/Kbuild
+++ b/arch/arm64/include/uapi/asm/Kbuild
@@ -1,4 +1,3 @@
 # SPDX-License-Identifier: GPL-2.0
-include include/uapi/asm-generic/Kbuild.asm
 
 generic-y += kvm_para.h
diff --git a/arch/c6x/include/uapi/asm/Kbuild b/arch/c6x/include/uapi/asm/Kbuild
index 0febf1a07c30..755bb11323d8 100644
--- a/arch/c6x/include/uapi/asm/Kbuild
+++ b/arch/c6x/include/uapi/asm/Kbuild
@@ -1,4 +1,2 @@
-include include/uapi/asm-generic/Kbuild.asm
-
 generic-y += kvm_para.h
 generic-y += ucontext.h
diff --git a/arch/csky/include/uapi/asm/Kbuild b/arch/csky/include/uapi/asm/Kbuild
index c1b06dcf6cf8..1c72f04ff75d 100644
--- a/arch/csky/include/uapi/asm/Kbuild
+++ b/arch/csky/include/uapi/asm/Kbuild
@@ -1,3 +1 @@
-include include/uapi/asm-generic/Kbuild.asm
-
 generic-y += ucontext.h
diff --git a/arch/h8300/include/uapi/asm/Kbuild b/arch/h8300/include/uapi/asm/Kbuild
index 0febf1a07c30..755bb11323d8 100644
--- a/arch/h8300/include/uapi/asm/Kbuild
+++ b/arch/h8300/include/uapi/asm/Kbuild
@@ -1,4 +1,2 @@
-include include/uapi/asm-generic/Kbuild.asm
-
 generic-y += kvm_para.h
 generic-y += ucontext.h
diff --git a/arch/hexagon/include/uapi/asm/Kbuild b/arch/hexagon/include/uapi/asm/Kbuild
index c1b06dcf6cf8..1c72f04ff75d 100644
--- a/arch/hexagon/include/uapi/asm/Kbuild
+++ b/arch/hexagon/include/uapi/asm/Kbuild
@@ -1,3 +1 @@
-include include/uapi/asm-generic/Kbuild.asm
-
 generic-y += ucontext.h
diff --git a/arch/ia64/include/uapi/asm/Kbuild b/arch/ia64/include/uapi/asm/Kbuild
index 5b819e53c397..20018cb883a9 100644
--- a/arch/ia64/include/uapi/asm/Kbuild
+++ b/arch/ia64/include/uapi/asm/Kbuild
@@ -1,4 +1,2 @@
-include include/uapi/asm-generic/Kbuild.asm
-
 generated-y += unistd_64.h
 generic-y += kvm_para.h
diff --git a/arch/m68k/include/uapi/asm/Kbuild b/arch/m68k/include/uapi/asm/Kbuild
index 960bf1e4be53..8a7ad40be463 100644
--- a/arch/m68k/include/uapi/asm/Kbuild
+++ b/arch/m68k/include/uapi/asm/Kbuild
@@ -1,4 +1,2 @@
-include include/uapi/asm-generic/Kbuild.asm
-
 generated-y += unistd_32.h
 generic-y += kvm_para.h
diff --git a/arch/microblaze/include/uapi/asm/Kbuild b/arch/microblaze/include/uapi/asm/Kbuild
index 97823ec46e97..3ce84fbb2678 100644
--- a/arch/microblaze/include/uapi/asm/Kbuild
+++ b/arch/microblaze/include/uapi/asm/Kbuild
@@ -1,5 +1,3 @@
-include include/uapi/asm-generic/Kbuild.asm
-
 generated-y += unistd_32.h
 generic-y += kvm_para.h
 generic-y += ucontext.h
diff --git a/arch/mips/include/uapi/asm/Kbuild b/arch/mips/include/uapi/asm/Kbuild
index 0851c103a8ce..c3798bfe0486 100644
--- a/arch/mips/include/uapi/asm/Kbuild
+++ b/arch/mips/include/uapi/asm/Kbuild
@@ -1,5 +1,3 @@
-include include/uapi/asm-generic/Kbuild.asm
-
 generated-y += unistd_n32.h
 generated-y += unistd_n64.h
 generated-y += unistd_o32.h
diff --git a/arch/nds32/include/uapi/asm/Kbuild b/arch/nds32/include/uapi/asm/Kbuild
index c1b06dcf6cf8..1c72f04ff75d 100644
--- a/arch/nds32/include/uapi/asm/Kbuild
+++ b/arch/nds32/include/uapi/asm/Kbuild
@@ -1,3 +1 @@
-include include/uapi/asm-generic/Kbuild.asm
-
 generic-y += ucontext.h
diff --git a/arch/nios2/include/uapi/asm/Kbuild b/arch/nios2/include/uapi/asm/Kbuild
index 0febf1a07c30..755bb11323d8 100644
--- a/arch/nios2/include/uapi/asm/Kbuild
+++ b/arch/nios2/include/uapi/asm/Kbuild
@@ -1,4 +1,2 @@
-include include/uapi/asm-generic/Kbuild.asm
-
 generic-y += kvm_para.h
 generic-y += ucontext.h
diff --git a/arch/openrisc/include/uapi/asm/Kbuild b/arch/openrisc/include/uapi/asm/Kbuild
index 0febf1a07c30..755bb11323d8 100644
--- a/arch/openrisc/include/uapi/asm/Kbuild
+++ b/arch/openrisc/include/uapi/asm/Kbuild
@@ -1,4 +1,2 @@
-include include/uapi/asm-generic/Kbuild.asm
-
 generic-y += kvm_para.h
 generic-y += ucontext.h
diff --git a/arch/parisc/include/uapi/asm/Kbuild b/arch/parisc/include/uapi/asm/Kbuild
index c54353d390ff..22fdbd08cdc8 100644
--- a/arch/parisc/include/uapi/asm/Kbuild
+++ b/arch/parisc/include/uapi/asm/Kbuild
@@ -1,5 +1,3 @@
-include include/uapi/asm-generic/Kbuild.asm
-
 generated-y += unistd_32.h
 generated-y += unistd_64.h
 generic-y += kvm_para.h
diff --git a/arch/powerpc/include/uapi/asm/Kbuild b/arch/powerpc/include/uapi/asm/Kbuild
index 214a39acdf25..2bd5b392277c 100644
--- a/arch/powerpc/include/uapi/asm/Kbuild
+++ b/arch/powerpc/include/uapi/asm/Kbuild
@@ -1,4 +1,2 @@
-include include/uapi/asm-generic/Kbuild.asm
-
 generated-y += unistd_32.h
 generated-y += unistd_64.h
diff --git a/arch/riscv/include/uapi/asm/Kbuild b/arch/riscv/include/uapi/asm/Kbuild
index d2ee86b4c091..e69de29bb2d1 100644
--- a/arch/riscv/include/uapi/asm/Kbuild
+++ b/arch/riscv/include/uapi/asm/Kbuild
@@ -1 +0,0 @@
-include include/uapi/asm-generic/Kbuild.asm
diff --git a/arch/s390/include/uapi/asm/Kbuild b/arch/s390/include/uapi/asm/Kbuild
index da3e0d48abbc..46c1ff0b842a 100644
--- a/arch/s390/include/uapi/asm/Kbuild
+++ b/arch/s390/include/uapi/asm/Kbuild
@@ -1,5 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0
-include include/uapi/asm-generic/Kbuild.asm
 
 generated-y += unistd_32.h
 generated-y += unistd_64.h
diff --git a/arch/sh/include/uapi/asm/Kbuild b/arch/sh/include/uapi/asm/Kbuild
index eaa30bcd93bf..ecfbd40924dd 100644
--- a/arch/sh/include/uapi/asm/Kbuild
+++ b/arch/sh/include/uapi/asm/Kbuild
@@ -1,5 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0
-include include/uapi/asm-generic/Kbuild.asm
 
 generated-y += unistd_32.h
 generic-y += kvm_para.h
diff --git a/arch/sparc/include/uapi/asm/Kbuild b/arch/sparc/include/uapi/asm/Kbuild
index 214a39acdf25..2bd5b392277c 100644
--- a/arch/sparc/include/uapi/asm/Kbuild
+++ b/arch/sparc/include/uapi/asm/Kbuild
@@ -1,4 +1,2 @@
-include include/uapi/asm-generic/Kbuild.asm
-
 generated-y += unistd_32.h
 generated-y += unistd_64.h
diff --git a/arch/unicore32/include/uapi/asm/Kbuild b/arch/unicore32/include/uapi/asm/Kbuild
index 0febf1a07c30..755bb11323d8 100644
--- a/arch/unicore32/include/uapi/asm/Kbuild
+++ b/arch/unicore32/include/uapi/asm/Kbuild
@@ -1,4 +1,2 @@
-include include/uapi/asm-generic/Kbuild.asm
-
 generic-y += kvm_para.h
 generic-y += ucontext.h
diff --git a/arch/x86/include/uapi/asm/Kbuild b/arch/x86/include/uapi/asm/Kbuild
index f6648e9928b3..59b5ad310f78 100644
--- a/arch/x86/include/uapi/asm/Kbuild
+++ b/arch/x86/include/uapi/asm/Kbuild
@@ -1,5 +1,3 @@
-include include/uapi/asm-generic/Kbuild.asm
-
 generated-y += unistd_32.h
 generated-y += unistd_64.h
 generated-y += unistd_x32.h
diff --git a/arch/xtensa/include/uapi/asm/Kbuild b/arch/xtensa/include/uapi/asm/Kbuild
index 960bf1e4be53..8a7ad40be463 100644
--- a/arch/xtensa/include/uapi/asm/Kbuild
+++ b/arch/xtensa/include/uapi/asm/Kbuild
@@ -1,4 +1,2 @@
-include include/uapi/asm-generic/Kbuild.asm
-
 generated-y += unistd_32.h
 generic-y += kvm_para.h
diff --git a/include/asm-generic/Kbuild b/include/asm-generic/Kbuild
new file mode 100644
index 000000000000..6f4536d70b8e
--- /dev/null
+++ b/include/asm-generic/Kbuild
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# asm headers that all architectures except um should have
+# (This file is not included when SRCARCH=um since UML borrows several
+# asm headers from the host architecutre.)
diff --git a/include/uapi/asm-generic/Kbuild b/include/uapi/asm-generic/Kbuild
new file mode 100644
index 000000000000..ebb180aac74e
--- /dev/null
+++ b/include/uapi/asm-generic/Kbuild
@@ -0,0 +1,36 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Headers that are mandatory in usr/include/asm/
+# (This file is not included when SRCARCH=um since UML does not support UAPI.)
+
+mandatory-y += auxvec.h
+mandatory-y += bitsperlong.h
+mandatory-y += bpf_perf_event.h
+mandatory-y += byteorder.h
+mandatory-y += errno.h
+mandatory-y += fcntl.h
+mandatory-y += ioctl.h
+mandatory-y += ioctls.h
+mandatory-y += ipcbuf.h
+mandatory-y += mman.h
+mandatory-y += msgbuf.h
+mandatory-y += param.h
+mandatory-y += poll.h
+mandatory-y += posix_types.h
+mandatory-y += ptrace.h
+mandatory-y += resource.h
+mandatory-y += sembuf.h
+mandatory-y += setup.h
+mandatory-y += shmbuf.h
+mandatory-y += sigcontext.h
+mandatory-y += siginfo.h
+mandatory-y += signal.h
+mandatory-y += socket.h
+mandatory-y += sockios.h
+mandatory-y += stat.h
+mandatory-y += statfs.h
+mandatory-y += swab.h
+mandatory-y += termbits.h
+mandatory-y += termios.h
+mandatory-y += types.h
+mandatory-y += unistd.h
diff --git a/include/uapi/asm-generic/Kbuild.asm b/include/uapi/asm-generic/Kbuild.asm
deleted file mode 100644
index 355c4ac2c0b0..000000000000
--- a/include/uapi/asm-generic/Kbuild.asm
+++ /dev/null
@@ -1,34 +0,0 @@
-#
-# Headers that are mandatory in usr/include/asm/
-#
-mandatory-y += auxvec.h
-mandatory-y += bitsperlong.h
-mandatory-y += bpf_perf_event.h
-mandatory-y += byteorder.h
-mandatory-y += errno.h
-mandatory-y += fcntl.h
-mandatory-y += ioctl.h
-mandatory-y += ioctls.h
-mandatory-y += ipcbuf.h
-mandatory-y += mman.h
-mandatory-y += msgbuf.h
-mandatory-y += param.h
-mandatory-y += poll.h
-mandatory-y += posix_types.h
-mandatory-y += ptrace.h
-mandatory-y += resource.h
-mandatory-y += sembuf.h
-mandatory-y += setup.h
-mandatory-y += shmbuf.h
-mandatory-y += sigcontext.h
-mandatory-y += siginfo.h
-mandatory-y += signal.h
-mandatory-y += socket.h
-mandatory-y += sockios.h
-mandatory-y += stat.h
-mandatory-y += statfs.h
-mandatory-y += swab.h
-mandatory-y += termbits.h
-mandatory-y += termios.h
-mandatory-y += types.h
-mandatory-y += unistd.h
diff --git a/scripts/Makefile.asm-generic b/scripts/Makefile.asm-generic
index c7d2b7acad26..82ad63dcd62b 100644
--- a/scripts/Makefile.asm-generic
+++ b/scripts/Makefile.asm-generic
@@ -12,6 +12,11 @@ all:
 src := $(subst /generated,,$(obj))
 -include $(src)/Kbuild
 
+# $(generic)/Kbuild lists mandatory-y. Exclude um since it is a special case.
+ifneq ($(SRCARCH),um)
+include $(generic)/Kbuild
+endif
+
 include scripts/Kbuild.include
 
 redundant := $(filter $(mandatory-y) $(generated-y), $(generic-y))
-- 
cgit v1.2.3


From 8e44fc85060ec997e9c6f3c49a04274db6621d26 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Tue, 12 Mar 2019 16:44:30 +0200
Subject: auxdisplay: charlcd: Introduce charlcd_free() helper

The charlcd_free() is a counterpart to charlcd_alloc()
and should be called symmetrically on tear down.

Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Miguel Ojeda <miguel.ojeda.sandonis@gmail.com>
---
 drivers/auxdisplay/charlcd.c | 6 ++++++
 include/misc/charlcd.h       | 1 +
 2 files changed, 7 insertions(+)

(limited to 'include')

diff --git a/drivers/auxdisplay/charlcd.c b/drivers/auxdisplay/charlcd.c
index 407acd22efa8..a351a9400054 100644
--- a/drivers/auxdisplay/charlcd.c
+++ b/drivers/auxdisplay/charlcd.c
@@ -818,6 +818,12 @@ struct charlcd *charlcd_alloc(unsigned int drvdata_size)
 }
 EXPORT_SYMBOL_GPL(charlcd_alloc);
 
+void charlcd_free(struct charlcd *lcd)
+{
+	kfree(charlcd_to_priv(lcd));
+}
+EXPORT_SYMBOL_GPL(charlcd_free);
+
 static int panel_notify_sys(struct notifier_block *this, unsigned long code,
 			    void *unused)
 {
diff --git a/include/misc/charlcd.h b/include/misc/charlcd.h
index 23f61850f363..1832402324ce 100644
--- a/include/misc/charlcd.h
+++ b/include/misc/charlcd.h
@@ -35,6 +35,7 @@ struct charlcd_ops {
 };
 
 struct charlcd *charlcd_alloc(unsigned int drvdata_size);
+void charlcd_free(struct charlcd *lcd);
 
 int charlcd_register(struct charlcd *lcd);
 int charlcd_unregister(struct charlcd *lcd);
-- 
cgit v1.2.3


From c5ae1954c47d3fd8815bd5a592aba18702c93f33 Mon Sep 17 00:00:00 2001
From: Yishai Hadas <yishaih@mellanox.com>
Date: Wed, 6 Mar 2019 19:21:42 +0200
Subject: IB/mlx5: Use mlx5 core to create/destroy a DEVX DCT

To prevent a hardware memory leak when a DEVX DCT object is destroyed
without calling DRAIN DCT before, (e.g. under cleanup flow), need to
manage its creation and destruction via mlx5 core.

In that case the DRAIN DCT command will be called and only once that it
will be completed the DESTROY DCT command will be called.  Otherwise, the
DESTROY DCT may fail and a hardware leak may occur.

As of that change the DRAIN DCT command should not be exposed any more
from DEVX, it's managed internally by the driver to work as expected by
the device specification.

Fixes: 7efce3691d33 ("IB/mlx5: Add obj create and destroy functionality")
Signed-off-by: Yishai Hadas <yishaih@mellanox.com>
Reviewed-by: Artemy Kovalyov <artemyko@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/hw/mlx5/devx.c            | 34 +++++++++++++++++++++-------
 drivers/infiniband/hw/mlx5/qp.c              |  4 +++-
 drivers/net/ethernet/mellanox/mlx5/core/qp.c |  6 ++---
 include/linux/mlx5/qp.h                      |  3 ++-
 4 files changed, 34 insertions(+), 13 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c
index eaa055007f28..9e08df7914aa 100644
--- a/drivers/infiniband/hw/mlx5/devx.c
+++ b/drivers/infiniband/hw/mlx5/devx.c
@@ -20,6 +20,7 @@
 
 enum devx_obj_flags {
 	DEVX_OBJ_FLAGS_INDIRECT_MKEY = 1 << 0,
+	DEVX_OBJ_FLAGS_DCT = 1 << 1,
 };
 
 struct devx_async_data {
@@ -39,7 +40,10 @@ struct devx_obj {
 	u32			dinlen; /* destroy inbox length */
 	u32			dinbox[MLX5_MAX_DESTROY_INBOX_SIZE_DW];
 	u32			flags;
-	struct mlx5_ib_devx_mr	devx_mr;
+	union {
+		struct mlx5_ib_devx_mr	devx_mr;
+		struct mlx5_core_dct	core_dct;
+	};
 };
 
 struct devx_umem {
@@ -347,7 +351,6 @@ static u64 devx_get_obj_id(const void *in)
 		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_RQ,
 					MLX5_GET(arm_rq_in, in, srq_number));
 		break;
-	case MLX5_CMD_OP_DRAIN_DCT:
 	case MLX5_CMD_OP_ARM_DCT_FOR_KEY_VIOLATION:
 		obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_DCT,
 					MLX5_GET(drain_dct_in, in, dctn));
@@ -618,7 +621,6 @@ static bool devx_is_obj_modify_cmd(const void *in)
 	case MLX5_CMD_OP_2RST_QP:
 	case MLX5_CMD_OP_ARM_XRC_SRQ:
 	case MLX5_CMD_OP_ARM_RQ:
-	case MLX5_CMD_OP_DRAIN_DCT:
 	case MLX5_CMD_OP_ARM_DCT_FOR_KEY_VIOLATION:
 	case MLX5_CMD_OP_ARM_XRQ:
 	case MLX5_CMD_OP_SET_XRQ_DC_PARAMS_ENTRY:
@@ -1124,7 +1126,11 @@ static int devx_obj_cleanup(struct ib_uobject *uobject,
 	if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY)
 		devx_cleanup_mkey(obj);
 
-	ret = mlx5_cmd_exec(obj->mdev, obj->dinbox, obj->dinlen, out, sizeof(out));
+	if (obj->flags & DEVX_OBJ_FLAGS_DCT)
+		ret = mlx5_core_destroy_dct(obj->mdev, &obj->core_dct);
+	else
+		ret = mlx5_cmd_exec(obj->mdev, obj->dinbox, obj->dinlen, out,
+				    sizeof(out));
 	if (ib_is_destroy_retryable(ret, why, uobject))
 		return ret;
 
@@ -1185,9 +1191,17 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)(
 		devx_set_umem_valid(cmd_in);
 	}
 
-	err = mlx5_cmd_exec(dev->mdev, cmd_in,
-			    cmd_in_len,
-			    cmd_out, cmd_out_len);
+	if (opcode == MLX5_CMD_OP_CREATE_DCT) {
+		obj->flags |= DEVX_OBJ_FLAGS_DCT;
+		err = mlx5_core_create_dct(dev->mdev, &obj->core_dct,
+					   cmd_in, cmd_in_len,
+					   cmd_out, cmd_out_len);
+	} else {
+		err = mlx5_cmd_exec(dev->mdev, cmd_in,
+				    cmd_in_len,
+				    cmd_out, cmd_out_len);
+	}
+
 	if (err)
 		goto obj_free;
 
@@ -1214,7 +1228,11 @@ err_copy:
 	if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY)
 		devx_cleanup_mkey(obj);
 obj_destroy:
-	mlx5_cmd_exec(obj->mdev, obj->dinbox, obj->dinlen, out, sizeof(out));
+	if (obj->flags & DEVX_OBJ_FLAGS_DCT)
+		mlx5_core_destroy_dct(obj->mdev, &obj->core_dct);
+	else
+		mlx5_cmd_exec(obj->mdev, obj->dinbox, obj->dinlen, out,
+			      sizeof(out));
 obj_free:
 	kfree(obj);
 	return err;
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 6b1f0e76900b..7cd006da1dae 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -3729,6 +3729,7 @@ static int mlx5_ib_modify_dct(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 
 	} else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR) {
 		struct mlx5_ib_modify_qp_resp resp = {};
+		u32 out[MLX5_ST_SZ_DW(create_dct_out)] = {0};
 		u32 min_resp_len = offsetof(typeof(resp), dctn) +
 				   sizeof(resp.dctn);
 
@@ -3747,7 +3748,8 @@ static int mlx5_ib_modify_dct(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 		MLX5_SET(dctc, dctc, hop_limit, attr->ah_attr.grh.hop_limit);
 
 		err = mlx5_core_create_dct(dev->mdev, &qp->dct.mdct, qp->dct.in,
-					   MLX5_ST_SZ_BYTES(create_dct_in));
+					   MLX5_ST_SZ_BYTES(create_dct_in), out,
+					   sizeof(out));
 		if (err)
 			return err;
 		resp.dctn = qp->dct.mdct.mqp.qpn;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qp.c b/drivers/net/ethernet/mellanox/mlx5/core/qp.c
index c7c2920c05c4..b8ba74de9555 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/qp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/qp.c
@@ -263,16 +263,16 @@ destroy:
 
 int mlx5_core_create_dct(struct mlx5_core_dev *dev,
 			 struct mlx5_core_dct *dct,
-			 u32 *in, int inlen)
+			 u32 *in, int inlen,
+			 u32 *out, int outlen)
 {
-	u32 out[MLX5_ST_SZ_DW(create_dct_out)]   = {0};
 	struct mlx5_core_qp *qp = &dct->mqp;
 	int err;
 
 	init_completion(&dct->drained);
 	MLX5_SET(create_dct_in, in, opcode, MLX5_CMD_OP_CREATE_DCT);
 
-	err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out));
+	err = mlx5_cmd_exec(dev, in, inlen, out, outlen);
 	if (err) {
 		mlx5_core_warn(dev, "create DCT failed, ret %d\n", err);
 		return err;
diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h
index b26ea9077384..0343c81d4c5f 100644
--- a/include/linux/mlx5/qp.h
+++ b/include/linux/mlx5/qp.h
@@ -557,7 +557,8 @@ static inline struct mlx5_core_mkey *__mlx5_mr_lookup(struct mlx5_core_dev *dev,
 
 int mlx5_core_create_dct(struct mlx5_core_dev *dev,
 			 struct mlx5_core_dct *qp,
-			 u32 *in, int inlen);
+			 u32 *in, int inlen,
+			 u32 *out, int outlen);
 int mlx5_core_create_qp(struct mlx5_core_dev *dev,
 			struct mlx5_core_qp *qp,
 			u32 *in,
-- 
cgit v1.2.3


From a792fa0e21876c9cbae7cc170083016299153051 Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
Date: Sat, 22 Sep 2018 14:43:56 +0300
Subject: drm: Clarify definition of the DRM_BUS_FLAG_(PIXDATA|SYNC)_* macros

The DRM_BUS_FLAG_PIXDATA_POSEDGE and DRM_BUS_FLAG_PIXDATA_NEGEDGE macros
and their DRM_BUS_FLAG_SYNC_* counterparts define on which pixel clock
edge data and sync signals are driven. They are however used in some
drivers to define on which pixel clock edge data and sync signals are
sampled, which should usually (but not always) be the opposite edge of
the driving edge. This creates confusion.

Create four new macros for both PIXDATA and SYNC that explicitly state
the driving and sampling edge in their name to remove the confusion. The
driving macros are defined as the opposite of the sampling macros to
made code simpler based on the assumption that the driving and sampling
edges are opposite.

Signed-off-by: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Reviewed-by: Stefan Agner <stefan@agner.ch>
Tested-by: Sebastian Reichel <sebastian.reichel@collabora.com>
Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ti.com>
---
 include/drm/drm_connector.h | 36 ++++++++++++++++++++++++++++++++----
 1 file changed, 32 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/drm/drm_connector.h b/include/drm/drm_connector.h
index 8fe22abb1e10..411c0eb4c00e 100644
--- a/include/drm/drm_connector.h
+++ b/include/drm/drm_connector.h
@@ -330,19 +330,47 @@ struct drm_display_info {
 
 #define DRM_BUS_FLAG_DE_LOW		(1<<0)
 #define DRM_BUS_FLAG_DE_HIGH		(1<<1)
-/* drive data on pos. edge */
+
+/*
+ * Don't use those two flags directly, use the DRM_BUS_FLAG_PIXDATA_DRIVE_*
+ * and DRM_BUS_FLAG_PIXDATA_SAMPLE_* variants to qualify the flags explicitly.
+ * The DRM_BUS_FLAG_PIXDATA_SAMPLE_* flags are defined as the opposite of the
+ * DRM_BUS_FLAG_PIXDATA_DRIVE_* flags to make code simpler, as signals are
+ * usually to be sampled on the opposite edge of the driving edge.
+ */
 #define DRM_BUS_FLAG_PIXDATA_POSEDGE	(1<<2)
-/* drive data on neg. edge */
 #define DRM_BUS_FLAG_PIXDATA_NEGEDGE	(1<<3)
+
+/* Drive data on rising edge */
+#define DRM_BUS_FLAG_PIXDATA_DRIVE_POSEDGE	DRM_BUS_FLAG_PIXDATA_POSEDGE
+/* Drive data on falling edge */
+#define DRM_BUS_FLAG_PIXDATA_DRIVE_NEGEDGE	DRM_BUS_FLAG_PIXDATA_NEGEDGE
+/* Sample data on rising edge */
+#define DRM_BUS_FLAG_PIXDATA_SAMPLE_POSEDGE	DRM_BUS_FLAG_PIXDATA_NEGEDGE
+/* Sample data on falling edge */
+#define DRM_BUS_FLAG_PIXDATA_SAMPLE_NEGEDGE	DRM_BUS_FLAG_PIXDATA_POSEDGE
+
 /* data is transmitted MSB to LSB on the bus */
 #define DRM_BUS_FLAG_DATA_MSB_TO_LSB	(1<<4)
 /* data is transmitted LSB to MSB on the bus */
 #define DRM_BUS_FLAG_DATA_LSB_TO_MSB	(1<<5)
-/* drive sync on pos. edge */
+
+/*
+ * Similarly to the DRM_BUS_FLAG_PIXDATA_* flags, don't use these two flags
+ * directly, use one of the DRM_BUS_FLAG_SYNC_(DRIVE|SAMPLE)_* instead.
+ */
 #define DRM_BUS_FLAG_SYNC_POSEDGE	(1<<6)
-/* drive sync on neg. edge */
 #define DRM_BUS_FLAG_SYNC_NEGEDGE	(1<<7)
 
+/* Drive sync on rising edge */
+#define DRM_BUS_FLAG_SYNC_DRIVE_POSEDGE		DRM_BUS_FLAG_SYNC_POSEDGE
+/* Drive sync on falling edge */
+#define DRM_BUS_FLAG_SYNC_DRIVE_NEGEDGE		DRM_BUS_FLAG_SYNC_NEGEDGE
+/* Sample sync on rising edge */
+#define DRM_BUS_FLAG_SYNC_SAMPLE_POSEDGE	DRM_BUS_FLAG_SYNC_NEGEDGE
+/* Sample sync on falling edge */
+#define DRM_BUS_FLAG_SYNC_SAMPLE_NEGEDGE	DRM_BUS_FLAG_SYNC_POSEDGE
+
 	/**
 	 * @bus_flags: Additional information (like pixel signal polarity) for
 	 * the pixel data on the bus, using DRM_BUS_FLAGS\_ defines.
-- 
cgit v1.2.3


From 88bc4178568b8e0331143cc0616640ab72f0cba1 Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
Date: Sat, 22 Sep 2018 15:02:42 +0300
Subject: drm: Use new DRM_BUS_FLAG_*_(DRIVE|SAMPLE)_(POS|NEG)EDGE flags

The DRM_BUS_FLAG_PIXDATA_(POS|NEG)EDGE and
DRM_BUS_FLAG_SYNC_(POS|NEG)EDGE flags are deprecated in favour of the
new DRM_BUS_FLAG_PIXDATA_(DRIVE|SAMPLE)_(POS|NEG)EDGE and
new DRM_BUS_FLAG_SYNC_(DRIVE|SAMPLE)_(POS|NEG)EDGE flags. Replace them
through the code.

This effectively changes the value of the .sampling_edge bridge timings
field in the dumb-vga-dac driver. This is safe to do as no driver
consumes these values yet.

Signed-off-by: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Reviewed-by: Stefan Agner <stefan@agner.ch>
Tested-by: Sebastian Reichel <sebastian.reichel@collabora.com>
Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ti.com>
---
 drivers/gpu/drm/bridge/dumb-vga-dac.c              |  6 +++---
 drivers/gpu/drm/bridge/tc358767.c                  |  4 ++--
 drivers/gpu/drm/drm_modes.c                        | 12 +++++------
 drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_crtc.c         |  2 +-
 drivers/gpu/drm/imx/ipuv3-crtc.c                   |  2 +-
 drivers/gpu/drm/mxsfb/mxsfb_crtc.c                 |  6 +++---
 drivers/gpu/drm/omapdrm/displays/encoder-tfp410.c  |  5 +++--
 .../omapdrm/displays/panel-lgphilips-lb035q02.c    |  5 +++--
 .../drm/omapdrm/displays/panel-nec-nl8048hl11.c    |  5 +++--
 .../drm/omapdrm/displays/panel-sharp-ls037v7dw01.c |  5 +++--
 .../drm/omapdrm/displays/panel-sony-acx565akm.c    |  5 +++--
 .../drm/omapdrm/displays/panel-tpo-td028ttec1.c    |  5 +++--
 .../drm/omapdrm/displays/panel-tpo-td043mtea1.c    |  5 +++--
 drivers/gpu/drm/omapdrm/dss/dsi.c                  |  4 ++--
 drivers/gpu/drm/omapdrm/dss/sdi.c                  |  4 ++--
 drivers/gpu/drm/omapdrm/omap_encoder.c             |  8 ++++----
 drivers/gpu/drm/panel/panel-arm-versatile.c        |  4 ++--
 drivers/gpu/drm/panel/panel-ilitek-ili9322.c       |  4 ++--
 drivers/gpu/drm/panel/panel-seiko-43wvf1g.c        |  2 +-
 drivers/gpu/drm/panel/panel-simple.c               | 24 +++++++++++-----------
 drivers/gpu/drm/panel/panel-tpo-tpg110.c           | 10 ++++-----
 drivers/gpu/drm/pl111/pl111_display.c              |  2 +-
 drivers/gpu/drm/sun4i/sun4i_tcon.c                 |  4 ++--
 drivers/gpu/drm/tve200/tve200_display.c            |  3 ++-
 include/drm/drm_bridge.h                           |  9 ++++----
 25 files changed, 77 insertions(+), 68 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/bridge/dumb-vga-dac.c b/drivers/gpu/drm/bridge/dumb-vga-dac.c
index 0805801f4e94..94ed450e308d 100644
--- a/drivers/gpu/drm/bridge/dumb-vga-dac.c
+++ b/drivers/gpu/drm/bridge/dumb-vga-dac.c
@@ -234,7 +234,7 @@ static int dumb_vga_remove(struct platform_device *pdev)
  */
 static const struct drm_bridge_timings default_dac_timings = {
 	/* Timing specifications, datasheet page 7 */
-	.sampling_edge = DRM_BUS_FLAG_PIXDATA_POSEDGE,
+	.sampling_edge = DRM_BUS_FLAG_PIXDATA_SAMPLE_POSEDGE,
 	.setup_time_ps = 500,
 	.hold_time_ps = 1500,
 };
@@ -245,7 +245,7 @@ static const struct drm_bridge_timings default_dac_timings = {
  */
 static const struct drm_bridge_timings ti_ths8134_dac_timings = {
 	/* From timing diagram, datasheet page 9 */
-	.sampling_edge = DRM_BUS_FLAG_PIXDATA_POSEDGE,
+	.sampling_edge = DRM_BUS_FLAG_PIXDATA_SAMPLE_POSEDGE,
 	/* From datasheet, page 12 */
 	.setup_time_ps = 3000,
 	/* I guess this means latched input */
@@ -258,7 +258,7 @@ static const struct drm_bridge_timings ti_ths8134_dac_timings = {
  */
 static const struct drm_bridge_timings ti_ths8135_dac_timings = {
 	/* From timing diagram, datasheet page 14 */
-	.sampling_edge = DRM_BUS_FLAG_PIXDATA_POSEDGE,
+	.sampling_edge = DRM_BUS_FLAG_PIXDATA_SAMPLE_POSEDGE,
 	/* From datasheet, page 16 */
 	.setup_time_ps = 2000,
 	.hold_time_ps = 500,
diff --git a/drivers/gpu/drm/bridge/tc358767.c b/drivers/gpu/drm/bridge/tc358767.c
index 888980d4bc74..e570c9dee180 100644
--- a/drivers/gpu/drm/bridge/tc358767.c
+++ b/drivers/gpu/drm/bridge/tc358767.c
@@ -1222,8 +1222,8 @@ static int tc_bridge_attach(struct drm_bridge *bridge)
 					 &bus_format, 1);
 	tc->connector.display_info.bus_flags =
 		DRM_BUS_FLAG_DE_HIGH |
-		DRM_BUS_FLAG_PIXDATA_NEGEDGE |
-		DRM_BUS_FLAG_SYNC_NEGEDGE;
+		DRM_BUS_FLAG_PIXDATA_DRIVE_NEGEDGE |
+		DRM_BUS_FLAG_SYNC_DRIVE_NEGEDGE;
 	drm_connector_attach_encoder(&tc->connector, tc->bridge.encoder);
 
 	return 0;
diff --git a/drivers/gpu/drm/drm_modes.c b/drivers/gpu/drm/drm_modes.c
index 869ac6f4671e..56f92a0bba62 100644
--- a/drivers/gpu/drm/drm_modes.c
+++ b/drivers/gpu/drm/drm_modes.c
@@ -655,22 +655,22 @@ EXPORT_SYMBOL_GPL(drm_display_mode_to_videomode);
  * @bus_flags: information about pixelclk, sync and DE polarity will be stored
  * here
  *
- * Sets DRM_BUS_FLAG_DE_(LOW|HIGH),  DRM_BUS_FLAG_PIXDATA_(POS|NEG)EDGE and
- * DISPLAY_FLAGS_SYNC_(POS|NEG)EDGE in @bus_flags according to DISPLAY_FLAGS
+ * Sets DRM_BUS_FLAG_DE_(LOW|HIGH),  DRM_BUS_FLAG_PIXDATA_DRIVE_(POS|NEG)EDGE
+ * and DISPLAY_FLAGS_SYNC_(POS|NEG)EDGE in @bus_flags according to DISPLAY_FLAGS
  * found in @vm
  */
 void drm_bus_flags_from_videomode(const struct videomode *vm, u32 *bus_flags)
 {
 	*bus_flags = 0;
 	if (vm->flags & DISPLAY_FLAGS_PIXDATA_POSEDGE)
-		*bus_flags |= DRM_BUS_FLAG_PIXDATA_POSEDGE;
+		*bus_flags |= DRM_BUS_FLAG_PIXDATA_DRIVE_POSEDGE;
 	if (vm->flags & DISPLAY_FLAGS_PIXDATA_NEGEDGE)
-		*bus_flags |= DRM_BUS_FLAG_PIXDATA_NEGEDGE;
+		*bus_flags |= DRM_BUS_FLAG_PIXDATA_DRIVE_NEGEDGE;
 
 	if (vm->flags & DISPLAY_FLAGS_SYNC_POSEDGE)
-		*bus_flags |= DRM_BUS_FLAG_SYNC_POSEDGE;
+		*bus_flags |= DRM_BUS_FLAG_SYNC_DRIVE_POSEDGE;
 	if (vm->flags & DISPLAY_FLAGS_SYNC_NEGEDGE)
-		*bus_flags |= DRM_BUS_FLAG_SYNC_NEGEDGE;
+		*bus_flags |= DRM_BUS_FLAG_SYNC_DRIVE_NEGEDGE;
 
 	if (vm->flags & DISPLAY_FLAGS_DE_LOW)
 		*bus_flags |= DRM_BUS_FLAG_DE_LOW;
diff --git a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_crtc.c b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_crtc.c
index bf256971063d..83c841b50272 100644
--- a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_crtc.c
+++ b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_crtc.c
@@ -94,7 +94,7 @@ static void fsl_dcu_drm_crtc_mode_set_nofb(struct drm_crtc *crtc)
 	drm_display_mode_to_videomode(mode, &vm);
 
 	/* INV_PXCK as default (most display sample data on rising edge) */
-	if (!(con->display_info.bus_flags & DRM_BUS_FLAG_PIXDATA_POSEDGE))
+	if (!(con->display_info.bus_flags & DRM_BUS_FLAG_PIXDATA_DRIVE_POSEDGE))
 		pol |= DCU_SYN_POL_INV_PXCK;
 
 	if (vm.flags & DISPLAY_FLAGS_HSYNC_LOW)
diff --git a/drivers/gpu/drm/imx/ipuv3-crtc.c b/drivers/gpu/drm/imx/ipuv3-crtc.c
index ec3602ebbc1c..311a20c942eb 100644
--- a/drivers/gpu/drm/imx/ipuv3-crtc.c
+++ b/drivers/gpu/drm/imx/ipuv3-crtc.c
@@ -295,7 +295,7 @@ static void ipu_crtc_mode_set_nofb(struct drm_crtc *crtc)
 	sig_cfg.enable_pol = !(imx_crtc_state->bus_flags & DRM_BUS_FLAG_DE_LOW);
 	/* Default to driving pixel data on negative clock edges */
 	sig_cfg.clk_pol = !!(imx_crtc_state->bus_flags &
-			     DRM_BUS_FLAG_PIXDATA_POSEDGE);
+			     DRM_BUS_FLAG_PIXDATA_DRIVE_POSEDGE);
 	sig_cfg.bus_format = imx_crtc_state->bus_format;
 	sig_cfg.v_to_h_sync = 0;
 	sig_cfg.hsync_pin = imx_crtc_state->di_hsync_pin;
diff --git a/drivers/gpu/drm/mxsfb/mxsfb_crtc.c b/drivers/gpu/drm/mxsfb/mxsfb_crtc.c
index 0ee1ca8a316a..98e9bda91e80 100644
--- a/drivers/gpu/drm/mxsfb/mxsfb_crtc.c
+++ b/drivers/gpu/drm/mxsfb/mxsfb_crtc.c
@@ -253,12 +253,12 @@ static void mxsfb_crtc_mode_set_nofb(struct mxsfb_drm_private *mxsfb)
 	if (!(bus_flags & DRM_BUS_FLAG_DE_LOW))
 		vdctrl0 |= VDCTRL0_ENABLE_ACT_HIGH;
 	/*
-	 * DRM_BUS_FLAG_PIXDATA_ defines are controller centric,
+	 * DRM_BUS_FLAG_PIXDATA_DRIVE_ defines are controller centric,
 	 * controllers VDCTRL0_DOTCLK is display centric.
 	 * Drive on positive edge       -> display samples on falling edge
-	 * DRM_BUS_FLAG_PIXDATA_POSEDGE -> VDCTRL0_DOTCLK_ACT_FALLING
+	 * DRM_BUS_FLAG_PIXDATA_DRIVE_POSEDGE -> VDCTRL0_DOTCLK_ACT_FALLING
 	 */
-	if (bus_flags & DRM_BUS_FLAG_PIXDATA_POSEDGE)
+	if (bus_flags & DRM_BUS_FLAG_PIXDATA_DRIVE_POSEDGE)
 		vdctrl0 |= VDCTRL0_DOTCLK_ACT_FALLING;
 
 	writel(vdctrl0, mxsfb->base + LCDC_VDCTRL0);
diff --git a/drivers/gpu/drm/omapdrm/displays/encoder-tfp410.c b/drivers/gpu/drm/omapdrm/displays/encoder-tfp410.c
index fb88537de1cc..de954182c4bb 100644
--- a/drivers/gpu/drm/omapdrm/displays/encoder-tfp410.c
+++ b/drivers/gpu/drm/omapdrm/displays/encoder-tfp410.c
@@ -86,8 +86,9 @@ static int tfp410_probe(struct platform_device *pdev)
 	dssdev->type = OMAP_DISPLAY_TYPE_DPI;
 	dssdev->owner = THIS_MODULE;
 	dssdev->of_ports = BIT(1) | BIT(0);
-	dssdev->bus_flags = DRM_BUS_FLAG_DE_HIGH | DRM_BUS_FLAG_SYNC_POSEDGE
-			  | DRM_BUS_FLAG_PIXDATA_POSEDGE;
+	dssdev->bus_flags = DRM_BUS_FLAG_DE_HIGH
+			  | DRM_BUS_FLAG_SYNC_DRIVE_POSEDGE
+			  | DRM_BUS_FLAG_PIXDATA_DRIVE_POSEDGE;
 
 	dssdev->next = omapdss_of_find_connected_device(pdev->dev.of_node, 1);
 	if (IS_ERR(dssdev->next)) {
diff --git a/drivers/gpu/drm/omapdrm/displays/panel-lgphilips-lb035q02.c b/drivers/gpu/drm/omapdrm/displays/panel-lgphilips-lb035q02.c
index 4133351924be..99f2350d462c 100644
--- a/drivers/gpu/drm/omapdrm/displays/panel-lgphilips-lb035q02.c
+++ b/drivers/gpu/drm/omapdrm/displays/panel-lgphilips-lb035q02.c
@@ -207,8 +207,9 @@ static int lb035q02_panel_spi_probe(struct spi_device *spi)
 	 * DE is active LOW
 	 * DATA needs to be driven on the FALLING edge
 	 */
-	dssdev->bus_flags = DRM_BUS_FLAG_DE_HIGH | DRM_BUS_FLAG_SYNC_NEGEDGE
-			  | DRM_BUS_FLAG_PIXDATA_POSEDGE;
+	dssdev->bus_flags = DRM_BUS_FLAG_DE_HIGH
+			  | DRM_BUS_FLAG_SYNC_DRIVE_NEGEDGE
+			  | DRM_BUS_FLAG_PIXDATA_DRIVE_POSEDGE;
 
 	omapdss_display_init(dssdev);
 	omapdss_device_register(dssdev);
diff --git a/drivers/gpu/drm/omapdrm/displays/panel-nec-nl8048hl11.c b/drivers/gpu/drm/omapdrm/displays/panel-nec-nl8048hl11.c
index 498a88307f04..c2409815a204 100644
--- a/drivers/gpu/drm/omapdrm/displays/panel-nec-nl8048hl11.c
+++ b/drivers/gpu/drm/omapdrm/displays/panel-nec-nl8048hl11.c
@@ -196,8 +196,9 @@ static int nec_8048_probe(struct spi_device *spi)
 	dssdev->owner = THIS_MODULE;
 	dssdev->of_ports = BIT(0);
 	dssdev->ops_flags = OMAP_DSS_DEVICE_OP_MODES;
-	dssdev->bus_flags = DRM_BUS_FLAG_DE_HIGH | DRM_BUS_FLAG_SYNC_POSEDGE
-			  | DRM_BUS_FLAG_PIXDATA_POSEDGE;
+	dssdev->bus_flags = DRM_BUS_FLAG_DE_HIGH
+			  | DRM_BUS_FLAG_SYNC_DRIVE_POSEDGE
+			  | DRM_BUS_FLAG_PIXDATA_DRIVE_POSEDGE;
 
 	omapdss_display_init(dssdev);
 	omapdss_device_register(dssdev);
diff --git a/drivers/gpu/drm/omapdrm/displays/panel-sharp-ls037v7dw01.c b/drivers/gpu/drm/omapdrm/displays/panel-sharp-ls037v7dw01.c
index 1e68e0d14f87..9c545de430f6 100644
--- a/drivers/gpu/drm/omapdrm/displays/panel-sharp-ls037v7dw01.c
+++ b/drivers/gpu/drm/omapdrm/displays/panel-sharp-ls037v7dw01.c
@@ -216,8 +216,9 @@ static int sharp_ls_probe(struct platform_device *pdev)
 	 * Note: According to the panel documentation:
 	 * DATA needs to be driven on the FALLING edge
 	 */
-	dssdev->bus_flags = DRM_BUS_FLAG_DE_HIGH | DRM_BUS_FLAG_SYNC_NEGEDGE
-			  | DRM_BUS_FLAG_PIXDATA_POSEDGE;
+	dssdev->bus_flags = DRM_BUS_FLAG_DE_HIGH
+			  | DRM_BUS_FLAG_SYNC_DRIVE_NEGEDGE
+			  | DRM_BUS_FLAG_PIXDATA_DRIVE_POSEDGE;
 
 	omapdss_display_init(dssdev);
 	omapdss_device_register(dssdev);
diff --git a/drivers/gpu/drm/omapdrm/displays/panel-sony-acx565akm.c b/drivers/gpu/drm/omapdrm/displays/panel-sony-acx565akm.c
index abee0b348cff..2038def14ba1 100644
--- a/drivers/gpu/drm/omapdrm/displays/panel-sony-acx565akm.c
+++ b/drivers/gpu/drm/omapdrm/displays/panel-sony-acx565akm.c
@@ -710,8 +710,9 @@ static int acx565akm_probe(struct spi_device *spi)
 	dssdev->owner = THIS_MODULE;
 	dssdev->of_ports = BIT(0);
 	dssdev->ops_flags = OMAP_DSS_DEVICE_OP_MODES;
-	dssdev->bus_flags = DRM_BUS_FLAG_DE_HIGH | DRM_BUS_FLAG_SYNC_NEGEDGE
-			  | DRM_BUS_FLAG_PIXDATA_POSEDGE;
+	dssdev->bus_flags = DRM_BUS_FLAG_DE_HIGH
+			  | DRM_BUS_FLAG_SYNC_DRIVE_NEGEDGE
+			  | DRM_BUS_FLAG_PIXDATA_DRIVE_POSEDGE;
 
 	omapdss_display_init(dssdev);
 	omapdss_device_register(dssdev);
diff --git a/drivers/gpu/drm/omapdrm/displays/panel-tpo-td028ttec1.c b/drivers/gpu/drm/omapdrm/displays/panel-tpo-td028ttec1.c
index 5b4fbcc196bc..fa80d4f7f699 100644
--- a/drivers/gpu/drm/omapdrm/displays/panel-tpo-td028ttec1.c
+++ b/drivers/gpu/drm/omapdrm/displays/panel-tpo-td028ttec1.c
@@ -330,8 +330,9 @@ static int td028ttec1_panel_probe(struct spi_device *spi)
 	 * Note: According to the panel documentation:
 	 * SYNC needs to be driven on the FALLING edge
 	 */
-	dssdev->bus_flags = DRM_BUS_FLAG_DE_HIGH | DRM_BUS_FLAG_SYNC_POSEDGE
-			  | DRM_BUS_FLAG_PIXDATA_NEGEDGE;
+	dssdev->bus_flags = DRM_BUS_FLAG_DE_HIGH
+			  | DRM_BUS_FLAG_SYNC_DRIVE_POSEDGE
+			  | DRM_BUS_FLAG_PIXDATA_DRIVE_NEGEDGE;
 
 	omapdss_display_init(dssdev);
 	omapdss_device_register(dssdev);
diff --git a/drivers/gpu/drm/omapdrm/displays/panel-tpo-td043mtea1.c b/drivers/gpu/drm/omapdrm/displays/panel-tpo-td043mtea1.c
index 781dc8b0afe7..0b692fc7e5ea 100644
--- a/drivers/gpu/drm/omapdrm/displays/panel-tpo-td043mtea1.c
+++ b/drivers/gpu/drm/omapdrm/displays/panel-tpo-td043mtea1.c
@@ -429,8 +429,9 @@ static int tpo_td043_probe(struct spi_device *spi)
 	 * Note: According to the panel documentation:
 	 * SYNC needs to be driven on the FALLING edge
 	 */
-	dssdev->bus_flags = DRM_BUS_FLAG_DE_HIGH | DRM_BUS_FLAG_SYNC_POSEDGE
-			  | DRM_BUS_FLAG_PIXDATA_NEGEDGE;
+	dssdev->bus_flags = DRM_BUS_FLAG_DE_HIGH
+			  | DRM_BUS_FLAG_SYNC_DRIVE_POSEDGE
+			  | DRM_BUS_FLAG_PIXDATA_DRIVE_NEGEDGE;
 
 	omapdss_display_init(dssdev);
 	omapdss_device_register(dssdev);
diff --git a/drivers/gpu/drm/omapdrm/dss/dsi.c b/drivers/gpu/drm/omapdrm/dss/dsi.c
index 2582b566e0b0..5c34fba51975 100644
--- a/drivers/gpu/drm/omapdrm/dss/dsi.c
+++ b/drivers/gpu/drm/omapdrm/dss/dsi.c
@@ -5127,9 +5127,9 @@ static int dsi_init_output(struct dsi_data *dsi)
 	out->ops = &dsi_ops;
 	out->owner = THIS_MODULE;
 	out->of_ports = BIT(0);
-	out->bus_flags = DRM_BUS_FLAG_PIXDATA_POSEDGE
+	out->bus_flags = DRM_BUS_FLAG_PIXDATA_DRIVE_POSEDGE
 		       | DRM_BUS_FLAG_DE_HIGH
-		       | DRM_BUS_FLAG_SYNC_NEGEDGE;
+		       | DRM_BUS_FLAG_SYNC_DRIVE_NEGEDGE;
 
 	r = omapdss_device_init_output(out);
 	if (r < 0)
diff --git a/drivers/gpu/drm/omapdrm/dss/sdi.c b/drivers/gpu/drm/omapdrm/dss/sdi.c
index e7945322c471..7aae52984fed 100644
--- a/drivers/gpu/drm/omapdrm/dss/sdi.c
+++ b/drivers/gpu/drm/omapdrm/dss/sdi.c
@@ -279,8 +279,8 @@ static int sdi_init_output(struct sdi_device *sdi)
 	out->of_ports = BIT(1);
 	out->ops = &sdi_ops;
 	out->owner = THIS_MODULE;
-	out->bus_flags = DRM_BUS_FLAG_PIXDATA_POSEDGE	/* 15.5.9.1.2 */
-		       | DRM_BUS_FLAG_SYNC_POSEDGE;
+	out->bus_flags = DRM_BUS_FLAG_PIXDATA_DRIVE_POSEDGE	/* 15.5.9.1.2 */
+		       | DRM_BUS_FLAG_SYNC_DRIVE_POSEDGE;
 
 	r = omapdss_device_init_output(out);
 	if (r < 0)
diff --git a/drivers/gpu/drm/omapdrm/omap_encoder.c b/drivers/gpu/drm/omapdrm/omap_encoder.c
index 42748479d374..b83a2ae64a03 100644
--- a/drivers/gpu/drm/omapdrm/omap_encoder.c
+++ b/drivers/gpu/drm/omapdrm/omap_encoder.c
@@ -114,17 +114,17 @@ static void omap_encoder_mode_set(struct drm_encoder *encoder,
 
 		if (!(vm.flags & (DISPLAY_FLAGS_PIXDATA_POSEDGE |
 				  DISPLAY_FLAGS_PIXDATA_NEGEDGE))) {
-			if (bus_flags & DRM_BUS_FLAG_PIXDATA_POSEDGE)
+			if (bus_flags & DRM_BUS_FLAG_PIXDATA_DRIVE_POSEDGE)
 				vm.flags |= DISPLAY_FLAGS_PIXDATA_POSEDGE;
-			else if (bus_flags & DRM_BUS_FLAG_PIXDATA_NEGEDGE)
+			else if (bus_flags & DRM_BUS_FLAG_PIXDATA_DRIVE_NEGEDGE)
 				vm.flags |= DISPLAY_FLAGS_PIXDATA_NEGEDGE;
 		}
 
 		if (!(vm.flags & (DISPLAY_FLAGS_SYNC_POSEDGE |
 				  DISPLAY_FLAGS_SYNC_NEGEDGE))) {
-			if (bus_flags & DRM_BUS_FLAG_SYNC_POSEDGE)
+			if (bus_flags & DRM_BUS_FLAG_SYNC_DRIVE_POSEDGE)
 				vm.flags |= DISPLAY_FLAGS_SYNC_POSEDGE;
-			else if (bus_flags & DRM_BUS_FLAG_SYNC_NEGEDGE)
+			else if (bus_flags & DRM_BUS_FLAG_SYNC_DRIVE_NEGEDGE)
 				vm.flags |= DISPLAY_FLAGS_SYNC_NEGEDGE;
 		}
 	}
diff --git a/drivers/gpu/drm/panel/panel-arm-versatile.c b/drivers/gpu/drm/panel/panel-arm-versatile.c
index b428c4678106..078fa2c0eef8 100644
--- a/drivers/gpu/drm/panel/panel-arm-versatile.c
+++ b/drivers/gpu/drm/panel/panel-arm-versatile.c
@@ -191,7 +191,7 @@ static const struct versatile_panel_type versatile_panels[] = {
 			.vrefresh = 390,
 			.flags = DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC,
 		},
-		.bus_flags = DRM_BUS_FLAG_PIXDATA_NEGEDGE,
+		.bus_flags = DRM_BUS_FLAG_PIXDATA_DRIVE_NEGEDGE,
 	},
 	/*
 	 * Sanyo ALR252RGT 240x320 portrait display found on the
@@ -215,7 +215,7 @@ static const struct versatile_panel_type versatile_panels[] = {
 			.vrefresh = 116,
 			.flags = DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC,
 		},
-		.bus_flags = DRM_BUS_FLAG_PIXDATA_NEGEDGE,
+		.bus_flags = DRM_BUS_FLAG_PIXDATA_DRIVE_NEGEDGE,
 		.ib2 = true,
 	},
 };
diff --git a/drivers/gpu/drm/panel/panel-ilitek-ili9322.c b/drivers/gpu/drm/panel/panel-ilitek-ili9322.c
index bd38bf4f1ba6..35497ff08391 100644
--- a/drivers/gpu/drm/panel/panel-ilitek-ili9322.c
+++ b/drivers/gpu/drm/panel/panel-ilitek-ili9322.c
@@ -412,11 +412,11 @@ static int ili9322_init(struct drm_panel *panel, struct ili9322 *ili)
 	if (ili->conf->dclk_active_high) {
 		reg = ILI9322_POL_DCLK;
 		connector->display_info.bus_flags |=
-			DRM_BUS_FLAG_PIXDATA_POSEDGE;
+			DRM_BUS_FLAG_PIXDATA_DRIVE_POSEDGE;
 	} else {
 		reg = 0;
 		connector->display_info.bus_flags |=
-			DRM_BUS_FLAG_PIXDATA_NEGEDGE;
+			DRM_BUS_FLAG_PIXDATA_DRIVE_NEGEDGE;
 	}
 	if (ili->conf->de_active_high) {
 		reg |= ILI9322_POL_DE;
diff --git a/drivers/gpu/drm/panel/panel-seiko-43wvf1g.c b/drivers/gpu/drm/panel/panel-seiko-43wvf1g.c
index 2d99e28ff117..bdcc5d80823d 100644
--- a/drivers/gpu/drm/panel/panel-seiko-43wvf1g.c
+++ b/drivers/gpu/drm/panel/panel-seiko-43wvf1g.c
@@ -328,7 +328,7 @@ static const struct seiko_panel_desc seiko_43wvf1g = {
 		.height = 57,
 	},
 	.bus_format = MEDIA_BUS_FMT_RGB888_1X24,
-	.bus_flags = DRM_BUS_FLAG_DE_HIGH | DRM_BUS_FLAG_PIXDATA_NEGEDGE,
+	.bus_flags = DRM_BUS_FLAG_DE_HIGH | DRM_BUS_FLAG_PIXDATA_DRIVE_NEGEDGE,
 };
 
 static const struct of_device_id platform_of_match[] = {
diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c
index 9e8218f6a3f2..64516f23a957 100644
--- a/drivers/gpu/drm/panel/panel-simple.c
+++ b/drivers/gpu/drm/panel/panel-simple.c
@@ -914,7 +914,7 @@ static const struct panel_desc cdtech_s043wq26h_ct7 = {
 		.width = 95,
 		.height = 54,
 	},
-	.bus_flags = DRM_BUS_FLAG_PIXDATA_POSEDGE,
+	.bus_flags = DRM_BUS_FLAG_PIXDATA_DRIVE_POSEDGE,
 };
 
 static const struct drm_display_mode cdtech_s070wv95_ct16_mode = {
@@ -1034,7 +1034,7 @@ static const struct panel_desc dataimage_scf0700c48ggu18 = {
 		.height = 91,
 	},
 	.bus_format = MEDIA_BUS_FMT_RGB888_1X24,
-	.bus_flags = DRM_BUS_FLAG_DE_HIGH | DRM_BUS_FLAG_PIXDATA_POSEDGE,
+	.bus_flags = DRM_BUS_FLAG_DE_HIGH | DRM_BUS_FLAG_PIXDATA_DRIVE_POSEDGE,
 };
 
 static const struct display_timing dlc_dlc0700yzg_1_timing = {
@@ -1119,7 +1119,7 @@ static const struct panel_desc edt_et057090dhu = {
 		.height = 86,
 	},
 	.bus_format = MEDIA_BUS_FMT_RGB666_1X18,
-	.bus_flags = DRM_BUS_FLAG_DE_HIGH | DRM_BUS_FLAG_PIXDATA_NEGEDGE,
+	.bus_flags = DRM_BUS_FLAG_DE_HIGH | DRM_BUS_FLAG_PIXDATA_DRIVE_NEGEDGE,
 };
 
 static const struct drm_display_mode edt_etm0700g0dh6_mode = {
@@ -1145,7 +1145,7 @@ static const struct panel_desc edt_etm0700g0dh6 = {
 		.height = 91,
 	},
 	.bus_format = MEDIA_BUS_FMT_RGB666_1X18,
-	.bus_flags = DRM_BUS_FLAG_DE_HIGH | DRM_BUS_FLAG_PIXDATA_NEGEDGE,
+	.bus_flags = DRM_BUS_FLAG_DE_HIGH | DRM_BUS_FLAG_PIXDATA_DRIVE_NEGEDGE,
 };
 
 static const struct panel_desc edt_etm0700g0bdh6 = {
@@ -1157,7 +1157,7 @@ static const struct panel_desc edt_etm0700g0bdh6 = {
 		.height = 91,
 	},
 	.bus_format = MEDIA_BUS_FMT_RGB666_1X18,
-	.bus_flags = DRM_BUS_FLAG_DE_HIGH | DRM_BUS_FLAG_PIXDATA_POSEDGE,
+	.bus_flags = DRM_BUS_FLAG_DE_HIGH | DRM_BUS_FLAG_PIXDATA_DRIVE_POSEDGE,
 };
 
 static const struct drm_display_mode foxlink_fl500wvr00_a0t_mode = {
@@ -1311,7 +1311,7 @@ static const struct panel_desc innolux_at043tn24 = {
 		.height = 54,
 	},
 	.bus_format = MEDIA_BUS_FMT_RGB888_1X24,
-	.bus_flags = DRM_BUS_FLAG_DE_HIGH | DRM_BUS_FLAG_PIXDATA_POSEDGE,
+	.bus_flags = DRM_BUS_FLAG_DE_HIGH | DRM_BUS_FLAG_PIXDATA_DRIVE_POSEDGE,
 };
 
 static const struct drm_display_mode innolux_at070tn92_mode = {
@@ -1818,7 +1818,7 @@ static const struct panel_desc nec_nl4827hc19_05b = {
 		.height = 54,
 	},
 	.bus_format = MEDIA_BUS_FMT_RGB888_1X24,
-	.bus_flags = DRM_BUS_FLAG_PIXDATA_POSEDGE,
+	.bus_flags = DRM_BUS_FLAG_PIXDATA_DRIVE_POSEDGE,
 };
 
 static const struct drm_display_mode netron_dy_e231732_mode = {
@@ -1867,8 +1867,8 @@ static const struct panel_desc newhaven_nhd_43_480272ef_atxl = {
 		.height = 54,
 	},
 	.bus_format = MEDIA_BUS_FMT_RGB888_1X24,
-	.bus_flags = DRM_BUS_FLAG_DE_HIGH | DRM_BUS_FLAG_PIXDATA_POSEDGE |
-		     DRM_BUS_FLAG_SYNC_POSEDGE,
+	.bus_flags = DRM_BUS_FLAG_DE_HIGH | DRM_BUS_FLAG_PIXDATA_DRIVE_POSEDGE |
+		     DRM_BUS_FLAG_SYNC_DRIVE_POSEDGE,
 };
 
 static const struct display_timing nlt_nl192108ac18_02d_timing = {
@@ -2029,7 +2029,7 @@ static const struct panel_desc ortustech_com43h4m85ulc = {
 		.height = 93,
 	},
 	.bus_format = MEDIA_BUS_FMT_RGB888_1X24,
-	.bus_flags = DRM_BUS_FLAG_DE_HIGH | DRM_BUS_FLAG_PIXDATA_POSEDGE,
+	.bus_flags = DRM_BUS_FLAG_DE_HIGH | DRM_BUS_FLAG_PIXDATA_DRIVE_POSEDGE,
 };
 
 static const struct drm_display_mode pda_91_00156_a0_mode = {
@@ -2398,7 +2398,7 @@ static const struct panel_desc toshiba_lt089ac29000 = {
 		.height = 116,
 	},
 	.bus_format = MEDIA_BUS_FMT_RGB888_1X24,
-	.bus_flags = DRM_BUS_FLAG_DE_HIGH | DRM_BUS_FLAG_PIXDATA_POSEDGE,
+	.bus_flags = DRM_BUS_FLAG_DE_HIGH | DRM_BUS_FLAG_PIXDATA_DRIVE_POSEDGE,
 };
 
 static const struct drm_display_mode tpk_f07a_0102_mode = {
@@ -2421,7 +2421,7 @@ static const struct panel_desc tpk_f07a_0102 = {
 		.width = 152,
 		.height = 91,
 	},
-	.bus_flags = DRM_BUS_FLAG_PIXDATA_POSEDGE,
+	.bus_flags = DRM_BUS_FLAG_PIXDATA_DRIVE_POSEDGE,
 };
 
 static const struct drm_display_mode tpk_f10a_0102_mode = {
diff --git a/drivers/gpu/drm/panel/panel-tpo-tpg110.c b/drivers/gpu/drm/panel/panel-tpo-tpg110.c
index 5a9f8f4d5d24..25f00cfc1af4 100644
--- a/drivers/gpu/drm/panel/panel-tpo-tpg110.c
+++ b/drivers/gpu/drm/panel/panel-tpo-tpg110.c
@@ -118,7 +118,7 @@ static const struct tpg110_panel_mode tpg110_modes[] = {
 			.vtotal = 480 + 10 + 1 + 35,
 			.vrefresh = 60,
 		},
-		.bus_flags = DRM_BUS_FLAG_PIXDATA_POSEDGE,
+		.bus_flags = DRM_BUS_FLAG_PIXDATA_DRIVE_POSEDGE,
 	},
 	{
 		.name = "640x480 RGB",
@@ -135,7 +135,7 @@ static const struct tpg110_panel_mode tpg110_modes[] = {
 			.vtotal = 480 + 18 + 1 + 27,
 			.vrefresh = 60,
 		},
-		.bus_flags = DRM_BUS_FLAG_PIXDATA_POSEDGE,
+		.bus_flags = DRM_BUS_FLAG_PIXDATA_DRIVE_POSEDGE,
 	},
 	{
 		.name = "480x272 RGB",
@@ -152,7 +152,7 @@ static const struct tpg110_panel_mode tpg110_modes[] = {
 			.vtotal = 272 + 2 + 1 + 12,
 			.vrefresh = 60,
 		},
-		.bus_flags = DRM_BUS_FLAG_PIXDATA_POSEDGE,
+		.bus_flags = DRM_BUS_FLAG_PIXDATA_DRIVE_POSEDGE,
 	},
 	{
 		.name = "480x640 RGB",
@@ -169,7 +169,7 @@ static const struct tpg110_panel_mode tpg110_modes[] = {
 			.vtotal = 640 + 4 + 1 + 8,
 			.vrefresh = 60,
 		},
-		.bus_flags = DRM_BUS_FLAG_PIXDATA_POSEDGE,
+		.bus_flags = DRM_BUS_FLAG_PIXDATA_DRIVE_POSEDGE,
 	},
 	{
 		.name = "400x240 RGB",
@@ -186,7 +186,7 @@ static const struct tpg110_panel_mode tpg110_modes[] = {
 			.vtotal = 240 + 2 + 1 + 20,
 			.vrefresh = 60,
 		},
-		.bus_flags = DRM_BUS_FLAG_PIXDATA_POSEDGE,
+		.bus_flags = DRM_BUS_FLAG_PIXDATA_DRIVE_POSEDGE,
 	},
 };
 
diff --git a/drivers/gpu/drm/pl111/pl111_display.c b/drivers/gpu/drm/pl111/pl111_display.c
index 754f6b25f265..0c5d391f0a8f 100644
--- a/drivers/gpu/drm/pl111/pl111_display.c
+++ b/drivers/gpu/drm/pl111/pl111_display.c
@@ -188,7 +188,7 @@ static void pl111_display_enable(struct drm_simple_display_pipe *pipe,
 			tim2 |= TIM2_IOE;
 
 		if (connector->display_info.bus_flags &
-		    DRM_BUS_FLAG_PIXDATA_NEGEDGE)
+		    DRM_BUS_FLAG_PIXDATA_DRIVE_NEGEDGE)
 			tim2 |= TIM2_IPC;
 	}
 
diff --git a/drivers/gpu/drm/sun4i/sun4i_tcon.c b/drivers/gpu/drm/sun4i/sun4i_tcon.c
index 7136fc91c603..ca713d200280 100644
--- a/drivers/gpu/drm/sun4i/sun4i_tcon.c
+++ b/drivers/gpu/drm/sun4i/sun4i_tcon.c
@@ -561,10 +561,10 @@ static void sun4i_tcon0_mode_set_rgb(struct sun4i_tcon *tcon,
 	 * Following code is a way to avoid quirks all around TCON
 	 * and DOTCLOCK drivers.
 	 */
-	if (display_info.bus_flags & DRM_BUS_FLAG_PIXDATA_POSEDGE)
+	if (display_info.bus_flags & DRM_BUS_FLAG_PIXDATA_DRIVE_POSEDGE)
 		clk_set_phase(tcon->dclk, 240);
 
-	if (display_info.bus_flags & DRM_BUS_FLAG_PIXDATA_NEGEDGE)
+	if (display_info.bus_flags & DRM_BUS_FLAG_PIXDATA_DRIVE_NEGEDGE)
 		clk_set_phase(tcon->dclk, 0);
 
 	regmap_update_bits(tcon->regs, SUN4I_TCON0_IO_POL_REG,
diff --git a/drivers/gpu/drm/tve200/tve200_display.c b/drivers/gpu/drm/tve200/tve200_display.c
index e8723a2412a6..d775d10dbe6a 100644
--- a/drivers/gpu/drm/tve200/tve200_display.c
+++ b/drivers/gpu/drm/tve200/tve200_display.c
@@ -149,7 +149,8 @@ static void tve200_display_enable(struct drm_simple_display_pipe *pipe,
 	/* Vsync IRQ at start of Vsync at first */
 	ctrl1 |= TVE200_VSTSTYPE_VSYNC;
 
-	if (connector->display_info.bus_flags & DRM_BUS_FLAG_PIXDATA_NEGEDGE)
+	if (connector->display_info.bus_flags &
+	    DRM_BUS_FLAG_PIXDATA_DRIVE_NEGEDGE)
 		ctrl1 |= TVE200_CTRL_TVCLKP;
 
 	if ((mode->hdisplay == 352 && mode->vdisplay == 240) || /* SIF(525) */
diff --git a/include/drm/drm_bridge.h b/include/drm/drm_bridge.h
index 9da8c93f7976..5e5129206f40 100644
--- a/include/drm/drm_bridge.h
+++ b/include/drm/drm_bridge.h
@@ -246,10 +246,11 @@ struct drm_bridge_timings {
 	/**
 	 * @sampling_edge:
 	 *
-	 * Tells whether the bridge samples the digital input signal
-	 * from the display engine on the positive or negative edge of the
-	 * clock, this should reuse the DRM_BUS_FLAG_PIXDATA_[POS|NEG]EDGE
-	 * bitwise flags from the DRM connector (bit 2 and 3 valid).
+	 * Tells whether the bridge samples the digital input signals from the
+	 * display engine on the positive or negative edge of the clock. This
+	 * should use the DRM_BUS_FLAG_PIXDATA_SAMPLE_[POS|NEG]EDGE and
+	 * DRM_BUS_FLAG_SYNC_SAMPLE_[POS|NEG]EDGE bitwise flags from the DRM
+	 * connector (bit 2, 3, 6 and 7 valid).
 	 */
 	u32 sampling_edge;
 	/**
-- 
cgit v1.2.3


From d23286ff3d79e1b78b4a4b030a62c3ddc120c607 Mon Sep 17 00:00:00 2001
From: Stefan Agner <stefan@agner.ch>
Date: Tue, 4 Sep 2018 22:21:08 -0700
Subject: drm/bridge: use bus flags in bridge timings

The DRM bus flags convey additional information on pixel data on
the bus. All current available bus flags might be of interest for
a bridge. Remove the sampling_edge field and use bus_flags.

In the case at hand a dumb VGA bridge needs a specific data enable
polarity (DRM_BUS_FLAG_DE_LOW).

Signed-off-by: Stefan Agner <stefan@agner.ch>
Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Reviewed-by: Tomi Valkeinen <tomi.valkeinen@ti.com>
Tested-by: Sebastian Reichel <sebastian.reichel@collabora.com>
Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ti.com>
---
 drivers/gpu/drm/bridge/dumb-vga-dac.c |  6 +++---
 include/drm/drm_bridge.h              | 12 +++++-------
 2 files changed, 8 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/bridge/dumb-vga-dac.c b/drivers/gpu/drm/bridge/dumb-vga-dac.c
index 94ed450e308d..e64736c39a9f 100644
--- a/drivers/gpu/drm/bridge/dumb-vga-dac.c
+++ b/drivers/gpu/drm/bridge/dumb-vga-dac.c
@@ -234,7 +234,7 @@ static int dumb_vga_remove(struct platform_device *pdev)
  */
 static const struct drm_bridge_timings default_dac_timings = {
 	/* Timing specifications, datasheet page 7 */
-	.sampling_edge = DRM_BUS_FLAG_PIXDATA_SAMPLE_POSEDGE,
+	.input_bus_flags = DRM_BUS_FLAG_PIXDATA_SAMPLE_POSEDGE,
 	.setup_time_ps = 500,
 	.hold_time_ps = 1500,
 };
@@ -245,7 +245,7 @@ static const struct drm_bridge_timings default_dac_timings = {
  */
 static const struct drm_bridge_timings ti_ths8134_dac_timings = {
 	/* From timing diagram, datasheet page 9 */
-	.sampling_edge = DRM_BUS_FLAG_PIXDATA_SAMPLE_POSEDGE,
+	.input_bus_flags = DRM_BUS_FLAG_PIXDATA_SAMPLE_POSEDGE,
 	/* From datasheet, page 12 */
 	.setup_time_ps = 3000,
 	/* I guess this means latched input */
@@ -258,7 +258,7 @@ static const struct drm_bridge_timings ti_ths8134_dac_timings = {
  */
 static const struct drm_bridge_timings ti_ths8135_dac_timings = {
 	/* From timing diagram, datasheet page 14 */
-	.sampling_edge = DRM_BUS_FLAG_PIXDATA_SAMPLE_POSEDGE,
+	.input_bus_flags = DRM_BUS_FLAG_PIXDATA_SAMPLE_POSEDGE,
 	/* From datasheet, page 16 */
 	.setup_time_ps = 2000,
 	.hold_time_ps = 500,
diff --git a/include/drm/drm_bridge.h b/include/drm/drm_bridge.h
index 5e5129206f40..d4428913a4e1 100644
--- a/include/drm/drm_bridge.h
+++ b/include/drm/drm_bridge.h
@@ -244,15 +244,13 @@ struct drm_bridge_funcs {
  */
 struct drm_bridge_timings {
 	/**
-	 * @sampling_edge:
+	 * @input_bus_flags:
 	 *
-	 * Tells whether the bridge samples the digital input signals from the
-	 * display engine on the positive or negative edge of the clock. This
-	 * should use the DRM_BUS_FLAG_PIXDATA_SAMPLE_[POS|NEG]EDGE and
-	 * DRM_BUS_FLAG_SYNC_SAMPLE_[POS|NEG]EDGE bitwise flags from the DRM
-	 * connector (bit 2, 3, 6 and 7 valid).
+	 * Tells what additional settings for the pixel data on the bus
+	 * this bridge requires (like pixel signal polarity). See also
+	 * &drm_display_info->bus_flags.
 	 */
-	u32 sampling_edge;
+	u32 input_bus_flags;
 	/**
 	 * @setup_time_ps:
 	 *
-- 
cgit v1.2.3


From a0970e87b5d357c8a1ec9c18e128a8d7921c29f9 Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Date: Sat, 12 Jan 2019 02:42:39 +0200
Subject: drm: Turn bus flags macros into an enum

This allows nicer kerneldoc with an easy way to reference the enum and
the values.

Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Reviewed-by: Sam Ravnborg <sam@ravnborg.org>
Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ti.com>
---
 include/drm/drm_connector.h | 108 ++++++++++++++++++++++++++------------------
 1 file changed, 64 insertions(+), 44 deletions(-)

(limited to 'include')

diff --git a/include/drm/drm_connector.h b/include/drm/drm_connector.h
index 411c0eb4c00e..c5fbc128e864 100644
--- a/include/drm/drm_connector.h
+++ b/include/drm/drm_connector.h
@@ -253,6 +253,68 @@ enum drm_panel_orientation {
 	DRM_MODE_PANEL_ORIENTATION_RIGHT_UP,
 };
 
+/**
+ * enum drm_bus_flags - bus_flags info for &drm_display_info
+ *
+ * This enum defines signal polarities and clock edge information for signals on
+ * a bus as bitmask flags.
+ *
+ * The clock edge information is conveyed by two sets of symbols,
+ * DRM_BUS_FLAGS_*_DRIVE_\* and DRM_BUS_FLAGS_*_SAMPLE_\*. When this enum is
+ * used to describe a bus from the point of view of the transmitter, the
+ * \*_DRIVE_\* flags should be used. When used from the point of view of the
+ * receiver, the \*_SAMPLE_\* flags should be used. The \*_DRIVE_\* and
+ * \*_SAMPLE_\* flags alias each other, with the \*_SAMPLE_POSEDGE and
+ * \*_SAMPLE_NEGEDGE flags being equal to \*_DRIVE_NEGEDGE and \*_DRIVE_POSEDGE
+ * respectively. This simplifies code as signals are usually sampled on the
+ * opposite edge of the driving edge. Transmitters and receivers may however
+ * need to take other signal timings into account to convert between driving
+ * and sample edges.
+ *
+ * @DRM_BUS_FLAG_DE_LOW:		The Data Enable signal is active low
+ * @DRM_BUS_FLAG_DE_HIGH:		The Data Enable signal is active high
+ * @DRM_BUS_FLAG_PIXDATA_POSEDGE:	Legacy value, do not use
+ * @DRM_BUS_FLAG_PIXDATA_NEGEDGE:	Legacy value, do not use
+ * @DRM_BUS_FLAG_PIXDATA_DRIVE_POSEDGE:	Data is driven on the rising edge of
+ *					the pixel clock
+ * @DRM_BUS_FLAG_PIXDATA_DRIVE_NEGEDGE:	Data is driven on the falling edge of
+ *					the pixel clock
+ * @DRM_BUS_FLAG_PIXDATA_SAMPLE_POSEDGE: Data is sampled on the rising edge of
+ *					the pixel clock
+ * @DRM_BUS_FLAG_PIXDATA_SAMPLE_NEGEDGE: Data is sampled on the falling edge of
+ *					the pixel clock
+ * @DRM_BUS_FLAG_DATA_MSB_TO_LSB:	Data is transmitted MSB to LSB on the bus
+ * @DRM_BUS_FLAG_DATA_LSB_TO_MSB:	Data is transmitted LSB to MSB on the bus
+ * @DRM_BUS_FLAG_SYNC_POSEDGE:		Legacy value, do not use
+ * @DRM_BUS_FLAG_SYNC_NEGEDGE:		Legacy value, do not use
+ * @DRM_BUS_FLAG_SYNC_DRIVE_POSEDGE:	Sync signals are driven on the rising
+ *					edge of the pixel clock
+ * @DRM_BUS_FLAG_SYNC_DRIVE_NEGEDGE:	Sync signals are driven on the falling
+ *					edge of the pixel clock
+ * @DRM_BUS_FLAG_SYNC_SAMPLE_POSEDGE:	Sync signals are sampled on the rising
+ *					edge of the pixel clock
+ * @DRM_BUS_FLAG_SYNC_SAMPLE_NEGEDGE:	Sync signals are sampled on the falling
+ *					edge of the pixel clock
+ */
+enum drm_bus_flags {
+	DRM_BUS_FLAG_DE_LOW = BIT(0),
+	DRM_BUS_FLAG_DE_HIGH = BIT(1),
+	DRM_BUS_FLAG_PIXDATA_POSEDGE = BIT(2),
+	DRM_BUS_FLAG_PIXDATA_NEGEDGE = BIT(3),
+	DRM_BUS_FLAG_PIXDATA_DRIVE_POSEDGE = DRM_BUS_FLAG_PIXDATA_POSEDGE,
+	DRM_BUS_FLAG_PIXDATA_DRIVE_NEGEDGE = DRM_BUS_FLAG_PIXDATA_NEGEDGE,
+	DRM_BUS_FLAG_PIXDATA_SAMPLE_POSEDGE = DRM_BUS_FLAG_PIXDATA_NEGEDGE,
+	DRM_BUS_FLAG_PIXDATA_SAMPLE_NEGEDGE = DRM_BUS_FLAG_PIXDATA_POSEDGE,
+	DRM_BUS_FLAG_DATA_MSB_TO_LSB = BIT(4),
+	DRM_BUS_FLAG_DATA_LSB_TO_MSB = BIT(5),
+	DRM_BUS_FLAG_SYNC_POSEDGE = BIT(6),
+	DRM_BUS_FLAG_SYNC_NEGEDGE = BIT(7),
+	DRM_BUS_FLAG_SYNC_DRIVE_POSEDGE = DRM_BUS_FLAG_SYNC_POSEDGE,
+	DRM_BUS_FLAG_SYNC_DRIVE_NEGEDGE = DRM_BUS_FLAG_SYNC_NEGEDGE,
+	DRM_BUS_FLAG_SYNC_SAMPLE_POSEDGE = DRM_BUS_FLAG_SYNC_NEGEDGE,
+	DRM_BUS_FLAG_SYNC_SAMPLE_NEGEDGE = DRM_BUS_FLAG_SYNC_POSEDGE,
+};
+
 /**
  * struct drm_display_info - runtime data about the connected sink
  *
@@ -328,52 +390,10 @@ struct drm_display_info {
 	 */
 	unsigned int num_bus_formats;
 
-#define DRM_BUS_FLAG_DE_LOW		(1<<0)
-#define DRM_BUS_FLAG_DE_HIGH		(1<<1)
-
-/*
- * Don't use those two flags directly, use the DRM_BUS_FLAG_PIXDATA_DRIVE_*
- * and DRM_BUS_FLAG_PIXDATA_SAMPLE_* variants to qualify the flags explicitly.
- * The DRM_BUS_FLAG_PIXDATA_SAMPLE_* flags are defined as the opposite of the
- * DRM_BUS_FLAG_PIXDATA_DRIVE_* flags to make code simpler, as signals are
- * usually to be sampled on the opposite edge of the driving edge.
- */
-#define DRM_BUS_FLAG_PIXDATA_POSEDGE	(1<<2)
-#define DRM_BUS_FLAG_PIXDATA_NEGEDGE	(1<<3)
-
-/* Drive data on rising edge */
-#define DRM_BUS_FLAG_PIXDATA_DRIVE_POSEDGE	DRM_BUS_FLAG_PIXDATA_POSEDGE
-/* Drive data on falling edge */
-#define DRM_BUS_FLAG_PIXDATA_DRIVE_NEGEDGE	DRM_BUS_FLAG_PIXDATA_NEGEDGE
-/* Sample data on rising edge */
-#define DRM_BUS_FLAG_PIXDATA_SAMPLE_POSEDGE	DRM_BUS_FLAG_PIXDATA_NEGEDGE
-/* Sample data on falling edge */
-#define DRM_BUS_FLAG_PIXDATA_SAMPLE_NEGEDGE	DRM_BUS_FLAG_PIXDATA_POSEDGE
-
-/* data is transmitted MSB to LSB on the bus */
-#define DRM_BUS_FLAG_DATA_MSB_TO_LSB	(1<<4)
-/* data is transmitted LSB to MSB on the bus */
-#define DRM_BUS_FLAG_DATA_LSB_TO_MSB	(1<<5)
-
-/*
- * Similarly to the DRM_BUS_FLAG_PIXDATA_* flags, don't use these two flags
- * directly, use one of the DRM_BUS_FLAG_SYNC_(DRIVE|SAMPLE)_* instead.
- */
-#define DRM_BUS_FLAG_SYNC_POSEDGE	(1<<6)
-#define DRM_BUS_FLAG_SYNC_NEGEDGE	(1<<7)
-
-/* Drive sync on rising edge */
-#define DRM_BUS_FLAG_SYNC_DRIVE_POSEDGE		DRM_BUS_FLAG_SYNC_POSEDGE
-/* Drive sync on falling edge */
-#define DRM_BUS_FLAG_SYNC_DRIVE_NEGEDGE		DRM_BUS_FLAG_SYNC_NEGEDGE
-/* Sample sync on rising edge */
-#define DRM_BUS_FLAG_SYNC_SAMPLE_POSEDGE	DRM_BUS_FLAG_SYNC_NEGEDGE
-/* Sample sync on falling edge */
-#define DRM_BUS_FLAG_SYNC_SAMPLE_NEGEDGE	DRM_BUS_FLAG_SYNC_POSEDGE
-
 	/**
 	 * @bus_flags: Additional information (like pixel signal polarity) for
-	 * the pixel data on the bus, using DRM_BUS_FLAGS\_ defines.
+	 * the pixel data on the bus, using &enum drm_bus_flags values
+	 * DRM_BUS_FLAGS\_.
 	 */
 	u32 bus_flags;
 
-- 
cgit v1.2.3


From 09e513e390eab1f2db4d4242ef209cbe0c39c444 Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
Date: Thu, 21 Feb 2019 03:38:06 +0200
Subject: media: vsp1: drm: Extend frame completion API to the DU driver

The VSP1 driver will need to pass extra flags to the DU through the
frame completion API. Replace the completed bool flag by a bitmask to
support this.

Signed-off-by: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
Reviewed-by: Kieran Bingham <kieran.bingham+renesas@ideasonboard.com>
Reviewed-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 drivers/gpu/drm/rcar-du/rcar_du_vsp.c  | 4 ++--
 drivers/media/platform/vsp1/vsp1_dl.h  | 1 +
 drivers/media/platform/vsp1/vsp1_drm.c | 4 ++--
 drivers/media/platform/vsp1/vsp1_drm.h | 2 +-
 include/media/vsp1.h                   | 4 +++-
 5 files changed, 9 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/rcar-du/rcar_du_vsp.c b/drivers/gpu/drm/rcar-du/rcar_du_vsp.c
index 0878accbd134..d4aaad11ccd4 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_vsp.c
+++ b/drivers/gpu/drm/rcar-du/rcar_du_vsp.c
@@ -27,14 +27,14 @@
 #include "rcar_du_kms.h"
 #include "rcar_du_vsp.h"
 
-static void rcar_du_vsp_complete(void *private, bool completed, u32 crc)
+static void rcar_du_vsp_complete(void *private, unsigned int status, u32 crc)
 {
 	struct rcar_du_crtc *crtc = private;
 
 	if (crtc->vblank_enable)
 		drm_crtc_handle_vblank(&crtc->crtc);
 
-	if (completed)
+	if (status & VSP1_DU_STATUS_COMPLETE)
 		rcar_du_crtc_finish_page_flip(crtc);
 
 	drm_crtc_add_crc_entry(&crtc->crtc, false, 0, &crc);
diff --git a/drivers/media/platform/vsp1/vsp1_dl.h b/drivers/media/platform/vsp1/vsp1_dl.h
index e0fdb145e6ed..079ba5af0315 100644
--- a/drivers/media/platform/vsp1/vsp1_dl.h
+++ b/drivers/media/platform/vsp1/vsp1_dl.h
@@ -17,6 +17,7 @@ struct vsp1_dl_body_pool;
 struct vsp1_dl_list;
 struct vsp1_dl_manager;
 
+/* Keep these flags in sync with VSP1_DU_STATUS_* in include/media/vsp1.h. */
 #define VSP1_DL_FRAME_END_COMPLETED		BIT(0)
 #define VSP1_DL_FRAME_END_INTERNAL		BIT(1)
 
diff --git a/drivers/media/platform/vsp1/vsp1_drm.c b/drivers/media/platform/vsp1/vsp1_drm.c
index d1c88e8aee52..bd95683c1cd9 100644
--- a/drivers/media/platform/vsp1/vsp1_drm.c
+++ b/drivers/media/platform/vsp1/vsp1_drm.c
@@ -34,14 +34,14 @@ static void vsp1_du_pipeline_frame_end(struct vsp1_pipeline *pipe,
 				       unsigned int completion)
 {
 	struct vsp1_drm_pipeline *drm_pipe = to_vsp1_drm_pipeline(pipe);
-	bool complete = completion & VSP1_DL_FRAME_END_COMPLETED;
 
 	if (drm_pipe->du_complete) {
 		struct vsp1_entity *uif = drm_pipe->uif;
+		unsigned int status = completion & VSP1_DU_STATUS_COMPLETE;
 		u32 crc;
 
 		crc = uif ? vsp1_uif_get_crc(to_uif(&uif->subdev)) : 0;
-		drm_pipe->du_complete(drm_pipe->du_private, complete, crc);
+		drm_pipe->du_complete(drm_pipe->du_private, status, crc);
 	}
 
 	if (completion & VSP1_DL_FRAME_END_INTERNAL) {
diff --git a/drivers/media/platform/vsp1/vsp1_drm.h b/drivers/media/platform/vsp1/vsp1_drm.h
index 8dfd274a59e2..e85ad4366fbb 100644
--- a/drivers/media/platform/vsp1/vsp1_drm.h
+++ b/drivers/media/platform/vsp1/vsp1_drm.h
@@ -42,7 +42,7 @@ struct vsp1_drm_pipeline {
 	struct vsp1_du_crc_config crc;
 
 	/* Frame synchronisation */
-	void (*du_complete)(void *data, bool completed, u32 crc);
+	void (*du_complete)(void *data, unsigned int status, u32 crc);
 	void *du_private;
 };
 
diff --git a/include/media/vsp1.h b/include/media/vsp1.h
index 1cf868360701..877496936487 100644
--- a/include/media/vsp1.h
+++ b/include/media/vsp1.h
@@ -17,6 +17,8 @@ struct device;
 
 int vsp1_du_init(struct device *dev);
 
+#define VSP1_DU_STATUS_COMPLETE		BIT(0)
+
 /**
  * struct vsp1_du_lif_config - VSP LIF configuration
  * @width: output frame width
@@ -32,7 +34,7 @@ struct vsp1_du_lif_config {
 	unsigned int height;
 	bool interlaced;
 
-	void (*callback)(void *data, bool completed, u32 crc);
+	void (*callback)(void *data, unsigned int status, u32 crc);
 	void *callback_data;
 };
 
-- 
cgit v1.2.3


From a63722aeb76c65f39c98c66a1e730a2f3da123ab Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
Date: Thu, 21 Feb 2019 03:46:42 +0200
Subject: media: vsp1: drm: Implement writeback support

Extend the vsp1_du_atomic_flush() API with writeback support by adding
format, pitch and memory addresses of the writeback framebuffer.
Writeback completion is reported through the existing frame completion
callback with a new VSP1_DU_STATUS_WRITEBACK status flag.

Signed-off-by: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
Reviewed-by: Kieran Bingham <kieran.bingham+renesas@ideasonboard.com>
Reviewed-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 drivers/media/platform/vsp1/vsp1_dl.c  | 14 ++++++++++++++
 drivers/media/platform/vsp1/vsp1_dl.h  |  3 ++-
 drivers/media/platform/vsp1/vsp1_drm.c | 25 ++++++++++++++++++++++++-
 include/media/vsp1.h                   | 15 +++++++++++++++
 4 files changed, 55 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/media/platform/vsp1/vsp1_dl.c b/drivers/media/platform/vsp1/vsp1_dl.c
index ed7cda4130f2..104b6f514536 100644
--- a/drivers/media/platform/vsp1/vsp1_dl.c
+++ b/drivers/media/platform/vsp1/vsp1_dl.c
@@ -958,6 +958,9 @@ void vsp1_dl_list_commit(struct vsp1_dl_list *dl, unsigned int dl_flags)
  *
  * The VSP1_DL_FRAME_END_INTERNAL flag indicates that the display list that just
  * became active had been queued with the internal notification flag.
+ *
+ * The VSP1_DL_FRAME_END_WRITEBACK flag indicates that the previously active
+ * display list had been queued with the writeback flag.
  */
 unsigned int vsp1_dlm_irq_frame_end(struct vsp1_dl_manager *dlm)
 {
@@ -995,6 +998,17 @@ unsigned int vsp1_dlm_irq_frame_end(struct vsp1_dl_manager *dlm)
 	if (status & VI6_STATUS_FLD_STD(dlm->index))
 		goto done;
 
+	/*
+	 * If the active display list has the writeback flag set, the frame
+	 * completion marks the end of the writeback capture. Return the
+	 * VSP1_DL_FRAME_END_WRITEBACK flag and reset the display list's
+	 * writeback flag.
+	 */
+	if (dlm->active && (dlm->active->flags & VSP1_DL_FRAME_END_WRITEBACK)) {
+		flags |= VSP1_DL_FRAME_END_WRITEBACK;
+		dlm->active->flags &= ~VSP1_DL_FRAME_END_WRITEBACK;
+	}
+
 	/*
 	 * The device starts processing the queued display list right after the
 	 * frame end interrupt. The display list thus becomes active.
diff --git a/drivers/media/platform/vsp1/vsp1_dl.h b/drivers/media/platform/vsp1/vsp1_dl.h
index 079ba5af0315..bebe16483ca5 100644
--- a/drivers/media/platform/vsp1/vsp1_dl.h
+++ b/drivers/media/platform/vsp1/vsp1_dl.h
@@ -19,7 +19,8 @@ struct vsp1_dl_manager;
 
 /* Keep these flags in sync with VSP1_DU_STATUS_* in include/media/vsp1.h. */
 #define VSP1_DL_FRAME_END_COMPLETED		BIT(0)
-#define VSP1_DL_FRAME_END_INTERNAL		BIT(1)
+#define VSP1_DL_FRAME_END_WRITEBACK		BIT(1)
+#define VSP1_DL_FRAME_END_INTERNAL		BIT(2)
 
 /**
  * struct vsp1_dl_ext_cmd - Extended Display command
diff --git a/drivers/media/platform/vsp1/vsp1_drm.c b/drivers/media/platform/vsp1/vsp1_drm.c
index bd95683c1cd9..a4a45d68a6ef 100644
--- a/drivers/media/platform/vsp1/vsp1_drm.c
+++ b/drivers/media/platform/vsp1/vsp1_drm.c
@@ -37,7 +37,9 @@ static void vsp1_du_pipeline_frame_end(struct vsp1_pipeline *pipe,
 
 	if (drm_pipe->du_complete) {
 		struct vsp1_entity *uif = drm_pipe->uif;
-		unsigned int status = completion & VSP1_DU_STATUS_COMPLETE;
+		unsigned int status = completion
+				    & (VSP1_DU_STATUS_COMPLETE |
+				       VSP1_DU_STATUS_WRITEBACK);
 		u32 crc;
 
 		crc = uif ? vsp1_uif_get_crc(to_uif(&uif->subdev)) : 0;
@@ -541,6 +543,8 @@ static void vsp1_du_pipeline_configure(struct vsp1_pipeline *pipe)
 
 	if (drm_pipe->force_brx_release)
 		dl_flags |= VSP1_DL_FRAME_END_INTERNAL;
+	if (pipe->output->writeback)
+		dl_flags |= VSP1_DL_FRAME_END_WRITEBACK;
 
 	dl = vsp1_dl_list_get(pipe->output->dlm);
 	dlb = vsp1_dl_list_get_body0(dl);
@@ -870,12 +874,31 @@ void vsp1_du_atomic_flush(struct device *dev, unsigned int pipe_index,
 	struct vsp1_device *vsp1 = dev_get_drvdata(dev);
 	struct vsp1_drm_pipeline *drm_pipe = &vsp1->drm->pipe[pipe_index];
 	struct vsp1_pipeline *pipe = &drm_pipe->pipe;
+	int ret;
 
 	drm_pipe->crc = cfg->crc;
 
 	mutex_lock(&vsp1->drm->lock);
+
+	if (cfg->writeback.pixelformat) {
+		const struct vsp1_du_writeback_config *wb_cfg = &cfg->writeback;
+
+		ret = vsp1_du_pipeline_set_rwpf_format(vsp1, pipe->output,
+						       wb_cfg->pixelformat,
+						       wb_cfg->pitch);
+		if (WARN_ON(ret < 0))
+			goto done;
+
+		pipe->output->mem.addr[0] = wb_cfg->mem[0];
+		pipe->output->mem.addr[1] = wb_cfg->mem[1];
+		pipe->output->mem.addr[2] = wb_cfg->mem[2];
+		pipe->output->writeback = true;
+	}
+
 	vsp1_du_pipeline_setup_inputs(vsp1, pipe);
 	vsp1_du_pipeline_configure(pipe);
+
+done:
 	mutex_unlock(&vsp1->drm->lock);
 }
 EXPORT_SYMBOL_GPL(vsp1_du_atomic_flush);
diff --git a/include/media/vsp1.h b/include/media/vsp1.h
index 877496936487..cc1b0d42ce95 100644
--- a/include/media/vsp1.h
+++ b/include/media/vsp1.h
@@ -18,6 +18,7 @@ struct device;
 int vsp1_du_init(struct device *dev);
 
 #define VSP1_DU_STATUS_COMPLETE		BIT(0)
+#define VSP1_DU_STATUS_WRITEBACK	BIT(1)
 
 /**
  * struct vsp1_du_lif_config - VSP LIF configuration
@@ -83,12 +84,26 @@ struct vsp1_du_crc_config {
 	unsigned int index;
 };
 
+/**
+ * struct vsp1_du_writeback_config - VSP writeback configuration parameters
+ * @pixelformat: plane pixel format (V4L2 4CC)
+ * @pitch: line pitch in bytes for the first plane
+ * @mem: DMA memory address for each plane of the frame buffer
+ */
+struct vsp1_du_writeback_config {
+	u32 pixelformat;
+	unsigned int pitch;
+	dma_addr_t mem[3];
+};
+
 /**
  * struct vsp1_du_atomic_pipe_config - VSP atomic pipe configuration parameters
  * @crc: CRC computation configuration
+ * @writeback: writeback configuration
  */
 struct vsp1_du_atomic_pipe_config {
 	struct vsp1_du_crc_config crc;
+	struct vsp1_du_writeback_config writeback;
 };
 
 void vsp1_du_atomic_begin(struct device *dev, unsigned int pipe_index);
-- 
cgit v1.2.3


From 97eb9eaeb95ba8eb3c7a77a7ec7786d47a1bcaee Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
Date: Thu, 21 Feb 2019 12:17:32 +0200
Subject: drm: writeback: Cleanup job ownership handling when queuing job

The drm_writeback_queue_job() function takes ownership of the passed job
and requires the caller to manually set the connector state
writeback_job pointer to NULL. To simplify drivers and avoid errors
(such as the missing NULL set in the vc4 driver), pass the connector
state pointer to the function instead of the job pointer, and set the
writeback_job pointer to NULL internally.

Signed-off-by: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
Reviewed-by: Brian Starkey <brian.starkey@arm.com>
Acked-by: Eric Anholt <eric@anholt.net>
Acked-by: Liviu Dudau <liviu.dudau@arm.com>
Reviewed-by: Kieran Bingham <kieran.bingham+renesas@ideasonboard.com>
---
 drivers/gpu/drm/arm/malidp_mw.c |  3 +--
 drivers/gpu/drm/drm_writeback.c | 15 ++++++++++-----
 drivers/gpu/drm/vc4/vc4_txp.c   |  2 +-
 include/drm/drm_writeback.h     |  2 +-
 4 files changed, 13 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/arm/malidp_mw.c b/drivers/gpu/drm/arm/malidp_mw.c
index 041a64dc7167..87627219ce3b 100644
--- a/drivers/gpu/drm/arm/malidp_mw.c
+++ b/drivers/gpu/drm/arm/malidp_mw.c
@@ -252,8 +252,7 @@ void malidp_mw_atomic_commit(struct drm_device *drm,
 				     &mw_state->addrs[0],
 				     mw_state->format);
 
-		drm_writeback_queue_job(mw_conn, conn_state->writeback_job);
-		conn_state->writeback_job = NULL;
+		drm_writeback_queue_job(mw_conn, conn_state);
 		hwdev->hw->enable_memwrite(hwdev, mw_state->addrs,
 					   mw_state->pitches, mw_state->n_planes,
 					   fb->width, fb->height, mw_state->format,
diff --git a/drivers/gpu/drm/drm_writeback.c b/drivers/gpu/drm/drm_writeback.c
index c20e6fe00cb3..338b993d7c9f 100644
--- a/drivers/gpu/drm/drm_writeback.c
+++ b/drivers/gpu/drm/drm_writeback.c
@@ -242,11 +242,12 @@ EXPORT_SYMBOL(drm_writeback_connector_init);
 /**
  * drm_writeback_queue_job - Queue a writeback job for later signalling
  * @wb_connector: The writeback connector to queue a job on
- * @job: The job to queue
+ * @conn_state: The connector state containing the job to queue
  *
- * This function adds a job to the job_queue for a writeback connector. It
- * should be considered to take ownership of the writeback job, and so any other
- * references to the job must be cleared after calling this function.
+ * This function adds the job contained in @conn_state to the job_queue for a
+ * writeback connector. It takes ownership of the writeback job and sets the
+ * @conn_state->writeback_job to NULL, and so no access to the job may be
+ * performed by the caller after this function returns.
  *
  * Drivers must ensure that for a given writeback connector, jobs are queued in
  * exactly the same order as they will be completed by the hardware (and
@@ -258,10 +259,14 @@ EXPORT_SYMBOL(drm_writeback_connector_init);
  * See also: drm_writeback_signal_completion()
  */
 void drm_writeback_queue_job(struct drm_writeback_connector *wb_connector,
-			     struct drm_writeback_job *job)
+			     struct drm_connector_state *conn_state)
 {
+	struct drm_writeback_job *job;
 	unsigned long flags;
 
+	job = conn_state->writeback_job;
+	conn_state->writeback_job = NULL;
+
 	spin_lock_irqsave(&wb_connector->job_lock, flags);
 	list_add_tail(&job->list_entry, &wb_connector->job_queue);
 	spin_unlock_irqrestore(&wb_connector->job_lock, flags);
diff --git a/drivers/gpu/drm/vc4/vc4_txp.c b/drivers/gpu/drm/vc4/vc4_txp.c
index aa279b5b0de7..5dabd91f2d7e 100644
--- a/drivers/gpu/drm/vc4/vc4_txp.c
+++ b/drivers/gpu/drm/vc4/vc4_txp.c
@@ -327,7 +327,7 @@ static void vc4_txp_connector_atomic_commit(struct drm_connector *conn,
 
 	TXP_WRITE(TXP_DST_CTRL, ctrl);
 
-	drm_writeback_queue_job(&txp->connector, conn_state->writeback_job);
+	drm_writeback_queue_job(&txp->connector, conn_state);
 }
 
 static const struct drm_connector_helper_funcs vc4_txp_connector_helper_funcs = {
diff --git a/include/drm/drm_writeback.h b/include/drm/drm_writeback.h
index 23df9d463003..47662c362743 100644
--- a/include/drm/drm_writeback.h
+++ b/include/drm/drm_writeback.h
@@ -123,7 +123,7 @@ int drm_writeback_connector_init(struct drm_device *dev,
 				 const u32 *formats, int n_formats);
 
 void drm_writeback_queue_job(struct drm_writeback_connector *wb_connector,
-			     struct drm_writeback_job *job);
+			     struct drm_connector_state *conn_state);
 
 void drm_writeback_cleanup_job(struct drm_writeback_job *job);
 
-- 
cgit v1.2.3


From 9d2230dc1351d54953a94e4ba9b746f8a0408a12 Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
Date: Thu, 21 Feb 2019 03:01:38 +0200
Subject: drm: writeback: Add job prepare and cleanup operations

As writeback jobs contain a framebuffer, drivers may need to prepare and
cleanup them the same way they can prepare and cleanup framebuffers for
planes. Add two new optional connector helper operations,
.prepare_writeback_job() and .cleanup_writeback_job() to support this.

The job prepare operation is called from
drm_atomic_helper_prepare_planes() to avoid a new atomic commit helper
that would need to be called by all drivers not using
drm_atomic_helper_commit(). The job cleanup operation is called from the
existing drm_writeback_cleanup_job() function, invoked both when
destroying the job as part of a aborted commit, or when the job
completes.

The drm_writeback_job structure is extended with a priv field to let
drivers store per-job data, such as mappings related to the writeback
framebuffer.

For internal plumbing reasons the drm_writeback_job structure needs to
store a back-pointer to the drm_writeback_connector. To avoid pushing
too much writeback-specific knowledge to drm_atomic_uapi.c, create a
drm_writeback_set_fb() function, move the writeback job setup code
there, and set the connector backpointer. The prepare_signaling()
function doesn't need to allocate writeback jobs and can ignore
connectors without a job, as it is called after the writeback jobs are
allocated to store framebuffers, and a writeback fence with a
framebuffer is an invalid configuration that gets rejected by the commit
check.

Signed-off-by: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
Reviewed-by: Liviu Dudau <liviu.dudau@arm.com>
---
 drivers/gpu/drm/drm_atomic_helper.c      | 11 ++++++++
 drivers/gpu/drm/drm_atomic_uapi.c        | 31 ++++++----------------
 drivers/gpu/drm/drm_writeback.c          | 44 ++++++++++++++++++++++++++++++++
 include/drm/drm_modeset_helper_vtables.h |  7 +++++
 include/drm/drm_writeback.h              | 28 +++++++++++++++++++-
 5 files changed, 97 insertions(+), 24 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c
index 40ac19848034..f89641216a44 100644
--- a/drivers/gpu/drm/drm_atomic_helper.c
+++ b/drivers/gpu/drm/drm_atomic_helper.c
@@ -2261,10 +2261,21 @@ EXPORT_SYMBOL(drm_atomic_helper_commit_cleanup_done);
 int drm_atomic_helper_prepare_planes(struct drm_device *dev,
 				     struct drm_atomic_state *state)
 {
+	struct drm_connector *connector;
+	struct drm_connector_state *new_conn_state;
 	struct drm_plane *plane;
 	struct drm_plane_state *new_plane_state;
 	int ret, i, j;
 
+	for_each_new_connector_in_state(state, connector, new_conn_state, i) {
+		if (!new_conn_state->writeback_job)
+			continue;
+
+		ret = drm_writeback_prepare_job(new_conn_state->writeback_job);
+		if (ret < 0)
+			return ret;
+	}
+
 	for_each_new_plane_in_state(state, plane, new_plane_state, i) {
 		const struct drm_plane_helper_funcs *funcs;
 
diff --git a/drivers/gpu/drm/drm_atomic_uapi.c b/drivers/gpu/drm/drm_atomic_uapi.c
index 0aabd401d3ca..8fa77def577f 100644
--- a/drivers/gpu/drm/drm_atomic_uapi.c
+++ b/drivers/gpu/drm/drm_atomic_uapi.c
@@ -647,28 +647,15 @@ drm_atomic_plane_get_property(struct drm_plane *plane,
 	return 0;
 }
 
-static struct drm_writeback_job *
-drm_atomic_get_writeback_job(struct drm_connector_state *conn_state)
-{
-	WARN_ON(conn_state->connector->connector_type != DRM_MODE_CONNECTOR_WRITEBACK);
-
-	if (!conn_state->writeback_job)
-		conn_state->writeback_job =
-			kzalloc(sizeof(*conn_state->writeback_job), GFP_KERNEL);
-
-	return conn_state->writeback_job;
-}
-
 static int drm_atomic_set_writeback_fb_for_connector(
 		struct drm_connector_state *conn_state,
 		struct drm_framebuffer *fb)
 {
-	struct drm_writeback_job *job =
-		drm_atomic_get_writeback_job(conn_state);
-	if (!job)
-		return -ENOMEM;
+	int ret;
 
-	drm_framebuffer_assign(&job->fb, fb);
+	ret = drm_writeback_set_fb(conn_state, fb);
+	if (ret < 0)
+		return ret;
 
 	if (fb)
 		DRM_DEBUG_ATOMIC("Set [FB:%d] for connector state %p\n",
@@ -1158,19 +1145,17 @@ static int prepare_signaling(struct drm_device *dev,
 
 	for_each_new_connector_in_state(state, conn, conn_state, i) {
 		struct drm_writeback_connector *wb_conn;
-		struct drm_writeback_job *job;
 		struct drm_out_fence_state *f;
 		struct dma_fence *fence;
 		s32 __user *fence_ptr;
 
+		if (!conn_state->writeback_job)
+			continue;
+
 		fence_ptr = get_out_fence_for_connector(state, conn);
 		if (!fence_ptr)
 			continue;
 
-		job = drm_atomic_get_writeback_job(conn_state);
-		if (!job)
-			return -ENOMEM;
-
 		f = krealloc(*fence_state, sizeof(**fence_state) *
 			     (*num_fences + 1), GFP_KERNEL);
 		if (!f)
@@ -1192,7 +1177,7 @@ static int prepare_signaling(struct drm_device *dev,
 			return ret;
 		}
 
-		job->out_fence = fence;
+		conn_state->writeback_job->out_fence = fence;
 	}
 
 	/*
diff --git a/drivers/gpu/drm/drm_writeback.c b/drivers/gpu/drm/drm_writeback.c
index 1b497d3530b5..79ac014701c8 100644
--- a/drivers/gpu/drm/drm_writeback.c
+++ b/drivers/gpu/drm/drm_writeback.c
@@ -239,6 +239,43 @@ fail:
 }
 EXPORT_SYMBOL(drm_writeback_connector_init);
 
+int drm_writeback_set_fb(struct drm_connector_state *conn_state,
+			 struct drm_framebuffer *fb)
+{
+	WARN_ON(conn_state->connector->connector_type != DRM_MODE_CONNECTOR_WRITEBACK);
+
+	if (!conn_state->writeback_job) {
+		conn_state->writeback_job =
+			kzalloc(sizeof(*conn_state->writeback_job), GFP_KERNEL);
+		if (!conn_state->writeback_job)
+			return -ENOMEM;
+
+		conn_state->writeback_job->connector =
+			drm_connector_to_writeback(conn_state->connector);
+	}
+
+	drm_framebuffer_assign(&conn_state->writeback_job->fb, fb);
+	return 0;
+}
+
+int drm_writeback_prepare_job(struct drm_writeback_job *job)
+{
+	struct drm_writeback_connector *connector = job->connector;
+	const struct drm_connector_helper_funcs *funcs =
+		connector->base.helper_private;
+	int ret;
+
+	if (funcs->prepare_writeback_job) {
+		ret = funcs->prepare_writeback_job(connector, job);
+		if (ret < 0)
+			return ret;
+	}
+
+	job->prepared = true;
+	return 0;
+}
+EXPORT_SYMBOL(drm_writeback_prepare_job);
+
 /**
  * drm_writeback_queue_job - Queue a writeback job for later signalling
  * @wb_connector: The writeback connector to queue a job on
@@ -275,6 +312,13 @@ EXPORT_SYMBOL(drm_writeback_queue_job);
 
 void drm_writeback_cleanup_job(struct drm_writeback_job *job)
 {
+	struct drm_writeback_connector *connector = job->connector;
+	const struct drm_connector_helper_funcs *funcs =
+		connector->base.helper_private;
+
+	if (job->prepared && funcs->cleanup_writeback_job)
+		funcs->cleanup_writeback_job(connector, job);
+
 	if (job->fb)
 		drm_framebuffer_put(job->fb);
 
diff --git a/include/drm/drm_modeset_helper_vtables.h b/include/drm/drm_modeset_helper_vtables.h
index cfb7be40bed7..8f3602811eb5 100644
--- a/include/drm/drm_modeset_helper_vtables.h
+++ b/include/drm/drm_modeset_helper_vtables.h
@@ -49,6 +49,8 @@
  */
 
 enum mode_set_atomic;
+struct drm_writeback_connector;
+struct drm_writeback_job;
 
 /**
  * struct drm_crtc_helper_funcs - helper operations for CRTCs
@@ -989,6 +991,11 @@ struct drm_connector_helper_funcs {
 	 */
 	void (*atomic_commit)(struct drm_connector *connector,
 			      struct drm_connector_state *state);
+
+	int (*prepare_writeback_job)(struct drm_writeback_connector *connector,
+				     struct drm_writeback_job *job);
+	void (*cleanup_writeback_job)(struct drm_writeback_connector *connector,
+				      struct drm_writeback_job *job);
 };
 
 /**
diff --git a/include/drm/drm_writeback.h b/include/drm/drm_writeback.h
index 47662c362743..777c14c847f0 100644
--- a/include/drm/drm_writeback.h
+++ b/include/drm/drm_writeback.h
@@ -79,6 +79,20 @@ struct drm_writeback_connector {
 };
 
 struct drm_writeback_job {
+	/**
+	 * @connector:
+	 *
+	 * Back-pointer to the writeback connector associated with the job
+	 */
+	struct drm_writeback_connector *connector;
+
+	/**
+	 * @prepared:
+	 *
+	 * Set when the job has been prepared with drm_writeback_prepare_job()
+	 */
+	bool prepared;
+
 	/**
 	 * @cleanup_work:
 	 *
@@ -98,7 +112,7 @@ struct drm_writeback_job {
 	 * @fb:
 	 *
 	 * Framebuffer to be written to by the writeback connector. Do not set
-	 * directly, use drm_atomic_set_writeback_fb_for_connector()
+	 * directly, use drm_writeback_set_fb()
 	 */
 	struct drm_framebuffer *fb;
 
@@ -108,6 +122,13 @@ struct drm_writeback_job {
 	 * Fence which will signal once the writeback has completed
 	 */
 	struct dma_fence *out_fence;
+
+	/**
+	 * @priv:
+	 *
+	 * Driver-private data
+	 */
+	void *priv;
 };
 
 static inline struct drm_writeback_connector *
@@ -122,6 +143,11 @@ int drm_writeback_connector_init(struct drm_device *dev,
 				 const struct drm_encoder_helper_funcs *enc_helper_funcs,
 				 const u32 *formats, int n_formats);
 
+int drm_writeback_set_fb(struct drm_connector_state *conn_state,
+			 struct drm_framebuffer *fb);
+
+int drm_writeback_prepare_job(struct drm_writeback_job *job);
+
 void drm_writeback_queue_job(struct drm_writeback_connector *wb_connector,
 			     struct drm_connector_state *conn_state);
 
-- 
cgit v1.2.3


From 875f1d0769cdcfe1596ff0ca609b453359e42ec9 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Wed, 27 Feb 2019 13:05:25 -0700
Subject: iov_iter: add ITER_BVEC_FLAG_NO_REF flag

For ITER_BVEC, if we're holding on to kernel pages, the caller
doesn't need to grab a reference to the bvec pages, and drop that
same reference on IO completion. This is essentially safe for any
ITER_BVEC, but some use cases end up reusing pages and uncondtionally
dropping a page reference on completion. And example of that is
sendfile(2), that ends up being a splice_in + splice_out on the
pipe pages.

Add a flag that tells us it's fine to not grab a page reference
to the bvec pages, since that caller knows not to drop a reference
when it's done with the pages.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c       |  3 +++
 include/linux/uio.h | 24 +++++++++++++++++++-----
 2 files changed, 22 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 4c6a5e60ddbe..c592a0933b0d 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -855,6 +855,9 @@ static int io_import_fixed(struct io_ring_ctx *ctx, int rw,
 	iov_iter_bvec(iter, rw, imu->bvec, imu->nr_bvecs, offset + len);
 	if (offset)
 		iov_iter_advance(iter, offset);
+
+	/* don't drop a reference to these pages */
+	iter->type |= ITER_BVEC_FLAG_NO_REF;
 	return 0;
 }
 
diff --git a/include/linux/uio.h b/include/linux/uio.h
index ecf584f6b82d..4e926641fa80 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -23,14 +23,23 @@ struct kvec {
 };
 
 enum iter_type {
-	ITER_IOVEC = 0,
-	ITER_KVEC = 2,
-	ITER_BVEC = 4,
-	ITER_PIPE = 8,
-	ITER_DISCARD = 16,
+	/* set if ITER_BVEC doesn't hold a bv_page ref */
+	ITER_BVEC_FLAG_NO_REF = 2,
+
+	/* iter types */
+	ITER_IOVEC = 4,
+	ITER_KVEC = 8,
+	ITER_BVEC = 16,
+	ITER_PIPE = 32,
+	ITER_DISCARD = 64,
 };
 
 struct iov_iter {
+	/*
+	 * Bit 0 is the read/write bit, set if we're writing.
+	 * Bit 1 is the BVEC_FLAG_NO_REF bit, set if type is a bvec and
+	 * the caller isn't expecting to drop a page reference when done.
+	 */
 	unsigned int type;
 	size_t iov_offset;
 	size_t count;
@@ -84,6 +93,11 @@ static inline unsigned char iov_iter_rw(const struct iov_iter *i)
 	return i->type & (READ | WRITE);
 }
 
+static inline bool iov_iter_bvec_no_ref(const struct iov_iter *i)
+{
+	return (i->type & ITER_BVEC_FLAG_NO_REF) != 0;
+}
+
 /*
  * Total number of bytes covered by an iovec.
  *
-- 
cgit v1.2.3


From 399254aaf4892113c806816f7e64cf40c804d46d Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Wed, 27 Feb 2019 13:13:23 -0700
Subject: block: add BIO_NO_PAGE_REF flag

If bio_iov_iter_get_pages() is called on an iov_iter that is flagged
with NO_REF, then we don't need to add a page reference for the pages
that we add.

Add BIO_NO_PAGE_REF to track this in the bio, so IO completion knows
not to drop a reference to these pages.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bio.c               | 43 ++++++++++++++++++++++++-------------------
 fs/block_dev.c            | 12 +++++++-----
 fs/iomap.c                | 12 +++++++-----
 include/linux/blk_types.h |  1 +
 4 files changed, 39 insertions(+), 29 deletions(-)

(limited to 'include')

diff --git a/block/bio.c b/block/bio.c
index 71a78d9fb8b7..b64cedc7f87c 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -849,20 +849,14 @@ static int __bio_iov_bvec_add_pages(struct bio *bio, struct iov_iter *iter)
 	size = bio_add_page(bio, bv->bv_page, len,
 				bv->bv_offset + iter->iov_offset);
 	if (size == len) {
-		struct page *page;
-		int i;
+		if (!bio_flagged(bio, BIO_NO_PAGE_REF)) {
+			struct page *page;
+			int i;
+
+			mp_bvec_for_each_page(page, bv, i)
+				get_page(page);
+		}
 
-		/*
-		 * For the normal O_DIRECT case, we could skip grabbing this
-		 * reference and then not have to put them again when IO
-		 * completes. But this breaks some in-kernel users, like
-		 * splicing to/from a loop device, where we release the pipe
-		 * pages unconditionally. If we can fix that case, we can
-		 * get rid of the get here and the need to call
-		 * bio_release_pages() at IO completion time.
-		 */
-		mp_bvec_for_each_page(page, bv, i)
-			get_page(page);
 		iov_iter_advance(iter, size);
 		return 0;
 	}
@@ -925,10 +919,12 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
  * This takes either an iterator pointing to user memory, or one pointing to
  * kernel pages (BVEC iterator). If we're adding user pages, we pin them and
  * map them into the kernel. On IO completion, the caller should put those
- * pages. For now, when adding kernel pages, we still grab a reference to the
- * page. This isn't strictly needed for the common case, but some call paths
- * end up releasing pages from eg a pipe and we can't easily control these.
- * See comment in __bio_iov_bvec_add_pages().
+ * pages. If we're adding kernel pages, and the caller told us it's safe to
+ * do so, we just have to add the pages to the bio directly. We don't grab an
+ * extra reference to those pages (the user should already have that), and we
+ * don't put the page on IO completion. The caller needs to check if the bio is
+ * flagged BIO_NO_PAGE_REF on IO completion. If it isn't, then pages should be
+ * released.
  *
  * The function tries, but does not guarantee, to pin as many pages as
  * fit into the bio, or are requested in *iter, whatever is smaller. If
@@ -940,6 +936,13 @@ int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
 	const bool is_bvec = iov_iter_is_bvec(iter);
 	unsigned short orig_vcnt = bio->bi_vcnt;
 
+	/*
+	 * If this is a BVEC iter, then the pages are kernel pages. Don't
+	 * release them on IO completion, if the caller asked us to.
+	 */
+	if (is_bvec && iov_iter_bvec_no_ref(iter))
+		bio_set_flag(bio, BIO_NO_PAGE_REF);
+
 	do {
 		int ret;
 
@@ -1696,7 +1699,8 @@ static void bio_dirty_fn(struct work_struct *work)
 		next = bio->bi_private;
 
 		bio_set_pages_dirty(bio);
-		bio_release_pages(bio);
+		if (!bio_flagged(bio, BIO_NO_PAGE_REF))
+			bio_release_pages(bio);
 		bio_put(bio);
 	}
 }
@@ -1713,7 +1717,8 @@ void bio_check_pages_dirty(struct bio *bio)
 			goto defer;
 	}
 
-	bio_release_pages(bio);
+	if (!bio_flagged(bio, BIO_NO_PAGE_REF))
+		bio_release_pages(bio);
 	bio_put(bio);
 	return;
 defer:
diff --git a/fs/block_dev.c b/fs/block_dev.c
index e9faa52bb489..78d3257435c0 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -336,12 +336,14 @@ static void blkdev_bio_end_io(struct bio *bio)
 	if (should_dirty) {
 		bio_check_pages_dirty(bio);
 	} else {
-		struct bio_vec *bvec;
-		int i;
-		struct bvec_iter_all iter_all;
+		if (!bio_flagged(bio, BIO_NO_PAGE_REF)) {
+			struct bvec_iter_all iter_all;
+			struct bio_vec *bvec;
+			int i;
 
-		bio_for_each_segment_all(bvec, bio, i, iter_all)
-			put_page(bvec->bv_page);
+			bio_for_each_segment_all(bvec, bio, i, iter_all)
+				put_page(bvec->bv_page);
+		}
 		bio_put(bio);
 	}
 }
diff --git a/fs/iomap.c b/fs/iomap.c
index 97cb9d486a7d..abdd18e404f8 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -1589,12 +1589,14 @@ static void iomap_dio_bio_end_io(struct bio *bio)
 	if (should_dirty) {
 		bio_check_pages_dirty(bio);
 	} else {
-		struct bio_vec *bvec;
-		int i;
-		struct bvec_iter_all iter_all;
+		if (!bio_flagged(bio, BIO_NO_PAGE_REF)) {
+			struct bvec_iter_all iter_all;
+			struct bio_vec *bvec;
+			int i;
 
-		bio_for_each_segment_all(bvec, bio, i, iter_all)
-			put_page(bvec->bv_page);
+			bio_for_each_segment_all(bvec, bio, i, iter_all)
+				put_page(bvec->bv_page);
+		}
 		bio_put(bio);
 	}
 }
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index d66bf5f32610..791fee35df88 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -215,6 +215,7 @@ struct bio {
 /*
  * bio flags
  */
+#define BIO_NO_PAGE_REF	0	/* don't put release vec pages */
 #define BIO_SEG_VALID	1	/* bi_phys_segments valid */
 #define BIO_CLONED	2	/* doesn't own data */
 #define BIO_BOUNCED	3	/* bio is a bounce bio */
-- 
cgit v1.2.3


From 9496c015ed39ddfce971d63a1442e6d258504a7d Mon Sep 17 00:00:00 2001
From: Dongli Zhang <dongli.zhang@oracle.com>
Date: Tue, 19 Mar 2019 23:05:18 +0800
Subject: blk-mq: remove unused 'nr_expired' from blk_mq_hw_ctx

There is no usage of 'nr_expired'.

The 'nr_expired' was introduced by commit 1d9bd5161ba3 ("blk-mq: replace
timeout synchronization with a RCU and generation based scheme"). Its usage
was removed since commit 12f5b9314545 ("blk-mq: Remove generation
seqeunce").

Signed-off-by: Dongli Zhang <dongli.zhang@oracle.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blk-mq.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index b0c814bcc7e3..35359697318b 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -57,7 +57,6 @@ struct blk_mq_hw_ctx {
 	unsigned int		queue_num;
 
 	atomic_t		nr_active;
-	unsigned int		nr_expired;
 
 	struct hlist_node	cpuhp_dead;
 	struct kobject		kobj;
-- 
cgit v1.2.3


From aa8e2435b3d4878a0db8917805e86f61001fe174 Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Thu, 7 Feb 2019 09:59:28 +0100
Subject: drm/ttm: Define a single DRM_FILE_PAGE_OFFSET constant
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Most TTM drivers define the constant DRM_FILE_PAGE_OFFSET of the same
value. The only exception is vboxvideo, which is being converted to the
new offset by this patch. Unifying the constants in a single place
simplifies the driver code.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: Christian König <christian.koenig@amd.com>
Acked-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c     | 2 --
 drivers/gpu/drm/ast/ast_drv.h               | 2 --
 drivers/gpu/drm/bochs/bochs.h               | 2 --
 drivers/gpu/drm/cirrus/cirrus_drv.h         | 1 -
 drivers/gpu/drm/hisilicon/hibmc/hibmc_ttm.c | 2 --
 drivers/gpu/drm/mgag200/mgag200_drv.h       | 1 -
 drivers/gpu/drm/nouveau/nouveau_drv.h       | 2 --
 drivers/gpu/drm/qxl/qxl_drv.h               | 3 ---
 drivers/gpu/drm/radeon/radeon_ttm.c         | 2 --
 drivers/gpu/drm/virtio/virtgpu_ttm.c        | 2 --
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.c         | 2 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.h         | 1 -
 drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c    | 2 +-
 drivers/staging/vboxvideo/vbox_drv.h        | 2 --
 include/drm/ttm/ttm_bo_driver.h             | 2 ++
 15 files changed, 4 insertions(+), 24 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index fee73ff00153..d5349c631ffc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -51,8 +51,6 @@
 #include "amdgpu_sdma.h"
 #include "bif/bif_4_1_d.h"
 
-#define DRM_FILE_PAGE_OFFSET (0x100000000ULL >> PAGE_SHIFT)
-
 static int amdgpu_map_buffer(struct ttm_buffer_object *bo,
 			     struct ttm_mem_reg *mem, unsigned num_pages,
 			     uint64_t offset, unsigned window,
diff --git a/drivers/gpu/drm/ast/ast_drv.h b/drivers/gpu/drm/ast/ast_drv.h
index bfc65040dfcb..c43b927f73af 100644
--- a/drivers/gpu/drm/ast/ast_drv.h
+++ b/drivers/gpu/drm/ast/ast_drv.h
@@ -353,8 +353,6 @@ extern int ast_dumb_mmap_offset(struct drm_file *file,
 				uint32_t handle,
 				uint64_t *offset);
 
-#define DRM_FILE_PAGE_OFFSET (0x100000000ULL >> PAGE_SHIFT)
-
 int ast_mm_init(struct ast_private *ast);
 void ast_mm_fini(struct ast_private *ast);
 
diff --git a/drivers/gpu/drm/bochs/bochs.h b/drivers/gpu/drm/bochs/bochs.h
index 03711394f1ed..7425d083b944 100644
--- a/drivers/gpu/drm/bochs/bochs.h
+++ b/drivers/gpu/drm/bochs/bochs.h
@@ -101,8 +101,6 @@ static inline struct bochs_bo *gem_to_bochs_bo(struct drm_gem_object *gem)
 	return container_of(gem, struct bochs_bo, gem);
 }
 
-#define DRM_FILE_PAGE_OFFSET (0x100000000ULL >> PAGE_SHIFT)
-
 static inline u64 bochs_bo_mmap_offset(struct bochs_bo *bo)
 {
 	return drm_vma_node_offset_addr(&bo->bo.vma_node);
diff --git a/drivers/gpu/drm/cirrus/cirrus_drv.h b/drivers/gpu/drm/cirrus/cirrus_drv.h
index f2b2e0d169fa..ba77507e9bad 100644
--- a/drivers/gpu/drm/cirrus/cirrus_drv.h
+++ b/drivers/gpu/drm/cirrus/cirrus_drv.h
@@ -169,7 +169,6 @@ cirrus_bo(struct ttm_buffer_object *bo)
 
 
 #define to_cirrus_obj(x) container_of(x, struct cirrus_gem_object, base)
-#define DRM_FILE_PAGE_OFFSET (0x100000000ULL >> PAGE_SHIFT)
 
 				/* cirrus_main.c */
 int cirrus_device_init(struct cirrus_device *cdev,
diff --git a/drivers/gpu/drm/hisilicon/hibmc/hibmc_ttm.c b/drivers/gpu/drm/hisilicon/hibmc/hibmc_ttm.c
index dd383267884c..a06725c2bbfa 100644
--- a/drivers/gpu/drm/hisilicon/hibmc/hibmc_ttm.c
+++ b/drivers/gpu/drm/hisilicon/hibmc/hibmc_ttm.c
@@ -21,8 +21,6 @@
 
 #include "hibmc_drm_drv.h"
 
-#define DRM_FILE_PAGE_OFFSET (0x100000000ULL >> PAGE_SHIFT)
-
 static inline struct hibmc_drm_private *
 hibmc_bdev(struct ttm_bo_device *bd)
 {
diff --git a/drivers/gpu/drm/mgag200/mgag200_drv.h b/drivers/gpu/drm/mgag200/mgag200_drv.h
index 0aaedc554879..706c902aad2d 100644
--- a/drivers/gpu/drm/mgag200/mgag200_drv.h
+++ b/drivers/gpu/drm/mgag200/mgag200_drv.h
@@ -269,7 +269,6 @@ mgag200_dumb_mmap_offset(struct drm_file *file,
 struct mga_i2c_chan *mgag200_i2c_create(struct drm_device *dev);
 void mgag200_i2c_destroy(struct mga_i2c_chan *i2c);
 
-#define DRM_FILE_PAGE_OFFSET (0x100000000ULL >> PAGE_SHIFT)
 void mgag200_ttm_placement(struct mgag200_bo *bo, int domain);
 
 static inline int mgag200_bo_reserve(struct mgag200_bo *bo, bool no_wait)
diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h
index da847244479d..35ff0ca01a3b 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drv.h
+++ b/drivers/gpu/drm/nouveau/nouveau_drv.h
@@ -60,8 +60,6 @@
 struct nouveau_channel;
 struct platform_device;
 
-#define DRM_FILE_PAGE_OFFSET (0x100000000ULL >> PAGE_SHIFT)
-
 #include "nouveau_fence.h"
 #include "nouveau_bios.h"
 #include "nouveau_vmm.h"
diff --git a/drivers/gpu/drm/qxl/qxl_drv.h b/drivers/gpu/drm/qxl/qxl_drv.h
index 4a0331b3ff7d..2896bb6fdbf4 100644
--- a/drivers/gpu/drm/qxl/qxl_drv.h
+++ b/drivers/gpu/drm/qxl/qxl_drv.h
@@ -65,9 +65,6 @@
 extern int qxl_num_crtc;
 extern int qxl_max_ioctls;
 
-#define DRM_FILE_OFFSET 0x100000000ULL
-#define DRM_FILE_PAGE_OFFSET (DRM_FILE_OFFSET >> PAGE_SHIFT)
-
 #define QXL_INTERRUPT_MASK (\
 	QXL_INTERRUPT_DISPLAY |\
 	QXL_INTERRUPT_CURSOR |\
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
index 9920a6fc11bf..071a872e5b02 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -45,8 +45,6 @@
 #include "radeon_reg.h"
 #include "radeon.h"
 
-#define DRM_FILE_PAGE_OFFSET (0x100000000ULL >> PAGE_SHIFT)
-
 static int radeon_ttm_debugfs_init(struct radeon_device *rdev);
 static void radeon_ttm_debugfs_fini(struct radeon_device *rdev);
 
diff --git a/drivers/gpu/drm/virtio/virtgpu_ttm.c b/drivers/gpu/drm/virtio/virtgpu_ttm.c
index 4bfbf25fabff..64f5b8dec3e9 100644
--- a/drivers/gpu/drm/virtio/virtgpu_ttm.c
+++ b/drivers/gpu/drm/virtio/virtgpu_ttm.c
@@ -37,8 +37,6 @@
 
 #include <linux/delay.h>
 
-#define DRM_FILE_PAGE_OFFSET (0x100000000ULL >> PAGE_SHIFT)
-
 static struct
 virtio_gpu_device *virtio_gpu_get_vgdev(struct ttm_bo_device *bdev)
 {
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index 6165fe2c4504..a31d38e2216f 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -851,7 +851,7 @@ static int vmw_driver_load(struct drm_device *dev, unsigned long chipset)
 	ret = ttm_bo_device_init(&dev_priv->bdev,
 				 &vmw_bo_driver,
 				 dev->anon_inode->i_mapping,
-				 VMWGFX_FILE_PAGE_OFFSET,
+				 DRM_FILE_PAGE_OFFSET,
 				 false);
 	if (unlikely(ret != 0)) {
 		DRM_ERROR("Failed initializing TTM buffer object driver.\n");
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
index accb2fafe2f1..6302c12c2298 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
@@ -48,7 +48,6 @@
 #define VMWGFX_DRIVER_MAJOR 2
 #define VMWGFX_DRIVER_MINOR 15
 #define VMWGFX_DRIVER_PATCHLEVEL 0
-#define VMWGFX_FILE_PAGE_OFFSET 0x00100000
 #define VMWGFX_FIFO_STATIC_SIZE (1024*1024)
 #define VMWGFX_MAX_RELOCATIONS 2048
 #define VMWGFX_MAX_VALIDATIONS 2048
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c
index e6d75e377dd8..ec5b4b237f2c 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c
@@ -33,7 +33,7 @@ int vmw_mmap(struct file *filp, struct vm_area_struct *vma)
 	struct drm_file *file_priv;
 	struct vmw_private *dev_priv;
 
-	if (unlikely(vma->vm_pgoff < VMWGFX_FILE_PAGE_OFFSET)) {
+	if (unlikely(vma->vm_pgoff < DRM_FILE_PAGE_OFFSET)) {
 		DRM_ERROR("Illegal attempt to mmap old fifo space.\n");
 		return -EINVAL;
 	}
diff --git a/drivers/staging/vboxvideo/vbox_drv.h b/drivers/staging/vboxvideo/vbox_drv.h
index eba5a8bc9cea..77f2a4e9000e 100644
--- a/drivers/staging/vboxvideo/vbox_drv.h
+++ b/drivers/staging/vboxvideo/vbox_drv.h
@@ -209,8 +209,6 @@ int vbox_dumb_mmap_offset(struct drm_file *file,
 			  struct drm_device *dev,
 			  u32 handle, u64 *offset);
 
-#define DRM_FILE_PAGE_OFFSET (0x100000000ULL >> PAGE_SHIFT)
-
 int vbox_mm_init(struct vbox_private *vbox);
 void vbox_mm_fini(struct vbox_private *vbox);
 
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
index cbf3180cb612..c0bed72492f3 100644
--- a/include/drm/ttm/ttm_bo_driver.h
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -49,6 +49,8 @@
 #define TTM_MEMTYPE_FLAG_MAPPABLE      (1 << 1)	/* Memory mappable */
 #define TTM_MEMTYPE_FLAG_CMA           (1 << 3)	/* Can't map aperture */
 
+#define DRM_FILE_PAGE_OFFSET (0x100000000ULL >> PAGE_SHIFT)
+
 struct ttm_mem_type_manager;
 
 struct ttm_mem_type_manager_func {
-- 
cgit v1.2.3


From 7d1500f9fbfc78e6a34be3f8f4b5ba13e8227199 Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Thu, 7 Feb 2019 09:59:29 +0100
Subject: drm/ttm: Remove file_page_offset parameter from ttm_bo_device_init()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The parameter file_page_offset is a constant shared by all drivers. Just
replace it with the constant itself.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: Christian König <christian.koenig@amd.com>
Acked-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c     | 1 -
 drivers/gpu/drm/ast/ast_ttm.c               | 1 -
 drivers/gpu/drm/bochs/bochs_mm.c            | 1 -
 drivers/gpu/drm/cirrus/cirrus_ttm.c         | 1 -
 drivers/gpu/drm/hisilicon/hibmc/hibmc_ttm.c | 1 -
 drivers/gpu/drm/mgag200/mgag200_ttm.c       | 1 -
 drivers/gpu/drm/nouveau/nouveau_ttm.c       | 1 -
 drivers/gpu/drm/qxl/qxl_ttm.c               | 2 +-
 drivers/gpu/drm/radeon/radeon_ttm.c         | 1 -
 drivers/gpu/drm/ttm/ttm_bo.c                | 4 ++--
 drivers/gpu/drm/virtio/virtgpu_ttm.c        | 2 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.c         | 1 -
 drivers/staging/vboxvideo/vbox_ttm.c        | 2 +-
 include/drm/ttm/ttm_bo_driver.h             | 2 +-
 14 files changed, 6 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index d5349c631ffc..d5140d0085d4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1618,7 +1618,6 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
 	r = ttm_bo_device_init(&adev->mman.bdev,
 			       &amdgpu_bo_driver,
 			       adev->ddev->anon_inode->i_mapping,
-			       DRM_FILE_PAGE_OFFSET,
 			       adev->need_dma32);
 	if (r) {
 		DRM_ERROR("failed initializing buffer object driver(%d).\n", r);
diff --git a/drivers/gpu/drm/ast/ast_ttm.c b/drivers/gpu/drm/ast/ast_ttm.c
index c168d62fe8f9..c410804a5e2c 100644
--- a/drivers/gpu/drm/ast/ast_ttm.c
+++ b/drivers/gpu/drm/ast/ast_ttm.c
@@ -178,7 +178,6 @@ int ast_mm_init(struct ast_private *ast)
 	ret = ttm_bo_device_init(&ast->ttm.bdev,
 				 &ast_bo_driver,
 				 dev->anon_inode->i_mapping,
-				 DRM_FILE_PAGE_OFFSET,
 				 true);
 	if (ret) {
 		DRM_ERROR("Error initialising bo driver; %d\n", ret);
diff --git a/drivers/gpu/drm/bochs/bochs_mm.c b/drivers/gpu/drm/bochs/bochs_mm.c
index 49463348a07a..ebc9dbd94923 100644
--- a/drivers/gpu/drm/bochs/bochs_mm.c
+++ b/drivers/gpu/drm/bochs/bochs_mm.c
@@ -156,7 +156,6 @@ int bochs_mm_init(struct bochs_device *bochs)
 	ret = ttm_bo_device_init(&bochs->ttm.bdev,
 				 &bochs_bo_driver,
 				 bochs->dev->anon_inode->i_mapping,
-				 DRM_FILE_PAGE_OFFSET,
 				 true);
 	if (ret) {
 		DRM_ERROR("Error initialising bo driver; %d\n", ret);
diff --git a/drivers/gpu/drm/cirrus/cirrus_ttm.c b/drivers/gpu/drm/cirrus/cirrus_ttm.c
index e075810b4bd4..223a75ba77a8 100644
--- a/drivers/gpu/drm/cirrus/cirrus_ttm.c
+++ b/drivers/gpu/drm/cirrus/cirrus_ttm.c
@@ -178,7 +178,6 @@ int cirrus_mm_init(struct cirrus_device *cirrus)
 	ret = ttm_bo_device_init(&cirrus->ttm.bdev,
 				 &cirrus_bo_driver,
 				 dev->anon_inode->i_mapping,
-				 DRM_FILE_PAGE_OFFSET,
 				 true);
 	if (ret) {
 		DRM_ERROR("Error initialising bo driver; %d\n", ret);
diff --git a/drivers/gpu/drm/hisilicon/hibmc/hibmc_ttm.c b/drivers/gpu/drm/hisilicon/hibmc/hibmc_ttm.c
index a06725c2bbfa..845488922865 100644
--- a/drivers/gpu/drm/hisilicon/hibmc/hibmc_ttm.c
+++ b/drivers/gpu/drm/hisilicon/hibmc/hibmc_ttm.c
@@ -189,7 +189,6 @@ int hibmc_mm_init(struct hibmc_drm_private *hibmc)
 	ret = ttm_bo_device_init(&hibmc->bdev,
 				 &hibmc_bo_driver,
 				 dev->anon_inode->i_mapping,
-				 DRM_FILE_PAGE_OFFSET,
 				 true);
 	if (ret) {
 		DRM_ERROR("error initializing bo driver: %d\n", ret);
diff --git a/drivers/gpu/drm/mgag200/mgag200_ttm.c b/drivers/gpu/drm/mgag200/mgag200_ttm.c
index d96a9b32455e..ce739da5161d 100644
--- a/drivers/gpu/drm/mgag200/mgag200_ttm.c
+++ b/drivers/gpu/drm/mgag200/mgag200_ttm.c
@@ -178,7 +178,6 @@ int mgag200_mm_init(struct mga_device *mdev)
 	ret = ttm_bo_device_init(&mdev->ttm.bdev,
 				 &mgag200_bo_driver,
 				 dev->anon_inode->i_mapping,
-				 DRM_FILE_PAGE_OFFSET,
 				 true);
 	if (ret) {
 		DRM_ERROR("Error initialising bo driver; %d\n", ret);
diff --git a/drivers/gpu/drm/nouveau/nouveau_ttm.c b/drivers/gpu/drm/nouveau/nouveau_ttm.c
index 1543c2f8d3d3..c66467232604 100644
--- a/drivers/gpu/drm/nouveau/nouveau_ttm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_ttm.c
@@ -239,7 +239,6 @@ nouveau_ttm_init(struct nouveau_drm *drm)
 	ret = ttm_bo_device_init(&drm->ttm.bdev,
 				  &nouveau_bo_driver,
 				  dev->anon_inode->i_mapping,
-				  DRM_FILE_PAGE_OFFSET,
 				  drm->client.mmu.dmabits <= 32 ? true : false);
 	if (ret) {
 		NV_ERROR(drm, "error initialising bo driver, %d\n", ret);
diff --git a/drivers/gpu/drm/qxl/qxl_ttm.c b/drivers/gpu/drm/qxl/qxl_ttm.c
index 92f5db5b296f..6b3b92d1f17f 100644
--- a/drivers/gpu/drm/qxl/qxl_ttm.c
+++ b/drivers/gpu/drm/qxl/qxl_ttm.c
@@ -328,7 +328,7 @@ int qxl_ttm_init(struct qxl_device *qdev)
 	r = ttm_bo_device_init(&qdev->mman.bdev,
 			       &qxl_bo_driver,
 			       qdev->ddev.anon_inode->i_mapping,
-			       DRM_FILE_PAGE_OFFSET, 0);
+			       false);
 	if (r) {
 		DRM_ERROR("failed initializing buffer object driver(%d).\n", r);
 		return r;
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
index 071a872e5b02..7c10e1560780 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -790,7 +790,6 @@ int radeon_ttm_init(struct radeon_device *rdev)
 	r = ttm_bo_device_init(&rdev->mman.bdev,
 			       &radeon_bo_driver,
 			       rdev->ddev->anon_inode->i_mapping,
-			       DRM_FILE_PAGE_OFFSET,
 			       rdev->need_dma32);
 	if (r) {
 		DRM_ERROR("failed initializing buffer object driver(%d).\n", r);
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 3f56647cdb35..e1b65775f3ee 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -1624,7 +1624,6 @@ EXPORT_SYMBOL(ttm_bo_device_release);
 int ttm_bo_device_init(struct ttm_bo_device *bdev,
 		       struct ttm_bo_driver *driver,
 		       struct address_space *mapping,
-		       uint64_t file_page_offset,
 		       bool need_dma32)
 {
 	struct ttm_bo_global *glob = &ttm_bo_glob;
@@ -1646,7 +1645,8 @@ int ttm_bo_device_init(struct ttm_bo_device *bdev,
 	if (unlikely(ret != 0))
 		goto out_no_sys;
 
-	drm_vma_offset_manager_init(&bdev->vma_manager, file_page_offset,
+	drm_vma_offset_manager_init(&bdev->vma_manager,
+				    DRM_FILE_PAGE_OFFSET,
 				    0x10000000);
 	INIT_DELAYED_WORK(&bdev->wq, ttm_bo_delayed_workqueue);
 	INIT_LIST_HEAD(&bdev->ddestroy);
diff --git a/drivers/gpu/drm/virtio/virtgpu_ttm.c b/drivers/gpu/drm/virtio/virtgpu_ttm.c
index 64f5b8dec3e9..d6225ba20b30 100644
--- a/drivers/gpu/drm/virtio/virtgpu_ttm.c
+++ b/drivers/gpu/drm/virtio/virtgpu_ttm.c
@@ -328,7 +328,7 @@ int virtio_gpu_ttm_init(struct virtio_gpu_device *vgdev)
 	r = ttm_bo_device_init(&vgdev->mman.bdev,
 			       &virtio_gpu_bo_driver,
 			       vgdev->ddev->anon_inode->i_mapping,
-			       DRM_FILE_PAGE_OFFSET, 0);
+			       false);
 	if (r) {
 		DRM_ERROR("failed initializing buffer object driver(%d).\n", r);
 		goto err_dev_init;
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index a31d38e2216f..be25ce9440ad 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -851,7 +851,6 @@ static int vmw_driver_load(struct drm_device *dev, unsigned long chipset)
 	ret = ttm_bo_device_init(&dev_priv->bdev,
 				 &vmw_bo_driver,
 				 dev->anon_inode->i_mapping,
-				 DRM_FILE_PAGE_OFFSET,
 				 false);
 	if (unlikely(ret != 0)) {
 		DRM_ERROR("Failed initializing TTM buffer object driver.\n");
diff --git a/drivers/staging/vboxvideo/vbox_ttm.c b/drivers/staging/vboxvideo/vbox_ttm.c
index 30f270027acf..b7db7a7952d4 100644
--- a/drivers/staging/vboxvideo/vbox_ttm.c
+++ b/drivers/staging/vboxvideo/vbox_ttm.c
@@ -156,7 +156,7 @@ int vbox_mm_init(struct vbox_private *vbox)
 	ret = ttm_bo_device_init(&vbox->ttm.bdev,
 				 &vbox_bo_driver,
 				 dev->anon_inode->i_mapping,
-				 DRM_FILE_PAGE_OFFSET, true);
+				 true);
 	if (ret) {
 		DRM_ERROR("Error initialising bo driver; %d\n", ret);
 		return ret;
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
index c0bed72492f3..ff0ec43fce7f 100644
--- a/include/drm/ttm/ttm_bo_driver.h
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -599,7 +599,7 @@ int ttm_bo_device_release(struct ttm_bo_device *bdev);
 int ttm_bo_device_init(struct ttm_bo_device *bdev,
 		       struct ttm_bo_driver *driver,
 		       struct address_space *mapping,
-		       uint64_t file_page_offset, bool need_dma32);
+		       bool need_dma32);
 
 /**
  * ttm_bo_unmap_virtual
-- 
cgit v1.2.3


From bf141a88b329d12c9aaea4f7e7a8fdbd2dfe987d Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Thu, 7 Feb 2019 09:59:31 +0100
Subject: drm: Use the same mmap-range offset and size for GEM and TTM
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

GEM defines DRM_FILE_PAGE_OFFSET_{START,SIZE} constants for the
mmap-able range of addresses. TTM can use them as well.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: Christian König <christian.koenig@amd.com>
Acked-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/drm_gem.c       | 17 -----------------
 drivers/gpu/drm/ttm/ttm_bo.c    |  4 ++--
 drivers/gpu/drm/ttm/ttm_bo_vm.c |  2 +-
 include/drm/drm_vma_manager.h   | 12 ++++++++++++
 include/drm/ttm/ttm_bo_driver.h |  2 --
 5 files changed, 15 insertions(+), 22 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c
index d0b9f6a9953f..ea988509ae8f 100644
--- a/drivers/gpu/drm/drm_gem.c
+++ b/drivers/gpu/drm/drm_gem.c
@@ -72,23 +72,6 @@
  * up at a later date, and as our interface with shmfs for memory allocation.
  */
 
-/*
- * We make up offsets for buffer objects so we can recognize them at
- * mmap time.
- */
-
-/* pgoff in mmap is an unsigned long, so we need to make sure that
- * the faked up offset will fit
- */
-
-#if BITS_PER_LONG == 64
-#define DRM_FILE_PAGE_OFFSET_START ((0xFFFFFFFFUL >> PAGE_SHIFT) + 1)
-#define DRM_FILE_PAGE_OFFSET_SIZE ((0xFFFFFFFFUL >> PAGE_SHIFT) * 16)
-#else
-#define DRM_FILE_PAGE_OFFSET_START ((0xFFFFFFFUL >> PAGE_SHIFT) + 1)
-#define DRM_FILE_PAGE_OFFSET_SIZE ((0xFFFFFFFUL >> PAGE_SHIFT) * 16)
-#endif
-
 /**
  * drm_gem_init - Initialize the GEM device fields
  * @dev: drm_devic structure to initialize
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index e1b65775f3ee..988416fb8a0b 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -1646,8 +1646,8 @@ int ttm_bo_device_init(struct ttm_bo_device *bdev,
 		goto out_no_sys;
 
 	drm_vma_offset_manager_init(&bdev->vma_manager,
-				    DRM_FILE_PAGE_OFFSET,
-				    0x10000000);
+				    DRM_FILE_PAGE_OFFSET_START,
+				    DRM_FILE_PAGE_OFFSET_SIZE);
 	INIT_DELAYED_WORK(&bdev->wq, ttm_bo_delayed_workqueue);
 	INIT_LIST_HEAD(&bdev->ddestroy);
 	bdev->dev_mapping = mapping;
diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
index e94c2ab3718a..6dacff49c1cc 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
@@ -432,7 +432,7 @@ int ttm_bo_mmap(struct file *filp, struct vm_area_struct *vma,
 	struct ttm_buffer_object *bo;
 	int ret;
 
-	if (unlikely(vma->vm_pgoff < DRM_FILE_PAGE_OFFSET))
+	if (unlikely(vma->vm_pgoff < DRM_FILE_PAGE_OFFSET_START))
 		return -EINVAL;
 
 	bo = ttm_bo_vm_lookup(bdev, vma->vm_pgoff, vma_pages(vma));
diff --git a/include/drm/drm_vma_manager.h b/include/drm/drm_vma_manager.h
index c7987daeaed0..f4f8ff1cdeec 100644
--- a/include/drm/drm_vma_manager.h
+++ b/include/drm/drm_vma_manager.h
@@ -29,6 +29,18 @@
 #include <linux/spinlock.h>
 #include <linux/types.h>
 
+/* We make up offsets for buffer objects so we can recognize them at
+ * mmap time. pgoff in mmap is an unsigned long, so we need to make sure
+ * that the faked up offset will fit
+ */
+#if BITS_PER_LONG == 64
+#define DRM_FILE_PAGE_OFFSET_START ((0xFFFFFFFFUL >> PAGE_SHIFT) + 1)
+#define DRM_FILE_PAGE_OFFSET_SIZE ((0xFFFFFFFFUL >> PAGE_SHIFT) * 16)
+#else
+#define DRM_FILE_PAGE_OFFSET_START ((0xFFFFFFFUL >> PAGE_SHIFT) + 1)
+#define DRM_FILE_PAGE_OFFSET_SIZE ((0xFFFFFFFUL >> PAGE_SHIFT) * 16)
+#endif
+
 struct drm_file;
 
 struct drm_vma_offset_file {
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
index ff0ec43fce7f..61b80ec78e80 100644
--- a/include/drm/ttm/ttm_bo_driver.h
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -49,8 +49,6 @@
 #define TTM_MEMTYPE_FLAG_MAPPABLE      (1 << 1)	/* Memory mappable */
 #define TTM_MEMTYPE_FLAG_CMA           (1 << 3)	/* Can't map aperture */
 
-#define DRM_FILE_PAGE_OFFSET (0x100000000ULL >> PAGE_SHIFT)
-
 struct ttm_mem_type_manager;
 
 struct ttm_mem_type_manager_func {
-- 
cgit v1.2.3


From 9b9ca62ddeec541bc7633410dacccefd96f0cc68 Mon Sep 17 00:00:00 2001
From: xinhui pan <xinhui.pan@amd.com>
Date: Tue, 20 Nov 2018 10:22:36 +0800
Subject: drm/amdgpu: export ta fw info

Output the ta fw, aka xgmi/ras, via debugfs.

Signed-off-by: xinhui pan <xinhui.pan@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 21 +++++++++++++++++++++
 include/uapi/drm/amdgpu_drm.h           |  1 +
 2 files changed, 22 insertions(+)

(limited to 'include')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index b65e18101108..4d0ccca891cb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -295,6 +295,17 @@ static int amdgpu_firmware_info(struct drm_amdgpu_info_firmware *fw_info,
 		fw_info->ver = adev->pm.fw_version;
 		fw_info->feature = 0;
 		break;
+	case AMDGPU_INFO_FW_TA:
+		if (query_fw->index > 1)
+			return -EINVAL;
+		if (query_fw->index == 0) {
+			fw_info->ver = adev->psp.ta_fw_version;
+			fw_info->feature = adev->psp.ta_xgmi_ucode_version;
+		} else {
+			fw_info->ver = adev->psp.ta_fw_version;
+			fw_info->feature = adev->psp.ta_ras_ucode_version;
+		}
+		break;
 	case AMDGPU_INFO_FW_SDMA:
 		if (query_fw->index >= adev->sdma.num_instances)
 			return -EINVAL;
@@ -1327,6 +1338,16 @@ static int amdgpu_debugfs_firmware_info(struct seq_file *m, void *data)
 	seq_printf(m, "ASD feature version: %u, firmware version: 0x%08x\n",
 		   fw_info.feature, fw_info.ver);
 
+	query_fw.fw_type = AMDGPU_INFO_FW_TA;
+	for (i = 0; i < 2; i++) {
+		query_fw.index = i;
+		ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+		if (ret)
+			continue;
+		seq_printf(m, "TA %s feature version: %u, firmware version: 0x%08x\n",
+				i ? "RAS" : "XGMI", fw_info.feature, fw_info.ver);
+	}
+
 	/* SMC */
 	query_fw.fw_type = AMDGPU_INFO_FW_SMC;
 	ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index 4a53f6cfa034..e5275d4481f5 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -680,6 +680,7 @@ struct drm_amdgpu_cs_chunk_data {
 	#define AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_SRM_MEM 0x11
 	/* Subquery id: Query DMCU firmware version */
 	#define AMDGPU_INFO_FW_DMCU		0x12
+	#define AMDGPU_INFO_FW_TA		0x13
 /* number of bytes moved for TTM migration */
 #define AMDGPU_INFO_NUM_BYTES_MOVED		0x0f
 /* the used VRAM size */
-- 
cgit v1.2.3


From ae363a212b145a0d4e0507e26e6c1b937181ec34 Mon Sep 17 00:00:00 2001
From: xinhui pan <xinhui.pan@amd.com>
Date: Mon, 17 Dec 2018 14:31:12 +0800
Subject: drm/amdgpu: Add a new flag to AMDGPU_CTX_OP_QUERY_STATE2

Add AMDGPU_CTX_QUERY2_FLAGS_RAS_CE/UE which indicate if any error happened
between previous query and this query.

Signed-off-by: xinhui pan <xinhui.pan@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 17 +++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h |  2 ++
 include/uapi/drm/amdgpu_drm.h           |  3 +++
 3 files changed, 22 insertions(+)

(limited to 'include')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index 7b526593eb77..736ed1d67ec2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -26,6 +26,7 @@
 #include <drm/drm_auth.h>
 #include "amdgpu.h"
 #include "amdgpu_sched.h"
+#include "amdgpu_ras.h"
 
 #define to_amdgpu_ctx_entity(e)	\
 	container_of((e), struct amdgpu_ctx_entity, entity)
@@ -344,6 +345,7 @@ static int amdgpu_ctx_query2(struct amdgpu_device *adev,
 {
 	struct amdgpu_ctx *ctx;
 	struct amdgpu_ctx_mgr *mgr;
+	uint32_t ras_counter;
 
 	if (!fpriv)
 		return -EINVAL;
@@ -368,6 +370,21 @@ static int amdgpu_ctx_query2(struct amdgpu_device *adev,
 	if (atomic_read(&ctx->guilty))
 		out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_GUILTY;
 
+	/*query ue count*/
+	ras_counter = amdgpu_ras_query_error_count(adev, false);
+	/*ras counter is monotonic increasing*/
+	if (ras_counter != ctx->ras_counter_ue) {
+		out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RAS_UE;
+		ctx->ras_counter_ue = ras_counter;
+	}
+
+	/*query ce count*/
+	ras_counter = amdgpu_ras_query_error_count(adev, true);
+	if (ras_counter != ctx->ras_counter_ce) {
+		out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RAS_CE;
+		ctx->ras_counter_ce = ras_counter;
+	}
+
 	mutex_unlock(&mgr->lock);
 	return 0;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
index b3b012c0a7da..8e561daa64cb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
@@ -49,6 +49,8 @@ struct amdgpu_ctx {
 	enum drm_sched_priority		override_priority;
 	struct mutex			lock;
 	atomic_t			guilty;
+	uint32_t			ras_counter_ce;
+	uint32_t			ras_counter_ue;
 };
 
 struct amdgpu_ctx_mgr {
diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index e5275d4481f5..722598b25f37 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -210,6 +210,9 @@ union drm_amdgpu_bo_list {
 #define AMDGPU_CTX_QUERY2_FLAGS_VRAMLOST (1<<1)
 /* indicate some job from this context once cause gpu hang */
 #define AMDGPU_CTX_QUERY2_FLAGS_GUILTY   (1<<2)
+/* indicate some errors are detected by RAS */
+#define AMDGPU_CTX_QUERY2_FLAGS_RAS_CE   (1<<3)
+#define AMDGPU_CTX_QUERY2_FLAGS_RAS_UE   (1<<4)
 
 /* Context priority level */
 #define AMDGPU_CTX_PRIORITY_UNSET       -2048
-- 
cgit v1.2.3


From 5cb771143ed9552c672b6e1d6a3005a435b54e44 Mon Sep 17 00:00:00 2001
From: xinhui pan <xinhui.pan@amd.com>
Date: Mon, 17 Dec 2018 15:09:07 +0800
Subject: drm/amdgpu: add ioctl query for enabled ras features (v2)

Add a query for userspace to check which RAS features
are enabled.

v2: squash in warning fix

Signed-off-by: xinhui pan <xinhui.pan@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 10 ++++++++++
 include/uapi/drm/amdgpu_drm.h           | 31 +++++++++++++++++++++++++++++++
 2 files changed, 41 insertions(+)

(limited to 'include')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 4d0ccca891cb..df40f35db89c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -39,6 +39,7 @@
 #include "amdgpu_amdkfd.h"
 #include "amdgpu_gem.h"
 #include "amdgpu_display.h"
+#include "amdgpu_ras.h"
 
 static void amdgpu_unregister_gpu_instance(struct amdgpu_device *adev)
 {
@@ -919,6 +920,15 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
 	case AMDGPU_INFO_VRAM_LOST_COUNTER:
 		ui32 = atomic_read(&adev->vram_lost_counter);
 		return copy_to_user(out, &ui32, min(size, 4u)) ? -EFAULT : 0;
+	case AMDGPU_INFO_RAS_ENABLED_FEATURES: {
+		struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+
+		if (!ras)
+			return -EINVAL;
+		return copy_to_user(out, &ras->features,
+				min_t(u32, size, sizeof(ras->features))) ?
+			-EFAULT : 0;
+	}
 	default:
 		DRM_DEBUG_KMS("Invalid request %d\n", info->query);
 		return -EINVAL;
diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index 722598b25f37..e3a97da4add9 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -737,6 +737,37 @@ struct drm_amdgpu_cs_chunk_data {
 /* Number of VRAM page faults on CPU access. */
 #define AMDGPU_INFO_NUM_VRAM_CPU_PAGE_FAULTS	0x1E
 #define AMDGPU_INFO_VRAM_LOST_COUNTER		0x1F
+/* query ras mask of enabled features*/
+#define AMDGPU_INFO_RAS_ENABLED_FEATURES	0x20
+
+/* RAS MASK: UMC (VRAM) */
+#define AMDGPU_INFO_RAS_ENABLED_UMC			(1 << 0)
+/* RAS MASK: SDMA */
+#define AMDGPU_INFO_RAS_ENABLED_SDMA			(1 << 1)
+/* RAS MASK: GFX */
+#define AMDGPU_INFO_RAS_ENABLED_GFX			(1 << 2)
+/* RAS MASK: MMHUB */
+#define AMDGPU_INFO_RAS_ENABLED_MMHUB			(1 << 3)
+/* RAS MASK: ATHUB */
+#define AMDGPU_INFO_RAS_ENABLED_ATHUB			(1 << 4)
+/* RAS MASK: PCIE */
+#define AMDGPU_INFO_RAS_ENABLED_PCIE			(1 << 5)
+/* RAS MASK: HDP */
+#define AMDGPU_INFO_RAS_ENABLED_HDP			(1 << 6)
+/* RAS MASK: XGMI */
+#define AMDGPU_INFO_RAS_ENABLED_XGMI			(1 << 7)
+/* RAS MASK: DF */
+#define AMDGPU_INFO_RAS_ENABLED_DF			(1 << 8)
+/* RAS MASK: SMN */
+#define AMDGPU_INFO_RAS_ENABLED_SMN			(1 << 9)
+/* RAS MASK: SEM */
+#define AMDGPU_INFO_RAS_ENABLED_SEM			(1 << 10)
+/* RAS MASK: MP0 */
+#define AMDGPU_INFO_RAS_ENABLED_MP0			(1 << 11)
+/* RAS MASK: MP1 */
+#define AMDGPU_INFO_RAS_ENABLED_MP1			(1 << 12)
+/* RAS MASK: FUSE */
+#define AMDGPU_INFO_RAS_ENABLED_FUSE			(1 << 13)
 
 #define AMDGPU_INFO_MMR_SE_INDEX_SHIFT	0
 #define AMDGPU_INFO_MMR_SE_INDEX_MASK	0xff
-- 
cgit v1.2.3


From 9b54d2017687df9fa827faf9e4022973b87fc0ff Mon Sep 17 00:00:00 2001
From: Eric Huang <JinhuiEric.Huang@amd.com>
Date: Fri, 11 Jan 2019 14:38:51 -0500
Subject: drm/amdkfd: add RAS ECC event support (v3)

RAS ECC event will combine with GPU reset event, due to
ECC interrupts are caused by uncorrectable error that triggers
GPU reset.

v2: Fix misleading-indentation warning
v3: fix build with CONFIG_HSA_AMD disabled

Signed-off-by: Eric Huang <JinhuiEric.Huang@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c |  4 ++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h |  1 +
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c      |  1 +
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c      |  1 +
 drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c     |  2 ++
 drivers/gpu/drm/amd/amdkfd/kfd_device.c    | 11 +++++++++++
 drivers/gpu/drm/amd/amdkfd/kfd_events.c    | 18 +++++++++++++++++-
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h      |  3 +++
 include/uapi/linux/kfd_ioctl.h             | 12 +++++++++++-
 9 files changed, 51 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index fe1d7368c1e6..acf8ae0cee9a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -640,4 +640,8 @@ int kgd2kfd_post_reset(struct kfd_dev *kfd)
 void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
 {
 }
+
+void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd)
+{
+}
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 0e1711a75b68..e6a503760b62 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -229,5 +229,6 @@ int kgd2kfd_quiesce_mm(struct mm_struct *mm);
 int kgd2kfd_resume_mm(struct mm_struct *mm);
 int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm,
 					       struct dma_fence *fence);
+void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd);
 
 #endif /* AMDGPU_AMDKFD_H_INCLUDED */
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 88c45f990f05..6bb71f6ee18e 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -4805,6 +4805,7 @@ static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
 		struct amdgpu_iv_entry *entry)
 {
 	/* TODO ue will trigger an interrupt. */
+	kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
 	amdgpu_ras_reset_gpu(adev, 0);
 	return AMDGPU_RAS_UE;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 2daa5ea1c2ea..0252345a1f08 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -354,6 +354,7 @@ static int gmc_v9_0_ecc_interrupt_state(struct amdgpu_device *adev,
 static int gmc_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
 		struct amdgpu_iv_entry *entry)
 {
+	kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
 	amdgpu_ras_reset_gpu(adev, 0);
 	return AMDGPU_RAS_UE;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index 058b9daec514..f7a6fafd70ae 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -1851,6 +1851,8 @@ static int sdma_v4_0_process_ras_data_cb(struct amdgpu_device *adev,
 		return 0;
 	}
 
+	kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
+
 	amdgpu_ras_reset_gpu(adev, 0);
 
 	return AMDGPU_RAS_UE;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 8be9677c0c07..b3cdbf79f47b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -466,6 +466,8 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd,
 	memset(&kfd->doorbell_available_index, 0,
 		sizeof(kfd->doorbell_available_index));
 
+	atomic_set(&kfd->sram_ecc_flag, 0);
+
 	return kfd;
 }
 
@@ -661,6 +663,9 @@ int kgd2kfd_post_reset(struct kfd_dev *kfd)
 		return ret;
 	count = atomic_dec_return(&kfd_locked);
 	WARN_ONCE(count != 0, "KFD reset ref. error");
+
+	atomic_set(&kfd->sram_ecc_flag, 0);
+
 	return 0;
 }
 
@@ -1024,6 +1029,12 @@ int kfd_gtt_sa_free(struct kfd_dev *kfd, struct kfd_mem_obj *mem_obj)
 	return 0;
 }
 
+void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd)
+{
+	if (kfd)
+		atomic_inc(&kfd->sram_ecc_flag);
+}
+
 #if defined(CONFIG_DEBUG_FS)
 
 /* This function will send a package to HIQ to hang the HWS
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
index e9f0e0a1b41c..6e1d41c5bf86 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
@@ -1011,25 +1011,41 @@ void kfd_signal_vm_fault_event(struct kfd_dev *dev, unsigned int pasid,
 void kfd_signal_reset_event(struct kfd_dev *dev)
 {
 	struct kfd_hsa_hw_exception_data hw_exception_data;
+	struct kfd_hsa_memory_exception_data memory_exception_data;
 	struct kfd_process *p;
 	struct kfd_event *ev;
 	unsigned int temp;
 	uint32_t id, idx;
+	int reset_cause = atomic_read(&dev->sram_ecc_flag) ?
+			KFD_HW_EXCEPTION_ECC :
+			KFD_HW_EXCEPTION_GPU_HANG;
 
 	/* Whole gpu reset caused by GPU hang and memory is lost */
 	memset(&hw_exception_data, 0, sizeof(hw_exception_data));
 	hw_exception_data.gpu_id = dev->id;
 	hw_exception_data.memory_lost = 1;
+	hw_exception_data.reset_cause = reset_cause;
+
+	memset(&memory_exception_data, 0, sizeof(memory_exception_data));
+	memory_exception_data.ErrorType = KFD_MEM_ERR_SRAM_ECC;
+	memory_exception_data.gpu_id = dev->id;
+	memory_exception_data.failure.imprecise = true;
 
 	idx = srcu_read_lock(&kfd_processes_srcu);
 	hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
 		mutex_lock(&p->event_mutex);
 		id = KFD_FIRST_NONSIGNAL_EVENT_ID;
-		idr_for_each_entry_continue(&p->event_idr, ev, id)
+		idr_for_each_entry_continue(&p->event_idr, ev, id) {
 			if (ev->type == KFD_EVENT_TYPE_HW_EXCEPTION) {
 				ev->hw_exception_data = hw_exception_data;
 				set_event(ev);
 			}
+			if (ev->type == KFD_EVENT_TYPE_MEMORY &&
+			    reset_cause == KFD_HW_EXCEPTION_ECC) {
+				ev->memory_exception_data = memory_exception_data;
+				set_event(ev);
+			}
+		}
 		mutex_unlock(&p->event_mutex);
 	}
 	srcu_read_unlock(&kfd_processes_srcu, idx);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 0eeee3c6d6dc..9e0230965675 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -276,6 +276,9 @@ struct kfd_dev {
 	uint64_t hive_id;
 
 	bool pci_atomic_requested;
+
+	/* SRAM ECC flag */
+	atomic_t sram_ecc_flag;
 };
 
 enum kfd_mempool {
diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index e622fd1fbd46..dc067ed0b72d 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -211,6 +211,11 @@ struct kfd_ioctl_dbg_wave_control_args {
 #define KFD_HW_EXCEPTION_GPU_HANG	0
 #define KFD_HW_EXCEPTION_ECC		1
 
+/* For kfd_hsa_memory_exception_data.ErrorType */
+#define KFD_MEM_ERR_NO_RAS		0
+#define KFD_MEM_ERR_SRAM_ECC		1
+#define KFD_MEM_ERR_POISON_CONSUMED	2
+#define KFD_MEM_ERR_GPU_HANG		3
 
 struct kfd_ioctl_create_event_args {
 	__u64 event_page_offset;	/* from KFD */
@@ -250,7 +255,12 @@ struct kfd_hsa_memory_exception_data {
 	struct kfd_memory_exception_failure failure;
 	__u64 va;
 	__u32 gpu_id;
-	__u32 pad;
+	__u32 ErrorType; /* 0 = no RAS error,
+			  * 1 = ECC_SRAM,
+			  * 2 = Link_SYNFLOOD (poison),
+			  * 3 = GPU hang (not attributable to a specific cause),
+			  * other values reserved
+			  */
 };
 
 /* hw exception data */
-- 
cgit v1.2.3


From a7b4deeb02b978bc59808cb13c93ba84f01023a4 Mon Sep 17 00:00:00 2001
From: Anusha Srivatsa <anusha.srivatsa@intel.com>
Date: Mon, 18 Mar 2019 13:01:32 -0700
Subject: drm/i915/cml: Add CML PCI IDS

Comet Lake is a Intel Processor containing Gen9
Intel HD Graphics. This patch adds the initial set of
PCI IDs. Comet Lake comes off of Coffee Lake - adding
the IDs to Coffee Lake ID list.

More support and features will be in the patches that follow.

v2: Split IDs according to GT. (Rodrigo)

v3: Update IDs.

Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Anusha Srivatsa <anusha.srivatsa@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190318200133.9666-1-anusha.srivatsa@intel.com
---
 drivers/gpu/drm/i915/i915_pci.c |  2 ++
 include/drm/i915_pciids.h       | 28 +++++++++++++++++++++++++++-
 2 files changed, 29 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index ef7410c492fd..7a6054eadb8e 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -795,6 +795,8 @@ static const struct pci_device_id pciidlist[] = {
 	INTEL_WHL_U_GT2_IDS(&intel_coffeelake_gt2_info),
 	INTEL_AML_CFL_GT2_IDS(&intel_coffeelake_gt2_info),
 	INTEL_WHL_U_GT3_IDS(&intel_coffeelake_gt3_info),
+	INTEL_CML_GT1_IDS(&intel_coffeelake_gt1_info),
+	INTEL_CML_GT2_IDS(&intel_coffeelake_gt2_info),
 	INTEL_CNL_IDS(&intel_cannonlake_info),
 	INTEL_ICL_11_IDS(&intel_icelake_11_info),
 	{0, 0, 0}
diff --git a/include/drm/i915_pciids.h b/include/drm/i915_pciids.h
index d200000feeaa..291b5e3fa59c 100644
--- a/include/drm/i915_pciids.h
+++ b/include/drm/i915_pciids.h
@@ -373,6 +373,30 @@
 #define INTEL_AML_CFL_GT2_IDS(info) \
 	INTEL_VGA_DEVICE(0x87CA, info)
 
+/* CML GT1 */
+#define INTEL_CML_GT1_IDS(info)	\
+	INTEL_VGA_DEVICE(0x9B21, info), \
+	INTEL_VGA_DEVICE(0x9BAA, info), \
+	INTEL_VGA_DEVICE(0x9BAB, info), \
+	INTEL_VGA_DEVICE(0x9BAC, info), \
+	INTEL_VGA_DEVICE(0x9BA0, info), \
+	INTEL_VGA_DEVICE(0x9BA5, info), \
+	INTEL_VGA_DEVICE(0x9BA8, info), \
+	INTEL_VGA_DEVICE(0x9BA4, info), \
+	INTEL_VGA_DEVICE(0x9BA2, info)
+
+/* CML GT2 */
+#define INTEL_CML_GT2_IDS(info)	\
+	INTEL_VGA_DEVICE(0x9B41, info), \
+	INTEL_VGA_DEVICE(0x9BCA, info), \
+	INTEL_VGA_DEVICE(0x9BCB, info), \
+	INTEL_VGA_DEVICE(0x9BCC, info), \
+	INTEL_VGA_DEVICE(0x9BC0, info), \
+	INTEL_VGA_DEVICE(0x9BC5, info), \
+	INTEL_VGA_DEVICE(0x9BC8, info), \
+	INTEL_VGA_DEVICE(0x9BC4, info), \
+	INTEL_VGA_DEVICE(0x9BC2, info)
+
 #define INTEL_KBL_IDS(info) \
 	INTEL_KBL_GT1_IDS(info), \
 	INTEL_KBL_GT2_IDS(info), \
@@ -436,7 +460,9 @@
 	INTEL_WHL_U_GT1_IDS(info), \
 	INTEL_WHL_U_GT2_IDS(info), \
 	INTEL_WHL_U_GT3_IDS(info), \
-	INTEL_AML_CFL_GT2_IDS(info)
+	INTEL_AML_CFL_GT2_IDS(info), \
+	INTEL_CML_GT1_IDS(info), \
+	INTEL_CML_GT2_IDS(info)
 
 /* CNL */
 #define INTEL_CNL_IDS(info) \
-- 
cgit v1.2.3


From bb229bbb3bf63d23128e851a1f3b85c083178fa1 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Wed, 20 Mar 2019 09:46:58 +0100
Subject: libceph: wait for latest osdmap in ceph_monc_blacklist_add()

Because map updates are distributed lazily, an OSD may not know about
the new blacklist for quite some time after "osd blacklist add" command
is completed.  This makes it possible for a blacklisted but still alive
client to overwrite a post-blacklist update, resulting in data
corruption.

Waiting for latest osdmap in ceph_monc_blacklist_add() and thus using
the post-blacklist epoch for all post-blacklist requests ensures that
all such requests "wait" for the blacklist to come into force on their
respective OSDs.

Cc: stable@vger.kernel.org
Fixes: 6305a3b41515 ("libceph: support for blacklisting clients")
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Reviewed-by: Jason Dillaman <dillaman@redhat.com>
---
 include/linux/ceph/libceph.h |  2 ++
 net/ceph/ceph_common.c       | 18 +++++++++++++++++-
 net/ceph/mon_client.c        |  9 +++++++++
 3 files changed, 28 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h
index a420c07904bc..337d5049ff93 100644
--- a/include/linux/ceph/libceph.h
+++ b/include/linux/ceph/libceph.h
@@ -294,6 +294,8 @@ extern void ceph_destroy_client(struct ceph_client *client);
 extern int __ceph_open_session(struct ceph_client *client,
 			       unsigned long started);
 extern int ceph_open_session(struct ceph_client *client);
+int ceph_wait_for_latest_osdmap(struct ceph_client *client,
+				unsigned long timeout);
 
 /* pagevec.c */
 extern void ceph_release_page_vector(struct page **pages, int num_pages);
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index 9cab80207ced..79eac465ec65 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -738,7 +738,6 @@ int __ceph_open_session(struct ceph_client *client, unsigned long started)
 }
 EXPORT_SYMBOL(__ceph_open_session);
 
-
 int ceph_open_session(struct ceph_client *client)
 {
 	int ret;
@@ -754,6 +753,23 @@ int ceph_open_session(struct ceph_client *client)
 }
 EXPORT_SYMBOL(ceph_open_session);
 
+int ceph_wait_for_latest_osdmap(struct ceph_client *client,
+				unsigned long timeout)
+{
+	u64 newest_epoch;
+	int ret;
+
+	ret = ceph_monc_get_version(&client->monc, "osdmap", &newest_epoch);
+	if (ret)
+		return ret;
+
+	if (client->osdc.osdmap->epoch >= newest_epoch)
+		return 0;
+
+	ceph_osdc_maybe_request_map(&client->osdc);
+	return ceph_monc_wait_osdmap(&client->monc, newest_epoch, timeout);
+}
+EXPORT_SYMBOL(ceph_wait_for_latest_osdmap);
 
 static int __init init_ceph_lib(void)
 {
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
index 18deb3d889c4..a53e4fbb6319 100644
--- a/net/ceph/mon_client.c
+++ b/net/ceph/mon_client.c
@@ -922,6 +922,15 @@ int ceph_monc_blacklist_add(struct ceph_mon_client *monc,
 	mutex_unlock(&monc->mutex);
 
 	ret = wait_generic_request(req);
+	if (!ret)
+		/*
+		 * Make sure we have the osdmap that includes the blacklist
+		 * entry.  This is needed to ensure that the OSDs pick up the
+		 * new blacklist before processing any future requests from
+		 * this client.
+		 */
+		ret = ceph_wait_for_latest_osdmap(monc->client, 0);
+
 out:
 	put_generic_request(req);
 	return ret;
-- 
cgit v1.2.3


From 29ece8b4354f8c5eaee798a3d8a1b356efee426f Mon Sep 17 00:00:00 2001
From: Yufen Yu <yuyufen@huawei.com>
Date: Mon, 18 Mar 2019 22:44:41 +0800
Subject: block: add BLK_MQ_POLL_CLASSIC for hybrid poll and return EINVAL for
 unexpected value

For q->poll_nsec == -1, means doing classic poll, not hybrid poll.
We introduce a new flag BLK_MQ_POLL_CLASSIC to replace -1, which
may make code much easier to read.

Additionally, since val is an int obtained with kstrtoint(), val can be
a negative value other than -1, so return -EINVAL for that case.

Thanks to Damien Le Moal for some good suggestion.

Reviewed-by: Damien Le Moal <damien.lemoal@wdc.com>
Signed-off-by: Yufen Yu <yuyufen@huawei.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq.c         |  4 ++--
 block/blk-sysfs.c      | 12 +++++++-----
 include/linux/blkdev.h |  3 +++
 3 files changed, 12 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/block/blk-mq.c b/block/blk-mq.c
index ea01c23b58a3..76a3f78c566a 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2856,7 +2856,7 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
 	/*
 	 * Default to classic polling
 	 */
-	q->poll_nsec = -1;
+	q->poll_nsec = BLK_MQ_POLL_CLASSIC;
 
 	blk_mq_init_cpu_queues(q, set->nr_hw_queues);
 	blk_mq_add_queue_tag_set(set, q);
@@ -3391,7 +3391,7 @@ static bool blk_mq_poll_hybrid(struct request_queue *q,
 {
 	struct request *rq;
 
-	if (q->poll_nsec == -1)
+	if (q->poll_nsec == BLK_MQ_POLL_CLASSIC)
 		return false;
 
 	if (!blk_qc_t_is_internal(cookie))
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 59685918167e..422327089e0f 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -360,8 +360,8 @@ static ssize_t queue_poll_delay_show(struct request_queue *q, char *page)
 {
 	int val;
 
-	if (q->poll_nsec == -1)
-		val = -1;
+	if (q->poll_nsec == BLK_MQ_POLL_CLASSIC)
+		val = BLK_MQ_POLL_CLASSIC;
 	else
 		val = q->poll_nsec / 1000;
 
@@ -380,10 +380,12 @@ static ssize_t queue_poll_delay_store(struct request_queue *q, const char *page,
 	if (err < 0)
 		return err;
 
-	if (val == -1)
-		q->poll_nsec = -1;
-	else
+	if (val == BLK_MQ_POLL_CLASSIC)
+		q->poll_nsec = BLK_MQ_POLL_CLASSIC;
+	else if (val >= 0)
 		q->poll_nsec = val * 1000;
+	else
+		return -EINVAL;
 
 	return count;
 }
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 0de92b29f589..5c58a3b2bf00 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -50,6 +50,9 @@ struct blk_stat_callback;
 /* Must be consistent with blk_mq_poll_stats_bkt() */
 #define BLK_MQ_POLL_STATS_BKTS 16
 
+/* Doing classic polling */
+#define BLK_MQ_POLL_CLASSIC -1
+
 /*
  * Maximum number of blkcg policies allowed to be registered concurrently.
  * Defined here to simplify include dependency.
-- 
cgit v1.2.3


From e6c987120e24cb913cb7bd4e675129a30fa49e0d Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Wed, 20 Mar 2019 13:14:37 -0700
Subject: block: Unexport blk_mq_add_to_requeue_list()

This function is not used outside the block layer core. Hence unexport it.

Cc: Christoph Hellwig <hch@lst.de>
Cc: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq.c         | 1 -
 block/blk-mq.h         | 2 ++
 include/linux/blk-mq.h | 2 --
 3 files changed, 2 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 76a3f78c566a..70b210a308c4 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -782,7 +782,6 @@ void blk_mq_add_to_requeue_list(struct request *rq, bool at_head,
 	if (kick_requeue_list)
 		blk_mq_kick_requeue_list(q);
 }
-EXPORT_SYMBOL(blk_mq_add_to_requeue_list);
 
 void blk_mq_kick_requeue_list(struct request_queue *q)
 {
diff --git a/block/blk-mq.h b/block/blk-mq.h
index c11353a3749d..0ed8e5a8729f 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -41,6 +41,8 @@ void blk_mq_free_queue(struct request_queue *q);
 int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr);
 void blk_mq_wake_waiters(struct request_queue *q);
 bool blk_mq_dispatch_rq_list(struct request_queue *, struct list_head *, bool);
+void blk_mq_add_to_requeue_list(struct request *rq, bool at_head,
+				bool kick_requeue_list);
 void blk_mq_flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, struct list_head *list);
 bool blk_mq_get_driver_tag(struct request *rq);
 struct request *blk_mq_dequeue_from_ctx(struct blk_mq_hw_ctx *hctx,
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 35359697318b..cb2aa7ecafff 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -299,8 +299,6 @@ void blk_mq_end_request(struct request *rq, blk_status_t error);
 void __blk_mq_end_request(struct request *rq, blk_status_t error);
 
 void blk_mq_requeue_request(struct request *rq, bool kick_requeue_list);
-void blk_mq_add_to_requeue_list(struct request *rq, bool at_head,
-				bool kick_requeue_list);
 void blk_mq_kick_requeue_list(struct request_queue *q);
 void blk_mq_delay_kick_requeue_list(struct request_queue *q, unsigned long msecs);
 bool blk_mq_complete_request(struct request *rq);
-- 
cgit v1.2.3


From 551417af91b163bd697eb50b3601adae2177c28a Mon Sep 17 00:00:00 2001
From: Peter Xu <peterx@redhat.com>
Date: Mon, 18 Mar 2019 14:51:23 +0800
Subject: genirq: Fix typo in comment of IRQD_MOVE_PCNTXT

Signed-off-by: Peter Xu <peterx@redhat.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Cc: Dou Liyang <douliyangs@gmail.com>
Cc: Julien Thierry <julien.thierry@arm.com>
Link: https://lkml.kernel.org/r/20190318065123.11862-1-peterx@redhat.com
---
 include/linux/irq.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/irq.h b/include/linux/irq.h
index d6160d479b14..7ae8de5ad0f2 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -195,7 +195,7 @@ struct irq_data {
  * IRQD_LEVEL			- Interrupt is level triggered
  * IRQD_WAKEUP_STATE		- Interrupt is configured for wakeup
  *				  from suspend
- * IRDQ_MOVE_PCNTXT		- Interrupt can be moved in process
+ * IRQD_MOVE_PCNTXT		- Interrupt can be moved in process
  *				  context
  * IRQD_IRQ_DISABLED		- Disabled state of the interrupt
  * IRQD_IRQ_MASKED		- Masked state of the interrupt
-- 
cgit v1.2.3


From 9d1305ef80b95dde0337106ed8b826604e2155ad Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Fri, 22 Mar 2019 09:23:22 +0000
Subject: drm/i915: Introduce the i915_user_extension_method

An idea for extending uABI inspired by Vulkan's extension chains.
Instead of expanding the data struct for each ioctl every time we need
to add a new feature, define an extension chain instead. As we add
optional interfaces to control the ioctl, we define a new extension
struct that can be linked into the ioctl data only when required by the
user. The key advantage being able to ignore large control structs for
optional interfaces/extensions, while being able to process them in a
consistent manner.

In comparison to other extensible ioctls, the key difference is the
use of a linked chain of extension structs vs an array of tagged
pointers. For example,

struct drm_amdgpu_cs_chunk {
        __u32           chunk_id;
        __u32           length_dw;
        __u64           chunk_data;
};

struct drm_amdgpu_cs_in {
        __u32           ctx_id;
        __u32           bo_list_handle;
        __u32           num_chunks;
        __u32           _pad;
        __u64           chunks;
};

allows userspace to pass in array of pointers to extension structs, but
must therefore keep constructing that array along side the command stream.
In dynamic situations like that, a linked list is preferred and does not
similar from extra cache line misses as the extension structs themselves
must still be loaded separate to the chunks array.

v2: Apply the tail call optimisation directly to nip the worry of stack
overflow in the bud.
v3: Defend against recursion.
v4: Fixup local types to match new uabi

Opens:
- do we include the result as an out-field in each chain?
struct i915_user_extension {
	__u64 next_extension;
	__u64 name;
	__s32 result;
	__u32 mbz; /* reserved for future use */
};
* Undecided, so provision some room for future expansion.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190322092325.5883-1-chris@chris-wilson.co.uk
---
 drivers/gpu/drm/i915/Makefile               |  1 +
 drivers/gpu/drm/i915/i915_user_extensions.c | 61 +++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_user_extensions.h | 20 ++++++++++
 drivers/gpu/drm/i915/i915_utils.h           | 31 +++++++++++++++
 include/uapi/drm/i915_drm.h                 | 22 +++++++++++
 5 files changed, 135 insertions(+)
 create mode 100644 drivers/gpu/drm/i915/i915_user_extensions.c
 create mode 100644 drivers/gpu/drm/i915/i915_user_extensions.h

(limited to 'include')

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 68fecf355471..60de05f3fa60 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -46,6 +46,7 @@ i915-y := i915_drv.o \
 	  i915_sw_fence.o \
 	  i915_syncmap.o \
 	  i915_sysfs.o \
+	  i915_user_extensions.o \
 	  intel_csr.o \
 	  intel_device_info.o \
 	  intel_pm.o \
diff --git a/drivers/gpu/drm/i915/i915_user_extensions.c b/drivers/gpu/drm/i915/i915_user_extensions.c
new file mode 100644
index 000000000000..c822d0aafd2d
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_user_extensions.c
@@ -0,0 +1,61 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2018 Intel Corporation
+ */
+
+#include <linux/nospec.h>
+#include <linux/sched/signal.h>
+#include <linux/uaccess.h>
+
+#include <uapi/drm/i915_drm.h>
+
+#include "i915_user_extensions.h"
+#include "i915_utils.h"
+
+int i915_user_extensions(struct i915_user_extension __user *ext,
+			 const i915_user_extension_fn *tbl,
+			 unsigned int count,
+			 void *data)
+{
+	unsigned int stackdepth = 512;
+
+	while (ext) {
+		int i, err;
+		u32 name;
+		u64 next;
+
+		if (!stackdepth--) /* recursion vs useful flexibility */
+			return -E2BIG;
+
+		err = check_user_mbz(&ext->flags);
+		if (err)
+			return err;
+
+		for (i = 0; i < ARRAY_SIZE(ext->rsvd); i++) {
+			err = check_user_mbz(&ext->rsvd[i]);
+			if (err)
+				return err;
+		}
+
+		if (get_user(name, &ext->name))
+			return -EFAULT;
+
+		err = -EINVAL;
+		if (name < count) {
+			name = array_index_nospec(name, count);
+			if (tbl[name])
+				err = tbl[name](ext, data);
+		}
+		if (err)
+			return err;
+
+		if (get_user(next, &ext->next_extension) ||
+		    overflows_type(next, ext))
+			return -EFAULT;
+
+		ext = u64_to_user_ptr(next);
+	}
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/i915/i915_user_extensions.h b/drivers/gpu/drm/i915/i915_user_extensions.h
new file mode 100644
index 000000000000..a14bf6bba9a1
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_user_extensions.h
@@ -0,0 +1,20 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2018 Intel Corporation
+ */
+
+#ifndef I915_USER_EXTENSIONS_H
+#define I915_USER_EXTENSIONS_H
+
+struct i915_user_extension;
+
+typedef int (*i915_user_extension_fn)(struct i915_user_extension __user *ext,
+				      void *data);
+
+int i915_user_extensions(struct i915_user_extension __user *ext,
+			 const i915_user_extension_fn *tbl,
+			 unsigned int count,
+			 void *data);
+
+#endif /* I915_USER_EXTENSIONS_H */
diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h
index 9726df37c4c4..8baedcaddbba 100644
--- a/drivers/gpu/drm/i915/i915_utils.h
+++ b/drivers/gpu/drm/i915/i915_utils.h
@@ -105,6 +105,37 @@
 	__T;								\
 })
 
+/*
+ * container_of_user: Extract the superclass from a pointer to a member.
+ *
+ * Exactly like container_of() with the exception that it plays nicely
+ * with sparse for __user @ptr.
+ */
+#define container_of_user(ptr, type, member) ({				\
+	void __user *__mptr = (void __user *)(ptr);			\
+	BUILD_BUG_ON_MSG(!__same_type(*(ptr), ((type *)0)->member) &&	\
+			 !__same_type(*(ptr), void),			\
+			 "pointer type mismatch in container_of()");	\
+	((type __user *)(__mptr - offsetof(type, member))); })
+
+/*
+ * check_user_mbz: Check that a user value exists and is zero
+ *
+ * Frequently in our uABI we reserve space for future extensions, and
+ * two ensure that userspace is prepared we enforce that space must
+ * be zero. (Then any future extension can safely assume a default value
+ * of 0.)
+ *
+ * check_user_mbz() combines checking that the user pointer is accessible
+ * and that the contained value is zero.
+ *
+ * Returns: -EFAULT if not accessible, -EINVAL if !zero, or 0 on success.
+ */
+#define check_user_mbz(U) ({						\
+	typeof(*(U)) mbz__;						\
+	get_user(mbz__, (U)) ? -EFAULT : mbz__ ? -EINVAL : 0;		\
+})
+
 static inline u64 ptr_to_u64(const void *ptr)
 {
 	return (uintptr_t)ptr;
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index aa2d4c73a97d..1c69ed16a923 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -62,6 +62,28 @@ extern "C" {
 #define I915_ERROR_UEVENT		"ERROR"
 #define I915_RESET_UEVENT		"RESET"
 
+/*
+ * i915_user_extension: Base class for defining a chain of extensions
+ *
+ * Many interfaces need to grow over time. In most cases we can simply
+ * extend the struct and have userspace pass in more data. Another option,
+ * as demonstrated by Vulkan's approach to providing extensions for forward
+ * and backward compatibility, is to use a list of optional structs to
+ * provide those extra details.
+ *
+ * The key advantage to using an extension chain is that it allows us to
+ * redefine the interface more easily than an ever growing struct of
+ * increasing complexity, and for large parts of that interface to be
+ * entirely optional. The downside is more pointer chasing; chasing across
+ * the __user boundary with pointers encapsulated inside u64.
+ */
+struct i915_user_extension {
+	__u64 next_extension;
+	__u32 name;
+	__u32 flags; /* All undefined bits must be zero. */
+	__u32 rsvd[4]; /* Reserved for future use; must be zero. */
+};
+
 /*
  * MOCS indexes used for GPU surfaces, defining the cacheability of the
  * surface data and the coherency for this data wrt. CPU vs. GPU accesses.
-- 
cgit v1.2.3


From e0695db7298ec2181d38bd3bfc5d0ff7a46f96f3 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Fri, 22 Mar 2019 09:23:23 +0000
Subject: drm/i915: Create/destroy VM (ppGTT) for use with contexts

In preparation to making the ppGTT binding for a context explicit (to
facilitate reusing the same ppGTT between different contexts), allow the
user to create and destroy named ppGTT.

v2: Replace global barrier for swapping over the ppgtt and tlbs with a
local context barrier (Tvrtko)
v3: serialise with struct_mutex; it's lazy but required dammit
v4: Rewrite igt_ctx_shared_exec to be more different (aimed to be more
similarly, turned out different!)

v5: Fix up test unwind for aliasing-ppgtt (snb)
v6: Tighten language for uapi struct drm_i915_gem_vm_control.
v7: Patch the context image for runtime ppgtt switching!

Testcase: igt/gem_vm_create
Testcase: igt/gem_ctx_param/vm
Testcase: igt/gem_ctx_clone/vm
Testcase: igt/gem_ctx_shared
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190322092325.5883-2-chris@chris-wilson.co.uk
---
 drivers/gpu/drm/i915/i915_drv.c                   |   2 +
 drivers/gpu/drm/i915/i915_drv.h                   |   3 +
 drivers/gpu/drm/i915/i915_gem_context.c           | 331 +++++++++++++++++++++-
 drivers/gpu/drm/i915/i915_gem_context.h           |   5 +
 drivers/gpu/drm/i915/i915_gem_gtt.c               |  19 +-
 drivers/gpu/drm/i915/i915_gem_gtt.h               |  10 +-
 drivers/gpu/drm/i915/selftests/huge_pages.c       |   1 -
 drivers/gpu/drm/i915/selftests/i915_gem_context.c | 238 ++++++++++++----
 drivers/gpu/drm/i915/selftests/i915_gem_gtt.c     |   1 -
 drivers/gpu/drm/i915/selftests/mock_context.c     |   8 +-
 include/uapi/drm/i915_drm.h                       |  43 +++
 11 files changed, 580 insertions(+), 81 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index d01689737f1e..f6403ba1d56a 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -3122,6 +3122,8 @@ static const struct drm_ioctl_desc i915_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(I915_PERF_ADD_CONFIG, i915_perf_add_config_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(I915_PERF_REMOVE_CONFIG, i915_perf_remove_config_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(I915_QUERY, i915_query_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(I915_GEM_VM_CREATE, i915_gem_vm_create_ioctl, DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(I915_GEM_VM_DESTROY, i915_gem_vm_destroy_ioctl, DRM_RENDER_ALLOW),
 };
 
 static struct drm_driver driver = {
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index fefcb39aefc4..f723b15527f8 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -220,6 +220,9 @@ struct drm_i915_file_private {
 	struct idr context_idr;
 	struct mutex context_idr_lock; /* guards context_idr */
 
+	struct idr vm_idr;
+	struct mutex vm_idr_lock; /* guards vm_idr */
+
 	unsigned int bsd_engine;
 
 /*
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 00dec72f6875..d0a56c8d0bb9 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -90,6 +90,7 @@
 #include "i915_drv.h"
 #include "i915_globals.h"
 #include "i915_trace.h"
+#include "i915_user_extensions.h"
 #include "intel_lrc_reg.h"
 #include "intel_workarounds.h"
 
@@ -120,12 +121,15 @@ static void lut_close(struct i915_gem_context *ctx)
 		list_del(&lut->obj_link);
 		i915_lut_handle_free(lut);
 	}
+	INIT_LIST_HEAD(&ctx->handles_list);
 
 	rcu_read_lock();
 	radix_tree_for_each_slot(slot, &ctx->handles_vma, &iter, 0) {
 		struct i915_vma *vma = rcu_dereference_raw(*slot);
 
 		radix_tree_iter_delete(&ctx->handles_vma, &iter, slot);
+
+		vma->open_count--;
 		__i915_gem_object_release_unless_active(vma->obj);
 	}
 	rcu_read_unlock();
@@ -305,8 +309,6 @@ static void context_close(struct i915_gem_context *ctx)
 	 * the ppgtt).
 	 */
 	lut_close(ctx);
-	if (ctx->ppgtt)
-		i915_ppgtt_close(&ctx->ppgtt->vm);
 
 	ctx->file_priv = ERR_PTR(-EBADF);
 	i915_gem_context_put(ctx);
@@ -378,6 +380,28 @@ __create_context(struct drm_i915_private *dev_priv)
 	return ctx;
 }
 
+static struct i915_hw_ppgtt *
+__set_ppgtt(struct i915_gem_context *ctx, struct i915_hw_ppgtt *ppgtt)
+{
+	struct i915_hw_ppgtt *old = ctx->ppgtt;
+
+	ctx->ppgtt = i915_ppgtt_get(ppgtt);
+	ctx->desc_template = default_desc_template(ctx->i915, ppgtt);
+
+	return old;
+}
+
+static void __assign_ppgtt(struct i915_gem_context *ctx,
+			   struct i915_hw_ppgtt *ppgtt)
+{
+	if (ppgtt == ctx->ppgtt)
+		return;
+
+	ppgtt = __set_ppgtt(ctx, ppgtt);
+	if (ppgtt)
+		i915_ppgtt_put(ppgtt);
+}
+
 static struct i915_gem_context *
 i915_gem_create_context(struct drm_i915_private *dev_priv)
 {
@@ -403,8 +427,8 @@ i915_gem_create_context(struct drm_i915_private *dev_priv)
 			return ERR_CAST(ppgtt);
 		}
 
-		ctx->ppgtt = ppgtt;
-		ctx->desc_template = default_desc_template(dev_priv, ppgtt);
+		__assign_ppgtt(ctx, ppgtt);
+		i915_ppgtt_put(ppgtt);
 	}
 
 	trace_i915_context_create(ctx);
@@ -583,6 +607,12 @@ static int context_idr_cleanup(int id, void *p, void *data)
 	return 0;
 }
 
+static int vm_idr_cleanup(int id, void *p, void *data)
+{
+	i915_ppgtt_put(p);
+	return 0;
+}
+
 static int gem_context_register(struct i915_gem_context *ctx,
 				struct drm_i915_file_private *fpriv)
 {
@@ -621,8 +651,11 @@ int i915_gem_context_open(struct drm_i915_private *i915,
 	struct i915_gem_context *ctx;
 	int err;
 
-	idr_init(&file_priv->context_idr);
 	mutex_init(&file_priv->context_idr_lock);
+	mutex_init(&file_priv->vm_idr_lock);
+
+	idr_init(&file_priv->context_idr);
+	idr_init_base(&file_priv->vm_idr, 1);
 
 	mutex_lock(&i915->drm.struct_mutex);
 	ctx = i915_gem_create_context(i915);
@@ -646,8 +679,10 @@ err_ctx:
 	context_close(ctx);
 	mutex_unlock(&i915->drm.struct_mutex);
 err:
-	mutex_destroy(&file_priv->context_idr_lock);
+	idr_destroy(&file_priv->vm_idr);
 	idr_destroy(&file_priv->context_idr);
+	mutex_destroy(&file_priv->vm_idr_lock);
+	mutex_destroy(&file_priv->context_idr_lock);
 	return PTR_ERR(ctx);
 }
 
@@ -660,6 +695,99 @@ void i915_gem_context_close(struct drm_file *file)
 	idr_for_each(&file_priv->context_idr, context_idr_cleanup, NULL);
 	idr_destroy(&file_priv->context_idr);
 	mutex_destroy(&file_priv->context_idr_lock);
+
+	idr_for_each(&file_priv->vm_idr, vm_idr_cleanup, NULL);
+	idr_destroy(&file_priv->vm_idr);
+	mutex_destroy(&file_priv->vm_idr_lock);
+}
+
+int i915_gem_vm_create_ioctl(struct drm_device *dev, void *data,
+			     struct drm_file *file)
+{
+	struct drm_i915_private *i915 = to_i915(dev);
+	struct drm_i915_gem_vm_control *args = data;
+	struct drm_i915_file_private *file_priv = file->driver_priv;
+	struct i915_hw_ppgtt *ppgtt;
+	int err;
+
+	if (!HAS_FULL_PPGTT(i915))
+		return -ENODEV;
+
+	if (args->flags)
+		return -EINVAL;
+
+	ppgtt = i915_ppgtt_create(i915);
+	if (IS_ERR(ppgtt))
+		return PTR_ERR(ppgtt);
+
+	ppgtt->vm.file = file_priv;
+
+	if (args->extensions) {
+		err = i915_user_extensions(u64_to_user_ptr(args->extensions),
+					   NULL, 0,
+					   ppgtt);
+		if (err)
+			goto err_put;
+	}
+
+	err = mutex_lock_interruptible(&file_priv->vm_idr_lock);
+	if (err)
+		goto err_put;
+
+	err = idr_alloc(&file_priv->vm_idr, ppgtt, 0, 0, GFP_KERNEL);
+	if (err < 0)
+		goto err_unlock;
+
+	GEM_BUG_ON(err == 0); /* reserved for default/unassigned ppgtt */
+	ppgtt->user_handle = err;
+
+	mutex_unlock(&file_priv->vm_idr_lock);
+
+	args->vm_id = err;
+	return 0;
+
+err_unlock:
+	mutex_unlock(&file_priv->vm_idr_lock);
+err_put:
+	i915_ppgtt_put(ppgtt);
+	return err;
+}
+
+int i915_gem_vm_destroy_ioctl(struct drm_device *dev, void *data,
+			      struct drm_file *file)
+{
+	struct drm_i915_file_private *file_priv = file->driver_priv;
+	struct drm_i915_gem_vm_control *args = data;
+	struct i915_hw_ppgtt *ppgtt;
+	int err;
+	u32 id;
+
+	if (args->flags)
+		return -EINVAL;
+
+	if (args->extensions)
+		return -EINVAL;
+
+	id = args->vm_id;
+	if (!id)
+		return -ENOENT;
+
+	err = mutex_lock_interruptible(&file_priv->vm_idr_lock);
+	if (err)
+		return err;
+
+	ppgtt = idr_remove(&file_priv->vm_idr, id);
+	if (ppgtt) {
+		GEM_BUG_ON(ppgtt->user_handle != id);
+		ppgtt->user_handle = 0;
+	}
+
+	mutex_unlock(&file_priv->vm_idr_lock);
+	if (!ppgtt)
+		return -ENOENT;
+
+	i915_ppgtt_put(ppgtt);
+	return 0;
 }
 
 static struct i915_request *
@@ -702,12 +830,13 @@ static void cb_retire(struct i915_active *base)
 I915_SELFTEST_DECLARE(static unsigned long context_barrier_inject_fault);
 static int context_barrier_task(struct i915_gem_context *ctx,
 				unsigned long engines,
+				int (*emit)(struct i915_request *rq, void *data),
 				void (*task)(void *data),
 				void *data)
 {
 	struct drm_i915_private *i915 = ctx->i915;
 	struct context_barrier_task *cb;
-	struct intel_context *ce;
+	struct intel_context *ce, *next;
 	intel_wakeref_t wakeref;
 	int err = 0;
 
@@ -722,11 +851,11 @@ static int context_barrier_task(struct i915_gem_context *ctx,
 	i915_active_acquire(&cb->base);
 
 	wakeref = intel_runtime_pm_get(i915);
-	list_for_each_entry(ce, &ctx->active_engines, active_link) {
+	rbtree_postorder_for_each_entry_safe(ce, next, &ctx->hw_contexts, node) {
 		struct intel_engine_cs *engine = ce->engine;
 		struct i915_request *rq;
 
-		if (!(ce->engine->mask & engines))
+		if (!(engine->mask & engines))
 			continue;
 
 		if (I915_SELFTEST_ONLY(context_barrier_inject_fault &
@@ -741,7 +870,12 @@ static int context_barrier_task(struct i915_gem_context *ctx,
 			break;
 		}
 
-		err = i915_active_ref(&cb->base, rq->fence.context, rq);
+		err = 0;
+		if (emit)
+			err = emit(rq, data);
+		if (err == 0)
+			err = i915_active_ref(&cb->base, rq->fence.context, rq);
+
 		i915_request_add(rq);
 		if (err)
 			break;
@@ -804,6 +938,170 @@ int i915_gem_switch_to_kernel_context(struct drm_i915_private *i915,
 	return 0;
 }
 
+static int get_ppgtt(struct i915_gem_context *ctx,
+		     struct drm_i915_gem_context_param *args)
+{
+	struct drm_i915_file_private *file_priv = ctx->file_priv;
+	struct i915_hw_ppgtt *ppgtt;
+	int ret;
+
+	if (!ctx->ppgtt)
+		return -ENODEV;
+
+	/* XXX rcu acquire? */
+	ret = mutex_lock_interruptible(&ctx->i915->drm.struct_mutex);
+	if (ret)
+		return ret;
+
+	ppgtt = i915_ppgtt_get(ctx->ppgtt);
+	mutex_unlock(&ctx->i915->drm.struct_mutex);
+
+	ret = mutex_lock_interruptible(&file_priv->vm_idr_lock);
+	if (ret)
+		goto err_put;
+
+	if (!ppgtt->user_handle) {
+		ret = idr_alloc(&file_priv->vm_idr, ppgtt, 0, 0, GFP_KERNEL);
+		GEM_BUG_ON(!ret);
+		if (ret < 0)
+			goto err_unlock;
+
+		ppgtt->user_handle = ret;
+		i915_ppgtt_get(ppgtt);
+	}
+
+	args->size = 0;
+	args->value = ppgtt->user_handle;
+
+	ret = 0;
+err_unlock:
+	mutex_unlock(&file_priv->vm_idr_lock);
+err_put:
+	i915_ppgtt_put(ppgtt);
+	return ret;
+}
+
+static void set_ppgtt_barrier(void *data)
+{
+	struct i915_hw_ppgtt *old = data;
+
+	if (INTEL_GEN(old->vm.i915) < 8)
+		gen6_ppgtt_unpin_all(old);
+
+	i915_ppgtt_put(old);
+}
+
+static int emit_ppgtt_update(struct i915_request *rq, void *data)
+{
+	struct i915_hw_ppgtt *ppgtt = rq->gem_context->ppgtt;
+	struct intel_engine_cs *engine = rq->engine;
+	u32 *cs;
+	int i;
+
+	if (i915_vm_is_4lvl(&ppgtt->vm)) {
+		const dma_addr_t pd_daddr = px_dma(&ppgtt->pml4);
+
+		cs = intel_ring_begin(rq, 6);
+		if (IS_ERR(cs))
+			return PTR_ERR(cs);
+
+		*cs++ = MI_LOAD_REGISTER_IMM(2);
+
+		*cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(engine, 0));
+		*cs++ = upper_32_bits(pd_daddr);
+		*cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(engine, 0));
+		*cs++ = lower_32_bits(pd_daddr);
+
+		*cs++ = MI_NOOP;
+		intel_ring_advance(rq, cs);
+	} else if (HAS_LOGICAL_RING_CONTEXTS(engine->i915)) {
+		cs = intel_ring_begin(rq, 4 * GEN8_3LVL_PDPES + 2);
+		if (IS_ERR(cs))
+			return PTR_ERR(cs);
+
+		*cs++ = MI_LOAD_REGISTER_IMM(2 * GEN8_3LVL_PDPES);
+		for (i = GEN8_3LVL_PDPES; i--; ) {
+			const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i);
+
+			*cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(engine, i));
+			*cs++ = upper_32_bits(pd_daddr);
+			*cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(engine, i));
+			*cs++ = lower_32_bits(pd_daddr);
+		}
+		*cs++ = MI_NOOP;
+		intel_ring_advance(rq, cs);
+	} else {
+		/* ppGTT is not part of the legacy context image */
+		gen6_ppgtt_pin(ppgtt);
+	}
+
+	return 0;
+}
+
+static int set_ppgtt(struct i915_gem_context *ctx,
+		     struct drm_i915_gem_context_param *args)
+{
+	struct drm_i915_file_private *file_priv = ctx->file_priv;
+	struct i915_hw_ppgtt *ppgtt, *old;
+	int err;
+
+	if (args->size)
+		return -EINVAL;
+
+	if (!ctx->ppgtt)
+		return -ENODEV;
+
+	if (upper_32_bits(args->value))
+		return -ENOENT;
+
+	err = mutex_lock_interruptible(&file_priv->vm_idr_lock);
+	if (err)
+		return err;
+
+	ppgtt = idr_find(&file_priv->vm_idr, args->value);
+	if (ppgtt) {
+		GEM_BUG_ON(ppgtt->user_handle != args->value);
+		i915_ppgtt_get(ppgtt);
+	}
+	mutex_unlock(&file_priv->vm_idr_lock);
+	if (!ppgtt)
+		return -ENOENT;
+
+	err = mutex_lock_interruptible(&ctx->i915->drm.struct_mutex);
+	if (err)
+		goto out;
+
+	if (ppgtt == ctx->ppgtt)
+		goto unlock;
+
+	/* Teardown the existing obj:vma cache, it will have to be rebuilt. */
+	lut_close(ctx);
+
+	old = __set_ppgtt(ctx, ppgtt);
+
+	/*
+	 * We need to flush any requests using the current ppgtt before
+	 * we release it as the requests do not hold a reference themselves,
+	 * only indirectly through the context.
+	 */
+	err = context_barrier_task(ctx, ALL_ENGINES,
+				   emit_ppgtt_update,
+				   set_ppgtt_barrier,
+				   old);
+	if (err) {
+		ctx->ppgtt = old;
+		ctx->desc_template = default_desc_template(ctx->i915, old);
+		i915_ppgtt_put(ppgtt);
+	}
+
+unlock:
+	mutex_unlock(&ctx->i915->drm.struct_mutex);
+
+out:
+	i915_ppgtt_put(ppgtt);
+	return err;
+}
+
 static bool client_is_banned(struct drm_i915_file_private *file_priv)
 {
 	return atomic_read(&file_priv->ban_score) >= I915_CLIENT_SCORE_BANNED;
@@ -984,6 +1282,9 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
 	case I915_CONTEXT_PARAM_SSEU:
 		ret = get_sseu(ctx, args);
 		break;
+	case I915_CONTEXT_PARAM_VM:
+		ret = get_ppgtt(ctx, args);
+		break;
 	default:
 		ret = -EINVAL;
 		break;
@@ -1285,9 +1586,6 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
 		return -ENOENT;
 
 	switch (args->param) {
-	case I915_CONTEXT_PARAM_BAN_PERIOD:
-		ret = -EINVAL;
-		break;
 	case I915_CONTEXT_PARAM_NO_ZEROMAP:
 		if (args->size)
 			ret = -EINVAL;
@@ -1343,9 +1641,16 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
 					I915_USER_PRIORITY(priority);
 		}
 		break;
+
 	case I915_CONTEXT_PARAM_SSEU:
 		ret = set_sseu(ctx, args);
 		break;
+
+	case I915_CONTEXT_PARAM_VM:
+		ret = set_ppgtt(ctx, args);
+		break;
+
+	case I915_CONTEXT_PARAM_BAN_PERIOD:
 	default:
 		ret = -EINVAL;
 		break;
diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
index 7e79d1ffbaef..edc6ba3f0288 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/i915_gem_context.h
@@ -148,6 +148,11 @@ void i915_gem_context_release(struct kref *ctx_ref);
 struct i915_gem_context *
 i915_gem_context_create_gvt(struct drm_device *dev);
 
+int i915_gem_vm_create_ioctl(struct drm_device *dev, void *data,
+			     struct drm_file *file);
+int i915_gem_vm_destroy_ioctl(struct drm_device *dev, void *data,
+			      struct drm_file *file);
+
 int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
 				  struct drm_file *file);
 int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index b9e0e3a00223..736c845eb77f 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -1937,6 +1937,8 @@ int gen6_ppgtt_pin(struct i915_hw_ppgtt *base)
 	struct gen6_hw_ppgtt *ppgtt = to_gen6_ppgtt(base);
 	int err;
 
+	GEM_BUG_ON(ppgtt->base.vm.closed);
+
 	/*
 	 * Workaround the limited maximum vma->pin_count and the aliasing_ppgtt
 	 * which will be pinned into every active context.
@@ -1975,6 +1977,17 @@ void gen6_ppgtt_unpin(struct i915_hw_ppgtt *base)
 	i915_vma_unpin(ppgtt->vma);
 }
 
+void gen6_ppgtt_unpin_all(struct i915_hw_ppgtt *base)
+{
+	struct gen6_hw_ppgtt *ppgtt = to_gen6_ppgtt(base);
+
+	if (!ppgtt->pin_count)
+		return;
+
+	ppgtt->pin_count = 0;
+	i915_vma_unpin(ppgtt->vma);
+}
+
 static struct i915_hw_ppgtt *gen6_ppgtt_create(struct drm_i915_private *i915)
 {
 	struct i915_ggtt * const ggtt = &i915->ggtt;
@@ -2082,12 +2095,6 @@ i915_ppgtt_create(struct drm_i915_private *i915)
 	return ppgtt;
 }
 
-void i915_ppgtt_close(struct i915_address_space *vm)
-{
-	GEM_BUG_ON(vm->closed);
-	vm->closed = true;
-}
-
 static void ppgtt_destroy_vma(struct i915_address_space *vm)
 {
 	struct list_head *phases[] = {
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index b76ab4c2a0e6..83ded9fc761a 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -396,6 +396,8 @@ struct i915_hw_ppgtt {
 		struct i915_page_directory_pointer pdp;	/* GEN8+ */
 		struct i915_page_directory pd;		/* GEN6-7 */
 	};
+
+	u32 user_handle;
 };
 
 struct gen6_hw_ppgtt {
@@ -605,13 +607,12 @@ void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv);
 int i915_ppgtt_init_hw(struct drm_i915_private *dev_priv);
 
 struct i915_hw_ppgtt *i915_ppgtt_create(struct drm_i915_private *dev_priv);
-void i915_ppgtt_close(struct i915_address_space *vm);
 void i915_ppgtt_release(struct kref *kref);
 
-static inline void i915_ppgtt_get(struct i915_hw_ppgtt *ppgtt)
+static inline struct i915_hw_ppgtt *i915_ppgtt_get(struct i915_hw_ppgtt *ppgtt)
 {
-	if (ppgtt)
-		kref_get(&ppgtt->ref);
+	kref_get(&ppgtt->ref);
+	return ppgtt;
 }
 
 static inline void i915_ppgtt_put(struct i915_hw_ppgtt *ppgtt)
@@ -622,6 +623,7 @@ static inline void i915_ppgtt_put(struct i915_hw_ppgtt *ppgtt)
 
 int gen6_ppgtt_pin(struct i915_hw_ppgtt *base);
 void gen6_ppgtt_unpin(struct i915_hw_ppgtt *base);
+void gen6_ppgtt_unpin_all(struct i915_hw_ppgtt *base);
 
 void i915_check_and_clear_faults(struct drm_i915_private *dev_priv);
 void i915_gem_suspend_gtt_mappings(struct drm_i915_private *dev_priv);
diff --git a/drivers/gpu/drm/i915/selftests/huge_pages.c b/drivers/gpu/drm/i915/selftests/huge_pages.c
index c5c8ba6c059f..90721b54e7ae 100644
--- a/drivers/gpu/drm/i915/selftests/huge_pages.c
+++ b/drivers/gpu/drm/i915/selftests/huge_pages.c
@@ -1732,7 +1732,6 @@ int i915_gem_huge_page_mock_selftests(void)
 	err = i915_subtests(tests, ppgtt);
 
 out_close:
-	i915_ppgtt_close(&ppgtt->vm);
 	i915_ppgtt_put(ppgtt);
 
 out_unlock:
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
index fb6dc54ce7ff..45f73b8b4e6d 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
@@ -373,7 +373,8 @@ static int cpu_fill(struct drm_i915_gem_object *obj, u32 value)
 	return 0;
 }
 
-static int cpu_check(struct drm_i915_gem_object *obj, unsigned int max)
+static noinline int cpu_check(struct drm_i915_gem_object *obj,
+			      unsigned int idx, unsigned int max)
 {
 	unsigned int n, m, needs_flush;
 	int err;
@@ -391,8 +392,10 @@ static int cpu_check(struct drm_i915_gem_object *obj, unsigned int max)
 
 		for (m = 0; m < max; m++) {
 			if (map[m] != m) {
-				pr_err("Invalid value at page %d, offset %d: found %x expected %x\n",
-				       n, m, map[m], m);
+				pr_err("%pS: Invalid value at object %d page %d/%ld, offset %d/%d: found %x expected %x\n",
+				       __builtin_return_address(0), idx,
+				       n, real_page_count(obj), m, max,
+				       map[m], m);
 				err = -EINVAL;
 				goto out_unmap;
 			}
@@ -400,8 +403,9 @@ static int cpu_check(struct drm_i915_gem_object *obj, unsigned int max)
 
 		for (; m < DW_PER_PAGE; m++) {
 			if (map[m] != STACK_MAGIC) {
-				pr_err("Invalid value at page %d, offset %d: found %x expected %x\n",
-				       n, m, map[m], STACK_MAGIC);
+				pr_err("%pS: Invalid value at object %d page %d, offset %d: found %x expected %x (uninitialised)\n",
+				       __builtin_return_address(0), idx, n, m,
+				       map[m], STACK_MAGIC);
 				err = -EINVAL;
 				goto out_unmap;
 			}
@@ -479,12 +483,8 @@ static unsigned long max_dwords(struct drm_i915_gem_object *obj)
 static int igt_ctx_exec(void *arg)
 {
 	struct drm_i915_private *i915 = arg;
-	struct drm_i915_gem_object *obj = NULL;
-	unsigned long ncontexts, ndwords, dw;
-	struct igt_live_test t;
-	struct drm_file *file;
-	IGT_TIMEOUT(end_time);
-	LIST_HEAD(objects);
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
 	int err = -ENODEV;
 
 	/*
@@ -496,44 +496,167 @@ static int igt_ctx_exec(void *arg)
 	if (!DRIVER_CAPS(i915)->has_logical_contexts)
 		return 0;
 
+	for_each_engine(engine, i915, id) {
+		struct drm_i915_gem_object *obj = NULL;
+		unsigned long ncontexts, ndwords, dw;
+		struct igt_live_test t;
+		struct drm_file *file;
+		IGT_TIMEOUT(end_time);
+		LIST_HEAD(objects);
+
+		if (!intel_engine_can_store_dword(engine))
+			continue;
+
+		if (!engine->context_size)
+			continue; /* No logical context support in HW */
+
+		file = mock_file(i915);
+		if (IS_ERR(file))
+			return PTR_ERR(file);
+
+		mutex_lock(&i915->drm.struct_mutex);
+
+		err = igt_live_test_begin(&t, i915, __func__, engine->name);
+		if (err)
+			goto out_unlock;
+
+		ncontexts = 0;
+		ndwords = 0;
+		dw = 0;
+		while (!time_after(jiffies, end_time)) {
+			struct i915_gem_context *ctx;
+			intel_wakeref_t wakeref;
+
+			ctx = live_context(i915, file);
+			if (IS_ERR(ctx)) {
+				err = PTR_ERR(ctx);
+				goto out_unlock;
+			}
+
+			if (!obj) {
+				obj = create_test_object(ctx, file, &objects);
+				if (IS_ERR(obj)) {
+					err = PTR_ERR(obj);
+					goto out_unlock;
+				}
+			}
+
+			with_intel_runtime_pm(i915, wakeref)
+				err = gpu_fill(obj, ctx, engine, dw);
+			if (err) {
+				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n",
+				       ndwords, dw, max_dwords(obj),
+				       engine->name, ctx->hw_id,
+				       yesno(!!ctx->ppgtt), err);
+				goto out_unlock;
+			}
+
+			if (++dw == max_dwords(obj)) {
+				obj = NULL;
+				dw = 0;
+			}
+
+			ndwords++;
+			ncontexts++;
+		}
+
+		pr_info("Submitted %lu contexts to %s, filling %lu dwords\n",
+			ncontexts, engine->name, ndwords);
+
+		ncontexts = dw = 0;
+		list_for_each_entry(obj, &objects, st_link) {
+			unsigned int rem =
+				min_t(unsigned int, ndwords - dw, max_dwords(obj));
+
+			err = cpu_check(obj, ncontexts++, rem);
+			if (err)
+				break;
+
+			dw += rem;
+		}
+
+out_unlock:
+		if (igt_live_test_end(&t))
+			err = -EIO;
+		mutex_unlock(&i915->drm.struct_mutex);
+
+		mock_file_free(i915, file);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+static int igt_shared_ctx_exec(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct i915_gem_context *parent;
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	struct igt_live_test t;
+	struct drm_file *file;
+	int err = 0;
+
+	/*
+	 * Create a few different contexts with the same mm and write
+	 * through each ctx using the GPU making sure those writes end
+	 * up in the expected pages of our obj.
+	 */
+	if (!DRIVER_CAPS(i915)->has_logical_contexts)
+		return 0;
+
 	file = mock_file(i915);
 	if (IS_ERR(file))
 		return PTR_ERR(file);
 
 	mutex_lock(&i915->drm.struct_mutex);
 
+	parent = live_context(i915, file);
+	if (IS_ERR(parent)) {
+		err = PTR_ERR(parent);
+		goto out_unlock;
+	}
+
+	if (!parent->ppgtt) { /* not full-ppgtt; nothing to share */
+		err = 0;
+		goto out_unlock;
+	}
+
 	err = igt_live_test_begin(&t, i915, __func__, "");
 	if (err)
 		goto out_unlock;
 
-	ncontexts = 0;
-	ndwords = 0;
-	dw = 0;
-	while (!time_after(jiffies, end_time)) {
-		struct intel_engine_cs *engine;
-		struct i915_gem_context *ctx;
-		unsigned int id;
+	for_each_engine(engine, i915, id) {
+		unsigned long ncontexts, ndwords, dw;
+		struct drm_i915_gem_object *obj = NULL;
+		IGT_TIMEOUT(end_time);
+		LIST_HEAD(objects);
 
-		ctx = live_context(i915, file);
-		if (IS_ERR(ctx)) {
-			err = PTR_ERR(ctx);
-			goto out_unlock;
-		}
+		if (!intel_engine_can_store_dword(engine))
+			continue;
 
-		for_each_engine(engine, i915, id) {
+		dw = 0;
+		ndwords = 0;
+		ncontexts = 0;
+		while (!time_after(jiffies, end_time)) {
+			struct i915_gem_context *ctx;
 			intel_wakeref_t wakeref;
 
-			if (!engine->context_size)
-				continue; /* No logical context support in HW */
+			ctx = kernel_context(i915);
+			if (IS_ERR(ctx)) {
+				err = PTR_ERR(ctx);
+				goto out_test;
+			}
 
-			if (!intel_engine_can_store_dword(engine))
-				continue;
+			__assign_ppgtt(ctx, parent->ppgtt);
 
 			if (!obj) {
-				obj = create_test_object(ctx, file, &objects);
+				obj = create_test_object(parent, file, &objects);
 				if (IS_ERR(obj)) {
 					err = PTR_ERR(obj);
-					goto out_unlock;
+					kernel_context_close(ctx);
+					goto out_test;
 				}
 			}
 
@@ -545,35 +668,39 @@ static int igt_ctx_exec(void *arg)
 				       ndwords, dw, max_dwords(obj),
 				       engine->name, ctx->hw_id,
 				       yesno(!!ctx->ppgtt), err);
-				goto out_unlock;
+				kernel_context_close(ctx);
+				goto out_test;
 			}
 
 			if (++dw == max_dwords(obj)) {
 				obj = NULL;
 				dw = 0;
 			}
+
 			ndwords++;
+			ncontexts++;
+
+			kernel_context_close(ctx);
 		}
-		ncontexts++;
-	}
-	pr_info("Submitted %lu contexts (across %u engines), filling %lu dwords\n",
-		ncontexts, RUNTIME_INFO(i915)->num_engines, ndwords);
+		pr_info("Submitted %lu contexts to %s, filling %lu dwords\n",
+			ncontexts, engine->name, ndwords);
 
-	dw = 0;
-	list_for_each_entry(obj, &objects, st_link) {
-		unsigned int rem =
-			min_t(unsigned int, ndwords - dw, max_dwords(obj));
+		ncontexts = dw = 0;
+		list_for_each_entry(obj, &objects, st_link) {
+			unsigned int rem =
+				min_t(unsigned int, ndwords - dw, max_dwords(obj));
 
-		err = cpu_check(obj, rem);
-		if (err)
-			break;
+			err = cpu_check(obj, ncontexts++, rem);
+			if (err)
+				goto out_test;
 
-		dw += rem;
+			dw += rem;
+		}
 	}
-
-out_unlock:
+out_test:
 	if (igt_live_test_end(&t))
 		err = -EIO;
+out_unlock:
 	mutex_unlock(&i915->drm.struct_mutex);
 
 	mock_file_free(i915, file);
@@ -1046,7 +1173,7 @@ static int igt_ctx_readonly(void *arg)
 	struct drm_i915_gem_object *obj = NULL;
 	struct i915_gem_context *ctx;
 	struct i915_hw_ppgtt *ppgtt;
-	unsigned long ndwords, dw;
+	unsigned long idx, ndwords, dw;
 	struct igt_live_test t;
 	struct drm_file *file;
 	I915_RND_STATE(prng);
@@ -1127,6 +1254,7 @@ static int igt_ctx_readonly(void *arg)
 		ndwords, RUNTIME_INFO(i915)->num_engines);
 
 	dw = 0;
+	idx = 0;
 	list_for_each_entry(obj, &objects, st_link) {
 		unsigned int rem =
 			min_t(unsigned int, ndwords - dw, max_dwords(obj));
@@ -1136,7 +1264,7 @@ static int igt_ctx_readonly(void *arg)
 		if (i915_gem_object_is_readonly(obj))
 			num_writes = 0;
 
-		err = cpu_check(obj, num_writes);
+		err = cpu_check(obj, idx++, num_writes);
 		if (err)
 			break;
 
@@ -1619,7 +1747,8 @@ static int mock_context_barrier(void *arg)
 	}
 
 	counter = 0;
-	err = context_barrier_task(ctx, 0, mock_barrier_task, &counter);
+	err = context_barrier_task(ctx, 0,
+				   NULL, mock_barrier_task, &counter);
 	if (err) {
 		pr_err("Failed at line %d, err=%d\n", __LINE__, err);
 		goto out;
@@ -1631,8 +1760,8 @@ static int mock_context_barrier(void *arg)
 	}
 
 	counter = 0;
-	err = context_barrier_task(ctx,
-				   ALL_ENGINES, mock_barrier_task, &counter);
+	err = context_barrier_task(ctx, ALL_ENGINES,
+				   NULL, mock_barrier_task, &counter);
 	if (err) {
 		pr_err("Failed at line %d, err=%d\n", __LINE__, err);
 		goto out;
@@ -1655,8 +1784,8 @@ static int mock_context_barrier(void *arg)
 
 	counter = 0;
 	context_barrier_inject_fault = BIT(RCS0);
-	err = context_barrier_task(ctx,
-				   ALL_ENGINES, mock_barrier_task, &counter);
+	err = context_barrier_task(ctx, ALL_ENGINES,
+				   NULL, mock_barrier_task, &counter);
 	context_barrier_inject_fault = 0;
 	if (err == -ENXIO)
 		err = 0;
@@ -1670,8 +1799,8 @@ static int mock_context_barrier(void *arg)
 		goto out;
 
 	counter = 0;
-	err = context_barrier_task(ctx,
-				   ALL_ENGINES, mock_barrier_task, &counter);
+	err = context_barrier_task(ctx, ALL_ENGINES,
+				   NULL, mock_barrier_task, &counter);
 	if (err) {
 		pr_err("Failed at line %d, err=%d\n", __LINE__, err);
 		goto out;
@@ -1719,6 +1848,7 @@ int i915_gem_context_live_selftests(struct drm_i915_private *dev_priv)
 		SUBTEST(igt_ctx_exec),
 		SUBTEST(igt_ctx_readonly),
 		SUBTEST(igt_ctx_sseu),
+		SUBTEST(igt_shared_ctx_exec),
 		SUBTEST(igt_vm_isolation),
 	};
 
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
index 01084f6b4fb7..9cca66e4420a 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
@@ -1020,7 +1020,6 @@ static int exercise_ppgtt(struct drm_i915_private *dev_priv,
 
 	err = func(dev_priv, &ppgtt->vm, 0, ppgtt->vm.total, end_time);
 
-	i915_ppgtt_close(&ppgtt->vm);
 	i915_ppgtt_put(ppgtt);
 out_unlock:
 	mutex_unlock(&dev_priv->drm.struct_mutex);
diff --git a/drivers/gpu/drm/i915/selftests/mock_context.c b/drivers/gpu/drm/i915/selftests/mock_context.c
index bc14e9aec274..81e5ace18b81 100644
--- a/drivers/gpu/drm/i915/selftests/mock_context.c
+++ b/drivers/gpu/drm/i915/selftests/mock_context.c
@@ -54,13 +54,17 @@ mock_context(struct drm_i915_private *i915,
 		goto err_handles;
 
 	if (name) {
+		struct i915_hw_ppgtt *ppgtt;
+
 		ctx->name = kstrdup(name, GFP_KERNEL);
 		if (!ctx->name)
 			goto err_put;
 
-		ctx->ppgtt = mock_ppgtt(i915, name);
-		if (!ctx->ppgtt)
+		ppgtt = mock_ppgtt(i915, name);
+		if (!ppgtt)
 			goto err_put;
+
+		__set_ppgtt(ctx, ppgtt);
 	}
 
 	return ctx;
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 1c69ed16a923..9af7a8e6a46e 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -343,6 +343,8 @@ typedef struct _drm_i915_sarea {
 #define DRM_I915_PERF_ADD_CONFIG	0x37
 #define DRM_I915_PERF_REMOVE_CONFIG	0x38
 #define DRM_I915_QUERY			0x39
+#define DRM_I915_GEM_VM_CREATE		0x3a
+#define DRM_I915_GEM_VM_DESTROY		0x3b
 /* Must be kept compact -- no holes */
 
 #define DRM_IOCTL_I915_INIT		DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
@@ -402,6 +404,8 @@ typedef struct _drm_i915_sarea {
 #define DRM_IOCTL_I915_PERF_ADD_CONFIG	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_ADD_CONFIG, struct drm_i915_perf_oa_config)
 #define DRM_IOCTL_I915_PERF_REMOVE_CONFIG	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_REMOVE_CONFIG, __u64)
 #define DRM_IOCTL_I915_QUERY			DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_QUERY, struct drm_i915_query)
+#define DRM_IOCTL_I915_GEM_VM_CREATE	DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_VM_CREATE, struct drm_i915_gem_vm_control)
+#define DRM_IOCTL_I915_GEM_VM_DESTROY	DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_VM_DESTROY, struct drm_i915_gem_vm_control)
 
 /* Allow drivers to submit batchbuffers directly to hardware, relying
  * on the security mechanisms provided by hardware.
@@ -1453,6 +1457,33 @@ struct drm_i915_gem_context_destroy {
 	__u32 pad;
 };
 
+/*
+ * DRM_I915_GEM_VM_CREATE -
+ *
+ * Create a new virtual memory address space (ppGTT) for use within a context
+ * on the same file. Extensions can be provided to configure exactly how the
+ * address space is setup upon creation.
+ *
+ * The id of new VM (bound to the fd) for use with I915_CONTEXT_PARAM_VM is
+ * returned in the outparam @vm_id.
+ *
+ * No flags are defined, with all bits reserved and must be zero.
+ *
+ * An extension chain maybe provided, starting with @extensions, and terminated
+ * by the @next_extension being 0. Currently, no extensions are defined.
+ *
+ * DRM_I915_GEM_VM_DESTROY -
+ *
+ * Destroys a previously created VM id, specified in @vm_id.
+ *
+ * No extensions or flags are allowed currently, and so must be zero.
+ */
+struct drm_i915_gem_vm_control {
+	__u64 extensions;
+	__u32 flags;
+	__u32 vm_id;
+};
+
 struct drm_i915_reg_read {
 	/*
 	 * Register offset.
@@ -1542,7 +1573,19 @@ struct drm_i915_gem_context_param {
  * On creation, all new contexts are marked as recoverable.
  */
 #define I915_CONTEXT_PARAM_RECOVERABLE	0x8
+
+	/*
+	 * The id of the associated virtual memory address space (ppGTT) of
+	 * this context. Can be retrieved and passed to another context
+	 * (on the same fd) for both to use the same ppGTT and so share
+	 * address layouts, and avoid reloading the page tables on context
+	 * switches between themselves.
+	 *
+	 * See DRM_I915_GEM_VM_CREATE and DRM_I915_GEM_VM_DESTROY.
+	 */
+#define I915_CONTEXT_PARAM_VM		0x9
 /* Must be kept compact -- no holes and well documented */
+
 	__u64 value;
 };
 
-- 
cgit v1.2.3


From b91715417244a7e478304425ba671bfafee9c43e Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Fri, 22 Mar 2019 09:23:24 +0000
Subject: drm/i915: Extend CONTEXT_CREATE to set parameters upon construction

It can be useful to have a single ioctl to create a context with all
the initial parameters instead of a series of create + setparam + setparam
ioctls. This extension to create context allows any of the parameters
to be passed in as a linked list to be applied to the newly constructed
context.

v2: Make a local copy of user setparam (Tvrtko)
v3: Use flags to detect availability of extension interface

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190322092325.5883-3-chris@chris-wilson.co.uk
---
 drivers/gpu/drm/i915/i915_drv.c         |   2 +-
 drivers/gpu/drm/i915/i915_gem_context.c | 452 ++++++++++++++++++--------------
 include/uapi/drm/i915_drm.h             | 180 +++++++------
 3 files changed, 349 insertions(+), 285 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index f6403ba1d56a..d3df0ebaa09c 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -3111,7 +3111,7 @@ static const struct drm_ioctl_desc i915_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(I915_SET_SPRITE_COLORKEY, intel_sprite_set_colorkey_ioctl, DRM_MASTER),
 	DRM_IOCTL_DEF_DRV(I915_GET_SPRITE_COLORKEY, drm_noop, DRM_MASTER),
 	DRM_IOCTL_DEF_DRV(I915_GEM_WAIT, i915_gem_wait_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
-	DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_CREATE, i915_gem_context_create_ioctl, DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_CREATE_EXT, i915_gem_context_create_ioctl, DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_DESTROY, i915_gem_context_destroy_ioctl, DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(I915_REG_READ, i915_reg_read_ioctl, DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(I915_GET_RESET_STATS, i915_gem_context_reset_stats_ioctl, DRM_RENDER_ALLOW),
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index d0a56c8d0bb9..68f9c7c7bd6c 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -1102,198 +1102,6 @@ out:
 	return err;
 }
 
-static bool client_is_banned(struct drm_i915_file_private *file_priv)
-{
-	return atomic_read(&file_priv->ban_score) >= I915_CLIENT_SCORE_BANNED;
-}
-
-int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
-				  struct drm_file *file)
-{
-	struct drm_i915_private *i915 = to_i915(dev);
-	struct drm_i915_gem_context_create *args = data;
-	struct drm_i915_file_private *file_priv = file->driver_priv;
-	struct i915_gem_context *ctx;
-	int ret;
-
-	if (!DRIVER_CAPS(i915)->has_logical_contexts)
-		return -ENODEV;
-
-	if (args->pad != 0)
-		return -EINVAL;
-
-	ret = i915_terminally_wedged(i915);
-	if (ret)
-		return ret;
-
-	if (client_is_banned(file_priv)) {
-		DRM_DEBUG("client %s[%d] banned from creating ctx\n",
-			  current->comm,
-			  pid_nr(get_task_pid(current, PIDTYPE_PID)));
-
-		return -EIO;
-	}
-
-	ret = i915_mutex_lock_interruptible(dev);
-	if (ret)
-		return ret;
-
-	ctx = i915_gem_create_context(i915);
-	mutex_unlock(&dev->struct_mutex);
-	if (IS_ERR(ctx))
-		return PTR_ERR(ctx);
-
-	ret = gem_context_register(ctx, file_priv);
-	if (ret < 0)
-		goto err_ctx;
-
-	args->ctx_id = ret;
-	DRM_DEBUG("HW context %d created\n", args->ctx_id);
-
-	return 0;
-
-err_ctx:
-	mutex_lock(&dev->struct_mutex);
-	context_close(ctx);
-	mutex_unlock(&dev->struct_mutex);
-	return ret;
-}
-
-int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
-				   struct drm_file *file)
-{
-	struct drm_i915_gem_context_destroy *args = data;
-	struct drm_i915_file_private *file_priv = file->driver_priv;
-	struct i915_gem_context *ctx;
-
-	if (args->pad != 0)
-		return -EINVAL;
-
-	if (!args->ctx_id)
-		return -ENOENT;
-
-	if (mutex_lock_interruptible(&file_priv->context_idr_lock))
-		return -EINTR;
-
-	ctx = idr_remove(&file_priv->context_idr, args->ctx_id);
-	mutex_unlock(&file_priv->context_idr_lock);
-	if (!ctx)
-		return -ENOENT;
-
-	mutex_lock(&dev->struct_mutex);
-	context_close(ctx);
-	mutex_unlock(&dev->struct_mutex);
-
-	return 0;
-}
-
-static int get_sseu(struct i915_gem_context *ctx,
-		    struct drm_i915_gem_context_param *args)
-{
-	struct drm_i915_gem_context_param_sseu user_sseu;
-	struct intel_engine_cs *engine;
-	struct intel_context *ce;
-
-	if (args->size == 0)
-		goto out;
-	else if (args->size < sizeof(user_sseu))
-		return -EINVAL;
-
-	if (copy_from_user(&user_sseu, u64_to_user_ptr(args->value),
-			   sizeof(user_sseu)))
-		return -EFAULT;
-
-	if (user_sseu.flags || user_sseu.rsvd)
-		return -EINVAL;
-
-	engine = intel_engine_lookup_user(ctx->i915,
-					  user_sseu.engine_class,
-					  user_sseu.engine_instance);
-	if (!engine)
-		return -EINVAL;
-
-	ce = intel_context_pin_lock(ctx, engine); /* serialises with set_sseu */
-	if (IS_ERR(ce))
-		return PTR_ERR(ce);
-
-	user_sseu.slice_mask = ce->sseu.slice_mask;
-	user_sseu.subslice_mask = ce->sseu.subslice_mask;
-	user_sseu.min_eus_per_subslice = ce->sseu.min_eus_per_subslice;
-	user_sseu.max_eus_per_subslice = ce->sseu.max_eus_per_subslice;
-
-	intel_context_pin_unlock(ce);
-
-	if (copy_to_user(u64_to_user_ptr(args->value), &user_sseu,
-			 sizeof(user_sseu)))
-		return -EFAULT;
-
-out:
-	args->size = sizeof(user_sseu);
-
-	return 0;
-}
-
-int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
-				    struct drm_file *file)
-{
-	struct drm_i915_file_private *file_priv = file->driver_priv;
-	struct drm_i915_gem_context_param *args = data;
-	struct i915_gem_context *ctx;
-	int ret = 0;
-
-	ctx = i915_gem_context_lookup(file_priv, args->ctx_id);
-	if (!ctx)
-		return -ENOENT;
-
-	switch (args->param) {
-	case I915_CONTEXT_PARAM_BAN_PERIOD:
-		ret = -EINVAL;
-		break;
-	case I915_CONTEXT_PARAM_NO_ZEROMAP:
-		args->size = 0;
-		args->value = test_bit(UCONTEXT_NO_ZEROMAP, &ctx->user_flags);
-		break;
-	case I915_CONTEXT_PARAM_GTT_SIZE:
-		args->size = 0;
-
-		if (ctx->ppgtt)
-			args->value = ctx->ppgtt->vm.total;
-		else if (to_i915(dev)->mm.aliasing_ppgtt)
-			args->value = to_i915(dev)->mm.aliasing_ppgtt->vm.total;
-		else
-			args->value = to_i915(dev)->ggtt.vm.total;
-		break;
-	case I915_CONTEXT_PARAM_NO_ERROR_CAPTURE:
-		args->size = 0;
-		args->value = i915_gem_context_no_error_capture(ctx);
-		break;
-	case I915_CONTEXT_PARAM_BANNABLE:
-		args->size = 0;
-		args->value = i915_gem_context_is_bannable(ctx);
-		break;
-	case I915_CONTEXT_PARAM_RECOVERABLE:
-		args->size = 0;
-		args->value = i915_gem_context_is_recoverable(ctx);
-		break;
-	case I915_CONTEXT_PARAM_PRIORITY:
-		args->size = 0;
-		args->value = ctx->sched.priority >> I915_USER_PRIORITY_SHIFT;
-		break;
-	case I915_CONTEXT_PARAM_SSEU:
-		ret = get_sseu(ctx, args);
-		break;
-	case I915_CONTEXT_PARAM_VM:
-		ret = get_ppgtt(ctx, args);
-		break;
-	default:
-		ret = -EINVAL;
-		break;
-	}
-
-	i915_gem_context_put(ctx);
-	return ret;
-}
-
 static int gen8_emit_rpcs_config(struct i915_request *rq,
 				 struct intel_context *ce,
 				 struct intel_sseu sseu)
@@ -1573,18 +1381,11 @@ static int set_sseu(struct i915_gem_context *ctx,
 	return 0;
 }
 
-int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
-				    struct drm_file *file)
+static int ctx_setparam(struct i915_gem_context *ctx,
+			struct drm_i915_gem_context_param *args)
 {
-	struct drm_i915_file_private *file_priv = file->driver_priv;
-	struct drm_i915_gem_context_param *args = data;
-	struct i915_gem_context *ctx;
 	int ret = 0;
 
-	ctx = i915_gem_context_lookup(file_priv, args->ctx_id);
-	if (!ctx)
-		return -ENOENT;
-
 	switch (args->param) {
 	case I915_CONTEXT_PARAM_NO_ZEROMAP:
 		if (args->size)
@@ -1594,6 +1395,7 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
 		else
 			clear_bit(UCONTEXT_NO_ZEROMAP, &ctx->user_flags);
 		break;
+
 	case I915_CONTEXT_PARAM_NO_ERROR_CAPTURE:
 		if (args->size)
 			ret = -EINVAL;
@@ -1602,6 +1404,7 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
 		else
 			i915_gem_context_clear_no_error_capture(ctx);
 		break;
+
 	case I915_CONTEXT_PARAM_BANNABLE:
 		if (args->size)
 			ret = -EINVAL;
@@ -1628,7 +1431,7 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
 
 			if (args->size)
 				ret = -EINVAL;
-			else if (!(to_i915(dev)->caps.scheduler & I915_SCHEDULER_CAP_PRIORITY))
+			else if (!(ctx->i915->caps.scheduler & I915_SCHEDULER_CAP_PRIORITY))
 				ret = -ENODEV;
 			else if (priority > I915_CONTEXT_MAX_USER_PRIORITY ||
 				 priority < I915_CONTEXT_MIN_USER_PRIORITY)
@@ -1656,6 +1459,251 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
 		break;
 	}
 
+	return ret;
+}
+
+struct create_ext {
+	struct i915_gem_context *ctx;
+	struct drm_i915_file_private *fpriv;
+};
+
+static int create_setparam(struct i915_user_extension __user *ext, void *data)
+{
+	struct drm_i915_gem_context_create_ext_setparam local;
+	const struct create_ext *arg = data;
+
+	if (copy_from_user(&local, ext, sizeof(local)))
+		return -EFAULT;
+
+	if (local.param.ctx_id)
+		return -EINVAL;
+
+	return ctx_setparam(arg->ctx, &local.param);
+}
+
+static const i915_user_extension_fn create_extensions[] = {
+	[I915_CONTEXT_CREATE_EXT_SETPARAM] = create_setparam,
+};
+
+static bool client_is_banned(struct drm_i915_file_private *file_priv)
+{
+	return atomic_read(&file_priv->ban_score) >= I915_CLIENT_SCORE_BANNED;
+}
+
+int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
+				  struct drm_file *file)
+{
+	struct drm_i915_private *i915 = to_i915(dev);
+	struct drm_i915_gem_context_create_ext *args = data;
+	struct create_ext ext_data;
+	int ret;
+
+	if (!DRIVER_CAPS(i915)->has_logical_contexts)
+		return -ENODEV;
+
+	if (args->flags & I915_CONTEXT_CREATE_FLAGS_UNKNOWN)
+		return -EINVAL;
+
+	ret = i915_terminally_wedged(i915);
+	if (ret)
+		return ret;
+
+	ext_data.fpriv = file->driver_priv;
+	if (client_is_banned(ext_data.fpriv)) {
+		DRM_DEBUG("client %s[%d] banned from creating ctx\n",
+			  current->comm,
+			  pid_nr(get_task_pid(current, PIDTYPE_PID)));
+		return -EIO;
+	}
+
+	ret = i915_mutex_lock_interruptible(dev);
+	if (ret)
+		return ret;
+
+	ext_data.ctx = i915_gem_create_context(i915);
+	mutex_unlock(&dev->struct_mutex);
+	if (IS_ERR(ext_data.ctx))
+		return PTR_ERR(ext_data.ctx);
+
+	if (args->flags & I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS) {
+		ret = i915_user_extensions(u64_to_user_ptr(args->extensions),
+					   create_extensions,
+					   ARRAY_SIZE(create_extensions),
+					   &ext_data);
+		if (ret)
+			goto err_ctx;
+	}
+
+	ret = gem_context_register(ext_data.ctx, ext_data.fpriv);
+	if (ret < 0)
+		goto err_ctx;
+
+	args->ctx_id = ret;
+	DRM_DEBUG("HW context %d created\n", args->ctx_id);
+
+	return 0;
+
+err_ctx:
+	mutex_lock(&dev->struct_mutex);
+	context_close(ext_data.ctx);
+	mutex_unlock(&dev->struct_mutex);
+	return ret;
+}
+
+int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
+				   struct drm_file *file)
+{
+	struct drm_i915_gem_context_destroy *args = data;
+	struct drm_i915_file_private *file_priv = file->driver_priv;
+	struct i915_gem_context *ctx;
+
+	if (args->pad != 0)
+		return -EINVAL;
+
+	if (!args->ctx_id)
+		return -ENOENT;
+
+	if (mutex_lock_interruptible(&file_priv->context_idr_lock))
+		return -EINTR;
+
+	ctx = idr_remove(&file_priv->context_idr, args->ctx_id);
+	mutex_unlock(&file_priv->context_idr_lock);
+	if (!ctx)
+		return -ENOENT;
+
+	mutex_lock(&dev->struct_mutex);
+	context_close(ctx);
+	mutex_unlock(&dev->struct_mutex);
+
+	return 0;
+}
+
+static int get_sseu(struct i915_gem_context *ctx,
+		    struct drm_i915_gem_context_param *args)
+{
+	struct drm_i915_gem_context_param_sseu user_sseu;
+	struct intel_engine_cs *engine;
+	struct intel_context *ce;
+
+	if (args->size == 0)
+		goto out;
+	else if (args->size < sizeof(user_sseu))
+		return -EINVAL;
+
+	if (copy_from_user(&user_sseu, u64_to_user_ptr(args->value),
+			   sizeof(user_sseu)))
+		return -EFAULT;
+
+	if (user_sseu.flags || user_sseu.rsvd)
+		return -EINVAL;
+
+	engine = intel_engine_lookup_user(ctx->i915,
+					  user_sseu.engine_class,
+					  user_sseu.engine_instance);
+	if (!engine)
+		return -EINVAL;
+
+	ce = intel_context_pin_lock(ctx, engine); /* serialises with set_sseu */
+	if (IS_ERR(ce))
+		return PTR_ERR(ce);
+
+	user_sseu.slice_mask = ce->sseu.slice_mask;
+	user_sseu.subslice_mask = ce->sseu.subslice_mask;
+	user_sseu.min_eus_per_subslice = ce->sseu.min_eus_per_subslice;
+	user_sseu.max_eus_per_subslice = ce->sseu.max_eus_per_subslice;
+
+	intel_context_pin_unlock(ce);
+
+	if (copy_to_user(u64_to_user_ptr(args->value), &user_sseu,
+			 sizeof(user_sseu)))
+		return -EFAULT;
+
+out:
+	args->size = sizeof(user_sseu);
+
+	return 0;
+}
+
+int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
+				    struct drm_file *file)
+{
+	struct drm_i915_file_private *file_priv = file->driver_priv;
+	struct drm_i915_gem_context_param *args = data;
+	struct i915_gem_context *ctx;
+	int ret = 0;
+
+	ctx = i915_gem_context_lookup(file_priv, args->ctx_id);
+	if (!ctx)
+		return -ENOENT;
+
+	switch (args->param) {
+	case I915_CONTEXT_PARAM_NO_ZEROMAP:
+		args->size = 0;
+		args->value = test_bit(UCONTEXT_NO_ZEROMAP, &ctx->user_flags);
+		break;
+
+	case I915_CONTEXT_PARAM_GTT_SIZE:
+		args->size = 0;
+		if (ctx->ppgtt)
+			args->value = ctx->ppgtt->vm.total;
+		else if (to_i915(dev)->mm.aliasing_ppgtt)
+			args->value = to_i915(dev)->mm.aliasing_ppgtt->vm.total;
+		else
+			args->value = to_i915(dev)->ggtt.vm.total;
+		break;
+
+	case I915_CONTEXT_PARAM_NO_ERROR_CAPTURE:
+		args->size = 0;
+		args->value = i915_gem_context_no_error_capture(ctx);
+		break;
+
+	case I915_CONTEXT_PARAM_BANNABLE:
+		args->size = 0;
+		args->value = i915_gem_context_is_bannable(ctx);
+		break;
+
+	case I915_CONTEXT_PARAM_RECOVERABLE:
+		args->size = 0;
+		args->value = i915_gem_context_is_recoverable(ctx);
+		break;
+
+	case I915_CONTEXT_PARAM_PRIORITY:
+		args->size = 0;
+		args->value = ctx->sched.priority >> I915_USER_PRIORITY_SHIFT;
+		break;
+
+	case I915_CONTEXT_PARAM_SSEU:
+		ret = get_sseu(ctx, args);
+		break;
+
+	case I915_CONTEXT_PARAM_VM:
+		ret = get_ppgtt(ctx, args);
+		break;
+
+	case I915_CONTEXT_PARAM_BAN_PERIOD:
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+	i915_gem_context_put(ctx);
+	return ret;
+}
+
+int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
+				    struct drm_file *file)
+{
+	struct drm_i915_file_private *file_priv = file->driver_priv;
+	struct drm_i915_gem_context_param *args = data;
+	struct i915_gem_context *ctx;
+	int ret;
+
+	ctx = i915_gem_context_lookup(file_priv, args->ctx_id);
+	if (!ctx)
+		return -ENOENT;
+
+	ret = ctx_setparam(ctx, args);
+
 	i915_gem_context_put(ctx);
 	return ret;
 }
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 9af7a8e6a46e..d45b79746fc4 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -394,6 +394,7 @@ typedef struct _drm_i915_sarea {
 #define DRM_IOCTL_I915_GET_SPRITE_COLORKEY DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GET_SPRITE_COLORKEY, struct drm_intel_sprite_colorkey)
 #define DRM_IOCTL_I915_GEM_WAIT		DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_WAIT, struct drm_i915_gem_wait)
 #define DRM_IOCTL_I915_GEM_CONTEXT_CREATE	DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_CREATE, struct drm_i915_gem_context_create)
+#define DRM_IOCTL_I915_GEM_CONTEXT_CREATE_EXT	DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_CREATE, struct drm_i915_gem_context_create_ext)
 #define DRM_IOCTL_I915_GEM_CONTEXT_DESTROY	DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_DESTROY, struct drm_i915_gem_context_destroy)
 #define DRM_IOCTL_I915_REG_READ			DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_REG_READ, struct drm_i915_reg_read)
 #define DRM_IOCTL_I915_GET_RESET_STATS		DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GET_RESET_STATS, struct drm_i915_reset_stats)
@@ -1447,92 +1448,17 @@ struct drm_i915_gem_wait {
 };
 
 struct drm_i915_gem_context_create {
-	/*  output: id of new context*/
-	__u32 ctx_id;
-	__u32 pad;
-};
-
-struct drm_i915_gem_context_destroy {
-	__u32 ctx_id;
-	__u32 pad;
-};
-
-/*
- * DRM_I915_GEM_VM_CREATE -
- *
- * Create a new virtual memory address space (ppGTT) for use within a context
- * on the same file. Extensions can be provided to configure exactly how the
- * address space is setup upon creation.
- *
- * The id of new VM (bound to the fd) for use with I915_CONTEXT_PARAM_VM is
- * returned in the outparam @vm_id.
- *
- * No flags are defined, with all bits reserved and must be zero.
- *
- * An extension chain maybe provided, starting with @extensions, and terminated
- * by the @next_extension being 0. Currently, no extensions are defined.
- *
- * DRM_I915_GEM_VM_DESTROY -
- *
- * Destroys a previously created VM id, specified in @vm_id.
- *
- * No extensions or flags are allowed currently, and so must be zero.
- */
-struct drm_i915_gem_vm_control {
-	__u64 extensions;
-	__u32 flags;
-	__u32 vm_id;
-};
-
-struct drm_i915_reg_read {
-	/*
-	 * Register offset.
-	 * For 64bit wide registers where the upper 32bits don't immediately
-	 * follow the lower 32bits, the offset of the lower 32bits must
-	 * be specified
-	 */
-	__u64 offset;
-#define I915_REG_READ_8B_WA (1ul << 0)
-
-	__u64 val; /* Return value */
-};
-/* Known registers:
- *
- * Render engine timestamp - 0x2358 + 64bit - gen7+
- * - Note this register returns an invalid value if using the default
- *   single instruction 8byte read, in order to workaround that pass
- *   flag I915_REG_READ_8B_WA in offset field.
- *
- */
-
-struct drm_i915_reset_stats {
-	__u32 ctx_id;
-	__u32 flags;
-
-	/* All resets since boot/module reload, for all contexts */
-	__u32 reset_count;
-
-	/* Number of batches lost when active in GPU, for this context */
-	__u32 batch_active;
-
-	/* Number of batches lost pending for execution, for this context */
-	__u32 batch_pending;
-
+	__u32 ctx_id; /* output: id of new context*/
 	__u32 pad;
 };
 
-struct drm_i915_gem_userptr {
-	__u64 user_ptr;
-	__u64 user_size;
+struct drm_i915_gem_context_create_ext {
+	__u32 ctx_id; /* output: id of new context*/
 	__u32 flags;
-#define I915_USERPTR_READ_ONLY 0x1
-#define I915_USERPTR_UNSYNCHRONIZED 0x80000000
-	/**
-	 * Returned handle for the object.
-	 *
-	 * Object handles are nonzero.
-	 */
-	__u32 handle;
+#define I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS	(1u << 0)
+#define I915_CONTEXT_CREATE_FLAGS_UNKNOWN \
+	(-(I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS << 1))
+	__u64 extensions;
 };
 
 struct drm_i915_gem_context_param {
@@ -1648,6 +1574,96 @@ struct drm_i915_gem_context_param_sseu {
 	__u32 rsvd;
 };
 
+struct drm_i915_gem_context_create_ext_setparam {
+#define I915_CONTEXT_CREATE_EXT_SETPARAM 0
+	struct i915_user_extension base;
+	struct drm_i915_gem_context_param param;
+};
+
+struct drm_i915_gem_context_destroy {
+	__u32 ctx_id;
+	__u32 pad;
+};
+
+/*
+ * DRM_I915_GEM_VM_CREATE -
+ *
+ * Create a new virtual memory address space (ppGTT) for use within a context
+ * on the same file. Extensions can be provided to configure exactly how the
+ * address space is setup upon creation.
+ *
+ * The id of new VM (bound to the fd) for use with I915_CONTEXT_PARAM_VM is
+ * returned in the outparam @id.
+ *
+ * No flags are defined, with all bits reserved and must be zero.
+ *
+ * An extension chain maybe provided, starting with @extensions, and terminated
+ * by the @next_extension being 0. Currently, no extensions are defined.
+ *
+ * DRM_I915_GEM_VM_DESTROY -
+ *
+ * Destroys a previously created VM id, specified in @id.
+ *
+ * No extensions or flags are allowed currently, and so must be zero.
+ */
+struct drm_i915_gem_vm_control {
+	__u64 extensions;
+	__u32 flags;
+	__u32 vm_id;
+};
+
+struct drm_i915_reg_read {
+	/*
+	 * Register offset.
+	 * For 64bit wide registers where the upper 32bits don't immediately
+	 * follow the lower 32bits, the offset of the lower 32bits must
+	 * be specified
+	 */
+	__u64 offset;
+#define I915_REG_READ_8B_WA (1ul << 0)
+
+	__u64 val; /* Return value */
+};
+
+/* Known registers:
+ *
+ * Render engine timestamp - 0x2358 + 64bit - gen7+
+ * - Note this register returns an invalid value if using the default
+ *   single instruction 8byte read, in order to workaround that pass
+ *   flag I915_REG_READ_8B_WA in offset field.
+ *
+ */
+
+struct drm_i915_reset_stats {
+	__u32 ctx_id;
+	__u32 flags;
+
+	/* All resets since boot/module reload, for all contexts */
+	__u32 reset_count;
+
+	/* Number of batches lost when active in GPU, for this context */
+	__u32 batch_active;
+
+	/* Number of batches lost pending for execution, for this context */
+	__u32 batch_pending;
+
+	__u32 pad;
+};
+
+struct drm_i915_gem_userptr {
+	__u64 user_ptr;
+	__u64 user_size;
+	__u32 flags;
+#define I915_USERPTR_READ_ONLY 0x1
+#define I915_USERPTR_UNSYNCHRONIZED 0x80000000
+	/**
+	 * Returned handle for the object.
+	 *
+	 * Object handles are nonzero.
+	 */
+	__u32 handle;
+};
+
 enum drm_i915_oa_format {
 	I915_OA_FORMAT_A13 = 1,	    /* HSW only */
 	I915_OA_FORMAT_A29,	    /* HSW only */
-- 
cgit v1.2.3


From ea593dbba4c8ed841630fa5445202627e1046ba6 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Fri, 22 Mar 2019 09:23:25 +0000
Subject: drm/i915: Allow contexts to share a single timeline across all
 engines

Previously, our view has been always to run the engines independently
within a context. (Multiple engines happened before we had contexts and
timelines, so they always operated independently and that behaviour
persisted into contexts.) However, at the user level the context often
represents a single timeline (e.g. GL contexts) and userspace must
ensure that the individual engines are serialised to present that
ordering to the client (or forgot about this detail entirely and hope no
one notices - a fair ploy if the client can only directly control one
engine themselves ;)

In the next patch, we will want to construct a set of engines that
operate as one, that have a single timeline interwoven between them, to
present a single virtual engine to the user. (They submit to the virtual
engine, then we decide which engine to execute on based.)

To that end, we want to be able to create contexts which have a single
timeline (fence context) shared between all engines, rather than multiple
timelines.

v2: Move the specialised timeline ordering to its own function.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190322092325.5883-4-chris@chris-wilson.co.uk
---
 drivers/gpu/drm/i915/i915_gem_context.c       | 31 +++++++++--
 drivers/gpu/drm/i915/i915_gem_context_types.h |  2 +
 drivers/gpu/drm/i915/i915_request.c           | 80 ++++++++++++++++++---------
 drivers/gpu/drm/i915/i915_request.h           |  5 +-
 drivers/gpu/drm/i915/i915_sw_fence.c          | 39 +++++++++++--
 drivers/gpu/drm/i915/i915_sw_fence.h          | 13 ++++-
 drivers/gpu/drm/i915/intel_lrc.c              |  5 +-
 drivers/gpu/drm/i915/selftests/mock_context.c |  2 +-
 include/uapi/drm/i915_drm.h                   |  3 +-
 9 files changed, 138 insertions(+), 42 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 68f9c7c7bd6c..39ca3cb6af66 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -238,6 +238,9 @@ static void i915_gem_context_free(struct i915_gem_context *ctx)
 	rbtree_postorder_for_each_entry_safe(it, n, &ctx->hw_contexts, node)
 		intel_context_put(it);
 
+	if (ctx->timeline)
+		i915_timeline_put(ctx->timeline);
+
 	kfree(ctx->name);
 	put_pid(ctx->pid);
 
@@ -403,12 +406,16 @@ static void __assign_ppgtt(struct i915_gem_context *ctx,
 }
 
 static struct i915_gem_context *
-i915_gem_create_context(struct drm_i915_private *dev_priv)
+i915_gem_create_context(struct drm_i915_private *dev_priv, unsigned int flags)
 {
 	struct i915_gem_context *ctx;
 
 	lockdep_assert_held(&dev_priv->drm.struct_mutex);
 
+	if (flags & I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE &&
+	    !HAS_EXECLISTS(dev_priv))
+		return ERR_PTR(-EINVAL);
+
 	/* Reap the most stale context */
 	contexts_free_first(dev_priv);
 
@@ -431,6 +438,18 @@ i915_gem_create_context(struct drm_i915_private *dev_priv)
 		i915_ppgtt_put(ppgtt);
 	}
 
+	if (flags & I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE) {
+		struct i915_timeline *timeline;
+
+		timeline = i915_timeline_create(dev_priv, NULL);
+		if (IS_ERR(timeline)) {
+			context_close(ctx);
+			return ERR_CAST(timeline);
+		}
+
+		ctx->timeline = timeline;
+	}
+
 	trace_i915_context_create(ctx);
 
 	return ctx;
@@ -459,7 +478,7 @@ i915_gem_context_create_gvt(struct drm_device *dev)
 	if (ret)
 		return ERR_PTR(ret);
 
-	ctx = i915_gem_create_context(to_i915(dev));
+	ctx = i915_gem_create_context(to_i915(dev), 0);
 	if (IS_ERR(ctx))
 		goto out;
 
@@ -495,7 +514,7 @@ i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio)
 	struct i915_gem_context *ctx;
 	int err;
 
-	ctx = i915_gem_create_context(i915);
+	ctx = i915_gem_create_context(i915, 0);
 	if (IS_ERR(ctx))
 		return ctx;
 
@@ -658,7 +677,7 @@ int i915_gem_context_open(struct drm_i915_private *i915,
 	idr_init_base(&file_priv->vm_idr, 1);
 
 	mutex_lock(&i915->drm.struct_mutex);
-	ctx = i915_gem_create_context(i915);
+	ctx = i915_gem_create_context(i915, 0);
 	mutex_unlock(&i915->drm.struct_mutex);
 	if (IS_ERR(ctx)) {
 		err = PTR_ERR(ctx);
@@ -800,7 +819,7 @@ last_request_on_engine(struct i915_timeline *timeline,
 
 	rq = i915_active_request_raw(&timeline->last_request,
 				     &engine->i915->drm.struct_mutex);
-	if (rq && rq->engine == engine) {
+	if (rq && rq->engine->mask & engine->mask) {
 		GEM_TRACE("last request on engine %s: %llx:%llu\n",
 			  engine->name, rq->fence.context, rq->fence.seqno);
 		GEM_BUG_ON(rq->timeline != timeline);
@@ -1520,7 +1539,7 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
 	if (ret)
 		return ret;
 
-	ext_data.ctx = i915_gem_create_context(i915);
+	ext_data.ctx = i915_gem_create_context(i915, args->flags);
 	mutex_unlock(&dev->struct_mutex);
 	if (IS_ERR(ext_data.ctx))
 		return PTR_ERR(ext_data.ctx);
diff --git a/drivers/gpu/drm/i915/i915_gem_context_types.h b/drivers/gpu/drm/i915/i915_gem_context_types.h
index 63ae8eb21939..e2ec58b10fb2 100644
--- a/drivers/gpu/drm/i915/i915_gem_context_types.h
+++ b/drivers/gpu/drm/i915/i915_gem_context_types.h
@@ -41,6 +41,8 @@ struct i915_gem_context {
 	/** file_priv: owning file descriptor */
 	struct drm_i915_file_private *file_priv;
 
+	struct i915_timeline *timeline;
+
 	/**
 	 * @ppgtt: unique address space (GTT)
 	 *
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 1529824d7c61..e9c2094ab8ea 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -992,6 +992,60 @@ void i915_request_skip(struct i915_request *rq, int error)
 	memset(vaddr + head, 0, rq->postfix - head);
 }
 
+static struct i915_request *
+__i915_request_add_to_timeline(struct i915_request *rq)
+{
+	struct i915_timeline *timeline = rq->timeline;
+	struct i915_request *prev;
+
+	/*
+	 * Dependency tracking and request ordering along the timeline
+	 * is special cased so that we can eliminate redundant ordering
+	 * operations while building the request (we know that the timeline
+	 * itself is ordered, and here we guarantee it).
+	 *
+	 * As we know we will need to emit tracking along the timeline,
+	 * we embed the hooks into our request struct -- at the cost of
+	 * having to have specialised no-allocation interfaces (which will
+	 * be beneficial elsewhere).
+	 *
+	 * A second benefit to open-coding i915_request_await_request is
+	 * that we can apply a slight variant of the rules specialised
+	 * for timelines that jump between engines (such as virtual engines).
+	 * If we consider the case of virtual engine, we must emit a dma-fence
+	 * to prevent scheduling of the second request until the first is
+	 * complete (to maximise our greedy late load balancing) and this
+	 * precludes optimising to use semaphores serialisation of a single
+	 * timeline across engines.
+	 */
+	prev = i915_active_request_raw(&timeline->last_request,
+				       &rq->i915->drm.struct_mutex);
+	if (prev && !i915_request_completed(prev)) {
+		if (is_power_of_2(prev->engine->mask | rq->engine->mask))
+			i915_sw_fence_await_sw_fence(&rq->submit,
+						     &prev->submit,
+						     &rq->submitq);
+		else
+			__i915_sw_fence_await_dma_fence(&rq->submit,
+							&prev->fence,
+							&rq->dmaq);
+		if (rq->engine->schedule)
+			__i915_sched_node_add_dependency(&rq->sched,
+							 &prev->sched,
+							 &rq->dep,
+							 0);
+	}
+
+	spin_lock_irq(&timeline->lock);
+	list_add_tail(&rq->link, &timeline->requests);
+	spin_unlock_irq(&timeline->lock);
+
+	GEM_BUG_ON(timeline->seqno != rq->fence.seqno);
+	__i915_active_request_set(&timeline->last_request, rq);
+
+	return prev;
+}
+
 /*
  * NB: This function is not allowed to fail. Doing so would mean the the
  * request is not being tracked for completion but the work itself is
@@ -1036,31 +1090,7 @@ void i915_request_add(struct i915_request *request)
 	GEM_BUG_ON(IS_ERR(cs));
 	request->postfix = intel_ring_offset(request, cs);
 
-	/*
-	 * Seal the request and mark it as pending execution. Note that
-	 * we may inspect this state, without holding any locks, during
-	 * hangcheck. Hence we apply the barrier to ensure that we do not
-	 * see a more recent value in the hws than we are tracking.
-	 */
-
-	prev = i915_active_request_raw(&timeline->last_request,
-				       &request->i915->drm.struct_mutex);
-	if (prev && !i915_request_completed(prev)) {
-		i915_sw_fence_await_sw_fence(&request->submit, &prev->submit,
-					     &request->submitq);
-		if (engine->schedule)
-			__i915_sched_node_add_dependency(&request->sched,
-							 &prev->sched,
-							 &request->dep,
-							 0);
-	}
-
-	spin_lock_irq(&timeline->lock);
-	list_add_tail(&request->link, &timeline->requests);
-	spin_unlock_irq(&timeline->lock);
-
-	GEM_BUG_ON(timeline->seqno != request->fence.seqno);
-	__i915_active_request_set(&timeline->last_request, request);
+	prev = __i915_request_add_to_timeline(request);
 
 	list_add_tail(&request->ring_link, &ring->request_list);
 	if (list_is_first(&request->ring_link, &ring->request_list))
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index 8c8fa5010644..cd6c130964cd 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -128,7 +128,10 @@ struct i915_request {
 	 * It is used by the driver to then queue the request for execution.
 	 */
 	struct i915_sw_fence submit;
-	wait_queue_entry_t submitq;
+	union {
+		wait_queue_entry_t submitq;
+		struct i915_sw_dma_fence_cb dmaq;
+	};
 	struct list_head execute_cb;
 
 	/*
diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c
index 8d1400d378d7..5387aafd3424 100644
--- a/drivers/gpu/drm/i915/i915_sw_fence.c
+++ b/drivers/gpu/drm/i915/i915_sw_fence.c
@@ -359,11 +359,6 @@ int i915_sw_fence_await_sw_fence_gfp(struct i915_sw_fence *fence,
 	return __i915_sw_fence_await_sw_fence(fence, signaler, NULL, gfp);
 }
 
-struct i915_sw_dma_fence_cb {
-	struct dma_fence_cb base;
-	struct i915_sw_fence *fence;
-};
-
 struct i915_sw_dma_fence_cb_timer {
 	struct i915_sw_dma_fence_cb base;
 	struct dma_fence *dma;
@@ -480,6 +475,40 @@ int i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence,
 	return ret;
 }
 
+static void __dma_i915_sw_fence_wake(struct dma_fence *dma,
+				     struct dma_fence_cb *data)
+{
+	struct i915_sw_dma_fence_cb *cb = container_of(data, typeof(*cb), base);
+
+	i915_sw_fence_complete(cb->fence);
+}
+
+int __i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence,
+				    struct dma_fence *dma,
+				    struct i915_sw_dma_fence_cb *cb)
+{
+	int ret;
+
+	debug_fence_assert(fence);
+
+	if (dma_fence_is_signaled(dma))
+		return 0;
+
+	cb->fence = fence;
+	i915_sw_fence_await(fence);
+
+	ret = dma_fence_add_callback(dma, &cb->base, __dma_i915_sw_fence_wake);
+	if (ret == 0) {
+		ret = 1;
+	} else {
+		i915_sw_fence_complete(fence);
+		if (ret == -ENOENT) /* fence already signaled */
+			ret = 0;
+	}
+
+	return ret;
+}
+
 int i915_sw_fence_await_reservation(struct i915_sw_fence *fence,
 				    struct reservation_object *resv,
 				    const struct dma_fence_ops *exclude,
diff --git a/drivers/gpu/drm/i915/i915_sw_fence.h b/drivers/gpu/drm/i915/i915_sw_fence.h
index 6dec9e1d1102..9cb5c3b307a6 100644
--- a/drivers/gpu/drm/i915/i915_sw_fence.h
+++ b/drivers/gpu/drm/i915/i915_sw_fence.h
@@ -9,14 +9,13 @@
 #ifndef _I915_SW_FENCE_H_
 #define _I915_SW_FENCE_H_
 
+#include <linux/dma-fence.h>
 #include <linux/gfp.h>
 #include <linux/kref.h>
 #include <linux/notifier.h> /* for NOTIFY_DONE */
 #include <linux/wait.h>
 
 struct completion;
-struct dma_fence;
-struct dma_fence_ops;
 struct reservation_object;
 
 struct i915_sw_fence {
@@ -68,10 +67,20 @@ int i915_sw_fence_await_sw_fence(struct i915_sw_fence *fence,
 int i915_sw_fence_await_sw_fence_gfp(struct i915_sw_fence *fence,
 				     struct i915_sw_fence *after,
 				     gfp_t gfp);
+
+struct i915_sw_dma_fence_cb {
+	struct dma_fence_cb base;
+	struct i915_sw_fence *fence;
+};
+
+int __i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence,
+				    struct dma_fence *dma,
+				    struct i915_sw_dma_fence_cb *cb);
 int i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence,
 				  struct dma_fence *dma,
 				  unsigned long timeout,
 				  gfp_t gfp);
+
 int i915_sw_fence_await_reservation(struct i915_sw_fence *fence,
 				    struct reservation_object *resv,
 				    const struct dma_fence_ops *exclude,
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 35f7ef9e75c8..66bc3cd4e166 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -2802,7 +2802,10 @@ err_unpin_ctx:
 
 static struct i915_timeline *get_timeline(struct i915_gem_context *ctx)
 {
-	return i915_timeline_create(ctx->i915, NULL);
+	if (ctx->timeline)
+		return i915_timeline_get(ctx->timeline);
+	else
+		return i915_timeline_create(ctx->i915, NULL);
 }
 
 static int execlists_context_deferred_alloc(struct intel_context *ce,
diff --git a/drivers/gpu/drm/i915/selftests/mock_context.c b/drivers/gpu/drm/i915/selftests/mock_context.c
index 81e5ace18b81..0426093bf1d9 100644
--- a/drivers/gpu/drm/i915/selftests/mock_context.c
+++ b/drivers/gpu/drm/i915/selftests/mock_context.c
@@ -97,7 +97,7 @@ live_context(struct drm_i915_private *i915, struct drm_file *file)
 
 	lockdep_assert_held(&i915->drm.struct_mutex);
 
-	ctx = i915_gem_create_context(i915);
+	ctx = i915_gem_create_context(i915, 0);
 	if (IS_ERR(ctx))
 		return ctx;
 
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index d45b79746fc4..9999f7d6a5a9 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1456,8 +1456,9 @@ struct drm_i915_gem_context_create_ext {
 	__u32 ctx_id; /* output: id of new context*/
 	__u32 flags;
 #define I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS	(1u << 0)
+#define I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE	(1u << 1)
 #define I915_CONTEXT_CREATE_FLAGS_UNKNOWN \
-	(-(I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS << 1))
+	(-(I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE << 1))
 	__u64 extensions;
 };
 
-- 
cgit v1.2.3


From 1e4471e74c75acb3f89959ffa02a241227937ae2 Mon Sep 17 00:00:00 2001
From: Shenghui Wang <shhuiw@foxmail.com>
Date: Sat, 16 Mar 2019 16:24:37 +0800
Subject: sbitmap: trivial - update comment for sbitmap_deferred_clear_bit

"sbitmap_batch_clear" should be "sbitmap_deferred_clear"

Acked-by: Omar Sandoval <osandov@fb.com>
Signed-off-by: Shenghui Wang <shhuiw@foxmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/sbitmap.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h
index 14d558146aea..20f3e3f029b9 100644
--- a/include/linux/sbitmap.h
+++ b/include/linux/sbitmap.h
@@ -330,7 +330,7 @@ static inline void sbitmap_clear_bit(struct sbitmap *sb, unsigned int bitnr)
 /*
  * This one is special, since it doesn't actually clear the bit, rather it
  * sets the corresponding bit in the ->cleared mask instead. Paired with
- * the caller doing sbitmap_batch_clear() if a given index is full, which
+ * the caller doing sbitmap_deferred_clear() if a given index is full, which
  * will clear the previously freed entries in the corresponding ->word.
  */
 static inline void sbitmap_deferred_clear_bit(struct sbitmap *sb, unsigned int bitnr)
-- 
cgit v1.2.3


From 29f3863d33d1e2ad6c87fe64a61538041bd965bd Mon Sep 17 00:00:00 2001
From: James Ausmus <james.ausmus@intel.com>
Date: Fri, 22 Mar 2019 10:58:42 -0700
Subject: drm/i915/ehl: Add EHL platform info and PCI IDs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add known EHL PCI IDs.

v2 (Rodrigo): Removed x86 early quirk. To be sent in a separated
   	      patch cc'ing the appropriated list and maintainers for
	      proper ack.
v3: (Rodrigo): - Removed .num_pipes = 3 that is coming since GEN&_FEATURES.
    	       - Added ppgtt type and size after rework from Bob and Chris
v4: (Rodrigo): - remove ppgtt type added on v3. Jose pointed it is not
    	       	 needed.

Cc: Bob Paauwe <bob.j.paauwe@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: José Roberto de Souza <jose.souza@intel.com>
Signed-off-by: James Ausmus <james.ausmus@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Bob Paauwe <bob.j.paauwe@intel.com>
Reviewed-by: José Roberto de Souza <jose.souza@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190322175847.25707-1-rodrigo.vivi@intel.com
---
 drivers/gpu/drm/i915/i915_pci.c | 9 +++++++++
 include/drm/i915_pciids.h       | 7 +++++++
 2 files changed, 16 insertions(+)

(limited to 'include')

diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index 7a6054eadb8e..fa99d68cbe7d 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -730,6 +730,14 @@ static const struct intel_device_info intel_icelake_11_info = {
 		BIT(RCS0) | BIT(BCS0) | BIT(VECS0) | BIT(VCS0) | BIT(VCS2),
 };
 
+static const struct intel_device_info intel_elkhartlake_info = {
+	GEN11_FEATURES,
+	PLATFORM(INTEL_ICELAKE),
+	.is_alpha_support = 1,
+	.engine_mask = BIT(RCS0) | BIT(BCS0) | BIT(VCS0),
+	.ppgtt_size = 36,
+};
+
 #undef GEN
 #undef PLATFORM
 
@@ -799,6 +807,7 @@ static const struct pci_device_id pciidlist[] = {
 	INTEL_CML_GT2_IDS(&intel_coffeelake_gt2_info),
 	INTEL_CNL_IDS(&intel_cannonlake_info),
 	INTEL_ICL_11_IDS(&intel_icelake_11_info),
+	INTEL_EHL_IDS(&intel_elkhartlake_info),
 	{0, 0, 0}
 };
 MODULE_DEVICE_TABLE(pci, pciidlist);
diff --git a/include/drm/i915_pciids.h b/include/drm/i915_pciids.h
index 291b5e3fa59c..c7cdbfc4d033 100644
--- a/include/drm/i915_pciids.h
+++ b/include/drm/i915_pciids.h
@@ -498,4 +498,11 @@
 	INTEL_VGA_DEVICE(0x8A70, info), \
 	INTEL_VGA_DEVICE(0x8A53, info)
 
+/* EHL */
+#define INTEL_EHL_IDS(info) \
+	INTEL_VGA_DEVICE(0x4500, info),	\
+	INTEL_VGA_DEVICE(0x4571, info), \
+	INTEL_VGA_DEVICE(0x4551, info), \
+	INTEL_VGA_DEVICE(0x4541, info)
+
 #endif /* _I915_PCIIDS_H */
-- 
cgit v1.2.3


From ffc8599aa9763f39f6736a79da4d1575e7006f9a Mon Sep 17 00:00:00 2001
From: Kairui Song <kasong@redhat.com>
Date: Fri, 8 Mar 2019 11:05:08 +0800
Subject: x86/gart: Exclude GART aperture from kcore

On machines where the GART aperture is mapped over physical RAM,
/proc/kcore contains the GART aperture range. Accessing the GART range via
/proc/kcore results in a kernel crash.

vmcore used to have the same issue, until it was fixed with commit
2a3e83c6f96c ("x86/gart: Exclude GART aperture from vmcore")', leveraging
existing hook infrastructure in vmcore to let /proc/vmcore return zeroes
when attempting to read the aperture region, and so it won't read from the
actual memory.

Apply the same workaround for kcore. First implement the same hook
infrastructure for kcore, then reuse the hook functions introduced in the
previous vmcore fix. Just with some minor adjustment, rename some functions
for more general usage, and simplify the hook infrastructure a bit as there
is no module usage yet.

Suggested-by: Baoquan He <bhe@redhat.com>
Signed-off-by: Kairui Song <kasong@redhat.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Jiri Bohac <jbohac@suse.cz>
Acked-by: Baoquan He <bhe@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Omar Sandoval <osandov@fb.com>
Cc: Dave Young <dyoung@redhat.com>
Link: https://lkml.kernel.org/r/20190308030508.13548-1-kasong@redhat.com
---
 arch/x86/kernel/aperture_64.c | 20 +++++++++++++-------
 fs/proc/kcore.c               | 27 +++++++++++++++++++++++++++
 include/linux/kcore.h         |  2 ++
 3 files changed, 42 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index 58176b56354e..294ed4392a0e 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -14,6 +14,7 @@
 #define pr_fmt(fmt) "AGP: " fmt
 
 #include <linux/kernel.h>
+#include <linux/kcore.h>
 #include <linux/types.h>
 #include <linux/init.h>
 #include <linux/memblock.h>
@@ -57,7 +58,7 @@ int fallback_aper_force __initdata;
 
 int fix_aperture __initdata = 1;
 
-#ifdef CONFIG_PROC_VMCORE
+#if defined(CONFIG_PROC_VMCORE) || defined(CONFIG_PROC_KCORE)
 /*
  * If the first kernel maps the aperture over e820 RAM, the kdump kernel will
  * use the same range because it will remain configured in the northbridge.
@@ -66,20 +67,25 @@ int fix_aperture __initdata = 1;
  */
 static unsigned long aperture_pfn_start, aperture_page_count;
 
-static int gart_oldmem_pfn_is_ram(unsigned long pfn)
+static int gart_mem_pfn_is_ram(unsigned long pfn)
 {
 	return likely((pfn < aperture_pfn_start) ||
 		      (pfn >= aperture_pfn_start + aperture_page_count));
 }
 
-static void exclude_from_vmcore(u64 aper_base, u32 aper_order)
+static void __init exclude_from_core(u64 aper_base, u32 aper_order)
 {
 	aperture_pfn_start = aper_base >> PAGE_SHIFT;
 	aperture_page_count = (32 * 1024 * 1024) << aper_order >> PAGE_SHIFT;
-	WARN_ON(register_oldmem_pfn_is_ram(&gart_oldmem_pfn_is_ram));
+#ifdef CONFIG_PROC_VMCORE
+	WARN_ON(register_oldmem_pfn_is_ram(&gart_mem_pfn_is_ram));
+#endif
+#ifdef CONFIG_PROC_KCORE
+	WARN_ON(register_mem_pfn_is_ram(&gart_mem_pfn_is_ram));
+#endif
 }
 #else
-static void exclude_from_vmcore(u64 aper_base, u32 aper_order)
+static void exclude_from_core(u64 aper_base, u32 aper_order)
 {
 }
 #endif
@@ -474,7 +480,7 @@ out:
 			 * may have allocated the range over its e820 RAM
 			 * and fixed up the northbridge
 			 */
-			exclude_from_vmcore(last_aper_base, last_aper_order);
+			exclude_from_core(last_aper_base, last_aper_order);
 
 			return 1;
 		}
@@ -520,7 +526,7 @@ out:
 	 * overlap with the first kernel's memory. We can't access the
 	 * range through vmcore even though it should be part of the dump.
 	 */
-	exclude_from_vmcore(aper_alloc, aper_order);
+	exclude_from_core(aper_alloc, aper_order);
 
 	/* Fix up the north bridges */
 	for (i = 0; i < amd_nb_bus_dev_ranges[i].dev_limit; i++) {
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index bbcc185062bb..d29d869abec1 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -54,6 +54,28 @@ static LIST_HEAD(kclist_head);
 static DECLARE_RWSEM(kclist_lock);
 static int kcore_need_update = 1;
 
+/*
+ * Returns > 0 for RAM pages, 0 for non-RAM pages, < 0 on error
+ * Same as oldmem_pfn_is_ram in vmcore
+ */
+static int (*mem_pfn_is_ram)(unsigned long pfn);
+
+int __init register_mem_pfn_is_ram(int (*fn)(unsigned long pfn))
+{
+	if (mem_pfn_is_ram)
+		return -EBUSY;
+	mem_pfn_is_ram = fn;
+	return 0;
+}
+
+static int pfn_is_ram(unsigned long pfn)
+{
+	if (mem_pfn_is_ram)
+		return mem_pfn_is_ram(pfn);
+	else
+		return 1;
+}
+
 /* This doesn't grab kclist_lock, so it should only be used at init time. */
 void __init kclist_add(struct kcore_list *new, void *addr, size_t size,
 		       int type)
@@ -465,6 +487,11 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
 				goto out;
 			}
 			m = NULL;	/* skip the list anchor */
+		} else if (!pfn_is_ram(__pa(start) >> PAGE_SHIFT)) {
+			if (clear_user(buffer, tsz)) {
+				ret = -EFAULT;
+				goto out;
+			}
 		} else if (m->type == KCORE_VMALLOC) {
 			vread(buf, (char *)start, tsz);
 			/* we have to zero-fill user buffer even if no read */
diff --git a/include/linux/kcore.h b/include/linux/kcore.h
index 8c3f8c14eeaa..c843f4a9c512 100644
--- a/include/linux/kcore.h
+++ b/include/linux/kcore.h
@@ -44,6 +44,8 @@ void kclist_add_remap(struct kcore_list *m, void *addr, void *vaddr, size_t sz)
 	m->vaddr = (unsigned long)vaddr;
 	kclist_add(m, addr, sz, KCORE_REMAP);
 }
+
+extern int __init register_mem_pfn_is_ram(int (*fn)(unsigned long pfn));
 #else
 static inline
 void kclist_add(struct kcore_list *new, void *addr, size_t size, int type)
-- 
cgit v1.2.3


From 96fd2c6633b0484b030eb15e646ad50426c41e6a Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Wed, 27 Mar 2019 10:58:14 +0000
Subject: drm/i915: Drop new chunks of context creation ABI (for now)

The intent was to expose these as part of the means to perform full
context recovery (though not the SINGLE_TIMELINE, that is for later and
just sucked as collateral damage). As that requires a couple more
patches to complete the series, roll back the earlier chunks of ABI for
an intervening PR. We keep all the internals intact and under selftests.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190327105814.14694-1-chris@chris-wilson.co.uk
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_drv.c         |  2 --
 drivers/gpu/drm/i915/i915_gem_context.c |  9 +++++++++
 include/uapi/drm/i915_drm.h             | 18 +-----------------
 3 files changed, 10 insertions(+), 19 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index ca8a8891b7b3..a9fcb32f4989 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -3122,8 +3122,6 @@ static const struct drm_ioctl_desc i915_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(I915_PERF_ADD_CONFIG, i915_perf_add_config_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(I915_PERF_REMOVE_CONFIG, i915_perf_remove_config_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(I915_QUERY, i915_query_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
-	DRM_IOCTL_DEF_DRV(I915_GEM_VM_CREATE, i915_gem_vm_create_ioctl, DRM_RENDER_ALLOW),
-	DRM_IOCTL_DEF_DRV(I915_GEM_VM_DESTROY, i915_gem_vm_destroy_ioctl, DRM_RENDER_ALLOW),
 };
 
 static struct drm_driver driver = {
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 25f267a03d3d..662da485e15f 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -94,6 +94,9 @@
 #include "intel_lrc_reg.h"
 #include "intel_workarounds.h"
 
+#define I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE (1 << 1)
+#define I915_CONTEXT_PARAM_VM 0x9
+
 #define ALL_L3_SLICES(dev) (1 << NUM_L3_SLICES(dev)) - 1
 
 static struct i915_global_gem_context {
@@ -412,6 +415,8 @@ i915_gem_create_context(struct drm_i915_private *dev_priv, unsigned int flags)
 
 	lockdep_assert_held(&dev_priv->drm.struct_mutex);
 
+	BUILD_BUG_ON(I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE &
+		     ~I915_CONTEXT_CREATE_FLAGS_UNKNOWN);
 	if (flags & I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE &&
 	    !HAS_EXECLISTS(dev_priv))
 		return ERR_PTR(-EINVAL);
@@ -971,6 +976,8 @@ static int get_ppgtt(struct i915_gem_context *ctx,
 	struct i915_hw_ppgtt *ppgtt;
 	int ret;
 
+	return -EINVAL; /* nothing to see here; please move along */
+
 	if (!ctx->ppgtt)
 		return -ENODEV;
 
@@ -1071,6 +1078,8 @@ static int set_ppgtt(struct i915_gem_context *ctx,
 	struct i915_hw_ppgtt *ppgtt, *old;
 	int err;
 
+	return -EINVAL; /* nothing to see here; please move along */
+
 	if (args->size)
 		return -EINVAL;
 
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 9999f7d6a5a9..52051d24d89d 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -343,8 +343,6 @@ typedef struct _drm_i915_sarea {
 #define DRM_I915_PERF_ADD_CONFIG	0x37
 #define DRM_I915_PERF_REMOVE_CONFIG	0x38
 #define DRM_I915_QUERY			0x39
-#define DRM_I915_GEM_VM_CREATE		0x3a
-#define DRM_I915_GEM_VM_DESTROY		0x3b
 /* Must be kept compact -- no holes */
 
 #define DRM_IOCTL_I915_INIT		DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
@@ -405,8 +403,6 @@ typedef struct _drm_i915_sarea {
 #define DRM_IOCTL_I915_PERF_ADD_CONFIG	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_ADD_CONFIG, struct drm_i915_perf_oa_config)
 #define DRM_IOCTL_I915_PERF_REMOVE_CONFIG	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_REMOVE_CONFIG, __u64)
 #define DRM_IOCTL_I915_QUERY			DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_QUERY, struct drm_i915_query)
-#define DRM_IOCTL_I915_GEM_VM_CREATE	DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_VM_CREATE, struct drm_i915_gem_vm_control)
-#define DRM_IOCTL_I915_GEM_VM_DESTROY	DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_VM_DESTROY, struct drm_i915_gem_vm_control)
 
 /* Allow drivers to submit batchbuffers directly to hardware, relying
  * on the security mechanisms provided by hardware.
@@ -1456,9 +1452,8 @@ struct drm_i915_gem_context_create_ext {
 	__u32 ctx_id; /* output: id of new context*/
 	__u32 flags;
 #define I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS	(1u << 0)
-#define I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE	(1u << 1)
 #define I915_CONTEXT_CREATE_FLAGS_UNKNOWN \
-	(-(I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE << 1))
+	(-(I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS << 1))
 	__u64 extensions;
 };
 
@@ -1500,17 +1495,6 @@ struct drm_i915_gem_context_param {
  * On creation, all new contexts are marked as recoverable.
  */
 #define I915_CONTEXT_PARAM_RECOVERABLE	0x8
-
-	/*
-	 * The id of the associated virtual memory address space (ppGTT) of
-	 * this context. Can be retrieved and passed to another context
-	 * (on the same fd) for both to use the same ppGTT and so share
-	 * address layouts, and avoid reloading the page tables on context
-	 * switches between themselves.
-	 *
-	 * See DRM_I915_GEM_VM_CREATE and DRM_I915_GEM_VM_DESTROY.
-	 */
-#define I915_CONTEXT_PARAM_VM		0x9
 /* Must be kept compact -- no holes and well documented */
 
 	__u64 value;
-- 
cgit v1.2.3


From bb10211058880b14da5ec2ff927ff7d3b424aa05 Mon Sep 17 00:00:00 2001
From: Kieran Bingham <kieran.bingham+renesas@ideasonboard.com>
Date: Thu, 14 Mar 2019 22:04:18 +0000
Subject: drm: Fix subtle spelling error in drm_crtc_state

The drm_crtc_state documentation contains a subtle misspelling of the
word subtle. Correct it.

Signed-off-by: Kieran Bingham <kieran.bingham+renesas@ideasonboard.com>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Reviewed-by: Simon Horman <horms+renesas@verge.net.au>
Reviewed-by: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
Signed-off-by: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
---
 include/drm/drm_crtc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h
index f7c3022dbdf4..58ad983d7cd6 100644
--- a/include/drm/drm_crtc.h
+++ b/include/drm/drm_crtc.h
@@ -78,7 +78,7 @@ struct drm_plane_helper_funcs;
 /**
  * struct drm_crtc_state - mutable CRTC state
  *
- * Note that the distinction between @enable and @active is rather subtile:
+ * Note that the distinction between @enable and @active is rather subtle:
  * Flipping @active while @enable is set without changing anything else may
  * never return in a failure from the &drm_mode_config_funcs.atomic_check
  * callback. Userspace assumes that a DPMS On will always succeed. In other
-- 
cgit v1.2.3


From 2471e7a534b7c7b42803b175d2fcc2b9dca600c3 Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
Date: Wed, 13 Mar 2019 12:29:05 +0200
Subject: drm: Forward-declare struct drm_format_info in drm_framebuffer.h

drm_framebuffer.h makes use of a pointer to struct drm_format_info but
doesn't include drm_fourcc.h (neither directly nor indirectly).
Forward-declare the structure.

Signed-off-by: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
Reviewed-by: Jani Nikula <jani.nikula@intel.com>
Reviewed-by: Sam Ravnborg <sam@ravnborg.org>
---
 include/drm/drm_framebuffer.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/drm/drm_framebuffer.h b/include/drm/drm_framebuffer.h
index f0b34c977ec5..c23016748e3f 100644
--- a/include/drm/drm_framebuffer.h
+++ b/include/drm/drm_framebuffer.h
@@ -32,6 +32,7 @@
 struct drm_clip_rect;
 struct drm_device;
 struct drm_file;
+struct drm_format_info;
 struct drm_framebuffer;
 struct drm_gem_object;
 
-- 
cgit v1.2.3